diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..63ea463ded9407870911961eedefe070d0a19891 --- /dev/null +++ b/.gitignore @@ -0,0 +1,35 @@ +# Python +__pycache__ +*.pyc +*.egg-info + +# Log +*.log +*.log.* +# *.json +# *.jsonl + +# Data +!**/alpaca-data-conversation.json + +# Editor +.idea +*.swp + +# Other +.DS_Store +wandb +# output + +checkpoints +ckpts* +pretrained* + +.ipynb_checkpoints +*.ipynb + +# DevContainer +!.devcontainer/* + +# Demo +serve_images/ diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000000000000000000000000000000000000..341ddaa21d0c04d2cee7c5ad85b186e926eddaca --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,304 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +**Grasp Any Region (GAR)** is a research project for region-level multimodal understanding in vision-language models. It enables: + +1. **Single Region Understanding**: Detailed description of specific image/video regions via points/boxes/scribbles/masks +2. **Multi-Region Reasoning**: Complex relationship modeling and reasoning across multiple regions simultaneously +3. **Advanced Compositional Reasoning**: Active dialogue about regions rather than passive description + +The model is built on top of Facebook's Perception-LM architecture and uses xTuner training framework with PyTorch distributed training. 
+ +## Architecture + +### Core Components + +**Model Architecture** (`projects/grasp_any_region/models/grasp_any_region.py:GraspAnyRegion`): +- Wraps `PerceptionLMForConditionalGeneration` from HuggingFace +- Key innovation: **RoI-aligned feature replay technique** using `torchvision.ops.roi_align` +- Adds `mask_patch_embedding` layer (Conv2d) for region mask encoding +- Supports 15 visual prompt tokens (one per prompt ID, 0-14) plus a dedicated no-prompt (background) token +- Forward pass implements feature replay mechanism at grasp_any_region.py:291-377 + +**Visual Prompt System**: +- Masks are encoded with prompt IDs (0-14) where each ID represents a different region +- Special value 15 (the no-prompt token) indicates background/non-region areas +- RoI features are extracted using bounding boxes and replayed into the sequence at crop token positions + +**Training Pipeline**: +- Uses xTuner framework (built on MMEngine) +- Dataset: Arrow format with three subsets (Seed, Fine-Grained, Relation) +- Custom collate function handles variable-length sequences and multi-region inputs +- Flash Attention 2 required for efficiency + +### Directory Structure + +``` +projects/grasp_any_region/ # Main model code + ├── configs/ # Training configs (gar_1b.py, gar_8b.py) + ├── models/ + │ ├── grasp_any_region.py # Main model wrapper + │ └── modeling/ # Custom PerceptionLM implementations + ├── datasets/ # Dataset and data loading + └── hf_models/ # HuggingFace conversion utilities + +demo/ # Inference demos + ├── gar_with_mask.py # Direct mask input + ├── gar_with_sam.py # SAM-based region selection + ├── gar_relationship.py # Multi-region reasoning + └── gradio/ # Web demo + +evaluation/ # Benchmarks + ├── GAR-Bench/ # Custom benchmark (Caption-Simple, Caption-Detailed, VQA) + ├── DLC-Bench/ # Detailed localized captioning + ├── Ferret-Bench/ # Region description + └── MDVP-Bench/ # Multi-domain visual perception + +tools/ + ├── train.py # Training entry point + ├── test.py # Testing entry point + └── dist.sh # Distributed training 
launcher +``` + +## Common Commands + +### Environment Setup + +```bash +# Create environment (requires Python 3.11.2) +conda create -n gar python=3.11.2 -y +conda activate gar + +# Install dependencies +pip3 install xtuner==0.2.0rc0 +pip3 install -r requirements.txt +pip3 install flash-attn==2.7.4.post1 --no-build-isolation -v +``` + +### Training + +```bash +# Single-node distributed training (8 GPUs) +bash tools/dist.sh train projects/grasp_any_region/configs/gar_1b.py 8 + +# The dist.sh script uses torchrun with: +# - Configurable MASTER_ADDR, PORT, NNODES, NODE_RANK +# - DeepSpeed Zero2 by default (set DEEPSPEED env var to override) +# - 5-hour timeout (TORCHELASTIC_TIMEOUT=18000) +``` + +**Config Files**: +- `projects/grasp_any_region/configs/gar_1b.py` - 1B model +- `projects/grasp_any_region/configs/gar_8b.py` - 8B model + +Key training settings (gar_1b.py): +- Base model: `facebook/Perception-LM-1B` +- Batch size: 1 per device × 2 accumulation × 32 GPUs = 64 global +- Learning rate: 1e-5 (AdamW), warmup: 3%, cosine annealing +- Max length: 16384 tokens +- Saves every 5000 steps, keeps last 2 checkpoints + +### Dataset Preparation + +```bash +# Download dataset from HuggingFace +hf download HaochenWang/Grasp-Any-Region-Dataset --local-dir data --repo-type dataset + +# Expected structure: +# data/ +# ├── Seed-Dataset/data-*.arrow +# ├── Fine-Grained-Dataset/data-*.arrow +# └── Relation-Dataset/data-*.arrow +``` + +### Inference Demos + +**Single Region with Mask**: +```bash +torchrun --nproc-per-node=1 --master-port=8119 \ + demo/gar_with_mask.py \ + --image_path assets/demo_image_1.png \ + --mask_path assets/demo_mask_1.png +``` + +**Single Region with SAM** (points or box): +```bash +# Using points +torchrun --nproc-per-node=1 --master-port=8119 \ + demo/gar_with_sam.py \ + --image_path assets/demo_image_2.jpg \ + --points '[[1172, 812], [1572, 800]]' + +# Using bounding box +torchrun --nproc-per-node=1 --master-port=8119 \ + demo/gar_with_sam.py \ + 
--image_path assets/demo_image_2.jpg \ + --box '[800, 500, 1800, 1000]' \ + --use_box +``` + +**Multi-Region Relationship**: +```bash +torchrun --nproc-per-node=1 --master-port=8119 \ + demo/gar_relationship.py \ + --image_path assets/demo_image_3.png \ + --mask_paths "['assets/demo_mask_3_0.png', 'assets/demo_mask_3_1.png', 'assets/demo_mask_3_2.png']" \ + --question_str 'Question: What is the relationship between , , and ?' +``` + +**Gradio Demo**: +```bash +cd demo/gradio +pip install -r requirements.txt +python app.py +``` + +### Evaluation + +All evaluation scripts follow the same pattern: inference → evaluation with LLM judge (GPT-4o or Llama). + +**GARBench-Caption-Simple**: +```bash +# Inference +torchrun --nproc-per-node=1 --master-port=9811 \ + evaluation/GAR-Bench/inference.py \ + --model_name_or_path HaochenWang/GAR-8B \ + --anno_file evaluation/GAR-Bench/annotations/GAR-Bench-Caption-Simple.json \ + --mode simple \ + --cache_name my_test \ + --data_type bf16 \ + --seed 42 + +# Evaluation (requires Azure OpenAI) +export AZURE_OPENAI_ENDPOINT=YOUR_ENDPOINT +export AZURE_OPENAI_KEY=YOUR_KEY +python3 evaluation/GAR-Bench/eval_simple.py \ + --pred evaluation/GAR-Bench/model_outputs/my_test_simple.json +``` + +**GARBench-VQA** (multi-region reasoning): +```bash +torchrun --nproc-per-node=1 --master-port=9811 \ + evaluation/GAR-Bench/inference.py \ + --model_name_or_path HaochenWang/GAR-8B \ + --anno_file evaluation/GAR-Bench/annotations/GAR-Bench-VQA.json \ + --mode vqa \ + --cache_name my_test \ + --data_type bf16 +# VQA evaluation is automatic (no LLM judge) +``` + +**DLC-Bench** (detailed localized captioning): +```bash +# Download images first +cd evaluation/DLC-Bench/annotations +hf download nvidia/DLC-Bench --repo-type dataset --include "images/*" --local-dir ./ +cd ../../.. 
+ +# Inference +torchrun --nproc-per-node=1 --master-port=8841 \ + evaluation/DLC-Bench/inference.py \ + --model_name_or_path HaochenWang/GAR-8B \ + --cache_name my_test \ + --data_type bf16 + +# Evaluation with GPT-4o +python3 evaluation/DLC-Bench/eval_gpt_with_image.py \ + --pred evaluation/DLC-Bench/model_outputs/my_test.json + +# Alternative: Evaluation with Llama3.1-8B (requires vLLM server) +bash evaluation/DLC-Bench/serve_judge.sh # in one terminal +python3 evaluation/DLC-Bench/eval_llama_without_image.py \ + --pred evaluation/DLC-Bench/model_outputs/my_test.json \ + --base_url http://localhost:8007/v1 +``` + +### Model Conversion + +```bash +# Convert trained checkpoint to HuggingFace format +python3 projects/grasp_any_region/hf_models/convert_to_hf.py \ + projects/grasp_any_region/configs/gar_1b.py \ + --pth-model PATH_TO_PTH_MODEL \ + --save-path PATH_TO_SAVE_FOLDER + +# Note: Manually copy required .py files to save folder after conversion +``` + +## Key Implementation Details + +### RoI Feature Replay Mechanism + +The core innovation is at `grasp_any_region.py:291-377`: + +1. Image features are extracted as tiles (16×16 patches per tile) +2. Tiles are merged into full-resolution feature map +3. For each `` token in input: + - Extract RoI bounding box from `data["bboxes"]` + - Apply `torchvision.ops.roi_align` to extract 16×16 features + - Replace prompt tokens in sequence with RoI features +4. 
This allows attending to region-specific features with global context + +### Mask Encoding + +Masks are provided as 3-channel images where pixel values encode prompt IDs: +- Values 0-14: Different region prompts +- Value 15 (or `prompt_numbers`): Background (no prompt) +- `mask_patch_embedding` (Conv2d) encodes binary masks into feature space +- Masks are processed at patch level matching vision encoder stride + +### Data Format + +Dataset uses Arrow format with fields: +- `pixel_values`: (num_tiles, 3, H, W) image tiles +- `input_ids`: Token sequence with special image/prompt tokens +- `labels`: Target sequence (-100 for non-loss positions) +- `global_mask_values`: Region masks with prompt IDs +- `aspect_ratios`: (ncw, nch) tile arrangement +- `bboxes`: Dict mapping crop tokens to normalized bbox coordinates + +### Special Tokens + +The model extends the base tokenizer with: +- 15 region prompt tokens (one per prompt ID, 0-14): Region identifiers in text +- One no-prompt token (ID 15): Background/non-region marker +- `<|reserved_special_token_{pid+2}|>`: Internal crop tokens for feature replay + +## Important Notes + +- **Flash Attention 2 is required** - training will fail without it +- **Python 3.11.2 specifically** - later versions may have compatibility issues +- **Single batch size only** - code asserts `batch_size=1` at grasp_any_region.py:270 +- **Distributed training required** - single-GPU training not well supported +- **DeepSpeed Zero2** - default optimization for memory efficiency +- **torchrun vs torch.distributed.launch** - dist.sh tries torchrun first, falls back to launch +- **xTuner framework** - all training uses xTuner's runner, not native PyTorch +- **Evaluation randomness** - LLM judges have variance even with temperature=0 + +## HuggingFace Models + +Pre-trained models available: +- `HaochenWang/GAR-1B` - 1 billion parameter model +- `HaochenWang/GAR-8B` - 8 billion parameter model + +Base architecture: +- `facebook/Perception-LM-1B` - Base vision-language model +- `facebook/Perception-LM-8B` - Larger 
variant + +## Citation + +```bibtex +@article{wang2025grasp, + title={Grasp Any Region: Prompting MLLM to Understand the Dense World}, + author={Haochen Wang and Yuhao Wang and Tao Zhang and Yikang Zhou and Yanwei Li and Jiacong Wang and Ye Tian and Jiahao Meng and Zilong Huang and Guangcan Mai and Anran Wang and Yunhai Tong and Zhuochen Wang and Xiangtai Li and Zhaoxiang Zhang}, + journal={arXiv preprint arXiv:2510.18876}, + year={2025} +} +``` + +## License + +Apache-2.0 License diff --git a/GRADIO_APP_SUMMARY.md b/GRADIO_APP_SUMMARY.md new file mode 100644 index 0000000000000000000000000000000000000000..7c23f5b6c708098816255d01a1b1e1baf6445a77 --- /dev/null +++ b/GRADIO_APP_SUMMARY.md @@ -0,0 +1,180 @@ +# Gradio App Summary for Grasp Any Region (GAR) + +## ✅ Completion Status + +Successfully created a comprehensive Gradio demo for the Grasp Any Region (GAR) project. + +## 📁 Files Created/Modified + +### 1. **app.py** (NEW) +- Complete Gradio interface with 3 tabs: + - **Points → Describe**: Interactive point-based segmentation with SAM + - **Box → Describe**: Bounding box-based segmentation + - **Mask → Describe**: Direct mask upload for region description +- Features: + - ZeroGPU integration with `@spaces.GPU` decorator + - Proper import order (spaces first, then CUDA packages) + - SAM (Segment Anything Model) integration for interactive segmentation + - GAR-1B model for detailed region descriptions + - Visualization with contours and input annotations + - Example images and clear instructions + - Error handling and status messages + +### 2. 
**requirements.txt** (UPDATED) +- Gradio 5.49.1 (required version) +- httpx version fixed to >=0.24.1,<1.0 (Gradio compatibility) +- PyTorch 2.8.0 (pinned for FlashAttention compatibility) +- FlashAttention 2.8.3 prebuilt wheel (PyTorch 2.8, Python 3.10, CUDA 12, abiFALSE) +- spaces==0.30.4 for ZeroGPU +- All original dependencies preserved +- Segment Anything from GitHub +- Vision libraries (opencv-python, pillow, pycocotools) +- Transformers 4.56.2 and supporting ML libraries + +## 🎯 Key Features + +1. **Three Interaction Modes**: + - Points: Click or enter coordinates to segment regions + - Box: Draw or enter bounding boxes + - Mask: Upload pre-made masks directly + +2. **Model Integration**: + - GAR-1B for region understanding (1 billion parameters) + - SAM ViT-Huge for automatic segmentation + - Both models loaded once at startup for efficiency + +3. **ZeroGPU Optimization**: + - Proper `@spaces.GPU(duration=120)` decorator usage + - 2-minute GPU allocation per function call + - NVIDIA H200 with 70GB VRAM available + - Critical import order: `spaces` imported before torch + +4. **User Experience**: + - Clear step-by-step instructions + - Example images included + - Real-time visualization with overlays + - Comprehensive error handling + - Professional UI with Gradio 5.x Soft theme + +## 🔧 Technical Details + +### Import Order (CRITICAL) +```python +# 🚨 spaces MUST be imported FIRST +import spaces + +# Then import CUDA packages +import torch +from transformers import AutoModel, AutoProcessor +``` + +This prevents the "CUDA has been initialized" error. 
+ +### FlashAttention Configuration +- Using prebuilt wheel for PyTorch 2.8.0 +- Python 3.10 (cp310) +- CUDA 12 (cu12) +- abiFALSE (REQUIRED - never use abiTRUE) +- URL: https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu12torch2.8cxx11abiFALSE-cp310-cp310-linux_x86_64.whl + +### Model Loading Strategy +- Models loaded once at startup (outside decorated functions) +- Moved to CUDA device after loading +- GPU-decorated functions only handle inference +- Efficient memory usage + +## 📋 Dependencies Highlights + +**Core:** +- gradio==5.49.1 +- torch==2.8.0 +- spaces==0.30.4 +- flash-attn (prebuilt wheel) + +**AI/ML:** +- transformers==4.56.2 +- accelerate>=0.28.0 +- timm==1.0.19 +- peft==0.15.2 + +**Vision:** +- opencv-python +- pillow>=9.4.0 +- segment-anything (from GitHub) +- pycocotools + +## 🎨 UI Structure + +``` +Grasp Any Region (GAR) Demo +├── Introduction & Links +├── Tab 1: Points → Describe +│ ├── Image upload + points input +│ ├── Generate Mask button +│ ├── Describe Region button +│ └── Outputs: mask, visualization, description +├── Tab 2: Box → Describe +│ ├── Image upload + box input +│ ├── Generate Mask button +│ ├── Describe Region button +│ └── Outputs: mask, visualization, description +├── Tab 3: Mask → Describe +│ ├── Image upload + mask upload +│ ├── Describe Region button +│ └── Outputs: visualization, description +└── Documentation & Citation +``` + +## 🚀 How to Run + +```bash +# Install dependencies +pip install -r requirements.txt + +# Run the app +python app.py +``` + +The app will automatically: +1. Load GAR-1B and SAM models +2. Launch Gradio interface +3. Allocate GPU on-demand with ZeroGPU + +## 📊 Expected Performance + +- **Model**: GAR-1B (lightweight, fast inference) +- **GPU**: NVIDIA H200, 70GB VRAM +- **Inference Time**: ~10-30 seconds per region (depending on complexity) +- **Max New Tokens**: 1024 (configurable) + +## ⚠️ Important Notes + +1. 
**Import Order**: Always import `spaces` before torch/CUDA packages +2. **Python Version**: Requires Python 3.10 (for FlashAttention wheel) +3. **FlashAttention**: Uses prebuilt wheel (no compilation needed) +4. **Asset Files**: Demo expects images in `assets/` directory +5. **SingleRegionCaptionDataset**: Required from evaluation module + +## 🔗 References + +- **Paper**: https://arxiv.org/abs/2510.18876 +- **GitHub**: https://github.com/Haochen-Wang409/Grasp-Any-Region +- **Model**: https://huggingface.co/HaochenWang/GAR-1B +- **SAM**: https://github.com/facebookresearch/segment-anything + +## 📝 Citation + +```bibtex +@article{wang2025grasp, + title={Grasp Any Region: Prompting MLLM to Understand the Dense World}, + author={Haochen Wang et al.}, + journal={arXiv preprint arXiv:2510.18876}, + year={2025} +} +``` + +--- + +**Created**: 2025-10-25 +**Status**: ✅ Ready for deployment +**Hardware**: zerogpu (NVIDIA H200, 70GB VRAM) diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md index d325ea70e0cdef85877d682974e166627c22bae4..fdabec8d0e2d8ddb94ecfbc17c7797800bf57c36 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,56 @@ --- -title: SNIPED Grasp-any-region -emoji: ⚡ -colorFrom: green -colorTo: purple +title: "Grasp-Any-Region" +emoji: 🤖 +colorFrom: yellow +colorTo: blue sdk: gradio sdk_version: 5.49.1 app_file: app.py pinned: false +short_description: "Manual Entry: https://huggingface.co/papers/2510.18876" +hardware: zerogpu +tags: + - research + - paper + - code + - cheatcode +license: mit --- -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference +# Grasp-Any-Region + +**Automated upload by CheatCode** 🚀 + +## 📄 Paper Information + +- **Paper ID**: 2510.18876 +- **Title**: Manual Entry: https://huggingface.co/papers/2510.18876 +- **Original Repository**: [https://github.com/Haochen-Wang409/Grasp-Any-Region](https://github.com/Haochen-Wang409/Grasp-Any-Region) + +## 🛠️ Repository Information + +- **Languages**: JavaScript, Python, Shell, TypeScript +- **Gradio App**: ✅ Generated by CheatCode + +## 🤖 About CheatCode + +This Space was automatically created by [CheatCode](https://github.com/jbilcke-hf/CheatCode), +an AI-powered tool that: + +1. Discovers research papers from HuggingFace +2. Extracts and analyzes linked repositories +3. Generates Gradio demo applications +4. 
Uploads everything to HuggingFace Spaces + +## 📝 Usage + +This Space includes a Gradio app that was automatically generated from the repository code. + +## ⚠️ Disclaimer + +This is an automated upload. The code comes from the original repository and may require +additional configuration or dependencies to run properly. + +## 📜 License + +Please refer to the original repository for licensing information: https://github.com/Haochen-Wang409/Grasp-Any-Region diff --git a/README_original.md b/README_original.md new file mode 100644 index 0000000000000000000000000000000000000000..f7077e32af29869342f796271d1138887ccd4642 --- /dev/null +++ b/README_original.md @@ -0,0 +1,208 @@ +--- +title: Grasp Any Region - Region-Level Visual Understanding +emoji: 🎯 +colorFrom: blue +colorTo: purple +sdk: gradio +sdk_version: 5.49.1 +app_file: app.py +pinned: false +short_description: A multimodal model for precise region-level understanding and reasoning in images and videos +hardware: zerogpu +--- + +# Grasp Any Region: Towards Precise, Contextual Pixel Understanding for Multimodal LLMs + +by +[Haochen Wang](https://haochen-wang409.github.io), +Yuhao Wang, +[Tao Zhang](https://scholar.google.com/citations?user=3xu4a5oAAAAJ), +[Yikang Zhou](https://scholar.google.com/citations?user=dZikW2YAAAAJ), +[Yanwei Li](https://yanwei-li.com/), +[Jiacong Wang](https://scholar.google.com/citations?user=rzYgLkgAAAAJ), +[Ye Tian](https://scholar.google.com/citations?user=vUY_PIUAAAAJ), +[Jiahao Meng](https://scholar.google.com/citations?user=NJfjvfIAAAAJ), +[Zilong Huang](https://speedinghzl.github.io/), +[Guangcan Mai](https://scholar.google.com/citations?user=739cUNMAAAAJ), +[Anran Wang](https://sites.google.com/view/anranwang/home), +[Yunhai Tong](https://scholar.google.com/citations?user=T4gqdPkAAAAJ), +Zhuochen Wang, +[Xiangtai Li](https://lxtgh.github.io/), and +[Zhaoxiang Zhang](https://scholar.google.com/citations?user=qxWfV6cAAAAJ). 
+ +[[Paper](https://arxiv.org/abs/2510.18876)] | [[HuggingFace](https://huggingface.co/collections/HaochenWang/grasp-any-region-68f7433671030d6ea682f692)] | [[Citation](#citation)] + +**TL; DR**: Our Grasp Any Region (GAR) supports both (1) describing a *single* region of an image or a video in the form of points/boxes/scribbles/masks in detail and (2) understanding *multiple* regions such as modeling interactions and performing complex reasoning. We also release a new benchmark, GARBench, to evaluate models on advanced region-level understanding tasks. + +![](./assets/teaser.png) + +> **Abstract.** While Multimodal Large Language Models (MLLMs) excel at holistic understanding, they struggle +> in capturing the dense world with complex scenes, requiring fine-grained analysis of intricate +> details and object inter-relationships. Region-level MLLMs have been a promising step. However, +> previous attempts are generally optimized to understand given regions in isolation, neglecting +> crucial global contexts. To address this, we introduce Grasp Any Region (GAR) for comprehensive +> region-level visual understanding. Empowered by an effective RoI-aligned feature replay +> technique, GAR supports (1) precise perception by leveraging necessary global contexts, and (2) +> modeling interactions between multiple prompts. Together, it then naturally achieves (3) advanced +> compositional reasoning to answer specific free-form questions about any region, shifting the +> paradigm from passive description to active dialogue. Moreover, we construct GARBench, which +> not only provides a more accurate evaluation of single-region comprehension, but also, more +> importantly, measures interactions and complex reasoning across multiple regions. 
Extensive +> experiments have demonstrated that GAR-1B not only maintains the state-of-the-art captioning +> capabilities, e.g., outperforming DAM-3B +4.5 on DLC-Bench, but also excels at modeling +> relationships between multiple prompts with advanced comprehension capabilities, even surpassing +> InternVL3-78B on GARBench-VQA. More importantly, our zero-shot GAR-8B even outperforms +> in-domain VideoRefer-7B on VideoRefer-BenchQ, indicating its strong capabilities can be easily +> transferred to videos. + +# Installation + +```bash +conda create -n gar python=3.11.2 -y +conda activate gar + +pip3 install xtuner==0.2.0rc0 +pip3 install -r requirements.txt +pip3 install flash-attn==2.7.4.post1 --no-build-isolation -v +``` + +# Demos + +## Gradio Demo + +Please refer to [`demo/gradio/README.md`](demo/gradio/README.md) for serving an online captioning demo using gradio. + +## Examples + +### Detailed Localized Image Descriptions with Masks + +- [`demo/gar_with_mask.py`](demo/gar_with_mask.py) - Command-line tool for processing single images, allowing users to specify the region-of-interest using its segmentation mask. + +
+Expand to see example commands + + + +```bash +torchrun --nproc-per-node=1 --master-port=8119 demo/gar_with_mask.py --image_path assets/demo_image_1.png --mask_path assets/demo_mask_1.png +``` + +**Input instruction:** Describe the masked region in detail. + +**Output answer:** A bright green, **frog-shaped slipper** with a smooth, rounded body and a wide, open mouth. The slipper has a small, raised bump on the top of its head, resembling a frog's eye. + +
+ +### Detailed Localized Image Descriptions with SAM + +- [`demo/gar_with_sam.py`](demo/gar_with_sam.py) - Command-line tool for processing single images using SAM v1, allowing users to specify points or bounding boxes for mask generation + +
+Expand to see example commands + + + +```bash +# You can use it with points or a bounding box for the region of interest. +# SAM is used to turn points or a bounding box into a mask. +# You can also use mask directly, see `demo/gar_with_mask.py`. +torchrun --nproc-per-node=1 --master-port=8119 demo/gar_with_sam.py --image_path assets/demo_image_2.jpg --points '[[1172, 812], [1572, 800]]' --output_image_path output_visualization.png +torchrun --nproc-per-node=1 --master-port=8119 demo/gar_with_sam.py --image_path assets/demo_image_2.jpg --box '[800, 500, 1800, 1000]' --use_box --output_image_path output_visualization.png +``` + +**Input instruction:** Describe the masked region in detail. + +**Output answer:** A medium-sized, short-haired dog with a predominantly tan coat featuring white markings on its face, chest, and paws. The dog has a white stripe running down the center of its face, extending from the forehead to the nose. Its ears are large, pointed, and stand erect. The dog is wearing a red collar with a visible tag. Its mouth is open, revealing its tongue and teeth, and it appears to be in mid-leap with its front legs extended forward and hind legs stretched out behind. + +
+ +### Modeling Complex Relationship between Multiple Regions + +- [`demo/gar_relationship.py`](demo/gar_relationship.py) - Command-line tool for processing single images with multiple regions-of-interest, allowing users to specify the region-of-interest using its segmentation mask + +
+Expand to see example commands + + + +```bash +torchrun --nproc-per-node=1 --master-port=8119 demo/gar_relationship.py --image_path assets/demo_image_3.png --mask_paths "['assets/demo_mask_3_0.png', 'assets/demo_mask_3_1.png', 'assets/demo_mask_3_2.png']" --question_str 'Question: What is the relationship between , , and ?\nOptions:\nA. is using to point at \nB. has already hit with \nC. is swinging and is about to hit \nD. is holding while looking away from ' +``` + +**Input instruction:** + +``` +Question: What is the relationship between , , and ? +Options: +A. is using to point at +B. has already hit with +C. is swinging and is about to hit +D. is holding while looking away from +Answer with the correct option's letter directly. +``` + +**Output answer:** C + +Note that ``, ``, and `` are illustrated in red, green, and blue, respectively. + +
+ +# Training + +**1. Dataset Preparation** + +First, download the dataset: + +`hf download HaochenWang/Grasp-Any-Region-Dataset --local-dir data --repo-type dataset` + +The overall data structure should be: +```sh +data +├── Fine-Grained-Dataset +│ └── data-*-of-*.arrow +├── Relation-Dataset +│ └── data-*-of-*.arrow +└── Seed-Dataset + └── data-*-of-*.arrow +``` + +**2. Launch Training** + +Next, run the following script to train using 8 GPUs: + +`bash tools/dist.sh train projects/grasp_any_region/configs/gar_1b.py 8` + +**3. Convert to HuggingFace Format** + +```python3 projects/grasp_any_region/hf_models/convert_to_hf.py projects/grasp_any_region/configs/gar_1b.py --pth-model PATH_TO_PTH_MODEL --save-path PATH_TO_SAVE_FOLDER``` + +Note that this script only converts the checkpoint; some `*.py` files must be copied manually to `${PATH_TO_SAVE_FOLDER}`. + +# Evaluation + +Please refer to [`evaluation/EVALUATION.md`](evaluation/EVALUATION.md). + +# License + +This project is licensed under the [Apache-2.0 License](LICENSE). + +# Citation + +If you use our work or our implementation in this repo, or find them helpful, please consider giving a citation in the following format. 
+ +``` +@article{wang2025grasp, + title={Grasp Any Region: Prompting MLLM to Understand the Dense World}, + author={Haochen Wang and Yuhao Wang and Tao Zhang and Yikang Zhou and Yanwei Li and Jiacong Wang and Ye Tian and Jiahao Meng and Zilong Huang and Guangcan Mai and Anran Wang and Yunhai Tong and Zhuochen Wang and Xiangtai Li and Zhaoxiang Zhang}, + journal={arXiv preprint arXiv:2510.18876}, + year={2025} +} +``` + +# Acknowledgements + +We would like to thank the following projects for their contributions to this work: + +- [SAM](https://github.com/facebookresearch/segment-anything) +- [DAM](https://github.com/NVlabs/describe-anything) +- [Sa2VA](https://github.com/bytedance/Sa2VA) \ No newline at end of file diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..77888224da7545b2851b17b88240505dc5a3b80a --- /dev/null +++ b/app.py @@ -0,0 +1,442 @@ +# ************************************************************************* +# Grasp Any Region (GAR) - Gradio Demo +# Region-level Multimodal Understanding for Vision-Language Models +# ************************************************************************* + +# 🚨 CRITICAL: Import spaces FIRST before any CUDA-related packages +import spaces + +# Now import CUDA-related packages +import torch +import numpy as np +from PIL import Image +import gradio as gr +from transformers import ( + AutoModel, + AutoProcessor, + GenerationConfig, + SamModel, + SamProcessor, +) +import cv2 +import sys +import os + +# Add project root to path for imports +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + +try: + from evaluation.eval_dataset import SingleRegionCaptionDataset +except ImportError: + print("Warning: Could not import SingleRegionCaptionDataset. 
Using simplified version.") + SingleRegionCaptionDataset = None + +# Initialize device +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +# Global model variables (loaded once) +gar_model = None +gar_processor = None +sam_model = None +sam_processor = None + +def load_models(): + """Load models once at startup""" + global gar_model, gar_processor, sam_model, sam_processor + + if gar_model is None: + print("Loading GAR model...") + model_path = "HaochenWang/GAR-1B" + gar_model = AutoModel.from_pretrained( + model_path, + trust_remote_code=True, + torch_dtype=torch.bfloat16, + device_map="auto", + ).eval() + + gar_processor = AutoProcessor.from_pretrained( + model_path, + trust_remote_code=True, + ) + print("GAR model loaded successfully!") + + if sam_model is None: + print("Loading SAM model...") + sam_model = SamModel.from_pretrained("facebook/sam-vit-huge").to(device) + sam_processor = SamProcessor.from_pretrained("facebook/sam-vit-huge") + print("SAM model loaded successfully!") + +@spaces.GPU(duration=120) +def generate_mask_from_points(image, points_str): + """Generate mask using SAM from point coordinates""" + try: + load_models() + + if not points_str or points_str.strip() == "": + return None, "Please provide points in format: x1,y1;x2,y2" + + # Parse points + points = [] + labels = [] + for point in points_str.split(';'): + point = point.strip() + if point: + x, y = map(float, point.split(',')) + points.append([x, y]) + labels.append(1) # Foreground point + + if not points: + return None, "No valid points provided" + + # Apply SAM + inputs = sam_processor( + image, + input_points=[points], + input_labels=[labels], + return_tensors="pt", + ).to(device) + + with torch.no_grad(): + outputs = sam_model(**inputs) + + masks = sam_processor.image_processor.post_process_masks( + outputs.pred_masks.cpu(), + inputs["original_sizes"].cpu(), + inputs["reshaped_input_sizes"].cpu(), + )[0][0] + + scores = outputs.iou_scores[0, 0] + 
mask_selection_index = scores.argmax() + mask_np = masks[mask_selection_index].numpy() + + # Visualize mask + mask_img = (mask_np * 255).astype(np.uint8) + + return Image.fromarray(mask_img), "Mask generated successfully!" + + except Exception as e: + return None, f"Error generating mask: {str(e)}" + +@spaces.GPU(duration=120) +def generate_mask_from_box(image, box_str): + """Generate mask using SAM from bounding box""" + try: + load_models() + + if not box_str or box_str.strip() == "": + return None, "Please provide box in format: x1,y1,x2,y2" + + # Parse box + box = list(map(float, box_str.split(','))) + if len(box) != 4: + return None, "Box must have 4 coordinates: x1,y1,x2,y2" + + # Apply SAM + inputs = sam_processor( + image, + input_boxes=[[box]], + return_tensors="pt", + ).to(device) + + with torch.no_grad(): + outputs = sam_model(**inputs) + + masks = sam_processor.image_processor.post_process_masks( + outputs.pred_masks.cpu(), + inputs["original_sizes"].cpu(), + inputs["reshaped_input_sizes"].cpu(), + )[0][0] + + scores = outputs.iou_scores[0, 0] + mask_selection_index = scores.argmax() + mask_np = masks[mask_selection_index].numpy() + + # Visualize mask + mask_img = (mask_np * 255).astype(np.uint8) + + return Image.fromarray(mask_img), "Mask generated successfully!" 
+ + except Exception as e: + return None, f"Error generating mask: {str(e)}" + +@spaces.GPU(duration=120) +def describe_region(image, mask): + """Generate description for a region defined by a mask""" + try: + load_models() + + if image is None: + return "Please provide an image" + + if mask is None: + return "Please provide a mask (upload or generate using SAM)" + + # Convert mask to numpy + if isinstance(mask, Image.Image): + mask_np = np.array(mask.convert("L")) + else: + mask_np = np.array(mask) + + # Ensure mask is binary + mask_np = (mask_np > 127).astype(np.uint8) + + # Prepare data + prompt_number = gar_model.config.prompt_numbers + prompt_tokens = [f"" for i_p in range(prompt_number)] + [""] + + if SingleRegionCaptionDataset is not None: + dataset = SingleRegionCaptionDataset( + image=image, + mask=mask_np, + processor=gar_processor, + prompt_number=prompt_number, + visual_prompt_tokens=prompt_tokens, + data_dtype=torch.bfloat16, + ) + data_sample = dataset[0] + else: + # Simplified processing if dataset class not available + # This is a fallback - the actual implementation requires SingleRegionCaptionDataset + return "Error: SingleRegionCaptionDataset not available. Please check installation." 
+ + # Generate description + with torch.no_grad(): + generate_ids = gar_model.generate( + **data_sample, + generation_config=GenerationConfig( + max_new_tokens=1024, + do_sample=False, + eos_token_id=gar_processor.tokenizer.eos_token_id, + pad_token_id=gar_processor.tokenizer.pad_token_id, + ), + return_dict=True, + ) + + output_caption = gar_processor.tokenizer.decode( + generate_ids.sequences[0], skip_special_tokens=True + ).strip() + + return output_caption + + except Exception as e: + return f"Error generating description: {str(e)}" + +def create_visualization(image, mask, points_str=None, box_str=None): + """Create visualization with mask overlay""" + try: + if image is None or mask is None: + return None + + img_np = np.array(image).astype(float) / 255.0 + if isinstance(mask, Image.Image): + mask_np = np.array(mask.convert("L")) > 127 + else: + mask_np = np.array(mask) > 127 + + # Draw contour + mask_uint8 = mask_np.astype(np.uint8) * 255 + contours, _ = cv2.findContours(mask_uint8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + img_vis = img_np.copy() + cv2.drawContours(img_vis, contours, -1, (1.0, 1.0, 0.0), thickness=3) + + # Draw points if provided + if points_str: + for point in points_str.split(';'): + point = point.strip() + if point: + x, y = map(float, point.split(',')) + cv2.circle(img_vis, (int(x), int(y)), radius=8, color=(1.0, 0.0, 0.0), thickness=-1) + cv2.circle(img_vis, (int(x), int(y)), radius=8, color=(1.0, 1.0, 1.0), thickness=2) + + # Draw box if provided + if box_str: + coords = list(map(float, box_str.split(','))) + if len(coords) == 4: + x1, y1, x2, y2 = map(int, coords) + cv2.rectangle(img_vis, (x1, y1), (x2, y2), color=(1.0, 1.0, 1.0), thickness=3) + cv2.rectangle(img_vis, (x1, y1), (x2, y2), color=(1.0, 0.0, 0.0), thickness=1) + + img_pil = Image.fromarray((img_vis * 255.0).astype(np.uint8)) + return img_pil + + except Exception as e: + print(f"Error creating visualization: {str(e)}") + return None + +# Create Gradio interface +with 
gr.Blocks(title="Grasp Any Region (GAR) Demo", theme=gr.themes.Soft()) as demo: + gr.Markdown(""" + # 🎯 Grasp Any Region (GAR) + + **Region-level Multimodal Understanding for Vision-Language Models** + + This demo showcases GAR's ability to understand and describe specific regions in images: + - 🎨 **Single Region Understanding**: Describe specific areas using points, boxes, or masks + - 🔍 **SAM Integration**: Generate masks interactively using Segment Anything Model + - 💡 **Detailed Descriptions**: Get comprehensive descriptions of any region + + Built on top of Perception-LM with RoI-aligned feature replay technique. + + 📄 [Paper](https://arxiv.org/abs/2510.18876) | 💻 [GitHub](https://github.com/Haochen-Wang409/Grasp-Any-Region) | 🤗 [Model](https://huggingface.co/HaochenWang/GAR-1B) + """) + + with gr.Tabs(): + # Tab 1: Points-based segmentation + with gr.Tab("🎯 Points → Describe"): + gr.Markdown("### Click points on the image or enter coordinates to segment and describe a region") + with gr.Row(): + with gr.Column(): + img_points = gr.Image(label="Input Image", type="pil") + points_input = gr.Textbox( + label="Points (format: x1,y1;x2,y2;...)", + placeholder="e.g., 1172,812;1572,800", + value="1172,812;1572,800" + ) + with gr.Row(): + gen_mask_points_btn = gr.Button("Generate Mask", variant="primary") + describe_points_btn = gr.Button("Describe Region", variant="secondary") + + with gr.Column(): + mask_points = gr.Image(label="Generated Mask", type="pil") + vis_points = gr.Image(label="Visualization") + desc_points = gr.Textbox(label="Region Description", lines=5) + + points_status = gr.Textbox(label="Status", visible=False) + + gen_mask_points_btn.click( + fn=generate_mask_from_points, + inputs=[img_points, points_input], + outputs=[mask_points, points_status] + ) + + describe_points_btn.click( + fn=describe_region, + inputs=[img_points, mask_points], + outputs=desc_points + ).then( + fn=create_visualization, + inputs=[img_points, mask_points, points_input, 
gr.Textbox(visible=False)], + outputs=vis_points + ) + + gr.Examples( + examples=[ + ["assets/demo_image_2.jpg", "1172,812;1572,800"], + ], + inputs=[img_points, points_input], + label="Example Images" + ) + + # Tab 2: Box-based segmentation + with gr.Tab("📦 Box → Describe"): + gr.Markdown("### Draw a bounding box or enter coordinates to segment and describe a region") + with gr.Row(): + with gr.Column(): + img_box = gr.Image(label="Input Image", type="pil") + box_input = gr.Textbox( + label="Bounding Box (format: x1,y1,x2,y2)", + placeholder="e.g., 800,500,1800,1000", + value="800,500,1800,1000" + ) + with gr.Row(): + gen_mask_box_btn = gr.Button("Generate Mask", variant="primary") + describe_box_btn = gr.Button("Describe Region", variant="secondary") + + with gr.Column(): + mask_box = gr.Image(label="Generated Mask", type="pil") + vis_box = gr.Image(label="Visualization") + desc_box = gr.Textbox(label="Region Description", lines=5) + + box_status = gr.Textbox(label="Status", visible=False) + + gen_mask_box_btn.click( + fn=generate_mask_from_box, + inputs=[img_box, box_input], + outputs=[mask_box, box_status] + ) + + describe_box_btn.click( + fn=describe_region, + inputs=[img_box, mask_box], + outputs=desc_box + ).then( + fn=create_visualization, + inputs=[img_box, mask_box, gr.Textbox(visible=False), box_input], + outputs=vis_box + ) + + gr.Examples( + examples=[ + ["assets/demo_image_2.jpg", "800,500,1800,1000"], + ], + inputs=[img_box, box_input], + label="Example Images" + ) + + # Tab 3: Direct mask upload + with gr.Tab("🎭 Mask → Describe"): + gr.Markdown("### Upload a pre-made mask to describe a region") + with gr.Row(): + with gr.Column(): + img_mask = gr.Image(label="Input Image", type="pil") + mask_upload = gr.Image(label="Upload Mask", type="pil") + describe_mask_btn = gr.Button("Describe Region", variant="primary") + + with gr.Column(): + vis_mask = gr.Image(label="Visualization") + desc_mask = gr.Textbox(label="Region Description", lines=5) + + 
describe_mask_btn.click( + fn=describe_region, + inputs=[img_mask, mask_upload], + outputs=desc_mask + ).then( + fn=create_visualization, + inputs=[img_mask, mask_upload, gr.Textbox(visible=False), gr.Textbox(visible=False)], + outputs=vis_mask + ) + + gr.Examples( + examples=[ + ["assets/demo_image_1.png", "assets/demo_mask_1.png"], + ], + inputs=[img_mask, mask_upload], + label="Example Images" + ) + + gr.Markdown(""" + --- + ### 📖 How to Use: + + 1. **Points → Describe**: Click or enter point coordinates, generate mask, then describe + 2. **Box → Describe**: Draw or enter a bounding box, generate mask, then describe + 3. **Mask → Describe**: Upload a pre-made mask directly and describe + + ### 🔧 Technical Details: + + - **Model**: GAR-1B (1 billion parameters) + - **Base**: Facebook Perception-LM with RoI-aligned feature replay + - **Segmentation**: Segment Anything Model (SAM ViT-Huge) + - **Hardware**: Powered by ZeroGPU (NVIDIA H200, 70GB VRAM) + + ### 📚 Citation: + + ```bibtex + @article{wang2025grasp, + title={Grasp Any Region: Prompting MLLM to Understand the Dense World}, + author={Haochen Wang et al.}, + journal={arXiv preprint arXiv:2510.18876}, + year={2025} + } + ``` + """) + +# Load models on startup +try: + load_models() +except Exception as e: + print(f"Warning: Could not pre-load models: {e}") + print("Models will be loaded on first use.") + +if __name__ == "__main__": + demo.launch() diff --git a/demo/gar_relationship.py b/demo/gar_relationship.py new file mode 100644 index 0000000000000000000000000000000000000000..1edadfec19fdba7580da63e1f806ad5e866be8ce --- /dev/null +++ b/demo/gar_relationship.py @@ -0,0 +1,143 @@ +# -------------------------------------------------------- +# Copyright (2025) Bytedance Ltd. 
and/or its affiliates +# Licensed under the Apache License, Version 2.0 (the "License") +# Grasp Any Region Project +# Written by Haochen Wang +# -------------------------------------------------------- + +import argparse +import ast + +import numpy as np +import torch +from PIL import Image +from transformers import AutoModel, AutoProcessor, GenerationConfig + +from evaluation.eval_dataset import MultiRegionDataset + +TORCH_DTYPE_MAP = dict(fp16=torch.float16, bf16=torch.bfloat16, fp32=torch.float32) + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Inference of Grasp Any Region models on DLC-Bench." + ) + + parser.add_argument( + "--model_name_or_path", + help="HF model name or path", + default="HaochenWang/GAR-8B", + ) + parser.add_argument( + "--image_path", + help="image path", + required=True, + ) + parser.add_argument( + "--mask_paths", + help="mask path", + required=True, + ) + parser.add_argument( + "--question_str", + help="input instructions", + required=True, + ) + parser.add_argument( + "--data_type", + help="data dtype", + type=str, + choices=["fp16", "bf16", "fp32"], + default="bf16", + ) + parser.add_argument( + "--seed", + type=int, + default=0, + help="Random seed for reproducible text generation", + ) + args = parser.parse_args() + return args + + +def select_ann(coco, img_id, area_min=None, area_max=None): + cat_ids = coco.getCatIds() + ann_ids = coco.getAnnIds(imgIds=[img_id], catIds=cat_ids, iscrowd=None) + + if area_min is not None: + ann_ids = [ + ann_id for ann_id in ann_ids if coco.anns[ann_id]["area"] >= area_min + ] + + if area_max is not None: + ann_ids = [ + ann_id for ann_id in ann_ids if coco.anns[ann_id]["area"] <= area_max + ] + + return ann_ids + + +def main(): + args = parse_args() + data_dtype = TORCH_DTYPE_MAP[args.data_type] + torch.manual_seed(args.seed) + + # init ditribution for dispatch_modules in LLM + torch.cuda.set_device(0) + torch.distributed.init_process_group(backend="nccl") + + # build HF 
model + model = AutoModel.from_pretrained( + args.model_name_or_path, + trust_remote_code=True, + torch_dtype=data_dtype, + device_map="cuda:0", + ).eval() + + processor = AutoProcessor.from_pretrained( + args.model_name_or_path, + trust_remote_code=True, + ) + + img = Image.open(args.image_path) + masks = [] + for mask_path in ast.literal_eval(args.mask_paths): + mask = np.array(Image.open(mask_path).convert("L")).astype(bool) + masks.append(mask) + + prompt_number = model.config.prompt_numbers + prompt_tokens = [f"" for i_p in range(prompt_number)] + [""] + dataset = MultiRegionDataset( + image=img, + masks=masks, + question_str=args.question_str + + "\nAnswer with the correct option's letter directly.", + processor=processor, + prompt_number=prompt_number, + visual_prompt_tokens=prompt_tokens, + data_dtype=data_dtype, + ) + + data_sample = dataset[0] + + with torch.no_grad(): + generate_ids = model.generate( + **data_sample, + generation_config=GenerationConfig( + max_new_tokens=1024, + do_sample=False, + eos_token_id=processor.tokenizer.eos_token_id, + pad_token_id=processor.tokenizer.pad_token_id, + ), + return_dict=True, + ) + + outputs = processor.tokenizer.decode( + generate_ids.sequences[0], skip_special_tokens=True + ).strip() + + print(outputs) # Print model output for this image + + +if __name__ == "__main__": + main() diff --git a/demo/gar_with_mask.py b/demo/gar_with_mask.py new file mode 100644 index 0000000000000000000000000000000000000000..b7548550c85fb3d1aa151f1c9da6cf803e210421 --- /dev/null +++ b/demo/gar_with_mask.py @@ -0,0 +1,132 @@ +# -------------------------------------------------------- +# Copyright (2025) Bytedance Ltd. 
and/or its affiliates +# Licensed under the Apache License, Version 2.0 (the "License") +# Grasp Any Region Project +# Written by Haochen Wang +# -------------------------------------------------------- + +import argparse + +import numpy as np +import torch +from PIL import Image +from transformers import AutoModel, AutoProcessor, GenerationConfig + +from evaluation.eval_dataset import SingleRegionCaptionDataset + +TORCH_DTYPE_MAP = dict(fp16=torch.float16, bf16=torch.bfloat16, fp32=torch.float32) + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Inference demo of Grasp Any Region models." + ) + + parser.add_argument( + "--model_name_or_path", + help="HF model name or path", + default="HaochenWang/GAR-8B", + ) + parser.add_argument( + "--image_path", + help="image path", + required=True, + ) + parser.add_argument( + "--mask_path", + help="mask path", + required=True, + ) + parser.add_argument( + "--data_type", + help="data dtype", + type=str, + choices=["fp16", "bf16", "fp32"], + default="bf16", + ) + parser.add_argument( + "--seed", + type=int, + default=0, + help="Random seed for reproducible text generation", + ) + args = parser.parse_args() + return args + + +def select_ann(coco, img_id, area_min=None, area_max=None): + cat_ids = coco.getCatIds() + ann_ids = coco.getAnnIds(imgIds=[img_id], catIds=cat_ids, iscrowd=None) + + if area_min is not None: + ann_ids = [ + ann_id for ann_id in ann_ids if coco.anns[ann_id]["area"] >= area_min + ] + + if area_max is not None: + ann_ids = [ + ann_id for ann_id in ann_ids if coco.anns[ann_id]["area"] <= area_max + ] + + return ann_ids + + +def main(): + args = parse_args() + data_dtype = TORCH_DTYPE_MAP[args.data_type] + torch.manual_seed(args.seed) + + # init ditribution for dispatch_modules in LLM + torch.cuda.set_device(0) + torch.distributed.init_process_group(backend="nccl") + + # build HF model + model = AutoModel.from_pretrained( + args.model_name_or_path, + trust_remote_code=True, + 
torch_dtype=data_dtype, + device_map="cuda:0", + ).eval() + + processor = AutoProcessor.from_pretrained( + args.model_name_or_path, + trust_remote_code=True, + ) + + img = Image.open(args.image_path) + mask = np.array(Image.open(args.mask_path).convert("L")).astype(bool) + + prompt_number = model.config.prompt_numbers + prompt_tokens = [f"" for i_p in range(prompt_number)] + [""] + dataset = SingleRegionCaptionDataset( + image=img, + mask=mask, + processor=processor, + prompt_number=prompt_number, + visual_prompt_tokens=prompt_tokens, + data_dtype=data_dtype, + ) + + data_sample = dataset[0] + + with torch.no_grad(): + generate_ids = model.generate( + **data_sample, + generation_config=GenerationConfig( + max_new_tokens=1024, + do_sample=False, + eos_token_id=processor.tokenizer.eos_token_id, + pad_token_id=processor.tokenizer.pad_token_id, + ), + return_dict=True, + ) + + outputs = processor.tokenizer.decode( + generate_ids.sequences[0], skip_special_tokens=True + ).strip() + + print(outputs) # Print model output for this image + + +if __name__ == "__main__": + main() diff --git a/demo/gar_with_sam.py b/demo/gar_with_sam.py new file mode 100644 index 0000000000000000000000000000000000000000..8353523c994b6b49d2ee564831d81848fce57a56 --- /dev/null +++ b/demo/gar_with_sam.py @@ -0,0 +1,272 @@ +# ************************************************************************* +# This file may have been modified by Bytedance Inc. (“Bytedance Inc.'s Mo- +# difications”). All Bytedance Inc.'s Modifications are Copyright (2025) B- +# ytedance Inc.. +# ************************************************************************* + +# Adapted from https://github.com/NVlabs/describe-anything/blob/main/examples/dam_with_sam.py + +# Copyright 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: Apache-2.0 + +import argparse +import ast + +import cv2 +import numpy as np +import torch +from PIL import Image +from transformers import ( + AutoModel, + AutoProcessor, + GenerationConfig, + SamModel, + SamProcessor, +) + +from evaluation.eval_dataset import SingleRegionCaptionDataset + +TORCH_DTYPE_MAP = dict(fp16=torch.float16, bf16=torch.bfloat16, fp32=torch.float32) + + +def apply_sam(image, input_points=None, input_boxes=None, input_labels=None): + inputs = sam_processor( + image, + input_points=input_points, + input_boxes=input_boxes, + input_labels=input_labels, + return_tensors="pt", + ).to(device) + + with torch.no_grad(): + outputs = sam_model(**inputs) + + masks = sam_processor.image_processor.post_process_masks( + outputs.pred_masks.cpu(), + inputs["original_sizes"].cpu(), + inputs["reshaped_input_sizes"].cpu(), + )[0][0] + scores = outputs.iou_scores[0, 0] + + mask_selection_index = scores.argmax() + + mask_np = masks[mask_selection_index].numpy() + + return mask_np + + +def add_contour(img, mask, input_points=None, input_boxes=None): + img = img.copy() + + # Draw contour + mask = mask.astype(np.uint8) * 255 + contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + cv2.drawContours(img, contours, -1, (1.0, 1.0, 1.0), thickness=6) + + # Draw points if provided + if input_points is not None: + for points in input_points: # Handle batch of points + for x, y in points: + # Draw a filled circle for each point + cv2.circle( + img, + (int(x), int(y)), + radius=10, + color=(1.0, 0.0, 0.0), + 
thickness=-1, + ) + # Draw a white border around the circle + cv2.circle( + img, (int(x), int(y)), radius=10, color=(1.0, 1.0, 1.0), thickness=2 + ) + + # Draw boxes if provided + if input_boxes is not None: + for box_batch in input_boxes: # Handle batch of boxes + for box in box_batch: # Iterate through boxes in the batch + x1, y1, x2, y2 = map(int, box) + # Draw rectangle with white color + cv2.rectangle( + img, (x1, y1), (x2, y2), color=(1.0, 1.0, 1.0), thickness=4 + ) + # Draw inner rectangle with red color + cv2.rectangle( + img, (x1, y1), (x2, y2), color=(1.0, 0.0, 0.0), thickness=2 + ) + + return img + + +def denormalize_coordinates(coords, image_size, is_box=False): + """Convert normalized coordinates (0-1) to pixel coordinates.""" + width, height = image_size + if is_box: + # For boxes: [x1, y1, x2, y2] + x1, y1, x2, y2 = coords + return [int(x1 * width), int(y1 * height), int(x2 * width), int(y2 * height)] + else: + # For points: [x, y] + x, y = coords + return [int(x * width), int(y * height)] + + +def print_streaming(text): + """Helper function to print streaming text with flush""" + print(text, end="", flush=True) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Detailed Localized Image Descriptions with SAM" + ) + parser.add_argument( + "--model_name_or_path", + help="HF model name or path", + default="HaochenWang/GAR-8B", + ) + parser.add_argument( + "--image_path", type=str, required=True, help="Path to the image file" + ) + parser.add_argument( + "--points", + type=str, + default="[[1172, 812], [1572, 800]]", + help="List of points for SAM input", + ) + parser.add_argument( + "--box", + type=str, + default="[773, 518, 1172, 812]", + help="Bounding box for SAM input (x1, y1, x2, y2)", + ) + parser.add_argument( + "--use_box", + action="store_true", + help="Use box instead of points for SAM input (default: use points)", + ) + parser.add_argument( + "--normalized_coords", + action="store_true", + help="Interpret 
coordinates as normalized (0-1) values", + ) + parser.add_argument( + "--output_image_path", + type=str, + default=None, + help="Path to save the output image with contour", + ) + parser.add_argument( + "--data_type", + help="data dtype", + type=str, + choices=["fp16", "bf16", "fp32"], + default="bf16", + ) + + args = parser.parse_args() + data_dtype = TORCH_DTYPE_MAP[args.data_type] + + # Load the image + img = Image.open(args.image_path).convert("RGB") + + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + sam_model = SamModel.from_pretrained("facebook/sam-vit-huge").to(device) + sam_processor = SamProcessor.from_pretrained("facebook/sam-vit-huge") + + image_size = img.size # (width, height) + + # Prepare input_points or input_boxes + if args.use_box: + input_boxes = ast.literal_eval(args.box) + if args.normalized_coords: + input_boxes = denormalize_coordinates(input_boxes, image_size, is_box=True) + input_boxes = [[input_boxes]] # Add an extra level of nesting + print(f"Using input_boxes: {input_boxes}") + mask_np = apply_sam(img, input_boxes=input_boxes) + else: + input_points = ast.literal_eval(args.points) + if args.normalized_coords: + input_points = [ + denormalize_coordinates(point, image_size) for point in input_points + ] + # Assume all points are foreground + input_labels = [1] * len(input_points) + input_points = [[x, y] for x, y in input_points] # Convert to list of lists + input_points = [input_points] # Wrap in outer list + input_labels = [input_labels] # Wrap labels in list + print(f"Using input_points: {input_points}") + mask_np = apply_sam(img, input_points=input_points, input_labels=input_labels) + + # build HF model + model = AutoModel.from_pretrained( + args.model_name_or_path, + trust_remote_code=True, + torch_dtype=data_dtype, + device_map="cuda:0", + ).eval() + + processor = AutoProcessor.from_pretrained( + args.model_name_or_path, + trust_remote_code=True, + ) + + # Get description + prompt_number = 
model.config.prompt_numbers + prompt_tokens = [f"" for i_p in range(prompt_number)] + [""] + dataset = SingleRegionCaptionDataset( + image=img, + mask=mask_np, + processor=processor, + prompt_number=prompt_number, + visual_prompt_tokens=prompt_tokens, + data_dtype=data_dtype, + ) + + data_sample = dataset[0] + + with torch.no_grad(): + generate_ids = model.generate( + **data_sample, + generation_config=GenerationConfig( + max_new_tokens=1024, + do_sample=False, + eos_token_id=processor.tokenizer.eos_token_id, + pad_token_id=processor.tokenizer.pad_token_id, + ), + return_dict=True, + ) + + outputs = processor.tokenizer.decode( + generate_ids.sequences[0], skip_special_tokens=True + ).strip() + + print(outputs) # Print model output for this image + + if args.output_image_path: + img_np = np.asarray(img).astype(float) / 255.0 + + # Prepare visualization inputs + vis_points = input_points if not args.use_box else None + vis_boxes = input_boxes if args.use_box else None + + img_with_contour_np = add_contour( + img_np, mask_np, input_points=vis_points, input_boxes=vis_boxes + ) + img_with_contour_pil = Image.fromarray( + (img_with_contour_np * 255.0).astype(np.uint8) + ) + img_with_contour_pil.save(args.output_image_path) + print(f"Output image with contour saved as {args.output_image_path}") diff --git a/demo/gradio/.gradio/certificate.pem b/demo/gradio/.gradio/certificate.pem new file mode 100644 index 0000000000000000000000000000000000000000..b85c8037f6b60976b2546fdbae88312c5246d9a3 --- /dev/null +++ b/demo/gradio/.gradio/certificate.pem @@ -0,0 +1,31 @@ +-----BEGIN CERTIFICATE----- +MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw +TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh +cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4 +WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu +ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY 
+MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc +h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+ +0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U +A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW +T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH +B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC +B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv +KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn +OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn +jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw +qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI +rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV +HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq +hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL +ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ +3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK +NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5 +ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur +TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC +jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc +oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq +4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA +mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d +emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc= +-----END CERTIFICATE----- diff --git a/demo/gradio/README.md b/demo/gradio/README.md new file mode 100644 index 0000000000000000000000000000000000000000..85f62f3c59c01a375ceffbf843c9056dff387a7b --- /dev/null +++ b/demo/gradio/README.md @@ -0,0 +1,11 @@ +Please install segment-anything package through: +``` +pip install git+https://github.com/facebookresearch/segment-anything.git +``` + +This demo is based on 
the Segment Anything demo under Apache 2.0 license. Please refer to the [Segment Anything LICENSE](https://github.com/facebookresearch/segment-anything/blob/main/LICENSE) for more details. + +## Run the demo +``` +python demo/gradio/app.py +``` \ No newline at end of file diff --git a/demo/gradio/app.py b/demo/gradio/app.py new file mode 100644 index 0000000000000000000000000000000000000000..817109f487c5949ea7e1dbd2e2c002e0645f5ca8 --- /dev/null +++ b/demo/gradio/app.py @@ -0,0 +1,267 @@ +# ************************************************************************* +# This file may have been modified by Bytedance Inc. (“Bytedance Inc.'s Mo- +# difications”). All Bytedance Inc.'s Modifications are Copyright (2025) B- +# ytedance Inc.. +# ************************************************************************* + +# Adapted from https://github.com/NVlabs/describe-anything/blob/main/examples/dam_with_sam.py + +# Copyright 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
@GPU(duration=75)
def image_to_sam_embedding(base64_image):
    """Compute the SAM image embedding for a base64-encoded image.

    Args:
        base64_image: Base64 string of the encoded image file. A data-URL
            prefix (everything up to the first comma) is accepted and
            stripped, matching what ``describe`` does with its inputs.

    Returns:
        Base64 string of the raw bytes of the embedding as a float32 array.
        The web frontend rebuilds it with ``Float32Array``.

    Raises:
        gr.Error: If decoding, preprocessing or the SAM forward pass fails.
            The original exception is attached as ``__cause__``.
    """
    try:
        # Accept both a bare base64 payload and a "data:...;base64,<payload>" URL.
        payload = base64_image.split(",")[1] if "," in base64_image else base64_image
        image_bytes = base64.b64decode(payload)

        # Force 3-channel RGB so RGBA / palette / grayscale uploads do not
        # reach the SAM processor with an unexpected channel layout.
        image = Image.open(io.BytesIO(image_bytes)).convert("RGB")

        # Process image with SAM processor
        inputs = sam_processor(image, return_tensors="pt").to(device)

        # Get image embedding
        with torch.no_grad():
            image_embedding = sam_model.get_image_embeddings(inputs["pixel_values"])

        # Pin the dtype to float32 before serializing: the consumer decodes
        # the bytes as float32, so any other dtype would be misread.
        image_embedding = image_embedding.float().cpu().numpy()

        # Encode the embedding as base64
        embedding_bytes = image_embedding.tobytes()
        return base64.b64encode(embedding_bytes).decode("utf-8")
    except Exception as e:
        # API boundary: report every failure to the Gradio client, but keep
        # the original traceback chained for server-side debugging.
        print(f"Error processing image: {str(e)}")
        raise gr.Error(f"Failed to process image: {str(e)}") from e
if __name__ == "__main__":
    # Script entry point: expose the three functions above as Gradio API
    # endpoints and serve the pre-built static frontend from "dist" at "/".
    parser = argparse.ArgumentParser(description="Describe Anything gradio demo")
    parser.add_argument(
        "--server_addr",
        "--host",
        type=str,
        default=None,
        help="The server address to listen on.",
    )
    parser.add_argument(
        "--server_port", "--port", type=int, default=None, help="The port to listen on."
    )

    args = parser.parse_args()

    # Create Gradio interface
    with gr.Blocks() as demo:
        gr.Interface(
            fn=image_to_sam_embedding,
            inputs=gr.Textbox(label="Image Base64"),
            outputs=gr.Textbox(label="Embedding Base64"),
            title="Image Embedding Generator",
            api_name="image_to_sam_embedding",
        )
        gr.Interface(
            fn=describe,
            inputs=[
                gr.Textbox(label="Image Base64"),
                gr.Text(label="Mask Base64"),
                gr.Text(label="Prompt"),
            ],
            outputs=[gr.Text(label="Description")],
            title="Mask Description Generator",
            api_name="describe",
        )
        gr.Interface(
            fn=describe_without_streaming,
            inputs=[
                gr.Textbox(label="Image Base64"),
                gr.Text(label="Mask Base64"),
                gr.Text(label="Prompt"),
            ],
            outputs=[gr.Text(label="Description")],
            title="Mask Description Generator (Non-Streaming)",
            api_name="describe_without_streaming",
        )

    # Swap block_thread for a no-op while launching so that control returns
    # here and the routes can still be edited; the saved original is invoked
    # at the very end. (Presumably launch() would otherwise block inside
    # block_thread - confirm against the installed Gradio version.)
    demo._block_thread = demo.block_thread
    demo.block_thread = lambda: None
    demo.launch(
        share=True,
        server_name=args.server_addr,
        server_port=args.server_port,
        ssr_mode=False,
    )

    # Drop every route registered at "/" so the static frontend can take its
    # place. Iterate over a copy: removing from the list that is being
    # iterated skips the element following each removal.
    for route in list(demo.app.routes):
        if getattr(route, "path", None) == "/":
            demo.app.routes.remove(route)
    demo.app.mount("/", StaticFiles(directory="dist", html=True), name="demo")

    demo._block_thread()
+ + + +## Run the app + +Install Yarn + +``` +npm install --g yarn +``` + +Build and run: + +``` +yarn && yarn start +``` + +Navigate to [`http://localhost:8081/`](http://localhost:8081/) + +Move your cursor around to see the mask prediction update in real time. + +## Export the image embedding + +In the [ONNX Model Example notebook](https://github.com/facebookresearch/segment-anything/blob/main/notebooks/onnx_model_example.ipynb) upload the image of your choice and generate and save corresponding embedding. + +Initialize the predictor: + +```python +checkpoint = "sam_vit_h_4b8939.pth" +model_type = "vit_h" +sam = sam_model_registry[model_type](checkpoint=checkpoint) +sam.to(device='cuda') +predictor = SamPredictor(sam) +``` + +Set the new image and export the embedding: + +``` +image = cv2.imread('src/assets/dogs.jpg') +predictor.set_image(image) +image_embedding = predictor.get_image_embedding().cpu().numpy() +np.save("dogs_embedding.npy", image_embedding) +``` + +Save the new image and embedding in `src/assets/data`. + +## Export the ONNX model + +You also need to export the quantized ONNX model from the [ONNX Model Example notebook](https://github.com/facebookresearch/segment-anything/blob/main/notebooks/onnx_model_example.ipynb). + +Run the cell in the notebook which saves the `sam_onnx_quantized_example.onnx` file, download it and copy it to the path `/model/sam_onnx_quantized_example.onnx`. 
+ +Here is a snippet of the export/quantization code: + +``` +onnx_model_path = "sam_onnx_example.onnx" +onnx_model_quantized_path = "sam_onnx_quantized_example.onnx" +quantize_dynamic( + model_input=onnx_model_path, + model_output=onnx_model_quantized_path, + optimize_model=True, + per_channel=False, + reduce_range=False, + weight_type=QuantType.QUInt8, +) +``` + +**NOTE: if you change the ONNX model by using a new checkpoint you need to also re-export the embedding.** + +## Update the image, embedding, model in the app + +Update the following file paths at the top of `App.tsx`: + +```ts +const IMAGE_PATH = "/assets/data/dogs.jpg"; +const IMAGE_EMBEDDING = "/assets/data/dogs_embedding.npy"; +const MODEL_DIR = "/model/sam_onnx_quantized_example.onnx"; +``` + +## ONNX multithreading with SharedArrayBuffer + +To use multithreading, the appropriate headers need to be set to create a cross origin isolation state which will enable use of `SharedArrayBuffer` (see this [blog post](https://cloudblogs.microsoft.com/opensource/2021/09/02/onnx-runtime-web-running-your-machine-learning-model-in-browser/) for more details). + +The headers below are set in `configs/webpack/dev.js`: + +```js +headers: { + "Cross-Origin-Opener-Policy": "same-origin", + "Cross-Origin-Embedder-Policy": "credentialless", +} +``` + +## Structure of the app + +**`App.tsx`** + +- Initializes ONNX model +- Loads image embedding and image +- Runs the ONNX model based on input prompts + +**`Stage.tsx`** + +- Handles mouse move interaction to update the ONNX model prompt + +**`Tool.tsx`** + +- Renders the image and the mask prediction + +**`helpers/maskUtils.tsx`** + +- Conversion of ONNX model output from array to an HTMLImageElement + +**`helpers/onnxModelAPI.tsx`** + +- Formats the inputs for the ONNX model + +**`helpers/scaleHelper.tsx`** + +- Handles image scaling logic for SAM (longest size 1024) + +**`hooks/`** + +- Handle shared state for the app diff --git
a/demo/gradio/frontend/configs/webpack/common.js b/demo/gradio/frontend/configs/webpack/common.js new file mode 100644 index 0000000000000000000000000000000000000000..182681d52bbb70ae175b8c9a1d627b59c428f7ce --- /dev/null +++ b/demo/gradio/frontend/configs/webpack/common.js @@ -0,0 +1,85 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// All rights reserved. + +// This source code is licensed under the license found in the +// LICENSE file in the root directory of this source tree. + +const { resolve } = require("path"); +const HtmlWebpackPlugin = require("html-webpack-plugin"); +const FriendlyErrorsWebpackPlugin = require("friendly-errors-webpack-plugin"); +const CopyPlugin = require("copy-webpack-plugin"); +const webpack = require("webpack"); + +module.exports = { + entry: "./src/index.tsx", + resolve: { + extensions: [".js", ".jsx", ".ts", ".tsx"], + fallback: { 'process/browser': require.resolve('process/browser'), } + }, + output: { + path: resolve(__dirname, "dist"), + }, + module: { + rules: [ + { + test: /\.mjs$/, + include: /node_modules/, + type: "javascript/auto", + resolve: { + fullySpecified: false, + }, + }, + { + test: [/\.jsx?$/, /\.tsx?$/], + use: ["ts-loader"], + exclude: /node_modules/, + }, + { + test: /\.css$/, + use: ["style-loader", "css-loader"], + }, + { + test: /\.(scss|sass)$/, + use: ["style-loader", "css-loader", "postcss-loader"], + }, + { + test: /\.(jpe?g|png|gif|svg)$/i, + use: [ + "file-loader?hash=sha512&digest=hex&name=img/[contenthash].[ext]", + "image-webpack-loader?bypassOnDebug&optipng.optimizationLevel=7&gifsicle.interlaced=false", + ], + }, + { + test: /\.(woff|woff2|ttf)$/, + use: { + loader: "url-loader", + }, + }, + ], + }, + plugins: [ + new CopyPlugin({ + patterns: [ + { + from: "node_modules/onnxruntime-web/dist/*.wasm", + to: "[name][ext]", + }, + { + from: "model", + to: "model", + }, + { + from: "src/assets/examples", + to: "examples", + }, + ], + }), + new HtmlWebpackPlugin({ + template: 
"./src/assets/index.html", + }), + new FriendlyErrorsWebpackPlugin(), + new webpack.ProvidePlugin({ + process: "process/browser", + }), + ], +}; diff --git a/demo/gradio/frontend/configs/webpack/dev.js b/demo/gradio/frontend/configs/webpack/dev.js new file mode 100644 index 0000000000000000000000000000000000000000..f2f521623ed824abeaf3877bd23951bbcf9475bb --- /dev/null +++ b/demo/gradio/frontend/configs/webpack/dev.js @@ -0,0 +1,25 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// All rights reserved. + +// This source code is licensed under the license found in the +// LICENSE file in the root directory of this source tree. + +// development config +const { merge } = require("webpack-merge"); +const commonConfig = require("./common"); + +module.exports = merge(commonConfig, { + mode: "development", + devServer: { + hot: true, // enable HMR on the server + open: true, + // These headers enable the cross origin isolation state + // needed to enable use of SharedArrayBuffer for ONNX + // multithreading. + headers: { + "Cross-Origin-Opener-Policy": "same-origin", + "Cross-Origin-Embedder-Policy": "credentialless", + }, + }, + devtool: "cheap-module-source-map", +}); diff --git a/demo/gradio/frontend/configs/webpack/prod.js b/demo/gradio/frontend/configs/webpack/prod.js new file mode 100644 index 0000000000000000000000000000000000000000..b598f486b642bda9df05d0fa51b0ba7eaf3a8974 --- /dev/null +++ b/demo/gradio/frontend/configs/webpack/prod.js @@ -0,0 +1,22 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// All rights reserved. + +// This source code is licensed under the license found in the +// LICENSE file in the root directory of this source tree. 
+ +// production config +const { merge } = require("webpack-merge"); +const { resolve } = require("path"); +const Dotenv = require("dotenv-webpack"); +const commonConfig = require("./common"); + +module.exports = merge(commonConfig, { + mode: "production", + output: { + filename: "js/bundle.[contenthash].min.js", + path: resolve(__dirname, "../../dist"), + publicPath: "/", + }, + devtool: "source-map", + plugins: [new Dotenv()], +}); diff --git a/demo/gradio/frontend/package.json b/demo/gradio/frontend/package.json new file mode 100644 index 0000000000000000000000000000000000000000..f0eabf93e4d1e8c016089bb457f5c7d047d3cff6 --- /dev/null +++ b/demo/gradio/frontend/package.json @@ -0,0 +1,64 @@ +{ + "name": "segment-anything-mini-demo", + "version": "0.1.0", + "license": "MIT", + "scripts": { + "build": "yarn run clean-dist && webpack --config=configs/webpack/prod.js && mv dist/*.wasm dist/js && rsync -r --delete dist ../", + "clean-dist": "rimraf dist/*", + "lint": "eslint './src/**/*.{js,ts,tsx}' --quiet", + "start": "yarn run start-dev", + "test": "yarn run start-model-test", + "start-dev": "webpack serve --config=configs/webpack/dev.js" + }, + "devDependencies": { + "@babel/core": "^7.18.13", + "@babel/preset-env": "^7.18.10", + "@babel/preset-react": "^7.18.6", + "@babel/preset-typescript": "^7.18.6", + "@pmmmwh/react-refresh-webpack-plugin": "^0.5.7", + "@testing-library/react": "^13.3.0", + "@types/node": "^18.7.13", + "@types/react": "^18.0.17", + "@types/react-dom": "^18.0.6", + "@types/underscore": "^1.11.4", + "@typescript-eslint/eslint-plugin": "^5.35.1", + "@typescript-eslint/parser": "^5.35.1", + "babel-loader": "^8.2.5", + "copy-webpack-plugin": "^11.0.0", + "css-loader": "^6.7.1", + "dotenv": "^16.0.2", + "dotenv-webpack": "^8.0.1", + "eslint": "^8.22.0", + "eslint-plugin-react": "^7.31.0", + "file-loader": "^6.2.0", + "fork-ts-checker-webpack-plugin": "^7.2.13", + "friendly-errors-webpack-plugin": "^1.7.0", + "html-webpack-plugin": "^5.5.0", + 
"image-webpack-loader": "^8.1.0", + "postcss-loader": "^7.0.1", + "postcss-preset-env": "^7.8.0", + "process": "^0.11.10", + "rimraf": "^3.0.2", + "sass": "^1.54.5", + "sass-loader": "^13.0.2", + "style-loader": "^3.3.1", + "tailwindcss": "^3.1.8", + "ts-loader": "^9.3.1", + "typescript": "^4.8.2", + "webpack": "^5.74.0", + "webpack-cli": "^4.10.0", + "webpack-dev-server": "^4.10.0", + "webpack-dotenv-plugin": "^2.1.0", + "webpack-merge": "^5.8.0" + }, + "dependencies": { + "@gradio/client": "^1.7.1", + "npyjs": "^0.4.0", + "onnxruntime-web": "1.14.0", + "react": "^18.2.0", + "react-dom": "^18.2.0", + "react-refresh": "^0.14.0", + "underscore": "^1.13.6", + "axios": "^1.6.7" + } +} diff --git a/demo/gradio/frontend/postcss.config.js b/demo/gradio/frontend/postcss.config.js new file mode 100644 index 0000000000000000000000000000000000000000..064a2ba5ff820c6b2328f51f0ae6b147ec698881 --- /dev/null +++ b/demo/gradio/frontend/postcss.config.js @@ -0,0 +1,10 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// All rights reserved. + +// This source code is licensed under the license found in the +// LICENSE file in the root directory of this source tree. + +const tailwindcss = require("tailwindcss"); +module.exports = { + plugins: ["postcss-preset-env", 'tailwindcss/nesting', tailwindcss], +}; diff --git a/demo/gradio/frontend/src/App.tsx b/demo/gradio/frontend/src/App.tsx new file mode 100644 index 0000000000000000000000000000000000000000..8be63c41ea370f590bbe82c6fffe3d0ca4b822d3 --- /dev/null +++ b/demo/gradio/frontend/src/App.tsx @@ -0,0 +1,306 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// All rights reserved. + +// This source code is licensed under the license found in the +// LICENSE file in the root directory of this source tree. 
+ +import { InferenceSession, Tensor } from "onnxruntime-web"; +import React, { useContext, useEffect, useState, useRef } from "react"; +import axios from "axios"; +import "./assets/scss/App.scss"; +import { handleImageScale } from "./components/helpers/scaleHelper"; +import { modelScaleProps, QueueStatus } from "./components/helpers/Interfaces"; +import { onnxMaskToImage, arrayToImageData, imageDataToURL } from "./components/helpers/maskUtils"; +import { modelData } from "./components/helpers/onnxModelAPI"; +import Stage, { DescriptionState } from "./components/Stage"; +import AppContext from "./components/hooks/createContext"; +import { imageToSamEmbedding } from "./services/maskApi"; +import LoadingOverlay from "./components/LoadingOverlay"; +import ErrorModal from './components/ErrorModal'; +import QueueStatusIndicator from "./components/QueueStatusIndicator"; + +const ort = require("onnxruntime-web"); + +// Define image and model paths +const MODEL_DIR = "/model/sam_onnx_quantized_example.onnx"; + +const App = () => { + const { + clicks: [clicks, setClicks], + image: [image, setImage], + maskImg: [maskImg, setMaskImg], + maskImgData: [maskImgData, setMaskImgData], + isClicked: [isClicked, setIsClicked] + } = useContext(AppContext)!; + const [model, setModel] = useState(null); + const [tensor, setTensor] = useState(null); + const [modelScale, setModelScale] = useState(null); + const [isLoading, setIsLoading] = useState(false); + const [error, setError] = useState(null); + const [descriptionState, setDescriptionState] = useState({ + state: 'ready', + description: '' + }); + const [queueStatus, setQueueStatus] = useState({ inQueue: false }); + + // Initialize the ONNX model + useEffect(() => { + const initModel = async () => { + try { + if (MODEL_DIR === undefined) return; + const URL: string = MODEL_DIR; + const model = await InferenceSession.create(URL); + setModel(model); + } catch (e) { + console.log(e); + } + }; + initModel(); + }, []); + + const 
handleImageUpload = async (event: React.ChangeEvent) => { + const file = event.target.files?.[0]; + if (!file) return; + + try { + const url = URL.createObjectURL(file); + await loadImage(new URL(url)); + } catch (error) { + setError('Failed to load image. Please try again with a different image.'); + console.error('Error loading image:', error); + } + }; + + const loadImage = async (url: URL) => { + try { + setIsLoading(true); + const img = new Image(); + img.src = url.href; + img.onload = async () => { + const { height, width, samScale } = handleImageScale(img); + setModelScale({ + height: height, + width: width, + samScale: samScale, + }); + img.width = width; + img.height = height; + setImage(img); + + // After image is loaded, fetch its embedding from Gradio + await fetchImageEmbedding(img); + setIsLoading(false); + }; + } catch (error) { + console.log(error); + setIsLoading(false); + } + }; + + const fetchImageEmbedding = async (img: HTMLImageElement) => { + try { + // Create a canvas to convert the image to base64 + const canvas = document.createElement('canvas'); + canvas.width = img.width; + canvas.height = img.height; + const ctx = canvas.getContext('2d'); + ctx?.drawImage(img, 0, 0); + + // Convert image to base64 data URL and extract the base64 string + const base64Image = canvas.toDataURL('image/jpeg').split(',')[1]; + + // Make request to Gradio API + const samEmbedding = await imageToSamEmbedding( + base64Image, + (status: QueueStatus) => { + setQueueStatus(status); + } + ); + + // Convert base64 embedding back to array buffer + const binaryString = window.atob(samEmbedding); + const len = binaryString.length; + const bytes = new Uint8Array(len); + for (let i = 0; i < len; i++) { + bytes[i] = binaryString.charCodeAt(i); + } + + // Create tensor from the embedding + const embedding = new ort.Tensor( + 'float32', + new Float32Array(bytes.buffer), // Convert to Float32Array + [1, 256, 64, 64] // SAM embedding shape + ); + setTensor(embedding); + } catch 
(error) { + setQueueStatus({ inQueue: false }); // Reset queue status on error + let errorMessage = 'Failed to process image. Please try again.'; + if (axios.isAxiosError(error)) { + errorMessage = error.response?.data?.message || errorMessage; + } + setError(errorMessage); + console.error('Error fetching embedding:', error); + } + }; + + useEffect(() => { + const handleMaskUpdate = async () => { + await runONNX(); + }; + handleMaskUpdate(); + }, [clicks]); + + const runONNX = async () => { + try { + // Don't run if already described or is describing + if (descriptionState.state !== 'ready') return; + + console.log('Running ONNX model with:', { + modelLoaded: model !== null, + hasClicks: clicks !== null, + hasTensor: tensor !== null, + hasModelScale: modelScale !== null + }); + + if ( + model === null || + clicks === null || + tensor === null || + modelScale === null + ) { + console.log('Missing required inputs, returning early'); + return; + } + else { + console.log('Preparing model feeds with:', { + clicks, + tensorShape: tensor.dims, + modelScale + }); + + const feeds = modelData({ + clicks, + tensor, + modelScale, + }); + + if (feeds === undefined) { + console.log('Model feeds undefined, returning early'); + return; + } + + console.log('Running model with feeds:', feeds); + const results = await model.run(feeds); + console.log('Model run complete, got results:', results); + + const output = results[model.outputNames[0]]; + console.log('Processing output with dims:', output.dims); + + // Calculate and log the mask area (number of non-zero values) + const maskArray = Array.from(output.data as Uint8Array); + const maskArea = maskArray.filter(val => val > 0).length; + console.log('Mask area (number of non-zero pixels):', maskArea); + + // Double check that the state is ready before processing the mask since the state may have changed + if (descriptionState.state !== 'ready') return; + // If clicked, we only handle the first mask (note that mask will be cleared 
after clicking before handling to let us know if it's the first mask). + if (isClicked && maskImgData != null) return; + if (maskArea > 0) { + setMaskImg(onnxMaskToImage(output.data, output.dims[2], output.dims[3], false)); + setMaskImgData(imageDataToURL(arrayToImageData(output.data, output.dims[2], output.dims[3], true))); + } else { + console.warn('No mask area detected, clearing mask'); + setMaskImg(null); + // setMaskImgData(null); + } + + console.log('Mask processing complete'); + } + } catch (e) { + setError('Failed to process the image. Please try again.'); + console.error('Error running ONNX model:', e); + } + }; + + const handleNewRegion = () => { + setDescriptionState({ + state: 'ready', + description: '' + } as DescriptionState); + setMaskImg(null); + // setMaskImgData(null); + setIsClicked(false); + }; + + const handleCopyDescription = () => { + navigator.clipboard.writeText(descriptionState.description); + }; + + const handleReset = () => { + // Clear all states + setDescriptionState({ + state: 'ready', + description: '' + } as DescriptionState); + setMaskImg(null); + // setMaskImgData(null); + setImage(null); + setClicks(null); + setIsClicked(false); + }; + + return ( +
+ {isLoading && } + {error && setError(null)} />} + +
+ +
+
+
+ {descriptionState.description ? ( + descriptionState.description + (descriptionState.state === 'describing' ? '...' : '') + ) : descriptionState.state === 'describing' ? ( + Describing the region... (this may take a while if compute resources are busy) + ) : ( + image ? ( + Click on the image to describe the region + ) : ( + Upload an image to describe the region + ) + )} +
+
+ + + +
+
+
+ ); +}; + +export default App; diff --git a/demo/gradio/frontend/src/components/ErrorModal.tsx b/demo/gradio/frontend/src/components/ErrorModal.tsx new file mode 100644 index 0000000000000000000000000000000000000000..6762240e1d064fc12768c5fc66b5fd176db15faf --- /dev/null +++ b/demo/gradio/frontend/src/components/ErrorModal.tsx @@ -0,0 +1,32 @@ +import React from 'react'; + +interface ErrorModalProps { + message: string; + onClose: () => void; +} + +const ErrorModal: React.FC = ({ message, onClose }) => { + return ( +
+
+
+
+ + + +
+

Error

+

{message}

+ +
+
+
+ ); +}; + +export default ErrorModal; \ No newline at end of file diff --git a/demo/gradio/frontend/src/components/LoadingOverlay.tsx b/demo/gradio/frontend/src/components/LoadingOverlay.tsx new file mode 100644 index 0000000000000000000000000000000000000000..633afdcc7313c850b5c15bdbe2c2ae436b246db2 --- /dev/null +++ b/demo/gradio/frontend/src/components/LoadingOverlay.tsx @@ -0,0 +1,30 @@ +import React from 'react'; + +const LoadingOverlay: React.FC = () => { + return ( +
+
+ + + + + + + + + + + + + + + + + +

Loading image embedding...

+
+
+ ); +}; + +export default LoadingOverlay; \ No newline at end of file diff --git a/demo/gradio/frontend/src/components/QueueStatusIndicator.tsx b/demo/gradio/frontend/src/components/QueueStatusIndicator.tsx new file mode 100644 index 0000000000000000000000000000000000000000..d26f3aa49dc3ab1d27577810c8e7d5dc6f22c6bc --- /dev/null +++ b/demo/gradio/frontend/src/components/QueueStatusIndicator.tsx @@ -0,0 +1,29 @@ +import React from 'react'; +import { QueueStatus } from './helpers/Interfaces'; + +interface QueueStatusIndicatorProps { + queueStatus: QueueStatus; +} + +const QueueStatusIndicator: React.FC = ({ queueStatus }) => { + if (!queueStatus.inQueue) return null; + + return ( +
+
+ {queueStatus.rank === 0 ? ( +

You're next in line! ({queueStatus.queueSize} total in queue)

+ ) : ( +

Queue position: {queueStatus.rank! + 1} of {queueStatus.queueSize}

+ )} + {queueStatus.rankEta && ( +

+ Estimated wait: {Math.ceil(queueStatus.rankEta)} seconds +

+ )} +
+
+ ); +}; + +export default QueueStatusIndicator; \ No newline at end of file diff --git a/demo/gradio/frontend/src/components/Stage.tsx b/demo/gradio/frontend/src/components/Stage.tsx new file mode 100644 index 0000000000000000000000000000000000000000..bb316d768e3c1fd028d691e61192dbb9be8bb819 --- /dev/null +++ b/demo/gradio/frontend/src/components/Stage.tsx @@ -0,0 +1,343 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// All rights reserved. + +// This source code is licensed under the license found in the +// LICENSE file in the root directory of this source tree. + +import React, { useContext, useState, useEffect } from "react"; +import * as _ from "underscore"; +import Tool from "./Tool"; +import { modelInputProps, QueueStatus } from "./helpers/Interfaces"; +import AppContext from "./hooks/createContext"; +// import { describeMask } from '../services/maskApi'; + +interface DescriptionState { + state: string; // 'ready', 'describing', 'described' + description: string; +} + +interface StageProps { + onImageUpload: (event: React.ChangeEvent) => Promise; + descriptionState: DescriptionState; + setDescriptionState: React.Dispatch>; + queueStatus: QueueStatus; + setQueueStatus: (status: QueueStatus) => void; +} + +const EXAMPLE_IMAGES = Array.from({ length: 21 }, (_, i) => `/examples/${i + 1}.jpg`); +const BREAKPOINT_MEDIUM = 2100; +const BREAKPOINT_SMALL = 1100; + +const Stage = ({ onImageUpload, descriptionState, setDescriptionState, queueStatus, setQueueStatus }: StageProps) => { + const { + clicks: [, setClicks], + image: [image], + maskImg: [maskImg], + maskImgData: [maskImgData] + } = useContext(AppContext)!; + + const [isDragging, setIsDragging] = useState(false); + const [currentPage, setCurrentPage] = useState(1); + const [imagesPerPage, setImagesPerPage] = useState(8); + + useEffect(() => { + const handleResize = () => { + if (window.innerWidth < BREAKPOINT_SMALL) { + setImagesPerPage(1); + } else if (window.innerWidth < BREAKPOINT_MEDIUM) { + 
setImagesPerPage(4); + } else { + setImagesPerPage(8); + } + }; + + // Set initial value + handleResize(); + + // Add event listener + window.addEventListener('resize', handleResize); + + // Cleanup + return () => window.removeEventListener('resize', handleResize); + }, []); + + const getClick = (x: number, y: number): modelInputProps => { + const clickType = 1; + return { x, y, clickType }; + }; + + const handleMouseMove = _.throttle((e: any) => { + if (descriptionState.state !== 'ready') return; + if (e.clientX === undefined || e.clientY === undefined) { + console.warn('Mouse move event does not contain clientX or clientY'); + return; + } + let el = e.nativeEvent.target; + const rect = el.getBoundingClientRect(); + + // Calculate the actual dimensions of the contained image + const containerAspectRatio = el.offsetWidth / el.offsetHeight; + const imageAspectRatio = image ? image.width / image.height : 1; + + let renderedWidth, renderedHeight; + if (containerAspectRatio > imageAspectRatio) { + // Image is constrained by height + renderedHeight = el.offsetHeight; + renderedWidth = renderedHeight * imageAspectRatio; + } else { + // Image is constrained by width + renderedWidth = el.offsetWidth; + renderedHeight = renderedWidth / imageAspectRatio; + } + + // Calculate the empty space offset + const offsetX = (el.offsetWidth - renderedWidth) / 2; + const offsetY = (el.offsetHeight - renderedHeight) / 2; + + // Get click position relative to the actual image + let x = e.clientX - rect.left - offsetX; + let y = e.clientY - rect.top - offsetY; + + // Convert to original image coordinates + const scaleX = image ? image.width / renderedWidth : 1; + const scaleY = image ? 
image.height / renderedHeight : 1; + x *= scaleX; + y *= scaleY; + + // Ensure coordinates are within bounds + if (image) { + x = Math.max(0, Math.min(x, image.width)); + y = Math.max(0, Math.min(y, image.height)); + } + + const click = getClick(x, y); + if (click) { + setClicks([click]); + } + }, 15); + + const handleDragEnter = (e: React.DragEvent) => { + e.preventDefault(); + e.stopPropagation(); + setIsDragging(true); + }; + + const handleDragLeave = (e: React.DragEvent) => { + e.preventDefault(); + e.stopPropagation(); + setIsDragging(false); + }; + + const handleDragOver = (e: React.DragEvent) => { + e.preventDefault(); + e.stopPropagation(); + }; + + const handleDrop = async (e: React.DragEvent) => { + e.preventDefault(); + e.stopPropagation(); + setIsDragging(false); + + const files = e.dataTransfer.files; + if (files && files[0]) { + const file = files[0]; + // Cast to unknown first, then to the desired type + const syntheticEvent = { + target: { + files: [file] + } + } as unknown as React.ChangeEvent; + + onImageUpload(syntheticEvent); + } + }; + + const flexCenterClasses = "flex items-center justify-center"; + + // const handleDescribeMask = async () => { + // if (!maskImg || !maskImgData || !image) { + // console.warn('No mask or image available to describe'); + // return; + // } + + // try { + // const canvas = document.createElement('canvas'); + // canvas.width = image.width; + // canvas.height = image.height; + // const ctx = canvas.getContext('2d'); + // ctx?.drawImage(image, 0, 0); + // const imageBase64 = canvas.toDataURL('image/jpeg').split(',')[1]; + // const maskBase64 = maskImgData.split(',')[1]; + + // const result = await describeMask(maskBase64, imageBase64); + // console.log('Mask description:', result.description); + + // alert("Mask description: " + result.description); + // } catch (error) { + // console.error('Failed to describe mask:', error); + // } + // }; + + return ( +
+ {/* Title and Description */} +
+ +
+ {!image &&
+

Describe Anything Model (DAM) takes in a region of an image or a video in the form of points/boxes/scribbles/masks and outputs detailed descriptions to the region. For videos, it is sufficient to supply annotation on any frame.

+

This demo supports DAM model that takes points on images as queries. For other use cases, please refer to the inference scripts and video demo for more details.

+
} +
+ + {/* Main Content Area */} +
+ {/* Main Stage */} +
+ {image ? ( + <> + + + ) : ( + <> +
+
+ +
+ {isDragging ? 'Drop image here' : 'Upload your own image'} +
+
+ + +
+ +
+ or choose an example image below +
+ +
+ {/* Left Arrow */} + + + {/* Example Images */} +
+ {EXAMPLE_IMAGES.slice( + (currentPage - 1) * imagesPerPage, + currentPage * imagesPerPage + ).map((src, index) => ( + {`Example { + fetch(src) + .then(res => res.blob()) + .then(blob => { + const file = new File([blob], `example-${index + 1}.jpg`, { type: 'image/jpeg' }); + const syntheticEvent = { + target: { + files: [file] + } + } as unknown as React.ChangeEvent; + + onImageUpload(syntheticEvent); + }); + }} + /> + ))} +
+ + {/* Right Arrow */} + + + {/* Page Indicator */} + {/*
+ Page {currentPage} of {Math.ceil(EXAMPLE_IMAGES.length / imagesPerPage)} +
*/} +
+ +
{/* Bottom spacer */} + {/* Image Credits */} + {!image && ( +
+ Image credit for example images: {' '} + + Segment Anything Materials + + {' '}(CC BY-SA 4.0) +
+ )} +
+ + )} +
+
+ +
+ ); +}; + +export default Stage; +export type { DescriptionState }; diff --git a/demo/gradio/frontend/src/components/Tool.tsx b/demo/gradio/frontend/src/components/Tool.tsx new file mode 100644 index 0000000000000000000000000000000000000000..ea7bc3e3d443d464c59cdf8f3ce62e6aaaa51e11 --- /dev/null +++ b/demo/gradio/frontend/src/components/Tool.tsx @@ -0,0 +1,182 @@ +import React, { useContext, useEffect, useState } from "react"; +import AppContext from "./hooks/createContext"; +import { ToolProps, QueueStatus } from "./helpers/Interfaces"; +import * as _ from "underscore"; +import { describeMask, describeMaskWithoutStreaming } from "../services/maskApi"; +import ErrorModal from './ErrorModal'; +import { DescriptionState } from "./Stage"; + +const prompt = "\nDescribe the masked region in detail."; + +const Tool = ({ + handleMouseMove, + descriptionState, + setDescriptionState, + queueStatus, + setQueueStatus +}: ToolProps) => { + console.log("Tool handleMouseMove"); + const { + image: [image], + maskImg: [maskImg, setMaskImg], + maskImgData: [maskImgData, setMaskImgData], + isClicked: [isClicked, setIsClicked] + } = useContext(AppContext)!; + + const [shouldFitToWidth, setShouldFitToWidth] = useState(true); + const bodyEl = document.body; + const fitToPage = () => { + if (!image) return; + const maxWidth = window.innerWidth - 64; // Account for padding (32px on each side) + const maxHeight = window.innerHeight - 200; // Account for header and some padding + const imageAspectRatio = image.width / image.height; + const containerAspectRatio = maxWidth / maxHeight; + + setShouldFitToWidth( + imageAspectRatio > containerAspectRatio || + image.width > maxWidth + ); + }; + const resizeObserver = new ResizeObserver((entries) => { + for (const entry of entries) { + if (entry.target === bodyEl) { + fitToPage(); + } + } + }); + useEffect(() => { + fitToPage(); + resizeObserver.observe(bodyEl); + return () => { + resizeObserver.unobserve(bodyEl); + }; + }, [image]); + + const 
imageClasses = ""; + const maskImageClasses = `absolute opacity-40 pointer-events-none`; + + const [error, setError] = useState(null); + const [useStreaming, setUseStreaming] = useState(true); + + useEffect(() => { + if (!isClicked || !maskImg || !maskImgData || !image || descriptionState.state !== 'ready') { + console.log("Not ready to call model, isClicked:", isClicked, "maskImg:", maskImg !== null, "maskImgData:", maskImgData !== null, "image:", image !== null, "descriptionState.state:", descriptionState.state); + return; + } + + try { + setDescriptionState({ + state: 'describing', + description: '' + } as DescriptionState); + + const canvas = document.createElement('canvas'); + canvas.width = image.width; + canvas.height = image.height; + const ctx = canvas.getContext('2d'); + ctx?.drawImage(image, 0, 0); + const imageBase64 = canvas.toDataURL('image/jpeg').split(',')[1]; + const maskBase64 = maskImgData.split(',')[1]; + + const describeMaskWithFallback = async (useStreamingInFunction: boolean) => { + try { + let result; + console.log("useStreaming", useStreaming, "useStreamingInFunction", useStreamingInFunction); + if (useStreamingInFunction) { + result = await describeMask( + maskBase64, + imageBase64, + prompt, + (streamResult: string) => { + setDescriptionState({ + state: 'describing', + description: streamResult + } as DescriptionState); + }, + (status: QueueStatus) => { + setQueueStatus(status); + } + ); + } else { + result = await describeMaskWithoutStreaming( + maskBase64, + imageBase64, + prompt + ); + } + + setDescriptionState({ + state: 'described', + description: result + } as DescriptionState); + setQueueStatus({ inQueue: false }); + setIsClicked(false); + } catch (error) { + if (useStreaming) { + console.log("Error describing mask, switching to non-streaming", error); + setUseStreaming(false); + describeMaskWithFallback(false); + } else { + setError('Failed to generate description. 
Please try again.'); + setDescriptionState({ + state: 'ready', + description: '' + } as DescriptionState); + setIsClicked(false); + console.error('Failed to describe mask:', error); + } + } + }; + + describeMaskWithFallback(useStreaming); + + } catch (error) { + setIsClicked(false); + setError('Failed to generate description. Please try again.'); + setDescriptionState({ + state: 'ready', + description: '' + } as DescriptionState); + console.error('Failed to describe mask:', error); + } + }, [maskImgData]); + + const handleClick = async (e: React.MouseEvent) => { + if (descriptionState.state !== 'ready') return; + + setMaskImg(null); + setMaskImgData(null); + setIsClicked(true); + handleMouseMove(e); + }; + + return ( + <> + {error && setError(null)} />} +
+ {image && ( + _.defer(() => (descriptionState.state === 'ready' && !isClicked) ? setMaskImg(null) : undefined)} + onTouchStart={handleMouseMove} + onClick={handleClick} + src={image.src} + className={`${ + shouldFitToWidth ? "w-full" : "h-full" + } ${imageClasses} object-contain max-h-full max-w-full`} + > + )} + {maskImg && ( + + )} +
+ + ); +}; + +export default Tool; diff --git a/demo/gradio/frontend/src/components/helpers/Interfaces.tsx b/demo/gradio/frontend/src/components/helpers/Interfaces.tsx new file mode 100644 index 0000000000000000000000000000000000000000..cea3340b03dd09d9876c1884fa7e379c19440e39 --- /dev/null +++ b/demo/gradio/frontend/src/components/helpers/Interfaces.tsx @@ -0,0 +1,47 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// All rights reserved. + +// This source code is licensed under the license found in the +// LICENSE file in the root directory of this source tree. + +import { Tensor } from "onnxruntime-web"; +import { DescriptionState } from "../Stage"; + +export interface modelScaleProps { + samScale: number; + height: number; + width: number; +} + +export interface modelInputProps { + x: number; + y: number; + clickType: number; +} + +export interface modeDataProps { + clicks?: Array; + tensor: Tensor; + modelScale: modelScaleProps; +} + +export interface ToolProps { + handleMouseMove: (e: any) => void; + descriptionState: DescriptionState; + setDescriptionState: (value: DescriptionState) => void; + queueStatus: QueueStatus; + setQueueStatus: (value: QueueStatus) => void; +} + +export interface StageProps { + onImageUpload: (event: React.ChangeEvent) => void; + descriptionState: DescriptionState; + setDescriptionState: (value: DescriptionState) => void; +} + +export interface QueueStatus { + inQueue: boolean; + rank?: number; + queueSize?: number; + rankEta?: number | null; +} diff --git a/demo/gradio/frontend/src/components/helpers/imageUtils.tsx b/demo/gradio/frontend/src/components/helpers/imageUtils.tsx new file mode 100644 index 0000000000000000000000000000000000000000..f5c9a0e263e0dc9563658b14133bb8736a163ef4 --- /dev/null +++ b/demo/gradio/frontend/src/components/helpers/imageUtils.tsx @@ -0,0 +1,21 @@ +import { Buffer } from 'buffer'; + +export const base64ToImage = async (base64String: string): Promise => { + return new Promise((resolve, reject) 
=> { + const img = new Image(); + img.onload = () => resolve(img); + img.onerror = reject; + img.src = base64String.startsWith('data:') ? + base64String : + `data:image/png;base64,${base64String}`; + }); +}; + +export const imageToBase64 = (img: HTMLImageElement): string => { + const canvas = document.createElement('canvas'); + canvas.width = img.width; + canvas.height = img.height; + const ctx = canvas.getContext('2d'); + ctx?.drawImage(img, 0, 0); + return canvas.toDataURL('image/png'); +}; \ No newline at end of file diff --git a/demo/gradio/frontend/src/components/helpers/maskUtils.tsx b/demo/gradio/frontend/src/components/helpers/maskUtils.tsx new file mode 100644 index 0000000000000000000000000000000000000000..dc9473101b9ff5d2d52f5e9890b107e851e1b9ab --- /dev/null +++ b/demo/gradio/frontend/src/components/helpers/maskUtils.tsx @@ -0,0 +1,65 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// All rights reserved. + +// This source code is licensed under the license found in the +// LICENSE file in the root directory of this source tree. 
// Helpers that turn a raw ONNX (SAM) mask prediction into browser image
// objects: ImageData -> canvas -> data URL / HTMLImageElement.

// Build an ImageData from a flat mask prediction.
//
// input  - array-like of mask logits, one value per pixel.
// width, height - forwarded to the ImageData constructor as
//          `new ImageData(data, height, width)`, i.e. in swapped order
//          relative to the constructor's (data, width, height) signature.
//          NOTE(review): existing callers pass the ONNX output's dims[2] and
//          dims[3] here, presumably (rows, cols), which would make the two
//          swaps cancel out. Confirm before renaming or reordering anything.
// binary - true:  white foreground on an opaque black background;
//          false: blue foreground, background pixels left untouched
//                 (all-zero RGBA, i.e. fully transparent).
function arrayToImageData(input: any, width: number, height: number, binary: boolean) {
  // RGBA used for pixels that belong to the mask.
  const maskColor = binary ? [255, 255, 255, 255] : [0, 114, 189, 255];
  // RGBA used for pixels outside the mask; null means "do not write".
  const backgroundColor = binary ? [0, 0, 0, 255] : null;

  // Typed arrays start out zero-filled, so unwritten pixels stay transparent.
  const pixels = new Uint8ClampedArray(4 * width * height);

  for (let i = 0; i < input.length; i++) {
    // Threshold the prediction at 0.0; the original demo notes this mirrors
    // thresholding with predictor.model.mask_threshold on the Python side.
    const rgba = input[i] > 0.0 ? maskColor : backgroundColor;
    if (rgba === null) {
      continue;
    }
    const base = 4 * i;
    // Plain indexed writes on purpose: writes past the end of a typed array
    // are silently ignored, whereas TypedArray.prototype.set would throw.
    for (let channel = 0; channel < 4; channel++) {
      pixels[base + channel] = rgba[channel];
    }
  }

  return new ImageData(pixels, height, width);
}

// Produce an HTMLImageElement whose src is a data URL rendered from the
// given ImageData (via an off-screen canvas).
function imageDataToImage(imageData: ImageData) {
  const dataUrl = imageDataToCanvas(imageData).toDataURL();
  const image = new Image();
  image.src = dataUrl;
  return image;
}

// Render the ImageData to an off-screen canvas and return its data URL.
function imageDataToURL(imageData: ImageData) {
  return imageDataToCanvas(imageData).toDataURL();
}

// Create a canvas sized to the ImageData and paint the pixels onto it.
// If no 2D context is available the (blank) canvas is still returned.
function imageDataToCanvas(imageData: ImageData) {
  const canvas = document.createElement("canvas");
  const context = canvas.getContext("2d");
  const { width, height } = imageData;
  canvas.width = width;
  canvas.height = height;
  if (context) {
    context.putImageData(imageData, 0, 0);
  }
  return canvas;
}

// Convert the ONNX model mask output straight to an HTMLImageElement.
// Arguments are passed through unchanged to arrayToImageData.
function onnxMaskToImage(input: any, width: number, height: number, binary: boolean) {
  const maskData = arrayToImageData(input, width, height, binary);
  return imageDataToImage(maskData);
}

export { arrayToImageData, imageDataToImage, onnxMaskToImage, imageDataToURL };
diff --git a/demo/gradio/frontend/src/components/helpers/onnxModelAPI.tsx b/demo/gradio/frontend/src/components/helpers/onnxModelAPI.tsx new file mode 100644 index 0000000000000000000000000000000000000000..2e006c95b407ff4a7c0c071badf6a9cf2fe34ef0 --- /dev/null +++ b/demo/gradio/frontend/src/components/helpers/onnxModelAPI.tsx @@ -0,0 +1,71 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// All rights reserved. + +// This source code is licensed under the license found in the +// LICENSE file in the root directory of this source tree. + +import { Tensor } from "onnxruntime-web"; +import { modeDataProps } from "./Interfaces"; + +const modelData = ({ clicks, tensor, modelScale }: modeDataProps) => { + const imageEmbedding = tensor; + let pointCoords; + let pointLabels; + let pointCoordsTensor; + let pointLabelsTensor; + + // Check there are input click prompts + if (clicks) { + let n = clicks.length; + + // If there is no box input, a single padding point with + // label -1 and coordinates (0.0, 0.0) should be concatenated + // so initialize the array to support (n + 1) points. 
+ pointCoords = new Float32Array(2 * (n + 1)); + pointLabels = new Float32Array(n + 1); + + // Add clicks and scale to what SAM expects + for (let i = 0; i < n; i++) { + pointCoords[2 * i] = clicks[i].x * modelScale.samScale; + pointCoords[2 * i + 1] = clicks[i].y * modelScale.samScale; + pointLabels[i] = clicks[i].clickType; + } + + // Add in the extra point/label when only clicks and no box + // The extra point is at (0, 0) with label -1 + pointCoords[2 * n] = 0.0; + pointCoords[2 * n + 1] = 0.0; + pointLabels[n] = -1.0; + + // Create the tensor + pointCoordsTensor = new Tensor("float32", pointCoords, [1, n + 1, 2]); + pointLabelsTensor = new Tensor("float32", pointLabels, [1, n + 1]); + } + const imageSizeTensor = new Tensor("float32", [ + modelScale.height, + modelScale.width, + ]); + + if (pointCoordsTensor === undefined || pointLabelsTensor === undefined) + return; + + // There is no previous mask, so default to an empty tensor + const maskInput = new Tensor( + "float32", + new Float32Array(256 * 256), + [1, 1, 256, 256] + ); + // There is no previous mask, so default to 0 + const hasMaskInput = new Tensor("float32", [0]); + + return { + image_embeddings: imageEmbedding, + point_coords: pointCoordsTensor, + point_labels: pointLabelsTensor, + orig_im_size: imageSizeTensor, + mask_input: maskInput, + has_mask_input: hasMaskInput, + }; +}; + +export { modelData }; diff --git a/demo/gradio/frontend/src/components/helpers/scaleHelper.tsx b/demo/gradio/frontend/src/components/helpers/scaleHelper.tsx new file mode 100644 index 0000000000000000000000000000000000000000..815ceaac472a18915b33e78c70231b88e5dd2eee --- /dev/null +++ b/demo/gradio/frontend/src/components/helpers/scaleHelper.tsx @@ -0,0 +1,18 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// All rights reserved. + +// This source code is licensed under the license found in the +// LICENSE file in the root directory of this source tree. 
+ + +// Helper function for handling image scaling needed for SAM +const handleImageScale = (image: HTMLImageElement) => { + // Input images to SAM must be resized so the longest side is 1024 + const LONG_SIDE_LENGTH = 1024; + let w = image.naturalWidth; + let h = image.naturalHeight; + const samScale = LONG_SIDE_LENGTH / Math.max(h, w); + return { height: h, width: w, samScale }; +}; + +export { handleImageScale }; diff --git a/demo/gradio/frontend/src/components/hooks/context.tsx b/demo/gradio/frontend/src/components/hooks/context.tsx new file mode 100644 index 0000000000000000000000000000000000000000..f3c104fff0099e0eb502231351b6421fc8b7e2fb --- /dev/null +++ b/demo/gradio/frontend/src/components/hooks/context.tsx @@ -0,0 +1,35 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// All rights reserved. + +// This source code is licensed under the license found in the +// LICENSE file in the root directory of this source tree. + +import React, { useState } from "react"; +import { modelInputProps } from "../helpers/Interfaces"; +import AppContext from "./createContext"; + +const AppContextProvider = (props: { + children: React.ReactElement>; +}) => { + const [clicks, setClicks] = useState | null>(null); + const [image, setImage] = useState(null); + const [maskImg, setMaskImg] = useState(null); + const [maskImgData, setMaskImgData] = useState(null); + const [isClicked, setIsClicked] = useState(false); + + return ( + + {props.children} + + ); +}; + +export default AppContextProvider; diff --git a/demo/gradio/frontend/src/components/hooks/createContext.tsx b/demo/gradio/frontend/src/components/hooks/createContext.tsx new file mode 100644 index 0000000000000000000000000000000000000000..3bb90be5e191f6005203c37a174324b9e12d069e --- /dev/null +++ b/demo/gradio/frontend/src/components/hooks/createContext.tsx @@ -0,0 +1,35 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// All rights reserved. 
+ +// This source code is licensed under the license found in the +// LICENSE file in the root directory of this source tree. + +import { createContext } from "react"; +import { modelInputProps } from "../helpers/Interfaces"; + +interface contextProps { + clicks: [ + clicks: modelInputProps[] | null, + setClicks: (e: modelInputProps[] | null) => void + ]; + image: [ + image: HTMLImageElement | null, + setImage: (e: HTMLImageElement | null) => void + ]; + maskImg: [ + maskImg: HTMLImageElement | null, + setMaskImg: (e: HTMLImageElement | null) => void + ]; + maskImgData: [ + maskImgData: string | null, + setMaskImgData: (e: string | null) => void + ]; + isClicked: [ + isClicked: boolean, + setIsClicked: (e: boolean) => void + ]; +} + +const AppContext = createContext(null); + +export default AppContext; diff --git a/demo/gradio/frontend/src/index.tsx b/demo/gradio/frontend/src/index.tsx new file mode 100644 index 0000000000000000000000000000000000000000..714dfc91cb980e00a48b658bd4f1a173c803a26b --- /dev/null +++ b/demo/gradio/frontend/src/index.tsx @@ -0,0 +1,17 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// All rights reserved. + +// This source code is licensed under the license found in the +// LICENSE file in the root directory of this source tree. + +import * as React from "react"; +import { createRoot } from "react-dom/client"; +import AppContextProvider from "./components/hooks/context"; +import App from "./App"; +const container = document.getElementById("root"); +const root = createRoot(container!); +root.render( + + + +); diff --git a/demo/gradio/frontend/src/services/maskApi.tsx b/demo/gradio/frontend/src/services/maskApi.tsx new file mode 100644 index 0000000000000000000000000000000000000000..3f8d21744a7bc4daa1b8b936316fae155e5bef4b --- /dev/null +++ b/demo/gradio/frontend/src/services/maskApi.tsx @@ -0,0 +1,211 @@ +import axios from 'axios'; +import * as _ from 'underscore'; + +const API_URL = process.env.NODE_ENV === 'development' ? 
'http://localhost:7860/gradio_api' : '/gradio_api'; + +export const describeMaskWithoutStreaming = _.throttle(async ( + maskBase64: string, + imageBase64: string, + query: string +): Promise => { + try { + const response = await axios.post(`${API_URL}/run/describe_without_streaming`, { + data: [imageBase64, maskBase64, query], + }); + + console.log("response", response.data); + return response.data.data[0]; + } catch (error) { + console.error('Error describing mask:', error); + throw error; + } +}, 100); + +export const describeMask = _.throttle(async ( + maskBase64: string, + imageBase64: string, + query: string, + onStreamUpdate: (token: string) => void, + onQueueUpdate?: (status: { + inQueue: boolean, + rank?: number, + queueSize?: number, + rankEta?: number | null + }) => void +): Promise => { + console.log("describeMask"); + const initiateResponse = await axios.post(`${API_URL}/call/describe`, { + data: [imageBase64, maskBase64, query], + }); + + const eventId = initiateResponse.data.event_id; + + const response = await axios.get(`${API_URL}/queue/data?session_hash=${eventId}`, { + headers: { + 'Accept': 'text/event-stream', + }, + responseType: 'stream', + adapter: 'fetch', + }); + + const stream = response.data; + const reader = stream.pipeThrough(new TextDecoderStream()).getReader(); + + let result = ''; + let partialMessage = ''; + + while (true) { + const { value, done } = await reader.read(); + if (done) { + return result; + } + + // Concatenate with any previous partial message + const currentData = partialMessage + value; + const lines = currentData.split('\n'); + + // Save the last line if it's incomplete + partialMessage = lines[lines.length - 1]; + + // Process all complete lines except the last one + let eventType = ''; + for (let i = 0; i < lines.length - 1; i++) { + const line = lines[i]; + if (line.startsWith('event: ')) { + eventType = line.slice(7); // Remove 'event: ' prefix + console.log('Event message', line); + } else if 
(line.startsWith('data: ')) { + const eventData = line.slice(6); // Remove 'data: ' prefix + try { + let data = JSON.parse(eventData); + if (data['msg']) { + eventType = data['msg']; + if (eventType === 'process_generating') { + eventType = 'generating'; + data = data['output']['data']; + } else if (eventType === 'process_completed') { + eventType = 'complete'; + data = data['output']['data']; + } + } + + if (eventType === 'estimation' && onQueueUpdate) { + onQueueUpdate({ + inQueue: true, + rank: data.rank, + queueSize: data.queue_size, + rankEta: data.rank_eta + }); + } else if (eventType === 'process_starts' && onQueueUpdate) { + onQueueUpdate({ + inQueue: false + }); + } else if ((eventType === 'generating' || eventType === 'complete') && data[0]) { + result = data[0]; + onStreamUpdate(data[0]); + + if (eventType === 'complete') { + return result; + } + } + } catch (e) { + console.log('Error parsing SSE message:', e); + } + } else if (line !== '') { + console.log('Unknown message', line); + } + } + } +}, 100); + +export const imageToSamEmbedding = _.throttle(async ( + imageBase64: string, + onQueueUpdate?: (status: { + inQueue: boolean, + rank?: number, + queueSize?: number, + rankEta?: number | null + }) => void +): Promise => { + // First call to initiate the process + const initiateResponse = await axios.post(`${API_URL}/call/image_to_sam_embedding`, { + data: [imageBase64] + }); + + const eventId = initiateResponse.data.event_id; + + // Get the stream for queue updates and results + const response = await axios.get(`${API_URL}/queue/data?session_hash=${eventId}`, { + headers: { + 'Accept': 'text/event-stream', + }, + responseType: 'stream', + adapter: 'fetch', + }); + + const stream = response.data; + const reader = stream.pipeThrough(new TextDecoderStream()).getReader(); + + let result = ''; + let partialMessage = ''; + + while (true) { + const { value, done } = await reader.read(); + if (done) { + return result; + } + + // Concatenate with any previous 
partial message + const currentData = partialMessage + value; + const lines = currentData.split('\n'); + + // Save the last line if it's incomplete (doesn't end with \n) + // The endpoint will send an empty line to indicate the end of a message, so it's ok to not process the partial message. + partialMessage = lines[lines.length - 1]; + + // Process all complete lines except the last one + let eventType = ''; + for (let i = 0; i < lines.length - 1; i++) { + const line = lines[i]; + if (line.startsWith('event: ')) { + eventType = line.slice(7); + } else if (line.startsWith('data: ')) { + const eventData = line.slice(6); + try { + let data = JSON.parse(eventData); + if (data['msg']) { + eventType = data['msg']; + console.log("Event type:", eventType); + if (eventType === 'process_completed') { + eventType = 'complete'; + data = data['output']['data']; + } + } + + if (eventType === 'estimation' && onQueueUpdate) { + onQueueUpdate({ + inQueue: true, + rank: data.rank, + queueSize: data.queue_size, + rankEta: data.rank_eta + }); + } else if (eventType === 'process_starts' && onQueueUpdate) { + onQueueUpdate({ + inQueue: false + }); + } else if (eventType === 'complete' && data[0]) { + result = data[0]; + console.log("Result for image to sam embedding:", result); + return result; + } else { + console.log("Unknown event type:", eventType); + } + } catch (e) { + console.log('Error parsing SSE message:', e, 'Raw data:', eventData); + } + } + } + } +}, 100); + +export { API_URL }; diff --git a/demo/gradio/frontend/tailwind.config.js b/demo/gradio/frontend/tailwind.config.js new file mode 100644 index 0000000000000000000000000000000000000000..e92b38b8fe466d9592f9eaff10de94803b320154 --- /dev/null +++ b/demo/gradio/frontend/tailwind.config.js @@ -0,0 +1,12 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// All rights reserved. + +// This source code is licensed under the license found in the +// LICENSE file in the root directory of this source tree. 
+ +/** @type {import('tailwindcss').Config} */ +module.exports = { + content: ["./src/**/*.{html,js,tsx}"], + theme: {}, + plugins: [], +}; diff --git a/demo/gradio/frontend/tsconfig.json b/demo/gradio/frontend/tsconfig.json new file mode 100644 index 0000000000000000000000000000000000000000..25b59894f509e315610f675d050b62945570daf6 --- /dev/null +++ b/demo/gradio/frontend/tsconfig.json @@ -0,0 +1,24 @@ +{ + "compilerOptions": { + "lib": ["dom", "dom.iterable", "esnext"], + "allowJs": true, + "skipLibCheck": true, + "strict": true, + "forceConsistentCasingInFileNames": true, + "noEmit": false, + "esModuleInterop": true, + "module": "esnext", + "moduleResolution": "node", + "resolveJsonModule": true, + "isolatedModules": true, + "jsx": "react", + "incremental": true, + "target": "ESNext", + "useDefineForClassFields": true, + "allowSyntheticDefaultImports": true, + "outDir": "./dist/", + "sourceMap": true + }, + "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", "src"], + "exclude": ["node_modules"] +} diff --git a/demo/gradio/frontend/yarn.lock b/demo/gradio/frontend/yarn.lock new file mode 100644 index 0000000000000000000000000000000000000000..a9607570e331100dc4fd9435a5ca1a031758eab1 --- /dev/null +++ b/demo/gradio/frontend/yarn.lock @@ -0,0 +1,8149 @@ +# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY. 
+# yarn lockfile v1 + + +"@alloc/quick-lru@^5.2.0": + version "5.2.0" + resolved "https://registry.yarnpkg.com/@alloc/quick-lru/-/quick-lru-5.2.0.tgz#7bf68b20c0a350f936915fcae06f58e32007ce30" + integrity sha512-UrcABB+4bUrFABwbluTIBErXwvbsU/V7TZWfmbgJfbkwiBuziS9gxdODUyuiecfdGQ85jglMW6juS3+z5TsKLw== + +"@ampproject/remapping@^2.2.0": + version "2.3.0" + resolved "https://registry.yarnpkg.com/@ampproject/remapping/-/remapping-2.3.0.tgz#ed441b6fa600072520ce18b43d2c8cc8caecc7f4" + integrity sha512-30iZtAPgz+LTIYoeivqYo853f02jBYSd5uGnGpkFV0M3xOt9aN73erkgYAmZU43x4VfqcnLxW9Kpg3R5LC4YYw== + dependencies: + "@jridgewell/gen-mapping" "^0.3.5" + "@jridgewell/trace-mapping" "^0.3.24" + +"@babel/code-frame@^7.0.0", "@babel/code-frame@^7.10.4", "@babel/code-frame@^7.16.7", "@babel/code-frame@^7.25.9", "@babel/code-frame@^7.26.0": + version "7.26.2" + resolved "https://registry.yarnpkg.com/@babel/code-frame/-/code-frame-7.26.2.tgz#4b5fab97d33338eff916235055f0ebc21e573a85" + integrity sha512-RJlIHRueQgwWitWgF8OdFYGZX328Ax5BCemNGlqHfplnRT9ESi8JkFlvaVYbS+UubVY6dpv87Fs2u5M29iNFVQ== + dependencies: + "@babel/helper-validator-identifier" "^7.25.9" + js-tokens "^4.0.0" + picocolors "^1.0.0" + +"@babel/compat-data@^7.22.6", "@babel/compat-data@^7.25.9", "@babel/compat-data@^7.26.0": + version "7.26.2" + resolved "https://registry.yarnpkg.com/@babel/compat-data/-/compat-data-7.26.2.tgz#278b6b13664557de95b8f35b90d96785850bb56e" + integrity sha512-Z0WgzSEa+aUcdiJuCIqgujCshpMWgUpgOxXotrYPSA53hA3qopNaqcJpyr0hVb1FeWdnqFA35/fUtXgBK8srQg== + +"@babel/core@^7.18.13": + version "7.26.0" + resolved "https://registry.yarnpkg.com/@babel/core/-/core-7.26.0.tgz#d78b6023cc8f3114ccf049eb219613f74a747b40" + integrity sha512-i1SLeK+DzNnQ3LL/CswPCa/E5u4lh1k6IAEphON8F+cXt0t9euTshDru0q7/IqMa1PMPz5RnHuHscF8/ZJsStg== + dependencies: + "@ampproject/remapping" "^2.2.0" + "@babel/code-frame" "^7.26.0" + "@babel/generator" "^7.26.0" + "@babel/helper-compilation-targets" "^7.25.9" + "@babel/helper-module-transforms" 
"^7.26.0" + "@babel/helpers" "^7.26.0" + "@babel/parser" "^7.26.0" + "@babel/template" "^7.25.9" + "@babel/traverse" "^7.25.9" + "@babel/types" "^7.26.0" + convert-source-map "^2.0.0" + debug "^4.1.0" + gensync "^1.0.0-beta.2" + json5 "^2.2.3" + semver "^6.3.1" + +"@babel/generator@^7.25.9", "@babel/generator@^7.26.0": + version "7.26.2" + resolved "https://registry.yarnpkg.com/@babel/generator/-/generator-7.26.2.tgz#87b75813bec87916210e5e01939a4c823d6bb74f" + integrity sha512-zevQbhbau95nkoxSq3f/DC/SC+EEOUZd3DYqfSkMhY2/wfSeaHV1Ew4vk8e+x8lja31IbyuUa2uQ3JONqKbysw== + dependencies: + "@babel/parser" "^7.26.2" + "@babel/types" "^7.26.0" + "@jridgewell/gen-mapping" "^0.3.5" + "@jridgewell/trace-mapping" "^0.3.25" + jsesc "^3.0.2" + +"@babel/helper-annotate-as-pure@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/helper-annotate-as-pure/-/helper-annotate-as-pure-7.25.9.tgz#d8eac4d2dc0d7b6e11fa6e535332e0d3184f06b4" + integrity sha512-gv7320KBUFJz1RnylIg5WWYPRXKZ884AGkYpgpWW02TH66Dl+HaC1t1CKd0z3R4b6hdYEcmrNZHUmfCP+1u3/g== + dependencies: + "@babel/types" "^7.25.9" + +"@babel/helper-builder-binary-assignment-operator-visitor@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/helper-builder-binary-assignment-operator-visitor/-/helper-builder-binary-assignment-operator-visitor-7.25.9.tgz#f41752fe772a578e67286e6779a68a5a92de1ee9" + integrity sha512-C47lC7LIDCnz0h4vai/tpNOI95tCd5ZT3iBt/DBH5lXKHZsyNQv18yf1wIIg2ntiQNgmAvA+DgZ82iW8Qdym8g== + dependencies: + "@babel/traverse" "^7.25.9" + "@babel/types" "^7.25.9" + +"@babel/helper-compilation-targets@^7.22.6", "@babel/helper-compilation-targets@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/helper-compilation-targets/-/helper-compilation-targets-7.25.9.tgz#55af025ce365be3cdc0c1c1e56c6af617ce88875" + integrity sha512-j9Db8Suy6yV/VHa4qzrj9yZfZxhLWQdVnRlXxmKLYlhWUVB1sB2G5sxuWYXk/whHD9iW76PmNzxZ4UCnTQTVEQ== + dependencies: + "@babel/compat-data" 
"^7.25.9" + "@babel/helper-validator-option" "^7.25.9" + browserslist "^4.24.0" + lru-cache "^5.1.1" + semver "^6.3.1" + +"@babel/helper-create-class-features-plugin@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/helper-create-class-features-plugin/-/helper-create-class-features-plugin-7.25.9.tgz#7644147706bb90ff613297d49ed5266bde729f83" + integrity sha512-UTZQMvt0d/rSz6KI+qdu7GQze5TIajwTS++GUozlw8VBJDEOAqSXwm1WvmYEZwqdqSGQshRocPDqrt4HBZB3fQ== + dependencies: + "@babel/helper-annotate-as-pure" "^7.25.9" + "@babel/helper-member-expression-to-functions" "^7.25.9" + "@babel/helper-optimise-call-expression" "^7.25.9" + "@babel/helper-replace-supers" "^7.25.9" + "@babel/helper-skip-transparent-expression-wrappers" "^7.25.9" + "@babel/traverse" "^7.25.9" + semver "^6.3.1" + +"@babel/helper-create-regexp-features-plugin@^7.18.6", "@babel/helper-create-regexp-features-plugin@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/helper-create-regexp-features-plugin/-/helper-create-regexp-features-plugin-7.25.9.tgz#3e8999db94728ad2b2458d7a470e7770b7764e26" + integrity sha512-ORPNZ3h6ZRkOyAa/SaHU+XsLZr0UQzRwuDQ0cczIA17nAzZ+85G5cVkOJIj7QavLZGSe8QXUmNFxSZzjcZF9bw== + dependencies: + "@babel/helper-annotate-as-pure" "^7.25.9" + regexpu-core "^6.1.1" + semver "^6.3.1" + +"@babel/helper-define-polyfill-provider@^0.6.2": + version "0.6.2" + resolved "https://registry.yarnpkg.com/@babel/helper-define-polyfill-provider/-/helper-define-polyfill-provider-0.6.2.tgz#18594f789c3594acb24cfdb4a7f7b7d2e8bd912d" + integrity sha512-LV76g+C502biUK6AyZ3LK10vDpDyCzZnhZFXkH1L75zHPj68+qc8Zfpx2th+gzwA2MzyK+1g/3EPl62yFnVttQ== + dependencies: + "@babel/helper-compilation-targets" "^7.22.6" + "@babel/helper-plugin-utils" "^7.22.5" + debug "^4.1.1" + lodash.debounce "^4.0.8" + resolve "^1.14.2" + +"@babel/helper-member-expression-to-functions@^7.25.9": + version "7.25.9" + resolved 
"https://registry.yarnpkg.com/@babel/helper-member-expression-to-functions/-/helper-member-expression-to-functions-7.25.9.tgz#9dfffe46f727005a5ea29051ac835fb735e4c1a3" + integrity sha512-wbfdZ9w5vk0C0oyHqAJbc62+vet5prjj01jjJ8sKn3j9h3MQQlflEdXYvuqRWjHnM12coDEqiC1IRCi0U/EKwQ== + dependencies: + "@babel/traverse" "^7.25.9" + "@babel/types" "^7.25.9" + +"@babel/helper-module-imports@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/helper-module-imports/-/helper-module-imports-7.25.9.tgz#e7f8d20602ebdbf9ebbea0a0751fb0f2a4141715" + integrity sha512-tnUA4RsrmflIM6W6RFTLFSXITtl0wKjgpnLgXyowocVPrbYrLUXSBXDgTs8BlbmIzIdlBySRQjINYs2BAkiLtw== + dependencies: + "@babel/traverse" "^7.25.9" + "@babel/types" "^7.25.9" + +"@babel/helper-module-transforms@^7.25.9", "@babel/helper-module-transforms@^7.26.0": + version "7.26.0" + resolved "https://registry.yarnpkg.com/@babel/helper-module-transforms/-/helper-module-transforms-7.26.0.tgz#8ce54ec9d592695e58d84cd884b7b5c6a2fdeeae" + integrity sha512-xO+xu6B5K2czEnQye6BHA7DolFFmS3LB7stHZFaOLb1pAwO1HWLS8fXA+eh0A2yIvltPVmx3eNNDBJA2SLHXFw== + dependencies: + "@babel/helper-module-imports" "^7.25.9" + "@babel/helper-validator-identifier" "^7.25.9" + "@babel/traverse" "^7.25.9" + +"@babel/helper-optimise-call-expression@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/helper-optimise-call-expression/-/helper-optimise-call-expression-7.25.9.tgz#3324ae50bae7e2ab3c33f60c9a877b6a0146b54e" + integrity sha512-FIpuNaz5ow8VyrYcnXQTDRGvV6tTjkNtCK/RYNDXGSLlUD6cBuQTSw43CShGxjvfBTfcUA/r6UhUCbtYqkhcuQ== + dependencies: + "@babel/types" "^7.25.9" + +"@babel/helper-plugin-utils@^7.0.0", "@babel/helper-plugin-utils@^7.18.6", "@babel/helper-plugin-utils@^7.22.5", "@babel/helper-plugin-utils@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/helper-plugin-utils/-/helper-plugin-utils-7.25.9.tgz#9cbdd63a9443a2c92a725cca7ebca12cc8dd9f46" + integrity 
sha512-kSMlyUVdWe25rEsRGviIgOWnoT/nfABVWlqt9N19/dIPWViAOW2s9wznP5tURbs/IDuNk4gPy3YdYRgH3uxhBw== + +"@babel/helper-remap-async-to-generator@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/helper-remap-async-to-generator/-/helper-remap-async-to-generator-7.25.9.tgz#e53956ab3d5b9fb88be04b3e2f31b523afd34b92" + integrity sha512-IZtukuUeBbhgOcaW2s06OXTzVNJR0ybm4W5xC1opWFFJMZbwRj5LCk+ByYH7WdZPZTt8KnFwA8pvjN2yqcPlgw== + dependencies: + "@babel/helper-annotate-as-pure" "^7.25.9" + "@babel/helper-wrap-function" "^7.25.9" + "@babel/traverse" "^7.25.9" + +"@babel/helper-replace-supers@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/helper-replace-supers/-/helper-replace-supers-7.25.9.tgz#ba447224798c3da3f8713fc272b145e33da6a5c5" + integrity sha512-IiDqTOTBQy0sWyeXyGSC5TBJpGFXBkRynjBeXsvbhQFKj2viwJC76Epz35YLU1fpe/Am6Vppb7W7zM4fPQzLsQ== + dependencies: + "@babel/helper-member-expression-to-functions" "^7.25.9" + "@babel/helper-optimise-call-expression" "^7.25.9" + "@babel/traverse" "^7.25.9" + +"@babel/helper-simple-access@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/helper-simple-access/-/helper-simple-access-7.25.9.tgz#6d51783299884a2c74618d6ef0f86820ec2e7739" + integrity sha512-c6WHXuiaRsJTyHYLJV75t9IqsmTbItYfdj99PnzYGQZkYKvan5/2jKJ7gu31J3/BJ/A18grImSPModuyG/Eo0Q== + dependencies: + "@babel/traverse" "^7.25.9" + "@babel/types" "^7.25.9" + +"@babel/helper-skip-transparent-expression-wrappers@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/helper-skip-transparent-expression-wrappers/-/helper-skip-transparent-expression-wrappers-7.25.9.tgz#0b2e1b62d560d6b1954893fd2b705dc17c91f0c9" + integrity sha512-K4Du3BFa3gvyhzgPcntrkDgZzQaq6uozzcpGbOO1OEJaI+EJdqWIMTLgFgQf6lrfiDFo5FU+BxKepI9RmZqahA== + dependencies: + "@babel/traverse" "^7.25.9" + "@babel/types" "^7.25.9" + +"@babel/helper-string-parser@^7.25.9": + version "7.25.9" + resolved 
"https://registry.yarnpkg.com/@babel/helper-string-parser/-/helper-string-parser-7.25.9.tgz#1aabb72ee72ed35789b4bbcad3ca2862ce614e8c" + integrity sha512-4A/SCr/2KLd5jrtOMFzaKjVtAei3+2r/NChoBNoZ3EyP/+GlhoaEGoWOZUmFmoITP7zOJyHIMm+DYRd8o3PvHA== + +"@babel/helper-validator-identifier@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/helper-validator-identifier/-/helper-validator-identifier-7.25.9.tgz#24b64e2c3ec7cd3b3c547729b8d16871f22cbdc7" + integrity sha512-Ed61U6XJc3CVRfkERJWDz4dJwKe7iLmmJsbOGu9wSloNSFttHV0I8g6UAgb7qnK5ly5bGLPd4oXZlxCdANBOWQ== + +"@babel/helper-validator-option@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/helper-validator-option/-/helper-validator-option-7.25.9.tgz#86e45bd8a49ab7e03f276577f96179653d41da72" + integrity sha512-e/zv1co8pp55dNdEcCynfj9X7nyUKUXoUEwfXqaZt0omVOmDe9oOTdKStH4GmAw6zxMFs50ZayuMfHDKlO7Tfw== + +"@babel/helper-wrap-function@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/helper-wrap-function/-/helper-wrap-function-7.25.9.tgz#d99dfd595312e6c894bd7d237470025c85eea9d0" + integrity sha512-ETzz9UTjQSTmw39GboatdymDq4XIQbR8ySgVrylRhPOFpsd+JrKHIuF0de7GCWmem+T4uC5z7EZguod7Wj4A4g== + dependencies: + "@babel/template" "^7.25.9" + "@babel/traverse" "^7.25.9" + "@babel/types" "^7.25.9" + +"@babel/helpers@^7.26.0": + version "7.26.0" + resolved "https://registry.yarnpkg.com/@babel/helpers/-/helpers-7.26.0.tgz#30e621f1eba5aa45fe6f4868d2e9154d884119a4" + integrity sha512-tbhNuIxNcVb21pInl3ZSjksLCvgdZy9KwJ8brv993QtIVKJBBkYXz4q4ZbAv31GdnC+R90np23L5FbEBlthAEw== + dependencies: + "@babel/template" "^7.25.9" + "@babel/types" "^7.26.0" + +"@babel/parser@^7.25.9", "@babel/parser@^7.26.0", "@babel/parser@^7.26.2": + version "7.26.2" + resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.26.2.tgz#fd7b6f487cfea09889557ef5d4eeb9ff9a5abd11" + integrity sha512-DWMCZH9WA4Maitz2q21SRKHo9QXZxkDsbNZoVD62gusNtNBBqDg9i7uOhASfTfIGNzW+O+r7+jAlM8dwphcJKQ== + 
dependencies: + "@babel/types" "^7.26.0" + +"@babel/plugin-bugfix-firefox-class-in-computed-class-key@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-bugfix-firefox-class-in-computed-class-key/-/plugin-bugfix-firefox-class-in-computed-class-key-7.25.9.tgz#cc2e53ebf0a0340777fff5ed521943e253b4d8fe" + integrity sha512-ZkRyVkThtxQ/J6nv3JFYv1RYY+JT5BvU0y3k5bWrmuG4woXypRa4PXmm9RhOwodRkYFWqC0C0cqcJ4OqR7kW+g== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + "@babel/traverse" "^7.25.9" + +"@babel/plugin-bugfix-safari-class-field-initializer-scope@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-bugfix-safari-class-field-initializer-scope/-/plugin-bugfix-safari-class-field-initializer-scope-7.25.9.tgz#af9e4fb63ccb8abcb92375b2fcfe36b60c774d30" + integrity sha512-MrGRLZxLD/Zjj0gdU15dfs+HH/OXvnw/U4jJD8vpcP2CJQapPEv1IWwjc/qMg7ItBlPwSv1hRBbb7LeuANdcnw== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-bugfix-safari-id-destructuring-collision-in-function-expression@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-bugfix-safari-id-destructuring-collision-in-function-expression/-/plugin-bugfix-safari-id-destructuring-collision-in-function-expression-7.25.9.tgz#e8dc26fcd616e6c5bf2bd0d5a2c151d4f92a9137" + integrity sha512-2qUwwfAFpJLZqxd02YW9btUCZHl+RFvdDkNfZwaIJrvB8Tesjsk8pEQkTvGwZXLqXUx/2oyY3ySRhm6HOXuCug== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-bugfix-v8-spread-parameters-in-optional-chaining@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-bugfix-v8-spread-parameters-in-optional-chaining/-/plugin-bugfix-v8-spread-parameters-in-optional-chaining-7.25.9.tgz#807a667f9158acac6f6164b4beb85ad9ebc9e1d1" + integrity sha512-6xWgLZTJXwilVjlnV7ospI3xi+sl8lN8rXXbBD6vYn3UYDlGsag8wrZkKcSI8G6KgqKP7vNFaDgeDnfAABq61g== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + 
"@babel/helper-skip-transparent-expression-wrappers" "^7.25.9" + "@babel/plugin-transform-optional-chaining" "^7.25.9" + +"@babel/plugin-bugfix-v8-static-class-fields-redefine-readonly@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-bugfix-v8-static-class-fields-redefine-readonly/-/plugin-bugfix-v8-static-class-fields-redefine-readonly-7.25.9.tgz#de7093f1e7deaf68eadd7cc6b07f2ab82543269e" + integrity sha512-aLnMXYPnzwwqhYSCyXfKkIkYgJ8zv9RK+roo9DkTXz38ynIhd9XCbN08s3MGvqL2MYGVUGdRQLL/JqBIeJhJBg== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + "@babel/traverse" "^7.25.9" + +"@babel/plugin-proposal-private-property-in-object@7.21.0-placeholder-for-preset-env.2": + version "7.21.0-placeholder-for-preset-env.2" + resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-private-property-in-object/-/plugin-proposal-private-property-in-object-7.21.0-placeholder-for-preset-env.2.tgz#7844f9289546efa9febac2de4cfe358a050bd703" + integrity sha512-SOSkfJDddaM7mak6cPEpswyTRnuRltl429hMraQEglW+OkovnCzsiszTmsrlY//qLFjCpQDFRvjdm2wA5pPm9w== + +"@babel/plugin-syntax-import-assertions@^7.26.0": + version "7.26.0" + resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-import-assertions/-/plugin-syntax-import-assertions-7.26.0.tgz#620412405058efa56e4a564903b79355020f445f" + integrity sha512-QCWT5Hh830hK5EQa7XzuqIkQU9tT/whqbDz7kuaZMHFl1inRRg7JnuAEOQ0Ur0QUl0NufCk1msK2BeY79Aj/eg== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-syntax-import-attributes@^7.26.0": + version "7.26.0" + resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-import-attributes/-/plugin-syntax-import-attributes-7.26.0.tgz#3b1412847699eea739b4f2602c74ce36f6b0b0f7" + integrity sha512-e2dttdsJ1ZTpi3B9UYGLw41hifAubg19AtCu/2I/F1QNVclOBr1dYpTdmdyZ84Xiz43BS/tCUkMAZNLv12Pi+A== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-syntax-jsx@^7.25.9": + version "7.25.9" + resolved 
"https://registry.yarnpkg.com/@babel/plugin-syntax-jsx/-/plugin-syntax-jsx-7.25.9.tgz#a34313a178ea56f1951599b929c1ceacee719290" + integrity sha512-ld6oezHQMZsZfp6pWtbjaNDF2tiiCYYDqQszHt5VV437lewP9aSi2Of99CK0D0XB21k7FLgnLcmQKyKzynfeAA== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-syntax-typescript@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-typescript/-/plugin-syntax-typescript-7.25.9.tgz#67dda2b74da43727cf21d46cf9afef23f4365399" + integrity sha512-hjMgRy5hb8uJJjUcdWunWVcoi9bGpJp8p5Ol1229PoN6aytsLwNMgmdftO23wnCLMfVmTwZDWMPNq/D1SY60JQ== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-syntax-unicode-sets-regex@^7.18.6": + version "7.18.6" + resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-unicode-sets-regex/-/plugin-syntax-unicode-sets-regex-7.18.6.tgz#d49a3b3e6b52e5be6740022317580234a6a47357" + integrity sha512-727YkEAPwSIQTv5im8QHz3upqp92JTWhidIC81Tdx4VJYIte/VndKf1qKrfnnhPLiPghStWfvC/iFaMCQu7Nqg== + dependencies: + "@babel/helper-create-regexp-features-plugin" "^7.18.6" + "@babel/helper-plugin-utils" "^7.18.6" + +"@babel/plugin-transform-arrow-functions@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-arrow-functions/-/plugin-transform-arrow-functions-7.25.9.tgz#7821d4410bee5daaadbb4cdd9a6649704e176845" + integrity sha512-6jmooXYIwn9ca5/RylZADJ+EnSxVUS5sjeJ9UPk6RWRzXCmOJCy6dqItPJFpw2cuCangPK4OYr5uhGKcmrm5Qg== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-async-generator-functions@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-async-generator-functions/-/plugin-transform-async-generator-functions-7.25.9.tgz#1b18530b077d18a407c494eb3d1d72da505283a2" + integrity sha512-RXV6QAzTBbhDMO9fWwOmwwTuYaiPbggWQ9INdZqAYeSHyG7FzQ+nOZaUUjNwKv9pV3aE4WFqFm1Hnbci5tBCAw== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + 
"@babel/helper-remap-async-to-generator" "^7.25.9" + "@babel/traverse" "^7.25.9" + +"@babel/plugin-transform-async-to-generator@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-async-to-generator/-/plugin-transform-async-to-generator-7.25.9.tgz#c80008dacae51482793e5a9c08b39a5be7e12d71" + integrity sha512-NT7Ejn7Z/LjUH0Gv5KsBCxh7BH3fbLTV0ptHvpeMvrt3cPThHfJfst9Wrb7S8EvJ7vRTFI7z+VAvFVEQn/m5zQ== + dependencies: + "@babel/helper-module-imports" "^7.25.9" + "@babel/helper-plugin-utils" "^7.25.9" + "@babel/helper-remap-async-to-generator" "^7.25.9" + +"@babel/plugin-transform-block-scoped-functions@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-block-scoped-functions/-/plugin-transform-block-scoped-functions-7.25.9.tgz#5700691dbd7abb93de300ca7be94203764fce458" + integrity sha512-toHc9fzab0ZfenFpsyYinOX0J/5dgJVA2fm64xPewu7CoYHWEivIWKxkK2rMi4r3yQqLnVmheMXRdG+k239CgA== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-block-scoping@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-block-scoping/-/plugin-transform-block-scoping-7.25.9.tgz#c33665e46b06759c93687ca0f84395b80c0473a1" + integrity sha512-1F05O7AYjymAtqbsFETboN1NvBdcnzMerO+zlMyJBEz6WkMdejvGWw9p05iTSjC85RLlBseHHQpYaM4gzJkBGg== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-class-properties@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-class-properties/-/plugin-transform-class-properties-7.25.9.tgz#a8ce84fedb9ad512549984101fa84080a9f5f51f" + integrity sha512-bbMAII8GRSkcd0h0b4X+36GksxuheLFjP65ul9w6C3KgAamI3JqErNgSrosX6ZPj+Mpim5VvEbawXxJCyEUV3Q== + dependencies: + "@babel/helper-create-class-features-plugin" "^7.25.9" + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-class-static-block@^7.26.0": + version "7.26.0" + resolved 
"https://registry.yarnpkg.com/@babel/plugin-transform-class-static-block/-/plugin-transform-class-static-block-7.26.0.tgz#6c8da219f4eb15cae9834ec4348ff8e9e09664a0" + integrity sha512-6J2APTs7BDDm+UMqP1useWqhcRAXo0WIoVj26N7kPFB6S73Lgvyka4KTZYIxtgYXiN5HTyRObA72N2iu628iTQ== + dependencies: + "@babel/helper-create-class-features-plugin" "^7.25.9" + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-classes@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-classes/-/plugin-transform-classes-7.25.9.tgz#7152457f7880b593a63ade8a861e6e26a4469f52" + integrity sha512-mD8APIXmseE7oZvZgGABDyM34GUmK45Um2TXiBUt7PnuAxrgoSVf123qUzPxEr/+/BHrRn5NMZCdE2m/1F8DGg== + dependencies: + "@babel/helper-annotate-as-pure" "^7.25.9" + "@babel/helper-compilation-targets" "^7.25.9" + "@babel/helper-plugin-utils" "^7.25.9" + "@babel/helper-replace-supers" "^7.25.9" + "@babel/traverse" "^7.25.9" + globals "^11.1.0" + +"@babel/plugin-transform-computed-properties@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-computed-properties/-/plugin-transform-computed-properties-7.25.9.tgz#db36492c78460e534b8852b1d5befe3c923ef10b" + integrity sha512-HnBegGqXZR12xbcTHlJ9HGxw1OniltT26J5YpfruGqtUHlz/xKf/G2ak9e+t0rVqrjXa9WOhvYPz1ERfMj23AA== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + "@babel/template" "^7.25.9" + +"@babel/plugin-transform-destructuring@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-destructuring/-/plugin-transform-destructuring-7.25.9.tgz#966ea2595c498224340883602d3cfd7a0c79cea1" + integrity sha512-WkCGb/3ZxXepmMiX101nnGiU+1CAdut8oHyEOHxkKuS1qKpU2SMXE2uSvfz8PBuLd49V6LEsbtyPhWC7fnkgvQ== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-dotall-regex@^7.25.9": + version "7.25.9" + resolved 
"https://registry.yarnpkg.com/@babel/plugin-transform-dotall-regex/-/plugin-transform-dotall-regex-7.25.9.tgz#bad7945dd07734ca52fe3ad4e872b40ed09bb09a" + integrity sha512-t7ZQ7g5trIgSRYhI9pIJtRl64KHotutUJsh4Eze5l7olJv+mRSg4/MmbZ0tv1eeqRbdvo/+trvJD/Oc5DmW2cA== + dependencies: + "@babel/helper-create-regexp-features-plugin" "^7.25.9" + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-duplicate-keys@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-duplicate-keys/-/plugin-transform-duplicate-keys-7.25.9.tgz#8850ddf57dce2aebb4394bb434a7598031059e6d" + integrity sha512-LZxhJ6dvBb/f3x8xwWIuyiAHy56nrRG3PeYTpBkkzkYRRQ6tJLu68lEF5VIqMUZiAV7a8+Tb78nEoMCMcqjXBw== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-duplicate-named-capturing-groups-regex@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-duplicate-named-capturing-groups-regex/-/plugin-transform-duplicate-named-capturing-groups-regex-7.25.9.tgz#6f7259b4de127721a08f1e5165b852fcaa696d31" + integrity sha512-0UfuJS0EsXbRvKnwcLjFtJy/Sxc5J5jhLHnFhy7u4zih97Hz6tJkLU+O+FMMrNZrosUPxDi6sYxJ/EA8jDiAog== + dependencies: + "@babel/helper-create-regexp-features-plugin" "^7.25.9" + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-dynamic-import@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-dynamic-import/-/plugin-transform-dynamic-import-7.25.9.tgz#23e917de63ed23c6600c5dd06d94669dce79f7b8" + integrity sha512-GCggjexbmSLaFhqsojeugBpeaRIgWNTcgKVq/0qIteFEqY2A+b9QidYadrWlnbWQUrW5fn+mCvf3tr7OeBFTyg== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-exponentiation-operator@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-exponentiation-operator/-/plugin-transform-exponentiation-operator-7.25.9.tgz#ece47b70d236c1d99c263a1e22b62dc20a4c8b0f" + integrity 
sha512-KRhdhlVk2nObA5AYa7QMgTMTVJdfHprfpAk4DjZVtllqRg9qarilstTKEhpVjyt+Npi8ThRyiV8176Am3CodPA== + dependencies: + "@babel/helper-builder-binary-assignment-operator-visitor" "^7.25.9" + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-export-namespace-from@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-export-namespace-from/-/plugin-transform-export-namespace-from-7.25.9.tgz#90745fe55053394f554e40584cda81f2c8a402a2" + integrity sha512-2NsEz+CxzJIVOPx2o9UsW1rXLqtChtLoVnwYHHiB04wS5sgn7mrV45fWMBX0Kk+ub9uXytVYfNP2HjbVbCB3Ww== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-for-of@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-for-of/-/plugin-transform-for-of-7.25.9.tgz#4bdc7d42a213397905d89f02350c5267866d5755" + integrity sha512-LqHxduHoaGELJl2uhImHwRQudhCM50pT46rIBNvtT/Oql3nqiS3wOwP+5ten7NpYSXrrVLgtZU3DZmPtWZo16A== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + "@babel/helper-skip-transparent-expression-wrappers" "^7.25.9" + +"@babel/plugin-transform-function-name@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-function-name/-/plugin-transform-function-name-7.25.9.tgz#939d956e68a606661005bfd550c4fc2ef95f7b97" + integrity sha512-8lP+Yxjv14Vc5MuWBpJsoUCd3hD6V9DgBon2FVYL4jJgbnVQ9fTgYmonchzZJOVNgzEgbxp4OwAf6xz6M/14XA== + dependencies: + "@babel/helper-compilation-targets" "^7.25.9" + "@babel/helper-plugin-utils" "^7.25.9" + "@babel/traverse" "^7.25.9" + +"@babel/plugin-transform-json-strings@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-json-strings/-/plugin-transform-json-strings-7.25.9.tgz#c86db407cb827cded902a90c707d2781aaa89660" + integrity sha512-xoTMk0WXceiiIvsaquQQUaLLXSW1KJ159KP87VilruQm0LNNGxWzahxSS6T6i4Zg3ezp4vA4zuwiNUR53qmQAw== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + 
+"@babel/plugin-transform-literals@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-literals/-/plugin-transform-literals-7.25.9.tgz#1a1c6b4d4aa59bc4cad5b6b3a223a0abd685c9de" + integrity sha512-9N7+2lFziW8W9pBl2TzaNht3+pgMIRP74zizeCSrtnSKVdUl8mAjjOP2OOVQAfZ881P2cNjDj1uAMEdeD50nuQ== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-logical-assignment-operators@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-logical-assignment-operators/-/plugin-transform-logical-assignment-operators-7.25.9.tgz#b19441a8c39a2fda0902900b306ea05ae1055db7" + integrity sha512-wI4wRAzGko551Y8eVf6iOY9EouIDTtPb0ByZx+ktDGHwv6bHFimrgJM/2T021txPZ2s4c7bqvHbd+vXG6K948Q== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-member-expression-literals@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-member-expression-literals/-/plugin-transform-member-expression-literals-7.25.9.tgz#63dff19763ea64a31f5e6c20957e6a25e41ed5de" + integrity sha512-PYazBVfofCQkkMzh2P6IdIUaCEWni3iYEerAsRWuVd8+jlM1S9S9cz1dF9hIzyoZ8IA3+OwVYIp9v9e+GbgZhA== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-modules-amd@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-modules-amd/-/plugin-transform-modules-amd-7.25.9.tgz#49ba478f2295101544abd794486cd3088dddb6c5" + integrity sha512-g5T11tnI36jVClQlMlt4qKDLlWnG5pP9CSM4GhdRciTNMRgkfpo5cR6b4rGIOYPgRRuFAvwjPQ/Yk+ql4dyhbw== + dependencies: + "@babel/helper-module-transforms" "^7.25.9" + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-modules-commonjs@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-modules-commonjs/-/plugin-transform-modules-commonjs-7.25.9.tgz#d165c8c569a080baf5467bda88df6425fc060686" + integrity 
sha512-dwh2Ol1jWwL2MgkCzUSOvfmKElqQcuswAZypBSUsScMXvgdT8Ekq5YA6TtqpTVWH+4903NmboMuH1o9i8Rxlyg== + dependencies: + "@babel/helper-module-transforms" "^7.25.9" + "@babel/helper-plugin-utils" "^7.25.9" + "@babel/helper-simple-access" "^7.25.9" + +"@babel/plugin-transform-modules-systemjs@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-modules-systemjs/-/plugin-transform-modules-systemjs-7.25.9.tgz#8bd1b43836269e3d33307151a114bcf3ba6793f8" + integrity sha512-hyss7iIlH/zLHaehT+xwiymtPOpsiwIIRlCAOwBB04ta5Tt+lNItADdlXw3jAWZ96VJ2jlhl/c+PNIQPKNfvcA== + dependencies: + "@babel/helper-module-transforms" "^7.25.9" + "@babel/helper-plugin-utils" "^7.25.9" + "@babel/helper-validator-identifier" "^7.25.9" + "@babel/traverse" "^7.25.9" + +"@babel/plugin-transform-modules-umd@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-modules-umd/-/plugin-transform-modules-umd-7.25.9.tgz#6710079cdd7c694db36529a1e8411e49fcbf14c9" + integrity sha512-bS9MVObUgE7ww36HEfwe6g9WakQ0KF07mQF74uuXdkoziUPfKyu/nIm663kz//e5O1nPInPFx36z7WJmJ4yNEw== + dependencies: + "@babel/helper-module-transforms" "^7.25.9" + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-named-capturing-groups-regex@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-named-capturing-groups-regex/-/plugin-transform-named-capturing-groups-regex-7.25.9.tgz#454990ae6cc22fd2a0fa60b3a2c6f63a38064e6a" + integrity sha512-oqB6WHdKTGl3q/ItQhpLSnWWOpjUJLsOCLVyeFgeTktkBSCiurvPOsyt93gibI9CmuKvTUEtWmG5VhZD+5T/KA== + dependencies: + "@babel/helper-create-regexp-features-plugin" "^7.25.9" + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-new-target@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-new-target/-/plugin-transform-new-target-7.25.9.tgz#42e61711294b105c248336dcb04b77054ea8becd" + integrity 
sha512-U/3p8X1yCSoKyUj2eOBIx3FOn6pElFOKvAAGf8HTtItuPyB+ZeOqfn+mvTtg9ZlOAjsPdK3ayQEjqHjU/yLeVQ== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-nullish-coalescing-operator@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-nullish-coalescing-operator/-/plugin-transform-nullish-coalescing-operator-7.25.9.tgz#bcb1b0d9e948168102d5f7104375ca21c3266949" + integrity sha512-ENfftpLZw5EItALAD4WsY/KUWvhUlZndm5GC7G3evUsVeSJB6p0pBeLQUnRnBCBx7zV0RKQjR9kCuwrsIrjWog== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-numeric-separator@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-numeric-separator/-/plugin-transform-numeric-separator-7.25.9.tgz#bfed75866261a8b643468b0ccfd275f2033214a1" + integrity sha512-TlprrJ1GBZ3r6s96Yq8gEQv82s8/5HnCVHtEJScUj90thHQbwe+E5MLhi2bbNHBEJuzrvltXSru+BUxHDoog7Q== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-object-rest-spread@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-object-rest-spread/-/plugin-transform-object-rest-spread-7.25.9.tgz#0203725025074164808bcf1a2cfa90c652c99f18" + integrity sha512-fSaXafEE9CVHPweLYw4J0emp1t8zYTXyzN3UuG+lylqkvYd7RMrsOQ8TYx5RF231be0vqtFC6jnx3UmpJmKBYg== + dependencies: + "@babel/helper-compilation-targets" "^7.25.9" + "@babel/helper-plugin-utils" "^7.25.9" + "@babel/plugin-transform-parameters" "^7.25.9" + +"@babel/plugin-transform-object-super@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-object-super/-/plugin-transform-object-super-7.25.9.tgz#385d5de135162933beb4a3d227a2b7e52bb4cf03" + integrity sha512-Kj/Gh+Rw2RNLbCK1VAWj2U48yxxqL2x0k10nPtSdRa0O2xnHXalD0s+o1A6a0W43gJ00ANo38jxkQreckOzv5A== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + "@babel/helper-replace-supers" "^7.25.9" + 
+"@babel/plugin-transform-optional-catch-binding@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-optional-catch-binding/-/plugin-transform-optional-catch-binding-7.25.9.tgz#10e70d96d52bb1f10c5caaac59ac545ea2ba7ff3" + integrity sha512-qM/6m6hQZzDcZF3onzIhZeDHDO43bkNNlOX0i8n3lR6zLbu0GN2d8qfM/IERJZYauhAHSLHy39NF0Ctdvcid7g== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-optional-chaining@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-optional-chaining/-/plugin-transform-optional-chaining-7.25.9.tgz#e142eb899d26ef715435f201ab6e139541eee7dd" + integrity sha512-6AvV0FsLULbpnXeBjrY4dmWF8F7gf8QnvTEoO/wX/5xm/xE1Xo8oPuD3MPS+KS9f9XBEAWN7X1aWr4z9HdOr7A== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + "@babel/helper-skip-transparent-expression-wrappers" "^7.25.9" + +"@babel/plugin-transform-parameters@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-parameters/-/plugin-transform-parameters-7.25.9.tgz#b856842205b3e77e18b7a7a1b94958069c7ba257" + integrity sha512-wzz6MKwpnshBAiRmn4jR8LYz/g8Ksg0o80XmwZDlordjwEk9SxBzTWC7F5ef1jhbrbOW2DJ5J6ayRukrJmnr0g== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-private-methods@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-private-methods/-/plugin-transform-private-methods-7.25.9.tgz#847f4139263577526455d7d3223cd8bda51e3b57" + integrity sha512-D/JUozNpQLAPUVusvqMxyvjzllRaF8/nSrP1s2YGQT/W4LHK4xxsMcHjhOGTS01mp9Hda8nswb+FblLdJornQw== + dependencies: + "@babel/helper-create-class-features-plugin" "^7.25.9" + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-private-property-in-object@^7.25.9": + version "7.25.9" + resolved 
"https://registry.yarnpkg.com/@babel/plugin-transform-private-property-in-object/-/plugin-transform-private-property-in-object-7.25.9.tgz#9c8b73e64e6cc3cbb2743633885a7dd2c385fe33" + integrity sha512-Evf3kcMqzXA3xfYJmZ9Pg1OvKdtqsDMSWBDzZOPLvHiTt36E75jLDQo5w1gtRU95Q4E5PDttrTf25Fw8d/uWLw== + dependencies: + "@babel/helper-annotate-as-pure" "^7.25.9" + "@babel/helper-create-class-features-plugin" "^7.25.9" + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-property-literals@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-property-literals/-/plugin-transform-property-literals-7.25.9.tgz#d72d588bd88b0dec8b62e36f6fda91cedfe28e3f" + integrity sha512-IvIUeV5KrS/VPavfSM/Iu+RE6llrHrYIKY1yfCzyO/lMXHQ+p7uGhonmGVisv6tSBSVgWzMBohTcvkC9vQcQFA== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-react-display-name@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-react-display-name/-/plugin-transform-react-display-name-7.25.9.tgz#4b79746b59efa1f38c8695065a92a9f5afb24f7d" + integrity sha512-KJfMlYIUxQB1CJfO3e0+h0ZHWOTLCPP115Awhaz8U0Zpq36Gl/cXlpoyMRnUWlhNUBAzldnCiAZNvCDj7CrKxQ== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-react-jsx-development@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-react-jsx-development/-/plugin-transform-react-jsx-development-7.25.9.tgz#8fd220a77dd139c07e25225a903b8be8c829e0d7" + integrity sha512-9mj6rm7XVYs4mdLIpbZnHOYdpW42uoiBCTVowg7sP1thUOiANgMb4UtpRivR0pp5iL+ocvUv7X4mZgFRpJEzGw== + dependencies: + "@babel/plugin-transform-react-jsx" "^7.25.9" + +"@babel/plugin-transform-react-jsx@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-react-jsx/-/plugin-transform-react-jsx-7.25.9.tgz#06367940d8325b36edff5e2b9cbe782947ca4166" + integrity 
sha512-s5XwpQYCqGerXl+Pu6VDL3x0j2d82eiV77UJ8a2mDHAW7j9SWRqQ2y1fNo1Z74CdcYipl5Z41zvjj4Nfzq36rw== + dependencies: + "@babel/helper-annotate-as-pure" "^7.25.9" + "@babel/helper-module-imports" "^7.25.9" + "@babel/helper-plugin-utils" "^7.25.9" + "@babel/plugin-syntax-jsx" "^7.25.9" + "@babel/types" "^7.25.9" + +"@babel/plugin-transform-react-pure-annotations@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-react-pure-annotations/-/plugin-transform-react-pure-annotations-7.25.9.tgz#ea1c11b2f9dbb8e2d97025f43a3b5bc47e18ae62" + integrity sha512-KQ/Takk3T8Qzj5TppkS1be588lkbTp5uj7w6a0LeQaTMSckU/wK0oJ/pih+T690tkgI5jfmg2TqDJvd41Sj1Cg== + dependencies: + "@babel/helper-annotate-as-pure" "^7.25.9" + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-regenerator@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-regenerator/-/plugin-transform-regenerator-7.25.9.tgz#03a8a4670d6cebae95305ac6defac81ece77740b" + integrity sha512-vwDcDNsgMPDGP0nMqzahDWE5/MLcX8sv96+wfX7as7LoF/kr97Bo/7fI00lXY4wUXYfVmwIIyG80fGZ1uvt2qg== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + regenerator-transform "^0.15.2" + +"@babel/plugin-transform-regexp-modifiers@^7.26.0": + version "7.26.0" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-regexp-modifiers/-/plugin-transform-regexp-modifiers-7.26.0.tgz#2f5837a5b5cd3842a919d8147e9903cc7455b850" + integrity sha512-vN6saax7lrA2yA/Pak3sCxuD6F5InBjn9IcrIKQPjpsLvuHYLVroTxjdlVRHjjBWxKOqIwpTXDkOssYT4BFdRw== + dependencies: + "@babel/helper-create-regexp-features-plugin" "^7.25.9" + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-reserved-words@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-reserved-words/-/plugin-transform-reserved-words-7.25.9.tgz#0398aed2f1f10ba3f78a93db219b27ef417fb9ce" + integrity 
sha512-7DL7DKYjn5Su++4RXu8puKZm2XBPHyjWLUidaPEkCUBbE7IPcsrkRHggAOOKydH1dASWdcUBxrkOGNxUv5P3Jg== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-shorthand-properties@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-shorthand-properties/-/plugin-transform-shorthand-properties-7.25.9.tgz#bb785e6091f99f826a95f9894fc16fde61c163f2" + integrity sha512-MUv6t0FhO5qHnS/W8XCbHmiRWOphNufpE1IVxhK5kuN3Td9FT1x4rx4K42s3RYdMXCXpfWkGSbCSd0Z64xA7Ng== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-spread@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-spread/-/plugin-transform-spread-7.25.9.tgz#24a35153931b4ba3d13cec4a7748c21ab5514ef9" + integrity sha512-oNknIB0TbURU5pqJFVbOOFspVlrpVwo2H1+HUIsVDvp5VauGGDP1ZEvO8Nn5xyMEs3dakajOxlmkNW7kNgSm6A== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + "@babel/helper-skip-transparent-expression-wrappers" "^7.25.9" + +"@babel/plugin-transform-sticky-regex@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-sticky-regex/-/plugin-transform-sticky-regex-7.25.9.tgz#c7f02b944e986a417817b20ba2c504dfc1453d32" + integrity sha512-WqBUSgeVwucYDP9U/xNRQam7xV8W5Zf+6Eo7T2SRVUFlhRiMNFdFz58u0KZmCVVqs2i7SHgpRnAhzRNmKfi2uA== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-template-literals@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-template-literals/-/plugin-transform-template-literals-7.25.9.tgz#6dbd4a24e8fad024df76d1fac6a03cf413f60fe1" + integrity sha512-o97AE4syN71M/lxrCtQByzphAdlYluKPDBzDVzMmfCobUjjhAryZV0AIpRPrxN0eAkxXO6ZLEScmt+PNhj2OTw== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-typeof-symbol@^7.25.9": + version "7.25.9" + resolved 
"https://registry.yarnpkg.com/@babel/plugin-transform-typeof-symbol/-/plugin-transform-typeof-symbol-7.25.9.tgz#224ba48a92869ddbf81f9b4a5f1204bbf5a2bc4b" + integrity sha512-v61XqUMiueJROUv66BVIOi0Fv/CUuZuZMl5NkRoCVxLAnMexZ0A3kMe7vvZ0nulxMuMp0Mk6S5hNh48yki08ZA== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-typescript@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-typescript/-/plugin-transform-typescript-7.25.9.tgz#69267905c2b33c2ac6d8fe765e9dc2ddc9df3849" + integrity sha512-7PbZQZP50tzv2KGGnhh82GSyMB01yKY9scIjf1a+GfZCtInOWqUH5+1EBU4t9fyR5Oykkkc9vFTs4OHrhHXljQ== + dependencies: + "@babel/helper-annotate-as-pure" "^7.25.9" + "@babel/helper-create-class-features-plugin" "^7.25.9" + "@babel/helper-plugin-utils" "^7.25.9" + "@babel/helper-skip-transparent-expression-wrappers" "^7.25.9" + "@babel/plugin-syntax-typescript" "^7.25.9" + +"@babel/plugin-transform-unicode-escapes@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-unicode-escapes/-/plugin-transform-unicode-escapes-7.25.9.tgz#a75ef3947ce15363fccaa38e2dd9bc70b2788b82" + integrity sha512-s5EDrE6bW97LtxOcGj1Khcx5AaXwiMmi4toFWRDP9/y0Woo6pXC+iyPu/KuhKtfSrNFd7jJB+/fkOtZy6aIC6Q== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-unicode-property-regex@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-unicode-property-regex/-/plugin-transform-unicode-property-regex-7.25.9.tgz#a901e96f2c1d071b0d1bb5dc0d3c880ce8f53dd3" + integrity sha512-Jt2d8Ga+QwRluxRQ307Vlxa6dMrYEMZCgGxoPR8V52rxPyldHu3hdlHspxaqYmE7oID5+kB+UKUB/eWS+DkkWg== + dependencies: + "@babel/helper-create-regexp-features-plugin" "^7.25.9" + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-unicode-regex@^7.25.9": + version "7.25.9" + resolved 
"https://registry.yarnpkg.com/@babel/plugin-transform-unicode-regex/-/plugin-transform-unicode-regex-7.25.9.tgz#5eae747fe39eacf13a8bd006a4fb0b5d1fa5e9b1" + integrity sha512-yoxstj7Rg9dlNn9UQxzk4fcNivwv4nUYz7fYXBaKxvw/lnmPuOm/ikoELygbYq68Bls3D/D+NBPHiLwZdZZ4HA== + dependencies: + "@babel/helper-create-regexp-features-plugin" "^7.25.9" + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/plugin-transform-unicode-sets-regex@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/plugin-transform-unicode-sets-regex/-/plugin-transform-unicode-sets-regex-7.25.9.tgz#65114c17b4ffc20fa5b163c63c70c0d25621fabe" + integrity sha512-8BYqO3GeVNHtx69fdPshN3fnzUNLrWdHhk/icSwigksJGczKSizZ+Z6SBCxTs723Fr5VSNorTIK7a+R2tISvwQ== + dependencies: + "@babel/helper-create-regexp-features-plugin" "^7.25.9" + "@babel/helper-plugin-utils" "^7.25.9" + +"@babel/preset-env@^7.18.10": + version "7.26.0" + resolved "https://registry.yarnpkg.com/@babel/preset-env/-/preset-env-7.26.0.tgz#30e5c6bc1bcc54865bff0c5a30f6d4ccdc7fa8b1" + integrity sha512-H84Fxq0CQJNdPFT2DrfnylZ3cf5K43rGfWK4LJGPpjKHiZlk0/RzwEus3PDDZZg+/Er7lCA03MVacueUuXdzfw== + dependencies: + "@babel/compat-data" "^7.26.0" + "@babel/helper-compilation-targets" "^7.25.9" + "@babel/helper-plugin-utils" "^7.25.9" + "@babel/helper-validator-option" "^7.25.9" + "@babel/plugin-bugfix-firefox-class-in-computed-class-key" "^7.25.9" + "@babel/plugin-bugfix-safari-class-field-initializer-scope" "^7.25.9" + "@babel/plugin-bugfix-safari-id-destructuring-collision-in-function-expression" "^7.25.9" + "@babel/plugin-bugfix-v8-spread-parameters-in-optional-chaining" "^7.25.9" + "@babel/plugin-bugfix-v8-static-class-fields-redefine-readonly" "^7.25.9" + "@babel/plugin-proposal-private-property-in-object" "7.21.0-placeholder-for-preset-env.2" + "@babel/plugin-syntax-import-assertions" "^7.26.0" + "@babel/plugin-syntax-import-attributes" "^7.26.0" + "@babel/plugin-syntax-unicode-sets-regex" "^7.18.6" + 
"@babel/plugin-transform-arrow-functions" "^7.25.9" + "@babel/plugin-transform-async-generator-functions" "^7.25.9" + "@babel/plugin-transform-async-to-generator" "^7.25.9" + "@babel/plugin-transform-block-scoped-functions" "^7.25.9" + "@babel/plugin-transform-block-scoping" "^7.25.9" + "@babel/plugin-transform-class-properties" "^7.25.9" + "@babel/plugin-transform-class-static-block" "^7.26.0" + "@babel/plugin-transform-classes" "^7.25.9" + "@babel/plugin-transform-computed-properties" "^7.25.9" + "@babel/plugin-transform-destructuring" "^7.25.9" + "@babel/plugin-transform-dotall-regex" "^7.25.9" + "@babel/plugin-transform-duplicate-keys" "^7.25.9" + "@babel/plugin-transform-duplicate-named-capturing-groups-regex" "^7.25.9" + "@babel/plugin-transform-dynamic-import" "^7.25.9" + "@babel/plugin-transform-exponentiation-operator" "^7.25.9" + "@babel/plugin-transform-export-namespace-from" "^7.25.9" + "@babel/plugin-transform-for-of" "^7.25.9" + "@babel/plugin-transform-function-name" "^7.25.9" + "@babel/plugin-transform-json-strings" "^7.25.9" + "@babel/plugin-transform-literals" "^7.25.9" + "@babel/plugin-transform-logical-assignment-operators" "^7.25.9" + "@babel/plugin-transform-member-expression-literals" "^7.25.9" + "@babel/plugin-transform-modules-amd" "^7.25.9" + "@babel/plugin-transform-modules-commonjs" "^7.25.9" + "@babel/plugin-transform-modules-systemjs" "^7.25.9" + "@babel/plugin-transform-modules-umd" "^7.25.9" + "@babel/plugin-transform-named-capturing-groups-regex" "^7.25.9" + "@babel/plugin-transform-new-target" "^7.25.9" + "@babel/plugin-transform-nullish-coalescing-operator" "^7.25.9" + "@babel/plugin-transform-numeric-separator" "^7.25.9" + "@babel/plugin-transform-object-rest-spread" "^7.25.9" + "@babel/plugin-transform-object-super" "^7.25.9" + "@babel/plugin-transform-optional-catch-binding" "^7.25.9" + "@babel/plugin-transform-optional-chaining" "^7.25.9" + "@babel/plugin-transform-parameters" "^7.25.9" + 
"@babel/plugin-transform-private-methods" "^7.25.9" + "@babel/plugin-transform-private-property-in-object" "^7.25.9" + "@babel/plugin-transform-property-literals" "^7.25.9" + "@babel/plugin-transform-regenerator" "^7.25.9" + "@babel/plugin-transform-regexp-modifiers" "^7.26.0" + "@babel/plugin-transform-reserved-words" "^7.25.9" + "@babel/plugin-transform-shorthand-properties" "^7.25.9" + "@babel/plugin-transform-spread" "^7.25.9" + "@babel/plugin-transform-sticky-regex" "^7.25.9" + "@babel/plugin-transform-template-literals" "^7.25.9" + "@babel/plugin-transform-typeof-symbol" "^7.25.9" + "@babel/plugin-transform-unicode-escapes" "^7.25.9" + "@babel/plugin-transform-unicode-property-regex" "^7.25.9" + "@babel/plugin-transform-unicode-regex" "^7.25.9" + "@babel/plugin-transform-unicode-sets-regex" "^7.25.9" + "@babel/preset-modules" "0.1.6-no-external-plugins" + babel-plugin-polyfill-corejs2 "^0.4.10" + babel-plugin-polyfill-corejs3 "^0.10.6" + babel-plugin-polyfill-regenerator "^0.6.1" + core-js-compat "^3.38.1" + semver "^6.3.1" + +"@babel/preset-modules@0.1.6-no-external-plugins": + version "0.1.6-no-external-plugins" + resolved "https://registry.yarnpkg.com/@babel/preset-modules/-/preset-modules-0.1.6-no-external-plugins.tgz#ccb88a2c49c817236861fee7826080573b8a923a" + integrity sha512-HrcgcIESLm9aIR842yhJ5RWan/gebQUJ6E/E5+rf0y9o6oj7w0Br+sWuL6kEQ/o/AdfvR1Je9jG18/gnpwjEyA== + dependencies: + "@babel/helper-plugin-utils" "^7.0.0" + "@babel/types" "^7.4.4" + esutils "^2.0.2" + +"@babel/preset-react@^7.18.6": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/preset-react/-/preset-react-7.25.9.tgz#5f473035dc2094bcfdbc7392d0766bd42dce173e" + integrity sha512-D3to0uSPiWE7rBrdIICCd0tJSIGpLaaGptna2+w7Pft5xMqLpA1sz99DK5TZ1TjGbdQ/VI1eCSZ06dv3lT4JOw== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + "@babel/helper-validator-option" "^7.25.9" + "@babel/plugin-transform-react-display-name" "^7.25.9" + "@babel/plugin-transform-react-jsx" "^7.25.9" + 
"@babel/plugin-transform-react-jsx-development" "^7.25.9" + "@babel/plugin-transform-react-pure-annotations" "^7.25.9" + +"@babel/preset-typescript@^7.18.6": + version "7.26.0" + resolved "https://registry.yarnpkg.com/@babel/preset-typescript/-/preset-typescript-7.26.0.tgz#4a570f1b8d104a242d923957ffa1eaff142a106d" + integrity sha512-NMk1IGZ5I/oHhoXEElcm+xUnL/szL6xflkFZmoEU9xj1qSJXpiS7rsspYo92B4DRCDvZn2erT5LdsCeXAKNCkg== + dependencies: + "@babel/helper-plugin-utils" "^7.25.9" + "@babel/helper-validator-option" "^7.25.9" + "@babel/plugin-syntax-jsx" "^7.25.9" + "@babel/plugin-transform-modules-commonjs" "^7.25.9" + "@babel/plugin-transform-typescript" "^7.25.9" + +"@babel/runtime@^7.12.5", "@babel/runtime@^7.8.4": + version "7.26.0" + resolved "https://registry.yarnpkg.com/@babel/runtime/-/runtime-7.26.0.tgz#8600c2f595f277c60815256418b85356a65173c1" + integrity sha512-FDSOghenHTiToteC/QRlv2q3DhPZ/oOXTBoirfWNx1Cx3TMVcGWQtMMmQcSvb/JjpNeGzx8Pq/b4fKEJuWm1sw== + dependencies: + regenerator-runtime "^0.14.0" + +"@babel/template@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/template/-/template-7.25.9.tgz#ecb62d81a8a6f5dc5fe8abfc3901fc52ddf15016" + integrity sha512-9DGttpmPvIxBb/2uwpVo3dqJ+O6RooAFOS+lB+xDqoE2PVCE8nfoHMdZLpfCQRLwvohzXISPZcgxt80xLfsuwg== + dependencies: + "@babel/code-frame" "^7.25.9" + "@babel/parser" "^7.25.9" + "@babel/types" "^7.25.9" + +"@babel/traverse@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/traverse/-/traverse-7.25.9.tgz#a50f8fe49e7f69f53de5bea7e413cd35c5e13c84" + integrity sha512-ZCuvfwOwlz/bawvAuvcj8rrithP2/N55Tzz342AkTvq4qaWbGfmCk/tKhNaV2cthijKrPAA8SRJV5WWe7IBMJw== + dependencies: + "@babel/code-frame" "^7.25.9" + "@babel/generator" "^7.25.9" + "@babel/parser" "^7.25.9" + "@babel/template" "^7.25.9" + "@babel/types" "^7.25.9" + debug "^4.3.1" + globals "^11.1.0" + +"@babel/types@^7.25.9", "@babel/types@^7.26.0", "@babel/types@^7.4.4": + version "7.26.0" + resolved 
"https://registry.yarnpkg.com/@babel/types/-/types-7.26.0.tgz#deabd08d6b753bc8e0f198f8709fb575e31774ff" + integrity sha512-Z/yiTPj+lDVnF7lWeKCIJzaIkI0vYO87dMpZ4bg4TDrFe4XXLFWL1TbXU27gBP3QccxV9mZICCrnjnYlJjXHOA== + dependencies: + "@babel/helper-string-parser" "^7.25.9" + "@babel/helper-validator-identifier" "^7.25.9" + +"@bundled-es-modules/cookie@^2.0.1": + version "2.0.1" + resolved "https://registry.yarnpkg.com/@bundled-es-modules/cookie/-/cookie-2.0.1.tgz#b41376af6a06b3e32a15241d927b840a9b4de507" + integrity sha512-8o+5fRPLNbjbdGRRmJj3h6Hh1AQJf2dk3qQ/5ZFb+PXkRNiSoMGGUKlsgLfrxneb72axVJyIYji64E2+nNfYyw== + dependencies: + cookie "^0.7.2" + +"@bundled-es-modules/statuses@^1.0.1": + version "1.0.1" + resolved "https://registry.yarnpkg.com/@bundled-es-modules/statuses/-/statuses-1.0.1.tgz#761d10f44e51a94902c4da48675b71a76cc98872" + integrity sha512-yn7BklA5acgcBr+7w064fGV+SGIFySjCKpqjcWgBAIfrAkY+4GQTJJHQMeT3V/sgz23VTEVV8TtOmkvJAhFVfg== + dependencies: + statuses "^2.0.1" + +"@bundled-es-modules/tough-cookie@^0.1.6": + version "0.1.6" + resolved "https://registry.yarnpkg.com/@bundled-es-modules/tough-cookie/-/tough-cookie-0.1.6.tgz#fa9cd3cedfeecd6783e8b0d378b4a99e52bde5d3" + integrity sha512-dvMHbL464C0zI+Yqxbz6kZ5TOEp7GLW+pry/RWndAR8MJQAXZ2rPmIs8tziTZjeIyhSNZgZbCePtfSbdWqStJw== + dependencies: + "@types/tough-cookie" "^4.0.5" + tough-cookie "^4.1.4" + +"@csstools/postcss-cascade-layers@^1.1.1": + version "1.1.1" + resolved "https://registry.yarnpkg.com/@csstools/postcss-cascade-layers/-/postcss-cascade-layers-1.1.1.tgz#8a997edf97d34071dd2e37ea6022447dd9e795ad" + integrity sha512-+KdYrpKC5TgomQr2DlZF4lDEpHcoxnj5IGddYYfBWJAKfj1JtuHUIqMa+E1pJJ+z3kvDViWMqyqPlG4Ja7amQA== + dependencies: + "@csstools/selector-specificity" "^2.0.2" + postcss-selector-parser "^6.0.10" + +"@csstools/postcss-color-function@^1.1.1": + version "1.1.1" + resolved 
"https://registry.yarnpkg.com/@csstools/postcss-color-function/-/postcss-color-function-1.1.1.tgz#2bd36ab34f82d0497cfacdc9b18d34b5e6f64b6b" + integrity sha512-Bc0f62WmHdtRDjf5f3e2STwRAl89N2CLb+9iAwzrv4L2hncrbDwnQD9PCq0gtAt7pOI2leIV08HIBUd4jxD8cw== + dependencies: + "@csstools/postcss-progressive-custom-properties" "^1.1.0" + postcss-value-parser "^4.2.0" + +"@csstools/postcss-font-format-keywords@^1.0.1": + version "1.0.1" + resolved "https://registry.yarnpkg.com/@csstools/postcss-font-format-keywords/-/postcss-font-format-keywords-1.0.1.tgz#677b34e9e88ae997a67283311657973150e8b16a" + integrity sha512-ZgrlzuUAjXIOc2JueK0X5sZDjCtgimVp/O5CEqTcs5ShWBa6smhWYbS0x5cVc/+rycTDbjjzoP0KTDnUneZGOg== + dependencies: + postcss-value-parser "^4.2.0" + +"@csstools/postcss-hwb-function@^1.0.2": + version "1.0.2" + resolved "https://registry.yarnpkg.com/@csstools/postcss-hwb-function/-/postcss-hwb-function-1.0.2.tgz#ab54a9fce0ac102c754854769962f2422ae8aa8b" + integrity sha512-YHdEru4o3Rsbjmu6vHy4UKOXZD+Rn2zmkAmLRfPet6+Jz4Ojw8cbWxe1n42VaXQhD3CQUXXTooIy8OkVbUcL+w== + dependencies: + postcss-value-parser "^4.2.0" + +"@csstools/postcss-ic-unit@^1.0.1": + version "1.0.1" + resolved "https://registry.yarnpkg.com/@csstools/postcss-ic-unit/-/postcss-ic-unit-1.0.1.tgz#28237d812a124d1a16a5acc5c3832b040b303e58" + integrity sha512-Ot1rcwRAaRHNKC9tAqoqNZhjdYBzKk1POgWfhN4uCOE47ebGcLRqXjKkApVDpjifL6u2/55ekkpnFcp+s/OZUw== + dependencies: + "@csstools/postcss-progressive-custom-properties" "^1.1.0" + postcss-value-parser "^4.2.0" + +"@csstools/postcss-is-pseudo-class@^2.0.7": + version "2.0.7" + resolved "https://registry.yarnpkg.com/@csstools/postcss-is-pseudo-class/-/postcss-is-pseudo-class-2.0.7.tgz#846ae6c0d5a1eaa878fce352c544f9c295509cd1" + integrity sha512-7JPeVVZHd+jxYdULl87lvjgvWldYu+Bc62s9vD/ED6/QTGjy0jy0US/f6BG53sVMTBJ1lzKZFpYmofBN9eaRiA== + dependencies: + "@csstools/selector-specificity" "^2.0.0" + postcss-selector-parser "^6.0.10" + +"@csstools/postcss-nested-calc@^1.0.0": + version 
"1.0.0" + resolved "https://registry.yarnpkg.com/@csstools/postcss-nested-calc/-/postcss-nested-calc-1.0.0.tgz#d7e9d1d0d3d15cf5ac891b16028af2a1044d0c26" + integrity sha512-JCsQsw1wjYwv1bJmgjKSoZNvf7R6+wuHDAbi5f/7MbFhl2d/+v+TvBTU4BJH3G1X1H87dHl0mh6TfYogbT/dJQ== + dependencies: + postcss-value-parser "^4.2.0" + +"@csstools/postcss-normalize-display-values@^1.0.1": + version "1.0.1" + resolved "https://registry.yarnpkg.com/@csstools/postcss-normalize-display-values/-/postcss-normalize-display-values-1.0.1.tgz#15da54a36e867b3ac5163ee12c1d7f82d4d612c3" + integrity sha512-jcOanIbv55OFKQ3sYeFD/T0Ti7AMXc9nM1hZWu8m/2722gOTxFg7xYu4RDLJLeZmPUVQlGzo4jhzvTUq3x4ZUw== + dependencies: + postcss-value-parser "^4.2.0" + +"@csstools/postcss-oklab-function@^1.1.1": + version "1.1.1" + resolved "https://registry.yarnpkg.com/@csstools/postcss-oklab-function/-/postcss-oklab-function-1.1.1.tgz#88cee0fbc8d6df27079ebd2fa016ee261eecf844" + integrity sha512-nJpJgsdA3dA9y5pgyb/UfEzE7W5Ka7u0CX0/HIMVBNWzWemdcTH3XwANECU6anWv/ao4vVNLTMxhiPNZsTK6iA== + dependencies: + "@csstools/postcss-progressive-custom-properties" "^1.1.0" + postcss-value-parser "^4.2.0" + +"@csstools/postcss-progressive-custom-properties@^1.1.0", "@csstools/postcss-progressive-custom-properties@^1.3.0": + version "1.3.0" + resolved "https://registry.yarnpkg.com/@csstools/postcss-progressive-custom-properties/-/postcss-progressive-custom-properties-1.3.0.tgz#542292558384361776b45c85226b9a3a34f276fa" + integrity sha512-ASA9W1aIy5ygskZYuWams4BzafD12ULvSypmaLJT2jvQ8G0M3I8PRQhC0h7mG0Z3LI05+agZjqSR9+K9yaQQjA== + dependencies: + postcss-value-parser "^4.2.0" + +"@csstools/postcss-stepped-value-functions@^1.0.1": + version "1.0.1" + resolved "https://registry.yarnpkg.com/@csstools/postcss-stepped-value-functions/-/postcss-stepped-value-functions-1.0.1.tgz#f8772c3681cc2befed695e2b0b1d68e22f08c4f4" + integrity sha512-dz0LNoo3ijpTOQqEJLY8nyaapl6umbmDcgj4AD0lgVQ572b2eqA1iGZYTTWhrcrHztWDDRAX2DGYyw2VBjvCvQ== + dependencies: + 
postcss-value-parser "^4.2.0" + +"@csstools/postcss-text-decoration-shorthand@^1.0.0": + version "1.0.0" + resolved "https://registry.yarnpkg.com/@csstools/postcss-text-decoration-shorthand/-/postcss-text-decoration-shorthand-1.0.0.tgz#ea96cfbc87d921eca914d3ad29340d9bcc4c953f" + integrity sha512-c1XwKJ2eMIWrzQenN0XbcfzckOLLJiczqy+YvfGmzoVXd7pT9FfObiSEfzs84bpE/VqfpEuAZ9tCRbZkZxxbdw== + dependencies: + postcss-value-parser "^4.2.0" + +"@csstools/postcss-trigonometric-functions@^1.0.2": + version "1.0.2" + resolved "https://registry.yarnpkg.com/@csstools/postcss-trigonometric-functions/-/postcss-trigonometric-functions-1.0.2.tgz#94d3e4774c36d35dcdc88ce091336cb770d32756" + integrity sha512-woKaLO///4bb+zZC2s80l+7cm07M7268MsyG3M0ActXXEFi6SuhvriQYcb58iiKGbjwwIU7n45iRLEHypB47Og== + dependencies: + postcss-value-parser "^4.2.0" + +"@csstools/postcss-unset-value@^1.0.2": + version "1.0.2" + resolved "https://registry.yarnpkg.com/@csstools/postcss-unset-value/-/postcss-unset-value-1.0.2.tgz#c99bb70e2cdc7312948d1eb41df2412330b81f77" + integrity sha512-c8J4roPBILnelAsdLr4XOAR/GsTm0GJi4XpcfvoWk3U6KiTCqiFYc63KhRMQQX35jYMp4Ao8Ij9+IZRgMfJp1g== + +"@csstools/selector-specificity@^2.0.0", "@csstools/selector-specificity@^2.0.2": + version "2.2.0" + resolved "https://registry.yarnpkg.com/@csstools/selector-specificity/-/selector-specificity-2.2.0.tgz#2cbcf822bf3764c9658c4d2e568bd0c0cb748016" + integrity sha512-+OJ9konv95ClSTOJCmMZqpd5+YGsB2S+x6w3E1oaM8UuR5j8nTNHYSz8c9BEPGDOCMQYIEEGlVPj/VY64iTbGw== + +"@discoveryjs/json-ext@^0.5.0": + version "0.5.7" + resolved "https://registry.yarnpkg.com/@discoveryjs/json-ext/-/json-ext-0.5.7.tgz#1d572bfbbe14b7704e0ba0f39b74815b84870d70" + integrity sha512-dBVuXR082gk3jsFp7Rd/JI4kytwGHecnCoTtXFb7DB6CNHp4rg5k1bhg0nWdLGLnOV71lmDzGQaLMy8iPLY0pw== + +"@eslint-community/eslint-utils@^4.2.0": + version "4.4.1" + resolved "https://registry.yarnpkg.com/@eslint-community/eslint-utils/-/eslint-utils-4.4.1.tgz#d1145bf2c20132d6400495d6df4bf59362fd9d56" + 
integrity sha512-s3O3waFUrMV8P/XaF/+ZTp1X9XBZW1a4B97ZnjQF2KYWaFD2A8KyFBsrsfSjEmjn3RGWAIuvlneuZm3CUK3jbA== + dependencies: + eslint-visitor-keys "^3.4.3" + +"@eslint-community/regexpp@^4.4.0", "@eslint-community/regexpp@^4.6.1": + version "4.12.1" + resolved "https://registry.yarnpkg.com/@eslint-community/regexpp/-/regexpp-4.12.1.tgz#cfc6cffe39df390a3841cde2abccf92eaa7ae0e0" + integrity sha512-CCZCDJuduB9OUkFkY2IgppNZMi2lBQgD2qzwXkEia16cge2pijY/aXi96CJMquDMn3nJdlPV1A5KrJEXwfLNzQ== + +"@eslint/eslintrc@^2.1.4": + version "2.1.4" + resolved "https://registry.yarnpkg.com/@eslint/eslintrc/-/eslintrc-2.1.4.tgz#388a269f0f25c1b6adc317b5a2c55714894c70ad" + integrity sha512-269Z39MS6wVJtsoUl10L60WdkhJVdPG24Q4eZTH3nnF6lpvSShEK3wQjDX9JRWAUPvPh7COouPpU9IrqaZFvtQ== + dependencies: + ajv "^6.12.4" + debug "^4.3.2" + espree "^9.6.0" + globals "^13.19.0" + ignore "^5.2.0" + import-fresh "^3.2.1" + js-yaml "^4.1.0" + minimatch "^3.1.2" + strip-json-comments "^3.1.1" + +"@eslint/js@8.57.1": + version "8.57.1" + resolved "https://registry.yarnpkg.com/@eslint/js/-/js-8.57.1.tgz#de633db3ec2ef6a3c89e2f19038063e8a122e2c2" + integrity sha512-d9zaMRSTIKDLhctzH12MtXvJKSSUhaHcjV+2Z+GK+EEY7XKpP5yR4x+N3TAcHTcu963nIr+TMcCb4DBCYX1z6Q== + +"@gradio/client@^1.7.1": + version "1.7.1" + resolved "https://registry.yarnpkg.com/@gradio/client/-/client-1.7.1.tgz#67b8bd3073c5de92ec1fb9f17598c93c400f4031" + integrity sha512-ZncBZV2ubr5D6FYptMvS+udzEAMnteGOYhUnT1oHDIuYAdbvFxDIkFWojS0GXoGMdYTWIkKizFFGbVrBJ3Y6mg== + dependencies: + "@types/eventsource" "^1.1.15" + bufferutil "^4.0.7" + eventsource "^2.0.2" + fetch-event-stream "^0.1.5" + msw "^2.2.1" + semiver "^1.1.0" + textlinestream "^1.1.1" + typescript "^5.0.0" + ws "^8.13.0" + +"@humanwhocodes/config-array@^0.13.0": + version "0.13.0" + resolved "https://registry.yarnpkg.com/@humanwhocodes/config-array/-/config-array-0.13.0.tgz#fb907624df3256d04b9aa2df50d7aa97ec648748" + integrity 
sha512-DZLEEqFWQFiyK6h5YIeynKx7JlvCYWL0cImfSRXZ9l4Sg2efkFGTuFf6vzXjK1cq6IYkU+Eg/JizXw+TD2vRNw== + dependencies: + "@humanwhocodes/object-schema" "^2.0.3" + debug "^4.3.1" + minimatch "^3.0.5" + +"@humanwhocodes/module-importer@^1.0.1": + version "1.0.1" + resolved "https://registry.yarnpkg.com/@humanwhocodes/module-importer/-/module-importer-1.0.1.tgz#af5b2691a22b44be847b0ca81641c5fb6ad0172c" + integrity sha512-bxveV4V8v5Yb4ncFTT3rPSgZBOpCkjfK0y4oVVVJwIuDVBRMDXrPyXRL988i5ap9m9bnyEEjWfm5WkBmtffLfA== + +"@humanwhocodes/object-schema@^2.0.3": + version "2.0.3" + resolved "https://registry.yarnpkg.com/@humanwhocodes/object-schema/-/object-schema-2.0.3.tgz#4a2868d75d6d6963e423bcf90b7fd1be343409d3" + integrity sha512-93zYdMES/c1D69yZiKDBj0V24vqNzB/koF26KPaagAfd3P/4gUlh3Dys5ogAK+Exi9QyzlD8x/08Zt7wIKcDcA== + +"@inquirer/confirm@^5.0.0": + version "5.0.2" + resolved "https://registry.yarnpkg.com/@inquirer/confirm/-/confirm-5.0.2.tgz#2b9dcf6b7da5f518c74abe4aeaf3173253d83c93" + integrity sha512-KJLUHOaKnNCYzwVbryj3TNBxyZIrr56fR5N45v6K9IPrbT6B7DcudBMfylkV1A8PUdJE15mybkEQyp2/ZUpxUA== + dependencies: + "@inquirer/core" "^10.1.0" + "@inquirer/type" "^3.0.1" + +"@inquirer/core@^10.1.0": + version "10.1.0" + resolved "https://registry.yarnpkg.com/@inquirer/core/-/core-10.1.0.tgz#c5fdc34c4cafd7248da29a3c3b3120fe6e1c45be" + integrity sha512-I+ETk2AL+yAVbvuKx5AJpQmoaWhpiTFOg/UJb7ZkMAK4blmtG8ATh5ct+T/8xNld0CZG/2UhtkdMwpgvld92XQ== + dependencies: + "@inquirer/figures" "^1.0.8" + "@inquirer/type" "^3.0.1" + ansi-escapes "^4.3.2" + cli-width "^4.1.0" + mute-stream "^2.0.0" + signal-exit "^4.1.0" + strip-ansi "^6.0.1" + wrap-ansi "^6.2.0" + yoctocolors-cjs "^2.1.2" + +"@inquirer/figures@^1.0.8": + version "1.0.8" + resolved "https://registry.yarnpkg.com/@inquirer/figures/-/figures-1.0.8.tgz#d9e414a1376a331a0e71b151fea27c48845788b0" + integrity sha512-tKd+jsmhq21AP1LhexC0pPwsCxEhGgAkg28byjJAd+xhmIs8LUX8JbUc3vBf3PhLxWiB5EvyBE5X7JSPAqMAqg== + +"@inquirer/type@^3.0.1": + version "3.0.1" + 
resolved "https://registry.yarnpkg.com/@inquirer/type/-/type-3.0.1.tgz#619ce9f65c3e114d8e39c41822bed3440d20b478" + integrity sha512-+ksJMIy92sOAiAccGpcKZUc3bYO07cADnscIxHBknEm3uNts3movSmBofc1908BNy5edKscxYeAdaX1NXkHS6A== + +"@isaacs/cliui@^8.0.2": + version "8.0.2" + resolved "https://registry.yarnpkg.com/@isaacs/cliui/-/cliui-8.0.2.tgz#b37667b7bc181c168782259bab42474fbf52b550" + integrity sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA== + dependencies: + string-width "^5.1.2" + string-width-cjs "npm:string-width@^4.2.0" + strip-ansi "^7.0.1" + strip-ansi-cjs "npm:strip-ansi@^6.0.1" + wrap-ansi "^8.1.0" + wrap-ansi-cjs "npm:wrap-ansi@^7.0.0" + +"@jridgewell/gen-mapping@^0.3.2", "@jridgewell/gen-mapping@^0.3.5": + version "0.3.5" + resolved "https://registry.yarnpkg.com/@jridgewell/gen-mapping/-/gen-mapping-0.3.5.tgz#dcce6aff74bdf6dad1a95802b69b04a2fcb1fb36" + integrity sha512-IzL8ZoEDIBRWEzlCcRhOaCupYyN5gdIK+Q6fbFdPDg6HqX6jpkItn7DFIpW9LQzXG6Df9sA7+OKnq0qlz/GaQg== + dependencies: + "@jridgewell/set-array" "^1.2.1" + "@jridgewell/sourcemap-codec" "^1.4.10" + "@jridgewell/trace-mapping" "^0.3.24" + +"@jridgewell/resolve-uri@^3.1.0": + version "3.1.2" + resolved "https://registry.yarnpkg.com/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz#7a0ee601f60f99a20c7c7c5ff0c80388c1189bd6" + integrity sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw== + +"@jridgewell/set-array@^1.2.1": + version "1.2.1" + resolved "https://registry.yarnpkg.com/@jridgewell/set-array/-/set-array-1.2.1.tgz#558fb6472ed16a4c850b889530e6b36438c49280" + integrity sha512-R8gLRTZeyp03ymzP/6Lil/28tGeGEzhx1q2k703KGWRAI1VdvPIXdG70VJc2pAMw3NA6JKL5hhFu1sJX0Mnn/A== + +"@jridgewell/source-map@^0.3.3": + version "0.3.6" + resolved "https://registry.yarnpkg.com/@jridgewell/source-map/-/source-map-0.3.6.tgz#9d71ca886e32502eb9362c9a74a46787c36df81a" + integrity 
sha512-1ZJTZebgqllO79ue2bm3rIGud/bOe0pP5BjSRCRxxYkEZS8STV7zN84UBbiYu7jy+eCKSnVIUgoWWE/tt+shMQ== + dependencies: + "@jridgewell/gen-mapping" "^0.3.5" + "@jridgewell/trace-mapping" "^0.3.25" + +"@jridgewell/sourcemap-codec@^1.4.10", "@jridgewell/sourcemap-codec@^1.4.14": + version "1.5.0" + resolved "https://registry.yarnpkg.com/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.0.tgz#3188bcb273a414b0d215fd22a58540b989b9409a" + integrity sha512-gv3ZRaISU3fjPAgNsriBRqGWQL6quFx04YMPW/zD8XMLsU32mhCCbfbO6KZFLjvYpCZ8zyDEgqsgf+PwPaM7GQ== + +"@jridgewell/trace-mapping@^0.3.20", "@jridgewell/trace-mapping@^0.3.24", "@jridgewell/trace-mapping@^0.3.25": + version "0.3.25" + resolved "https://registry.yarnpkg.com/@jridgewell/trace-mapping/-/trace-mapping-0.3.25.tgz#15f190e98895f3fc23276ee14bc76b675c2e50f0" + integrity sha512-vNk6aEwybGtawWmy/PzwnGDOjCkLWSD2wqvjGGAgOAwCGWySYXfYoxt00IJkTF+8Lb57DwOb3Aa0o9CApepiYQ== + dependencies: + "@jridgewell/resolve-uri" "^3.1.0" + "@jridgewell/sourcemap-codec" "^1.4.14" + +"@leichtgewicht/ip-codec@^2.0.1": + version "2.0.5" + resolved "https://registry.yarnpkg.com/@leichtgewicht/ip-codec/-/ip-codec-2.0.5.tgz#4fc56c15c580b9adb7dc3c333a134e540b44bfb1" + integrity sha512-Vo+PSpZG2/fmgmiNzYK9qWRh8h/CHrwD0mo1h1DzL4yzHNSfWYujGTYsWGreD000gcgmZ7K4Ys6Tx9TxtsKdDw== + +"@mswjs/interceptors@^0.36.5": + version "0.36.10" + resolved "https://registry.yarnpkg.com/@mswjs/interceptors/-/interceptors-0.36.10.tgz#028a51a3c01f6ed87679e972d9f12a9a3726f7a0" + integrity sha512-GXrJgakgJW3DWKueebkvtYgGKkxA7s0u5B0P5syJM5rvQUnrpLPigvci8Hukl7yEM+sU06l+er2Fgvx/gmiRgg== + dependencies: + "@open-draft/deferred-promise" "^2.2.0" + "@open-draft/logger" "^0.3.0" + "@open-draft/until" "^2.0.0" + is-node-process "^1.2.0" + outvariant "^1.4.3" + strict-event-emitter "^0.5.1" + +"@nodelib/fs.scandir@2.1.5": + version "2.1.5" + resolved "https://registry.yarnpkg.com/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz#7619c2eb21b25483f6d167548b4cfd5a7488c3d5" + integrity 
sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g== + dependencies: + "@nodelib/fs.stat" "2.0.5" + run-parallel "^1.1.9" + +"@nodelib/fs.stat@2.0.5", "@nodelib/fs.stat@^2.0.2": + version "2.0.5" + resolved "https://registry.yarnpkg.com/@nodelib/fs.stat/-/fs.stat-2.0.5.tgz#5bd262af94e9d25bd1e71b05deed44876a222e8b" + integrity sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A== + +"@nodelib/fs.walk@^1.2.3", "@nodelib/fs.walk@^1.2.8": + version "1.2.8" + resolved "https://registry.yarnpkg.com/@nodelib/fs.walk/-/fs.walk-1.2.8.tgz#e95737e8bb6746ddedf69c556953494f196fe69a" + integrity sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg== + dependencies: + "@nodelib/fs.scandir" "2.1.5" + fastq "^1.6.0" + +"@open-draft/deferred-promise@^2.2.0": + version "2.2.0" + resolved "https://registry.yarnpkg.com/@open-draft/deferred-promise/-/deferred-promise-2.2.0.tgz#4a822d10f6f0e316be4d67b4d4f8c9a124b073bd" + integrity sha512-CecwLWx3rhxVQF6V4bAgPS5t+So2sTbPgAzafKkVizyi7tlwpcFpdFqq+wqF2OwNBmqFuu6tOyouTuxgpMfzmA== + +"@open-draft/logger@^0.3.0": + version "0.3.0" + resolved "https://registry.yarnpkg.com/@open-draft/logger/-/logger-0.3.0.tgz#2b3ab1242b360aa0adb28b85f5d7da1c133a0954" + integrity sha512-X2g45fzhxH238HKO4xbSr7+wBS8Fvw6ixhTDuvLd5mqh6bJJCFAPwU9mPDxbcrRtfxv4u5IHCEH77BmxvXmmxQ== + dependencies: + is-node-process "^1.2.0" + outvariant "^1.4.0" + +"@open-draft/until@^2.0.0", "@open-draft/until@^2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@open-draft/until/-/until-2.1.0.tgz#0acf32f470af2ceaf47f095cdecd40d68666efda" + integrity sha512-U69T3ItWHvLwGg5eJ0n3I62nWuE6ilHlmz7zM0npLBRvPRd7e6NYmg54vvRtP5mZG7kZqZCFVdsTWo7BPtBujg== + +"@parcel/watcher-android-arm64@2.4.1": + version "2.4.1" + resolved "https://registry.yarnpkg.com/@parcel/watcher-android-arm64/-/watcher-android-arm64-2.4.1.tgz#c2c19a3c442313ff007d2d7a9c2c1dd3e1c9ca84" 
+ integrity sha512-LOi/WTbbh3aTn2RYddrO8pnapixAziFl6SMxHM69r3tvdSm94JtCenaKgk1GRg5FJ5wpMCpHeW+7yqPlvZv7kg== + +"@parcel/watcher-darwin-arm64@2.4.1": + version "2.4.1" + resolved "https://registry.yarnpkg.com/@parcel/watcher-darwin-arm64/-/watcher-darwin-arm64-2.4.1.tgz#c817c7a3b4f3a79c1535bfe54a1c2818d9ffdc34" + integrity sha512-ln41eihm5YXIY043vBrrHfn94SIBlqOWmoROhsMVTSXGh0QahKGy77tfEywQ7v3NywyxBBkGIfrWRHm0hsKtzA== + +"@parcel/watcher-darwin-x64@2.4.1": + version "2.4.1" + resolved "https://registry.yarnpkg.com/@parcel/watcher-darwin-x64/-/watcher-darwin-x64-2.4.1.tgz#1a3f69d9323eae4f1c61a5f480a59c478d2cb020" + integrity sha512-yrw81BRLjjtHyDu7J61oPuSoeYWR3lDElcPGJyOvIXmor6DEo7/G2u1o7I38cwlcoBHQFULqF6nesIX3tsEXMg== + +"@parcel/watcher-freebsd-x64@2.4.1": + version "2.4.1" + resolved "https://registry.yarnpkg.com/@parcel/watcher-freebsd-x64/-/watcher-freebsd-x64-2.4.1.tgz#0d67fef1609f90ba6a8a662bc76a55fc93706fc8" + integrity sha512-TJa3Pex/gX3CWIx/Co8k+ykNdDCLx+TuZj3f3h7eOjgpdKM+Mnix37RYsYU4LHhiYJz3DK5nFCCra81p6g050w== + +"@parcel/watcher-linux-arm-glibc@2.4.1": + version "2.4.1" + resolved "https://registry.yarnpkg.com/@parcel/watcher-linux-arm-glibc/-/watcher-linux-arm-glibc-2.4.1.tgz#ce5b340da5829b8e546bd00f752ae5292e1c702d" + integrity sha512-4rVYDlsMEYfa537BRXxJ5UF4ddNwnr2/1O4MHM5PjI9cvV2qymvhwZSFgXqbS8YoTk5i/JR0L0JDs69BUn45YA== + +"@parcel/watcher-linux-arm64-glibc@2.4.1": + version "2.4.1" + resolved "https://registry.yarnpkg.com/@parcel/watcher-linux-arm64-glibc/-/watcher-linux-arm64-glibc-2.4.1.tgz#6d7c00dde6d40608f9554e73998db11b2b1ff7c7" + integrity sha512-BJ7mH985OADVLpbrzCLgrJ3TOpiZggE9FMblfO65PlOCdG++xJpKUJ0Aol74ZUIYfb8WsRlUdgrZxKkz3zXWYA== + +"@parcel/watcher-linux-arm64-musl@2.4.1": + version "2.4.1" + resolved "https://registry.yarnpkg.com/@parcel/watcher-linux-arm64-musl/-/watcher-linux-arm64-musl-2.4.1.tgz#bd39bc71015f08a4a31a47cd89c236b9d6a7f635" + integrity 
sha512-p4Xb7JGq3MLgAfYhslU2SjoV9G0kI0Xry0kuxeG/41UfpjHGOhv7UoUDAz/jb1u2elbhazy4rRBL8PegPJFBhA== + +"@parcel/watcher-linux-x64-glibc@2.4.1": + version "2.4.1" + resolved "https://registry.yarnpkg.com/@parcel/watcher-linux-x64-glibc/-/watcher-linux-x64-glibc-2.4.1.tgz#0ce29966b082fb6cdd3de44f2f74057eef2c9e39" + integrity sha512-s9O3fByZ/2pyYDPoLM6zt92yu6P4E39a03zvO0qCHOTjxmt3GHRMLuRZEWhWLASTMSrrnVNWdVI/+pUElJBBBg== + +"@parcel/watcher-linux-x64-musl@2.4.1": + version "2.4.1" + resolved "https://registry.yarnpkg.com/@parcel/watcher-linux-x64-musl/-/watcher-linux-x64-musl-2.4.1.tgz#d2ebbf60e407170bb647cd6e447f4f2bab19ad16" + integrity sha512-L2nZTYR1myLNST0O632g0Dx9LyMNHrn6TOt76sYxWLdff3cB22/GZX2UPtJnaqQPdCRoszoY5rcOj4oMTtp5fQ== + +"@parcel/watcher-win32-arm64@2.4.1": + version "2.4.1" + resolved "https://registry.yarnpkg.com/@parcel/watcher-win32-arm64/-/watcher-win32-arm64-2.4.1.tgz#eb4deef37e80f0b5e2f215dd6d7a6d40a85f8adc" + integrity sha512-Uq2BPp5GWhrq/lcuItCHoqxjULU1QYEcyjSO5jqqOK8RNFDBQnenMMx4gAl3v8GiWa59E9+uDM7yZ6LxwUIfRg== + +"@parcel/watcher-win32-ia32@2.4.1": + version "2.4.1" + resolved "https://registry.yarnpkg.com/@parcel/watcher-win32-ia32/-/watcher-win32-ia32-2.4.1.tgz#94fbd4b497be39fd5c8c71ba05436927842c9df7" + integrity sha512-maNRit5QQV2kgHFSYwftmPBxiuK5u4DXjbXx7q6eKjq5dsLXZ4FJiVvlcw35QXzk0KrUecJmuVFbj4uV9oYrcw== + +"@parcel/watcher-win32-x64@2.4.1": + version "2.4.1" + resolved "https://registry.yarnpkg.com/@parcel/watcher-win32-x64/-/watcher-win32-x64-2.4.1.tgz#4bf920912f67cae5f2d264f58df81abfea68dadf" + integrity sha512-+DvS92F9ezicfswqrvIRM2njcYJbd5mb9CUgtrHCHmvn7pPPa+nMDRu1o1bYYz/l5IB2NVGNJWiH7h1E58IF2A== + +"@parcel/watcher@^2.4.1": + version "2.4.1" + resolved "https://registry.yarnpkg.com/@parcel/watcher/-/watcher-2.4.1.tgz#a50275151a1bb110879c6123589dba90c19f1bf8" + integrity sha512-HNjmfLQEVRZmHRET336f20H/8kOozUGwk7yajvsonjNxbj2wBTK1WsQuHkD5yYh9RxFGL2EyDHryOihOwUoKDA== + dependencies: + detect-libc "^1.0.3" + is-glob "^4.0.3" + micromatch 
"^4.0.5" + node-addon-api "^7.0.0" + optionalDependencies: + "@parcel/watcher-android-arm64" "2.4.1" + "@parcel/watcher-darwin-arm64" "2.4.1" + "@parcel/watcher-darwin-x64" "2.4.1" + "@parcel/watcher-freebsd-x64" "2.4.1" + "@parcel/watcher-linux-arm-glibc" "2.4.1" + "@parcel/watcher-linux-arm64-glibc" "2.4.1" + "@parcel/watcher-linux-arm64-musl" "2.4.1" + "@parcel/watcher-linux-x64-glibc" "2.4.1" + "@parcel/watcher-linux-x64-musl" "2.4.1" + "@parcel/watcher-win32-arm64" "2.4.1" + "@parcel/watcher-win32-ia32" "2.4.1" + "@parcel/watcher-win32-x64" "2.4.1" + +"@pkgjs/parseargs@^0.11.0": + version "0.11.0" + resolved "https://registry.yarnpkg.com/@pkgjs/parseargs/-/parseargs-0.11.0.tgz#a77ea742fab25775145434eb1d2328cf5013ac33" + integrity sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg== + +"@pmmmwh/react-refresh-webpack-plugin@^0.5.7": + version "0.5.15" + resolved "https://registry.yarnpkg.com/@pmmmwh/react-refresh-webpack-plugin/-/react-refresh-webpack-plugin-0.5.15.tgz#f126be97c30b83ed777e2aeabd518bc592e6e7c4" + integrity sha512-LFWllMA55pzB9D34w/wXUCf8+c+IYKuJDgxiZ3qMhl64KRMBHYM1I3VdGaD2BV5FNPV2/S2596bppxHbv2ZydQ== + dependencies: + ansi-html "^0.0.9" + core-js-pure "^3.23.3" + error-stack-parser "^2.0.6" + html-entities "^2.1.0" + loader-utils "^2.0.4" + schema-utils "^4.2.0" + source-map "^0.7.3" + +"@protobufjs/aspromise@^1.1.1", "@protobufjs/aspromise@^1.1.2": + version "1.1.2" + resolved "https://registry.yarnpkg.com/@protobufjs/aspromise/-/aspromise-1.1.2.tgz#9b8b0cc663d669a7d8f6f5d0893a14d348f30fbf" + integrity sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ== + +"@protobufjs/base64@^1.1.2": + version "1.1.2" + resolved "https://registry.yarnpkg.com/@protobufjs/base64/-/base64-1.1.2.tgz#4c85730e59b9a1f1f349047dbf24296034bb2735" + integrity sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg== + +"@protobufjs/codegen@^2.0.4": 
+ version "2.0.4" + resolved "https://registry.yarnpkg.com/@protobufjs/codegen/-/codegen-2.0.4.tgz#7ef37f0d010fb028ad1ad59722e506d9262815cb" + integrity sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg== + +"@protobufjs/eventemitter@^1.1.0": + version "1.1.0" + resolved "https://registry.yarnpkg.com/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz#355cbc98bafad5978f9ed095f397621f1d066b70" + integrity sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q== + +"@protobufjs/fetch@^1.1.0": + version "1.1.0" + resolved "https://registry.yarnpkg.com/@protobufjs/fetch/-/fetch-1.1.0.tgz#ba99fb598614af65700c1619ff06d454b0d84c45" + integrity sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ== + dependencies: + "@protobufjs/aspromise" "^1.1.1" + "@protobufjs/inquire" "^1.1.0" + +"@protobufjs/float@^1.0.2": + version "1.0.2" + resolved "https://registry.yarnpkg.com/@protobufjs/float/-/float-1.0.2.tgz#5e9e1abdcb73fc0a7cb8b291df78c8cbd97b87d1" + integrity sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ== + +"@protobufjs/inquire@^1.1.0": + version "1.1.0" + resolved "https://registry.yarnpkg.com/@protobufjs/inquire/-/inquire-1.1.0.tgz#ff200e3e7cf2429e2dcafc1140828e8cc638f089" + integrity sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q== + +"@protobufjs/path@^1.1.2": + version "1.1.2" + resolved "https://registry.yarnpkg.com/@protobufjs/path/-/path-1.1.2.tgz#6cc2b20c5c9ad6ad0dccfd21ca7673d8d7fbf68d" + integrity sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA== + +"@protobufjs/pool@^1.1.0": + version "1.1.0" + resolved "https://registry.yarnpkg.com/@protobufjs/pool/-/pool-1.1.0.tgz#09fd15f2d6d3abfa9b65bc366506d6ad7846ff54" + integrity 
sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw== + +"@protobufjs/utf8@^1.1.0": + version "1.1.0" + resolved "https://registry.yarnpkg.com/@protobufjs/utf8/-/utf8-1.1.0.tgz#a777360b5b39a1a2e5106f8e858f2fd2d060c570" + integrity sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw== + +"@sindresorhus/is@^0.7.0": + version "0.7.0" + resolved "https://registry.yarnpkg.com/@sindresorhus/is/-/is-0.7.0.tgz#9a06f4f137ee84d7df0460c1fdb1135ffa6c50fd" + integrity sha512-ONhaKPIufzzrlNbqtWFFd+jlnemX6lJAgq9ZeiZtS7I1PIf/la7CW4m83rTXRnVnsMbW2k56pGYu7AUFJD9Pow== + +"@testing-library/dom@^8.5.0": + version "8.20.1" + resolved "https://registry.yarnpkg.com/@testing-library/dom/-/dom-8.20.1.tgz#2e52a32e46fc88369eef7eef634ac2a192decd9f" + integrity sha512-/DiOQ5xBxgdYRC8LNk7U+RWat0S3qRLeIw3ZIkMQ9kkVlRmwD/Eg8k8CqIpD6GW7u20JIUOfMKbxtiLutpjQ4g== + dependencies: + "@babel/code-frame" "^7.10.4" + "@babel/runtime" "^7.12.5" + "@types/aria-query" "^5.0.1" + aria-query "5.1.3" + chalk "^4.1.0" + dom-accessibility-api "^0.5.9" + lz-string "^1.5.0" + pretty-format "^27.0.2" + +"@testing-library/react@^13.3.0": + version "13.4.0" + resolved "https://registry.yarnpkg.com/@testing-library/react/-/react-13.4.0.tgz#6a31e3bf5951615593ad984e96b9e5e2d9380966" + integrity sha512-sXOGON+WNTh3MLE9rve97ftaZukN3oNf2KjDy7YTx6hcTO2uuLHuCGynMDhFwGw/jYf4OJ2Qk0i4i79qMNNkyw== + dependencies: + "@babel/runtime" "^7.12.5" + "@testing-library/dom" "^8.5.0" + "@types/react-dom" "^18.0.0" + +"@trysound/sax@0.2.0": + version "0.2.0" + resolved "https://registry.yarnpkg.com/@trysound/sax/-/sax-0.2.0.tgz#cccaab758af56761eb7bf37af6f03f326dd798ad" + integrity sha512-L7z9BgrNEcYyUYtF+HaEfiS5ebkh9jXqbszz7pC0hRBPaatV0XjSD3+eHrpqFemQfgwiFF0QPIarnIihIDn7OA== + +"@types/aria-query@^5.0.1": + version "5.0.4" + resolved "https://registry.yarnpkg.com/@types/aria-query/-/aria-query-5.0.4.tgz#1a31c3d378850d2778dabb6374d036dcba4ba708" + integrity 
sha512-rfT93uj5s0PRL7EzccGMs3brplhcrghnDoV26NqKhCAS1hVo+WdNsPvE/yb6ilfr5hi2MEk6d5EWJTKdxg8jVw== + +"@types/body-parser@*": + version "1.19.5" + resolved "https://registry.yarnpkg.com/@types/body-parser/-/body-parser-1.19.5.tgz#04ce9a3b677dc8bd681a17da1ab9835dc9d3ede4" + integrity sha512-fB3Zu92ucau0iQ0JMCFQE7b/dv8Ot07NI3KaZIkIUNXq82k4eBAqUaneXfleGY9JWskeS9y+u0nXMyspcuQrCg== + dependencies: + "@types/connect" "*" + "@types/node" "*" + +"@types/bonjour@^3.5.9": + version "3.5.13" + resolved "https://registry.yarnpkg.com/@types/bonjour/-/bonjour-3.5.13.tgz#adf90ce1a105e81dd1f9c61fdc5afda1bfb92956" + integrity sha512-z9fJ5Im06zvUL548KvYNecEVlA7cVDkGUi6kZusb04mpyEFKCIZJvloCcmpmLaIahDpOQGHaHmG6imtPMmPXGQ== + dependencies: + "@types/node" "*" + +"@types/connect-history-api-fallback@^1.3.5": + version "1.5.4" + resolved "https://registry.yarnpkg.com/@types/connect-history-api-fallback/-/connect-history-api-fallback-1.5.4.tgz#7de71645a103056b48ac3ce07b3520b819c1d5b3" + integrity sha512-n6Cr2xS1h4uAulPRdlw6Jl6s1oG8KrVilPN2yUITEs+K48EzMJJ3W1xy8K5eWuFvjp3R74AOIGSmp2UfBJ8HFw== + dependencies: + "@types/express-serve-static-core" "*" + "@types/node" "*" + +"@types/connect@*": + version "3.4.38" + resolved "https://registry.yarnpkg.com/@types/connect/-/connect-3.4.38.tgz#5ba7f3bc4fbbdeaff8dded952e5ff2cc53f8d858" + integrity sha512-K6uROf1LD88uDQqJCktA4yzL1YYAK6NgfsI0v/mTgyPKWsX1CnJ0XPSDhViejru1GcRkLWb8RlzFYJRqGUbaug== + dependencies: + "@types/node" "*" + +"@types/cookie@^0.6.0": + version "0.6.0" + resolved "https://registry.yarnpkg.com/@types/cookie/-/cookie-0.6.0.tgz#eac397f28bf1d6ae0ae081363eca2f425bedf0d5" + integrity sha512-4Kh9a6B2bQciAhf7FSuMRRkUWecJgJu9nPnx3yzpsfXX/c50REIqpHY4C82bXP90qrLtXtkDxTZosYO3UpOwlA== + +"@types/estree@^1.0.6": + version "1.0.6" + resolved "https://registry.yarnpkg.com/@types/estree/-/estree-1.0.6.tgz#628effeeae2064a1b4e79f78e81d87b7e5fc7b50" + integrity 
sha512-AYnb1nQyY49te+VRAVgmzfcgjYS91mY5P0TKUDCLEM+gNnA+3T6rWITXRLYCpahpqSQbN5cE+gHpnPyXjHWxcw== + +"@types/eventsource@^1.1.15": + version "1.1.15" + resolved "https://registry.yarnpkg.com/@types/eventsource/-/eventsource-1.1.15.tgz#949383d3482e20557cbecbf3b038368d94b6be27" + integrity sha512-XQmGcbnxUNa06HR3VBVkc9+A2Vpi9ZyLJcdS5dwaQQ/4ZMWFO+5c90FnMUpbtMZwB/FChoYHwuVg8TvkECacTA== + +"@types/express-serve-static-core@*", "@types/express-serve-static-core@^5.0.0": + version "5.0.1" + resolved "https://registry.yarnpkg.com/@types/express-serve-static-core/-/express-serve-static-core-5.0.1.tgz#3c9997ae9d00bc236e45c6374e84f2596458d9db" + integrity sha512-CRICJIl0N5cXDONAdlTv5ShATZ4HEwk6kDDIW2/w9qOWKg+NU/5F8wYRWCrONad0/UKkloNSmmyN/wX4rtpbVA== + dependencies: + "@types/node" "*" + "@types/qs" "*" + "@types/range-parser" "*" + "@types/send" "*" + +"@types/express-serve-static-core@^4.17.33": + version "4.19.6" + resolved "https://registry.yarnpkg.com/@types/express-serve-static-core/-/express-serve-static-core-4.19.6.tgz#e01324c2a024ff367d92c66f48553ced0ab50267" + integrity sha512-N4LZ2xG7DatVqhCZzOGb1Yi5lMbXSZcmdLDe9EzSndPV2HpWYWzRbaerl2n27irrm94EPpprqa8KpskPT085+A== + dependencies: + "@types/node" "*" + "@types/qs" "*" + "@types/range-parser" "*" + "@types/send" "*" + +"@types/express@*": + version "5.0.0" + resolved "https://registry.yarnpkg.com/@types/express/-/express-5.0.0.tgz#13a7d1f75295e90d19ed6e74cab3678488eaa96c" + integrity sha512-DvZriSMehGHL1ZNLzi6MidnsDhUZM/x2pRdDIKdwbUNqqwHxMlRdkxtn6/EPKyqKpHqTl/4nRZsRNLpZxZRpPQ== + dependencies: + "@types/body-parser" "*" + "@types/express-serve-static-core" "^5.0.0" + "@types/qs" "*" + "@types/serve-static" "*" + +"@types/express@^4.17.13": + version "4.17.21" + resolved "https://registry.yarnpkg.com/@types/express/-/express-4.17.21.tgz#c26d4a151e60efe0084b23dc3369ebc631ed192d" + integrity sha512-ejlPM315qwLpaQlQDTjPdsUFSc6ZsP4AN6AlWnogPjQ7CVi7PYF3YVz+CY3jE2pwYf7E/7HlDAN0rV2GxTG0HQ== + dependencies: + "@types/body-parser" 
"*" + "@types/express-serve-static-core" "^4.17.33" + "@types/qs" "*" + "@types/serve-static" "*" + +"@types/glob@^7.1.1": + version "7.2.0" + resolved "https://registry.yarnpkg.com/@types/glob/-/glob-7.2.0.tgz#bc1b5bf3aa92f25bd5dd39f35c57361bdce5b2eb" + integrity sha512-ZUxbzKl0IfJILTS6t7ip5fQQM/J3TJYubDm3nMbgubNNYS62eXeUpoLUC8/7fJNiFYHTrGPQn7hspDUzIHX3UA== + dependencies: + "@types/minimatch" "*" + "@types/node" "*" + +"@types/html-minifier-terser@^6.0.0": + version "6.1.0" + resolved "https://registry.yarnpkg.com/@types/html-minifier-terser/-/html-minifier-terser-6.1.0.tgz#4fc33a00c1d0c16987b1a20cf92d20614c55ac35" + integrity sha512-oh/6byDPnL1zeNXFrDXFLyZjkr1MsBG667IM792caf1L2UPOOMf65NFzjUH/ltyfwjAGfs1rsX1eftK0jC/KIg== + +"@types/http-errors@*": + version "2.0.4" + resolved "https://registry.yarnpkg.com/@types/http-errors/-/http-errors-2.0.4.tgz#7eb47726c391b7345a6ec35ad7f4de469cf5ba4f" + integrity sha512-D0CFMMtydbJAegzOyHjtiKPLlvnm3iTZyZRSZoLq2mRhDdmLfIWOCYPfQJ4cu2erKghU++QvjcUjp/5h7hESpA== + +"@types/http-proxy@^1.17.8": + version "1.17.15" + resolved "https://registry.yarnpkg.com/@types/http-proxy/-/http-proxy-1.17.15.tgz#12118141ce9775a6499ecb4c01d02f90fc839d36" + integrity sha512-25g5atgiVNTIv0LBDTg1H74Hvayx0ajtJPLLcYE3whFv75J0pWNtOBzaXJQgDTmrX1bx5U9YC2w/n65BN1HwRQ== + dependencies: + "@types/node" "*" + +"@types/json-schema@^7.0.5", "@types/json-schema@^7.0.8", "@types/json-schema@^7.0.9": + version "7.0.15" + resolved "https://registry.yarnpkg.com/@types/json-schema/-/json-schema-7.0.15.tgz#596a1747233694d50f6ad8a7869fcb6f56cf5841" + integrity sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA== + +"@types/long@^4.0.1": + version "4.0.2" + resolved "https://registry.yarnpkg.com/@types/long/-/long-4.0.2.tgz#b74129719fc8d11c01868010082d483b7545591a" + integrity sha512-MqTGEo5bj5t157U6fA/BiDynNkn0YknVdh48CMPkTSpFTVmvao5UQmm7uEF6xBEo7qIMAlY/JSleYaE6VOdpaA== + +"@types/mime@^1": + version "1.3.5" + resolved 
"https://registry.yarnpkg.com/@types/mime/-/mime-1.3.5.tgz#1ef302e01cf7d2b5a0fa526790c9123bf1d06690" + integrity sha512-/pyBZWSLD2n0dcHE3hq8s8ZvcETHtEuF+3E7XVt0Ig2nvsVQXdghHVcEkIWjy9A0wKfTn97a/PSDYohKIlnP/w== + +"@types/minimatch@*": + version "5.1.2" + resolved "https://registry.yarnpkg.com/@types/minimatch/-/minimatch-5.1.2.tgz#07508b45797cb81ec3f273011b054cd0755eddca" + integrity sha512-K0VQKziLUWkVKiRVrx4a40iPaxTUefQmjtkQofBkYRcoaaL/8rhwDWww9qWbrgicNOgnpIsMxyNIUM4+n6dUIA== + +"@types/node-forge@^1.3.0": + version "1.3.11" + resolved "https://registry.yarnpkg.com/@types/node-forge/-/node-forge-1.3.11.tgz#0972ea538ddb0f4d9c2fa0ec5db5724773a604da" + integrity sha512-FQx220y22OKNTqaByeBGqHWYz4cl94tpcxeFdvBo3wjG6XPBuZ0BNgNZRV5J5TFmmcsJ4IzsLkmGRiQbnYsBEQ== + dependencies: + "@types/node" "*" + +"@types/node@*", "@types/node@>=13.7.0": + version "22.8.6" + resolved "https://registry.yarnpkg.com/@types/node/-/node-22.8.6.tgz#e8a0c0871623283d8b3ef7d7b9b1bfdfd3028e22" + integrity sha512-tosuJYKrIqjQIlVCM4PEGxOmyg3FCPa/fViuJChnGeEIhjA46oy8FMVoF9su1/v8PNs2a8Q0iFNyOx0uOF91nw== + dependencies: + undici-types "~6.19.8" + +"@types/node@^18.7.13": + version "18.19.63" + resolved "https://registry.yarnpkg.com/@types/node/-/node-18.19.63.tgz#2b2cdb8b3bd2b739ae38c5676cd012c80568022f" + integrity sha512-hcUB7THvrGmaEcPcvUZCZtQ2Z3C+UR/aOcraBLCvTsFMh916Gc1kCCYcfcMuB76HM2pSerxl1PoP3KnmHzd9Lw== + dependencies: + undici-types "~5.26.4" + +"@types/parse-json@^4.0.0": + version "4.0.2" + resolved "https://registry.yarnpkg.com/@types/parse-json/-/parse-json-4.0.2.tgz#5950e50960793055845e956c427fc2b0d70c5239" + integrity sha512-dISoDXWWQwUquiKsyZ4Ng+HX2KsPL7LyHKHQwgGFEA3IaKac4Obd+h2a/a6waisAoepJlBcx9paWqjA8/HVjCw== + +"@types/prop-types@*": + version "15.7.13" + resolved "https://registry.yarnpkg.com/@types/prop-types/-/prop-types-15.7.13.tgz#2af91918ee12d9d32914feb13f5326658461b451" + integrity sha512-hCZTSvwbzWGvhqxp/RqVqwU999pBf2vp7hzIjiYOsl8wqOmUxkQ6ddw1cV3l8811+kdUFus/q4d1Y3E3SyEifA== 
+ +"@types/qs@*": + version "6.9.16" + resolved "https://registry.yarnpkg.com/@types/qs/-/qs-6.9.16.tgz#52bba125a07c0482d26747d5d4947a64daf8f794" + integrity sha512-7i+zxXdPD0T4cKDuxCUXJ4wHcsJLwENa6Z3dCu8cfCK743OGy5Nu1RmAGqDPsoTDINVEcdXKRvR/zre+P2Ku1A== + +"@types/range-parser@*": + version "1.2.7" + resolved "https://registry.yarnpkg.com/@types/range-parser/-/range-parser-1.2.7.tgz#50ae4353eaaddc04044279812f52c8c65857dbcb" + integrity sha512-hKormJbkJqzQGhziax5PItDUTMAM9uE2XXQmM37dyd4hVM+5aVl7oVxMVUiVQn2oCQFN/LKCZdvSM0pFRqbSmQ== + +"@types/react-dom@^18.0.0", "@types/react-dom@^18.0.6": + version "18.3.1" + resolved "https://registry.yarnpkg.com/@types/react-dom/-/react-dom-18.3.1.tgz#1e4654c08a9cdcfb6594c780ac59b55aad42fe07" + integrity sha512-qW1Mfv8taImTthu4KoXgDfLuk4bydU6Q/TkADnDWWHwi4NX4BR+LWfTp2sVmTqRrsHvyDDTelgelxJ+SsejKKQ== + dependencies: + "@types/react" "*" + +"@types/react@*", "@types/react@^18.0.17": + version "18.3.12" + resolved "https://registry.yarnpkg.com/@types/react/-/react-18.3.12.tgz#99419f182ccd69151813b7ee24b792fe08774f60" + integrity sha512-D2wOSq/d6Agt28q7rSI3jhU7G6aiuzljDGZ2hTZHIkrTLUI+AF3WMeKkEZ9nN2fkBAlcktT6vcZjDFiIhMYEQw== + dependencies: + "@types/prop-types" "*" + csstype "^3.0.2" + +"@types/retry@0.12.0": + version "0.12.0" + resolved "https://registry.yarnpkg.com/@types/retry/-/retry-0.12.0.tgz#2b35eccfcee7d38cd72ad99232fbd58bffb3c84d" + integrity sha512-wWKOClTTiizcZhXnPY4wikVAwmdYHp8q6DmC+EJUzAMsycb7HB32Kh9RN4+0gExjmPmZSAQjgURXIGATPegAvA== + +"@types/semver@^7.3.12": + version "7.5.8" + resolved "https://registry.yarnpkg.com/@types/semver/-/semver-7.5.8.tgz#8268a8c57a3e4abd25c165ecd36237db7948a55e" + integrity sha512-I8EUhyrgfLrcTkzV3TSsGyl1tSuPrEDzr0yd5m90UgNxQkyDXULk3b6MlQqTCpZpNtWe1K0hzclnZkTcLBe2UQ== + +"@types/send@*": + version "0.17.4" + resolved "https://registry.yarnpkg.com/@types/send/-/send-0.17.4.tgz#6619cd24e7270793702e4e6a4b958a9010cfc57a" + integrity 
sha512-x2EM6TJOybec7c52BX0ZspPodMsQUd5L6PRwOunVyVUhXiBSKf3AezDL8Dgvgt5o0UfKNfuA0eMLr2wLT4AiBA== + dependencies: + "@types/mime" "^1" + "@types/node" "*" + +"@types/serve-index@^1.9.1": + version "1.9.4" + resolved "https://registry.yarnpkg.com/@types/serve-index/-/serve-index-1.9.4.tgz#e6ae13d5053cb06ed36392110b4f9a49ac4ec898" + integrity sha512-qLpGZ/c2fhSs5gnYsQxtDEq3Oy8SXPClIXkW5ghvAvsNuVSA8k+gCONcUCS/UjLEYvYps+e8uBtfgXgvhwfNug== + dependencies: + "@types/express" "*" + +"@types/serve-static@*", "@types/serve-static@^1.13.10": + version "1.15.7" + resolved "https://registry.yarnpkg.com/@types/serve-static/-/serve-static-1.15.7.tgz#22174bbd74fb97fe303109738e9b5c2f3064f714" + integrity sha512-W8Ym+h8nhuRwaKPaDw34QUkwsGi6Rc4yYqvKFo5rm2FUEhCFbzVWrxXUxuKK8TASjWsysJY0nsmNCGhCOIsrOw== + dependencies: + "@types/http-errors" "*" + "@types/node" "*" + "@types/send" "*" + +"@types/sockjs@^0.3.33": + version "0.3.36" + resolved "https://registry.yarnpkg.com/@types/sockjs/-/sockjs-0.3.36.tgz#ce322cf07bcc119d4cbf7f88954f3a3bd0f67535" + integrity sha512-MK9V6NzAS1+Ud7JV9lJLFqW85VbC9dq3LmwZCuBe4wBDgKC0Kj/jd8Xl+nSviU+Qc3+m7umHHyHg//2KSa0a0Q== + dependencies: + "@types/node" "*" + +"@types/statuses@^2.0.4": + version "2.0.5" + resolved "https://registry.yarnpkg.com/@types/statuses/-/statuses-2.0.5.tgz#f61ab46d5352fd73c863a1ea4e1cef3b0b51ae63" + integrity sha512-jmIUGWrAiwu3dZpxntxieC+1n/5c3mjrImkmOSQ2NC5uP6cYO4aAZDdSmRcI5C1oiTmqlZGHC+/NmJrKogbP5A== + +"@types/tough-cookie@^4.0.5": + version "4.0.5" + resolved "https://registry.yarnpkg.com/@types/tough-cookie/-/tough-cookie-4.0.5.tgz#cb6e2a691b70cb177c6e3ae9c1d2e8b2ea8cd304" + integrity sha512-/Ad8+nIOV7Rl++6f1BdKxFSMgmoqEoYbHRpPcx3JEfv8VRsQe9Z4mCXeJBzxs7mbHY/XOZZuXlRNfhpVPbs6ZA== + +"@types/underscore@^1.11.4": + version "1.13.0" + resolved "https://registry.yarnpkg.com/@types/underscore/-/underscore-1.13.0.tgz#dd8c034a92e5b8e24650c31af43d807c5340cee4" + integrity 
sha512-L6LBgy1f0EFQZ+7uSA57+n2g/s4Qs5r06Vwrwn0/nuK1de+adz00NWaztRQ30aEqw5qOaWbPI8u2cGQ52lj6VA== + +"@types/ws@^8.5.5": + version "8.5.12" + resolved "https://registry.yarnpkg.com/@types/ws/-/ws-8.5.12.tgz#619475fe98f35ccca2a2f6c137702d85ec247b7e" + integrity sha512-3tPRkv1EtkDpzlgyKyI8pGsGZAGPEaXeu0DOj5DI25Ja91bdAYddYHbADRYVrZMRbfW+1l5YwXVDKohDJNQxkQ== + dependencies: + "@types/node" "*" + +"@typescript-eslint/eslint-plugin@^5.35.1": + version "5.62.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/eslint-plugin/-/eslint-plugin-5.62.0.tgz#aeef0328d172b9e37d9bab6dbc13b87ed88977db" + integrity sha512-TiZzBSJja/LbhNPvk6yc0JrX9XqhQ0hdh6M2svYfsHGejaKFIAGd9MQ+ERIMzLGlN/kZoYIgdxFV0PuljTKXag== + dependencies: + "@eslint-community/regexpp" "^4.4.0" + "@typescript-eslint/scope-manager" "5.62.0" + "@typescript-eslint/type-utils" "5.62.0" + "@typescript-eslint/utils" "5.62.0" + debug "^4.3.4" + graphemer "^1.4.0" + ignore "^5.2.0" + natural-compare-lite "^1.4.0" + semver "^7.3.7" + tsutils "^3.21.0" + +"@typescript-eslint/parser@^5.35.1": + version "5.62.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/parser/-/parser-5.62.0.tgz#1b63d082d849a2fcae8a569248fbe2ee1b8a56c7" + integrity sha512-VlJEV0fOQ7BExOsHYAGrgbEiZoi8D+Bl2+f6V2RrXerRSylnp+ZBHmPvaIa8cz0Ajx7WO7Z5RqfgYg7ED1nRhA== + dependencies: + "@typescript-eslint/scope-manager" "5.62.0" + "@typescript-eslint/types" "5.62.0" + "@typescript-eslint/typescript-estree" "5.62.0" + debug "^4.3.4" + +"@typescript-eslint/scope-manager@5.62.0": + version "5.62.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-5.62.0.tgz#d9457ccc6a0b8d6b37d0eb252a23022478c5460c" + integrity sha512-VXuvVvZeQCQb5Zgf4HAxc04q5j+WrNAtNh9OwCsCgpKqESMTu3tF/jhZ3xG6T4NZwWl65Bg8KuS2uEvhSfLl0w== + dependencies: + "@typescript-eslint/types" "5.62.0" + "@typescript-eslint/visitor-keys" "5.62.0" + +"@typescript-eslint/type-utils@5.62.0": + version "5.62.0" + resolved 
"https://registry.yarnpkg.com/@typescript-eslint/type-utils/-/type-utils-5.62.0.tgz#286f0389c41681376cdad96b309cedd17d70346a" + integrity sha512-xsSQreu+VnfbqQpW5vnCJdq1Z3Q0U31qiWmRhr98ONQmcp/yhiPJFPq8MXiJVLiksmOKSjIldZzkebzHuCGzew== + dependencies: + "@typescript-eslint/typescript-estree" "5.62.0" + "@typescript-eslint/utils" "5.62.0" + debug "^4.3.4" + tsutils "^3.21.0" + +"@typescript-eslint/types@5.62.0": + version "5.62.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-5.62.0.tgz#258607e60effa309f067608931c3df6fed41fd2f" + integrity sha512-87NVngcbVXUahrRTqIK27gD2t5Cu1yuCXxbLcFtCzZGlfyVWWh8mLHkoxzjsB6DDNnvdL+fW8MiwPEJyGJQDgQ== + +"@typescript-eslint/typescript-estree@5.62.0": + version "5.62.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-5.62.0.tgz#7d17794b77fabcac615d6a48fb143330d962eb9b" + integrity sha512-CmcQ6uY7b9y694lKdRB8FEel7JbU/40iSAPomu++SjLMntB+2Leay2LO6i8VnJk58MtE9/nQSFIH6jpyRWyYzA== + dependencies: + "@typescript-eslint/types" "5.62.0" + "@typescript-eslint/visitor-keys" "5.62.0" + debug "^4.3.4" + globby "^11.1.0" + is-glob "^4.0.3" + semver "^7.3.7" + tsutils "^3.21.0" + +"@typescript-eslint/utils@5.62.0": + version "5.62.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/utils/-/utils-5.62.0.tgz#141e809c71636e4a75daa39faed2fb5f4b10df86" + integrity sha512-n8oxjeb5aIbPFEtmQxQYOLI0i9n5ySBEY/ZEHHZqKQSFnxio1rv6dthascc9dLuwrL0RC5mPCxB7vnAVGAYWAQ== + dependencies: + "@eslint-community/eslint-utils" "^4.2.0" + "@types/json-schema" "^7.0.9" + "@types/semver" "^7.3.12" + "@typescript-eslint/scope-manager" "5.62.0" + "@typescript-eslint/types" "5.62.0" + "@typescript-eslint/typescript-estree" "5.62.0" + eslint-scope "^5.1.1" + semver "^7.3.7" + +"@typescript-eslint/visitor-keys@5.62.0": + version "5.62.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-5.62.0.tgz#2174011917ce582875954ffe2f6912d5931e353e" + integrity 
sha512-07ny+LHRzQXepkGg6w0mFY41fVUNBrL2Roj/++7V1txKugfjm/Ci/qSND03r2RhlJhJYMcTn9AhhSSqQp0Ysyw== + dependencies: + "@typescript-eslint/types" "5.62.0" + eslint-visitor-keys "^3.3.0" + +"@ungap/structured-clone@^1.2.0": + version "1.2.0" + resolved "https://registry.yarnpkg.com/@ungap/structured-clone/-/structured-clone-1.2.0.tgz#756641adb587851b5ccb3e095daf27ae581c8406" + integrity sha512-zuVdFrMJiuCDQUMCzQaD6KL28MjnqqN8XnAqiEq9PNm/hCPTSGfrXCOfwj1ow4LFb/tNymJPwsNbVePc1xFqrQ== + +"@webassemblyjs/ast@1.12.1", "@webassemblyjs/ast@^1.12.1": + version "1.12.1" + resolved "https://registry.yarnpkg.com/@webassemblyjs/ast/-/ast-1.12.1.tgz#bb16a0e8b1914f979f45864c23819cc3e3f0d4bb" + integrity sha512-EKfMUOPRRUTy5UII4qJDGPpqfwjOmZ5jeGFwid9mnoqIFK+e0vqoi1qH56JpmZSzEL53jKnNzScdmftJyG5xWg== + dependencies: + "@webassemblyjs/helper-numbers" "1.11.6" + "@webassemblyjs/helper-wasm-bytecode" "1.11.6" + +"@webassemblyjs/floating-point-hex-parser@1.11.6": + version "1.11.6" + resolved "https://registry.yarnpkg.com/@webassemblyjs/floating-point-hex-parser/-/floating-point-hex-parser-1.11.6.tgz#dacbcb95aff135c8260f77fa3b4c5fea600a6431" + integrity sha512-ejAj9hfRJ2XMsNHk/v6Fu2dGS+i4UaXBXGemOfQ/JfQ6mdQg/WXtwleQRLLS4OvfDhv8rYnVwH27YJLMyYsxhw== + +"@webassemblyjs/helper-api-error@1.11.6": + version "1.11.6" + resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-api-error/-/helper-api-error-1.11.6.tgz#6132f68c4acd59dcd141c44b18cbebbd9f2fa768" + integrity sha512-o0YkoP4pVu4rN8aTJgAyj9hC2Sv5UlkzCHhxqWj8butaLvnpdc2jOwh4ewE6CX0txSfLn/UYaV/pheS2Txg//Q== + +"@webassemblyjs/helper-buffer@1.12.1": + version "1.12.1" + resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-buffer/-/helper-buffer-1.12.1.tgz#6df20d272ea5439bf20ab3492b7fb70e9bfcb3f6" + integrity sha512-nzJwQw99DNDKr9BVCOZcLuJJUlqkJh+kVzVl6Fmq/tI5ZtEyWT1KZMyOXltXLZJmDtvLCDgwsyrkohEtopTXCw== + +"@webassemblyjs/helper-numbers@1.11.6": + version "1.11.6" + resolved 
"https://registry.yarnpkg.com/@webassemblyjs/helper-numbers/-/helper-numbers-1.11.6.tgz#cbce5e7e0c1bd32cf4905ae444ef64cea919f1b5" + integrity sha512-vUIhZ8LZoIWHBohiEObxVm6hwP034jwmc9kuq5GdHZH0wiLVLIPcMCdpJzG4C11cHoQ25TFIQj9kaVADVX7N3g== + dependencies: + "@webassemblyjs/floating-point-hex-parser" "1.11.6" + "@webassemblyjs/helper-api-error" "1.11.6" + "@xtuc/long" "4.2.2" + +"@webassemblyjs/helper-wasm-bytecode@1.11.6": + version "1.11.6" + resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-wasm-bytecode/-/helper-wasm-bytecode-1.11.6.tgz#bb2ebdb3b83aa26d9baad4c46d4315283acd51e9" + integrity sha512-sFFHKwcmBprO9e7Icf0+gddyWYDViL8bpPjJJl0WHxCdETktXdmtWLGVzoHbqUcY4Be1LkNfwTmXOJUFZYSJdA== + +"@webassemblyjs/helper-wasm-section@1.12.1": + version "1.12.1" + resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-wasm-section/-/helper-wasm-section-1.12.1.tgz#3da623233ae1a60409b509a52ade9bc22a37f7bf" + integrity sha512-Jif4vfB6FJlUlSbgEMHUyk1j234GTNG9dBJ4XJdOySoj518Xj0oGsNi59cUQF4RRMS9ouBUxDDdyBVfPTypa5g== + dependencies: + "@webassemblyjs/ast" "1.12.1" + "@webassemblyjs/helper-buffer" "1.12.1" + "@webassemblyjs/helper-wasm-bytecode" "1.11.6" + "@webassemblyjs/wasm-gen" "1.12.1" + +"@webassemblyjs/ieee754@1.11.6": + version "1.11.6" + resolved "https://registry.yarnpkg.com/@webassemblyjs/ieee754/-/ieee754-1.11.6.tgz#bb665c91d0b14fffceb0e38298c329af043c6e3a" + integrity sha512-LM4p2csPNvbij6U1f19v6WR56QZ8JcHg3QIJTlSwzFcmx6WSORicYj6I63f9yU1kEUtrpG+kjkiIAkevHpDXrg== + dependencies: + "@xtuc/ieee754" "^1.2.0" + +"@webassemblyjs/leb128@1.11.6": + version "1.11.6" + resolved "https://registry.yarnpkg.com/@webassemblyjs/leb128/-/leb128-1.11.6.tgz#70e60e5e82f9ac81118bc25381a0b283893240d7" + integrity sha512-m7a0FhE67DQXgouf1tbN5XQcdWoNgaAuoULHIfGFIEVKA6tu/edls6XnIlkmS6FrXAquJRPni3ZZKjw6FSPjPQ== + dependencies: + "@xtuc/long" "4.2.2" + +"@webassemblyjs/utf8@1.11.6": + version "1.11.6" + resolved 
"https://registry.yarnpkg.com/@webassemblyjs/utf8/-/utf8-1.11.6.tgz#90f8bc34c561595fe156603be7253cdbcd0fab5a" + integrity sha512-vtXf2wTQ3+up9Zsg8sa2yWiQpzSsMyXj0qViVP6xKGCUT8p8YJ6HqI7l5eCnWx1T/FYdsv07HQs2wTFbbof/RA== + +"@webassemblyjs/wasm-edit@^1.12.1": + version "1.12.1" + resolved "https://registry.yarnpkg.com/@webassemblyjs/wasm-edit/-/wasm-edit-1.12.1.tgz#9f9f3ff52a14c980939be0ef9d5df9ebc678ae3b" + integrity sha512-1DuwbVvADvS5mGnXbE+c9NfA8QRcZ6iKquqjjmR10k6o+zzsRVesil54DKexiowcFCPdr/Q0qaMgB01+SQ1u6g== + dependencies: + "@webassemblyjs/ast" "1.12.1" + "@webassemblyjs/helper-buffer" "1.12.1" + "@webassemblyjs/helper-wasm-bytecode" "1.11.6" + "@webassemblyjs/helper-wasm-section" "1.12.1" + "@webassemblyjs/wasm-gen" "1.12.1" + "@webassemblyjs/wasm-opt" "1.12.1" + "@webassemblyjs/wasm-parser" "1.12.1" + "@webassemblyjs/wast-printer" "1.12.1" + +"@webassemblyjs/wasm-gen@1.12.1": + version "1.12.1" + resolved "https://registry.yarnpkg.com/@webassemblyjs/wasm-gen/-/wasm-gen-1.12.1.tgz#a6520601da1b5700448273666a71ad0a45d78547" + integrity sha512-TDq4Ojh9fcohAw6OIMXqiIcTq5KUXTGRkVxbSo1hQnSy6lAM5GSdfwWeSxpAo0YzgsgF182E/U0mDNhuA0tW7w== + dependencies: + "@webassemblyjs/ast" "1.12.1" + "@webassemblyjs/helper-wasm-bytecode" "1.11.6" + "@webassemblyjs/ieee754" "1.11.6" + "@webassemblyjs/leb128" "1.11.6" + "@webassemblyjs/utf8" "1.11.6" + +"@webassemblyjs/wasm-opt@1.12.1": + version "1.12.1" + resolved "https://registry.yarnpkg.com/@webassemblyjs/wasm-opt/-/wasm-opt-1.12.1.tgz#9e6e81475dfcfb62dab574ac2dda38226c232bc5" + integrity sha512-Jg99j/2gG2iaz3hijw857AVYekZe2SAskcqlWIZXjji5WStnOpVoat3gQfT/Q5tb2djnCjBtMocY/Su1GfxPBg== + dependencies: + "@webassemblyjs/ast" "1.12.1" + "@webassemblyjs/helper-buffer" "1.12.1" + "@webassemblyjs/wasm-gen" "1.12.1" + "@webassemblyjs/wasm-parser" "1.12.1" + +"@webassemblyjs/wasm-parser@1.12.1", "@webassemblyjs/wasm-parser@^1.12.1": + version "1.12.1" + resolved 
"https://registry.yarnpkg.com/@webassemblyjs/wasm-parser/-/wasm-parser-1.12.1.tgz#c47acb90e6f083391e3fa61d113650eea1e95937" + integrity sha512-xikIi7c2FHXysxXe3COrVUPSheuBtpcfhbpFj4gmu7KRLYOzANztwUU0IbsqvMqzuNK2+glRGWCEqZo1WCLyAQ== + dependencies: + "@webassemblyjs/ast" "1.12.1" + "@webassemblyjs/helper-api-error" "1.11.6" + "@webassemblyjs/helper-wasm-bytecode" "1.11.6" + "@webassemblyjs/ieee754" "1.11.6" + "@webassemblyjs/leb128" "1.11.6" + "@webassemblyjs/utf8" "1.11.6" + +"@webassemblyjs/wast-printer@1.12.1": + version "1.12.1" + resolved "https://registry.yarnpkg.com/@webassemblyjs/wast-printer/-/wast-printer-1.12.1.tgz#bcecf661d7d1abdaf989d8341a4833e33e2b31ac" + integrity sha512-+X4WAlOisVWQMikjbcvY2e0rwPsKQ9F688lksZhBcPycBBuii3O7m8FACbDMWDojpAqvjIncrG8J0XHKyQfVeA== + dependencies: + "@webassemblyjs/ast" "1.12.1" + "@xtuc/long" "4.2.2" + +"@webpack-cli/configtest@^1.2.0": + version "1.2.0" + resolved "https://registry.yarnpkg.com/@webpack-cli/configtest/-/configtest-1.2.0.tgz#7b20ce1c12533912c3b217ea68262365fa29a6f5" + integrity sha512-4FB8Tj6xyVkyqjj1OaTqCjXYULB9FMkqQ8yGrZjRDrYh0nOE+7Lhs45WioWQQMV+ceFlE368Ukhe6xdvJM9Egg== + +"@webpack-cli/info@^1.5.0": + version "1.5.0" + resolved "https://registry.yarnpkg.com/@webpack-cli/info/-/info-1.5.0.tgz#6c78c13c5874852d6e2dd17f08a41f3fe4c261b1" + integrity sha512-e8tSXZpw2hPl2uMJY6fsMswaok5FdlGNRTktvFk2sD8RjH0hE2+XistawJx1vmKteh4NmGmNUrp+Tb2w+udPcQ== + dependencies: + envinfo "^7.7.3" + +"@webpack-cli/serve@^1.7.0": + version "1.7.0" + resolved "https://registry.yarnpkg.com/@webpack-cli/serve/-/serve-1.7.0.tgz#e1993689ac42d2b16e9194376cfb6753f6254db1" + integrity sha512-oxnCNGj88fL+xzV+dacXs44HcDwf1ovs3AuEzvP7mqXw7fQntqIhQ1BRmynh4qEKQSSSRSWVyXRjmTbZIX9V2Q== + +"@xtuc/ieee754@^1.2.0": + version "1.2.0" + resolved "https://registry.yarnpkg.com/@xtuc/ieee754/-/ieee754-1.2.0.tgz#eef014a3145ae477a1cbc00cd1e552336dceb790" + integrity 
sha512-DX8nKgqcGwsc0eJSqYt5lwP4DH5FlHnmuWWBRy7X0NcaGR0ZtuyeESgMwTYVEtxmsNGY+qit4QYT/MIYTOTPeA== + +"@xtuc/long@4.2.2": + version "4.2.2" + resolved "https://registry.yarnpkg.com/@xtuc/long/-/long-4.2.2.tgz#d291c6a4e97989b5c61d9acf396ae4fe133a718d" + integrity sha512-NuHqBY1PB/D8xU6s/thBgOAiAP7HOYDQ32+BFZILJ8ivkUkAHQnWfn6WhL79Owj1qmUnoN/YPhktdIoucipkAQ== + +accepts@~1.3.4, accepts@~1.3.8: + version "1.3.8" + resolved "https://registry.yarnpkg.com/accepts/-/accepts-1.3.8.tgz#0bf0be125b67014adcb0b0921e62db7bffe16b2e" + integrity sha512-PYAthTa2m2VKxuvSD3DPC/Gy+U+sOA1LAuT8mkmRuvw+NACSaeXEQ+NHcVF7rONl6qcaxV3Uuemwawk+7+SJLw== + dependencies: + mime-types "~2.1.34" + negotiator "0.6.3" + +acorn-jsx@^5.3.2: + version "5.3.2" + resolved "https://registry.yarnpkg.com/acorn-jsx/-/acorn-jsx-5.3.2.tgz#7ed5bb55908b3b2f1bc55c6af1653bada7f07937" + integrity sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ== + +acorn@^8.14.0, acorn@^8.8.2, acorn@^8.9.0: + version "8.14.0" + resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.14.0.tgz#063e2c70cac5fb4f6467f0b11152e04c682795b0" + integrity sha512-cl669nCJTZBsL97OF4kUQm5g5hC2uihk0NxY3WENAC0TYdILVkAyHymAntgxGkl7K+t0cXIrH5siy5S4XkFycA== + +ajv-formats@^2.1.1: + version "2.1.1" + resolved "https://registry.yarnpkg.com/ajv-formats/-/ajv-formats-2.1.1.tgz#6e669400659eb74973bbf2e33327180a0996b520" + integrity sha512-Wx0Kx52hxE7C18hkMEggYlEifqWZtYaRgouJor+WMdPnQyEK13vgEWyVNup7SoeeoLMsr4kf5h6dOW11I15MUA== + dependencies: + ajv "^8.0.0" + +ajv-keywords@^3.5.2: + version "3.5.2" + resolved "https://registry.yarnpkg.com/ajv-keywords/-/ajv-keywords-3.5.2.tgz#31f29da5ab6e00d1c2d329acf7b5929614d5014d" + integrity sha512-5p6WTN0DdTGVQk6VjcEju19IgaHudalcfabD7yhDGeA6bcQnmL+CpveLJq/3hvfwd1aof6L386Ougkx6RfyMIQ== + +ajv-keywords@^5.1.0: + version "5.1.0" + resolved "https://registry.yarnpkg.com/ajv-keywords/-/ajv-keywords-5.1.0.tgz#69d4d385a4733cdbeab44964a1170a88f87f0e16" + integrity 
sha512-YCS/JNFAUyr5vAuhk1DWm1CBxRHW9LbJ2ozWeemrIqpbsqKjHVxYPyi5GC0rjZIT5JxJ3virVTS8wk4i/Z+krw== + dependencies: + fast-deep-equal "^3.1.3" + +ajv@^6.12.4, ajv@^6.12.5: + version "6.12.6" + resolved "https://registry.yarnpkg.com/ajv/-/ajv-6.12.6.tgz#baf5a62e802b07d977034586f8c3baf5adf26df4" + integrity sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g== + dependencies: + fast-deep-equal "^3.1.1" + fast-json-stable-stringify "^2.0.0" + json-schema-traverse "^0.4.1" + uri-js "^4.2.2" + +ajv@^8.0.0, ajv@^8.9.0: + version "8.17.1" + resolved "https://registry.yarnpkg.com/ajv/-/ajv-8.17.1.tgz#37d9a5c776af6bc92d7f4f9510eba4c0a60d11a6" + integrity sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g== + dependencies: + fast-deep-equal "^3.1.3" + fast-uri "^3.0.1" + json-schema-traverse "^1.0.0" + require-from-string "^2.0.2" + +ansi-escapes@^4.3.2: + version "4.3.2" + resolved "https://registry.yarnpkg.com/ansi-escapes/-/ansi-escapes-4.3.2.tgz#6b2291d1db7d98b6521d5f1efa42d0f3a9feb65e" + integrity sha512-gKXj5ALrKWQLsYG9jlTRmR/xKluxHV+Z9QEwNIgCfM1/uwPMCuzVVnh5mwTd+OuBZcwSIMbqssNWRm1lE51QaQ== + dependencies: + type-fest "^0.21.3" + +ansi-html-community@^0.0.8: + version "0.0.8" + resolved "https://registry.yarnpkg.com/ansi-html-community/-/ansi-html-community-0.0.8.tgz#69fbc4d6ccbe383f9736934ae34c3f8290f1bf41" + integrity sha512-1APHAyr3+PCamwNw3bXCPp4HFLONZt/yIH0sZp0/469KWNTEy+qN5jQ3GVX6DMZ1UXAi34yVwtTeaG/HpBuuzw== + +ansi-html@^0.0.9: + version "0.0.9" + resolved "https://registry.yarnpkg.com/ansi-html/-/ansi-html-0.0.9.tgz#6512d02342ae2cc68131952644a129cb734cd3f0" + integrity sha512-ozbS3LuenHVxNRh/wdnN16QapUHzauqSomAl1jwwJRRsGwFwtj644lIhxfWu0Fy0acCij2+AEgHvjscq3dlVXg== + +ansi-regex@^2.0.0: + version "2.1.1" + resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-2.1.1.tgz#c3b33ab5ee360d86e0e628f0468ae7ef27d654df" + integrity 
sha512-TIGnTpdo+E3+pCyAluZvtED5p5wCqLdezCyhPZzKPcxvFplEt4i+W7OONCKgeZFT3+y5NZZfOOS/Bdcanm1MYA== + +ansi-regex@^3.0.0: + version "3.0.1" + resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-3.0.1.tgz#123d6479e92ad45ad897d4054e3c7ca7db4944e1" + integrity sha512-+O9Jct8wf++lXxxFc4hc8LsjaSq0HFzzL7cVsw8pRDIPdjKD2mT4ytDZlLuSBZ4cLKZFXIrMGO7DbQCtMJJMKw== + +ansi-regex@^5.0.1: + version "5.0.1" + resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-5.0.1.tgz#082cb2c89c9fe8659a311a53bd6a4dc5301db304" + integrity sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ== + +ansi-regex@^6.0.1: + version "6.1.0" + resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-6.1.0.tgz#95ec409c69619d6cb1b8b34f14b660ef28ebd654" + integrity sha512-7HSX4QQb4CspciLpVFwyRe79O3xsIZDDLER21kERQ71oaPodF8jL725AgJMFAYbooIqolJoRLuM81SpeUkpkvA== + +ansi-styles@^2.2.1: + version "2.2.1" + resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-2.2.1.tgz#b432dd3358b634cf75e1e4664368240533c1ddbe" + integrity sha512-kmCevFghRiWM7HB5zTPULl4r9bVFSWjz62MhqizDGUrq2NWuNMQyuv4tHHoKJHs69M/MF64lEcHdYIocrdWQYA== + +ansi-styles@^4.0.0, ansi-styles@^4.1.0: + version "4.3.0" + resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-4.3.0.tgz#edd803628ae71c04c85ae7a0906edad34b648937" + integrity sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg== + dependencies: + color-convert "^2.0.1" + +ansi-styles@^5.0.0: + version "5.2.0" + resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-5.2.0.tgz#07449690ad45777d1924ac2abb2fc8895dba836b" + integrity sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA== + +ansi-styles@^6.1.0: + version "6.2.1" + resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-6.2.1.tgz#0e62320cf99c21afff3b3012192546aacbfb05c5" + integrity 
sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug== + +any-promise@^1.0.0: + version "1.3.0" + resolved "https://registry.yarnpkg.com/any-promise/-/any-promise-1.3.0.tgz#abc6afeedcea52e809cdc0376aed3ce39635d17f" + integrity sha512-7UvmKalWRt1wgjL1RrGxoSJW/0QZFIegpeGvZG9kjp8vrRu55XTHbwnqq2GpXm9uLbcuhxm3IqX9OB4MZR1b2A== + +anymatch@~3.1.2: + version "3.1.3" + resolved "https://registry.yarnpkg.com/anymatch/-/anymatch-3.1.3.tgz#790c58b19ba1720a84205b57c618d5ad8524973e" + integrity sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw== + dependencies: + normalize-path "^3.0.0" + picomatch "^2.0.4" + +arch@^2.1.0: + version "2.2.0" + resolved "https://registry.yarnpkg.com/arch/-/arch-2.2.0.tgz#1bc47818f305764f23ab3306b0bfc086c5a29d11" + integrity sha512-Of/R0wqp83cgHozfIYLbBMnej79U/SVGOOyuB3VVFv1NRM/PSFMK12x9KVtiYzJqmnU5WR2qp0Z5rHb7sWGnFQ== + +archive-type@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/archive-type/-/archive-type-4.0.0.tgz#f92e72233056dfc6969472749c267bdb046b1d70" + integrity sha512-zV4Ky0v1F8dBrdYElwTvQhweQ0P7Kwc1aluqJsYtOBP01jXcWCyW2IEfI1YiqsG+Iy7ZR+o5LF1N+PGECBxHWA== + dependencies: + file-type "^4.2.0" + +arg@^5.0.2: + version "5.0.2" + resolved "https://registry.yarnpkg.com/arg/-/arg-5.0.2.tgz#c81433cc427c92c4dcf4865142dbca6f15acd59c" + integrity sha512-PYjyFOLKQ9y57JvQ6QLo8dAgNqswh8M1RMJYdQduT6xbWSgK36P/Z/v+p888pM69jMMfS8Xd8F6I1kQ/I9HUGg== + +argparse@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/argparse/-/argparse-2.0.1.tgz#246f50f3ca78a3240f6c997e8a9bd1eac49e4b38" + integrity sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q== + +aria-query@5.1.3: + version "5.1.3" + resolved "https://registry.yarnpkg.com/aria-query/-/aria-query-5.1.3.tgz#19db27cd101152773631396f7a95a3b58c22c35e" + integrity 
sha512-R5iJ5lkuHybztUfuOAznmboyjWq8O6sqNqtK7CLOqdydi54VNbORp49mb14KbWgG1QD3JFO9hJdZ+y4KutfdOQ== + dependencies: + deep-equal "^2.0.5" + +array-buffer-byte-length@^1.0.0, array-buffer-byte-length@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/array-buffer-byte-length/-/array-buffer-byte-length-1.0.1.tgz#1e5583ec16763540a27ae52eed99ff899223568f" + integrity sha512-ahC5W1xgou+KTXix4sAO8Ki12Q+jf4i0+tmk3sC+zgcynshkHxzpXdImBehiUYKKKDwvfFiJl1tZt6ewscS1Mg== + dependencies: + call-bind "^1.0.5" + is-array-buffer "^3.0.4" + +array-flatten@1.1.1: + version "1.1.1" + resolved "https://registry.yarnpkg.com/array-flatten/-/array-flatten-1.1.1.tgz#9a5f699051b1e7073328f2a008968b64ea2955d2" + integrity sha512-PCVAQswWemu6UdxsDFFX/+gVeYqKAod3D3UVm91jHwynguOwAvYPhx8nNlM++NqRcK6CxxpUafjmhIdKiHibqg== + +array-includes@^3.1.6, array-includes@^3.1.8: + version "3.1.8" + resolved "https://registry.yarnpkg.com/array-includes/-/array-includes-3.1.8.tgz#5e370cbe172fdd5dd6530c1d4aadda25281ba97d" + integrity sha512-itaWrbYbqpGXkGhZPGUulwnhVf5Hpy1xiCFsGqyIGglbBxmG5vSjxQen3/WGOjPpNEv1RtBLKxbmVXm8HpJStQ== + dependencies: + call-bind "^1.0.7" + define-properties "^1.2.1" + es-abstract "^1.23.2" + es-object-atoms "^1.0.0" + get-intrinsic "^1.2.4" + is-string "^1.0.7" + +array-union@^2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/array-union/-/array-union-2.1.0.tgz#b798420adbeb1de828d84acd8a2e23d3efe85e8d" + integrity sha512-HGyxoOTYUyCM6stUe6EJgnd4EoewAI7zMdfqO+kGjnlZmBDz/cR5pf8r/cR4Wq60sL/p0IkcjUEEPwS3GFrIyw== + +array.prototype.findlast@^1.2.5: + version "1.2.5" + resolved "https://registry.yarnpkg.com/array.prototype.findlast/-/array.prototype.findlast-1.2.5.tgz#3e4fbcb30a15a7f5bf64cf2faae22d139c2e4904" + integrity sha512-CVvd6FHg1Z3POpBLxO6E6zr+rSKEQ9L6rZHAaY7lLfhKsWYUBBOuMs0e9o24oopj6H+geRCX0YJ+TJLBK2eHyQ== + dependencies: + call-bind "^1.0.7" + define-properties "^1.2.1" + es-abstract "^1.23.2" + es-errors "^1.3.0" + es-object-atoms "^1.0.0" + 
es-shim-unscopables "^1.0.2" + +array.prototype.flat@^1.3.1: + version "1.3.2" + resolved "https://registry.yarnpkg.com/array.prototype.flat/-/array.prototype.flat-1.3.2.tgz#1476217df8cff17d72ee8f3ba06738db5b387d18" + integrity sha512-djYB+Zx2vLewY8RWlNCUdHjDXs2XOgm602S9E7P/UpHgfeHL00cRiIF+IN/G/aUJ7kGPb6yO/ErDI5V2s8iycA== + dependencies: + call-bind "^1.0.2" + define-properties "^1.2.0" + es-abstract "^1.22.1" + es-shim-unscopables "^1.0.0" + +array.prototype.flatmap@^1.3.2: + version "1.3.2" + resolved "https://registry.yarnpkg.com/array.prototype.flatmap/-/array.prototype.flatmap-1.3.2.tgz#c9a7c6831db8e719d6ce639190146c24bbd3e527" + integrity sha512-Ewyx0c9PmpcsByhSW4r+9zDU7sGjFc86qf/kKtuSCRdhfbk0SNLLkaT5qvcHnRGgc5NP/ly/y+qkXkqONX54CQ== + dependencies: + call-bind "^1.0.2" + define-properties "^1.2.0" + es-abstract "^1.22.1" + es-shim-unscopables "^1.0.0" + +array.prototype.tosorted@^1.1.4: + version "1.1.4" + resolved "https://registry.yarnpkg.com/array.prototype.tosorted/-/array.prototype.tosorted-1.1.4.tgz#fe954678ff53034e717ea3352a03f0b0b86f7ffc" + integrity sha512-p6Fx8B7b7ZhL/gmUsAy0D15WhvDccw3mnGNbZpi3pmeJdxtWsj2jEaI4Y6oo3XiHfzuSgPwKc04MYt6KgvC/wA== + dependencies: + call-bind "^1.0.7" + define-properties "^1.2.1" + es-abstract "^1.23.3" + es-errors "^1.3.0" + es-shim-unscopables "^1.0.2" + +arraybuffer.prototype.slice@^1.0.3: + version "1.0.3" + resolved "https://registry.yarnpkg.com/arraybuffer.prototype.slice/-/arraybuffer.prototype.slice-1.0.3.tgz#097972f4255e41bc3425e37dc3f6421cf9aefde6" + integrity sha512-bMxMKAjg13EBSVscxTaYA4mRc5t1UAXa2kXiGTNfZ079HIWXEkKmkgFrh/nJqamaLSrXO5H4WFFkPEaLJWbs3A== + dependencies: + array-buffer-byte-length "^1.0.1" + call-bind "^1.0.5" + define-properties "^1.2.1" + es-abstract "^1.22.3" + es-errors "^1.2.1" + get-intrinsic "^1.2.3" + is-array-buffer "^3.0.4" + is-shared-array-buffer "^1.0.2" + +asynckit@^0.4.0: + version "0.4.0" + resolved 
"https://registry.yarnpkg.com/asynckit/-/asynckit-0.4.0.tgz#c79ed97f7f34cb8f2ba1bc9790bcc366474b4b79" + integrity sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q== + +autoprefixer@^10.4.13: + version "10.4.20" + resolved "https://registry.yarnpkg.com/autoprefixer/-/autoprefixer-10.4.20.tgz#5caec14d43976ef42e32dcb4bd62878e96be5b3b" + integrity sha512-XY25y5xSv/wEoqzDyXXME4AFfkZI0P23z6Fs3YgymDnKJkCGOnkL0iTxCa85UTqaSgfcqyf3UA6+c7wUvx/16g== + dependencies: + browserslist "^4.23.3" + caniuse-lite "^1.0.30001646" + fraction.js "^4.3.7" + normalize-range "^0.1.2" + picocolors "^1.0.1" + postcss-value-parser "^4.2.0" + +available-typed-arrays@^1.0.7: + version "1.0.7" + resolved "https://registry.yarnpkg.com/available-typed-arrays/-/available-typed-arrays-1.0.7.tgz#a5cc375d6a03c2efc87a553f3e0b1522def14846" + integrity sha512-wvUjBtSGN7+7SjNpq/9M2Tg350UZD3q62IFZLbRAR1bSMlCo1ZaeW+BJ+D090e4hIIZLBcTDWe4Mh4jvUDajzQ== + dependencies: + possible-typed-array-names "^1.0.0" + +axios@^1.6.7: + version "1.7.7" + resolved "https://registry.yarnpkg.com/axios/-/axios-1.7.7.tgz#2f554296f9892a72ac8d8e4c5b79c14a91d0a47f" + integrity sha512-S4kL7XrjgBmvdGut0sN3yJxqYzrDOnivkBiN0OFs6hLiUam3UPvswUo0kqGyhqUZGEOytHyumEdXsAkgCOUf3Q== + dependencies: + follow-redirects "^1.15.6" + form-data "^4.0.0" + proxy-from-env "^1.1.0" + +babel-loader@^8.2.5: + version "8.4.1" + resolved "https://registry.yarnpkg.com/babel-loader/-/babel-loader-8.4.1.tgz#6ccb75c66e62c3b144e1c5f2eaec5b8f6c08c675" + integrity sha512-nXzRChX+Z1GoE6yWavBQg6jDslyFF3SDjl2paADuoQtQW10JqShJt62R6eJQ5m/pjJFDT8xgKIWSP85OY8eXeA== + dependencies: + find-cache-dir "^3.3.1" + loader-utils "^2.0.4" + make-dir "^3.1.0" + schema-utils "^2.6.5" + +babel-plugin-polyfill-corejs2@^0.4.10: + version "0.4.11" + resolved "https://registry.yarnpkg.com/babel-plugin-polyfill-corejs2/-/babel-plugin-polyfill-corejs2-0.4.11.tgz#30320dfe3ffe1a336c15afdcdafd6fd615b25e33" + integrity 
sha512-sMEJ27L0gRHShOh5G54uAAPaiCOygY/5ratXuiyb2G46FmlSpc9eFCzYVyDiPxfNbwzA7mYahmjQc5q+CZQ09Q== + dependencies: + "@babel/compat-data" "^7.22.6" + "@babel/helper-define-polyfill-provider" "^0.6.2" + semver "^6.3.1" + +babel-plugin-polyfill-corejs3@^0.10.6: + version "0.10.6" + resolved "https://registry.yarnpkg.com/babel-plugin-polyfill-corejs3/-/babel-plugin-polyfill-corejs3-0.10.6.tgz#2deda57caef50f59c525aeb4964d3b2f867710c7" + integrity sha512-b37+KR2i/khY5sKmWNVQAnitvquQbNdWy6lJdsr0kmquCKEEUgMKK4SboVM3HtfnZilfjr4MMQ7vY58FVWDtIA== + dependencies: + "@babel/helper-define-polyfill-provider" "^0.6.2" + core-js-compat "^3.38.0" + +babel-plugin-polyfill-regenerator@^0.6.1: + version "0.6.2" + resolved "https://registry.yarnpkg.com/babel-plugin-polyfill-regenerator/-/babel-plugin-polyfill-regenerator-0.6.2.tgz#addc47e240edd1da1058ebda03021f382bba785e" + integrity sha512-2R25rQZWP63nGwaAswvDazbPXfrM3HwVoBXK6HcqeKrSrL/JqcC/rDcf95l4r7LXLyxDXc8uQDa064GubtCABg== + dependencies: + "@babel/helper-define-polyfill-provider" "^0.6.2" + +balanced-match@^1.0.0: + version "1.0.2" + resolved "https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.2.tgz#e83e3a7e3f300b34cb9d87f615fa0cbf357690ee" + integrity sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw== + +base64-js@^1.3.1: + version "1.5.1" + resolved "https://registry.yarnpkg.com/base64-js/-/base64-js-1.5.1.tgz#1b1b440160a5bf7ad40b650f095963481903930a" + integrity sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA== + +batch@0.6.1: + version "0.6.1" + resolved "https://registry.yarnpkg.com/batch/-/batch-0.6.1.tgz#dc34314f4e679318093fc760272525f94bf25c16" + integrity sha512-x+VAiMRL6UPkx+kudNvxTl6hB2XNNCG2r+7wixVfIYwu/2HKRXimwQyaumLjMveWvT2Hkd/cAJw+QBMfJ/EKVw== + +big.js@^5.2.2: + version "5.2.2" + resolved "https://registry.yarnpkg.com/big.js/-/big.js-5.2.2.tgz#65f0af382f578bcdc742bd9c281e9cb2d7768328" + integrity 
sha512-vyL2OymJxmarO8gxMr0mhChsO9QGwhynfuu4+MHTAW6czfq9humCB7rKpUjDd9YUiDPU4mzpyupFSvOClAwbmQ== + +bin-build@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/bin-build/-/bin-build-3.0.0.tgz#c5780a25a8a9f966d8244217e6c1f5082a143861" + integrity sha512-jcUOof71/TNAI2uM5uoUaDq2ePcVBQ3R/qhxAz1rX7UfvduAL/RXD3jXzvn8cVcDJdGVkiR1shal3OH0ImpuhA== + dependencies: + decompress "^4.0.0" + download "^6.2.2" + execa "^0.7.0" + p-map-series "^1.0.0" + tempfile "^2.0.0" + +bin-check@^4.1.0: + version "4.1.0" + resolved "https://registry.yarnpkg.com/bin-check/-/bin-check-4.1.0.tgz#fc495970bdc88bb1d5a35fc17e65c4a149fc4a49" + integrity sha512-b6weQyEUKsDGFlACWSIOfveEnImkJyK/FGW6FAG42loyoquvjdtOIqO6yBFzHyqyVVhNgNkQxxx09SFLK28YnA== + dependencies: + execa "^0.7.0" + executable "^4.1.0" + +bin-version-check@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/bin-version-check/-/bin-version-check-4.0.0.tgz#7d819c62496991f80d893e6e02a3032361608f71" + integrity sha512-sR631OrhC+1f8Cvs8WyVWOA33Y8tgwjETNPyyD/myRBXLkfS/vl74FmH/lFcRl9KY3zwGh7jFhvyk9vV3/3ilQ== + dependencies: + bin-version "^3.0.0" + semver "^5.6.0" + semver-truncate "^1.1.2" + +bin-version@^3.0.0: + version "3.1.0" + resolved "https://registry.yarnpkg.com/bin-version/-/bin-version-3.1.0.tgz#5b09eb280752b1bd28f0c9db3f96f2f43b6c0839" + integrity sha512-Mkfm4iE1VFt4xd4vH+gx+0/71esbfus2LsnCGe8Pi4mndSPyT+NGES/Eg99jx8/lUGWfu3z2yuB/bt5UB+iVbQ== + dependencies: + execa "^1.0.0" + find-versions "^3.0.0" + +bin-wrapper@^4.0.0, bin-wrapper@^4.0.1: + version "4.1.0" + resolved "https://registry.yarnpkg.com/bin-wrapper/-/bin-wrapper-4.1.0.tgz#99348f2cf85031e3ef7efce7e5300aeaae960605" + integrity sha512-hfRmo7hWIXPkbpi0ZltboCMVrU+0ClXR/JgbCKKjlDjQf6igXa7OwdqNcFWQZPZTgiY7ZpzE3+LjjkLiTN2T7Q== + dependencies: + bin-check "^4.1.0" + bin-version-check "^4.0.0" + download "^7.1.0" + import-lazy "^3.1.0" + os-filter-obj "^2.0.0" + pify "^4.0.1" + +binary-extensions@^2.0.0: + version "2.3.0" + resolved 
"https://registry.yarnpkg.com/binary-extensions/-/binary-extensions-2.3.0.tgz#f6e14a97858d327252200242d4ccfe522c445522" + integrity sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw== + +bl@^1.0.0: + version "1.2.3" + resolved "https://registry.yarnpkg.com/bl/-/bl-1.2.3.tgz#1e8dd80142eac80d7158c9dccc047fb620e035e7" + integrity sha512-pvcNpa0UU69UT341rO6AYy4FVAIkUHuZXRIWbq+zHnsVcRzDDjIAhGuuYoi0d//cwIwtt4pkpKycWEfjdV+vww== + dependencies: + readable-stream "^2.3.5" + safe-buffer "^5.1.1" + +body-parser@1.20.3: + version "1.20.3" + resolved "https://registry.yarnpkg.com/body-parser/-/body-parser-1.20.3.tgz#1953431221c6fb5cd63c4b36d53fab0928e548c6" + integrity sha512-7rAxByjUMqQ3/bHJy7D6OGXvx/MMc4IqBn/X0fcM1QUcAItpZrBEYhWGem+tzXH90c+G01ypMcYJBO9Y30203g== + dependencies: + bytes "3.1.2" + content-type "~1.0.5" + debug "2.6.9" + depd "2.0.0" + destroy "1.2.0" + http-errors "2.0.0" + iconv-lite "0.4.24" + on-finished "2.4.1" + qs "6.13.0" + raw-body "2.5.2" + type-is "~1.6.18" + unpipe "1.0.0" + +bonjour-service@^1.0.11: + version "1.2.1" + resolved "https://registry.yarnpkg.com/bonjour-service/-/bonjour-service-1.2.1.tgz#eb41b3085183df3321da1264719fbada12478d02" + integrity sha512-oSzCS2zV14bh2kji6vNe7vrpJYCHGvcZnlffFQ1MEoX/WOeQ/teD8SYWKR942OI3INjq8OMNJlbPK5LLLUxFDw== + dependencies: + fast-deep-equal "^3.1.3" + multicast-dns "^7.2.5" + +boolbase@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/boolbase/-/boolbase-1.0.0.tgz#68dff5fbe60c51eb37725ea9e3ed310dcc1e776e" + integrity sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww== + +brace-expansion@^1.1.7: + version "1.1.11" + resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.11.tgz#3c7fcbf529d87226f3d2f52b966ff5271eb441dd" + integrity sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA== + dependencies: + balanced-match "^1.0.0" + concat-map "0.0.1" 
+ +brace-expansion@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-2.0.1.tgz#1edc459e0f0c548486ecf9fc99f2221364b9a0ae" + integrity sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA== + dependencies: + balanced-match "^1.0.0" + +braces@^3.0.3, braces@~3.0.2: + version "3.0.3" + resolved "https://registry.yarnpkg.com/braces/-/braces-3.0.3.tgz#490332f40919452272d55a8480adc0c441358789" + integrity sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA== + dependencies: + fill-range "^7.1.1" + +browserslist@^4.21.4, browserslist@^4.23.3, browserslist@^4.24.0, browserslist@^4.24.2: + version "4.24.2" + resolved "https://registry.yarnpkg.com/browserslist/-/browserslist-4.24.2.tgz#f5845bc91069dbd55ee89faf9822e1d885d16580" + integrity sha512-ZIc+Q62revdMcqC6aChtW4jz3My3klmCO1fEmINZY/8J3EpBg5/A/D0AKmBveUh6pgoeycoMkVMko84tuYS+Gg== + dependencies: + caniuse-lite "^1.0.30001669" + electron-to-chromium "^1.5.41" + node-releases "^2.0.18" + update-browserslist-db "^1.1.1" + +buffer-alloc-unsafe@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/buffer-alloc-unsafe/-/buffer-alloc-unsafe-1.1.0.tgz#bd7dc26ae2972d0eda253be061dba992349c19f0" + integrity sha512-TEM2iMIEQdJ2yjPJoSIsldnleVaAk1oW3DBVUykyOLsEsFmEc9kn+SFFPz+gl54KQNxlDnAwCXosOS9Okx2xAg== + +buffer-alloc@^1.2.0: + version "1.2.0" + resolved "https://registry.yarnpkg.com/buffer-alloc/-/buffer-alloc-1.2.0.tgz#890dd90d923a873e08e10e5fd51a57e5b7cce0ec" + integrity sha512-CFsHQgjtW1UChdXgbyJGtnm+O/uLQeZdtbDo8mfUgYXCHSM1wgrVxXm6bSyrUuErEb+4sYVGCzASBRot7zyrow== + dependencies: + buffer-alloc-unsafe "^1.1.0" + buffer-fill "^1.0.0" + +buffer-crc32@~0.2.3: + version "0.2.13" + resolved "https://registry.yarnpkg.com/buffer-crc32/-/buffer-crc32-0.2.13.tgz#0d333e3f00eac50aa1454abd30ef8c2a5d9a7242" + integrity 
sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ== + +buffer-fill@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/buffer-fill/-/buffer-fill-1.0.0.tgz#f8f78b76789888ef39f205cd637f68e702122b2c" + integrity sha512-T7zexNBwiiaCOGDg9xNX9PBmjrubblRkENuptryuI64URkXDFum9il/JGL8Lm8wYfAXpredVXXZz7eMHilimiQ== + +buffer-from@^1.0.0: + version "1.1.2" + resolved "https://registry.yarnpkg.com/buffer-from/-/buffer-from-1.1.2.tgz#2b146a6fd72e80b4f55d255f35ed59a3a9a41bd5" + integrity sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ== + +buffer@^5.2.1: + version "5.7.1" + resolved "https://registry.yarnpkg.com/buffer/-/buffer-5.7.1.tgz#ba62e7c13133053582197160851a8f648e99eed0" + integrity sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ== + dependencies: + base64-js "^1.3.1" + ieee754 "^1.1.13" + +bufferutil@^4.0.7: + version "4.0.8" + resolved "https://registry.yarnpkg.com/bufferutil/-/bufferutil-4.0.8.tgz#1de6a71092d65d7766c4d8a522b261a6e787e8ea" + integrity sha512-4T53u4PdgsXqKaIctwF8ifXlRTTmEPJ8iEPWFdGZvcf7sbwYo6FKFEX9eNNAnzFZ7EzJAQ3CJeOtCRA4rDp7Pw== + dependencies: + node-gyp-build "^4.3.0" + +bytes@3.1.2: + version "3.1.2" + resolved "https://registry.yarnpkg.com/bytes/-/bytes-3.1.2.tgz#8b0beeb98605adf1b128fa4386403c009e0221a5" + integrity sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg== + +cacheable-request@^2.1.1: + version "2.1.4" + resolved "https://registry.yarnpkg.com/cacheable-request/-/cacheable-request-2.1.4.tgz#0d808801b6342ad33c91df9d0b44dc09b91e5c3d" + integrity sha512-vag0O2LKZ/najSoUwDbVlnlCFvhBE/7mGTY2B5FgCBDcRD+oVV1HYTOwM6JZfMg/hIcM6IwnTZ1uQQL5/X3xIQ== + dependencies: + clone-response "1.0.2" + get-stream "3.0.0" + http-cache-semantics "3.8.1" + keyv "3.0.0" + lowercase-keys "1.0.0" + normalize-url "2.0.1" + responselike "1.0.2" + +call-bind@^1.0.2, 
call-bind@^1.0.5, call-bind@^1.0.6, call-bind@^1.0.7: + version "1.0.7" + resolved "https://registry.yarnpkg.com/call-bind/-/call-bind-1.0.7.tgz#06016599c40c56498c18769d2730be242b6fa3b9" + integrity sha512-GHTSNSYICQ7scH7sZ+M2rFopRoLh8t2bLSW6BbgrtLsahOIB5iyAVJf9GjWK3cYTDaMj4XdBpM1cA6pIS0Kv2w== + dependencies: + es-define-property "^1.0.0" + es-errors "^1.3.0" + function-bind "^1.1.2" + get-intrinsic "^1.2.4" + set-function-length "^1.2.1" + +callsites@^3.0.0: + version "3.1.0" + resolved "https://registry.yarnpkg.com/callsites/-/callsites-3.1.0.tgz#b3630abd8943432f54b3f0519238e33cd7df2f73" + integrity sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ== + +camel-case@^4.1.2: + version "4.1.2" + resolved "https://registry.yarnpkg.com/camel-case/-/camel-case-4.1.2.tgz#9728072a954f805228225a6deea6b38461e1bd5a" + integrity sha512-gxGWBrTT1JuMx6R+o5PTXMmUnhnVzLQ9SNutD4YqKtI6ap897t3tKECYla6gCWEkplXnlNybEkZg9GEGxKFCgw== + dependencies: + pascal-case "^3.1.2" + tslib "^2.0.3" + +camelcase-css@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/camelcase-css/-/camelcase-css-2.0.1.tgz#ee978f6947914cc30c6b44741b6ed1df7f043fd5" + integrity sha512-QOSvevhslijgYwRx6Rv7zKdMF8lbRmx+uQGx2+vDc+KI/eBnsy9kit5aj23AgGu3pa4t9AgwbnXWqS+iOY+2aA== + +caniuse-lite@^1.0.30001646, caniuse-lite@^1.0.30001669: + version "1.0.30001676" + resolved "https://registry.yarnpkg.com/caniuse-lite/-/caniuse-lite-1.0.30001676.tgz#fe133d41fe74af8f7cc93b8a714c3e86a86e6f04" + integrity sha512-Qz6zwGCiPghQXGJvgQAem79esjitvJ+CxSbSQkW9H/UX5hg8XM88d4lp2W+MEQ81j+Hip58Il+jGVdazk1z9cw== + +caw@^2.0.0, caw@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/caw/-/caw-2.0.1.tgz#6c3ca071fc194720883c2dc5da9b074bfc7e9e95" + integrity sha512-Cg8/ZSBEa8ZVY9HspcGUYaK63d/bN7rqS3CYCzEGUxuYv6UlmcjzDUz2fCFFHyTvUW5Pk0I+3hkA3iXlIj6guA== + dependencies: + get-proxy "^2.0.0" + isurl "^1.0.0-alpha5" + tunnel-agent "^0.6.0" + url-to-options "^1.0.1" + 
+chalk@^1.1.3: + version "1.1.3" + resolved "https://registry.yarnpkg.com/chalk/-/chalk-1.1.3.tgz#a8115c55e4a702fe4d150abd3872822a7e09fc98" + integrity sha512-U3lRVLMSlsCfjqYPbLyVv11M9CPW4I728d6TCKMAOJueEeB9/8o+eSsMnxPJD+Q+K909sdESg7C+tIkoH6on1A== + dependencies: + ansi-styles "^2.2.1" + escape-string-regexp "^1.0.2" + has-ansi "^2.0.0" + strip-ansi "^3.0.0" + supports-color "^2.0.0" + +chalk@^4.0.0, chalk@^4.1.0, chalk@^4.1.2: + version "4.1.2" + resolved "https://registry.yarnpkg.com/chalk/-/chalk-4.1.2.tgz#aac4e2b7734a740867aeb16bf02aad556a1e7a01" + integrity sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA== + dependencies: + ansi-styles "^4.1.0" + supports-color "^7.1.0" + +chokidar@^3.5.3: + version "3.6.0" + resolved "https://registry.yarnpkg.com/chokidar/-/chokidar-3.6.0.tgz#197c6cc669ef2a8dc5e7b4d97ee4e092c3eb0d5b" + integrity sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw== + dependencies: + anymatch "~3.1.2" + braces "~3.0.2" + glob-parent "~5.1.2" + is-binary-path "~2.1.0" + is-glob "~4.0.1" + normalize-path "~3.0.0" + readdirp "~3.6.0" + optionalDependencies: + fsevents "~2.3.2" + +chokidar@^4.0.0: + version "4.0.1" + resolved "https://registry.yarnpkg.com/chokidar/-/chokidar-4.0.1.tgz#4a6dff66798fb0f72a94f616abbd7e1a19f31d41" + integrity sha512-n8enUVCED/KVRQlab1hr3MVpcVMvxtZjmEa956u+4YijlmQED223XMSYj2tLuKvr4jcCTzNNMpQDUer72MMmzA== + dependencies: + readdirp "^4.0.1" + +chrome-trace-event@^1.0.2: + version "1.0.4" + resolved "https://registry.yarnpkg.com/chrome-trace-event/-/chrome-trace-event-1.0.4.tgz#05bffd7ff928465093314708c93bdfa9bd1f0f5b" + integrity sha512-rNjApaLzuwaOTjCiT8lSDdGN1APCiqkChLMJxJPWLunPAt5fy8xgU9/jNOchV84wfIxrA0lRQB7oCT8jrn/wrQ== + +clean-css@^5.2.2: + version "5.3.3" + resolved "https://registry.yarnpkg.com/clean-css/-/clean-css-5.3.3.tgz#b330653cd3bd6b75009cc25c714cae7b93351ccd" + integrity 
sha512-D5J+kHaVb/wKSFcyyV75uCn8fiY4sV38XJoe4CUyGQ+mOU/fMVYUdH1hJC+CJQ5uY3EnW27SbJYS4X8BiLrAFg== + dependencies: + source-map "~0.6.0" + +cli-width@^4.1.0: + version "4.1.0" + resolved "https://registry.yarnpkg.com/cli-width/-/cli-width-4.1.0.tgz#42daac41d3c254ef38ad8ac037672130173691c5" + integrity sha512-ouuZd4/dm2Sw5Gmqy6bGyNNNe1qt9RpmxveLSO7KcgsTnU7RXfsw+/bukWGo1abgBiMAic068rclZsO4IWmmxQ== + +cliui@^8.0.1: + version "8.0.1" + resolved "https://registry.yarnpkg.com/cliui/-/cliui-8.0.1.tgz#0c04b075db02cbfe60dc8e6cf2f5486b1a3608aa" + integrity sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ== + dependencies: + string-width "^4.2.0" + strip-ansi "^6.0.1" + wrap-ansi "^7.0.0" + +clone-deep@^4.0.1: + version "4.0.1" + resolved "https://registry.yarnpkg.com/clone-deep/-/clone-deep-4.0.1.tgz#c19fd9bdbbf85942b4fd979c84dcf7d5f07c2387" + integrity sha512-neHB9xuzh/wk0dIHweyAXv2aPGZIVk3pLMe+/RNzINf17fe0OG96QroktYAUm7SM1PBnzTabaLboqqxDyMU+SQ== + dependencies: + is-plain-object "^2.0.4" + kind-of "^6.0.2" + shallow-clone "^3.0.0" + +clone-response@1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/clone-response/-/clone-response-1.0.2.tgz#d1dc973920314df67fbeb94223b4ee350239e96b" + integrity sha512-yjLXh88P599UOyPTFX0POsd7WxnbsVsGohcwzHOLspIhhpalPw1BcqED8NblyZLKcGrL8dTgMlcaZxV2jAD41Q== + dependencies: + mimic-response "^1.0.0" + +color-convert@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-2.0.1.tgz#72d3a68d598c9bdb3af2ad1e84f21d896abd4de3" + integrity sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ== + dependencies: + color-name "~1.1.4" + +color-name@~1.1.4: + version "1.1.4" + resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.4.tgz#c2a09a87acbde69543de6f63fa3995c826c536a2" + integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA== + +colorette@^2.0.10, 
colorette@^2.0.14: + version "2.0.20" + resolved "https://registry.yarnpkg.com/colorette/-/colorette-2.0.20.tgz#9eb793e6833067f7235902fcd3b09917a000a95a" + integrity sha512-IfEDxwoWIjkeXL1eXcDiow4UbKjhLdq6/EuSVR9GMN7KVH3r9gQ83e73hsz1Nd1T3ijd5xv1wcWRYO+D6kCI2w== + +combined-stream@^1.0.8: + version "1.0.8" + resolved "https://registry.yarnpkg.com/combined-stream/-/combined-stream-1.0.8.tgz#c3d45a8b34fd730631a110a8a2520682b31d5a7f" + integrity sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg== + dependencies: + delayed-stream "~1.0.0" + +commander@^2.20.0, commander@^2.8.1: + version "2.20.3" + resolved "https://registry.yarnpkg.com/commander/-/commander-2.20.3.tgz#fd485e84c03eb4881c20722ba48035e8531aeb33" + integrity sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ== + +commander@^4.0.0: + version "4.1.1" + resolved "https://registry.yarnpkg.com/commander/-/commander-4.1.1.tgz#9fd602bd936294e9e9ef46a3f4d6964044b18068" + integrity sha512-NOKm8xhkzAjzFx8B2v5OAHT+u5pRQc2UCa2Vq9jYL/31o2wi9mxBA7LIFs3sV5VSC49z6pEhfbMULvShKj26WA== + +commander@^7.0.0, commander@^7.2.0: + version "7.2.0" + resolved "https://registry.yarnpkg.com/commander/-/commander-7.2.0.tgz#a36cb57d0b501ce108e4d20559a150a391d97ab7" + integrity sha512-QrWXB+ZQSVPmIWIhtEO9H+gwHaMGYiF5ChvoJ+K9ZGHG/sVsa6yiesAD1GC/x46sET00Xlwo1u49RVVVzvcSkw== + +commander@^8.3.0: + version "8.3.0" + resolved "https://registry.yarnpkg.com/commander/-/commander-8.3.0.tgz#4837ea1b2da67b9c616a67afbb0fafee567bca66" + integrity sha512-OkTL9umf+He2DZkUq8f8J9of7yL6RJKI24dVITBmNfZBmri9zYZQrKkuXiKhyfPSu8tUhnVBB1iKXevvnlR4Ww== + +commondir@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/commondir/-/commondir-1.0.1.tgz#ddd800da0c66127393cca5950ea968a3aaf1253b" + integrity sha512-W9pAhw0ja1Edb5GVdIF1mjZw/ASI0AlShXM83UUGe2DVr5TdAPEA1OA8m/g8zWp9x6On7gqufY+FatDbC3MDQg== + +compressible@~2.0.18: + version "2.0.18" + resolved 
"https://registry.yarnpkg.com/compressible/-/compressible-2.0.18.tgz#af53cca6b070d4c3c0750fbd77286a6d7cc46fba" + integrity sha512-AF3r7P5dWxL8MxyITRMlORQNaOA2IkAFaTr4k7BUumjPtRpGDTZpl0Pb1XCO6JeDCBdp126Cgs9sMxqSjgYyRg== + dependencies: + mime-db ">= 1.43.0 < 2" + +compression@^1.7.4: + version "1.7.5" + resolved "https://registry.yarnpkg.com/compression/-/compression-1.7.5.tgz#fdd256c0a642e39e314c478f6c2cd654edd74c93" + integrity sha512-bQJ0YRck5ak3LgtnpKkiabX5pNF7tMUh1BSy2ZBOTh0Dim0BUu6aPPwByIns6/A5Prh8PufSPerMDUklpzes2Q== + dependencies: + bytes "3.1.2" + compressible "~2.0.18" + debug "2.6.9" + negotiator "~0.6.4" + on-headers "~1.0.2" + safe-buffer "5.2.1" + vary "~1.1.2" + +concat-map@0.0.1: + version "0.0.1" + resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b" + integrity sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg== + +config-chain@^1.1.11: + version "1.1.13" + resolved "https://registry.yarnpkg.com/config-chain/-/config-chain-1.1.13.tgz#fad0795aa6a6cdaff9ed1b68e9dff94372c232f4" + integrity sha512-qj+f8APARXHrM0hraqXYb2/bOVSV4PvJQlNZ/DVj0QrmNM2q2euizkeuVckQ57J+W0mRH6Hvi+k50M4Jul2VRQ== + dependencies: + ini "^1.3.4" + proto-list "~1.2.1" + +connect-history-api-fallback@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/connect-history-api-fallback/-/connect-history-api-fallback-2.0.0.tgz#647264845251a0daf25b97ce87834cace0f5f1c8" + integrity sha512-U73+6lQFmfiNPrYbXqr6kZ1i1wiRqXnp2nhMsINseWXO8lDau0LGEffJ8kQi4EjLZympVgRdvqjAgiZ1tgzDDA== + +content-disposition@0.5.4, content-disposition@^0.5.2: + version "0.5.4" + resolved "https://registry.yarnpkg.com/content-disposition/-/content-disposition-0.5.4.tgz#8b82b4efac82512a02bb0b1dcec9d2c5e8eb5bfe" + integrity sha512-FveZTNuGw04cxlAiWbzi6zTAL/lhehaWbTtgluJh4/E95DqMwTmha3KZN1aAWA8cFIhHzMZUvLevkw5Rqk+tSQ== + dependencies: + safe-buffer "5.2.1" + +content-type@~1.0.4, content-type@~1.0.5: + 
version "1.0.5" + resolved "https://registry.yarnpkg.com/content-type/-/content-type-1.0.5.tgz#8b773162656d1d1086784c8f23a54ce6d73d7918" + integrity sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA== + +convert-source-map@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/convert-source-map/-/convert-source-map-2.0.0.tgz#4b560f649fc4e918dd0ab75cf4961e8bc882d82a" + integrity sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg== + +cookie-signature@1.0.6: + version "1.0.6" + resolved "https://registry.yarnpkg.com/cookie-signature/-/cookie-signature-1.0.6.tgz#e303a882b342cc3ee8ca513a79999734dab3ae2c" + integrity sha512-QADzlaHc8icV8I7vbaJXJwod9HWYp8uCqf1xa4OfNu1T7JVxQIrUgOWtHdNDtPiywmFbiS12VjotIXLrKM3orQ== + +cookie@0.7.1: + version "0.7.1" + resolved "https://registry.yarnpkg.com/cookie/-/cookie-0.7.1.tgz#2f73c42142d5d5cf71310a74fc4ae61670e5dbc9" + integrity sha512-6DnInpx7SJ2AK3+CTUE/ZM0vWTUboZCegxhC2xiIydHR9jNuTAASBrfEpHhiGOZw/nX51bHt6YQl8jsGo4y/0w== + +cookie@^0.7.2: + version "0.7.2" + resolved "https://registry.yarnpkg.com/cookie/-/cookie-0.7.2.tgz#556369c472a2ba910f2979891b526b3436237ed7" + integrity sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w== + +copy-webpack-plugin@^11.0.0: + version "11.0.0" + resolved "https://registry.yarnpkg.com/copy-webpack-plugin/-/copy-webpack-plugin-11.0.0.tgz#96d4dbdb5f73d02dd72d0528d1958721ab72e04a" + integrity sha512-fX2MWpamkW0hZxMEg0+mYnA40LTosOSa5TqZ9GYIBzyJa9C3QUaMPSE2xAi/buNr8u89SfD9wHSQVBzrRa/SOQ== + dependencies: + fast-glob "^3.2.11" + glob-parent "^6.0.1" + globby "^13.1.1" + normalize-path "^3.0.0" + schema-utils "^4.0.0" + serialize-javascript "^6.0.0" + +core-js-compat@^3.38.0, core-js-compat@^3.38.1: + version "3.39.0" + resolved "https://registry.yarnpkg.com/core-js-compat/-/core-js-compat-3.39.0.tgz#b12dccb495f2601dc860bdbe7b4e3ffa8ba63f61" + integrity 
sha512-VgEUx3VwlExr5no0tXlBt+silBvhTryPwCXRI2Id1PN8WTKu7MreethvddqOubrYxkFdv/RnYrqlv1sFNAUelw== + dependencies: + browserslist "^4.24.2" + +core-js-pure@^3.23.3: + version "3.39.0" + resolved "https://registry.yarnpkg.com/core-js-pure/-/core-js-pure-3.39.0.tgz#aa0d54d70a15bdc13e7c853db87c10abc30d68f3" + integrity sha512-7fEcWwKI4rJinnK+wLTezeg2smbFFdSBP6E2kQZNbnzM2s1rpKQ6aaRteZSSg7FLU3P0HGGVo/gbpfanU36urg== + +core-util-is@~1.0.0: + version "1.0.3" + resolved "https://registry.yarnpkg.com/core-util-is/-/core-util-is-1.0.3.tgz#a6042d3634c2b27e9328f837b965fac83808db85" + integrity sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ== + +cosmiconfig@^7.0.1: + version "7.1.0" + resolved "https://registry.yarnpkg.com/cosmiconfig/-/cosmiconfig-7.1.0.tgz#1443b9afa596b670082ea46cbd8f6a62b84635f6" + integrity sha512-AdmX6xUzdNASswsFtmwSt7Vj8po9IuqXm0UXz7QKPuEUmPB4XyjGfaAr2PSuELMwkRMVH1EpIkX5bTZGRB3eCA== + dependencies: + "@types/parse-json" "^4.0.0" + import-fresh "^3.2.1" + parse-json "^5.0.0" + path-type "^4.0.0" + yaml "^1.10.0" + +cosmiconfig@^8.3.5: + version "8.3.6" + resolved "https://registry.yarnpkg.com/cosmiconfig/-/cosmiconfig-8.3.6.tgz#060a2b871d66dba6c8538ea1118ba1ac16f5fae3" + integrity sha512-kcZ6+W5QzcJ3P1Mt+83OUv/oHFqZHIx8DuxG6eZ5RGMERoLqp4BuGjhHLYGK+Kf5XVkQvqBSmAy/nGWN3qDgEA== + dependencies: + import-fresh "^3.3.0" + js-yaml "^4.1.0" + parse-json "^5.2.0" + path-type "^4.0.0" + +cross-fetch@^3.1.5: + version "3.1.8" + resolved "https://registry.yarnpkg.com/cross-fetch/-/cross-fetch-3.1.8.tgz#0327eba65fd68a7d119f8fb2bf9334a1a7956f82" + integrity sha512-cvA+JwZoU0Xq+h6WkMvAUqPEYy92Obet6UdKLfW60qn99ftItKjB5T+BkyWOFWe2pUyfQ+IJHmpOTznqk1M6Kg== + dependencies: + node-fetch "^2.6.12" + +cross-spawn@^5.0.1: + version "5.1.0" + resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-5.1.0.tgz#e8bd0efee58fcff6f8f94510a0a554bbfa235449" + integrity 
sha512-pTgQJ5KC0d2hcY8eyL1IzlBPYjTkyH72XRZPnLyKus2mBfNjQs3klqbJU2VILqZryAZUt9JOb3h/mWMy23/f5A== + dependencies: + lru-cache "^4.0.1" + shebang-command "^1.2.0" + which "^1.2.9" + +cross-spawn@^6.0.0: + version "6.0.5" + resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-6.0.5.tgz#4a5ec7c64dfae22c3a14124dbacdee846d80cbc4" + integrity sha512-eTVLrBSt7fjbDygz805pMnstIs2VTBNkRm0qxZd+M7A5XDdxVRWO5MxGBXZhjY4cqLYLdtrGqRf8mBPmzwSpWQ== + dependencies: + nice-try "^1.0.4" + path-key "^2.0.1" + semver "^5.5.0" + shebang-command "^1.2.0" + which "^1.2.9" + +cross-spawn@^7.0.0, cross-spawn@^7.0.2, cross-spawn@^7.0.3: + version "7.0.3" + resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-7.0.3.tgz#f73a85b9d5d41d045551c177e2882d4ac85728a6" + integrity sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w== + dependencies: + path-key "^3.1.0" + shebang-command "^2.0.0" + which "^2.0.1" + +css-blank-pseudo@^3.0.3: + version "3.0.3" + resolved "https://registry.yarnpkg.com/css-blank-pseudo/-/css-blank-pseudo-3.0.3.tgz#36523b01c12a25d812df343a32c322d2a2324561" + integrity sha512-VS90XWtsHGqoM0t4KpH053c4ehxZ2E6HtGI7x68YFV0pTo/QmkV/YFA+NnlvK8guxZVNWGQhVNJGC39Q8XF4OQ== + dependencies: + postcss-selector-parser "^6.0.9" + +css-has-pseudo@^3.0.4: + version "3.0.4" + resolved "https://registry.yarnpkg.com/css-has-pseudo/-/css-has-pseudo-3.0.4.tgz#57f6be91ca242d5c9020ee3e51bbb5b89fc7af73" + integrity sha512-Vse0xpR1K9MNlp2j5w1pgWIJtm1a8qS0JwS9goFYcImjlHEmywP9VUF05aGBXzGpDJF86QXk4L0ypBmwPhGArw== + dependencies: + postcss-selector-parser "^6.0.9" + +css-loader@^6.7.1: + version "6.11.0" + resolved "https://registry.yarnpkg.com/css-loader/-/css-loader-6.11.0.tgz#33bae3bf6363d0a7c2cf9031c96c744ff54d85ba" + integrity sha512-CTJ+AEQJjq5NzLga5pE39qdiSV56F8ywCIsqNIRF0r7BDgWsN25aazToqAFg7ZrtA/U016xudB3ffgweORxX7g== + dependencies: + icss-utils "^5.1.0" + postcss "^8.4.33" + postcss-modules-extract-imports "^3.1.0" + 
postcss-modules-local-by-default "^4.0.5" + postcss-modules-scope "^3.2.0" + postcss-modules-values "^4.0.0" + postcss-value-parser "^4.2.0" + semver "^7.5.4" + +css-prefers-color-scheme@^6.0.3: + version "6.0.3" + resolved "https://registry.yarnpkg.com/css-prefers-color-scheme/-/css-prefers-color-scheme-6.0.3.tgz#ca8a22e5992c10a5b9d315155e7caee625903349" + integrity sha512-4BqMbZksRkJQx2zAjrokiGMd07RqOa2IxIrrN10lyBe9xhn9DEvjUK79J6jkeiv9D9hQFXKb6g1jwU62jziJZA== + +css-select@^4.1.3: + version "4.3.0" + resolved "https://registry.yarnpkg.com/css-select/-/css-select-4.3.0.tgz#db7129b2846662fd8628cfc496abb2b59e41529b" + integrity sha512-wPpOYtnsVontu2mODhA19JrqWxNsfdatRKd64kmpRbQgh1KtItko5sTnEpPdpSaJszTOhEMlF/RPz28qj4HqhQ== + dependencies: + boolbase "^1.0.0" + css-what "^6.0.1" + domhandler "^4.3.1" + domutils "^2.8.0" + nth-check "^2.0.1" + +css-tree@^1.1.2, css-tree@^1.1.3: + version "1.1.3" + resolved "https://registry.yarnpkg.com/css-tree/-/css-tree-1.1.3.tgz#eb4870fb6fd7707327ec95c2ff2ab09b5e8db91d" + integrity sha512-tRpdppF7TRazZrjJ6v3stzv93qxRcSsFmW6cX0Zm2NVKpxE1WV1HblnghVv9TreireHkqI/VDEsfolRF1p6y7Q== + dependencies: + mdn-data "2.0.14" + source-map "^0.6.1" + +css-what@^6.0.1: + version "6.1.0" + resolved "https://registry.yarnpkg.com/css-what/-/css-what-6.1.0.tgz#fb5effcf76f1ddea2c81bdfaa4de44e79bac70f4" + integrity sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw== + +cssdb@^7.1.0: + version "7.11.2" + resolved "https://registry.yarnpkg.com/cssdb/-/cssdb-7.11.2.tgz#127a2f5b946ee653361a5af5333ea85a39df5ae5" + integrity sha512-lhQ32TFkc1X4eTefGfYPvgovRSzIMofHkigfH8nWtyRL4XJLsRhJFreRvEgKzept7x1rjBuy3J/MurXLaFxW/A== + +cssesc@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/cssesc/-/cssesc-3.0.0.tgz#37741919903b868565e1c09ea747445cd18983ee" + integrity sha512-/Tb/JcjK111nNScGob5MNtsntNM1aCNUDipB/TkwZFhyDrrE47SOx/18wF2bbjgc3ZzCSKW1T5nt5EbFoAz/Vg== + +csso@^4.2.0: + version "4.2.0" + resolved 
"https://registry.yarnpkg.com/csso/-/csso-4.2.0.tgz#ea3a561346e8dc9f546d6febedd50187cf389529" + integrity sha512-wvlcdIbf6pwKEk7vHj8/Bkc0B4ylXZruLvOgs9doS5eOsOpuodOV2zJChSpkp+pRpYQLQMeF04nr3Z68Sta9jA== + dependencies: + css-tree "^1.1.2" + +csstype@^3.0.2: + version "3.1.3" + resolved "https://registry.yarnpkg.com/csstype/-/csstype-3.1.3.tgz#d80ff294d114fb0e6ac500fbf85b60137d7eff81" + integrity sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw== + +cwebp-bin@^7.0.1: + version "7.0.1" + resolved "https://registry.yarnpkg.com/cwebp-bin/-/cwebp-bin-7.0.1.tgz#cb1303bf43f645ba5b2ece342773c4a93574d4f4" + integrity sha512-Ko5ADY74/dbfd8xG0+f+MUP9UKjCe1TG4ehpW0E5y4YlPdwDJlGrSzSR4/Yonxpm9QmZE1RratkIxFlKeyo3FA== + dependencies: + bin-build "^3.0.0" + bin-wrapper "^4.0.1" + +data-view-buffer@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/data-view-buffer/-/data-view-buffer-1.0.1.tgz#8ea6326efec17a2e42620696e671d7d5a8bc66b2" + integrity sha512-0lht7OugA5x3iJLOWFhWK/5ehONdprk0ISXqVFn/NFrDu+cuc8iADFrGQz5BnRK7LLU3JmkbXSxaqX+/mXYtUA== + dependencies: + call-bind "^1.0.6" + es-errors "^1.3.0" + is-data-view "^1.0.1" + +data-view-byte-length@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/data-view-byte-length/-/data-view-byte-length-1.0.1.tgz#90721ca95ff280677eb793749fce1011347669e2" + integrity sha512-4J7wRJD3ABAzr8wP+OcIcqq2dlUKp4DVflx++hs5h5ZKydWMI6/D/fAot+yh6g2tHh8fLFTvNOaVN357NvSrOQ== + dependencies: + call-bind "^1.0.7" + es-errors "^1.3.0" + is-data-view "^1.0.1" + +data-view-byte-offset@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/data-view-byte-offset/-/data-view-byte-offset-1.0.0.tgz#5e0bbfb4828ed2d1b9b400cd8a7d119bca0ff18a" + integrity sha512-t/Ygsytq+R995EJ5PZlD4Cu56sWa8InXySaViRzw9apusqsOO2bQP+SbYzAhR0pFKoB+43lYy8rWban9JSuXnA== + dependencies: + call-bind "^1.0.6" + es-errors "^1.3.0" + is-data-view "^1.0.1" + +debug@2.6.9: + version "2.6.9" + resolved 
"https://registry.yarnpkg.com/debug/-/debug-2.6.9.tgz#5d128515df134ff327e90a4c93f4e077a536341f" + integrity sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA== + dependencies: + ms "2.0.0" + +debug@^4.1.0, debug@^4.1.1, debug@^4.3.1, debug@^4.3.2, debug@^4.3.4: + version "4.3.7" + resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.7.tgz#87945b4151a011d76d95a198d7111c865c360a52" + integrity sha512-Er2nc/H7RrMXZBFCEim6TCmMk02Z8vLC2Rbi1KEBggpo0fS6l0S1nnapwmIi3yW/+GOJap1Krg4w0Hg80oCqgQ== + dependencies: + ms "^2.1.3" + +decode-uri-component@^0.2.0: + version "0.2.2" + resolved "https://registry.yarnpkg.com/decode-uri-component/-/decode-uri-component-0.2.2.tgz#e69dbe25d37941171dd540e024c444cd5188e1e9" + integrity sha512-FqUYQ+8o158GyGTrMFJms9qh3CqTKvAqgqsTnkLI8sKu0028orqBhxNMFkFen0zGyg6epACD32pjVk58ngIErQ== + +decompress-response@^3.2.0, decompress-response@^3.3.0: + version "3.3.0" + resolved "https://registry.yarnpkg.com/decompress-response/-/decompress-response-3.3.0.tgz#80a4dd323748384bfa248083622aedec982adff3" + integrity sha512-BzRPQuY1ip+qDonAOz42gRm/pg9F768C+npV/4JOsxRC2sq+Rlk+Q4ZCAsOhnIaMrgarILY+RMUIvMmmX1qAEA== + dependencies: + mimic-response "^1.0.0" + +decompress-tar@^4.0.0, decompress-tar@^4.1.0, decompress-tar@^4.1.1: + version "4.1.1" + resolved "https://registry.yarnpkg.com/decompress-tar/-/decompress-tar-4.1.1.tgz#718cbd3fcb16209716e70a26b84e7ba4592e5af1" + integrity sha512-JdJMaCrGpB5fESVyxwpCx4Jdj2AagLmv3y58Qy4GE6HMVjWz1FeVQk1Ct4Kye7PftcdOo/7U7UKzYBJgqnGeUQ== + dependencies: + file-type "^5.2.0" + is-stream "^1.1.0" + tar-stream "^1.5.2" + +decompress-tarbz2@^4.0.0: + version "4.1.1" + resolved "https://registry.yarnpkg.com/decompress-tarbz2/-/decompress-tarbz2-4.1.1.tgz#3082a5b880ea4043816349f378b56c516be1a39b" + integrity sha512-s88xLzf1r81ICXLAVQVzaN6ZmX4A6U4z2nMbOwobxkLoIIfjVMBg7TeguTUXkKeXni795B6y5rnvDw7rxhAq9A== + dependencies: + decompress-tar "^4.1.0" + file-type "^6.1.0" + is-stream "^1.1.0" 
+ seek-bzip "^1.0.5" + unbzip2-stream "^1.0.9" + +decompress-targz@^4.0.0: + version "4.1.1" + resolved "https://registry.yarnpkg.com/decompress-targz/-/decompress-targz-4.1.1.tgz#c09bc35c4d11f3de09f2d2da53e9de23e7ce1eee" + integrity sha512-4z81Znfr6chWnRDNfFNqLwPvm4db3WuZkqV+UgXQzSngG3CEKdBkw5jrv3axjjL96glyiiKjsxJG3X6WBZwX3w== + dependencies: + decompress-tar "^4.1.1" + file-type "^5.2.0" + is-stream "^1.1.0" + +decompress-unzip@^4.0.1: + version "4.0.1" + resolved "https://registry.yarnpkg.com/decompress-unzip/-/decompress-unzip-4.0.1.tgz#deaaccdfd14aeaf85578f733ae8210f9b4848f69" + integrity sha512-1fqeluvxgnn86MOh66u8FjbtJpAFv5wgCT9Iw8rcBqQcCo5tO8eiJw7NNTrvt9n4CRBVq7CstiS922oPgyGLrw== + dependencies: + file-type "^3.8.0" + get-stream "^2.2.0" + pify "^2.3.0" + yauzl "^2.4.2" + +decompress@^4.0.0, decompress@^4.2.0: + version "4.2.1" + resolved "https://registry.yarnpkg.com/decompress/-/decompress-4.2.1.tgz#007f55cc6a62c055afa37c07eb6a4ee1b773f118" + integrity sha512-e48kc2IjU+2Zw8cTb6VZcJQ3lgVbS4uuB1TfCHbiZIP/haNXm+SVyhu+87jts5/3ROpd82GSVCoNs/z8l4ZOaQ== + dependencies: + decompress-tar "^4.0.0" + decompress-tarbz2 "^4.0.0" + decompress-targz "^4.0.0" + decompress-unzip "^4.0.1" + graceful-fs "^4.1.10" + make-dir "^1.0.0" + pify "^2.3.0" + strip-dirs "^2.0.0" + +deep-equal@^2.0.5: + version "2.2.3" + resolved "https://registry.yarnpkg.com/deep-equal/-/deep-equal-2.2.3.tgz#af89dafb23a396c7da3e862abc0be27cf51d56e1" + integrity sha512-ZIwpnevOurS8bpT4192sqAowWM76JDKSHYzMLty3BZGSswgq6pBaH3DhCSW5xVAZICZyKdOBPjwww5wfgT/6PA== + dependencies: + array-buffer-byte-length "^1.0.0" + call-bind "^1.0.5" + es-get-iterator "^1.1.3" + get-intrinsic "^1.2.2" + is-arguments "^1.1.1" + is-array-buffer "^3.0.2" + is-date-object "^1.0.5" + is-regex "^1.1.4" + is-shared-array-buffer "^1.0.2" + isarray "^2.0.5" + object-is "^1.1.5" + object-keys "^1.1.1" + object.assign "^4.1.4" + regexp.prototype.flags "^1.5.1" + side-channel "^1.0.4" + which-boxed-primitive "^1.0.2" + 
which-collection "^1.0.1" + which-typed-array "^1.1.13" + +deep-is@^0.1.3: + version "0.1.4" + resolved "https://registry.yarnpkg.com/deep-is/-/deep-is-0.1.4.tgz#a6f2dce612fadd2ef1f519b73551f17e85199831" + integrity sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ== + +deepmerge@^4.2.2: + version "4.3.1" + resolved "https://registry.yarnpkg.com/deepmerge/-/deepmerge-4.3.1.tgz#44b5f2147cd3b00d4b56137685966f26fd25dd4a" + integrity sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A== + +default-gateway@^6.0.3: + version "6.0.3" + resolved "https://registry.yarnpkg.com/default-gateway/-/default-gateway-6.0.3.tgz#819494c888053bdb743edbf343d6cdf7f2943a71" + integrity sha512-fwSOJsbbNzZ/CUFpqFBqYfYNLj1NbMPm8MMCIzHjC83iSJRBEGmDUxU+WP661BaBQImeC2yHwXtz+P/O9o+XEg== + dependencies: + execa "^5.0.0" + +define-data-property@^1.0.1, define-data-property@^1.1.4: + version "1.1.4" + resolved "https://registry.yarnpkg.com/define-data-property/-/define-data-property-1.1.4.tgz#894dc141bb7d3060ae4366f6a0107e68fbe48c5e" + integrity sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A== + dependencies: + es-define-property "^1.0.0" + es-errors "^1.3.0" + gopd "^1.0.1" + +define-lazy-prop@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/define-lazy-prop/-/define-lazy-prop-2.0.0.tgz#3f7ae421129bcaaac9bc74905c98a0009ec9ee7f" + integrity sha512-Ds09qNh8yw3khSjiJjiUInaGX9xlqZDY7JVryGxdxV7NPeuqQfplOpQ66yJFZut3jLa5zOwkXw1g9EI2uKh4Og== + +define-properties@^1.1.3, define-properties@^1.2.0, define-properties@^1.2.1: + version "1.2.1" + resolved "https://registry.yarnpkg.com/define-properties/-/define-properties-1.2.1.tgz#10781cc616eb951a80a034bafcaa7377f6af2b6c" + integrity sha512-8QmQKqEASLd5nx0U1B1okLElbUuuttJ/AnYmRXbbbGDWh6uS208EjD4Xqq/I9wK7u0v6O08XhTWnt5XtEbR6Dg== + dependencies: + define-data-property "^1.0.1" + has-property-descriptors "^1.0.0" 
+ object-keys "^1.1.1" + +delayed-stream@~1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/delayed-stream/-/delayed-stream-1.0.0.tgz#df3ae199acadfb7d440aaae0b29e2272b24ec619" + integrity sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ== + +depd@2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/depd/-/depd-2.0.0.tgz#b696163cc757560d09cf22cc8fad1571b79e76df" + integrity sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw== + +depd@~1.1.2: + version "1.1.2" + resolved "https://registry.yarnpkg.com/depd/-/depd-1.1.2.tgz#9bcd52e14c097763e749b274c4346ed2e560b5a9" + integrity sha512-7emPTl6Dpo6JRXOXjLRxck+FlLRX5847cLKEn00PLAgc3g2hTZZgr+e4c2v6QpSmLeFP3n5yUo7ft6avBK/5jQ== + +destroy@1.2.0: + version "1.2.0" + resolved "https://registry.yarnpkg.com/destroy/-/destroy-1.2.0.tgz#4803735509ad8be552934c67df614f94e66fa015" + integrity sha512-2sJGJTaXIIaR1w4iJSNoN0hnMY7Gpc/n8D4qSCJw8QqFWXf7cuAgnEHxBpweaVcPevC2l3KpjYCx3NypQQgaJg== + +detect-libc@^1.0.3: + version "1.0.3" + resolved "https://registry.yarnpkg.com/detect-libc/-/detect-libc-1.0.3.tgz#fa137c4bd698edf55cd5cd02ac559f91a4c4ba9b" + integrity sha512-pGjwhsmsp4kL2RTz08wcOlGN83otlqHeD/Z5T8GXZB+/YcpQ/dgo+lbU8ZsGxV0HIvqqxo9l7mqYwyYMD9bKDg== + +detect-node@^2.0.4: + version "2.1.0" + resolved "https://registry.yarnpkg.com/detect-node/-/detect-node-2.1.0.tgz#c9c70775a49c3d03bc2c06d9a73be550f978f8b1" + integrity sha512-T0NIuQpnTvFDATNuHN5roPwSBG83rFsuO+MXXH9/3N1eFbn4wcPjttvjMLEPWJ0RGUYgQE7cGgS3tNxbqCGM7g== + +didyoumean@^1.2.2: + version "1.2.2" + resolved "https://registry.yarnpkg.com/didyoumean/-/didyoumean-1.2.2.tgz#989346ffe9e839b4555ecf5666edea0d3e8ad037" + integrity sha512-gxtyfqMg7GKyhQmb056K7M3xszy/myH8w+B4RT+QXBQsvAOdc3XymqDDPHx1BgPgsdAA5SIifona89YtRATDzw== + +dir-glob@^3.0.1: + version "3.0.1" + resolved "https://registry.yarnpkg.com/dir-glob/-/dir-glob-3.0.1.tgz#56dbf73d992a4a93ba1584f4534063fd2e41717f" 
+ integrity sha512-WkrWp9GR4KXfKGYzOLmTuGVi1UWFfws377n9cc55/tb6DuqyF6pcQ5AbiHEshaDpY9v6oaSr2XCDidGmMwdzIA== + dependencies: + path-type "^4.0.0" + +dlv@^1.1.3: + version "1.1.3" + resolved "https://registry.yarnpkg.com/dlv/-/dlv-1.1.3.tgz#5c198a8a11453596e751494d49874bc7732f2e79" + integrity sha512-+HlytyjlPKnIG8XuRG8WvmBP8xs8P71y+SKKS6ZXWoEgLuePxtDoUEiH7WkdePWrQ5JBpE6aoVqfZfJUQkjXwA== + +dns-packet@^5.2.2: + version "5.6.1" + resolved "https://registry.yarnpkg.com/dns-packet/-/dns-packet-5.6.1.tgz#ae888ad425a9d1478a0674256ab866de1012cf2f" + integrity sha512-l4gcSouhcgIKRvyy99RNVOgxXiicE+2jZoNmaNmZ6JXiGajBOJAesk1OBlJuM5k2c+eudGdLxDqXuPCKIj6kpw== + dependencies: + "@leichtgewicht/ip-codec" "^2.0.1" + +doctrine@^2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/doctrine/-/doctrine-2.1.0.tgz#5cd01fc101621b42c4cd7f5d1a66243716d3f39d" + integrity sha512-35mSku4ZXK0vfCuHEDAwt55dg2jNajHZ1odvF+8SSr82EsZY4QmXfuWso8oEd8zRhVObSN18aM0CjSdoBX7zIw== + dependencies: + esutils "^2.0.2" + +doctrine@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/doctrine/-/doctrine-3.0.0.tgz#addebead72a6574db783639dc87a121773973961" + integrity sha512-yS+Q5i3hBf7GBkd4KG8a7eBNNWNGLTaEwwYWUijIYM7zrlYDM0BFXHjjPWlWZ1Rg7UaddZeIDmi9jF3HmqiQ2w== + dependencies: + esutils "^2.0.2" + +dom-accessibility-api@^0.5.9: + version "0.5.16" + resolved "https://registry.yarnpkg.com/dom-accessibility-api/-/dom-accessibility-api-0.5.16.tgz#5a7429e6066eb3664d911e33fb0e45de8eb08453" + integrity sha512-X7BJ2yElsnOJ30pZF4uIIDfBEVgF4XEBxL9Bxhy6dnrm5hkzqmsWHGTiHqRiITNhMyFLyAiWndIJP7Z1NTteDg== + +dom-converter@^0.2.0: + version "0.2.0" + resolved "https://registry.yarnpkg.com/dom-converter/-/dom-converter-0.2.0.tgz#6721a9daee2e293682955b6afe416771627bb768" + integrity sha512-gd3ypIPfOMr9h5jIKq8E3sHOTCjeirnl0WK5ZdS1AW0Odt0b1PaWaHdJ4Qk4klv+YB9aJBS7mESXjFoDQPu6DA== + dependencies: + utila "~0.4" + +dom-serializer@^1.0.1: + version "1.4.1" + resolved 
"https://registry.yarnpkg.com/dom-serializer/-/dom-serializer-1.4.1.tgz#de5d41b1aea290215dc45a6dae8adcf1d32e2d30" + integrity sha512-VHwB3KfrcOOkelEG2ZOfxqLZdfkil8PtJi4P8N2MMXucZq2yLp75ClViUlOVwyoHEDjYU433Aq+5zWP61+RGag== + dependencies: + domelementtype "^2.0.1" + domhandler "^4.2.0" + entities "^2.0.0" + +domelementtype@^2.0.1, domelementtype@^2.2.0: + version "2.3.0" + resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-2.3.0.tgz#5c45e8e869952626331d7aab326d01daf65d589d" + integrity sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw== + +domhandler@^4.0.0, domhandler@^4.2.0, domhandler@^4.3.1: + version "4.3.1" + resolved "https://registry.yarnpkg.com/domhandler/-/domhandler-4.3.1.tgz#8d792033416f59d68bc03a5aa7b018c1ca89279c" + integrity sha512-GrwoxYN+uWlzO8uhUXRl0P+kHE4GtVPfYzVLcUxPL7KNdHKj66vvlhiweIHqYYXWlw+T8iLMp42Lm67ghw4WMQ== + dependencies: + domelementtype "^2.2.0" + +domutils@^2.5.2, domutils@^2.8.0: + version "2.8.0" + resolved "https://registry.yarnpkg.com/domutils/-/domutils-2.8.0.tgz#4437def5db6e2d1f5d6ee859bd95ca7d02048135" + integrity sha512-w96Cjofp72M5IIhpjgobBimYEfoPjx1Vx0BSX9P30WBdZW2WIKU0T1Bd0kz2eNZ9ikjKgHbEyKx8BB6H1L3h3A== + dependencies: + dom-serializer "^1.0.1" + domelementtype "^2.2.0" + domhandler "^4.2.0" + +dot-case@^3.0.4: + version "3.0.4" + resolved "https://registry.yarnpkg.com/dot-case/-/dot-case-3.0.4.tgz#9b2b670d00a431667a8a75ba29cd1b98809ce751" + integrity sha512-Kv5nKlh6yRrdrGvxeJ2e5y2eRUpkUosIW4A2AS38zwSz27zu7ufDwQPi5Jhs3XAlGNetl3bmnGhQsMtkKJnj3w== + dependencies: + no-case "^3.0.4" + tslib "^2.0.3" + +dotenv-defaults@^2.0.2: + version "2.0.2" + resolved "https://registry.yarnpkg.com/dotenv-defaults/-/dotenv-defaults-2.0.2.tgz#6b3ec2e4319aafb70940abda72d3856770ee77ac" + integrity sha512-iOIzovWfsUHU91L5i8bJce3NYK5JXeAwH50Jh6+ARUdLiiGlYWfGw6UkzsYqaXZH/hjE/eCd/PlfM/qqyK0AMg== + dependencies: + dotenv "^8.2.0" + +dotenv-safe@^5.0.1: + version "5.0.1" + resolved 
"https://registry.yarnpkg.com/dotenv-safe/-/dotenv-safe-5.0.1.tgz#8c4a79b8978fd4271b3d8ef17be2b2f04588af71" + integrity sha512-NiS92uLQHKThcQIB4pNEQHLaosz+O1VaJ1yuBaZNj0BASGpQliSzfPKgaoFs/WvE+p8pS9STXOMW30b5uxBZog== + dependencies: + dotenv "^5.0.0" + +dotenv-webpack@^8.0.1: + version "8.1.0" + resolved "https://registry.yarnpkg.com/dotenv-webpack/-/dotenv-webpack-8.1.0.tgz#4d66abc4a30395b46a030ebcd125320232b54873" + integrity sha512-owK1JcsPkIobeqjVrk6h7jPED/W6ZpdFsMPR+5ursB7/SdgDyO+VzAU+szK8C8u3qUhtENyYnj8eyXMR5kkGag== + dependencies: + dotenv-defaults "^2.0.2" + +dotenv@^16.0.2: + version "16.4.5" + resolved "https://registry.yarnpkg.com/dotenv/-/dotenv-16.4.5.tgz#cdd3b3b604cb327e286b4762e13502f717cb099f" + integrity sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg== + +dotenv@^5.0.0: + version "5.0.1" + resolved "https://registry.yarnpkg.com/dotenv/-/dotenv-5.0.1.tgz#a5317459bd3d79ab88cff6e44057a6a3fbb1fcef" + integrity sha512-4As8uPrjfwb7VXC+WnLCbXK7y+Ueb2B3zgNCePYfhxS1PYeaO1YTeplffTEcbfLhvFNGLAz90VvJs9yomG7bow== + +dotenv@^8.2.0: + version "8.6.0" + resolved "https://registry.yarnpkg.com/dotenv/-/dotenv-8.6.0.tgz#061af664d19f7f4d8fc6e4ff9b584ce237adcb8b" + integrity sha512-IrPdXQsk2BbzvCBGBOTmmSH5SodmqZNt4ERAZDmW4CT+tL8VtvinqywuANaFu4bOMWki16nqf0e4oC0QIaDr/g== + +download@^6.2.2: + version "6.2.5" + resolved "https://registry.yarnpkg.com/download/-/download-6.2.5.tgz#acd6a542e4cd0bb42ca70cfc98c9e43b07039714" + integrity sha512-DpO9K1sXAST8Cpzb7kmEhogJxymyVUd5qz/vCOSyvwtp2Klj2XcDt5YUuasgxka44SxF0q5RriKIwJmQHG2AuA== + dependencies: + caw "^2.0.0" + content-disposition "^0.5.2" + decompress "^4.0.0" + ext-name "^5.0.0" + file-type "5.2.0" + filenamify "^2.0.0" + get-stream "^3.0.0" + got "^7.0.0" + make-dir "^1.0.0" + p-event "^1.0.0" + pify "^3.0.0" + +download@^7.1.0: + version "7.1.0" + resolved "https://registry.yarnpkg.com/download/-/download-7.1.0.tgz#9059aa9d70b503ee76a132897be6dec8e5587233" + integrity 
sha512-xqnBTVd/E+GxJVrX5/eUJiLYjCGPwMpdL+jGhGU57BvtcA7wwhtHVbXBeUk51kOpW3S7Jn3BQbN9Q1R1Km2qDQ== + dependencies: + archive-type "^4.0.0" + caw "^2.0.1" + content-disposition "^0.5.2" + decompress "^4.2.0" + ext-name "^5.0.0" + file-type "^8.1.0" + filenamify "^2.0.0" + get-stream "^3.0.0" + got "^8.3.1" + make-dir "^1.2.0" + p-event "^2.1.0" + pify "^3.0.0" + +duplexer3@^0.1.4: + version "0.1.5" + resolved "https://registry.yarnpkg.com/duplexer3/-/duplexer3-0.1.5.tgz#0b5e4d7bad5de8901ea4440624c8e1d20099217e" + integrity sha512-1A8za6ws41LQgv9HrE/66jyC5yuSjQ3L/KOpFtoBilsAK2iA2wuS5rTt1OCzIvtS2V7nVmedsUU+DGRcjBmOYA== + +eastasianwidth@^0.2.0: + version "0.2.0" + resolved "https://registry.yarnpkg.com/eastasianwidth/-/eastasianwidth-0.2.0.tgz#696ce2ec0aa0e6ea93a397ffcf24aa7840c827cb" + integrity sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA== + +ee-first@1.1.1: + version "1.1.1" + resolved "https://registry.yarnpkg.com/ee-first/-/ee-first-1.1.1.tgz#590c61156b0ae2f4f0255732a158b266bc56b21d" + integrity sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow== + +electron-to-chromium@^1.5.41: + version "1.5.50" + resolved "https://registry.yarnpkg.com/electron-to-chromium/-/electron-to-chromium-1.5.50.tgz#d9ba818da7b2b5ef1f3dd32bce7046feb7e93234" + integrity sha512-eMVObiUQ2LdgeO1F/ySTXsvqvxb6ZH2zPGaMYsWzRDdOddUa77tdmI0ltg+L16UpbWdhPmuF3wIQYyQq65WfZw== + +emoji-regex@^8.0.0: + version "8.0.0" + resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-8.0.0.tgz#e818fd69ce5ccfcb404594f842963bf53164cc37" + integrity sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A== + +emoji-regex@^9.2.2: + version "9.2.2" + resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-9.2.2.tgz#840c8803b0d8047f4ff0cf963176b32d4ef3ed72" + integrity sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg== + 
+emojis-list@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/emojis-list/-/emojis-list-3.0.0.tgz#5570662046ad29e2e916e71aae260abdff4f6a78" + integrity sha512-/kyM18EfinwXZbno9FyUGeFh87KC8HRQBQGildHZbEuRyWFOmv1U10o9BBp8XVZDVNNuQKyIGIu5ZYAAXJ0V2Q== + +encodeurl@~1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/encodeurl/-/encodeurl-1.0.2.tgz#ad3ff4c86ec2d029322f5a02c3a9a606c95b3f59" + integrity sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w== + +encodeurl@~2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/encodeurl/-/encodeurl-2.0.0.tgz#7b8ea898077d7e409d3ac45474ea38eaf0857a58" + integrity sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg== + +end-of-stream@^1.0.0, end-of-stream@^1.1.0: + version "1.4.4" + resolved "https://registry.yarnpkg.com/end-of-stream/-/end-of-stream-1.4.4.tgz#5ae64a5f45057baf3626ec14da0ca5e4b2431eb0" + integrity sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q== + dependencies: + once "^1.4.0" + +enhanced-resolve@^5.0.0, enhanced-resolve@^5.17.1: + version "5.17.1" + resolved "https://registry.yarnpkg.com/enhanced-resolve/-/enhanced-resolve-5.17.1.tgz#67bfbbcc2f81d511be77d686a90267ef7f898a15" + integrity sha512-LMHl3dXhTcfv8gM4kEzIUeTQ+7fpdA0l2tUf34BddXPkz2A5xJ5L/Pchd5BL6rdccM9QGvu0sWZzK1Z1t4wwyg== + dependencies: + graceful-fs "^4.2.4" + tapable "^2.2.0" + +entities@^2.0.0: + version "2.2.0" + resolved "https://registry.yarnpkg.com/entities/-/entities-2.2.0.tgz#098dc90ebb83d8dffa089d55256b351d34c4da55" + integrity sha512-p92if5Nz619I0w+akJrLZH0MX0Pb5DX39XOwQTtXSdQQOaYH03S1uIQp4mhOZtAXrxq4ViO67YTiLBo2638o9A== + +envinfo@^7.7.3: + version "7.14.0" + resolved "https://registry.yarnpkg.com/envinfo/-/envinfo-7.14.0.tgz#26dac5db54418f2a4c1159153a0b2ae980838aae" + integrity 
sha512-CO40UI41xDQzhLB1hWyqUKgFhs250pNcGbyGKe1l/e4FSaI/+YE4IMG76GDt0In67WLPACIITC+sOi08x4wIvg== + +error-ex@^1.3.1: + version "1.3.2" + resolved "https://registry.yarnpkg.com/error-ex/-/error-ex-1.3.2.tgz#b4ac40648107fdcdcfae242f428bea8a14d4f1bf" + integrity sha512-7dFHNmqeFSEt2ZBsCriorKnn3Z2pj+fd9kmI6QoWw4//DL+icEBfc0U7qJCisqrTsKTjw4fNFy2pW9OqStD84g== + dependencies: + is-arrayish "^0.2.1" + +error-stack-parser@^2.0.0, error-stack-parser@^2.0.6: + version "2.1.4" + resolved "https://registry.yarnpkg.com/error-stack-parser/-/error-stack-parser-2.1.4.tgz#229cb01cdbfa84440bfa91876285b94680188286" + integrity sha512-Sk5V6wVazPhq5MhpO+AUxJn5x7XSXGl1R93Vn7i+zS15KDVxQijejNCrz8340/2bgLBjR9GtEG8ZVKONDjcqGQ== + dependencies: + stackframe "^1.3.4" + +es-abstract@^1.17.5, es-abstract@^1.22.1, es-abstract@^1.22.3, es-abstract@^1.23.0, es-abstract@^1.23.1, es-abstract@^1.23.2, es-abstract@^1.23.3: + version "1.23.3" + resolved "https://registry.yarnpkg.com/es-abstract/-/es-abstract-1.23.3.tgz#8f0c5a35cd215312573c5a27c87dfd6c881a0aa0" + integrity sha512-e+HfNH61Bj1X9/jLc5v1owaLYuHdeHHSQlkhCBiTK8rBvKaULl/beGMxwrMXjpYrv4pz22BlY570vVePA2ho4A== + dependencies: + array-buffer-byte-length "^1.0.1" + arraybuffer.prototype.slice "^1.0.3" + available-typed-arrays "^1.0.7" + call-bind "^1.0.7" + data-view-buffer "^1.0.1" + data-view-byte-length "^1.0.1" + data-view-byte-offset "^1.0.0" + es-define-property "^1.0.0" + es-errors "^1.3.0" + es-object-atoms "^1.0.0" + es-set-tostringtag "^2.0.3" + es-to-primitive "^1.2.1" + function.prototype.name "^1.1.6" + get-intrinsic "^1.2.4" + get-symbol-description "^1.0.2" + globalthis "^1.0.3" + gopd "^1.0.1" + has-property-descriptors "^1.0.2" + has-proto "^1.0.3" + has-symbols "^1.0.3" + hasown "^2.0.2" + internal-slot "^1.0.7" + is-array-buffer "^3.0.4" + is-callable "^1.2.7" + is-data-view "^1.0.1" + is-negative-zero "^2.0.3" + is-regex "^1.1.4" + is-shared-array-buffer "^1.0.3" + is-string "^1.0.7" + is-typed-array "^1.1.13" + is-weakref 
"^1.0.2" + object-inspect "^1.13.1" + object-keys "^1.1.1" + object.assign "^4.1.5" + regexp.prototype.flags "^1.5.2" + safe-array-concat "^1.1.2" + safe-regex-test "^1.0.3" + string.prototype.trim "^1.2.9" + string.prototype.trimend "^1.0.8" + string.prototype.trimstart "^1.0.8" + typed-array-buffer "^1.0.2" + typed-array-byte-length "^1.0.1" + typed-array-byte-offset "^1.0.2" + typed-array-length "^1.0.6" + unbox-primitive "^1.0.2" + which-typed-array "^1.1.15" + +es-define-property@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/es-define-property/-/es-define-property-1.0.0.tgz#c7faefbdff8b2696cf5f46921edfb77cc4ba3845" + integrity sha512-jxayLKShrEqqzJ0eumQbVhTYQM27CfT1T35+gCgDFoL82JLsXqTJ76zv6A0YLOgEnLUMvLzsDsGIrl8NFpT2gQ== + dependencies: + get-intrinsic "^1.2.4" + +es-errors@^1.2.1, es-errors@^1.3.0: + version "1.3.0" + resolved "https://registry.yarnpkg.com/es-errors/-/es-errors-1.3.0.tgz#05f75a25dab98e4fb1dcd5e1472c0546d5057c8f" + integrity sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw== + +es-get-iterator@^1.1.3: + version "1.1.3" + resolved "https://registry.yarnpkg.com/es-get-iterator/-/es-get-iterator-1.1.3.tgz#3ef87523c5d464d41084b2c3c9c214f1199763d6" + integrity sha512-sPZmqHBe6JIiTfN5q2pEi//TwxmAFHwj/XEuYjTuse78i8KxaqMTTzxPoFKuzRpDpTJ+0NAbpfenkmH2rePtuw== + dependencies: + call-bind "^1.0.2" + get-intrinsic "^1.1.3" + has-symbols "^1.0.3" + is-arguments "^1.1.1" + is-map "^2.0.2" + is-set "^2.0.2" + is-string "^1.0.7" + isarray "^2.0.5" + stop-iteration-iterator "^1.0.0" + +es-iterator-helpers@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/es-iterator-helpers/-/es-iterator-helpers-1.1.0.tgz#f6d745d342aea214fe09497e7152170dc333a7a6" + integrity sha512-/SurEfycdyssORP/E+bj4sEu1CWw4EmLDsHynHwSXQ7utgbrMRWW195pTrCjFgFCddf/UkYm3oqKPRq5i8bJbw== + dependencies: + call-bind "^1.0.7" + define-properties "^1.2.1" + es-abstract "^1.23.3" + es-errors "^1.3.0" + 
es-set-tostringtag "^2.0.3" + function-bind "^1.1.2" + get-intrinsic "^1.2.4" + globalthis "^1.0.4" + has-property-descriptors "^1.0.2" + has-proto "^1.0.3" + has-symbols "^1.0.3" + internal-slot "^1.0.7" + iterator.prototype "^1.1.3" + safe-array-concat "^1.1.2" + +es-module-lexer@^1.2.1: + version "1.5.4" + resolved "https://registry.yarnpkg.com/es-module-lexer/-/es-module-lexer-1.5.4.tgz#a8efec3a3da991e60efa6b633a7cad6ab8d26b78" + integrity sha512-MVNK56NiMrOwitFB7cqDwq0CQutbw+0BvLshJSse0MUNU+y1FC3bUS/AQg7oUng+/wKrrki7JfmwtVHkVfPLlw== + +es-object-atoms@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/es-object-atoms/-/es-object-atoms-1.0.0.tgz#ddb55cd47ac2e240701260bc2a8e31ecb643d941" + integrity sha512-MZ4iQ6JwHOBQjahnjwaC1ZtIBH+2ohjamzAO3oaHcXYup7qxjF2fixyH+Q71voWHeOkI2q/TnJao/KfXYIZWbw== + dependencies: + es-errors "^1.3.0" + +es-set-tostringtag@^2.0.3: + version "2.0.3" + resolved "https://registry.yarnpkg.com/es-set-tostringtag/-/es-set-tostringtag-2.0.3.tgz#8bb60f0a440c2e4281962428438d58545af39777" + integrity sha512-3T8uNMC3OQTHkFUsFq8r/BwAXLHvU/9O9mE0fBc/MY5iq/8H7ncvO947LmYA6ldWw9Uh8Yhf25zu6n7nML5QWQ== + dependencies: + get-intrinsic "^1.2.4" + has-tostringtag "^1.0.2" + hasown "^2.0.1" + +es-shim-unscopables@^1.0.0, es-shim-unscopables@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/es-shim-unscopables/-/es-shim-unscopables-1.0.2.tgz#1f6942e71ecc7835ed1c8a83006d8771a63a3763" + integrity sha512-J3yBRXCzDu4ULnQwxyToo/OjdMx6akgVC7K6few0a7F/0wLtmKKN7I73AH5T2836UuXRqN7Qg+IIUw/+YJksRw== + dependencies: + hasown "^2.0.0" + +es-to-primitive@^1.2.1: + version "1.2.1" + resolved "https://registry.yarnpkg.com/es-to-primitive/-/es-to-primitive-1.2.1.tgz#e55cd4c9cdc188bcefb03b366c736323fc5c898a" + integrity sha512-QCOllgZJtaUo9miYBcLChTUaHNjJF3PYs1VidD7AwiEj1kYxKeQTctLAezAOH5ZKRH0g2IgPn6KwB4IT8iRpvA== + dependencies: + is-callable "^1.1.4" + is-date-object "^1.0.1" + is-symbol "^1.0.2" + +escalade@^3.1.1, escalade@^3.2.0: + 
version "3.2.0" + resolved "https://registry.yarnpkg.com/escalade/-/escalade-3.2.0.tgz#011a3f69856ba189dffa7dc8fcce99d2a87903e5" + integrity sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA== + +escape-html@~1.0.3: + version "1.0.3" + resolved "https://registry.yarnpkg.com/escape-html/-/escape-html-1.0.3.tgz#0258eae4d3d0c0974de1c169188ef0051d1d1988" + integrity sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow== + +escape-string-regexp@^1.0.2: + version "1.0.5" + resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz#1b61c0562190a8dff6ae3bb2cf0200ca130b86d4" + integrity sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg== + +escape-string-regexp@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz#14ba83a5d373e3d311e5afca29cf5bfad965bf34" + integrity sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA== + +eslint-plugin-react@^7.31.0: + version "7.37.2" + resolved "https://registry.yarnpkg.com/eslint-plugin-react/-/eslint-plugin-react-7.37.2.tgz#cd0935987876ba2900df2f58339f6d92305acc7a" + integrity sha512-EsTAnj9fLVr/GZleBLFbj/sSuXeWmp1eXIN60ceYnZveqEaUCyW4X+Vh4WTdUhCkW4xutXYqTXCUSyqD4rB75w== + dependencies: + array-includes "^3.1.8" + array.prototype.findlast "^1.2.5" + array.prototype.flatmap "^1.3.2" + array.prototype.tosorted "^1.1.4" + doctrine "^2.1.0" + es-iterator-helpers "^1.1.0" + estraverse "^5.3.0" + hasown "^2.0.2" + jsx-ast-utils "^2.4.1 || ^3.0.0" + minimatch "^3.1.2" + object.entries "^1.1.8" + object.fromentries "^2.0.8" + object.values "^1.2.0" + prop-types "^15.8.1" + resolve "^2.0.0-next.5" + semver "^6.3.1" + string.prototype.matchall "^4.0.11" + string.prototype.repeat "^1.0.0" + +eslint-scope@5.1.1, eslint-scope@^5.1.1: + version "5.1.1" + resolved 
"https://registry.yarnpkg.com/eslint-scope/-/eslint-scope-5.1.1.tgz#e786e59a66cb92b3f6c1fb0d508aab174848f48c" + integrity sha512-2NxwbF/hZ0KpepYN0cNbo+FN6XoK7GaHlQhgx/hIZl6Va0bF45RQOOwhLIy8lQDbuCiadSLCBnH2CFYquit5bw== + dependencies: + esrecurse "^4.3.0" + estraverse "^4.1.1" + +eslint-scope@^7.2.2: + version "7.2.2" + resolved "https://registry.yarnpkg.com/eslint-scope/-/eslint-scope-7.2.2.tgz#deb4f92563390f32006894af62a22dba1c46423f" + integrity sha512-dOt21O7lTMhDM+X9mB4GX+DZrZtCUJPL/wlcTqxyrx5IvO0IYtILdtrQGQp+8n5S0gwSVmOf9NQrjMOgfQZlIg== + dependencies: + esrecurse "^4.3.0" + estraverse "^5.2.0" + +eslint-visitor-keys@^3.3.0, eslint-visitor-keys@^3.4.1, eslint-visitor-keys@^3.4.3: + version "3.4.3" + resolved "https://registry.yarnpkg.com/eslint-visitor-keys/-/eslint-visitor-keys-3.4.3.tgz#0cd72fe8550e3c2eae156a96a4dddcd1c8ac5800" + integrity sha512-wpc+LXeiyiisxPlEkUzU6svyS1frIO3Mgxj1fdy7Pm8Ygzguax2N3Fa/D/ag1WqbOprdI+uY6wMUl8/a2G+iag== + +eslint@^8.22.0: + version "8.57.1" + resolved "https://registry.yarnpkg.com/eslint/-/eslint-8.57.1.tgz#7df109654aba7e3bbe5c8eae533c5e461d3c6ca9" + integrity sha512-ypowyDxpVSYpkXr9WPv2PAZCtNip1Mv5KTW0SCurXv/9iOpcrH9PaqUElksqEB6pChqHGDRCFTyrZlGhnLNGiA== + dependencies: + "@eslint-community/eslint-utils" "^4.2.0" + "@eslint-community/regexpp" "^4.6.1" + "@eslint/eslintrc" "^2.1.4" + "@eslint/js" "8.57.1" + "@humanwhocodes/config-array" "^0.13.0" + "@humanwhocodes/module-importer" "^1.0.1" + "@nodelib/fs.walk" "^1.2.8" + "@ungap/structured-clone" "^1.2.0" + ajv "^6.12.4" + chalk "^4.0.0" + cross-spawn "^7.0.2" + debug "^4.3.2" + doctrine "^3.0.0" + escape-string-regexp "^4.0.0" + eslint-scope "^7.2.2" + eslint-visitor-keys "^3.4.3" + espree "^9.6.1" + esquery "^1.4.2" + esutils "^2.0.2" + fast-deep-equal "^3.1.3" + file-entry-cache "^6.0.1" + find-up "^5.0.0" + glob-parent "^6.0.2" + globals "^13.19.0" + graphemer "^1.4.0" + ignore "^5.2.0" + imurmurhash "^0.1.4" + is-glob "^4.0.0" + is-path-inside "^3.0.3" + js-yaml "^4.1.0" + 
json-stable-stringify-without-jsonify "^1.0.1" + levn "^0.4.1" + lodash.merge "^4.6.2" + minimatch "^3.1.2" + natural-compare "^1.4.0" + optionator "^0.9.3" + strip-ansi "^6.0.1" + text-table "^0.2.0" + +espree@^9.6.0, espree@^9.6.1: + version "9.6.1" + resolved "https://registry.yarnpkg.com/espree/-/espree-9.6.1.tgz#a2a17b8e434690a5432f2f8018ce71d331a48c6f" + integrity sha512-oruZaFkjorTpF32kDSI5/75ViwGeZginGGy2NoOSg3Q9bnwlnmDm4HLnkl0RE3n+njDXR037aY1+x58Z/zFdwQ== + dependencies: + acorn "^8.9.0" + acorn-jsx "^5.3.2" + eslint-visitor-keys "^3.4.1" + +esquery@^1.4.2: + version "1.6.0" + resolved "https://registry.yarnpkg.com/esquery/-/esquery-1.6.0.tgz#91419234f804d852a82dceec3e16cdc22cf9dae7" + integrity sha512-ca9pw9fomFcKPvFLXhBKUK90ZvGibiGOvRJNbjljY7s7uq/5YO4BOzcYtJqExdx99rF6aAcnRxHmcUHcz6sQsg== + dependencies: + estraverse "^5.1.0" + +esrecurse@^4.3.0: + version "4.3.0" + resolved "https://registry.yarnpkg.com/esrecurse/-/esrecurse-4.3.0.tgz#7ad7964d679abb28bee72cec63758b1c5d2c9921" + integrity sha512-KmfKL3b6G+RXvP8N1vr3Tq1kL/oCFgn2NYXEtqP8/L3pKapUA4G8cFVaoF3SU323CD4XypR/ffioHmkti6/Tag== + dependencies: + estraverse "^5.2.0" + +estraverse@^4.1.1: + version "4.3.0" + resolved "https://registry.yarnpkg.com/estraverse/-/estraverse-4.3.0.tgz#398ad3f3c5a24948be7725e83d11a7de28cdbd1d" + integrity sha512-39nnKffWz8xN1BU/2c79n9nB9HDzo0niYUqx6xyqUnyoAnQyyWpOTdZEeiCch8BBu515t4wp9ZmgVfVhn9EBpw== + +estraverse@^5.1.0, estraverse@^5.2.0, estraverse@^5.3.0: + version "5.3.0" + resolved "https://registry.yarnpkg.com/estraverse/-/estraverse-5.3.0.tgz#2eea5290702f26ab8fe5370370ff86c965d21123" + integrity sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA== + +esutils@^2.0.2: + version "2.0.3" + resolved "https://registry.yarnpkg.com/esutils/-/esutils-2.0.3.tgz#74d2eb4de0b8da1293711910d50775b9b710ef64" + integrity sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g== + +etag@~1.8.1: + version 
"1.8.1" + resolved "https://registry.yarnpkg.com/etag/-/etag-1.8.1.tgz#41ae2eeb65efa62268aebfea83ac7d79299b0887" + integrity sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg== + +eventemitter3@^4.0.0: + version "4.0.7" + resolved "https://registry.yarnpkg.com/eventemitter3/-/eventemitter3-4.0.7.tgz#2de9b68f6528d5644ef5c59526a1b4a07306169f" + integrity sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw== + +events@^3.2.0: + version "3.3.0" + resolved "https://registry.yarnpkg.com/events/-/events-3.3.0.tgz#31a95ad0a924e2d2c419a813aeb2c4e878ea7400" + integrity sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q== + +eventsource@^2.0.2: + version "2.0.2" + resolved "https://registry.yarnpkg.com/eventsource/-/eventsource-2.0.2.tgz#76dfcc02930fb2ff339520b6d290da573a9e8508" + integrity sha512-IzUmBGPR3+oUG9dUeXynyNmf91/3zUSJg1lCktzKw47OXuhco54U3r9B7O4XX+Rb1Itm9OZ2b0RkTs10bICOxA== + +exec-buffer@^3.0.0, exec-buffer@^3.2.0: + version "3.2.0" + resolved "https://registry.yarnpkg.com/exec-buffer/-/exec-buffer-3.2.0.tgz#b1686dbd904c7cf982e652c1f5a79b1e5573082b" + integrity sha512-wsiD+2Tp6BWHoVv3B+5Dcx6E7u5zky+hUwOHjuH2hKSLR3dvRmX8fk8UD8uqQixHs4Wk6eDmiegVrMPjKj7wpA== + dependencies: + execa "^0.7.0" + p-finally "^1.0.0" + pify "^3.0.0" + rimraf "^2.5.4" + tempfile "^2.0.0" + +execa@^0.7.0: + version "0.7.0" + resolved "https://registry.yarnpkg.com/execa/-/execa-0.7.0.tgz#944becd34cc41ee32a63a9faf27ad5a65fc59777" + integrity sha512-RztN09XglpYI7aBBrJCPW95jEH7YF1UEPOoX9yDhUTPdp7mK+CQvnLTuD10BNXZ3byLTu2uehZ8EcKT/4CGiFw== + dependencies: + cross-spawn "^5.0.1" + get-stream "^3.0.0" + is-stream "^1.1.0" + npm-run-path "^2.0.0" + p-finally "^1.0.0" + signal-exit "^3.0.0" + strip-eof "^1.0.0" + +execa@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/execa/-/execa-1.0.0.tgz#c6236a5bb4df6d6f15e88e7f017798216749ddd8" + integrity 
sha512-adbxcyWV46qiHyvSp50TKt05tB4tK3HcmF7/nxfAdhnox83seTDbwnaqKO4sXRy7roHAIFqJP/Rw/AuEbX61LA== + dependencies: + cross-spawn "^6.0.0" + get-stream "^4.0.0" + is-stream "^1.1.0" + npm-run-path "^2.0.0" + p-finally "^1.0.0" + signal-exit "^3.0.0" + strip-eof "^1.0.0" + +execa@^4.0.0: + version "4.1.0" + resolved "https://registry.yarnpkg.com/execa/-/execa-4.1.0.tgz#4e5491ad1572f2f17a77d388c6c857135b22847a" + integrity sha512-j5W0//W7f8UxAn8hXVnwG8tLwdiUy4FJLcSupCg6maBYZDpyBvTApK7KyuI4bKj8KOh1r2YH+6ucuYtJv1bTZA== + dependencies: + cross-spawn "^7.0.0" + get-stream "^5.0.0" + human-signals "^1.1.1" + is-stream "^2.0.0" + merge-stream "^2.0.0" + npm-run-path "^4.0.0" + onetime "^5.1.0" + signal-exit "^3.0.2" + strip-final-newline "^2.0.0" + +execa@^5.0.0: + version "5.1.1" + resolved "https://registry.yarnpkg.com/execa/-/execa-5.1.1.tgz#f80ad9cbf4298f7bd1d4c9555c21e93741c411dd" + integrity sha512-8uSpZZocAZRBAPIEINJj3Lo9HyGitllczc27Eh5YYojjMFMn8yHMDMaUHE2Jqfq05D/wucwI4JGURyXt1vchyg== + dependencies: + cross-spawn "^7.0.3" + get-stream "^6.0.0" + human-signals "^2.1.0" + is-stream "^2.0.0" + merge-stream "^2.0.0" + npm-run-path "^4.0.1" + onetime "^5.1.2" + signal-exit "^3.0.3" + strip-final-newline "^2.0.0" + +executable@^4.1.0: + version "4.1.1" + resolved "https://registry.yarnpkg.com/executable/-/executable-4.1.1.tgz#41532bff361d3e57af4d763b70582db18f5d133c" + integrity sha512-8iA79xD3uAch729dUG8xaaBBFGaEa0wdD2VkYLFHwlqosEj/jT66AzcreRDSgV7ehnNLBW2WR5jIXwGKjVdTLg== + dependencies: + pify "^2.2.0" + +express@^4.17.3: + version "4.21.1" + resolved "https://registry.yarnpkg.com/express/-/express-4.21.1.tgz#9dae5dda832f16b4eec941a4e44aa89ec481b281" + integrity sha512-YSFlK1Ee0/GC8QaO91tHcDxJiE/X4FbpAyQWkxAvG6AXCuR65YzK8ua6D9hvi/TzUfZMpc+BwuM1IPw8fmQBiQ== + dependencies: + accepts "~1.3.8" + array-flatten "1.1.1" + body-parser "1.20.3" + content-disposition "0.5.4" + content-type "~1.0.4" + cookie "0.7.1" + cookie-signature "1.0.6" + debug "2.6.9" + depd "2.0.0" + 
encodeurl "~2.0.0" + escape-html "~1.0.3" + etag "~1.8.1" + finalhandler "1.3.1" + fresh "0.5.2" + http-errors "2.0.0" + merge-descriptors "1.0.3" + methods "~1.1.2" + on-finished "2.4.1" + parseurl "~1.3.3" + path-to-regexp "0.1.10" + proxy-addr "~2.0.7" + qs "6.13.0" + range-parser "~1.2.1" + safe-buffer "5.2.1" + send "0.19.0" + serve-static "1.16.2" + setprototypeof "1.2.0" + statuses "2.0.1" + type-is "~1.6.18" + utils-merge "1.0.1" + vary "~1.1.2" + +ext-list@^2.0.0: + version "2.2.2" + resolved "https://registry.yarnpkg.com/ext-list/-/ext-list-2.2.2.tgz#0b98e64ed82f5acf0f2931babf69212ef52ddd37" + integrity sha512-u+SQgsubraE6zItfVA0tBuCBhfU9ogSRnsvygI7wht9TS510oLkBRXBsqopeUG/GBOIQyKZO9wjTqIu/sf5zFA== + dependencies: + mime-db "^1.28.0" + +ext-name@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/ext-name/-/ext-name-5.0.0.tgz#70781981d183ee15d13993c8822045c506c8f0a6" + integrity sha512-yblEwXAbGv1VQDmow7s38W77hzAgJAO50ztBLMcUyUBfxv1HC+LGwtiEN+Co6LtlqT/5uwVOxsD4TNIilWhwdQ== + dependencies: + ext-list "^2.0.0" + sort-keys-length "^1.0.0" + +fast-deep-equal@^3.1.1, fast-deep-equal@^3.1.3: + version "3.1.3" + resolved "https://registry.yarnpkg.com/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz#3a7d56b559d6cbc3eb512325244e619a65c6c525" + integrity sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q== + +fast-glob@^3.0.3, fast-glob@^3.2.11, fast-glob@^3.2.9, fast-glob@^3.3.0: + version "3.3.2" + resolved "https://registry.yarnpkg.com/fast-glob/-/fast-glob-3.3.2.tgz#a904501e57cfdd2ffcded45e99a54fef55e46129" + integrity sha512-oX2ruAFQwf/Orj8m737Y5adxDQO0LAB7/S5MnxCdTNDd4p6BsyIVsv9JQsATbTSq8KHRpLwIHbVlUNatxd+1Ow== + dependencies: + "@nodelib/fs.stat" "^2.0.2" + "@nodelib/fs.walk" "^1.2.3" + glob-parent "^5.1.2" + merge2 "^1.3.0" + micromatch "^4.0.4" + +fast-json-stable-stringify@^2.0.0: + version "2.1.0" + resolved 
"https://registry.yarnpkg.com/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz#874bf69c6f404c2b5d99c481341399fd55892633" + integrity sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw== + +fast-levenshtein@^2.0.6: + version "2.0.6" + resolved "https://registry.yarnpkg.com/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz#3d8a5c66883a16a30ca8643e851f19baa7797917" + integrity sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw== + +fast-uri@^3.0.1: + version "3.0.3" + resolved "https://registry.yarnpkg.com/fast-uri/-/fast-uri-3.0.3.tgz#892a1c91802d5d7860de728f18608a0573142241" + integrity sha512-aLrHthzCjH5He4Z2H9YZ+v6Ujb9ocRuW6ZzkJQOrTxleEijANq4v1TsaPaVG1PZcuurEzrLcWRyYBYXD5cEiaw== + +fast-xml-parser@^4.1.3: + version "4.5.0" + resolved "https://registry.yarnpkg.com/fast-xml-parser/-/fast-xml-parser-4.5.0.tgz#2882b7d01a6825dfdf909638f2de0256351def37" + integrity sha512-/PlTQCI96+fZMAOLMZK4CWG1ItCbfZ/0jx7UIJFChPNrx7tcEgerUgWbeieCM9MfHInUDyK8DWYZ+YrywDJuTg== + dependencies: + strnum "^1.0.5" + +fastest-levenshtein@^1.0.12: + version "1.0.16" + resolved "https://registry.yarnpkg.com/fastest-levenshtein/-/fastest-levenshtein-1.0.16.tgz#210e61b6ff181de91ea9b3d1b84fdedd47e034e5" + integrity sha512-eRnCtTTtGZFpQCwhJiUOuxPQWRXVKYDn0b2PeHfXL6/Zi53SLAzAHfVhVWK2AryC/WH05kGfxhFIPvTF0SXQzg== + +fastq@^1.6.0: + version "1.17.1" + resolved "https://registry.yarnpkg.com/fastq/-/fastq-1.17.1.tgz#2a523f07a4e7b1e81a42b91b8bf2254107753b47" + integrity sha512-sRVD3lWVIXWg6By68ZN7vho9a1pQcN/WBFaAAsDDFzlJjvoGx0P8z7V1t72grFJfJhu3YPZBuu25f7Kaw2jN1w== + dependencies: + reusify "^1.0.4" + +faye-websocket@^0.11.3: + version "0.11.4" + resolved "https://registry.yarnpkg.com/faye-websocket/-/faye-websocket-0.11.4.tgz#7f0d9275cfdd86a1c963dc8b65fcc451edcbb1da" + integrity sha512-CzbClwlXAuiRQAlUyfqPgvPoNKTckTPGfwZV4ZdAhVcP2lh9KUxJg2b5GkE7XbjKQ3YJnQ9z6D9ntLAlB+tP8g== + dependencies: + websocket-driver 
">=0.5.1" + +fd-slicer@~1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/fd-slicer/-/fd-slicer-1.1.0.tgz#25c7c89cb1f9077f8891bbe61d8f390eae256f1e" + integrity sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g== + dependencies: + pend "~1.2.0" + +fetch-event-stream@^0.1.5: + version "0.1.5" + resolved "https://registry.yarnpkg.com/fetch-event-stream/-/fetch-event-stream-0.1.5.tgz#ffc5a8f57a040e3eb78d9f990632c2a3fd253c02" + integrity sha512-V1PWovkspxQfssq/NnxoEyQo1DV+MRK/laPuPblIZmSjMN8P5u46OhlFQznSr9p/t0Sp8Uc6SbM3yCMfr0KU8g== + +file-entry-cache@^6.0.1: + version "6.0.1" + resolved "https://registry.yarnpkg.com/file-entry-cache/-/file-entry-cache-6.0.1.tgz#211b2dd9659cb0394b073e7323ac3c933d522027" + integrity sha512-7Gps/XWymbLk2QLYK4NzpMOrYjMhdIxXuIvy2QBsLE6ljuodKvdkWs/cpyJJ3CVIVpH0Oi1Hvg1ovbMzLdFBBg== + dependencies: + flat-cache "^3.0.4" + +file-loader@^6.2.0: + version "6.2.0" + resolved "https://registry.yarnpkg.com/file-loader/-/file-loader-6.2.0.tgz#baef7cf8e1840df325e4390b4484879480eebe4d" + integrity sha512-qo3glqyTa61Ytg4u73GultjHGjdRyig3tG6lPtyX/jOEJvHif9uB0/OCI2Kif6ctF3caQTW2G5gym21oAsI4pw== + dependencies: + loader-utils "^2.0.0" + schema-utils "^3.0.0" + +file-type@5.2.0, file-type@^5.2.0: + version "5.2.0" + resolved "https://registry.yarnpkg.com/file-type/-/file-type-5.2.0.tgz#2ddbea7c73ffe36368dfae49dc338c058c2b8ad6" + integrity sha512-Iq1nJ6D2+yIO4c8HHg4fyVb8mAJieo1Oloy1mLLaB2PvezNedhBVm+QU7g0qM42aiMbRXTxKKwGD17rjKNJYVQ== + +file-type@^10.4.0, file-type@^10.5.0: + version "10.11.0" + resolved "https://registry.yarnpkg.com/file-type/-/file-type-10.11.0.tgz#2961d09e4675b9fb9a3ee6b69e9cd23f43fd1890" + integrity sha512-uzk64HRpUZyTGZtVuvrjP0FYxzQrBf4rojot6J65YMEbwBLB0CWm0CLojVpwpmFmxcE/lkvYICgfcGozbBq6rw== + +file-type@^12.0.0: + version "12.4.2" + resolved "https://registry.yarnpkg.com/file-type/-/file-type-12.4.2.tgz#a344ea5664a1d01447ee7fb1b635f72feb6169d9" + integrity 
sha512-UssQP5ZgIOKelfsaB5CuGAL+Y+q7EmONuiwF3N5HAH0t27rvrttgi6Ra9k/+DVaY9UF6+ybxu5pOXLUdA8N7Vg== + +file-type@^3.8.0: + version "3.9.0" + resolved "https://registry.yarnpkg.com/file-type/-/file-type-3.9.0.tgz#257a078384d1db8087bc449d107d52a52672b9e9" + integrity sha512-RLoqTXE8/vPmMuTI88DAzhMYC99I8BWv7zYP4A1puo5HIjEJ5EX48ighy4ZyKMG9EDXxBgW6e++cn7d1xuFghA== + +file-type@^4.2.0: + version "4.4.0" + resolved "https://registry.yarnpkg.com/file-type/-/file-type-4.4.0.tgz#1b600e5fca1fbdc6e80c0a70c71c8dba5f7906c5" + integrity sha512-f2UbFQEk7LXgWpi5ntcO86OeA/cC80fuDDDaX/fZ2ZGel+AF7leRQqBBW1eJNiiQkrZlAoM6P+VYP5P6bOlDEQ== + +file-type@^6.1.0: + version "6.2.0" + resolved "https://registry.yarnpkg.com/file-type/-/file-type-6.2.0.tgz#e50cd75d356ffed4e306dc4f5bcf52a79903a919" + integrity sha512-YPcTBDV+2Tm0VqjybVd32MHdlEGAtuxS3VAYsumFokDSMG+ROT5wawGlnHDoz7bfMcMDt9hxuXvXwoKUx2fkOg== + +file-type@^8.1.0: + version "8.1.0" + resolved "https://registry.yarnpkg.com/file-type/-/file-type-8.1.0.tgz#244f3b7ef641bbe0cca196c7276e4b332399f68c" + integrity sha512-qyQ0pzAy78gVoJsmYeNgl8uH8yKhr1lVhW7JbzJmnlRi0I4R2eEDEJZVKG8agpDnLpacwNbDhLNG/LMdxHD2YQ== + +filename-reserved-regex@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/filename-reserved-regex/-/filename-reserved-regex-2.0.0.tgz#abf73dfab735d045440abfea2d91f389ebbfa229" + integrity sha512-lc1bnsSr4L4Bdif8Xb/qrtokGbq5zlsms/CYH8PP+WtCkGNF65DPiQY8vG3SakEdRn8Dlnm+gW/qWKKjS5sZzQ== + +filenamify@^2.0.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/filenamify/-/filenamify-2.1.0.tgz#88faf495fb1b47abfd612300002a16228c677ee9" + integrity sha512-ICw7NTT6RsDp2rnYKVd8Fu4cr6ITzGy3+u4vUujPkabyaz+03F24NWEX7fs5fp+kBonlaqPH8fAO2NM+SXt/JA== + dependencies: + filename-reserved-regex "^2.0.0" + strip-outer "^1.0.0" + trim-repeated "^1.0.0" + +fill-range@^7.1.1: + version "7.1.1" + resolved "https://registry.yarnpkg.com/fill-range/-/fill-range-7.1.1.tgz#44265d3cac07e3ea7dc247516380643754a05292" + integrity 
sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg== + dependencies: + to-regex-range "^5.0.1" + +finalhandler@1.3.1: + version "1.3.1" + resolved "https://registry.yarnpkg.com/finalhandler/-/finalhandler-1.3.1.tgz#0c575f1d1d324ddd1da35ad7ece3df7d19088019" + integrity sha512-6BN9trH7bp3qvnrRyzsBz+g3lZxTNZTbVO2EV1CS0WIcDbawYVdYvGflME/9QP0h0pYlCDBCTjYa9nZzMDpyxQ== + dependencies: + debug "2.6.9" + encodeurl "~2.0.0" + escape-html "~1.0.3" + on-finished "2.4.1" + parseurl "~1.3.3" + statuses "2.0.1" + unpipe "~1.0.0" + +find-cache-dir@^3.3.1: + version "3.3.2" + resolved "https://registry.yarnpkg.com/find-cache-dir/-/find-cache-dir-3.3.2.tgz#b30c5b6eff0730731aea9bbd9dbecbd80256d64b" + integrity sha512-wXZV5emFEjrridIgED11OoUKLxiYjAcqot/NJdAkOhlJ+vGzwhOAfcG5OX1jP+S0PcjEn8bdMJv+g2jwQ3Onig== + dependencies: + commondir "^1.0.1" + make-dir "^3.0.2" + pkg-dir "^4.1.0" + +find-up@^4.0.0: + version "4.1.0" + resolved "https://registry.yarnpkg.com/find-up/-/find-up-4.1.0.tgz#97afe7d6cdc0bc5928584b7c8d7b16e8a9aa5d19" + integrity sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw== + dependencies: + locate-path "^5.0.0" + path-exists "^4.0.0" + +find-up@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/find-up/-/find-up-5.0.0.tgz#4c92819ecb7083561e4f4a240a86be5198f536fc" + integrity sha512-78/PXT1wlLLDgTzDs7sjq9hzz0vXD+zn+7wypEe4fXQxCmdmqfGsEPQxmiCSQI3ajFV91bVSsvNtrJRiW6nGng== + dependencies: + locate-path "^6.0.0" + path-exists "^4.0.0" + +find-versions@^3.0.0: + version "3.2.0" + resolved "https://registry.yarnpkg.com/find-versions/-/find-versions-3.2.0.tgz#10297f98030a786829681690545ef659ed1d254e" + integrity sha512-P8WRou2S+oe222TOCHitLy8zj+SIsVJh52VP4lvXkaFVnOFFdoWv1H1Jjvel1aI6NCFOAaeAVm8qrI0odiLcww== + dependencies: + semver-regex "^2.0.0" + +flat-cache@^3.0.4: + version "3.2.0" + resolved 
"https://registry.yarnpkg.com/flat-cache/-/flat-cache-3.2.0.tgz#2c0c2d5040c99b1632771a9d105725c0115363ee" + integrity sha512-CYcENa+FtcUKLmhhqyctpclsq7QF38pKjZHsGNiSQF5r4FtoKDWabFDl3hzaEQMvT1LHEysw5twgLvpYYb4vbw== + dependencies: + flatted "^3.2.9" + keyv "^4.5.3" + rimraf "^3.0.2" + +flat@^5.0.2: + version "5.0.2" + resolved "https://registry.yarnpkg.com/flat/-/flat-5.0.2.tgz#8ca6fe332069ffa9d324c327198c598259ceb241" + integrity sha512-b6suED+5/3rTpUBdG1gupIl8MPFCAMA0QXwmljLhvCUKcUvdE4gWky9zpuGCcXHOsz4J9wPGNWq6OKpmIzz3hQ== + +flatbuffers@^1.12.0: + version "1.12.0" + resolved "https://registry.yarnpkg.com/flatbuffers/-/flatbuffers-1.12.0.tgz#72e87d1726cb1b216e839ef02658aa87dcef68aa" + integrity sha512-c7CZADjRcl6j0PlvFy0ZqXQ67qSEZfrVPynmnL+2zPc+NtMvrF8Y0QceMo7QqnSPc7+uWjUIAbvCQ5WIKlMVdQ== + +flatted@^3.2.9: + version "3.3.1" + resolved "https://registry.yarnpkg.com/flatted/-/flatted-3.3.1.tgz#21db470729a6734d4997002f439cb308987f567a" + integrity sha512-X8cqMLLie7KsNUDSdzeN8FYK9rEt4Dt67OsG/DNGnYTSDBG4uFAJFBnUeiV+zCVAvwFy56IjM9sH51jVaEhNxw== + +follow-redirects@^1.0.0, follow-redirects@^1.15.6: + version "1.15.9" + resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.15.9.tgz#a604fa10e443bf98ca94228d9eebcc2e8a2c8ee1" + integrity sha512-gew4GsXizNgdoRyqmyfMHyAmXsZDk6mHkSxZFCzW9gwlbtOW44CDtYavM+y+72qD/Vq2l550kMF52DT8fOLJqQ== + +for-each@^0.3.3: + version "0.3.3" + resolved "https://registry.yarnpkg.com/for-each/-/for-each-0.3.3.tgz#69b447e88a0a5d32c3e7084f3f1710034b21376e" + integrity sha512-jqYfLp7mo9vIyQf8ykW2v7A+2N4QjeCeI5+Dz9XraiO1ign81wjiH7Fb9vSOWvQfNtmSa4H2RoQTrrXivdUZmw== + dependencies: + is-callable "^1.1.3" + +foreground-child@^3.1.0: + version "3.3.0" + resolved "https://registry.yarnpkg.com/foreground-child/-/foreground-child-3.3.0.tgz#0ac8644c06e431439f8561db8ecf29a7b5519c77" + integrity sha512-Ld2g8rrAyMYFXBhEqMz8ZAHBi4J4uS1i/CxGMDnjyFWddMXLVcDp051DZfu+t7+ab7Wv6SMqpWmyFIj5UbfFvg== + dependencies: + cross-spawn "^7.0.0" + 
signal-exit "^4.0.1" + +fork-ts-checker-webpack-plugin@^7.2.13: + version "7.3.0" + resolved "https://registry.yarnpkg.com/fork-ts-checker-webpack-plugin/-/fork-ts-checker-webpack-plugin-7.3.0.tgz#a9c984a018493962360d7c7e77a67b44a2d5f3aa" + integrity sha512-IN+XTzusCjR5VgntYFgxbxVx3WraPRnKehBFrf00cMSrtUuW9MsG9dhL6MWpY6MkjC3wVwoujfCDgZZCQwbswA== + dependencies: + "@babel/code-frame" "^7.16.7" + chalk "^4.1.2" + chokidar "^3.5.3" + cosmiconfig "^7.0.1" + deepmerge "^4.2.2" + fs-extra "^10.0.0" + memfs "^3.4.1" + minimatch "^3.0.4" + node-abort-controller "^3.0.1" + schema-utils "^3.1.1" + semver "^7.3.5" + tapable "^2.2.1" + +form-data@^4.0.0: + version "4.0.1" + resolved "https://registry.yarnpkg.com/form-data/-/form-data-4.0.1.tgz#ba1076daaaa5bfd7e99c1a6cb02aa0a5cff90d48" + integrity sha512-tzN8e4TX8+kkxGPK8D5u0FNmjPUjw3lwC9lSLxxoB/+GtsJG91CO8bSWy73APlgAZzZbXEYZJuxjkHH2w+Ezhw== + dependencies: + asynckit "^0.4.0" + combined-stream "^1.0.8" + mime-types "^2.1.12" + +forwarded@0.2.0: + version "0.2.0" + resolved "https://registry.yarnpkg.com/forwarded/-/forwarded-0.2.0.tgz#2269936428aad4c15c7ebe9779a84bf0b2a81811" + integrity sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow== + +fraction.js@^4.3.7: + version "4.3.7" + resolved "https://registry.yarnpkg.com/fraction.js/-/fraction.js-4.3.7.tgz#06ca0085157e42fda7f9e726e79fefc4068840f7" + integrity sha512-ZsDfxO51wGAXREY55a7la9LScWpwv9RxIrYABrlvOFBlH/ShPnrtsXeuUIfXKKOVicNxQ+o8JTbJvjS4M89yew== + +fresh@0.5.2: + version "0.5.2" + resolved "https://registry.yarnpkg.com/fresh/-/fresh-0.5.2.tgz#3d8cadd90d976569fa835ab1f8e4b23a105605a7" + integrity sha512-zJ2mQYM18rEFOudeV4GShTGIQ7RbzA7ozbU9I/XBpm7kqgMywgmylMwXHxZJmkVoYkna9d2pVXVXPdYTP9ej8Q== + +friendly-errors-webpack-plugin@^1.7.0: + version "1.7.0" + resolved "https://registry.yarnpkg.com/friendly-errors-webpack-plugin/-/friendly-errors-webpack-plugin-1.7.0.tgz#efc86cbb816224565861a1be7a9d84d0aafea136" + integrity 
sha512-K27M3VK30wVoOarP651zDmb93R9zF28usW4ocaK3mfQeIEI5BPht/EzZs5E8QLLwbLRJQMwscAjDxYPb1FuNiw== + dependencies: + chalk "^1.1.3" + error-stack-parser "^2.0.0" + string-width "^2.0.0" + +from2@^2.1.1: + version "2.3.0" + resolved "https://registry.yarnpkg.com/from2/-/from2-2.3.0.tgz#8bfb5502bde4a4d36cfdeea007fcca21d7e382af" + integrity sha512-OMcX/4IC/uqEPVgGeyfN22LJk6AZrMkRZHxcHBMBvHScDGgwTm2GT2Wkgtocyd3JfZffjj2kYUDXXII0Fk9W0g== + dependencies: + inherits "^2.0.1" + readable-stream "^2.0.0" + +fs-constants@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/fs-constants/-/fs-constants-1.0.0.tgz#6be0de9be998ce16af8afc24497b9ee9b7ccd9ad" + integrity sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow== + +fs-extra@^10.0.0: + version "10.1.0" + resolved "https://registry.yarnpkg.com/fs-extra/-/fs-extra-10.1.0.tgz#02873cfbc4084dde127eaa5f9905eef2325d1abf" + integrity sha512-oRXApq54ETRj4eMiFzGnHWGy+zo5raudjuxN0b8H7s/RU2oW0Wvsx9O0ACRN/kRq9E8Vu/ReskGB5o3ji+FzHQ== + dependencies: + graceful-fs "^4.2.0" + jsonfile "^6.0.1" + universalify "^2.0.0" + +fs-monkey@^1.0.4: + version "1.0.6" + resolved "https://registry.yarnpkg.com/fs-monkey/-/fs-monkey-1.0.6.tgz#8ead082953e88d992cf3ff844faa907b26756da2" + integrity sha512-b1FMfwetIKymC0eioW7mTywihSQE4oLzQn1dB6rZB5fx/3NpNEdAWeCSMB+60/AeT0TCXsxzAlcYVEFCTAksWg== + +fs.realpath@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/fs.realpath/-/fs.realpath-1.0.0.tgz#1504ad2523158caa40db4a2787cb01411994ea4f" + integrity sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw== + +fsevents@~2.3.2: + version "2.3.3" + resolved "https://registry.yarnpkg.com/fsevents/-/fsevents-2.3.3.tgz#cac6407785d03675a2a5e1a5305c697b347d90d6" + integrity sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw== + +function-bind@^1.1.2: + version "1.1.2" + resolved 
"https://registry.yarnpkg.com/function-bind/-/function-bind-1.1.2.tgz#2c02d864d97f3ea6c8830c464cbd11ab6eab7a1c" + integrity sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA== + +function.prototype.name@^1.1.6: + version "1.1.6" + resolved "https://registry.yarnpkg.com/function.prototype.name/-/function.prototype.name-1.1.6.tgz#cdf315b7d90ee77a4c6ee216c3c3362da07533fd" + integrity sha512-Z5kx79swU5P27WEayXM1tBi5Ze/lbIyiNgU3qyXUOf9b2rgXYyF9Dy9Cx+IQv/Lc8WCG6L82zwUPpSS9hGehIg== + dependencies: + call-bind "^1.0.2" + define-properties "^1.2.0" + es-abstract "^1.22.1" + functions-have-names "^1.2.3" + +functions-have-names@^1.2.3: + version "1.2.3" + resolved "https://registry.yarnpkg.com/functions-have-names/-/functions-have-names-1.2.3.tgz#0404fe4ee2ba2f607f0e0ec3c80bae994133b834" + integrity sha512-xckBUXyTIqT97tq2x2AMb+g163b5JFysYk0x4qxNFwbfQkmNZoiRHb6sPzI9/QV33WeuvVYBUIiD4NzNIyqaRQ== + +gensync@^1.0.0-beta.2: + version "1.0.0-beta.2" + resolved "https://registry.yarnpkg.com/gensync/-/gensync-1.0.0-beta.2.tgz#32a6ee76c3d7f52d46b2b1ae5d93fea8580a25e0" + integrity sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg== + +get-caller-file@^2.0.5: + version "2.0.5" + resolved "https://registry.yarnpkg.com/get-caller-file/-/get-caller-file-2.0.5.tgz#4f94412a82db32f36e3b0b9741f8a97feb031f7e" + integrity sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg== + +get-intrinsic@^1.1.3, get-intrinsic@^1.2.1, get-intrinsic@^1.2.2, get-intrinsic@^1.2.3, get-intrinsic@^1.2.4: + version "1.2.4" + resolved "https://registry.yarnpkg.com/get-intrinsic/-/get-intrinsic-1.2.4.tgz#e385f5a4b5227d449c3eabbad05494ef0abbeadd" + integrity sha512-5uYhsJH8VJBTv7oslg4BznJYhDoRI6waYCxMmCdnTrcCrHA/fCFKoTFz2JKKE0HdDFUF7/oQuhzumXJK7paBRQ== + dependencies: + es-errors "^1.3.0" + function-bind "^1.1.2" + has-proto "^1.0.1" + has-symbols "^1.0.3" + hasown "^2.0.0" + 
+get-proxy@^2.0.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/get-proxy/-/get-proxy-2.1.0.tgz#349f2b4d91d44c4d4d4e9cba2ad90143fac5ef93" + integrity sha512-zmZIaQTWnNQb4R4fJUEp/FC51eZsc6EkErspy3xtIYStaq8EB/hDIWipxsal+E8rz0qD7f2sL/NA9Xee4RInJw== + dependencies: + npm-conf "^1.1.0" + +get-stream@3.0.0, get-stream@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/get-stream/-/get-stream-3.0.0.tgz#8e943d1358dc37555054ecbe2edb05aa174ede14" + integrity sha512-GlhdIUuVakc8SJ6kK0zAFbiGzRFzNnY4jUuEbV9UROo4Y+0Ny4fjvcZFVTeDA4odpFyOQzaw6hXukJSq/f28sQ== + +get-stream@^2.2.0: + version "2.3.1" + resolved "https://registry.yarnpkg.com/get-stream/-/get-stream-2.3.1.tgz#5f38f93f346009666ee0150a054167f91bdd95de" + integrity sha512-AUGhbbemXxrZJRD5cDvKtQxLuYaIbNtDTK8YqupCI393Q2KSTreEsLUN3ZxAWFGiKTzL6nKuzfcIvieflUX9qA== + dependencies: + object-assign "^4.0.1" + pinkie-promise "^2.0.0" + +get-stream@^4.0.0: + version "4.1.0" + resolved "https://registry.yarnpkg.com/get-stream/-/get-stream-4.1.0.tgz#c1b255575f3dc21d59bfc79cd3d2b46b1c3a54b5" + integrity sha512-GMat4EJ5161kIy2HevLlr4luNjBgvmj413KaQA7jt4V8B4RDsfpHk7WQ9GVqfYyyx8OS/L66Kox+rJRNklLK7w== + dependencies: + pump "^3.0.0" + +get-stream@^5.0.0: + version "5.2.0" + resolved "https://registry.yarnpkg.com/get-stream/-/get-stream-5.2.0.tgz#4966a1795ee5ace65e706c4b7beb71257d6e22d3" + integrity sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA== + dependencies: + pump "^3.0.0" + +get-stream@^6.0.0: + version "6.0.1" + resolved "https://registry.yarnpkg.com/get-stream/-/get-stream-6.0.1.tgz#a262d8eef67aced57c2852ad6167526a43cbf7b7" + integrity sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg== + +get-symbol-description@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/get-symbol-description/-/get-symbol-description-1.0.2.tgz#533744d5aa20aca4e079c8e5daf7fd44202821f5" + integrity 
sha512-g0QYk1dZBxGwk+Ngc+ltRH2IBp2f7zBkBMBJZCDerh6EhlhSR6+9irMCuT/09zD6qkarHUSn529sK/yL4S27mg== + dependencies: + call-bind "^1.0.5" + es-errors "^1.3.0" + get-intrinsic "^1.2.4" + +gifsicle@^5.0.0: + version "5.3.0" + resolved "https://registry.yarnpkg.com/gifsicle/-/gifsicle-5.3.0.tgz#499713c6f1e89ebbc3630da3a74fdb4697913b4e" + integrity sha512-FJTpgdj1Ow/FITB7SVza5HlzXa+/lqEY0tHQazAJbuAdvyJtkH4wIdsR2K414oaTwRXHFLLF+tYbipj+OpYg+Q== + dependencies: + bin-build "^3.0.0" + bin-wrapper "^4.0.0" + execa "^5.0.0" + +glob-parent@^5.1.2, glob-parent@~5.1.2: + version "5.1.2" + resolved "https://registry.yarnpkg.com/glob-parent/-/glob-parent-5.1.2.tgz#869832c58034fe68a4093c17dc15e8340d8401c4" + integrity sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow== + dependencies: + is-glob "^4.0.1" + +glob-parent@^6.0.1, glob-parent@^6.0.2: + version "6.0.2" + resolved "https://registry.yarnpkg.com/glob-parent/-/glob-parent-6.0.2.tgz#6d237d99083950c79290f24c7642a3de9a28f9e3" + integrity sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A== + dependencies: + is-glob "^4.0.3" + +glob-to-regexp@^0.4.1: + version "0.4.1" + resolved "https://registry.yarnpkg.com/glob-to-regexp/-/glob-to-regexp-0.4.1.tgz#c75297087c851b9a578bd217dd59a92f59fe546e" + integrity sha512-lkX1HJXwyMcprw/5YUZc2s7DrpAiHB21/V+E1rHUrVNokkvB6bqMzT0VfV6/86ZNabt1k14YOIaT7nDvOX3Iiw== + +glob@^10.3.10: + version "10.4.5" + resolved "https://registry.yarnpkg.com/glob/-/glob-10.4.5.tgz#f4d9f0b90ffdbab09c9d77f5f29b4262517b0956" + integrity sha512-7Bv8RF0k6xjo7d4A/PxYLbUCfb6c+Vpd2/mB2yRDlew7Jb5hEXiCD9ibfO7wpk8i4sevK6DFny9h7EYbM3/sHg== + dependencies: + foreground-child "^3.1.0" + jackspeak "^3.1.2" + minimatch "^9.0.4" + minipass "^7.1.2" + package-json-from-dist "^1.0.0" + path-scurry "^1.11.1" + +glob@^7.1.3: + version "7.2.3" + resolved "https://registry.yarnpkg.com/glob/-/glob-7.2.3.tgz#b8df0fb802bbfa8e89bd1d938b4e16578ed44f2b" + 
integrity sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q== + dependencies: + fs.realpath "^1.0.0" + inflight "^1.0.4" + inherits "2" + minimatch "^3.1.1" + once "^1.3.0" + path-is-absolute "^1.0.0" + +globals@^11.1.0: + version "11.12.0" + resolved "https://registry.yarnpkg.com/globals/-/globals-11.12.0.tgz#ab8795338868a0babd8525758018c2a7eb95c42e" + integrity sha512-WOBp/EEGUiIsJSp7wcv/y6MO+lV9UoncWqxuFfm8eBwzWNgyfBd6Gz+IeKQ9jCmyhoH99g15M3T+QaVHFjizVA== + +globals@^13.19.0: + version "13.24.0" + resolved "https://registry.yarnpkg.com/globals/-/globals-13.24.0.tgz#8432a19d78ce0c1e833949c36adb345400bb1171" + integrity sha512-AhO5QUcj8llrbG09iWhPU2B204J1xnPeL8kQmVorSsy+Sjj1sk8gIyh6cUocGmH4L0UuhAJy+hJMRA4mgA4mFQ== + dependencies: + type-fest "^0.20.2" + +globalthis@^1.0.3, globalthis@^1.0.4: + version "1.0.4" + resolved "https://registry.yarnpkg.com/globalthis/-/globalthis-1.0.4.tgz#7430ed3a975d97bfb59bcce41f5cabbafa651236" + integrity sha512-DpLKbNU4WylpxJykQujfCcwYWiV/Jhm50Goo0wrVILAv5jOr9d+H+UR3PhSCD2rCCEIg0uc+G+muBTwD54JhDQ== + dependencies: + define-properties "^1.2.1" + gopd "^1.0.1" + +globby@^10.0.0: + version "10.0.2" + resolved "https://registry.yarnpkg.com/globby/-/globby-10.0.2.tgz#277593e745acaa4646c3ab411289ec47a0392543" + integrity sha512-7dUi7RvCoT/xast/o/dLN53oqND4yk0nsHkhRgn9w65C4PofCLOoJ39iSOg+qVDdWQPIEj+eszMHQ+aLVwwQSg== + dependencies: + "@types/glob" "^7.1.1" + array-union "^2.1.0" + dir-glob "^3.0.1" + fast-glob "^3.0.3" + glob "^7.1.3" + ignore "^5.1.1" + merge2 "^1.2.3" + slash "^3.0.0" + +globby@^11.1.0: + version "11.1.0" + resolved "https://registry.yarnpkg.com/globby/-/globby-11.1.0.tgz#bd4be98bb042f83d796f7e3811991fbe82a0d34b" + integrity sha512-jhIXaOzy1sb8IyocaruWSn1TjmnBVs8Ayhcy83rmxNJ8q2uWKCAj3CnJY+KpGSXCueAPc0i05kVvVKtP1t9S3g== + dependencies: + array-union "^2.1.0" + dir-glob "^3.0.1" + fast-glob "^3.2.9" + ignore "^5.2.0" + merge2 "^1.4.1" + slash "^3.0.0" + +globby@^13.1.1: + version 
"13.2.2" + resolved "https://registry.yarnpkg.com/globby/-/globby-13.2.2.tgz#63b90b1bf68619c2135475cbd4e71e66aa090592" + integrity sha512-Y1zNGV+pzQdh7H39l9zgB4PJqjRNqydvdYCDG4HFXM4XuvSaQQlEc91IU1yALL8gUTDomgBAfz3XJdmUS+oo0w== + dependencies: + dir-glob "^3.0.1" + fast-glob "^3.3.0" + ignore "^5.2.4" + merge2 "^1.4.1" + slash "^4.0.0" + +gopd@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/gopd/-/gopd-1.0.1.tgz#29ff76de69dac7489b7c0918a5788e56477c332c" + integrity sha512-d65bNlIadxvpb/A2abVdlqKqV563juRnZ1Wtk6s1sIR8uNsXR70xqIzVqxVf1eTqDunwT2MkczEeaezCKTZhwA== + dependencies: + get-intrinsic "^1.1.3" + +got@^7.0.0: + version "7.1.0" + resolved "https://registry.yarnpkg.com/got/-/got-7.1.0.tgz#05450fd84094e6bbea56f451a43a9c289166385a" + integrity sha512-Y5WMo7xKKq1muPsxD+KmrR8DH5auG7fBdDVueZwETwV6VytKyU9OX/ddpq2/1hp1vIPvVb4T81dKQz3BivkNLw== + dependencies: + decompress-response "^3.2.0" + duplexer3 "^0.1.4" + get-stream "^3.0.0" + is-plain-obj "^1.1.0" + is-retry-allowed "^1.0.0" + is-stream "^1.0.0" + isurl "^1.0.0-alpha5" + lowercase-keys "^1.0.0" + p-cancelable "^0.3.0" + p-timeout "^1.1.1" + safe-buffer "^5.0.1" + timed-out "^4.0.0" + url-parse-lax "^1.0.0" + url-to-options "^1.0.1" + +got@^8.3.1: + version "8.3.2" + resolved "https://registry.yarnpkg.com/got/-/got-8.3.2.tgz#1d23f64390e97f776cac52e5b936e5f514d2e937" + integrity sha512-qjUJ5U/hawxosMryILofZCkm3C84PLJS/0grRIpjAwu+Lkxxj5cxeCU25BG0/3mDSpXKTyZr8oh8wIgLaH0QCw== + dependencies: + "@sindresorhus/is" "^0.7.0" + cacheable-request "^2.1.1" + decompress-response "^3.3.0" + duplexer3 "^0.1.4" + get-stream "^3.0.0" + into-stream "^3.1.0" + is-retry-allowed "^1.1.0" + isurl "^1.0.0-alpha5" + lowercase-keys "^1.0.0" + mimic-response "^1.0.0" + p-cancelable "^0.4.0" + p-timeout "^2.0.1" + pify "^3.0.0" + safe-buffer "^5.1.1" + timed-out "^4.0.1" + url-parse-lax "^3.0.0" + url-to-options "^1.0.1" + +graceful-fs@^4.1.10, graceful-fs@^4.1.2, graceful-fs@^4.1.6, graceful-fs@^4.2.0, 
graceful-fs@^4.2.11, graceful-fs@^4.2.2, graceful-fs@^4.2.4, graceful-fs@^4.2.6: + version "4.2.11" + resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.11.tgz#4183e4e8bf08bb6e05bbb2f7d2e0c8f712ca40e3" + integrity sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ== + +graphemer@^1.4.0: + version "1.4.0" + resolved "https://registry.yarnpkg.com/graphemer/-/graphemer-1.4.0.tgz#fb2f1d55e0e3a1849aeffc90c4fa0dd53a0e66c6" + integrity sha512-EtKwoO6kxCL9WO5xipiHTZlSzBm7WLT627TqC/uVRd0HKmq8NXyebnNYxDoBi7wt8eTWrUrKXCOVaFq9x1kgag== + +graphql@^16.8.1: + version "16.9.0" + resolved "https://registry.yarnpkg.com/graphql/-/graphql-16.9.0.tgz#1c310e63f16a49ce1fbb230bd0a000e99f6f115f" + integrity sha512-GGTKBX4SD7Wdb8mqeDLni2oaRGYQWjWHGKPQ24ZMnUtKfcsVoiv4uX8+LJr1K6U5VW2Lu1BwJnj7uiori0YtRw== + +guid-typescript@^1.0.9: + version "1.0.9" + resolved "https://registry.yarnpkg.com/guid-typescript/-/guid-typescript-1.0.9.tgz#e35f77003535b0297ea08548f5ace6adb1480ddc" + integrity sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ== + +handle-thing@^2.0.0: + version "2.0.1" + resolved "https://registry.yarnpkg.com/handle-thing/-/handle-thing-2.0.1.tgz#857f79ce359580c340d43081cc648970d0bb234e" + integrity sha512-9Qn4yBxelxoh2Ow62nP+Ka/kMnOXRi8BXnRaUwezLNhqelnN49xKz4F/dPP8OYLxLxq6JDtZb2i9XznUQbNPTg== + +has-ansi@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/has-ansi/-/has-ansi-2.0.0.tgz#34f5049ce1ecdf2b0649af3ef24e45ed35416d91" + integrity sha512-C8vBJ8DwUCx19vhm7urhTuUsr4/IyP6l4VzNQDv+ryHQObW3TTTp9yB68WpYgRe2bbaGuZ/se74IqFeVnMnLZg== + dependencies: + ansi-regex "^2.0.0" + +has-bigints@^1.0.1, has-bigints@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/has-bigints/-/has-bigints-1.0.2.tgz#0871bd3e3d51626f6ca0966668ba35d5602d6eaa" + integrity sha512-tSvCKtBr9lkF0Ex0aQiP9N+OpV4zi2r/Nee5VkRDbaqv35RLYMzbwQfFSZZH0kR+Rd6302UJZ2p/bJCEoR3VoQ== + 
+has-flag@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/has-flag/-/has-flag-4.0.0.tgz#944771fd9c81c81265c4d6941860da06bb59479b" + integrity sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ== + +has-property-descriptors@^1.0.0, has-property-descriptors@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/has-property-descriptors/-/has-property-descriptors-1.0.2.tgz#963ed7d071dc7bf5f084c5bfbe0d1b6222586854" + integrity sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg== + dependencies: + es-define-property "^1.0.0" + +has-proto@^1.0.1, has-proto@^1.0.3: + version "1.0.3" + resolved "https://registry.yarnpkg.com/has-proto/-/has-proto-1.0.3.tgz#b31ddfe9b0e6e9914536a6ab286426d0214f77fd" + integrity sha512-SJ1amZAJUiZS+PhsVLf5tGydlaVB8EdFpaSO4gmiUKUOxk8qzn5AIy4ZeJUmh22znIdk/uMAUT2pl3FxzVUH+Q== + +has-symbol-support-x@^1.4.1: + version "1.4.2" + resolved "https://registry.yarnpkg.com/has-symbol-support-x/-/has-symbol-support-x-1.4.2.tgz#1409f98bc00247da45da67cee0a36f282ff26455" + integrity sha512-3ToOva++HaW+eCpgqZrCfN51IPB+7bJNVT6CUATzueB5Heb8o6Nam0V3HG5dlDvZU1Gn5QLcbahiKw/XVk5JJw== + +has-symbols@^1.0.2, has-symbols@^1.0.3: + version "1.0.3" + resolved "https://registry.yarnpkg.com/has-symbols/-/has-symbols-1.0.3.tgz#bb7b2c4349251dce87b125f7bdf874aa7c8b39f8" + integrity sha512-l3LCuF6MgDNwTDKkdYGEihYjt5pRPbEg46rtlmnSPlUbgmB8LOIrKJbYYFBSbnPaJexMKtiPO8hmeRjRz2Td+A== + +has-to-string-tag-x@^1.2.0: + version "1.4.1" + resolved "https://registry.yarnpkg.com/has-to-string-tag-x/-/has-to-string-tag-x-1.4.1.tgz#a045ab383d7b4b2012a00148ab0aa5f290044d4d" + integrity sha512-vdbKfmw+3LoOYVr+mtxHaX5a96+0f3DljYd8JOqvOLsf5mw2Otda2qCDT9qRqLAhrjyQ0h7ual5nOiASpsGNFw== + dependencies: + has-symbol-support-x "^1.4.1" + +has-tostringtag@^1.0.0, has-tostringtag@^1.0.2: + version "1.0.2" + resolved 
"https://registry.yarnpkg.com/has-tostringtag/-/has-tostringtag-1.0.2.tgz#2cdc42d40bef2e5b4eeab7c01a73c54ce7ab5abc" + integrity sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw== + dependencies: + has-symbols "^1.0.3" + +hasown@^2.0.0, hasown@^2.0.1, hasown@^2.0.2: + version "2.0.2" + resolved "https://registry.yarnpkg.com/hasown/-/hasown-2.0.2.tgz#003eaf91be7adc372e84ec59dc37252cedb80003" + integrity sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ== + dependencies: + function-bind "^1.1.2" + +he@^1.2.0: + version "1.2.0" + resolved "https://registry.yarnpkg.com/he/-/he-1.2.0.tgz#84ae65fa7eafb165fddb61566ae14baf05664f0f" + integrity sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw== + +headers-polyfill@^4.0.2: + version "4.0.3" + resolved "https://registry.yarnpkg.com/headers-polyfill/-/headers-polyfill-4.0.3.tgz#922a0155de30ecc1f785bcf04be77844ca95ad07" + integrity sha512-IScLbePpkvO846sIwOtOTDjutRMWdXdJmXdMvk6gCBHxFO8d+QKOQedyZSxFTTFYRSmlgSTDtXqqq4pcenBXLQ== + +hpack.js@^2.1.6: + version "2.1.6" + resolved "https://registry.yarnpkg.com/hpack.js/-/hpack.js-2.1.6.tgz#87774c0949e513f42e84575b3c45681fade2a0b2" + integrity sha512-zJxVehUdMGIKsRaNt7apO2Gqp0BdqW5yaiGHXXmbpvxgBYVZnAql+BJb4RO5ad2MgpbZKn5G6nMnegrH1FcNYQ== + dependencies: + inherits "^2.0.1" + obuf "^1.0.0" + readable-stream "^2.0.1" + wbuf "^1.1.0" + +html-entities@^2.1.0, html-entities@^2.3.2: + version "2.5.2" + resolved "https://registry.yarnpkg.com/html-entities/-/html-entities-2.5.2.tgz#201a3cf95d3a15be7099521620d19dfb4f65359f" + integrity sha512-K//PSRMQk4FZ78Kyau+mZurHn3FH0Vwr+H36eE0rPbeYkRRi9YxceYPhuN60UwWorxyKHhqoAJl2OFKa4BVtaA== + +html-minifier-terser@^6.0.2: + version "6.1.0" + resolved "https://registry.yarnpkg.com/html-minifier-terser/-/html-minifier-terser-6.1.0.tgz#bfc818934cc07918f6b3669f5774ecdfd48f32ab" + integrity 
sha512-YXxSlJBZTP7RS3tWnQw74ooKa6L9b9i9QYXY21eUEvhZ3u9XLfv6OnFsQq6RxkhHygsaUMvYsZRV5rU/OVNZxw== + dependencies: + camel-case "^4.1.2" + clean-css "^5.2.2" + commander "^8.3.0" + he "^1.2.0" + param-case "^3.0.4" + relateurl "^0.2.7" + terser "^5.10.0" + +html-webpack-plugin@^5.5.0: + version "5.6.3" + resolved "https://registry.yarnpkg.com/html-webpack-plugin/-/html-webpack-plugin-5.6.3.tgz#a31145f0fee4184d53a794f9513147df1e653685" + integrity sha512-QSf1yjtSAsmf7rYBV7XX86uua4W/vkhIt0xNXKbsi2foEeW7vjJQz4bhnpL3xH+l1ryl1680uNv968Z+X6jSYg== + dependencies: + "@types/html-minifier-terser" "^6.0.0" + html-minifier-terser "^6.0.2" + lodash "^4.17.21" + pretty-error "^4.0.0" + tapable "^2.0.0" + +htmlparser2@^6.1.0: + version "6.1.0" + resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-6.1.0.tgz#c4d762b6c3371a05dbe65e94ae43a9f845fb8fb7" + integrity sha512-gyyPk6rgonLFEDGoeRgQNaEUvdJ4ktTmmUh/h2t7s+M8oPpIPxgNACWa+6ESR57kXstwqPiCut0V8NRpcwgU7A== + dependencies: + domelementtype "^2.0.1" + domhandler "^4.0.0" + domutils "^2.5.2" + entities "^2.0.0" + +http-cache-semantics@3.8.1: + version "3.8.1" + resolved "https://registry.yarnpkg.com/http-cache-semantics/-/http-cache-semantics-3.8.1.tgz#39b0e16add9b605bf0a9ef3d9daaf4843b4cacd2" + integrity sha512-5ai2iksyV8ZXmnZhHH4rWPoxxistEexSi5936zIQ1bnNTW5VnA85B6P/VpXiRM017IgRvb2kKo1a//y+0wSp3w== + +http-deceiver@^1.2.7: + version "1.2.7" + resolved "https://registry.yarnpkg.com/http-deceiver/-/http-deceiver-1.2.7.tgz#fa7168944ab9a519d337cb0bec7284dc3e723d87" + integrity sha512-LmpOGxTfbpgtGVxJrj5k7asXHCgNZp5nLfp+hWc8QQRqtb7fUy6kRY3BO1h9ddF6yIPYUARgxGOwB42DnxIaNw== + +http-errors@2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/http-errors/-/http-errors-2.0.0.tgz#b7774a1486ef73cf7667ac9ae0858c012c57b9d3" + integrity sha512-FtwrG/euBzaEjYeRqOgly7G0qviiXoJWnvEH2Z1plBdXgbyjv34pHTSb9zoeHMyDy33+DWy5Wt9Wo+TURtOYSQ== + dependencies: + depd "2.0.0" + inherits "2.0.4" + setprototypeof "1.2.0" + statuses "2.0.1" + 
toidentifier "1.0.1" + +http-errors@~1.6.2: + version "1.6.3" + resolved "https://registry.yarnpkg.com/http-errors/-/http-errors-1.6.3.tgz#8b55680bb4be283a0b5bf4ea2e38580be1d9320d" + integrity sha512-lks+lVC8dgGyh97jxvxeYTWQFvh4uw4yC12gVl63Cg30sjPX4wuGcdkICVXDAESr6OJGjqGA8Iz5mkeN6zlD7A== + dependencies: + depd "~1.1.2" + inherits "2.0.3" + setprototypeof "1.1.0" + statuses ">= 1.4.0 < 2" + +http-parser-js@>=0.5.1: + version "0.5.8" + resolved "https://registry.yarnpkg.com/http-parser-js/-/http-parser-js-0.5.8.tgz#af23090d9ac4e24573de6f6aecc9d84a48bf20e3" + integrity sha512-SGeBX54F94Wgu5RH3X5jsDtf4eHyRogWX1XGT3b4HuW3tQPM4AaBzoUji/4AAJNXCEOWZ5O0DgZmJw1947gD5Q== + +http-proxy-middleware@^2.0.3: + version "2.0.7" + resolved "https://registry.yarnpkg.com/http-proxy-middleware/-/http-proxy-middleware-2.0.7.tgz#915f236d92ae98ef48278a95dedf17e991936ec6" + integrity sha512-fgVY8AV7qU7z/MmXJ/rxwbrtQH4jBQ9m7kp3llF0liB7glmFeVZFBepQb32T3y8n8k2+AEYuMPCpinYW+/CuRA== + dependencies: + "@types/http-proxy" "^1.17.8" + http-proxy "^1.18.1" + is-glob "^4.0.1" + is-plain-obj "^3.0.0" + micromatch "^4.0.2" + +http-proxy@^1.18.1: + version "1.18.1" + resolved "https://registry.yarnpkg.com/http-proxy/-/http-proxy-1.18.1.tgz#401541f0534884bbf95260334e72f88ee3976549" + integrity sha512-7mz/721AbnJwIVbnaSv1Cz3Am0ZLT/UBwkC92VlxhXv/k/BBQfM2fXElQNC27BVGr0uwUpplYPQM9LnaBMR5NQ== + dependencies: + eventemitter3 "^4.0.0" + follow-redirects "^1.0.0" + requires-port "^1.0.0" + +human-signals@^1.1.1: + version "1.1.1" + resolved "https://registry.yarnpkg.com/human-signals/-/human-signals-1.1.1.tgz#c5b1cd14f50aeae09ab6c59fe63ba3395fe4dfa3" + integrity sha512-SEQu7vl8KjNL2eoGBLF3+wAjpsNfA9XMlXAYj/3EdaNfAlxKthD1xjEQfGOUhllCGGJVNY34bRr6lPINhNjyZw== + +human-signals@^2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/human-signals/-/human-signals-2.1.0.tgz#dc91fcba42e4d06e4abaed33b3e7a3c02f514ea0" + integrity 
sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw== + +iconv-lite@0.4.24: + version "0.4.24" + resolved "https://registry.yarnpkg.com/iconv-lite/-/iconv-lite-0.4.24.tgz#2022b4b25fbddc21d2f524974a474aafe733908b" + integrity sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA== + dependencies: + safer-buffer ">= 2.1.2 < 3" + +icss-utils@^5.0.0, icss-utils@^5.1.0: + version "5.1.0" + resolved "https://registry.yarnpkg.com/icss-utils/-/icss-utils-5.1.0.tgz#c6be6858abd013d768e98366ae47e25d5887b1ae" + integrity sha512-soFhflCVWLfRNOPU3iv5Z9VUdT44xFRbzjLsEzSr5AQmgqPMTHdU3PMT1Cf1ssx8fLNJDA1juftYl+PUcv3MqA== + +ieee754@^1.1.13: + version "1.2.1" + resolved "https://registry.yarnpkg.com/ieee754/-/ieee754-1.2.1.tgz#8eb7a10a63fff25d15a57b001586d177d1b0d352" + integrity sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA== + +ignore@^5.1.1, ignore@^5.2.0, ignore@^5.2.4: + version "5.3.2" + resolved "https://registry.yarnpkg.com/ignore/-/ignore-5.3.2.tgz#3cd40e729f3643fd87cb04e50bf0eb722bc596f5" + integrity sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g== + +image-webpack-loader@^8.1.0: + version "8.1.0" + resolved "https://registry.yarnpkg.com/image-webpack-loader/-/image-webpack-loader-8.1.0.tgz#cd97172e1e7304ef5eb898344fc25bbb650fc7d7" + integrity sha512-bxzMIBNu42KGo6Bc9YMB0QEUt+XuVTl2ZSX3oGAlbsqYOkxkT4TEWvVsnwUkCRCYISJrMCEc/s0y8OYrmEfUOg== + dependencies: + imagemin "^7.0.1" + loader-utils "^2.0.0" + object-assign "^4.1.1" + schema-utils "^2.7.1" + optionalDependencies: + imagemin-gifsicle "^7.0.0" + imagemin-mozjpeg "^9.0.0" + imagemin-optipng "^8.0.0" + imagemin-pngquant "^9.0.2" + imagemin-svgo "^9.0.0" + imagemin-webp "^7.0.0" + +imagemin-gifsicle@^7.0.0: + version "7.0.0" + resolved 
"https://registry.yarnpkg.com/imagemin-gifsicle/-/imagemin-gifsicle-7.0.0.tgz#1a7ab136a144c4678657ba3b6c412f80805d26b0" + integrity sha512-LaP38xhxAwS3W8PFh4y5iQ6feoTSF+dTAXFRUEYQWYst6Xd+9L/iPk34QGgK/VO/objmIlmq9TStGfVY2IcHIA== + dependencies: + execa "^1.0.0" + gifsicle "^5.0.0" + is-gif "^3.0.0" + +imagemin-mozjpeg@^9.0.0: + version "9.0.0" + resolved "https://registry.yarnpkg.com/imagemin-mozjpeg/-/imagemin-mozjpeg-9.0.0.tgz#d1af26d0b43d75a41c211051c1910da59d9d2324" + integrity sha512-TwOjTzYqCFRgROTWpVSt5UTT0JeCuzF1jswPLKALDd89+PmrJ2PdMMYeDLYZ1fs9cTovI9GJd68mRSnuVt691w== + dependencies: + execa "^4.0.0" + is-jpg "^2.0.0" + mozjpeg "^7.0.0" + +imagemin-optipng@^8.0.0: + version "8.0.0" + resolved "https://registry.yarnpkg.com/imagemin-optipng/-/imagemin-optipng-8.0.0.tgz#b88e5cf6da25cc8479e07cdf38c3ae0479df7ef2" + integrity sha512-CUGfhfwqlPjAC0rm8Fy+R2DJDBGjzy2SkfyT09L8rasnF9jSoHFqJ1xxSZWK6HVPZBMhGPMxCTL70OgTHlLF5A== + dependencies: + exec-buffer "^3.0.0" + is-png "^2.0.0" + optipng-bin "^7.0.0" + +imagemin-pngquant@^9.0.2: + version "9.0.2" + resolved "https://registry.yarnpkg.com/imagemin-pngquant/-/imagemin-pngquant-9.0.2.tgz#38155702b0cc4f60f671ba7c2b086ea3805d9567" + integrity sha512-cj//bKo8+Frd/DM8l6Pg9pws1pnDUjgb7ae++sUX1kUVdv2nrngPykhiUOgFeE0LGY/LmUbCf4egCHC4YUcZSg== + dependencies: + execa "^4.0.0" + is-png "^2.0.0" + is-stream "^2.0.0" + ow "^0.17.0" + pngquant-bin "^6.0.0" + +imagemin-svgo@^9.0.0: + version "9.0.0" + resolved "https://registry.yarnpkg.com/imagemin-svgo/-/imagemin-svgo-9.0.0.tgz#749370804608917a67d4ff590f07a87756aec006" + integrity sha512-uNgXpKHd99C0WODkrJ8OO/3zW3qjgS4pW7hcuII0RcHN3tnKxDjJWcitdVC/TZyfIqSricU8WfrHn26bdSW62g== + dependencies: + is-svg "^4.2.1" + svgo "^2.1.0" + +imagemin-webp@^7.0.0: + version "7.0.0" + resolved "https://registry.yarnpkg.com/imagemin-webp/-/imagemin-webp-7.0.0.tgz#df000ec927855d74d4cfafec8558ac418c88d2a9" + integrity 
sha512-JoYjvHNgBLgrQAkeCO7T5iNc8XVpiBmMPZmiXMhalC7K6gwY/3DCEUfNxVPOmNJ+NIJlJFvzcMR9RBxIE74Xxw== + dependencies: + cwebp-bin "^7.0.1" + exec-buffer "^3.2.0" + is-cwebp-readable "^3.0.0" + +imagemin@^7.0.1: + version "7.0.1" + resolved "https://registry.yarnpkg.com/imagemin/-/imagemin-7.0.1.tgz#f6441ca647197632e23db7d971fffbd530c87dbf" + integrity sha512-33AmZ+xjZhg2JMCe+vDf6a9mzWukE7l+wAtesjE7KyteqqKjzxv7aVQeWnul1Ve26mWvEQqyPwl0OctNBfSR9w== + dependencies: + file-type "^12.0.0" + globby "^10.0.0" + graceful-fs "^4.2.2" + junk "^3.1.0" + make-dir "^3.0.0" + p-pipe "^3.0.0" + replace-ext "^1.0.0" + +immutable@^4.0.0: + version "4.3.7" + resolved "https://registry.yarnpkg.com/immutable/-/immutable-4.3.7.tgz#c70145fc90d89fb02021e65c84eb0226e4e5a381" + integrity sha512-1hqclzwYwjRDFLjcFxOM5AYkkG0rpFPpr1RLPMEuGczoS7YA8gLhy8SWXYRAA/XwfEHpfo3cw5JGioS32fnMRw== + +import-fresh@^3.2.1, import-fresh@^3.3.0: + version "3.3.0" + resolved "https://registry.yarnpkg.com/import-fresh/-/import-fresh-3.3.0.tgz#37162c25fcb9ebaa2e6e53d5b4d88ce17d9e0c2b" + integrity sha512-veYYhQa+D1QBKznvhUHxb8faxlrwUnxseDAbAp457E0wLNio2bOSKnjYDhMj+YiAq61xrMGhQk9iXVk5FzgQMw== + dependencies: + parent-module "^1.0.0" + resolve-from "^4.0.0" + +import-lazy@^3.1.0: + version "3.1.0" + resolved "https://registry.yarnpkg.com/import-lazy/-/import-lazy-3.1.0.tgz#891279202c8a2280fdbd6674dbd8da1a1dfc67cc" + integrity sha512-8/gvXvX2JMn0F+CDlSC4l6kOmVaLOO3XLkksI7CI3Ud95KDYJuYur2b9P/PUt/i/pDAMd/DulQsNbbbmRRsDIQ== + +import-local@^3.0.2: + version "3.2.0" + resolved "https://registry.yarnpkg.com/import-local/-/import-local-3.2.0.tgz#c3d5c745798c02a6f8b897726aba5100186ee260" + integrity sha512-2SPlun1JUPWoM6t3F0dw0FkCF/jWY8kttcY4f599GLTSjh2OCuuhdTkJQsEcZzBqbXZGKMK2OqW1oZsjtf/gQA== + dependencies: + pkg-dir "^4.2.0" + resolve-cwd "^3.0.0" + +imurmurhash@^0.1.4: + version "0.1.4" + resolved "https://registry.yarnpkg.com/imurmurhash/-/imurmurhash-0.1.4.tgz#9218b9b2b928a238b13dc4fb6b6d576f231453ea" + integrity 
sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA== + +inflight@^1.0.4: + version "1.0.6" + resolved "https://registry.yarnpkg.com/inflight/-/inflight-1.0.6.tgz#49bd6331d7d02d0c09bc910a1075ba8165b56df9" + integrity sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA== + dependencies: + once "^1.3.0" + wrappy "1" + +inherits@2, inherits@2.0.4, inherits@^2.0.1, inherits@^2.0.3, inherits@~2.0.3: + version "2.0.4" + resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.4.tgz#0fa2c64f932917c3433a0ded55363aae37416b7c" + integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ== + +inherits@2.0.3: + version "2.0.3" + resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.3.tgz#633c2c83e3da42a502f52466022480f4208261de" + integrity sha512-x00IRNXNy63jwGkJmzPigoySHbaqpNuzKbBOmzK+g2OdZpQ9w+sxCN+VSB3ja7IAge2OP2qpfxTjeNcyjmW1uw== + +ini@^1.3.4: + version "1.3.8" + resolved "https://registry.yarnpkg.com/ini/-/ini-1.3.8.tgz#a29da425b48806f34767a4efce397269af28432c" + integrity sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew== + +internal-slot@^1.0.4, internal-slot@^1.0.7: + version "1.0.7" + resolved "https://registry.yarnpkg.com/internal-slot/-/internal-slot-1.0.7.tgz#c06dcca3ed874249881007b0a5523b172a190802" + integrity sha512-NGnrKwXzSms2qUUih/ILZ5JBqNTSa1+ZmP6flaIp6KmSElgE9qdndzS3cqjrDovwFdmwsGsLdeFgB6suw+1e9g== + dependencies: + es-errors "^1.3.0" + hasown "^2.0.0" + side-channel "^1.0.4" + +interpret@^2.2.0: + version "2.2.0" + resolved "https://registry.yarnpkg.com/interpret/-/interpret-2.2.0.tgz#1a78a0b5965c40a5416d007ad6f50ad27c417df9" + integrity sha512-Ju0Bz/cEia55xDwUWEa8+olFpCiQoypjnQySseKtmjNrnps3P+xfpUmGr90T7yjlVJmOtybRvPXhKMbHr+fWnw== + +into-stream@^3.1.0: + version "3.1.0" + resolved 
"https://registry.yarnpkg.com/into-stream/-/into-stream-3.1.0.tgz#96fb0a936c12babd6ff1752a17d05616abd094c6" + integrity sha512-TcdjPibTksa1NQximqep2r17ISRiNE9fwlfbg3F8ANdvP5/yrFTew86VcO//jk4QTaMlbjypPBq76HN2zaKfZQ== + dependencies: + from2 "^2.1.1" + p-is-promise "^1.1.0" + +ipaddr.js@1.9.1: + version "1.9.1" + resolved "https://registry.yarnpkg.com/ipaddr.js/-/ipaddr.js-1.9.1.tgz#bff38543eeb8984825079ff3a2a8e6cbd46781b3" + integrity sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g== + +ipaddr.js@^2.0.1: + version "2.2.0" + resolved "https://registry.yarnpkg.com/ipaddr.js/-/ipaddr.js-2.2.0.tgz#d33fa7bac284f4de7af949638c9d68157c6b92e8" + integrity sha512-Ag3wB2o37wslZS19hZqorUnrnzSkpOVy+IiiDEiTqNubEYpYuHWIf6K4psgN2ZWKExS4xhVCrRVfb/wfW8fWJA== + +is-arguments@^1.1.1: + version "1.1.1" + resolved "https://registry.yarnpkg.com/is-arguments/-/is-arguments-1.1.1.tgz#15b3f88fda01f2a97fec84ca761a560f123efa9b" + integrity sha512-8Q7EARjzEnKpt/PCD7e1cgUS0a6X8u5tdSiMqXhojOdoV9TsMsiO+9VLC5vAmO8N7/GmXn7yjR8qnA6bVAEzfA== + dependencies: + call-bind "^1.0.2" + has-tostringtag "^1.0.0" + +is-array-buffer@^3.0.2, is-array-buffer@^3.0.4: + version "3.0.4" + resolved "https://registry.yarnpkg.com/is-array-buffer/-/is-array-buffer-3.0.4.tgz#7a1f92b3d61edd2bc65d24f130530ea93d7fae98" + integrity sha512-wcjaerHw0ydZwfhiKbXJWLDY8A7yV7KhjQOpb83hGgGfId/aQa4TOvwyzn2PuswW2gPCYEL/nEAiSVpdOj1lXw== + dependencies: + call-bind "^1.0.2" + get-intrinsic "^1.2.1" + +is-arrayish@^0.2.1: + version "0.2.1" + resolved "https://registry.yarnpkg.com/is-arrayish/-/is-arrayish-0.2.1.tgz#77c99840527aa8ecb1a8ba697b80645a7a926a9d" + integrity sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg== + +is-async-function@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/is-async-function/-/is-async-function-2.0.0.tgz#8e4418efd3e5d3a6ebb0164c05ef5afb69aa9646" + integrity 
sha512-Y1JXKrfykRJGdlDwdKlLpLyMIiWqWvuSd17TvZk68PLAOGOoF4Xyav1z0Xhoi+gCYjZVeC5SI+hYFOfvXmGRCA== + dependencies: + has-tostringtag "^1.0.0" + +is-bigint@^1.0.1: + version "1.0.4" + resolved "https://registry.yarnpkg.com/is-bigint/-/is-bigint-1.0.4.tgz#08147a1875bc2b32005d41ccd8291dffc6691df3" + integrity sha512-zB9CruMamjym81i2JZ3UMn54PKGsQzsJeo6xvN3HJJ4CAsQNB6iRutp2To77OfCNuoxspsIhzaPoO1zyCEhFOg== + dependencies: + has-bigints "^1.0.1" + +is-binary-path@~2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/is-binary-path/-/is-binary-path-2.1.0.tgz#ea1f7f3b80f064236e83470f86c09c254fb45b09" + integrity sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw== + dependencies: + binary-extensions "^2.0.0" + +is-boolean-object@^1.1.0: + version "1.1.2" + resolved "https://registry.yarnpkg.com/is-boolean-object/-/is-boolean-object-1.1.2.tgz#5c6dc200246dd9321ae4b885a114bb1f75f63719" + integrity sha512-gDYaKHJmnj4aWxyj6YHyXVpdQawtVLHU5cb+eztPGczf6cjuTdwve5ZIEfgXqH4e57An1D1AKf8CZ3kYrQRqYA== + dependencies: + call-bind "^1.0.2" + has-tostringtag "^1.0.0" + +is-callable@^1.1.3, is-callable@^1.1.4, is-callable@^1.2.7: + version "1.2.7" + resolved "https://registry.yarnpkg.com/is-callable/-/is-callable-1.2.7.tgz#3bc2a85ea742d9e36205dcacdd72ca1fdc51b055" + integrity sha512-1BC0BVFhS/p0qtw6enp8e+8OD0UrK0oFLztSjNzhcKA3WDuJxxAPXzPuPtKkjEY9UUoEWlX/8fgKeu2S8i9JTA== + +is-core-module@^2.13.0: + version "2.15.1" + resolved "https://registry.yarnpkg.com/is-core-module/-/is-core-module-2.15.1.tgz#a7363a25bee942fefab0de13bf6aa372c82dcc37" + integrity sha512-z0vtXSwucUJtANQWldhbtbt7BnL0vxiFjIdDLAatwhDYty2bad6s+rijD6Ri4YuYJubLzIJLUidCh09e1djEVQ== + dependencies: + hasown "^2.0.2" + +is-cwebp-readable@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/is-cwebp-readable/-/is-cwebp-readable-3.0.0.tgz#0554aaa400977a2fc4de366d8c0244f13cde58cb" + integrity 
sha512-bpELc7/Q1/U5MWHn4NdHI44R3jxk0h9ew9ljzabiRl70/UIjL/ZAqRMb52F5+eke/VC8yTiv4Ewryo1fPWidvA== + dependencies: + file-type "^10.5.0" + +is-data-view@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/is-data-view/-/is-data-view-1.0.1.tgz#4b4d3a511b70f3dc26d42c03ca9ca515d847759f" + integrity sha512-AHkaJrsUVW6wq6JS8y3JnM/GJF/9cf+k20+iDzlSaJrinEo5+7vRiteOSwBhHRiAyQATN1AmY4hwzxJKPmYf+w== + dependencies: + is-typed-array "^1.1.13" + +is-date-object@^1.0.1, is-date-object@^1.0.5: + version "1.0.5" + resolved "https://registry.yarnpkg.com/is-date-object/-/is-date-object-1.0.5.tgz#0841d5536e724c25597bf6ea62e1bd38298df31f" + integrity sha512-9YQaSxsAiSwcvS33MBk3wTCVnWK+HhF8VZR2jRxehM16QcVOdHqPn4VPHmRK4lSr38n9JriurInLcP90xsYNfQ== + dependencies: + has-tostringtag "^1.0.0" + +is-docker@^2.0.0, is-docker@^2.1.1: + version "2.2.1" + resolved "https://registry.yarnpkg.com/is-docker/-/is-docker-2.2.1.tgz#33eeabe23cfe86f14bde4408a02c0cfb853acdaa" + integrity sha512-F+i2BKsFrH66iaUFc0woD8sLy8getkwTwtOBjvs56Cx4CgJDeKQeqfz8wAYiSb8JOprWhHH5p77PbmYCvvUuXQ== + +is-extglob@^2.1.1: + version "2.1.1" + resolved "https://registry.yarnpkg.com/is-extglob/-/is-extglob-2.1.1.tgz#a88c02535791f02ed37c76a1b9ea9773c833f8c2" + integrity sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ== + +is-finalizationregistry@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/is-finalizationregistry/-/is-finalizationregistry-1.0.2.tgz#c8749b65f17c133313e661b1289b95ad3dbd62e6" + integrity sha512-0by5vtUJs8iFQb5TYUHHPudOR+qXYIMKtiUzvLIZITZUjknFmziyBJuLhVRc+Ds0dREFlskDNJKYIdIzu/9pfw== + dependencies: + call-bind "^1.0.2" + +is-fullwidth-code-point@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/is-fullwidth-code-point/-/is-fullwidth-code-point-2.0.0.tgz#a3b30a5c4f199183167aaab93beefae3ddfb654f" + integrity sha512-VHskAKYM8RfSFXwee5t5cbN5PZeq1Wrh6qd5bkyiXIf6UQcN6w/A0eXM9r6t8d+GYOh+o6ZhiEnb88LN/Y8m2w== + 
+is-fullwidth-code-point@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz#f116f8064fe90b3f7844a38997c0b75051269f1d" + integrity sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg== + +is-generator-function@^1.0.10: + version "1.0.10" + resolved "https://registry.yarnpkg.com/is-generator-function/-/is-generator-function-1.0.10.tgz#f1558baf1ac17e0deea7c0415c438351ff2b3c72" + integrity sha512-jsEjy9l3yiXEQ+PsXdmBwEPcOxaXWLspKdplFUVI9vq1iZgIekeC0L167qeu86czQaxed3q/Uzuw0swL0irL8A== + dependencies: + has-tostringtag "^1.0.0" + +is-gif@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/is-gif/-/is-gif-3.0.0.tgz#c4be60b26a301d695bb833b20d9b5d66c6cf83b1" + integrity sha512-IqJ/jlbw5WJSNfwQ/lHEDXF8rxhRgF6ythk2oiEvhpG29F704eX9NO6TvPfMiq9DrbwgcEDnETYNcZDPewQoVw== + dependencies: + file-type "^10.4.0" + +is-glob@^4.0.0, is-glob@^4.0.1, is-glob@^4.0.3, is-glob@~4.0.1: + version "4.0.3" + resolved "https://registry.yarnpkg.com/is-glob/-/is-glob-4.0.3.tgz#64f61e42cbbb2eec2071a9dac0b28ba1e65d5084" + integrity sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg== + dependencies: + is-extglob "^2.1.1" + +is-jpg@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/is-jpg/-/is-jpg-2.0.0.tgz#2e1997fa6e9166eaac0242daae443403e4ef1d97" + integrity sha512-ODlO0ruzhkzD3sdynIainVP5eoOFNN85rxA1+cwwnPe4dKyX0r5+hxNO5XpCrxlHcmb9vkOit9mhRD2JVuimHg== + +is-map@^2.0.2, is-map@^2.0.3: + version "2.0.3" + resolved "https://registry.yarnpkg.com/is-map/-/is-map-2.0.3.tgz#ede96b7fe1e270b3c4465e3a465658764926d62e" + integrity sha512-1Qed0/Hr2m+YqxnM09CjA2d/i6YZNfF6R2oRAOj36eUdS6qIV/huPJNSEpKbupewFs+ZsJlxsjjPbc0/afW6Lw== + +is-natural-number@^4.0.1: + version "4.0.1" + resolved "https://registry.yarnpkg.com/is-natural-number/-/is-natural-number-4.0.1.tgz#ab9d76e1db4ced51e35de0c72ebecf09f734cde8" + integrity 
sha512-Y4LTamMe0DDQIIAlaer9eKebAlDSV6huy+TWhJVPlzZh2o4tRP5SQWFlLn5N0To4mDD22/qdOq+veo1cSISLgQ== + +is-negative-zero@^2.0.3: + version "2.0.3" + resolved "https://registry.yarnpkg.com/is-negative-zero/-/is-negative-zero-2.0.3.tgz#ced903a027aca6381b777a5743069d7376a49747" + integrity sha512-5KoIu2Ngpyek75jXodFvnafB6DJgr3u8uuK0LEZJjrU19DrMD3EVERaR8sjz8CCGgpZvxPl9SuE1GMVPFHx1mw== + +is-node-process@^1.2.0: + version "1.2.0" + resolved "https://registry.yarnpkg.com/is-node-process/-/is-node-process-1.2.0.tgz#ea02a1b90ddb3934a19aea414e88edef7e11d134" + integrity sha512-Vg4o6/fqPxIjtxgUH5QLJhwZ7gW5diGCVlXpuUfELC62CuxM1iHcRe51f2W1FDy04Ai4KJkagKjx3XaqyfRKXw== + +is-number-object@^1.0.4: + version "1.0.7" + resolved "https://registry.yarnpkg.com/is-number-object/-/is-number-object-1.0.7.tgz#59d50ada4c45251784e9904f5246c742f07a42fc" + integrity sha512-k1U0IRzLMo7ZlYIfzRu23Oh6MiIFasgpb9X76eqfFZAqwH44UI4KTBvBYIZ1dSL9ZzChTB9ShHfLkR4pdW5krQ== + dependencies: + has-tostringtag "^1.0.0" + +is-number@^7.0.0: + version "7.0.0" + resolved "https://registry.yarnpkg.com/is-number/-/is-number-7.0.0.tgz#7535345b896734d5f80c4d06c50955527a14f12b" + integrity sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng== + +is-object@^1.0.1: + version "1.0.2" + resolved "https://registry.yarnpkg.com/is-object/-/is-object-1.0.2.tgz#a56552e1c665c9e950b4a025461da87e72f86fcf" + integrity sha512-2rRIahhZr2UWb45fIOuvZGpFtz0TyOZLf32KxBbSoUCeZR495zCKlWUKKUByk3geS2eAs7ZAABt0Y/Rx0GiQGA== + +is-path-inside@^3.0.3: + version "3.0.3" + resolved "https://registry.yarnpkg.com/is-path-inside/-/is-path-inside-3.0.3.tgz#d231362e53a07ff2b0e0ea7fed049161ffd16283" + integrity sha512-Fd4gABb+ycGAmKou8eMftCupSir5lRxqf4aD/vd0cD2qc4HL07OjCeuHMr8Ro4CoMaeCKDB0/ECBOVWjTwUvPQ== + +is-plain-obj@^1.0.0, is-plain-obj@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/is-plain-obj/-/is-plain-obj-1.1.0.tgz#71a50c8429dfca773c92a390a4a03b39fcd51d3e" + integrity 
sha512-yvkRyxmFKEOQ4pNXCmJG5AEQNlXJS5LaONXo5/cLdTZdWvsZ1ioJEonLGAosKlMWE8lwUy/bJzMjcw8az73+Fg== + +is-plain-obj@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/is-plain-obj/-/is-plain-obj-3.0.0.tgz#af6f2ea14ac5a646183a5bbdb5baabbc156ad9d7" + integrity sha512-gwsOE28k+23GP1B6vFl1oVh/WOzmawBrKwo5Ev6wMKzPkaXaCDIQKzLnvsA42DRlbVTWorkgTKIviAKCWkfUwA== + +is-plain-object@^2.0.4: + version "2.0.4" + resolved "https://registry.yarnpkg.com/is-plain-object/-/is-plain-object-2.0.4.tgz#2c163b3fafb1b606d9d17928f05c2a1c38e07677" + integrity sha512-h5PpgXkWitc38BBMYawTYMWJHFZJVnBquFE57xFpjB8pJFiF6gZ+bU+WyI/yqXiFR5mdLsgYNaPe8uao6Uv9Og== + dependencies: + isobject "^3.0.1" + +is-png@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/is-png/-/is-png-2.0.0.tgz#ee8cbc9e9b050425cedeeb4a6fb74a649b0a4a8d" + integrity sha512-4KPGizaVGj2LK7xwJIz8o5B2ubu1D/vcQsgOGFEDlpcvgZHto4gBnyd0ig7Ws+67ixmwKoNmu0hYnpo6AaKb5g== + +is-regex@^1.1.4: + version "1.1.4" + resolved "https://registry.yarnpkg.com/is-regex/-/is-regex-1.1.4.tgz#eef5663cd59fa4c0ae339505323df6854bb15958" + integrity sha512-kvRdxDsxZjhzUX07ZnLydzS1TU/TJlTUHHY4YLL87e37oUA49DfkLqgy+VjFocowy29cKvcSiu+kIv728jTTVg== + dependencies: + call-bind "^1.0.2" + has-tostringtag "^1.0.0" + +is-retry-allowed@^1.0.0, is-retry-allowed@^1.1.0: + version "1.2.0" + resolved "https://registry.yarnpkg.com/is-retry-allowed/-/is-retry-allowed-1.2.0.tgz#d778488bd0a4666a3be8a1482b9f2baafedea8b4" + integrity sha512-RUbUeKwvm3XG2VYamhJL1xFktgjvPzL0Hq8C+6yrWIswDy3BIXGqCxhxkc30N9jqK311gVU137K8Ei55/zVJRg== + +is-set@^2.0.2, is-set@^2.0.3: + version "2.0.3" + resolved "https://registry.yarnpkg.com/is-set/-/is-set-2.0.3.tgz#8ab209ea424608141372ded6e0cb200ef1d9d01d" + integrity sha512-iPAjerrse27/ygGLxw+EBR9agv9Y6uLeYVJMu+QNCoouJ1/1ri0mGrcWpfCqFZuzzx3WjtwxG098X+n4OuRkPg== + +is-shared-array-buffer@^1.0.2, is-shared-array-buffer@^1.0.3: + version "1.0.3" + resolved 
"https://registry.yarnpkg.com/is-shared-array-buffer/-/is-shared-array-buffer-1.0.3.tgz#1237f1cba059cdb62431d378dcc37d9680181688" + integrity sha512-nA2hv5XIhLR3uVzDDfCIknerhx8XUKnstuOERPNNIinXG7v9u+ohXF67vxm4TPTEPU6lm61ZkwP3c9PCB97rhg== + dependencies: + call-bind "^1.0.7" + +is-stream@^1.0.0, is-stream@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/is-stream/-/is-stream-1.1.0.tgz#12d4a3dd4e68e0b79ceb8dbc84173ae80d91ca44" + integrity sha512-uQPm8kcs47jx38atAcWTVxyltQYoPT68y9aWYdV6yWXSyW8mzSat0TL6CiWdZeCdF3KrAvpVtnHbTv4RN+rqdQ== + +is-stream@^2.0.0: + version "2.0.1" + resolved "https://registry.yarnpkg.com/is-stream/-/is-stream-2.0.1.tgz#fac1e3d53b97ad5a9d0ae9cef2389f5810a5c077" + integrity sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg== + +is-string@^1.0.5, is-string@^1.0.7: + version "1.0.7" + resolved "https://registry.yarnpkg.com/is-string/-/is-string-1.0.7.tgz#0dd12bf2006f255bb58f695110eff7491eebc0fd" + integrity sha512-tE2UXzivje6ofPW7l23cjDOMa09gb7xlAqG6jG5ej6uPV32TlWP3NKPigtaGeHNu9fohccRYvIiZMfOOnOYUtg== + dependencies: + has-tostringtag "^1.0.0" + +is-svg@^4.2.1: + version "4.4.0" + resolved "https://registry.yarnpkg.com/is-svg/-/is-svg-4.4.0.tgz#34db20a38146be5f2b3060154da33d11e6f74b7c" + integrity sha512-v+AgVwiK5DsGtT9ng+m4mClp6zDAmwrW8nZi6Gg15qzvBnRWWdfWA1TGaXyCDnWq5g5asofIgMVl3PjKxvk1ug== + dependencies: + fast-xml-parser "^4.1.3" + +is-symbol@^1.0.2, is-symbol@^1.0.3: + version "1.0.4" + resolved "https://registry.yarnpkg.com/is-symbol/-/is-symbol-1.0.4.tgz#a6dac93b635b063ca6872236de88910a57af139c" + integrity sha512-C/CPBqKWnvdcxqIARxyOh4v1UUEOCHpgDa0WYgpKDFMszcrPcffg5uhwSgPCLD2WWxmq6isisz87tzT01tuGhg== + dependencies: + has-symbols "^1.0.2" + +is-typed-array@^1.1.13: + version "1.1.13" + resolved "https://registry.yarnpkg.com/is-typed-array/-/is-typed-array-1.1.13.tgz#d6c5ca56df62334959322d7d7dd1cca50debe229" + integrity 
sha512-uZ25/bUAlUY5fR4OKT4rZQEBrzQWYV9ZJYGGsUmEJ6thodVJ1HX64ePQ6Z0qPWP+m+Uq6e9UugrE38jeYsDSMw== + dependencies: + which-typed-array "^1.1.14" + +is-weakmap@^2.0.2: + version "2.0.2" + resolved "https://registry.yarnpkg.com/is-weakmap/-/is-weakmap-2.0.2.tgz#bf72615d649dfe5f699079c54b83e47d1ae19cfd" + integrity sha512-K5pXYOm9wqY1RgjpL3YTkF39tni1XajUIkawTLUo9EZEVUFga5gSQJF8nNS7ZwJQ02y+1YCNYcMh+HIf1ZqE+w== + +is-weakref@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/is-weakref/-/is-weakref-1.0.2.tgz#9529f383a9338205e89765e0392efc2f100f06f2" + integrity sha512-qctsuLZmIQ0+vSSMfoVvyFe2+GSEvnmZ2ezTup1SBse9+twCCeial6EEi3Nc2KFcf6+qz2FBPnjXsk8xhKSaPQ== + dependencies: + call-bind "^1.0.2" + +is-weakset@^2.0.3: + version "2.0.3" + resolved "https://registry.yarnpkg.com/is-weakset/-/is-weakset-2.0.3.tgz#e801519df8c0c43e12ff2834eead84ec9e624007" + integrity sha512-LvIm3/KWzS9oRFHugab7d+M/GcBXuXX5xZkzPmN+NxihdQlZUQ4dWuSV1xR/sq6upL1TJEDrfBgRepHFdBtSNQ== + dependencies: + call-bind "^1.0.7" + get-intrinsic "^1.2.4" + +is-wsl@^2.2.0: + version "2.2.0" + resolved "https://registry.yarnpkg.com/is-wsl/-/is-wsl-2.2.0.tgz#74a4c76e77ca9fd3f932f290c17ea326cd157271" + integrity sha512-fKzAra0rGJUUBwGBgNkHZuToZcn+TtXHpeCgmkMJMMYx1sQDYaCSyjJBSCa2nH1DGm7s3n1oBnohoVTBaN7Lww== + dependencies: + is-docker "^2.0.0" + +isarray@^2.0.5: + version "2.0.5" + resolved "https://registry.yarnpkg.com/isarray/-/isarray-2.0.5.tgz#8af1e4c1221244cc62459faf38940d4e644a5723" + integrity sha512-xHjhDr3cNBK0BzdUJSPXZntQUx/mwMS5Rw4A7lPJ90XGAO6ISP/ePDNuo0vhqOZU+UD5JoodwCAAoZQd3FeAKw== + +isarray@~1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/isarray/-/isarray-1.0.0.tgz#bb935d48582cba168c06834957a54a3e07124f11" + integrity sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ== + +isexe@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/isexe/-/isexe-2.0.0.tgz#e8fbf374dc556ff8947a10dcb0572d633f2cfa10" + integrity 
sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw== + +isobject@^3.0.1: + version "3.0.1" + resolved "https://registry.yarnpkg.com/isobject/-/isobject-3.0.1.tgz#4e431e92b11a9731636aa1f9c8d1ccbcfdab78df" + integrity sha512-WhB9zCku7EGTj/HQQRz5aUQEUeoQZH2bWcltRErOpymJ4boYE6wL9Tbr23krRPSZ+C5zqNSrSw+Cc7sZZ4b7vg== + +isurl@^1.0.0-alpha5: + version "1.0.0" + resolved "https://registry.yarnpkg.com/isurl/-/isurl-1.0.0.tgz#b27f4f49f3cdaa3ea44a0a5b7f3462e6edc39d67" + integrity sha512-1P/yWsxPlDtn7QeRD+ULKQPaIaN6yF368GZ2vDfv0AL0NwpStafjWCDDdn0k8wgFMWpVAqG7oJhxHnlud42i9w== + dependencies: + has-to-string-tag-x "^1.2.0" + is-object "^1.0.1" + +iterator.prototype@^1.1.3: + version "1.1.3" + resolved "https://registry.yarnpkg.com/iterator.prototype/-/iterator.prototype-1.1.3.tgz#016c2abe0be3bbdb8319852884f60908ac62bf9c" + integrity sha512-FW5iMbeQ6rBGm/oKgzq2aW4KvAGpxPzYES8N4g4xNXUKpL1mclMvOe+76AcLDTvD+Ze+sOpVhgdAQEKF4L9iGQ== + dependencies: + define-properties "^1.2.1" + get-intrinsic "^1.2.1" + has-symbols "^1.0.3" + reflect.getprototypeof "^1.0.4" + set-function-name "^2.0.1" + +jackspeak@^3.1.2: + version "3.4.3" + resolved "https://registry.yarnpkg.com/jackspeak/-/jackspeak-3.4.3.tgz#8833a9d89ab4acde6188942bd1c53b6390ed5a8a" + integrity sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw== + dependencies: + "@isaacs/cliui" "^8.0.2" + optionalDependencies: + "@pkgjs/parseargs" "^0.11.0" + +jest-worker@^27.4.5: + version "27.5.1" + resolved "https://registry.yarnpkg.com/jest-worker/-/jest-worker-27.5.1.tgz#8d146f0900e8973b106b6f73cc1e9a8cb86f8db0" + integrity sha512-7vuh85V5cdDofPyxn58nrPjBktZo0u9x1g8WtjQol+jZDaE+fhN+cIvTj11GndBnMnyfrUOG1sZQxCdjKh+DKg== + dependencies: + "@types/node" "*" + merge-stream "^2.0.0" + supports-color "^8.0.0" + +jiti@^1.20.0, jiti@^1.21.0: + version "1.21.6" + resolved "https://registry.yarnpkg.com/jiti/-/jiti-1.21.6.tgz#6c7f7398dd4b3142767f9a168af2f317a428d268" + 
integrity sha512-2yTgeWTWzMWkHu6Jp9NKgePDaYHbntiwvYuuJLbbN9vl7DC9DvXKOB2BC3ZZ92D3cvV/aflH0osDfwpHepQ53w== + +"js-tokens@^3.0.0 || ^4.0.0", js-tokens@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/js-tokens/-/js-tokens-4.0.0.tgz#19203fb59991df98e3a287050d4647cdeaf32499" + integrity sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ== + +js-yaml@^4.1.0: + version "4.1.0" + resolved "https://registry.yarnpkg.com/js-yaml/-/js-yaml-4.1.0.tgz#c1fb65f8f5017901cdd2c951864ba18458a10602" + integrity sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA== + dependencies: + argparse "^2.0.1" + +jsesc@^3.0.2, jsesc@~3.0.2: + version "3.0.2" + resolved "https://registry.yarnpkg.com/jsesc/-/jsesc-3.0.2.tgz#bb8b09a6597ba426425f2e4a07245c3d00b9343e" + integrity sha512-xKqzzWXDttJuOcawBt4KnKHHIf5oQ/Cxax+0PWFG+DFDgHNAdi+TXECADI+RYiFUMmx8792xsMbbgXj4CwnP4g== + +json-buffer@3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/json-buffer/-/json-buffer-3.0.0.tgz#5b1f397afc75d677bde8bcfc0e47e1f9a3d9a898" + integrity sha512-CuUqjv0FUZIdXkHPI8MezCnFCdaTAacej1TZYulLoAg1h/PhwkdXFN4V/gzY4g+fMBCOV2xF+rp7t2XD2ns/NQ== + +json-buffer@3.0.1: + version "3.0.1" + resolved "https://registry.yarnpkg.com/json-buffer/-/json-buffer-3.0.1.tgz#9338802a30d3b6605fbe0613e094008ca8c05a13" + integrity sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ== + +json-parse-even-better-errors@^2.3.0, json-parse-even-better-errors@^2.3.1: + version "2.3.1" + resolved "https://registry.yarnpkg.com/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz#7c47805a94319928e05777405dc12e1f7a4ee02d" + integrity sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w== + +json-schema-traverse@^0.4.1: + version "0.4.1" + resolved 
"https://registry.yarnpkg.com/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz#69f6a87d9513ab8bb8fe63bdb0979c448e684660" + integrity sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg== + +json-schema-traverse@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz#ae7bcb3656ab77a73ba5c49bf654f38e6b6860e2" + integrity sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug== + +json-stable-stringify-without-jsonify@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz#9db7b59496ad3f3cfef30a75142d2d930ad72651" + integrity sha512-Bdboy+l7tA3OGW6FjyFHWkP5LuByj1Tk33Ljyq0axyzdk9//JSi2u3fP1QSmd1KNwq6VOKYGlAu87CisVir6Pw== + +json5@^2.1.2, json5@^2.2.3: + version "2.2.3" + resolved "https://registry.yarnpkg.com/json5/-/json5-2.2.3.tgz#78cd6f1a19bdc12b73db5ad0c61efd66c1e29283" + integrity sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg== + +jsonfile@^6.0.1: + version "6.1.0" + resolved "https://registry.yarnpkg.com/jsonfile/-/jsonfile-6.1.0.tgz#bc55b2634793c679ec6403094eb13698a6ec0aae" + integrity sha512-5dgndWOriYSm5cnYaJNhalLNDKOqFwyDB/rr1E9ZsGciGvKPs8R2xYGCacuf3z6K1YKDz182fd+fY3cn3pMqXQ== + dependencies: + universalify "^2.0.0" + optionalDependencies: + graceful-fs "^4.1.6" + +"jsx-ast-utils@^2.4.1 || ^3.0.0": + version "3.3.5" + resolved "https://registry.yarnpkg.com/jsx-ast-utils/-/jsx-ast-utils-3.3.5.tgz#4766bd05a8e2a11af222becd19e15575e52a853a" + integrity sha512-ZZow9HBI5O6EPgSJLUb8n2NKgmVWTwCvHGwFuJlMjvLFqlGG6pjirPhtdsseaLZjSibD8eegzmYpUZwoIlj2cQ== + dependencies: + array-includes "^3.1.6" + array.prototype.flat "^1.3.1" + object.assign "^4.1.4" + object.values "^1.1.6" + +junk@^3.1.0: + version "3.1.0" + resolved 
"https://registry.yarnpkg.com/junk/-/junk-3.1.0.tgz#31499098d902b7e98c5d9b9c80f43457a88abfa1" + integrity sha512-pBxcB3LFc8QVgdggvZWyeys+hnrNWg4OcZIU/1X59k5jQdLBlCsYGRQaz234SqoRLTCgMH00fY0xRJH+F9METQ== + +keyv@3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/keyv/-/keyv-3.0.0.tgz#44923ba39e68b12a7cec7df6c3268c031f2ef373" + integrity sha512-eguHnq22OE3uVoSYG0LVWNP+4ppamWr9+zWBe1bsNcovIMy6huUJFPgy4mGwCd/rnl3vOLGW1MTlu4c57CT1xA== + dependencies: + json-buffer "3.0.0" + +keyv@^4.5.3: + version "4.5.4" + resolved "https://registry.yarnpkg.com/keyv/-/keyv-4.5.4.tgz#a879a99e29452f942439f2a405e3af8b31d4de93" + integrity sha512-oxVHkHR/EJf2CNXnWxRLW6mg7JyCCUcG0DtEGmL2ctUo1PNTin1PUil+r/+4r5MpVgC/fn1kjsx7mjSujKqIpw== + dependencies: + json-buffer "3.0.1" + +kind-of@^6.0.2: + version "6.0.3" + resolved "https://registry.yarnpkg.com/kind-of/-/kind-of-6.0.3.tgz#07c05034a6c349fa06e24fa35aa76db4580ce4dd" + integrity sha512-dcS1ul+9tmeD95T+x28/ehLgd9mENa3LsvDTtzm3vyBEO7RPptvAD+t44WVXaUjTBRcrpFeFlC8WCruUR456hw== + +launch-editor@^2.6.0: + version "2.9.1" + resolved "https://registry.yarnpkg.com/launch-editor/-/launch-editor-2.9.1.tgz#253f173bd441e342d4344b4dae58291abb425047" + integrity sha512-Gcnl4Bd+hRO9P9icCP/RVVT2o8SFlPXofuCxvA2SaZuH45whSvf5p8x5oih5ftLiVhEI4sp5xDY+R+b3zJBh5w== + dependencies: + picocolors "^1.0.0" + shell-quote "^1.8.1" + +levn@^0.4.1: + version "0.4.1" + resolved "https://registry.yarnpkg.com/levn/-/levn-0.4.1.tgz#ae4562c007473b932a6200d403268dd2fffc6ade" + integrity sha512-+bT2uH4E5LGE7h/n3evcS/sQlJXCpIp6ym8OWJ5eV6+67Dsql/LaaT7qJBAt2rzfoa/5QBGBhxDix1dMt2kQKQ== + dependencies: + prelude-ls "^1.2.1" + type-check "~0.4.0" + +lilconfig@^2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/lilconfig/-/lilconfig-2.1.0.tgz#78e23ac89ebb7e1bfbf25b18043de756548e7f52" + integrity sha512-utWOt/GHzuUxnLKxB6dk81RoOeoNeHgbrXiuGk4yyF5qlRz+iIVWu56E2fqGHFrXz0QNUhLB/8nKqvRH66JKGQ== + +lilconfig@^3.0.0: + version "3.1.2" + resolved 
"https://registry.yarnpkg.com/lilconfig/-/lilconfig-3.1.2.tgz#e4a7c3cb549e3a606c8dcc32e5ae1005e62c05cb" + integrity sha512-eop+wDAvpItUys0FWkHIKeC9ybYrTGbU41U5K7+bttZZeohvnY7M9dZ5kB21GNWiFT2q1OoPTvncPCgSOVO5ow== + +lines-and-columns@^1.1.6: + version "1.2.4" + resolved "https://registry.yarnpkg.com/lines-and-columns/-/lines-and-columns-1.2.4.tgz#eca284f75d2965079309dc0ad9255abb2ebc1632" + integrity sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg== + +loader-runner@^4.2.0: + version "4.3.0" + resolved "https://registry.yarnpkg.com/loader-runner/-/loader-runner-4.3.0.tgz#c1b4a163b99f614830353b16755e7149ac2314e1" + integrity sha512-3R/1M+yS3j5ou80Me59j7F9IMs4PXs3VqRrm0TU3AbKPxlmpoY1TNscJV/oGJXo8qCatFGTfDbY6W6ipGOYXfg== + +loader-utils@^2.0.0, loader-utils@^2.0.4: + version "2.0.4" + resolved "https://registry.yarnpkg.com/loader-utils/-/loader-utils-2.0.4.tgz#8b5cb38b5c34a9a018ee1fc0e6a066d1dfcc528c" + integrity sha512-xXqpXoINfFhgua9xiqD8fPFHgkoq1mmmpE92WlDbm9rNRd/EbRb+Gqf908T2DMfuHjjJlksiK2RbHVOdD/MqSw== + dependencies: + big.js "^5.2.2" + emojis-list "^3.0.0" + json5 "^2.1.2" + +locate-path@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/locate-path/-/locate-path-5.0.0.tgz#1afba396afd676a6d42504d0a67a3a7eb9f62aa0" + integrity sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g== + dependencies: + p-locate "^4.1.0" + +locate-path@^6.0.0: + version "6.0.0" + resolved "https://registry.yarnpkg.com/locate-path/-/locate-path-6.0.0.tgz#55321eb309febbc59c4801d931a72452a681d286" + integrity sha512-iPZK6eYjbxRu3uB4/WZ3EsEIMJFMqAoopl3R+zuq0UjcAm/MO6KCweDgPfP3elTztoKP3KtnVHxTn2NHBSDVUw== + dependencies: + p-locate "^5.0.0" + +lodash.debounce@^4.0.8: + version "4.0.8" + resolved "https://registry.yarnpkg.com/lodash.debounce/-/lodash.debounce-4.0.8.tgz#82d79bff30a67c4005ffd5e2515300ad9ca4d7af" + integrity 
sha512-FT1yDzDYEoYWhnSGnpE/4Kj1fLZkDFyqRb7fNt6FdYOSxlUWAtp42Eh6Wb0rGIv/m9Bgo7x4GhQbm5Ys4SG5ow== + +lodash.merge@^4.6.2: + version "4.6.2" + resolved "https://registry.yarnpkg.com/lodash.merge/-/lodash.merge-4.6.2.tgz#558aa53b43b661e1925a0afdfa36a9a1085fe57a" + integrity sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ== + +lodash@^4.17.20, lodash@^4.17.21: + version "4.17.21" + resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c" + integrity sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg== + +long@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/long/-/long-4.0.0.tgz#9a7b71cfb7d361a194ea555241c92f7468d5bf28" + integrity sha512-XsP+KhQif4bjX1kbuSiySJFNAehNxgLb6hPRGJ9QsUr8ajHkuXGdrHmFUTUUXhDwVX2R5bY4JNZEwbUiMhV+MA== + +loose-envify@^1.1.0, loose-envify@^1.4.0: + version "1.4.0" + resolved "https://registry.yarnpkg.com/loose-envify/-/loose-envify-1.4.0.tgz#71ee51fa7be4caec1a63839f7e682d8132d30caf" + integrity sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q== + dependencies: + js-tokens "^3.0.0 || ^4.0.0" + +lower-case@^2.0.2: + version "2.0.2" + resolved "https://registry.yarnpkg.com/lower-case/-/lower-case-2.0.2.tgz#6fa237c63dbdc4a82ca0fd882e4722dc5e634e28" + integrity sha512-7fm3l3NAF9WfN6W3JOmf5drwpVqX78JtoGJ3A6W0a6ZnldM41w2fV5D490psKFTpMds8TJse/eHLFFsNHHjHgg== + dependencies: + tslib "^2.0.3" + +lowercase-keys@1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/lowercase-keys/-/lowercase-keys-1.0.0.tgz#4e3366b39e7f5457e35f1324bdf6f88d0bfc7306" + integrity sha512-RPlX0+PHuvxVDZ7xX+EBVAp4RsVxP/TdDSN2mJYdiq1Lc4Hz7EUSjUI7RZrKKlmrIzVhf6Jo2stj7++gVarS0A== + +lowercase-keys@^1.0.0: + version "1.0.1" + resolved "https://registry.yarnpkg.com/lowercase-keys/-/lowercase-keys-1.0.1.tgz#6f9e30b47084d971a7c820ff15a6c5167b74c26f" + integrity 
sha512-G2Lj61tXDnVFFOi8VZds+SoQjtQC3dgokKdDG2mTm1tx4m50NUHBOZSBwQQHyy0V12A0JTG4icfZQH+xPyh8VA== + +lru-cache@^10.2.0: + version "10.4.3" + resolved "https://registry.yarnpkg.com/lru-cache/-/lru-cache-10.4.3.tgz#410fc8a17b70e598013df257c2446b7f3383f119" + integrity sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ== + +lru-cache@^4.0.1: + version "4.1.5" + resolved "https://registry.yarnpkg.com/lru-cache/-/lru-cache-4.1.5.tgz#8bbe50ea85bed59bc9e33dcab8235ee9bcf443cd" + integrity sha512-sWZlbEP2OsHNkXrMl5GYk/jKk70MBng6UU4YI/qGDYbgf6YbP4EvmqISbXCoJiRKs+1bSpFHVgQxvJ17F2li5g== + dependencies: + pseudomap "^1.0.2" + yallist "^2.1.2" + +lru-cache@^5.1.1: + version "5.1.1" + resolved "https://registry.yarnpkg.com/lru-cache/-/lru-cache-5.1.1.tgz#1da27e6710271947695daf6848e847f01d84b920" + integrity sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w== + dependencies: + yallist "^3.0.2" + +lz-string@^1.5.0: + version "1.5.0" + resolved "https://registry.yarnpkg.com/lz-string/-/lz-string-1.5.0.tgz#c1ab50f77887b712621201ba9fd4e3a6ed099941" + integrity sha512-h5bgJWpxJNswbU7qCrV0tIKQCaS3blPDrqKWx+QxzuzL1zGUzij9XCWLrSLsJPu5t+eWA/ycetzYAO5IOMcWAQ== + +make-dir@^1.0.0, make-dir@^1.2.0: + version "1.3.0" + resolved "https://registry.yarnpkg.com/make-dir/-/make-dir-1.3.0.tgz#79c1033b80515bd6d24ec9933e860ca75ee27f0c" + integrity sha512-2w31R7SJtieJJnQtGc7RVL2StM2vGYVfqUOvUDxH6bC6aJTxPxTF0GnIgCyu7tjockiUWAYQRbxa7vKn34s5sQ== + dependencies: + pify "^3.0.0" + +make-dir@^3.0.0, make-dir@^3.0.2, make-dir@^3.1.0: + version "3.1.0" + resolved "https://registry.yarnpkg.com/make-dir/-/make-dir-3.1.0.tgz#415e967046b3a7f1d185277d84aa58203726a13f" + integrity sha512-g3FeP20LNwhALb/6Cz6Dd4F2ngze0jz7tbzrD2wAV+o9FeNHe4rL+yK2md0J/fiSf1sa1ADhXqi5+oVwOM/eGw== + dependencies: + semver "^6.0.0" + +mdn-data@2.0.14: + version "2.0.14" + resolved 
"https://registry.yarnpkg.com/mdn-data/-/mdn-data-2.0.14.tgz#7113fc4281917d63ce29b43446f701e68c25ba50" + integrity sha512-dn6wd0uw5GsdswPFfsgMp5NSB0/aDe6fK94YJV/AJDYXL6HVLWBsxeq7js7Ad+mU2K9LAlwpk6kN2D5mwCPVow== + +media-typer@0.3.0: + version "0.3.0" + resolved "https://registry.yarnpkg.com/media-typer/-/media-typer-0.3.0.tgz#8710d7af0aa626f8fffa1ce00168545263255748" + integrity sha512-dq+qelQ9akHpcOl/gUVRTxVIOkAJ1wR3QAvb4RsVjS8oVoFjDGTc679wJYmUmknUF5HwMLOgb5O+a3KxfWapPQ== + +memfs@^3.4.1, memfs@^3.4.3: + version "3.6.0" + resolved "https://registry.yarnpkg.com/memfs/-/memfs-3.6.0.tgz#d7a2110f86f79dd950a8b6df6d57bc984aa185f6" + integrity sha512-EGowvkkgbMcIChjMTMkESFDbZeSh8xZ7kNSF0hAiAN4Jh6jgHCRS0Ga/+C8y6Au+oqpezRHCfPsmJ2+DwAgiwQ== + dependencies: + fs-monkey "^1.0.4" + +merge-descriptors@1.0.3: + version "1.0.3" + resolved "https://registry.yarnpkg.com/merge-descriptors/-/merge-descriptors-1.0.3.tgz#d80319a65f3c7935351e5cfdac8f9318504dbed5" + integrity sha512-gaNvAS7TZ897/rVaZ0nMtAyxNyi/pdbjbAwUpFQpN70GqnVfOiXpeUUMKRBmzXaSQ8DdTX4/0ms62r2K+hE6mQ== + +merge-stream@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/merge-stream/-/merge-stream-2.0.0.tgz#52823629a14dd00c9770fb6ad47dc6310f2c1f60" + integrity sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w== + +merge2@^1.2.3, merge2@^1.3.0, merge2@^1.4.1: + version "1.4.1" + resolved "https://registry.yarnpkg.com/merge2/-/merge2-1.4.1.tgz#4368892f885e907455a6fd7dc55c0c9d404990ae" + integrity sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg== + +methods@~1.1.2: + version "1.1.2" + resolved "https://registry.yarnpkg.com/methods/-/methods-1.1.2.tgz#5529a4d67654134edcc5266656835b0f851afcee" + integrity sha512-iclAHeNqNm68zFtnZ0e+1L2yUIdvzNoauKU4WBA3VvH/vPFieF7qfRlwUZU+DA9P9bPXIS90ulxoUoCH23sV2w== + +micromatch@^4.0.0, micromatch@^4.0.2, micromatch@^4.0.4, micromatch@^4.0.5: + version "4.0.8" + resolved 
"https://registry.yarnpkg.com/micromatch/-/micromatch-4.0.8.tgz#d66fa18f3a47076789320b9b1af32bd86d9fa202" + integrity sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA== + dependencies: + braces "^3.0.3" + picomatch "^2.3.1" + +mime-db@1.52.0: + version "1.52.0" + resolved "https://registry.yarnpkg.com/mime-db/-/mime-db-1.52.0.tgz#bbabcdc02859f4987301c856e3387ce5ec43bf70" + integrity sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg== + +"mime-db@>= 1.43.0 < 2", mime-db@^1.28.0: + version "1.53.0" + resolved "https://registry.yarnpkg.com/mime-db/-/mime-db-1.53.0.tgz#3cb63cd820fc29896d9d4e8c32ab4fcd74ccb447" + integrity sha512-oHlN/w+3MQ3rba9rqFr6V/ypF10LSkdwUysQL7GkXoTgIWeV+tcXGA852TBxH+gsh8UWoyhR1hKcoMJTuWflpg== + +mime-types@^2.1.12, mime-types@^2.1.27, mime-types@^2.1.31, mime-types@~2.1.17, mime-types@~2.1.24, mime-types@~2.1.34: + version "2.1.35" + resolved "https://registry.yarnpkg.com/mime-types/-/mime-types-2.1.35.tgz#381a871b62a734450660ae3deee44813f70d959a" + integrity sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw== + dependencies: + mime-db "1.52.0" + +mime@1.6.0: + version "1.6.0" + resolved "https://registry.yarnpkg.com/mime/-/mime-1.6.0.tgz#32cd9e5c64553bd58d19a568af452acff04981b1" + integrity sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg== + +mimic-fn@^2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/mimic-fn/-/mimic-fn-2.1.0.tgz#7ed2c2ccccaf84d3ffcb7a69b57711fc2083401b" + integrity sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg== + +mimic-response@^1.0.0: + version "1.0.1" + resolved "https://registry.yarnpkg.com/mimic-response/-/mimic-response-1.0.1.tgz#4923538878eef42063cb8a3e3b0798781487ab1b" + integrity sha512-j5EctnkH7amfV/q5Hgmoal1g2QHFJRraOtmx0JpIqkxhBhI/lJSl1nMpQ45hVarwNETOoWEimndZ4QK0RHxuxQ== + 
+minimalistic-assert@^1.0.0: + version "1.0.1" + resolved "https://registry.yarnpkg.com/minimalistic-assert/-/minimalistic-assert-1.0.1.tgz#2e194de044626d4a10e7f7fbc00ce73e83e4d5c7" + integrity sha512-UtJcAD4yEaGtjPezWuO9wC4nwUnVH/8/Im3yEHQP4b67cXlD/Qr9hdITCU1xDbSEXg2XKNaP8jsReV7vQd00/A== + +minimatch@^3.0.4, minimatch@^3.0.5, minimatch@^3.1.1, minimatch@^3.1.2: + version "3.1.2" + resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.1.2.tgz#19cd194bfd3e428f049a70817c038d89ab4be35b" + integrity sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw== + dependencies: + brace-expansion "^1.1.7" + +minimatch@^9.0.4: + version "9.0.5" + resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-9.0.5.tgz#d74f9dd6b57d83d8e98cfb82133b03978bc929e5" + integrity sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow== + dependencies: + brace-expansion "^2.0.1" + +"minipass@^5.0.0 || ^6.0.2 || ^7.0.0", minipass@^7.1.2: + version "7.1.2" + resolved "https://registry.yarnpkg.com/minipass/-/minipass-7.1.2.tgz#93a9626ce5e5e66bd4db86849e7515e92340a707" + integrity sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw== + +mozjpeg@^7.0.0: + version "7.1.1" + resolved "https://registry.yarnpkg.com/mozjpeg/-/mozjpeg-7.1.1.tgz#dfb61953536e66fcabd4ae795e7a312d42a51f18" + integrity sha512-iIDxWvzhWvLC9mcRJ1uSkiKaj4drF58oCqK2bITm5c2Jt6cJ8qQjSSru2PCaysG+hLIinryj8mgz5ZJzOYTv1A== + dependencies: + bin-build "^3.0.0" + bin-wrapper "^4.0.0" + +ms@2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/ms/-/ms-2.0.0.tgz#5608aeadfc00be6c2901df5f9861788de0d597c8" + integrity sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A== + +ms@2.1.3, ms@^2.1.3: + version "2.1.3" + resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.3.tgz#574c8138ce1d2b5861f0b44579dbadd60c6615b2" + integrity 
sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA== + +msw@^2.2.1: + version "2.6.4" + resolved "https://registry.yarnpkg.com/msw/-/msw-2.6.4.tgz#954906f10f23d9364529eebe9b4e244796241a8a" + integrity sha512-Pm4LmWQeytDsNCR+A7gt39XAdtH6zQb6jnIKRig0FlvYOn8eksn3s1nXxUfz5KYUjbckof7Z4p2ewzgffPoCbg== + dependencies: + "@bundled-es-modules/cookie" "^2.0.1" + "@bundled-es-modules/statuses" "^1.0.1" + "@bundled-es-modules/tough-cookie" "^0.1.6" + "@inquirer/confirm" "^5.0.0" + "@mswjs/interceptors" "^0.36.5" + "@open-draft/deferred-promise" "^2.2.0" + "@open-draft/until" "^2.1.0" + "@types/cookie" "^0.6.0" + "@types/statuses" "^2.0.4" + chalk "^4.1.2" + graphql "^16.8.1" + headers-polyfill "^4.0.2" + is-node-process "^1.2.0" + outvariant "^1.4.3" + path-to-regexp "^6.3.0" + strict-event-emitter "^0.5.1" + type-fest "^4.26.1" + yargs "^17.7.2" + +multicast-dns@^7.2.5: + version "7.2.5" + resolved "https://registry.yarnpkg.com/multicast-dns/-/multicast-dns-7.2.5.tgz#77eb46057f4d7adbd16d9290fa7299f6fa64cced" + integrity sha512-2eznPJP8z2BFLX50tf0LuODrpINqP1RVIm/CObbTcBRITQgmC/TjcREF1NeTBzIcR5XO/ukWo+YHOjBbFwIupg== + dependencies: + dns-packet "^5.2.2" + thunky "^1.0.2" + +mute-stream@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/mute-stream/-/mute-stream-2.0.0.tgz#a5446fc0c512b71c83c44d908d5c7b7b4c493b2b" + integrity sha512-WWdIxpyjEn+FhQJQQv9aQAYlHoNVdzIzUySNV1gHUPDSdZJ3yZn7pAAbQcV7B56Mvu881q9FZV+0Vx2xC44VWA== + +mz@^2.7.0: + version "2.7.0" + resolved "https://registry.yarnpkg.com/mz/-/mz-2.7.0.tgz#95008057a56cafadc2bc63dde7f9ff6955948e32" + integrity sha512-z81GNO7nnYMEhrGh9LeymoE4+Yr0Wn5McHIZMK5cfQCl+NDX08sCZgUc9/6MHni9IWuFLm1Z3HTCXu2z9fN62Q== + dependencies: + any-promise "^1.0.0" + object-assign "^4.0.1" + thenify-all "^1.0.0" + +nanoid@^3.3.7: + version "3.3.7" + resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.3.7.tgz#d0c301a691bc8d54efa0a2226ccf3fe2fd656bd8" + integrity 
sha512-eSRppjcPIatRIMC1U6UngP8XFcz8MQWGQdt1MTBQ7NaAmvXDfvNxbvWV3x2y6CdEUciCSsDHDQZbhYaB8QEo2g== + +natural-compare-lite@^1.4.0: + version "1.4.0" + resolved "https://registry.yarnpkg.com/natural-compare-lite/-/natural-compare-lite-1.4.0.tgz#17b09581988979fddafe0201e931ba933c96cbb4" + integrity sha512-Tj+HTDSJJKaZnfiuw+iaF9skdPpTo2GtEly5JHnWV/hfv2Qj/9RKsGISQtLh2ox3l5EAGw487hnBee0sIJ6v2g== + +natural-compare@^1.4.0: + version "1.4.0" + resolved "https://registry.yarnpkg.com/natural-compare/-/natural-compare-1.4.0.tgz#4abebfeed7541f2c27acfb29bdbbd15c8d5ba4f7" + integrity sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw== + +negotiator@0.6.3: + version "0.6.3" + resolved "https://registry.yarnpkg.com/negotiator/-/negotiator-0.6.3.tgz#58e323a72fedc0d6f9cd4d31fe49f51479590ccd" + integrity sha512-+EUsqGPLsM+j/zdChZjsnX51g4XrHFOIXwfnCVPGlQk/k5giakcKsuxCObBRu6DSm9opw/O6slWbJdghQM4bBg== + +negotiator@~0.6.4: + version "0.6.4" + resolved "https://registry.yarnpkg.com/negotiator/-/negotiator-0.6.4.tgz#777948e2452651c570b712dd01c23e262713fff7" + integrity sha512-myRT3DiWPHqho5PrJaIRyaMv2kgYf0mUVgBNOYMuCH5Ki1yEiQaf/ZJuQ62nvpc44wL5WDbTX7yGJi1Neevw8w== + +neo-async@^2.6.2: + version "2.6.2" + resolved "https://registry.yarnpkg.com/neo-async/-/neo-async-2.6.2.tgz#b4aafb93e3aeb2d8174ca53cf163ab7d7308305f" + integrity sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw== + +nice-try@^1.0.4: + version "1.0.5" + resolved "https://registry.yarnpkg.com/nice-try/-/nice-try-1.0.5.tgz#a3378a7696ce7d223e88fc9b764bd7ef1089e366" + integrity sha512-1nh45deeb5olNY7eX82BkPO7SSxR5SSYJiPTrTdFUVYwAl8CKMA5N9PjTYkHiRjisVcxcQ1HXdLhx2qxxJzLNQ== + +no-case@^3.0.4: + version "3.0.4" + resolved "https://registry.yarnpkg.com/no-case/-/no-case-3.0.4.tgz#d361fd5c9800f558551a8369fc0dcd4662b6124d" + integrity sha512-fgAN3jGAh+RoxUGZHTSOLJIqUc2wmoBwGR4tbpNAKmmovFoWq0OdRkb0VkldReO2a2iBT/OEulG9XSUc10r3zg== + dependencies: + 
lower-case "^2.0.2" + tslib "^2.0.3" + +node-abort-controller@^3.0.1: + version "3.1.1" + resolved "https://registry.yarnpkg.com/node-abort-controller/-/node-abort-controller-3.1.1.tgz#a94377e964a9a37ac3976d848cb5c765833b8548" + integrity sha512-AGK2yQKIjRuqnc6VkX2Xj5d+QW8xZ87pa1UK6yA6ouUyuxfHuMP6umE5QK7UmTeOAymo+Zx1Fxiuw9rVx8taHQ== + +node-addon-api@^7.0.0: + version "7.1.1" + resolved "https://registry.yarnpkg.com/node-addon-api/-/node-addon-api-7.1.1.tgz#1aba6693b0f255258a049d621329329322aad558" + integrity sha512-5m3bsyrjFWE1xf7nz7YXdN4udnVtXK6/Yfgn5qnahL6bCkf2yKt4k3nuTKAtT4r3IG8JNR2ncsIMdZuAzJjHQQ== + +node-fetch@^2.6.12: + version "2.7.0" + resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.7.0.tgz#d0f0fa6e3e2dc1d27efcd8ad99d550bda94d187d" + integrity sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A== + dependencies: + whatwg-url "^5.0.0" + +node-forge@^1: + version "1.3.1" + resolved "https://registry.yarnpkg.com/node-forge/-/node-forge-1.3.1.tgz#be8da2af243b2417d5f646a770663a92b7e9ded3" + integrity sha512-dPEtOeMvF9VMcYV/1Wb8CPoVAXtp6MKMlcbAt4ddqmGqUJ6fQZFXkNZNkNlfevtNkGtaSoXf/vNNNSvgrdXwtA== + +node-gyp-build@^4.3.0: + version "4.8.3" + resolved "https://registry.yarnpkg.com/node-gyp-build/-/node-gyp-build-4.8.3.tgz#9187216d24dbee29e44eb20d2ebf62a296bbea1a" + integrity sha512-EMS95CMJzdoSKoIiXo8pxKoL8DYxwIZXYlLmgPb8KUv794abpnLK6ynsCAWNliOjREKruYKdzbh76HHYUHX7nw== + +node-releases@^2.0.18: + version "2.0.18" + resolved "https://registry.yarnpkg.com/node-releases/-/node-releases-2.0.18.tgz#f010e8d35e2fe8d6b2944f03f70213ecedc4ca3f" + integrity sha512-d9VeXT4SJ7ZeOqGX6R5EM022wpL+eWPooLI+5UpWn2jCT1aosUQEhQP214x33Wkwx3JQMvIm+tIoVOdodFS40g== + +normalize-path@^3.0.0, normalize-path@~3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/normalize-path/-/normalize-path-3.0.0.tgz#0dcd69ff23a1c9b11fd0978316644a0388216a65" + integrity 
sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA== + +normalize-range@^0.1.2: + version "0.1.2" + resolved "https://registry.yarnpkg.com/normalize-range/-/normalize-range-0.1.2.tgz#2d10c06bdfd312ea9777695a4d28439456b75942" + integrity sha512-bdok/XvKII3nUpklnV6P2hxtMNrCboOjAcyBuQnWEhO665FwrSNRxU+AqpsyvO6LgGYPspN+lu5CLtw4jPRKNA== + +normalize-url@2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/normalize-url/-/normalize-url-2.0.1.tgz#835a9da1551fa26f70e92329069a23aa6574d7e6" + integrity sha512-D6MUW4K/VzoJ4rJ01JFKxDrtY1v9wrgzCX5f2qj/lzH1m/lW6MhUZFKerVsnyjOhOsYzI9Kqqak+10l4LvLpMw== + dependencies: + prepend-http "^2.0.0" + query-string "^5.0.1" + sort-keys "^2.0.0" + +npm-conf@^1.1.0: + version "1.1.3" + resolved "https://registry.yarnpkg.com/npm-conf/-/npm-conf-1.1.3.tgz#256cc47bd0e218c259c4e9550bf413bc2192aff9" + integrity sha512-Yic4bZHJOt9RCFbRP3GgpqhScOY4HH3V2P8yBj6CeYq118Qr+BLXqT2JvpJ00mryLESpgOxf5XlFv4ZjXxLScw== + dependencies: + config-chain "^1.1.11" + pify "^3.0.0" + +npm-run-path@^2.0.0: + version "2.0.2" + resolved "https://registry.yarnpkg.com/npm-run-path/-/npm-run-path-2.0.2.tgz#35a9232dfa35d7067b4cb2ddf2357b1871536c5f" + integrity sha512-lJxZYlT4DW/bRUtFh1MQIWqmLwQfAxnqWG4HhEdjMlkrJYnJn0Jrr2u3mgxqaWsdiBc76TYkTG/mhrnYTuzfHw== + dependencies: + path-key "^2.0.0" + +npm-run-path@^4.0.0, npm-run-path@^4.0.1: + version "4.0.1" + resolved "https://registry.yarnpkg.com/npm-run-path/-/npm-run-path-4.0.1.tgz#b7ecd1e5ed53da8e37a55e1c2269e0b97ed748ea" + integrity sha512-S48WzZW777zhNIrn7gxOlISNAqi9ZC/uQFnRdbeIHhZhCA6UqpkOT8T1G7BvfdgP4Er8gF4sUbaS0i7QvIfCWw== + dependencies: + path-key "^3.0.0" + +npyjs@^0.4.0: + version "0.4.0" + resolved "https://registry.yarnpkg.com/npyjs/-/npyjs-0.4.0.tgz#d81ffd758d3764ebf148b4c9affacfb158b183c4" + integrity sha512-IONdueT/xI3tqKZ2wlNKANjye4vAzGbkJ5+o4HiQmZ7+8V9KgqJfGqLZY+4BWeWoa9+138+1PsymCQzHn3nwZw== + dependencies: + cross-fetch "^3.1.5" + +nth-check@^2.0.1: + 
version "2.1.1" + resolved "https://registry.yarnpkg.com/nth-check/-/nth-check-2.1.1.tgz#c9eab428effce36cd6b92c924bdb000ef1f1ed1d" + integrity sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w== + dependencies: + boolbase "^1.0.0" + +object-assign@^4.0.1, object-assign@^4.1.0, object-assign@^4.1.1: + version "4.1.1" + resolved "https://registry.yarnpkg.com/object-assign/-/object-assign-4.1.1.tgz#2109adc7965887cfc05cbbd442cac8bfbb360863" + integrity sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg== + +object-hash@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/object-hash/-/object-hash-3.0.0.tgz#73f97f753e7baffc0e2cc9d6e079079744ac82e9" + integrity sha512-RSn9F68PjH9HqtltsSnqYC1XXoWe9Bju5+213R98cNGttag9q9yAOTzdbsqvIa7aNm5WffBZFpWYr2aWrklWAw== + +object-inspect@^1.13.1: + version "1.13.2" + resolved "https://registry.yarnpkg.com/object-inspect/-/object-inspect-1.13.2.tgz#dea0088467fb991e67af4058147a24824a3043ff" + integrity sha512-IRZSRuzJiynemAXPYtPe5BoI/RESNYR7TYm50MC5Mqbd3Jmw5y790sErYw3V6SryFJD64b74qQQs9wn5Bg/k3g== + +object-is@^1.1.5: + version "1.1.6" + resolved "https://registry.yarnpkg.com/object-is/-/object-is-1.1.6.tgz#1a6a53aed2dd8f7e6775ff870bea58545956ab07" + integrity sha512-F8cZ+KfGlSGi09lJT7/Nd6KJZ9ygtvYC0/UYYLI9nmQKLMnydpB9yvbv9K1uSkEu7FU9vYPmVwLg328tX+ot3Q== + dependencies: + call-bind "^1.0.7" + define-properties "^1.2.1" + +object-keys@^1.1.1: + version "1.1.1" + resolved "https://registry.yarnpkg.com/object-keys/-/object-keys-1.1.1.tgz#1c47f272df277f3b1daf061677d9c82e2322c60e" + integrity sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA== + +object.assign@^4.1.4, object.assign@^4.1.5: + version "4.1.5" + resolved "https://registry.yarnpkg.com/object.assign/-/object.assign-4.1.5.tgz#3a833f9ab7fdb80fc9e8d2300c803d216d8fdbb0" + integrity 
sha512-byy+U7gp+FVwmyzKPYhW2h5l3crpmGsxl7X2s8y43IgxvG4g3QZ6CffDtsNQy1WsmZpQbO+ybo0AlW7TY6DcBQ== + dependencies: + call-bind "^1.0.5" + define-properties "^1.2.1" + has-symbols "^1.0.3" + object-keys "^1.1.1" + +object.entries@^1.1.8: + version "1.1.8" + resolved "https://registry.yarnpkg.com/object.entries/-/object.entries-1.1.8.tgz#bffe6f282e01f4d17807204a24f8edd823599c41" + integrity sha512-cmopxi8VwRIAw/fkijJohSfpef5PdN0pMQJN6VC/ZKvn0LIknWD8KtgY6KlQdEc4tIjcQ3HxSMmnvtzIscdaYQ== + dependencies: + call-bind "^1.0.7" + define-properties "^1.2.1" + es-object-atoms "^1.0.0" + +object.fromentries@^2.0.8: + version "2.0.8" + resolved "https://registry.yarnpkg.com/object.fromentries/-/object.fromentries-2.0.8.tgz#f7195d8a9b97bd95cbc1999ea939ecd1a2b00c65" + integrity sha512-k6E21FzySsSK5a21KRADBd/NGneRegFO5pLHfdQLpRDETUNJueLXs3WCzyQ3tFRDYgbq3KHGXfTbi2bs8WQ6rQ== + dependencies: + call-bind "^1.0.7" + define-properties "^1.2.1" + es-abstract "^1.23.2" + es-object-atoms "^1.0.0" + +object.values@^1.1.6, object.values@^1.2.0: + version "1.2.0" + resolved "https://registry.yarnpkg.com/object.values/-/object.values-1.2.0.tgz#65405a9d92cee68ac2d303002e0b8470a4d9ab1b" + integrity sha512-yBYjY9QX2hnRmZHAjG/f13MzmBzxzYgQhFrke06TTyKY5zSTEqkOeukBzIdVA3j3ulu8Qa3MbVFShV7T2RmGtQ== + dependencies: + call-bind "^1.0.7" + define-properties "^1.2.1" + es-object-atoms "^1.0.0" + +obuf@^1.0.0, obuf@^1.1.2: + version "1.1.2" + resolved "https://registry.yarnpkg.com/obuf/-/obuf-1.1.2.tgz#09bea3343d41859ebd446292d11c9d4db619084e" + integrity sha512-PX1wu0AmAdPqOL1mWhqmlOd8kOIZQwGZw6rh7uby9fTc5lhaOWFLX3I6R1hrF9k3zUY40e6igsLGkDXK92LJNg== + +on-finished@2.4.1: + version "2.4.1" + resolved "https://registry.yarnpkg.com/on-finished/-/on-finished-2.4.1.tgz#58c8c44116e54845ad57f14ab10b03533184ac3f" + integrity sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg== + dependencies: + ee-first "1.1.1" + +on-headers@~1.0.2: + version "1.0.2" + resolved 
"https://registry.yarnpkg.com/on-headers/-/on-headers-1.0.2.tgz#772b0ae6aaa525c399e489adfad90c403eb3c28f" + integrity sha512-pZAE+FJLoyITytdqK0U5s+FIpjN0JP3OzFi/u8Rx+EV5/W+JTWGXG8xFzevE7AjBfDqHv/8vL8qQsIhHnqRkrA== + +once@^1.3.0, once@^1.3.1, once@^1.4.0: + version "1.4.0" + resolved "https://registry.yarnpkg.com/once/-/once-1.4.0.tgz#583b1aa775961d4b113ac17d9c50baef9dd76bd1" + integrity sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w== + dependencies: + wrappy "1" + +onetime@^5.1.0, onetime@^5.1.2: + version "5.1.2" + resolved "https://registry.yarnpkg.com/onetime/-/onetime-5.1.2.tgz#d0e96ebb56b07476df1dd9c4806e5237985ca45e" + integrity sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg== + dependencies: + mimic-fn "^2.1.0" + +onnx-proto@^4.0.4: + version "4.0.4" + resolved "https://registry.yarnpkg.com/onnx-proto/-/onnx-proto-4.0.4.tgz#2431a25bee25148e915906dda0687aafe3b9e044" + integrity sha512-aldMOB3HRoo6q/phyB6QRQxSt895HNNw82BNyZ2CMh4bjeKv7g/c+VpAFtJuEMVfYLMbRx61hbuqnKceLeDcDA== + dependencies: + protobufjs "^6.8.8" + +onnxruntime-common@~1.14.0: + version "1.14.0" + resolved "https://registry.yarnpkg.com/onnxruntime-common/-/onnxruntime-common-1.14.0.tgz#2bb5dac5261269779aa5fb6536ca379657de8bf6" + integrity sha512-3LJpegM2iMNRX2wUmtYfeX/ytfOzNwAWKSq1HbRrKc9+uqG/FsEA0bbKZl1btQeZaXhC26l44NWpNUeXPII7Ew== + +onnxruntime-web@1.14.0: + version "1.14.0" + resolved "https://registry.yarnpkg.com/onnxruntime-web/-/onnxruntime-web-1.14.0.tgz#c8cee538781b1d4c1c6b043934f4a3e6ddf1466e" + integrity sha512-Kcqf43UMfW8mCydVGcX9OMXI2VN17c0p6XvR7IPSZzBf/6lteBzXHvcEVWDPmCKuGombl997HgLqj91F11DzXw== + dependencies: + flatbuffers "^1.12.0" + guid-typescript "^1.0.9" + long "^4.0.0" + onnx-proto "^4.0.4" + onnxruntime-common "~1.14.0" + platform "^1.3.6" + +open@^8.0.9: + version "8.4.2" + resolved "https://registry.yarnpkg.com/open/-/open-8.4.2.tgz#5b5ffe2a8f793dcd2aad73e550cb87b59cb084f9" + 
integrity sha512-7x81NCL719oNbsq/3mh+hVrAWmFuEYUqrq/Iw3kUzH8ReypT9QQ0BLoJS7/G9k6N81XjW4qHWtjWwe/9eLy1EQ== + dependencies: + define-lazy-prop "^2.0.0" + is-docker "^2.1.1" + is-wsl "^2.2.0" + +optionator@^0.9.3: + version "0.9.4" + resolved "https://registry.yarnpkg.com/optionator/-/optionator-0.9.4.tgz#7ea1c1a5d91d764fb282139c88fe11e182a3a734" + integrity sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g== + dependencies: + deep-is "^0.1.3" + fast-levenshtein "^2.0.6" + levn "^0.4.1" + prelude-ls "^1.2.1" + type-check "^0.4.0" + word-wrap "^1.2.5" + +optipng-bin@^7.0.0: + version "7.0.1" + resolved "https://registry.yarnpkg.com/optipng-bin/-/optipng-bin-7.0.1.tgz#beb8e55a52f8a26f885ee57ab44fcf62397d6972" + integrity sha512-W99mpdW7Nt2PpFiaO+74pkht7KEqkXkeRomdWXfEz3SALZ6hns81y/pm1dsGZ6ItUIfchiNIP6ORDr1zETU1jA== + dependencies: + bin-build "^3.0.0" + bin-wrapper "^4.0.0" + +os-filter-obj@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/os-filter-obj/-/os-filter-obj-2.0.0.tgz#1c0b62d5f3a2442749a2d139e6dddee6e81d8d16" + integrity sha512-uksVLsqG3pVdzzPvmAHpBK0wKxYItuzZr7SziusRPoz67tGV8rL1szZ6IdeUrbqLjGDwApBtN29eEE3IqGHOjg== + dependencies: + arch "^2.1.0" + +outvariant@^1.4.0, outvariant@^1.4.3: + version "1.4.3" + resolved "https://registry.yarnpkg.com/outvariant/-/outvariant-1.4.3.tgz#221c1bfc093e8fec7075497e7799fdbf43d14873" + integrity sha512-+Sl2UErvtsoajRDKCE5/dBz4DIvHXQQnAxtQTF04OJxY0+DyZXSo5P5Bb7XYWOh81syohlYL24hbDwxedPUJCA== + +ow@^0.17.0: + version "0.17.0" + resolved "https://registry.yarnpkg.com/ow/-/ow-0.17.0.tgz#4f938999fed6264c9048cd6254356e0f1e7f688c" + integrity sha512-i3keDzDQP5lWIe4oODyDFey1qVrq2hXKTuTH2VpqwpYtzPiKZt2ziRI4NBQmgW40AnV5Euz17OyWweCb+bNEQA== + dependencies: + type-fest "^0.11.0" + +p-cancelable@^0.3.0: + version "0.3.0" + resolved "https://registry.yarnpkg.com/p-cancelable/-/p-cancelable-0.3.0.tgz#b9e123800bcebb7ac13a479be195b507b98d30fa" + integrity 
sha512-RVbZPLso8+jFeq1MfNvgXtCRED2raz/dKpacfTNxsx6pLEpEomM7gah6VeHSYV3+vo0OAi4MkArtQcWWXuQoyw== + +p-cancelable@^0.4.0: + version "0.4.1" + resolved "https://registry.yarnpkg.com/p-cancelable/-/p-cancelable-0.4.1.tgz#35f363d67d52081c8d9585e37bcceb7e0bbcb2a0" + integrity sha512-HNa1A8LvB1kie7cERyy21VNeHb2CWJJYqyyC2o3klWFfMGlFmWv2Z7sFgZH8ZiaYL95ydToKTFVXgMV/Os0bBQ== + +p-event@^1.0.0: + version "1.3.0" + resolved "https://registry.yarnpkg.com/p-event/-/p-event-1.3.0.tgz#8e6b4f4f65c72bc5b6fe28b75eda874f96a4a085" + integrity sha512-hV1zbA7gwqPVFcapfeATaNjQ3J0NuzorHPyG8GPL9g/Y/TplWVBVoCKCXL6Ej2zscrCEv195QNWJXuBH6XZuzA== + dependencies: + p-timeout "^1.1.1" + +p-event@^2.1.0: + version "2.3.1" + resolved "https://registry.yarnpkg.com/p-event/-/p-event-2.3.1.tgz#596279ef169ab2c3e0cae88c1cfbb08079993ef6" + integrity sha512-NQCqOFhbpVTMX4qMe8PF8lbGtzZ+LCiN7pcNrb/413Na7+TRoe1xkKUzuWa/YEJdGQ0FvKtj35EEbDoVPO2kbA== + dependencies: + p-timeout "^2.0.1" + +p-finally@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/p-finally/-/p-finally-1.0.0.tgz#3fbcfb15b899a44123b34b6dcc18b724336a2cae" + integrity sha512-LICb2p9CB7FS+0eR1oqWnHhp0FljGLZCWBE9aix0Uye9W8LTQPwMTYVGWQWIw9RdQiDg4+epXQODwIYJtSJaow== + +p-is-promise@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/p-is-promise/-/p-is-promise-1.1.0.tgz#9c9456989e9f6588017b0434d56097675c3da05e" + integrity sha512-zL7VE4JVS2IFSkR2GQKDSPEVxkoH43/p7oEnwpdCndKYJO0HVeRB7fA8TJwuLOTBREtK0ea8eHaxdwcpob5dmg== + +p-limit@^2.2.0: + version "2.3.0" + resolved "https://registry.yarnpkg.com/p-limit/-/p-limit-2.3.0.tgz#3dd33c647a214fdfffd835933eb086da0dc21db1" + integrity sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w== + dependencies: + p-try "^2.0.0" + +p-limit@^3.0.2: + version "3.1.0" + resolved "https://registry.yarnpkg.com/p-limit/-/p-limit-3.1.0.tgz#e1daccbe78d0d1388ca18c64fea38e3e57e3706b" + integrity 
sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ== + dependencies: + yocto-queue "^0.1.0" + +p-locate@^4.1.0: + version "4.1.0" + resolved "https://registry.yarnpkg.com/p-locate/-/p-locate-4.1.0.tgz#a3428bb7088b3a60292f66919278b7c297ad4f07" + integrity sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A== + dependencies: + p-limit "^2.2.0" + +p-locate@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/p-locate/-/p-locate-5.0.0.tgz#83c8315c6785005e3bd021839411c9e110e6d834" + integrity sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw== + dependencies: + p-limit "^3.0.2" + +p-map-series@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/p-map-series/-/p-map-series-1.0.0.tgz#bf98fe575705658a9e1351befb85ae4c1f07bdca" + integrity sha512-4k9LlvY6Bo/1FcIdV33wqZQES0Py+iKISU9Uc8p8AjWoZPnFKMpVIVD3s0EYn4jzLh1I+WeUZkJ0Yoa4Qfw3Kg== + dependencies: + p-reduce "^1.0.0" + +p-pipe@^3.0.0: + version "3.1.0" + resolved "https://registry.yarnpkg.com/p-pipe/-/p-pipe-3.1.0.tgz#48b57c922aa2e1af6a6404cb7c6bf0eb9cc8e60e" + integrity sha512-08pj8ATpzMR0Y80x50yJHn37NF6vjrqHutASaX5LiH5npS9XPvrUmscd9MF5R4fuYRHOxQR1FfMIlF7AzwoPqw== + +p-reduce@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/p-reduce/-/p-reduce-1.0.0.tgz#18c2b0dd936a4690a529f8231f58a0fdb6a47dfa" + integrity sha512-3Tx1T3oM1xO/Y8Gj0sWyE78EIJZ+t+aEmXUdvQgvGmSMri7aPTHoovbXEreWKkL5j21Er60XAWLTzKbAKYOujQ== + +p-retry@^4.5.0: + version "4.6.2" + resolved "https://registry.yarnpkg.com/p-retry/-/p-retry-4.6.2.tgz#9baae7184057edd4e17231cee04264106e092a16" + integrity sha512-312Id396EbJdvRONlngUx0NydfrIQ5lsYu0znKVUzVvArzEIt08V1qhtyESbGVd1FGX7UKtiFp5uwKZdM8wIuQ== + dependencies: + "@types/retry" "0.12.0" + retry "^0.13.1" + +p-timeout@^1.1.1: + version "1.2.1" + resolved 
"https://registry.yarnpkg.com/p-timeout/-/p-timeout-1.2.1.tgz#5eb3b353b7fce99f101a1038880bb054ebbea386" + integrity sha512-gb0ryzr+K2qFqFv6qi3khoeqMZF/+ajxQipEF6NteZVnvz9tzdsfAVj3lYtn1gAXvH5lfLwfxEII799gt/mRIA== + dependencies: + p-finally "^1.0.0" + +p-timeout@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/p-timeout/-/p-timeout-2.0.1.tgz#d8dd1979595d2dc0139e1fe46b8b646cb3cdf038" + integrity sha512-88em58dDVB/KzPEx1X0N3LwFfYZPyDc4B6eF38M1rk9VTZMbxXXgjugz8mmwpS9Ox4BDZ+t6t3QP5+/gazweIA== + dependencies: + p-finally "^1.0.0" + +p-try@^2.0.0: + version "2.2.0" + resolved "https://registry.yarnpkg.com/p-try/-/p-try-2.2.0.tgz#cb2868540e313d61de58fafbe35ce9004d5540e6" + integrity sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ== + +package-json-from-dist@^1.0.0: + version "1.0.1" + resolved "https://registry.yarnpkg.com/package-json-from-dist/-/package-json-from-dist-1.0.1.tgz#4f1471a010827a86f94cfd9b0727e36d267de505" + integrity sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw== + +param-case@^3.0.4: + version "3.0.4" + resolved "https://registry.yarnpkg.com/param-case/-/param-case-3.0.4.tgz#7d17fe4aa12bde34d4a77d91acfb6219caad01c5" + integrity sha512-RXlj7zCYokReqWpOPH9oYivUzLYZ5vAPIfEmCTNViosC78F8F0H9y7T7gG2M39ymgutxF5gcFEsyZQSph9Bp3A== + dependencies: + dot-case "^3.0.4" + tslib "^2.0.3" + +parent-module@^1.0.0: + version "1.0.1" + resolved "https://registry.yarnpkg.com/parent-module/-/parent-module-1.0.1.tgz#691d2709e78c79fae3a156622452d00762caaaa2" + integrity sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g== + dependencies: + callsites "^3.0.0" + +parse-json@^5.0.0, parse-json@^5.2.0: + version "5.2.0" + resolved "https://registry.yarnpkg.com/parse-json/-/parse-json-5.2.0.tgz#c76fc66dee54231c962b22bcc8a72cf2f99753cd" + integrity 
sha512-ayCKvm/phCGxOkYRSCM82iDwct8/EonSEgCSxWxD7ve6jHggsFl4fZVQBPRNgQoKiuV/odhFrGzQXZwbifC8Rg== + dependencies: + "@babel/code-frame" "^7.0.0" + error-ex "^1.3.1" + json-parse-even-better-errors "^2.3.0" + lines-and-columns "^1.1.6" + +parseurl@~1.3.2, parseurl@~1.3.3: + version "1.3.3" + resolved "https://registry.yarnpkg.com/parseurl/-/parseurl-1.3.3.tgz#9da19e7bee8d12dff0513ed5b76957793bc2e8d4" + integrity sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ== + +pascal-case@^3.1.2: + version "3.1.2" + resolved "https://registry.yarnpkg.com/pascal-case/-/pascal-case-3.1.2.tgz#b48e0ef2b98e205e7c1dae747d0b1508237660eb" + integrity sha512-uWlGT3YSnK9x3BQJaOdcZwrnV6hPpd8jFH1/ucpiLRPh/2zCVJKS19E4GvYHvaCcACn3foXZ0cLB9Wrx1KGe5g== + dependencies: + no-case "^3.0.4" + tslib "^2.0.3" + +path-exists@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/path-exists/-/path-exists-4.0.0.tgz#513bdbe2d3b95d7762e8c1137efa195c6c61b5b3" + integrity sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w== + +path-is-absolute@^1.0.0: + version "1.0.1" + resolved "https://registry.yarnpkg.com/path-is-absolute/-/path-is-absolute-1.0.1.tgz#174b9268735534ffbc7ace6bf53a5a9e1b5c5f5f" + integrity sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg== + +path-key@^2.0.0, path-key@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/path-key/-/path-key-2.0.1.tgz#411cadb574c5a140d3a4b1910d40d80cc9f40b40" + integrity sha512-fEHGKCSmUSDPv4uoj8AlD+joPlq3peND+HRYyxFz4KPw4z926S/b8rIuFs2FYJg3BwsxJf6A9/3eIdLaYC+9Dw== + +path-key@^3.0.0, path-key@^3.1.0: + version "3.1.1" + resolved "https://registry.yarnpkg.com/path-key/-/path-key-3.1.1.tgz#581f6ade658cbba65a0d3380de7753295054f375" + integrity sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q== + +path-parse@^1.0.7: + version "1.0.7" + resolved 
"https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.7.tgz#fbc114b60ca42b30d9daf5858e4bd68bbedb6735" + integrity sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw== + +path-scurry@^1.11.1: + version "1.11.1" + resolved "https://registry.yarnpkg.com/path-scurry/-/path-scurry-1.11.1.tgz#7960a668888594a0720b12a911d1a742ab9f11d2" + integrity sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA== + dependencies: + lru-cache "^10.2.0" + minipass "^5.0.0 || ^6.0.2 || ^7.0.0" + +path-to-regexp@0.1.10: + version "0.1.10" + resolved "https://registry.yarnpkg.com/path-to-regexp/-/path-to-regexp-0.1.10.tgz#67e9108c5c0551b9e5326064387de4763c4d5f8b" + integrity sha512-7lf7qcQidTku0Gu3YDPc8DJ1q7OOucfa/BSsIwjuh56VU7katFvuM8hULfkwB3Fns/rsVF7PwPKVw1sl5KQS9w== + +path-to-regexp@^6.3.0: + version "6.3.0" + resolved "https://registry.yarnpkg.com/path-to-regexp/-/path-to-regexp-6.3.0.tgz#2b6a26a337737a8e1416f9272ed0766b1c0389f4" + integrity sha512-Yhpw4T9C6hPpgPeA28us07OJeqZ5EzQTkbfwuhsUg0c237RomFoETJgmp2sa3F/41gfLE6G5cqcYwznmeEeOlQ== + +path-type@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/path-type/-/path-type-4.0.0.tgz#84ed01c0a7ba380afe09d90a8c180dcd9d03043b" + integrity sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw== + +pend@~1.2.0: + version "1.2.0" + resolved "https://registry.yarnpkg.com/pend/-/pend-1.2.0.tgz#7a57eb550a6783f9115331fcf4663d5c8e007a50" + integrity sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg== + +picocolors@^1.0.0, picocolors@^1.0.1, picocolors@^1.1.0: + version "1.1.1" + resolved "https://registry.yarnpkg.com/picocolors/-/picocolors-1.1.1.tgz#3d321af3eab939b083c8f929a1d12cda81c26b6b" + integrity sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA== + +picomatch@^2.0.4, picomatch@^2.2.1, picomatch@^2.3.1: + version 
"2.3.1" + resolved "https://registry.yarnpkg.com/picomatch/-/picomatch-2.3.1.tgz#3ba3833733646d9d3e4995946c1365a67fb07a42" + integrity sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA== + +pify@^2.2.0, pify@^2.3.0: + version "2.3.0" + resolved "https://registry.yarnpkg.com/pify/-/pify-2.3.0.tgz#ed141a6ac043a849ea588498e7dca8b15330e90c" + integrity sha512-udgsAY+fTnvv7kI7aaxbqwWNb0AHiB0qBO89PZKPkoTmGOgdbrHDKD+0B2X4uTfJ/FT1R09r9gTsjUjNJotuog== + +pify@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/pify/-/pify-3.0.0.tgz#e5a4acd2c101fdf3d9a4d07f0dbc4db49dd28176" + integrity sha512-C3FsVNH1udSEX48gGX1xfvwTWfsYWj5U+8/uK15BGzIGrKoUpghX8hWZwa/OFnakBiiVNmBvemTJR5mcy7iPcg== + +pify@^4.0.1: + version "4.0.1" + resolved "https://registry.yarnpkg.com/pify/-/pify-4.0.1.tgz#4b2cd25c50d598735c50292224fd8c6df41e3231" + integrity sha512-uB80kBFb/tfd68bVleG9T5GGsGPjJrLAUpR5PZIrhBnIaRTQRjqdJSsIKkOP6OAIFbj7GOrcudc5pNjZ+geV2g== + +pinkie-promise@^2.0.0: + version "2.0.1" + resolved "https://registry.yarnpkg.com/pinkie-promise/-/pinkie-promise-2.0.1.tgz#2135d6dfa7a358c069ac9b178776288228450ffa" + integrity sha512-0Gni6D4UcLTbv9c57DfxDGdr41XfgUjqWZu492f0cIGr16zDU06BWP/RAEvOuo7CQ0CNjHaLlM59YJJFm3NWlw== + dependencies: + pinkie "^2.0.0" + +pinkie@^2.0.0: + version "2.0.4" + resolved "https://registry.yarnpkg.com/pinkie/-/pinkie-2.0.4.tgz#72556b80cfa0d48a974e80e77248e80ed4f7f870" + integrity sha512-MnUuEycAemtSaeFSjXKW/aroV7akBbY+Sv+RkyqFjgAe73F+MR0TBWKBRDkmfWq/HiFmdavfZ1G7h4SPZXaCSg== + +pirates@^4.0.1: + version "4.0.6" + resolved "https://registry.yarnpkg.com/pirates/-/pirates-4.0.6.tgz#3018ae32ecfcff6c29ba2267cbf21166ac1f36b9" + integrity sha512-saLsH7WeYYPiD25LDuLRRY/i+6HaPYr6G1OUlN39otzkSTxKnubR9RTxS3/Kk50s1g2JTgFwWQDQyplC5/SHZg== + +pkg-dir@^4.1.0, pkg-dir@^4.2.0: + version "4.2.0" + resolved "https://registry.yarnpkg.com/pkg-dir/-/pkg-dir-4.2.0.tgz#f099133df7ede422e81d1d8448270eeb3e4261f3" + integrity 
sha512-HRDzbaKjC+AOWVXxAU/x54COGeIv9eb+6CkDSQoNTt4XyWoIJvuPsXizxu/Fr23EiekbtZwmh1IcIG/l/a10GQ== + dependencies: + find-up "^4.0.0" + +platform@^1.3.6: + version "1.3.6" + resolved "https://registry.yarnpkg.com/platform/-/platform-1.3.6.tgz#48b4ce983164b209c2d45a107adb31f473a6e7a7" + integrity sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg== + +pngquant-bin@^6.0.0: + version "6.0.1" + resolved "https://registry.yarnpkg.com/pngquant-bin/-/pngquant-bin-6.0.1.tgz#2b5789ca219eeb4d8509ab1ae082092801b7f07e" + integrity sha512-Q3PUyolfktf+hYio6wsg3SanQzEU/v8aICg/WpzxXcuCMRb7H2Q81okfpcEztbMvw25ILjd3a87doj2N9kvbpQ== + dependencies: + bin-build "^3.0.0" + bin-wrapper "^4.0.1" + execa "^4.0.0" + +possible-typed-array-names@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/possible-typed-array-names/-/possible-typed-array-names-1.0.0.tgz#89bb63c6fada2c3e90adc4a647beeeb39cc7bf8f" + integrity sha512-d7Uw+eZoloe0EHDIYoe+bQ5WXnGMOpmiZFTuMWCwpjzzkL2nTjcKiAk4hh8TjnGye2TwWOk3UXucZ+3rbmBa8Q== + +postcss-attribute-case-insensitive@^5.0.2: + version "5.0.2" + resolved "https://registry.yarnpkg.com/postcss-attribute-case-insensitive/-/postcss-attribute-case-insensitive-5.0.2.tgz#03d761b24afc04c09e757e92ff53716ae8ea2741" + integrity sha512-XIidXV8fDr0kKt28vqki84fRK8VW8eTuIa4PChv2MqKuT6C9UjmSKzen6KaWhWEoYvwxFCa7n/tC1SZ3tyq4SQ== + dependencies: + postcss-selector-parser "^6.0.10" + +postcss-clamp@^4.1.0: + version "4.1.0" + resolved "https://registry.yarnpkg.com/postcss-clamp/-/postcss-clamp-4.1.0.tgz#7263e95abadd8c2ba1bd911b0b5a5c9c93e02363" + integrity sha512-ry4b1Llo/9zz+PKC+030KUnPITTJAHeOwjfAyyB60eT0AorGLdzp52s31OsPRHRf8NchkgFoG2y6fCfn1IV1Ow== + dependencies: + postcss-value-parser "^4.2.0" + +postcss-color-functional-notation@^4.2.4: + version "4.2.4" + resolved "https://registry.yarnpkg.com/postcss-color-functional-notation/-/postcss-color-functional-notation-4.2.4.tgz#21a909e8d7454d3612d1659e471ce4696f28caec" + integrity 
sha512-2yrTAUZUab9s6CpxkxC4rVgFEVaR6/2Pipvi6qcgvnYiVqZcbDHEoBDhrXzyb7Efh2CCfHQNtcqWcIruDTIUeg== + dependencies: + postcss-value-parser "^4.2.0" + +postcss-color-hex-alpha@^8.0.4: + version "8.0.4" + resolved "https://registry.yarnpkg.com/postcss-color-hex-alpha/-/postcss-color-hex-alpha-8.0.4.tgz#c66e2980f2fbc1a63f5b079663340ce8b55f25a5" + integrity sha512-nLo2DCRC9eE4w2JmuKgVA3fGL3d01kGq752pVALF68qpGLmx2Qrk91QTKkdUqqp45T1K1XV8IhQpcu1hoAQflQ== + dependencies: + postcss-value-parser "^4.2.0" + +postcss-color-rebeccapurple@^7.1.1: + version "7.1.1" + resolved "https://registry.yarnpkg.com/postcss-color-rebeccapurple/-/postcss-color-rebeccapurple-7.1.1.tgz#63fdab91d878ebc4dd4b7c02619a0c3d6a56ced0" + integrity sha512-pGxkuVEInwLHgkNxUc4sdg4g3py7zUeCQ9sMfwyHAT+Ezk8a4OaaVZ8lIY5+oNqA/BXXgLyXv0+5wHP68R79hg== + dependencies: + postcss-value-parser "^4.2.0" + +postcss-custom-media@^8.0.2: + version "8.0.2" + resolved "https://registry.yarnpkg.com/postcss-custom-media/-/postcss-custom-media-8.0.2.tgz#c8f9637edf45fef761b014c024cee013f80529ea" + integrity sha512-7yi25vDAoHAkbhAzX9dHx2yc6ntS4jQvejrNcC+csQJAXjj15e7VcWfMgLqBNAbOvqi5uIa9huOVwdHbf+sKqg== + dependencies: + postcss-value-parser "^4.2.0" + +postcss-custom-properties@^12.1.10: + version "12.1.11" + resolved "https://registry.yarnpkg.com/postcss-custom-properties/-/postcss-custom-properties-12.1.11.tgz#d14bb9b3989ac4d40aaa0e110b43be67ac7845cf" + integrity sha512-0IDJYhgU8xDv1KY6+VgUwuQkVtmYzRwu+dMjnmdMafXYv86SWqfxkc7qdDvWS38vsjaEtv8e0vGOUQrAiMBLpQ== + dependencies: + postcss-value-parser "^4.2.0" + +postcss-custom-selectors@^6.0.3: + version "6.0.3" + resolved "https://registry.yarnpkg.com/postcss-custom-selectors/-/postcss-custom-selectors-6.0.3.tgz#1ab4684d65f30fed175520f82d223db0337239d9" + integrity sha512-fgVkmyiWDwmD3JbpCmB45SvvlCD6z9CG6Ie6Iere22W5aHea6oWa7EM2bpnv2Fj3I94L3VbtvX9KqwSi5aFzSg== + dependencies: + postcss-selector-parser "^6.0.4" + +postcss-dir-pseudo-class@^6.0.5: + version "6.0.5" + resolved 
"https://registry.yarnpkg.com/postcss-dir-pseudo-class/-/postcss-dir-pseudo-class-6.0.5.tgz#2bf31de5de76added44e0a25ecf60ae9f7c7c26c" + integrity sha512-eqn4m70P031PF7ZQIvSgy9RSJ5uI2171O/OO/zcRNYpJbvaeKFUlar1aJ7rmgiQtbm0FSPsRewjpdS0Oew7MPA== + dependencies: + postcss-selector-parser "^6.0.10" + +postcss-double-position-gradients@^3.1.2: + version "3.1.2" + resolved "https://registry.yarnpkg.com/postcss-double-position-gradients/-/postcss-double-position-gradients-3.1.2.tgz#b96318fdb477be95997e86edd29c6e3557a49b91" + integrity sha512-GX+FuE/uBR6eskOK+4vkXgT6pDkexLokPaz/AbJna9s5Kzp/yl488pKPjhy0obB475ovfT1Wv8ho7U/cHNaRgQ== + dependencies: + "@csstools/postcss-progressive-custom-properties" "^1.1.0" + postcss-value-parser "^4.2.0" + +postcss-env-function@^4.0.6: + version "4.0.6" + resolved "https://registry.yarnpkg.com/postcss-env-function/-/postcss-env-function-4.0.6.tgz#7b2d24c812f540ed6eda4c81f6090416722a8e7a" + integrity sha512-kpA6FsLra+NqcFnL81TnsU+Z7orGtDTxcOhl6pwXeEq1yFPpRMkCDpHhrz8CFQDr/Wfm0jLiNQ1OsGGPjlqPwA== + dependencies: + postcss-value-parser "^4.2.0" + +postcss-focus-visible@^6.0.4: + version "6.0.4" + resolved "https://registry.yarnpkg.com/postcss-focus-visible/-/postcss-focus-visible-6.0.4.tgz#50c9ea9afa0ee657fb75635fabad25e18d76bf9e" + integrity sha512-QcKuUU/dgNsstIK6HELFRT5Y3lbrMLEOwG+A4s5cA+fx3A3y/JTq3X9LaOj3OC3ALH0XqyrgQIgey/MIZ8Wczw== + dependencies: + postcss-selector-parser "^6.0.9" + +postcss-focus-within@^5.0.4: + version "5.0.4" + resolved "https://registry.yarnpkg.com/postcss-focus-within/-/postcss-focus-within-5.0.4.tgz#5b1d2ec603195f3344b716c0b75f61e44e8d2e20" + integrity sha512-vvjDN++C0mu8jz4af5d52CB184ogg/sSxAFS+oUJQq2SuCe7T5U2iIsVJtsCp2d6R4j0jr5+q3rPkBVZkXD9fQ== + dependencies: + postcss-selector-parser "^6.0.9" + +postcss-font-variant@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/postcss-font-variant/-/postcss-font-variant-5.0.0.tgz#efd59b4b7ea8bb06127f2d031bfbb7f24d32fa66" + integrity 
sha512-1fmkBaCALD72CK2a9i468mA/+tr9/1cBxRRMXOUaZqO43oWPR5imcyPjXwuv7PXbCid4ndlP5zWhidQVVa3hmA== + +postcss-gap-properties@^3.0.5: + version "3.0.5" + resolved "https://registry.yarnpkg.com/postcss-gap-properties/-/postcss-gap-properties-3.0.5.tgz#f7e3cddcf73ee19e94ccf7cb77773f9560aa2fff" + integrity sha512-IuE6gKSdoUNcvkGIqdtjtcMtZIFyXZhmFd5RUlg97iVEvp1BZKV5ngsAjCjrVy+14uhGBQl9tzmi1Qwq4kqVOg== + +postcss-image-set-function@^4.0.7: + version "4.0.7" + resolved "https://registry.yarnpkg.com/postcss-image-set-function/-/postcss-image-set-function-4.0.7.tgz#08353bd756f1cbfb3b6e93182c7829879114481f" + integrity sha512-9T2r9rsvYzm5ndsBE8WgtrMlIT7VbtTfE7b3BQnudUqnBcBo7L758oc+o+pdj/dUV0l5wjwSdjeOH2DZtfv8qw== + dependencies: + postcss-value-parser "^4.2.0" + +postcss-import@^15.1.0: + version "15.1.0" + resolved "https://registry.yarnpkg.com/postcss-import/-/postcss-import-15.1.0.tgz#41c64ed8cc0e23735a9698b3249ffdbf704adc70" + integrity sha512-hpr+J05B2FVYUAXHeK1YyI267J/dDDhMU6B6civm8hSY1jYJnBXxzKDKDswzJmtLHryrjhnDjqqp/49t8FALew== + dependencies: + postcss-value-parser "^4.0.0" + read-cache "^1.0.0" + resolve "^1.1.7" + +postcss-initial@^4.0.1: + version "4.0.1" + resolved "https://registry.yarnpkg.com/postcss-initial/-/postcss-initial-4.0.1.tgz#529f735f72c5724a0fb30527df6fb7ac54d7de42" + integrity sha512-0ueD7rPqX8Pn1xJIjay0AZeIuDoF+V+VvMt/uOnn+4ezUKhZM/NokDeP6DwMNyIoYByuN/94IQnt5FEkaN59xQ== + +postcss-js@^4.0.1: + version "4.0.1" + resolved "https://registry.yarnpkg.com/postcss-js/-/postcss-js-4.0.1.tgz#61598186f3703bab052f1c4f7d805f3991bee9d2" + integrity sha512-dDLF8pEO191hJMtlHFPRa8xsizHaM82MLfNkUHdUtVEV3tgTp5oj+8qbEqYM57SLfc74KSbw//4SeJma2LRVIw== + dependencies: + camelcase-css "^2.0.1" + +postcss-lab-function@^4.2.1: + version "4.2.1" + resolved "https://registry.yarnpkg.com/postcss-lab-function/-/postcss-lab-function-4.2.1.tgz#6fe4c015102ff7cd27d1bd5385582f67ebdbdc98" + integrity 
sha512-xuXll4isR03CrQsmxyz92LJB2xX9n+pZJ5jE9JgcnmsCammLyKdlzrBin+25dy6wIjfhJpKBAN80gsTlCgRk2w== + dependencies: + "@csstools/postcss-progressive-custom-properties" "^1.1.0" + postcss-value-parser "^4.2.0" + +postcss-load-config@^4.0.1: + version "4.0.2" + resolved "https://registry.yarnpkg.com/postcss-load-config/-/postcss-load-config-4.0.2.tgz#7159dcf626118d33e299f485d6afe4aff7c4a3e3" + integrity sha512-bSVhyJGL00wMVoPUzAVAnbEoWyqRxkjv64tUl427SKnPrENtq6hJwUojroMz2VB+Q1edmi4IfrAPpami5VVgMQ== + dependencies: + lilconfig "^3.0.0" + yaml "^2.3.4" + +postcss-loader@^7.0.1: + version "7.3.4" + resolved "https://registry.yarnpkg.com/postcss-loader/-/postcss-loader-7.3.4.tgz#aed9b79ce4ed7e9e89e56199d25ad1ec8f606209" + integrity sha512-iW5WTTBSC5BfsBJ9daFMPVrLT36MrNiC6fqOZTTaHjBNX6Pfd5p+hSBqe/fEeNd7pc13QiAyGt7VdGMw4eRC4A== + dependencies: + cosmiconfig "^8.3.5" + jiti "^1.20.0" + semver "^7.5.4" + +postcss-logical@^5.0.4: + version "5.0.4" + resolved "https://registry.yarnpkg.com/postcss-logical/-/postcss-logical-5.0.4.tgz#ec75b1ee54421acc04d5921576b7d8db6b0e6f73" + integrity sha512-RHXxplCeLh9VjinvMrZONq7im4wjWGlRJAqmAVLXyZaXwfDWP73/oq4NdIp+OZwhQUMj0zjqDfM5Fj7qby+B4g== + +postcss-media-minmax@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/postcss-media-minmax/-/postcss-media-minmax-5.0.0.tgz#7140bddec173e2d6d657edbd8554a55794e2a5b5" + integrity sha512-yDUvFf9QdFZTuCUg0g0uNSHVlJ5X1lSzDZjPSFaiCWvjgsvu8vEVxtahPrLMinIDEEGnx6cBe6iqdx5YWz08wQ== + +postcss-modules-extract-imports@^3.1.0: + version "3.1.0" + resolved "https://registry.yarnpkg.com/postcss-modules-extract-imports/-/postcss-modules-extract-imports-3.1.0.tgz#b4497cb85a9c0c4b5aabeb759bb25e8d89f15002" + integrity sha512-k3kNe0aNFQDAZGbin48pL2VNidTF0w4/eASDsxlyspobzU3wZQLOGj7L9gfRe0Jo9/4uud09DsjFNH7winGv8Q== + +postcss-modules-local-by-default@^4.0.5: + version "4.0.5" + resolved 
"https://registry.yarnpkg.com/postcss-modules-local-by-default/-/postcss-modules-local-by-default-4.0.5.tgz#f1b9bd757a8edf4d8556e8d0f4f894260e3df78f" + integrity sha512-6MieY7sIfTK0hYfafw1OMEG+2bg8Q1ocHCpoWLqOKj3JXlKu4G7btkmM/B7lFubYkYWmRSPLZi5chid63ZaZYw== + dependencies: + icss-utils "^5.0.0" + postcss-selector-parser "^6.0.2" + postcss-value-parser "^4.1.0" + +postcss-modules-scope@^3.2.0: + version "3.2.0" + resolved "https://registry.yarnpkg.com/postcss-modules-scope/-/postcss-modules-scope-3.2.0.tgz#a43d28289a169ce2c15c00c4e64c0858e43457d5" + integrity sha512-oq+g1ssrsZOsx9M96c5w8laRmvEu9C3adDSjI8oTcbfkrTE8hx/zfyobUoWIxaKPO8bt6S62kxpw5GqypEw1QQ== + dependencies: + postcss-selector-parser "^6.0.4" + +postcss-modules-values@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/postcss-modules-values/-/postcss-modules-values-4.0.0.tgz#d7c5e7e68c3bb3c9b27cbf48ca0bb3ffb4602c9c" + integrity sha512-RDxHkAiEGI78gS2ofyvCsu7iycRv7oqw5xMWn9iMoR0N/7mf9D50ecQqUo5BZ9Zh2vH4bCUR/ktCqbB9m8vJjQ== + dependencies: + icss-utils "^5.0.0" + +postcss-nested@^6.0.1: + version "6.2.0" + resolved "https://registry.yarnpkg.com/postcss-nested/-/postcss-nested-6.2.0.tgz#4c2d22ab5f20b9cb61e2c5c5915950784d068131" + integrity sha512-HQbt28KulC5AJzG+cZtj9kvKB93CFCdLvog1WFLf1D+xmMvPGlBstkpTEZfK5+AN9hfJocyBFCNiqyS48bpgzQ== + dependencies: + postcss-selector-parser "^6.1.1" + +postcss-nesting@^10.2.0: + version "10.2.0" + resolved "https://registry.yarnpkg.com/postcss-nesting/-/postcss-nesting-10.2.0.tgz#0b12ce0db8edfd2d8ae0aaf86427370b898890be" + integrity sha512-EwMkYchxiDiKUhlJGzWsD9b2zvq/r2SSubcRrgP+jujMXFzqvANLt16lJANC+5uZ6hjI7lpRmI6O8JIl+8l1KA== + dependencies: + "@csstools/selector-specificity" "^2.0.0" + postcss-selector-parser "^6.0.10" + +postcss-opacity-percentage@^1.1.2: + version "1.1.3" + resolved "https://registry.yarnpkg.com/postcss-opacity-percentage/-/postcss-opacity-percentage-1.1.3.tgz#5b89b35551a556e20c5d23eb5260fbfcf5245da6" + integrity 
sha512-An6Ba4pHBiDtyVpSLymUUERMo2cU7s+Obz6BTrS+gxkbnSBNKSuD0AVUc+CpBMrpVPKKfoVz0WQCX+Tnst0i4A== + +postcss-overflow-shorthand@^3.0.4: + version "3.0.4" + resolved "https://registry.yarnpkg.com/postcss-overflow-shorthand/-/postcss-overflow-shorthand-3.0.4.tgz#7ed6486fec44b76f0eab15aa4866cda5d55d893e" + integrity sha512-otYl/ylHK8Y9bcBnPLo3foYFLL6a6Ak+3EQBPOTR7luMYCOsiVTUk1iLvNf6tVPNGXcoL9Hoz37kpfriRIFb4A== + dependencies: + postcss-value-parser "^4.2.0" + +postcss-page-break@^3.0.4: + version "3.0.4" + resolved "https://registry.yarnpkg.com/postcss-page-break/-/postcss-page-break-3.0.4.tgz#7fbf741c233621622b68d435babfb70dd8c1ee5f" + integrity sha512-1JGu8oCjVXLa9q9rFTo4MbeeA5FMe00/9C7lN4va606Rdb+HkxXtXsmEDrIraQ11fGz/WvKWa8gMuCKkrXpTsQ== + +postcss-place@^7.0.5: + version "7.0.5" + resolved "https://registry.yarnpkg.com/postcss-place/-/postcss-place-7.0.5.tgz#95dbf85fd9656a3a6e60e832b5809914236986c4" + integrity sha512-wR8igaZROA6Z4pv0d+bvVrvGY4GVHihBCBQieXFY3kuSuMyOmEnnfFzHl/tQuqHZkfkIVBEbDvYcFfHmpSet9g== + dependencies: + postcss-value-parser "^4.2.0" + +postcss-preset-env@^7.8.0: + version "7.8.3" + resolved "https://registry.yarnpkg.com/postcss-preset-env/-/postcss-preset-env-7.8.3.tgz#2a50f5e612c3149cc7af75634e202a5b2ad4f1e2" + integrity sha512-T1LgRm5uEVFSEF83vHZJV2z19lHg4yJuZ6gXZZkqVsqv63nlr6zabMH3l4Pc01FQCyfWVrh2GaUeCVy9Po+Aag== + dependencies: + "@csstools/postcss-cascade-layers" "^1.1.1" + "@csstools/postcss-color-function" "^1.1.1" + "@csstools/postcss-font-format-keywords" "^1.0.1" + "@csstools/postcss-hwb-function" "^1.0.2" + "@csstools/postcss-ic-unit" "^1.0.1" + "@csstools/postcss-is-pseudo-class" "^2.0.7" + "@csstools/postcss-nested-calc" "^1.0.0" + "@csstools/postcss-normalize-display-values" "^1.0.1" + "@csstools/postcss-oklab-function" "^1.1.1" + "@csstools/postcss-progressive-custom-properties" "^1.3.0" + "@csstools/postcss-stepped-value-functions" "^1.0.1" + "@csstools/postcss-text-decoration-shorthand" "^1.0.0" + 
"@csstools/postcss-trigonometric-functions" "^1.0.2" + "@csstools/postcss-unset-value" "^1.0.2" + autoprefixer "^10.4.13" + browserslist "^4.21.4" + css-blank-pseudo "^3.0.3" + css-has-pseudo "^3.0.4" + css-prefers-color-scheme "^6.0.3" + cssdb "^7.1.0" + postcss-attribute-case-insensitive "^5.0.2" + postcss-clamp "^4.1.0" + postcss-color-functional-notation "^4.2.4" + postcss-color-hex-alpha "^8.0.4" + postcss-color-rebeccapurple "^7.1.1" + postcss-custom-media "^8.0.2" + postcss-custom-properties "^12.1.10" + postcss-custom-selectors "^6.0.3" + postcss-dir-pseudo-class "^6.0.5" + postcss-double-position-gradients "^3.1.2" + postcss-env-function "^4.0.6" + postcss-focus-visible "^6.0.4" + postcss-focus-within "^5.0.4" + postcss-font-variant "^5.0.0" + postcss-gap-properties "^3.0.5" + postcss-image-set-function "^4.0.7" + postcss-initial "^4.0.1" + postcss-lab-function "^4.2.1" + postcss-logical "^5.0.4" + postcss-media-minmax "^5.0.0" + postcss-nesting "^10.2.0" + postcss-opacity-percentage "^1.1.2" + postcss-overflow-shorthand "^3.0.4" + postcss-page-break "^3.0.4" + postcss-place "^7.0.5" + postcss-pseudo-class-any-link "^7.1.6" + postcss-replace-overflow-wrap "^4.0.0" + postcss-selector-not "^6.0.1" + postcss-value-parser "^4.2.0" + +postcss-pseudo-class-any-link@^7.1.6: + version "7.1.6" + resolved "https://registry.yarnpkg.com/postcss-pseudo-class-any-link/-/postcss-pseudo-class-any-link-7.1.6.tgz#2693b221902da772c278def85a4d9a64b6e617ab" + integrity sha512-9sCtZkO6f/5ML9WcTLcIyV1yz9D1rf0tWc+ulKcvV30s0iZKS/ONyETvoWsr6vnrmW+X+KmuK3gV/w5EWnT37w== + dependencies: + postcss-selector-parser "^6.0.10" + +postcss-replace-overflow-wrap@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/postcss-replace-overflow-wrap/-/postcss-replace-overflow-wrap-4.0.0.tgz#d2df6bed10b477bf9c52fab28c568b4b29ca4319" + integrity sha512-KmF7SBPphT4gPPcKZc7aDkweHiKEEO8cla/GjcBK+ckKxiZslIu3C4GCRW3DNfL0o7yW7kMQu9xlZ1kXRXLXtw== + +postcss-selector-not@^6.0.1: + version 
"6.0.1" + resolved "https://registry.yarnpkg.com/postcss-selector-not/-/postcss-selector-not-6.0.1.tgz#8f0a709bf7d4b45222793fc34409be407537556d" + integrity sha512-1i9affjAe9xu/y9uqWH+tD4r6/hDaXJruk8xn2x1vzxC2U3J3LKO3zJW4CyxlNhA56pADJ/djpEwpH1RClI2rQ== + dependencies: + postcss-selector-parser "^6.0.10" + +postcss-selector-parser@^6.0.10, postcss-selector-parser@^6.0.11, postcss-selector-parser@^6.0.2, postcss-selector-parser@^6.0.4, postcss-selector-parser@^6.0.9, postcss-selector-parser@^6.1.1: + version "6.1.2" + resolved "https://registry.yarnpkg.com/postcss-selector-parser/-/postcss-selector-parser-6.1.2.tgz#27ecb41fb0e3b6ba7a1ec84fff347f734c7929de" + integrity sha512-Q8qQfPiZ+THO/3ZrOrO0cJJKfpYCagtMUkXbnEfmgUjwXg6z/WBeOyS9APBBPCTSiDV+s4SwQGu8yFsiMRIudg== + dependencies: + cssesc "^3.0.0" + util-deprecate "^1.0.2" + +postcss-value-parser@^4.0.0, postcss-value-parser@^4.1.0, postcss-value-parser@^4.2.0: + version "4.2.0" + resolved "https://registry.yarnpkg.com/postcss-value-parser/-/postcss-value-parser-4.2.0.tgz#723c09920836ba6d3e5af019f92bc0971c02e514" + integrity sha512-1NNCs6uurfkVbeXG4S8JFT9t19m45ICnif8zWLd5oPSZ50QnwMfK+H3jv408d4jw/7Bttv5axS5IiHoLaVNHeQ== + +postcss@^8.4.23, postcss@^8.4.33: + version "8.4.47" + resolved "https://registry.yarnpkg.com/postcss/-/postcss-8.4.47.tgz#5bf6c9a010f3e724c503bf03ef7947dcb0fea365" + integrity sha512-56rxCq7G/XfB4EkXq9Egn5GCqugWvDFjafDOThIdMBsI15iqPqR5r15TfSr1YPYeEI19YeaXMCbY6u88Y76GLQ== + dependencies: + nanoid "^3.3.7" + picocolors "^1.1.0" + source-map-js "^1.2.1" + +prelude-ls@^1.2.1: + version "1.2.1" + resolved "https://registry.yarnpkg.com/prelude-ls/-/prelude-ls-1.2.1.tgz#debc6489d7a6e6b0e7611888cec880337d316396" + integrity sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g== + +prepend-http@^1.0.1: + version "1.0.4" + resolved "https://registry.yarnpkg.com/prepend-http/-/prepend-http-1.0.4.tgz#d4f4562b0ce3696e41ac52d0e002e57a635dc6dc" + integrity 
sha512-PhmXi5XmoyKw1Un4E+opM2KcsJInDvKyuOumcjjw3waw86ZNjHwVUOOWLc4bCzLdcKNaWBH9e99sbWzDQsVaYg== + +prepend-http@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/prepend-http/-/prepend-http-2.0.0.tgz#e92434bfa5ea8c19f41cdfd401d741a3c819d897" + integrity sha512-ravE6m9Atw9Z/jjttRUZ+clIXogdghyZAuWJ3qEzjT+jI/dL1ifAqhZeC5VHzQp1MSt1+jxKkFNemj/iO7tVUA== + +pretty-error@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/pretty-error/-/pretty-error-4.0.0.tgz#90a703f46dd7234adb46d0f84823e9d1cb8f10d6" + integrity sha512-AoJ5YMAcXKYxKhuJGdcvse+Voc6v1RgnsR3nWcYU7q4t6z0Q6T86sv5Zq8VIRbOWWFpvdGE83LtdSMNd+6Y0xw== + dependencies: + lodash "^4.17.20" + renderkid "^3.0.0" + +pretty-format@^27.0.2: + version "27.5.1" + resolved "https://registry.yarnpkg.com/pretty-format/-/pretty-format-27.5.1.tgz#2181879fdea51a7a5851fb39d920faa63f01d88e" + integrity sha512-Qb1gy5OrP5+zDf2Bvnzdl3jsTf1qXVMazbvCoKhtKqVs4/YK4ozX4gKQJJVyNe+cajNPn0KoC0MC3FUmaHWEmQ== + dependencies: + ansi-regex "^5.0.1" + ansi-styles "^5.0.0" + react-is "^17.0.1" + +process-nextick-args@~2.0.0: + version "2.0.1" + resolved "https://registry.yarnpkg.com/process-nextick-args/-/process-nextick-args-2.0.1.tgz#7820d9b16120cc55ca9ae7792680ae7dba6d7fe2" + integrity sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag== + +process@^0.11.10: + version "0.11.10" + resolved "https://registry.yarnpkg.com/process/-/process-0.11.10.tgz#7332300e840161bda3e69a1d1d91a7d4bc16f182" + integrity sha512-cdGef/drWFoydD1JsMzuFf8100nZl+GT+yacc2bEced5f9Rjk4z+WtFUTBu9PhOi9j/jfmBPu0mMEY4wIdAF8A== + +prop-types@^15.8.1: + version "15.8.1" + resolved "https://registry.yarnpkg.com/prop-types/-/prop-types-15.8.1.tgz#67d87bf1a694f48435cf332c24af10214a3140b5" + integrity sha512-oj87CgZICdulUohogVAR7AjlC0327U4el4L6eAvOqCeudMDVU0NThNaV+b9Df4dXgSP1gXMTnPdhfe/2qDH5cg== + dependencies: + loose-envify "^1.4.0" + object-assign "^4.1.1" + react-is "^16.13.1" + +proto-list@~1.2.1: + version 
"1.2.4" + resolved "https://registry.yarnpkg.com/proto-list/-/proto-list-1.2.4.tgz#212d5bfe1318306a420f6402b8e26ff39647a849" + integrity sha512-vtK/94akxsTMhe0/cbfpR+syPuszcuwhqVjJq26CuNDgFGj682oRBXOP5MJpv2r7JtE8MsiepGIqvvOTBwn2vA== + +protobufjs@^6.8.8: + version "6.11.4" + resolved "https://registry.yarnpkg.com/protobufjs/-/protobufjs-6.11.4.tgz#29a412c38bf70d89e537b6d02d904a6f448173aa" + integrity sha512-5kQWPaJHi1WoCpjTGszzQ32PG2F4+wRY6BmAT4Vfw56Q2FZ4YZzK20xUYQH4YkfehY1e6QSICrJquM6xXZNcrw== + dependencies: + "@protobufjs/aspromise" "^1.1.2" + "@protobufjs/base64" "^1.1.2" + "@protobufjs/codegen" "^2.0.4" + "@protobufjs/eventemitter" "^1.1.0" + "@protobufjs/fetch" "^1.1.0" + "@protobufjs/float" "^1.0.2" + "@protobufjs/inquire" "^1.1.0" + "@protobufjs/path" "^1.1.2" + "@protobufjs/pool" "^1.1.0" + "@protobufjs/utf8" "^1.1.0" + "@types/long" "^4.0.1" + "@types/node" ">=13.7.0" + long "^4.0.0" + +proxy-addr@~2.0.7: + version "2.0.7" + resolved "https://registry.yarnpkg.com/proxy-addr/-/proxy-addr-2.0.7.tgz#f19fe69ceab311eeb94b42e70e8c2070f9ba1025" + integrity sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg== + dependencies: + forwarded "0.2.0" + ipaddr.js "1.9.1" + +proxy-from-env@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/proxy-from-env/-/proxy-from-env-1.1.0.tgz#e102f16ca355424865755d2c9e8ea4f24d58c3e2" + integrity sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg== + +pseudomap@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/pseudomap/-/pseudomap-1.0.2.tgz#f052a28da70e618917ef0a8ac34c1ae5a68286b3" + integrity sha512-b/YwNhb8lk1Zz2+bXXpS/LK9OisiZZ1SNsSLxN1x2OXVEhW2Ckr/7mWE5vrC1ZTiJlD9g19jWszTmJsB+oEpFQ== + +psl@^1.1.33: + version "1.10.0" + resolved "https://registry.yarnpkg.com/psl/-/psl-1.10.0.tgz#1450f7e16f922c3beeb7bd9db3f312635018fa15" + integrity 
sha512-KSKHEbjAnpUuAUserOq0FxGXCUrzC3WniuSJhvdbs102rL55266ZcHBqLWOsG30spQMlPdpy7icATiAQehg/iA== + dependencies: + punycode "^2.3.1" + +pump@^3.0.0: + version "3.0.2" + resolved "https://registry.yarnpkg.com/pump/-/pump-3.0.2.tgz#836f3edd6bc2ee599256c924ffe0d88573ddcbf8" + integrity sha512-tUPXtzlGM8FE3P0ZL6DVs/3P58k9nk8/jZeQCurTJylQA8qFYzHFfhBJkuqyE0FifOsQ0uKWekiZ5g8wtr28cw== + dependencies: + end-of-stream "^1.1.0" + once "^1.3.1" + +punycode@^2.1.0, punycode@^2.1.1, punycode@^2.3.1: + version "2.3.1" + resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.3.1.tgz#027422e2faec0b25e1549c3e1bd8309b9133b6e5" + integrity sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg== + +qs@6.13.0: + version "6.13.0" + resolved "https://registry.yarnpkg.com/qs/-/qs-6.13.0.tgz#6ca3bd58439f7e245655798997787b0d88a51906" + integrity sha512-+38qI9SOr8tfZ4QmJNplMUxqjbe7LKvvZgWdExBOmd+egZTtjLB67Gu0HRX3u/XOq7UU2Nx6nsjvS16Z9uwfpg== + dependencies: + side-channel "^1.0.6" + +query-string@^5.0.1: + version "5.1.1" + resolved "https://registry.yarnpkg.com/query-string/-/query-string-5.1.1.tgz#a78c012b71c17e05f2e3fa2319dd330682efb3cb" + integrity sha512-gjWOsm2SoGlgLEdAGt7a6slVOk9mGiXmPFMqrEhLQ68rhQuBnpfs3+EmlvqKyxnCo9/PPlF+9MtY02S1aFg+Jw== + dependencies: + decode-uri-component "^0.2.0" + object-assign "^4.1.0" + strict-uri-encode "^1.0.0" + +querystringify@^2.1.1: + version "2.2.0" + resolved "https://registry.yarnpkg.com/querystringify/-/querystringify-2.2.0.tgz#3345941b4153cb9d082d8eee4cda2016a9aef7f6" + integrity sha512-FIqgj2EUvTa7R50u0rGsyTftzjYmv/a3hO345bZNrqabNqjtgiDMgmo4mkUjd+nzU5oF3dClKqFIPUKybUyqoQ== + +queue-microtask@^1.2.2: + version "1.2.3" + resolved "https://registry.yarnpkg.com/queue-microtask/-/queue-microtask-1.2.3.tgz#4929228bbc724dfac43e0efb058caf7b6cfb6243" + integrity sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A== + +randombytes@^2.1.0: + version "2.1.0" + resolved 
"https://registry.yarnpkg.com/randombytes/-/randombytes-2.1.0.tgz#df6f84372f0270dc65cdf6291349ab7a473d4f2a" + integrity sha512-vYl3iOX+4CKUWuxGi9Ukhie6fsqXqS9FE2Zaic4tNFD2N2QQaXOMFbuKK4QmDHC0JO6B1Zp41J0LpT0oR68amQ== + dependencies: + safe-buffer "^5.1.0" + +range-parser@^1.2.1, range-parser@~1.2.1: + version "1.2.1" + resolved "https://registry.yarnpkg.com/range-parser/-/range-parser-1.2.1.tgz#3cf37023d199e1c24d1a55b84800c2f3e6468031" + integrity sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg== + +raw-body@2.5.2: + version "2.5.2" + resolved "https://registry.yarnpkg.com/raw-body/-/raw-body-2.5.2.tgz#99febd83b90e08975087e8f1f9419a149366b68a" + integrity sha512-8zGqypfENjCIqGhgXToC8aB2r7YrBX+AQAfIPs/Mlk+BtPTztOvTS01NRW/3Eh60J+a48lt8qsCzirQ6loCVfA== + dependencies: + bytes "3.1.2" + http-errors "2.0.0" + iconv-lite "0.4.24" + unpipe "1.0.0" + +react-dom@^18.2.0: + version "18.3.1" + resolved "https://registry.yarnpkg.com/react-dom/-/react-dom-18.3.1.tgz#c2265d79511b57d479b3dd3fdfa51536494c5cb4" + integrity sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw== + dependencies: + loose-envify "^1.1.0" + scheduler "^0.23.2" + +react-is@^16.13.1: + version "16.13.1" + resolved "https://registry.yarnpkg.com/react-is/-/react-is-16.13.1.tgz#789729a4dc36de2999dc156dd6c1d9c18cea56a4" + integrity sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ== + +react-is@^17.0.1: + version "17.0.2" + resolved "https://registry.yarnpkg.com/react-is/-/react-is-17.0.2.tgz#e691d4a8e9c789365655539ab372762b0efb54f0" + integrity sha512-w2GsyukL62IJnlaff/nRegPQR94C/XXamvMWmSHRJ4y7Ts/4ocGRmTHvOs8PSE6pB3dWOrD/nueuU5sduBsQ4w== + +react-refresh@^0.14.0: + version "0.14.2" + resolved "https://registry.yarnpkg.com/react-refresh/-/react-refresh-0.14.2.tgz#3833da01ce32da470f1f936b9d477da5c7028bf9" + integrity 
sha512-jCvmsr+1IUSMUyzOkRcvnVbX3ZYC6g9TDrDbFuFmRDq7PD4yaGbLKNQL6k2jnArV8hjYxh7hVhAZB6s9HDGpZA== + +react@^18.2.0: + version "18.3.1" + resolved "https://registry.yarnpkg.com/react/-/react-18.3.1.tgz#49ab892009c53933625bd16b2533fc754cab2891" + integrity sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ== + dependencies: + loose-envify "^1.1.0" + +read-cache@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/read-cache/-/read-cache-1.0.0.tgz#e664ef31161166c9751cdbe8dbcf86b5fb58f774" + integrity sha512-Owdv/Ft7IjOgm/i0xvNDZ1LrRANRfew4b2prF3OWMQLxLfu3bS8FVhCsrSCMK4lR56Y9ya+AThoTpDCTxCmpRA== + dependencies: + pify "^2.3.0" + +readable-stream@^2.0.0, readable-stream@^2.0.1, readable-stream@^2.3.0, readable-stream@^2.3.5: + version "2.3.8" + resolved "https://registry.yarnpkg.com/readable-stream/-/readable-stream-2.3.8.tgz#91125e8042bba1b9887f49345f6277027ce8be9b" + integrity sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA== + dependencies: + core-util-is "~1.0.0" + inherits "~2.0.3" + isarray "~1.0.0" + process-nextick-args "~2.0.0" + safe-buffer "~5.1.1" + string_decoder "~1.1.1" + util-deprecate "~1.0.1" + +readable-stream@^3.0.6: + version "3.6.2" + resolved "https://registry.yarnpkg.com/readable-stream/-/readable-stream-3.6.2.tgz#56a9b36ea965c00c5a93ef31eb111a0f11056967" + integrity sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA== + dependencies: + inherits "^2.0.3" + string_decoder "^1.1.1" + util-deprecate "^1.0.1" + +readdirp@^4.0.1: + version "4.0.2" + resolved "https://registry.yarnpkg.com/readdirp/-/readdirp-4.0.2.tgz#388fccb8b75665da3abffe2d8f8ed59fe74c230a" + integrity sha512-yDMz9g+VaZkqBYS/ozoBJwaBhTbZo3UNYQHNRw1D3UFQB8oHB4uS/tAODO+ZLjGWmUbKnIlOWO+aaIiAxrUWHA== + +readdirp@~3.6.0: + version "3.6.0" + resolved 
"https://registry.yarnpkg.com/readdirp/-/readdirp-3.6.0.tgz#74a370bd857116e245b29cc97340cd431a02a6c7" + integrity sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA== + dependencies: + picomatch "^2.2.1" + +rechoir@^0.7.0: + version "0.7.1" + resolved "https://registry.yarnpkg.com/rechoir/-/rechoir-0.7.1.tgz#9478a96a1ca135b5e88fc027f03ee92d6c645686" + integrity sha512-/njmZ8s1wVeR6pjTZ+0nCnv8SpZNRMT2D1RLOJQESlYFDBvwpTA4KWJpZ+sBJ4+vhjILRcK7JIFdGCdxEAAitg== + dependencies: + resolve "^1.9.0" + +reflect.getprototypeof@^1.0.4: + version "1.0.6" + resolved "https://registry.yarnpkg.com/reflect.getprototypeof/-/reflect.getprototypeof-1.0.6.tgz#3ab04c32a8390b770712b7a8633972702d278859" + integrity sha512-fmfw4XgoDke3kdI6h4xcUz1dG8uaiv5q9gcEwLS4Pnth2kxT+GZ7YehS1JTMGBQmtV7Y4GFGbs2re2NqhdozUg== + dependencies: + call-bind "^1.0.7" + define-properties "^1.2.1" + es-abstract "^1.23.1" + es-errors "^1.3.0" + get-intrinsic "^1.2.4" + globalthis "^1.0.3" + which-builtin-type "^1.1.3" + +regenerate-unicode-properties@^10.2.0: + version "10.2.0" + resolved "https://registry.yarnpkg.com/regenerate-unicode-properties/-/regenerate-unicode-properties-10.2.0.tgz#626e39df8c372338ea9b8028d1f99dc3fd9c3db0" + integrity sha512-DqHn3DwbmmPVzeKj9woBadqmXxLvQoQIwu7nopMc72ztvxVmVk2SBhSnx67zuye5TP+lJsb/TBQsjLKhnDf3MA== + dependencies: + regenerate "^1.4.2" + +regenerate@^1.4.2: + version "1.4.2" + resolved "https://registry.yarnpkg.com/regenerate/-/regenerate-1.4.2.tgz#b9346d8827e8f5a32f7ba29637d398b69014848a" + integrity sha512-zrceR/XhGYU/d/opr2EKO7aRHUeiBI8qjtfHqADTwZd6Szfy16la6kqD0MIUs5z5hx6AaKa+PixpPrR289+I0A== + +regenerator-runtime@^0.14.0: + version "0.14.1" + resolved "https://registry.yarnpkg.com/regenerator-runtime/-/regenerator-runtime-0.14.1.tgz#356ade10263f685dda125100cd862c1db895327f" + integrity sha512-dYnhHh0nJoMfnkZs6GmmhFknAGRrLznOu5nc9ML+EJxGvrx6H7teuevqVqCuPcPK//3eDrrjQhehXVx9cnkGdw== + +regenerator-transform@^0.15.2: + version 
"0.15.2" + resolved "https://registry.yarnpkg.com/regenerator-transform/-/regenerator-transform-0.15.2.tgz#5bbae58b522098ebdf09bca2f83838929001c7a4" + integrity sha512-hfMp2BoF0qOk3uc5V20ALGDS2ddjQaLrdl7xrGXvAIow7qeWRM2VA2HuCHkUKk9slq3VwEwLNK3DFBqDfPGYtg== + dependencies: + "@babel/runtime" "^7.8.4" + +regexp.prototype.flags@^1.5.1, regexp.prototype.flags@^1.5.2: + version "1.5.3" + resolved "https://registry.yarnpkg.com/regexp.prototype.flags/-/regexp.prototype.flags-1.5.3.tgz#b3ae40b1d2499b8350ab2c3fe6ef3845d3a96f42" + integrity sha512-vqlC04+RQoFalODCbCumG2xIOvapzVMHwsyIGM/SIE8fRhFFsXeH8/QQ+s0T0kDAhKc4k30s73/0ydkHQz6HlQ== + dependencies: + call-bind "^1.0.7" + define-properties "^1.2.1" + es-errors "^1.3.0" + set-function-name "^2.0.2" + +regexpu-core@^6.1.1: + version "6.1.1" + resolved "https://registry.yarnpkg.com/regexpu-core/-/regexpu-core-6.1.1.tgz#b469b245594cb2d088ceebc6369dceb8c00becac" + integrity sha512-k67Nb9jvwJcJmVpw0jPttR1/zVfnKf8Km0IPatrU/zJ5XeG3+Slx0xLXs9HByJSzXzrlz5EDvN6yLNMDc2qdnw== + dependencies: + regenerate "^1.4.2" + regenerate-unicode-properties "^10.2.0" + regjsgen "^0.8.0" + regjsparser "^0.11.0" + unicode-match-property-ecmascript "^2.0.0" + unicode-match-property-value-ecmascript "^2.1.0" + +regjsgen@^0.8.0: + version "0.8.0" + resolved "https://registry.yarnpkg.com/regjsgen/-/regjsgen-0.8.0.tgz#df23ff26e0c5b300a6470cad160a9d090c3a37ab" + integrity sha512-RvwtGe3d7LvWiDQXeQw8p5asZUmfU1G/l6WbUXeHta7Y2PEIvBTwH6E2EfmYUK8pxcxEdEmaomqyp0vZZ7C+3Q== + +regjsparser@^0.11.0: + version "0.11.2" + resolved "https://registry.yarnpkg.com/regjsparser/-/regjsparser-0.11.2.tgz#7404ad42be00226d72bcf1f003f1f441861913d8" + integrity sha512-3OGZZ4HoLJkkAZx/48mTXJNlmqTGOzc0o9OWQPuWpkOlXXPbyN6OafCcoXUnBqE2D3f/T5L+pWc1kdEmnfnRsA== + dependencies: + jsesc "~3.0.2" + +relateurl@^0.2.7: + version "0.2.7" + resolved "https://registry.yarnpkg.com/relateurl/-/relateurl-0.2.7.tgz#54dbf377e51440aca90a4cd274600d3ff2d888a9" + integrity 
sha512-G08Dxvm4iDN3MLM0EsP62EDV9IuhXPR6blNz6Utcp7zyV3tr4HVNINt6MpaRWbxoOHT3Q7YN2P+jaHX8vUbgog== + +renderkid@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/renderkid/-/renderkid-3.0.0.tgz#5fd823e4d6951d37358ecc9a58b1f06836b6268a" + integrity sha512-q/7VIQA8lmM1hF+jn+sFSPWGlMkSAeNYcPLmDQx2zzuiDfaLrOmumR8iaUKlenFgh0XRPIUeSPlH3A+AW3Z5pg== + dependencies: + css-select "^4.1.3" + dom-converter "^0.2.0" + htmlparser2 "^6.1.0" + lodash "^4.17.21" + strip-ansi "^6.0.1" + +replace-ext@^1.0.0: + version "1.0.1" + resolved "https://registry.yarnpkg.com/replace-ext/-/replace-ext-1.0.1.tgz#2d6d996d04a15855d967443631dd5f77825b016a" + integrity sha512-yD5BHCe7quCgBph4rMQ+0KkIRKwWCrHDOX1p1Gp6HwjPM5kVoCdKGNhN7ydqqsX6lJEnQDKZ/tFMiEdQ1dvPEw== + +require-directory@^2.1.1: + version "2.1.1" + resolved "https://registry.yarnpkg.com/require-directory/-/require-directory-2.1.1.tgz#8c64ad5fd30dab1c976e2344ffe7f792a6a6df42" + integrity sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q== + +require-from-string@^2.0.2: + version "2.0.2" + resolved "https://registry.yarnpkg.com/require-from-string/-/require-from-string-2.0.2.tgz#89a7fdd938261267318eafe14f9c32e598c36909" + integrity sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw== + +requires-port@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/requires-port/-/requires-port-1.0.0.tgz#925d2601d39ac485e091cf0da5c6e694dc3dcaff" + integrity sha512-KigOCHcocU3XODJxsu8i/j8T9tzT4adHiecwORRQ0ZZFcp7ahwXuRU1m+yuO90C5ZUyGeGfocHDI14M3L3yDAQ== + +resolve-cwd@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/resolve-cwd/-/resolve-cwd-3.0.0.tgz#0f0075f1bb2544766cf73ba6a6e2adfebcb13f2d" + integrity sha512-OrZaX2Mb+rJCpH/6CpSqt9xFVpN++x01XnN2ie9g6P5/3xelLAkXWVADpdz1IHD/KFfEXyE6V0U01OQ3UO2rEg== + dependencies: + resolve-from "^5.0.0" + +resolve-from@^4.0.0: + version "4.0.0" + resolved 
"https://registry.yarnpkg.com/resolve-from/-/resolve-from-4.0.0.tgz#4abcd852ad32dd7baabfe9b40e00a36db5f392e6" + integrity sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g== + +resolve-from@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/resolve-from/-/resolve-from-5.0.0.tgz#c35225843df8f776df21c57557bc087e9dfdfc69" + integrity sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw== + +resolve@^1.1.7, resolve@^1.14.2, resolve@^1.22.2, resolve@^1.9.0: + version "1.22.8" + resolved "https://registry.yarnpkg.com/resolve/-/resolve-1.22.8.tgz#b6c87a9f2aa06dfab52e3d70ac8cde321fa5a48d" + integrity sha512-oKWePCxqpd6FlLvGV1VU0x7bkPmmCNolxzjMf4NczoDnQcIWrAF+cPtZn5i6n+RfD2d9i0tzpKnG6Yk168yIyw== + dependencies: + is-core-module "^2.13.0" + path-parse "^1.0.7" + supports-preserve-symlinks-flag "^1.0.0" + +resolve@^2.0.0-next.5: + version "2.0.0-next.5" + resolved "https://registry.yarnpkg.com/resolve/-/resolve-2.0.0-next.5.tgz#6b0ec3107e671e52b68cd068ef327173b90dc03c" + integrity sha512-U7WjGVG9sH8tvjW5SmGbQuui75FiyjAX72HX15DwBBwF9dNiQZRQAg9nnPhYy+TUnE0+VcrttuvNI8oSxZcocA== + dependencies: + is-core-module "^2.13.0" + path-parse "^1.0.7" + supports-preserve-symlinks-flag "^1.0.0" + +responselike@1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/responselike/-/responselike-1.0.2.tgz#918720ef3b631c5642be068f15ade5a46f4ba1e7" + integrity sha512-/Fpe5guzJk1gPqdJLJR5u7eG/gNY4nImjbRDaVWVMRhne55TCmj2i9Q+54PBRfatRC8v/rIiv9BN0pMd9OV5EQ== + dependencies: + lowercase-keys "^1.0.0" + +retry@^0.13.1: + version "0.13.1" + resolved "https://registry.yarnpkg.com/retry/-/retry-0.13.1.tgz#185b1587acf67919d63b357349e03537b2484658" + integrity sha512-XQBQ3I8W1Cge0Seh+6gjj03LbmRFWuoszgK9ooCpwYIrhhoO80pfq4cUkU5DkknwfOfFteRwlZ56PYOGYyFWdg== + +reusify@^1.0.4: + version "1.0.4" + resolved 
"https://registry.yarnpkg.com/reusify/-/reusify-1.0.4.tgz#90da382b1e126efc02146e90845a88db12925d76" + integrity sha512-U9nH88a3fc/ekCF1l0/UP1IosiuIjyTh7hBvXVMHYgVcfGvt897Xguj2UOLDeI5BG2m7/uwyaLVT6fbtCwTyzw== + +rimraf@^2.5.4: + version "2.7.1" + resolved "https://registry.yarnpkg.com/rimraf/-/rimraf-2.7.1.tgz#35797f13a7fdadc566142c29d4f07ccad483e3ec" + integrity sha512-uWjbaKIK3T1OSVptzX7Nl6PvQ3qAGtKEtVRjRuazjfL3Bx5eI409VZSqgND+4UNnmzLVdPj9FqFJNPqBZFve4w== + dependencies: + glob "^7.1.3" + +rimraf@^3.0.2: + version "3.0.2" + resolved "https://registry.yarnpkg.com/rimraf/-/rimraf-3.0.2.tgz#f1a5402ba6220ad52cc1282bac1ae3aa49fd061a" + integrity sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA== + dependencies: + glob "^7.1.3" + +run-parallel@^1.1.9: + version "1.2.0" + resolved "https://registry.yarnpkg.com/run-parallel/-/run-parallel-1.2.0.tgz#66d1368da7bdf921eb9d95bd1a9229e7f21a43ee" + integrity sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA== + dependencies: + queue-microtask "^1.2.2" + +safe-array-concat@^1.1.2: + version "1.1.2" + resolved "https://registry.yarnpkg.com/safe-array-concat/-/safe-array-concat-1.1.2.tgz#81d77ee0c4e8b863635227c721278dd524c20edb" + integrity sha512-vj6RsCsWBCf19jIeHEfkRMw8DPiBb+DMXklQ/1SGDHOMlHdPUkZXFQ2YdplS23zESTijAcurb1aSgJA3AgMu1Q== + dependencies: + call-bind "^1.0.7" + get-intrinsic "^1.2.4" + has-symbols "^1.0.3" + isarray "^2.0.5" + +safe-buffer@5.2.1, safe-buffer@>=5.1.0, safe-buffer@^5.0.1, safe-buffer@^5.1.0, safe-buffer@^5.1.1, safe-buffer@~5.2.0: + version "5.2.1" + resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.1.tgz#1eaf9fa9bdb1fdd4ec75f58f9cdb4e6b7827eec6" + integrity sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ== + +safe-buffer@~5.1.0, safe-buffer@~5.1.1: + version "5.1.2" + resolved 
"https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.1.2.tgz#991ec69d296e0313747d59bdfd2b745c35f8828d" + integrity sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g== + +safe-regex-test@^1.0.3: + version "1.0.3" + resolved "https://registry.yarnpkg.com/safe-regex-test/-/safe-regex-test-1.0.3.tgz#a5b4c0f06e0ab50ea2c395c14d8371232924c377" + integrity sha512-CdASjNJPvRa7roO6Ra/gLYBTzYzzPyyBXxIMdGW3USQLyjWEls2RgW5UBTXaQVp+OrpeCK3bLem8smtmheoRuw== + dependencies: + call-bind "^1.0.6" + es-errors "^1.3.0" + is-regex "^1.1.4" + +"safer-buffer@>= 2.1.2 < 3": + version "2.1.2" + resolved "https://registry.yarnpkg.com/safer-buffer/-/safer-buffer-2.1.2.tgz#44fa161b0187b9549dd84bb91802f9bd8385cd6a" + integrity sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg== + +sass-loader@^13.0.2: + version "13.3.3" + resolved "https://registry.yarnpkg.com/sass-loader/-/sass-loader-13.3.3.tgz#60df5e858788cffb1a3215e5b92e9cba61e7e133" + integrity sha512-mt5YN2F1MOZr3d/wBRcZxeFgwgkH44wVc2zohO2YF6JiOMkiXe4BYRZpSu2sO1g71mo/j16txzUhsKZlqjVGzA== + dependencies: + neo-async "^2.6.2" + +sass@^1.54.5: + version "1.80.5" + resolved "https://registry.yarnpkg.com/sass/-/sass-1.80.5.tgz#0ba965223d44df22497f2966b498cf5c453fae8f" + integrity sha512-TQd2aoQl/+zsxRMEDSxVdpPIqeq9UFc6pr7PzkugiTx3VYCFPUaa3P4RrBQsqok4PO200Vkz0vXQBNlg7W907g== + dependencies: + "@parcel/watcher" "^2.4.1" + chokidar "^4.0.0" + immutable "^4.0.0" + source-map-js ">=0.6.2 <2.0.0" + +scheduler@^0.23.2: + version "0.23.2" + resolved "https://registry.yarnpkg.com/scheduler/-/scheduler-0.23.2.tgz#414ba64a3b282892e944cf2108ecc078d115cdc3" + integrity sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ== + dependencies: + loose-envify "^1.1.0" + +schema-utils@^2.6.5, schema-utils@^2.7.1: + version "2.7.1" + resolved 
"https://registry.yarnpkg.com/schema-utils/-/schema-utils-2.7.1.tgz#1ca4f32d1b24c590c203b8e7a50bf0ea4cd394d7" + integrity sha512-SHiNtMOUGWBQJwzISiVYKu82GiV4QYGePp3odlY1tuKO7gPtphAT5R/py0fA6xtbgLL/RvtJZnU9b8s0F1q0Xg== + dependencies: + "@types/json-schema" "^7.0.5" + ajv "^6.12.4" + ajv-keywords "^3.5.2" + +schema-utils@^3.0.0, schema-utils@^3.1.1, schema-utils@^3.2.0: + version "3.3.0" + resolved "https://registry.yarnpkg.com/schema-utils/-/schema-utils-3.3.0.tgz#f50a88877c3c01652a15b622ae9e9795df7a60fe" + integrity sha512-pN/yOAvcC+5rQ5nERGuwrjLlYvLTbCibnZ1I7B1LaiAz9BRBlE9GMgE/eqV30P7aJQUf7Ddimy/RsbYO/GrVGg== + dependencies: + "@types/json-schema" "^7.0.8" + ajv "^6.12.5" + ajv-keywords "^3.5.2" + +schema-utils@^4.0.0, schema-utils@^4.2.0: + version "4.2.0" + resolved "https://registry.yarnpkg.com/schema-utils/-/schema-utils-4.2.0.tgz#70d7c93e153a273a805801882ebd3bff20d89c8b" + integrity sha512-L0jRsrPpjdckP3oPug3/VxNKt2trR8TcabrM6FOAAlvC/9Phcmm+cuAgTlxBqdBR1WJx7Naj9WHw+aOmheSVbw== + dependencies: + "@types/json-schema" "^7.0.9" + ajv "^8.9.0" + ajv-formats "^2.1.1" + ajv-keywords "^5.1.0" + +seek-bzip@^1.0.5: + version "1.0.6" + resolved "https://registry.yarnpkg.com/seek-bzip/-/seek-bzip-1.0.6.tgz#35c4171f55a680916b52a07859ecf3b5857f21c4" + integrity sha512-e1QtP3YL5tWww8uKaOCQ18UxIT2laNBXHjV/S2WYCiK4udiv8lkG89KRIoCjUagnAmCBurjF4zEVX2ByBbnCjQ== + dependencies: + commander "^2.8.1" + +select-hose@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/select-hose/-/select-hose-2.0.0.tgz#625d8658f865af43ec962bfc376a37359a4994ca" + integrity sha512-mEugaLK+YfkijB4fx0e6kImuJdCIt2LxCRcbEYPqRGCs4F2ogyfZU5IAZRdjCP8JPq2AtdNoC/Dux63d9Kiryg== + +selfsigned@^2.1.1: + version "2.4.1" + resolved "https://registry.yarnpkg.com/selfsigned/-/selfsigned-2.4.1.tgz#560d90565442a3ed35b674034cec4e95dceb4ae0" + integrity sha512-th5B4L2U+eGLq1TVh7zNRGBapioSORUeymIydxgFpwww9d2qyKvtuPU2jJuHvYAwwqi2Y596QBL3eEqcPEYL8Q== + dependencies: + "@types/node-forge" "^1.3.0" + 
node-forge "^1" + +semiver@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/semiver/-/semiver-1.1.0.tgz#9c97fb02c21c7ce4fcf1b73e2c7a24324bdddd5f" + integrity sha512-QNI2ChmuioGC1/xjyYwyZYADILWyW6AmS1UH6gDj/SFUUUS4MBAWs/7mxnkRPc/F4iHezDP+O8t0dO8WHiEOdg== + +semver-regex@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/semver-regex/-/semver-regex-2.0.0.tgz#a93c2c5844539a770233379107b38c7b4ac9d338" + integrity sha512-mUdIBBvdn0PLOeP3TEkMH7HHeUP3GjsXCwKarjv/kGmUFOYg1VqEemKhoQpWMu6X2I8kHeuVdGibLGkVK+/5Qw== + +semver-truncate@^1.1.2: + version "1.1.2" + resolved "https://registry.yarnpkg.com/semver-truncate/-/semver-truncate-1.1.2.tgz#57f41de69707a62709a7e0104ba2117109ea47e8" + integrity sha512-V1fGg9i4CL3qesB6U0L6XAm4xOJiHmt4QAacazumuasc03BvtFGIMCduv01JWQ69Nv+JST9TqhSCiJoxoY031w== + dependencies: + semver "^5.3.0" + +semver@^5.3.0, semver@^5.5.0, semver@^5.6.0: + version "5.7.2" + resolved "https://registry.yarnpkg.com/semver/-/semver-5.7.2.tgz#48d55db737c3287cd4835e17fa13feace1c41ef8" + integrity sha512-cBznnQ9KjJqU67B52RMC65CMarK2600WFnbkcaiwWq3xy/5haFJlshgnpjovMVJ+Hff49d8GEn0b87C5pDQ10g== + +semver@^6.0.0, semver@^6.3.1: + version "6.3.1" + resolved "https://registry.yarnpkg.com/semver/-/semver-6.3.1.tgz#556d2ef8689146e46dcea4bfdd095f3434dffcb4" + integrity sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA== + +semver@^7.3.4, semver@^7.3.5, semver@^7.3.7, semver@^7.5.4: + version "7.6.3" + resolved "https://registry.yarnpkg.com/semver/-/semver-7.6.3.tgz#980f7b5550bc175fb4dc09403085627f9eb33143" + integrity sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A== + +send@0.19.0: + version "0.19.0" + resolved "https://registry.yarnpkg.com/send/-/send-0.19.0.tgz#bbc5a388c8ea6c048967049dbeac0e4a3f09d7f8" + integrity sha512-dW41u5VfLXu8SJh5bwRmyYUbAoSB3c9uQh6L8h/KtsFREPWpbX1lrljJo186Jc4nmci/sGUZ9a0a0J2zgfq2hw== + dependencies: + debug "2.6.9" + depd 
"2.0.0" + destroy "1.2.0" + encodeurl "~1.0.2" + escape-html "~1.0.3" + etag "~1.8.1" + fresh "0.5.2" + http-errors "2.0.0" + mime "1.6.0" + ms "2.1.3" + on-finished "2.4.1" + range-parser "~1.2.1" + statuses "2.0.1" + +serialize-javascript@^6.0.0, serialize-javascript@^6.0.1: + version "6.0.2" + resolved "https://registry.yarnpkg.com/serialize-javascript/-/serialize-javascript-6.0.2.tgz#defa1e055c83bf6d59ea805d8da862254eb6a6c2" + integrity sha512-Saa1xPByTTq2gdeFZYLLo+RFE35NHZkAbqZeWNd3BpzppeVisAqpDjcp8dyf6uIvEqJRd46jemmyA4iFIeVk8g== + dependencies: + randombytes "^2.1.0" + +serve-index@^1.9.1: + version "1.9.1" + resolved "https://registry.yarnpkg.com/serve-index/-/serve-index-1.9.1.tgz#d3768d69b1e7d82e5ce050fff5b453bea12a9239" + integrity sha512-pXHfKNP4qujrtteMrSBb0rc8HJ9Ms/GrXwcUtUtD5s4ewDJI8bT3Cz2zTVRMKtri49pLx2e0Ya8ziP5Ya2pZZw== + dependencies: + accepts "~1.3.4" + batch "0.6.1" + debug "2.6.9" + escape-html "~1.0.3" + http-errors "~1.6.2" + mime-types "~2.1.17" + parseurl "~1.3.2" + +serve-static@1.16.2: + version "1.16.2" + resolved "https://registry.yarnpkg.com/serve-static/-/serve-static-1.16.2.tgz#b6a5343da47f6bdd2673848bf45754941e803296" + integrity sha512-VqpjJZKadQB/PEbEwvFdO43Ax5dFBZ2UECszz8bQ7pi7wt//PWe1P6MN7eCnjsatYtBT6EuiClbjSWP2WrIoTw== + dependencies: + encodeurl "~2.0.0" + escape-html "~1.0.3" + parseurl "~1.3.3" + send "0.19.0" + +set-function-length@^1.2.1: + version "1.2.2" + resolved "https://registry.yarnpkg.com/set-function-length/-/set-function-length-1.2.2.tgz#aac72314198eaed975cf77b2c3b6b880695e5449" + integrity sha512-pgRc4hJ4/sNjWCSS9AmnS40x3bNMDTknHgL5UaMBTMyJnU90EgWh1Rz+MC9eFu4BuN/UwZjKQuY/1v3rM7HMfg== + dependencies: + define-data-property "^1.1.4" + es-errors "^1.3.0" + function-bind "^1.1.2" + get-intrinsic "^1.2.4" + gopd "^1.0.1" + has-property-descriptors "^1.0.2" + +set-function-name@^2.0.1, set-function-name@^2.0.2: + version "2.0.2" + resolved 
"https://registry.yarnpkg.com/set-function-name/-/set-function-name-2.0.2.tgz#16a705c5a0dc2f5e638ca96d8a8cd4e1c2b90985" + integrity sha512-7PGFlmtwsEADb0WYyvCMa1t+yke6daIG4Wirafur5kcf+MhUnPms1UeR0CKQdTZD81yESwMHbtn+TR+dMviakQ== + dependencies: + define-data-property "^1.1.4" + es-errors "^1.3.0" + functions-have-names "^1.2.3" + has-property-descriptors "^1.0.2" + +setprototypeof@1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/setprototypeof/-/setprototypeof-1.1.0.tgz#d0bd85536887b6fe7c0d818cb962d9d91c54e656" + integrity sha512-BvE/TwpZX4FXExxOxZyRGQQv651MSwmWKZGqvmPcRIjDqWub67kTKuIMx43cZZrS/cBBzwBcNDWoFxt2XEFIpQ== + +setprototypeof@1.2.0: + version "1.2.0" + resolved "https://registry.yarnpkg.com/setprototypeof/-/setprototypeof-1.2.0.tgz#66c9a24a73f9fc28cbe66b09fed3d33dcaf1b424" + integrity sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw== + +shallow-clone@^3.0.0: + version "3.0.1" + resolved "https://registry.yarnpkg.com/shallow-clone/-/shallow-clone-3.0.1.tgz#8f2981ad92531f55035b01fb230769a40e02efa3" + integrity sha512-/6KqX+GVUdqPuPPd2LxDDxzX6CAbjJehAAOKlNpqqUpAqPM6HeL8f+o3a+JsyGjn2lv0WY8UsTgUJjU9Ok55NA== + dependencies: + kind-of "^6.0.2" + +shebang-command@^1.2.0: + version "1.2.0" + resolved "https://registry.yarnpkg.com/shebang-command/-/shebang-command-1.2.0.tgz#44aac65b695b03398968c39f363fee5deafdf1ea" + integrity sha512-EV3L1+UQWGor21OmnvojK36mhg+TyIKDh3iFBKBohr5xeXIhNBcx8oWdgkTEEQ+BEFFYdLRuqMfd5L84N1V5Vg== + dependencies: + shebang-regex "^1.0.0" + +shebang-command@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/shebang-command/-/shebang-command-2.0.0.tgz#ccd0af4f8835fbdc265b82461aaf0c36663f34ea" + integrity sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA== + dependencies: + shebang-regex "^3.0.0" + +shebang-regex@^1.0.0: + version "1.0.0" + resolved 
"https://registry.yarnpkg.com/shebang-regex/-/shebang-regex-1.0.0.tgz#da42f49740c0b42db2ca9728571cb190c98efea3" + integrity sha512-wpoSFAxys6b2a2wHZ1XpDSgD7N9iVjg29Ph9uV/uaP9Ex/KXlkTZTeddxDPSYQpgvzKLGJke2UU0AzoGCjNIvQ== + +shebang-regex@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/shebang-regex/-/shebang-regex-3.0.0.tgz#ae16f1644d873ecad843b0307b143362d4c42172" + integrity sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A== + +shell-quote@^1.8.1: + version "1.8.1" + resolved "https://registry.yarnpkg.com/shell-quote/-/shell-quote-1.8.1.tgz#6dbf4db75515ad5bac63b4f1894c3a154c766680" + integrity sha512-6j1W9l1iAs/4xYBI1SYOVZyFcCis9b4KCLQ8fgAGG07QvzaRLVVRQvAy85yNmmZSjYjg4MWh4gNvlPujU/5LpA== + +side-channel@^1.0.4, side-channel@^1.0.6: + version "1.0.6" + resolved "https://registry.yarnpkg.com/side-channel/-/side-channel-1.0.6.tgz#abd25fb7cd24baf45466406b1096b7831c9215f2" + integrity sha512-fDW/EZ6Q9RiO8eFG8Hj+7u/oW+XrPTIChwCOM2+th2A6OblDtYYIpve9m+KvI9Z4C9qSEXlaGR6bTEYHReuglA== + dependencies: + call-bind "^1.0.7" + es-errors "^1.3.0" + get-intrinsic "^1.2.4" + object-inspect "^1.13.1" + +signal-exit@^3.0.0, signal-exit@^3.0.2, signal-exit@^3.0.3: + version "3.0.7" + resolved "https://registry.yarnpkg.com/signal-exit/-/signal-exit-3.0.7.tgz#a9a1767f8af84155114eaabd73f99273c8f59ad9" + integrity sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ== + +signal-exit@^4.0.1, signal-exit@^4.1.0: + version "4.1.0" + resolved "https://registry.yarnpkg.com/signal-exit/-/signal-exit-4.1.0.tgz#952188c1cbd546070e2dd20d0f41c0ae0530cb04" + integrity sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw== + +slash@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/slash/-/slash-3.0.0.tgz#6539be870c165adbd5240220dbe361f1bc4d4634" + integrity 
sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q== + +slash@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/slash/-/slash-4.0.0.tgz#2422372176c4c6c5addb5e2ada885af984b396a7" + integrity sha512-3dOsAHXXUkQTpOYcoAxLIorMTp4gIQr5IW3iVb7A7lFIp0VHhnynm9izx6TssdrIcVIESAlVjtnO2K8bg+Coew== + +sockjs@^0.3.24: + version "0.3.24" + resolved "https://registry.yarnpkg.com/sockjs/-/sockjs-0.3.24.tgz#c9bc8995f33a111bea0395ec30aa3206bdb5ccce" + integrity sha512-GJgLTZ7vYb/JtPSSZ10hsOYIvEYsjbNU+zPdIHcUaWVNUEPivzxku31865sSSud0Da0W4lEeOPlmw93zLQchuQ== + dependencies: + faye-websocket "^0.11.3" + uuid "^8.3.2" + websocket-driver "^0.7.4" + +sort-keys-length@^1.0.0: + version "1.0.1" + resolved "https://registry.yarnpkg.com/sort-keys-length/-/sort-keys-length-1.0.1.tgz#9cb6f4f4e9e48155a6aa0671edd336ff1479a188" + integrity sha512-GRbEOUqCxemTAk/b32F2xa8wDTs+Z1QHOkbhJDQTvv/6G3ZkbJ+frYWsTcc7cBB3Fu4wy4XlLCuNtJuMn7Gsvw== + dependencies: + sort-keys "^1.0.0" + +sort-keys@^1.0.0: + version "1.1.2" + resolved "https://registry.yarnpkg.com/sort-keys/-/sort-keys-1.1.2.tgz#441b6d4d346798f1b4e49e8920adfba0e543f9ad" + integrity sha512-vzn8aSqKgytVik0iwdBEi+zevbTYZogewTUM6dtpmGwEcdzbub/TX4bCzRhebDCRC3QzXgJsLRKB2V/Oof7HXg== + dependencies: + is-plain-obj "^1.0.0" + +sort-keys@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/sort-keys/-/sort-keys-2.0.0.tgz#658535584861ec97d730d6cf41822e1f56684128" + integrity sha512-/dPCrG1s3ePpWm6yBbxZq5Be1dXGLyLn9Z791chDC3NFrpkVbWGzkBwPN1knaciexFXgRJ7hzdnwZ4stHSDmjg== + dependencies: + is-plain-obj "^1.0.0" + +"source-map-js@>=0.6.2 <2.0.0", source-map-js@^1.2.1: + version "1.2.1" + resolved "https://registry.yarnpkg.com/source-map-js/-/source-map-js-1.2.1.tgz#1ce5650fddd87abc099eda37dcff024c2667ae46" + integrity sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA== + +source-map-support@~0.5.20: + version "0.5.21" + resolved 
"https://registry.yarnpkg.com/source-map-support/-/source-map-support-0.5.21.tgz#04fe7c7f9e1ed2d662233c28cb2b35b9f63f6e4f" + integrity sha512-uBHU3L3czsIyYXKX88fdrGovxdSCoTGDRZ6SYXtSRxLZUzHg5P/66Ht6uoUlHu9EZod+inXhKo3qQgwXUT/y1w== + dependencies: + buffer-from "^1.0.0" + source-map "^0.6.0" + +source-map@^0.6.0, source-map@^0.6.1, source-map@~0.6.0: + version "0.6.1" + resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.6.1.tgz#74722af32e9614e9c287a8d0bbde48b5e2f1a263" + integrity sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g== + +source-map@^0.7.3, source-map@^0.7.4: + version "0.7.4" + resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.7.4.tgz#a9bbe705c9d8846f4e08ff6765acf0f1b0898656" + integrity sha512-l3BikUxvPOcn5E74dZiq5BGsTb5yEwhaTSzccU6t4sDOH8NWJCstKO5QT2CvtFoK6F0saL7p9xHAqHOlCPJygA== + +spdy-transport@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/spdy-transport/-/spdy-transport-3.0.0.tgz#00d4863a6400ad75df93361a1608605e5dcdcf31" + integrity sha512-hsLVFE5SjA6TCisWeJXFKniGGOpBgMLmerfO2aCyCU5s7nJ/rpAepqmFifv/GCbSbueEeAJJnmSQ2rKC/g8Fcw== + dependencies: + debug "^4.1.0" + detect-node "^2.0.4" + hpack.js "^2.1.6" + obuf "^1.1.2" + readable-stream "^3.0.6" + wbuf "^1.7.3" + +spdy@^4.0.2: + version "4.0.2" + resolved "https://registry.yarnpkg.com/spdy/-/spdy-4.0.2.tgz#b74f466203a3eda452c02492b91fb9e84a27677b" + integrity sha512-r46gZQZQV+Kl9oItvl1JZZqJKGr+oEkB08A6BzkiR7593/7IbtuncXHd2YoYeTsG4157ZssMu9KYvUHLcjcDoA== + dependencies: + debug "^4.1.0" + handle-thing "^2.0.0" + http-deceiver "^1.2.7" + select-hose "^2.0.0" + spdy-transport "^3.0.0" + +stable@^0.1.8: + version "0.1.8" + resolved "https://registry.yarnpkg.com/stable/-/stable-0.1.8.tgz#836eb3c8382fe2936feaf544631017ce7d47a3cf" + integrity sha512-ji9qxRnOVfcuLDySj9qzhGSEFVobyt1kIOSkj1qZzYLzq7Tos/oUUWvotUPQLlrsidqsK6tBH89Bc9kL5zHA6w== + +stackframe@^1.3.4: + version "1.3.4" + resolved 
"https://registry.yarnpkg.com/stackframe/-/stackframe-1.3.4.tgz#b881a004c8c149a5e8efef37d51b16e412943310" + integrity sha512-oeVtt7eWQS+Na6F//S4kJ2K2VbRlS9D43mAlMyVpVWovy9o+jfgH8O9agzANzaiLjclA0oYzUXEM4PurhSUChw== + +statuses@2.0.1, statuses@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/statuses/-/statuses-2.0.1.tgz#55cb000ccf1d48728bd23c685a063998cf1a1b63" + integrity sha512-RwNA9Z/7PrK06rYLIzFMlaF+l73iwpzsqRIFgbMLbTcLD6cOao82TaWefPXQvB2fOC4AjuYSEndS7N/mTCbkdQ== + +"statuses@>= 1.4.0 < 2": + version "1.5.0" + resolved "https://registry.yarnpkg.com/statuses/-/statuses-1.5.0.tgz#161c7dac177659fd9811f43771fa99381478628c" + integrity sha512-OpZ3zP+jT1PI7I8nemJX4AKmAX070ZkYPVWV/AaKTJl+tXCTGyVdC1a4SL8RUQYEwk/f34ZX8UTykN68FwrqAA== + +stop-iteration-iterator@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/stop-iteration-iterator/-/stop-iteration-iterator-1.0.0.tgz#6a60be0b4ee757d1ed5254858ec66b10c49285e4" + integrity sha512-iCGQj+0l0HOdZ2AEeBADlsRC+vsnDsZsbdSiH1yNSjcfKM7fdpCMfqAL/dwF5BLiw/XhRft/Wax6zQbhq2BcjQ== + dependencies: + internal-slot "^1.0.4" + +strict-event-emitter@^0.5.1: + version "0.5.1" + resolved "https://registry.yarnpkg.com/strict-event-emitter/-/strict-event-emitter-0.5.1.tgz#1602ece81c51574ca39c6815e09f1a3e8550bd93" + integrity sha512-vMgjE/GGEPEFnhFub6pa4FmJBRBVOLpIII2hvCZ8Kzb7K0hlHo7mQv6xYrBvCL2LtAIBwFUK8wvuJgTVSQ5MFQ== + +strict-uri-encode@^1.0.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/strict-uri-encode/-/strict-uri-encode-1.1.0.tgz#279b225df1d582b1f54e65addd4352e18faa0713" + integrity sha512-R3f198pcvnB+5IpnBlRkphuE9n46WyVl8I39W/ZUTZLz4nqSP/oLYUrcnJrw462Ds8he4YKMov2efsTIw1BDGQ== + +"string-width-cjs@npm:string-width@^4.2.0": + version "4.2.3" + resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010" + integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g== + dependencies: + 
emoji-regex "^8.0.0" + is-fullwidth-code-point "^3.0.0" + strip-ansi "^6.0.1" + +string-width@^2.0.0: + version "2.1.1" + resolved "https://registry.yarnpkg.com/string-width/-/string-width-2.1.1.tgz#ab93f27a8dc13d28cac815c462143a6d9012ae9e" + integrity sha512-nOqH59deCq9SRHlxq1Aw85Jnt4w6KvLKqWVik6oA9ZklXLNIOlqg4F2yrT1MVaTjAqvVwdfeZ7w7aCvJD7ugkw== + dependencies: + is-fullwidth-code-point "^2.0.0" + strip-ansi "^4.0.0" + +string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3: + version "4.2.3" + resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010" + integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g== + dependencies: + emoji-regex "^8.0.0" + is-fullwidth-code-point "^3.0.0" + strip-ansi "^6.0.1" + +string-width@^5.0.1, string-width@^5.1.2: + version "5.1.2" + resolved "https://registry.yarnpkg.com/string-width/-/string-width-5.1.2.tgz#14f8daec6d81e7221d2a357e668cab73bdbca794" + integrity sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA== + dependencies: + eastasianwidth "^0.2.0" + emoji-regex "^9.2.2" + strip-ansi "^7.0.1" + +string.prototype.matchall@^4.0.11: + version "4.0.11" + resolved "https://registry.yarnpkg.com/string.prototype.matchall/-/string.prototype.matchall-4.0.11.tgz#1092a72c59268d2abaad76582dccc687c0297e0a" + integrity sha512-NUdh0aDavY2og7IbBPenWqR9exH+E26Sv8e0/eTe1tltDGZL+GtBkDAnnyBtmekfK6/Dq3MkcGtzXFEd1LQrtg== + dependencies: + call-bind "^1.0.7" + define-properties "^1.2.1" + es-abstract "^1.23.2" + es-errors "^1.3.0" + es-object-atoms "^1.0.0" + get-intrinsic "^1.2.4" + gopd "^1.0.1" + has-symbols "^1.0.3" + internal-slot "^1.0.7" + regexp.prototype.flags "^1.5.2" + set-function-name "^2.0.2" + side-channel "^1.0.6" + +string.prototype.repeat@^1.0.0: + version "1.0.0" + resolved 
"https://registry.yarnpkg.com/string.prototype.repeat/-/string.prototype.repeat-1.0.0.tgz#e90872ee0308b29435aa26275f6e1b762daee01a" + integrity sha512-0u/TldDbKD8bFCQ/4f5+mNRrXwZ8hg2w7ZR8wa16e8z9XpePWl3eGEcUD0OXpEH/VJH/2G3gjUtR3ZOiBe2S/w== + dependencies: + define-properties "^1.1.3" + es-abstract "^1.17.5" + +string.prototype.trim@^1.2.9: + version "1.2.9" + resolved "https://registry.yarnpkg.com/string.prototype.trim/-/string.prototype.trim-1.2.9.tgz#b6fa326d72d2c78b6df02f7759c73f8f6274faa4" + integrity sha512-klHuCNxiMZ8MlsOihJhJEBJAiMVqU3Z2nEXWfWnIqjN0gEFS9J9+IxKozWWtQGcgoa1WUZzLjKPTr4ZHNFTFxw== + dependencies: + call-bind "^1.0.7" + define-properties "^1.2.1" + es-abstract "^1.23.0" + es-object-atoms "^1.0.0" + +string.prototype.trimend@^1.0.8: + version "1.0.8" + resolved "https://registry.yarnpkg.com/string.prototype.trimend/-/string.prototype.trimend-1.0.8.tgz#3651b8513719e8a9f48de7f2f77640b26652b229" + integrity sha512-p73uL5VCHCO2BZZ6krwwQE3kCzM7NKmis8S//xEC6fQonchbum4eP6kR4DLEjQFO3Wnj3Fuo8NM0kOSjVdHjZQ== + dependencies: + call-bind "^1.0.7" + define-properties "^1.2.1" + es-object-atoms "^1.0.0" + +string.prototype.trimstart@^1.0.8: + version "1.0.8" + resolved "https://registry.yarnpkg.com/string.prototype.trimstart/-/string.prototype.trimstart-1.0.8.tgz#7ee834dda8c7c17eff3118472bb35bfedaa34dde" + integrity sha512-UXSH262CSZY1tfu3G3Secr6uGLCFVPMhIqHjlgCUtCCcgihYc/xKs9djMTMUOb2j1mVSeU8EU6NWc/iQKU6Gfg== + dependencies: + call-bind "^1.0.7" + define-properties "^1.2.1" + es-object-atoms "^1.0.0" + +string_decoder@^1.1.1: + version "1.3.0" + resolved "https://registry.yarnpkg.com/string_decoder/-/string_decoder-1.3.0.tgz#42f114594a46cf1a8e30b0a84f56c78c3edac21e" + integrity sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA== + dependencies: + safe-buffer "~5.2.0" + +string_decoder@~1.1.1: + version "1.1.1" + resolved 
"https://registry.yarnpkg.com/string_decoder/-/string_decoder-1.1.1.tgz#9cf1611ba62685d7030ae9e4ba34149c3af03fc8" + integrity sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg== + dependencies: + safe-buffer "~5.1.0" + +"strip-ansi-cjs@npm:strip-ansi@^6.0.1": + version "6.0.1" + resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9" + integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A== + dependencies: + ansi-regex "^5.0.1" + +strip-ansi@^3.0.0: + version "3.0.1" + resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-3.0.1.tgz#6a385fb8853d952d5ff05d0e8aaf94278dc63dcf" + integrity sha512-VhumSSbBqDTP8p2ZLKj40UjBCV4+v8bUSEpUb4KjRgWk9pbqGF4REFj6KEagidb2f/M6AzC0EmFyDNGaw9OCzg== + dependencies: + ansi-regex "^2.0.0" + +strip-ansi@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-4.0.0.tgz#a8479022eb1ac368a871389b635262c505ee368f" + integrity sha512-4XaJ2zQdCzROZDivEVIDPkcQn8LMFSa8kj8Gxb/Lnwzv9A8VctNZ+lfivC/sV3ivW8ElJTERXZoPBRrZKkNKow== + dependencies: + ansi-regex "^3.0.0" + +strip-ansi@^6.0.0, strip-ansi@^6.0.1: + version "6.0.1" + resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9" + integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A== + dependencies: + ansi-regex "^5.0.1" + +strip-ansi@^7.0.1: + version "7.1.0" + resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-7.1.0.tgz#d5b6568ca689d8561370b0707685d22434faff45" + integrity sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ== + dependencies: + ansi-regex "^6.0.1" + +strip-dirs@^2.0.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/strip-dirs/-/strip-dirs-2.1.0.tgz#4987736264fc344cf20f6c34aca9d13d1d4ed6c5" + integrity 
sha512-JOCxOeKLm2CAS73y/U4ZeZPTkE+gNVCzKt7Eox84Iej1LT/2pTWYpZKJuxwQpvX1LiZb1xokNR7RLfuBAa7T3g== + dependencies: + is-natural-number "^4.0.1" + +strip-eof@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/strip-eof/-/strip-eof-1.0.0.tgz#bb43ff5598a6eb05d89b59fcd129c983313606bf" + integrity sha512-7FCwGGmx8mD5xQd3RPUvnSpUXHM3BWuzjtpD4TXsfcZ9EL4azvVVUscFYwD9nx8Kh+uCBC00XBtAykoMHwTh8Q== + +strip-final-newline@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/strip-final-newline/-/strip-final-newline-2.0.0.tgz#89b852fb2fcbe936f6f4b3187afb0a12c1ab58ad" + integrity sha512-BrpvfNAE3dcvq7ll3xVumzjKjZQ5tI1sEUIKr3Uoks0XUl45St3FlatVqef9prk4jRDzhW6WZg+3bk93y6pLjA== + +strip-json-comments@^3.1.1: + version "3.1.1" + resolved "https://registry.yarnpkg.com/strip-json-comments/-/strip-json-comments-3.1.1.tgz#31f1281b3832630434831c310c01cccda8cbe006" + integrity sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig== + +strip-outer@^1.0.0: + version "1.0.1" + resolved "https://registry.yarnpkg.com/strip-outer/-/strip-outer-1.0.1.tgz#b2fd2abf6604b9d1e6013057195df836b8a9d631" + integrity sha512-k55yxKHwaXnpYGsOzg4Vl8+tDrWylxDEpknGjhTiZB8dFRU5rTo9CAzeycivxV3s+zlTKwrs6WxMxR95n26kwg== + dependencies: + escape-string-regexp "^1.0.2" + +strnum@^1.0.5: + version "1.0.5" + resolved "https://registry.yarnpkg.com/strnum/-/strnum-1.0.5.tgz#5c4e829fe15ad4ff0d20c3db5ac97b73c9b072db" + integrity sha512-J8bbNyKKXl5qYcR36TIO8W3mVGVHrmmxsd5PAItGkmyzwJvybiw2IVq5nqd0i4LSNSkB/sx9VHllbfFdr9k1JA== + +style-loader@^3.3.1: + version "3.3.4" + resolved "https://registry.yarnpkg.com/style-loader/-/style-loader-3.3.4.tgz#f30f786c36db03a45cbd55b6a70d930c479090e7" + integrity sha512-0WqXzrsMTyb8yjZJHDqwmnwRJvhALK9LfRtRc6B4UTWe8AijYLZYZ9thuJTZc2VfQWINADW/j+LiJnfy2RoC1w== + +sucrase@^3.32.0: + version "3.35.0" + resolved "https://registry.yarnpkg.com/sucrase/-/sucrase-3.35.0.tgz#57f17a3d7e19b36d8995f06679d121be914ae263" + integrity 
sha512-8EbVDiu9iN/nESwxeSxDKe0dunta1GOlHufmSSXxMD2z2/tMZpDMpvXQGsc+ajGo8y2uYUmixaSRUc/QPoQ0GA== + dependencies: + "@jridgewell/gen-mapping" "^0.3.2" + commander "^4.0.0" + glob "^10.3.10" + lines-and-columns "^1.1.6" + mz "^2.7.0" + pirates "^4.0.1" + ts-interface-checker "^0.1.9" + +supports-color@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-2.0.0.tgz#535d045ce6b6363fa40117084629995e9df324c7" + integrity sha512-KKNVtd6pCYgPIKU4cp2733HWYCpplQhddZLBUryaAHou723x+FRzQ5Df824Fj+IyyuiQTRoub4SnIFfIcrp70g== + +supports-color@^7.1.0: + version "7.2.0" + resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-7.2.0.tgz#1b7dcdcb32b8138801b3e478ba6a51caa89648da" + integrity sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw== + dependencies: + has-flag "^4.0.0" + +supports-color@^8.0.0: + version "8.1.1" + resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-8.1.1.tgz#cd6fc17e28500cff56c1b86c0a7fd4a54a73005c" + integrity sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q== + dependencies: + has-flag "^4.0.0" + +supports-preserve-symlinks-flag@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz#6eda4bd344a3c94aea376d4cc31bc77311039e09" + integrity sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w== + +svgo@^2.1.0: + version "2.8.0" + resolved "https://registry.yarnpkg.com/svgo/-/svgo-2.8.0.tgz#4ff80cce6710dc2795f0c7c74101e6764cfccd24" + integrity sha512-+N/Q9kV1+F+UeWYoSiULYo4xYSDQlTgb+ayMobAXPwMnLvop7oxKMo9OzIrX5x3eS4L4f2UHhc9axXwY8DpChg== + dependencies: + "@trysound/sax" "0.2.0" + commander "^7.2.0" + css-select "^4.1.3" + css-tree "^1.1.3" + csso "^4.2.0" + picocolors "^1.0.0" + stable "^0.1.8" + +tailwindcss@^3.1.8: + version "3.4.14" + resolved 
"https://registry.yarnpkg.com/tailwindcss/-/tailwindcss-3.4.14.tgz#6dd23a7f54ec197b19159e91e3bb1e55e7aa73ac" + integrity sha512-IcSvOcTRcUtQQ7ILQL5quRDg7Xs93PdJEk1ZLbhhvJc7uj/OAhYOnruEiwnGgBvUtaUAJ8/mhSw1o8L2jCiENA== + dependencies: + "@alloc/quick-lru" "^5.2.0" + arg "^5.0.2" + chokidar "^3.5.3" + didyoumean "^1.2.2" + dlv "^1.1.3" + fast-glob "^3.3.0" + glob-parent "^6.0.2" + is-glob "^4.0.3" + jiti "^1.21.0" + lilconfig "^2.1.0" + micromatch "^4.0.5" + normalize-path "^3.0.0" + object-hash "^3.0.0" + picocolors "^1.0.0" + postcss "^8.4.23" + postcss-import "^15.1.0" + postcss-js "^4.0.1" + postcss-load-config "^4.0.1" + postcss-nested "^6.0.1" + postcss-selector-parser "^6.0.11" + resolve "^1.22.2" + sucrase "^3.32.0" + +tapable@^2.0.0, tapable@^2.1.1, tapable@^2.2.0, tapable@^2.2.1: + version "2.2.1" + resolved "https://registry.yarnpkg.com/tapable/-/tapable-2.2.1.tgz#1967a73ef4060a82f12ab96af86d52fdb76eeca0" + integrity sha512-GNzQvQTOIP6RyTfE2Qxb8ZVlNmw0n88vp1szwWRimP02mnTsx3Wtn5qRdqY9w2XduFNUgvOwhNnQsjwCp+kqaQ== + +tar-stream@^1.5.2: + version "1.6.2" + resolved "https://registry.yarnpkg.com/tar-stream/-/tar-stream-1.6.2.tgz#8ea55dab37972253d9a9af90fdcd559ae435c555" + integrity sha512-rzS0heiNf8Xn7/mpdSVVSMAWAoy9bfb1WOTYC78Z0UQKeKa/CWS8FOq0lKGNa8DWKAn9gxjCvMLYc5PGXYlK2A== + dependencies: + bl "^1.0.0" + buffer-alloc "^1.2.0" + end-of-stream "^1.0.0" + fs-constants "^1.0.0" + readable-stream "^2.3.0" + to-buffer "^1.1.1" + xtend "^4.0.0" + +temp-dir@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/temp-dir/-/temp-dir-1.0.0.tgz#0a7c0ea26d3a39afa7e0ebea9c1fc0bc4daa011d" + integrity sha512-xZFXEGbG7SNC3itwBzI3RYjq/cEhBkx2hJuKGIUOcEULmkQExXiHat2z/qkISYsuR+IKumhEfKKbV5qXmhICFQ== + +tempfile@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/tempfile/-/tempfile-2.0.0.tgz#6b0446856a9b1114d1856ffcbe509cccb0977265" + integrity sha512-ZOn6nJUgvgC09+doCEF3oB+r3ag7kUvlsXEGX069QRD60p+P3uP7XG9N2/at+EyIRGSN//ZY3LyEotA1YpmjuA== + 
dependencies: + temp-dir "^1.0.0" + uuid "^3.0.1" + +terser-webpack-plugin@^5.3.10: + version "5.3.10" + resolved "https://registry.yarnpkg.com/terser-webpack-plugin/-/terser-webpack-plugin-5.3.10.tgz#904f4c9193c6fd2a03f693a2150c62a92f40d199" + integrity sha512-BKFPWlPDndPs+NGGCr1U59t0XScL5317Y0UReNrHaw9/FwhPENlq6bfgs+4yPfyP51vqC1bQ4rp1EfXW5ZSH9w== + dependencies: + "@jridgewell/trace-mapping" "^0.3.20" + jest-worker "^27.4.5" + schema-utils "^3.1.1" + serialize-javascript "^6.0.1" + terser "^5.26.0" + +terser@^5.10.0, terser@^5.26.0: + version "5.36.0" + resolved "https://registry.yarnpkg.com/terser/-/terser-5.36.0.tgz#8b0dbed459ac40ff7b4c9fd5a3a2029de105180e" + integrity sha512-IYV9eNMuFAV4THUspIRXkLakHnV6XO7FEdtKjf/mDyrnqUg9LnlOn6/RwRvM9SZjR4GUq8Nk8zj67FzVARr74w== + dependencies: + "@jridgewell/source-map" "^0.3.3" + acorn "^8.8.2" + commander "^2.20.0" + source-map-support "~0.5.20" + +text-table@^0.2.0: + version "0.2.0" + resolved "https://registry.yarnpkg.com/text-table/-/text-table-0.2.0.tgz#7f5ee823ae805207c00af2df4a84ec3fcfa570b4" + integrity sha512-N+8UisAXDGk8PFXP4HAzVR9nbfmVJ3zYLAWiTIoqC5v5isinhr+r5uaO8+7r3BMfuNIufIsA7RdpVgacC2cSpw== + +textlinestream@^1.1.1: + version "1.1.1" + resolved "https://registry.yarnpkg.com/textlinestream/-/textlinestream-1.1.1.tgz#00677266d1dc70b28544adca8806134a3457f0bd" + integrity sha512-iBHbi7BQxrFmwZUQJsT0SjNzlLLsXhvW/kg7EyOMVMBIrlnj/qYofwo1LVLZi+3GbUEo96Iu2eqToI2+lZoAEQ== + +thenify-all@^1.0.0: + version "1.6.0" + resolved "https://registry.yarnpkg.com/thenify-all/-/thenify-all-1.6.0.tgz#1a1918d402d8fc3f98fbf234db0bcc8cc10e9726" + integrity sha512-RNxQH/qI8/t3thXJDwcstUO4zeqo64+Uy/+sNVRBx4Xn2OX+OZ9oP+iJnNFqplFra2ZUVeKCSa2oVWi3T4uVmA== + dependencies: + thenify ">= 3.1.0 < 4" + +"thenify@>= 3.1.0 < 4": + version "3.3.1" + resolved "https://registry.yarnpkg.com/thenify/-/thenify-3.3.1.tgz#8932e686a4066038a016dd9e2ca46add9838a95f" + integrity 
sha512-RVZSIV5IG10Hk3enotrhvz0T9em6cyHBLkH/YAZuKqd8hRkKhSfCGIcP2KUY0EPxndzANBmNllzWPwak+bheSw== + dependencies: + any-promise "^1.0.0" + +through@^2.3.8: + version "2.3.8" + resolved "https://registry.yarnpkg.com/through/-/through-2.3.8.tgz#0dd4c9ffaabc357960b1b724115d7e0e86a2e1f5" + integrity sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg== + +thunky@^1.0.2: + version "1.1.0" + resolved "https://registry.yarnpkg.com/thunky/-/thunky-1.1.0.tgz#5abaf714a9405db0504732bbccd2cedd9ef9537d" + integrity sha512-eHY7nBftgThBqOyHGVN+l8gF0BucP09fMo0oO/Lb0w1OF80dJv+lDVpXG60WMQvkcxAkNybKsrEIE3ZtKGmPrA== + +timed-out@^4.0.0, timed-out@^4.0.1: + version "4.0.1" + resolved "https://registry.yarnpkg.com/timed-out/-/timed-out-4.0.1.tgz#f32eacac5a175bea25d7fab565ab3ed8741ef56f" + integrity sha512-G7r3AhovYtr5YKOWQkta8RKAPb+J9IsO4uVmzjl8AZwfhs8UcUwTiD6gcJYSgOtzyjvQKrKYn41syHbUWMkafA== + +to-buffer@^1.1.1: + version "1.1.1" + resolved "https://registry.yarnpkg.com/to-buffer/-/to-buffer-1.1.1.tgz#493bd48f62d7c43fcded313a03dcadb2e1213a80" + integrity sha512-lx9B5iv7msuFYE3dytT+KE5tap+rNYw+K4jVkb9R/asAb+pbBSM17jtunHplhBe6RRJdZx3Pn2Jph24O32mOVg== + +to-regex-range@^5.0.1: + version "5.0.1" + resolved "https://registry.yarnpkg.com/to-regex-range/-/to-regex-range-5.0.1.tgz#1648c44aae7c8d988a326018ed72f5b4dd0392e4" + integrity sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ== + dependencies: + is-number "^7.0.0" + +toidentifier@1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/toidentifier/-/toidentifier-1.0.1.tgz#3be34321a88a820ed1bd80dfaa33e479fbb8dd35" + integrity sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA== + +tough-cookie@^4.1.4: + version "4.1.4" + resolved "https://registry.yarnpkg.com/tough-cookie/-/tough-cookie-4.1.4.tgz#945f1461b45b5a8c76821c33ea49c3ac192c1b36" + integrity 
sha512-Loo5UUvLD9ScZ6jh8beX1T6sO1w2/MpCRpEP7V280GKMVUQ0Jzar2U3UJPsrdbziLEMMhu3Ujnq//rhiFuIeag== + dependencies: + psl "^1.1.33" + punycode "^2.1.1" + universalify "^0.2.0" + url-parse "^1.5.3" + +tr46@~0.0.3: + version "0.0.3" + resolved "https://registry.yarnpkg.com/tr46/-/tr46-0.0.3.tgz#8184fd347dac9cdc185992f3a6622e14b9d9ab6a" + integrity sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw== + +trim-repeated@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/trim-repeated/-/trim-repeated-1.0.0.tgz#e3646a2ea4e891312bf7eace6cfb05380bc01c21" + integrity sha512-pkonvlKk8/ZuR0D5tLW8ljt5I8kmxp2XKymhepUeOdCEfKpZaktSArkLHZt76OB1ZvO9bssUsDty4SWhLvZpLg== + dependencies: + escape-string-regexp "^1.0.2" + +ts-interface-checker@^0.1.9: + version "0.1.13" + resolved "https://registry.yarnpkg.com/ts-interface-checker/-/ts-interface-checker-0.1.13.tgz#784fd3d679722bc103b1b4b8030bcddb5db2a699" + integrity sha512-Y/arvbn+rrz3JCKl9C4kVNfTfSm2/mEp5FSz5EsZSANGPSlQrpRI5M4PKF+mJnE52jOO90PnPSc3Ur3bTQw0gA== + +ts-loader@^9.3.1: + version "9.5.1" + resolved "https://registry.yarnpkg.com/ts-loader/-/ts-loader-9.5.1.tgz#63d5912a86312f1fbe32cef0859fb8b2193d9b89" + integrity sha512-rNH3sK9kGZcH9dYzC7CewQm4NtxJTjSEVRJ2DyBZR7f8/wcta+iV44UPCXc5+nzDzivKtlzV6c9P4e+oFhDLYg== + dependencies: + chalk "^4.1.0" + enhanced-resolve "^5.0.0" + micromatch "^4.0.0" + semver "^7.3.4" + source-map "^0.7.4" + +tslib@^1.8.1: + version "1.14.1" + resolved "https://registry.yarnpkg.com/tslib/-/tslib-1.14.1.tgz#cf2d38bdc34a134bcaf1091c41f6619e2f672d00" + integrity sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg== + +tslib@^2.0.3: + version "2.8.1" + resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.8.1.tgz#612efe4ed235d567e8aba5f2a5fab70280ade83f" + integrity sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w== + +tsutils@^3.21.0: + version "3.21.0" + resolved 
"https://registry.yarnpkg.com/tsutils/-/tsutils-3.21.0.tgz#b48717d394cea6c1e096983eed58e9d61715b623" + integrity sha512-mHKK3iUXL+3UF6xL5k0PEhKRUBKPBCv/+RkEOpjRWxxx27KKRBmmA60A9pgOUvMi8GKhRMPEmjBRPzs2W7O1OA== + dependencies: + tslib "^1.8.1" + +tunnel-agent@^0.6.0: + version "0.6.0" + resolved "https://registry.yarnpkg.com/tunnel-agent/-/tunnel-agent-0.6.0.tgz#27a5dea06b36b04a0a9966774b290868f0fc40fd" + integrity sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w== + dependencies: + safe-buffer "^5.0.1" + +type-check@^0.4.0, type-check@~0.4.0: + version "0.4.0" + resolved "https://registry.yarnpkg.com/type-check/-/type-check-0.4.0.tgz#07b8203bfa7056c0657050e3ccd2c37730bab8f1" + integrity sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew== + dependencies: + prelude-ls "^1.2.1" + +type-fest@^0.11.0: + version "0.11.0" + resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.11.0.tgz#97abf0872310fed88a5c466b25681576145e33f1" + integrity sha512-OdjXJxnCN1AvyLSzeKIgXTXxV+99ZuXl3Hpo9XpJAv9MBcHrrJOQ5kV7ypXOuQie+AmWG25hLbiKdwYTifzcfQ== + +type-fest@^0.20.2: + version "0.20.2" + resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.20.2.tgz#1bf207f4b28f91583666cb5fbd327887301cd5f4" + integrity sha512-Ne+eE4r0/iWnpAxD852z3A+N0Bt5RN//NjJwRd2VFHEmrywxf5vsZlh4R6lixl6B+wz/8d+maTSAkN1FIkI3LQ== + +type-fest@^0.21.3: + version "0.21.3" + resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.21.3.tgz#d260a24b0198436e133fa26a524a6d65fa3b2e37" + integrity sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w== + +type-fest@^4.26.1: + version "4.26.1" + resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-4.26.1.tgz#a4a17fa314f976dd3e6d6675ef6c775c16d7955e" + integrity sha512-yOGpmOAL7CkKe/91I5O3gPICmJNLJ1G4zFYVAsRHg7M64biSnPtRj0WNQt++bRkjYOqjWXrhnUw1utzmVErAdg== + +type-is@~1.6.18: + version "1.6.18" + resolved 
"https://registry.yarnpkg.com/type-is/-/type-is-1.6.18.tgz#4e552cd05df09467dcbc4ef739de89f2cf37c131" + integrity sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g== + dependencies: + media-typer "0.3.0" + mime-types "~2.1.24" + +typed-array-buffer@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/typed-array-buffer/-/typed-array-buffer-1.0.2.tgz#1867c5d83b20fcb5ccf32649e5e2fc7424474ff3" + integrity sha512-gEymJYKZtKXzzBzM4jqa9w6Q1Jjm7x2d+sh19AdsD4wqnMPDYyvwpsIc2Q/835kHuo3BEQ7CjelGhfTsoBb2MQ== + dependencies: + call-bind "^1.0.7" + es-errors "^1.3.0" + is-typed-array "^1.1.13" + +typed-array-byte-length@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/typed-array-byte-length/-/typed-array-byte-length-1.0.1.tgz#d92972d3cff99a3fa2e765a28fcdc0f1d89dec67" + integrity sha512-3iMJ9q0ao7WE9tWcaYKIptkNBuOIcZCCT0d4MRvuuH88fEoEH62IuQe0OtraD3ebQEoTRk8XCBoknUNc1Y67pw== + dependencies: + call-bind "^1.0.7" + for-each "^0.3.3" + gopd "^1.0.1" + has-proto "^1.0.3" + is-typed-array "^1.1.13" + +typed-array-byte-offset@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/typed-array-byte-offset/-/typed-array-byte-offset-1.0.2.tgz#f9ec1acb9259f395093e4567eb3c28a580d02063" + integrity sha512-Ous0vodHa56FviZucS2E63zkgtgrACj7omjwd/8lTEMEPFFyjfixMZ1ZXenpgCFBBt4EC1J2XsyVS2gkG0eTFA== + dependencies: + available-typed-arrays "^1.0.7" + call-bind "^1.0.7" + for-each "^0.3.3" + gopd "^1.0.1" + has-proto "^1.0.3" + is-typed-array "^1.1.13" + +typed-array-length@^1.0.6: + version "1.0.6" + resolved "https://registry.yarnpkg.com/typed-array-length/-/typed-array-length-1.0.6.tgz#57155207c76e64a3457482dfdc1c9d1d3c4c73a3" + integrity sha512-/OxDN6OtAk5KBpGb28T+HZc2M+ADtvRxXrKKbUwtsLgdoxgX13hyy7ek6bFRl5+aBs2yZzB0c4CnQfAtVypW/g== + dependencies: + call-bind "^1.0.7" + for-each "^0.3.3" + gopd "^1.0.1" + has-proto "^1.0.3" + is-typed-array "^1.1.13" + possible-typed-array-names "^1.0.0" + +typescript@^4.8.2: + 
version "4.9.5" + resolved "https://registry.yarnpkg.com/typescript/-/typescript-4.9.5.tgz#095979f9bcc0d09da324d58d03ce8f8374cbe65a" + integrity sha512-1FXk9E2Hm+QzZQ7z+McJiHL4NW1F2EzMu9Nq9i3zAaGqibafqYwCVU6WyWAuyQRRzOlxou8xZSyXLEN8oKj24g== + +typescript@^5.0.0: + version "5.6.3" + resolved "https://registry.yarnpkg.com/typescript/-/typescript-5.6.3.tgz#5f3449e31c9d94febb17de03cc081dd56d81db5b" + integrity sha512-hjcS1mhfuyi4WW8IWtjP7brDrG2cuDZukyrYrSauoXGNgx0S7zceP07adYkJycEr56BOUTNPzbInooiN3fn1qw== + +unbox-primitive@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/unbox-primitive/-/unbox-primitive-1.0.2.tgz#29032021057d5e6cdbd08c5129c226dff8ed6f9e" + integrity sha512-61pPlCD9h51VoreyJ0BReideM3MDKMKnh6+V9L08331ipq6Q8OFXZYiqP6n/tbHx4s5I9uRhcye6BrbkizkBDw== + dependencies: + call-bind "^1.0.2" + has-bigints "^1.0.2" + has-symbols "^1.0.3" + which-boxed-primitive "^1.0.2" + +unbzip2-stream@^1.0.9: + version "1.4.3" + resolved "https://registry.yarnpkg.com/unbzip2-stream/-/unbzip2-stream-1.4.3.tgz#b0da04c4371311df771cdc215e87f2130991ace7" + integrity sha512-mlExGW4w71ebDJviH16lQLtZS32VKqsSfk80GCfUlwT/4/hNRFsoscrF/c++9xinkMzECL1uL9DDwXqFWkruPg== + dependencies: + buffer "^5.2.1" + through "^2.3.8" + +underscore@^1.13.6: + version "1.13.7" + resolved "https://registry.yarnpkg.com/underscore/-/underscore-1.13.7.tgz#970e33963af9a7dda228f17ebe8399e5fbe63a10" + integrity sha512-GMXzWtsc57XAtguZgaQViUOzs0KTkk8ojr3/xAxXLITqf/3EMwxC0inyETfDFjH/Krbhuep0HNbbjI9i/q3F3g== + +undici-types@~5.26.4: + version "5.26.5" + resolved "https://registry.yarnpkg.com/undici-types/-/undici-types-5.26.5.tgz#bcd539893d00b56e964fd2657a4866b221a65617" + integrity sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA== + +undici-types@~6.19.8: + version "6.19.8" + resolved "https://registry.yarnpkg.com/undici-types/-/undici-types-6.19.8.tgz#35111c9d1437ab83a7cdc0abae2f26d88eda0a02" + integrity 
sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw== + +unicode-canonical-property-names-ecmascript@^2.0.0: + version "2.0.1" + resolved "https://registry.yarnpkg.com/unicode-canonical-property-names-ecmascript/-/unicode-canonical-property-names-ecmascript-2.0.1.tgz#cb3173fe47ca743e228216e4a3ddc4c84d628cc2" + integrity sha512-dA8WbNeb2a6oQzAQ55YlT5vQAWGV9WXOsi3SskE3bcCdM0P4SDd+24zS/OCacdRq5BkdsRj9q3Pg6YyQoxIGqg== + +unicode-match-property-ecmascript@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/unicode-match-property-ecmascript/-/unicode-match-property-ecmascript-2.0.0.tgz#54fd16e0ecb167cf04cf1f756bdcc92eba7976c3" + integrity sha512-5kaZCrbp5mmbz5ulBkDkbY0SsPOjKqVS35VpL9ulMPfSl0J0Xsm+9Evphv9CoIZFwre7aJoa94AY6seMKGVN5Q== + dependencies: + unicode-canonical-property-names-ecmascript "^2.0.0" + unicode-property-aliases-ecmascript "^2.0.0" + +unicode-match-property-value-ecmascript@^2.1.0: + version "2.2.0" + resolved "https://registry.yarnpkg.com/unicode-match-property-value-ecmascript/-/unicode-match-property-value-ecmascript-2.2.0.tgz#a0401aee72714598f739b68b104e4fe3a0cb3c71" + integrity sha512-4IehN3V/+kkr5YeSSDDQG8QLqO26XpL2XP3GQtqwlT/QYSECAwFztxVHjlbh0+gjJ3XmNLS0zDsbgs9jWKExLg== + +unicode-property-aliases-ecmascript@^2.0.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/unicode-property-aliases-ecmascript/-/unicode-property-aliases-ecmascript-2.1.0.tgz#43d41e3be698bd493ef911077c9b131f827e8ccd" + integrity sha512-6t3foTQI9qne+OZoVQB/8x8rk2k1eVy1gRXhV3oFQ5T6R1dqQ1xtin3XqSlx3+ATBkliTaR/hHyJBm+LVPNM8w== + +universalify@^0.2.0: + version "0.2.0" + resolved "https://registry.yarnpkg.com/universalify/-/universalify-0.2.0.tgz#6451760566fa857534745ab1dde952d1b1761be0" + integrity sha512-CJ1QgKmNg3CwvAv/kOFmtnEN05f0D/cn9QntgNOQlQF9dgvVTHj3t+8JPdjqawCHk7V/KA+fbUqzZ9XWhcqPUg== + +universalify@^2.0.0: + version "2.0.1" + resolved 
"https://registry.yarnpkg.com/universalify/-/universalify-2.0.1.tgz#168efc2180964e6386d061e094df61afe239b18d" + integrity sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw== + +unpipe@1.0.0, unpipe@~1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/unpipe/-/unpipe-1.0.0.tgz#b2bf4ee8514aae6165b4817829d21b2ef49904ec" + integrity sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ== + +update-browserslist-db@^1.1.1: + version "1.1.1" + resolved "https://registry.yarnpkg.com/update-browserslist-db/-/update-browserslist-db-1.1.1.tgz#80846fba1d79e82547fb661f8d141e0945755fe5" + integrity sha512-R8UzCaa9Az+38REPiJ1tXlImTJXlVfgHZsglwBD/k6nj76ctsH1E3q4doGrukiLQd3sGQYu56r5+lo5r94l29A== + dependencies: + escalade "^3.2.0" + picocolors "^1.1.0" + +uri-js@^4.2.2: + version "4.4.1" + resolved "https://registry.yarnpkg.com/uri-js/-/uri-js-4.4.1.tgz#9b1a52595225859e55f669d928f88c6c57f2a77e" + integrity sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg== + dependencies: + punycode "^2.1.0" + +url-parse-lax@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/url-parse-lax/-/url-parse-lax-1.0.0.tgz#7af8f303645e9bd79a272e7a14ac68bc0609da73" + integrity sha512-BVA4lR5PIviy2PMseNd2jbFQ+jwSwQGdJejf5ctd1rEXt0Ypd7yanUK9+lYechVlN5VaTJGsu2U/3MDDu6KgBA== + dependencies: + prepend-http "^1.0.1" + +url-parse-lax@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/url-parse-lax/-/url-parse-lax-3.0.0.tgz#16b5cafc07dbe3676c1b1999177823d6503acb0c" + integrity sha512-NjFKA0DidqPa5ciFcSrXnAltTtzz84ogy+NebPvfEgAck0+TNg4UJ4IN+fB7zRZfbgUf0syOo9MDxFkDSMuFaQ== + dependencies: + prepend-http "^2.0.0" + +url-parse@^1.5.3: + version "1.5.10" + resolved "https://registry.yarnpkg.com/url-parse/-/url-parse-1.5.10.tgz#9d3c2f736c1d75dd3bd2be507dcc111f1e2ea9c1" + integrity 
sha512-WypcfiRhfeUP9vvF0j6rw0J3hrWrw6iZv3+22h6iRMJ/8z1Tj6XfLP4DsUix5MhMPnXpiHDoKyoZ/bdCkwBCiQ== + dependencies: + querystringify "^2.1.1" + requires-port "^1.0.0" + +url-to-options@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/url-to-options/-/url-to-options-1.0.1.tgz#1505a03a289a48cbd7a434efbaeec5055f5633a9" + integrity sha512-0kQLIzG4fdk/G5NONku64rSH/x32NOA39LVQqlK8Le6lvTF6GGRJpqaQFGgU+CLwySIqBSMdwYM0sYcW9f6P4A== + +util-deprecate@^1.0.1, util-deprecate@^1.0.2, util-deprecate@~1.0.1: + version "1.0.2" + resolved "https://registry.yarnpkg.com/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf" + integrity sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw== + +utila@~0.4: + version "0.4.0" + resolved "https://registry.yarnpkg.com/utila/-/utila-0.4.0.tgz#8a16a05d445657a3aea5eecc5b12a4fa5379772c" + integrity sha512-Z0DbgELS9/L/75wZbro8xAnT50pBVFQZ+hUEueGDU5FN51YSCYM+jdxsfCiHjwNP/4LCDD0i/graKpeBnOXKRA== + +utils-merge@1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/utils-merge/-/utils-merge-1.0.1.tgz#9f95710f50a267947b2ccc124741c1028427e713" + integrity sha512-pMZTvIkT1d+TFGvDOqodOclx0QWkkgi6Tdoa8gC8ffGAAqz9pzPTZWAybbsHHoED/ztMtkv/VoYTYyShUn81hA== + +uuid@^3.0.1: + version "3.4.0" + resolved "https://registry.yarnpkg.com/uuid/-/uuid-3.4.0.tgz#b23e4358afa8a202fe7a100af1f5f883f02007ee" + integrity sha512-HjSDRw6gZE5JMggctHBcjVak08+KEVhSIiDzFnT9S9aegmp85S/bReBVTb4QTFaRNptJ9kuYaNhnbNEOkbKb/A== + +uuid@^8.3.2: + version "8.3.2" + resolved "https://registry.yarnpkg.com/uuid/-/uuid-8.3.2.tgz#80d5b5ced271bb9af6c445f21a1a04c606cefbe2" + integrity sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg== + +vary@~1.1.2: + version "1.1.2" + resolved "https://registry.yarnpkg.com/vary/-/vary-1.1.2.tgz#2299f02c6ded30d4a5961b0b9f74524a18f634fc" + integrity 
sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg== + +watchpack@^2.4.1: + version "2.4.2" + resolved "https://registry.yarnpkg.com/watchpack/-/watchpack-2.4.2.tgz#2feeaed67412e7c33184e5a79ca738fbd38564da" + integrity sha512-TnbFSbcOCcDgjZ4piURLCbJ3nJhznVh9kw6F6iokjiFPl8ONxe9A6nMDVXDiNbrSfLILs6vB07F7wLBrwPYzJw== + dependencies: + glob-to-regexp "^0.4.1" + graceful-fs "^4.1.2" + +wbuf@^1.1.0, wbuf@^1.7.3: + version "1.7.3" + resolved "https://registry.yarnpkg.com/wbuf/-/wbuf-1.7.3.tgz#c1d8d149316d3ea852848895cb6a0bfe887b87df" + integrity sha512-O84QOnr0icsbFGLS0O3bI5FswxzRr8/gHwWkDlQFskhSPryQXvrTMxjxGP4+iWYoauLoBvfDpkrOauZ+0iZpDA== + dependencies: + minimalistic-assert "^1.0.0" + +webidl-conversions@^3.0.0: + version "3.0.1" + resolved "https://registry.yarnpkg.com/webidl-conversions/-/webidl-conversions-3.0.1.tgz#24534275e2a7bc6be7bc86611cc16ae0a5654871" + integrity sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ== + +webpack-cli@^4.10.0: + version "4.10.0" + resolved "https://registry.yarnpkg.com/webpack-cli/-/webpack-cli-4.10.0.tgz#37c1d69c8d85214c5a65e589378f53aec64dab31" + integrity sha512-NLhDfH/h4O6UOy+0LSso42xvYypClINuMNBVVzX4vX98TmTaTUxwRbXdhucbFMd2qLaCTcLq/PdYrvi8onw90w== + dependencies: + "@discoveryjs/json-ext" "^0.5.0" + "@webpack-cli/configtest" "^1.2.0" + "@webpack-cli/info" "^1.5.0" + "@webpack-cli/serve" "^1.7.0" + colorette "^2.0.14" + commander "^7.0.0" + cross-spawn "^7.0.3" + fastest-levenshtein "^1.0.12" + import-local "^3.0.2" + interpret "^2.2.0" + rechoir "^0.7.0" + webpack-merge "^5.7.3" + +webpack-dev-middleware@^5.3.4: + version "5.3.4" + resolved "https://registry.yarnpkg.com/webpack-dev-middleware/-/webpack-dev-middleware-5.3.4.tgz#eb7b39281cbce10e104eb2b8bf2b63fce49a3517" + integrity sha512-BVdTqhhs+0IfoeAf7EoH5WE+exCmqGerHfDM0IL096Px60Tq2Mn9MAbnaGUe6HiMa41KMCYF19gyzZmBcq/o4Q== + dependencies: + colorette "^2.0.10" + memfs "^3.4.3" + mime-types 
"^2.1.31" + range-parser "^1.2.1" + schema-utils "^4.0.0" + +webpack-dev-server@^4.10.0: + version "4.15.2" + resolved "https://registry.yarnpkg.com/webpack-dev-server/-/webpack-dev-server-4.15.2.tgz#9e0c70a42a012560860adb186986da1248333173" + integrity sha512-0XavAZbNJ5sDrCbkpWL8mia0o5WPOd2YGtxrEiZkBK9FjLppIUK2TgxK6qGD2P3hUXTJNNPVibrerKcx5WkR1g== + dependencies: + "@types/bonjour" "^3.5.9" + "@types/connect-history-api-fallback" "^1.3.5" + "@types/express" "^4.17.13" + "@types/serve-index" "^1.9.1" + "@types/serve-static" "^1.13.10" + "@types/sockjs" "^0.3.33" + "@types/ws" "^8.5.5" + ansi-html-community "^0.0.8" + bonjour-service "^1.0.11" + chokidar "^3.5.3" + colorette "^2.0.10" + compression "^1.7.4" + connect-history-api-fallback "^2.0.0" + default-gateway "^6.0.3" + express "^4.17.3" + graceful-fs "^4.2.6" + html-entities "^2.3.2" + http-proxy-middleware "^2.0.3" + ipaddr.js "^2.0.1" + launch-editor "^2.6.0" + open "^8.0.9" + p-retry "^4.5.0" + rimraf "^3.0.2" + schema-utils "^4.0.0" + selfsigned "^2.1.1" + serve-index "^1.9.1" + sockjs "^0.3.24" + spdy "^4.0.2" + webpack-dev-middleware "^5.3.4" + ws "^8.13.0" + +webpack-dotenv-plugin@^2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/webpack-dotenv-plugin/-/webpack-dotenv-plugin-2.1.0.tgz#366bb18712f414e8b86aa66408a9039d03dd7165" + integrity sha512-UpVAdojRNLZWx/VqgehelPLWCXe1eu0PVvtBTz4paQmLRUbgPjw9wz6ElOYYlcEK6pcrdNk+G7v9kgVJ3zq+Yg== + dependencies: + dotenv-safe "^5.0.1" + +webpack-merge@^5.7.3, webpack-merge@^5.8.0: + version "5.10.0" + resolved "https://registry.yarnpkg.com/webpack-merge/-/webpack-merge-5.10.0.tgz#a3ad5d773241e9c682803abf628d4cd62b8a4177" + integrity sha512-+4zXKdx7UnO+1jaN4l2lHVD+mFvnlZQP/6ljaJVb4SZiwIKeUnrT5l0gkT8z+n4hKpC+jpOv6O9R+gLtag7pSA== + dependencies: + clone-deep "^4.0.1" + flat "^5.0.2" + wildcard "^2.0.0" + +webpack-sources@^3.2.3: + version "3.2.3" + resolved 
"https://registry.yarnpkg.com/webpack-sources/-/webpack-sources-3.2.3.tgz#2d4daab8451fd4b240cc27055ff6a0c2ccea0cde" + integrity sha512-/DyMEOrDgLKKIG0fmvtz+4dUX/3Ghozwgm6iPp8KRhvn+eQf9+Q7GWxVNMk3+uCPWfdXYC4ExGBckIXdFEfH1w== + +webpack@^5.74.0: + version "5.96.0" + resolved "https://registry.yarnpkg.com/webpack/-/webpack-5.96.0.tgz#1e4dc9d1d819ff1b1f89d53e45a299ffe9231a8a" + integrity sha512-gvn84AfQ4f6vUeNWmFuRp3vGERyxK4epADKTaAo60K0EQbY/YBNQbXH3Ji/ZRK5M25O/XneAOuChF4xQZjQ4xA== + dependencies: + "@types/estree" "^1.0.6" + "@webassemblyjs/ast" "^1.12.1" + "@webassemblyjs/wasm-edit" "^1.12.1" + "@webassemblyjs/wasm-parser" "^1.12.1" + acorn "^8.14.0" + browserslist "^4.24.0" + chrome-trace-event "^1.0.2" + enhanced-resolve "^5.17.1" + es-module-lexer "^1.2.1" + eslint-scope "5.1.1" + events "^3.2.0" + glob-to-regexp "^0.4.1" + graceful-fs "^4.2.11" + json-parse-even-better-errors "^2.3.1" + loader-runner "^4.2.0" + mime-types "^2.1.27" + neo-async "^2.6.2" + schema-utils "^3.2.0" + tapable "^2.1.1" + terser-webpack-plugin "^5.3.10" + watchpack "^2.4.1" + webpack-sources "^3.2.3" + +websocket-driver@>=0.5.1, websocket-driver@^0.7.4: + version "0.7.4" + resolved "https://registry.yarnpkg.com/websocket-driver/-/websocket-driver-0.7.4.tgz#89ad5295bbf64b480abcba31e4953aca706f5760" + integrity sha512-b17KeDIQVjvb0ssuSDF2cYXSg2iztliJ4B9WdsuB6J952qCPKmnVq4DyW5motImXHDC1cBT/1UezrJVsKw5zjg== + dependencies: + http-parser-js ">=0.5.1" + safe-buffer ">=5.1.0" + websocket-extensions ">=0.1.1" + +websocket-extensions@>=0.1.1: + version "0.1.4" + resolved "https://registry.yarnpkg.com/websocket-extensions/-/websocket-extensions-0.1.4.tgz#7f8473bc839dfd87608adb95d7eb075211578a42" + integrity sha512-OqedPIGOfsDlo31UNwYbCFMSaO9m9G/0faIHj5/dZFDMFqPTcx6UwqyOy3COEaEOg/9VsGIpdqn62W5KhoKSpg== + +whatwg-url@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/whatwg-url/-/whatwg-url-5.0.0.tgz#966454e8765462e37644d3626f6742ce8b70965d" + integrity 
sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw== + dependencies: + tr46 "~0.0.3" + webidl-conversions "^3.0.0" + +which-boxed-primitive@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/which-boxed-primitive/-/which-boxed-primitive-1.0.2.tgz#13757bc89b209b049fe5d86430e21cf40a89a8e6" + integrity sha512-bwZdv0AKLpplFY2KZRX6TvyuN7ojjr7lwkg6ml0roIy9YeuSr7JS372qlNW18UQYzgYK9ziGcerWqZOmEn9VNg== + dependencies: + is-bigint "^1.0.1" + is-boolean-object "^1.1.0" + is-number-object "^1.0.4" + is-string "^1.0.5" + is-symbol "^1.0.3" + +which-builtin-type@^1.1.3: + version "1.1.4" + resolved "https://registry.yarnpkg.com/which-builtin-type/-/which-builtin-type-1.1.4.tgz#592796260602fc3514a1b5ee7fa29319b72380c3" + integrity sha512-bppkmBSsHFmIMSl8BO9TbsyzsvGjVoppt8xUiGzwiu/bhDCGxnpOKCxgqj6GuyHE0mINMDecBFPlOm2hzY084w== + dependencies: + function.prototype.name "^1.1.6" + has-tostringtag "^1.0.2" + is-async-function "^2.0.0" + is-date-object "^1.0.5" + is-finalizationregistry "^1.0.2" + is-generator-function "^1.0.10" + is-regex "^1.1.4" + is-weakref "^1.0.2" + isarray "^2.0.5" + which-boxed-primitive "^1.0.2" + which-collection "^1.0.2" + which-typed-array "^1.1.15" + +which-collection@^1.0.1, which-collection@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/which-collection/-/which-collection-1.0.2.tgz#627ef76243920a107e7ce8e96191debe4b16c2a0" + integrity sha512-K4jVyjnBdgvc86Y6BkaLZEN933SwYOuBFkdmBu9ZfkcAbdVbpITnDmjvZ/aQjRXQrv5EPkTnD1s39GiiqbngCw== + dependencies: + is-map "^2.0.3" + is-set "^2.0.3" + is-weakmap "^2.0.2" + is-weakset "^2.0.3" + +which-typed-array@^1.1.13, which-typed-array@^1.1.14, which-typed-array@^1.1.15: + version "1.1.15" + resolved "https://registry.yarnpkg.com/which-typed-array/-/which-typed-array-1.1.15.tgz#264859e9b11a649b388bfaaf4f767df1f779b38d" + integrity sha512-oV0jmFtUky6CXfkqehVvBP/LSWJ2sy4vWMioiENyJLePrBO/yKyV9OyJySfAKosh+RYkIl5zJCNZ8/4JncrpdA== + dependencies: + 
available-typed-arrays "^1.0.7" + call-bind "^1.0.7" + for-each "^0.3.3" + gopd "^1.0.1" + has-tostringtag "^1.0.2" + +which@^1.2.9: + version "1.3.1" + resolved "https://registry.yarnpkg.com/which/-/which-1.3.1.tgz#a45043d54f5805316da8d62f9f50918d3da70b0a" + integrity sha512-HxJdYWq1MTIQbJ3nw0cqssHoTNU267KlrDuGZ1WYlxDStUtKUhOaJmh112/TZmHxxUfuJqPXSOm7tDyas0OSIQ== + dependencies: + isexe "^2.0.0" + +which@^2.0.1: + version "2.0.2" + resolved "https://registry.yarnpkg.com/which/-/which-2.0.2.tgz#7c6a8dd0a636a0327e10b59c9286eee93f3f51b1" + integrity sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA== + dependencies: + isexe "^2.0.0" + +wildcard@^2.0.0: + version "2.0.1" + resolved "https://registry.yarnpkg.com/wildcard/-/wildcard-2.0.1.tgz#5ab10d02487198954836b6349f74fff961e10f67" + integrity sha512-CC1bOL87PIWSBhDcTrdeLo6eGT7mCFtrg0uIJtqJUFyK+eJnzl8A1niH56uu7KMa5XFrtiV+AQuHO3n7DsHnLQ== + +word-wrap@^1.2.5: + version "1.2.5" + resolved "https://registry.yarnpkg.com/word-wrap/-/word-wrap-1.2.5.tgz#d2c45c6dd4fbce621a66f136cbe328afd0410b34" + integrity sha512-BN22B5eaMMI9UMtjrGd5g5eCYPpCPDUy0FJXbYsaT5zYxjFOckS53SQDE3pWkVoWpHXVb3BrYcEN4Twa55B5cA== + +"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0": + version "7.0.0" + resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43" + integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q== + dependencies: + ansi-styles "^4.0.0" + string-width "^4.1.0" + strip-ansi "^6.0.0" + +wrap-ansi@^6.2.0: + version "6.2.0" + resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-6.2.0.tgz#e9393ba07102e6c91a3b221478f0257cd2856e53" + integrity sha512-r6lPcBGxZXlIcymEu7InxDMhdW0KDxpLgoFLcguasxCaJ/SOIZwINatK9KY/tf+ZrlywOKU0UDj3ATXUBfxJXA== + dependencies: + ansi-styles "^4.0.0" + string-width "^4.1.0" + strip-ansi "^6.0.0" + +wrap-ansi@^7.0.0: + version "7.0.0" + resolved 
"https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43" + integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q== + dependencies: + ansi-styles "^4.0.0" + string-width "^4.1.0" + strip-ansi "^6.0.0" + +wrap-ansi@^8.1.0: + version "8.1.0" + resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-8.1.0.tgz#56dc22368ee570face1b49819975d9b9a5ead214" + integrity sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ== + dependencies: + ansi-styles "^6.1.0" + string-width "^5.0.1" + strip-ansi "^7.0.1" + +wrappy@1: + version "1.0.2" + resolved "https://registry.yarnpkg.com/wrappy/-/wrappy-1.0.2.tgz#b5243d8f3ec1aa35f1364605bc0d1036e30ab69f" + integrity sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ== + +ws@^8.13.0: + version "8.18.0" + resolved "https://registry.yarnpkg.com/ws/-/ws-8.18.0.tgz#0d7505a6eafe2b0e712d232b42279f53bc289bbc" + integrity sha512-8VbfWfHLbbwu3+N6OKsOMpBdT4kXPDDB9cJk2bJ6mh9ucxdlnNvH1e+roYkKmN9Nxw2yjz7VzeO9oOz2zJ04Pw== + +xtend@^4.0.0: + version "4.0.2" + resolved "https://registry.yarnpkg.com/xtend/-/xtend-4.0.2.tgz#bb72779f5fa465186b1f438f674fa347fdb5db54" + integrity sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ== + +y18n@^5.0.5: + version "5.0.8" + resolved "https://registry.yarnpkg.com/y18n/-/y18n-5.0.8.tgz#7f4934d0f7ca8c56f95314939ddcd2dd91ce1d55" + integrity sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA== + +yallist@^2.1.2: + version "2.1.2" + resolved "https://registry.yarnpkg.com/yallist/-/yallist-2.1.2.tgz#1c11f9218f076089a47dd512f93c6699a6a81d52" + integrity sha512-ncTzHV7NvsQZkYe1DW7cbDLm0YpzHmZF5r/iyP3ZnQtMiJ+pjzisCiMNI+Sj+xQF5pXhSHxSB3uDbsBTzY/c2A== + +yallist@^3.0.2: + version "3.1.1" + resolved 
"https://registry.yarnpkg.com/yallist/-/yallist-3.1.1.tgz#dbb7daf9bfd8bac9ab45ebf602b8cbad0d5d08fd" + integrity sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g== + +yaml@^1.10.0: + version "1.10.2" + resolved "https://registry.yarnpkg.com/yaml/-/yaml-1.10.2.tgz#2301c5ffbf12b467de8da2333a459e29e7920e4b" + integrity sha512-r3vXyErRCYJ7wg28yvBY5VSoAF8ZvlcW9/BwUzEtUsjvX/DKs24dIkuwjtuprwJJHsbyUbLApepYTR1BN4uHrg== + +yaml@^2.3.4: + version "2.6.0" + resolved "https://registry.yarnpkg.com/yaml/-/yaml-2.6.0.tgz#14059ad9d0b1680d0f04d3a60fe00f3a857303c3" + integrity sha512-a6ae//JvKDEra2kdi1qzCyrJW/WZCgFi8ydDV+eXExl95t+5R+ijnqHJbz9tmMh8FUjx3iv2fCQ4dclAQlO2UQ== + +yargs-parser@^21.1.1: + version "21.1.1" + resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-21.1.1.tgz#9096bceebf990d21bb31fa9516e0ede294a77d35" + integrity sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw== + +yargs@^17.7.2: + version "17.7.2" + resolved "https://registry.yarnpkg.com/yargs/-/yargs-17.7.2.tgz#991df39aca675a192b816e1e0363f9d75d2aa269" + integrity sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w== + dependencies: + cliui "^8.0.1" + escalade "^3.1.1" + get-caller-file "^2.0.5" + require-directory "^2.1.1" + string-width "^4.2.3" + y18n "^5.0.5" + yargs-parser "^21.1.1" + +yauzl@^2.4.2: + version "2.10.0" + resolved "https://registry.yarnpkg.com/yauzl/-/yauzl-2.10.0.tgz#c7eb17c93e112cb1086fa6d8e51fb0667b79a5f9" + integrity sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g== + dependencies: + buffer-crc32 "~0.2.3" + fd-slicer "~1.1.0" + +yocto-queue@^0.1.0: + version "0.1.0" + resolved "https://registry.yarnpkg.com/yocto-queue/-/yocto-queue-0.1.0.tgz#0294eb3dee05028d31ee1a5fa2c556a6aaf10a1b" + integrity sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q== + 
+yoctocolors-cjs@^2.1.2: + version "2.1.2" + resolved "https://registry.yarnpkg.com/yoctocolors-cjs/-/yoctocolors-cjs-2.1.2.tgz#f4b905a840a37506813a7acaa28febe97767a242" + integrity sha512-cYVsTjKl8b+FrnidjibDWskAv7UKOfcwaVZdp/it9n1s9fU3IkgDbhdIRKCW4JDsAlECJY0ytoVPT3sK6kideA== diff --git a/demo/gradio/requirements.txt b/demo/gradio/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..c28c1ace7e07c161f656c8f2bb49e9d8ea43f86f --- /dev/null +++ b/demo/gradio/requirements.txt @@ -0,0 +1,15 @@ +sentencepiece +accelerate>=0.28.0 +pydantic>=2.10.1 +numpy>=1.23.5,<2.0.0 +pillow>=9.4.0 +gradio>=5.5.0 +requests +httpx +uvicorn +fastapi +protobuf +opencv-python +openai>=1.55.0 +spaces==0.30.4 +git+https://github.com/facebookresearch/segment-anything.git diff --git a/evaluation/DLC-Bench/annotations/annotations.json b/evaluation/DLC-Bench/annotations/annotations.json new file mode 100644 index 0000000000000000000000000000000000000000..f355a9422ce489642d6c7d75a06890503ac6b550 --- /dev/null +++ b/evaluation/DLC-Bench/annotations/annotations.json @@ -0,0 +1 @@ +{"images": [{"height": 768, "id": 912042, "license": 5, "width": 1024, "file_name": "objects365_v2_00912042.jpg", "url": ""}, {"height": 768, "id": 912850, "license": 5, "width": 1024, "file_name": "objects365_v2_00912850.jpg", "url": ""}, {"height": 1024, "id": 915597, "license": 5, "width": 768, "file_name": "objects365_v2_00915597.jpg", "url": ""}, {"height": 768, "id": 926777, "license": 5, "width": 1024, "file_name": "objects365_v2_00926777.jpg", "url": ""}, {"height": 768, "id": 946186, "license": 5, "width": 1024, "file_name": "objects365_v2_00946186.jpg", "url": ""}, {"height": 768, "id": 951281, "license": 5, "width": 1024, "file_name": "objects365_v2_00951281.jpg", "url": ""}, {"height": 768, "id": 975971, "license": 5, "width": 1024, "file_name": "objects365_v2_00975971.jpg", "url": ""}, {"height": 768, "id": 981094, "license": 5, "width": 1024, "file_name": 
"objects365_v2_00981094.jpg", "url": ""}, {"height": 1024, "id": 1002306, "license": 5, "width": 902, "file_name": "objects365_v2_01002306.jpg", "url": ""}, {"height": 768, "id": 1010195, "license": 5, "width": 1024, "file_name": "objects365_v2_01010195.jpg", "url": ""}, {"height": 1080, "id": 1010539, "license": 5, "width": 1080, "file_name": "objects365_v2_01010539.jpg", "url": ""}, {"height": 1024, "id": 1070155, "license": 5, "width": 768, "file_name": "objects365_v2_01070155.jpg", "url": ""}, {"height": 821, "id": 1071650, "license": 5, "width": 1024, "file_name": "objects365_v2_01071650.jpg", "url": ""}, {"height": 1024, "id": 1080826, "license": 5, "width": 768, "file_name": "objects365_v2_01080826.jpg", "url": ""}, {"height": 1024, "id": 1091580, "license": 5, "width": 768, "file_name": "objects365_v2_01091580.jpg", "url": ""}, {"height": 1024, "id": 1095871, "license": 5, "width": 768, "file_name": "objects365_v2_01095871.jpg", "url": ""}, {"height": 768, "id": 1103219, "license": 5, "width": 1024, "file_name": "objects365_v2_01103219.jpg", "url": ""}, {"height": 768, "id": 1103275, "license": 5, "width": 1024, "file_name": "objects365_v2_01103275.jpg", "url": ""}, {"height": 768, "id": 1108895, "license": 5, "width": 1024, "file_name": "objects365_v2_01108895.jpg", "url": ""}, {"height": 768, "id": 1121205, "license": 5, "width": 1024, "file_name": "objects365_v2_01121205.jpg", "url": ""}, {"height": 768, "id": 1142493, "license": 5, "width": 1024, "file_name": "objects365_v2_01142493.jpg", "url": ""}, {"height": 768, "id": 1155009, "license": 5, "width": 1024, "file_name": "objects365_v2_01155009.jpg", "url": ""}, {"height": 1024, "id": 1156032, "license": 5, "width": 980, "file_name": "objects365_v2_01156032.jpg", "url": ""}, {"height": 1024, "id": 1156833, "license": 5, "width": 768, "file_name": "objects365_v2_01156833.jpg", "url": ""}, {"height": 1024, "id": 1189415, "license": 5, "width": 768, "file_name": "objects365_v2_01189415.jpg", "url": ""}, 
{"height": 1024, "id": 1198997, "license": 5, "width": 795, "file_name": "objects365_v2_01198997.jpg", "url": ""}, {"height": 768, "id": 1246937, "license": 5, "width": 1024, "file_name": "objects365_v2_01246937.jpg", "url": ""}, {"height": 768, "id": 1252367, "license": 5, "width": 1024, "file_name": "objects365_v2_01252367.jpg", "url": ""}, {"height": 1536, "id": 1261883, "license": 5, "width": 2304, "file_name": "objects365_v2_01261883.jpg", "url": ""}, {"height": 1024, "id": 1261906, "license": 5, "width": 768, "file_name": "objects365_v2_01261906.jpg", "url": ""}, {"height": 1024, "id": 1276645, "license": 5, "width": 768, "file_name": "objects365_v2_01276645.jpg", "url": ""}, {"height": 768, "id": 1312527, "license": 5, "width": 1024, "file_name": "objects365_v2_01312527.jpg", "url": ""}, {"height": 768, "id": 1350089, "license": 5, "width": 1024, "file_name": "objects365_v2_01350089.jpg", "url": ""}, {"height": 768, "id": 1356234, "license": 5, "width": 1024, "file_name": "objects365_v2_01356234.jpg", "url": ""}, {"height": 1024, "id": 1364554, "license": 5, "width": 820, "file_name": "objects365_v2_01364554.jpg", "url": ""}, {"height": 768, "id": 1364931, "license": 5, "width": 1024, "file_name": "objects365_v2_01364931.jpg", "url": ""}, {"height": 1024, "id": 1396529, "license": 5, "width": 768, "file_name": "objects365_v2_01396529.jpg", "url": ""}, {"height": 1152, "id": 1403825, "license": 5, "width": 2048, "file_name": "objects365_v2_01403825.jpg", "url": ""}, {"height": 768, "id": 1413369, "license": 5, "width": 1024, "file_name": "objects365_v2_01413369.jpg", "url": ""}, {"height": 803, "id": 1420513, "license": 5, "width": 1024, "file_name": "objects365_v2_01420513.jpg", "url": ""}, {"height": 768, "id": 1453850, "license": 5, "width": 1024, "file_name": "objects365_v2_01453850.jpg", "url": ""}, {"height": 768, "id": 1455911, "license": 5, "width": 1024, "file_name": "objects365_v2_01455911.jpg", "url": ""}, {"height": 768, "id": 1511060, "license": 
5, "width": 1024, "file_name": "objects365_v2_01511060.jpg", "url": ""}, {"height": 768, "id": 1517456, "license": 5, "width": 1024, "file_name": "objects365_v2_01517456.jpg", "url": ""}, {"height": 1024, "id": 1525619, "license": 5, "width": 1024, "file_name": "objects365_v2_01525619.jpg", "url": ""}, {"height": 786, "id": 1534987, "license": 5, "width": 1178, "file_name": "objects365_v2_01534987.jpg", "url": ""}, {"height": 1024, "id": 1575962, "license": 5, "width": 768, "file_name": "objects365_v2_01575962.jpg", "url": ""}, {"height": 768, "id": 1616122, "license": 5, "width": 1024, "file_name": "objects365_v2_01616122.jpg", "url": ""}, {"height": 1024, "id": 1616394, "license": 5, "width": 1024, "file_name": "objects365_v2_01616394.jpg", "url": ""}, {"height": 768, "id": 1621320, "license": 5, "width": 1024, "file_name": "objects365_v2_01621320.jpg", "url": ""}, {"height": 1600, "id": 1629547, "license": 5, "width": 1200, "file_name": "objects365_v2_01629547.jpg", "url": ""}, {"height": 1024, "id": 1634579, "license": 5, "width": 853, "file_name": "objects365_v2_01634579.jpg", "url": ""}, {"height": 1024, "id": 1635395, "license": 5, "width": 1024, "file_name": "objects365_v2_01635395.jpg", "url": ""}, {"height": 1024, "id": 1670098, "license": 5, "width": 768, "file_name": "objects365_v2_01670098.jpg", "url": ""}, {"height": 1024, "id": 1673739, "license": 5, "width": 1024, "file_name": "objects365_v2_01673739.jpg", "url": ""}, {"height": 855, "id": 1689730, "license": 5, "width": 1024, "file_name": "objects365_v2_01689730.jpg", "url": ""}, {"height": 1024, "id": 1696718, "license": 5, "width": 768, "file_name": "objects365_v2_01696718.jpg", "url": ""}, {"height": 768, "id": 1722811, "license": 5, "width": 1024, "file_name": "objects365_v2_01722811.jpg", "url": ""}, {"height": 768, "id": 1729425, "license": 5, "width": 1024, "file_name": "objects365_v2_01729425.jpg", "url": ""}, {"height": 768, "id": 1770249, "license": 5, "width": 1024, "file_name": 
"objects365_v2_01770249.jpg", "url": ""}, {"height": 1009, "id": 1788343, "license": 5, "width": 1024, "file_name": "objects365_v2_01788343.jpg", "url": ""}, {"height": 2953, "id": 1793137, "license": 5, "width": 3925, "file_name": "objects365_v2_01793137.jpg", "url": ""}, {"height": 768, "id": 1811034, "license": 5, "width": 1024, "file_name": "objects365_v2_01811034.jpg", "url": ""}, {"height": 768, "id": 1833546, "license": 5, "width": 1024, "file_name": "objects365_v2_01833546.jpg", "url": ""}, {"height": 2472, "id": 1835389, "license": 5, "width": 2832, "file_name": "objects365_v2_01835389.jpg", "url": ""}, {"height": 1023, "id": 1867731, "license": 5, "width": 806, "file_name": "objects365_v2_01867731.jpg", "url": ""}, {"height": 768, "id": 1915694, "license": 5, "width": 1024, "file_name": "objects365_v2_01915694.jpg", "url": ""}, {"height": 768, "id": 1916008, "license": 5, "width": 1024, "file_name": "objects365_v2_01916008.jpg", "url": ""}, {"height": 768, "id": 1936287, "license": 5, "width": 1024, "file_name": "objects365_v2_01936287.jpg", "url": ""}, {"height": 1024, "id": 1939853, "license": 5, "width": 768, "file_name": "objects365_v2_01939853.jpg", "url": ""}, {"height": 768, "id": 1944558, "license": 5, "width": 1024, "file_name": "objects365_v2_01944558.jpg", "url": ""}, {"height": 768, "id": 1948375, "license": 5, "width": 1024, "file_name": "objects365_v2_01948375.jpg", "url": ""}, {"height": 1200, "id": 1959650, "license": 5, "width": 1600, "file_name": "objects365_v2_01959650.jpg", "url": ""}, {"height": 768, "id": 1968981, "license": 5, "width": 1024, "file_name": "objects365_v2_01968981.jpg", "url": ""}, {"height": 1024, "id": 1975150, "license": 5, "width": 768, "file_name": "objects365_v2_01975150.jpg", "url": ""}, {"height": 768, "id": 1981955, "license": 5, "width": 1024, "file_name": "objects365_v2_01981955.jpg", "url": ""}, {"height": 1024, "id": 1983311, "license": 5, "width": 768, "file_name": "objects365_v2_01983311.jpg", "url": 
""}], "annotations": [{"id": 2391761, "iscrowd": 0, "isfake": 0, "area": 60783.843489567524, "isreflected": 0, "bbox": [374.7691650384, 756.3739014144, 483.12524414, 125.81384273919991], "image_id": 1002306, "category_id": 22, "segmentation": {"size": [1024, 902], "counts": "nie;5go06J5K5K5K5K5L4K5M3M3O0O2O1N2O1O1O1O10O0100O1N2O1O1O1N2O001N2O1O1O1N2O1O1O0O2O1O1O1N2O1O1O1N101O1N101O000O1000000O1000000O10O10O1000000O1000000O1000000O0100000O10000O1000000O1000O10O1000000O10000O0100O1O001N2DK5K6J6K5J6JeWi1"}, "frequency": "F", "size": "L"}, {"id": 2391780, "iscrowd": 0, "isfake": 0, "area": 2934.557551818257, "isreflected": 0, "bbox": [406.7530517974, 709.0274658304, 59.46862791539996, 49.3463134207999], "image_id": 1002306, "category_id": 57, "segmentation": {"size": [1024, 902], "counts": "Vfg<1oo0101N1O10O100000000000_POOQo02nPONSo03kPOMVo0?05KO1O12N1kPO[Oln0e0QQO]Oon0j01M3N4K20001N100O1OO2O001N10001N02O0O2N10100O0100O010O10O00100N1O2D`PO5bo032N2MiYd="}, "frequency": "F", "size": "S"}, {"id": 2391781, "iscrowd": 0, "isfake": 0, "area": 2663.9835348902247, "isreflected": 0, "bbox": [463.2692871378, 679.92590336, 49.34619133839999, 53.98559569919996], "image_id": 1002306, "category_id": 57, "segmentation": {"size": [1024, 902], "counts": "[f^>3lo01O1O001O0100001O1O001O1^Oa001M4UQOSO_n0Q1]QOoNbn0S1\\QOnNdn0W100O0100000001O003MO\\O\\QONcn01`QON_n01dQON[n02hQOJYn05jQOHWn07lQOEVn0;g0O1O100000001O1O1O1O1O1O1O2N1OhZU<"}, "frequency": "F", "size": "S"}, {"id": 2580318, "iscrowd": 0, "isfake": 0, "area": 2121.218075824239, "isreflected": 0, "bbox": [793.5177001984, 510.1010742528, 58.981323264000025, 35.96423339520004], "image_id": 1010195, "category_id": 116, "segmentation": {"size": [768, 1024], "counts": "nWob0Q1of01O2N1O1O001O000000000000000001O00001N2O1O001N2O1O0O2O1O1N101O1N2O1O1N2O1O1N2O1O1N3N1M\\go3"}, "frequency": "F", "size": "S"}, {"id": 2580323, "iscrowd": 0, "isfake": 0, "area": 32970.833150758786, "isreflected": 0, "bbox": [574.3869628416, 45.932251008, 
198.62597662719998, 165.99456783359997], "image_id": 1010195, "category_id": 17, "segmentation": {"size": [768, 1024], "counts": "URc=?`g0d0]Od0\\Oc0]Od0\\Od0[Od0]O:F1O00000O1000000000O100000O100000000000000O01000000000000000O100000O1000000000O10000000000000O10O10000000000000000O10O10000000000000O10000000O10000000O10000000000000O10O10000000000000000O10O10000000000000O10000000O10000000O1000000000000000O010000000000000000O1000O100000000000O1000000000O100000O1000000000000000O010000000000000000O1000O100000000000O100000000000O2O:Fe0[Od0\\Od0\\Oe0[Od0\\Oe0ZO\\Ri5"}, "frequency": "F", "size": "M"}, {"id": 2588513, "iscrowd": 0, "isfake": 0, "area": 41775.97498396474, "isreflected": 0, "bbox": [587.0013427439999, 721.615356468, 156.62451178800018, 266.726928672], "image_id": 1010539, "category_id": 298, "segmentation": {"size": [1080, 1080], "counts": "RnYc06]Q15eNKnPO;ln0JoPO;ln0KmPO;nn0JmPO;mn0KnPO:mn0LmPO9nn0[1K5K5K5K5K6J5J6K5K5K5K5L400aNcKbUO^4Xj0hKhUOX4Sj0mKmUOS4kh0_KPVOe01MV1P4gh0oL]WOQ3bh0PM^WOP3_h0SMaWOm2Zh0YMeWOg2Zh0ZMfWOf2Zh0ZMfWOf2Zh0ZMfWOf2[h0YMeWOg2[h0ZMdWOf2\\h0ZMdWOf2\\h0ZMdWOf2\\h0ZMdWOg2[h0YMeWOg2[h0YMeWOg2[h0ZMdWOf2\\h0ZMdWOf2\\h0ZMdWOf2\\h0ZMdWOf2\\h0ZMdWOf2]h0YMcWOg2]h0ZMbWOf2^h0ZMbWOf2^h0ZMbWOf2^h0ZMbWOg2]h0YMcWOg2]h0ZMbWOf2^h0ZMbWOf2^h0ZMbWOf2^h0ZMbWOf2^h0ZMbWOf2^h0ZMbWOf2^h0[MaWOe2_h0[MaWOe2`h0ZM`WOf2`h0ZM`WOf2`h0ZM`WOg2_h0YMaWOg2_h0ZM`WOf2`h0ZM`WOf2`h0ZM`WOf2`h0ZM`WOf2`h0ZM`WOf2`h0[M_WOe2ah0[M_WOe2ah0[M_WOe2ah0[M_WOe2ah0[M_WOe2bh0ZM^WOg2ah0ZM^WOf2bh0ZM^WOf2bh0ZM^WOf2bh0ZM^WOf2bh0ZM^WOf2bh0ZM^WOf2bh0[M]WOe2ch0[M]WOe2ch0[M]WOe2dh0ZM\\WOf2dh0ZM[WOg2eh0ZMZWOf2fh0ZMZWOg2eh0YM[WOg2eh0YM[WOg2eh0YM[WOg2eh0YM[WOg2fh0YMYWOh2fh0XMZWOh2fh0XMZWOh2fh0XMZWOh2fh0XMZWOh2fh0XMZWOi2eh0XMZWOh2fh0XMZWOh2fh0XMZWOh2gh0WMYWOj2fh0VMZWOj2fh0WMYWOi2gh0WMYWOi2gh0WMYWOi2gh0WMYWOj2fh0VMZWOj2fh0VMZWOj2fh0WMYWOi2hh0VMXWOj2hh0VMXWOk2gh0UMYWOk2gh0UMYWOk2gh0UMYWOk2gh0VMXWOk2gh0UMYWOk2gh0UMYWOk2gh0UMYWOk2hh0TMXWOl2hh0UMWWOl2hh0TMXWOl2hh0TMXWOl2hh0TMXWOl2hh0TMXWOl2hh0TMXWOm2gh0TMXWOl2hh
0TMXWOl2ih0SMWWOm2ih0SMWWOm2ih0SMWWOn2hh0RMXWOn2hh0SMWWOm2ih0SMWWOm2ih0SMWWOm2ih0SMWWOk2kh0UMUWOg2Pi0YMoVOb2Vi0^MjVO]2[i0cMeVOY2_i0gMaVOT2di0lM\\VOo1ii0QNWVOj1ni0WNQVOe1Sj0[NmUO`1Xj0`NhUO[1^j0dNbUOX1bj0hN^UOS1gj0mNYUOn0lj0POVUOl0nj0jNgUOf0^j0QOY4F:F:G:E[lU;"}, "frequency": "R", "size": "L"}, {"id": 3993075, "iscrowd": 0, "isfake": 0, "area": 1029.8814360074698, "isreflected": 0, "bbox": [271.3186035456, 662.6578369536, 82.9853515008, 12.410400358399897], "image_id": 1070155, "category_id": 171, "segmentation": {"size": [1024, 768], "counts": "id_89fo0100000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000Y[m<"}, "frequency": "C", "size": "S"}, {"id": 4027486, "iscrowd": 0, "isfake": 0, "area": 72109.81715515377, "isreflected": 0, "bbox": [7.9053955072, 154.84509275829998, 158.6149902336, 454.6217040959], "image_id": 1071650, "category_id": 200, "segmentation": {"size": [821, 1024], "counts": "XR1:Vi0_1eN]4cKR3nL22N2N1N100O1O001N1O2N3_JTCSNi>1c6iNko<;dnBY1XXOkNfe0W3hNY1fNY1gNX1hNY1ROm001O00000000O100O11O2N2N001O1O0O10001N010O1O1O0O1N3N02N2M300O1O1O1000000000000005K2N1O2N1O1O001O00aNTAUHl>i4mAoK[OWOg>X4`C`KkM7e>k3\\ETLc:^3kEbLU:P3YFQMf9a2iF^MW9S2XGmMe8h1iGXNU8\\1ZHcNe7P1jHoNh6QOmAc1j7[Oj5BZBf0[8GP3QOYFQ1[NHk86`2UOhE]1kNUOQ99[2VOZEk1ZOcNU9;W2XOkDX2IUNU9;V2S2gD`MU9an0O10001O000O2O00000010O00O2N10000010O0001O0000010O01O01O101N1O100O2O0O2N2Nj[T`0"}, "frequency": "R", "size": "M"}, {"id": 4604873, "iscrowd": 0, "isfake": 0, "area": 63140.32710627251, "isreflected": 0, "bbox": [12.8526611712, 601.6166991872, 218.9517821952, 288.37548830720004], "image_id": 1095871, "category_id": 132, "segmentation": {"size": [1024, 768], "counts": 
"ob<`0_o02N2O000000O1000001O0001O000001O000001O000001O0001O000001O000001O000001O000001O0001O00000001O0001O000001O0001O00000001O0001O00000001O01O00000001O00012M2\\XO[Oi?f0d_ONZ`02S_Oa0m`0@_^OS1aa0kNn]Oh1Sb0VN\\]O\\2db0cMj\\Oo2Wc0QM\\\\OBWLa3]g0lLQ\\Oi3oc0WLe[OU4[d0kKY[Oa4gd0_KmZOm4Te0RKcXOCd1g5ie0fJbXOENN`0N0U6Pg0ZJbXOL;HF^6]g0nIbXO07\\6Wg0dIbXO14]6Zg0XJdXOj5]g0XJ_XOi5ag0[1000000000_OjXOWHXg0o6aXORI^h0e6c0C=lN_VOPKZj0e4f0YOXUORLmj0`3P1D2lg03N1M3L3N3M3M1O1N101O2N3MIoXOEmf0IXiA0000000lXO6Rf0JnYO6Rf0>ZYOBff0`0XYO@hf0a0WYO_Ojf0a0UYO_Omf0i0Rg05I^OnXOe0Qg06O1O1O0001O1O001N2N2L4O001N003M2O000O2N1000000000001N100O1O1O110O001N\\]b0"}, "frequency": "R", "size": "S"}, {"id": 4782949, "iscrowd": 0, "isfake": 0, "area": 1398.9630307475893, "isreflected": 0, "bbox": [173.8924560384, 392.2485351936, 33.04052736, 42.34082027520003], "image_id": 1103275, "category_id": 54, "segmentation": {"size": [768, 1024], "counts": "cTR4a0]g03M4N4K3N0OATYOMkf03VYOMhf04XYOLhf03ZYOMef03\\YOMcf04\\YOLcf06]YOJbf06^YOJbf06^YOJaf08^YOHbf07^YOIcf07]YOIcf07]YOIbf09]YOGcf08]YOIcf04YYO_O5=bf03aYOM_f02bYON^f0OeYO1[f0NfYO2Yf0NhYOGcf08c0N3N2O2N1N10c[Tc0"}, "frequency": "F", "size": "S"}, {"id": 4916799, "iscrowd": 0, "isfake": 1, "area": 54225.00993098004, "isreflected": 0, "bbox": [112.2895432704, 345.10542650879995, 218.02861107200002, 248.70593664], "image_id": 1108895, "category_id": 303, "segmentation": {"size": [768, 1024], "counts": 
"WUk23lg04K5K6K4gXO\\Oif0U1K4L5U]OnN^>X1PAVOm>Q1k@SOS?S1i@mNU?Y1f@hNW?_1U^OcMZ2P1^?e1a@[N\\?i1b@XN]?j1b@VN]?l1a@UN^?m1a@SN^?o1`@RN_?P2`@PN_?Q2a@oM^?S2`@nM_?T2`@lM_?V2_@kM`?W2_@iM`?Y2_@gM`?[2^@gM`?[2_@eM`?]2_@cM`?_2_@aM`?a2^@`Ma?b2]@_Mb?c2]@]Mb?d2]@]Mb?e2\\@\\Mc?f2\\@ZMc?h2\\@XMc?j2[@WMd?k2Z@VMe?l2Y@UMf?m2Y@SMg?m2X@UMf?m2Y@SMg?n2X@RMg?o2Y@QMf?Q3X@PMh?P3X@PMg?R3X@nLh?S3W@mLh?T3W@mLi?T3V@lLi?U3W@kLh?W3V@jLj?W3U@gLl?Z3T@fLl?[3S@eLl?]3S@cLm?]3R@dLm?^3R@bLm?_3R@bLn?_3Q@aLn?a3Q@_Lo?a3Q@_Ln?c3R@\\Ln?e3R@ZLm?g3T@XLk?j3U@ULj?l3W@SLi?n3V@RLi?o3X@PLi?P4V@PLj?P4V@PLk?o3T@RLl?n3S@SLm?m3S@SLm?m3R@TLn?l3R@TLn?l3R@TLn?m3Q@SLo?m3Q@SLo?m3Q@SLo?m3Q@SLo?m3Q@SLo?m3Q@SLo?m3Q@SLo?n3P@RLP`0n3P@RLo?o3Q@QLo?o3Q@QLo?o3Q@QLo?o3Q@QLo?o3R@PLn?Q4Q@oKo?Q4Q@oKo?Q4Q@oKo?Q4e_O^J9a1R`0Q4Q@oKo?Q4Q@oKo?Q4Q@oKo?^4d_ObK[`0R60000000O01O1O1ON3M4UNo_OfKP`0Z4o_OhKP`0X4P@jKn?V4R@kKm?U4S@lKl?T4U@kKl?T4T@lKl?T4T@mKk?S4U@mKk?S4U@mKk?S4U@mKl?R4T@nKl?R4U@mKk?S4U@mKk?R4V@nKj?R4V@nKk?P4V@PLj?P4V@hKR`0X4n_OiKQ`0V4P@kKo?U4Q@kKP`0S4Q@nKn?R4R@oKm?Q4S@PLl?o3U@RLj?n3V@SLi?m3W@SLj?k3W@ULi?k3W@SLk?l3W@QLk?o3U@nKn?R4R@oKn?o3S@QLm?o3S@RLl?m3U@SLk?m3U@TLj?l3V@ULj?i3W@WLi?i3W@WLj?h3V@XLj?g3W@WLl?h3T@VLo?h3R@YLn?f3R@[Lm?e3S@\\Ll?c3U@^Ll?fNW_Og4n0cLk?fNW_Oe4o0eLj?fNW_Od4P1fLj?fNV_Oc4P1gLk?eNU_Oc4Q1hLk?eNT_Ob4Q1iLk?eNT_Oa4R1jLk?eNS_O_4S1lLl?T3T@lLm?R3T@nLl?R3T@mLn?Q3T@nLl?Q3U@oLl?P3T@mLo?R3R@aL\\`0^3d_OcL\\`0[3e_OeL\\`0Y3e_OhLZ`0X3f_OhL[`0V3f_OkLZ`0S3g_OnLY`0P3h_OPMY`0m2j_OSMW`0i2k_OXMV`0d2l_O\\MU`0a2m_O`MT`0\\2n_OdMT`0X2n_OiMR`0T2P@lMR`0Q2P@nMR`0n1P@RNR`0j1P@UNR`0i1o_OWNS`0f1n_OYNU`0b1n_O^NS`0^1P@aNS`0[1P@dNR`0W1Q@hNR`0S1Q@jNS`0S1`3K5K4M4K5K3N2M4N1O2N2N0M5L3M4MQcV`0"}, "frequency": "R", "size": "L"}, {"id": 5211280, "iscrowd": 0, "isfake": 0, "area": 47015.60015903375, "isreflected": 0, "bbox": [49.491088896, 246.5204467968, 233.50012200959998, 201.3515014656], "image_id": 1121205, "category_id": 315, "segmentation": {"size": [768, 1024], "counts": 
"^QU1m0nf09J2M3N2N2M2O2N:G=C>C=B=B7H6L4L4M2N2M3N1N3N1N3N2M2O2L4N1N3N1N2O0O2O0O2O001N101N101N101N2O0O2O001O0O2O001O001N101O001O0O2O1O0000000O101O00000O100000000O100000000O100000000000000000000O1000000000000000000000000000000000000000O10000000O10000000000000000000O100000000000000000000000000O10000000O100000O1000000O100000000O100000000O100000000O100000000O1000000O100000000O100000000O10001O000O100000000O100000001N101O0O1mJg]O_4Zb0`Kh]O^4Xb0bKi]O\\4Yb0dKg]O[4Yb0eKh]OZ4Xb0fKi]OX4Yb0gKh]OX4Xb0hKj]OV4Wb0jKi]OT4Xb0lKi]OS4Xb0lKi]OS4Wb0nKi]OP4Xb0PLi]Oo3Xb0QLh]Om3Yb0SLh]Ol3Yb0TLg]Ok3Yb0ULg]Oj3[b0VLc]Ok3_b0SL`]On3ab0RL\\]Oo3fb0e02L3L5L4bLg\\On1`c0lMh\\Ok1]c0mMP]Oi1Xc0mM_]O^1gb0YNijZa0"}, "frequency": "R", "size": "L"}, {"id": 5718392, "iscrowd": 0, "isfake": 0, "area": 2789.0872092945424, "isreflected": 0, "bbox": [230.5845947392, 494.3394775296, 43.73229977599996, 63.776367206400096], "image_id": 1142493, "category_id": 21, "segmentation": {"size": [768, 1024], "counts": "VPc51ng03N1N2N2O2M2N2N2N3O000O10000O1N2O1O1N2O1N2O2N1N200O2O000O101O0O100UNkAm1S>SNmAn1R>SNmAm1fMoMk>4_Cm1dMVOh=mNdDh1iM`0^TNYOR2h0_NaNh0^2ZOZMe0m2[OlLf0Z3[O_Ld0h3[ORLf0T4ZOfKe0a4YO[Kh0j4UOTKk0Q5ROmJm0Y5oNfJQ1_5lN_JT1f5iNXJV1n5fNQJZ1T6cNiI]1]6`N`Ia1e6]NXIb1n6^NlHc1Y7]NbHb1d7^NVHc1o7]NkGc1[8]N^Ge1g8[NSGe1S9[NgFe1_9[N[Ff1j9ZNPFf1V:ZNeEe1a:\\NYEd1l:\\NnDd1X;\\NcDc1c;]NXDc1m;]NmCc1Y<]NbCb1d<^NWCb1n<^NlBc1Y=^NaBb1d=^NWBb1n=^NlAc1Y>]NbAc1c>]NXAc1m>]Nn@c1W?]Nd@b1b?^NY@b1l?_Nk_Od1Z`0\\Nd_Oa1a`0_N__O\\1f`0cNZ_OY1k`0gNV_OR1Pa0nNP_Ol0Va0TOj^Oe0]a0\\Oc^O=ca0C]^O7ia0IW^O1oa00Q^OMQb03o]OKSb05n]OHTb09i200001O00001O00001O001O0000001O00001O00001O00001O0000010N10001O001O00001O00001O00001O001O00001O00001O001O0[YOUOUf0k0eYO]OYf0V1O0000O1000000000000000000O100000000000000O10000000000000000O10000000000000000O10000000000000000O10000000000000000O10000000000O100000000O10000000000O2OO100000O10000000000O100000000O10000000000O10000O10000O100O10000O10000O10000O10000000000O10000000000O1N2F:J6JQa70m^H:[OEXYOd0hf0;O00O11O00001O00001O00001O00
00001O00001O00001O00001O00001O0000001O00001O00001O00001O00001O0000001O0000001O0000001O000000001O0000001O0000001O000000001O0000001O0000001O000000001O0000001O0000001O00"}, "frequency": "R", "size": "L"}, {"id": 5718424, "iscrowd": 0, "isfake": 0, "area": 1780.4546720137675, "isreflected": 0, "bbox": [918.8903809024, 687.0722656512, 43.188354457599985, 41.22534174719999], "image_id": 1142493, "category_id": 2, "segmentation": {"size": [768, 1024], "counts": "hebe0ZQOBen0?[QOAdn0`0\\QO@cn0a0]QO_Ocn0a0^QO^Oan0d0^QO\\Oan0e0_QO[O`n0f0`QOYOan0g0_QOYO`n0h0`QOWO`n0j0`QOUO`n0m0_QOSOan0m0_QOROan0o071O1O101N1O1O100O1O0001O00000010O000000010O000001O0001O01O000001O01O000000010O1O1O1O010O1O1O001O100O1O2N1O100O1O1O2N1O100O2N2N2N2N3M2M3M3Mb_fk0"}, "frequency": "F", "size": "M"}, {"id": 6055310, "iscrowd": 0, "isfake": 0, "area": 3009.7433433694678, "isreflected": 0, "bbox": [557.9277343488, 618.4233398272, 23.663330073600036, 127.19018557439995], "image_id": 1156833, "category_id": 300, "segmentation": {"size": [1024, 768], "counts": "Zc]a09go08H9G8H9G8H:F9G:E:GW1iN0000O1O1O1O=D9Fe0[O6J7I6Kn0QOQZj5"}, "frequency": "R", "size": "S"}, {"id": 6820594, "iscrowd": 0, "isfake": 0, "area": 90721.74625583093, "isreflected": 0, "bbox": [422.3154296832, 391.4096679936, 345.7492675584, 262.39172362240004], "image_id": 1189415, "category_id": 140, "segmentation": {"size": [1024, 768], "counts": 
"j^T=385S10_l0NYRO69HNf0]m0N`ROe0\\m0R1O2M3M2O2M2O2N1O1O101M101N100O2O0O100O2O0O100O2O0O010O010O01O001O00000000O100O101O000000000000O10O10O10000001O1O2N2N1O2N1O001O1O1O1O1O1O1O001O1O1O1O1O1O001O001O0000000O2O0O10000O101N100O10001N100O100O2O000O2O0O2O1N2O1O1N2O1N2O0O2O1O1N2O1N2O1N2N1O2O1N2N2N2N2O1N2N2N2N3N1N2N2N2N2O1N3M2N1O2O0O2N1O10O01O1O00100O001O10O01O1O010O1O001O10O01O1O010O1O001O010O00O1O2F9N200O2O000O1O2M2O2N10001O001O001O000O2O001O00001O001O0N3L300100O2O0O100O1O101N100O1O101N1000000O2O000O101O000O101O000O101O0O10O1000O0100O10O0100O010O100O010O1000O01000O01000000O10000O1000000O10000O1000000O10000O10000O100BWVOeJii0[5WVOdJji0[5XVOcJii0]5ZVO]Jii0b5;O1O100O1O2O0O1O2O0O2N100O2N1O2O0O1O2O0O2N100O2N101N1O101N1O2O0O1O2O0O2N100O2N2O1N1O2O1N2N2O1N1O2O1NVC"}, "frequency": "R", "size": "L"}, {"id": 6820595, "iscrowd": 0, "isfake": 0, "area": 36191.76019389325, "isreflected": 1, "bbox": [338.268066432, 346.3110351872, 333.1145019648, 108.6466064384], "image_id": 1189415, "category_id": 140, "segmentation": {"size": [1024, 768], "counts": "V\\_:2U10`m04\\QONj04fm06VRONhm05SRO0jm03RRO1km01QRO3mm00oQO2Qn00jQO3Vn0l0000O1N101O1N2O0O2O1N2O0O2O1N2TOZQO;gn0E]QO5fn0Ii0d0\\O3M3M2N1O0O2O00001O2N1O1N3M3N1N2N2O0N2O001O1O1O1O1O100O02O001O2N5K3M2ZNiQO^1]n00000000004L1O001N010O0000100O1O1O00001OO1000O2O001O001N1O3D;M4M4KRP55koJ2N2O1HCjPO>Uo0c0B6K2N2N2M2O1O2N3L3N2N3M2M4M2N2N2N2O0O1O1O100O1O100O1O00100O1O100O1O[OlRO`NSm0a1mRO_NSm0a1nRO]NRm0d1oROZNRm0f1RSOUNnl0l1c01N10000_ROQNQm0o1nROSNPm0o1nROSNQm0m1oROTNPm0l1oROUNPm0l1PSOTNPm0^200O10000O10000O10000O1000O010000O100O10000O10000O100O01000O10000O100O1000000001O0O100000001O00000000001N1000001O001O0O2O001O1O001O1O0O2O001O1O001O1N2O1O1O1O1O1O1O1N2O1O1O1O1O1O1N2O1O1O1O2N2N2N2M3N2N1O1O0O2O0O2O0O2O0O2O0O2O0O2O001N100O2O0O2N1O2N1O1N2O1O1O1N200O1O1O1O100O1O1000O100001N2O001O00O1O1O10O01O1O1O00100O001O1O1O0O2O001N101N1O2N1O2O000100O1O010O1O1O1H8K5M3J7L`dm2"}, "frequency": "R", "size": "L"}, {"id": 7050495, "iscrowd": 0, "isfake": 0, "area": 
12178.664306584886, "isreflected": 0, "bbox": [614.0329590105, 0.0297851904, 144.31091307449992, 84.39184568319999], "image_id": 1198997, "category_id": 14, "segmentation": {"size": [1024, 795], "counts": "RPTc0`2_m02O00000O2OO100000000000000000000000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O10000O1000000O10000O1000000O101OO010001N?B8H3M4K5L5K0O2NinS1"}, "frequency": "F", "size": "M"}, {"id": 8201777, "iscrowd": 0, "isfake": 0, "area": 8305.03078992447, "isreflected": 0, "bbox": [95.1661376512, 427.63671874560004, 103.73437501439999, 80.0605468416], "image_id": 1246937, "category_id": 50, "segmentation": {"size": [768, 1024], "counts": "RnV2R1bf0a0I7J7mYOPNie0V2N2MO[ZOjMde0Y2O1O100000000O1000O100L4O1000000O10000002N2N1N2O000000000000JaZOkM_e0U26O100O11OO10O10000000001O00000000000000000000001O01O000000000O12O1N1O0001O0000001O001O1N1K6M3O001O1O11100N3L3M1OO1O10gNhYOj0Xf0VOkYOg0Vf0XOnYOb0Uf0\\OPZO8Yf0FR1Mki[c0"}, "frequency": "C", "size": "M"}, {"id": 8331685, "iscrowd": 0, "isfake": 0, "area": 1319.8651688711316, "isreflected": 0, "bbox": [333.0378418176, 276.2176513536, 19.66040033280001, 67.13317870080004], "image_id": 1252367, "category_id": 126, "segmentation": {"size": [768, 1024], "counts": "cig7n0kf08G9H8L4N2N2N1O1OSOYZOCfe07aZOI^e00kZOOUe0JR[O6md0FY[O9fd0H[[O7ed0H\\[O8dd0G^[O8bd0G_[O9ad0Fa[O9`d0Eb[O:_d0Db[OT?Bl@>T?Bm@>R?Ao@?Q?Ao@?Q?Ao@?Q?@QA`0o>_OQAa0o>_OQAa0o>^ORAb0n>^OSAb0l>^OTAb0l>^OTAb0l>]OUAc0l>\\OTAe0k>[OUAe0k>ZOVAf0j>ZOWAe0i>[OWAe0i>ZOXAd0j>\\OVAc0l>\\OUAb0l>^OUALmKa0ob0BYADYA8j>HWAMS?3]401O0000000000001O0000000O10001O00gn``0"}, "frequency": "F", "size": "M"}, {"id": 8331718, "iscrowd": 0, "isfake": 0, "area": 7810.716052963013, "isreflected": 0, "bbox": [874.5920410624, 396.60668943359997, 133.42102046720004, 58.541870131200085], "image_id": 1252367, "category_id": 19, "segmentation": {"size": 
[768, 1024], "counts": "cl_d01mg04KLYXO7dg04O2N1O1O2N1O1O2O0O1O2O0O1O2N1O1O2O0O1O2N1O1O2O0O1O2NA[YODdf0l01O00000001O00000000000010O00000000000001O000000000000001O0000000000001O00000001O000001O0000000000000000000000000000001O000001O001O1O1O1O1O1O2N1O1O1O1O1O101N1O1O1O1O1O1O2N00100O1O1O1O1O1O1O001O1O1O100O1O1N2I[XO0iY<"}, "frequency": "F", "size": "M"}, {"id": 8556674, "iscrowd": 0, "isfake": 0, "area": 16102.07363324599, "isreflected": 0, "bbox": [1624.1782226688, 345.7526856192, 122.70483394560029, 131.22607411199994], "image_id": 1261883, "category_id": 105, "segmentation": {"size": [1536, 2304], "counts": "flf[28b_1:G8H9G8K6J4M2M3N3L3N2M4M2M3N3M2N2N3N1N2O2M2O1N3M2O1N3N0O2O0O2N101N101N101N1O2O0O2M2O2N1O2N101N101N101O00001O001N1O100000000000O100000000O1000000O2O0000000O10001O0O1000001O0O2O001O1N2O001O1O0O2O1O1O0O2O1N1O2N2N2N2M4M2M3M3M3M3N2M3N2N2N2N3L3N2M3M3M3M3M3L4K6SOYaN9m^1CXaN8[_1L5K\\SSj0"}, "frequency": "F", "size": "M"}, {"id": 8556676, "iscrowd": 0, "isfake": 0, "area": 8877.468905432304, "isreflected": 0, "bbox": [1786.9125975552001, 0.06994621440000001, 115.1357423615998, 77.1043702272], "image_id": 1261883, "category_id": 83, "segmentation": {"size": [1536, 2304], "counts": "Q`kc27h_13N7I4L2N1O2N3M6J6J7I7I2N2N1O2N2N001O00001O0000001O00001O0000000000000000001O2N2N2N001O1O001O1O00000000O1000000O1000000O1000000O1000000O100O100O100O100O100O100O1O100O100O100O100O100O1O1O1O1O1O1O1O1O100O1N2H8G9K5L4K5K6NP`ob0"}, "frequency": "F", "size": "M"}, {"id": 8557176, "iscrowd": 0, "isfake": 0, "area": 4382.728302921929, "isreflected": 0, "bbox": [479.4177245952, 771.48437504, 50.91101076479998, 86.08605951999994], "image_id": 1261906, "category_id": 43, "segmentation": {"size": [1024, 768], "counts": "^hm>;eo02N2N1N3N2N2M3M2O2M3N2M3N2N1N3N2N1N3N1N3N2N1O2N3L8I2N1N3M3N2N4MQNmROP1Rm0nNXSOk0hl0RO\\SOl0el0hNhSOW1bm0N2N2O1N1O2N2N3M2N2M3N2N2M3N2M3M4L3M]U\\7"}, "frequency": "C", "size": "S"}, {"id": 8557195, "iscrowd": 0, "isfake": 0, "area": 21226.217223277847, "isreflected": 
0, "bbox": [118.7488402944, 476.1030273024, 134.81121830400002, 157.45141606400006], "image_id": 1261906, "category_id": 278, "segmentation": {"size": [1024, 768], "counts": "]_^59fo07I7I7H8I7I7H8I7I7I7J6I7J6J6J6J6I7J6J6J5K4L4L3M1N2O1O1O0000000O0100O1O1O11O000O10O100O100O1O00100O1O2O0O1O2N101N2N1O3N2M4L4L4M3L3M4L4M3L4L4L3N3L4L4L4M3lg02O1N1000O010000O1N101N2O1O100O1O1O1O10000O10O100O10O10O14L2M101O000O101N101O001O4L1O100O0001O000010O00O2H7N4M101N10000O1O100011O0O10O000O10003M2N1N2O1N1O2O0NcUb7"}, "frequency": "F", "size": "S"}, {"id": 10666665, "iscrowd": 0, "isfake": 0, "area": 7470.461379172232, "isreflected": 0, "bbox": [943.0775146496, 170.2689819648, 80.96105953280005, 92.27227783680001], "image_id": 1350089, "category_id": 95, "segmentation": {"size": [768, 1024], "counts": "fnQf0?]g06J7I6L5L3LKaYOoN[f0R1gYOnNVf0T1jYOmNSf0U1mYOlNQf0U1oYOmNme0U1QZOnNne0R1QZOPOme0Q1SZOPOke0Q1UZOPOie0Q1WZOPOge0P1ZZOgNN0fe0Z1\\ZOfNO0ce0[1^ZOeNO0be0\\1_ZOdNO1`e0\\1bZOaNO3^e0]1mZOdNQe0]1nZOeNQe0[1nZOfNQe0[1nZOgNQe0Y1nZOhNQe0Y1nZOiNQe0W1nZOjNQe0W1nZOkNQe0T1oZOmNQe0S1nZOnNQe0S1nZOnNRe0R1lZOQORe0P1mZOQOSe0o0mZOQOSe0o0mZOQOSe0o0mZOQORe0P1oZO\\N=Jdd0j1oZOZNfe0f1:00O1L4MlZO^NPd0X1oZOiNd0?]d0g0g[O]OYd0c0d[O@\\d0`0d[O@\\d0`0d[O@\\d0?[[OlNFe0od0?X[OoNIb0od0?T[OSOM>od0?Q[OVO0;od0?Q[OVO0;od0?Q[OVO0;od0?Q[OVO1:nd0`0Q[OVO29md0a0Q[OVO38ld0b0Q[OVO47kd0c0Q[OUO67id0d0Q[OUO85gd0f0Q[OUO94fd0g0Q[OUO:3ed0h0Q[OUO;2dd0i0Q[OUO<1cd0j0Q[OUO0F4:kd0k0Q[OUO0G47ld0m0P[OUO0H45md0m0oZOVO0J41nd0n0nZOWO0K4Ond0o0nZOWO0L4LPe0P1lZOXO0M4JQe0P1kZOYO0N3ISe0o0jZOZO0O2HTe0o0jZOZO001FWe0P1fZO[O200EYe0a1gZOkNMF\\e0_1gZORO[e0m0eZOSO[e0m0eZOSO[e0n0cZORO^e0g110\\J"}, "frequency": "R", "size": "M"}, {"id": 10811497, "iscrowd": 0, "isfake": 0, "area": 7540.034209184809, "isreflected": 0, "bbox": [921.7823485952, 327.33459471360004, 79.00170905599998, 95.44140625919994], "image_id": 1356234, "category_id": 252, "segmentation": {"size": [768, 1024], "counts": 
"Wcde0P1nf08I4L2N2H8M3H8M2O2N1O2O0O2O2N1N2N2O1N2N2O1O00001O001O1O1O1O1O001O00000000O100O10000O100O100O100O1O10001O0O1O2O1N2O1O1N101N2N1O2L4L4M3L3M4M3N2N3M3L3I7L4M5E:GaXOK^l`0"}, "frequency": "R", "size": "M"}, {"id": 11012500, "iscrowd": 0, "isfake": 0, "area": 52070.880339493815, "isreflected": 0, "bbox": [240.38415529800002, 0.0423583744, 295.409179622, 176.2669677568], "image_id": 1364554, "category_id": 172, "segmentation": {"size": [1024, 820], "counts": "PPa7;eo0;E2N1O1O1O2N1O1O1O1O1O1O2N1O1`QONPm0k1F7I7I3M3M3M3M3M2N1O001O1O001O001O1O001O001O001O1O001O001O1O001O001O001O1O001O001O1O001O001O001O001O001O00001O000000001O0000001O000000001O0000001O00000oTOUKmj0Q5N2N1OO100O1N2O1N20000000TUO[K]j0T501ON2N2000000O12N2N3M2N2N001OO100N2N2N2N2N2N2N2N2N21O00001O001OO100O100O1O100O1O100O1O100O100O1O100O1O100O1O100O100O10SUO`KZj0`4eUObKZj0]4gUOcKYj0]4gUOcKYj0\\4gUOeKYj0[4gUOeKYj0Z4gUOgKYj0Y4gUOfKZj0Z4eUOgK[j0X4eUOiK[j0W4eUOiK[j0V4eUOjK\\j0V4dUOjK\\j0U4dUOlK\\j0T4dUOlK\\j0T4cUOlK^j0S4bUOnK^j0R4bUOnK^j0Q4bUOPL^j0P4bUOoK_j0P4aUOQL_j0o3aUOQL_j0o3`UORL`j0m3aUORL`j0n3_UOSLaj0l3`UOTL`j0l3_UOULaj0j3`UOULaj0k3_UOULaj0k3^UOVLbj0i3_UOWL5Jbi0o3YVOVLO3gi0f3ZVOXLG:oi0^3ZVOXLEN3oYORNme0Q200O2O0O100O10000O2O0O10000O10001O000O100O1O1O2N1000000000010O0001O1hMY\\O5ic0GY\\O9he0O100O1O1O000001O000000000001O0000000000000000001O000O100000000000001O000000000000000O101O00000000O10000001O001O002Neeo5"}, "frequency": "C", "size": "M"}, {"id": 11021563, "iscrowd": 0, "isfake": 0, "area": 9054.984388280203, "isreflected": 0, "bbox": [628.0666504192, 354.01574707199995, 137.95556638720007, 65.63696286720005], "image_id": 1364931, "category_id": 269, "segmentation": {"size": [768, 1024], "counts": "bdf>5ig03N101N2O1O001O1O001O1O1O001O1O0000O1N2O100O1000000O1000000O100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1nN1SZOOje0;oYOEPf0V1N2N2N200O10000000000000000000001O001O000000000000000001O000000000000000000000000000O1O0F;O101N2No\\P6"}, 
"frequency": "C", "size": "M"}, {"id": 11775390, "iscrowd": 0, "isfake": 0, "area": 5139.189770647712, "isreflected": 0, "bbox": [530.4217529088, 829.0786132992, 82.75280762879993, 62.102905241599956], "image_id": 1396529, "category_id": 52, "segmentation": {"size": [1024, 768], "counts": "Pkd`09fo02O1M4M2O1O1N2N2N2O1N2N2N2N101N100O2O0O100O2O1N1O010N2N2NObQOcN^n0]13011OO1O2N1O0O2O001L310O1O1N1O2O0O2M20001O0O1O1O1O1O0011O0O100ZO[OPROf0Pn0[OnQOf0Qn0[OmQOf0Tn0[OjQOf0Vn0ZOiQOf0Xn0VO]QO2:h0\\n0XOcQOi0^n0<001N6K1N6J2N2N2O2N1N2O1O5J2MXUg4"}, "frequency": "C", "size": "S"}, {"id": 11950619, "iscrowd": 0, "isfake": 0, "area": 44743.44957824625, "isreflected": 0, "bbox": [1258.818359296, 548.8509521664, 300.30981447680006, 148.99096673279996], "image_id": 1403825, "category_id": 355, "segmentation": {"size": [1152, 2048], "counts": "fX[\\1b0]S12M3L3O2N101N101N101N101N101N101N101N101N101N1O2O0O2O0O2O1N101N100O01O001O001O00001O010O010O010O0010O010O0mN^NjoNb1TP1aNjoN_1UP1dNjoN[1UP1hNioNY1TP1kNjoNU1UP1mNkoNS1SP1POkoNP1SP1TOloNl0SP1VOkoNj0UP1XOjoNh0VP1YOhoNg0XP1[OgoNd0YP1^OeoNc0[P1^O`oNe0`P1\\O_oNe0`P1]O]oNd0dP1]OYoNe0fP1Z10010O001O010O00010O010O0100O1O010O100O10O0100O1O010O100O010O100000O1000000O0100000O10000000O0100000000O100000001O0O2O00001O001N110O00001O001O001O01O01O001O001O001O0000O1O1O1O10O0100O100O10000O010O100O100O10O010O10O10O1000O010000O02O0000000O1000000O101O00000O100000000O10001O0O100000000O1000001O0O1O100O1O100O1O101mNSoNPOnP1o0boNaN_P1]1S1O1N2N2O1N2O2M2O1N200000000O10001O000O101O00001O0O101O00001N10001O000O2O00001O0O10001N10001N10001N10001N10001N10001N10001N10001O0O2NXfVa0"}, "frequency": "R", "size": "L"}, {"id": 12178946, "iscrowd": 0, "isfake": 0, "area": 6946.930740642342, "isreflected": 0, "bbox": [201.6361083904, 3.2943725568, 61.584472678400004, 112.80328366079999], "image_id": 1413369, "category_id": 9, "segmentation": {"size": [768, 1024], "counts": 
"Y`g43lg06J6J6J6K4K6K5K5K5eYO]Nne0h1nYOXNRf0k100O10000000001O00001O0000O100O_ZOVNnd0i1R[O\\Njd0d1U[O`Nid0^1W[OeNjd0X1U[OiNnd0S1R[OnNPe0P1P[OoNTe0n0lZOROWe0l0gZOTOOAPe0Y1Q[OVOMHPe0o0R[OZOMOmd0d0V[O\\OM4ld0>W[O^OM4od0Ve03^[OMfd00Z[O0hd0OV[O2md0KR[O5Re0IR[O2Pe0LT[O0od0MV[OMnd01V[OJld04c1N2MoePb0"}, "frequency": "F", "size": "M"}, {"id": 12348078, "iscrowd": 0, "isfake": 0, "area": 106188.96409952972, "isreflected": 0, "bbox": [244.7874755584, 148.1703491081, 286.9733886976, 370.0307006913], "image_id": 1420513, "category_id": 1, "segmentation": {"size": [803, 1024], "counts": "kkY63b12ee03UZO6de0LXZOde0YO]ZO:NM4ge0^O\\ZO?M3ge0BWZO=11ge0j0YZOUOhe0k0XZOUOhe0j0YZOUOge0l0YZOTOge0k0ZZOTOge0k0ZZOUO6ROod0i1lZOTO3WOnd0f1oZOSO2ZOmd0b1S[ORO0^Old0`1U[OQONAld0]1W[OROMBkd0[1Y[OSOKDjd0Y1][OQOJHgd0W1_[OQOIJgd0U1a[OPOHLfd0T1c[OnNHOdd0S1e[OnNE2cd0Q1j[OkNC5bd0P1n[OhN@a0e0QObb0e1\\]OfN]Of0c0QOcb0c1_]OdNZOj0b0QOcb0b1c]O`NYOn0?ROdb0`1f]O]NWOR1?ROcb0_1j]OXNVOX1;TO^a0Do^Oh1?@TOVOYa0GR_Oc1b0_OSOXOUa0JT_O`1d0]OUOYOPa0MV_O]1f0\\OTO[Ol`00Y_OY1h0ZOUO\\Oi`03Y_OW1j0XOVO^Oe`05Z_OT1m0WOVO_Oa`09Z_OR1Q1SOVOA^`0nc0CS]OOoN>mc0DU]ONnN[OR2ec0_MR]O2E_2Xc0`Mh]O`2Xb0_Mi]Oa2Vb0`Mk]O_2Ub0`Ml]O`2Sb0aMm]O_2Sb0`Mn]O`2Rb0`Mn]O`2Sb0^Mn]Ob2hc000000O100O100O100O100O1000000O1000000O100000000O1000000O1000000O100001O1O1O1O1O1O1O1O1bMgZOS2Ze0kMgZOU2Ze0gMjZOX2ae0N1O1O2N1O1O2N1O2N1O1O2N1O:F>B3M0000O10O10000O100O00100O1O100O1O1O101N2N2O1N2N2Ml`\\a0"}, "frequency": "F", "size": "M"}, {"id": 14490578, "iscrowd": 0, "isfake": 0, "area": 2250.40902209553, "isreflected": 0, "bbox": [0.0541991936, 575.0759277312, 79.296264704, 28.379760768000097], "image_id": 1511060, "category_id": 321, "segmentation": {"size": [768, 1024], "counts": "Rb0e0Zg01O100000001O00000000000000000000001O00000000000000000000001O000000000000000000001O0001O00001O00001O000010O0001O00001O00000O2O0O101N100O2O0O101N101N101N2O1NcUTf0"}, "frequency": "R", "size": "S"}, {"id": 14640483, "iscrowd": 0, "isfake": 0, "area": 10497.950872153235, "isreflected": 0, 
"bbox": [23.1105956864, 512.5881347328, 201.2670898176, 52.15930176000006], "image_id": 1517456, "category_id": 167, "segmentation": {"size": [768, 1024], "counts": "e`?3lg06J7J5J5L2N1\\YOVOPf0k0oYOUOPf0l0b01O1O0O2O001O1O0O2XYOjNdf0X11000O100000000O1000000ON]YOhNdf0X12000O01O100O1N1O2O1O1O010000000O0100000O01000000O100000000O1000001N10000000O0L5M3O10O103M00000O10001O0O10O1O10O101O00000O10O1000O1000O10O100001O00000O2O000000001O000O10001N100O1O1O2N1O10000O2O000O1J6N3M200000001O01O001O011N003M1O10O000000001N10000O101O001N3N1MXXOLjg02dWfc0"}, "frequency": "C", "size": "M"}, {"id": 14832137, "iscrowd": 0, "isfake": 0, "area": 33114.913448420724, "isreflected": 0, "bbox": [902.3974608896, 5.4824218624, 121.71508797440003, 272.06909184], "image_id": 1525619, "category_id": 49, "segmentation": {"size": [1024, 1024], "counts": "ZQ\\l0g0P5[OVe0h0_ZOC^e0?YZOJfe07PZO3ne0OiYO:Wf0F_YOe0_f0]OWYOn0gf0ROoXOY1Pg0gNgXOc1dg0RNQXOY2ng0gMiWOc2Xh0[M^WOQ3ah0nLVWO\\3lh0aLjVOi3Ui0XLbVOQ4Wi0WL^VOS4Zi0VL\\VOR4ci0V1O2O1N20O01000gIoVOb5hi001O2M10O1001O000O2O000O1O0010O01O1O10O01O1O00100O001O10O01O1O010O1O1O010O1O00100O001O10O01O1O1N101O001O0O2N101O1O0010O10M2N3M2N3M2M4M2N3M2N3N002K5K5L4K4L5K4L5K3M4M3L4L4L4L4L4L4M3L5KoK"}, "frequency": "F", "size": "M"}, {"id": 15050320, "iscrowd": 0, "isfake": 0, "area": 15047.028624075696, "isreflected": 0, "bbox": [110.09436038, 0.24633790199999997, 125.22216793819997, 120.1626586716], "image_id": 1534987, "category_id": 36, "segmentation": {"size": [786, 1178], "counts": "Z`d27Yh04L4L4L4L4L5K4M2N2N2N2N2N2N1O2N2N2N2N1O2O1N2N101N101N100O2N100O100O2O0fNkMV\\OV2ic0mMT\\OT2lc0oMQ\\OR2nc0QNn[OP2Rd0SNk[Om1Ud0UNh[Ol1Xd0WNd[Oj1\\d0YNa[Oh1^d0ZN_[Og1ad0\\N\\[Od1dd0_NX[Ob1hd0U10O3M3M3M2O2M3M3M2N3L4M2N000000000O10000000000O10000000000O2O0000000O10000O101N100O100O10000O2O0O100O100O100O2O000O2O0O1O2N1O2N2N1O2N1N3L3N4J5J6J7I6J7I7J`adf0"}, "frequency": "C", "size": "M"}, {"id": 16010041, "iscrowd": 0, "isfake": 0, "area": 14919.115067487317, "isreflected": 0, "bbox": 
[1.0953368831999999, 775.3538818048, 133.8927002112, 111.4259033088], "image_id": 1575962, "category_id": 163, "segmentation": {"size": [1024, 768], "counts": "Xh0>bo010000000O0100O10000O1O101N100O2O0O1010O1O1N2N2LoQP2E^noM1O100N101O1N2O1O1O10O10O100000000O0100000O100O1O001N2O1O0001O1N1L500O1000O010000O10O01O1MdThc0"}, "frequency": "R", "size": "M"}, {"id": 16951734, "iscrowd": 0, "isfake": 0, "area": 4342.796314784524, "isreflected": 0, "bbox": [204.2850341888, 342.4410400512, 41.53820794879999, 104.54943843840005], "image_id": 1616122, "category_id": 182, "segmentation": {"size": [768, 1024], "counts": "hRj46jg04L3M2N3N2M3M5K5K5L5J5K4L5L4K5K3M0001LS[ORNnc0l1T\\OTNlc0k1T\\OVNlc0h1V\\OXNjc0g1V\\O[Njc0b1X\\O^Nhc0a1X\\O`Nhc0^1Z\\ObNfc0]1Z\\OdNgc0Y1[\\OgNec0X1[\\OjNdc0T1^\\OlNbc0S1^\\OnNdc0n0^\\OROcc0k0^\\OVOcc0h0^\\OXOdc0d0]\\O\\Oec0b0\\\\O]Ofc0?\\\\OAgc07_\\OIbc01b\\OO`c0Jf\\O3ee0LbZVb0"}, "frequency": "C", "size": "S"}, {"id": 16957916, "iscrowd": 0, "isfake": 0, "area": 374381.0622064512, "isreflected": 0, "bbox": [114.3419189248, 386.9085692928, 621.3806152704, 602.4987793408], "image_id": 1616394, "category_id": 261, "segmentation": {"size": [1024, 1024], "counts": 
"^bh31io09G7J7PRO]OZl0i0cSO^OSl0i0jSO]Olk0j0PTO]Ofk0i0XTO\\O_k0k0^TO[OXk0l0eTOZOQk0l0lTO[Oij0m0TUOYObj0n0[UOXO[j0n0cUOXOSj0o0iUOXOmi0o0PVOVOgi0P1WVOTOai0S1\\VOoN^i0m3J6I7J6J6J5N3M3N2O1N2_N[HeZOf7Pe0eHoZO\\7md0hHR[OY7ld0lHP[OU7od0lHoZOV7od0lHP[Om2^OOae0UMP[O^21:nd0YMP[O\\277id0]MP[O[2<5cd0aMP[OZ2a0YNcN4ke0ZOoZOY2i0QNcN:ee0\\OnZOX2o0jMeN`0^e0^OmZOW2U1cMgNf0We0_OmZOW2X1^MkNj0Ze0WObZO`2[1ZMPOm0Re0ZObZO^2^1VMUOP1jd0]ObZO\\2b1QMYOT1bd0_ObZO\\2^3SNoa0BbZOZ2c3RNja0EbZOX2h3QNea0HbZOV2m3oMaa0LaZOT2Q4oM]a0MbZOS2T4oMYa0ObZOQ2W4PNWa0O`ZOQ2\\4oMSa01`ZOo1`4oMo`03`ZOm1d4nMl`05`ZOl1g4mMi`08_ZOj1k4lMf`0;^ZOi1m4kMe`0=\\ZOh1R5iMb`0?\\ZOg1U5hM_`0b0[ZOe1Y5fM]`0f0YZOc1]5dM[`0j0WZOa1`5cMZ`0n0TZOi06ZNa5KV`0U1PZOc0gl0Y12O1O1O1O1O100O1OU]OVMb8i2^GZM`8e2`G\\M`8d2`G]M_8b2aG`M\\8a2eGaMW8`2jGaMS8`2oGaMn7`2RHaMk7`2WH`Mf7a2[HaMa7`2aH`M]7`2eHaMW7e0a^O:Y:ROS7d0k^O5S:WOo6d0T_O0n9^Ok6b0\\_OKk9Cf6b0g_ODe9Ka6`0P@@`91_6=X@]OZ97\\6;`@XOU9>Z69f@SOS9d0V68mKIQ46PLKo35RLLm32TLOl3OVL1k3LWL4i3JXL6j3GXL9h3FXL:j3CXL`0e3^O]Le0^3\\ObLi0X3WOiLm0R3TOnLQ1k2POVMR1g2nNZMS1d2nN\\MS1b2mN_MT1_2lNbMU1\\2lNdMU1Y2lNhMU1V2kNkMV1S2jNnMW1P2jNPNW1n1iNSNX1j1iNVNY1h1hNXNY1f1gN[NZ1c1fN^N[1`1fN`N[1^1eNcN\\1\\1cNeN^1Y1bNiM^OkCQ2[>_NgMGlCk1\\>[NfM1lCf1]>UNeMQNdMe0mC[1_>lMbMP1mCU1b>fM_M\\1mCo0Qk0YOlTOi0nj0^OPUOc0jj0DTUO>hj0GUUO:ij0`2N2O00000001N2N2M3K5K5K5K5dM`TO2ek0EcTO7bk0@gTO;^k0BfTO0hk0M\\TOL2WO\\j0k0gUOGO@Zj0g0kUODMFXj0d0oUOAKLVj0a0TVO]OG4Uj0>WVOYOF9Tj0<[VOTODa0Qj09iWOHWh06jWOJWh04iWONXh0OiWO2Wh0LjWO4Wh0JjWO7Vh0HiWO:Wh0DjWOVOgAk0X>WOeAk0[>VOcAk0]>VOaAk0^>XO_Ai0a>ZO\\Af0e>\\OWAe0i>]OUAc0k>^OTAb0m>^OQAc0P?^Oj@\\OfLV1`b0^Oh@h0X?YOf@g0[?ZOd@f0\\?[Oc@e0]?\\O`@e0`?^O[@d0f?]OW@e0j?[OT@e0m?\\OQ@e0o?[Om_Oi0S`0ZOi_Of0X`0\\Oe_Od0\\`0]Ob_Oc0_`0^O__Oa0c`0BT_Oc0m`0_O_^OR1ca0POY^OP1ha0TOS^Ol0na0VOP^Oh0Rb01U]ONlb04R]OIQc0b22N2M3PNk\\OPOYc0o0g\\OPO[c0P1d\\OoN`c0n0`\\OQOdc0l0\\\\OSOgc0k0Z\\OSOjc0j0V\\OUOmc0i0S\\OUOPd0j0P\\OUORd0j0n[OUOUd0i0k[OWOXd0f0h[OYOZd0f0f[OYO\\d0f0d[OXO`d0f0a[OXObd0f0^[OYOfd0d0Z[OZOld0b0T[O]ORe0>nZOAYe09gZOE_e07aZOGde06\\ZOIje0
1WZOMle02TZOMne01TZOMZf0EhYO9\\f0BhYO:QUh6"}, "frequency": "R", "size": "M"}, {"id": 17072764, "iscrowd": 0, "isfake": 0, "area": 8204.94289259638, "isreflected": 0, "bbox": [19.6289062912, 403.4589843456, 120.91662592, 67.85620116479993], "image_id": 1621320, "category_id": 199, "segmentation": {"size": [768, 1024], "counts": "nm=5jg02N2O00000001N10000001O00000001O00000000O1L4`XOISg0e0L3O2O0O2N1O1O1O2N1O1O101N1O1O1O1OD`YO[O^f0d0dYO\\O[f0e0fYOZOZf0f0fYOZOYf0h0gYOXOXf0h0hYOXOWf0j0iYOUOVf0l0jYOTOVf0l0kYOSOTf0o0lYOQORf0P1nYOPORf0Q1nYOnNQf0S1oYOmNQf0T1oYOlNoe0U1QZOkNne0V1SZOiNme0X1RZOhNme0Y1TZOfNle0[1SZOeNle0\\1UZObNke0`1:0O2O0O101N101O001N10000O10O1N2O001O1O1O1O1O1O2N1O1O1O102M10001O0O2O001N101O001N101O0O2O001O0O2O1O1N2O001N2O1O2M101O1N4M3M4K`jed0"}, "frequency": "R", "size": "M"}, {"id": 17265253, "iscrowd": 0, "isfake": 0, "area": 74550.77629977709, "isreflected": 0, "bbox": [0.81213384, 1045.1564940800001, 157.63964843999997, 472.9189454399998], "image_id": 1629547, "category_id": 273, "segmentation": {"size": [1600, 1200], "counts": 
"mP1`8P1jHAUOdU1S8ijNjIUU1X6jjNiITU1X6ljNiISU1X6ljNjIRU1V6njNkIQU1T6PkNmIoT1S6QkNnInT1R6RkNoImT1P6TkNPJmT1o5SkNPJoT1o5QkNQJoT1o5QkNQJoT1o5RkNPJnT1P6SkNoImT1Q6UkNlIlT1T6_kNaIaT1_6^kNjGoNQ1cU1U7^kNjGB8VU1n7XkNjGZV1V8fiNjGZV1V8fiNjGZV1V8fiNjGZV1V8fiNjGZV1V8fiNjG[V1U8eiNjG\\V1V8diNjG\\V1V8diNjG\\V1V8diNjG\\V1V8diNjG\\V1V8diNjG\\V1V8diNjG\\V1V8diNjG\\V1U8eiNkG[V1U8eiNkG[V1U8eiNkG[V1U8eiNkG[V1U8eiNkG[V1U8eiNkG[V1U8eiNkG[V1U8eiNkG[V1U8diNlG\\V1T8diNlG\\V1T8diNlG\\V1T8ciNmG]V1S8ciNmG]V1S8ciNmG]V1S8biNnG^V1R8biNnG^V1R8biNoG]V1Q8biNQH]V1o7ciNQH]V1o7ciNRH\\V1n7ciNSH]V1m7ciNTH\\V1l7ahNdGi0b0fV1h7_hNmGg0;jV1h7]hNSHe06mV1]8nhNdGRW1`8jhNaGUW1c8ghN]GYW1g8bhNZG^W1d900000000000a0_O6J6J6J6J6J6J6J6J6J6J6J3M3M4L4L9G5L2M1O1O1O2N2N001O1O001O001O1O001O0000oE^lNd5bS1ZJalNe5_S1ZJdlNd5\\S1ZJglNd5ZS1ZJjlNd5VS1ZJmlNe5SS1YJQmNe5oR1YJTmNe5mR1YJVmNf5jR1XJZmNf5fR1XJ^mNf5bR1XJemNb5\\R1]JjmN^5WR1_JomN]5QR1aJVnNY5kQ1eJ[nNW5fQ1fJanNU5`Q1hJgnNR5[Q1kJknNo4\\Q1jJknNn4YQ1oJmnNj4WQ1VKmnNc4VQ1\\KonN\\4UQ1cKonNV4UQ1jKonNn3UQ1QLonNi3TQ1WLPoNa3TQ1^LQoNZ3SQ1fLQoNR3SQ1mLToNj2nP1WMYoN^2kP1aM\\oNU2hP1\\L]jN7R5R3eP1eL[jN8^5[2bP1[MRjN:^6:_P1ZOUiN;V_1Dk`NS_1@o`N?R_1_OPaN`0R_1]OPaNa0R_1^Oo`N`0T_1]On`Na0T_1]On`Na0U_1\\Oo`N`0S_1^OQaN>R_1_ORaN=Q_1_OTaN=[`1K4M2N3KWblb1"}, "frequency": "R", "size": "L"}, {"id": 17265254, "iscrowd": 0, "isfake": 0, "area": 322245.85716860637, "isreflected": 0, "bbox": [531.53234868, 1063.54760736, 614.7945555599999, 524.15209968], "image_id": 1629547, "category_id": 273, "segmentation": {"size": [1600, 1200], "counts": 
"_Sji02la14L3N2N2N2N2N2N2N2N2N2N2N2N2N2N2N2DnNg_NT1V`1=N1O1O1O1O1O1O100O1O1O1O1O010O1O1O1O1O100O1O001O3M2O2M2N3M2N3M2WdNZMVX1h2fgN\\MWX1f2ggN[MXX1h2egNZMYX1h2fgNXMEk1eS1P1dlNUMBd2US19XmNRM_OT3oR1L`mNPM\\O\\3PS1GamNmL[Oc3RS1AamNmLXOj3SS1\\OcmNiLVOS4TS1UOdmNhLVOX4TS1ROdmNfLVO]4SS1nNemNeLWOb4QS1kNfmNcLVOg4RS1gNfmNbLWOl4PS1dNfmNaLXOP5oR1`NhmN_LWOW5nR1\\NhmN^LXOZ5nR1YNimN\\LXO`5lR1VNimNZLYOf5kR1QNkmNYLXOj5kR1oMjmNWLZOo5iR1kMkmNWLYOT6iR1fMlmNVLZOX6hR1cMmmNULXO^6hR1]MomNULXOc6fR1YMPnNTLYOg6eR1UMQnNTLYOj6eR1SMQnNSLXOm6fR1PMQnNSLXOQ7eR1mLQnNRLZOS7dR1kLQnNRL[OV7bR1iLRnNQL[OX7bR1gLRnNQL\\OZ7aR1hLomNnK@]7_R1hLmmNkKD_7^R1gLmmNjKDb7]R1eLmmNiKFd7\\R1dLlmNhKHf7[R1bLlmNhKIi7YR1`LmmNgKIk7YR1^LmmNgKIn7YR1\\LlmNfKKQ8WR1YLmmNgKKR8WR1WLnmNfKKV8UR1TLomNfKLX8TR1QLPnNgKMY8RR1PLPnNhKM[8QR1mKRnNgKN]8oQ1lKRnNgKO`8mQ1iKTnNfK0b8kQ1hKTnNgK0c8kQ1fKUnNfK1f8hQ1dKVnNfK2h8gQ1[IbmNId0SN4j8eQ1ZIdmNHc0TN3m8dQ1WIhmNG?VN5m8cQ1VIkmNE=WN5Q9aQ1SIomND9YN7Q9`Q1RIQnNC8ZN6S9`Q1PITnNB5ZN8U9^Q1oHWnNA1\\N9U9_Q1nHXnNAO[N;V9^Q1nHZnN@L\\N;X9^Q1lH]nN_OH^N=W9^Q1lH^nN^OG^N=Y9^Q1kH`nN]OC`N>Y9_Q1jHbnN\\O@`N`0[9]Q1iHdnN\\O^O`N`0\\9^Q1hHfnN[OZObNb0\\9]Q1gHinNZOWObNc0^9]Q1fHknNYOTOcNc0_9^Q1eHlnNDBl7aQ1`HonNA\\OT8eQ1[HQoN_OVOZ8iQ1WHRoN]OnNe8oQ1nGUoNm9kP1SFWoNk9iP1UFXoNk9gP1UF[oNi9eP1WF]oNg9cP1XF_oNh9`P1XFboNf9^P1YFeoNe9\\P1ZFaoNj9^P1YDVnNX1]1`:]P1WD_nNGER1`1P;]P1VDgnNh0l0S;]P1SDinNi0j0T;^P1PDknNk0g0U;bQ1jD^nNW;cQ1gD]nNY;dQ1fD\\nNZ;eQ1eD[nN\\;eQ1cD[nN];gQ1aDYnN`;gQ1_DYnNa;hQ1^DYnNa;hQ1^DXnNc;iQ1[DWnNe;jQ1ZDWnNe;jQ1ZDVnNf;kQ1YDUnNg;mQ1VDTnNj;mQ1UDTnNj;mQ1TDTnNl;mQ1RDTnNn;nQ1oCTnNPZ1L]Nl9fQ1kE]nNb0`1G^Nj9gQ1mEXnNd0d1D]Nk9gQ1oEVnNb0g1C\\Nk9hQ1RFRnN`0k1C\\Ni9hQ1UFomN`0n1@\\Nk9gQ1WFlmN?Q2_O^Nh9fQ1cGmoNdN^Ng9fQ1eGmoNbN_Ng9eQ1gGUQOW8ln0iGTQOU8mn0kGUQOR8ln0nGUQOQ8kn0oGWQOn7jn0RHWQOl7jn0THXQOj7hn0VHYQOh7hn0WH[QOf7fn0YH\\QOe7en0ZH^QOb7dn0\\H_QOb7bn0UHiQOh7Xn0XHiQOf7Xn0YHkQOd7Vn0\\HkQOa7Wn0^HkQO`7Vn0`HlQO]7Un0bHmQO\\7Tn0cHoQOZ7Rn0fHoQOW7Sn0hHPROU7Rn0jHoQOT7Rn0kHPROS7Qn0mHPROQ7Rn0mHQROo6Rn0PIoQOn6Sn0PIoQOm6Un0PImQOm6fn0`H^QO\\7eT1M4L4L4L3M4
L4L4L3M4L4L4L4L3M4L4L4@?^Oc0UNRcNQN_]1l1WcN_Mk\\1_2UcNaMm\\1]2ScNcMo\\1[2QcNeMR]1X2nbNhMT]1U2lbNkMW]1S2ibNmMZ]1P2fbNPN\\]1n1dbNRN^]1W33M2O2bL^aNY3d^11O001O000bHdLXPO\\3fo0gLYPOY3eo0jLZPOV3do0lL\\POU3bo0lL^POU3_o0nL`POR3^o0PMbPOQ3[o0QMePOo2Zo0RMfPOn2Xo0UMgPOk2Xo0VMhPOj2Xo0VMhPOY1cIeNgU12ePOY1dIeNgU12ePOZ1cIcNiU13dPOZ1cIaNkU16]PO^1gIXNPV1:XPO_1hIWNQV19WPO`1hIWNQV18XPOb1fIVNRV18XPOb1fIVNRV17YPOc1eIVNRV16ZPOe1cIUNTV14ZPOg1bIUNTV12\\POj1_ITNVV1O]POm1]ITNVV1N_POm1[IUNWV1K`POQ2XISNYV1KaPOQ2VITNQ^1k1naNVNS^1j1laNVNU^1i1kaNWNV^1i1iaNWNW^1l1faNTN[^1m1caNSN]^1P2`aNPN_^1S2_aNmMa^1V2\\aNjMc^1Y2[aNgMe^1[2YaNeMg^1j2000002N4L5K5K3M2N1O2N1O2iJTLVlNm3gS1VLXlNl3fS1ULXlNm3fS1ULYlNm3eS1TLZlNm3eS1TLZlNn3cS1TL\\lNm3cS1TL\\lNn3bS1SL]lNn3bS1SL]lNo3bS1QL]lNP4cS1PL\\lNR4dS1mK[lNT4iS1hKVlNZ4iS1fKVlN[4iS1eKWlN\\4hS1eKWlN\\4hS1dKXlN]4gS1cKYlN^4fS1bKZlN_4dS1bK\\lN_4cS1aK]lN`4bS1`K]lNb4bS1_K]lNb4bS1^K^lNb4bS1UKRhN0\\4l4aS1TKShN0\\4m4aS1RKShN1\\4n4`S1QKThN1\\4o4_S1ZK`lNg4`S1XK`lNi4_S1WKalNj4^S1VKblNj4_S1UKalNl4^S1TKblNm4]S1SKclNn4]S1RKblNo4]S1QKclNP5\\S1PKdlNQ5\\S1nJdlNS5[S1mJelNS5\\S1lJdlNU5[S1kJelNV5[S1jJclNX5\\S1hJdlNY5\\S1fJdlN[5[S1eJelN[5\\S1dJdlN]5[S1cJelN]5\\S1bJdlN_5[S1bJdlN^5]S1aJclN_5]S1aJclN_5^S1`JblNa5]S1_JclNa5^S1^JehNN\\3d5PT1\\JchN3\\3b5QT1YJdhN6Z3a5`T1aJ_kN_5aT1cJ]kN^5bT1cJ]kN]5cT1eJ[kN[5eT1gJYkNY5gT1iJWkNX5hT1iJVkNX5kT1iJSkNW5mT1kJQkNV5nT1lJPkNT5PU1mJojNS5RU1nJljNR5TU1PKjjNQ5UU1QKijNo4XU1RKfjNn4ZU1TKdjNm4[U1UKcjNk4^U1WK_jNi4aU1YK]jNh4bU1ZK\\jNf4eU1[KYjNe4gU1^KVjNb4jU1`KTjNa4lU1`KRjN`4nU1cKoiN]4QV1eKmiN\\4SV1eKkiN[4UV1gKhiNZ4XV1iKeiNW4\\V1jKbiNV4_V1kK_iNU4aV1mK]iNS4dV1oKYiNQ4hV1PLViNP4jV1RLTiNn3mV1VLnhNi3TW1[LghNe3ZW1^LbhNb3^W1_LahNa3`W1_L_hNa3bW1`L\\hN`3dW1aL[hN^3gW1bLXhN^3iW1bLVhN^3kW1bLThN^3RX1]LmgNc3TX1]LkgNc3VX1]LhgNc3YX1^LfgNb3[X1^LdgNb3\\X1_LcgNa3]X1`LbgN`3^X1aLagN_3^X1cLagN\\3`X1eL_gN[3aX1gL]gNY3dX1gL[gNY3fX1gLYgNY3hX1gLWgNY3iX1hLVgNW3lX1iLSgNW3nX1iLQgNW3PY1iLofNV3SY1jLlfNU3UY1k21N2O1N2N101N2N2O1N3N1N3M2O2M2N3M2N2N3M2N3M2N3M2M4M2N3M3TKodNd2T[1R24L3M3iLUeNTOoZ1h0SeNVOP[1h0PeN
WOS[1g0mdNWOX[1e0jdNYOY[1e0gdNZO\\[1d0ddN[Ob[1`0_dN^Oh[1;YdNDo[15QdNIX\\10icNL]\\11ccNLc\\10^cNMh\\10YcNKn\\12RcNKT]11mbNKY]13hbNI^]14bbNId]14\\bNHk]14WbNHl]18TbNDP^1;RbNAR^1>oaN]OU^1b0maNZOV^1d0oaNVOT^1:d`NLo`13?000001O00001N10c_NGU_15k`N0S_1Nm`N4^`12N2O00000O20O000000010O000000mNDb`N=Z_1He`N7Z_1Ke`N5Z_1Me`N3[_1Nd`N3Z_1Ng`N1Y_10f`N0Z_10f`N0Z_10f`N0Z_10f`N1Y_10g`NOY_11g`NOZ_10f`N0Z_11e`NO[_11f`NOZ_10f`N0Z_11e`NO[_11e`NO[_11e`N0[_1Of`N0Z_11e`NO[_11e`NO[_11e`NO\\_11c`N0\\_10e`NO[_11e`NO\\_11c`NO]_11c`NO]_11d`NO\\_10d`N0\\_11c`NO]_11c`NO^_10b`N1]_1Od`N0]_1Nd`N2\\_1Nd`N2]_1Ld`N4]_1Jd`N7\\_1Fg`N9``10000010O00000010O000000010O0000010O00000O2O1N2N2NRi\\4"}, "frequency": "R", "size": "L"}, {"id": 17385866, "iscrowd": 0, "isfake": 0, "area": 2984.3045714135615, "isreflected": 1, "bbox": [474.7595214604, 887.4068603904, 56.859497059499915, 52.48559564800007], "image_id": 1634579, "category_id": 228, "segmentation": {"size": [1024, 853], "counts": "Vlg>3io07K3M2O2L5M2N3M2N2N2N2O1N2O000XQOlNdn0[1M1O1O1O001O0000001O00000000000000010O0010O0010O01O1O3M1O3M2N1O001O1O1mNWQOl0Qo0N2M2O0O2N2O0O3N3K7IWSo9"}, "frequency": "C", "size": "S"}, {"id": 17404769, "iscrowd": 0, "isfake": 0, "area": 51412.65195583029, "isreflected": 0, "bbox": [807.1148681216, 437.674804736, 216.19152834559998, 237.81066885119998], "image_id": 1635395, "category_id": 35, "segmentation": {"size": [1024, 1024], "counts": "Rcfi03ho07I7I6J6K4K6J5_Nb1L4M2M4L3M4M3L3M4L3N]OQTOZMkk0`3M2N3N2N1O2O0N2O2N1O001000O100N2fNQUObMPk0Q2nUOZMVj0X2Q2B>A?B>ATSk00knSOEgl0]1hROfNel0d2^Ob0\\ObLSTOP4gk08J6J6J5K5L4K6J4L5K5D;01O1O1O2O000O100001O01O01O1O2N0001O00000000001O0O100000001O00000000000O2O00000000001O00000O10001O00000000001O0O100000001O000000001N2O1O1O001O1O1O1O1O001O1O1O1O1O1O001O001O00001O0000001O00001O0000001O00001N1O1O2N1000001O002N10O010O101NZB"}, "frequency": "C", "size": "L"}, {"id": 18217373, "iscrowd": 0, "isfake": 0, "area": 2467.680306115097, "isreflected": 0, "bbox": [118.46252444160001, 352.5262450688, 
95.02990717439997, 25.967407308799977], "image_id": 1670098, "category_id": 8, "segmentation": {"size": [1024, 768], "counts": "W[g33lo05K9G3M01O1O1O1O1O1O2N1O2N1O08I2N1N2O1N2O001O0000O2O000O1O1O1O2N1G9N2O1000000000000000000000000000000000001O000000000001O000000000000001O0001O1O1Ooo12ooM00000001N10001O00O10O1Oid_a0"}, "frequency": "F", "size": "S"}, {"id": 18301585, "iscrowd": 0, "isfake": 0, "area": 45069.57898518644, "isreflected": 0, "bbox": [-0.025390592, 446.821533184, 296.6237792256, 151.94189455359998], "image_id": 1673739, "category_id": 25, "segmentation": {"size": [1024, 1024], "counts": "a>m0Ro02N2N2O1O1N2O1O100O1O105KU1bQO`Me0>Qk0[2iTOkMWk0T2eTOPN^k0m1]TOXNbk0i1YTO\\Ngk0c1VTOaNik0k201000O01O0000000000000000YMYTOU1gk0kN\\TOR1ck0oN_TOo0ak0QOaTOm0_k0ROdTOl0\\k0TOfTOj0Zk0VOhTOh0Xk0XOkTOe0Uk0[OmTOc0Sk0]OnTOb0Rk0^OmTOb0Tk0]OmTOc0Sk0]OlTOc0Uk0]OkTOb0Uk0_OkTO`0Vk0@jTO>Xk0BhTO4bk0L]TOIok07QTO]O[l0b0fSOROfl0n0ZSOROfl0n0ZSOROfl0n0ZSOROfl0n0YSOROhl0n0XSOROhl0n0XSOROgl0n0ZSOROfl0n0ZSOROfl0n0ZSOROfl0n0YSOROhl0n0XSOROhl0n0XSOROhl0n0XSOROhl0m0YSOSOgl0m0YSOSOgl0m0XSOSOil0m0WSOSOhl0n0XSOROhl0n0XSOROhl0n0XSOROhl0m0YSOSOgl0m0XSOSOil0m0WSOSOil0m0WSOSOil0m0WSOSOil0m0WSOSOil0m0WSOSOil0l0WSOTOjl0l0VSOTOil0m0WSOSOil0m0WSOSOil0m0WSOSOil0m0WSOROjl0n0USOSOkl0l0VSOTOjl0l0VSOTOjl0l0VSOTOjl0l0VSOTOjl0l0USOTOll0l0TSOTOll0l0TSOTOkl0m0USOSOkl0l0VSOTOjl0l0VSOTOjl0l0USOTOll0l0TSOTOll0l0TSOTOll0l0TSOTOll0l0TSOTOll0k0USOUOkl0k0TSOUOml0k0SSOUOml0k0SSOUOll0l0TSOTOll0l0TSOTOll0l0TSOTOll0k0TSOUOml0k0SSOUOml0k0SSOUOml0k0SSOUOml0k0SSOUOml0k0SSOUOml0k0RSOUOol0j0RSOVOnl0j0RSOVOml0k0SSOUOml0k0SSOUOml0k0SSOUOml0k0RSOUOol0k0QSOUOol0j0RSOVOnl0j0RSOVOnl0j0RSOVOnl0j0QSOVOPm0j0PSOVOPm0j0PSOVOol0k0QSOUOol0j0RSOVOnl0j0RSOVOnl0j0QSOVOPm0j0PSOVOPm0j0PSOVOPm0j0PSOVOPm0j0PSOVOPm0j0PSOVOPm0i0PSOWOll0n0TSOROjl0P1VSOPOil0Q1WSOoNhl0R1XSOnNgl0S1YSOmNgl0S1XSOmNil0R1XSOnNgl0S1YSOmNgl0V200000O100000VSOoMek0Q2lSO`NRl0h2O1O1O0010O01O01O1O101N3M01O1O1O001N[MQTO[1ok0eNSTOY1mk0fNWTOV1jk0jNYTOS1gk0mN\\TOP1dk0PO_TOm0ak0SO^TOn0
bk0RO^TOn0bk0RO^TOm0ck0SO]TOm0bk0TO^TOk0ck0UO]TOj0dk0UO\\TOk0ek0UO[TOi0gk0WOYTOd0lk0\\OTTO?Ql0AnSO:Xl0FgSO6^l0JaSO2dl0N[SONil03WSOKkl04VSOLjl04USOLll04TSOLll04TSOLll04TSOKml05SSOKml05SSOKml05SSOKll06TSOIml06TSOJll06TSOJll06TSOIml07SSOIml07SSOIml07SSOIml07SSOHnl08RSOHnl07RSOInl08RSODRm0;oROCSm01`RO_O>?Sm02aRO[O>b0Rm03WSOLjl04VSOLjl04VSOLjl03WSOLjl04VSOLjl04VSOLil05WSOKil05WSOKil05WSOKil05WSOKil05WSOKil05WSOKil05WSOKil04XSOLhl04XSOLhl04XSOLgl05XSOLhl04XSOLhl04XSOLhl04XSOLhl04XSOLhl04XSOLhl04XSOLhl04XSOLhl03YSOMgl03YSOMfl04ZSOKgl05YSOKgl05YSOKgl05YSOKgl05YSOKgl05YSOKgl05YSOKgl05YSOKgl04ZSOKgl05YSOKfl06YSOKgl05YSOKgl05YSOJ`l0>`SOB^l0`0bSO@]l0a0cSO_O\\l0b0dSO^O[l0c0eSO\\O\\l0c0eSO]O[l0c0eSO]OZl0d0fSO\\OZl0d0fSO[O[l0e0eSO[O[l0e0bSO^O^l0b0aSO_O_l0a0`SO@al0?^SOg0]k0XOdTOi0[k0WOdTOk0\\k0SOeTOn0[k0POfTOP1[k0nNfTOS1Zk0kNgTOU1Zk0jNfTOV1[k0iNeTOV1]k0iNcTOW1^k0hNbTOX1`k0fN`TOY1ak0gN_TOY1ak0fN`TOY1ak0gN_TOY1ak0gN`TOX1`k0hN`TOW1ak0iN`TOV1ak0iN`TOU1ak0kNaTOS1_k0lNfTOP1[k0oNjTOk0Xk0SOmTOh0Uk0VOQUOd0Rk0XOUUOa0Zm0XOn_jf0"}, "frequency": "C", "size": "L"}, {"id": 18680641, "iscrowd": 0, "isfake": 0, "area": 46186.96594370375, "isreflected": 0, "bbox": [563.8796386304, 711.1573486695, 378.39086919680005, 122.06152342350003], "image_id": 1689730, "category_id": 61, "segmentation": {"size": [855, 1024], "counts": 
"`dg>:\\j0a0@f1ZN?A01O00001O000000000000000000000000000000000000000000000000000000000000000O10000000000000000000O100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000000000000000000000000000000000000001O000O10000000000000000000000000000O100000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000O1O1O1O1O1O1O1O1O100O00100O1O1N2O1O1O1O100O1O1O100O1O1000000000000O1000000000000O1O11O2N1O1O1O1O1O1O2M2O1O2N1O2N1O2N2N1O2N2N2N2N2N2N1O2N2N2N2N2N1O2N2N2N2N2N1O4L4L4L5K4L4L3M1O1O1O4L1O1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O1O2N1O1O1O2MReS2"}, "frequency": "F", "size": "L"}, {"id": 18845103, "iscrowd": 0, "isfake": 0, "area": 4333.823457908682, "isreflected": 0, "bbox": [660.964843776, 577.0498046976, 105.76599121920003, 40.975585894399956], "image_id": 1696718, "category_id": 94, "segmentation": {"size": [1024, 768], "counts": "Sbfd04lo02N3M2N010O101O0O0100O001000O10O1O001O1N101O1N202M0000000000000000000O11O010O010O01O01O000001O01O000001O01O0001O01O0001O01O0001O01O00010O00000010O000010O00010O00010O0010O01O01O01O01O000001O001O010O0001O001Nj\\1"}, "frequency": "F", "size": "S"}, {"id": 19455186, "iscrowd": 0, "isfake": 0, "area": 16189.895776263267, "isreflected": 0, "bbox": [498.5224609792, 323.7412109568, 124.7268066304, 129.802856448], "image_id": 1722811, "category_id": 125, "segmentation": {"size": [768, 1024], "counts": 
"VRk;Z1ef0c1]N>C1O1O000000WN_[O3ad0bNd[Oj0L@0M0Mad0Lo[O2L0BG9:M0\\d0MP\\O2L0OOOMVd02P\\O2L00NOMUd03P\\O2L00M0OSd02Q\\O2L00M0a0ac0@c\\O2L00M0b0`c0_Od\\O2L00M0c0_c0^Oe\\O2L00M0c0_c0^Oe\\O2L00M0c0_c0^Oe\\O2L00M0c0_c0^Oe\\O2L00M0c0_c0^Oe\\O2L00M0b0`c0_Od\\O2L0ON1`0ac0@c\\O2L0ON1>cc0Ba\\O2L0ON1ke0EQZO?me0BoYOc0oe0]OPZOf0ne0ZORZOi0le0VOSZOl0le0TOTZOl0me0SOSZOm0me0SOSZOm0ne0SOQZOm0oe0SORZOl0ne0TORZOl0ne0TORZOl0ne0TOPZOn0oe0SOPZOn0Pf0a0O100O10000001O1O1O1O1UNmYOg1Vf001O00001N10001O001N101ROcYO7^f0GcYO9^f0EdYO9^f0EcYO8af0G_YO6df0I]YO3hf0H\\YO5Yg0O2N3N2MW\\Ra0"}, "frequency": "R", "size": "S"}, {"id": 20568676, "iscrowd": 0, "isfake": 0, "area": 639107.1059300419, "isreflected": 0, "bbox": [0.4588622848, 143.33825687040002, 1023.6732178432001, 624.3272704512001], "image_id": 1770249, "category_id": 96, "segmentation": {"size": [768, 1024], "counts": "h:k6`0]Il1T1X:a5jC\\In1S1V:e5jCXIP2S1U:h5hCUIT2S1S:k5fCSIW2R1R:n5eCoHZ2S1P:P6dCnH\\2R1n9^6RFbIm9^6TFbIk9_6UFaIj9`6VF`Ii9`6XF`If9b6ZF^Ie9b6\\F]Id9e6[F[Id9g6[FYIc9k6[FUId9m6[FSId9P7ZFPIe9R7ZFnHe9T7ZFlHd9W7[FiHd9Y7[FgHd9[7[FeHd9^7ZFbHe9`7ZF`Hd9c7[F]Hd9e7[F[Hd9g7[FYHd9i7[FWHc9m7[FSHd9o7[FQHd9Q8[FoGe9R8ZFnGe9T8ZFlGe9V8YFkGf9W8YFiGf9Z8WFfGi9\\8VFdGi9^8UFcGj9_8UFaGj9a8UF_Gj9c8TF^Gk9\\9\\EdFd:a9VE`Fi:g9QEYFn:];O1O1O1O1O1O1O100O1O1O1O1O1O1O001O1O100O1O1O1O1O1O1O1O1O1O2O0O1O1O1O2N1O1O1O1O2O0O2N3M2N3M2O1N3M2N3N1N3M2O2M2N2O2M2N3N1N3N1N3N1N101N2O1N2N2O1N2O1N2O1N101N2O1N2N2O1N2O1N2O0O2O1N2N2O1N2O1N2O1N100O100O1O100O100O100O100O100O1O100O100O100O100O1O100O10000O100O10000O100O10000O100O100O10000O100O10000O100O100O10000O100O10000O100O100O10000O100O10000O100O100O10000O10000O1000000O10000O10000O1000000O10000O10000O1000000O10000O10000O1000000O10000O10000O1000000O1000000O100000000O100000000O100000000O100000000O100000000O1000000O100000000O100000000O100000000O100000000O100000000O100000000O1000000O100000000O10000000000O1000000000000O1000000000000O1000000000000O1000000000000O1000000000000O10000000000O1000000000000O1000000000000O1000000000000O100000000000
0O1000000000000O1000000000000O1000000000000O100000000000000O100000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000001O00000000000000000000000000000000000000000000000000001O00000000000000000000000000000000001O000000000000000000001O00000000000000000000001O000000000000000000001O0000000000000000001O0000000000001O0000000000001O000000000000001O0000000000001O0000000000001O0000000000001O000000000000001O0000000000001O0000000000001O00000000001O0000001O0000001O0000001O0000001O00001O0000001O0000001O0000001O0000001O00001O00O100O100O100O100O100O100O100O10000O100O100O100O100O100O100O100O10000000\\]O[Kma0e4R^O\\Kna0d4R^O\\Kna0d4R^O\\Kna0d4Q^O^Kna0b4R^O^Kna0b4R^O^Kna0b4Q^O`Kna0`4R^O`Kna0`4R^O`Kna0`4Q^ObKna0^4R^ObKna0^4R^ObKna0^4Q^OdKna0\\4R^OdKna0\\4R^OdKna0\\4Q^OfKna0Z4R^OfKna0Z4R^OfKna0Z4R^OgKma0Y4R^OhKna0X4R^OhKna0X4R^OhKna0X4R^OiKma0W4S^OiKma0W4S^OiKma0W4S^OjKla0V4T^OjKla0V4T^OjKla0V4T^OkKka0U4U^OkKka0U4U^OkKka0U4U^OlKja0T4W^OkKia0U4W^OkKia0U4W^OlKha0T4X^OlKha0T4X^OlKha0T4X^OmKga0S4Y^OmKga0S4Y^OmKga0S4Z^OlKfa0T4Z^OmKea0S4[^OmKea0S4[^OmKea0T4Z^OmKea0S4[^OmKea0S4[^OmKea0S4\\^OmKca0T4\\^OlKda0T4]^OkKca0U4]^OlKba0T4_^OkKaa0U4_^OkKaa0V4_^OjK`a0V4`^OjK`a0V4a^OjK^a0V4b^OjK^a0V4b^OkK]a0V4c^OiK]a0W4c^OiK]a0W4d^OiK[a0W4e^OiK[a0X4e^OhKZa0X4f^OhKZa0X4f^OiKYa0W4h^OhKXa0X4h^OhKXa0Y4h^OgKWa0Y4i^OgKWa0Y4i^OhKVa0X4k^OgKUa0Y4k^OhKTa0Y4l^OfKTa0Z4l^OfKTa0Z4l^OgKSa0Y4n^OfKRa0[4m^OfKRa0Z4n^OfKRa0Z4o^OeKQa0[4o^OfKPa0Z4Q_OeKo`0\\4P_OeKo`0[4Q_OeKo`0[4R_OeKm`0[4S_OeKm`0[4T_OdKl`0]4S_OdKl`0\\4T_OdKl`0\\4U_OdKj`0\\4V_OdKj`0]4U_OdKj`0\\4W_OcKi`0]4W_OcKi`0^4V_OcKi`0]4X_ObKh`0^4X_OcKg`0^4X_ObKh`0^4Y_OaKg`0_4Y_ObKf`0_4Y_OaKg`0_4Z_OaKe`0_4[_OaKe`0`4Z_OaKe`0_4\\_O`Kd`0`4\\_OaKc`0`4\\_O`Kd`0`4]_O`Kb`0`4^_O`Kb`0a4]_O`Kb`0`4^_O`Kb`0`4__O`K``0a4__O_Ka`0a4__O_Ka`0a4`_O_K_`0b4`_O^K``0b4`_O_K_`0a4b_O^K^`0c4a_O^K^`0b4b_O^K^`0b4c_O^K\\`0c4c_O]K]`0c4c_O^K\\`0b4e_O]K[`0d4d_O]K[`0c4e_O]K[`0c4f_O]KY`0d4f_O\\KZ`0d4f_O\\KZ`0d4g_O\\KX`0e4g_O[KY`0e4g_O\\KX`0d4i_O[KW`0f4h
_O[KW`0e4j_OZKV`0f4j_O[KU`0f4j_OZKV`0f4k_OZKT`0f4l_OZKT`0g4k_OZKT`0f4m_OYKS`0h4l_OXKT`0h4l_OYKS`0g4n_OXKQ`0j4n_OWKP`0j4P@VKo?k4R@UKl?m4S@SKl?n4T@SKj?n4W@QKh?Q5W@PKg?Q5Y@oJf?R5[@nJc?T5\\@lJb?V5^@kJ`?V5a@iJ^?Y5a@hJ\\?Z5e@eJZ?\\5f@eJW?^5i@aJV?`5j@aJT?`5m@_JQ?d5n@]JP?d5QA\\Jm>e5SA[Jk>h5UAXJi>i5XAVJg>k5ZAUJc>n5^APJa>R6_AkIb>V6_AgIa>\\6_AcI`>_6eAZI[>g6]13N1N3N2N1N3N1O2M3N1O2L3M4L4K4M4L3O2N2O0O2N1O2_BXGh;h8VD[Gh;g8UD[Gj;h8SDZGk;h8RD[Gm;g8PDZGo;h8nC[GPc0ak2:G8I6J6I`N]UMd1Xj2=K5K5K5M2M4L3N2M4M2N2N2N2N2N2N2O1N2N101N2N2O0O2O0O2O1O0O2O001N101O001O0O101O00001O0000001O0001O000001O0001O00001O00001O001O1O000000000O1000000000000000000000000001O01O000000001O000001O000001O00001O1O1O1O01O0000000000000000001O000O1O1000001N11O01O00001O00000010O1O000001O0001O00000000O10000000000000O100000001N1000001N101O0O10000O2O0O101N100O2O0O2N1O2O1N2N1O2N3M2N2N2N1O1O1O2M2O1N3N2M4K9H3L5L4J6L2N2N2M3000O1001O000O100O2O0O2N101N1N3000O1O000000000000000O10000000001O0000000000000O100000000000000000000000000O100000000000000000000000000O100000000000000000001O000O10000000000000000000001O000O100000000000000000001O000O10000000000000000000001O000O100000000000000000001O00000O1000000000000000001O00000O1000000000000000001O0000000000K]UMdNcj2X1:M3N2N2XO_UMEYnQa9"}, "frequency": "R", "size": "L"}, {"id": 21529954, "iscrowd": 0, "isfake": 0, "area": 13390.340823838651, "isreflected": 0, "bbox": [950.7366943744, 370.0421142528, 73.23291013120001, 182.84594726400002], "image_id": 1811034, "category_id": 65, "segmentation": {"size": [768, 1024], "counts": "hPYf06gg04K6J5K5K5K4L5K5K4M4K4L5K4L4L4L4L3M4L4L3M4L3M4K4M4L3M4L3M4K4M4L3M4TOmKg]OV4Yb0mKa]OW4^b0lK\\]OW4db0c000O010O0100O001O1N1N3K5L4L4L4L4L4L4L4L4L4L4L5K4L4L5K4L4L5K4L5K4L4L5K4L5K]D"}, "frequency": "F", "size": "M"}, {"id": 22064315, "iscrowd": 0, "isfake": 0, "area": 6035.722632416379, "isreflected": 0, "bbox": [961.7115478016, 347.2755127296, 61.88989265919997, 97.52355955200005], "image_id": 1833546, "category_id": 319, "segmentation": {"size": [768, 1024], 
"counts": "RSaf02mg020N1000010O0000010O0000010O000001O010O1O0010O10O010O10O01O3OO01^OIXYO7ff0K[YO4df0M\\YO3df0L^YO4`f0M`YO5^f0JcYO7[f0JeYO8Yf0GhYO:Wf0FkYO=oe0DRZO?je0AXZOa0de0_O]ZOc0`e0]ObZOd0Ze0^OfZOb0Ye0^OiZOb0Ve0^OmZOOoN:Tf0GU[O7jd0JX[Oh0Sd0YOj[Oi0Vd0YOa[Om0ad0UOT[OR1nd0o02N2N2N?nMSZOS1]f00O1O100^C"}, "frequency": "R", "size": "M"}, {"id": 22107522, "iscrowd": 0, "isfake": 0, "area": 37994.41642281265, "isreflected": 0, "bbox": [1055.9257811184, 908.5612793232001, 181.72290062880006, 209.0788573775999], "image_id": 1835389, "category_id": 152, "segmentation": {"size": [2472, 2832], "counts": "c\\U_29l\\26K3N3L3M3M3M3M2O2M3N1N2O1N3M2L4M3L4M3M3N1O2J6L4M2N2O2M2B4WfMVMgY2j2;0100000000010O01M2O[Od0L6N200AdLRgM]3jX2mLPgMS3oX2c0O2M201O001O1O10O0100O01000O0100000000003N1N1O3N1N101N1000O10001N102`KbgMP4OlKgX2Z41O100O2N10001O0010O01N1N3N1N101O001O1O1O2N2ON1O1N2N1O2N2N2N1O2O1N2N2N2N2N2O1N1O2N101N1O1O2O0O1O101N100O101N100O101N100N10O100N2N2M3K6L3O1O2N2N1N3L3L4K5M3N2O2M2N3L3L5I7H7N3M3M3M3M3M3M4L3M3M4K4M4L3M4K5L4L4K5K6I9GX`Yh3"}, "frequency": "R", "size": "L"}, {"id": 22879790, "iscrowd": 0, "isfake": 0, "area": 15499.5392421355, "isreflected": 0, "bbox": [343.24243161780004, 770.1613769784, 125.9039306642, 123.10607905860002], "image_id": 1867731, "category_id": 262, "segmentation": {"size": [1023, 806], "counts": "^_d:8do0?B4eQO[OUm0j0]ROCam0?UROKim0V1000O001O0O2O1O001N2O1O0O2O1O1O1N101N12M3L4N2M3N2M3M4M3L4M3L4M3L3N2N1N2O1O2N1O1O2N1O1O1O2N1O1O000000O2O0O2O000O101O0O2O001O10000000O01O1O1O1O1O100O1O1O002N1O1O1O1O2N1O1O1O1O1O2N1O1O1O2N2N2N1O2O1N2N2O0O2N010O010O2N2O1N2N2O2M2O1cNfQOR1\\n0lNfQOR1\\n0mNdQOS1en0N2N4M2N3M4L3M3M2N1O00000000O1N21O1O2N2N2N2N1N1OTlo9"}, "frequency": "R", "size": "M"}, {"id": 24010373, "iscrowd": 0, "isfake": 0, "area": 35361.008300654656, "isreflected": 0, "bbox": [360.3198241792, 274.1910400512, 115.04895027200001, 307.3562011392], "image_id": 1915694, "category_id": 60, "segmentation": {"size": [768, 1024], "counts": 
"mP]83ig08H8H9G8G8I6J7I7J6UZOlM`e0T1bZO0Pc0mNQ_OR1VN0eb0Aa^O?POO^b0E[^O`0YOI\\b0GW^Oc0_OE^b0Dl]On0H[O`b0Db]OV11SO_b0F_]OX14oN`b0G[]O[17jNab0JX]O[1;dNbb00R]O]1?]Ndb04m\\O^1b0ZNdb07j\\O_1bd0`N^[O_1cc0[Nf\\O4Ga1cc0\\Nf\\O2Gb1cc0\\Ng\\O1Fb1cc0_Nh\\OMEd1cc0`N`]O_1ab0aN_]O_1ab0bN_]O]1ab0cN`]O[1ab0eN`]OY1ab0fNl]Om0Tb0TOY^O_YOCaf0>\\YODdf0<[YOEVd00i]Ok0jNiN`b0k0W^OZc0Bf\\O`0OYN]b0W1d]Oc0G]Ndb0o0e]Of0_ObNkb0h0f]Oh0XOfNQc0b0g]Oa1Yb0_Ng]Ob1Xb0^Nh]Ob1Xb0^Nh]Ob1Yb0]Ng]Oc1Yb0]Ng]Oc1Yb0]Ng]Oc1Yb0]Nf]Od1Zb0\\Nf]Od1Zb0\\Nf]Oe1Zb0ZNf]Of1Zb0ZNf]Og1Yb0YNg]Og1Zb0XNf]Oh1Zb0XNf]Oi1Zb0VNf]Oj1[b0UNe]Ok1[b0UNe]Ok1\\b0TNd]Ol1]b0SNc]Om1]b0SNc]Om1^b0RNb]On1_b0QNa]Oo1_b0QNa]Oo1`b0PNa]Oo1`b0PN`]OP2bb0nM^]OR2cb0mM]]OS2eb0kM[]OU2gb0iMY]OW2hb0hMX]OX2jb0fMX]OW2]d0L3M4M2M3M2N2N2N3M2N2N3M3M3M4L3M2M4M3K5H9DVc0He\\OO22\\g01RYO5hf0MnXO?MM`f0EbYOR1OkNle0P2[ZOWNLMhd0l2O1G711O100OI_[OlL2O^d0W31kL`[OV3`d0jL`[OW3_d050000O2O00O01O001O01O01O1O1000O01O01000O1O1O100O1001O1O0O2O001O001N10000O1000O1000O1L31O01O010001O1O001O0000000L4O001O1O1O1001O0O1O10000O21OO1O000O100000N2O100000000O2O1O00001O00000001O001O100O01OO1001O5L0O00100O01O0O2OO2N101O1O2N1O00012N0000000O02N001O5K1O3M2N1N3M2M5F8N1XMW[OW2ld0gMW[OV2kd0gMW[OV2md0hMV[OS2^e0M7aNPZOa0Tf0]OoYO>Uf0@mYOYf0^OiYO?Xf0BkYO9Wf0GiYO8Xf0GiYO8Xf0GjYOM]O0Qg02[aU5"}, "frequency": "R", "size": "M"}, {"id": 361105, "iscrowd": 0, "isfake": 0, "area": 72589.70901199844, "isreflected": 0, "bbox": [159.12719723520001, 445.7435302912, 243.0524902656, 298.6585693184], "image_id": 915597, "category_id": 112, "segmentation": {"size": [1024, 768], "counts": 
"[bP55jo02N2O1N1O1O2N101N1O100O1O100O100O1O1YRODjk0Vk0AjTOb0Sk0_OlTOd0Qk0\\OoTOg0oj0YOPUOj0mj0VOSUOn0jj0ROUUOQ1hj0oNXUOU1dj0kN\\UOY1aj0fN_UO]1^j0cNbUOa1Zj0_NfUOd1Xj0[NhUOi1Tj0XNkUOl1Qj0TNoUOo1ni0QNRVOS2ji0mMVVOV2gi0lMWVOX2ei0iMYVO[2di0fMZVO_2bi0bM[VOd2ai0]M]VOg2`i0ZM]VOl2_i0VM^VOP3]i0QMaVOU3Zi0lLcVOX3[i0iLcVOZ3[i0gLcVO\\3[i0eLbVO^3]i0dL`VO^3`i0bL]VOa3bi0`L\\VOb3ci0_L[VOb3ei0\\101O000010ZJ]VOn4ci0RK_VOl4bi0SK`VOl4`i0SKbVOk4_i0TKdVOi4]i0VKeVOh4\\i0WKfVOh4Zi0WKiVOf4Xi0YKhVOg4Yi0XKhVOg4Yi0XKgVOh4[i0VKeVOk4\\i0SKdVOm4^i0QKbVOo4Wj0nK[UOj2ej0WMeUO^2\\j0bMhUOZ2Wj0VMVUONc0j2Wj0XMWUONc0h2Vj0ZMXUONc0f2Uj0\\MYUOOb0c2Uj0^MZUOOc0a2Sj0_M[UO0c0_2Rj0aM\\UO0c0]2Qj0cM]UO1b0Z2Qj0eM^UO1c0W2oi0hM_UO1d0U2li0jMaUO2d0Q2ki0mMcUO1d0o1ji0oMcUO2e0l1hi0RNdUO3e0h1gi0UNeUO3f0e1ei0XNfUOG@3W1i1ei0]NeUOG_O4V1f1gi0_NeUOG^O4T1f1ji0_NeUOG^O3R1f1mi0_NdUOH]O4P1d1Pj0`NdUOH\\O4n0d1Sj0`NdUOH\\O3l0d1Vj0aNbUOH\\O4j0b1Zj0aNaUOI[O4h06XOf0Wk0VO_UOKZO3f06\\Oc0Vk0YO`UOJXO4e06B=Sk0^O_UOKWO5c04K7mj0E_UOKWO4`071Okj0J^UOLWO4=87Hhj01]UOKWO5;9=_Ofj07\\UOLWO48Xl0A_TOMUO51?[l0^O_TOOVO3Oa0[l0^O_TOOWO3Mb0\\l0[OaTO0WO2Je0]l0ZOaTO0YOT1Vl0kNbTO2WOS1Vl0lNcTO1XOR1Ul0lNdTO2WOR1Tl0mNeTO1XOQ1Sl0mNfTO3VOP1Sl0nNfTO3XOm0Sl0oNfTO4WOm0Rl0POgTO4WOj0Sl0QOgTO5VOj0Rl0ROhTO4UOk0Sl0POiTO6SOj0Sl0QOiTO6SOi0Ul0POgTO9SOh0Ul0POfTO>POc0Zl0oNdTOm1\\k0SNcTOo1]k0QNaTOP2_k0PN`TOR2`k0nM^TOT2ak0lM^TOU2ck0kM[TOW2dk0jMZTOW2gk0hMXTOY2hk0hMWTOX2jk0gMUTOo0AOYl0SOTTOh0ASO3P1Yl0TOSTOj0BPO4O0g0Wl0ASTOh0EmNb0d0gk0GQTOi0d0TO`k03nSOh0^m0XObROg0_m0XOcROg0\\m0ZOeROe0[m0ZOgROd0Ym0]OiROa0Wm0_OjRO`0Um0@nRO>Rm0BoRO]ROBdm0a0YRO_Ohm0b0URO^Omm0c0QRO]Oom0e0nQO\\ORn0g0kQOXOVn0j0gQOWOYn0W110RROnNol0V2K3M2N1O1O2N1K6N1N2N2O0O1O100O00iLbSOm2_l0RMbSOn2]l0SMcSOl2]l0TMcSOl2^l0SMcSOl2]l0UMcSOj2]l0VMdSOj2\\l0UMdSOk2\\l0VMdSOi2\\l0WMeSOh2\\l0WMeSOi2[l0WMeSOh2\\l0XMdSOg2]l0ZMcSOd2^l0\\McSOb2\\l0aMdSO^2Zl0dMhSOY2Xl0iMhSOU2Yl0kMiSOR2Xl0nMiSOOF`1bl0aNiSOKIc1]l0cNlSOFIf1\\l0dN[TOZ1fk0fN[TOX1fk0hNZTOW1fk0jN[TO4PO:el0B\\TO3oN:fl0C[TO2PO:fl0D[TO1PO9fl0F[TOOPO:gl0FYTO0PO:gl0FZTOOPO9gl0G[TONoN:gl
0G[TOOnN9hl0H[TOMoN:gl0G\\TONmN:hl0G\\TONmN:hl0H\\TOMmN9il0H\\TOMlN:il0H]TOLkN4lg0000000000000O10O1000000000000000000000O10000000O1000000000000O1O01000000000000000000000O10O100000000001O000000000000O10O1000000000000000000O10O1000000000000000O1000000000O10000000O1000000000000000O10O1000000000O2O0000000000O10000000000000O100000000000O10000000000000000L5O22L6JSaY6"}, "frequency": "R", "size": "S"}], "categories": [{"name": "Person", "id": 1}, {"name": "Sneakers", "id": 2}, {"name": "Chair", "id": 3}, {"name": "Other Shoes", "id": 4}, {"name": "Hat", "id": 5}, {"name": "Car", "id": 6}, {"name": "Lamp", "id": 7}, {"name": "Glasses", "id": 8}, {"name": "Bottle", "id": 9}, {"name": "Desk", "id": 10}, {"name": "Cup", "id": 11}, {"name": "Street Lights", "id": 12}, {"name": "Cabinet/shelf", "id": 13}, {"name": "Handbag/Satchel", "id": 14}, {"name": "Bracelet", "id": 15}, {"name": "Plate", "id": 16}, {"name": "Picture/Frame", "id": 17}, {"name": "Helmet", "id": 18}, {"name": "Book", "id": 19}, {"name": "Gloves", "id": 20}, {"name": "Storage box", "id": 21}, {"name": "Boat", "id": 22}, {"name": "Leather Shoes", "id": 23}, {"name": "Flower", "id": 24}, {"name": "Bench", "id": 25}, {"name": "Potted Plant", "id": 26}, {"name": "Bowl/Basin", "id": 27}, {"name": "Flag", "id": 28}, {"name": "Pillow", "id": 29}, {"name": "Boots", "id": 30}, {"name": "Vase", "id": 31}, {"name": "Microphone", "id": 32}, {"name": "Necklace", "id": 33}, {"name": "Ring", "id": 34}, {"name": "SUV", "id": 35}, {"name": "Wine Glass", "id": 36}, {"name": "Belt", "id": 37}, {"name": "Moniter/TV", "id": 38}, {"name": "Backpack", "id": 39}, {"name": "Umbrella", "id": 40}, {"name": "Traffic Light", "id": 41}, {"name": "Speaker", "id": 42}, {"name": "Watch", "id": 43}, {"name": "Tie", "id": 44}, {"name": "Trash bin Can", "id": 45}, {"name": "Slippers", "id": 46}, {"name": "Bicycle", "id": 47}, {"name": "Stool", "id": 48}, {"name": "Barrel/bucket", "id": 49}, {"name": "Van", "id": 50}, {"name": "Couch", "id": 51}, 
{"name": "Sandals", "id": 52}, {"name": "Bakset", "id": 53}, {"name": "Drum", "id": 54}, {"name": "Pen/Pencil", "id": 55}, {"name": "Bus", "id": 56}, {"name": "Wild Bird", "id": 57}, {"name": "High Heels", "id": 58}, {"name": "Motorcycle", "id": 59}, {"name": "Guitar", "id": 60}, {"name": "Carpet", "id": 61}, {"name": "Cell Phone", "id": 62}, {"name": "Bread", "id": 63}, {"name": "Camera", "id": 64}, {"name": "Canned", "id": 65}, {"name": "Truck", "id": 66}, {"name": "Traffic cone", "id": 67}, {"name": "Cymbal", "id": 68}, {"name": "Lifesaver", "id": 69}, {"name": "Towel", "id": 70}, {"name": "Stuffed Toy", "id": 71}, {"name": "Candle", "id": 72}, {"name": "Sailboat", "id": 73}, {"name": "Laptop", "id": 74}, {"name": "Awning", "id": 75}, {"name": "Bed", "id": 76}, {"name": "Faucet", "id": 77}, {"name": "Tent", "id": 78}, {"name": "Horse", "id": 79}, {"name": "Mirror", "id": 80}, {"name": "Power outlet", "id": 81}, {"name": "Sink", "id": 82}, {"name": "Apple", "id": 83}, {"name": "Air Conditioner", "id": 84}, {"name": "Knife", "id": 85}, {"name": "Hockey Stick", "id": 86}, {"name": "Paddle", "id": 87}, {"name": "Pickup Truck", "id": 88}, {"name": "Fork", "id": 89}, {"name": "Traffic Sign", "id": 90}, {"name": "Ballon", "id": 91}, {"name": "Tripod", "id": 92}, {"name": "Dog", "id": 93}, {"name": "Spoon", "id": 94}, {"name": "Clock", "id": 95}, {"name": "Pot", "id": 96}, {"name": "Cow", "id": 97}, {"name": "Cake", "id": 98}, {"name": "Dinning Table", "id": 99}, {"name": "Sheep", "id": 100}, {"name": "Hanger", "id": 101}, {"name": "Blackboard/Whiteboard", "id": 102}, {"name": "Napkin", "id": 103}, {"name": "Other Fish", "id": 104}, {"name": "Orange/Tangerine", "id": 105}, {"name": "Toiletry", "id": 106}, {"name": "Keyboard", "id": 107}, {"name": "Tomato", "id": 108}, {"name": "Lantern", "id": 109}, {"name": "Machinery Vehicle", "id": 110}, {"name": "Fan", "id": 111}, {"name": "Green Vegetables", "id": 112}, {"name": "Banana", "id": 113}, {"name": "Baseball Glove", 
"id": 114}, {"name": "Airplane", "id": 115}, {"name": "Mouse", "id": 116}, {"name": "Train", "id": 117}, {"name": "Pumpkin", "id": 118}, {"name": "Soccer", "id": 119}, {"name": "Skiboard", "id": 120}, {"name": "Luggage", "id": 121}, {"name": "Nightstand", "id": 122}, {"name": "Tea pot", "id": 123}, {"name": "Telephone", "id": 124}, {"name": "Trolley", "id": 125}, {"name": "Head Phone", "id": 126}, {"name": "Sports Car", "id": 127}, {"name": "Stop Sign", "id": 128}, {"name": "Dessert", "id": 129}, {"name": "Scooter", "id": 130}, {"name": "Stroller", "id": 131}, {"name": "Crane", "id": 132}, {"name": "Remote", "id": 133}, {"name": "Refrigerator", "id": 134}, {"name": "Oven", "id": 135}, {"name": "Lemon", "id": 136}, {"name": "Duck", "id": 137}, {"name": "Baseball Bat", "id": 138}, {"name": "Surveillance Camera", "id": 139}, {"name": "Cat", "id": 140}, {"name": "Jug", "id": 141}, {"name": "Broccoli", "id": 142}, {"name": "Piano", "id": 143}, {"name": "Pizza", "id": 144}, {"name": "Elephant", "id": 145}, {"name": "Skateboard", "id": 146}, {"name": "Surfboard", "id": 147}, {"name": "Gun", "id": 148}, {"name": "Skating and Skiing shoes", "id": 149}, {"name": "Gas stove", "id": 150}, {"name": "Donut", "id": 151}, {"name": "Bow Tie", "id": 152}, {"name": "Carrot", "id": 153}, {"name": "Toilet", "id": 154}, {"name": "Kite", "id": 155}, {"name": "Strawberry", "id": 156}, {"name": "Other Balls", "id": 157}, {"name": "Shovel", "id": 158}, {"name": "Pepper", "id": 159}, {"name": "Computer Box", "id": 160}, {"name": "Toilet Paper", "id": 161}, {"name": "Cleaning Products", "id": 162}, {"name": "Chopsticks", "id": 163}, {"name": "Microwave", "id": 164}, {"name": "Pigeon", "id": 165}, {"name": "Baseball", "id": 166}, {"name": "Cutting/chopping Board", "id": 167}, {"name": "Coffee Table", "id": 168}, {"name": "Side Table", "id": 169}, {"name": "Scissors", "id": 170}, {"name": "Marker", "id": 171}, {"name": "Pie", "id": 172}, {"name": "Ladder", "id": 173}, {"name": "Snowboard", 
"id": 174}, {"name": "Cookies", "id": 175}, {"name": "Radiator", "id": 176}, {"name": "Fire Hydrant", "id": 177}, {"name": "Basketball", "id": 178}, {"name": "Zebra", "id": 179}, {"name": "Grape", "id": 180}, {"name": "Giraffe", "id": 181}, {"name": "Potato", "id": 182}, {"name": "Sausage", "id": 183}, {"name": "Tricycle", "id": 184}, {"name": "Violin", "id": 185}, {"name": "Egg", "id": 186}, {"name": "Fire Extinguisher", "id": 187}, {"name": "Candy", "id": 188}, {"name": "Fire Truck", "id": 189}, {"name": "Billards", "id": 190}, {"name": "Converter", "id": 191}, {"name": "Bathtub", "id": 192}, {"name": "Wheelchair", "id": 193}, {"name": "Golf Club", "id": 194}, {"name": "Briefcase", "id": 195}, {"name": "Cucumber", "id": 196}, {"name": "Cigar/Cigarette ", "id": 197}, {"name": "Paint Brush", "id": 198}, {"name": "Pear", "id": 199}, {"name": "Heavy Truck", "id": 200}, {"name": "Hamburger", "id": 201}, {"name": "Extractor", "id": 202}, {"name": "Extention Cord", "id": 203}, {"name": "Tong", "id": 204}, {"name": "Tennis Racket", "id": 205}, {"name": "Folder", "id": 206}, {"name": "American Football", "id": 207}, {"name": "earphone", "id": 208}, {"name": "Mask", "id": 209}, {"name": "Kettle", "id": 210}, {"name": "Tennis", "id": 211}, {"name": "Ship", "id": 212}, {"name": "Swing", "id": 213}, {"name": "Coffee Machine", "id": 214}, {"name": "Slide", "id": 215}, {"name": "Carriage", "id": 216}, {"name": "Onion", "id": 217}, {"name": "Green beans", "id": 218}, {"name": "Projector", "id": 219}, {"name": "Frisbee", "id": 220}, {"name": "Washing Machine/Drying Machine", "id": 221}, {"name": "Chicken", "id": 222}, {"name": "Printer", "id": 223}, {"name": "Watermelon", "id": 224}, {"name": "Saxophone", "id": 225}, {"name": "Tissue", "id": 226}, {"name": "Toothbrush", "id": 227}, {"name": "Ice cream", "id": 228}, {"name": "Hotair ballon", "id": 229}, {"name": "Cello", "id": 230}, {"name": "French Fries", "id": 231}, {"name": "Scale", "id": 232}, {"name": "Trophy", "id": 233}, 
{"name": "Cabbage", "id": 234}, {"name": "Hot dog", "id": 235}, {"name": "Blender", "id": 236}, {"name": "Peach", "id": 237}, {"name": "Rice", "id": 238}, {"name": "Wallet/Purse", "id": 239}, {"name": "Volleyball", "id": 240}, {"name": "Deer", "id": 241}, {"name": "Goose", "id": 242}, {"name": "Tape", "id": 243}, {"name": "Tablet", "id": 244}, {"name": "Cosmetics", "id": 245}, {"name": "Trumpet", "id": 246}, {"name": "Pineapple", "id": 247}, {"name": "Golf Ball", "id": 248}, {"name": "Ambulance", "id": 249}, {"name": "Parking meter", "id": 250}, {"name": "Mango", "id": 251}, {"name": "Key", "id": 252}, {"name": "Hurdle", "id": 253}, {"name": "Fishing Rod", "id": 254}, {"name": "Medal", "id": 255}, {"name": "Flute", "id": 256}, {"name": "Brush", "id": 257}, {"name": "Penguin", "id": 258}, {"name": "Megaphone", "id": 259}, {"name": "Corn", "id": 260}, {"name": "Lettuce", "id": 261}, {"name": "Garlic", "id": 262}, {"name": "Swan", "id": 263}, {"name": "Helicopter", "id": 264}, {"name": "Green Onion", "id": 265}, {"name": "Sandwich", "id": 266}, {"name": "Nuts", "id": 267}, {"name": "Speed Limit Sign", "id": 268}, {"name": "Induction Cooker", "id": 269}, {"name": "Broom", "id": 270}, {"name": "Trombone", "id": 271}, {"name": "Plum", "id": 272}, {"name": "Rickshaw", "id": 273}, {"name": "Goldfish", "id": 274}, {"name": "Kiwi fruit", "id": 275}, {"name": "Router/modem", "id": 276}, {"name": "Poker Card", "id": 277}, {"name": "Toaster", "id": 278}, {"name": "Shrimp", "id": 279}, {"name": "Sushi", "id": 280}, {"name": "Cheese", "id": 281}, {"name": "Notepaper", "id": 282}, {"name": "Cherry", "id": 283}, {"name": "Pliers", "id": 284}, {"name": "CD", "id": 285}, {"name": "Pasta", "id": 286}, {"name": "Hammer", "id": 287}, {"name": "Cue", "id": 288}, {"name": "Avocado", "id": 289}, {"name": "Hamimelon", "id": 290}, {"name": "Flask", "id": 291}, {"name": "Mushroon", "id": 292}, {"name": "Screwdriver", "id": 293}, {"name": "Soap", "id": 294}, {"name": "Recorder", "id": 295}, 
{"name": "Bear", "id": 296}, {"name": "Eggplant", "id": 297}, {"name": "Board Eraser", "id": 298}, {"name": "Coconut", "id": 299}, {"name": "Tape Measur/ Ruler", "id": 300}, {"name": "Pig", "id": 301}, {"name": "Showerhead", "id": 302}, {"name": "Globe", "id": 303}, {"name": "Chips", "id": 304}, {"name": "Steak", "id": 305}, {"name": "Crosswalk Sign", "id": 306}, {"name": "Stapler", "id": 307}, {"name": "Campel", "id": 308}, {"name": "Formula 1 ", "id": 309}, {"name": "Pomegranate", "id": 310}, {"name": "Dishwasher", "id": 311}, {"name": "Crab", "id": 312}, {"name": "Hoverboard", "id": 313}, {"name": "Meat ball", "id": 314}, {"name": "Rice Cooker", "id": 315}, {"name": "Tuba", "id": 316}, {"name": "Calculator", "id": 317}, {"name": "Papaya", "id": 318}, {"name": "Antelope", "id": 319}, {"name": "Parrot", "id": 320}, {"name": "Seal", "id": 321}, {"name": "Buttefly", "id": 322}, {"name": "Dumbbell", "id": 323}, {"name": "Donkey", "id": 324}, {"name": "Lion", "id": 325}, {"name": "Urinal", "id": 326}, {"name": "Dolphin", "id": 327}, {"name": "Electric Drill", "id": 328}, {"name": "Hair Dryer", "id": 329}, {"name": "Egg tart", "id": 330}, {"name": "Jellyfish", "id": 331}, {"name": "Treadmill", "id": 332}, {"name": "Lighter", "id": 333}, {"name": "Grapefruit", "id": 334}, {"name": "Game board", "id": 335}, {"name": "Mop", "id": 336}, {"name": "Radish", "id": 337}, {"name": "Baozi", "id": 338}, {"name": "Target", "id": 339}, {"name": "French", "id": 340}, {"name": "Spring Rolls", "id": 341}, {"name": "Monkey", "id": 342}, {"name": "Rabbit", "id": 343}, {"name": "Pencil Case", "id": 344}, {"name": "Yak", "id": 345}, {"name": "Red Cabbage", "id": 346}, {"name": "Binoculars", "id": 347}, {"name": "Asparagus", "id": 348}, {"name": "Barbell", "id": 349}, {"name": "Scallop", "id": 350}, {"name": "Noddles", "id": 351}, {"name": "Comb", "id": 352}, {"name": "Dumpling", "id": 353}, {"name": "Oyster", "id": 354}, {"name": "Table Teniis paddle", "id": 355}, {"name": "Cosmetics 
Brush/Eyeliner Pencil", "id": 356}, {"name": "Chainsaw", "id": 357}, {"name": "Eraser", "id": 358}, {"name": "Lobster", "id": 359}, {"name": "Durian", "id": 360}, {"name": "Okra", "id": 361}, {"name": "Lipstick", "id": 362}, {"name": "Cosmetics Mirror", "id": 363}, {"name": "Curling", "id": 364}, {"name": "Table Tennis ", "id": 365}], "licenses": [{"name": "Attribution-NonCommercial-ShareAlike License", "id": 1, "url": "http://creativecommons.org/licenses/by-nc-sa/2.0/"}, {"name": "Attribution-NonCommercial License", "id": 2, "url": "http://creativecommons.org/licenses/by-nc/2.0/"}, {"name": "Attribution-NonCommercial-NoDerivs License", "id": 3, "url": "http://creativecommons.org/licenses/by-nc-nd/2.0/"}, {"name": "Attribution License", "id": 4, "url": "http://creativecommons.org/licenses/by/2.0/"}, {"name": "Attribution-ShareAlike License", "id": 5, "url": "http://creativecommons.org/licenses/by-sa/2.0/"}, {"name": "Attribution-NoDerivs License", "id": 6, "url": "http://creativecommons.org/licenses/by-nd/2.0/"}, {"name": "No known copyright restrictions", "id": 7, "url": "http://flickr.com/commons/usage/"}, {"name": "United States Government Work", "id": 8, "url": "http://www.usa.gov/copyright.shtml"}]} \ No newline at end of file diff --git a/evaluation/DLC-Bench/annotations/class_names.json b/evaluation/DLC-Bench/annotations/class_names.json new file mode 100644 index 0000000000000000000000000000000000000000..7d3be2399bccb376d9ed02f4df00a1bff822b42f --- /dev/null +++ b/evaluation/DLC-Bench/annotations/class_names.json @@ -0,0 +1,102 @@ +{ + "2391781": "wild bird", + "2580323": "picture/frame", + "4782942": "megaphone/speaker", + "6037269": "showerhead", + "7050495": "handbag", + "8331699": "computer box", + "8556676": "apple", + "11012500": "taco", + "12348080": "scissors", + "16951734": "potato", + "17265254": "rickshaw", + "18845103": "spoon", + "20993402": "tape", + "21529954": "can/container", + "22879790": "garlic", + "24010373": "guitar", + "24694197": 
"avocado", + "279135": "ski", + "622329": "eraser", + "622332": "stapler", + "1075308": "monitor/tv", + "1770866": "sign/banner", + "2391761": "boat", + "2580318": "mouse", + "2588513": "wood block", + "3993075": "marker", + "4027486": "truck", + "4243725": "soap", + "4781902": "stool", + "4782949": "drum", + "5211280": "rice cooker", + "5718392": "storage box", + "6037272": "bottle", + "6820594": "cat", + "5718424": "sneakers", + "6055310": "tape measure/ruler", + "8201777": "van", + "8331685": "headphone", + "8331718": "notebook", + "8557176": "watch", + "8557195": "toaster", + "9766617": "duck/goose", + "11021544": "faucet", + "11775390": "sandals", + "11950619": "table tennis paddle", + "12178946": "bottle", + "12348079": "scale", + "14832137": "barrel/bucket", + "15050320": "wine glass", + "16957916": "lettuce", + "17385866": "ice cream", + "17404769": "suv", + "18217373": "glasses", + "19455186": "cart/trolley", + "19610023": "slippers", + "19610025": "rabbit", + "20568676": "pot", + "21107974": "gavel/mallet", + "22064315": "antelope", + "22107522": "bow tie", + "24017816": "car", + "24498027": "street lights", + "24581953": "dog", + "24786060": "towel", + "25054869": "toilet", + "25273553": "tripod", + "25419495": "tong", + "25419516": "stuffed toy", + "25579493": "bowl", + "297718": "sushi", + "361105": "herb", + "1196168": "air conditioner", + "1894089": "screwdriver", + "2391780": "wild bird", + "4502267": "green bean", + "4604873": "crane", + "4916799": "globe", + "5718415": "tent", + "6012878": "traffic light", + "6820595": "cat", + "8556674": "orange/tangerine", + "8906172": "earphone", + "10666665": "clock", + "10811497": "key", + "11021562": "microwave", + "11021563": "stove", + "12348078": "person", + "13138178": "stool", + "13187927": "motorcycle", + "14490578": "seal", + "14640483": "cutting/chopping board", + "16010041": "chopsticks", + "17072759": "belt", + "17072764": "pear", + "18301585": "bench", + "18680641": "carpet", + "25273528": "hot 
air balloon", + "25419509": "fork", + "25612310": "basket", + "17265253": "rickshaw" +} diff --git a/evaluation/DLC-Bench/annotations/qa.json b/evaluation/DLC-Bench/annotations/qa.json new file mode 100644 index 0000000000000000000000000000000000000000..a4db5e7f24d88dbb59975d65cc509eceb284caf0 --- /dev/null +++ b/evaluation/DLC-Bench/annotations/qa.json @@ -0,0 +1,17174 @@ +{ + "2391781": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wing or the wild bird is not mentioned.", + 0 + ], + [ + "The color of the wing is not mentioned, but the wing of the wild bird is mentioned.", + 0.5 + ], + [ + "The color of the wing is mentioned in the description but is not grey or brown.", + -1 + ], + [ + "The color of the wing is mentioned in the description and is grey or brown.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The feathers or the wild bird are not mentioned.", + 0 + ], + [ + "The color of the feathers is not mentioned, but the feathers of the wild bird are mentioned.", + 0.5 + ], + [ + "The color of the feathers is mentioned in the description but is not white, grey, or brown.", + -1 + ], + [ + "The color of the feathers is mentioned in the description and is white, grey, or brown.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tail or the wild bird is not mentioned.", + 0 + ], + [ + "The shape of the tail is not mentioned, but the tail of the wild bird is mentioned.", + 0.5 + ], + [ + "The shape of the tail is mentioned in the description but is not fan-like.", + -1 + ], + [ + "The shape of the tail is mentioned in the description and is fan-like.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The null or the wild bird is 
not mentioned.", + 0 + ], + [ + "The action of the null is not mentioned, but the null of the wild bird is mentioned.", + 0.5 + ], + [ + "The action of the null is mentioned in the description but is not flying.", + -1 + ], + [ + "The action of the null is mentioned in the description and is flying.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wing or the wild bird is not mentioned.", + 0 + ], + [ + "The position of the wing is not mentioned, but the wing of the wild bird is mentioned.", + 0.5 + ], + [ + "The position of the wing is mentioned in the description but is not extended or outstretched.", + -1 + ], + [ + "The position of the wing is mentioned in the description and is extended or outstretched.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The claws of the wild bird are not mentioned in the description.", + 1 + ], + [ + "The claws of the wild bird are mentioned in the description.", + -1 + ], + [ + "The wild bird is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The boats are not mentioned in the description.", + 1 + ], + [ + "The boats are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chimneys are not mentioned in the description.", + 1 + ], + [ + "The chimneys are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bridge is not mentioned in the description.", + 1 + ], + [ + "The bridge is 
mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The windows are not mentioned in the description.", + 1 + ], + [ + "The windows are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "2580323": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the frame is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the frame is not mentioned, but the frame of the picture/frame is mentioned.", + 0.5 + ], + [ + "The frame or the picture/frame is not mentioned.", + 0 + ], + [ + "The shape of the frame is mentioned in the description but is not rectangular.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The background color of the picture is mentioned in the description and is white.", + 1 + ], + [ + "The background color of the picture is not mentioned, but the picture of the picture/frame is mentioned.", + 0.5 + ], + [ + "The picture or the picture/frame is not mentioned.", + 0 + ], + [ + "The background color of the picture is mentioned in the description but is not white.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the picture is mentioned in the description and is diagram, schematic, or blueprint.", + 1 + ], + [ + "The type of the picture is not mentioned, but the picture of the picture/frame is mentioned.", + 0.5 + ], + [ + "The picture or the picture/frame is not mentioned.", + 0 + ], + [ + "The type of the picture is mentioned in the description but is not diagram, schematic, or blueprint.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the 
following is applicable to the description?", + "choices": [ + [ + "The material of the frame is mentioned in the description and is wood.", + 1 + ], + [ + "The material of the frame is not mentioned, but the frame of the picture/frame is mentioned.", + 0.5 + ], + [ + "The frame or the picture/frame is not mentioned.", + 0 + ], + [ + "The material of the frame is mentioned in the description but is not wood.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The radio is mentioned in the description.", + -1 + ], + [ + "The radio is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The picture/frame is not mentioned in the description.", + 0 + ], + [ + "The glass of the picture/frame are mentioned in the description.", + -1 + ], + [ + "The glass of the picture/frame are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The keyboard is mentioned in the description.", + -1 + ], + [ + "The keyboard is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The light switch is mentioned in the description.", + -1 + ], + [ + "The light switch is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The curtain is mentioned in the description.", + -1 + ], + [ + "The curtain is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient 
negative" + } + ], + "4782942": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The megaphone/speaker is not mentioned.", + 0 + ], + [ + "The shape of the megaphone/speaker is not mentioned.", + 0 + ], + [ + "The shape of the megaphone/speaker is mentioned in the description but is not conical.", + -1 + ], + [ + "The shape of the megaphone/speaker is mentioned in the description and is conical.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The megaphone/speaker is not mentioned.", + 0 + ], + [ + "The color of the megaphone/speaker is not mentioned.", + 0 + ], + [ + "The color of the megaphone/speaker is mentioned in the description but is not gray.", + -1 + ], + [ + "The color of the megaphone/speaker is mentioned in the description and is gray.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The front/opening or the megaphone/speaker is not mentioned.", + 0 + ], + [ + "The shape of the front/opening is not mentioned, but the front/opening of the megaphone/speaker is mentioned.", + 0.5 + ], + [ + "The shape of the front/opening is mentioned in the description but is not round.", + -1 + ], + [ + "The shape of the front/opening is mentioned in the description and is round.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The megaphone/speaker is not mentioned in the description.", + 0 + ], + [ + "The siren button of the megaphone/speaker is not mentioned in the description.", + 1 + ], + [ + "The siren button of the megaphone/speaker is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The 
fence is not mentioned in the description.", + 1 + ], + [ + "The fence is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The megaphone/speaker is not mentioned in the description.", + 0 + ], + [ + "The strap of the megaphone/speaker is not mentioned in the description.", + 1 + ], + [ + "The strap of the megaphone/speaker is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The megaphone/speaker is not mentioned in the description.", + 0 + ], + [ + "The battery compartment of the megaphone/speaker is not mentioned in the description.", + 1 + ], + [ + "The battery compartment of the megaphone/speaker is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The houses are not mentioned in the description.", + 1 + ], + [ + "The houses are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "6037269": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the showerhead is mentioned in the description but is not silver and/or metallic.", + -1 + ], + [ + "The color of the showerhead is not mentioned.", + 0 + ], + [ + "The color of the showerhead is mentioned in the description and is silver and/or metallic.", + 1 + ], + [ + "The showerhead is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the showerhead is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture 
of the showerhead is not mentioned.", + 0 + ], + [ + "The texture of the showerhead is mentioned in the description and is smooth.", + 1 + ], + [ + "The showerhead is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the showerhead is mentioned in the description but is not circular.", + -1 + ], + [ + "The shape of the showerhead is not mentioned.", + 0 + ], + [ + "The shape of the showerhead is mentioned in the description and is circular.", + 1 + ], + [ + "The showerhead is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the showerhead is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the showerhead is not mentioned.", + 0 + ], + [ + "The material of the showerhead is mentioned in the description and is metal.", + 1 + ], + [ + "The showerhead is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the handle is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the handle is not mentioned, but the handle of the showerhead is mentioned.", + 0.5 + ], + [ + "The color of the handle is mentioned in the description and is white.", + 1 + ], + [ + "The handle or the showerhead is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shower hose of the showerhead is mentioned in the description.", + -1 + ], + [ + "The showerhead is not mentioned in the description.", + 0 + ], + [ + "The shower hose of the showerhead is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the 
following is applicable to the description?", + "choices": [ + [ + "The bath caddy is mentioned in the description.", + -1 + ], + [ + "The bath caddy is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bathtub is mentioned in the description.", + -1 + ], + [ + "The bathtub is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The showerhead filter of the showerhead is mentioned in the description.", + -1 + ], + [ + "The showerhead is not mentioned in the description.", + 0 + ], + [ + "The showerhead filter of the showerhead is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet paper holder is mentioned in the description.", + -1 + ], + [ + "The toilet paper holder is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "7050495": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the handbag is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the handbag is not mentioned.", + 0 + ], + [ + "The texture of the handbag is mentioned in the description but is not smooth.", + -1 + ], + [ + "The handbag is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the handbag is mentioned in the description and is black or glossy.", + 1 + ], + [ + "The color of the handbag is not mentioned.", + 0 + ], + [ + "The color of the handbag is 
mentioned in the description but is not black or glossy.", + -1 + ], + [ + "The handbag is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the handbag is mentioned in the description and is leather.", + 1 + ], + [ + "The material of the handbag is not mentioned.", + 0 + ], + [ + "The material of the handbag is mentioned in the description but is not leather.", + -1 + ], + [ + "The handbag is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the handbag is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the handbag is not mentioned.", + 0 + ], + [ + "The shape of the handbag is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The handbag is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The seam of the handbag is mentioned in the description and is visible.", + 1 + ], + [ + "The seam of the handbag is not mentioned.", + 0 + ], + [ + "The seam of the handbag is mentioned in the description but is not visible.", + -1 + ], + [ + "The handbag is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The price tags are not mentioned in the description.", + 1 + ], + [ + "The price tags are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handbag is not mentioned in the description.", + 0 + ], + [ + "The logo of the handbag is not mentioned in the description.", + 1 + ], + [ + "The logo of the handbag is mentioned in 
the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handbag is not mentioned in the description.", + 0 + ], + [ + "The handle of the handbag is not mentioned in the description.", + 1 + ], + [ + "The handle of the handbag is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handbag is not mentioned in the description.", + 0 + ], + [ + "The pocket of the handbag is not mentioned in the description.", + 1 + ], + [ + "The pocket of the handbag is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handbag is not mentioned in the description.", + 0 + ], + [ + "The zipper of the handbag is not mentioned in the description.", + 1 + ], + [ + "The zipper of the handbag is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + } + ], + "8331699": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The computer box is not mentioned.", + 0 + ], + [ + "The shape of the computer box is not mentioned.", + 0 + ], + [ + "The shape of the computer box is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the computer box is mentioned in the description but is not rectangular.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The computer box is not mentioned.", + 0 + ], + [ + "The color of the computer box is not mentioned.", + 0 + ], + [ + "The color of the computer box is mentioned in the description and is black or gray.", + 
1 + ], + [ + "The color of the computer box is mentioned in the description but is not black or gray.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The side panel or the computer box is not mentioned.", + 0 + ], + [ + "The color of the side panel is not mentioned, but the side panel of the computer box is mentioned.", + 0.5 + ], + [ + "The color of the side panel is mentioned in the description and is gray.", + 1 + ], + [ + "The color of the side panel is mentioned in the description but is not gray.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chair is not mentioned in the description.", + 1 + ], + [ + "The chair is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rubber duck is not mentioned in the description.", + 1 + ], + [ + "The rubber duck is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The power button of the computer box is not mentioned in the description.", + 1 + ], + [ + "The computer box is not mentioned in the description.", + 0 + ], + [ + "The power button of the computer box is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sticky notes are not mentioned in the description.", + 1 + ], + [ + "The sticky notes are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the 
description?", + "choices": [ + [ + "The USB ports of the computer box are not mentioned in the description.", + 1 + ], + [ + "The computer box is not mentioned in the description.", + 0 + ], + [ + "The USB ports of the computer box are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + } + ], + "8556676": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the apple is mentioned in the description and is small.", + 1 + ], + [ + "The size of the apple is not mentioned.", + 0 + ], + [ + "The size of the apple is mentioned in the description but is not small.", + -1 + ], + [ + "The apple is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the apple is mentioned in the description and is smooth or glossy.", + 1 + ], + [ + "The texture of the apple is not mentioned.", + 0 + ], + [ + "The texture of the apple is mentioned in the description but is not smooth or glossy.", + -1 + ], + [ + "The apple is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the apple is mentioned in the description and is red.", + 1 + ], + [ + "The color of the apple is not mentioned.", + 0 + ], + [ + "The color of the apple is mentioned in the description but is not red.", + -1 + ], + [ + "The apple is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lemon is mentioned in the description.", + -1 + ], + [ + "The lemon is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange is 
mentioned in the description.", + -1 + ], + [ + "The orange is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pineapple is mentioned in the description.", + -1 + ], + [ + "The pineapple is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The seeds of the apple are mentioned in the description.", + -1 + ], + [ + "The seeds of the apple are not mentioned in the description.", + 1 + ], + [ + "The apple is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The stem of the apple is mentioned in the description.", + -1 + ], + [ + "The stem of the apple is not mentioned in the description.", + 1 + ], + [ + "The apple is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + } + ], + "11012500": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the lettuce is not mentioned, but the lettuce of the taco is mentioned.", + 0.5 + ], + [ + "The shape of the lettuce is mentioned in the description but is not shredded.", + -1 + ], + [ + "The lettuce or the taco is not mentioned.", + 0 + ], + [ + "The shape of the lettuce is mentioned in the description and is shredded.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the tomato is not mentioned, but the tomato of the taco is mentioned.", + 0.5 + ], + [ + "The shape of the tomato is mentioned in the description but is not sliced.", + -1 + ], + [ + 
"The tomato or the taco is not mentioned.", + 0 + ], + [ + "The shape of the tomato is mentioned in the description and is sliced.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the arugula is not mentioned, but the arugula of the taco is mentioned.", + 0.5 + ], + [ + "The color of the arugula is mentioned in the description but is not green.", + -1 + ], + [ + "The arugula or the taco is not mentioned.", + 0 + ], + [ + "The color of the arugula is mentioned in the description and is green.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the tortilla is not mentioned, but the tortilla of the taco is mentioned.", + 0.5 + ], + [ + "The color of the tortilla is mentioned in the description but is not white.", + -1 + ], + [ + "The tortilla or the taco is not mentioned.", + 0 + ], + [ + "The color of the tortilla is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The taco is not mentioned in the description.", + 0 + ], + [ + "The whipped cream of the taco is not mentioned in the description.", + 1 + ], + [ + "The whipped cream of the taco is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The two glasses of lemonade with lemon slices and straws are not mentioned in the description.", + 1 + ], + [ + "The two glasses of lemonade with lemon slices and straws are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The taco is not 
mentioned in the description.", + 0 + ], + [ + "The nuts of the taco are not mentioned in the description.", + 1 + ], + [ + "The nuts of the taco are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sandwich with vegetables are not mentioned in the description.", + 1 + ], + [ + "The sandwich with vegetables are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The taco is not mentioned in the description.", + 0 + ], + [ + "The chocolate of the taco is not mentioned in the description.", + 1 + ], + [ + "The chocolate of the taco is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + } + ], + "12348080": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the handles is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the handles is mentioned in the description and is plastic.", + 1 + ], + [ + "The handles or the scissors are not mentioned.", + 0 + ], + [ + "The material of the handles is not mentioned, but the handles of the scissors are mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the blades is mentioned in the description but is not silver or metallic.", + -1 + ], + [ + "The color of the blades is mentioned in the description and is silver or metallic.", + 1 + ], + [ + "The blades or the scissors are not mentioned.", + 0 + ], + [ + "The color of the blades is not mentioned, but the blades of the scissors are mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the 
following is applicable to the description?", + "choices": [ + [ + "The material of the blades is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the blades is mentioned in the description and is metal.", + 1 + ], + [ + "The blades or the scissors are not mentioned.", + 0 + ], + [ + "The material of the blades is not mentioned, but the blades of the scissors are mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the handles is mentioned in the description but is not red.", + -1 + ], + [ + "The color of the handles is mentioned in the description and is red.", + 1 + ], + [ + "The handles or the scissors are not mentioned.", + 0 + ], + [ + "The color of the handles is not mentioned, but the handles of the scissors are mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The adjustment screw of the scissors is not mentioned in the description.", + 1 + ], + [ + "The adjustment screw of the scissors is mentioned in the description.", + -1 + ], + [ + "The scissors are not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The blade guard of the scissors is not mentioned in the description.", + 1 + ], + [ + "The blade guard of the scissors is mentioned in the description.", + -1 + ], + [ + "The scissors are not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tricycle cart is not mentioned in the description.", + 1 + ], + [ + "The tricycle cart is mentioned in the description.", + -1 + ] + ], + "type": "negative", + 
"subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The baskets of fruit are not mentioned in the description.", + 1 + ], + [ + "The baskets of fruit are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The scale is not mentioned in the description.", + 1 + ], + [ + "The scale is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "16951734": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the potato is not mentioned.", + 0 + ], + [ + "The color of the potato is mentioned in the description and is yellow, golden, or brown.", + 1 + ], + [ + "The potato is not mentioned.", + 0 + ], + [ + "The color of the potato is mentioned in the description but is not yellow, golden, or brown.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the potato is not mentioned.", + 0 + ], + [ + "The texture of the potato is mentioned in the description and is smooth.", + 1 + ], + [ + "The potato is not mentioned.", + 0 + ], + [ + "The texture of the potato is mentioned in the description but is not smooth.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the potato is not mentioned.", + 0 + ], + [ + "The shape of the potato is mentioned in the description and is irregular.", + 1 + ], + [ + "The potato is not mentioned.", + 0 + ], + [ + "The shape of the potato is mentioned in the description but is not irregular.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the 
description?", + "choices": [ + [ + "The cup is not mentioned in the description.", + 1 + ], + [ + "The cup is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sprouts of the potato are not mentioned in the description.", + 1 + ], + [ + "The potato is not mentioned in the description.", + 0 + ], + [ + "The sprouts of the potato are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bowl is not mentioned in the description.", + 1 + ], + [ + "The bowl is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The roots of the potato are not mentioned in the description.", + 1 + ], + [ + "The potato is not mentioned in the description.", + 0 + ], + [ + "The roots of the potato are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The liquid is not mentioned in the description.", + 1 + ], + [ + "The liquid is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "17265254": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the wheels is not mentioned, but the wheels of the rickshaw are mentioned.", + 0.5 + ], + [ + "The wheels or the rickshaw are not mentioned.", + 0 + ], + [ + "The shape of the wheels is mentioned in the description and is circular or spoked.", + 1 + ], + [ + "The shape of the wheels is mentioned in the description but is not 
circular or spoked.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the seat is not mentioned, but the seat of the rickshaw is mentioned.", + 0.5 + ], + [ + "The seat or the rickshaw is not mentioned.", + 0 + ], + [ + "The color of the seat is mentioned in the description and is black.", + 1 + ], + [ + "The color of the seat is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The number of parts of the wheels is not mentioned, but the wheels of the rickshaw are mentioned.", + 0.5 + ], + [ + "The wheels or the rickshaw are not mentioned.", + 0 + ], + [ + "The number of parts of the wheels is mentioned in the description and is 2.", + 1 + ], + [ + "The number of parts of the wheels is mentioned in the description but is not 2.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lights of the rickshaw are not mentioned in the description.", + 1 + ], + [ + "The lights of the rickshaw are mentioned in the description.", + -1 + ], + [ + "The rickshaw is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The people are not mentioned in the description.", + 1 + ], + [ + "The people are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The storage compartment of the rickshaw is not mentioned in the description.", + 1 + ], + [ + "The storage compartment of the rickshaw is mentioned in the description.", + -1 + ], + [ + "The rickshaw is 
not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bamboo forest is not mentioned in the description.", + 1 + ], + [ + "The bamboo forest is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The horn of the rickshaw is not mentioned in the description.", + 1 + ], + [ + "The horn of the rickshaw is mentioned in the description.", + -1 + ], + [ + "The rickshaw is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + } + ], + "18845103": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the handle is not mentioned, but the handle of the spoon is mentioned.", + 0.5 + ], + [ + "The color of the handle is mentioned in the description and is metallic.", + 1 + ], + [ + "The handle or the spoon is not mentioned.", + 0 + ], + [ + "The color of the handle is mentioned in the description but is not metallic.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the bowl is not mentioned, but the bowl of the spoon is mentioned.", + 0.5 + ], + [ + "The material of the bowl is mentioned in the description and is metal.", + 1 + ], + [ + "The bowl or the spoon is not mentioned.", + 0 + ], + [ + "The material of the bowl is mentioned in the description but is not metal.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the bowl is not mentioned, but the bowl of the spoon is mentioned.", + 0.5 + ], + [ + "The color of the bowl is mentioned in the description 
and is metallic.", + 1 + ], + [ + "The bowl or the spoon is not mentioned.", + 0 + ], + [ + "The color of the bowl is mentioned in the description but is not metallic.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the bowl is not mentioned, but the bowl of the spoon is mentioned.", + 0.5 + ], + [ + "The shape of the bowl is mentioned in the description and is round or oval.", + 1 + ], + [ + "The bowl or the spoon is not mentioned.", + 0 + ], + [ + "The shape of the bowl is mentioned in the description but is not round or oval.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the handle is not mentioned, but the handle of the spoon is mentioned.", + 0.5 + ], + [ + "The shape of the handle is mentioned in the description and is long, elongated, straight, or slender.", + 1 + ], + [ + "The handle or the spoon is not mentioned.", + 0 + ], + [ + "The shape of the handle is mentioned in the description but is not long, elongated, straight, or slender.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The spoon is not mentioned in the description.", + 0 + ], + [ + "The engraved handle of the spoon is mentioned in the description.", + -1 + ], + [ + "The engraved handle of the spoon is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cutting board is mentioned in the description.", + -1 + ], + [ + "The cutting board is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ 
+ [ + "The kitchen cabinets are mentioned in the description.", + -1 + ], + [ + "The kitchen cabinets are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sink is mentioned in the description.", + -1 + ], + [ + "The sink is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The spoon is not mentioned in the description.", + 0 + ], + [ + "The twisted handle of the spoon is mentioned in the description.", + -1 + ], + [ + "The twisted handle of the spoon is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + } + ], + "20993402": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the tape is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the tape is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the tape is not mentioned.", + 0 + ], + [ + "The tape is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the tape roll is mentioned in the description but is not beige, white, or transparent.", + -1 + ], + [ + "The color of the tape roll is mentioned in the description and is beige, white, or transparent.", + 1 + ], + [ + "The color of the tape roll is not mentioned, but the tape roll of the tape is mentioned.", + 0.5 + ], + [ + "The tape roll or the tape is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the tape is mentioned in the description but 
is not adhesive tape.", + -1 + ], + [ + "The type of the tape is mentioned in the description and is adhesive tape.", + 1 + ], + [ + "The type of the tape is not mentioned.", + 0 + ], + [ + "The tape is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the core is mentioned in the description but is not cardboard.", + -1 + ], + [ + "The material of the core is mentioned in the description and is cardboard.", + 1 + ], + [ + "The material of the core is not mentioned, but the core of the tape is mentioned.", + 0.5 + ], + [ + "The core or the tape is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the tape roll is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the tape roll is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the tape roll is not mentioned, but the tape roll of the tape is mentioned.", + 0.5 + ], + [ + "The tape roll or the tape is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tape is not mentioned in the description.", + 0 + ], + [ + "The dispenser of the tape is not mentioned in the description.", + 1 + ], + [ + "The dispenser of the tape is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The trees are not mentioned in the description.", + 1 + ], + [ + "The trees are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The window 
is not mentioned in the description.", + 1 + ], + [ + "The window is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The stack of plates are not mentioned in the description.", + 1 + ], + [ + "The stack of plates are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tape is not mentioned in the description.", + 0 + ], + [ + "The cutting edge of the tape is not mentioned in the description.", + 1 + ], + [ + "The cutting edge of the tape is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + } + ], + "21529954": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The can/container is not mentioned.", + 0 + ], + [ + "The material of the can/container is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the can/container is not mentioned.", + 0 + ], + [ + "The material of the can/container is mentioned in the description and is plastic.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cap/lid or the can/container is not mentioned.", + 0 + ], + [ + "The shape of the cap/lid is mentioned in the description but is not circular.", + -1 + ], + [ + "The shape of the cap/lid is not mentioned, but the cap/lid of the can/container is mentioned.", + 0.5 + ], + [ + "The shape of the cap/lid is mentioned in the description and is circular.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cap/lid or the can/container is not mentioned.", + 0 + ], + [ + 
"The color of the cap/lid is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the cap/lid is not mentioned, but the cap/lid of the can/container is mentioned.", + 0.5 + ], + [ + "The color of the cap/lid is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The body or the can/container is not mentioned.", + 0 + ], + [ + "The shape of the body is mentioned in the description but is not cylindrical.", + -1 + ], + [ + "The shape of the body is not mentioned, but the body of the can/container is mentioned.", + 0.5 + ], + [ + "The shape of the body is mentioned in the description and is cylindrical.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The label or the can/container is not mentioned.", + 0 + ], + [ + "The color of the label is mentioned in the description but is not green, white, yellow.", + -1 + ], + [ + "The color of the label is not mentioned, but the label of the can/container is mentioned.", + 0.5 + ], + [ + "The color of the label is mentioned in the description and is green, white, yellow.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The raspberries are mentioned in the description.", + -1 + ], + [ + "The raspberries are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The red bell peppers are mentioned in the description.", + -1 + ], + [ + "The red bell peppers are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + 
"choices": [ + [ + "The ginger is mentioned in the description.", + -1 + ], + [ + "The ginger is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sweet potato is mentioned in the description.", + -1 + ], + [ + "The sweet potato is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The base of the can/container is mentioned in the description.", + -1 + ], + [ + "The base of the can/container is not mentioned in the description.", + 1 + ], + [ + "The can/container is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + } + ], + "22879790": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the garlic is mentioned in the description and is white.", + 1 + ], + [ + "The color of the garlic is not mentioned.", + 0 + ], + [ + "The garlic is not mentioned.", + 0 + ], + [ + "The color of the garlic is mentioned in the description but is not white.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the skin is mentioned in the description and is papery.", + 1 + ], + [ + "The texture of the skin is not mentioned, but the skin of the garlic is mentioned.", + 0.5 + ], + [ + "The skin or the garlic is not mentioned.", + 0 + ], + [ + "The texture of the skin is mentioned in the description but is not papery.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the root is mentioned in the description and is brown.", + 1 + ], + [ + "The color of the 
root is not mentioned, but the root of the garlic is mentioned.", + 0.5 + ], + [ + "The root or the garlic is not mentioned.", + 0 + ], + [ + "The color of the root is mentioned in the description but is not brown.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The leaf of the garlic is not mentioned in the description.", + 1 + ], + [ + "The garlic is not mentioned in the description.", + 0 + ], + [ + "The leaf of the garlic is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ceramic rooster and hen salt and pepper shakers are not mentioned in the description.", + 1 + ], + [ + "The ceramic rooster and hen salt and pepper shakers are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The stem of the garlic is not mentioned in the description.", + 1 + ], + [ + "The garlic is not mentioned in the description.", + 0 + ], + [ + "The stem of the garlic is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ceramic hen salt shaker is not mentioned in the description.", + 1 + ], + [ + "The ceramic hen salt shaker is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green glass rooster is not mentioned in the description.", + 1 + ], + [ + "The green glass rooster is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } 
+ ], + "24010373": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The neck or the guitar is not mentioned.", + 0 + ], + [ + "The shape of the neck is mentioned in the description and is straight or slightly curved.", + 1 + ], + [ + "The shape of the neck is not mentioned, but the neck of the guitar is mentioned.", + 0.5 + ], + [ + "The shape of the neck is mentioned in the description but is not straight or slightly curved.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The body or the guitar is not mentioned.", + 0 + ], + [ + "The texture of the body is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the body is not mentioned, but the body of the guitar is mentioned.", + 0.5 + ], + [ + "The texture of the body is mentioned in the description but is not smooth.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The strings or the guitar are not mentioned.", + 0 + ], + [ + "The number of parts of the strings is mentioned in the description and is 6.", + 1 + ], + [ + "The number of parts of the strings is not mentioned, but the strings of the guitar are mentioned.", + 0.5 + ], + [ + "The number of parts of the strings is mentioned in the description but is not 6.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sound hole or the guitar is not mentioned.", + 0 + ], + [ + "The shape of the sound hole is mentioned in the description and is round.", + 1 + ], + [ + "The shape of the sound hole is not mentioned, but the sound hole of the guitar is mentioned.", + 0.5 + ], + [ + "The shape of the sound hole is mentioned in the description but is not round.", + -1 + ] + ], + "type": "positive" + }, + { + "question": 
"Which of the following is applicable to the description?", + "choices": [ + [ + "The guitar is not mentioned.", + 0 + ], + [ + "The color of the guitar is mentioned in the description and is black.", + 1 + ], + [ + "The color of the guitar is not mentioned.", + 0 + ], + [ + "The color of the guitar is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The violin is not mentioned in the description.", + 1 + ], + [ + "The violin is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The guitar is not mentioned in the description.", + 0 + ], + [ + "The pickguard of the guitar is not mentioned in the description.", + 1 + ], + [ + "The pickguard of the guitar is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sign is not mentioned in the description.", + 1 + ], + [ + "The sign is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The music stands are not mentioned in the description.", + 1 + ], + [ + "The music stands are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The books are not mentioned in the description.", + 1 + ], + [ + "The books are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "24694197": [ + { + "question": "Which of the following is 
applicable to the description?", + "choices": [ + [ + "The skin or the avocado is not mentioned.", + 0 + ], + [ + "The color of the skin is mentioned in the description but is not dark green.", + -1 + ], + [ + "The color of the skin is mentioned in the description and is dark green.", + 1 + ], + [ + "The color of the skin is not mentioned, but the skin of the avocado is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The flesh or the avocado is not mentioned.", + 0 + ], + [ + "The texture of the flesh is mentioned in the description but is not creamy.", + -1 + ], + [ + "The texture of the flesh is mentioned in the description and is creamy.", + 1 + ], + [ + "The texture of the flesh is not mentioned, but the flesh of the avocado is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pit or the avocado is not mentioned.", + 0 + ], + [ + "The shape of the pit is mentioned in the description but is not oval.", + -1 + ], + [ + "The shape of the pit is mentioned in the description and is oval.", + 1 + ], + [ + "The shape of the pit is not mentioned, but the pit of the avocado is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The flesh or the avocado is not mentioned.", + 0 + ], + [ + "The color of the flesh is mentioned in the description but is not light green.", + -1 + ], + [ + "The color of the flesh is mentioned in the description and is light green.", + 1 + ], + [ + "The color of the flesh is not mentioned, but the flesh of the avocado is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The avocado is not mentioned in the description.", + 0 + ], + [ 
+ "The leaves of the avocado are mentioned in the description.", + -1 + ], + [ + "The leaves of the avocado are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The utility pole is mentioned in the description.", + -1 + ], + [ + "The utility pole is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The avocado is not mentioned in the description.", + 0 + ], + [ + "The stem of the avocado is mentioned in the description.", + -1 + ], + [ + "The stem of the avocado is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The building is mentioned in the description.", + -1 + ], + [ + "The building is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The people are mentioned in the description.", + -1 + ], + [ + "The people are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "279135": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The binding or the ski is not mentioned.", + 0 + ], + [ + "The color of the binding is not mentioned, but the binding of the ski is mentioned.", + 0.5 + ], + [ + "The color of the binding is mentioned in the description and is black or orange.", + 1 + ], + [ + "The color of the binding is mentioned in the description but is not black or orange.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of 
the following is applicable to the description?", + "choices": [ + [ + "The deck or the ski is not mentioned.", + 0 + ], + [ + "The color of the deck is not mentioned, but the deck of the ski is mentioned.", + 0.5 + ], + [ + "The color of the deck is mentioned in the description and is black, white, or orange.", + 1 + ], + [ + "The color of the deck is mentioned in the description but is not black, white, or orange.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The binding or the ski is not mentioned.", + 0 + ], + [ + "The material of the binding is not mentioned, but the binding of the ski is mentioned.", + 0.5 + ], + [ + "The material of the binding is mentioned in the description and is metal and plastic.", + 1 + ], + [ + "The material of the binding is mentioned in the description but is not metal and plastic.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The deck or the ski is not mentioned.", + 0 + ], + [ + "The shape of the deck is not mentioned, but the deck of the ski is mentioned.", + 0.5 + ], + [ + "The shape of the deck is mentioned in the description and is slightly curved.", + 1 + ], + [ + "The shape of the deck is mentioned in the description but is not slightly curved.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tail or the ski is not mentioned.", + 0 + ], + [ + "The texture/pattern of the tail is not mentioned, but the tail of the ski is mentioned.", + 0.5 + ], + [ + "The texture/pattern of the tail is mentioned in the description and is geometric shapes.", + 1 + ], + [ + "The texture/pattern of the tail is mentioned in the description but is not geometric shapes.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable 
to the description?", + "choices": [ + [ + "The wheels of the ski are not mentioned in the description.", + 1 + ], + [ + "The ski is not mentioned in the description.", + 0 + ], + [ + "The wheels of the ski are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wooden post is not mentioned in the description.", + 1 + ], + [ + "The wooden post is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The glass window is not mentioned in the description.", + 1 + ], + [ + "The glass window is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ski base of the ski is not mentioned in the description.", + 1 + ], + [ + "The ski is not mentioned in the description.", + 0 + ], + [ + "The ski base of the ski is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ski poles are not mentioned in the description.", + 1 + ], + [ + "The ski poles are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "622329": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The eraser is not mentioned.", + 0 + ], + [ + "The color of the eraser is mentioned in the description but is not brown.", + -1 + ], + [ + "The color of the eraser is mentioned in the description and is brown.", + 1 + ], + [ + "The color of the eraser is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + 
{ + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The eraser is not mentioned.", + 0 + ], + [ + "The material of the eraser is mentioned in the description but is not rubber.", + -1 + ], + [ + "The material of the eraser is mentioned in the description and is rubber.", + 1 + ], + [ + "The material of the eraser is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The corner or the eraser is not mentioned.", + 0 + ], + [ + "The shape of the corner is mentioned in the description but is not rounded.", + -1 + ], + [ + "The shape of the corner is mentioned in the description and is rounded.", + 1 + ], + [ + "The shape of the corner is not mentioned, but the corner of the eraser is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The desk organizer is not mentioned in the description.", + 1 + ], + [ + "The desk organizer is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The paper sleeve of the eraser is not mentioned in the description.", + 1 + ], + [ + "The eraser is not mentioned in the description.", + 0 + ], + [ + "The paper sleeve of the eraser is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The phone is not mentioned in the description.", + 1 + ], + [ + "The phone is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sticky notes are not 
mentioned in the description.", + 1 + ], + [ + "The sticky notes are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tape is not mentioned in the description.", + 1 + ], + [ + "The tape is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "622332": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the base plate is mentioned in the description and is metallic.", + 1 + ], + [ + "The base plate or the stapler is not mentioned.", + 0 + ], + [ + "The material of the base plate is mentioned in the description but is not metallic.", + -1 + ], + [ + "The material of the base plate is not mentioned, but the base plate of the stapler is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the stapler is mentioned in the description and is rectangular.", + 1 + ], + [ + "The stapler is not mentioned.", + 0 + ], + [ + "The shape of the stapler is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the stapler is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the logo is mentioned in the description and is white.", + 1 + ], + [ + "The logo or the stapler is not mentioned.", + 0 + ], + [ + "The color of the logo is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the logo is not mentioned, but the logo of the stapler is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the top cover is 
mentioned in the description and is black.", + 1 + ], + [ + "The top cover or the stapler is not mentioned.", + 0 + ], + [ + "The color of the top cover is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the top cover is not mentioned, but the top cover of the stapler is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the stapler is mentioned in the description and is black.", + 1 + ], + [ + "The stapler is not mentioned.", + 0 + ], + [ + "The color of the stapler is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the stapler is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tape is mentioned in the description.", + -1 + ], + [ + "The tape is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The staple remover of the stapler is mentioned in the description.", + -1 + ], + [ + "The stapler is not mentioned in the description.", + 0 + ], + [ + "The staple remover of the stapler is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The paper clips are mentioned in the description.", + -1 + ], + [ + "The paper clips are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The paintbrushes are mentioned in the description.", + -1 + ], + [ + "The paintbrushes are not mentioned in the description.", + 1 + ] + ], + "type": 
"negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The staple storage compartment of the stapler is mentioned in the description.", + -1 + ], + [ + "The stapler is not mentioned in the description.", + 0 + ], + [ + "The staple storage compartment of the stapler is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + } + ], + "1075308": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the screen is mentioned in the description and is rectangular or rounded rectangular.", + 1 + ], + [ + "The shape of the screen is mentioned in the description but is not rectangular or rounded rectangular.", + -1 + ], + [ + "The screen or the monitor/tv is not mentioned.", + 0 + ], + [ + "The shape of the screen is not mentioned, but the screen of the monitor/tv is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the frame is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the frame is mentioned in the description but is not plastic.", + -1 + ], + [ + "The frame or the monitor/tv is not mentioned.", + 0 + ], + [ + "The material of the frame is not mentioned, but the frame of the monitor/tv is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the monitor/tv is mentioned in the description and is black.", + 1 + ], + [ + "The color of the monitor/tv is mentioned in the description but is not black.", + -1 + ], + [ + "The monitor/tv is not mentioned.", + 0 + ], + [ + "The color of the monitor/tv is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the 
description?", + "choices": [ + [ + "The material of the screen is mentioned in the description and is glass.", + 1 + ], + [ + "The material of the screen is mentioned in the description but is not glass.", + -1 + ], + [ + "The screen or the monitor/tv is not mentioned.", + 0 + ], + [ + "The material of the screen is not mentioned, but the screen of the monitor/tv is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chairs are mentioned in the description.", + -1 + ], + [ + "The chairs are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The remote control of the monitor/tv is mentioned in the description.", + -1 + ], + [ + "The monitor/tv is not mentioned in the description.", + 0 + ], + [ + "The remote control of the monitor/tv is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ports of the monitor/tv are mentioned in the description.", + -1 + ], + [ + "The monitor/tv is not mentioned in the description.", + 0 + ], + [ + "The ports of the monitor/tv are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The glass are mentioned in the description.", + -1 + ], + [ + "The glass are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The range hood is mentioned in the description.", + -1 + ], + [ + "The range hood is not mentioned in the 
description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "1770866": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the text is mentioned in the description but is not handwritten.", + -1 + ], + [ + "The type of the text is not mentioned, but the text of the sign/banner is mentioned.", + 0.5 + ], + [ + "The text or the sign/banner is not mentioned.", + 0 + ], + [ + "The type of the text is mentioned in the description and is handwritten.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the board is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the board is not mentioned, but the board of the sign/banner is mentioned.", + 0.5 + ], + [ + "The board or the sign/banner is not mentioned.", + 0 + ], + [ + "The shape of the board is mentioned in the description and is rectangular.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The background color of the sign/banner is mentioned in the description but is not white.", + -1 + ], + [ + "The background color of the sign/banner is not mentioned.", + 0 + ], + [ + "The sign/banner is not mentioned.", + 0 + ], + [ + "The background color of the sign/banner is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the text is mentioned in the description but is not black or blue and red.", + -1 + ], + [ + "The color of the text is not mentioned, but the text of the sign/banner is mentioned.", + 0.5 + ], + [ + "The text or the sign/banner is not mentioned.", + 0 + ], + [ + "The color of the text is mentioned in the description and is black or blue and red.", 
+ 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The salami is not mentioned in the description.", + 1 + ], + [ + "The salami is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sliced meats are not mentioned in the description.", + 1 + ], + [ + "The sliced meats are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sign/banner is not mentioned in the description.", + 0 + ], + [ + "The duster of the sign/banner is not mentioned in the description.", + 1 + ], + [ + "The duster of the sign/banner is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sign/banner is not mentioned in the description.", + 0 + ], + [ + "The marker of the sign/banner is not mentioned in the description.", + 1 + ], + [ + "The marker of the sign/banner is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The price tags are not mentioned in the description.", + 1 + ], + [ + "The price tags are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "2391761": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The hull or the boat is not mentioned.", + 0 + ], + [ + "The color of the hull is mentioned in the description and is brown.", + 1 + ], + [ + "The color of the hull is mentioned in 
the description but is not brown.", + -1 + ], + [ + "The color of the hull is not mentioned, but the hull of the boat is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cover or the boat is not mentioned.", + 0 + ], + [ + "The color of the cover is mentioned in the description and is blue.", + 1 + ], + [ + "The color of the cover is mentioned in the description but is not blue.", + -1 + ], + [ + "The color of the cover is not mentioned, but the cover of the boat is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The life preserver/life buoy or the boat is not mentioned.", + 0 + ], + [ + "The color of the life preserver/life buoy is mentioned in the description and is red or white.", + 1 + ], + [ + "The color of the life preserver/life buoy is mentioned in the description but is not red or white.", + -1 + ], + [ + "The color of the life preserver/life buoy is not mentioned, but the life preserver/life buoy of the boat is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The motor or the boat is not mentioned.", + 0 + ], + [ + "The color of the motor is mentioned in the description and is black.", + 1 + ], + [ + "The color of the motor is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the motor is not mentioned, but the motor of the boat is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rudder of the boat is mentioned in the description.", + -1 + ], + [ + "The boat is not mentioned in the description.", + 0 + ], + [ + "The rudder of the boat is not mentioned in the description.", + 1 + ] + ], + "type": 
"negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sail of the boat is mentioned in the description.", + -1 + ], + [ + "The boat is not mentioned in the description.", + 0 + ], + [ + "The sail of the boat is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cabin of the boat is mentioned in the description.", + -1 + ], + [ + "The boat is not mentioned in the description.", + 0 + ], + [ + "The cabin of the boat is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The anchor of the boat is mentioned in the description.", + -1 + ], + [ + "The boat is not mentioned in the description.", + 0 + ], + [ + "The anchor of the boat is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ducks are mentioned in the description.", + -1 + ], + [ + "The ducks are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "2580318": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouse is not mentioned.", + 0 + ], + [ + "The color of the mouse is not mentioned.", + 0 + ], + [ + "The color of the mouse is mentioned in the description but is not gray.", + -1 + ], + [ + "The color of the mouse is mentioned in the description and is gray.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouse is not mentioned.", + 0 + ], + 
[ + "The texture of the mouse is not mentioned.", + 0 + ], + [ + "The texture of the mouse is mentioned in the description but is not matte.", + -1 + ], + [ + "The texture of the mouse is mentioned in the description and is matte.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouse is not mentioned.", + 0 + ], + [ + "The shape of the mouse is not mentioned.", + 0 + ], + [ + "The shape of the mouse is mentioned in the description but is not ergonomic.", + -1 + ], + [ + "The shape of the mouse is mentioned in the description and is ergonomic.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Paper is mentioned in the description.", + -1 + ], + [ + "The Paper is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouse is not mentioned in the description.", + 0 + ], + [ + "The cable of the mouse is mentioned in the description.", + -1 + ], + [ + "The cable of the mouse is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Framed diagram is mentioned in the description.", + -1 + ], + [ + "The Framed diagram is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Pen is mentioned in the description.", + -1 + ], + [ + "The Pen is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", 
+ "choices": [ + [ + "The mouse is not mentioned in the description.", + 0 + ], + [ + "The side buttons of the mouse are mentioned in the description.", + -1 + ], + [ + "The side buttons of the mouse are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + } + ], + "2588513": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the wood block is not mentioned.", + 0 + ], + [ + "The color of the wood block is mentioned in the description but is not wooden or brown.", + -1 + ], + [ + "The color of the wood block is mentioned in the description and is wooden or brown.", + 1 + ], + [ + "The wood block is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the top is not mentioned, but the top of the wood block is mentioned.", + 0.5 + ], + [ + "The texture of the top is mentioned in the description but is not grain.", + -1 + ], + [ + "The texture of the top is mentioned in the description and is grain.", + 1 + ], + [ + "The top or the wood block is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wood block is not mentioned in the description.", + 0 + ], + [ + "The metal clip of the wood block is not mentioned in the description.", + 1 + ], + [ + "The metal clip of the wood block is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The machine with a motor are not mentioned in the description.", + 1 + ], + [ + "The machine with a motor are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is 
applicable to the description?", + "choices": [ + [ + "The pickup is not mentioned in the description.", + 1 + ], + [ + "The pickup is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wood block is not mentioned in the description.", + 0 + ], + [ + "The plastic handle of the wood block is not mentioned in the description.", + 1 + ], + [ + "The plastic handle of the wood block is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The copper wire spool is not mentioned in the description.", + 1 + ], + [ + "The copper wire spool is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "3993075": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the cap is mentioned in the description but is not orange.", + -1 + ], + [ + "The color of the cap is mentioned in the description and is orange.", + 1 + ], + [ + "The cap or the marker is not mentioned.", + 0 + ], + [ + "The color of the cap is not mentioned, but the cap of the marker is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the body is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the body is mentioned in the description and is white.", + 1 + ], + [ + "The body or the marker is not mentioned.", + 0 + ], + [ + "The color of the body is not mentioned, but the body of the marker is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of 
the marker is mentioned in the description but is not cylindrical.", + -1 + ], + [ + "The shape of the marker is mentioned in the description and is cylindrical.", + 1 + ], + [ + "The marker is not mentioned.", + 0 + ], + [ + "The shape of the marker is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the body is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the body is mentioned in the description and is plastic.", + 1 + ], + [ + "The body or the marker is not mentioned.", + 0 + ], + [ + "The material of the body is not mentioned, but the body of the marker is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The children are mentioned in the description.", + -1 + ], + [ + "The children are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The table is mentioned in the description.", + -1 + ], + [ + "The table is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clip of the marker is mentioned in the description.", + -1 + ], + [ + "The marker is not mentioned in the description.", + 0 + ], + [ + "The clip of the marker is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The label of the marker is mentioned in the description.", + -1 + ], + [ + "The marker is not mentioned in the description.", + 0 + ], + [ + "The label of the marker is 
not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chairs are mentioned in the description.", + -1 + ], + [ + "The chairs are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "4027486": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the license plate is not mentioned, but the license plate of the truck is mentioned.", + 0.5 + ], + [ + "The license plate or the truck is not mentioned.", + 0 + ], + [ + "The shape of the license plate is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the license plate is mentioned in the description and is rectangular.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the front is not mentioned, but the front of the truck is mentioned.", + 0.5 + ], + [ + "The front or the truck is not mentioned.", + 0 + ], + [ + "The color of the front is mentioned in the description but is not blue.", + -1 + ], + [ + "The color of the front is mentioned in the description and is blue.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the grille is not mentioned, but the grille of the truck is mentioned.", + 0.5 + ], + [ + "The grille or the truck is not mentioned.", + 0 + ], + [ + "The color of the grille is mentioned in the description but is not black, green, or blue.", + -1 + ], + [ + "The color of the grille is mentioned in the description and is black, green, or blue.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The truck is 
not mentioned in the description.", + 0 + ], + [ + "The door handle of the truck is mentioned in the description.", + -1 + ], + [ + "The door handle of the truck is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The truck is not mentioned in the description.", + 0 + ], + [ + "The rear part of the truck is mentioned in the description.", + -1 + ], + [ + "The rear part of the truck is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The truck is not mentioned in the description.", + 0 + ], + [ + "The exhaust pipe of the truck is mentioned in the description.", + -1 + ], + [ + "The exhaust pipe of the truck is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The truck is not mentioned in the description.", + 0 + ], + [ + "The tail light of the truck is mentioned in the description.", + -1 + ], + [ + "The tail light of the truck is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The truck is not mentioned in the description.", + 0 + ], + [ + "The cargo area of the truck is mentioned in the description.", + -1 + ], + [ + "The cargo area of the truck is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + } + ], + "4243725": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the soap is mentioned in the description and is bar soap.", + 1 + ], + [ 
+ "The type of the soap is mentioned in the description but is not bar soap.", + -1 + ], + [ + "The type of the soap is not mentioned.", + 0 + ], + [ + "The soap is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the soap is mentioned in the description and is yellow or brown.", + 1 + ], + [ + "The color of the soap is mentioned in the description but is not yellow or brown.", + -1 + ], + [ + "The color of the soap is not mentioned.", + 0 + ], + [ + "The soap is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The decorative elements of the soap are not mentioned in the description.", + 1 + ], + [ + "The decorative elements of the soap are mentioned in the description.", + -1 + ], + [ + "The soap is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cutting board is not mentioned in the description.", + 1 + ], + [ + "The cutting board is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The dishwasher is not mentioned in the description.", + 1 + ], + [ + "The dishwasher is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The decorative peppers are not mentioned in the description.", + 1 + ], + [ + "The decorative peppers are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is 
applicable to the description?", + "choices": [ + [ + "The logo of the soap is not mentioned in the description.", + 1 + ], + [ + "The logo of the soap is mentioned in the description.", + -1 + ], + [ + "The soap is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + } + ], + "4781902": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the stool is not mentioned.", + 0 + ], + [ + "The stool is not mentioned.", + 0 + ], + [ + "The material of the stool is mentioned in the description but is not wood.", + -1 + ], + [ + "The material of the stool is mentioned in the description and is wood.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the leg is not mentioned, but the leg of the stool is mentioned.", + 0.5 + ], + [ + "The leg or the stool is not mentioned.", + 0 + ], + [ + "The material of the leg is mentioned in the description but is not wood.", + -1 + ], + [ + "The material of the leg is mentioned in the description and is wood.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the crossbar is not mentioned, but the crossbar of the stool is mentioned.", + 0.5 + ], + [ + "The crossbar or the stool is not mentioned.", + 0 + ], + [ + "The shape of the crossbar is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the crossbar is mentioned in the description and is rectangular.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the crossbar is not mentioned, but the crossbar of the stool is mentioned.", + 0.5 + ], + [ + "The crossbar or the stool is not mentioned.", + 0 + ], + [ + "The material of the 
crossbar is mentioned in the description but is not wood.", + -1 + ], + [ + "The material of the crossbar is mentioned in the description and is wood.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the seat is not mentioned, but the seat of the stool is mentioned.", + 0.5 + ], + [ + "The seat or the stool is not mentioned.", + 0 + ], + [ + "The shape of the seat is mentioned in the description but is not flat.", + -1 + ], + [ + "The shape of the seat is mentioned in the description and is flat.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sky is mentioned in the description.", + -1 + ], + [ + "The sky is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The building in the background are mentioned in the description.", + -1 + ], + [ + "The building in the background are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The swing set is mentioned in the description.", + -1 + ], + [ + "The swing set is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The slide is mentioned in the description.", + -1 + ], + [ + "The slide is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The climbing wall is mentioned in the description.", + -1 + ], + [ + 
"The climbing wall is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "4782949": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the drum is mentioned in the description but is not round or circular.", + -1 + ], + [ + "The shape of the drum is mentioned in the description and is round or circular.", + 1 + ], + [ + "The shape of the drum is not mentioned.", + 0 + ], + [ + "The drum is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the body is mentioned in the description but is not red.", + -1 + ], + [ + "The color of the body is mentioned in the description and is red.", + 1 + ], + [ + "The color of the body is not mentioned, but the body of the drum is mentioned.", + 0.5 + ], + [ + "The body or the drum is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the rim is mentioned in the description but is not metallic.", + -1 + ], + [ + "The material of the rim is mentioned in the description and is metallic.", + 1 + ], + [ + "The material of the rim is not mentioned, but the rim of the drum is mentioned.", + 0.5 + ], + [ + "The rim or the drum is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The drum stand of the drum is mentioned in the description.", + -1 + ], + [ + "The drum is not mentioned in the description.", + 0 + ], + [ + "The drum stand of the drum is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Trees are mentioned in 
the description.", + -1 + ], + [ + "The Trees are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The drum pedal of the drum is mentioned in the description.", + -1 + ], + [ + "The drum is not mentioned in the description.", + 0 + ], + [ + "The drum pedal of the drum is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Superior Foods sign is mentioned in the description.", + -1 + ], + [ + "The Superior Foods sign is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The People are mentioned in the description.", + -1 + ], + [ + "The People are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "5211280": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the rice cooker is mentioned in the description and is silver and black.", + 1 + ], + [ + "The color of the rice cooker is mentioned in the description but is not silver and black.", + -1 + ], + [ + "The color of the rice cooker is not mentioned.", + 0 + ], + [ + "The rice cooker is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the handle is mentioned in the description and is curved.", + 1 + ], + [ + "The shape of the handle is mentioned in the description but is not curved.", + -1 + ], + [ + "The shape of the handle is not mentioned, but the handle of the rice cooker is mentioned.", + 0.5 + ], + [ + 
"The handle or the rice cooker is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the display is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the display is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the display is not mentioned, but the display of the rice cooker is mentioned.", + 0.5 + ], + [ + "The display or the rice cooker is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the body is mentioned in the description and is silver.", + 1 + ], + [ + "The color of the body is mentioned in the description but is not silver.", + -1 + ], + [ + "The color of the body is not mentioned, but the body of the rice cooker is mentioned.", + 0.5 + ], + [ + "The body or the rice cooker is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the base is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the base is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the base is not mentioned, but the base of the rice cooker is mentioned.", + 0.5 + ], + [ + "The base or the rice cooker is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The red crates are mentioned in the description.", + -1 + ], + [ + "The red crates are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The generator is mentioned in the description.", + -1 + ], + [ 
+ "The generator is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The steam vent of the rice cooker is mentioned in the description.", + -1 + ], + [ + "The steam vent of the rice cooker is not mentioned in the description.", + 1 + ], + [ + "The rice cooker is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The inner pot of the rice cooker is mentioned in the description.", + -1 + ], + [ + "The inner pot of the rice cooker is not mentioned in the description.", + 1 + ], + [ + "The rice cooker is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The measuring cup of the rice cooker is mentioned in the description.", + -1 + ], + [ + "The measuring cup of the rice cooker is not mentioned in the description.", + 1 + ], + [ + "The rice cooker is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + } + ], + "5718392": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the storage box is mentioned in the description but is not wicker/woven.", + -1 + ], + [ + "The storage box is not mentioned.", + 0 + ], + [ + "The material of the storage box is not mentioned.", + 0 + ], + [ + "The material of the storage box is mentioned in the description and is wicker/woven.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the storage box is mentioned in the description but is not woven.", + -1 + ], + [ 
+ "The storage box is not mentioned.", + 0 + ], + [ + "The texture of the storage box is not mentioned.", + 0 + ], + [ + "The texture of the storage box is mentioned in the description and is woven.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the storage box is mentioned in the description but is not brown.", + -1 + ], + [ + "The storage box is not mentioned.", + 0 + ], + [ + "The color of the storage box is not mentioned.", + 0 + ], + [ + "The color of the storage box is mentioned in the description and is brown.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wheels of the storage box are mentioned in the description.", + -1 + ], + [ + "The wheels of the storage box are not mentioned in the description.", + 1 + ], + [ + "The storage box is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The utensils are mentioned in the description.", + -1 + ], + [ + "The utensils are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle of the storage box is mentioned in the description.", + -1 + ], + [ + "The handle of the storage box is not mentioned in the description.", + 1 + ], + [ + "The storage box is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lock of the storage box is mentioned in the description.", + -1 + ], + [ + "The lock of the storage box is not mentioned in the description.", + 1 + 
], + [ + "The storage box is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The blue canopy is mentioned in the description.", + -1 + ], + [ + "The blue canopy is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "6037272": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle or the bottle is not mentioned.", + 0 + ], + [ + "The material of the bottle is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the bottle is not mentioned, but the bottle of the bottle is mentioned.", + 0.5 + ], + [ + "The material of the bottle is mentioned in the description but is not plastic.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cap or the bottle is not mentioned.", + 0 + ], + [ + "The color of the cap is mentioned in the description and is green.", + 1 + ], + [ + "The color of the cap is not mentioned, but the cap of the bottle is mentioned.", + 0.5 + ], + [ + "The color of the cap is mentioned in the description but is not green.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The text on the label or the bottle are not mentioned.", + 0 + ], + [ + "The color of the text on the label is mentioned in the description and is white.", + 1 + ], + [ + "The color of the text on the label is not mentioned, but the text on the label of the bottle are mentioned.", + 0.5 + ], + [ + "The color of the text on the label is mentioned in the description but is not white.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + 
"choices": [ + [ + "The bottle is not mentioned.", + 0 + ], + [ + "The color of the bottle is mentioned in the description and is green.", + 1 + ], + [ + "The color of the bottle is not mentioned.", + 0 + ], + [ + "The color of the bottle is mentioned in the description but is not green.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cap/top or the bottle is not mentioned.", + 0 + ], + [ + "The shape of the cap/top is mentioned in the description and is flat or tapered.", + 1 + ], + [ + "The shape of the cap/top is not mentioned, but the cap/top of the bottle is mentioned.", + 0.5 + ], + [ + "The shape of the cap/top is mentioned in the description but is not flat or tapered.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The nozzle of the bottle is mentioned in the description.", + -1 + ], + [ + "The bottle is not mentioned in the description.", + 0 + ], + [ + "The nozzle of the bottle is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shower curtain is mentioned in the description.", + -1 + ], + [ + "The shower curtain is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bathtub is mentioned in the description.", + -1 + ], + [ + "The bathtub is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle of the bottle is mentioned in the description.", + -1 + ], + [ + "The bottle is not mentioned in 
the description.", + 0 + ], + [ + "The handle of the bottle is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pump of the bottle is mentioned in the description.", + -1 + ], + [ + "The bottle is not mentioned in the description.", + 0 + ], + [ + "The pump of the bottle is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + } + ], + "6820594": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the ears is mentioned in the description but is not triangular or pointed.", + -1 + ], + [ + "The shape of the ears is not mentioned, but the ears of the cat are mentioned.", + 0.5 + ], + [ + "The shape of the ears is mentioned in the description and is triangular or pointed.", + 1 + ], + [ + "The ears or the cat are not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the eyes is mentioned in the description but is not yellow or green.", + -1 + ], + [ + "The color of the eyes is not mentioned, but the eyes of the cat are mentioned.", + 0.5 + ], + [ + "The color of the eyes is mentioned in the description and is yellow or green.", + 1 + ], + [ + "The eyes or the cat are not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the legs is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the legs is not mentioned, but the legs of the cat are mentioned.", + 0.5 + ], + [ + "The color of the legs is mentioned in the description and is white.", + 1 + ], + [ + "The legs or the cat are not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": 
"Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the eyes is mentioned in the description but is not round.", + -1 + ], + [ + "The shape of the eyes is not mentioned, but the eyes of the cat are mentioned.", + 0.5 + ], + [ + "The shape of the eyes is mentioned in the description and is round.", + 1 + ], + [ + "The eyes or the cat are not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the back is mentioned in the description but is not brown or black.", + -1 + ], + [ + "The color of the back is not mentioned, but the back of the cat is mentioned.", + 0.5 + ], + [ + "The color of the back is mentioned in the description and is brown or black.", + 1 + ], + [ + "The back or the cat is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toothpaste is mentioned in the description.", + -1 + ], + [ + "The toothpaste is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toothbrush is mentioned in the description.", + -1 + ], + [ + "The toothbrush is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mirror is mentioned in the description.", + -1 + ], + [ + "The mirror is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tail of the cat is mentioned in the description.", + -1 + ], + [ + "The tail of the cat is not mentioned in the 
description.", + 1 + ], + [ + "The cat is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The faucet is mentioned in the description.", + -1 + ], + [ + "The faucet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "5718424": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the laces is not mentioned, but the laces of the sneakers are mentioned.", + 0.5 + ], + [ + "The color of the laces is mentioned in the description but is not white or green.", + -1 + ], + [ + "The color of the laces is mentioned in the description and is white or green.", + 1 + ], + [ + "The laces or the sneakers are not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the sneakers is not mentioned.", + 0 + ], + [ + "The color of the sneakers is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the sneakers is mentioned in the description and is black.", + 1 + ], + [ + "The sneakers are not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the sneakers is not mentioned.", + 0 + ], + [ + "The material of the sneakers is mentioned in the description but is not rubber.", + -1 + ], + [ + "The material of the sneakers is mentioned in the description and is rubber.", + 1 + ], + [ + "The sneakers are not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The table is not mentioned in the description.", + 1 + ], + [ + "The table is mentioned in the 
description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The base of the sneakers is not mentioned in the description.", + 1 + ], + [ + "The sneakers are not mentioned in the description.", + 0 + ], + [ + "The base of the sneakers is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The heel of the sneakers is not mentioned in the description.", + 1 + ], + [ + "The sneakers are not mentioned in the description.", + 0 + ], + [ + "The heel of the sneakers is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The person is not mentioned in the description.", + 1 + ], + [ + "The person is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The food is not mentioned in the description.", + 1 + ], + [ + "The food is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "6055310": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the blade is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the blade is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + 0.5 + ], + [ + "The blade or the tape measure/ruler is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the 
description?", + "choices": [ + [ + "The color of the blade is mentioned in the description and is yellow or golden.", + 1 + ], + [ + "The color of the blade is mentioned in the description but is not yellow or golden.", + -1 + ], + [ + "The color of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + 0.5 + ], + [ + "The blade or the tape measure/ruler is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the blade is mentioned in the description and is long.", + 1 + ], + [ + "The size of the blade is mentioned in the description but is not long.", + -1 + ], + [ + "The size of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + 0.5 + ], + [ + "The blade or the tape measure/ruler is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the blade is mentioned in the description and is metal.", + 1 + ], + [ + "The material of the blade is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + 0.5 + ], + [ + "The blade or the tape measure/ruler is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The case of the tape measure/ruler is not mentioned in the description.", + 1 + ], + [ + "The tape measure/ruler is not mentioned in the description.", + 0 + ], + [ + "The case of the tape measure/ruler is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bathtub is not mentioned in the description.", + 1 + ], + 
[ + "The bathtub is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lock of the tape measure/ruler is not mentioned in the description.", + 1 + ], + [ + "The tape measure/ruler is not mentioned in the description.", + 0 + ], + [ + "The lock of the tape measure/ruler is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The door is not mentioned in the description.", + 1 + ], + [ + "The door is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The box is not mentioned in the description.", + 1 + ], + [ + "The box is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "8201777": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the van is mentioned in the description but is not black.", + -1 + ], + [ + "The van is not mentioned.", + 0 + ], + [ + "The color of the van is not mentioned.", + 0 + ], + [ + "The color of the van is mentioned in the description and is black.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the sticker is mentioned in the description but is not white.", + -1 + ], + [ + "The sticker or the van is not mentioned.", + 0 + ], + [ + "The color of the sticker is not mentioned, but the sticker of the van is mentioned.", + 0.5 + ], + [ + "The color of the sticker is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive" + }, + { + "question": 
"Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the taillight is mentioned in the description but is not plastic.", + -1 + ], + [ + "The taillight or the van is not mentioned.", + 0 + ], + [ + "The material of the taillight is not mentioned, but the taillight of the van is mentioned.", + 0.5 + ], + [ + "The material of the taillight is mentioned in the description and is plastic.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the license plate is mentioned in the description but is not yellow.", + -1 + ], + [ + "The license plate or the van is not mentioned.", + 0 + ], + [ + "The color of the license plate is not mentioned, but the license plate of the van is mentioned.", + 0.5 + ], + [ + "The color of the license plate is mentioned in the description and is yellow.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The text of the sign is mentioned in the description but is not taxi.", + -1 + ], + [ + "The sign or the van is not mentioned.", + 0 + ], + [ + "The text of the sign is not mentioned, but the sign of the van is mentioned.", + 0.5 + ], + [ + "The text of the sign is mentioned in the description and is taxi.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The van is not mentioned in the description.", + 0 + ], + [ + "The grill of the van is not mentioned in the description.", + 1 + ], + [ + "The grill of the van is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The building is not mentioned in the description.", + 1 + ], + [ + "The building is mentioned in the 
description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The van is not mentioned in the description.", + 0 + ], + [ + "The front bumper of the van is not mentioned in the description.", + 1 + ], + [ + "The front bumper of the van is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The van is not mentioned in the description.", + 0 + ], + [ + "The antenna of the van is not mentioned in the description.", + 1 + ], + [ + "The antenna of the van is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The person is not mentioned in the description.", + 1 + ], + [ + "The person is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "8331685": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The headphone is not mentioned.", + 0 + ], + [ + "The color of the headphone is not mentioned.", + 0 + ], + [ + "The color of the headphone is mentioned in the description and is black.", + 1 + ], + [ + "The color of the headphone is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ear cup or the headphone is not mentioned.", + 0 + ], + [ + "The type of the ear cup is not mentioned, but the ear cup of the headphone is mentioned.", + 0.5 + ], + [ + "The type of the ear cup is mentioned in the description and is cushioned.", + 1 + ], + [ + "The type of the ear cup is mentioned in the description but is not 
cushioned.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The headband or the headphone is not mentioned.", + 0 + ], + [ + "The shape of the headband is not mentioned, but the headband of the headphone is mentioned.", + 0.5 + ], + [ + "The shape of the headband is mentioned in the description and is curved.", + 1 + ], + [ + "The shape of the headband is mentioned in the description but is not curved.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The microphone of the headphone is not mentioned in the description.", + 1 + ], + [ + "The microphone of the headphone is mentioned in the description.", + -1 + ], + [ + "The headphone is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The keyboard is not mentioned in the description.", + 1 + ], + [ + "The keyboard is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plant is not mentioned in the description.", + 1 + ], + [ + "The plant is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clock is not mentioned in the description.", + 1 + ], + [ + "The clock is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle is not mentioned in the description.", + 1 + ], + [ + "The bottle is mentioned in the 
description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "8331718": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the notebook is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the notebook is mentioned in the description and is black.", + 1 + ], + [ + "The notebook is not mentioned.", + 0 + ], + [ + "The color of the notebook is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the notebook is mentioned in the description but is not spiral-bound.", + -1 + ], + [ + "The type of the notebook is mentioned in the description and is spiral-bound.", + 1 + ], + [ + "The notebook is not mentioned.", + 0 + ], + [ + "The type of the notebook is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The text of the cover is mentioned in the description but is not YAHOO.", + -1 + ], + [ + "The text of the cover is mentioned in the description and is YAHOO.", + 1 + ], + [ + "The cover or the notebook is not mentioned.", + 0 + ], + [ + "The text of the cover is not mentioned, but the cover of the notebook is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the logo/text on the cover is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the logo/text on the cover is mentioned in the description and is white.", + 1 + ], + [ + "The logo/text on the cover or the notebook are not mentioned.", + 0 + ], + [ + "The color of the logo/text on the cover is not mentioned, but the logo/text on the cover of the notebook are mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": 
"Which of the following is applicable to the description?", + "choices": [ + [ + "The chair is mentioned in the description.", + -1 + ], + [ + "The chair is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle is mentioned in the description.", + -1 + ], + [ + "The bottle is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bookmark of the notebook is mentioned in the description.", + -1 + ], + [ + "The notebook is not mentioned in the description.", + 0 + ], + [ + "The bookmark of the notebook is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouse is mentioned in the description.", + -1 + ], + [ + "The mouse is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The keyboard is mentioned in the description.", + -1 + ], + [ + "The keyboard is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "8557176": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The case or the watch is not mentioned.", + 0 + ], + [ + "The color of the case is mentioned in the description but is not golden.", + -1 + ], + [ + "The color of the case is not mentioned, but the case of the watch is mentioned.", + 0.5 + ], + [ + "The color of the case is mentioned in the description and is golden.", + 1 + ] + ], + "type": "positive" + }, + { + 
"question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The case or the watch is not mentioned.", + 0 + ], + [ + "The material of the case is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the case is not mentioned, but the case of the watch is mentioned.", + 0.5 + ], + [ + "The material of the case is mentioned in the description and is metal.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The strap or the watch is not mentioned.", + 0 + ], + [ + "The color of the strap is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the strap is not mentioned, but the strap of the watch is mentioned.", + 0.5 + ], + [ + "The color of the strap is mentioned in the description and is black.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The strap or the watch is not mentioned.", + 0 + ], + [ + "The material of the strap is mentioned in the description but is not leather.", + -1 + ], + [ + "The material of the strap is not mentioned, but the strap of the watch is mentioned.", + 0.5 + ], + [ + "The material of the strap is mentioned in the description and is leather.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The food is not mentioned in the description.", + 1 + ], + [ + "The food is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cup is not mentioned in the description.", + 1 + ], + [ + "The cup is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the 
following is applicable to the description?", + "choices": [ + [ + "The oven is not mentioned in the description.", + 1 + ], + [ + "The oven is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The crab cracker is not mentioned in the description.", + 1 + ], + [ + "The crab cracker is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The jar is not mentioned in the description.", + 1 + ], + [ + "The jar is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "8557195": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the body is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the body is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the body is not mentioned, but the body of the toaster is mentioned.", + 0.5 + ], + [ + "The body or the toaster is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the toaster is mentioned in the description and is white or beige.", + 1 + ], + [ + "The color of the toaster is mentioned in the description but is not white or beige.", + -1 + ], + [ + "The color of the toaster is not mentioned.", + 0 + ], + [ + "The toaster is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the body is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the body is mentioned in the description 
but is not smooth.", + -1 + ], + [ + "The texture of the body is not mentioned, but the body of the toaster is mentioned.", + 0.5 + ], + [ + "The body or the toaster is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lever of the toaster is not mentioned in the description.", + 1 + ], + [ + "The lever of the toaster is mentioned in the description.", + -1 + ], + [ + "The toaster is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The knife is not mentioned in the description.", + 1 + ], + [ + "The knife is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cord of the toaster is not mentioned in the description.", + 1 + ], + [ + "The cord of the toaster is mentioned in the description.", + -1 + ], + [ + "The toaster is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The knob of the toaster is not mentioned in the description.", + 1 + ], + [ + "The knob of the toaster is mentioned in the description.", + -1 + ], + [ + "The toaster is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The utensil is not mentioned in the description.", + 1 + ], + [ + "The utensil is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "9766617": [ + { + "question": "Which of the following is applicable to 
the description?", + "choices": [ + [ + "The head or the duck/goose is not mentioned.", + 0 + ], + [ + "The color of the head is mentioned in the description and is black.", + 1 + ], + [ + "The color of the head is not mentioned, but the head of the duck/goose is mentioned.", + 0.5 + ], + [ + "The color of the head is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The beak or the duck/goose is not mentioned.", + 0 + ], + [ + "The color of the beak is mentioned in the description and is black.", + 1 + ], + [ + "The color of the beak is not mentioned, but the beak of the duck/goose is mentioned.", + 0.5 + ], + [ + "The color of the beak is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The body or the duck/goose is not mentioned.", + 0 + ], + [ + "The color of the body is mentioned in the description and is grey or brown.", + 1 + ], + [ + "The color of the body is not mentioned, but the body of the duck/goose is mentioned.", + 0.5 + ], + [ + "The color of the body is mentioned in the description but is not grey or brown.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tail or the duck/goose is not mentioned.", + 0 + ], + [ + "The color of the tail is mentioned in the description and is black and white.", + 1 + ], + [ + "The color of the tail is not mentioned, but the tail of the duck/goose is mentioned.", + 0.5 + ], + [ + "The color of the tail is mentioned in the description but is not black and white.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wings or the duck/goose are not mentioned.", + 
0 + ], + [ + "The shape of the wings is mentioned in the description and is folded.", + 1 + ], + [ + "The shape of the wings is not mentioned, but the wings of the duck/goose are mentioned.", + 0.5 + ], + [ + "The shape of the wings is mentioned in the description but is not folded.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The feet of the duck/goose are mentioned in the description.", + -1 + ], + [ + "The feet of the duck/goose are not mentioned in the description.", + 1 + ], + [ + "The duck/goose is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mud of the duck/goose is mentioned in the description.", + -1 + ], + [ + "The mud of the duck/goose is not mentioned in the description.", + 1 + ], + [ + "The duck/goose is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The grass are mentioned in the description.", + -1 + ], + [ + "The grass are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pigeon is mentioned in the description.", + -1 + ], + [ + "The pigeon is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tree is mentioned in the description.", + -1 + ], + [ + "The tree is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "11021544": [ + { + "question": "Which of the 
following is applicable to the description?", + "choices": [ + [ + "The texture of the faucet is mentioned in the description but is not metallic.", + -1 + ], + [ + "The texture of the faucet is not mentioned.", + 0 + ], + [ + "The faucet is not mentioned.", + 0 + ], + [ + "The texture of the faucet is mentioned in the description and is metallic.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the spout is mentioned in the description but is not curved or arc.", + -1 + ], + [ + "The shape of the spout is not mentioned, but the spout of the faucet is mentioned.", + 0.5 + ], + [ + "The spout or the faucet is not mentioned.", + 0 + ], + [ + "The shape of the spout is mentioned in the description and is curved or arc.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the faucet is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the faucet is not mentioned.", + 0 + ], + [ + "The faucet is not mentioned.", + 0 + ], + [ + "The material of the faucet is mentioned in the description and is metal.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The soap dispenser of the faucet is not mentioned in the description.", + 1 + ], + [ + "The faucet is not mentioned in the description.", + 0 + ], + [ + "The soap dispenser of the faucet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The side spray of the faucet is not mentioned in the description.", + 1 + ], + [ + "The faucet is not mentioned in the description.", + 0 + ], + [ + "The side spray of the faucet is mentioned in the description.", + 
-1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chair is not mentioned in the description.", + 1 + ], + [ + "The chair is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sprayer of the faucet is not mentioned in the description.", + 1 + ], + [ + "The faucet is not mentioned in the description.", + 0 + ], + [ + "The sprayer of the faucet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cabinet door is not mentioned in the description.", + 1 + ], + [ + "The cabinet door is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "11775390": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sandals are not mentioned.", + 0 + ], + [ + "The color of the sandals is mentioned in the description but is not green.", + -1 + ], + [ + "The color of the sandals is not mentioned.", + 0 + ], + [ + "The color of the sandals is mentioned in the description and is green.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sandals are not mentioned.", + 0 + ], + [ + "The style of the sandals is mentioned in the description but is not perforated.", + -1 + ], + [ + "The style of the sandals is not mentioned.", + 0 + ], + [ + "The style of the sandals is mentioned in the description and is perforated.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sandals 
are not mentioned.", + 0 + ], + [ + "The material of the sandals is mentioned in the description but is not rubber.", + -1 + ], + [ + "The material of the sandals is not mentioned.", + 0 + ], + [ + "The material of the sandals is mentioned in the description and is rubber.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The strap or the sandals is not mentioned.", + 0 + ], + [ + "The number of parts of the strap is mentioned in the description but is not one.", + -1 + ], + [ + "The number of parts of the strap is not mentioned, but the strap of the sandals is mentioned.", + 0.5 + ], + [ + "The number of parts of the strap is mentioned in the description and is one.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The trailer is not mentioned in the description.", + 1 + ], + [ + "The trailer is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tire is not mentioned in the description.", + 1 + ], + [ + "The tire is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pumpkins are not mentioned in the description.", + 1 + ], + [ + "The pumpkins are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The box is not mentioned in the description.", + 1 + ], + [ + "The box is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is 
applicable to the description?", + "choices": [ + [ + "The boy is not mentioned in the description.", + 1 + ], + [ + "The boy is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "11950619": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the handle is mentioned in the description and is white, beige, or wooden color.", + 1 + ], + [ + "The color of the handle is not mentioned, but the handle of the table tennis paddle is mentioned.", + 0.5 + ], + [ + "The handle or the table tennis paddle is not mentioned.", + 0 + ], + [ + "The color of the handle is mentioned in the description but is not white, beige, or wooden color.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the handle is mentioned in the description and is wood.", + 1 + ], + [ + "The material of the handle is not mentioned, but the handle of the table tennis paddle is mentioned.", + 0.5 + ], + [ + "The handle or the table tennis paddle is not mentioned.", + 0 + ], + [ + "The material of the handle is mentioned in the description but is not wood.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the cover is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the cover is not mentioned, but the cover of the table tennis paddle is mentioned.", + 0.5 + ], + [ + "The cover or the table tennis paddle is not mentioned.", + 0 + ], + [ + "The material of the cover is mentioned in the description but is not plastic.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the head is mentioned in the description and is rounded.", + 1 + ], + [ + "The 
shape of the head is not mentioned, but the head of the table tennis paddle is mentioned.", + 0.5 + ], + [ + "The head or the table tennis paddle is not mentioned.", + 0 + ], + [ + "The shape of the head is mentioned in the description but is not rounded.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The dumbbell is not mentioned in the description.", + 1 + ], + [ + "The dumbbell is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The table tennis paddle is not mentioned in the description.", + 0 + ], + [ + "The edge tape of the table tennis paddle is not mentioned in the description.", + 1 + ], + [ + "The edge tape of the table tennis paddle is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The table tennis paddle is not mentioned in the description.", + 0 + ], + [ + "The logo of the table tennis paddle is not mentioned in the description.", + 1 + ], + [ + "The logo of the table tennis paddle is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mat is not mentioned in the description.", + 1 + ], + [ + "The mat is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cabinet is not mentioned in the description.", + 1 + ], + [ + "The cabinet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + 
"12178946": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle is not mentioned.", + 0 + ], + [ + "The shape of the bottle is mentioned in the description and is cylindrical.", + 1 + ], + [ + "The shape of the bottle is not mentioned.", + 0 + ], + [ + "The shape of the bottle is mentioned in the description but is not cylindrical.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cap or the bottle is not mentioned.", + 0 + ], + [ + "The color of the cap is mentioned in the description and is yellow.", + 1 + ], + [ + "The color of the cap is not mentioned, but the cap of the bottle is mentioned.", + 0.5 + ], + [ + "The color of the cap is mentioned in the description but is not yellow.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The label or the bottle is not mentioned.", + 0 + ], + [ + "The color of the label is mentioned in the description and is blue.", + 1 + ], + [ + "The color of the label is not mentioned, but the label of the bottle is mentioned.", + 0.5 + ], + [ + "The color of the label is mentioned in the description but is not blue.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The text or the bottle is not mentioned.", + 0 + ], + [ + "The color of the text is mentioned in the description and is white.", + 1 + ], + [ + "The color of the text is not mentioned, but the text of the bottle is mentioned.", + 0.5 + ], + [ + "The color of the text is mentioned in the description but is not white.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The apple is mentioned in the description.", + -1 + ], + [ + "The apple is not 
mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle is not mentioned in the description.", + 0 + ], + [ + "The handle of the bottle is mentioned in the description.", + -1 + ], + [ + "The handle of the bottle is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle is not mentioned in the description.", + 0 + ], + [ + "The trigger of the bottle is mentioned in the description.", + -1 + ], + [ + "The trigger of the bottle is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The measuring cup is mentioned in the description.", + -1 + ], + [ + "The measuring cup is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cutting board is mentioned in the description.", + -1 + ], + [ + "The cutting board is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "12348079": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The weighing platform or the scale is not mentioned.", + 0 + ], + [ + "The shape of the weighing platform is not mentioned, but the weighing platform of the scale is mentioned.", + 0.5 + ], + [ + "The shape of the weighing platform is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the weighing platform is mentioned in the description but is not rectangular.", + -1 + ] + ], + "type": 
"positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The weighing platform or the scale is not mentioned.", + 0 + ], + [ + "The color of the weighing platform is not mentioned, but the weighing platform of the scale is mentioned.", + 0.5 + ], + [ + "The color of the weighing platform is mentioned in the description and is blue.", + 1 + ], + [ + "The color of the weighing platform is mentioned in the description but is not blue.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The keypad or the scale is not mentioned.", + 0 + ], + [ + "The shape of the keypad is not mentioned, but the keypad of the scale is mentioned.", + 0.5 + ], + [ + "The shape of the keypad is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the keypad is mentioned in the description but is not rectangular.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The display screen or the scale is not mentioned.", + 0 + ], + [ + "The color of the display screen is not mentioned, but the display screen of the scale is mentioned.", + 0.5 + ], + [ + "The color of the display screen is mentioned in the description and is black.", + 1 + ], + [ + "The color of the display screen is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The body or the scale is not mentioned.", + 0 + ], + [ + "The color of the body is not mentioned, but the body of the scale is mentioned.", + 0.5 + ], + [ + "The color of the body is mentioned in the description and is white.", + 1 + ], + [ + "The color of the body is mentioned in the description but is not white.", + -1 + ] + ], + "type": "positive" + }, + { + 
"question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The power cord of the scale is not mentioned in the description.", + 1 + ], + [ + "The scale is not mentioned in the description.", + 0 + ], + [ + "The power cord of the scale is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The usb cable of the scale is not mentioned in the description.", + 1 + ], + [ + "The scale is not mentioned in the description.", + 0 + ], + [ + "The usb cable of the scale is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bicycle is not mentioned in the description.", + 1 + ], + [ + "The bicycle is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The child is not mentioned in the description.", + 1 + ], + [ + "The child is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The woman is not mentioned in the description.", + 1 + ], + [ + "The woman is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "14832137": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The barrel/bucket is not mentioned.", + 0 + ], + [ + "The shape of the barrel/bucket is mentioned in the description and is round or cylindrical.", + 1 + ], + [ + "The shape of the barrel/bucket is mentioned in the description but is not round or cylindrical.", + -1 + ], 
+ [ + "The shape of the barrel/bucket is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle or the barrel/bucket is not mentioned.", + 0 + ], + [ + "The shape of the handle is mentioned in the description and is curved.", + 1 + ], + [ + "The shape of the handle is mentioned in the description but is not curved.", + -1 + ], + [ + "The shape of the handle is not mentioned, but the handle of the barrel/bucket is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The barrel/bucket is not mentioned.", + 0 + ], + [ + "The color of the barrel/bucket is mentioned in the description and is purple.", + 1 + ], + [ + "The color of the barrel/bucket is mentioned in the description but is not purple.", + -1 + ], + [ + "The color of the barrel/bucket is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The barrel/bucket is not mentioned in the description.", + 0 + ], + [ + "The bottom of the barrel/bucket is mentioned in the description.", + -1 + ], + [ + "The bottom of the barrel/bucket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The heart is mentioned in the description.", + -1 + ], + [ + "The heart is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The barrel/bucket is not mentioned in the description.", + 0 + ], + [ + "The lid of the barrel/bucket is mentioned in the description.", + -1 + ], + [ + "The lid of the barrel/bucket is not 
mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cake is mentioned in the description.", + -1 + ], + [ + "The cake is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plate is mentioned in the description.", + -1 + ], + [ + "The plate is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "15050320": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the wine glass is not mentioned.", + 0 + ], + [ + "The wine glass are not mentioned.", + 0 + ], + [ + "The material of the wine glass is mentioned in the description and is glass.", + 1 + ], + [ + "The material of the wine glass is mentioned in the description but is not glass.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the wine glass is not mentioned.", + 0 + ], + [ + "The wine glass are not mentioned.", + 0 + ], + [ + "The color of the wine glass is mentioned in the description and is transparent.", + 1 + ], + [ + "The color of the wine glass is mentioned in the description but is not transparent.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the base is not mentioned, but the base of the wine glass is mentioned.", + 0.5 + ], + [ + "The base or the wine glass is not mentioned.", + 0 + ], + [ + "The shape of the base is mentioned in the description and is round or flat.", + 1 + ], + [ + "The shape of the base is mentioned in the description but is not round or 
flat.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the stem is not mentioned, but the stem of the wine glass is mentioned.", + 0.5 + ], + [ + "The stem or the wine glass is not mentioned.", + 0 + ], + [ + "The shape of the stem is mentioned in the description and is slender.", + 1 + ], + [ + "The shape of the stem is mentioned in the description but is not slender.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bowl of the wine glass is not mentioned in the description.", + 1 + ], + [ + "The bowl of the wine glass is mentioned in the description.", + -1 + ], + [ + "The wine glass are not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plate is not mentioned in the description.", + 1 + ], + [ + "The plate is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rim of the wine glass is not mentioned in the description.", + 1 + ], + [ + "The rim of the wine glass is mentioned in the description.", + -1 + ], + [ + "The wine glass are not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fork is not mentioned in the description.", + 1 + ], + [ + "The fork is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The napkin is not mentioned in the 
description.", + 1 + ], + [ + "The napkin is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "16957916": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lettuce is not mentioned.", + 0 + ], + [ + "The color of the lettuce is not mentioned.", + 0 + ], + [ + "The color of the lettuce is mentioned in the description and is white/green.", + 1 + ], + [ + "The color of the lettuce is mentioned in the description but is not white/green.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lettuce is not mentioned.", + 0 + ], + [ + "The shape of the lettuce is not mentioned.", + 0 + ], + [ + "The shape of the lettuce is mentioned in the description and is curved or irregular.", + 1 + ], + [ + "The shape of the lettuce is mentioned in the description but is not curved or irregular.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The edge or the lettuce is not mentioned.", + 0 + ], + [ + "The shape of the edge is not mentioned, but the edge of the lettuce is mentioned.", + 0.5 + ], + [ + "The shape of the edge is mentioned in the description and is jagged, ruffled, or rough.", + 1 + ], + [ + "The shape of the edge is mentioned in the description but is not jagged, ruffled, or rough.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tartar sauce is mentioned in the description.", + -1 + ], + [ + "The tartar sauce is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fried fish are mentioned in the description.", + -1 + ], 
+ [ + "The fried fish are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plate is mentioned in the description.", + -1 + ], + [ + "The plate is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The french fries are mentioned in the description.", + -1 + ], + [ + "The french fries are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The utensil is mentioned in the description.", + -1 + ], + [ + "The utensil is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "17385866": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the scoop is not mentioned, but the scoop of the ice cream is mentioned.", + 0.5 + ], + [ + "The scoop or the ice cream is not mentioned.", + 0 + ], + [ + "The shape of the scoop is mentioned in the description but is not round.", + -1 + ], + [ + "The shape of the scoop is mentioned in the description and is round.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the cone is not mentioned, but the cone of the ice cream is mentioned.", + 0.5 + ], + [ + "The cone or the ice cream is not mentioned.", + 0 + ], + [ + "The material of the cone is mentioned in the description but is not waffle.", + -1 + ], + [ + "The material of the cone is mentioned in the description and is waffle.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of 
the following is applicable to the description?", + "choices": [ + [ + "The color of the cone is not mentioned, but the cone of the ice cream is mentioned.", + 0.5 + ], + [ + "The cone or the ice cream is not mentioned.", + 0 + ], + [ + "The color of the cone is mentioned in the description but is not yellow.", + -1 + ], + [ + "The color of the cone is mentioned in the description and is yellow.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ice cream is not mentioned in the description.", + 0 + ], + [ + "The cherry of the ice cream is mentioned in the description.", + -1 + ], + [ + "The cherry of the ice cream is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The woman is mentioned in the description.", + -1 + ], + [ + "The woman is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ice cream is not mentioned in the description.", + 0 + ], + [ + "The sprinkles of the ice cream are mentioned in the description.", + -1 + ], + [ + "The sprinkles of the ice cream are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The man is mentioned in the description.", + -1 + ], + [ + "The man is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Turkish flag is mentioned in the description.", + -1 + ], + [ + "The Turkish flag is not mentioned in the 
description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "17404769": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the gas cap is mentioned in the description but is not square.", + -1 + ], + [ + "The shape of the gas cap is not mentioned, but the gas cap of the suv is mentioned.", + 0.5 + ], + [ + "The gas cap or the suv is not mentioned.", + 0 + ], + [ + "The shape of the gas cap is mentioned in the description and is square.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the taillight is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the taillight is not mentioned, but the taillight of the suv is mentioned.", + 0.5 + ], + [ + "The taillight or the suv is not mentioned.", + 0 + ], + [ + "The material of the taillight is mentioned in the description and is plastic.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the suv is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the suv is not mentioned.", + 0 + ], + [ + "The suv is not mentioned.", + 0 + ], + [ + "The color of the suv is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The front bumper of the suv is not mentioned in the description.", + 1 + ], + [ + "The suv is not mentioned in the description.", + 0 + ], + [ + "The front bumper of the suv is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The front wheel of the suv is not mentioned in the 
description.", + 1 + ], + [ + "The suv is not mentioned in the description.", + 0 + ], + [ + "The front wheel of the suv is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The grille of the suv is not mentioned in the description.", + 1 + ], + [ + "The suv is not mentioned in the description.", + 0 + ], + [ + "The grille of the suv is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The building is not mentioned in the description.", + 1 + ], + [ + "The building is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tree is not mentioned in the description.", + 1 + ], + [ + "The tree is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "18217373": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The frame or the glasses is not mentioned.", + 0 + ], + [ + "The color of the frame is not mentioned, but the frame of the glasses is mentioned.", + 0.5 + ], + [ + "The color of the frame is mentioned in the description and is black or metallic.", + 1 + ], + [ + "The color of the frame is mentioned in the description but is not black or metallic.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The frame or the glasses is not mentioned.", + 0 + ], + [ + "The shape of the frame is not mentioned, but the frame of the glasses is mentioned.", + 0.5 + ], + [ + "The shape of the frame is mentioned in the description and 
is square or rounded.", + 1 + ], + [ + "The shape of the frame is mentioned in the description but is not square or rounded.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lens or the glasses are not mentioned.", + 0 + ], + [ + "The color of the lens is not mentioned, but the lens of the glasses are mentioned.", + 0.5 + ], + [ + "The color of the lens is mentioned in the description and is clear.", + 1 + ], + [ + "The color of the lens is mentioned in the description but is not clear.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The temple arm or the glasses is not mentioned.", + 0 + ], + [ + "The shape of the temple arm is not mentioned, but the temple arm of the glasses is mentioned.", + 0.5 + ], + [ + "The shape of the temple arm is mentioned in the description and is curved.", + 1 + ], + [ + "The shape of the temple arm is mentioned in the description but is not curved.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fruit is mentioned in the description.", + -1 + ], + [ + "The fruit is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The women are mentioned in the description.", + -1 + ], + [ + "The women are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The vegetables are mentioned in the description.", + -1 + ], + [ + "The vegetables are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + 
"question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The produce is mentioned in the description.", + -1 + ], + [ + "The produce is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The food is mentioned in the description.", + -1 + ], + [ + "The food is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "19455186": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the cart/trolley is mentioned in the description and is blue.", + 1 + ], + [ + "The color of the cart/trolley is not mentioned.", + 0 + ], + [ + "The color of the cart/trolley is mentioned in the description but is not blue.", + -1 + ], + [ + "The cart/trolley is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the bars is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the bars is not mentioned, but the bars of the cart/trolley are mentioned.", + 0.5 + ], + [ + "The shape of the bars is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The bars or the cart/trolley are not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the wheels is mentioned in the description and is small.", + 1 + ], + [ + "The size of the wheels is not mentioned, but the wheels of the cart/trolley are mentioned.", + 0.5 + ], + [ + "The size of the wheels is mentioned in the description but is not small.", + -1 + ], + [ + "The wheels or the cart/trolley are not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": 
"Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the wheels is mentioned in the description and is black.", + 1 + ], + [ + "The color of the wheels is not mentioned, but the wheels of the cart/trolley are mentioned.", + 0.5 + ], + [ + "The color of the wheels is mentioned in the description but is not black.", + -1 + ], + [ + "The wheels or the cart/trolley are not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lid of the cart/trolley is mentioned in the description.", + -1 + ], + [ + "The cart/trolley is not mentioned in the description.", + 0 + ], + [ + "The lid of the cart/trolley is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The basket of the cart/trolley is mentioned in the description.", + -1 + ], + [ + "The cart/trolley is not mentioned in the description.", + 0 + ], + [ + "The basket of the cart/trolley is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shelves are mentioned in the description.", + -1 + ], + [ + "The shelves are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The drawers of the cart/trolley are mentioned in the description.", + -1 + ], + [ + "The cart/trolley is not mentioned in the description.", + 0 + ], + [ + "The drawers of the cart/trolley are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the 
description?", + "choices": [ + [ + "The lights are mentioned in the description.", + -1 + ], + [ + "The lights are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "19610023": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the slippers is mentioned in the description but is not green.", + -1 + ], + [ + "The slippers are not mentioned.", + 0 + ], + [ + "The color of the slippers is mentioned in the description and is green.", + 1 + ], + [ + "The color of the slippers is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the toe box is mentioned in the description but is not round.", + -1 + ], + [ + "The toe box or the slippers is not mentioned.", + 0 + ], + [ + "The shape of the toe box is mentioned in the description and is round.", + 1 + ], + [ + "The shape of the toe box is not mentioned, but the toe box of the slippers is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the slippers is mentioned in the description but is not frog-shaped.", + -1 + ], + [ + "The slippers are not mentioned.", + 0 + ], + [ + "The shape of the slippers is mentioned in the description and is frog-shaped.", + 1 + ], + [ + "The shape of the slippers is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plant is not mentioned in the description.", + 1 + ], + [ + "The plant is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lining of the slippers is not 
mentioned in the description.", + 1 + ], + [ + "The slippers are not mentioned in the description.", + 0 + ], + [ + "The lining of the slippers is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shirt is not mentioned in the description.", + 1 + ], + [ + "The shirt is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The buckle of the slippers is not mentioned in the description.", + 1 + ], + [ + "The slippers are not mentioned in the description.", + 0 + ], + [ + "The buckle of the slippers is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wall is not mentioned in the description.", + 1 + ], + [ + "The wall is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "19610025": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the ear is mentioned in the description and is large.", + 1 + ], + [ + "The size of the ear is not mentioned, but the ear of the rabbit is mentioned.", + 0.5 + ], + [ + "The ear or the rabbit is not mentioned.", + 0 + ], + [ + "The size of the ear is mentioned in the description but is not large.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the ear is mentioned in the description and is pink.", + 1 + ], + [ + "The color of the ear is not mentioned, but the ear of the rabbit is mentioned.", + 0.5 + ], + [ + "The ear or the rabbit is not mentioned.", + 0 + 
], + [ + "The color of the ear is mentioned in the description but is not pink.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the eye is mentioned in the description and is black.", + 1 + ], + [ + "The color of the eye is not mentioned, but the eye of the rabbit is mentioned.", + 0.5 + ], + [ + "The eye or the rabbit is not mentioned.", + 0 + ], + [ + "The color of the eye is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the shirt is mentioned in the description and is yellow.", + 1 + ], + [ + "The color of the shirt is not mentioned, but the shirt of the rabbit is mentioned.", + 0.5 + ], + [ + "The shirt or the rabbit is not mentioned.", + 0 + ], + [ + "The color of the shirt is mentioned in the description but is not yellow.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the skirt is mentioned in the description and is blue.", + 1 + ], + [ + "The color of the skirt is not mentioned, but the skirt of the rabbit is mentioned.", + 0.5 + ], + [ + "The skirt or the rabbit is not mentioned.", + 0 + ], + [ + "The color of the skirt is mentioned in the description but is not blue.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rabbit is not mentioned in the description.", + 0 + ], + [ + "The whisker of the rabbit is not mentioned in the description.", + 1 + ], + [ + "The whisker of the rabbit is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rabbit is 
not mentioned in the description.", + 0 + ], + [ + "The teeth of the rabbit are not mentioned in the description.", + 1 + ], + [ + "The teeth of the rabbit are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plant is not mentioned in the description.", + 1 + ], + [ + "The plant is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rabbit is not mentioned in the description.", + 0 + ], + [ + "The tail of the rabbit is not mentioned in the description.", + 1 + ], + [ + "The tail of the rabbit is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The poster is not mentioned in the description.", + 1 + ], + [ + "The poster is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "20568676": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the rim is mentioned in the description and is round.", + 1 + ], + [ + "The rim or the pot is not mentioned.", + 0 + ], + [ + "The shape of the rim is mentioned in the description but is not round.", + -1 + ], + [ + "The shape of the rim is not mentioned, but the rim of the pot is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the content is mentioned in the description and is yellow or brown.", + 1 + ], + [ + "The content or the pot is not mentioned.", + 0 + ], + [ + "The color of the content is mentioned in the description but is not 
yellow or brown.", + -1 + ], + [ + "The color of the content is not mentioned, but the content of the pot is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the pot is mentioned in the description and is metal.", + 1 + ], + [ + "The pot is not mentioned.", + 0 + ], + [ + "The material of the pot is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the pot is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The spout of the pot is mentioned in the description.", + -1 + ], + [ + "The spout of the pot is not mentioned in the description.", + 1 + ], + [ + "The pot is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bowl is mentioned in the description.", + -1 + ], + [ + "The bowl is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottom of the pot is mentioned in the description.", + -1 + ], + [ + "The bottom of the pot is not mentioned in the description.", + 1 + ], + [ + "The pot is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lid of the pot is mentioned in the description.", + -1 + ], + [ + "The lid of the pot is not mentioned in the description.", + 1 + ], + [ + "The pot is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is 
applicable to the description?", + "choices": [ + [ + "The stove is mentioned in the description.", + -1 + ], + [ + "The stove is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "21107974": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the head is not mentioned, but the head of the gavel/mallet is mentioned.", + 0.5 + ], + [ + "The shape of the head is mentioned in the description and is round or cylindrical.", + 1 + ], + [ + "The head or the gavel/mallet is not mentioned.", + 0 + ], + [ + "The shape of the head is mentioned in the description but is not round or cylindrical.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the handle is not mentioned, but the handle of the gavel/mallet is mentioned.", + 0.5 + ], + [ + "The shape of the handle is mentioned in the description and is cylindrical.", + 1 + ], + [ + "The handle or the gavel/mallet is not mentioned.", + 0 + ], + [ + "The shape of the handle is mentioned in the description but is not cylindrical.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the gavel/mallet is not mentioned.", + 0 + ], + [ + "The color of the gavel/mallet is mentioned in the description and is brown.", + 1 + ], + [ + "The gavel/mallet is not mentioned.", + 0 + ], + [ + "The color of the gavel/mallet is mentioned in the description but is not brown.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the gavel/mallet is not mentioned.", + 0 + ], + [ + "The material of the gavel/mallet is mentioned in the description and is wood.", + 1 + ], + [ + "The gavel/mallet is not mentioned.", + 0 + ], 
+ [ + "The material of the gavel/mallet is mentioned in the description but is not wood.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The microphone is mentioned in the description.", + -1 + ], + [ + "The microphone is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The man is mentioned in the description.", + -1 + ], + [ + "The man is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The podium is mentioned in the description.", + -1 + ], + [ + "The podium is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The gavel/mallet is not mentioned in the description.", + 0 + ], + [ + "The neck of the gavel/mallet is mentioned in the description.", + -1 + ], + [ + "The neck of the gavel/mallet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sign is mentioned in the description.", + -1 + ], + [ + "The sign is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "22064315": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the horn is mentioned in the description and is long, curved, or pointed.", + 1 + ], + [ + "The shape of the horn is mentioned in the description but is not long, curved, or pointed.", + -1 + ], + [ + "The 
shape of the horn is not mentioned, but the horn of the antelope is mentioned.", + 0.5 + ], + [ + "The horn or the antelope is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the horn is mentioned in the description and is grey, black, or dark.", + 1 + ], + [ + "The color of the horn is mentioned in the description but is not grey, black, or dark.", + -1 + ], + [ + "The color of the horn is not mentioned, but the horn of the antelope is mentioned.", + 0.5 + ], + [ + "The horn or the antelope is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tail of the antelope is mentioned in the description.", + -1 + ], + [ + "The tail of the antelope is not mentioned in the description.", + 1 + ], + [ + "The antelope is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The deer are mentioned in the description.", + -1 + ], + [ + "The deer are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The car is mentioned in the description.", + -1 + ], + [ + "The car is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tree is mentioned in the description.", + -1 + ], + [ + "The tree is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + 
"The grass are mentioned in the description.", + -1 + ], + [ + "The grass are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "22107522": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the bow tie is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the bow tie is mentioned in the description and is smooth.", + 1 + ], + [ + "The bow tie is not mentioned.", + 0 + ], + [ + "The texture of the bow tie is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the bow tie is mentioned in the description but is not butterfly-shaped.", + -1 + ], + [ + "The shape of the bow tie is mentioned in the description and is butterfly-shaped.", + 1 + ], + [ + "The bow tie is not mentioned.", + 0 + ], + [ + "The shape of the bow tie is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the bow tie is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the bow tie is mentioned in the description and is black.", + 1 + ], + [ + "The bow tie is not mentioned.", + 0 + ], + [ + "The color of the bow tie is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the bow tie is mentioned in the description but is not fabric.", + -1 + ], + [ + "The material of the bow tie is mentioned in the description and is fabric.", + 1 + ], + [ + "The bow tie is not mentioned.", + 0 + ], + [ + "The material of the bow tie is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ 
+ [ + "The bow tie is not mentioned in the description.", + 0 + ], + [ + "The neck band of the bow tie is mentioned in the description.", + -1 + ], + [ + "The neck band of the bow tie is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The trumpet is mentioned in the description.", + -1 + ], + [ + "The trumpet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The suit is mentioned in the description.", + -1 + ], + [ + "The suit is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The person is mentioned in the description.", + -1 + ], + [ + "The person is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The glasses are mentioned in the description.", + -1 + ], + [ + "The glasses are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "24017816": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the windshield is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The windshield or the car is not mentioned.", + 0 + ], + [ + "The shape of the windshield is not mentioned, but the windshield of the car is mentioned.", + 0.5 + ], + [ + "The shape of the windshield is mentioned in the description and is rectangular.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is 
applicable to the description?", + "choices": [ + [ + "The color of the car is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The car is not mentioned.", + 0 + ], + [ + "The color of the car is not mentioned.", + 0 + ], + [ + "The color of the car is mentioned in the description and is dark or black.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the window is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The window or the car is not mentioned.", + 0 + ], + [ + "The color of the window is not mentioned, but the window of the car is mentioned.", + 0.5 + ], + [ + "The color of the window is mentioned in the description and is dark or black.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the side mirror is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The side mirror or the car is not mentioned.", + 0 + ], + [ + "The color of the side mirror is not mentioned, but the side mirror of the car is mentioned.", + 0.5 + ], + [ + "The color of the side mirror is mentioned in the description and is dark or black.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fender of the car is not mentioned in the description.", + 1 + ], + [ + "The fender of the car is mentioned in the description.", + -1 + ], + [ + "The car is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The taillight of the car is not mentioned in the description.", + 1 + ], + [ + "The taillight of the car is mentioned in the description.", + -1 + ], + [ + "The car is not 
mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tire of the car is not mentioned in the description.", + 1 + ], + [ + "The tire of the car is mentioned in the description.", + -1 + ], + [ + "The car is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The exhaust pipe of the car is not mentioned in the description.", + 1 + ], + [ + "The exhaust pipe of the car is mentioned in the description.", + -1 + ], + [ + "The car is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The hood of the car is not mentioned in the description.", + 1 + ], + [ + "The hood of the car is mentioned in the description.", + -1 + ], + [ + "The car is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + } + ], + "24498027": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the pole is mentioned in the description but is not black or dark.", + -1 + ], + [ + "The color of the pole is not mentioned, but the pole of the street lights is mentioned.", + 0.5 + ], + [ + "The color of the pole is mentioned in the description and is black or dark.", + 1 + ], + [ + "The pole or the street lights is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the lamp is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the lamp is not mentioned, but the lamp of the street lights is mentioned.", + 
0.5 + ], + [ + "The color of the lamp is mentioned in the description and is white.", + 1 + ], + [ + "The lamp or the street lights is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The direction of the bars is mentioned in the description but is not horizontal.", + -1 + ], + [ + "The direction of the bars is not mentioned, but the bars of the street lights are mentioned.", + 0.5 + ], + [ + "The direction of the bars is mentioned in the description and is horizontal.", + 1 + ], + [ + "The bars or the street lights are not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The street lights are not mentioned in the description.", + 0 + ], + [ + "The cable of the street lights is mentioned in the description.", + -1 + ], + [ + "The cable of the street lights is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The street lights are not mentioned in the description.", + 0 + ], + [ + "The wire of the street lights is mentioned in the description.", + -1 + ], + [ + "The wire of the street lights is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bus is mentioned in the description.", + -1 + ], + [ + "The bus is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bike is mentioned in the description.", + -1 + ], + [ + "The bike is not mentioned in the description.", + 1 + ] + ], + "type": 
"negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ambulance is mentioned in the description.", + -1 + ], + [ + "The ambulance is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "24581953": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the dog is mentioned in the description but is not white or gray.", + -1 + ], + [ + "The color of the dog is mentioned in the description and is white or gray.", + 1 + ], + [ + "The color of the dog is not mentioned.", + 0 + ], + [ + "The dog is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The posture of the dog is mentioned in the description but is not lying down.", + -1 + ], + [ + "The posture of the dog is mentioned in the description and is lying down.", + 1 + ], + [ + "The posture of the dog is not mentioned.", + 0 + ], + [ + "The dog is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the tail is mentioned in the description but is not large or long.", + -1 + ], + [ + "The size of the tail is mentioned in the description and is large or long.", + 1 + ], + [ + "The size of the tail is not mentioned, but the tail of the dog is mentioned.", + 0.5 + ], + [ + "The tail or the dog is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the dog is mentioned in the description but is not large.", + -1 + ], + [ + "The size of the dog is mentioned in the description and is large.", + 1 + ], + [ + "The size of the dog is not mentioned.", + 0 + ], + [ + "The dog is 
not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the coat is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the coat is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the coat is not mentioned, but the coat of the dog is mentioned.", + 0.5 + ], + [ + "The coat or the dog is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The temple is not mentioned in the description.", + 1 + ], + [ + "The temple is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouth of the dog is not mentioned in the description.", + 1 + ], + [ + "The mouth of the dog is mentioned in the description.", + -1 + ], + [ + "The dog is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The eye of the dog is not mentioned in the description.", + 1 + ], + [ + "The eye of the dog is mentioned in the description.", + -1 + ], + [ + "The dog is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The nose of the dog is not mentioned in the description.", + 1 + ], + [ + "The nose of the dog is mentioned in the description.", + -1 + ], + [ + "The dog is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The 
bird is not mentioned in the description.", + 1 + ], + [ + "The bird is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "24786060": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the towel is mentioned in the description but is not gray or white.", + -1 + ], + [ + "The color of the towel is not mentioned.", + 0 + ], + [ + "The color of the towel is mentioned in the description and is gray or white.", + 1 + ], + [ + "The towel is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the towel is mentioned in the description but is not plush.", + -1 + ], + [ + "The texture of the towel is not mentioned.", + 0 + ], + [ + "The texture of the towel is mentioned in the description and is plush.", + 1 + ], + [ + "The towel is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the towel is mentioned in the description but is not irregular.", + -1 + ], + [ + "The shape of the towel is not mentioned.", + 0 + ], + [ + "The shape of the towel is mentioned in the description and is irregular.", + 1 + ], + [ + "The towel is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The loop of the towel is mentioned in the description.", + -1 + ], + [ + "The loop of the towel is not mentioned in the description.", + 1 + ], + [ + "The towel is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The vanity is mentioned in the description.", + -1 + ], + [ + "The vanity is not 
mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sink is mentioned in the description.", + -1 + ], + [ + "The sink is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The design of the towel is mentioned in the description.", + -1 + ], + [ + "The design of the towel is not mentioned in the description.", + 1 + ], + [ + "The towel is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet is mentioned in the description.", + -1 + ], + [ + "The toilet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "25054869": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lid or the toilet is not mentioned.", + 0 + ], + [ + "The material of the lid is mentioned in the description but is not ceramic.", + -1 + ], + [ + "The material of the lid is mentioned in the description and is ceramic.", + 1 + ], + [ + "The material of the lid is not mentioned, but the lid of the toilet is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lid or the toilet is not mentioned.", + 0 + ], + [ + "The texture of the lid is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the lid is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the lid is not mentioned, but the lid of the toilet is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + 
"question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tank or the toilet is not mentioned.", + 0 + ], + [ + "The shape of the tank is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the tank is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the tank is not mentioned, but the tank of the toilet is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet is not mentioned.", + 0 + ], + [ + "The color of the toilet is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the toilet is mentioned in the description and is white.", + 1 + ], + [ + "The color of the toilet is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet is not mentioned in the description.", + 0 + ], + [ + "The flush lever of the toilet is mentioned in the description.", + -1 + ], + [ + "The flush lever of the toilet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet is not mentioned in the description.", + 0 + ], + [ + "The flush button of the toilet is mentioned in the description.", + -1 + ], + [ + "The flush button of the toilet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet is not mentioned in the description.", + 0 + ], + [ + "The toilet bowl of the toilet is mentioned in the description.", + -1 + ], + [ + "The toilet bowl of the toilet is not mentioned in the description.", + 1 + ] + ], + "type": 
"negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toothbrush is mentioned in the description.", + -1 + ], + [ + "The toothbrush is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet is not mentioned in the description.", + 0 + ], + [ + "The toilet seat of the toilet is mentioned in the description.", + -1 + ], + [ + "The toilet seat of the toilet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + } + ], + "25273553": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the legs is mentioned in the description but is not slender.", + -1 + ], + [ + "The shape of the legs is mentioned in the description and is slender.", + 1 + ], + [ + "The legs or the tripod are not mentioned.", + 0 + ], + [ + "The shape of the legs is not mentioned, but the legs of the tripod are mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the tripod is mentioned in the description but is not plastic or metal.", + -1 + ], + [ + "The material of the tripod is mentioned in the description and is plastic or metal.", + 1 + ], + [ + "The tripod is not mentioned.", + 0 + ], + [ + "The material of the tripod is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the tripod is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the tripod is mentioned in the description and is black.", + 1 + ], + [ + "The tripod is not mentioned.", + 0 + ], + [ + "The color of the 
tripod is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tripod is not mentioned in the description.", + 0 + ], + [ + "The leg locks of the tripod are not mentioned in the description.", + 1 + ], + [ + "The leg locks of the tripod are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tripod is not mentioned in the description.", + 0 + ], + [ + "The quick release plate of the tripod is not mentioned in the description.", + 1 + ], + [ + "The quick release plate of the tripod is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The hot air balloon is not mentioned in the description.", + 1 + ], + [ + "The hot air balloon is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tree is not mentioned in the description.", + 1 + ], + [ + "The tree is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sky is not mentioned in the description.", + 1 + ], + [ + "The sky is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "25419495": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the gripping ends/claws is mentioned in the description and is black.", + 1 + ], + [ + "The color of the gripping ends/claws is mentioned in the 
description but is not black.", + -1 + ], + [ + "The color of the gripping ends/claws is not mentioned, but the gripping ends/claws of the tong are mentioned.", + 0.5 + ], + [ + "The gripping ends/claws or the tong are not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the gripping ends/claws is mentioned in the description and is scalloped.", + 1 + ], + [ + "The shape of the gripping ends/claws is mentioned in the description but is not scalloped.", + -1 + ], + [ + "The shape of the gripping ends/claws is not mentioned, but the gripping ends/claws of the tong are mentioned.", + 0.5 + ], + [ + "The gripping ends/claws or the tong are not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the handle/arm is mentioned in the description and is black or metallic.", + 1 + ], + [ + "The color of the handle/arm is mentioned in the description but is not black or metallic.", + -1 + ], + [ + "The color of the handle/arm is not mentioned, but the handle/arm of the tong is mentioned.", + 0.5 + ], + [ + "The handle/arm or the tong is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the handle/arm is mentioned in the description and is metal or rubber.", + 1 + ], + [ + "The material of the handle/arm is mentioned in the description but is not metal or rubber.", + -1 + ], + [ + "The material of the handle/arm is not mentioned, but the handle/arm of the tong is mentioned.", + 0.5 + ], + [ + "The handle/arm or the tong is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The spoon is mentioned in the description.", + -1 + ], + [ 
+ "The spoon is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The spring of the tong is mentioned in the description.", + -1 + ], + [ + "The tong is not mentioned in the description.", + 0 + ], + [ + "The spring of the tong is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plate is mentioned in the description.", + -1 + ], + [ + "The plate is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cup is mentioned in the description.", + -1 + ], + [ + "The cup is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fork is mentioned in the description.", + -1 + ], + [ + "The fork is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "25419516": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the stuffed toy is mentioned in the description but is not plush.", + -1 + ], + [ + "The material of the stuffed toy is not mentioned.", + 0 + ], + [ + "The material of the stuffed toy is mentioned in the description and is plush.", + 1 + ], + [ + "The stuffed toy is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the eyes is mentioned in the description but is not black.", + -1 + ], + [ + "The color 
of the eyes is not mentioned, but the eyes of the stuffed toy are mentioned.", + 0.5 + ], + [ + "The color of the eyes is mentioned in the description and is black.", + 1 + ], + [ + "The eyes or the stuffed toy are not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the nose is mentioned in the description but is not blue.", + -1 + ], + [ + "The color of the nose is not mentioned, but the nose of the stuffed toy is mentioned.", + 0.5 + ], + [ + "The color of the nose is mentioned in the description and is blue.", + 1 + ], + [ + "The nose or the stuffed toy is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the body is mentioned in the description but is not blue.", + -1 + ], + [ + "The color of the body is not mentioned, but the body of the stuffed toy is mentioned.", + 0.5 + ], + [ + "The color of the body is mentioned in the description and is blue.", + 1 + ], + [ + "The body or the stuffed toy is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the nose is mentioned in the description but is not round.", + -1 + ], + [ + "The shape of the nose is not mentioned, but the nose of the stuffed toy is mentioned.", + 0.5 + ], + [ + "The shape of the nose is mentioned in the description and is round.", + 1 + ], + [ + "The nose or the stuffed toy is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tail of the stuffed toy is not mentioned in the description.", + 1 + ], + [ + "The stuffed toy is not mentioned in the description.", + 0 + ], + [ + "The tail of the stuffed toy is mentioned in the description.", + -1 
+ ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The feet of the stuffed toy are not mentioned in the description.", + 1 + ], + [ + "The stuffed toy is not mentioned in the description.", + 0 + ], + [ + "The feet of the stuffed toy are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The door is not mentioned in the description.", + 1 + ], + [ + "The door is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chair is not mentioned in the description.", + 1 + ], + [ + "The chair is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fork is not mentioned in the description.", + 1 + ], + [ + "The fork is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "25579493": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bowl is not mentioned.", + 0 + ], + [ + "The shape of the bowl is not mentioned.", + 0 + ], + [ + "The shape of the bowl is mentioned in the description and is square or rounded.", + 1 + ], + [ + "The shape of the bowl is mentioned in the description but is not square or rounded.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bowl is not mentioned.", + 0 + ], + [ + "The texture of the bowl is not mentioned.", + 0 + ], + [ + "The texture of the bowl is mentioned in the 
description and is smooth.", + 1 + ], + [ + "The texture of the bowl is mentioned in the description but is not smooth.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bowl is not mentioned.", + 0 + ], + [ + "The color of the bowl is not mentioned.", + 0 + ], + [ + "The color of the bowl is mentioned in the description and is white or beige.", + 1 + ], + [ + "The color of the bowl is mentioned in the description but is not white or beige.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The content or the bowl is not mentioned.", + 0 + ], + [ + "The color of the content is not mentioned, but the content of the bowl is mentioned.", + 0.5 + ], + [ + "The color of the content is mentioned in the description and is red, white, or yellow.", + 1 + ], + [ + "The color of the content is mentioned in the description but is not red, white, or yellow.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The base of the bowl is not mentioned in the description.", + 1 + ], + [ + "The base of the bowl is mentioned in the description.", + -1 + ], + [ + "The bowl is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lid of the bowl is not mentioned in the description.", + 1 + ], + [ + "The lid of the bowl is mentioned in the description.", + -1 + ], + [ + "The bowl is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The knife is not mentioned in the description.", + 1 + ], + [ + "The knife is 
mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cabinet is not mentioned in the description.", + 1 + ], + [ + "The cabinet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The burner is not mentioned in the description.", + 1 + ], + [ + "The burner is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "297718": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The position of the rice is mentioned in the description and is outer layer.", + 1 + ], + [ + "The position of the rice is mentioned in the description but is not outer layer.", + -1 + ], + [ + "The position of the rice is not mentioned, but the rice of the sushi is mentioned.", + 0.5 + ], + [ + "The rice or the sushi is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the wrap is mentioned in the description and is seaweed sheet.", + 1 + ], + [ + "The type of the wrap is mentioned in the description but is not seaweed sheet.", + -1 + ], + [ + "The type of the wrap is not mentioned, but the wrap of the sushi is mentioned.", + 0.5 + ], + [ + "The wrap or the sushi is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the sesame seeds is mentioned in the description and is black, yellow, or light brown.", + 1 + ], + [ + "The color of the sesame seeds is mentioned in the description but is not black, yellow, or light brown.", + -1 + ], + [ + "The color of 
the sesame seeds is not mentioned, but the sesame seeds of the sushi are mentioned.", + 0.5 + ], + [ + "The sesame seeds or the sushi are not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the filling is mentioned in the description and is fish or crab meat.", + 1 + ], + [ + "The type of the filling is mentioned in the description but is not fish or crab meat.", + -1 + ], + [ + "The type of the filling is not mentioned, but the filling of the sushi is mentioned.", + 0.5 + ], + [ + "The filling or the sushi is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the topping is mentioned in the description and is sesame seeds.", + 1 + ], + [ + "The type of the topping is mentioned in the description but is not sesame seeds.", + -1 + ], + [ + "The type of the topping is not mentioned, but the topping of the sushi is mentioned.", + 0.5 + ], + [ + "The topping or the sushi is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wasabi of the sushi is not mentioned in the description.", + 1 + ], + [ + "The sushi is not mentioned in the description.", + 0 + ], + [ + "The wasabi of the sushi is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The soy sauce of the sushi is not mentioned in the description.", + 1 + ], + [ + "The sushi is not mentioned in the description.", + 0 + ], + [ + "The soy sauce of the sushi is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the 
description?", + "choices": [ + [ + "The avocado of the sushi is not mentioned in the description.", + 1 + ], + [ + "The sushi is not mentioned in the description.", + 0 + ], + [ + "The avocado of the sushi is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pickled ginger of the sushi is not mentioned in the description.", + 1 + ], + [ + "The sushi is not mentioned in the description.", + 0 + ], + [ + "The pickled ginger of the sushi is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pineapple chunks are not mentioned in the description.", + 1 + ], + [ + "The pineapple chunks are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "361105": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The herb is not mentioned.", + 0 + ], + [ + "The color of the herb is mentioned in the description and is green.", + 1 + ], + [ + "The color of the herb is mentioned in the description but is not green.", + -1 + ], + [ + "The color of the herb is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The stems or the herb are not mentioned.", + 0 + ], + [ + "The shape of the stems is mentioned in the description and is thin or slender.", + 1 + ], + [ + "The shape of the stems is mentioned in the description but is not thin or slender.", + -1 + ], + [ + "The shape of the stems is not mentioned, but the stems of the herb are mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + 
"choices": [ + [ + "The leaves or the herb are not mentioned.", + 0 + ], + [ + "The dark spots of the leaves is mentioned in the description and is visible.", + 1 + ], + [ + "The dark spots of the leaves is mentioned in the description but is not visible.", + -1 + ], + [ + "The dark spots of the leaves is not mentioned, but the leaves of the herb are mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The flowers of the herb are not mentioned in the description.", + 1 + ], + [ + "The herb is not mentioned in the description.", + 0 + ], + [ + "The flowers of the herb are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The seeds of the herb are not mentioned in the description.", + 1 + ], + [ + "The herb is not mentioned in the description.", + 0 + ], + [ + "The seeds of the herb are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The roots of the herb are not mentioned in the description.", + 1 + ], + [ + "The herb is not mentioned in the description.", + 0 + ], + [ + "The roots of the herb are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cream sauce is not mentioned in the description.", + 1 + ], + [ + "The cream sauce is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The scallops are not mentioned in the description.", + 1 + ], + [ + "The scallops 
are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "1196168": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the grille is mentioned in the description and is smooth with ridges.", + 1 + ], + [ + "The texture of the grille is mentioned in the description but is not smooth with ridges.", + -1 + ], + [ + "The texture of the grille is not mentioned, but the grille of the air conditioner is mentioned.", + 0.5 + ], + [ + "The grille or the air conditioner is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the air conditioner is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the air conditioner is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the air conditioner is not mentioned.", + 0 + ], + [ + "The air conditioner is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the air conditioner is mentioned in the description and is white.", + 1 + ], + [ + "The color of the air conditioner is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the air conditioner is not mentioned.", + 0 + ], + [ + "The air conditioner is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the vent is mentioned in the description and is circular.", + 1 + ], + [ + "The shape of the vent is mentioned in the description but is not circular.", + -1 + ], + [ + "The shape of the vent is not mentioned, but the vent of the air conditioner is mentioned.", + 0.5 + ], + [ + "The vent or the air conditioner is not mentioned.", 
+ 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the fan is mentioned in the description and is black, grey, silver, or dark.", + 1 + ], + [ + "The color of the fan is mentioned in the description but is not black, grey, silver, or dark.", + -1 + ], + [ + "The color of the fan is not mentioned, but the fan of the air conditioner is mentioned.", + 0.5 + ], + [ + "The fan or the air conditioner is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The air conditioner is not mentioned in the description.", + 0 + ], + [ + "The remote control of the air conditioner is not mentioned in the description.", + 1 + ], + [ + "The remote control of the air conditioner is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Hotel Shilaza sign is not mentioned in the description.", + 1 + ], + [ + "The Hotel Shilaza sign is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The air conditioner is not mentioned in the description.", + 0 + ], + [ + "The display of the air conditioner is not mentioned in the description.", + 1 + ], + [ + "The display of the air conditioner is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The UCO Bank branch is not mentioned in the description.", + 1 + ], + [ + "The UCO Bank branch is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + 
"question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The motorcycles are not mentioned in the description.", + 1 + ], + [ + "The motorcycles are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "1894089": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the screwdriver is mentioned in the description but is not silver or metallic.", + -1 + ], + [ + "The screwdriver is not mentioned.", + 0 + ], + [ + "The color of the screwdriver is mentioned in the description and is silver or metallic.", + 1 + ], + [ + "The color of the screwdriver is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the screwdriver is mentioned in the description but is not metal.", + -1 + ], + [ + "The screwdriver is not mentioned.", + 0 + ], + [ + "The material of the screwdriver is mentioned in the description and is metal.", + 1 + ], + [ + "The material of the screwdriver is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the grip is mentioned in the description but is not rough or textured.", + -1 + ], + [ + "The grip or the screwdriver is not mentioned.", + 0 + ], + [ + "The texture of the grip is mentioned in the description and is rough or textured.", + 1 + ], + [ + "The texture of the grip is not mentioned, but the grip of the screwdriver is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The CD is mentioned in the description.", + -1 + ], + [ + "The CD is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + 
"question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wire is mentioned in the description.", + -1 + ], + [ + "The wire is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The interchangeable bits of the screwdriver are mentioned in the description.", + -1 + ], + [ + "The interchangeable bits of the screwdriver are not mentioned in the description.", + 1 + ], + [ + "The screwdriver is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wire cutters are mentioned in the description.", + -1 + ], + [ + "The wire cutters are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The flashlight is mentioned in the description.", + -1 + ], + [ + "The flashlight is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "2391780": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The position of the wings is mentioned in the description but is not spread or extended.", + -1 + ], + [ + "The position of the wings is not mentioned, but the wings of the wild bird are mentioned.", + 0.5 + ], + [ + "The position of the wings is mentioned in the description and is spread or extended.", + 1 + ], + [ + "The wings or the wild bird are not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the head is mentioned in the description but is not white.", + -1 + ], 
+ [ + "The color of the head is not mentioned, but the head of the wild bird is mentioned.", + 0.5 + ], + [ + "The color of the head is mentioned in the description and is white.", + 1 + ], + [ + "The head or the wild bird is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the body is mentioned in the description but is not white, brown or gray.", + -1 + ], + [ + "The color of the body is not mentioned, but the body of the wild bird is mentioned.", + 0.5 + ], + [ + "The color of the body is mentioned in the description and is white, brown or gray.", + 1 + ], + [ + "The body or the wild bird is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the beak is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The color of the beak is not mentioned, but the beak of the wild bird is mentioned.", + 0.5 + ], + [ + "The color of the beak is mentioned in the description and is dark or black.", + 1 + ], + [ + "The beak or the wild bird is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The windows are mentioned in the description.", + -1 + ], + [ + "The windows are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The boats are mentioned in the description.", + -1 + ], + [ + "The boats are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The stone walls are mentioned in the description.", + -1 + ], + [ 
+ "The stone walls are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chimneys are mentioned in the description.", + -1 + ], + [ + "The chimneys are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The water is mentioned in the description.", + -1 + ], + [ + "The water is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "4502267": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green bean is not mentioned.", + 0 + ], + [ + "The shape of the green bean is not mentioned.", + 0 + ], + [ + "The shape of the green bean is mentioned in the description and is elongated.", + 1 + ], + [ + "The shape of the green bean is mentioned in the description but is not elongated.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green bean is not mentioned.", + 0 + ], + [ + "The color of the green bean is not mentioned.", + 0 + ], + [ + "The color of the green bean is mentioned in the description and is green.", + 1 + ], + [ + "The color of the green bean is mentioned in the description but is not green.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green bean is not mentioned.", + 0 + ], + [ + "The shape of the green bean is not mentioned.", + 0 + ], + [ + "The shape of the green bean is mentioned in the description and is arc or curved.", + 1 + ], + [ + "The shape of the green bean is mentioned in the description but is not arc or curved.", + -1 + ] + 
], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green bean is not mentioned.", + 0 + ], + [ + "The texture of the green bean is not mentioned.", + 0 + ], + [ + "The texture of the green bean is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the green bean is mentioned in the description but is not smooth.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The apple is not mentioned in the description.", + 1 + ], + [ + "The apple is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green bean is not mentioned in the description.", + 0 + ], + [ + "The strings of the green bean are not mentioned in the description.", + 1 + ], + [ + "The strings of the green bean are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green bean is not mentioned in the description.", + 0 + ], + [ + "The seeds of the green bean are not mentioned in the description.", + 1 + ], + [ + "The seeds of the green bean are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pear is not mentioned in the description.", + 1 + ], + [ + "The pear is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The peach is not mentioned in the description.", + 1 + ], + [ + "The peach is mentioned in 
the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "4604873": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the jib is not mentioned, but the jib of the crane is mentioned.", + 0.5 + ], + [ + "The jib or the crane is not mentioned.", + 0 + ], + [ + "The size of the jib is mentioned in the description but is not long.", + -1 + ], + [ + "The size of the jib is mentioned in the description and is long.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the tower is not mentioned, but the tower of the crane is mentioned.", + 0.5 + ], + [ + "The tower or the crane is not mentioned.", + 0 + ], + [ + "The size of the tower is mentioned in the description but is not tall.", + -1 + ], + [ + "The size of the tower is mentioned in the description and is tall.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The visibility of the hook is not mentioned, but the hook of the crane is mentioned.", + 0.5 + ], + [ + "The hook or the crane is not mentioned.", + 0 + ], + [ + "The visibility of the hook is mentioned in the description but is not visible.", + -1 + ], + [ + "The visibility of the hook is mentioned in the description and is visible.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the crane is not mentioned.", + 0 + ], + [ + "The crane is not mentioned.", + 0 + ], + [ + "The material of the crane is mentioned in the description but is not metal or steel.", + -1 + ], + [ + "The material of the crane is mentioned in the description and is metal or steel.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the 
description?", + "choices": [ + [ + "The shape of the jib is not mentioned, but the jib of the crane is mentioned.", + 0.5 + ], + [ + "The jib or the crane is not mentioned.", + 0 + ], + [ + "The shape of the jib is mentioned in the description but is not horizontal beam.", + -1 + ], + [ + "The shape of the jib is mentioned in the description and is horizontal beam.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The telescoping sections of the crane are not mentioned in the description.", + 1 + ], + [ + "The crane is not mentioned in the description.", + 0 + ], + [ + "The telescoping sections of the crane are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tracks of the crane are not mentioned in the description.", + 1 + ], + [ + "The crane is not mentioned in the description.", + 0 + ], + [ + "The tracks of the crane are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wheels of the crane are not mentioned in the description.", + 1 + ], + [ + "The crane is not mentioned in the description.", + 0 + ], + [ + "The wheels of the crane are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The silhouettes of structures are not mentioned in the description.", + 1 + ], + [ + "The silhouettes of structures are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clouds 
are not mentioned in the description.", + 1 + ], + [ + "The clouds are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "4916799": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the globe is mentioned in the description and is silver and blue.", + 1 + ], + [ + "The globe is not mentioned.", + 0 + ], + [ + "The color of the globe is not mentioned.", + 0 + ], + [ + "The color of the globe is mentioned in the description but is not silver and blue.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the sphere is mentioned in the description and is metal and plastic.", + 1 + ], + [ + "The sphere or the globe is not mentioned.", + 0 + ], + [ + "The material of the sphere is not mentioned, but the sphere of the globe is mentioned.", + 0.5 + ], + [ + "The material of the sphere is mentioned in the description but is not metal and plastic.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the base is mentioned in the description and is circular.", + 1 + ], + [ + "The base or the globe is not mentioned.", + 0 + ], + [ + "The shape of the base is not mentioned, but the base of the globe is mentioned.", + 0.5 + ], + [ + "The shape of the base is mentioned in the description but is not circular.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the base is mentioned in the description and is gray.", + 1 + ], + [ + "The base or the globe is not mentioned.", + 0 + ], + [ + "The color of the base is not mentioned, but the base of the globe is mentioned.", + 0.5 + ], + [ + "The color of the base is mentioned in the description but is not 
gray.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sidewalk is mentioned in the description.", + -1 + ], + [ + "The sidewalk is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The trees are mentioned in the description.", + -1 + ], + [ + "The trees are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sky is mentioned in the description.", + -1 + ], + [ + "The sky is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The person is mentioned in the description.", + -1 + ], + [ + "The person is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bushes are mentioned in the description.", + -1 + ], + [ + "The bushes are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "5718415": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The canopy or the tent is not mentioned.", + 0 + ], + [ + "The material of the canopy is mentioned in the description but is not fabric.", + -1 + ], + [ + "The material of the canopy is mentioned in the description and is fabric.", + 1 + ], + [ + "The material of the canopy is not mentioned, but the canopy of the tent is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of 
the following is applicable to the description?", + "choices": [ + [ + "The pole or the tent is not mentioned.", + 0 + ], + [ + "The material of the pole is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the pole is mentioned in the description and is metal.", + 1 + ], + [ + "The material of the pole is not mentioned, but the pole of the tent is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The canopy or the tent is not mentioned.", + 0 + ], + [ + "The color of the canopy is mentioned in the description but is not yellow.", + -1 + ], + [ + "The color of the canopy is mentioned in the description and is yellow.", + 1 + ], + [ + "The color of the canopy is not mentioned, but the canopy of the tent is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The windows of the tent are mentioned in the description.", + -1 + ], + [ + "The tent is not mentioned in the description.", + 0 + ], + [ + "The windows of the tent are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The buildings are mentioned in the description.", + -1 + ], + [ + "The buildings are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The walls of the tent are mentioned in the description.", + -1 + ], + [ + "The tent is not mentioned in the description.", + 0 + ], + [ + "The walls of the tent are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable 
to the description?", + "choices": [ + [ + "The door of the tent is mentioned in the description.", + -1 + ], + [ + "The tent is not mentioned in the description.", + 0 + ], + [ + "The door of the tent is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The floor of the tent is mentioned in the description.", + -1 + ], + [ + "The tent is not mentioned in the description.", + 0 + ], + [ + "The floor of the tent is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + } + ], + "6012878": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The symbol or the traffic light is not mentioned.", + 0 + ], + [ + "The color of the symbol is mentioned in the description but is not red or orange.", + -1 + ], + [ + "The color of the symbol is mentioned in the description and is red or orange.", + 1 + ], + [ + "The color of the symbol is not mentioned, but the symbol of the traffic light is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The symbol or the traffic light is not mentioned.", + 0 + ], + [ + "The shape of the symbol is mentioned in the description but is not hand outline.", + -1 + ], + [ + "The shape of the symbol is mentioned in the description and is hand outline.", + 1 + ], + [ + "The shape of the symbol is not mentioned, but the symbol of the traffic light is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The background or the traffic light is not mentioned.", + 0 + ], + [ + "The texture of the background is mentioned in the description but is not matte.", + -1 + ], + [ + "The texture of the background is 
mentioned in the description and is matte.", + 1 + ], + [ + "The texture of the background is not mentioned, but the background of the traffic light is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The background or the traffic light is not mentioned.", + 0 + ], + [ + "The color of the background is mentioned in the description but is not gray or black.", + -1 + ], + [ + "The color of the background is mentioned in the description and is gray or black.", + 1 + ], + [ + "The color of the background is not mentioned, but the background of the traffic light is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The reflective surface or the traffic light is not mentioned.", + 0 + ], + [ + "The material of the reflective surface is mentioned in the description but is not glass or plastic.", + -1 + ], + [ + "The material of the reflective surface is mentioned in the description and is glass or plastic.", + 1 + ], + [ + "The material of the reflective surface is not mentioned, but the reflective surface of the traffic light is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The walking person symbol of the traffic light is not mentioned in the description.", + 1 + ], + [ + "The walking person symbol of the traffic light is mentioned in the description.", + -1 + ], + [ + "The traffic light is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pole of the traffic light is not mentioned in the description.", + 1 + ], + [ + "The pole of the traffic light is mentioned in the description.", + -1 + ], + [ + "The 
traffic light is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bicycles are not mentioned in the description.", + 1 + ], + [ + "The bicycles are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sidewalk is not mentioned in the description.", + 1 + ], + [ + "The sidewalk is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green light of the traffic light is not mentioned in the description.", + 1 + ], + [ + "The green light of the traffic light is mentioned in the description.", + -1 + ], + [ + "The traffic light is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + } + ], + "6820595": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the ear is not mentioned, but the ear of the cat is mentioned.", + 0.5 + ], + [ + "The shape of the ear is mentioned in the description but is not triangular.", + -1 + ], + [ + "The ear or the cat is not mentioned.", + 0 + ], + [ + "The shape of the ear is mentioned in the description and is triangular.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the fur is not mentioned, but the fur of the cat is mentioned.", + 0.5 + ], + [ + "The texture of the fur is mentioned in the description but is not fluffy.", + -1 + ], + [ + "The fur or the cat is not mentioned.", + 0 + ], + [ + "The texture of the fur is mentioned in the description and is 
fluffy.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the fur is not mentioned, but the fur of the cat is mentioned.", + 0.5 + ], + [ + "The color of the fur is mentioned in the description but is not black and white.", + -1 + ], + [ + "The fur or the cat is not mentioned.", + 0 + ], + [ + "The color of the fur is mentioned in the description and is black and white.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the back is not mentioned, but the back of the cat is mentioned.", + 0.5 + ], + [ + "The shape of the back is mentioned in the description but is not arched.", + -1 + ], + [ + "The back or the cat is not mentioned.", + 0 + ], + [ + "The shape of the back is mentioned in the description and is arched.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the underbelly is not mentioned, but the underbelly of the cat is mentioned.", + 0.5 + ], + [ + "The color of the underbelly is mentioned in the description but is not white.", + -1 + ], + [ + "The underbelly or the cat is not mentioned.", + 0 + ], + [ + "The color of the underbelly is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The door is mentioned in the description.", + -1 + ], + [ + "The door is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mirror is mentioned in the description.", + -1 + ], + [ + "The mirror is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": 
"salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bathroom cabinet is mentioned in the description.", + -1 + ], + [ + "The bathroom cabinet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bathroom sink is mentioned in the description.", + -1 + ], + [ + "The bathroom sink is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The hairbrush is mentioned in the description.", + -1 + ], + [ + "The hairbrush is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "8556674": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned.", + 0 + ], + [ + "The color of the orange/tangerine is mentioned in the description but is not orange.", + -1 + ], + [ + "The color of the orange/tangerine is not mentioned.", + 0 + ], + [ + "The color of the orange/tangerine is mentioned in the description and is orange.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The surface or the orange/tangerine is not mentioned.", + 0 + ], + [ + "The color of the surface is mentioned in the description but is not bright orange.", + -1 + ], + [ + "The color of the surface is not mentioned, but the surface of the orange/tangerine is mentioned.", + 0.5 + ], + [ + "The color of the surface is mentioned in the description and is bright orange.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ 
+ "The orange/tangerine is not mentioned.", + 0 + ], + [ + "The texture of the orange/tangerine is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the orange/tangerine is not mentioned.", + 0 + ], + [ + "The texture of the orange/tangerine is mentioned in the description and is smooth.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned.", + 0 + ], + [ + "The shape of the orange/tangerine is mentioned in the description but is not round.", + -1 + ], + [ + "The shape of the orange/tangerine is not mentioned.", + 0 + ], + [ + "The shape of the orange/tangerine is mentioned in the description and is round.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The surface or the orange/tangerine is not mentioned.", + 0 + ], + [ + "The texture of the surface is mentioned in the description but is not glossy.", + -1 + ], + [ + "The texture of the surface is not mentioned, but the surface of the orange/tangerine is mentioned.", + 0.5 + ], + [ + "The texture of the surface is mentioned in the description and is glossy.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned in the description.", + 0 + ], + [ + "The stem of the orange/tangerine is mentioned in the description.", + -1 + ], + [ + "The stem of the orange/tangerine is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned in the description.", + 0 + ], + [ + "The leaves of the orange/tangerine are mentioned in the description.", + -1 + ], + [ + "The leaves of the 
orange/tangerine are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned in the description.", + 0 + ], + [ + "The segments of the orange/tangerine are mentioned in the description.", + -1 + ], + [ + "The segments of the orange/tangerine are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ceiling lights are mentioned in the description.", + -1 + ], + [ + "The ceiling lights are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned in the description.", + 0 + ], + [ + "The flesh of the orange/tangerine is mentioned in the description.", + -1 + ], + [ + "The flesh of the orange/tangerine is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + } + ], + "8906172": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the earphone is not mentioned.", + 0 + ], + [ + "The color of the earphone is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The earphone is not mentioned.", + 0 + ], + [ + "The color of the earphone is mentioned in the description and is dark or black.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the earphone is not mentioned.", + 0 + ], + [ + "The shape of the earphone is mentioned in the description but is not curved.", + -1 + ], + [ + "The earphone is not mentioned.", + 0 + ], 
+ [ + "The shape of the earphone is mentioned in the description and is curved.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The control buttons of the earphone are mentioned in the description.", + -1 + ], + [ + "The control buttons of the earphone are not mentioned in the description.", + 1 + ], + [ + "The earphone is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mirror is mentioned in the description.", + -1 + ], + [ + "The mirror is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cell phone is mentioned in the description.", + -1 + ], + [ + "The cell phone is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The faucet is mentioned in the description.", + -1 + ], + [ + "The faucet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sink is mentioned in the description.", + -1 + ], + [ + "The sink is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "10666665": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clock is not mentioned.", + 0 + ], + [ + "The shape of the clock is not mentioned.", + 0 + ], + [ + "The shape of the clock is mentioned in the description but is not circular.", + -1 + ], + [ + "The shape of 
the clock is mentioned in the description and is circular.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clock face or the clock is not mentioned.", + 0 + ], + [ + "The color of the clock face is not mentioned, but the clock face of the clock is mentioned.", + 0.5 + ], + [ + "The color of the clock face is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the clock face is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The hour hand or the clock is not mentioned.", + 0 + ], + [ + "The color of the hour hand is not mentioned, but the hour hand of the clock is mentioned.", + 0.5 + ], + [ + "The color of the hour hand is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the hour hand is mentioned in the description and is black.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The numbers or the clock are not mentioned.", + 0 + ], + [ + "The color of the numbers is not mentioned, but the numbers of the clock are mentioned.", + 0.5 + ], + [ + "The color of the numbers is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the numbers is mentioned in the description and is black.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The frame or the clock is not mentioned.", + 0 + ], + [ + "The color of the frame is not mentioned, but the frame of the clock is mentioned.", + 0.5 + ], + [ + "The color of the frame is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the frame is mentioned in the description and is black.", + 1 + ] + ], + "type": 
"positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bamboo blind is not mentioned in the description.", + 1 + ], + [ + "The bamboo blind is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The microwave is not mentioned in the description.", + 1 + ], + [ + "The microwave is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The digital display of the clock is not mentioned in the description.", + 1 + ], + [ + "The clock is not mentioned in the description.", + 0 + ], + [ + "The digital display of the clock is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pendulum of the clock is not mentioned in the description.", + 1 + ], + [ + "The clock is not mentioned in the description.", + 0 + ], + [ + "The pendulum of the clock is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The faucet is not mentioned in the description.", + 1 + ], + [ + "The faucet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "10811497": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the key is not mentioned.", + 0 + ], + [ + "The color of the key is mentioned in the description and is dark green, gray, or black.", + 1 + ], + [ + "The color of the key is mentioned in the description but is 
not dark green, gray, or black.", + -1 + ], + [ + "The key is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the key is not mentioned.", + 0 + ], + [ + "The material of the key is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the key is mentioned in the description but is not plastic.", + -1 + ], + [ + "The key is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the key is not mentioned.", + 0 + ], + [ + "The texture of the key is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the key is mentioned in the description but is not smooth.", + -1 + ], + [ + "The key is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the key is not mentioned.", + 0 + ], + [ + "The shape of the key is mentioned in the description and is rounded, circular, or oval.", + 1 + ], + [ + "The shape of the key is mentioned in the description but is not rounded, circular, or oval.", + -1 + ], + [ + "The key is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The key is not mentioned in the description.", + 0 + ], + [ + "The key bow of the key is not mentioned in the description.", + 1 + ], + [ + "The key bow of the key is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The key is not mentioned in the description.", + 0 + ], + [ + "The key teeth of the key are not mentioned in the description.", + 1 + ], + [ + "The key 
teeth of the key are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sticky note is not mentioned in the description.", + 1 + ], + [ + "The sticky note is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The smartphone is not mentioned in the description.", + 1 + ], + [ + "The smartphone is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The black fabric item is not mentioned in the description.", + 1 + ], + [ + "The black fabric item is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "11021562": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the handle is not mentioned, but the handle of the microwave is mentioned.", + 0.5 + ], + [ + "The shape of the handle is mentioned in the description but is not curved.", + -1 + ], + [ + "The shape of the handle is mentioned in the description and is curved.", + 1 + ], + [ + "The handle or the microwave is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the casing is not mentioned, but the casing of the microwave is mentioned.", + 0.5 + ], + [ + "The texture of the casing is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the casing is mentioned in the description and is smooth.", + 1 + ], + [ + "The casing or the microwave is not mentioned.", + 0 + ] + ], + "type": "positive" + }, 
+ { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orientation of the handle is not mentioned, but the handle of the microwave is mentioned.", + 0.5 + ], + [ + "The orientation of the handle is mentioned in the description but is not vertical.", + -1 + ], + [ + "The orientation of the handle is mentioned in the description and is vertical.", + 1 + ], + [ + "The handle or the microwave is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the microwave is not mentioned.", + 0 + ], + [ + "The color of the microwave is mentioned in the description but is not white, beige, or yellow.", + -1 + ], + [ + "The color of the microwave is mentioned in the description and is white, beige, or yellow.", + 1 + ], + [ + "The microwave is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The position of the vent is not mentioned, but the vent of the microwave is mentioned.", + 0.5 + ], + [ + "The position of the vent is mentioned in the description but is not top.", + -1 + ], + [ + "The position of the vent is mentioned in the description and is top.", + 1 + ], + [ + "The vent or the microwave is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fire extinguisher is mentioned in the description.", + -1 + ], + [ + "The fire extinguisher is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The turntable of the microwave is mentioned in the description.", + -1 + ], + [ + "The turntable of the microwave is not mentioned in the description.", + 1 + ], 
+ [ + "The microwave is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The interior light of the microwave is mentioned in the description.", + -1 + ], + [ + "The interior light of the microwave is not mentioned in the description.", + 1 + ], + [ + "The microwave is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The windows are mentioned in the description.", + -1 + ], + [ + "The windows are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rug is mentioned in the description.", + -1 + ], + [ + "The rug is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "11021563": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The control panel or the stove is not mentioned.", + 0 + ], + [ + "The location of the control panel is mentioned in the description but is not back.", + -1 + ], + [ + "The location of the control panel is mentioned in the description and is back.", + 1 + ], + [ + "The location of the control panel is not mentioned, but the control panel of the stove is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The burners or the stove are not mentioned.", + 0 + ], + [ + "The color of the burners is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the burners is mentioned in the description and is black.", + 1 + ], + [ + "The color of the burners 
is not mentioned, but the burners of the stove are mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The burners or the stove are not mentioned.", + 0 + ], + [ + "The number of the burners is mentioned in the description but is not 4.", + -1 + ], + [ + "The number of the burners is mentioned in the description and is 4.", + 1 + ], + [ + "The number of the burners is not mentioned, but the burners of the stove are mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The burners or the stove are not mentioned.", + 0 + ], + [ + "The shape of the burners is mentioned in the description but is not coiled.", + -1 + ], + [ + "The shape of the burners is mentioned in the description and is coiled.", + 1 + ], + [ + "The shape of the burners is not mentioned, but the burners of the stove are mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The countertop is mentioned in the description.", + -1 + ], + [ + "The countertop is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The kitchen cabinets are mentioned in the description.", + -1 + ], + [ + "The kitchen cabinets are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The broom is mentioned in the description.", + -1 + ], + [ + "The broom is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the 
description?", + "choices": [ + [ + "The induction cooktop surface of the stove is mentioned in the description.", + -1 + ], + [ + "The induction cooktop surface of the stove is not mentioned in the description.", + 1 + ], + [ + "The stove is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The small table is mentioned in the description.", + -1 + ], + [ + "The small table is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "12348078": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clothes or the person are not mentioned.", + 0 + ], + [ + "The style of the clothes is mentioned in the description and is t-shirt.", + 1 + ], + [ + "The style of the clothes is mentioned in the description but is not t-shirt.", + -1 + ], + [ + "The style of the clothes is not mentioned, but the clothes of the person are mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clothes or the person are not mentioned.", + 0 + ], + [ + "The color of the clothes is mentioned in the description and is white.", + 1 + ], + [ + "The color of the clothes is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the clothes is not mentioned, but the clothes of the person are mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The person is not mentioned.", + 0 + ], + [ + "The hairstyle of the person is mentioned in the description and is bun.", + 1 + ], + [ + "The hairstyle of the person is mentioned in the description but is not bun.", + -1 + ], + [ + "The hairstyle of the person is not 
mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The person is not mentioned.", + 0 + ], + [ + "The hair color of the person is mentioned in the description and is dark or black.", + 1 + ], + [ + "The hair color of the person is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The hair color of the person is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pants or the person are not mentioned.", + 0 + ], + [ + "The color of the pants is mentioned in the description and is black.", + 1 + ], + [ + "The color of the pants is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the pants is not mentioned, but the pants of the person are mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouth of the person is mentioned in the description.", + -1 + ], + [ + "The person is not mentioned in the description.", + 0 + ], + [ + "The mouth of the person is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The face of the person is mentioned in the description.", + -1 + ], + [ + "The person is not mentioned in the description.", + 0 + ], + [ + "The face of the person is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The nose of the person is mentioned in the description.", + -1 + ], + [ + "The person is not mentioned in the description.", + 0 + ], + [ + "The nose of the person is not mentioned in the 
description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wall is mentioned in the description.", + -1 + ], + [ + "The wall is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bicycle cart is mentioned in the description.", + -1 + ], + [ + "The bicycle cart is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "13138178": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the stool is mentioned in the description and is blue.", + 1 + ], + [ + "The stool is not mentioned.", + 0 + ], + [ + "The color of the stool is mentioned in the description but is not blue.", + -1 + ], + [ + "The color of the stool is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the stool is mentioned in the description and is plastic.", + 1 + ], + [ + "The stool is not mentioned.", + 0 + ], + [ + "The material of the stool is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the stool is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the seat is mentioned in the description and is flat.", + 1 + ], + [ + "The seat or the stool is not mentioned.", + 0 + ], + [ + "The shape of the seat is mentioned in the description but is not flat.", + -1 + ], + [ + "The shape of the seat is not mentioned, but the seat of the stool is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the 
following is applicable to the description?", + "choices": [ + [ + "The armrest of the stool is mentioned in the description.", + -1 + ], + [ + "The armrest of the stool is not mentioned in the description.", + 1 + ], + [ + "The stool is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The backrest of the stool is mentioned in the description.", + -1 + ], + [ + "The backrest of the stool is not mentioned in the description.", + 1 + ], + [ + "The stool is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Cooking grate is mentioned in the description.", + -1 + ], + [ + "The Cooking grate is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The footrest of the stool is mentioned in the description.", + -1 + ], + [ + "The footrest of the stool is not mentioned in the description.", + 1 + ], + [ + "The stool is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The swivel base of the stool is mentioned in the description.", + -1 + ], + [ + "The swivel base of the stool is not mentioned in the description.", + 1 + ], + [ + "The stool is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + } + ], + "13187927": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the exterior is mentioned in the description but is not white.", + -1 + ], + [ + "The 
color of the exterior is mentioned in the description and is white.", + 1 + ], + [ + "The exterior or the motorcycle is not mentioned.", + 0 + ], + [ + "The color of the exterior is not mentioned, but the exterior of the motorcycle is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the seat is mentioned in the description but is not leather or vinyl.", + -1 + ], + [ + "The material of the seat is mentioned in the description and is leather or vinyl.", + 1 + ], + [ + "The seat or the motorcycle is not mentioned.", + 0 + ], + [ + "The material of the seat is not mentioned, but the seat of the motorcycle is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the seat is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the seat is mentioned in the description and is black.", + 1 + ], + [ + "The seat or the motorcycle is not mentioned.", + 0 + ], + [ + "The color of the seat is not mentioned, but the seat of the motorcycle is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the taillight is mentioned in the description but is not red.", + -1 + ], + [ + "The color of the taillight is mentioned in the description and is red.", + 1 + ], + [ + "The taillight or the motorcycle is not mentioned.", + 0 + ], + [ + "The color of the taillight is not mentioned, but the taillight of the motorcycle is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the license plate is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the license plate is mentioned in 
the description and is rectangular.", + 1 + ], + [ + "The license plate or the motorcycle is not mentioned.", + 0 + ], + [ + "The shape of the license plate is not mentioned, but the license plate of the motorcycle is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The motorcycle is not mentioned in the description.", + 0 + ], + [ + "The windshield of the motorcycle is not mentioned in the description.", + 1 + ], + [ + "The windshield of the motorcycle is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The balconies are not mentioned in the description.", + 1 + ], + [ + "The balconies are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Pepsi advertisements are not mentioned in the description.", + 1 + ], + [ + "The Pepsi advertisements are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The windows are not mentioned in the description.", + 1 + ], + [ + "The windows are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The decorative metal grill is not mentioned in the description.", + 1 + ], + [ + "The decorative metal grill is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "14490578": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ 
+ "The texture of the seal is mentioned in the description but is not smooth.", + -1 + ], + [ + "The seal is not mentioned.", + 0 + ], + [ + "The texture of the seal is not mentioned.", + 0 + ], + [ + "The texture of the seal is mentioned in the description and is smooth.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the skin is mentioned in the description but is not gray, black, or dark.", + -1 + ], + [ + "The skin or the seal is not mentioned.", + 0 + ], + [ + "The color of the skin is not mentioned, but the skin of the seal is mentioned.", + 0.5 + ], + [ + "The color of the skin is mentioned in the description and is gray, black, or dark.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the body is mentioned in the description but is not elongated.", + -1 + ], + [ + "The body or the seal is not mentioned.", + 0 + ], + [ + "The shape of the body is not mentioned, but the body of the seal is mentioned.", + 0.5 + ], + [ + "The shape of the body is mentioned in the description and is elongated.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The teeth of the seal are not mentioned in the description.", + 1 + ], + [ + "The teeth of the seal are mentioned in the description.", + -1 + ], + [ + "The seal is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The claws of the seal are not mentioned in the description.", + 1 + ], + [ + "The claws of the seal are mentioned in the description.", + -1 + ], + [ + "The seal is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" 
+ }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ears of the seal are not mentioned in the description.", + 1 + ], + [ + "The ears of the seal are mentioned in the description.", + -1 + ], + [ + "The seal is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sand is not mentioned in the description.", + 1 + ], + [ + "The sand is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rocks are not mentioned in the description.", + 1 + ], + [ + "The rocks are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "14640483": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the surface is not mentioned, but the surface of the cutting/chopping board is mentioned.", + 0.5 + ], + [ + "The surface or the cutting/chopping board is not mentioned.", + 0 + ], + [ + "The texture of the surface is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the surface is mentioned in the description and is smooth.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the cutting/chopping board is not mentioned.", + 0 + ], + [ + "The cutting/chopping board is not mentioned.", + 0 + ], + [ + "The color of the cutting/chopping board is mentioned in the description but is not light brown or wooden.", + -1 + ], + [ + "The color of the cutting/chopping board is mentioned in the description and is light brown or wooden.", + 1 + ] + ], + "type": "positive" + }, + { + "question": 
"Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the cutting/chopping board is not mentioned.", + 0 + ], + [ + "The cutting/chopping board is not mentioned.", + 0 + ], + [ + "The shape of the cutting/chopping board is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the cutting/chopping board is mentioned in the description and is rectangular.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the cutting/chopping board is not mentioned.", + 0 + ], + [ + "The cutting/chopping board is not mentioned.", + 0 + ], + [ + "The material of the cutting/chopping board is mentioned in the description but is not wood.", + -1 + ], + [ + "The material of the cutting/chopping board is mentioned in the description and is wood.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The hanging hole of the cutting/chopping board is mentioned in the description.", + -1 + ], + [ + "The cutting/chopping board is not mentioned in the description.", + 0 + ], + [ + "The hanging hole of the cutting/chopping board is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The juice groove of the cutting/chopping board is mentioned in the description.", + -1 + ], + [ + "The cutting/chopping board is not mentioned in the description.", + 0 + ], + [ + "The juice groove of the cutting/chopping board is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle of the cutting/chopping board is mentioned in the description.", 
+ -1 + ], + [ + "The cutting/chopping board is not mentioned in the description.", + 0 + ], + [ + "The handle of the cutting/chopping board is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The whisk is mentioned in the description.", + -1 + ], + [ + "The whisk is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The utensils are mentioned in the description.", + -1 + ], + [ + "The utensils are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "16010041": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the chopsticks is mentioned in the description but is not wood.", + -1 + ], + [ + "The material of the chopsticks is not mentioned.", + 0 + ], + [ + "The material of the chopsticks is mentioned in the description and is wood.", + 1 + ], + [ + "The chopsticks are not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the chopsticks is mentioned in the description but is not long cylindrical.", + -1 + ], + [ + "The shape of the chopsticks is not mentioned.", + 0 + ], + [ + "The shape of the chopsticks is mentioned in the description and is long cylindrical.", + 1 + ], + [ + "The chopsticks are not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the body is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the body is not mentioned, but the body of the chopsticks is 
mentioned.", + 0.5 + ], + [ + "The texture of the body is mentioned in the description and is smooth.", + 1 + ], + [ + "The body or the chopsticks is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lemon slices are not mentioned in the description.", + 1 + ], + [ + "The lemon slices are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The decorative elements of the chopsticks are not mentioned in the description.", + 1 + ], + [ + "The chopsticks are not mentioned in the description.", + 0 + ], + [ + "The decorative elements of the chopsticks are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The man is not mentioned in the description.", + 1 + ], + [ + "The man is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sliced meat is not mentioned in the description.", + 1 + ], + [ + "The sliced meat is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green garnish is not mentioned in the description.", + 1 + ], + [ + "The green garnish is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "17072759": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the belt is not mentioned.", + 0 + ], + [ + "The color of the belt is 
mentioned in the description but is not gray or black.", + -1 + ], + [ + "The color of the belt is mentioned in the description and is gray or black.", + 1 + ], + [ + "The belt is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the strap is not mentioned, but the strap of the belt is mentioned.", + 0.5 + ], + [ + "The material of the strap is mentioned in the description but is not leather.", + -1 + ], + [ + "The material of the strap is mentioned in the description and is leather.", + 1 + ], + [ + "The strap or the belt is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the strap is not mentioned, but the strap of the belt is mentioned.", + 0.5 + ], + [ + "The texture of the strap is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the strap is mentioned in the description and is smooth.", + 1 + ], + [ + "The strap or the belt is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The belt is not mentioned in the description.", + 0 + ], + [ + "The tip of the belt is mentioned in the description.", + -1 + ], + [ + "The tip of the belt is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chess board is mentioned in the description.", + -1 + ], + [ + "The chess board is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The blanket is mentioned in the description.", + -1 + ], + [ 
+ "The blanket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sky is mentioned in the description.", + -1 + ], + [ + "The sky is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The belt is not mentioned in the description.", + 0 + ], + [ + "The keeper of the belt is mentioned in the description.", + -1 + ], + [ + "The keeper of the belt is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + } + ], + "17072764": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the pear is mentioned in the description but is not smooth.", + -1 + ], + [ + "The pear is not mentioned.", + 0 + ], + [ + "The texture of the pear is not mentioned.", + 0 + ], + [ + "The texture of the pear is mentioned in the description and is smooth.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the stem is mentioned in the description but is not short.", + -1 + ], + [ + "The stem or the pear is not mentioned.", + 0 + ], + [ + "The size of the stem is not mentioned, but the stem of the pear is mentioned.", + 0.5 + ], + [ + "The size of the stem is mentioned in the description and is short.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the stem is mentioned in the description but is not brown.", + -1 + ], + [ + "The stem or the pear is not mentioned.", + 0 + ], + [ + "The color of the stem is not mentioned, but the stem of the pear is mentioned.", + 0.5 + ], 
+ [ + "The color of the stem is mentioned in the description and is brown.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the skin is mentioned in the description but is not yellow or green.", + -1 + ], + [ + "The skin or the pear is not mentioned.", + 0 + ], + [ + "The color of the skin is not mentioned, but the skin of the pear is mentioned.", + 0.5 + ], + [ + "The color of the skin is mentioned in the description and is yellow or green.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cup is not mentioned in the description.", + 1 + ], + [ + "The cup is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pear is not mentioned in the description.", + 0 + ], + [ + "The core of the pear is not mentioned in the description.", + 1 + ], + [ + "The core of the pear is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The white top is not mentioned in the description.", + 1 + ], + [ + "The white top is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The grass are not mentioned in the description.", + 1 + ], + [ + "The grass are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pear is not mentioned in the description.", + 0 + ], + [ + "The leaf of the pear 
is not mentioned in the description.", + 1 + ], + [ + "The leaf of the pear is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + } + ], + "18301585": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the backrest is mentioned in the description and is black.", + 1 + ], + [ + "The backrest or the bench is not mentioned.", + 0 + ], + [ + "The color of the backrest is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the backrest is not mentioned, but the backrest of the bench is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the legs is mentioned in the description and is white.", + 1 + ], + [ + "The legs or the bench are not mentioned.", + 0 + ], + [ + "The color of the legs is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the legs is not mentioned, but the legs of the bench are mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the backrest is mentioned in the description and is slatted.", + 1 + ], + [ + "The backrest or the bench is not mentioned.", + 0 + ], + [ + "The shape of the backrest is mentioned in the description but is not slatted.", + -1 + ], + [ + "The shape of the backrest is not mentioned, but the backrest of the bench is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bollards are mentioned in the description.", + -1 + ], + [ + "The bollards are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + 
"choices": [ + [ + "The storage compartment of the bench is mentioned in the description.", + -1 + ], + [ + "The storage compartment of the bench is not mentioned in the description.", + 1 + ], + [ + "The bench is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The footrest of the bench is mentioned in the description.", + -1 + ], + [ + "The footrest of the bench is not mentioned in the description.", + 1 + ], + [ + "The bench is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The headrest of the bench is mentioned in the description.", + -1 + ], + [ + "The headrest of the bench is not mentioned in the description.", + 1 + ], + [ + "The bench is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The trees are mentioned in the description.", + -1 + ], + [ + "The trees are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "18680641": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the carpet is mentioned in the description but is not fabric.", + -1 + ], + [ + "The material of the carpet is not mentioned.", + 0 + ], + [ + "The material of the carpet is mentioned in the description and is fabric.", + 1 + ], + [ + "The carpet is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the edge is mentioned in the description but is not straight.", + -1 + ], + [ + "The 
shape of the edge is not mentioned, but the edge of the carpet is mentioned.", + 0.5 + ], + [ + "The shape of the edge is mentioned in the description and is straight.", + 1 + ], + [ + "The edge or the carpet is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the carpet is mentioned in the description but is not orange or red.", + -1 + ], + [ + "The color of the carpet is not mentioned.", + 0 + ], + [ + "The color of the carpet is mentioned in the description and is orange or red.", + 1 + ], + [ + "The carpet is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the carpet is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the carpet is not mentioned.", + 0 + ], + [ + "The shape of the carpet is mentioned in the description and is rectangular.", + 1 + ], + [ + "The carpet is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The carpet is not mentioned in the description.", + 0 + ], + [ + "The tassels of the carpet are not mentioned in the description.", + 1 + ], + [ + "The tassels of the carpet are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The drainage pipe is not mentioned in the description.", + 1 + ], + [ + "The drainage pipe is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The carpet is not mentioned in the description.", + 0 + ], + [ + "The pattern of the carpet is not 
mentioned in the description.", + 1 + ], + [ + "The pattern of the carpet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shop sign is not mentioned in the description.", + 1 + ], + [ + "The shop sign is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The candy display is not mentioned in the description.", + 1 + ], + [ + "The candy display is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "25273528": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the hot air balloon is mentioned in the description and is multicolored.", + 1 + ], + [ + "The hot air balloon is not mentioned.", + 0 + ], + [ + "The color of the hot air balloon is mentioned in the description but is not multicolored.", + -1 + ], + [ + "The color of the hot air balloon is not mentioned.", + 0 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the envelope is mentioned in the description and is nylon or polyester.", + 1 + ], + [ + "The envelope or the hot air balloon is not mentioned.", + 0 + ], + [ + "The material of the envelope is mentioned in the description but is not nylon or polyester.", + -1 + ], + [ + "The material of the envelope is not mentioned, but the envelope of the hot air balloon is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The position of the basket is mentioned in the description and is bottom.", + 1 + ], + [ + "The basket or the hot 
air balloon is not mentioned.", + 0 + ], + [ + "The position of the basket is mentioned in the description but is not bottom.", + -1 + ], + [ + "The position of the basket is not mentioned, but the basket of the hot air balloon is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the basket is mentioned in the description and is small.", + 1 + ], + [ + "The basket or the hot air balloon is not mentioned.", + 0 + ], + [ + "The size of the basket is mentioned in the description but is not small.", + -1 + ], + [ + "The size of the basket is not mentioned, but the basket of the hot air balloon is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the basket is mentioned in the description and is dark or black.", + 1 + ], + [ + "The basket or the hot air balloon is not mentioned.", + 0 + ], + [ + "The color of the basket is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The color of the basket is not mentioned, but the basket of the hot air balloon is mentioned.", + 0.5 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fuel tanks of the hot air balloon are not mentioned in the description.", + 1 + ], + [ + "The fuel tanks of the hot air balloon are mentioned in the description.", + -1 + ], + [ + "The hot air balloon is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The burner of the hot air balloon is not mentioned in the description.", + 1 + ], + [ + "The burner of the hot air balloon is mentioned in the description.", + -1 + ], + [ + "The hot air balloon is not mentioned 
in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ground is not mentioned in the description.", + 1 + ], + [ + "The ground is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The people are not mentioned in the description.", + 1 + ], + [ + "The people are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The trees are not mentioned in the description.", + 1 + ], + [ + "The trees are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ], + "25419509": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fork is not mentioned.", + 0 + ], + [ + "The color of the fork is not mentioned.", + 0 + ], + [ + "The color of the fork is mentioned in the description and is metallic.", + 1 + ], + [ + "The color of the fork is mentioned in the description but is not metallic.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle or the fork is not mentioned.", + 0 + ], + [ + "The material of the handle is not mentioned, but the handle of the fork is mentioned.", + 0.5 + ], + [ + "The material of the handle is mentioned in the description and is metal.", + 1 + ], + [ + "The material of the handle is mentioned in the description but is not metal.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle or the fork is not mentioned.", + 0 
+ ], + [ + "The shape of the handle is not mentioned, but the handle of the fork is mentioned.", + 0.5 + ], + [ + "The shape of the handle is mentioned in the description and is curved.", + 1 + ], + [ + "The shape of the handle is mentioned in the description but is not curved.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle or the fork is not mentioned.", + 0 + ], + [ + "The texture of the handle is not mentioned, but the handle of the fork is mentioned.", + 0.5 + ], + [ + "The texture of the handle is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the handle is mentioned in the description but is not smooth.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tines or the fork are not mentioned.", + 0 + ], + [ + "The number of parts of the tines is not mentioned, but the tines of the fork are mentioned.", + 0.5 + ], + [ + "The number of parts of the tines is mentioned in the description and is 4.", + 1 + ], + [ + "The number of parts of the tines is mentioned in the description but is not 4.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bolster of the fork is not mentioned in the description.", + 1 + ], + [ + "The bolster of the fork is mentioned in the description.", + -1 + ], + [ + "The fork is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plates are not mentioned in the description.", + 1 + ], + [ + "The plates are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to 
the description?", + "choices": [ + [ + "The end cap of the fork is not mentioned in the description.", + 1 + ], + [ + "The end cap of the fork is mentioned in the description.", + -1 + ], + [ + "The fork is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The drinks are not mentioned in the description.", + 1 + ], + [ + "The drinks are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ferrule of the fork is not mentioned in the description.", + 1 + ], + [ + "The ferrule of the fork is mentioned in the description.", + -1 + ], + [ + "The fork is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part" + } + ], + "25612310": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The basket is not mentioned.", + 0 + ], + [ + "The texture of the basket is not mentioned.", + 0 + ], + [ + "The texture of the basket is mentioned in the description and is woven.", + 1 + ], + [ + "The texture of the basket is mentioned in the description but is not woven.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The basket is not mentioned.", + 0 + ], + [ + "The material of the basket is not mentioned.", + 0 + ], + [ + "The material of the basket is mentioned in the description and is wicker.", + 1 + ], + [ + "The material of the basket is mentioned in the description but is not wicker.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The basket is not mentioned.", + 0 + ], + [ + "The type of the 
basket is not mentioned.", + 0 + ], + [ + "The type of the basket is mentioned in the description and is interlaced.", + 1 + ], + [ + "The type of the basket is mentioned in the description but is not interlaced.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The basket is not mentioned.", + 0 + ], + [ + "The color of the basket is not mentioned.", + 0 + ], + [ + "The color of the basket is mentioned in the description and is brown or wooden.", + 1 + ], + [ + "The color of the basket is mentioned in the description but is not brown or wooden.", + -1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The base of the basket is mentioned in the description.", + -1 + ], + [ + "The basket is not mentioned in the description.", + 0 + ], + [ + "The base of the basket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle of the basket is mentioned in the description.", + -1 + ], + [ + "The basket is not mentioned in the description.", + 0 + ], + [ + "The handle of the basket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lid of the basket is mentioned in the description.", + -1 + ], + [ + "The basket is not mentioned in the description.", + 0 + ], + [ + "The lid of the basket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The decorative elements of the basket are mentioned in the description.", + -1 + ], + [ + "The 
basket is not mentioned in the description.", + 0 + ], + [ + "The decorative elements of the basket are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lining of the basket is mentioned in the description.", + -1 + ], + [ + "The basket is not mentioned in the description.", + 0 + ], + [ + "The lining of the basket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part" + } + ], + "17265253": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the spokes is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the spokes is not mentioned, but the spokes of the rickshaw are mentioned.", + 0.5 + ], + [ + "The spokes or the rickshaw are not mentioned.", + 0 + ], + [ + "The material of the spokes is mentioned in the description and is metal.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the tire is mentioned in the description but is not circular.", + -1 + ], + [ + "The shape of the tire is not mentioned, but the tire of the rickshaw is mentioned.", + 0.5 + ], + [ + "The tire or the rickshaw is not mentioned.", + 0 + ], + [ + "The shape of the tire is mentioned in the description and is circular.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the tire is mentioned in the description but is not rubber.", + -1 + ], + [ + "The material of the tire is not mentioned, but the tire of the rickshaw is mentioned.", + 0.5 + ], + [ + "The tire or the rickshaw is not mentioned.", + 0 + ], + [ + "The material of the tire is mentioned in the description and is rubber.", + 1 
+ ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the spokes is mentioned in the description but is not silver.", + -1 + ], + [ + "The color of the spokes is not mentioned, but the spokes of the rickshaw are mentioned.", + 0.5 + ], + [ + "The spokes or the rickshaw are not mentioned.", + 0 + ], + [ + "The color of the spokes is mentioned in the description and is silver.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the tire is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the tire is not mentioned, but the tire of the rickshaw is mentioned.", + 0.5 + ], + [ + "The tire or the rickshaw is not mentioned.", + 0 + ], + [ + "The color of the tire is mentioned in the description and is black.", + 1 + ] + ], + "type": "positive" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The path is not mentioned in the description.", + 1 + ], + [ + "The path is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rickshaw is not mentioned in the description.", + 0 + ], + [ + "The rickshaw canopy of the rickshaw is not mentioned in the description.", + 1 + ], + [ + "The rickshaw canopy of the rickshaw is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rickshaw is not mentioned in the description.", + 0 + ], + [ + "The rickshaw handlebars of the rickshaw are not mentioned in the description.", + 1 + ], + [ + "The rickshaw handlebars of the rickshaw are mentioned in the description.", + 
-1 + ] + ], + "type": "negative", + "subtype": "negative part" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fence is not mentioned in the description.", + 1 + ], + [ + "The fence is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bamboo trees are not mentioned in the description.", + 1 + ], + [ + "The bamboo trees are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative" + } + ] +} diff --git a/evaluation/DLC-Bench/eval_gpt_with_image.py b/evaluation/DLC-Bench/eval_gpt_with_image.py new file mode 100644 index 0000000000000000000000000000000000000000..a494e3720937cbcddd94e559823c2f2e702b5426 --- /dev/null +++ b/evaluation/DLC-Bench/eval_gpt_with_image.py @@ -0,0 +1,483 @@ +# ************************************************************************* +# This file may have been modified by Bytedance Inc. (“Bytedance Inc.'s Mo- +# difications”). All Bytedance Inc.'s Modifications are Copyright (2025) B- +# ytedance Inc.. +# ************************************************************************* + +# Adapted from https://github.com/NVlabs/describe-anything/blob/main/evaluation/eval_model_outputs.py + +# Copyright 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: Apache-2.0 + +import argparse +import base64 +import io +import json +import os + +import inflect +import numpy as np +import openai +from PIL import Image +from pycocotools.coco import COCO +from tqdm import tqdm + +# Define Azure OpenAI details +model_name = "gpt-4o-2024-11-20" +max_tokens = 1000 # range: [1, 4095] + +# Initialize the Azure client +client = openai.AzureOpenAI( + azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"), + api_key=os.getenv("AZURE_OPENAI_KEY"), + api_version="2024-03-01-preview", +) + +prompt_eval = """Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules: +1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question. +2. There is no need for exact matching. Please choose the closest option based on the description. 
def query(prompt, images, temperature, max_tokens):
    """Send one evaluation prompt plus the image/mask pair to the chat model.

    Args:
        prompt: Fully formatted text prompt (description + question).
        images: Sequence of base64-encoded PNG payloads; ``images[0]`` is the
            image and ``images[1]`` is the mask of the region.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion token cap forwarded to the API.

    Returns:
        The text content of the first returned choice.

    Raises:
        RuntimeError: If the module-wide call counter has already reached
            ``args.api_call_limit``.
    """
    global api_call_count
    if api_call_count >= args.api_call_limit:
        # RuntimeError is still caught by any existing ``except Exception``.
        raise RuntimeError("API call limit reached")
    api_call_count += 1

    def as_image_part(encoded):
        # The payloads are produced by encode_pil_image_to_base64, which saves
        # as PNG, so the data URL must declare image/png (not image/jpeg).
        return {
            "type": "image_url",
            "image_url": {"url": f"data:image/png;base64,{encoded}"},
        }

    content = [
        {"type": "text", "text": "The image:\n"},
        as_image_part(images[0]),
        {"type": "text", "text": "\nThe mask of the image:\n"},
        as_image_part(images[1]),
        {"type": "text", "text": f"\n{prompt}\n"},
    ]

    # Uses the module-level Azure OpenAI client and model name.
    response = client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": content}],
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )

    return response.choices[0].message.content
def evaluate(
    question_dicts,
    pred_caption,
    temperature,
    max_tokens,
    images,
    *,
    response_override=None,
    key,
    verbose=False,
) -> dict:
    """Score one predicted caption against its multiple-choice questions.

    For every question a prompt is built from the module-level ``prompt_eval``
    template and answered either from ``response_override`` (cached responses
    of a previous run) or by calling ``query``. Answers are mapped to a choice
    with ``parse_pred`` and the score attached to that choice is collected.

    Args:
        question_dicts: Question dicts with ``question``, ``choices`` (pairs of
            ``(text, score)``) and ``type`` (``"recognition"``, ``"positive"``
            or ``"negative"``).
        pred_caption: The description being evaluated.
        temperature: Sampling temperature passed to ``query``.
        max_tokens: Completion token cap passed to ``query``.
        images: Base64 image payloads passed to ``query``.
        response_override: Optional list of cached responses, indexed like
            ``question_dicts``; missing or ``None`` entries are queried.
        key: Identifier of the instance, used in messages only.
        verbose: Print a note whenever a score is clamped.

    Returns:
        A dict with per-question details, the prompts and raw responses, the
        pooled ``score``, ``score_pos``, ``score_neg`` (``None`` when there are
        no negative questions) and ``recognition_result``.

    Raises:
        ValueError: On an invalid recognition result or unknown question type.
        ZeroDivisionError: If there is no positive question, or no positive
            and negative question at all.
    """
    labels = "ABCDE"
    pred_answers = []
    prompt = []
    response = []
    for index, question_dict in enumerate(question_dicts):
        choices_text = ""
        for choice_index, (choice, _score) in enumerate(question_dict["choices"]):
            choices_text += f"{labels[choice_index]}. {choice}\n"
        question_text_str = f"{question_dict['question']}\n{choices_text}"
        prompt_item = prompt_eval.format(
            pred_caption=pred_caption, question_text_str=question_text_str.strip()
        )

        # A cached list may be shorter than the question list; ``index`` must be
        # strictly below its length (the former ``len(...) < index`` test let
        # ``index == len(...)`` through and raised IndexError).
        cached_response = None
        if response_override is not None and index < len(response_override):
            cached_response = response_override[index]
        if cached_response is None:
            response_item = query(prompt_item, images, temperature, max_tokens)
        else:
            response_item = cached_response

        pred_answers.append(response_item.strip())
        prompt.append(prompt_item)
        response.append(response_item)

    pred_indices = [
        parse_pred(
            pred_answer, [choice for choice, _score in question_dict["choices"]], key
        )
        for pred_answer, question_dict in zip(pred_answers, question_dicts)
    ]
    # An unparseable answer is treated as "not mentioned" and scores 0.
    parsed_eval_results = [
        question_dict["choices"][pred_index][1] if pred_index is not None else 0
        for pred_index, question_dict in zip(pred_indices, question_dicts)
    ]

    parsed_eval_results_positives = []
    parsed_eval_results_negatives = []
    details_positives = []
    details_negatives = []
    details_recognition = []
    recognition_result = None
    for question_index, (parsed_eval_result, question_dict) in enumerate(
        zip(parsed_eval_results, question_dicts)
    ):
        question_type = question_dict["type"]
        if question_type == "recognition":
            if parsed_eval_result == "correct":
                recognition_result = True
            elif parsed_eval_result == "incorrect":
                recognition_result = False
                print(
                    f"Recognition is incorrect for key {key}, setting score to at most 0 for all questions"
                )
            else:
                raise ValueError(f"Invalid recognition result: {parsed_eval_result}")
            bucket_scores, bucket_details = None, details_recognition
        elif question_type in ("negative", "positive"):
            if recognition_result is False:
                if verbose:
                    print(
                        f"Processing {question_type} question {question_index} for key {key}, setting score to at most 0 since recognition is incorrect"
                    )
                # A wrongly recognised object can only lose points.
                parsed_eval_result = min(0, parsed_eval_result)
            if question_type == "negative":
                bucket_scores = parsed_eval_results_negatives
                bucket_details = details_negatives
            else:
                bucket_scores = parsed_eval_results_positives
                bucket_details = details_positives
        else:
            # Same policy as the text-only evaluator: never drop a question silently.
            raise ValueError(f"Invalid question type: {question_dict['type']}")

        if bucket_scores is not None:
            bucket_scores.append(parsed_eval_result)
        bucket_details.append(
            {
                **question_dict,
                "pred_answer": pred_answers[question_index],
                "pred_index": pred_indices[question_index],
                "eval_result": parsed_eval_result,
            }
        )

    score_pos = sum(parsed_eval_results_positives) / len(parsed_eval_results_positives)
    score_neg = (
        sum(parsed_eval_results_negatives) / len(parsed_eval_results_negatives)
        if parsed_eval_results_negatives
        else None
    )
    # Pooled mean over every positive and negative question of this instance.
    score = (
        sum(parsed_eval_results_positives) + sum(parsed_eval_results_negatives)
    ) / (len(parsed_eval_results_positives) + len(parsed_eval_results_negatives))

    info = dict(
        details_positives=details_positives,
        details_negatives=details_negatives,
        details_recognition=details_recognition,
        prompt=prompt,
        response=response,
        score=score,
        score_pos=score_pos,
        score_neg=score_neg,
        recognition_result=recognition_result,
    )

    return info
if __name__ == "__main__":
    # Script entry point: evaluate every annotated region listed in the QA file
    # with the image-aware GPT judge and write/extend the evaluation JSON.
    parser = argparse.ArgumentParser(description="Evaluate model outputs")
    parser.add_argument(
        "--pred", type=str, help="Path to the prediction JSON file", required=True
    )
    parser.add_argument(
        "--qa",
        type=str,
        help="Path to the reference QA file",
        default="evaluation/DLC-Bench/annotations/qa.json",
    )
    parser.add_argument(
        "--class-names",
        type=str,
        help="Path to the class names JSON file",
        default="evaluation/DLC-Bench/annotations/class_names.json",
    )
    # The loop below reads args.default_prediction; without this option the
    # first missing key raised AttributeError instead of the intended error.
    parser.add_argument(
        "--default-prediction",
        type=str,
        default=None,
        help="Default prediction if key is not present in the prediction file",
    )
    parser.add_argument(
        "--api-call-limit", type=int, default=1000, help="API call limit"
    )
    parser.add_argument(
        "--suffix", type=str, default="", help="Suffix for the evaluation file"
    )
    parser.add_argument("--verbose", action="store_true", help="Enable verbose mode")
    parser.add_argument(
        "--quiet", action="store_true", help="Enable quiet mode (result only)"
    )
    parser.add_argument("--csv", action="store_true", help="Output results as CSV only")
    parser.add_argument(
        "--data-root", type=str, default="evaluation/DLC-Bench/annotations"
    )

    args = parser.parse_args()

    eval_file = os.path.splitext(args.pred)[0] + f"_eval_gpt{args.suffix}.json"

    # Responses of an earlier run are reused so that a rerun costs no API calls.
    eval_results = {}
    if os.path.exists(eval_file):
        with open(eval_file) as f:
            eval_results = json.load(f)

    with open(args.pred) as f:
        data_pred = json.load(f)

    with open(args.qa) as f:
        data_qa = json.load(f)

    with open(args.class_names) as f:
        data_class_names = json.load(f)

    scores = {}
    scores_pos = {}
    scores_neg = {}

    keys = list(data_qa.keys())
    p = inflect.engine()  # module-level engine read by is_plural

    annotations_file = os.path.join(args.data_root, "annotations.json")
    coco = COCO(annotations_file)

    missing_key_count = 0
    for key in tqdm(keys, disable=args.quiet):
        key = str(key)

        # The QA key is the annotation id; the annotation names its own image,
        # so no scan over all annotations is needed (and no stale img_id can
        # leak from a previous iteration).
        ann = coco.loadAnns([int(key)])[0]
        img_info = coco.loadImgs(ann["image_id"])[0]
        img_path = os.path.join(args.data_root, "images", img_info["file_name"])
        with Image.open(img_path) as img:
            img_np = np.array(img)

        mask_np = coco.annToMask(ann).astype(bool)

        if img_np.shape[:2] != mask_np.shape:
            raise ValueError(
                f"image shape mismatches with mask shape: {img_np.shape}, {mask_np.shape}"
            )
        img_h, img_w = img_np.shape[:2]

        x0, y0, w, h = mask_to_box(mask_np)
        xc, yc = x0 + w / 2, y0 + h / 2

        # focal_crop: need to have at least min_box_w and min_box_h pixels, otherwise resizing to (384, 384) leads to artifacts that may be OOD
        w, h = max(w, 56), max(h, 56)
        x0, y0 = int(xc - w / 2), int(yc - h / 2)

        # Focal crop: the box plus one box-size of context on every side,
        # clipped to the image.
        rows = slice(max(y0 - h, 0), min(y0 + 2 * h, img_h))
        cols = slice(max(x0 - w, 0), min(x0 + 2 * w, img_w))
        cropped_img_np = img_np[rows, cols]
        cropped_mask_np = mask_np[rows, cols]

        cropped_pil_img = Image.fromarray(cropped_img_np)
        cropped_pil_mask = Image.fromarray((cropped_mask_np * 255).astype(np.uint8))

        # Only the crops are sent to the judge.
        images = [
            encode_pil_image_to_base64(cropped_pil_img),
            encode_pil_image_to_base64(cropped_pil_mask),
        ]

        if key in eval_results:
            response_override = eval_results[key]["response"]
        else:
            response_override = None

        if key in data_pred:
            pred_value = data_pred[key]
        elif args.default_prediction is None:
            raise ValueError(f"Key {key} not found in prediction data")
        else:
            pred_value = args.default_prediction
            missing_key_count += 1

        class_name = data_class_names[key]
        recognition_question = f"The object in the image is {class_name}. Based on the image, is it likely that the object in the description is given class: {class_name} or object of a similar type?"
        recognition_question_dict = {
            "question": recognition_question,
            "choices": [("Yes", "correct"), ("No", "incorrect")],
            "type": "recognition",
        }

        # The recognition question must come first: its outcome clamps the
        # scores of the questions that follow.
        question_dicts = [recognition_question_dict, *data_qa[key]]
        info = evaluate(
            question_dicts=question_dicts,
            pred_caption=pred_value,
            images=images,
            temperature=0.0,
            max_tokens=300,
            response_override=response_override,
            key=key,
        )
        scores[key] = info["score"]
        scores_pos[key] = info["score_pos"]
        scores_neg[key] = info["score_neg"]
        eval_results[key] = {"pred": pred_value, **info}

    avg_score_pos = sum(scores_pos.values()) / len(scores_pos)
    # Instances without negative questions report None and are skipped; they
    # must not count in the denominator either.
    valid_scores_neg = [item for item in scores_neg.values() if item is not None]
    avg_score_neg = (
        sum(valid_scores_neg) / len(valid_scores_neg) if valid_scores_neg else 0.0
    )
    eval_results["avg_pos"] = avg_score_pos
    eval_results["avg_neg"] = avg_score_neg

    with open(eval_file, "w") as f:
        json.dump(eval_results, f, indent=4)

    if missing_key_count:
        print(f"Keys filled with the default prediction: {missing_key_count}")
    print(f"Average Positive Score: {avg_score_pos:.3f}")
    print(f"Average Negative Score: {avg_score_neg:.3f}")
    print(
        f"Summary (Pos\tNeg\tAvg(Pos, Neg)):\t{avg_score_pos:.3f},\t{avg_score_neg:.3f},\t{(avg_score_pos + avg_score_neg) / 2:.3f}"
    )
    print(f"QA Scores: {scores}")
    print(f"Evaluation data saved to {eval_file}")
def parse_pred(pred, choices, key):
    """Map a free-form model answer onto the index of one of ``choices``.

    Matching is attempted in this order (case-insensitive, whitespace
    stripped):

    1. exact choice text, ``"<label>. <choice>"`` or the bare label;
    2. exactly one choice occurring inside the answer;
    3. the answer starting with ``"<label>."``;
    4. several choices occurring inside the answer: the one found latest,
       ties resolved by the longest choice;
    5. the answer occurring inside a choice: the longest such choice;
    6. the answer starting with a label followed by a newline.

    Labels are the letters ``a``-``e``; a label only counts when it refers to
    an existing choice.

    Args:
        pred: Raw answer text.
        choices: Choice texts in presentation order.
        key: Instance identifier, used in log messages only.

    Returns:
        The index of the selected choice, or ``None`` when nothing matches
        (callers treat this as "not mentioned").
    """
    choices_label = "abcde"
    pred = pred.strip().lower()
    if not pred:
        # Nothing to parse; the positional checks below would raise IndexError.
        print(f"*WARNING*: {key}: No match found. Pred: {pred}, choices: {choices}")
        return None

    normalized = [choice.strip().lower() for choice in choices]

    substr_indices = []
    for index, choice in enumerate(normalized):
        # Questions with more choices than labels simply have no label match.
        prefix = choices_label[index] if index < len(choices_label) else None
        if choice == pred:
            return index
        if prefix is not None and (pred == f"{prefix}. {choice}" or pred == prefix):
            return index
        if choice in pred:
            substr_indices.append((index, pred.index(choice), len(choice)))

    # Only one match (choice in prediction)
    if len(substr_indices) == 1:
        return substr_indices[0][0]

    label_index = choices_label.find(pred[0])
    label_is_valid = 0 <= label_index < len(normalized)

    if len(pred) == 1 and label_index != -1:
        # A bare label that was not matched above points past the last choice.
        print(f"*WARNING*: {key}: No match found. Pred: {pred}, choices: {choices}")
        return None

    # Prefix match
    if label_is_valid and pred[1:2] == ".":
        return label_index

    # More than one match
    if substr_indices:
        # Return the last occurrence if there are multiple matches (referenced from MMMU): https://github.com/MMMU-Benchmark/MMMU/blob/b119c944a15c145c10d52a58e841c5b9cb6a535e/eval/utils/eval_utils.py#L57
        ret, ret_pos, _ = max(substr_indices, key=lambda x: x[1])
        max_items = [item for item in substr_indices if item[1] == ret_pos]
        if len(max_items) > 1:
            # select the item with the longest match if there are multiple occurrence at the same place
            ret = max(max_items, key=lambda x: x[2])[0]
        print(
            f"{key}: More than one occurrence found: pred {pred}, choices {choices}, {substr_indices}, returning {ret} (choice {choices_label})"
        )
        return ret

    # Parse the case where pred is a substr of choice
    match_lengths = [
        (index, len(choice))
        for index, choice in enumerate(normalized)
        if pred in choice
    ]
    if match_lengths:
        # Return the longest matched substring if there are multiple matches
        if len(match_lengths) > 1:
            ret = max(match_lengths, key=lambda x: x[1])[0]
            print(
                f"{key}: More than one occurrence found: pred {pred}, choices {choices}, {match_lengths}, returning {ret}"
            )
        else:
            ret = match_lengths[0][0]
        return ret

    # Label on its own line followed by further text.
    if label_is_valid and pred[1:2] == "\n":
        print(f"{key}: Chosen {label_index} for pred: {pred}, choices: {choices}")
        return label_index

    print(f"*WARNING*: {key}: No match found. Pred: {pred}, choices: {choices}")

    # If no matching choice is found, return None (treat as no mention, score 0).
    return None
for key {key} question {index}: pred: {len(pred_indices)} vs question: {len(question_dicts)}" + + # If no matching, treat as no mention. + try: + parsed_eval_results = [ + question_dict["choices"][pred_index][1] if pred_index is not None else 0 + for pred_index, question_dict in zip(pred_indices, question_dicts) + ] + except IndexError as e: + print( + f"Error: {e}, key: {key}, pred_indices: {pred_indices}, question_dicts: {question_dicts}" + ) + raise e + + parsed_eval_results_positives = [] + parsed_eval_results_negatives = [] + + details_positives = [] + details_negatives = [] + details_recognition = [] + recognition_result = None + for question_index, (parsed_eval_result, question_dict) in enumerate( + zip(parsed_eval_results, question_dicts) + ): + if question_dict["type"] == "recognition": + # If the type is recognition, it's the recognition question. + if parsed_eval_result == "correct": + recognition_result = True + elif parsed_eval_result == "incorrect": + recognition_result = False + print( + f"Recognition is incorrect for key {key}, setting score to at most 0 for all questions" + ) + else: + raise ValueError(f"Invalid recognition result: {parsed_eval_result}") + details_recognition.append( + { + **question_dict, + "pred_answer": pred_answers[question_index], + "pred_index": pred_indices[question_index], + "eval_result": parsed_eval_result, + } + ) + elif question_dict["type"] == "negative": + assert ( + recognition_result is not None + ), f"Negative questions come before recognition question in {key}, {question_dicts}" + if recognition_result is False: + if verbose: + print( + f"Processing negative question {question_index} for key {key}, setting score to at most 0 since recognition is incorrect" + ) + parsed_eval_result = min(0, parsed_eval_result) + # If the type is negative, it's one of the negatives. 
+ parsed_eval_results_negatives.append(parsed_eval_result) + details_negatives.append( + { + **question_dict, + "pred_answer": pred_answers[question_index], + "pred_index": pred_indices[question_index], + # Subtract 1 to get the index in the original question list (excluding the recognition question) + "question_index": question_index - 1, + "eval_result": parsed_eval_result, + } + ) + elif question_dict["type"] == "positive": + assert ( + recognition_result is not None + ), f"Positive questions come before recognition question in {key}, {question_dicts}" + if recognition_result is False: + if verbose: + print( + f"Processing positive question {question_index} for key {key}, setting score to at most 0 since recognition is incorrect" + ) + parsed_eval_result = min(0, parsed_eval_result) + parsed_eval_results_positives.append(parsed_eval_result) + details_positives.append( + { + **question_dict, + "pred_answer": pred_answers[question_index], + "pred_index": pred_indices[question_index], + # Subtract 1 to get the index in the original question list (excluding the recognition question) + "question_index": question_index - 1, + "eval_result": parsed_eval_result, + } + ) + else: + raise ValueError(f"Invalid question type: {question_dict['type']}") + + score_pos = sum(parsed_eval_results_positives) / len(parsed_eval_results_positives) + # It's possible that we don't have negatives for an instance. For this case, we skip over the instance for negative score calculation. 
def is_plural(string):
    """Tell whether ``string`` is a plural noun.

    The check is delegated to the module-global inflect engine ``p`` (the
    script entry point creates it before this function is called), except
    for the one word the original author found inflect does not handle.

    Args:
        string: The noun (class name) to classify.

    Returns:
        ``True`` if the word is plural, ``False`` if it is singular.
    """
    # "bus" is a case that the inflect library does not handle.
    known_singular = string == "bus"
    # singular_noun() gives back False for an already-singular word and the
    # singular form otherwise, so anything other than False means "plural".
    return False if known_singular else (p.singular_noun(string) is not False)
if __name__ == "__main__":
    # Script entry point: judge every DLC-Bench caption with an LLM served
    # behind an OpenAI-compatible API and report positive/negative averages.
    #
    # Example:
    # python eval_model_outputs.py --pred model_outputs_cache/dam_3b_v1.json --base-url "http://localhost:9100/v1"

    parser = argparse.ArgumentParser(description="Evaluate model outputs")
    parser.add_argument(
        "--pred", type=str, help="Path to the prediction JSON file", required=True
    )
    parser.add_argument(
        "--qa",
        type=str,
        help="Path to the reference QA file",
        default="evaluation/DLC-Bench/annotations/qa.json",
    )
    parser.add_argument(
        "--class-names",
        type=str,
        help="Path to the class names JSON file",
        default="evaluation/DLC-Bench/annotations/class_names.json",
    )
    parser.add_argument(
        "--default-prediction",
        type=str,
        default=None,
        help="Default prediction if key is not present in the prediction file",
    )
    parser.add_argument(
        "--api-call-limit", type=int, default=1000, help="API call limit"
    )
    parser.add_argument(
        "--api-key", type=str, default=None, help="Path to the OpenAI API key file"
    )
    parser.add_argument(
        "--suffix", type=str, default="", help="Suffix for the evaluation file"
    )
    parser.add_argument("--model", type=str, default="llama3.1-8b", help="Model name")
    parser.add_argument("--verbose", action="store_true", help="Enable verbose mode")
    parser.add_argument(
        "--quiet", action="store_true", help="Enable quiet mode (result only)"
    )
    parser.add_argument("--csv", action="store_true", help="Output results as CSV only")

    parser.add_argument(
        "--base-url",
        type=str,
        default="http://localhost:8007/v1",
        help="Base URL for the API call",
    )
    args = parser.parse_args()

    # Keep a handle on the real print for the final results, then silence the
    # module-level ``print`` in quiet mode. Rebinding the global is deliberate:
    # it also mutes every print() issued by other functions in this module.
    always_print = print
    if args.quiet:
        print = lambda *args, **kwargs: None

    # v3 is from v2.1
    eval_file = os.path.splitext(args.pred)[0] + f"_eval{args.suffix}.json"
    eval_results = {}

    # Resume from a previous run: judge responses already stored for a key are
    # handed back to evaluate() through ``response_override``.
    if os.path.exists(eval_file):
        print(f"Loading existing evaluation data from {eval_file}")
        try:
            with open(eval_file, encoding="utf-8") as f:
                eval_results = json.load(f)
        except Exception as e:
            always_print(f"Error loading evaluation data {eval_file}: {e}")
            raise

    # ``client`` stays a module-level name; without a key file a placeholder
    # key is sent (sufficient for local OpenAI-compatible servers).
    if args.api_key:
        with open(args.api_key) as f:
            client = OpenAI(api_key=f.read().strip(), base_url=args.base_url)
    else:
        client = OpenAI(api_key="sk-abc123", base_url=args.base_url)

    with open(args.pred, encoding="utf-8") as f:
        data_pred = json.load(f)

    with open(args.qa, encoding="utf-8") as f:
        data_qa = json.load(f)

    with open(args.class_names, encoding="utf-8") as f:
        data_class_names = json.load(f)

    scores = {}
    scores_pos = {}
    scores_neg = {}

    keys = list(data_qa.keys())

    # Module-level inflect engine; is_plural() reads it as a global.
    p = inflect.engine()

    print(f"Using model {args.model}")

    missing_key_count = 0
    for key in tqdm(keys, disable=args.quiet):
        key = str(key)
        if key in eval_results:
            if args.verbose:
                print(f"Skipping {key}")
            response_override = eval_results[key]["response"]
        else:
            response_override = None

        # Pick the caption to judge, falling back to --default-prediction when
        # the prediction is absent or is a recorded inference error.
        if key not in data_pred:
            if args.default_prediction is None:
                raise ValueError(
                    f"Key {key} not found in prediction data, and no default prediction provided"
                )
            print(
                f"Key {key} not found in prediction data, using default prediction {args.default_prediction}"
            )
            pred_value = args.default_prediction
            missing_key_count += 1
        elif data_pred[key].startswith("Error:"):
            if args.default_prediction is None:
                raise ValueError(
                    f"Key {key} has an error in prediction data, and no default prediction provided: {data_pred[key]}"
                )
            print(
                f"Key {key} has an error in prediction: {data_pred[key]}, using default prediction {args.default_prediction}"
            )
            pred_value = args.default_prediction
            missing_key_count += 1
        else:
            pred_value = data_pred[key]

        class_name = data_class_names[key]

        if is_plural(class_name):
            recognition_question = f"Is it likely that the objects in the description are {class_name} or objects of a similar type? Again, It does not have to be an exact match."
        else:
            recognition_question = f"Is it likely that the object in the description is {p.a(class_name)} or an object of a similar type? Again, It does not have to be an exact match."
        recognition_question_dict = {
            "question": recognition_question,
            "choices": [("Yes", "correct"), ("No", "incorrect")],
            "type": "recognition",
        }

        # Add the recognition question to the beginning of the list
        question_dicts = [recognition_question_dict, *data_qa[key]]
        info = evaluate(
            question_dicts=question_dicts,
            pred_caption=pred_value,
            model=args.model,
            temperature=0.0,
            max_tokens=300,
            response_override=response_override,
            key=key,
            verbose=args.verbose,
        )
        score = info["score"]
        scores[key] = score
        scores_pos[key] = info["score_pos"]
        scores_neg[key] = info["score_neg"]
        eval_results[key] = {"pred": pred_value, **info}

        if args.verbose:
            print(f"Score: {score}")

        # Checkpoint after every key so an interrupted run can be resumed.
        with open(eval_file, "w") as f:
            json.dump(eval_results, f, indent=4)

    # Fail with an explicit message instead of an opaque ZeroDivisionError when
    # there is nothing to average.
    if not scores_pos:
        raise ValueError(
            f"No questions were evaluated: {args.qa} contains no keys"
        )
    avg_score_pos = sum(scores_pos.values()) / len(scores_pos)

    # Instances without negative questions report score_neg=None and are left
    # out of the negative average.
    scores_neg_valid_only = [item for item in scores_neg.values() if item is not None]
    if not scores_neg_valid_only:
        raise ValueError(
            "No instance has negative questions; cannot compute the average negative score"
        )
    avg_score_neg = sum(scores_neg_valid_only) / len(scores_neg_valid_only)
    avg_score = (avg_score_pos + avg_score_neg) / 2

    if args.csv:
        # Print comma-separated values directly to stdout
        always_print(f"{avg_score_pos:.3f},{avg_score_neg:.3f},{avg_score:.3f}")
    else:
        always_print(f"Result for {args.pred}")
        always_print(f"Average Positive Score: {avg_score_pos:.3f}")
        always_print(f"Average Negative Score: {avg_score_neg:.3f}")
        always_print(f"Average of Positive and Negative Scores: {avg_score:.3f}")
        always_print(
            f"Summary (Pos\tNeg\tAvg(Pos, Neg)):\t{avg_score_pos:.3f},\t{avg_score_neg:.3f},\t{avg_score:.3f}"
        )
        print(f"QA Scores: {scores}")

    if missing_key_count:
        print(
            f"Note: Missing {missing_key_count} keys, using default prediction {args.default_prediction}"
        )

    # Store the aggregates next to the per-key records in the same file.
    eval_results["avg_pos"] = avg_score_pos
    eval_results["avg_neg"] = avg_score_neg
    with open(eval_file, "w") as f:
        json.dump(eval_results, f, indent=4)

    print(f"Evaluation data saved to {eval_file}")
def parse_args():
    """Parse the command-line options of the DLC-Bench inference script.

    Returns:
        The ``argparse.Namespace`` with ``model_name_or_path``, ``cache_name``,
        ``data_type``, ``anno_file``, ``image_folder`` and ``seed``.
    """
    cli = argparse.ArgumentParser(
        description="Inference of Grasp Any Region models on DLC-Bench."
    )

    # (flag, add_argument keyword arguments), registered in this order so the
    # generated --help output lists them the same way.
    option_table = (
        (
            "--model_name_or_path",
            {"help": "HF model name or path", "default": "HaochenWang/GAR-1B"},
        ),
        (
            "--cache_name",
            {"help": "cache name to save model outputs.", "default": "gar_1b"},
        ),
        (
            "--data_type",
            {
                "help": "data dtype",
                "type": str,
                "choices": ["fp16", "bf16", "fp32"],
                "default": "bf16",
            },
        ),
        (
            "--anno_file",
            {
                "help": "path to the annotation file.",
                "default": "evaluation/DLC-Bench/annotations/annotations.json",
            },
        ),
        (
            "--image_folder",
            {
                "help": "the folder of images",
                "default": "evaluation/DLC-Bench/annotations",
            },
        ),
        (
            "--seed",
            {
                "type": int,
                "default": 0,
                "help": "Random seed for reproducible text generation",
            },
        ),
    )
    for flag, options in option_table:
        cli.add_argument(flag, **options)

    return cli.parse_args()
def select_ann(coco, img_id, area_min=None, area_max=None):
    """List the annotation ids of one image, optionally filtered by area.

    Args:
        coco: COCO-style index exposing ``getCatIds()``, ``getAnnIds()`` and
            an ``anns`` mapping whose records carry an ``"area"`` entry.
        img_id: Id of the image whose annotations are requested.
        area_min: Inclusive lower bound on the area, or ``None`` for no bound.
        area_max: Inclusive upper bound on the area, or ``None`` for no bound.

    Returns:
        The annotation ids of ``img_id`` (all categories, crowd and non-crowd)
        whose area lies inside the requested bounds.
    """
    cat_ids = coco.getCatIds()
    candidates = coco.getAnnIds(imgIds=[img_id], catIds=cat_ids, iscrowd=None)

    # Nothing to filter: hand back the index's answer untouched.
    if area_min is None and area_max is None:
        return candidates

    def _area_in_bounds(ann_id):
        area = coco.anns[ann_id]["area"]
        if area_min is not None and not area >= area_min:
            return False
        if area_max is not None and not area <= area_max:
            return False
        return True

    return [ann_id for ann_id in candidates if _area_in_bounds(ann_id)]
def main():
    """Caption every annotated region of DLC-Bench and cache the results.

    Loads the model and processor named by ``--model_name_or_path``, walks all
    annotations of ``--anno_file``, generates one greedy-decoded caption per
    annotation mask, and writes ``{annotation_id: caption}`` to
    ``evaluation/DLC-Bench/model_outputs/<cache_name>.json``.
    """
    args = parse_args()
    data_dtype = TORCH_DTYPE_MAP[args.data_type]
    torch.manual_seed(args.seed)

    # init distribution for dispatch_modules in LLM
    torch.cuda.set_device(0)
    torch.distributed.init_process_group(backend="nccl")

    # build HF model
    model = AutoModel.from_pretrained(
        args.model_name_or_path,
        trust_remote_code=True,
        torch_dtype=data_dtype,
    )
    model.cuda()
    model.eval()

    processor = AutoProcessor.from_pretrained(
        args.model_name_or_path,
        trust_remote_code=True,
    )
    model_outputs = {}
    cache_name = args.cache_name

    # The visual-prompt vocabulary only depends on the model config, so it is
    # built once instead of once per annotation.
    # NOTE(review): the token spellings were reconstructed ("<Prompt{i}>" per
    # region id plus "<NO_Prompt>" for the background); confirm they match the
    # special tokens registered in the model's tokenizer.
    prompt_number = model.config.prompt_numbers
    prompt_tokens = [f"<Prompt{i_p}>" for i_p in range(prompt_number)] + [
        "<NO_Prompt>"
    ]

    # This coco instance is actually an o365 subset. This is for code reuse.
    coco = COCO(args.anno_file)
    img_ids = list(coco.imgs.keys())

    # The context manager closes the progress bar even if generation fails.
    with tqdm(total=len(coco.anns)) as pbar:
        for img_id in img_ids:
            ann_ids = select_ann(coco, img_id)
            img_info = coco.loadImgs(img_id)[0]
            img_path = os.path.join(
                args.image_folder, "images", img_info["file_name"]
            )

            for ann_id in ann_ids:
                if ann_id in model_outputs:
                    pbar.update(1)
                    continue

                anns = coco.loadAnns([ann_id])
                mask = coco.annToMask(anns[0])

                img = Image.open(img_path)

                dataset = SingleRegionCaptionDataset(
                    image=img,
                    mask=mask,
                    processor=processor,
                    prompt_number=prompt_number,
                    visual_prompt_tokens=prompt_tokens,
                    data_dtype=data_dtype,
                )
                data_sample = dataset[0]

                with torch.no_grad():
                    generate_ids = model.generate(
                        **data_sample,
                        generation_config=GenerationConfig(
                            max_new_tokens=1024,
                            do_sample=False,
                            eos_token_id=processor.tokenizer.eos_token_id,
                            pad_token_id=processor.tokenizer.pad_token_id,
                        ),
                        return_dict=True,
                    )

                outputs = processor.tokenizer.decode(
                    generate_ids.sequences[0], skip_special_tokens=True
                ).strip()

                print(outputs)  # Print model output for this image

                model_outputs[ann_id] = outputs
                pbar.update(1)

    # Create the cache directory on first use, and write UTF-8 explicitly
    # because ensure_ascii=False emits non-ASCII characters verbatim.
    output_path = f"evaluation/DLC-Bench/model_outputs/{cache_name}.json"
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as file:
        json.dump(model_outputs, file, indent=4, ensure_ascii=False)

    print(f"Cache name: {cache_name}")


if __name__ == "__main__":
    main()
along its length. It is equipped with a black binding system, including a silver and black mechanism, and an orange adjustment lever. The tip of the ski has a similar pattern to the rest of the ski.", + "297718": "A piece of sushi with a bed of white rice, topped with a layer of black seaweed, and filled with a mixture of pink and white fish roe. The top is garnished with a sprinkle of sesame seeds.", + "361105": "A small cluster of fresh, vibrant green leaves with a smooth texture, attached to a slender, slightly curved stem. The leaves are elongated with pointed tips and a glossy surface, showing a few small brown spots.", + "622329": "A rectangular, flat, beige eraser with rounded corners and a slightly textured surface.", + "622332": "A black rectangular stapler with a glossy finish, featuring a silver brand logo on the top right corner. The stapler has a visible metal stapling mechanism on the right side.", + "1075308": "A vintage-style television set with a boxy, black plastic casing. The front features a large, square screen with a slightly curved surface. The top of the television has a series of buttons and dials, and there is a small, rectangular display area above the screen.", + "1196168": "A rectangular, white air conditioner unit with a large circular fan grille on the left side. The grille has a grid pattern with multiple blades visible. To the right of the grille, there is a small rectangular panel with a circular emblem and some text.", + "1770866": "A white price tag with handwritten text in blue and red marker. The text reads \"Libra\" in blue at the top, followed by \"Lb\" in blue, \"per\" in blue, \"lb\" in blue, and \"950\" in red.", + "1894089": "A metallic screwdriver with a long, slender shaft and a flat, rectangular head. 
The shaft is smooth and tapers slightly towards the head, which is flat and has a small, circular indentation near the tip.", + "2391761": "The canoe has a wooden hull with horizontal planks and a blue tarpaulin cover draped over it. The tarpaulin is secured with ropes and has some white markings on it. The canoe also features a small outboard motor mounted on the stern.", + "2391780": "A bird with a long, slender neck and a pointed beak. Its plumage is predominantly brown with lighter, almost white, streaks on the wings and back. The bird's legs are thin and dark, and it has a small, rounded tail.", + "2391781": "The bird has a predominantly dark brown body with lighter brown and white markings on its wings and back. Its wings are outstretched, showing a mix of dark and light feathers. The bird's head is slightly turned, with a visible beak and a hint of white feathers around the neck area.", + "2580318": "The mouse has a sleek, metallic silver body with a smooth, reflective surface. The visible part of the mouse is triangular in shape, with a slightly curved edge and a subtle gradient of light reflecting off its surface.", + "2580323": "A rectangular wooden frame encloses a detailed architectural blueprint with various lines, symbols, and text. The frame has a natural wood finish and is mounted on a wall.", + "2588513": "A rectangular wooden block with a light beige top surface and a black bottom surface. The top surface has a smooth texture with visible wood grain patterns, while the bottom surface is solid black.", + "3993075": "A cylindrical marker with a white body featuring a colorful design, including a blue and green pattern near the middle and a red cap.", + "4027486": "The bus is predominantly blue with a white section on the right side. It has a black horizontal stripe running along the middle, with a green stripe above it. The rear of the bus features a white license plate with black text. 
There is a small, white, triangular logo with a black design on the blue section near the rear.", + "4243725": "The soap is a rectangular, slightly curved bar with a smooth, creamy beige surface.", + "4502267": "A green bean with a smooth, slightly curved surface, tapering to a point at one end and having a broader, rounded base at the other. The bean has a consistent green color with subtle variations in shading.", + "4604873": "A large, industrial crane with a lattice structure, featuring a long, horizontal boom extending from a vertical mast. The boom is supported by a series of diagonal cross-bracing and has a hook at the end. The mast is equipped with various mechanical components and a counterweight at the base.", + "4781902": "A dark brown, wooden ladder with evenly spaced, flat rungs and side rails that taper slightly towards the top.", + "4782942": "A large, dark-colored, conical-shaped horn with a wide, flared opening and a narrow, cylindrical body.", + "4782949": "The drum has a circular shape with a red body and a black rim. The drumhead is a light brown color with a blue circular patch in the center.", + "4916799": "A spherical sculpture with a textured surface composed of small, raised, silver-colored elements. The sphere is adorned with blue, three-dimensional letters spelling \"Reve\" and is mounted on a black, cylindrical base. A green band encircles the sphere, and there are colorful, abstract shapes and patterns on the left side.", + "5211280": "A stainless steel rice cooker with a black handle on top. The front features a digital display screen in the center, surrounded by various buttons and controls. The buttons are arranged in a circular pattern around the display, with additional buttons below. 
The cooker has a sleek, modern design with a smooth, reflective surface.", + "5718392": "A woven basket with a dark brown color and a pattern of interlocking diamond shapes, featuring a sturdy, slightly curved handle.", + "5718415": "The tent has a yellow canopy with a dark brown edge. The visible part of the tent includes a metal pole with a rusted section near the bottom.", + "5718424": "A black athletic shoe with a textured surface, featuring a prominent yellow swoosh logo on the side. The shoe has a low-top design with a padded collar and a lace-up closure. The sole is thick and rugged, designed for traction.", + "6012878": "A square pedestrian traffic light with a black background, featuring a red illuminated hand symbol on the left side.", + "6037269": "A metallic shower head with a curved, elongated handle. The handle is cylindrical and appears to be made of a light-colored material. The shower head itself is conical with a rounded tip and a slightly wider base, featuring a reflective surface.", + "6037272": "A green shampoo bottle with a slightly curved shape, featuring a white label with text and a small orange logo.", + "6055310": "A golden ruler with a series of evenly spaced, small, rectangular notches along its length.", + "6820594": "A medium-sized cat with a predominantly white face and chest, featuring a mix of brown and black tabby markings on its back and sides. The cat has large, expressive green eyes and a pink nose. Its ears are pointed and have a light brown color with darker tips.", + "6820595": "A cat with a white face and ears, featuring a mix of black and brown fur on its back and tail. The cat's body is predominantly white with black patches, and it has a short, sleek coat.", + "7050495": "A black leather handbag with a smooth texture and a slightly curved bottom edge.", + "8201777": "A black van with a rear window displaying the word \"TAXI\" in large yellow letters. 
The van has a yellow license plate with black text and a small white sticker on the lower left side of the rear bumper. The rear lights are vertically aligned on both sides of the van.", + "8331685": "The earphone features a sleek, curved design with a dark gray color. The earpiece is circular and appears to be cushioned for comfort. The headband is also dark gray and has a smooth, slightly glossy finish.", + "8331699": "The visible part of the printer is black with a smooth, curved surface.", + "8331718": "A black spiral-bound notebook with a white cover and the word \"Xtreme\" written in white on the cover.", + "8556674": "A round, orange fruit with a smooth, glossy surface. The fruit has a gradient of colors, transitioning from a deep orange at the bottom to a lighter, almost yellowish hue at the top. There is a small, white, irregularly shaped patch near the top center.", + "8556676": "A deep red apple with a glossy surface reflecting light, showcasing a smooth curvature and a small, visible portion of the stem.", + "8557176": "The watch features a rectangular gold-toned case with a black dial. It has a black leather strap with white stitching and a small metallic buckle.", + "8557195": "The microwave oven features a smooth, curved, off-white exterior with a slightly reflective surface. The visible part of the microwave includes a rounded edge and a small, dark-colored component at the top.", + "8906172": "A black, in-ear headphone with a sleek, curved design.", + "9766617": "The goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The underbelly is white, and the legs and feet are greenish.", + "10666665": "A round wall clock with a black frame and a white face. The clock features black Arabic numerals for each hour, with the numbers 1 through 12 clearly visible. The hour and minute hands are black and pointed, while the second hand is thin and black. 
The clock has a simple, minimalist design.", + "10811497": "A dark green, oval-shaped key with a smooth surface and a small, circular indentation near the bottom.", + "11012500": "A soft, round tortilla filled with fresh arugula, a slice of ripe tomato, shredded lettuce, and a dollop of creamy sauce.", + "11021544": "The faucet features a sleek, curved design with a polished chrome finish. It has a single lever handle on the right side for controlling water flow and temperature. The spout is slightly arched, extending outward with a smooth, flowing curve.", + "11021562": "The microwave oven features a white exterior with a prominent vertical handle on the left side of the door. The door has a series of horizontal ventilation slits near the top.", + "11021563": "A white stove with four black burners, featuring a control panel with knobs on the back.", + "11775390": "A green rubber shoe with a thick, textured sole and multiple circular holes on the side. The shoe features a black and white design on the side, with a prominent black section and white accents. The upper part of the shoe has a smooth, rounded shape with a slight curve at the top.", + "11950619": "The racket has a light-colored wooden handle with a smooth finish. The head of the racket is covered with a transparent protective guard, revealing a blue and white string bed. The guard has a rectangular shape with rounded edges and is secured to the head of the racket.", + "12178946": "A cylindrical bottle with a yellow cap and a blue label featuring white text.", + "12348078": "A woman with dark hair tied back in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is seated and holding a baby close to her chest.", + "12348079": "A digital weighing scale with a rectangular, blue weighing platform on top. The scale has a white base with a control panel on the left side, featuring several buttons and a display screen. 
The right side of the scale has a series of blue and black buttons.", + "12348080": "A pair of scissors with bright red handles, each handle forming a loop with a smooth, rounded edge. The blades are metallic and converge at a central pivot point, with one blade partially visible.", + "13138178": "The stool has a deep blue, glossy finish with a smooth, curved design. The visible part includes a rounded, arch-like structure with a slight indentation in the middle, creating a sleek and modern appearance.", + "13187927": "The motorcycle is a white scooter with a black seat and a rear storage compartment. It features a rear red tail light and a license plate mounted below the tail light. The handlebars are equipped with rearview mirrors, and the body has a sleek, modern design with a slightly curved front.", + "14490578": "The harbor seal has a sleek, elongated body with a dark brown to black coloration. Its skin appears smooth and slightly shiny, with a few lighter patches scattered across its back. The seal's head is rounded, and its body tapers towards the tail.", + "14640483": "A rectangular wooden chopping board with a smooth surface and rounded edges. The board has a natural wood grain pattern and a warm, honey-brown color.", + "14832137": "A cylindrical, dark blue plastic bucket with a smooth surface and a slightly flared rim. The bucket has a handle attached to the top edge, which is also dark blue.", + "15050320": "A dark brown wine glass with a wide, shallow bowl and a short stem. 
The glass has a smooth, reflective surface with a few light reflections visible on the bowl.", + "16010041": "A pair of light-colored chopsticks with a smooth, slightly tapered design, featuring a subtle gradient from a pale yellow to a soft orange hue at the tips.", + "16951734": "A triangular slice of mango with a smooth, light orange flesh and a slightly darker orange edge.", + "16957916": "A piece of green lettuce with a slightly curled edge and a mix of light and dark green hues, featuring a few small brown spots and a hint of red at the base.", + "17072759": "A black belt with a smooth texture, featuring a silver rectangular buckle.", + "17072764": "A partially visible pear with a smooth, light green skin transitioning to a yellowish hue towards the top. The pear has a small, brown stem protruding from its top left side.", + "17265253": "A black rickshaw with a single visible wheel featuring a silver rim and black tire. The wheel is attached to a black frame with a visible axle and a small, round, orange reflector on the side. The rickshaw has a black canopy with a slightly curved top edge.", + "17265254": "A traditional rickshaw with a black frame and a red seat cushion. It features a single large spoked wheel on each side, connected by a horizontal axle. The rickshaw has a curved handlebar at the front, and a footrest is visible beneath the seat. The wheels are equipped with black tires and silver rims.", + "17385866": "A scoop of vanilla ice cream with a swirl of red and yellow fruit toppings, possibly strawberry and lemon, on a light green and yellow marbled base.", + "17404769": "The car is a white SUV with a rear hatchback design. It features a rear window with a slight tint and a small, square fuel cap on the right side of the rear door. The taillights are vertically aligned and wrap around the side of the vehicle. 
The rear bumper is slightly curved, and the car has a visible rear wheel with a five-spoke alloy rim.", + "18217373": "The spectacles feature a thin, dark brown frame with a slightly curved bridge. The lenses are rectangular with rounded edges, and the frame has a subtle metallic sheen.", + "18301585": "The bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest slats are evenly spaced and supported by white, rectangular concrete supports. The seat slats are also black and run parallel to the backrest. The bench has a sturdy, industrial design with a solid, robust appearance.", + "18680641": "A rectangular, plush, red carpet with a slightly uneven surface and a subtle gradient of darker red in the middle. The edges are bordered by a thin, dark gray trim.", + "18845103": "A metallic spoon with a slightly curved, elongated handle and a shallow, oval-shaped bowl. The handle has a smooth, polished finish, and the bowl is also metallic with a reflective surface.", + "19455186": "A blue metal handcart with two horizontal bars and two vertical supports. The cart has two black wheels at the bottom.", + "19610023": "A bright green croc-style shoe with a thick, textured sole and a wide, open toe design. The shoe features a smooth, rounded toe and a slightly raised heel.", + "19610025": "A white rabbit with large, upright ears and a red backpack. It is wearing a yellow shirt and blue pants. The rabbit has a playful expression with its mouth open and eyes wide.", + "20568676": "A stainless steel cooking pot with a rounded bottom and a rolled edge, featuring two riveted handles on opposite sides.", + "20993402": "A roll of white adhesive tape with a smooth, glossy surface and a slightly reflective sheen. The tape is wound tightly around a cylindrical core, with the outer edge appearing clean and unblemished.", + "21107974": "A wooden gavel with a cylindrical head featuring three evenly spaced, horizontal grooves. 
The handle is smooth and tapers slightly towards the end.", + "21529954": "A cylindrical can with a predominantly green label featuring the word \"Pepsi\" in white, bold letters. The top of the can is orange with a white cap.", + "22064315": "The visible part of the antelope shows two long, curved horns with a dark, almost black coloration, tapering to a point. The horns are covered in a pattern of ridges and grooves, giving them a textured appearance.", + "22107522": "A black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips. The bow tie has a symmetrical design with a central knot and two loops that are slightly curved outward.", + "22879790": "A single, partially peeled white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a visible root end that is dry and brownish. The layers are tightly packed, and the outer skin is mostly intact, showing a few small, white root remnants.", + "24010373": "The guitar has a dark, glossy finish with a cutaway body design. It features a white pickguard and a circular soundhole with a simple rosette pattern. The fretboard is dark with white dot inlays, and the headstock is equipped with tuning pegs.", + "24017816": "The car features a dark-tinted side window with a black frame, and a portion of the front windshield is visible, also with a black frame.", + "24498027": "A tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a horizontal arm extending from the middle, supporting a lantern-style light fixture with a glass enclosure and a metal frame.", + "24581953": "A large, light gray dog with a short, smooth coat is lying down with its body stretched out. The dog has a long, slender tail that extends straight out behind it. Its legs are extended, with the front legs slightly bent and the hind legs stretched out. 
The dog's head is resting on the ground, and its ears are relaxed and folded back.", + "24694197": "A ripe avocado with a bumpy, dark green to almost black skin and a large, round, red to yellow-green pit nestled in the center.", + "24786060": "A light gray towel with a soft, plush texture, featuring a slightly wrinkled appearance. The towel has a rectangular shape with a visible fold running vertically down the center.", + "25054869": "A beige toilet cistern with a smooth, curved top surface and a slightly protruding front edge.", + "25273528": "A hot air balloon with a vibrant pattern of alternating vertical stripes in dark blue, red, and yellow. The balloon has a teardrop shape with a small basket attached at the bottom.", + "25273553": "A black tripod with three legs, each leg featuring a rubber foot for stability. The legs are connected at the top by a central column, which supports a mounting platform with a quick-release plate. The tripod has a telescopic head with a pan handle for adjusting the angle of the head.", + "25419495": "The tongs have a dark green handle with a black grip at the end. The metal arms are slightly curved and have a dark, matte finish. The tips of the arms are pointed and designed for precise gripping.", + "25419509": "A metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a subtle gradient from light to dark.", + "25419516": "The toy is a plush, blue creature with large, expressive eyes and prominent, pointed ears. It has a small tuft of hair on top of its head and a light blue underbelly.", + "25579493": "A square-shaped mixing bowl with rounded edges, featuring a light blue exterior and a cream-colored interior. The bowl contains a mixture of white and yellow ingredients, with a small piece of red garnish on top.", + "25612310": "A woven wicker basket with a dark brown hue, featuring a series of horizontal slats and a slightly curved edge." 
+} \ No newline at end of file diff --git a/evaluation/DLC-Bench/model_outputs/gar_1b_eval.json b/evaluation/DLC-Bench/model_outputs/gar_1b_eval.json new file mode 100644 index 0000000000000000000000000000000000000000..5f3db0eaae8f7082843fa0e9657c161b67e17e69 --- /dev/null +++ b/evaluation/DLC-Bench/model_outputs/gar_1b_eval.json @@ -0,0 +1,26028 @@ +{ + "2391781": { + "pred": "The bird has a predominantly dark brown body with lighter brown and white markings on its wings and back. Its wings are outstretched, showing a mix of dark and light feathers. The bird's head is slightly turned, with a visible beak and a hint of white feathers around the neck area.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wing or the wild bird is not mentioned.", + 0 + ], + [ + "The color of the wing is not mentioned, but the wing of the wild bird is mentioned.", + 0.5 + ], + [ + "The color of the wing is mentioned in the description but is not grey or brown.", + -1 + ], + [ + "The color of the wing is mentioned in the description and is grey or brown.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the wing is mentioned in the description and is grey or brown.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The feathers or the wild bird are not mentioned.", + 0 + ], + [ + "The color of the feathers is not mentioned, but the feathers of the wild bird are mentioned.", + 0.5 + ], + [ + "The color of the feathers is mentioned in the description but is not white, grey, or brown.", + -1 + ], + [ + "The color of the feathers is mentioned in the description and is white, grey, or brown.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the feathers is mentioned in the description and is white, grey, or brown.", + "pred_index": 3, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tail or the wild bird is not mentioned.", + 0 + ], + [ + "The shape of the tail is not mentioned, but the tail of the wild bird is mentioned.", + 0.5 + ], + [ + "The shape of the tail is mentioned in the description but is not fan-like.", + -1 + ], + [ + "The shape of the tail is mentioned in the description and is fan-like.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The tail or the wild bird is not mentioned.", + "pred_index": 0, + "question_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The null or the wild bird is not mentioned.", + 0 + ], + [ + "The action of the null is not mentioned, but the null of the wild bird is mentioned.", + 0.5 + ], + [ + "The action of the null is mentioned in the description but is not flying.", + -1 + ], + [ + "The action of the null is mentioned in the description and is flying.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The null or the wild bird is not mentioned.", + "pred_index": 0, + "question_index": 3, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wing or the wild bird is not mentioned.", + 0 + ], + [ + "The position of the wing is not mentioned, but the wing of the wild bird is mentioned.", + 0.5 + ], + [ + "The position of the wing is mentioned in the description but is not extended or outstretched.", + -1 + ], + [ + "The position of the wing is mentioned in the description and is extended or outstretched.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The position of the wing is mentioned in the description and is extended or outstretched.", + "pred_index": 3, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The claws of the wild bird are not mentioned in the description.", + 1 + ], + [ + "The claws of the wild bird are mentioned in the description.", + -1 + ], + [ + "The wild bird is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The claws of the wild bird are not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The boats are not mentioned in the description.", + 1 + ], + [ + "The boats are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The boats are not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chimneys are not mentioned in the description.", + 1 + ], + [ + "The chimneys are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The chimneys are not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bridge is not mentioned in the description.", + 1 + ], + [ + "The bridge is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The bridge is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The windows are not mentioned in the description.", + 1 + ], + [ + "The windows are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The windows are not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a wild bird or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly dark brown body with lighter brown and white markings on its wings and back. Its wings are outstretched, showing a mix of dark and light feathers. The bird's head is slightly turned, with a visible beak and a hint of white feathers around the neck area.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a wild bird or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly dark brown body with lighter brown and white markings on its wings and back. Its wings are outstretched, showing a mix of dark and light feathers. The bird's head is slightly turned, with a visible beak and a hint of white feathers around the neck area.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wing or the wild bird is not mentioned.\nB. The color of the wing is not mentioned, but the wing of the wild bird is mentioned.\nC. The color of the wing is mentioned in the description but is not grey or brown.\nD. The color of the wing is mentioned in the description and is grey or brown.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly dark brown body with lighter brown and white markings on its wings and back. Its wings are outstretched, showing a mix of dark and light feathers. The bird's head is slightly turned, with a visible beak and a hint of white feathers around the neck area.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The feathers or the wild bird are not mentioned.\nB. 
The color of the feathers is not mentioned, but the feathers of the wild bird are mentioned.\nC. The color of the feathers is mentioned in the description but is not white, grey, or brown.\nD. The color of the feathers is mentioned in the description and is white, grey, or brown.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly dark brown body with lighter brown and white markings on its wings and back. Its wings are outstretched, showing a mix of dark and light feathers. The bird's head is slightly turned, with a visible beak and a hint of white feathers around the neck area.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tail or the wild bird is not mentioned.\nB. The shape of the tail is not mentioned, but the tail of the wild bird is mentioned.\nC. The shape of the tail is mentioned in the description but is not fan-like.\nD. The shape of the tail is mentioned in the description and is fan-like.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly dark brown body with lighter brown and white markings on its wings and back. 
Its wings are outstretched, showing a mix of dark and light feathers. The bird's head is slightly turned, with a visible beak and a hint of white feathers around the neck area.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The null or the wild bird is not mentioned.\nB. The action of the null is not mentioned, but the null of the wild bird is mentioned.\nC. The action of the null is mentioned in the description but is not flying.\nD. The action of the null is mentioned in the description and is flying.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly dark brown body with lighter brown and white markings on its wings and back. Its wings are outstretched, showing a mix of dark and light feathers. The bird's head is slightly turned, with a visible beak and a hint of white feathers around the neck area.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wing or the wild bird is not mentioned.\nB. The position of the wing is not mentioned, but the wing of the wild bird is mentioned.\nC. The position of the wing is mentioned in the description but is not extended or outstretched.\nD. The position of the wing is mentioned in the description and is extended or outstretched.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly dark brown body with lighter brown and white markings on its wings and back. Its wings are outstretched, showing a mix of dark and light feathers. The bird's head is slightly turned, with a visible beak and a hint of white feathers around the neck area.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The claws of the wild bird are not mentioned in the description.\nB. The claws of the wild bird are mentioned in the description.\nC. The wild bird is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly dark brown body with lighter brown and white markings on its wings and back. Its wings are outstretched, showing a mix of dark and light feathers. The bird's head is slightly turned, with a visible beak and a hint of white feathers around the neck area.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The boats are not mentioned in the description.\nB. The boats are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly dark brown body with lighter brown and white markings on its wings and back. Its wings are outstretched, showing a mix of dark and light feathers. The bird's head is slightly turned, with a visible beak and a hint of white feathers around the neck area.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chimneys are not mentioned in the description.\nB. The chimneys are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly dark brown body with lighter brown and white markings on its wings and back. Its wings are outstretched, showing a mix of dark and light feathers. The bird's head is slightly turned, with a visible beak and a hint of white feathers around the neck area.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bridge is not mentioned in the description.\nB. The bridge is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly dark brown body with lighter brown and white markings on its wings and back. Its wings are outstretched, showing a mix of dark and light feathers. The bird's head is slightly turned, with a visible beak and a hint of white feathers around the neck area.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The windows are not mentioned in the description.\nB. The windows are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the wing is mentioned in the description and is grey or brown.", + "D. The color of the feathers is mentioned in the description and is white, grey, or brown.", + "A. The tail or the wild bird is not mentioned.", + "A. The null or the wild bird is not mentioned.", + "D. The position of the wing is mentioned in the description and is extended or outstretched.", + "A. The claws of the wild bird are not mentioned in the description.", + "A. The boats are not mentioned in the description.", + "A. The chimneys are not mentioned in the description.", + "A. The bridge is not mentioned in the description.", + "A. The windows are not mentioned in the description." + ], + "score": 0.8, + "score_pos": 0.6, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "2580323": { + "pred": "A rectangular wooden frame encloses a detailed architectural blueprint with various lines, symbols, and text. 
The frame has a natural wood finish and is mounted on a wall.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the frame is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the frame is not mentioned, but the frame of the picture/frame is mentioned.", + 0.5 + ], + [ + "The frame or the picture/frame is not mentioned.", + 0 + ], + [ + "The shape of the frame is mentioned in the description but is not rectangular.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the frame is mentioned in the description and is rectangular.", + "pred_index": 0, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The background color of the picture is mentioned in the description and is white.", + 1 + ], + [ + "The background color of the picture is not mentioned, but the picture of the picture/frame is mentioned.", + 0.5 + ], + [ + "The picture or the picture/frame is not mentioned.", + 0 + ], + [ + "The background color of the picture is mentioned in the description but is not white.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The background color of the picture is not mentioned, but the picture of the picture/frame is mentioned.", + "pred_index": 1, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the picture is mentioned in the description and is diagram, schematic, or blueprint.", + 1 + ], + [ + "The type of the picture is not mentioned, but the picture of the picture/frame is mentioned.", + 0.5 + ], + [ + "The picture or the picture/frame is not mentioned.", + 0 + ], + [ + "The type of the picture is mentioned in the description but is not diagram, schematic, or blueprint.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The type of the picture is mentioned in the description and is diagram, schematic, or blueprint.", + "pred_index": 0, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the frame is mentioned in the description and is wood.", + 1 + ], + [ + "The material of the frame is not mentioned, but the frame of the picture/frame is mentioned.", + 0.5 + ], + [ + "The frame or the picture/frame is not mentioned.", + 0 + ], + [ + "The material of the frame is mentioned in the description but is not wood.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the frame is mentioned in the description and is wood.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The radio is mentioned in the description.", + -1 + ], + [ + "The radio is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The radio is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The picture/frame is not mentioned in the description.", + 0 + ], + [ + "The glass of the picture/frame are mentioned in the description.", + -1 + ], + [ + "The glass of the picture/frame are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The glass of the picture/frame are not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The keyboard is mentioned in the description.", + -1 + ], + [ + "The keyboard is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The keyboard is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The light switch is mentioned in the description.", + -1 + ], + [ + "The light switch is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The light switch is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The curtain is mentioned in the description.", + -1 + ], + [ + "The curtain is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The curtain is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a picture/frame or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden frame encloses a detailed architectural blueprint with various lines, symbols, and text. The frame has a natural wood finish and is mounted on a wall.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a picture/frame or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden frame encloses a detailed architectural blueprint with various lines, symbols, and text. 
The frame has a natural wood finish and is mounted on a wall.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the frame is mentioned in the description and is rectangular.\nB. The shape of the frame is not mentioned, but the frame of the picture/frame is mentioned.\nC. The frame or the picture/frame is not mentioned.\nD. The shape of the frame is mentioned in the description but is not rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden frame encloses a detailed architectural blueprint with various lines, symbols, and text. The frame has a natural wood finish and is mounted on a wall.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The background color of the picture is mentioned in the description and is white.\nB. The background color of the picture is not mentioned, but the picture of the picture/frame is mentioned.\nC. The picture or the picture/frame is not mentioned.\nD. The background color of the picture is mentioned in the description but is not white.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden frame encloses a detailed architectural blueprint with various lines, symbols, and text. The frame has a natural wood finish and is mounted on a wall.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The type of the picture is mentioned in the description and is diagram, schematic, or blueprint.\nB. The type of the picture is not mentioned, but the picture of the picture/frame is mentioned.\nC. The picture or the picture/frame is not mentioned.\nD. The type of the picture is mentioned in the description but is not diagram, schematic, or blueprint.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden frame encloses a detailed architectural blueprint with various lines, symbols, and text. The frame has a natural wood finish and is mounted on a wall.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the frame is mentioned in the description and is wood.\nB. The material of the frame is not mentioned, but the frame of the picture/frame is mentioned.\nC. The frame or the picture/frame is not mentioned.\nD. The material of the frame is mentioned in the description but is not wood.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden frame encloses a detailed architectural blueprint with various lines, symbols, and text. The frame has a natural wood finish and is mounted on a wall.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The radio is mentioned in the description.\nB. The radio is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden frame encloses a detailed architectural blueprint with various lines, symbols, and text. The frame has a natural wood finish and is mounted on a wall.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The picture/frame is not mentioned in the description.\nB. The glass of the picture/frame are mentioned in the description.\nC. The glass of the picture/frame are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden frame encloses a detailed architectural blueprint with various lines, symbols, and text. The frame has a natural wood finish and is mounted on a wall.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The keyboard is mentioned in the description.\nB. The keyboard is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden frame encloses a detailed architectural blueprint with various lines, symbols, and text. The frame has a natural wood finish and is mounted on a wall.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The light switch is mentioned in the description.\nB. The light switch is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden frame encloses a detailed architectural blueprint with various lines, symbols, and text. 
The frame has a natural wood finish and is mounted on a wall.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The curtain is mentioned in the description.\nB. The curtain is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The shape of the frame is mentioned in the description and is rectangular.", + "B. The background color of the picture is not mentioned, but the picture of the picture/frame is mentioned.", + "A. The type of the picture is mentioned in the description and is diagram, schematic, or blueprint.", + "A. The material of the frame is mentioned in the description and is wood.", + "B. The radio is not mentioned in the description.", + "C. The glass of the picture/frame are not mentioned in the description.", + "B. The keyboard is not mentioned in the description.", + "B. The light switch is not mentioned in the description.", + "B. The curtain is not mentioned in the description." + ], + "score": 0.9444444444444444, + "score_pos": 0.875, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "4782942": { + "pred": "A large, dark-colored, conical-shaped horn with a wide, flared opening and a narrow, cylindrical body.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The megaphone/speaker is not mentioned.", + 0 + ], + [ + "The shape of the megaphone/speaker is not mentioned.", + 0 + ], + [ + "The shape of the megaphone/speaker is mentioned in the description but is not conical.", + -1 + ], + [ + "The shape of the megaphone/speaker is mentioned in the description and is conical.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The shape of the megaphone/speaker is mentioned in the description and is conical.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The megaphone/speaker is not mentioned.", + 0 + ], + [ + "The color of the megaphone/speaker is not mentioned.", + 0 + ], + [ + "The color of the megaphone/speaker is mentioned in the description but is not gray.", + -1 + ], + [ + "The color of the megaphone/speaker is mentioned in the description and is gray.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the megaphone/speaker is not mentioned.", + "pred_index": 1, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The front/opening or the megaphone/speaker is not mentioned.", + 0 + ], + [ + "The shape of the front/opening is not mentioned, but the front/opening of the megaphone/speaker is mentioned.", + 0.5 + ], + [ + "The shape of the front/opening is mentioned in the description but is not round.", + -1 + ], + [ + "The shape of the front/opening is mentioned in the description and is round.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The front/opening or the megaphone/speaker is not mentioned.", + "pred_index": 0, + "question_index": 2, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The megaphone/speaker is not mentioned in the description.", + 0 + ], + [ + "The siren button of the megaphone/speaker is not mentioned in the description.", + 1 + ], + [ + "The siren button of the megaphone/speaker is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The megaphone/speaker is not mentioned in the description.", + "pred_index": 0, + "question_index": 3, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fence is not mentioned in the description.", + 1 + ], + [ + "The fence is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The fence is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The megaphone/speaker is not mentioned in the description.", + 0 + ], + [ + "The strap of the megaphone/speaker is not mentioned in the description.", + 1 + ], + [ + "The strap of the megaphone/speaker is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The strap of the megaphone/speaker is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The megaphone/speaker is not mentioned in the description.", + 0 + ], + [ + "The battery compartment of the megaphone/speaker is not mentioned in the description.", + 1 + ], + [ + "The battery compartment of the megaphone/speaker is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The battery compartment of the megaphone/speaker is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The houses are not mentioned in the description.", + 1 + ], + [ + "The houses are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The houses are not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a megaphone/speaker or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, dark-colored, conical-shaped horn with a wide, flared opening and a narrow, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a megaphone/speaker or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, dark-colored, conical-shaped horn with a wide, flared opening and a narrow, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The megaphone/speaker is not mentioned.\nB. The shape of the megaphone/speaker is not mentioned.\nC. The shape of the megaphone/speaker is mentioned in the description but is not conical.\nD. The shape of the megaphone/speaker is mentioned in the description and is conical.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, dark-colored, conical-shaped horn with a wide, flared opening and a narrow, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The megaphone/speaker is not mentioned.\nB. The color of the megaphone/speaker is not mentioned.\nC. The color of the megaphone/speaker is mentioned in the description but is not gray.\nD. The color of the megaphone/speaker is mentioned in the description and is gray.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, dark-colored, conical-shaped horn with a wide, flared opening and a narrow, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The front/opening or the megaphone/speaker is not mentioned.\nB. The shape of the front/opening is not mentioned, but the front/opening of the megaphone/speaker is mentioned.\nC. The shape of the front/opening is mentioned in the description but is not round.\nD. The shape of the front/opening is mentioned in the description and is round.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, dark-colored, conical-shaped horn with a wide, flared opening and a narrow, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The megaphone/speaker is not mentioned in the description.\nB. The siren button of the megaphone/speaker is not mentioned in the description.\nC. The siren button of the megaphone/speaker is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA large, dark-colored, conical-shaped horn with a wide, flared opening and a narrow, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fence is not mentioned in the description.\nB. The fence is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, dark-colored, conical-shaped horn with a wide, flared opening and a narrow, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The megaphone/speaker is not mentioned in the description.\nB. The strap of the megaphone/speaker is not mentioned in the description.\nC. The strap of the megaphone/speaker is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, dark-colored, conical-shaped horn with a wide, flared opening and a narrow, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The megaphone/speaker is not mentioned in the description.\nB. The battery compartment of the megaphone/speaker is not mentioned in the description.\nC. The battery compartment of the megaphone/speaker is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, dark-colored, conical-shaped horn with a wide, flared opening and a narrow, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The houses are not mentioned in the description.\nB. The houses are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The shape of the megaphone/speaker is mentioned in the description and is conical.", + "B. The color of the megaphone/speaker is not mentioned.", + "A. The front/opening or the megaphone/speaker is not mentioned.", + "A. The megaphone/speaker is not mentioned in the description.", + "A. The fence is not mentioned in the description.", + "B. The strap of the megaphone/speaker is not mentioned in the description.", + "B. The battery compartment of the megaphone/speaker is not mentioned in the description.", + "A. The houses are not mentioned in the description." + ], + "score": 0.625, + "score_pos": 0.3333333333333333, + "score_neg": 0.8, + "neg_valid_num": 5, + "recognition_result": true + }, + "6037269": { + "pred": "A metallic shower head with a curved, elongated handle. The handle is cylindrical and appears to be made of a light-colored material. 
The shower head itself is conical with a rounded tip and a slightly wider base, featuring a reflective surface.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the showerhead is mentioned in the description but is not silver and/or metallic.", + -1 + ], + [ + "The color of the showerhead is not mentioned.", + 0 + ], + [ + "The color of the showerhead is mentioned in the description and is silver and/or metallic.", + 1 + ], + [ + "The showerhead is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the showerhead is mentioned in the description and is silver and/or metallic.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the showerhead is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the showerhead is not mentioned.", + 0 + ], + [ + "The texture of the showerhead is mentioned in the description and is smooth.", + 1 + ], + [ + "The showerhead is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The texture of the showerhead is not mentioned.", + "pred_index": 1, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the showerhead is mentioned in the description but is not circular.", + -1 + ], + [ + "The shape of the showerhead is not mentioned.", + 0 + ], + [ + "The shape of the showerhead is mentioned in the description and is circular.", + 1 + ], + [ + "The showerhead is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The shape of the showerhead is mentioned in the description but is not circular.", + "pred_index": 0, + "question_index": 2, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the showerhead is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the showerhead is not mentioned.", + 0 + ], + [ + "The material of the showerhead is mentioned in the description and is metal.", + 1 + ], + [ + "The showerhead is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the showerhead is mentioned in the description and is metal.", + "pred_index": 2, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the handle is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the handle is not mentioned, but the handle of the showerhead is mentioned.", + 0.5 + ], + [ + "The color of the handle is mentioned in the description and is white.", + 1 + ], + [ + "The handle or the showerhead is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the handle is mentioned in the description but is not white.", + "pred_index": 0, + "question_index": 4, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shower hose of the showerhead is mentioned in the description.", + -1 + ], + [ + "The showerhead is not mentioned in the description.", + 0 + ], + [ + "The shower hose of the showerhead is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The shower hose of the showerhead is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bath caddy is mentioned in the description.", + -1 + ], + [ + "The bath caddy is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bath caddy is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bathtub is mentioned in the description.", + -1 + ], + [ + "The bathtub is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bathtub is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The showerhead filter of the showerhead is mentioned in the description.", + -1 + ], + [ + "The showerhead is not mentioned in the description.", + 0 + ], + [ + "The showerhead filter of the showerhead is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The showerhead filter of the showerhead is not mentioned in the description.", + "pred_index": 2, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet paper holder is mentioned in the description.", + -1 + ], + [ + "The toilet paper holder is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The toilet paper holder is not mentioned in the description.", + "pred_index": 1, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a showerhead or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic shower head with a curved, elongated handle. The handle is cylindrical and appears to be made of a light-colored material. The shower head itself is conical with a rounded tip and a slightly wider base, featuring a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a showerhead or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic shower head with a curved, elongated handle. The handle is cylindrical and appears to be made of a light-colored material. 
The shower head itself is conical with a rounded tip and a slightly wider base, featuring a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the showerhead is mentioned in the description but is not silver and/or metallic.\nB. The color of the showerhead is not mentioned.\nC. The color of the showerhead is mentioned in the description and is silver and/or metallic.\nD. The showerhead is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic shower head with a curved, elongated handle. The handle is cylindrical and appears to be made of a light-colored material. The shower head itself is conical with a rounded tip and a slightly wider base, featuring a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the showerhead is mentioned in the description but is not smooth.\nB. The texture of the showerhead is not mentioned.\nC. The texture of the showerhead is mentioned in the description and is smooth.\nD. The showerhead is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA metallic shower head with a curved, elongated handle. The handle is cylindrical and appears to be made of a light-colored material. The shower head itself is conical with a rounded tip and a slightly wider base, featuring a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the showerhead is mentioned in the description but is not circular.\nB. The shape of the showerhead is not mentioned.\nC. The shape of the showerhead is mentioned in the description and is circular.\nD. The showerhead is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic shower head with a curved, elongated handle. The handle is cylindrical and appears to be made of a light-colored material. The shower head itself is conical with a rounded tip and a slightly wider base, featuring a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the showerhead is mentioned in the description but is not metal.\nB. The material of the showerhead is not mentioned.\nC. The material of the showerhead is mentioned in the description and is metal.\nD. The showerhead is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic shower head with a curved, elongated handle. The handle is cylindrical and appears to be made of a light-colored material. The shower head itself is conical with a rounded tip and a slightly wider base, featuring a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the handle is mentioned in the description but is not white.\nB. The color of the handle is not mentioned, but the handle of the showerhead is mentioned.\nC. The color of the handle is mentioned in the description and is white.\nD. The handle or the showerhead is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic shower head with a curved, elongated handle. The handle is cylindrical and appears to be made of a light-colored material. The shower head itself is conical with a rounded tip and a slightly wider base, featuring a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shower hose of the showerhead is mentioned in the description.\nB. The showerhead is not mentioned in the description.\nC. 
The shower hose of the showerhead is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic shower head with a curved, elongated handle. The handle is cylindrical and appears to be made of a light-colored material. The shower head itself is conical with a rounded tip and a slightly wider base, featuring a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bath caddy is mentioned in the description.\nB. The bath caddy is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic shower head with a curved, elongated handle. The handle is cylindrical and appears to be made of a light-colored material. The shower head itself is conical with a rounded tip and a slightly wider base, featuring a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bathtub is mentioned in the description.\nB. The bathtub is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic shower head with a curved, elongated handle. The handle is cylindrical and appears to be made of a light-colored material. The shower head itself is conical with a rounded tip and a slightly wider base, featuring a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The showerhead filter of the showerhead is mentioned in the description.\nB. The showerhead is not mentioned in the description.\nC. The showerhead filter of the showerhead is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic shower head with a curved, elongated handle. The handle is cylindrical and appears to be made of a light-colored material. The shower head itself is conical with a rounded tip and a slightly wider base, featuring a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toilet paper holder is mentioned in the description.\nB. The toilet paper holder is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the showerhead is mentioned in the description and is silver and/or metallic.", + "B. 
The texture of the showerhead is not mentioned.", + "A. The shape of the showerhead is mentioned in the description but is not circular.", + "C. The material of the showerhead is mentioned in the description and is metal.", + "A. The color of the handle is mentioned in the description but is not white.", + "C. The shower hose of the showerhead is not mentioned in the description.", + "B. The bath caddy is not mentioned in the description.", + "B. The bathtub is not mentioned in the description.", + "C. The showerhead filter of the showerhead is not mentioned in the description.", + "B. The toilet paper holder is not mentioned in the description." + ], + "score": 0.5, + "score_pos": 0.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "7050495": { + "pred": "A black leather handbag with a smooth texture and a slightly curved bottom edge.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the handbag is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the handbag is not mentioned.", + 0 + ], + [ + "The texture of the handbag is mentioned in the description but is not smooth.", + -1 + ], + [ + "The handbag is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The texture of the handbag is mentioned in the description and is smooth.", + "pred_index": 0, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the handbag is mentioned in the description and is black or glossy.", + 1 + ], + [ + "The color of the handbag is not mentioned.", + 0 + ], + [ + "The color of the handbag is mentioned in the description but is not black or glossy.", + -1 + ], + [ + "The handbag is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the handbag is mentioned in the description and is black or glossy.", + "pred_index": 0, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the handbag is mentioned in the description and is leather.", + 1 + ], + [ + "The material of the handbag is not mentioned.", + 0 + ], + [ + "The material of the handbag is mentioned in the description but is not leather.", + -1 + ], + [ + "The handbag is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the handbag is mentioned in the description and is leather.", + "pred_index": 0, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the handbag is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the handbag is not mentioned.", + 0 + ], + [ + "The shape of the handbag is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The handbag is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the handbag is mentioned in the description but is not rectangular.", + "pred_index": 2, + "question_index": 3, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The seam of the handbag is mentioned in the description and is visible.", + 1 + ], + [ + "The seam of the handbag is not mentioned.", + 0 + ], + [ + "The seam of the handbag is mentioned in the description but is not visible.", + -1 + ], + [ + "The handbag is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The seam of the handbag is not mentioned.", + "pred_index": 1, + "question_index": 4, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The price tags are not mentioned in the description.", + 1 + ], + [ + "The price tags are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The price tags are not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handbag is not mentioned in the description.", + 0 + ], + [ + "The logo of the handbag is not mentioned in the description.", + 1 + ], + [ + "The logo of the handbag is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The logo of the handbag is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handbag is not mentioned in the description.", + 0 + ], + [ + "The handle of the handbag is not mentioned in the description.", + 1 + ], + [ + "The handle of the handbag is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The handle of the handbag is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handbag is not mentioned in the description.", + 0 + ], + [ + "The pocket of the handbag is not mentioned in the description.", + 1 + ], + [ + "The pocket of the handbag is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The pocket of the handbag is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handbag is not mentioned in the description.", + 0 + ], + [ + "The zipper of the handbag is not mentioned in the description.", + 1 + ], + [ + "The zipper of the handbag is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The zipper of the handbag is not mentioned in the description.", + "pred_index": 1, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a handbag or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth texture and a slightly curved bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a handbag or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth texture and a slightly curved bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the handbag is mentioned in the description and is smooth.\nB. The texture of the handbag is not mentioned.\nC. The texture of the handbag is mentioned in the description but is not smooth.\nD. The handbag is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth texture and a slightly curved bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The color of the handbag is mentioned in the description and is black or glossy.\nB. The color of the handbag is not mentioned.\nC. The color of the handbag is mentioned in the description but is not black or glossy.\nD. The handbag is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth texture and a slightly curved bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the handbag is mentioned in the description and is leather.\nB. The material of the handbag is not mentioned.\nC. The material of the handbag is mentioned in the description but is not leather.\nD. The handbag is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth texture and a slightly curved bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the handbag is mentioned in the description and is rectangular.\nB. The shape of the handbag is not mentioned.\nC. 
The shape of the handbag is mentioned in the description but is not rectangular.\nD. The handbag is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth texture and a slightly curved bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The seam of the handbag is mentioned in the description and is visible.\nB. The seam of the handbag is not mentioned.\nC. The seam of the handbag is mentioned in the description but is not visible.\nD. The handbag is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth texture and a slightly curved bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The price tags are not mentioned in the description.\nB. The price tags are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth texture and a slightly curved bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handbag is not mentioned in the description.\nB. The logo of the handbag is not mentioned in the description.\nC. The logo of the handbag is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth texture and a slightly curved bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handbag is not mentioned in the description.\nB. The handle of the handbag is not mentioned in the description.\nC. The handle of the handbag is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth texture and a slightly curved bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handbag is not mentioned in the description.\nB. The pocket of the handbag is not mentioned in the description.\nC. The pocket of the handbag is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth texture and a slightly curved bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handbag is not mentioned in the description.\nB. The zipper of the handbag is not mentioned in the description.\nC. The zipper of the handbag is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The texture of the handbag is mentioned in the description and is smooth.", + "A. The color of the handbag is mentioned in the description and is black or glossy.", + "A. The material of the handbag is mentioned in the description and is leather.", + "C. The shape of the handbag is mentioned in the description but is not rectangular.", + "B. The seam of the handbag is not mentioned.", + "A. The price tags are not mentioned in the description.", + "B. The logo of the handbag is not mentioned in the description.", + "B. The handle of the handbag is not mentioned in the description.", + "B. 
The pocket of the handbag is not mentioned in the description.", + "B. The zipper of the handbag is not mentioned in the description." + ], + "score": 0.7, + "score_pos": 0.4, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "8331699": { + "pred": "The visible part of the printer is black with a smooth, curved surface.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The computer box is not mentioned.", + 0 + ], + [ + "The shape of the computer box is not mentioned.", + 0 + ], + [ + "The shape of the computer box is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the computer box is mentioned in the description but is not rectangular.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The computer box is not mentioned.", + "pred_index": 0, + "question_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The computer box is not mentioned.", + 0 + ], + [ + "The color of the computer box is not mentioned.", + 0 + ], + [ + "The color of the computer box is mentioned in the description and is black or gray.", + 1 + ], + [ + "The color of the computer box is mentioned in the description but is not black or gray.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The computer box is not mentioned.", + "pred_index": 0, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The side panel or the computer box is not mentioned.", + 0 + ], + [ + "The color of the side panel is not mentioned, but the side panel of the computer box is mentioned.", + 0.5 + ], + [ + "The color of the side panel is mentioned in the description and is gray.", + 1 + ], + [ + "The color of the side panel is mentioned in the description but is not gray.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the side panel is mentioned in the description but is not gray.", + "pred_index": 3, + "question_index": 2, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chair is not mentioned in the description.", + 1 + ], + [ + "The chair is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The chair is not mentioned in the description.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rubber duck is not mentioned in the description.", + 1 + ], + [ + "The rubber duck is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The rubber duck is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The power button of the computer box is not mentioned in the description.", + 1 + ], + [ + "The computer box is not mentioned in the description.", + 0 + ], + [ + "The power button of the computer box is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The computer box is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sticky notes are not mentioned in the description.", + 1 + ], + [ + "The sticky notes are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The sticky notes are not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The USB ports of the computer box are not mentioned in the description.", + 1 + ], + [ + "The computer box is not mentioned in the description.", + 0 + ], + [ + "The USB ports of the computer box are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The computer box is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 0 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a computer box or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the printer is black with a smooth, curved surface.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a computer box or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the printer is black with a smooth, curved surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The computer box is not mentioned.\nB. The shape of the computer box is not mentioned.\nC. The shape of the computer box is mentioned in the description and is rectangular.\nD. The shape of the computer box is mentioned in the description but is not rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the printer is black with a smooth, curved surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The computer box is not mentioned.\nB. The color of the computer box is not mentioned.\nC. The color of the computer box is mentioned in the description and is black or gray.\nD. The color of the computer box is mentioned in the description but is not black or gray.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the printer is black with a smooth, curved surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The side panel or the computer box is not mentioned.\nB. The color of the side panel is not mentioned, but the side panel of the computer box is mentioned.\nC. The color of the side panel is mentioned in the description and is gray.\nD. The color of the side panel is mentioned in the description but is not gray.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the printer is black with a smooth, curved surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chair is not mentioned in the description.\nB. The chair is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the printer is black with a smooth, curved surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rubber duck is not mentioned in the description.\nB. The rubber duck is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the printer is black with a smooth, curved surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The power button of the computer box is not mentioned in the description.\nB. The computer box is not mentioned in the description.\nC. 
The power button of the computer box is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the printer is black with a smooth, curved surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sticky notes are not mentioned in the description.\nB. The sticky notes are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the printer is black with a smooth, curved surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The USB ports of the computer box are not mentioned in the description.\nB. The computer box is not mentioned in the description.\nC. The USB ports of the computer box are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The computer box is not mentioned.", + "A. The computer box is not mentioned.", + "D. The color of the side panel is mentioned in the description but is not gray.", + "A. The chair is not mentioned in the description.", + "A. The rubber duck is not mentioned in the description.", + "B. 
The computer box is not mentioned in the description.", + "A. The sticky notes are not mentioned in the description.", + "B. The computer box is not mentioned in the description." + ], + "score": 0.25, + "score_pos": -0.3333333333333333, + "score_neg": 0.6, + "neg_valid_num": 5, + "recognition_result": true + }, + "8556676": { + "pred": "A deep red apple with a glossy surface reflecting light, showcasing a smooth curvature and a small, visible portion of the stem.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the apple is mentioned in the description and is small.", + 1 + ], + [ + "The size of the apple is not mentioned.", + 0 + ], + [ + "The size of the apple is mentioned in the description but is not small.", + -1 + ], + [ + "The apple is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The size of the apple is not mentioned.", + "pred_index": 1, + "question_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the apple is mentioned in the description and is smooth or glossy.", + 1 + ], + [ + "The texture of the apple is not mentioned.", + 0 + ], + [ + "The texture of the apple is mentioned in the description but is not smooth or glossy.", + -1 + ], + [ + "The apple is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The texture of the apple is mentioned in the description and is smooth or glossy.", + "pred_index": 0, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the apple is mentioned in the description and is red.", + 1 + ], + [ + "The color of the apple is not mentioned.", + 0 + ], + [ + "The color of the apple is mentioned in the description but is not red.", + -1 + ], + [ + "The apple is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the apple is mentioned in the description and is red.", + "pred_index": 0, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lemon is mentioned in the description.", + -1 + ], + [ + "The lemon is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The lemon is not mentioned in the description.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange is mentioned in the description.", + -1 + ], + [ + "The orange is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The orange is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pineapple is mentioned in the description.", + -1 + ], + [ + "The pineapple is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The pineapple is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The seeds of the apple are mentioned in the description.", + -1 + ], + [ + "The seeds of the apple are not mentioned in the description.", + 1 + ], + [ + "The apple is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The seeds of the apple are not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The stem of the apple is mentioned in the description.", + -1 + ], + [ + "The stem of the apple is not mentioned in the description.", + 1 + ], + [ + "The apple is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The stem of the apple is mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": -1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is an apple or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a glossy surface reflecting light, showcasing a smooth curvature and a small, visible portion of the stem.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is an apple or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a glossy surface reflecting light, showcasing a smooth curvature and a small, visible portion of the stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the apple is mentioned in the description and is small.\nB. The size of the apple is not mentioned.\nC. The size of the apple is mentioned in the description but is not small.\nD. The apple is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a glossy surface reflecting light, showcasing a smooth curvature and a small, visible portion of the stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the apple is mentioned in the description and is smooth or glossy.\nB. The texture of the apple is not mentioned.\nC. The texture of the apple is mentioned in the description but is not smooth or glossy.\nD. The apple is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a glossy surface reflecting light, showcasing a smooth curvature and a small, visible portion of the stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the apple is mentioned in the description and is red.\nB. The color of the apple is not mentioned.\nC. The color of the apple is mentioned in the description but is not red.\nD. The apple is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a glossy surface reflecting light, showcasing a smooth curvature and a small, visible portion of the stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lemon is mentioned in the description.\nB. The lemon is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a glossy surface reflecting light, showcasing a smooth curvature and a small, visible portion of the stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orange is mentioned in the description.\nB. The orange is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a glossy surface reflecting light, showcasing a smooth curvature and a small, visible portion of the stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pineapple is mentioned in the description.\nB. 
The pineapple is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a glossy surface reflecting light, showcasing a smooth curvature and a small, visible portion of the stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The seeds of the apple are mentioned in the description.\nB. The seeds of the apple are not mentioned in the description.\nC. The apple is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a glossy surface reflecting light, showcasing a smooth curvature and a small, visible portion of the stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The stem of the apple is mentioned in the description.\nB. The stem of the apple is not mentioned in the description.\nC. The apple is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The size of the apple is not mentioned.", + "A. The texture of the apple is mentioned in the description and is smooth or glossy.", + "A. 
The color of the apple is mentioned in the description and is red.", + "B. The lemon is not mentioned in the description.", + "B. The orange is not mentioned in the description.", + "B. The pineapple is not mentioned in the description.", + "B. The seeds of the apple are not mentioned in the description.", + "A. The stem of the apple is mentioned in the description." + ], + "score": 0.625, + "score_pos": 0.6666666666666666, + "score_neg": 0.6, + "neg_valid_num": 5, + "recognition_result": true + }, + "11012500": { + "pred": "A soft, round tortilla filled with fresh arugula, a slice of ripe tomato, shredded lettuce, and a dollop of creamy sauce.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the lettuce is not mentioned, but the lettuce of the taco is mentioned.", + 0.5 + ], + [ + "The shape of the lettuce is mentioned in the description but is not shredded.", + -1 + ], + [ + "The lettuce or the taco is not mentioned.", + 0 + ], + [ + "The shape of the lettuce is mentioned in the description and is shredded.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the lettuce is mentioned in the description and is shredded.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the tomato is not mentioned, but the tomato of the taco is mentioned.", + 0.5 + ], + [ + "The shape of the tomato is mentioned in the description but is not sliced.", + -1 + ], + [ + "The tomato or the taco is not mentioned.", + 0 + ], + [ + "The shape of the tomato is mentioned in the description and is sliced.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The shape of the tomato is mentioned in the description and is sliced.", + "pred_index": 3, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the arugula is not mentioned, but the arugula of the taco is mentioned.", + 0.5 + ], + [ + "The color of the arugula is mentioned in the description but is not green.", + -1 + ], + [ + "The arugula or the taco is not mentioned.", + 0 + ], + [ + "The color of the arugula is mentioned in the description and is green.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the arugula is mentioned in the description and is green.", + "pred_index": 3, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the tortilla is not mentioned, but the tortilla of the taco is mentioned.", + 0.5 + ], + [ + "The color of the tortilla is mentioned in the description but is not white.", + -1 + ], + [ + "The tortilla or the taco is not mentioned.", + 0 + ], + [ + "The color of the tortilla is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. The tortilla or the taco is not mentioned.", + "pred_index": 2, + "question_index": 3, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The taco is not mentioned in the description.", + 0 + ], + [ + "The whipped cream of the taco is not mentioned in the description.", + 1 + ], + [ + "The whipped cream of the taco is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The taco is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The two glasses of lemonade with lemon slices and straws are not mentioned in the description.", + 1 + ], + [ + "The two glasses of lemonade with lemon slices and straws are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The two glasses of lemonade with lemon slices and straws are not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The taco is not mentioned in the description.", + 0 + ], + [ + "The nuts of the taco are not mentioned in the description.", + 1 + ], + [ + "The nuts of the taco are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The taco is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sandwich with vegetables are not mentioned in the description.", + 1 + ], + [ + "The sandwich with vegetables are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The sandwich with vegetables are mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The taco is not mentioned in the description.", + 0 + ], + [ + "The chocolate of the taco is not mentioned in the description.", + 1 + ], + [ + "The chocolate of the taco is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The taco is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 0 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a taco or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA soft, round tortilla filled with fresh arugula, a slice of ripe tomato, shredded lettuce, and a dollop of creamy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a taco or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA soft, round tortilla filled with fresh arugula, a slice of ripe tomato, shredded lettuce, and a dollop of creamy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the lettuce is not mentioned, but the lettuce of the taco is mentioned.\nB. The shape of the lettuce is mentioned in the description but is not shredded.\nC. The lettuce or the taco is not mentioned.\nD. The shape of the lettuce is mentioned in the description and is shredded.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA soft, round tortilla filled with fresh arugula, a slice of ripe tomato, shredded lettuce, and a dollop of creamy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the tomato is not mentioned, but the tomato of the taco is mentioned.\nB. The shape of the tomato is mentioned in the description but is not sliced.\nC. The tomato or the taco is not mentioned.\nD. The shape of the tomato is mentioned in the description and is sliced.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA soft, round tortilla filled with fresh arugula, a slice of ripe tomato, shredded lettuce, and a dollop of creamy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the arugula is not mentioned, but the arugula of the taco is mentioned.\nB. The color of the arugula is mentioned in the description but is not green.\nC. The arugula or the taco is not mentioned.\nD. The color of the arugula is mentioned in the description and is green.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA soft, round tortilla filled with fresh arugula, a slice of ripe tomato, shredded lettuce, and a dollop of creamy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the tortilla is not mentioned, but the tortilla of the taco is mentioned.\nB. The color of the tortilla is mentioned in the description but is not white.\nC. The tortilla or the taco is not mentioned.\nD. The color of the tortilla is mentioned in the description and is white.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA soft, round tortilla filled with fresh arugula, a slice of ripe tomato, shredded lettuce, and a dollop of creamy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The taco is not mentioned in the description.\nB. The whipped cream of the taco is not mentioned in the description.\nC. The whipped cream of the taco is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA soft, round tortilla filled with fresh arugula, a slice of ripe tomato, shredded lettuce, and a dollop of creamy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The two glasses of lemonade with lemon slices and straws are not mentioned in the description.\nB. The two glasses of lemonade with lemon slices and straws are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA soft, round tortilla filled with fresh arugula, a slice of ripe tomato, shredded lettuce, and a dollop of creamy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The taco is not mentioned in the description.\nB. The nuts of the taco are not mentioned in the description.\nC. The nuts of the taco are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA soft, round tortilla filled with fresh arugula, a slice of ripe tomato, shredded lettuce, and a dollop of creamy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sandwich with vegetables are not mentioned in the description.\nB. The sandwich with vegetables are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA soft, round tortilla filled with fresh arugula, a slice of ripe tomato, shredded lettuce, and a dollop of creamy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The taco is not mentioned in the description.\nB. The chocolate of the taco is not mentioned in the description.\nC. The chocolate of the taco is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The shape of the lettuce is mentioned in the description and is shredded.", + "D. The shape of the tomato is mentioned in the description and is sliced.", + "D. The color of the arugula is mentioned in the description and is green.", + "C. The tortilla or the taco is not mentioned.", + "A. The taco is not mentioned in the description.", + "A. The two glasses of lemonade with lemon slices and straws are not mentioned in the description.", + "A. The taco is not mentioned in the description.", + "B. The sandwich with vegetables are mentioned in the description.", + "A. The taco is not mentioned in the description." + ], + "score": 0.3333333333333333, + "score_pos": 0.75, + "score_neg": 0.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "12348080": { + "pred": "A pair of scissors with bright red handles, each handle forming a loop with a smooth, rounded edge. 
The blades are metallic and converge at a central pivot point, with one blade partially visible.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the handles is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the handles is mentioned in the description and is plastic.", + 1 + ], + [ + "The handles or the scissors are not mentioned.", + 0 + ], + [ + "The material of the handles is not mentioned, but the handles of the scissors are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the handles is not mentioned, but the handles of the scissors are mentioned.", + "pred_index": 3, + "question_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the blades is mentioned in the description but is not silver or metallic.", + -1 + ], + [ + "The color of the blades is mentioned in the description and is silver or metallic.", + 1 + ], + [ + "The blades or the scissors are not mentioned.", + 0 + ], + [ + "The color of the blades is not mentioned, but the blades of the scissors are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the blades is mentioned in the description and is silver or metallic.", + "pred_index": 1, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the blades is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the blades is mentioned in the description and is metal.", + 1 + ], + [ + "The blades or the scissors are not mentioned.", + 0 + ], + [ + "The material of the blades is not mentioned, but the blades of the scissors are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The material of the blades is mentioned in the description and is metal.", + "pred_index": 1, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the handles is mentioned in the description but is not red.", + -1 + ], + [ + "The color of the handles is mentioned in the description and is red.", + 1 + ], + [ + "The handles or the scissors are not mentioned.", + 0 + ], + [ + "The color of the handles is not mentioned, but the handles of the scissors are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the handles is mentioned in the description and is red.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The adjustment screw of the scissors is not mentioned in the description.", + 1 + ], + [ + "The adjustment screw of the scissors is mentioned in the description.", + -1 + ], + [ + "The scissors are not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The adjustment screw of the scissors is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The blade guard of the scissors is not mentioned in the description.", + 1 + ], + [ + "The blade guard of the scissors is mentioned in the description.", + -1 + ], + [ + "The scissors are not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The blade guard of the scissors is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tricycle cart is not mentioned in the description.", + 1 + ], + [ + "The tricycle cart is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The tricycle cart is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The baskets of fruit are not mentioned in the description.", + 1 + ], + [ + "The baskets of fruit are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The baskets of fruit are not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The scale is not mentioned in the description.", + 1 + ], + [ + "The scale is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The scale is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the objects in the description are scissors or objects of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red handles, each handle forming a loop with a smooth, rounded edge. The blades are metallic and converge at a central pivot point, with one blade partially visible.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the objects in the description are scissors or objects of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red handles, each handle forming a loop with a smooth, rounded edge. The blades are metallic and converge at a central pivot point, with one blade partially visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the handles is mentioned in the description but is not plastic.\nB. The material of the handles is mentioned in the description and is plastic.\nC. The handles or the scissors are not mentioned.\nD. The material of the handles is not mentioned, but the handles of the scissors are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red handles, each handle forming a loop with a smooth, rounded edge. The blades are metallic and converge at a central pivot point, with one blade partially visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the blades is mentioned in the description but is not silver or metallic.\nB. The color of the blades is mentioned in the description and is silver or metallic.\nC. The blades or the scissors are not mentioned.\nD. The color of the blades is not mentioned, but the blades of the scissors are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red handles, each handle forming a loop with a smooth, rounded edge. The blades are metallic and converge at a central pivot point, with one blade partially visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the blades is mentioned in the description but is not metal.\nB. The material of the blades is mentioned in the description and is metal.\nC. The blades or the scissors are not mentioned.\nD. 
The material of the blades is not mentioned, but the blades of the scissors are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red handles, each handle forming a loop with a smooth, rounded edge. The blades are metallic and converge at a central pivot point, with one blade partially visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the handles is mentioned in the description but is not red.\nB. The color of the handles is mentioned in the description and is red.\nC. The handles or the scissors are not mentioned.\nD. The color of the handles is not mentioned, but the handles of the scissors are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red handles, each handle forming a loop with a smooth, rounded edge. The blades are metallic and converge at a central pivot point, with one blade partially visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The adjustment screw of the scissors is not mentioned in the description.\nB. 
The adjustment screw of the scissors is mentioned in the description.\nC. The scissors are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red handles, each handle forming a loop with a smooth, rounded edge. The blades are metallic and converge at a central pivot point, with one blade partially visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The blade guard of the scissors is not mentioned in the description.\nB. The blade guard of the scissors is mentioned in the description.\nC. The scissors are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red handles, each handle forming a loop with a smooth, rounded edge. The blades are metallic and converge at a central pivot point, with one blade partially visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tricycle cart is not mentioned in the description.\nB. 
The tricycle cart is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red handles, each handle forming a loop with a smooth, rounded edge. The blades are metallic and converge at a central pivot point, with one blade partially visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The baskets of fruit are not mentioned in the description.\nB. The baskets of fruit are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red handles, each handle forming a loop with a smooth, rounded edge. The blades are metallic and converge at a central pivot point, with one blade partially visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The scale is not mentioned in the description.\nB. The scale is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The material of the handles is not mentioned, but the handles of the scissors are mentioned.", + "B. 
The color of the blades is mentioned in the description and is silver or metallic.", + "B. The material of the blades is mentioned in the description and is metal.", + "B. The color of the handles is mentioned in the description and is red.", + "A. The adjustment screw of the scissors is not mentioned in the description.", + "A. The blade guard of the scissors is not mentioned in the description.", + "A. The tricycle cart is not mentioned in the description.", + "A. The baskets of fruit are not mentioned in the description.", + "A. The scale is not mentioned in the description." + ], + "score": 0.9444444444444444, + "score_pos": 0.875, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "16951734": { + "pred": "A triangular slice of mango with a smooth, light orange flesh and a slightly darker orange edge.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the potato is not mentioned.", + 0 + ], + [ + "The color of the potato is mentioned in the description and is yellow, golden, or brown.", + 1 + ], + [ + "The potato is not mentioned.", + 0 + ], + [ + "The color of the potato is mentioned in the description but is not yellow, golden, or brown.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The potato is not mentioned.", + "pred_index": 2, + "question_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the potato is not mentioned.", + 0 + ], + [ + "The texture of the potato is mentioned in the description and is smooth.", + 1 + ], + [ + "The potato is not mentioned.", + 0 + ], + [ + "The texture of the potato is mentioned in the description but is not smooth.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The potato is not mentioned.", + "pred_index": 2, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the potato is not mentioned.", + 0 + ], + [ + "The shape of the potato is mentioned in the description and is irregular.", + 1 + ], + [ + "The potato is not mentioned.", + 0 + ], + [ + "The shape of the potato is mentioned in the description but is not irregular.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The potato is not mentioned.", + "pred_index": 2, + "question_index": 2, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cup is not mentioned in the description.", + 1 + ], + [ + "The cup is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The cup is not mentioned in the description.", + "pred_index": 0, + "question_index": 3, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sprouts of the potato are not mentioned in the description.", + 1 + ], + [ + "The potato is not mentioned in the description.", + 0 + ], + [ + "The sprouts of the potato are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The potato is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bowl is not mentioned in the description.", + 1 + ], + [ + "The bowl is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The bowl is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The roots of the potato are not mentioned in the description.", + 1 + ], + [ + "The potato is not mentioned in the description.", + 0 + ], + [ + "The roots of the potato are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The potato is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The liquid is not mentioned in the description.", + 1 + ], + [ + "The liquid is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The liquid is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 0 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a potato or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "B. No", + "pred_index": 1, + "eval_result": "incorrect" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA triangular slice of mango with a smooth, light orange flesh and a slightly darker orange edge.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a potato or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA triangular slice of mango with a smooth, light orange flesh and a slightly darker orange edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the potato is not mentioned.\nB. The color of the potato is mentioned in the description and is yellow, golden, or brown.\nC. The potato is not mentioned.\nD. The color of the potato is mentioned in the description but is not yellow, golden, or brown.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA triangular slice of mango with a smooth, light orange flesh and a slightly darker orange edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the potato is not mentioned.\nB. The texture of the potato is mentioned in the description and is smooth.\nC. The potato is not mentioned.\nD. The texture of the potato is mentioned in the description but is not smooth.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA triangular slice of mango with a smooth, light orange flesh and a slightly darker orange edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the potato is not mentioned.\nB. The shape of the potato is mentioned in the description and is irregular.\nC. The potato is not mentioned.\nD. The shape of the potato is mentioned in the description but is not irregular.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA triangular slice of mango with a smooth, light orange flesh and a slightly darker orange edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cup is not mentioned in the description.\nB. The cup is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA triangular slice of mango with a smooth, light orange flesh and a slightly darker orange edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sprouts of the potato are not mentioned in the description.\nB. The potato is not mentioned in the description.\nC. The sprouts of the potato are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA triangular slice of mango with a smooth, light orange flesh and a slightly darker orange edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bowl is not mentioned in the description.\nB. 
The bowl is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA triangular slice of mango with a smooth, light orange flesh and a slightly darker orange edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The roots of the potato are not mentioned in the description.\nB. The potato is not mentioned in the description.\nC. The roots of the potato are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA triangular slice of mango with a smooth, light orange flesh and a slightly darker orange edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The liquid is not mentioned in the description.\nB. The liquid is mentioned in the description.\n" + ], + "response": [ + "B. No", + "C. The potato is not mentioned.", + "C. The potato is not mentioned.", + "C. The potato is not mentioned.", + "A. The cup is not mentioned in the description.", + "B. The potato is not mentioned in the description.", + "A. The bowl is not mentioned in the description.", + "B. 
The potato is not mentioned in the description.", + "A. The liquid is not mentioned in the description." + ], + "score": 0.0, + "score_pos": 0.0, + "score_neg": 0.0, + "neg_valid_num": 5, + "recognition_result": false + }, + "17265254": { + "pred": "A traditional rickshaw with a black frame and a red seat cushion. It features a single large spoked wheel on each side, connected by a horizontal axle. The rickshaw has a curved handlebar at the front, and a footrest is visible beneath the seat. The wheels are equipped with black tires and silver rims.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the wheels is not mentioned, but the wheels of the rickshaw are mentioned.", + 0.5 + ], + [ + "The wheels or the rickshaw are not mentioned.", + 0 + ], + [ + "The shape of the wheels is mentioned in the description and is circular or spoked.", + 1 + ], + [ + "The shape of the wheels is mentioned in the description but is not circular or spoked.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the wheels is mentioned in the description and is circular or spoked.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the seat is not mentioned, but the seat of the rickshaw is mentioned.", + 0.5 + ], + [ + "The seat or the rickshaw is not mentioned.", + 0 + ], + [ + "The color of the seat is mentioned in the description and is black.", + 1 + ], + [ + "The color of the seat is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the seat is not mentioned, but the seat of the rickshaw is mentioned.", + "pred_index": 0, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The number of parts of the wheels is not mentioned, but the wheels of the rickshaw are mentioned.", + 0.5 + ], + [ + "The wheels or the rickshaw are not mentioned.", + 0 + ], + [ + "The number of parts of the wheels is mentioned in the description and is 2.", + 1 + ], + [ + "The number of parts of the wheels is mentioned in the description but is not 2.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. The number of parts of the wheels is mentioned in the description but is not 2.", + "pred_index": 3, + "question_index": 2, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lights of the rickshaw are not mentioned in the description.", + 1 + ], + [ + "The lights of the rickshaw are mentioned in the description.", + -1 + ], + [ + "The rickshaw is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The lights of the rickshaw are not mentioned in the description.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The people are not mentioned in the description.", + 1 + ], + [ + "The people are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The people are not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The storage compartment of the rickshaw is not mentioned in the description.", + 1 + ], + [ + "The storage compartment of the rickshaw is mentioned in the description.", + -1 + ], + [ + "The rickshaw is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The storage compartment of the rickshaw is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bamboo forest is not mentioned in the description.", + 1 + ], + [ + "The bamboo forest is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The bamboo forest is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The horn of the rickshaw is not mentioned in the description.", + 1 + ], + [ + "The horn of the rickshaw is mentioned in the description.", + -1 + ], + [ + "The rickshaw is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The horn of the rickshaw is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a rickshaw or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat cushion. It features a single large spoked wheel on each side, connected by a horizontal axle. The rickshaw has a curved handlebar at the front, and a footrest is visible beneath the seat. The wheels are equipped with black tires and silver rims.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a rickshaw or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat cushion. It features a single large spoked wheel on each side, connected by a horizontal axle. The rickshaw has a curved handlebar at the front, and a footrest is visible beneath the seat. The wheels are equipped with black tires and silver rims.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the wheels is not mentioned, but the wheels of the rickshaw are mentioned.\nB. 
The wheels or the rickshaw are not mentioned.\nC. The shape of the wheels is mentioned in the description and is circular or spoked.\nD. The shape of the wheels is mentioned in the description but is not circular or spoked.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat cushion. It features a single large spoked wheel on each side, connected by a horizontal axle. The rickshaw has a curved handlebar at the front, and a footrest is visible beneath the seat. The wheels are equipped with black tires and silver rims.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the seat is not mentioned, but the seat of the rickshaw is mentioned.\nB. The seat or the rickshaw is not mentioned.\nC. The color of the seat is mentioned in the description and is black.\nD. The color of the seat is mentioned in the description but is not black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat cushion. It features a single large spoked wheel on each side, connected by a horizontal axle. 
The rickshaw has a curved handlebar at the front, and a footrest is visible beneath the seat. The wheels are equipped with black tires and silver rims.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The number of parts of the wheels is not mentioned, but the wheels of the rickshaw are mentioned.\nB. The wheels or the rickshaw are not mentioned.\nC. The number of parts of the wheels is mentioned in the description and is 2.\nD. The number of parts of the wheels is mentioned in the description but is not 2.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat cushion. It features a single large spoked wheel on each side, connected by a horizontal axle. The rickshaw has a curved handlebar at the front, and a footrest is visible beneath the seat. The wheels are equipped with black tires and silver rims.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lights of the rickshaw are not mentioned in the description.\nB. The lights of the rickshaw are mentioned in the description.\nC. The rickshaw is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat cushion. It features a single large spoked wheel on each side, connected by a horizontal axle. The rickshaw has a curved handlebar at the front, and a footrest is visible beneath the seat. The wheels are equipped with black tires and silver rims.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The people are not mentioned in the description.\nB. The people are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat cushion. It features a single large spoked wheel on each side, connected by a horizontal axle. The rickshaw has a curved handlebar at the front, and a footrest is visible beneath the seat. The wheels are equipped with black tires and silver rims.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The storage compartment of the rickshaw is not mentioned in the description.\nB. The storage compartment of the rickshaw is mentioned in the description.\nC. The rickshaw is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat cushion. It features a single large spoked wheel on each side, connected by a horizontal axle. The rickshaw has a curved handlebar at the front, and a footrest is visible beneath the seat. The wheels are equipped with black tires and silver rims.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bamboo forest is not mentioned in the description.\nB. The bamboo forest is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat cushion. It features a single large spoked wheel on each side, connected by a horizontal axle. The rickshaw has a curved handlebar at the front, and a footrest is visible beneath the seat. The wheels are equipped with black tires and silver rims.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The horn of the rickshaw is not mentioned in the description.\nB. The horn of the rickshaw is mentioned in the description.\nC. The rickshaw is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The shape of the wheels is mentioned in the description and is circular or spoked.", + "A. 
The color of the seat is not mentioned, but the seat of the rickshaw is mentioned.", + "D. The number of parts of the wheels is mentioned in the description but is not 2.", + "A. The lights of the rickshaw are not mentioned in the description.", + "A. The people are not mentioned in the description.", + "A. The storage compartment of the rickshaw is not mentioned in the description.", + "A. The bamboo forest is not mentioned in the description.", + "A. The horn of the rickshaw is not mentioned in the description." + ], + "score": 0.6875, + "score_pos": 0.16666666666666666, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "18845103": { + "pred": "A metallic spoon with a slightly curved, elongated handle and a shallow, oval-shaped bowl. The handle has a smooth, polished finish, and the bowl is also metallic with a reflective surface.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the handle is not mentioned, but the handle of the spoon is mentioned.", + 0.5 + ], + [ + "The color of the handle is mentioned in the description and is metallic.", + 1 + ], + [ + "The handle or the spoon is not mentioned.", + 0 + ], + [ + "The color of the handle is mentioned in the description but is not metallic.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the handle is not mentioned, but the handle of the spoon is mentioned.", + "pred_index": 0, + "question_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the bowl is not mentioned, but the bowl of the spoon is mentioned.", + 0.5 + ], + [ + "The material of the bowl is mentioned in the description and is metal.", + 1 + ], + [ + "The bowl or the spoon is not mentioned.", + 0 + ], + [ + "The material of the bowl is mentioned in the description but is not metal.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The material of the bowl is mentioned in the description and is metal.", + "pred_index": 1, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the bowl is not mentioned, but the bowl of the spoon is mentioned.", + 0.5 + ], + [ + "The color of the bowl is mentioned in the description and is metallic.", + 1 + ], + [ + "The bowl or the spoon is not mentioned.", + 0 + ], + [ + "The color of the bowl is mentioned in the description but is not metallic.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the bowl is not mentioned, but the bowl of the spoon is mentioned.", + "pred_index": 0, + "question_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the bowl is not mentioned, but the bowl of the spoon is mentioned.", + 0.5 + ], + [ + "The shape of the bowl is mentioned in the description and is round or oval.", + 1 + ], + [ + "The bowl or the spoon is not mentioned.", + 0 + ], + [ + "The shape of the bowl is mentioned in the description but is not round or oval.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The shape of the bowl is mentioned in the description and is round or oval.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the handle is not mentioned, but the handle of the spoon is mentioned.", + 0.5 + ], + [ + "The shape of the handle is mentioned in the description and is long, elongated, straight, or slender.", + 1 + ], + [ + "The handle or the spoon is not mentioned.", + 0 + ], + [ + "The shape of the handle is mentioned in the description but is not long, elongated, straight, or slender.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the handle is mentioned in the description and is long, elongated, straight, or slender.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The spoon is not mentioned in the description.", + 0 + ], + [ + "The engraved handle of the spoon is mentioned in the description.", + -1 + ], + [ + "The engraved handle of the spoon is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The engraved handle of the spoon is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cutting board is mentioned in the description.", + -1 + ], + [ + "The cutting board is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The cutting board is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The kitchen cabinets are mentioned in the description.", + -1 + ], + [ + "The kitchen cabinets are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The kitchen cabinets are not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sink is mentioned in the description.", + -1 + ], + [ + "The sink is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The sink is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The spoon is not mentioned in the description.", + 0 + ], + [ + "The twisted handle of the spoon is mentioned in the description.", + -1 + ], + [ + "The twisted handle of the spoon is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The twisted handle of the spoon is not mentioned in the description.", + "pred_index": 2, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a spoon or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved, elongated handle and a shallow, oval-shaped bowl. The handle has a smooth, polished finish, and the bowl is also metallic with a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a spoon or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved, elongated handle and a shallow, oval-shaped bowl. The handle has a smooth, polished finish, and the bowl is also metallic with a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the handle is not mentioned, but the handle of the spoon is mentioned.\nB. The color of the handle is mentioned in the description and is metallic.\nC. The handle or the spoon is not mentioned.\nD. 
The color of the handle is mentioned in the description but is not metallic.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved, elongated handle and a shallow, oval-shaped bowl. The handle has a smooth, polished finish, and the bowl is also metallic with a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the bowl is not mentioned, but the bowl of the spoon is mentioned.\nB. The material of the bowl is mentioned in the description and is metal.\nC. The bowl or the spoon is not mentioned.\nD. The material of the bowl is mentioned in the description but is not metal.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved, elongated handle and a shallow, oval-shaped bowl. The handle has a smooth, polished finish, and the bowl is also metallic with a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the bowl is not mentioned, but the bowl of the spoon is mentioned.\nB. 
The color of the bowl is mentioned in the description and is metallic.\nC. The bowl or the spoon is not mentioned.\nD. The color of the bowl is mentioned in the description but is not metallic.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved, elongated handle and a shallow, oval-shaped bowl. The handle has a smooth, polished finish, and the bowl is also metallic with a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the bowl is not mentioned, but the bowl of the spoon is mentioned.\nB. The shape of the bowl is mentioned in the description and is round or oval.\nC. The bowl or the spoon is not mentioned.\nD. The shape of the bowl is mentioned in the description but is not round or oval.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved, elongated handle and a shallow, oval-shaped bowl. The handle has a smooth, polished finish, and the bowl is also metallic with a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The shape of the handle is not mentioned, but the handle of the spoon is mentioned.\nB. The shape of the handle is mentioned in the description and is long, elongated, straight, or slender.\nC. The handle or the spoon is not mentioned.\nD. The shape of the handle is mentioned in the description but is not long, elongated, straight, or slender.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved, elongated handle and a shallow, oval-shaped bowl. The handle has a smooth, polished finish, and the bowl is also metallic with a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The spoon is not mentioned in the description.\nB. The engraved handle of the spoon is mentioned in the description.\nC. The engraved handle of the spoon is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved, elongated handle and a shallow, oval-shaped bowl. 
The handle has a smooth, polished finish, and the bowl is also metallic with a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cutting board is mentioned in the description.\nB. The cutting board is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved, elongated handle and a shallow, oval-shaped bowl. The handle has a smooth, polished finish, and the bowl is also metallic with a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The kitchen cabinets are mentioned in the description.\nB. The kitchen cabinets are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved, elongated handle and a shallow, oval-shaped bowl. The handle has a smooth, polished finish, and the bowl is also metallic with a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The sink is mentioned in the description.\nB. The sink is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved, elongated handle and a shallow, oval-shaped bowl. The handle has a smooth, polished finish, and the bowl is also metallic with a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The spoon is not mentioned in the description.\nB. The twisted handle of the spoon is mentioned in the description.\nC. The twisted handle of the spoon is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The color of the handle is not mentioned, but the handle of the spoon is mentioned.", + "B. The material of the bowl is mentioned in the description and is metal.", + "A. The color of the bowl is not mentioned, but the bowl of the spoon is mentioned.", + "B. The shape of the bowl is mentioned in the description and is round or oval.", + "B. The shape of the handle is mentioned in the description and is long, elongated, straight, or slender.", + "C. The engraved handle of the spoon is not mentioned in the description.", + "B. The cutting board is not mentioned in the description.", + "B. The kitchen cabinets are not mentioned in the description.", + "B. The sink is not mentioned in the description.", + "C. The twisted handle of the spoon is not mentioned in the description." 
+ ], + "score": 0.9, + "score_pos": 0.8, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "20993402": { + "pred": "A roll of white adhesive tape with a smooth, glossy surface and a slightly reflective sheen. The tape is wound tightly around a cylindrical core, with the outer edge appearing clean and unblemished.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the tape is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the tape is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the tape is not mentioned.", + 0 + ], + [ + "The tape is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The texture of the tape is mentioned in the description and is smooth.", + "pred_index": 1, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the tape roll is mentioned in the description but is not beige, white, or transparent.", + -1 + ], + [ + "The color of the tape roll is mentioned in the description and is beige, white, or transparent.", + 1 + ], + [ + "The color of the tape roll is not mentioned, but the tape roll of the tape is mentioned.", + 0.5 + ], + [ + "The tape roll or the tape is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the tape roll is mentioned in the description and is beige, white, or transparent.", + "pred_index": 1, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the tape is mentioned in the description but is not adhesive tape.", + -1 + ], + [ + "The type of the tape is mentioned in the description and is adhesive tape.", + 1 + ], + [ + "The type of the tape is not mentioned.", + 0 + ], + [ + "The tape is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The type of the tape is mentioned in the description and is adhesive tape.", + "pred_index": 1, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the core is mentioned in the description but is not cardboard.", + -1 + ], + [ + "The material of the core is mentioned in the description and is cardboard.", + 1 + ], + [ + "The material of the core is not mentioned, but the core of the tape is mentioned.", + 0.5 + ], + [ + "The core or the tape is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The core of the tape is mentioned.", + "pred_index": 2, + "question_index": 3, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the tape roll is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the tape roll is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the tape roll is not mentioned, but the tape roll of the tape is mentioned.", + 0.5 + ], + [ + "The tape roll or the tape is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The material of the tape roll is mentioned in the description and is plastic.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tape is not mentioned in the description.", + 0 + ], + [ + "The dispenser of the tape is not mentioned in the description.", + 1 + ], + [ + "The dispenser of the tape is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The dispenser of the tape is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The trees are not mentioned in the description.", + 1 + ], + [ + "The trees are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The trees are not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The window is not mentioned in the description.", + 1 + ], + [ + "The window is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The window is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The stack of plates are not mentioned in the description.", + 1 + ], + [ + "The stack of plates are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The stack of plates are not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tape is not mentioned in the description.", + 0 + ], + [ + "The cutting edge of the tape is not mentioned in the description.", + 1 + ], + [ + "The cutting edge of the tape is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The cutting edge of the tape is not mentioned in the description.", + "pred_index": 1, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a tape or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of white adhesive tape with a smooth, glossy surface and a slightly reflective sheen. The tape is wound tightly around a cylindrical core, with the outer edge appearing clean and unblemished.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a tape or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of white adhesive tape with a smooth, glossy surface and a slightly reflective sheen. The tape is wound tightly around a cylindrical core, with the outer edge appearing clean and unblemished.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the tape is mentioned in the description but is not smooth.\nB. The texture of the tape is mentioned in the description and is smooth.\nC. The texture of the tape is not mentioned.\nD. The tape is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of white adhesive tape with a smooth, glossy surface and a slightly reflective sheen. The tape is wound tightly around a cylindrical core, with the outer edge appearing clean and unblemished.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the tape roll is mentioned in the description but is not beige, white, or transparent.\nB. The color of the tape roll is mentioned in the description and is beige, white, or transparent.\nC. The color of the tape roll is not mentioned, but the tape roll of the tape is mentioned.\nD. 
The tape roll or the tape is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of white adhesive tape with a smooth, glossy surface and a slightly reflective sheen. The tape is wound tightly around a cylindrical core, with the outer edge appearing clean and unblemished.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The type of the tape is mentioned in the description but is not adhesive tape.\nB. The type of the tape is mentioned in the description and is adhesive tape.\nC. The type of the tape is not mentioned.\nD. The tape is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of white adhesive tape with a smooth, glossy surface and a slightly reflective sheen. The tape is wound tightly around a cylindrical core, with the outer edge appearing clean and unblemished.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the core is mentioned in the description but is not cardboard.\nB. The material of the core is mentioned in the description and is cardboard.\nC. 
The material of the core is not mentioned, but the core of the tape is mentioned.\nD. The core or the tape is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of white adhesive tape with a smooth, glossy surface and a slightly reflective sheen. The tape is wound tightly around a cylindrical core, with the outer edge appearing clean and unblemished.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the tape roll is mentioned in the description but is not plastic.\nB. The material of the tape roll is mentioned in the description and is plastic.\nC. The material of the tape roll is not mentioned, but the tape roll of the tape is mentioned.\nD. The tape roll or the tape is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of white adhesive tape with a smooth, glossy surface and a slightly reflective sheen. The tape is wound tightly around a cylindrical core, with the outer edge appearing clean and unblemished.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The tape is not mentioned in the description.\nB. The dispenser of the tape is not mentioned in the description.\nC. The dispenser of the tape is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of white adhesive tape with a smooth, glossy surface and a slightly reflective sheen. The tape is wound tightly around a cylindrical core, with the outer edge appearing clean and unblemished.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The trees are not mentioned in the description.\nB. The trees are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of white adhesive tape with a smooth, glossy surface and a slightly reflective sheen. The tape is wound tightly around a cylindrical core, with the outer edge appearing clean and unblemished.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The window is not mentioned in the description.\nB. The window is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of white adhesive tape with a smooth, glossy surface and a slightly reflective sheen. The tape is wound tightly around a cylindrical core, with the outer edge appearing clean and unblemished.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The stack of plates are not mentioned in the description.\nB. The stack of plates are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of white adhesive tape with a smooth, glossy surface and a slightly reflective sheen. The tape is wound tightly around a cylindrical core, with the outer edge appearing clean and unblemished.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tape is not mentioned in the description.\nB. The cutting edge of the tape is not mentioned in the description.\nC. The cutting edge of the tape is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The texture of the tape is mentioned in the description and is smooth.", + "B. The color of the tape roll is mentioned in the description and is beige, white, or transparent.", + "B. 
The type of the tape is mentioned in the description and is adhesive tape.", + "C. The core of the tape is mentioned.", + "B. The material of the tape roll is mentioned in the description and is plastic.", + "B. The dispenser of the tape is not mentioned in the description.", + "A. The trees are not mentioned in the description.", + "A. The window is not mentioned in the description.", + "A. The stack of plates are not mentioned in the description.", + "B. The cutting edge of the tape is not mentioned in the description." + ], + "score": 0.95, + "score_pos": 0.9, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "21529954": { + "pred": "A cylindrical can with a predominantly green label featuring the word \"Pepsi\" in white, bold letters. The top of the can is orange with a white cap.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The can/container is not mentioned.", + 0 + ], + [ + "The material of the can/container is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the can/container is not mentioned.", + 0 + ], + [ + "The material of the can/container is mentioned in the description and is plastic.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the can/container is not mentioned.", + "pred_index": 2, + "question_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cap/lid or the can/container is not mentioned.", + 0 + ], + [ + "The shape of the cap/lid is mentioned in the description but is not circular.", + -1 + ], + [ + "The shape of the cap/lid is not mentioned, but the cap/lid of the can/container is mentioned.", + 0.5 + ], + [ + "The shape of the cap/lid is mentioned in the description and is circular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the cap/lid is not mentioned, but the cap/lid of the can/container is mentioned.", + "pred_index": 2, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cap/lid or the can/container is not mentioned.", + 0 + ], + [ + "The color of the cap/lid is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the cap/lid is not mentioned, but the cap/lid of the can/container is mentioned.", + 0.5 + ], + [ + "The color of the cap/lid is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the cap/lid is mentioned in the description and is white.", + "pred_index": 3, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The body or the can/container is not mentioned.", + 0 + ], + [ + "The shape of the body is mentioned in the description but is not cylindrical.", + -1 + ], + [ + "The shape of the body is not mentioned, but the body of the can/container is mentioned.", + 0.5 + ], + [ + "The shape of the body is mentioned in the description and is cylindrical.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the body is mentioned in the description and is cylindrical.", + "pred_index": 3, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The label or the can/container is not mentioned.", + 0 + ], + [ + "The color of the label is mentioned in the description but is not green, white, yellow.", + -1 + ], + [ + "The color of the label is not mentioned, but the label of the can/container is mentioned.", + 0.5 + ], + [ + "The color of the label is mentioned in the description and is green, white, yellow.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the label is mentioned in the description and is green, white, yellow.", + "pred_index": 3, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The raspberries are mentioned in the description.", + -1 + ], + [ + "The raspberries are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The raspberries are not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The red bell peppers are mentioned in the description.", + -1 + ], + [ + "The red bell peppers are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The red bell peppers are not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ginger is mentioned in the description.", + -1 + ], + [ + "The ginger is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The ginger is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sweet potato is mentioned in the description.", + -1 + ], + [ + "The sweet potato is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The sweet potato is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The base of the can/container is mentioned in the description.", + -1 + ], + [ + "The base of the can/container is not mentioned in the description.", + 1 + ], + [ + "The can/container is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The base of the can/container is not mentioned in the description.", + "pred_index": 1, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a can/container or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a predominantly green label featuring the word \"Pepsi\" in white, bold letters. The top of the can is orange with a white cap.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a can/container or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a predominantly green label featuring the word \"Pepsi\" in white, bold letters. The top of the can is orange with a white cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The can/container is not mentioned.\nB. The material of the can/container is mentioned in the description but is not plastic.\nC. The material of the can/container is not mentioned.\nD. The material of the can/container is mentioned in the description and is plastic.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a predominantly green label featuring the word \"Pepsi\" in white, bold letters. The top of the can is orange with a white cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cap/lid or the can/container is not mentioned.\nB. The shape of the cap/lid is mentioned in the description but is not circular.\nC. The shape of the cap/lid is not mentioned, but the cap/lid of the can/container is mentioned.\nD. 
The shape of the cap/lid is mentioned in the description and is circular.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a predominantly green label featuring the word \"Pepsi\" in white, bold letters. The top of the can is orange with a white cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cap/lid or the can/container is not mentioned.\nB. The color of the cap/lid is mentioned in the description but is not white.\nC. The color of the cap/lid is not mentioned, but the cap/lid of the can/container is mentioned.\nD. The color of the cap/lid is mentioned in the description and is white.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a predominantly green label featuring the word \"Pepsi\" in white, bold letters. The top of the can is orange with a white cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The body or the can/container is not mentioned.\nB. The shape of the body is mentioned in the description but is not cylindrical.\nC. 
The shape of the body is not mentioned, but the body of the can/container is mentioned.\nD. The shape of the body is mentioned in the description and is cylindrical.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a predominantly green label featuring the word \"Pepsi\" in white, bold letters. The top of the can is orange with a white cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The label or the can/container is not mentioned.\nB. The color of the label is mentioned in the description but is not green, white, yellow.\nC. The color of the label is not mentioned, but the label of the can/container is mentioned.\nD. The color of the label is mentioned in the description and is green, white, yellow.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a predominantly green label featuring the word \"Pepsi\" in white, bold letters. The top of the can is orange with a white cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The raspberries are mentioned in the description.\nB. 
The raspberries are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a predominantly green label featuring the word \"Pepsi\" in white, bold letters. The top of the can is orange with a white cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The red bell peppers are mentioned in the description.\nB. The red bell peppers are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a predominantly green label featuring the word \"Pepsi\" in white, bold letters. The top of the can is orange with a white cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ginger is mentioned in the description.\nB. The ginger is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a predominantly green label featuring the word \"Pepsi\" in white, bold letters. The top of the can is orange with a white cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sweet potato is mentioned in the description.\nB. The sweet potato is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a predominantly green label featuring the word \"Pepsi\" in white, bold letters. The top of the can is orange with a white cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The base of the can/container is mentioned in the description.\nB. The base of the can/container is not mentioned in the description.\nC. The can/container is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The material of the can/container is not mentioned.", + "C. The shape of the cap/lid is not mentioned, but the cap/lid of the can/container is mentioned.", + "D. The color of the cap/lid is mentioned in the description and is white.", + "D. The shape of the body is mentioned in the description and is cylindrical.", + "D. The color of the label is mentioned in the description and is green, white, yellow.", + "B. The raspberries are not mentioned in the description.", + "B. 
The red bell peppers are not mentioned in the description.", + "B. The ginger is not mentioned in the description.", + "B. The sweet potato is not mentioned in the description.", + "B. The base of the can/container is not mentioned in the description." + ], + "score": 0.85, + "score_pos": 0.7, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "22879790": { + "pred": "A single, partially peeled white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a visible root end that is dry and brownish. The layers are tightly packed, and the outer skin is mostly intact, showing a few small, white root remnants.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the garlic is mentioned in the description and is white.", + 1 + ], + [ + "The color of the garlic is not mentioned.", + 0 + ], + [ + "The garlic is not mentioned.", + 0 + ], + [ + "The color of the garlic is mentioned in the description but is not white.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The garlic is not mentioned.", + "pred_index": 2, + "question_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the skin is mentioned in the description and is papery.", + 1 + ], + [ + "The texture of the skin is not mentioned, but the skin of the garlic is mentioned.", + 0.5 + ], + [ + "The skin or the garlic is not mentioned.", + 0 + ], + [ + "The texture of the skin is mentioned in the description but is not papery.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The texture of the skin is mentioned in the description but is not papery.", + "pred_index": 3, + "question_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the root is mentioned in the description and is brown.", + 1 + ], + [ + "The color of the root is not mentioned, but the root of the garlic is mentioned.", + 0.5 + ], + [ + "The root or the garlic is not mentioned.", + 0 + ], + [ + "The color of the root is mentioned in the description but is not brown.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the root is mentioned in the description and is brown.", + "pred_index": 0, + "question_index": 2, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The leaf of the garlic is not mentioned in the description.", + 1 + ], + [ + "The garlic is not mentioned in the description.", + 0 + ], + [ + "The leaf of the garlic is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The garlic is not mentioned in the description.", + "pred_index": 1, + "question_index": 3, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ceramic rooster and hen salt and pepper shakers are not mentioned in the description.", + 1 + ], + [ + "The ceramic rooster and hen salt and pepper shakers are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The ceramic rooster and hen salt and pepper shakers are not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The stem of the garlic is not mentioned in the description.", + 1 + ], + [ + "The garlic is not mentioned in the description.", + 0 + ], + [ + "The stem of the garlic is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The garlic is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ceramic hen salt shaker is not mentioned in the description.", + 1 + ], + [ + "The ceramic hen salt shaker is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The ceramic hen salt shaker is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green glass rooster is not mentioned in the description.", + 1 + ], + [ + "The green glass rooster is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The green glass rooster is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 0 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a garlic or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "B. 
No", + "pred_index": 1, + "eval_result": "incorrect" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, partially peeled white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a visible root end that is dry and brownish. The layers are tightly packed, and the outer skin is mostly intact, showing a few small, white root remnants.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a garlic or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, partially peeled white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a visible root end that is dry and brownish. The layers are tightly packed, and the outer skin is mostly intact, showing a few small, white root remnants.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the garlic is mentioned in the description and is white.\nB. The color of the garlic is not mentioned.\nC. The garlic is not mentioned.\nD. 
The color of the garlic is mentioned in the description but is not white.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, partially peeled white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a visible root end that is dry and brownish. The layers are tightly packed, and the outer skin is mostly intact, showing a few small, white root remnants.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the skin is mentioned in the description and is papery.\nB. The texture of the skin is not mentioned, but the skin of the garlic is mentioned.\nC. The skin or the garlic is not mentioned.\nD. The texture of the skin is mentioned in the description but is not papery.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, partially peeled white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a visible root end that is dry and brownish. 
The layers are tightly packed, and the outer skin is mostly intact, showing a few small, white root remnants.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the root is mentioned in the description and is brown.\nB. The color of the root is not mentioned, but the root of the garlic is mentioned.\nC. The root or the garlic is not mentioned.\nD. The color of the root is mentioned in the description but is not brown.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, partially peeled white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a visible root end that is dry and brownish. The layers are tightly packed, and the outer skin is mostly intact, showing a few small, white root remnants.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The leaf of the garlic is not mentioned in the description.\nB. The garlic is not mentioned in the description.\nC. The leaf of the garlic is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA single, partially peeled white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a visible root end that is dry and brownish. The layers are tightly packed, and the outer skin is mostly intact, showing a few small, white root remnants.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ceramic rooster and hen salt and pepper shakers are not mentioned in the description.\nB. The ceramic rooster and hen salt and pepper shakers are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, partially peeled white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a visible root end that is dry and brownish. The layers are tightly packed, and the outer skin is mostly intact, showing a few small, white root remnants.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The stem of the garlic is not mentioned in the description.\nB. The garlic is not mentioned in the description.\nC. The stem of the garlic is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, partially peeled white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a visible root end that is dry and brownish. The layers are tightly packed, and the outer skin is mostly intact, showing a few small, white root remnants.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ceramic hen salt shaker is not mentioned in the description.\nB. The ceramic hen salt shaker is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, partially peeled white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a visible root end that is dry and brownish. The layers are tightly packed, and the outer skin is mostly intact, showing a few small, white root remnants.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green glass rooster is not mentioned in the description.\nB. The green glass rooster is mentioned in the description.\n" + ], + "response": [ + "B. No", + "C. The garlic is not mentioned.", + "D. The texture of the skin is mentioned in the description but is not papery.", + "A. The color of the root is mentioned in the description and is brown.", + "B. The garlic is not mentioned in the description.", + "A. 
The ceramic rooster and hen salt and pepper shakers are not mentioned in the description.", + "B. The garlic is not mentioned in the description.", + "A. The ceramic hen salt shaker is not mentioned in the description.", + "A. The green glass rooster is not mentioned in the description." + ], + "score": -0.125, + "score_pos": -0.3333333333333333, + "score_neg": 0.0, + "neg_valid_num": 5, + "recognition_result": false + }, + "24010373": { + "pred": "The guitar has a dark, glossy finish with a cutaway body design. It features a white pickguard and a circular soundhole with a simple rosette pattern. The fretboard is dark with white dot inlays, and the headstock is equipped with tuning pegs.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The neck or the guitar is not mentioned.", + 0 + ], + [ + "The shape of the neck is mentioned in the description and is straight or slightly curved.", + 1 + ], + [ + "The shape of the neck is not mentioned, but the neck of the guitar is mentioned.", + 0.5 + ], + [ + "The shape of the neck is mentioned in the description but is not straight or slightly curved.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the neck is not mentioned, but the neck of the guitar is mentioned.", + "pred_index": 2, + "question_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The body or the guitar is not mentioned.", + 0 + ], + [ + "The texture of the body is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the body is not mentioned, but the body of the guitar is mentioned.", + 0.5 + ], + [ + "The texture of the body is mentioned in the description but is not smooth.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The texture of the body is not mentioned, but the body of the guitar is mentioned.", + "pred_index": 2, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The strings or the guitar are not mentioned.", + 0 + ], + [ + "The number of parts of the strings is mentioned in the description and is 6.", + 1 + ], + [ + "The number of parts of the strings is not mentioned, but the strings of the guitar are mentioned.", + 0.5 + ], + [ + "The number of parts of the strings is mentioned in the description but is not 6.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The number of parts of the strings is not mentioned, but the strings of the guitar are mentioned.", + "pred_index": 2, + "question_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sound hole or the guitar is not mentioned.", + 0 + ], + [ + "The shape of the sound hole is mentioned in the description and is round.", + 1 + ], + [ + "The shape of the sound hole is not mentioned, but the sound hole of the guitar is mentioned.", + 0.5 + ], + [ + "The shape of the sound hole is mentioned in the description but is not round.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the sound hole is mentioned in the description and is round.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The guitar is not mentioned.", + 0 + ], + [ + "The color of the guitar is mentioned in the description and is black.", + 1 + ], + [ + "The color of the guitar is not mentioned.", + 0 + ], + [ + "The color of the guitar is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the guitar is mentioned in the description but is not black.", + "pred_index": 3, + "question_index": 4, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The violin is not mentioned in the description.", + 1 + ], + [ + "The violin is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The violin is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The guitar is not mentioned in the description.", + 0 + ], + [ + "The pickguard of the guitar is not mentioned in the description.", + 1 + ], + [ + "The pickguard of the guitar is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The pickguard of the guitar is mentioned in the description.", + "pred_index": 2, + "question_index": 6, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sign is not mentioned in the description.", + 1 + ], + [ + "The sign is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The sign is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The music stands are not mentioned in the description.", + 1 + ], + [ + "The music stands are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The music stands are not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The books are not mentioned in the description.", + 1 + ], + [ + "The books are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The books are not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a guitar or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy finish with a cutaway body design. It features a white pickguard and a circular soundhole with a simple rosette pattern. The fretboard is dark with white dot inlays, and the headstock is equipped with tuning pegs.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a guitar or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy finish with a cutaway body design. It features a white pickguard and a circular soundhole with a simple rosette pattern. The fretboard is dark with white dot inlays, and the headstock is equipped with tuning pegs.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The neck or the guitar is not mentioned.\nB. The shape of the neck is mentioned in the description and is straight or slightly curved.\nC. The shape of the neck is not mentioned, but the neck of the guitar is mentioned.\nD. The shape of the neck is mentioned in the description but is not straight or slightly curved.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy finish with a cutaway body design. It features a white pickguard and a circular soundhole with a simple rosette pattern. The fretboard is dark with white dot inlays, and the headstock is equipped with tuning pegs.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The body or the guitar is not mentioned.\nB. The texture of the body is mentioned in the description and is smooth.\nC. The texture of the body is not mentioned, but the body of the guitar is mentioned.\nD. 
The texture of the body is mentioned in the description but is not smooth.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy finish with a cutaway body design. It features a white pickguard and a circular soundhole with a simple rosette pattern. The fretboard is dark with white dot inlays, and the headstock is equipped with tuning pegs.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The strings or the guitar are not mentioned.\nB. The number of parts of the strings is mentioned in the description and is 6.\nC. The number of parts of the strings is not mentioned, but the strings of the guitar are mentioned.\nD. The number of parts of the strings is mentioned in the description but is not 6.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy finish with a cutaway body design. It features a white pickguard and a circular soundhole with a simple rosette pattern. 
The fretboard is dark with white dot inlays, and the headstock is equipped with tuning pegs.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sound hole or the guitar is not mentioned.\nB. The shape of the sound hole is mentioned in the description and is round.\nC. The shape of the sound hole is not mentioned, but the sound hole of the guitar is mentioned.\nD. The shape of the sound hole is mentioned in the description but is not round.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy finish with a cutaway body design. It features a white pickguard and a circular soundhole with a simple rosette pattern. The fretboard is dark with white dot inlays, and the headstock is equipped with tuning pegs.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The guitar is not mentioned.\nB. The color of the guitar is mentioned in the description and is black.\nC. The color of the guitar is not mentioned.\nD. The color of the guitar is mentioned in the description but is not black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy finish with a cutaway body design. It features a white pickguard and a circular soundhole with a simple rosette pattern. The fretboard is dark with white dot inlays, and the headstock is equipped with tuning pegs.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The violin is not mentioned in the description.\nB. The violin is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy finish with a cutaway body design. It features a white pickguard and a circular soundhole with a simple rosette pattern. The fretboard is dark with white dot inlays, and the headstock is equipped with tuning pegs.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The guitar is not mentioned in the description.\nB. The pickguard of the guitar is not mentioned in the description.\nC. The pickguard of the guitar is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy finish with a cutaway body design. It features a white pickguard and a circular soundhole with a simple rosette pattern. The fretboard is dark with white dot inlays, and the headstock is equipped with tuning pegs.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sign is not mentioned in the description.\nB. The sign is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy finish with a cutaway body design. It features a white pickguard and a circular soundhole with a simple rosette pattern. The fretboard is dark with white dot inlays, and the headstock is equipped with tuning pegs.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The music stands are not mentioned in the description.\nB. The music stands are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy finish with a cutaway body design. 
It features a white pickguard and a circular soundhole with a simple rosette pattern. The fretboard is dark with white dot inlays, and the headstock is equipped with tuning pegs.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The books are not mentioned in the description.\nB. The books are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The shape of the neck is not mentioned, but the neck of the guitar is mentioned.", + "C. The texture of the body is not mentioned, but the body of the guitar is mentioned.", + "C. The number of parts of the strings is not mentioned, but the strings of the guitar are mentioned.", + "B. The shape of the sound hole is mentioned in the description and is round.", + "D. The color of the guitar is mentioned in the description but is not black.", + "A. The violin is not mentioned in the description.", + "C. The pickguard of the guitar is mentioned in the description.", + "A. The sign is not mentioned in the description.", + "A. The music stands are not mentioned in the description.", + "A. The books are not mentioned in the description." 
+ ], + "score": 0.45, + "score_pos": 0.3, + "score_neg": 0.6, + "neg_valid_num": 5, + "recognition_result": true + }, + "24694197": { + "pred": "A ripe avocado with a bumpy, dark green to almost black skin and a large, round, red to yellow-green pit nestled in the center.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The skin or the avocado is not mentioned.", + 0 + ], + [ + "The color of the skin is mentioned in the description but is not dark green.", + -1 + ], + [ + "The color of the skin is mentioned in the description and is dark green.", + 1 + ], + [ + "The color of the skin is not mentioned, but the skin of the avocado is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the skin is mentioned in the description and is dark green.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The flesh or the avocado is not mentioned.", + 0 + ], + [ + "The texture of the flesh is mentioned in the description but is not creamy.", + -1 + ], + [ + "The texture of the flesh is mentioned in the description and is creamy.", + 1 + ], + [ + "The texture of the flesh is not mentioned, but the flesh of the avocado is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. 
The flesh or the avocado is not mentioned.", + "pred_index": 0, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pit or the avocado is not mentioned.", + 0 + ], + [ + "The shape of the pit is mentioned in the description but is not oval.", + -1 + ], + [ + "The shape of the pit is mentioned in the description and is oval.", + 1 + ], + [ + "The shape of the pit is not mentioned, but the pit of the avocado is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the pit is not mentioned, but the pit of the avocado is mentioned.", + "pred_index": 3, + "question_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The flesh or the avocado is not mentioned.", + 0 + ], + [ + "The color of the flesh is mentioned in the description but is not light green.", + -1 + ], + [ + "The color of the flesh is mentioned in the description and is light green.", + 1 + ], + [ + "The color of the flesh is not mentioned, but the flesh of the avocado is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The flesh or the avocado is not mentioned.", + "pred_index": 0, + "question_index": 3, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The avocado is not mentioned in the description.", + 0 + ], + [ + "The leaves of the avocado are mentioned in the description.", + -1 + ], + [ + "The leaves of the avocado are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The leaves of the avocado are not mentioned in the description.", + "pred_index": 2, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The utility pole is mentioned in the description.", + -1 + ], + [ + "The utility pole is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The utility pole is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The avocado is not mentioned in the description.", + 0 + ], + [ + "The stem of the avocado is mentioned in the description.", + -1 + ], + [ + "The stem of the avocado is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The stem of the avocado is not mentioned in the description.", + "pred_index": 2, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The building is mentioned in the description.", + -1 + ], + [ + "The building is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The building is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The people are mentioned in the description.", + -1 + ], + [ + "The people are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The people are not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is an avocado or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green to almost black skin and a large, round, red to yellow-green pit nestled in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is an avocado or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green to almost black skin and a large, round, red to yellow-green pit nestled in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The skin or the avocado is not mentioned.\nB. The color of the skin is mentioned in the description but is not dark green.\nC. The color of the skin is mentioned in the description and is dark green.\nD. The color of the skin is not mentioned, but the skin of the avocado is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green to almost black skin and a large, round, red to yellow-green pit nestled in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The flesh or the avocado is not mentioned.\nB. The texture of the flesh is mentioned in the description but is not creamy.\nC. The texture of the flesh is mentioned in the description and is creamy.\nD. The texture of the flesh is not mentioned, but the flesh of the avocado is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green to almost black skin and a large, round, red to yellow-green pit nestled in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The pit or the avocado is not mentioned.\nB. The shape of the pit is mentioned in the description but is not oval.\nC. The shape of the pit is mentioned in the description and is oval.\nD. The shape of the pit is not mentioned, but the pit of the avocado is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green to almost black skin and a large, round, red to yellow-green pit nestled in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The flesh or the avocado is not mentioned.\nB. The color of the flesh is mentioned in the description but is not light green.\nC. The color of the flesh is mentioned in the description and is light green.\nD. The color of the flesh is not mentioned, but the flesh of the avocado is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green to almost black skin and a large, round, red to yellow-green pit nestled in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The avocado is not mentioned in the description.\nB. The leaves of the avocado are mentioned in the description.\nC. The leaves of the avocado are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green to almost black skin and a large, round, red to yellow-green pit nestled in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The utility pole is mentioned in the description.\nB. The utility pole is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green to almost black skin and a large, round, red to yellow-green pit nestled in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The avocado is not mentioned in the description.\nB. The stem of the avocado is mentioned in the description.\nC. The stem of the avocado is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green to almost black skin and a large, round, red to yellow-green pit nestled in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The building is mentioned in the description.\nB. The building is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green to almost black skin and a large, round, red to yellow-green pit nestled in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The people are mentioned in the description.\nB. The people are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the skin is mentioned in the description and is dark green.", + "A. The flesh or the avocado is not mentioned.", + "D. The shape of the pit is not mentioned, but the pit of the avocado is mentioned.", + "A. The flesh or the avocado is not mentioned.", + "C. The leaves of the avocado are not mentioned in the description.", + "B. The utility pole is not mentioned in the description.", + "C. The stem of the avocado is not mentioned in the description.", + "B. 
The building is not mentioned in the description.", + "B. The people are not mentioned in the description." + ], + "score": 0.7222222222222222, + "score_pos": 0.375, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "279135": { + "pred": "The ski features a predominantly black surface with intricate orange and white geometric patterns along its length. It is equipped with a black binding system, including a silver and black mechanism, and an orange adjustment lever. The tip of the ski has a similar pattern to the rest of the ski.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The binding or the ski is not mentioned.", + 0 + ], + [ + "The color of the binding is not mentioned, but the binding of the ski is mentioned.", + 0.5 + ], + [ + "The color of the binding is mentioned in the description and is black or orange.", + 1 + ], + [ + "The color of the binding is mentioned in the description but is not black or orange.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the binding is mentioned in the description and is black or orange.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The deck or the ski is not mentioned.", + 0 + ], + [ + "The color of the deck is not mentioned, but the deck of the ski is mentioned.", + 0.5 + ], + [ + "The color of the deck is mentioned in the description and is black, white, or orange.", + 1 + ], + [ + "The color of the deck is mentioned in the description but is not black, white, or orange.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The deck or the ski is not mentioned.", + "pred_index": 0, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The binding or the ski is not mentioned.", + 0 + ], + [ + "The material of the binding is not mentioned, but the binding of the ski is mentioned.", + 0.5 + ], + [ + "The material of the binding is mentioned in the description and is metal and plastic.", + 1 + ], + [ + "The material of the binding is mentioned in the description but is not metal and plastic.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The material of the binding is not mentioned, but the binding of the ski is mentioned.", + "pred_index": 1, + "question_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The deck or the ski is not mentioned.", + 0 + ], + [ + "The shape of the deck is not mentioned, but the deck of the ski is mentioned.", + 0.5 + ], + [ + "The shape of the deck is mentioned in the description and is slightly curved.", + 1 + ], + [ + "The shape of the deck is mentioned in the description but is not slightly curved.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The deck or the ski is not mentioned.", + "pred_index": 0, + "question_index": 3, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tail or the ski is not mentioned.", + 0 + ], + [ + "The texture/pattern of the tail is not mentioned, but the tail of the ski is mentioned.", + 0.5 + ], + [ + "The texture/pattern of the tail is mentioned in the description and is geometric shapes.", + 1 + ], + [ + "The texture/pattern of the tail is mentioned in the description but is not geometric shapes.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The tail or the ski is not mentioned.", + "pred_index": 0, + "question_index": 4, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wheels of the ski are not mentioned in the description.", + 1 + ], + [ + "The ski is not mentioned in the description.", + 0 + ], + [ + "The wheels of the ski are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The wheels of the ski are not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wooden post is not mentioned in the description.", + 1 + ], + [ + "The wooden post is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The wooden post is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The glass window is not mentioned in the description.", + 1 + ], + [ + "The glass window is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The glass window is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ski base of the ski is not mentioned in the description.", + 1 + ], + [ + "The ski is not mentioned in the description.", + 0 + ], + [ + "The ski base of the ski is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The ski base of the ski is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ski poles are not mentioned in the description.", + 1 + ], + [ + "The ski poles are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The ski poles are not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a ski or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black surface with intricate orange and white geometric patterns along its length. It is equipped with a black binding system, including a silver and black mechanism, and an orange adjustment lever. The tip of the ski has a similar pattern to the rest of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a ski or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black surface with intricate orange and white geometric patterns along its length. It is equipped with a black binding system, including a silver and black mechanism, and an orange adjustment lever. The tip of the ski has a similar pattern to the rest of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The binding or the ski is not mentioned.\nB. The color of the binding is not mentioned, but the binding of the ski is mentioned.\nC. The color of the binding is mentioned in the description and is black or orange.\nD. The color of the binding is mentioned in the description but is not black or orange.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black surface with intricate orange and white geometric patterns along its length. It is equipped with a black binding system, including a silver and black mechanism, and an orange adjustment lever. The tip of the ski has a similar pattern to the rest of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The deck or the ski is not mentioned.\nB. 
The color of the deck is not mentioned, but the deck of the ski is mentioned.\nC. The color of the deck is mentioned in the description and is black, white, or orange.\nD. The color of the deck is mentioned in the description but is not black, white, or orange.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black surface with intricate orange and white geometric patterns along its length. It is equipped with a black binding system, including a silver and black mechanism, and an orange adjustment lever. The tip of the ski has a similar pattern to the rest of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The binding or the ski is not mentioned.\nB. The material of the binding is not mentioned, but the binding of the ski is mentioned.\nC. The material of the binding is mentioned in the description and is metal and plastic.\nD. The material of the binding is mentioned in the description but is not metal and plastic.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black surface with intricate orange and white geometric patterns along its length. 
It is equipped with a black binding system, including a silver and black mechanism, and an orange adjustment lever. The tip of the ski has a similar pattern to the rest of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The deck or the ski is not mentioned.\nB. The shape of the deck is not mentioned, but the deck of the ski is mentioned.\nC. The shape of the deck is mentioned in the description and is slightly curved.\nD. The shape of the deck is mentioned in the description but is not slightly curved.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black surface with intricate orange and white geometric patterns along its length. It is equipped with a black binding system, including a silver and black mechanism, and an orange adjustment lever. The tip of the ski has a similar pattern to the rest of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tail or the ski is not mentioned.\nB. The texture/pattern of the tail is not mentioned, but the tail of the ski is mentioned.\nC. The texture/pattern of the tail is mentioned in the description and is geometric shapes.\nD. The texture/pattern of the tail is mentioned in the description but is not geometric shapes.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black surface with intricate orange and white geometric patterns along its length. It is equipped with a black binding system, including a silver and black mechanism, and an orange adjustment lever. The tip of the ski has a similar pattern to the rest of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wheels of the ski are not mentioned in the description.\nB. The ski is not mentioned in the description.\nC. The wheels of the ski are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black surface with intricate orange and white geometric patterns along its length. It is equipped with a black binding system, including a silver and black mechanism, and an orange adjustment lever. The tip of the ski has a similar pattern to the rest of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wooden post is not mentioned in the description.\nB. The wooden post is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black surface with intricate orange and white geometric patterns along its length. It is equipped with a black binding system, including a silver and black mechanism, and an orange adjustment lever. The tip of the ski has a similar pattern to the rest of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The glass window is not mentioned in the description.\nB. The glass window is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black surface with intricate orange and white geometric patterns along its length. It is equipped with a black binding system, including a silver and black mechanism, and an orange adjustment lever. The tip of the ski has a similar pattern to the rest of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ski base of the ski is not mentioned in the description.\nB. The ski is not mentioned in the description.\nC. The ski base of the ski is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black surface with intricate orange and white geometric patterns along its length. It is equipped with a black binding system, including a silver and black mechanism, and an orange adjustment lever. The tip of the ski has a similar pattern to the rest of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ski poles are not mentioned in the description.\nB. The ski poles are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the binding is mentioned in the description and is black or orange.", + "A. The deck or the ski is not mentioned.", + "B. The material of the binding is not mentioned, but the binding of the ski is mentioned.", + "A. The deck or the ski is not mentioned.", + "A. The tail or the ski is not mentioned.", + "A. The wheels of the ski are not mentioned in the description.", + "A. The wooden post is not mentioned in the description.", + "A. The glass window is not mentioned in the description.", + "A. The ski base of the ski is not mentioned in the description.", + "A. The ski poles are not mentioned in the description." 
+ ], + "score": 0.65, + "score_pos": 0.3, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "622329": { + "pred": "A rectangular, flat, beige eraser with rounded corners and a slightly textured surface.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The eraser is not mentioned.", + 0 + ], + [ + "The color of the eraser is mentioned in the description but is not brown.", + -1 + ], + [ + "The color of the eraser is mentioned in the description and is brown.", + 1 + ], + [ + "The color of the eraser is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the eraser is mentioned in the description but is not brown.", + "pred_index": 1, + "question_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The eraser is not mentioned.", + 0 + ], + [ + "The material of the eraser is mentioned in the description but is not rubber.", + -1 + ], + [ + "The material of the eraser is mentioned in the description and is rubber.", + 1 + ], + [ + "The material of the eraser is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the eraser is not mentioned.", + "pred_index": 3, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The corner or the eraser is not mentioned.", + 0 + ], + [ + "The shape of the corner is mentioned in the description but is not rounded.", + -1 + ], + [ + "The shape of the corner is mentioned in the description and is rounded.", + 1 + ], + [ + "The shape of the corner is not mentioned, but the corner of the eraser is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the corner is mentioned in the description and is rounded.", + "pred_index": 2, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The desk organizer is not mentioned in the description.", + 1 + ], + [ + "The desk organizer is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The desk organizer is not mentioned in the description.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The paper sleeve of the eraser is not mentioned in the description.", + 1 + ], + [ + "The eraser is not mentioned in the description.", + 0 + ], + [ + "The paper sleeve of the eraser is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The paper sleeve of the eraser is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The phone is not mentioned in the description.", + 1 + ], + [ + "The phone is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The phone is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sticky notes are not mentioned in the description.", + 1 + ], + [ + "The sticky notes are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The sticky notes are not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tape is not mentioned in the description.", + 1 + ], + [ + "The tape is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The tape is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is an eraser or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige eraser with rounded corners and a slightly textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is an eraser or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige eraser with rounded corners and a slightly textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The eraser is not mentioned.\nB. The color of the eraser is mentioned in the description but is not brown.\nC. The color of the eraser is mentioned in the description and is brown.\nD. The color of the eraser is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige eraser with rounded corners and a slightly textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The eraser is not mentioned.\nB. The material of the eraser is mentioned in the description but is not rubber.\nC. The material of the eraser is mentioned in the description and is rubber.\nD. The material of the eraser is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige eraser with rounded corners and a slightly textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The corner or the eraser is not mentioned.\nB. The shape of the corner is mentioned in the description but is not rounded.\nC. The shape of the corner is mentioned in the description and is rounded.\nD. The shape of the corner is not mentioned, but the corner of the eraser is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige eraser with rounded corners and a slightly textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The desk organizer is not mentioned in the description.\nB. The desk organizer is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige eraser with rounded corners and a slightly textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The paper sleeve of the eraser is not mentioned in the description.\nB. The eraser is not mentioned in the description.\nC. The paper sleeve of the eraser is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige eraser with rounded corners and a slightly textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The phone is not mentioned in the description.\nB. The phone is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige eraser with rounded corners and a slightly textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sticky notes are not mentioned in the description.\nB. 
The sticky notes are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige eraser with rounded corners and a slightly textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tape is not mentioned in the description.\nB. The tape is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the eraser is mentioned in the description but is not brown.", + "D. The material of the eraser is not mentioned.", + "C. The shape of the corner is mentioned in the description and is rounded.", + "A. The desk organizer is not mentioned in the description.", + "A. The paper sleeve of the eraser is not mentioned in the description.", + "A. The phone is not mentioned in the description.", + "A. The sticky notes are not mentioned in the description.", + "A. The tape is not mentioned in the description." + ], + "score": 0.625, + "score_pos": 0.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "622332": { + "pred": "A black rectangular stapler with a glossy finish, featuring a silver brand logo on the top right corner. 
The stapler has a visible metal stapling mechanism on the right side.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the base plate is mentioned in the description and is metallic.", + 1 + ], + [ + "The base plate or the stapler is not mentioned.", + 0 + ], + [ + "The material of the base plate is mentioned in the description but is not metallic.", + -1 + ], + [ + "The material of the base plate is not mentioned, but the base plate of the stapler is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the base plate is mentioned in the description and is metallic.", + "pred_index": 0, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the stapler is mentioned in the description and is rectangular.", + 1 + ], + [ + "The stapler is not mentioned.", + 0 + ], + [ + "The shape of the stapler is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the stapler is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the stapler is mentioned in the description and is rectangular.", + "pred_index": 0, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the logo is mentioned in the description and is white.", + 1 + ], + [ + "The logo or the stapler is not mentioned.", + 0 + ], + [ + "The color of the logo is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the logo is not mentioned, but the logo of the stapler is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the logo is mentioned in the description but is not white.", + "pred_index": 2, + "question_index": 2, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the top cover is mentioned in the description and is black.", + 1 + ], + [ + "The top cover or the stapler is not mentioned.", + 0 + ], + [ + "The color of the top cover is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the top cover is not mentioned, but the top cover of the stapler is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the top cover is mentioned in the description and is black.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the stapler is mentioned in the description and is black.", + 1 + ], + [ + "The stapler is not mentioned.", + 0 + ], + [ + "The color of the stapler is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the stapler is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the stapler is mentioned in the description and is black.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tape is mentioned in the description.", + -1 + ], + [ + "The tape is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The tape is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The staple remover of the stapler is mentioned in the description.", + -1 + ], + [ + "The stapler is not mentioned in the description.", + 0 + ], + [ + "The staple remover of the stapler is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The staple remover of the stapler is not mentioned in the description.", + "pred_index": 2, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The paper clips are mentioned in the description.", + -1 + ], + [ + "The paper clips are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The paper clips are not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The paintbrushes are mentioned in the description.", + -1 + ], + [ + "The paintbrushes are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The paintbrushes are not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The staple storage compartment of the stapler is mentioned in the description.", + -1 + ], + [ + "The stapler is not mentioned in the description.", + 0 + ], + [ + "The staple storage compartment of the stapler is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The staple storage compartment of the stapler is not mentioned in the description.", + "pred_index": 2, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a stapler or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rectangular stapler with a glossy finish, featuring a silver brand logo on the top right corner. The stapler has a visible metal stapling mechanism on the right side.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a stapler or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rectangular stapler with a glossy finish, featuring a silver brand logo on the top right corner. 
The stapler has a visible metal stapling mechanism on the right side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the base plate is mentioned in the description and is metallic.\nB. The base plate or the stapler is not mentioned.\nC. The material of the base plate is mentioned in the description but is not metallic.\nD. The material of the base plate is not mentioned, but the base plate of the stapler is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rectangular stapler with a glossy finish, featuring a silver brand logo on the top right corner. The stapler has a visible metal stapling mechanism on the right side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the stapler is mentioned in the description and is rectangular.\nB. The stapler is not mentioned.\nC. The shape of the stapler is mentioned in the description but is not rectangular.\nD. The shape of the stapler is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black rectangular stapler with a glossy finish, featuring a silver brand logo on the top right corner. The stapler has a visible metal stapling mechanism on the right side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the logo is mentioned in the description and is white.\nB. The logo or the stapler is not mentioned.\nC. The color of the logo is mentioned in the description but is not white.\nD. The color of the logo is not mentioned, but the logo of the stapler is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rectangular stapler with a glossy finish, featuring a silver brand logo on the top right corner. The stapler has a visible metal stapling mechanism on the right side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the top cover is mentioned in the description and is black.\nB. The top cover or the stapler is not mentioned.\nC. The color of the top cover is mentioned in the description but is not black.\nD. The color of the top cover is not mentioned, but the top cover of the stapler is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rectangular stapler with a glossy finish, featuring a silver brand logo on the top right corner. The stapler has a visible metal stapling mechanism on the right side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the stapler is mentioned in the description and is black.\nB. The stapler is not mentioned.\nC. The color of the stapler is mentioned in the description but is not black.\nD. The color of the stapler is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rectangular stapler with a glossy finish, featuring a silver brand logo on the top right corner. The stapler has a visible metal stapling mechanism on the right side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tape is mentioned in the description.\nB. The tape is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black rectangular stapler with a glossy finish, featuring a silver brand logo on the top right corner. The stapler has a visible metal stapling mechanism on the right side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The staple remover of the stapler is mentioned in the description.\nB. The stapler is not mentioned in the description.\nC. The staple remover of the stapler is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rectangular stapler with a glossy finish, featuring a silver brand logo on the top right corner. The stapler has a visible metal stapling mechanism on the right side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The paper clips are mentioned in the description.\nB. The paper clips are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rectangular stapler with a glossy finish, featuring a silver brand logo on the top right corner. 
The stapler has a visible metal stapling mechanism on the right side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The paintbrushes are mentioned in the description.\nB. The paintbrushes are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rectangular stapler with a glossy finish, featuring a silver brand logo on the top right corner. The stapler has a visible metal stapling mechanism on the right side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The staple storage compartment of the stapler is mentioned in the description.\nB. The stapler is not mentioned in the description.\nC. The staple storage compartment of the stapler is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The material of the base plate is mentioned in the description and is metallic.", + "A. The shape of the stapler is mentioned in the description and is rectangular.", + "C. The color of the logo is mentioned in the description but is not white.", + "A. The color of the top cover is mentioned in the description and is black.", + "A. The color of the stapler is mentioned in the description and is black.", + "B. The tape is not mentioned in the description.", + "C. The staple remover of the stapler is not mentioned in the description.", + "B. The paper clips are not mentioned in the description.", + "B. 
The paintbrushes are not mentioned in the description.", + "C. The staple storage compartment of the stapler is not mentioned in the description." + ], + "score": 0.8, + "score_pos": 0.6, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "1075308": { + "pred": "A vintage-style television set with a boxy, black plastic casing. The front features a large, square screen with a slightly curved surface. The top of the television has a series of buttons and dials, and there is a small, rectangular display area above the screen.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the screen is mentioned in the description and is rectangular or rounded rectangular.", + 1 + ], + [ + "The shape of the screen is mentioned in the description but is not rectangular or rounded rectangular.", + -1 + ], + [ + "The screen or the monitor/tv is not mentioned.", + 0 + ], + [ + "The shape of the screen is not mentioned, but the screen of the monitor/tv is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the screen is mentioned in the description and is rectangular or rounded rectangular.", + "pred_index": 0, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the frame is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the frame is mentioned in the description but is not plastic.", + -1 + ], + [ + "The frame or the monitor/tv is not mentioned.", + 0 + ], + [ + "The material of the frame is not mentioned, but the frame of the monitor/tv is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. 
The material of the frame is mentioned in the description and is plastic.", + "pred_index": 0, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the monitor/tv is mentioned in the description and is black.", + 1 + ], + [ + "The color of the monitor/tv is mentioned in the description but is not black.", + -1 + ], + [ + "The monitor/tv is not mentioned.", + 0 + ], + [ + "The color of the monitor/tv is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the monitor/tv is not mentioned.", + "pred_index": 3, + "question_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the screen is mentioned in the description and is glass.", + 1 + ], + [ + "The material of the screen is mentioned in the description but is not glass.", + -1 + ], + [ + "The screen or the monitor/tv is not mentioned.", + 0 + ], + [ + "The material of the screen is not mentioned, but the screen of the monitor/tv is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the screen is not mentioned, but the screen of the monitor/tv is mentioned.", + "pred_index": 3, + "question_index": 3, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chairs are mentioned in the description.", + -1 + ], + [ + "The chairs are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The chairs are not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The remote control of the monitor/tv is mentioned in the description.", + -1 + ], + [ + "The monitor/tv is not mentioned in the description.", + 0 + ], + [ + "The remote control of the monitor/tv is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The remote control of the monitor/tv is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ports of the monitor/tv are mentioned in the description.", + -1 + ], + [ + "The monitor/tv is not mentioned in the description.", + 0 + ], + [ + "The ports of the monitor/tv are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The ports of the monitor/tv are not mentioned in the description.", + "pred_index": 2, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The glass are mentioned in the description.", + -1 + ], + [ + "The glass are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The glass are not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The range hood is mentioned in the description.", + -1 + ], + [ + "The range hood is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The range hood is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a monitor/tv or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black plastic casing. The front features a large, square screen with a slightly curved surface. The top of the television has a series of buttons and dials, and there is a small, rectangular display area above the screen.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a monitor/tv or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black plastic casing. The front features a large, square screen with a slightly curved surface. 
The top of the television has a series of buttons and dials, and there is a small, rectangular display area above the screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the screen is mentioned in the description and is rectangular or rounded rectangular.\nB. The shape of the screen is mentioned in the description but is not rectangular or rounded rectangular.\nC. The screen or the monitor/tv is not mentioned.\nD. The shape of the screen is not mentioned, but the screen of the monitor/tv is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black plastic casing. The front features a large, square screen with a slightly curved surface. The top of the television has a series of buttons and dials, and there is a small, rectangular display area above the screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the frame is mentioned in the description and is plastic.\nB. The material of the frame is mentioned in the description but is not plastic.\nC. The frame or the monitor/tv is not mentioned.\nD. The material of the frame is not mentioned, but the frame of the monitor/tv is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black plastic casing. The front features a large, square screen with a slightly curved surface. The top of the television has a series of buttons and dials, and there is a small, rectangular display area above the screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the monitor/tv is mentioned in the description and is black.\nB. The color of the monitor/tv is mentioned in the description but is not black.\nC. The monitor/tv is not mentioned.\nD. The color of the monitor/tv is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black plastic casing. The front features a large, square screen with a slightly curved surface. The top of the television has a series of buttons and dials, and there is a small, rectangular display area above the screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the screen is mentioned in the description and is glass.\nB. The material of the screen is mentioned in the description but is not glass.\nC. The screen or the monitor/tv is not mentioned.\nD. 
The material of the screen is not mentioned, but the screen of the monitor/tv is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black plastic casing. The front features a large, square screen with a slightly curved surface. The top of the television has a series of buttons and dials, and there is a small, rectangular display area above the screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chairs are mentioned in the description.\nB. The chairs are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black plastic casing. The front features a large, square screen with a slightly curved surface. The top of the television has a series of buttons and dials, and there is a small, rectangular display area above the screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The remote control of the monitor/tv is mentioned in the description.\nB. The monitor/tv is not mentioned in the description.\nC. 
The remote control of the monitor/tv is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black plastic casing. The front features a large, square screen with a slightly curved surface. The top of the television has a series of buttons and dials, and there is a small, rectangular display area above the screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ports of the monitor/tv are mentioned in the description.\nB. The monitor/tv is not mentioned in the description.\nC. The ports of the monitor/tv are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black plastic casing. The front features a large, square screen with a slightly curved surface. The top of the television has a series of buttons and dials, and there is a small, rectangular display area above the screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The glass are mentioned in the description.\nB. 
The glass are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black plastic casing. The front features a large, square screen with a slightly curved surface. The top of the television has a series of buttons and dials, and there is a small, rectangular display area above the screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The range hood is mentioned in the description.\nB. The range hood is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The shape of the screen is mentioned in the description and is rectangular or rounded rectangular.", + "A. The material of the frame is mentioned in the description and is plastic.", + "D. The color of the monitor/tv is not mentioned.", + "D. The material of the screen is not mentioned, but the screen of the monitor/tv is mentioned.", + "B. The chairs are not mentioned in the description.", + "C. The remote control of the monitor/tv is not mentioned in the description.", + "C. The ports of the monitor/tv are not mentioned in the description.", + "B. The glass are not mentioned in the description.", + "B. The range hood is not mentioned in the description." + ], + "score": 0.8333333333333334, + "score_pos": 0.625, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "1770866": { + "pred": "A white price tag with handwritten text in blue and red marker. 
The text reads \"Libra\" in blue at the top, followed by \"Lb\" in blue, \"per\" in blue, \"lb\" in blue, and \"950\" in red.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the text is mentioned in the description but is not handwritten.", + -1 + ], + [ + "The type of the text is not mentioned, but the text of the sign/banner is mentioned.", + 0.5 + ], + [ + "The text or the sign/banner is not mentioned.", + 0 + ], + [ + "The type of the text is mentioned in the description and is handwritten.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The type of the text is mentioned in the description and is handwritten.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the board is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the board is not mentioned, but the board of the sign/banner is mentioned.", + 0.5 + ], + [ + "The board or the sign/banner is not mentioned.", + 0 + ], + [ + "The shape of the board is mentioned in the description and is rectangular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. The board or the sign/banner is not mentioned.", + "pred_index": 2, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The background color of the sign/banner is mentioned in the description but is not white.", + -1 + ], + [ + "The background color of the sign/banner is not mentioned.", + 0 + ], + [ + "The sign/banner is not mentioned.", + 0 + ], + [ + "The background color of the sign/banner is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The background color of the sign/banner is mentioned in the description and is white.", + "pred_index": 3, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the text is mentioned in the description but is not black or blue and red.", + -1 + ], + [ + "The color of the text is not mentioned, but the text of the sign/banner is mentioned.", + 0.5 + ], + [ + "The text or the sign/banner is not mentioned.", + 0 + ], + [ + "The color of the text is mentioned in the description and is black or blue and red.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the text is mentioned in the description and is black or blue and red.", + "pred_index": 3, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The salami is not mentioned in the description.", + 1 + ], + [ + "The salami is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The salami is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sliced meats are not mentioned in the description.", + 1 + ], + [ + "The sliced meats are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The sliced meats are not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sign/banner is not mentioned in the description.", + 0 + ], + [ + "The duster of the sign/banner is not mentioned in the description.", + 1 + ], + [ + "The duster of the sign/banner is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The duster of the sign/banner is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sign/banner is not mentioned in the description.", + 0 + ], + [ + "The marker of the sign/banner is not mentioned in the description.", + 1 + ], + [ + "The marker of the sign/banner is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The marker of the sign/banner is mentioned in the description.", + "pred_index": 2, + "question_index": 7, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The price tags are not mentioned in the description.", + 1 + ], + [ + "The price tags are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The price tags are mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": -1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a sign/banner or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white price tag with handwritten text in blue and red marker. The text reads \"Libra\" in blue at the top, followed by \"Lb\" in blue, \"per\" in blue, \"lb\" in blue, and \"950\" in red.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a sign/banner or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white price tag with handwritten text in blue and red marker. The text reads \"Libra\" in blue at the top, followed by \"Lb\" in blue, \"per\" in blue, \"lb\" in blue, and \"950\" in red.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The type of the text is mentioned in the description but is not handwritten.\nB. The type of the text is not mentioned, but the text of the sign/banner is mentioned.\nC. The text or the sign/banner is not mentioned.\nD. 
The type of the text is mentioned in the description and is handwritten.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white price tag with handwritten text in blue and red marker. The text reads \"Libra\" in blue at the top, followed by \"Lb\" in blue, \"per\" in blue, \"lb\" in blue, and \"950\" in red.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the board is mentioned in the description but is not rectangular.\nB. The shape of the board is not mentioned, but the board of the sign/banner is mentioned.\nC. The board or the sign/banner is not mentioned.\nD. The shape of the board is mentioned in the description and is rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white price tag with handwritten text in blue and red marker. The text reads \"Libra\" in blue at the top, followed by \"Lb\" in blue, \"per\" in blue, \"lb\" in blue, and \"950\" in red.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The background color of the sign/banner is mentioned in the description but is not white.\nB. 
The background color of the sign/banner is not mentioned.\nC. The sign/banner is not mentioned.\nD. The background color of the sign/banner is mentioned in the description and is white.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white price tag with handwritten text in blue and red marker. The text reads \"Libra\" in blue at the top, followed by \"Lb\" in blue, \"per\" in blue, \"lb\" in blue, and \"950\" in red.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the text is mentioned in the description but is not black or blue and red.\nB. The color of the text is not mentioned, but the text of the sign/banner is mentioned.\nC. The text or the sign/banner is not mentioned.\nD. The color of the text is mentioned in the description and is black or blue and red.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white price tag with handwritten text in blue and red marker. 
The text reads \"Libra\" in blue at the top, followed by \"Lb\" in blue, \"per\" in blue, \"lb\" in blue, and \"950\" in red.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The salami is not mentioned in the description.\nB. The salami is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white price tag with handwritten text in blue and red marker. The text reads \"Libra\" in blue at the top, followed by \"Lb\" in blue, \"per\" in blue, \"lb\" in blue, and \"950\" in red.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sliced meats are not mentioned in the description.\nB. The sliced meats are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white price tag with handwritten text in blue and red marker. 
The text reads \"Libra\" in blue at the top, followed by \"Lb\" in blue, \"per\" in blue, \"lb\" in blue, and \"950\" in red.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sign/banner is not mentioned in the description.\nB. The duster of the sign/banner is not mentioned in the description.\nC. The duster of the sign/banner is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white price tag with handwritten text in blue and red marker. The text reads \"Libra\" in blue at the top, followed by \"Lb\" in blue, \"per\" in blue, \"lb\" in blue, and \"950\" in red.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sign/banner is not mentioned in the description.\nB. The marker of the sign/banner is not mentioned in the description.\nC. The marker of the sign/banner is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white price tag with handwritten text in blue and red marker. 
The text reads \"Libra\" in blue at the top, followed by \"Lb\" in blue, \"per\" in blue, \"lb\" in blue, and \"950\" in red.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The price tags are not mentioned in the description.\nB. The price tags are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The type of the text is mentioned in the description and is handwritten.", + "C. The board or the sign/banner is not mentioned.", + "D. The background color of the sign/banner is mentioned in the description and is white.", + "D. The color of the text is mentioned in the description and is black or blue and red.", + "A. The salami is not mentioned in the description.", + "A. The sliced meats are not mentioned in the description.", + "B. The duster of the sign/banner is not mentioned in the description.", + "C. The marker of the sign/banner is mentioned in the description.", + "B. The price tags are mentioned in the description." + ], + "score": 0.4444444444444444, + "score_pos": 0.75, + "score_neg": 0.2, + "neg_valid_num": 5, + "recognition_result": true + }, + "2391761": { + "pred": "The canoe has a wooden hull with horizontal planks and a blue tarpaulin cover draped over it. The tarpaulin is secured with ropes and has some white markings on it. The canoe also features a small outboard motor mounted on the stern.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The hull or the boat is not mentioned.", + 0 + ], + [ + "The color of the hull is mentioned in the description and is brown.", + 1 + ], + [ + "The color of the hull is mentioned in the description but is not brown.", + -1 + ], + [ + "The color of the hull is not mentioned, but the hull of the boat is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the hull is mentioned in the description but is not brown.", + "pred_index": 2, + "question_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cover or the boat is not mentioned.", + 0 + ], + [ + "The color of the cover is mentioned in the description and is blue.", + 1 + ], + [ + "The color of the cover is mentioned in the description but is not blue.", + -1 + ], + [ + "The color of the cover is not mentioned, but the cover of the boat is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the cover is mentioned in the description and is blue.", + "pred_index": 1, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The life preserver/life buoy or the boat is not mentioned.", + 0 + ], + [ + "The color of the life preserver/life buoy is mentioned in the description and is red or white.", + 1 + ], + [ + "The color of the life preserver/life buoy is mentioned in the description but is not red or white.", + -1 + ], + [ + "The color of the life preserver/life buoy is not mentioned, but the life preserver/life buoy of the boat is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The life preserver/life buoy or the boat is not mentioned.", + "pred_index": 0, + "question_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The motor or the boat is not mentioned.", + 0 + ], + [ + "The color of the motor is mentioned in the description and is black.", + 1 + ], + [ + "The color of the motor is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the motor is not mentioned, but the motor of the boat is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the motor is not mentioned, but the motor of the boat is mentioned.", + "pred_index": 3, + "question_index": 3, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rudder of the boat is mentioned in the description.", + -1 + ], + [ + "The boat is not mentioned in the description.", + 0 + ], + [ + "The rudder of the boat is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The rudder of the boat is not mentioned in the description.", + "pred_index": 2, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sail of the boat is mentioned in the description.", + -1 + ], + [ + "The boat is not mentioned in the description.", + 0 + ], + [ + "The sail of the boat is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The sail of the boat is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cabin of the boat is mentioned in the description.", + -1 + ], + [ + "The boat is not mentioned in the description.", + 0 + ], + [ + "The cabin of the boat is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The cabin of the boat is not mentioned in the description.", + "pred_index": 2, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The anchor of the boat is mentioned in the description.", + -1 + ], + [ + "The boat is not mentioned in the description.", + 0 + ], + [ + "The anchor of the boat is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The anchor of the boat is not mentioned in the description.", + "pred_index": 2, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ducks are mentioned in the description.", + -1 + ], + [ + "The ducks are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The ducks are not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a boat or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe has a wooden hull with horizontal planks and a blue tarpaulin cover draped over it. 
The tarpaulin is secured with ropes and has some white markings on it. The canoe also features a small outboard motor mounted on the stern.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a boat or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe has a wooden hull with horizontal planks and a blue tarpaulin cover draped over it. The tarpaulin is secured with ropes and has some white markings on it. The canoe also features a small outboard motor mounted on the stern.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The hull or the boat is not mentioned.\nB. The color of the hull is mentioned in the description and is brown.\nC. The color of the hull is mentioned in the description but is not brown.\nD. The color of the hull is not mentioned, but the hull of the boat is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe has a wooden hull with horizontal planks and a blue tarpaulin cover draped over it. 
The tarpaulin is secured with ropes and has some white markings on it. The canoe also features a small outboard motor mounted on the stern.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cover or the boat is not mentioned.\nB. The color of the cover is mentioned in the description and is blue.\nC. The color of the cover is mentioned in the description but is not blue.\nD. The color of the cover is not mentioned, but the cover of the boat is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe has a wooden hull with horizontal planks and a blue tarpaulin cover draped over it. The tarpaulin is secured with ropes and has some white markings on it. The canoe also features a small outboard motor mounted on the stern.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The life preserver/life buoy or the boat is not mentioned.\nB. The color of the life preserver/life buoy is mentioned in the description and is red or white.\nC. The color of the life preserver/life buoy is mentioned in the description but is not red or white.\nD. The color of the life preserver/life buoy is not mentioned, but the life preserver/life buoy of the boat is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe has a wooden hull with horizontal planks and a blue tarpaulin cover draped over it. The tarpaulin is secured with ropes and has some white markings on it. The canoe also features a small outboard motor mounted on the stern.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The motor or the boat is not mentioned.\nB. The color of the motor is mentioned in the description and is black.\nC. The color of the motor is mentioned in the description but is not black.\nD. The color of the motor is not mentioned, but the motor of the boat is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe has a wooden hull with horizontal planks and a blue tarpaulin cover draped over it. The tarpaulin is secured with ropes and has some white markings on it. The canoe also features a small outboard motor mounted on the stern.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rudder of the boat is mentioned in the description.\nB. The boat is not mentioned in the description.\nC. The rudder of the boat is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe has a wooden hull with horizontal planks and a blue tarpaulin cover draped over it. The tarpaulin is secured with ropes and has some white markings on it. The canoe also features a small outboard motor mounted on the stern.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sail of the boat is mentioned in the description.\nB. The boat is not mentioned in the description.\nC. The sail of the boat is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe has a wooden hull with horizontal planks and a blue tarpaulin cover draped over it. The tarpaulin is secured with ropes and has some white markings on it. The canoe also features a small outboard motor mounted on the stern.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cabin of the boat is mentioned in the description.\nB. The boat is not mentioned in the description.\nC. The cabin of the boat is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe has a wooden hull with horizontal planks and a blue tarpaulin cover draped over it. The tarpaulin is secured with ropes and has some white markings on it. The canoe also features a small outboard motor mounted on the stern.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The anchor of the boat is mentioned in the description.\nB. The boat is not mentioned in the description.\nC. The anchor of the boat is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe has a wooden hull with horizontal planks and a blue tarpaulin cover draped over it. The tarpaulin is secured with ropes and has some white markings on it. The canoe also features a small outboard motor mounted on the stern.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ducks are mentioned in the description.\nB. The ducks are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the hull is mentioned in the description but is not brown.", + "B. The color of the cover is mentioned in the description and is blue.", + "A. The life preserver/life buoy or the boat is not mentioned.", + "D. 
The color of the motor is not mentioned, but the motor of the boat is mentioned.", + "C. The rudder of the boat is not mentioned in the description.", + "C. The sail of the boat is not mentioned in the description.", + "C. The cabin of the boat is not mentioned in the description.", + "C. The anchor of the boat is not mentioned in the description.", + "B. The ducks are not mentioned in the description." + ], + "score": 0.6111111111111112, + "score_pos": 0.125, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "2580318": { + "pred": "The mouse has a sleek, metallic silver body with a smooth, reflective surface. The visible part of the mouse is triangular in shape, with a slightly curved edge and a subtle gradient of light reflecting off its surface.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouse is not mentioned.", + 0 + ], + [ + "The color of the mouse is not mentioned.", + 0 + ], + [ + "The color of the mouse is mentioned in the description but is not gray.", + -1 + ], + [ + "The color of the mouse is mentioned in the description and is gray.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the mouse is mentioned in the description but is not gray.", + "pred_index": 2, + "question_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouse is not mentioned.", + 0 + ], + [ + "The texture of the mouse is not mentioned.", + 0 + ], + [ + "The texture of the mouse is mentioned in the description but is not matte.", + -1 + ], + [ + "The texture of the mouse is mentioned in the description and is matte.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The texture of the mouse is mentioned in the description but is not matte.", + "pred_index": 2, + "question_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouse is not mentioned.", + 0 + ], + [ + "The shape of the mouse is not mentioned.", + 0 + ], + [ + "The shape of the mouse is mentioned in the description but is not ergonomic.", + -1 + ], + [ + "The shape of the mouse is mentioned in the description and is ergonomic.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the mouse is mentioned in the description but is not ergonomic.", + "pred_index": 2, + "question_index": 2, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Paper is mentioned in the description.", + -1 + ], + [ + "The Paper is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The Paper is not mentioned in the description.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouse is not mentioned in the description.", + 0 + ], + [ + "The cable of the mouse is mentioned in the description.", + -1 + ], + [ + "The cable of the mouse is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The cable of the mouse is not mentioned in the description.", + "pred_index": 2, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Framed diagram is mentioned in the description.", + -1 + ], + [ + "The Framed diagram is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The Framed diagram is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Pen is mentioned in the description.", + -1 + ], + [ + "The Pen is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The Pen is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouse is not mentioned in the description.", + 0 + ], + [ + "The side buttons of the mouse are mentioned in the description.", + -1 + ], + [ + "The side buttons of the mouse are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The side buttons of the mouse are not mentioned in the description.", + "pred_index": 2, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a mouse or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a sleek, metallic silver body with a smooth, reflective surface. 
The visible part of the mouse is triangular in shape, with a slightly curved edge and a subtle gradient of light reflecting off its surface.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a mouse or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a sleek, metallic silver body with a smooth, reflective surface. The visible part of the mouse is triangular in shape, with a slightly curved edge and a subtle gradient of light reflecting off its surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mouse is not mentioned.\nB. The color of the mouse is not mentioned.\nC. The color of the mouse is mentioned in the description but is not gray.\nD. The color of the mouse is mentioned in the description and is gray.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a sleek, metallic silver body with a smooth, reflective surface. 
The visible part of the mouse is triangular in shape, with a slightly curved edge and a subtle gradient of light reflecting off its surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mouse is not mentioned.\nB. The texture of the mouse is not mentioned.\nC. The texture of the mouse is mentioned in the description but is not matte.\nD. The texture of the mouse is mentioned in the description and is matte.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a sleek, metallic silver body with a smooth, reflective surface. The visible part of the mouse is triangular in shape, with a slightly curved edge and a subtle gradient of light reflecting off its surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mouse is not mentioned.\nB. The shape of the mouse is not mentioned.\nC. The shape of the mouse is mentioned in the description but is not ergonomic.\nD. The shape of the mouse is mentioned in the description and is ergonomic.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a sleek, metallic silver body with a smooth, reflective surface. The visible part of the mouse is triangular in shape, with a slightly curved edge and a subtle gradient of light reflecting off its surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Paper is mentioned in the description.\nB. The Paper is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a sleek, metallic silver body with a smooth, reflective surface. The visible part of the mouse is triangular in shape, with a slightly curved edge and a subtle gradient of light reflecting off its surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mouse is not mentioned in the description.\nB. The cable of the mouse is mentioned in the description.\nC. The cable of the mouse is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a sleek, metallic silver body with a smooth, reflective surface. The visible part of the mouse is triangular in shape, with a slightly curved edge and a subtle gradient of light reflecting off its surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Framed diagram is mentioned in the description.\nB. The Framed diagram is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a sleek, metallic silver body with a smooth, reflective surface. The visible part of the mouse is triangular in shape, with a slightly curved edge and a subtle gradient of light reflecting off its surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Pen is mentioned in the description.\nB. The Pen is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a sleek, metallic silver body with a smooth, reflective surface. 
The visible part of the mouse is triangular in shape, with a slightly curved edge and a subtle gradient of light reflecting off its surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mouse is not mentioned in the description.\nB. The side buttons of the mouse are mentioned in the description.\nC. The side buttons of the mouse are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the mouse is mentioned in the description but is not gray.", + "C. The texture of the mouse is mentioned in the description but is not matte.", + "C. The shape of the mouse is mentioned in the description but is not ergonomic.", + "B. The Paper is not mentioned in the description.", + "C. The cable of the mouse is not mentioned in the description.", + "B. The Framed diagram is not mentioned in the description.", + "B. The Pen is not mentioned in the description.", + "C. The side buttons of the mouse are not mentioned in the description." + ], + "score": 0.25, + "score_pos": -1.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "2588513": { + "pred": "A rectangular wooden block with a light beige top surface and a black bottom surface. The top surface has a smooth texture with visible wood grain patterns, while the bottom surface is solid black.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the wood block is not mentioned.", + 0 + ], + [ + "The color of the wood block is mentioned in the description but is not wooden or brown.", + -1 + ], + [ + "The color of the wood block is mentioned in the description and is wooden or brown.", + 1 + ], + [ + "The wood block is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the wood block is mentioned in the description but is not wooden or brown.", + "pred_index": 1, + "question_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the top is not mentioned, but the top of the wood block is mentioned.", + 0.5 + ], + [ + "The texture of the top is mentioned in the description but is not grain.", + -1 + ], + [ + "The texture of the top is mentioned in the description and is grain.", + 1 + ], + [ + "The top or the wood block is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The texture of the top is mentioned in the description and is grain.", + "pred_index": 2, + "question_index": 1, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wood block is not mentioned in the description.", + 0 + ], + [ + "The metal clip of the wood block is not mentioned in the description.", + 1 + ], + [ + "The metal clip of the wood block is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The metal clip of the wood block is not mentioned in the description.", + "pred_index": 1, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The machine with a motor are not mentioned in the description.", + 1 + ], + [ + "The machine with a motor are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The machine with a motor are not mentioned in the description.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pickup is not mentioned in the description.", + 1 + ], + [ + "The pickup is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The pickup is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wood block is not mentioned in the description.", + 0 + ], + [ + "The plastic handle of the wood block is not mentioned in the description.", + 1 + ], + [ + "The plastic handle of the wood block is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The plastic handle of the wood block is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The copper wire spool is not mentioned in the description.", + 1 + ], + [ + "The copper wire spool is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The copper wire spool is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a wood block or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden block with a light beige top surface and a black bottom surface. The top surface has a smooth texture with visible wood grain patterns, while the bottom surface is solid black.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a wood block or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden block with a light beige top surface and a black bottom surface. The top surface has a smooth texture with visible wood grain patterns, while the bottom surface is solid black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the wood block is not mentioned.\nB. The color of the wood block is mentioned in the description but is not wooden or brown.\nC. The color of the wood block is mentioned in the description and is wooden or brown.\nD. 
The wood block is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden block with a light beige top surface and a black bottom surface. The top surface has a smooth texture with visible wood grain patterns, while the bottom surface is solid black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the top is not mentioned, but the top of the wood block is mentioned.\nB. The texture of the top is mentioned in the description but is not grain.\nC. The texture of the top is mentioned in the description and is grain.\nD. The top or the wood block is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden block with a light beige top surface and a black bottom surface. The top surface has a smooth texture with visible wood grain patterns, while the bottom surface is solid black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wood block is not mentioned in the description.\nB. The metal clip of the wood block is not mentioned in the description.\nC. 
The metal clip of the wood block is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden block with a light beige top surface and a black bottom surface. The top surface has a smooth texture with visible wood grain patterns, while the bottom surface is solid black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The machine with a motor are not mentioned in the description.\nB. The machine with a motor are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden block with a light beige top surface and a black bottom surface. The top surface has a smooth texture with visible wood grain patterns, while the bottom surface is solid black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pickup is not mentioned in the description.\nB. The pickup is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden block with a light beige top surface and a black bottom surface. The top surface has a smooth texture with visible wood grain patterns, while the bottom surface is solid black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wood block is not mentioned in the description.\nB. The plastic handle of the wood block is not mentioned in the description.\nC. The plastic handle of the wood block is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden block with a light beige top surface and a black bottom surface. The top surface has a smooth texture with visible wood grain patterns, while the bottom surface is solid black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The copper wire spool is not mentioned in the description.\nB. The copper wire spool is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the wood block is mentioned in the description but is not wooden or brown.", + "C. The texture of the top is mentioned in the description and is grain.", + "B. The metal clip of the wood block is not mentioned in the description.", + "A. 
The machine with a motor are not mentioned in the description.", + "A. The pickup is not mentioned in the description.", + "B. The plastic handle of the wood block is not mentioned in the description.", + "A. The copper wire spool is not mentioned in the description." + ], + "score": 0.7142857142857143, + "score_pos": 0.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "3993075": { + "pred": "A cylindrical marker with a white body featuring a colorful design, including a blue and green pattern near the middle and a red cap.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the cap is mentioned in the description but is not orange.", + -1 + ], + [ + "The color of the cap is mentioned in the description and is orange.", + 1 + ], + [ + "The cap or the marker is not mentioned.", + 0 + ], + [ + "The color of the cap is not mentioned, but the cap of the marker is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the cap is not mentioned, but the cap of the marker is mentioned.", + "pred_index": 3, + "question_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the body is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the body is mentioned in the description and is white.", + 1 + ], + [ + "The body or the marker is not mentioned.", + 0 + ], + [ + "The color of the body is not mentioned, but the body of the marker is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the body is mentioned in the description and is white.", + "pred_index": 1, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the marker is mentioned in the description but is not cylindrical.", + -1 + ], + [ + "The shape of the marker is mentioned in the description and is cylindrical.", + 1 + ], + [ + "The marker is not mentioned.", + 0 + ], + [ + "The shape of the marker is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the marker is mentioned in the description and is cylindrical.", + "pred_index": 1, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the body is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the body is mentioned in the description and is plastic.", + 1 + ], + [ + "The body or the marker is not mentioned.", + 0 + ], + [ + "The material of the body is not mentioned, but the body of the marker is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the body is not mentioned, but the body of the marker is mentioned.", + "pred_index": 3, + "question_index": 3, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The children are mentioned in the description.", + -1 + ], + [ + "The children are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The children are not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The table is mentioned in the description.", + -1 + ], + [ + "The table is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The table is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clip of the marker is mentioned in the description.", + -1 + ], + [ + "The marker is not mentioned in the description.", + 0 + ], + [ + "The clip of the marker is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The clip of the marker is not mentioned in the description.", + "pred_index": 2, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The label of the marker is mentioned in the description.", + -1 + ], + [ + "The marker is not mentioned in the description.", + 0 + ], + [ + "The label of the marker is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The label of the marker is not mentioned in the description.", + "pred_index": 2, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chairs are mentioned in the description.", + -1 + ], + [ + "The chairs are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The chairs are not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a marker or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical marker with a white body featuring a colorful design, including a blue and green pattern near the middle and a red cap.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a marker or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical marker with a white body featuring a colorful design, including a blue and green pattern near the middle and a red cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The color of the cap is mentioned in the description but is not orange.\nB. The color of the cap is mentioned in the description and is orange.\nC. The cap or the marker is not mentioned.\nD. The color of the cap is not mentioned, but the cap of the marker is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical marker with a white body featuring a colorful design, including a blue and green pattern near the middle and a red cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the body is mentioned in the description but is not white.\nB. The color of the body is mentioned in the description and is white.\nC. The body or the marker is not mentioned.\nD. The color of the body is not mentioned, but the body of the marker is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical marker with a white body featuring a colorful design, including a blue and green pattern near the middle and a red cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The shape of the marker is mentioned in the description but is not cylindrical.\nB. The shape of the marker is mentioned in the description and is cylindrical.\nC. The marker is not mentioned.\nD. The shape of the marker is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical marker with a white body featuring a colorful design, including a blue and green pattern near the middle and a red cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the body is mentioned in the description but is not plastic.\nB. The material of the body is mentioned in the description and is plastic.\nC. The body or the marker is not mentioned.\nD. The material of the body is not mentioned, but the body of the marker is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical marker with a white body featuring a colorful design, including a blue and green pattern near the middle and a red cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The children are mentioned in the description.\nB. 
The children are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical marker with a white body featuring a colorful design, including a blue and green pattern near the middle and a red cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The table is mentioned in the description.\nB. The table is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical marker with a white body featuring a colorful design, including a blue and green pattern near the middle and a red cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The clip of the marker is mentioned in the description.\nB. The marker is not mentioned in the description.\nC. The clip of the marker is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical marker with a white body featuring a colorful design, including a blue and green pattern near the middle and a red cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The label of the marker is mentioned in the description.\nB. The marker is not mentioned in the description.\nC. The label of the marker is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical marker with a white body featuring a colorful design, including a blue and green pattern near the middle and a red cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chairs are mentioned in the description.\nB. The chairs are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the cap is not mentioned, but the cap of the marker is mentioned.", + "B. The color of the body is mentioned in the description and is white.", + "B. The shape of the marker is mentioned in the description and is cylindrical.", + "D. The material of the body is not mentioned, but the body of the marker is mentioned.", + "B. The children are not mentioned in the description.", + "B. The table is not mentioned in the description.", + "C. The clip of the marker is not mentioned in the description.", + "C. 
The label of the marker is not mentioned in the description.", + "B. The chairs are not mentioned in the description." + ], + "score": 0.8888888888888888, + "score_pos": 0.75, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "4027486": { + "pred": "The bus is predominantly blue with a white section on the right side. It has a black horizontal stripe running along the middle, with a green stripe above it. The rear of the bus features a white license plate with black text. There is a small, white, triangular logo with a black design on the blue section near the rear.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the license plate is not mentioned, but the license plate of the truck is mentioned.", + 0.5 + ], + [ + "The license plate or the truck is not mentioned.", + 0 + ], + [ + "The shape of the license plate is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the license plate is mentioned in the description and is rectangular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. The license plate or the truck is not mentioned.", + "pred_index": 1, + "question_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the front is not mentioned, but the front of the truck is mentioned.", + 0.5 + ], + [ + "The front or the truck is not mentioned.", + 0 + ], + [ + "The color of the front is mentioned in the description but is not blue.", + -1 + ], + [ + "The color of the front is mentioned in the description and is blue.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the front is mentioned in the description and is blue.", + "pred_index": 3, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the grille is not mentioned, but the grille of the truck is mentioned.", + 0.5 + ], + [ + "The grille or the truck is not mentioned.", + 0 + ], + [ + "The color of the grille is mentioned in the description but is not black, green, or blue.", + -1 + ], + [ + "The color of the grille is mentioned in the description and is black, green, or blue.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. The grille or the truck is not mentioned.", + "pred_index": 1, + "question_index": 2, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The truck is not mentioned in the description.", + 0 + ], + [ + "The door handle of the truck is mentioned in the description.", + -1 + ], + [ + "The door handle of the truck is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The truck is not mentioned in the description.", + "pred_index": 0, + "question_index": 3, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The truck is not mentioned in the description.", + 0 + ], + [ + "The rear part of the truck is mentioned in the description.", + -1 + ], + [ + "The rear part of the truck is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The truck is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The truck is not mentioned in the description.", + 0 + ], + [ + "The exhaust pipe of the truck is mentioned in the description.", + -1 + ], + [ + "The exhaust pipe of the truck is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The exhaust pipe of the truck is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The truck is not mentioned in the description.", + 0 + ], + [ + "The tail light of the truck is mentioned in the description.", + -1 + ], + [ + "The tail light of the truck is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The truck is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The truck is not mentioned in the description.", + 0 + ], + [ + "The cargo area of the truck is mentioned in the description.", + -1 + ], + [ + "The cargo area of the truck is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The truck is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 0 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a truck or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "B. 
No", + "pred_index": 1, + "eval_result": "incorrect" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section on the right side. It has a black horizontal stripe running along the middle, with a green stripe above it. The rear of the bus features a white license plate with black text. There is a small, white, triangular logo with a black design on the blue section near the rear.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a truck or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section on the right side. It has a black horizontal stripe running along the middle, with a green stripe above it. The rear of the bus features a white license plate with black text. There is a small, white, triangular logo with a black design on the blue section near the rear.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The shape of the license plate is not mentioned, but the license plate of the truck is mentioned.\nB. The license plate or the truck is not mentioned.\nC. The shape of the license plate is mentioned in the description but is not rectangular.\nD. The shape of the license plate is mentioned in the description and is rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section on the right side. It has a black horizontal stripe running along the middle, with a green stripe above it. The rear of the bus features a white license plate with black text. There is a small, white, triangular logo with a black design on the blue section near the rear.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the front is not mentioned, but the front of the truck is mentioned.\nB. The front or the truck is not mentioned.\nC. The color of the front is mentioned in the description but is not blue.\nD. The color of the front is mentioned in the description and is blue.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section on the right side. 
It has a black horizontal stripe running along the middle, with a green stripe above it. The rear of the bus features a white license plate with black text. There is a small, white, triangular logo with a black design on the blue section near the rear.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the grille is not mentioned, but the grille of the truck is mentioned.\nB. The grille or the truck is not mentioned.\nC. The color of the grille is mentioned in the description but is not black, green, or blue.\nD. The color of the grille is mentioned in the description and is black, green, or blue.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section on the right side. It has a black horizontal stripe running along the middle, with a green stripe above it. The rear of the bus features a white license plate with black text. There is a small, white, triangular logo with a black design on the blue section near the rear.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The truck is not mentioned in the description.\nB. The door handle of the truck is mentioned in the description.\nC. The door handle of the truck is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section on the right side. It has a black horizontal stripe running along the middle, with a green stripe above it. The rear of the bus features a white license plate with black text. There is a small, white, triangular logo with a black design on the blue section near the rear.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The truck is not mentioned in the description.\nB. The rear part of the truck is mentioned in the description.\nC. The rear part of the truck is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section on the right side. It has a black horizontal stripe running along the middle, with a green stripe above it. The rear of the bus features a white license plate with black text. There is a small, white, triangular logo with a black design on the blue section near the rear.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The truck is not mentioned in the description.\nB. The exhaust pipe of the truck is mentioned in the description.\nC. 
The exhaust pipe of the truck is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section on the right side. It has a black horizontal stripe running along the middle, with a green stripe above it. The rear of the bus features a white license plate with black text. There is a small, white, triangular logo with a black design on the blue section near the rear.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The truck is not mentioned in the description.\nB. The tail light of the truck is mentioned in the description.\nC. The tail light of the truck is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section on the right side. It has a black horizontal stripe running along the middle, with a green stripe above it. The rear of the bus features a white license plate with black text. 
There is a small, white, triangular logo with a black design on the blue section near the rear.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The truck is not mentioned in the description.\nB. The cargo area of the truck is mentioned in the description.\nC. The cargo area of the truck is not mentioned in the description.\n" + ], + "response": [ + "B. No", + "B. The license plate or the truck is not mentioned.", + "D. The color of the front is mentioned in the description and is blue.", + "B. The grille or the truck is not mentioned.", + "A. The truck is not mentioned in the description.", + "A. The truck is not mentioned in the description.", + "C. The exhaust pipe of the truck is not mentioned in the description.", + "A. The truck is not mentioned in the description.", + "A. The truck is not mentioned in the description." + ], + "score": 0.0, + "score_pos": 0.0, + "score_neg": 0.0, + "neg_valid_num": 5, + "recognition_result": false + }, + "4243725": { + "pred": "The soap is a rectangular, slightly curved bar with a smooth, creamy beige surface.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the soap is mentioned in the description and is bar soap.", + 1 + ], + [ + "The type of the soap is mentioned in the description but is not bar soap.", + -1 + ], + [ + "The type of the soap is not mentioned.", + 0 + ], + [ + "The soap is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The type of the soap is mentioned in the description and is bar soap.", + "pred_index": 0, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the soap is mentioned in the description and is yellow or brown.", + 1 + ], + [ + "The color of the soap is mentioned in the description but is not yellow or brown.", + -1 + ], + [ + "The color of the soap is not mentioned.", + 0 + ], + [ + "The soap is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the soap is mentioned in the description but is not yellow or brown.", + "pred_index": 1, + "question_index": 1, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The decorative elements of the soap are not mentioned in the description.", + 1 + ], + [ + "The decorative elements of the soap are mentioned in the description.", + -1 + ], + [ + "The soap is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The decorative elements of the soap are not mentioned in the description.", + "pred_index": 0, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cutting board is not mentioned in the description.", + 1 + ], + [ + "The cutting board is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The cutting board is not mentioned in the description.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The dishwasher is not mentioned in the description.", + 1 + ], + [ + "The dishwasher is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The dishwasher is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The decorative peppers are not mentioned in the description.", + 1 + ], + [ + "The decorative peppers are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The decorative peppers are not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The logo of the soap is not mentioned in the description.", + 1 + ], + [ + "The logo of the soap is mentioned in the description.", + -1 + ], + [ + "The soap is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The logo of the soap is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a soap or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe soap is a rectangular, slightly curved bar with a smooth, creamy beige surface.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a soap or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe soap is a rectangular, slightly curved bar with a smooth, creamy beige surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The type of the soap is mentioned in the description and is bar soap.\nB. The type of the soap is mentioned in the description but is not bar soap.\nC. The type of the soap is not mentioned.\nD. The soap is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe soap is a rectangular, slightly curved bar with a smooth, creamy beige surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the soap is mentioned in the description and is yellow or brown.\nB. The color of the soap is mentioned in the description but is not yellow or brown.\nC. The color of the soap is not mentioned.\nD. The soap is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe soap is a rectangular, slightly curved bar with a smooth, creamy beige surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The decorative elements of the soap are not mentioned in the description.\nB. The decorative elements of the soap are mentioned in the description.\nC. The soap is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe soap is a rectangular, slightly curved bar with a smooth, creamy beige surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cutting board is not mentioned in the description.\nB. The cutting board is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe soap is a rectangular, slightly curved bar with a smooth, creamy beige surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The dishwasher is not mentioned in the description.\nB. The dishwasher is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe soap is a rectangular, slightly curved bar with a smooth, creamy beige surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The decorative peppers are not mentioned in the description.\nB. 
The decorative peppers are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe soap is a rectangular, slightly curved bar with a smooth, creamy beige surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The logo of the soap is not mentioned in the description.\nB. The logo of the soap is mentioned in the description.\nC. The soap is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The type of the soap is mentioned in the description and is bar soap.", + "B. The color of the soap is mentioned in the description but is not yellow or brown.", + "A. The decorative elements of the soap are not mentioned in the description.", + "A. The cutting board is not mentioned in the description.", + "A. The dishwasher is not mentioned in the description.", + "A. The decorative peppers are not mentioned in the description.", + "A. The logo of the soap is not mentioned in the description." 
+ ], + "score": 0.7142857142857143, + "score_pos": 0.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "4781902": { + "pred": "A dark brown, wooden ladder with evenly spaced, flat rungs and side rails that taper slightly towards the top.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the stool is not mentioned.", + 0 + ], + [ + "The stool is not mentioned.", + 0 + ], + [ + "The material of the stool is mentioned in the description but is not wood.", + -1 + ], + [ + "The material of the stool is mentioned in the description and is wood.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the stool is mentioned in the description and is wood.", + "pred_index": 3, + "question_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the leg is not mentioned, but the leg of the stool is mentioned.", + 0.5 + ], + [ + "The leg or the stool is not mentioned.", + 0 + ], + [ + "The material of the leg is mentioned in the description but is not wood.", + -1 + ], + [ + "The material of the leg is mentioned in the description and is wood.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the leg is mentioned in the description and is wood.", + "pred_index": 3, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the crossbar is not mentioned, but the crossbar of the stool is mentioned.", + 0.5 + ], + [ + "The crossbar or the stool is not mentioned.", + 0 + ], + [ + "The shape of the crossbar is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the crossbar is mentioned in the description and is rectangular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The crossbar or the stool is not mentioned.", + "pred_index": 1, + "question_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the crossbar is not mentioned, but the crossbar of the stool is mentioned.", + 0.5 + ], + [ + "The crossbar or the stool is not mentioned.", + 0 + ], + [ + "The material of the crossbar is mentioned in the description but is not wood.", + -1 + ], + [ + "The material of the crossbar is mentioned in the description and is wood.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. The crossbar or the stool is not mentioned.", + "pred_index": 1, + "question_index": 3, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the seat is not mentioned, but the seat of the stool is mentioned.", + 0.5 + ], + [ + "The seat or the stool is not mentioned.", + 0 + ], + [ + "The shape of the seat is mentioned in the description but is not flat.", + -1 + ], + [ + "The shape of the seat is mentioned in the description and is flat.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. The seat or the stool is not mentioned.", + "pred_index": 1, + "question_index": 4, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sky is mentioned in the description.", + -1 + ], + [ + "The sky is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The sky is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The building in the background are mentioned in the description.", + -1 + ], + [ + "The building in the background are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The building in the background are not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The swing set is mentioned in the description.", + -1 + ], + [ + "The swing set is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The swing set is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The slide is mentioned in the description.", + -1 + ], + [ + "The slide is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The slide is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The climbing wall is mentioned in the description.", + -1 + ], + [ + "The climbing wall is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The climbing wall is not mentioned in the description.", + "pred_index": 1, + "question_index": 9, + "eval_result": 0 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a stool or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "B. No", + "pred_index": 1, + "eval_result": "incorrect" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown, wooden ladder with evenly spaced, flat rungs and side rails that taper slightly towards the top.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a stool or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown, wooden ladder with evenly spaced, flat rungs and side rails that taper slightly towards the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The material of the stool is not mentioned.\nB. The stool is not mentioned.\nC. The material of the stool is mentioned in the description but is not wood.\nD. The material of the stool is mentioned in the description and is wood.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown, wooden ladder with evenly spaced, flat rungs and side rails that taper slightly towards the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the leg is not mentioned, but the leg of the stool is mentioned.\nB. The leg or the stool is not mentioned.\nC. The material of the leg is mentioned in the description but is not wood.\nD. The material of the leg is mentioned in the description and is wood.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown, wooden ladder with evenly spaced, flat rungs and side rails that taper slightly towards the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the crossbar is not mentioned, but the crossbar of the stool is mentioned.\nB. The crossbar or the stool is not mentioned.\nC. 
The shape of the crossbar is mentioned in the description but is not rectangular.\nD. The shape of the crossbar is mentioned in the description and is rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown, wooden ladder with evenly spaced, flat rungs and side rails that taper slightly towards the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the crossbar is not mentioned, but the crossbar of the stool is mentioned.\nB. The crossbar or the stool is not mentioned.\nC. The material of the crossbar is mentioned in the description but is not wood.\nD. The material of the crossbar is mentioned in the description and is wood.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown, wooden ladder with evenly spaced, flat rungs and side rails that taper slightly towards the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the seat is not mentioned, but the seat of the stool is mentioned.\nB. The seat or the stool is not mentioned.\nC. 
The shape of the seat is mentioned in the description but is not flat.\nD. The shape of the seat is mentioned in the description and is flat.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown, wooden ladder with evenly spaced, flat rungs and side rails that taper slightly towards the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sky is mentioned in the description.\nB. The sky is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown, wooden ladder with evenly spaced, flat rungs and side rails that taper slightly towards the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The building in the background are mentioned in the description.\nB. The building in the background are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown, wooden ladder with evenly spaced, flat rungs and side rails that taper slightly towards the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The swing set is mentioned in the description.\nB. The swing set is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown, wooden ladder with evenly spaced, flat rungs and side rails that taper slightly towards the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The slide is mentioned in the description.\nB. The slide is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown, wooden ladder with evenly spaced, flat rungs and side rails that taper slightly towards the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The climbing wall is mentioned in the description.\nB. The climbing wall is not mentioned in the description.\n" + ], + "response": [ + "B. No", + "D. The material of the stool is mentioned in the description and is wood.", + "D. The material of the leg is mentioned in the description and is wood.", + "B. The crossbar or the stool is not mentioned.", + "B. The crossbar or the stool is not mentioned.", + "B. The seat or the stool is not mentioned.", + "B. The sky is not mentioned in the description.", + "B. The building in the background are not mentioned in the description.", + "B. The swing set is not mentioned in the description.", + "B. The slide is not mentioned in the description.", + "B. The climbing wall is not mentioned in the description." + ], + "score": 0.0, + "score_pos": 0.0, + "score_neg": 0.0, + "neg_valid_num": 5, + "recognition_result": false + }, + "4782949": { + "pred": "The drum has a circular shape with a red body and a black rim. The drumhead is a light brown color with a blue circular patch in the center.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the drum is mentioned in the description but is not round or circular.", + -1 + ], + [ + "The shape of the drum is mentioned in the description and is round or circular.", + 1 + ], + [ + "The shape of the drum is not mentioned.", + 0 + ], + [ + "The drum is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The shape of the drum is mentioned in the description and is round or circular.", + "pred_index": 1, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the body is mentioned in the description but is not red.", + -1 + ], + [ + "The color of the body is mentioned in the description and is red.", + 1 + ], + [ + "The color of the body is not mentioned, but the body of the drum is mentioned.", + 0.5 + ], + [ + "The body or the drum is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the body is mentioned in the description and is red.", + "pred_index": 1, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the rim is mentioned in the description but is not metallic.", + -1 + ], + [ + "The material of the rim is mentioned in the description and is metallic.", + 1 + ], + [ + "The material of the rim is not mentioned, but the rim of the drum is mentioned.", + 0.5 + ], + [ + "The rim or the drum is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the rim is not mentioned, but the rim of the drum is mentioned.", + "pred_index": 2, + "question_index": 2, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The drum stand of the drum is mentioned in the description.", + -1 + ], + [ + "The drum is not mentioned in the description.", + 0 + ], + [ + "The drum stand of the drum is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The drum stand of the drum is not mentioned in the description.", + "pred_index": 2, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Trees are mentioned in the description.", + -1 + ], + [ + "The Trees are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The Trees are not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The drum pedal of the drum is mentioned in the description.", + -1 + ], + [ + "The drum is not mentioned in the description.", + 0 + ], + [ + "The drum pedal of the drum is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The drum pedal of the drum is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Superior Foods sign is mentioned in the description.", + -1 + ], + [ + "The Superior Foods sign is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The Superior Foods sign is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The People are mentioned in the description.", + -1 + ], + [ + "The People are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The People are not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a drum or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe drum has a circular shape with a red body and a black rim. The drumhead is a light brown color with a blue circular patch in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a drum or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe drum has a circular shape with a red body and a black rim. 
The drumhead is a light brown color with a blue circular patch in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the drum is mentioned in the description but is not round or circular.\nB. The shape of the drum is mentioned in the description and is round or circular.\nC. The shape of the drum is not mentioned.\nD. The drum is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe drum has a circular shape with a red body and a black rim. The drumhead is a light brown color with a blue circular patch in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the body is mentioned in the description but is not red.\nB. The color of the body is mentioned in the description and is red.\nC. The color of the body is not mentioned, but the body of the drum is mentioned.\nD. The body or the drum is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe drum has a circular shape with a red body and a black rim. 
The drumhead is a light brown color with a blue circular patch in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the rim is mentioned in the description but is not metallic.\nB. The material of the rim is mentioned in the description and is metallic.\nC. The material of the rim is not mentioned, but the rim of the drum is mentioned.\nD. The rim or the drum is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe drum has a circular shape with a red body and a black rim. The drumhead is a light brown color with a blue circular patch in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The drum stand of the drum is mentioned in the description.\nB. The drum is not mentioned in the description.\nC. The drum stand of the drum is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe drum has a circular shape with a red body and a black rim. 
The drumhead is a light brown color with a blue circular patch in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Trees are mentioned in the description.\nB. The Trees are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe drum has a circular shape with a red body and a black rim. The drumhead is a light brown color with a blue circular patch in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The drum pedal of the drum is mentioned in the description.\nB. The drum is not mentioned in the description.\nC. The drum pedal of the drum is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe drum has a circular shape with a red body and a black rim. The drumhead is a light brown color with a blue circular patch in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Superior Foods sign is mentioned in the description.\nB. 
The Superior Foods sign is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe drum has a circular shape with a red body and a black rim. The drumhead is a light brown color with a blue circular patch in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The People are mentioned in the description.\nB. The People are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The shape of the drum is mentioned in the description and is round or circular.", + "B. The color of the body is mentioned in the description and is red.", + "C. The material of the rim is not mentioned, but the rim of the drum is mentioned.", + "C. The drum stand of the drum is not mentioned in the description.", + "B. The Trees are not mentioned in the description.", + "C. The drum pedal of the drum is not mentioned in the description.", + "B. The Superior Foods sign is not mentioned in the description.", + "B. The People are not mentioned in the description." + ], + "score": 0.9375, + "score_pos": 0.8333333333333334, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "5211280": { + "pred": "A stainless steel rice cooker with a black handle on top. The front features a digital display screen in the center, surrounded by various buttons and controls. The buttons are arranged in a circular pattern around the display, with additional buttons below. 
The cooker has a sleek, modern design with a smooth, reflective surface.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the rice cooker is mentioned in the description and is silver and black.", + 1 + ], + [ + "The color of the rice cooker is mentioned in the description but is not silver and black.", + -1 + ], + [ + "The color of the rice cooker is not mentioned.", + 0 + ], + [ + "The rice cooker is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the rice cooker is mentioned in the description but is not silver and black.", + "pred_index": 1, + "question_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the handle is mentioned in the description and is curved.", + 1 + ], + [ + "The shape of the handle is mentioned in the description but is not curved.", + -1 + ], + [ + "The shape of the handle is not mentioned, but the handle of the rice cooker is mentioned.", + 0.5 + ], + [ + "The handle or the rice cooker is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the handle is not mentioned, but the handle of the rice cooker is mentioned.", + "pred_index": 2, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the display is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the display is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the display is not mentioned, but the display of the rice cooker is mentioned.", + 0.5 + ], + [ + "The display or the rice cooker is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The shape of the display is mentioned in the description but is not rectangular.", + "pred_index": 1, + "question_index": 2, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the body is mentioned in the description and is silver.", + 1 + ], + [ + "The color of the body is mentioned in the description but is not silver.", + -1 + ], + [ + "The color of the body is not mentioned, but the body of the rice cooker is mentioned.", + 0.5 + ], + [ + "The body or the rice cooker is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the body is mentioned in the description but is not silver.", + "pred_index": 1, + "question_index": 3, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the base is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the base is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the base is not mentioned, but the base of the rice cooker is mentioned.", + 0.5 + ], + [ + "The base or the rice cooker is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The material of the base is mentioned in the description but is not plastic.", + "pred_index": 1, + "question_index": 4, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The red crates are mentioned in the description.", + -1 + ], + [ + "The red crates are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The red crates are not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The generator is mentioned in the description.", + -1 + ], + [ + "The generator is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The generator is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The steam vent of the rice cooker is mentioned in the description.", + -1 + ], + [ + "The steam vent of the rice cooker is not mentioned in the description.", + 1 + ], + [ + "The rice cooker is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The steam vent of the rice cooker is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The inner pot of the rice cooker is mentioned in the description.", + -1 + ], + [ + "The inner pot of the rice cooker is not mentioned in the description.", + 1 + ], + [ + "The rice cooker is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The inner pot of the rice cooker is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The measuring cup of the rice cooker is mentioned in the description.", + -1 + ], + [ + "The measuring cup of the rice cooker is not mentioned in the description.", + 1 + ], + [ + "The rice cooker is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The measuring cup of the rice cooker is not mentioned in the description.", + "pred_index": 1, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a rice cooker or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel rice cooker with a black handle on top. The front features a digital display screen in the center, surrounded by various buttons and controls. The buttons are arranged in a circular pattern around the display, with additional buttons below. 
The cooker has a sleek, modern design with a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a rice cooker or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel rice cooker with a black handle on top. The front features a digital display screen in the center, surrounded by various buttons and controls. The buttons are arranged in a circular pattern around the display, with additional buttons below. The cooker has a sleek, modern design with a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the rice cooker is mentioned in the description and is silver and black.\nB. The color of the rice cooker is mentioned in the description but is not silver and black.\nC. The color of the rice cooker is not mentioned.\nD. The rice cooker is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel rice cooker with a black handle on top. 
The front features a digital display screen in the center, surrounded by various buttons and controls. The buttons are arranged in a circular pattern around the display, with additional buttons below. The cooker has a sleek, modern design with a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the handle is mentioned in the description and is curved.\nB. The shape of the handle is mentioned in the description but is not curved.\nC. The shape of the handle is not mentioned, but the handle of the rice cooker is mentioned.\nD. The handle or the rice cooker is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel rice cooker with a black handle on top. The front features a digital display screen in the center, surrounded by various buttons and controls. The buttons are arranged in a circular pattern around the display, with additional buttons below. The cooker has a sleek, modern design with a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the display is mentioned in the description and is rectangular.\nB. The shape of the display is mentioned in the description but is not rectangular.\nC. The shape of the display is not mentioned, but the display of the rice cooker is mentioned.\nD. 
The display or the rice cooker is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel rice cooker with a black handle on top. The front features a digital display screen in the center, surrounded by various buttons and controls. The buttons are arranged in a circular pattern around the display, with additional buttons below. The cooker has a sleek, modern design with a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the body is mentioned in the description and is silver.\nB. The color of the body is mentioned in the description but is not silver.\nC. The color of the body is not mentioned, but the body of the rice cooker is mentioned.\nD. The body or the rice cooker is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel rice cooker with a black handle on top. The front features a digital display screen in the center, surrounded by various buttons and controls. The buttons are arranged in a circular pattern around the display, with additional buttons below. 
The cooker has a sleek, modern design with a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the base is mentioned in the description and is plastic.\nB. The material of the base is mentioned in the description but is not plastic.\nC. The material of the base is not mentioned, but the base of the rice cooker is mentioned.\nD. The base or the rice cooker is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel rice cooker with a black handle on top. The front features a digital display screen in the center, surrounded by various buttons and controls. The buttons are arranged in a circular pattern around the display, with additional buttons below. The cooker has a sleek, modern design with a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The red crates are mentioned in the description.\nB. The red crates are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel rice cooker with a black handle on top. 
The front features a digital display screen in the center, surrounded by various buttons and controls. The buttons are arranged in a circular pattern around the display, with additional buttons below. The cooker has a sleek, modern design with a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The generator is mentioned in the description.\nB. The generator is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel rice cooker with a black handle on top. The front features a digital display screen in the center, surrounded by various buttons and controls. The buttons are arranged in a circular pattern around the display, with additional buttons below. The cooker has a sleek, modern design with a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The steam vent of the rice cooker is mentioned in the description.\nB. The steam vent of the rice cooker is not mentioned in the description.\nC. The rice cooker is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel rice cooker with a black handle on top. The front features a digital display screen in the center, surrounded by various buttons and controls. The buttons are arranged in a circular pattern around the display, with additional buttons below. The cooker has a sleek, modern design with a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The inner pot of the rice cooker is mentioned in the description.\nB. The inner pot of the rice cooker is not mentioned in the description.\nC. The rice cooker is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel rice cooker with a black handle on top. The front features a digital display screen in the center, surrounded by various buttons and controls. The buttons are arranged in a circular pattern around the display, with additional buttons below. The cooker has a sleek, modern design with a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The measuring cup of the rice cooker is mentioned in the description.\nB. The measuring cup of the rice cooker is not mentioned in the description.\nC. The rice cooker is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. 
The color of the rice cooker is mentioned in the description but is not silver and black.", + "C. The shape of the handle is not mentioned, but the handle of the rice cooker is mentioned.", + "B. The shape of the display is mentioned in the description but is not rectangular.", + "B. The color of the body is mentioned in the description but is not silver.", + "B. The material of the base is mentioned in the description but is not plastic.", + "B. The red crates are not mentioned in the description.", + "B. The generator is not mentioned in the description.", + "B. The steam vent of the rice cooker is not mentioned in the description.", + "B. The inner pot of the rice cooker is not mentioned in the description.", + "B. The measuring cup of the rice cooker is not mentioned in the description." + ], + "score": 0.15, + "score_pos": -0.7, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "5718392": { + "pred": "A woven basket with a dark brown color and a pattern of interlocking diamond shapes, featuring a sturdy, slightly curved handle.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the storage box is mentioned in the description but is not wicker/woven.", + -1 + ], + [ + "The storage box is not mentioned.", + 0 + ], + [ + "The material of the storage box is not mentioned.", + 0 + ], + [ + "The material of the storage box is mentioned in the description and is wicker/woven.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The material of the storage box is mentioned in the description and is wicker/woven.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the storage box is mentioned in the description but is not woven.", + -1 + ], + [ + "The storage box is not mentioned.", + 0 + ], + [ + "The texture of the storage box is not mentioned.", + 0 + ], + [ + "The texture of the storage box is mentioned in the description and is woven.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The texture of the storage box is mentioned in the description and is woven.", + "pred_index": 3, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the storage box is mentioned in the description but is not brown.", + -1 + ], + [ + "The storage box is not mentioned.", + 0 + ], + [ + "The color of the storage box is not mentioned.", + 0 + ], + [ + "The color of the storage box is mentioned in the description and is brown.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the storage box is mentioned in the description and is brown.", + "pred_index": 3, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wheels of the storage box are mentioned in the description.", + -1 + ], + [ + "The wheels of the storage box are not mentioned in the description.", + 1 + ], + [ + "The storage box is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The wheels of the storage box are not mentioned in the description.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The utensils are mentioned in the description.", + -1 + ], + [ + "The utensils are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The utensils are not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle of the storage box is mentioned in the description.", + -1 + ], + [ + "The handle of the storage box is not mentioned in the description.", + 1 + ], + [ + "The storage box is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The handle of the storage box is mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lock of the storage box is mentioned in the description.", + -1 + ], + [ + "The lock of the storage box is not mentioned in the description.", + 1 + ], + [ + "The storage box is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The lock of the storage box is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The blue canopy is mentioned in the description.", + -1 + ], + [ + "The blue canopy is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The blue canopy is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a storage box or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woven basket with a dark brown color and a pattern of interlocking diamond shapes, featuring a sturdy, slightly curved handle.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a storage box or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA woven basket with a dark brown color and a pattern of interlocking diamond shapes, featuring a sturdy, slightly curved handle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the storage box is mentioned in the description but is not wicker/woven.\nB. The storage box is not mentioned.\nC. The material of the storage box is not mentioned.\nD. The material of the storage box is mentioned in the description and is wicker/woven.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woven basket with a dark brown color and a pattern of interlocking diamond shapes, featuring a sturdy, slightly curved handle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the storage box is mentioned in the description but is not woven.\nB. The storage box is not mentioned.\nC. The texture of the storage box is not mentioned.\nD. The texture of the storage box is mentioned in the description and is woven.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA woven basket with a dark brown color and a pattern of interlocking diamond shapes, featuring a sturdy, slightly curved handle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the storage box is mentioned in the description but is not brown.\nB. The storage box is not mentioned.\nC. The color of the storage box is not mentioned.\nD. The color of the storage box is mentioned in the description and is brown.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woven basket with a dark brown color and a pattern of interlocking diamond shapes, featuring a sturdy, slightly curved handle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wheels of the storage box are mentioned in the description.\nB. The wheels of the storage box are not mentioned in the description.\nC. The storage box is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA woven basket with a dark brown color and a pattern of interlocking diamond shapes, featuring a sturdy, slightly curved handle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The utensils are mentioned in the description.\nB. The utensils are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woven basket with a dark brown color and a pattern of interlocking diamond shapes, featuring a sturdy, slightly curved handle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handle of the storage box is mentioned in the description.\nB. The handle of the storage box is not mentioned in the description.\nC. The storage box is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA woven basket with a dark brown color and a pattern of interlocking diamond shapes, featuring a sturdy, slightly curved handle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lock of the storage box is mentioned in the description.\nB. The lock of the storage box is not mentioned in the description.\nC. The storage box is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woven basket with a dark brown color and a pattern of interlocking diamond shapes, featuring a sturdy, slightly curved handle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The blue canopy is mentioned in the description.\nB. The blue canopy is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The material of the storage box is mentioned in the description and is wicker/woven.", + "D. The texture of the storage box is mentioned in the description and is woven.", + "D. The color of the storage box is mentioned in the description and is brown.", + "B. The wheels of the storage box are not mentioned in the description.", + "B. The utensils are not mentioned in the description.", + "A. The handle of the storage box is mentioned in the description.", + "B. The lock of the storage box is not mentioned in the description.", + "B. The blue canopy is not mentioned in the description." 
+ ], + "score": 0.75, + "score_pos": 1.0, + "score_neg": 0.6, + "neg_valid_num": 5, + "recognition_result": true + }, + "6037272": { + "pred": "A green shampoo bottle with a slightly curved shape, featuring a white label with text and a small orange logo.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle or the bottle is not mentioned.", + 0 + ], + [ + "The material of the bottle is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the bottle is not mentioned, but the bottle of the bottle is mentioned.", + 0.5 + ], + [ + "The material of the bottle is mentioned in the description but is not plastic.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the bottle is not mentioned, but the bottle of the bottle is mentioned.", + "pred_index": 2, + "question_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cap or the bottle is not mentioned.", + 0 + ], + [ + "The color of the cap is mentioned in the description and is green.", + 1 + ], + [ + "The color of the cap is not mentioned, but the cap of the bottle is mentioned.", + 0.5 + ], + [ + "The color of the cap is mentioned in the description but is not green.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The cap or the bottle is not mentioned.", + "pred_index": 0, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The text on the label or the bottle are not mentioned.", + 0 + ], + [ + "The color of the text on the label is mentioned in the description and is white.", + 1 + ], + [ + "The color of the text on the label is not mentioned, but the text on the label of the bottle are mentioned.", + 0.5 + ], + [ + "The color of the text on the label is mentioned in the description but is not white.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the text on the label is mentioned in the description and is white.", + "pred_index": 1, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle is not mentioned.", + 0 + ], + [ + "The color of the bottle is mentioned in the description and is green.", + 1 + ], + [ + "The color of the bottle is not mentioned.", + 0 + ], + [ + "The color of the bottle is mentioned in the description but is not green.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the bottle is mentioned in the description and is green.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cap/top or the bottle is not mentioned.", + 0 + ], + [ + "The shape of the cap/top is mentioned in the description and is flat or tapered.", + 1 + ], + [ + "The shape of the cap/top is not mentioned, but the cap/top of the bottle is mentioned.", + 0.5 + ], + [ + "The shape of the cap/top is mentioned in the description but is not flat or tapered.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The cap/top or the bottle is not mentioned.", + "pred_index": 0, + "question_index": 4, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The nozzle of the bottle is mentioned in the description.", + -1 + ], + [ + "The bottle is not mentioned in the description.", + 0 + ], + [ + "The nozzle of the bottle is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The nozzle of the bottle is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shower curtain is mentioned in the description.", + -1 + ], + [ + "The shower curtain is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The shower curtain is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bathtub is mentioned in the description.", + -1 + ], + [ + "The bathtub is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bathtub is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle of the bottle is mentioned in the description.", + -1 + ], + [ + "The bottle is not mentioned in the description.", + 0 + ], + [ + "The handle of the bottle is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The handle of the bottle is not mentioned in the description.", + "pred_index": 2, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pump of the bottle is mentioned in the description.", + -1 + ], + [ + "The bottle is not mentioned in the description.", + 0 + ], + [ + "The pump of the bottle is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The pump of the bottle is not mentioned in the description.", + "pred_index": 2, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a bottle or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green shampoo bottle with a slightly curved shape, featuring a white label with text and a small orange logo.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a bottle or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green shampoo bottle with a slightly curved shape, featuring a white label with text and a small orange logo.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bottle or the bottle is not mentioned.\nB. The material of the bottle is mentioned in the description and is plastic.\nC. The material of the bottle is not mentioned, but the bottle of the bottle is mentioned.\nD. The material of the bottle is mentioned in the description but is not plastic.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green shampoo bottle with a slightly curved shape, featuring a white label with text and a small orange logo.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cap or the bottle is not mentioned.\nB. The color of the cap is mentioned in the description and is green.\nC. The color of the cap is not mentioned, but the cap of the bottle is mentioned.\nD. The color of the cap is mentioned in the description but is not green.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green shampoo bottle with a slightly curved shape, featuring a white label with text and a small orange logo.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The text on the label or the bottle are not mentioned.\nB. The color of the text on the label is mentioned in the description and is white.\nC. The color of the text on the label is not mentioned, but the text on the label of the bottle are mentioned.\nD. The color of the text on the label is mentioned in the description but is not white.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green shampoo bottle with a slightly curved shape, featuring a white label with text and a small orange logo.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bottle is not mentioned.\nB. The color of the bottle is mentioned in the description and is green.\nC. The color of the bottle is not mentioned.\nD. The color of the bottle is mentioned in the description but is not green.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA green shampoo bottle with a slightly curved shape, featuring a white label with text and a small orange logo.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cap/top or the bottle is not mentioned.\nB. The shape of the cap/top is mentioned in the description and is flat or tapered.\nC. The shape of the cap/top is not mentioned, but the cap/top of the bottle is mentioned.\nD. The shape of the cap/top is mentioned in the description but is not flat or tapered.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green shampoo bottle with a slightly curved shape, featuring a white label with text and a small orange logo.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The nozzle of the bottle is mentioned in the description.\nB. The bottle is not mentioned in the description.\nC. The nozzle of the bottle is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA green shampoo bottle with a slightly curved shape, featuring a white label with text and a small orange logo.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shower curtain is mentioned in the description.\nB. The shower curtain is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green shampoo bottle with a slightly curved shape, featuring a white label with text and a small orange logo.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bathtub is mentioned in the description.\nB. The bathtub is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green shampoo bottle with a slightly curved shape, featuring a white label with text and a small orange logo.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handle of the bottle is mentioned in the description.\nB. 
The bottle is not mentioned in the description.\nC. The handle of the bottle is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green shampoo bottle with a slightly curved shape, featuring a white label with text and a small orange logo.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pump of the bottle is mentioned in the description.\nB. The bottle is not mentioned in the description.\nC. The pump of the bottle is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The material of the bottle is not mentioned, but the bottle of the bottle is mentioned.", + "A. The cap or the bottle is not mentioned.", + "B. The color of the text on the label is mentioned in the description and is white.", + "B. The color of the bottle is mentioned in the description and is green.", + "A. The cap/top or the bottle is not mentioned.", + "C. The nozzle of the bottle is not mentioned in the description.", + "B. The shower curtain is not mentioned in the description.", + "B. The bathtub is not mentioned in the description.", + "C. The handle of the bottle is not mentioned in the description.", + "C. The pump of the bottle is not mentioned in the description." + ], + "score": 0.75, + "score_pos": 0.5, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "6820594": { + "pred": "A medium-sized cat with a predominantly white face and chest, featuring a mix of brown and black tabby markings on its back and sides. 
The cat has large, expressive green eyes and a pink nose. Its ears are pointed and have a light brown color with darker tips.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the ears is mentioned in the description but is not triangular or pointed.", + -1 + ], + [ + "The shape of the ears is not mentioned, but the ears of the cat are mentioned.", + 0.5 + ], + [ + "The shape of the ears is mentioned in the description and is triangular or pointed.", + 1 + ], + [ + "The ears or the cat are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the ears is mentioned in the description and is triangular or pointed.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the eyes is mentioned in the description but is not yellow or green.", + -1 + ], + [ + "The color of the eyes is not mentioned, but the eyes of the cat are mentioned.", + 0.5 + ], + [ + "The color of the eyes is mentioned in the description and is yellow or green.", + 1 + ], + [ + "The eyes or the cat are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the eyes is mentioned in the description and is yellow or green.", + "pred_index": 2, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the legs is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the legs is not mentioned, but the legs of the cat are mentioned.", + 0.5 + ], + [ + "The color of the legs is mentioned in the description and is white.", + 1 + ], + [ + "The legs or the cat are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the legs is mentioned in the description but is not white.", + "pred_index": 0, + "question_index": 2, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the eyes is mentioned in the description but is not round.", + -1 + ], + [ + "The shape of the eyes is not mentioned, but the eyes of the cat are mentioned.", + 0.5 + ], + [ + "The shape of the eyes is mentioned in the description and is round.", + 1 + ], + [ + "The eyes or the cat are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the eyes is not mentioned, but the eyes of the cat are mentioned.", + "pred_index": 1, + "question_index": 3, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the back is mentioned in the description but is not brown or black.", + -1 + ], + [ + "The color of the back is not mentioned, but the back of the cat is mentioned.", + 0.5 + ], + [ + "The color of the back is mentioned in the description and is brown or black.", + 1 + ], + [ + "The back or the cat is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the back is mentioned in the description and is brown or black.", + "pred_index": 2, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toothpaste is mentioned in the description.", + -1 + ], + [ + "The toothpaste is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The toothpaste is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toothbrush is mentioned in the description.", + -1 + ], + [ + "The toothbrush is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The toothbrush is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mirror is mentioned in the description.", + -1 + ], + [ + "The mirror is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The mirror is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tail of the cat is mentioned in the description.", + -1 + ], + [ + "The tail of the cat is not mentioned in the description.", + 1 + ], + [ + "The cat is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The tail of the cat is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The faucet is mentioned in the description.", + -1 + ], + [ + "The faucet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The faucet is not mentioned in the description.", + "pred_index": 1, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a cat or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and chest, featuring a mix of brown and black tabby markings on its back and sides. The cat has large, expressive green eyes and a pink nose. Its ears are pointed and have a light brown color with darker tips.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a cat or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and chest, featuring a mix of brown and black tabby markings on its back and sides. 
The cat has large, expressive green eyes and a pink nose. Its ears are pointed and have a light brown color with darker tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the ears is mentioned in the description but is not triangular or pointed.\nB. The shape of the ears is not mentioned, but the ears of the cat are mentioned.\nC. The shape of the ears is mentioned in the description and is triangular or pointed.\nD. The ears or the cat are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and chest, featuring a mix of brown and black tabby markings on its back and sides. The cat has large, expressive green eyes and a pink nose. Its ears are pointed and have a light brown color with darker tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the eyes is mentioned in the description but is not yellow or green.\nB. The color of the eyes is not mentioned, but the eyes of the cat are mentioned.\nC. The color of the eyes is mentioned in the description and is yellow or green.\nD. The eyes or the cat are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and chest, featuring a mix of brown and black tabby markings on its back and sides. The cat has large, expressive green eyes and a pink nose. Its ears are pointed and have a light brown color with darker tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the legs is mentioned in the description but is not white.\nB. The color of the legs is not mentioned, but the legs of the cat are mentioned.\nC. The color of the legs is mentioned in the description and is white.\nD. The legs or the cat are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and chest, featuring a mix of brown and black tabby markings on its back and sides. The cat has large, expressive green eyes and a pink nose. Its ears are pointed and have a light brown color with darker tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the eyes is mentioned in the description but is not round.\nB. The shape of the eyes is not mentioned, but the eyes of the cat are mentioned.\nC. The shape of the eyes is mentioned in the description and is round.\nD. 
The eyes or the cat are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and chest, featuring a mix of brown and black tabby markings on its back and sides. The cat has large, expressive green eyes and a pink nose. Its ears are pointed and have a light brown color with darker tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the back is mentioned in the description but is not brown or black.\nB. The color of the back is not mentioned, but the back of the cat is mentioned.\nC. The color of the back is mentioned in the description and is brown or black.\nD. The back or the cat is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and chest, featuring a mix of brown and black tabby markings on its back and sides. The cat has large, expressive green eyes and a pink nose. Its ears are pointed and have a light brown color with darker tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The toothpaste is mentioned in the description.\nB. The toothpaste is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and chest, featuring a mix of brown and black tabby markings on its back and sides. The cat has large, expressive green eyes and a pink nose. Its ears are pointed and have a light brown color with darker tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toothbrush is mentioned in the description.\nB. The toothbrush is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and chest, featuring a mix of brown and black tabby markings on its back and sides. The cat has large, expressive green eyes and a pink nose. Its ears are pointed and have a light brown color with darker tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mirror is mentioned in the description.\nB. 
The mirror is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and chest, featuring a mix of brown and black tabby markings on its back and sides. The cat has large, expressive green eyes and a pink nose. Its ears are pointed and have a light brown color with darker tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tail of the cat is mentioned in the description.\nB. The tail of the cat is not mentioned in the description.\nC. The cat is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and chest, featuring a mix of brown and black tabby markings on its back and sides. The cat has large, expressive green eyes and a pink nose. Its ears are pointed and have a light brown color with darker tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The faucet is mentioned in the description.\nB. The faucet is not mentioned in the description.\n" + ], + "response": [ + "A. 
Yes", + "C. The shape of the ears is mentioned in the description and is triangular or pointed.", + "C. The color of the eyes is mentioned in the description and is yellow or green.", + "A. The color of the legs is mentioned in the description but is not white.", + "B. The shape of the eyes is not mentioned, but the eyes of the cat are mentioned.", + "C. The color of the back is mentioned in the description and is brown or black.", + "B. The toothpaste is not mentioned in the description.", + "B. The toothbrush is not mentioned in the description.", + "B. The mirror is not mentioned in the description.", + "B. The tail of the cat is not mentioned in the description.", + "B. The faucet is not mentioned in the description." + ], + "score": 0.75, + "score_pos": 0.5, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "5718424": { + "pred": "A black athletic shoe with a textured surface, featuring a prominent yellow swoosh logo on the side. The shoe has a low-top design with a padded collar and a lace-up closure. The sole is thick and rugged, designed for traction.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the laces is not mentioned, but the laces of the sneakers are mentioned.", + 0.5 + ], + [ + "The color of the laces is mentioned in the description but is not white or green.", + -1 + ], + [ + "The color of the laces is mentioned in the description and is white or green.", + 1 + ], + [ + "The laces or the sneakers are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the laces is not mentioned, but the laces of the sneakers are mentioned.", + "pred_index": 0, + "question_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the sneakers is not mentioned.", + 0 + ], + [ + "The color of the sneakers is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the sneakers is mentioned in the description and is black.", + 1 + ], + [ + "The sneakers are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the sneakers is mentioned in the description and is black.", + "pred_index": 2, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the sneakers is not mentioned.", + 0 + ], + [ + "The material of the sneakers is mentioned in the description but is not rubber.", + -1 + ], + [ + "The material of the sneakers is mentioned in the description and is rubber.", + 1 + ], + [ + "The sneakers are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the sneakers is not mentioned.", + "pred_index": 0, + "question_index": 2, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The table is not mentioned in the description.", + 1 + ], + [ + "The table is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The table is not mentioned in the description.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The base of the sneakers is not mentioned in the description.", + 1 + ], + [ + "The sneakers are not mentioned in the description.", + 0 + ], + [ + "The base of the sneakers is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The base of the sneakers is mentioned in the description.", + "pred_index": 2, + "question_index": 4, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The heel of the sneakers is not mentioned in the description.", + 1 + ], + [ + "The sneakers are not mentioned in the description.", + 0 + ], + [ + "The heel of the sneakers is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The heel of the sneakers is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The person is not mentioned in the description.", + 1 + ], + [ + "The person is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The person is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The food is not mentioned in the description.", + 1 + ], + [ + "The food is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The food is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the objects in the description are sneakers or objects of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black athletic shoe with a textured surface, featuring a prominent yellow swoosh logo on the side. The shoe has a low-top design with a padded collar and a lace-up closure. The sole is thick and rugged, designed for traction.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the objects in the description are sneakers or objects of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black athletic shoe with a textured surface, featuring a prominent yellow swoosh logo on the side. The shoe has a low-top design with a padded collar and a lace-up closure. 
The sole is thick and rugged, designed for traction.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the laces is not mentioned, but the laces of the sneakers are mentioned.\nB. The color of the laces is mentioned in the description but is not white or green.\nC. The color of the laces is mentioned in the description and is white or green.\nD. The laces or the sneakers are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black athletic shoe with a textured surface, featuring a prominent yellow swoosh logo on the side. The shoe has a low-top design with a padded collar and a lace-up closure. The sole is thick and rugged, designed for traction.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the sneakers is not mentioned.\nB. The color of the sneakers is mentioned in the description but is not black.\nC. The color of the sneakers is mentioned in the description and is black.\nD. The sneakers are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black athletic shoe with a textured surface, featuring a prominent yellow swoosh logo on the side. The shoe has a low-top design with a padded collar and a lace-up closure. The sole is thick and rugged, designed for traction.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the sneakers is not mentioned.\nB. The material of the sneakers is mentioned in the description but is not rubber.\nC. The material of the sneakers is mentioned in the description and is rubber.\nD. The sneakers are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black athletic shoe with a textured surface, featuring a prominent yellow swoosh logo on the side. The shoe has a low-top design with a padded collar and a lace-up closure. The sole is thick and rugged, designed for traction.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The table is not mentioned in the description.\nB. The table is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black athletic shoe with a textured surface, featuring a prominent yellow swoosh logo on the side. The shoe has a low-top design with a padded collar and a lace-up closure. The sole is thick and rugged, designed for traction.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The base of the sneakers is not mentioned in the description.\nB. The sneakers are not mentioned in the description.\nC. The base of the sneakers is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black athletic shoe with a textured surface, featuring a prominent yellow swoosh logo on the side. The shoe has a low-top design with a padded collar and a lace-up closure. The sole is thick and rugged, designed for traction.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The heel of the sneakers is not mentioned in the description.\nB. The sneakers are not mentioned in the description.\nC. The heel of the sneakers is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black athletic shoe with a textured surface, featuring a prominent yellow swoosh logo on the side. The shoe has a low-top design with a padded collar and a lace-up closure. The sole is thick and rugged, designed for traction.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The person is not mentioned in the description.\nB. The person is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black athletic shoe with a textured surface, featuring a prominent yellow swoosh logo on the side. The shoe has a low-top design with a padded collar and a lace-up closure. The sole is thick and rugged, designed for traction.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The food is not mentioned in the description.\nB. The food is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The color of the laces is not mentioned, but the laces of the sneakers are mentioned.", + "C. The color of the sneakers is mentioned in the description and is black.", + "A. The material of the sneakers is not mentioned.", + "A. The table is not mentioned in the description.", + "C. The base of the sneakers is mentioned in the description.", + "A. The heel of the sneakers is not mentioned in the description.", + "A. The person is not mentioned in the description.", + "A. 
The food is not mentioned in the description." + ], + "score": 0.5625, + "score_pos": 0.5, + "score_neg": 0.6, + "neg_valid_num": 5, + "recognition_result": true + }, + "6055310": { + "pred": "A golden ruler with a series of evenly spaced, small, rectangular notches along its length.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the blade is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the blade is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + 0.5 + ], + [ + "The blade or the tape measure/ruler is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the blade is mentioned in the description but is not rectangular.", + "pred_index": 1, + "question_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the blade is mentioned in the description and is yellow or golden.", + 1 + ], + [ + "The color of the blade is mentioned in the description but is not yellow or golden.", + -1 + ], + [ + "The color of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + 0.5 + ], + [ + "The blade or the tape measure/ruler is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the blade is mentioned in the description and is yellow or golden.", + "pred_index": 0, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the blade is mentioned in the description and is long.", + 1 + ], + [ + "The size of the blade is mentioned in the description but is not long.", + -1 + ], + [ + "The size of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + 0.5 + ], + [ + "The blade or the tape measure/ruler is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The size of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + "pred_index": 2, + "question_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the blade is mentioned in the description and is metal.", + 1 + ], + [ + "The material of the blade is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + 0.5 + ], + [ + "The blade or the tape measure/ruler is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the blade is mentioned in the description and is metal.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The case of the tape measure/ruler is not mentioned in the description.", + 1 + ], + [ + "The tape measure/ruler is not mentioned in the description.", + 0 + ], + [ + "The case of the tape measure/ruler is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The case of the tape measure/ruler is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bathtub is not mentioned in the description.", + 1 + ], + [ + "The bathtub is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The bathtub is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lock of the tape measure/ruler is not mentioned in the description.", + 1 + ], + [ + "The tape measure/ruler is not mentioned in the description.", + 0 + ], + [ + "The lock of the tape measure/ruler is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The lock of the tape measure/ruler is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The door is not mentioned in the description.", + 1 + ], + [ + "The door is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The door is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The box is not mentioned in the description.", + 1 + ], + [ + "The box is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The box is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a tape measure/ruler or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA golden ruler with a series of evenly spaced, small, rectangular notches along its length.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a tape measure/ruler or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA golden ruler with a series of evenly spaced, small, rectangular notches along its length.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The shape of the blade is mentioned in the description and is rectangular.\nB. The shape of the blade is mentioned in the description but is not rectangular.\nC. The shape of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.\nD. The blade or the tape measure/ruler is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA golden ruler with a series of evenly spaced, small, rectangular notches along its length.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the blade is mentioned in the description and is yellow or golden.\nB. The color of the blade is mentioned in the description but is not yellow or golden.\nC. The color of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.\nD. The blade or the tape measure/ruler is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA golden ruler with a series of evenly spaced, small, rectangular notches along its length.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The size of the blade is mentioned in the description and is long.\nB. The size of the blade is mentioned in the description but is not long.\nC. The size of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.\nD. The blade or the tape measure/ruler is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA golden ruler with a series of evenly spaced, small, rectangular notches along its length.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the blade is mentioned in the description and is metal.\nB. The material of the blade is mentioned in the description but is not metal.\nC. The material of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.\nD. The blade or the tape measure/ruler is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA golden ruler with a series of evenly spaced, small, rectangular notches along its length.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The case of the tape measure/ruler is not mentioned in the description.\nB. The tape measure/ruler is not mentioned in the description.\nC. The case of the tape measure/ruler is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA golden ruler with a series of evenly spaced, small, rectangular notches along its length.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bathtub is not mentioned in the description.\nB. The bathtub is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA golden ruler with a series of evenly spaced, small, rectangular notches along its length.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lock of the tape measure/ruler is not mentioned in the description.\nB. The tape measure/ruler is not mentioned in the description.\nC. The lock of the tape measure/ruler is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA golden ruler with a series of evenly spaced, small, rectangular notches along its length.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The door is not mentioned in the description.\nB. The door is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA golden ruler with a series of evenly spaced, small, rectangular notches along its length.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The box is not mentioned in the description.\nB. The box is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The shape of the blade is mentioned in the description but is not rectangular.", + "A. The color of the blade is mentioned in the description and is yellow or golden.", + "C. The size of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + "A. The material of the blade is mentioned in the description and is metal.", + "A. The case of the tape measure/ruler is not mentioned in the description.", + "A. The bathtub is not mentioned in the description.", + "A. The lock of the tape measure/ruler is not mentioned in the description.", + "A. 
The door is not mentioned in the description.", + "A. The box is not mentioned in the description." + ], + "score": 0.7222222222222222, + "score_pos": 0.375, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "8201777": { + "pred": "A black van with a rear window displaying the word \"TAXI\" in large yellow letters. The van has a yellow license plate with black text and a small white sticker on the lower left side of the rear bumper. The rear lights are vertically aligned on both sides of the van.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the van is mentioned in the description but is not black.", + -1 + ], + [ + "The van is not mentioned.", + 0 + ], + [ + "The color of the van is not mentioned.", + 0 + ], + [ + "The color of the van is mentioned in the description and is black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the van is mentioned in the description and is black.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the sticker is mentioned in the description but is not white.", + -1 + ], + [ + "The sticker or the van is not mentioned.", + 0 + ], + [ + "The color of the sticker is not mentioned, but the sticker of the van is mentioned.", + 0.5 + ], + [ + "The color of the sticker is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the sticker is mentioned in the description and is white.", + "pred_index": 3, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the taillight is mentioned in the description but is not plastic.", + -1 + ], + [ + "The taillight or the van is not mentioned.", + 0 + ], + [ + "The material of the taillight is not mentioned, but the taillight of the van is mentioned.", + 0.5 + ], + [ + "The material of the taillight is mentioned in the description and is plastic.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the taillight is not mentioned, but the taillight of the van is mentioned.", + "pred_index": 2, + "question_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the license plate is mentioned in the description but is not yellow.", + -1 + ], + [ + "The license plate or the van is not mentioned.", + 0 + ], + [ + "The color of the license plate is not mentioned, but the license plate of the van is mentioned.", + 0.5 + ], + [ + "The color of the license plate is mentioned in the description and is yellow.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the license plate is mentioned in the description and is yellow.", + "pred_index": 3, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The text of the sign is mentioned in the description but is not taxi.", + -1 + ], + [ + "The sign or the van is not mentioned.", + 0 + ], + [ + "The text of the sign is not mentioned, but the sign of the van is mentioned.", + 0.5 + ], + [ + "The text of the sign is mentioned in the description and is taxi.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The text of the sign is mentioned in the description and is taxi.", + "pred_index": 3, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The van is not mentioned in the description.", + 0 + ], + [ + "The grill of the van is not mentioned in the description.", + 1 + ], + [ + "The grill of the van is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The grill of the van is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The building is not mentioned in the description.", + 1 + ], + [ + "The building is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The building is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The van is not mentioned in the description.", + 0 + ], + [ + "The front bumper of the van is not mentioned in the description.", + 1 + ], + [ + "The front bumper of the van is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The front bumper of the van is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The van is not mentioned in the description.", + 0 + ], + [ + "The antenna of the van is not mentioned in the description.", + 1 + ], + [ + "The antenna of the van is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The antenna of the van is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The person is not mentioned in the description.", + 1 + ], + [ + "The person is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The person is not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a van or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in large yellow letters. 
The van has a yellow license plate with black text and a small white sticker on the lower left side of the rear bumper. The rear lights are vertically aligned on both sides of the van.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a van or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in large yellow letters. The van has a yellow license plate with black text and a small white sticker on the lower left side of the rear bumper. The rear lights are vertically aligned on both sides of the van.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the van is mentioned in the description but is not black.\nB. The van is not mentioned.\nC. The color of the van is not mentioned.\nD. The color of the van is mentioned in the description and is black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in large yellow letters. 
The van has a yellow license plate with black text and a small white sticker on the lower left side of the rear bumper. The rear lights are vertically aligned on both sides of the van.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the sticker is mentioned in the description but is not white.\nB. The sticker or the van is not mentioned.\nC. The color of the sticker is not mentioned, but the sticker of the van is mentioned.\nD. The color of the sticker is mentioned in the description and is white.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in large yellow letters. The van has a yellow license plate with black text and a small white sticker on the lower left side of the rear bumper. The rear lights are vertically aligned on both sides of the van.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the taillight is mentioned in the description but is not plastic.\nB. The taillight or the van is not mentioned.\nC. The material of the taillight is not mentioned, but the taillight of the van is mentioned.\nD. The material of the taillight is mentioned in the description and is plastic.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in large yellow letters. The van has a yellow license plate with black text and a small white sticker on the lower left side of the rear bumper. The rear lights are vertically aligned on both sides of the van.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the license plate is mentioned in the description but is not yellow.\nB. The license plate or the van is not mentioned.\nC. The color of the license plate is not mentioned, but the license plate of the van is mentioned.\nD. The color of the license plate is mentioned in the description and is yellow.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in large yellow letters. The van has a yellow license plate with black text and a small white sticker on the lower left side of the rear bumper. The rear lights are vertically aligned on both sides of the van.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The text of the sign is mentioned in the description but is not taxi.\nB. The sign or the van is not mentioned.\nC. The text of the sign is not mentioned, but the sign of the van is mentioned.\nD. 
The text of the sign is mentioned in the description and is taxi.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in large yellow letters. The van has a yellow license plate with black text and a small white sticker on the lower left side of the rear bumper. The rear lights are vertically aligned on both sides of the van.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The van is not mentioned in the description.\nB. The grill of the van is not mentioned in the description.\nC. The grill of the van is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in large yellow letters. The van has a yellow license plate with black text and a small white sticker on the lower left side of the rear bumper. The rear lights are vertically aligned on both sides of the van.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The building is not mentioned in the description.\nB. 
The building is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in large yellow letters. The van has a yellow license plate with black text and a small white sticker on the lower left side of the rear bumper. The rear lights are vertically aligned on both sides of the van.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The van is not mentioned in the description.\nB. The front bumper of the van is not mentioned in the description.\nC. The front bumper of the van is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in large yellow letters. The van has a yellow license plate with black text and a small white sticker on the lower left side of the rear bumper. The rear lights are vertically aligned on both sides of the van.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The van is not mentioned in the description.\nB. 
The antenna of the van is not mentioned in the description.\nC. The antenna of the van is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in large yellow letters. The van has a yellow license plate with black text and a small white sticker on the lower left side of the rear bumper. The rear lights are vertically aligned on both sides of the van.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The person is not mentioned in the description.\nB. The person is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the van is mentioned in the description and is black.", + "D. The color of the sticker is mentioned in the description and is white.", + "C. The material of the taillight is not mentioned, but the taillight of the van is mentioned.", + "D. The color of the license plate is mentioned in the description and is yellow.", + "D. The text of the sign is mentioned in the description and is taxi.", + "B. The grill of the van is not mentioned in the description.", + "A. The building is not mentioned in the description.", + "B. The front bumper of the van is not mentioned in the description.", + "B. The antenna of the van is not mentioned in the description.", + "A. The person is not mentioned in the description." 
+ ], + "score": 0.95, + "score_pos": 0.9, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "8331685": { + "pred": "The earphone features a sleek, curved design with a dark gray color. The earpiece is circular and appears to be cushioned for comfort. The headband is also dark gray and has a smooth, slightly glossy finish.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The headphone is not mentioned.", + 0 + ], + [ + "The color of the headphone is not mentioned.", + 0 + ], + [ + "The color of the headphone is mentioned in the description and is black.", + 1 + ], + [ + "The color of the headphone is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the headphone is mentioned in the description but is not black.", + "pred_index": 3, + "question_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ear cup or the headphone is not mentioned.", + 0 + ], + [ + "The type of the ear cup is not mentioned, but the ear cup of the headphone is mentioned.", + 0.5 + ], + [ + "The type of the ear cup is mentioned in the description and is cushioned.", + 1 + ], + [ + "The type of the ear cup is mentioned in the description but is not cushioned.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The type of the ear cup is mentioned in the description and is cushioned.", + "pred_index": 2, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The headband or the headphone is not mentioned.", + 0 + ], + [ + "The shape of the headband is not mentioned, but the headband of the headphone is mentioned.", + 0.5 + ], + [ + "The shape of the headband is mentioned in the description and is curved.", + 1 + ], + [ + "The shape of the headband is mentioned in the description but is not curved.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the headband is not mentioned, but the headband of the headphone is mentioned.", + "pred_index": 1, + "question_index": 2, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The microphone of the headphone is not mentioned in the description.", + 1 + ], + [ + "The microphone of the headphone is mentioned in the description.", + -1 + ], + [ + "The headphone is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The microphone of the headphone is not mentioned in the description.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The keyboard is not mentioned in the description.", + 1 + ], + [ + "The keyboard is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The keyboard is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plant is not mentioned in the description.", + 1 + ], + [ + "The plant is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The plant is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clock is not mentioned in the description.", + 1 + ], + [ + "The clock is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The clock is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle is not mentioned in the description.", + 1 + ], + [ + "The bottle is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The bottle is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a headphone or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe earphone features a sleek, curved design with a dark gray color. The earpiece is circular and appears to be cushioned for comfort. The headband is also dark gray and has a smooth, slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a headphone or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe earphone features a sleek, curved design with a dark gray color. The earpiece is circular and appears to be cushioned for comfort. The headband is also dark gray and has a smooth, slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The headphone is not mentioned.\nB. The color of the headphone is not mentioned.\nC. The color of the headphone is mentioned in the description and is black.\nD. The color of the headphone is mentioned in the description but is not black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe earphone features a sleek, curved design with a dark gray color. The earpiece is circular and appears to be cushioned for comfort. The headband is also dark gray and has a smooth, slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ear cup or the headphone is not mentioned.\nB. The type of the ear cup is not mentioned, but the ear cup of the headphone is mentioned.\nC. The type of the ear cup is mentioned in the description and is cushioned.\nD. The type of the ear cup is mentioned in the description but is not cushioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe earphone features a sleek, curved design with a dark gray color. The earpiece is circular and appears to be cushioned for comfort. The headband is also dark gray and has a smooth, slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The headband or the headphone is not mentioned.\nB. The shape of the headband is not mentioned, but the headband of the headphone is mentioned.\nC. The shape of the headband is mentioned in the description and is curved.\nD. The shape of the headband is mentioned in the description but is not curved.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe earphone features a sleek, curved design with a dark gray color. The earpiece is circular and appears to be cushioned for comfort. The headband is also dark gray and has a smooth, slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The microphone of the headphone is not mentioned in the description.\nB. The microphone of the headphone is mentioned in the description.\nC. The headphone is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe earphone features a sleek, curved design with a dark gray color. The earpiece is circular and appears to be cushioned for comfort. The headband is also dark gray and has a smooth, slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The keyboard is not mentioned in the description.\nB. The keyboard is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe earphone features a sleek, curved design with a dark gray color. The earpiece is circular and appears to be cushioned for comfort. The headband is also dark gray and has a smooth, slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The plant is not mentioned in the description.\nB. The plant is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe earphone features a sleek, curved design with a dark gray color. The earpiece is circular and appears to be cushioned for comfort. The headband is also dark gray and has a smooth, slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The clock is not mentioned in the description.\nB. The clock is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe earphone features a sleek, curved design with a dark gray color. The earpiece is circular and appears to be cushioned for comfort. 
The headband is also dark gray and has a smooth, slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bottle is not mentioned in the description.\nB. The bottle is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the headphone is mentioned in the description but is not black.", + "C. The type of the ear cup is mentioned in the description and is cushioned.", + "B. The shape of the headband is not mentioned, but the headband of the headphone is mentioned.", + "A. The microphone of the headphone is not mentioned in the description.", + "A. The keyboard is not mentioned in the description.", + "A. The plant is not mentioned in the description.", + "A. The clock is not mentioned in the description.", + "A. The bottle is not mentioned in the description." + ], + "score": 0.6875, + "score_pos": 0.16666666666666666, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "8331718": { + "pred": "A black spiral-bound notebook with a white cover and the word \"Xtreme\" written in white on the cover.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the notebook is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the notebook is mentioned in the description and is black.", + 1 + ], + [ + "The notebook is not mentioned.", + 0 + ], + [ + "The color of the notebook is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the notebook is mentioned in the description and is black.", + "pred_index": 1, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the notebook is mentioned in the description but is not spiral-bound.", + -1 + ], + [ + "The type of the notebook is mentioned in the description and is spiral-bound.", + 1 + ], + [ + "The notebook is not mentioned.", + 0 + ], + [ + "The type of the notebook is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The type of the notebook is mentioned in the description and is spiral-bound.", + "pred_index": 1, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The text of the cover is mentioned in the description but is not YAHOO.", + -1 + ], + [ + "The text of the cover is mentioned in the description and is YAHOO.", + 1 + ], + [ + "The cover or the notebook is not mentioned.", + 0 + ], + [ + "The text of the cover is not mentioned, but the cover of the notebook is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The text of the cover is mentioned in the description but is not YAHOO.", + "pred_index": 0, + "question_index": 2, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the logo/text on the cover is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the logo/text on the cover is mentioned in the description and is white.", + 1 + ], + [ + "The logo/text on the cover or the notebook are not mentioned.", + 0 + ], + [ + "The color of the logo/text on the cover is not mentioned, but the logo/text on the cover of the notebook are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the logo/text on the cover is mentioned in the description and is white.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chair is mentioned in the description.", + -1 + ], + [ + "The chair is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The chair is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle is mentioned in the description.", + -1 + ], + [ + "The bottle is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bottle is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bookmark of the notebook is mentioned in the description.", + -1 + ], + [ + "The notebook is not mentioned in the description.", + 0 + ], + [ + "The bookmark of the notebook is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The bookmark of the notebook is not mentioned in the description.", + "pred_index": 2, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouse is mentioned in the description.", + -1 + ], + [ + "The mouse is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The mouse is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The keyboard is mentioned in the description.", + -1 + ], + [ + "The keyboard is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The keyboard is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a notebook or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover and the word \"Xtreme\" written in white on the cover.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a notebook or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover and the word \"Xtreme\" written in white on the cover.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the notebook is mentioned in the description but is not black.\nB. The color of the notebook is mentioned in the description and is black.\nC. The notebook is not mentioned.\nD. The color of the notebook is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover and the word \"Xtreme\" written in white on the cover.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The type of the notebook is mentioned in the description but is not spiral-bound.\nB. The type of the notebook is mentioned in the description and is spiral-bound.\nC. The notebook is not mentioned.\nD. The type of the notebook is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover and the word \"Xtreme\" written in white on the cover.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The text of the cover is mentioned in the description but is not YAHOO.\nB. The text of the cover is mentioned in the description and is YAHOO.\nC. The cover or the notebook is not mentioned.\nD. The text of the cover is not mentioned, but the cover of the notebook is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover and the word \"Xtreme\" written in white on the cover.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the logo/text on the cover is mentioned in the description but is not white.\nB. The color of the logo/text on the cover is mentioned in the description and is white.\nC. The logo/text on the cover or the notebook are not mentioned.\nD. The color of the logo/text on the cover is not mentioned, but the logo/text on the cover of the notebook are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover and the word \"Xtreme\" written in white on the cover.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chair is mentioned in the description.\nB. The chair is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover and the word \"Xtreme\" written in white on the cover.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bottle is mentioned in the description.\nB. The bottle is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover and the word \"Xtreme\" written in white on the cover.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bookmark of the notebook is mentioned in the description.\nB. The notebook is not mentioned in the description.\nC. 
The bookmark of the notebook is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover and the word \"Xtreme\" written in white on the cover.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mouse is mentioned in the description.\nB. The mouse is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover and the word \"Xtreme\" written in white on the cover.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The keyboard is mentioned in the description.\nB. The keyboard is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the notebook is mentioned in the description and is black.", + "B. The type of the notebook is mentioned in the description and is spiral-bound.", + "A. The text of the cover is mentioned in the description but is not YAHOO.", + "B. The color of the logo/text on the cover is mentioned in the description and is white.", + "B. 
The chair is not mentioned in the description.", + "B. The bottle is not mentioned in the description.", + "C. The bookmark of the notebook is not mentioned in the description.", + "B. The mouse is not mentioned in the description.", + "B. The keyboard is not mentioned in the description." + ], + "score": 0.7777777777777778, + "score_pos": 0.5, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "8557176": { + "pred": "The watch features a rectangular gold-toned case with a black dial. It has a black leather strap with white stitching and a small metallic buckle.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The case or the watch is not mentioned.", + 0 + ], + [ + "The color of the case is mentioned in the description but is not golden.", + -1 + ], + [ + "The color of the case is not mentioned, but the case of the watch is mentioned.", + 0.5 + ], + [ + "The color of the case is mentioned in the description and is golden.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the case is mentioned in the description and is golden.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The case or the watch is not mentioned.", + 0 + ], + [ + "The material of the case is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the case is not mentioned, but the case of the watch is mentioned.", + 0.5 + ], + [ + "The material of the case is mentioned in the description and is metal.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The material of the case is mentioned in the description and is metal.", + "pred_index": 3, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The strap or the watch is not mentioned.", + 0 + ], + [ + "The color of the strap is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the strap is not mentioned, but the strap of the watch is mentioned.", + 0.5 + ], + [ + "The color of the strap is mentioned in the description and is black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the strap is mentioned in the description and is black.", + "pred_index": 3, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The strap or the watch is not mentioned.", + 0 + ], + [ + "The material of the strap is mentioned in the description but is not leather.", + -1 + ], + [ + "The material of the strap is not mentioned, but the strap of the watch is mentioned.", + 0.5 + ], + [ + "The material of the strap is mentioned in the description and is leather.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the strap is mentioned in the description and is leather.", + "pred_index": 3, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The food is not mentioned in the description.", + 1 + ], + [ + "The food is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The food is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cup is not mentioned in the description.", + 1 + ], + [ + "The cup is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The cup is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The oven is not mentioned in the description.", + 1 + ], + [ + "The oven is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The oven is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The crab cracker is not mentioned in the description.", + 1 + ], + [ + "The crab cracker is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The crab cracker is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The jar is not mentioned in the description.", + 1 + ], + [ + "The jar is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The jar is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a watch or an object of a similar type? 
Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold-toned case with a black dial. It has a black leather strap with white stitching and a small metallic buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a watch or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold-toned case with a black dial. It has a black leather strap with white stitching and a small metallic buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The case or the watch is not mentioned.\nB. The color of the case is mentioned in the description but is not golden.\nC. The color of the case is not mentioned, but the case of the watch is mentioned.\nD. 
The color of the case is mentioned in the description and is golden.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold-toned case with a black dial. It has a black leather strap with white stitching and a small metallic buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The case or the watch is not mentioned.\nB. The material of the case is mentioned in the description but is not metal.\nC. The material of the case is not mentioned, but the case of the watch is mentioned.\nD. The material of the case is mentioned in the description and is metal.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold-toned case with a black dial. It has a black leather strap with white stitching and a small metallic buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The strap or the watch is not mentioned.\nB. The color of the strap is mentioned in the description but is not black.\nC. The color of the strap is not mentioned, but the strap of the watch is mentioned.\nD. 
The color of the strap is mentioned in the description and is black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold-toned case with a black dial. It has a black leather strap with white stitching and a small metallic buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The strap or the watch is not mentioned.\nB. The material of the strap is mentioned in the description but is not leather.\nC. The material of the strap is not mentioned, but the strap of the watch is mentioned.\nD. The material of the strap is mentioned in the description and is leather.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold-toned case with a black dial. It has a black leather strap with white stitching and a small metallic buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The food is not mentioned in the description.\nB. The food is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold-toned case with a black dial. It has a black leather strap with white stitching and a small metallic buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cup is not mentioned in the description.\nB. The cup is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold-toned case with a black dial. It has a black leather strap with white stitching and a small metallic buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The oven is not mentioned in the description.\nB. The oven is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold-toned case with a black dial. 
It has a black leather strap with white stitching and a small metallic buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The crab cracker is not mentioned in the description.\nB. The crab cracker is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold-toned case with a black dial. It has a black leather strap with white stitching and a small metallic buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The jar is not mentioned in the description.\nB. The jar is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the case is mentioned in the description and is golden.", + "D. The material of the case is mentioned in the description and is metal.", + "D. The color of the strap is mentioned in the description and is black.", + "D. The material of the strap is mentioned in the description and is leather.", + "A. The food is not mentioned in the description.", + "A. The cup is not mentioned in the description.", + "A. The oven is not mentioned in the description.", + "A. The crab cracker is not mentioned in the description.", + "A. The jar is not mentioned in the description." 
+ ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "8557195": { + "pred": "The microwave oven features a smooth, curved, off-white exterior with a slightly reflective surface. The visible part of the microwave includes a rounded edge and a small, dark-colored component at the top.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the body is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the body is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the body is not mentioned, but the body of the toaster is mentioned.", + 0.5 + ], + [ + "The body or the toaster is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The material of the body is mentioned in the description but is not plastic.", + "pred_index": 1, + "question_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the toaster is mentioned in the description and is white or beige.", + 1 + ], + [ + "The color of the toaster is mentioned in the description but is not white or beige.", + -1 + ], + [ + "The color of the toaster is not mentioned.", + 0 + ], + [ + "The toaster is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "D. 
The toaster is not mentioned.", + "pred_index": 3, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the body is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the body is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the body is not mentioned, but the body of the toaster is mentioned.", + 0.5 + ], + [ + "The body or the toaster is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The texture of the body is mentioned in the description and is smooth.", + "pred_index": 0, + "question_index": 2, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lever of the toaster is not mentioned in the description.", + 1 + ], + [ + "The lever of the toaster is mentioned in the description.", + -1 + ], + [ + "The toaster is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The toaster is not mentioned in the description.", + "pred_index": 2, + "question_index": 3, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The knife is not mentioned in the description.", + 1 + ], + [ + "The knife is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The knife is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cord of the toaster is not mentioned in the description.", + 1 + ], + [ + "The cord of the toaster is mentioned in the description.", + -1 + ], + [ + "The toaster is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The toaster is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The knob of the toaster is not mentioned in the description.", + 1 + ], + [ + "The knob of the toaster is mentioned in the description.", + -1 + ], + [ + "The toaster is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The toaster is not mentioned in the description.", + "pred_index": 2, + "question_index": 6, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The utensil is not mentioned in the description.", + 1 + ], + [ + "The utensil is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The utensil is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 0 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a toaster or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "B. 
No", + "pred_index": 1, + "eval_result": "incorrect" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a smooth, curved, off-white exterior with a slightly reflective surface. The visible part of the microwave includes a rounded edge and a small, dark-colored component at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a toaster or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a smooth, curved, off-white exterior with a slightly reflective surface. The visible part of the microwave includes a rounded edge and a small, dark-colored component at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the body is mentioned in the description and is plastic.\nB. The material of the body is mentioned in the description but is not plastic.\nC. The material of the body is not mentioned, but the body of the toaster is mentioned.\nD. 
The body or the toaster is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a smooth, curved, off-white exterior with a slightly reflective surface. The visible part of the microwave includes a rounded edge and a small, dark-colored component at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the toaster is mentioned in the description and is white or beige.\nB. The color of the toaster is mentioned in the description but is not white or beige.\nC. The color of the toaster is not mentioned.\nD. The toaster is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a smooth, curved, off-white exterior with a slightly reflective surface. The visible part of the microwave includes a rounded edge and a small, dark-colored component at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the body is mentioned in the description and is smooth.\nB. The texture of the body is mentioned in the description but is not smooth.\nC. 
The texture of the body is not mentioned, but the body of the toaster is mentioned.\nD. The body or the toaster is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a smooth, curved, off-white exterior with a slightly reflective surface. The visible part of the microwave includes a rounded edge and a small, dark-colored component at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lever of the toaster is not mentioned in the description.\nB. The lever of the toaster is mentioned in the description.\nC. The toaster is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a smooth, curved, off-white exterior with a slightly reflective surface. The visible part of the microwave includes a rounded edge and a small, dark-colored component at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The knife is not mentioned in the description.\nB. 
The knife is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a smooth, curved, off-white exterior with a slightly reflective surface. The visible part of the microwave includes a rounded edge and a small, dark-colored component at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cord of the toaster is not mentioned in the description.\nB. The cord of the toaster is mentioned in the description.\nC. The toaster is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a smooth, curved, off-white exterior with a slightly reflective surface. The visible part of the microwave includes a rounded edge and a small, dark-colored component at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The knob of the toaster is not mentioned in the description.\nB. The knob of the toaster is mentioned in the description.\nC. 
The toaster is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a smooth, curved, off-white exterior with a slightly reflective surface. The visible part of the microwave includes a rounded edge and a small, dark-colored component at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The utensil is not mentioned in the description.\nB. The utensil is mentioned in the description.\n" + ], + "response": [ + "B. No", + "B. The material of the body is mentioned in the description but is not plastic.", + "D. The toaster is not mentioned.", + "A. The texture of the body is mentioned in the description and is smooth.", + "C. The toaster is not mentioned in the description.", + "A. The knife is not mentioned in the description.", + "C. The toaster is not mentioned in the description.", + "C. The toaster is not mentioned in the description.", + "A. The utensil is not mentioned in the description." + ], + "score": -0.125, + "score_pos": -0.3333333333333333, + "score_neg": 0.0, + "neg_valid_num": 5, + "recognition_result": false + }, + "9766617": { + "pred": "The goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. 
The underbelly is white, and the legs and feet are greenish.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The head or the duck/goose is not mentioned.", + 0 + ], + [ + "The color of the head is mentioned in the description and is black.", + 1 + ], + [ + "The color of the head is not mentioned, but the head of the duck/goose is mentioned.", + 0.5 + ], + [ + "The color of the head is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the head is mentioned in the description and is black.", + "pred_index": 1, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The beak or the duck/goose is not mentioned.", + 0 + ], + [ + "The color of the beak is mentioned in the description and is black.", + 1 + ], + [ + "The color of the beak is not mentioned, but the beak of the duck/goose is mentioned.", + 0.5 + ], + [ + "The color of the beak is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the beak is mentioned in the description and is black.", + "pred_index": 1, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The body or the duck/goose is not mentioned.", + 0 + ], + [ + "The color of the body is mentioned in the description and is grey or brown.", + 1 + ], + [ + "The color of the body is not mentioned, but the body of the duck/goose is mentioned.", + 0.5 + ], + [ + "The color of the body is mentioned in the description but is not grey or brown.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the body is mentioned in the description and is grey or brown.", + "pred_index": 1, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tail or the duck/goose is not mentioned.", + 0 + ], + [ + "The color of the tail is mentioned in the description and is black and white.", + 1 + ], + [ + "The color of the tail is not mentioned, but the tail of the duck/goose is mentioned.", + 0.5 + ], + [ + "The color of the tail is mentioned in the description but is not black and white.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The tail or the duck/goose is not mentioned.", + "pred_index": 0, + "question_index": 3, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wings or the duck/goose are not mentioned.", + 0 + ], + [ + "The shape of the wings is mentioned in the description and is folded.", + 1 + ], + [ + "The shape of the wings is not mentioned, but the wings of the duck/goose are mentioned.", + 0.5 + ], + [ + "The shape of the wings is mentioned in the description but is not folded.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the wings is not mentioned, but the wings of the duck/goose are mentioned.", + "pred_index": 2, + "question_index": 4, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The feet of the duck/goose are mentioned in the description.", + -1 + ], + [ + "The feet of the duck/goose are not mentioned in the description.", + 1 + ], + [ + "The duck/goose is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The feet of the duck/goose are mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mud of the duck/goose is mentioned in the description.", + -1 + ], + [ + "The mud of the duck/goose is not mentioned in the description.", + 1 + ], + [ + "The duck/goose is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The mud of the duck/goose is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The grass are mentioned in the description.", + -1 + ], + [ + "The grass are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The grass are not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pigeon is mentioned in the description.", + -1 + ], + [ + "The pigeon is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The pigeon is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tree is mentioned in the description.", + -1 + ], + [ + "The tree is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The tree is not mentioned in the description.", + "pred_index": 1, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a duck/goose or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The underbelly is white, and the legs and feet are greenish.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a duck/goose or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. 
The underbelly is white, and the legs and feet are greenish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The head or the duck/goose is not mentioned.\nB. The color of the head is mentioned in the description and is black.\nC. The color of the head is not mentioned, but the head of the duck/goose is mentioned.\nD. The color of the head is mentioned in the description but is not black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The underbelly is white, and the legs and feet are greenish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The beak or the duck/goose is not mentioned.\nB. The color of the beak is mentioned in the description and is black.\nC. The color of the beak is not mentioned, but the beak of the duck/goose is mentioned.\nD. The color of the beak is mentioned in the description but is not black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The underbelly is white, and the legs and feet are greenish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The body or the duck/goose is not mentioned.\nB. The color of the body is mentioned in the description and is grey or brown.\nC. The color of the body is not mentioned, but the body of the duck/goose is mentioned.\nD. The color of the body is mentioned in the description but is not grey or brown.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The underbelly is white, and the legs and feet are greenish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tail or the duck/goose is not mentioned.\nB. The color of the tail is mentioned in the description and is black and white.\nC. The color of the tail is not mentioned, but the tail of the duck/goose is mentioned.\nD. The color of the tail is mentioned in the description but is not black and white.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The underbelly is white, and the legs and feet are greenish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wings or the duck/goose are not mentioned.\nB. The shape of the wings is mentioned in the description and is folded.\nC. The shape of the wings is not mentioned, but the wings of the duck/goose are mentioned.\nD. The shape of the wings is mentioned in the description but is not folded.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The underbelly is white, and the legs and feet are greenish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The feet of the duck/goose are mentioned in the description.\nB. The feet of the duck/goose are not mentioned in the description.\nC. 
The duck/goose is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The underbelly is white, and the legs and feet are greenish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mud of the duck/goose is mentioned in the description.\nB. The mud of the duck/goose is not mentioned in the description.\nC. The duck/goose is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The underbelly is white, and the legs and feet are greenish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The grass are mentioned in the description.\nB. 
The grass are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The underbelly is white, and the legs and feet are greenish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pigeon is mentioned in the description.\nB. The pigeon is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The underbelly is white, and the legs and feet are greenish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tree is mentioned in the description.\nB. The tree is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the head is mentioned in the description and is black.", + "B. 
The color of the beak is mentioned in the description and is black.", + "B. The color of the body is mentioned in the description and is grey or brown.", + "A. The tail or the duck/goose is not mentioned.", + "C. The shape of the wings is not mentioned, but the wings of the duck/goose are mentioned.", + "A. The feet of the duck/goose are mentioned in the description.", + "B. The mud of the duck/goose is not mentioned in the description.", + "B. The grass are not mentioned in the description.", + "B. The pigeon is not mentioned in the description.", + "B. The tree is not mentioned in the description." + ], + "score": 0.65, + "score_pos": 0.7, + "score_neg": 0.6, + "neg_valid_num": 5, + "recognition_result": true + }, + "11021544": { + "pred": "The faucet features a sleek, curved design with a polished chrome finish. It has a single lever handle on the right side for controlling water flow and temperature. The spout is slightly arched, extending outward with a smooth, flowing curve.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the faucet is mentioned in the description but is not metallic.", + -1 + ], + [ + "The texture of the faucet is not mentioned.", + 0 + ], + [ + "The faucet is not mentioned.", + 0 + ], + [ + "The texture of the faucet is mentioned in the description and is metallic.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The texture of the faucet is mentioned in the description and is metallic.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the spout is mentioned in the description but is not curved or arc.", + -1 + ], + [ + "The shape of the spout is not mentioned, but the spout of the faucet is mentioned.", + 0.5 + ], + [ + "The spout or the faucet is not mentioned.", + 0 + ], + [ + "The shape of the spout is mentioned in the description and is curved or arc.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the spout is mentioned in the description and is curved or arc.", + "pred_index": 3, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the faucet is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the faucet is not mentioned.", + 0 + ], + [ + "The faucet is not mentioned.", + 0 + ], + [ + "The material of the faucet is mentioned in the description and is metal.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the faucet is mentioned in the description and is metal.", + "pred_index": 3, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The soap dispenser of the faucet is not mentioned in the description.", + 1 + ], + [ + "The faucet is not mentioned in the description.", + 0 + ], + [ + "The soap dispenser of the faucet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The soap dispenser of the faucet is not mentioned in the description.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The side spray of the faucet is not mentioned in the description.", + 1 + ], + [ + "The faucet is not mentioned in the description.", + 0 + ], + [ + "The side spray of the faucet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The side spray of the faucet is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chair is not mentioned in the description.", + 1 + ], + [ + "The chair is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The chair is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sprayer of the faucet is not mentioned in the description.", + 1 + ], + [ + "The faucet is not mentioned in the description.", + 0 + ], + [ + "The sprayer of the faucet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The sprayer of the faucet is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cabinet door is not mentioned in the description.", + 1 + ], + [ + "The cabinet door is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The cabinet door is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a faucet or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe faucet features a sleek, curved design with a polished chrome finish. It has a single lever handle on the right side for controlling water flow and temperature. The spout is slightly arched, extending outward with a smooth, flowing curve.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a faucet or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe faucet features a sleek, curved design with a polished chrome finish. It has a single lever handle on the right side for controlling water flow and temperature. 
The spout is slightly arched, extending outward with a smooth, flowing curve.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the faucet is mentioned in the description but is not metallic.\nB. The texture of the faucet is not mentioned.\nC. The faucet is not mentioned.\nD. The texture of the faucet is mentioned in the description and is metallic.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe faucet features a sleek, curved design with a polished chrome finish. It has a single lever handle on the right side for controlling water flow and temperature. The spout is slightly arched, extending outward with a smooth, flowing curve.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the spout is mentioned in the description but is not curved or arc.\nB. The shape of the spout is not mentioned, but the spout of the faucet is mentioned.\nC. The spout or the faucet is not mentioned.\nD. The shape of the spout is mentioned in the description and is curved or arc.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe faucet features a sleek, curved design with a polished chrome finish. It has a single lever handle on the right side for controlling water flow and temperature. The spout is slightly arched, extending outward with a smooth, flowing curve.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the faucet is mentioned in the description but is not metal.\nB. The material of the faucet is not mentioned.\nC. The faucet is not mentioned.\nD. The material of the faucet is mentioned in the description and is metal.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe faucet features a sleek, curved design with a polished chrome finish. It has a single lever handle on the right side for controlling water flow and temperature. The spout is slightly arched, extending outward with a smooth, flowing curve.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The soap dispenser of the faucet is not mentioned in the description.\nB. The faucet is not mentioned in the description.\nC. The soap dispenser of the faucet is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe faucet features a sleek, curved design with a polished chrome finish. It has a single lever handle on the right side for controlling water flow and temperature. The spout is slightly arched, extending outward with a smooth, flowing curve.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The side spray of the faucet is not mentioned in the description.\nB. The faucet is not mentioned in the description.\nC. The side spray of the faucet is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe faucet features a sleek, curved design with a polished chrome finish. It has a single lever handle on the right side for controlling water flow and temperature. The spout is slightly arched, extending outward with a smooth, flowing curve.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chair is not mentioned in the description.\nB. The chair is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe faucet features a sleek, curved design with a polished chrome finish. It has a single lever handle on the right side for controlling water flow and temperature. The spout is slightly arched, extending outward with a smooth, flowing curve.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sprayer of the faucet is not mentioned in the description.\nB. The faucet is not mentioned in the description.\nC. The sprayer of the faucet is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe faucet features a sleek, curved design with a polished chrome finish. It has a single lever handle on the right side for controlling water flow and temperature. The spout is slightly arched, extending outward with a smooth, flowing curve.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cabinet door is not mentioned in the description.\nB. The cabinet door is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The texture of the faucet is mentioned in the description and is metallic.", + "D. The shape of the spout is mentioned in the description and is curved or arc.", + "D. The material of the faucet is mentioned in the description and is metal.", + "A. The soap dispenser of the faucet is not mentioned in the description.", + "A. 
The side spray of the faucet is not mentioned in the description.", + "A. The chair is not mentioned in the description.", + "A. The sprayer of the faucet is not mentioned in the description.", + "A. The cabinet door is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "11775390": { + "pred": "A green rubber shoe with a thick, textured sole and multiple circular holes on the side. The shoe features a black and white design on the side, with a prominent black section and white accents. The upper part of the shoe has a smooth, rounded shape with a slight curve at the top.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sandals are not mentioned.", + 0 + ], + [ + "The color of the sandals is mentioned in the description but is not green.", + -1 + ], + [ + "The color of the sandals is not mentioned.", + 0 + ], + [ + "The color of the sandals is mentioned in the description and is green.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the sandals is mentioned in the description and is green.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sandals are not mentioned.", + 0 + ], + [ + "The style of the sandals is mentioned in the description but is not perforated.", + -1 + ], + [ + "The style of the sandals is not mentioned.", + 0 + ], + [ + "The style of the sandals is mentioned in the description and is perforated.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The style of the sandals is mentioned in the description and is perforated.", + "pred_index": 3, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sandals are not mentioned.", + 0 + ], + [ + "The material of the sandals is mentioned in the description but is not rubber.", + -1 + ], + [ + "The material of the sandals is not mentioned.", + 0 + ], + [ + "The material of the sandals is mentioned in the description and is rubber.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the sandals is mentioned in the description and is rubber.", + "pred_index": 3, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The strap or the sandals is not mentioned.", + 0 + ], + [ + "The number of parts of the strap is mentioned in the description but is not one.", + -1 + ], + [ + "The number of parts of the strap is not mentioned, but the strap of the sandals is mentioned.", + 0.5 + ], + [ + "The number of parts of the strap is mentioned in the description and is one.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The strap or the sandals is not mentioned.", + "pred_index": 0, + "question_index": 3, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The trailer is not mentioned in the description.", + 1 + ], + [ + "The trailer is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The trailer is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tire is not mentioned in the description.", + 1 + ], + [ + "The tire is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The tire is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pumpkins are not mentioned in the description.", + 1 + ], + [ + "The pumpkins are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The pumpkins are not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The box is not mentioned in the description.", + 1 + ], + [ + "The box is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The box is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The boy is not mentioned in the description.", + 1 + ], + [ + "The boy is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The boy is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the objects in the description are sandals or objects of a similar type? 
Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a thick, textured sole and multiple circular holes on the side. The shoe features a black and white design on the side, with a prominent black section and white accents. The upper part of the shoe has a smooth, rounded shape with a slight curve at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the objects in the description are sandals or objects of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a thick, textured sole and multiple circular holes on the side. The shoe features a black and white design on the side, with a prominent black section and white accents. 
The upper part of the shoe has a smooth, rounded shape with a slight curve at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sandals are not mentioned.\nB. The color of the sandals is mentioned in the description but is not green.\nC. The color of the sandals is not mentioned.\nD. The color of the sandals is mentioned in the description and is green.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a thick, textured sole and multiple circular holes on the side. The shoe features a black and white design on the side, with a prominent black section and white accents. The upper part of the shoe has a smooth, rounded shape with a slight curve at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sandals are not mentioned.\nB. The style of the sandals is mentioned in the description but is not perforated.\nC. The style of the sandals is not mentioned.\nD. The style of the sandals is mentioned in the description and is perforated.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a thick, textured sole and multiple circular holes on the side. The shoe features a black and white design on the side, with a prominent black section and white accents. The upper part of the shoe has a smooth, rounded shape with a slight curve at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sandals are not mentioned.\nB. The material of the sandals is mentioned in the description but is not rubber.\nC. The material of the sandals is not mentioned.\nD. The material of the sandals is mentioned in the description and is rubber.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a thick, textured sole and multiple circular holes on the side. The shoe features a black and white design on the side, with a prominent black section and white accents. The upper part of the shoe has a smooth, rounded shape with a slight curve at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The strap or the sandals is not mentioned.\nB. The number of parts of the strap is mentioned in the description but is not one.\nC. The number of parts of the strap is not mentioned, but the strap of the sandals is mentioned.\nD. 
The number of parts of the strap is mentioned in the description and is one.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a thick, textured sole and multiple circular holes on the side. The shoe features a black and white design on the side, with a prominent black section and white accents. The upper part of the shoe has a smooth, rounded shape with a slight curve at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The trailer is not mentioned in the description.\nB. The trailer is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a thick, textured sole and multiple circular holes on the side. The shoe features a black and white design on the side, with a prominent black section and white accents. The upper part of the shoe has a smooth, rounded shape with a slight curve at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tire is not mentioned in the description.\nB. 
The tire is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a thick, textured sole and multiple circular holes on the side. The shoe features a black and white design on the side, with a prominent black section and white accents. The upper part of the shoe has a smooth, rounded shape with a slight curve at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pumpkins are not mentioned in the description.\nB. The pumpkins are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a thick, textured sole and multiple circular holes on the side. The shoe features a black and white design on the side, with a prominent black section and white accents. The upper part of the shoe has a smooth, rounded shape with a slight curve at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The box is not mentioned in the description.\nB. 
The box is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a thick, textured sole and multiple circular holes on the side. The shoe features a black and white design on the side, with a prominent black section and white accents. The upper part of the shoe has a smooth, rounded shape with a slight curve at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The boy is not mentioned in the description.\nB. The boy is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the sandals is mentioned in the description and is green.", + "D. The style of the sandals is mentioned in the description and is perforated.", + "D. The material of the sandals is mentioned in the description and is rubber.", + "A. The strap or the sandals is not mentioned.", + "A. The trailer is not mentioned in the description.", + "A. The tire is not mentioned in the description.", + "A. The pumpkins are not mentioned in the description.", + "A. The box is not mentioned in the description.", + "A. The boy is not mentioned in the description." + ], + "score": 0.8888888888888888, + "score_pos": 0.75, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "11950619": { + "pred": "The racket has a light-colored wooden handle with a smooth finish. The head of the racket is covered with a transparent protective guard, revealing a blue and white string bed. 
The guard has a rectangular shape with rounded edges and is secured to the head of the racket.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the handle is mentioned in the description and is white, beige, or wooden color.", + 1 + ], + [ + "The color of the handle is not mentioned, but the handle of the table tennis paddle is mentioned.", + 0.5 + ], + [ + "The handle or the table tennis paddle is not mentioned.", + 0 + ], + [ + "The color of the handle is mentioned in the description but is not white, beige, or wooden color.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the handle is mentioned in the description and is white, beige, or wooden color.", + "pred_index": 0, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the handle is mentioned in the description and is wood.", + 1 + ], + [ + "The material of the handle is not mentioned, but the handle of the table tennis paddle is mentioned.", + 0.5 + ], + [ + "The handle or the table tennis paddle is not mentioned.", + 0 + ], + [ + "The material of the handle is mentioned in the description but is not wood.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The material of the handle is mentioned in the description and is wood.", + "pred_index": 0, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the cover is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the cover is not mentioned, but the cover of the table tennis paddle is mentioned.", + 0.5 + ], + [ + "The cover or the table tennis paddle is not mentioned.", + 0 + ], + [ + "The material of the cover is mentioned in the description but is not plastic.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the cover is mentioned in the description but is not plastic.", + "pred_index": 3, + "question_index": 2, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the head is mentioned in the description and is rounded.", + 1 + ], + [ + "The shape of the head is not mentioned, but the head of the table tennis paddle is mentioned.", + 0.5 + ], + [ + "The head or the table tennis paddle is not mentioned.", + 0 + ], + [ + "The shape of the head is mentioned in the description but is not rounded.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the head is mentioned in the description and is rounded.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The dumbbell is not mentioned in the description.", + 1 + ], + [ + "The dumbbell is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The dumbbell is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The table tennis paddle is not mentioned in the description.", + 0 + ], + [ + "The edge tape of the table tennis paddle is not mentioned in the description.", + 1 + ], + [ + "The edge tape of the table tennis paddle is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The edge tape of the table tennis paddle is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The table tennis paddle is not mentioned in the description.", + 0 + ], + [ + "The logo of the table tennis paddle is not mentioned in the description.", + 1 + ], + [ + "The logo of the table tennis paddle is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The table tennis paddle is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mat is not mentioned in the description.", + 1 + ], + [ + "The mat is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The mat is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cabinet is not mentioned in the description.", + 1 + ], + [ + "The cabinet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The cabinet is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a table tennis paddle or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe racket has a light-colored wooden handle with a smooth finish. The head of the racket is covered with a transparent protective guard, revealing a blue and white string bed. The guard has a rectangular shape with rounded edges and is secured to the head of the racket.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a table tennis paddle or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe racket has a light-colored wooden handle with a smooth finish. 
The head of the racket is covered with a transparent protective guard, revealing a blue and white string bed. The guard has a rectangular shape with rounded edges and is secured to the head of the racket.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the handle is mentioned in the description and is white, beige, or wooden color.\nB. The color of the handle is not mentioned, but the handle of the table tennis paddle is mentioned.\nC. The handle or the table tennis paddle is not mentioned.\nD. The color of the handle is mentioned in the description but is not white, beige, or wooden color.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe racket has a light-colored wooden handle with a smooth finish. The head of the racket is covered with a transparent protective guard, revealing a blue and white string bed. The guard has a rectangular shape with rounded edges and is secured to the head of the racket.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the handle is mentioned in the description and is wood.\nB. The material of the handle is not mentioned, but the handle of the table tennis paddle is mentioned.\nC. The handle or the table tennis paddle is not mentioned.\nD. The material of the handle is mentioned in the description but is not wood.\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe racket has a light-colored wooden handle with a smooth finish. The head of the racket is covered with a transparent protective guard, revealing a blue and white string bed. The guard has a rectangular shape with rounded edges and is secured to the head of the racket.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the cover is mentioned in the description and is plastic.\nB. The material of the cover is not mentioned, but the cover of the table tennis paddle is mentioned.\nC. The cover or the table tennis paddle is not mentioned.\nD. The material of the cover is mentioned in the description but is not plastic.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe racket has a light-colored wooden handle with a smooth finish. The head of the racket is covered with a transparent protective guard, revealing a blue and white string bed. The guard has a rectangular shape with rounded edges and is secured to the head of the racket.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the head is mentioned in the description and is rounded.\nB. 
The shape of the head is not mentioned, but the head of the table tennis paddle is mentioned.\nC. The head or the table tennis paddle is not mentioned.\nD. The shape of the head is mentioned in the description but is not rounded.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe racket has a light-colored wooden handle with a smooth finish. The head of the racket is covered with a transparent protective guard, revealing a blue and white string bed. The guard has a rectangular shape with rounded edges and is secured to the head of the racket.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The dumbbell is not mentioned in the description.\nB. The dumbbell is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe racket has a light-colored wooden handle with a smooth finish. The head of the racket is covered with a transparent protective guard, revealing a blue and white string bed. 
The guard has a rectangular shape with rounded edges and is secured to the head of the racket.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The table tennis paddle is not mentioned in the description.\nB. The edge tape of the table tennis paddle is not mentioned in the description.\nC. The edge tape of the table tennis paddle is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe racket has a light-colored wooden handle with a smooth finish. The head of the racket is covered with a transparent protective guard, revealing a blue and white string bed. The guard has a rectangular shape with rounded edges and is secured to the head of the racket.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The table tennis paddle is not mentioned in the description.\nB. The logo of the table tennis paddle is not mentioned in the description.\nC. The logo of the table tennis paddle is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe racket has a light-colored wooden handle with a smooth finish. 
The head of the racket is covered with a transparent protective guard, revealing a blue and white string bed. The guard has a rectangular shape with rounded edges and is secured to the head of the racket.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mat is not mentioned in the description.\nB. The mat is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe racket has a light-colored wooden handle with a smooth finish. The head of the racket is covered with a transparent protective guard, revealing a blue and white string bed. The guard has a rectangular shape with rounded edges and is secured to the head of the racket.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cabinet is not mentioned in the description.\nB. The cabinet is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The color of the handle is mentioned in the description and is white, beige, or wooden color.", + "A. The material of the handle is mentioned in the description and is wood.", + "D. The material of the cover is mentioned in the description but is not plastic.", + "A. The shape of the head is mentioned in the description and is rounded.", + "A. The dumbbell is not mentioned in the description.", + "B. The edge tape of the table tennis paddle is not mentioned in the description.", + "A. The table tennis paddle is not mentioned in the description.", + "A. 
The mat is not mentioned in the description.", + "A. The cabinet is not mentioned in the description." + ], + "score": 0.6666666666666666, + "score_pos": 0.5, + "score_neg": 0.8, + "neg_valid_num": 5, + "recognition_result": true + }, + "12178946": { + "pred": "A cylindrical bottle with a yellow cap and a blue label featuring white text.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle is not mentioned.", + 0 + ], + [ + "The shape of the bottle is mentioned in the description and is cylindrical.", + 1 + ], + [ + "The shape of the bottle is not mentioned.", + 0 + ], + [ + "The shape of the bottle is mentioned in the description but is not cylindrical.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the bottle is mentioned in the description and is cylindrical.", + "pred_index": 1, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cap or the bottle is not mentioned.", + 0 + ], + [ + "The color of the cap is mentioned in the description and is yellow.", + 1 + ], + [ + "The color of the cap is not mentioned, but the cap of the bottle is mentioned.", + 0.5 + ], + [ + "The color of the cap is mentioned in the description but is not yellow.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the cap is mentioned in the description and is yellow.", + "pred_index": 1, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The label or the bottle is not mentioned.", + 0 + ], + [ + "The color of the label is mentioned in the description and is blue.", + 1 + ], + [ + "The color of the label is not mentioned, but the label of the bottle is mentioned.", + 0.5 + ], + [ + "The color of the label is mentioned in the description but is not blue.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the label is mentioned in the description and is blue.", + "pred_index": 1, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The text or the bottle is not mentioned.", + 0 + ], + [ + "The color of the text is mentioned in the description and is white.", + 1 + ], + [ + "The color of the text is not mentioned, but the text of the bottle is mentioned.", + 0.5 + ], + [ + "The color of the text is mentioned in the description but is not white.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the text is mentioned in the description and is white.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The apple is mentioned in the description.", + -1 + ], + [ + "The apple is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The apple is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle is not mentioned in the description.", + 0 + ], + [ + "The handle of the bottle is mentioned in the description.", + -1 + ], + [ + "The handle of the bottle is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The handle of the bottle is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle is not mentioned in the description.", + 0 + ], + [ + "The trigger of the bottle is mentioned in the description.", + -1 + ], + [ + "The trigger of the bottle is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The trigger of the bottle is not mentioned in the description.", + "pred_index": 2, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The measuring cup is mentioned in the description.", + -1 + ], + [ + "The measuring cup is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The measuring cup is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cutting board is mentioned in the description.", + -1 + ], + [ + "The cutting board is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The cutting board is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a bottle or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical bottle with a yellow cap and a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a bottle or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical bottle with a yellow cap and a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bottle is not mentioned.\nB. 
The shape of the bottle is mentioned in the description and is cylindrical.\nC. The shape of the bottle is not mentioned.\nD. The shape of the bottle is mentioned in the description but is not cylindrical.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical bottle with a yellow cap and a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cap or the bottle is not mentioned.\nB. The color of the cap is mentioned in the description and is yellow.\nC. The color of the cap is not mentioned, but the cap of the bottle is mentioned.\nD. The color of the cap is mentioned in the description but is not yellow.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical bottle with a yellow cap and a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The label or the bottle is not mentioned.\nB. The color of the label is mentioned in the description and is blue.\nC. The color of the label is not mentioned, but the label of the bottle is mentioned.\nD. 
The color of the label is mentioned in the description but is not blue.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical bottle with a yellow cap and a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The text or the bottle is not mentioned.\nB. The color of the text is mentioned in the description and is white.\nC. The color of the text is not mentioned, but the text of the bottle is mentioned.\nD. The color of the text is mentioned in the description but is not white.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical bottle with a yellow cap and a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The apple is mentioned in the description.\nB. The apple is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical bottle with a yellow cap and a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bottle is not mentioned in the description.\nB. The handle of the bottle is mentioned in the description.\nC. The handle of the bottle is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical bottle with a yellow cap and a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bottle is not mentioned in the description.\nB. The trigger of the bottle is mentioned in the description.\nC. The trigger of the bottle is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical bottle with a yellow cap and a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The measuring cup is mentioned in the description.\nB. The measuring cup is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical bottle with a yellow cap and a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cutting board is mentioned in the description.\nB. The cutting board is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The shape of the bottle is mentioned in the description and is cylindrical.", + "B. The color of the cap is mentioned in the description and is yellow.", + "B. The color of the label is mentioned in the description and is blue.", + "B. The color of the text is mentioned in the description and is white.", + "B. The apple is not mentioned in the description.", + "C. The handle of the bottle is not mentioned in the description.", + "C. The trigger of the bottle is not mentioned in the description.", + "B. The measuring cup is not mentioned in the description.", + "B. The cutting board is not mentioned in the description." 
+ ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "12348079": { + "pred": "A digital weighing scale with a rectangular, blue weighing platform on top. The scale has a white base with a control panel on the left side, featuring several buttons and a display screen. The right side of the scale has a series of blue and black buttons.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The weighing platform or the scale is not mentioned.", + 0 + ], + [ + "The shape of the weighing platform is not mentioned, but the weighing platform of the scale is mentioned.", + 0.5 + ], + [ + "The shape of the weighing platform is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the weighing platform is mentioned in the description but is not rectangular.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the weighing platform is mentioned in the description and is rectangular.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The weighing platform or the scale is not mentioned.", + 0 + ], + [ + "The color of the weighing platform is not mentioned, but the weighing platform of the scale is mentioned.", + 0.5 + ], + [ + "The color of the weighing platform is mentioned in the description and is blue.", + 1 + ], + [ + "The color of the weighing platform is mentioned in the description but is not blue.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the weighing platform is mentioned in the description and is blue.", + "pred_index": 2, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The keypad or the scale is not mentioned.", + 0 + ], + [ + "The shape of the keypad is not mentioned, but the keypad of the scale is mentioned.", + 0.5 + ], + [ + "The shape of the keypad is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the keypad is mentioned in the description but is not rectangular.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The keypad or the scale is not mentioned.", + "pred_index": 0, + "question_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The display screen or the scale is not mentioned.", + 0 + ], + [ + "The color of the display screen is not mentioned, but the display screen of the scale is mentioned.", + 0.5 + ], + [ + "The color of the display screen is mentioned in the description and is black.", + 1 + ], + [ + "The color of the display screen is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the display screen is not mentioned, but the display screen of the scale is mentioned.", + "pred_index": 1, + "question_index": 3, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The body or the scale is not mentioned.", + 0 + ], + [ + "The color of the body is not mentioned, but the body of the scale is mentioned.", + 0.5 + ], + [ + "The color of the body is mentioned in the description and is white.", + 1 + ], + [ + "The color of the body is mentioned in the description but is not white.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the body is mentioned in the description and is white.", + "pred_index": 2, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The power cord of the scale is not mentioned in the description.", + 1 + ], + [ + "The scale is not mentioned in the description.", + 0 + ], + [ + "The power cord of the scale is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The power cord of the scale is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The usb cable of the scale is not mentioned in the description.", + 1 + ], + [ + "The scale is not mentioned in the description.", + 0 + ], + [ + "The usb cable of the scale is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The usb cable of the scale is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bicycle is not mentioned in the description.", + 1 + ], + [ + "The bicycle is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The bicycle is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The child is not mentioned in the description.", + 1 + ], + [ + "The child is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The child is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The woman is not mentioned in the description.", + 1 + ], + [ + "The woman is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The woman is not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a scale or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA digital weighing scale with a rectangular, blue weighing platform on top. The scale has a white base with a control panel on the left side, featuring several buttons and a display screen. The right side of the scale has a series of blue and black buttons.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a scale or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA digital weighing scale with a rectangular, blue weighing platform on top. The scale has a white base with a control panel on the left side, featuring several buttons and a display screen. The right side of the scale has a series of blue and black buttons.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The weighing platform or the scale is not mentioned.\nB. The shape of the weighing platform is not mentioned, but the weighing platform of the scale is mentioned.\nC. The shape of the weighing platform is mentioned in the description and is rectangular.\nD. The shape of the weighing platform is mentioned in the description but is not rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA digital weighing scale with a rectangular, blue weighing platform on top. The scale has a white base with a control panel on the left side, featuring several buttons and a display screen. The right side of the scale has a series of blue and black buttons.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The weighing platform or the scale is not mentioned.\nB. 
The color of the weighing platform is not mentioned, but the weighing platform of the scale is mentioned.\nC. The color of the weighing platform is mentioned in the description and is blue.\nD. The color of the weighing platform is mentioned in the description but is not blue.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA digital weighing scale with a rectangular, blue weighing platform on top. The scale has a white base with a control panel on the left side, featuring several buttons and a display screen. The right side of the scale has a series of blue and black buttons.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The keypad or the scale is not mentioned.\nB. The shape of the keypad is not mentioned, but the keypad of the scale is mentioned.\nC. The shape of the keypad is mentioned in the description and is rectangular.\nD. The shape of the keypad is mentioned in the description but is not rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA digital weighing scale with a rectangular, blue weighing platform on top. The scale has a white base with a control panel on the left side, featuring several buttons and a display screen. 
The right side of the scale has a series of blue and black buttons.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The display screen or the scale is not mentioned.\nB. The color of the display screen is not mentioned, but the display screen of the scale is mentioned.\nC. The color of the display screen is mentioned in the description and is black.\nD. The color of the display screen is mentioned in the description but is not black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA digital weighing scale with a rectangular, blue weighing platform on top. The scale has a white base with a control panel on the left side, featuring several buttons and a display screen. The right side of the scale has a series of blue and black buttons.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The body or the scale is not mentioned.\nB. The color of the body is not mentioned, but the body of the scale is mentioned.\nC. The color of the body is mentioned in the description and is white.\nD. The color of the body is mentioned in the description but is not white.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA digital weighing scale with a rectangular, blue weighing platform on top. The scale has a white base with a control panel on the left side, featuring several buttons and a display screen. The right side of the scale has a series of blue and black buttons.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The power cord of the scale is not mentioned in the description.\nB. The scale is not mentioned in the description.\nC. The power cord of the scale is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA digital weighing scale with a rectangular, blue weighing platform on top. The scale has a white base with a control panel on the left side, featuring several buttons and a display screen. The right side of the scale has a series of blue and black buttons.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The usb cable of the scale is not mentioned in the description.\nB. The scale is not mentioned in the description.\nC. The usb cable of the scale is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA digital weighing scale with a rectangular, blue weighing platform on top. The scale has a white base with a control panel on the left side, featuring several buttons and a display screen. The right side of the scale has a series of blue and black buttons.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bicycle is not mentioned in the description.\nB. The bicycle is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA digital weighing scale with a rectangular, blue weighing platform on top. The scale has a white base with a control panel on the left side, featuring several buttons and a display screen. The right side of the scale has a series of blue and black buttons.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The child is not mentioned in the description.\nB. The child is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA digital weighing scale with a rectangular, blue weighing platform on top. The scale has a white base with a control panel on the left side, featuring several buttons and a display screen. The right side of the scale has a series of blue and black buttons.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The woman is not mentioned in the description.\nB. The woman is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The shape of the weighing platform is mentioned in the description and is rectangular.", + "C. The color of the weighing platform is mentioned in the description and is blue.", + "A. The keypad or the scale is not mentioned.", + "B. The color of the display screen is not mentioned, but the display screen of the scale is mentioned.", + "C. The color of the body is mentioned in the description and is white.", + "A. The power cord of the scale is not mentioned in the description.", + "A. The usb cable of the scale is not mentioned in the description.", + "A. The bicycle is not mentioned in the description.", + "A. The child is not mentioned in the description.", + "A. The woman is not mentioned in the description." + ], + "score": 0.85, + "score_pos": 0.7, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "14832137": { + "pred": "A cylindrical, dark blue plastic bucket with a smooth surface and a slightly flared rim. 
The bucket has a handle attached to the top edge, which is also dark blue.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The barrel/bucket is not mentioned.", + 0 + ], + [ + "The shape of the barrel/bucket is mentioned in the description and is round or cylindrical.", + 1 + ], + [ + "The shape of the barrel/bucket is mentioned in the description but is not round or cylindrical.", + -1 + ], + [ + "The shape of the barrel/bucket is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the barrel/bucket is mentioned in the description and is round or cylindrical.", + "pred_index": 1, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle or the barrel/bucket is not mentioned.", + 0 + ], + [ + "The shape of the handle is mentioned in the description and is curved.", + 1 + ], + [ + "The shape of the handle is mentioned in the description but is not curved.", + -1 + ], + [ + "The shape of the handle is not mentioned, but the handle of the barrel/bucket is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the handle is not mentioned, but the handle of the barrel/bucket is mentioned.", + "pred_index": 3, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The barrel/bucket is not mentioned.", + 0 + ], + [ + "The color of the barrel/bucket is mentioned in the description and is purple.", + 1 + ], + [ + "The color of the barrel/bucket is mentioned in the description but is not purple.", + -1 + ], + [ + "The color of the barrel/bucket is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the barrel/bucket is mentioned in the description but is not purple.", + "pred_index": 2, + "question_index": 2, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The barrel/bucket is not mentioned in the description.", + 0 + ], + [ + "The bottom of the barrel/bucket is mentioned in the description.", + -1 + ], + [ + "The bottom of the barrel/bucket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The bottom of the barrel/bucket is not mentioned in the description.", + "pred_index": 2, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The heart is mentioned in the description.", + -1 + ], + [ + "The heart is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The heart is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The barrel/bucket is not mentioned in the description.", + 0 + ], + [ + "The lid of the barrel/bucket is mentioned in the description.", + -1 + ], + [ + "The lid of the barrel/bucket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The lid of the barrel/bucket is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cake is mentioned in the description.", + -1 + ], + [ + "The cake is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The cake is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plate is mentioned in the description.", + -1 + ], + [ + "The plate is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The plate is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a barrel/bucket or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, dark blue plastic bucket with a smooth surface and a slightly flared rim. The bucket has a handle attached to the top edge, which is also dark blue.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a barrel/bucket or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, dark blue plastic bucket with a smooth surface and a slightly flared rim. The bucket has a handle attached to the top edge, which is also dark blue.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The barrel/bucket is not mentioned.\nB. The shape of the barrel/bucket is mentioned in the description and is round or cylindrical.\nC. The shape of the barrel/bucket is mentioned in the description but is not round or cylindrical.\nD. The shape of the barrel/bucket is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, dark blue plastic bucket with a smooth surface and a slightly flared rim. The bucket has a handle attached to the top edge, which is also dark blue.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handle or the barrel/bucket is not mentioned.\nB. The shape of the handle is mentioned in the description and is curved.\nC. The shape of the handle is mentioned in the description but is not curved.\nD. The shape of the handle is not mentioned, but the handle of the barrel/bucket is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, dark blue plastic bucket with a smooth surface and a slightly flared rim. The bucket has a handle attached to the top edge, which is also dark blue.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The barrel/bucket is not mentioned.\nB. The color of the barrel/bucket is mentioned in the description and is purple.\nC. The color of the barrel/bucket is mentioned in the description but is not purple.\nD. The color of the barrel/bucket is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, dark blue plastic bucket with a smooth surface and a slightly flared rim. The bucket has a handle attached to the top edge, which is also dark blue.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The barrel/bucket is not mentioned in the description.\nB. The bottom of the barrel/bucket is mentioned in the description.\nC. The bottom of the barrel/bucket is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, dark blue plastic bucket with a smooth surface and a slightly flared rim. The bucket has a handle attached to the top edge, which is also dark blue.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The heart is mentioned in the description.\nB. The heart is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, dark blue plastic bucket with a smooth surface and a slightly flared rim. The bucket has a handle attached to the top edge, which is also dark blue.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The barrel/bucket is not mentioned in the description.\nB. The lid of the barrel/bucket is mentioned in the description.\nC. The lid of the barrel/bucket is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, dark blue plastic bucket with a smooth surface and a slightly flared rim. The bucket has a handle attached to the top edge, which is also dark blue.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cake is mentioned in the description.\nB. The cake is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, dark blue plastic bucket with a smooth surface and a slightly flared rim. The bucket has a handle attached to the top edge, which is also dark blue.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The plate is mentioned in the description.\nB. The plate is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The shape of the barrel/bucket is mentioned in the description and is round or cylindrical.", + "D. The shape of the handle is not mentioned, but the handle of the barrel/bucket is mentioned.", + "C. The color of the barrel/bucket is mentioned in the description but is not purple.", + "C. The bottom of the barrel/bucket is not mentioned in the description.", + "B. The heart is not mentioned in the description.", + "C. The lid of the barrel/bucket is not mentioned in the description.", + "B. The cake is not mentioned in the description.", + "B. The plate is not mentioned in the description." 
+ ], + "score": 0.6875, + "score_pos": 0.16666666666666666, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "15050320": { + "pred": "A dark brown wine glass with a wide, shallow bowl and a short stem. The glass has a smooth, reflective surface with a few light reflections visible on the bowl.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the wine glass is not mentioned.", + 0 + ], + [ + "The wine glass are not mentioned.", + 0 + ], + [ + "The material of the wine glass is mentioned in the description and is glass.", + 1 + ], + [ + "The material of the wine glass is mentioned in the description but is not glass.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the wine glass is not mentioned.", + "pred_index": 0, + "question_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the wine glass is not mentioned.", + 0 + ], + [ + "The wine glass are not mentioned.", + 0 + ], + [ + "The color of the wine glass is mentioned in the description and is transparent.", + 1 + ], + [ + "The color of the wine glass is mentioned in the description but is not transparent.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the wine glass is mentioned in the description but is not transparent.", + "pred_index": 3, + "question_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the base is not mentioned, but the base of the wine glass is mentioned.", + 0.5 + ], + [ + "The base or the wine glass is not mentioned.", + 0 + ], + [ + "The shape of the base is mentioned in the description and is round or flat.", + 1 + ], + [ + "The shape of the base is mentioned in the description but is not round or flat.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The base or the wine glass is not mentioned.", + "pred_index": 1, + "question_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the stem is not mentioned, but the stem of the wine glass is mentioned.", + 0.5 + ], + [ + "The stem or the wine glass is not mentioned.", + 0 + ], + [ + "The shape of the stem is mentioned in the description and is slender.", + 1 + ], + [ + "The shape of the stem is mentioned in the description but is not slender.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the stem is not mentioned, but the stem of the wine glass is mentioned.", + "pred_index": 0, + "question_index": 3, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bowl of the wine glass is not mentioned in the description.", + 1 + ], + [ + "The bowl of the wine glass is mentioned in the description.", + -1 + ], + [ + "The wine glass are not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The bowl of the wine glass is mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plate is not mentioned in the description.", + 1 + ], + [ + "The plate is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The plate is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rim of the wine glass is not mentioned in the description.", + 1 + ], + [ + "The rim of the wine glass is mentioned in the description.", + -1 + ], + [ + "The wine glass are not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The rim of the wine glass is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fork is not mentioned in the description.", + 1 + ], + [ + "The fork is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The fork is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The napkin is not mentioned in the description.", + 1 + ], + [ + "The napkin is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The napkin is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the objects in the description are wine glass or objects of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, shallow bowl and a short stem. The glass has a smooth, reflective surface with a few light reflections visible on the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the objects in the description are wine glass or objects of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, shallow bowl and a short stem. 
The glass has a smooth, reflective surface with a few light reflections visible on the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the wine glass is not mentioned.\nB. The wine glass are not mentioned.\nC. The material of the wine glass is mentioned in the description and is glass.\nD. The material of the wine glass is mentioned in the description but is not glass.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, shallow bowl and a short stem. The glass has a smooth, reflective surface with a few light reflections visible on the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the wine glass is not mentioned.\nB. The wine glass are not mentioned.\nC. The color of the wine glass is mentioned in the description and is transparent.\nD. The color of the wine glass is mentioned in the description but is not transparent.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, shallow bowl and a short stem. 
The glass has a smooth, reflective surface with a few light reflections visible on the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the base is not mentioned, but the base of the wine glass is mentioned.\nB. The base or the wine glass is not mentioned.\nC. The shape of the base is mentioned in the description and is round or flat.\nD. The shape of the base is mentioned in the description but is not round or flat.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, shallow bowl and a short stem. The glass has a smooth, reflective surface with a few light reflections visible on the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the stem is not mentioned, but the stem of the wine glass is mentioned.\nB. The stem or the wine glass is not mentioned.\nC. The shape of the stem is mentioned in the description and is slender.\nD. The shape of the stem is mentioned in the description but is not slender.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, shallow bowl and a short stem. The glass has a smooth, reflective surface with a few light reflections visible on the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bowl of the wine glass is not mentioned in the description.\nB. The bowl of the wine glass is mentioned in the description.\nC. The wine glass are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, shallow bowl and a short stem. The glass has a smooth, reflective surface with a few light reflections visible on the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The plate is not mentioned in the description.\nB. The plate is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, shallow bowl and a short stem. 
The glass has a smooth, reflective surface with a few light reflections visible on the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rim of the wine glass is not mentioned in the description.\nB. The rim of the wine glass is mentioned in the description.\nC. The wine glass are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, shallow bowl and a short stem. The glass has a smooth, reflective surface with a few light reflections visible on the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fork is not mentioned in the description.\nB. The fork is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, shallow bowl and a short stem. The glass has a smooth, reflective surface with a few light reflections visible on the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The napkin is not mentioned in the description.\nB. The napkin is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The material of the wine glass is not mentioned.", + "D. The color of the wine glass is mentioned in the description but is not transparent.", + "B. The base or the wine glass is not mentioned.", + "A. The shape of the stem is not mentioned, but the stem of the wine glass is mentioned.", + "B. The bowl of the wine glass is mentioned in the description.", + "A. The plate is not mentioned in the description.", + "A. The rim of the wine glass is not mentioned in the description.", + "A. The fork is not mentioned in the description.", + "A. The napkin is not mentioned in the description." + ], + "score": 0.2777777777777778, + "score_pos": -0.125, + "score_neg": 0.6, + "neg_valid_num": 5, + "recognition_result": true + }, + "16957916": { + "pred": "A piece of green lettuce with a slightly curled edge and a mix of light and dark green hues, featuring a few small brown spots and a hint of red at the base.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lettuce is not mentioned.", + 0 + ], + [ + "The color of the lettuce is not mentioned.", + 0 + ], + [ + "The color of the lettuce is mentioned in the description and is white/green.", + 1 + ], + [ + "The color of the lettuce is mentioned in the description but is not white/green.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the lettuce is mentioned in the description and is white/green.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lettuce is not mentioned.", + 0 + ], + [ + "The shape of the lettuce is not mentioned.", + 0 + ], + [ + "The shape of the lettuce is mentioned in the description and is curved or irregular.", + 1 + ], + [ + "The shape of the lettuce is mentioned in the description but is not curved or irregular.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the lettuce is mentioned in the description and is curved or irregular.", + "pred_index": 2, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The edge or the lettuce is not mentioned.", + 0 + ], + [ + "The shape of the edge is not mentioned, but the edge of the lettuce is mentioned.", + 0.5 + ], + [ + "The shape of the edge is mentioned in the description and is jagged, ruffled, or rough.", + 1 + ], + [ + "The shape of the edge is mentioned in the description but is not jagged, ruffled, or rough.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the edge is mentioned in the description but is not jagged, ruffled, or rough.", + "pred_index": 3, + "question_index": 2, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tartar sauce is mentioned in the description.", + -1 + ], + [ + "The tartar sauce is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The tartar sauce is not mentioned in the description.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fried fish are mentioned in the description.", + -1 + ], + [ + "The fried fish are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The fried fish are not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plate is mentioned in the description.", + -1 + ], + [ + "The plate is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The plate is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The french fries are mentioned in the description.", + -1 + ], + [ + "The french fries are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The french fries are not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The utensil is mentioned in the description.", + -1 + ], + [ + "The utensil is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The utensil is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a lettuce or an object of a similar type? 
Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of green lettuce with a slightly curled edge and a mix of light and dark green hues, featuring a few small brown spots and a hint of red at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a lettuce or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of green lettuce with a slightly curled edge and a mix of light and dark green hues, featuring a few small brown spots and a hint of red at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lettuce is not mentioned.\nB. The color of the lettuce is not mentioned.\nC. The color of the lettuce is mentioned in the description and is white/green.\nD. 
The color of the lettuce is mentioned in the description but is not white/green.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of green lettuce with a slightly curled edge and a mix of light and dark green hues, featuring a few small brown spots and a hint of red at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lettuce is not mentioned.\nB. The shape of the lettuce is not mentioned.\nC. The shape of the lettuce is mentioned in the description and is curved or irregular.\nD. The shape of the lettuce is mentioned in the description but is not curved or irregular.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of green lettuce with a slightly curled edge and a mix of light and dark green hues, featuring a few small brown spots and a hint of red at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The edge or the lettuce is not mentioned.\nB. The shape of the edge is not mentioned, but the edge of the lettuce is mentioned.\nC. 
The shape of the edge is mentioned in the description and is jagged, ruffled, or rough.\nD. The shape of the edge is mentioned in the description but is not jagged, ruffled, or rough.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of green lettuce with a slightly curled edge and a mix of light and dark green hues, featuring a few small brown spots and a hint of red at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tartar sauce is mentioned in the description.\nB. The tartar sauce is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of green lettuce with a slightly curled edge and a mix of light and dark green hues, featuring a few small brown spots and a hint of red at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fried fish are mentioned in the description.\nB. The fried fish are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of green lettuce with a slightly curled edge and a mix of light and dark green hues, featuring a few small brown spots and a hint of red at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The plate is mentioned in the description.\nB. The plate is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of green lettuce with a slightly curled edge and a mix of light and dark green hues, featuring a few small brown spots and a hint of red at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The french fries are mentioned in the description.\nB. The french fries are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA piece of green lettuce with a slightly curled edge and a mix of light and dark green hues, featuring a few small brown spots and a hint of red at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The utensil is mentioned in the description.\nB. The utensil is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the lettuce is mentioned in the description and is white/green.", + "C. The shape of the lettuce is mentioned in the description and is curved or irregular.", + "D. The shape of the edge is mentioned in the description but is not jagged, ruffled, or rough.", + "B. The tartar sauce is not mentioned in the description.", + "B. The fried fish are not mentioned in the description.", + "B. The plate is not mentioned in the description.", + "B. The french fries are not mentioned in the description.", + "B. The utensil is not mentioned in the description." + ], + "score": 0.75, + "score_pos": 0.3333333333333333, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "17385866": { + "pred": "A scoop of vanilla ice cream with a swirl of red and yellow fruit toppings, possibly strawberry and lemon, on a light green and yellow marbled base.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the scoop is not mentioned, but the scoop of the ice cream is mentioned.", + 0.5 + ], + [ + "The scoop or the ice cream is not mentioned.", + 0 + ], + [ + "The shape of the scoop is mentioned in the description but is not round.", + -1 + ], + [ + "The shape of the scoop is mentioned in the description and is round.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The shape of the scoop is not mentioned, but the scoop of the ice cream is mentioned.", + "pred_index": 0, + "question_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the cone is not mentioned, but the cone of the ice cream is mentioned.", + 0.5 + ], + [ + "The cone or the ice cream is not mentioned.", + 0 + ], + [ + "The material of the cone is mentioned in the description but is not waffle.", + -1 + ], + [ + "The material of the cone is mentioned in the description and is waffle.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the cone is not mentioned, but the cone of the ice cream is mentioned.", + "pred_index": 0, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the cone is not mentioned, but the cone of the ice cream is mentioned.", + 0.5 + ], + [ + "The cone or the ice cream is not mentioned.", + 0 + ], + [ + "The color of the cone is mentioned in the description but is not yellow.", + -1 + ], + [ + "The color of the cone is mentioned in the description and is yellow.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. The cone or the ice cream is not mentioned.", + "pred_index": 1, + "question_index": 2, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ice cream is not mentioned in the description.", + 0 + ], + [ + "The cherry of the ice cream is mentioned in the description.", + -1 + ], + [ + "The cherry of the ice cream is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The cherry of the ice cream is not mentioned in the description.", + "pred_index": 2, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The woman is mentioned in the description.", + -1 + ], + [ + "The woman is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The woman is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ice cream is not mentioned in the description.", + 0 + ], + [ + "The sprinkles of the ice cream are mentioned in the description.", + -1 + ], + [ + "The sprinkles of the ice cream are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The sprinkles of the ice cream are not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The man is mentioned in the description.", + -1 + ], + [ + "The man is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The man is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Turkish flag is mentioned in the description.", + -1 + ], + [ + "The Turkish flag is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The Turkish flag is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is an ice cream or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream with a swirl of red and yellow fruit toppings, possibly strawberry and lemon, on a light green and yellow marbled base.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is an ice cream or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream with a swirl of red and yellow fruit toppings, possibly strawberry and lemon, on a light green and yellow marbled base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the scoop is not mentioned, but the scoop of the ice cream is mentioned.\nB. The scoop or the ice cream is not mentioned.\nC. The shape of the scoop is mentioned in the description but is not round.\nD. The shape of the scoop is mentioned in the description and is round.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream with a swirl of red and yellow fruit toppings, possibly strawberry and lemon, on a light green and yellow marbled base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the cone is not mentioned, but the cone of the ice cream is mentioned.\nB. The cone or the ice cream is not mentioned.\nC. The material of the cone is mentioned in the description but is not waffle.\nD. The material of the cone is mentioned in the description and is waffle.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream with a swirl of red and yellow fruit toppings, possibly strawberry and lemon, on a light green and yellow marbled base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the cone is not mentioned, but the cone of the ice cream is mentioned.\nB. The cone or the ice cream is not mentioned.\nC. The color of the cone is mentioned in the description but is not yellow.\nD. The color of the cone is mentioned in the description and is yellow.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream with a swirl of red and yellow fruit toppings, possibly strawberry and lemon, on a light green and yellow marbled base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ice cream is not mentioned in the description.\nB. The cherry of the ice cream is mentioned in the description.\nC. The cherry of the ice cream is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream with a swirl of red and yellow fruit toppings, possibly strawberry and lemon, on a light green and yellow marbled base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The woman is mentioned in the description.\nB. The woman is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream with a swirl of red and yellow fruit toppings, possibly strawberry and lemon, on a light green and yellow marbled base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ice cream is not mentioned in the description.\nB. The sprinkles of the ice cream are mentioned in the description.\nC. The sprinkles of the ice cream are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream with a swirl of red and yellow fruit toppings, possibly strawberry and lemon, on a light green and yellow marbled base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The man is mentioned in the description.\nB. The man is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream with a swirl of red and yellow fruit toppings, possibly strawberry and lemon, on a light green and yellow marbled base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Turkish flag is mentioned in the description.\nB. The Turkish flag is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The shape of the scoop is not mentioned, but the scoop of the ice cream is mentioned.", + "A. The material of the cone is not mentioned, but the cone of the ice cream is mentioned.", + "B. The cone or the ice cream is not mentioned.", + "C. The cherry of the ice cream is not mentioned in the description.", + "B. The woman is not mentioned in the description.", + "C. The sprinkles of the ice cream are not mentioned in the description.", + "B. The man is not mentioned in the description.", + "B. The Turkish flag is not mentioned in the description." 
+ ], + "score": 0.75, + "score_pos": 0.3333333333333333, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "17404769": { + "pred": "The car is a white SUV with a rear hatchback design. It features a rear window with a slight tint and a small, square fuel cap on the right side of the rear door. The taillights are vertically aligned and wrap around the side of the vehicle. The rear bumper is slightly curved, and the car has a visible rear wheel with a five-spoke alloy rim.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the gas cap is mentioned in the description but is not square.", + -1 + ], + [ + "The shape of the gas cap is not mentioned, but the gas cap of the suv is mentioned.", + 0.5 + ], + [ + "The gas cap or the suv is not mentioned.", + 0 + ], + [ + "The shape of the gas cap is mentioned in the description and is square.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the gas cap is mentioned in the description and is square.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the taillight is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the taillight is not mentioned, but the taillight of the suv is mentioned.", + 0.5 + ], + [ + "The taillight or the suv is not mentioned.", + 0 + ], + [ + "The material of the taillight is mentioned in the description and is plastic.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The material of the taillight is not mentioned, but the taillight of the suv is mentioned.", + "pred_index": 1, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the suv is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the suv is not mentioned.", + 0 + ], + [ + "The suv is not mentioned.", + 0 + ], + [ + "The color of the suv is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the suv is mentioned in the description and is white.", + "pred_index": 3, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The front bumper of the suv is not mentioned in the description.", + 1 + ], + [ + "The suv is not mentioned in the description.", + 0 + ], + [ + "The front bumper of the suv is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The front bumper of the suv is not mentioned in the description.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The front wheel of the suv is not mentioned in the description.", + 1 + ], + [ + "The suv is not mentioned in the description.", + 0 + ], + [ + "The front wheel of the suv is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The front wheel of the suv is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The grille of the suv is not mentioned in the description.", + 1 + ], + [ + "The suv is not mentioned in the description.", + 0 + ], + [ + "The grille of the suv is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The grille of the suv is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The building is not mentioned in the description.", + 1 + ], + [ + "The building is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The building is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tree is not mentioned in the description.", + 1 + ], + [ + "The tree is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The tree is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a suv or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white SUV with a rear hatchback design. It features a rear window with a slight tint and a small, square fuel cap on the right side of the rear door. The taillights are vertically aligned and wrap around the side of the vehicle. The rear bumper is slightly curved, and the car has a visible rear wheel with a five-spoke alloy rim.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a suv or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white SUV with a rear hatchback design. It features a rear window with a slight tint and a small, square fuel cap on the right side of the rear door. The taillights are vertically aligned and wrap around the side of the vehicle. The rear bumper is slightly curved, and the car has a visible rear wheel with a five-spoke alloy rim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the gas cap is mentioned in the description but is not square.\nB. The shape of the gas cap is not mentioned, but the gas cap of the suv is mentioned.\nC. The gas cap or the suv is not mentioned.\nD. 
The shape of the gas cap is mentioned in the description and is square.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white SUV with a rear hatchback design. It features a rear window with a slight tint and a small, square fuel cap on the right side of the rear door. The taillights are vertically aligned and wrap around the side of the vehicle. The rear bumper is slightly curved, and the car has a visible rear wheel with a five-spoke alloy rim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the taillight is mentioned in the description but is not plastic.\nB. The material of the taillight is not mentioned, but the taillight of the suv is mentioned.\nC. The taillight or the suv is not mentioned.\nD. The material of the taillight is mentioned in the description and is plastic.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white SUV with a rear hatchback design. It features a rear window with a slight tint and a small, square fuel cap on the right side of the rear door. The taillights are vertically aligned and wrap around the side of the vehicle. 
The rear bumper is slightly curved, and the car has a visible rear wheel with a five-spoke alloy rim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the suv is mentioned in the description but is not white.\nB. The color of the suv is not mentioned.\nC. The suv is not mentioned.\nD. The color of the suv is mentioned in the description and is white.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white SUV with a rear hatchback design. It features a rear window with a slight tint and a small, square fuel cap on the right side of the rear door. The taillights are vertically aligned and wrap around the side of the vehicle. The rear bumper is slightly curved, and the car has a visible rear wheel with a five-spoke alloy rim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The front bumper of the suv is not mentioned in the description.\nB. The suv is not mentioned in the description.\nC. The front bumper of the suv is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white SUV with a rear hatchback design. It features a rear window with a slight tint and a small, square fuel cap on the right side of the rear door. The taillights are vertically aligned and wrap around the side of the vehicle. The rear bumper is slightly curved, and the car has a visible rear wheel with a five-spoke alloy rim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The front wheel of the suv is not mentioned in the description.\nB. The suv is not mentioned in the description.\nC. The front wheel of the suv is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white SUV with a rear hatchback design. It features a rear window with a slight tint and a small, square fuel cap on the right side of the rear door. The taillights are vertically aligned and wrap around the side of the vehicle. The rear bumper is slightly curved, and the car has a visible rear wheel with a five-spoke alloy rim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The grille of the suv is not mentioned in the description.\nB. The suv is not mentioned in the description.\nC. The grille of the suv is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white SUV with a rear hatchback design. It features a rear window with a slight tint and a small, square fuel cap on the right side of the rear door. The taillights are vertically aligned and wrap around the side of the vehicle. The rear bumper is slightly curved, and the car has a visible rear wheel with a five-spoke alloy rim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The building is not mentioned in the description.\nB. The building is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white SUV with a rear hatchback design. It features a rear window with a slight tint and a small, square fuel cap on the right side of the rear door. The taillights are vertically aligned and wrap around the side of the vehicle. The rear bumper is slightly curved, and the car has a visible rear wheel with a five-spoke alloy rim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tree is not mentioned in the description.\nB. The tree is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. 
The shape of the gas cap is mentioned in the description and is square.", + "B. The material of the taillight is not mentioned, but the taillight of the suv is mentioned.", + "D. The color of the suv is mentioned in the description and is white.", + "A. The front bumper of the suv is not mentioned in the description.", + "A. The front wheel of the suv is not mentioned in the description.", + "A. The grille of the suv is not mentioned in the description.", + "A. The building is not mentioned in the description.", + "A. The tree is not mentioned in the description." + ], + "score": 0.9375, + "score_pos": 0.8333333333333334, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "18217373": { + "pred": "The spectacles feature a thin, dark brown frame with a slightly curved bridge. The lenses are rectangular with rounded edges, and the frame has a subtle metallic sheen.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The frame or the glasses is not mentioned.", + 0 + ], + [ + "The color of the frame is not mentioned, but the frame of the glasses is mentioned.", + 0.5 + ], + [ + "The color of the frame is mentioned in the description and is black or metallic.", + 1 + ], + [ + "The color of the frame is mentioned in the description but is not black or metallic.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the frame is mentioned in the description but is not black or metallic.", + "pred_index": 3, + "question_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The frame or the glasses is not mentioned.", + 0 + ], + [ + "The shape of the frame is not mentioned, but the frame of the glasses is mentioned.", + 0.5 + ], + [ + "The shape of the frame is mentioned in the description and is square or rounded.", + 1 + ], + [ + "The shape of the frame is mentioned in the description but is not square or rounded.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the frame is not mentioned, but the frame of the glasses is mentioned.", + "pred_index": 1, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lens or the glasses are not mentioned.", + 0 + ], + [ + "The color of the lens is not mentioned, but the lens of the glasses are mentioned.", + 0.5 + ], + [ + "The color of the lens is mentioned in the description and is clear.", + 1 + ], + [ + "The color of the lens is mentioned in the description but is not clear.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the lens is not mentioned, but the lens of the glasses are mentioned.", + "pred_index": 1, + "question_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The temple arm or the glasses is not mentioned.", + 0 + ], + [ + "The shape of the temple arm is not mentioned, but the temple arm of the glasses is mentioned.", + 0.5 + ], + [ + "The shape of the temple arm is mentioned in the description and is curved.", + 1 + ], + [ + "The shape of the temple arm is mentioned in the description but is not curved.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The temple arm or the glasses is not mentioned.", + "pred_index": 0, + "question_index": 3, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fruit is mentioned in the description.", + -1 + ], + [ + "The fruit is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The fruit is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The women are mentioned in the description.", + -1 + ], + [ + "The women are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The women are not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The vegetables are mentioned in the description.", + -1 + ], + [ + "The vegetables are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The vegetables are not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The produce is mentioned in the description.", + -1 + ], + [ + "The produce is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The produce is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The food is mentioned in the description.", + -1 + ], + [ + "The food is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The food is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the objects in the description are glasses or objects of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a thin, dark brown frame with a slightly curved bridge. The lenses are rectangular with rounded edges, and the frame has a subtle metallic sheen.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the objects in the description are glasses or objects of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a thin, dark brown frame with a slightly curved bridge. The lenses are rectangular with rounded edges, and the frame has a subtle metallic sheen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The frame or the glasses is not mentioned.\nB. The color of the frame is not mentioned, but the frame of the glasses is mentioned.\nC. The color of the frame is mentioned in the description and is black or metallic.\nD. The color of the frame is mentioned in the description but is not black or metallic.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a thin, dark brown frame with a slightly curved bridge. The lenses are rectangular with rounded edges, and the frame has a subtle metallic sheen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The frame or the glasses is not mentioned.\nB. The shape of the frame is not mentioned, but the frame of the glasses is mentioned.\nC. The shape of the frame is mentioned in the description and is square or rounded.\nD. The shape of the frame is mentioned in the description but is not square or rounded.\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a thin, dark brown frame with a slightly curved bridge. The lenses are rectangular with rounded edges, and the frame has a subtle metallic sheen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lens or the glasses are not mentioned.\nB. The color of the lens is not mentioned, but the lens of the glasses are mentioned.\nC. The color of the lens is mentioned in the description and is clear.\nD. The color of the lens is mentioned in the description but is not clear.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a thin, dark brown frame with a slightly curved bridge. The lenses are rectangular with rounded edges, and the frame has a subtle metallic sheen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The temple arm or the glasses is not mentioned.\nB. The shape of the temple arm is not mentioned, but the temple arm of the glasses is mentioned.\nC. The shape of the temple arm is mentioned in the description and is curved.\nD. 
The shape of the temple arm is mentioned in the description but is not curved.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a thin, dark brown frame with a slightly curved bridge. The lenses are rectangular with rounded edges, and the frame has a subtle metallic sheen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fruit is mentioned in the description.\nB. The fruit is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a thin, dark brown frame with a slightly curved bridge. The lenses are rectangular with rounded edges, and the frame has a subtle metallic sheen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The women are mentioned in the description.\nB. The women are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a thin, dark brown frame with a slightly curved bridge. The lenses are rectangular with rounded edges, and the frame has a subtle metallic sheen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The vegetables are mentioned in the description.\nB. The vegetables are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a thin, dark brown frame with a slightly curved bridge. The lenses are rectangular with rounded edges, and the frame has a subtle metallic sheen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The produce is mentioned in the description.\nB. The produce is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a thin, dark brown frame with a slightly curved bridge. 
The lenses are rectangular with rounded edges, and the frame has a subtle metallic sheen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The food is mentioned in the description.\nB. The food is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the frame is mentioned in the description but is not black or metallic.", + "B. The shape of the frame is not mentioned, but the frame of the glasses is mentioned.", + "B. The color of the lens is not mentioned, but the lens of the glasses are mentioned.", + "A. The temple arm or the glasses is not mentioned.", + "B. The fruit is not mentioned in the description.", + "B. The women are not mentioned in the description.", + "B. The vegetables are not mentioned in the description.", + "B. The produce is not mentioned in the description.", + "B. The food is not mentioned in the description." + ], + "score": 0.5555555555555556, + "score_pos": 0.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "19455186": { + "pred": "A blue metal handcart with two horizontal bars and two vertical supports. The cart has two black wheels at the bottom.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the cart/trolley is mentioned in the description and is blue.", + 1 + ], + [ + "The color of the cart/trolley is not mentioned.", + 0 + ], + [ + "The color of the cart/trolley is mentioned in the description but is not blue.", + -1 + ], + [ + "The cart/trolley is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the cart/trolley is mentioned in the description and is blue.", + "pred_index": 0, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the bars is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the bars is not mentioned, but the bars of the cart/trolley are mentioned.", + 0.5 + ], + [ + "The shape of the bars is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The bars or the cart/trolley are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the bars is not mentioned, but the bars of the cart/trolley are mentioned.", + "pred_index": 1, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the wheels is mentioned in the description and is small.", + 1 + ], + [ + "The size of the wheels is not mentioned, but the wheels of the cart/trolley are mentioned.", + 0.5 + ], + [ + "The size of the wheels is mentioned in the description but is not small.", + -1 + ], + [ + "The wheels or the cart/trolley are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The size of the wheels is not mentioned, but the wheels of the cart/trolley are mentioned.", + "pred_index": 1, + "question_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the wheels is mentioned in the description and is black.", + 1 + ], + [ + "The color of the wheels is not mentioned, but the wheels of the cart/trolley are mentioned.", + 0.5 + ], + [ + "The color of the wheels is mentioned in the description but is not black.", + -1 + ], + [ + "The wheels or the cart/trolley are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the wheels is mentioned in the description and is black.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lid of the cart/trolley is mentioned in the description.", + -1 + ], + [ + "The cart/trolley is not mentioned in the description.", + 0 + ], + [ + "The lid of the cart/trolley is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The lid of the cart/trolley is not mentioned in the description.", + "pred_index": 2, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The basket of the cart/trolley is mentioned in the description.", + -1 + ], + [ + "The cart/trolley is not mentioned in the description.", + 0 + ], + [ + "The basket of the cart/trolley is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The basket of the cart/trolley is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shelves are mentioned in the description.", + -1 + ], + [ + "The shelves are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The shelves are not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The drawers of the cart/trolley are mentioned in the description.", + -1 + ], + [ + "The cart/trolley is not mentioned in the description.", + 0 + ], + [ + "The drawers of the cart/trolley are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The drawers of the cart/trolley are not mentioned in the description.", + "pred_index": 2, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lights are mentioned in the description.", + -1 + ], + [ + "The lights are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The lights are not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a cart/trolley or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal handcart with two horizontal bars and two vertical supports. 
The cart has two black wheels at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a cart/trolley or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal handcart with two horizontal bars and two vertical supports. The cart has two black wheels at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the cart/trolley is mentioned in the description and is blue.\nB. The color of the cart/trolley is not mentioned.\nC. The color of the cart/trolley is mentioned in the description but is not blue.\nD. The cart/trolley is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal handcart with two horizontal bars and two vertical supports. The cart has two black wheels at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the bars is mentioned in the description and is rectangular.\nB. 
The shape of the bars is not mentioned, but the bars of the cart/trolley are mentioned.\nC. The shape of the bars is mentioned in the description but is not rectangular.\nD. The bars or the cart/trolley are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal handcart with two horizontal bars and two vertical supports. The cart has two black wheels at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the wheels is mentioned in the description and is small.\nB. The size of the wheels is not mentioned, but the wheels of the cart/trolley are mentioned.\nC. The size of the wheels is mentioned in the description but is not small.\nD. The wheels or the cart/trolley are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal handcart with two horizontal bars and two vertical supports. The cart has two black wheels at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the wheels is mentioned in the description and is black.\nB. 
The color of the wheels is not mentioned, but the wheels of the cart/trolley are mentioned.\nC. The color of the wheels is mentioned in the description but is not black.\nD. The wheels or the cart/trolley are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal handcart with two horizontal bars and two vertical supports. The cart has two black wheels at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lid of the cart/trolley is mentioned in the description.\nB. The cart/trolley is not mentioned in the description.\nC. The lid of the cart/trolley is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal handcart with two horizontal bars and two vertical supports. The cart has two black wheels at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The basket of the cart/trolley is mentioned in the description.\nB. The cart/trolley is not mentioned in the description.\nC. 
The basket of the cart/trolley is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal handcart with two horizontal bars and two vertical supports. The cart has two black wheels at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shelves are mentioned in the description.\nB. The shelves are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal handcart with two horizontal bars and two vertical supports. The cart has two black wheels at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The drawers of the cart/trolley are mentioned in the description.\nB. The cart/trolley is not mentioned in the description.\nC. The drawers of the cart/trolley are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal handcart with two horizontal bars and two vertical supports. The cart has two black wheels at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lights are mentioned in the description.\nB. The lights are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The color of the cart/trolley is mentioned in the description and is blue.", + "B. The shape of the bars is not mentioned, but the bars of the cart/trolley are mentioned.", + "B. The size of the wheels is not mentioned, but the wheels of the cart/trolley are mentioned.", + "A. The color of the wheels is mentioned in the description and is black.", + "C. The lid of the cart/trolley is not mentioned in the description.", + "C. The basket of the cart/trolley is not mentioned in the description.", + "B. The shelves are not mentioned in the description.", + "C. The drawers of the cart/trolley are not mentioned in the description.", + "B. The lights are not mentioned in the description." + ], + "score": 0.8888888888888888, + "score_pos": 0.75, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "19610023": { + "pred": "A bright green croc-style shoe with a thick, textured sole and a wide, open toe design. 
The shoe features a smooth, rounded toe and a slightly raised heel.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the slippers is mentioned in the description but is not green.", + -1 + ], + [ + "The slippers are not mentioned.", + 0 + ], + [ + "The color of the slippers is mentioned in the description and is green.", + 1 + ], + [ + "The color of the slippers is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the slippers is mentioned in the description and is green.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the toe box is mentioned in the description but is not round.", + -1 + ], + [ + "The toe box or the slippers is not mentioned.", + 0 + ], + [ + "The shape of the toe box is mentioned in the description and is round.", + 1 + ], + [ + "The shape of the toe box is not mentioned, but the toe box of the slippers is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the toe box is mentioned in the description and is round.", + "pred_index": 2, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the slippers is mentioned in the description but is not frog-shaped.", + -1 + ], + [ + "The slippers are not mentioned.", + 0 + ], + [ + "The shape of the slippers is mentioned in the description and is frog-shaped.", + 1 + ], + [ + "The shape of the slippers is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the slippers is mentioned in the description and is frog-shaped.", + "pred_index": 2, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plant is not mentioned in the description.", + 1 + ], + [ + "The plant is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The plant is not mentioned in the description.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lining of the slippers is not mentioned in the description.", + 1 + ], + [ + "The slippers are not mentioned in the description.", + 0 + ], + [ + "The lining of the slippers is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The lining of the slippers is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shirt is not mentioned in the description.", + 1 + ], + [ + "The shirt is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The shirt is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The buckle of the slippers is not mentioned in the description.", + 1 + ], + [ + "The slippers are not mentioned in the description.", + 0 + ], + [ + "The buckle of the slippers is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The slippers are not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wall is not mentioned in the description.", + 1 + ], + [ + "The wall is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The wall is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the objects in the description are slippers or objects of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green croc-style shoe with a thick, textured sole and a wide, open toe design. The shoe features a smooth, rounded toe and a slightly raised heel.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the objects in the description are slippers or objects of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green croc-style shoe with a thick, textured sole and a wide, open toe design. The shoe features a smooth, rounded toe and a slightly raised heel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the slippers is mentioned in the description but is not green.\nB. The slippers are not mentioned.\nC. The color of the slippers is mentioned in the description and is green.\nD. The color of the slippers is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green croc-style shoe with a thick, textured sole and a wide, open toe design. The shoe features a smooth, rounded toe and a slightly raised heel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the toe box is mentioned in the description but is not round.\nB. The toe box or the slippers is not mentioned.\nC. The shape of the toe box is mentioned in the description and is round.\nD. The shape of the toe box is not mentioned, but the toe box of the slippers is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green croc-style shoe with a thick, textured sole and a wide, open toe design. The shoe features a smooth, rounded toe and a slightly raised heel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the slippers is mentioned in the description but is not frog-shaped.\nB. The slippers are not mentioned.\nC. The shape of the slippers is mentioned in the description and is frog-shaped.\nD. The shape of the slippers is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green croc-style shoe with a thick, textured sole and a wide, open toe design. The shoe features a smooth, rounded toe and a slightly raised heel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The plant is not mentioned in the description.\nB. The plant is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA bright green croc-style shoe with a thick, textured sole and a wide, open toe design. The shoe features a smooth, rounded toe and a slightly raised heel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lining of the slippers is not mentioned in the description.\nB. The slippers are not mentioned in the description.\nC. The lining of the slippers is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green croc-style shoe with a thick, textured sole and a wide, open toe design. The shoe features a smooth, rounded toe and a slightly raised heel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shirt is not mentioned in the description.\nB. The shirt is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green croc-style shoe with a thick, textured sole and a wide, open toe design. 
The shoe features a smooth, rounded toe and a slightly raised heel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The buckle of the slippers is not mentioned in the description.\nB. The slippers are not mentioned in the description.\nC. The buckle of the slippers is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green croc-style shoe with a thick, textured sole and a wide, open toe design. The shoe features a smooth, rounded toe and a slightly raised heel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wall is not mentioned in the description.\nB. The wall is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the slippers is mentioned in the description and is green.", + "C. The shape of the toe box is mentioned in the description and is round.", + "C. The shape of the slippers is mentioned in the description and is frog-shaped.", + "A. The plant is not mentioned in the description.", + "A. The lining of the slippers is not mentioned in the description.", + "A. The shirt is not mentioned in the description.", + "B. The slippers are not mentioned in the description.", + "A. The wall is not mentioned in the description." 
+ ], + "score": 0.875, + "score_pos": 1.0, + "score_neg": 0.8, + "neg_valid_num": 5, + "recognition_result": true + }, + "19610025": { + "pred": "A white rabbit with large, upright ears and a red backpack. It is wearing a yellow shirt and blue pants. The rabbit has a playful expression with its mouth open and eyes wide.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the ear is mentioned in the description and is large.", + 1 + ], + [ + "The size of the ear is not mentioned, but the ear of the rabbit is mentioned.", + 0.5 + ], + [ + "The ear or the rabbit is not mentioned.", + 0 + ], + [ + "The size of the ear is mentioned in the description but is not large.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The size of the ear is mentioned in the description and is large.", + "pred_index": 0, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the ear is mentioned in the description and is pink.", + 1 + ], + [ + "The color of the ear is not mentioned, but the ear of the rabbit is mentioned.", + 0.5 + ], + [ + "The ear or the rabbit is not mentioned.", + 0 + ], + [ + "The color of the ear is mentioned in the description but is not pink.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the ear is mentioned in the description but is not pink.", + "pred_index": 3, + "question_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the eye is mentioned in the description and is black.", + 1 + ], + [ + "The color of the eye is not mentioned, but the eye of the rabbit is mentioned.", + 0.5 + ], + [ + "The eye or the rabbit is not mentioned.", + 0 + ], + [ + "The color of the eye is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the eye is not mentioned, but the eye of the rabbit is mentioned.", + "pred_index": 1, + "question_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the shirt is mentioned in the description and is yellow.", + 1 + ], + [ + "The color of the shirt is not mentioned, but the shirt of the rabbit is mentioned.", + 0.5 + ], + [ + "The shirt or the rabbit is not mentioned.", + 0 + ], + [ + "The color of the shirt is mentioned in the description but is not yellow.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the shirt is mentioned in the description and is yellow.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the skirt is mentioned in the description and is blue.", + 1 + ], + [ + "The color of the skirt is not mentioned, but the skirt of the rabbit is mentioned.", + 0.5 + ], + [ + "The skirt or the rabbit is not mentioned.", + 0 + ], + [ + "The color of the skirt is mentioned in the description but is not blue.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the skirt is mentioned in the description and is blue.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rabbit is not mentioned in the description.", + 0 + ], + [ + "The whisker of the rabbit is not mentioned in the description.", + 1 + ], + [ + "The whisker of the rabbit is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The whisker of the rabbit is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rabbit is not mentioned in the description.", + 0 + ], + [ + "The teeth of the rabbit are not mentioned in the description.", + 1 + ], + [ + "The teeth of the rabbit are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The teeth of the rabbit are not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plant is not mentioned in the description.", + 1 + ], + [ + "The plant is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The plant is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rabbit is not mentioned in the description.", + 0 + ], + [ + "The tail of the rabbit is not mentioned in the description.", + 1 + ], + [ + "The tail of the rabbit is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The tail of the rabbit is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The poster is not mentioned in the description.", + 1 + ], + [ + "The poster is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The poster is not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a rabbit or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with large, upright ears and a red backpack. It is wearing a yellow shirt and blue pants. 
The rabbit has a playful expression with its mouth open and eyes wide.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a rabbit or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with large, upright ears and a red backpack. It is wearing a yellow shirt and blue pants. The rabbit has a playful expression with its mouth open and eyes wide.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the ear is mentioned in the description and is large.\nB. The size of the ear is not mentioned, but the ear of the rabbit is mentioned.\nC. The ear or the rabbit is not mentioned.\nD. The size of the ear is mentioned in the description but is not large.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with large, upright ears and a red backpack. It is wearing a yellow shirt and blue pants. 
The rabbit has a playful expression with its mouth open and eyes wide.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the ear is mentioned in the description and is pink.\nB. The color of the ear is not mentioned, but the ear of the rabbit is mentioned.\nC. The ear or the rabbit is not mentioned.\nD. The color of the ear is mentioned in the description but is not pink.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with large, upright ears and a red backpack. It is wearing a yellow shirt and blue pants. The rabbit has a playful expression with its mouth open and eyes wide.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the eye is mentioned in the description and is black.\nB. The color of the eye is not mentioned, but the eye of the rabbit is mentioned.\nC. The eye or the rabbit is not mentioned.\nD. The color of the eye is mentioned in the description but is not black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with large, upright ears and a red backpack. 
It is wearing a yellow shirt and blue pants. The rabbit has a playful expression with its mouth open and eyes wide.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the shirt is mentioned in the description and is yellow.\nB. The color of the shirt is not mentioned, but the shirt of the rabbit is mentioned.\nC. The shirt or the rabbit is not mentioned.\nD. The color of the shirt is mentioned in the description but is not yellow.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with large, upright ears and a red backpack. It is wearing a yellow shirt and blue pants. The rabbit has a playful expression with its mouth open and eyes wide.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the skirt is mentioned in the description and is blue.\nB. The color of the skirt is not mentioned, but the skirt of the rabbit is mentioned.\nC. The skirt or the rabbit is not mentioned.\nD. The color of the skirt is mentioned in the description but is not blue.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with large, upright ears and a red backpack. It is wearing a yellow shirt and blue pants. The rabbit has a playful expression with its mouth open and eyes wide.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rabbit is not mentioned in the description.\nB. The whisker of the rabbit is not mentioned in the description.\nC. The whisker of the rabbit is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with large, upright ears and a red backpack. It is wearing a yellow shirt and blue pants. The rabbit has a playful expression with its mouth open and eyes wide.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rabbit is not mentioned in the description.\nB. The teeth of the rabbit are not mentioned in the description.\nC. The teeth of the rabbit are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with large, upright ears and a red backpack. 
It is wearing a yellow shirt and blue pants. The rabbit has a playful expression with its mouth open and eyes wide.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The plant is not mentioned in the description.\nB. The plant is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with large, upright ears and a red backpack. It is wearing a yellow shirt and blue pants. The rabbit has a playful expression with its mouth open and eyes wide.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rabbit is not mentioned in the description.\nB. The tail of the rabbit is not mentioned in the description.\nC. The tail of the rabbit is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with large, upright ears and a red backpack. It is wearing a yellow shirt and blue pants. 
The rabbit has a playful expression with its mouth open and eyes wide.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The poster is not mentioned in the description.\nB. The poster is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The size of the ear is mentioned in the description and is large.", + "D. The color of the ear is mentioned in the description but is not pink.", + "B. The color of the eye is not mentioned, but the eye of the rabbit is mentioned.", + "A. The color of the shirt is mentioned in the description and is yellow.", + "A. The color of the skirt is mentioned in the description and is blue.", + "B. The whisker of the rabbit is not mentioned in the description.", + "B. The teeth of the rabbit are not mentioned in the description.", + "A. The plant is not mentioned in the description.", + "B. The tail of the rabbit is not mentioned in the description.", + "A. The poster is not mentioned in the description." + ], + "score": 0.75, + "score_pos": 0.5, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "20568676": { + "pred": "A stainless steel cooking pot with a rounded bottom and a rolled edge, featuring two riveted handles on opposite sides.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the rim is mentioned in the description and is round.", + 1 + ], + [ + "The rim or the pot is not mentioned.", + 0 + ], + [ + "The shape of the rim is mentioned in the description but is not round.", + -1 + ], + [ + "The shape of the rim is not mentioned, but the rim of the pot is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. 
The shape of the rim is mentioned in the description and is round.", + "pred_index": 0, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the content is mentioned in the description and is yellow or brown.", + 1 + ], + [ + "The content or the pot is not mentioned.", + 0 + ], + [ + "The color of the content is mentioned in the description but is not yellow or brown.", + -1 + ], + [ + "The color of the content is not mentioned, but the content of the pot is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the content is not mentioned, but the content of the pot is mentioned.", + "pred_index": 3, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the pot is mentioned in the description and is metal.", + 1 + ], + [ + "The pot is not mentioned.", + 0 + ], + [ + "The material of the pot is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the pot is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the pot is mentioned in the description and is metal.", + "pred_index": 0, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The spout of the pot is mentioned in the description.", + -1 + ], + [ + "The spout of the pot is not mentioned in the description.", + 1 + ], + [ + "The pot is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The spout of the pot is not mentioned in the description.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bowl is mentioned in the description.", + -1 + ], + [ + "The bowl is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bowl is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottom of the pot is mentioned in the description.", + -1 + ], + [ + "The bottom of the pot is not mentioned in the description.", + 1 + ], + [ + "The pot is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The bottom of the pot is mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lid of the pot is mentioned in the description.", + -1 + ], + [ + "The lid of the pot is not mentioned in the description.", + 1 + ], + [ + "The pot is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The lid of the pot is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The stove is mentioned in the description.", + -1 + ], + [ + "The stove is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The stove is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a pot or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel cooking pot with a rounded bottom and a rolled edge, featuring two riveted handles on opposite sides.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a pot or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel cooking pot with a rounded bottom and a rolled edge, featuring two riveted handles on opposite sides.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The shape of the rim is mentioned in the description and is round.\nB. The rim or the pot is not mentioned.\nC. The shape of the rim is mentioned in the description but is not round.\nD. The shape of the rim is not mentioned, but the rim of the pot is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel cooking pot with a rounded bottom and a rolled edge, featuring two riveted handles on opposite sides.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the content is mentioned in the description and is yellow or brown.\nB. The content or the pot is not mentioned.\nC. The color of the content is mentioned in the description but is not yellow or brown.\nD. The color of the content is not mentioned, but the content of the pot is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel cooking pot with a rounded bottom and a rolled edge, featuring two riveted handles on opposite sides.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The material of the pot is mentioned in the description and is metal.\nB. The pot is not mentioned.\nC. The material of the pot is mentioned in the description but is not metal.\nD. The material of the pot is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel cooking pot with a rounded bottom and a rolled edge, featuring two riveted handles on opposite sides.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The spout of the pot is mentioned in the description.\nB. The spout of the pot is not mentioned in the description.\nC. The pot is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel cooking pot with a rounded bottom and a rolled edge, featuring two riveted handles on opposite sides.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bowl is mentioned in the description.\nB. The bowl is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel cooking pot with a rounded bottom and a rolled edge, featuring two riveted handles on opposite sides.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bottom of the pot is mentioned in the description.\nB. The bottom of the pot is not mentioned in the description.\nC. The pot is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel cooking pot with a rounded bottom and a rolled edge, featuring two riveted handles on opposite sides.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lid of the pot is mentioned in the description.\nB. The lid of the pot is not mentioned in the description.\nC. The pot is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel cooking pot with a rounded bottom and a rolled edge, featuring two riveted handles on opposite sides.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The stove is mentioned in the description.\nB. The stove is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The shape of the rim is mentioned in the description and is round.", + "D. The color of the content is not mentioned, but the content of the pot is mentioned.", + "A. The material of the pot is mentioned in the description and is metal.", + "B. The spout of the pot is not mentioned in the description.", + "B. The bowl is not mentioned in the description.", + "A. The bottom of the pot is mentioned in the description.", + "B. The lid of the pot is not mentioned in the description.", + "B. The stove is not mentioned in the description." + ], + "score": 0.6875, + "score_pos": 0.8333333333333334, + "score_neg": 0.6, + "neg_valid_num": 5, + "recognition_result": true + }, + "21107974": { + "pred": "A wooden gavel with a cylindrical head featuring three evenly spaced, horizontal grooves. The handle is smooth and tapers slightly towards the end.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the head is not mentioned, but the head of the gavel/mallet is mentioned.", + 0.5 + ], + [ + "The shape of the head is mentioned in the description and is round or cylindrical.", + 1 + ], + [ + "The head or the gavel/mallet is not mentioned.", + 0 + ], + [ + "The shape of the head is mentioned in the description but is not round or cylindrical.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The shape of the head is mentioned in the description and is round or cylindrical.", + "pred_index": 1, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the handle is not mentioned, but the handle of the gavel/mallet is mentioned.", + 0.5 + ], + [ + "The shape of the handle is mentioned in the description and is cylindrical.", + 1 + ], + [ + "The handle or the gavel/mallet is not mentioned.", + 0 + ], + [ + "The shape of the handle is mentioned in the description but is not cylindrical.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the handle is not mentioned, but the handle of the gavel/mallet is mentioned.", + "pred_index": 0, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the gavel/mallet is not mentioned.", + 0 + ], + [ + "The color of the gavel/mallet is mentioned in the description and is brown.", + 1 + ], + [ + "The gavel/mallet is not mentioned.", + 0 + ], + [ + "The color of the gavel/mallet is mentioned in the description but is not brown.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the gavel/mallet is not mentioned.", + "pred_index": 0, + "question_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the gavel/mallet is not mentioned.", + 0 + ], + [ + "The material of the gavel/mallet is mentioned in the description and is wood.", + 1 + ], + [ + "The gavel/mallet is not mentioned.", + 0 + ], + [ + "The material of the gavel/mallet is mentioned in the description but is not wood.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The material of the gavel/mallet is mentioned in the description and is wood.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The microphone is mentioned in the description.", + -1 + ], + [ + "The microphone is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The microphone is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The man is mentioned in the description.", + -1 + ], + [ + "The man is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The man is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The podium is mentioned in the description.", + -1 + ], + [ + "The podium is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The podium is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The gavel/mallet is not mentioned in the description.", + 0 + ], + [ + "The neck of the gavel/mallet is mentioned in the description.", + -1 + ], + [ + "The neck of the gavel/mallet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The neck of the gavel/mallet is mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sign is mentioned in the description.", + -1 + ], + [ + "The sign is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The sign is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a gavel/mallet or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head featuring three evenly spaced, horizontal grooves. The handle is smooth and tapers slightly towards the end.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a gavel/mallet or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head featuring three evenly spaced, horizontal grooves. The handle is smooth and tapers slightly towards the end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the head is not mentioned, but the head of the gavel/mallet is mentioned.\nB. The shape of the head is mentioned in the description and is round or cylindrical.\nC. The head or the gavel/mallet is not mentioned.\nD. The shape of the head is mentioned in the description but is not round or cylindrical.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head featuring three evenly spaced, horizontal grooves. The handle is smooth and tapers slightly towards the end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the handle is not mentioned, but the handle of the gavel/mallet is mentioned.\nB. The shape of the handle is mentioned in the description and is cylindrical.\nC. The handle or the gavel/mallet is not mentioned.\nD. The shape of the handle is mentioned in the description but is not cylindrical.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head featuring three evenly spaced, horizontal grooves. The handle is smooth and tapers slightly towards the end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the gavel/mallet is not mentioned.\nB. The color of the gavel/mallet is mentioned in the description and is brown.\nC. The gavel/mallet is not mentioned.\nD. The color of the gavel/mallet is mentioned in the description but is not brown.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head featuring three evenly spaced, horizontal grooves. The handle is smooth and tapers slightly towards the end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the gavel/mallet is not mentioned.\nB. The material of the gavel/mallet is mentioned in the description and is wood.\nC. The gavel/mallet is not mentioned.\nD. The material of the gavel/mallet is mentioned in the description but is not wood.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head featuring three evenly spaced, horizontal grooves. The handle is smooth and tapers slightly towards the end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The microphone is mentioned in the description.\nB. The microphone is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head featuring three evenly spaced, horizontal grooves. The handle is smooth and tapers slightly towards the end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The man is mentioned in the description.\nB. The man is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head featuring three evenly spaced, horizontal grooves. 
The handle is smooth and tapers slightly towards the end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The podium is mentioned in the description.\nB. The podium is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head featuring three evenly spaced, horizontal grooves. The handle is smooth and tapers slightly towards the end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The gavel/mallet is not mentioned in the description.\nB. The neck of the gavel/mallet is mentioned in the description.\nC. The neck of the gavel/mallet is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head featuring three evenly spaced, horizontal grooves. The handle is smooth and tapers slightly towards the end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sign is mentioned in the description.\nB. 
The sign is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The shape of the head is mentioned in the description and is round or cylindrical.", + "A. The shape of the handle is not mentioned, but the handle of the gavel/mallet is mentioned.", + "A. The color of the gavel/mallet is not mentioned.", + "B. The material of the gavel/mallet is mentioned in the description and is wood.", + "B. The microphone is not mentioned in the description.", + "B. The man is not mentioned in the description.", + "B. The podium is not mentioned in the description.", + "B. The neck of the gavel/mallet is mentioned in the description.", + "B. The sign is not mentioned in the description." + ], + "score": 0.6111111111111112, + "score_pos": 0.625, + "score_neg": 0.6, + "neg_valid_num": 5, + "recognition_result": true + }, + "22064315": { + "pred": "The visible part of the antelope shows two long, curved horns with a dark, almost black coloration, tapering to a point. The horns are covered in a pattern of ridges and grooves, giving them a textured appearance.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the horn is mentioned in the description and is long, curved, or pointed.", + 1 + ], + [ + "The shape of the horn is mentioned in the description but is not long, curved, or pointed.", + -1 + ], + [ + "The shape of the horn is not mentioned, but the horn of the antelope is mentioned.", + 0.5 + ], + [ + "The horn or the antelope is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The shape of the horn is mentioned in the description and is long, curved, or pointed.", + "pred_index": 0, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the horn is mentioned in the description and is grey, black, or dark.", + 1 + ], + [ + "The color of the horn is mentioned in the description but is not grey, black, or dark.", + -1 + ], + [ + "The color of the horn is not mentioned, but the horn of the antelope is mentioned.", + 0.5 + ], + [ + "The horn or the antelope is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the horn is mentioned in the description and is grey, black, or dark.", + "pred_index": 0, + "question_index": 1, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tail of the antelope is mentioned in the description.", + -1 + ], + [ + "The tail of the antelope is not mentioned in the description.", + 1 + ], + [ + "The antelope is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The tail of the antelope is not mentioned in the description.", + "pred_index": 1, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The deer are mentioned in the description.", + -1 + ], + [ + "The deer are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The deer are not mentioned in the description.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The car is mentioned in the description.", + -1 + ], + [ + "The car is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The car is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tree is mentioned in the description.", + -1 + ], + [ + "The tree is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The tree is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The grass are mentioned in the description.", + -1 + ], + [ + "The grass are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The grass are not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is an antelope or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the antelope shows two long, curved horns with a dark, almost black coloration, tapering to a point. The horns are covered in a pattern of ridges and grooves, giving them a textured appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is an antelope or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the antelope shows two long, curved horns with a dark, almost black coloration, tapering to a point. The horns are covered in a pattern of ridges and grooves, giving them a textured appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the horn is mentioned in the description and is long, curved, or pointed.\nB. The shape of the horn is mentioned in the description but is not long, curved, or pointed.\nC. The shape of the horn is not mentioned, but the horn of the antelope is mentioned.\nD. The horn or the antelope is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the antelope shows two long, curved horns with a dark, almost black coloration, tapering to a point. The horns are covered in a pattern of ridges and grooves, giving them a textured appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the horn is mentioned in the description and is grey, black, or dark.\nB. The color of the horn is mentioned in the description but is not grey, black, or dark.\nC. The color of the horn is not mentioned, but the horn of the antelope is mentioned.\nD. The horn or the antelope is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the antelope shows two long, curved horns with a dark, almost black coloration, tapering to a point. The horns are covered in a pattern of ridges and grooves, giving them a textured appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tail of the antelope is mentioned in the description.\nB. The tail of the antelope is not mentioned in the description.\nC. The antelope is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the antelope shows two long, curved horns with a dark, almost black coloration, tapering to a point. The horns are covered in a pattern of ridges and grooves, giving them a textured appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The deer are mentioned in the description.\nB. The deer are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the antelope shows two long, curved horns with a dark, almost black coloration, tapering to a point. The horns are covered in a pattern of ridges and grooves, giving them a textured appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The car is mentioned in the description.\nB. The car is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the antelope shows two long, curved horns with a dark, almost black coloration, tapering to a point. The horns are covered in a pattern of ridges and grooves, giving them a textured appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tree is mentioned in the description.\nB. The tree is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the antelope shows two long, curved horns with a dark, almost black coloration, tapering to a point. The horns are covered in a pattern of ridges and grooves, giving them a textured appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The grass are mentioned in the description.\nB. The grass are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The shape of the horn is mentioned in the description and is long, curved, or pointed.", + "A. The color of the horn is mentioned in the description and is grey, black, or dark.", + "B. The tail of the antelope is not mentioned in the description.", + "B. The deer are not mentioned in the description.", + "B. The car is not mentioned in the description.", + "B. The tree is not mentioned in the description.", + "B. The grass are not mentioned in the description." 
+ ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "22107522": { + "pred": "A black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips. The bow tie has a symmetrical design with a central knot and two loops that are slightly curved outward.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the bow tie is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the bow tie is mentioned in the description and is smooth.", + 1 + ], + [ + "The bow tie is not mentioned.", + 0 + ], + [ + "The texture of the bow tie is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The texture of the bow tie is mentioned in the description and is smooth.", + "pred_index": 1, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the bow tie is mentioned in the description but is not butterfly-shaped.", + -1 + ], + [ + "The shape of the bow tie is mentioned in the description and is butterfly-shaped.", + 1 + ], + [ + "The bow tie is not mentioned.", + 0 + ], + [ + "The shape of the bow tie is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the bow tie is mentioned in the description and is butterfly-shaped.", + "pred_index": 1, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the bow tie is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the bow tie is mentioned in the description and is black.", + 1 + ], + [ + "The bow tie is not mentioned.", + 0 + ], + [ + "The color of the bow tie is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the bow tie is mentioned in the description and is black.", + "pred_index": 1, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the bow tie is mentioned in the description but is not fabric.", + -1 + ], + [ + "The material of the bow tie is mentioned in the description and is fabric.", + 1 + ], + [ + "The bow tie is not mentioned.", + 0 + ], + [ + "The material of the bow tie is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The material of the bow tie is mentioned in the description and is fabric.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bow tie is not mentioned in the description.", + 0 + ], + [ + "The neck band of the bow tie is mentioned in the description.", + -1 + ], + [ + "The neck band of the bow tie is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The neck band of the bow tie is not mentioned in the description.", + "pred_index": 2, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The trumpet is mentioned in the description.", + -1 + ], + [ + "The trumpet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The trumpet is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The suit is mentioned in the description.", + -1 + ], + [ + "The suit is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The suit is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The person is mentioned in the description.", + -1 + ], + [ + "The person is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The person is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The glasses are mentioned in the description.", + -1 + ], + [ + "The glasses are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The glasses are not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a bow tie or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips. The bow tie has a symmetrical design with a central knot and two loops that are slightly curved outward.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a bow tie or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips. The bow tie has a symmetrical design with a central knot and two loops that are slightly curved outward.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the bow tie is mentioned in the description but is not smooth.\nB. The texture of the bow tie is mentioned in the description and is smooth.\nC. The bow tie is not mentioned.\nD. The texture of the bow tie is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips. The bow tie has a symmetrical design with a central knot and two loops that are slightly curved outward.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the bow tie is mentioned in the description but is not butterfly-shaped.\nB. The shape of the bow tie is mentioned in the description and is butterfly-shaped.\nC. The bow tie is not mentioned.\nD. The shape of the bow tie is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips. The bow tie has a symmetrical design with a central knot and two loops that are slightly curved outward.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the bow tie is mentioned in the description but is not black.\nB. The color of the bow tie is mentioned in the description and is black.\nC. The bow tie is not mentioned.\nD. The color of the bow tie is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips. The bow tie has a symmetrical design with a central knot and two loops that are slightly curved outward.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the bow tie is mentioned in the description but is not fabric.\nB. The material of the bow tie is mentioned in the description and is fabric.\nC. The bow tie is not mentioned.\nD. The material of the bow tie is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips. The bow tie has a symmetrical design with a central knot and two loops that are slightly curved outward.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bow tie is not mentioned in the description.\nB. The neck band of the bow tie is mentioned in the description.\nC. The neck band of the bow tie is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips. The bow tie has a symmetrical design with a central knot and two loops that are slightly curved outward.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The trumpet is mentioned in the description.\nB. The trumpet is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips. The bow tie has a symmetrical design with a central knot and two loops that are slightly curved outward.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The suit is mentioned in the description.\nB. The suit is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips. The bow tie has a symmetrical design with a central knot and two loops that are slightly curved outward.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The person is mentioned in the description.\nB. The person is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips. The bow tie has a symmetrical design with a central knot and two loops that are slightly curved outward.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The glasses are mentioned in the description.\nB. The glasses are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The texture of the bow tie is mentioned in the description and is smooth.", + "B. The shape of the bow tie is mentioned in the description and is butterfly-shaped.", + "B. The color of the bow tie is mentioned in the description and is black.", + "B. The material of the bow tie is mentioned in the description and is fabric.", + "C. The neck band of the bow tie is not mentioned in the description.", + "B. The trumpet is not mentioned in the description.", + "B. The suit is not mentioned in the description.", + "B. 
The person is not mentioned in the description.", + "B. The glasses are not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "24017816": { + "pred": "The car features a dark-tinted side window with a black frame, and a portion of the front windshield is visible, also with a black frame.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the windshield is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The windshield or the car is not mentioned.", + 0 + ], + [ + "The shape of the windshield is not mentioned, but the windshield of the car is mentioned.", + 0.5 + ], + [ + "The shape of the windshield is mentioned in the description and is rectangular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the windshield is not mentioned, but the windshield of the car is mentioned.", + "pred_index": 2, + "question_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the car is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The car is not mentioned.", + 0 + ], + [ + "The color of the car is not mentioned.", + 0 + ], + [ + "The color of the car is mentioned in the description and is dark or black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the car is mentioned in the description and is dark or black.", + "pred_index": 3, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the window is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The window or the car is not mentioned.", + 0 + ], + [ + "The color of the window is not mentioned, but the window of the car is mentioned.", + 0.5 + ], + [ + "The color of the window is mentioned in the description and is dark or black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the window is mentioned in the description and is dark or black.", + "pred_index": 3, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the side mirror is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The side mirror or the car is not mentioned.", + 0 + ], + [ + "The color of the side mirror is not mentioned, but the side mirror of the car is mentioned.", + 0.5 + ], + [ + "The color of the side mirror is mentioned in the description and is dark or black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the side mirror is not mentioned, but the side mirror of the car is mentioned.", + "pred_index": 2, + "question_index": 3, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fender of the car is not mentioned in the description.", + 1 + ], + [ + "The fender of the car is mentioned in the description.", + -1 + ], + [ + "The car is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The fender of the car is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The taillight of the car is not mentioned in the description.", + 1 + ], + [ + "The taillight of the car is mentioned in the description.", + -1 + ], + [ + "The car is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The taillight of the car is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tire of the car is not mentioned in the description.", + 1 + ], + [ + "The tire of the car is mentioned in the description.", + -1 + ], + [ + "The car is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The tire of the car is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The exhaust pipe of the car is not mentioned in the description.", + 1 + ], + [ + "The exhaust pipe of the car is mentioned in the description.", + -1 + ], + [ + "The car is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The exhaust pipe of the car is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The hood of the car is not mentioned in the description.", + 1 + ], + [ + "The hood of the car is mentioned in the description.", + -1 + ], + [ + "The car is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The hood of the car is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a car or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car features a dark-tinted side window with a black frame, and a portion of the front windshield is visible, also with a black frame.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a car or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car features a dark-tinted side window with a black frame, and a portion of the front windshield is visible, also with a black frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the windshield is mentioned in the description but is not rectangular.\nB. The windshield or the car is not mentioned.\nC. The shape of the windshield is not mentioned, but the windshield of the car is mentioned.\nD. The shape of the windshield is mentioned in the description and is rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car features a dark-tinted side window with a black frame, and a portion of the front windshield is visible, also with a black frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the car is mentioned in the description but is not dark or black.\nB. The car is not mentioned.\nC. The color of the car is not mentioned.\nD. The color of the car is mentioned in the description and is dark or black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car features a dark-tinted side window with a black frame, and a portion of the front windshield is visible, also with a black frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the window is mentioned in the description but is not dark or black.\nB. The window or the car is not mentioned.\nC. The color of the window is not mentioned, but the window of the car is mentioned.\nD. The color of the window is mentioned in the description and is dark or black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car features a dark-tinted side window with a black frame, and a portion of the front windshield is visible, also with a black frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the side mirror is mentioned in the description but is not dark or black.\nB. The side mirror or the car is not mentioned.\nC. The color of the side mirror is not mentioned, but the side mirror of the car is mentioned.\nD. The color of the side mirror is mentioned in the description and is dark or black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car features a dark-tinted side window with a black frame, and a portion of the front windshield is visible, also with a black frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fender of the car is not mentioned in the description.\nB. The fender of the car is mentioned in the description.\nC. The car is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car features a dark-tinted side window with a black frame, and a portion of the front windshield is visible, also with a black frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The taillight of the car is not mentioned in the description.\nB. The taillight of the car is mentioned in the description.\nC. The car is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe car features a dark-tinted side window with a black frame, and a portion of the front windshield is visible, also with a black frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tire of the car is not mentioned in the description.\nB. The tire of the car is mentioned in the description.\nC. The car is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car features a dark-tinted side window with a black frame, and a portion of the front windshield is visible, also with a black frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The exhaust pipe of the car is not mentioned in the description.\nB. The exhaust pipe of the car is mentioned in the description.\nC. The car is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe car features a dark-tinted side window with a black frame, and a portion of the front windshield is visible, also with a black frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The hood of the car is not mentioned in the description.\nB. The hood of the car is mentioned in the description.\nC. The car is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The shape of the windshield is not mentioned, but the windshield of the car is mentioned.", + "D. The color of the car is mentioned in the description and is dark or black.", + "D. The color of the window is mentioned in the description and is dark or black.", + "C. The color of the side mirror is not mentioned, but the side mirror of the car is mentioned.", + "A. The fender of the car is not mentioned in the description.", + "A. The taillight of the car is not mentioned in the description.", + "A. The tire of the car is not mentioned in the description.", + "A. The exhaust pipe of the car is not mentioned in the description.", + "A. The hood of the car is not mentioned in the description." + ], + "score": 0.8888888888888888, + "score_pos": 0.75, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "24498027": { + "pred": "A tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. 
The pole has a horizontal arm extending from the middle, supporting a lantern-style light fixture with a glass enclosure and a metal frame.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the pole is mentioned in the description but is not black or dark.", + -1 + ], + [ + "The color of the pole is not mentioned, but the pole of the street lights is mentioned.", + 0.5 + ], + [ + "The color of the pole is mentioned in the description and is black or dark.", + 1 + ], + [ + "The pole or the street lights is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the pole is mentioned in the description and is black or dark.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the lamp is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the lamp is not mentioned, but the lamp of the street lights is mentioned.", + 0.5 + ], + [ + "The color of the lamp is mentioned in the description and is white.", + 1 + ], + [ + "The lamp or the street lights is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the lamp is mentioned in the description but is not white.", + "pred_index": 0, + "question_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The direction of the bars is mentioned in the description but is not horizontal.", + -1 + ], + [ + "The direction of the bars is not mentioned, but the bars of the street lights are mentioned.", + 0.5 + ], + [ + "The direction of the bars is mentioned in the description and is horizontal.", + 1 + ], + [ + "The bars or the street lights are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The direction of the bars is mentioned in the description and is horizontal.", + "pred_index": 2, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The street lights are not mentioned in the description.", + 0 + ], + [ + "The cable of the street lights is mentioned in the description.", + -1 + ], + [ + "The cable of the street lights is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The cable of the street lights is not mentioned in the description.", + "pred_index": 2, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The street lights are not mentioned in the description.", + 0 + ], + [ + "The wire of the street lights is mentioned in the description.", + -1 + ], + [ + "The wire of the street lights is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The wire of the street lights is not mentioned in the description.", + "pred_index": 2, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bus is mentioned in the description.", + -1 + ], + [ + "The bus is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bus is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bike is mentioned in the description.", + -1 + ], + [ + "The bike is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The bike is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ambulance is mentioned in the description.", + -1 + ], + [ + "The ambulance is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The ambulance is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the objects in the description are street lights or objects of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a horizontal arm extending from the middle, supporting a lantern-style light fixture with a glass enclosure and a metal frame.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the objects in the description are street lights or objects of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a horizontal arm extending from the middle, supporting a lantern-style light fixture with a glass enclosure and a metal frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the pole is mentioned in the description but is not black or dark.\nB. The color of the pole is not mentioned, but the pole of the street lights is mentioned.\nC. The color of the pole is mentioned in the description and is black or dark.\nD. The pole or the street lights is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a horizontal arm extending from the middle, supporting a lantern-style light fixture with a glass enclosure and a metal frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the lamp is mentioned in the description but is not white.\nB. The color of the lamp is not mentioned, but the lamp of the street lights is mentioned.\nC. The color of the lamp is mentioned in the description and is white.\nD. 
The lamp or the street lights is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a horizontal arm extending from the middle, supporting a lantern-style light fixture with a glass enclosure and a metal frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The direction of the bars is mentioned in the description but is not horizontal.\nB. The direction of the bars is not mentioned, but the bars of the street lights are mentioned.\nC. The direction of the bars is mentioned in the description and is horizontal.\nD. The bars or the street lights are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a horizontal arm extending from the middle, supporting a lantern-style light fixture with a glass enclosure and a metal frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The street lights are not mentioned in the description.\nB. The cable of the street lights is mentioned in the description.\nC. The cable of the street lights is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a horizontal arm extending from the middle, supporting a lantern-style light fixture with a glass enclosure and a metal frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The street lights are not mentioned in the description.\nB. The wire of the street lights is mentioned in the description.\nC. The wire of the street lights is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a horizontal arm extending from the middle, supporting a lantern-style light fixture with a glass enclosure and a metal frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The bus is mentioned in the description.\nB. The bus is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a horizontal arm extending from the middle, supporting a lantern-style light fixture with a glass enclosure and a metal frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bike is mentioned in the description.\nB. The bike is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a horizontal arm extending from the middle, supporting a lantern-style light fixture with a glass enclosure and a metal frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ambulance is mentioned in the description.\nB. The ambulance is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. 
The color of the pole is mentioned in the description and is black or dark.", + "A. The color of the lamp is mentioned in the description but is not white.", + "C. The direction of the bars is mentioned in the description and is horizontal.", + "C. The cable of the street lights is not mentioned in the description.", + "C. The wire of the street lights is not mentioned in the description.", + "B. The bus is not mentioned in the description.", + "B. The bike is not mentioned in the description.", + "B. The ambulance is not mentioned in the description." + ], + "score": 0.75, + "score_pos": 0.3333333333333333, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "24581953": { + "pred": "A large, light gray dog with a short, smooth coat is lying down with its body stretched out. The dog has a long, slender tail that extends straight out behind it. Its legs are extended, with the front legs slightly bent and the hind legs stretched out. The dog's head is resting on the ground, and its ears are relaxed and folded back.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the dog is mentioned in the description but is not white or gray.", + -1 + ], + [ + "The color of the dog is mentioned in the description and is white or gray.", + 1 + ], + [ + "The color of the dog is not mentioned.", + 0 + ], + [ + "The dog is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the dog is mentioned in the description and is white or gray.", + "pred_index": 1, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The posture of the dog is mentioned in the description but is not lying down.", + -1 + ], + [ + "The posture of the dog is mentioned in the description and is lying down.", + 1 + ], + [ + "The posture of the dog is not mentioned.", + 0 + ], + [ + "The dog is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The posture of the dog is mentioned in the description and is lying down.", + "pred_index": 1, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the tail is mentioned in the description but is not large or long.", + -1 + ], + [ + "The size of the tail is mentioned in the description and is large or long.", + 1 + ], + [ + "The size of the tail is not mentioned, but the tail of the dog is mentioned.", + 0.5 + ], + [ + "The tail or the dog is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The size of the tail is mentioned in the description and is large or long.", + "pred_index": 1, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the dog is mentioned in the description but is not large.", + -1 + ], + [ + "The size of the dog is mentioned in the description and is large.", + 1 + ], + [ + "The size of the dog is not mentioned.", + 0 + ], + [ + "The dog is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The size of the dog is mentioned in the description and is large.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the coat is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the coat is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the coat is not mentioned, but the coat of the dog is mentioned.", + 0.5 + ], + [ + "The coat or the dog is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The texture of the coat is mentioned in the description and is smooth.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The temple is not mentioned in the description.", + 1 + ], + [ + "The temple is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The temple is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouth of the dog is not mentioned in the description.", + 1 + ], + [ + "The mouth of the dog is mentioned in the description.", + -1 + ], + [ + "The dog is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The mouth of the dog is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The eye of the dog is not mentioned in the description.", + 1 + ], + [ + "The eye of the dog is mentioned in the description.", + -1 + ], + [ + "The dog is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The eye of the dog is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The nose of the dog is not mentioned in the description.", + 1 + ], + [ + "The nose of the dog is mentioned in the description.", + -1 + ], + [ + "The dog is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The nose of the dog is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bird is not mentioned in the description.", + 1 + ], + [ + "The bird is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The bird is not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a dog or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light gray dog with a short, smooth coat is lying down with its body stretched out. The dog has a long, slender tail that extends straight out behind it. Its legs are extended, with the front legs slightly bent and the hind legs stretched out. The dog's head is resting on the ground, and its ears are relaxed and folded back.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a dog or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light gray dog with a short, smooth coat is lying down with its body stretched out. The dog has a long, slender tail that extends straight out behind it. Its legs are extended, with the front legs slightly bent and the hind legs stretched out. The dog's head is resting on the ground, and its ears are relaxed and folded back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The color of the dog is mentioned in the description but is not white or gray.\nB. The color of the dog is mentioned in the description and is white or gray.\nC. The color of the dog is not mentioned.\nD. The dog is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light gray dog with a short, smooth coat is lying down with its body stretched out. The dog has a long, slender tail that extends straight out behind it. Its legs are extended, with the front legs slightly bent and the hind legs stretched out. The dog's head is resting on the ground, and its ears are relaxed and folded back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The posture of the dog is mentioned in the description but is not lying down.\nB. The posture of the dog is mentioned in the description and is lying down.\nC. The posture of the dog is not mentioned.\nD. The dog is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light gray dog with a short, smooth coat is lying down with its body stretched out. The dog has a long, slender tail that extends straight out behind it. 
Its legs are extended, with the front legs slightly bent and the hind legs stretched out. The dog's head is resting on the ground, and its ears are relaxed and folded back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the tail is mentioned in the description but is not large or long.\nB. The size of the tail is mentioned in the description and is large or long.\nC. The size of the tail is not mentioned, but the tail of the dog is mentioned.\nD. The tail or the dog is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light gray dog with a short, smooth coat is lying down with its body stretched out. The dog has a long, slender tail that extends straight out behind it. Its legs are extended, with the front legs slightly bent and the hind legs stretched out. The dog's head is resting on the ground, and its ears are relaxed and folded back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the dog is mentioned in the description but is not large.\nB. The size of the dog is mentioned in the description and is large.\nC. The size of the dog is not mentioned.\nD. The dog is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light gray dog with a short, smooth coat is lying down with its body stretched out. The dog has a long, slender tail that extends straight out behind it. Its legs are extended, with the front legs slightly bent and the hind legs stretched out. The dog's head is resting on the ground, and its ears are relaxed and folded back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the coat is mentioned in the description but is not smooth.\nB. The texture of the coat is mentioned in the description and is smooth.\nC. The texture of the coat is not mentioned, but the coat of the dog is mentioned.\nD. The coat or the dog is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light gray dog with a short, smooth coat is lying down with its body stretched out. The dog has a long, slender tail that extends straight out behind it. Its legs are extended, with the front legs slightly bent and the hind legs stretched out. The dog's head is resting on the ground, and its ears are relaxed and folded back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The temple is not mentioned in the description.\nB. 
The temple is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light gray dog with a short, smooth coat is lying down with its body stretched out. The dog has a long, slender tail that extends straight out behind it. Its legs are extended, with the front legs slightly bent and the hind legs stretched out. The dog's head is resting on the ground, and its ears are relaxed and folded back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mouth of the dog is not mentioned in the description.\nB. The mouth of the dog is mentioned in the description.\nC. The dog is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light gray dog with a short, smooth coat is lying down with its body stretched out. The dog has a long, slender tail that extends straight out behind it. Its legs are extended, with the front legs slightly bent and the hind legs stretched out. 
The dog's head is resting on the ground, and its ears are relaxed and folded back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The eye of the dog is not mentioned in the description.\nB. The eye of the dog is mentioned in the description.\nC. The dog is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light gray dog with a short, smooth coat is lying down with its body stretched out. The dog has a long, slender tail that extends straight out behind it. Its legs are extended, with the front legs slightly bent and the hind legs stretched out. The dog's head is resting on the ground, and its ears are relaxed and folded back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The nose of the dog is not mentioned in the description.\nB. The nose of the dog is mentioned in the description.\nC. The dog is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light gray dog with a short, smooth coat is lying down with its body stretched out. 
The dog has a long, slender tail that extends straight out behind it. Its legs are extended, with the front legs slightly bent and the hind legs stretched out. The dog's head is resting on the ground, and its ears are relaxed and folded back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bird is not mentioned in the description.\nB. The bird is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the dog is mentioned in the description and is white or gray.", + "B. The posture of the dog is mentioned in the description and is lying down.", + "B. The size of the tail is mentioned in the description and is large or long.", + "B. The size of the dog is mentioned in the description and is large.", + "B. The texture of the coat is mentioned in the description and is smooth.", + "A. The temple is not mentioned in the description.", + "A. The mouth of the dog is not mentioned in the description.", + "A. The eye of the dog is not mentioned in the description.", + "A. The nose of the dog is not mentioned in the description.", + "A. The bird is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "24786060": { + "pred": "A light gray towel with a soft, plush texture, featuring a slightly wrinkled appearance. 
The towel has a rectangular shape with a visible fold running vertically down the center.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the towel is mentioned in the description but is not gray or white.", + -1 + ], + [ + "The color of the towel is not mentioned.", + 0 + ], + [ + "The color of the towel is mentioned in the description and is gray or white.", + 1 + ], + [ + "The towel is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the towel is mentioned in the description and is gray or white.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the towel is mentioned in the description but is not plush.", + -1 + ], + [ + "The texture of the towel is not mentioned.", + 0 + ], + [ + "The texture of the towel is mentioned in the description and is plush.", + 1 + ], + [ + "The towel is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The texture of the towel is mentioned in the description and is plush.", + "pred_index": 2, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the towel is mentioned in the description but is not irregular.", + -1 + ], + [ + "The shape of the towel is not mentioned.", + 0 + ], + [ + "The shape of the towel is mentioned in the description and is irregular.", + 1 + ], + [ + "The towel is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The shape of the towel is mentioned in the description but is not irregular.", + "pred_index": 0, + "question_index": 2, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The loop of the towel is mentioned in the description.", + -1 + ], + [ + "The loop of the towel is not mentioned in the description.", + 1 + ], + [ + "The towel is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The loop of the towel is not mentioned in the description.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The vanity is mentioned in the description.", + -1 + ], + [ + "The vanity is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The vanity is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sink is mentioned in the description.", + -1 + ], + [ + "The sink is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The sink is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The design of the towel is mentioned in the description.", + -1 + ], + [ + "The design of the towel is not mentioned in the description.", + 1 + ], + [ + "The towel is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The design of the towel is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet is mentioned in the description.", + -1 + ], + [ + "The toilet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The toilet is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a towel or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, plush texture, featuring a slightly wrinkled appearance. The towel has a rectangular shape with a visible fold running vertically down the center.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a towel or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, plush texture, featuring a slightly wrinkled appearance. The towel has a rectangular shape with a visible fold running vertically down the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the towel is mentioned in the description but is not gray or white.\nB. The color of the towel is not mentioned.\nC. The color of the towel is mentioned in the description and is gray or white.\nD. The towel is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, plush texture, featuring a slightly wrinkled appearance. The towel has a rectangular shape with a visible fold running vertically down the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the towel is mentioned in the description but is not plush.\nB. The texture of the towel is not mentioned.\nC. The texture of the towel is mentioned in the description and is plush.\nD. The towel is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, plush texture, featuring a slightly wrinkled appearance. The towel has a rectangular shape with a visible fold running vertically down the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the towel is mentioned in the description but is not irregular.\nB. The shape of the towel is not mentioned.\nC. The shape of the towel is mentioned in the description and is irregular.\nD. The towel is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, plush texture, featuring a slightly wrinkled appearance. The towel has a rectangular shape with a visible fold running vertically down the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The loop of the towel is mentioned in the description.\nB. The loop of the towel is not mentioned in the description.\nC. The towel is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, plush texture, featuring a slightly wrinkled appearance. The towel has a rectangular shape with a visible fold running vertically down the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The vanity is mentioned in the description.\nB. The vanity is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, plush texture, featuring a slightly wrinkled appearance. The towel has a rectangular shape with a visible fold running vertically down the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sink is mentioned in the description.\nB. The sink is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, plush texture, featuring a slightly wrinkled appearance. 
The towel has a rectangular shape with a visible fold running vertically down the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The design of the towel is mentioned in the description.\nB. The design of the towel is not mentioned in the description.\nC. The towel is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, plush texture, featuring a slightly wrinkled appearance. The towel has a rectangular shape with a visible fold running vertically down the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toilet is mentioned in the description.\nB. The toilet is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the towel is mentioned in the description and is gray or white.", + "C. The texture of the towel is mentioned in the description and is plush.", + "A. The shape of the towel is mentioned in the description but is not irregular.", + "B. The loop of the towel is not mentioned in the description.", + "B. The vanity is not mentioned in the description.", + "B. The sink is not mentioned in the description.", + "B. The design of the towel is not mentioned in the description.", + "B. The toilet is not mentioned in the description." 
+ ], + "score": 0.75, + "score_pos": 0.3333333333333333, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "25054869": { + "pred": "A beige toilet cistern with a smooth, curved top surface and a slightly protruding front edge.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lid or the toilet is not mentioned.", + 0 + ], + [ + "The material of the lid is mentioned in the description but is not ceramic.", + -1 + ], + [ + "The material of the lid is mentioned in the description and is ceramic.", + 1 + ], + [ + "The material of the lid is not mentioned, but the lid of the toilet is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The lid or the toilet is not mentioned.", + "pred_index": 0, + "question_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lid or the toilet is not mentioned.", + 0 + ], + [ + "The texture of the lid is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the lid is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the lid is not mentioned, but the lid of the toilet is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The lid or the toilet is not mentioned.", + "pred_index": 0, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tank or the toilet is not mentioned.", + 0 + ], + [ + "The shape of the tank is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the tank is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the tank is not mentioned, but the tank of the toilet is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. 
The tank or the toilet is not mentioned.", + "pred_index": 0, + "question_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet is not mentioned.", + 0 + ], + [ + "The color of the toilet is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the toilet is mentioned in the description and is white.", + 1 + ], + [ + "The color of the toilet is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the toilet is mentioned in the description but is not white.", + "pred_index": 1, + "question_index": 3, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet is not mentioned in the description.", + 0 + ], + [ + "The flush lever of the toilet is mentioned in the description.", + -1 + ], + [ + "The flush lever of the toilet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The flush lever of the toilet is not mentioned in the description.", + "pred_index": 2, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet is not mentioned in the description.", + 0 + ], + [ + "The flush button of the toilet is mentioned in the description.", + -1 + ], + [ + "The flush button of the toilet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The flush button of the toilet is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet is not mentioned in the description.", + 0 + ], + [ + "The toilet bowl of the toilet is mentioned in the description.", + -1 + ], + [ + "The toilet bowl of the toilet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The toilet bowl of the toilet is not mentioned in the description.", + "pred_index": 2, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toothbrush is mentioned in the description.", + -1 + ], + [ + "The toothbrush is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The toothbrush is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet is not mentioned in the description.", + 0 + ], + [ + "The toilet seat of the toilet is mentioned in the description.", + -1 + ], + [ + "The toilet seat of the toilet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The toilet seat of the toilet is not mentioned in the description.", + "pred_index": 2, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a toilet or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA beige toilet cistern with a smooth, curved top surface and a slightly protruding front edge.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a toilet or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA beige toilet cistern with a smooth, curved top surface and a slightly protruding front edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lid or the toilet is not mentioned.\nB. The material of the lid is mentioned in the description but is not ceramic.\nC. The material of the lid is mentioned in the description and is ceramic.\nD. The material of the lid is not mentioned, but the lid of the toilet is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA beige toilet cistern with a smooth, curved top surface and a slightly protruding front edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lid or the toilet is not mentioned.\nB. The texture of the lid is mentioned in the description but is not smooth.\nC. The texture of the lid is mentioned in the description and is smooth.\nD. The texture of the lid is not mentioned, but the lid of the toilet is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA beige toilet cistern with a smooth, curved top surface and a slightly protruding front edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tank or the toilet is not mentioned.\nB. The shape of the tank is mentioned in the description but is not rectangular.\nC. The shape of the tank is mentioned in the description and is rectangular.\nD. The shape of the tank is not mentioned, but the tank of the toilet is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA beige toilet cistern with a smooth, curved top surface and a slightly protruding front edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toilet is not mentioned.\nB. The color of the toilet is mentioned in the description but is not white.\nC. The color of the toilet is mentioned in the description and is white.\nD. The color of the toilet is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA beige toilet cistern with a smooth, curved top surface and a slightly protruding front edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toilet is not mentioned in the description.\nB. The flush lever of the toilet is mentioned in the description.\nC. The flush lever of the toilet is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA beige toilet cistern with a smooth, curved top surface and a slightly protruding front edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toilet is not mentioned in the description.\nB. The flush button of the toilet is mentioned in the description.\nC. The flush button of the toilet is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA beige toilet cistern with a smooth, curved top surface and a slightly protruding front edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toilet is not mentioned in the description.\nB. The toilet bowl of the toilet is mentioned in the description.\nC. The toilet bowl of the toilet is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA beige toilet cistern with a smooth, curved top surface and a slightly protruding front edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toothbrush is mentioned in the description.\nB. The toothbrush is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA beige toilet cistern with a smooth, curved top surface and a slightly protruding front edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toilet is not mentioned in the description.\nB. The toilet seat of the toilet is mentioned in the description.\nC. The toilet seat of the toilet is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The lid or the toilet is not mentioned.", + "A. The lid or the toilet is not mentioned.", + "A. The tank or the toilet is not mentioned.", + "B. The color of the toilet is mentioned in the description but is not white.", + "C. The flush lever of the toilet is not mentioned in the description.", + "C. The flush button of the toilet is not mentioned in the description.", + "C. The toilet bowl of the toilet is not mentioned in the description.", + "B. The toothbrush is not mentioned in the description.", + "C. The toilet seat of the toilet is not mentioned in the description." 
+ ], + "score": 0.4444444444444444, + "score_pos": -0.25, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "25273553": { + "pred": "A black tripod with three legs, each leg featuring a rubber foot for stability. The legs are connected at the top by a central column, which supports a mounting platform with a quick-release plate. The tripod has a telescopic head with a pan handle for adjusting the angle of the head.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the legs is mentioned in the description but is not slender.", + -1 + ], + [ + "The shape of the legs is mentioned in the description and is slender.", + 1 + ], + [ + "The legs or the tripod are not mentioned.", + 0 + ], + [ + "The shape of the legs is not mentioned, but the legs of the tripod are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the legs is not mentioned, but the legs of the tripod are mentioned.", + "pred_index": 3, + "question_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the tripod is mentioned in the description but is not plastic or metal.", + -1 + ], + [ + "The material of the tripod is mentioned in the description and is plastic or metal.", + 1 + ], + [ + "The tripod is not mentioned.", + 0 + ], + [ + "The material of the tripod is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "D. 
The material of the tripod is not mentioned.", + "pred_index": 3, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the tripod is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the tripod is mentioned in the description and is black.", + 1 + ], + [ + "The tripod is not mentioned.", + 0 + ], + [ + "The color of the tripod is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the tripod is mentioned in the description and is black.", + "pred_index": 1, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tripod is not mentioned in the description.", + 0 + ], + [ + "The leg locks of the tripod are not mentioned in the description.", + 1 + ], + [ + "The leg locks of the tripod are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The leg locks of the tripod are not mentioned in the description.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tripod is not mentioned in the description.", + 0 + ], + [ + "The quick release plate of the tripod is not mentioned in the description.", + 1 + ], + [ + "The quick release plate of the tripod is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The quick release plate of the tripod is mentioned in the description.", + "pred_index": 2, + "question_index": 4, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The hot air balloon is not mentioned in the description.", + 1 + ], + [ + "The hot air balloon is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The hot air balloon is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tree is not mentioned in the description.", + 1 + ], + [ + "The tree is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The tree is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sky is not mentioned in the description.", + 1 + ], + [ + "The sky is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The sky is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a tripod or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with three legs, each leg featuring a rubber foot for stability. The legs are connected at the top by a central column, which supports a mounting platform with a quick-release plate. The tripod has a telescopic head with a pan handle for adjusting the angle of the head.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a tripod or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with three legs, each leg featuring a rubber foot for stability. The legs are connected at the top by a central column, which supports a mounting platform with a quick-release plate. The tripod has a telescopic head with a pan handle for adjusting the angle of the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the legs is mentioned in the description but is not slender.\nB. The shape of the legs is mentioned in the description and is slender.\nC. The legs or the tripod are not mentioned.\nD. The shape of the legs is not mentioned, but the legs of the tripod are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with three legs, each leg featuring a rubber foot for stability. The legs are connected at the top by a central column, which supports a mounting platform with a quick-release plate. The tripod has a telescopic head with a pan handle for adjusting the angle of the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the tripod is mentioned in the description but is not plastic or metal.\nB. The material of the tripod is mentioned in the description and is plastic or metal.\nC. The tripod is not mentioned.\nD. The material of the tripod is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with three legs, each leg featuring a rubber foot for stability. The legs are connected at the top by a central column, which supports a mounting platform with a quick-release plate. The tripod has a telescopic head with a pan handle for adjusting the angle of the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the tripod is mentioned in the description but is not black.\nB. 
The color of the tripod is mentioned in the description and is black.\nC. The tripod is not mentioned.\nD. The color of the tripod is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with three legs, each leg featuring a rubber foot for stability. The legs are connected at the top by a central column, which supports a mounting platform with a quick-release plate. The tripod has a telescopic head with a pan handle for adjusting the angle of the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tripod is not mentioned in the description.\nB. The leg locks of the tripod are not mentioned in the description.\nC. The leg locks of the tripod are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with three legs, each leg featuring a rubber foot for stability. The legs are connected at the top by a central column, which supports a mounting platform with a quick-release plate. 
The tripod has a telescopic head with a pan handle for adjusting the angle of the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tripod is not mentioned in the description.\nB. The quick release plate of the tripod is not mentioned in the description.\nC. The quick release plate of the tripod is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with three legs, each leg featuring a rubber foot for stability. The legs are connected at the top by a central column, which supports a mounting platform with a quick-release plate. The tripod has a telescopic head with a pan handle for adjusting the angle of the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The hot air balloon is not mentioned in the description.\nB. The hot air balloon is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with three legs, each leg featuring a rubber foot for stability. 
The legs are connected at the top by a central column, which supports a mounting platform with a quick-release plate. The tripod has a telescopic head with a pan handle for adjusting the angle of the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tree is not mentioned in the description.\nB. The tree is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with three legs, each leg featuring a rubber foot for stability. The legs are connected at the top by a central column, which supports a mounting platform with a quick-release plate. The tripod has a telescopic head with a pan handle for adjusting the angle of the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sky is not mentioned in the description.\nB. The sky is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The shape of the legs is not mentioned, but the legs of the tripod are mentioned.", + "D. The material of the tripod is not mentioned.", + "B. The color of the tripod is mentioned in the description and is black.", + "B. The leg locks of the tripod are not mentioned in the description.", + "C. The quick release plate of the tripod is mentioned in the description.", + "A. The hot air balloon is not mentioned in the description.", + "A. The tree is not mentioned in the description.", + "A. The sky is not mentioned in the description." 
+ ], + "score": 0.5625, + "score_pos": 0.5, + "score_neg": 0.6, + "neg_valid_num": 5, + "recognition_result": true + }, + "25419495": { + "pred": "The tongs have a dark green handle with a black grip at the end. The metal arms are slightly curved and have a dark, matte finish. The tips of the arms are pointed and designed for precise gripping.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the gripping ends/claws is mentioned in the description and is black.", + 1 + ], + [ + "The color of the gripping ends/claws is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the gripping ends/claws is not mentioned, but the gripping ends/claws of the tong are mentioned.", + 0.5 + ], + [ + "The gripping ends/claws or the tong are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the gripping ends/claws is mentioned in the description and is black.", + "pred_index": 0, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the gripping ends/claws is mentioned in the description and is scalloped.", + 1 + ], + [ + "The shape of the gripping ends/claws is mentioned in the description but is not scalloped.", + -1 + ], + [ + "The shape of the gripping ends/claws is not mentioned, but the gripping ends/claws of the tong are mentioned.", + 0.5 + ], + [ + "The gripping ends/claws or the tong are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The shape of the gripping ends/claws is mentioned in the description but is not scalloped.", + "pred_index": 1, + "question_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the handle/arm is mentioned in the description and is black or metallic.", + 1 + ], + [ + "The color of the handle/arm is mentioned in the description but is not black or metallic.", + -1 + ], + [ + "The color of the handle/arm is not mentioned, but the handle/arm of the tong is mentioned.", + 0.5 + ], + [ + "The handle/arm or the tong is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the handle/arm is mentioned in the description but is not black or metallic.", + "pred_index": 1, + "question_index": 2, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the handle/arm is mentioned in the description and is metal or rubber.", + 1 + ], + [ + "The material of the handle/arm is mentioned in the description but is not metal or rubber.", + -1 + ], + [ + "The material of the handle/arm is not mentioned, but the handle/arm of the tong is mentioned.", + 0.5 + ], + [ + "The handle/arm or the tong is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The material of the handle/arm is mentioned in the description but is not metal or rubber.", + "pred_index": 1, + "question_index": 3, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The spoon is mentioned in the description.", + -1 + ], + [ + "The spoon is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The spoon is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The spring of the tong is mentioned in the description.", + -1 + ], + [ + "The tong is not mentioned in the description.", + 0 + ], + [ + "The spring of the tong is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The spring of the tong is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plate is mentioned in the description.", + -1 + ], + [ + "The plate is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The plate is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cup is mentioned in the description.", + -1 + ], + [ + "The cup is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The cup is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fork is mentioned in the description.", + -1 + ], + [ + "The fork is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The fork is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a tong or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a dark green handle with a black grip at the end. The metal arms are slightly curved and have a dark, matte finish. The tips of the arms are pointed and designed for precise gripping.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a tong or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a dark green handle with a black grip at the end. The metal arms are slightly curved and have a dark, matte finish. 
The tips of the arms are pointed and designed for precise gripping.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the gripping ends/claws is mentioned in the description and is black.\nB. The color of the gripping ends/claws is mentioned in the description but is not black.\nC. The color of the gripping ends/claws is not mentioned, but the gripping ends/claws of the tong are mentioned.\nD. The gripping ends/claws or the tong are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a dark green handle with a black grip at the end. The metal arms are slightly curved and have a dark, matte finish. The tips of the arms are pointed and designed for precise gripping.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the gripping ends/claws is mentioned in the description and is scalloped.\nB. The shape of the gripping ends/claws is mentioned in the description but is not scalloped.\nC. The shape of the gripping ends/claws is not mentioned, but the gripping ends/claws of the tong are mentioned.\nD. The gripping ends/claws or the tong are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a dark green handle with a black grip at the end. The metal arms are slightly curved and have a dark, matte finish. The tips of the arms are pointed and designed for precise gripping.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the handle/arm is mentioned in the description and is black or metallic.\nB. The color of the handle/arm is mentioned in the description but is not black or metallic.\nC. The color of the handle/arm is not mentioned, but the handle/arm of the tong is mentioned.\nD. The handle/arm or the tong is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a dark green handle with a black grip at the end. The metal arms are slightly curved and have a dark, matte finish. The tips of the arms are pointed and designed for precise gripping.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the handle/arm is mentioned in the description and is metal or rubber.\nB. The material of the handle/arm is mentioned in the description but is not metal or rubber.\nC. The material of the handle/arm is not mentioned, but the handle/arm of the tong is mentioned.\nD. The handle/arm or the tong is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a dark green handle with a black grip at the end. The metal arms are slightly curved and have a dark, matte finish. The tips of the arms are pointed and designed for precise gripping.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The spoon is mentioned in the description.\nB. The spoon is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a dark green handle with a black grip at the end. The metal arms are slightly curved and have a dark, matte finish. The tips of the arms are pointed and designed for precise gripping.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The spring of the tong is mentioned in the description.\nB. The tong is not mentioned in the description.\nC. The spring of the tong is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a dark green handle with a black grip at the end. The metal arms are slightly curved and have a dark, matte finish. The tips of the arms are pointed and designed for precise gripping.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The plate is mentioned in the description.\nB. The plate is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a dark green handle with a black grip at the end. The metal arms are slightly curved and have a dark, matte finish. The tips of the arms are pointed and designed for precise gripping.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cup is mentioned in the description.\nB. The cup is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a dark green handle with a black grip at the end. The metal arms are slightly curved and have a dark, matte finish. 
The tips of the arms are pointed and designed for precise gripping.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fork is mentioned in the description.\nB. The fork is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The color of the gripping ends/claws is mentioned in the description and is black.", + "B. The shape of the gripping ends/claws is mentioned in the description but is not scalloped.", + "B. The color of the handle/arm is mentioned in the description but is not black or metallic.", + "B. The material of the handle/arm is mentioned in the description but is not metal or rubber.", + "B. The spoon is not mentioned in the description.", + "C. The spring of the tong is not mentioned in the description.", + "B. The plate is not mentioned in the description.", + "B. The cup is not mentioned in the description.", + "B. The fork is not mentioned in the description." + ], + "score": 0.3333333333333333, + "score_pos": -0.5, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "25419516": { + "pred": "The toy is a plush, blue creature with large, expressive eyes and prominent, pointed ears. It has a small tuft of hair on top of its head and a light blue underbelly.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the stuffed toy is mentioned in the description but is not plush.", + -1 + ], + [ + "The material of the stuffed toy is not mentioned.", + 0 + ], + [ + "The material of the stuffed toy is mentioned in the description and is plush.", + 1 + ], + [ + "The stuffed toy is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The material of the stuffed toy is mentioned in the description and is plush.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the eyes is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the eyes is not mentioned, but the eyes of the stuffed toy are mentioned.", + 0.5 + ], + [ + "The color of the eyes is mentioned in the description and is black.", + 1 + ], + [ + "The eyes or the stuffed toy are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the eyes is not mentioned, but the eyes of the stuffed toy are mentioned.", + "pred_index": 1, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the nose is mentioned in the description but is not blue.", + -1 + ], + [ + "The color of the nose is not mentioned, but the nose of the stuffed toy is mentioned.", + 0.5 + ], + [ + "The color of the nose is mentioned in the description and is blue.", + 1 + ], + [ + "The nose or the stuffed toy is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "D. The nose or the stuffed toy is not mentioned.", + "pred_index": 3, + "question_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the body is mentioned in the description but is not blue.", + -1 + ], + [ + "The color of the body is not mentioned, but the body of the stuffed toy is mentioned.", + 0.5 + ], + [ + "The color of the body is mentioned in the description and is blue.", + 1 + ], + [ + "The body or the stuffed toy is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the body is mentioned in the description and is blue.", + "pred_index": 2, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the nose is mentioned in the description but is not round.", + -1 + ], + [ + "The shape of the nose is not mentioned, but the nose of the stuffed toy is mentioned.", + 0.5 + ], + [ + "The shape of the nose is mentioned in the description and is round.", + 1 + ], + [ + "The nose or the stuffed toy is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "D. The nose or the stuffed toy is not mentioned.", + "pred_index": 3, + "question_index": 4, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tail of the stuffed toy is not mentioned in the description.", + 1 + ], + [ + "The stuffed toy is not mentioned in the description.", + 0 + ], + [ + "The tail of the stuffed toy is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The tail of the stuffed toy is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The feet of the stuffed toy are not mentioned in the description.", + 1 + ], + [ + "The stuffed toy is not mentioned in the description.", + 0 + ], + [ + "The feet of the stuffed toy are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The feet of the stuffed toy are not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The door is not mentioned in the description.", + 1 + ], + [ + "The door is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The door is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chair is not mentioned in the description.", + 1 + ], + [ + "The chair is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The chair is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fork is not mentioned in the description.", + 1 + ], + [ + "The fork is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The fork is not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a stuffed toy or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toy is a plush, blue creature with large, expressive eyes and prominent, pointed ears. It has a small tuft of hair on top of its head and a light blue underbelly.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a stuffed toy or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toy is a plush, blue creature with large, expressive eyes and prominent, pointed ears. It has a small tuft of hair on top of its head and a light blue underbelly.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the stuffed toy is mentioned in the description but is not plush.\nB. The material of the stuffed toy is not mentioned.\nC. The material of the stuffed toy is mentioned in the description and is plush.\nD. The stuffed toy is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe toy is a plush, blue creature with large, expressive eyes and prominent, pointed ears. It has a small tuft of hair on top of its head and a light blue underbelly.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the eyes is mentioned in the description but is not black.\nB. The color of the eyes is not mentioned, but the eyes of the stuffed toy are mentioned.\nC. The color of the eyes is mentioned in the description and is black.\nD. The eyes or the stuffed toy are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toy is a plush, blue creature with large, expressive eyes and prominent, pointed ears. It has a small tuft of hair on top of its head and a light blue underbelly.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the nose is mentioned in the description but is not blue.\nB. The color of the nose is not mentioned, but the nose of the stuffed toy is mentioned.\nC. The color of the nose is mentioned in the description and is blue.\nD. The nose or the stuffed toy is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toy is a plush, blue creature with large, expressive eyes and prominent, pointed ears. It has a small tuft of hair on top of its head and a light blue underbelly.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the body is mentioned in the description but is not blue.\nB. The color of the body is not mentioned, but the body of the stuffed toy is mentioned.\nC. The color of the body is mentioned in the description and is blue.\nD. The body or the stuffed toy is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toy is a plush, blue creature with large, expressive eyes and prominent, pointed ears. It has a small tuft of hair on top of its head and a light blue underbelly.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the nose is mentioned in the description but is not round.\nB. The shape of the nose is not mentioned, but the nose of the stuffed toy is mentioned.\nC. The shape of the nose is mentioned in the description and is round.\nD. The nose or the stuffed toy is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toy is a plush, blue creature with large, expressive eyes and prominent, pointed ears. It has a small tuft of hair on top of its head and a light blue underbelly.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tail of the stuffed toy is not mentioned in the description.\nB. The stuffed toy is not mentioned in the description.\nC. The tail of the stuffed toy is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toy is a plush, blue creature with large, expressive eyes and prominent, pointed ears. It has a small tuft of hair on top of its head and a light blue underbelly.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The feet of the stuffed toy are not mentioned in the description.\nB. The stuffed toy is not mentioned in the description.\nC. The feet of the stuffed toy are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe toy is a plush, blue creature with large, expressive eyes and prominent, pointed ears. It has a small tuft of hair on top of its head and a light blue underbelly.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The door is not mentioned in the description.\nB. The door is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toy is a plush, blue creature with large, expressive eyes and prominent, pointed ears. It has a small tuft of hair on top of its head and a light blue underbelly.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chair is not mentioned in the description.\nB. The chair is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toy is a plush, blue creature with large, expressive eyes and prominent, pointed ears. 
It has a small tuft of hair on top of its head and a light blue underbelly.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fork is not mentioned in the description.\nB. The fork is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The material of the stuffed toy is mentioned in the description and is plush.", + "B. The color of the eyes is not mentioned, but the eyes of the stuffed toy are mentioned.", + "D. The nose or the stuffed toy is not mentioned.", + "C. The color of the body is mentioned in the description and is blue.", + "D. The nose or the stuffed toy is not mentioned.", + "A. The tail of the stuffed toy is not mentioned in the description.", + "A. The feet of the stuffed toy are not mentioned in the description.", + "A. The door is not mentioned in the description.", + "A. The chair is not mentioned in the description.", + "A. The fork is not mentioned in the description." + ], + "score": 0.75, + "score_pos": 0.5, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "25579493": { + "pred": "A square-shaped mixing bowl with rounded edges, featuring a light blue exterior and a cream-colored interior. The bowl contains a mixture of white and yellow ingredients, with a small piece of red garnish on top.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bowl is not mentioned.", + 0 + ], + [ + "The shape of the bowl is not mentioned.", + 0 + ], + [ + "The shape of the bowl is mentioned in the description and is square or rounded.", + 1 + ], + [ + "The shape of the bowl is mentioned in the description but is not square or rounded.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the bowl is mentioned in the description and is square or rounded.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bowl is not mentioned.", + 0 + ], + [ + "The texture of the bowl is not mentioned.", + 0 + ], + [ + "The texture of the bowl is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the bowl is mentioned in the description but is not smooth.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The texture of the bowl is not mentioned.", + "pred_index": 1, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bowl is not mentioned.", + 0 + ], + [ + "The color of the bowl is not mentioned.", + 0 + ], + [ + "The color of the bowl is mentioned in the description and is white or beige.", + 1 + ], + [ + "The color of the bowl is mentioned in the description but is not white or beige.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the bowl is mentioned in the description but is not white or beige.", + "pred_index": 3, + "question_index": 2, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The content or the bowl is not mentioned.", + 0 + ], + [ + "The color of the content is not mentioned, but the content of the bowl is mentioned.", + 0.5 + ], + [ + "The color of the content is mentioned in the description and is red, white, or yellow.", + 1 + ], + [ + "The color of the content is mentioned in the description but is not red, white, or yellow.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the content is mentioned in the description and is red, white, or yellow.", + "pred_index": 2, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The base of the bowl is not mentioned in the description.", + 1 + ], + [ + "The base of the bowl is mentioned in the description.", + -1 + ], + [ + "The bowl is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The base of the bowl is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lid of the bowl is not mentioned in the description.", + 1 + ], + [ + "The lid of the bowl is mentioned in the description.", + -1 + ], + [ + "The bowl is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The lid of the bowl is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The knife is not mentioned in the description.", + 1 + ], + [ + "The knife is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The knife is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cabinet is not mentioned in the description.", + 1 + ], + [ + "The cabinet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The cabinet is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The burner is not mentioned in the description.", + 1 + ], + [ + "The burner is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The burner is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a bowl or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square-shaped mixing bowl with rounded edges, featuring a light blue exterior and a cream-colored interior. The bowl contains a mixture of white and yellow ingredients, with a small piece of red garnish on top.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a bowl or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square-shaped mixing bowl with rounded edges, featuring a light blue exterior and a cream-colored interior. The bowl contains a mixture of white and yellow ingredients, with a small piece of red garnish on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bowl is not mentioned.\nB. The shape of the bowl is not mentioned.\nC. The shape of the bowl is mentioned in the description and is square or rounded.\nD. The shape of the bowl is mentioned in the description but is not square or rounded.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square-shaped mixing bowl with rounded edges, featuring a light blue exterior and a cream-colored interior. The bowl contains a mixture of white and yellow ingredients, with a small piece of red garnish on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bowl is not mentioned.\nB. The texture of the bowl is not mentioned.\nC. The texture of the bowl is mentioned in the description and is smooth.\nD. The texture of the bowl is mentioned in the description but is not smooth.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square-shaped mixing bowl with rounded edges, featuring a light blue exterior and a cream-colored interior. The bowl contains a mixture of white and yellow ingredients, with a small piece of red garnish on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bowl is not mentioned.\nB. The color of the bowl is not mentioned.\nC. The color of the bowl is mentioned in the description and is white or beige.\nD. The color of the bowl is mentioned in the description but is not white or beige.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square-shaped mixing bowl with rounded edges, featuring a light blue exterior and a cream-colored interior. The bowl contains a mixture of white and yellow ingredients, with a small piece of red garnish on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The content or the bowl is not mentioned.\nB. The color of the content is not mentioned, but the content of the bowl is mentioned.\nC. The color of the content is mentioned in the description and is red, white, or yellow.\nD. 
The color of the content is mentioned in the description but is not red, white, or yellow.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square-shaped mixing bowl with rounded edges, featuring a light blue exterior and a cream-colored interior. The bowl contains a mixture of white and yellow ingredients, with a small piece of red garnish on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The base of the bowl is not mentioned in the description.\nB. The base of the bowl is mentioned in the description.\nC. The bowl is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square-shaped mixing bowl with rounded edges, featuring a light blue exterior and a cream-colored interior. The bowl contains a mixture of white and yellow ingredients, with a small piece of red garnish on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lid of the bowl is not mentioned in the description.\nB. The lid of the bowl is mentioned in the description.\nC. 
The bowl is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square-shaped mixing bowl with rounded edges, featuring a light blue exterior and a cream-colored interior. The bowl contains a mixture of white and yellow ingredients, with a small piece of red garnish on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The knife is not mentioned in the description.\nB. The knife is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square-shaped mixing bowl with rounded edges, featuring a light blue exterior and a cream-colored interior. The bowl contains a mixture of white and yellow ingredients, with a small piece of red garnish on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cabinet is not mentioned in the description.\nB. The cabinet is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square-shaped mixing bowl with rounded edges, featuring a light blue exterior and a cream-colored interior. The bowl contains a mixture of white and yellow ingredients, with a small piece of red garnish on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The burner is not mentioned in the description.\nB. The burner is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The shape of the bowl is mentioned in the description and is square or rounded.", + "B. The texture of the bowl is not mentioned.", + "D. The color of the bowl is mentioned in the description but is not white or beige.", + "C. The color of the content is mentioned in the description and is red, white, or yellow.", + "A. The base of the bowl is not mentioned in the description.", + "A. The lid of the bowl is not mentioned in the description.", + "A. The knife is not mentioned in the description.", + "A. The cabinet is not mentioned in the description.", + "A. The burner is not mentioned in the description." + ], + "score": 0.6666666666666666, + "score_pos": 0.25, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "297718": { + "pred": "A piece of sushi with a bed of white rice, topped with a layer of black seaweed, and filled with a mixture of pink and white fish roe. 
The top is garnished with a sprinkle of sesame seeds.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The position of the rice is mentioned in the description and is outer layer.", + 1 + ], + [ + "The position of the rice is mentioned in the description but is not outer layer.", + -1 + ], + [ + "The position of the rice is not mentioned, but the rice of the sushi is mentioned.", + 0.5 + ], + [ + "The rice or the sushi is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The position of the rice is mentioned in the description and is outer layer.", + "pred_index": 0, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the wrap is mentioned in the description and is seaweed sheet.", + 1 + ], + [ + "The type of the wrap is mentioned in the description but is not seaweed sheet.", + -1 + ], + [ + "The type of the wrap is not mentioned, but the wrap of the sushi is mentioned.", + 0.5 + ], + [ + "The wrap or the sushi is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The type of the wrap is mentioned in the description and is seaweed sheet.", + "pred_index": 0, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the sesame seeds is mentioned in the description and is black, yellow, or light brown.", + 1 + ], + [ + "The color of the sesame seeds is mentioned in the description but is not black, yellow, or light brown.", + -1 + ], + [ + "The color of the sesame seeds is not mentioned, but the sesame seeds of the sushi are mentioned.", + 0.5 + ], + [ + "The sesame seeds or the sushi are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the sesame seeds is mentioned in the description but is not black, yellow, or light brown.", + "pred_index": 1, + "question_index": 2, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the filling is mentioned in the description and is fish or crab meat.", + 1 + ], + [ + "The type of the filling is mentioned in the description but is not fish or crab meat.", + -1 + ], + [ + "The type of the filling is not mentioned, but the filling of the sushi is mentioned.", + 0.5 + ], + [ + "The filling or the sushi is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The type of the filling is mentioned in the description but is not fish or crab meat.", + "pred_index": 1, + "question_index": 3, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the topping is mentioned in the description and is sesame seeds.", + 1 + ], + [ + "The type of the topping is mentioned in the description but is not sesame seeds.", + -1 + ], + [ + "The type of the topping is not mentioned, but the topping of the sushi is mentioned.", + 0.5 + ], + [ + "The topping or the sushi is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The type of the topping is mentioned in the description but is not sesame seeds.", + "pred_index": 1, + "question_index": 4, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wasabi of the sushi is not mentioned in the description.", + 1 + ], + [ + "The sushi is not mentioned in the description.", + 0 + ], + [ + "The wasabi of the sushi is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The wasabi of the sushi is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The soy sauce of the sushi is not mentioned in the description.", + 1 + ], + [ + "The sushi is not mentioned in the description.", + 0 + ], + [ + "The soy sauce of the sushi is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The soy sauce of the sushi is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The avocado of the sushi is not mentioned in the description.", + 1 + ], + [ + "The sushi is not mentioned in the description.", + 0 + ], + [ + "The avocado of the sushi is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The avocado of the sushi is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pickled ginger of the sushi is not mentioned in the description.", + 1 + ], + [ + "The sushi is not mentioned in the description.", + 0 + ], + [ + "The pickled ginger of the sushi is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The pickled ginger of the sushi is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pineapple chunks are not mentioned in the description.", + 1 + ], + [ + "The pineapple chunks are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The pineapple chunks are not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a sushi or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice, topped with a layer of black seaweed, and filled with a mixture of pink and white fish roe. The top is garnished with a sprinkle of sesame seeds.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a sushi or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice, topped with a layer of black seaweed, and filled with a mixture of pink and white fish roe. The top is garnished with a sprinkle of sesame seeds.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The position of the rice is mentioned in the description and is outer layer.\nB. The position of the rice is mentioned in the description but is not outer layer.\nC. The position of the rice is not mentioned, but the rice of the sushi is mentioned.\nD. The rice or the sushi is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice, topped with a layer of black seaweed, and filled with a mixture of pink and white fish roe. The top is garnished with a sprinkle of sesame seeds.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The type of the wrap is mentioned in the description and is seaweed sheet.\nB. The type of the wrap is mentioned in the description but is not seaweed sheet.\nC. The type of the wrap is not mentioned, but the wrap of the sushi is mentioned.\nD. 
The wrap or the sushi is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice, topped with a layer of black seaweed, and filled with a mixture of pink and white fish roe. The top is garnished with a sprinkle of sesame seeds.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the sesame seeds is mentioned in the description and is black, yellow, or light brown.\nB. The color of the sesame seeds is mentioned in the description but is not black, yellow, or light brown.\nC. The color of the sesame seeds is not mentioned, but the sesame seeds of the sushi are mentioned.\nD. The sesame seeds or the sushi are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice, topped with a layer of black seaweed, and filled with a mixture of pink and white fish roe. The top is garnished with a sprinkle of sesame seeds.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The type of the filling is mentioned in the description and is fish or crab meat.\nB. 
The type of the filling is mentioned in the description but is not fish or crab meat.\nC. The type of the filling is not mentioned, but the filling of the sushi is mentioned.\nD. The filling or the sushi is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice, topped with a layer of black seaweed, and filled with a mixture of pink and white fish roe. The top is garnished with a sprinkle of sesame seeds.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The type of the topping is mentioned in the description and is sesame seeds.\nB. The type of the topping is mentioned in the description but is not sesame seeds.\nC. The type of the topping is not mentioned, but the topping of the sushi is mentioned.\nD. The topping or the sushi is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice, topped with a layer of black seaweed, and filled with a mixture of pink and white fish roe. 
The top is garnished with a sprinkle of sesame seeds.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wasabi of the sushi is not mentioned in the description.\nB. The sushi is not mentioned in the description.\nC. The wasabi of the sushi is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice, topped with a layer of black seaweed, and filled with a mixture of pink and white fish roe. The top is garnished with a sprinkle of sesame seeds.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The soy sauce of the sushi is not mentioned in the description.\nB. The sushi is not mentioned in the description.\nC. The soy sauce of the sushi is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice, topped with a layer of black seaweed, and filled with a mixture of pink and white fish roe. 
The top is garnished with a sprinkle of sesame seeds.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The avocado of the sushi is not mentioned in the description.\nB. The sushi is not mentioned in the description.\nC. The avocado of the sushi is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice, topped with a layer of black seaweed, and filled with a mixture of pink and white fish roe. The top is garnished with a sprinkle of sesame seeds.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pickled ginger of the sushi is not mentioned in the description.\nB. The sushi is not mentioned in the description.\nC. The pickled ginger of the sushi is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice, topped with a layer of black seaweed, and filled with a mixture of pink and white fish roe. 
The top is garnished with a sprinkle of sesame seeds.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pineapple chunks are not mentioned in the description.\nB. The pineapple chunks are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The position of the rice is mentioned in the description and is outer layer.", + "A. The type of the wrap is mentioned in the description and is seaweed sheet.", + "B. The color of the sesame seeds is mentioned in the description but is not black, yellow, or light brown.", + "B. The type of the filling is mentioned in the description but is not fish or crab meat.", + "B. The type of the topping is mentioned in the description but is not sesame seeds.", + "A. The wasabi of the sushi is not mentioned in the description.", + "A. The soy sauce of the sushi is not mentioned in the description.", + "A. The avocado of the sushi is not mentioned in the description.", + "A. The pickled ginger of the sushi is not mentioned in the description.", + "A. The pineapple chunks are not mentioned in the description." + ], + "score": 0.4, + "score_pos": -0.2, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "361105": { + "pred": "A small cluster of fresh, vibrant green leaves with a smooth texture, attached to a slender, slightly curved stem. The leaves are elongated with pointed tips and a glossy surface, showing a few small brown spots.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The herb is not mentioned.", + 0 + ], + [ + "The color of the herb is mentioned in the description and is green.", + 1 + ], + [ + "The color of the herb is mentioned in the description but is not green.", + -1 + ], + [ + "The color of the herb is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the herb is mentioned in the description and is green.", + "pred_index": 1, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The stems or the herb are not mentioned.", + 0 + ], + [ + "The shape of the stems is mentioned in the description and is thin or slender.", + 1 + ], + [ + "The shape of the stems is mentioned in the description but is not thin or slender.", + -1 + ], + [ + "The shape of the stems is not mentioned, but the stems of the herb are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the stems is mentioned in the description and is thin or slender.", + "pred_index": 1, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The leaves or the herb are not mentioned.", + 0 + ], + [ + "The dark spots of the leaves is mentioned in the description and is visible.", + 1 + ], + [ + "The dark spots of the leaves is mentioned in the description but is not visible.", + -1 + ], + [ + "The dark spots of the leaves is not mentioned, but the leaves of the herb are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The dark spots of the leaves is mentioned in the description and is visible.", + "pred_index": 1, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The flowers of the herb are not mentioned in the description.", + 1 + ], + [ + "The herb is not mentioned in the description.", + 0 + ], + [ + "The flowers of the herb are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The flowers of the herb are not mentioned in the description.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The seeds of the herb are not mentioned in the description.", + 1 + ], + [ + "The herb is not mentioned in the description.", + 0 + ], + [ + "The seeds of the herb are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The seeds of the herb are not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The roots of the herb are not mentioned in the description.", + 1 + ], + [ + "The herb is not mentioned in the description.", + 0 + ], + [ + "The roots of the herb are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The roots of the herb are not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cream sauce is not mentioned in the description.", + 1 + ], + [ + "The cream sauce is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The cream sauce is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The scallops are not mentioned in the description.", + 1 + ], + [ + "The scallops are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The scallops are not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a herb or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a slender, slightly curved stem. The leaves are elongated with pointed tips and a glossy surface, showing a few small brown spots.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a herb or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a slender, slightly curved stem. 
The leaves are elongated with pointed tips and a glossy surface, showing a few small brown spots.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The herb is not mentioned.\nB. The color of the herb is mentioned in the description and is green.\nC. The color of the herb is mentioned in the description but is not green.\nD. The color of the herb is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a slender, slightly curved stem. The leaves are elongated with pointed tips and a glossy surface, showing a few small brown spots.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The stems or the herb are not mentioned.\nB. The shape of the stems is mentioned in the description and is thin or slender.\nC. The shape of the stems is mentioned in the description but is not thin or slender.\nD. The shape of the stems is not mentioned, but the stems of the herb are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a slender, slightly curved stem. The leaves are elongated with pointed tips and a glossy surface, showing a few small brown spots.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The leaves or the herb are not mentioned.\nB. The dark spots of the leaves is mentioned in the description and is visible.\nC. The dark spots of the leaves is mentioned in the description but is not visible.\nD. The dark spots of the leaves is not mentioned, but the leaves of the herb are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a slender, slightly curved stem. The leaves are elongated with pointed tips and a glossy surface, showing a few small brown spots.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The flowers of the herb are not mentioned in the description.\nB. The herb is not mentioned in the description.\nC. The flowers of the herb are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a slender, slightly curved stem. The leaves are elongated with pointed tips and a glossy surface, showing a few small brown spots.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The seeds of the herb are not mentioned in the description.\nB. The herb is not mentioned in the description.\nC. The seeds of the herb are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a slender, slightly curved stem. The leaves are elongated with pointed tips and a glossy surface, showing a few small brown spots.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The roots of the herb are not mentioned in the description.\nB. The herb is not mentioned in the description.\nC. The roots of the herb are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a slender, slightly curved stem. The leaves are elongated with pointed tips and a glossy surface, showing a few small brown spots.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cream sauce is not mentioned in the description.\nB. The cream sauce is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a slender, slightly curved stem. The leaves are elongated with pointed tips and a glossy surface, showing a few small brown spots.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The scallops are not mentioned in the description.\nB. The scallops are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the herb is mentioned in the description and is green.", + "B. The shape of the stems is mentioned in the description and is thin or slender.", + "B. The dark spots of the leaves is mentioned in the description and is visible.", + "A. The flowers of the herb are not mentioned in the description.", + "A. The seeds of the herb are not mentioned in the description.", + "A. The roots of the herb are not mentioned in the description.", + "A. The cream sauce is not mentioned in the description.", + "A. 
The scallops are not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "1196168": { + "pred": "A rectangular, white air conditioner unit with a large circular fan grille on the left side. The grille has a grid pattern with multiple blades visible. To the right of the grille, there is a small rectangular panel with a circular emblem and some text.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the grille is mentioned in the description and is smooth with ridges.", + 1 + ], + [ + "The texture of the grille is mentioned in the description but is not smooth with ridges.", + -1 + ], + [ + "The texture of the grille is not mentioned, but the grille of the air conditioner is mentioned.", + 0.5 + ], + [ + "The grille or the air conditioner is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The texture of the grille is mentioned in the description but is not smooth with ridges.", + "pred_index": 1, + "question_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the air conditioner is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the air conditioner is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the air conditioner is not mentioned.", + 0 + ], + [ + "The air conditioner is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The shape of the air conditioner is mentioned in the description and is rectangular.", + "pred_index": 0, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the air conditioner is mentioned in the description and is white.", + 1 + ], + [ + "The color of the air conditioner is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the air conditioner is not mentioned.", + 0 + ], + [ + "The air conditioner is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the air conditioner is mentioned in the description and is white.", + "pred_index": 0, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the vent is mentioned in the description and is circular.", + 1 + ], + [ + "The shape of the vent is mentioned in the description but is not circular.", + -1 + ], + [ + "The shape of the vent is not mentioned, but the vent of the air conditioner is mentioned.", + 0.5 + ], + [ + "The vent or the air conditioner is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the vent is mentioned in the description and is circular.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the fan is mentioned in the description and is black, grey, silver, or dark.", + 1 + ], + [ + "The color of the fan is mentioned in the description but is not black, grey, silver, or dark.", + -1 + ], + [ + "The color of the fan is not mentioned, but the fan of the air conditioner is mentioned.", + 0.5 + ], + [ + "The fan or the air conditioner is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the fan is mentioned in the description but is not black, grey, silver, or dark.", + "pred_index": 1, + "question_index": 4, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The air conditioner is not mentioned in the description.", + 0 + ], + [ + "The remote control of the air conditioner is not mentioned in the description.", + 1 + ], + [ + "The remote control of the air conditioner is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The remote control of the air conditioner is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Hotel Shilaza sign is not mentioned in the description.", + 1 + ], + [ + "The Hotel Shilaza sign is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The Hotel Shilaza sign is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The air conditioner is not mentioned in the description.", + 0 + ], + [ + "The display of the air conditioner is not mentioned in the description.", + 1 + ], + [ + "The display of the air conditioner is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The display of the air conditioner is mentioned in the description.", + "pred_index": 2, + "question_index": 7, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The UCO Bank branch is not mentioned in the description.", + 1 + ], + [ + "The UCO Bank branch is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The UCO Bank branch is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The motorcycles are not mentioned in the description.", + 1 + ], + [ + "The motorcycles are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The motorcycles are not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is an air conditioner or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, white air conditioner unit with a large circular fan grille on the left side. The grille has a grid pattern with multiple blades visible. 
To the right of the grille, there is a small rectangular panel with a circular emblem and some text.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is an air conditioner or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, white air conditioner unit with a large circular fan grille on the left side. The grille has a grid pattern with multiple blades visible. To the right of the grille, there is a small rectangular panel with a circular emblem and some text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the grille is mentioned in the description and is smooth with ridges.\nB. The texture of the grille is mentioned in the description but is not smooth with ridges.\nC. The texture of the grille is not mentioned, but the grille of the air conditioner is mentioned.\nD. The grille or the air conditioner is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, white air conditioner unit with a large circular fan grille on the left side. The grille has a grid pattern with multiple blades visible. To the right of the grille, there is a small rectangular panel with a circular emblem and some text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the air conditioner is mentioned in the description and is rectangular.\nB. The shape of the air conditioner is mentioned in the description but is not rectangular.\nC. The shape of the air conditioner is not mentioned.\nD. The air conditioner is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, white air conditioner unit with a large circular fan grille on the left side. The grille has a grid pattern with multiple blades visible. To the right of the grille, there is a small rectangular panel with a circular emblem and some text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the air conditioner is mentioned in the description and is white.\nB. The color of the air conditioner is mentioned in the description but is not white.\nC. The color of the air conditioner is not mentioned.\nD. The air conditioner is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, white air conditioner unit with a large circular fan grille on the left side. The grille has a grid pattern with multiple blades visible. To the right of the grille, there is a small rectangular panel with a circular emblem and some text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the vent is mentioned in the description and is circular.\nB. The shape of the vent is mentioned in the description but is not circular.\nC. The shape of the vent is not mentioned, but the vent of the air conditioner is mentioned.\nD. The vent or the air conditioner is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, white air conditioner unit with a large circular fan grille on the left side. The grille has a grid pattern with multiple blades visible. To the right of the grille, there is a small rectangular panel with a circular emblem and some text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the fan is mentioned in the description and is black, grey, silver, or dark.\nB. The color of the fan is mentioned in the description but is not black, grey, silver, or dark.\nC. 
The color of the fan is not mentioned, but the fan of the air conditioner is mentioned.\nD. The fan or the air conditioner is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, white air conditioner unit with a large circular fan grille on the left side. The grille has a grid pattern with multiple blades visible. To the right of the grille, there is a small rectangular panel with a circular emblem and some text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The air conditioner is not mentioned in the description.\nB. The remote control of the air conditioner is not mentioned in the description.\nC. The remote control of the air conditioner is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, white air conditioner unit with a large circular fan grille on the left side. The grille has a grid pattern with multiple blades visible. 
To the right of the grille, there is a small rectangular panel with a circular emblem and some text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Hotel Shilaza sign is not mentioned in the description.\nB. The Hotel Shilaza sign is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, white air conditioner unit with a large circular fan grille on the left side. The grille has a grid pattern with multiple blades visible. To the right of the grille, there is a small rectangular panel with a circular emblem and some text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The air conditioner is not mentioned in the description.\nB. The display of the air conditioner is not mentioned in the description.\nC. The display of the air conditioner is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, white air conditioner unit with a large circular fan grille on the left side. The grille has a grid pattern with multiple blades visible. 
To the right of the grille, there is a small rectangular panel with a circular emblem and some text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The UCO Bank branch is not mentioned in the description.\nB. The UCO Bank branch is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, white air conditioner unit with a large circular fan grille on the left side. The grille has a grid pattern with multiple blades visible. To the right of the grille, there is a small rectangular panel with a circular emblem and some text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The motorcycles are not mentioned in the description.\nB. The motorcycles are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The texture of the grille is mentioned in the description but is not smooth with ridges.", + "A. The shape of the air conditioner is mentioned in the description and is rectangular.", + "A. The color of the air conditioner is mentioned in the description and is white.", + "A. The shape of the vent is mentioned in the description and is circular.", + "B. The color of the fan is mentioned in the description but is not black, grey, silver, or dark.", + "B. The remote control of the air conditioner is not mentioned in the description.", + "A. The Hotel Shilaza sign is not mentioned in the description.", + "C. 
The display of the air conditioner is mentioned in the description.", + "A. The UCO Bank branch is not mentioned in the description.", + "A. The motorcycles are not mentioned in the description." + ], + "score": 0.4, + "score_pos": 0.2, + "score_neg": 0.6, + "neg_valid_num": 5, + "recognition_result": true + }, + "1894089": { + "pred": "A metallic screwdriver with a long, slender shaft and a flat, rectangular head. The shaft is smooth and tapers slightly towards the head, which is flat and has a small, circular indentation near the tip.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the screwdriver is mentioned in the description but is not silver or metallic.", + -1 + ], + [ + "The screwdriver is not mentioned.", + 0 + ], + [ + "The color of the screwdriver is mentioned in the description and is silver or metallic.", + 1 + ], + [ + "The color of the screwdriver is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the screwdriver is not mentioned.", + "pred_index": 3, + "question_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the screwdriver is mentioned in the description but is not metal.", + -1 + ], + [ + "The screwdriver is not mentioned.", + 0 + ], + [ + "The material of the screwdriver is mentioned in the description and is metal.", + 1 + ], + [ + "The material of the screwdriver is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The material of the screwdriver is mentioned in the description and is metal.", + "pred_index": 2, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the grip is mentioned in the description but is not rough or textured.", + -1 + ], + [ + "The grip or the screwdriver is not mentioned.", + 0 + ], + [ + "The texture of the grip is mentioned in the description and is rough or textured.", + 1 + ], + [ + "The texture of the grip is not mentioned, but the grip of the screwdriver is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The texture of the grip is not mentioned, but the grip of the screwdriver is mentioned.", + "pred_index": 3, + "question_index": 2, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The CD is mentioned in the description.", + -1 + ], + [ + "The CD is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The CD is not mentioned in the description.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wire is mentioned in the description.", + -1 + ], + [ + "The wire is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The wire is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The interchangeable bits of the screwdriver are mentioned in the description.", + -1 + ], + [ + "The interchangeable bits of the screwdriver are not mentioned in the description.", + 1 + ], + [ + "The screwdriver is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The interchangeable bits of the screwdriver are not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wire cutters are mentioned in the description.", + -1 + ], + [ + "The wire cutters are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The wire cutters are not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The flashlight is mentioned in the description.", + -1 + ], + [ + "The flashlight is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The flashlight is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a screwdriver or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a long, slender shaft and a flat, rectangular head. The shaft is smooth and tapers slightly towards the head, which is flat and has a small, circular indentation near the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a screwdriver or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a long, slender shaft and a flat, rectangular head. The shaft is smooth and tapers slightly towards the head, which is flat and has a small, circular indentation near the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the screwdriver is mentioned in the description but is not silver or metallic.\nB. The screwdriver is not mentioned.\nC. The color of the screwdriver is mentioned in the description and is silver or metallic.\nD. 
The color of the screwdriver is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a long, slender shaft and a flat, rectangular head. The shaft is smooth and tapers slightly towards the head, which is flat and has a small, circular indentation near the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the screwdriver is mentioned in the description but is not metal.\nB. The screwdriver is not mentioned.\nC. The material of the screwdriver is mentioned in the description and is metal.\nD. The material of the screwdriver is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a long, slender shaft and a flat, rectangular head. The shaft is smooth and tapers slightly towards the head, which is flat and has a small, circular indentation near the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the grip is mentioned in the description but is not rough or textured.\nB. The grip or the screwdriver is not mentioned.\nC. 
The texture of the grip is mentioned in the description and is rough or textured.\nD. The texture of the grip is not mentioned, but the grip of the screwdriver is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a long, slender shaft and a flat, rectangular head. The shaft is smooth and tapers slightly towards the head, which is flat and has a small, circular indentation near the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The CD is mentioned in the description.\nB. The CD is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a long, slender shaft and a flat, rectangular head. The shaft is smooth and tapers slightly towards the head, which is flat and has a small, circular indentation near the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wire is mentioned in the description.\nB. The wire is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a long, slender shaft and a flat, rectangular head. The shaft is smooth and tapers slightly towards the head, which is flat and has a small, circular indentation near the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The interchangeable bits of the screwdriver are mentioned in the description.\nB. The interchangeable bits of the screwdriver are not mentioned in the description.\nC. The screwdriver is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a long, slender shaft and a flat, rectangular head. The shaft is smooth and tapers slightly towards the head, which is flat and has a small, circular indentation near the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wire cutters are mentioned in the description.\nB. The wire cutters are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a long, slender shaft and a flat, rectangular head. The shaft is smooth and tapers slightly towards the head, which is flat and has a small, circular indentation near the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The flashlight is mentioned in the description.\nB. The flashlight is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the screwdriver is not mentioned.", + "C. The material of the screwdriver is mentioned in the description and is metal.", + "D. The texture of the grip is not mentioned, but the grip of the screwdriver is mentioned.", + "B. The CD is not mentioned in the description.", + "B. The wire is not mentioned in the description.", + "B. The interchangeable bits of the screwdriver are not mentioned in the description.", + "B. The wire cutters are not mentioned in the description.", + "B. The flashlight is not mentioned in the description." + ], + "score": 0.8125, + "score_pos": 0.5, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "2391780": { + "pred": "A bird with a long, slender neck and a pointed beak. Its plumage is predominantly brown with lighter, almost white, streaks on the wings and back. 
The bird's legs are thin and dark, and it has a small, rounded tail.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The position of the wings is mentioned in the description but is not spread or extended.", + -1 + ], + [ + "The position of the wings is not mentioned, but the wings of the wild bird are mentioned.", + 0.5 + ], + [ + "The position of the wings is mentioned in the description and is spread or extended.", + 1 + ], + [ + "The wings or the wild bird are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The position of the wings is not mentioned, but the wings of the wild bird are mentioned.", + "pred_index": 1, + "question_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the head is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the head is not mentioned, but the head of the wild bird is mentioned.", + 0.5 + ], + [ + "The color of the head is mentioned in the description and is white.", + 1 + ], + [ + "The head or the wild bird is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the head is mentioned in the description but is not white.", + "pred_index": 0, + "question_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the body is mentioned in the description but is not white, brown or gray.", + -1 + ], + [ + "The color of the body is not mentioned, but the body of the wild bird is mentioned.", + 0.5 + ], + [ + "The color of the body is mentioned in the description and is white, brown or gray.", + 1 + ], + [ + "The body or the wild bird is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the body is mentioned in the description and is white, brown or gray.", + "pred_index": 2, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the beak is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The color of the beak is not mentioned, but the beak of the wild bird is mentioned.", + 0.5 + ], + [ + "The color of the beak is mentioned in the description and is dark or black.", + 1 + ], + [ + "The beak or the wild bird is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the beak is mentioned in the description but is not dark or black.", + "pred_index": 0, + "question_index": 3, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The windows are mentioned in the description.", + -1 + ], + [ + "The windows are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The windows are not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The boats are mentioned in the description.", + -1 + ], + [ + "The boats are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The boats are not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The stone walls are mentioned in the description.", + -1 + ], + [ + "The stone walls are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The stone walls are not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chimneys are mentioned in the description.", + -1 + ], + [ + "The chimneys are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The chimneys are not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The water is mentioned in the description.", + -1 + ], + [ + "The water is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The water is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a wild bird or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bird with a long, slender neck and a pointed beak. Its plumage is predominantly brown with lighter, almost white, streaks on the wings and back. 
The bird's legs are thin and dark, and it has a small, rounded tail.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a wild bird or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bird with a long, slender neck and a pointed beak. Its plumage is predominantly brown with lighter, almost white, streaks on the wings and back. The bird's legs are thin and dark, and it has a small, rounded tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The position of the wings is mentioned in the description but is not spread or extended.\nB. The position of the wings is not mentioned, but the wings of the wild bird are mentioned.\nC. The position of the wings is mentioned in the description and is spread or extended.\nD. The wings or the wild bird are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bird with a long, slender neck and a pointed beak. Its plumage is predominantly brown with lighter, almost white, streaks on the wings and back. 
The bird's legs are thin and dark, and it has a small, rounded tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the head is mentioned in the description but is not white.\nB. The color of the head is not mentioned, but the head of the wild bird is mentioned.\nC. The color of the head is mentioned in the description and is white.\nD. The head or the wild bird is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bird with a long, slender neck and a pointed beak. Its plumage is predominantly brown with lighter, almost white, streaks on the wings and back. The bird's legs are thin and dark, and it has a small, rounded tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the body is mentioned in the description but is not white, brown or gray.\nB. The color of the body is not mentioned, but the body of the wild bird is mentioned.\nC. The color of the body is mentioned in the description and is white, brown or gray.\nD. The body or the wild bird is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA bird with a long, slender neck and a pointed beak. Its plumage is predominantly brown with lighter, almost white, streaks on the wings and back. The bird's legs are thin and dark, and it has a small, rounded tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the beak is mentioned in the description but is not dark or black.\nB. The color of the beak is not mentioned, but the beak of the wild bird is mentioned.\nC. The color of the beak is mentioned in the description and is dark or black.\nD. The beak or the wild bird is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bird with a long, slender neck and a pointed beak. Its plumage is predominantly brown with lighter, almost white, streaks on the wings and back. The bird's legs are thin and dark, and it has a small, rounded tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The windows are mentioned in the description.\nB. The windows are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA bird with a long, slender neck and a pointed beak. Its plumage is predominantly brown with lighter, almost white, streaks on the wings and back. The bird's legs are thin and dark, and it has a small, rounded tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The boats are mentioned in the description.\nB. The boats are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bird with a long, slender neck and a pointed beak. Its plumage is predominantly brown with lighter, almost white, streaks on the wings and back. The bird's legs are thin and dark, and it has a small, rounded tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The stone walls are mentioned in the description.\nB. The stone walls are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bird with a long, slender neck and a pointed beak. Its plumage is predominantly brown with lighter, almost white, streaks on the wings and back. 
The bird's legs are thin and dark, and it has a small, rounded tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chimneys are mentioned in the description.\nB. The chimneys are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bird with a long, slender neck and a pointed beak. Its plumage is predominantly brown with lighter, almost white, streaks on the wings and back. The bird's legs are thin and dark, and it has a small, rounded tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The water is mentioned in the description.\nB. The water is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The position of the wings is not mentioned, but the wings of the wild bird are mentioned.", + "A. The color of the head is mentioned in the description but is not white.", + "C. The color of the body is mentioned in the description and is white, brown or gray.", + "A. The color of the beak is mentioned in the description but is not dark or black.", + "B. The windows are not mentioned in the description.", + "B. The boats are not mentioned in the description.", + "B. The stone walls are not mentioned in the description.", + "B. The chimneys are not mentioned in the description.", + "B. The water is not mentioned in the description." 
+ ], + "score": 0.5, + "score_pos": -0.125, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "4502267": { + "pred": "A green bean with a smooth, slightly curved surface, tapering to a point at one end and having a broader, rounded base at the other. The bean has a consistent green color with subtle variations in shading.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green bean is not mentioned.", + 0 + ], + [ + "The shape of the green bean is not mentioned.", + 0 + ], + [ + "The shape of the green bean is mentioned in the description and is elongated.", + 1 + ], + [ + "The shape of the green bean is mentioned in the description but is not elongated.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the green bean is mentioned in the description and is elongated.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green bean is not mentioned.", + 0 + ], + [ + "The color of the green bean is not mentioned.", + 0 + ], + [ + "The color of the green bean is mentioned in the description and is green.", + 1 + ], + [ + "The color of the green bean is mentioned in the description but is not green.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the green bean is mentioned in the description and is green.", + "pred_index": 2, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green bean is not mentioned.", + 0 + ], + [ + "The shape of the green bean is not mentioned.", + 0 + ], + [ + "The shape of the green bean is mentioned in the description and is arc or curved.", + 1 + ], + [ + "The shape of the green bean is mentioned in the description but is not arc or curved.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the green bean is mentioned in the description and is arc or curved.", + "pred_index": 2, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green bean is not mentioned.", + 0 + ], + [ + "The texture of the green bean is not mentioned.", + 0 + ], + [ + "The texture of the green bean is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the green bean is mentioned in the description but is not smooth.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The texture of the green bean is mentioned in the description and is smooth.", + "pred_index": 2, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The apple is not mentioned in the description.", + 1 + ], + [ + "The apple is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The apple is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green bean is not mentioned in the description.", + 0 + ], + [ + "The strings of the green bean are not mentioned in the description.", + 1 + ], + [ + "The strings of the green bean are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The strings of the green bean are not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green bean is not mentioned in the description.", + 0 + ], + [ + "The seeds of the green bean are not mentioned in the description.", + 1 + ], + [ + "The seeds of the green bean are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The seeds of the green bean are not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pear is not mentioned in the description.", + 1 + ], + [ + "The pear is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The pear is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The peach is not mentioned in the description.", + 1 + ], + [ + "The peach is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The peach is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a green bean or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, tapering to a point at one end and having a broader, rounded base at the other. The bean has a consistent green color with subtle variations in shading.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a green bean or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, tapering to a point at one end and having a broader, rounded base at the other. 
The bean has a consistent green color with subtle variations in shading.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green bean is not mentioned.\nB. The shape of the green bean is not mentioned.\nC. The shape of the green bean is mentioned in the description and is elongated.\nD. The shape of the green bean is mentioned in the description but is not elongated.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, tapering to a point at one end and having a broader, rounded base at the other. The bean has a consistent green color with subtle variations in shading.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green bean is not mentioned.\nB. The color of the green bean is not mentioned.\nC. The color of the green bean is mentioned in the description and is green.\nD. The color of the green bean is mentioned in the description but is not green.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, tapering to a point at one end and having a broader, rounded base at the other. The bean has a consistent green color with subtle variations in shading.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green bean is not mentioned.\nB. The shape of the green bean is not mentioned.\nC. The shape of the green bean is mentioned in the description and is arc or curved.\nD. The shape of the green bean is mentioned in the description but is not arc or curved.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, tapering to a point at one end and having a broader, rounded base at the other. The bean has a consistent green color with subtle variations in shading.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green bean is not mentioned.\nB. The texture of the green bean is not mentioned.\nC. The texture of the green bean is mentioned in the description and is smooth.\nD. The texture of the green bean is mentioned in the description but is not smooth.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, tapering to a point at one end and having a broader, rounded base at the other. The bean has a consistent green color with subtle variations in shading.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The apple is not mentioned in the description.\nB. The apple is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, tapering to a point at one end and having a broader, rounded base at the other. The bean has a consistent green color with subtle variations in shading.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green bean is not mentioned in the description.\nB. The strings of the green bean are not mentioned in the description.\nC. The strings of the green bean are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, tapering to a point at one end and having a broader, rounded base at the other. The bean has a consistent green color with subtle variations in shading.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green bean is not mentioned in the description.\nB. The seeds of the green bean are not mentioned in the description.\nC. The seeds of the green bean are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, tapering to a point at one end and having a broader, rounded base at the other. The bean has a consistent green color with subtle variations in shading.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pear is not mentioned in the description.\nB. The pear is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, tapering to a point at one end and having a broader, rounded base at the other. The bean has a consistent green color with subtle variations in shading.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The peach is not mentioned in the description.\nB. The peach is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The shape of the green bean is mentioned in the description and is elongated.", + "C. The color of the green bean is mentioned in the description and is green.", + "C. The shape of the green bean is mentioned in the description and is arc or curved.", + "C. The texture of the green bean is mentioned in the description and is smooth.", + "A. The apple is not mentioned in the description.", + "B. The strings of the green bean are not mentioned in the description.", + "B. The seeds of the green bean are not mentioned in the description.", + "A. The pear is not mentioned in the description.", + "A. The peach is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "4604873": { + "pred": "A large, industrial crane with a lattice structure, featuring a long, horizontal boom extending from a vertical mast. The boom is supported by a series of diagonal cross-bracing and has a hook at the end. 
The mast is equipped with various mechanical components and a counterweight at the base.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the jib is not mentioned, but the jib of the crane is mentioned.", + 0.5 + ], + [ + "The jib or the crane is not mentioned.", + 0 + ], + [ + "The size of the jib is mentioned in the description but is not long.", + -1 + ], + [ + "The size of the jib is mentioned in the description and is long.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The size of the jib is not mentioned, but the jib of the crane is mentioned.", + "pred_index": 0, + "question_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the tower is not mentioned, but the tower of the crane is mentioned.", + 0.5 + ], + [ + "The tower or the crane is not mentioned.", + 0 + ], + [ + "The size of the tower is mentioned in the description but is not tall.", + -1 + ], + [ + "The size of the tower is mentioned in the description and is tall.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The size of the tower is not mentioned, but the tower of the crane is mentioned.", + "pred_index": 0, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The visibility of the hook is not mentioned, but the hook of the crane is mentioned.", + 0.5 + ], + [ + "The hook or the crane is not mentioned.", + 0 + ], + [ + "The visibility of the hook is mentioned in the description but is not visible.", + -1 + ], + [ + "The visibility of the hook is mentioned in the description and is visible.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The visibility of the hook is not mentioned, but the hook of the crane is mentioned.", + "pred_index": 0, + "question_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the crane is not mentioned.", + 0 + ], + [ + "The crane is not mentioned.", + 0 + ], + [ + "The material of the crane is mentioned in the description but is not metal or steel.", + -1 + ], + [ + "The material of the crane is mentioned in the description and is metal or steel.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the crane is not mentioned.", + "pred_index": 0, + "question_index": 3, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the jib is not mentioned, but the jib of the crane is mentioned.", + 0.5 + ], + [ + "The jib or the crane is not mentioned.", + 0 + ], + [ + "The shape of the jib is mentioned in the description but is not horizontal beam.", + -1 + ], + [ + "The shape of the jib is mentioned in the description and is horizontal beam.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the jib is not mentioned, but the jib of the crane is mentioned.", + "pred_index": 0, + "question_index": 4, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The telescoping sections of the crane are not mentioned in the description.", + 1 + ], + [ + "The crane is not mentioned in the description.", + 0 + ], + [ + "The telescoping sections of the crane are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The telescoping sections of the crane are not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tracks of the crane are not mentioned in the description.", + 1 + ], + [ + "The crane is not mentioned in the description.", + 0 + ], + [ + "The tracks of the crane are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The tracks of the crane are not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wheels of the crane are not mentioned in the description.", + 1 + ], + [ + "The crane is not mentioned in the description.", + 0 + ], + [ + "The wheels of the crane are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The wheels of the crane are not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The silhouettes of structures are not mentioned in the description.", + 1 + ], + [ + "The silhouettes of structures are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The silhouettes of structures are not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clouds are not mentioned in the description.", + 1 + ], + [ + "The clouds are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The clouds are not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a crane or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, industrial crane with a lattice structure, featuring a long, horizontal boom extending from a vertical mast. The boom is supported by a series of diagonal cross-bracing and has a hook at the end. The mast is equipped with various mechanical components and a counterweight at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a crane or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, industrial crane with a lattice structure, featuring a long, horizontal boom extending from a vertical mast. 
The boom is supported by a series of diagonal cross-bracing and has a hook at the end. The mast is equipped with various mechanical components and a counterweight at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the jib is not mentioned, but the jib of the crane is mentioned.\nB. The jib or the crane is not mentioned.\nC. The size of the jib is mentioned in the description but is not long.\nD. The size of the jib is mentioned in the description and is long.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, industrial crane with a lattice structure, featuring a long, horizontal boom extending from a vertical mast. The boom is supported by a series of diagonal cross-bracing and has a hook at the end. The mast is equipped with various mechanical components and a counterweight at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the tower is not mentioned, but the tower of the crane is mentioned.\nB. The tower or the crane is not mentioned.\nC. The size of the tower is mentioned in the description but is not tall.\nD. The size of the tower is mentioned in the description and is tall.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, industrial crane with a lattice structure, featuring a long, horizontal boom extending from a vertical mast. The boom is supported by a series of diagonal cross-bracing and has a hook at the end. The mast is equipped with various mechanical components and a counterweight at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The visibility of the hook is not mentioned, but the hook of the crane is mentioned.\nB. The hook or the crane is not mentioned.\nC. The visibility of the hook is mentioned in the description but is not visible.\nD. The visibility of the hook is mentioned in the description and is visible.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, industrial crane with a lattice structure, featuring a long, horizontal boom extending from a vertical mast. The boom is supported by a series of diagonal cross-bracing and has a hook at the end. The mast is equipped with various mechanical components and a counterweight at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the crane is not mentioned.\nB. The crane is not mentioned.\nC. The material of the crane is mentioned in the description but is not metal or steel.\nD. 
The material of the crane is mentioned in the description and is metal or steel.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, industrial crane with a lattice structure, featuring a long, horizontal boom extending from a vertical mast. The boom is supported by a series of diagonal cross-bracing and has a hook at the end. The mast is equipped with various mechanical components and a counterweight at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the jib is not mentioned, but the jib of the crane is mentioned.\nB. The jib or the crane is not mentioned.\nC. The shape of the jib is mentioned in the description but is not horizontal beam.\nD. The shape of the jib is mentioned in the description and is horizontal beam.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, industrial crane with a lattice structure, featuring a long, horizontal boom extending from a vertical mast. The boom is supported by a series of diagonal cross-bracing and has a hook at the end. 
The mast is equipped with various mechanical components and a counterweight at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The telescoping sections of the crane are not mentioned in the description.\nB. The crane is not mentioned in the description.\nC. The telescoping sections of the crane are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, industrial crane with a lattice structure, featuring a long, horizontal boom extending from a vertical mast. The boom is supported by a series of diagonal cross-bracing and has a hook at the end. The mast is equipped with various mechanical components and a counterweight at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tracks of the crane are not mentioned in the description.\nB. The crane is not mentioned in the description.\nC. The tracks of the crane are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA large, industrial crane with a lattice structure, featuring a long, horizontal boom extending from a vertical mast. The boom is supported by a series of diagonal cross-bracing and has a hook at the end. The mast is equipped with various mechanical components and a counterweight at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wheels of the crane are not mentioned in the description.\nB. The crane is not mentioned in the description.\nC. The wheels of the crane are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, industrial crane with a lattice structure, featuring a long, horizontal boom extending from a vertical mast. The boom is supported by a series of diagonal cross-bracing and has a hook at the end. The mast is equipped with various mechanical components and a counterweight at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The silhouettes of structures are not mentioned in the description.\nB. The silhouettes of structures are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, industrial crane with a lattice structure, featuring a long, horizontal boom extending from a vertical mast. The boom is supported by a series of diagonal cross-bracing and has a hook at the end. The mast is equipped with various mechanical components and a counterweight at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The clouds are not mentioned in the description.\nB. The clouds are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The size of the jib is not mentioned, but the jib of the crane is mentioned.", + "A. The size of the tower is not mentioned, but the tower of the crane is mentioned.", + "A. The visibility of the hook is not mentioned, but the hook of the crane is mentioned.", + "A. The material of the crane is not mentioned.", + "A. The shape of the jib is not mentioned, but the jib of the crane is mentioned.", + "A. The telescoping sections of the crane are not mentioned in the description.", + "A. The tracks of the crane are not mentioned in the description.", + "A. The wheels of the crane are not mentioned in the description.", + "A. The silhouettes of structures are not mentioned in the description.", + "A. The clouds are not mentioned in the description." + ], + "score": 0.7, + "score_pos": 0.4, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "4916799": { + "pred": "A spherical sculpture with a textured surface composed of small, raised, silver-colored elements. The sphere is adorned with blue, three-dimensional letters spelling \"Reve\" and is mounted on a black, cylindrical base. 
A green band encircles the sphere, and there are colorful, abstract shapes and patterns on the left side.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the globe is mentioned in the description and is silver and blue.", + 1 + ], + [ + "The globe is not mentioned.", + 0 + ], + [ + "The color of the globe is not mentioned.", + 0 + ], + [ + "The color of the globe is mentioned in the description but is not silver and blue.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the globe is mentioned in the description and is silver and blue.", + "pred_index": 0, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the sphere is mentioned in the description and is metal and plastic.", + 1 + ], + [ + "The sphere or the globe is not mentioned.", + 0 + ], + [ + "The material of the sphere is not mentioned, but the sphere of the globe is mentioned.", + 0.5 + ], + [ + "The material of the sphere is mentioned in the description but is not metal and plastic.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The sphere or the globe is not mentioned.", + "pred_index": 1, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the base is mentioned in the description and is circular.", + 1 + ], + [ + "The base or the globe is not mentioned.", + 0 + ], + [ + "The shape of the base is not mentioned, but the base of the globe is mentioned.", + 0.5 + ], + [ + "The shape of the base is mentioned in the description but is not circular.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the base is not mentioned, but the base of the globe is mentioned.", + "pred_index": 2, + "question_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the base is mentioned in the description and is gray.", + 1 + ], + [ + "The base or the globe is not mentioned.", + 0 + ], + [ + "The color of the base is not mentioned, but the base of the globe is mentioned.", + 0.5 + ], + [ + "The color of the base is mentioned in the description but is not gray.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the base is mentioned in the description but is not gray.", + "pred_index": 3, + "question_index": 3, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sidewalk is mentioned in the description.", + -1 + ], + [ + "The sidewalk is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The sidewalk is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The trees are mentioned in the description.", + -1 + ], + [ + "The trees are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The trees are not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sky is mentioned in the description.", + -1 + ], + [ + "The sky is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The sky is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The person is mentioned in the description.", + -1 + ], + [ + "The person is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The person is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bushes are mentioned in the description.", + -1 + ], + [ + "The bushes are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bushes are not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a globe or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture with a textured surface composed of small, raised, silver-colored elements. The sphere is adorned with blue, three-dimensional letters spelling \"Reve\" and is mounted on a black, cylindrical base. 
A green band encircles the sphere, and there are colorful, abstract shapes and patterns on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a globe or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture with a textured surface composed of small, raised, silver-colored elements. The sphere is adorned with blue, three-dimensional letters spelling \"Reve\" and is mounted on a black, cylindrical base. A green band encircles the sphere, and there are colorful, abstract shapes and patterns on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the globe is mentioned in the description and is silver and blue.\nB. The globe is not mentioned.\nC. The color of the globe is not mentioned.\nD. The color of the globe is mentioned in the description but is not silver and blue.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture with a textured surface composed of small, raised, silver-colored elements. 
The sphere is adorned with blue, three-dimensional letters spelling \"Reve\" and is mounted on a black, cylindrical base. A green band encircles the sphere, and there are colorful, abstract shapes and patterns on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the sphere is mentioned in the description and is metal and plastic.\nB. The sphere or the globe is not mentioned.\nC. The material of the sphere is not mentioned, but the sphere of the globe is mentioned.\nD. The material of the sphere is mentioned in the description but is not metal and plastic.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture with a textured surface composed of small, raised, silver-colored elements. The sphere is adorned with blue, three-dimensional letters spelling \"Reve\" and is mounted on a black, cylindrical base. A green band encircles the sphere, and there are colorful, abstract shapes and patterns on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the base is mentioned in the description and is circular.\nB. The base or the globe is not mentioned.\nC. The shape of the base is not mentioned, but the base of the globe is mentioned.\nD. The shape of the base is mentioned in the description but is not circular.\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture with a textured surface composed of small, raised, silver-colored elements. The sphere is adorned with blue, three-dimensional letters spelling \"Reve\" and is mounted on a black, cylindrical base. A green band encircles the sphere, and there are colorful, abstract shapes and patterns on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the base is mentioned in the description and is gray.\nB. The base or the globe is not mentioned.\nC. The color of the base is not mentioned, but the base of the globe is mentioned.\nD. The color of the base is mentioned in the description but is not gray.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture with a textured surface composed of small, raised, silver-colored elements. The sphere is adorned with blue, three-dimensional letters spelling \"Reve\" and is mounted on a black, cylindrical base. A green band encircles the sphere, and there are colorful, abstract shapes and patterns on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The sidewalk is mentioned in the description.\nB. The sidewalk is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture with a textured surface composed of small, raised, silver-colored elements. The sphere is adorned with blue, three-dimensional letters spelling \"Reve\" and is mounted on a black, cylindrical base. A green band encircles the sphere, and there are colorful, abstract shapes and patterns on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The trees are mentioned in the description.\nB. The trees are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture with a textured surface composed of small, raised, silver-colored elements. The sphere is adorned with blue, three-dimensional letters spelling \"Reve\" and is mounted on a black, cylindrical base. A green band encircles the sphere, and there are colorful, abstract shapes and patterns on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The sky is mentioned in the description.\nB. The sky is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture with a textured surface composed of small, raised, silver-colored elements. The sphere is adorned with blue, three-dimensional letters spelling \"Reve\" and is mounted on a black, cylindrical base. A green band encircles the sphere, and there are colorful, abstract shapes and patterns on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The person is mentioned in the description.\nB. The person is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture with a textured surface composed of small, raised, silver-colored elements. The sphere is adorned with blue, three-dimensional letters spelling \"Reve\" and is mounted on a black, cylindrical base. A green band encircles the sphere, and there are colorful, abstract shapes and patterns on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The bushes are mentioned in the description.\nB. The bushes are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The color of the globe is mentioned in the description and is silver and blue.", + "B. The sphere or the globe is not mentioned.", + "C. The shape of the base is not mentioned, but the base of the globe is mentioned.", + "D. The color of the base is mentioned in the description but is not gray.", + "B. The sidewalk is not mentioned in the description.", + "B. The trees are not mentioned in the description.", + "B. The sky is not mentioned in the description.", + "B. The person is not mentioned in the description.", + "B. The bushes are not mentioned in the description." + ], + "score": 0.6111111111111112, + "score_pos": 0.125, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "5718415": { + "pred": "The tent has a yellow canopy with a dark brown edge. The visible part of the tent includes a metal pole with a rusted section near the bottom.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The canopy or the tent is not mentioned.", + 0 + ], + [ + "The material of the canopy is mentioned in the description but is not fabric.", + -1 + ], + [ + "The material of the canopy is mentioned in the description and is fabric.", + 1 + ], + [ + "The material of the canopy is not mentioned, but the canopy of the tent is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. 
The material of the canopy is mentioned in the description and is fabric.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pole or the tent is not mentioned.", + 0 + ], + [ + "The material of the pole is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the pole is mentioned in the description and is metal.", + 1 + ], + [ + "The material of the pole is not mentioned, but the pole of the tent is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the pole is mentioned in the description and is metal.", + "pred_index": 2, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The canopy or the tent is not mentioned.", + 0 + ], + [ + "The color of the canopy is mentioned in the description but is not yellow.", + -1 + ], + [ + "The color of the canopy is mentioned in the description and is yellow.", + 1 + ], + [ + "The color of the canopy is not mentioned, but the canopy of the tent is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the canopy is mentioned in the description and is yellow.", + "pred_index": 2, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The windows of the tent are mentioned in the description.", + -1 + ], + [ + "The tent is not mentioned in the description.", + 0 + ], + [ + "The windows of the tent are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The windows of the tent are not mentioned in the description.", + "pred_index": 2, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The buildings are mentioned in the description.", + -1 + ], + [ + "The buildings are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The buildings are not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The walls of the tent are mentioned in the description.", + -1 + ], + [ + "The tent is not mentioned in the description.", + 0 + ], + [ + "The walls of the tent are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The walls of the tent are not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The door of the tent is mentioned in the description.", + -1 + ], + [ + "The tent is not mentioned in the description.", + 0 + ], + [ + "The door of the tent is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The door of the tent is not mentioned in the description.", + "pred_index": 2, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The floor of the tent is mentioned in the description.", + -1 + ], + [ + "The tent is not mentioned in the description.", + 0 + ], + [ + "The floor of the tent is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The floor of the tent is not mentioned in the description.", + "pred_index": 2, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a tent or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a dark brown edge. The visible part of the tent includes a metal pole with a rusted section near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a tent or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a dark brown edge. 
The visible part of the tent includes a metal pole with a rusted section near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The canopy or the tent is not mentioned.\nB. The material of the canopy is mentioned in the description but is not fabric.\nC. The material of the canopy is mentioned in the description and is fabric.\nD. The material of the canopy is not mentioned, but the canopy of the tent is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a dark brown edge. The visible part of the tent includes a metal pole with a rusted section near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pole or the tent is not mentioned.\nB. The material of the pole is mentioned in the description but is not metal.\nC. The material of the pole is mentioned in the description and is metal.\nD. The material of the pole is not mentioned, but the pole of the tent is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a dark brown edge. 
The visible part of the tent includes a metal pole with a rusted section near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The canopy or the tent is not mentioned.\nB. The color of the canopy is mentioned in the description but is not yellow.\nC. The color of the canopy is mentioned in the description and is yellow.\nD. The color of the canopy is not mentioned, but the canopy of the tent is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a dark brown edge. The visible part of the tent includes a metal pole with a rusted section near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The windows of the tent are mentioned in the description.\nB. The tent is not mentioned in the description.\nC. The windows of the tent are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a dark brown edge. 
The visible part of the tent includes a metal pole with a rusted section near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The buildings are mentioned in the description.\nB. The buildings are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a dark brown edge. The visible part of the tent includes a metal pole with a rusted section near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The walls of the tent are mentioned in the description.\nB. The tent is not mentioned in the description.\nC. The walls of the tent are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a dark brown edge. The visible part of the tent includes a metal pole with a rusted section near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The door of the tent is mentioned in the description.\nB. 
The tent is not mentioned in the description.\nC. The door of the tent is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a dark brown edge. The visible part of the tent includes a metal pole with a rusted section near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The floor of the tent is mentioned in the description.\nB. The tent is not mentioned in the description.\nC. The floor of the tent is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The material of the canopy is mentioned in the description and is fabric.", + "C. The material of the pole is mentioned in the description and is metal.", + "C. The color of the canopy is mentioned in the description and is yellow.", + "C. The windows of the tent are not mentioned in the description.", + "B. The buildings are not mentioned in the description.", + "C. The walls of the tent are not mentioned in the description.", + "C. The door of the tent is not mentioned in the description.", + "C. The floor of the tent is not mentioned in the description." 
+ ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "6012878": { + "pred": "A square pedestrian traffic light with a black background, featuring a red illuminated hand symbol on the left side.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The symbol or the traffic light is not mentioned.", + 0 + ], + [ + "The color of the symbol is mentioned in the description but is not red or orange.", + -1 + ], + [ + "The color of the symbol is mentioned in the description and is red or orange.", + 1 + ], + [ + "The color of the symbol is not mentioned, but the symbol of the traffic light is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the symbol is mentioned in the description and is red or orange.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The symbol or the traffic light is not mentioned.", + 0 + ], + [ + "The shape of the symbol is mentioned in the description but is not hand outline.", + -1 + ], + [ + "The shape of the symbol is mentioned in the description and is hand outline.", + 1 + ], + [ + "The shape of the symbol is not mentioned, but the symbol of the traffic light is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the symbol is mentioned in the description and is hand outline.", + "pred_index": 2, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The background or the traffic light is not mentioned.", + 0 + ], + [ + "The texture of the background is mentioned in the description but is not matte.", + -1 + ], + [ + "The texture of the background is mentioned in the description and is matte.", + 1 + ], + [ + "The texture of the background is not mentioned, but the background of the traffic light is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The texture of the background is not mentioned, but the background of the traffic light is mentioned.", + "pred_index": 3, + "question_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The background or the traffic light is not mentioned.", + 0 + ], + [ + "The color of the background is mentioned in the description but is not gray or black.", + -1 + ], + [ + "The color of the background is mentioned in the description and is gray or black.", + 1 + ], + [ + "The color of the background is not mentioned, but the background of the traffic light is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the background is mentioned in the description and is gray or black.", + "pred_index": 2, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The reflective surface or the traffic light is not mentioned.", + 0 + ], + [ + "The material of the reflective surface is mentioned in the description but is not glass or plastic.", + -1 + ], + [ + "The material of the reflective surface is mentioned in the description and is glass or plastic.", + 1 + ], + [ + "The material of the reflective surface is not mentioned, but the reflective surface of the traffic light is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The reflective surface or the traffic light is not mentioned.", + "pred_index": 0, + "question_index": 4, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The walking person symbol of the traffic light is not mentioned in the description.", + 1 + ], + [ + "The walking person symbol of the traffic light is mentioned in the description.", + -1 + ], + [ + "The traffic light is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The walking person symbol of the traffic light is mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pole of the traffic light is not mentioned in the description.", + 1 + ], + [ + "The pole of the traffic light is mentioned in the description.", + -1 + ], + [ + "The traffic light is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The pole of the traffic light is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bicycles are not mentioned in the description.", + 1 + ], + [ + "The bicycles are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The bicycles are not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sidewalk is not mentioned in the description.", + 1 + ], + [ + "The sidewalk is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The sidewalk is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green light of the traffic light is not mentioned in the description.", + 1 + ], + [ + "The green light of the traffic light is mentioned in the description.", + -1 + ], + [ + "The traffic light is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The green light of the traffic light is not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a traffic light or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square pedestrian traffic light with a black background, featuring a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a traffic light or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square pedestrian traffic light with a black background, featuring a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The symbol or the traffic light is not mentioned.\nB. The color of the symbol is mentioned in the description but is not red or orange.\nC. The color of the symbol is mentioned in the description and is red or orange.\nD. The color of the symbol is not mentioned, but the symbol of the traffic light is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square pedestrian traffic light with a black background, featuring a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The symbol or the traffic light is not mentioned.\nB. The shape of the symbol is mentioned in the description but is not hand outline.\nC. The shape of the symbol is mentioned in the description and is hand outline.\nD. The shape of the symbol is not mentioned, but the symbol of the traffic light is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square pedestrian traffic light with a black background, featuring a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The background or the traffic light is not mentioned.\nB. The texture of the background is mentioned in the description but is not matte.\nC. The texture of the background is mentioned in the description and is matte.\nD. The texture of the background is not mentioned, but the background of the traffic light is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square pedestrian traffic light with a black background, featuring a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The background or the traffic light is not mentioned.\nB. The color of the background is mentioned in the description but is not gray or black.\nC. The color of the background is mentioned in the description and is gray or black.\nD. The color of the background is not mentioned, but the background of the traffic light is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square pedestrian traffic light with a black background, featuring a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The reflective surface or the traffic light is not mentioned.\nB. The material of the reflective surface is mentioned in the description but is not glass or plastic.\nC. The material of the reflective surface is mentioned in the description and is glass or plastic.\nD. 
The material of the reflective surface is not mentioned, but the reflective surface of the traffic light is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square pedestrian traffic light with a black background, featuring a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The walking person symbol of the traffic light is not mentioned in the description.\nB. The walking person symbol of the traffic light is mentioned in the description.\nC. The traffic light is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square pedestrian traffic light with a black background, featuring a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pole of the traffic light is not mentioned in the description.\nB. The pole of the traffic light is mentioned in the description.\nC. 
The traffic light is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square pedestrian traffic light with a black background, featuring a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bicycles are not mentioned in the description.\nB. The bicycles are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square pedestrian traffic light with a black background, featuring a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sidewalk is not mentioned in the description.\nB. The sidewalk is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA square pedestrian traffic light with a black background, featuring a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green light of the traffic light is not mentioned in the description.\nB. The green light of the traffic light is mentioned in the description.\nC. The traffic light is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the symbol is mentioned in the description and is red or orange.", + "C. The shape of the symbol is mentioned in the description and is hand outline.", + "D. The texture of the background is not mentioned, but the background of the traffic light is mentioned.", + "C. The color of the background is mentioned in the description and is gray or black.", + "A. The reflective surface or the traffic light is not mentioned.", + "B. The walking person symbol of the traffic light is mentioned in the description.", + "A. The pole of the traffic light is not mentioned in the description.", + "A. The bicycles are not mentioned in the description.", + "A. The sidewalk is not mentioned in the description.", + "A. The green light of the traffic light is not mentioned in the description." + ], + "score": 0.65, + "score_pos": 0.7, + "score_neg": 0.6, + "neg_valid_num": 5, + "recognition_result": true + }, + "6820595": { + "pred": "A cat with a white face and ears, featuring a mix of black and brown fur on its back and tail. 
The cat's body is predominantly white with black patches, and it has a short, sleek coat.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the ear is not mentioned, but the ear of the cat is mentioned.", + 0.5 + ], + [ + "The shape of the ear is mentioned in the description but is not triangular.", + -1 + ], + [ + "The ear or the cat is not mentioned.", + 0 + ], + [ + "The shape of the ear is mentioned in the description and is triangular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the ear is not mentioned, but the ear of the cat is mentioned.", + "pred_index": 0, + "question_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the fur is not mentioned, but the fur of the cat is mentioned.", + 0.5 + ], + [ + "The texture of the fur is mentioned in the description but is not fluffy.", + -1 + ], + [ + "The fur or the cat is not mentioned.", + 0 + ], + [ + "The texture of the fur is mentioned in the description and is fluffy.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The texture of the fur is not mentioned, but the fur of the cat is mentioned.", + "pred_index": 0, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the fur is not mentioned, but the fur of the cat is mentioned.", + 0.5 + ], + [ + "The color of the fur is mentioned in the description but is not black and white.", + -1 + ], + [ + "The fur or the cat is not mentioned.", + 0 + ], + [ + "The color of the fur is mentioned in the description and is black and white.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the fur is mentioned in the description and is black and white.", + "pred_index": 3, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the back is not mentioned, but the back of the cat is mentioned.", + 0.5 + ], + [ + "The shape of the back is mentioned in the description but is not arched.", + -1 + ], + [ + "The back or the cat is not mentioned.", + 0 + ], + [ + "The shape of the back is mentioned in the description and is arched.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the back is not mentioned, but the back of the cat is mentioned.", + "pred_index": 0, + "question_index": 3, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the underbelly is not mentioned, but the underbelly of the cat is mentioned.", + 0.5 + ], + [ + "The color of the underbelly is mentioned in the description but is not white.", + -1 + ], + [ + "The underbelly or the cat is not mentioned.", + 0 + ], + [ + "The color of the underbelly is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the underbelly is mentioned in the description and is white.", + "pred_index": 3, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The door is mentioned in the description.", + -1 + ], + [ + "The door is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The door is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mirror is mentioned in the description.", + -1 + ], + [ + "The mirror is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The mirror is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bathroom cabinet is mentioned in the description.", + -1 + ], + [ + "The bathroom cabinet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bathroom cabinet is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bathroom sink is mentioned in the description.", + -1 + ], + [ + "The bathroom sink is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bathroom sink is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The hairbrush is mentioned in the description.", + -1 + ], + [ + "The hairbrush is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The hairbrush is not mentioned in the description.", + "pred_index": 1, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a cat or an object of a similar type? 
Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face and ears, featuring a mix of black and brown fur on its back and tail. The cat's body is predominantly white with black patches, and it has a short, sleek coat.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a cat or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face and ears, featuring a mix of black and brown fur on its back and tail. The cat's body is predominantly white with black patches, and it has a short, sleek coat.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the ear is not mentioned, but the ear of the cat is mentioned.\nB. The shape of the ear is mentioned in the description but is not triangular.\nC. 
The ear or the cat is not mentioned.\nD. The shape of the ear is mentioned in the description and is triangular.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face and ears, featuring a mix of black and brown fur on its back and tail. The cat's body is predominantly white with black patches, and it has a short, sleek coat.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the fur is not mentioned, but the fur of the cat is mentioned.\nB. The texture of the fur is mentioned in the description but is not fluffy.\nC. The fur or the cat is not mentioned.\nD. The texture of the fur is mentioned in the description and is fluffy.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face and ears, featuring a mix of black and brown fur on its back and tail. The cat's body is predominantly white with black patches, and it has a short, sleek coat.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the fur is not mentioned, but the fur of the cat is mentioned.\nB. 
The color of the fur is mentioned in the description but is not black and white.\nC. The fur or the cat is not mentioned.\nD. The color of the fur is mentioned in the description and is black and white.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face and ears, featuring a mix of black and brown fur on its back and tail. The cat's body is predominantly white with black patches, and it has a short, sleek coat.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the back is not mentioned, but the back of the cat is mentioned.\nB. The shape of the back is mentioned in the description but is not arched.\nC. The back or the cat is not mentioned.\nD. The shape of the back is mentioned in the description and is arched.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face and ears, featuring a mix of black and brown fur on its back and tail. The cat's body is predominantly white with black patches, and it has a short, sleek coat.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The color of the underbelly is not mentioned, but the underbelly of the cat is mentioned.\nB. The color of the underbelly is mentioned in the description but is not white.\nC. The underbelly or the cat is not mentioned.\nD. The color of the underbelly is mentioned in the description and is white.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face and ears, featuring a mix of black and brown fur on its back and tail. The cat's body is predominantly white with black patches, and it has a short, sleek coat.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The door is mentioned in the description.\nB. The door is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face and ears, featuring a mix of black and brown fur on its back and tail. The cat's body is predominantly white with black patches, and it has a short, sleek coat.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mirror is mentioned in the description.\nB. 
The mirror is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face and ears, featuring a mix of black and brown fur on its back and tail. The cat's body is predominantly white with black patches, and it has a short, sleek coat.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bathroom cabinet is mentioned in the description.\nB. The bathroom cabinet is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face and ears, featuring a mix of black and brown fur on its back and tail. The cat's body is predominantly white with black patches, and it has a short, sleek coat.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bathroom sink is mentioned in the description.\nB. The bathroom sink is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face and ears, featuring a mix of black and brown fur on its back and tail. The cat's body is predominantly white with black patches, and it has a short, sleek coat.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The hairbrush is mentioned in the description.\nB. The hairbrush is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The shape of the ear is not mentioned, but the ear of the cat is mentioned.", + "A. The texture of the fur is not mentioned, but the fur of the cat is mentioned.", + "D. The color of the fur is mentioned in the description and is black and white.", + "A. The shape of the back is not mentioned, but the back of the cat is mentioned.", + "D. The color of the underbelly is mentioned in the description and is white.", + "B. The door is not mentioned in the description.", + "B. The mirror is not mentioned in the description.", + "B. The bathroom cabinet is not mentioned in the description.", + "B. The bathroom sink is not mentioned in the description.", + "B. The hairbrush is not mentioned in the description." + ], + "score": 0.85, + "score_pos": 0.7, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "8556674": { + "pred": "A round, orange fruit with a smooth, glossy surface. The fruit has a gradient of colors, transitioning from a deep orange at the bottom to a lighter, almost yellowish hue at the top. 
There is a small, white, irregularly shaped patch near the top center.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned.", + 0 + ], + [ + "The color of the orange/tangerine is mentioned in the description but is not orange.", + -1 + ], + [ + "The color of the orange/tangerine is not mentioned.", + 0 + ], + [ + "The color of the orange/tangerine is mentioned in the description and is orange.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the orange/tangerine is mentioned in the description and is orange.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The surface or the orange/tangerine is not mentioned.", + 0 + ], + [ + "The color of the surface is mentioned in the description but is not bright orange.", + -1 + ], + [ + "The color of the surface is not mentioned, but the surface of the orange/tangerine is mentioned.", + 0.5 + ], + [ + "The color of the surface is mentioned in the description and is bright orange.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the surface is mentioned in the description but is not bright orange.", + "pred_index": 1, + "question_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned.", + 0 + ], + [ + "The texture of the orange/tangerine is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the orange/tangerine is not mentioned.", + 0 + ], + [ + "The texture of the orange/tangerine is mentioned in the description and is smooth.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The texture of the orange/tangerine is mentioned in the description and is smooth.", + "pred_index": 3, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned.", + 0 + ], + [ + "The shape of the orange/tangerine is mentioned in the description but is not round.", + -1 + ], + [ + "The shape of the orange/tangerine is not mentioned.", + 0 + ], + [ + "The shape of the orange/tangerine is mentioned in the description and is round.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the orange/tangerine is mentioned in the description and is round.", + "pred_index": 3, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The surface or the orange/tangerine is not mentioned.", + 0 + ], + [ + "The texture of the surface is mentioned in the description but is not glossy.", + -1 + ], + [ + "The texture of the surface is not mentioned, but the surface of the orange/tangerine is mentioned.", + 0.5 + ], + [ + "The texture of the surface is mentioned in the description and is glossy.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The texture of the surface is mentioned in the description and is glossy.", + "pred_index": 3, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned in the description.", + 0 + ], + [ + "The stem of the orange/tangerine is mentioned in the description.", + -1 + ], + [ + "The stem of the orange/tangerine is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The stem of the orange/tangerine is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned in the description.", + 0 + ], + [ + "The leaves of the orange/tangerine are mentioned in the description.", + -1 + ], + [ + "The leaves of the orange/tangerine are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The leaves of the orange/tangerine are not mentioned in the description.", + "pred_index": 2, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned in the description.", + 0 + ], + [ + "The segments of the orange/tangerine are mentioned in the description.", + -1 + ], + [ + "The segments of the orange/tangerine are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The segments of the orange/tangerine are not mentioned in the description.", + "pred_index": 2, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ceiling lights are mentioned in the description.", + -1 + ], + [ + "The ceiling lights are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The ceiling lights are not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned in the description.", + 0 + ], + [ + "The flesh of the orange/tangerine is mentioned in the description.", + -1 + ], + [ + "The flesh of the orange/tangerine is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The flesh of the orange/tangerine is not mentioned in the description.", + "pred_index": 2, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is an orange/tangerine or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round, orange fruit with a smooth, glossy surface. The fruit has a gradient of colors, transitioning from a deep orange at the bottom to a lighter, almost yellowish hue at the top. There is a small, white, irregularly shaped patch near the top center.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is an orange/tangerine or an object of a similar type? Again, It does not have to be an exact match.\nA. 
Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round, orange fruit with a smooth, glossy surface. The fruit has a gradient of colors, transitioning from a deep orange at the bottom to a lighter, almost yellowish hue at the top. There is a small, white, irregularly shaped patch near the top center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orange/tangerine is not mentioned.\nB. The color of the orange/tangerine is mentioned in the description but is not orange.\nC. The color of the orange/tangerine is not mentioned.\nD. The color of the orange/tangerine is mentioned in the description and is orange.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round, orange fruit with a smooth, glossy surface. The fruit has a gradient of colors, transitioning from a deep orange at the bottom to a lighter, almost yellowish hue at the top. There is a small, white, irregularly shaped patch near the top center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The surface or the orange/tangerine is not mentioned.\nB. 
The color of the surface is mentioned in the description but is not bright orange.\nC. The color of the surface is not mentioned, but the surface of the orange/tangerine is mentioned.\nD. The color of the surface is mentioned in the description and is bright orange.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round, orange fruit with a smooth, glossy surface. The fruit has a gradient of colors, transitioning from a deep orange at the bottom to a lighter, almost yellowish hue at the top. There is a small, white, irregularly shaped patch near the top center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orange/tangerine is not mentioned.\nB. The texture of the orange/tangerine is mentioned in the description but is not smooth.\nC. The texture of the orange/tangerine is not mentioned.\nD. The texture of the orange/tangerine is mentioned in the description and is smooth.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round, orange fruit with a smooth, glossy surface. The fruit has a gradient of colors, transitioning from a deep orange at the bottom to a lighter, almost yellowish hue at the top. 
There is a small, white, irregularly shaped patch near the top center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orange/tangerine is not mentioned.\nB. The shape of the orange/tangerine is mentioned in the description but is not round.\nC. The shape of the orange/tangerine is not mentioned.\nD. The shape of the orange/tangerine is mentioned in the description and is round.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round, orange fruit with a smooth, glossy surface. The fruit has a gradient of colors, transitioning from a deep orange at the bottom to a lighter, almost yellowish hue at the top. There is a small, white, irregularly shaped patch near the top center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The surface or the orange/tangerine is not mentioned.\nB. The texture of the surface is mentioned in the description but is not glossy.\nC. The texture of the surface is not mentioned, but the surface of the orange/tangerine is mentioned.\nD. The texture of the surface is mentioned in the description and is glossy.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA round, orange fruit with a smooth, glossy surface. The fruit has a gradient of colors, transitioning from a deep orange at the bottom to a lighter, almost yellowish hue at the top. There is a small, white, irregularly shaped patch near the top center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orange/tangerine is not mentioned in the description.\nB. The stem of the orange/tangerine is mentioned in the description.\nC. The stem of the orange/tangerine is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round, orange fruit with a smooth, glossy surface. The fruit has a gradient of colors, transitioning from a deep orange at the bottom to a lighter, almost yellowish hue at the top. There is a small, white, irregularly shaped patch near the top center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orange/tangerine is not mentioned in the description.\nB. The leaves of the orange/tangerine are mentioned in the description.\nC. The leaves of the orange/tangerine are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round, orange fruit with a smooth, glossy surface. The fruit has a gradient of colors, transitioning from a deep orange at the bottom to a lighter, almost yellowish hue at the top. There is a small, white, irregularly shaped patch near the top center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orange/tangerine is not mentioned in the description.\nB. The segments of the orange/tangerine are mentioned in the description.\nC. The segments of the orange/tangerine are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round, orange fruit with a smooth, glossy surface. The fruit has a gradient of colors, transitioning from a deep orange at the bottom to a lighter, almost yellowish hue at the top. There is a small, white, irregularly shaped patch near the top center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ceiling lights are mentioned in the description.\nB. The ceiling lights are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round, orange fruit with a smooth, glossy surface. The fruit has a gradient of colors, transitioning from a deep orange at the bottom to a lighter, almost yellowish hue at the top. There is a small, white, irregularly shaped patch near the top center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orange/tangerine is not mentioned in the description.\nB. The flesh of the orange/tangerine is mentioned in the description.\nC. The flesh of the orange/tangerine is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the orange/tangerine is mentioned in the description and is orange.", + "B. The color of the surface is mentioned in the description but is not bright orange.", + "D. The texture of the orange/tangerine is mentioned in the description and is smooth.", + "D. The shape of the orange/tangerine is mentioned in the description and is round.", + "D. The texture of the surface is mentioned in the description and is glossy.", + "C. The stem of the orange/tangerine is not mentioned in the description.", + "C. The leaves of the orange/tangerine are not mentioned in the description.", + "C. The segments of the orange/tangerine are not mentioned in the description.", + "B. The ceiling lights are not mentioned in the description.", + "C. The flesh of the orange/tangerine is not mentioned in the description." 
+ ], + "score": 0.8, + "score_pos": 0.6, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "8906172": { + "pred": "A black, in-ear headphone with a sleek, curved design.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the earphone is not mentioned.", + 0 + ], + [ + "The color of the earphone is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The earphone is not mentioned.", + 0 + ], + [ + "The color of the earphone is mentioned in the description and is dark or black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the earphone is mentioned in the description and is dark or black.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the earphone is not mentioned.", + 0 + ], + [ + "The shape of the earphone is mentioned in the description but is not curved.", + -1 + ], + [ + "The earphone is not mentioned.", + 0 + ], + [ + "The shape of the earphone is mentioned in the description and is curved.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the earphone is mentioned in the description and is curved.", + "pred_index": 3, + "question_index": 1, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The control buttons of the earphone are mentioned in the description.", + -1 + ], + [ + "The control buttons of the earphone are not mentioned in the description.", + 1 + ], + [ + "The earphone is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The control buttons of the earphone are not mentioned in the description.", + "pred_index": 1, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mirror is mentioned in the description.", + -1 + ], + [ + "The mirror is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The mirror is not mentioned in the description.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cell phone is mentioned in the description.", + -1 + ], + [ + "The cell phone is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The cell phone is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The faucet is mentioned in the description.", + -1 + ], + [ + "The faucet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The faucet is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sink is mentioned in the description.", + -1 + ], + [ + "The sink is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The sink is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is an earphone or an object of a similar type? 
Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, in-ear headphone with a sleek, curved design.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is an earphone or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, in-ear headphone with a sleek, curved design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the earphone is not mentioned.\nB. The color of the earphone is mentioned in the description but is not dark or black.\nC. The earphone is not mentioned.\nD. The color of the earphone is mentioned in the description and is dark or black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, in-ear headphone with a sleek, curved design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the earphone is not mentioned.\nB. The shape of the earphone is mentioned in the description but is not curved.\nC. The earphone is not mentioned.\nD. The shape of the earphone is mentioned in the description and is curved.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, in-ear headphone with a sleek, curved design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The control buttons of the earphone are mentioned in the description.\nB. The control buttons of the earphone are not mentioned in the description.\nC. The earphone is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black, in-ear headphone with a sleek, curved design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mirror is mentioned in the description.\nB. The mirror is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, in-ear headphone with a sleek, curved design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cell phone is mentioned in the description.\nB. The cell phone is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, in-ear headphone with a sleek, curved design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The faucet is mentioned in the description.\nB. The faucet is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, in-ear headphone with a sleek, curved design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sink is mentioned in the description.\nB. The sink is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the earphone is mentioned in the description and is dark or black.", + "D. The shape of the earphone is mentioned in the description and is curved.", + "B. The control buttons of the earphone are not mentioned in the description.", + "B. The mirror is not mentioned in the description.", + "B. The cell phone is not mentioned in the description.", + "B. The faucet is not mentioned in the description.", + "B. The sink is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "10666665": { + "pred": "A round wall clock with a black frame and a white face. The clock features black Arabic numerals for each hour, with the numbers 1 through 12 clearly visible. The hour and minute hands are black and pointed, while the second hand is thin and black. The clock has a simple, minimalist design.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clock is not mentioned.", + 0 + ], + [ + "The shape of the clock is not mentioned.", + 0 + ], + [ + "The shape of the clock is mentioned in the description but is not circular.", + -1 + ], + [ + "The shape of the clock is mentioned in the description and is circular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The shape of the clock is mentioned in the description and is circular.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clock face or the clock is not mentioned.", + 0 + ], + [ + "The color of the clock face is not mentioned, but the clock face of the clock is mentioned.", + 0.5 + ], + [ + "The color of the clock face is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the clock face is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the clock face is mentioned in the description and is white.", + "pred_index": 3, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The hour hand or the clock is not mentioned.", + 0 + ], + [ + "The color of the hour hand is not mentioned, but the hour hand of the clock is mentioned.", + 0.5 + ], + [ + "The color of the hour hand is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the hour hand is mentioned in the description and is black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the hour hand is mentioned in the description and is black.", + "pred_index": 3, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The numbers or the clock are not mentioned.", + 0 + ], + [ + "The color of the numbers is not mentioned, but the numbers of the clock are mentioned.", + 0.5 + ], + [ + "The color of the numbers is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the numbers is mentioned in the description and is black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the numbers is mentioned in the description and is black.", + "pred_index": 3, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The frame or the clock is not mentioned.", + 0 + ], + [ + "The color of the frame is not mentioned, but the frame of the clock is mentioned.", + 0.5 + ], + [ + "The color of the frame is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the frame is mentioned in the description and is black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the frame is mentioned in the description and is black.", + "pred_index": 3, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bamboo blind is not mentioned in the description.", + 1 + ], + [ + "The bamboo blind is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The bamboo blind is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The microwave is not mentioned in the description.", + 1 + ], + [ + "The microwave is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The microwave is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The digital display of the clock is not mentioned in the description.", + 1 + ], + [ + "The clock is not mentioned in the description.", + 0 + ], + [ + "The digital display of the clock is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The digital display of the clock is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pendulum of the clock is not mentioned in the description.", + 1 + ], + [ + "The clock is not mentioned in the description.", + 0 + ], + [ + "The pendulum of the clock is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The pendulum of the clock is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The faucet is not mentioned in the description.", + 1 + ], + [ + "The faucet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The faucet is not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a clock or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals for each hour, with the numbers 1 through 12 clearly visible. The hour and minute hands are black and pointed, while the second hand is thin and black. The clock has a simple, minimalist design.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a clock or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals for each hour, with the numbers 1 through 12 clearly visible. The hour and minute hands are black and pointed, while the second hand is thin and black. The clock has a simple, minimalist design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The clock is not mentioned.\nB. The shape of the clock is not mentioned.\nC. 
The shape of the clock is mentioned in the description but is not circular.\nD. The shape of the clock is mentioned in the description and is circular.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals for each hour, with the numbers 1 through 12 clearly visible. The hour and minute hands are black and pointed, while the second hand is thin and black. The clock has a simple, minimalist design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The clock face or the clock is not mentioned.\nB. The color of the clock face is not mentioned, but the clock face of the clock is mentioned.\nC. The color of the clock face is mentioned in the description but is not white.\nD. The color of the clock face is mentioned in the description and is white.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals for each hour, with the numbers 1 through 12 clearly visible. The hour and minute hands are black and pointed, while the second hand is thin and black. 
The clock has a simple, minimalist design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The hour hand or the clock is not mentioned.\nB. The color of the hour hand is not mentioned, but the hour hand of the clock is mentioned.\nC. The color of the hour hand is mentioned in the description but is not black.\nD. The color of the hour hand is mentioned in the description and is black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals for each hour, with the numbers 1 through 12 clearly visible. The hour and minute hands are black and pointed, while the second hand is thin and black. The clock has a simple, minimalist design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The numbers or the clock are not mentioned.\nB. The color of the numbers is not mentioned, but the numbers of the clock are mentioned.\nC. The color of the numbers is mentioned in the description but is not black.\nD. The color of the numbers is mentioned in the description and is black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals for each hour, with the numbers 1 through 12 clearly visible. The hour and minute hands are black and pointed, while the second hand is thin and black. The clock has a simple, minimalist design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The frame or the clock is not mentioned.\nB. The color of the frame is not mentioned, but the frame of the clock is mentioned.\nC. The color of the frame is mentioned in the description but is not black.\nD. The color of the frame is mentioned in the description and is black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals for each hour, with the numbers 1 through 12 clearly visible. The hour and minute hands are black and pointed, while the second hand is thin and black. The clock has a simple, minimalist design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bamboo blind is not mentioned in the description.\nB. The bamboo blind is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals for each hour, with the numbers 1 through 12 clearly visible. The hour and minute hands are black and pointed, while the second hand is thin and black. The clock has a simple, minimalist design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The microwave is not mentioned in the description.\nB. The microwave is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals for each hour, with the numbers 1 through 12 clearly visible. The hour and minute hands are black and pointed, while the second hand is thin and black. The clock has a simple, minimalist design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The digital display of the clock is not mentioned in the description.\nB. The clock is not mentioned in the description.\nC. The digital display of the clock is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals for each hour, with the numbers 1 through 12 clearly visible. The hour and minute hands are black and pointed, while the second hand is thin and black. The clock has a simple, minimalist design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pendulum of the clock is not mentioned in the description.\nB. The clock is not mentioned in the description.\nC. The pendulum of the clock is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals for each hour, with the numbers 1 through 12 clearly visible. The hour and minute hands are black and pointed, while the second hand is thin and black. The clock has a simple, minimalist design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The faucet is not mentioned in the description.\nB. The faucet is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The shape of the clock is mentioned in the description and is circular.", + "D. 
The color of the clock face is mentioned in the description and is white.", + "D. The color of the hour hand is mentioned in the description and is black.", + "D. The color of the numbers is mentioned in the description and is black.", + "D. The color of the frame is mentioned in the description and is black.", + "A. The bamboo blind is not mentioned in the description.", + "A. The microwave is not mentioned in the description.", + "A. The digital display of the clock is not mentioned in the description.", + "A. The pendulum of the clock is not mentioned in the description.", + "A. The faucet is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "10811497": { + "pred": "A dark green, oval-shaped key with a smooth surface and a small, circular indentation near the bottom.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the key is not mentioned.", + 0 + ], + [ + "The color of the key is mentioned in the description and is dark green, gray, or black.", + 1 + ], + [ + "The color of the key is mentioned in the description but is not dark green, gray, or black.", + -1 + ], + [ + "The key is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the key is mentioned in the description and is dark green, gray, or black.", + "pred_index": 1, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the key is not mentioned.", + 0 + ], + [ + "The material of the key is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the key is mentioned in the description but is not plastic.", + -1 + ], + [ + "The key is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The material of the key is not mentioned.", + "pred_index": 0, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the key is not mentioned.", + 0 + ], + [ + "The texture of the key is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the key is mentioned in the description but is not smooth.", + -1 + ], + [ + "The key is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The texture of the key is mentioned in the description and is smooth.", + "pred_index": 1, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the key is not mentioned.", + 0 + ], + [ + "The shape of the key is mentioned in the description and is rounded, circular, or oval.", + 1 + ], + [ + "The shape of the key is mentioned in the description but is not rounded, circular, or oval.", + -1 + ], + [ + "The key is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the key is mentioned in the description and is rounded, circular, or oval.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The key is not mentioned in the description.", + 0 + ], + [ + "The key bow of the key is not mentioned in the description.", + 1 + ], + [ + "The key bow of the key is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The key bow of the key is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The key is not mentioned in the description.", + 0 + ], + [ + "The key teeth of the key are not mentioned in the description.", + 1 + ], + [ + "The key teeth of the key are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The key teeth of the key are not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sticky note is not mentioned in the description.", + 1 + ], + [ + "The sticky note is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The sticky note is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The smartphone is not mentioned in the description.", + 1 + ], + [ + "The smartphone is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The smartphone is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The black fabric item is not mentioned in the description.", + 1 + ], + [ + "The black fabric item is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The black fabric item is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a key or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark green, oval-shaped key with a smooth surface and a small, circular indentation near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a key or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark green, oval-shaped key with a smooth surface and a small, circular indentation near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the key is not mentioned.\nB. 
The color of the key is mentioned in the description and is dark green, gray, or black.\nC. The color of the key is mentioned in the description but is not dark green, gray, or black.\nD. The key is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark green, oval-shaped key with a smooth surface and a small, circular indentation near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the key is not mentioned.\nB. The material of the key is mentioned in the description and is plastic.\nC. The material of the key is mentioned in the description but is not plastic.\nD. The key is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark green, oval-shaped key with a smooth surface and a small, circular indentation near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the key is not mentioned.\nB. The texture of the key is mentioned in the description and is smooth.\nC. The texture of the key is mentioned in the description but is not smooth.\nD. 
The key is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark green, oval-shaped key with a smooth surface and a small, circular indentation near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the key is not mentioned.\nB. The shape of the key is mentioned in the description and is rounded, circular, or oval.\nC. The shape of the key is mentioned in the description but is not rounded, circular, or oval.\nD. The key is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark green, oval-shaped key with a smooth surface and a small, circular indentation near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The key is not mentioned in the description.\nB. The key bow of the key is not mentioned in the description.\nC. The key bow of the key is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark green, oval-shaped key with a smooth surface and a small, circular indentation near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The key is not mentioned in the description.\nB. The key teeth of the key are not mentioned in the description.\nC. The key teeth of the key are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark green, oval-shaped key with a smooth surface and a small, circular indentation near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sticky note is not mentioned in the description.\nB. The sticky note is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA dark green, oval-shaped key with a smooth surface and a small, circular indentation near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The smartphone is not mentioned in the description.\nB. The smartphone is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark green, oval-shaped key with a smooth surface and a small, circular indentation near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The black fabric item is not mentioned in the description.\nB. The black fabric item is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the key is mentioned in the description and is dark green, gray, or black.", + "A. The material of the key is not mentioned.", + "B. The texture of the key is mentioned in the description and is smooth.", + "B. The shape of the key is mentioned in the description and is rounded, circular, or oval.", + "B. The key bow of the key is not mentioned in the description.", + "B. The key teeth of the key are not mentioned in the description.", + "A. The sticky note is not mentioned in the description.", + "A. The smartphone is not mentioned in the description.", + "A. The black fabric item is not mentioned in the description." 
+ ], + "score": 0.8888888888888888, + "score_pos": 0.75, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "11021562": { + "pred": "The microwave oven features a white exterior with a prominent vertical handle on the left side of the door. The door has a series of horizontal ventilation slits near the top.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the handle is not mentioned, but the handle of the microwave is mentioned.", + 0.5 + ], + [ + "The shape of the handle is mentioned in the description but is not curved.", + -1 + ], + [ + "The shape of the handle is mentioned in the description and is curved.", + 1 + ], + [ + "The handle or the microwave is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the handle is not mentioned, but the handle of the microwave is mentioned.", + "pred_index": 0, + "question_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the casing is not mentioned, but the casing of the microwave is mentioned.", + 0.5 + ], + [ + "The texture of the casing is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the casing is mentioned in the description and is smooth.", + 1 + ], + [ + "The casing or the microwave is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The texture of the casing is not mentioned, but the casing of the microwave is mentioned.", + "pred_index": 0, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orientation of the handle is not mentioned, but the handle of the microwave is mentioned.", + 0.5 + ], + [ + "The orientation of the handle is mentioned in the description but is not vertical.", + -1 + ], + [ + "The orientation of the handle is mentioned in the description and is vertical.", + 1 + ], + [ + "The handle or the microwave is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The orientation of the handle is mentioned in the description and is vertical.", + "pred_index": 2, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the microwave is not mentioned.", + 0 + ], + [ + "The color of the microwave is mentioned in the description but is not white, beige, or yellow.", + -1 + ], + [ + "The color of the microwave is mentioned in the description and is white, beige, or yellow.", + 1 + ], + [ + "The microwave is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the microwave is mentioned in the description and is white, beige, or yellow.", + "pred_index": 2, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The position of the vent is not mentioned, but the vent of the microwave is mentioned.", + 0.5 + ], + [ + "The position of the vent is mentioned in the description but is not top.", + -1 + ], + [ + "The position of the vent is mentioned in the description and is top.", + 1 + ], + [ + "The vent or the microwave is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The position of the vent is mentioned in the description and is top.", + "pred_index": 2, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fire extinguisher is mentioned in the description.", + -1 + ], + [ + "The fire extinguisher is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The fire extinguisher is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The turntable of the microwave is mentioned in the description.", + -1 + ], + [ + "The turntable of the microwave is not mentioned in the description.", + 1 + ], + [ + "The microwave is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The turntable of the microwave is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The interior light of the microwave is mentioned in the description.", + -1 + ], + [ + "The interior light of the microwave is not mentioned in the description.", + 1 + ], + [ + "The microwave is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The interior light of the microwave is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The windows are mentioned in the description.", + -1 + ], + [ + "The windows are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The windows are not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rug is mentioned in the description.", + -1 + ], + [ + "The rug is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The rug is not mentioned in the description.", + "pred_index": 1, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a microwave or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a white exterior with a prominent vertical handle on the left side of the door. 
The door has a series of horizontal ventilation slits near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a microwave or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a white exterior with a prominent vertical handle on the left side of the door. The door has a series of horizontal ventilation slits near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the handle is not mentioned, but the handle of the microwave is mentioned.\nB. The shape of the handle is mentioned in the description but is not curved.\nC. The shape of the handle is mentioned in the description and is curved.\nD. The handle or the microwave is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a white exterior with a prominent vertical handle on the left side of the door. 
The door has a series of horizontal ventilation slits near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the casing is not mentioned, but the casing of the microwave is mentioned.\nB. The texture of the casing is mentioned in the description but is not smooth.\nC. The texture of the casing is mentioned in the description and is smooth.\nD. The casing or the microwave is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a white exterior with a prominent vertical handle on the left side of the door. The door has a series of horizontal ventilation slits near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orientation of the handle is not mentioned, but the handle of the microwave is mentioned.\nB. The orientation of the handle is mentioned in the description but is not vertical.\nC. The orientation of the handle is mentioned in the description and is vertical.\nD. The handle or the microwave is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a white exterior with a prominent vertical handle on the left side of the door. The door has a series of horizontal ventilation slits near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the microwave is not mentioned.\nB. The color of the microwave is mentioned in the description but is not white, beige, or yellow.\nC. The color of the microwave is mentioned in the description and is white, beige, or yellow.\nD. The microwave is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a white exterior with a prominent vertical handle on the left side of the door. The door has a series of horizontal ventilation slits near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The position of the vent is not mentioned, but the vent of the microwave is mentioned.\nB. The position of the vent is mentioned in the description but is not top.\nC. The position of the vent is mentioned in the description and is top.\nD. The vent or the microwave is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a white exterior with a prominent vertical handle on the left side of the door. The door has a series of horizontal ventilation slits near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fire extinguisher is mentioned in the description.\nB. The fire extinguisher is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a white exterior with a prominent vertical handle on the left side of the door. The door has a series of horizontal ventilation slits near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The turntable of the microwave is mentioned in the description.\nB. The turntable of the microwave is not mentioned in the description.\nC. The microwave is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a white exterior with a prominent vertical handle on the left side of the door. The door has a series of horizontal ventilation slits near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The interior light of the microwave is mentioned in the description.\nB. The interior light of the microwave is not mentioned in the description.\nC. The microwave is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a white exterior with a prominent vertical handle on the left side of the door. The door has a series of horizontal ventilation slits near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The windows are mentioned in the description.\nB. The windows are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a white exterior with a prominent vertical handle on the left side of the door. 
The door has a series of horizontal ventilation slits near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rug is mentioned in the description.\nB. The rug is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The shape of the handle is not mentioned, but the handle of the microwave is mentioned.", + "A. The texture of the casing is not mentioned, but the casing of the microwave is mentioned.", + "C. The orientation of the handle is mentioned in the description and is vertical.", + "C. The color of the microwave is mentioned in the description and is white, beige, or yellow.", + "C. The position of the vent is mentioned in the description and is top.", + "B. The fire extinguisher is not mentioned in the description.", + "B. The turntable of the microwave is not mentioned in the description.", + "B. The interior light of the microwave is not mentioned in the description.", + "B. The windows are not mentioned in the description.", + "B. The rug is not mentioned in the description." + ], + "score": 0.9, + "score_pos": 0.8, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "11021563": { + "pred": "A white stove with four black burners, featuring a control panel with knobs on the back.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The control panel or the stove is not mentioned.", + 0 + ], + [ + "The location of the control panel is mentioned in the description but is not back.", + -1 + ], + [ + "The location of the control panel is mentioned in the description and is back.", + 1 + ], + [ + "The location of the control panel is not mentioned, but the control panel of the stove is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. 
The location of the control panel is mentioned in the description and is back.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The burners or the stove are not mentioned.", + 0 + ], + [ + "The color of the burners is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the burners is mentioned in the description and is black.", + 1 + ], + [ + "The color of the burners is not mentioned, but the burners of the stove are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the burners is mentioned in the description and is black.", + "pred_index": 2, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The burners or the stove are not mentioned.", + 0 + ], + [ + "The number of the burners is mentioned in the description but is not 4.", + -1 + ], + [ + "The number of the burners is mentioned in the description and is 4.", + 1 + ], + [ + "The number of the burners is not mentioned, but the burners of the stove are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. The number of the burners is mentioned in the description and is 4.", + "pred_index": 2, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The burners or the stove are not mentioned.", + 0 + ], + [ + "The shape of the burners is mentioned in the description but is not coiled.", + -1 + ], + [ + "The shape of the burners is mentioned in the description and is coiled.", + 1 + ], + [ + "The shape of the burners is not mentioned, but the burners of the stove are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. 
The shape of the burners is not mentioned, but the burners of the stove are mentioned.", + "pred_index": 3, + "question_index": 3, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The countertop is mentioned in the description.", + -1 + ], + [ + "The countertop is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The countertop is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The kitchen cabinets are mentioned in the description.", + -1 + ], + [ + "The kitchen cabinets are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The kitchen cabinets are not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The broom is mentioned in the description.", + -1 + ], + [ + "The broom is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The broom is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The induction cooktop surface of the stove is mentioned in the description.", + -1 + ], + [ + "The induction cooktop surface of the stove is not mentioned in the description.", + 1 + ], + [ + "The stove is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The induction cooktop surface of the stove is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The small table is mentioned in the description.", + -1 + ], + [ + "The small table is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The small table is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a stove or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white stove with four black burners, featuring a control panel with knobs on the back.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a stove or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white stove with four black burners, featuring a control panel with knobs on the back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The control panel or the stove is not mentioned.\nB. The location of the control panel is mentioned in the description but is not back.\nC. The location of the control panel is mentioned in the description and is back.\nD. The location of the control panel is not mentioned, but the control panel of the stove is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white stove with four black burners, featuring a control panel with knobs on the back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The burners or the stove are not mentioned.\nB. The color of the burners is mentioned in the description but is not black.\nC. The color of the burners is mentioned in the description and is black.\nD. The color of the burners is not mentioned, but the burners of the stove are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA white stove with four black burners, featuring a control panel with knobs on the back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The burners or the stove are not mentioned.\nB. The number of the burners is mentioned in the description but is not 4.\nC. The number of the burners is mentioned in the description and is 4.\nD. The number of the burners is not mentioned, but the burners of the stove are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white stove with four black burners, featuring a control panel with knobs on the back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The burners or the stove are not mentioned.\nB. The shape of the burners is mentioned in the description but is not coiled.\nC. The shape of the burners is mentioned in the description and is coiled.\nD. The shape of the burners is not mentioned, but the burners of the stove are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA white stove with four black burners, featuring a control panel with knobs on the back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The countertop is mentioned in the description.\nB. The countertop is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white stove with four black burners, featuring a control panel with knobs on the back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The kitchen cabinets are mentioned in the description.\nB. The kitchen cabinets are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white stove with four black burners, featuring a control panel with knobs on the back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The broom is mentioned in the description.\nB. 
The broom is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white stove with four black burners, featuring a control panel with knobs on the back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The induction cooktop surface of the stove is mentioned in the description.\nB. The induction cooktop surface of the stove is not mentioned in the description.\nC. The stove is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white stove with four black burners, featuring a control panel with knobs on the back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The small table is mentioned in the description.\nB. The small table is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The location of the control panel is mentioned in the description and is back.", + "C. The color of the burners is mentioned in the description and is black.", + "C. The number of the burners is mentioned in the description and is 4.", + "D. 
The shape of the burners is not mentioned, but the burners of the stove are mentioned.", + "B. The countertop is not mentioned in the description.", + "B. The kitchen cabinets are not mentioned in the description.", + "B. The broom is not mentioned in the description.", + "B. The induction cooktop surface of the stove is not mentioned in the description.", + "B. The small table is not mentioned in the description." + ], + "score": 0.9444444444444444, + "score_pos": 0.875, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "12348078": { + "pred": "A woman with dark hair tied back in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is seated and holding a baby close to her chest.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clothes or the person are not mentioned.", + 0 + ], + [ + "The style of the clothes is mentioned in the description and is t-shirt.", + 1 + ], + [ + "The style of the clothes is mentioned in the description but is not t-shirt.", + -1 + ], + [ + "The style of the clothes is not mentioned, but the clothes of the person are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The style of the clothes is mentioned in the description and is t-shirt.", + "pred_index": 1, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clothes or the person are not mentioned.", + 0 + ], + [ + "The color of the clothes is mentioned in the description and is white.", + 1 + ], + [ + "The color of the clothes is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the clothes is not mentioned, but the clothes of the person are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the clothes is mentioned in the description and is white.", + "pred_index": 1, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The person is not mentioned.", + 0 + ], + [ + "The hairstyle of the person is mentioned in the description and is bun.", + 1 + ], + [ + "The hairstyle of the person is mentioned in the description but is not bun.", + -1 + ], + [ + "The hairstyle of the person is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The hairstyle of the person is mentioned in the description and is bun.", + "pred_index": 1, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The person is not mentioned.", + 0 + ], + [ + "The hair color of the person is mentioned in the description and is dark or black.", + 1 + ], + [ + "The hair color of the person is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The hair color of the person is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The hair color of the person is mentioned in the description and is dark or black.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pants or the person are not mentioned.", + 0 + ], + [ + "The color of the pants is mentioned in the description and is black.", + 1 + ], + [ + "The color of the pants is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the pants is not mentioned, but the pants of the person are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the pants is mentioned in the description and is black.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouth of the person is mentioned in the description.", + -1 + ], + [ + "The person is not mentioned in the description.", + 0 + ], + [ + "The mouth of the person is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The mouth of the person is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The face of the person is mentioned in the description.", + -1 + ], + [ + "The person is not mentioned in the description.", + 0 + ], + [ + "The face of the person is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The face of the person is not mentioned in the description.", + "pred_index": 2, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The nose of the person is mentioned in the description.", + -1 + ], + [ + "The person is not mentioned in the description.", + 0 + ], + [ + "The nose of the person is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The nose of the person is not mentioned in the description.", + "pred_index": 2, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wall is mentioned in the description.", + -1 + ], + [ + "The wall is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The wall is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bicycle cart is mentioned in the description.", + -1 + ], + [ + "The bicycle cart is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bicycle cart is not mentioned in the description.", + "pred_index": 1, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a person or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied back in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. 
She is seated and holding a baby close to her chest.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a person or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied back in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is seated and holding a baby close to her chest.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The clothes or the person are not mentioned.\nB. The style of the clothes is mentioned in the description and is t-shirt.\nC. The style of the clothes is mentioned in the description but is not t-shirt.\nD. The style of the clothes is not mentioned, but the clothes of the person are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied back in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. 
She is seated and holding a baby close to her chest.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The clothes or the person are not mentioned.\nB. The color of the clothes is mentioned in the description and is white.\nC. The color of the clothes is mentioned in the description but is not white.\nD. The color of the clothes is not mentioned, but the clothes of the person are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied back in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is seated and holding a baby close to her chest.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The person is not mentioned.\nB. The hairstyle of the person is mentioned in the description and is bun.\nC. The hairstyle of the person is mentioned in the description but is not bun.\nD. The hairstyle of the person is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied back in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is seated and holding a baby close to her chest.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The person is not mentioned.\nB. The hair color of the person is mentioned in the description and is dark or black.\nC. The hair color of the person is mentioned in the description but is not dark or black.\nD. The hair color of the person is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied back in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is seated and holding a baby close to her chest.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pants or the person are not mentioned.\nB. The color of the pants is mentioned in the description and is black.\nC. The color of the pants is mentioned in the description but is not black.\nD. The color of the pants is not mentioned, but the pants of the person are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied back in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is seated and holding a baby close to her chest.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mouth of the person is mentioned in the description.\nB. The person is not mentioned in the description.\nC. The mouth of the person is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied back in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is seated and holding a baby close to her chest.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The face of the person is mentioned in the description.\nB. The person is not mentioned in the description.\nC. The face of the person is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied back in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is seated and holding a baby close to her chest.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The nose of the person is mentioned in the description.\nB. The person is not mentioned in the description.\nC. The nose of the person is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied back in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is seated and holding a baby close to her chest.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wall is mentioned in the description.\nB. The wall is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied back in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. 
She is seated and holding a baby close to her chest.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bicycle cart is mentioned in the description.\nB. The bicycle cart is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The style of the clothes is mentioned in the description and is t-shirt.", + "B. The color of the clothes is mentioned in the description and is white.", + "B. The hairstyle of the person is mentioned in the description and is bun.", + "B. The hair color of the person is mentioned in the description and is dark or black.", + "B. The color of the pants is mentioned in the description and is black.", + "C. The mouth of the person is not mentioned in the description.", + "C. The face of the person is not mentioned in the description.", + "C. The nose of the person is not mentioned in the description.", + "B. The wall is not mentioned in the description.", + "B. The bicycle cart is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "13138178": { + "pred": "The stool has a deep blue, glossy finish with a smooth, curved design. The visible part includes a rounded, arch-like structure with a slight indentation in the middle, creating a sleek and modern appearance.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the stool is mentioned in the description and is blue.", + 1 + ], + [ + "The stool is not mentioned.", + 0 + ], + [ + "The color of the stool is mentioned in the description but is not blue.", + -1 + ], + [ + "The color of the stool is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the stool is mentioned in the description and is blue.", + "pred_index": 0, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the stool is mentioned in the description and is plastic.", + 1 + ], + [ + "The stool is not mentioned.", + 0 + ], + [ + "The material of the stool is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the stool is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the stool is mentioned in the description but is not plastic.", + "pred_index": 2, + "question_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the seat is mentioned in the description and is flat.", + 1 + ], + [ + "The seat or the stool is not mentioned.", + 0 + ], + [ + "The shape of the seat is mentioned in the description but is not flat.", + -1 + ], + [ + "The shape of the seat is not mentioned, but the seat of the stool is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the seat is mentioned in the description but is not flat.", + "pred_index": 2, + "question_index": 2, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The armrest of the stool is mentioned in the description.", + -1 + ], + [ + "The armrest of the stool is not mentioned in the description.", + 1 + ], + [ + "The stool is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The armrest of the stool is not mentioned in the description.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The backrest of the stool is mentioned in the description.", + -1 + ], + [ + "The backrest of the stool is not mentioned in the description.", + 1 + ], + [ + "The stool is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The backrest of the stool is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Cooking grate is mentioned in the description.", + -1 + ], + [ + "The Cooking grate is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The Cooking grate is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The footrest of the stool is mentioned in the description.", + -1 + ], + [ + "The footrest of the stool is not mentioned in the description.", + 1 + ], + [ + "The stool is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The footrest of the stool is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The swivel base of the stool is mentioned in the description.", + -1 + ], + [ + "The swivel base of the stool is not mentioned in the description.", + 1 + ], + [ + "The stool is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The swivel base of the stool is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a stool or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe stool has a deep blue, glossy finish with a smooth, curved design. The visible part includes a rounded, arch-like structure with a slight indentation in the middle, creating a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a stool or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. 
No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe stool has a deep blue, glossy finish with a smooth, curved design. The visible part includes a rounded, arch-like structure with a slight indentation in the middle, creating a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the stool is mentioned in the description and is blue.\nB. The stool is not mentioned.\nC. The color of the stool is mentioned in the description but is not blue.\nD. The color of the stool is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe stool has a deep blue, glossy finish with a smooth, curved design. The visible part includes a rounded, arch-like structure with a slight indentation in the middle, creating a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the stool is mentioned in the description and is plastic.\nB. The stool is not mentioned.\nC. The material of the stool is mentioned in the description but is not plastic.\nD. 
The material of the stool is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe stool has a deep blue, glossy finish with a smooth, curved design. The visible part includes a rounded, arch-like structure with a slight indentation in the middle, creating a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the seat is mentioned in the description and is flat.\nB. The seat or the stool is not mentioned.\nC. The shape of the seat is mentioned in the description but is not flat.\nD. The shape of the seat is not mentioned, but the seat of the stool is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe stool has a deep blue, glossy finish with a smooth, curved design. The visible part includes a rounded, arch-like structure with a slight indentation in the middle, creating a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The armrest of the stool is mentioned in the description.\nB. The armrest of the stool is not mentioned in the description.\nC. 
The stool is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe stool has a deep blue, glossy finish with a smooth, curved design. The visible part includes a rounded, arch-like structure with a slight indentation in the middle, creating a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The backrest of the stool is mentioned in the description.\nB. The backrest of the stool is not mentioned in the description.\nC. The stool is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe stool has a deep blue, glossy finish with a smooth, curved design. The visible part includes a rounded, arch-like structure with a slight indentation in the middle, creating a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Cooking grate is mentioned in the description.\nB. The Cooking grate is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe stool has a deep blue, glossy finish with a smooth, curved design. The visible part includes a rounded, arch-like structure with a slight indentation in the middle, creating a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The footrest of the stool is mentioned in the description.\nB. The footrest of the stool is not mentioned in the description.\nC. The stool is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe stool has a deep blue, glossy finish with a smooth, curved design. The visible part includes a rounded, arch-like structure with a slight indentation in the middle, creating a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The swivel base of the stool is mentioned in the description.\nB. The swivel base of the stool is not mentioned in the description.\nC. The stool is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The color of the stool is mentioned in the description and is blue.", + "C. 
The material of the stool is mentioned in the description but is not plastic.", + "C. The shape of the seat is mentioned in the description but is not flat.", + "B. The armrest of the stool is not mentioned in the description.", + "B. The backrest of the stool is not mentioned in the description.", + "B. The Cooking grate is not mentioned in the description.", + "B. The footrest of the stool is not mentioned in the description.", + "B. The swivel base of the stool is not mentioned in the description." + ], + "score": 0.5, + "score_pos": -0.3333333333333333, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "13187927": { + "pred": "The motorcycle is a white scooter with a black seat and a rear storage compartment. It features a rear red tail light and a license plate mounted below the tail light. The handlebars are equipped with rearview mirrors, and the body has a sleek, modern design with a slightly curved front.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the exterior is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the exterior is mentioned in the description and is white.", + 1 + ], + [ + "The exterior or the motorcycle is not mentioned.", + 0 + ], + [ + "The color of the exterior is not mentioned, but the exterior of the motorcycle is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the exterior is mentioned in the description and is white.", + "pred_index": 1, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the seat is mentioned in the description but is not leather or vinyl.", + -1 + ], + [ + "The material of the seat is mentioned in the description and is leather or vinyl.", + 1 + ], + [ + "The seat or the motorcycle is not mentioned.", + 0 + ], + [ + "The material of the seat is not mentioned, but the seat of the motorcycle is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the seat is not mentioned, but the seat of the motorcycle is mentioned.", + "pred_index": 3, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the seat is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the seat is mentioned in the description and is black.", + 1 + ], + [ + "The seat or the motorcycle is not mentioned.", + 0 + ], + [ + "The color of the seat is not mentioned, but the seat of the motorcycle is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the seat is mentioned in the description and is black.", + "pred_index": 1, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the taillight is mentioned in the description but is not red.", + -1 + ], + [ + "The color of the taillight is mentioned in the description and is red.", + 1 + ], + [ + "The taillight or the motorcycle is not mentioned.", + 0 + ], + [ + "The color of the taillight is not mentioned, but the taillight of the motorcycle is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the taillight is mentioned in the description and is red.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the license plate is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the license plate is mentioned in the description and is rectangular.", + 1 + ], + [ + "The license plate or the motorcycle is not mentioned.", + 0 + ], + [ + "The shape of the license plate is not mentioned, but the license plate of the motorcycle is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the license plate is not mentioned, but the license plate of the motorcycle is mentioned.", + "pred_index": 3, + "question_index": 4, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The motorcycle is not mentioned in the description.", + 0 + ], + [ + "The windshield of the motorcycle is not mentioned in the description.", + 1 + ], + [ + "The windshield of the motorcycle is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The windshield of the motorcycle is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The balconies are not mentioned in the description.", + 1 + ], + [ + "The balconies are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The balconies are not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Pepsi advertisements are not mentioned in the description.", + 1 + ], + [ + "The Pepsi advertisements are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The Pepsi advertisements are not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The windows are not mentioned in the description.", + 1 + ], + [ + "The windows are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The windows are not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The decorative metal grill is not mentioned in the description.", + 1 + ], + [ + "The decorative metal grill is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The decorative metal grill is not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a motorcycle or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a black seat and a rear storage compartment. It features a rear red tail light and a license plate mounted below the tail light. The handlebars are equipped with rearview mirrors, and the body has a sleek, modern design with a slightly curved front.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a motorcycle or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a black seat and a rear storage compartment. It features a rear red tail light and a license plate mounted below the tail light. The handlebars are equipped with rearview mirrors, and the body has a sleek, modern design with a slightly curved front.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the exterior is mentioned in the description but is not white.\nB. The color of the exterior is mentioned in the description and is white.\nC. The exterior or the motorcycle is not mentioned.\nD. 
The color of the exterior is not mentioned, but the exterior of the motorcycle is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a black seat and a rear storage compartment. It features a rear red tail light and a license plate mounted below the tail light. The handlebars are equipped with rearview mirrors, and the body has a sleek, modern design with a slightly curved front.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the seat is mentioned in the description but is not leather or vinyl.\nB. The material of the seat is mentioned in the description and is leather or vinyl.\nC. The seat or the motorcycle is not mentioned.\nD. The material of the seat is not mentioned, but the seat of the motorcycle is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a black seat and a rear storage compartment. It features a rear red tail light and a license plate mounted below the tail light. 
The handlebars are equipped with rearview mirrors, and the body has a sleek, modern design with a slightly curved front.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the seat is mentioned in the description but is not black.\nB. The color of the seat is mentioned in the description and is black.\nC. The seat or the motorcycle is not mentioned.\nD. The color of the seat is not mentioned, but the seat of the motorcycle is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a black seat and a rear storage compartment. It features a rear red tail light and a license plate mounted below the tail light. The handlebars are equipped with rearview mirrors, and the body has a sleek, modern design with a slightly curved front.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the taillight is mentioned in the description but is not red.\nB. The color of the taillight is mentioned in the description and is red.\nC. The taillight or the motorcycle is not mentioned.\nD. The color of the taillight is not mentioned, but the taillight of the motorcycle is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a black seat and a rear storage compartment. It features a rear red tail light and a license plate mounted below the tail light. The handlebars are equipped with rearview mirrors, and the body has a sleek, modern design with a slightly curved front.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the license plate is mentioned in the description but is not rectangular.\nB. The shape of the license plate is mentioned in the description and is rectangular.\nC. The license plate or the motorcycle is not mentioned.\nD. The shape of the license plate is not mentioned, but the license plate of the motorcycle is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a black seat and a rear storage compartment. It features a rear red tail light and a license plate mounted below the tail light. The handlebars are equipped with rearview mirrors, and the body has a sleek, modern design with a slightly curved front.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The motorcycle is not mentioned in the description.\nB. The windshield of the motorcycle is not mentioned in the description.\nC. 
The windshield of the motorcycle is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a black seat and a rear storage compartment. It features a rear red tail light and a license plate mounted below the tail light. The handlebars are equipped with rearview mirrors, and the body has a sleek, modern design with a slightly curved front.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The balconies are not mentioned in the description.\nB. The balconies are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a black seat and a rear storage compartment. It features a rear red tail light and a license plate mounted below the tail light. The handlebars are equipped with rearview mirrors, and the body has a sleek, modern design with a slightly curved front.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Pepsi advertisements are not mentioned in the description.\nB. 
The Pepsi advertisements are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a black seat and a rear storage compartment. It features a rear red tail light and a license plate mounted below the tail light. The handlebars are equipped with rearview mirrors, and the body has a sleek, modern design with a slightly curved front.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The windows are not mentioned in the description.\nB. The windows are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a black seat and a rear storage compartment. It features a rear red tail light and a license plate mounted below the tail light. The handlebars are equipped with rearview mirrors, and the body has a sleek, modern design with a slightly curved front.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The decorative metal grill is not mentioned in the description.\nB. 
The decorative metal grill is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the exterior is mentioned in the description and is white.", + "D. The material of the seat is not mentioned, but the seat of the motorcycle is mentioned.", + "B. The color of the seat is mentioned in the description and is black.", + "B. The color of the taillight is mentioned in the description and is red.", + "D. The shape of the license plate is not mentioned, but the license plate of the motorcycle is mentioned.", + "B. The windshield of the motorcycle is not mentioned in the description.", + "A. The balconies are not mentioned in the description.", + "A. The Pepsi advertisements are not mentioned in the description.", + "A. The windows are not mentioned in the description.", + "A. The decorative metal grill is not mentioned in the description." + ], + "score": 0.9, + "score_pos": 0.8, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "14490578": { + "pred": "The harbor seal has a sleek, elongated body with a dark brown to black coloration. Its skin appears smooth and slightly shiny, with a few lighter patches scattered across its back. The seal's head is rounded, and its body tapers towards the tail.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the seal is mentioned in the description but is not smooth.", + -1 + ], + [ + "The seal is not mentioned.", + 0 + ], + [ + "The texture of the seal is not mentioned.", + 0 + ], + [ + "The texture of the seal is mentioned in the description and is smooth.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The texture of the seal is mentioned in the description and is smooth.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the skin is mentioned in the description but is not gray, black, or dark.", + -1 + ], + [ + "The skin or the seal is not mentioned.", + 0 + ], + [ + "The color of the skin is not mentioned, but the skin of the seal is mentioned.", + 0.5 + ], + [ + "The color of the skin is mentioned in the description and is gray, black, or dark.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the skin is mentioned in the description and is gray, black, or dark.", + "pred_index": 3, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the body is mentioned in the description but is not elongated.", + -1 + ], + [ + "The body or the seal is not mentioned.", + 0 + ], + [ + "The shape of the body is not mentioned, but the body of the seal is mentioned.", + 0.5 + ], + [ + "The shape of the body is mentioned in the description and is elongated.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the body is mentioned in the description and is elongated.", + "pred_index": 3, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The teeth of the seal are not mentioned in the description.", + 1 + ], + [ + "The teeth of the seal are mentioned in the description.", + -1 + ], + [ + "The seal is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The teeth of the seal are not mentioned in the description.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The claws of the seal are not mentioned in the description.", + 1 + ], + [ + "The claws of the seal are mentioned in the description.", + -1 + ], + [ + "The seal is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The claws of the seal are not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ears of the seal are not mentioned in the description.", + 1 + ], + [ + "The ears of the seal are mentioned in the description.", + -1 + ], + [ + "The seal is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The ears of the seal are not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sand is not mentioned in the description.", + 1 + ], + [ + "The sand is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The sand is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rocks are not mentioned in the description.", + 1 + ], + [ + "The rocks are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The rocks are not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a seal or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark brown to black coloration. Its skin appears smooth and slightly shiny, with a few lighter patches scattered across its back. The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a seal or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark brown to black coloration. Its skin appears smooth and slightly shiny, with a few lighter patches scattered across its back. 
The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the seal is mentioned in the description but is not smooth.\nB. The seal is not mentioned.\nC. The texture of the seal is not mentioned.\nD. The texture of the seal is mentioned in the description and is smooth.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark brown to black coloration. Its skin appears smooth and slightly shiny, with a few lighter patches scattered across its back. The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the skin is mentioned in the description but is not gray, black, or dark.\nB. The skin or the seal is not mentioned.\nC. The color of the skin is not mentioned, but the skin of the seal is mentioned.\nD. The color of the skin is mentioned in the description and is gray, black, or dark.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark brown to black coloration. Its skin appears smooth and slightly shiny, with a few lighter patches scattered across its back. The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the body is mentioned in the description but is not elongated.\nB. The body or the seal is not mentioned.\nC. The shape of the body is not mentioned, but the body of the seal is mentioned.\nD. The shape of the body is mentioned in the description and is elongated.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark brown to black coloration. Its skin appears smooth and slightly shiny, with a few lighter patches scattered across its back. The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The teeth of the seal are not mentioned in the description.\nB. The teeth of the seal are mentioned in the description.\nC. The seal is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark brown to black coloration. Its skin appears smooth and slightly shiny, with a few lighter patches scattered across its back. The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The claws of the seal are not mentioned in the description.\nB. The claws of the seal are mentioned in the description.\nC. The seal is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark brown to black coloration. Its skin appears smooth and slightly shiny, with a few lighter patches scattered across its back. The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ears of the seal are not mentioned in the description.\nB. The ears of the seal are mentioned in the description.\nC. The seal is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark brown to black coloration. Its skin appears smooth and slightly shiny, with a few lighter patches scattered across its back. The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sand is not mentioned in the description.\nB. The sand is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark brown to black coloration. Its skin appears smooth and slightly shiny, with a few lighter patches scattered across its back. The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rocks are not mentioned in the description.\nB. The rocks are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The texture of the seal is mentioned in the description and is smooth.", + "D. The color of the skin is mentioned in the description and is gray, black, or dark.", + "D. The shape of the body is mentioned in the description and is elongated.", + "A. The teeth of the seal are not mentioned in the description.", + "A. 
The claws of the seal are not mentioned in the description.", + "A. The ears of the seal are not mentioned in the description.", + "A. The sand is not mentioned in the description.", + "A. The rocks are not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "14640483": { + "pred": "A rectangular wooden chopping board with a smooth surface and rounded edges. The board has a natural wood grain pattern and a warm, honey-brown color.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the surface is not mentioned, but the surface of the cutting/chopping board is mentioned.", + 0.5 + ], + [ + "The surface or the cutting/chopping board is not mentioned.", + 0 + ], + [ + "The texture of the surface is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the surface is mentioned in the description and is smooth.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The texture of the surface is mentioned in the description and is smooth.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the cutting/chopping board is not mentioned.", + 0 + ], + [ + "The cutting/chopping board is not mentioned.", + 0 + ], + [ + "The color of the cutting/chopping board is mentioned in the description but is not light brown or wooden.", + -1 + ], + [ + "The color of the cutting/chopping board is mentioned in the description and is light brown or wooden.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the cutting/chopping board is mentioned in the description and is light brown or wooden.", + "pred_index": 3, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the cutting/chopping board is not mentioned.", + 0 + ], + [ + "The cutting/chopping board is not mentioned.", + 0 + ], + [ + "The shape of the cutting/chopping board is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the cutting/chopping board is mentioned in the description and is rectangular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the cutting/chopping board is mentioned in the description and is rectangular.", + "pred_index": 3, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the cutting/chopping board is not mentioned.", + 0 + ], + [ + "The cutting/chopping board is not mentioned.", + 0 + ], + [ + "The material of the cutting/chopping board is mentioned in the description but is not wood.", + -1 + ], + [ + "The material of the cutting/chopping board is mentioned in the description and is wood.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the cutting/chopping board is mentioned in the description and is wood.", + "pred_index": 3, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The hanging hole of the cutting/chopping board is mentioned in the description.", + -1 + ], + [ + "The cutting/chopping board is not mentioned in the description.", + 0 + ], + [ + "The hanging hole of the cutting/chopping board is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The hanging hole of the cutting/chopping board is not mentioned in the description.", + "pred_index": 2, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The juice groove of the cutting/chopping board is mentioned in the description.", + -1 + ], + [ + "The cutting/chopping board is not mentioned in the description.", + 0 + ], + [ + "The juice groove of the cutting/chopping board is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The juice groove of the cutting/chopping board is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle of the cutting/chopping board is mentioned in the description.", + -1 + ], + [ + "The cutting/chopping board is not mentioned in the description.", + 0 + ], + [ + "The handle of the cutting/chopping board is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The handle of the cutting/chopping board is not mentioned in the description.", + "pred_index": 2, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The whisk is mentioned in the description.", + -1 + ], + [ + "The whisk is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The whisk is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The utensils are mentioned in the description.", + -1 + ], + [ + "The utensils are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The utensils are not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a cutting/chopping board or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and rounded edges. The board has a natural wood grain pattern and a warm, honey-brown color.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a cutting/chopping board or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and rounded edges. The board has a natural wood grain pattern and a warm, honey-brown color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the surface is not mentioned, but the surface of the cutting/chopping board is mentioned.\nB. The surface or the cutting/chopping board is not mentioned.\nC. The texture of the surface is mentioned in the description but is not smooth.\nD. The texture of the surface is mentioned in the description and is smooth.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and rounded edges. The board has a natural wood grain pattern and a warm, honey-brown color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the cutting/chopping board is not mentioned.\nB. The cutting/chopping board is not mentioned.\nC. The color of the cutting/chopping board is mentioned in the description but is not light brown or wooden.\nD. The color of the cutting/chopping board is mentioned in the description and is light brown or wooden.\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and rounded edges. The board has a natural wood grain pattern and a warm, honey-brown color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the cutting/chopping board is not mentioned.\nB. The cutting/chopping board is not mentioned.\nC. The shape of the cutting/chopping board is mentioned in the description but is not rectangular.\nD. The shape of the cutting/chopping board is mentioned in the description and is rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and rounded edges. The board has a natural wood grain pattern and a warm, honey-brown color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the cutting/chopping board is not mentioned.\nB. The cutting/chopping board is not mentioned.\nC. The material of the cutting/chopping board is mentioned in the description but is not wood.\nD. 
The material of the cutting/chopping board is mentioned in the description and is wood.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and rounded edges. The board has a natural wood grain pattern and a warm, honey-brown color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The hanging hole of the cutting/chopping board is mentioned in the description.\nB. The cutting/chopping board is not mentioned in the description.\nC. The hanging hole of the cutting/chopping board is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and rounded edges. The board has a natural wood grain pattern and a warm, honey-brown color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The juice groove of the cutting/chopping board is mentioned in the description.\nB. The cutting/chopping board is not mentioned in the description.\nC. 
The juice groove of the cutting/chopping board is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and rounded edges. The board has a natural wood grain pattern and a warm, honey-brown color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handle of the cutting/chopping board is mentioned in the description.\nB. The cutting/chopping board is not mentioned in the description.\nC. The handle of the cutting/chopping board is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and rounded edges. The board has a natural wood grain pattern and a warm, honey-brown color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The whisk is mentioned in the description.\nB. The whisk is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and rounded edges. The board has a natural wood grain pattern and a warm, honey-brown color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The utensils are mentioned in the description.\nB. The utensils are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The texture of the surface is mentioned in the description and is smooth.", + "D. The color of the cutting/chopping board is mentioned in the description and is light brown or wooden.", + "D. The shape of the cutting/chopping board is mentioned in the description and is rectangular.", + "D. The material of the cutting/chopping board is mentioned in the description and is wood.", + "C. The hanging hole of the cutting/chopping board is not mentioned in the description.", + "C. The juice groove of the cutting/chopping board is not mentioned in the description.", + "C. The handle of the cutting/chopping board is not mentioned in the description.", + "B. The whisk is not mentioned in the description.", + "B. The utensils are not mentioned in the description." 
+ ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "16010041": { + "pred": "A pair of light-colored chopsticks with a smooth, slightly tapered design, featuring a subtle gradient from a pale yellow to a soft orange hue at the tips.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the chopsticks is mentioned in the description but is not wood.", + -1 + ], + [ + "The material of the chopsticks is not mentioned.", + 0 + ], + [ + "The material of the chopsticks is mentioned in the description and is wood.", + 1 + ], + [ + "The chopsticks are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The material of the chopsticks is not mentioned.", + "pred_index": 1, + "question_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the chopsticks is mentioned in the description but is not long cylindrical.", + -1 + ], + [ + "The shape of the chopsticks is not mentioned.", + 0 + ], + [ + "The shape of the chopsticks is mentioned in the description and is long cylindrical.", + 1 + ], + [ + "The chopsticks are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The shape of the chopsticks is mentioned in the description but is not long cylindrical.", + "pred_index": 0, + "question_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the body is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the body is not mentioned, but the body of the chopsticks is mentioned.", + 0.5 + ], + [ + "The texture of the body is mentioned in the description and is smooth.", + 1 + ], + [ + "The body or the chopsticks is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The texture of the body is mentioned in the description and is smooth.", + "pred_index": 2, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lemon slices are not mentioned in the description.", + 1 + ], + [ + "The lemon slices are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The lemon slices are not mentioned in the description.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The decorative elements of the chopsticks are not mentioned in the description.", + 1 + ], + [ + "The chopsticks are not mentioned in the description.", + 0 + ], + [ + "The decorative elements of the chopsticks are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The decorative elements of the chopsticks are mentioned in the description.", + "pred_index": 2, + "question_index": 4, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The man is not mentioned in the description.", + 1 + ], + [ + "The man is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The man is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sliced meat is not mentioned in the description.", + 1 + ], + [ + "The sliced meat is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The sliced meat is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green garnish is not mentioned in the description.", + 1 + ], + [ + "The green garnish is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The green garnish is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the objects in the description are chopsticks or objects of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored chopsticks with a smooth, slightly tapered design, featuring a subtle gradient from a pale yellow to a soft orange hue at the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the objects in the description are chopsticks or objects of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored chopsticks with a smooth, slightly tapered design, featuring a subtle gradient from a pale yellow to a soft orange hue at the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the chopsticks is mentioned in the description but is not wood.\nB. The material of the chopsticks is not mentioned.\nC. The material of the chopsticks is mentioned in the description and is wood.\nD. The chopsticks are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored chopsticks with a smooth, slightly tapered design, featuring a subtle gradient from a pale yellow to a soft orange hue at the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the chopsticks is mentioned in the description but is not long cylindrical.\nB. The shape of the chopsticks is not mentioned.\nC. The shape of the chopsticks is mentioned in the description and is long cylindrical.\nD. The chopsticks are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored chopsticks with a smooth, slightly tapered design, featuring a subtle gradient from a pale yellow to a soft orange hue at the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the body is mentioned in the description but is not smooth.\nB. The texture of the body is not mentioned, but the body of the chopsticks is mentioned.\nC. The texture of the body is mentioned in the description and is smooth.\nD. The body or the chopsticks is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored chopsticks with a smooth, slightly tapered design, featuring a subtle gradient from a pale yellow to a soft orange hue at the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lemon slices are not mentioned in the description.\nB. The lemon slices are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored chopsticks with a smooth, slightly tapered design, featuring a subtle gradient from a pale yellow to a soft orange hue at the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The decorative elements of the chopsticks are not mentioned in the description.\nB. The chopsticks are not mentioned in the description.\nC. The decorative elements of the chopsticks are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored chopsticks with a smooth, slightly tapered design, featuring a subtle gradient from a pale yellow to a soft orange hue at the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The man is not mentioned in the description.\nB. The man is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored chopsticks with a smooth, slightly tapered design, featuring a subtle gradient from a pale yellow to a soft orange hue at the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sliced meat is not mentioned in the description.\nB. The sliced meat is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored chopsticks with a smooth, slightly tapered design, featuring a subtle gradient from a pale yellow to a soft orange hue at the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green garnish is not mentioned in the description.\nB. The green garnish is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The material of the chopsticks is not mentioned.", + "A. The shape of the chopsticks is mentioned in the description but is not long cylindrical.", + "C. The texture of the body is mentioned in the description and is smooth.", + "A. The lemon slices are not mentioned in the description.", + "C. The decorative elements of the chopsticks are mentioned in the description.", + "A. The man is not mentioned in the description.", + "A. The sliced meat is not mentioned in the description.", + "A. The green garnish is not mentioned in the description." + ], + "score": 0.375, + "score_pos": 0.0, + "score_neg": 0.6, + "neg_valid_num": 5, + "recognition_result": true + }, + "17072759": { + "pred": "A black belt with a smooth texture, featuring a silver rectangular buckle.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the belt is not mentioned.", + 0 + ], + [ + "The color of the belt is mentioned in the description but is not gray or black.", + -1 + ], + [ + "The color of the belt is mentioned in the description and is gray or black.", + 1 + ], + [ + "The belt is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the belt is mentioned in the description and is gray or black.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the strap is not mentioned, but the strap of the belt is mentioned.", + 0.5 + ], + [ + "The material of the strap is mentioned in the description but is not leather.", + -1 + ], + [ + "The material of the strap is mentioned in the description and is leather.", + 1 + ], + [ + "The strap or the belt is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the strap is not mentioned, but the strap of the belt is mentioned.", + "pred_index": 0, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the strap is not mentioned, but the strap of the belt is mentioned.", + 0.5 + ], + [ + "The texture of the strap is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the strap is mentioned in the description and is smooth.", + 1 + ], + [ + "The strap or the belt is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The texture of the strap is mentioned in the description and is smooth.", + "pred_index": 2, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The belt is not mentioned in the description.", + 0 + ], + [ + "The tip of the belt is mentioned in the description.", + -1 + ], + [ + "The tip of the belt is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The tip of the belt is not mentioned in the description.", + "pred_index": 2, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chess board is mentioned in the description.", + -1 + ], + [ + "The chess board is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The chess board is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The blanket is mentioned in the description.", + -1 + ], + [ + "The blanket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The blanket is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sky is mentioned in the description.", + -1 + ], + [ + "The sky is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The sky is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The belt is not mentioned in the description.", + 0 + ], + [ + "The keeper of the belt is mentioned in the description.", + -1 + ], + [ + "The keeper of the belt is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The keeper of the belt is not mentioned in the description.", + "pred_index": 2, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a belt or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a belt or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the belt is not mentioned.\nB. 
The color of the belt is mentioned in the description but is not gray or black.\nC. The color of the belt is mentioned in the description and is gray or black.\nD. The belt is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the strap is not mentioned, but the strap of the belt is mentioned.\nB. The material of the strap is mentioned in the description but is not leather.\nC. The material of the strap is mentioned in the description and is leather.\nD. The strap or the belt is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the strap is not mentioned, but the strap of the belt is mentioned.\nB. The texture of the strap is mentioned in the description but is not smooth.\nC. 
The texture of the strap is mentioned in the description and is smooth.\nD. The strap or the belt is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The belt is not mentioned in the description.\nB. The tip of the belt is mentioned in the description.\nC. The tip of the belt is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chess board is mentioned in the description.\nB. The chess board is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The blanket is mentioned in the description.\nB. The blanket is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sky is mentioned in the description.\nB. The sky is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The belt is not mentioned in the description.\nB. The keeper of the belt is mentioned in the description.\nC. The keeper of the belt is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. 
The color of the belt is mentioned in the description and is gray or black.", + "A. The material of the strap is not mentioned, but the strap of the belt is mentioned.", + "C. The texture of the strap is mentioned in the description and is smooth.", + "C. The tip of the belt is not mentioned in the description.", + "B. The chess board is not mentioned in the description.", + "B. The blanket is not mentioned in the description.", + "B. The sky is not mentioned in the description.", + "C. The keeper of the belt is not mentioned in the description." + ], + "score": 0.9375, + "score_pos": 0.8333333333333334, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "17072764": { + "pred": "A partially visible pear with a smooth, light green skin transitioning to a yellowish hue towards the top. The pear has a small, brown stem protruding from its top left side.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the pear is mentioned in the description but is not smooth.", + -1 + ], + [ + "The pear is not mentioned.", + 0 + ], + [ + "The texture of the pear is not mentioned.", + 0 + ], + [ + "The texture of the pear is mentioned in the description and is smooth.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The texture of the pear is mentioned in the description and is smooth.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the stem is mentioned in the description but is not short.", + -1 + ], + [ + "The stem or the pear is not mentioned.", + 0 + ], + [ + "The size of the stem is not mentioned, but the stem of the pear is mentioned.", + 0.5 + ], + [ + "The size of the stem is mentioned in the description and is short.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The size of the stem is not mentioned, but the stem of the pear is mentioned.", + "pred_index": 2, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the stem is mentioned in the description but is not brown.", + -1 + ], + [ + "The stem or the pear is not mentioned.", + 0 + ], + [ + "The color of the stem is not mentioned, but the stem of the pear is mentioned.", + 0.5 + ], + [ + "The color of the stem is mentioned in the description and is brown.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the stem is mentioned in the description and is brown.", + "pred_index": 3, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the skin is mentioned in the description but is not yellow or green.", + -1 + ], + [ + "The skin or the pear is not mentioned.", + 0 + ], + [ + "The color of the skin is not mentioned, but the skin of the pear is mentioned.", + 0.5 + ], + [ + "The color of the skin is mentioned in the description and is yellow or green.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the skin is mentioned in the description and is yellow or green.", + "pred_index": 3, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cup is not mentioned in the description.", + 1 + ], + [ + "The cup is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The cup is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pear is not mentioned in the description.", + 0 + ], + [ + "The core of the pear is not mentioned in the description.", + 1 + ], + [ + "The core of the pear is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The core of the pear is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The white top is not mentioned in the description.", + 1 + ], + [ + "The white top is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The white top is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The grass are not mentioned in the description.", + 1 + ], + [ + "The grass are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The grass are not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pear is not mentioned in the description.", + 0 + ], + [ + "The leaf of the pear is not mentioned in the description.", + 1 + ], + [ + "The leaf of the pear is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The leaf of the pear is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a pear or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA partially visible pear with a smooth, light green skin transitioning to a yellowish hue towards the top. The pear has a small, brown stem protruding from its top left side.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a pear or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA partially visible pear with a smooth, light green skin transitioning to a yellowish hue towards the top. 
The pear has a small, brown stem protruding from its top left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the pear is mentioned in the description but is not smooth.\nB. The pear is not mentioned.\nC. The texture of the pear is not mentioned.\nD. The texture of the pear is mentioned in the description and is smooth.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA partially visible pear with a smooth, light green skin transitioning to a yellowish hue towards the top. The pear has a small, brown stem protruding from its top left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the stem is mentioned in the description but is not short.\nB. The stem or the pear is not mentioned.\nC. The size of the stem is not mentioned, but the stem of the pear is mentioned.\nD. The size of the stem is mentioned in the description and is short.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA partially visible pear with a smooth, light green skin transitioning to a yellowish hue towards the top. 
The pear has a small, brown stem protruding from its top left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the stem is mentioned in the description but is not brown.\nB. The stem or the pear is not mentioned.\nC. The color of the stem is not mentioned, but the stem of the pear is mentioned.\nD. The color of the stem is mentioned in the description and is brown.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA partially visible pear with a smooth, light green skin transitioning to a yellowish hue towards the top. The pear has a small, brown stem protruding from its top left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the skin is mentioned in the description but is not yellow or green.\nB. The skin or the pear is not mentioned.\nC. The color of the skin is not mentioned, but the skin of the pear is mentioned.\nD. The color of the skin is mentioned in the description and is yellow or green.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA partially visible pear with a smooth, light green skin transitioning to a yellowish hue towards the top. The pear has a small, brown stem protruding from its top left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cup is not mentioned in the description.\nB. The cup is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA partially visible pear with a smooth, light green skin transitioning to a yellowish hue towards the top. The pear has a small, brown stem protruding from its top left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pear is not mentioned in the description.\nB. The core of the pear is not mentioned in the description.\nC. The core of the pear is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA partially visible pear with a smooth, light green skin transitioning to a yellowish hue towards the top. 
The pear has a small, brown stem protruding from its top left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The white top is not mentioned in the description.\nB. The white top is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA partially visible pear with a smooth, light green skin transitioning to a yellowish hue towards the top. The pear has a small, brown stem protruding from its top left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The grass are not mentioned in the description.\nB. The grass are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA partially visible pear with a smooth, light green skin transitioning to a yellowish hue towards the top. The pear has a small, brown stem protruding from its top left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pear is not mentioned in the description.\nB. 
The leaf of the pear is not mentioned in the description.\nC. The leaf of the pear is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The texture of the pear is mentioned in the description and is smooth.", + "C. The size of the stem is not mentioned, but the stem of the pear is mentioned.", + "D. The color of the stem is mentioned in the description and is brown.", + "D. The color of the skin is mentioned in the description and is yellow or green.", + "A. The cup is not mentioned in the description.", + "B. The core of the pear is not mentioned in the description.", + "A. The white top is not mentioned in the description.", + "A. The grass are not mentioned in the description.", + "B. The leaf of the pear is not mentioned in the description." + ], + "score": 0.9444444444444444, + "score_pos": 0.875, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "18301585": { + "pred": "The bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest slats are evenly spaced and supported by white, rectangular concrete supports. The seat slats are also black and run parallel to the backrest. The bench has a sturdy, industrial design with a solid, robust appearance.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the backrest is mentioned in the description and is black.", + 1 + ], + [ + "The backrest or the bench is not mentioned.", + 0 + ], + [ + "The color of the backrest is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the backrest is not mentioned, but the backrest of the bench is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the backrest is mentioned in the description and is black.", + "pred_index": 0, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the legs is mentioned in the description and is white.", + 1 + ], + [ + "The legs or the bench are not mentioned.", + 0 + ], + [ + "The color of the legs is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the legs is not mentioned, but the legs of the bench are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the legs is mentioned in the description but is not white.", + "pred_index": 2, + "question_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the backrest is mentioned in the description and is slatted.", + 1 + ], + [ + "The backrest or the bench is not mentioned.", + 0 + ], + [ + "The shape of the backrest is mentioned in the description but is not slatted.", + -1 + ], + [ + "The shape of the backrest is not mentioned, but the backrest of the bench is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the backrest is mentioned in the description and is slatted.", + "pred_index": 0, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bollards are mentioned in the description.", + -1 + ], + [ + "The bollards are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The bollards are not mentioned in the description.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The storage compartment of the bench is mentioned in the description.", + -1 + ], + [ + "The storage compartment of the bench is not mentioned in the description.", + 1 + ], + [ + "The bench is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The storage compartment of the bench is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The footrest of the bench is mentioned in the description.", + -1 + ], + [ + "The footrest of the bench is not mentioned in the description.", + 1 + ], + [ + "The bench is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The footrest of the bench is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The headrest of the bench is mentioned in the description.", + -1 + ], + [ + "The headrest of the bench is not mentioned in the description.", + 1 + ], + [ + "The bench is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The headrest of the bench is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The trees are mentioned in the description.", + -1 + ], + [ + "The trees are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The trees are not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a bench or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest slats are evenly spaced and supported by white, rectangular concrete supports. The seat slats are also black and run parallel to the backrest. The bench has a sturdy, industrial design with a solid, robust appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a bench or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. 
No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest slats are evenly spaced and supported by white, rectangular concrete supports. The seat slats are also black and run parallel to the backrest. The bench has a sturdy, industrial design with a solid, robust appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the backrest is mentioned in the description and is black.\nB. The backrest or the bench is not mentioned.\nC. The color of the backrest is mentioned in the description but is not black.\nD. The color of the backrest is not mentioned, but the backrest of the bench is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest slats are evenly spaced and supported by white, rectangular concrete supports. The seat slats are also black and run parallel to the backrest. 
The bench has a sturdy, industrial design with a solid, robust appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the legs is mentioned in the description and is white.\nB. The legs or the bench are not mentioned.\nC. The color of the legs is mentioned in the description but is not white.\nD. The color of the legs is not mentioned, but the legs of the bench are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest slats are evenly spaced and supported by white, rectangular concrete supports. The seat slats are also black and run parallel to the backrest. The bench has a sturdy, industrial design with a solid, robust appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the backrest is mentioned in the description and is slatted.\nB. The backrest or the bench is not mentioned.\nC. The shape of the backrest is mentioned in the description but is not slatted.\nD. The shape of the backrest is not mentioned, but the backrest of the bench is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest slats are evenly spaced and supported by white, rectangular concrete supports. The seat slats are also black and run parallel to the backrest. The bench has a sturdy, industrial design with a solid, robust appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bollards are mentioned in the description.\nB. The bollards are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest slats are evenly spaced and supported by white, rectangular concrete supports. The seat slats are also black and run parallel to the backrest. The bench has a sturdy, industrial design with a solid, robust appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The storage compartment of the bench is mentioned in the description.\nB. The storage compartment of the bench is not mentioned in the description.\nC. The bench is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest slats are evenly spaced and supported by white, rectangular concrete supports. The seat slats are also black and run parallel to the backrest. The bench has a sturdy, industrial design with a solid, robust appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The footrest of the bench is mentioned in the description.\nB. The footrest of the bench is not mentioned in the description.\nC. The bench is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest slats are evenly spaced and supported by white, rectangular concrete supports. The seat slats are also black and run parallel to the backrest. The bench has a sturdy, industrial design with a solid, robust appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The headrest of the bench is mentioned in the description.\nB. The headrest of the bench is not mentioned in the description.\nC. 
The bench is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest slats are evenly spaced and supported by white, rectangular concrete supports. The seat slats are also black and run parallel to the backrest. The bench has a sturdy, industrial design with a solid, robust appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The trees are mentioned in the description.\nB. The trees are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The color of the backrest is mentioned in the description and is black.", + "C. The color of the legs is mentioned in the description but is not white.", + "A. The shape of the backrest is mentioned in the description and is slatted.", + "B. The bollards are not mentioned in the description.", + "B. The storage compartment of the bench is not mentioned in the description.", + "B. The footrest of the bench is not mentioned in the description.", + "B. The headrest of the bench is not mentioned in the description.", + "B. The trees are not mentioned in the description." + ], + "score": 0.75, + "score_pos": 0.3333333333333333, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "18680641": { + "pred": "A rectangular, plush, red carpet with a slightly uneven surface and a subtle gradient of darker red in the middle. 
The edges are bordered by a thin, dark gray trim.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the carpet is mentioned in the description but is not fabric.", + -1 + ], + [ + "The material of the carpet is not mentioned.", + 0 + ], + [ + "The material of the carpet is mentioned in the description and is fabric.", + 1 + ], + [ + "The carpet is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the carpet is mentioned in the description and is fabric.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the edge is mentioned in the description but is not straight.", + -1 + ], + [ + "The shape of the edge is not mentioned, but the edge of the carpet is mentioned.", + 0.5 + ], + [ + "The shape of the edge is mentioned in the description and is straight.", + 1 + ], + [ + "The edge or the carpet is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the edge is not mentioned, but the edge of the carpet is mentioned.", + "pred_index": 1, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the carpet is mentioned in the description but is not orange or red.", + -1 + ], + [ + "The color of the carpet is not mentioned.", + 0 + ], + [ + "The color of the carpet is mentioned in the description and is orange or red.", + 1 + ], + [ + "The carpet is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the carpet is mentioned in the description and is orange or red.", + "pred_index": 2, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the carpet is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the carpet is not mentioned.", + 0 + ], + [ + "The shape of the carpet is mentioned in the description and is rectangular.", + 1 + ], + [ + "The carpet is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the carpet is mentioned in the description and is rectangular.", + "pred_index": 2, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The carpet is not mentioned in the description.", + 0 + ], + [ + "The tassels of the carpet are not mentioned in the description.", + 1 + ], + [ + "The tassels of the carpet are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The tassels of the carpet are not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The drainage pipe is not mentioned in the description.", + 1 + ], + [ + "The drainage pipe is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The drainage pipe is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The carpet is not mentioned in the description.", + 0 + ], + [ + "The pattern of the carpet is not mentioned in the description.", + 1 + ], + [ + "The pattern of the carpet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The pattern of the carpet is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shop sign is not mentioned in the description.", + 1 + ], + [ + "The shop sign is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The shop sign is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The candy display is not mentioned in the description.", + 1 + ], + [ + "The candy display is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The candy display is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a carpet or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly uneven surface and a subtle gradient of darker red in the middle. The edges are bordered by a thin, dark gray trim.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a carpet or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly uneven surface and a subtle gradient of darker red in the middle. The edges are bordered by a thin, dark gray trim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the carpet is mentioned in the description but is not fabric.\nB. The material of the carpet is not mentioned.\nC. The material of the carpet is mentioned in the description and is fabric.\nD. The carpet is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly uneven surface and a subtle gradient of darker red in the middle. The edges are bordered by a thin, dark gray trim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the edge is mentioned in the description but is not straight.\nB. The shape of the edge is not mentioned, but the edge of the carpet is mentioned.\nC. The shape of the edge is mentioned in the description and is straight.\nD. The edge or the carpet is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly uneven surface and a subtle gradient of darker red in the middle. The edges are bordered by a thin, dark gray trim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the carpet is mentioned in the description but is not orange or red.\nB. The color of the carpet is not mentioned.\nC. The color of the carpet is mentioned in the description and is orange or red.\nD. The carpet is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly uneven surface and a subtle gradient of darker red in the middle. The edges are bordered by a thin, dark gray trim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the carpet is mentioned in the description but is not rectangular.\nB. The shape of the carpet is not mentioned.\nC. The shape of the carpet is mentioned in the description and is rectangular.\nD. The carpet is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly uneven surface and a subtle gradient of darker red in the middle. The edges are bordered by a thin, dark gray trim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The carpet is not mentioned in the description.\nB. The tassels of the carpet are not mentioned in the description.\nC. The tassels of the carpet are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly uneven surface and a subtle gradient of darker red in the middle. The edges are bordered by a thin, dark gray trim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The drainage pipe is not mentioned in the description.\nB. The drainage pipe is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly uneven surface and a subtle gradient of darker red in the middle. The edges are bordered by a thin, dark gray trim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The carpet is not mentioned in the description.\nB. The pattern of the carpet is not mentioned in the description.\nC. The pattern of the carpet is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly uneven surface and a subtle gradient of darker red in the middle. The edges are bordered by a thin, dark gray trim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shop sign is not mentioned in the description.\nB. The shop sign is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly uneven surface and a subtle gradient of darker red in the middle. The edges are bordered by a thin, dark gray trim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The candy display is not mentioned in the description.\nB. The candy display is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The material of the carpet is mentioned in the description and is fabric.", + "B. The shape of the edge is not mentioned, but the edge of the carpet is mentioned.", + "C. The color of the carpet is mentioned in the description and is orange or red.", + "C. The shape of the carpet is mentioned in the description and is rectangular.", + "B. The tassels of the carpet are not mentioned in the description.", + "A. The drainage pipe is not mentioned in the description.", + "B. The pattern of the carpet is not mentioned in the description.", + "A. The shop sign is not mentioned in the description.", + "A. 
The candy display is not mentioned in the description." + ], + "score": 0.9444444444444444, + "score_pos": 0.875, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "25273528": { + "pred": "A hot air balloon with a vibrant pattern of alternating vertical stripes in dark blue, red, and yellow. The balloon has a teardrop shape with a small basket attached at the bottom.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the hot air balloon is mentioned in the description and is multicolored.", + 1 + ], + [ + "The hot air balloon is not mentioned.", + 0 + ], + [ + "The color of the hot air balloon is mentioned in the description but is not multicolored.", + -1 + ], + [ + "The color of the hot air balloon is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the hot air balloon is mentioned in the description and is multicolored.", + "pred_index": 0, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the envelope is mentioned in the description and is nylon or polyester.", + 1 + ], + [ + "The envelope or the hot air balloon is not mentioned.", + 0 + ], + [ + "The material of the envelope is mentioned in the description but is not nylon or polyester.", + -1 + ], + [ + "The material of the envelope is not mentioned, but the envelope of the hot air balloon is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. 
The material of the envelope is not mentioned, but the envelope of the hot air balloon is mentioned.", + "pred_index": 3, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The position of the basket is mentioned in the description and is bottom.", + 1 + ], + [ + "The basket or the hot air balloon is not mentioned.", + 0 + ], + [ + "The position of the basket is mentioned in the description but is not bottom.", + -1 + ], + [ + "The position of the basket is not mentioned, but the basket of the hot air balloon is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The position of the basket is not mentioned, but the basket of the hot air balloon is mentioned.", + "pred_index": 3, + "question_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the basket is mentioned in the description and is small.", + 1 + ], + [ + "The basket or the hot air balloon is not mentioned.", + 0 + ], + [ + "The size of the basket is mentioned in the description but is not small.", + -1 + ], + [ + "The size of the basket is not mentioned, but the basket of the hot air balloon is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. 
The size of the basket is not mentioned, but the basket of the hot air balloon is mentioned.", + "pred_index": 3, + "question_index": 3, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the basket is mentioned in the description and is dark or black.", + 1 + ], + [ + "The basket or the hot air balloon is not mentioned.", + 0 + ], + [ + "The color of the basket is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The color of the basket is not mentioned, but the basket of the hot air balloon is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the basket is not mentioned, but the basket of the hot air balloon is mentioned.", + "pred_index": 3, + "question_index": 4, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fuel tanks of the hot air balloon are not mentioned in the description.", + 1 + ], + [ + "The fuel tanks of the hot air balloon are mentioned in the description.", + -1 + ], + [ + "The hot air balloon is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The fuel tanks of the hot air balloon are not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The burner of the hot air balloon is not mentioned in the description.", + 1 + ], + [ + "The burner of the hot air balloon is mentioned in the description.", + -1 + ], + [ + "The hot air balloon is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The burner of the hot air balloon is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ground is not mentioned in the description.", + 1 + ], + [ + "The ground is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The ground is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The people are not mentioned in the description.", + 1 + ], + [ + "The people are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The people are not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The trees are not mentioned in the description.", + 1 + ], + [ + "The trees are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The trees are not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a hot air balloon or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA hot air balloon with a vibrant pattern of alternating vertical stripes in dark blue, red, and yellow. The balloon has a teardrop shape with a small basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a hot air balloon or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA hot air balloon with a vibrant pattern of alternating vertical stripes in dark blue, red, and yellow. The balloon has a teardrop shape with a small basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the hot air balloon is mentioned in the description and is multicolored.\nB. The hot air balloon is not mentioned.\nC. The color of the hot air balloon is mentioned in the description but is not multicolored.\nD. The color of the hot air balloon is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA hot air balloon with a vibrant pattern of alternating vertical stripes in dark blue, red, and yellow. The balloon has a teardrop shape with a small basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the envelope is mentioned in the description and is nylon or polyester.\nB. The envelope or the hot air balloon is not mentioned.\nC. The material of the envelope is mentioned in the description but is not nylon or polyester.\nD. The material of the envelope is not mentioned, but the envelope of the hot air balloon is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA hot air balloon with a vibrant pattern of alternating vertical stripes in dark blue, red, and yellow. The balloon has a teardrop shape with a small basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The position of the basket is mentioned in the description and is bottom.\nB. The basket or the hot air balloon is not mentioned.\nC. The position of the basket is mentioned in the description but is not bottom.\nD. The position of the basket is not mentioned, but the basket of the hot air balloon is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA hot air balloon with a vibrant pattern of alternating vertical stripes in dark blue, red, and yellow. The balloon has a teardrop shape with a small basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the basket is mentioned in the description and is small.\nB. The basket or the hot air balloon is not mentioned.\nC. The size of the basket is mentioned in the description but is not small.\nD. The size of the basket is not mentioned, but the basket of the hot air balloon is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA hot air balloon with a vibrant pattern of alternating vertical stripes in dark blue, red, and yellow. The balloon has a teardrop shape with a small basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the basket is mentioned in the description and is dark or black.\nB. The basket or the hot air balloon is not mentioned.\nC. The color of the basket is mentioned in the description but is not dark or black.\nD. 
The color of the basket is not mentioned, but the basket of the hot air balloon is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA hot air balloon with a vibrant pattern of alternating vertical stripes in dark blue, red, and yellow. The balloon has a teardrop shape with a small basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fuel tanks of the hot air balloon are not mentioned in the description.\nB. The fuel tanks of the hot air balloon are mentioned in the description.\nC. The hot air balloon is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA hot air balloon with a vibrant pattern of alternating vertical stripes in dark blue, red, and yellow. The balloon has a teardrop shape with a small basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The burner of the hot air balloon is not mentioned in the description.\nB. The burner of the hot air balloon is mentioned in the description.\nC. 
The hot air balloon is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA hot air balloon with a vibrant pattern of alternating vertical stripes in dark blue, red, and yellow. The balloon has a teardrop shape with a small basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ground is not mentioned in the description.\nB. The ground is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA hot air balloon with a vibrant pattern of alternating vertical stripes in dark blue, red, and yellow. The balloon has a teardrop shape with a small basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The people are not mentioned in the description.\nB. The people are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA hot air balloon with a vibrant pattern of alternating vertical stripes in dark blue, red, and yellow. The balloon has a teardrop shape with a small basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The trees are not mentioned in the description.\nB. The trees are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The color of the hot air balloon is mentioned in the description and is multicolored.", + "D. The material of the envelope is not mentioned, but the envelope of the hot air balloon is mentioned.", + "D. The position of the basket is not mentioned, but the basket of the hot air balloon is mentioned.", + "D. The size of the basket is not mentioned, but the basket of the hot air balloon is mentioned.", + "D. The color of the basket is not mentioned, but the basket of the hot air balloon is mentioned.", + "A. The fuel tanks of the hot air balloon are not mentioned in the description.", + "A. The burner of the hot air balloon is not mentioned in the description.", + "A. The ground is not mentioned in the description.", + "A. The people are not mentioned in the description.", + "A. The trees are not mentioned in the description." + ], + "score": 0.8, + "score_pos": 0.6, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "25419509": { + "pred": "A metallic fork with a slightly curved handle and four evenly spaced tines. 
The handle has a smooth, reflective surface with a subtle gradient from light to dark.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fork is not mentioned.", + 0 + ], + [ + "The color of the fork is not mentioned.", + 0 + ], + [ + "The color of the fork is mentioned in the description and is metallic.", + 1 + ], + [ + "The color of the fork is mentioned in the description but is not metallic.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the fork is not mentioned.", + "pred_index": 1, + "question_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle or the fork is not mentioned.", + 0 + ], + [ + "The material of the handle is not mentioned, but the handle of the fork is mentioned.", + 0.5 + ], + [ + "The material of the handle is mentioned in the description and is metal.", + 1 + ], + [ + "The material of the handle is mentioned in the description but is not metal.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the handle is mentioned in the description and is metal.", + "pred_index": 2, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle or the fork is not mentioned.", + 0 + ], + [ + "The shape of the handle is not mentioned, but the handle of the fork is mentioned.", + 0.5 + ], + [ + "The shape of the handle is mentioned in the description and is curved.", + 1 + ], + [ + "The shape of the handle is mentioned in the description but is not curved.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the handle is mentioned in the description and is curved.", + "pred_index": 2, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle or the fork is not mentioned.", + 0 + ], + [ + "The texture of the handle is not mentioned, but the handle of the fork is mentioned.", + 0.5 + ], + [ + "The texture of the handle is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the handle is mentioned in the description but is not smooth.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The texture of the handle is mentioned in the description and is smooth.", + "pred_index": 2, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tines or the fork are not mentioned.", + 0 + ], + [ + "The number of parts of the tines is not mentioned, but the tines of the fork are mentioned.", + 0.5 + ], + [ + "The number of parts of the tines is mentioned in the description and is 4.", + 1 + ], + [ + "The number of parts of the tines is mentioned in the description but is not 4.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The number of parts of the tines is mentioned in the description and is 4.", + "pred_index": 2, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bolster of the fork is not mentioned in the description.", + 1 + ], + [ + "The bolster of the fork is mentioned in the description.", + -1 + ], + [ + "The fork is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The bolster of the fork is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plates are not mentioned in the description.", + 1 + ], + [ + "The plates are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The plates are not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The end cap of the fork is not mentioned in the description.", + 1 + ], + [ + "The end cap of the fork is mentioned in the description.", + -1 + ], + [ + "The fork is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The end cap of the fork is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The drinks are not mentioned in the description.", + 1 + ], + [ + "The drinks are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The drinks are not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ferrule of the fork is not mentioned in the description.", + 1 + ], + [ + "The ferrule of the fork is mentioned in the description.", + -1 + ], + [ + "The fork is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The ferrule of the fork is not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a fork or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a subtle gradient from light to dark.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a fork or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. 
The handle has a smooth, reflective surface with a subtle gradient from light to dark.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fork is not mentioned.\nB. The color of the fork is not mentioned.\nC. The color of the fork is mentioned in the description and is metallic.\nD. The color of the fork is mentioned in the description but is not metallic.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a subtle gradient from light to dark.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handle or the fork is not mentioned.\nB. The material of the handle is not mentioned, but the handle of the fork is mentioned.\nC. The material of the handle is mentioned in the description and is metal.\nD. The material of the handle is mentioned in the description but is not metal.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. 
The handle has a smooth, reflective surface with a subtle gradient from light to dark.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handle or the fork is not mentioned.\nB. The shape of the handle is not mentioned, but the handle of the fork is mentioned.\nC. The shape of the handle is mentioned in the description and is curved.\nD. The shape of the handle is mentioned in the description but is not curved.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a subtle gradient from light to dark.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handle or the fork is not mentioned.\nB. The texture of the handle is not mentioned, but the handle of the fork is mentioned.\nC. The texture of the handle is mentioned in the description and is smooth.\nD. The texture of the handle is mentioned in the description but is not smooth.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a subtle gradient from light to dark.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tines or the fork are not mentioned.\nB. The number of parts of the tines is not mentioned, but the tines of the fork are mentioned.\nC. The number of parts of the tines is mentioned in the description and is 4.\nD. The number of parts of the tines is mentioned in the description but is not 4.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a subtle gradient from light to dark.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bolster of the fork is not mentioned in the description.\nB. The bolster of the fork is mentioned in the description.\nC. The fork is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a subtle gradient from light to dark.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The plates are not mentioned in the description.\nB. The plates are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a subtle gradient from light to dark.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The end cap of the fork is not mentioned in the description.\nB. The end cap of the fork is mentioned in the description.\nC. The fork is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. 
The handle has a smooth, reflective surface with a subtle gradient from light to dark.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The drinks are not mentioned in the description.\nB. The drinks are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a subtle gradient from light to dark.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ferrule of the fork is not mentioned in the description.\nB. The ferrule of the fork is mentioned in the description.\nC. The fork is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the fork is not mentioned.", + "C. The material of the handle is mentioned in the description and is metal.", + "C. The shape of the handle is mentioned in the description and is curved.", + "C. The texture of the handle is mentioned in the description and is smooth.", + "C. The number of parts of the tines is mentioned in the description and is 4.", + "A. The bolster of the fork is not mentioned in the description.", + "A. The plates are not mentioned in the description.", + "A. The end cap of the fork is not mentioned in the description.", + "A. The drinks are not mentioned in the description.", + "A. The ferrule of the fork is not mentioned in the description." 
+ ], + "score": 0.9, + "score_pos": 0.8, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "25612310": { + "pred": "A woven wicker basket with a dark brown hue, featuring a series of horizontal slats and a slightly curved edge.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The basket is not mentioned.", + 0 + ], + [ + "The texture of the basket is not mentioned.", + 0 + ], + [ + "The texture of the basket is mentioned in the description and is woven.", + 1 + ], + [ + "The texture of the basket is mentioned in the description but is not woven.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The texture of the basket is mentioned in the description and is woven.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The basket is not mentioned.", + 0 + ], + [ + "The material of the basket is not mentioned.", + 0 + ], + [ + "The material of the basket is mentioned in the description and is wicker.", + 1 + ], + [ + "The material of the basket is mentioned in the description but is not wicker.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the basket is mentioned in the description and is wicker.", + "pred_index": 2, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The basket is not mentioned.", + 0 + ], + [ + "The type of the basket is not mentioned.", + 0 + ], + [ + "The type of the basket is mentioned in the description and is interlaced.", + 1 + ], + [ + "The type of the basket is mentioned in the description but is not interlaced.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The type of the basket is mentioned in the description and is interlaced.", + "pred_index": 2, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The basket is not mentioned.", + 0 + ], + [ + "The color of the basket is not mentioned.", + 0 + ], + [ + "The color of the basket is mentioned in the description and is brown or wooden.", + 1 + ], + [ + "The color of the basket is mentioned in the description but is not brown or wooden.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the basket is mentioned in the description and is brown or wooden.", + "pred_index": 2, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The base of the basket is mentioned in the description.", + -1 + ], + [ + "The basket is not mentioned in the description.", + 0 + ], + [ + "The base of the basket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The base of the basket is not mentioned in the description.", + "pred_index": 2, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle of the basket is mentioned in the description.", + -1 + ], + [ + "The basket is not mentioned in the description.", + 0 + ], + [ + "The handle of the basket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The handle of the basket is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lid of the basket is mentioned in the description.", + -1 + ], + [ + "The basket is not mentioned in the description.", + 0 + ], + [ + "The lid of the basket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The lid of the basket is not mentioned in the description.", + "pred_index": 2, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The decorative elements of the basket are mentioned in the description.", + -1 + ], + [ + "The basket is not mentioned in the description.", + 0 + ], + [ + "The decorative elements of the basket are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The decorative elements of the basket are not mentioned in the description.", + "pred_index": 2, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lining of the basket is mentioned in the description.", + -1 + ], + [ + "The basket is not mentioned in the description.", + 0 + ], + [ + "The lining of the basket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The lining of the basket is not mentioned in the description.", + "pred_index": 2, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a basket or an object of a similar type? 
Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woven wicker basket with a dark brown hue, featuring a series of horizontal slats and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a basket or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woven wicker basket with a dark brown hue, featuring a series of horizontal slats and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The basket is not mentioned.\nB. The texture of the basket is not mentioned.\nC. The texture of the basket is mentioned in the description and is woven.\nD. 
The texture of the basket is mentioned in the description but is not woven.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woven wicker basket with a dark brown hue, featuring a series of horizontal slats and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The basket is not mentioned.\nB. The material of the basket is not mentioned.\nC. The material of the basket is mentioned in the description and is wicker.\nD. The material of the basket is mentioned in the description but is not wicker.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woven wicker basket with a dark brown hue, featuring a series of horizontal slats and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The basket is not mentioned.\nB. The type of the basket is not mentioned.\nC. The type of the basket is mentioned in the description and is interlaced.\nD. 
The type of the basket is mentioned in the description but is not interlaced.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woven wicker basket with a dark brown hue, featuring a series of horizontal slats and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The basket is not mentioned.\nB. The color of the basket is not mentioned.\nC. The color of the basket is mentioned in the description and is brown or wooden.\nD. The color of the basket is mentioned in the description but is not brown or wooden.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woven wicker basket with a dark brown hue, featuring a series of horizontal slats and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The base of the basket is mentioned in the description.\nB. The basket is not mentioned in the description.\nC. The base of the basket is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woven wicker basket with a dark brown hue, featuring a series of horizontal slats and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handle of the basket is mentioned in the description.\nB. The basket is not mentioned in the description.\nC. The handle of the basket is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woven wicker basket with a dark brown hue, featuring a series of horizontal slats and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lid of the basket is mentioned in the description.\nB. The basket is not mentioned in the description.\nC. The lid of the basket is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA woven wicker basket with a dark brown hue, featuring a series of horizontal slats and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The decorative elements of the basket are mentioned in the description.\nB. The basket is not mentioned in the description.\nC. The decorative elements of the basket are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woven wicker basket with a dark brown hue, featuring a series of horizontal slats and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lining of the basket is mentioned in the description.\nB. The basket is not mentioned in the description.\nC. The lining of the basket is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The texture of the basket is mentioned in the description and is woven.", + "C. The material of the basket is mentioned in the description and is wicker.", + "C. The type of the basket is mentioned in the description and is interlaced.", + "C. The color of the basket is mentioned in the description and is brown or wooden.", + "C. The base of the basket is not mentioned in the description.", + "C. The handle of the basket is not mentioned in the description.", + "C. The lid of the basket is not mentioned in the description.", + "C. 
The decorative elements of the basket are not mentioned in the description.", + "C. The lining of the basket is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "17265253": { + "pred": "A black rickshaw with a single visible wheel featuring a silver rim and black tire. The wheel is attached to a black frame with a visible axle and a small, round, orange reflector on the side. The rickshaw has a black canopy with a slightly curved top edge.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the spokes is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the spokes is not mentioned, but the spokes of the rickshaw are mentioned.", + 0.5 + ], + [ + "The spokes or the rickshaw are not mentioned.", + 0 + ], + [ + "The material of the spokes is mentioned in the description and is metal.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. The spokes or the rickshaw are not mentioned.", + "pred_index": 2, + "question_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the tire is mentioned in the description but is not circular.", + -1 + ], + [ + "The shape of the tire is not mentioned, but the tire of the rickshaw is mentioned.", + 0.5 + ], + [ + "The tire or the rickshaw is not mentioned.", + 0 + ], + [ + "The shape of the tire is mentioned in the description and is circular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The shape of the tire is mentioned in the description and is circular.", + "pred_index": 3, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the tire is mentioned in the description but is not rubber.", + -1 + ], + [ + "The material of the tire is not mentioned, but the tire of the rickshaw is mentioned.", + 0.5 + ], + [ + "The tire or the rickshaw is not mentioned.", + 0 + ], + [ + "The material of the tire is mentioned in the description and is rubber.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. The material of the tire is not mentioned, but the tire of the rickshaw is mentioned.", + "pred_index": 1, + "question_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the spokes is mentioned in the description but is not silver.", + -1 + ], + [ + "The color of the spokes is not mentioned, but the spokes of the rickshaw are mentioned.", + 0.5 + ], + [ + "The spokes or the rickshaw are not mentioned.", + 0 + ], + [ + "The color of the spokes is mentioned in the description and is silver.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. The spokes or the rickshaw are not mentioned.", + "pred_index": 2, + "question_index": 3, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the tire is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the tire is not mentioned, but the tire of the rickshaw is mentioned.", + 0.5 + ], + [ + "The tire or the rickshaw is not mentioned.", + 0 + ], + [ + "The color of the tire is mentioned in the description and is black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the tire is mentioned in the description and is black.", + "pred_index": 3, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The path is not mentioned in the description.", + 1 + ], + [ + "The path is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The path is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rickshaw is not mentioned in the description.", + 0 + ], + [ + "The rickshaw canopy of the rickshaw is not mentioned in the description.", + 1 + ], + [ + "The rickshaw canopy of the rickshaw is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The rickshaw canopy of the rickshaw is mentioned in the description.", + "pred_index": 2, + "question_index": 6, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rickshaw is not mentioned in the description.", + 0 + ], + [ + "The rickshaw handlebars of the rickshaw are not mentioned in the description.", + 1 + ], + [ + "The rickshaw handlebars of the rickshaw are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The rickshaw handlebars of the rickshaw are not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fence is not mentioned in the description.", + 1 + ], + [ + "The fence is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The fence is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bamboo trees are not mentioned in the description.", + 1 + ], + [ + "The bamboo trees are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The bamboo trees are not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a rickshaw or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a single visible wheel featuring a silver rim and black tire. The wheel is attached to a black frame with a visible axle and a small, round, orange reflector on the side. 
The rickshaw has a black canopy with a slightly curved top edge.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a rickshaw or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a single visible wheel featuring a silver rim and black tire. The wheel is attached to a black frame with a visible axle and a small, round, orange reflector on the side. The rickshaw has a black canopy with a slightly curved top edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the spokes is mentioned in the description but is not metal.\nB. The material of the spokes is not mentioned, but the spokes of the rickshaw are mentioned.\nC. The spokes or the rickshaw are not mentioned.\nD. The material of the spokes is mentioned in the description and is metal.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a single visible wheel featuring a silver rim and black tire. 
The wheel is attached to a black frame with a visible axle and a small, round, orange reflector on the side. The rickshaw has a black canopy with a slightly curved top edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the tire is mentioned in the description but is not circular.\nB. The shape of the tire is not mentioned, but the tire of the rickshaw is mentioned.\nC. The tire or the rickshaw is not mentioned.\nD. The shape of the tire is mentioned in the description and is circular.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a single visible wheel featuring a silver rim and black tire. The wheel is attached to a black frame with a visible axle and a small, round, orange reflector on the side. The rickshaw has a black canopy with a slightly curved top edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the tire is mentioned in the description but is not rubber.\nB. The material of the tire is not mentioned, but the tire of the rickshaw is mentioned.\nC. The tire or the rickshaw is not mentioned.\nD. The material of the tire is mentioned in the description and is rubber.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a single visible wheel featuring a silver rim and black tire. The wheel is attached to a black frame with a visible axle and a small, round, orange reflector on the side. The rickshaw has a black canopy with a slightly curved top edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the spokes is mentioned in the description but is not silver.\nB. The color of the spokes is not mentioned, but the spokes of the rickshaw are mentioned.\nC. The spokes or the rickshaw are not mentioned.\nD. The color of the spokes is mentioned in the description and is silver.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a single visible wheel featuring a silver rim and black tire. The wheel is attached to a black frame with a visible axle and a small, round, orange reflector on the side. The rickshaw has a black canopy with a slightly curved top edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the tire is mentioned in the description but is not black.\nB. The color of the tire is not mentioned, but the tire of the rickshaw is mentioned.\nC. The tire or the rickshaw is not mentioned.\nD. 
The color of the tire is mentioned in the description and is black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a single visible wheel featuring a silver rim and black tire. The wheel is attached to a black frame with a visible axle and a small, round, orange reflector on the side. The rickshaw has a black canopy with a slightly curved top edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The path is not mentioned in the description.\nB. The path is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a single visible wheel featuring a silver rim and black tire. The wheel is attached to a black frame with a visible axle and a small, round, orange reflector on the side. The rickshaw has a black canopy with a slightly curved top edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rickshaw is not mentioned in the description.\nB. The rickshaw canopy of the rickshaw is not mentioned in the description.\nC. 
The rickshaw canopy of the rickshaw is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a single visible wheel featuring a silver rim and black tire. The wheel is attached to a black frame with a visible axle and a small, round, orange reflector on the side. The rickshaw has a black canopy with a slightly curved top edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rickshaw is not mentioned in the description.\nB. The rickshaw handlebars of the rickshaw are not mentioned in the description.\nC. The rickshaw handlebars of the rickshaw are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a single visible wheel featuring a silver rim and black tire. The wheel is attached to a black frame with a visible axle and a small, round, orange reflector on the side. The rickshaw has a black canopy with a slightly curved top edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fence is not mentioned in the description.\nB. 
The fence is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a single visible wheel featuring a silver rim and black tire. The wheel is attached to a black frame with a visible axle and a small, round, orange reflector on the side. The rickshaw has a black canopy with a slightly curved top edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bamboo trees are not mentioned in the description.\nB. The bamboo trees are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The spokes or the rickshaw are not mentioned.", + "D. The shape of the tire is mentioned in the description and is circular.", + "B. The material of the tire is not mentioned, but the tire of the rickshaw is mentioned.", + "C. The spokes or the rickshaw are not mentioned.", + "D. The color of the tire is mentioned in the description and is black.", + "A. The path is not mentioned in the description.", + "C. The rickshaw canopy of the rickshaw is mentioned in the description.", + "B. The rickshaw handlebars of the rickshaw are not mentioned in the description.", + "A. The fence is not mentioned in the description.", + "A. The bamboo trees are not mentioned in the description." 
+ ], + "score": 0.55, + "score_pos": 0.5, + "score_neg": 0.6, + "neg_valid_num": 5, + "recognition_result": true + }, + "avg_pos": 0.48891666666666667, + "avg_neg": 0.8699999999999999 +} \ No newline at end of file diff --git a/evaluation/DLC-Bench/model_outputs/gar_1b_eval_gpt.json b/evaluation/DLC-Bench/model_outputs/gar_1b_eval_gpt.json new file mode 100644 index 0000000000000000000000000000000000000000..75abc0c694b2d1a2965c44429f51fb9e5c2454b1 --- /dev/null +++ b/evaluation/DLC-Bench/model_outputs/gar_1b_eval_gpt.json @@ -0,0 +1,25034 @@ +{ + "2391781": { + "pred": "The bird has a predominantly dark brown body with lighter brown and white markings on its wings and back. Its wings are outstretched, showing a mix of dark and light feathers. The bird's head is slightly turned, with a visible beak and a hint of white feathers around the neck area.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wing or the wild bird is not mentioned.", + 0 + ], + [ + "The color of the wing is not mentioned, but the wing of the wild bird is mentioned.", + 0.5 + ], + [ + "The color of the wing is mentioned in the description but is not grey or brown.", + -1 + ], + [ + "The color of the wing is mentioned in the description and is grey or brown.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the wing is mentioned in the description and is grey or brown.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The feathers or the wild bird are not mentioned.", + 0 + ], + [ + "The color of the feathers is not mentioned, but the feathers of the wild bird are mentioned.", + 0.5 + ], + [ + "The color of the feathers is mentioned in the description but is not white, grey, or brown.", + -1 + ], + [ + "The color of the feathers is mentioned in the description and is white, grey, or brown.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the feathers is mentioned in the description and is white, grey, or brown.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tail or the wild bird is not mentioned.", + 0 + ], + [ + "The shape of the tail is not mentioned, but the tail of the wild bird is mentioned.", + 0.5 + ], + [ + "The shape of the tail is mentioned in the description but is not fan-like.", + -1 + ], + [ + "The shape of the tail is mentioned in the description and is fan-like.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the tail is not mentioned, but the tail of the wild bird is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The null or the wild bird is not mentioned.", + 0 + ], + [ + "The action of the null is not mentioned, but the null of the wild bird is mentioned.", + 0.5 + ], + [ + "The action of the null is mentioned in the description but is not flying.", + -1 + ], + [ + "The action of the null is mentioned in the description and is flying.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The action of the null is mentioned in the description and is flying.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wing or the wild bird is not mentioned.", + 0 + ], + [ + "The position of the wing is not mentioned, but the wing of the wild bird is mentioned.", + 0.5 + ], + [ + "The position of the wing is mentioned in the description but is not extended or outstretched.", + -1 + ], + [ + "The position of the wing is mentioned in the description and is extended or outstretched.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The position of the wing is mentioned in the description and is extended or outstretched.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The claws of the wild bird are not mentioned in the description.", + 1 + ], + [ + "The claws of the wild bird are mentioned in the description.", + -1 + ], + [ + "The wild bird is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The claws of the wild bird are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The boats are not mentioned in the description.", + 1 + ], + [ + "The boats are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The boats are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chimneys are not mentioned in the description.", + 1 + ], + [ + "The chimneys are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The chimneys are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bridge is not mentioned in the description.", + 1 + ], + [ + "The bridge is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The bridge is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The windows are not mentioned in the description.", + 1 + ], + [ + "The windows are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The windows are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is wild bird. Based on the image, is it likely that the object in the description is given class: wild bird or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly dark brown body with lighter brown and white markings on its wings and back. Its wings are outstretched, showing a mix of dark and light feathers. The bird's head is slightly turned, with a visible beak and a hint of white feathers around the neck area.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is wild bird. Based on the image, is it likely that the object in the description is given class: wild bird or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly dark brown body with lighter brown and white markings on its wings and back. Its wings are outstretched, showing a mix of dark and light feathers. The bird's head is slightly turned, with a visible beak and a hint of white feathers around the neck area.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wing or the wild bird is not mentioned.\nB. The color of the wing is not mentioned, but the wing of the wild bird is mentioned.\nC. The color of the wing is mentioned in the description but is not grey or brown.\nD. The color of the wing is mentioned in the description and is grey or brown.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly dark brown body with lighter brown and white markings on its wings and back. Its wings are outstretched, showing a mix of dark and light feathers. The bird's head is slightly turned, with a visible beak and a hint of white feathers around the neck area.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The feathers or the wild bird are not mentioned.\nB. The color of the feathers is not mentioned, but the feathers of the wild bird are mentioned.\nC. The color of the feathers is mentioned in the description but is not white, grey, or brown.\nD. The color of the feathers is mentioned in the description and is white, grey, or brown.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly dark brown body with lighter brown and white markings on its wings and back. Its wings are outstretched, showing a mix of dark and light feathers. The bird's head is slightly turned, with a visible beak and a hint of white feathers around the neck area.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tail or the wild bird is not mentioned.\nB. 
The shape of the tail is not mentioned, but the tail of the wild bird is mentioned.\nC. The shape of the tail is mentioned in the description but is not fan-like.\nD. The shape of the tail is mentioned in the description and is fan-like.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly dark brown body with lighter brown and white markings on its wings and back. Its wings are outstretched, showing a mix of dark and light feathers. The bird's head is slightly turned, with a visible beak and a hint of white feathers around the neck area.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The null or the wild bird is not mentioned.\nB. The action of the null is not mentioned, but the null of the wild bird is mentioned.\nC. The action of the null is mentioned in the description but is not flying.\nD. The action of the null is mentioned in the description and is flying.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly dark brown body with lighter brown and white markings on its wings and back. Its wings are outstretched, showing a mix of dark and light feathers. 
The bird's head is slightly turned, with a visible beak and a hint of white feathers around the neck area.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wing or the wild bird is not mentioned.\nB. The position of the wing is not mentioned, but the wing of the wild bird is mentioned.\nC. The position of the wing is mentioned in the description but is not extended or outstretched.\nD. The position of the wing is mentioned in the description and is extended or outstretched.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly dark brown body with lighter brown and white markings on its wings and back. Its wings are outstretched, showing a mix of dark and light feathers. The bird's head is slightly turned, with a visible beak and a hint of white feathers around the neck area.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The claws of the wild bird are not mentioned in the description.\nB. The claws of the wild bird are mentioned in the description.\nC. The wild bird is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly dark brown body with lighter brown and white markings on its wings and back. Its wings are outstretched, showing a mix of dark and light feathers. The bird's head is slightly turned, with a visible beak and a hint of white feathers around the neck area.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The boats are not mentioned in the description.\nB. The boats are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly dark brown body with lighter brown and white markings on its wings and back. Its wings are outstretched, showing a mix of dark and light feathers. The bird's head is slightly turned, with a visible beak and a hint of white feathers around the neck area.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chimneys are not mentioned in the description.\nB. The chimneys are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly dark brown body with lighter brown and white markings on its wings and back. Its wings are outstretched, showing a mix of dark and light feathers. The bird's head is slightly turned, with a visible beak and a hint of white feathers around the neck area.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bridge is not mentioned in the description.\nB. The bridge is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly dark brown body with lighter brown and white markings on its wings and back. Its wings are outstretched, showing a mix of dark and light feathers. The bird's head is slightly turned, with a visible beak and a hint of white feathers around the neck area.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The windows are not mentioned in the description.\nB. The windows are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the wing is mentioned in the description and is grey or brown.", + "D. The color of the feathers is mentioned in the description and is white, grey, or brown.", + "B. The shape of the tail is not mentioned, but the tail of the wild bird is mentioned.", + "D. The action of the null is mentioned in the description and is flying.", + "D. 
The position of the wing is mentioned in the description and is extended or outstretched.", + "A. The claws of the wild bird are not mentioned in the description.", + "A. The boats are not mentioned in the description.", + "A. The chimneys are not mentioned in the description.", + "A. The bridge is not mentioned in the description.", + "A. The windows are not mentioned in the description." + ], + "score": 0.95, + "score_pos": 0.9, + "score_neg": 1.0, + "recognition_result": true + }, + "2580323": { + "pred": "A rectangular wooden frame encloses a detailed architectural blueprint with various lines, symbols, and text. The frame has a natural wood finish and is mounted on a wall.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the frame is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the frame is not mentioned, but the frame of the picture/frame is mentioned.", + 0.5 + ], + [ + "The frame or the picture/frame is not mentioned.", + 0 + ], + [ + "The shape of the frame is mentioned in the description but is not rectangular.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the frame is mentioned in the description and is rectangular.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The background color of the picture is mentioned in the description and is white.", + 1 + ], + [ + "The background color of the picture is not mentioned, but the picture of the picture/frame is mentioned.", + 0.5 + ], + [ + "The picture or the picture/frame is not mentioned.", + 0 + ], + [ + "The background color of the picture is mentioned in the description but is not white.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The background color of the picture is not mentioned, but the picture of the picture/frame is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the picture is mentioned in the description and is diagram, schematic, or blueprint.", + 1 + ], + [ + "The type of the picture is not mentioned, but the picture of the picture/frame is mentioned.", + 0.5 + ], + [ + "The picture or the picture/frame is not mentioned.", + 0 + ], + [ + "The type of the picture is mentioned in the description but is not diagram, schematic, or blueprint.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The type of the picture is mentioned in the description and is diagram, schematic, or blueprint.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the frame is mentioned in the description and is wood.", + 1 + ], + [ + "The material of the frame is not mentioned, but the frame of the picture/frame is mentioned.", + 0.5 + ], + [ + "The frame or the picture/frame is not mentioned.", + 0 + ], + [ + "The material of the frame is mentioned in the description but is not wood.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the frame is mentioned in the description and is wood.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The radio is mentioned in the description.", + -1 + ], + [ + "The radio is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The radio is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The picture/frame is not mentioned in the description.", + 0 + ], + [ + "The glass of the picture/frame are mentioned in the description.", + -1 + ], + [ + "The glass of the picture/frame are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The glass of the picture/frame are not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The keyboard is mentioned in the description.", + -1 + ], + [ + "The keyboard is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The keyboard is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The light switch is mentioned in the description.", + -1 + ], + [ + "The light switch is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The light switch is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The curtain is mentioned in the description.", + -1 + ], + [ + "The curtain is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The curtain is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is picture/frame. 
Based on the image, is it likely that the object in the description is given class: picture/frame or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden frame encloses a detailed architectural blueprint with various lines, symbols, and text. The frame has a natural wood finish and is mounted on a wall.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is picture/frame. Based on the image, is it likely that the object in the description is given class: picture/frame or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden frame encloses a detailed architectural blueprint with various lines, symbols, and text. The frame has a natural wood finish and is mounted on a wall.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The shape of the frame is mentioned in the description and is rectangular.\nB. The shape of the frame is not mentioned, but the frame of the picture/frame is mentioned.\nC. The frame or the picture/frame is not mentioned.\nD. The shape of the frame is mentioned in the description but is not rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden frame encloses a detailed architectural blueprint with various lines, symbols, and text. The frame has a natural wood finish and is mounted on a wall.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The background color of the picture is mentioned in the description and is white.\nB. The background color of the picture is not mentioned, but the picture of the picture/frame is mentioned.\nC. The picture or the picture/frame is not mentioned.\nD. The background color of the picture is mentioned in the description but is not white.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden frame encloses a detailed architectural blueprint with various lines, symbols, and text. 
The frame has a natural wood finish and is mounted on a wall.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The type of the picture is mentioned in the description and is diagram, schematic, or blueprint.\nB. The type of the picture is not mentioned, but the picture of the picture/frame is mentioned.\nC. The picture or the picture/frame is not mentioned.\nD. The type of the picture is mentioned in the description but is not diagram, schematic, or blueprint.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden frame encloses a detailed architectural blueprint with various lines, symbols, and text. The frame has a natural wood finish and is mounted on a wall.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the frame is mentioned in the description and is wood.\nB. The material of the frame is not mentioned, but the frame of the picture/frame is mentioned.\nC. The frame or the picture/frame is not mentioned.\nD. The material of the frame is mentioned in the description but is not wood.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden frame encloses a detailed architectural blueprint with various lines, symbols, and text. The frame has a natural wood finish and is mounted on a wall.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The radio is mentioned in the description.\nB. The radio is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden frame encloses a detailed architectural blueprint with various lines, symbols, and text. The frame has a natural wood finish and is mounted on a wall.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The picture/frame is not mentioned in the description.\nB. The glass of the picture/frame are mentioned in the description.\nC. The glass of the picture/frame are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden frame encloses a detailed architectural blueprint with various lines, symbols, and text. 
The frame has a natural wood finish and is mounted on a wall.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The keyboard is mentioned in the description.\nB. The keyboard is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden frame encloses a detailed architectural blueprint with various lines, symbols, and text. The frame has a natural wood finish and is mounted on a wall.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The light switch is mentioned in the description.\nB. The light switch is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden frame encloses a detailed architectural blueprint with various lines, symbols, and text. The frame has a natural wood finish and is mounted on a wall.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The curtain is mentioned in the description.\nB. 
The curtain is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The shape of the frame is mentioned in the description and is rectangular.", + "B. The background color of the picture is not mentioned, but the picture of the picture/frame is mentioned.", + "A. The type of the picture is mentioned in the description and is diagram, schematic, or blueprint.", + "A. The material of the frame is mentioned in the description and is wood.", + "B. The radio is not mentioned in the description.", + "C. The glass of the picture/frame are not mentioned in the description.", + "B. The keyboard is not mentioned in the description.", + "B. The light switch is not mentioned in the description.", + "B. The curtain is not mentioned in the description." + ], + "score": 0.9444444444444444, + "score_pos": 0.875, + "score_neg": 1.0, + "recognition_result": true + }, + "4782942": { + "pred": "A large, dark-colored, conical-shaped horn with a wide, flared opening and a narrow, cylindrical body.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The megaphone/speaker is not mentioned.", + 0 + ], + [ + "The shape of the megaphone/speaker is not mentioned.", + 0 + ], + [ + "The shape of the megaphone/speaker is mentioned in the description but is not conical.", + -1 + ], + [ + "The shape of the megaphone/speaker is mentioned in the description and is conical.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The shape of the megaphone/speaker is mentioned in the description and is conical.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The megaphone/speaker is not mentioned.", + 0 + ], + [ + "The color of the megaphone/speaker is not mentioned.", + 0 + ], + [ + "The color of the megaphone/speaker is mentioned in the description but is not gray.", + -1 + ], + [ + "The color of the megaphone/speaker is mentioned in the description and is gray.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the megaphone/speaker is mentioned in the description and is gray.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The front/opening or the megaphone/speaker is not mentioned.", + 0 + ], + [ + "The shape of the front/opening is not mentioned, but the front/opening of the megaphone/speaker is mentioned.", + 0.5 + ], + [ + "The shape of the front/opening is mentioned in the description but is not round.", + -1 + ], + [ + "The shape of the front/opening is mentioned in the description and is round.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the front/opening is mentioned in the description and is round.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The megaphone/speaker is not mentioned in the description.", + 0 + ], + [ + "The siren button of the megaphone/speaker is not mentioned in the description.", + 1 + ], + [ + "The siren button of the megaphone/speaker is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The siren button of the megaphone/speaker is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fence is not mentioned in the description.", + 1 + ], + [ + "The fence is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The fence is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The megaphone/speaker is not mentioned in the description.", + 0 + ], + [ + "The strap of the megaphone/speaker is not mentioned in the description.", + 1 + ], + [ + "The strap of the megaphone/speaker is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The strap of the megaphone/speaker is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The megaphone/speaker is not mentioned in the description.", + 0 + ], + [ + "The battery compartment of the megaphone/speaker is not mentioned in the description.", + 1 + ], + [ + "The battery compartment of the megaphone/speaker is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The battery compartment of the megaphone/speaker is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The houses are not mentioned in the description.", + 1 + ], + [ + "The houses are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The houses are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is megaphone/speaker. Based on the image, is it likely that the object in the description is given class: megaphone/speaker or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, dark-colored, conical-shaped horn with a wide, flared opening and a narrow, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is megaphone/speaker. Based on the image, is it likely that the object in the description is given class: megaphone/speaker or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, dark-colored, conical-shaped horn with a wide, flared opening and a narrow, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The megaphone/speaker is not mentioned.\nB. The shape of the megaphone/speaker is not mentioned.\nC. The shape of the megaphone/speaker is mentioned in the description but is not conical.\nD. The shape of the megaphone/speaker is mentioned in the description and is conical.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, dark-colored, conical-shaped horn with a wide, flared opening and a narrow, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The megaphone/speaker is not mentioned.\nB. The color of the megaphone/speaker is not mentioned.\nC. The color of the megaphone/speaker is mentioned in the description but is not gray.\nD. The color of the megaphone/speaker is mentioned in the description and is gray.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, dark-colored, conical-shaped horn with a wide, flared opening and a narrow, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The front/opening or the megaphone/speaker is not mentioned.\nB. 
The shape of the front/opening is not mentioned, but the front/opening of the megaphone/speaker is mentioned.\nC. The shape of the front/opening is mentioned in the description but is not round.\nD. The shape of the front/opening is mentioned in the description and is round.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, dark-colored, conical-shaped horn with a wide, flared opening and a narrow, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The megaphone/speaker is not mentioned in the description.\nB. The siren button of the megaphone/speaker is not mentioned in the description.\nC. The siren button of the megaphone/speaker is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, dark-colored, conical-shaped horn with a wide, flared opening and a narrow, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fence is not mentioned in the description.\nB. 
The fence is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, dark-colored, conical-shaped horn with a wide, flared opening and a narrow, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The megaphone/speaker is not mentioned in the description.\nB. The strap of the megaphone/speaker is not mentioned in the description.\nC. The strap of the megaphone/speaker is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, dark-colored, conical-shaped horn with a wide, flared opening and a narrow, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The megaphone/speaker is not mentioned in the description.\nB. The battery compartment of the megaphone/speaker is not mentioned in the description.\nC. The battery compartment of the megaphone/speaker is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, dark-colored, conical-shaped horn with a wide, flared opening and a narrow, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The houses are not mentioned in the description.\nB. The houses are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The shape of the megaphone/speaker is mentioned in the description and is conical.", + "D. The color of the megaphone/speaker is mentioned in the description and is gray.", + "D. The shape of the front/opening is mentioned in the description and is round.", + "B. The siren button of the megaphone/speaker is not mentioned in the description.", + "A. The fence is not mentioned in the description.", + "B. The strap of the megaphone/speaker is not mentioned in the description.", + "B. The battery compartment of the megaphone/speaker is not mentioned in the description.", + "A. The houses are not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "6037269": { + "pred": "A metallic shower head with a curved, elongated handle. The handle is cylindrical and appears to be made of a light-colored material. 
The shower head itself is conical with a rounded tip and a slightly wider base, featuring a reflective surface.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the showerhead is mentioned in the description but is not silver and/or metallic.", + -1 + ], + [ + "The color of the showerhead is not mentioned.", + 0 + ], + [ + "The color of the showerhead is mentioned in the description and is silver and/or metallic.", + 1 + ], + [ + "The showerhead is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the showerhead is mentioned in the description and is silver and/or metallic.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the showerhead is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the showerhead is not mentioned.", + 0 + ], + [ + "The texture of the showerhead is mentioned in the description and is smooth.", + 1 + ], + [ + "The showerhead is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The texture of the showerhead is mentioned in the description and is smooth.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the showerhead is mentioned in the description but is not circular.", + -1 + ], + [ + "The shape of the showerhead is not mentioned.", + 0 + ], + [ + "The shape of the showerhead is mentioned in the description and is circular.", + 1 + ], + [ + "The showerhead is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The shape of the showerhead is mentioned in the description but is not circular.", + "pred_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the showerhead is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the showerhead is not mentioned.", + 0 + ], + [ + "The material of the showerhead is mentioned in the description and is metal.", + 1 + ], + [ + "The showerhead is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the showerhead is mentioned in the description and is metal.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the handle is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the handle is not mentioned, but the handle of the showerhead is mentioned.", + 0.5 + ], + [ + "The color of the handle is mentioned in the description and is white.", + 1 + ], + [ + "The handle or the showerhead is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the handle is mentioned in the description and is white.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shower hose of the showerhead is mentioned in the description.", + -1 + ], + [ + "The showerhead is not mentioned in the description.", + 0 + ], + [ + "The shower hose of the showerhead is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The shower hose of the showerhead is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bath caddy is mentioned in the description.", + -1 + ], + [ + "The bath caddy is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bath caddy is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bathtub is mentioned in the description.", + -1 + ], + [ + "The bathtub is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bathtub is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The showerhead filter of the showerhead is mentioned in the description.", + -1 + ], + [ + "The showerhead is not mentioned in the description.", + 0 + ], + [ + "The showerhead filter of the showerhead is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The showerhead filter of the showerhead is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet paper holder is mentioned in the description.", + -1 + ], + [ + "The toilet paper holder is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The toilet paper holder is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is showerhead. 
Based on the image, is it likely that the object in the description is given class: showerhead or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic shower head with a curved, elongated handle. The handle is cylindrical and appears to be made of a light-colored material. The shower head itself is conical with a rounded tip and a slightly wider base, featuring a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is showerhead. Based on the image, is it likely that the object in the description is given class: showerhead or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic shower head with a curved, elongated handle. The handle is cylindrical and appears to be made of a light-colored material. 
The shower head itself is conical with a rounded tip and a slightly wider base, featuring a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the showerhead is mentioned in the description but is not silver and/or metallic.\nB. The color of the showerhead is not mentioned.\nC. The color of the showerhead is mentioned in the description and is silver and/or metallic.\nD. The showerhead is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic shower head with a curved, elongated handle. The handle is cylindrical and appears to be made of a light-colored material. The shower head itself is conical with a rounded tip and a slightly wider base, featuring a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the showerhead is mentioned in the description but is not smooth.\nB. The texture of the showerhead is not mentioned.\nC. The texture of the showerhead is mentioned in the description and is smooth.\nD. The showerhead is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA metallic shower head with a curved, elongated handle. The handle is cylindrical and appears to be made of a light-colored material. The shower head itself is conical with a rounded tip and a slightly wider base, featuring a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the showerhead is mentioned in the description but is not circular.\nB. The shape of the showerhead is not mentioned.\nC. The shape of the showerhead is mentioned in the description and is circular.\nD. The showerhead is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic shower head with a curved, elongated handle. The handle is cylindrical and appears to be made of a light-colored material. The shower head itself is conical with a rounded tip and a slightly wider base, featuring a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the showerhead is mentioned in the description but is not metal.\nB. The material of the showerhead is not mentioned.\nC. The material of the showerhead is mentioned in the description and is metal.\nD. The showerhead is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic shower head with a curved, elongated handle. The handle is cylindrical and appears to be made of a light-colored material. The shower head itself is conical with a rounded tip and a slightly wider base, featuring a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the handle is mentioned in the description but is not white.\nB. The color of the handle is not mentioned, but the handle of the showerhead is mentioned.\nC. The color of the handle is mentioned in the description and is white.\nD. The handle or the showerhead is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic shower head with a curved, elongated handle. The handle is cylindrical and appears to be made of a light-colored material. The shower head itself is conical with a rounded tip and a slightly wider base, featuring a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shower hose of the showerhead is mentioned in the description.\nB. The showerhead is not mentioned in the description.\nC. 
The shower hose of the showerhead is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic shower head with a curved, elongated handle. The handle is cylindrical and appears to be made of a light-colored material. The shower head itself is conical with a rounded tip and a slightly wider base, featuring a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bath caddy is mentioned in the description.\nB. The bath caddy is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic shower head with a curved, elongated handle. The handle is cylindrical and appears to be made of a light-colored material. The shower head itself is conical with a rounded tip and a slightly wider base, featuring a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bathtub is mentioned in the description.\nB. 
The bathtub is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic shower head with a curved, elongated handle. The handle is cylindrical and appears to be made of a light-colored material. The shower head itself is conical with a rounded tip and a slightly wider base, featuring a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The showerhead filter of the showerhead is mentioned in the description.\nB. The showerhead is not mentioned in the description.\nC. The showerhead filter of the showerhead is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic shower head with a curved, elongated handle. The handle is cylindrical and appears to be made of a light-colored material. The shower head itself is conical with a rounded tip and a slightly wider base, featuring a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toilet paper holder is mentioned in the description.\nB. 
The toilet paper holder is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the showerhead is mentioned in the description and is silver and/or metallic.", + "C. The texture of the showerhead is mentioned in the description and is smooth.", + "A. The shape of the showerhead is mentioned in the description but is not circular.", + "C. The material of the showerhead is mentioned in the description and is metal.", + "C. The color of the handle is mentioned in the description and is white.", + "C. The shower hose of the showerhead is not mentioned in the description.", + "B. The bath caddy is not mentioned in the description.", + "B. The bathtub is not mentioned in the description.", + "C. The showerhead filter of the showerhead is not mentioned in the description.", + "B. The toilet paper holder is not mentioned in the description." + ], + "score": 0.8, + "score_pos": 0.6, + "score_neg": 1.0, + "recognition_result": true + }, + "7050495": { + "pred": "A black leather handbag with a smooth texture and a slightly curved bottom edge.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the handbag is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the handbag is not mentioned.", + 0 + ], + [ + "The texture of the handbag is mentioned in the description but is not smooth.", + -1 + ], + [ + "The handbag is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The texture of the handbag is mentioned in the description and is smooth.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the handbag is mentioned in the description and is black or glossy.", + 1 + ], + [ + "The color of the handbag is not mentioned.", + 0 + ], + [ + "The color of the handbag is mentioned in the description but is not black or glossy.", + -1 + ], + [ + "The handbag is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the handbag is mentioned in the description and is black or glossy.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the handbag is mentioned in the description and is leather.", + 1 + ], + [ + "The material of the handbag is not mentioned.", + 0 + ], + [ + "The material of the handbag is mentioned in the description but is not leather.", + -1 + ], + [ + "The handbag is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the handbag is mentioned in the description and is leather.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the handbag is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the handbag is not mentioned.", + 0 + ], + [ + "The shape of the handbag is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The handbag is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the handbag is mentioned in the description but is not rectangular.", + "pred_index": 2, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The seam of the handbag is mentioned in the description and is visible.", + 1 + ], + [ + "The seam of the handbag is not mentioned.", + 0 + ], + [ + "The seam of the handbag is mentioned in the description but is not visible.", + -1 + ], + [ + "The handbag is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The seam of the handbag is not mentioned.", + "pred_index": 1, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The price tags are not mentioned in the description.", + 1 + ], + [ + "The price tags are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The price tags are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handbag is not mentioned in the description.", + 0 + ], + [ + "The logo of the handbag is not mentioned in the description.", + 1 + ], + [ + "The logo of the handbag is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The logo of the handbag is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handbag is not mentioned in the description.", + 0 + ], + [ + "The handle of the handbag is not mentioned in the description.", + 1 + ], + [ + "The handle of the handbag is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The handle of the handbag is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handbag is not mentioned in the description.", + 0 + ], + [ + "The pocket of the handbag is not mentioned in the description.", + 1 + ], + [ + "The pocket of the handbag is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The pocket of the handbag is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handbag is not mentioned in the description.", + 0 + ], + [ + "The zipper of the handbag is not mentioned in the description.", + 1 + ], + [ + "The zipper of the handbag is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The zipper of the handbag is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is handbag. Based on the image, is it likely that the object in the description is given class: handbag or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth texture and a slightly curved bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is handbag. Based on the image, is it likely that the object in the description is given class: handbag or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth texture and a slightly curved bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the handbag is mentioned in the description and is smooth.\nB. The texture of the handbag is not mentioned.\nC. The texture of the handbag is mentioned in the description but is not smooth.\nD. The handbag is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth texture and a slightly curved bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The color of the handbag is mentioned in the description and is black or glossy.\nB. The color of the handbag is not mentioned.\nC. The color of the handbag is mentioned in the description but is not black or glossy.\nD. The handbag is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth texture and a slightly curved bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the handbag is mentioned in the description and is leather.\nB. The material of the handbag is not mentioned.\nC. The material of the handbag is mentioned in the description but is not leather.\nD. The handbag is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth texture and a slightly curved bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the handbag is mentioned in the description and is rectangular.\nB. The shape of the handbag is not mentioned.\nC. 
The shape of the handbag is mentioned in the description but is not rectangular.\nD. The handbag is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth texture and a slightly curved bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The seam of the handbag is mentioned in the description and is visible.\nB. The seam of the handbag is not mentioned.\nC. The seam of the handbag is mentioned in the description but is not visible.\nD. The handbag is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth texture and a slightly curved bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The price tags are not mentioned in the description.\nB. The price tags are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth texture and a slightly curved bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handbag is not mentioned in the description.\nB. The logo of the handbag is not mentioned in the description.\nC. The logo of the handbag is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth texture and a slightly curved bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handbag is not mentioned in the description.\nB. The handle of the handbag is not mentioned in the description.\nC. The handle of the handbag is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth texture and a slightly curved bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handbag is not mentioned in the description.\nB. The pocket of the handbag is not mentioned in the description.\nC. The pocket of the handbag is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth texture and a slightly curved bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handbag is not mentioned in the description.\nB. The zipper of the handbag is not mentioned in the description.\nC. The zipper of the handbag is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The texture of the handbag is mentioned in the description and is smooth.", + "A. The color of the handbag is mentioned in the description and is black or glossy.", + "A. The material of the handbag is mentioned in the description and is leather.", + "C. The shape of the handbag is mentioned in the description but is not rectangular.", + "B. The seam of the handbag is not mentioned.", + "A. The price tags are not mentioned in the description.", + "B. The logo of the handbag is not mentioned in the description.", + "B. The handle of the handbag is not mentioned in the description.", + "B. 
The pocket of the handbag is not mentioned in the description.", + "B. The zipper of the handbag is not mentioned in the description." + ], + "score": 0.7, + "score_pos": 0.4, + "score_neg": 1.0, + "recognition_result": true + }, + "8331699": { + "pred": "The visible part of the printer is black with a smooth, curved surface.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The computer box is not mentioned.", + 0 + ], + [ + "The shape of the computer box is not mentioned.", + 0 + ], + [ + "The shape of the computer box is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the computer box is mentioned in the description but is not rectangular.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the computer box is not mentioned.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The computer box is not mentioned.", + 0 + ], + [ + "The color of the computer box is not mentioned.", + 0 + ], + [ + "The color of the computer box is mentioned in the description and is black or gray.", + 1 + ], + [ + "The color of the computer box is mentioned in the description but is not black or gray.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the computer box is not mentioned.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The side panel or the computer box is not mentioned.", + 0 + ], + [ + "The color of the side panel is not mentioned, but the side panel of the computer box is mentioned.", + 0.5 + ], + [ + "The color of the side panel is mentioned in the description and is gray.", + 1 + ], + [ + "The color of the side panel is mentioned in the description but is not gray.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The side panel or the computer box is not mentioned.", + "pred_index": 0, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chair is not mentioned in the description.", + 1 + ], + [ + "The chair is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The chair is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rubber duck is not mentioned in the description.", + 1 + ], + [ + "The rubber duck is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The rubber duck is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The power button of the computer box is not mentioned in the description.", + 1 + ], + [ + "The computer box is not mentioned in the description.", + 0 + ], + [ + "The power button of the computer box is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The computer box is not mentioned in the description.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sticky notes are not mentioned in the description.", + 1 + ], + [ + "The sticky notes are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The sticky notes are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The USB ports of the computer box are not mentioned in the description.", + 1 + ], + [ + "The computer box is not mentioned in the description.", + 0 + ], + [ + "The USB ports of the computer box are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The computer box is not mentioned in the description.", + "pred_index": 1, + "eval_result": 0 + } + ], + "details_recognition": [ + { + "question": "The object in the image is computer box. Based on the image, is it likely that the object in the description is given class: computer box or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the printer is black with a smooth, curved surface.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is computer box. Based on the image, is it likely that the object in the description is given class: computer box or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the printer is black with a smooth, curved surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The computer box is not mentioned.\nB. The shape of the computer box is not mentioned.\nC. The shape of the computer box is mentioned in the description and is rectangular.\nD. The shape of the computer box is mentioned in the description but is not rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the printer is black with a smooth, curved surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The computer box is not mentioned.\nB. The color of the computer box is not mentioned.\nC. The color of the computer box is mentioned in the description and is black or gray.\nD. The color of the computer box is mentioned in the description but is not black or gray.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the printer is black with a smooth, curved surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The side panel or the computer box is not mentioned.\nB. The color of the side panel is not mentioned, but the side panel of the computer box is mentioned.\nC. The color of the side panel is mentioned in the description and is gray.\nD. The color of the side panel is mentioned in the description but is not gray.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the printer is black with a smooth, curved surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chair is not mentioned in the description.\nB. The chair is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the printer is black with a smooth, curved surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The rubber duck is not mentioned in the description.\nB. The rubber duck is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the printer is black with a smooth, curved surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The power button of the computer box is not mentioned in the description.\nB. The computer box is not mentioned in the description.\nC. The power button of the computer box is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the printer is black with a smooth, curved surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sticky notes are not mentioned in the description.\nB. The sticky notes are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the printer is black with a smooth, curved surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The USB ports of the computer box are not mentioned in the description.\nB. The computer box is not mentioned in the description.\nC. The USB ports of the computer box are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The shape of the computer box is not mentioned.", + "B. The color of the computer box is not mentioned.", + "A. The side panel or the computer box is not mentioned.", + "A. The chair is not mentioned in the description.", + "A. The rubber duck is not mentioned in the description.", + "B. The computer box is not mentioned in the description.", + "A. The sticky notes are not mentioned in the description.", + "B. The computer box is not mentioned in the description." + ], + "score": 0.375, + "score_pos": 0.0, + "score_neg": 0.6, + "recognition_result": true + }, + "8556676": { + "pred": "A deep red apple with a glossy surface reflecting light, showcasing a smooth curvature and a small, visible portion of the stem.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the apple is mentioned in the description and is small.", + 1 + ], + [ + "The size of the apple is not mentioned.", + 0 + ], + [ + "The size of the apple is mentioned in the description but is not small.", + -1 + ], + [ + "The apple is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The size of the apple is not mentioned.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the apple is mentioned in the description and is smooth or glossy.", + 1 + ], + [ + "The texture of the apple is not mentioned.", + 0 + ], + [ + "The texture of the apple is mentioned in the description but is not smooth or glossy.", + -1 + ], + [ + "The apple is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The texture of the apple is mentioned in the description and is smooth or glossy.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the apple is mentioned in the description and is red.", + 1 + ], + [ + "The color of the apple is not mentioned.", + 0 + ], + [ + "The color of the apple is mentioned in the description but is not red.", + -1 + ], + [ + "The apple is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the apple is mentioned in the description and is red.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lemon is mentioned in the description.", + -1 + ], + [ + "The lemon is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The lemon is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange is mentioned in the description.", + -1 + ], + [ + "The orange is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The orange is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pineapple is mentioned in the description.", + -1 + ], + [ + "The pineapple is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The pineapple is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The seeds of the apple are mentioned in the description.", + -1 + ], + [ + "The seeds of the apple are not mentioned in the description.", + 1 + ], + [ + "The apple is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The seeds of the apple are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The stem of the apple is mentioned in the description.", + -1 + ], + [ + "The stem of the apple is not mentioned in the description.", + 1 + ], + [ + "The apple is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The stem of the apple is mentioned in the description.", + "pred_index": 0, + "eval_result": -1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is apple. Based on the image, is it likely that the object in the description is given class: apple or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a glossy surface reflecting light, showcasing a smooth curvature and a small, visible portion of the stem.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is apple. Based on the image, is it likely that the object in the description is given class: apple or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a glossy surface reflecting light, showcasing a smooth curvature and a small, visible portion of the stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the apple is mentioned in the description and is small.\nB. The size of the apple is not mentioned.\nC. The size of the apple is mentioned in the description but is not small.\nD. The apple is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a glossy surface reflecting light, showcasing a smooth curvature and a small, visible portion of the stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the apple is mentioned in the description and is smooth or glossy.\nB. The texture of the apple is not mentioned.\nC. The texture of the apple is mentioned in the description but is not smooth or glossy.\nD. The apple is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a glossy surface reflecting light, showcasing a smooth curvature and a small, visible portion of the stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the apple is mentioned in the description and is red.\nB. The color of the apple is not mentioned.\nC. The color of the apple is mentioned in the description but is not red.\nD. The apple is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a glossy surface reflecting light, showcasing a smooth curvature and a small, visible portion of the stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lemon is mentioned in the description.\nB. The lemon is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a glossy surface reflecting light, showcasing a smooth curvature and a small, visible portion of the stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orange is mentioned in the description.\nB. The orange is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a glossy surface reflecting light, showcasing a smooth curvature and a small, visible portion of the stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The pineapple is mentioned in the description.\nB. The pineapple is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a glossy surface reflecting light, showcasing a smooth curvature and a small, visible portion of the stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The seeds of the apple are mentioned in the description.\nB. The seeds of the apple are not mentioned in the description.\nC. The apple is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a glossy surface reflecting light, showcasing a smooth curvature and a small, visible portion of the stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The stem of the apple is mentioned in the description.\nB. The stem of the apple is not mentioned in the description.\nC. The apple is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The size of the apple is not mentioned.", + "A. 
The texture of the apple is mentioned in the description and is smooth or glossy.", + "A. The color of the apple is mentioned in the description and is red.", + "B. The lemon is not mentioned in the description.", + "B. The orange is not mentioned in the description.", + "B. The pineapple is not mentioned in the description.", + "B. The seeds of the apple are not mentioned in the description.", + "A. The stem of the apple is mentioned in the description." + ], + "score": 0.625, + "score_pos": 0.6666666666666666, + "score_neg": 0.6, + "recognition_result": true + }, + "11012500": { + "pred": "A soft, round tortilla filled with fresh arugula, a slice of ripe tomato, shredded lettuce, and a dollop of creamy sauce.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the lettuce is not mentioned, but the lettuce of the taco is mentioned.", + 0.5 + ], + [ + "The shape of the lettuce is mentioned in the description but is not shredded.", + -1 + ], + [ + "The lettuce or the taco is not mentioned.", + 0 + ], + [ + "The shape of the lettuce is mentioned in the description and is shredded.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the lettuce is mentioned in the description and is shredded.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the tomato is not mentioned, but the tomato of the taco is mentioned.", + 0.5 + ], + [ + "The shape of the tomato is mentioned in the description but is not sliced.", + -1 + ], + [ + "The tomato or the taco is not mentioned.", + 0 + ], + [ + "The shape of the tomato is mentioned in the description and is sliced.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The shape of the tomato is mentioned in the description and is sliced.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the arugula is not mentioned, but the arugula of the taco is mentioned.", + 0.5 + ], + [ + "The color of the arugula is mentioned in the description but is not green.", + -1 + ], + [ + "The arugula or the taco is not mentioned.", + 0 + ], + [ + "The color of the arugula is mentioned in the description and is green.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the arugula is not mentioned, but the arugula of the taco is mentioned.", + "pred_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the tortilla is not mentioned, but the tortilla of the taco is mentioned.", + 0.5 + ], + [ + "The color of the tortilla is mentioned in the description but is not white.", + -1 + ], + [ + "The tortilla or the taco is not mentioned.", + 0 + ], + [ + "The color of the tortilla is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the tortilla is not mentioned, but the tortilla of the taco is mentioned.", + "pred_index": 0, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The taco is not mentioned in the description.", + 0 + ], + [ + "The whipped cream of the taco is not mentioned in the description.", + 1 + ], + [ + "The whipped cream of the taco is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The whipped cream of the taco is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The two glasses of lemonade with lemon slices and straws are not mentioned in the description.", + 1 + ], + [ + "The two glasses of lemonade with lemon slices and straws are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The two glasses of lemonade with lemon slices and straws are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The taco is not mentioned in the description.", + 0 + ], + [ + "The nuts of the taco are not mentioned in the description.", + 1 + ], + [ + "The nuts of the taco are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The nuts of the taco are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sandwich with vegetables are not mentioned in the description.", + 1 + ], + [ + "The sandwich with vegetables are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The sandwich with vegetables are mentioned in the description.", + "pred_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The taco is not mentioned in the description.", + 0 + ], + [ + "The chocolate of the taco is not mentioned in the description.", + 1 + ], + [ + "The chocolate of the taco is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The chocolate of the taco is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is taco. Based on the image, is it likely that the object in the description is given class: taco or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA soft, round tortilla filled with fresh arugula, a slice of ripe tomato, shredded lettuce, and a dollop of creamy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is taco. Based on the image, is it likely that the object in the description is given class: taco or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA soft, round tortilla filled with fresh arugula, a slice of ripe tomato, shredded lettuce, and a dollop of creamy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The shape of the lettuce is not mentioned, but the lettuce of the taco is mentioned.\nB. The shape of the lettuce is mentioned in the description but is not shredded.\nC. The lettuce or the taco is not mentioned.\nD. The shape of the lettuce is mentioned in the description and is shredded.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA soft, round tortilla filled with fresh arugula, a slice of ripe tomato, shredded lettuce, and a dollop of creamy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the tomato is not mentioned, but the tomato of the taco is mentioned.\nB. The shape of the tomato is mentioned in the description but is not sliced.\nC. The tomato or the taco is not mentioned.\nD. The shape of the tomato is mentioned in the description and is sliced.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA soft, round tortilla filled with fresh arugula, a slice of ripe tomato, shredded lettuce, and a dollop of creamy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The color of the arugula is not mentioned, but the arugula of the taco is mentioned.\nB. The color of the arugula is mentioned in the description but is not green.\nC. The arugula or the taco is not mentioned.\nD. The color of the arugula is mentioned in the description and is green.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA soft, round tortilla filled with fresh arugula, a slice of ripe tomato, shredded lettuce, and a dollop of creamy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the tortilla is not mentioned, but the tortilla of the taco is mentioned.\nB. The color of the tortilla is mentioned in the description but is not white.\nC. The tortilla or the taco is not mentioned.\nD. The color of the tortilla is mentioned in the description and is white.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA soft, round tortilla filled with fresh arugula, a slice of ripe tomato, shredded lettuce, and a dollop of creamy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The taco is not mentioned in the description.\nB. The whipped cream of the taco is not mentioned in the description.\nC. The whipped cream of the taco is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA soft, round tortilla filled with fresh arugula, a slice of ripe tomato, shredded lettuce, and a dollop of creamy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The two glasses of lemonade with lemon slices and straws are not mentioned in the description.\nB. The two glasses of lemonade with lemon slices and straws are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA soft, round tortilla filled with fresh arugula, a slice of ripe tomato, shredded lettuce, and a dollop of creamy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The taco is not mentioned in the description.\nB. The nuts of the taco are not mentioned in the description.\nC. 
The nuts of the taco are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA soft, round tortilla filled with fresh arugula, a slice of ripe tomato, shredded lettuce, and a dollop of creamy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sandwich with vegetables are not mentioned in the description.\nB. The sandwich with vegetables are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA soft, round tortilla filled with fresh arugula, a slice of ripe tomato, shredded lettuce, and a dollop of creamy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The taco is not mentioned in the description.\nB. The chocolate of the taco is not mentioned in the description.\nC. The chocolate of the taco is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The shape of the lettuce is mentioned in the description and is shredded.", + "D. The shape of the tomato is mentioned in the description and is sliced.", + "A. 
The color of the arugula is not mentioned, but the arugula of the taco is mentioned.", + "A. The color of the tortilla is not mentioned, but the tortilla of the taco is mentioned.", + "B. The whipped cream of the taco is not mentioned in the description.", + "A. The two glasses of lemonade with lemon slices and straws are not mentioned in the description.", + "B. The nuts of the taco are not mentioned in the description.", + "B. The sandwich with vegetables are mentioned in the description.", + "B. The chocolate of the taco is not mentioned in the description." + ], + "score": 0.6666666666666666, + "score_pos": 0.75, + "score_neg": 0.6, + "recognition_result": true + }, + "12348080": { + "pred": "A pair of scissors with bright red handles, each handle forming a loop with a smooth, rounded edge. The blades are metallic and converge at a central pivot point, with one blade partially visible.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the handles is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the handles is mentioned in the description and is plastic.", + 1 + ], + [ + "The handles or the scissors are not mentioned.", + 0 + ], + [ + "The material of the handles is not mentioned, but the handles of the scissors are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. 
The material of the handles is not mentioned, but the handles of the scissors are mentioned.", + "pred_index": 3, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the blades is mentioned in the description but is not silver or metallic.", + -1 + ], + [ + "The color of the blades is mentioned in the description and is silver or metallic.", + 1 + ], + [ + "The blades or the scissors are not mentioned.", + 0 + ], + [ + "The color of the blades is not mentioned, but the blades of the scissors are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the blades is mentioned in the description and is silver or metallic.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the blades is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the blades is mentioned in the description and is metal.", + 1 + ], + [ + "The blades or the scissors are not mentioned.", + 0 + ], + [ + "The material of the blades is not mentioned, but the blades of the scissors are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The material of the blades is mentioned in the description and is metal.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the handles is mentioned in the description but is not red.", + -1 + ], + [ + "The color of the handles is mentioned in the description and is red.", + 1 + ], + [ + "The handles or the scissors are not mentioned.", + 0 + ], + [ + "The color of the handles is not mentioned, but the handles of the scissors are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the handles is mentioned in the description and is red.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The adjustment screw of the scissors is not mentioned in the description.", + 1 + ], + [ + "The adjustment screw of the scissors is mentioned in the description.", + -1 + ], + [ + "The scissors are not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The adjustment screw of the scissors is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The blade guard of the scissors is not mentioned in the description.", + 1 + ], + [ + "The blade guard of the scissors is mentioned in the description.", + -1 + ], + [ + "The scissors are not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The blade guard of the scissors is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tricycle cart is not mentioned in the description.", + 1 + ], + [ + "The tricycle cart is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The tricycle cart is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The baskets of fruit are not mentioned in the description.", + 1 + ], + [ + "The baskets of fruit are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The baskets of fruit are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The scale is not mentioned in the description.", + 1 + ], + [ + "The scale is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The scale is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is scissors. Based on the image, is it likely that the object in the description is given class: scissors or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red handles, each handle forming a loop with a smooth, rounded edge. The blades are metallic and converge at a central pivot point, with one blade partially visible.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is scissors. Based on the image, is it likely that the object in the description is given class: scissors or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red handles, each handle forming a loop with a smooth, rounded edge. The blades are metallic and converge at a central pivot point, with one blade partially visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the handles is mentioned in the description but is not plastic.\nB. The material of the handles is mentioned in the description and is plastic.\nC. The handles or the scissors are not mentioned.\nD. The material of the handles is not mentioned, but the handles of the scissors are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red handles, each handle forming a loop with a smooth, rounded edge. The blades are metallic and converge at a central pivot point, with one blade partially visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the blades is mentioned in the description but is not silver or metallic.\nB. The color of the blades is mentioned in the description and is silver or metallic.\nC. The blades or the scissors are not mentioned.\nD. 
The color of the blades is not mentioned, but the blades of the scissors are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red handles, each handle forming a loop with a smooth, rounded edge. The blades are metallic and converge at a central pivot point, with one blade partially visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the blades is mentioned in the description but is not metal.\nB. The material of the blades is mentioned in the description and is metal.\nC. The blades or the scissors are not mentioned.\nD. The material of the blades is not mentioned, but the blades of the scissors are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red handles, each handle forming a loop with a smooth, rounded edge. The blades are metallic and converge at a central pivot point, with one blade partially visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the handles is mentioned in the description but is not red.\nB. 
The color of the handles is mentioned in the description and is red.\nC. The handles or the scissors are not mentioned.\nD. The color of the handles is not mentioned, but the handles of the scissors are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red handles, each handle forming a loop with a smooth, rounded edge. The blades are metallic and converge at a central pivot point, with one blade partially visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The adjustment screw of the scissors is not mentioned in the description.\nB. The adjustment screw of the scissors is mentioned in the description.\nC. The scissors are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red handles, each handle forming a loop with a smooth, rounded edge. The blades are metallic and converge at a central pivot point, with one blade partially visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The blade guard of the scissors is not mentioned in the description.\nB. The blade guard of the scissors is mentioned in the description.\nC. The scissors are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red handles, each handle forming a loop with a smooth, rounded edge. The blades are metallic and converge at a central pivot point, with one blade partially visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tricycle cart is not mentioned in the description.\nB. The tricycle cart is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red handles, each handle forming a loop with a smooth, rounded edge. The blades are metallic and converge at a central pivot point, with one blade partially visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The baskets of fruit are not mentioned in the description.\nB. 
The baskets of fruit are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red handles, each handle forming a loop with a smooth, rounded edge. The blades are metallic and converge at a central pivot point, with one blade partially visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The scale is not mentioned in the description.\nB. The scale is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The material of the handles is not mentioned, but the handles of the scissors are mentioned.", + "B. The color of the blades is mentioned in the description and is silver or metallic.", + "B. The material of the blades is mentioned in the description and is metal.", + "B. The color of the handles is mentioned in the description and is red.", + "A. The adjustment screw of the scissors is not mentioned in the description.", + "A. The blade guard of the scissors is not mentioned in the description.", + "A. The tricycle cart is not mentioned in the description.", + "A. The baskets of fruit are not mentioned in the description.", + "A. The scale is not mentioned in the description." 
+ ], + "score": 0.9444444444444444, + "score_pos": 0.875, + "score_neg": 1.0, + "recognition_result": true + }, + "16951734": { + "pred": "A triangular slice of mango with a smooth, light orange flesh and a slightly darker orange edge.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the potato is not mentioned.", + 0 + ], + [ + "The color of the potato is mentioned in the description and is yellow, golden, or brown.", + 1 + ], + [ + "The potato is not mentioned.", + 0 + ], + [ + "The color of the potato is mentioned in the description but is not yellow, golden, or brown.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The potato is not mentioned.", + "pred_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the potato is not mentioned.", + 0 + ], + [ + "The texture of the potato is mentioned in the description and is smooth.", + 1 + ], + [ + "The potato is not mentioned.", + 0 + ], + [ + "The texture of the potato is mentioned in the description but is not smooth.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The potato is not mentioned.", + "pred_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the potato is not mentioned.", + 0 + ], + [ + "The shape of the potato is mentioned in the description and is irregular.", + 1 + ], + [ + "The potato is not mentioned.", + 0 + ], + [ + "The shape of the potato is mentioned in the description but is not irregular.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The potato is not mentioned.", + "pred_index": 2, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cup is not mentioned in the description.", + 1 + ], + [ + "The cup is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The cup is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sprouts of the potato are not mentioned in the description.", + 1 + ], + [ + "The potato is not mentioned in the description.", + 0 + ], + [ + "The sprouts of the potato are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The potato is not mentioned in the description.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bowl is not mentioned in the description.", + 1 + ], + [ + "The bowl is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The bowl is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The roots of the potato are not mentioned in the description.", + 1 + ], + [ + "The potato is not mentioned in the description.", + 0 + ], + [ + "The roots of the potato are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The potato is not mentioned in the description.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The liquid is not mentioned in the description.", + 1 + ], + [ + "The liquid is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The liquid is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is potato. Based on the image, is it likely that the object in the description is given class: potato or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA triangular slice of mango with a smooth, light orange flesh and a slightly darker orange edge.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is potato. Based on the image, is it likely that the object in the description is given class: potato or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA triangular slice of mango with a smooth, light orange flesh and a slightly darker orange edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the potato is not mentioned.\nB. The color of the potato is mentioned in the description and is yellow, golden, or brown.\nC. The potato is not mentioned.\nD. The color of the potato is mentioned in the description but is not yellow, golden, or brown.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA triangular slice of mango with a smooth, light orange flesh and a slightly darker orange edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the potato is not mentioned.\nB. The texture of the potato is mentioned in the description and is smooth.\nC. The potato is not mentioned.\nD. The texture of the potato is mentioned in the description but is not smooth.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA triangular slice of mango with a smooth, light orange flesh and a slightly darker orange edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the potato is not mentioned.\nB. The shape of the potato is mentioned in the description and is irregular.\nC. The potato is not mentioned.\nD. The shape of the potato is mentioned in the description but is not irregular.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA triangular slice of mango with a smooth, light orange flesh and a slightly darker orange edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cup is not mentioned in the description.\nB. The cup is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA triangular slice of mango with a smooth, light orange flesh and a slightly darker orange edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The sprouts of the potato are not mentioned in the description.\nB. The potato is not mentioned in the description.\nC. The sprouts of the potato are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA triangular slice of mango with a smooth, light orange flesh and a slightly darker orange edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bowl is not mentioned in the description.\nB. The bowl is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA triangular slice of mango with a smooth, light orange flesh and a slightly darker orange edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The roots of the potato are not mentioned in the description.\nB. The potato is not mentioned in the description.\nC. The roots of the potato are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA triangular slice of mango with a smooth, light orange flesh and a slightly darker orange edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The liquid is not mentioned in the description.\nB. The liquid is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The potato is not mentioned.", + "C. The potato is not mentioned.", + "C. The potato is not mentioned.", + "A. The cup is not mentioned in the description.", + "B. The potato is not mentioned in the description.", + "A. The bowl is not mentioned in the description.", + "B. The potato is not mentioned in the description.", + "A. The liquid is not mentioned in the description." + ], + "score": 0.375, + "score_pos": 0.0, + "score_neg": 0.6, + "recognition_result": true + }, + "17265254": { + "pred": "A traditional rickshaw with a black frame and a red seat cushion. It features a single large spoked wheel on each side, connected by a horizontal axle. The rickshaw has a curved handlebar at the front, and a footrest is visible beneath the seat. The wheels are equipped with black tires and silver rims.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the wheels is not mentioned, but the wheels of the rickshaw are mentioned.", + 0.5 + ], + [ + "The wheels or the rickshaw are not mentioned.", + 0 + ], + [ + "The shape of the wheels is mentioned in the description and is circular or spoked.", + 1 + ], + [ + "The shape of the wheels is mentioned in the description but is not circular or spoked.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the wheels is mentioned in the description and is circular or spoked.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the seat is not mentioned, but the seat of the rickshaw is mentioned.", + 0.5 + ], + [ + "The seat or the rickshaw is not mentioned.", + 0 + ], + [ + "The color of the seat is mentioned in the description and is black.", + 1 + ], + [ + "The color of the seat is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the seat is mentioned in the description but is not black.", + "pred_index": 3, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The number of parts of the wheels is not mentioned, but the wheels of the rickshaw are mentioned.", + 0.5 + ], + [ + "The wheels or the rickshaw are not mentioned.", + 0 + ], + [ + "The number of parts of the wheels is mentioned in the description and is 2.", + 1 + ], + [ + "The number of parts of the wheels is mentioned in the description but is not 2.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The number of parts of the wheels is not mentioned, but the wheels of the rickshaw are mentioned.", + "pred_index": 0, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lights of the rickshaw are not mentioned in the description.", + 1 + ], + [ + "The lights of the rickshaw are mentioned in the description.", + -1 + ], + [ + "The rickshaw is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The lights of the rickshaw are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The people are not mentioned in the description.", + 1 + ], + [ + "The people are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The people are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The storage compartment of the rickshaw is not mentioned in the description.", + 1 + ], + [ + "The storage compartment of the rickshaw is mentioned in the description.", + -1 + ], + [ + "The rickshaw is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The storage compartment of the rickshaw is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bamboo forest is not mentioned in the description.", + 1 + ], + [ + "The bamboo forest is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The bamboo forest is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The horn of the rickshaw is not mentioned in the description.", + 1 + ], + [ + "The horn of the rickshaw is mentioned in the description.", + -1 + ], + [ + "The rickshaw is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The horn of the rickshaw is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is rickshaw. Based on the image, is it likely that the object in the description is given class: rickshaw or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat cushion. It features a single large spoked wheel on each side, connected by a horizontal axle. The rickshaw has a curved handlebar at the front, and a footrest is visible beneath the seat. The wheels are equipped with black tires and silver rims.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is rickshaw. Based on the image, is it likely that the object in the description is given class: rickshaw or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat cushion. 
It features a single large spoked wheel on each side, connected by a horizontal axle. The rickshaw has a curved handlebar at the front, and a footrest is visible beneath the seat. The wheels are equipped with black tires and silver rims.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the wheels is not mentioned, but the wheels of the rickshaw are mentioned.\nB. The wheels or the rickshaw are not mentioned.\nC. The shape of the wheels is mentioned in the description and is circular or spoked.\nD. The shape of the wheels is mentioned in the description but is not circular or spoked.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat cushion. It features a single large spoked wheel on each side, connected by a horizontal axle. The rickshaw has a curved handlebar at the front, and a footrest is visible beneath the seat. The wheels are equipped with black tires and silver rims.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the seat is not mentioned, but the seat of the rickshaw is mentioned.\nB. The seat or the rickshaw is not mentioned.\nC. The color of the seat is mentioned in the description and is black.\nD. The color of the seat is mentioned in the description but is not black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat cushion. It features a single large spoked wheel on each side, connected by a horizontal axle. The rickshaw has a curved handlebar at the front, and a footrest is visible beneath the seat. The wheels are equipped with black tires and silver rims.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The number of parts of the wheels is not mentioned, but the wheels of the rickshaw are mentioned.\nB. The wheels or the rickshaw are not mentioned.\nC. The number of parts of the wheels is mentioned in the description and is 2.\nD. The number of parts of the wheels is mentioned in the description but is not 2.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat cushion. It features a single large spoked wheel on each side, connected by a horizontal axle. The rickshaw has a curved handlebar at the front, and a footrest is visible beneath the seat. The wheels are equipped with black tires and silver rims.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lights of the rickshaw are not mentioned in the description.\nB. 
The lights of the rickshaw are mentioned in the description.\nC. The rickshaw is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat cushion. It features a single large spoked wheel on each side, connected by a horizontal axle. The rickshaw has a curved handlebar at the front, and a footrest is visible beneath the seat. The wheels are equipped with black tires and silver rims.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The people are not mentioned in the description.\nB. The people are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat cushion. It features a single large spoked wheel on each side, connected by a horizontal axle. The rickshaw has a curved handlebar at the front, and a footrest is visible beneath the seat. The wheels are equipped with black tires and silver rims.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The storage compartment of the rickshaw is not mentioned in the description.\nB. The storage compartment of the rickshaw is mentioned in the description.\nC. The rickshaw is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat cushion. It features a single large spoked wheel on each side, connected by a horizontal axle. The rickshaw has a curved handlebar at the front, and a footrest is visible beneath the seat. The wheels are equipped with black tires and silver rims.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bamboo forest is not mentioned in the description.\nB. The bamboo forest is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat cushion. It features a single large spoked wheel on each side, connected by a horizontal axle. The rickshaw has a curved handlebar at the front, and a footrest is visible beneath the seat. 
The wheels are equipped with black tires and silver rims.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The horn of the rickshaw is not mentioned in the description.\nB. The horn of the rickshaw is mentioned in the description.\nC. The rickshaw is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The shape of the wheels is mentioned in the description and is circular or spoked.", + "D. The color of the seat is mentioned in the description but is not black.", + "A. The number of parts of the wheels is not mentioned, but the wheels of the rickshaw are mentioned.", + "A. The lights of the rickshaw are not mentioned in the description.", + "A. The people are not mentioned in the description.", + "A. The storage compartment of the rickshaw is not mentioned in the description.", + "A. The bamboo forest is not mentioned in the description.", + "A. The horn of the rickshaw is not mentioned in the description." + ], + "score": 0.6875, + "score_pos": 0.16666666666666666, + "score_neg": 1.0, + "recognition_result": true + }, + "18845103": { + "pred": "A metallic spoon with a slightly curved, elongated handle and a shallow, oval-shaped bowl. The handle has a smooth, polished finish, and the bowl is also metallic with a reflective surface.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the handle is not mentioned, but the handle of the spoon is mentioned.", + 0.5 + ], + [ + "The color of the handle is mentioned in the description and is metallic.", + 1 + ], + [ + "The handle or the spoon is not mentioned.", + 0 + ], + [ + "The color of the handle is mentioned in the description but is not metallic.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the handle is not mentioned, but the handle of the spoon is mentioned.", + "pred_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the bowl is not mentioned, but the bowl of the spoon is mentioned.", + 0.5 + ], + [ + "The material of the bowl is mentioned in the description and is metal.", + 1 + ], + [ + "The bowl or the spoon is not mentioned.", + 0 + ], + [ + "The material of the bowl is mentioned in the description but is not metal.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The material of the bowl is mentioned in the description and is metal.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the bowl is not mentioned, but the bowl of the spoon is mentioned.", + 0.5 + ], + [ + "The color of the bowl is mentioned in the description and is metallic.", + 1 + ], + [ + "The bowl or the spoon is not mentioned.", + 0 + ], + [ + "The color of the bowl is mentioned in the description but is not metallic.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the bowl is not mentioned, but the bowl of the spoon is mentioned.", + "pred_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the bowl is not mentioned, but the bowl of the spoon is mentioned.", + 0.5 + ], + [ + "The shape of the bowl is mentioned in the description and is round or oval.", + 1 + ], + [ + "The bowl or the spoon is not mentioned.", + 0 + ], + [ + "The shape of the bowl is mentioned in the description but is not round or oval.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The shape of the bowl is mentioned in the description and is round or oval.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the handle is not mentioned, but the handle of the spoon is mentioned.", + 0.5 + ], + [ + "The shape of the handle is mentioned in the description and is long, elongated, straight, or slender.", + 1 + ], + [ + "The handle or the spoon is not mentioned.", + 0 + ], + [ + "The shape of the handle is mentioned in the description but is not long, elongated, straight, or slender.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the handle is mentioned in the description and is long, elongated, straight, or slender.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The spoon is not mentioned in the description.", + 0 + ], + [ + "The engraved handle of the spoon is mentioned in the description.", + -1 + ], + [ + "The engraved handle of the spoon is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The engraved handle of the spoon is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cutting board is mentioned in the description.", + -1 + ], + [ + "The cutting board is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The cutting board is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The kitchen cabinets are mentioned in the description.", + -1 + ], + [ + "The kitchen cabinets are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The kitchen cabinets are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sink is mentioned in the description.", + -1 + ], + [ + "The sink is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The sink is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The spoon is not mentioned in the description.", + 0 + ], + [ + "The twisted handle of the spoon is mentioned in the description.", + -1 + ], + [ + "The twisted handle of the spoon is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The twisted handle of the spoon is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is spoon. Based on the image, is it likely that the object in the description is given class: spoon or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved, elongated handle and a shallow, oval-shaped bowl. The handle has a smooth, polished finish, and the bowl is also metallic with a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is spoon. Based on the image, is it likely that the object in the description is given class: spoon or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved, elongated handle and a shallow, oval-shaped bowl. The handle has a smooth, polished finish, and the bowl is also metallic with a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the handle is not mentioned, but the handle of the spoon is mentioned.\nB. The color of the handle is mentioned in the description and is metallic.\nC. The handle or the spoon is not mentioned.\nD. The color of the handle is mentioned in the description but is not metallic.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved, elongated handle and a shallow, oval-shaped bowl. The handle has a smooth, polished finish, and the bowl is also metallic with a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the bowl is not mentioned, but the bowl of the spoon is mentioned.\nB. The material of the bowl is mentioned in the description and is metal.\nC. The bowl or the spoon is not mentioned.\nD. The material of the bowl is mentioned in the description but is not metal.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved, elongated handle and a shallow, oval-shaped bowl. The handle has a smooth, polished finish, and the bowl is also metallic with a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the bowl is not mentioned, but the bowl of the spoon is mentioned.\nB. The color of the bowl is mentioned in the description and is metallic.\nC. The bowl or the spoon is not mentioned.\nD. The color of the bowl is mentioned in the description but is not metallic.\n", + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved, elongated handle and a shallow, oval-shaped bowl. The handle has a smooth, polished finish, and the bowl is also metallic with a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the bowl is not mentioned, but the bowl of the spoon is mentioned.\nB. The shape of the bowl is mentioned in the description and is round or oval.\nC. The bowl or the spoon is not mentioned.\nD. The shape of the bowl is mentioned in the description but is not round or oval.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved, elongated handle and a shallow, oval-shaped bowl. The handle has a smooth, polished finish, and the bowl is also metallic with a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the handle is not mentioned, but the handle of the spoon is mentioned.\nB. The shape of the handle is mentioned in the description and is long, elongated, straight, or slender.\nC. The handle or the spoon is not mentioned.\nD. 
The shape of the handle is mentioned in the description but is not long, elongated, straight, or slender.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved, elongated handle and a shallow, oval-shaped bowl. The handle has a smooth, polished finish, and the bowl is also metallic with a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The spoon is not mentioned in the description.\nB. The engraved handle of the spoon is mentioned in the description.\nC. The engraved handle of the spoon is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved, elongated handle and a shallow, oval-shaped bowl. The handle has a smooth, polished finish, and the bowl is also metallic with a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cutting board is mentioned in the description.\nB. 
The cutting board is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved, elongated handle and a shallow, oval-shaped bowl. The handle has a smooth, polished finish, and the bowl is also metallic with a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The kitchen cabinets are mentioned in the description.\nB. The kitchen cabinets are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved, elongated handle and a shallow, oval-shaped bowl. The handle has a smooth, polished finish, and the bowl is also metallic with a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sink is mentioned in the description.\nB. The sink is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved, elongated handle and a shallow, oval-shaped bowl. The handle has a smooth, polished finish, and the bowl is also metallic with a reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The spoon is not mentioned in the description.\nB. The twisted handle of the spoon is mentioned in the description.\nC. The twisted handle of the spoon is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The color of the handle is not mentioned, but the handle of the spoon is mentioned.", + "B. The material of the bowl is mentioned in the description and is metal.", + "A. The color of the bowl is not mentioned, but the bowl of the spoon is mentioned.", + "B. The shape of the bowl is mentioned in the description and is round or oval.", + "B. The shape of the handle is mentioned in the description and is long, elongated, straight, or slender.", + "C. The engraved handle of the spoon is not mentioned in the description.", + "B. The cutting board is not mentioned in the description.", + "B. The kitchen cabinets are not mentioned in the description.", + "B. The sink is not mentioned in the description.", + "C. The twisted handle of the spoon is not mentioned in the description." + ], + "score": 0.9, + "score_pos": 0.8, + "score_neg": 1.0, + "recognition_result": true + }, + "20993402": { + "pred": "A roll of white adhesive tape with a smooth, glossy surface and a slightly reflective sheen. 
The tape is wound tightly around a cylindrical core, with the outer edge appearing clean and unblemished.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the tape is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the tape is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the tape is not mentioned.", + 0 + ], + [ + "The tape is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The texture of the tape is mentioned in the description and is smooth.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the tape roll is mentioned in the description but is not beige, white, or transparent.", + -1 + ], + [ + "The color of the tape roll is mentioned in the description and is beige, white, or transparent.", + 1 + ], + [ + "The color of the tape roll is not mentioned, but the tape roll of the tape is mentioned.", + 0.5 + ], + [ + "The tape roll or the tape is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the tape roll is mentioned in the description and is beige, white, or transparent.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the tape is mentioned in the description but is not adhesive tape.", + -1 + ], + [ + "The type of the tape is mentioned in the description and is adhesive tape.", + 1 + ], + [ + "The type of the tape is not mentioned.", + 0 + ], + [ + "The tape is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The type of the tape is mentioned in the description and is adhesive tape.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the core is mentioned in the description but is not cardboard.", + -1 + ], + [ + "The material of the core is mentioned in the description and is cardboard.", + 1 + ], + [ + "The material of the core is not mentioned, but the core of the tape is mentioned.", + 0.5 + ], + [ + "The core or the tape is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the core is not mentioned, but the core of the tape is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the tape roll is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the tape roll is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the tape roll is not mentioned, but the tape roll of the tape is mentioned.", + 0.5 + ], + [ + "The tape roll or the tape is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the tape roll is not mentioned, but the tape roll of the tape is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tape is not mentioned in the description.", + 0 + ], + [ + "The dispenser of the tape is not mentioned in the description.", + 1 + ], + [ + "The dispenser of the tape is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The dispenser of the tape is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The trees are not mentioned in the description.", + 1 + ], + [ + "The trees are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The trees are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The window is not mentioned in the description.", + 1 + ], + [ + "The window is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The window is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The stack of plates are not mentioned in the description.", + 1 + ], + [ + "The stack of plates are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The stack of plates are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tape is not mentioned in the description.", + 0 + ], + [ + "The cutting edge of the tape is not mentioned in the description.", + 1 + ], + [ + "The cutting edge of the tape is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The cutting edge of the tape is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is tape. 
Based on the image, is it likely that the object in the description is given class: tape or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of white adhesive tape with a smooth, glossy surface and a slightly reflective sheen. The tape is wound tightly around a cylindrical core, with the outer edge appearing clean and unblemished.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is tape. Based on the image, is it likely that the object in the description is given class: tape or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of white adhesive tape with a smooth, glossy surface and a slightly reflective sheen. The tape is wound tightly around a cylindrical core, with the outer edge appearing clean and unblemished.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The texture of the tape is mentioned in the description but is not smooth.\nB. The texture of the tape is mentioned in the description and is smooth.\nC. The texture of the tape is not mentioned.\nD. The tape is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of white adhesive tape with a smooth, glossy surface and a slightly reflective sheen. The tape is wound tightly around a cylindrical core, with the outer edge appearing clean and unblemished.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the tape roll is mentioned in the description but is not beige, white, or transparent.\nB. The color of the tape roll is mentioned in the description and is beige, white, or transparent.\nC. The color of the tape roll is not mentioned, but the tape roll of the tape is mentioned.\nD. The tape roll or the tape is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of white adhesive tape with a smooth, glossy surface and a slightly reflective sheen. 
The tape is wound tightly around a cylindrical core, with the outer edge appearing clean and unblemished.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The type of the tape is mentioned in the description but is not adhesive tape.\nB. The type of the tape is mentioned in the description and is adhesive tape.\nC. The type of the tape is not mentioned.\nD. The tape is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of white adhesive tape with a smooth, glossy surface and a slightly reflective sheen. The tape is wound tightly around a cylindrical core, with the outer edge appearing clean and unblemished.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the core is mentioned in the description but is not cardboard.\nB. The material of the core is mentioned in the description and is cardboard.\nC. The material of the core is not mentioned, but the core of the tape is mentioned.\nD. The core or the tape is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA roll of white adhesive tape with a smooth, glossy surface and a slightly reflective sheen. The tape is wound tightly around a cylindrical core, with the outer edge appearing clean and unblemished.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the tape roll is mentioned in the description but is not plastic.\nB. The material of the tape roll is mentioned in the description and is plastic.\nC. The material of the tape roll is not mentioned, but the tape roll of the tape is mentioned.\nD. The tape roll or the tape is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of white adhesive tape with a smooth, glossy surface and a slightly reflective sheen. The tape is wound tightly around a cylindrical core, with the outer edge appearing clean and unblemished.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tape is not mentioned in the description.\nB. The dispenser of the tape is not mentioned in the description.\nC. The dispenser of the tape is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of white adhesive tape with a smooth, glossy surface and a slightly reflective sheen. The tape is wound tightly around a cylindrical core, with the outer edge appearing clean and unblemished.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The trees are not mentioned in the description.\nB. The trees are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of white adhesive tape with a smooth, glossy surface and a slightly reflective sheen. The tape is wound tightly around a cylindrical core, with the outer edge appearing clean and unblemished.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The window is not mentioned in the description.\nB. The window is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of white adhesive tape with a smooth, glossy surface and a slightly reflective sheen. 
The tape is wound tightly around a cylindrical core, with the outer edge appearing clean and unblemished.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The stack of plates are not mentioned in the description.\nB. The stack of plates are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of white adhesive tape with a smooth, glossy surface and a slightly reflective sheen. The tape is wound tightly around a cylindrical core, with the outer edge appearing clean and unblemished.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tape is not mentioned in the description.\nB. The cutting edge of the tape is not mentioned in the description.\nC. The cutting edge of the tape is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The texture of the tape is mentioned in the description and is smooth.", + "B. The color of the tape roll is mentioned in the description and is beige, white, or transparent.", + "B. The type of the tape is mentioned in the description and is adhesive tape.", + "C. The material of the core is not mentioned, but the core of the tape is mentioned.", + "C. The material of the tape roll is not mentioned, but the tape roll of the tape is mentioned.", + "B. The dispenser of the tape is not mentioned in the description.", + "A. The trees are not mentioned in the description.", + "A. 
The window is not mentioned in the description.", + "A. The stack of plates are not mentioned in the description.", + "B. The cutting edge of the tape is not mentioned in the description." + ], + "score": 0.9, + "score_pos": 0.8, + "score_neg": 1.0, + "recognition_result": true + }, + "21529954": { + "pred": "A cylindrical can with a predominantly green label featuring the word \"Pepsi\" in white, bold letters. The top of the can is orange with a white cap.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The can/container is not mentioned.", + 0 + ], + [ + "The material of the can/container is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the can/container is not mentioned.", + 0 + ], + [ + "The material of the can/container is mentioned in the description and is plastic.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the can/container is not mentioned.", + "pred_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cap/lid or the can/container is not mentioned.", + 0 + ], + [ + "The shape of the cap/lid is mentioned in the description but is not circular.", + -1 + ], + [ + "The shape of the cap/lid is not mentioned, but the cap/lid of the can/container is mentioned.", + 0.5 + ], + [ + "The shape of the cap/lid is mentioned in the description and is circular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the cap/lid is not mentioned, but the cap/lid of the can/container is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cap/lid or the can/container is not mentioned.", + 0 + ], + [ + "The color of the cap/lid is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the cap/lid is not mentioned, but the cap/lid of the can/container is mentioned.", + 0.5 + ], + [ + "The color of the cap/lid is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the cap/lid is mentioned in the description and is white.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The body or the can/container is not mentioned.", + 0 + ], + [ + "The shape of the body is mentioned in the description but is not cylindrical.", + -1 + ], + [ + "The shape of the body is not mentioned, but the body of the can/container is mentioned.", + 0.5 + ], + [ + "The shape of the body is mentioned in the description and is cylindrical.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the body is mentioned in the description and is cylindrical.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The label or the can/container is not mentioned.", + 0 + ], + [ + "The color of the label is mentioned in the description but is not green, white, yellow.", + -1 + ], + [ + "The color of the label is not mentioned, but the label of the can/container is mentioned.", + 0.5 + ], + [ + "The color of the label is mentioned in the description and is green, white, yellow.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the label is mentioned in the description and is green, white, yellow.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The raspberries are mentioned in the description.", + -1 + ], + [ + "The raspberries are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The raspberries are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The red bell peppers are mentioned in the description.", + -1 + ], + [ + "The red bell peppers are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The red bell peppers are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ginger is mentioned in the description.", + -1 + ], + [ + "The ginger is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The ginger is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sweet potato is mentioned in the description.", + -1 + ], + [ + "The sweet potato is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The sweet potato is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The base of the can/container is mentioned in the description.", + -1 + ], + [ + "The base of the can/container is not mentioned in the description.", + 1 + ], + [ + "The can/container is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The base of the can/container is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is can/container. Based on the image, is it likely that the object in the description is given class: can/container or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a predominantly green label featuring the word \"Pepsi\" in white, bold letters. The top of the can is orange with a white cap.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is can/container. Based on the image, is it likely that the object in the description is given class: can/container or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a predominantly green label featuring the word \"Pepsi\" in white, bold letters. The top of the can is orange with a white cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The can/container is not mentioned.\nB. The material of the can/container is mentioned in the description but is not plastic.\nC. The material of the can/container is not mentioned.\nD. The material of the can/container is mentioned in the description and is plastic.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a predominantly green label featuring the word \"Pepsi\" in white, bold letters. The top of the can is orange with a white cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cap/lid or the can/container is not mentioned.\nB. The shape of the cap/lid is mentioned in the description but is not circular.\nC. The shape of the cap/lid is not mentioned, but the cap/lid of the can/container is mentioned.\nD. 
The shape of the cap/lid is mentioned in the description and is circular.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a predominantly green label featuring the word \"Pepsi\" in white, bold letters. The top of the can is orange with a white cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cap/lid or the can/container is not mentioned.\nB. The color of the cap/lid is mentioned in the description but is not white.\nC. The color of the cap/lid is not mentioned, but the cap/lid of the can/container is mentioned.\nD. The color of the cap/lid is mentioned in the description and is white.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a predominantly green label featuring the word \"Pepsi\" in white, bold letters. The top of the can is orange with a white cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The body or the can/container is not mentioned.\nB. The shape of the body is mentioned in the description but is not cylindrical.\nC. 
The shape of the body is not mentioned, but the body of the can/container is mentioned.\nD. The shape of the body is mentioned in the description and is cylindrical.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a predominantly green label featuring the word \"Pepsi\" in white, bold letters. The top of the can is orange with a white cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The label or the can/container is not mentioned.\nB. The color of the label is mentioned in the description but is not green, white, yellow.\nC. The color of the label is not mentioned, but the label of the can/container is mentioned.\nD. The color of the label is mentioned in the description and is green, white, yellow.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a predominantly green label featuring the word \"Pepsi\" in white, bold letters. The top of the can is orange with a white cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The raspberries are mentioned in the description.\nB. 
The raspberries are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a predominantly green label featuring the word \"Pepsi\" in white, bold letters. The top of the can is orange with a white cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The red bell peppers are mentioned in the description.\nB. The red bell peppers are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a predominantly green label featuring the word \"Pepsi\" in white, bold letters. The top of the can is orange with a white cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ginger is mentioned in the description.\nB. The ginger is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a predominantly green label featuring the word \"Pepsi\" in white, bold letters. The top of the can is orange with a white cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sweet potato is mentioned in the description.\nB. The sweet potato is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a predominantly green label featuring the word \"Pepsi\" in white, bold letters. The top of the can is orange with a white cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The base of the can/container is mentioned in the description.\nB. The base of the can/container is not mentioned in the description.\nC. The can/container is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The material of the can/container is not mentioned.", + "C. The shape of the cap/lid is not mentioned, but the cap/lid of the can/container is mentioned.", + "D. The color of the cap/lid is mentioned in the description and is white.", + "D. The shape of the body is mentioned in the description and is cylindrical.", + "D. The color of the label is mentioned in the description and is green, white, yellow.", + "B. The raspberries are not mentioned in the description.", + "B. 
The red bell peppers are not mentioned in the description.", + "B. The ginger is not mentioned in the description.", + "B. The sweet potato is not mentioned in the description.", + "B. The base of the can/container is not mentioned in the description." + ], + "score": 0.85, + "score_pos": 0.7, + "score_neg": 1.0, + "recognition_result": true + }, + "22879790": { + "pred": "A single, partially peeled white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a visible root end that is dry and brownish. The layers are tightly packed, and the outer skin is mostly intact, showing a few small, white root remnants.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the garlic is mentioned in the description and is white.", + 1 + ], + [ + "The color of the garlic is not mentioned.", + 0 + ], + [ + "The garlic is not mentioned.", + 0 + ], + [ + "The color of the garlic is mentioned in the description but is not white.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The garlic is not mentioned.", + "pred_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the skin is mentioned in the description and is papery.", + 1 + ], + [ + "The texture of the skin is not mentioned, but the skin of the garlic is mentioned.", + 0.5 + ], + [ + "The skin or the garlic is not mentioned.", + 0 + ], + [ + "The texture of the skin is mentioned in the description but is not papery.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The texture of the skin is mentioned in the description and is papery.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the root is mentioned in the description and is brown.", + 1 + ], + [ + "The color of the root is not mentioned, but the root of the garlic is mentioned.", + 0.5 + ], + [ + "The root or the garlic is not mentioned.", + 0 + ], + [ + "The color of the root is mentioned in the description but is not brown.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the root is mentioned in the description and is brown.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The leaf of the garlic is not mentioned in the description.", + 1 + ], + [ + "The garlic is not mentioned in the description.", + 0 + ], + [ + "The leaf of the garlic is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The garlic is not mentioned in the description.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ceramic rooster and hen salt and pepper shakers are not mentioned in the description.", + 1 + ], + [ + "The ceramic rooster and hen salt and pepper shakers are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The ceramic rooster and hen salt and pepper shakers are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The stem of the garlic is not mentioned in the description.", + 1 + ], + [ + "The garlic is not mentioned in the description.", + 0 + ], + [ + "The stem of the garlic is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The garlic is not mentioned in the description.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ceramic hen salt shaker is not mentioned in the description.", + 1 + ], + [ + "The ceramic hen salt shaker is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The ceramic hen salt shaker is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green glass rooster is not mentioned in the description.", + 1 + ], + [ + "The green glass rooster is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The green glass rooster is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is garlic. Based on the image, is it likely that the object in the description is given class: garlic or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, partially peeled white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a visible root end that is dry and brownish. The layers are tightly packed, and the outer skin is mostly intact, showing a few small, white root remnants.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is garlic. Based on the image, is it likely that the object in the description is given class: garlic or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, partially peeled white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a visible root end that is dry and brownish. The layers are tightly packed, and the outer skin is mostly intact, showing a few small, white root remnants.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the garlic is mentioned in the description and is white.\nB. The color of the garlic is not mentioned.\nC. 
The garlic is not mentioned.\nD. The color of the garlic is mentioned in the description but is not white.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, partially peeled white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a visible root end that is dry and brownish. The layers are tightly packed, and the outer skin is mostly intact, showing a few small, white root remnants.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the skin is mentioned in the description and is papery.\nB. The texture of the skin is not mentioned, but the skin of the garlic is mentioned.\nC. The skin or the garlic is not mentioned.\nD. The texture of the skin is mentioned in the description but is not papery.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, partially peeled white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a visible root end that is dry and brownish. 
The layers are tightly packed, and the outer skin is mostly intact, showing a few small, white root remnants.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the root is mentioned in the description and is brown.\nB. The color of the root is not mentioned, but the root of the garlic is mentioned.\nC. The root or the garlic is not mentioned.\nD. The color of the root is mentioned in the description but is not brown.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, partially peeled white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a visible root end that is dry and brownish. The layers are tightly packed, and the outer skin is mostly intact, showing a few small, white root remnants.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The leaf of the garlic is not mentioned in the description.\nB. The garlic is not mentioned in the description.\nC. The leaf of the garlic is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA single, partially peeled white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a visible root end that is dry and brownish. The layers are tightly packed, and the outer skin is mostly intact, showing a few small, white root remnants.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ceramic rooster and hen salt and pepper shakers are not mentioned in the description.\nB. The ceramic rooster and hen salt and pepper shakers are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, partially peeled white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a visible root end that is dry and brownish. The layers are tightly packed, and the outer skin is mostly intact, showing a few small, white root remnants.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The stem of the garlic is not mentioned in the description.\nB. The garlic is not mentioned in the description.\nC. The stem of the garlic is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, partially peeled white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a visible root end that is dry and brownish. The layers are tightly packed, and the outer skin is mostly intact, showing a few small, white root remnants.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ceramic hen salt shaker is not mentioned in the description.\nB. The ceramic hen salt shaker is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, partially peeled white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a visible root end that is dry and brownish. The layers are tightly packed, and the outer skin is mostly intact, showing a few small, white root remnants.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green glass rooster is not mentioned in the description.\nB. The green glass rooster is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The garlic is not mentioned.", + "A. The texture of the skin is mentioned in the description and is papery.", + "A. The color of the root is mentioned in the description and is brown.", + "B. The garlic is not mentioned in the description.", + "A. 
The ceramic rooster and hen salt and pepper shakers are not mentioned in the description.", + "B. The garlic is not mentioned in the description.", + "A. The ceramic hen salt shaker is not mentioned in the description.", + "A. The green glass rooster is not mentioned in the description." + ], + "score": 0.625, + "score_pos": 0.6666666666666666, + "score_neg": 0.6, + "recognition_result": true + }, + "24010373": { + "pred": "The guitar has a dark, glossy finish with a cutaway body design. It features a white pickguard and a circular soundhole with a simple rosette pattern. The fretboard is dark with white dot inlays, and the headstock is equipped with tuning pegs.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The neck or the guitar is not mentioned.", + 0 + ], + [ + "The shape of the neck is mentioned in the description and is straight or slightly curved.", + 1 + ], + [ + "The shape of the neck is not mentioned, but the neck of the guitar is mentioned.", + 0.5 + ], + [ + "The shape of the neck is mentioned in the description but is not straight or slightly curved.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the neck is not mentioned, but the neck of the guitar is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The body or the guitar is not mentioned.", + 0 + ], + [ + "The texture of the body is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the body is not mentioned, but the body of the guitar is mentioned.", + 0.5 + ], + [ + "The texture of the body is mentioned in the description but is not smooth.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The texture of the body is not mentioned, but the body of the guitar is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The strings or the guitar are not mentioned.", + 0 + ], + [ + "The number of parts of the strings is mentioned in the description and is 6.", + 1 + ], + [ + "The number of parts of the strings is not mentioned, but the strings of the guitar are mentioned.", + 0.5 + ], + [ + "The number of parts of the strings is mentioned in the description but is not 6.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The number of parts of the strings is not mentioned, but the strings of the guitar are mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sound hole or the guitar is not mentioned.", + 0 + ], + [ + "The shape of the sound hole is mentioned in the description and is round.", + 1 + ], + [ + "The shape of the sound hole is not mentioned, but the sound hole of the guitar is mentioned.", + 0.5 + ], + [ + "The shape of the sound hole is mentioned in the description but is not round.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the sound hole is mentioned in the description and is round.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The guitar is not mentioned.", + 0 + ], + [ + "The color of the guitar is mentioned in the description and is black.", + 1 + ], + [ + "The color of the guitar is not mentioned.", + 0 + ], + [ + "The color of the guitar is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the guitar is mentioned in the description and is black.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The violin is not mentioned in the description.", + 1 + ], + [ + "The violin is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The violin is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The guitar is not mentioned in the description.", + 0 + ], + [ + "The pickguard of the guitar is not mentioned in the description.", + 1 + ], + [ + "The pickguard of the guitar is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The pickguard of the guitar is mentioned in the description.", + "pred_index": 2, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sign is not mentioned in the description.", + 1 + ], + [ + "The sign is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The sign is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The music stands are not mentioned in the description.", + 1 + ], + [ + "The music stands are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The music stands are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The books are not mentioned in the description.", + 1 + ], + [ + "The books are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The books are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is guitar. Based on the image, is it likely that the object in the description is given class: guitar or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy finish with a cutaway body design. It features a white pickguard and a circular soundhole with a simple rosette pattern. The fretboard is dark with white dot inlays, and the headstock is equipped with tuning pegs.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is guitar. Based on the image, is it likely that the object in the description is given class: guitar or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy finish with a cutaway body design. It features a white pickguard and a circular soundhole with a simple rosette pattern. The fretboard is dark with white dot inlays, and the headstock is equipped with tuning pegs.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The neck or the guitar is not mentioned.\nB. The shape of the neck is mentioned in the description and is straight or slightly curved.\nC. The shape of the neck is not mentioned, but the neck of the guitar is mentioned.\nD. The shape of the neck is mentioned in the description but is not straight or slightly curved.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy finish with a cutaway body design. It features a white pickguard and a circular soundhole with a simple rosette pattern. The fretboard is dark with white dot inlays, and the headstock is equipped with tuning pegs.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The body or the guitar is not mentioned.\nB. The texture of the body is mentioned in the description and is smooth.\nC. The texture of the body is not mentioned, but the body of the guitar is mentioned.\nD. 
The texture of the body is mentioned in the description but is not smooth.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy finish with a cutaway body design. It features a white pickguard and a circular soundhole with a simple rosette pattern. The fretboard is dark with white dot inlays, and the headstock is equipped with tuning pegs.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The strings or the guitar are not mentioned.\nB. The number of parts of the strings is mentioned in the description and is 6.\nC. The number of parts of the strings is not mentioned, but the strings of the guitar are mentioned.\nD. The number of parts of the strings is mentioned in the description but is not 6.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy finish with a cutaway body design. It features a white pickguard and a circular soundhole with a simple rosette pattern. 
The fretboard is dark with white dot inlays, and the headstock is equipped with tuning pegs.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sound hole or the guitar is not mentioned.\nB. The shape of the sound hole is mentioned in the description and is round.\nC. The shape of the sound hole is not mentioned, but the sound hole of the guitar is mentioned.\nD. The shape of the sound hole is mentioned in the description but is not round.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy finish with a cutaway body design. It features a white pickguard and a circular soundhole with a simple rosette pattern. The fretboard is dark with white dot inlays, and the headstock is equipped with tuning pegs.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The guitar is not mentioned.\nB. The color of the guitar is mentioned in the description and is black.\nC. The color of the guitar is not mentioned.\nD. The color of the guitar is mentioned in the description but is not black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy finish with a cutaway body design. It features a white pickguard and a circular soundhole with a simple rosette pattern. The fretboard is dark with white dot inlays, and the headstock is equipped with tuning pegs.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The violin is not mentioned in the description.\nB. The violin is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy finish with a cutaway body design. It features a white pickguard and a circular soundhole with a simple rosette pattern. The fretboard is dark with white dot inlays, and the headstock is equipped with tuning pegs.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The guitar is not mentioned in the description.\nB. The pickguard of the guitar is not mentioned in the description.\nC. The pickguard of the guitar is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy finish with a cutaway body design. It features a white pickguard and a circular soundhole with a simple rosette pattern. The fretboard is dark with white dot inlays, and the headstock is equipped with tuning pegs.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sign is not mentioned in the description.\nB. The sign is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy finish with a cutaway body design. It features a white pickguard and a circular soundhole with a simple rosette pattern. The fretboard is dark with white dot inlays, and the headstock is equipped with tuning pegs.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The music stands are not mentioned in the description.\nB. The music stands are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy finish with a cutaway body design. 
It features a white pickguard and a circular soundhole with a simple rosette pattern. The fretboard is dark with white dot inlays, and the headstock is equipped with tuning pegs.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The books are not mentioned in the description.\nB. The books are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The shape of the neck is not mentioned, but the neck of the guitar is mentioned.", + "C. The texture of the body is not mentioned, but the body of the guitar is mentioned.", + "C. The number of parts of the strings is not mentioned, but the strings of the guitar are mentioned.", + "B. The shape of the sound hole is mentioned in the description and is round.", + "B. The color of the guitar is mentioned in the description and is black.", + "A. The violin is not mentioned in the description.", + "C. The pickguard of the guitar is mentioned in the description.", + "A. The sign is not mentioned in the description.", + "A. The music stands are not mentioned in the description.", + "A. The books are not mentioned in the description." + ], + "score": 0.65, + "score_pos": 0.7, + "score_neg": 0.6, + "recognition_result": true + }, + "24694197": { + "pred": "A ripe avocado with a bumpy, dark green to almost black skin and a large, round, red to yellow-green pit nestled in the center.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The skin or the avocado is not mentioned.", + 0 + ], + [ + "The color of the skin is mentioned in the description but is not dark green.", + -1 + ], + [ + "The color of the skin is mentioned in the description and is dark green.", + 1 + ], + [ + "The color of the skin is not mentioned, but the skin of the avocado is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the skin is mentioned in the description and is dark green.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The flesh or the avocado is not mentioned.", + 0 + ], + [ + "The texture of the flesh is mentioned in the description but is not creamy.", + -1 + ], + [ + "The texture of the flesh is mentioned in the description and is creamy.", + 1 + ], + [ + "The texture of the flesh is not mentioned, but the flesh of the avocado is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The flesh or the avocado is not mentioned.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pit or the avocado is not mentioned.", + 0 + ], + [ + "The shape of the pit is mentioned in the description but is not oval.", + -1 + ], + [ + "The shape of the pit is mentioned in the description and is oval.", + 1 + ], + [ + "The shape of the pit is not mentioned, but the pit of the avocado is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the pit is mentioned in the description and is oval.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The flesh or the avocado is not mentioned.", + 0 + ], + [ + "The color of the flesh is mentioned in the description but is not light green.", + -1 + ], + [ + "The color of the flesh is mentioned in the description and is light green.", + 1 + ], + [ + "The color of the flesh is not mentioned, but the flesh of the avocado is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. 
The flesh or the avocado is not mentioned.", + "pred_index": 0, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The avocado is not mentioned in the description.", + 0 + ], + [ + "The leaves of the avocado are mentioned in the description.", + -1 + ], + [ + "The leaves of the avocado are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The leaves of the avocado are not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The utility pole is mentioned in the description.", + -1 + ], + [ + "The utility pole is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The utility pole is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The avocado is not mentioned in the description.", + 0 + ], + [ + "The stem of the avocado is mentioned in the description.", + -1 + ], + [ + "The stem of the avocado is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The stem of the avocado is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The building is mentioned in the description.", + -1 + ], + [ + "The building is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The building is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The people are mentioned in the description.", + -1 + ], + [ + "The people are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The people are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is avocado. Based on the image, is it likely that the object in the description is given class: avocado or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green to almost black skin and a large, round, red to yellow-green pit nestled in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is avocado. Based on the image, is it likely that the object in the description is given class: avocado or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green to almost black skin and a large, round, red to yellow-green pit nestled in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The skin or the avocado is not mentioned.\nB. The color of the skin is mentioned in the description but is not dark green.\nC. The color of the skin is mentioned in the description and is dark green.\nD. The color of the skin is not mentioned, but the skin of the avocado is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green to almost black skin and a large, round, red to yellow-green pit nestled in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The flesh or the avocado is not mentioned.\nB. The texture of the flesh is mentioned in the description but is not creamy.\nC. The texture of the flesh is mentioned in the description and is creamy.\nD. The texture of the flesh is not mentioned, but the flesh of the avocado is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green to almost black skin and a large, round, red to yellow-green pit nestled in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pit or the avocado is not mentioned.\nB. The shape of the pit is mentioned in the description but is not oval.\nC. The shape of the pit is mentioned in the description and is oval.\nD. The shape of the pit is not mentioned, but the pit of the avocado is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green to almost black skin and a large, round, red to yellow-green pit nestled in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The flesh or the avocado is not mentioned.\nB. The color of the flesh is mentioned in the description but is not light green.\nC. The color of the flesh is mentioned in the description and is light green.\nD. The color of the flesh is not mentioned, but the flesh of the avocado is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green to almost black skin and a large, round, red to yellow-green pit nestled in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The avocado is not mentioned in the description.\nB. The leaves of the avocado are mentioned in the description.\nC. The leaves of the avocado are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green to almost black skin and a large, round, red to yellow-green pit nestled in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The utility pole is mentioned in the description.\nB. The utility pole is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green to almost black skin and a large, round, red to yellow-green pit nestled in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The avocado is not mentioned in the description.\nB. The stem of the avocado is mentioned in the description.\nC. The stem of the avocado is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green to almost black skin and a large, round, red to yellow-green pit nestled in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The building is mentioned in the description.\nB. The building is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green to almost black skin and a large, round, red to yellow-green pit nestled in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The people are mentioned in the description.\nB. The people are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the skin is mentioned in the description and is dark green.", + "A. The flesh or the avocado is not mentioned.", + "C. The shape of the pit is mentioned in the description and is oval.", + "A. The flesh or the avocado is not mentioned.", + "C. The leaves of the avocado are not mentioned in the description.", + "B. The utility pole is not mentioned in the description.", + "C. The stem of the avocado is not mentioned in the description.", + "B. The building is not mentioned in the description.", + "B. The people are not mentioned in the description." + ], + "score": 0.7777777777777778, + "score_pos": 0.5, + "score_neg": 1.0, + "recognition_result": true + }, + "279135": { + "pred": "The ski features a predominantly black surface with intricate orange and white geometric patterns along its length. It is equipped with a black binding system, including a silver and black mechanism, and an orange adjustment lever. 
The tip of the ski has a similar pattern to the rest of the ski.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The binding or the ski is not mentioned.", + 0 + ], + [ + "The color of the binding is not mentioned, but the binding of the ski is mentioned.", + 0.5 + ], + [ + "The color of the binding is mentioned in the description and is black or orange.", + 1 + ], + [ + "The color of the binding is mentioned in the description but is not black or orange.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the binding is mentioned in the description and is black or orange.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The deck or the ski is not mentioned.", + 0 + ], + [ + "The color of the deck is not mentioned, but the deck of the ski is mentioned.", + 0.5 + ], + [ + "The color of the deck is mentioned in the description and is black, white, or orange.", + 1 + ], + [ + "The color of the deck is mentioned in the description but is not black, white, or orange.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the deck is mentioned in the description and is black, white, or orange.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The binding or the ski is not mentioned.", + 0 + ], + [ + "The material of the binding is not mentioned, but the binding of the ski is mentioned.", + 0.5 + ], + [ + "The material of the binding is mentioned in the description and is metal and plastic.", + 1 + ], + [ + "The material of the binding is mentioned in the description but is not metal and plastic.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The material of the binding is mentioned in the description and is metal and plastic.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The deck or the ski is not mentioned.", + 0 + ], + [ + "The shape of the deck is not mentioned, but the deck of the ski is mentioned.", + 0.5 + ], + [ + "The shape of the deck is mentioned in the description and is slightly curved.", + 1 + ], + [ + "The shape of the deck is mentioned in the description but is not slightly curved.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the deck is not mentioned, but the deck of the ski is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tail or the ski is not mentioned.", + 0 + ], + [ + "The texture/pattern of the tail is not mentioned, but the tail of the ski is mentioned.", + 0.5 + ], + [ + "The texture/pattern of the tail is mentioned in the description and is geometric shapes.", + 1 + ], + [ + "The texture/pattern of the tail is mentioned in the description but is not geometric shapes.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The tail or the ski is not mentioned.", + "pred_index": 0, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wheels of the ski are not mentioned in the description.", + 1 + ], + [ + "The ski is not mentioned in the description.", + 0 + ], + [ + "The wheels of the ski are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The wheels of the ski are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wooden post is not mentioned in the description.", + 1 + ], + [ + "The wooden post is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The wooden post is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The glass window is not mentioned in the description.", + 1 + ], + [ + "The glass window is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The glass window is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ski base of the ski is not mentioned in the description.", + 1 + ], + [ + "The ski is not mentioned in the description.", + 0 + ], + [ + "The ski base of the ski is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The ski base of the ski is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ski poles are not mentioned in the description.", + 1 + ], + [ + "The ski poles are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The ski poles are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is ski. 
Based on the image, is it likely that the object in the description is given class: ski or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black surface with intricate orange and white geometric patterns along its length. It is equipped with a black binding system, including a silver and black mechanism, and an orange adjustment lever. The tip of the ski has a similar pattern to the rest of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is ski. Based on the image, is it likely that the object in the description is given class: ski or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black surface with intricate orange and white geometric patterns along its length. It is equipped with a black binding system, including a silver and black mechanism, and an orange adjustment lever. 
The tip of the ski has a similar pattern to the rest of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The binding or the ski is not mentioned.\nB. The color of the binding is not mentioned, but the binding of the ski is mentioned.\nC. The color of the binding is mentioned in the description and is black or orange.\nD. The color of the binding is mentioned in the description but is not black or orange.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black surface with intricate orange and white geometric patterns along its length. It is equipped with a black binding system, including a silver and black mechanism, and an orange adjustment lever. The tip of the ski has a similar pattern to the rest of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The deck or the ski is not mentioned.\nB. The color of the deck is not mentioned, but the deck of the ski is mentioned.\nC. The color of the deck is mentioned in the description and is black, white, or orange.\nD. The color of the deck is mentioned in the description but is not black, white, or orange.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black surface with intricate orange and white geometric patterns along its length. It is equipped with a black binding system, including a silver and black mechanism, and an orange adjustment lever. The tip of the ski has a similar pattern to the rest of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The binding or the ski is not mentioned.\nB. The material of the binding is not mentioned, but the binding of the ski is mentioned.\nC. The material of the binding is mentioned in the description and is metal and plastic.\nD. The material of the binding is mentioned in the description but is not metal and plastic.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black surface with intricate orange and white geometric patterns along its length. It is equipped with a black binding system, including a silver and black mechanism, and an orange adjustment lever. The tip of the ski has a similar pattern to the rest of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The deck or the ski is not mentioned.\nB. The shape of the deck is not mentioned, but the deck of the ski is mentioned.\nC. The shape of the deck is mentioned in the description and is slightly curved.\nD. 
The shape of the deck is mentioned in the description but is not slightly curved.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black surface with intricate orange and white geometric patterns along its length. It is equipped with a black binding system, including a silver and black mechanism, and an orange adjustment lever. The tip of the ski has a similar pattern to the rest of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tail or the ski is not mentioned.\nB. The texture/pattern of the tail is not mentioned, but the tail of the ski is mentioned.\nC. The texture/pattern of the tail is mentioned in the description and is geometric shapes.\nD. The texture/pattern of the tail is mentioned in the description but is not geometric shapes.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black surface with intricate orange and white geometric patterns along its length. It is equipped with a black binding system, including a silver and black mechanism, and an orange adjustment lever. 
The tip of the ski has a similar pattern to the rest of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wheels of the ski are not mentioned in the description.\nB. The ski is not mentioned in the description.\nC. The wheels of the ski are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black surface with intricate orange and white geometric patterns along its length. It is equipped with a black binding system, including a silver and black mechanism, and an orange adjustment lever. The tip of the ski has a similar pattern to the rest of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wooden post is not mentioned in the description.\nB. The wooden post is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black surface with intricate orange and white geometric patterns along its length. 
It is equipped with a black binding system, including a silver and black mechanism, and an orange adjustment lever. The tip of the ski has a similar pattern to the rest of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The glass window is not mentioned in the description.\nB. The glass window is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black surface with intricate orange and white geometric patterns along its length. It is equipped with a black binding system, including a silver and black mechanism, and an orange adjustment lever. The tip of the ski has a similar pattern to the rest of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ski base of the ski is not mentioned in the description.\nB. The ski is not mentioned in the description.\nC. The ski base of the ski is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black surface with intricate orange and white geometric patterns along its length. It is equipped with a black binding system, including a silver and black mechanism, and an orange adjustment lever. The tip of the ski has a similar pattern to the rest of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ski poles are not mentioned in the description.\nB. The ski poles are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the binding is mentioned in the description and is black or orange.", + "C. The color of the deck is mentioned in the description and is black, white, or orange.", + "C. The material of the binding is mentioned in the description and is metal and plastic.", + "B. The shape of the deck is not mentioned, but the deck of the ski is mentioned.", + "A. The tail or the ski is not mentioned.", + "A. The wheels of the ski are not mentioned in the description.", + "A. The wooden post is not mentioned in the description.", + "A. The glass window is not mentioned in the description.", + "A. The ski base of the ski is not mentioned in the description.", + "A. The ski poles are not mentioned in the description." 
+ ], + "score": 0.85, + "score_pos": 0.7, + "score_neg": 1.0, + "recognition_result": true + }, + "622329": { + "pred": "A rectangular, flat, beige eraser with rounded corners and a slightly textured surface.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The eraser is not mentioned.", + 0 + ], + [ + "The color of the eraser is mentioned in the description but is not brown.", + -1 + ], + [ + "The color of the eraser is mentioned in the description and is brown.", + 1 + ], + [ + "The color of the eraser is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the eraser is mentioned in the description but is not brown.", + "pred_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The eraser is not mentioned.", + 0 + ], + [ + "The material of the eraser is mentioned in the description but is not rubber.", + -1 + ], + [ + "The material of the eraser is mentioned in the description and is rubber.", + 1 + ], + [ + "The material of the eraser is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the eraser is not mentioned.", + "pred_index": 3, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The corner or the eraser is not mentioned.", + 0 + ], + [ + "The shape of the corner is mentioned in the description but is not rounded.", + -1 + ], + [ + "The shape of the corner is mentioned in the description and is rounded.", + 1 + ], + [ + "The shape of the corner is not mentioned, but the corner of the eraser is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the corner is mentioned in the description and is rounded.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The desk organizer is not mentioned in the description.", + 1 + ], + [ + "The desk organizer is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The desk organizer is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The paper sleeve of the eraser is not mentioned in the description.", + 1 + ], + [ + "The eraser is not mentioned in the description.", + 0 + ], + [ + "The paper sleeve of the eraser is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The paper sleeve of the eraser is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The phone is not mentioned in the description.", + 1 + ], + [ + "The phone is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The phone is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sticky notes are not mentioned in the description.", + 1 + ], + [ + "The sticky notes are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The sticky notes are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tape is not mentioned in the description.", + 1 + ], + [ + "The tape is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The tape is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is eraser. Based on the image, is it likely that the object in the description is given class: eraser or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige eraser with rounded corners and a slightly textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is eraser. Based on the image, is it likely that the object in the description is given class: eraser or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige eraser with rounded corners and a slightly textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The eraser is not mentioned.\nB. The color of the eraser is mentioned in the description but is not brown.\nC. The color of the eraser is mentioned in the description and is brown.\nD. The color of the eraser is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige eraser with rounded corners and a slightly textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The eraser is not mentioned.\nB. The material of the eraser is mentioned in the description but is not rubber.\nC. The material of the eraser is mentioned in the description and is rubber.\nD. The material of the eraser is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige eraser with rounded corners and a slightly textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The corner or the eraser is not mentioned.\nB. The shape of the corner is mentioned in the description but is not rounded.\nC. The shape of the corner is mentioned in the description and is rounded.\nD. The shape of the corner is not mentioned, but the corner of the eraser is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige eraser with rounded corners and a slightly textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The desk organizer is not mentioned in the description.\nB. The desk organizer is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige eraser with rounded corners and a slightly textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The paper sleeve of the eraser is not mentioned in the description.\nB. The eraser is not mentioned in the description.\nC. The paper sleeve of the eraser is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige eraser with rounded corners and a slightly textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The phone is not mentioned in the description.\nB. The phone is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige eraser with rounded corners and a slightly textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sticky notes are not mentioned in the description.\nB. 
The sticky notes are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige eraser with rounded corners and a slightly textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tape is not mentioned in the description.\nB. The tape is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the eraser is mentioned in the description but is not brown.", + "D. The material of the eraser is not mentioned.", + "C. The shape of the corner is mentioned in the description and is rounded.", + "A. The desk organizer is not mentioned in the description.", + "A. The paper sleeve of the eraser is not mentioned in the description.", + "A. The phone is not mentioned in the description.", + "A. The sticky notes are not mentioned in the description.", + "A. The tape is not mentioned in the description." + ], + "score": 0.625, + "score_pos": 0.0, + "score_neg": 1.0, + "recognition_result": true + }, + "622332": { + "pred": "A black rectangular stapler with a glossy finish, featuring a silver brand logo on the top right corner. 
The stapler has a visible metal stapling mechanism on the right side.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the base plate is mentioned in the description and is metallic.", + 1 + ], + [ + "The base plate or the stapler is not mentioned.", + 0 + ], + [ + "The material of the base plate is mentioned in the description but is not metallic.", + -1 + ], + [ + "The material of the base plate is not mentioned, but the base plate of the stapler is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the base plate is not mentioned, but the base plate of the stapler is mentioned.", + "pred_index": 3, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the stapler is mentioned in the description and is rectangular.", + 1 + ], + [ + "The stapler is not mentioned.", + 0 + ], + [ + "The shape of the stapler is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the stapler is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the stapler is mentioned in the description and is rectangular.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the logo is mentioned in the description and is white.", + 1 + ], + [ + "The logo or the stapler is not mentioned.", + 0 + ], + [ + "The color of the logo is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the logo is not mentioned, but the logo of the stapler is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the logo is not mentioned, but the logo of the stapler is mentioned.", + "pred_index": 3, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the top cover is mentioned in the description and is black.", + 1 + ], + [ + "The top cover or the stapler is not mentioned.", + 0 + ], + [ + "The color of the top cover is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the top cover is not mentioned, but the top cover of the stapler is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the top cover is mentioned in the description and is black.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the stapler is mentioned in the description and is black.", + 1 + ], + [ + "The stapler is not mentioned.", + 0 + ], + [ + "The color of the stapler is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the stapler is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the stapler is mentioned in the description and is black.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tape is mentioned in the description.", + -1 + ], + [ + "The tape is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The tape is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The staple remover of the stapler is mentioned in the description.", + -1 + ], + [ + "The stapler is not mentioned in the description.", + 0 + ], + [ + "The staple remover of the stapler is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The staple remover of the stapler is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The paper clips are mentioned in the description.", + -1 + ], + [ + "The paper clips are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The paper clips are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The paintbrushes are mentioned in the description.", + -1 + ], + [ + "The paintbrushes are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The paintbrushes are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The staple storage compartment of the stapler is mentioned in the description.", + -1 + ], + [ + "The stapler is not mentioned in the description.", + 0 + ], + [ + "The staple storage compartment of the stapler is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The staple storage compartment of the stapler is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is stapler. Based on the image, is it likely that the object in the description is given class: stapler or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rectangular stapler with a glossy finish, featuring a silver brand logo on the top right corner. The stapler has a visible metal stapling mechanism on the right side.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is stapler. Based on the image, is it likely that the object in the description is given class: stapler or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rectangular stapler with a glossy finish, featuring a silver brand logo on the top right corner. 
The stapler has a visible metal stapling mechanism on the right side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the base plate is mentioned in the description and is metallic.\nB. The base plate or the stapler is not mentioned.\nC. The material of the base plate is mentioned in the description but is not metallic.\nD. The material of the base plate is not mentioned, but the base plate of the stapler is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rectangular stapler with a glossy finish, featuring a silver brand logo on the top right corner. The stapler has a visible metal stapling mechanism on the right side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the stapler is mentioned in the description and is rectangular.\nB. The stapler is not mentioned.\nC. The shape of the stapler is mentioned in the description but is not rectangular.\nD. The shape of the stapler is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black rectangular stapler with a glossy finish, featuring a silver brand logo on the top right corner. The stapler has a visible metal stapling mechanism on the right side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the logo is mentioned in the description and is white.\nB. The logo or the stapler is not mentioned.\nC. The color of the logo is mentioned in the description but is not white.\nD. The color of the logo is not mentioned, but the logo of the stapler is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rectangular stapler with a glossy finish, featuring a silver brand logo on the top right corner. The stapler has a visible metal stapling mechanism on the right side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the top cover is mentioned in the description and is black.\nB. The top cover or the stapler is not mentioned.\nC. The color of the top cover is mentioned in the description but is not black.\nD. The color of the top cover is not mentioned, but the top cover of the stapler is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rectangular stapler with a glossy finish, featuring a silver brand logo on the top right corner. The stapler has a visible metal stapling mechanism on the right side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the stapler is mentioned in the description and is black.\nB. The stapler is not mentioned.\nC. The color of the stapler is mentioned in the description but is not black.\nD. The color of the stapler is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rectangular stapler with a glossy finish, featuring a silver brand logo on the top right corner. The stapler has a visible metal stapling mechanism on the right side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tape is mentioned in the description.\nB. The tape is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black rectangular stapler with a glossy finish, featuring a silver brand logo on the top right corner. The stapler has a visible metal stapling mechanism on the right side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The staple remover of the stapler is mentioned in the description.\nB. The stapler is not mentioned in the description.\nC. The staple remover of the stapler is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rectangular stapler with a glossy finish, featuring a silver brand logo on the top right corner. The stapler has a visible metal stapling mechanism on the right side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The paper clips are mentioned in the description.\nB. The paper clips are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rectangular stapler with a glossy finish, featuring a silver brand logo on the top right corner. 
The stapler has a visible metal stapling mechanism on the right side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The paintbrushes are mentioned in the description.\nB. The paintbrushes are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rectangular stapler with a glossy finish, featuring a silver brand logo on the top right corner. The stapler has a visible metal stapling mechanism on the right side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The staple storage compartment of the stapler is mentioned in the description.\nB. The stapler is not mentioned in the description.\nC. The staple storage compartment of the stapler is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The material of the base plate is not mentioned, but the base plate of the stapler is mentioned.", + "A. The shape of the stapler is mentioned in the description and is rectangular.", + "D. The color of the logo is not mentioned, but the logo of the stapler is mentioned.", + "A. The color of the top cover is mentioned in the description and is black.", + "A. The color of the stapler is mentioned in the description and is black.", + "B. The tape is not mentioned in the description.", + "C. The staple remover of the stapler is not mentioned in the description.", + "B. The paper clips are not mentioned in the description.", + "B. 
The paintbrushes are not mentioned in the description.", + "C. The staple storage compartment of the stapler is not mentioned in the description." + ], + "score": 0.9, + "score_pos": 0.8, + "score_neg": 1.0, + "recognition_result": true + }, + "1075308": { + "pred": "A vintage-style television set with a boxy, black plastic casing. The front features a large, square screen with a slightly curved surface. The top of the television has a series of buttons and dials, and there is a small, rectangular display area above the screen.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the screen is mentioned in the description and is rectangular or rounded rectangular.", + 1 + ], + [ + "The shape of the screen is mentioned in the description but is not rectangular or rounded rectangular.", + -1 + ], + [ + "The screen or the monitor/tv is not mentioned.", + 0 + ], + [ + "The shape of the screen is not mentioned, but the screen of the monitor/tv is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the screen is mentioned in the description and is rectangular or rounded rectangular.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the frame is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the frame is mentioned in the description but is not plastic.", + -1 + ], + [ + "The frame or the monitor/tv is not mentioned.", + 0 + ], + [ + "The material of the frame is not mentioned, but the frame of the monitor/tv is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. 
The material of the frame is mentioned in the description and is plastic.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the monitor/tv is mentioned in the description and is black.", + 1 + ], + [ + "The color of the monitor/tv is mentioned in the description but is not black.", + -1 + ], + [ + "The monitor/tv is not mentioned.", + 0 + ], + [ + "The color of the monitor/tv is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the monitor/tv is mentioned in the description and is black.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the screen is mentioned in the description and is glass.", + 1 + ], + [ + "The material of the screen is mentioned in the description but is not glass.", + -1 + ], + [ + "The screen or the monitor/tv is not mentioned.", + 0 + ], + [ + "The material of the screen is not mentioned, but the screen of the monitor/tv is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the screen is not mentioned, but the screen of the monitor/tv is mentioned.", + "pred_index": 3, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chairs are mentioned in the description.", + -1 + ], + [ + "The chairs are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The chairs are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The remote control of the monitor/tv is mentioned in the description.", + -1 + ], + [ + "The monitor/tv is not mentioned in the description.", + 0 + ], + [ + "The remote control of the monitor/tv is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The remote control of the monitor/tv is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ports of the monitor/tv are mentioned in the description.", + -1 + ], + [ + "The monitor/tv is not mentioned in the description.", + 0 + ], + [ + "The ports of the monitor/tv are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The ports of the monitor/tv are not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The glass are mentioned in the description.", + -1 + ], + [ + "The glass are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The glass are mentioned in the description.", + "pred_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The range hood is mentioned in the description.", + -1 + ], + [ + "The range hood is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The range hood is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is monitor/tv. Based on the image, is it likely that the object in the description is given class: monitor/tv or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black plastic casing. The front features a large, square screen with a slightly curved surface. The top of the television has a series of buttons and dials, and there is a small, rectangular display area above the screen.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is monitor/tv. Based on the image, is it likely that the object in the description is given class: monitor/tv or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black plastic casing. 
The front features a large, square screen with a slightly curved surface. The top of the television has a series of buttons and dials, and there is a small, rectangular display area above the screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the screen is mentioned in the description and is rectangular or rounded rectangular.\nB. The shape of the screen is mentioned in the description but is not rectangular or rounded rectangular.\nC. The screen or the monitor/tv is not mentioned.\nD. The shape of the screen is not mentioned, but the screen of the monitor/tv is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black plastic casing. The front features a large, square screen with a slightly curved surface. The top of the television has a series of buttons and dials, and there is a small, rectangular display area above the screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the frame is mentioned in the description and is plastic.\nB. The material of the frame is mentioned in the description but is not plastic.\nC. The frame or the monitor/tv is not mentioned.\nD. The material of the frame is not mentioned, but the frame of the monitor/tv is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black plastic casing. The front features a large, square screen with a slightly curved surface. The top of the television has a series of buttons and dials, and there is a small, rectangular display area above the screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the monitor/tv is mentioned in the description and is black.\nB. The color of the monitor/tv is mentioned in the description but is not black.\nC. The monitor/tv is not mentioned.\nD. The color of the monitor/tv is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black plastic casing. The front features a large, square screen with a slightly curved surface. The top of the television has a series of buttons and dials, and there is a small, rectangular display area above the screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the screen is mentioned in the description and is glass.\nB. The material of the screen is mentioned in the description but is not glass.\nC. The screen or the monitor/tv is not mentioned.\nD. 
The material of the screen is not mentioned, but the screen of the monitor/tv is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black plastic casing. The front features a large, square screen with a slightly curved surface. The top of the television has a series of buttons and dials, and there is a small, rectangular display area above the screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chairs are mentioned in the description.\nB. The chairs are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black plastic casing. The front features a large, square screen with a slightly curved surface. The top of the television has a series of buttons and dials, and there is a small, rectangular display area above the screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The remote control of the monitor/tv is mentioned in the description.\nB. The monitor/tv is not mentioned in the description.\nC. 
The remote control of the monitor/tv is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black plastic casing. The front features a large, square screen with a slightly curved surface. The top of the television has a series of buttons and dials, and there is a small, rectangular display area above the screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ports of the monitor/tv are mentioned in the description.\nB. The monitor/tv is not mentioned in the description.\nC. The ports of the monitor/tv are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black plastic casing. The front features a large, square screen with a slightly curved surface. The top of the television has a series of buttons and dials, and there is a small, rectangular display area above the screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The glass are mentioned in the description.\nB. 
The glass are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black plastic casing. The front features a large, square screen with a slightly curved surface. The top of the television has a series of buttons and dials, and there is a small, rectangular display area above the screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The range hood is mentioned in the description.\nB. The range hood is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The shape of the screen is mentioned in the description and is rectangular or rounded rectangular.", + "A. The material of the frame is mentioned in the description and is plastic.", + "A. The color of the monitor/tv is mentioned in the description and is black.", + "D. The material of the screen is not mentioned, but the screen of the monitor/tv is mentioned.", + "B. The chairs are not mentioned in the description.", + "C. The remote control of the monitor/tv is not mentioned in the description.", + "C. The ports of the monitor/tv are not mentioned in the description.", + "A. The glass are mentioned in the description.", + "B. The range hood is not mentioned in the description." + ], + "score": 0.7222222222222222, + "score_pos": 0.875, + "score_neg": 0.6, + "recognition_result": true + }, + "1770866": { + "pred": "A white price tag with handwritten text in blue and red marker. 
The text reads \"Libra\" in blue at the top, followed by \"Lb\" in blue, \"per\" in blue, \"lb\" in blue, and \"950\" in red.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the text is mentioned in the description but is not handwritten.", + -1 + ], + [ + "The type of the text is not mentioned, but the text of the sign/banner is mentioned.", + 0.5 + ], + [ + "The text or the sign/banner is not mentioned.", + 0 + ], + [ + "The type of the text is mentioned in the description and is handwritten.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The type of the text is mentioned in the description and is handwritten.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the board is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the board is not mentioned, but the board of the sign/banner is mentioned.", + 0.5 + ], + [ + "The board or the sign/banner is not mentioned.", + 0 + ], + [ + "The shape of the board is mentioned in the description and is rectangular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the board is not mentioned, but the board of the sign/banner is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The background color of the sign/banner is mentioned in the description but is not white.", + -1 + ], + [ + "The background color of the sign/banner is not mentioned.", + 0 + ], + [ + "The sign/banner is not mentioned.", + 0 + ], + [ + "The background color of the sign/banner is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The background color of the sign/banner is mentioned in the description and is white.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the text is mentioned in the description but is not black or blue and red.", + -1 + ], + [ + "The color of the text is not mentioned, but the text of the sign/banner is mentioned.", + 0.5 + ], + [ + "The text or the sign/banner is not mentioned.", + 0 + ], + [ + "The color of the text is mentioned in the description and is black or blue and red.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the text is mentioned in the description and is black or blue and red.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The salami is not mentioned in the description.", + 1 + ], + [ + "The salami is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The salami is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sliced meats are not mentioned in the description.", + 1 + ], + [ + "The sliced meats are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The sliced meats are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sign/banner is not mentioned in the description.", + 0 + ], + [ + "The duster of the sign/banner is not mentioned in the description.", + 1 + ], + [ + "The duster of the sign/banner is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The duster of the sign/banner is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sign/banner is not mentioned in the description.", + 0 + ], + [ + "The marker of the sign/banner is not mentioned in the description.", + 1 + ], + [ + "The marker of the sign/banner is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The marker of the sign/banner is mentioned in the description.", + "pred_index": 2, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The price tags are not mentioned in the description.", + 1 + ], + [ + "The price tags are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The price tags are mentioned in the description.", + "pred_index": 1, + "eval_result": -1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is sign/banner. Based on the image, is it likely that the object in the description is given class: sign/banner or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white price tag with handwritten text in blue and red marker. The text reads \"Libra\" in blue at the top, followed by \"Lb\" in blue, \"per\" in blue, \"lb\" in blue, and \"950\" in red.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is sign/banner. Based on the image, is it likely that the object in the description is given class: sign/banner or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white price tag with handwritten text in blue and red marker. The text reads \"Libra\" in blue at the top, followed by \"Lb\" in blue, \"per\" in blue, \"lb\" in blue, and \"950\" in red.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The type of the text is mentioned in the description but is not handwritten.\nB. The type of the text is not mentioned, but the text of the sign/banner is mentioned.\nC. The text or the sign/banner is not mentioned.\nD. 
The type of the text is mentioned in the description and is handwritten.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white price tag with handwritten text in blue and red marker. The text reads \"Libra\" in blue at the top, followed by \"Lb\" in blue, \"per\" in blue, \"lb\" in blue, and \"950\" in red.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the board is mentioned in the description but is not rectangular.\nB. The shape of the board is not mentioned, but the board of the sign/banner is mentioned.\nC. The board or the sign/banner is not mentioned.\nD. The shape of the board is mentioned in the description and is rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white price tag with handwritten text in blue and red marker. The text reads \"Libra\" in blue at the top, followed by \"Lb\" in blue, \"per\" in blue, \"lb\" in blue, and \"950\" in red.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The background color of the sign/banner is mentioned in the description but is not white.\nB. 
The background color of the sign/banner is not mentioned.\nC. The sign/banner is not mentioned.\nD. The background color of the sign/banner is mentioned in the description and is white.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white price tag with handwritten text in blue and red marker. The text reads \"Libra\" in blue at the top, followed by \"Lb\" in blue, \"per\" in blue, \"lb\" in blue, and \"950\" in red.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the text is mentioned in the description but is not black or blue and red.\nB. The color of the text is not mentioned, but the text of the sign/banner is mentioned.\nC. The text or the sign/banner is not mentioned.\nD. The color of the text is mentioned in the description and is black or blue and red.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white price tag with handwritten text in blue and red marker. 
The text reads \"Libra\" in blue at the top, followed by \"Lb\" in blue, \"per\" in blue, \"lb\" in blue, and \"950\" in red.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The salami is not mentioned in the description.\nB. The salami is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white price tag with handwritten text in blue and red marker. The text reads \"Libra\" in blue at the top, followed by \"Lb\" in blue, \"per\" in blue, \"lb\" in blue, and \"950\" in red.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sliced meats are not mentioned in the description.\nB. The sliced meats are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white price tag with handwritten text in blue and red marker. 
The text reads \"Libra\" in blue at the top, followed by \"Lb\" in blue, \"per\" in blue, \"lb\" in blue, and \"950\" in red.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sign/banner is not mentioned in the description.\nB. The duster of the sign/banner is not mentioned in the description.\nC. The duster of the sign/banner is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white price tag with handwritten text in blue and red marker. The text reads \"Libra\" in blue at the top, followed by \"Lb\" in blue, \"per\" in blue, \"lb\" in blue, and \"950\" in red.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sign/banner is not mentioned in the description.\nB. The marker of the sign/banner is not mentioned in the description.\nC. The marker of the sign/banner is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white price tag with handwritten text in blue and red marker. 
The text reads \"Libra\" in blue at the top, followed by \"Lb\" in blue, \"per\" in blue, \"lb\" in blue, and \"950\" in red.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The price tags are not mentioned in the description.\nB. The price tags are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The type of the text is mentioned in the description and is handwritten.", + "B. The shape of the board is not mentioned, but the board of the sign/banner is mentioned.", + "D. The background color of the sign/banner is mentioned in the description and is white.", + "D. The color of the text is mentioned in the description and is black or blue and red.", + "A. The salami is not mentioned in the description.", + "A. The sliced meats are not mentioned in the description.", + "B. The duster of the sign/banner is not mentioned in the description.", + "C. The marker of the sign/banner is mentioned in the description.", + "B. The price tags are mentioned in the description." + ], + "score": 0.5, + "score_pos": 0.875, + "score_neg": 0.2, + "recognition_result": true + }, + "2391761": { + "pred": "The canoe has a wooden hull with horizontal planks and a blue tarpaulin cover draped over it. The tarpaulin is secured with ropes and has some white markings on it. The canoe also features a small outboard motor mounted on the stern.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The hull or the boat is not mentioned.", + 0 + ], + [ + "The color of the hull is mentioned in the description and is brown.", + 1 + ], + [ + "The color of the hull is mentioned in the description but is not brown.", + -1 + ], + [ + "The color of the hull is not mentioned, but the hull of the boat is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the hull is mentioned in the description and is brown.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cover or the boat is not mentioned.", + 0 + ], + [ + "The color of the cover is mentioned in the description and is blue.", + 1 + ], + [ + "The color of the cover is mentioned in the description but is not blue.", + -1 + ], + [ + "The color of the cover is not mentioned, but the cover of the boat is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the cover is mentioned in the description and is blue.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The life preserver/life buoy or the boat is not mentioned.", + 0 + ], + [ + "The color of the life preserver/life buoy is mentioned in the description and is red or white.", + 1 + ], + [ + "The color of the life preserver/life buoy is mentioned in the description but is not red or white.", + -1 + ], + [ + "The color of the life preserver/life buoy is not mentioned, but the life preserver/life buoy of the boat is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the life preserver/life buoy is not mentioned, but the life preserver/life buoy of the boat is mentioned.", + "pred_index": 3, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The motor or the boat is not mentioned.", + 0 + ], + [ + "The color of the motor is mentioned in the description and is black.", + 1 + ], + [ + "The color of the motor is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the motor is not mentioned, but the motor of the boat is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the motor is not mentioned, but the motor of the boat is mentioned.", + "pred_index": 3, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rudder of the boat is mentioned in the description.", + -1 + ], + [ + "The boat is not mentioned in the description.", + 0 + ], + [ + "The rudder of the boat is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The rudder of the boat is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sail of the boat is mentioned in the description.", + -1 + ], + [ + "The boat is not mentioned in the description.", + 0 + ], + [ + "The sail of the boat is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The sail of the boat is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cabin of the boat is mentioned in the description.", + -1 + ], + [ + "The boat is not mentioned in the description.", + 0 + ], + [ + "The cabin of the boat is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The cabin of the boat is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The anchor of the boat is mentioned in the description.", + -1 + ], + [ + "The boat is not mentioned in the description.", + 0 + ], + [ + "The anchor of the boat is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The anchor of the boat is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ducks are mentioned in the description.", + -1 + ], + [ + "The ducks are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The ducks are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is boat. Based on the image, is it likely that the object in the description is given class: boat or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe has a wooden hull with horizontal planks and a blue tarpaulin cover draped over it. The tarpaulin is secured with ropes and has some white markings on it. 
The canoe also features a small outboard motor mounted on the stern.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is boat. Based on the image, is it likely that the object in the description is given class: boat or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe has a wooden hull with horizontal planks and a blue tarpaulin cover draped over it. The tarpaulin is secured with ropes and has some white markings on it. The canoe also features a small outboard motor mounted on the stern.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The hull or the boat is not mentioned.\nB. The color of the hull is mentioned in the description and is brown.\nC. The color of the hull is mentioned in the description but is not brown.\nD. The color of the hull is not mentioned, but the hull of the boat is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe has a wooden hull with horizontal planks and a blue tarpaulin cover draped over it. The tarpaulin is secured with ropes and has some white markings on it. 
The canoe also features a small outboard motor mounted on the stern.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cover or the boat is not mentioned.\nB. The color of the cover is mentioned in the description and is blue.\nC. The color of the cover is mentioned in the description but is not blue.\nD. The color of the cover is not mentioned, but the cover of the boat is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe has a wooden hull with horizontal planks and a blue tarpaulin cover draped over it. The tarpaulin is secured with ropes and has some white markings on it. The canoe also features a small outboard motor mounted on the stern.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The life preserver/life buoy or the boat is not mentioned.\nB. The color of the life preserver/life buoy is mentioned in the description and is red or white.\nC. The color of the life preserver/life buoy is mentioned in the description but is not red or white.\nD. The color of the life preserver/life buoy is not mentioned, but the life preserver/life buoy of the boat is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe has a wooden hull with horizontal planks and a blue tarpaulin cover draped over it. The tarpaulin is secured with ropes and has some white markings on it. The canoe also features a small outboard motor mounted on the stern.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The motor or the boat is not mentioned.\nB. The color of the motor is mentioned in the description and is black.\nC. The color of the motor is mentioned in the description but is not black.\nD. The color of the motor is not mentioned, but the motor of the boat is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe has a wooden hull with horizontal planks and a blue tarpaulin cover draped over it. The tarpaulin is secured with ropes and has some white markings on it. The canoe also features a small outboard motor mounted on the stern.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rudder of the boat is mentioned in the description.\nB. The boat is not mentioned in the description.\nC. The rudder of the boat is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe has a wooden hull with horizontal planks and a blue tarpaulin cover draped over it. The tarpaulin is secured with ropes and has some white markings on it. The canoe also features a small outboard motor mounted on the stern.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sail of the boat is mentioned in the description.\nB. The boat is not mentioned in the description.\nC. The sail of the boat is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe has a wooden hull with horizontal planks and a blue tarpaulin cover draped over it. The tarpaulin is secured with ropes and has some white markings on it. The canoe also features a small outboard motor mounted on the stern.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cabin of the boat is mentioned in the description.\nB. The boat is not mentioned in the description.\nC. The cabin of the boat is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe has a wooden hull with horizontal planks and a blue tarpaulin cover draped over it. The tarpaulin is secured with ropes and has some white markings on it. The canoe also features a small outboard motor mounted on the stern.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The anchor of the boat is mentioned in the description.\nB. The boat is not mentioned in the description.\nC. The anchor of the boat is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe has a wooden hull with horizontal planks and a blue tarpaulin cover draped over it. The tarpaulin is secured with ropes and has some white markings on it. The canoe also features a small outboard motor mounted on the stern.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ducks are mentioned in the description.\nB. The ducks are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the hull is mentioned in the description and is brown.", + "B. The color of the cover is mentioned in the description and is blue.", + "D. The color of the life preserver/life buoy is not mentioned, but the life preserver/life buoy of the boat is mentioned.", + "D. 
The color of the motor is not mentioned, but the motor of the boat is mentioned.", + "C. The rudder of the boat is not mentioned in the description.", + "C. The sail of the boat is not mentioned in the description.", + "C. The cabin of the boat is not mentioned in the description.", + "C. The anchor of the boat is not mentioned in the description.", + "B. The ducks are not mentioned in the description." + ], + "score": 0.8888888888888888, + "score_pos": 0.75, + "score_neg": 1.0, + "recognition_result": true + }, + "2580318": { + "pred": "The mouse has a sleek, metallic silver body with a smooth, reflective surface. The visible part of the mouse is triangular in shape, with a slightly curved edge and a subtle gradient of light reflecting off its surface.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouse is not mentioned.", + 0 + ], + [ + "The color of the mouse is not mentioned.", + 0 + ], + [ + "The color of the mouse is mentioned in the description but is not gray.", + -1 + ], + [ + "The color of the mouse is mentioned in the description and is gray.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the mouse is mentioned in the description and is gray.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouse is not mentioned.", + 0 + ], + [ + "The texture of the mouse is not mentioned.", + 0 + ], + [ + "The texture of the mouse is mentioned in the description but is not matte.", + -1 + ], + [ + "The texture of the mouse is mentioned in the description and is matte.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The texture of the mouse is mentioned in the description but is not matte.", + "pred_index": 2, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouse is not mentioned.", + 0 + ], + [ + "The shape of the mouse is not mentioned.", + 0 + ], + [ + "The shape of the mouse is mentioned in the description but is not ergonomic.", + -1 + ], + [ + "The shape of the mouse is mentioned in the description and is ergonomic.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the mouse is mentioned in the description but is not ergonomic.", + "pred_index": 2, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Paper is mentioned in the description.", + -1 + ], + [ + "The Paper is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The Paper is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouse is not mentioned in the description.", + 0 + ], + [ + "The cable of the mouse is mentioned in the description.", + -1 + ], + [ + "The cable of the mouse is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The cable of the mouse is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Framed diagram is mentioned in the description.", + -1 + ], + [ + "The Framed diagram is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The Framed diagram is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Pen is mentioned in the description.", + -1 + ], + [ + "The Pen is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The Pen is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouse is not mentioned in the description.", + 0 + ], + [ + "The side buttons of the mouse are mentioned in the description.", + -1 + ], + [ + "The side buttons of the mouse are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The side buttons of the mouse are not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is mouse. Based on the image, is it likely that the object in the description is given class: mouse or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a sleek, metallic silver body with a smooth, reflective surface. 
The visible part of the mouse is triangular in shape, with a slightly curved edge and a subtle gradient of light reflecting off its surface.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is mouse. Based on the image, is it likely that the object in the description is given class: mouse or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a sleek, metallic silver body with a smooth, reflective surface. The visible part of the mouse is triangular in shape, with a slightly curved edge and a subtle gradient of light reflecting off its surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mouse is not mentioned.\nB. The color of the mouse is not mentioned.\nC. The color of the mouse is mentioned in the description but is not gray.\nD. The color of the mouse is mentioned in the description and is gray.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a sleek, metallic silver body with a smooth, reflective surface. 
The visible part of the mouse is triangular in shape, with a slightly curved edge and a subtle gradient of light reflecting off its surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mouse is not mentioned.\nB. The texture of the mouse is not mentioned.\nC. The texture of the mouse is mentioned in the description but is not matte.\nD. The texture of the mouse is mentioned in the description and is matte.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a sleek, metallic silver body with a smooth, reflective surface. The visible part of the mouse is triangular in shape, with a slightly curved edge and a subtle gradient of light reflecting off its surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mouse is not mentioned.\nB. The shape of the mouse is not mentioned.\nC. The shape of the mouse is mentioned in the description but is not ergonomic.\nD. The shape of the mouse is mentioned in the description and is ergonomic.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a sleek, metallic silver body with a smooth, reflective surface. The visible part of the mouse is triangular in shape, with a slightly curved edge and a subtle gradient of light reflecting off its surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Paper is mentioned in the description.\nB. The Paper is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a sleek, metallic silver body with a smooth, reflective surface. The visible part of the mouse is triangular in shape, with a slightly curved edge and a subtle gradient of light reflecting off its surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mouse is not mentioned in the description.\nB. The cable of the mouse is mentioned in the description.\nC. The cable of the mouse is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a sleek, metallic silver body with a smooth, reflective surface. The visible part of the mouse is triangular in shape, with a slightly curved edge and a subtle gradient of light reflecting off its surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Framed diagram is mentioned in the description.\nB. The Framed diagram is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a sleek, metallic silver body with a smooth, reflective surface. The visible part of the mouse is triangular in shape, with a slightly curved edge and a subtle gradient of light reflecting off its surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Pen is mentioned in the description.\nB. The Pen is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a sleek, metallic silver body with a smooth, reflective surface. 
The visible part of the mouse is triangular in shape, with a slightly curved edge and a subtle gradient of light reflecting off its surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mouse is not mentioned in the description.\nB. The side buttons of the mouse are mentioned in the description.\nC. The side buttons of the mouse are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the mouse is mentioned in the description and is gray.", + "C. The texture of the mouse is mentioned in the description but is not matte.", + "C. The shape of the mouse is mentioned in the description but is not ergonomic.", + "B. The Paper is not mentioned in the description.", + "C. The cable of the mouse is not mentioned in the description.", + "B. The Framed diagram is not mentioned in the description.", + "B. The Pen is not mentioned in the description.", + "C. The side buttons of the mouse are not mentioned in the description." + ], + "score": 0.5, + "score_pos": -0.3333333333333333, + "score_neg": 1.0, + "recognition_result": true + }, + "2588513": { + "pred": "A rectangular wooden block with a light beige top surface and a black bottom surface. The top surface has a smooth texture with visible wood grain patterns, while the bottom surface is solid black.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the wood block is not mentioned.", + 0 + ], + [ + "The color of the wood block is mentioned in the description but is not wooden or brown.", + -1 + ], + [ + "The color of the wood block is mentioned in the description and is wooden or brown.", + 1 + ], + [ + "The wood block is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the wood block is mentioned in the description and is wooden or brown.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the top is not mentioned, but the top of the wood block is mentioned.", + 0.5 + ], + [ + "The texture of the top is mentioned in the description but is not grain.", + -1 + ], + [ + "The texture of the top is mentioned in the description and is grain.", + 1 + ], + [ + "The top or the wood block is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The texture of the top is mentioned in the description and is grain.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wood block is not mentioned in the description.", + 0 + ], + [ + "The metal clip of the wood block is not mentioned in the description.", + 1 + ], + [ + "The metal clip of the wood block is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The metal clip of the wood block is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The machine with a motor are not mentioned in the description.", + 1 + ], + [ + "The machine with a motor are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The machine with a motor are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pickup is not mentioned in the description.", + 1 + ], + [ + "The pickup is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The pickup is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wood block is not mentioned in the description.", + 0 + ], + [ + "The plastic handle of the wood block is not mentioned in the description.", + 1 + ], + [ + "The plastic handle of the wood block is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The plastic handle of the wood block is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The copper wire spool is not mentioned in the description.", + 1 + ], + [ + "The copper wire spool is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The copper wire spool is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is wood block. Based on the image, is it likely that the object in the description is given class: wood block or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden block with a light beige top surface and a black bottom surface. The top surface has a smooth texture with visible wood grain patterns, while the bottom surface is solid black.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is wood block. Based on the image, is it likely that the object in the description is given class: wood block or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden block with a light beige top surface and a black bottom surface. The top surface has a smooth texture with visible wood grain patterns, while the bottom surface is solid black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the wood block is not mentioned.\nB. The color of the wood block is mentioned in the description but is not wooden or brown.\nC. The color of the wood block is mentioned in the description and is wooden or brown.\nD. The wood block is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden block with a light beige top surface and a black bottom surface. The top surface has a smooth texture with visible wood grain patterns, while the bottom surface is solid black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the top is not mentioned, but the top of the wood block is mentioned.\nB. The texture of the top is mentioned in the description but is not grain.\nC. The texture of the top is mentioned in the description and is grain.\nD. The top or the wood block is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden block with a light beige top surface and a black bottom surface. The top surface has a smooth texture with visible wood grain patterns, while the bottom surface is solid black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wood block is not mentioned in the description.\nB. The metal clip of the wood block is not mentioned in the description.\nC. The metal clip of the wood block is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden block with a light beige top surface and a black bottom surface. The top surface has a smooth texture with visible wood grain patterns, while the bottom surface is solid black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The machine with a motor are not mentioned in the description.\nB. The machine with a motor are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden block with a light beige top surface and a black bottom surface. The top surface has a smooth texture with visible wood grain patterns, while the bottom surface is solid black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pickup is not mentioned in the description.\nB. The pickup is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden block with a light beige top surface and a black bottom surface. The top surface has a smooth texture with visible wood grain patterns, while the bottom surface is solid black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wood block is not mentioned in the description.\nB. The plastic handle of the wood block is not mentioned in the description.\nC. The plastic handle of the wood block is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden block with a light beige top surface and a black bottom surface. The top surface has a smooth texture with visible wood grain patterns, while the bottom surface is solid black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The copper wire spool is not mentioned in the description.\nB. The copper wire spool is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the wood block is mentioned in the description and is wooden or brown.", + "C. The texture of the top is mentioned in the description and is grain.", + "B. The metal clip of the wood block is not mentioned in the description.", + "A. The machine with a motor are not mentioned in the description.", + "A. The pickup is not mentioned in the description.", + "B. 
The plastic handle of the wood block is not mentioned in the description.", + "A. The copper wire spool is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "3993075": { + "pred": "A cylindrical marker with a white body featuring a colorful design, including a blue and green pattern near the middle and a red cap.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the cap is mentioned in the description but is not orange.", + -1 + ], + [ + "The color of the cap is mentioned in the description and is orange.", + 1 + ], + [ + "The cap or the marker is not mentioned.", + 0 + ], + [ + "The color of the cap is not mentioned, but the cap of the marker is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the cap is mentioned in the description and is orange.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the body is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the body is mentioned in the description and is white.", + 1 + ], + [ + "The body or the marker is not mentioned.", + 0 + ], + [ + "The color of the body is not mentioned, but the body of the marker is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the body is mentioned in the description and is white.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the marker is mentioned in the description but is not cylindrical.", + -1 + ], + [ + "The shape of the marker is mentioned in the description and is cylindrical.", + 1 + ], + [ + "The marker is not mentioned.", + 0 + ], + [ + "The shape of the marker is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the marker is mentioned in the description and is cylindrical.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the body is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the body is mentioned in the description and is plastic.", + 1 + ], + [ + "The body or the marker is not mentioned.", + 0 + ], + [ + "The material of the body is not mentioned, but the body of the marker is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the body is not mentioned, but the body of the marker is mentioned.", + "pred_index": 3, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The children are mentioned in the description.", + -1 + ], + [ + "The children are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The children are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The table is mentioned in the description.", + -1 + ], + [ + "The table is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The table is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clip of the marker is mentioned in the description.", + -1 + ], + [ + "The marker is not mentioned in the description.", + 0 + ], + [ + "The clip of the marker is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The clip of the marker is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The label of the marker is mentioned in the description.", + -1 + ], + [ + "The marker is not mentioned in the description.", + 0 + ], + [ + "The label of the marker is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The label of the marker is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chairs are mentioned in the description.", + -1 + ], + [ + "The chairs are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The chairs are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is marker. 
Based on the image, is it likely that the object in the description is given class: marker or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical marker with a white body featuring a colorful design, including a blue and green pattern near the middle and a red cap.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is marker. Based on the image, is it likely that the object in the description is given class: marker or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical marker with a white body featuring a colorful design, including a blue and green pattern near the middle and a red cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the cap is mentioned in the description but is not orange.\nB. The color of the cap is mentioned in the description and is orange.\nC. 
The cap or the marker is not mentioned.\nD. The color of the cap is not mentioned, but the cap of the marker is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical marker with a white body featuring a colorful design, including a blue and green pattern near the middle and a red cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the body is mentioned in the description but is not white.\nB. The color of the body is mentioned in the description and is white.\nC. The body or the marker is not mentioned.\nD. The color of the body is not mentioned, but the body of the marker is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical marker with a white body featuring a colorful design, including a blue and green pattern near the middle and a red cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the marker is mentioned in the description but is not cylindrical.\nB. The shape of the marker is mentioned in the description and is cylindrical.\nC. The marker is not mentioned.\nD. 
The shape of the marker is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical marker with a white body featuring a colorful design, including a blue and green pattern near the middle and a red cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the body is mentioned in the description but is not plastic.\nB. The material of the body is mentioned in the description and is plastic.\nC. The body or the marker is not mentioned.\nD. The material of the body is not mentioned, but the body of the marker is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical marker with a white body featuring a colorful design, including a blue and green pattern near the middle and a red cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The children are mentioned in the description.\nB. The children are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical marker with a white body featuring a colorful design, including a blue and green pattern near the middle and a red cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The table is mentioned in the description.\nB. The table is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical marker with a white body featuring a colorful design, including a blue and green pattern near the middle and a red cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The clip of the marker is mentioned in the description.\nB. The marker is not mentioned in the description.\nC. The clip of the marker is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical marker with a white body featuring a colorful design, including a blue and green pattern near the middle and a red cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The label of the marker is mentioned in the description.\nB. The marker is not mentioned in the description.\nC. The label of the marker is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical marker with a white body featuring a colorful design, including a blue and green pattern near the middle and a red cap.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chairs are mentioned in the description.\nB. The chairs are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the cap is mentioned in the description and is orange.", + "B. The color of the body is mentioned in the description and is white.", + "B. The shape of the marker is mentioned in the description and is cylindrical.", + "D. The material of the body is not mentioned, but the body of the marker is mentioned.", + "B. The children are not mentioned in the description.", + "B. The table is not mentioned in the description.", + "C. The clip of the marker is not mentioned in the description.", + "C. The label of the marker is not mentioned in the description.", + "B. 
The chairs are not mentioned in the description." + ], + "score": 0.9444444444444444, + "score_pos": 0.875, + "score_neg": 1.0, + "recognition_result": true + }, + "4027486": { + "pred": "The bus is predominantly blue with a white section on the right side. It has a black horizontal stripe running along the middle, with a green stripe above it. The rear of the bus features a white license plate with black text. There is a small, white, triangular logo with a black design on the blue section near the rear.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the license plate is not mentioned, but the license plate of the truck is mentioned.", + 0.5 + ], + [ + "The license plate or the truck is not mentioned.", + 0 + ], + [ + "The shape of the license plate is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the license plate is mentioned in the description and is rectangular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the license plate is mentioned in the description and is rectangular.", + "pred_index": 3, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the front is not mentioned, but the front of the truck is mentioned.", + 0.5 + ], + [ + "The front or the truck is not mentioned.", + 0 + ], + [ + "The color of the front is mentioned in the description but is not blue.", + -1 + ], + [ + "The color of the front is mentioned in the description and is blue.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The front or the truck is not mentioned.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the grille is not mentioned, but the grille of the truck is mentioned.", + 0.5 + ], + [ + "The grille or the truck is not mentioned.", + 0 + ], + [ + "The color of the grille is mentioned in the description but is not black, green, or blue.", + -1 + ], + [ + "The color of the grille is mentioned in the description and is black, green, or blue.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. The grille or the truck is not mentioned.", + "pred_index": 1, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The truck is not mentioned in the description.", + 0 + ], + [ + "The door handle of the truck is mentioned in the description.", + -1 + ], + [ + "The door handle of the truck is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The door handle of the truck is not mentioned in the description.", + "pred_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The truck is not mentioned in the description.", + 0 + ], + [ + "The rear part of the truck is mentioned in the description.", + -1 + ], + [ + "The rear part of the truck is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The rear part of the truck is mentioned in the description.", + "pred_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The truck is not mentioned in the description.", + 0 + ], + [ + "The exhaust pipe of the truck is mentioned in the description.", + -1 + ], + [ + "The exhaust pipe of the truck is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The exhaust pipe of the truck is not mentioned in the description.", + "pred_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The truck is not mentioned in the description.", + 0 + ], + [ + "The tail light of the truck is mentioned in the description.", + -1 + ], + [ + "The tail light of the truck is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The tail light of the truck is not mentioned in the description.", + "pred_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The truck is not mentioned in the description.", + 0 + ], + [ + "The cargo area of the truck is mentioned in the description.", + -1 + ], + [ + "The cargo area of the truck is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The cargo area of the truck is not mentioned in the description.", + "pred_index": 2, + "eval_result": 0 + } + ], + "details_recognition": [ + { + "question": "The object in the image is truck. Based on the image, is it likely that the object in the description is given class: truck or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "B. 
No", + "pred_index": 1, + "eval_result": "incorrect" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section on the right side. It has a black horizontal stripe running along the middle, with a green stripe above it. The rear of the bus features a white license plate with black text. There is a small, white, triangular logo with a black design on the blue section near the rear.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is truck. Based on the image, is it likely that the object in the description is given class: truck or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section on the right side. It has a black horizontal stripe running along the middle, with a green stripe above it. The rear of the bus features a white license plate with black text. There is a small, white, triangular logo with a black design on the blue section near the rear.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The shape of the license plate is not mentioned, but the license plate of the truck is mentioned.\nB. The license plate or the truck is not mentioned.\nC. The shape of the license plate is mentioned in the description but is not rectangular.\nD. The shape of the license plate is mentioned in the description and is rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section on the right side. It has a black horizontal stripe running along the middle, with a green stripe above it. The rear of the bus features a white license plate with black text. There is a small, white, triangular logo with a black design on the blue section near the rear.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the front is not mentioned, but the front of the truck is mentioned.\nB. The front or the truck is not mentioned.\nC. The color of the front is mentioned in the description but is not blue.\nD. The color of the front is mentioned in the description and is blue.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section on the right side. 
It has a black horizontal stripe running along the middle, with a green stripe above it. The rear of the bus features a white license plate with black text. There is a small, white, triangular logo with a black design on the blue section near the rear.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the grille is not mentioned, but the grille of the truck is mentioned.\nB. The grille or the truck is not mentioned.\nC. The color of the grille is mentioned in the description but is not black, green, or blue.\nD. The color of the grille is mentioned in the description and is black, green, or blue.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section on the right side. It has a black horizontal stripe running along the middle, with a green stripe above it. The rear of the bus features a white license plate with black text. There is a small, white, triangular logo with a black design on the blue section near the rear.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The truck is not mentioned in the description.\nB. The door handle of the truck is mentioned in the description.\nC. The door handle of the truck is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section on the right side. It has a black horizontal stripe running along the middle, with a green stripe above it. The rear of the bus features a white license plate with black text. There is a small, white, triangular logo with a black design on the blue section near the rear.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The truck is not mentioned in the description.\nB. The rear part of the truck is mentioned in the description.\nC. The rear part of the truck is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section on the right side. It has a black horizontal stripe running along the middle, with a green stripe above it. The rear of the bus features a white license plate with black text. There is a small, white, triangular logo with a black design on the blue section near the rear.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The truck is not mentioned in the description.\nB. The exhaust pipe of the truck is mentioned in the description.\nC. 
The exhaust pipe of the truck is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section on the right side. It has a black horizontal stripe running along the middle, with a green stripe above it. The rear of the bus features a white license plate with black text. There is a small, white, triangular logo with a black design on the blue section near the rear.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The truck is not mentioned in the description.\nB. The tail light of the truck is mentioned in the description.\nC. The tail light of the truck is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section on the right side. It has a black horizontal stripe running along the middle, with a green stripe above it. The rear of the bus features a white license plate with black text. 
There is a small, white, triangular logo with a black design on the blue section near the rear.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The truck is not mentioned in the description.\nB. The cargo area of the truck is mentioned in the description.\nC. The cargo area of the truck is not mentioned in the description.\n" + ], + "response": [ + "B. No", + "D. The shape of the license plate is mentioned in the description and is rectangular.", + "B. The front or the truck is not mentioned.", + "B. The grille or the truck is not mentioned.", + "C. The door handle of the truck is not mentioned in the description.", + "B. The rear part of the truck is mentioned in the description.", + "C. The exhaust pipe of the truck is not mentioned in the description.", + "C. The tail light of the truck is not mentioned in the description.", + "C. The cargo area of the truck is not mentioned in the description." + ], + "score": -0.125, + "score_pos": 0.0, + "score_neg": -0.2, + "recognition_result": false + }, + "4243725": { + "pred": "The soap is a rectangular, slightly curved bar with a smooth, creamy beige surface.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the soap is mentioned in the description and is bar soap.", + 1 + ], + [ + "The type of the soap is mentioned in the description but is not bar soap.", + -1 + ], + [ + "The type of the soap is not mentioned.", + 0 + ], + [ + "The soap is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The type of the soap is mentioned in the description and is bar soap.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the soap is mentioned in the description and is yellow or brown.", + 1 + ], + [ + "The color of the soap is mentioned in the description but is not yellow or brown.", + -1 + ], + [ + "The color of the soap is not mentioned.", + 0 + ], + [ + "The soap is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the soap is mentioned in the description but is not yellow or brown.", + "pred_index": 1, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The decorative elements of the soap are not mentioned in the description.", + 1 + ], + [ + "The decorative elements of the soap are mentioned in the description.", + -1 + ], + [ + "The soap is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The decorative elements of the soap are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cutting board is not mentioned in the description.", + 1 + ], + [ + "The cutting board is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The cutting board is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The dishwasher is not mentioned in the description.", + 1 + ], + [ + "The dishwasher is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The dishwasher is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The decorative peppers are not mentioned in the description.", + 1 + ], + [ + "The decorative peppers are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The decorative peppers are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The logo of the soap is not mentioned in the description.", + 1 + ], + [ + "The logo of the soap is mentioned in the description.", + -1 + ], + [ + "The soap is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The logo of the soap is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is soap. Based on the image, is it likely that the object in the description is given class: soap or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe soap is a rectangular, slightly curved bar with a smooth, creamy beige surface.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is soap. Based on the image, is it likely that the object in the description is given class: soap or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe soap is a rectangular, slightly curved bar with a smooth, creamy beige surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The type of the soap is mentioned in the description and is bar soap.\nB. The type of the soap is mentioned in the description but is not bar soap.\nC. The type of the soap is not mentioned.\nD. The soap is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe soap is a rectangular, slightly curved bar with a smooth, creamy beige surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The color of the soap is mentioned in the description and is yellow or brown.\nB. The color of the soap is mentioned in the description but is not yellow or brown.\nC. The color of the soap is not mentioned.\nD. The soap is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe soap is a rectangular, slightly curved bar with a smooth, creamy beige surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The decorative elements of the soap are not mentioned in the description.\nB. The decorative elements of the soap are mentioned in the description.\nC. The soap is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe soap is a rectangular, slightly curved bar with a smooth, creamy beige surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cutting board is not mentioned in the description.\nB. The cutting board is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe soap is a rectangular, slightly curved bar with a smooth, creamy beige surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The dishwasher is not mentioned in the description.\nB. The dishwasher is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe soap is a rectangular, slightly curved bar with a smooth, creamy beige surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The decorative peppers are not mentioned in the description.\nB. The decorative peppers are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe soap is a rectangular, slightly curved bar with a smooth, creamy beige surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The logo of the soap is not mentioned in the description.\nB. The logo of the soap is mentioned in the description.\nC. The soap is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The type of the soap is mentioned in the description and is bar soap.", + "B. The color of the soap is mentioned in the description but is not yellow or brown.", + "A. The decorative elements of the soap are not mentioned in the description.", + "A. The cutting board is not mentioned in the description.", + "A. The dishwasher is not mentioned in the description.", + "A. The decorative peppers are not mentioned in the description.", + "A. The logo of the soap is not mentioned in the description." + ], + "score": 0.7142857142857143, + "score_pos": 0.0, + "score_neg": 1.0, + "recognition_result": true + }, + "4781902": { + "pred": "A dark brown, wooden ladder with evenly spaced, flat rungs and side rails that taper slightly towards the top.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the stool is not mentioned.", + 0 + ], + [ + "The stool is not mentioned.", + 0 + ], + [ + "The material of the stool is mentioned in the description but is not wood.", + -1 + ], + [ + "The material of the stool is mentioned in the description and is wood.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The stool is not mentioned.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the leg is not mentioned, but the leg of the stool is mentioned.", + 0.5 + ], + [ + "The leg or the stool is not mentioned.", + 0 + ], + [ + "The material of the leg is mentioned in the description but is not wood.", + -1 + ], + [ + "The material of the leg is mentioned in the description and is wood.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the leg is mentioned in the description and is wood.", + "pred_index": 3, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the crossbar is not mentioned, but the crossbar of the stool is mentioned.", + 0.5 + ], + [ + "The crossbar or the stool is not mentioned.", + 0 + ], + [ + "The shape of the crossbar is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the crossbar is mentioned in the description and is rectangular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. The crossbar or the stool is not mentioned.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the crossbar is not mentioned, but the crossbar of the stool is mentioned.", + 0.5 + ], + [ + "The crossbar or the stool is not mentioned.", + 0 + ], + [ + "The material of the crossbar is mentioned in the description but is not wood.", + -1 + ], + [ + "The material of the crossbar is mentioned in the description and is wood.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The crossbar or the stool is not mentioned.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the seat is not mentioned, but the seat of the stool is mentioned.", + 0.5 + ], + [ + "The seat or the stool is not mentioned.", + 0 + ], + [ + "The shape of the seat is mentioned in the description but is not flat.", + -1 + ], + [ + "The shape of the seat is mentioned in the description and is flat.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. The seat or the stool is not mentioned.", + "pred_index": 1, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sky is mentioned in the description.", + -1 + ], + [ + "The sky is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The sky is not mentioned in the description.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The building in the background are mentioned in the description.", + -1 + ], + [ + "The building in the background are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The building in the background are not mentioned in the description.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The swing set is mentioned in the description.", + -1 + ], + [ + "The swing set is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The swing set is not mentioned in the description.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The slide is mentioned in the description.", + -1 + ], + [ + "The slide is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The slide is not mentioned in the description.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The climbing wall is mentioned in the description.", + -1 + ], + [ + "The climbing wall is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The climbing wall is not mentioned in the description.", + "pred_index": 1, + "eval_result": 0 + } + ], + "details_recognition": [ + { + "question": "The object in the image is stool. Based on the image, is it likely that the object in the description is given class: stool or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "B. No", + "pred_index": 1, + "eval_result": "incorrect" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown, wooden ladder with evenly spaced, flat rungs and side rails that taper slightly towards the top.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is stool. 
Based on the image, is it likely that the object in the description is given class: stool or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown, wooden ladder with evenly spaced, flat rungs and side rails that taper slightly towards the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the stool is not mentioned.\nB. The stool is not mentioned.\nC. The material of the stool is mentioned in the description but is not wood.\nD. The material of the stool is mentioned in the description and is wood.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown, wooden ladder with evenly spaced, flat rungs and side rails that taper slightly towards the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the leg is not mentioned, but the leg of the stool is mentioned.\nB. The leg or the stool is not mentioned.\nC. The material of the leg is mentioned in the description but is not wood.\nD. 
The material of the leg is mentioned in the description and is wood.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown, wooden ladder with evenly spaced, flat rungs and side rails that taper slightly towards the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the crossbar is not mentioned, but the crossbar of the stool is mentioned.\nB. The crossbar or the stool is not mentioned.\nC. The shape of the crossbar is mentioned in the description but is not rectangular.\nD. The shape of the crossbar is mentioned in the description and is rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown, wooden ladder with evenly spaced, flat rungs and side rails that taper slightly towards the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the crossbar is not mentioned, but the crossbar of the stool is mentioned.\nB. The crossbar or the stool is not mentioned.\nC. The material of the crossbar is mentioned in the description but is not wood.\nD. 
The material of the crossbar is mentioned in the description and is wood.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown, wooden ladder with evenly spaced, flat rungs and side rails that taper slightly towards the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the seat is not mentioned, but the seat of the stool is mentioned.\nB. The seat or the stool is not mentioned.\nC. The shape of the seat is mentioned in the description but is not flat.\nD. The shape of the seat is mentioned in the description and is flat.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown, wooden ladder with evenly spaced, flat rungs and side rails that taper slightly towards the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sky is mentioned in the description.\nB. The sky is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown, wooden ladder with evenly spaced, flat rungs and side rails that taper slightly towards the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The building in the background are mentioned in the description.\nB. The building in the background are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown, wooden ladder with evenly spaced, flat rungs and side rails that taper slightly towards the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The swing set is mentioned in the description.\nB. The swing set is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA dark brown, wooden ladder with evenly spaced, flat rungs and side rails that taper slightly towards the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The slide is mentioned in the description.\nB. The slide is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown, wooden ladder with evenly spaced, flat rungs and side rails that taper slightly towards the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The climbing wall is mentioned in the description.\nB. The climbing wall is not mentioned in the description.\n" + ], + "response": [ + "B. No", + "B. The stool is not mentioned.", + "D. The material of the leg is mentioned in the description and is wood.", + "B. The crossbar or the stool is not mentioned.", + "B. The crossbar or the stool is not mentioned.", + "B. The seat or the stool is not mentioned.", + "B. The sky is not mentioned in the description.", + "B. The building in the background are not mentioned in the description.", + "B. The swing set is not mentioned in the description.", + "B. The slide is not mentioned in the description.", + "B. The climbing wall is not mentioned in the description." 
+ ], + "score": 0.0, + "score_pos": 0.0, + "score_neg": 0.0, + "recognition_result": false + }, + "4782949": { + "pred": "The drum has a circular shape with a red body and a black rim. The drumhead is a light brown color with a blue circular patch in the center.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the drum is mentioned in the description but is not round or circular.", + -1 + ], + [ + "The shape of the drum is mentioned in the description and is round or circular.", + 1 + ], + [ + "The shape of the drum is not mentioned.", + 0 + ], + [ + "The drum is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the drum is mentioned in the description and is round or circular.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the body is mentioned in the description but is not red.", + -1 + ], + [ + "The color of the body is mentioned in the description and is red.", + 1 + ], + [ + "The color of the body is not mentioned, but the body of the drum is mentioned.", + 0.5 + ], + [ + "The body or the drum is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the body is mentioned in the description and is red.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the rim is mentioned in the description but is not metallic.", + -1 + ], + [ + "The material of the rim is mentioned in the description and is metallic.", + 1 + ], + [ + "The material of the rim is not mentioned, but the rim of the drum is mentioned.", + 0.5 + ], + [ + "The rim or the drum is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The material of the rim is not mentioned, but the rim of the drum is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The drum stand of the drum is mentioned in the description.", + -1 + ], + [ + "The drum is not mentioned in the description.", + 0 + ], + [ + "The drum stand of the drum is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The drum stand of the drum is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Trees are mentioned in the description.", + -1 + ], + [ + "The Trees are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The Trees are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The drum pedal of the drum is mentioned in the description.", + -1 + ], + [ + "The drum is not mentioned in the description.", + 0 + ], + [ + "The drum pedal of the drum is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The drum pedal of the drum is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Superior Foods sign is mentioned in the description.", + -1 + ], + [ + "The Superior Foods sign is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The Superior Foods sign is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The People are mentioned in the description.", + -1 + ], + [ + "The People are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The People are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is drum. Based on the image, is it likely that the object in the description is given class: drum or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe drum has a circular shape with a red body and a black rim. The drumhead is a light brown color with a blue circular patch in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is drum. Based on the image, is it likely that the object in the description is given class: drum or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe drum has a circular shape with a red body and a black rim. The drumhead is a light brown color with a blue circular patch in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the drum is mentioned in the description but is not round or circular.\nB. The shape of the drum is mentioned in the description and is round or circular.\nC. The shape of the drum is not mentioned.\nD. The drum is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe drum has a circular shape with a red body and a black rim. The drumhead is a light brown color with a blue circular patch in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the body is mentioned in the description but is not red.\nB. The color of the body is mentioned in the description and is red.\nC. The color of the body is not mentioned, but the body of the drum is mentioned.\nD. The body or the drum is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe drum has a circular shape with a red body and a black rim. The drumhead is a light brown color with a blue circular patch in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the rim is mentioned in the description but is not metallic.\nB. The material of the rim is mentioned in the description and is metallic.\nC. The material of the rim is not mentioned, but the rim of the drum is mentioned.\nD. The rim or the drum is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe drum has a circular shape with a red body and a black rim. The drumhead is a light brown color with a blue circular patch in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The drum stand of the drum is mentioned in the description.\nB. The drum is not mentioned in the description.\nC. The drum stand of the drum is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe drum has a circular shape with a red body and a black rim. The drumhead is a light brown color with a blue circular patch in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Trees are mentioned in the description.\nB. The Trees are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe drum has a circular shape with a red body and a black rim. The drumhead is a light brown color with a blue circular patch in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The drum pedal of the drum is mentioned in the description.\nB. The drum is not mentioned in the description.\nC. The drum pedal of the drum is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe drum has a circular shape with a red body and a black rim. 
The drumhead is a light brown color with a blue circular patch in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Superior Foods sign is mentioned in the description.\nB. The Superior Foods sign is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe drum has a circular shape with a red body and a black rim. The drumhead is a light brown color with a blue circular patch in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The People are mentioned in the description.\nB. The People are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The shape of the drum is mentioned in the description and is round or circular.", + "B. The color of the body is mentioned in the description and is red.", + "C. The material of the rim is not mentioned, but the rim of the drum is mentioned.", + "C. The drum stand of the drum is not mentioned in the description.", + "B. The Trees are not mentioned in the description.", + "C. The drum pedal of the drum is not mentioned in the description.", + "B. The Superior Foods sign is not mentioned in the description.", + "B. The People are not mentioned in the description." + ], + "score": 0.9375, + "score_pos": 0.8333333333333334, + "score_neg": 1.0, + "recognition_result": true + }, + "5211280": { + "pred": "A stainless steel rice cooker with a black handle on top. 
The front features a digital display screen in the center, surrounded by various buttons and controls. The buttons are arranged in a circular pattern around the display, with additional buttons below. The cooker has a sleek, modern design with a smooth, reflective surface.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the rice cooker is mentioned in the description and is silver and black.", + 1 + ], + [ + "The color of the rice cooker is mentioned in the description but is not silver and black.", + -1 + ], + [ + "The color of the rice cooker is not mentioned.", + 0 + ], + [ + "The rice cooker is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the rice cooker is mentioned in the description and is silver and black.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the handle is mentioned in the description and is curved.", + 1 + ], + [ + "The shape of the handle is mentioned in the description but is not curved.", + -1 + ], + [ + "The shape of the handle is not mentioned, but the handle of the rice cooker is mentioned.", + 0.5 + ], + [ + "The handle or the rice cooker is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the handle is not mentioned, but the handle of the rice cooker is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the display is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the display is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the display is not mentioned, but the display of the rice cooker is mentioned.", + 0.5 + ], + [ + "The display or the rice cooker is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the display is not mentioned, but the display of the rice cooker is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the body is mentioned in the description and is silver.", + 1 + ], + [ + "The color of the body is mentioned in the description but is not silver.", + -1 + ], + [ + "The color of the body is not mentioned, but the body of the rice cooker is mentioned.", + 0.5 + ], + [ + "The body or the rice cooker is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the body is mentioned in the description and is silver.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the base is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the base is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the base is not mentioned, but the base of the rice cooker is mentioned.", + 0.5 + ], + [ + "The base or the rice cooker is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The material of the base is not mentioned, but the base of the rice cooker is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The red crates are mentioned in the description.", + -1 + ], + [ + "The red crates are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The red crates are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The generator is mentioned in the description.", + -1 + ], + [ + "The generator is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The generator is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The steam vent of the rice cooker is mentioned in the description.", + -1 + ], + [ + "The steam vent of the rice cooker is not mentioned in the description.", + 1 + ], + [ + "The rice cooker is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The steam vent of the rice cooker is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The inner pot of the rice cooker is mentioned in the description.", + -1 + ], + [ + "The inner pot of the rice cooker is not mentioned in the description.", + 1 + ], + [ + "The rice cooker is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The inner pot of the rice cooker is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The measuring cup of the rice cooker is mentioned in the description.", + -1 + ], + [ + "The measuring cup of the rice cooker is not mentioned in the description.", + 1 + ], + [ + "The rice cooker is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The measuring cup of the rice cooker is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is rice cooker. Based on the image, is it likely that the object in the description is given class: rice cooker or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel rice cooker with a black handle on top. The front features a digital display screen in the center, surrounded by various buttons and controls. The buttons are arranged in a circular pattern around the display, with additional buttons below. The cooker has a sleek, modern design with a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is rice cooker. 
Based on the image, is it likely that the object in the description is given class: rice cooker or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel rice cooker with a black handle on top. The front features a digital display screen in the center, surrounded by various buttons and controls. The buttons are arranged in a circular pattern around the display, with additional buttons below. The cooker has a sleek, modern design with a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the rice cooker is mentioned in the description and is silver and black.\nB. The color of the rice cooker is mentioned in the description but is not silver and black.\nC. The color of the rice cooker is not mentioned.\nD. The rice cooker is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel rice cooker with a black handle on top. The front features a digital display screen in the center, surrounded by various buttons and controls. The buttons are arranged in a circular pattern around the display, with additional buttons below. 
The cooker has a sleek, modern design with a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the handle is mentioned in the description and is curved.\nB. The shape of the handle is mentioned in the description but is not curved.\nC. The shape of the handle is not mentioned, but the handle of the rice cooker is mentioned.\nD. The handle or the rice cooker is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel rice cooker with a black handle on top. The front features a digital display screen in the center, surrounded by various buttons and controls. The buttons are arranged in a circular pattern around the display, with additional buttons below. The cooker has a sleek, modern design with a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the display is mentioned in the description and is rectangular.\nB. The shape of the display is mentioned in the description but is not rectangular.\nC. The shape of the display is not mentioned, but the display of the rice cooker is mentioned.\nD. The display or the rice cooker is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel rice cooker with a black handle on top. The front features a digital display screen in the center, surrounded by various buttons and controls. The buttons are arranged in a circular pattern around the display, with additional buttons below. The cooker has a sleek, modern design with a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the body is mentioned in the description and is silver.\nB. The color of the body is mentioned in the description but is not silver.\nC. The color of the body is not mentioned, but the body of the rice cooker is mentioned.\nD. The body or the rice cooker is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel rice cooker with a black handle on top. The front features a digital display screen in the center, surrounded by various buttons and controls. The buttons are arranged in a circular pattern around the display, with additional buttons below. The cooker has a sleek, modern design with a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the base is mentioned in the description and is plastic.\nB. The material of the base is mentioned in the description but is not plastic.\nC. 
The material of the base is not mentioned, but the base of the rice cooker is mentioned.\nD. The base or the rice cooker is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel rice cooker with a black handle on top. The front features a digital display screen in the center, surrounded by various buttons and controls. The buttons are arranged in a circular pattern around the display, with additional buttons below. The cooker has a sleek, modern design with a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The red crates are mentioned in the description.\nB. The red crates are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel rice cooker with a black handle on top. The front features a digital display screen in the center, surrounded by various buttons and controls. The buttons are arranged in a circular pattern around the display, with additional buttons below. 
The cooker has a sleek, modern design with a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The generator is mentioned in the description.\nB. The generator is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel rice cooker with a black handle on top. The front features a digital display screen in the center, surrounded by various buttons and controls. The buttons are arranged in a circular pattern around the display, with additional buttons below. The cooker has a sleek, modern design with a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The steam vent of the rice cooker is mentioned in the description.\nB. The steam vent of the rice cooker is not mentioned in the description.\nC. The rice cooker is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel rice cooker with a black handle on top. 
The front features a digital display screen in the center, surrounded by various buttons and controls. The buttons are arranged in a circular pattern around the display, with additional buttons below. The cooker has a sleek, modern design with a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The inner pot of the rice cooker is mentioned in the description.\nB. The inner pot of the rice cooker is not mentioned in the description.\nC. The rice cooker is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel rice cooker with a black handle on top. The front features a digital display screen in the center, surrounded by various buttons and controls. The buttons are arranged in a circular pattern around the display, with additional buttons below. The cooker has a sleek, modern design with a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The measuring cup of the rice cooker is mentioned in the description.\nB. The measuring cup of the rice cooker is not mentioned in the description.\nC. The rice cooker is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The color of the rice cooker is mentioned in the description and is silver and black.", + "C. The shape of the handle is not mentioned, but the handle of the rice cooker is mentioned.", + "C. 
The shape of the display is not mentioned, but the display of the rice cooker is mentioned.", + "A. The color of the body is mentioned in the description and is silver.", + "C. The material of the base is not mentioned, but the base of the rice cooker is mentioned.", + "B. The red crates are not mentioned in the description.", + "B. The generator is not mentioned in the description.", + "B. The steam vent of the rice cooker is not mentioned in the description.", + "B. The inner pot of the rice cooker is not mentioned in the description.", + "B. The measuring cup of the rice cooker is not mentioned in the description." + ], + "score": 0.85, + "score_pos": 0.7, + "score_neg": 1.0, + "recognition_result": true + }, + "5718392": { + "pred": "A woven basket with a dark brown color and a pattern of interlocking diamond shapes, featuring a sturdy, slightly curved handle.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the storage box is mentioned in the description but is not wicker/woven.", + -1 + ], + [ + "The storage box is not mentioned.", + 0 + ], + [ + "The material of the storage box is not mentioned.", + 0 + ], + [ + "The material of the storage box is mentioned in the description and is wicker/woven.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the storage box is mentioned in the description and is wicker/woven.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the storage box is mentioned in the description but is not woven.", + -1 + ], + [ + "The storage box is not mentioned.", + 0 + ], + [ + "The texture of the storage box is not mentioned.", + 0 + ], + [ + "The texture of the storage box is mentioned in the description and is woven.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The texture of the storage box is mentioned in the description and is woven.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the storage box is mentioned in the description but is not brown.", + -1 + ], + [ + "The storage box is not mentioned.", + 0 + ], + [ + "The color of the storage box is not mentioned.", + 0 + ], + [ + "The color of the storage box is mentioned in the description and is brown.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the storage box is mentioned in the description and is brown.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wheels of the storage box are mentioned in the description.", + -1 + ], + [ + "The wheels of the storage box are not mentioned in the description.", + 1 + ], + [ + "The storage box is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The wheels of the storage box are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The utensils are mentioned in the description.", + -1 + ], + [ + "The utensils are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The utensils are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle of the storage box is mentioned in the description.", + -1 + ], + [ + "The handle of the storage box is not mentioned in the description.", + 1 + ], + [ + "The storage box is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The handle of the storage box is mentioned in the description.", + "pred_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lock of the storage box is mentioned in the description.", + -1 + ], + [ + "The lock of the storage box is not mentioned in the description.", + 1 + ], + [ + "The storage box is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The lock of the storage box is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The blue canopy is mentioned in the description.", + -1 + ], + [ + "The blue canopy is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The blue canopy is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is storage box. Based on the image, is it likely that the object in the description is given class: storage box or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woven basket with a dark brown color and a pattern of interlocking diamond shapes, featuring a sturdy, slightly curved handle.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is storage box. Based on the image, is it likely that the object in the description is given class: storage box or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woven basket with a dark brown color and a pattern of interlocking diamond shapes, featuring a sturdy, slightly curved handle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the storage box is mentioned in the description but is not wicker/woven.\nB. The storage box is not mentioned.\nC. The material of the storage box is not mentioned.\nD. The material of the storage box is mentioned in the description and is wicker/woven.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woven basket with a dark brown color and a pattern of interlocking diamond shapes, featuring a sturdy, slightly curved handle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the storage box is mentioned in the description but is not woven.\nB. The storage box is not mentioned.\nC. The texture of the storage box is not mentioned.\nD. The texture of the storage box is mentioned in the description and is woven.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woven basket with a dark brown color and a pattern of interlocking diamond shapes, featuring a sturdy, slightly curved handle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the storage box is mentioned in the description but is not brown.\nB. The storage box is not mentioned.\nC. The color of the storage box is not mentioned.\nD. The color of the storage box is mentioned in the description and is brown.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woven basket with a dark brown color and a pattern of interlocking diamond shapes, featuring a sturdy, slightly curved handle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wheels of the storage box are mentioned in the description.\nB. The wheels of the storage box are not mentioned in the description.\nC. The storage box is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woven basket with a dark brown color and a pattern of interlocking diamond shapes, featuring a sturdy, slightly curved handle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The utensils are mentioned in the description.\nB. The utensils are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA woven basket with a dark brown color and a pattern of interlocking diamond shapes, featuring a sturdy, slightly curved handle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handle of the storage box is mentioned in the description.\nB. The handle of the storage box is not mentioned in the description.\nC. The storage box is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woven basket with a dark brown color and a pattern of interlocking diamond shapes, featuring a sturdy, slightly curved handle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lock of the storage box is mentioned in the description.\nB. The lock of the storage box is not mentioned in the description.\nC. The storage box is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA woven basket with a dark brown color and a pattern of interlocking diamond shapes, featuring a sturdy, slightly curved handle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The blue canopy is mentioned in the description.\nB. The blue canopy is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The material of the storage box is mentioned in the description and is wicker/woven.", + "D. The texture of the storage box is mentioned in the description and is woven.", + "D. The color of the storage box is mentioned in the description and is brown.", + "B. The wheels of the storage box are not mentioned in the description.", + "B. The utensils are not mentioned in the description.", + "A. The handle of the storage box is mentioned in the description.", + "B. The lock of the storage box is not mentioned in the description.", + "B. The blue canopy is not mentioned in the description." + ], + "score": 0.75, + "score_pos": 1.0, + "score_neg": 0.6, + "recognition_result": true + }, + "6037272": { + "pred": "A green shampoo bottle with a slightly curved shape, featuring a white label with text and a small orange logo.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle or the bottle is not mentioned.", + 0 + ], + [ + "The material of the bottle is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the bottle is not mentioned, but the bottle of the bottle is mentioned.", + 0.5 + ], + [ + "The material of the bottle is mentioned in the description but is not plastic.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The material of the bottle is not mentioned, but the bottle of the bottle is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cap or the bottle is not mentioned.", + 0 + ], + [ + "The color of the cap is mentioned in the description and is green.", + 1 + ], + [ + "The color of the cap is not mentioned, but the cap of the bottle is mentioned.", + 0.5 + ], + [ + "The color of the cap is mentioned in the description but is not green.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the cap is mentioned in the description and is green.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The text on the label or the bottle are not mentioned.", + 0 + ], + [ + "The color of the text on the label is mentioned in the description and is white.", + 1 + ], + [ + "The color of the text on the label is not mentioned, but the text on the label of the bottle are mentioned.", + 0.5 + ], + [ + "The color of the text on the label is mentioned in the description but is not white.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The text on the label or the bottle are not mentioned.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle is not mentioned.", + 0 + ], + [ + "The color of the bottle is mentioned in the description and is green.", + 1 + ], + [ + "The color of the bottle is not mentioned.", + 0 + ], + [ + "The color of the bottle is mentioned in the description but is not green.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the bottle is mentioned in the description and is green.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cap/top or the bottle is not mentioned.", + 0 + ], + [ + "The shape of the cap/top is mentioned in the description and is flat or tapered.", + 1 + ], + [ + "The shape of the cap/top is not mentioned, but the cap/top of the bottle is mentioned.", + 0.5 + ], + [ + "The shape of the cap/top is mentioned in the description but is not flat or tapered.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the cap/top is not mentioned, but the cap/top of the bottle is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The nozzle of the bottle is mentioned in the description.", + -1 + ], + [ + "The bottle is not mentioned in the description.", + 0 + ], + [ + "The nozzle of the bottle is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The nozzle of the bottle is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shower curtain is mentioned in the description.", + -1 + ], + [ + "The shower curtain is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The shower curtain is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bathtub is mentioned in the description.", + -1 + ], + [ + "The bathtub is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bathtub is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle of the bottle is mentioned in the description.", + -1 + ], + [ + "The bottle is not mentioned in the description.", + 0 + ], + [ + "The handle of the bottle is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The handle of the bottle is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pump of the bottle is mentioned in the description.", + -1 + ], + [ + "The bottle is not mentioned in the description.", + 0 + ], + [ + "The pump of the bottle is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The pump of the bottle is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is bottle. Based on the image, is it likely that the object in the description is given class: bottle or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green shampoo bottle with a slightly curved shape, featuring a white label with text and a small orange logo.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is bottle. Based on the image, is it likely that the object in the description is given class: bottle or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green shampoo bottle with a slightly curved shape, featuring a white label with text and a small orange logo.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bottle or the bottle is not mentioned.\nB. The material of the bottle is mentioned in the description and is plastic.\nC. The material of the bottle is not mentioned, but the bottle of the bottle is mentioned.\nD. The material of the bottle is mentioned in the description but is not plastic.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA green shampoo bottle with a slightly curved shape, featuring a white label with text and a small orange logo.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cap or the bottle is not mentioned.\nB. The color of the cap is mentioned in the description and is green.\nC. The color of the cap is not mentioned, but the cap of the bottle is mentioned.\nD. The color of the cap is mentioned in the description but is not green.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green shampoo bottle with a slightly curved shape, featuring a white label with text and a small orange logo.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The text on the label or the bottle are not mentioned.\nB. The color of the text on the label is mentioned in the description and is white.\nC. The color of the text on the label is not mentioned, but the text on the label of the bottle are mentioned.\nD. The color of the text on the label is mentioned in the description but is not white.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA green shampoo bottle with a slightly curved shape, featuring a white label with text and a small orange logo.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bottle is not mentioned.\nB. The color of the bottle is mentioned in the description and is green.\nC. The color of the bottle is not mentioned.\nD. The color of the bottle is mentioned in the description but is not green.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green shampoo bottle with a slightly curved shape, featuring a white label with text and a small orange logo.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cap/top or the bottle is not mentioned.\nB. The shape of the cap/top is mentioned in the description and is flat or tapered.\nC. The shape of the cap/top is not mentioned, but the cap/top of the bottle is mentioned.\nD. The shape of the cap/top is mentioned in the description but is not flat or tapered.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA green shampoo bottle with a slightly curved shape, featuring a white label with text and a small orange logo.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The nozzle of the bottle is mentioned in the description.\nB. The bottle is not mentioned in the description.\nC. The nozzle of the bottle is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green shampoo bottle with a slightly curved shape, featuring a white label with text and a small orange logo.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shower curtain is mentioned in the description.\nB. The shower curtain is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green shampoo bottle with a slightly curved shape, featuring a white label with text and a small orange logo.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The bathtub is mentioned in the description.\nB. The bathtub is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green shampoo bottle with a slightly curved shape, featuring a white label with text and a small orange logo.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handle of the bottle is mentioned in the description.\nB. The bottle is not mentioned in the description.\nC. The handle of the bottle is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green shampoo bottle with a slightly curved shape, featuring a white label with text and a small orange logo.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pump of the bottle is mentioned in the description.\nB. The bottle is not mentioned in the description.\nC. The pump of the bottle is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The material of the bottle is not mentioned, but the bottle of the bottle is mentioned.", + "B. 
The color of the cap is mentioned in the description and is green.", + "A. The text on the label or the bottle are not mentioned.", + "B. The color of the bottle is mentioned in the description and is green.", + "C. The shape of the cap/top is not mentioned, but the cap/top of the bottle is mentioned.", + "C. The nozzle of the bottle is not mentioned in the description.", + "B. The shower curtain is not mentioned in the description.", + "B. The bathtub is not mentioned in the description.", + "C. The handle of the bottle is not mentioned in the description.", + "C. The pump of the bottle is not mentioned in the description." + ], + "score": 0.8, + "score_pos": 0.6, + "score_neg": 1.0, + "recognition_result": true + }, + "6820594": { + "pred": "A medium-sized cat with a predominantly white face and chest, featuring a mix of brown and black tabby markings on its back and sides. The cat has large, expressive green eyes and a pink nose. Its ears are pointed and have a light brown color with darker tips.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the ears is mentioned in the description but is not triangular or pointed.", + -1 + ], + [ + "The shape of the ears is not mentioned, but the ears of the cat are mentioned.", + 0.5 + ], + [ + "The shape of the ears is mentioned in the description and is triangular or pointed.", + 1 + ], + [ + "The ears or the cat are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the ears is mentioned in the description and is triangular or pointed.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the eyes is mentioned in the description but is not yellow or green.", + -1 + ], + [ + "The color of the eyes is not mentioned, but the eyes of the cat are mentioned.", + 0.5 + ], + [ + "The color of the eyes is mentioned in the description and is yellow or green.", + 1 + ], + [ + "The eyes or the cat are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the eyes is mentioned in the description and is yellow or green.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the legs is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the legs is not mentioned, but the legs of the cat are mentioned.", + 0.5 + ], + [ + "The color of the legs is mentioned in the description and is white.", + 1 + ], + [ + "The legs or the cat are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the legs is not mentioned, but the legs of the cat are mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the eyes is mentioned in the description but is not round.", + -1 + ], + [ + "The shape of the eyes is not mentioned, but the eyes of the cat are mentioned.", + 0.5 + ], + [ + "The shape of the eyes is mentioned in the description and is round.", + 1 + ], + [ + "The eyes or the cat are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The shape of the eyes is not mentioned, but the eyes of the cat are mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the back is mentioned in the description but is not brown or black.", + -1 + ], + [ + "The color of the back is not mentioned, but the back of the cat is mentioned.", + 0.5 + ], + [ + "The color of the back is mentioned in the description and is brown or black.", + 1 + ], + [ + "The back or the cat is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the back is mentioned in the description and is brown or black.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toothpaste is mentioned in the description.", + -1 + ], + [ + "The toothpaste is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The toothpaste is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toothbrush is mentioned in the description.", + -1 + ], + [ + "The toothbrush is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The toothbrush is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mirror is mentioned in the description.", + -1 + ], + [ + "The mirror is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The mirror is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tail of the cat is mentioned in the description.", + -1 + ], + [ + "The tail of the cat is not mentioned in the description.", + 1 + ], + [ + "The cat is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The tail of the cat is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The faucet is mentioned in the description.", + -1 + ], + [ + "The faucet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The faucet is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is cat. Based on the image, is it likely that the object in the description is given class: cat or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and chest, featuring a mix of brown and black tabby markings on its back and sides. The cat has large, expressive green eyes and a pink nose. 
Its ears are pointed and have a light brown color with darker tips.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is cat. Based on the image, is it likely that the object in the description is given class: cat or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and chest, featuring a mix of brown and black tabby markings on its back and sides. The cat has large, expressive green eyes and a pink nose. Its ears are pointed and have a light brown color with darker tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the ears is mentioned in the description but is not triangular or pointed.\nB. The shape of the ears is not mentioned, but the ears of the cat are mentioned.\nC. The shape of the ears is mentioned in the description and is triangular or pointed.\nD. The ears or the cat are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and chest, featuring a mix of brown and black tabby markings on its back and sides. The cat has large, expressive green eyes and a pink nose. Its ears are pointed and have a light brown color with darker tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the eyes is mentioned in the description but is not yellow or green.\nB. The color of the eyes is not mentioned, but the eyes of the cat are mentioned.\nC. The color of the eyes is mentioned in the description and is yellow or green.\nD. The eyes or the cat are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and chest, featuring a mix of brown and black tabby markings on its back and sides. The cat has large, expressive green eyes and a pink nose. Its ears are pointed and have a light brown color with darker tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the legs is mentioned in the description but is not white.\nB. The color of the legs is not mentioned, but the legs of the cat are mentioned.\nC. The color of the legs is mentioned in the description and is white.\nD. The legs or the cat are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and chest, featuring a mix of brown and black tabby markings on its back and sides. The cat has large, expressive green eyes and a pink nose. Its ears are pointed and have a light brown color with darker tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the eyes is mentioned in the description but is not round.\nB. The shape of the eyes is not mentioned, but the eyes of the cat are mentioned.\nC. The shape of the eyes is mentioned in the description and is round.\nD. The eyes or the cat are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and chest, featuring a mix of brown and black tabby markings on its back and sides. The cat has large, expressive green eyes and a pink nose. Its ears are pointed and have a light brown color with darker tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the back is mentioned in the description but is not brown or black.\nB. The color of the back is not mentioned, but the back of the cat is mentioned.\nC. 
The color of the back is mentioned in the description and is brown or black.\nD. The back or the cat is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and chest, featuring a mix of brown and black tabby markings on its back and sides. The cat has large, expressive green eyes and a pink nose. Its ears are pointed and have a light brown color with darker tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toothpaste is mentioned in the description.\nB. The toothpaste is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and chest, featuring a mix of brown and black tabby markings on its back and sides. The cat has large, expressive green eyes and a pink nose. Its ears are pointed and have a light brown color with darker tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toothbrush is mentioned in the description.\nB. 
The toothbrush is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and chest, featuring a mix of brown and black tabby markings on its back and sides. The cat has large, expressive green eyes and a pink nose. Its ears are pointed and have a light brown color with darker tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mirror is mentioned in the description.\nB. The mirror is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and chest, featuring a mix of brown and black tabby markings on its back and sides. The cat has large, expressive green eyes and a pink nose. Its ears are pointed and have a light brown color with darker tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tail of the cat is mentioned in the description.\nB. The tail of the cat is not mentioned in the description.\nC. 
The cat is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and chest, featuring a mix of brown and black tabby markings on its back and sides. The cat has large, expressive green eyes and a pink nose. Its ears are pointed and have a light brown color with darker tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The faucet is mentioned in the description.\nB. The faucet is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The shape of the ears is mentioned in the description and is triangular or pointed.", + "C. The color of the eyes is mentioned in the description and is yellow or green.", + "B. The color of the legs is not mentioned, but the legs of the cat are mentioned.", + "B. The shape of the eyes is not mentioned, but the eyes of the cat are mentioned.", + "C. The color of the back is mentioned in the description and is brown or black.", + "B. The toothpaste is not mentioned in the description.", + "B. The toothbrush is not mentioned in the description.", + "B. The mirror is not mentioned in the description.", + "B. The tail of the cat is not mentioned in the description.", + "B. The faucet is not mentioned in the description." + ], + "score": 0.9, + "score_pos": 0.8, + "score_neg": 1.0, + "recognition_result": true + }, + "5718424": { + "pred": "A black athletic shoe with a textured surface, featuring a prominent yellow swoosh logo on the side. 
The shoe has a low-top design with a padded collar and a lace-up closure. The sole is thick and rugged, designed for traction.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the laces is not mentioned, but the laces of the sneakers are mentioned.", + 0.5 + ], + [ + "The color of the laces is mentioned in the description but is not white or green.", + -1 + ], + [ + "The color of the laces is mentioned in the description and is white or green.", + 1 + ], + [ + "The laces or the sneakers are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the laces is not mentioned, but the laces of the sneakers are mentioned.", + "pred_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the sneakers is not mentioned.", + 0 + ], + [ + "The color of the sneakers is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the sneakers is mentioned in the description and is black.", + 1 + ], + [ + "The sneakers are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the sneakers is mentioned in the description and is black.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the sneakers is not mentioned.", + 0 + ], + [ + "The material of the sneakers is mentioned in the description but is not rubber.", + -1 + ], + [ + "The material of the sneakers is mentioned in the description and is rubber.", + 1 + ], + [ + "The sneakers are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The material of the sneakers is not mentioned.", + "pred_index": 0, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The table is not mentioned in the description.", + 1 + ], + [ + "The table is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The table is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The base of the sneakers is not mentioned in the description.", + 1 + ], + [ + "The sneakers are not mentioned in the description.", + 0 + ], + [ + "The base of the sneakers is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The base of the sneakers is mentioned in the description.", + "pred_index": 2, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The heel of the sneakers is not mentioned in the description.", + 1 + ], + [ + "The sneakers are not mentioned in the description.", + 0 + ], + [ + "The heel of the sneakers is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The heel of the sneakers is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The person is not mentioned in the description.", + 1 + ], + [ + "The person is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The person is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The food is not mentioned in the description.", + 1 + ], + [ + "The food is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The food is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is sneakers. Based on the image, is it likely that the object in the description is given class: sneakers or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black athletic shoe with a textured surface, featuring a prominent yellow swoosh logo on the side. The shoe has a low-top design with a padded collar and a lace-up closure. The sole is thick and rugged, designed for traction.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is sneakers. Based on the image, is it likely that the object in the description is given class: sneakers or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black athletic shoe with a textured surface, featuring a prominent yellow swoosh logo on the side. The shoe has a low-top design with a padded collar and a lace-up closure. The sole is thick and rugged, designed for traction.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the laces is not mentioned, but the laces of the sneakers are mentioned.\nB. The color of the laces is mentioned in the description but is not white or green.\nC. The color of the laces is mentioned in the description and is white or green.\nD. The laces or the sneakers are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black athletic shoe with a textured surface, featuring a prominent yellow swoosh logo on the side. The shoe has a low-top design with a padded collar and a lace-up closure. The sole is thick and rugged, designed for traction.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the sneakers is not mentioned.\nB. The color of the sneakers is mentioned in the description but is not black.\nC. The color of the sneakers is mentioned in the description and is black.\nD. 
The sneakers are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black athletic shoe with a textured surface, featuring a prominent yellow swoosh logo on the side. The shoe has a low-top design with a padded collar and a lace-up closure. The sole is thick and rugged, designed for traction.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the sneakers is not mentioned.\nB. The material of the sneakers is mentioned in the description but is not rubber.\nC. The material of the sneakers is mentioned in the description and is rubber.\nD. The sneakers are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black athletic shoe with a textured surface, featuring a prominent yellow swoosh logo on the side. The shoe has a low-top design with a padded collar and a lace-up closure. The sole is thick and rugged, designed for traction.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The table is not mentioned in the description.\nB. 
The table is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black athletic shoe with a textured surface, featuring a prominent yellow swoosh logo on the side. The shoe has a low-top design with a padded collar and a lace-up closure. The sole is thick and rugged, designed for traction.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The base of the sneakers is not mentioned in the description.\nB. The sneakers are not mentioned in the description.\nC. The base of the sneakers is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black athletic shoe with a textured surface, featuring a prominent yellow swoosh logo on the side. The shoe has a low-top design with a padded collar and a lace-up closure. The sole is thick and rugged, designed for traction.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The heel of the sneakers is not mentioned in the description.\nB. The sneakers are not mentioned in the description.\nC. 
The heel of the sneakers is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black athletic shoe with a textured surface, featuring a prominent yellow swoosh logo on the side. The shoe has a low-top design with a padded collar and a lace-up closure. The sole is thick and rugged, designed for traction.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The person is not mentioned in the description.\nB. The person is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black athletic shoe with a textured surface, featuring a prominent yellow swoosh logo on the side. The shoe has a low-top design with a padded collar and a lace-up closure. The sole is thick and rugged, designed for traction.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The food is not mentioned in the description.\nB. The food is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The color of the laces is not mentioned, but the laces of the sneakers are mentioned.", + "C. 
The color of the sneakers is mentioned in the description and is black.", + "A. The material of the sneakers is not mentioned.", + "A. The table is not mentioned in the description.", + "C. The base of the sneakers is mentioned in the description.", + "A. The heel of the sneakers is not mentioned in the description.", + "A. The person is not mentioned in the description.", + "A. The food is not mentioned in the description." + ], + "score": 0.5625, + "score_pos": 0.5, + "score_neg": 0.6, + "recognition_result": true + }, + "6055310": { + "pred": "A golden ruler with a series of evenly spaced, small, rectangular notches along its length.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the blade is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the blade is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + 0.5 + ], + [ + "The blade or the tape measure/ruler is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the blade is mentioned in the description and is yellow or golden.", + 1 + ], + [ + "The color of the blade is mentioned in the description but is not yellow or golden.", + -1 + ], + [ + "The color of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + 0.5 + ], + [ + "The blade or the tape measure/ruler is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the blade is mentioned in the description and is yellow or golden.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the blade is mentioned in the description and is long.", + 1 + ], + [ + "The size of the blade is mentioned in the description but is not long.", + -1 + ], + [ + "The size of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + 0.5 + ], + [ + "The blade or the tape measure/ruler is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The size of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the blade is mentioned in the description and is metal.", + 1 + ], + [ + "The material of the blade is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + 0.5 + ], + [ + "The blade or the tape measure/ruler is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The case of the tape measure/ruler is not mentioned in the description.", + 1 + ], + [ + "The tape measure/ruler is not mentioned in the description.", + 0 + ], + [ + "The case of the tape measure/ruler is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The case of the tape measure/ruler is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bathtub is not mentioned in the description.", + 1 + ], + [ + "The bathtub is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The bathtub is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lock of the tape measure/ruler is not mentioned in the description.", + 1 + ], + [ + "The tape measure/ruler is not mentioned in the description.", + 0 + ], + [ + "The lock of the tape measure/ruler is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The lock of the tape measure/ruler is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The door is not mentioned in the description.", + 1 + ], + [ + "The door is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The door is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The box is not mentioned in the description.", + 1 + ], + [ + "The box is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The box is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is tape measure/ruler. 
Based on the image, is it likely that the object in the description is given class: tape measure/ruler or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA golden ruler with a series of evenly spaced, small, rectangular notches along its length.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is tape measure/ruler. Based on the image, is it likely that the object in the description is given class: tape measure/ruler or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA golden ruler with a series of evenly spaced, small, rectangular notches along its length.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the blade is mentioned in the description and is rectangular.\nB. The shape of the blade is mentioned in the description but is not rectangular.\nC. 
The shape of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.\nD. The blade or the tape measure/ruler is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA golden ruler with a series of evenly spaced, small, rectangular notches along its length.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the blade is mentioned in the description and is yellow or golden.\nB. The color of the blade is mentioned in the description but is not yellow or golden.\nC. The color of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.\nD. The blade or the tape measure/ruler is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA golden ruler with a series of evenly spaced, small, rectangular notches along its length.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the blade is mentioned in the description and is long.\nB. The size of the blade is mentioned in the description but is not long.\nC. 
The size of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.\nD. The blade or the tape measure/ruler is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA golden ruler with a series of evenly spaced, small, rectangular notches along its length.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the blade is mentioned in the description and is metal.\nB. The material of the blade is mentioned in the description but is not metal.\nC. The material of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.\nD. The blade or the tape measure/ruler is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA golden ruler with a series of evenly spaced, small, rectangular notches along its length.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The case of the tape measure/ruler is not mentioned in the description.\nB. The tape measure/ruler is not mentioned in the description.\nC. 
The case of the tape measure/ruler is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA golden ruler with a series of evenly spaced, small, rectangular notches along its length.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bathtub is not mentioned in the description.\nB. The bathtub is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA golden ruler with a series of evenly spaced, small, rectangular notches along its length.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lock of the tape measure/ruler is not mentioned in the description.\nB. The tape measure/ruler is not mentioned in the description.\nC. The lock of the tape measure/ruler is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA golden ruler with a series of evenly spaced, small, rectangular notches along its length.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The door is not mentioned in the description.\nB. The door is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA golden ruler with a series of evenly spaced, small, rectangular notches along its length.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The box is not mentioned in the description.\nB. The box is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The shape of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + "A. The color of the blade is mentioned in the description and is yellow or golden.", + "C. The size of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + "C. The material of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + "A. The case of the tape measure/ruler is not mentioned in the description.", + "A. The bathtub is not mentioned in the description.", + "A. The lock of the tape measure/ruler is not mentioned in the description.", + "A. The door is not mentioned in the description.", + "A. The box is not mentioned in the description." 
+ ], + "score": 0.8333333333333334, + "score_pos": 0.625, + "score_neg": 1.0, + "recognition_result": true + }, + "8201777": { + "pred": "A black van with a rear window displaying the word \"TAXI\" in large yellow letters. The van has a yellow license plate with black text and a small white sticker on the lower left side of the rear bumper. The rear lights are vertically aligned on both sides of the van.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the van is mentioned in the description but is not black.", + -1 + ], + [ + "The van is not mentioned.", + 0 + ], + [ + "The color of the van is not mentioned.", + 0 + ], + [ + "The color of the van is mentioned in the description and is black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the van is mentioned in the description and is black.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the sticker is mentioned in the description but is not white.", + -1 + ], + [ + "The sticker or the van is not mentioned.", + 0 + ], + [ + "The color of the sticker is not mentioned, but the sticker of the van is mentioned.", + 0.5 + ], + [ + "The color of the sticker is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the sticker is mentioned in the description and is white.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the taillight is mentioned in the description but is not plastic.", + -1 + ], + [ + "The taillight or the van is not mentioned.", + 0 + ], + [ + "The material of the taillight is not mentioned, but the taillight of the van is mentioned.", + 0.5 + ], + [ + "The material of the taillight is mentioned in the description and is plastic.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the taillight is not mentioned, but the taillight of the van is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the license plate is mentioned in the description but is not yellow.", + -1 + ], + [ + "The license plate or the van is not mentioned.", + 0 + ], + [ + "The color of the license plate is not mentioned, but the license plate of the van is mentioned.", + 0.5 + ], + [ + "The color of the license plate is mentioned in the description and is yellow.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the license plate is mentioned in the description and is yellow.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The text of the sign is mentioned in the description but is not taxi.", + -1 + ], + [ + "The sign or the van is not mentioned.", + 0 + ], + [ + "The text of the sign is not mentioned, but the sign of the van is mentioned.", + 0.5 + ], + [ + "The text of the sign is mentioned in the description and is taxi.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The text of the sign is mentioned in the description and is taxi.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The van is not mentioned in the description.", + 0 + ], + [ + "The grill of the van is not mentioned in the description.", + 1 + ], + [ + "The grill of the van is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The grill of the van is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The building is not mentioned in the description.", + 1 + ], + [ + "The building is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The building is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The van is not mentioned in the description.", + 0 + ], + [ + "The front bumper of the van is not mentioned in the description.", + 1 + ], + [ + "The front bumper of the van is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The front bumper of the van is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The van is not mentioned in the description.", + 0 + ], + [ + "The antenna of the van is not mentioned in the description.", + 1 + ], + [ + "The antenna of the van is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The antenna of the van is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The person is not mentioned in the description.", + 1 + ], + [ + "The person is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The person is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is van. Based on the image, is it likely that the object in the description is given class: van or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in large yellow letters. The van has a yellow license plate with black text and a small white sticker on the lower left side of the rear bumper. The rear lights are vertically aligned on both sides of the van.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is van. Based on the image, is it likely that the object in the description is given class: van or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in large yellow letters. The van has a yellow license plate with black text and a small white sticker on the lower left side of the rear bumper. The rear lights are vertically aligned on both sides of the van.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the van is mentioned in the description but is not black.\nB. The van is not mentioned.\nC. The color of the van is not mentioned.\nD. The color of the van is mentioned in the description and is black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in large yellow letters. The van has a yellow license plate with black text and a small white sticker on the lower left side of the rear bumper. The rear lights are vertically aligned on both sides of the van.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the sticker is mentioned in the description but is not white.\nB. The sticker or the van is not mentioned.\nC. The color of the sticker is not mentioned, but the sticker of the van is mentioned.\nD. 
The color of the sticker is mentioned in the description and is white.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in large yellow letters. The van has a yellow license plate with black text and a small white sticker on the lower left side of the rear bumper. The rear lights are vertically aligned on both sides of the van.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the taillight is mentioned in the description but is not plastic.\nB. The taillight or the van is not mentioned.\nC. The material of the taillight is not mentioned, but the taillight of the van is mentioned.\nD. The material of the taillight is mentioned in the description and is plastic.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in large yellow letters. The van has a yellow license plate with black text and a small white sticker on the lower left side of the rear bumper. 
The rear lights are vertically aligned on both sides of the van.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the license plate is mentioned in the description but is not yellow.\nB. The license plate or the van is not mentioned.\nC. The color of the license plate is not mentioned, but the license plate of the van is mentioned.\nD. The color of the license plate is mentioned in the description and is yellow.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in large yellow letters. The van has a yellow license plate with black text and a small white sticker on the lower left side of the rear bumper. The rear lights are vertically aligned on both sides of the van.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The text of the sign is mentioned in the description but is not taxi.\nB. The sign or the van is not mentioned.\nC. The text of the sign is not mentioned, but the sign of the van is mentioned.\nD. The text of the sign is mentioned in the description and is taxi.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in large yellow letters. The van has a yellow license plate with black text and a small white sticker on the lower left side of the rear bumper. The rear lights are vertically aligned on both sides of the van.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The van is not mentioned in the description.\nB. The grill of the van is not mentioned in the description.\nC. The grill of the van is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in large yellow letters. The van has a yellow license plate with black text and a small white sticker on the lower left side of the rear bumper. The rear lights are vertically aligned on both sides of the van.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The building is not mentioned in the description.\nB. The building is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in large yellow letters. The van has a yellow license plate with black text and a small white sticker on the lower left side of the rear bumper. The rear lights are vertically aligned on both sides of the van.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The van is not mentioned in the description.\nB. The front bumper of the van is not mentioned in the description.\nC. The front bumper of the van is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in large yellow letters. The van has a yellow license plate with black text and a small white sticker on the lower left side of the rear bumper. The rear lights are vertically aligned on both sides of the van.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The van is not mentioned in the description.\nB. The antenna of the van is not mentioned in the description.\nC. The antenna of the van is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in large yellow letters. The van has a yellow license plate with black text and a small white sticker on the lower left side of the rear bumper. The rear lights are vertically aligned on both sides of the van.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The person is not mentioned in the description.\nB. The person is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the van is mentioned in the description and is black.", + "D. The color of the sticker is mentioned in the description and is white.", + "C. The material of the taillight is not mentioned, but the taillight of the van is mentioned.", + "D. The color of the license plate is mentioned in the description and is yellow.", + "D. The text of the sign is mentioned in the description and is taxi.", + "B. The grill of the van is not mentioned in the description.", + "A. The building is not mentioned in the description.", + "B. The front bumper of the van is not mentioned in the description.", + "B. The antenna of the van is not mentioned in the description.", + "A. The person is not mentioned in the description." + ], + "score": 0.95, + "score_pos": 0.9, + "score_neg": 1.0, + "recognition_result": true + }, + "8331685": { + "pred": "The earphone features a sleek, curved design with a dark gray color. The earpiece is circular and appears to be cushioned for comfort. 
The headband is also dark gray and has a smooth, slightly glossy finish.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The headphone is not mentioned.", + 0 + ], + [ + "The color of the headphone is not mentioned.", + 0 + ], + [ + "The color of the headphone is mentioned in the description and is black.", + 1 + ], + [ + "The color of the headphone is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the headphone is mentioned in the description but is not black.", + "pred_index": 3, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ear cup or the headphone is not mentioned.", + 0 + ], + [ + "The type of the ear cup is not mentioned, but the ear cup of the headphone is mentioned.", + 0.5 + ], + [ + "The type of the ear cup is mentioned in the description and is cushioned.", + 1 + ], + [ + "The type of the ear cup is mentioned in the description but is not cushioned.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The type of the ear cup is mentioned in the description and is cushioned.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The headband or the headphone is not mentioned.", + 0 + ], + [ + "The shape of the headband is not mentioned, but the headband of the headphone is mentioned.", + 0.5 + ], + [ + "The shape of the headband is mentioned in the description and is curved.", + 1 + ], + [ + "The shape of the headband is mentioned in the description but is not curved.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the headband is mentioned in the description and is curved.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The microphone of the headphone is not mentioned in the description.", + 1 + ], + [ + "The microphone of the headphone is mentioned in the description.", + -1 + ], + [ + "The headphone is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The microphone of the headphone is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The keyboard is not mentioned in the description.", + 1 + ], + [ + "The keyboard is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The keyboard is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plant is not mentioned in the description.", + 1 + ], + [ + "The plant is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The plant is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clock is not mentioned in the description.", + 1 + ], + [ + "The clock is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The clock is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle is not mentioned in the description.", + 1 + ], + [ + "The bottle is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The bottle is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is headphone. Based on the image, is it likely that the object in the description is given class: headphone or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe earphone features a sleek, curved design with a dark gray color. The earpiece is circular and appears to be cushioned for comfort. The headband is also dark gray and has a smooth, slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is headphone. Based on the image, is it likely that the object in the description is given class: headphone or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe earphone features a sleek, curved design with a dark gray color. The earpiece is circular and appears to be cushioned for comfort. The headband is also dark gray and has a smooth, slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The headphone is not mentioned.\nB. The color of the headphone is not mentioned.\nC. The color of the headphone is mentioned in the description and is black.\nD. The color of the headphone is mentioned in the description but is not black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe earphone features a sleek, curved design with a dark gray color. The earpiece is circular and appears to be cushioned for comfort. The headband is also dark gray and has a smooth, slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ear cup or the headphone is not mentioned.\nB. The type of the ear cup is not mentioned, but the ear cup of the headphone is mentioned.\nC. The type of the ear cup is mentioned in the description and is cushioned.\nD. 
The type of the ear cup is mentioned in the description but is not cushioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe earphone features a sleek, curved design with a dark gray color. The earpiece is circular and appears to be cushioned for comfort. The headband is also dark gray and has a smooth, slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The headband or the headphone is not mentioned.\nB. The shape of the headband is not mentioned, but the headband of the headphone is mentioned.\nC. The shape of the headband is mentioned in the description and is curved.\nD. The shape of the headband is mentioned in the description but is not curved.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe earphone features a sleek, curved design with a dark gray color. The earpiece is circular and appears to be cushioned for comfort. The headband is also dark gray and has a smooth, slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The microphone of the headphone is not mentioned in the description.\nB. The microphone of the headphone is mentioned in the description.\nC. The headphone is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe earphone features a sleek, curved design with a dark gray color. The earpiece is circular and appears to be cushioned for comfort. The headband is also dark gray and has a smooth, slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The keyboard is not mentioned in the description.\nB. The keyboard is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe earphone features a sleek, curved design with a dark gray color. The earpiece is circular and appears to be cushioned for comfort. The headband is also dark gray and has a smooth, slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The plant is not mentioned in the description.\nB. 
The plant is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe earphone features a sleek, curved design with a dark gray color. The earpiece is circular and appears to be cushioned for comfort. The headband is also dark gray and has a smooth, slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The clock is not mentioned in the description.\nB. The clock is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe earphone features a sleek, curved design with a dark gray color. The earpiece is circular and appears to be cushioned for comfort. The headband is also dark gray and has a smooth, slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bottle is not mentioned in the description.\nB. The bottle is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the headphone is mentioned in the description but is not black.", + "C. The type of the ear cup is mentioned in the description and is cushioned.", + "C. 
The shape of the headband is mentioned in the description and is curved.", + "A. The microphone of the headphone is not mentioned in the description.", + "A. The keyboard is not mentioned in the description.", + "A. The plant is not mentioned in the description.", + "A. The clock is not mentioned in the description.", + "A. The bottle is not mentioned in the description." + ], + "score": 0.75, + "score_pos": 0.3333333333333333, + "score_neg": 1.0, + "recognition_result": true + }, + "8331718": { + "pred": "A black spiral-bound notebook with a white cover and the word \"Xtreme\" written in white on the cover.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the notebook is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the notebook is mentioned in the description and is black.", + 1 + ], + [ + "The notebook is not mentioned.", + 0 + ], + [ + "The color of the notebook is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the notebook is mentioned in the description and is black.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the notebook is mentioned in the description but is not spiral-bound.", + -1 + ], + [ + "The type of the notebook is mentioned in the description and is spiral-bound.", + 1 + ], + [ + "The notebook is not mentioned.", + 0 + ], + [ + "The type of the notebook is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The type of the notebook is mentioned in the description and is spiral-bound.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The text of the cover is mentioned in the description but is not YAHOO.", + -1 + ], + [ + "The text of the cover is mentioned in the description and is YAHOO.", + 1 + ], + [ + "The cover or the notebook is not mentioned.", + 0 + ], + [ + "The text of the cover is not mentioned, but the cover of the notebook is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The text of the cover is mentioned in the description but is not YAHOO.", + "pred_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the logo/text on the cover is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the logo/text on the cover is mentioned in the description and is white.", + 1 + ], + [ + "The logo/text on the cover or the notebook are not mentioned.", + 0 + ], + [ + "The color of the logo/text on the cover is not mentioned, but the logo/text on the cover of the notebook are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the logo/text on the cover is mentioned in the description and is white.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chair is mentioned in the description.", + -1 + ], + [ + "The chair is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The chair is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle is mentioned in the description.", + -1 + ], + [ + "The bottle is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bottle is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bookmark of the notebook is mentioned in the description.", + -1 + ], + [ + "The notebook is not mentioned in the description.", + 0 + ], + [ + "The bookmark of the notebook is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The bookmark of the notebook is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouse is mentioned in the description.", + -1 + ], + [ + "The mouse is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The mouse is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The keyboard is mentioned in the description.", + -1 + ], + [ + "The keyboard is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The keyboard is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is notebook. 
Based on the image, is it likely that the object in the description is given class: notebook or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover and the word \"Xtreme\" written in white on the cover.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is notebook. Based on the image, is it likely that the object in the description is given class: notebook or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover and the word \"Xtreme\" written in white on the cover.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the notebook is mentioned in the description but is not black.\nB. The color of the notebook is mentioned in the description and is black.\nC. The notebook is not mentioned.\nD. 
The color of the notebook is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover and the word \"Xtreme\" written in white on the cover.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The type of the notebook is mentioned in the description but is not spiral-bound.\nB. The type of the notebook is mentioned in the description and is spiral-bound.\nC. The notebook is not mentioned.\nD. The type of the notebook is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover and the word \"Xtreme\" written in white on the cover.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The text of the cover is mentioned in the description but is not YAHOO.\nB. The text of the cover is mentioned in the description and is YAHOO.\nC. The cover or the notebook is not mentioned.\nD. 
The text of the cover is not mentioned, but the cover of the notebook is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover and the word \"Xtreme\" written in white on the cover.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the logo/text on the cover is mentioned in the description but is not white.\nB. The color of the logo/text on the cover is mentioned in the description and is white.\nC. The logo/text on the cover or the notebook are not mentioned.\nD. The color of the logo/text on the cover is not mentioned, but the logo/text on the cover of the notebook are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover and the word \"Xtreme\" written in white on the cover.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chair is mentioned in the description.\nB. The chair is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover and the word \"Xtreme\" written in white on the cover.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bottle is mentioned in the description.\nB. The bottle is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover and the word \"Xtreme\" written in white on the cover.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bookmark of the notebook is mentioned in the description.\nB. The notebook is not mentioned in the description.\nC. The bookmark of the notebook is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover and the word \"Xtreme\" written in white on the cover.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mouse is mentioned in the description.\nB. The mouse is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover and the word \"Xtreme\" written in white on the cover.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The keyboard is mentioned in the description.\nB. The keyboard is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the notebook is mentioned in the description and is black.", + "B. The type of the notebook is mentioned in the description and is spiral-bound.", + "A. The text of the cover is mentioned in the description but is not YAHOO.", + "B. The color of the logo/text on the cover is mentioned in the description and is white.", + "B. The chair is not mentioned in the description.", + "B. The bottle is not mentioned in the description.", + "C. The bookmark of the notebook is not mentioned in the description.", + "B. The mouse is not mentioned in the description.", + "B. The keyboard is not mentioned in the description." 
+ ], + "score": 0.7777777777777778, + "score_pos": 0.5, + "score_neg": 1.0, + "recognition_result": true + }, + "8557176": { + "pred": "The watch features a rectangular gold-toned case with a black dial. It has a black leather strap with white stitching and a small metallic buckle.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The case or the watch is not mentioned.", + 0 + ], + [ + "The color of the case is mentioned in the description but is not golden.", + -1 + ], + [ + "The color of the case is not mentioned, but the case of the watch is mentioned.", + 0.5 + ], + [ + "The color of the case is mentioned in the description and is golden.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the case is mentioned in the description and is golden.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The case or the watch is not mentioned.", + 0 + ], + [ + "The material of the case is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the case is not mentioned, but the case of the watch is mentioned.", + 0.5 + ], + [ + "The material of the case is mentioned in the description and is metal.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the case is mentioned in the description and is metal.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The strap or the watch is not mentioned.", + 0 + ], + [ + "The color of the strap is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the strap is not mentioned, but the strap of the watch is mentioned.", + 0.5 + ], + [ + "The color of the strap is mentioned in the description and is black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the strap is mentioned in the description and is black.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The strap or the watch is not mentioned.", + 0 + ], + [ + "The material of the strap is mentioned in the description but is not leather.", + -1 + ], + [ + "The material of the strap is not mentioned, but the strap of the watch is mentioned.", + 0.5 + ], + [ + "The material of the strap is mentioned in the description and is leather.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the strap is mentioned in the description and is leather.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The food is not mentioned in the description.", + 1 + ], + [ + "The food is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The food is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cup is not mentioned in the description.", + 1 + ], + [ + "The cup is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The cup is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The oven is not mentioned in the description.", + 1 + ], + [ + "The oven is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The oven is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The crab cracker is not mentioned in the description.", + 1 + ], + [ + "The crab cracker is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The crab cracker is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The jar is not mentioned in the description.", + 1 + ], + [ + "The jar is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The jar is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is watch. Based on the image, is it likely that the object in the description is given class: watch or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold-toned case with a black dial. It has a black leather strap with white stitching and a small metallic buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is watch. 
Based on the image, is it likely that the object in the description is given class: watch or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold-toned case with a black dial. It has a black leather strap with white stitching and a small metallic buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The case or the watch is not mentioned.\nB. The color of the case is mentioned in the description but is not golden.\nC. The color of the case is not mentioned, but the case of the watch is mentioned.\nD. The color of the case is mentioned in the description and is golden.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold-toned case with a black dial. It has a black leather strap with white stitching and a small metallic buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The case or the watch is not mentioned.\nB. The material of the case is mentioned in the description but is not metal.\nC. 
The material of the case is not mentioned, but the case of the watch is mentioned.\nD. The material of the case is mentioned in the description and is metal.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold-toned case with a black dial. It has a black leather strap with white stitching and a small metallic buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The strap or the watch is not mentioned.\nB. The color of the strap is mentioned in the description but is not black.\nC. The color of the strap is not mentioned, but the strap of the watch is mentioned.\nD. The color of the strap is mentioned in the description and is black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold-toned case with a black dial. It has a black leather strap with white stitching and a small metallic buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The strap or the watch is not mentioned.\nB. The material of the strap is mentioned in the description but is not leather.\nC. 
The material of the strap is not mentioned, but the strap of the watch is mentioned.\nD. The material of the strap is mentioned in the description and is leather.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold-toned case with a black dial. It has a black leather strap with white stitching and a small metallic buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The food is not mentioned in the description.\nB. The food is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold-toned case with a black dial. It has a black leather strap with white stitching and a small metallic buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cup is not mentioned in the description.\nB. The cup is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold-toned case with a black dial. It has a black leather strap with white stitching and a small metallic buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The oven is not mentioned in the description.\nB. The oven is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold-toned case with a black dial. It has a black leather strap with white stitching and a small metallic buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The crab cracker is not mentioned in the description.\nB. The crab cracker is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold-toned case with a black dial. 
It has a black leather strap with white stitching and a small metallic buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The jar is not mentioned in the description.\nB. The jar is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the case is mentioned in the description and is golden.", + "D. The material of the case is mentioned in the description and is metal.", + "D. The color of the strap is mentioned in the description and is black.", + "D. The material of the strap is mentioned in the description and is leather.", + "A. The food is not mentioned in the description.", + "A. The cup is not mentioned in the description.", + "A. The oven is not mentioned in the description.", + "A. The crab cracker is not mentioned in the description.", + "A. The jar is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "8557195": { + "pred": "The microwave oven features a smooth, curved, off-white exterior with a slightly reflective surface. The visible part of the microwave includes a rounded edge and a small, dark-colored component at the top.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the body is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the body is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the body is not mentioned, but the body of the toaster is mentioned.", + 0.5 + ], + [ + "The body or the toaster is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The material of the body is not mentioned, but the body of the toaster is mentioned.", + "pred_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the toaster is mentioned in the description and is white or beige.", + 1 + ], + [ + "The color of the toaster is mentioned in the description but is not white or beige.", + -1 + ], + [ + "The color of the toaster is not mentioned.", + 0 + ], + [ + "The toaster is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the toaster is mentioned in the description and is white or beige.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the body is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the body is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the body is not mentioned, but the body of the toaster is mentioned.", + 0.5 + ], + [ + "The body or the toaster is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The texture of the body is mentioned in the description and is smooth.", + "pred_index": 0, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lever of the toaster is not mentioned in the description.", + 1 + ], + [ + "The lever of the toaster is mentioned in the description.", + -1 + ], + [ + "The toaster is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The lever of the toaster is not mentioned in the description.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The knife is not mentioned in the description.", + 1 + ], + [ + "The knife is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The knife is not mentioned in the description.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cord of the toaster is not mentioned in the description.", + 1 + ], + [ + "The cord of the toaster is mentioned in the description.", + -1 + ], + [ + "The toaster is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The toaster is not mentioned in the description.", + "pred_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The knob of the toaster is not mentioned in the description.", + 1 + ], + [ + "The knob of the toaster is mentioned in the description.", + -1 + ], + [ + "The toaster is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The knob of the toaster is not mentioned in the description.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The utensil is not mentioned in the description.", + 1 + ], + [ + "The utensil is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The utensil is mentioned in the description.", + "pred_index": 1, + "eval_result": -1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is toaster. 
Based on the image, is it likely that the object in the description is given class: toaster or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "B. No", + "pred_index": 1, + "eval_result": "incorrect" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a smooth, curved, off-white exterior with a slightly reflective surface. The visible part of the microwave includes a rounded edge and a small, dark-colored component at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is toaster. Based on the image, is it likely that the object in the description is given class: toaster or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a smooth, curved, off-white exterior with a slightly reflective surface. The visible part of the microwave includes a rounded edge and a small, dark-colored component at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The material of the body is mentioned in the description and is plastic.\nB. The material of the body is mentioned in the description but is not plastic.\nC. The material of the body is not mentioned, but the body of the toaster is mentioned.\nD. The body or the toaster is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a smooth, curved, off-white exterior with a slightly reflective surface. The visible part of the microwave includes a rounded edge and a small, dark-colored component at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the toaster is mentioned in the description and is white or beige.\nB. The color of the toaster is mentioned in the description but is not white or beige.\nC. The color of the toaster is not mentioned.\nD. The toaster is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a smooth, curved, off-white exterior with a slightly reflective surface. 
The visible part of the microwave includes a rounded edge and a small, dark-colored component at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the body is mentioned in the description and is smooth.\nB. The texture of the body is mentioned in the description but is not smooth.\nC. The texture of the body is not mentioned, but the body of the toaster is mentioned.\nD. The body or the toaster is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a smooth, curved, off-white exterior with a slightly reflective surface. The visible part of the microwave includes a rounded edge and a small, dark-colored component at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lever of the toaster is not mentioned in the description.\nB. The lever of the toaster is mentioned in the description.\nC. The toaster is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a smooth, curved, off-white exterior with a slightly reflective surface. The visible part of the microwave includes a rounded edge and a small, dark-colored component at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The knife is not mentioned in the description.\nB. The knife is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a smooth, curved, off-white exterior with a slightly reflective surface. The visible part of the microwave includes a rounded edge and a small, dark-colored component at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cord of the toaster is not mentioned in the description.\nB. The cord of the toaster is mentioned in the description.\nC. The toaster is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a smooth, curved, off-white exterior with a slightly reflective surface. The visible part of the microwave includes a rounded edge and a small, dark-colored component at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The knob of the toaster is not mentioned in the description.\nB. The knob of the toaster is mentioned in the description.\nC. The toaster is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a smooth, curved, off-white exterior with a slightly reflective surface. The visible part of the microwave includes a rounded edge and a small, dark-colored component at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The utensil is not mentioned in the description.\nB. The utensil is mentioned in the description.\n" + ], + "response": [ + "B. No", + "C. The material of the body is not mentioned, but the body of the toaster is mentioned.", + "A. The color of the toaster is mentioned in the description and is white or beige.", + "A. The texture of the body is mentioned in the description and is smooth.", + "A. The lever of the toaster is not mentioned in the description.", + "A. The knife is not mentioned in the description.", + "C. The toaster is not mentioned in the description.", + "A. 
The knob of the toaster is not mentioned in the description.", + "B. The utensil is mentioned in the description." + ], + "score": -0.125, + "score_pos": 0.0, + "score_neg": -0.2, + "recognition_result": false + }, + "9766617": { + "pred": "The goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The underbelly is white, and the legs and feet are greenish.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The head or the duck/goose is not mentioned.", + 0 + ], + [ + "The color of the head is mentioned in the description and is black.", + 1 + ], + [ + "The color of the head is not mentioned, but the head of the duck/goose is mentioned.", + 0.5 + ], + [ + "The color of the head is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the head is mentioned in the description and is black.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The beak or the duck/goose is not mentioned.", + 0 + ], + [ + "The color of the beak is mentioned in the description and is black.", + 1 + ], + [ + "The color of the beak is not mentioned, but the beak of the duck/goose is mentioned.", + 0.5 + ], + [ + "The color of the beak is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The beak or the duck/goose is not mentioned.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The body or the duck/goose is not mentioned.", + 0 + ], + [ + "The color of the body is mentioned in the description and is grey or brown.", + 1 + ], + [ + "The color of the body is not mentioned, but the body of the duck/goose is mentioned.", + 0.5 + ], + [ + "The color of the body is mentioned in the description but is not grey or brown.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the body is mentioned in the description and is grey or brown.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tail or the duck/goose is not mentioned.", + 0 + ], + [ + "The color of the tail is mentioned in the description and is black and white.", + 1 + ], + [ + "The color of the tail is not mentioned, but the tail of the duck/goose is mentioned.", + 0.5 + ], + [ + "The color of the tail is mentioned in the description but is not black and white.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The tail or the duck/goose is not mentioned.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wings or the duck/goose are not mentioned.", + 0 + ], + [ + "The shape of the wings is mentioned in the description and is folded.", + 1 + ], + [ + "The shape of the wings is not mentioned, but the wings of the duck/goose are mentioned.", + 0.5 + ], + [ + "The shape of the wings is mentioned in the description but is not folded.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the wings is not mentioned, but the wings of the duck/goose are mentioned.", + "pred_index": 2, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The feet of the duck/goose are mentioned in the description.", + -1 + ], + [ + "The feet of the duck/goose are not mentioned in the description.", + 1 + ], + [ + "The duck/goose is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The feet of the duck/goose are mentioned in the description.", + "pred_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mud of the duck/goose is mentioned in the description.", + -1 + ], + [ + "The mud of the duck/goose is not mentioned in the description.", + 1 + ], + [ + "The duck/goose is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The mud of the duck/goose is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The grass are mentioned in the description.", + -1 + ], + [ + "The grass are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The grass are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pigeon is mentioned in the description.", + -1 + ], + [ + "The pigeon is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The pigeon is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tree is mentioned in the description.", + -1 + ], + [ + "The tree is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The tree is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is duck/goose. Based on the image, is it likely that the object in the description is given class: duck/goose or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The underbelly is white, and the legs and feet are greenish.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is duck/goose. Based on the image, is it likely that the object in the description is given class: duck/goose or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The underbelly is white, and the legs and feet are greenish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The head or the duck/goose is not mentioned.\nB. The color of the head is mentioned in the description and is black.\nC. The color of the head is not mentioned, but the head of the duck/goose is mentioned.\nD. The color of the head is mentioned in the description but is not black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The underbelly is white, and the legs and feet are greenish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The beak or the duck/goose is not mentioned.\nB. The color of the beak is mentioned in the description and is black.\nC. The color of the beak is not mentioned, but the beak of the duck/goose is mentioned.\nD. 
The color of the beak is mentioned in the description but is not black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The underbelly is white, and the legs and feet are greenish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The body or the duck/goose is not mentioned.\nB. The color of the body is mentioned in the description and is grey or brown.\nC. The color of the body is not mentioned, but the body of the duck/goose is mentioned.\nD. The color of the body is mentioned in the description but is not grey or brown.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The underbelly is white, and the legs and feet are greenish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The tail or the duck/goose is not mentioned.\nB. The color of the tail is mentioned in the description and is black and white.\nC. The color of the tail is not mentioned, but the tail of the duck/goose is mentioned.\nD. The color of the tail is mentioned in the description but is not black and white.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The underbelly is white, and the legs and feet are greenish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wings or the duck/goose are not mentioned.\nB. The shape of the wings is mentioned in the description and is folded.\nC. The shape of the wings is not mentioned, but the wings of the duck/goose are mentioned.\nD. The shape of the wings is mentioned in the description but is not folded.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. 
The underbelly is white, and the legs and feet are greenish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The feet of the duck/goose are mentioned in the description.\nB. The feet of the duck/goose are not mentioned in the description.\nC. The duck/goose is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The underbelly is white, and the legs and feet are greenish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mud of the duck/goose is mentioned in the description.\nB. The mud of the duck/goose is not mentioned in the description.\nC. The duck/goose is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. 
The underbelly is white, and the legs and feet are greenish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The grass are mentioned in the description.\nB. The grass are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The underbelly is white, and the legs and feet are greenish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pigeon is mentioned in the description.\nB. The pigeon is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The underbelly is white, and the legs and feet are greenish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The tree is mentioned in the description.\nB. The tree is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the head is mentioned in the description and is black.", + "A. The beak or the duck/goose is not mentioned.", + "B. The color of the body is mentioned in the description and is grey or brown.", + "A. The tail or the duck/goose is not mentioned.", + "C. The shape of the wings is not mentioned, but the wings of the duck/goose are mentioned.", + "A. The feet of the duck/goose are mentioned in the description.", + "B. The mud of the duck/goose is not mentioned in the description.", + "B. The grass are not mentioned in the description.", + "B. The pigeon is not mentioned in the description.", + "B. The tree is not mentioned in the description." + ], + "score": 0.55, + "score_pos": 0.5, + "score_neg": 0.6, + "recognition_result": true + }, + "11021544": { + "pred": "The faucet features a sleek, curved design with a polished chrome finish. It has a single lever handle on the right side for controlling water flow and temperature. The spout is slightly arched, extending outward with a smooth, flowing curve.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the faucet is mentioned in the description but is not metallic.", + -1 + ], + [ + "The texture of the faucet is not mentioned.", + 0 + ], + [ + "The faucet is not mentioned.", + 0 + ], + [ + "The texture of the faucet is mentioned in the description and is metallic.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The texture of the faucet is mentioned in the description and is metallic.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the spout is mentioned in the description but is not curved or arc.", + -1 + ], + [ + "The shape of the spout is not mentioned, but the spout of the faucet is mentioned.", + 0.5 + ], + [ + "The spout or the faucet is not mentioned.", + 0 + ], + [ + "The shape of the spout is mentioned in the description and is curved or arc.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the spout is mentioned in the description and is curved or arc.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the faucet is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the faucet is not mentioned.", + 0 + ], + [ + "The faucet is not mentioned.", + 0 + ], + [ + "The material of the faucet is mentioned in the description and is metal.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the faucet is mentioned in the description and is metal.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The soap dispenser of the faucet is not mentioned in the description.", + 1 + ], + [ + "The faucet is not mentioned in the description.", + 0 + ], + [ + "The soap dispenser of the faucet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The soap dispenser of the faucet is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The side spray of the faucet is not mentioned in the description.", + 1 + ], + [ + "The faucet is not mentioned in the description.", + 0 + ], + [ + "The side spray of the faucet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The side spray of the faucet is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chair is not mentioned in the description.", + 1 + ], + [ + "The chair is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The chair is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sprayer of the faucet is not mentioned in the description.", + 1 + ], + [ + "The faucet is not mentioned in the description.", + 0 + ], + [ + "The sprayer of the faucet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The sprayer of the faucet is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cabinet door is not mentioned in the description.", + 1 + ], + [ + "The cabinet door is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The cabinet door is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is faucet. Based on the image, is it likely that the object in the description is given class: faucet or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe faucet features a sleek, curved design with a polished chrome finish. It has a single lever handle on the right side for controlling water flow and temperature. The spout is slightly arched, extending outward with a smooth, flowing curve.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is faucet. Based on the image, is it likely that the object in the description is given class: faucet or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe faucet features a sleek, curved design with a polished chrome finish. 
It has a single lever handle on the right side for controlling water flow and temperature. The spout is slightly arched, extending outward with a smooth, flowing curve.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the faucet is mentioned in the description but is not metallic.\nB. The texture of the faucet is not mentioned.\nC. The faucet is not mentioned.\nD. The texture of the faucet is mentioned in the description and is metallic.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe faucet features a sleek, curved design with a polished chrome finish. It has a single lever handle on the right side for controlling water flow and temperature. The spout is slightly arched, extending outward with a smooth, flowing curve.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the spout is mentioned in the description but is not curved or arc.\nB. The shape of the spout is not mentioned, but the spout of the faucet is mentioned.\nC. The spout or the faucet is not mentioned.\nD. The shape of the spout is mentioned in the description and is curved or arc.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe faucet features a sleek, curved design with a polished chrome finish. It has a single lever handle on the right side for controlling water flow and temperature. The spout is slightly arched, extending outward with a smooth, flowing curve.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the faucet is mentioned in the description but is not metal.\nB. The material of the faucet is not mentioned.\nC. The faucet is not mentioned.\nD. The material of the faucet is mentioned in the description and is metal.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe faucet features a sleek, curved design with a polished chrome finish. It has a single lever handle on the right side for controlling water flow and temperature. The spout is slightly arched, extending outward with a smooth, flowing curve.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The soap dispenser of the faucet is not mentioned in the description.\nB. The faucet is not mentioned in the description.\nC. The soap dispenser of the faucet is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe faucet features a sleek, curved design with a polished chrome finish. It has a single lever handle on the right side for controlling water flow and temperature. The spout is slightly arched, extending outward with a smooth, flowing curve.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The side spray of the faucet is not mentioned in the description.\nB. The faucet is not mentioned in the description.\nC. The side spray of the faucet is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe faucet features a sleek, curved design with a polished chrome finish. It has a single lever handle on the right side for controlling water flow and temperature. The spout is slightly arched, extending outward with a smooth, flowing curve.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chair is not mentioned in the description.\nB. The chair is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe faucet features a sleek, curved design with a polished chrome finish. It has a single lever handle on the right side for controlling water flow and temperature. The spout is slightly arched, extending outward with a smooth, flowing curve.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sprayer of the faucet is not mentioned in the description.\nB. The faucet is not mentioned in the description.\nC. The sprayer of the faucet is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe faucet features a sleek, curved design with a polished chrome finish. It has a single lever handle on the right side for controlling water flow and temperature. The spout is slightly arched, extending outward with a smooth, flowing curve.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cabinet door is not mentioned in the description.\nB. The cabinet door is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The texture of the faucet is mentioned in the description and is metallic.", + "D. The shape of the spout is mentioned in the description and is curved or arc.", + "D. The material of the faucet is mentioned in the description and is metal.", + "A. 
The soap dispenser of the faucet is not mentioned in the description.", + "A. The side spray of the faucet is not mentioned in the description.", + "A. The chair is not mentioned in the description.", + "A. The sprayer of the faucet is not mentioned in the description.", + "A. The cabinet door is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "11775390": { + "pred": "A green rubber shoe with a thick, textured sole and multiple circular holes on the side. The shoe features a black and white design on the side, with a prominent black section and white accents. The upper part of the shoe has a smooth, rounded shape with a slight curve at the top.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sandals are not mentioned.", + 0 + ], + [ + "The color of the sandals is mentioned in the description but is not green.", + -1 + ], + [ + "The color of the sandals is not mentioned.", + 0 + ], + [ + "The color of the sandals is mentioned in the description and is green.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the sandals is mentioned in the description and is green.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sandals are not mentioned.", + 0 + ], + [ + "The style of the sandals is mentioned in the description but is not perforated.", + -1 + ], + [ + "The style of the sandals is not mentioned.", + 0 + ], + [ + "The style of the sandals is mentioned in the description and is perforated.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The style of the sandals is mentioned in the description and is perforated.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sandals are not mentioned.", + 0 + ], + [ + "The material of the sandals is mentioned in the description but is not rubber.", + -1 + ], + [ + "The material of the sandals is not mentioned.", + 0 + ], + [ + "The material of the sandals is mentioned in the description and is rubber.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the sandals is mentioned in the description and is rubber.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The strap or the sandals is not mentioned.", + 0 + ], + [ + "The number of parts of the strap is mentioned in the description but is not one.", + -1 + ], + [ + "The number of parts of the strap is not mentioned, but the strap of the sandals is mentioned.", + 0.5 + ], + [ + "The number of parts of the strap is mentioned in the description and is one.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. The number of parts of the strap is not mentioned, but the strap of the sandals is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The trailer is not mentioned in the description.", + 1 + ], + [ + "The trailer is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The trailer is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tire is not mentioned in the description.", + 1 + ], + [ + "The tire is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The tire is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pumpkins are not mentioned in the description.", + 1 + ], + [ + "The pumpkins are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The pumpkins are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The box is not mentioned in the description.", + 1 + ], + [ + "The box is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The box is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The boy is not mentioned in the description.", + 1 + ], + [ + "The boy is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The boy is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is sandals. Based on the image, is it likely that the object in the description is given class: sandals or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a thick, textured sole and multiple circular holes on the side. The shoe features a black and white design on the side, with a prominent black section and white accents. The upper part of the shoe has a smooth, rounded shape with a slight curve at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is sandals. Based on the image, is it likely that the object in the description is given class: sandals or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a thick, textured sole and multiple circular holes on the side. The shoe features a black and white design on the side, with a prominent black section and white accents. The upper part of the shoe has a smooth, rounded shape with a slight curve at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sandals are not mentioned.\nB. The color of the sandals is mentioned in the description but is not green.\nC. 
The color of the sandals is not mentioned.\nD. The color of the sandals is mentioned in the description and is green.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a thick, textured sole and multiple circular holes on the side. The shoe features a black and white design on the side, with a prominent black section and white accents. The upper part of the shoe has a smooth, rounded shape with a slight curve at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sandals are not mentioned.\nB. The style of the sandals is mentioned in the description but is not perforated.\nC. The style of the sandals is not mentioned.\nD. The style of the sandals is mentioned in the description and is perforated.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a thick, textured sole and multiple circular holes on the side. The shoe features a black and white design on the side, with a prominent black section and white accents. 
The upper part of the shoe has a smooth, rounded shape with a slight curve at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sandals are not mentioned.\nB. The material of the sandals is mentioned in the description but is not rubber.\nC. The material of the sandals is not mentioned.\nD. The material of the sandals is mentioned in the description and is rubber.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a thick, textured sole and multiple circular holes on the side. The shoe features a black and white design on the side, with a prominent black section and white accents. The upper part of the shoe has a smooth, rounded shape with a slight curve at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The strap or the sandals is not mentioned.\nB. The number of parts of the strap is mentioned in the description but is not one.\nC. The number of parts of the strap is not mentioned, but the strap of the sandals is mentioned.\nD. The number of parts of the strap is mentioned in the description and is one.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a thick, textured sole and multiple circular holes on the side. The shoe features a black and white design on the side, with a prominent black section and white accents. The upper part of the shoe has a smooth, rounded shape with a slight curve at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The trailer is not mentioned in the description.\nB. The trailer is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a thick, textured sole and multiple circular holes on the side. The shoe features a black and white design on the side, with a prominent black section and white accents. The upper part of the shoe has a smooth, rounded shape with a slight curve at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tire is not mentioned in the description.\nB. The tire is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a thick, textured sole and multiple circular holes on the side. The shoe features a black and white design on the side, with a prominent black section and white accents. The upper part of the shoe has a smooth, rounded shape with a slight curve at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pumpkins are not mentioned in the description.\nB. The pumpkins are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a thick, textured sole and multiple circular holes on the side. The shoe features a black and white design on the side, with a prominent black section and white accents. The upper part of the shoe has a smooth, rounded shape with a slight curve at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The box is not mentioned in the description.\nB. The box is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a thick, textured sole and multiple circular holes on the side. The shoe features a black and white design on the side, with a prominent black section and white accents. The upper part of the shoe has a smooth, rounded shape with a slight curve at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The boy is not mentioned in the description.\nB. The boy is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the sandals is mentioned in the description and is green.", + "D. The style of the sandals is mentioned in the description and is perforated.", + "D. The material of the sandals is mentioned in the description and is rubber.", + "C. The number of parts of the strap is not mentioned, but the strap of the sandals is mentioned.", + "A. The trailer is not mentioned in the description.", + "A. The tire is not mentioned in the description.", + "A. The pumpkins are not mentioned in the description.", + "A. The box is not mentioned in the description.", + "A. The boy is not mentioned in the description." + ], + "score": 0.9444444444444444, + "score_pos": 0.875, + "score_neg": 1.0, + "recognition_result": true + }, + "11950619": { + "pred": "The racket has a light-colored wooden handle with a smooth finish. The head of the racket is covered with a transparent protective guard, revealing a blue and white string bed. 
The guard has a rectangular shape with rounded edges and is secured to the head of the racket.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the handle is mentioned in the description and is white, beige, or wooden color.", + 1 + ], + [ + "The color of the handle is not mentioned, but the handle of the table tennis paddle is mentioned.", + 0.5 + ], + [ + "The handle or the table tennis paddle is not mentioned.", + 0 + ], + [ + "The color of the handle is mentioned in the description but is not white, beige, or wooden color.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the handle is mentioned in the description and is white, beige, or wooden color.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the handle is mentioned in the description and is wood.", + 1 + ], + [ + "The material of the handle is not mentioned, but the handle of the table tennis paddle is mentioned.", + 0.5 + ], + [ + "The handle or the table tennis paddle is not mentioned.", + 0 + ], + [ + "The material of the handle is mentioned in the description but is not wood.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the handle is mentioned in the description and is wood.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the cover is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the cover is not mentioned, but the cover of the table tennis paddle is mentioned.", + 0.5 + ], + [ + "The cover or the table tennis paddle is not mentioned.", + 0 + ], + [ + "The material of the cover is mentioned in the description but is not plastic.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The material of the cover is not mentioned, but the cover of the table tennis paddle is mentioned.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the head is mentioned in the description and is rounded.", + 1 + ], + [ + "The shape of the head is not mentioned, but the head of the table tennis paddle is mentioned.", + 0.5 + ], + [ + "The head or the table tennis paddle is not mentioned.", + 0 + ], + [ + "The shape of the head is mentioned in the description but is not rounded.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the head is not mentioned, but the head of the table tennis paddle is mentioned.", + "pred_index": 1, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The dumbbell is not mentioned in the description.", + 1 + ], + [ + "The dumbbell is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The dumbbell is not mentioned in the description.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The table tennis paddle is not mentioned in the description.", + 0 + ], + [ + "The edge tape of the table tennis paddle is not mentioned in the description.", + 1 + ], + [ + "The edge tape of the table tennis paddle is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The edge tape of the table tennis paddle is not mentioned in the description.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The table tennis paddle is not mentioned in the description.", + 0 + ], + [ + "The logo of the table tennis paddle is not mentioned in the description.", + 1 + ], + [ + "The logo of the table tennis paddle is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The logo of the table tennis paddle is not mentioned in the description.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mat is not mentioned in the description.", + 1 + ], + [ + "The mat is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The mat is not mentioned in the description.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cabinet is not mentioned in the description.", + 1 + ], + [ + "The cabinet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The cabinet is not mentioned in the description.", + "pred_index": 0, + "eval_result": 0 + } + ], + "details_recognition": [ + { + "question": "The object in the image is table tennis paddle. Based on the image, is it likely that the object in the description is given class: table tennis paddle or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "B. No", + "pred_index": 1, + "eval_result": "incorrect" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe racket has a light-colored wooden handle with a smooth finish. The head of the racket is covered with a transparent protective guard, revealing a blue and white string bed. The guard has a rectangular shape with rounded edges and is secured to the head of the racket.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is table tennis paddle. Based on the image, is it likely that the object in the description is given class: table tennis paddle or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe racket has a light-colored wooden handle with a smooth finish. The head of the racket is covered with a transparent protective guard, revealing a blue and white string bed. The guard has a rectangular shape with rounded edges and is secured to the head of the racket.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the handle is mentioned in the description and is white, beige, or wooden color.\nB. The color of the handle is not mentioned, but the handle of the table tennis paddle is mentioned.\nC. The handle or the table tennis paddle is not mentioned.\nD. 
The color of the handle is mentioned in the description but is not white, beige, or wooden color.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe racket has a light-colored wooden handle with a smooth finish. The head of the racket is covered with a transparent protective guard, revealing a blue and white string bed. The guard has a rectangular shape with rounded edges and is secured to the head of the racket.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the handle is mentioned in the description and is wood.\nB. The material of the handle is not mentioned, but the handle of the table tennis paddle is mentioned.\nC. The handle or the table tennis paddle is not mentioned.\nD. The material of the handle is mentioned in the description but is not wood.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe racket has a light-colored wooden handle with a smooth finish. The head of the racket is covered with a transparent protective guard, revealing a blue and white string bed. 
The guard has a rectangular shape with rounded edges and is secured to the head of the racket.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the cover is mentioned in the description and is plastic.\nB. The material of the cover is not mentioned, but the cover of the table tennis paddle is mentioned.\nC. The cover or the table tennis paddle is not mentioned.\nD. The material of the cover is mentioned in the description but is not plastic.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe racket has a light-colored wooden handle with a smooth finish. The head of the racket is covered with a transparent protective guard, revealing a blue and white string bed. The guard has a rectangular shape with rounded edges and is secured to the head of the racket.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the head is mentioned in the description and is rounded.\nB. The shape of the head is not mentioned, but the head of the table tennis paddle is mentioned.\nC. The head or the table tennis paddle is not mentioned.\nD. The shape of the head is mentioned in the description but is not rounded.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe racket has a light-colored wooden handle with a smooth finish. The head of the racket is covered with a transparent protective guard, revealing a blue and white string bed. The guard has a rectangular shape with rounded edges and is secured to the head of the racket.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The dumbbell is not mentioned in the description.\nB. The dumbbell is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe racket has a light-colored wooden handle with a smooth finish. The head of the racket is covered with a transparent protective guard, revealing a blue and white string bed. The guard has a rectangular shape with rounded edges and is secured to the head of the racket.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The table tennis paddle is not mentioned in the description.\nB. The edge tape of the table tennis paddle is not mentioned in the description.\nC. The edge tape of the table tennis paddle is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe racket has a light-colored wooden handle with a smooth finish. The head of the racket is covered with a transparent protective guard, revealing a blue and white string bed. The guard has a rectangular shape with rounded edges and is secured to the head of the racket.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The table tennis paddle is not mentioned in the description.\nB. The logo of the table tennis paddle is not mentioned in the description.\nC. The logo of the table tennis paddle is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe racket has a light-colored wooden handle with a smooth finish. The head of the racket is covered with a transparent protective guard, revealing a blue and white string bed. The guard has a rectangular shape with rounded edges and is secured to the head of the racket.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mat is not mentioned in the description.\nB. The mat is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe racket has a light-colored wooden handle with a smooth finish. The head of the racket is covered with a transparent protective guard, revealing a blue and white string bed. The guard has a rectangular shape with rounded edges and is secured to the head of the racket.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cabinet is not mentioned in the description.\nB. The cabinet is mentioned in the description.\n" + ], + "response": [ + "B. No", + "A. The color of the handle is mentioned in the description and is white, beige, or wooden color.", + "A. The material of the handle is mentioned in the description and is wood.", + "B. The material of the cover is not mentioned, but the cover of the table tennis paddle is mentioned.", + "B. The shape of the head is not mentioned, but the head of the table tennis paddle is mentioned.", + "A. The dumbbell is not mentioned in the description.", + "B. The edge tape of the table tennis paddle is not mentioned in the description.", + "B. The logo of the table tennis paddle is not mentioned in the description.", + "A. The mat is not mentioned in the description.", + "A. The cabinet is not mentioned in the description." 
+ ], + "score": 0.0, + "score_pos": 0.0, + "score_neg": 0.0, + "recognition_result": false + }, + "12178946": { + "pred": "A cylindrical bottle with a yellow cap and a blue label featuring white text.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle is not mentioned.", + 0 + ], + [ + "The shape of the bottle is mentioned in the description and is cylindrical.", + 1 + ], + [ + "The shape of the bottle is not mentioned.", + 0 + ], + [ + "The shape of the bottle is mentioned in the description but is not cylindrical.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the bottle is mentioned in the description and is cylindrical.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cap or the bottle is not mentioned.", + 0 + ], + [ + "The color of the cap is mentioned in the description and is yellow.", + 1 + ], + [ + "The color of the cap is not mentioned, but the cap of the bottle is mentioned.", + 0.5 + ], + [ + "The color of the cap is mentioned in the description but is not yellow.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the cap is mentioned in the description and is yellow.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The label or the bottle is not mentioned.", + 0 + ], + [ + "The color of the label is mentioned in the description and is blue.", + 1 + ], + [ + "The color of the label is not mentioned, but the label of the bottle is mentioned.", + 0.5 + ], + [ + "The color of the label is mentioned in the description but is not blue.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the label is mentioned in the description and is blue.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The text or the bottle is not mentioned.", + 0 + ], + [ + "The color of the text is mentioned in the description and is white.", + 1 + ], + [ + "The color of the text is not mentioned, but the text of the bottle is mentioned.", + 0.5 + ], + [ + "The color of the text is mentioned in the description but is not white.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the text is mentioned in the description and is white.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The apple is mentioned in the description.", + -1 + ], + [ + "The apple is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The apple is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle is not mentioned in the description.", + 0 + ], + [ + "The handle of the bottle is mentioned in the description.", + -1 + ], + [ + "The handle of the bottle is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The handle of the bottle is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle is not mentioned in the description.", + 0 + ], + [ + "The trigger of the bottle is mentioned in the description.", + -1 + ], + [ + "The trigger of the bottle is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The trigger of the bottle is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The measuring cup is mentioned in the description.", + -1 + ], + [ + "The measuring cup is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The measuring cup is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cutting board is mentioned in the description.", + -1 + ], + [ + "The cutting board is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The cutting board is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is bottle. Based on the image, is it likely that the object in the description is given class: bottle or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical bottle with a yellow cap and a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is bottle. Based on the image, is it likely that the object in the description is given class: bottle or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical bottle with a yellow cap and a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bottle is not mentioned.\nB. The shape of the bottle is mentioned in the description and is cylindrical.\nC. The shape of the bottle is not mentioned.\nD. The shape of the bottle is mentioned in the description but is not cylindrical.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical bottle with a yellow cap and a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cap or the bottle is not mentioned.\nB. The color of the cap is mentioned in the description and is yellow.\nC. The color of the cap is not mentioned, but the cap of the bottle is mentioned.\nD. The color of the cap is mentioned in the description but is not yellow.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical bottle with a yellow cap and a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The label or the bottle is not mentioned.\nB. The color of the label is mentioned in the description and is blue.\nC. The color of the label is not mentioned, but the label of the bottle is mentioned.\nD. The color of the label is mentioned in the description but is not blue.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical bottle with a yellow cap and a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The text or the bottle is not mentioned.\nB. The color of the text is mentioned in the description and is white.\nC. The color of the text is not mentioned, but the text of the bottle is mentioned.\nD. The color of the text is mentioned in the description but is not white.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical bottle with a yellow cap and a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The apple is mentioned in the description.\nB. The apple is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical bottle with a yellow cap and a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The bottle is not mentioned in the description.\nB. The handle of the bottle is mentioned in the description.\nC. The handle of the bottle is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical bottle with a yellow cap and a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bottle is not mentioned in the description.\nB. The trigger of the bottle is mentioned in the description.\nC. The trigger of the bottle is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical bottle with a yellow cap and a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The measuring cup is mentioned in the description.\nB. The measuring cup is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical bottle with a yellow cap and a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cutting board is mentioned in the description.\nB. The cutting board is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The shape of the bottle is mentioned in the description and is cylindrical.", + "B. The color of the cap is mentioned in the description and is yellow.", + "B. The color of the label is mentioned in the description and is blue.", + "B. The color of the text is mentioned in the description and is white.", + "B. The apple is not mentioned in the description.", + "C. The handle of the bottle is not mentioned in the description.", + "C. The trigger of the bottle is not mentioned in the description.", + "B. The measuring cup is not mentioned in the description.", + "B. The cutting board is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "12348079": { + "pred": "A digital weighing scale with a rectangular, blue weighing platform on top. The scale has a white base with a control panel on the left side, featuring several buttons and a display screen. 
The right side of the scale has a series of blue and black buttons.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The weighing platform or the scale is not mentioned.", + 0 + ], + [ + "The shape of the weighing platform is not mentioned, but the weighing platform of the scale is mentioned.", + 0.5 + ], + [ + "The shape of the weighing platform is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the weighing platform is mentioned in the description but is not rectangular.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the weighing platform is mentioned in the description and is rectangular.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The weighing platform or the scale is not mentioned.", + 0 + ], + [ + "The color of the weighing platform is not mentioned, but the weighing platform of the scale is mentioned.", + 0.5 + ], + [ + "The color of the weighing platform is mentioned in the description and is blue.", + 1 + ], + [ + "The color of the weighing platform is mentioned in the description but is not blue.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the weighing platform is mentioned in the description and is blue.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The keypad or the scale is not mentioned.", + 0 + ], + [ + "The shape of the keypad is not mentioned, but the keypad of the scale is mentioned.", + 0.5 + ], + [ + "The shape of the keypad is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the keypad is mentioned in the description but is not rectangular.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The shape of the keypad is not mentioned, but the keypad of the scale is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The display screen or the scale is not mentioned.", + 0 + ], + [ + "The color of the display screen is not mentioned, but the display screen of the scale is mentioned.", + 0.5 + ], + [ + "The color of the display screen is mentioned in the description and is black.", + 1 + ], + [ + "The color of the display screen is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the display screen is not mentioned, but the display screen of the scale is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The body or the scale is not mentioned.", + 0 + ], + [ + "The color of the body is not mentioned, but the body of the scale is mentioned.", + 0.5 + ], + [ + "The color of the body is mentioned in the description and is white.", + 1 + ], + [ + "The color of the body is mentioned in the description but is not white.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the body is mentioned in the description and is white.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The power cord of the scale is not mentioned in the description.", + 1 + ], + [ + "The scale is not mentioned in the description.", + 0 + ], + [ + "The power cord of the scale is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The power cord of the scale is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The usb cable of the scale is not mentioned in the description.", + 1 + ], + [ + "The scale is not mentioned in the description.", + 0 + ], + [ + "The usb cable of the scale is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The usb cable of the scale is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bicycle is not mentioned in the description.", + 1 + ], + [ + "The bicycle is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The bicycle is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The child is not mentioned in the description.", + 1 + ], + [ + "The child is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The child is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The woman is not mentioned in the description.", + 1 + ], + [ + "The woman is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The woman is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is scale. 
Based on the image, is it likely that the object in the description is given class: scale or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA digital weighing scale with a rectangular, blue weighing platform on top. The scale has a white base with a control panel on the left side, featuring several buttons and a display screen. The right side of the scale has a series of blue and black buttons.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is scale. Based on the image, is it likely that the object in the description is given class: scale or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA digital weighing scale with a rectangular, blue weighing platform on top. The scale has a white base with a control panel on the left side, featuring several buttons and a display screen. 
The right side of the scale has a series of blue and black buttons.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The weighing platform or the scale is not mentioned.\nB. The shape of the weighing platform is not mentioned, but the weighing platform of the scale is mentioned.\nC. The shape of the weighing platform is mentioned in the description and is rectangular.\nD. The shape of the weighing platform is mentioned in the description but is not rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA digital weighing scale with a rectangular, blue weighing platform on top. The scale has a white base with a control panel on the left side, featuring several buttons and a display screen. The right side of the scale has a series of blue and black buttons.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The weighing platform or the scale is not mentioned.\nB. The color of the weighing platform is not mentioned, but the weighing platform of the scale is mentioned.\nC. The color of the weighing platform is mentioned in the description and is blue.\nD. The color of the weighing platform is mentioned in the description but is not blue.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA digital weighing scale with a rectangular, blue weighing platform on top. The scale has a white base with a control panel on the left side, featuring several buttons and a display screen. The right side of the scale has a series of blue and black buttons.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The keypad or the scale is not mentioned.\nB. The shape of the keypad is not mentioned, but the keypad of the scale is mentioned.\nC. The shape of the keypad is mentioned in the description and is rectangular.\nD. The shape of the keypad is mentioned in the description but is not rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA digital weighing scale with a rectangular, blue weighing platform on top. The scale has a white base with a control panel on the left side, featuring several buttons and a display screen. The right side of the scale has a series of blue and black buttons.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The display screen or the scale is not mentioned.\nB. The color of the display screen is not mentioned, but the display screen of the scale is mentioned.\nC. The color of the display screen is mentioned in the description and is black.\nD. 
The color of the display screen is mentioned in the description but is not black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA digital weighing scale with a rectangular, blue weighing platform on top. The scale has a white base with a control panel on the left side, featuring several buttons and a display screen. The right side of the scale has a series of blue and black buttons.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The body or the scale is not mentioned.\nB. The color of the body is not mentioned, but the body of the scale is mentioned.\nC. The color of the body is mentioned in the description and is white.\nD. The color of the body is mentioned in the description but is not white.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA digital weighing scale with a rectangular, blue weighing platform on top. The scale has a white base with a control panel on the left side, featuring several buttons and a display screen. 
The right side of the scale has a series of blue and black buttons.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The power cord of the scale is not mentioned in the description.\nB. The scale is not mentioned in the description.\nC. The power cord of the scale is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA digital weighing scale with a rectangular, blue weighing platform on top. The scale has a white base with a control panel on the left side, featuring several buttons and a display screen. The right side of the scale has a series of blue and black buttons.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The usb cable of the scale is not mentioned in the description.\nB. The scale is not mentioned in the description.\nC. The usb cable of the scale is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA digital weighing scale with a rectangular, blue weighing platform on top. 
The scale has a white base with a control panel on the left side, featuring several buttons and a display screen. The right side of the scale has a series of blue and black buttons.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bicycle is not mentioned in the description.\nB. The bicycle is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA digital weighing scale with a rectangular, blue weighing platform on top. The scale has a white base with a control panel on the left side, featuring several buttons and a display screen. The right side of the scale has a series of blue and black buttons.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The child is not mentioned in the description.\nB. The child is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA digital weighing scale with a rectangular, blue weighing platform on top. The scale has a white base with a control panel on the left side, featuring several buttons and a display screen. 
The right side of the scale has a series of blue and black buttons.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The woman is not mentioned in the description.\nB. The woman is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The shape of the weighing platform is mentioned in the description and is rectangular.", + "C. The color of the weighing platform is mentioned in the description and is blue.", + "B. The shape of the keypad is not mentioned, but the keypad of the scale is mentioned.", + "B. The color of the display screen is not mentioned, but the display screen of the scale is mentioned.", + "C. The color of the body is mentioned in the description and is white.", + "A. The power cord of the scale is not mentioned in the description.", + "A. The usb cable of the scale is not mentioned in the description.", + "A. The bicycle is not mentioned in the description.", + "A. The child is not mentioned in the description.", + "A. The woman is not mentioned in the description." + ], + "score": 0.9, + "score_pos": 0.8, + "score_neg": 1.0, + "recognition_result": true + }, + "14832137": { + "pred": "A cylindrical, dark blue plastic bucket with a smooth surface and a slightly flared rim. The bucket has a handle attached to the top edge, which is also dark blue.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The barrel/bucket is not mentioned.", + 0 + ], + [ + "The shape of the barrel/bucket is mentioned in the description and is round or cylindrical.", + 1 + ], + [ + "The shape of the barrel/bucket is mentioned in the description but is not round or cylindrical.", + -1 + ], + [ + "The shape of the barrel/bucket is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The shape of the barrel/bucket is mentioned in the description and is round or cylindrical.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle or the barrel/bucket is not mentioned.", + 0 + ], + [ + "The shape of the handle is mentioned in the description and is curved.", + 1 + ], + [ + "The shape of the handle is mentioned in the description but is not curved.", + -1 + ], + [ + "The shape of the handle is not mentioned, but the handle of the barrel/bucket is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the handle is not mentioned, but the handle of the barrel/bucket is mentioned.", + "pred_index": 3, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The barrel/bucket is not mentioned.", + 0 + ], + [ + "The color of the barrel/bucket is mentioned in the description and is purple.", + 1 + ], + [ + "The color of the barrel/bucket is mentioned in the description but is not purple.", + -1 + ], + [ + "The color of the barrel/bucket is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the barrel/bucket is mentioned in the description but is not purple.", + "pred_index": 2, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The barrel/bucket is not mentioned in the description.", + 0 + ], + [ + "The bottom of the barrel/bucket is mentioned in the description.", + -1 + ], + [ + "The bottom of the barrel/bucket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The bottom of the barrel/bucket is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The heart is mentioned in the description.", + -1 + ], + [ + "The heart is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The heart is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The barrel/bucket is not mentioned in the description.", + 0 + ], + [ + "The lid of the barrel/bucket is mentioned in the description.", + -1 + ], + [ + "The lid of the barrel/bucket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The lid of the barrel/bucket is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cake is mentioned in the description.", + -1 + ], + [ + "The cake is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The cake is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plate is mentioned in the description.", + -1 + ], + [ + "The plate is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The plate is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is barrel/bucket. 
Based on the image, is it likely that the object in the description is given class: barrel/bucket or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, dark blue plastic bucket with a smooth surface and a slightly flared rim. The bucket has a handle attached to the top edge, which is also dark blue.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is barrel/bucket. Based on the image, is it likely that the object in the description is given class: barrel/bucket or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, dark blue plastic bucket with a smooth surface and a slightly flared rim. The bucket has a handle attached to the top edge, which is also dark blue.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The barrel/bucket is not mentioned.\nB. 
The shape of the barrel/bucket is mentioned in the description and is round or cylindrical.\nC. The shape of the barrel/bucket is mentioned in the description but is not round or cylindrical.\nD. The shape of the barrel/bucket is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, dark blue plastic bucket with a smooth surface and a slightly flared rim. The bucket has a handle attached to the top edge, which is also dark blue.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handle or the barrel/bucket is not mentioned.\nB. The shape of the handle is mentioned in the description and is curved.\nC. The shape of the handle is mentioned in the description but is not curved.\nD. The shape of the handle is not mentioned, but the handle of the barrel/bucket is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, dark blue plastic bucket with a smooth surface and a slightly flared rim. 
The bucket has a handle attached to the top edge, which is also dark blue.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The barrel/bucket is not mentioned.\nB. The color of the barrel/bucket is mentioned in the description and is purple.\nC. The color of the barrel/bucket is mentioned in the description but is not purple.\nD. The color of the barrel/bucket is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, dark blue plastic bucket with a smooth surface and a slightly flared rim. The bucket has a handle attached to the top edge, which is also dark blue.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The barrel/bucket is not mentioned in the description.\nB. The bottom of the barrel/bucket is mentioned in the description.\nC. The bottom of the barrel/bucket is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, dark blue plastic bucket with a smooth surface and a slightly flared rim. 
The bucket has a handle attached to the top edge, which is also dark blue.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The heart is mentioned in the description.\nB. The heart is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, dark blue plastic bucket with a smooth surface and a slightly flared rim. The bucket has a handle attached to the top edge, which is also dark blue.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The barrel/bucket is not mentioned in the description.\nB. The lid of the barrel/bucket is mentioned in the description.\nC. The lid of the barrel/bucket is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, dark blue plastic bucket with a smooth surface and a slightly flared rim. The bucket has a handle attached to the top edge, which is also dark blue.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The cake is mentioned in the description.\nB. The cake is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, dark blue plastic bucket with a smooth surface and a slightly flared rim. The bucket has a handle attached to the top edge, which is also dark blue.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The plate is mentioned in the description.\nB. The plate is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The shape of the barrel/bucket is mentioned in the description and is round or cylindrical.", + "D. The shape of the handle is not mentioned, but the handle of the barrel/bucket is mentioned.", + "C. The color of the barrel/bucket is mentioned in the description but is not purple.", + "C. The bottom of the barrel/bucket is not mentioned in the description.", + "B. The heart is not mentioned in the description.", + "C. The lid of the barrel/bucket is not mentioned in the description.", + "B. The cake is not mentioned in the description.", + "B. The plate is not mentioned in the description." + ], + "score": 0.6875, + "score_pos": 0.16666666666666666, + "score_neg": 1.0, + "recognition_result": true + }, + "15050320": { + "pred": "A dark brown wine glass with a wide, shallow bowl and a short stem. 
The glass has a smooth, reflective surface with a few light reflections visible on the bowl.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the wine glass is not mentioned.", + 0 + ], + [ + "The wine glass are not mentioned.", + 0 + ], + [ + "The material of the wine glass is mentioned in the description and is glass.", + 1 + ], + [ + "The material of the wine glass is mentioned in the description but is not glass.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the wine glass is not mentioned.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the wine glass is not mentioned.", + 0 + ], + [ + "The wine glass are not mentioned.", + 0 + ], + [ + "The color of the wine glass is mentioned in the description and is transparent.", + 1 + ], + [ + "The color of the wine glass is mentioned in the description but is not transparent.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the wine glass is mentioned in the description and is transparent.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the base is not mentioned, but the base of the wine glass is mentioned.", + 0.5 + ], + [ + "The base or the wine glass is not mentioned.", + 0 + ], + [ + "The shape of the base is mentioned in the description and is round or flat.", + 1 + ], + [ + "The shape of the base is mentioned in the description but is not round or flat.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The base or the wine glass is not mentioned.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the stem is not mentioned, but the stem of the wine glass is mentioned.", + 0.5 + ], + [ + "The stem or the wine glass is not mentioned.", + 0 + ], + [ + "The shape of the stem is mentioned in the description and is slender.", + 1 + ], + [ + "The shape of the stem is mentioned in the description but is not slender.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the stem is not mentioned, but the stem of the wine glass is mentioned.", + "pred_index": 0, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bowl of the wine glass is not mentioned in the description.", + 1 + ], + [ + "The bowl of the wine glass is mentioned in the description.", + -1 + ], + [ + "The wine glass are not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The bowl of the wine glass is mentioned in the description.", + "pred_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plate is not mentioned in the description.", + 1 + ], + [ + "The plate is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The plate is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rim of the wine glass is not mentioned in the description.", + 1 + ], + [ + "The rim of the wine glass is mentioned in the description.", + -1 + ], + [ + "The wine glass are not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The rim of the wine glass is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fork is not mentioned in the description.", + 1 + ], + [ + "The fork is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The fork is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The napkin is not mentioned in the description.", + 1 + ], + [ + "The napkin is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The napkin is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is wine glass. Based on the image, is it likely that the object in the description is given class: wine glass or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, shallow bowl and a short stem. The glass has a smooth, reflective surface with a few light reflections visible on the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is wine glass. Based on the image, is it likely that the object in the description is given class: wine glass or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, shallow bowl and a short stem. The glass has a smooth, reflective surface with a few light reflections visible on the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the wine glass is not mentioned.\nB. The wine glass are not mentioned.\nC. The material of the wine glass is mentioned in the description and is glass.\nD. The material of the wine glass is mentioned in the description but is not glass.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, shallow bowl and a short stem. The glass has a smooth, reflective surface with a few light reflections visible on the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the wine glass is not mentioned.\nB. The wine glass are not mentioned.\nC. The color of the wine glass is mentioned in the description and is transparent.\nD. The color of the wine glass is mentioned in the description but is not transparent.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, shallow bowl and a short stem. The glass has a smooth, reflective surface with a few light reflections visible on the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the base is not mentioned, but the base of the wine glass is mentioned.\nB. The base or the wine glass is not mentioned.\nC. The shape of the base is mentioned in the description and is round or flat.\nD. The shape of the base is mentioned in the description but is not round or flat.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, shallow bowl and a short stem. The glass has a smooth, reflective surface with a few light reflections visible on the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the stem is not mentioned, but the stem of the wine glass is mentioned.\nB. The stem or the wine glass is not mentioned.\nC. The shape of the stem is mentioned in the description and is slender.\nD. The shape of the stem is mentioned in the description but is not slender.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, shallow bowl and a short stem. The glass has a smooth, reflective surface with a few light reflections visible on the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bowl of the wine glass is not mentioned in the description.\nB. The bowl of the wine glass is mentioned in the description.\nC. The wine glass are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, shallow bowl and a short stem. The glass has a smooth, reflective surface with a few light reflections visible on the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The plate is not mentioned in the description.\nB. The plate is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, shallow bowl and a short stem. The glass has a smooth, reflective surface with a few light reflections visible on the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rim of the wine glass is not mentioned in the description.\nB. The rim of the wine glass is mentioned in the description.\nC. The wine glass are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, shallow bowl and a short stem. 
The glass has a smooth, reflective surface with a few light reflections visible on the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fork is not mentioned in the description.\nB. The fork is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, shallow bowl and a short stem. The glass has a smooth, reflective surface with a few light reflections visible on the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The napkin is not mentioned in the description.\nB. The napkin is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The material of the wine glass is not mentioned.", + "C. The color of the wine glass is mentioned in the description and is transparent.", + "B. The base or the wine glass is not mentioned.", + "A. The shape of the stem is not mentioned, but the stem of the wine glass is mentioned.", + "B. The bowl of the wine glass is mentioned in the description.", + "A. The plate is not mentioned in the description.", + "A. The rim of the wine glass is not mentioned in the description.", + "A. The fork is not mentioned in the description.", + "A. The napkin is not mentioned in the description." 
+ ], + "score": 0.5, + "score_pos": 0.375, + "score_neg": 0.6, + "recognition_result": true + }, + "16957916": { + "pred": "A piece of green lettuce with a slightly curled edge and a mix of light and dark green hues, featuring a few small brown spots and a hint of red at the base.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lettuce is not mentioned.", + 0 + ], + [ + "The color of the lettuce is not mentioned.", + 0 + ], + [ + "The color of the lettuce is mentioned in the description and is white/green.", + 1 + ], + [ + "The color of the lettuce is mentioned in the description but is not white/green.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the lettuce is mentioned in the description and is white/green.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lettuce is not mentioned.", + 0 + ], + [ + "The shape of the lettuce is not mentioned.", + 0 + ], + [ + "The shape of the lettuce is mentioned in the description and is curved or irregular.", + 1 + ], + [ + "The shape of the lettuce is mentioned in the description but is not curved or irregular.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the lettuce is mentioned in the description and is curved or irregular.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The edge or the lettuce is not mentioned.", + 0 + ], + [ + "The shape of the edge is not mentioned, but the edge of the lettuce is mentioned.", + 0.5 + ], + [ + "The shape of the edge is mentioned in the description and is jagged, ruffled, or rough.", + 1 + ], + [ + "The shape of the edge is mentioned in the description but is not jagged, ruffled, or rough.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The shape of the edge is mentioned in the description but is not jagged, ruffled, or rough.", + "pred_index": 3, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tartar sauce is mentioned in the description.", + -1 + ], + [ + "The tartar sauce is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The tartar sauce is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fried fish are mentioned in the description.", + -1 + ], + [ + "The fried fish are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The fried fish are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plate is mentioned in the description.", + -1 + ], + [ + "The plate is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The plate is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The french fries are mentioned in the description.", + -1 + ], + [ + "The french fries are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The french fries are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The utensil is mentioned in the description.", + -1 + ], + [ + "The utensil is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The utensil is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is lettuce. Based on the image, is it likely that the object in the description is given class: lettuce or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of green lettuce with a slightly curled edge and a mix of light and dark green hues, featuring a few small brown spots and a hint of red at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is lettuce. Based on the image, is it likely that the object in the description is given class: lettuce or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of green lettuce with a slightly curled edge and a mix of light and dark green hues, featuring a few small brown spots and a hint of red at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lettuce is not mentioned.\nB. The color of the lettuce is not mentioned.\nC. The color of the lettuce is mentioned in the description and is white/green.\nD. The color of the lettuce is mentioned in the description but is not white/green.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of green lettuce with a slightly curled edge and a mix of light and dark green hues, featuring a few small brown spots and a hint of red at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lettuce is not mentioned.\nB. The shape of the lettuce is not mentioned.\nC. The shape of the lettuce is mentioned in the description and is curved or irregular.\nD. The shape of the lettuce is mentioned in the description but is not curved or irregular.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of green lettuce with a slightly curled edge and a mix of light and dark green hues, featuring a few small brown spots and a hint of red at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The edge or the lettuce is not mentioned.\nB. The shape of the edge is not mentioned, but the edge of the lettuce is mentioned.\nC. The shape of the edge is mentioned in the description and is jagged, ruffled, or rough.\nD. The shape of the edge is mentioned in the description but is not jagged, ruffled, or rough.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of green lettuce with a slightly curled edge and a mix of light and dark green hues, featuring a few small brown spots and a hint of red at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tartar sauce is mentioned in the description.\nB. The tartar sauce is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of green lettuce with a slightly curled edge and a mix of light and dark green hues, featuring a few small brown spots and a hint of red at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fried fish are mentioned in the description.\nB. The fried fish are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of green lettuce with a slightly curled edge and a mix of light and dark green hues, featuring a few small brown spots and a hint of red at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The plate is mentioned in the description.\nB. The plate is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA piece of green lettuce with a slightly curled edge and a mix of light and dark green hues, featuring a few small brown spots and a hint of red at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The french fries are mentioned in the description.\nB. The french fries are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of green lettuce with a slightly curled edge and a mix of light and dark green hues, featuring a few small brown spots and a hint of red at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The utensil is mentioned in the description.\nB. The utensil is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the lettuce is mentioned in the description and is white/green.", + "C. The shape of the lettuce is mentioned in the description and is curved or irregular.", + "D. The shape of the edge is mentioned in the description but is not jagged, ruffled, or rough.", + "B. The tartar sauce is not mentioned in the description.", + "B. The fried fish are not mentioned in the description.", + "B. The plate is not mentioned in the description.", + "B. The french fries are not mentioned in the description.", + "B. The utensil is not mentioned in the description." 
+ ], + "score": 0.75, + "score_pos": 0.3333333333333333, + "score_neg": 1.0, + "recognition_result": true + }, + "17385866": { + "pred": "A scoop of vanilla ice cream with a swirl of red and yellow fruit toppings, possibly strawberry and lemon, on a light green and yellow marbled base.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the scoop is not mentioned, but the scoop of the ice cream is mentioned.", + 0.5 + ], + [ + "The scoop or the ice cream is not mentioned.", + 0 + ], + [ + "The shape of the scoop is mentioned in the description but is not round.", + -1 + ], + [ + "The shape of the scoop is mentioned in the description and is round.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the scoop is not mentioned, but the scoop of the ice cream is mentioned.", + "pred_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the cone is not mentioned, but the cone of the ice cream is mentioned.", + 0.5 + ], + [ + "The cone or the ice cream is not mentioned.", + 0 + ], + [ + "The material of the cone is mentioned in the description but is not waffle.", + -1 + ], + [ + "The material of the cone is mentioned in the description and is waffle.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The material of the cone is not mentioned, but the cone of the ice cream is mentioned.", + "pred_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the cone is not mentioned, but the cone of the ice cream is mentioned.", + 0.5 + ], + [ + "The cone or the ice cream is not mentioned.", + 0 + ], + [ + "The color of the cone is mentioned in the description but is not yellow.", + -1 + ], + [ + "The color of the cone is mentioned in the description and is yellow.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the cone is not mentioned, but the cone of the ice cream is mentioned.", + "pred_index": 0, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ice cream is not mentioned in the description.", + 0 + ], + [ + "The cherry of the ice cream is mentioned in the description.", + -1 + ], + [ + "The cherry of the ice cream is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The cherry of the ice cream is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The woman is mentioned in the description.", + -1 + ], + [ + "The woman is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The woman is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ice cream is not mentioned in the description.", + 0 + ], + [ + "The sprinkles of the ice cream are mentioned in the description.", + -1 + ], + [ + "The sprinkles of the ice cream are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The sprinkles of the ice cream are not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The man is mentioned in the description.", + -1 + ], + [ + "The man is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The man is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Turkish flag is mentioned in the description.", + -1 + ], + [ + "The Turkish flag is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The Turkish flag is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is ice cream. Based on the image, is it likely that the object in the description is given class: ice cream or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream with a swirl of red and yellow fruit toppings, possibly strawberry and lemon, on a light green and yellow marbled base.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is ice cream. Based on the image, is it likely that the object in the description is given class: ice cream or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream with a swirl of red and yellow fruit toppings, possibly strawberry and lemon, on a light green and yellow marbled base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the scoop is not mentioned, but the scoop of the ice cream is mentioned.\nB. The scoop or the ice cream is not mentioned.\nC. The shape of the scoop is mentioned in the description but is not round.\nD. The shape of the scoop is mentioned in the description and is round.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream with a swirl of red and yellow fruit toppings, possibly strawberry and lemon, on a light green and yellow marbled base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the cone is not mentioned, but the cone of the ice cream is mentioned.\nB. The cone or the ice cream is not mentioned.\nC. The material of the cone is mentioned in the description but is not waffle.\nD. The material of the cone is mentioned in the description and is waffle.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream with a swirl of red and yellow fruit toppings, possibly strawberry and lemon, on a light green and yellow marbled base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the cone is not mentioned, but the cone of the ice cream is mentioned.\nB. The cone or the ice cream is not mentioned.\nC. The color of the cone is mentioned in the description but is not yellow.\nD. The color of the cone is mentioned in the description and is yellow.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream with a swirl of red and yellow fruit toppings, possibly strawberry and lemon, on a light green and yellow marbled base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ice cream is not mentioned in the description.\nB. The cherry of the ice cream is mentioned in the description.\nC. The cherry of the ice cream is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream with a swirl of red and yellow fruit toppings, possibly strawberry and lemon, on a light green and yellow marbled base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The woman is mentioned in the description.\nB. The woman is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream with a swirl of red and yellow fruit toppings, possibly strawberry and lemon, on a light green and yellow marbled base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ice cream is not mentioned in the description.\nB. The sprinkles of the ice cream are mentioned in the description.\nC. The sprinkles of the ice cream are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream with a swirl of red and yellow fruit toppings, possibly strawberry and lemon, on a light green and yellow marbled base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The man is mentioned in the description.\nB. The man is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream with a swirl of red and yellow fruit toppings, possibly strawberry and lemon, on a light green and yellow marbled base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Turkish flag is mentioned in the description.\nB. The Turkish flag is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The shape of the scoop is not mentioned, but the scoop of the ice cream is mentioned.", + "A. The material of the cone is not mentioned, but the cone of the ice cream is mentioned.", + "A. The color of the cone is not mentioned, but the cone of the ice cream is mentioned.", + "C. The cherry of the ice cream is not mentioned in the description.", + "B. The woman is not mentioned in the description.", + "C. The sprinkles of the ice cream are not mentioned in the description.", + "B. The man is not mentioned in the description.", + "B. The Turkish flag is not mentioned in the description." + ], + "score": 0.8125, + "score_pos": 0.5, + "score_neg": 1.0, + "recognition_result": true + }, + "17404769": { + "pred": "The car is a white SUV with a rear hatchback design. It features a rear window with a slight tint and a small, square fuel cap on the right side of the rear door. The taillights are vertically aligned and wrap around the side of the vehicle. 
The rear bumper is slightly curved, and the car has a visible rear wheel with a five-spoke alloy rim.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the gas cap is mentioned in the description but is not square.", + -1 + ], + [ + "The shape of the gas cap is not mentioned, but the gas cap of the suv is mentioned.", + 0.5 + ], + [ + "The gas cap or the suv is not mentioned.", + 0 + ], + [ + "The shape of the gas cap is mentioned in the description and is square.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the gas cap is mentioned in the description and is square.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the taillight is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the taillight is not mentioned, but the taillight of the suv is mentioned.", + 0.5 + ], + [ + "The taillight or the suv is not mentioned.", + 0 + ], + [ + "The material of the taillight is mentioned in the description and is plastic.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. The material of the taillight is not mentioned, but the taillight of the suv is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the suv is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the suv is not mentioned.", + 0 + ], + [ + "The suv is not mentioned.", + 0 + ], + [ + "The color of the suv is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the suv is mentioned in the description and is white.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The front bumper of the suv is not mentioned in the description.", + 1 + ], + [ + "The suv is not mentioned in the description.", + 0 + ], + [ + "The front bumper of the suv is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The front bumper of the suv is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The front wheel of the suv is not mentioned in the description.", + 1 + ], + [ + "The suv is not mentioned in the description.", + 0 + ], + [ + "The front wheel of the suv is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The front wheel of the suv is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The grille of the suv is not mentioned in the description.", + 1 + ], + [ + "The suv is not mentioned in the description.", + 0 + ], + [ + "The grille of the suv is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The grille of the suv is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The building is not mentioned in the description.", + 1 + ], + [ + "The building is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The building is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tree is not mentioned in the description.", + 1 + ], + [ + "The tree is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The tree is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is suv. Based on the image, is it likely that the object in the description is given class: suv or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white SUV with a rear hatchback design. It features a rear window with a slight tint and a small, square fuel cap on the right side of the rear door. The taillights are vertically aligned and wrap around the side of the vehicle. The rear bumper is slightly curved, and the car has a visible rear wheel with a five-spoke alloy rim.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is suv. Based on the image, is it likely that the object in the description is given class: suv or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white SUV with a rear hatchback design. It features a rear window with a slight tint and a small, square fuel cap on the right side of the rear door. The taillights are vertically aligned and wrap around the side of the vehicle. The rear bumper is slightly curved, and the car has a visible rear wheel with a five-spoke alloy rim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the gas cap is mentioned in the description but is not square.\nB. The shape of the gas cap is not mentioned, but the gas cap of the suv is mentioned.\nC. The gas cap or the suv is not mentioned.\nD. The shape of the gas cap is mentioned in the description and is square.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white SUV with a rear hatchback design. It features a rear window with a slight tint and a small, square fuel cap on the right side of the rear door. The taillights are vertically aligned and wrap around the side of the vehicle. 
The rear bumper is slightly curved, and the car has a visible rear wheel with a five-spoke alloy rim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the taillight is mentioned in the description but is not plastic.\nB. The material of the taillight is not mentioned, but the taillight of the suv is mentioned.\nC. The taillight or the suv is not mentioned.\nD. The material of the taillight is mentioned in the description and is plastic.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white SUV with a rear hatchback design. It features a rear window with a slight tint and a small, square fuel cap on the right side of the rear door. The taillights are vertically aligned and wrap around the side of the vehicle. The rear bumper is slightly curved, and the car has a visible rear wheel with a five-spoke alloy rim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the suv is mentioned in the description but is not white.\nB. The color of the suv is not mentioned.\nC. The suv is not mentioned.\nD. The color of the suv is mentioned in the description and is white.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white SUV with a rear hatchback design. It features a rear window with a slight tint and a small, square fuel cap on the right side of the rear door. The taillights are vertically aligned and wrap around the side of the vehicle. The rear bumper is slightly curved, and the car has a visible rear wheel with a five-spoke alloy rim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The front bumper of the suv is not mentioned in the description.\nB. The suv is not mentioned in the description.\nC. The front bumper of the suv is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white SUV with a rear hatchback design. It features a rear window with a slight tint and a small, square fuel cap on the right side of the rear door. The taillights are vertically aligned and wrap around the side of the vehicle. The rear bumper is slightly curved, and the car has a visible rear wheel with a five-spoke alloy rim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The front wheel of the suv is not mentioned in the description.\nB. The suv is not mentioned in the description.\nC. The front wheel of the suv is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white SUV with a rear hatchback design. It features a rear window with a slight tint and a small, square fuel cap on the right side of the rear door. The taillights are vertically aligned and wrap around the side of the vehicle. The rear bumper is slightly curved, and the car has a visible rear wheel with a five-spoke alloy rim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The grille of the suv is not mentioned in the description.\nB. The suv is not mentioned in the description.\nC. The grille of the suv is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white SUV with a rear hatchback design. It features a rear window with a slight tint and a small, square fuel cap on the right side of the rear door. The taillights are vertically aligned and wrap around the side of the vehicle. The rear bumper is slightly curved, and the car has a visible rear wheel with a five-spoke alloy rim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The building is not mentioned in the description.\nB. 
The building is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white SUV with a rear hatchback design. It features a rear window with a slight tint and a small, square fuel cap on the right side of the rear door. The taillights are vertically aligned and wrap around the side of the vehicle. The rear bumper is slightly curved, and the car has a visible rear wheel with a five-spoke alloy rim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tree is not mentioned in the description.\nB. The tree is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The shape of the gas cap is mentioned in the description and is square.", + "B. The material of the taillight is not mentioned, but the taillight of the suv is mentioned.", + "D. The color of the suv is mentioned in the description and is white.", + "A. The front bumper of the suv is not mentioned in the description.", + "A. The front wheel of the suv is not mentioned in the description.", + "A. The grille of the suv is not mentioned in the description.", + "A. The building is not mentioned in the description.", + "A. The tree is not mentioned in the description." + ], + "score": 0.9375, + "score_pos": 0.8333333333333334, + "score_neg": 1.0, + "recognition_result": true + }, + "18217373": { + "pred": "The spectacles feature a thin, dark brown frame with a slightly curved bridge. 
The lenses are rectangular with rounded edges, and the frame has a subtle metallic sheen.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The frame or the glasses is not mentioned.", + 0 + ], + [ + "The color of the frame is not mentioned, but the frame of the glasses is mentioned.", + 0.5 + ], + [ + "The color of the frame is mentioned in the description and is black or metallic.", + 1 + ], + [ + "The color of the frame is mentioned in the description but is not black or metallic.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the frame is mentioned in the description and is black or metallic.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The frame or the glasses is not mentioned.", + 0 + ], + [ + "The shape of the frame is not mentioned, but the frame of the glasses is mentioned.", + 0.5 + ], + [ + "The shape of the frame is mentioned in the description and is square or rounded.", + 1 + ], + [ + "The shape of the frame is mentioned in the description but is not square or rounded.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the frame is mentioned in the description and is square or rounded.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lens or the glasses are not mentioned.", + 0 + ], + [ + "The color of the lens is not mentioned, but the lens of the glasses are mentioned.", + 0.5 + ], + [ + "The color of the lens is mentioned in the description and is clear.", + 1 + ], + [ + "The color of the lens is mentioned in the description but is not clear.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the lens is not mentioned, but the lens of the glasses are mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The temple arm or the glasses is not mentioned.", + 0 + ], + [ + "The shape of the temple arm is not mentioned, but the temple arm of the glasses is mentioned.", + 0.5 + ], + [ + "The shape of the temple arm is mentioned in the description and is curved.", + 1 + ], + [ + "The shape of the temple arm is mentioned in the description but is not curved.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the temple arm is not mentioned, but the temple arm of the glasses is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fruit is mentioned in the description.", + -1 + ], + [ + "The fruit is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The fruit is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The women are mentioned in the description.", + -1 + ], + [ + "The women are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The women are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The vegetables are mentioned in the description.", + -1 + ], + [ + "The vegetables are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The vegetables are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The produce is mentioned in the description.", + -1 + ], + [ + "The produce is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The produce is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The food is mentioned in the description.", + -1 + ], + [ + "The food is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The food is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is glasses. Based on the image, is it likely that the object in the description is given class: glasses or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a thin, dark brown frame with a slightly curved bridge. 
The lenses are rectangular with rounded edges, and the frame has a subtle metallic sheen.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is glasses. Based on the image, is it likely that the object in the description is given class: glasses or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a thin, dark brown frame with a slightly curved bridge. The lenses are rectangular with rounded edges, and the frame has a subtle metallic sheen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The frame or the glasses is not mentioned.\nB. The color of the frame is not mentioned, but the frame of the glasses is mentioned.\nC. The color of the frame is mentioned in the description and is black or metallic.\nD. The color of the frame is mentioned in the description but is not black or metallic.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a thin, dark brown frame with a slightly curved bridge. 
The lenses are rectangular with rounded edges, and the frame has a subtle metallic sheen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The frame or the glasses is not mentioned.\nB. The shape of the frame is not mentioned, but the frame of the glasses is mentioned.\nC. The shape of the frame is mentioned in the description and is square or rounded.\nD. The shape of the frame is mentioned in the description but is not square or rounded.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a thin, dark brown frame with a slightly curved bridge. The lenses are rectangular with rounded edges, and the frame has a subtle metallic sheen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lens or the glasses are not mentioned.\nB. The color of the lens is not mentioned, but the lens of the glasses are mentioned.\nC. The color of the lens is mentioned in the description and is clear.\nD. The color of the lens is mentioned in the description but is not clear.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a thin, dark brown frame with a slightly curved bridge. The lenses are rectangular with rounded edges, and the frame has a subtle metallic sheen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The temple arm or the glasses is not mentioned.\nB. The shape of the temple arm is not mentioned, but the temple arm of the glasses is mentioned.\nC. The shape of the temple arm is mentioned in the description and is curved.\nD. The shape of the temple arm is mentioned in the description but is not curved.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a thin, dark brown frame with a slightly curved bridge. The lenses are rectangular with rounded edges, and the frame has a subtle metallic sheen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fruit is mentioned in the description.\nB. The fruit is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a thin, dark brown frame with a slightly curved bridge. The lenses are rectangular with rounded edges, and the frame has a subtle metallic sheen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The women are mentioned in the description.\nB. The women are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a thin, dark brown frame with a slightly curved bridge. The lenses are rectangular with rounded edges, and the frame has a subtle metallic sheen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The vegetables are mentioned in the description.\nB. The vegetables are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a thin, dark brown frame with a slightly curved bridge. 
The lenses are rectangular with rounded edges, and the frame has a subtle metallic sheen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The produce is mentioned in the description.\nB. The produce is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a thin, dark brown frame with a slightly curved bridge. The lenses are rectangular with rounded edges, and the frame has a subtle metallic sheen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The food is mentioned in the description.\nB. The food is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the frame is mentioned in the description and is black or metallic.", + "C. The shape of the frame is mentioned in the description and is square or rounded.", + "B. The color of the lens is not mentioned, but the lens of the glasses are mentioned.", + "B. The shape of the temple arm is not mentioned, but the temple arm of the glasses is mentioned.", + "B. The fruit is not mentioned in the description.", + "B. The women are not mentioned in the description.", + "B. The vegetables are not mentioned in the description.", + "B. The produce is not mentioned in the description.", + "B. The food is not mentioned in the description." 
+ ], + "score": 0.8888888888888888, + "score_pos": 0.75, + "score_neg": 1.0, + "recognition_result": true + }, + "19455186": { + "pred": "A blue metal handcart with two horizontal bars and two vertical supports. The cart has two black wheels at the bottom.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the cart/trolley is mentioned in the description and is blue.", + 1 + ], + [ + "The color of the cart/trolley is not mentioned.", + 0 + ], + [ + "The color of the cart/trolley is mentioned in the description but is not blue.", + -1 + ], + [ + "The cart/trolley is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the cart/trolley is mentioned in the description and is blue.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the bars is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the bars is not mentioned, but the bars of the cart/trolley are mentioned.", + 0.5 + ], + [ + "The shape of the bars is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The bars or the cart/trolley are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the bars is not mentioned, but the bars of the cart/trolley are mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the wheels is mentioned in the description and is small.", + 1 + ], + [ + "The size of the wheels is not mentioned, but the wheels of the cart/trolley are mentioned.", + 0.5 + ], + [ + "The size of the wheels is mentioned in the description but is not small.", + -1 + ], + [ + "The wheels or the cart/trolley are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The size of the wheels is not mentioned, but the wheels of the cart/trolley are mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the wheels is mentioned in the description and is black.", + 1 + ], + [ + "The color of the wheels is not mentioned, but the wheels of the cart/trolley are mentioned.", + 0.5 + ], + [ + "The color of the wheels is mentioned in the description but is not black.", + -1 + ], + [ + "The wheels or the cart/trolley are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the wheels is mentioned in the description and is black.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lid of the cart/trolley is mentioned in the description.", + -1 + ], + [ + "The cart/trolley is not mentioned in the description.", + 0 + ], + [ + "The lid of the cart/trolley is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The lid of the cart/trolley is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The basket of the cart/trolley is mentioned in the description.", + -1 + ], + [ + "The cart/trolley is not mentioned in the description.", + 0 + ], + [ + "The basket of the cart/trolley is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The basket of the cart/trolley is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shelves are mentioned in the description.", + -1 + ], + [ + "The shelves are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The shelves are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The drawers of the cart/trolley are mentioned in the description.", + -1 + ], + [ + "The cart/trolley is not mentioned in the description.", + 0 + ], + [ + "The drawers of the cart/trolley are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The drawers of the cart/trolley are not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lights are mentioned in the description.", + -1 + ], + [ + "The lights are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The lights are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is cart/trolley. Based on the image, is it likely that the object in the description is given class: cart/trolley or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal handcart with two horizontal bars and two vertical supports. The cart has two black wheels at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is cart/trolley. Based on the image, is it likely that the object in the description is given class: cart/trolley or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal handcart with two horizontal bars and two vertical supports. The cart has two black wheels at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the cart/trolley is mentioned in the description and is blue.\nB. The color of the cart/trolley is not mentioned.\nC. The color of the cart/trolley is mentioned in the description but is not blue.\nD. The cart/trolley is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA blue metal handcart with two horizontal bars and two vertical supports. The cart has two black wheels at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the bars is mentioned in the description and is rectangular.\nB. The shape of the bars is not mentioned, but the bars of the cart/trolley are mentioned.\nC. The shape of the bars is mentioned in the description but is not rectangular.\nD. The bars or the cart/trolley are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal handcart with two horizontal bars and two vertical supports. The cart has two black wheels at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the wheels is mentioned in the description and is small.\nB. The size of the wheels is not mentioned, but the wheels of the cart/trolley are mentioned.\nC. The size of the wheels is mentioned in the description but is not small.\nD. The wheels or the cart/trolley are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA blue metal handcart with two horizontal bars and two vertical supports. The cart has two black wheels at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the wheels is mentioned in the description and is black.\nB. The color of the wheels is not mentioned, but the wheels of the cart/trolley are mentioned.\nC. The color of the wheels is mentioned in the description but is not black.\nD. The wheels or the cart/trolley are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal handcart with two horizontal bars and two vertical supports. The cart has two black wheels at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lid of the cart/trolley is mentioned in the description.\nB. The cart/trolley is not mentioned in the description.\nC. The lid of the cart/trolley is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA blue metal handcart with two horizontal bars and two vertical supports. The cart has two black wheels at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The basket of the cart/trolley is mentioned in the description.\nB. The cart/trolley is not mentioned in the description.\nC. The basket of the cart/trolley is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal handcart with two horizontal bars and two vertical supports. The cart has two black wheels at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shelves are mentioned in the description.\nB. The shelves are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal handcart with two horizontal bars and two vertical supports. 
The cart has two black wheels at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The drawers of the cart/trolley are mentioned in the description.\nB. The cart/trolley is not mentioned in the description.\nC. The drawers of the cart/trolley are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal handcart with two horizontal bars and two vertical supports. The cart has two black wheels at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lights are mentioned in the description.\nB. The lights are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The color of the cart/trolley is mentioned in the description and is blue.", + "B. The shape of the bars is not mentioned, but the bars of the cart/trolley are mentioned.", + "B. The size of the wheels is not mentioned, but the wheels of the cart/trolley are mentioned.", + "A. The color of the wheels is mentioned in the description and is black.", + "C. The lid of the cart/trolley is not mentioned in the description.", + "C. The basket of the cart/trolley is not mentioned in the description.", + "B. The shelves are not mentioned in the description.", + "C. The drawers of the cart/trolley are not mentioned in the description.", + "B. The lights are not mentioned in the description." 
+ ], + "score": 0.8888888888888888, + "score_pos": 0.75, + "score_neg": 1.0, + "recognition_result": true + }, + "19610023": { + "pred": "A bright green croc-style shoe with a thick, textured sole and a wide, open toe design. The shoe features a smooth, rounded toe and a slightly raised heel.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the slippers is mentioned in the description but is not green.", + -1 + ], + [ + "The slippers are not mentioned.", + 0 + ], + [ + "The color of the slippers is mentioned in the description and is green.", + 1 + ], + [ + "The color of the slippers is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the slippers is mentioned in the description and is green.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the toe box is mentioned in the description but is not round.", + -1 + ], + [ + "The toe box or the slippers is not mentioned.", + 0 + ], + [ + "The shape of the toe box is mentioned in the description and is round.", + 1 + ], + [ + "The shape of the toe box is not mentioned, but the toe box of the slippers is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the toe box is mentioned in the description and is round.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the slippers is mentioned in the description but is not frog-shaped.", + -1 + ], + [ + "The slippers are not mentioned.", + 0 + ], + [ + "The shape of the slippers is mentioned in the description and is frog-shaped.", + 1 + ], + [ + "The shape of the slippers is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the slippers is mentioned in the description and is frog-shaped.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plant is not mentioned in the description.", + 1 + ], + [ + "The plant is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The plant is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lining of the slippers is not mentioned in the description.", + 1 + ], + [ + "The slippers are not mentioned in the description.", + 0 + ], + [ + "The lining of the slippers is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The lining of the slippers is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shirt is not mentioned in the description.", + 1 + ], + [ + "The shirt is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The shirt is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The buckle of the slippers is not mentioned in the description.", + 1 + ], + [ + "The slippers are not mentioned in the description.", + 0 + ], + [ + "The buckle of the slippers is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The buckle of the slippers is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wall is not mentioned in the description.", + 1 + ], + [ + "The wall is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The wall is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is slippers. Based on the image, is it likely that the object in the description is given class: slippers or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green croc-style shoe with a thick, textured sole and a wide, open toe design. The shoe features a smooth, rounded toe and a slightly raised heel.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is slippers. Based on the image, is it likely that the object in the description is given class: slippers or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green croc-style shoe with a thick, textured sole and a wide, open toe design. The shoe features a smooth, rounded toe and a slightly raised heel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the slippers is mentioned in the description but is not green.\nB. The slippers are not mentioned.\nC. The color of the slippers is mentioned in the description and is green.\nD. The color of the slippers is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green croc-style shoe with a thick, textured sole and a wide, open toe design. The shoe features a smooth, rounded toe and a slightly raised heel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the toe box is mentioned in the description but is not round.\nB. The toe box or the slippers is not mentioned.\nC. The shape of the toe box is mentioned in the description and is round.\nD. The shape of the toe box is not mentioned, but the toe box of the slippers is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green croc-style shoe with a thick, textured sole and a wide, open toe design. The shoe features a smooth, rounded toe and a slightly raised heel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the slippers is mentioned in the description but is not frog-shaped.\nB. The slippers are not mentioned.\nC. The shape of the slippers is mentioned in the description and is frog-shaped.\nD. The shape of the slippers is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green croc-style shoe with a thick, textured sole and a wide, open toe design. The shoe features a smooth, rounded toe and a slightly raised heel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The plant is not mentioned in the description.\nB. The plant is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA bright green croc-style shoe with a thick, textured sole and a wide, open toe design. The shoe features a smooth, rounded toe and a slightly raised heel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lining of the slippers is not mentioned in the description.\nB. The slippers are not mentioned in the description.\nC. The lining of the slippers is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green croc-style shoe with a thick, textured sole and a wide, open toe design. The shoe features a smooth, rounded toe and a slightly raised heel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shirt is not mentioned in the description.\nB. The shirt is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green croc-style shoe with a thick, textured sole and a wide, open toe design. 
The shoe features a smooth, rounded toe and a slightly raised heel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The buckle of the slippers is not mentioned in the description.\nB. The slippers are not mentioned in the description.\nC. The buckle of the slippers is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green croc-style shoe with a thick, textured sole and a wide, open toe design. The shoe features a smooth, rounded toe and a slightly raised heel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wall is not mentioned in the description.\nB. The wall is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the slippers is mentioned in the description and is green.", + "C. The shape of the toe box is mentioned in the description and is round.", + "C. The shape of the slippers is mentioned in the description and is frog-shaped.", + "A. The plant is not mentioned in the description.", + "A. The lining of the slippers is not mentioned in the description.", + "A. The shirt is not mentioned in the description.", + "A. The buckle of the slippers is not mentioned in the description.", + "A. The wall is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "19610025": { + "pred": "A white rabbit with large, upright ears and a red backpack. 
It is wearing a yellow shirt and blue pants. The rabbit has a playful expression with its mouth open and eyes wide.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the ear is mentioned in the description and is large.", + 1 + ], + [ + "The size of the ear is not mentioned, but the ear of the rabbit is mentioned.", + 0.5 + ], + [ + "The ear or the rabbit is not mentioned.", + 0 + ], + [ + "The size of the ear is mentioned in the description but is not large.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The size of the ear is mentioned in the description and is large.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the ear is mentioned in the description and is pink.", + 1 + ], + [ + "The color of the ear is not mentioned, but the ear of the rabbit is mentioned.", + 0.5 + ], + [ + "The ear or the rabbit is not mentioned.", + 0 + ], + [ + "The color of the ear is mentioned in the description but is not pink.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the ear is not mentioned, but the ear of the rabbit is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the eye is mentioned in the description and is black.", + 1 + ], + [ + "The color of the eye is not mentioned, but the eye of the rabbit is mentioned.", + 0.5 + ], + [ + "The eye or the rabbit is not mentioned.", + 0 + ], + [ + "The color of the eye is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the eye is not mentioned, but the eye of the rabbit is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the shirt is mentioned in the description and is yellow.", + 1 + ], + [ + "The color of the shirt is not mentioned, but the shirt of the rabbit is mentioned.", + 0.5 + ], + [ + "The shirt or the rabbit is not mentioned.", + 0 + ], + [ + "The color of the shirt is mentioned in the description but is not yellow.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the shirt is mentioned in the description and is yellow.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the skirt is mentioned in the description and is blue.", + 1 + ], + [ + "The color of the skirt is not mentioned, but the skirt of the rabbit is mentioned.", + 0.5 + ], + [ + "The skirt or the rabbit is not mentioned.", + 0 + ], + [ + "The color of the skirt is mentioned in the description but is not blue.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the skirt is not mentioned, but the skirt of the rabbit is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rabbit is not mentioned in the description.", + 0 + ], + [ + "The whisker of the rabbit is not mentioned in the description.", + 1 + ], + [ + "The whisker of the rabbit is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The whisker of the rabbit is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rabbit is not mentioned in the description.", + 0 + ], + [ + "The teeth of the rabbit are not mentioned in the description.", + 1 + ], + [ + "The teeth of the rabbit are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The teeth of the rabbit are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plant is not mentioned in the description.", + 1 + ], + [ + "The plant is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The plant is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rabbit is not mentioned in the description.", + 0 + ], + [ + "The tail of the rabbit is not mentioned in the description.", + 1 + ], + [ + "The tail of the rabbit is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The tail of the rabbit is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The poster is not mentioned in the description.", + 1 + ], + [ + "The poster is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The poster is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is rabbit. 
Based on the image, is it likely that the object in the description is given class: rabbit or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with large, upright ears and a red backpack. It is wearing a yellow shirt and blue pants. The rabbit has a playful expression with its mouth open and eyes wide.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is rabbit. Based on the image, is it likely that the object in the description is given class: rabbit or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with large, upright ears and a red backpack. It is wearing a yellow shirt and blue pants. The rabbit has a playful expression with its mouth open and eyes wide.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the ear is mentioned in the description and is large.\nB. 
The size of the ear is not mentioned, but the ear of the rabbit is mentioned.\nC. The ear or the rabbit is not mentioned.\nD. The size of the ear is mentioned in the description but is not large.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with large, upright ears and a red backpack. It is wearing a yellow shirt and blue pants. The rabbit has a playful expression with its mouth open and eyes wide.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the ear is mentioned in the description and is pink.\nB. The color of the ear is not mentioned, but the ear of the rabbit is mentioned.\nC. The ear or the rabbit is not mentioned.\nD. The color of the ear is mentioned in the description but is not pink.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with large, upright ears and a red backpack. It is wearing a yellow shirt and blue pants. The rabbit has a playful expression with its mouth open and eyes wide.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The color of the eye is mentioned in the description and is black.\nB. The color of the eye is not mentioned, but the eye of the rabbit is mentioned.\nC. The eye or the rabbit is not mentioned.\nD. The color of the eye is mentioned in the description but is not black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with large, upright ears and a red backpack. It is wearing a yellow shirt and blue pants. The rabbit has a playful expression with its mouth open and eyes wide.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the shirt is mentioned in the description and is yellow.\nB. The color of the shirt is not mentioned, but the shirt of the rabbit is mentioned.\nC. The shirt or the rabbit is not mentioned.\nD. The color of the shirt is mentioned in the description but is not yellow.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with large, upright ears and a red backpack. It is wearing a yellow shirt and blue pants. 
The rabbit has a playful expression with its mouth open and eyes wide.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the skirt is mentioned in the description and is blue.\nB. The color of the skirt is not mentioned, but the skirt of the rabbit is mentioned.\nC. The skirt or the rabbit is not mentioned.\nD. The color of the skirt is mentioned in the description but is not blue.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with large, upright ears and a red backpack. It is wearing a yellow shirt and blue pants. The rabbit has a playful expression with its mouth open and eyes wide.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rabbit is not mentioned in the description.\nB. The whisker of the rabbit is not mentioned in the description.\nC. The whisker of the rabbit is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with large, upright ears and a red backpack. It is wearing a yellow shirt and blue pants. 
The rabbit has a playful expression with its mouth open and eyes wide.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rabbit is not mentioned in the description.\nB. The teeth of the rabbit are not mentioned in the description.\nC. The teeth of the rabbit are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with large, upright ears and a red backpack. It is wearing a yellow shirt and blue pants. The rabbit has a playful expression with its mouth open and eyes wide.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The plant is not mentioned in the description.\nB. The plant is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with large, upright ears and a red backpack. It is wearing a yellow shirt and blue pants. The rabbit has a playful expression with its mouth open and eyes wide.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The rabbit is not mentioned in the description.\nB. The tail of the rabbit is not mentioned in the description.\nC. The tail of the rabbit is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with large, upright ears and a red backpack. It is wearing a yellow shirt and blue pants. The rabbit has a playful expression with its mouth open and eyes wide.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The poster is not mentioned in the description.\nB. The poster is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The size of the ear is mentioned in the description and is large.", + "B. The color of the ear is not mentioned, but the ear of the rabbit is mentioned.", + "B. The color of the eye is not mentioned, but the eye of the rabbit is mentioned.", + "A. The color of the shirt is mentioned in the description and is yellow.", + "B. The color of the skirt is not mentioned, but the skirt of the rabbit is mentioned.", + "B. The whisker of the rabbit is not mentioned in the description.", + "B. The teeth of the rabbit are not mentioned in the description.", + "A. The plant is not mentioned in the description.", + "B. The tail of the rabbit is not mentioned in the description.", + "A. The poster is not mentioned in the description." 
+ ], + "score": 0.85, + "score_pos": 0.7, + "score_neg": 1.0, + "recognition_result": true + }, + "20568676": { + "pred": "A stainless steel cooking pot with a rounded bottom and a rolled edge, featuring two riveted handles on opposite sides.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the rim is mentioned in the description and is round.", + 1 + ], + [ + "The rim or the pot is not mentioned.", + 0 + ], + [ + "The shape of the rim is mentioned in the description but is not round.", + -1 + ], + [ + "The shape of the rim is not mentioned, but the rim of the pot is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the rim is mentioned in the description and is round.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the content is mentioned in the description and is yellow or brown.", + 1 + ], + [ + "The content or the pot is not mentioned.", + 0 + ], + [ + "The color of the content is mentioned in the description but is not yellow or brown.", + -1 + ], + [ + "The color of the content is not mentioned, but the content of the pot is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the content is not mentioned, but the content of the pot is mentioned.", + "pred_index": 3, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the pot is mentioned in the description and is metal.", + 1 + ], + [ + "The pot is not mentioned.", + 0 + ], + [ + "The material of the pot is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the pot is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The material of the pot is mentioned in the description and is metal.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The spout of the pot is mentioned in the description.", + -1 + ], + [ + "The spout of the pot is not mentioned in the description.", + 1 + ], + [ + "The pot is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The spout of the pot is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bowl is mentioned in the description.", + -1 + ], + [ + "The bowl is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bowl is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottom of the pot is mentioned in the description.", + -1 + ], + [ + "The bottom of the pot is not mentioned in the description.", + 1 + ], + [ + "The pot is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The bottom of the pot is mentioned in the description.", + "pred_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lid of the pot is mentioned in the description.", + -1 + ], + [ + "The lid of the pot is not mentioned in the description.", + 1 + ], + [ + "The pot is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The lid of the pot is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The stove is mentioned in the description.", + -1 + ], + [ + "The stove is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The stove is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is pot. Based on the image, is it likely that the object in the description is given class: pot or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel cooking pot with a rounded bottom and a rolled edge, featuring two riveted handles on opposite sides.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is pot. Based on the image, is it likely that the object in the description is given class: pot or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel cooking pot with a rounded bottom and a rolled edge, featuring two riveted handles on opposite sides.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the rim is mentioned in the description and is round.\nB. The rim or the pot is not mentioned.\nC. The shape of the rim is mentioned in the description but is not round.\nD. The shape of the rim is not mentioned, but the rim of the pot is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel cooking pot with a rounded bottom and a rolled edge, featuring two riveted handles on opposite sides.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the content is mentioned in the description and is yellow or brown.\nB. The content or the pot is not mentioned.\nC. The color of the content is mentioned in the description but is not yellow or brown.\nD. The color of the content is not mentioned, but the content of the pot is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel cooking pot with a rounded bottom and a rolled edge, featuring two riveted handles on opposite sides.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the pot is mentioned in the description and is metal.\nB. The pot is not mentioned.\nC. The material of the pot is mentioned in the description but is not metal.\nD. The material of the pot is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel cooking pot with a rounded bottom and a rolled edge, featuring two riveted handles on opposite sides.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The spout of the pot is mentioned in the description.\nB. The spout of the pot is not mentioned in the description.\nC. The pot is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel cooking pot with a rounded bottom and a rolled edge, featuring two riveted handles on opposite sides.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bowl is mentioned in the description.\nB. The bowl is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel cooking pot with a rounded bottom and a rolled edge, featuring two riveted handles on opposite sides.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bottom of the pot is mentioned in the description.\nB. The bottom of the pot is not mentioned in the description.\nC. The pot is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel cooking pot with a rounded bottom and a rolled edge, featuring two riveted handles on opposite sides.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The lid of the pot is mentioned in the description.\nB. The lid of the pot is not mentioned in the description.\nC. The pot is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel cooking pot with a rounded bottom and a rolled edge, featuring two riveted handles on opposite sides.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The stove is mentioned in the description.\nB. The stove is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The shape of the rim is mentioned in the description and is round.", + "D. The color of the content is not mentioned, but the content of the pot is mentioned.", + "A. The material of the pot is mentioned in the description and is metal.", + "B. The spout of the pot is not mentioned in the description.", + "B. The bowl is not mentioned in the description.", + "A. The bottom of the pot is mentioned in the description.", + "B. The lid of the pot is not mentioned in the description.", + "B. The stove is not mentioned in the description." + ], + "score": 0.6875, + "score_pos": 0.8333333333333334, + "score_neg": 0.6, + "recognition_result": true + }, + "21107974": { + "pred": "A wooden gavel with a cylindrical head featuring three evenly spaced, horizontal grooves. 
The handle is smooth and tapers slightly towards the end.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the head is not mentioned, but the head of the gavel/mallet is mentioned.", + 0.5 + ], + [ + "The shape of the head is mentioned in the description and is round or cylindrical.", + 1 + ], + [ + "The head or the gavel/mallet is not mentioned.", + 0 + ], + [ + "The shape of the head is mentioned in the description but is not round or cylindrical.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the head is mentioned in the description and is round or cylindrical.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the handle is not mentioned, but the handle of the gavel/mallet is mentioned.", + 0.5 + ], + [ + "The shape of the handle is mentioned in the description and is cylindrical.", + 1 + ], + [ + "The handle or the gavel/mallet is not mentioned.", + 0 + ], + [ + "The shape of the handle is mentioned in the description but is not cylindrical.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the handle is mentioned in the description and is cylindrical.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the gavel/mallet is not mentioned.", + 0 + ], + [ + "The color of the gavel/mallet is mentioned in the description and is brown.", + 1 + ], + [ + "The gavel/mallet is not mentioned.", + 0 + ], + [ + "The color of the gavel/mallet is mentioned in the description but is not brown.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the gavel/mallet is not mentioned.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the gavel/mallet is not mentioned.", + 0 + ], + [ + "The material of the gavel/mallet is mentioned in the description and is wood.", + 1 + ], + [ + "The gavel/mallet is not mentioned.", + 0 + ], + [ + "The material of the gavel/mallet is mentioned in the description but is not wood.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The material of the gavel/mallet is mentioned in the description and is wood.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The microphone is mentioned in the description.", + -1 + ], + [ + "The microphone is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The microphone is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The man is mentioned in the description.", + -1 + ], + [ + "The man is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The man is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The podium is mentioned in the description.", + -1 + ], + [ + "The podium is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The podium is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The gavel/mallet is not mentioned in the description.", + 0 + ], + [ + "The neck of the gavel/mallet is mentioned in the description.", + -1 + ], + [ + "The neck of the gavel/mallet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The neck of the gavel/mallet is mentioned in the description.", + "pred_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sign is mentioned in the description.", + -1 + ], + [ + "The sign is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The sign is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is gavel/mallet. Based on the image, is it likely that the object in the description is given class: gavel/mallet or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head featuring three evenly spaced, horizontal grooves. 
The handle is smooth and tapers slightly towards the end.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is gavel/mallet. Based on the image, is it likely that the object in the description is given class: gavel/mallet or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head featuring three evenly spaced, horizontal grooves. The handle is smooth and tapers slightly towards the end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the head is not mentioned, but the head of the gavel/mallet is mentioned.\nB. The shape of the head is mentioned in the description and is round or cylindrical.\nC. The head or the gavel/mallet is not mentioned.\nD. The shape of the head is mentioned in the description but is not round or cylindrical.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head featuring three evenly spaced, horizontal grooves. 
The handle is smooth and tapers slightly towards the end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the handle is not mentioned, but the handle of the gavel/mallet is mentioned.\nB. The shape of the handle is mentioned in the description and is cylindrical.\nC. The handle or the gavel/mallet is not mentioned.\nD. The shape of the handle is mentioned in the description but is not cylindrical.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head featuring three evenly spaced, horizontal grooves. The handle is smooth and tapers slightly towards the end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the gavel/mallet is not mentioned.\nB. The color of the gavel/mallet is mentioned in the description and is brown.\nC. The gavel/mallet is not mentioned.\nD. The color of the gavel/mallet is mentioned in the description but is not brown.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head featuring three evenly spaced, horizontal grooves. 
The handle is smooth and tapers slightly towards the end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the gavel/mallet is not mentioned.\nB. The material of the gavel/mallet is mentioned in the description and is wood.\nC. The gavel/mallet is not mentioned.\nD. The material of the gavel/mallet is mentioned in the description but is not wood.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head featuring three evenly spaced, horizontal grooves. The handle is smooth and tapers slightly towards the end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The microphone is mentioned in the description.\nB. The microphone is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head featuring three evenly spaced, horizontal grooves. 
The handle is smooth and tapers slightly towards the end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The man is mentioned in the description.\nB. The man is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head featuring three evenly spaced, horizontal grooves. The handle is smooth and tapers slightly towards the end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The podium is mentioned in the description.\nB. The podium is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head featuring three evenly spaced, horizontal grooves. The handle is smooth and tapers slightly towards the end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The gavel/mallet is not mentioned in the description.\nB. The neck of the gavel/mallet is mentioned in the description.\nC. 
The neck of the gavel/mallet is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head featuring three evenly spaced, horizontal grooves. The handle is smooth and tapers slightly towards the end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sign is mentioned in the description.\nB. The sign is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The shape of the head is mentioned in the description and is round or cylindrical.", + "B. The shape of the handle is mentioned in the description and is cylindrical.", + "A. The color of the gavel/mallet is not mentioned.", + "B. The material of the gavel/mallet is mentioned in the description and is wood.", + "B. The microphone is not mentioned in the description.", + "B. The man is not mentioned in the description.", + "B. The podium is not mentioned in the description.", + "B. The neck of the gavel/mallet is mentioned in the description.", + "B. The sign is not mentioned in the description." + ], + "score": 0.6666666666666666, + "score_pos": 0.75, + "score_neg": 0.6, + "recognition_result": true + }, + "22064315": { + "pred": "The visible part of the antelope shows two long, curved horns with a dark, almost black coloration, tapering to a point. 
The horns are covered in a pattern of ridges and grooves, giving them a textured appearance.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the horn is mentioned in the description and is long, curved, or pointed.", + 1 + ], + [ + "The shape of the horn is mentioned in the description but is not long, curved, or pointed.", + -1 + ], + [ + "The shape of the horn is not mentioned, but the horn of the antelope is mentioned.", + 0.5 + ], + [ + "The horn or the antelope is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the horn is mentioned in the description and is long, curved, or pointed.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the horn is mentioned in the description and is grey, black, or dark.", + 1 + ], + [ + "The color of the horn is mentioned in the description but is not grey, black, or dark.", + -1 + ], + [ + "The color of the horn is not mentioned, but the horn of the antelope is mentioned.", + 0.5 + ], + [ + "The horn or the antelope is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the horn is mentioned in the description and is grey, black, or dark.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tail of the antelope is mentioned in the description.", + -1 + ], + [ + "The tail of the antelope is not mentioned in the description.", + 1 + ], + [ + "The antelope is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The tail of the antelope is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The deer are mentioned in the description.", + -1 + ], + [ + "The deer are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The deer are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The car is mentioned in the description.", + -1 + ], + [ + "The car is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The car is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tree is mentioned in the description.", + -1 + ], + [ + "The tree is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The tree is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The grass are mentioned in the description.", + -1 + ], + [ + "The grass are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The grass are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is antelope. 
Based on the image, is it likely that the object in the description is given class: antelope or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the antelope shows two long, curved horns with a dark, almost black coloration, tapering to a point. The horns are covered in a pattern of ridges and grooves, giving them a textured appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is antelope. Based on the image, is it likely that the object in the description is given class: antelope or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the antelope shows two long, curved horns with a dark, almost black coloration, tapering to a point. The horns are covered in a pattern of ridges and grooves, giving them a textured appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The shape of the horn is mentioned in the description and is long, curved, or pointed.\nB. The shape of the horn is mentioned in the description but is not long, curved, or pointed.\nC. The shape of the horn is not mentioned, but the horn of the antelope is mentioned.\nD. The horn or the antelope is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the antelope shows two long, curved horns with a dark, almost black coloration, tapering to a point. The horns are covered in a pattern of ridges and grooves, giving them a textured appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the horn is mentioned in the description and is grey, black, or dark.\nB. The color of the horn is mentioned in the description but is not grey, black, or dark.\nC. The color of the horn is not mentioned, but the horn of the antelope is mentioned.\nD. The horn or the antelope is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the antelope shows two long, curved horns with a dark, almost black coloration, tapering to a point. 
The horns are covered in a pattern of ridges and grooves, giving them a textured appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tail of the antelope is mentioned in the description.\nB. The tail of the antelope is not mentioned in the description.\nC. The antelope is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the antelope shows two long, curved horns with a dark, almost black coloration, tapering to a point. The horns are covered in a pattern of ridges and grooves, giving them a textured appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The deer are mentioned in the description.\nB. The deer are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the antelope shows two long, curved horns with a dark, almost black coloration, tapering to a point. 
The horns are covered in a pattern of ridges and grooves, giving them a textured appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The car is mentioned in the description.\nB. The car is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the antelope shows two long, curved horns with a dark, almost black coloration, tapering to a point. The horns are covered in a pattern of ridges and grooves, giving them a textured appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tree is mentioned in the description.\nB. The tree is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the antelope shows two long, curved horns with a dark, almost black coloration, tapering to a point. The horns are covered in a pattern of ridges and grooves, giving them a textured appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The grass are mentioned in the description.\nB. The grass are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The shape of the horn is mentioned in the description and is long, curved, or pointed.", + "A. The color of the horn is mentioned in the description and is grey, black, or dark.", + "B. The tail of the antelope is not mentioned in the description.", + "B. The deer are not mentioned in the description.", + "B. The car is not mentioned in the description.", + "B. The tree is not mentioned in the description.", + "B. The grass are not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "22107522": { + "pred": "A black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips. The bow tie has a symmetrical design with a central knot and two loops that are slightly curved outward.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the bow tie is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the bow tie is mentioned in the description and is smooth.", + 1 + ], + [ + "The bow tie is not mentioned.", + 0 + ], + [ + "The texture of the bow tie is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The texture of the bow tie is mentioned in the description and is smooth.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the bow tie is mentioned in the description but is not butterfly-shaped.", + -1 + ], + [ + "The shape of the bow tie is mentioned in the description and is butterfly-shaped.", + 1 + ], + [ + "The bow tie is not mentioned.", + 0 + ], + [ + "The shape of the bow tie is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The shape of the bow tie is mentioned in the description and is butterfly-shaped.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the bow tie is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the bow tie is mentioned in the description and is black.", + 1 + ], + [ + "The bow tie is not mentioned.", + 0 + ], + [ + "The color of the bow tie is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the bow tie is mentioned in the description and is black.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the bow tie is mentioned in the description but is not fabric.", + -1 + ], + [ + "The material of the bow tie is mentioned in the description and is fabric.", + 1 + ], + [ + "The bow tie is not mentioned.", + 0 + ], + [ + "The material of the bow tie is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The material of the bow tie is mentioned in the description and is fabric.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bow tie is not mentioned in the description.", + 0 + ], + [ + "The neck band of the bow tie is mentioned in the description.", + -1 + ], + [ + "The neck band of the bow tie is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The neck band of the bow tie is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The trumpet is mentioned in the description.", + -1 + ], + [ + "The trumpet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The trumpet is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The suit is mentioned in the description.", + -1 + ], + [ + "The suit is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The suit is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The person is mentioned in the description.", + -1 + ], + [ + "The person is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The person is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The glasses are mentioned in the description.", + -1 + ], + [ + "The glasses are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The glasses are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is bow tie. 
Based on the image, is it likely that the object in the description is given class: bow tie or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips. The bow tie has a symmetrical design with a central knot and two loops that are slightly curved outward.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is bow tie. Based on the image, is it likely that the object in the description is given class: bow tie or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips. The bow tie has a symmetrical design with a central knot and two loops that are slightly curved outward.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The texture of the bow tie is mentioned in the description but is not smooth.\nB. The texture of the bow tie is mentioned in the description and is smooth.\nC. The bow tie is not mentioned.\nD. The texture of the bow tie is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips. The bow tie has a symmetrical design with a central knot and two loops that are slightly curved outward.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the bow tie is mentioned in the description but is not butterfly-shaped.\nB. The shape of the bow tie is mentioned in the description and is butterfly-shaped.\nC. The bow tie is not mentioned.\nD. The shape of the bow tie is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips. 
The bow tie has a symmetrical design with a central knot and two loops that are slightly curved outward.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the bow tie is mentioned in the description but is not black.\nB. The color of the bow tie is mentioned in the description and is black.\nC. The bow tie is not mentioned.\nD. The color of the bow tie is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips. The bow tie has a symmetrical design with a central knot and two loops that are slightly curved outward.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the bow tie is mentioned in the description but is not fabric.\nB. The material of the bow tie is mentioned in the description and is fabric.\nC. The bow tie is not mentioned.\nD. The material of the bow tie is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips. The bow tie has a symmetrical design with a central knot and two loops that are slightly curved outward.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bow tie is not mentioned in the description.\nB. The neck band of the bow tie is mentioned in the description.\nC. The neck band of the bow tie is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips. The bow tie has a symmetrical design with a central knot and two loops that are slightly curved outward.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The trumpet is mentioned in the description.\nB. The trumpet is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips. The bow tie has a symmetrical design with a central knot and two loops that are slightly curved outward.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The suit is mentioned in the description.\nB. The suit is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips. The bow tie has a symmetrical design with a central knot and two loops that are slightly curved outward.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The person is mentioned in the description.\nB. The person is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips. 
The bow tie has a symmetrical design with a central knot and two loops that are slightly curved outward.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The glasses are mentioned in the description.\nB. The glasses are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The texture of the bow tie is mentioned in the description and is smooth.", + "B. The shape of the bow tie is mentioned in the description and is butterfly-shaped.", + "B. The color of the bow tie is mentioned in the description and is black.", + "B. The material of the bow tie is mentioned in the description and is fabric.", + "C. The neck band of the bow tie is not mentioned in the description.", + "B. The trumpet is not mentioned in the description.", + "B. The suit is not mentioned in the description.", + "B. The person is not mentioned in the description.", + "B. The glasses are not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "24017816": { + "pred": "The car features a dark-tinted side window with a black frame, and a portion of the front windshield is visible, also with a black frame.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the windshield is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The windshield or the car is not mentioned.", + 0 + ], + [ + "The shape of the windshield is not mentioned, but the windshield of the car is mentioned.", + 0.5 + ], + [ + "The shape of the windshield is mentioned in the description and is rectangular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the windshield is not mentioned, but the windshield of the car is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the car is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The car is not mentioned.", + 0 + ], + [ + "The color of the car is not mentioned.", + 0 + ], + [ + "The color of the car is mentioned in the description and is dark or black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the car is not mentioned.", + "pred_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the window is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The window or the car is not mentioned.", + 0 + ], + [ + "The color of the window is not mentioned, but the window of the car is mentioned.", + 0.5 + ], + [ + "The color of the window is mentioned in the description and is dark or black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the window is mentioned in the description and is dark or black.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the side mirror is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The side mirror or the car is not mentioned.", + 0 + ], + [ + "The color of the side mirror is not mentioned, but the side mirror of the car is mentioned.", + 0.5 + ], + [ + "The color of the side mirror is mentioned in the description and is dark or black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the side mirror is not mentioned, but the side mirror of the car is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fender of the car is not mentioned in the description.", + 1 + ], + [ + "The fender of the car is mentioned in the description.", + -1 + ], + [ + "The car is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The fender of the car is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The taillight of the car is not mentioned in the description.", + 1 + ], + [ + "The taillight of the car is mentioned in the description.", + -1 + ], + [ + "The car is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The taillight of the car is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tire of the car is not mentioned in the description.", + 1 + ], + [ + "The tire of the car is mentioned in the description.", + -1 + ], + [ + "The car is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The tire of the car is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The exhaust pipe of the car is not mentioned in the description.", + 1 + ], + [ + "The exhaust pipe of the car is mentioned in the description.", + -1 + ], + [ + "The car is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The exhaust pipe of the car is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The hood of the car is not mentioned in the description.", + 1 + ], + [ + "The hood of the car is mentioned in the description.", + -1 + ], + [ + "The car is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The hood of the car is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is car. Based on the image, is it likely that the object in the description is given class: car or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe car features a dark-tinted side window with a black frame, and a portion of the front windshield is visible, also with a black frame.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is car. Based on the image, is it likely that the object in the description is given class: car or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car features a dark-tinted side window with a black frame, and a portion of the front windshield is visible, also with a black frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the windshield is mentioned in the description but is not rectangular.\nB. The windshield or the car is not mentioned.\nC. The shape of the windshield is not mentioned, but the windshield of the car is mentioned.\nD. The shape of the windshield is mentioned in the description and is rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe car features a dark-tinted side window with a black frame, and a portion of the front windshield is visible, also with a black frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the car is mentioned in the description but is not dark or black.\nB. The car is not mentioned.\nC. The color of the car is not mentioned.\nD. The color of the car is mentioned in the description and is dark or black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car features a dark-tinted side window with a black frame, and a portion of the front windshield is visible, also with a black frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the window is mentioned in the description but is not dark or black.\nB. The window or the car is not mentioned.\nC. The color of the window is not mentioned, but the window of the car is mentioned.\nD. The color of the window is mentioned in the description and is dark or black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe car features a dark-tinted side window with a black frame, and a portion of the front windshield is visible, also with a black frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the side mirror is mentioned in the description but is not dark or black.\nB. The side mirror or the car is not mentioned.\nC. The color of the side mirror is not mentioned, but the side mirror of the car is mentioned.\nD. The color of the side mirror is mentioned in the description and is dark or black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car features a dark-tinted side window with a black frame, and a portion of the front windshield is visible, also with a black frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fender of the car is not mentioned in the description.\nB. The fender of the car is mentioned in the description.\nC. The car is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe car features a dark-tinted side window with a black frame, and a portion of the front windshield is visible, also with a black frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The taillight of the car is not mentioned in the description.\nB. The taillight of the car is mentioned in the description.\nC. The car is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car features a dark-tinted side window with a black frame, and a portion of the front windshield is visible, also with a black frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tire of the car is not mentioned in the description.\nB. The tire of the car is mentioned in the description.\nC. The car is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe car features a dark-tinted side window with a black frame, and a portion of the front windshield is visible, also with a black frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The exhaust pipe of the car is not mentioned in the description.\nB. The exhaust pipe of the car is mentioned in the description.\nC. The car is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car features a dark-tinted side window with a black frame, and a portion of the front windshield is visible, also with a black frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The hood of the car is not mentioned in the description.\nB. The hood of the car is mentioned in the description.\nC. The car is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The shape of the windshield is not mentioned, but the windshield of the car is mentioned.", + "C. The color of the car is not mentioned.", + "D. The color of the window is mentioned in the description and is dark or black.", + "C. The color of the side mirror is not mentioned, but the side mirror of the car is mentioned.", + "A. The fender of the car is not mentioned in the description.", + "A. The taillight of the car is not mentioned in the description.", + "A. The tire of the car is not mentioned in the description.", + "A. 
The exhaust pipe of the car is not mentioned in the description.", + "A. The hood of the car is not mentioned in the description." + ], + "score": 0.7777777777777778, + "score_pos": 0.5, + "score_neg": 1.0, + "recognition_result": true + }, + "24498027": { + "pred": "A tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a horizontal arm extending from the middle, supporting a lantern-style light fixture with a glass enclosure and a metal frame.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the pole is mentioned in the description but is not black or dark.", + -1 + ], + [ + "The color of the pole is not mentioned, but the pole of the street lights is mentioned.", + 0.5 + ], + [ + "The color of the pole is mentioned in the description and is black or dark.", + 1 + ], + [ + "The pole or the street lights is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the pole is mentioned in the description and is black or dark.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the lamp is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the lamp is not mentioned, but the lamp of the street lights is mentioned.", + 0.5 + ], + [ + "The color of the lamp is mentioned in the description and is white.", + 1 + ], + [ + "The lamp or the street lights is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the lamp is not mentioned, but the lamp of the street lights is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The direction of the bars is mentioned in the description but is not horizontal.", + -1 + ], + [ + "The direction of the bars is not mentioned, but the bars of the street lights are mentioned.", + 0.5 + ], + [ + "The direction of the bars is mentioned in the description and is horizontal.", + 1 + ], + [ + "The bars or the street lights are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The direction of the bars is mentioned in the description and is horizontal.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The street lights are not mentioned in the description.", + 0 + ], + [ + "The cable of the street lights is mentioned in the description.", + -1 + ], + [ + "The cable of the street lights is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The cable of the street lights is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The street lights are not mentioned in the description.", + 0 + ], + [ + "The wire of the street lights is mentioned in the description.", + -1 + ], + [ + "The wire of the street lights is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The wire of the street lights is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bus is mentioned in the description.", + -1 + ], + [ + "The bus is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bus is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bike is mentioned in the description.", + -1 + ], + [ + "The bike is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bike is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ambulance is mentioned in the description.", + -1 + ], + [ + "The ambulance is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The ambulance is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is street lights. Based on the image, is it likely that the object in the description is given class: street lights or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a horizontal arm extending from the middle, supporting a lantern-style light fixture with a glass enclosure and a metal frame.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is street lights. Based on the image, is it likely that the object in the description is given class: street lights or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a horizontal arm extending from the middle, supporting a lantern-style light fixture with a glass enclosure and a metal frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the pole is mentioned in the description but is not black or dark.\nB. The color of the pole is not mentioned, but the pole of the street lights is mentioned.\nC. The color of the pole is mentioned in the description and is black or dark.\nD. The pole or the street lights is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a horizontal arm extending from the middle, supporting a lantern-style light fixture with a glass enclosure and a metal frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the lamp is mentioned in the description but is not white.\nB. The color of the lamp is not mentioned, but the lamp of the street lights is mentioned.\nC. The color of the lamp is mentioned in the description and is white.\nD. The lamp or the street lights is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a horizontal arm extending from the middle, supporting a lantern-style light fixture with a glass enclosure and a metal frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The direction of the bars is mentioned in the description but is not horizontal.\nB. The direction of the bars is not mentioned, but the bars of the street lights are mentioned.\nC. The direction of the bars is mentioned in the description and is horizontal.\nD. 
The bars or the street lights are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a horizontal arm extending from the middle, supporting a lantern-style light fixture with a glass enclosure and a metal frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The street lights are not mentioned in the description.\nB. The cable of the street lights is mentioned in the description.\nC. The cable of the street lights is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a horizontal arm extending from the middle, supporting a lantern-style light fixture with a glass enclosure and a metal frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The street lights are not mentioned in the description.\nB. The wire of the street lights is mentioned in the description.\nC. 
The wire of the street lights is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a horizontal arm extending from the middle, supporting a lantern-style light fixture with a glass enclosure and a metal frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bus is mentioned in the description.\nB. The bus is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a horizontal arm extending from the middle, supporting a lantern-style light fixture with a glass enclosure and a metal frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bike is mentioned in the description.\nB. The bike is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a horizontal arm extending from the middle, supporting a lantern-style light fixture with a glass enclosure and a metal frame.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ambulance is mentioned in the description.\nB. The ambulance is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the pole is mentioned in the description and is black or dark.", + "B. The color of the lamp is not mentioned, but the lamp of the street lights is mentioned.", + "C. The direction of the bars is mentioned in the description and is horizontal.", + "C. The cable of the street lights is not mentioned in the description.", + "C. The wire of the street lights is not mentioned in the description.", + "B. The bus is not mentioned in the description.", + "B. The bike is not mentioned in the description.", + "B. The ambulance is not mentioned in the description." + ], + "score": 0.9375, + "score_pos": 0.8333333333333334, + "score_neg": 1.0, + "recognition_result": true + }, + "24581953": { + "pred": "A large, light gray dog with a short, smooth coat is lying down with its body stretched out. The dog has a long, slender tail that extends straight out behind it. Its legs are extended, with the front legs slightly bent and the hind legs stretched out. 
The dog's head is resting on the ground, and its ears are relaxed and folded back.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the dog is mentioned in the description but is not white or gray.", + -1 + ], + [ + "The color of the dog is mentioned in the description and is white or gray.", + 1 + ], + [ + "The color of the dog is not mentioned.", + 0 + ], + [ + "The dog is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the dog is mentioned in the description and is white or gray.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The posture of the dog is mentioned in the description but is not lying down.", + -1 + ], + [ + "The posture of the dog is mentioned in the description and is lying down.", + 1 + ], + [ + "The posture of the dog is not mentioned.", + 0 + ], + [ + "The dog is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The posture of the dog is mentioned in the description and is lying down.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the tail is mentioned in the description but is not large or long.", + -1 + ], + [ + "The size of the tail is mentioned in the description and is large or long.", + 1 + ], + [ + "The size of the tail is not mentioned, but the tail of the dog is mentioned.", + 0.5 + ], + [ + "The tail or the dog is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The size of the tail is mentioned in the description and is large or long.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the dog is mentioned in the description but is not large.", + -1 + ], + [ + "The size of the dog is mentioned in the description and is large.", + 1 + ], + [ + "The size of the dog is not mentioned.", + 0 + ], + [ + "The dog is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The size of the dog is mentioned in the description and is large.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the coat is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the coat is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the coat is not mentioned, but the coat of the dog is mentioned.", + 0.5 + ], + [ + "The coat or the dog is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The texture of the coat is mentioned in the description and is smooth.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The temple is not mentioned in the description.", + 1 + ], + [ + "The temple is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The temple is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouth of the dog is not mentioned in the description.", + 1 + ], + [ + "The mouth of the dog is mentioned in the description.", + -1 + ], + [ + "The dog is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The mouth of the dog is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The eye of the dog is not mentioned in the description.", + 1 + ], + [ + "The eye of the dog is mentioned in the description.", + -1 + ], + [ + "The dog is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The eye of the dog is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The nose of the dog is not mentioned in the description.", + 1 + ], + [ + "The nose of the dog is mentioned in the description.", + -1 + ], + [ + "The dog is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The nose of the dog is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bird is not mentioned in the description.", + 1 + ], + [ + "The bird is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The bird is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is dog. Based on the image, is it likely that the object in the description is given class: dog or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light gray dog with a short, smooth coat is lying down with its body stretched out. The dog has a long, slender tail that extends straight out behind it. Its legs are extended, with the front legs slightly bent and the hind legs stretched out. The dog's head is resting on the ground, and its ears are relaxed and folded back.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is dog. Based on the image, is it likely that the object in the description is given class: dog or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA large, light gray dog with a short, smooth coat is lying down with its body stretched out. The dog has a long, slender tail that extends straight out behind it. Its legs are extended, with the front legs slightly bent and the hind legs stretched out. The dog's head is resting on the ground, and its ears are relaxed and folded back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the dog is mentioned in the description but is not white or gray.\nB. The color of the dog is mentioned in the description and is white or gray.\nC. The color of the dog is not mentioned.\nD. The dog is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light gray dog with a short, smooth coat is lying down with its body stretched out. The dog has a long, slender tail that extends straight out behind it. Its legs are extended, with the front legs slightly bent and the hind legs stretched out. The dog's head is resting on the ground, and its ears are relaxed and folded back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The posture of the dog is mentioned in the description but is not lying down.\nB. The posture of the dog is mentioned in the description and is lying down.\nC. The posture of the dog is not mentioned.\nD. 
The dog is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light gray dog with a short, smooth coat is lying down with its body stretched out. The dog has a long, slender tail that extends straight out behind it. Its legs are extended, with the front legs slightly bent and the hind legs stretched out. The dog's head is resting on the ground, and its ears are relaxed and folded back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the tail is mentioned in the description but is not large or long.\nB. The size of the tail is mentioned in the description and is large or long.\nC. The size of the tail is not mentioned, but the tail of the dog is mentioned.\nD. The tail or the dog is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light gray dog with a short, smooth coat is lying down with its body stretched out. The dog has a long, slender tail that extends straight out behind it. Its legs are extended, with the front legs slightly bent and the hind legs stretched out. 
The dog's head is resting on the ground, and its ears are relaxed and folded back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the dog is mentioned in the description but is not large.\nB. The size of the dog is mentioned in the description and is large.\nC. The size of the dog is not mentioned.\nD. The dog is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light gray dog with a short, smooth coat is lying down with its body stretched out. The dog has a long, slender tail that extends straight out behind it. Its legs are extended, with the front legs slightly bent and the hind legs stretched out. The dog's head is resting on the ground, and its ears are relaxed and folded back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the coat is mentioned in the description but is not smooth.\nB. The texture of the coat is mentioned in the description and is smooth.\nC. The texture of the coat is not mentioned, but the coat of the dog is mentioned.\nD. The coat or the dog is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA large, light gray dog with a short, smooth coat is lying down with its body stretched out. The dog has a long, slender tail that extends straight out behind it. Its legs are extended, with the front legs slightly bent and the hind legs stretched out. The dog's head is resting on the ground, and its ears are relaxed and folded back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The temple is not mentioned in the description.\nB. The temple is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light gray dog with a short, smooth coat is lying down with its body stretched out. The dog has a long, slender tail that extends straight out behind it. Its legs are extended, with the front legs slightly bent and the hind legs stretched out. The dog's head is resting on the ground, and its ears are relaxed and folded back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mouth of the dog is not mentioned in the description.\nB. The mouth of the dog is mentioned in the description.\nC. The dog is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light gray dog with a short, smooth coat is lying down with its body stretched out. The dog has a long, slender tail that extends straight out behind it. Its legs are extended, with the front legs slightly bent and the hind legs stretched out. The dog's head is resting on the ground, and its ears are relaxed and folded back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The eye of the dog is not mentioned in the description.\nB. The eye of the dog is mentioned in the description.\nC. The dog is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light gray dog with a short, smooth coat is lying down with its body stretched out. The dog has a long, slender tail that extends straight out behind it. Its legs are extended, with the front legs slightly bent and the hind legs stretched out. The dog's head is resting on the ground, and its ears are relaxed and folded back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The nose of the dog is not mentioned in the description.\nB. The nose of the dog is mentioned in the description.\nC. 
The dog is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light gray dog with a short, smooth coat is lying down with its body stretched out. The dog has a long, slender tail that extends straight out behind it. Its legs are extended, with the front legs slightly bent and the hind legs stretched out. The dog's head is resting on the ground, and its ears are relaxed and folded back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bird is not mentioned in the description.\nB. The bird is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the dog is mentioned in the description and is white or gray.", + "B. The posture of the dog is mentioned in the description and is lying down.", + "B. The size of the tail is mentioned in the description and is large or long.", + "B. The size of the dog is mentioned in the description and is large.", + "B. The texture of the coat is mentioned in the description and is smooth.", + "A. The temple is not mentioned in the description.", + "A. The mouth of the dog is not mentioned in the description.", + "A. The eye of the dog is not mentioned in the description.", + "A. The nose of the dog is not mentioned in the description.", + "A. The bird is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "24786060": { + "pred": "A light gray towel with a soft, plush texture, featuring a slightly wrinkled appearance. 
The towel has a rectangular shape with a visible fold running vertically down the center.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the towel is mentioned in the description but is not gray or white.", + -1 + ], + [ + "The color of the towel is not mentioned.", + 0 + ], + [ + "The color of the towel is mentioned in the description and is gray or white.", + 1 + ], + [ + "The towel is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the towel is mentioned in the description and is gray or white.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the towel is mentioned in the description but is not plush.", + -1 + ], + [ + "The texture of the towel is not mentioned.", + 0 + ], + [ + "The texture of the towel is mentioned in the description and is plush.", + 1 + ], + [ + "The towel is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The texture of the towel is mentioned in the description and is plush.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the towel is mentioned in the description but is not irregular.", + -1 + ], + [ + "The shape of the towel is not mentioned.", + 0 + ], + [ + "The shape of the towel is mentioned in the description and is irregular.", + 1 + ], + [ + "The towel is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The shape of the towel is mentioned in the description but is not irregular.", + "pred_index": 0, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The loop of the towel is mentioned in the description.", + -1 + ], + [ + "The loop of the towel is not mentioned in the description.", + 1 + ], + [ + "The towel is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The loop of the towel is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The vanity is mentioned in the description.", + -1 + ], + [ + "The vanity is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The vanity is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sink is mentioned in the description.", + -1 + ], + [ + "The sink is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The sink is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The design of the towel is mentioned in the description.", + -1 + ], + [ + "The design of the towel is not mentioned in the description.", + 1 + ], + [ + "The towel is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The design of the towel is mentioned in the description.", + "pred_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet is mentioned in the description.", + -1 + ], + [ + "The toilet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The toilet is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is towel. Based on the image, is it likely that the object in the description is given class: towel or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, plush texture, featuring a slightly wrinkled appearance. The towel has a rectangular shape with a visible fold running vertically down the center.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is towel. Based on the image, is it likely that the object in the description is given class: towel or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, plush texture, featuring a slightly wrinkled appearance. The towel has a rectangular shape with a visible fold running vertically down the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the towel is mentioned in the description but is not gray or white.\nB. The color of the towel is not mentioned.\nC. The color of the towel is mentioned in the description and is gray or white.\nD. The towel is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, plush texture, featuring a slightly wrinkled appearance. The towel has a rectangular shape with a visible fold running vertically down the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the towel is mentioned in the description but is not plush.\nB. The texture of the towel is not mentioned.\nC. The texture of the towel is mentioned in the description and is plush.\nD. The towel is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, plush texture, featuring a slightly wrinkled appearance. The towel has a rectangular shape with a visible fold running vertically down the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the towel is mentioned in the description but is not irregular.\nB. The shape of the towel is not mentioned.\nC. The shape of the towel is mentioned in the description and is irregular.\nD. The towel is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, plush texture, featuring a slightly wrinkled appearance. The towel has a rectangular shape with a visible fold running vertically down the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The loop of the towel is mentioned in the description.\nB. The loop of the towel is not mentioned in the description.\nC. The towel is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, plush texture, featuring a slightly wrinkled appearance. The towel has a rectangular shape with a visible fold running vertically down the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The vanity is mentioned in the description.\nB. The vanity is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, plush texture, featuring a slightly wrinkled appearance. The towel has a rectangular shape with a visible fold running vertically down the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sink is mentioned in the description.\nB. The sink is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, plush texture, featuring a slightly wrinkled appearance. 
The towel has a rectangular shape with a visible fold running vertically down the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The design of the towel is mentioned in the description.\nB. The design of the towel is not mentioned in the description.\nC. The towel is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, plush texture, featuring a slightly wrinkled appearance. The towel has a rectangular shape with a visible fold running vertically down the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toilet is mentioned in the description.\nB. The toilet is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the towel is mentioned in the description and is gray or white.", + "C. The texture of the towel is mentioned in the description and is plush.", + "A. The shape of the towel is mentioned in the description but is not irregular.", + "B. The loop of the towel is not mentioned in the description.", + "B. The vanity is not mentioned in the description.", + "B. The sink is not mentioned in the description.", + "A. The design of the towel is mentioned in the description.", + "B. The toilet is not mentioned in the description." 
+ ], + "score": 0.5, + "score_pos": 0.3333333333333333, + "score_neg": 0.6, + "recognition_result": true + }, + "25054869": { + "pred": "A beige toilet cistern with a smooth, curved top surface and a slightly protruding front edge.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lid or the toilet is not mentioned.", + 0 + ], + [ + "The material of the lid is mentioned in the description but is not ceramic.", + -1 + ], + [ + "The material of the lid is mentioned in the description and is ceramic.", + 1 + ], + [ + "The material of the lid is not mentioned, but the lid of the toilet is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the lid is not mentioned, but the lid of the toilet is mentioned.", + "pred_index": 3, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lid or the toilet is not mentioned.", + 0 + ], + [ + "The texture of the lid is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the lid is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the lid is not mentioned, but the lid of the toilet is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. The texture of the lid is mentioned in the description and is smooth.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tank or the toilet is not mentioned.", + 0 + ], + [ + "The shape of the tank is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the tank is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the tank is not mentioned, but the tank of the toilet is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The shape of the tank is mentioned in the description but is not rectangular.", + "pred_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet is not mentioned.", + 0 + ], + [ + "The color of the toilet is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the toilet is mentioned in the description and is white.", + 1 + ], + [ + "The color of the toilet is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the toilet is mentioned in the description and is white.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet is not mentioned in the description.", + 0 + ], + [ + "The flush lever of the toilet is mentioned in the description.", + -1 + ], + [ + "The flush lever of the toilet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The flush lever of the toilet is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet is not mentioned in the description.", + 0 + ], + [ + "The flush button of the toilet is mentioned in the description.", + -1 + ], + [ + "The flush button of the toilet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The flush button of the toilet is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet is not mentioned in the description.", + 0 + ], + [ + "The toilet bowl of the toilet is mentioned in the description.", + -1 + ], + [ + "The toilet bowl of the toilet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The toilet bowl of the toilet is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toothbrush is mentioned in the description.", + -1 + ], + [ + "The toothbrush is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The toothbrush is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet is not mentioned in the description.", + 0 + ], + [ + "The toilet seat of the toilet is mentioned in the description.", + -1 + ], + [ + "The toilet seat of the toilet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The toilet seat of the toilet is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is toilet. Based on the image, is it likely that the object in the description is given class: toilet or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA beige toilet cistern with a smooth, curved top surface and a slightly protruding front edge.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is toilet. Based on the image, is it likely that the object in the description is given class: toilet or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA beige toilet cistern with a smooth, curved top surface and a slightly protruding front edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lid or the toilet is not mentioned.\nB. The material of the lid is mentioned in the description but is not ceramic.\nC. The material of the lid is mentioned in the description and is ceramic.\nD. The material of the lid is not mentioned, but the lid of the toilet is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA beige toilet cistern with a smooth, curved top surface and a slightly protruding front edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lid or the toilet is not mentioned.\nB. The texture of the lid is mentioned in the description but is not smooth.\nC. The texture of the lid is mentioned in the description and is smooth.\nD. The texture of the lid is not mentioned, but the lid of the toilet is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA beige toilet cistern with a smooth, curved top surface and a slightly protruding front edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tank or the toilet is not mentioned.\nB. The shape of the tank is mentioned in the description but is not rectangular.\nC. The shape of the tank is mentioned in the description and is rectangular.\nD. The shape of the tank is not mentioned, but the tank of the toilet is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA beige toilet cistern with a smooth, curved top surface and a slightly protruding front edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toilet is not mentioned.\nB. The color of the toilet is mentioned in the description but is not white.\nC. The color of the toilet is mentioned in the description and is white.\nD. The color of the toilet is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA beige toilet cistern with a smooth, curved top surface and a slightly protruding front edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toilet is not mentioned in the description.\nB. The flush lever of the toilet is mentioned in the description.\nC. The flush lever of the toilet is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA beige toilet cistern with a smooth, curved top surface and a slightly protruding front edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toilet is not mentioned in the description.\nB. The flush button of the toilet is mentioned in the description.\nC. The flush button of the toilet is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA beige toilet cistern with a smooth, curved top surface and a slightly protruding front edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toilet is not mentioned in the description.\nB. The toilet bowl of the toilet is mentioned in the description.\nC. The toilet bowl of the toilet is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA beige toilet cistern with a smooth, curved top surface and a slightly protruding front edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toothbrush is mentioned in the description.\nB. The toothbrush is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA beige toilet cistern with a smooth, curved top surface and a slightly protruding front edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toilet is not mentioned in the description.\nB. The toilet seat of the toilet is mentioned in the description.\nC. The toilet seat of the toilet is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The material of the lid is not mentioned, but the lid of the toilet is mentioned.", + "C. The texture of the lid is mentioned in the description and is smooth.", + "B. The shape of the tank is mentioned in the description but is not rectangular.", + "C. The color of the toilet is mentioned in the description and is white.", + "C. The flush lever of the toilet is not mentioned in the description.", + "C. The flush button of the toilet is not mentioned in the description.", + "C. The toilet bowl of the toilet is not mentioned in the description.", + "B. The toothbrush is not mentioned in the description.", + "C. 
The toilet seat of the toilet is not mentioned in the description." + ], + "score": 0.7222222222222222, + "score_pos": 0.375, + "score_neg": 1.0, + "recognition_result": true + }, + "25273553": { + "pred": "A black tripod with three legs, each leg featuring a rubber foot for stability. The legs are connected at the top by a central column, which supports a mounting platform with a quick-release plate. The tripod has a telescopic head with a pan handle for adjusting the angle of the head.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the legs is mentioned in the description but is not slender.", + -1 + ], + [ + "The shape of the legs is mentioned in the description and is slender.", + 1 + ], + [ + "The legs or the tripod are not mentioned.", + 0 + ], + [ + "The shape of the legs is not mentioned, but the legs of the tripod are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the legs is mentioned in the description and is slender.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the tripod is mentioned in the description but is not plastic or metal.", + -1 + ], + [ + "The material of the tripod is mentioned in the description and is plastic or metal.", + 1 + ], + [ + "The tripod is not mentioned.", + 0 + ], + [ + "The material of the tripod is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "D. 
The material of the tripod is not mentioned.", + "pred_index": 3, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the tripod is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the tripod is mentioned in the description and is black.", + 1 + ], + [ + "The tripod is not mentioned.", + 0 + ], + [ + "The color of the tripod is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the tripod is mentioned in the description and is black.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tripod is not mentioned in the description.", + 0 + ], + [ + "The leg locks of the tripod are not mentioned in the description.", + 1 + ], + [ + "The leg locks of the tripod are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The leg locks of the tripod are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tripod is not mentioned in the description.", + 0 + ], + [ + "The quick release plate of the tripod is not mentioned in the description.", + 1 + ], + [ + "The quick release plate of the tripod is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The quick release plate of the tripod is mentioned in the description.", + "pred_index": 2, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The hot air balloon is not mentioned in the description.", + 1 + ], + [ + "The hot air balloon is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The hot air balloon is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tree is not mentioned in the description.", + 1 + ], + [ + "The tree is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The tree is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sky is not mentioned in the description.", + 1 + ], + [ + "The sky is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The sky is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is tripod. Based on the image, is it likely that the object in the description is given class: tripod or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with three legs, each leg featuring a rubber foot for stability. The legs are connected at the top by a central column, which supports a mounting platform with a quick-release plate. The tripod has a telescopic head with a pan handle for adjusting the angle of the head.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is tripod. Based on the image, is it likely that the object in the description is given class: tripod or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with three legs, each leg featuring a rubber foot for stability. The legs are connected at the top by a central column, which supports a mounting platform with a quick-release plate. The tripod has a telescopic head with a pan handle for adjusting the angle of the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the legs is mentioned in the description but is not slender.\nB. The shape of the legs is mentioned in the description and is slender.\nC. The legs or the tripod are not mentioned.\nD. The shape of the legs is not mentioned, but the legs of the tripod are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with three legs, each leg featuring a rubber foot for stability. The legs are connected at the top by a central column, which supports a mounting platform with a quick-release plate. The tripod has a telescopic head with a pan handle for adjusting the angle of the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the tripod is mentioned in the description but is not plastic or metal.\nB. The material of the tripod is mentioned in the description and is plastic or metal.\nC. The tripod is not mentioned.\nD. The material of the tripod is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with three legs, each leg featuring a rubber foot for stability. The legs are connected at the top by a central column, which supports a mounting platform with a quick-release plate. The tripod has a telescopic head with a pan handle for adjusting the angle of the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the tripod is mentioned in the description but is not black.\nB. The color of the tripod is mentioned in the description and is black.\nC. 
The tripod is not mentioned.\nD. The color of the tripod is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with three legs, each leg featuring a rubber foot for stability. The legs are connected at the top by a central column, which supports a mounting platform with a quick-release plate. The tripod has a telescopic head with a pan handle for adjusting the angle of the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tripod is not mentioned in the description.\nB. The leg locks of the tripod are not mentioned in the description.\nC. The leg locks of the tripod are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with three legs, each leg featuring a rubber foot for stability. The legs are connected at the top by a central column, which supports a mounting platform with a quick-release plate. The tripod has a telescopic head with a pan handle for adjusting the angle of the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The tripod is not mentioned in the description.\nB. The quick release plate of the tripod is not mentioned in the description.\nC. The quick release plate of the tripod is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with three legs, each leg featuring a rubber foot for stability. The legs are connected at the top by a central column, which supports a mounting platform with a quick-release plate. The tripod has a telescopic head with a pan handle for adjusting the angle of the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The hot air balloon is not mentioned in the description.\nB. The hot air balloon is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with three legs, each leg featuring a rubber foot for stability. The legs are connected at the top by a central column, which supports a mounting platform with a quick-release plate. 
The tripod has a telescopic head with a pan handle for adjusting the angle of the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tree is not mentioned in the description.\nB. The tree is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with three legs, each leg featuring a rubber foot for stability. The legs are connected at the top by a central column, which supports a mounting platform with a quick-release plate. The tripod has a telescopic head with a pan handle for adjusting the angle of the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sky is not mentioned in the description.\nB. The sky is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The shape of the legs is mentioned in the description and is slender.", + "D. The material of the tripod is not mentioned.", + "B. The color of the tripod is mentioned in the description and is black.", + "B. The leg locks of the tripod are not mentioned in the description.", + "C. The quick release plate of the tripod is mentioned in the description.", + "A. The hot air balloon is not mentioned in the description.", + "A. The tree is not mentioned in the description.", + "A. The sky is not mentioned in the description." 
+ ], + "score": 0.625, + "score_pos": 0.6666666666666666, + "score_neg": 0.6, + "recognition_result": true + }, + "25419495": { + "pred": "The tongs have a dark green handle with a black grip at the end. The metal arms are slightly curved and have a dark, matte finish. The tips of the arms are pointed and designed for precise gripping.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the gripping ends/claws is mentioned in the description and is black.", + 1 + ], + [ + "The color of the gripping ends/claws is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the gripping ends/claws is not mentioned, but the gripping ends/claws of the tong are mentioned.", + 0.5 + ], + [ + "The gripping ends/claws or the tong are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the gripping ends/claws is not mentioned, but the gripping ends/claws of the tong are mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the gripping ends/claws is mentioned in the description and is scalloped.", + 1 + ], + [ + "The shape of the gripping ends/claws is mentioned in the description but is not scalloped.", + -1 + ], + [ + "The shape of the gripping ends/claws is not mentioned, but the gripping ends/claws of the tong are mentioned.", + 0.5 + ], + [ + "The gripping ends/claws or the tong are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The shape of the gripping ends/claws is mentioned in the description but is not scalloped.", + "pred_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the handle/arm is mentioned in the description and is black or metallic.", + 1 + ], + [ + "The color of the handle/arm is mentioned in the description but is not black or metallic.", + -1 + ], + [ + "The color of the handle/arm is not mentioned, but the handle/arm of the tong is mentioned.", + 0.5 + ], + [ + "The handle/arm or the tong is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the handle/arm is mentioned in the description but is not black or metallic.", + "pred_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the handle/arm is mentioned in the description and is metal or rubber.", + 1 + ], + [ + "The material of the handle/arm is mentioned in the description but is not metal or rubber.", + -1 + ], + [ + "The material of the handle/arm is not mentioned, but the handle/arm of the tong is mentioned.", + 0.5 + ], + [ + "The handle/arm or the tong is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the handle/arm is not mentioned, but the handle/arm of the tong is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The spoon is mentioned in the description.", + -1 + ], + [ + "The spoon is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The spoon is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The spring of the tong is mentioned in the description.", + -1 + ], + [ + "The tong is not mentioned in the description.", + 0 + ], + [ + "The spring of the tong is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The spring of the tong is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plate is mentioned in the description.", + -1 + ], + [ + "The plate is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The plate is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cup is mentioned in the description.", + -1 + ], + [ + "The cup is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The cup is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fork is mentioned in the description.", + -1 + ], + [ + "The fork is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The fork is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is tong. 
Based on the image, is it likely that the object in the description is given class: tong or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a dark green handle with a black grip at the end. The metal arms are slightly curved and have a dark, matte finish. The tips of the arms are pointed and designed for precise gripping.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is tong. Based on the image, is it likely that the object in the description is given class: tong or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a dark green handle with a black grip at the end. The metal arms are slightly curved and have a dark, matte finish. The tips of the arms are pointed and designed for precise gripping.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The color of the gripping ends/claws is mentioned in the description and is black.\nB. The color of the gripping ends/claws is mentioned in the description but is not black.\nC. The color of the gripping ends/claws is not mentioned, but the gripping ends/claws of the tong are mentioned.\nD. The gripping ends/claws or the tong are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a dark green handle with a black grip at the end. The metal arms are slightly curved and have a dark, matte finish. The tips of the arms are pointed and designed for precise gripping.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the gripping ends/claws is mentioned in the description and is scalloped.\nB. The shape of the gripping ends/claws is mentioned in the description but is not scalloped.\nC. The shape of the gripping ends/claws is not mentioned, but the gripping ends/claws of the tong are mentioned.\nD. The gripping ends/claws or the tong are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a dark green handle with a black grip at the end. 
The metal arms are slightly curved and have a dark, matte finish. The tips of the arms are pointed and designed for precise gripping.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the handle/arm is mentioned in the description and is black or metallic.\nB. The color of the handle/arm is mentioned in the description but is not black or metallic.\nC. The color of the handle/arm is not mentioned, but the handle/arm of the tong is mentioned.\nD. The handle/arm or the tong is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a dark green handle with a black grip at the end. The metal arms are slightly curved and have a dark, matte finish. The tips of the arms are pointed and designed for precise gripping.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the handle/arm is mentioned in the description and is metal or rubber.\nB. The material of the handle/arm is mentioned in the description but is not metal or rubber.\nC. The material of the handle/arm is not mentioned, but the handle/arm of the tong is mentioned.\nD. The handle/arm or the tong is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a dark green handle with a black grip at the end. The metal arms are slightly curved and have a dark, matte finish. The tips of the arms are pointed and designed for precise gripping.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The spoon is mentioned in the description.\nB. The spoon is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a dark green handle with a black grip at the end. The metal arms are slightly curved and have a dark, matte finish. The tips of the arms are pointed and designed for precise gripping.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The spring of the tong is mentioned in the description.\nB. The tong is not mentioned in the description.\nC. The spring of the tong is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a dark green handle with a black grip at the end. The metal arms are slightly curved and have a dark, matte finish. The tips of the arms are pointed and designed for precise gripping.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The plate is mentioned in the description.\nB. The plate is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a dark green handle with a black grip at the end. The metal arms are slightly curved and have a dark, matte finish. The tips of the arms are pointed and designed for precise gripping.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cup is mentioned in the description.\nB. The cup is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a dark green handle with a black grip at the end. The metal arms are slightly curved and have a dark, matte finish. 
The tips of the arms are pointed and designed for precise gripping.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fork is mentioned in the description.\nB. The fork is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the gripping ends/claws is not mentioned, but the gripping ends/claws of the tong are mentioned.", + "B. The shape of the gripping ends/claws is mentioned in the description but is not scalloped.", + "B. The color of the handle/arm is mentioned in the description but is not black or metallic.", + "C. The material of the handle/arm is not mentioned, but the handle/arm of the tong is mentioned.", + "B. The spoon is not mentioned in the description.", + "C. The spring of the tong is not mentioned in the description.", + "B. The plate is not mentioned in the description.", + "B. The cup is not mentioned in the description.", + "B. The fork is not mentioned in the description." + ], + "score": 0.4444444444444444, + "score_pos": -0.25, + "score_neg": 1.0, + "recognition_result": true + }, + "25419516": { + "pred": "The toy is a plush, blue creature with large, expressive eyes and prominent, pointed ears. It has a small tuft of hair on top of its head and a light blue underbelly.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the stuffed toy is mentioned in the description but is not plush.", + -1 + ], + [ + "The material of the stuffed toy is not mentioned.", + 0 + ], + [ + "The material of the stuffed toy is mentioned in the description and is plush.", + 1 + ], + [ + "The stuffed toy is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The material of the stuffed toy is mentioned in the description and is plush.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the eyes is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the eyes is not mentioned, but the eyes of the stuffed toy are mentioned.", + 0.5 + ], + [ + "The color of the eyes is mentioned in the description and is black.", + 1 + ], + [ + "The eyes or the stuffed toy are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the eyes is not mentioned, but the eyes of the stuffed toy are mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the nose is mentioned in the description but is not blue.", + -1 + ], + [ + "The color of the nose is not mentioned, but the nose of the stuffed toy is mentioned.", + 0.5 + ], + [ + "The color of the nose is mentioned in the description and is blue.", + 1 + ], + [ + "The nose or the stuffed toy is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the nose is not mentioned, but the nose of the stuffed toy is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the body is mentioned in the description but is not blue.", + -1 + ], + [ + "The color of the body is not mentioned, but the body of the stuffed toy is mentioned.", + 0.5 + ], + [ + "The color of the body is mentioned in the description and is blue.", + 1 + ], + [ + "The body or the stuffed toy is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the body is mentioned in the description and is blue.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the nose is mentioned in the description but is not round.", + -1 + ], + [ + "The shape of the nose is not mentioned, but the nose of the stuffed toy is mentioned.", + 0.5 + ], + [ + "The shape of the nose is mentioned in the description and is round.", + 1 + ], + [ + "The nose or the stuffed toy is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the nose is not mentioned, but the nose of the stuffed toy is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tail of the stuffed toy is not mentioned in the description.", + 1 + ], + [ + "The stuffed toy is not mentioned in the description.", + 0 + ], + [ + "The tail of the stuffed toy is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The tail of the stuffed toy is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The feet of the stuffed toy are not mentioned in the description.", + 1 + ], + [ + "The stuffed toy is not mentioned in the description.", + 0 + ], + [ + "The feet of the stuffed toy are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The feet of the stuffed toy are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The door is not mentioned in the description.", + 1 + ], + [ + "The door is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The door is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chair is not mentioned in the description.", + 1 + ], + [ + "The chair is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The chair is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fork is not mentioned in the description.", + 1 + ], + [ + "The fork is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The fork is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is stuffed toy. Based on the image, is it likely that the object in the description is given class: stuffed toy or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toy is a plush, blue creature with large, expressive eyes and prominent, pointed ears. It has a small tuft of hair on top of its head and a light blue underbelly.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is stuffed toy. Based on the image, is it likely that the object in the description is given class: stuffed toy or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toy is a plush, blue creature with large, expressive eyes and prominent, pointed ears. It has a small tuft of hair on top of its head and a light blue underbelly.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the stuffed toy is mentioned in the description but is not plush.\nB. The material of the stuffed toy is not mentioned.\nC. The material of the stuffed toy is mentioned in the description and is plush.\nD. The stuffed toy is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe toy is a plush, blue creature with large, expressive eyes and prominent, pointed ears. It has a small tuft of hair on top of its head and a light blue underbelly.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the eyes is mentioned in the description but is not black.\nB. The color of the eyes is not mentioned, but the eyes of the stuffed toy are mentioned.\nC. The color of the eyes is mentioned in the description and is black.\nD. The eyes or the stuffed toy are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toy is a plush, blue creature with large, expressive eyes and prominent, pointed ears. It has a small tuft of hair on top of its head and a light blue underbelly.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the nose is mentioned in the description but is not blue.\nB. The color of the nose is not mentioned, but the nose of the stuffed toy is mentioned.\nC. The color of the nose is mentioned in the description and is blue.\nD. The nose or the stuffed toy is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toy is a plush, blue creature with large, expressive eyes and prominent, pointed ears. It has a small tuft of hair on top of its head and a light blue underbelly.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the body is mentioned in the description but is not blue.\nB. The color of the body is not mentioned, but the body of the stuffed toy is mentioned.\nC. The color of the body is mentioned in the description and is blue.\nD. The body or the stuffed toy is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toy is a plush, blue creature with large, expressive eyes and prominent, pointed ears. It has a small tuft of hair on top of its head and a light blue underbelly.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the nose is mentioned in the description but is not round.\nB. The shape of the nose is not mentioned, but the nose of the stuffed toy is mentioned.\nC. The shape of the nose is mentioned in the description and is round.\nD. The nose or the stuffed toy is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toy is a plush, blue creature with large, expressive eyes and prominent, pointed ears. It has a small tuft of hair on top of its head and a light blue underbelly.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tail of the stuffed toy is not mentioned in the description.\nB. The stuffed toy is not mentioned in the description.\nC. The tail of the stuffed toy is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toy is a plush, blue creature with large, expressive eyes and prominent, pointed ears. It has a small tuft of hair on top of its head and a light blue underbelly.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The feet of the stuffed toy are not mentioned in the description.\nB. The stuffed toy is not mentioned in the description.\nC. The feet of the stuffed toy are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe toy is a plush, blue creature with large, expressive eyes and prominent, pointed ears. It has a small tuft of hair on top of its head and a light blue underbelly.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The door is not mentioned in the description.\nB. The door is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toy is a plush, blue creature with large, expressive eyes and prominent, pointed ears. It has a small tuft of hair on top of its head and a light blue underbelly.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chair is not mentioned in the description.\nB. The chair is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toy is a plush, blue creature with large, expressive eyes and prominent, pointed ears. 
It has a small tuft of hair on top of its head and a light blue underbelly.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fork is not mentioned in the description.\nB. The fork is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The material of the stuffed toy is mentioned in the description and is plush.", + "B. The color of the eyes is not mentioned, but the eyes of the stuffed toy are mentioned.", + "B. The color of the nose is not mentioned, but the nose of the stuffed toy is mentioned.", + "C. The color of the body is mentioned in the description and is blue.", + "B. The shape of the nose is not mentioned, but the nose of the stuffed toy is mentioned.", + "A. The tail of the stuffed toy is not mentioned in the description.", + "A. The feet of the stuffed toy are not mentioned in the description.", + "A. The door is not mentioned in the description.", + "A. The chair is not mentioned in the description.", + "A. The fork is not mentioned in the description." + ], + "score": 0.85, + "score_pos": 0.7, + "score_neg": 1.0, + "recognition_result": true + }, + "25579493": { + "pred": "A square-shaped mixing bowl with rounded edges, featuring a light blue exterior and a cream-colored interior. The bowl contains a mixture of white and yellow ingredients, with a small piece of red garnish on top.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bowl is not mentioned.", + 0 + ], + [ + "The shape of the bowl is not mentioned.", + 0 + ], + [ + "The shape of the bowl is mentioned in the description and is square or rounded.", + 1 + ], + [ + "The shape of the bowl is mentioned in the description but is not square or rounded.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the bowl is mentioned in the description and is square or rounded.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bowl is not mentioned.", + 0 + ], + [ + "The texture of the bowl is not mentioned.", + 0 + ], + [ + "The texture of the bowl is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the bowl is mentioned in the description but is not smooth.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The texture of the bowl is not mentioned.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bowl is not mentioned.", + 0 + ], + [ + "The color of the bowl is not mentioned.", + 0 + ], + [ + "The color of the bowl is mentioned in the description and is white or beige.", + 1 + ], + [ + "The color of the bowl is mentioned in the description but is not white or beige.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the bowl is mentioned in the description but is not white or beige.", + "pred_index": 3, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The content or the bowl is not mentioned.", + 0 + ], + [ + "The color of the content is not mentioned, but the content of the bowl is mentioned.", + 0.5 + ], + [ + "The color of the content is mentioned in the description and is red, white, or yellow.", + 1 + ], + [ + "The color of the content is mentioned in the description but is not red, white, or yellow.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the content is mentioned in the description and is red, white, or yellow.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The base of the bowl is not mentioned in the description.", + 1 + ], + [ + "The base of the bowl is mentioned in the description.", + -1 + ], + [ + "The bowl is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The base of the bowl is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lid of the bowl is not mentioned in the description.", + 1 + ], + [ + "The lid of the bowl is mentioned in the description.", + -1 + ], + [ + "The bowl is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The lid of the bowl is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The knife is not mentioned in the description.", + 1 + ], + [ + "The knife is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The knife is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cabinet is not mentioned in the description.", + 1 + ], + [ + "The cabinet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The cabinet is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The burner is not mentioned in the description.", + 1 + ], + [ + "The burner is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The burner is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is bowl. Based on the image, is it likely that the object in the description is given class: bowl or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square-shaped mixing bowl with rounded edges, featuring a light blue exterior and a cream-colored interior. The bowl contains a mixture of white and yellow ingredients, with a small piece of red garnish on top.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is bowl. Based on the image, is it likely that the object in the description is given class: bowl or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square-shaped mixing bowl with rounded edges, featuring a light blue exterior and a cream-colored interior. The bowl contains a mixture of white and yellow ingredients, with a small piece of red garnish on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bowl is not mentioned.\nB. The shape of the bowl is not mentioned.\nC. The shape of the bowl is mentioned in the description and is square or rounded.\nD. The shape of the bowl is mentioned in the description but is not square or rounded.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square-shaped mixing bowl with rounded edges, featuring a light blue exterior and a cream-colored interior. The bowl contains a mixture of white and yellow ingredients, with a small piece of red garnish on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bowl is not mentioned.\nB. The texture of the bowl is not mentioned.\nC. The texture of the bowl is mentioned in the description and is smooth.\nD. The texture of the bowl is mentioned in the description but is not smooth.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square-shaped mixing bowl with rounded edges, featuring a light blue exterior and a cream-colored interior. The bowl contains a mixture of white and yellow ingredients, with a small piece of red garnish on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bowl is not mentioned.\nB. The color of the bowl is not mentioned.\nC. The color of the bowl is mentioned in the description and is white or beige.\nD. The color of the bowl is mentioned in the description but is not white or beige.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square-shaped mixing bowl with rounded edges, featuring a light blue exterior and a cream-colored interior. The bowl contains a mixture of white and yellow ingredients, with a small piece of red garnish on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The content or the bowl is not mentioned.\nB. The color of the content is not mentioned, but the content of the bowl is mentioned.\nC. The color of the content is mentioned in the description and is red, white, or yellow.\nD. 
The color of the content is mentioned in the description but is not red, white, or yellow.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square-shaped mixing bowl with rounded edges, featuring a light blue exterior and a cream-colored interior. The bowl contains a mixture of white and yellow ingredients, with a small piece of red garnish on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The base of the bowl is not mentioned in the description.\nB. The base of the bowl is mentioned in the description.\nC. The bowl is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square-shaped mixing bowl with rounded edges, featuring a light blue exterior and a cream-colored interior. The bowl contains a mixture of white and yellow ingredients, with a small piece of red garnish on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lid of the bowl is not mentioned in the description.\nB. The lid of the bowl is mentioned in the description.\nC. 
The bowl is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square-shaped mixing bowl with rounded edges, featuring a light blue exterior and a cream-colored interior. The bowl contains a mixture of white and yellow ingredients, with a small piece of red garnish on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The knife is not mentioned in the description.\nB. The knife is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square-shaped mixing bowl with rounded edges, featuring a light blue exterior and a cream-colored interior. The bowl contains a mixture of white and yellow ingredients, with a small piece of red garnish on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cabinet is not mentioned in the description.\nB. The cabinet is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square-shaped mixing bowl with rounded edges, featuring a light blue exterior and a cream-colored interior. The bowl contains a mixture of white and yellow ingredients, with a small piece of red garnish on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The burner is not mentioned in the description.\nB. The burner is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The shape of the bowl is mentioned in the description and is square or rounded.", + "B. The texture of the bowl is not mentioned.", + "D. The color of the bowl is mentioned in the description but is not white or beige.", + "C. The color of the content is mentioned in the description and is red, white, or yellow.", + "A. The base of the bowl is not mentioned in the description.", + "A. The lid of the bowl is not mentioned in the description.", + "A. The knife is not mentioned in the description.", + "A. The cabinet is not mentioned in the description.", + "A. The burner is not mentioned in the description." + ], + "score": 0.6666666666666666, + "score_pos": 0.25, + "score_neg": 1.0, + "recognition_result": true + }, + "297718": { + "pred": "A piece of sushi with a bed of white rice, topped with a layer of black seaweed, and filled with a mixture of pink and white fish roe. 
The top is garnished with a sprinkle of sesame seeds.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The position of the rice is mentioned in the description and is outer layer.", + 1 + ], + [ + "The position of the rice is mentioned in the description but is not outer layer.", + -1 + ], + [ + "The position of the rice is not mentioned, but the rice of the sushi is mentioned.", + 0.5 + ], + [ + "The rice or the sushi is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The position of the rice is mentioned in the description and is outer layer.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the wrap is mentioned in the description and is seaweed sheet.", + 1 + ], + [ + "The type of the wrap is mentioned in the description but is not seaweed sheet.", + -1 + ], + [ + "The type of the wrap is not mentioned, but the wrap of the sushi is mentioned.", + 0.5 + ], + [ + "The wrap or the sushi is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The type of the wrap is mentioned in the description and is seaweed sheet.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the sesame seeds is mentioned in the description and is black, yellow, or light brown.", + 1 + ], + [ + "The color of the sesame seeds is mentioned in the description but is not black, yellow, or light brown.", + -1 + ], + [ + "The color of the sesame seeds is not mentioned, but the sesame seeds of the sushi are mentioned.", + 0.5 + ], + [ + "The sesame seeds or the sushi are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the sesame seeds is not mentioned, but the sesame seeds of the sushi are mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the filling is mentioned in the description and is fish or crab meat.", + 1 + ], + [ + "The type of the filling is mentioned in the description but is not fish or crab meat.", + -1 + ], + [ + "The type of the filling is not mentioned, but the filling of the sushi is mentioned.", + 0.5 + ], + [ + "The filling or the sushi is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The type of the filling is not mentioned, but the filling of the sushi is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the topping is mentioned in the description and is sesame seeds.", + 1 + ], + [ + "The type of the topping is mentioned in the description but is not sesame seeds.", + -1 + ], + [ + "The type of the topping is not mentioned, but the topping of the sushi is mentioned.", + 0.5 + ], + [ + "The topping or the sushi is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The type of the topping is mentioned in the description and is sesame seeds.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wasabi of the sushi is not mentioned in the description.", + 1 + ], + [ + "The sushi is not mentioned in the description.", + 0 + ], + [ + "The wasabi of the sushi is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The wasabi of the sushi is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The soy sauce of the sushi is not mentioned in the description.", + 1 + ], + [ + "The sushi is not mentioned in the description.", + 0 + ], + [ + "The soy sauce of the sushi is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The soy sauce of the sushi is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The avocado of the sushi is not mentioned in the description.", + 1 + ], + [ + "The sushi is not mentioned in the description.", + 0 + ], + [ + "The avocado of the sushi is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The avocado of the sushi is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pickled ginger of the sushi is not mentioned in the description.", + 1 + ], + [ + "The sushi is not mentioned in the description.", + 0 + ], + [ + "The pickled ginger of the sushi is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The pickled ginger of the sushi is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pineapple chunks are not mentioned in the description.", + 1 + ], + [ + "The pineapple chunks are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The pineapple chunks are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is sushi. Based on the image, is it likely that the object in the description is given class: sushi or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice, topped with a layer of black seaweed, and filled with a mixture of pink and white fish roe. The top is garnished with a sprinkle of sesame seeds.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is sushi. Based on the image, is it likely that the object in the description is given class: sushi or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice, topped with a layer of black seaweed, and filled with a mixture of pink and white fish roe. 
The top is garnished with a sprinkle of sesame seeds.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The position of the rice is mentioned in the description and is outer layer.\nB. The position of the rice is mentioned in the description but is not outer layer.\nC. The position of the rice is not mentioned, but the rice of the sushi is mentioned.\nD. The rice or the sushi is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice, topped with a layer of black seaweed, and filled with a mixture of pink and white fish roe. The top is garnished with a sprinkle of sesame seeds.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The type of the wrap is mentioned in the description and is seaweed sheet.\nB. The type of the wrap is mentioned in the description but is not seaweed sheet.\nC. The type of the wrap is not mentioned, but the wrap of the sushi is mentioned.\nD. The wrap or the sushi is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice, topped with a layer of black seaweed, and filled with a mixture of pink and white fish roe. The top is garnished with a sprinkle of sesame seeds.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the sesame seeds is mentioned in the description and is black, yellow, or light brown.\nB. The color of the sesame seeds is mentioned in the description but is not black, yellow, or light brown.\nC. The color of the sesame seeds is not mentioned, but the sesame seeds of the sushi are mentioned.\nD. The sesame seeds or the sushi are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice, topped with a layer of black seaweed, and filled with a mixture of pink and white fish roe. The top is garnished with a sprinkle of sesame seeds.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The type of the filling is mentioned in the description and is fish or crab meat.\nB. The type of the filling is mentioned in the description but is not fish or crab meat.\nC. The type of the filling is not mentioned, but the filling of the sushi is mentioned.\nD. The filling or the sushi is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice, topped with a layer of black seaweed, and filled with a mixture of pink and white fish roe. The top is garnished with a sprinkle of sesame seeds.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The type of the topping is mentioned in the description and is sesame seeds.\nB. The type of the topping is mentioned in the description but is not sesame seeds.\nC. The type of the topping is not mentioned, but the topping of the sushi is mentioned.\nD. The topping or the sushi is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice, topped with a layer of black seaweed, and filled with a mixture of pink and white fish roe. The top is garnished with a sprinkle of sesame seeds.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wasabi of the sushi is not mentioned in the description.\nB. The sushi is not mentioned in the description.\nC. The wasabi of the sushi is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice, topped with a layer of black seaweed, and filled with a mixture of pink and white fish roe. The top is garnished with a sprinkle of sesame seeds.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The soy sauce of the sushi is not mentioned in the description.\nB. The sushi is not mentioned in the description.\nC. The soy sauce of the sushi is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice, topped with a layer of black seaweed, and filled with a mixture of pink and white fish roe. The top is garnished with a sprinkle of sesame seeds.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The avocado of the sushi is not mentioned in the description.\nB. The sushi is not mentioned in the description.\nC. The avocado of the sushi is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice, topped with a layer of black seaweed, and filled with a mixture of pink and white fish roe. The top is garnished with a sprinkle of sesame seeds.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pickled ginger of the sushi is not mentioned in the description.\nB. The sushi is not mentioned in the description.\nC. The pickled ginger of the sushi is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice, topped with a layer of black seaweed, and filled with a mixture of pink and white fish roe. The top is garnished with a sprinkle of sesame seeds.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pineapple chunks are not mentioned in the description.\nB. The pineapple chunks are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The position of the rice is mentioned in the description and is outer layer.", + "A. The type of the wrap is mentioned in the description and is seaweed sheet.", + "C. The color of the sesame seeds is not mentioned, but the sesame seeds of the sushi are mentioned.", + "C. The type of the filling is not mentioned, but the filling of the sushi is mentioned.", + "A. 
The type of the topping is mentioned in the description and is sesame seeds.", + "A. The wasabi of the sushi is not mentioned in the description.", + "A. The soy sauce of the sushi is not mentioned in the description.", + "A. The avocado of the sushi is not mentioned in the description.", + "A. The pickled ginger of the sushi is not mentioned in the description.", + "A. The pineapple chunks are not mentioned in the description." + ], + "score": 0.9, + "score_pos": 0.8, + "score_neg": 1.0, + "recognition_result": true + }, + "361105": { + "pred": "A small cluster of fresh, vibrant green leaves with a smooth texture, attached to a slender, slightly curved stem. The leaves are elongated with pointed tips and a glossy surface, showing a few small brown spots.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The herb is not mentioned.", + 0 + ], + [ + "The color of the herb is mentioned in the description and is green.", + 1 + ], + [ + "The color of the herb is mentioned in the description but is not green.", + -1 + ], + [ + "The color of the herb is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the herb is mentioned in the description and is green.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The stems or the herb are not mentioned.", + 0 + ], + [ + "The shape of the stems is mentioned in the description and is thin or slender.", + 1 + ], + [ + "The shape of the stems is mentioned in the description but is not thin or slender.", + -1 + ], + [ + "The shape of the stems is not mentioned, but the stems of the herb are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The shape of the stems is mentioned in the description and is thin or slender.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The leaves or the herb are not mentioned.", + 0 + ], + [ + "The dark spots of the leaves is mentioned in the description and is visible.", + 1 + ], + [ + "The dark spots of the leaves is mentioned in the description but is not visible.", + -1 + ], + [ + "The dark spots of the leaves is not mentioned, but the leaves of the herb are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. The dark spots of the leaves is mentioned in the description but is not visible.", + "pred_index": 2, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The flowers of the herb are not mentioned in the description.", + 1 + ], + [ + "The herb is not mentioned in the description.", + 0 + ], + [ + "The flowers of the herb are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The flowers of the herb are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The seeds of the herb are not mentioned in the description.", + 1 + ], + [ + "The herb is not mentioned in the description.", + 0 + ], + [ + "The seeds of the herb are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The seeds of the herb are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The roots of the herb are not mentioned in the description.", + 1 + ], + [ + "The herb is not mentioned in the description.", + 0 + ], + [ + "The roots of the herb are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The roots of the herb are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cream sauce is not mentioned in the description.", + 1 + ], + [ + "The cream sauce is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The cream sauce is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The scallops are not mentioned in the description.", + 1 + ], + [ + "The scallops are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The scallops are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is herb. Based on the image, is it likely that the object in the description is given class: herb or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a slender, slightly curved stem. The leaves are elongated with pointed tips and a glossy surface, showing a few small brown spots.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is herb. Based on the image, is it likely that the object in the description is given class: herb or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a slender, slightly curved stem. The leaves are elongated with pointed tips and a glossy surface, showing a few small brown spots.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The herb is not mentioned.\nB. The color of the herb is mentioned in the description and is green.\nC. The color of the herb is mentioned in the description but is not green.\nD. The color of the herb is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a slender, slightly curved stem. The leaves are elongated with pointed tips and a glossy surface, showing a few small brown spots.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The stems or the herb are not mentioned.\nB. The shape of the stems is mentioned in the description and is thin or slender.\nC. The shape of the stems is mentioned in the description but is not thin or slender.\nD. The shape of the stems is not mentioned, but the stems of the herb are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a slender, slightly curved stem. The leaves are elongated with pointed tips and a glossy surface, showing a few small brown spots.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The leaves or the herb are not mentioned.\nB. The dark spots of the leaves is mentioned in the description and is visible.\nC. The dark spots of the leaves is mentioned in the description but is not visible.\nD. 
The dark spots of the leaves is not mentioned, but the leaves of the herb are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a slender, slightly curved stem. The leaves are elongated with pointed tips and a glossy surface, showing a few small brown spots.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The flowers of the herb are not mentioned in the description.\nB. The herb is not mentioned in the description.\nC. The flowers of the herb are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a slender, slightly curved stem. The leaves are elongated with pointed tips and a glossy surface, showing a few small brown spots.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The seeds of the herb are not mentioned in the description.\nB. The herb is not mentioned in the description.\nC. 
The seeds of the herb are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a slender, slightly curved stem. The leaves are elongated with pointed tips and a glossy surface, showing a few small brown spots.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The roots of the herb are not mentioned in the description.\nB. The herb is not mentioned in the description.\nC. The roots of the herb are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a slender, slightly curved stem. The leaves are elongated with pointed tips and a glossy surface, showing a few small brown spots.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cream sauce is not mentioned in the description.\nB. The cream sauce is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a slender, slightly curved stem. The leaves are elongated with pointed tips and a glossy surface, showing a few small brown spots.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The scallops are not mentioned in the description.\nB. The scallops are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the herb is mentioned in the description and is green.", + "B. The shape of the stems is mentioned in the description and is thin or slender.", + "C. The dark spots of the leaves is mentioned in the description but is not visible.", + "A. The flowers of the herb are not mentioned in the description.", + "A. The seeds of the herb are not mentioned in the description.", + "A. The roots of the herb are not mentioned in the description.", + "A. The cream sauce is not mentioned in the description.", + "A. The scallops are not mentioned in the description." + ], + "score": 0.75, + "score_pos": 0.3333333333333333, + "score_neg": 1.0, + "recognition_result": true + }, + "1196168": { + "pred": "A rectangular, white air conditioner unit with a large circular fan grille on the left side. The grille has a grid pattern with multiple blades visible. 
To the right of the grille, there is a small rectangular panel with a circular emblem and some text.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the grille is mentioned in the description and is smooth with ridges.", + 1 + ], + [ + "The texture of the grille is mentioned in the description but is not smooth with ridges.", + -1 + ], + [ + "The texture of the grille is not mentioned, but the grille of the air conditioner is mentioned.", + 0.5 + ], + [ + "The grille or the air conditioner is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The texture of the grille is not mentioned, but the grille of the air conditioner is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the air conditioner is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the air conditioner is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the air conditioner is not mentioned.", + 0 + ], + [ + "The air conditioner is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the air conditioner is mentioned in the description and is rectangular.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the air conditioner is mentioned in the description and is white.", + 1 + ], + [ + "The color of the air conditioner is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the air conditioner is not mentioned.", + 0 + ], + [ + "The air conditioner is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the air conditioner is mentioned in the description and is white.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the vent is mentioned in the description and is circular.", + 1 + ], + [ + "The shape of the vent is mentioned in the description but is not circular.", + -1 + ], + [ + "The shape of the vent is not mentioned, but the vent of the air conditioner is mentioned.", + 0.5 + ], + [ + "The vent or the air conditioner is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the vent is mentioned in the description and is circular.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the fan is mentioned in the description and is black, grey, silver, or dark.", + 1 + ], + [ + "The color of the fan is mentioned in the description but is not black, grey, silver, or dark.", + -1 + ], + [ + "The color of the fan is not mentioned, but the fan of the air conditioner is mentioned.", + 0.5 + ], + [ + "The fan or the air conditioner is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the fan is not mentioned, but the fan of the air conditioner is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The air conditioner is not mentioned in the description.", + 0 + ], + [ + "The remote control of the air conditioner is not mentioned in the description.", + 1 + ], + [ + "The remote control of the air conditioner is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The remote control of the air conditioner is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Hotel Shilaza sign is not mentioned in the description.", + 1 + ], + [ + "The Hotel Shilaza sign is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The Hotel Shilaza sign is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The air conditioner is not mentioned in the description.", + 0 + ], + [ + "The display of the air conditioner is not mentioned in the description.", + 1 + ], + [ + "The display of the air conditioner is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The display of the air conditioner is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The UCO Bank branch is not mentioned in the description.", + 1 + ], + [ + "The UCO Bank branch is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The UCO Bank branch is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The motorcycles are not mentioned in the description.", + 1 + ], + [ + "The motorcycles are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The motorcycles are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is air conditioner. Based on the image, is it likely that the object in the description is given class: air conditioner or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, white air conditioner unit with a large circular fan grille on the left side. The grille has a grid pattern with multiple blades visible. To the right of the grille, there is a small rectangular panel with a circular emblem and some text.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is air conditioner. Based on the image, is it likely that the object in the description is given class: air conditioner or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, white air conditioner unit with a large circular fan grille on the left side. 
The grille has a grid pattern with multiple blades visible. To the right of the grille, there is a small rectangular panel with a circular emblem and some text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the grille is mentioned in the description and is smooth with ridges.\nB. The texture of the grille is mentioned in the description but is not smooth with ridges.\nC. The texture of the grille is not mentioned, but the grille of the air conditioner is mentioned.\nD. The grille or the air conditioner is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, white air conditioner unit with a large circular fan grille on the left side. The grille has a grid pattern with multiple blades visible. To the right of the grille, there is a small rectangular panel with a circular emblem and some text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the air conditioner is mentioned in the description and is rectangular.\nB. The shape of the air conditioner is mentioned in the description but is not rectangular.\nC. The shape of the air conditioner is not mentioned.\nD. The air conditioner is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, white air conditioner unit with a large circular fan grille on the left side. The grille has a grid pattern with multiple blades visible. To the right of the grille, there is a small rectangular panel with a circular emblem and some text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the air conditioner is mentioned in the description and is white.\nB. The color of the air conditioner is mentioned in the description but is not white.\nC. The color of the air conditioner is not mentioned.\nD. The air conditioner is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, white air conditioner unit with a large circular fan grille on the left side. The grille has a grid pattern with multiple blades visible. To the right of the grille, there is a small rectangular panel with a circular emblem and some text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the vent is mentioned in the description and is circular.\nB. The shape of the vent is mentioned in the description but is not circular.\nC. The shape of the vent is not mentioned, but the vent of the air conditioner is mentioned.\nD. 
The vent or the air conditioner is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, white air conditioner unit with a large circular fan grille on the left side. The grille has a grid pattern with multiple blades visible. To the right of the grille, there is a small rectangular panel with a circular emblem and some text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the fan is mentioned in the description and is black, grey, silver, or dark.\nB. The color of the fan is mentioned in the description but is not black, grey, silver, or dark.\nC. The color of the fan is not mentioned, but the fan of the air conditioner is mentioned.\nD. The fan or the air conditioner is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, white air conditioner unit with a large circular fan grille on the left side. The grille has a grid pattern with multiple blades visible. 
To the right of the grille, there is a small rectangular panel with a circular emblem and some text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The air conditioner is not mentioned in the description.\nB. The remote control of the air conditioner is not mentioned in the description.\nC. The remote control of the air conditioner is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, white air conditioner unit with a large circular fan grille on the left side. The grille has a grid pattern with multiple blades visible. To the right of the grille, there is a small rectangular panel with a circular emblem and some text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Hotel Shilaza sign is not mentioned in the description.\nB. The Hotel Shilaza sign is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, white air conditioner unit with a large circular fan grille on the left side. The grille has a grid pattern with multiple blades visible. 
To the right of the grille, there is a small rectangular panel with a circular emblem and some text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The air conditioner is not mentioned in the description.\nB. The display of the air conditioner is not mentioned in the description.\nC. The display of the air conditioner is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, white air conditioner unit with a large circular fan grille on the left side. The grille has a grid pattern with multiple blades visible. To the right of the grille, there is a small rectangular panel with a circular emblem and some text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The UCO Bank branch is not mentioned in the description.\nB. The UCO Bank branch is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, white air conditioner unit with a large circular fan grille on the left side. The grille has a grid pattern with multiple blades visible. 
To the right of the grille, there is a small rectangular panel with a circular emblem and some text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The motorcycles are not mentioned in the description.\nB. The motorcycles are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The texture of the grille is not mentioned, but the grille of the air conditioner is mentioned.", + "A. The shape of the air conditioner is mentioned in the description and is rectangular.", + "A. The color of the air conditioner is mentioned in the description and is white.", + "A. The shape of the vent is mentioned in the description and is circular.", + "C. The color of the fan is not mentioned, but the fan of the air conditioner is mentioned.", + "B. The remote control of the air conditioner is not mentioned in the description.", + "A. The Hotel Shilaza sign is not mentioned in the description.", + "B. The display of the air conditioner is not mentioned in the description.", + "A. The UCO Bank branch is not mentioned in the description.", + "A. The motorcycles are not mentioned in the description." + ], + "score": 0.9, + "score_pos": 0.8, + "score_neg": 1.0, + "recognition_result": true + }, + "1894089": { + "pred": "A metallic screwdriver with a long, slender shaft and a flat, rectangular head. 
The shaft is smooth and tapers slightly towards the head, which is flat and has a small, circular indentation near the tip.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the screwdriver is mentioned in the description but is not silver or metallic.", + -1 + ], + [ + "The screwdriver is not mentioned.", + 0 + ], + [ + "The color of the screwdriver is mentioned in the description and is silver or metallic.", + 1 + ], + [ + "The color of the screwdriver is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the screwdriver is mentioned in the description and is silver or metallic.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the screwdriver is mentioned in the description but is not metal.", + -1 + ], + [ + "The screwdriver is not mentioned.", + 0 + ], + [ + "The material of the screwdriver is mentioned in the description and is metal.", + 1 + ], + [ + "The material of the screwdriver is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the screwdriver is mentioned in the description and is metal.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the grip is mentioned in the description but is not rough or textured.", + -1 + ], + [ + "The grip or the screwdriver is not mentioned.", + 0 + ], + [ + "The texture of the grip is mentioned in the description and is rough or textured.", + 1 + ], + [ + "The texture of the grip is not mentioned, but the grip of the screwdriver is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The grip or the screwdriver is not mentioned.", + "pred_index": 1, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The CD is mentioned in the description.", + -1 + ], + [ + "The CD is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The CD is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wire is mentioned in the description.", + -1 + ], + [ + "The wire is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The wire is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The interchangeable bits of the screwdriver are mentioned in the description.", + -1 + ], + [ + "The interchangeable bits of the screwdriver are not mentioned in the description.", + 1 + ], + [ + "The screwdriver is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The interchangeable bits of the screwdriver are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wire cutters are mentioned in the description.", + -1 + ], + [ + "The wire cutters are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The wire cutters are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The flashlight is mentioned in the description.", + -1 + ], + [ + "The flashlight is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The flashlight is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is screwdriver. Based on the image, is it likely that the object in the description is given class: screwdriver or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a long, slender shaft and a flat, rectangular head. The shaft is smooth and tapers slightly towards the head, which is flat and has a small, circular indentation near the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is screwdriver. Based on the image, is it likely that the object in the description is given class: screwdriver or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a long, slender shaft and a flat, rectangular head. The shaft is smooth and tapers slightly towards the head, which is flat and has a small, circular indentation near the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the screwdriver is mentioned in the description but is not silver or metallic.\nB. The screwdriver is not mentioned.\nC. The color of the screwdriver is mentioned in the description and is silver or metallic.\nD. The color of the screwdriver is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a long, slender shaft and a flat, rectangular head. The shaft is smooth and tapers slightly towards the head, which is flat and has a small, circular indentation near the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the screwdriver is mentioned in the description but is not metal.\nB. The screwdriver is not mentioned.\nC. The material of the screwdriver is mentioned in the description and is metal.\nD. 
The material of the screwdriver is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a long, slender shaft and a flat, rectangular head. The shaft is smooth and tapers slightly towards the head, which is flat and has a small, circular indentation near the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the grip is mentioned in the description but is not rough or textured.\nB. The grip or the screwdriver is not mentioned.\nC. The texture of the grip is mentioned in the description and is rough or textured.\nD. The texture of the grip is not mentioned, but the grip of the screwdriver is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a long, slender shaft and a flat, rectangular head. The shaft is smooth and tapers slightly towards the head, which is flat and has a small, circular indentation near the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The CD is mentioned in the description.\nB. 
The CD is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a long, slender shaft and a flat, rectangular head. The shaft is smooth and tapers slightly towards the head, which is flat and has a small, circular indentation near the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wire is mentioned in the description.\nB. The wire is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a long, slender shaft and a flat, rectangular head. The shaft is smooth and tapers slightly towards the head, which is flat and has a small, circular indentation near the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The interchangeable bits of the screwdriver are mentioned in the description.\nB. The interchangeable bits of the screwdriver are not mentioned in the description.\nC. 
The screwdriver is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a long, slender shaft and a flat, rectangular head. The shaft is smooth and tapers slightly towards the head, which is flat and has a small, circular indentation near the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wire cutters are mentioned in the description.\nB. The wire cutters are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a long, slender shaft and a flat, rectangular head. The shaft is smooth and tapers slightly towards the head, which is flat and has a small, circular indentation near the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The flashlight is mentioned in the description.\nB. The flashlight is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the screwdriver is mentioned in the description and is silver or metallic.", + "C. 
The material of the screwdriver is mentioned in the description and is metal.", + "B. The grip or the screwdriver is not mentioned.", + "B. The CD is not mentioned in the description.", + "B. The wire is not mentioned in the description.", + "B. The interchangeable bits of the screwdriver are not mentioned in the description.", + "B. The wire cutters are not mentioned in the description.", + "B. The flashlight is not mentioned in the description." + ], + "score": 0.875, + "score_pos": 0.6666666666666666, + "score_neg": 1.0, + "recognition_result": true + }, + "2391780": { + "pred": "A bird with a long, slender neck and a pointed beak. Its plumage is predominantly brown with lighter, almost white, streaks on the wings and back. The bird's legs are thin and dark, and it has a small, rounded tail.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The position of the wings is mentioned in the description but is not spread or extended.", + -1 + ], + [ + "The position of the wings is not mentioned, but the wings of the wild bird are mentioned.", + 0.5 + ], + [ + "The position of the wings is mentioned in the description and is spread or extended.", + 1 + ], + [ + "The wings or the wild bird are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The position of the wings is mentioned in the description and is spread or extended.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the head is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the head is not mentioned, but the head of the wild bird is mentioned.", + 0.5 + ], + [ + "The color of the head is mentioned in the description and is white.", + 1 + ], + [ + "The head or the wild bird is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the head is not mentioned, but the head of the wild bird is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the body is mentioned in the description but is not white, brown or gray.", + -1 + ], + [ + "The color of the body is not mentioned, but the body of the wild bird is mentioned.", + 0.5 + ], + [ + "The color of the body is mentioned in the description and is white, brown or gray.", + 1 + ], + [ + "The body or the wild bird is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the body is mentioned in the description and is white, brown or gray.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the beak is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The color of the beak is not mentioned, but the beak of the wild bird is mentioned.", + 0.5 + ], + [ + "The color of the beak is mentioned in the description and is dark or black.", + 1 + ], + [ + "The beak or the wild bird is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the beak is not mentioned, but the beak of the wild bird is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The windows are mentioned in the description.", + -1 + ], + [ + "The windows are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The windows are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The boats are mentioned in the description.", + -1 + ], + [ + "The boats are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The boats are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The stone walls are mentioned in the description.", + -1 + ], + [ + "The stone walls are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The stone walls are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chimneys are mentioned in the description.", + -1 + ], + [ + "The chimneys are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The chimneys are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The water is mentioned in the description.", + -1 + ], + [ + "The water is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The water is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is wild bird. 
Based on the image, is it likely that the object in the description is given class: wild bird or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bird with a long, slender neck and a pointed beak. Its plumage is predominantly brown with lighter, almost white, streaks on the wings and back. The bird's legs are thin and dark, and it has a small, rounded tail.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is wild bird. Based on the image, is it likely that the object in the description is given class: wild bird or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bird with a long, slender neck and a pointed beak. Its plumage is predominantly brown with lighter, almost white, streaks on the wings and back. 
The bird's legs are thin and dark, and it has a small, rounded tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The position of the wings is mentioned in the description but is not spread or extended.\nB. The position of the wings is not mentioned, but the wings of the wild bird are mentioned.\nC. The position of the wings is mentioned in the description and is spread or extended.\nD. The wings or the wild bird are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bird with a long, slender neck and a pointed beak. Its plumage is predominantly brown with lighter, almost white, streaks on the wings and back. The bird's legs are thin and dark, and it has a small, rounded tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the head is mentioned in the description but is not white.\nB. The color of the head is not mentioned, but the head of the wild bird is mentioned.\nC. The color of the head is mentioned in the description and is white.\nD. The head or the wild bird is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA bird with a long, slender neck and a pointed beak. Its plumage is predominantly brown with lighter, almost white, streaks on the wings and back. The bird's legs are thin and dark, and it has a small, rounded tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the body is mentioned in the description but is not white, brown or gray.\nB. The color of the body is not mentioned, but the body of the wild bird is mentioned.\nC. The color of the body is mentioned in the description and is white, brown or gray.\nD. The body or the wild bird is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bird with a long, slender neck and a pointed beak. Its plumage is predominantly brown with lighter, almost white, streaks on the wings and back. The bird's legs are thin and dark, and it has a small, rounded tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the beak is mentioned in the description but is not dark or black.\nB. The color of the beak is not mentioned, but the beak of the wild bird is mentioned.\nC. The color of the beak is mentioned in the description and is dark or black.\nD. The beak or the wild bird is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bird with a long, slender neck and a pointed beak. Its plumage is predominantly brown with lighter, almost white, streaks on the wings and back. The bird's legs are thin and dark, and it has a small, rounded tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The windows are mentioned in the description.\nB. The windows are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bird with a long, slender neck and a pointed beak. Its plumage is predominantly brown with lighter, almost white, streaks on the wings and back. The bird's legs are thin and dark, and it has a small, rounded tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The boats are mentioned in the description.\nB. The boats are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA bird with a long, slender neck and a pointed beak. Its plumage is predominantly brown with lighter, almost white, streaks on the wings and back. The bird's legs are thin and dark, and it has a small, rounded tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The stone walls are mentioned in the description.\nB. The stone walls are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bird with a long, slender neck and a pointed beak. Its plumage is predominantly brown with lighter, almost white, streaks on the wings and back. The bird's legs are thin and dark, and it has a small, rounded tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chimneys are mentioned in the description.\nB. The chimneys are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bird with a long, slender neck and a pointed beak. 
Its plumage is predominantly brown with lighter, almost white, streaks on the wings and back. The bird's legs are thin and dark, and it has a small, rounded tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The water is mentioned in the description.\nB. The water is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The position of the wings is mentioned in the description and is spread or extended.", + "B. The color of the head is not mentioned, but the head of the wild bird is mentioned.", + "C. The color of the body is mentioned in the description and is white, brown or gray.", + "B. The color of the beak is not mentioned, but the beak of the wild bird is mentioned.", + "B. The windows are not mentioned in the description.", + "B. The boats are not mentioned in the description.", + "B. The stone walls are not mentioned in the description.", + "B. The chimneys are not mentioned in the description.", + "B. The water is not mentioned in the description." + ], + "score": 0.8888888888888888, + "score_pos": 0.75, + "score_neg": 1.0, + "recognition_result": true + }, + "4502267": { + "pred": "A green bean with a smooth, slightly curved surface, tapering to a point at one end and having a broader, rounded base at the other. The bean has a consistent green color with subtle variations in shading.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green bean is not mentioned.", + 0 + ], + [ + "The shape of the green bean is not mentioned.", + 0 + ], + [ + "The shape of the green bean is mentioned in the description and is elongated.", + 1 + ], + [ + "The shape of the green bean is mentioned in the description but is not elongated.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the green bean is mentioned in the description and is elongated.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green bean is not mentioned.", + 0 + ], + [ + "The color of the green bean is not mentioned.", + 0 + ], + [ + "The color of the green bean is mentioned in the description and is green.", + 1 + ], + [ + "The color of the green bean is mentioned in the description but is not green.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the green bean is mentioned in the description and is green.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green bean is not mentioned.", + 0 + ], + [ + "The shape of the green bean is not mentioned.", + 0 + ], + [ + "The shape of the green bean is mentioned in the description and is arc or curved.", + 1 + ], + [ + "The shape of the green bean is mentioned in the description but is not arc or curved.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the green bean is mentioned in the description and is arc or curved.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green bean is not mentioned.", + 0 + ], + [ + "The texture of the green bean is not mentioned.", + 0 + ], + [ + "The texture of the green bean is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the green bean is mentioned in the description but is not smooth.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The texture of the green bean is mentioned in the description and is smooth.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The apple is not mentioned in the description.", + 1 + ], + [ + "The apple is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The apple is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green bean is not mentioned in the description.", + 0 + ], + [ + "The strings of the green bean are not mentioned in the description.", + 1 + ], + [ + "The strings of the green bean are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The strings of the green bean are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green bean is not mentioned in the description.", + 0 + ], + [ + "The seeds of the green bean are not mentioned in the description.", + 1 + ], + [ + "The seeds of the green bean are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The seeds of the green bean are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pear is not mentioned in the description.", + 1 + ], + [ + "The pear is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The pear is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The peach is not mentioned in the description.", + 1 + ], + [ + "The peach is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The peach is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is green bean. Based on the image, is it likely that the object in the description is given class: green bean or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, tapering to a point at one end and having a broader, rounded base at the other. The bean has a consistent green color with subtle variations in shading.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is green bean. Based on the image, is it likely that the object in the description is given class: green bean or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, tapering to a point at one end and having a broader, rounded base at the other. The bean has a consistent green color with subtle variations in shading.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green bean is not mentioned.\nB. The shape of the green bean is not mentioned.\nC. The shape of the green bean is mentioned in the description and is elongated.\nD. The shape of the green bean is mentioned in the description but is not elongated.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, tapering to a point at one end and having a broader, rounded base at the other. The bean has a consistent green color with subtle variations in shading.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green bean is not mentioned.\nB. The color of the green bean is not mentioned.\nC. The color of the green bean is mentioned in the description and is green.\nD. The color of the green bean is mentioned in the description but is not green.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, tapering to a point at one end and having a broader, rounded base at the other. The bean has a consistent green color with subtle variations in shading.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green bean is not mentioned.\nB. The shape of the green bean is not mentioned.\nC. The shape of the green bean is mentioned in the description and is arc or curved.\nD. The shape of the green bean is mentioned in the description but is not arc or curved.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, tapering to a point at one end and having a broader, rounded base at the other. The bean has a consistent green color with subtle variations in shading.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green bean is not mentioned.\nB. The texture of the green bean is not mentioned.\nC. The texture of the green bean is mentioned in the description and is smooth.\nD. 
The texture of the green bean is mentioned in the description but is not smooth.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, tapering to a point at one end and having a broader, rounded base at the other. The bean has a consistent green color with subtle variations in shading.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The apple is not mentioned in the description.\nB. The apple is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, tapering to a point at one end and having a broader, rounded base at the other. The bean has a consistent green color with subtle variations in shading.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green bean is not mentioned in the description.\nB. The strings of the green bean are not mentioned in the description.\nC. 
The strings of the green bean are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, tapering to a point at one end and having a broader, rounded base at the other. The bean has a consistent green color with subtle variations in shading.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green bean is not mentioned in the description.\nB. The seeds of the green bean are not mentioned in the description.\nC. The seeds of the green bean are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, tapering to a point at one end and having a broader, rounded base at the other. The bean has a consistent green color with subtle variations in shading.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pear is not mentioned in the description.\nB. The pear is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, tapering to a point at one end and having a broader, rounded base at the other. The bean has a consistent green color with subtle variations in shading.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The peach is not mentioned in the description.\nB. The peach is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The shape of the green bean is mentioned in the description and is elongated.", + "C. The color of the green bean is mentioned in the description and is green.", + "C. The shape of the green bean is mentioned in the description and is arc or curved.", + "C. The texture of the green bean is mentioned in the description and is smooth.", + "A. The apple is not mentioned in the description.", + "B. The strings of the green bean are not mentioned in the description.", + "B. The seeds of the green bean are not mentioned in the description.", + "A. The pear is not mentioned in the description.", + "A. The peach is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "4604873": { + "pred": "A large, industrial crane with a lattice structure, featuring a long, horizontal boom extending from a vertical mast. The boom is supported by a series of diagonal cross-bracing and has a hook at the end. 
The mast is equipped with various mechanical components and a counterweight at the base.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the jib is not mentioned, but the jib of the crane is mentioned.", + 0.5 + ], + [ + "The jib or the crane is not mentioned.", + 0 + ], + [ + "The size of the jib is mentioned in the description but is not long.", + -1 + ], + [ + "The size of the jib is mentioned in the description and is long.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The size of the jib is not mentioned, but the jib of the crane is mentioned.", + "pred_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the tower is not mentioned, but the tower of the crane is mentioned.", + 0.5 + ], + [ + "The tower or the crane is not mentioned.", + 0 + ], + [ + "The size of the tower is mentioned in the description but is not tall.", + -1 + ], + [ + "The size of the tower is mentioned in the description and is tall.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The size of the tower is not mentioned, but the tower of the crane is mentioned.", + "pred_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The visibility of the hook is not mentioned, but the hook of the crane is mentioned.", + 0.5 + ], + [ + "The hook or the crane is not mentioned.", + 0 + ], + [ + "The visibility of the hook is mentioned in the description but is not visible.", + -1 + ], + [ + "The visibility of the hook is mentioned in the description and is visible.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The visibility of the hook is not mentioned, but the hook of the crane is mentioned.", + "pred_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the crane is not mentioned.", + 0 + ], + [ + "The crane is not mentioned.", + 0 + ], + [ + "The material of the crane is mentioned in the description but is not metal or steel.", + -1 + ], + [ + "The material of the crane is mentioned in the description and is metal or steel.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the crane is not mentioned.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the jib is not mentioned, but the jib of the crane is mentioned.", + 0.5 + ], + [ + "The jib or the crane is not mentioned.", + 0 + ], + [ + "The shape of the jib is mentioned in the description but is not horizontal beam.", + -1 + ], + [ + "The shape of the jib is mentioned in the description and is horizontal beam.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the jib is mentioned in the description and is horizontal beam.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The telescoping sections of the crane are not mentioned in the description.", + 1 + ], + [ + "The crane is not mentioned in the description.", + 0 + ], + [ + "The telescoping sections of the crane are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The telescoping sections of the crane are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tracks of the crane are not mentioned in the description.", + 1 + ], + [ + "The crane is not mentioned in the description.", + 0 + ], + [ + "The tracks of the crane are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The tracks of the crane are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wheels of the crane are not mentioned in the description.", + 1 + ], + [ + "The crane is not mentioned in the description.", + 0 + ], + [ + "The wheels of the crane are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The wheels of the crane are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The silhouettes of structures are not mentioned in the description.", + 1 + ], + [ + "The silhouettes of structures are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The silhouettes of structures are mentioned in the description.", + "pred_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clouds are not mentioned in the description.", + 1 + ], + [ + "The clouds are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The clouds are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is crane. Based on the image, is it likely that the object in the description is given class: crane or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, industrial crane with a lattice structure, featuring a long, horizontal boom extending from a vertical mast. The boom is supported by a series of diagonal cross-bracing and has a hook at the end. The mast is equipped with various mechanical components and a counterweight at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is crane. Based on the image, is it likely that the object in the description is given class: crane or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, industrial crane with a lattice structure, featuring a long, horizontal boom extending from a vertical mast. 
The boom is supported by a series of diagonal cross-bracing and has a hook at the end. The mast is equipped with various mechanical components and a counterweight at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the jib is not mentioned, but the jib of the crane is mentioned.\nB. The jib or the crane is not mentioned.\nC. The size of the jib is mentioned in the description but is not long.\nD. The size of the jib is mentioned in the description and is long.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, industrial crane with a lattice structure, featuring a long, horizontal boom extending from a vertical mast. The boom is supported by a series of diagonal cross-bracing and has a hook at the end. The mast is equipped with various mechanical components and a counterweight at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the tower is not mentioned, but the tower of the crane is mentioned.\nB. The tower or the crane is not mentioned.\nC. The size of the tower is mentioned in the description but is not tall.\nD. The size of the tower is mentioned in the description and is tall.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, industrial crane with a lattice structure, featuring a long, horizontal boom extending from a vertical mast. The boom is supported by a series of diagonal cross-bracing and has a hook at the end. The mast is equipped with various mechanical components and a counterweight at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The visibility of the hook is not mentioned, but the hook of the crane is mentioned.\nB. The hook or the crane is not mentioned.\nC. The visibility of the hook is mentioned in the description but is not visible.\nD. The visibility of the hook is mentioned in the description and is visible.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, industrial crane with a lattice structure, featuring a long, horizontal boom extending from a vertical mast. The boom is supported by a series of diagonal cross-bracing and has a hook at the end. The mast is equipped with various mechanical components and a counterweight at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the crane is not mentioned.\nB. The crane is not mentioned.\nC. The material of the crane is mentioned in the description but is not metal or steel.\nD. 
The material of the crane is mentioned in the description and is metal or steel.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, industrial crane with a lattice structure, featuring a long, horizontal boom extending from a vertical mast. The boom is supported by a series of diagonal cross-bracing and has a hook at the end. The mast is equipped with various mechanical components and a counterweight at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the jib is not mentioned, but the jib of the crane is mentioned.\nB. The jib or the crane is not mentioned.\nC. The shape of the jib is mentioned in the description but is not horizontal beam.\nD. The shape of the jib is mentioned in the description and is horizontal beam.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, industrial crane with a lattice structure, featuring a long, horizontal boom extending from a vertical mast. The boom is supported by a series of diagonal cross-bracing and has a hook at the end. 
The mast is equipped with various mechanical components and a counterweight at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The telescoping sections of the crane are not mentioned in the description.\nB. The crane is not mentioned in the description.\nC. The telescoping sections of the crane are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, industrial crane with a lattice structure, featuring a long, horizontal boom extending from a vertical mast. The boom is supported by a series of diagonal cross-bracing and has a hook at the end. The mast is equipped with various mechanical components and a counterweight at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tracks of the crane are not mentioned in the description.\nB. The crane is not mentioned in the description.\nC. The tracks of the crane are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA large, industrial crane with a lattice structure, featuring a long, horizontal boom extending from a vertical mast. The boom is supported by a series of diagonal cross-bracing and has a hook at the end. The mast is equipped with various mechanical components and a counterweight at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wheels of the crane are not mentioned in the description.\nB. The crane is not mentioned in the description.\nC. The wheels of the crane are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, industrial crane with a lattice structure, featuring a long, horizontal boom extending from a vertical mast. The boom is supported by a series of diagonal cross-bracing and has a hook at the end. The mast is equipped with various mechanical components and a counterweight at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The silhouettes of structures are not mentioned in the description.\nB. The silhouettes of structures are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, industrial crane with a lattice structure, featuring a long, horizontal boom extending from a vertical mast. The boom is supported by a series of diagonal cross-bracing and has a hook at the end. The mast is equipped with various mechanical components and a counterweight at the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The clouds are not mentioned in the description.\nB. The clouds are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The size of the jib is not mentioned, but the jib of the crane is mentioned.", + "A. The size of the tower is not mentioned, but the tower of the crane is mentioned.", + "A. The visibility of the hook is not mentioned, but the hook of the crane is mentioned.", + "A. The material of the crane is not mentioned.", + "D. The shape of the jib is mentioned in the description and is horizontal beam.", + "A. The telescoping sections of the crane are not mentioned in the description.", + "A. The tracks of the crane are not mentioned in the description.", + "A. The wheels of the crane are not mentioned in the description.", + "B. The silhouettes of structures are mentioned in the description.", + "A. The clouds are not mentioned in the description." + ], + "score": 0.55, + "score_pos": 0.5, + "score_neg": 0.6, + "recognition_result": true + }, + "4916799": { + "pred": "A spherical sculpture with a textured surface composed of small, raised, silver-colored elements. The sphere is adorned with blue, three-dimensional letters spelling \"Reve\" and is mounted on a black, cylindrical base. 
A green band encircles the sphere, and there are colorful, abstract shapes and patterns on the left side.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the globe is mentioned in the description and is silver and blue.", + 1 + ], + [ + "The globe is not mentioned.", + 0 + ], + [ + "The color of the globe is not mentioned.", + 0 + ], + [ + "The color of the globe is mentioned in the description but is not silver and blue.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the globe is mentioned in the description and is silver and blue.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the sphere is mentioned in the description and is metal and plastic.", + 1 + ], + [ + "The sphere or the globe is not mentioned.", + 0 + ], + [ + "The material of the sphere is not mentioned, but the sphere of the globe is mentioned.", + 0.5 + ], + [ + "The material of the sphere is mentioned in the description but is not metal and plastic.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the sphere is not mentioned, but the sphere of the globe is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the base is mentioned in the description and is circular.", + 1 + ], + [ + "The base or the globe is not mentioned.", + 0 + ], + [ + "The shape of the base is not mentioned, but the base of the globe is mentioned.", + 0.5 + ], + [ + "The shape of the base is mentioned in the description but is not circular.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The shape of the base is mentioned in the description and is circular.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the base is mentioned in the description and is gray.", + 1 + ], + [ + "The base or the globe is not mentioned.", + 0 + ], + [ + "The color of the base is not mentioned, but the base of the globe is mentioned.", + 0.5 + ], + [ + "The color of the base is mentioned in the description but is not gray.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the base is mentioned in the description but is not gray.", + "pred_index": 3, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sidewalk is mentioned in the description.", + -1 + ], + [ + "The sidewalk is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The sidewalk is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The trees are mentioned in the description.", + -1 + ], + [ + "The trees are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The trees are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sky is mentioned in the description.", + -1 + ], + [ + "The sky is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The sky is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The person is mentioned in the description.", + -1 + ], + [ + "The person is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The person is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bushes are mentioned in the description.", + -1 + ], + [ + "The bushes are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bushes are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is globe. Based on the image, is it likely that the object in the description is given class: globe or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture with a textured surface composed of small, raised, silver-colored elements. The sphere is adorned with blue, three-dimensional letters spelling \"Reve\" and is mounted on a black, cylindrical base. 
A green band encircles the sphere, and there are colorful, abstract shapes and patterns on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is globe. Based on the image, is it likely that the object in the description is given class: globe or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture with a textured surface composed of small, raised, silver-colored elements. The sphere is adorned with blue, three-dimensional letters spelling \"Reve\" and is mounted on a black, cylindrical base. A green band encircles the sphere, and there are colorful, abstract shapes and patterns on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the globe is mentioned in the description and is silver and blue.\nB. The globe is not mentioned.\nC. The color of the globe is not mentioned.\nD. The color of the globe is mentioned in the description but is not silver and blue.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture with a textured surface composed of small, raised, silver-colored elements. The sphere is adorned with blue, three-dimensional letters spelling \"Reve\" and is mounted on a black, cylindrical base. A green band encircles the sphere, and there are colorful, abstract shapes and patterns on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the sphere is mentioned in the description and is metal and plastic.\nB. The sphere or the globe is not mentioned.\nC. The material of the sphere is not mentioned, but the sphere of the globe is mentioned.\nD. The material of the sphere is mentioned in the description but is not metal and plastic.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture with a textured surface composed of small, raised, silver-colored elements. The sphere is adorned with blue, three-dimensional letters spelling \"Reve\" and is mounted on a black, cylindrical base. A green band encircles the sphere, and there are colorful, abstract shapes and patterns on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the base is mentioned in the description and is circular.\nB. The base or the globe is not mentioned.\nC. The shape of the base is not mentioned, but the base of the globe is mentioned.\nD. 
The shape of the base is mentioned in the description but is not circular.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture with a textured surface composed of small, raised, silver-colored elements. The sphere is adorned with blue, three-dimensional letters spelling \"Reve\" and is mounted on a black, cylindrical base. A green band encircles the sphere, and there are colorful, abstract shapes and patterns on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the base is mentioned in the description and is gray.\nB. The base or the globe is not mentioned.\nC. The color of the base is not mentioned, but the base of the globe is mentioned.\nD. The color of the base is mentioned in the description but is not gray.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture with a textured surface composed of small, raised, silver-colored elements. The sphere is adorned with blue, three-dimensional letters spelling \"Reve\" and is mounted on a black, cylindrical base. 
A green band encircles the sphere, and there are colorful, abstract shapes and patterns on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sidewalk is mentioned in the description.\nB. The sidewalk is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture with a textured surface composed of small, raised, silver-colored elements. The sphere is adorned with blue, three-dimensional letters spelling \"Reve\" and is mounted on a black, cylindrical base. A green band encircles the sphere, and there are colorful, abstract shapes and patterns on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The trees are mentioned in the description.\nB. The trees are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture with a textured surface composed of small, raised, silver-colored elements. The sphere is adorned with blue, three-dimensional letters spelling \"Reve\" and is mounted on a black, cylindrical base. 
A green band encircles the sphere, and there are colorful, abstract shapes and patterns on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sky is mentioned in the description.\nB. The sky is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture with a textured surface composed of small, raised, silver-colored elements. The sphere is adorned with blue, three-dimensional letters spelling \"Reve\" and is mounted on a black, cylindrical base. A green band encircles the sphere, and there are colorful, abstract shapes and patterns on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The person is mentioned in the description.\nB. The person is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture with a textured surface composed of small, raised, silver-colored elements. The sphere is adorned with blue, three-dimensional letters spelling \"Reve\" and is mounted on a black, cylindrical base. 
A green band encircles the sphere, and there are colorful, abstract shapes and patterns on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bushes are mentioned in the description.\nB. The bushes are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The color of the globe is mentioned in the description and is silver and blue.", + "C. The material of the sphere is not mentioned, but the sphere of the globe is mentioned.", + "A. The shape of the base is mentioned in the description and is circular.", + "D. The color of the base is mentioned in the description but is not gray.", + "B. The sidewalk is not mentioned in the description.", + "B. The trees are not mentioned in the description.", + "B. The sky is not mentioned in the description.", + "B. The person is not mentioned in the description.", + "B. The bushes are not mentioned in the description." + ], + "score": 0.7222222222222222, + "score_pos": 0.375, + "score_neg": 1.0, + "recognition_result": true + }, + "5718415": { + "pred": "The tent has a yellow canopy with a dark brown edge. The visible part of the tent includes a metal pole with a rusted section near the bottom.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The canopy or the tent is not mentioned.", + 0 + ], + [ + "The material of the canopy is mentioned in the description but is not fabric.", + -1 + ], + [ + "The material of the canopy is mentioned in the description and is fabric.", + 1 + ], + [ + "The material of the canopy is not mentioned, but the canopy of the tent is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. 
The material of the canopy is not mentioned, but the canopy of the tent is mentioned.", + "pred_index": 3, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pole or the tent is not mentioned.", + 0 + ], + [ + "The material of the pole is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the pole is mentioned in the description and is metal.", + 1 + ], + [ + "The material of the pole is not mentioned, but the pole of the tent is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the pole is mentioned in the description and is metal.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The canopy or the tent is not mentioned.", + 0 + ], + [ + "The color of the canopy is mentioned in the description but is not yellow.", + -1 + ], + [ + "The color of the canopy is mentioned in the description and is yellow.", + 1 + ], + [ + "The color of the canopy is not mentioned, but the canopy of the tent is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the canopy is mentioned in the description and is yellow.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The windows of the tent are mentioned in the description.", + -1 + ], + [ + "The tent is not mentioned in the description.", + 0 + ], + [ + "The windows of the tent are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The windows of the tent are not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The buildings are mentioned in the description.", + -1 + ], + [ + "The buildings are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The buildings are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The walls of the tent are mentioned in the description.", + -1 + ], + [ + "The tent is not mentioned in the description.", + 0 + ], + [ + "The walls of the tent are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The walls of the tent are not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The door of the tent is mentioned in the description.", + -1 + ], + [ + "The tent is not mentioned in the description.", + 0 + ], + [ + "The door of the tent is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The door of the tent is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The floor of the tent is mentioned in the description.", + -1 + ], + [ + "The tent is not mentioned in the description.", + 0 + ], + [ + "The floor of the tent is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The floor of the tent is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is tent. Based on the image, is it likely that the object in the description is given class: tent or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a dark brown edge. The visible part of the tent includes a metal pole with a rusted section near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is tent. Based on the image, is it likely that the object in the description is given class: tent or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a dark brown edge. 
The visible part of the tent includes a metal pole with a rusted section near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The canopy or the tent is not mentioned.\nB. The material of the canopy is mentioned in the description but is not fabric.\nC. The material of the canopy is mentioned in the description and is fabric.\nD. The material of the canopy is not mentioned, but the canopy of the tent is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a dark brown edge. The visible part of the tent includes a metal pole with a rusted section near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pole or the tent is not mentioned.\nB. The material of the pole is mentioned in the description but is not metal.\nC. The material of the pole is mentioned in the description and is metal.\nD. The material of the pole is not mentioned, but the pole of the tent is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a dark brown edge. 
The visible part of the tent includes a metal pole with a rusted section near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The canopy or the tent is not mentioned.\nB. The color of the canopy is mentioned in the description but is not yellow.\nC. The color of the canopy is mentioned in the description and is yellow.\nD. The color of the canopy is not mentioned, but the canopy of the tent is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a dark brown edge. The visible part of the tent includes a metal pole with a rusted section near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The windows of the tent are mentioned in the description.\nB. The tent is not mentioned in the description.\nC. The windows of the tent are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a dark brown edge. 
The visible part of the tent includes a metal pole with a rusted section near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The buildings are mentioned in the description.\nB. The buildings are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a dark brown edge. The visible part of the tent includes a metal pole with a rusted section near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The walls of the tent are mentioned in the description.\nB. The tent is not mentioned in the description.\nC. The walls of the tent are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a dark brown edge. The visible part of the tent includes a metal pole with a rusted section near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The door of the tent is mentioned in the description.\nB. The tent is not mentioned in the description.\nC. The door of the tent is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a dark brown edge. The visible part of the tent includes a metal pole with a rusted section near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The floor of the tent is mentioned in the description.\nB. The tent is not mentioned in the description.\nC. The floor of the tent is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The material of the canopy is not mentioned, but the canopy of the tent is mentioned.", + "C. The material of the pole is mentioned in the description and is metal.", + "C. The color of the canopy is mentioned in the description and is yellow.", + "C. The windows of the tent are not mentioned in the description.", + "B. The buildings are not mentioned in the description.", + "C. The walls of the tent are not mentioned in the description.", + "C. The door of the tent is not mentioned in the description.", + "C. The floor of the tent is not mentioned in the description." 
+ ], + "score": 0.9375, + "score_pos": 0.8333333333333334, + "score_neg": 1.0, + "recognition_result": true + }, + "6012878": { + "pred": "A square pedestrian traffic light with a black background, featuring a red illuminated hand symbol on the left side.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The symbol or the traffic light is not mentioned.", + 0 + ], + [ + "The color of the symbol is mentioned in the description but is not red or orange.", + -1 + ], + [ + "The color of the symbol is mentioned in the description and is red or orange.", + 1 + ], + [ + "The color of the symbol is not mentioned, but the symbol of the traffic light is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the symbol is mentioned in the description and is red or orange.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The symbol or the traffic light is not mentioned.", + 0 + ], + [ + "The shape of the symbol is mentioned in the description but is not hand outline.", + -1 + ], + [ + "The shape of the symbol is mentioned in the description and is hand outline.", + 1 + ], + [ + "The shape of the symbol is not mentioned, but the symbol of the traffic light is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the symbol is mentioned in the description and is hand outline.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The background or the traffic light is not mentioned.", + 0 + ], + [ + "The texture of the background is mentioned in the description but is not matte.", + -1 + ], + [ + "The texture of the background is mentioned in the description and is matte.", + 1 + ], + [ + "The texture of the background is not mentioned, but the background of the traffic light is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The texture of the background is not mentioned, but the background of the traffic light is mentioned.", + "pred_index": 3, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The background or the traffic light is not mentioned.", + 0 + ], + [ + "The color of the background is mentioned in the description but is not gray or black.", + -1 + ], + [ + "The color of the background is mentioned in the description and is gray or black.", + 1 + ], + [ + "The color of the background is not mentioned, but the background of the traffic light is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the background is mentioned in the description and is gray or black.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The reflective surface or the traffic light is not mentioned.", + 0 + ], + [ + "The material of the reflective surface is mentioned in the description but is not glass or plastic.", + -1 + ], + [ + "The material of the reflective surface is mentioned in the description and is glass or plastic.", + 1 + ], + [ + "The material of the reflective surface is not mentioned, but the reflective surface of the traffic light is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the reflective surface is not mentioned, but the reflective surface of the traffic light is mentioned.", + "pred_index": 3, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The walking person symbol of the traffic light is not mentioned in the description.", + 1 + ], + [ + "The walking person symbol of the traffic light is mentioned in the description.", + -1 + ], + [ + "The traffic light is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The walking person symbol of the traffic light is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pole of the traffic light is not mentioned in the description.", + 1 + ], + [ + "The pole of the traffic light is mentioned in the description.", + -1 + ], + [ + "The traffic light is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The pole of the traffic light is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bicycles are not mentioned in the description.", + 1 + ], + [ + "The bicycles are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The bicycles are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sidewalk is not mentioned in the description.", + 1 + ], + [ + "The sidewalk is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The sidewalk is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green light of the traffic light is not mentioned in the description.", + 1 + ], + [ + "The green light of the traffic light is mentioned in the description.", + -1 + ], + [ + "The traffic light is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The green light of the traffic light is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is traffic light. Based on the image, is it likely that the object in the description is given class: traffic light or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square pedestrian traffic light with a black background, featuring a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is traffic light. Based on the image, is it likely that the object in the description is given class: traffic light or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square pedestrian traffic light with a black background, featuring a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The symbol or the traffic light is not mentioned.\nB. The color of the symbol is mentioned in the description but is not red or orange.\nC. The color of the symbol is mentioned in the description and is red or orange.\nD. The color of the symbol is not mentioned, but the symbol of the traffic light is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square pedestrian traffic light with a black background, featuring a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The symbol or the traffic light is not mentioned.\nB. The shape of the symbol is mentioned in the description but is not hand outline.\nC. The shape of the symbol is mentioned in the description and is hand outline.\nD. The shape of the symbol is not mentioned, but the symbol of the traffic light is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square pedestrian traffic light with a black background, featuring a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The background or the traffic light is not mentioned.\nB. The texture of the background is mentioned in the description but is not matte.\nC. The texture of the background is mentioned in the description and is matte.\nD. The texture of the background is not mentioned, but the background of the traffic light is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square pedestrian traffic light with a black background, featuring a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The background or the traffic light is not mentioned.\nB. The color of the background is mentioned in the description but is not gray or black.\nC. The color of the background is mentioned in the description and is gray or black.\nD. The color of the background is not mentioned, but the background of the traffic light is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square pedestrian traffic light with a black background, featuring a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The reflective surface or the traffic light is not mentioned.\nB. The material of the reflective surface is mentioned in the description but is not glass or plastic.\nC. The material of the reflective surface is mentioned in the description and is glass or plastic.\nD. 
The material of the reflective surface is not mentioned, but the reflective surface of the traffic light is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square pedestrian traffic light with a black background, featuring a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The walking person symbol of the traffic light is not mentioned in the description.\nB. The walking person symbol of the traffic light is mentioned in the description.\nC. The traffic light is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square pedestrian traffic light with a black background, featuring a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pole of the traffic light is not mentioned in the description.\nB. The pole of the traffic light is mentioned in the description.\nC. 
The traffic light is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square pedestrian traffic light with a black background, featuring a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bicycles are not mentioned in the description.\nB. The bicycles are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square pedestrian traffic light with a black background, featuring a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sidewalk is not mentioned in the description.\nB. The sidewalk is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA square pedestrian traffic light with a black background, featuring a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green light of the traffic light is not mentioned in the description.\nB. The green light of the traffic light is mentioned in the description.\nC. The traffic light is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the symbol is mentioned in the description and is red or orange.", + "C. The shape of the symbol is mentioned in the description and is hand outline.", + "D. The texture of the background is not mentioned, but the background of the traffic light is mentioned.", + "C. The color of the background is mentioned in the description and is gray or black.", + "D. The material of the reflective surface is not mentioned, but the reflective surface of the traffic light is mentioned.", + "A. The walking person symbol of the traffic light is not mentioned in the description.", + "A. The pole of the traffic light is not mentioned in the description.", + "A. The bicycles are not mentioned in the description.", + "A. The sidewalk is not mentioned in the description.", + "A. The green light of the traffic light is not mentioned in the description." + ], + "score": 0.9, + "score_pos": 0.8, + "score_neg": 1.0, + "recognition_result": true + }, + "6820595": { + "pred": "A cat with a white face and ears, featuring a mix of black and brown fur on its back and tail. 
The cat's body is predominantly white with black patches, and it has a short, sleek coat.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the ear is not mentioned, but the ear of the cat is mentioned.", + 0.5 + ], + [ + "The shape of the ear is mentioned in the description but is not triangular.", + -1 + ], + [ + "The ear or the cat is not mentioned.", + 0 + ], + [ + "The shape of the ear is mentioned in the description and is triangular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the ear is not mentioned, but the ear of the cat is mentioned.", + "pred_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the fur is not mentioned, but the fur of the cat is mentioned.", + 0.5 + ], + [ + "The texture of the fur is mentioned in the description but is not fluffy.", + -1 + ], + [ + "The fur or the cat is not mentioned.", + 0 + ], + [ + "The texture of the fur is mentioned in the description and is fluffy.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. The texture of the fur is mentioned in the description but is not fluffy.", + "pred_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the fur is not mentioned, but the fur of the cat is mentioned.", + 0.5 + ], + [ + "The color of the fur is mentioned in the description but is not black and white.", + -1 + ], + [ + "The fur or the cat is not mentioned.", + 0 + ], + [ + "The color of the fur is mentioned in the description and is black and white.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the fur is mentioned in the description and is black and white.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the back is not mentioned, but the back of the cat is mentioned.", + 0.5 + ], + [ + "The shape of the back is mentioned in the description but is not arched.", + -1 + ], + [ + "The back or the cat is not mentioned.", + 0 + ], + [ + "The shape of the back is mentioned in the description and is arched.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the back is not mentioned, but the back of the cat is mentioned.", + "pred_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the underbelly is not mentioned, but the underbelly of the cat is mentioned.", + 0.5 + ], + [ + "The color of the underbelly is mentioned in the description but is not white.", + -1 + ], + [ + "The underbelly or the cat is not mentioned.", + 0 + ], + [ + "The color of the underbelly is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the underbelly is mentioned in the description and is white.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The door is mentioned in the description.", + -1 + ], + [ + "The door is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The door is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mirror is mentioned in the description.", + -1 + ], + [ + "The mirror is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The mirror is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bathroom cabinet is mentioned in the description.", + -1 + ], + [ + "The bathroom cabinet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bathroom cabinet is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bathroom sink is mentioned in the description.", + -1 + ], + [ + "The bathroom sink is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bathroom sink is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The hairbrush is mentioned in the description.", + -1 + ], + [ + "The hairbrush is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The hairbrush is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is cat. 
Based on the image, is it likely that the object in the description is given class: cat or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face and ears, featuring a mix of black and brown fur on its back and tail. The cat's body is predominantly white with black patches, and it has a short, sleek coat.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is cat. Based on the image, is it likely that the object in the description is given class: cat or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face and ears, featuring a mix of black and brown fur on its back and tail. The cat's body is predominantly white with black patches, and it has a short, sleek coat.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The shape of the ear is not mentioned, but the ear of the cat is mentioned.\nB. The shape of the ear is mentioned in the description but is not triangular.\nC. The ear or the cat is not mentioned.\nD. The shape of the ear is mentioned in the description and is triangular.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face and ears, featuring a mix of black and brown fur on its back and tail. The cat's body is predominantly white with black patches, and it has a short, sleek coat.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the fur is not mentioned, but the fur of the cat is mentioned.\nB. The texture of the fur is mentioned in the description but is not fluffy.\nC. The fur or the cat is not mentioned.\nD. The texture of the fur is mentioned in the description and is fluffy.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face and ears, featuring a mix of black and brown fur on its back and tail. 
The cat's body is predominantly white with black patches, and it has a short, sleek coat.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the fur is not mentioned, but the fur of the cat is mentioned.\nB. The color of the fur is mentioned in the description but is not black and white.\nC. The fur or the cat is not mentioned.\nD. The color of the fur is mentioned in the description and is black and white.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face and ears, featuring a mix of black and brown fur on its back and tail. The cat's body is predominantly white with black patches, and it has a short, sleek coat.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the back is not mentioned, but the back of the cat is mentioned.\nB. The shape of the back is mentioned in the description but is not arched.\nC. The back or the cat is not mentioned.\nD. The shape of the back is mentioned in the description and is arched.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face and ears, featuring a mix of black and brown fur on its back and tail. The cat's body is predominantly white with black patches, and it has a short, sleek coat.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the underbelly is not mentioned, but the underbelly of the cat is mentioned.\nB. The color of the underbelly is mentioned in the description but is not white.\nC. The underbelly or the cat is not mentioned.\nD. The color of the underbelly is mentioned in the description and is white.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face and ears, featuring a mix of black and brown fur on its back and tail. The cat's body is predominantly white with black patches, and it has a short, sleek coat.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The door is mentioned in the description.\nB. The door is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face and ears, featuring a mix of black and brown fur on its back and tail. The cat's body is predominantly white with black patches, and it has a short, sleek coat.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mirror is mentioned in the description.\nB. The mirror is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face and ears, featuring a mix of black and brown fur on its back and tail. The cat's body is predominantly white with black patches, and it has a short, sleek coat.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bathroom cabinet is mentioned in the description.\nB. The bathroom cabinet is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face and ears, featuring a mix of black and brown fur on its back and tail. 
The cat's body is predominantly white with black patches, and it has a short, sleek coat.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bathroom sink is mentioned in the description.\nB. The bathroom sink is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face and ears, featuring a mix of black and brown fur on its back and tail. The cat's body is predominantly white with black patches, and it has a short, sleek coat.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The hairbrush is mentioned in the description.\nB. The hairbrush is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The shape of the ear is not mentioned, but the ear of the cat is mentioned.", + "B. The texture of the fur is mentioned in the description but is not fluffy.", + "D. The color of the fur is mentioned in the description and is black and white.", + "A. The shape of the back is not mentioned, but the back of the cat is mentioned.", + "D. The color of the underbelly is mentioned in the description and is white.", + "B. The door is not mentioned in the description.", + "B. The mirror is not mentioned in the description.", + "B. The bathroom cabinet is not mentioned in the description.", + "B. The bathroom sink is not mentioned in the description.", + "B. The hairbrush is not mentioned in the description." 
+ ], + "score": 0.7, + "score_pos": 0.4, + "score_neg": 1.0, + "recognition_result": true + }, + "8556674": { + "pred": "A round, orange fruit with a smooth, glossy surface. The fruit has a gradient of colors, transitioning from a deep orange at the bottom to a lighter, almost yellowish hue at the top. There is a small, white, irregularly shaped patch near the top center.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned.", + 0 + ], + [ + "The color of the orange/tangerine is mentioned in the description but is not orange.", + -1 + ], + [ + "The color of the orange/tangerine is not mentioned.", + 0 + ], + [ + "The color of the orange/tangerine is mentioned in the description and is orange.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the orange/tangerine is mentioned in the description and is orange.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The surface or the orange/tangerine is not mentioned.", + 0 + ], + [ + "The color of the surface is mentioned in the description but is not bright orange.", + -1 + ], + [ + "The color of the surface is not mentioned, but the surface of the orange/tangerine is mentioned.", + 0.5 + ], + [ + "The color of the surface is mentioned in the description and is bright orange.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the surface is mentioned in the description and is bright orange.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned.", + 0 + ], + [ + "The texture of the orange/tangerine is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the orange/tangerine is not mentioned.", + 0 + ], + [ + "The texture of the orange/tangerine is mentioned in the description and is smooth.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The texture of the orange/tangerine is mentioned in the description and is smooth.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned.", + 0 + ], + [ + "The shape of the orange/tangerine is mentioned in the description but is not round.", + -1 + ], + [ + "The shape of the orange/tangerine is not mentioned.", + 0 + ], + [ + "The shape of the orange/tangerine is mentioned in the description and is round.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the orange/tangerine is mentioned in the description and is round.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The surface or the orange/tangerine is not mentioned.", + 0 + ], + [ + "The texture of the surface is mentioned in the description but is not glossy.", + -1 + ], + [ + "The texture of the surface is not mentioned, but the surface of the orange/tangerine is mentioned.", + 0.5 + ], + [ + "The texture of the surface is mentioned in the description and is glossy.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The texture of the surface is mentioned in the description and is glossy.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned in the description.", + 0 + ], + [ + "The stem of the orange/tangerine is mentioned in the description.", + -1 + ], + [ + "The stem of the orange/tangerine is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The stem of the orange/tangerine is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned in the description.", + 0 + ], + [ + "The leaves of the orange/tangerine are mentioned in the description.", + -1 + ], + [ + "The leaves of the orange/tangerine are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The leaves of the orange/tangerine are not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned in the description.", + 0 + ], + [ + "The segments of the orange/tangerine are mentioned in the description.", + -1 + ], + [ + "The segments of the orange/tangerine are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The segments of the orange/tangerine are not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ceiling lights are mentioned in the description.", + -1 + ], + [ + "The ceiling lights are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The ceiling lights are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned in the description.", + 0 + ], + [ + "The flesh of the orange/tangerine is mentioned in the description.", + -1 + ], + [ + "The flesh of the orange/tangerine is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The flesh of the orange/tangerine is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is orange/tangerine. Based on the image, is it likely that the object in the description is given class: orange/tangerine or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round, orange fruit with a smooth, glossy surface. 
The fruit has a gradient of colors, transitioning from a deep orange at the bottom to a lighter, almost yellowish hue at the top. There is a small, white, irregularly shaped patch near the top center.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is orange/tangerine. Based on the image, is it likely that the object in the description is given class: orange/tangerine or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round, orange fruit with a smooth, glossy surface. The fruit has a gradient of colors, transitioning from a deep orange at the bottom to a lighter, almost yellowish hue at the top. There is a small, white, irregularly shaped patch near the top center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orange/tangerine is not mentioned.\nB. The color of the orange/tangerine is mentioned in the description but is not orange.\nC. The color of the orange/tangerine is not mentioned.\nD. The color of the orange/tangerine is mentioned in the description and is orange.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA round, orange fruit with a smooth, glossy surface. The fruit has a gradient of colors, transitioning from a deep orange at the bottom to a lighter, almost yellowish hue at the top. There is a small, white, irregularly shaped patch near the top center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The surface or the orange/tangerine is not mentioned.\nB. The color of the surface is mentioned in the description but is not bright orange.\nC. The color of the surface is not mentioned, but the surface of the orange/tangerine is mentioned.\nD. The color of the surface is mentioned in the description and is bright orange.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round, orange fruit with a smooth, glossy surface. The fruit has a gradient of colors, transitioning from a deep orange at the bottom to a lighter, almost yellowish hue at the top. There is a small, white, irregularly shaped patch near the top center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orange/tangerine is not mentioned.\nB. The texture of the orange/tangerine is mentioned in the description but is not smooth.\nC. The texture of the orange/tangerine is not mentioned.\nD. 
The texture of the orange/tangerine is mentioned in the description and is smooth.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round, orange fruit with a smooth, glossy surface. The fruit has a gradient of colors, transitioning from a deep orange at the bottom to a lighter, almost yellowish hue at the top. There is a small, white, irregularly shaped patch near the top center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orange/tangerine is not mentioned.\nB. The shape of the orange/tangerine is mentioned in the description but is not round.\nC. The shape of the orange/tangerine is not mentioned.\nD. The shape of the orange/tangerine is mentioned in the description and is round.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round, orange fruit with a smooth, glossy surface. The fruit has a gradient of colors, transitioning from a deep orange at the bottom to a lighter, almost yellowish hue at the top. There is a small, white, irregularly shaped patch near the top center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The surface or the orange/tangerine is not mentioned.\nB. The texture of the surface is mentioned in the description but is not glossy.\nC. The texture of the surface is not mentioned, but the surface of the orange/tangerine is mentioned.\nD. The texture of the surface is mentioned in the description and is glossy.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round, orange fruit with a smooth, glossy surface. The fruit has a gradient of colors, transitioning from a deep orange at the bottom to a lighter, almost yellowish hue at the top. There is a small, white, irregularly shaped patch near the top center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orange/tangerine is not mentioned in the description.\nB. The stem of the orange/tangerine is mentioned in the description.\nC. The stem of the orange/tangerine is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round, orange fruit with a smooth, glossy surface. The fruit has a gradient of colors, transitioning from a deep orange at the bottom to a lighter, almost yellowish hue at the top. 
There is a small, white, irregularly shaped patch near the top center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orange/tangerine is not mentioned in the description.\nB. The leaves of the orange/tangerine are mentioned in the description.\nC. The leaves of the orange/tangerine are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round, orange fruit with a smooth, glossy surface. The fruit has a gradient of colors, transitioning from a deep orange at the bottom to a lighter, almost yellowish hue at the top. There is a small, white, irregularly shaped patch near the top center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orange/tangerine is not mentioned in the description.\nB. The segments of the orange/tangerine are mentioned in the description.\nC. The segments of the orange/tangerine are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round, orange fruit with a smooth, glossy surface. 
The fruit has a gradient of colors, transitioning from a deep orange at the bottom to a lighter, almost yellowish hue at the top. There is a small, white, irregularly shaped patch near the top center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ceiling lights are mentioned in the description.\nB. The ceiling lights are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round, orange fruit with a smooth, glossy surface. The fruit has a gradient of colors, transitioning from a deep orange at the bottom to a lighter, almost yellowish hue at the top. There is a small, white, irregularly shaped patch near the top center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orange/tangerine is not mentioned in the description.\nB. The flesh of the orange/tangerine is mentioned in the description.\nC. The flesh of the orange/tangerine is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the orange/tangerine is mentioned in the description and is orange.", + "D. The color of the surface is mentioned in the description and is bright orange.", + "D. The texture of the orange/tangerine is mentioned in the description and is smooth.", + "D. The shape of the orange/tangerine is mentioned in the description and is round.", + "D. The texture of the surface is mentioned in the description and is glossy.", + "C. 
The stem of the orange/tangerine is not mentioned in the description.", + "C. The leaves of the orange/tangerine are not mentioned in the description.", + "C. The segments of the orange/tangerine are not mentioned in the description.", + "B. The ceiling lights are not mentioned in the description.", + "C. The flesh of the orange/tangerine is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "8906172": { + "pred": "A black, in-ear headphone with a sleek, curved design.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the earphone is not mentioned.", + 0 + ], + [ + "The color of the earphone is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The earphone is not mentioned.", + 0 + ], + [ + "The color of the earphone is mentioned in the description and is dark or black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the earphone is mentioned in the description and is dark or black.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the earphone is not mentioned.", + 0 + ], + [ + "The shape of the earphone is mentioned in the description but is not curved.", + -1 + ], + [ + "The earphone is not mentioned.", + 0 + ], + [ + "The shape of the earphone is mentioned in the description and is curved.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The shape of the earphone is mentioned in the description and is curved.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The control buttons of the earphone are mentioned in the description.", + -1 + ], + [ + "The control buttons of the earphone are not mentioned in the description.", + 1 + ], + [ + "The earphone is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The control buttons of the earphone are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mirror is mentioned in the description.", + -1 + ], + [ + "The mirror is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The mirror is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cell phone is mentioned in the description.", + -1 + ], + [ + "The cell phone is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The cell phone is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The faucet is mentioned in the description.", + -1 + ], + [ + "The faucet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The faucet is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sink is mentioned in the description.", + -1 + ], + [ + "The sink is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The sink is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is earphone. Based on the image, is it likely that the object in the description is given class: earphone or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, in-ear headphone with a sleek, curved design.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is earphone. Based on the image, is it likely that the object in the description is given class: earphone or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black, in-ear headphone with a sleek, curved design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the earphone is not mentioned.\nB. The color of the earphone is mentioned in the description but is not dark or black.\nC. The earphone is not mentioned.\nD. The color of the earphone is mentioned in the description and is dark or black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, in-ear headphone with a sleek, curved design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the earphone is not mentioned.\nB. The shape of the earphone is mentioned in the description but is not curved.\nC. The earphone is not mentioned.\nD. The shape of the earphone is mentioned in the description and is curved.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black, in-ear headphone with a sleek, curved design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The control buttons of the earphone are mentioned in the description.\nB. The control buttons of the earphone are not mentioned in the description.\nC. The earphone is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, in-ear headphone with a sleek, curved design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mirror is mentioned in the description.\nB. The mirror is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, in-ear headphone with a sleek, curved design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cell phone is mentioned in the description.\nB. 
The cell phone is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, in-ear headphone with a sleek, curved design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The faucet is mentioned in the description.\nB. The faucet is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, in-ear headphone with a sleek, curved design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sink is mentioned in the description.\nB. The sink is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the earphone is mentioned in the description and is dark or black.", + "D. The shape of the earphone is mentioned in the description and is curved.", + "B. The control buttons of the earphone are not mentioned in the description.", + "B. The mirror is not mentioned in the description.", + "B. The cell phone is not mentioned in the description.", + "B. The faucet is not mentioned in the description.", + "B. The sink is not mentioned in the description." 
+ ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "10666665": { + "pred": "A round wall clock with a black frame and a white face. The clock features black Arabic numerals for each hour, with the numbers 1 through 12 clearly visible. The hour and minute hands are black and pointed, while the second hand is thin and black. The clock has a simple, minimalist design.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clock is not mentioned.", + 0 + ], + [ + "The shape of the clock is not mentioned.", + 0 + ], + [ + "The shape of the clock is mentioned in the description but is not circular.", + -1 + ], + [ + "The shape of the clock is mentioned in the description and is circular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the clock is mentioned in the description and is circular.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clock face or the clock is not mentioned.", + 0 + ], + [ + "The color of the clock face is not mentioned, but the clock face of the clock is mentioned.", + 0.5 + ], + [ + "The color of the clock face is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the clock face is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the clock face is mentioned in the description and is white.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The hour hand or the clock is not mentioned.", + 0 + ], + [ + "The color of the hour hand is not mentioned, but the hour hand of the clock is mentioned.", + 0.5 + ], + [ + "The color of the hour hand is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the hour hand is mentioned in the description and is black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the hour hand is mentioned in the description and is black.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The numbers or the clock are not mentioned.", + 0 + ], + [ + "The color of the numbers is not mentioned, but the numbers of the clock are mentioned.", + 0.5 + ], + [ + "The color of the numbers is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the numbers is mentioned in the description and is black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the numbers is mentioned in the description and is black.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The frame or the clock is not mentioned.", + 0 + ], + [ + "The color of the frame is not mentioned, but the frame of the clock is mentioned.", + 0.5 + ], + [ + "The color of the frame is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the frame is mentioned in the description and is black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the frame is mentioned in the description and is black.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bamboo blind is not mentioned in the description.", + 1 + ], + [ + "The bamboo blind is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The bamboo blind is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The microwave is not mentioned in the description.", + 1 + ], + [ + "The microwave is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The microwave is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The digital display of the clock is not mentioned in the description.", + 1 + ], + [ + "The clock is not mentioned in the description.", + 0 + ], + [ + "The digital display of the clock is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The digital display of the clock is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pendulum of the clock is not mentioned in the description.", + 1 + ], + [ + "The clock is not mentioned in the description.", + 0 + ], + [ + "The pendulum of the clock is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The pendulum of the clock is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The faucet is not mentioned in the description.", + 1 + ], + [ + "The faucet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The faucet is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is clock. Based on the image, is it likely that the object in the description is given class: clock or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals for each hour, with the numbers 1 through 12 clearly visible. The hour and minute hands are black and pointed, while the second hand is thin and black. The clock has a simple, minimalist design.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is clock. Based on the image, is it likely that the object in the description is given class: clock or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals for each hour, with the numbers 1 through 12 clearly visible. The hour and minute hands are black and pointed, while the second hand is thin and black. The clock has a simple, minimalist design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The clock is not mentioned.\nB. The shape of the clock is not mentioned.\nC. The shape of the clock is mentioned in the description but is not circular.\nD. The shape of the clock is mentioned in the description and is circular.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals for each hour, with the numbers 1 through 12 clearly visible. The hour and minute hands are black and pointed, while the second hand is thin and black. The clock has a simple, minimalist design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The clock face or the clock is not mentioned.\nB. The color of the clock face is not mentioned, but the clock face of the clock is mentioned.\nC. 
The color of the clock face is mentioned in the description but is not white.\nD. The color of the clock face is mentioned in the description and is white.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals for each hour, with the numbers 1 through 12 clearly visible. The hour and minute hands are black and pointed, while the second hand is thin and black. The clock has a simple, minimalist design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The hour hand or the clock is not mentioned.\nB. The color of the hour hand is not mentioned, but the hour hand of the clock is mentioned.\nC. The color of the hour hand is mentioned in the description but is not black.\nD. The color of the hour hand is mentioned in the description and is black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals for each hour, with the numbers 1 through 12 clearly visible. The hour and minute hands are black and pointed, while the second hand is thin and black. 
The clock has a simple, minimalist design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The numbers or the clock are not mentioned.\nB. The color of the numbers is not mentioned, but the numbers of the clock are mentioned.\nC. The color of the numbers is mentioned in the description but is not black.\nD. The color of the numbers is mentioned in the description and is black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals for each hour, with the numbers 1 through 12 clearly visible. The hour and minute hands are black and pointed, while the second hand is thin and black. The clock has a simple, minimalist design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The frame or the clock is not mentioned.\nB. The color of the frame is not mentioned, but the frame of the clock is mentioned.\nC. The color of the frame is mentioned in the description but is not black.\nD. The color of the frame is mentioned in the description and is black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals for each hour, with the numbers 1 through 12 clearly visible. The hour and minute hands are black and pointed, while the second hand is thin and black. The clock has a simple, minimalist design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bamboo blind is not mentioned in the description.\nB. The bamboo blind is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals for each hour, with the numbers 1 through 12 clearly visible. The hour and minute hands are black and pointed, while the second hand is thin and black. The clock has a simple, minimalist design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The microwave is not mentioned in the description.\nB. The microwave is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals for each hour, with the numbers 1 through 12 clearly visible. The hour and minute hands are black and pointed, while the second hand is thin and black. The clock has a simple, minimalist design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The digital display of the clock is not mentioned in the description.\nB. The clock is not mentioned in the description.\nC. The digital display of the clock is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals for each hour, with the numbers 1 through 12 clearly visible. The hour and minute hands are black and pointed, while the second hand is thin and black. The clock has a simple, minimalist design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pendulum of the clock is not mentioned in the description.\nB. The clock is not mentioned in the description.\nC. The pendulum of the clock is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals for each hour, with the numbers 1 through 12 clearly visible. The hour and minute hands are black and pointed, while the second hand is thin and black. The clock has a simple, minimalist design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The faucet is not mentioned in the description.\nB. The faucet is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The shape of the clock is mentioned in the description and is circular.", + "D. The color of the clock face is mentioned in the description and is white.", + "D. The color of the hour hand is mentioned in the description and is black.", + "D. The color of the numbers is mentioned in the description and is black.", + "D. The color of the frame is mentioned in the description and is black.", + "A. The bamboo blind is not mentioned in the description.", + "A. The microwave is not mentioned in the description.", + "A. The digital display of the clock is not mentioned in the description.", + "A. The pendulum of the clock is not mentioned in the description.", + "A. The faucet is not mentioned in the description." 
+ ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "10811497": { + "pred": "A dark green, oval-shaped key with a smooth surface and a small, circular indentation near the bottom.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the key is not mentioned.", + 0 + ], + [ + "The color of the key is mentioned in the description and is dark green, gray, or black.", + 1 + ], + [ + "The color of the key is mentioned in the description but is not dark green, gray, or black.", + -1 + ], + [ + "The key is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the key is mentioned in the description and is dark green, gray, or black.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the key is not mentioned.", + 0 + ], + [ + "The material of the key is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the key is mentioned in the description but is not plastic.", + -1 + ], + [ + "The key is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the key is not mentioned.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the key is not mentioned.", + 0 + ], + [ + "The texture of the key is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the key is mentioned in the description but is not smooth.", + -1 + ], + [ + "The key is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The texture of the key is mentioned in the description and is smooth.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the key is not mentioned.", + 0 + ], + [ + "The shape of the key is mentioned in the description and is rounded, circular, or oval.", + 1 + ], + [ + "The shape of the key is mentioned in the description but is not rounded, circular, or oval.", + -1 + ], + [ + "The key is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the key is mentioned in the description and is rounded, circular, or oval.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The key is not mentioned in the description.", + 0 + ], + [ + "The key bow of the key is not mentioned in the description.", + 1 + ], + [ + "The key bow of the key is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The key bow of the key is mentioned in the description.", + "pred_index": 2, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The key is not mentioned in the description.", + 0 + ], + [ + "The key teeth of the key are not mentioned in the description.", + 1 + ], + [ + "The key teeth of the key are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The key teeth of the key are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sticky note is not mentioned in the description.", + 1 + ], + [ + "The sticky note is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The sticky note is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The smartphone is not mentioned in the description.", + 1 + ], + [ + "The smartphone is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The smartphone is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The black fabric item is not mentioned in the description.", + 1 + ], + [ + "The black fabric item is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The black fabric item is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is key. Based on the image, is it likely that the object in the description is given class: key or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark green, oval-shaped key with a smooth surface and a small, circular indentation near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is key. Based on the image, is it likely that the object in the description is given class: key or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark green, oval-shaped key with a smooth surface and a small, circular indentation near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the key is not mentioned.\nB. The color of the key is mentioned in the description and is dark green, gray, or black.\nC. The color of the key is mentioned in the description but is not dark green, gray, or black.\nD. The key is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA dark green, oval-shaped key with a smooth surface and a small, circular indentation near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the key is not mentioned.\nB. The material of the key is mentioned in the description and is plastic.\nC. The material of the key is mentioned in the description but is not plastic.\nD. The key is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark green, oval-shaped key with a smooth surface and a small, circular indentation near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the key is not mentioned.\nB. The texture of the key is mentioned in the description and is smooth.\nC. The texture of the key is mentioned in the description but is not smooth.\nD. The key is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA dark green, oval-shaped key with a smooth surface and a small, circular indentation near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the key is not mentioned.\nB. The shape of the key is mentioned in the description and is rounded, circular, or oval.\nC. The shape of the key is mentioned in the description but is not rounded, circular, or oval.\nD. The key is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark green, oval-shaped key with a smooth surface and a small, circular indentation near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The key is not mentioned in the description.\nB. The key bow of the key is not mentioned in the description.\nC. The key bow of the key is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA dark green, oval-shaped key with a smooth surface and a small, circular indentation near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The key is not mentioned in the description.\nB. The key teeth of the key are not mentioned in the description.\nC. The key teeth of the key are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark green, oval-shaped key with a smooth surface and a small, circular indentation near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sticky note is not mentioned in the description.\nB. The sticky note is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark green, oval-shaped key with a smooth surface and a small, circular indentation near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The smartphone is not mentioned in the description.\nB. The smartphone is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark green, oval-shaped key with a smooth surface and a small, circular indentation near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The black fabric item is not mentioned in the description.\nB. The black fabric item is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the key is mentioned in the description and is dark green, gray, or black.", + "A. The material of the key is not mentioned.", + "B. The texture of the key is mentioned in the description and is smooth.", + "B. The shape of the key is mentioned in the description and is rounded, circular, or oval.", + "C. The key bow of the key is mentioned in the description.", + "B. The key teeth of the key are not mentioned in the description.", + "A. The sticky note is not mentioned in the description.", + "A. The smartphone is not mentioned in the description.", + "A. The black fabric item is not mentioned in the description." + ], + "score": 0.6666666666666666, + "score_pos": 0.75, + "score_neg": 0.6, + "recognition_result": true + }, + "11021562": { + "pred": "The microwave oven features a white exterior with a prominent vertical handle on the left side of the door. 
The door has a series of horizontal ventilation slits near the top.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the handle is not mentioned, but the handle of the microwave is mentioned.", + 0.5 + ], + [ + "The shape of the handle is mentioned in the description but is not curved.", + -1 + ], + [ + "The shape of the handle is mentioned in the description and is curved.", + 1 + ], + [ + "The handle or the microwave is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the handle is not mentioned, but the handle of the microwave is mentioned.", + "pred_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the casing is not mentioned, but the casing of the microwave is mentioned.", + 0.5 + ], + [ + "The texture of the casing is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the casing is mentioned in the description and is smooth.", + 1 + ], + [ + "The casing or the microwave is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The texture of the casing is not mentioned, but the casing of the microwave is mentioned.", + "pred_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orientation of the handle is not mentioned, but the handle of the microwave is mentioned.", + 0.5 + ], + [ + "The orientation of the handle is mentioned in the description but is not vertical.", + -1 + ], + [ + "The orientation of the handle is mentioned in the description and is vertical.", + 1 + ], + [ + "The handle or the microwave is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The orientation of the handle is mentioned in the description and is vertical.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the microwave is not mentioned.", + 0 + ], + [ + "The color of the microwave is mentioned in the description but is not white, beige, or yellow.", + -1 + ], + [ + "The color of the microwave is mentioned in the description and is white, beige, or yellow.", + 1 + ], + [ + "The microwave is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the microwave is mentioned in the description and is white, beige, or yellow.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The position of the vent is not mentioned, but the vent of the microwave is mentioned.", + 0.5 + ], + [ + "The position of the vent is mentioned in the description but is not top.", + -1 + ], + [ + "The position of the vent is mentioned in the description and is top.", + 1 + ], + [ + "The vent or the microwave is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The position of the vent is mentioned in the description and is top.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fire extinguisher is mentioned in the description.", + -1 + ], + [ + "The fire extinguisher is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The fire extinguisher is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The turntable of the microwave is mentioned in the description.", + -1 + ], + [ + "The turntable of the microwave is not mentioned in the description.", + 1 + ], + [ + "The microwave is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The turntable of the microwave is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The interior light of the microwave is mentioned in the description.", + -1 + ], + [ + "The interior light of the microwave is not mentioned in the description.", + 1 + ], + [ + "The microwave is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The interior light of the microwave is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The windows are mentioned in the description.", + -1 + ], + [ + "The windows are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The windows are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rug is mentioned in the description.", + -1 + ], + [ + "The rug is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The rug is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is microwave. Based on the image, is it likely that the object in the description is given class: microwave or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a white exterior with a prominent vertical handle on the left side of the door. The door has a series of horizontal ventilation slits near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is microwave. Based on the image, is it likely that the object in the description is given class: microwave or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a white exterior with a prominent vertical handle on the left side of the door. 
The door has a series of horizontal ventilation slits near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the handle is not mentioned, but the handle of the microwave is mentioned.\nB. The shape of the handle is mentioned in the description but is not curved.\nC. The shape of the handle is mentioned in the description and is curved.\nD. The handle or the microwave is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a white exterior with a prominent vertical handle on the left side of the door. The door has a series of horizontal ventilation slits near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the casing is not mentioned, but the casing of the microwave is mentioned.\nB. The texture of the casing is mentioned in the description but is not smooth.\nC. The texture of the casing is mentioned in the description and is smooth.\nD. The casing or the microwave is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a white exterior with a prominent vertical handle on the left side of the door. The door has a series of horizontal ventilation slits near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orientation of the handle is not mentioned, but the handle of the microwave is mentioned.\nB. The orientation of the handle is mentioned in the description but is not vertical.\nC. The orientation of the handle is mentioned in the description and is vertical.\nD. The handle or the microwave is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a white exterior with a prominent vertical handle on the left side of the door. The door has a series of horizontal ventilation slits near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the microwave is not mentioned.\nB. The color of the microwave is mentioned in the description but is not white, beige, or yellow.\nC. The color of the microwave is mentioned in the description and is white, beige, or yellow.\nD. The microwave is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a white exterior with a prominent vertical handle on the left side of the door. The door has a series of horizontal ventilation slits near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The position of the vent is not mentioned, but the vent of the microwave is mentioned.\nB. The position of the vent is mentioned in the description but is not top.\nC. The position of the vent is mentioned in the description and is top.\nD. The vent or the microwave is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a white exterior with a prominent vertical handle on the left side of the door. The door has a series of horizontal ventilation slits near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fire extinguisher is mentioned in the description.\nB. The fire extinguisher is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a white exterior with a prominent vertical handle on the left side of the door. The door has a series of horizontal ventilation slits near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The turntable of the microwave is mentioned in the description.\nB. The turntable of the microwave is not mentioned in the description.\nC. The microwave is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a white exterior with a prominent vertical handle on the left side of the door. The door has a series of horizontal ventilation slits near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The interior light of the microwave is mentioned in the description.\nB. The interior light of the microwave is not mentioned in the description.\nC. The microwave is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a white exterior with a prominent vertical handle on the left side of the door. The door has a series of horizontal ventilation slits near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The windows are mentioned in the description.\nB. The windows are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven features a white exterior with a prominent vertical handle on the left side of the door. The door has a series of horizontal ventilation slits near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rug is mentioned in the description.\nB. The rug is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The shape of the handle is not mentioned, but the handle of the microwave is mentioned.", + "A. The texture of the casing is not mentioned, but the casing of the microwave is mentioned.", + "C. The orientation of the handle is mentioned in the description and is vertical.", + "C. The color of the microwave is mentioned in the description and is white, beige, or yellow.", + "C. The position of the vent is mentioned in the description and is top.", + "B. The fire extinguisher is not mentioned in the description.", + "B. The turntable of the microwave is not mentioned in the description.", + "B. 
The interior light of the microwave is not mentioned in the description.", + "B. The windows are not mentioned in the description.", + "B. The rug is not mentioned in the description." + ], + "score": 0.9, + "score_pos": 0.8, + "score_neg": 1.0, + "recognition_result": true + }, + "11021563": { + "pred": "A white stove with four black burners, featuring a control panel with knobs on the back.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The control panel or the stove is not mentioned.", + 0 + ], + [ + "The location of the control panel is mentioned in the description but is not back.", + -1 + ], + [ + "The location of the control panel is mentioned in the description and is back.", + 1 + ], + [ + "The location of the control panel is not mentioned, but the control panel of the stove is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. The location of the control panel is mentioned in the description and is back.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The burners or the stove are not mentioned.", + 0 + ], + [ + "The color of the burners is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the burners is mentioned in the description and is black.", + 1 + ], + [ + "The color of the burners is not mentioned, but the burners of the stove are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the burners is mentioned in the description and is black.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The burners or the stove are not mentioned.", + 0 + ], + [ + "The number of the burners is mentioned in the description but is not 4.", + -1 + ], + [ + "The number of the burners is mentioned in the description and is 4.", + 1 + ], + [ + "The number of the burners is not mentioned, but the burners of the stove are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. The number of the burners is mentioned in the description and is 4.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The burners or the stove are not mentioned.", + 0 + ], + [ + "The shape of the burners is mentioned in the description but is not coiled.", + -1 + ], + [ + "The shape of the burners is mentioned in the description and is coiled.", + 1 + ], + [ + "The shape of the burners is not mentioned, but the burners of the stove are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the burners is not mentioned, but the burners of the stove are mentioned.", + "pred_index": 3, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The countertop is mentioned in the description.", + -1 + ], + [ + "The countertop is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The countertop is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The kitchen cabinets are mentioned in the description.", + -1 + ], + [ + "The kitchen cabinets are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The kitchen cabinets are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The broom is mentioned in the description.", + -1 + ], + [ + "The broom is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The broom is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The induction cooktop surface of the stove is mentioned in the description.", + -1 + ], + [ + "The induction cooktop surface of the stove is not mentioned in the description.", + 1 + ], + [ + "The stove is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The induction cooktop surface of the stove is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The small table is mentioned in the description.", + -1 + ], + [ + "The small table is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The small table is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is stove. 
Based on the image, is it likely that the object in the description is given class: stove or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white stove with four black burners, featuring a control panel with knobs on the back.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is stove. Based on the image, is it likely that the object in the description is given class: stove or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white stove with four black burners, featuring a control panel with knobs on the back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The control panel or the stove is not mentioned.\nB. The location of the control panel is mentioned in the description but is not back.\nC. The location of the control panel is mentioned in the description and is back.\nD. 
The location of the control panel is not mentioned, but the control panel of the stove is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white stove with four black burners, featuring a control panel with knobs on the back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The burners or the stove are not mentioned.\nB. The color of the burners is mentioned in the description but is not black.\nC. The color of the burners is mentioned in the description and is black.\nD. The color of the burners is not mentioned, but the burners of the stove are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white stove with four black burners, featuring a control panel with knobs on the back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The burners or the stove are not mentioned.\nB. The number of the burners is mentioned in the description but is not 4.\nC. The number of the burners is mentioned in the description and is 4.\nD. 
The number of the burners is not mentioned, but the burners of the stove are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white stove with four black burners, featuring a control panel with knobs on the back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The burners or the stove are not mentioned.\nB. The shape of the burners is mentioned in the description but is not coiled.\nC. The shape of the burners is mentioned in the description and is coiled.\nD. The shape of the burners is not mentioned, but the burners of the stove are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white stove with four black burners, featuring a control panel with knobs on the back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The countertop is mentioned in the description.\nB. The countertop is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white stove with four black burners, featuring a control panel with knobs on the back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The kitchen cabinets are mentioned in the description.\nB. The kitchen cabinets are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white stove with four black burners, featuring a control panel with knobs on the back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The broom is mentioned in the description.\nB. The broom is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white stove with four black burners, featuring a control panel with knobs on the back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The induction cooktop surface of the stove is mentioned in the description.\nB. The induction cooktop surface of the stove is not mentioned in the description.\nC. The stove is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white stove with four black burners, featuring a control panel with knobs on the back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The small table is mentioned in the description.\nB. The small table is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The location of the control panel is mentioned in the description and is back.", + "C. The color of the burners is mentioned in the description and is black.", + "C. The number of the burners is mentioned in the description and is 4.", + "D. The shape of the burners is not mentioned, but the burners of the stove are mentioned.", + "B. The countertop is not mentioned in the description.", + "B. The kitchen cabinets are not mentioned in the description.", + "B. The broom is not mentioned in the description.", + "B. The induction cooktop surface of the stove is not mentioned in the description.", + "B. The small table is not mentioned in the description." + ], + "score": 0.9444444444444444, + "score_pos": 0.875, + "score_neg": 1.0, + "recognition_result": true + }, + "12348078": { + "pred": "A woman with dark hair tied back in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. 
She is seated and holding a baby close to her chest.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clothes or the person are not mentioned.", + 0 + ], + [ + "The style of the clothes is mentioned in the description and is t-shirt.", + 1 + ], + [ + "The style of the clothes is mentioned in the description but is not t-shirt.", + -1 + ], + [ + "The style of the clothes is not mentioned, but the clothes of the person are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The style of the clothes is mentioned in the description and is t-shirt.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clothes or the person are not mentioned.", + 0 + ], + [ + "The color of the clothes is mentioned in the description and is white.", + 1 + ], + [ + "The color of the clothes is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the clothes is not mentioned, but the clothes of the person are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the clothes is mentioned in the description and is white.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The person is not mentioned.", + 0 + ], + [ + "The hairstyle of the person is mentioned in the description and is bun.", + 1 + ], + [ + "The hairstyle of the person is mentioned in the description but is not bun.", + -1 + ], + [ + "The hairstyle of the person is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The hairstyle of the person is mentioned in the description and is bun.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The person is not mentioned.", + 0 + ], + [ + "The hair color of the person is mentioned in the description and is dark or black.", + 1 + ], + [ + "The hair color of the person is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The hair color of the person is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The hair color of the person is mentioned in the description and is dark or black.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pants or the person are not mentioned.", + 0 + ], + [ + "The color of the pants is mentioned in the description and is black.", + 1 + ], + [ + "The color of the pants is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the pants is not mentioned, but the pants of the person are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the pants is mentioned in the description and is black.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouth of the person is mentioned in the description.", + -1 + ], + [ + "The person is not mentioned in the description.", + 0 + ], + [ + "The mouth of the person is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The mouth of the person is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The face of the person is mentioned in the description.", + -1 + ], + [ + "The person is not mentioned in the description.", + 0 + ], + [ + "The face of the person is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The face of the person is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The nose of the person is mentioned in the description.", + -1 + ], + [ + "The person is not mentioned in the description.", + 0 + ], + [ + "The nose of the person is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The nose of the person is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wall is mentioned in the description.", + -1 + ], + [ + "The wall is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The wall is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bicycle cart is mentioned in the description.", + -1 + ], + [ + "The bicycle cart is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bicycle cart is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is person. 
Based on the image, is it likely that the object in the description is given class: person or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied back in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is seated and holding a baby close to her chest.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is person. Based on the image, is it likely that the object in the description is given class: person or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied back in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is seated and holding a baby close to her chest.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The clothes or the person are not mentioned.\nB. 
The style of the clothes is mentioned in the description and is t-shirt.\nC. The style of the clothes is mentioned in the description but is not t-shirt.\nD. The style of the clothes is not mentioned, but the clothes of the person are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied back in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is seated and holding a baby close to her chest.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The clothes or the person are not mentioned.\nB. The color of the clothes is mentioned in the description and is white.\nC. The color of the clothes is mentioned in the description but is not white.\nD. The color of the clothes is not mentioned, but the clothes of the person are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied back in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. 
She is seated and holding a baby close to her chest.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The person is not mentioned.\nB. The hairstyle of the person is mentioned in the description and is bun.\nC. The hairstyle of the person is mentioned in the description but is not bun.\nD. The hairstyle of the person is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied back in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is seated and holding a baby close to her chest.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The person is not mentioned.\nB. The hair color of the person is mentioned in the description and is dark or black.\nC. The hair color of the person is mentioned in the description but is not dark or black.\nD. The hair color of the person is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied back in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. 
She is seated and holding a baby close to her chest.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pants or the person are not mentioned.\nB. The color of the pants is mentioned in the description and is black.\nC. The color of the pants is mentioned in the description but is not black.\nD. The color of the pants is not mentioned, but the pants of the person are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied back in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is seated and holding a baby close to her chest.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mouth of the person is mentioned in the description.\nB. The person is not mentioned in the description.\nC. The mouth of the person is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied back in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. 
She is seated and holding a baby close to her chest.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The face of the person is mentioned in the description.\nB. The person is not mentioned in the description.\nC. The face of the person is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied back in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is seated and holding a baby close to her chest.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The nose of the person is mentioned in the description.\nB. The person is not mentioned in the description.\nC. The nose of the person is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied back in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. 
She is seated and holding a baby close to her chest.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wall is mentioned in the description.\nB. The wall is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied back in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is seated and holding a baby close to her chest.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bicycle cart is mentioned in the description.\nB. The bicycle cart is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The style of the clothes is mentioned in the description and is t-shirt.", + "B. The color of the clothes is mentioned in the description and is white.", + "B. The hairstyle of the person is mentioned in the description and is bun.", + "B. The hair color of the person is mentioned in the description and is dark or black.", + "B. The color of the pants is mentioned in the description and is black.", + "C. The mouth of the person is not mentioned in the description.", + "C. The face of the person is not mentioned in the description.", + "C. The nose of the person is not mentioned in the description.", + "B. The wall is not mentioned in the description.", + "B. The bicycle cart is not mentioned in the description." 
+ ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "13138178": { + "pred": "The stool has a deep blue, glossy finish with a smooth, curved design. The visible part includes a rounded, arch-like structure with a slight indentation in the middle, creating a sleek and modern appearance.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the stool is mentioned in the description and is blue.", + 1 + ], + [ + "The stool is not mentioned.", + 0 + ], + [ + "The color of the stool is mentioned in the description but is not blue.", + -1 + ], + [ + "The color of the stool is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the stool is mentioned in the description and is blue.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the stool is mentioned in the description and is plastic.", + 1 + ], + [ + "The stool is not mentioned.", + 0 + ], + [ + "The material of the stool is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the stool is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the stool is not mentioned.", + "pred_index": 3, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the seat is mentioned in the description and is flat.", + 1 + ], + [ + "The seat or the stool is not mentioned.", + 0 + ], + [ + "The shape of the seat is mentioned in the description but is not flat.", + -1 + ], + [ + "The shape of the seat is not mentioned, but the seat of the stool is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. 
The shape of the seat is not mentioned, but the seat of the stool is mentioned.", + "pred_index": 3, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The armrest of the stool is mentioned in the description.", + -1 + ], + [ + "The armrest of the stool is not mentioned in the description.", + 1 + ], + [ + "The stool is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The armrest of the stool is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The backrest of the stool is mentioned in the description.", + -1 + ], + [ + "The backrest of the stool is not mentioned in the description.", + 1 + ], + [ + "The stool is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The backrest of the stool is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Cooking grate is mentioned in the description.", + -1 + ], + [ + "The Cooking grate is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The Cooking grate is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The footrest of the stool is mentioned in the description.", + -1 + ], + [ + "The footrest of the stool is not mentioned in the description.", + 1 + ], + [ + "The stool is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The footrest of the stool is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The swivel base of the stool is mentioned in the description.", + -1 + ], + [ + "The swivel base of the stool is not mentioned in the description.", + 1 + ], + [ + "The stool is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The swivel base of the stool is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is stool. Based on the image, is it likely that the object in the description is given class: stool or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe stool has a deep blue, glossy finish with a smooth, curved design. The visible part includes a rounded, arch-like structure with a slight indentation in the middle, creating a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is stool. Based on the image, is it likely that the object in the description is given class: stool or object of a similar type?\nA. Yes\nB. 
No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe stool has a deep blue, glossy finish with a smooth, curved design. The visible part includes a rounded, arch-like structure with a slight indentation in the middle, creating a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the stool is mentioned in the description and is blue.\nB. The stool is not mentioned.\nC. The color of the stool is mentioned in the description but is not blue.\nD. The color of the stool is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe stool has a deep blue, glossy finish with a smooth, curved design. The visible part includes a rounded, arch-like structure with a slight indentation in the middle, creating a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the stool is mentioned in the description and is plastic.\nB. The stool is not mentioned.\nC. The material of the stool is mentioned in the description but is not plastic.\nD. 
The material of the stool is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe stool has a deep blue, glossy finish with a smooth, curved design. The visible part includes a rounded, arch-like structure with a slight indentation in the middle, creating a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the seat is mentioned in the description and is flat.\nB. The seat or the stool is not mentioned.\nC. The shape of the seat is mentioned in the description but is not flat.\nD. The shape of the seat is not mentioned, but the seat of the stool is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe stool has a deep blue, glossy finish with a smooth, curved design. The visible part includes a rounded, arch-like structure with a slight indentation in the middle, creating a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The armrest of the stool is mentioned in the description.\nB. The armrest of the stool is not mentioned in the description.\nC. 
The stool is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe stool has a deep blue, glossy finish with a smooth, curved design. The visible part includes a rounded, arch-like structure with a slight indentation in the middle, creating a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The backrest of the stool is mentioned in the description.\nB. The backrest of the stool is not mentioned in the description.\nC. The stool is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe stool has a deep blue, glossy finish with a smooth, curved design. The visible part includes a rounded, arch-like structure with a slight indentation in the middle, creating a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Cooking grate is mentioned in the description.\nB. The Cooking grate is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe stool has a deep blue, glossy finish with a smooth, curved design. The visible part includes a rounded, arch-like structure with a slight indentation in the middle, creating a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The footrest of the stool is mentioned in the description.\nB. The footrest of the stool is not mentioned in the description.\nC. The stool is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe stool has a deep blue, glossy finish with a smooth, curved design. The visible part includes a rounded, arch-like structure with a slight indentation in the middle, creating a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The swivel base of the stool is mentioned in the description.\nB. The swivel base of the stool is not mentioned in the description.\nC. The stool is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The color of the stool is mentioned in the description and is blue.", + "D. The material of the stool is not mentioned.", + "D. 
The shape of the seat is not mentioned, but the seat of the stool is mentioned.", + "B. The armrest of the stool is not mentioned in the description.", + "B. The backrest of the stool is not mentioned in the description.", + "B. The Cooking grate is not mentioned in the description.", + "B. The footrest of the stool is not mentioned in the description.", + "B. The swivel base of the stool is not mentioned in the description." + ], + "score": 0.8125, + "score_pos": 0.5, + "score_neg": 1.0, + "recognition_result": true + }, + "13187927": { + "pred": "The motorcycle is a white scooter with a black seat and a rear storage compartment. It features a rear red tail light and a license plate mounted below the tail light. The handlebars are equipped with rearview mirrors, and the body has a sleek, modern design with a slightly curved front.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the exterior is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the exterior is mentioned in the description and is white.", + 1 + ], + [ + "The exterior or the motorcycle is not mentioned.", + 0 + ], + [ + "The color of the exterior is not mentioned, but the exterior of the motorcycle is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the exterior is mentioned in the description and is white.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the seat is mentioned in the description but is not leather or vinyl.", + -1 + ], + [ + "The material of the seat is mentioned in the description and is leather or vinyl.", + 1 + ], + [ + "The seat or the motorcycle is not mentioned.", + 0 + ], + [ + "The material of the seat is not mentioned, but the seat of the motorcycle is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the seat is not mentioned, but the seat of the motorcycle is mentioned.", + "pred_index": 3, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the seat is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the seat is mentioned in the description and is black.", + 1 + ], + [ + "The seat or the motorcycle is not mentioned.", + 0 + ], + [ + "The color of the seat is not mentioned, but the seat of the motorcycle is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the seat is mentioned in the description and is black.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the taillight is mentioned in the description but is not red.", + -1 + ], + [ + "The color of the taillight is mentioned in the description and is red.", + 1 + ], + [ + "The taillight or the motorcycle is not mentioned.", + 0 + ], + [ + "The color of the taillight is not mentioned, but the taillight of the motorcycle is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the taillight is mentioned in the description and is red.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the license plate is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the license plate is mentioned in the description and is rectangular.", + 1 + ], + [ + "The license plate or the motorcycle is not mentioned.", + 0 + ], + [ + "The shape of the license plate is not mentioned, but the license plate of the motorcycle is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the license plate is not mentioned, but the license plate of the motorcycle is mentioned.", + "pred_index": 3, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The motorcycle is not mentioned in the description.", + 0 + ], + [ + "The windshield of the motorcycle is not mentioned in the description.", + 1 + ], + [ + "The windshield of the motorcycle is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The windshield of the motorcycle is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The balconies are not mentioned in the description.", + 1 + ], + [ + "The balconies are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The balconies are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Pepsi advertisements are not mentioned in the description.", + 1 + ], + [ + "The Pepsi advertisements are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The Pepsi advertisements are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The windows are not mentioned in the description.", + 1 + ], + [ + "The windows are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The windows are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The decorative metal grill is not mentioned in the description.", + 1 + ], + [ + "The decorative metal grill is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The decorative metal grill is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is motorcycle. Based on the image, is it likely that the object in the description is given class: motorcycle or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a black seat and a rear storage compartment. It features a rear red tail light and a license plate mounted below the tail light. The handlebars are equipped with rearview mirrors, and the body has a sleek, modern design with a slightly curved front.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is motorcycle. Based on the image, is it likely that the object in the description is given class: motorcycle or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a black seat and a rear storage compartment. It features a rear red tail light and a license plate mounted below the tail light. The handlebars are equipped with rearview mirrors, and the body has a sleek, modern design with a slightly curved front.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the exterior is mentioned in the description but is not white.\nB. The color of the exterior is mentioned in the description and is white.\nC. The exterior or the motorcycle is not mentioned.\nD. 
The color of the exterior is not mentioned, but the exterior of the motorcycle is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a black seat and a rear storage compartment. It features a rear red tail light and a license plate mounted below the tail light. The handlebars are equipped with rearview mirrors, and the body has a sleek, modern design with a slightly curved front.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the seat is mentioned in the description but is not leather or vinyl.\nB. The material of the seat is mentioned in the description and is leather or vinyl.\nC. The seat or the motorcycle is not mentioned.\nD. The material of the seat is not mentioned, but the seat of the motorcycle is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a black seat and a rear storage compartment. It features a rear red tail light and a license plate mounted below the tail light. 
The handlebars are equipped with rearview mirrors, and the body has a sleek, modern design with a slightly curved front.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the seat is mentioned in the description but is not black.\nB. The color of the seat is mentioned in the description and is black.\nC. The seat or the motorcycle is not mentioned.\nD. The color of the seat is not mentioned, but the seat of the motorcycle is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a black seat and a rear storage compartment. It features a rear red tail light and a license plate mounted below the tail light. The handlebars are equipped with rearview mirrors, and the body has a sleek, modern design with a slightly curved front.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the taillight is mentioned in the description but is not red.\nB. The color of the taillight is mentioned in the description and is red.\nC. The taillight or the motorcycle is not mentioned.\nD. The color of the taillight is not mentioned, but the taillight of the motorcycle is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a black seat and a rear storage compartment. It features a rear red tail light and a license plate mounted below the tail light. The handlebars are equipped with rearview mirrors, and the body has a sleek, modern design with a slightly curved front.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the license plate is mentioned in the description but is not rectangular.\nB. The shape of the license plate is mentioned in the description and is rectangular.\nC. The license plate or the motorcycle is not mentioned.\nD. The shape of the license plate is not mentioned, but the license plate of the motorcycle is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a black seat and a rear storage compartment. It features a rear red tail light and a license plate mounted below the tail light. The handlebars are equipped with rearview mirrors, and the body has a sleek, modern design with a slightly curved front.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The motorcycle is not mentioned in the description.\nB. The windshield of the motorcycle is not mentioned in the description.\nC. 
The windshield of the motorcycle is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a black seat and a rear storage compartment. It features a rear red tail light and a license plate mounted below the tail light. The handlebars are equipped with rearview mirrors, and the body has a sleek, modern design with a slightly curved front.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The balconies are not mentioned in the description.\nB. The balconies are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a black seat and a rear storage compartment. It features a rear red tail light and a license plate mounted below the tail light. The handlebars are equipped with rearview mirrors, and the body has a sleek, modern design with a slightly curved front.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Pepsi advertisements are not mentioned in the description.\nB. 
The Pepsi advertisements are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a black seat and a rear storage compartment. It features a rear red tail light and a license plate mounted below the tail light. The handlebars are equipped with rearview mirrors, and the body has a sleek, modern design with a slightly curved front.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The windows are not mentioned in the description.\nB. The windows are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a black seat and a rear storage compartment. It features a rear red tail light and a license plate mounted below the tail light. The handlebars are equipped with rearview mirrors, and the body has a sleek, modern design with a slightly curved front.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The decorative metal grill is not mentioned in the description.\nB. 
The decorative metal grill is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the exterior is mentioned in the description and is white.", + "D. The material of the seat is not mentioned, but the seat of the motorcycle is mentioned.", + "B. The color of the seat is mentioned in the description and is black.", + "B. The color of the taillight is mentioned in the description and is red.", + "D. The shape of the license plate is not mentioned, but the license plate of the motorcycle is mentioned.", + "B. The windshield of the motorcycle is not mentioned in the description.", + "A. The balconies are not mentioned in the description.", + "A. The Pepsi advertisements are not mentioned in the description.", + "A. The windows are not mentioned in the description.", + "A. The decorative metal grill is not mentioned in the description." + ], + "score": 0.9, + "score_pos": 0.8, + "score_neg": 1.0, + "recognition_result": true + }, + "14490578": { + "pred": "The harbor seal has a sleek, elongated body with a dark brown to black coloration. Its skin appears smooth and slightly shiny, with a few lighter patches scattered across its back. The seal's head is rounded, and its body tapers towards the tail.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the seal is mentioned in the description but is not smooth.", + -1 + ], + [ + "The seal is not mentioned.", + 0 + ], + [ + "The texture of the seal is not mentioned.", + 0 + ], + [ + "The texture of the seal is mentioned in the description and is smooth.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The texture of the seal is mentioned in the description and is smooth.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the skin is mentioned in the description but is not gray, black, or dark.", + -1 + ], + [ + "The skin or the seal is not mentioned.", + 0 + ], + [ + "The color of the skin is not mentioned, but the skin of the seal is mentioned.", + 0.5 + ], + [ + "The color of the skin is mentioned in the description and is gray, black, or dark.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the skin is mentioned in the description and is gray, black, or dark.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the body is mentioned in the description but is not elongated.", + -1 + ], + [ + "The body or the seal is not mentioned.", + 0 + ], + [ + "The shape of the body is not mentioned, but the body of the seal is mentioned.", + 0.5 + ], + [ + "The shape of the body is mentioned in the description and is elongated.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the body is mentioned in the description and is elongated.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The teeth of the seal are not mentioned in the description.", + 1 + ], + [ + "The teeth of the seal are mentioned in the description.", + -1 + ], + [ + "The seal is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The teeth of the seal are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The claws of the seal are not mentioned in the description.", + 1 + ], + [ + "The claws of the seal are mentioned in the description.", + -1 + ], + [ + "The seal is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The claws of the seal are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ears of the seal are not mentioned in the description.", + 1 + ], + [ + "The ears of the seal are mentioned in the description.", + -1 + ], + [ + "The seal is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The ears of the seal are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sand is not mentioned in the description.", + 1 + ], + [ + "The sand is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The sand is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rocks are not mentioned in the description.", + 1 + ], + [ + "The rocks are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The rocks are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is seal. 
Based on the image, is it likely that the object in the description is given class: seal or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark brown to black coloration. Its skin appears smooth and slightly shiny, with a few lighter patches scattered across its back. The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is seal. Based on the image, is it likely that the object in the description is given class: seal or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark brown to black coloration. Its skin appears smooth and slightly shiny, with a few lighter patches scattered across its back. 
The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the seal is mentioned in the description but is not smooth.\nB. The seal is not mentioned.\nC. The texture of the seal is not mentioned.\nD. The texture of the seal is mentioned in the description and is smooth.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark brown to black coloration. Its skin appears smooth and slightly shiny, with a few lighter patches scattered across its back. The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the skin is mentioned in the description but is not gray, black, or dark.\nB. The skin or the seal is not mentioned.\nC. The color of the skin is not mentioned, but the skin of the seal is mentioned.\nD. The color of the skin is mentioned in the description and is gray, black, or dark.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark brown to black coloration. Its skin appears smooth and slightly shiny, with a few lighter patches scattered across its back. The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the body is mentioned in the description but is not elongated.\nB. The body or the seal is not mentioned.\nC. The shape of the body is not mentioned, but the body of the seal is mentioned.\nD. The shape of the body is mentioned in the description and is elongated.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark brown to black coloration. Its skin appears smooth and slightly shiny, with a few lighter patches scattered across its back. The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The teeth of the seal are not mentioned in the description.\nB. The teeth of the seal are mentioned in the description.\nC. The seal is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark brown to black coloration. Its skin appears smooth and slightly shiny, with a few lighter patches scattered across its back. The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The claws of the seal are not mentioned in the description.\nB. The claws of the seal are mentioned in the description.\nC. The seal is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark brown to black coloration. Its skin appears smooth and slightly shiny, with a few lighter patches scattered across its back. The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ears of the seal are not mentioned in the description.\nB. The ears of the seal are mentioned in the description.\nC. The seal is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark brown to black coloration. Its skin appears smooth and slightly shiny, with a few lighter patches scattered across its back. The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sand is not mentioned in the description.\nB. The sand is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark brown to black coloration. Its skin appears smooth and slightly shiny, with a few lighter patches scattered across its back. The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rocks are not mentioned in the description.\nB. The rocks are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The texture of the seal is mentioned in the description and is smooth.", + "D. The color of the skin is mentioned in the description and is gray, black, or dark.", + "D. The shape of the body is mentioned in the description and is elongated.", + "A. The teeth of the seal are not mentioned in the description.", + "A. 
The claws of the seal are not mentioned in the description.", + "A. The ears of the seal are not mentioned in the description.", + "A. The sand is not mentioned in the description.", + "A. The rocks are not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "14640483": { + "pred": "A rectangular wooden chopping board with a smooth surface and rounded edges. The board has a natural wood grain pattern and a warm, honey-brown color.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the surface is not mentioned, but the surface of the cutting/chopping board is mentioned.", + 0.5 + ], + [ + "The surface or the cutting/chopping board is not mentioned.", + 0 + ], + [ + "The texture of the surface is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the surface is mentioned in the description and is smooth.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The texture of the surface is mentioned in the description and is smooth.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the cutting/chopping board is not mentioned.", + 0 + ], + [ + "The cutting/chopping board is not mentioned.", + 0 + ], + [ + "The color of the cutting/chopping board is mentioned in the description but is not light brown or wooden.", + -1 + ], + [ + "The color of the cutting/chopping board is mentioned in the description and is light brown or wooden.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the cutting/chopping board is mentioned in the description and is light brown or wooden.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the cutting/chopping board is not mentioned.", + 0 + ], + [ + "The cutting/chopping board is not mentioned.", + 0 + ], + [ + "The shape of the cutting/chopping board is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the cutting/chopping board is mentioned in the description and is rectangular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the cutting/chopping board is mentioned in the description and is rectangular.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the cutting/chopping board is not mentioned.", + 0 + ], + [ + "The cutting/chopping board is not mentioned.", + 0 + ], + [ + "The material of the cutting/chopping board is mentioned in the description but is not wood.", + -1 + ], + [ + "The material of the cutting/chopping board is mentioned in the description and is wood.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the cutting/chopping board is mentioned in the description and is wood.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The hanging hole of the cutting/chopping board is mentioned in the description.", + -1 + ], + [ + "The cutting/chopping board is not mentioned in the description.", + 0 + ], + [ + "The hanging hole of the cutting/chopping board is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The hanging hole of the cutting/chopping board is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The juice groove of the cutting/chopping board is mentioned in the description.", + -1 + ], + [ + "The cutting/chopping board is not mentioned in the description.", + 0 + ], + [ + "The juice groove of the cutting/chopping board is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The juice groove of the cutting/chopping board is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle of the cutting/chopping board is mentioned in the description.", + -1 + ], + [ + "The cutting/chopping board is not mentioned in the description.", + 0 + ], + [ + "The handle of the cutting/chopping board is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The handle of the cutting/chopping board is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The whisk is mentioned in the description.", + -1 + ], + [ + "The whisk is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The whisk is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The utensils are mentioned in the description.", + -1 + ], + [ + "The utensils are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The utensils are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is cutting/chopping board. Based on the image, is it likely that the object in the description is given class: cutting/chopping board or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and rounded edges. The board has a natural wood grain pattern and a warm, honey-brown color.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is cutting/chopping board. Based on the image, is it likely that the object in the description is given class: cutting/chopping board or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and rounded edges. 
The board has a natural wood grain pattern and a warm, honey-brown color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the surface is not mentioned, but the surface of the cutting/chopping board is mentioned.\nB. The surface or the cutting/chopping board is not mentioned.\nC. The texture of the surface is mentioned in the description but is not smooth.\nD. The texture of the surface is mentioned in the description and is smooth.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and rounded edges. The board has a natural wood grain pattern and a warm, honey-brown color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the cutting/chopping board is not mentioned.\nB. The cutting/chopping board is not mentioned.\nC. The color of the cutting/chopping board is mentioned in the description but is not light brown or wooden.\nD. The color of the cutting/chopping board is mentioned in the description and is light brown or wooden.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and rounded edges. The board has a natural wood grain pattern and a warm, honey-brown color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the cutting/chopping board is not mentioned.\nB. The cutting/chopping board is not mentioned.\nC. The shape of the cutting/chopping board is mentioned in the description but is not rectangular.\nD. The shape of the cutting/chopping board is mentioned in the description and is rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and rounded edges. The board has a natural wood grain pattern and a warm, honey-brown color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the cutting/chopping board is not mentioned.\nB. The cutting/chopping board is not mentioned.\nC. The material of the cutting/chopping board is mentioned in the description but is not wood.\nD. The material of the cutting/chopping board is mentioned in the description and is wood.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and rounded edges. The board has a natural wood grain pattern and a warm, honey-brown color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The hanging hole of the cutting/chopping board is mentioned in the description.\nB. The cutting/chopping board is not mentioned in the description.\nC. The hanging hole of the cutting/chopping board is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and rounded edges. The board has a natural wood grain pattern and a warm, honey-brown color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The juice groove of the cutting/chopping board is mentioned in the description.\nB. The cutting/chopping board is not mentioned in the description.\nC. The juice groove of the cutting/chopping board is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and rounded edges. The board has a natural wood grain pattern and a warm, honey-brown color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handle of the cutting/chopping board is mentioned in the description.\nB. The cutting/chopping board is not mentioned in the description.\nC. The handle of the cutting/chopping board is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and rounded edges. The board has a natural wood grain pattern and a warm, honey-brown color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The whisk is mentioned in the description.\nB. The whisk is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and rounded edges. 
The board has a natural wood grain pattern and a warm, honey-brown color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The utensils are mentioned in the description.\nB. The utensils are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The texture of the surface is mentioned in the description and is smooth.", + "D. The color of the cutting/chopping board is mentioned in the description and is light brown or wooden.", + "D. The shape of the cutting/chopping board is mentioned in the description and is rectangular.", + "D. The material of the cutting/chopping board is mentioned in the description and is wood.", + "C. The hanging hole of the cutting/chopping board is not mentioned in the description.", + "C. The juice groove of the cutting/chopping board is not mentioned in the description.", + "C. The handle of the cutting/chopping board is not mentioned in the description.", + "B. The whisk is not mentioned in the description.", + "B. The utensils are not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "16010041": { + "pred": "A pair of light-colored chopsticks with a smooth, slightly tapered design, featuring a subtle gradient from a pale yellow to a soft orange hue at the tips.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the chopsticks is mentioned in the description but is not wood.", + -1 + ], + [ + "The material of the chopsticks is not mentioned.", + 0 + ], + [ + "The material of the chopsticks is mentioned in the description and is wood.", + 1 + ], + [ + "The chopsticks are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The material of the chopsticks is not mentioned.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the chopsticks is mentioned in the description but is not long cylindrical.", + -1 + ], + [ + "The shape of the chopsticks is not mentioned.", + 0 + ], + [ + "The shape of the chopsticks is mentioned in the description and is long cylindrical.", + 1 + ], + [ + "The chopsticks are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the chopsticks is mentioned in the description and is long cylindrical.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the body is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the body is not mentioned, but the body of the chopsticks is mentioned.", + 0.5 + ], + [ + "The texture of the body is mentioned in the description and is smooth.", + 1 + ], + [ + "The body or the chopsticks is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The texture of the body is mentioned in the description and is smooth.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lemon slices are not mentioned in the description.", + 1 + ], + [ + "The lemon slices are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The lemon slices are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The decorative elements of the chopsticks are not mentioned in the description.", + 1 + ], + [ + "The chopsticks are not mentioned in the description.", + 0 + ], + [ + "The decorative elements of the chopsticks are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The decorative elements of the chopsticks are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The man is not mentioned in the description.", + 1 + ], + [ + "The man is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The man is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sliced meat is not mentioned in the description.", + 1 + ], + [ + "The sliced meat is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The sliced meat is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green garnish is not mentioned in the description.", + 1 + ], + [ + "The green garnish is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The green garnish is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is chopsticks. 
Based on the image, is it likely that the object in the description is given class: chopsticks or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored chopsticks with a smooth, slightly tapered design, featuring a subtle gradient from a pale yellow to a soft orange hue at the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is chopsticks. Based on the image, is it likely that the object in the description is given class: chopsticks or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored chopsticks with a smooth, slightly tapered design, featuring a subtle gradient from a pale yellow to a soft orange hue at the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the chopsticks is mentioned in the description but is not wood.\nB. 
The material of the chopsticks is not mentioned.\nC. The material of the chopsticks is mentioned in the description and is wood.\nD. The chopsticks are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored chopsticks with a smooth, slightly tapered design, featuring a subtle gradient from a pale yellow to a soft orange hue at the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the chopsticks is mentioned in the description but is not long cylindrical.\nB. The shape of the chopsticks is not mentioned.\nC. The shape of the chopsticks is mentioned in the description and is long cylindrical.\nD. The chopsticks are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored chopsticks with a smooth, slightly tapered design, featuring a subtle gradient from a pale yellow to a soft orange hue at the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the body is mentioned in the description but is not smooth.\nB. 
The texture of the body is not mentioned, but the body of the chopsticks is mentioned.\nC. The texture of the body is mentioned in the description and is smooth.\nD. The body or the chopsticks is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored chopsticks with a smooth, slightly tapered design, featuring a subtle gradient from a pale yellow to a soft orange hue at the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lemon slices are not mentioned in the description.\nB. The lemon slices are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored chopsticks with a smooth, slightly tapered design, featuring a subtle gradient from a pale yellow to a soft orange hue at the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The decorative elements of the chopsticks are not mentioned in the description.\nB. The chopsticks are not mentioned in the description.\nC. 
The decorative elements of the chopsticks are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored chopsticks with a smooth, slightly tapered design, featuring a subtle gradient from a pale yellow to a soft orange hue at the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The man is not mentioned in the description.\nB. The man is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored chopsticks with a smooth, slightly tapered design, featuring a subtle gradient from a pale yellow to a soft orange hue at the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sliced meat is not mentioned in the description.\nB. The sliced meat is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored chopsticks with a smooth, slightly tapered design, featuring a subtle gradient from a pale yellow to a soft orange hue at the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green garnish is not mentioned in the description.\nB. The green garnish is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The material of the chopsticks is not mentioned.", + "C. The shape of the chopsticks is mentioned in the description and is long cylindrical.", + "C. The texture of the body is mentioned in the description and is smooth.", + "A. The lemon slices are not mentioned in the description.", + "A. The decorative elements of the chopsticks are not mentioned in the description.", + "A. The man is not mentioned in the description.", + "A. The sliced meat is not mentioned in the description.", + "A. The green garnish is not mentioned in the description." + ], + "score": 0.875, + "score_pos": 0.6666666666666666, + "score_neg": 1.0, + "recognition_result": true + }, + "17072759": { + "pred": "A black belt with a smooth texture, featuring a silver rectangular buckle.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the belt is not mentioned.", + 0 + ], + [ + "The color of the belt is mentioned in the description but is not gray or black.", + -1 + ], + [ + "The color of the belt is mentioned in the description and is gray or black.", + 1 + ], + [ + "The belt is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the belt is mentioned in the description and is gray or black.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the strap is not mentioned, but the strap of the belt is mentioned.", + 0.5 + ], + [ + "The material of the strap is mentioned in the description but is not leather.", + -1 + ], + [ + "The material of the strap is mentioned in the description and is leather.", + 1 + ], + [ + "The strap or the belt is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the strap is not mentioned, but the strap of the belt is mentioned.", + "pred_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the strap is not mentioned, but the strap of the belt is mentioned.", + 0.5 + ], + [ + "The texture of the strap is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the strap is mentioned in the description and is smooth.", + 1 + ], + [ + "The strap or the belt is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The texture of the strap is mentioned in the description and is smooth.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The belt is not mentioned in the description.", + 0 + ], + [ + "The tip of the belt is mentioned in the description.", + -1 + ], + [ + "The tip of the belt is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The tip of the belt is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chess board is mentioned in the description.", + -1 + ], + [ + "The chess board is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The chess board is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The blanket is mentioned in the description.", + -1 + ], + [ + "The blanket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The blanket is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sky is mentioned in the description.", + -1 + ], + [ + "The sky is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The sky is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The belt is not mentioned in the description.", + 0 + ], + [ + "The keeper of the belt is mentioned in the description.", + -1 + ], + [ + "The keeper of the belt is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The keeper of the belt is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is belt. 
Based on the image, is it likely that the object in the description is given class: belt or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is belt. Based on the image, is it likely that the object in the description is given class: belt or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the belt is not mentioned.\nB. The color of the belt is mentioned in the description but is not gray or black.\nC. The color of the belt is mentioned in the description and is gray or black.\nD. 
The belt is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the strap is not mentioned, but the strap of the belt is mentioned.\nB. The material of the strap is mentioned in the description but is not leather.\nC. The material of the strap is mentioned in the description and is leather.\nD. The strap or the belt is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the strap is not mentioned, but the strap of the belt is mentioned.\nB. The texture of the strap is mentioned in the description but is not smooth.\nC. The texture of the strap is mentioned in the description and is smooth.\nD. The strap or the belt is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The belt is not mentioned in the description.\nB. The tip of the belt is mentioned in the description.\nC. The tip of the belt is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chess board is mentioned in the description.\nB. The chess board is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The blanket is mentioned in the description.\nB. The blanket is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sky is mentioned in the description.\nB. The sky is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The belt is not mentioned in the description.\nB. The keeper of the belt is mentioned in the description.\nC. The keeper of the belt is not mentioned in the description.\n" + ], + "response": [ + "A. 
Yes", + "C. The color of the belt is mentioned in the description and is gray or black.", + "A. The material of the strap is not mentioned, but the strap of the belt is mentioned.", + "C. The texture of the strap is mentioned in the description and is smooth.", + "C. The tip of the belt is not mentioned in the description.", + "B. The chess board is not mentioned in the description.", + "B. The blanket is not mentioned in the description.", + "B. The sky is not mentioned in the description.", + "C. The keeper of the belt is not mentioned in the description." + ], + "score": 0.9375, + "score_pos": 0.8333333333333334, + "score_neg": 1.0, + "recognition_result": true + }, + "17072764": { + "pred": "A partially visible pear with a smooth, light green skin transitioning to a yellowish hue towards the top. The pear has a small, brown stem protruding from its top left side.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the pear is mentioned in the description but is not smooth.", + -1 + ], + [ + "The pear is not mentioned.", + 0 + ], + [ + "The texture of the pear is not mentioned.", + 0 + ], + [ + "The texture of the pear is mentioned in the description and is smooth.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The texture of the pear is mentioned in the description and is smooth.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the stem is mentioned in the description but is not short.", + -1 + ], + [ + "The stem or the pear is not mentioned.", + 0 + ], + [ + "The size of the stem is not mentioned, but the stem of the pear is mentioned.", + 0.5 + ], + [ + "The size of the stem is mentioned in the description and is short.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The size of the stem is not mentioned, but the stem of the pear is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the stem is mentioned in the description but is not brown.", + -1 + ], + [ + "The stem or the pear is not mentioned.", + 0 + ], + [ + "The color of the stem is not mentioned, but the stem of the pear is mentioned.", + 0.5 + ], + [ + "The color of the stem is mentioned in the description and is brown.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the stem is mentioned in the description and is brown.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the skin is mentioned in the description but is not yellow or green.", + -1 + ], + [ + "The skin or the pear is not mentioned.", + 0 + ], + [ + "The color of the skin is not mentioned, but the skin of the pear is mentioned.", + 0.5 + ], + [ + "The color of the skin is mentioned in the description and is yellow or green.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the skin is mentioned in the description and is yellow or green.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cup is not mentioned in the description.", + 1 + ], + [ + "The cup is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The cup is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pear is not mentioned in the description.", + 0 + ], + [ + "The core of the pear is not mentioned in the description.", + 1 + ], + [ + "The core of the pear is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The core of the pear is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The white top is not mentioned in the description.", + 1 + ], + [ + "The white top is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The white top is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The grass are not mentioned in the description.", + 1 + ], + [ + "The grass are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The grass are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pear is not mentioned in the description.", + 0 + ], + [ + "The leaf of the pear is not mentioned in the description.", + 1 + ], + [ + "The leaf of the pear is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The leaf of the pear is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is pear. 
Based on the image, is it likely that the object in the description is given class: pear or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA partially visible pear with a smooth, light green skin transitioning to a yellowish hue towards the top. The pear has a small, brown stem protruding from its top left side.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is pear. Based on the image, is it likely that the object in the description is given class: pear or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA partially visible pear with a smooth, light green skin transitioning to a yellowish hue towards the top. The pear has a small, brown stem protruding from its top left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the pear is mentioned in the description but is not smooth.\nB. 
The pear is not mentioned.\nC. The texture of the pear is not mentioned.\nD. The texture of the pear is mentioned in the description and is smooth.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA partially visible pear with a smooth, light green skin transitioning to a yellowish hue towards the top. The pear has a small, brown stem protruding from its top left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the stem is mentioned in the description but is not short.\nB. The stem or the pear is not mentioned.\nC. The size of the stem is not mentioned, but the stem of the pear is mentioned.\nD. The size of the stem is mentioned in the description and is short.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA partially visible pear with a smooth, light green skin transitioning to a yellowish hue towards the top. The pear has a small, brown stem protruding from its top left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the stem is mentioned in the description but is not brown.\nB. 
The stem or the pear is not mentioned.\nC. The color of the stem is not mentioned, but the stem of the pear is mentioned.\nD. The color of the stem is mentioned in the description and is brown.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA partially visible pear with a smooth, light green skin transitioning to a yellowish hue towards the top. The pear has a small, brown stem protruding from its top left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the skin is mentioned in the description but is not yellow or green.\nB. The skin or the pear is not mentioned.\nC. The color of the skin is not mentioned, but the skin of the pear is mentioned.\nD. The color of the skin is mentioned in the description and is yellow or green.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA partially visible pear with a smooth, light green skin transitioning to a yellowish hue towards the top. The pear has a small, brown stem protruding from its top left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The cup is not mentioned in the description.\nB. The cup is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA partially visible pear with a smooth, light green skin transitioning to a yellowish hue towards the top. The pear has a small, brown stem protruding from its top left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pear is not mentioned in the description.\nB. The core of the pear is not mentioned in the description.\nC. The core of the pear is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA partially visible pear with a smooth, light green skin transitioning to a yellowish hue towards the top. The pear has a small, brown stem protruding from its top left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The white top is not mentioned in the description.\nB. The white top is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA partially visible pear with a smooth, light green skin transitioning to a yellowish hue towards the top. The pear has a small, brown stem protruding from its top left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The grass are not mentioned in the description.\nB. The grass are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA partially visible pear with a smooth, light green skin transitioning to a yellowish hue towards the top. The pear has a small, brown stem protruding from its top left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pear is not mentioned in the description.\nB. The leaf of the pear is not mentioned in the description.\nC. The leaf of the pear is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The texture of the pear is mentioned in the description and is smooth.", + "C. The size of the stem is not mentioned, but the stem of the pear is mentioned.", + "D. The color of the stem is mentioned in the description and is brown.", + "D. The color of the skin is mentioned in the description and is yellow or green.", + "A. 
The cup is not mentioned in the description.", + "B. The core of the pear is not mentioned in the description.", + "A. The white top is not mentioned in the description.", + "A. The grass are not mentioned in the description.", + "B. The leaf of the pear is not mentioned in the description." + ], + "score": 0.9444444444444444, + "score_pos": 0.875, + "score_neg": 1.0, + "recognition_result": true + }, + "18301585": { + "pred": "The bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest slats are evenly spaced and supported by white, rectangular concrete supports. The seat slats are also black and run parallel to the backrest. The bench has a sturdy, industrial design with a solid, robust appearance.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the backrest is mentioned in the description and is black.", + 1 + ], + [ + "The backrest or the bench is not mentioned.", + 0 + ], + [ + "The color of the backrest is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the backrest is not mentioned, but the backrest of the bench is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the backrest is mentioned in the description and is black.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the legs is mentioned in the description and is white.", + 1 + ], + [ + "The legs or the bench are not mentioned.", + 0 + ], + [ + "The color of the legs is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the legs is not mentioned, but the legs of the bench are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the legs is mentioned in the description and is white.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the backrest is mentioned in the description and is slatted.", + 1 + ], + [ + "The backrest or the bench is not mentioned.", + 0 + ], + [ + "The shape of the backrest is mentioned in the description but is not slatted.", + -1 + ], + [ + "The shape of the backrest is not mentioned, but the backrest of the bench is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the backrest is mentioned in the description and is slatted.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bollards are mentioned in the description.", + -1 + ], + [ + "The bollards are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bollards are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The storage compartment of the bench is mentioned in the description.", + -1 + ], + [ + "The storage compartment of the bench is not mentioned in the description.", + 1 + ], + [ + "The bench is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The storage compartment of the bench is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The footrest of the bench is mentioned in the description.", + -1 + ], + [ + "The footrest of the bench is not mentioned in the description.", + 1 + ], + [ + "The bench is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The footrest of the bench is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The headrest of the bench is mentioned in the description.", + -1 + ], + [ + "The headrest of the bench is not mentioned in the description.", + 1 + ], + [ + "The bench is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The headrest of the bench is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The trees are mentioned in the description.", + -1 + ], + [ + "The trees are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The trees are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is bench. Based on the image, is it likely that the object in the description is given class: bench or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest slats are evenly spaced and supported by white, rectangular concrete supports. The seat slats are also black and run parallel to the backrest. The bench has a sturdy, industrial design with a solid, robust appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is bench. Based on the image, is it likely that the object in the description is given class: bench or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest slats are evenly spaced and supported by white, rectangular concrete supports. The seat slats are also black and run parallel to the backrest. The bench has a sturdy, industrial design with a solid, robust appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The color of the backrest is mentioned in the description and is black.\nB. The backrest or the bench is not mentioned.\nC. The color of the backrest is mentioned in the description but is not black.\nD. The color of the backrest is not mentioned, but the backrest of the bench is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest slats are evenly spaced and supported by white, rectangular concrete supports. The seat slats are also black and run parallel to the backrest. The bench has a sturdy, industrial design with a solid, robust appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the legs is mentioned in the description and is white.\nB. The legs or the bench are not mentioned.\nC. The color of the legs is mentioned in the description but is not white.\nD. The color of the legs is not mentioned, but the legs of the bench are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. 
The backrest slats are evenly spaced and supported by white, rectangular concrete supports. The seat slats are also black and run parallel to the backrest. The bench has a sturdy, industrial design with a solid, robust appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the backrest is mentioned in the description and is slatted.\nB. The backrest or the bench is not mentioned.\nC. The shape of the backrest is mentioned in the description but is not slatted.\nD. The shape of the backrest is not mentioned, but the backrest of the bench is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest slats are evenly spaced and supported by white, rectangular concrete supports. The seat slats are also black and run parallel to the backrest. The bench has a sturdy, industrial design with a solid, robust appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bollards are mentioned in the description.\nB. The bollards are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest slats are evenly spaced and supported by white, rectangular concrete supports. The seat slats are also black and run parallel to the backrest. The bench has a sturdy, industrial design with a solid, robust appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The storage compartment of the bench is mentioned in the description.\nB. The storage compartment of the bench is not mentioned in the description.\nC. The bench is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest slats are evenly spaced and supported by white, rectangular concrete supports. The seat slats are also black and run parallel to the backrest. The bench has a sturdy, industrial design with a solid, robust appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The footrest of the bench is mentioned in the description.\nB. The footrest of the bench is not mentioned in the description.\nC. The bench is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest slats are evenly spaced and supported by white, rectangular concrete supports. The seat slats are also black and run parallel to the backrest. The bench has a sturdy, industrial design with a solid, robust appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The headrest of the bench is mentioned in the description.\nB. The headrest of the bench is not mentioned in the description.\nC. The bench is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest slats are evenly spaced and supported by white, rectangular concrete supports. The seat slats are also black and run parallel to the backrest. The bench has a sturdy, industrial design with a solid, robust appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The trees are mentioned in the description.\nB. The trees are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. 
The color of the backrest is mentioned in the description and is black.", + "A. The color of the legs is mentioned in the description and is white.", + "A. The shape of the backrest is mentioned in the description and is slatted.", + "B. The bollards are not mentioned in the description.", + "B. The storage compartment of the bench is not mentioned in the description.", + "B. The footrest of the bench is not mentioned in the description.", + "B. The headrest of the bench is not mentioned in the description.", + "B. The trees are not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "18680641": { + "pred": "A rectangular, plush, red carpet with a slightly uneven surface and a subtle gradient of darker red in the middle. The edges are bordered by a thin, dark gray trim.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the carpet is mentioned in the description but is not fabric.", + -1 + ], + [ + "The material of the carpet is not mentioned.", + 0 + ], + [ + "The material of the carpet is mentioned in the description and is fabric.", + 1 + ], + [ + "The carpet is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The material of the carpet is not mentioned.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the edge is mentioned in the description but is not straight.", + -1 + ], + [ + "The shape of the edge is not mentioned, but the edge of the carpet is mentioned.", + 0.5 + ], + [ + "The shape of the edge is mentioned in the description and is straight.", + 1 + ], + [ + "The edge or the carpet is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The shape of the edge is not mentioned, but the edge of the carpet is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the carpet is mentioned in the description but is not orange or red.", + -1 + ], + [ + "The color of the carpet is not mentioned.", + 0 + ], + [ + "The color of the carpet is mentioned in the description and is orange or red.", + 1 + ], + [ + "The carpet is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the carpet is mentioned in the description and is orange or red.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the carpet is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the carpet is not mentioned.", + 0 + ], + [ + "The shape of the carpet is mentioned in the description and is rectangular.", + 1 + ], + [ + "The carpet is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the carpet is mentioned in the description and is rectangular.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The carpet is not mentioned in the description.", + 0 + ], + [ + "The tassels of the carpet are not mentioned in the description.", + 1 + ], + [ + "The tassels of the carpet are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The tassels of the carpet are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The drainage pipe is not mentioned in the description.", + 1 + ], + [ + "The drainage pipe is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The drainage pipe is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The carpet is not mentioned in the description.", + 0 + ], + [ + "The pattern of the carpet is not mentioned in the description.", + 1 + ], + [ + "The pattern of the carpet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The pattern of the carpet is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shop sign is not mentioned in the description.", + 1 + ], + [ + "The shop sign is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The shop sign is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The candy display is not mentioned in the description.", + 1 + ], + [ + "The candy display is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The candy display is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is carpet. 
Based on the image, is it likely that the object in the description is given class: carpet or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly uneven surface and a subtle gradient of darker red in the middle. The edges are bordered by a thin, dark gray trim.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is carpet. Based on the image, is it likely that the object in the description is given class: carpet or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly uneven surface and a subtle gradient of darker red in the middle. The edges are bordered by a thin, dark gray trim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the carpet is mentioned in the description but is not fabric.\nB. 
The material of the carpet is not mentioned.\nC. The material of the carpet is mentioned in the description and is fabric.\nD. The carpet is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly uneven surface and a subtle gradient of darker red in the middle. The edges are bordered by a thin, dark gray trim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the edge is mentioned in the description but is not straight.\nB. The shape of the edge is not mentioned, but the edge of the carpet is mentioned.\nC. The shape of the edge is mentioned in the description and is straight.\nD. The edge or the carpet is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly uneven surface and a subtle gradient of darker red in the middle. The edges are bordered by a thin, dark gray trim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the carpet is mentioned in the description but is not orange or red.\nB. 
The color of the carpet is not mentioned.\nC. The color of the carpet is mentioned in the description and is orange or red.\nD. The carpet is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly uneven surface and a subtle gradient of darker red in the middle. The edges are bordered by a thin, dark gray trim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the carpet is mentioned in the description but is not rectangular.\nB. The shape of the carpet is not mentioned.\nC. The shape of the carpet is mentioned in the description and is rectangular.\nD. The carpet is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly uneven surface and a subtle gradient of darker red in the middle. The edges are bordered by a thin, dark gray trim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The carpet is not mentioned in the description.\nB. The tassels of the carpet are not mentioned in the description.\nC. 
The tassels of the carpet are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly uneven surface and a subtle gradient of darker red in the middle. The edges are bordered by a thin, dark gray trim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The drainage pipe is not mentioned in the description.\nB. The drainage pipe is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly uneven surface and a subtle gradient of darker red in the middle. The edges are bordered by a thin, dark gray trim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The carpet is not mentioned in the description.\nB. The pattern of the carpet is not mentioned in the description.\nC. The pattern of the carpet is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly uneven surface and a subtle gradient of darker red in the middle. The edges are bordered by a thin, dark gray trim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shop sign is not mentioned in the description.\nB. The shop sign is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly uneven surface and a subtle gradient of darker red in the middle. The edges are bordered by a thin, dark gray trim.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The candy display is not mentioned in the description.\nB. The candy display is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The material of the carpet is not mentioned.", + "B. The shape of the edge is not mentioned, but the edge of the carpet is mentioned.", + "C. The color of the carpet is mentioned in the description and is orange or red.", + "C. The shape of the carpet is mentioned in the description and is rectangular.", + "B. The tassels of the carpet are not mentioned in the description.", + "A. 
The drainage pipe is not mentioned in the description.", + "B. The pattern of the carpet is not mentioned in the description.", + "A. The shop sign is not mentioned in the description.", + "A. The candy display is not mentioned in the description." + ], + "score": 0.8333333333333334, + "score_pos": 0.625, + "score_neg": 1.0, + "recognition_result": true + }, + "25273528": { + "pred": "A hot air balloon with a vibrant pattern of alternating vertical stripes in dark blue, red, and yellow. The balloon has a teardrop shape with a small basket attached at the bottom.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the hot air balloon is mentioned in the description and is multicolored.", + 1 + ], + [ + "The hot air balloon is not mentioned.", + 0 + ], + [ + "The color of the hot air balloon is mentioned in the description but is not multicolored.", + -1 + ], + [ + "The color of the hot air balloon is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the hot air balloon is mentioned in the description and is multicolored.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the envelope is mentioned in the description and is nylon or polyester.", + 1 + ], + [ + "The envelope or the hot air balloon is not mentioned.", + 0 + ], + [ + "The material of the envelope is mentioned in the description but is not nylon or polyester.", + -1 + ], + [ + "The material of the envelope is not mentioned, but the envelope of the hot air balloon is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. 
The material of the envelope is not mentioned, but the envelope of the hot air balloon is mentioned.", + "pred_index": 3, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The position of the basket is mentioned in the description and is bottom.", + 1 + ], + [ + "The basket or the hot air balloon is not mentioned.", + 0 + ], + [ + "The position of the basket is mentioned in the description but is not bottom.", + -1 + ], + [ + "The position of the basket is not mentioned, but the basket of the hot air balloon is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The position of the basket is mentioned in the description and is bottom.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the basket is mentioned in the description and is small.", + 1 + ], + [ + "The basket or the hot air balloon is not mentioned.", + 0 + ], + [ + "The size of the basket is mentioned in the description but is not small.", + -1 + ], + [ + "The size of the basket is not mentioned, but the basket of the hot air balloon is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The size of the basket is mentioned in the description and is small.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the basket is mentioned in the description and is dark or black.", + 1 + ], + [ + "The basket or the hot air balloon is not mentioned.", + 0 + ], + [ + "The color of the basket is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The color of the basket is not mentioned, but the basket of the hot air balloon is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the basket is not mentioned, but the basket of the hot air balloon is mentioned.", + "pred_index": 3, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fuel tanks of the hot air balloon are not mentioned in the description.", + 1 + ], + [ + "The fuel tanks of the hot air balloon are mentioned in the description.", + -1 + ], + [ + "The hot air balloon is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The fuel tanks of the hot air balloon are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The burner of the hot air balloon is not mentioned in the description.", + 1 + ], + [ + "The burner of the hot air balloon is mentioned in the description.", + -1 + ], + [ + "The hot air balloon is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The burner of the hot air balloon is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ground is not mentioned in the description.", + 1 + ], + [ + "The ground is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The ground is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The people are not mentioned in the description.", + 1 + ], + [ + "The people are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The people are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The trees are not mentioned in the description.", + 1 + ], + [ + "The trees are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The trees are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is hot air balloon. Based on the image, is it likely that the object in the description is given class: hot air balloon or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA hot air balloon with a vibrant pattern of alternating vertical stripes in dark blue, red, and yellow. The balloon has a teardrop shape with a small basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is hot air balloon. Based on the image, is it likely that the object in the description is given class: hot air balloon or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA hot air balloon with a vibrant pattern of alternating vertical stripes in dark blue, red, and yellow. The balloon has a teardrop shape with a small basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the hot air balloon is mentioned in the description and is multicolored.\nB. The hot air balloon is not mentioned.\nC. The color of the hot air balloon is mentioned in the description but is not multicolored.\nD. The color of the hot air balloon is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA hot air balloon with a vibrant pattern of alternating vertical stripes in dark blue, red, and yellow. The balloon has a teardrop shape with a small basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the envelope is mentioned in the description and is nylon or polyester.\nB. The envelope or the hot air balloon is not mentioned.\nC. The material of the envelope is mentioned in the description but is not nylon or polyester.\nD. 
The material of the envelope is not mentioned, but the envelope of the hot air balloon is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA hot air balloon with a vibrant pattern of alternating vertical stripes in dark blue, red, and yellow. The balloon has a teardrop shape with a small basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The position of the basket is mentioned in the description and is bottom.\nB. The basket or the hot air balloon is not mentioned.\nC. The position of the basket is mentioned in the description but is not bottom.\nD. The position of the basket is not mentioned, but the basket of the hot air balloon is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA hot air balloon with a vibrant pattern of alternating vertical stripes in dark blue, red, and yellow. The balloon has a teardrop shape with a small basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the basket is mentioned in the description and is small.\nB. 
The basket or the hot air balloon is not mentioned.\nC. The size of the basket is mentioned in the description but is not small.\nD. The size of the basket is not mentioned, but the basket of the hot air balloon is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA hot air balloon with a vibrant pattern of alternating vertical stripes in dark blue, red, and yellow. The balloon has a teardrop shape with a small basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the basket is mentioned in the description and is dark or black.\nB. The basket or the hot air balloon is not mentioned.\nC. The color of the basket is mentioned in the description but is not dark or black.\nD. The color of the basket is not mentioned, but the basket of the hot air balloon is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA hot air balloon with a vibrant pattern of alternating vertical stripes in dark blue, red, and yellow. 
The balloon has a teardrop shape with a small basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fuel tanks of the hot air balloon are not mentioned in the description.\nB. The fuel tanks of the hot air balloon are mentioned in the description.\nC. The hot air balloon is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA hot air balloon with a vibrant pattern of alternating vertical stripes in dark blue, red, and yellow. The balloon has a teardrop shape with a small basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The burner of the hot air balloon is not mentioned in the description.\nB. The burner of the hot air balloon is mentioned in the description.\nC. The hot air balloon is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA hot air balloon with a vibrant pattern of alternating vertical stripes in dark blue, red, and yellow. 
The balloon has a teardrop shape with a small basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ground is not mentioned in the description.\nB. The ground is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA hot air balloon with a vibrant pattern of alternating vertical stripes in dark blue, red, and yellow. The balloon has a teardrop shape with a small basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The people are not mentioned in the description.\nB. The people are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA hot air balloon with a vibrant pattern of alternating vertical stripes in dark blue, red, and yellow. The balloon has a teardrop shape with a small basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The trees are not mentioned in the description.\nB. 
The trees are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The color of the hot air balloon is mentioned in the description and is multicolored.", + "D. The material of the envelope is not mentioned, but the envelope of the hot air balloon is mentioned.", + "A. The position of the basket is mentioned in the description and is bottom.", + "A. The size of the basket is mentioned in the description and is small.", + "D. The color of the basket is not mentioned, but the basket of the hot air balloon is mentioned.", + "A. The fuel tanks of the hot air balloon are not mentioned in the description.", + "A. The burner of the hot air balloon is not mentioned in the description.", + "A. The ground is not mentioned in the description.", + "A. The people are not mentioned in the description.", + "A. The trees are not mentioned in the description." + ], + "score": 0.9, + "score_pos": 0.8, + "score_neg": 1.0, + "recognition_result": true + }, + "25419509": { + "pred": "A metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a subtle gradient from light to dark.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fork is not mentioned.", + 0 + ], + [ + "The color of the fork is not mentioned.", + 0 + ], + [ + "The color of the fork is mentioned in the description and is metallic.", + 1 + ], + [ + "The color of the fork is mentioned in the description but is not metallic.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the fork is mentioned in the description and is metallic.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle or the fork is not mentioned.", + 0 + ], + [ + "The material of the handle is not mentioned, but the handle of the fork is mentioned.", + 0.5 + ], + [ + "The material of the handle is mentioned in the description and is metal.", + 1 + ], + [ + "The material of the handle is mentioned in the description but is not metal.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the handle is mentioned in the description and is metal.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle or the fork is not mentioned.", + 0 + ], + [ + "The shape of the handle is not mentioned, but the handle of the fork is mentioned.", + 0.5 + ], + [ + "The shape of the handle is mentioned in the description and is curved.", + 1 + ], + [ + "The shape of the handle is mentioned in the description but is not curved.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the handle is mentioned in the description and is curved.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle or the fork is not mentioned.", + 0 + ], + [ + "The texture of the handle is not mentioned, but the handle of the fork is mentioned.", + 0.5 + ], + [ + "The texture of the handle is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the handle is mentioned in the description but is not smooth.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The texture of the handle is mentioned in the description and is smooth.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tines or the fork are not mentioned.", + 0 + ], + [ + "The number of parts of the tines is not mentioned, but the tines of the fork are mentioned.", + 0.5 + ], + [ + "The number of parts of the tines is mentioned in the description and is 4.", + 1 + ], + [ + "The number of parts of the tines is mentioned in the description but is not 4.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The number of parts of the tines is mentioned in the description and is 4.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bolster of the fork is not mentioned in the description.", + 1 + ], + [ + "The bolster of the fork is mentioned in the description.", + -1 + ], + [ + "The fork is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The bolster of the fork is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plates are not mentioned in the description.", + 1 + ], + [ + "The plates are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The plates are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The end cap of the fork is not mentioned in the description.", + 1 + ], + [ + "The end cap of the fork is mentioned in the description.", + -1 + ], + [ + "The fork is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The end cap of the fork is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The drinks are not mentioned in the description.", + 1 + ], + [ + "The drinks are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The drinks are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ferrule of the fork is not mentioned in the description.", + 1 + ], + [ + "The ferrule of the fork is mentioned in the description.", + -1 + ], + [ + "The fork is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The ferrule of the fork is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is fork. Based on the image, is it likely that the object in the description is given class: fork or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a subtle gradient from light to dark.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is fork. Based on the image, is it likely that the object in the description is given class: fork or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a subtle gradient from light to dark.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fork is not mentioned.\nB. The color of the fork is not mentioned.\nC. The color of the fork is mentioned in the description and is metallic.\nD. The color of the fork is mentioned in the description but is not metallic.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a subtle gradient from light to dark.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handle or the fork is not mentioned.\nB. The material of the handle is not mentioned, but the handle of the fork is mentioned.\nC. The material of the handle is mentioned in the description and is metal.\nD. The material of the handle is mentioned in the description but is not metal.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a subtle gradient from light to dark.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handle or the fork is not mentioned.\nB. The shape of the handle is not mentioned, but the handle of the fork is mentioned.\nC. The shape of the handle is mentioned in the description and is curved.\nD. The shape of the handle is mentioned in the description but is not curved.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a subtle gradient from light to dark.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handle or the fork is not mentioned.\nB. The texture of the handle is not mentioned, but the handle of the fork is mentioned.\nC. The texture of the handle is mentioned in the description and is smooth.\nD. The texture of the handle is mentioned in the description but is not smooth.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a subtle gradient from light to dark.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tines or the fork are not mentioned.\nB. The number of parts of the tines is not mentioned, but the tines of the fork are mentioned.\nC. The number of parts of the tines is mentioned in the description and is 4.\nD. The number of parts of the tines is mentioned in the description but is not 4.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a subtle gradient from light to dark.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bolster of the fork is not mentioned in the description.\nB. The bolster of the fork is mentioned in the description.\nC. The fork is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a subtle gradient from light to dark.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The plates are not mentioned in the description.\nB. The plates are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a subtle gradient from light to dark.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The end cap of the fork is not mentioned in the description.\nB. The end cap of the fork is mentioned in the description.\nC. The fork is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a subtle gradient from light to dark.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The drinks are not mentioned in the description.\nB. The drinks are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. 
The handle has a smooth, reflective surface with a subtle gradient from light to dark.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ferrule of the fork is not mentioned in the description.\nB. The ferrule of the fork is mentioned in the description.\nC. The fork is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the fork is mentioned in the description and is metallic.", + "C. The material of the handle is mentioned in the description and is metal.", + "C. The shape of the handle is mentioned in the description and is curved.", + "C. The texture of the handle is mentioned in the description and is smooth.", + "C. The number of parts of the tines is mentioned in the description and is 4.", + "A. The bolster of the fork is not mentioned in the description.", + "A. The plates are not mentioned in the description.", + "A. The end cap of the fork is not mentioned in the description.", + "A. The drinks are not mentioned in the description.", + "A. The ferrule of the fork is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "25612310": { + "pred": "A woven wicker basket with a dark brown hue, featuring a series of horizontal slats and a slightly curved edge.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The basket is not mentioned.", + 0 + ], + [ + "The texture of the basket is not mentioned.", + 0 + ], + [ + "The texture of the basket is mentioned in the description and is woven.", + 1 + ], + [ + "The texture of the basket is mentioned in the description but is not woven.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The texture of the basket is mentioned in the description and is woven.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The basket is not mentioned.", + 0 + ], + [ + "The material of the basket is not mentioned.", + 0 + ], + [ + "The material of the basket is mentioned in the description and is wicker.", + 1 + ], + [ + "The material of the basket is mentioned in the description but is not wicker.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the basket is mentioned in the description and is wicker.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The basket is not mentioned.", + 0 + ], + [ + "The type of the basket is not mentioned.", + 0 + ], + [ + "The type of the basket is mentioned in the description and is interlaced.", + 1 + ], + [ + "The type of the basket is mentioned in the description but is not interlaced.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The type of the basket is mentioned in the description and is interlaced.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The basket is not mentioned.", + 0 + ], + [ + "The color of the basket is not mentioned.", + 0 + ], + [ + "The color of the basket is mentioned in the description and is brown or wooden.", + 1 + ], + [ + "The color of the basket is mentioned in the description but is not brown or wooden.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the basket is mentioned in the description and is brown or wooden.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The base of the basket is mentioned in the description.", + -1 + ], + [ + "The basket is not mentioned in the description.", + 0 + ], + [ + "The base of the basket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The base of the basket is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle of the basket is mentioned in the description.", + -1 + ], + [ + "The basket is not mentioned in the description.", + 0 + ], + [ + "The handle of the basket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The handle of the basket is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lid of the basket is mentioned in the description.", + -1 + ], + [ + "The basket is not mentioned in the description.", + 0 + ], + [ + "The lid of the basket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The lid of the basket is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The decorative elements of the basket are mentioned in the description.", + -1 + ], + [ + "The basket is not mentioned in the description.", + 0 + ], + [ + "The decorative elements of the basket are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The decorative elements of the basket are not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lining of the basket is mentioned in the description.", + -1 + ], + [ + "The basket is not mentioned in the description.", + 0 + ], + [ + "The lining of the basket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The lining of the basket is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is basket. Based on the image, is it likely that the object in the description is given class: basket or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA woven wicker basket with a dark brown hue, featuring a series of horizontal slats and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is basket. Based on the image, is it likely that the object in the description is given class: basket or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woven wicker basket with a dark brown hue, featuring a series of horizontal slats and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The basket is not mentioned.\nB. The texture of the basket is not mentioned.\nC. The texture of the basket is mentioned in the description and is woven.\nD. The texture of the basket is mentioned in the description but is not woven.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA woven wicker basket with a dark brown hue, featuring a series of horizontal slats and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The basket is not mentioned.\nB. The material of the basket is not mentioned.\nC. The material of the basket is mentioned in the description and is wicker.\nD. The material of the basket is mentioned in the description but is not wicker.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woven wicker basket with a dark brown hue, featuring a series of horizontal slats and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The basket is not mentioned.\nB. The type of the basket is not mentioned.\nC. The type of the basket is mentioned in the description and is interlaced.\nD. The type of the basket is mentioned in the description but is not interlaced.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA woven wicker basket with a dark brown hue, featuring a series of horizontal slats and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The basket is not mentioned.\nB. The color of the basket is not mentioned.\nC. The color of the basket is mentioned in the description and is brown or wooden.\nD. The color of the basket is mentioned in the description but is not brown or wooden.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woven wicker basket with a dark brown hue, featuring a series of horizontal slats and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The base of the basket is mentioned in the description.\nB. The basket is not mentioned in the description.\nC. The base of the basket is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA woven wicker basket with a dark brown hue, featuring a series of horizontal slats and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handle of the basket is mentioned in the description.\nB. The basket is not mentioned in the description.\nC. The handle of the basket is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woven wicker basket with a dark brown hue, featuring a series of horizontal slats and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lid of the basket is mentioned in the description.\nB. The basket is not mentioned in the description.\nC. The lid of the basket is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA woven wicker basket with a dark brown hue, featuring a series of horizontal slats and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The decorative elements of the basket are mentioned in the description.\nB. The basket is not mentioned in the description.\nC. The decorative elements of the basket are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woven wicker basket with a dark brown hue, featuring a series of horizontal slats and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lining of the basket is mentioned in the description.\nB. The basket is not mentioned in the description.\nC. The lining of the basket is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The texture of the basket is mentioned in the description and is woven.", + "C. The material of the basket is mentioned in the description and is wicker.", + "C. The type of the basket is mentioned in the description and is interlaced.", + "C. The color of the basket is mentioned in the description and is brown or wooden.", + "C. The base of the basket is not mentioned in the description.", + "C. The handle of the basket is not mentioned in the description.", + "C. The lid of the basket is not mentioned in the description.", + "C. 
The decorative elements of the basket are not mentioned in the description.", + "C. The lining of the basket is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "17265253": { + "pred": "A black rickshaw with a single visible wheel featuring a silver rim and black tire. The wheel is attached to a black frame with a visible axle and a small, round, orange reflector on the side. The rickshaw has a black canopy with a slightly curved top edge.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the spokes is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the spokes is not mentioned, but the spokes of the rickshaw are mentioned.", + 0.5 + ], + [ + "The spokes or the rickshaw are not mentioned.", + 0 + ], + [ + "The material of the spokes is mentioned in the description and is metal.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. The material of the spokes is not mentioned, but the spokes of the rickshaw are mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the tire is mentioned in the description but is not circular.", + -1 + ], + [ + "The shape of the tire is not mentioned, but the tire of the rickshaw is mentioned.", + 0.5 + ], + [ + "The tire or the rickshaw is not mentioned.", + 0 + ], + [ + "The shape of the tire is mentioned in the description and is circular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The shape of the tire is mentioned in the description and is circular.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the tire is mentioned in the description but is not rubber.", + -1 + ], + [ + "The material of the tire is not mentioned, but the tire of the rickshaw is mentioned.", + 0.5 + ], + [ + "The tire or the rickshaw is not mentioned.", + 0 + ], + [ + "The material of the tire is mentioned in the description and is rubber.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. The material of the tire is not mentioned, but the tire of the rickshaw is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the spokes is mentioned in the description but is not silver.", + -1 + ], + [ + "The color of the spokes is not mentioned, but the spokes of the rickshaw are mentioned.", + 0.5 + ], + [ + "The spokes or the rickshaw are not mentioned.", + 0 + ], + [ + "The color of the spokes is mentioned in the description and is silver.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the spokes is not mentioned, but the spokes of the rickshaw are mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the tire is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the tire is not mentioned, but the tire of the rickshaw is mentioned.", + 0.5 + ], + [ + "The tire or the rickshaw is not mentioned.", + 0 + ], + [ + "The color of the tire is mentioned in the description and is black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the tire is mentioned in the description and is black.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The path is not mentioned in the description.", + 1 + ], + [ + "The path is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The path is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rickshaw is not mentioned in the description.", + 0 + ], + [ + "The rickshaw canopy of the rickshaw is not mentioned in the description.", + 1 + ], + [ + "The rickshaw canopy of the rickshaw is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The rickshaw canopy of the rickshaw is mentioned in the description.", + "pred_index": 2, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rickshaw is not mentioned in the description.", + 0 + ], + [ + "The rickshaw handlebars of the rickshaw are not mentioned in the description.", + 1 + ], + [ + "The rickshaw handlebars of the rickshaw are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The rickshaw handlebars of the rickshaw are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fence is not mentioned in the description.", + 1 + ], + [ + "The fence is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The fence is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bamboo trees are not mentioned in the description.", + 1 + ], + [ + "The bamboo trees are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The bamboo trees are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is rickshaw. Based on the image, is it likely that the object in the description is given class: rickshaw or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a single visible wheel featuring a silver rim and black tire. The wheel is attached to a black frame with a visible axle and a small, round, orange reflector on the side. The rickshaw has a black canopy with a slightly curved top edge.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is rickshaw. Based on the image, is it likely that the object in the description is given class: rickshaw or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a single visible wheel featuring a silver rim and black tire. The wheel is attached to a black frame with a visible axle and a small, round, orange reflector on the side. The rickshaw has a black canopy with a slightly curved top edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the spokes is mentioned in the description but is not metal.\nB. The material of the spokes is not mentioned, but the spokes of the rickshaw are mentioned.\nC. The spokes or the rickshaw are not mentioned.\nD. The material of the spokes is mentioned in the description and is metal.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a single visible wheel featuring a silver rim and black tire. The wheel is attached to a black frame with a visible axle and a small, round, orange reflector on the side. The rickshaw has a black canopy with a slightly curved top edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the tire is mentioned in the description but is not circular.\nB. The shape of the tire is not mentioned, but the tire of the rickshaw is mentioned.\nC. 
The tire or the rickshaw is not mentioned.\nD. The shape of the tire is mentioned in the description and is circular.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a single visible wheel featuring a silver rim and black tire. The wheel is attached to a black frame with a visible axle and a small, round, orange reflector on the side. The rickshaw has a black canopy with a slightly curved top edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the tire is mentioned in the description but is not rubber.\nB. The material of the tire is not mentioned, but the tire of the rickshaw is mentioned.\nC. The tire or the rickshaw is not mentioned.\nD. The material of the tire is mentioned in the description and is rubber.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a single visible wheel featuring a silver rim and black tire. The wheel is attached to a black frame with a visible axle and a small, round, orange reflector on the side. 
The rickshaw has a black canopy with a slightly curved top edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the spokes is mentioned in the description but is not silver.\nB. The color of the spokes is not mentioned, but the spokes of the rickshaw are mentioned.\nC. The spokes or the rickshaw are not mentioned.\nD. The color of the spokes is mentioned in the description and is silver.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a single visible wheel featuring a silver rim and black tire. The wheel is attached to a black frame with a visible axle and a small, round, orange reflector on the side. The rickshaw has a black canopy with a slightly curved top edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the tire is mentioned in the description but is not black.\nB. The color of the tire is not mentioned, but the tire of the rickshaw is mentioned.\nC. The tire or the rickshaw is not mentioned.\nD. The color of the tire is mentioned in the description and is black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a single visible wheel featuring a silver rim and black tire. The wheel is attached to a black frame with a visible axle and a small, round, orange reflector on the side. The rickshaw has a black canopy with a slightly curved top edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The path is not mentioned in the description.\nB. The path is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a single visible wheel featuring a silver rim and black tire. The wheel is attached to a black frame with a visible axle and a small, round, orange reflector on the side. The rickshaw has a black canopy with a slightly curved top edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rickshaw is not mentioned in the description.\nB. The rickshaw canopy of the rickshaw is not mentioned in the description.\nC. The rickshaw canopy of the rickshaw is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a single visible wheel featuring a silver rim and black tire. The wheel is attached to a black frame with a visible axle and a small, round, orange reflector on the side. The rickshaw has a black canopy with a slightly curved top edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rickshaw is not mentioned in the description.\nB. The rickshaw handlebars of the rickshaw are not mentioned in the description.\nC. The rickshaw handlebars of the rickshaw are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a single visible wheel featuring a silver rim and black tire. The wheel is attached to a black frame with a visible axle and a small, round, orange reflector on the side. The rickshaw has a black canopy with a slightly curved top edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fence is not mentioned in the description.\nB. The fence is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a single visible wheel featuring a silver rim and black tire. The wheel is attached to a black frame with a visible axle and a small, round, orange reflector on the side. The rickshaw has a black canopy with a slightly curved top edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bamboo trees are not mentioned in the description.\nB. The bamboo trees are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The material of the spokes is not mentioned, but the spokes of the rickshaw are mentioned.", + "D. The shape of the tire is mentioned in the description and is circular.", + "B. The material of the tire is not mentioned, but the tire of the rickshaw is mentioned.", + "B. The color of the spokes is not mentioned, but the spokes of the rickshaw are mentioned.", + "D. The color of the tire is mentioned in the description and is black.", + "A. The path is not mentioned in the description.", + "C. The rickshaw canopy of the rickshaw is mentioned in the description.", + "B. The rickshaw handlebars of the rickshaw are not mentioned in the description.", + "A. The fence is not mentioned in the description.", + "A. The bamboo trees are not mentioned in the description." + ], + "score": 0.65, + "score_pos": 0.7, + "score_neg": 0.6, + "recognition_result": true + } +} \ No newline at end of file diff --git a/evaluation/DLC-Bench/model_outputs/gar_8b.json b/evaluation/DLC-Bench/model_outputs/gar_8b.json new file mode 100644 index 0000000000000000000000000000000000000000..b1fa61f68eec73db8700d623609e6b9a4b36d9c4 --- /dev/null +++ b/evaluation/DLC-Bench/model_outputs/gar_8b.json @@ -0,0 +1,102 @@ +{ + "279135": "The ski features a predominantly black base with intricate orange and white geometric patterns. 
The design includes a series of interconnected shapes and lines, creating a dynamic and modern appearance. The tip of the ski is slightly curved and tapers to a point, with the pattern continuing seamlessly along the length of the ski.", + "297718": "A piece of sushi with a bed of white rice wrapped in a dark seaweed sheet, filled with a generous portion of pink and white crab meat. The top is sprinkled with sesame seeds and a light drizzle of soy sauce.", + "361105": "A small cluster of fresh, vibrant green leaves with a smooth texture, attached to a thin, green stem. The leaves are broad and slightly serrated at the edges, with a glossy surface.", + "622329": "A rectangular, flat, beige-colored eraser with a slightly rough texture and rounded edges.", + "622332": "A black, rectangular stapler with a glossy finish. The top surface features a white logo and text. The front edge has a slightly raised, horizontal groove.", + "1075308": "A vintage-style television set with a boxy, black frame and a slightly curved screen. The top of the television features a series of control buttons and a small display screen.", + "1196168": "A rectangular, wall-mounted air conditioner with a large circular vent on the left side, featuring a grid pattern. The right side of the unit has a smooth surface with a small rectangular panel and a few visible screws.", + "1770866": "A white tag with handwritten text in blue and red marker. The blue text reads \"LIBRA\" and \"my tabouts\" in a cursive style. Below, in red marker, the text \"Add $50\" is written in a bold, sans-serif font.", + "1894089": "A metallic screwdriver with a flathead tip and a cylindrical shaft. The handle is textured for grip and has a slight taper towards the tip.", + "2391761": "The canoe features a blue tarpaulin cover secured over its wooden frame. The visible part of the canoe's hull is made of wooden planks, with a natural brown finish. The canoe has a pointed bow and a slightly raised stern. 
A white fender is attached to the side, and a red and white lifebuoy is visible inside the canoe.", + "2391780": "The bird has a long, slender neck and a pointed beak. Its body is covered in brown feathers with a slightly lighter underbelly. The wings are outstretched, showing a mix of brown and white feathers with a distinct pattern. The tail feathers are short and pointed.", + "2391781": "The bird has a predominantly white body with a mix of gray and brown feathers on its wings and back. Its wings are outstretched, showing a gradient from white at the base to darker shades towards the tips. The bird's head is slightly turned, with a small, pointed beak visible.", + "2580318": "The mouse has a smooth, metallic surface with a slightly curved, ergonomic shape. The visible part is a triangular section with a gradient of light and dark shades, giving it a sleek and modern appearance.", + "2580323": "A rectangular wooden picture frame with a light brown finish, containing a detailed architectural floor plan and elevation drawings. The drawings are monochrome and feature various rooms, furniture, and structural elements. The frame has a simple, smooth design with slightly rounded edges.", + "2588513": "A rectangular wooden block with a light beige color and visible wood grain texture. The block has a black base and a white band wrapped around its middle.", + "3993075": "A white pen with a red cap and a green and blue design on the barrel.", + "4027486": "The bus is predominantly blue with a white section near the bottom. 
It has a rectangular window with a black frame and a visible license plate that reads \"SABF.\" The bus features a sleek, modern design with a slightly curved roof and a small, white, triangular logo near the bottom.", + "4243725": "A curved, elongated object with a gradient of colors ranging from light yellow to dark brown, featuring a smooth, glossy surface.", + "4502267": "A green bean with a smooth, slightly curved surface, featuring a gradient of light to dark green hues. The bean has a tapered end and a small, pointed tip.", + "4604873": "A tall, lattice-style mobile crane with a long, horizontal boom extending to the left. The crane has a rectangular base and a vertical mast with a series of diagonal cross-bracing. The boom is supported by a series of cables and pulleys, and there is a hook at the end of the boom.", + "4781902": "A dark brown wooden stool with a triangular seat and four legs, each leg angling outward and connected by a lower horizontal support beam.", + "4782942": "A dark-colored, conical-shaped horn with a wide, flared opening and a smooth, cylindrical body.", + "4782949": "A cylindrical drum with a dark brown, textured surface and a metallic rim. The drum has a blue and white striped pattern on the side.", + "4916799": "A spherical sculpture composed of numerous small, white, dome-shaped elements arranged in a grid pattern. The sphere is mounted on a cylindrical base and features a blue band with the word \"Pune\" in blue letters. There are also green and yellow accents on the sphere.", + "5211280": "A stainless steel crock pot with a curved, dark gray handle on top. The control panel features a digital display in the center, surrounded by various buttons and indicators. The buttons are arranged in a semi-circular pattern around the display, with labels in both English and another language. 
The crock pot has a smooth, reflective surface and a slightly tapered design towards the base.", + "5718392": "The box is a rectangular prism with a woven pattern of interlocking dark brown and light brown strips. The surface has a textured appearance, with the weave creating a series of small, diamond-shaped openings.", + "5718415": "The tent has a yellow canopy with a slightly curved edge. The visible part of the tent includes a vertical metal pole supporting the canopy.", + "5718424": "A rugged, dark-colored shoe with a thick, textured sole and a prominent, rounded toe. The shoe features a light-colored trim around the opening and a visible lace-up design.", + "6012878": "A square traffic light with a black background and a red illuminated hand symbol on the left side.", + "6037269": "A vintage-style shower head with a curved, metallic arm and a cylindrical, cream-colored handle. The shower head itself is round and metallic, with a slightly domed top and a flat bottom.", + "6037272": "A green, cylindrical shampoo bottle with a slightly tapered end. The bottle has a smooth surface with a small, circular, orange and white label near the top.", + "6055310": "A wooden measuring stick with a natural finish, featuring black measurement markings in centimeters and millimeters. The stick has a slightly tapered end and a metal tip at the opposite end.", + "6820594": "A medium-sized cat with a predominantly white face and underbelly, featuring a mix of dark brown and black patches on its back and sides. The cat has large, round, light green eyes and a pink nose. Its ears are upright, with the left ear having a light brown patch and the right ear being mostly white. The cat's fur is short and smooth.", + "6820595": "A cat with a white face, black ears, and a black patch over its left eye. The body is predominantly black with a white underbelly and a white patch on its right side. 
The tail is black.", + "7050495": "A black leather handbag with a smooth, slightly glossy finish. The visible part shows a rectangular shape with a subtle seam along the bottom edge.", + "8201777": "A black van with a rear window displaying the word \"TAXI\" in yellow letters. The van has a yellow license plate with black text and a small white sticker below it. The rear lights are vertically aligned on both sides, and the van has a small emblem above the license plate.", + "8331685": "A black over-ear headphone with a curved headband and a cushioned earcup. The earcup has a circular shape with a smooth, matte finish. The headband is attached to the earcup with a visible hinge mechanism.", + "8331699": "The visible part of the waste container is black with a smooth surface and a slightly curved edge.", + "8331718": "A black spiral-bound notebook with a white cover featuring the word \"Xtreme\" in a stylized font.", + "8556674": "A single, round orange with a smooth, glossy surface. The orange has a vibrant, bright orange color with a small, lighter patch near the top left.", + "8556676": "A deep red apple with a smooth, glossy surface. The apple has a slightly irregular shape with a prominent bulge on the left side and a smaller bulge on the right side. The bottom part of the apple is slightly darker, almost black, with a few small, reflective spots.", + "8557176": "The watch features a rectangular gold case with a white dial. The strap is black with a textured pattern and a gold buckle.", + "8557195": "A beige, rectangular bread maker with a smooth surface and slightly rounded edges. The top edge has a small, dark opening.", + "8906172": "A black, curved earphone with a smooth, glossy finish.", + "9766617": "The goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The beak is black, and the legs and feet are also black. 
The underbelly is white, and the tail feathers are black with a white tip.", + "10666665": "A round wall clock with a black frame and a white face. The clock features black Arabic numerals at each hour mark, with the numbers 12, 3, 6, and 9 in larger font. The clock has three black hands: an hour hand, a minute hand, and a second hand. The hour hand is pointing between the 10 and 11, the minute hand is pointing at 12, and the second hand is pointing at 6.", + "10811497": "The mouse is a dark green, oval-shaped device with a smooth surface. It has a small, circular indentation near the bottom edge.", + "11012500": "A burrito filled with fresh green arugula, a slice of ripe tomato, shredded lettuce, and a layer of seasoned ground meat, all wrapped in a soft, lightly toasted tortilla.", + "11021544": "A metallic, curved faucet with a polished finish, featuring a single lever handle and a long, slender spout.", + "11021562": "The microwave oven has a white exterior with a rectangular shape. It features a prominent, curved handle on the front door, which is also white. The control panel is located on the right side of the door, with a series of buttons and a small display screen. The top of the microwave has a vented section for ventilation.", + "11021563": "A stainless steel gas stove with a black control panel featuring four knobs. The stove has a rectangular shape with a slightly raised back panel. The control panel is positioned at the back, and the stove has a smooth, reflective surface.", + "11775390": "A green rubber shoe with a textured sole and multiple circular holes on the side. The shoe features a black and white design on the upper part, with green laces threaded through the eyelets.", + "11950619": "The dumbbell features a white, rectangular handle with rounded edges and a smooth surface. The handle is attached to a metallic, rectangular weight plate with a series of evenly spaced, vertical slots. 
The weight plate is secured to the handle with a visible screw.", + "12178946": "A yellow bottle with a blue label featuring white text.", + "12348078": "A woman with dark hair tied up in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is holding a baby in her arms.", + "12348079": "A rectangular digital weighing scale with a metallic blue weighing platform. The scale has a white base with a control panel on the left side, featuring several buttons and a small display screen. The edges of the scale are slightly rounded, and the weighing platform has a textured surface.", + "12348080": "A pair of scissors with bright red plastic handles and metallic blades. The handles are oval-shaped with a smooth, glossy finish. The blades are straight and sharp, with a slight taper towards the tips.", + "13138178": "A blue plastic stool with a smooth, curved seat and rounded legs. The stool has a simple, sturdy design with a slightly glossy finish.", + "13187927": "The motorcycle is a white scooter with a sleek, modern design. It features a black seat and a rear storage compartment with a red reflector. The rear light is integrated into the storage compartment, and the scooter has a visible license plate mounted below the light. The handlebars are equipped with rearview mirrors, and the front section includes a headlight and a windshield.", + "14490578": "The harbor seal has a sleek, elongated body with a dark, almost black coloration. Its skin appears smooth and slightly glossy, with a subtle gradient of lighter shades along its back. The seal's head is rounded, and its body tapers towards the tail.", + "14640483": "A rectangular wooden chopping board with a smooth surface and a natural wood grain pattern. The board has a slightly rounded edge and a visible handle on one side.", + "14832137": "A cylindrical, light purple plastic bucket with a smooth surface and a slightly flared rim. 
The bucket has a small, curved handle attached near the top.", + "15050320": "A dark brown wine glass with a wide, flat base and a slender stem.", + "16010041": "A pair of light-colored wooden chopsticks with a smooth, polished surface. The tips of the chopsticks are slightly tapered and have a subtle orange hue.", + "16951734": "A wedge of cantaloupe with a smooth, light orange flesh and a thin, pale rind.", + "16957916": "Fresh green lettuce leaves with ruffled edges and a crisp texture, exhibiting a gradient of color from pale green at the base to a darker green towards the tips.", + "17072759": "A black belt with a smooth texture, featuring a silver rectangular buckle. The belt has a single prong and a loop near the buckle for securing the tail end.", + "17072764": "A pear with a smooth, light green skin, featuring a slight yellowish hue on the upper right side. The pear has a short, brown stem attached to its top.", + "17265253": "A black rickshaw with a black canopy, featuring a single visible wheel with a silver rim and black tire. The wheel is attached to a black frame with a visible pedal mechanism.", + "17265254": "A traditional rickshaw with a black frame and a red seat, featuring a curved handlebar and a single front wheel with spokes and a rubber tire.", + "17385866": "A scoop of vanilla ice cream topped with a slice of red strawberry, resting on a bed of green mint leaves.", + "17404769": "The car is a white minivan with a rear design featuring a large, dark-tinted rear window and a smaller, rectangular window on the side. The rear lights are vertically aligned and wrap around the side of the vehicle. The car has a visible rear wheel with a five-spoke alloy rim. There is a small, square fuel cap located on the side panel near the rear wheel.", + "18217373": "The spectacles feature a round, gold-colored frame with a thin, dark brown temple arm. 
The lens is a light, translucent yellow.", + "18301585": "The bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest consists of three horizontal slats, while the seat has two horizontal slats. The bench is supported by white concrete legs that are rectangular in shape and have a slightly tapered design.", + "18680641": "A rectangular, plush, red carpet with a slightly textured surface and a dark gray border along the edges.", + "18845103": "A metallic spoon with a slightly curved handle and a shallow, oval-shaped bowl. The handle has a smooth, reflective surface with a subtle taper towards the bowl.", + "19455186": "A blue metal cart with a rectangular frame and four black wheels. The cart has two horizontal blue bars across the front, with a small white label affixed to the upper bar.", + "19610023": "A bright green, frog-shaped slipper with a smooth, rounded body and a wide, open mouth. The slipper has a small, raised bump on the top of its head, resembling an eye.", + "19610025": "A white rabbit with upright ears, wearing a yellow shirt and blue pants, is holding a brown basket on its back.", + "20568676": "A stainless steel bowl filled with a mixture of chopped nuts and a yellow spatula resting on top.", + "20993402": "A roll of translucent adhesive tape with a smooth, glossy surface and a slightly reflective finish. The tape is wound tightly around a central cardboard core, which is visible at the top.", + "21107974": "A wooden gavel with a cylindrical head and a smooth, slightly tapered handle. The head features a prominent, rounded end and a series of horizontal grooves near the top. The handle is uniformly cylindrical and extends straight from the head.", + "21529954": "A cylindrical can with a white cap, featuring a vibrant design. The top half is orange with a small white logo, while the bottom half is green with a large, stylized white text. 
The can has a slightly curved shape and a glossy finish.", + "22064315": "The visible part of the gazelle shows a pair of long, curved horns with a dark, almost black coloration. The horns are smooth and taper to a point. The base of the horns is attached to a light brown, slightly textured head.", + "22107522": "A black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips.", + "22879790": "A single, large, white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a few thin, papery layers visible near the top. The root end is dark brown and slightly shriveled, with a few small roots extending from it.", + "24010373": "The guitar has a dark, glossy body with a cutaway design. The neck is dark with white dot inlays on the fretboard. The headstock is also dark, matching the neck, and features tuning pegs. The body has a circular soundhole and a bridge with white bridge pins.", + "24017816": "The van is white with a large, rectangular side window and a side mirror. The window is tinted, and the side mirror is black. The van has a sleek, modern design with smooth lines and a slightly curved roof.", + "24498027": "A tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a rectangular, box-like structure attached near the top, and a smaller, horizontal arm extending from the middle section.", + "24581953": "A large, light-colored carnivore with a robust, muscular body and a thick, short coat. It has a broad head with small, rounded ears and a long, tapering tail. 
The legs are sturdy and strong, with large paws.", + "24694197": "A ripe avocado with a bumpy, dark green skin and a central pit cavity filled with a reddish-brown, creamy substance.", + "24786060": "A light gray towel with a soft, slightly wrinkled texture, hanging loosely with a gentle curve.", + "25054869": "The toilet features a smooth, rounded lid with a glossy finish, seamlessly integrated into the tank. The tank has a slightly curved, angular design with a uniform, light beige color.", + "25273528": "The balloon features a vibrant pattern with alternating vertical stripes of red, yellow, and green. The red stripes are the most prominent, with yellow and green stripes creating a striking contrast. The balloon has a teardrop shape with a small black basket attached at the bottom.", + "25273553": "A black tripod with a central column and three legs, each leg featuring a rubber foot for stability. The legs are connected to a central hub, which is part of the tripod's support structure.", + "25419495": "The tongs have a metallic, slightly curved arm with a black rubberized grip handle. The handle is ergonomically designed with a smooth, matte finish. The tongs are open, showing the inner surfaces of the arms, which are also metallic and slightly curved.", + "25419509": "A metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a gentle upward curve near the tines.", + "25419516": "A plush toy with a blue face, large white eyes with black pupils, and two pointed ears.", + "25579493": "A small, square-shaped bowl with rounded edges, featuring a light blue exterior and a white interior. The bowl contains a mixture of white rice and a small piece of red food item in the center.", + "25612310": "A dark brown wicker basket with a woven pattern, featuring a slightly curved edge and a visible portion of the basket's side." 
+} \ No newline at end of file diff --git a/evaluation/DLC-Bench/model_outputs/gar_8b_eval.json b/evaluation/DLC-Bench/model_outputs/gar_8b_eval.json new file mode 100644 index 0000000000000000000000000000000000000000..b3ef40e813407ab6a854ef8d1ebe94e303d8a660 --- /dev/null +++ b/evaluation/DLC-Bench/model_outputs/gar_8b_eval.json @@ -0,0 +1,26028 @@ +{ + "2391781": { + "pred": "The bird has a predominantly white body with a mix of gray and brown feathers on its wings and back. Its wings are outstretched, showing a gradient from white at the base to darker shades towards the tips. The bird's head is slightly turned, with a small, pointed beak visible.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wing or the wild bird is not mentioned.", + 0 + ], + [ + "The color of the wing is not mentioned, but the wing of the wild bird is mentioned.", + 0.5 + ], + [ + "The color of the wing is mentioned in the description but is not grey or brown.", + -1 + ], + [ + "The color of the wing is mentioned in the description and is grey or brown.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the wing is mentioned in the description and is grey or brown.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The feathers or the wild bird are not mentioned.", + 0 + ], + [ + "The color of the feathers is not mentioned, but the feathers of the wild bird are mentioned.", + 0.5 + ], + [ + "The color of the feathers is mentioned in the description but is not white, grey, or brown.", + -1 + ], + [ + "The color of the feathers is mentioned in the description and is white, grey, or brown.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the feathers is mentioned in the description and is white, grey, or brown.", + "pred_index": 3, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tail or the wild bird is not mentioned.", + 0 + ], + [ + "The shape of the tail is not mentioned, but the tail of the wild bird is mentioned.", + 0.5 + ], + [ + "The shape of the tail is mentioned in the description but is not fan-like.", + -1 + ], + [ + "The shape of the tail is mentioned in the description and is fan-like.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The tail or the wild bird is not mentioned.", + "pred_index": 0, + "question_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The null or the wild bird is not mentioned.", + 0 + ], + [ + "The action of the null is not mentioned, but the null of the wild bird is mentioned.", + 0.5 + ], + [ + "The action of the null is mentioned in the description but is not flying.", + -1 + ], + [ + "The action of the null is mentioned in the description and is flying.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The null or the wild bird is not mentioned.", + "pred_index": 0, + "question_index": 3, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wing or the wild bird is not mentioned.", + 0 + ], + [ + "The position of the wing is not mentioned, but the wing of the wild bird is mentioned.", + 0.5 + ], + [ + "The position of the wing is mentioned in the description but is not extended or outstretched.", + -1 + ], + [ + "The position of the wing is mentioned in the description and is extended or outstretched.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The position of the wing is mentioned in the description and is extended or outstretched.", + "pred_index": 3, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The claws of the wild bird are not mentioned in the description.", + 1 + ], + [ + "The claws of the wild bird are mentioned in the description.", + -1 + ], + [ + "The wild bird is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The claws of the wild bird are not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The boats are not mentioned in the description.", + 1 + ], + [ + "The boats are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The boats are not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chimneys are not mentioned in the description.", + 1 + ], + [ + "The chimneys are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The chimneys are not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bridge is not mentioned in the description.", + 1 + ], + [ + "The bridge is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The bridge is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The windows are not mentioned in the description.", + 1 + ], + [ + "The windows are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The windows are not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a wild bird or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly white body with a mix of gray and brown feathers on its wings and back. Its wings are outstretched, showing a gradient from white at the base to darker shades towards the tips. The bird's head is slightly turned, with a small, pointed beak visible.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a wild bird or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly white body with a mix of gray and brown feathers on its wings and back. Its wings are outstretched, showing a gradient from white at the base to darker shades towards the tips. The bird's head is slightly turned, with a small, pointed beak visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wing or the wild bird is not mentioned.\nB. The color of the wing is not mentioned, but the wing of the wild bird is mentioned.\nC. The color of the wing is mentioned in the description but is not grey or brown.\nD. The color of the wing is mentioned in the description and is grey or brown.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly white body with a mix of gray and brown feathers on its wings and back. Its wings are outstretched, showing a gradient from white at the base to darker shades towards the tips. The bird's head is slightly turned, with a small, pointed beak visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The feathers or the wild bird are not mentioned.\nB. 
The color of the feathers is not mentioned, but the feathers of the wild bird are mentioned.\nC. The color of the feathers is mentioned in the description but is not white, grey, or brown.\nD. The color of the feathers is mentioned in the description and is white, grey, or brown.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly white body with a mix of gray and brown feathers on its wings and back. Its wings are outstretched, showing a gradient from white at the base to darker shades towards the tips. The bird's head is slightly turned, with a small, pointed beak visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tail or the wild bird is not mentioned.\nB. The shape of the tail is not mentioned, but the tail of the wild bird is mentioned.\nC. The shape of the tail is mentioned in the description but is not fan-like.\nD. The shape of the tail is mentioned in the description and is fan-like.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly white body with a mix of gray and brown feathers on its wings and back. 
Its wings are outstretched, showing a gradient from white at the base to darker shades towards the tips. The bird's head is slightly turned, with a small, pointed beak visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The null or the wild bird is not mentioned.\nB. The action of the null is not mentioned, but the null of the wild bird is mentioned.\nC. The action of the null is mentioned in the description but is not flying.\nD. The action of the null is mentioned in the description and is flying.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly white body with a mix of gray and brown feathers on its wings and back. Its wings are outstretched, showing a gradient from white at the base to darker shades towards the tips. The bird's head is slightly turned, with a small, pointed beak visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wing or the wild bird is not mentioned.\nB. The position of the wing is not mentioned, but the wing of the wild bird is mentioned.\nC. The position of the wing is mentioned in the description but is not extended or outstretched.\nD. The position of the wing is mentioned in the description and is extended or outstretched.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly white body with a mix of gray and brown feathers on its wings and back. Its wings are outstretched, showing a gradient from white at the base to darker shades towards the tips. The bird's head is slightly turned, with a small, pointed beak visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The claws of the wild bird are not mentioned in the description.\nB. The claws of the wild bird are mentioned in the description.\nC. The wild bird is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly white body with a mix of gray and brown feathers on its wings and back. Its wings are outstretched, showing a gradient from white at the base to darker shades towards the tips. The bird's head is slightly turned, with a small, pointed beak visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The boats are not mentioned in the description.\nB. The boats are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly white body with a mix of gray and brown feathers on its wings and back. Its wings are outstretched, showing a gradient from white at the base to darker shades towards the tips. The bird's head is slightly turned, with a small, pointed beak visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chimneys are not mentioned in the description.\nB. The chimneys are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly white body with a mix of gray and brown feathers on its wings and back. Its wings are outstretched, showing a gradient from white at the base to darker shades towards the tips. The bird's head is slightly turned, with a small, pointed beak visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bridge is not mentioned in the description.\nB. The bridge is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly white body with a mix of gray and brown feathers on its wings and back. Its wings are outstretched, showing a gradient from white at the base to darker shades towards the tips. The bird's head is slightly turned, with a small, pointed beak visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The windows are not mentioned in the description.\nB. The windows are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the wing is mentioned in the description and is grey or brown.", + "D. The color of the feathers is mentioned in the description and is white, grey, or brown.", + "A. The tail or the wild bird is not mentioned.", + "A. The null or the wild bird is not mentioned.", + "D. The position of the wing is mentioned in the description and is extended or outstretched.", + "A. The claws of the wild bird are not mentioned in the description.", + "A. The boats are not mentioned in the description.", + "A. The chimneys are not mentioned in the description.", + "A. The bridge is not mentioned in the description.", + "A. The windows are not mentioned in the description." + ], + "score": 0.8, + "score_pos": 0.6, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "2580323": { + "pred": "A rectangular wooden picture frame with a light brown finish, containing a detailed architectural floor plan and elevation drawings. The drawings are monochrome and feature various rooms, furniture, and structural elements. 
The frame has a simple, smooth design with slightly rounded edges.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the frame is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the frame is not mentioned, but the frame of the picture/frame is mentioned.", + 0.5 + ], + [ + "The frame or the picture/frame is not mentioned.", + 0 + ], + [ + "The shape of the frame is mentioned in the description but is not rectangular.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the frame is mentioned in the description and is rectangular.", + "pred_index": 0, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The background color of the picture is mentioned in the description and is white.", + 1 + ], + [ + "The background color of the picture is not mentioned, but the picture of the picture/frame is mentioned.", + 0.5 + ], + [ + "The picture or the picture/frame is not mentioned.", + 0 + ], + [ + "The background color of the picture is mentioned in the description but is not white.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. The background color of the picture is mentioned in the description but is not white.", + "pred_index": 3, + "question_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the picture is mentioned in the description and is diagram, schematic, or blueprint.", + 1 + ], + [ + "The type of the picture is not mentioned, but the picture of the picture/frame is mentioned.", + 0.5 + ], + [ + "The picture or the picture/frame is not mentioned.", + 0 + ], + [ + "The type of the picture is mentioned in the description but is not diagram, schematic, or blueprint.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The type of the picture is mentioned in the description and is diagram, schematic, or blueprint.", + "pred_index": 0, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the frame is mentioned in the description and is wood.", + 1 + ], + [ + "The material of the frame is not mentioned, but the frame of the picture/frame is mentioned.", + 0.5 + ], + [ + "The frame or the picture/frame is not mentioned.", + 0 + ], + [ + "The material of the frame is mentioned in the description but is not wood.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the frame is mentioned in the description and is wood.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The radio is mentioned in the description.", + -1 + ], + [ + "The radio is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The radio is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The picture/frame is not mentioned in the description.", + 0 + ], + [ + "The glass of the picture/frame are mentioned in the description.", + -1 + ], + [ + "The glass of the picture/frame are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The glass of the picture/frame are not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The keyboard is mentioned in the description.", + -1 + ], + [ + "The keyboard is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The keyboard is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The light switch is mentioned in the description.", + -1 + ], + [ + "The light switch is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The light switch is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The curtain is mentioned in the description.", + -1 + ], + [ + "The curtain is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The curtain is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a picture/frame or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden picture frame with a light brown finish, containing a detailed architectural floor plan and elevation drawings. The drawings are monochrome and feature various rooms, furniture, and structural elements. The frame has a simple, smooth design with slightly rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a picture/frame or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden picture frame with a light brown finish, containing a detailed architectural floor plan and elevation drawings. The drawings are monochrome and feature various rooms, furniture, and structural elements. The frame has a simple, smooth design with slightly rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the frame is mentioned in the description and is rectangular.\nB. The shape of the frame is not mentioned, but the frame of the picture/frame is mentioned.\nC. The frame or the picture/frame is not mentioned.\nD. 
The shape of the frame is mentioned in the description but is not rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden picture frame with a light brown finish, containing a detailed architectural floor plan and elevation drawings. The drawings are monochrome and feature various rooms, furniture, and structural elements. The frame has a simple, smooth design with slightly rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The background color of the picture is mentioned in the description and is white.\nB. The background color of the picture is not mentioned, but the picture of the picture/frame is mentioned.\nC. The picture or the picture/frame is not mentioned.\nD. The background color of the picture is mentioned in the description but is not white.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden picture frame with a light brown finish, containing a detailed architectural floor plan and elevation drawings. The drawings are monochrome and feature various rooms, furniture, and structural elements. 
The frame has a simple, smooth design with slightly rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The type of the picture is mentioned in the description and is diagram, schematic, or blueprint.\nB. The type of the picture is not mentioned, but the picture of the picture/frame is mentioned.\nC. The picture or the picture/frame is not mentioned.\nD. The type of the picture is mentioned in the description but is not diagram, schematic, or blueprint.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden picture frame with a light brown finish, containing a detailed architectural floor plan and elevation drawings. The drawings are monochrome and feature various rooms, furniture, and structural elements. The frame has a simple, smooth design with slightly rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the frame is mentioned in the description and is wood.\nB. The material of the frame is not mentioned, but the frame of the picture/frame is mentioned.\nC. The frame or the picture/frame is not mentioned.\nD. The material of the frame is mentioned in the description but is not wood.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden picture frame with a light brown finish, containing a detailed architectural floor plan and elevation drawings. The drawings are monochrome and feature various rooms, furniture, and structural elements. The frame has a simple, smooth design with slightly rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The radio is mentioned in the description.\nB. The radio is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden picture frame with a light brown finish, containing a detailed architectural floor plan and elevation drawings. The drawings are monochrome and feature various rooms, furniture, and structural elements. The frame has a simple, smooth design with slightly rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The picture/frame is not mentioned in the description.\nB. The glass of the picture/frame are mentioned in the description.\nC. The glass of the picture/frame are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden picture frame with a light brown finish, containing a detailed architectural floor plan and elevation drawings. The drawings are monochrome and feature various rooms, furniture, and structural elements. The frame has a simple, smooth design with slightly rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The keyboard is mentioned in the description.\nB. The keyboard is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden picture frame with a light brown finish, containing a detailed architectural floor plan and elevation drawings. The drawings are monochrome and feature various rooms, furniture, and structural elements. The frame has a simple, smooth design with slightly rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The light switch is mentioned in the description.\nB. The light switch is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden picture frame with a light brown finish, containing a detailed architectural floor plan and elevation drawings. The drawings are monochrome and feature various rooms, furniture, and structural elements. The frame has a simple, smooth design with slightly rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The curtain is mentioned in the description.\nB. The curtain is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The shape of the frame is mentioned in the description and is rectangular.", + "D. The background color of the picture is mentioned in the description but is not white.", + "A. The type of the picture is mentioned in the description and is diagram, schematic, or blueprint.", + "A. The material of the frame is mentioned in the description and is wood.", + "B. The radio is not mentioned in the description.", + "C. The glass of the picture/frame are not mentioned in the description.", + "B. The keyboard is not mentioned in the description.", + "B. The light switch is not mentioned in the description.", + "B. The curtain is not mentioned in the description." 
+ ], + "score": 0.7777777777777778, + "score_pos": 0.5, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "4782942": { + "pred": "A dark-colored, conical-shaped horn with a wide, flared opening and a smooth, cylindrical body.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The megaphone/speaker is not mentioned.", + 0 + ], + [ + "The shape of the megaphone/speaker is not mentioned.", + 0 + ], + [ + "The shape of the megaphone/speaker is mentioned in the description but is not conical.", + -1 + ], + [ + "The shape of the megaphone/speaker is mentioned in the description and is conical.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the megaphone/speaker is mentioned in the description and is conical.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The megaphone/speaker is not mentioned.", + 0 + ], + [ + "The color of the megaphone/speaker is not mentioned.", + 0 + ], + [ + "The color of the megaphone/speaker is mentioned in the description but is not gray.", + -1 + ], + [ + "The color of the megaphone/speaker is mentioned in the description and is gray.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the megaphone/speaker is not mentioned.", + "pred_index": 1, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The front/opening or the megaphone/speaker is not mentioned.", + 0 + ], + [ + "The shape of the front/opening is not mentioned, but the front/opening of the megaphone/speaker is mentioned.", + 0.5 + ], + [ + "The shape of the front/opening is mentioned in the description but is not round.", + -1 + ], + [ + "The shape of the front/opening is mentioned in the description and is round.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The front/opening or the megaphone/speaker is not mentioned.", + "pred_index": 0, + "question_index": 2, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The megaphone/speaker is not mentioned in the description.", + 0 + ], + [ + "The siren button of the megaphone/speaker is not mentioned in the description.", + 1 + ], + [ + "The siren button of the megaphone/speaker is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The megaphone/speaker is not mentioned in the description.", + "pred_index": 0, + "question_index": 3, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fence is not mentioned in the description.", + 1 + ], + [ + "The fence is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The fence is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The megaphone/speaker is not mentioned in the description.", + 0 + ], + [ + "The strap of the megaphone/speaker is not mentioned in the description.", + 1 + ], + [ + "The strap of the megaphone/speaker is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The strap of the megaphone/speaker is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The megaphone/speaker is not mentioned in the description.", + 0 + ], + [ + "The battery compartment of the megaphone/speaker is not mentioned in the description.", + 1 + ], + [ + "The battery compartment of the megaphone/speaker is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The battery compartment of the megaphone/speaker is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The houses are not mentioned in the description.", + 1 + ], + [ + "The houses are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The houses are not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a megaphone/speaker or an object of a similar type? 
Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark-colored, conical-shaped horn with a wide, flared opening and a smooth, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a megaphone/speaker or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark-colored, conical-shaped horn with a wide, flared opening and a smooth, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The megaphone/speaker is not mentioned.\nB. The shape of the megaphone/speaker is not mentioned.\nC. The shape of the megaphone/speaker is mentioned in the description but is not conical.\nD. 
The shape of the megaphone/speaker is mentioned in the description and is conical.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark-colored, conical-shaped horn with a wide, flared opening and a smooth, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The megaphone/speaker is not mentioned.\nB. The color of the megaphone/speaker is not mentioned.\nC. The color of the megaphone/speaker is mentioned in the description but is not gray.\nD. The color of the megaphone/speaker is mentioned in the description and is gray.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark-colored, conical-shaped horn with a wide, flared opening and a smooth, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The front/opening or the megaphone/speaker is not mentioned.\nB. The shape of the front/opening is not mentioned, but the front/opening of the megaphone/speaker is mentioned.\nC. The shape of the front/opening is mentioned in the description but is not round.\nD. 
The shape of the front/opening is mentioned in the description and is round.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark-colored, conical-shaped horn with a wide, flared opening and a smooth, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The megaphone/speaker is not mentioned in the description.\nB. The siren button of the megaphone/speaker is not mentioned in the description.\nC. The siren button of the megaphone/speaker is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark-colored, conical-shaped horn with a wide, flared opening and a smooth, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fence is not mentioned in the description.\nB. The fence is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark-colored, conical-shaped horn with a wide, flared opening and a smooth, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The megaphone/speaker is not mentioned in the description.\nB. The strap of the megaphone/speaker is not mentioned in the description.\nC. The strap of the megaphone/speaker is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark-colored, conical-shaped horn with a wide, flared opening and a smooth, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The megaphone/speaker is not mentioned in the description.\nB. The battery compartment of the megaphone/speaker is not mentioned in the description.\nC. The battery compartment of the megaphone/speaker is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA dark-colored, conical-shaped horn with a wide, flared opening and a smooth, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The houses are not mentioned in the description.\nB. The houses are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The shape of the megaphone/speaker is mentioned in the description and is conical.", + "B. The color of the megaphone/speaker is not mentioned.", + "A. The front/opening or the megaphone/speaker is not mentioned.", + "A. The megaphone/speaker is not mentioned in the description.", + "A. The fence is not mentioned in the description.", + "B. The strap of the megaphone/speaker is not mentioned in the description.", + "B. The battery compartment of the megaphone/speaker is not mentioned in the description.", + "A. The houses are not mentioned in the description." + ], + "score": 0.625, + "score_pos": 0.3333333333333333, + "score_neg": 0.8, + "neg_valid_num": 5, + "recognition_result": true + }, + "6037269": { + "pred": "A vintage-style shower head with a curved, metallic arm and a cylindrical, cream-colored handle. The shower head itself is round and metallic, with a slightly domed top and a flat bottom.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the showerhead is mentioned in the description but is not silver and/or metallic.", + -1 + ], + [ + "The color of the showerhead is not mentioned.", + 0 + ], + [ + "The color of the showerhead is mentioned in the description and is silver and/or metallic.", + 1 + ], + [ + "The showerhead is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the showerhead is mentioned in the description and is silver and/or metallic.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the showerhead is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the showerhead is not mentioned.", + 0 + ], + [ + "The texture of the showerhead is mentioned in the description and is smooth.", + 1 + ], + [ + "The showerhead is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The texture of the showerhead is not mentioned.", + "pred_index": 1, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the showerhead is mentioned in the description but is not circular.", + -1 + ], + [ + "The shape of the showerhead is not mentioned.", + 0 + ], + [ + "The shape of the showerhead is mentioned in the description and is circular.", + 1 + ], + [ + "The showerhead is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the showerhead is mentioned in the description and is circular.", + "pred_index": 2, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the showerhead is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the showerhead is not mentioned.", + 0 + ], + [ + "The material of the showerhead is mentioned in the description and is metal.", + 1 + ], + [ + "The showerhead is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The material of the showerhead is mentioned in the description and is metal.", + "pred_index": 2, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the handle is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the handle is not mentioned, but the handle of the showerhead is mentioned.", + 0.5 + ], + [ + "The color of the handle is mentioned in the description and is white.", + 1 + ], + [ + "The handle or the showerhead is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the handle is mentioned in the description but is not white.", + "pred_index": 0, + "question_index": 4, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shower hose of the showerhead is mentioned in the description.", + -1 + ], + [ + "The showerhead is not mentioned in the description.", + 0 + ], + [ + "The shower hose of the showerhead is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The shower hose of the showerhead is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bath caddy is mentioned in the description.", + -1 + ], + [ + "The bath caddy is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The bath caddy is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bathtub is mentioned in the description.", + -1 + ], + [ + "The bathtub is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bathtub is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The showerhead filter of the showerhead is mentioned in the description.", + -1 + ], + [ + "The showerhead is not mentioned in the description.", + 0 + ], + [ + "The showerhead filter of the showerhead is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The showerhead filter of the showerhead is not mentioned in the description.", + "pred_index": 2, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet paper holder is mentioned in the description.", + -1 + ], + [ + "The toilet paper holder is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The toilet paper holder is not mentioned in the description.", + "pred_index": 1, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a showerhead or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style shower head with a curved, metallic arm and a cylindrical, cream-colored handle. The shower head itself is round and metallic, with a slightly domed top and a flat bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a showerhead or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style shower head with a curved, metallic arm and a cylindrical, cream-colored handle. The shower head itself is round and metallic, with a slightly domed top and a flat bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the showerhead is mentioned in the description but is not silver and/or metallic.\nB. The color of the showerhead is not mentioned.\nC. The color of the showerhead is mentioned in the description and is silver and/or metallic.\nD. 
The showerhead is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style shower head with a curved, metallic arm and a cylindrical, cream-colored handle. The shower head itself is round and metallic, with a slightly domed top and a flat bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the showerhead is mentioned in the description but is not smooth.\nB. The texture of the showerhead is not mentioned.\nC. The texture of the showerhead is mentioned in the description and is smooth.\nD. The showerhead is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style shower head with a curved, metallic arm and a cylindrical, cream-colored handle. The shower head itself is round and metallic, with a slightly domed top and a flat bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the showerhead is mentioned in the description but is not circular.\nB. The shape of the showerhead is not mentioned.\nC. 
The shape of the showerhead is mentioned in the description and is circular.\nD. The showerhead is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style shower head with a curved, metallic arm and a cylindrical, cream-colored handle. The shower head itself is round and metallic, with a slightly domed top and a flat bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the showerhead is mentioned in the description but is not metal.\nB. The material of the showerhead is not mentioned.\nC. The material of the showerhead is mentioned in the description and is metal.\nD. The showerhead is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style shower head with a curved, metallic arm and a cylindrical, cream-colored handle. The shower head itself is round and metallic, with a slightly domed top and a flat bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the handle is mentioned in the description but is not white.\nB. 
The color of the handle is not mentioned, but the handle of the showerhead is mentioned.\nC. The color of the handle is mentioned in the description and is white.\nD. The handle or the showerhead is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style shower head with a curved, metallic arm and a cylindrical, cream-colored handle. The shower head itself is round and metallic, with a slightly domed top and a flat bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shower hose of the showerhead is mentioned in the description.\nB. The showerhead is not mentioned in the description.\nC. The shower hose of the showerhead is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style shower head with a curved, metallic arm and a cylindrical, cream-colored handle. The shower head itself is round and metallic, with a slightly domed top and a flat bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bath caddy is mentioned in the description.\nB. 
The bath caddy is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style shower head with a curved, metallic arm and a cylindrical, cream-colored handle. The shower head itself is round and metallic, with a slightly domed top and a flat bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bathtub is mentioned in the description.\nB. The bathtub is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style shower head with a curved, metallic arm and a cylindrical, cream-colored handle. The shower head itself is round and metallic, with a slightly domed top and a flat bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The showerhead filter of the showerhead is mentioned in the description.\nB. The showerhead is not mentioned in the description.\nC. The showerhead filter of the showerhead is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style shower head with a curved, metallic arm and a cylindrical, cream-colored handle. The shower head itself is round and metallic, with a slightly domed top and a flat bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toilet paper holder is mentioned in the description.\nB. The toilet paper holder is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the showerhead is mentioned in the description and is silver and/or metallic.", + "B. The texture of the showerhead is not mentioned.", + "C. The shape of the showerhead is mentioned in the description and is circular.", + "C. The material of the showerhead is mentioned in the description and is metal.", + "A. The color of the handle is mentioned in the description but is not white.", + "C. The shower hose of the showerhead is not mentioned in the description.", + "B. The bath caddy is not mentioned in the description.", + "B. The bathtub is not mentioned in the description.", + "C. The showerhead filter of the showerhead is not mentioned in the description.", + "B. The toilet paper holder is not mentioned in the description." + ], + "score": 0.7, + "score_pos": 0.4, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "7050495": { + "pred": "A black leather handbag with a smooth, slightly glossy finish. 
The visible part shows a rectangular shape with a subtle seam along the bottom edge.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the handbag is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the handbag is not mentioned.", + 0 + ], + [ + "The texture of the handbag is mentioned in the description but is not smooth.", + -1 + ], + [ + "The handbag is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The texture of the handbag is mentioned in the description and is smooth.", + "pred_index": 0, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the handbag is mentioned in the description and is black or glossy.", + 1 + ], + [ + "The color of the handbag is not mentioned.", + 0 + ], + [ + "The color of the handbag is mentioned in the description but is not black or glossy.", + -1 + ], + [ + "The handbag is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the handbag is mentioned in the description and is black or glossy.", + "pred_index": 0, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the handbag is mentioned in the description and is leather.", + 1 + ], + [ + "The material of the handbag is not mentioned.", + 0 + ], + [ + "The material of the handbag is mentioned in the description but is not leather.", + -1 + ], + [ + "The handbag is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The material of the handbag is mentioned in the description and is leather.", + "pred_index": 0, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the handbag is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the handbag is not mentioned.", + 0 + ], + [ + "The shape of the handbag is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The handbag is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the handbag is mentioned in the description and is rectangular.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The seam of the handbag is mentioned in the description and is visible.", + 1 + ], + [ + "The seam of the handbag is not mentioned.", + 0 + ], + [ + "The seam of the handbag is mentioned in the description but is not visible.", + -1 + ], + [ + "The handbag is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The seam of the handbag is mentioned in the description and is visible.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The price tags are not mentioned in the description.", + 1 + ], + [ + "The price tags are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The price tags are not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handbag is not mentioned in the description.", + 0 + ], + [ + "The logo of the handbag is not mentioned in the description.", + 1 + ], + [ + "The logo of the handbag is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The logo of the handbag is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handbag is not mentioned in the description.", + 0 + ], + [ + "The handle of the handbag is not mentioned in the description.", + 1 + ], + [ + "The handle of the handbag is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The handle of the handbag is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handbag is not mentioned in the description.", + 0 + ], + [ + "The pocket of the handbag is not mentioned in the description.", + 1 + ], + [ + "The pocket of the handbag is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The pocket of the handbag is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handbag is not mentioned in the description.", + 0 + ], + [ + "The zipper of the handbag is not mentioned in the description.", + 1 + ], + [ + "The zipper of the handbag is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The zipper of the handbag is not mentioned in the description.", + "pred_index": 1, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a handbag or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth, slightly glossy finish. The visible part shows a rectangular shape with a subtle seam along the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a handbag or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth, slightly glossy finish. The visible part shows a rectangular shape with a subtle seam along the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the handbag is mentioned in the description and is smooth.\nB. The texture of the handbag is not mentioned.\nC. The texture of the handbag is mentioned in the description but is not smooth.\nD. The handbag is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth, slightly glossy finish. The visible part shows a rectangular shape with a subtle seam along the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the handbag is mentioned in the description and is black or glossy.\nB. The color of the handbag is not mentioned.\nC. The color of the handbag is mentioned in the description but is not black or glossy.\nD. The handbag is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth, slightly glossy finish. The visible part shows a rectangular shape with a subtle seam along the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the handbag is mentioned in the description and is leather.\nB. The material of the handbag is not mentioned.\nC. The material of the handbag is mentioned in the description but is not leather.\nD. The handbag is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth, slightly glossy finish. The visible part shows a rectangular shape with a subtle seam along the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the handbag is mentioned in the description and is rectangular.\nB. The shape of the handbag is not mentioned.\nC. The shape of the handbag is mentioned in the description but is not rectangular.\nD. The handbag is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth, slightly glossy finish. The visible part shows a rectangular shape with a subtle seam along the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The seam of the handbag is mentioned in the description and is visible.\nB. The seam of the handbag is not mentioned.\nC. The seam of the handbag is mentioned in the description but is not visible.\nD. The handbag is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth, slightly glossy finish. The visible part shows a rectangular shape with a subtle seam along the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The price tags are not mentioned in the description.\nB. The price tags are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth, slightly glossy finish. 
The visible part shows a rectangular shape with a subtle seam along the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handbag is not mentioned in the description.\nB. The logo of the handbag is not mentioned in the description.\nC. The logo of the handbag is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth, slightly glossy finish. The visible part shows a rectangular shape with a subtle seam along the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handbag is not mentioned in the description.\nB. The handle of the handbag is not mentioned in the description.\nC. The handle of the handbag is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth, slightly glossy finish. 
The visible part shows a rectangular shape with a subtle seam along the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handbag is not mentioned in the description.\nB. The pocket of the handbag is not mentioned in the description.\nC. The pocket of the handbag is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth, slightly glossy finish. The visible part shows a rectangular shape with a subtle seam along the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handbag is not mentioned in the description.\nB. The zipper of the handbag is not mentioned in the description.\nC. The zipper of the handbag is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The texture of the handbag is mentioned in the description and is smooth.", + "A. The color of the handbag is mentioned in the description and is black or glossy.", + "A. The material of the handbag is mentioned in the description and is leather.", + "A. The shape of the handbag is mentioned in the description and is rectangular.", + "A. The seam of the handbag is mentioned in the description and is visible.", + "A. The price tags are not mentioned in the description.", + "B. The logo of the handbag is not mentioned in the description.", + "B. The handle of the handbag is not mentioned in the description.", + "B. 
The pocket of the handbag is not mentioned in the description.", + "B. The zipper of the handbag is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "8331699": { + "pred": "The visible part of the waste container is black with a smooth surface and a slightly curved edge.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The computer box is not mentioned.", + 0 + ], + [ + "The shape of the computer box is not mentioned.", + 0 + ], + [ + "The shape of the computer box is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the computer box is mentioned in the description but is not rectangular.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the computer box is not mentioned.", + "pred_index": 1, + "question_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The computer box is not mentioned.", + 0 + ], + [ + "The color of the computer box is not mentioned.", + 0 + ], + [ + "The color of the computer box is mentioned in the description and is black or gray.", + 1 + ], + [ + "The color of the computer box is mentioned in the description but is not black or gray.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the computer box is not mentioned.", + "pred_index": 1, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The side panel or the computer box is not mentioned.", + 0 + ], + [ + "The color of the side panel is not mentioned, but the side panel of the computer box is mentioned.", + 0.5 + ], + [ + "The color of the side panel is mentioned in the description and is gray.", + 1 + ], + [ + "The color of the side panel is mentioned in the description but is not gray.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the side panel is mentioned in the description but is not gray.", + "pred_index": 3, + "question_index": 2, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chair is not mentioned in the description.", + 1 + ], + [ + "The chair is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The chair is not mentioned in the description.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rubber duck is not mentioned in the description.", + 1 + ], + [ + "The rubber duck is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The rubber duck is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The power button of the computer box is not mentioned in the description.", + 1 + ], + [ + "The computer box is not mentioned in the description.", + 0 + ], + [ + "The power button of the computer box is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The computer box is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sticky notes are not mentioned in the description.", + 1 + ], + [ + "The sticky notes are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The sticky notes are not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The USB ports of the computer box are not mentioned in the description.", + 1 + ], + [ + "The computer box is not mentioned in the description.", + 0 + ], + [ + "The USB ports of the computer box are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The computer box is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 0 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a computer box or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the waste container is black with a smooth surface and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a computer box or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the waste container is black with a smooth surface and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The computer box is not mentioned.\nB. The shape of the computer box is not mentioned.\nC. The shape of the computer box is mentioned in the description and is rectangular.\nD. The shape of the computer box is mentioned in the description but is not rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the waste container is black with a smooth surface and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The computer box is not mentioned.\nB. The color of the computer box is not mentioned.\nC. The color of the computer box is mentioned in the description and is black or gray.\nD. The color of the computer box is mentioned in the description but is not black or gray.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the waste container is black with a smooth surface and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The side panel or the computer box is not mentioned.\nB. The color of the side panel is not mentioned, but the side panel of the computer box is mentioned.\nC. The color of the side panel is mentioned in the description and is gray.\nD. The color of the side panel is mentioned in the description but is not gray.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the waste container is black with a smooth surface and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chair is not mentioned in the description.\nB. The chair is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the waste container is black with a smooth surface and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rubber duck is not mentioned in the description.\nB. The rubber duck is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the waste container is black with a smooth surface and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The power button of the computer box is not mentioned in the description.\nB. The computer box is not mentioned in the description.\nC. The power button of the computer box is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the waste container is black with a smooth surface and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sticky notes are not mentioned in the description.\nB. The sticky notes are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the waste container is black with a smooth surface and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The USB ports of the computer box are not mentioned in the description.\nB. The computer box is not mentioned in the description.\nC. The USB ports of the computer box are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The shape of the computer box is not mentioned.", + "B. 
The color of the computer box is not mentioned.", + "D. The color of the side panel is mentioned in the description but is not gray.", + "A. The chair is not mentioned in the description.", + "A. The rubber duck is not mentioned in the description.", + "B. The computer box is not mentioned in the description.", + "A. The sticky notes are not mentioned in the description.", + "B. The computer box is not mentioned in the description." + ], + "score": 0.25, + "score_pos": -0.3333333333333333, + "score_neg": 0.6, + "neg_valid_num": 5, + "recognition_result": true + }, + "8556676": { + "pred": "A deep red apple with a smooth, glossy surface. The apple has a slightly irregular shape with a prominent bulge on the left side and a smaller bulge on the right side. The bottom part of the apple is slightly darker, almost black, with a few small, reflective spots.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the apple is mentioned in the description and is small.", + 1 + ], + [ + "The size of the apple is not mentioned.", + 0 + ], + [ + "The size of the apple is mentioned in the description but is not small.", + -1 + ], + [ + "The apple is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The size of the apple is not mentioned.", + "pred_index": 1, + "question_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the apple is mentioned in the description and is smooth or glossy.", + 1 + ], + [ + "The texture of the apple is not mentioned.", + 0 + ], + [ + "The texture of the apple is mentioned in the description but is not smooth or glossy.", + -1 + ], + [ + "The apple is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The texture of the apple is mentioned in the description and is smooth or glossy.", + "pred_index": 0, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the apple is mentioned in the description and is red.", + 1 + ], + [ + "The color of the apple is not mentioned.", + 0 + ], + [ + "The color of the apple is mentioned in the description but is not red.", + -1 + ], + [ + "The apple is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the apple is mentioned in the description and is red.", + "pred_index": 0, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lemon is mentioned in the description.", + -1 + ], + [ + "The lemon is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The lemon is not mentioned in the description.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange is mentioned in the description.", + -1 + ], + [ + "The orange is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The orange is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pineapple is mentioned in the description.", + -1 + ], + [ + "The pineapple is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The pineapple is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The seeds of the apple are mentioned in the description.", + -1 + ], + [ + "The seeds of the apple are not mentioned in the description.", + 1 + ], + [ + "The apple is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The seeds of the apple are not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The stem of the apple is mentioned in the description.", + -1 + ], + [ + "The stem of the apple is not mentioned in the description.", + 1 + ], + [ + "The apple is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The stem of the apple is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is an apple or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a smooth, glossy surface. 
The apple has a slightly irregular shape with a prominent bulge on the left side and a smaller bulge on the right side. The bottom part of the apple is slightly darker, almost black, with a few small, reflective spots.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is an apple or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a smooth, glossy surface. The apple has a slightly irregular shape with a prominent bulge on the left side and a smaller bulge on the right side. The bottom part of the apple is slightly darker, almost black, with a few small, reflective spots.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the apple is mentioned in the description and is small.\nB. The size of the apple is not mentioned.\nC. The size of the apple is mentioned in the description but is not small.\nD. The apple is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a smooth, glossy surface. 
The apple has a slightly irregular shape with a prominent bulge on the left side and a smaller bulge on the right side. The bottom part of the apple is slightly darker, almost black, with a few small, reflective spots.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the apple is mentioned in the description and is smooth or glossy.\nB. The texture of the apple is not mentioned.\nC. The texture of the apple is mentioned in the description but is not smooth or glossy.\nD. The apple is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a smooth, glossy surface. The apple has a slightly irregular shape with a prominent bulge on the left side and a smaller bulge on the right side. The bottom part of the apple is slightly darker, almost black, with a few small, reflective spots.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the apple is mentioned in the description and is red.\nB. The color of the apple is not mentioned.\nC. The color of the apple is mentioned in the description but is not red.\nD. The apple is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a smooth, glossy surface. The apple has a slightly irregular shape with a prominent bulge on the left side and a smaller bulge on the right side. The bottom part of the apple is slightly darker, almost black, with a few small, reflective spots.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lemon is mentioned in the description.\nB. The lemon is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a smooth, glossy surface. The apple has a slightly irregular shape with a prominent bulge on the left side and a smaller bulge on the right side. The bottom part of the apple is slightly darker, almost black, with a few small, reflective spots.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orange is mentioned in the description.\nB. The orange is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a smooth, glossy surface. The apple has a slightly irregular shape with a prominent bulge on the left side and a smaller bulge on the right side. The bottom part of the apple is slightly darker, almost black, with a few small, reflective spots.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pineapple is mentioned in the description.\nB. The pineapple is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a smooth, glossy surface. The apple has a slightly irregular shape with a prominent bulge on the left side and a smaller bulge on the right side. The bottom part of the apple is slightly darker, almost black, with a few small, reflective spots.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The seeds of the apple are mentioned in the description.\nB. The seeds of the apple are not mentioned in the description.\nC. The apple is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a smooth, glossy surface. The apple has a slightly irregular shape with a prominent bulge on the left side and a smaller bulge on the right side. The bottom part of the apple is slightly darker, almost black, with a few small, reflective spots.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The stem of the apple is mentioned in the description.\nB. The stem of the apple is not mentioned in the description.\nC. The apple is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The size of the apple is not mentioned.", + "A. The texture of the apple is mentioned in the description and is smooth or glossy.", + "A. The color of the apple is mentioned in the description and is red.", + "B. The lemon is not mentioned in the description.", + "B. The orange is not mentioned in the description.", + "B. The pineapple is not mentioned in the description.", + "B. The seeds of the apple are not mentioned in the description.", + "B. The stem of the apple is not mentioned in the description." 
+ ], + "score": 0.875, + "score_pos": 0.6666666666666666, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "11012500": { + "pred": "A burrito filled with fresh green arugula, a slice of ripe tomato, shredded lettuce, and a layer of seasoned ground meat, all wrapped in a soft, lightly toasted tortilla.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the lettuce is not mentioned, but the lettuce of the taco is mentioned.", + 0.5 + ], + [ + "The shape of the lettuce is mentioned in the description but is not shredded.", + -1 + ], + [ + "The lettuce or the taco is not mentioned.", + 0 + ], + [ + "The shape of the lettuce is mentioned in the description and is shredded.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the lettuce is mentioned in the description and is shredded.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the tomato is not mentioned, but the tomato of the taco is mentioned.", + 0.5 + ], + [ + "The shape of the tomato is mentioned in the description but is not sliced.", + -1 + ], + [ + "The tomato or the taco is not mentioned.", + 0 + ], + [ + "The shape of the tomato is mentioned in the description and is sliced.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The shape of the tomato is mentioned in the description and is sliced.", + "pred_index": 3, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the arugula is not mentioned, but the arugula of the taco is mentioned.", + 0.5 + ], + [ + "The color of the arugula is mentioned in the description but is not green.", + -1 + ], + [ + "The arugula or the taco is not mentioned.", + 0 + ], + [ + "The color of the arugula is mentioned in the description and is green.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the arugula is mentioned in the description and is green.", + "pred_index": 3, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the tortilla is not mentioned, but the tortilla of the taco is mentioned.", + 0.5 + ], + [ + "The color of the tortilla is mentioned in the description but is not white.", + -1 + ], + [ + "The tortilla or the taco is not mentioned.", + 0 + ], + [ + "The color of the tortilla is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the tortilla is mentioned in the description but is not white.", + "pred_index": 1, + "question_index": 3, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The taco is not mentioned in the description.", + 0 + ], + [ + "The whipped cream of the taco is not mentioned in the description.", + 1 + ], + [ + "The whipped cream of the taco is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The taco is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The two glasses of lemonade with lemon slices and straws are not mentioned in the description.", + 1 + ], + [ + "The two glasses of lemonade with lemon slices and straws are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The two glasses of lemonade with lemon slices and straws are not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The taco is not mentioned in the description.", + 0 + ], + [ + "The nuts of the taco are not mentioned in the description.", + 1 + ], + [ + "The nuts of the taco are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The taco is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sandwich with vegetables are not mentioned in the description.", + 1 + ], + [ + "The sandwich with vegetables are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The sandwich with vegetables are mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The taco is not mentioned in the description.", + 0 + ], + [ + "The chocolate of the taco is not mentioned in the description.", + 1 + ], + [ + "The chocolate of the taco is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The taco is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 0 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a taco or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA burrito filled with fresh green arugula, a slice of ripe tomato, shredded lettuce, and a layer of seasoned ground meat, all wrapped in a soft, lightly toasted tortilla.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a taco or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA burrito filled with fresh green arugula, a slice of ripe tomato, shredded lettuce, and a layer of seasoned ground meat, all wrapped in a soft, lightly toasted tortilla.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the lettuce is not mentioned, but the lettuce of the taco is mentioned.\nB. The shape of the lettuce is mentioned in the description but is not shredded.\nC. The lettuce or the taco is not mentioned.\nD. The shape of the lettuce is mentioned in the description and is shredded.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA burrito filled with fresh green arugula, a slice of ripe tomato, shredded lettuce, and a layer of seasoned ground meat, all wrapped in a soft, lightly toasted tortilla.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the tomato is not mentioned, but the tomato of the taco is mentioned.\nB. The shape of the tomato is mentioned in the description but is not sliced.\nC. The tomato or the taco is not mentioned.\nD. 
The shape of the tomato is mentioned in the description and is sliced.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA burrito filled with fresh green arugula, a slice of ripe tomato, shredded lettuce, and a layer of seasoned ground meat, all wrapped in a soft, lightly toasted tortilla.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the arugula is not mentioned, but the arugula of the taco is mentioned.\nB. The color of the arugula is mentioned in the description but is not green.\nC. The arugula or the taco is not mentioned.\nD. The color of the arugula is mentioned in the description and is green.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA burrito filled with fresh green arugula, a slice of ripe tomato, shredded lettuce, and a layer of seasoned ground meat, all wrapped in a soft, lightly toasted tortilla.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the tortilla is not mentioned, but the tortilla of the taco is mentioned.\nB. 
The color of the tortilla is mentioned in the description but is not white.\nC. The tortilla or the taco is not mentioned.\nD. The color of the tortilla is mentioned in the description and is white.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA burrito filled with fresh green arugula, a slice of ripe tomato, shredded lettuce, and a layer of seasoned ground meat, all wrapped in a soft, lightly toasted tortilla.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The taco is not mentioned in the description.\nB. The whipped cream of the taco is not mentioned in the description.\nC. The whipped cream of the taco is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA burrito filled with fresh green arugula, a slice of ripe tomato, shredded lettuce, and a layer of seasoned ground meat, all wrapped in a soft, lightly toasted tortilla.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The two glasses of lemonade with lemon slices and straws are not mentioned in the description.\nB. 
The two glasses of lemonade with lemon slices and straws are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA burrito filled with fresh green arugula, a slice of ripe tomato, shredded lettuce, and a layer of seasoned ground meat, all wrapped in a soft, lightly toasted tortilla.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The taco is not mentioned in the description.\nB. The nuts of the taco are not mentioned in the description.\nC. The nuts of the taco are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA burrito filled with fresh green arugula, a slice of ripe tomato, shredded lettuce, and a layer of seasoned ground meat, all wrapped in a soft, lightly toasted tortilla.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sandwich with vegetables are not mentioned in the description.\nB. The sandwich with vegetables are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA burrito filled with fresh green arugula, a slice of ripe tomato, shredded lettuce, and a layer of seasoned ground meat, all wrapped in a soft, lightly toasted tortilla.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The taco is not mentioned in the description.\nB. The chocolate of the taco is not mentioned in the description.\nC. The chocolate of the taco is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The shape of the lettuce is mentioned in the description and is shredded.", + "D. The shape of the tomato is mentioned in the description and is sliced.", + "D. The color of the arugula is mentioned in the description and is green.", + "B. The color of the tortilla is mentioned in the description but is not white.", + "A. The taco is not mentioned in the description.", + "A. The two glasses of lemonade with lemon slices and straws are not mentioned in the description.", + "A. The taco is not mentioned in the description.", + "B. The sandwich with vegetables are mentioned in the description.", + "A. The taco is not mentioned in the description." + ], + "score": 0.2222222222222222, + "score_pos": 0.5, + "score_neg": 0.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "12348080": { + "pred": "A pair of scissors with bright red plastic handles and metallic blades. The handles are oval-shaped with a smooth, glossy finish. 
The blades are straight and sharp, with a slight taper towards the tips.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the handles is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the handles is mentioned in the description and is plastic.", + 1 + ], + [ + "The handles or the scissors are not mentioned.", + 0 + ], + [ + "The material of the handles is not mentioned, but the handles of the scissors are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The material of the handles is mentioned in the description and is plastic.", + "pred_index": 1, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the blades is mentioned in the description but is not silver or metallic.", + -1 + ], + [ + "The color of the blades is mentioned in the description and is silver or metallic.", + 1 + ], + [ + "The blades or the scissors are not mentioned.", + 0 + ], + [ + "The color of the blades is not mentioned, but the blades of the scissors are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the blades is mentioned in the description and is silver or metallic.", + "pred_index": 1, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the blades is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the blades is mentioned in the description and is metal.", + 1 + ], + [ + "The blades or the scissors are not mentioned.", + 0 + ], + [ + "The material of the blades is not mentioned, but the blades of the scissors are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The material of the blades is mentioned in the description and is metal.", + "pred_index": 1, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the handles is mentioned in the description but is not red.", + -1 + ], + [ + "The color of the handles is mentioned in the description and is red.", + 1 + ], + [ + "The handles or the scissors are not mentioned.", + 0 + ], + [ + "The color of the handles is not mentioned, but the handles of the scissors are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the handles is mentioned in the description and is red.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The adjustment screw of the scissors is not mentioned in the description.", + 1 + ], + [ + "The adjustment screw of the scissors is mentioned in the description.", + -1 + ], + [ + "The scissors are not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The adjustment screw of the scissors is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The blade guard of the scissors is not mentioned in the description.", + 1 + ], + [ + "The blade guard of the scissors is mentioned in the description.", + -1 + ], + [ + "The scissors are not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The blade guard of the scissors is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tricycle cart is not mentioned in the description.", + 1 + ], + [ + "The tricycle cart is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The tricycle cart is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The baskets of fruit are not mentioned in the description.", + 1 + ], + [ + "The baskets of fruit are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The baskets of fruit are not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The scale is not mentioned in the description.", + 1 + ], + [ + "The scale is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The scale is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the objects in the description are scissors or objects of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red plastic handles and metallic blades. The handles are oval-shaped with a smooth, glossy finish. The blades are straight and sharp, with a slight taper towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the objects in the description are scissors or objects of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red plastic handles and metallic blades. The handles are oval-shaped with a smooth, glossy finish. The blades are straight and sharp, with a slight taper towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the handles is mentioned in the description but is not plastic.\nB. The material of the handles is mentioned in the description and is plastic.\nC. The handles or the scissors are not mentioned.\nD. The material of the handles is not mentioned, but the handles of the scissors are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red plastic handles and metallic blades. The handles are oval-shaped with a smooth, glossy finish. The blades are straight and sharp, with a slight taper towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the blades is mentioned in the description but is not silver or metallic.\nB. The color of the blades is mentioned in the description and is silver or metallic.\nC. The blades or the scissors are not mentioned.\nD. The color of the blades is not mentioned, but the blades of the scissors are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red plastic handles and metallic blades. The handles are oval-shaped with a smooth, glossy finish. The blades are straight and sharp, with a slight taper towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the blades is mentioned in the description but is not metal.\nB. The material of the blades is mentioned in the description and is metal.\nC. The blades or the scissors are not mentioned.\nD. 
The material of the blades is not mentioned, but the blades of the scissors are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red plastic handles and metallic blades. The handles are oval-shaped with a smooth, glossy finish. The blades are straight and sharp, with a slight taper towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the handles is mentioned in the description but is not red.\nB. The color of the handles is mentioned in the description and is red.\nC. The handles or the scissors are not mentioned.\nD. The color of the handles is not mentioned, but the handles of the scissors are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red plastic handles and metallic blades. The handles are oval-shaped with a smooth, glossy finish. The blades are straight and sharp, with a slight taper towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The adjustment screw of the scissors is not mentioned in the description.\nB. 
The adjustment screw of the scissors is mentioned in the description.\nC. The scissors are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red plastic handles and metallic blades. The handles are oval-shaped with a smooth, glossy finish. The blades are straight and sharp, with a slight taper towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The blade guard of the scissors is not mentioned in the description.\nB. The blade guard of the scissors is mentioned in the description.\nC. The scissors are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red plastic handles and metallic blades. The handles are oval-shaped with a smooth, glossy finish. The blades are straight and sharp, with a slight taper towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tricycle cart is not mentioned in the description.\nB. 
The tricycle cart is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red plastic handles and metallic blades. The handles are oval-shaped with a smooth, glossy finish. The blades are straight and sharp, with a slight taper towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The baskets of fruit are not mentioned in the description.\nB. The baskets of fruit are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red plastic handles and metallic blades. The handles are oval-shaped with a smooth, glossy finish. The blades are straight and sharp, with a slight taper towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The scale is not mentioned in the description.\nB. The scale is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The material of the handles is mentioned in the description and is plastic.", + "B. 
The color of the blades is mentioned in the description and is silver or metallic.", + "B. The material of the blades is mentioned in the description and is metal.", + "B. The color of the handles is mentioned in the description and is red.", + "A. The adjustment screw of the scissors is not mentioned in the description.", + "A. The blade guard of the scissors is not mentioned in the description.", + "A. The tricycle cart is not mentioned in the description.", + "A. The baskets of fruit are not mentioned in the description.", + "A. The scale is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "16951734": { + "pred": "A wedge of cantaloupe with a smooth, light orange flesh and a thin, pale rind.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the potato is not mentioned.", + 0 + ], + [ + "The color of the potato is mentioned in the description and is yellow, golden, or brown.", + 1 + ], + [ + "The potato is not mentioned.", + 0 + ], + [ + "The color of the potato is mentioned in the description but is not yellow, golden, or brown.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The potato is not mentioned.", + "pred_index": 2, + "question_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the potato is not mentioned.", + 0 + ], + [ + "The texture of the potato is mentioned in the description and is smooth.", + 1 + ], + [ + "The potato is not mentioned.", + 0 + ], + [ + "The texture of the potato is mentioned in the description but is not smooth.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The potato is not mentioned.", + "pred_index": 2, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the potato is not mentioned.", + 0 + ], + [ + "The shape of the potato is mentioned in the description and is irregular.", + 1 + ], + [ + "The potato is not mentioned.", + 0 + ], + [ + "The shape of the potato is mentioned in the description but is not irregular.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The potato is not mentioned.", + "pred_index": 2, + "question_index": 2, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cup is not mentioned in the description.", + 1 + ], + [ + "The cup is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The cup is not mentioned in the description.", + "pred_index": 0, + "question_index": 3, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sprouts of the potato are not mentioned in the description.", + 1 + ], + [ + "The potato is not mentioned in the description.", + 0 + ], + [ + "The sprouts of the potato are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The potato is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bowl is not mentioned in the description.", + 1 + ], + [ + "The bowl is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The bowl is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The roots of the potato are not mentioned in the description.", + 1 + ], + [ + "The potato is not mentioned in the description.", + 0 + ], + [ + "The roots of the potato are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The potato is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The liquid is not mentioned in the description.", + 1 + ], + [ + "The liquid is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The liquid is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 0 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a potato or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "B. No", + "pred_index": 1, + "eval_result": "incorrect" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA wedge of cantaloupe with a smooth, light orange flesh and a thin, pale rind.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a potato or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wedge of cantaloupe with a smooth, light orange flesh and a thin, pale rind.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the potato is not mentioned.\nB. The color of the potato is mentioned in the description and is yellow, golden, or brown.\nC. The potato is not mentioned.\nD. The color of the potato is mentioned in the description but is not yellow, golden, or brown.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wedge of cantaloupe with a smooth, light orange flesh and a thin, pale rind.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The texture of the potato is not mentioned.\nB. The texture of the potato is mentioned in the description and is smooth.\nC. The potato is not mentioned.\nD. The texture of the potato is mentioned in the description but is not smooth.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wedge of cantaloupe with a smooth, light orange flesh and a thin, pale rind.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the potato is not mentioned.\nB. The shape of the potato is mentioned in the description and is irregular.\nC. The potato is not mentioned.\nD. The shape of the potato is mentioned in the description but is not irregular.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wedge of cantaloupe with a smooth, light orange flesh and a thin, pale rind.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cup is not mentioned in the description.\nB. The cup is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wedge of cantaloupe with a smooth, light orange flesh and a thin, pale rind.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sprouts of the potato are not mentioned in the description.\nB. The potato is not mentioned in the description.\nC. The sprouts of the potato are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wedge of cantaloupe with a smooth, light orange flesh and a thin, pale rind.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bowl is not mentioned in the description.\nB. The bowl is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA wedge of cantaloupe with a smooth, light orange flesh and a thin, pale rind.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The roots of the potato are not mentioned in the description.\nB. The potato is not mentioned in the description.\nC. The roots of the potato are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wedge of cantaloupe with a smooth, light orange flesh and a thin, pale rind.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The liquid is not mentioned in the description.\nB. The liquid is mentioned in the description.\n" + ], + "response": [ + "B. No", + "C. The potato is not mentioned.", + "C. The potato is not mentioned.", + "C. The potato is not mentioned.", + "A. The cup is not mentioned in the description.", + "B. The potato is not mentioned in the description.", + "A. The bowl is not mentioned in the description.", + "B. The potato is not mentioned in the description.", + "A. The liquid is not mentioned in the description." 
+ ], + "score": 0.0, + "score_pos": 0.0, + "score_neg": 0.0, + "neg_valid_num": 5, + "recognition_result": false + }, + "17265254": { + "pred": "A traditional rickshaw with a black frame and a red seat, featuring a curved handlebar and a single front wheel with spokes and a rubber tire.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the wheels is not mentioned, but the wheels of the rickshaw are mentioned.", + 0.5 + ], + [ + "The wheels or the rickshaw are not mentioned.", + 0 + ], + [ + "The shape of the wheels is mentioned in the description and is circular or spoked.", + 1 + ], + [ + "The shape of the wheels is mentioned in the description but is not circular or spoked.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the wheels is mentioned in the description and is circular or spoked.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the seat is not mentioned, but the seat of the rickshaw is mentioned.", + 0.5 + ], + [ + "The seat or the rickshaw is not mentioned.", + 0 + ], + [ + "The color of the seat is mentioned in the description and is black.", + 1 + ], + [ + "The color of the seat is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the seat is not mentioned, but the seat of the rickshaw is mentioned.", + "pred_index": 0, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The number of parts of the wheels is not mentioned, but the wheels of the rickshaw are mentioned.", + 0.5 + ], + [ + "The wheels or the rickshaw are not mentioned.", + 0 + ], + [ + "The number of parts of the wheels is mentioned in the description and is 2.", + 1 + ], + [ + "The number of parts of the wheels is mentioned in the description but is not 2.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. The number of parts of the wheels is mentioned in the description but is not 2.", + "pred_index": 3, + "question_index": 2, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lights of the rickshaw are not mentioned in the description.", + 1 + ], + [ + "The lights of the rickshaw are mentioned in the description.", + -1 + ], + [ + "The rickshaw is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The lights of the rickshaw are not mentioned in the description.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The people are not mentioned in the description.", + 1 + ], + [ + "The people are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The people are not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The storage compartment of the rickshaw is not mentioned in the description.", + 1 + ], + [ + "The storage compartment of the rickshaw is mentioned in the description.", + -1 + ], + [ + "The rickshaw is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The storage compartment of the rickshaw is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bamboo forest is not mentioned in the description.", + 1 + ], + [ + "The bamboo forest is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The bamboo forest is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The horn of the rickshaw is not mentioned in the description.", + 1 + ], + [ + "The horn of the rickshaw is mentioned in the description.", + -1 + ], + [ + "The rickshaw is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The horn of the rickshaw is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a rickshaw or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat, featuring a curved handlebar and a single front wheel with spokes and a rubber tire.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a rickshaw or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat, featuring a curved handlebar and a single front wheel with spokes and a rubber tire.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the wheels is not mentioned, but the wheels of the rickshaw are mentioned.\nB. The wheels or the rickshaw are not mentioned.\nC. The shape of the wheels is mentioned in the description and is circular or spoked.\nD. The shape of the wheels is mentioned in the description but is not circular or spoked.\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat, featuring a curved handlebar and a single front wheel with spokes and a rubber tire.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the seat is not mentioned, but the seat of the rickshaw is mentioned.\nB. The seat or the rickshaw is not mentioned.\nC. The color of the seat is mentioned in the description and is black.\nD. The color of the seat is mentioned in the description but is not black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat, featuring a curved handlebar and a single front wheel with spokes and a rubber tire.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The number of parts of the wheels is not mentioned, but the wheels of the rickshaw are mentioned.\nB. The wheels or the rickshaw are not mentioned.\nC. The number of parts of the wheels is mentioned in the description and is 2.\nD. 
The number of parts of the wheels is mentioned in the description but is not 2.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat, featuring a curved handlebar and a single front wheel with spokes and a rubber tire.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lights of the rickshaw are not mentioned in the description.\nB. The lights of the rickshaw are mentioned in the description.\nC. The rickshaw is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat, featuring a curved handlebar and a single front wheel with spokes and a rubber tire.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The people are not mentioned in the description.\nB. The people are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat, featuring a curved handlebar and a single front wheel with spokes and a rubber tire.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The storage compartment of the rickshaw is not mentioned in the description.\nB. The storage compartment of the rickshaw is mentioned in the description.\nC. The rickshaw is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat, featuring a curved handlebar and a single front wheel with spokes and a rubber tire.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bamboo forest is not mentioned in the description.\nB. The bamboo forest is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat, featuring a curved handlebar and a single front wheel with spokes and a rubber tire.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The horn of the rickshaw is not mentioned in the description.\nB. The horn of the rickshaw is mentioned in the description.\nC. The rickshaw is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The shape of the wheels is mentioned in the description and is circular or spoked.", + "A. The color of the seat is not mentioned, but the seat of the rickshaw is mentioned.", + "D. The number of parts of the wheels is mentioned in the description but is not 2.", + "A. The lights of the rickshaw are not mentioned in the description.", + "A. The people are not mentioned in the description.", + "A. The storage compartment of the rickshaw is not mentioned in the description.", + "A. The bamboo forest is not mentioned in the description.", + "A. The horn of the rickshaw is not mentioned in the description." + ], + "score": 0.6875, + "score_pos": 0.16666666666666666, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "18845103": { + "pred": "A metallic spoon with a slightly curved handle and a shallow, oval-shaped bowl. 
The handle has a smooth, reflective surface with a subtle taper towards the bowl.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the handle is not mentioned, but the handle of the spoon is mentioned.", + 0.5 + ], + [ + "The color of the handle is mentioned in the description and is metallic.", + 1 + ], + [ + "The handle or the spoon is not mentioned.", + 0 + ], + [ + "The color of the handle is mentioned in the description but is not metallic.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the handle is not mentioned, but the handle of the spoon is mentioned.", + "pred_index": 0, + "question_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the bowl is not mentioned, but the bowl of the spoon is mentioned.", + 0.5 + ], + [ + "The material of the bowl is mentioned in the description and is metal.", + 1 + ], + [ + "The bowl or the spoon is not mentioned.", + 0 + ], + [ + "The material of the bowl is mentioned in the description but is not metal.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the bowl is not mentioned, but the bowl of the spoon is mentioned.", + "pred_index": 0, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the bowl is not mentioned, but the bowl of the spoon is mentioned.", + 0.5 + ], + [ + "The color of the bowl is mentioned in the description and is metallic.", + 1 + ], + [ + "The bowl or the spoon is not mentioned.", + 0 + ], + [ + "The color of the bowl is mentioned in the description but is not metallic.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the bowl is not mentioned, but the bowl of the spoon is mentioned.", + "pred_index": 0, + "question_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the bowl is not mentioned, but the bowl of the spoon is mentioned.", + 0.5 + ], + [ + "The shape of the bowl is mentioned in the description and is round or oval.", + 1 + ], + [ + "The bowl or the spoon is not mentioned.", + 0 + ], + [ + "The shape of the bowl is mentioned in the description but is not round or oval.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the bowl is mentioned in the description and is round or oval.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the handle is not mentioned, but the handle of the spoon is mentioned.", + 0.5 + ], + [ + "The shape of the handle is mentioned in the description and is long, elongated, straight, or slender.", + 1 + ], + [ + "The handle or the spoon is not mentioned.", + 0 + ], + [ + "The shape of the handle is mentioned in the description but is not long, elongated, straight, or slender.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the handle is mentioned in the description but is not long, elongated, straight, or slender.", + "pred_index": 3, + "question_index": 4, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The spoon is not mentioned in the description.", + 0 + ], + [ + "The engraved handle of the spoon is mentioned in the description.", + -1 + ], + [ + "The engraved handle of the spoon is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The engraved handle of the spoon is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cutting board is mentioned in the description.", + -1 + ], + [ + "The cutting board is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The cutting board is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The kitchen cabinets are mentioned in the description.", + -1 + ], + [ + "The kitchen cabinets are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The kitchen cabinets are not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sink is mentioned in the description.", + -1 + ], + [ + "The sink is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The sink is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The spoon is not mentioned in the description.", + 0 + ], + [ + "The twisted handle of the spoon is mentioned in the description.", + -1 + ], + [ + "The twisted handle of the spoon is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The twisted handle of the spoon is not mentioned in the description.", + "pred_index": 2, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a spoon or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved handle and a shallow, oval-shaped bowl. The handle has a smooth, reflective surface with a subtle taper towards the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a spoon or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved handle and a shallow, oval-shaped bowl. 
The handle has a smooth, reflective surface with a subtle taper towards the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the handle is not mentioned, but the handle of the spoon is mentioned.\nB. The color of the handle is mentioned in the description and is metallic.\nC. The handle or the spoon is not mentioned.\nD. The color of the handle is mentioned in the description but is not metallic.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved handle and a shallow, oval-shaped bowl. The handle has a smooth, reflective surface with a subtle taper towards the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the bowl is not mentioned, but the bowl of the spoon is mentioned.\nB. The material of the bowl is mentioned in the description and is metal.\nC. The bowl or the spoon is not mentioned.\nD. The material of the bowl is mentioned in the description but is not metal.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved handle and a shallow, oval-shaped bowl. The handle has a smooth, reflective surface with a subtle taper towards the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the bowl is not mentioned, but the bowl of the spoon is mentioned.\nB. The color of the bowl is mentioned in the description and is metallic.\nC. The bowl or the spoon is not mentioned.\nD. The color of the bowl is mentioned in the description but is not metallic.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved handle and a shallow, oval-shaped bowl. The handle has a smooth, reflective surface with a subtle taper towards the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the bowl is not mentioned, but the bowl of the spoon is mentioned.\nB. The shape of the bowl is mentioned in the description and is round or oval.\nC. The bowl or the spoon is not mentioned.\nD. The shape of the bowl is mentioned in the description but is not round or oval.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved handle and a shallow, oval-shaped bowl. The handle has a smooth, reflective surface with a subtle taper towards the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the handle is not mentioned, but the handle of the spoon is mentioned.\nB. The shape of the handle is mentioned in the description and is long, elongated, straight, or slender.\nC. The handle or the spoon is not mentioned.\nD. The shape of the handle is mentioned in the description but is not long, elongated, straight, or slender.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved handle and a shallow, oval-shaped bowl. The handle has a smooth, reflective surface with a subtle taper towards the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The spoon is not mentioned in the description.\nB. The engraved handle of the spoon is mentioned in the description.\nC. The engraved handle of the spoon is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved handle and a shallow, oval-shaped bowl. The handle has a smooth, reflective surface with a subtle taper towards the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cutting board is mentioned in the description.\nB. The cutting board is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved handle and a shallow, oval-shaped bowl. The handle has a smooth, reflective surface with a subtle taper towards the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The kitchen cabinets are mentioned in the description.\nB. The kitchen cabinets are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved handle and a shallow, oval-shaped bowl. 
The handle has a smooth, reflective surface with a subtle taper towards the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sink is mentioned in the description.\nB. The sink is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved handle and a shallow, oval-shaped bowl. The handle has a smooth, reflective surface with a subtle taper towards the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The spoon is not mentioned in the description.\nB. The twisted handle of the spoon is mentioned in the description.\nC. The twisted handle of the spoon is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The color of the handle is not mentioned, but the handle of the spoon is mentioned.", + "A. The material of the bowl is not mentioned, but the bowl of the spoon is mentioned.", + "A. The color of the bowl is not mentioned, but the bowl of the spoon is mentioned.", + "B. The shape of the bowl is mentioned in the description and is round or oval.", + "D. The shape of the handle is mentioned in the description but is not long, elongated, straight, or slender.", + "C. The engraved handle of the spoon is not mentioned in the description.", + "B. The cutting board is not mentioned in the description.", + "B. The kitchen cabinets are not mentioned in the description.", + "B. 
The sink is not mentioned in the description.", + "C. The twisted handle of the spoon is not mentioned in the description." + ], + "score": 0.65, + "score_pos": 0.3, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "20993402": { + "pred": "A roll of translucent adhesive tape with a smooth, glossy surface and a slightly reflective finish. The tape is wound tightly around a central cardboard core, which is visible at the top.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the tape is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the tape is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the tape is not mentioned.", + 0 + ], + [ + "The tape is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The texture of the tape is mentioned in the description and is smooth.", + "pred_index": 1, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the tape roll is mentioned in the description but is not beige, white, or transparent.", + -1 + ], + [ + "The color of the tape roll is mentioned in the description and is beige, white, or transparent.", + 1 + ], + [ + "The color of the tape roll is not mentioned, but the tape roll of the tape is mentioned.", + 0.5 + ], + [ + "The tape roll or the tape is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the tape roll is mentioned in the description and is beige, white, or transparent.", + "pred_index": 1, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the tape is mentioned in the description but is not adhesive tape.", + -1 + ], + [ + "The type of the tape is mentioned in the description and is adhesive tape.", + 1 + ], + [ + "The type of the tape is not mentioned.", + 0 + ], + [ + "The tape is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The type of the tape is mentioned in the description and is adhesive tape.", + "pred_index": 1, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the core is mentioned in the description but is not cardboard.", + -1 + ], + [ + "The material of the core is mentioned in the description and is cardboard.", + 1 + ], + [ + "The material of the core is not mentioned, but the core of the tape is mentioned.", + 0.5 + ], + [ + "The core or the tape is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The material of the core is mentioned in the description and is cardboard.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the tape roll is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the tape roll is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the tape roll is not mentioned, but the tape roll of the tape is mentioned.", + 0.5 + ], + [ + "The tape roll or the tape is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The material of the tape roll is mentioned in the description and is plastic.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tape is not mentioned in the description.", + 0 + ], + [ + "The dispenser of the tape is not mentioned in the description.", + 1 + ], + [ + "The dispenser of the tape is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The dispenser of the tape is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The trees are not mentioned in the description.", + 1 + ], + [ + "The trees are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The trees are not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The window is not mentioned in the description.", + 1 + ], + [ + "The window is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The window is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The stack of plates are not mentioned in the description.", + 1 + ], + [ + "The stack of plates are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The stack of plates are not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tape is not mentioned in the description.", + 0 + ], + [ + "The cutting edge of the tape is not mentioned in the description.", + 1 + ], + [ + "The cutting edge of the tape is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The cutting edge of the tape is not mentioned in the description.", + "pred_index": 1, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a tape or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of translucent adhesive tape with a smooth, glossy surface and a slightly reflective finish. The tape is wound tightly around a central cardboard core, which is visible at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a tape or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of translucent adhesive tape with a smooth, glossy surface and a slightly reflective finish. The tape is wound tightly around a central cardboard core, which is visible at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the tape is mentioned in the description but is not smooth.\nB. The texture of the tape is mentioned in the description and is smooth.\nC. The texture of the tape is not mentioned.\nD. The tape is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of translucent adhesive tape with a smooth, glossy surface and a slightly reflective finish. The tape is wound tightly around a central cardboard core, which is visible at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the tape roll is mentioned in the description but is not beige, white, or transparent.\nB. The color of the tape roll is mentioned in the description and is beige, white, or transparent.\nC. The color of the tape roll is not mentioned, but the tape roll of the tape is mentioned.\nD. 
The tape roll or the tape is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of translucent adhesive tape with a smooth, glossy surface and a slightly reflective finish. The tape is wound tightly around a central cardboard core, which is visible at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The type of the tape is mentioned in the description but is not adhesive tape.\nB. The type of the tape is mentioned in the description and is adhesive tape.\nC. The type of the tape is not mentioned.\nD. The tape is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of translucent adhesive tape with a smooth, glossy surface and a slightly reflective finish. The tape is wound tightly around a central cardboard core, which is visible at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the core is mentioned in the description but is not cardboard.\nB. The material of the core is mentioned in the description and is cardboard.\nC. 
The material of the core is not mentioned, but the core of the tape is mentioned.\nD. The core or the tape is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of translucent adhesive tape with a smooth, glossy surface and a slightly reflective finish. The tape is wound tightly around a central cardboard core, which is visible at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the tape roll is mentioned in the description but is not plastic.\nB. The material of the tape roll is mentioned in the description and is plastic.\nC. The material of the tape roll is not mentioned, but the tape roll of the tape is mentioned.\nD. The tape roll or the tape is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of translucent adhesive tape with a smooth, glossy surface and a slightly reflective finish. The tape is wound tightly around a central cardboard core, which is visible at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tape is not mentioned in the description.\nB. 
The dispenser of the tape is not mentioned in the description.\nC. The dispenser of the tape is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of translucent adhesive tape with a smooth, glossy surface and a slightly reflective finish. The tape is wound tightly around a central cardboard core, which is visible at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The trees are not mentioned in the description.\nB. The trees are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of translucent adhesive tape with a smooth, glossy surface and a slightly reflective finish. The tape is wound tightly around a central cardboard core, which is visible at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The window is not mentioned in the description.\nB. The window is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of translucent adhesive tape with a smooth, glossy surface and a slightly reflective finish. The tape is wound tightly around a central cardboard core, which is visible at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The stack of plates are not mentioned in the description.\nB. The stack of plates are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of translucent adhesive tape with a smooth, glossy surface and a slightly reflective finish. The tape is wound tightly around a central cardboard core, which is visible at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tape is not mentioned in the description.\nB. The cutting edge of the tape is not mentioned in the description.\nC. The cutting edge of the tape is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The texture of the tape is mentioned in the description and is smooth.", + "B. The color of the tape roll is mentioned in the description and is beige, white, or transparent.", + "B. The type of the tape is mentioned in the description and is adhesive tape.", + "B. 
The material of the core is mentioned in the description and is cardboard.", + "B. The material of the tape roll is mentioned in the description and is plastic.", + "B. The dispenser of the tape is not mentioned in the description.", + "A. The trees are not mentioned in the description.", + "A. The window is not mentioned in the description.", + "A. The stack of plates are not mentioned in the description.", + "B. The cutting edge of the tape is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "21529954": { + "pred": "A cylindrical can with a white cap, featuring a vibrant design. The top half is orange with a small white logo, while the bottom half is green with a large, stylized white text. The can has a slightly curved shape and a glossy finish.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The can/container is not mentioned.", + 0 + ], + [ + "The material of the can/container is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the can/container is not mentioned.", + 0 + ], + [ + "The material of the can/container is mentioned in the description and is plastic.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the can/container is not mentioned.", + "pred_index": 2, + "question_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cap/lid or the can/container is not mentioned.", + 0 + ], + [ + "The shape of the cap/lid is mentioned in the description but is not circular.", + -1 + ], + [ + "The shape of the cap/lid is not mentioned, but the cap/lid of the can/container is mentioned.", + 0.5 + ], + [ + "The shape of the cap/lid is mentioned in the description and is circular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the cap/lid is not mentioned, but the cap/lid of the can/container is mentioned.", + "pred_index": 2, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cap/lid or the can/container is not mentioned.", + 0 + ], + [ + "The color of the cap/lid is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the cap/lid is not mentioned, but the cap/lid of the can/container is mentioned.", + 0.5 + ], + [ + "The color of the cap/lid is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the cap/lid is mentioned in the description and is white.", + "pred_index": 3, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The body or the can/container is not mentioned.", + 0 + ], + [ + "The shape of the body is mentioned in the description but is not cylindrical.", + -1 + ], + [ + "The shape of the body is not mentioned, but the body of the can/container is mentioned.", + 0.5 + ], + [ + "The shape of the body is mentioned in the description and is cylindrical.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the body is mentioned in the description and is cylindrical.", + "pred_index": 3, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The label or the can/container is not mentioned.", + 0 + ], + [ + "The color of the label is mentioned in the description but is not green, white, yellow.", + -1 + ], + [ + "The color of the label is not mentioned, but the label of the can/container is mentioned.", + 0.5 + ], + [ + "The color of the label is mentioned in the description and is green, white, yellow.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the label is mentioned in the description and is green, white, yellow.", + "pred_index": 3, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The raspberries are mentioned in the description.", + -1 + ], + [ + "The raspberries are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The raspberries are not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The red bell peppers are mentioned in the description.", + -1 + ], + [ + "The red bell peppers are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The red bell peppers are not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ginger is mentioned in the description.", + -1 + ], + [ + "The ginger is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The ginger is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sweet potato is mentioned in the description.", + -1 + ], + [ + "The sweet potato is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The sweet potato is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The base of the can/container is mentioned in the description.", + -1 + ], + [ + "The base of the can/container is not mentioned in the description.", + 1 + ], + [ + "The can/container is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The base of the can/container is not mentioned in the description.", + "pred_index": 1, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a can/container or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a white cap, featuring a vibrant design. The top half is orange with a small white logo, while the bottom half is green with a large, stylized white text. The can has a slightly curved shape and a glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a can/container or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. 
No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a white cap, featuring a vibrant design. The top half is orange with a small white logo, while the bottom half is green with a large, stylized white text. The can has a slightly curved shape and a glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The can/container is not mentioned.\nB. The material of the can/container is mentioned in the description but is not plastic.\nC. The material of the can/container is not mentioned.\nD. The material of the can/container is mentioned in the description and is plastic.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a white cap, featuring a vibrant design. The top half is orange with a small white logo, while the bottom half is green with a large, stylized white text. The can has a slightly curved shape and a glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cap/lid or the can/container is not mentioned.\nB. 
The shape of the cap/lid is mentioned in the description but is not circular.\nC. The shape of the cap/lid is not mentioned, but the cap/lid of the can/container is mentioned.\nD. The shape of the cap/lid is mentioned in the description and is circular.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a white cap, featuring a vibrant design. The top half is orange with a small white logo, while the bottom half is green with a large, stylized white text. The can has a slightly curved shape and a glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cap/lid or the can/container is not mentioned.\nB. The color of the cap/lid is mentioned in the description but is not white.\nC. The color of the cap/lid is not mentioned, but the cap/lid of the can/container is mentioned.\nD. The color of the cap/lid is mentioned in the description and is white.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a white cap, featuring a vibrant design. The top half is orange with a small white logo, while the bottom half is green with a large, stylized white text. 
The can has a slightly curved shape and a glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The body or the can/container is not mentioned.\nB. The shape of the body is mentioned in the description but is not cylindrical.\nC. The shape of the body is not mentioned, but the body of the can/container is mentioned.\nD. The shape of the body is mentioned in the description and is cylindrical.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a white cap, featuring a vibrant design. The top half is orange with a small white logo, while the bottom half is green with a large, stylized white text. The can has a slightly curved shape and a glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The label or the can/container is not mentioned.\nB. The color of the label is mentioned in the description but is not green, white, yellow.\nC. The color of the label is not mentioned, but the label of the can/container is mentioned.\nD. The color of the label is mentioned in the description and is green, white, yellow.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a white cap, featuring a vibrant design. The top half is orange with a small white logo, while the bottom half is green with a large, stylized white text. The can has a slightly curved shape and a glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The raspberries are mentioned in the description.\nB. The raspberries are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a white cap, featuring a vibrant design. The top half is orange with a small white logo, while the bottom half is green with a large, stylized white text. The can has a slightly curved shape and a glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The red bell peppers are mentioned in the description.\nB. The red bell peppers are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a white cap, featuring a vibrant design. 
The top half is orange with a small white logo, while the bottom half is green with a large, stylized white text. The can has a slightly curved shape and a glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ginger is mentioned in the description.\nB. The ginger is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a white cap, featuring a vibrant design. The top half is orange with a small white logo, while the bottom half is green with a large, stylized white text. The can has a slightly curved shape and a glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sweet potato is mentioned in the description.\nB. The sweet potato is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a white cap, featuring a vibrant design. The top half is orange with a small white logo, while the bottom half is green with a large, stylized white text. 
The can has a slightly curved shape and a glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The base of the can/container is mentioned in the description.\nB. The base of the can/container is not mentioned in the description.\nC. The can/container is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The material of the can/container is not mentioned.", + "C. The shape of the cap/lid is not mentioned, but the cap/lid of the can/container is mentioned.", + "D. The color of the cap/lid is mentioned in the description and is white.", + "D. The shape of the body is mentioned in the description and is cylindrical.", + "D. The color of the label is mentioned in the description and is green, white, yellow.", + "B. The raspberries are not mentioned in the description.", + "B. The red bell peppers are not mentioned in the description.", + "B. The ginger is not mentioned in the description.", + "B. The sweet potato is not mentioned in the description.", + "B. The base of the can/container is not mentioned in the description." + ], + "score": 0.85, + "score_pos": 0.7, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "22879790": { + "pred": "A single, large, white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a few thin, papery layers visible near the top. 
The root end is dark brown and slightly shriveled, with a few small roots extending from it.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the garlic is mentioned in the description and is white.", + 1 + ], + [ + "The color of the garlic is not mentioned.", + 0 + ], + [ + "The garlic is not mentioned.", + 0 + ], + [ + "The color of the garlic is mentioned in the description but is not white.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The garlic is not mentioned.", + "pred_index": 2, + "question_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the skin is mentioned in the description and is papery.", + 1 + ], + [ + "The texture of the skin is not mentioned, but the skin of the garlic is mentioned.", + 0.5 + ], + [ + "The skin or the garlic is not mentioned.", + 0 + ], + [ + "The texture of the skin is mentioned in the description but is not papery.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The texture of the skin is mentioned in the description and is papery.", + "pred_index": 0, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the root is mentioned in the description and is brown.", + 1 + ], + [ + "The color of the root is not mentioned, but the root of the garlic is mentioned.", + 0.5 + ], + [ + "The root or the garlic is not mentioned.", + 0 + ], + [ + "The color of the root is mentioned in the description but is not brown.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the root is mentioned in the description and is brown.", + "pred_index": 0, + "question_index": 2, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The leaf of the garlic is not mentioned in the description.", + 1 + ], + [ + "The garlic is not mentioned in the description.", + 0 + ], + [ + "The leaf of the garlic is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The garlic is not mentioned in the description.", + "pred_index": 1, + "question_index": 3, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ceramic rooster and hen salt and pepper shakers are not mentioned in the description.", + 1 + ], + [ + "The ceramic rooster and hen salt and pepper shakers are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The ceramic rooster and hen salt and pepper shakers are not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The stem of the garlic is not mentioned in the description.", + 1 + ], + [ + "The garlic is not mentioned in the description.", + 0 + ], + [ + "The stem of the garlic is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The garlic is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ceramic hen salt shaker is not mentioned in the description.", + 1 + ], + [ + "The ceramic hen salt shaker is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The ceramic hen salt shaker is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green glass rooster is not mentioned in the description.", + 1 + ], + [ + "The green glass rooster is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The green glass rooster is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 0 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a garlic or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "B. No", + "pred_index": 1, + "eval_result": "incorrect" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, large, white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a few thin, papery layers visible near the top. 
The root end is dark brown and slightly shriveled, with a few small roots extending from it.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a garlic or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, large, white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a few thin, papery layers visible near the top. The root end is dark brown and slightly shriveled, with a few small roots extending from it.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the garlic is mentioned in the description and is white.\nB. The color of the garlic is not mentioned.\nC. The garlic is not mentioned.\nD. The color of the garlic is mentioned in the description but is not white.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, large, white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a few thin, papery layers visible near the top. 
The root end is dark brown and slightly shriveled, with a few small roots extending from it.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the skin is mentioned in the description and is papery.\nB. The texture of the skin is not mentioned, but the skin of the garlic is mentioned.\nC. The skin or the garlic is not mentioned.\nD. The texture of the skin is mentioned in the description but is not papery.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, large, white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a few thin, papery layers visible near the top. The root end is dark brown and slightly shriveled, with a few small roots extending from it.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the root is mentioned in the description and is brown.\nB. The color of the root is not mentioned, but the root of the garlic is mentioned.\nC. The root or the garlic is not mentioned.\nD. The color of the root is mentioned in the description but is not brown.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA single, large, white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a few thin, papery layers visible near the top. The root end is dark brown and slightly shriveled, with a few small roots extending from it.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The leaf of the garlic is not mentioned in the description.\nB. The garlic is not mentioned in the description.\nC. The leaf of the garlic is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, large, white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a few thin, papery layers visible near the top. The root end is dark brown and slightly shriveled, with a few small roots extending from it.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ceramic rooster and hen salt and pepper shakers are not mentioned in the description.\nB. The ceramic rooster and hen salt and pepper shakers are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA single, large, white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a few thin, papery layers visible near the top. The root end is dark brown and slightly shriveled, with a few small roots extending from it.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The stem of the garlic is not mentioned in the description.\nB. The garlic is not mentioned in the description.\nC. The stem of the garlic is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, large, white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a few thin, papery layers visible near the top. The root end is dark brown and slightly shriveled, with a few small roots extending from it.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ceramic hen salt shaker is not mentioned in the description.\nB. The ceramic hen salt shaker is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA single, large, white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a few thin, papery layers visible near the top. The root end is dark brown and slightly shriveled, with a few small roots extending from it.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green glass rooster is not mentioned in the description.\nB. The green glass rooster is mentioned in the description.\n" + ], + "response": [ + "B. No", + "C. The garlic is not mentioned.", + "A. The texture of the skin is mentioned in the description and is papery.", + "A. The color of the root is mentioned in the description and is brown.", + "B. The garlic is not mentioned in the description.", + "A. The ceramic rooster and hen salt and pepper shakers are not mentioned in the description.", + "B. The garlic is not mentioned in the description.", + "A. The ceramic hen salt shaker is not mentioned in the description.", + "A. The green glass rooster is not mentioned in the description." + ], + "score": 0.0, + "score_pos": 0.0, + "score_neg": 0.0, + "neg_valid_num": 5, + "recognition_result": false + }, + "24010373": { + "pred": "The guitar has a dark, glossy body with a cutaway design. The neck is dark with white dot inlays on the fretboard. The headstock is also dark, matching the neck, and features tuning pegs. 
The body has a circular soundhole and a bridge with white bridge pins.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The neck or the guitar is not mentioned.", + 0 + ], + [ + "The shape of the neck is mentioned in the description and is straight or slightly curved.", + 1 + ], + [ + "The shape of the neck is not mentioned, but the neck of the guitar is mentioned.", + 0.5 + ], + [ + "The shape of the neck is mentioned in the description but is not straight or slightly curved.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the neck is not mentioned, but the neck of the guitar is mentioned.", + "pred_index": 2, + "question_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The body or the guitar is not mentioned.", + 0 + ], + [ + "The texture of the body is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the body is not mentioned, but the body of the guitar is mentioned.", + 0.5 + ], + [ + "The texture of the body is mentioned in the description but is not smooth.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The texture of the body is not mentioned, but the body of the guitar is mentioned.", + "pred_index": 2, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The strings or the guitar are not mentioned.", + 0 + ], + [ + "The number of parts of the strings is mentioned in the description and is 6.", + 1 + ], + [ + "The number of parts of the strings is not mentioned, but the strings of the guitar are mentioned.", + 0.5 + ], + [ + "The number of parts of the strings is mentioned in the description but is not 6.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The number of parts of the strings is not mentioned, but the strings of the guitar are mentioned.", + "pred_index": 2, + "question_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sound hole or the guitar is not mentioned.", + 0 + ], + [ + "The shape of the sound hole is mentioned in the description and is round.", + 1 + ], + [ + "The shape of the sound hole is not mentioned, but the sound hole of the guitar is mentioned.", + 0.5 + ], + [ + "The shape of the sound hole is mentioned in the description but is not round.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the sound hole is mentioned in the description and is round.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The guitar is not mentioned.", + 0 + ], + [ + "The color of the guitar is mentioned in the description and is black.", + 1 + ], + [ + "The color of the guitar is not mentioned.", + 0 + ], + [ + "The color of the guitar is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the guitar is mentioned in the description but is not black.", + "pred_index": 3, + "question_index": 4, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The violin is not mentioned in the description.", + 1 + ], + [ + "The violin is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The violin is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The guitar is not mentioned in the description.", + 0 + ], + [ + "The pickguard of the guitar is not mentioned in the description.", + 1 + ], + [ + "The pickguard of the guitar is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The pickguard of the guitar is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sign is not mentioned in the description.", + 1 + ], + [ + "The sign is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The sign is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The music stands are not mentioned in the description.", + 1 + ], + [ + "The music stands are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The music stands are not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The books are not mentioned in the description.", + 1 + ], + [ + "The books are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The books are not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a guitar or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy body with a cutaway design. The neck is dark with white dot inlays on the fretboard. The headstock is also dark, matching the neck, and features tuning pegs. The body has a circular soundhole and a bridge with white bridge pins.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a guitar or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy body with a cutaway design. The neck is dark with white dot inlays on the fretboard. 
The headstock is also dark, matching the neck, and features tuning pegs. The body has a circular soundhole and a bridge with white bridge pins.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The neck or the guitar is not mentioned.\nB. The shape of the neck is mentioned in the description and is straight or slightly curved.\nC. The shape of the neck is not mentioned, but the neck of the guitar is mentioned.\nD. The shape of the neck is mentioned in the description but is not straight or slightly curved.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy body with a cutaway design. The neck is dark with white dot inlays on the fretboard. The headstock is also dark, matching the neck, and features tuning pegs. The body has a circular soundhole and a bridge with white bridge pins.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The body or the guitar is not mentioned.\nB. The texture of the body is mentioned in the description and is smooth.\nC. The texture of the body is not mentioned, but the body of the guitar is mentioned.\nD. The texture of the body is mentioned in the description but is not smooth.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy body with a cutaway design. The neck is dark with white dot inlays on the fretboard. The headstock is also dark, matching the neck, and features tuning pegs. The body has a circular soundhole and a bridge with white bridge pins.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The strings or the guitar are not mentioned.\nB. The number of parts of the strings is mentioned in the description and is 6.\nC. The number of parts of the strings is not mentioned, but the strings of the guitar are mentioned.\nD. The number of parts of the strings is mentioned in the description but is not 6.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy body with a cutaway design. The neck is dark with white dot inlays on the fretboard. The headstock is also dark, matching the neck, and features tuning pegs. The body has a circular soundhole and a bridge with white bridge pins.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sound hole or the guitar is not mentioned.\nB. The shape of the sound hole is mentioned in the description and is round.\nC. The shape of the sound hole is not mentioned, but the sound hole of the guitar is mentioned.\nD. 
The shape of the sound hole is mentioned in the description but is not round.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy body with a cutaway design. The neck is dark with white dot inlays on the fretboard. The headstock is also dark, matching the neck, and features tuning pegs. The body has a circular soundhole and a bridge with white bridge pins.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The guitar is not mentioned.\nB. The color of the guitar is mentioned in the description and is black.\nC. The color of the guitar is not mentioned.\nD. The color of the guitar is mentioned in the description but is not black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy body with a cutaway design. The neck is dark with white dot inlays on the fretboard. The headstock is also dark, matching the neck, and features tuning pegs. The body has a circular soundhole and a bridge with white bridge pins.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The violin is not mentioned in the description.\nB. The violin is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy body with a cutaway design. The neck is dark with white dot inlays on the fretboard. The headstock is also dark, matching the neck, and features tuning pegs. The body has a circular soundhole and a bridge with white bridge pins.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The guitar is not mentioned in the description.\nB. The pickguard of the guitar is not mentioned in the description.\nC. The pickguard of the guitar is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy body with a cutaway design. The neck is dark with white dot inlays on the fretboard. The headstock is also dark, matching the neck, and features tuning pegs. The body has a circular soundhole and a bridge with white bridge pins.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sign is not mentioned in the description.\nB. 
The sign is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy body with a cutaway design. The neck is dark with white dot inlays on the fretboard. The headstock is also dark, matching the neck, and features tuning pegs. The body has a circular soundhole and a bridge with white bridge pins.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The music stands are not mentioned in the description.\nB. The music stands are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy body with a cutaway design. The neck is dark with white dot inlays on the fretboard. The headstock is also dark, matching the neck, and features tuning pegs. The body has a circular soundhole and a bridge with white bridge pins.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The books are not mentioned in the description.\nB. The books are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. 
The shape of the neck is not mentioned, but the neck of the guitar is mentioned.", + "C. The texture of the body is not mentioned, but the body of the guitar is mentioned.", + "C. The number of parts of the strings is not mentioned, but the strings of the guitar are mentioned.", + "B. The shape of the sound hole is mentioned in the description and is round.", + "D. The color of the guitar is mentioned in the description but is not black.", + "A. The violin is not mentioned in the description.", + "B. The pickguard of the guitar is not mentioned in the description.", + "A. The sign is not mentioned in the description.", + "A. The music stands are not mentioned in the description.", + "A. The books are not mentioned in the description." + ], + "score": 0.65, + "score_pos": 0.3, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "24694197": { + "pred": "A ripe avocado with a bumpy, dark green skin and a central pit cavity filled with a reddish-brown, creamy substance.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The skin or the avocado is not mentioned.", + 0 + ], + [ + "The color of the skin is mentioned in the description but is not dark green.", + -1 + ], + [ + "The color of the skin is mentioned in the description and is dark green.", + 1 + ], + [ + "The color of the skin is not mentioned, but the skin of the avocado is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the skin is mentioned in the description and is dark green.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The flesh or the avocado is not mentioned.", + 0 + ], + [ + "The texture of the flesh is mentioned in the description but is not creamy.", + -1 + ], + [ + "The texture of the flesh is mentioned in the description and is creamy.", + 1 + ], + [ + "The texture of the flesh is not mentioned, but the flesh of the avocado is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. The texture of the flesh is mentioned in the description and is creamy.", + "pred_index": 2, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pit or the avocado is not mentioned.", + 0 + ], + [ + "The shape of the pit is mentioned in the description but is not oval.", + -1 + ], + [ + "The shape of the pit is mentioned in the description and is oval.", + 1 + ], + [ + "The shape of the pit is not mentioned, but the pit of the avocado is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the pit is not mentioned, but the pit of the avocado is mentioned.", + "pred_index": 3, + "question_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The flesh or the avocado is not mentioned.", + 0 + ], + [ + "The color of the flesh is mentioned in the description but is not light green.", + -1 + ], + [ + "The color of the flesh is mentioned in the description and is light green.", + 1 + ], + [ + "The color of the flesh is not mentioned, but the flesh of the avocado is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the flesh is mentioned in the description but is not light green.", + "pred_index": 1, + "question_index": 3, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The avocado is not mentioned in the description.", + 0 + ], + [ + "The leaves of the avocado are mentioned in the description.", + -1 + ], + [ + "The leaves of the avocado are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The leaves of the avocado are not mentioned in the description.", + "pred_index": 2, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The utility pole is mentioned in the description.", + -1 + ], + [ + "The utility pole is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The utility pole is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The avocado is not mentioned in the description.", + 0 + ], + [ + "The stem of the avocado is mentioned in the description.", + -1 + ], + [ + "The stem of the avocado is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The stem of the avocado is not mentioned in the description.", + "pred_index": 2, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The building is mentioned in the description.", + -1 + ], + [ + "The building is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The building is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The people are mentioned in the description.", + -1 + ], + [ + "The people are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The people are not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is an avocado or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green skin and a central pit cavity filled with a reddish-brown, creamy substance.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is an avocado or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green skin and a central pit cavity filled with a reddish-brown, creamy substance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The skin or the avocado is not mentioned.\nB. The color of the skin is mentioned in the description but is not dark green.\nC. The color of the skin is mentioned in the description and is dark green.\nD. The color of the skin is not mentioned, but the skin of the avocado is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green skin and a central pit cavity filled with a reddish-brown, creamy substance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The flesh or the avocado is not mentioned.\nB. The texture of the flesh is mentioned in the description but is not creamy.\nC. The texture of the flesh is mentioned in the description and is creamy.\nD. The texture of the flesh is not mentioned, but the flesh of the avocado is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green skin and a central pit cavity filled with a reddish-brown, creamy substance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pit or the avocado is not mentioned.\nB. The shape of the pit is mentioned in the description but is not oval.\nC. The shape of the pit is mentioned in the description and is oval.\nD. The shape of the pit is not mentioned, but the pit of the avocado is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green skin and a central pit cavity filled with a reddish-brown, creamy substance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The flesh or the avocado is not mentioned.\nB. The color of the flesh is mentioned in the description but is not light green.\nC. The color of the flesh is mentioned in the description and is light green.\nD. The color of the flesh is not mentioned, but the flesh of the avocado is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green skin and a central pit cavity filled with a reddish-brown, creamy substance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The avocado is not mentioned in the description.\nB. The leaves of the avocado are mentioned in the description.\nC. The leaves of the avocado are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green skin and a central pit cavity filled with a reddish-brown, creamy substance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The utility pole is mentioned in the description.\nB. The utility pole is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green skin and a central pit cavity filled with a reddish-brown, creamy substance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The avocado is not mentioned in the description.\nB. The stem of the avocado is mentioned in the description.\nC. The stem of the avocado is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green skin and a central pit cavity filled with a reddish-brown, creamy substance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The building is mentioned in the description.\nB. The building is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green skin and a central pit cavity filled with a reddish-brown, creamy substance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The people are mentioned in the description.\nB. The people are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the skin is mentioned in the description and is dark green.", + "C. The texture of the flesh is mentioned in the description and is creamy.", + "D. The shape of the pit is not mentioned, but the pit of the avocado is mentioned.", + "B. The color of the flesh is mentioned in the description but is not light green.", + "C. The leaves of the avocado are not mentioned in the description.", + "B. The utility pole is not mentioned in the description.", + "C. The stem of the avocado is not mentioned in the description.", + "B. The building is not mentioned in the description.", + "B. The people are not mentioned in the description." + ], + "score": 0.7222222222222222, + "score_pos": 0.375, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "279135": { + "pred": "The ski features a predominantly black base with intricate orange and white geometric patterns. The design includes a series of interconnected shapes and lines, creating a dynamic and modern appearance. The tip of the ski is slightly curved and tapers to a point, with the pattern continuing seamlessly along the length of the ski.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The binding or the ski is not mentioned.", + 0 + ], + [ + "The color of the binding is not mentioned, but the binding of the ski is mentioned.", + 0.5 + ], + [ + "The color of the binding is mentioned in the description and is black or orange.", + 1 + ], + [ + "The color of the binding is mentioned in the description but is not black or orange.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The binding or the ski is not mentioned.", + "pred_index": 0, + "question_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The deck or the ski is not mentioned.", + 0 + ], + [ + "The color of the deck is not mentioned, but the deck of the ski is mentioned.", + 0.5 + ], + [ + "The color of the deck is mentioned in the description and is black, white, or orange.", + 1 + ], + [ + "The color of the deck is mentioned in the description but is not black, white, or orange.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The deck or the ski is not mentioned.", + "pred_index": 0, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The binding or the ski is not mentioned.", + 0 + ], + [ + "The material of the binding is not mentioned, but the binding of the ski is mentioned.", + 0.5 + ], + [ + "The material of the binding is mentioned in the description and is metal and plastic.", + 1 + ], + [ + "The material of the binding is mentioned in the description but is not metal and plastic.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The binding or the ski is not mentioned.", + "pred_index": 0, + "question_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The deck or the ski is not mentioned.", + 0 + ], + [ + "The shape of the deck is not mentioned, but the deck of the ski is mentioned.", + 0.5 + ], + [ + "The shape of the deck is mentioned in the description and is slightly curved.", + 1 + ], + [ + "The shape of the deck is mentioned in the description but is not slightly curved.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The deck or the ski is not mentioned.", + "pred_index": 0, + "question_index": 3, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tail or the ski is not mentioned.", + 0 + ], + [ + "The texture/pattern of the tail is not mentioned, but the tail of the ski is mentioned.", + 0.5 + ], + [ + "The texture/pattern of the tail is mentioned in the description and is geometric shapes.", + 1 + ], + [ + "The texture/pattern of the tail is mentioned in the description but is not geometric shapes.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The tail or the ski is not mentioned.", + "pred_index": 0, + "question_index": 4, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wheels of the ski are not mentioned in the description.", + 1 + ], + [ + "The ski is not mentioned in the description.", + 0 + ], + [ + "The wheels of the ski are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The wheels of the ski are not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wooden post is not mentioned in the description.", + 1 + ], + [ + "The wooden post is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The wooden post is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The glass window is not mentioned in the description.", + 1 + ], + [ + "The glass window is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The glass window is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ski base of the ski is not mentioned in the description.", + 1 + ], + [ + "The ski is not mentioned in the description.", + 0 + ], + [ + "The ski base of the ski is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The ski base of the ski is mentioned in the description.", + "pred_index": 2, + "question_index": 8, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ski poles are not mentioned in the description.", + 1 + ], + [ + "The ski poles are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The ski poles are not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a ski or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black base with intricate orange and white geometric patterns. The design includes a series of interconnected shapes and lines, creating a dynamic and modern appearance. The tip of the ski is slightly curved and tapers to a point, with the pattern continuing seamlessly along the length of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a ski or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black base with intricate orange and white geometric patterns. The design includes a series of interconnected shapes and lines, creating a dynamic and modern appearance. The tip of the ski is slightly curved and tapers to a point, with the pattern continuing seamlessly along the length of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The binding or the ski is not mentioned.\nB. The color of the binding is not mentioned, but the binding of the ski is mentioned.\nC. 
The color of the binding is mentioned in the description and is black or orange.\nD. The color of the binding is mentioned in the description but is not black or orange.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black base with intricate orange and white geometric patterns. The design includes a series of interconnected shapes and lines, creating a dynamic and modern appearance. The tip of the ski is slightly curved and tapers to a point, with the pattern continuing seamlessly along the length of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The deck or the ski is not mentioned.\nB. The color of the deck is not mentioned, but the deck of the ski is mentioned.\nC. The color of the deck is mentioned in the description and is black, white, or orange.\nD. The color of the deck is mentioned in the description but is not black, white, or orange.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black base with intricate orange and white geometric patterns. The design includes a series of interconnected shapes and lines, creating a dynamic and modern appearance. 
The tip of the ski is slightly curved and tapers to a point, with the pattern continuing seamlessly along the length of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The binding or the ski is not mentioned.\nB. The material of the binding is not mentioned, but the binding of the ski is mentioned.\nC. The material of the binding is mentioned in the description and is metal and plastic.\nD. The material of the binding is mentioned in the description but is not metal and plastic.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black base with intricate orange and white geometric patterns. The design includes a series of interconnected shapes and lines, creating a dynamic and modern appearance. The tip of the ski is slightly curved and tapers to a point, with the pattern continuing seamlessly along the length of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The deck or the ski is not mentioned.\nB. The shape of the deck is not mentioned, but the deck of the ski is mentioned.\nC. The shape of the deck is mentioned in the description and is slightly curved.\nD. The shape of the deck is mentioned in the description but is not slightly curved.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black base with intricate orange and white geometric patterns. The design includes a series of interconnected shapes and lines, creating a dynamic and modern appearance. The tip of the ski is slightly curved and tapers to a point, with the pattern continuing seamlessly along the length of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tail or the ski is not mentioned.\nB. The texture/pattern of the tail is not mentioned, but the tail of the ski is mentioned.\nC. The texture/pattern of the tail is mentioned in the description and is geometric shapes.\nD. The texture/pattern of the tail is mentioned in the description but is not geometric shapes.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black base with intricate orange and white geometric patterns. The design includes a series of interconnected shapes and lines, creating a dynamic and modern appearance. The tip of the ski is slightly curved and tapers to a point, with the pattern continuing seamlessly along the length of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wheels of the ski are not mentioned in the description.\nB. 
The ski is not mentioned in the description.\nC. The wheels of the ski are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black base with intricate orange and white geometric patterns. The design includes a series of interconnected shapes and lines, creating a dynamic and modern appearance. The tip of the ski is slightly curved and tapers to a point, with the pattern continuing seamlessly along the length of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wooden post is not mentioned in the description.\nB. The wooden post is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black base with intricate orange and white geometric patterns. The design includes a series of interconnected shapes and lines, creating a dynamic and modern appearance. 
The tip of the ski is slightly curved and tapers to a point, with the pattern continuing seamlessly along the length of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The glass window is not mentioned in the description.\nB. The glass window is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black base with intricate orange and white geometric patterns. The design includes a series of interconnected shapes and lines, creating a dynamic and modern appearance. The tip of the ski is slightly curved and tapers to a point, with the pattern continuing seamlessly along the length of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ski base of the ski is not mentioned in the description.\nB. The ski is not mentioned in the description.\nC. The ski base of the ski is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black base with intricate orange and white geometric patterns. 
The design includes a series of interconnected shapes and lines, creating a dynamic and modern appearance. The tip of the ski is slightly curved and tapers to a point, with the pattern continuing seamlessly along the length of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ski poles are not mentioned in the description.\nB. The ski poles are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The binding or the ski is not mentioned.", + "A. The deck or the ski is not mentioned.", + "A. The binding or the ski is not mentioned.", + "A. The deck or the ski is not mentioned.", + "A. The tail or the ski is not mentioned.", + "A. The wheels of the ski are not mentioned in the description.", + "A. The wooden post is not mentioned in the description.", + "A. The glass window is not mentioned in the description.", + "C. The ski base of the ski is mentioned in the description.", + "A. The ski poles are not mentioned in the description." + ], + "score": 0.3, + "score_pos": 0.0, + "score_neg": 0.6, + "neg_valid_num": 5, + "recognition_result": true + }, + "622329": { + "pred": "A rectangular, flat, beige-colored eraser with a slightly rough texture and rounded edges.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The eraser is not mentioned.", + 0 + ], + [ + "The color of the eraser is mentioned in the description but is not brown.", + -1 + ], + [ + "The color of the eraser is mentioned in the description and is brown.", + 1 + ], + [ + "The color of the eraser is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the eraser is mentioned in the description but is not brown.", + "pred_index": 1, + "question_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The eraser is not mentioned.", + 0 + ], + [ + "The material of the eraser is mentioned in the description but is not rubber.", + -1 + ], + [ + "The material of the eraser is mentioned in the description and is rubber.", + 1 + ], + [ + "The material of the eraser is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The material of the eraser is mentioned in the description but is not rubber.", + "pred_index": 1, + "question_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The corner or the eraser is not mentioned.", + 0 + ], + [ + "The shape of the corner is mentioned in the description but is not rounded.", + -1 + ], + [ + "The shape of the corner is mentioned in the description and is rounded.", + 1 + ], + [ + "The shape of the corner is not mentioned, but the corner of the eraser is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The corner or the eraser is not mentioned.", + "pred_index": 0, + "question_index": 2, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The desk organizer is not mentioned in the description.", + 1 + ], + [ + "The desk organizer is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The desk organizer is not mentioned in the description.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The paper sleeve of the eraser is not mentioned in the description.", + 1 + ], + [ + "The eraser is not mentioned in the description.", + 0 + ], + [ + "The paper sleeve of the eraser is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The paper sleeve of the eraser is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The phone is not mentioned in the description.", + 1 + ], + [ + "The phone is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The phone is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sticky notes are not mentioned in the description.", + 1 + ], + [ + "The sticky notes are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The sticky notes are not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tape is not mentioned in the description.", + 1 + ], + [ + "The tape is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The tape is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is an eraser or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige-colored eraser with a slightly rough texture and rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is an eraser or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige-colored eraser with a slightly rough texture and rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The eraser is not mentioned.\nB. 
The color of the eraser is mentioned in the description but is not brown.\nC. The color of the eraser is mentioned in the description and is brown.\nD. The color of the eraser is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige-colored eraser with a slightly rough texture and rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The eraser is not mentioned.\nB. The material of the eraser is mentioned in the description but is not rubber.\nC. The material of the eraser is mentioned in the description and is rubber.\nD. The material of the eraser is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige-colored eraser with a slightly rough texture and rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The corner or the eraser is not mentioned.\nB. The shape of the corner is mentioned in the description but is not rounded.\nC. The shape of the corner is mentioned in the description and is rounded.\nD. 
The shape of the corner is not mentioned, but the corner of the eraser is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige-colored eraser with a slightly rough texture and rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The desk organizer is not mentioned in the description.\nB. The desk organizer is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige-colored eraser with a slightly rough texture and rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The paper sleeve of the eraser is not mentioned in the description.\nB. The eraser is not mentioned in the description.\nC. The paper sleeve of the eraser is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige-colored eraser with a slightly rough texture and rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The phone is not mentioned in the description.\nB. The phone is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige-colored eraser with a slightly rough texture and rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sticky notes are not mentioned in the description.\nB. The sticky notes are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige-colored eraser with a slightly rough texture and rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tape is not mentioned in the description.\nB. 
The tape is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the eraser is mentioned in the description but is not brown.", + "B. The material of the eraser is mentioned in the description but is not rubber.", + "A. The corner or the eraser is not mentioned.", + "A. The desk organizer is not mentioned in the description.", + "A. The paper sleeve of the eraser is not mentioned in the description.", + "A. The phone is not mentioned in the description.", + "A. The sticky notes are not mentioned in the description.", + "A. The tape is not mentioned in the description." + ], + "score": 0.375, + "score_pos": -0.6666666666666666, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "622332": { + "pred": "A black, rectangular stapler with a glossy finish. The top surface features a white logo and text. The front edge has a slightly raised, horizontal groove.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the base plate is mentioned in the description and is metallic.", + 1 + ], + [ + "The base plate or the stapler is not mentioned.", + 0 + ], + [ + "The material of the base plate is mentioned in the description but is not metallic.", + -1 + ], + [ + "The material of the base plate is not mentioned, but the base plate of the stapler is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The base plate or the stapler is not mentioned.", + "pred_index": 1, + "question_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the stapler is mentioned in the description and is rectangular.", + 1 + ], + [ + "The stapler is not mentioned.", + 0 + ], + [ + "The shape of the stapler is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the stapler is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the stapler is mentioned in the description and is rectangular.", + "pred_index": 0, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the logo is mentioned in the description and is white.", + 1 + ], + [ + "The logo or the stapler is not mentioned.", + 0 + ], + [ + "The color of the logo is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the logo is not mentioned, but the logo of the stapler is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the logo is mentioned in the description and is white.", + "pred_index": 0, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the top cover is mentioned in the description and is black.", + 1 + ], + [ + "The top cover or the stapler is not mentioned.", + 0 + ], + [ + "The color of the top cover is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the top cover is not mentioned, but the top cover of the stapler is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the top cover is mentioned in the description and is black.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the stapler is mentioned in the description and is black.", + 1 + ], + [ + "The stapler is not mentioned.", + 0 + ], + [ + "The color of the stapler is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the stapler is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the stapler is mentioned in the description and is black.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tape is mentioned in the description.", + -1 + ], + [ + "The tape is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The tape is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The staple remover of the stapler is mentioned in the description.", + -1 + ], + [ + "The stapler is not mentioned in the description.", + 0 + ], + [ + "The staple remover of the stapler is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The staple remover of the stapler is not mentioned in the description.", + "pred_index": 2, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The paper clips are mentioned in the description.", + -1 + ], + [ + "The paper clips are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The paper clips are not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The paintbrushes are mentioned in the description.", + -1 + ], + [ + "The paintbrushes are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The paintbrushes are not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The staple storage compartment of the stapler is mentioned in the description.", + -1 + ], + [ + "The stapler is not mentioned in the description.", + 0 + ], + [ + "The staple storage compartment of the stapler is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The staple storage compartment of the stapler is not mentioned in the description.", + "pred_index": 2, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a stapler or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, rectangular stapler with a glossy finish. The top surface features a white logo and text. The front edge has a slightly raised, horizontal groove.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a stapler or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, rectangular stapler with a glossy finish. The top surface features a white logo and text. The front edge has a slightly raised, horizontal groove.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the base plate is mentioned in the description and is metallic.\nB. The base plate or the stapler is not mentioned.\nC. The material of the base plate is mentioned in the description but is not metallic.\nD. 
The material of the base plate is not mentioned, but the base plate of the stapler is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, rectangular stapler with a glossy finish. The top surface features a white logo and text. The front edge has a slightly raised, horizontal groove.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the stapler is mentioned in the description and is rectangular.\nB. The stapler is not mentioned.\nC. The shape of the stapler is mentioned in the description but is not rectangular.\nD. The shape of the stapler is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, rectangular stapler with a glossy finish. The top surface features a white logo and text. The front edge has a slightly raised, horizontal groove.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the logo is mentioned in the description and is white.\nB. The logo or the stapler is not mentioned.\nC. The color of the logo is mentioned in the description but is not white.\nD. 
The color of the logo is not mentioned, but the logo of the stapler is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, rectangular stapler with a glossy finish. The top surface features a white logo and text. The front edge has a slightly raised, horizontal groove.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the top cover is mentioned in the description and is black.\nB. The top cover or the stapler is not mentioned.\nC. The color of the top cover is mentioned in the description but is not black.\nD. The color of the top cover is not mentioned, but the top cover of the stapler is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, rectangular stapler with a glossy finish. The top surface features a white logo and text. The front edge has a slightly raised, horizontal groove.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the stapler is mentioned in the description and is black.\nB. The stapler is not mentioned.\nC. 
The color of the stapler is mentioned in the description but is not black.\nD. The color of the stapler is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, rectangular stapler with a glossy finish. The top surface features a white logo and text. The front edge has a slightly raised, horizontal groove.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tape is mentioned in the description.\nB. The tape is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, rectangular stapler with a glossy finish. The top surface features a white logo and text. The front edge has a slightly raised, horizontal groove.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The staple remover of the stapler is mentioned in the description.\nB. The stapler is not mentioned in the description.\nC. The staple remover of the stapler is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, rectangular stapler with a glossy finish. The top surface features a white logo and text. The front edge has a slightly raised, horizontal groove.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The paper clips are mentioned in the description.\nB. The paper clips are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, rectangular stapler with a glossy finish. The top surface features a white logo and text. The front edge has a slightly raised, horizontal groove.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The paintbrushes are mentioned in the description.\nB. The paintbrushes are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, rectangular stapler with a glossy finish. 
The top surface features a white logo and text. The front edge has a slightly raised, horizontal groove.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The staple storage compartment of the stapler is mentioned in the description.\nB. The stapler is not mentioned in the description.\nC. The staple storage compartment of the stapler is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The base plate or the stapler is not mentioned.", + "A. The shape of the stapler is mentioned in the description and is rectangular.", + "A. The color of the logo is mentioned in the description and is white.", + "A. The color of the top cover is mentioned in the description and is black.", + "A. The color of the stapler is mentioned in the description and is black.", + "B. The tape is not mentioned in the description.", + "C. The staple remover of the stapler is not mentioned in the description.", + "B. The paper clips are not mentioned in the description.", + "B. The paintbrushes are not mentioned in the description.", + "C. The staple storage compartment of the stapler is not mentioned in the description." + ], + "score": 0.9, + "score_pos": 0.8, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "1075308": { + "pred": "A vintage-style television set with a boxy, black frame and a slightly curved screen. 
The top of the television features a series of control buttons and a small display screen.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the screen is mentioned in the description and is rectangular or rounded rectangular.", + 1 + ], + [ + "The shape of the screen is mentioned in the description but is not rectangular or rounded rectangular.", + -1 + ], + [ + "The screen or the monitor/tv is not mentioned.", + 0 + ], + [ + "The shape of the screen is not mentioned, but the screen of the monitor/tv is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the screen is mentioned in the description and is rectangular or rounded rectangular.", + "pred_index": 0, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the frame is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the frame is mentioned in the description but is not plastic.", + -1 + ], + [ + "The frame or the monitor/tv is not mentioned.", + 0 + ], + [ + "The material of the frame is not mentioned, but the frame of the monitor/tv is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the frame is not mentioned, but the frame of the monitor/tv is mentioned.", + "pred_index": 3, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the monitor/tv is mentioned in the description and is black.", + 1 + ], + [ + "The color of the monitor/tv is mentioned in the description but is not black.", + -1 + ], + [ + "The monitor/tv is not mentioned.", + 0 + ], + [ + "The color of the monitor/tv is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the monitor/tv is not mentioned.", + "pred_index": 3, + "question_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the screen is mentioned in the description and is glass.", + 1 + ], + [ + "The material of the screen is mentioned in the description but is not glass.", + -1 + ], + [ + "The screen or the monitor/tv is not mentioned.", + 0 + ], + [ + "The material of the screen is not mentioned, but the screen of the monitor/tv is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the screen is not mentioned, but the screen of the monitor/tv is mentioned.", + "pred_index": 3, + "question_index": 3, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chairs are mentioned in the description.", + -1 + ], + [ + "The chairs are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The chairs are not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The remote control of the monitor/tv is mentioned in the description.", + -1 + ], + [ + "The monitor/tv is not mentioned in the description.", + 0 + ], + [ + "The remote control of the monitor/tv is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The remote control of the monitor/tv is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ports of the monitor/tv are mentioned in the description.", + -1 + ], + [ + "The monitor/tv is not mentioned in the description.", + 0 + ], + [ + "The ports of the monitor/tv are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The ports of the monitor/tv are not mentioned in the description.", + "pred_index": 2, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The glass are mentioned in the description.", + -1 + ], + [ + "The glass are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The glass are not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The range hood is mentioned in the description.", + -1 + ], + [ + "The range hood is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The range hood is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a monitor/tv or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black frame and a slightly curved screen. The top of the television features a series of control buttons and a small display screen.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a monitor/tv or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black frame and a slightly curved screen. The top of the television features a series of control buttons and a small display screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the screen is mentioned in the description and is rectangular or rounded rectangular.\nB. The shape of the screen is mentioned in the description but is not rectangular or rounded rectangular.\nC. The screen or the monitor/tv is not mentioned.\nD. 
The shape of the screen is not mentioned, but the screen of the monitor/tv is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black frame and a slightly curved screen. The top of the television features a series of control buttons and a small display screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the frame is mentioned in the description and is plastic.\nB. The material of the frame is mentioned in the description but is not plastic.\nC. The frame or the monitor/tv is not mentioned.\nD. The material of the frame is not mentioned, but the frame of the monitor/tv is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black frame and a slightly curved screen. The top of the television features a series of control buttons and a small display screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the monitor/tv is mentioned in the description and is black.\nB. 
The color of the monitor/tv is mentioned in the description but is not black.\nC. The monitor/tv is not mentioned.\nD. The color of the monitor/tv is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black frame and a slightly curved screen. The top of the television features a series of control buttons and a small display screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the screen is mentioned in the description and is glass.\nB. The material of the screen is mentioned in the description but is not glass.\nC. The screen or the monitor/tv is not mentioned.\nD. The material of the screen is not mentioned, but the screen of the monitor/tv is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black frame and a slightly curved screen. The top of the television features a series of control buttons and a small display screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The chairs are mentioned in the description.\nB. The chairs are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black frame and a slightly curved screen. The top of the television features a series of control buttons and a small display screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The remote control of the monitor/tv is mentioned in the description.\nB. The monitor/tv is not mentioned in the description.\nC. The remote control of the monitor/tv is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black frame and a slightly curved screen. The top of the television features a series of control buttons and a small display screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ports of the monitor/tv are mentioned in the description.\nB. The monitor/tv is not mentioned in the description.\nC. 
The ports of the monitor/tv are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black frame and a slightly curved screen. The top of the television features a series of control buttons and a small display screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The glass are mentioned in the description.\nB. The glass are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black frame and a slightly curved screen. The top of the television features a series of control buttons and a small display screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The range hood is mentioned in the description.\nB. The range hood is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The shape of the screen is mentioned in the description and is rectangular or rounded rectangular.", + "D. 
The material of the frame is not mentioned, but the frame of the monitor/tv is mentioned.", + "D. The color of the monitor/tv is not mentioned.", + "D. The material of the screen is not mentioned, but the screen of the monitor/tv is mentioned.", + "B. The chairs are not mentioned in the description.", + "C. The remote control of the monitor/tv is not mentioned in the description.", + "C. The ports of the monitor/tv are not mentioned in the description.", + "B. The glass are not mentioned in the description.", + "B. The range hood is not mentioned in the description." + ], + "score": 0.7777777777777778, + "score_pos": 0.5, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "1770866": { + "pred": "A white tag with handwritten text in blue and red marker. The blue text reads \"LIBRA\" and \"my tabouts\" in a cursive style. Below, in red marker, the text \"Add $50\" is written in a bold, sans-serif font.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the text is mentioned in the description but is not handwritten.", + -1 + ], + [ + "The type of the text is not mentioned, but the text of the sign/banner is mentioned.", + 0.5 + ], + [ + "The text or the sign/banner is not mentioned.", + 0 + ], + [ + "The type of the text is mentioned in the description and is handwritten.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The type of the text is mentioned in the description and is handwritten.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the board is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the board is not mentioned, but the board of the sign/banner is mentioned.", + 0.5 + ], + [ + "The board or the sign/banner is not mentioned.", + 0 + ], + [ + "The shape of the board is mentioned in the description and is rectangular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. The board or the sign/banner is not mentioned.", + "pred_index": 2, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The background color of the sign/banner is mentioned in the description but is not white.", + -1 + ], + [ + "The background color of the sign/banner is not mentioned.", + 0 + ], + [ + "The sign/banner is not mentioned.", + 0 + ], + [ + "The background color of the sign/banner is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The background color of the sign/banner is mentioned in the description and is white.", + "pred_index": 3, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the text is mentioned in the description but is not black or blue and red.", + -1 + ], + [ + "The color of the text is not mentioned, but the text of the sign/banner is mentioned.", + 0.5 + ], + [ + "The text or the sign/banner is not mentioned.", + 0 + ], + [ + "The color of the text is mentioned in the description and is black or blue and red.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the text is mentioned in the description and is black or blue and red.", + "pred_index": 3, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The salami is not mentioned in the description.", + 1 + ], + [ + "The salami is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The salami is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sliced meats are not mentioned in the description.", + 1 + ], + [ + "The sliced meats are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The sliced meats are not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sign/banner is not mentioned in the description.", + 0 + ], + [ + "The duster of the sign/banner is not mentioned in the description.", + 1 + ], + [ + "The duster of the sign/banner is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The duster of the sign/banner is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sign/banner is not mentioned in the description.", + 0 + ], + [ + "The marker of the sign/banner is not mentioned in the description.", + 1 + ], + [ + "The marker of the sign/banner is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The marker of the sign/banner is mentioned in the description.", + "pred_index": 2, + "question_index": 7, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The price tags are not mentioned in the description.", + 1 + ], + [ + "The price tags are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The price tags are mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": -1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a sign/banner or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white tag with handwritten text in blue and red marker. The blue text reads \"LIBRA\" and \"my tabouts\" in a cursive style. Below, in red marker, the text \"Add $50\" is written in a bold, sans-serif font.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a sign/banner or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white tag with handwritten text in blue and red marker. The blue text reads \"LIBRA\" and \"my tabouts\" in a cursive style. Below, in red marker, the text \"Add $50\" is written in a bold, sans-serif font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The type of the text is mentioned in the description but is not handwritten.\nB. The type of the text is not mentioned, but the text of the sign/banner is mentioned.\nC. The text or the sign/banner is not mentioned.\nD. The type of the text is mentioned in the description and is handwritten.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white tag with handwritten text in blue and red marker. The blue text reads \"LIBRA\" and \"my tabouts\" in a cursive style. Below, in red marker, the text \"Add $50\" is written in a bold, sans-serif font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the board is mentioned in the description but is not rectangular.\nB. The shape of the board is not mentioned, but the board of the sign/banner is mentioned.\nC. The board or the sign/banner is not mentioned.\nD. 
The shape of the board is mentioned in the description and is rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white tag with handwritten text in blue and red marker. The blue text reads \"LIBRA\" and \"my tabouts\" in a cursive style. Below, in red marker, the text \"Add $50\" is written in a bold, sans-serif font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The background color of the sign/banner is mentioned in the description but is not white.\nB. The background color of the sign/banner is not mentioned.\nC. The sign/banner is not mentioned.\nD. The background color of the sign/banner is mentioned in the description and is white.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white tag with handwritten text in blue and red marker. The blue text reads \"LIBRA\" and \"my tabouts\" in a cursive style. Below, in red marker, the text \"Add $50\" is written in a bold, sans-serif font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the text is mentioned in the description but is not black or blue and red.\nB. 
The color of the text is not mentioned, but the text of the sign/banner is mentioned.\nC. The text or the sign/banner is not mentioned.\nD. The color of the text is mentioned in the description and is black or blue and red.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white tag with handwritten text in blue and red marker. The blue text reads \"LIBRA\" and \"my tabouts\" in a cursive style. Below, in red marker, the text \"Add $50\" is written in a bold, sans-serif font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The salami is not mentioned in the description.\nB. The salami is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white tag with handwritten text in blue and red marker. The blue text reads \"LIBRA\" and \"my tabouts\" in a cursive style. Below, in red marker, the text \"Add $50\" is written in a bold, sans-serif font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sliced meats are not mentioned in the description.\nB. 
The sliced meats are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white tag with handwritten text in blue and red marker. The blue text reads \"LIBRA\" and \"my tabouts\" in a cursive style. Below, in red marker, the text \"Add $50\" is written in a bold, sans-serif font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sign/banner is not mentioned in the description.\nB. The duster of the sign/banner is not mentioned in the description.\nC. The duster of the sign/banner is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white tag with handwritten text in blue and red marker. The blue text reads \"LIBRA\" and \"my tabouts\" in a cursive style. Below, in red marker, the text \"Add $50\" is written in a bold, sans-serif font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sign/banner is not mentioned in the description.\nB. The marker of the sign/banner is not mentioned in the description.\nC. 
The marker of the sign/banner is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white tag with handwritten text in blue and red marker. The blue text reads \"LIBRA\" and \"my tabouts\" in a cursive style. Below, in red marker, the text \"Add $50\" is written in a bold, sans-serif font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The price tags are not mentioned in the description.\nB. The price tags are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The type of the text is mentioned in the description and is handwritten.", + "C. The board or the sign/banner is not mentioned.", + "D. The background color of the sign/banner is mentioned in the description and is white.", + "D. The color of the text is mentioned in the description and is black or blue and red.", + "A. The salami is not mentioned in the description.", + "A. The sliced meats are not mentioned in the description.", + "B. The duster of the sign/banner is not mentioned in the description.", + "C. The marker of the sign/banner is mentioned in the description.", + "B. The price tags are mentioned in the description." + ], + "score": 0.4444444444444444, + "score_pos": 0.75, + "score_neg": 0.2, + "neg_valid_num": 5, + "recognition_result": true + }, + "2391761": { + "pred": "The canoe features a blue tarpaulin cover secured over its wooden frame. The visible part of the canoe's hull is made of wooden planks, with a natural brown finish. 
The canoe has a pointed bow and a slightly raised stern. A white fender is attached to the side, and a red and white lifebuoy is visible inside the canoe.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The hull or the boat is not mentioned.", + 0 + ], + [ + "The color of the hull is mentioned in the description and is brown.", + 1 + ], + [ + "The color of the hull is mentioned in the description but is not brown.", + -1 + ], + [ + "The color of the hull is not mentioned, but the hull of the boat is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the hull is mentioned in the description and is brown.", + "pred_index": 1, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cover or the boat is not mentioned.", + 0 + ], + [ + "The color of the cover is mentioned in the description and is blue.", + 1 + ], + [ + "The color of the cover is mentioned in the description but is not blue.", + -1 + ], + [ + "The color of the cover is not mentioned, but the cover of the boat is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the cover is mentioned in the description and is blue.", + "pred_index": 1, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The life preserver/life buoy or the boat is not mentioned.", + 0 + ], + [ + "The color of the life preserver/life buoy is mentioned in the description and is red or white.", + 1 + ], + [ + "The color of the life preserver/life buoy is mentioned in the description but is not red or white.", + -1 + ], + [ + "The color of the life preserver/life buoy is not mentioned, but the life preserver/life buoy of the boat is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the life preserver/life buoy is mentioned in the description and is red or white.", + "pred_index": 1, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The motor or the boat is not mentioned.", + 0 + ], + [ + "The color of the motor is mentioned in the description and is black.", + 1 + ], + [ + "The color of the motor is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the motor is not mentioned, but the motor of the boat is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The motor or the boat is not mentioned.", + "pred_index": 0, + "question_index": 3, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rudder of the boat is mentioned in the description.", + -1 + ], + [ + "The boat is not mentioned in the description.", + 0 + ], + [ + "The rudder of the boat is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The rudder of the boat is not mentioned in the description.", + "pred_index": 2, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sail of the boat is mentioned in the description.", + -1 + ], + [ + "The boat is not mentioned in the description.", + 0 + ], + [ + "The sail of the boat is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The sail of the boat is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cabin of the boat is mentioned in the description.", + -1 + ], + [ + "The boat is not mentioned in the description.", + 0 + ], + [ + "The cabin of the boat is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The cabin of the boat is not mentioned in the description.", + "pred_index": 2, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The anchor of the boat is mentioned in the description.", + -1 + ], + [ + "The boat is not mentioned in the description.", + 0 + ], + [ + "The anchor of the boat is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The anchor of the boat is not mentioned in the description.", + "pred_index": 2, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ducks are mentioned in the description.", + -1 + ], + [ + "The ducks are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The ducks are not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a boat or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe features a blue tarpaulin cover secured over its wooden frame. The visible part of the canoe's hull is made of wooden planks, with a natural brown finish. The canoe has a pointed bow and a slightly raised stern. A white fender is attached to the side, and a red and white lifebuoy is visible inside the canoe.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a boat or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe features a blue tarpaulin cover secured over its wooden frame. The visible part of the canoe's hull is made of wooden planks, with a natural brown finish. The canoe has a pointed bow and a slightly raised stern. A white fender is attached to the side, and a red and white lifebuoy is visible inside the canoe.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The hull or the boat is not mentioned.\nB. 
The color of the hull is mentioned in the description and is brown.\nC. The color of the hull is mentioned in the description but is not brown.\nD. The color of the hull is not mentioned, but the hull of the boat is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe features a blue tarpaulin cover secured over its wooden frame. The visible part of the canoe's hull is made of wooden planks, with a natural brown finish. The canoe has a pointed bow and a slightly raised stern. A white fender is attached to the side, and a red and white lifebuoy is visible inside the canoe.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cover or the boat is not mentioned.\nB. The color of the cover is mentioned in the description and is blue.\nC. The color of the cover is mentioned in the description but is not blue.\nD. The color of the cover is not mentioned, but the cover of the boat is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe features a blue tarpaulin cover secured over its wooden frame. The visible part of the canoe's hull is made of wooden planks, with a natural brown finish. 
The canoe has a pointed bow and a slightly raised stern. A white fender is attached to the side, and a red and white lifebuoy is visible inside the canoe.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The life preserver/life buoy or the boat is not mentioned.\nB. The color of the life preserver/life buoy is mentioned in the description and is red or white.\nC. The color of the life preserver/life buoy is mentioned in the description but is not red or white.\nD. The color of the life preserver/life buoy is not mentioned, but the life preserver/life buoy of the boat is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe features a blue tarpaulin cover secured over its wooden frame. The visible part of the canoe's hull is made of wooden planks, with a natural brown finish. The canoe has a pointed bow and a slightly raised stern. A white fender is attached to the side, and a red and white lifebuoy is visible inside the canoe.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The motor or the boat is not mentioned.\nB. The color of the motor is mentioned in the description and is black.\nC. The color of the motor is mentioned in the description but is not black.\nD. The color of the motor is not mentioned, but the motor of the boat is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe features a blue tarpaulin cover secured over its wooden frame. The visible part of the canoe's hull is made of wooden planks, with a natural brown finish. The canoe has a pointed bow and a slightly raised stern. A white fender is attached to the side, and a red and white lifebuoy is visible inside the canoe.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rudder of the boat is mentioned in the description.\nB. The boat is not mentioned in the description.\nC. The rudder of the boat is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe features a blue tarpaulin cover secured over its wooden frame. The visible part of the canoe's hull is made of wooden planks, with a natural brown finish. The canoe has a pointed bow and a slightly raised stern. A white fender is attached to the side, and a red and white lifebuoy is visible inside the canoe.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sail of the boat is mentioned in the description.\nB. The boat is not mentioned in the description.\nC. 
The sail of the boat is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe features a blue tarpaulin cover secured over its wooden frame. The visible part of the canoe's hull is made of wooden planks, with a natural brown finish. The canoe has a pointed bow and a slightly raised stern. A white fender is attached to the side, and a red and white lifebuoy is visible inside the canoe.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cabin of the boat is mentioned in the description.\nB. The boat is not mentioned in the description.\nC. The cabin of the boat is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe features a blue tarpaulin cover secured over its wooden frame. The visible part of the canoe's hull is made of wooden planks, with a natural brown finish. The canoe has a pointed bow and a slightly raised stern. A white fender is attached to the side, and a red and white lifebuoy is visible inside the canoe.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The anchor of the boat is mentioned in the description.\nB. The boat is not mentioned in the description.\nC. The anchor of the boat is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe features a blue tarpaulin cover secured over its wooden frame. The visible part of the canoe's hull is made of wooden planks, with a natural brown finish. The canoe has a pointed bow and a slightly raised stern. A white fender is attached to the side, and a red and white lifebuoy is visible inside the canoe.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ducks are mentioned in the description.\nB. The ducks are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the hull is mentioned in the description and is brown.", + "B. The color of the cover is mentioned in the description and is blue.", + "B. The color of the life preserver/life buoy is mentioned in the description and is red or white.", + "A. The motor or the boat is not mentioned.", + "C. The rudder of the boat is not mentioned in the description.", + "C. The sail of the boat is not mentioned in the description.", + "C. The cabin of the boat is not mentioned in the description.", + "C. The anchor of the boat is not mentioned in the description.", + "B. The ducks are not mentioned in the description." 
+ ], + "score": 0.8888888888888888, + "score_pos": 0.75, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "2580318": { + "pred": "The mouse has a smooth, metallic surface with a slightly curved, ergonomic shape. The visible part is a triangular section with a gradient of light and dark shades, giving it a sleek and modern appearance.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouse is not mentioned.", + 0 + ], + [ + "The color of the mouse is not mentioned.", + 0 + ], + [ + "The color of the mouse is mentioned in the description but is not gray.", + -1 + ], + [ + "The color of the mouse is mentioned in the description and is gray.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the mouse is not mentioned.", + "pred_index": 1, + "question_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouse is not mentioned.", + 0 + ], + [ + "The texture of the mouse is not mentioned.", + 0 + ], + [ + "The texture of the mouse is mentioned in the description but is not matte.", + -1 + ], + [ + "The texture of the mouse is mentioned in the description and is matte.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. The texture of the mouse is mentioned in the description but is not matte.", + "pred_index": 2, + "question_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouse is not mentioned.", + 0 + ], + [ + "The shape of the mouse is not mentioned.", + 0 + ], + [ + "The shape of the mouse is mentioned in the description but is not ergonomic.", + -1 + ], + [ + "The shape of the mouse is mentioned in the description and is ergonomic.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The shape of the mouse is mentioned in the description and is ergonomic.", + "pred_index": 3, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Paper is mentioned in the description.", + -1 + ], + [ + "The Paper is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The Paper is not mentioned in the description.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouse is not mentioned in the description.", + 0 + ], + [ + "The cable of the mouse is mentioned in the description.", + -1 + ], + [ + "The cable of the mouse is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The cable of the mouse is not mentioned in the description.", + "pred_index": 2, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Framed diagram is mentioned in the description.", + -1 + ], + [ + "The Framed diagram is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The Framed diagram is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Pen is mentioned in the description.", + -1 + ], + [ + "The Pen is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The Pen is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouse is not mentioned in the description.", + 0 + ], + [ + "The side buttons of the mouse are mentioned in the description.", + -1 + ], + [ + "The side buttons of the mouse are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The side buttons of the mouse are not mentioned in the description.", + "pred_index": 2, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a mouse or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a smooth, metallic surface with a slightly curved, ergonomic shape. The visible part is a triangular section with a gradient of light and dark shades, giving it a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a mouse or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. 
No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a smooth, metallic surface with a slightly curved, ergonomic shape. The visible part is a triangular section with a gradient of light and dark shades, giving it a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mouse is not mentioned.\nB. The color of the mouse is not mentioned.\nC. The color of the mouse is mentioned in the description but is not gray.\nD. The color of the mouse is mentioned in the description and is gray.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a smooth, metallic surface with a slightly curved, ergonomic shape. The visible part is a triangular section with a gradient of light and dark shades, giving it a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mouse is not mentioned.\nB. The texture of the mouse is not mentioned.\nC. The texture of the mouse is mentioned in the description but is not matte.\nD. 
The texture of the mouse is mentioned in the description and is matte.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a smooth, metallic surface with a slightly curved, ergonomic shape. The visible part is a triangular section with a gradient of light and dark shades, giving it a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mouse is not mentioned.\nB. The shape of the mouse is not mentioned.\nC. The shape of the mouse is mentioned in the description but is not ergonomic.\nD. The shape of the mouse is mentioned in the description and is ergonomic.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a smooth, metallic surface with a slightly curved, ergonomic shape. The visible part is a triangular section with a gradient of light and dark shades, giving it a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Paper is mentioned in the description.\nB. 
The Paper is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a smooth, metallic surface with a slightly curved, ergonomic shape. The visible part is a triangular section with a gradient of light and dark shades, giving it a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mouse is not mentioned in the description.\nB. The cable of the mouse is mentioned in the description.\nC. The cable of the mouse is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a smooth, metallic surface with a slightly curved, ergonomic shape. The visible part is a triangular section with a gradient of light and dark shades, giving it a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Framed diagram is mentioned in the description.\nB. The Framed diagram is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a smooth, metallic surface with a slightly curved, ergonomic shape. The visible part is a triangular section with a gradient of light and dark shades, giving it a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Pen is mentioned in the description.\nB. The Pen is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a smooth, metallic surface with a slightly curved, ergonomic shape. The visible part is a triangular section with a gradient of light and dark shades, giving it a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mouse is not mentioned in the description.\nB. The side buttons of the mouse are mentioned in the description.\nC. The side buttons of the mouse are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the mouse is not mentioned.", + "C. The texture of the mouse is mentioned in the description but is not matte.", + "D. The shape of the mouse is mentioned in the description and is ergonomic.", + "B. 
The Paper is not mentioned in the description.", + "C. The cable of the mouse is not mentioned in the description.", + "B. The Framed diagram is not mentioned in the description.", + "B. The Pen is not mentioned in the description.", + "C. The side buttons of the mouse are not mentioned in the description." + ], + "score": 0.625, + "score_pos": 0.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "2588513": { + "pred": "A rectangular wooden block with a light beige color and visible wood grain texture. The block has a black base and a white band wrapped around its middle.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the wood block is not mentioned.", + 0 + ], + [ + "The color of the wood block is mentioned in the description but is not wooden or brown.", + -1 + ], + [ + "The color of the wood block is mentioned in the description and is wooden or brown.", + 1 + ], + [ + "The wood block is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the wood block is mentioned in the description but is not wooden or brown.", + "pred_index": 1, + "question_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the top is not mentioned, but the top of the wood block is mentioned.", + 0.5 + ], + [ + "The texture of the top is mentioned in the description but is not grain.", + -1 + ], + [ + "The texture of the top is mentioned in the description and is grain.", + 1 + ], + [ + "The top or the wood block is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The texture of the top is mentioned in the description and is grain.", + "pred_index": 2, + "question_index": 1, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wood block is not mentioned in the description.", + 0 + ], + [ + "The metal clip of the wood block is not mentioned in the description.", + 1 + ], + [ + "The metal clip of the wood block is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The metal clip of the wood block is not mentioned in the description.", + "pred_index": 1, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The machine with a motor are not mentioned in the description.", + 1 + ], + [ + "The machine with a motor are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The machine with a motor are not mentioned in the description.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pickup is not mentioned in the description.", + 1 + ], + [ + "The pickup is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The pickup is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wood block is not mentioned in the description.", + 0 + ], + [ + "The plastic handle of the wood block is not mentioned in the description.", + 1 + ], + [ + "The plastic handle of the wood block is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The plastic handle of the wood block is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The copper wire spool is not mentioned in the description.", + 1 + ], + [ + "The copper wire spool is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The copper wire spool is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a wood block or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden block with a light beige color and visible wood grain texture. The block has a black base and a white band wrapped around its middle.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a wood block or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden block with a light beige color and visible wood grain texture. The block has a black base and a white band wrapped around its middle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the wood block is not mentioned.\nB. The color of the wood block is mentioned in the description but is not wooden or brown.\nC. The color of the wood block is mentioned in the description and is wooden or brown.\nD. The wood block is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden block with a light beige color and visible wood grain texture. 
The block has a black base and a white band wrapped around its middle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the top is not mentioned, but the top of the wood block is mentioned.\nB. The texture of the top is mentioned in the description but is not grain.\nC. The texture of the top is mentioned in the description and is grain.\nD. The top or the wood block is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden block with a light beige color and visible wood grain texture. The block has a black base and a white band wrapped around its middle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wood block is not mentioned in the description.\nB. The metal clip of the wood block is not mentioned in the description.\nC. The metal clip of the wood block is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden block with a light beige color and visible wood grain texture. 
The block has a black base and a white band wrapped around its middle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The machine with a motor are not mentioned in the description.\nB. The machine with a motor are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden block with a light beige color and visible wood grain texture. The block has a black base and a white band wrapped around its middle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pickup is not mentioned in the description.\nB. The pickup is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden block with a light beige color and visible wood grain texture. The block has a black base and a white band wrapped around its middle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wood block is not mentioned in the description.\nB. 
The plastic handle of the wood block is not mentioned in the description.\nC. The plastic handle of the wood block is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden block with a light beige color and visible wood grain texture. The block has a black base and a white band wrapped around its middle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The copper wire spool is not mentioned in the description.\nB. The copper wire spool is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the wood block is mentioned in the description but is not wooden or brown.", + "C. The texture of the top is mentioned in the description and is grain.", + "B. The metal clip of the wood block is not mentioned in the description.", + "A. The machine with a motor are not mentioned in the description.", + "A. The pickup is not mentioned in the description.", + "B. The plastic handle of the wood block is not mentioned in the description.", + "A. The copper wire spool is not mentioned in the description." 
+ ], + "score": 0.7142857142857143, + "score_pos": 0.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "3993075": { + "pred": "A white pen with a red cap and a green and blue design on the barrel.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the cap is mentioned in the description but is not orange.", + -1 + ], + [ + "The color of the cap is mentioned in the description and is orange.", + 1 + ], + [ + "The cap or the marker is not mentioned.", + 0 + ], + [ + "The color of the cap is not mentioned, but the cap of the marker is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the cap is mentioned in the description but is not orange.", + "pred_index": 0, + "question_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the body is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the body is mentioned in the description and is white.", + 1 + ], + [ + "The body or the marker is not mentioned.", + 0 + ], + [ + "The color of the body is not mentioned, but the body of the marker is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the body is mentioned in the description and is white.", + "pred_index": 1, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the marker is mentioned in the description but is not cylindrical.", + -1 + ], + [ + "The shape of the marker is mentioned in the description and is cylindrical.", + 1 + ], + [ + "The marker is not mentioned.", + 0 + ], + [ + "The shape of the marker is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "D. 
The shape of the marker is not mentioned.", + "pred_index": 3, + "question_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the body is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the body is mentioned in the description and is plastic.", + 1 + ], + [ + "The body or the marker is not mentioned.", + 0 + ], + [ + "The material of the body is not mentioned, but the body of the marker is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the body is not mentioned, but the body of the marker is mentioned.", + "pred_index": 3, + "question_index": 3, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The children are mentioned in the description.", + -1 + ], + [ + "The children are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The children are not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The table is mentioned in the description.", + -1 + ], + [ + "The table is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The table is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clip of the marker is mentioned in the description.", + -1 + ], + [ + "The marker is not mentioned in the description.", + 0 + ], + [ + "The clip of the marker is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The clip of the marker is not mentioned in the description.", + "pred_index": 2, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The label of the marker is mentioned in the description.", + -1 + ], + [ + "The marker is not mentioned in the description.", + 0 + ], + [ + "The label of the marker is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The label of the marker is not mentioned in the description.", + "pred_index": 2, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chairs are mentioned in the description.", + -1 + ], + [ + "The chairs are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The chairs are not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a marker or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white pen with a red cap and a green and blue design on the barrel.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a marker or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white pen with a red cap and a green and blue design on the barrel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the cap is mentioned in the description but is not orange.\nB. The color of the cap is mentioned in the description and is orange.\nC. The cap or the marker is not mentioned.\nD. The color of the cap is not mentioned, but the cap of the marker is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white pen with a red cap and a green and blue design on the barrel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the body is mentioned in the description but is not white.\nB. The color of the body is mentioned in the description and is white.\nC. The body or the marker is not mentioned.\nD. The color of the body is not mentioned, but the body of the marker is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white pen with a red cap and a green and blue design on the barrel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the marker is mentioned in the description but is not cylindrical.\nB. The shape of the marker is mentioned in the description and is cylindrical.\nC. The marker is not mentioned.\nD. The shape of the marker is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA white pen with a red cap and a green and blue design on the barrel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the body is mentioned in the description but is not plastic.\nB. The material of the body is mentioned in the description and is plastic.\nC. The body or the marker is not mentioned.\nD. The material of the body is not mentioned, but the body of the marker is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white pen with a red cap and a green and blue design on the barrel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The children are mentioned in the description.\nB. The children are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white pen with a red cap and a green and blue design on the barrel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The table is mentioned in the description.\nB. The table is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white pen with a red cap and a green and blue design on the barrel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The clip of the marker is mentioned in the description.\nB. The marker is not mentioned in the description.\nC. The clip of the marker is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white pen with a red cap and a green and blue design on the barrel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The label of the marker is mentioned in the description.\nB. The marker is not mentioned in the description.\nC. The label of the marker is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white pen with a red cap and a green and blue design on the barrel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chairs are mentioned in the description.\nB. The chairs are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The color of the cap is mentioned in the description but is not orange.", + "B. The color of the body is mentioned in the description and is white.", + "D. The shape of the marker is not mentioned.", + "D. The material of the body is not mentioned, but the body of the marker is mentioned.", + "B. The children are not mentioned in the description.", + "B. The table is not mentioned in the description.", + "C. The clip of the marker is not mentioned in the description.", + "C. The label of the marker is not mentioned in the description.", + "B. The chairs are not mentioned in the description." + ], + "score": 0.6111111111111112, + "score_pos": 0.125, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "4027486": { + "pred": "The bus is predominantly blue with a white section near the bottom. 
It has a rectangular window with a black frame and a visible license plate that reads \"SABF.\" The bus features a sleek, modern design with a slightly curved roof and a small, white, triangular logo near the bottom.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the license plate is not mentioned, but the license plate of the truck is mentioned.", + 0.5 + ], + [ + "The license plate or the truck is not mentioned.", + 0 + ], + [ + "The shape of the license plate is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the license plate is mentioned in the description and is rectangular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the license plate is mentioned in the description and is rectangular.", + "pred_index": 3, + "question_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the front is not mentioned, but the front of the truck is mentioned.", + 0.5 + ], + [ + "The front or the truck is not mentioned.", + 0 + ], + [ + "The color of the front is mentioned in the description but is not blue.", + -1 + ], + [ + "The color of the front is mentioned in the description and is blue.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the front is mentioned in the description and is blue.", + "pred_index": 3, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the grille is not mentioned, but the grille of the truck is mentioned.", + 0.5 + ], + [ + "The grille or the truck is not mentioned.", + 0 + ], + [ + "The color of the grille is mentioned in the description but is not black, green, or blue.", + -1 + ], + [ + "The color of the grille is mentioned in the description and is black, green, or blue.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. The grille or the truck is not mentioned.", + "pred_index": 1, + "question_index": 2, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The truck is not mentioned in the description.", + 0 + ], + [ + "The door handle of the truck is mentioned in the description.", + -1 + ], + [ + "The door handle of the truck is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The truck is not mentioned in the description.", + "pred_index": 0, + "question_index": 3, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The truck is not mentioned in the description.", + 0 + ], + [ + "The rear part of the truck is mentioned in the description.", + -1 + ], + [ + "The rear part of the truck is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The truck is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The truck is not mentioned in the description.", + 0 + ], + [ + "The exhaust pipe of the truck is mentioned in the description.", + -1 + ], + [ + "The exhaust pipe of the truck is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The exhaust pipe of the truck is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The truck is not mentioned in the description.", + 0 + ], + [ + "The tail light of the truck is mentioned in the description.", + -1 + ], + [ + "The tail light of the truck is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The tail light of the truck is not mentioned in the description.", + "pred_index": 2, + "question_index": 6, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The truck is not mentioned in the description.", + 0 + ], + [ + "The cargo area of the truck is mentioned in the description.", + -1 + ], + [ + "The cargo area of the truck is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The truck is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 0 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a truck or an object of a similar type? 
Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "B. No", + "pred_index": 1, + "eval_result": "incorrect" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section near the bottom. It has a rectangular window with a black frame and a visible license plate that reads \"SABF.\" The bus features a sleek, modern design with a slightly curved roof and a small, white, triangular logo near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a truck or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section near the bottom. 
It has a rectangular window with a black frame and a visible license plate that reads \"SABF.\" The bus features a sleek, modern design with a slightly curved roof and a small, white, triangular logo near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the license plate is not mentioned, but the license plate of the truck is mentioned.\nB. The license plate or the truck is not mentioned.\nC. The shape of the license plate is mentioned in the description but is not rectangular.\nD. The shape of the license plate is mentioned in the description and is rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section near the bottom. It has a rectangular window with a black frame and a visible license plate that reads \"SABF.\" The bus features a sleek, modern design with a slightly curved roof and a small, white, triangular logo near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the front is not mentioned, but the front of the truck is mentioned.\nB. The front or the truck is not mentioned.\nC. The color of the front is mentioned in the description but is not blue.\nD. The color of the front is mentioned in the description and is blue.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section near the bottom. It has a rectangular window with a black frame and a visible license plate that reads \"SABF.\" The bus features a sleek, modern design with a slightly curved roof and a small, white, triangular logo near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the grille is not mentioned, but the grille of the truck is mentioned.\nB. The grille or the truck is not mentioned.\nC. The color of the grille is mentioned in the description but is not black, green, or blue.\nD. The color of the grille is mentioned in the description and is black, green, or blue.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section near the bottom. It has a rectangular window with a black frame and a visible license plate that reads \"SABF.\" The bus features a sleek, modern design with a slightly curved roof and a small, white, triangular logo near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The truck is not mentioned in the description.\nB. The door handle of the truck is mentioned in the description.\nC. 
The door handle of the truck is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section near the bottom. It has a rectangular window with a black frame and a visible license plate that reads \"SABF.\" The bus features a sleek, modern design with a slightly curved roof and a small, white, triangular logo near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The truck is not mentioned in the description.\nB. The rear part of the truck is mentioned in the description.\nC. The rear part of the truck is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section near the bottom. It has a rectangular window with a black frame and a visible license plate that reads \"SABF.\" The bus features a sleek, modern design with a slightly curved roof and a small, white, triangular logo near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The truck is not mentioned in the description.\nB. 
The exhaust pipe of the truck is mentioned in the description.\nC. The exhaust pipe of the truck is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section near the bottom. It has a rectangular window with a black frame and a visible license plate that reads \"SABF.\" The bus features a sleek, modern design with a slightly curved roof and a small, white, triangular logo near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The truck is not mentioned in the description.\nB. The tail light of the truck is mentioned in the description.\nC. The tail light of the truck is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section near the bottom. 
It has a rectangular window with a black frame and a visible license plate that reads \"SABF.\" The bus features a sleek, modern design with a slightly curved roof and a small, white, triangular logo near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The truck is not mentioned in the description.\nB. The cargo area of the truck is mentioned in the description.\nC. The cargo area of the truck is not mentioned in the description.\n" + ], + "response": [ + "B. No", + "D. The shape of the license plate is mentioned in the description and is rectangular.", + "D. The color of the front is mentioned in the description and is blue.", + "B. The grille or the truck is not mentioned.", + "A. The truck is not mentioned in the description.", + "A. The truck is not mentioned in the description.", + "C. The exhaust pipe of the truck is not mentioned in the description.", + "C. The tail light of the truck is not mentioned in the description.", + "A. The truck is not mentioned in the description." + ], + "score": 0.0, + "score_pos": 0.0, + "score_neg": 0.0, + "neg_valid_num": 5, + "recognition_result": false + }, + "4243725": { + "pred": "A curved, elongated object with a gradient of colors ranging from light yellow to dark brown, featuring a smooth, glossy surface.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the soap is mentioned in the description and is bar soap.", + 1 + ], + [ + "The type of the soap is mentioned in the description but is not bar soap.", + -1 + ], + [ + "The type of the soap is not mentioned.", + 0 + ], + [ + "The soap is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The type of the soap is mentioned in the description but is not bar soap.", + "pred_index": 1, + "question_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the soap is mentioned in the description and is yellow or brown.", + 1 + ], + [ + "The color of the soap is mentioned in the description but is not yellow or brown.", + -1 + ], + [ + "The color of the soap is not mentioned.", + 0 + ], + [ + "The soap is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the soap is mentioned in the description and is yellow or brown.", + "pred_index": 0, + "question_index": 1, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The decorative elements of the soap are not mentioned in the description.", + 1 + ], + [ + "The decorative elements of the soap are mentioned in the description.", + -1 + ], + [ + "The soap is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The decorative elements of the soap are not mentioned in the description.", + "pred_index": 0, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cutting board is not mentioned in the description.", + 1 + ], + [ + "The cutting board is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The cutting board is mentioned in the description.", + "pred_index": 1, + "question_index": 3, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The dishwasher is not mentioned in the description.", + 1 + ], + [ + "The dishwasher is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The dishwasher is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The decorative peppers are not mentioned in the description.", + 1 + ], + [ + "The decorative peppers are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The decorative peppers are not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The logo of the soap is not mentioned in the description.", + 1 + ], + [ + "The logo of the soap is mentioned in the description.", + -1 + ], + [ + "The soap is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The logo of the soap is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a soap or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA curved, elongated object with a gradient of colors ranging from light yellow to dark brown, featuring a smooth, glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a soap or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA curved, elongated object with a gradient of colors ranging from light yellow to dark brown, featuring a smooth, glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The type of the soap is mentioned in the description and is bar soap.\nB. The type of the soap is mentioned in the description but is not bar soap.\nC. The type of the soap is not mentioned.\nD. The soap is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA curved, elongated object with a gradient of colors ranging from light yellow to dark brown, featuring a smooth, glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the soap is mentioned in the description and is yellow or brown.\nB. The color of the soap is mentioned in the description but is not yellow or brown.\nC. The color of the soap is not mentioned.\nD. The soap is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA curved, elongated object with a gradient of colors ranging from light yellow to dark brown, featuring a smooth, glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The decorative elements of the soap are not mentioned in the description.\nB. The decorative elements of the soap are mentioned in the description.\nC. The soap is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA curved, elongated object with a gradient of colors ranging from light yellow to dark brown, featuring a smooth, glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cutting board is not mentioned in the description.\nB. The cutting board is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA curved, elongated object with a gradient of colors ranging from light yellow to dark brown, featuring a smooth, glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The dishwasher is not mentioned in the description.\nB. The dishwasher is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA curved, elongated object with a gradient of colors ranging from light yellow to dark brown, featuring a smooth, glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The decorative peppers are not mentioned in the description.\nB. The decorative peppers are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA curved, elongated object with a gradient of colors ranging from light yellow to dark brown, featuring a smooth, glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The logo of the soap is not mentioned in the description.\nB. The logo of the soap is mentioned in the description.\nC. The soap is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The type of the soap is mentioned in the description but is not bar soap.", + "A. The color of the soap is mentioned in the description and is yellow or brown.", + "A. The decorative elements of the soap are not mentioned in the description.", + "B. The cutting board is mentioned in the description.", + "A. The dishwasher is not mentioned in the description.", + "A. The decorative peppers are not mentioned in the description.", + "A. The logo of the soap is not mentioned in the description." 
+ ], + "score": 0.42857142857142855, + "score_pos": 0.0, + "score_neg": 0.6, + "neg_valid_num": 5, + "recognition_result": true + }, + "4781902": { + "pred": "A dark brown wooden stool with a triangular seat and four legs, each leg angling outward and connected by a lower horizontal support beam.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the stool is not mentioned.", + 0 + ], + [ + "The stool is not mentioned.", + 0 + ], + [ + "The material of the stool is mentioned in the description but is not wood.", + -1 + ], + [ + "The material of the stool is mentioned in the description and is wood.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the stool is mentioned in the description and is wood.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the leg is not mentioned, but the leg of the stool is mentioned.", + 0.5 + ], + [ + "The leg or the stool is not mentioned.", + 0 + ], + [ + "The material of the leg is mentioned in the description but is not wood.", + -1 + ], + [ + "The material of the leg is mentioned in the description and is wood.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The material of the leg is mentioned in the description and is wood.", + "pred_index": 3, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the crossbar is not mentioned, but the crossbar of the stool is mentioned.", + 0.5 + ], + [ + "The crossbar or the stool is not mentioned.", + 0 + ], + [ + "The shape of the crossbar is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the crossbar is mentioned in the description and is rectangular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the crossbar is not mentioned, but the crossbar of the stool is mentioned.", + "pred_index": 0, + "question_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the crossbar is not mentioned, but the crossbar of the stool is mentioned.", + 0.5 + ], + [ + "The crossbar or the stool is not mentioned.", + 0 + ], + [ + "The material of the crossbar is mentioned in the description but is not wood.", + -1 + ], + [ + "The material of the crossbar is mentioned in the description and is wood.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the crossbar is not mentioned, but the crossbar of the stool is mentioned.", + "pred_index": 0, + "question_index": 3, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the seat is not mentioned, but the seat of the stool is mentioned.", + 0.5 + ], + [ + "The seat or the stool is not mentioned.", + 0 + ], + [ + "The shape of the seat is mentioned in the description but is not flat.", + -1 + ], + [ + "The shape of the seat is mentioned in the description and is flat.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the seat is mentioned in the description but is not flat.", + "pred_index": 2, + "question_index": 4, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sky is mentioned in the description.", + -1 + ], + [ + "The sky is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The sky is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The building in the background are mentioned in the description.", + -1 + ], + [ + "The building in the background are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The building in the background are not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The swing set is mentioned in the description.", + -1 + ], + [ + "The swing set is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The swing set is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The slide is mentioned in the description.", + -1 + ], + [ + "The slide is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The slide is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The climbing wall is mentioned in the description.", + -1 + ], + [ + "The climbing wall is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The climbing wall is not mentioned in the description.", + "pred_index": 1, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a stool or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wooden stool with a triangular seat and four legs, each leg angling outward and connected by a lower horizontal support beam.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a stool or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wooden stool with a triangular seat and four legs, each leg angling outward and connected by a lower horizontal support beam.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the stool is not mentioned.\nB. The stool is not mentioned.\nC. The material of the stool is mentioned in the description but is not wood.\nD. The material of the stool is mentioned in the description and is wood.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wooden stool with a triangular seat and four legs, each leg angling outward and connected by a lower horizontal support beam.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the leg is not mentioned, but the leg of the stool is mentioned.\nB. The leg or the stool is not mentioned.\nC. The material of the leg is mentioned in the description but is not wood.\nD. The material of the leg is mentioned in the description and is wood.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wooden stool with a triangular seat and four legs, each leg angling outward and connected by a lower horizontal support beam.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the crossbar is not mentioned, but the crossbar of the stool is mentioned.\nB. The crossbar or the stool is not mentioned.\nC. The shape of the crossbar is mentioned in the description but is not rectangular.\nD. The shape of the crossbar is mentioned in the description and is rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wooden stool with a triangular seat and four legs, each leg angling outward and connected by a lower horizontal support beam.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the crossbar is not mentioned, but the crossbar of the stool is mentioned.\nB. The crossbar or the stool is not mentioned.\nC. The material of the crossbar is mentioned in the description but is not wood.\nD. The material of the crossbar is mentioned in the description and is wood.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wooden stool with a triangular seat and four legs, each leg angling outward and connected by a lower horizontal support beam.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the seat is not mentioned, but the seat of the stool is mentioned.\nB. The seat or the stool is not mentioned.\nC. The shape of the seat is mentioned in the description but is not flat.\nD. The shape of the seat is mentioned in the description and is flat.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wooden stool with a triangular seat and four legs, each leg angling outward and connected by a lower horizontal support beam.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sky is mentioned in the description.\nB. The sky is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wooden stool with a triangular seat and four legs, each leg angling outward and connected by a lower horizontal support beam.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The building in the background are mentioned in the description.\nB. The building in the background are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wooden stool with a triangular seat and four legs, each leg angling outward and connected by a lower horizontal support beam.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The swing set is mentioned in the description.\nB. The swing set is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wooden stool with a triangular seat and four legs, each leg angling outward and connected by a lower horizontal support beam.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The slide is mentioned in the description.\nB. The slide is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wooden stool with a triangular seat and four legs, each leg angling outward and connected by a lower horizontal support beam.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The climbing wall is mentioned in the description.\nB. The climbing wall is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The material of the stool is mentioned in the description and is wood.", + "D. The material of the leg is mentioned in the description and is wood.", + "A. The shape of the crossbar is not mentioned, but the crossbar of the stool is mentioned.", + "A. The material of the crossbar is not mentioned, but the crossbar of the stool is mentioned.", + "C. The shape of the seat is mentioned in the description but is not flat.", + "B. The sky is not mentioned in the description.", + "B. The building in the background are not mentioned in the description.", + "B. The swing set is not mentioned in the description.", + "B. 
The slide is not mentioned in the description.", + "B. The climbing wall is not mentioned in the description." + ], + "score": 0.7, + "score_pos": 0.4, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "4782949": { + "pred": "A cylindrical drum with a dark brown, textured surface and a metallic rim. The drum has a blue and white striped pattern on the side.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the drum is mentioned in the description but is not round or circular.", + -1 + ], + [ + "The shape of the drum is mentioned in the description and is round or circular.", + 1 + ], + [ + "The shape of the drum is not mentioned.", + 0 + ], + [ + "The drum is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the drum is mentioned in the description and is round or circular.", + "pred_index": 1, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the body is mentioned in the description but is not red.", + -1 + ], + [ + "The color of the body is mentioned in the description and is red.", + 1 + ], + [ + "The color of the body is not mentioned, but the body of the drum is mentioned.", + 0.5 + ], + [ + "The body or the drum is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the body is mentioned in the description but is not red.", + "pred_index": 0, + "question_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the rim is mentioned in the description but is not metallic.", + -1 + ], + [ + "The material of the rim is mentioned in the description and is metallic.", + 1 + ], + [ + "The material of the rim is not mentioned, but the rim of the drum is mentioned.", + 0.5 + ], + [ + "The rim or the drum is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The material of the rim is mentioned in the description and is metallic.", + "pred_index": 1, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The drum stand of the drum is mentioned in the description.", + -1 + ], + [ + "The drum is not mentioned in the description.", + 0 + ], + [ + "The drum stand of the drum is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The drum stand of the drum is not mentioned in the description.", + "pred_index": 2, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Trees are mentioned in the description.", + -1 + ], + [ + "The Trees are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The Trees are not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The drum pedal of the drum is mentioned in the description.", + -1 + ], + [ + "The drum is not mentioned in the description.", + 0 + ], + [ + "The drum pedal of the drum is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The drum pedal of the drum is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Superior Foods sign is mentioned in the description.", + -1 + ], + [ + "The Superior Foods sign is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The Superior Foods sign is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The People are mentioned in the description.", + -1 + ], + [ + "The People are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The People are not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a drum or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical drum with a dark brown, textured surface and a metallic rim. The drum has a blue and white striped pattern on the side.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a drum or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical drum with a dark brown, textured surface and a metallic rim. The drum has a blue and white striped pattern on the side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the drum is mentioned in the description but is not round or circular.\nB. The shape of the drum is mentioned in the description and is round or circular.\nC. The shape of the drum is not mentioned.\nD. The drum is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical drum with a dark brown, textured surface and a metallic rim. The drum has a blue and white striped pattern on the side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the body is mentioned in the description but is not red.\nB. The color of the body is mentioned in the description and is red.\nC. The color of the body is not mentioned, but the body of the drum is mentioned.\nD. The body or the drum is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical drum with a dark brown, textured surface and a metallic rim. The drum has a blue and white striped pattern on the side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the rim is mentioned in the description but is not metallic.\nB. The material of the rim is mentioned in the description and is metallic.\nC. The material of the rim is not mentioned, but the rim of the drum is mentioned.\nD. The rim or the drum is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical drum with a dark brown, textured surface and a metallic rim. The drum has a blue and white striped pattern on the side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The drum stand of the drum is mentioned in the description.\nB. The drum is not mentioned in the description.\nC. The drum stand of the drum is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical drum with a dark brown, textured surface and a metallic rim. The drum has a blue and white striped pattern on the side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Trees are mentioned in the description.\nB. The Trees are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical drum with a dark brown, textured surface and a metallic rim. 
The drum has a blue and white striped pattern on the side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The drum pedal of the drum is mentioned in the description.\nB. The drum is not mentioned in the description.\nC. The drum pedal of the drum is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical drum with a dark brown, textured surface and a metallic rim. The drum has a blue and white striped pattern on the side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Superior Foods sign is mentioned in the description.\nB. The Superior Foods sign is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical drum with a dark brown, textured surface and a metallic rim. The drum has a blue and white striped pattern on the side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The People are mentioned in the description.\nB. 
The People are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The shape of the drum is mentioned in the description and is round or circular.", + "A. The color of the body is mentioned in the description but is not red.", + "B. The material of the rim is mentioned in the description and is metallic.", + "C. The drum stand of the drum is not mentioned in the description.", + "B. The Trees are not mentioned in the description.", + "C. The drum pedal of the drum is not mentioned in the description.", + "B. The Superior Foods sign is not mentioned in the description.", + "B. The People are not mentioned in the description." + ], + "score": 0.75, + "score_pos": 0.3333333333333333, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "5211280": { + "pred": "A stainless steel crock pot with a curved, dark gray handle on top. The control panel features a digital display in the center, surrounded by various buttons and indicators. The buttons are arranged in a semi-circular pattern around the display, with labels in both English and another language. The crock pot has a smooth, reflective surface and a slightly tapered design towards the base.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the rice cooker is mentioned in the description and is silver and black.", + 1 + ], + [ + "The color of the rice cooker is mentioned in the description but is not silver and black.", + -1 + ], + [ + "The color of the rice cooker is not mentioned.", + 0 + ], + [ + "The rice cooker is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the rice cooker is mentioned in the description but is not silver and black.", + "pred_index": 1, + "question_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the handle is mentioned in the description and is curved.", + 1 + ], + [ + "The shape of the handle is mentioned in the description but is not curved.", + -1 + ], + [ + "The shape of the handle is not mentioned, but the handle of the rice cooker is mentioned.", + 0.5 + ], + [ + "The handle or the rice cooker is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the handle is mentioned in the description and is curved.", + "pred_index": 0, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the display is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the display is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the display is not mentioned, but the display of the rice cooker is mentioned.", + 0.5 + ], + [ + "The display or the rice cooker is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the display is mentioned in the description but is not rectangular.", + "pred_index": 1, + "question_index": 2, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the body is mentioned in the description and is silver.", + 1 + ], + [ + "The color of the body is mentioned in the description but is not silver.", + -1 + ], + [ + "The color of the body is not mentioned, but the body of the rice cooker is mentioned.", + 0.5 + ], + [ + "The body or the rice cooker is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the body is mentioned in the description but is not silver.", + "pred_index": 1, + "question_index": 3, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the base is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the base is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the base is not mentioned, but the base of the rice cooker is mentioned.", + 0.5 + ], + [ + "The base or the rice cooker is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The material of the base is mentioned in the description but is not plastic.", + "pred_index": 1, + "question_index": 4, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The red crates are mentioned in the description.", + -1 + ], + [ + "The red crates are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The red crates are not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The generator is mentioned in the description.", + -1 + ], + [ + "The generator is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The generator is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The steam vent of the rice cooker is mentioned in the description.", + -1 + ], + [ + "The steam vent of the rice cooker is not mentioned in the description.", + 1 + ], + [ + "The rice cooker is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The steam vent of the rice cooker is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The inner pot of the rice cooker is mentioned in the description.", + -1 + ], + [ + "The inner pot of the rice cooker is not mentioned in the description.", + 1 + ], + [ + "The rice cooker is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The inner pot of the rice cooker is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The measuring cup of the rice cooker is mentioned in the description.", + -1 + ], + [ + "The measuring cup of the rice cooker is not mentioned in the description.", + 1 + ], + [ + "The rice cooker is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The measuring cup of the rice cooker is not mentioned in the description.", + "pred_index": 1, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a rice cooker or an object of a similar type? 
Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel crock pot with a curved, dark gray handle on top. The control panel features a digital display in the center, surrounded by various buttons and indicators. The buttons are arranged in a semi-circular pattern around the display, with labels in both English and another language. The crock pot has a smooth, reflective surface and a slightly tapered design towards the base.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a rice cooker or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel crock pot with a curved, dark gray handle on top. The control panel features a digital display in the center, surrounded by various buttons and indicators. 
The buttons are arranged in a semi-circular pattern around the display, with labels in both English and another language. The crock pot has a smooth, reflective surface and a slightly tapered design towards the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the rice cooker is mentioned in the description and is silver and black.\nB. The color of the rice cooker is mentioned in the description but is not silver and black.\nC. The color of the rice cooker is not mentioned.\nD. The rice cooker is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel crock pot with a curved, dark gray handle on top. The control panel features a digital display in the center, surrounded by various buttons and indicators. The buttons are arranged in a semi-circular pattern around the display, with labels in both English and another language. The crock pot has a smooth, reflective surface and a slightly tapered design towards the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the handle is mentioned in the description and is curved.\nB. The shape of the handle is mentioned in the description but is not curved.\nC. The shape of the handle is not mentioned, but the handle of the rice cooker is mentioned.\nD. The handle or the rice cooker is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel crock pot with a curved, dark gray handle on top. The control panel features a digital display in the center, surrounded by various buttons and indicators. The buttons are arranged in a semi-circular pattern around the display, with labels in both English and another language. The crock pot has a smooth, reflective surface and a slightly tapered design towards the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the display is mentioned in the description and is rectangular.\nB. The shape of the display is mentioned in the description but is not rectangular.\nC. The shape of the display is not mentioned, but the display of the rice cooker is mentioned.\nD. The display or the rice cooker is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel crock pot with a curved, dark gray handle on top. The control panel features a digital display in the center, surrounded by various buttons and indicators. The buttons are arranged in a semi-circular pattern around the display, with labels in both English and another language. 
The crock pot has a smooth, reflective surface and a slightly tapered design towards the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the body is mentioned in the description and is silver.\nB. The color of the body is mentioned in the description but is not silver.\nC. The color of the body is not mentioned, but the body of the rice cooker is mentioned.\nD. The body or the rice cooker is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel crock pot with a curved, dark gray handle on top. The control panel features a digital display in the center, surrounded by various buttons and indicators. The buttons are arranged in a semi-circular pattern around the display, with labels in both English and another language. The crock pot has a smooth, reflective surface and a slightly tapered design towards the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the base is mentioned in the description and is plastic.\nB. The material of the base is mentioned in the description but is not plastic.\nC. The material of the base is not mentioned, but the base of the rice cooker is mentioned.\nD. The base or the rice cooker is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel crock pot with a curved, dark gray handle on top. The control panel features a digital display in the center, surrounded by various buttons and indicators. The buttons are arranged in a semi-circular pattern around the display, with labels in both English and another language. The crock pot has a smooth, reflective surface and a slightly tapered design towards the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The red crates are mentioned in the description.\nB. The red crates are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel crock pot with a curved, dark gray handle on top. The control panel features a digital display in the center, surrounded by various buttons and indicators. The buttons are arranged in a semi-circular pattern around the display, with labels in both English and another language. The crock pot has a smooth, reflective surface and a slightly tapered design towards the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The generator is mentioned in the description.\nB. 
The generator is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel crock pot with a curved, dark gray handle on top. The control panel features a digital display in the center, surrounded by various buttons and indicators. The buttons are arranged in a semi-circular pattern around the display, with labels in both English and another language. The crock pot has a smooth, reflective surface and a slightly tapered design towards the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The steam vent of the rice cooker is mentioned in the description.\nB. The steam vent of the rice cooker is not mentioned in the description.\nC. The rice cooker is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel crock pot with a curved, dark gray handle on top. The control panel features a digital display in the center, surrounded by various buttons and indicators. The buttons are arranged in a semi-circular pattern around the display, with labels in both English and another language. 
The crock pot has a smooth, reflective surface and a slightly tapered design towards the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The inner pot of the rice cooker is mentioned in the description.\nB. The inner pot of the rice cooker is not mentioned in the description.\nC. The rice cooker is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel crock pot with a curved, dark gray handle on top. The control panel features a digital display in the center, surrounded by various buttons and indicators. The buttons are arranged in a semi-circular pattern around the display, with labels in both English and another language. The crock pot has a smooth, reflective surface and a slightly tapered design towards the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The measuring cup of the rice cooker is mentioned in the description.\nB. The measuring cup of the rice cooker is not mentioned in the description.\nC. The rice cooker is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the rice cooker is mentioned in the description but is not silver and black.", + "A. The shape of the handle is mentioned in the description and is curved.", + "B. The shape of the display is mentioned in the description but is not rectangular.", + "B. The color of the body is mentioned in the description but is not silver.", + "B. 
The material of the base is mentioned in the description but is not plastic.", + "B. The red crates are not mentioned in the description.", + "B. The generator is not mentioned in the description.", + "B. The steam vent of the rice cooker is not mentioned in the description.", + "B. The inner pot of the rice cooker is not mentioned in the description.", + "B. The measuring cup of the rice cooker is not mentioned in the description." + ], + "score": 0.2, + "score_pos": -0.6, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "5718392": { + "pred": "The box is a rectangular prism with a woven pattern of interlocking dark brown and light brown strips. The surface has a textured appearance, with the weave creating a series of small, diamond-shaped openings.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the storage box is mentioned in the description but is not wicker/woven.", + -1 + ], + [ + "The storage box is not mentioned.", + 0 + ], + [ + "The material of the storage box is not mentioned.", + 0 + ], + [ + "The material of the storage box is mentioned in the description and is wicker/woven.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the storage box is mentioned in the description and is wicker/woven.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the storage box is mentioned in the description but is not woven.", + -1 + ], + [ + "The storage box is not mentioned.", + 0 + ], + [ + "The texture of the storage box is not mentioned.", + 0 + ], + [ + "The texture of the storage box is mentioned in the description and is woven.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The texture of the storage box is mentioned in the description and is woven.", + "pred_index": 3, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the storage box is mentioned in the description but is not brown.", + -1 + ], + [ + "The storage box is not mentioned.", + 0 + ], + [ + "The color of the storage box is not mentioned.", + 0 + ], + [ + "The color of the storage box is mentioned in the description and is brown.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the storage box is mentioned in the description and is brown.", + "pred_index": 3, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wheels of the storage box are mentioned in the description.", + -1 + ], + [ + "The wheels of the storage box are not mentioned in the description.", + 1 + ], + [ + "The storage box is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The wheels of the storage box are not mentioned in the description.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The utensils are mentioned in the description.", + -1 + ], + [ + "The utensils are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The utensils are not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle of the storage box is mentioned in the description.", + -1 + ], + [ + "The handle of the storage box is not mentioned in the description.", + 1 + ], + [ + "The storage box is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The handle of the storage box is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lock of the storage box is mentioned in the description.", + -1 + ], + [ + "The lock of the storage box is not mentioned in the description.", + 1 + ], + [ + "The storage box is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The lock of the storage box is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The blue canopy is mentioned in the description.", + -1 + ], + [ + "The blue canopy is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The blue canopy is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a storage box or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe box is a rectangular prism with a woven pattern of interlocking dark brown and light brown strips. The surface has a textured appearance, with the weave creating a series of small, diamond-shaped openings.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a storage box or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe box is a rectangular prism with a woven pattern of interlocking dark brown and light brown strips. The surface has a textured appearance, with the weave creating a series of small, diamond-shaped openings.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the storage box is mentioned in the description but is not wicker/woven.\nB. The storage box is not mentioned.\nC. The material of the storage box is not mentioned.\nD. 
The material of the storage box is mentioned in the description and is wicker/woven.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe box is a rectangular prism with a woven pattern of interlocking dark brown and light brown strips. The surface has a textured appearance, with the weave creating a series of small, diamond-shaped openings.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the storage box is mentioned in the description but is not woven.\nB. The storage box is not mentioned.\nC. The texture of the storage box is not mentioned.\nD. The texture of the storage box is mentioned in the description and is woven.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe box is a rectangular prism with a woven pattern of interlocking dark brown and light brown strips. The surface has a textured appearance, with the weave creating a series of small, diamond-shaped openings.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the storage box is mentioned in the description but is not brown.\nB. 
The storage box is not mentioned.\nC. The color of the storage box is not mentioned.\nD. The color of the storage box is mentioned in the description and is brown.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe box is a rectangular prism with a woven pattern of interlocking dark brown and light brown strips. The surface has a textured appearance, with the weave creating a series of small, diamond-shaped openings.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wheels of the storage box are mentioned in the description.\nB. The wheels of the storage box are not mentioned in the description.\nC. The storage box is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe box is a rectangular prism with a woven pattern of interlocking dark brown and light brown strips. The surface has a textured appearance, with the weave creating a series of small, diamond-shaped openings.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The utensils are mentioned in the description.\nB. 
The utensils are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe box is a rectangular prism with a woven pattern of interlocking dark brown and light brown strips. The surface has a textured appearance, with the weave creating a series of small, diamond-shaped openings.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handle of the storage box is mentioned in the description.\nB. The handle of the storage box is not mentioned in the description.\nC. The storage box is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe box is a rectangular prism with a woven pattern of interlocking dark brown and light brown strips. The surface has a textured appearance, with the weave creating a series of small, diamond-shaped openings.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lock of the storage box is mentioned in the description.\nB. The lock of the storage box is not mentioned in the description.\nC. 
The storage box is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe box is a rectangular prism with a woven pattern of interlocking dark brown and light brown strips. The surface has a textured appearance, with the weave creating a series of small, diamond-shaped openings.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The blue canopy is mentioned in the description.\nB. The blue canopy is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The material of the storage box is mentioned in the description and is wicker/woven.", + "D. The texture of the storage box is mentioned in the description and is woven.", + "D. The color of the storage box is mentioned in the description and is brown.", + "B. The wheels of the storage box are not mentioned in the description.", + "B. The utensils are not mentioned in the description.", + "B. The handle of the storage box is not mentioned in the description.", + "B. The lock of the storage box is not mentioned in the description.", + "B. The blue canopy is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "6037272": { + "pred": "A green, cylindrical shampoo bottle with a slightly tapered end. 
The bottle has a smooth surface with a small, circular, orange and white label near the top.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle or the bottle is not mentioned.", + 0 + ], + [ + "The material of the bottle is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the bottle is not mentioned, but the bottle of the bottle is mentioned.", + 0.5 + ], + [ + "The material of the bottle is mentioned in the description but is not plastic.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the bottle is not mentioned, but the bottle of the bottle is mentioned.", + "pred_index": 2, + "question_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cap or the bottle is not mentioned.", + 0 + ], + [ + "The color of the cap is mentioned in the description and is green.", + 1 + ], + [ + "The color of the cap is not mentioned, but the cap of the bottle is mentioned.", + 0.5 + ], + [ + "The color of the cap is mentioned in the description but is not green.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The cap or the bottle is not mentioned.", + "pred_index": 0, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The text on the label or the bottle are not mentioned.", + 0 + ], + [ + "The color of the text on the label is mentioned in the description and is white.", + 1 + ], + [ + "The color of the text on the label is not mentioned, but the text on the label of the bottle are mentioned.", + 0.5 + ], + [ + "The color of the text on the label is mentioned in the description but is not white.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the text on the label is mentioned in the description and is white.", + "pred_index": 1, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle is not mentioned.", + 0 + ], + [ + "The color of the bottle is mentioned in the description and is green.", + 1 + ], + [ + "The color of the bottle is not mentioned.", + 0 + ], + [ + "The color of the bottle is mentioned in the description but is not green.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the bottle is mentioned in the description and is green.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cap/top or the bottle is not mentioned.", + 0 + ], + [ + "The shape of the cap/top is mentioned in the description and is flat or tapered.", + 1 + ], + [ + "The shape of the cap/top is not mentioned, but the cap/top of the bottle is mentioned.", + 0.5 + ], + [ + "The shape of the cap/top is mentioned in the description but is not flat or tapered.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The cap/top or the bottle is not mentioned.", + "pred_index": 0, + "question_index": 4, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The nozzle of the bottle is mentioned in the description.", + -1 + ], + [ + "The bottle is not mentioned in the description.", + 0 + ], + [ + "The nozzle of the bottle is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The nozzle of the bottle is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shower curtain is mentioned in the description.", + -1 + ], + [ + "The shower curtain is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The shower curtain is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bathtub is mentioned in the description.", + -1 + ], + [ + "The bathtub is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bathtub is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle of the bottle is mentioned in the description.", + -1 + ], + [ + "The bottle is not mentioned in the description.", + 0 + ], + [ + "The handle of the bottle is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The handle of the bottle is not mentioned in the description.", + "pred_index": 2, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pump of the bottle is mentioned in the description.", + -1 + ], + [ + "The bottle is not mentioned in the description.", + 0 + ], + [ + "The pump of the bottle is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The pump of the bottle is not mentioned in the description.", + "pred_index": 2, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a bottle or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green, cylindrical shampoo bottle with a slightly tapered end. The bottle has a smooth surface with a small, circular, orange and white label near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a bottle or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green, cylindrical shampoo bottle with a slightly tapered end. 
The bottle has a smooth surface with a small, circular, orange and white label near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bottle or the bottle is not mentioned.\nB. The material of the bottle is mentioned in the description and is plastic.\nC. The material of the bottle is not mentioned, but the bottle of the bottle is mentioned.\nD. The material of the bottle is mentioned in the description but is not plastic.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green, cylindrical shampoo bottle with a slightly tapered end. The bottle has a smooth surface with a small, circular, orange and white label near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cap or the bottle is not mentioned.\nB. The color of the cap is mentioned in the description and is green.\nC. The color of the cap is not mentioned, but the cap of the bottle is mentioned.\nD. The color of the cap is mentioned in the description but is not green.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA green, cylindrical shampoo bottle with a slightly tapered end. The bottle has a smooth surface with a small, circular, orange and white label near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The text on the label or the bottle are not mentioned.\nB. The color of the text on the label is mentioned in the description and is white.\nC. The color of the text on the label is not mentioned, but the text on the label of the bottle are mentioned.\nD. The color of the text on the label is mentioned in the description but is not white.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green, cylindrical shampoo bottle with a slightly tapered end. The bottle has a smooth surface with a small, circular, orange and white label near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bottle is not mentioned.\nB. The color of the bottle is mentioned in the description and is green.\nC. The color of the bottle is not mentioned.\nD. The color of the bottle is mentioned in the description but is not green.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green, cylindrical shampoo bottle with a slightly tapered end. The bottle has a smooth surface with a small, circular, orange and white label near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cap/top or the bottle is not mentioned.\nB. The shape of the cap/top is mentioned in the description and is flat or tapered.\nC. The shape of the cap/top is not mentioned, but the cap/top of the bottle is mentioned.\nD. The shape of the cap/top is mentioned in the description but is not flat or tapered.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green, cylindrical shampoo bottle with a slightly tapered end. The bottle has a smooth surface with a small, circular, orange and white label near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The nozzle of the bottle is mentioned in the description.\nB. The bottle is not mentioned in the description.\nC. The nozzle of the bottle is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA green, cylindrical shampoo bottle with a slightly tapered end. The bottle has a smooth surface with a small, circular, orange and white label near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shower curtain is mentioned in the description.\nB. The shower curtain is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green, cylindrical shampoo bottle with a slightly tapered end. The bottle has a smooth surface with a small, circular, orange and white label near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bathtub is mentioned in the description.\nB. The bathtub is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green, cylindrical shampoo bottle with a slightly tapered end. 
The bottle has a smooth surface with a small, circular, orange and white label near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handle of the bottle is mentioned in the description.\nB. The bottle is not mentioned in the description.\nC. The handle of the bottle is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green, cylindrical shampoo bottle with a slightly tapered end. The bottle has a smooth surface with a small, circular, orange and white label near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pump of the bottle is mentioned in the description.\nB. The bottle is not mentioned in the description.\nC. The pump of the bottle is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The material of the bottle is not mentioned, but the bottle of the bottle is mentioned.", + "A. The cap or the bottle is not mentioned.", + "B. The color of the text on the label is mentioned in the description and is white.", + "B. The color of the bottle is mentioned in the description and is green.", + "A. The cap/top or the bottle is not mentioned.", + "C. The nozzle of the bottle is not mentioned in the description.", + "B. The shower curtain is not mentioned in the description.", + "B. The bathtub is not mentioned in the description.", + "C. The handle of the bottle is not mentioned in the description.", + "C. 
The pump of the bottle is not mentioned in the description." + ], + "score": 0.75, + "score_pos": 0.5, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "6820594": { + "pred": "A medium-sized cat with a predominantly white face and underbelly, featuring a mix of dark brown and black patches on its back and sides. The cat has large, round, light green eyes and a pink nose. Its ears are upright, with the left ear having a light brown patch and the right ear being mostly white. The cat's fur is short and smooth.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the ears is mentioned in the description but is not triangular or pointed.", + -1 + ], + [ + "The shape of the ears is not mentioned, but the ears of the cat are mentioned.", + 0.5 + ], + [ + "The shape of the ears is mentioned in the description and is triangular or pointed.", + 1 + ], + [ + "The ears or the cat are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the ears is not mentioned, but the ears of the cat are mentioned.", + "pred_index": 1, + "question_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the eyes is mentioned in the description but is not yellow or green.", + -1 + ], + [ + "The color of the eyes is not mentioned, but the eyes of the cat are mentioned.", + 0.5 + ], + [ + "The color of the eyes is mentioned in the description and is yellow or green.", + 1 + ], + [ + "The eyes or the cat are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the eyes is mentioned in the description and is yellow or green.", + "pred_index": 2, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the legs is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the legs is not mentioned, but the legs of the cat are mentioned.", + 0.5 + ], + [ + "The color of the legs is mentioned in the description and is white.", + 1 + ], + [ + "The legs or the cat are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the legs is not mentioned, but the legs of the cat are mentioned.", + "pred_index": 1, + "question_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the eyes is mentioned in the description but is not round.", + -1 + ], + [ + "The shape of the eyes is not mentioned, but the eyes of the cat are mentioned.", + 0.5 + ], + [ + "The shape of the eyes is mentioned in the description and is round.", + 1 + ], + [ + "The eyes or the cat are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the eyes is mentioned in the description and is round.", + "pred_index": 2, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the back is mentioned in the description but is not brown or black.", + -1 + ], + [ + "The color of the back is not mentioned, but the back of the cat is mentioned.", + 0.5 + ], + [ + "The color of the back is mentioned in the description and is brown or black.", + 1 + ], + [ + "The back or the cat is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the back is mentioned in the description and is brown or black.", + "pred_index": 2, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toothpaste is mentioned in the description.", + -1 + ], + [ + "The toothpaste is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The toothpaste is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toothbrush is mentioned in the description.", + -1 + ], + [ + "The toothbrush is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The toothbrush is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mirror is mentioned in the description.", + -1 + ], + [ + "The mirror is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The mirror is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tail of the cat is mentioned in the description.", + -1 + ], + [ + "The tail of the cat is not mentioned in the description.", + 1 + ], + [ + "The cat is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The tail of the cat is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The faucet is mentioned in the description.", + -1 + ], + [ + "The faucet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The faucet is not mentioned in the description.", + "pred_index": 1, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a cat or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and underbelly, featuring a mix of dark brown and black patches on its back and sides. The cat has large, round, light green eyes and a pink nose. Its ears are upright, with the left ear having a light brown patch and the right ear being mostly white. The cat's fur is short and smooth.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a cat or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. 
No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and underbelly, featuring a mix of dark brown and black patches on its back and sides. The cat has large, round, light green eyes and a pink nose. Its ears are upright, with the left ear having a light brown patch and the right ear being mostly white. The cat's fur is short and smooth.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the ears is mentioned in the description but is not triangular or pointed.\nB. The shape of the ears is not mentioned, but the ears of the cat are mentioned.\nC. The shape of the ears is mentioned in the description and is triangular or pointed.\nD. The ears or the cat are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and underbelly, featuring a mix of dark brown and black patches on its back and sides. The cat has large, round, light green eyes and a pink nose. Its ears are upright, with the left ear having a light brown patch and the right ear being mostly white. 
The cat's fur is short and smooth.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the eyes is mentioned in the description but is not yellow or green.\nB. The color of the eyes is not mentioned, but the eyes of the cat are mentioned.\nC. The color of the eyes is mentioned in the description and is yellow or green.\nD. The eyes or the cat are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and underbelly, featuring a mix of dark brown and black patches on its back and sides. The cat has large, round, light green eyes and a pink nose. Its ears are upright, with the left ear having a light brown patch and the right ear being mostly white. The cat's fur is short and smooth.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the legs is mentioned in the description but is not white.\nB. The color of the legs is not mentioned, but the legs of the cat are mentioned.\nC. The color of the legs is mentioned in the description and is white.\nD. The legs or the cat are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and underbelly, featuring a mix of dark brown and black patches on its back and sides. The cat has large, round, light green eyes and a pink nose. Its ears are upright, with the left ear having a light brown patch and the right ear being mostly white. The cat's fur is short and smooth.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the eyes is mentioned in the description but is not round.\nB. The shape of the eyes is not mentioned, but the eyes of the cat are mentioned.\nC. The shape of the eyes is mentioned in the description and is round.\nD. The eyes or the cat are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and underbelly, featuring a mix of dark brown and black patches on its back and sides. The cat has large, round, light green eyes and a pink nose. Its ears are upright, with the left ear having a light brown patch and the right ear being mostly white. The cat's fur is short and smooth.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the back is mentioned in the description but is not brown or black.\nB. The color of the back is not mentioned, but the back of the cat is mentioned.\nC. The color of the back is mentioned in the description and is brown or black.\nD. 
The back or the cat is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and underbelly, featuring a mix of dark brown and black patches on its back and sides. The cat has large, round, light green eyes and a pink nose. Its ears are upright, with the left ear having a light brown patch and the right ear being mostly white. The cat's fur is short and smooth.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toothpaste is mentioned in the description.\nB. The toothpaste is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and underbelly, featuring a mix of dark brown and black patches on its back and sides. The cat has large, round, light green eyes and a pink nose. Its ears are upright, with the left ear having a light brown patch and the right ear being mostly white. The cat's fur is short and smooth.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The toothbrush is mentioned in the description.\nB. The toothbrush is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and underbelly, featuring a mix of dark brown and black patches on its back and sides. The cat has large, round, light green eyes and a pink nose. Its ears are upright, with the left ear having a light brown patch and the right ear being mostly white. The cat's fur is short and smooth.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mirror is mentioned in the description.\nB. The mirror is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and underbelly, featuring a mix of dark brown and black patches on its back and sides. The cat has large, round, light green eyes and a pink nose. Its ears are upright, with the left ear having a light brown patch and the right ear being mostly white. 
The cat's fur is short and smooth.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tail of the cat is mentioned in the description.\nB. The tail of the cat is not mentioned in the description.\nC. The cat is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and underbelly, featuring a mix of dark brown and black patches on its back and sides. The cat has large, round, light green eyes and a pink nose. Its ears are upright, with the left ear having a light brown patch and the right ear being mostly white. The cat's fur is short and smooth.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The faucet is mentioned in the description.\nB. The faucet is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The shape of the ears is not mentioned, but the ears of the cat are mentioned.", + "C. The color of the eyes is mentioned in the description and is yellow or green.", + "B. The color of the legs is not mentioned, but the legs of the cat are mentioned.", + "C. The shape of the eyes is mentioned in the description and is round.", + "C. The color of the back is mentioned in the description and is brown or black.", + "B. The toothpaste is not mentioned in the description.", + "B. The toothbrush is not mentioned in the description.", + "B. The mirror is not mentioned in the description.", + "B. 
The tail of the cat is not mentioned in the description.", + "B. The faucet is not mentioned in the description." + ], + "score": 0.9, + "score_pos": 0.8, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "5718424": { + "pred": "A rugged, dark-colored shoe with a thick, textured sole and a prominent, rounded toe. The shoe features a light-colored trim around the opening and a visible lace-up design.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the laces is not mentioned, but the laces of the sneakers are mentioned.", + 0.5 + ], + [ + "The color of the laces is mentioned in the description but is not white or green.", + -1 + ], + [ + "The color of the laces is mentioned in the description and is white or green.", + 1 + ], + [ + "The laces or the sneakers are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the laces is not mentioned, but the laces of the sneakers are mentioned.", + "pred_index": 0, + "question_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the sneakers is not mentioned.", + 0 + ], + [ + "The color of the sneakers is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the sneakers is mentioned in the description and is black.", + 1 + ], + [ + "The sneakers are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the sneakers is not mentioned.", + "pred_index": 0, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the sneakers is not mentioned.", + 0 + ], + [ + "The material of the sneakers is mentioned in the description but is not rubber.", + -1 + ], + [ + "The material of the sneakers is mentioned in the description and is rubber.", + 1 + ], + [ + "The sneakers are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the sneakers is not mentioned.", + "pred_index": 0, + "question_index": 2, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The table is not mentioned in the description.", + 1 + ], + [ + "The table is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The table is not mentioned in the description.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The base of the sneakers is not mentioned in the description.", + 1 + ], + [ + "The sneakers are not mentioned in the description.", + 0 + ], + [ + "The base of the sneakers is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The base of the sneakers is mentioned in the description.", + "pred_index": 2, + "question_index": 4, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The heel of the sneakers is not mentioned in the description.", + 1 + ], + [ + "The sneakers are not mentioned in the description.", + 0 + ], + [ + "The heel of the sneakers is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The heel of the sneakers is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The person is not mentioned in the description.", + 1 + ], + [ + "The person is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The person is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The food is not mentioned in the description.", + 1 + ], + [ + "The food is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The food is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the objects in the description are sneakers or objects of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rugged, dark-colored shoe with a thick, textured sole and a prominent, rounded toe. The shoe features a light-colored trim around the opening and a visible lace-up design.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the objects in the description are sneakers or objects of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rugged, dark-colored shoe with a thick, textured sole and a prominent, rounded toe. The shoe features a light-colored trim around the opening and a visible lace-up design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the laces is not mentioned, but the laces of the sneakers are mentioned.\nB. The color of the laces is mentioned in the description but is not white or green.\nC. The color of the laces is mentioned in the description and is white or green.\nD. The laces or the sneakers are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rugged, dark-colored shoe with a thick, textured sole and a prominent, rounded toe. The shoe features a light-colored trim around the opening and a visible lace-up design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the sneakers is not mentioned.\nB. The color of the sneakers is mentioned in the description but is not black.\nC. The color of the sneakers is mentioned in the description and is black.\nD. The sneakers are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rugged, dark-colored shoe with a thick, textured sole and a prominent, rounded toe. The shoe features a light-colored trim around the opening and a visible lace-up design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the sneakers is not mentioned.\nB. The material of the sneakers is mentioned in the description but is not rubber.\nC. The material of the sneakers is mentioned in the description and is rubber.\nD. The sneakers are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rugged, dark-colored shoe with a thick, textured sole and a prominent, rounded toe. The shoe features a light-colored trim around the opening and a visible lace-up design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The table is not mentioned in the description.\nB. The table is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rugged, dark-colored shoe with a thick, textured sole and a prominent, rounded toe. The shoe features a light-colored trim around the opening and a visible lace-up design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The base of the sneakers is not mentioned in the description.\nB. The sneakers are not mentioned in the description.\nC. The base of the sneakers is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA rugged, dark-colored shoe with a thick, textured sole and a prominent, rounded toe. The shoe features a light-colored trim around the opening and a visible lace-up design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The heel of the sneakers is not mentioned in the description.\nB. The sneakers are not mentioned in the description.\nC. The heel of the sneakers is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rugged, dark-colored shoe with a thick, textured sole and a prominent, rounded toe. The shoe features a light-colored trim around the opening and a visible lace-up design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The person is not mentioned in the description.\nB. The person is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rugged, dark-colored shoe with a thick, textured sole and a prominent, rounded toe. 
The shoe features a light-colored trim around the opening and a visible lace-up design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The food is not mentioned in the description.\nB. The food is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The color of the laces is not mentioned, but the laces of the sneakers are mentioned.", + "A. The color of the sneakers is not mentioned.", + "A. The material of the sneakers is not mentioned.", + "A. The table is not mentioned in the description.", + "C. The base of the sneakers is mentioned in the description.", + "A. The heel of the sneakers is not mentioned in the description.", + "A. The person is not mentioned in the description.", + "A. The food is not mentioned in the description." + ], + "score": 0.4375, + "score_pos": 0.16666666666666666, + "score_neg": 0.6, + "neg_valid_num": 5, + "recognition_result": true + }, + "6055310": { + "pred": "A wooden measuring stick with a natural finish, featuring black measurement markings in centimeters and millimeters. The stick has a slightly tapered end and a metal tip at the opposite end.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the blade is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the blade is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + 0.5 + ], + [ + "The blade or the tape measure/ruler is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The shape of the blade is mentioned in the description but is not rectangular.", + "pred_index": 1, + "question_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the blade is mentioned in the description and is yellow or golden.", + 1 + ], + [ + "The color of the blade is mentioned in the description but is not yellow or golden.", + -1 + ], + [ + "The color of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + 0.5 + ], + [ + "The blade or the tape measure/ruler is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the blade is mentioned in the description but is not yellow or golden.", + "pred_index": 1, + "question_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the blade is mentioned in the description and is long.", + 1 + ], + [ + "The size of the blade is mentioned in the description but is not long.", + -1 + ], + [ + "The size of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + 0.5 + ], + [ + "The blade or the tape measure/ruler is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The size of the blade is mentioned in the description but is not long.", + "pred_index": 1, + "question_index": 2, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the blade is mentioned in the description and is metal.", + 1 + ], + [ + "The material of the blade is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + 0.5 + ], + [ + "The blade or the tape measure/ruler is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The material of the tip is mentioned in the description and is metal.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The case of the tape measure/ruler is not mentioned in the description.", + 1 + ], + [ + "The tape measure/ruler is not mentioned in the description.", + 0 + ], + [ + "The case of the tape measure/ruler is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The case of the tape measure/ruler is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bathtub is not mentioned in the description.", + 1 + ], + [ + "The bathtub is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The bathtub is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lock of the tape measure/ruler is not mentioned in the description.", + 1 + ], + [ + "The tape measure/ruler is not mentioned in the description.", + 0 + ], + [ + "The lock of the tape measure/ruler is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The lock of the tape measure/ruler is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The door is not mentioned in the description.", + 1 + ], + [ + "The door is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The door is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The box is not mentioned in the description.", + 1 + ], + [ + "The box is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The box is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a tape measure/ruler or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden measuring stick with a natural finish, featuring black measurement markings in centimeters and millimeters. 
The stick has a slightly tapered end and a metal tip at the opposite end.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a tape measure/ruler or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden measuring stick with a natural finish, featuring black measurement markings in centimeters and millimeters. The stick has a slightly tapered end and a metal tip at the opposite end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the blade is mentioned in the description and is rectangular.\nB. The shape of the blade is mentioned in the description but is not rectangular.\nC. The shape of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.\nD. The blade or the tape measure/ruler is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden measuring stick with a natural finish, featuring black measurement markings in centimeters and millimeters. 
The stick has a slightly tapered end and a metal tip at the opposite end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the blade is mentioned in the description and is yellow or golden.\nB. The color of the blade is mentioned in the description but is not yellow or golden.\nC. The color of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.\nD. The blade or the tape measure/ruler is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden measuring stick with a natural finish, featuring black measurement markings in centimeters and millimeters. The stick has a slightly tapered end and a metal tip at the opposite end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the blade is mentioned in the description and is long.\nB. The size of the blade is mentioned in the description but is not long.\nC. The size of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.\nD. The blade or the tape measure/ruler is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA wooden measuring stick with a natural finish, featuring black measurement markings in centimeters and millimeters. The stick has a slightly tapered end and a metal tip at the opposite end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the blade is mentioned in the description and is metal.\nB. The material of the blade is mentioned in the description but is not metal.\nC. The material of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.\nD. The blade or the tape measure/ruler is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden measuring stick with a natural finish, featuring black measurement markings in centimeters and millimeters. The stick has a slightly tapered end and a metal tip at the opposite end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The case of the tape measure/ruler is not mentioned in the description.\nB. The tape measure/ruler is not mentioned in the description.\nC. The case of the tape measure/ruler is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden measuring stick with a natural finish, featuring black measurement markings in centimeters and millimeters. The stick has a slightly tapered end and a metal tip at the opposite end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bathtub is not mentioned in the description.\nB. The bathtub is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden measuring stick with a natural finish, featuring black measurement markings in centimeters and millimeters. The stick has a slightly tapered end and a metal tip at the opposite end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lock of the tape measure/ruler is not mentioned in the description.\nB. The tape measure/ruler is not mentioned in the description.\nC. The lock of the tape measure/ruler is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA wooden measuring stick with a natural finish, featuring black measurement markings in centimeters and millimeters. The stick has a slightly tapered end and a metal tip at the opposite end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The door is not mentioned in the description.\nB. The door is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden measuring stick with a natural finish, featuring black measurement markings in centimeters and millimeters. The stick has a slightly tapered end and a metal tip at the opposite end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The box is not mentioned in the description.\nB. The box is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The shape of the blade is mentioned in the description but is not rectangular.", + "B. The color of the blade is mentioned in the description but is not yellow or golden.", + "B. The size of the blade is mentioned in the description but is not long.", + "A. The material of the tip is mentioned in the description and is metal.", + "A. The case of the tape measure/ruler is not mentioned in the description.", + "A. The bathtub is not mentioned in the description.", + "A. The lock of the tape measure/ruler is not mentioned in the description.", + "A. 
The door is not mentioned in the description.", + "A. The box is not mentioned in the description." + ], + "score": 0.3333333333333333, + "score_pos": -0.5, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "8201777": { + "pred": "A black van with a rear window displaying the word \"TAXI\" in yellow letters. The van has a yellow license plate with black text and a small white sticker below it. The rear lights are vertically aligned on both sides, and the van has a small emblem above the license plate.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the van is mentioned in the description but is not black.", + -1 + ], + [ + "The van is not mentioned.", + 0 + ], + [ + "The color of the van is not mentioned.", + 0 + ], + [ + "The color of the van is mentioned in the description and is black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the van is mentioned in the description and is black.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the sticker is mentioned in the description but is not white.", + -1 + ], + [ + "The sticker or the van is not mentioned.", + 0 + ], + [ + "The color of the sticker is not mentioned, but the sticker of the van is mentioned.", + 0.5 + ], + [ + "The color of the sticker is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the sticker is mentioned in the description and is white.", + "pred_index": 3, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the taillight is mentioned in the description but is not plastic.", + -1 + ], + [ + "The taillight or the van is not mentioned.", + 0 + ], + [ + "The material of the taillight is not mentioned, but the taillight of the van is mentioned.", + 0.5 + ], + [ + "The material of the taillight is mentioned in the description and is plastic.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the taillight is not mentioned, but the taillight of the van is mentioned.", + "pred_index": 2, + "question_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the license plate is mentioned in the description but is not yellow.", + -1 + ], + [ + "The license plate or the van is not mentioned.", + 0 + ], + [ + "The color of the license plate is not mentioned, but the license plate of the van is mentioned.", + 0.5 + ], + [ + "The color of the license plate is mentioned in the description and is yellow.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the license plate is mentioned in the description and is yellow.", + "pred_index": 3, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The text of the sign is mentioned in the description but is not taxi.", + -1 + ], + [ + "The sign or the van is not mentioned.", + 0 + ], + [ + "The text of the sign is not mentioned, but the sign of the van is mentioned.", + 0.5 + ], + [ + "The text of the sign is mentioned in the description and is taxi.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The text of the sign is mentioned in the description and is taxi.", + "pred_index": 3, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The van is not mentioned in the description.", + 0 + ], + [ + "The grill of the van is not mentioned in the description.", + 1 + ], + [ + "The grill of the van is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The grill of the van is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The building is not mentioned in the description.", + 1 + ], + [ + "The building is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The building is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The van is not mentioned in the description.", + 0 + ], + [ + "The front bumper of the van is not mentioned in the description.", + 1 + ], + [ + "The front bumper of the van is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The front bumper of the van is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The van is not mentioned in the description.", + 0 + ], + [ + "The antenna of the van is not mentioned in the description.", + 1 + ], + [ + "The antenna of the van is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The antenna of the van is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The person is not mentioned in the description.", + 1 + ], + [ + "The person is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The person is not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a van or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in yellow letters. 
The van has a yellow license plate with black text and a small white sticker below it. The rear lights are vertically aligned on both sides, and the van has a small emblem above the license plate.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a van or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in yellow letters. The van has a yellow license plate with black text and a small white sticker below it. The rear lights are vertically aligned on both sides, and the van has a small emblem above the license plate.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the van is mentioned in the description but is not black.\nB. The van is not mentioned.\nC. The color of the van is not mentioned.\nD. The color of the van is mentioned in the description and is black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in yellow letters. 
The van has a yellow license plate with black text and a small white sticker below it. The rear lights are vertically aligned on both sides, and the van has a small emblem above the license plate.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the sticker is mentioned in the description but is not white.\nB. The sticker or the van is not mentioned.\nC. The color of the sticker is not mentioned, but the sticker of the van is mentioned.\nD. The color of the sticker is mentioned in the description and is white.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in yellow letters. The van has a yellow license plate with black text and a small white sticker below it. The rear lights are vertically aligned on both sides, and the van has a small emblem above the license plate.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the taillight is mentioned in the description but is not plastic.\nB. The taillight or the van is not mentioned.\nC. The material of the taillight is not mentioned, but the taillight of the van is mentioned.\nD. The material of the taillight is mentioned in the description and is plastic.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in yellow letters. The van has a yellow license plate with black text and a small white sticker below it. The rear lights are vertically aligned on both sides, and the van has a small emblem above the license plate.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the license plate is mentioned in the description but is not yellow.\nB. The license plate or the van is not mentioned.\nC. The color of the license plate is not mentioned, but the license plate of the van is mentioned.\nD. The color of the license plate is mentioned in the description and is yellow.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in yellow letters. The van has a yellow license plate with black text and a small white sticker below it. The rear lights are vertically aligned on both sides, and the van has a small emblem above the license plate.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The text of the sign is mentioned in the description but is not taxi.\nB. The sign or the van is not mentioned.\nC. 
The text of the sign is not mentioned, but the sign of the van is mentioned.\nD. The text of the sign is mentioned in the description and is taxi.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in yellow letters. The van has a yellow license plate with black text and a small white sticker below it. The rear lights are vertically aligned on both sides, and the van has a small emblem above the license plate.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The van is not mentioned in the description.\nB. The grill of the van is not mentioned in the description.\nC. The grill of the van is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in yellow letters. The van has a yellow license plate with black text and a small white sticker below it. The rear lights are vertically aligned on both sides, and the van has a small emblem above the license plate.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The building is not mentioned in the description.\nB. The building is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in yellow letters. The van has a yellow license plate with black text and a small white sticker below it. The rear lights are vertically aligned on both sides, and the van has a small emblem above the license plate.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The van is not mentioned in the description.\nB. The front bumper of the van is not mentioned in the description.\nC. The front bumper of the van is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in yellow letters. The van has a yellow license plate with black text and a small white sticker below it. The rear lights are vertically aligned on both sides, and the van has a small emblem above the license plate.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The van is not mentioned in the description.\nB. The antenna of the van is not mentioned in the description.\nC. The antenna of the van is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in yellow letters. The van has a yellow license plate with black text and a small white sticker below it. The rear lights are vertically aligned on both sides, and the van has a small emblem above the license plate.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The person is not mentioned in the description.\nB. The person is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the van is mentioned in the description and is black.", + "D. The color of the sticker is mentioned in the description and is white.", + "C. The material of the taillight is not mentioned, but the taillight of the van is mentioned.", + "D. The color of the license plate is mentioned in the description and is yellow.", + "D. The text of the sign is mentioned in the description and is taxi.", + "B. The grill of the van is not mentioned in the description.", + "A. The building is not mentioned in the description.", + "B. The front bumper of the van is not mentioned in the description.", + "B. The antenna of the van is not mentioned in the description.", + "A. The person is not mentioned in the description." 
+ ], + "score": 0.95, + "score_pos": 0.9, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "8331685": { + "pred": "A black over-ear headphone with a curved headband and a cushioned earcup. The earcup has a circular shape with a smooth, matte finish. The headband is attached to the earcup with a visible hinge mechanism.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The headphone is not mentioned.", + 0 + ], + [ + "The color of the headphone is not mentioned.", + 0 + ], + [ + "The color of the headphone is mentioned in the description and is black.", + 1 + ], + [ + "The color of the headphone is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the headphone is mentioned in the description and is black.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ear cup or the headphone is not mentioned.", + 0 + ], + [ + "The type of the ear cup is not mentioned, but the ear cup of the headphone is mentioned.", + 0.5 + ], + [ + "The type of the ear cup is mentioned in the description and is cushioned.", + 1 + ], + [ + "The type of the ear cup is mentioned in the description but is not cushioned.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The type of the ear cup is mentioned in the description and is cushioned.", + "pred_index": 2, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The headband or the headphone is not mentioned.", + 0 + ], + [ + "The shape of the headband is not mentioned, but the headband of the headphone is mentioned.", + 0.5 + ], + [ + "The shape of the headband is mentioned in the description and is curved.", + 1 + ], + [ + "The shape of the headband is mentioned in the description but is not curved.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the headband is mentioned in the description and is curved.", + "pred_index": 2, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The microphone of the headphone is not mentioned in the description.", + 1 + ], + [ + "The microphone of the headphone is mentioned in the description.", + -1 + ], + [ + "The headphone is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The microphone of the headphone is not mentioned in the description.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The keyboard is not mentioned in the description.", + 1 + ], + [ + "The keyboard is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The keyboard is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plant is not mentioned in the description.", + 1 + ], + [ + "The plant is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The plant is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clock is not mentioned in the description.", + 1 + ], + [ + "The clock is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The clock is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle is not mentioned in the description.", + 1 + ], + [ + "The bottle is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The bottle is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a headphone or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black over-ear headphone with a curved headband and a cushioned earcup. The earcup has a circular shape with a smooth, matte finish. The headband is attached to the earcup with a visible hinge mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a headphone or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black over-ear headphone with a curved headband and a cushioned earcup. The earcup has a circular shape with a smooth, matte finish. The headband is attached to the earcup with a visible hinge mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The headphone is not mentioned.\nB. The color of the headphone is not mentioned.\nC. The color of the headphone is mentioned in the description and is black.\nD. The color of the headphone is mentioned in the description but is not black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black over-ear headphone with a curved headband and a cushioned earcup. The earcup has a circular shape with a smooth, matte finish. The headband is attached to the earcup with a visible hinge mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ear cup or the headphone is not mentioned.\nB. The type of the ear cup is not mentioned, but the ear cup of the headphone is mentioned.\nC. The type of the ear cup is mentioned in the description and is cushioned.\nD. The type of the ear cup is mentioned in the description but is not cushioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black over-ear headphone with a curved headband and a cushioned earcup. The earcup has a circular shape with a smooth, matte finish. The headband is attached to the earcup with a visible hinge mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The headband or the headphone is not mentioned.\nB. The shape of the headband is not mentioned, but the headband of the headphone is mentioned.\nC. The shape of the headband is mentioned in the description and is curved.\nD. The shape of the headband is mentioned in the description but is not curved.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black over-ear headphone with a curved headband and a cushioned earcup. The earcup has a circular shape with a smooth, matte finish. The headband is attached to the earcup with a visible hinge mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The microphone of the headphone is not mentioned in the description.\nB. The microphone of the headphone is mentioned in the description.\nC. The headphone is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black over-ear headphone with a curved headband and a cushioned earcup. The earcup has a circular shape with a smooth, matte finish. The headband is attached to the earcup with a visible hinge mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The keyboard is not mentioned in the description.\nB. The keyboard is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black over-ear headphone with a curved headband and a cushioned earcup. The earcup has a circular shape with a smooth, matte finish. The headband is attached to the earcup with a visible hinge mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The plant is not mentioned in the description.\nB. The plant is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black over-ear headphone with a curved headband and a cushioned earcup. The earcup has a circular shape with a smooth, matte finish. The headband is attached to the earcup with a visible hinge mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The clock is not mentioned in the description.\nB. The clock is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black over-ear headphone with a curved headband and a cushioned earcup. The earcup has a circular shape with a smooth, matte finish. 
The headband is attached to the earcup with a visible hinge mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bottle is not mentioned in the description.\nB. The bottle is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the headphone is mentioned in the description and is black.", + "C. The type of the ear cup is mentioned in the description and is cushioned.", + "C. The shape of the headband is mentioned in the description and is curved.", + "A. The microphone of the headphone is not mentioned in the description.", + "A. The keyboard is not mentioned in the description.", + "A. The plant is not mentioned in the description.", + "A. The clock is not mentioned in the description.", + "A. The bottle is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "8331718": { + "pred": "A black spiral-bound notebook with a white cover featuring the word \"Xtreme\" in a stylized font.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the notebook is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the notebook is mentioned in the description and is black.", + 1 + ], + [ + "The notebook is not mentioned.", + 0 + ], + [ + "The color of the notebook is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the notebook is mentioned in the description and is black.", + "pred_index": 1, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the notebook is mentioned in the description but is not spiral-bound.", + -1 + ], + [ + "The type of the notebook is mentioned in the description and is spiral-bound.", + 1 + ], + [ + "The notebook is not mentioned.", + 0 + ], + [ + "The type of the notebook is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The type of the notebook is mentioned in the description and is spiral-bound.", + "pred_index": 1, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The text of the cover is mentioned in the description but is not YAHOO.", + -1 + ], + [ + "The text of the cover is mentioned in the description and is YAHOO.", + 1 + ], + [ + "The cover or the notebook is not mentioned.", + 0 + ], + [ + "The text of the cover is not mentioned, but the cover of the notebook is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The text of the cover is mentioned in the description but is not YAHOO.", + "pred_index": 0, + "question_index": 2, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the logo/text on the cover is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the logo/text on the cover is mentioned in the description and is white.", + 1 + ], + [ + "The logo/text on the cover or the notebook are not mentioned.", + 0 + ], + [ + "The color of the logo/text on the cover is not mentioned, but the logo/text on the cover of the notebook are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the logo/text on the cover is mentioned in the description and is white.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chair is mentioned in the description.", + -1 + ], + [ + "The chair is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The chair is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle is mentioned in the description.", + -1 + ], + [ + "The bottle is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bottle is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bookmark of the notebook is mentioned in the description.", + -1 + ], + [ + "The notebook is not mentioned in the description.", + 0 + ], + [ + "The bookmark of the notebook is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The bookmark of the notebook is not mentioned in the description.", + "pred_index": 2, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouse is mentioned in the description.", + -1 + ], + [ + "The mouse is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The mouse is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The keyboard is mentioned in the description.", + -1 + ], + [ + "The keyboard is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The keyboard is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a notebook or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover featuring the word \"Xtreme\" in a stylized font.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a notebook or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover featuring the word \"Xtreme\" in a stylized font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the notebook is mentioned in the description but is not black.\nB. The color of the notebook is mentioned in the description and is black.\nC. The notebook is not mentioned.\nD. The color of the notebook is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover featuring the word \"Xtreme\" in a stylized font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The type of the notebook is mentioned in the description but is not spiral-bound.\nB. The type of the notebook is mentioned in the description and is spiral-bound.\nC. The notebook is not mentioned.\nD. The type of the notebook is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover featuring the word \"Xtreme\" in a stylized font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The text of the cover is mentioned in the description but is not YAHOO.\nB. The text of the cover is mentioned in the description and is YAHOO.\nC. The cover or the notebook is not mentioned.\nD. The text of the cover is not mentioned, but the cover of the notebook is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover featuring the word \"Xtreme\" in a stylized font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the logo/text on the cover is mentioned in the description but is not white.\nB. The color of the logo/text on the cover is mentioned in the description and is white.\nC. The logo/text on the cover or the notebook are not mentioned.\nD. The color of the logo/text on the cover is not mentioned, but the logo/text on the cover of the notebook are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover featuring the word \"Xtreme\" in a stylized font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chair is mentioned in the description.\nB. The chair is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover featuring the word \"Xtreme\" in a stylized font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bottle is mentioned in the description.\nB. The bottle is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover featuring the word \"Xtreme\" in a stylized font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bookmark of the notebook is mentioned in the description.\nB. The notebook is not mentioned in the description.\nC. 
The bookmark of the notebook is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover featuring the word \"Xtreme\" in a stylized font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mouse is mentioned in the description.\nB. The mouse is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover featuring the word \"Xtreme\" in a stylized font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The keyboard is mentioned in the description.\nB. The keyboard is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the notebook is mentioned in the description and is black.", + "B. The type of the notebook is mentioned in the description and is spiral-bound.", + "A. The text of the cover is mentioned in the description but is not YAHOO.", + "B. The color of the logo/text on the cover is mentioned in the description and is white.", + "B. 
The chair is not mentioned in the description.", + "B. The bottle is not mentioned in the description.", + "C. The bookmark of the notebook is not mentioned in the description.", + "B. The mouse is not mentioned in the description.", + "B. The keyboard is not mentioned in the description." + ], + "score": 0.7777777777777778, + "score_pos": 0.5, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "8557176": { + "pred": "The watch features a rectangular gold case with a white dial. The strap is black with a textured pattern and a gold buckle.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The case or the watch is not mentioned.", + 0 + ], + [ + "The color of the case is mentioned in the description but is not golden.", + -1 + ], + [ + "The color of the case is not mentioned, but the case of the watch is mentioned.", + 0.5 + ], + [ + "The color of the case is mentioned in the description and is golden.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the case is mentioned in the description and is golden.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The case or the watch is not mentioned.", + 0 + ], + [ + "The material of the case is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the case is not mentioned, but the case of the watch is mentioned.", + 0.5 + ], + [ + "The material of the case is mentioned in the description and is metal.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The material of the case is mentioned in the description and is metal.", + "pred_index": 3, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The strap or the watch is not mentioned.", + 0 + ], + [ + "The color of the strap is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the strap is not mentioned, but the strap of the watch is mentioned.", + 0.5 + ], + [ + "The color of the strap is mentioned in the description and is black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the strap is mentioned in the description and is black.", + "pred_index": 3, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The strap or the watch is not mentioned.", + 0 + ], + [ + "The material of the strap is mentioned in the description but is not leather.", + -1 + ], + [ + "The material of the strap is not mentioned, but the strap of the watch is mentioned.", + 0.5 + ], + [ + "The material of the strap is mentioned in the description and is leather.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. The material of the strap is mentioned in the description but is not leather.", + "pred_index": 1, + "question_index": 3, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The food is not mentioned in the description.", + 1 + ], + [ + "The food is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The food is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cup is not mentioned in the description.", + 1 + ], + [ + "The cup is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The cup is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The oven is not mentioned in the description.", + 1 + ], + [ + "The oven is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The oven is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The crab cracker is not mentioned in the description.", + 1 + ], + [ + "The crab cracker is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The crab cracker is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The jar is not mentioned in the description.", + 1 + ], + [ + "The jar is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The jar is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a watch or an object of a similar type? 
Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold case with a white dial. The strap is black with a textured pattern and a gold buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a watch or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold case with a white dial. The strap is black with a textured pattern and a gold buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The case or the watch is not mentioned.\nB. The color of the case is mentioned in the description but is not golden.\nC. The color of the case is not mentioned, but the case of the watch is mentioned.\nD. 
The color of the case is mentioned in the description and is golden.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold case with a white dial. The strap is black with a textured pattern and a gold buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The case or the watch is not mentioned.\nB. The material of the case is mentioned in the description but is not metal.\nC. The material of the case is not mentioned, but the case of the watch is mentioned.\nD. The material of the case is mentioned in the description and is metal.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold case with a white dial. The strap is black with a textured pattern and a gold buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The strap or the watch is not mentioned.\nB. The color of the strap is mentioned in the description but is not black.\nC. The color of the strap is not mentioned, but the strap of the watch is mentioned.\nD. 
The color of the strap is mentioned in the description and is black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold case with a white dial. The strap is black with a textured pattern and a gold buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The strap or the watch is not mentioned.\nB. The material of the strap is mentioned in the description but is not leather.\nC. The material of the strap is not mentioned, but the strap of the watch is mentioned.\nD. The material of the strap is mentioned in the description and is leather.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold case with a white dial. The strap is black with a textured pattern and a gold buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The food is not mentioned in the description.\nB. The food is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold case with a white dial. The strap is black with a textured pattern and a gold buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cup is not mentioned in the description.\nB. The cup is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold case with a white dial. The strap is black with a textured pattern and a gold buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The oven is not mentioned in the description.\nB. The oven is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold case with a white dial. 
The strap is black with a textured pattern and a gold buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The crab cracker is not mentioned in the description.\nB. The crab cracker is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold case with a white dial. The strap is black with a textured pattern and a gold buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The jar is not mentioned in the description.\nB. The jar is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the case is mentioned in the description and is golden.", + "D. The material of the case is mentioned in the description and is metal.", + "D. The color of the strap is mentioned in the description and is black.", + "B. The material of the strap is mentioned in the description but is not leather.", + "A. The food is not mentioned in the description.", + "A. The cup is not mentioned in the description.", + "A. The oven is not mentioned in the description.", + "A. The crab cracker is not mentioned in the description.", + "A. The jar is not mentioned in the description." + ], + "score": 0.7777777777777778, + "score_pos": 0.5, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "8557195": { + "pred": "A beige, rectangular bread maker with a smooth surface and slightly rounded edges. 
The top edge has a small, dark opening.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the body is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the body is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the body is not mentioned, but the body of the toaster is mentioned.", + 0.5 + ], + [ + "The body or the toaster is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The material of the body is mentioned in the description but is not plastic.", + "pred_index": 1, + "question_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the toaster is mentioned in the description and is white or beige.", + 1 + ], + [ + "The color of the toaster is mentioned in the description but is not white or beige.", + -1 + ], + [ + "The color of the toaster is not mentioned.", + 0 + ], + [ + "The toaster is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the toaster is mentioned in the description and is white or beige.", + "pred_index": 0, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the body is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the body is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the body is not mentioned, but the body of the toaster is mentioned.", + 0.5 + ], + [ + "The body or the toaster is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The texture of the body is mentioned in the description and is smooth.", + "pred_index": 0, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lever of the toaster is not mentioned in the description.", + 1 + ], + [ + "The lever of the toaster is mentioned in the description.", + -1 + ], + [ + "The toaster is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The toaster is not mentioned in the description.", + "pred_index": 2, + "question_index": 3, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The knife is not mentioned in the description.", + 1 + ], + [ + "The knife is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The knife is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cord of the toaster is not mentioned in the description.", + 1 + ], + [ + "The cord of the toaster is mentioned in the description.", + -1 + ], + [ + "The toaster is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The toaster is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The knob of the toaster is not mentioned in the description.", + 1 + ], + [ + "The knob of the toaster is mentioned in the description.", + -1 + ], + [ + "The toaster is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The toaster is not mentioned in the description.", + "pred_index": 2, + "question_index": 6, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The utensil is not mentioned in the description.", + 1 + ], + [ + "The utensil is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The utensil is mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": -1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a toaster or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA beige, rectangular bread maker with a smooth surface and slightly rounded edges. The top edge has a small, dark opening.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a toaster or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA beige, rectangular bread maker with a smooth surface and slightly rounded edges. The top edge has a small, dark opening.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the body is mentioned in the description and is plastic.\nB. The material of the body is mentioned in the description but is not plastic.\nC. The material of the body is not mentioned, but the body of the toaster is mentioned.\nD. The body or the toaster is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA beige, rectangular bread maker with a smooth surface and slightly rounded edges. The top edge has a small, dark opening.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the toaster is mentioned in the description and is white or beige.\nB. The color of the toaster is mentioned in the description but is not white or beige.\nC. The color of the toaster is not mentioned.\nD. The toaster is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA beige, rectangular bread maker with a smooth surface and slightly rounded edges. The top edge has a small, dark opening.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the body is mentioned in the description and is smooth.\nB. The texture of the body is mentioned in the description but is not smooth.\nC. The texture of the body is not mentioned, but the body of the toaster is mentioned.\nD. The body or the toaster is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA beige, rectangular bread maker with a smooth surface and slightly rounded edges. The top edge has a small, dark opening.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lever of the toaster is not mentioned in the description.\nB. The lever of the toaster is mentioned in the description.\nC. The toaster is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA beige, rectangular bread maker with a smooth surface and slightly rounded edges. The top edge has a small, dark opening.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The knife is not mentioned in the description.\nB. The knife is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA beige, rectangular bread maker with a smooth surface and slightly rounded edges. The top edge has a small, dark opening.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cord of the toaster is not mentioned in the description.\nB. The cord of the toaster is mentioned in the description.\nC. The toaster is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA beige, rectangular bread maker with a smooth surface and slightly rounded edges. 
The top edge has a small, dark opening.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The knob of the toaster is not mentioned in the description.\nB. The knob of the toaster is mentioned in the description.\nC. The toaster is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA beige, rectangular bread maker with a smooth surface and slightly rounded edges. The top edge has a small, dark opening.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The utensil is not mentioned in the description.\nB. The utensil is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The material of the body is mentioned in the description but is not plastic.", + "A. The color of the toaster is mentioned in the description and is white or beige.", + "A. The texture of the body is mentioned in the description and is smooth.", + "C. The toaster is not mentioned in the description.", + "A. The knife is not mentioned in the description.", + "C. The toaster is not mentioned in the description.", + "C. The toaster is not mentioned in the description.", + "B. The utensil is mentioned in the description." + ], + "score": 0.125, + "score_pos": 0.3333333333333333, + "score_neg": 0.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "9766617": { + "pred": "The goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. 
Its head is black with a white patch on the side of its neck. The beak is black, and the legs and feet are also black. The underbelly is white, and the tail feathers are black with a white tip.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The head or the duck/goose is not mentioned.", + 0 + ], + [ + "The color of the head is mentioned in the description and is black.", + 1 + ], + [ + "The color of the head is not mentioned, but the head of the duck/goose is mentioned.", + 0.5 + ], + [ + "The color of the head is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the head is mentioned in the description and is black.", + "pred_index": 1, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The beak or the duck/goose is not mentioned.", + 0 + ], + [ + "The color of the beak is mentioned in the description and is black.", + 1 + ], + [ + "The color of the beak is not mentioned, but the beak of the duck/goose is mentioned.", + 0.5 + ], + [ + "The color of the beak is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the beak is mentioned in the description and is black.", + "pred_index": 1, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The body or the duck/goose is not mentioned.", + 0 + ], + [ + "The color of the body is mentioned in the description and is grey or brown.", + 1 + ], + [ + "The color of the body is not mentioned, but the body of the duck/goose is mentioned.", + 0.5 + ], + [ + "The color of the body is mentioned in the description but is not grey or brown.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the body is mentioned in the description and is grey or brown.", + "pred_index": 1, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tail or the duck/goose is not mentioned.", + 0 + ], + [ + "The color of the tail is mentioned in the description and is black and white.", + 1 + ], + [ + "The color of the tail is not mentioned, but the tail of the duck/goose is mentioned.", + 0.5 + ], + [ + "The color of the tail is mentioned in the description but is not black and white.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the tail is mentioned in the description and is black and white.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wings or the duck/goose are not mentioned.", + 0 + ], + [ + "The shape of the wings is mentioned in the description and is folded.", + 1 + ], + [ + "The shape of the wings is not mentioned, but the wings of the duck/goose are mentioned.", + 0.5 + ], + [ + "The shape of the wings is mentioned in the description but is not folded.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the wings is not mentioned, but the wings of the duck/goose are mentioned.", + "pred_index": 2, + "question_index": 4, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The feet of the duck/goose are mentioned in the description.", + -1 + ], + [ + "The feet of the duck/goose are not mentioned in the description.", + 1 + ], + [ + "The duck/goose is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The feet of the duck/goose are mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mud of the duck/goose is mentioned in the description.", + -1 + ], + [ + "The mud of the duck/goose is not mentioned in the description.", + 1 + ], + [ + "The duck/goose is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The mud of the duck/goose is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The grass are mentioned in the description.", + -1 + ], + [ + "The grass are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The grass are not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pigeon is mentioned in the description.", + -1 + ], + [ + "The pigeon is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The pigeon is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tree is mentioned in the description.", + -1 + ], + [ + "The tree is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The tree is not mentioned in the description.", + "pred_index": 1, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a duck/goose or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The beak is black, and the legs and feet are also black. The underbelly is white, and the tail feathers are black with a white tip.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a duck/goose or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. 
Its head is black with a white patch on the side of its neck. The beak is black, and the legs and feet are also black. The underbelly is white, and the tail feathers are black with a white tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The head or the duck/goose is not mentioned.\nB. The color of the head is mentioned in the description and is black.\nC. The color of the head is not mentioned, but the head of the duck/goose is mentioned.\nD. The color of the head is mentioned in the description but is not black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The beak is black, and the legs and feet are also black. The underbelly is white, and the tail feathers are black with a white tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The beak or the duck/goose is not mentioned.\nB. The color of the beak is mentioned in the description and is black.\nC. The color of the beak is not mentioned, but the beak of the duck/goose is mentioned.\nD. The color of the beak is mentioned in the description but is not black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The beak is black, and the legs and feet are also black. The underbelly is white, and the tail feathers are black with a white tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The body or the duck/goose is not mentioned.\nB. The color of the body is mentioned in the description and is grey or brown.\nC. The color of the body is not mentioned, but the body of the duck/goose is mentioned.\nD. The color of the body is mentioned in the description but is not grey or brown.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The beak is black, and the legs and feet are also black. The underbelly is white, and the tail feathers are black with a white tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tail or the duck/goose is not mentioned.\nB. The color of the tail is mentioned in the description and is black and white.\nC. 
The color of the tail is not mentioned, but the tail of the duck/goose is mentioned.\nD. The color of the tail is mentioned in the description but is not black and white.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The beak is black, and the legs and feet are also black. The underbelly is white, and the tail feathers are black with a white tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wings or the duck/goose are not mentioned.\nB. The shape of the wings is mentioned in the description and is folded.\nC. The shape of the wings is not mentioned, but the wings of the duck/goose are mentioned.\nD. The shape of the wings is mentioned in the description but is not folded.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The beak is black, and the legs and feet are also black. 
The underbelly is white, and the tail feathers are black with a white tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The feet of the duck/goose are mentioned in the description.\nB. The feet of the duck/goose are not mentioned in the description.\nC. The duck/goose is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The beak is black, and the legs and feet are also black. The underbelly is white, and the tail feathers are black with a white tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mud of the duck/goose is mentioned in the description.\nB. The mud of the duck/goose is not mentioned in the description.\nC. The duck/goose is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. 
Its head is black with a white patch on the side of its neck. The beak is black, and the legs and feet are also black. The underbelly is white, and the tail feathers are black with a white tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The grass are mentioned in the description.\nB. The grass are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The beak is black, and the legs and feet are also black. The underbelly is white, and the tail feathers are black with a white tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pigeon is mentioned in the description.\nB. The pigeon is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. 
The beak is black, and the legs and feet are also black. The underbelly is white, and the tail feathers are black with a white tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tree is mentioned in the description.\nB. The tree is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the head is mentioned in the description and is black.", + "B. The color of the beak is mentioned in the description and is black.", + "B. The color of the body is mentioned in the description and is grey or brown.", + "B. The color of the tail is mentioned in the description and is black and white.", + "C. The shape of the wings is not mentioned, but the wings of the duck/goose are mentioned.", + "A. The feet of the duck/goose are mentioned in the description.", + "B. The mud of the duck/goose is not mentioned in the description.", + "B. The grass are not mentioned in the description.", + "B. The pigeon is not mentioned in the description.", + "B. The tree is not mentioned in the description." + ], + "score": 0.75, + "score_pos": 0.9, + "score_neg": 0.6, + "neg_valid_num": 5, + "recognition_result": true + }, + "11021544": { + "pred": "A metallic, curved faucet with a polished finish, featuring a single lever handle and a long, slender spout.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the faucet is mentioned in the description but is not metallic.", + -1 + ], + [ + "The texture of the faucet is not mentioned.", + 0 + ], + [ + "The faucet is not mentioned.", + 0 + ], + [ + "The texture of the faucet is mentioned in the description and is metallic.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The texture of the faucet is mentioned in the description and is metallic.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the spout is mentioned in the description but is not curved or arc.", + -1 + ], + [ + "The shape of the spout is not mentioned, but the spout of the faucet is mentioned.", + 0.5 + ], + [ + "The spout or the faucet is not mentioned.", + 0 + ], + [ + "The shape of the spout is mentioned in the description and is curved or arc.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the spout is mentioned in the description and is curved or arc.", + "pred_index": 3, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the faucet is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the faucet is not mentioned.", + 0 + ], + [ + "The faucet is not mentioned.", + 0 + ], + [ + "The material of the faucet is mentioned in the description and is metal.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the faucet is mentioned in the description and is metal.", + "pred_index": 3, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The soap dispenser of the faucet is not mentioned in the description.", + 1 + ], + [ + "The faucet is not mentioned in the description.", + 0 + ], + [ + "The soap dispenser of the faucet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The soap dispenser of the faucet is not mentioned in the description.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The side spray of the faucet is not mentioned in the description.", + 1 + ], + [ + "The faucet is not mentioned in the description.", + 0 + ], + [ + "The side spray of the faucet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The side spray of the faucet is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chair is not mentioned in the description.", + 1 + ], + [ + "The chair is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The chair is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sprayer of the faucet is not mentioned in the description.", + 1 + ], + [ + "The faucet is not mentioned in the description.", + 0 + ], + [ + "The sprayer of the faucet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The sprayer of the faucet is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cabinet door is not mentioned in the description.", + 1 + ], + [ + "The cabinet door is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The cabinet door is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a faucet or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic, curved faucet with a polished finish, featuring a single lever handle and a long, slender spout.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a faucet or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic, curved faucet with a polished finish, featuring a single lever handle and a long, slender spout.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The texture of the faucet is mentioned in the description but is not metallic.\nB. The texture of the faucet is not mentioned.\nC. The faucet is not mentioned.\nD. The texture of the faucet is mentioned in the description and is metallic.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic, curved faucet with a polished finish, featuring a single lever handle and a long, slender spout.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the spout is mentioned in the description but is not curved or arc.\nB. The shape of the spout is not mentioned, but the spout of the faucet is mentioned.\nC. The spout or the faucet is not mentioned.\nD. The shape of the spout is mentioned in the description and is curved or arc.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic, curved faucet with a polished finish, featuring a single lever handle and a long, slender spout.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the faucet is mentioned in the description but is not metal.\nB. 
The material of the faucet is not mentioned.\nC. The faucet is not mentioned.\nD. The material of the faucet is mentioned in the description and is metal.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic, curved faucet with a polished finish, featuring a single lever handle and a long, slender spout.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The soap dispenser of the faucet is not mentioned in the description.\nB. The faucet is not mentioned in the description.\nC. The soap dispenser of the faucet is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic, curved faucet with a polished finish, featuring a single lever handle and a long, slender spout.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The side spray of the faucet is not mentioned in the description.\nB. The faucet is not mentioned in the description.\nC. The side spray of the faucet is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic, curved faucet with a polished finish, featuring a single lever handle and a long, slender spout.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chair is not mentioned in the description.\nB. The chair is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic, curved faucet with a polished finish, featuring a single lever handle and a long, slender spout.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sprayer of the faucet is not mentioned in the description.\nB. The faucet is not mentioned in the description.\nC. The sprayer of the faucet is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA metallic, curved faucet with a polished finish, featuring a single lever handle and a long, slender spout.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cabinet door is not mentioned in the description.\nB. The cabinet door is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The texture of the faucet is mentioned in the description and is metallic.", + "D. The shape of the spout is mentioned in the description and is curved or arc.", + "D. The material of the faucet is mentioned in the description and is metal.", + "A. The soap dispenser of the faucet is not mentioned in the description.", + "A. The side spray of the faucet is not mentioned in the description.", + "A. The chair is not mentioned in the description.", + "A. The sprayer of the faucet is not mentioned in the description.", + "A. The cabinet door is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "11775390": { + "pred": "A green rubber shoe with a textured sole and multiple circular holes on the side. The shoe features a black and white design on the upper part, with green laces threaded through the eyelets.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sandals are not mentioned.", + 0 + ], + [ + "The color of the sandals is mentioned in the description but is not green.", + -1 + ], + [ + "The color of the sandals is not mentioned.", + 0 + ], + [ + "The color of the sandals is mentioned in the description and is green.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the sandals is mentioned in the description and is green.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sandals are not mentioned.", + 0 + ], + [ + "The style of the sandals is mentioned in the description but is not perforated.", + -1 + ], + [ + "The style of the sandals is not mentioned.", + 0 + ], + [ + "The style of the sandals is mentioned in the description and is perforated.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The sandals are not mentioned.", + "pred_index": 0, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sandals are not mentioned.", + 0 + ], + [ + "The material of the sandals is mentioned in the description but is not rubber.", + -1 + ], + [ + "The material of the sandals is not mentioned.", + 0 + ], + [ + "The material of the sandals is mentioned in the description and is rubber.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the sandals is mentioned in the description and is rubber.", + "pred_index": 3, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The strap or the sandals is not mentioned.", + 0 + ], + [ + "The number of parts of the strap is mentioned in the description but is not one.", + -1 + ], + [ + "The number of parts of the strap is not mentioned, but the strap of the sandals is mentioned.", + 0.5 + ], + [ + "The number of parts of the strap is mentioned in the description and is one.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The strap or the sandals is not mentioned.", + "pred_index": 0, + "question_index": 3, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The trailer is not mentioned in the description.", + 1 + ], + [ + "The trailer is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The trailer is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tire is not mentioned in the description.", + 1 + ], + [ + "The tire is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The tire is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pumpkins are not mentioned in the description.", + 1 + ], + [ + "The pumpkins are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The pumpkins are not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The box is not mentioned in the description.", + 1 + ], + [ + "The box is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The box is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The boy is not mentioned in the description.", + 1 + ], + [ + "The boy is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The boy is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the objects in the description are sandals or objects of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a textured sole and multiple circular holes on the side. The shoe features a black and white design on the upper part, with green laces threaded through the eyelets.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the objects in the description are sandals or objects of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a textured sole and multiple circular holes on the side. The shoe features a black and white design on the upper part, with green laces threaded through the eyelets.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sandals are not mentioned.\nB. The color of the sandals is mentioned in the description but is not green.\nC. The color of the sandals is not mentioned.\nD. The color of the sandals is mentioned in the description and is green.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a textured sole and multiple circular holes on the side. The shoe features a black and white design on the upper part, with green laces threaded through the eyelets.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sandals are not mentioned.\nB. The style of the sandals is mentioned in the description but is not perforated.\nC. The style of the sandals is not mentioned.\nD. The style of the sandals is mentioned in the description and is perforated.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a textured sole and multiple circular holes on the side. The shoe features a black and white design on the upper part, with green laces threaded through the eyelets.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sandals are not mentioned.\nB. The material of the sandals is mentioned in the description but is not rubber.\nC. The material of the sandals is not mentioned.\nD. The material of the sandals is mentioned in the description and is rubber.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a textured sole and multiple circular holes on the side. The shoe features a black and white design on the upper part, with green laces threaded through the eyelets.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The strap or the sandals is not mentioned.\nB. The number of parts of the strap is mentioned in the description but is not one.\nC. The number of parts of the strap is not mentioned, but the strap of the sandals is mentioned.\nD. The number of parts of the strap is mentioned in the description and is one.\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a textured sole and multiple circular holes on the side. The shoe features a black and white design on the upper part, with green laces threaded through the eyelets.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The trailer is not mentioned in the description.\nB. The trailer is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a textured sole and multiple circular holes on the side. The shoe features a black and white design on the upper part, with green laces threaded through the eyelets.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tire is not mentioned in the description.\nB. The tire is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a textured sole and multiple circular holes on the side. The shoe features a black and white design on the upper part, with green laces threaded through the eyelets.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pumpkins are not mentioned in the description.\nB. The pumpkins are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a textured sole and multiple circular holes on the side. The shoe features a black and white design on the upper part, with green laces threaded through the eyelets.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The box is not mentioned in the description.\nB. The box is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a textured sole and multiple circular holes on the side. 
The shoe features a black and white design on the upper part, with green laces threaded through the eyelets.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The boy is not mentioned in the description.\nB. The boy is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the sandals is mentioned in the description and is green.", + "A. The sandals are not mentioned.", + "D. The material of the sandals is mentioned in the description and is rubber.", + "A. The strap or the sandals is not mentioned.", + "A. The trailer is not mentioned in the description.", + "A. The tire is not mentioned in the description.", + "A. The pumpkins are not mentioned in the description.", + "A. The box is not mentioned in the description.", + "A. The boy is not mentioned in the description." + ], + "score": 0.7777777777777778, + "score_pos": 0.5, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "11950619": { + "pred": "The dumbbell features a white, rectangular handle with rounded edges and a smooth surface. The handle is attached to a metallic, rectangular weight plate with a series of evenly spaced, vertical slots. The weight plate is secured to the handle with a visible screw.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the handle is mentioned in the description and is white, beige, or wooden color.", + 1 + ], + [ + "The color of the handle is not mentioned, but the handle of the table tennis paddle is mentioned.", + 0.5 + ], + [ + "The handle or the table tennis paddle is not mentioned.", + 0 + ], + [ + "The color of the handle is mentioned in the description but is not white, beige, or wooden color.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the handle is mentioned in the description and is white, beige, or wooden color.", + "pred_index": 0, + "question_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the handle is mentioned in the description and is wood.", + 1 + ], + [ + "The material of the handle is not mentioned, but the handle of the table tennis paddle is mentioned.", + 0.5 + ], + [ + "The handle or the table tennis paddle is not mentioned.", + 0 + ], + [ + "The material of the handle is mentioned in the description but is not wood.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the handle is mentioned in the description but is not wood.", + "pred_index": 3, + "question_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the cover is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the cover is not mentioned, but the cover of the table tennis paddle is mentioned.", + 0.5 + ], + [ + "The cover or the table tennis paddle is not mentioned.", + 0 + ], + [ + "The material of the cover is mentioned in the description but is not plastic.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The cover or the table tennis paddle is not mentioned.", + "pred_index": 2, + "question_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the head is mentioned in the description and is rounded.", + 1 + ], + [ + "The shape of the head is not mentioned, but the head of the table tennis paddle is mentioned.", + 0.5 + ], + [ + "The head or the table tennis paddle is not mentioned.", + 0 + ], + [ + "The shape of the head is mentioned in the description but is not rounded.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The head or the table tennis paddle is not mentioned.", + "pred_index": 2, + "question_index": 3, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The dumbbell is not mentioned in the description.", + 1 + ], + [ + "The dumbbell is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The dumbbell is mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The table tennis paddle is not mentioned in the description.", + 0 + ], + [ + "The edge tape of the table tennis paddle is not mentioned in the description.", + 1 + ], + [ + "The edge tape of the table tennis paddle is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The edge tape of the table tennis paddle is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The table tennis paddle is not mentioned in the description.", + 0 + ], + [ + "The logo of the table tennis paddle is not mentioned in the description.", + 1 + ], + [ + "The logo of the table tennis paddle is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The table tennis paddle is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mat is not mentioned in the description.", + 1 + ], + [ + "The mat is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The mat is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cabinet is not mentioned in the description.", + 1 + ], + [ + "The cabinet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The cabinet is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 0 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a table tennis paddle or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "B. No", + "pred_index": 1, + "eval_result": "incorrect" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe dumbbell features a white, rectangular handle with rounded edges and a smooth surface. The handle is attached to a metallic, rectangular weight plate with a series of evenly spaced, vertical slots. 
The weight plate is secured to the handle with a visible screw.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a table tennis paddle or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe dumbbell features a white, rectangular handle with rounded edges and a smooth surface. The handle is attached to a metallic, rectangular weight plate with a series of evenly spaced, vertical slots. The weight plate is secured to the handle with a visible screw.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the handle is mentioned in the description and is white, beige, or wooden color.\nB. The color of the handle is not mentioned, but the handle of the table tennis paddle is mentioned.\nC. The handle or the table tennis paddle is not mentioned.\nD. The color of the handle is mentioned in the description but is not white, beige, or wooden color.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe dumbbell features a white, rectangular handle with rounded edges and a smooth surface. The handle is attached to a metallic, rectangular weight plate with a series of evenly spaced, vertical slots. The weight plate is secured to the handle with a visible screw.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the handle is mentioned in the description and is wood.\nB. The material of the handle is not mentioned, but the handle of the table tennis paddle is mentioned.\nC. The handle or the table tennis paddle is not mentioned.\nD. The material of the handle is mentioned in the description but is not wood.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe dumbbell features a white, rectangular handle with rounded edges and a smooth surface. The handle is attached to a metallic, rectangular weight plate with a series of evenly spaced, vertical slots. The weight plate is secured to the handle with a visible screw.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the cover is mentioned in the description and is plastic.\nB. The material of the cover is not mentioned, but the cover of the table tennis paddle is mentioned.\nC. The cover or the table tennis paddle is not mentioned.\nD. 
The material of the cover is mentioned in the description but is not plastic.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe dumbbell features a white, rectangular handle with rounded edges and a smooth surface. The handle is attached to a metallic, rectangular weight plate with a series of evenly spaced, vertical slots. The weight plate is secured to the handle with a visible screw.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the head is mentioned in the description and is rounded.\nB. The shape of the head is not mentioned, but the head of the table tennis paddle is mentioned.\nC. The head or the table tennis paddle is not mentioned.\nD. The shape of the head is mentioned in the description but is not rounded.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe dumbbell features a white, rectangular handle with rounded edges and a smooth surface. The handle is attached to a metallic, rectangular weight plate with a series of evenly spaced, vertical slots. 
The weight plate is secured to the handle with a visible screw.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The dumbbell is not mentioned in the description.\nB. The dumbbell is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe dumbbell features a white, rectangular handle with rounded edges and a smooth surface. The handle is attached to a metallic, rectangular weight plate with a series of evenly spaced, vertical slots. The weight plate is secured to the handle with a visible screw.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The table tennis paddle is not mentioned in the description.\nB. The edge tape of the table tennis paddle is not mentioned in the description.\nC. The edge tape of the table tennis paddle is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe dumbbell features a white, rectangular handle with rounded edges and a smooth surface. The handle is attached to a metallic, rectangular weight plate with a series of evenly spaced, vertical slots. 
The weight plate is secured to the handle with a visible screw.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The table tennis paddle is not mentioned in the description.\nB. The logo of the table tennis paddle is not mentioned in the description.\nC. The logo of the table tennis paddle is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe dumbbell features a white, rectangular handle with rounded edges and a smooth surface. The handle is attached to a metallic, rectangular weight plate with a series of evenly spaced, vertical slots. The weight plate is secured to the handle with a visible screw.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mat is not mentioned in the description.\nB. The mat is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe dumbbell features a white, rectangular handle with rounded edges and a smooth surface. The handle is attached to a metallic, rectangular weight plate with a series of evenly spaced, vertical slots. 
The weight plate is secured to the handle with a visible screw.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cabinet is not mentioned in the description.\nB. The cabinet is mentioned in the description.\n" + ], + "response": [ + "B. No", + "A. The color of the handle is mentioned in the description and is white, beige, or wooden color.", + "D. The material of the handle is mentioned in the description but is not wood.", + "C. The cover or the table tennis paddle is not mentioned.", + "C. The head or the table tennis paddle is not mentioned.", + "B. The dumbbell is mentioned in the description.", + "B. The edge tape of the table tennis paddle is not mentioned in the description.", + "A. The table tennis paddle is not mentioned in the description.", + "A. The mat is not mentioned in the description.", + "A. The cabinet is not mentioned in the description." + ], + "score": -0.2222222222222222, + "score_pos": -0.25, + "score_neg": -0.2, + "neg_valid_num": 5, + "recognition_result": false + }, + "12178946": { + "pred": "A yellow bottle with a blue label featuring white text.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle is not mentioned.", + 0 + ], + [ + "The shape of the bottle is mentioned in the description and is cylindrical.", + 1 + ], + [ + "The shape of the bottle is not mentioned.", + 0 + ], + [ + "The shape of the bottle is mentioned in the description but is not cylindrical.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the bottle is not mentioned.", + "pred_index": 2, + "question_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cap or the bottle is not mentioned.", + 0 + ], + [ + "The color of the cap is mentioned in the description and is yellow.", + 1 + ], + [ + "The color of the cap is not mentioned, but the cap of the bottle is mentioned.", + 0.5 + ], + [ + "The color of the cap is mentioned in the description but is not yellow.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The cap or the bottle is not mentioned.", + "pred_index": 0, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The label or the bottle is not mentioned.", + 0 + ], + [ + "The color of the label is mentioned in the description and is blue.", + 1 + ], + [ + "The color of the label is not mentioned, but the label of the bottle is mentioned.", + 0.5 + ], + [ + "The color of the label is mentioned in the description but is not blue.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the label is mentioned in the description and is blue.", + "pred_index": 1, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The text or the bottle is not mentioned.", + 0 + ], + [ + "The color of the text is mentioned in the description and is white.", + 1 + ], + [ + "The color of the text is not mentioned, but the text of the bottle is mentioned.", + 0.5 + ], + [ + "The color of the text is mentioned in the description but is not white.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the text is mentioned in the description and is white.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The apple is mentioned in the description.", + -1 + ], + [ + "The apple is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The apple is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle is not mentioned in the description.", + 0 + ], + [ + "The handle of the bottle is mentioned in the description.", + -1 + ], + [ + "The handle of the bottle is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The handle of the bottle is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle is not mentioned in the description.", + 0 + ], + [ + "The trigger of the bottle is mentioned in the description.", + -1 + ], + [ + "The trigger of the bottle is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The trigger of the bottle is not mentioned in the description.", + "pred_index": 2, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The measuring cup is mentioned in the description.", + -1 + ], + [ + "The measuring cup is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The measuring cup is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cutting board is mentioned in the description.", + -1 + ], + [ + "The cutting board is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The cutting board is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a bottle or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA yellow bottle with a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a bottle or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA yellow bottle with a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bottle is not mentioned.\nB. The shape of the bottle is mentioned in the description and is cylindrical.\nC. The shape of the bottle is not mentioned.\nD. The shape of the bottle is mentioned in the description but is not cylindrical.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA yellow bottle with a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cap or the bottle is not mentioned.\nB. The color of the cap is mentioned in the description and is yellow.\nC. The color of the cap is not mentioned, but the cap of the bottle is mentioned.\nD. The color of the cap is mentioned in the description but is not yellow.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA yellow bottle with a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The label or the bottle is not mentioned.\nB. The color of the label is mentioned in the description and is blue.\nC. The color of the label is not mentioned, but the label of the bottle is mentioned.\nD. The color of the label is mentioned in the description but is not blue.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA yellow bottle with a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The text or the bottle is not mentioned.\nB. The color of the text is mentioned in the description and is white.\nC. The color of the text is not mentioned, but the text of the bottle is mentioned.\nD. The color of the text is mentioned in the description but is not white.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA yellow bottle with a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The apple is mentioned in the description.\nB. The apple is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA yellow bottle with a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bottle is not mentioned in the description.\nB. The handle of the bottle is mentioned in the description.\nC. The handle of the bottle is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA yellow bottle with a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bottle is not mentioned in the description.\nB. The trigger of the bottle is mentioned in the description.\nC. 
The trigger of the bottle is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA yellow bottle with a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The measuring cup is mentioned in the description.\nB. The measuring cup is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA yellow bottle with a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cutting board is mentioned in the description.\nB. The cutting board is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The shape of the bottle is not mentioned.", + "A. The cap or the bottle is not mentioned.", + "B. The color of the label is mentioned in the description and is blue.", + "B. The color of the text is mentioned in the description and is white.", + "B. The apple is not mentioned in the description.", + "C. The handle of the bottle is not mentioned in the description.", + "C. 
The trigger of the bottle is not mentioned in the description.", + "B. The measuring cup is not mentioned in the description.", + "B. The cutting board is not mentioned in the description." + ], + "score": 0.7777777777777778, + "score_pos": 0.5, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "12348079": { + "pred": "A rectangular digital weighing scale with a metallic blue weighing platform. The scale has a white base with a control panel on the left side, featuring several buttons and a small display screen. The edges of the scale are slightly rounded, and the weighing platform has a textured surface.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The weighing platform or the scale is not mentioned.", + 0 + ], + [ + "The shape of the weighing platform is not mentioned, but the weighing platform of the scale is mentioned.", + 0.5 + ], + [ + "The shape of the weighing platform is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the weighing platform is mentioned in the description but is not rectangular.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the weighing platform is not mentioned, but the weighing platform of the scale is mentioned.", + "pred_index": 1, + "question_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The weighing platform or the scale is not mentioned.", + 0 + ], + [ + "The color of the weighing platform is not mentioned, but the weighing platform of the scale is mentioned.", + 0.5 + ], + [ + "The color of the weighing platform is mentioned in the description and is blue.", + 1 + ], + [ + "The color of the weighing platform is mentioned in the description but is not blue.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the weighing platform is mentioned in the description and is blue.", + "pred_index": 2, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The keypad or the scale is not mentioned.", + 0 + ], + [ + "The shape of the keypad is not mentioned, but the keypad of the scale is mentioned.", + 0.5 + ], + [ + "The shape of the keypad is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the keypad is mentioned in the description but is not rectangular.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The keypad or the scale is not mentioned.", + "pred_index": 0, + "question_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The display screen or the scale is not mentioned.", + 0 + ], + [ + "The color of the display screen is not mentioned, but the display screen of the scale is mentioned.", + 0.5 + ], + [ + "The color of the display screen is mentioned in the description and is black.", + 1 + ], + [ + "The color of the display screen is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the display screen is not mentioned, but the display screen of the scale is mentioned.", + "pred_index": 1, + "question_index": 3, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The body or the scale is not mentioned.", + 0 + ], + [ + "The color of the body is not mentioned, but the body of the scale is mentioned.", + 0.5 + ], + [ + "The color of the body is mentioned in the description and is white.", + 1 + ], + [ + "The color of the body is mentioned in the description but is not white.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the body is mentioned in the description and is white.", + "pred_index": 2, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The power cord of the scale is not mentioned in the description.", + 1 + ], + [ + "The scale is not mentioned in the description.", + 0 + ], + [ + "The power cord of the scale is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The power cord of the scale is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The usb cable of the scale is not mentioned in the description.", + 1 + ], + [ + "The scale is not mentioned in the description.", + 0 + ], + [ + "The usb cable of the scale is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The usb cable of the scale is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bicycle is not mentioned in the description.", + 1 + ], + [ + "The bicycle is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The bicycle is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The child is not mentioned in the description.", + 1 + ], + [ + "The child is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The child is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The woman is not mentioned in the description.", + 1 + ], + [ + "The woman is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The woman is not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a scale or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular digital weighing scale with a metallic blue weighing platform. The scale has a white base with a control panel on the left side, featuring several buttons and a small display screen. The edges of the scale are slightly rounded, and the weighing platform has a textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a scale or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular digital weighing scale with a metallic blue weighing platform. The scale has a white base with a control panel on the left side, featuring several buttons and a small display screen. The edges of the scale are slightly rounded, and the weighing platform has a textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The weighing platform or the scale is not mentioned.\nB. The shape of the weighing platform is not mentioned, but the weighing platform of the scale is mentioned.\nC. The shape of the weighing platform is mentioned in the description and is rectangular.\nD. The shape of the weighing platform is mentioned in the description but is not rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular digital weighing scale with a metallic blue weighing platform. The scale has a white base with a control panel on the left side, featuring several buttons and a small display screen. The edges of the scale are slightly rounded, and the weighing platform has a textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The weighing platform or the scale is not mentioned.\nB. The color of the weighing platform is not mentioned, but the weighing platform of the scale is mentioned.\nC. The color of the weighing platform is mentioned in the description and is blue.\nD. The color of the weighing platform is mentioned in the description but is not blue.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular digital weighing scale with a metallic blue weighing platform. The scale has a white base with a control panel on the left side, featuring several buttons and a small display screen. The edges of the scale are slightly rounded, and the weighing platform has a textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The keypad or the scale is not mentioned.\nB. The shape of the keypad is not mentioned, but the keypad of the scale is mentioned.\nC. The shape of the keypad is mentioned in the description and is rectangular.\nD. The shape of the keypad is mentioned in the description but is not rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular digital weighing scale with a metallic blue weighing platform. 
The scale has a white base with a control panel on the left side, featuring several buttons and a small display screen. The edges of the scale are slightly rounded, and the weighing platform has a textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The display screen or the scale is not mentioned.\nB. The color of the display screen is not mentioned, but the display screen of the scale is mentioned.\nC. The color of the display screen is mentioned in the description and is black.\nD. The color of the display screen is mentioned in the description but is not black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular digital weighing scale with a metallic blue weighing platform. The scale has a white base with a control panel on the left side, featuring several buttons and a small display screen. The edges of the scale are slightly rounded, and the weighing platform has a textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The body or the scale is not mentioned.\nB. The color of the body is not mentioned, but the body of the scale is mentioned.\nC. The color of the body is mentioned in the description and is white.\nD. The color of the body is mentioned in the description but is not white.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular digital weighing scale with a metallic blue weighing platform. The scale has a white base with a control panel on the left side, featuring several buttons and a small display screen. The edges of the scale are slightly rounded, and the weighing platform has a textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The power cord of the scale is not mentioned in the description.\nB. The scale is not mentioned in the description.\nC. The power cord of the scale is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular digital weighing scale with a metallic blue weighing platform. The scale has a white base with a control panel on the left side, featuring several buttons and a small display screen. The edges of the scale are slightly rounded, and the weighing platform has a textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The usb cable of the scale is not mentioned in the description.\nB. The scale is not mentioned in the description.\nC. 
The usb cable of the scale is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular digital weighing scale with a metallic blue weighing platform. The scale has a white base with a control panel on the left side, featuring several buttons and a small display screen. The edges of the scale are slightly rounded, and the weighing platform has a textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bicycle is not mentioned in the description.\nB. The bicycle is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular digital weighing scale with a metallic blue weighing platform. The scale has a white base with a control panel on the left side, featuring several buttons and a small display screen. The edges of the scale are slightly rounded, and the weighing platform has a textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The child is not mentioned in the description.\nB. 
The child is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular digital weighing scale with a metallic blue weighing platform. The scale has a white base with a control panel on the left side, featuring several buttons and a small display screen. The edges of the scale are slightly rounded, and the weighing platform has a textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The woman is not mentioned in the description.\nB. The woman is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The shape of the weighing platform is not mentioned, but the weighing platform of the scale is mentioned.", + "C. The color of the weighing platform is mentioned in the description and is blue.", + "A. The keypad or the scale is not mentioned.", + "B. The color of the display screen is not mentioned, but the display screen of the scale is mentioned.", + "C. The color of the body is mentioned in the description and is white.", + "A. The power cord of the scale is not mentioned in the description.", + "A. The usb cable of the scale is not mentioned in the description.", + "A. The bicycle is not mentioned in the description.", + "A. The child is not mentioned in the description.", + "A. The woman is not mentioned in the description." 
+ ], + "score": 0.8, + "score_pos": 0.6, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "14832137": { + "pred": "A cylindrical, light purple plastic bucket with a smooth surface and a slightly flared rim. The bucket has a small, curved handle attached near the top.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The barrel/bucket is not mentioned.", + 0 + ], + [ + "The shape of the barrel/bucket is mentioned in the description and is round or cylindrical.", + 1 + ], + [ + "The shape of the barrel/bucket is mentioned in the description but is not round or cylindrical.", + -1 + ], + [ + "The shape of the barrel/bucket is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the barrel/bucket is mentioned in the description and is round or cylindrical.", + "pred_index": 1, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle or the barrel/bucket is not mentioned.", + 0 + ], + [ + "The shape of the handle is mentioned in the description and is curved.", + 1 + ], + [ + "The shape of the handle is mentioned in the description but is not curved.", + -1 + ], + [ + "The shape of the handle is not mentioned, but the handle of the barrel/bucket is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The shape of the handle is mentioned in the description and is curved.", + "pred_index": 1, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The barrel/bucket is not mentioned.", + 0 + ], + [ + "The color of the barrel/bucket is mentioned in the description and is purple.", + 1 + ], + [ + "The color of the barrel/bucket is mentioned in the description but is not purple.", + -1 + ], + [ + "The color of the barrel/bucket is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the barrel/bucket is mentioned in the description and is purple.", + "pred_index": 1, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The barrel/bucket is not mentioned in the description.", + 0 + ], + [ + "The bottom of the barrel/bucket is mentioned in the description.", + -1 + ], + [ + "The bottom of the barrel/bucket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The bottom of the barrel/bucket is not mentioned in the description.", + "pred_index": 2, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The heart is mentioned in the description.", + -1 + ], + [ + "The heart is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The heart is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The barrel/bucket is not mentioned in the description.", + 0 + ], + [ + "The lid of the barrel/bucket is mentioned in the description.", + -1 + ], + [ + "The lid of the barrel/bucket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The lid of the barrel/bucket is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cake is mentioned in the description.", + -1 + ], + [ + "The cake is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The cake is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plate is mentioned in the description.", + -1 + ], + [ + "The plate is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The plate is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a barrel/bucket or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, light purple plastic bucket with a smooth surface and a slightly flared rim. The bucket has a small, curved handle attached near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a barrel/bucket or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, light purple plastic bucket with a smooth surface and a slightly flared rim. The bucket has a small, curved handle attached near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The barrel/bucket is not mentioned.\nB. The shape of the barrel/bucket is mentioned in the description and is round or cylindrical.\nC. The shape of the barrel/bucket is mentioned in the description but is not round or cylindrical.\nD. The shape of the barrel/bucket is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, light purple plastic bucket with a smooth surface and a slightly flared rim. The bucket has a small, curved handle attached near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handle or the barrel/bucket is not mentioned.\nB. The shape of the handle is mentioned in the description and is curved.\nC. The shape of the handle is mentioned in the description but is not curved.\nD. The shape of the handle is not mentioned, but the handle of the barrel/bucket is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, light purple plastic bucket with a smooth surface and a slightly flared rim. The bucket has a small, curved handle attached near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The barrel/bucket is not mentioned.\nB. The color of the barrel/bucket is mentioned in the description and is purple.\nC. The color of the barrel/bucket is mentioned in the description but is not purple.\nD. The color of the barrel/bucket is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, light purple plastic bucket with a smooth surface and a slightly flared rim. The bucket has a small, curved handle attached near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The barrel/bucket is not mentioned in the description.\nB. The bottom of the barrel/bucket is mentioned in the description.\nC. The bottom of the barrel/bucket is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, light purple plastic bucket with a smooth surface and a slightly flared rim. The bucket has a small, curved handle attached near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The heart is mentioned in the description.\nB. The heart is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, light purple plastic bucket with a smooth surface and a slightly flared rim. 
The bucket has a small, curved handle attached near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The barrel/bucket is not mentioned in the description.\nB. The lid of the barrel/bucket is mentioned in the description.\nC. The lid of the barrel/bucket is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, light purple plastic bucket with a smooth surface and a slightly flared rim. The bucket has a small, curved handle attached near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cake is mentioned in the description.\nB. The cake is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, light purple plastic bucket with a smooth surface and a slightly flared rim. The bucket has a small, curved handle attached near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The plate is mentioned in the description.\nB. 
The plate is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The shape of the barrel/bucket is mentioned in the description and is round or cylindrical.", + "B. The shape of the handle is mentioned in the description and is curved.", + "B. The color of the barrel/bucket is mentioned in the description and is purple.", + "C. The bottom of the barrel/bucket is not mentioned in the description.", + "B. The heart is not mentioned in the description.", + "C. The lid of the barrel/bucket is not mentioned in the description.", + "B. The cake is not mentioned in the description.", + "B. The plate is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "15050320": { + "pred": "A dark brown wine glass with a wide, flat base and a slender stem.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the wine glass is not mentioned.", + 0 + ], + [ + "The wine glass are not mentioned.", + 0 + ], + [ + "The material of the wine glass is mentioned in the description and is glass.", + 1 + ], + [ + "The material of the wine glass is mentioned in the description but is not glass.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the wine glass is not mentioned.", + "pred_index": 0, + "question_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the wine glass is not mentioned.", + 0 + ], + [ + "The wine glass are not mentioned.", + 0 + ], + [ + "The color of the wine glass is mentioned in the description and is transparent.", + 1 + ], + [ + "The color of the wine glass is mentioned in the description but is not transparent.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the wine glass is mentioned in the description but is not transparent.", + "pred_index": 3, + "question_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the base is not mentioned, but the base of the wine glass is mentioned.", + 0.5 + ], + [ + "The base or the wine glass is not mentioned.", + 0 + ], + [ + "The shape of the base is mentioned in the description and is round or flat.", + 1 + ], + [ + "The shape of the base is mentioned in the description but is not round or flat.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the base is mentioned in the description and is round or flat.", + "pred_index": 2, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the stem is not mentioned, but the stem of the wine glass is mentioned.", + 0.5 + ], + [ + "The stem or the wine glass is not mentioned.", + 0 + ], + [ + "The shape of the stem is mentioned in the description and is slender.", + 1 + ], + [ + "The shape of the stem is mentioned in the description but is not slender.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the stem is mentioned in the description and is slender.", + "pred_index": 2, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bowl of the wine glass is not mentioned in the description.", + 1 + ], + [ + "The bowl of the wine glass is mentioned in the description.", + -1 + ], + [ + "The wine glass are not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The bowl of the wine glass is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plate is not mentioned in the description.", + 1 + ], + [ + "The plate is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The plate is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rim of the wine glass is not mentioned in the description.", + 1 + ], + [ + "The rim of the wine glass is mentioned in the description.", + -1 + ], + [ + "The wine glass are not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The rim of the wine glass is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fork is not mentioned in the description.", + 1 + ], + [ + "The fork is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The fork is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The napkin is not mentioned in the description.", + 1 + ], + [ + "The napkin is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The napkin is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the objects in the description are wine glass or objects of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, flat base and a slender stem.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the objects in the description are wine glass or objects of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, flat base and a slender stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the wine glass is not mentioned.\nB. The wine glass are not mentioned.\nC. 
The material of the wine glass is mentioned in the description and is glass.\nD. The material of the wine glass is mentioned in the description but is not glass.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, flat base and a slender stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the wine glass is not mentioned.\nB. The wine glass are not mentioned.\nC. The color of the wine glass is mentioned in the description and is transparent.\nD. The color of the wine glass is mentioned in the description but is not transparent.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, flat base and a slender stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the base is not mentioned, but the base of the wine glass is mentioned.\nB. The base or the wine glass is not mentioned.\nC. The shape of the base is mentioned in the description and is round or flat.\nD. 
The shape of the base is mentioned in the description but is not round or flat.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, flat base and a slender stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the stem is not mentioned, but the stem of the wine glass is mentioned.\nB. The stem or the wine glass is not mentioned.\nC. The shape of the stem is mentioned in the description and is slender.\nD. The shape of the stem is mentioned in the description but is not slender.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, flat base and a slender stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bowl of the wine glass is not mentioned in the description.\nB. The bowl of the wine glass is mentioned in the description.\nC. The wine glass are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, flat base and a slender stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The plate is not mentioned in the description.\nB. The plate is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, flat base and a slender stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rim of the wine glass is not mentioned in the description.\nB. The rim of the wine glass is mentioned in the description.\nC. The wine glass are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, flat base and a slender stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fork is not mentioned in the description.\nB. The fork is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, flat base and a slender stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The napkin is not mentioned in the description.\nB. The napkin is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The material of the wine glass is not mentioned.", + "D. The color of the wine glass is mentioned in the description but is not transparent.", + "C. The shape of the base is mentioned in the description and is round or flat.", + "C. The shape of the stem is mentioned in the description and is slender.", + "A. The bowl of the wine glass is not mentioned in the description.", + "A. The plate is not mentioned in the description.", + "A. The rim of the wine glass is not mentioned in the description.", + "A. The fork is not mentioned in the description.", + "A. The napkin is not mentioned in the description." 
+ ], + "score": 0.6666666666666666, + "score_pos": 0.25, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "16957916": { + "pred": "Fresh green lettuce leaves with ruffled edges and a crisp texture, exhibiting a gradient of color from pale green at the base to a darker green towards the tips.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lettuce is not mentioned.", + 0 + ], + [ + "The color of the lettuce is not mentioned.", + 0 + ], + [ + "The color of the lettuce is mentioned in the description and is white/green.", + 1 + ], + [ + "The color of the lettuce is mentioned in the description but is not white/green.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the lettuce is mentioned in the description and is white/green.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lettuce is not mentioned.", + 0 + ], + [ + "The shape of the lettuce is not mentioned.", + 0 + ], + [ + "The shape of the lettuce is mentioned in the description and is curved or irregular.", + 1 + ], + [ + "The shape of the lettuce is mentioned in the description but is not curved or irregular.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the lettuce is mentioned in the description and is curved or irregular.", + "pred_index": 2, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The edge or the lettuce is not mentioned.", + 0 + ], + [ + "The shape of the edge is not mentioned, but the edge of the lettuce is mentioned.", + 0.5 + ], + [ + "The shape of the edge is mentioned in the description and is jagged, ruffled, or rough.", + 1 + ], + [ + "The shape of the edge is mentioned in the description but is not jagged, ruffled, or rough.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the edge is not mentioned, but the edge of the lettuce is mentioned.", + "pred_index": 1, + "question_index": 2, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tartar sauce is mentioned in the description.", + -1 + ], + [ + "The tartar sauce is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The tartar sauce is not mentioned in the description.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fried fish are mentioned in the description.", + -1 + ], + [ + "The fried fish are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The fried fish are not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plate is mentioned in the description.", + -1 + ], + [ + "The plate is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The plate is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The french fries are mentioned in the description.", + -1 + ], + [ + "The french fries are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The french fries are not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The utensil is mentioned in the description.", + -1 + ], + [ + "The utensil is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The utensil is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a lettuce or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nFresh green lettuce leaves with ruffled edges and a crisp texture, exhibiting a gradient of color from pale green at the base to a darker green towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a lettuce or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nFresh green lettuce leaves with ruffled edges and a crisp texture, exhibiting a gradient of color from pale green at the base to a darker green towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lettuce is not mentioned.\nB. The color of the lettuce is not mentioned.\nC. The color of the lettuce is mentioned in the description and is white/green.\nD. The color of the lettuce is mentioned in the description but is not white/green.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nFresh green lettuce leaves with ruffled edges and a crisp texture, exhibiting a gradient of color from pale green at the base to a darker green towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lettuce is not mentioned.\nB. The shape of the lettuce is not mentioned.\nC. The shape of the lettuce is mentioned in the description and is curved or irregular.\nD. The shape of the lettuce is mentioned in the description but is not curved or irregular.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nFresh green lettuce leaves with ruffled edges and a crisp texture, exhibiting a gradient of color from pale green at the base to a darker green towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The edge or the lettuce is not mentioned.\nB. The shape of the edge is not mentioned, but the edge of the lettuce is mentioned.\nC. The shape of the edge is mentioned in the description and is jagged, ruffled, or rough.\nD. The shape of the edge is mentioned in the description but is not jagged, ruffled, or rough.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nFresh green lettuce leaves with ruffled edges and a crisp texture, exhibiting a gradient of color from pale green at the base to a darker green towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tartar sauce is mentioned in the description.\nB. The tartar sauce is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nFresh green lettuce leaves with ruffled edges and a crisp texture, exhibiting a gradient of color from pale green at the base to a darker green towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fried fish are mentioned in the description.\nB. The fried fish are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nFresh green lettuce leaves with ruffled edges and a crisp texture, exhibiting a gradient of color from pale green at the base to a darker green towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The plate is mentioned in the description.\nB. The plate is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nFresh green lettuce leaves with ruffled edges and a crisp texture, exhibiting a gradient of color from pale green at the base to a darker green towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The french fries are mentioned in the description.\nB. The french fries are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nFresh green lettuce leaves with ruffled edges and a crisp texture, exhibiting a gradient of color from pale green at the base to a darker green towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The utensil is mentioned in the description.\nB. The utensil is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the lettuce is mentioned in the description and is white/green.", + "C. The shape of the lettuce is mentioned in the description and is curved or irregular.", + "B. The shape of the edge is not mentioned, but the edge of the lettuce is mentioned.", + "B. The tartar sauce is not mentioned in the description.", + "B. The fried fish are not mentioned in the description.", + "B. The plate is not mentioned in the description.", + "B. The french fries are not mentioned in the description.", + "B. The utensil is not mentioned in the description." + ], + "score": 0.9375, + "score_pos": 0.8333333333333334, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "17385866": { + "pred": "A scoop of vanilla ice cream topped with a slice of red strawberry, resting on a bed of green mint leaves.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the scoop is not mentioned, but the scoop of the ice cream is mentioned.", + 0.5 + ], + [ + "The scoop or the ice cream is not mentioned.", + 0 + ], + [ + "The shape of the scoop is mentioned in the description but is not round.", + -1 + ], + [ + "The shape of the scoop is mentioned in the description and is round.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The shape of the scoop is not mentioned, but the scoop of the ice cream is mentioned.", + "pred_index": 0, + "question_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the cone is not mentioned, but the cone of the ice cream is mentioned.", + 0.5 + ], + [ + "The cone or the ice cream is not mentioned.", + 0 + ], + [ + "The material of the cone is mentioned in the description but is not waffle.", + -1 + ], + [ + "The material of the cone is mentioned in the description and is waffle.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. The cone or the ice cream is not mentioned.", + "pred_index": 1, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the cone is not mentioned, but the cone of the ice cream is mentioned.", + 0.5 + ], + [ + "The cone or the ice cream is not mentioned.", + 0 + ], + [ + "The color of the cone is mentioned in the description but is not yellow.", + -1 + ], + [ + "The color of the cone is mentioned in the description and is yellow.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. The cone or the ice cream is not mentioned.", + "pred_index": 1, + "question_index": 2, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ice cream is not mentioned in the description.", + 0 + ], + [ + "The cherry of the ice cream is mentioned in the description.", + -1 + ], + [ + "The cherry of the ice cream is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The cherry of the ice cream is not mentioned in the description.", + "pred_index": 2, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The woman is mentioned in the description.", + -1 + ], + [ + "The woman is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The woman is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ice cream is not mentioned in the description.", + 0 + ], + [ + "The sprinkles of the ice cream are mentioned in the description.", + -1 + ], + [ + "The sprinkles of the ice cream are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The sprinkles of the ice cream are not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The man is mentioned in the description.", + -1 + ], + [ + "The man is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The man is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Turkish flag is mentioned in the description.", + -1 + ], + [ + "The Turkish flag is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The Turkish flag is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is an ice cream or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream topped with a slice of red strawberry, resting on a bed of green mint leaves.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is an ice cream or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream topped with a slice of red strawberry, resting on a bed of green mint leaves.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The shape of the scoop is not mentioned, but the scoop of the ice cream is mentioned.\nB. The scoop or the ice cream is not mentioned.\nC. The shape of the scoop is mentioned in the description but is not round.\nD. The shape of the scoop is mentioned in the description and is round.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream topped with a slice of red strawberry, resting on a bed of green mint leaves.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the cone is not mentioned, but the cone of the ice cream is mentioned.\nB. The cone or the ice cream is not mentioned.\nC. The material of the cone is mentioned in the description but is not waffle.\nD. The material of the cone is mentioned in the description and is waffle.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream topped with a slice of red strawberry, resting on a bed of green mint leaves.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The color of the cone is not mentioned, but the cone of the ice cream is mentioned.\nB. The cone or the ice cream is not mentioned.\nC. The color of the cone is mentioned in the description but is not yellow.\nD. The color of the cone is mentioned in the description and is yellow.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream topped with a slice of red strawberry, resting on a bed of green mint leaves.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ice cream is not mentioned in the description.\nB. The cherry of the ice cream is mentioned in the description.\nC. The cherry of the ice cream is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream topped with a slice of red strawberry, resting on a bed of green mint leaves.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The woman is mentioned in the description.\nB. 
The woman is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream topped with a slice of red strawberry, resting on a bed of green mint leaves.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ice cream is not mentioned in the description.\nB. The sprinkles of the ice cream are mentioned in the description.\nC. The sprinkles of the ice cream are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream topped with a slice of red strawberry, resting on a bed of green mint leaves.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The man is mentioned in the description.\nB. The man is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream topped with a slice of red strawberry, resting on a bed of green mint leaves.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Turkish flag is mentioned in the description.\nB. The Turkish flag is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The shape of the scoop is not mentioned, but the scoop of the ice cream is mentioned.", + "B. The cone or the ice cream is not mentioned.", + "B. The cone or the ice cream is not mentioned.", + "C. The cherry of the ice cream is not mentioned in the description.", + "B. The woman is not mentioned in the description.", + "C. The sprinkles of the ice cream are not mentioned in the description.", + "B. The man is not mentioned in the description.", + "B. The Turkish flag is not mentioned in the description." + ], + "score": 0.6875, + "score_pos": 0.16666666666666666, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "17404769": { + "pred": "The car is a white minivan with a rear design featuring a large, dark-tinted rear window and a smaller, rectangular window on the side. The rear lights are vertically aligned and wrap around the side of the vehicle. The car has a visible rear wheel with a five-spoke alloy rim. 
There is a small, square fuel cap located on the side panel near the rear wheel.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the gas cap is mentioned in the description but is not square.", + -1 + ], + [ + "The shape of the gas cap is not mentioned, but the gas cap of the suv is mentioned.", + 0.5 + ], + [ + "The gas cap or the suv is not mentioned.", + 0 + ], + [ + "The shape of the gas cap is mentioned in the description and is square.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the gas cap is mentioned in the description and is square.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the taillight is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the taillight is not mentioned, but the taillight of the suv is mentioned.", + 0.5 + ], + [ + "The taillight or the suv is not mentioned.", + 0 + ], + [ + "The material of the taillight is mentioned in the description and is plastic.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. The taillight or the suv is not mentioned.", + "pred_index": 2, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the suv is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the suv is not mentioned.", + 0 + ], + [ + "The suv is not mentioned.", + 0 + ], + [ + "The color of the suv is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the suv is mentioned in the description and is white.", + "pred_index": 3, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The front bumper of the suv is not mentioned in the description.", + 1 + ], + [ + "The suv is not mentioned in the description.", + 0 + ], + [ + "The front bumper of the suv is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The suv is not mentioned in the description.", + "pred_index": 1, + "question_index": 3, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The front wheel of the suv is not mentioned in the description.", + 1 + ], + [ + "The suv is not mentioned in the description.", + 0 + ], + [ + "The front wheel of the suv is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The suv is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The grille of the suv is not mentioned in the description.", + 1 + ], + [ + "The suv is not mentioned in the description.", + 0 + ], + [ + "The grille of the suv is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The suv is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The building is not mentioned in the description.", + 1 + ], + [ + "The building is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The building is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tree is not mentioned in the description.", + 1 + ], + [ + "The tree is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The tree is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a suv or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white minivan with a rear design featuring a large, dark-tinted rear window and a smaller, rectangular window on the side. The rear lights are vertically aligned and wrap around the side of the vehicle. The car has a visible rear wheel with a five-spoke alloy rim. There is a small, square fuel cap located on the side panel near the rear wheel.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a suv or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. 
No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white minivan with a rear design featuring a large, dark-tinted rear window and a smaller, rectangular window on the side. The rear lights are vertically aligned and wrap around the side of the vehicle. The car has a visible rear wheel with a five-spoke alloy rim. There is a small, square fuel cap located on the side panel near the rear wheel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the gas cap is mentioned in the description but is not square.\nB. The shape of the gas cap is not mentioned, but the gas cap of the suv is mentioned.\nC. The gas cap or the suv is not mentioned.\nD. The shape of the gas cap is mentioned in the description and is square.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white minivan with a rear design featuring a large, dark-tinted rear window and a smaller, rectangular window on the side. The rear lights are vertically aligned and wrap around the side of the vehicle. The car has a visible rear wheel with a five-spoke alloy rim. 
There is a small, square fuel cap located on the side panel near the rear wheel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the taillight is mentioned in the description but is not plastic.\nB. The material of the taillight is not mentioned, but the taillight of the suv is mentioned.\nC. The taillight or the suv is not mentioned.\nD. The material of the taillight is mentioned in the description and is plastic.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white minivan with a rear design featuring a large, dark-tinted rear window and a smaller, rectangular window on the side. The rear lights are vertically aligned and wrap around the side of the vehicle. The car has a visible rear wheel with a five-spoke alloy rim. There is a small, square fuel cap located on the side panel near the rear wheel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the suv is mentioned in the description but is not white.\nB. The color of the suv is not mentioned.\nC. The suv is not mentioned.\nD. The color of the suv is mentioned in the description and is white.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white minivan with a rear design featuring a large, dark-tinted rear window and a smaller, rectangular window on the side. The rear lights are vertically aligned and wrap around the side of the vehicle. The car has a visible rear wheel with a five-spoke alloy rim. There is a small, square fuel cap located on the side panel near the rear wheel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The front bumper of the suv is not mentioned in the description.\nB. The suv is not mentioned in the description.\nC. The front bumper of the suv is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white minivan with a rear design featuring a large, dark-tinted rear window and a smaller, rectangular window on the side. The rear lights are vertically aligned and wrap around the side of the vehicle. The car has a visible rear wheel with a five-spoke alloy rim. There is a small, square fuel cap located on the side panel near the rear wheel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The front wheel of the suv is not mentioned in the description.\nB. The suv is not mentioned in the description.\nC. 
The front wheel of the suv is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white minivan with a rear design featuring a large, dark-tinted rear window and a smaller, rectangular window on the side. The rear lights are vertically aligned and wrap around the side of the vehicle. The car has a visible rear wheel with a five-spoke alloy rim. There is a small, square fuel cap located on the side panel near the rear wheel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The grille of the suv is not mentioned in the description.\nB. The suv is not mentioned in the description.\nC. The grille of the suv is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white minivan with a rear design featuring a large, dark-tinted rear window and a smaller, rectangular window on the side. The rear lights are vertically aligned and wrap around the side of the vehicle. The car has a visible rear wheel with a five-spoke alloy rim. 
There is a small, square fuel cap located on the side panel near the rear wheel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The building is not mentioned in the description.\nB. The building is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white minivan with a rear design featuring a large, dark-tinted rear window and a smaller, rectangular window on the side. The rear lights are vertically aligned and wrap around the side of the vehicle. The car has a visible rear wheel with a five-spoke alloy rim. There is a small, square fuel cap located on the side panel near the rear wheel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tree is not mentioned in the description.\nB. The tree is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The shape of the gas cap is mentioned in the description and is square.", + "C. The taillight or the suv is not mentioned.", + "D. The color of the suv is mentioned in the description and is white.", + "B. The suv is not mentioned in the description.", + "B. The suv is not mentioned in the description.", + "B. The suv is not mentioned in the description.", + "A. The building is not mentioned in the description.", + "A. The tree is not mentioned in the description." 
+ ], + "score": 0.5, + "score_pos": 0.6666666666666666, + "score_neg": 0.4, + "neg_valid_num": 5, + "recognition_result": true + }, + "18217373": { + "pred": "The spectacles feature a round, gold-colored frame with a thin, dark brown temple arm. The lens is a light, translucent yellow.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The frame or the glasses is not mentioned.", + 0 + ], + [ + "The color of the frame is not mentioned, but the frame of the glasses is mentioned.", + 0.5 + ], + [ + "The color of the frame is mentioned in the description and is black or metallic.", + 1 + ], + [ + "The color of the frame is mentioned in the description but is not black or metallic.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the frame is mentioned in the description but is not black or metallic.", + "pred_index": 3, + "question_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The frame or the glasses is not mentioned.", + 0 + ], + [ + "The shape of the frame is not mentioned, but the frame of the glasses is mentioned.", + 0.5 + ], + [ + "The shape of the frame is mentioned in the description and is square or rounded.", + 1 + ], + [ + "The shape of the frame is mentioned in the description but is not square or rounded.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the frame is mentioned in the description and is square or rounded.", + "pred_index": 2, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lens or the glasses are not mentioned.", + 0 + ], + [ + "The color of the lens is not mentioned, but the lens of the glasses are mentioned.", + 0.5 + ], + [ + "The color of the lens is mentioned in the description and is clear.", + 1 + ], + [ + "The color of the lens is mentioned in the description but is not clear.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the lens is mentioned in the description but is not clear.", + "pred_index": 3, + "question_index": 2, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The temple arm or the glasses is not mentioned.", + 0 + ], + [ + "The shape of the temple arm is not mentioned, but the temple arm of the glasses is mentioned.", + 0.5 + ], + [ + "The shape of the temple arm is mentioned in the description and is curved.", + 1 + ], + [ + "The shape of the temple arm is mentioned in the description but is not curved.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the temple arm is not mentioned, but the temple arm of the glasses is mentioned.", + "pred_index": 1, + "question_index": 3, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fruit is mentioned in the description.", + -1 + ], + [ + "The fruit is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The fruit is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The women are mentioned in the description.", + -1 + ], + [ + "The women are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The women are not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The vegetables are mentioned in the description.", + -1 + ], + [ + "The vegetables are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The vegetables are not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The produce is mentioned in the description.", + -1 + ], + [ + "The produce is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The produce is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The food is mentioned in the description.", + -1 + ], + [ + "The food is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The food is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the objects in the description are glasses or objects of a similar type? 
Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a round, gold-colored frame with a thin, dark brown temple arm. The lens is a light, translucent yellow.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the objects in the description are glasses or objects of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a round, gold-colored frame with a thin, dark brown temple arm. The lens is a light, translucent yellow.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The frame or the glasses is not mentioned.\nB. The color of the frame is not mentioned, but the frame of the glasses is mentioned.\nC. The color of the frame is mentioned in the description and is black or metallic.\nD. 
The color of the frame is mentioned in the description but is not black or metallic.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a round, gold-colored frame with a thin, dark brown temple arm. The lens is a light, translucent yellow.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The frame or the glasses is not mentioned.\nB. The shape of the frame is not mentioned, but the frame of the glasses is mentioned.\nC. The shape of the frame is mentioned in the description and is square or rounded.\nD. The shape of the frame is mentioned in the description but is not square or rounded.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a round, gold-colored frame with a thin, dark brown temple arm. The lens is a light, translucent yellow.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lens or the glasses are not mentioned.\nB. The color of the lens is not mentioned, but the lens of the glasses are mentioned.\nC. The color of the lens is mentioned in the description and is clear.\nD. 
The color of the lens is mentioned in the description but is not clear.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a round, gold-colored frame with a thin, dark brown temple arm. The lens is a light, translucent yellow.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The temple arm or the glasses is not mentioned.\nB. The shape of the temple arm is not mentioned, but the temple arm of the glasses is mentioned.\nC. The shape of the temple arm is mentioned in the description and is curved.\nD. The shape of the temple arm is mentioned in the description but is not curved.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a round, gold-colored frame with a thin, dark brown temple arm. The lens is a light, translucent yellow.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fruit is mentioned in the description.\nB. The fruit is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a round, gold-colored frame with a thin, dark brown temple arm. The lens is a light, translucent yellow.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The women are mentioned in the description.\nB. The women are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a round, gold-colored frame with a thin, dark brown temple arm. The lens is a light, translucent yellow.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The vegetables are mentioned in the description.\nB. The vegetables are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a round, gold-colored frame with a thin, dark brown temple arm. 
The lens is a light, translucent yellow.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The produce is mentioned in the description.\nB. The produce is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a round, gold-colored frame with a thin, dark brown temple arm. The lens is a light, translucent yellow.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The food is mentioned in the description.\nB. The food is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the frame is mentioned in the description but is not black or metallic.", + "C. The shape of the frame is mentioned in the description and is square or rounded.", + "D. The color of the lens is mentioned in the description but is not clear.", + "B. The shape of the temple arm is not mentioned, but the temple arm of the glasses is mentioned.", + "B. The fruit is not mentioned in the description.", + "B. The women are not mentioned in the description.", + "B. The vegetables are not mentioned in the description.", + "B. The produce is not mentioned in the description.", + "B. The food is not mentioned in the description." + ], + "score": 0.5, + "score_pos": -0.125, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "19455186": { + "pred": "A blue metal cart with a rectangular frame and four black wheels. 
The cart has two horizontal blue bars across the front, with a small white label affixed to the upper bar.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the cart/trolley is mentioned in the description and is blue.", + 1 + ], + [ + "The color of the cart/trolley is not mentioned.", + 0 + ], + [ + "The color of the cart/trolley is mentioned in the description but is not blue.", + -1 + ], + [ + "The cart/trolley is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the cart/trolley is mentioned in the description and is blue.", + "pred_index": 0, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the bars is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the bars is not mentioned, but the bars of the cart/trolley are mentioned.", + 0.5 + ], + [ + "The shape of the bars is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The bars or the cart/trolley are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the bars is not mentioned, but the bars of the cart/trolley are mentioned.", + "pred_index": 1, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the wheels is mentioned in the description and is small.", + 1 + ], + [ + "The size of the wheels is not mentioned, but the wheels of the cart/trolley are mentioned.", + 0.5 + ], + [ + "The size of the wheels is mentioned in the description but is not small.", + -1 + ], + [ + "The wheels or the cart/trolley are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The size of the wheels is not mentioned, but the wheels of the cart/trolley are mentioned.", + "pred_index": 1, + "question_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the wheels is mentioned in the description and is black.", + 1 + ], + [ + "The color of the wheels is not mentioned, but the wheels of the cart/trolley are mentioned.", + 0.5 + ], + [ + "The color of the wheels is mentioned in the description but is not black.", + -1 + ], + [ + "The wheels or the cart/trolley are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the wheels is mentioned in the description and is black.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lid of the cart/trolley is mentioned in the description.", + -1 + ], + [ + "The cart/trolley is not mentioned in the description.", + 0 + ], + [ + "The lid of the cart/trolley is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The lid of the cart/trolley is not mentioned in the description.", + "pred_index": 2, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The basket of the cart/trolley is mentioned in the description.", + -1 + ], + [ + "The cart/trolley is not mentioned in the description.", + 0 + ], + [ + "The basket of the cart/trolley is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The basket of the cart/trolley is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shelves are mentioned in the description.", + -1 + ], + [ + "The shelves are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The shelves are not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The drawers of the cart/trolley are mentioned in the description.", + -1 + ], + [ + "The cart/trolley is not mentioned in the description.", + 0 + ], + [ + "The drawers of the cart/trolley are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The drawers of the cart/trolley are not mentioned in the description.", + "pred_index": 2, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lights are mentioned in the description.", + -1 + ], + [ + "The lights are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The lights are not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a cart/trolley or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal cart with a rectangular frame and four black wheels. The cart has two horizontal blue bars across the front, with a small white label affixed to the upper bar.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a cart/trolley or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal cart with a rectangular frame and four black wheels. The cart has two horizontal blue bars across the front, with a small white label affixed to the upper bar.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the cart/trolley is mentioned in the description and is blue.\nB. The color of the cart/trolley is not mentioned.\nC. The color of the cart/trolley is mentioned in the description but is not blue.\nD. The cart/trolley is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal cart with a rectangular frame and four black wheels. The cart has two horizontal blue bars across the front, with a small white label affixed to the upper bar.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the bars is mentioned in the description and is rectangular.\nB. The shape of the bars is not mentioned, but the bars of the cart/trolley are mentioned.\nC. The shape of the bars is mentioned in the description but is not rectangular.\nD. The bars or the cart/trolley are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal cart with a rectangular frame and four black wheels. The cart has two horizontal blue bars across the front, with a small white label affixed to the upper bar.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the wheels is mentioned in the description and is small.\nB. The size of the wheels is not mentioned, but the wheels of the cart/trolley are mentioned.\nC. The size of the wheels is mentioned in the description but is not small.\nD. 
The wheels or the cart/trolley are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal cart with a rectangular frame and four black wheels. The cart has two horizontal blue bars across the front, with a small white label affixed to the upper bar.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the wheels is mentioned in the description and is black.\nB. The color of the wheels is not mentioned, but the wheels of the cart/trolley are mentioned.\nC. The color of the wheels is mentioned in the description but is not black.\nD. The wheels or the cart/trolley are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal cart with a rectangular frame and four black wheels. The cart has two horizontal blue bars across the front, with a small white label affixed to the upper bar.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lid of the cart/trolley is mentioned in the description.\nB. The cart/trolley is not mentioned in the description.\nC. 
The lid of the cart/trolley is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal cart with a rectangular frame and four black wheels. The cart has two horizontal blue bars across the front, with a small white label affixed to the upper bar.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The basket of the cart/trolley is mentioned in the description.\nB. The cart/trolley is not mentioned in the description.\nC. The basket of the cart/trolley is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal cart with a rectangular frame and four black wheels. The cart has two horizontal blue bars across the front, with a small white label affixed to the upper bar.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shelves are mentioned in the description.\nB. The shelves are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal cart with a rectangular frame and four black wheels. The cart has two horizontal blue bars across the front, with a small white label affixed to the upper bar.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The drawers of the cart/trolley are mentioned in the description.\nB. The cart/trolley is not mentioned in the description.\nC. The drawers of the cart/trolley are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal cart with a rectangular frame and four black wheels. The cart has two horizontal blue bars across the front, with a small white label affixed to the upper bar.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lights are mentioned in the description.\nB. The lights are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The color of the cart/trolley is mentioned in the description and is blue.", + "B. The shape of the bars is not mentioned, but the bars of the cart/trolley are mentioned.", + "B. The size of the wheels is not mentioned, but the wheels of the cart/trolley are mentioned.", + "A. 
The color of the wheels is mentioned in the description and is black.", + "C. The lid of the cart/trolley is not mentioned in the description.", + "C. The basket of the cart/trolley is not mentioned in the description.", + "B. The shelves are not mentioned in the description.", + "C. The drawers of the cart/trolley are not mentioned in the description.", + "B. The lights are not mentioned in the description." + ], + "score": 0.8888888888888888, + "score_pos": 0.75, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "19610023": { + "pred": "A bright green, frog-shaped slipper with a smooth, rounded body and a wide, open mouth. The slipper has a small, raised bump on the top of its head, resembling an eye.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the slippers is mentioned in the description but is not green.", + -1 + ], + [ + "The slippers are not mentioned.", + 0 + ], + [ + "The color of the slippers is mentioned in the description and is green.", + 1 + ], + [ + "The color of the slippers is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the slippers is mentioned in the description and is green.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the toe box is mentioned in the description but is not round.", + -1 + ], + [ + "The toe box or the slippers is not mentioned.", + 0 + ], + [ + "The shape of the toe box is mentioned in the description and is round.", + 1 + ], + [ + "The shape of the toe box is not mentioned, but the toe box of the slippers is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The toe box or the slippers is not mentioned.", + "pred_index": 1, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the slippers is mentioned in the description but is not frog-shaped.", + -1 + ], + [ + "The slippers are not mentioned.", + 0 + ], + [ + "The shape of the slippers is mentioned in the description and is frog-shaped.", + 1 + ], + [ + "The shape of the slippers is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the slippers is mentioned in the description and is frog-shaped.", + "pred_index": 2, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plant is not mentioned in the description.", + 1 + ], + [ + "The plant is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The plant is not mentioned in the description.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lining of the slippers is not mentioned in the description.", + 1 + ], + [ + "The slippers are not mentioned in the description.", + 0 + ], + [ + "The lining of the slippers is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The lining of the slippers is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shirt is not mentioned in the description.", + 1 + ], + [ + "The shirt is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The shirt is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The buckle of the slippers is not mentioned in the description.", + 1 + ], + [ + "The slippers are not mentioned in the description.", + 0 + ], + [ + "The buckle of the slippers is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The buckle of the slippers is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wall is not mentioned in the description.", + 1 + ], + [ + "The wall is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The wall is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the objects in the description are slippers or objects of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green, frog-shaped slipper with a smooth, rounded body and a wide, open mouth. 
The slipper has a small, raised bump on the top of its head, resembling an eye.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the objects in the description are slippers or objects of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green, frog-shaped slipper with a smooth, rounded body and a wide, open mouth. The slipper has a small, raised bump on the top of its head, resembling an eye.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the slippers is mentioned in the description but is not green.\nB. The slippers are not mentioned.\nC. The color of the slippers is mentioned in the description and is green.\nD. The color of the slippers is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green, frog-shaped slipper with a smooth, rounded body and a wide, open mouth. 
The slipper has a small, raised bump on the top of its head, resembling an eye.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the toe box is mentioned in the description but is not round.\nB. The toe box or the slippers is not mentioned.\nC. The shape of the toe box is mentioned in the description and is round.\nD. The shape of the toe box is not mentioned, but the toe box of the slippers is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green, frog-shaped slipper with a smooth, rounded body and a wide, open mouth. The slipper has a small, raised bump on the top of its head, resembling an eye.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the slippers is mentioned in the description but is not frog-shaped.\nB. The slippers are not mentioned.\nC. The shape of the slippers is mentioned in the description and is frog-shaped.\nD. The shape of the slippers is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA bright green, frog-shaped slipper with a smooth, rounded body and a wide, open mouth. The slipper has a small, raised bump on the top of its head, resembling an eye.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The plant is not mentioned in the description.\nB. The plant is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green, frog-shaped slipper with a smooth, rounded body and a wide, open mouth. The slipper has a small, raised bump on the top of its head, resembling an eye.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lining of the slippers is not mentioned in the description.\nB. The slippers are not mentioned in the description.\nC. The lining of the slippers is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green, frog-shaped slipper with a smooth, rounded body and a wide, open mouth. 
The slipper has a small, raised bump on the top of its head, resembling an eye.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shirt is not mentioned in the description.\nB. The shirt is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green, frog-shaped slipper with a smooth, rounded body and a wide, open mouth. The slipper has a small, raised bump on the top of its head, resembling an eye.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The buckle of the slippers is not mentioned in the description.\nB. The slippers are not mentioned in the description.\nC. The buckle of the slippers is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green, frog-shaped slipper with a smooth, rounded body and a wide, open mouth. The slipper has a small, raised bump on the top of its head, resembling an eye.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The wall is not mentioned in the description.\nB. The wall is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the slippers is mentioned in the description and is green.", + "B. The toe box or the slippers is not mentioned.", + "C. The shape of the slippers is mentioned in the description and is frog-shaped.", + "A. The plant is not mentioned in the description.", + "A. The lining of the slippers is not mentioned in the description.", + "A. The shirt is not mentioned in the description.", + "A. The buckle of the slippers is not mentioned in the description.", + "A. The wall is not mentioned in the description." + ], + "score": 0.875, + "score_pos": 0.6666666666666666, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "19610025": { + "pred": "A white rabbit with upright ears, wearing a yellow shirt and blue pants, is holding a brown basket on its back.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the ear is mentioned in the description and is large.", + 1 + ], + [ + "The size of the ear is not mentioned, but the ear of the rabbit is mentioned.", + 0.5 + ], + [ + "The ear or the rabbit is not mentioned.", + 0 + ], + [ + "The size of the ear is mentioned in the description but is not large.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The size of the ear is not mentioned, but the ear of the rabbit is mentioned.", + "pred_index": 1, + "question_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the ear is mentioned in the description and is pink.", + 1 + ], + [ + "The color of the ear is not mentioned, but the ear of the rabbit is mentioned.", + 0.5 + ], + [ + "The ear or the rabbit is not mentioned.", + 0 + ], + [ + "The color of the ear is mentioned in the description but is not pink.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the ear is not mentioned, but the ear of the rabbit is mentioned.", + "pred_index": 1, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the eye is mentioned in the description and is black.", + 1 + ], + [ + "The color of the eye is not mentioned, but the eye of the rabbit is mentioned.", + 0.5 + ], + [ + "The eye or the rabbit is not mentioned.", + 0 + ], + [ + "The color of the eye is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the eye is not mentioned, but the eye of the rabbit is mentioned.", + "pred_index": 1, + "question_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the shirt is mentioned in the description and is yellow.", + 1 + ], + [ + "The color of the shirt is not mentioned, but the shirt of the rabbit is mentioned.", + 0.5 + ], + [ + "The shirt or the rabbit is not mentioned.", + 0 + ], + [ + "The color of the shirt is mentioned in the description but is not yellow.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the shirt is mentioned in the description and is yellow.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the skirt is mentioned in the description and is blue.", + 1 + ], + [ + "The color of the skirt is not mentioned, but the skirt of the rabbit is mentioned.", + 0.5 + ], + [ + "The skirt or the rabbit is not mentioned.", + 0 + ], + [ + "The color of the skirt is mentioned in the description but is not blue.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the skirt is mentioned in the description and is blue.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rabbit is not mentioned in the description.", + 0 + ], + [ + "The whisker of the rabbit is not mentioned in the description.", + 1 + ], + [ + "The whisker of the rabbit is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The whisker of the rabbit is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rabbit is not mentioned in the description.", + 0 + ], + [ + "The teeth of the rabbit are not mentioned in the description.", + 1 + ], + [ + "The teeth of the rabbit are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The teeth of the rabbit are not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plant is not mentioned in the description.", + 1 + ], + [ + "The plant is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The plant is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rabbit is not mentioned in the description.", + 0 + ], + [ + "The tail of the rabbit is not mentioned in the description.", + 1 + ], + [ + "The tail of the rabbit is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The tail of the rabbit is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The poster is not mentioned in the description.", + 1 + ], + [ + "The poster is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The poster is not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a rabbit or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with upright ears, wearing a yellow shirt and blue pants, is holding a brown basket on its back.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a rabbit or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with upright ears, wearing a yellow shirt and blue pants, is holding a brown basket on its back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the ear is mentioned in the description and is large.\nB. The size of the ear is not mentioned, but the ear of the rabbit is mentioned.\nC. The ear or the rabbit is not mentioned.\nD. The size of the ear is mentioned in the description but is not large.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with upright ears, wearing a yellow shirt and blue pants, is holding a brown basket on its back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the ear is mentioned in the description and is pink.\nB. The color of the ear is not mentioned, but the ear of the rabbit is mentioned.\nC. The ear or the rabbit is not mentioned.\nD. The color of the ear is mentioned in the description but is not pink.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with upright ears, wearing a yellow shirt and blue pants, is holding a brown basket on its back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the eye is mentioned in the description and is black.\nB. The color of the eye is not mentioned, but the eye of the rabbit is mentioned.\nC. The eye or the rabbit is not mentioned.\nD. The color of the eye is mentioned in the description but is not black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with upright ears, wearing a yellow shirt and blue pants, is holding a brown basket on its back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the shirt is mentioned in the description and is yellow.\nB. The color of the shirt is not mentioned, but the shirt of the rabbit is mentioned.\nC. The shirt or the rabbit is not mentioned.\nD. The color of the shirt is mentioned in the description but is not yellow.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with upright ears, wearing a yellow shirt and blue pants, is holding a brown basket on its back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the skirt is mentioned in the description and is blue.\nB. The color of the skirt is not mentioned, but the skirt of the rabbit is mentioned.\nC. The skirt or the rabbit is not mentioned.\nD. The color of the skirt is mentioned in the description but is not blue.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with upright ears, wearing a yellow shirt and blue pants, is holding a brown basket on its back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rabbit is not mentioned in the description.\nB. The whisker of the rabbit is not mentioned in the description.\nC. The whisker of the rabbit is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with upright ears, wearing a yellow shirt and blue pants, is holding a brown basket on its back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rabbit is not mentioned in the description.\nB. The teeth of the rabbit are not mentioned in the description.\nC. The teeth of the rabbit are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with upright ears, wearing a yellow shirt and blue pants, is holding a brown basket on its back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The plant is not mentioned in the description.\nB. The plant is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with upright ears, wearing a yellow shirt and blue pants, is holding a brown basket on its back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rabbit is not mentioned in the description.\nB. The tail of the rabbit is not mentioned in the description.\nC. The tail of the rabbit is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with upright ears, wearing a yellow shirt and blue pants, is holding a brown basket on its back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The poster is not mentioned in the description.\nB. The poster is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The size of the ear is not mentioned, but the ear of the rabbit is mentioned.", + "B. The color of the ear is not mentioned, but the ear of the rabbit is mentioned.", + "B. The color of the eye is not mentioned, but the eye of the rabbit is mentioned.", + "A. The color of the shirt is mentioned in the description and is yellow.", + "A. The color of the skirt is mentioned in the description and is blue.", + "B. The whisker of the rabbit is not mentioned in the description.", + "B. The teeth of the rabbit are not mentioned in the description.", + "A. The plant is not mentioned in the description.", + "B. The tail of the rabbit is not mentioned in the description.", + "A. The poster is not mentioned in the description." + ], + "score": 0.85, + "score_pos": 0.7, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "20568676": { + "pred": "A stainless steel bowl filled with a mixture of chopped nuts and a yellow spatula resting on top.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the rim is mentioned in the description and is round.", + 1 + ], + [ + "The rim or the pot is not mentioned.", + 0 + ], + [ + "The shape of the rim is mentioned in the description but is not round.", + -1 + ], + [ + "The shape of the rim is not mentioned, but the rim of the pot is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The rim or the pot is not mentioned.", + "pred_index": 1, + "question_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the content is mentioned in the description and is yellow or brown.", + 1 + ], + [ + "The content or the pot is not mentioned.", + 0 + ], + [ + "The color of the content is mentioned in the description but is not yellow or brown.", + -1 + ], + [ + "The color of the content is not mentioned, but the content of the pot is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the content is mentioned in the description and is yellow or brown.", + "pred_index": 0, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the pot is mentioned in the description and is metal.", + 1 + ], + [ + "The pot is not mentioned.", + 0 + ], + [ + "The material of the pot is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the pot is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the pot is mentioned in the description and is metal.", + "pred_index": 0, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The spout of the pot is mentioned in the description.", + -1 + ], + [ + "The spout of the pot is not mentioned in the description.", + 1 + ], + [ + "The pot is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The spout of the pot is not mentioned in the description.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bowl is mentioned in the description.", + -1 + ], + [ + "The bowl is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The bowl is mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottom of the pot is mentioned in the description.", + -1 + ], + [ + "The bottom of the pot is not mentioned in the description.", + 1 + ], + [ + "The pot is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The bottom of the pot is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lid of the pot is mentioned in the description.", + -1 + ], + [ + "The lid of the pot is not mentioned in the description.", + 1 + ], + [ + "The pot is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The lid of the pot is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The stove is mentioned in the description.", + -1 + ], + [ + "The stove is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The stove is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a pot or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel bowl filled with a mixture of chopped nuts and a yellow spatula resting on top.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a pot or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel bowl filled with a mixture of chopped nuts and a yellow spatula resting on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the rim is mentioned in the description and is round.\nB. 
The rim or the pot is not mentioned.\nC. The shape of the rim is mentioned in the description but is not round.\nD. The shape of the rim is not mentioned, but the rim of the pot is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel bowl filled with a mixture of chopped nuts and a yellow spatula resting on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the content is mentioned in the description and is yellow or brown.\nB. The content or the pot is not mentioned.\nC. The color of the content is mentioned in the description but is not yellow or brown.\nD. The color of the content is not mentioned, but the content of the pot is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel bowl filled with a mixture of chopped nuts and a yellow spatula resting on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the pot is mentioned in the description and is metal.\nB. The pot is not mentioned.\nC. 
The material of the pot is mentioned in the description but is not metal.\nD. The material of the pot is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel bowl filled with a mixture of chopped nuts and a yellow spatula resting on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The spout of the pot is mentioned in the description.\nB. The spout of the pot is not mentioned in the description.\nC. The pot is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel bowl filled with a mixture of chopped nuts and a yellow spatula resting on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bowl is mentioned in the description.\nB. The bowl is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel bowl filled with a mixture of chopped nuts and a yellow spatula resting on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bottom of the pot is mentioned in the description.\nB. The bottom of the pot is not mentioned in the description.\nC. The pot is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel bowl filled with a mixture of chopped nuts and a yellow spatula resting on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lid of the pot is mentioned in the description.\nB. The lid of the pot is not mentioned in the description.\nC. The pot is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel bowl filled with a mixture of chopped nuts and a yellow spatula resting on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The stove is mentioned in the description.\nB. The stove is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The rim or the pot is not mentioned.", + "A. The color of the content is mentioned in the description and is yellow or brown.", + "A. The material of the pot is mentioned in the description and is metal.", + "B. The spout of the pot is not mentioned in the description.", + "A. The bowl is mentioned in the description.", + "B. The bottom of the pot is not mentioned in the description.", + "B. The lid of the pot is not mentioned in the description.", + "B. The stove is not mentioned in the description." + ], + "score": 0.625, + "score_pos": 0.6666666666666666, + "score_neg": 0.6, + "neg_valid_num": 5, + "recognition_result": true + }, + "21107974": { + "pred": "A wooden gavel with a cylindrical head and a smooth, slightly tapered handle. The head features a prominent, rounded end and a series of horizontal grooves near the top. The handle is uniformly cylindrical and extends straight from the head.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the head is not mentioned, but the head of the gavel/mallet is mentioned.", + 0.5 + ], + [ + "The shape of the head is mentioned in the description and is round or cylindrical.", + 1 + ], + [ + "The head or the gavel/mallet is not mentioned.", + 0 + ], + [ + "The shape of the head is mentioned in the description but is not round or cylindrical.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The shape of the head is mentioned in the description and is round or cylindrical.", + "pred_index": 1, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the handle is not mentioned, but the handle of the gavel/mallet is mentioned.", + 0.5 + ], + [ + "The shape of the handle is mentioned in the description and is cylindrical.", + 1 + ], + [ + "The handle or the gavel/mallet is not mentioned.", + 0 + ], + [ + "The shape of the handle is mentioned in the description but is not cylindrical.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the handle is mentioned in the description and is cylindrical.", + "pred_index": 1, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the gavel/mallet is not mentioned.", + 0 + ], + [ + "The color of the gavel/mallet is mentioned in the description and is brown.", + 1 + ], + [ + "The gavel/mallet is not mentioned.", + 0 + ], + [ + "The color of the gavel/mallet is mentioned in the description but is not brown.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the gavel/mallet is not mentioned.", + "pred_index": 0, + "question_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the gavel/mallet is not mentioned.", + 0 + ], + [ + "The material of the gavel/mallet is mentioned in the description and is wood.", + 1 + ], + [ + "The gavel/mallet is not mentioned.", + 0 + ], + [ + "The material of the gavel/mallet is mentioned in the description but is not wood.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The material of the gavel/mallet is mentioned in the description and is wood.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The microphone is mentioned in the description.", + -1 + ], + [ + "The microphone is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The microphone is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The man is mentioned in the description.", + -1 + ], + [ + "The man is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The man is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The podium is mentioned in the description.", + -1 + ], + [ + "The podium is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The podium is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The gavel/mallet is not mentioned in the description.", + 0 + ], + [ + "The neck of the gavel/mallet is mentioned in the description.", + -1 + ], + [ + "The neck of the gavel/mallet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The neck of the gavel/mallet is not mentioned in the description.", + "pred_index": 2, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sign is mentioned in the description.", + -1 + ], + [ + "The sign is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The sign is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a gavel/mallet or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head and a smooth, slightly tapered handle. The head features a prominent, rounded end and a series of horizontal grooves near the top. The handle is uniformly cylindrical and extends straight from the head.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a gavel/mallet or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head and a smooth, slightly tapered handle. The head features a prominent, rounded end and a series of horizontal grooves near the top. The handle is uniformly cylindrical and extends straight from the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the head is not mentioned, but the head of the gavel/mallet is mentioned.\nB. The shape of the head is mentioned in the description and is round or cylindrical.\nC. The head or the gavel/mallet is not mentioned.\nD. The shape of the head is mentioned in the description but is not round or cylindrical.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head and a smooth, slightly tapered handle. The head features a prominent, rounded end and a series of horizontal grooves near the top. The handle is uniformly cylindrical and extends straight from the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the handle is not mentioned, but the handle of the gavel/mallet is mentioned.\nB. The shape of the handle is mentioned in the description and is cylindrical.\nC. 
The handle or the gavel/mallet is not mentioned.\nD. The shape of the handle is mentioned in the description but is not cylindrical.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head and a smooth, slightly tapered handle. The head features a prominent, rounded end and a series of horizontal grooves near the top. The handle is uniformly cylindrical and extends straight from the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the gavel/mallet is not mentioned.\nB. The color of the gavel/mallet is mentioned in the description and is brown.\nC. The gavel/mallet is not mentioned.\nD. The color of the gavel/mallet is mentioned in the description but is not brown.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head and a smooth, slightly tapered handle. The head features a prominent, rounded end and a series of horizontal grooves near the top. 
The handle is uniformly cylindrical and extends straight from the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the gavel/mallet is not mentioned.\nB. The material of the gavel/mallet is mentioned in the description and is wood.\nC. The gavel/mallet is not mentioned.\nD. The material of the gavel/mallet is mentioned in the description but is not wood.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head and a smooth, slightly tapered handle. The head features a prominent, rounded end and a series of horizontal grooves near the top. The handle is uniformly cylindrical and extends straight from the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The microphone is mentioned in the description.\nB. The microphone is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head and a smooth, slightly tapered handle. The head features a prominent, rounded end and a series of horizontal grooves near the top. 
The handle is uniformly cylindrical and extends straight from the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The man is mentioned in the description.\nB. The man is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head and a smooth, slightly tapered handle. The head features a prominent, rounded end and a series of horizontal grooves near the top. The handle is uniformly cylindrical and extends straight from the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The podium is mentioned in the description.\nB. The podium is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head and a smooth, slightly tapered handle. The head features a prominent, rounded end and a series of horizontal grooves near the top. 
The handle is uniformly cylindrical and extends straight from the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The gavel/mallet is not mentioned in the description.\nB. The neck of the gavel/mallet is mentioned in the description.\nC. The neck of the gavel/mallet is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head and a smooth, slightly tapered handle. The head features a prominent, rounded end and a series of horizontal grooves near the top. The handle is uniformly cylindrical and extends straight from the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sign is mentioned in the description.\nB. The sign is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The shape of the head is mentioned in the description and is round or cylindrical.", + "B. The shape of the handle is mentioned in the description and is cylindrical.", + "A. The color of the gavel/mallet is not mentioned.", + "B. The material of the gavel/mallet is mentioned in the description and is wood.", + "B. The microphone is not mentioned in the description.", + "B. The man is not mentioned in the description.", + "B. The podium is not mentioned in the description.", + "C. The neck of the gavel/mallet is not mentioned in the description.", + "B. The sign is not mentioned in the description." 
+ ], + "score": 0.8888888888888888, + "score_pos": 0.75, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "22064315": { + "pred": "The visible part of the gazelle shows a pair of long, curved horns with a dark, almost black coloration. The horns are smooth and taper to a point. The base of the horns is attached to a light brown, slightly textured head.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the horn is mentioned in the description and is long, curved, or pointed.", + 1 + ], + [ + "The shape of the horn is mentioned in the description but is not long, curved, or pointed.", + -1 + ], + [ + "The shape of the horn is not mentioned, but the horn of the antelope is mentioned.", + 0.5 + ], + [ + "The horn or the antelope is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the horn is mentioned in the description and is long, curved, or pointed.", + "pred_index": 0, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the horn is mentioned in the description and is grey, black, or dark.", + 1 + ], + [ + "The color of the horn is mentioned in the description but is not grey, black, or dark.", + -1 + ], + [ + "The color of the horn is not mentioned, but the horn of the antelope is mentioned.", + 0.5 + ], + [ + "The horn or the antelope is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the horn is mentioned in the description and is grey, black, or dark.", + "pred_index": 0, + "question_index": 1, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tail of the antelope is mentioned in the description.", + -1 + ], + [ + "The tail of the antelope is not mentioned in the description.", + 1 + ], + [ + "The antelope is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The tail of the antelope is not mentioned in the description.", + "pred_index": 1, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The deer are mentioned in the description.", + -1 + ], + [ + "The deer are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The deer are not mentioned in the description.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The car is mentioned in the description.", + -1 + ], + [ + "The car is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The car is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tree is mentioned in the description.", + -1 + ], + [ + "The tree is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The tree is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The grass are mentioned in the description.", + -1 + ], + [ + "The grass are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The grass are not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is an antelope or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the gazelle shows a pair of long, curved horns with a dark, almost black coloration. The horns are smooth and taper to a point. The base of the horns is attached to a light brown, slightly textured head.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is an antelope or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the gazelle shows a pair of long, curved horns with a dark, almost black coloration. The horns are smooth and taper to a point. The base of the horns is attached to a light brown, slightly textured head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the horn is mentioned in the description and is long, curved, or pointed.\nB. The shape of the horn is mentioned in the description but is not long, curved, or pointed.\nC. The shape of the horn is not mentioned, but the horn of the antelope is mentioned.\nD. The horn or the antelope is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the gazelle shows a pair of long, curved horns with a dark, almost black coloration. The horns are smooth and taper to a point. The base of the horns is attached to a light brown, slightly textured head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the horn is mentioned in the description and is grey, black, or dark.\nB. The color of the horn is mentioned in the description but is not grey, black, or dark.\nC. The color of the horn is not mentioned, but the horn of the antelope is mentioned.\nD. 
The horn or the antelope is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the gazelle shows a pair of long, curved horns with a dark, almost black coloration. The horns are smooth and taper to a point. The base of the horns is attached to a light brown, slightly textured head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tail of the antelope is mentioned in the description.\nB. The tail of the antelope is not mentioned in the description.\nC. The antelope is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the gazelle shows a pair of long, curved horns with a dark, almost black coloration. The horns are smooth and taper to a point. The base of the horns is attached to a light brown, slightly textured head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The deer are mentioned in the description.\nB. The deer are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the gazelle shows a pair of long, curved horns with a dark, almost black coloration. The horns are smooth and taper to a point. The base of the horns is attached to a light brown, slightly textured head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The car is mentioned in the description.\nB. The car is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the gazelle shows a pair of long, curved horns with a dark, almost black coloration. The horns are smooth and taper to a point. The base of the horns is attached to a light brown, slightly textured head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tree is mentioned in the description.\nB. The tree is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the gazelle shows a pair of long, curved horns with a dark, almost black coloration. The horns are smooth and taper to a point. The base of the horns is attached to a light brown, slightly textured head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The grass are mentioned in the description.\nB. The grass are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The shape of the horn is mentioned in the description and is long, curved, or pointed.", + "A. The color of the horn is mentioned in the description and is grey, black, or dark.", + "B. The tail of the antelope is not mentioned in the description.", + "B. The deer are not mentioned in the description.", + "B. The car is not mentioned in the description.", + "B. The tree is not mentioned in the description.", + "B. The grass are not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "22107522": { + "pred": "A black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the bow tie is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the bow tie is mentioned in the description and is smooth.", + 1 + ], + [ + "The bow tie is not mentioned.", + 0 + ], + [ + "The texture of the bow tie is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The texture of the bow tie is mentioned in the description and is smooth.", + "pred_index": 1, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the bow tie is mentioned in the description but is not butterfly-shaped.", + -1 + ], + [ + "The shape of the bow tie is mentioned in the description and is butterfly-shaped.", + 1 + ], + [ + "The bow tie is not mentioned.", + 0 + ], + [ + "The shape of the bow tie is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the bow tie is mentioned in the description and is butterfly-shaped.", + "pred_index": 1, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the bow tie is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the bow tie is mentioned in the description and is black.", + 1 + ], + [ + "The bow tie is not mentioned.", + 0 + ], + [ + "The color of the bow tie is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the bow tie is mentioned in the description and is black.", + "pred_index": 1, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the bow tie is mentioned in the description but is not fabric.", + -1 + ], + [ + "The material of the bow tie is mentioned in the description and is fabric.", + 1 + ], + [ + "The bow tie is not mentioned.", + 0 + ], + [ + "The material of the bow tie is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The material of the bow tie is mentioned in the description and is fabric.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bow tie is not mentioned in the description.", + 0 + ], + [ + "The neck band of the bow tie is mentioned in the description.", + -1 + ], + [ + "The neck band of the bow tie is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The neck band of the bow tie is not mentioned in the description.", + "pred_index": 2, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The trumpet is mentioned in the description.", + -1 + ], + [ + "The trumpet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The trumpet is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The suit is mentioned in the description.", + -1 + ], + [ + "The suit is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The suit is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The person is mentioned in the description.", + -1 + ], + [ + "The person is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The person is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The glasses are mentioned in the description.", + -1 + ], + [ + "The glasses are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The glasses are not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a bow tie or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a bow tie or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the bow tie is mentioned in the description but is not smooth.\nB. The texture of the bow tie is mentioned in the description and is smooth.\nC. The bow tie is not mentioned.\nD. The texture of the bow tie is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the bow tie is mentioned in the description but is not butterfly-shaped.\nB. The shape of the bow tie is mentioned in the description and is butterfly-shaped.\nC. The bow tie is not mentioned.\nD. The shape of the bow tie is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the bow tie is mentioned in the description but is not black.\nB. The color of the bow tie is mentioned in the description and is black.\nC. The bow tie is not mentioned.\nD. The color of the bow tie is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the bow tie is mentioned in the description but is not fabric.\nB. The material of the bow tie is mentioned in the description and is fabric.\nC. The bow tie is not mentioned.\nD. The material of the bow tie is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bow tie is not mentioned in the description.\nB. The neck band of the bow tie is mentioned in the description.\nC. The neck band of the bow tie is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The trumpet is mentioned in the description.\nB. The trumpet is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The suit is mentioned in the description.\nB. The suit is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The person is mentioned in the description.\nB. The person is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The glasses are mentioned in the description.\nB. The glasses are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The texture of the bow tie is mentioned in the description and is smooth.", + "B. The shape of the bow tie is mentioned in the description and is butterfly-shaped.", + "B. The color of the bow tie is mentioned in the description and is black.", + "B. 
The material of the bow tie is mentioned in the description and is fabric.", + "C. The neck band of the bow tie is not mentioned in the description.", + "B. The trumpet is not mentioned in the description.", + "B. The suit is not mentioned in the description.", + "B. The person is not mentioned in the description.", + "B. The glasses are not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "24017816": { + "pred": "The van is white with a large, rectangular side window and a side mirror. The window is tinted, and the side mirror is black. The van has a sleek, modern design with smooth lines and a slightly curved roof.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the windshield is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The windshield or the car is not mentioned.", + 0 + ], + [ + "The shape of the windshield is not mentioned, but the windshield of the car is mentioned.", + 0.5 + ], + [ + "The shape of the windshield is mentioned in the description and is rectangular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the windshield is not mentioned, but the windshield of the car is mentioned.", + "pred_index": 2, + "question_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the car is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The car is not mentioned.", + 0 + ], + [ + "The color of the car is not mentioned.", + 0 + ], + [ + "The color of the car is mentioned in the description and is dark or black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the car is mentioned in the description but is not dark or black.", + "pred_index": 0, + "question_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the window is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The window or the car is not mentioned.", + 0 + ], + [ + "The color of the window is not mentioned, but the window of the car is mentioned.", + 0.5 + ], + [ + "The color of the window is mentioned in the description and is dark or black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the window is not mentioned, but the window of the car is mentioned.", + "pred_index": 2, + "question_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the side mirror is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The side mirror or the car is not mentioned.", + 0 + ], + [ + "The color of the side mirror is not mentioned, but the side mirror of the car is mentioned.", + 0.5 + ], + [ + "The color of the side mirror is mentioned in the description and is dark or black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the side mirror is mentioned in the description and is dark or black.", + "pred_index": 3, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fender of the car is not mentioned in the description.", + 1 + ], + [ + "The fender of the car is mentioned in the description.", + -1 + ], + [ + "The car is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The fender of the car is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The taillight of the car is not mentioned in the description.", + 1 + ], + [ + "The taillight of the car is mentioned in the description.", + -1 + ], + [ + "The car is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The taillight of the car is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tire of the car is not mentioned in the description.", + 1 + ], + [ + "The tire of the car is mentioned in the description.", + -1 + ], + [ + "The car is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The tire of the car is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The exhaust pipe of the car is not mentioned in the description.", + 1 + ], + [ + "The exhaust pipe of the car is mentioned in the description.", + -1 + ], + [ + "The car is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The exhaust pipe of the car is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The hood of the car is not mentioned in the description.", + 1 + ], + [ + "The hood of the car is mentioned in the description.", + -1 + ], + [ + "The car is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The hood of the car is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a car or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe van is white with a large, rectangular side window and a side mirror. The window is tinted, and the side mirror is black. The van has a sleek, modern design with smooth lines and a slightly curved roof.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a car or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe van is white with a large, rectangular side window and a side mirror. The window is tinted, and the side mirror is black. The van has a sleek, modern design with smooth lines and a slightly curved roof.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the windshield is mentioned in the description but is not rectangular.\nB. The windshield or the car is not mentioned.\nC. The shape of the windshield is not mentioned, but the windshield of the car is mentioned.\nD. The shape of the windshield is mentioned in the description and is rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe van is white with a large, rectangular side window and a side mirror. The window is tinted, and the side mirror is black. The van has a sleek, modern design with smooth lines and a slightly curved roof.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the car is mentioned in the description but is not dark or black.\nB. The car is not mentioned.\nC. The color of the car is not mentioned.\nD. 
The color of the car is mentioned in the description and is dark or black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe van is white with a large, rectangular side window and a side mirror. The window is tinted, and the side mirror is black. The van has a sleek, modern design with smooth lines and a slightly curved roof.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the window is mentioned in the description but is not dark or black.\nB. The window or the car is not mentioned.\nC. The color of the window is not mentioned, but the window of the car is mentioned.\nD. The color of the window is mentioned in the description and is dark or black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe van is white with a large, rectangular side window and a side mirror. The window is tinted, and the side mirror is black. The van has a sleek, modern design with smooth lines and a slightly curved roof.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The color of the side mirror is mentioned in the description but is not dark or black.\nB. The side mirror or the car is not mentioned.\nC. The color of the side mirror is not mentioned, but the side mirror of the car is mentioned.\nD. The color of the side mirror is mentioned in the description and is dark or black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe van is white with a large, rectangular side window and a side mirror. The window is tinted, and the side mirror is black. The van has a sleek, modern design with smooth lines and a slightly curved roof.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fender of the car is not mentioned in the description.\nB. The fender of the car is mentioned in the description.\nC. The car is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe van is white with a large, rectangular side window and a side mirror. The window is tinted, and the side mirror is black. 
The van has a sleek, modern design with smooth lines and a slightly curved roof.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The taillight of the car is not mentioned in the description.\nB. The taillight of the car is mentioned in the description.\nC. The car is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe van is white with a large, rectangular side window and a side mirror. The window is tinted, and the side mirror is black. The van has a sleek, modern design with smooth lines and a slightly curved roof.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tire of the car is not mentioned in the description.\nB. The tire of the car is mentioned in the description.\nC. The car is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe van is white with a large, rectangular side window and a side mirror. The window is tinted, and the side mirror is black. 
The van has a sleek, modern design with smooth lines and a slightly curved roof.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The exhaust pipe of the car is not mentioned in the description.\nB. The exhaust pipe of the car is mentioned in the description.\nC. The car is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe van is white with a large, rectangular side window and a side mirror. The window is tinted, and the side mirror is black. The van has a sleek, modern design with smooth lines and a slightly curved roof.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The hood of the car is not mentioned in the description.\nB. The hood of the car is mentioned in the description.\nC. The car is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The shape of the windshield is not mentioned, but the windshield of the car is mentioned.", + "A. The color of the car is mentioned in the description but is not dark or black.", + "C. The color of the window is not mentioned, but the window of the car is mentioned.", + "D. The color of the side mirror is mentioned in the description and is dark or black.", + "A. The fender of the car is not mentioned in the description.", + "A. The taillight of the car is not mentioned in the description.", + "A. The tire of the car is not mentioned in the description.", + "A. 
The exhaust pipe of the car is not mentioned in the description.", + "A. The hood of the car is not mentioned in the description." + ], + "score": 0.6666666666666666, + "score_pos": 0.25, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "24498027": { + "pred": "A tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a rectangular, box-like structure attached near the top, and a smaller, horizontal arm extending from the middle section.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the pole is mentioned in the description but is not black or dark.", + -1 + ], + [ + "The color of the pole is not mentioned, but the pole of the street lights is mentioned.", + 0.5 + ], + [ + "The color of the pole is mentioned in the description and is black or dark.", + 1 + ], + [ + "The pole or the street lights is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the pole is mentioned in the description and is black or dark.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the lamp is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the lamp is not mentioned, but the lamp of the street lights is mentioned.", + 0.5 + ], + [ + "The color of the lamp is mentioned in the description and is white.", + 1 + ], + [ + "The lamp or the street lights is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the lamp is mentioned in the description but is not white.", + "pred_index": 0, + "question_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The direction of the bars is mentioned in the description but is not horizontal.", + -1 + ], + [ + "The direction of the bars is not mentioned, but the bars of the street lights are mentioned.", + 0.5 + ], + [ + "The direction of the bars is mentioned in the description and is horizontal.", + 1 + ], + [ + "The bars or the street lights are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The direction of the bars is mentioned in the description and is horizontal.", + "pred_index": 2, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The street lights are not mentioned in the description.", + 0 + ], + [ + "The cable of the street lights is mentioned in the description.", + -1 + ], + [ + "The cable of the street lights is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The street lights are not mentioned in the description.", + "pred_index": 0, + "question_index": 3, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The street lights are not mentioned in the description.", + 0 + ], + [ + "The wire of the street lights is mentioned in the description.", + -1 + ], + [ + "The wire of the street lights is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The street lights are not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bus is mentioned in the description.", + -1 + ], + [ + "The bus is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bus is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bike is mentioned in the description.", + -1 + ], + [ + "The bike is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bike is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ambulance is mentioned in the description.", + -1 + ], + [ + "The ambulance is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The ambulance is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the objects in the description are street lights or objects of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a rectangular, box-like structure attached near the top, and a smaller, horizontal arm extending from the middle section.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the objects in the description are street lights or objects of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a rectangular, box-like structure attached near the top, and a smaller, horizontal arm extending from the middle section.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the pole is mentioned in the description but is not black or dark.\nB. The color of the pole is not mentioned, but the pole of the street lights is mentioned.\nC. The color of the pole is mentioned in the description and is black or dark.\nD. The pole or the street lights is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a rectangular, box-like structure attached near the top, and a smaller, horizontal arm extending from the middle section.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the lamp is mentioned in the description but is not white.\nB. The color of the lamp is not mentioned, but the lamp of the street lights is mentioned.\nC. The color of the lamp is mentioned in the description and is white.\nD. The lamp or the street lights is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a rectangular, box-like structure attached near the top, and a smaller, horizontal arm extending from the middle section.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The direction of the bars is mentioned in the description but is not horizontal.\nB. The direction of the bars is not mentioned, but the bars of the street lights are mentioned.\nC. The direction of the bars is mentioned in the description and is horizontal.\nD. 
The bars or the street lights are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a rectangular, box-like structure attached near the top, and a smaller, horizontal arm extending from the middle section.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The street lights are not mentioned in the description.\nB. The cable of the street lights is mentioned in the description.\nC. The cable of the street lights is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a rectangular, box-like structure attached near the top, and a smaller, horizontal arm extending from the middle section.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The street lights are not mentioned in the description.\nB. The wire of the street lights is mentioned in the description.\nC. 
The wire of the street lights is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a rectangular, box-like structure attached near the top, and a smaller, horizontal arm extending from the middle section.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bus is mentioned in the description.\nB. The bus is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a rectangular, box-like structure attached near the top, and a smaller, horizontal arm extending from the middle section.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bike is mentioned in the description.\nB. The bike is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a rectangular, box-like structure attached near the top, and a smaller, horizontal arm extending from the middle section.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ambulance is mentioned in the description.\nB. The ambulance is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the pole is mentioned in the description and is black or dark.", + "A. The color of the lamp is mentioned in the description but is not white.", + "C. The direction of the bars is mentioned in the description and is horizontal.", + "A. The street lights are not mentioned in the description.", + "A. The street lights are not mentioned in the description.", + "B. The bus is not mentioned in the description.", + "B. The bike is not mentioned in the description.", + "B. The ambulance is not mentioned in the description." + ], + "score": 0.5, + "score_pos": 0.3333333333333333, + "score_neg": 0.6, + "neg_valid_num": 5, + "recognition_result": true + }, + "24581953": { + "pred": "A large, light-colored carnivore with a robust, muscular body and a thick, short coat. It has a broad head with small, rounded ears and a long, tapering tail. 
The legs are sturdy and strong, with large paws.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the dog is mentioned in the description but is not white or gray.", + -1 + ], + [ + "The color of the dog is mentioned in the description and is white or gray.", + 1 + ], + [ + "The color of the dog is not mentioned.", + 0 + ], + [ + "The dog is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the dog is mentioned in the description and is white or gray.", + "pred_index": 1, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The posture of the dog is mentioned in the description but is not lying down.", + -1 + ], + [ + "The posture of the dog is mentioned in the description and is lying down.", + 1 + ], + [ + "The posture of the dog is not mentioned.", + 0 + ], + [ + "The dog is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The posture of the dog is not mentioned.", + "pred_index": 2, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the tail is mentioned in the description but is not large or long.", + -1 + ], + [ + "The size of the tail is mentioned in the description and is large or long.", + 1 + ], + [ + "The size of the tail is not mentioned, but the tail of the dog is mentioned.", + 0.5 + ], + [ + "The tail or the dog is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The size of the tail is mentioned in the description and is large or long.", + "pred_index": 1, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the dog is mentioned in the description but is not large.", + -1 + ], + [ + "The size of the dog is mentioned in the description and is large.", + 1 + ], + [ + "The size of the dog is not mentioned.", + 0 + ], + [ + "The dog is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The size of the dog is mentioned in the description and is large.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the coat is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the coat is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the coat is not mentioned, but the coat of the dog is mentioned.", + 0.5 + ], + [ + "The coat or the dog is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The texture of the coat is mentioned in the description but is not smooth.", + "pred_index": 0, + "question_index": 4, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The temple is not mentioned in the description.", + 1 + ], + [ + "The temple is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The temple is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouth of the dog is not mentioned in the description.", + 1 + ], + [ + "The mouth of the dog is mentioned in the description.", + -1 + ], + [ + "The dog is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The mouth of the dog is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The eye of the dog is not mentioned in the description.", + 1 + ], + [ + "The eye of the dog is mentioned in the description.", + -1 + ], + [ + "The dog is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The eye of the dog is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The nose of the dog is not mentioned in the description.", + 1 + ], + [ + "The nose of the dog is mentioned in the description.", + -1 + ], + [ + "The dog is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The nose of the dog is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bird is not mentioned in the description.", + 1 + ], + [ + "The bird is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The bird is not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a dog or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light-colored carnivore with a robust, muscular body and a thick, short coat. It has a broad head with small, rounded ears and a long, tapering tail. The legs are sturdy and strong, with large paws.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a dog or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light-colored carnivore with a robust, muscular body and a thick, short coat. It has a broad head with small, rounded ears and a long, tapering tail. 
The legs are sturdy and strong, with large paws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the dog is mentioned in the description but is not white or gray.\nB. The color of the dog is mentioned in the description and is white or gray.\nC. The color of the dog is not mentioned.\nD. The dog is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light-colored carnivore with a robust, muscular body and a thick, short coat. It has a broad head with small, rounded ears and a long, tapering tail. The legs are sturdy and strong, with large paws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The posture of the dog is mentioned in the description but is not lying down.\nB. The posture of the dog is mentioned in the description and is lying down.\nC. The posture of the dog is not mentioned.\nD. The dog is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light-colored carnivore with a robust, muscular body and a thick, short coat. 
It has a broad head with small, rounded ears and a long, tapering tail. The legs are sturdy and strong, with large paws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the tail is mentioned in the description but is not large or long.\nB. The size of the tail is mentioned in the description and is large or long.\nC. The size of the tail is not mentioned, but the tail of the dog is mentioned.\nD. The tail or the dog is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light-colored carnivore with a robust, muscular body and a thick, short coat. It has a broad head with small, rounded ears and a long, tapering tail. The legs are sturdy and strong, with large paws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the dog is mentioned in the description but is not large.\nB. The size of the dog is mentioned in the description and is large.\nC. The size of the dog is not mentioned.\nD. The dog is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA large, light-colored carnivore with a robust, muscular body and a thick, short coat. It has a broad head with small, rounded ears and a long, tapering tail. The legs are sturdy and strong, with large paws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the coat is mentioned in the description but is not smooth.\nB. The texture of the coat is mentioned in the description and is smooth.\nC. The texture of the coat is not mentioned, but the coat of the dog is mentioned.\nD. The coat or the dog is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light-colored carnivore with a robust, muscular body and a thick, short coat. It has a broad head with small, rounded ears and a long, tapering tail. The legs are sturdy and strong, with large paws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The temple is not mentioned in the description.\nB. The temple is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA large, light-colored carnivore with a robust, muscular body and a thick, short coat. It has a broad head with small, rounded ears and a long, tapering tail. The legs are sturdy and strong, with large paws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mouth of the dog is not mentioned in the description.\nB. The mouth of the dog is mentioned in the description.\nC. The dog is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light-colored carnivore with a robust, muscular body and a thick, short coat. It has a broad head with small, rounded ears and a long, tapering tail. The legs are sturdy and strong, with large paws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The eye of the dog is not mentioned in the description.\nB. The eye of the dog is mentioned in the description.\nC. The dog is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA large, light-colored carnivore with a robust, muscular body and a thick, short coat. It has a broad head with small, rounded ears and a long, tapering tail. The legs are sturdy and strong, with large paws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The nose of the dog is not mentioned in the description.\nB. The nose of the dog is mentioned in the description.\nC. The dog is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light-colored carnivore with a robust, muscular body and a thick, short coat. It has a broad head with small, rounded ears and a long, tapering tail. The legs are sturdy and strong, with large paws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bird is not mentioned in the description.\nB. The bird is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the dog is mentioned in the description and is white or gray.", + "C. The posture of the dog is not mentioned.", + "B. The size of the tail is mentioned in the description and is large or long.", + "B. The size of the dog is mentioned in the description and is large.", + "A. The texture of the coat is mentioned in the description but is not smooth.", + "A. The temple is not mentioned in the description.", + "A. 
The mouth of the dog is not mentioned in the description.", + "A. The eye of the dog is not mentioned in the description.", + "A. The nose of the dog is not mentioned in the description.", + "A. The bird is not mentioned in the description." + ], + "score": 0.7, + "score_pos": 0.4, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "24786060": { + "pred": "A light gray towel with a soft, slightly wrinkled texture, hanging loosely with a gentle curve.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the towel is mentioned in the description but is not gray or white.", + -1 + ], + [ + "The color of the towel is not mentioned.", + 0 + ], + [ + "The color of the towel is mentioned in the description and is gray or white.", + 1 + ], + [ + "The towel is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the towel is mentioned in the description and is gray or white.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the towel is mentioned in the description but is not plush.", + -1 + ], + [ + "The texture of the towel is not mentioned.", + 0 + ], + [ + "The texture of the towel is mentioned in the description and is plush.", + 1 + ], + [ + "The towel is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The texture of the towel is mentioned in the description but is not plush.", + "pred_index": 0, + "question_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the towel is mentioned in the description but is not irregular.", + -1 + ], + [ + "The shape of the towel is not mentioned.", + 0 + ], + [ + "The shape of the towel is mentioned in the description and is irregular.", + 1 + ], + [ + "The towel is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the towel is mentioned in the description but is not irregular.", + "pred_index": 0, + "question_index": 2, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The loop of the towel is mentioned in the description.", + -1 + ], + [ + "The loop of the towel is not mentioned in the description.", + 1 + ], + [ + "The towel is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The loop of the towel is not mentioned in the description.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The vanity is mentioned in the description.", + -1 + ], + [ + "The vanity is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The vanity is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sink is mentioned in the description.", + -1 + ], + [ + "The sink is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The sink is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The design of the towel is mentioned in the description.", + -1 + ], + [ + "The design of the towel is not mentioned in the description.", + 1 + ], + [ + "The towel is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The design of the towel is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet is mentioned in the description.", + -1 + ], + [ + "The toilet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The toilet is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a towel or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, slightly wrinkled texture, hanging loosely with a gentle curve.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a towel or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, slightly wrinkled texture, hanging loosely with a gentle curve.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the towel is mentioned in the description but is not gray or white.\nB. The color of the towel is not mentioned.\nC. The color of the towel is mentioned in the description and is gray or white.\nD. The towel is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, slightly wrinkled texture, hanging loosely with a gentle curve.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The texture of the towel is mentioned in the description but is not plush.\nB. The texture of the towel is not mentioned.\nC. The texture of the towel is mentioned in the description and is plush.\nD. The towel is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, slightly wrinkled texture, hanging loosely with a gentle curve.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the towel is mentioned in the description but is not irregular.\nB. The shape of the towel is not mentioned.\nC. The shape of the towel is mentioned in the description and is irregular.\nD. The towel is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, slightly wrinkled texture, hanging loosely with a gentle curve.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The loop of the towel is mentioned in the description.\nB. The loop of the towel is not mentioned in the description.\nC. 
The towel is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, slightly wrinkled texture, hanging loosely with a gentle curve.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The vanity is mentioned in the description.\nB. The vanity is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, slightly wrinkled texture, hanging loosely with a gentle curve.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sink is mentioned in the description.\nB. The sink is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, slightly wrinkled texture, hanging loosely with a gentle curve.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The design of the towel is mentioned in the description.\nB. The design of the towel is not mentioned in the description.\nC. The towel is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, slightly wrinkled texture, hanging loosely with a gentle curve.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toilet is mentioned in the description.\nB. The toilet is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the towel is mentioned in the description and is gray or white.", + "A. The texture of the towel is mentioned in the description but is not plush.", + "A. The shape of the towel is mentioned in the description but is not irregular.", + "B. The loop of the towel is not mentioned in the description.", + "B. The vanity is not mentioned in the description.", + "B. The sink is not mentioned in the description.", + "B. The design of the towel is not mentioned in the description.", + "B. The toilet is not mentioned in the description." 
+ ], + "score": 0.5, + "score_pos": -0.3333333333333333, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "25054869": { + "pred": "The toilet features a smooth, rounded lid with a glossy finish, seamlessly integrated into the tank. The tank has a slightly curved, angular design with a uniform, light beige color.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lid or the toilet is not mentioned.", + 0 + ], + [ + "The material of the lid is mentioned in the description but is not ceramic.", + -1 + ], + [ + "The material of the lid is mentioned in the description and is ceramic.", + 1 + ], + [ + "The material of the lid is not mentioned, but the lid of the toilet is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the lid is not mentioned, but the lid of the toilet is mentioned.", + "pred_index": 3, + "question_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lid or the toilet is not mentioned.", + 0 + ], + [ + "The texture of the lid is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the lid is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the lid is not mentioned, but the lid of the toilet is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. 
The texture of the lid is mentioned in the description and is smooth.", + "pred_index": 2, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tank or the toilet is not mentioned.", + 0 + ], + [ + "The shape of the tank is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the tank is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the tank is not mentioned, but the tank of the toilet is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the tank is mentioned in the description but is not rectangular.", + "pred_index": 1, + "question_index": 2, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet is not mentioned.", + 0 + ], + [ + "The color of the toilet is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the toilet is mentioned in the description and is white.", + 1 + ], + [ + "The color of the toilet is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the toilet is mentioned in the description but is not white.", + "pred_index": 1, + "question_index": 3, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet is not mentioned in the description.", + 0 + ], + [ + "The flush lever of the toilet is mentioned in the description.", + -1 + ], + [ + "The flush lever of the toilet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The flush lever of the toilet is not mentioned in the description.", + "pred_index": 2, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet is not mentioned in the description.", + 0 + ], + [ + "The flush button of the toilet is mentioned in the description.", + -1 + ], + [ + "The flush button of the toilet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The flush button of the toilet is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet is not mentioned in the description.", + 0 + ], + [ + "The toilet bowl of the toilet is mentioned in the description.", + -1 + ], + [ + "The toilet bowl of the toilet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The toilet bowl of the toilet is not mentioned in the description.", + "pred_index": 2, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toothbrush is mentioned in the description.", + -1 + ], + [ + "The toothbrush is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The toothbrush is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet is not mentioned in the description.", + 0 + ], + [ + "The toilet seat of the toilet is mentioned in the description.", + -1 + ], + [ + "The toilet seat of the toilet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The toilet seat of the toilet is not mentioned in the description.", + "pred_index": 2, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a toilet or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toilet features a smooth, rounded lid with a glossy finish, seamlessly integrated into the tank. The tank has a slightly curved, angular design with a uniform, light beige color.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a toilet or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toilet features a smooth, rounded lid with a glossy finish, seamlessly integrated into the tank. The tank has a slightly curved, angular design with a uniform, light beige color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lid or the toilet is not mentioned.\nB. The material of the lid is mentioned in the description but is not ceramic.\nC. The material of the lid is mentioned in the description and is ceramic.\nD. The material of the lid is not mentioned, but the lid of the toilet is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toilet features a smooth, rounded lid with a glossy finish, seamlessly integrated into the tank. The tank has a slightly curved, angular design with a uniform, light beige color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lid or the toilet is not mentioned.\nB. The texture of the lid is mentioned in the description but is not smooth.\nC. The texture of the lid is mentioned in the description and is smooth.\nD. 
The texture of the lid is not mentioned, but the lid of the toilet is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toilet features a smooth, rounded lid with a glossy finish, seamlessly integrated into the tank. The tank has a slightly curved, angular design with a uniform, light beige color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tank or the toilet is not mentioned.\nB. The shape of the tank is mentioned in the description but is not rectangular.\nC. The shape of the tank is mentioned in the description and is rectangular.\nD. The shape of the tank is not mentioned, but the tank of the toilet is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toilet features a smooth, rounded lid with a glossy finish, seamlessly integrated into the tank. The tank has a slightly curved, angular design with a uniform, light beige color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toilet is not mentioned.\nB. The color of the toilet is mentioned in the description but is not white.\nC. 
The color of the toilet is mentioned in the description and is white.\nD. The color of the toilet is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toilet features a smooth, rounded lid with a glossy finish, seamlessly integrated into the tank. The tank has a slightly curved, angular design with a uniform, light beige color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toilet is not mentioned in the description.\nB. The flush lever of the toilet is mentioned in the description.\nC. The flush lever of the toilet is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toilet features a smooth, rounded lid with a glossy finish, seamlessly integrated into the tank. The tank has a slightly curved, angular design with a uniform, light beige color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toilet is not mentioned in the description.\nB. The flush button of the toilet is mentioned in the description.\nC. 
The flush button of the toilet is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toilet features a smooth, rounded lid with a glossy finish, seamlessly integrated into the tank. The tank has a slightly curved, angular design with a uniform, light beige color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toilet is not mentioned in the description.\nB. The toilet bowl of the toilet is mentioned in the description.\nC. The toilet bowl of the toilet is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toilet features a smooth, rounded lid with a glossy finish, seamlessly integrated into the tank. The tank has a slightly curved, angular design with a uniform, light beige color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toothbrush is mentioned in the description.\nB. The toothbrush is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toilet features a smooth, rounded lid with a glossy finish, seamlessly integrated into the tank. The tank has a slightly curved, angular design with a uniform, light beige color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toilet is not mentioned in the description.\nB. The toilet seat of the toilet is mentioned in the description.\nC. The toilet seat of the toilet is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The material of the lid is not mentioned, but the lid of the toilet is mentioned.", + "C. The texture of the lid is mentioned in the description and is smooth.", + "B. The shape of the tank is mentioned in the description but is not rectangular.", + "B. The color of the toilet is mentioned in the description but is not white.", + "C. The flush lever of the toilet is not mentioned in the description.", + "C. The flush button of the toilet is not mentioned in the description.", + "C. The toilet bowl of the toilet is not mentioned in the description.", + "B. The toothbrush is not mentioned in the description.", + "C. The toilet seat of the toilet is not mentioned in the description." + ], + "score": 0.5, + "score_pos": -0.125, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "25273553": { + "pred": "A black tripod with a central column and three legs, each leg featuring a rubber foot for stability. 
The legs are connected to a central hub, which is part of the tripod's support structure.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the legs is mentioned in the description but is not slender.", + -1 + ], + [ + "The shape of the legs is mentioned in the description and is slender.", + 1 + ], + [ + "The legs or the tripod are not mentioned.", + 0 + ], + [ + "The shape of the legs is not mentioned, but the legs of the tripod are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the legs is not mentioned, but the legs of the tripod are mentioned.", + "pred_index": 3, + "question_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the tripod is mentioned in the description but is not plastic or metal.", + -1 + ], + [ + "The material of the tripod is mentioned in the description and is plastic or metal.", + 1 + ], + [ + "The tripod is not mentioned.", + 0 + ], + [ + "The material of the tripod is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the tripod is not mentioned.", + "pred_index": 3, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the tripod is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the tripod is mentioned in the description and is black.", + 1 + ], + [ + "The tripod is not mentioned.", + 0 + ], + [ + "The color of the tripod is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the tripod is mentioned in the description and is black.", + "pred_index": 1, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tripod is not mentioned in the description.", + 0 + ], + [ + "The leg locks of the tripod are not mentioned in the description.", + 1 + ], + [ + "The leg locks of the tripod are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The leg locks of the tripod are not mentioned in the description.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tripod is not mentioned in the description.", + 0 + ], + [ + "The quick release plate of the tripod is not mentioned in the description.", + 1 + ], + [ + "The quick release plate of the tripod is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The quick release plate of the tripod is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The hot air balloon is not mentioned in the description.", + 1 + ], + [ + "The hot air balloon is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The hot air balloon is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tree is not mentioned in the description.", + 1 + ], + [ + "The tree is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The tree is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sky is not mentioned in the description.", + 1 + ], + [ + "The sky is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The sky is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a tripod or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with a central column and three legs, each leg featuring a rubber foot for stability. 
The legs are connected to a central hub, which is part of the tripod's support structure.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a tripod or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with a central column and three legs, each leg featuring a rubber foot for stability. The legs are connected to a central hub, which is part of the tripod's support structure.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the legs is mentioned in the description but is not slender.\nB. The shape of the legs is mentioned in the description and is slender.\nC. The legs or the tripod are not mentioned.\nD. The shape of the legs is not mentioned, but the legs of the tripod are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with a central column and three legs, each leg featuring a rubber foot for stability. 
The legs are connected to a central hub, which is part of the tripod's support structure.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the tripod is mentioned in the description but is not plastic or metal.\nB. The material of the tripod is mentioned in the description and is plastic or metal.\nC. The tripod is not mentioned.\nD. The material of the tripod is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with a central column and three legs, each leg featuring a rubber foot for stability. The legs are connected to a central hub, which is part of the tripod's support structure.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the tripod is mentioned in the description but is not black.\nB. The color of the tripod is mentioned in the description and is black.\nC. The tripod is not mentioned.\nD. The color of the tripod is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with a central column and three legs, each leg featuring a rubber foot for stability. The legs are connected to a central hub, which is part of the tripod's support structure.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tripod is not mentioned in the description.\nB. The leg locks of the tripod are not mentioned in the description.\nC. The leg locks of the tripod are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with a central column and three legs, each leg featuring a rubber foot for stability. The legs are connected to a central hub, which is part of the tripod's support structure.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tripod is not mentioned in the description.\nB. The quick release plate of the tripod is not mentioned in the description.\nC. The quick release plate of the tripod is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with a central column and three legs, each leg featuring a rubber foot for stability. The legs are connected to a central hub, which is part of the tripod's support structure.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The hot air balloon is not mentioned in the description.\nB. The hot air balloon is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with a central column and three legs, each leg featuring a rubber foot for stability. The legs are connected to a central hub, which is part of the tripod's support structure.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tree is not mentioned in the description.\nB. The tree is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with a central column and three legs, each leg featuring a rubber foot for stability. 
The legs are connected to a central hub, which is part of the tripod's support structure.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sky is not mentioned in the description.\nB. The sky is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The shape of the legs is not mentioned, but the legs of the tripod are mentioned.", + "D. The material of the tripod is not mentioned.", + "B. The color of the tripod is mentioned in the description and is black.", + "B. The leg locks of the tripod are not mentioned in the description.", + "B. The quick release plate of the tripod is not mentioned in the description.", + "A. The hot air balloon is not mentioned in the description.", + "A. The tree is not mentioned in the description.", + "A. The sky is not mentioned in the description." + ], + "score": 0.8125, + "score_pos": 0.5, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "25419495": { + "pred": "The tongs have a metallic, slightly curved arm with a black rubberized grip handle. The handle is ergonomically designed with a smooth, matte finish. The tongs are open, showing the inner surfaces of the arms, which are also metallic and slightly curved.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the gripping ends/claws is mentioned in the description and is black.", + 1 + ], + [ + "The color of the gripping ends/claws is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the gripping ends/claws is not mentioned, but the gripping ends/claws of the tong are mentioned.", + 0.5 + ], + [ + "The gripping ends/claws or the tong are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the gripping ends/claws is mentioned in the description and is black.", + "pred_index": 0, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the gripping ends/claws is mentioned in the description and is scalloped.", + 1 + ], + [ + "The shape of the gripping ends/claws is mentioned in the description but is not scalloped.", + -1 + ], + [ + "The shape of the gripping ends/claws is not mentioned, but the gripping ends/claws of the tong are mentioned.", + 0.5 + ], + [ + "The gripping ends/claws or the tong are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the gripping ends/claws is mentioned in the description but is not scalloped.", + "pred_index": 1, + "question_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the handle/arm is mentioned in the description and is black or metallic.", + 1 + ], + [ + "The color of the handle/arm is mentioned in the description but is not black or metallic.", + -1 + ], + [ + "The color of the handle/arm is not mentioned, but the handle/arm of the tong is mentioned.", + 0.5 + ], + [ + "The handle/arm or the tong is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the handle/arm is mentioned in the description and is black or metallic.", + "pred_index": 0, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the handle/arm is mentioned in the description and is metal or rubber.", + 1 + ], + [ + "The material of the handle/arm is mentioned in the description but is not metal or rubber.", + -1 + ], + [ + "The material of the handle/arm is not mentioned, but the handle/arm of the tong is mentioned.", + 0.5 + ], + [ + "The handle/arm or the tong is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the handle/arm is mentioned in the description and is metal or rubber.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The spoon is mentioned in the description.", + -1 + ], + [ + "The spoon is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The spoon is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The spring of the tong is mentioned in the description.", + -1 + ], + [ + "The tong is not mentioned in the description.", + 0 + ], + [ + "The spring of the tong is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The spring of the tong is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plate is mentioned in the description.", + -1 + ], + [ + "The plate is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The plate is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cup is mentioned in the description.", + -1 + ], + [ + "The cup is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The cup is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fork is mentioned in the description.", + -1 + ], + [ + "The fork is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The fork is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a tong or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a metallic, slightly curved arm with a black rubberized grip handle. The handle is ergonomically designed with a smooth, matte finish. The tongs are open, showing the inner surfaces of the arms, which are also metallic and slightly curved.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a tong or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a metallic, slightly curved arm with a black rubberized grip handle. The handle is ergonomically designed with a smooth, matte finish. The tongs are open, showing the inner surfaces of the arms, which are also metallic and slightly curved.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the gripping ends/claws is mentioned in the description and is black.\nB. The color of the gripping ends/claws is mentioned in the description but is not black.\nC. The color of the gripping ends/claws is not mentioned, but the gripping ends/claws of the tong are mentioned.\nD. The gripping ends/claws or the tong are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a metallic, slightly curved arm with a black rubberized grip handle. The handle is ergonomically designed with a smooth, matte finish. The tongs are open, showing the inner surfaces of the arms, which are also metallic and slightly curved.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the gripping ends/claws is mentioned in the description and is scalloped.\nB. The shape of the gripping ends/claws is mentioned in the description but is not scalloped.\nC. The shape of the gripping ends/claws is not mentioned, but the gripping ends/claws of the tong are mentioned.\nD. The gripping ends/claws or the tong are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a metallic, slightly curved arm with a black rubberized grip handle. The handle is ergonomically designed with a smooth, matte finish. The tongs are open, showing the inner surfaces of the arms, which are also metallic and slightly curved.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the handle/arm is mentioned in the description and is black or metallic.\nB. 
The color of the handle/arm is mentioned in the description but is not black or metallic.\nC. The color of the handle/arm is not mentioned, but the handle/arm of the tong is mentioned.\nD. The handle/arm or the tong is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a metallic, slightly curved arm with a black rubberized grip handle. The handle is ergonomically designed with a smooth, matte finish. The tongs are open, showing the inner surfaces of the arms, which are also metallic and slightly curved.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the handle/arm is mentioned in the description and is metal or rubber.\nB. The material of the handle/arm is mentioned in the description but is not metal or rubber.\nC. The material of the handle/arm is not mentioned, but the handle/arm of the tong is mentioned.\nD. The handle/arm or the tong is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a metallic, slightly curved arm with a black rubberized grip handle. The handle is ergonomically designed with a smooth, matte finish. 
The tongs are open, showing the inner surfaces of the arms, which are also metallic and slightly curved.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The spoon is mentioned in the description.\nB. The spoon is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a metallic, slightly curved arm with a black rubberized grip handle. The handle is ergonomically designed with a smooth, matte finish. The tongs are open, showing the inner surfaces of the arms, which are also metallic and slightly curved.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The spring of the tong is mentioned in the description.\nB. The tong is not mentioned in the description.\nC. The spring of the tong is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a metallic, slightly curved arm with a black rubberized grip handle. The handle is ergonomically designed with a smooth, matte finish. 
The tongs are open, showing the inner surfaces of the arms, which are also metallic and slightly curved.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The plate is mentioned in the description.\nB. The plate is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a metallic, slightly curved arm with a black rubberized grip handle. The handle is ergonomically designed with a smooth, matte finish. The tongs are open, showing the inner surfaces of the arms, which are also metallic and slightly curved.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cup is mentioned in the description.\nB. The cup is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a metallic, slightly curved arm with a black rubberized grip handle. The handle is ergonomically designed with a smooth, matte finish. 
The tongs are open, showing the inner surfaces of the arms, which are also metallic and slightly curved.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fork is mentioned in the description.\nB. The fork is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The color of the gripping ends/claws is mentioned in the description and is black.", + "B. The shape of the gripping ends/claws is mentioned in the description but is not scalloped.", + "A. The color of the handle/arm is mentioned in the description and is black or metallic.", + "A. The material of the handle/arm is mentioned in the description and is metal or rubber.", + "B. The spoon is not mentioned in the description.", + "C. The spring of the tong is not mentioned in the description.", + "B. The plate is not mentioned in the description.", + "B. The cup is not mentioned in the description.", + "B. The fork is not mentioned in the description." + ], + "score": 0.7777777777777778, + "score_pos": 0.5, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "25419516": { + "pred": "A plush toy with a blue face, large white eyes with black pupils, and two pointed ears.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the stuffed toy is mentioned in the description but is not plush.", + -1 + ], + [ + "The material of the stuffed toy is not mentioned.", + 0 + ], + [ + "The material of the stuffed toy is mentioned in the description and is plush.", + 1 + ], + [ + "The stuffed toy is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The material of the stuffed toy is mentioned in the description and is plush.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the eyes is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the eyes is not mentioned, but the eyes of the stuffed toy are mentioned.", + 0.5 + ], + [ + "The color of the eyes is mentioned in the description and is black.", + 1 + ], + [ + "The eyes or the stuffed toy are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the eyes is mentioned in the description and is black.", + "pred_index": 2, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the nose is mentioned in the description but is not blue.", + -1 + ], + [ + "The color of the nose is not mentioned, but the nose of the stuffed toy is mentioned.", + 0.5 + ], + [ + "The color of the nose is mentioned in the description and is blue.", + 1 + ], + [ + "The nose or the stuffed toy is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "D. The nose or the stuffed toy is not mentioned.", + "pred_index": 3, + "question_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the body is mentioned in the description but is not blue.", + -1 + ], + [ + "The color of the body is not mentioned, but the body of the stuffed toy is mentioned.", + 0.5 + ], + [ + "The color of the body is mentioned in the description and is blue.", + 1 + ], + [ + "The body or the stuffed toy is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the body is mentioned in the description and is blue.", + "pred_index": 2, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the nose is mentioned in the description but is not round.", + -1 + ], + [ + "The shape of the nose is not mentioned, but the nose of the stuffed toy is mentioned.", + 0.5 + ], + [ + "The shape of the nose is mentioned in the description and is round.", + 1 + ], + [ + "The nose or the stuffed toy is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "D. The nose or the stuffed toy is not mentioned.", + "pred_index": 3, + "question_index": 4, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tail of the stuffed toy is not mentioned in the description.", + 1 + ], + [ + "The stuffed toy is not mentioned in the description.", + 0 + ], + [ + "The tail of the stuffed toy is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The tail of the stuffed toy is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The feet of the stuffed toy are not mentioned in the description.", + 1 + ], + [ + "The stuffed toy is not mentioned in the description.", + 0 + ], + [ + "The feet of the stuffed toy are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The feet of the stuffed toy are not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The door is not mentioned in the description.", + 1 + ], + [ + "The door is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The door is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chair is not mentioned in the description.", + 1 + ], + [ + "The chair is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The chair is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fork is not mentioned in the description.", + 1 + ], + [ + "The fork is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The fork is not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a stuffed toy or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA plush toy with a blue face, large white eyes with black pupils, and two pointed ears.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a stuffed toy or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA plush toy with a blue face, large white eyes with black pupils, and two pointed ears.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the stuffed toy is mentioned in the description but is not plush.\nB. The material of the stuffed toy is not mentioned.\nC. The material of the stuffed toy is mentioned in the description and is plush.\nD. The stuffed toy is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA plush toy with a blue face, large white eyes with black pupils, and two pointed ears.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the eyes is mentioned in the description but is not black.\nB. The color of the eyes is not mentioned, but the eyes of the stuffed toy are mentioned.\nC. The color of the eyes is mentioned in the description and is black.\nD. The eyes or the stuffed toy are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA plush toy with a blue face, large white eyes with black pupils, and two pointed ears.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the nose is mentioned in the description but is not blue.\nB. The color of the nose is not mentioned, but the nose of the stuffed toy is mentioned.\nC. The color of the nose is mentioned in the description and is blue.\nD. The nose or the stuffed toy is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA plush toy with a blue face, large white eyes with black pupils, and two pointed ears.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the body is mentioned in the description but is not blue.\nB. The color of the body is not mentioned, but the body of the stuffed toy is mentioned.\nC. The color of the body is mentioned in the description and is blue.\nD. The body or the stuffed toy is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA plush toy with a blue face, large white eyes with black pupils, and two pointed ears.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the nose is mentioned in the description but is not round.\nB. The shape of the nose is not mentioned, but the nose of the stuffed toy is mentioned.\nC. The shape of the nose is mentioned in the description and is round.\nD. The nose or the stuffed toy is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA plush toy with a blue face, large white eyes with black pupils, and two pointed ears.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tail of the stuffed toy is not mentioned in the description.\nB. The stuffed toy is not mentioned in the description.\nC. The tail of the stuffed toy is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA plush toy with a blue face, large white eyes with black pupils, and two pointed ears.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The feet of the stuffed toy are not mentioned in the description.\nB. The stuffed toy is not mentioned in the description.\nC. The feet of the stuffed toy are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA plush toy with a blue face, large white eyes with black pupils, and two pointed ears.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The door is not mentioned in the description.\nB. The door is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA plush toy with a blue face, large white eyes with black pupils, and two pointed ears.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chair is not mentioned in the description.\nB. The chair is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA plush toy with a blue face, large white eyes with black pupils, and two pointed ears.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fork is not mentioned in the description.\nB. The fork is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. 
The material of the stuffed toy is mentioned in the description and is plush.", + "C. The color of the eyes is mentioned in the description and is black.", + "D. The nose or the stuffed toy is not mentioned.", + "C. The color of the body is mentioned in the description and is blue.", + "D. The nose or the stuffed toy is not mentioned.", + "A. The tail of the stuffed toy is not mentioned in the description.", + "A. The feet of the stuffed toy are not mentioned in the description.", + "A. The door is not mentioned in the description.", + "A. The chair is not mentioned in the description.", + "A. The fork is not mentioned in the description." + ], + "score": 0.8, + "score_pos": 0.6, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "25579493": { + "pred": "A small, square-shaped bowl with rounded edges, featuring a light blue exterior and a white interior. The bowl contains a mixture of white rice and a small piece of red food item in the center.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bowl is not mentioned.", + 0 + ], + [ + "The shape of the bowl is not mentioned.", + 0 + ], + [ + "The shape of the bowl is mentioned in the description and is square or rounded.", + 1 + ], + [ + "The shape of the bowl is mentioned in the description but is not square or rounded.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the bowl is mentioned in the description and is square or rounded.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bowl is not mentioned.", + 0 + ], + [ + "The texture of the bowl is not mentioned.", + 0 + ], + [ + "The texture of the bowl is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the bowl is mentioned in the description but is not smooth.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The texture of the bowl is not mentioned.", + "pred_index": 1, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bowl is not mentioned.", + 0 + ], + [ + "The color of the bowl is not mentioned.", + 0 + ], + [ + "The color of the bowl is mentioned in the description and is white or beige.", + 1 + ], + [ + "The color of the bowl is mentioned in the description but is not white or beige.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the bowl is mentioned in the description but is not white or beige.", + "pred_index": 3, + "question_index": 2, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The content or the bowl is not mentioned.", + 0 + ], + [ + "The color of the content is not mentioned, but the content of the bowl is mentioned.", + 0.5 + ], + [ + "The color of the content is mentioned in the description and is red, white, or yellow.", + 1 + ], + [ + "The color of the content is mentioned in the description but is not red, white, or yellow.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the content is mentioned in the description and is red, white, or yellow.", + "pred_index": 2, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The base of the bowl is not mentioned in the description.", + 1 + ], + [ + "The base of the bowl is mentioned in the description.", + -1 + ], + [ + "The bowl is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The base of the bowl is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lid of the bowl is not mentioned in the description.", + 1 + ], + [ + "The lid of the bowl is mentioned in the description.", + -1 + ], + [ + "The bowl is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The lid of the bowl is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The knife is not mentioned in the description.", + 1 + ], + [ + "The knife is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The knife is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cabinet is not mentioned in the description.", + 1 + ], + [ + "The cabinet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The cabinet is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The burner is not mentioned in the description.", + 1 + ], + [ + "The burner is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The burner is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a bowl or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small, square-shaped bowl with rounded edges, featuring a light blue exterior and a white interior. The bowl contains a mixture of white rice and a small piece of red food item in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a bowl or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small, square-shaped bowl with rounded edges, featuring a light blue exterior and a white interior. The bowl contains a mixture of white rice and a small piece of red food item in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bowl is not mentioned.\nB. The shape of the bowl is not mentioned.\nC. The shape of the bowl is mentioned in the description and is square or rounded.\nD. The shape of the bowl is mentioned in the description but is not square or rounded.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small, square-shaped bowl with rounded edges, featuring a light blue exterior and a white interior. The bowl contains a mixture of white rice and a small piece of red food item in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bowl is not mentioned.\nB. The texture of the bowl is not mentioned.\nC. The texture of the bowl is mentioned in the description and is smooth.\nD. The texture of the bowl is mentioned in the description but is not smooth.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small, square-shaped bowl with rounded edges, featuring a light blue exterior and a white interior. The bowl contains a mixture of white rice and a small piece of red food item in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bowl is not mentioned.\nB. The color of the bowl is not mentioned.\nC. The color of the bowl is mentioned in the description and is white or beige.\nD. The color of the bowl is mentioned in the description but is not white or beige.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small, square-shaped bowl with rounded edges, featuring a light blue exterior and a white interior. The bowl contains a mixture of white rice and a small piece of red food item in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The content or the bowl is not mentioned.\nB. The color of the content is not mentioned, but the content of the bowl is mentioned.\nC. The color of the content is mentioned in the description and is red, white, or yellow.\nD. The color of the content is mentioned in the description but is not red, white, or yellow.\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small, square-shaped bowl with rounded edges, featuring a light blue exterior and a white interior. The bowl contains a mixture of white rice and a small piece of red food item in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The base of the bowl is not mentioned in the description.\nB. The base of the bowl is mentioned in the description.\nC. The bowl is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small, square-shaped bowl with rounded edges, featuring a light blue exterior and a white interior. The bowl contains a mixture of white rice and a small piece of red food item in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lid of the bowl is not mentioned in the description.\nB. The lid of the bowl is mentioned in the description.\nC. The bowl is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small, square-shaped bowl with rounded edges, featuring a light blue exterior and a white interior. The bowl contains a mixture of white rice and a small piece of red food item in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The knife is not mentioned in the description.\nB. The knife is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small, square-shaped bowl with rounded edges, featuring a light blue exterior and a white interior. The bowl contains a mixture of white rice and a small piece of red food item in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cabinet is not mentioned in the description.\nB. The cabinet is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA small, square-shaped bowl with rounded edges, featuring a light blue exterior and a white interior. The bowl contains a mixture of white rice and a small piece of red food item in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The burner is not mentioned in the description.\nB. The burner is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The shape of the bowl is mentioned in the description and is square or rounded.", + "B. The texture of the bowl is not mentioned.", + "D. The color of the bowl is mentioned in the description but is not white or beige.", + "C. The color of the content is mentioned in the description and is red, white, or yellow.", + "A. The base of the bowl is not mentioned in the description.", + "A. The lid of the bowl is not mentioned in the description.", + "A. The knife is not mentioned in the description.", + "A. The cabinet is not mentioned in the description.", + "A. The burner is not mentioned in the description." + ], + "score": 0.6666666666666666, + "score_pos": 0.25, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "297718": { + "pred": "A piece of sushi with a bed of white rice wrapped in a dark seaweed sheet, filled with a generous portion of pink and white crab meat. 
The top is sprinkled with sesame seeds and a light drizzle of soy sauce.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The position of the rice is mentioned in the description and is outer layer.", + 1 + ], + [ + "The position of the rice is mentioned in the description but is not outer layer.", + -1 + ], + [ + "The position of the rice is not mentioned, but the rice of the sushi is mentioned.", + 0.5 + ], + [ + "The rice or the sushi is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The position of the rice is mentioned in the description but is not outer layer.", + "pred_index": 1, + "question_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the wrap is mentioned in the description and is seaweed sheet.", + 1 + ], + [ + "The type of the wrap is mentioned in the description but is not seaweed sheet.", + -1 + ], + [ + "The type of the wrap is not mentioned, but the wrap of the sushi is mentioned.", + 0.5 + ], + [ + "The wrap or the sushi is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The type of the wrap is mentioned in the description and is seaweed sheet.", + "pred_index": 0, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the sesame seeds is mentioned in the description and is black, yellow, or light brown.", + 1 + ], + [ + "The color of the sesame seeds is mentioned in the description but is not black, yellow, or light brown.", + -1 + ], + [ + "The color of the sesame seeds is not mentioned, but the sesame seeds of the sushi are mentioned.", + 0.5 + ], + [ + "The sesame seeds or the sushi are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the sesame seeds is mentioned in the description and is black, yellow, or light brown.", + "pred_index": 0, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the filling is mentioned in the description and is fish or crab meat.", + 1 + ], + [ + "The type of the filling is mentioned in the description but is not fish or crab meat.", + -1 + ], + [ + "The type of the filling is not mentioned, but the filling of the sushi is mentioned.", + 0.5 + ], + [ + "The filling or the sushi is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The type of the filling is mentioned in the description and is fish or crab meat.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the topping is mentioned in the description and is sesame seeds.", + 1 + ], + [ + "The type of the topping is mentioned in the description but is not sesame seeds.", + -1 + ], + [ + "The type of the topping is not mentioned, but the topping of the sushi is mentioned.", + 0.5 + ], + [ + "The topping or the sushi is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The type of the topping is mentioned in the description but is not sesame seeds.", + "pred_index": 1, + "question_index": 4, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wasabi of the sushi is not mentioned in the description.", + 1 + ], + [ + "The sushi is not mentioned in the description.", + 0 + ], + [ + "The wasabi of the sushi is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The wasabi of the sushi is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The soy sauce of the sushi is not mentioned in the description.", + 1 + ], + [ + "The sushi is not mentioned in the description.", + 0 + ], + [ + "The soy sauce of the sushi is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The soy sauce of the sushi is mentioned in the description.", + "pred_index": 2, + "question_index": 6, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The avocado of the sushi is not mentioned in the description.", + 1 + ], + [ + "The sushi is not mentioned in the description.", + 0 + ], + [ + "The avocado of the sushi is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The avocado of the sushi is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pickled ginger of the sushi is not mentioned in the description.", + 1 + ], + [ + "The sushi is not mentioned in the description.", + 0 + ], + [ + "The pickled ginger of the sushi is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The pickled ginger of the sushi is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pineapple chunks are not mentioned in the description.", + 1 + ], + [ + "The pineapple chunks are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The pineapple chunks are not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a sushi or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice wrapped in a dark seaweed sheet, filled with a generous portion of pink and white crab meat. The top is sprinkled with sesame seeds and a light drizzle of soy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a sushi or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice wrapped in a dark seaweed sheet, filled with a generous portion of pink and white crab meat. The top is sprinkled with sesame seeds and a light drizzle of soy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The position of the rice is mentioned in the description and is outer layer.\nB. The position of the rice is mentioned in the description but is not outer layer.\nC. The position of the rice is not mentioned, but the rice of the sushi is mentioned.\nD. The rice or the sushi is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice wrapped in a dark seaweed sheet, filled with a generous portion of pink and white crab meat. The top is sprinkled with sesame seeds and a light drizzle of soy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The type of the wrap is mentioned in the description and is seaweed sheet.\nB. The type of the wrap is mentioned in the description but is not seaweed sheet.\nC. The type of the wrap is not mentioned, but the wrap of the sushi is mentioned.\nD. 
The wrap or the sushi is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice wrapped in a dark seaweed sheet, filled with a generous portion of pink and white crab meat. The top is sprinkled with sesame seeds and a light drizzle of soy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the sesame seeds is mentioned in the description and is black, yellow, or light brown.\nB. The color of the sesame seeds is mentioned in the description but is not black, yellow, or light brown.\nC. The color of the sesame seeds is not mentioned, but the sesame seeds of the sushi are mentioned.\nD. The sesame seeds or the sushi are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice wrapped in a dark seaweed sheet, filled with a generous portion of pink and white crab meat. The top is sprinkled with sesame seeds and a light drizzle of soy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The type of the filling is mentioned in the description and is fish or crab meat.\nB. The type of the filling is mentioned in the description but is not fish or crab meat.\nC. The type of the filling is not mentioned, but the filling of the sushi is mentioned.\nD. The filling or the sushi is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice wrapped in a dark seaweed sheet, filled with a generous portion of pink and white crab meat. The top is sprinkled with sesame seeds and a light drizzle of soy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The type of the topping is mentioned in the description and is sesame seeds.\nB. The type of the topping is mentioned in the description but is not sesame seeds.\nC. The type of the topping is not mentioned, but the topping of the sushi is mentioned.\nD. The topping or the sushi is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice wrapped in a dark seaweed sheet, filled with a generous portion of pink and white crab meat. 
The top is sprinkled with sesame seeds and a light drizzle of soy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wasabi of the sushi is not mentioned in the description.\nB. The sushi is not mentioned in the description.\nC. The wasabi of the sushi is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice wrapped in a dark seaweed sheet, filled with a generous portion of pink and white crab meat. The top is sprinkled with sesame seeds and a light drizzle of soy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The soy sauce of the sushi is not mentioned in the description.\nB. The sushi is not mentioned in the description.\nC. The soy sauce of the sushi is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice wrapped in a dark seaweed sheet, filled with a generous portion of pink and white crab meat. 
The top is sprinkled with sesame seeds and a light drizzle of soy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The avocado of the sushi is not mentioned in the description.\nB. The sushi is not mentioned in the description.\nC. The avocado of the sushi is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice wrapped in a dark seaweed sheet, filled with a generous portion of pink and white crab meat. The top is sprinkled with sesame seeds and a light drizzle of soy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pickled ginger of the sushi is not mentioned in the description.\nB. The sushi is not mentioned in the description.\nC. The pickled ginger of the sushi is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice wrapped in a dark seaweed sheet, filled with a generous portion of pink and white crab meat. 
The top is sprinkled with sesame seeds and a light drizzle of soy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pineapple chunks are not mentioned in the description.\nB. The pineapple chunks are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The position of the rice is mentioned in the description but is not outer layer.", + "A. The type of the wrap is mentioned in the description and is seaweed sheet.", + "A. The color of the sesame seeds is mentioned in the description and is black, yellow, or light brown.", + "A. The type of the filling is mentioned in the description and is fish or crab meat.", + "B. The type of the topping is mentioned in the description but is not sesame seeds.", + "A. The wasabi of the sushi is not mentioned in the description.", + "C. The soy sauce of the sushi is mentioned in the description.", + "A. The avocado of the sushi is not mentioned in the description.", + "A. The pickled ginger of the sushi is not mentioned in the description.", + "A. The pineapple chunks are not mentioned in the description." + ], + "score": 0.4, + "score_pos": 0.2, + "score_neg": 0.6, + "neg_valid_num": 5, + "recognition_result": true + }, + "361105": { + "pred": "A small cluster of fresh, vibrant green leaves with a smooth texture, attached to a thin, green stem. The leaves are broad and slightly serrated at the edges, with a glossy surface.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The herb is not mentioned.", + 0 + ], + [ + "The color of the herb is mentioned in the description and is green.", + 1 + ], + [ + "The color of the herb is mentioned in the description but is not green.", + -1 + ], + [ + "The color of the herb is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the herb is mentioned in the description and is green.", + "pred_index": 1, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The stems or the herb are not mentioned.", + 0 + ], + [ + "The shape of the stems is mentioned in the description and is thin or slender.", + 1 + ], + [ + "The shape of the stems is mentioned in the description but is not thin or slender.", + -1 + ], + [ + "The shape of the stems is not mentioned, but the stems of the herb are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the stems is mentioned in the description and is thin or slender.", + "pred_index": 1, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The leaves or the herb are not mentioned.", + 0 + ], + [ + "The dark spots of the leaves is mentioned in the description and is visible.", + 1 + ], + [ + "The dark spots of the leaves is mentioned in the description but is not visible.", + -1 + ], + [ + "The dark spots of the leaves is not mentioned, but the leaves of the herb are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The dark spots of the leaves is not mentioned, but the leaves of the herb are mentioned.", + "pred_index": 3, + "question_index": 2, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The flowers of the herb are not mentioned in the description.", + 1 + ], + [ + "The herb is not mentioned in the description.", + 0 + ], + [ + "The flowers of the herb are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The flowers of the herb are not mentioned in the description.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The seeds of the herb are not mentioned in the description.", + 1 + ], + [ + "The herb is not mentioned in the description.", + 0 + ], + [ + "The seeds of the herb are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The seeds of the herb are not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The roots of the herb are not mentioned in the description.", + 1 + ], + [ + "The herb is not mentioned in the description.", + 0 + ], + [ + "The roots of the herb are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The roots of the herb are not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cream sauce is not mentioned in the description.", + 1 + ], + [ + "The cream sauce is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The cream sauce is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The scallops are not mentioned in the description.", + 1 + ], + [ + "The scallops are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The scallops are not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a herb or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a thin, green stem. The leaves are broad and slightly serrated at the edges, with a glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a herb or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a thin, green stem. 
The leaves are broad and slightly serrated at the edges, with a glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The herb is not mentioned.\nB. The color of the herb is mentioned in the description and is green.\nC. The color of the herb is mentioned in the description but is not green.\nD. The color of the herb is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a thin, green stem. The leaves are broad and slightly serrated at the edges, with a glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The stems or the herb are not mentioned.\nB. The shape of the stems is mentioned in the description and is thin or slender.\nC. The shape of the stems is mentioned in the description but is not thin or slender.\nD. The shape of the stems is not mentioned, but the stems of the herb are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a thin, green stem. The leaves are broad and slightly serrated at the edges, with a glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The leaves or the herb are not mentioned.\nB. The dark spots of the leaves is mentioned in the description and is visible.\nC. The dark spots of the leaves is mentioned in the description but is not visible.\nD. The dark spots of the leaves is not mentioned, but the leaves of the herb are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a thin, green stem. The leaves are broad and slightly serrated at the edges, with a glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The flowers of the herb are not mentioned in the description.\nB. The herb is not mentioned in the description.\nC. The flowers of the herb are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a thin, green stem. The leaves are broad and slightly serrated at the edges, with a glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The seeds of the herb are not mentioned in the description.\nB. The herb is not mentioned in the description.\nC. The seeds of the herb are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a thin, green stem. The leaves are broad and slightly serrated at the edges, with a glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The roots of the herb are not mentioned in the description.\nB. The herb is not mentioned in the description.\nC. The roots of the herb are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a thin, green stem. The leaves are broad and slightly serrated at the edges, with a glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cream sauce is not mentioned in the description.\nB. The cream sauce is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a thin, green stem. The leaves are broad and slightly serrated at the edges, with a glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The scallops are not mentioned in the description.\nB. The scallops are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the herb is mentioned in the description and is green.", + "B. The shape of the stems is mentioned in the description and is thin or slender.", + "D. The dark spots of the leaves is not mentioned, but the leaves of the herb are mentioned.", + "A. The flowers of the herb are not mentioned in the description.", + "A. The seeds of the herb are not mentioned in the description.", + "A. The roots of the herb are not mentioned in the description.", + "A. The cream sauce is not mentioned in the description.", + "A. The scallops are not mentioned in the description." 
+ ], + "score": 0.9375, + "score_pos": 0.8333333333333334, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "1196168": { + "pred": "A rectangular, wall-mounted air conditioner with a large circular vent on the left side, featuring a grid pattern. The right side of the unit has a smooth surface with a small rectangular panel and a few visible screws.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the grille is mentioned in the description and is smooth with ridges.", + 1 + ], + [ + "The texture of the grille is mentioned in the description but is not smooth with ridges.", + -1 + ], + [ + "The texture of the grille is not mentioned, but the grille of the air conditioner is mentioned.", + 0.5 + ], + [ + "The grille or the air conditioner is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The texture of the grille is mentioned in the description but is not smooth with ridges.", + "pred_index": 1, + "question_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the air conditioner is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the air conditioner is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the air conditioner is not mentioned.", + 0 + ], + [ + "The air conditioner is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The shape of the air conditioner is mentioned in the description and is rectangular.", + "pred_index": 0, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the air conditioner is mentioned in the description and is white.", + 1 + ], + [ + "The color of the air conditioner is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the air conditioner is not mentioned.", + 0 + ], + [ + "The air conditioner is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the air conditioner is not mentioned.", + "pred_index": 2, + "question_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the vent is mentioned in the description and is circular.", + 1 + ], + [ + "The shape of the vent is mentioned in the description but is not circular.", + -1 + ], + [ + "The shape of the vent is not mentioned, but the vent of the air conditioner is mentioned.", + 0.5 + ], + [ + "The vent or the air conditioner is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the vent is mentioned in the description and is circular.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the fan is mentioned in the description and is black, grey, silver, or dark.", + 1 + ], + [ + "The color of the fan is mentioned in the description but is not black, grey, silver, or dark.", + -1 + ], + [ + "The color of the fan is not mentioned, but the fan of the air conditioner is mentioned.", + 0.5 + ], + [ + "The fan or the air conditioner is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the fan is not mentioned, but the fan of the air conditioner is mentioned.", + "pred_index": 2, + "question_index": 4, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The air conditioner is not mentioned in the description.", + 0 + ], + [ + "The remote control of the air conditioner is not mentioned in the description.", + 1 + ], + [ + "The remote control of the air conditioner is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The remote control of the air conditioner is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Hotel Shilaza sign is not mentioned in the description.", + 1 + ], + [ + "The Hotel Shilaza sign is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The Hotel Shilaza sign is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The air conditioner is not mentioned in the description.", + 0 + ], + [ + "The display of the air conditioner is not mentioned in the description.", + 1 + ], + [ + "The display of the air conditioner is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The display of the air conditioner is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The UCO Bank branch is not mentioned in the description.", + 1 + ], + [ + "The UCO Bank branch is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The UCO Bank branch is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The motorcycles are not mentioned in the description.", + 1 + ], + [ + "The motorcycles are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The motorcycles are not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is an air conditioner or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, wall-mounted air conditioner with a large circular vent on the left side, featuring a grid pattern. 
The right side of the unit has a smooth surface with a small rectangular panel and a few visible screws.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is an air conditioner or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, wall-mounted air conditioner with a large circular vent on the left side, featuring a grid pattern. The right side of the unit has a smooth surface with a small rectangular panel and a few visible screws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the grille is mentioned in the description and is smooth with ridges.\nB. The texture of the grille is mentioned in the description but is not smooth with ridges.\nC. The texture of the grille is not mentioned, but the grille of the air conditioner is mentioned.\nD. The grille or the air conditioner is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, wall-mounted air conditioner with a large circular vent on the left side, featuring a grid pattern. The right side of the unit has a smooth surface with a small rectangular panel and a few visible screws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the air conditioner is mentioned in the description and is rectangular.\nB. The shape of the air conditioner is mentioned in the description but is not rectangular.\nC. The shape of the air conditioner is not mentioned.\nD. The air conditioner is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, wall-mounted air conditioner with a large circular vent on the left side, featuring a grid pattern. The right side of the unit has a smooth surface with a small rectangular panel and a few visible screws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the air conditioner is mentioned in the description and is white.\nB. The color of the air conditioner is mentioned in the description but is not white.\nC. The color of the air conditioner is not mentioned.\nD. The air conditioner is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, wall-mounted air conditioner with a large circular vent on the left side, featuring a grid pattern. The right side of the unit has a smooth surface with a small rectangular panel and a few visible screws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the vent is mentioned in the description and is circular.\nB. The shape of the vent is mentioned in the description but is not circular.\nC. The shape of the vent is not mentioned, but the vent of the air conditioner is mentioned.\nD. The vent or the air conditioner is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, wall-mounted air conditioner with a large circular vent on the left side, featuring a grid pattern. The right side of the unit has a smooth surface with a small rectangular panel and a few visible screws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the fan is mentioned in the description and is black, grey, silver, or dark.\nB. The color of the fan is mentioned in the description but is not black, grey, silver, or dark.\nC. The color of the fan is not mentioned, but the fan of the air conditioner is mentioned.\nD. 
The fan or the air conditioner is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, wall-mounted air conditioner with a large circular vent on the left side, featuring a grid pattern. The right side of the unit has a smooth surface with a small rectangular panel and a few visible screws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The air conditioner is not mentioned in the description.\nB. The remote control of the air conditioner is not mentioned in the description.\nC. The remote control of the air conditioner is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, wall-mounted air conditioner with a large circular vent on the left side, featuring a grid pattern. The right side of the unit has a smooth surface with a small rectangular panel and a few visible screws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Hotel Shilaza sign is not mentioned in the description.\nB. 
The Hotel Shilaza sign is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, wall-mounted air conditioner with a large circular vent on the left side, featuring a grid pattern. The right side of the unit has a smooth surface with a small rectangular panel and a few visible screws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The air conditioner is not mentioned in the description.\nB. The display of the air conditioner is not mentioned in the description.\nC. The display of the air conditioner is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, wall-mounted air conditioner with a large circular vent on the left side, featuring a grid pattern. The right side of the unit has a smooth surface with a small rectangular panel and a few visible screws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The UCO Bank branch is not mentioned in the description.\nB. 
The UCO Bank branch is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, wall-mounted air conditioner with a large circular vent on the left side, featuring a grid pattern. The right side of the unit has a smooth surface with a small rectangular panel and a few visible screws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The motorcycles are not mentioned in the description.\nB. The motorcycles are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The texture of the grille is mentioned in the description but is not smooth with ridges.", + "A. The shape of the air conditioner is mentioned in the description and is rectangular.", + "C. The color of the air conditioner is not mentioned.", + "A. The shape of the vent is mentioned in the description and is circular.", + "C. The color of the fan is not mentioned, but the fan of the air conditioner is mentioned.", + "B. The remote control of the air conditioner is not mentioned in the description.", + "A. The Hotel Shilaza sign is not mentioned in the description.", + "B. The display of the air conditioner is not mentioned in the description.", + "A. The UCO Bank branch is not mentioned in the description.", + "A. The motorcycles are not mentioned in the description." + ], + "score": 0.65, + "score_pos": 0.3, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "1894089": { + "pred": "A metallic screwdriver with a flathead tip and a cylindrical shaft. 
The handle is textured for grip and has a slight taper towards the tip.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the screwdriver is mentioned in the description but is not silver or metallic.", + -1 + ], + [ + "The screwdriver is not mentioned.", + 0 + ], + [ + "The color of the screwdriver is mentioned in the description and is silver or metallic.", + 1 + ], + [ + "The color of the screwdriver is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the screwdriver is not mentioned.", + "pred_index": 3, + "question_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the screwdriver is mentioned in the description but is not metal.", + -1 + ], + [ + "The screwdriver is not mentioned.", + 0 + ], + [ + "The material of the screwdriver is mentioned in the description and is metal.", + 1 + ], + [ + "The material of the screwdriver is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the screwdriver is mentioned in the description and is metal.", + "pred_index": 2, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the grip is mentioned in the description but is not rough or textured.", + -1 + ], + [ + "The grip or the screwdriver is not mentioned.", + 0 + ], + [ + "The texture of the grip is mentioned in the description and is rough or textured.", + 1 + ], + [ + "The texture of the grip is not mentioned, but the grip of the screwdriver is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. 
The texture of the grip is mentioned in the description and is rough or textured.", + "pred_index": 2, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The CD is mentioned in the description.", + -1 + ], + [ + "The CD is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The CD is not mentioned in the description.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wire is mentioned in the description.", + -1 + ], + [ + "The wire is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The wire is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The interchangeable bits of the screwdriver are mentioned in the description.", + -1 + ], + [ + "The interchangeable bits of the screwdriver are not mentioned in the description.", + 1 + ], + [ + "The screwdriver is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The interchangeable bits of the screwdriver are not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wire cutters are mentioned in the description.", + -1 + ], + [ + "The wire cutters are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The wire cutters are not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The flashlight is mentioned in the description.", + -1 + ], + [ + "The flashlight is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The flashlight is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a screwdriver or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a flathead tip and a cylindrical shaft. The handle is textured for grip and has a slight taper towards the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a screwdriver or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a flathead tip and a cylindrical shaft. The handle is textured for grip and has a slight taper towards the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the screwdriver is mentioned in the description but is not silver or metallic.\nB. The screwdriver is not mentioned.\nC. The color of the screwdriver is mentioned in the description and is silver or metallic.\nD. The color of the screwdriver is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a flathead tip and a cylindrical shaft. The handle is textured for grip and has a slight taper towards the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the screwdriver is mentioned in the description but is not metal.\nB. The screwdriver is not mentioned.\nC. The material of the screwdriver is mentioned in the description and is metal.\nD. The material of the screwdriver is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a flathead tip and a cylindrical shaft. The handle is textured for grip and has a slight taper towards the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the grip is mentioned in the description but is not rough or textured.\nB. The grip or the screwdriver is not mentioned.\nC. The texture of the grip is mentioned in the description and is rough or textured.\nD. The texture of the grip is not mentioned, but the grip of the screwdriver is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a flathead tip and a cylindrical shaft. The handle is textured for grip and has a slight taper towards the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The CD is mentioned in the description.\nB. The CD is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a flathead tip and a cylindrical shaft. The handle is textured for grip and has a slight taper towards the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wire is mentioned in the description.\nB. The wire is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a flathead tip and a cylindrical shaft. The handle is textured for grip and has a slight taper towards the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The interchangeable bits of the screwdriver are mentioned in the description.\nB. The interchangeable bits of the screwdriver are not mentioned in the description.\nC. The screwdriver is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a flathead tip and a cylindrical shaft. 
The handle is textured for grip and has a slight taper towards the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wire cutters are mentioned in the description.\nB. The wire cutters are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a flathead tip and a cylindrical shaft. The handle is textured for grip and has a slight taper towards the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The flashlight is mentioned in the description.\nB. The flashlight is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the screwdriver is not mentioned.", + "C. The material of the screwdriver is mentioned in the description and is metal.", + "C. The texture of the grip is mentioned in the description and is rough or textured.", + "B. The CD is not mentioned in the description.", + "B. The wire is not mentioned in the description.", + "B. The interchangeable bits of the screwdriver are not mentioned in the description.", + "B. The wire cutters are not mentioned in the description.", + "B. The flashlight is not mentioned in the description." + ], + "score": 0.875, + "score_pos": 0.6666666666666666, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "2391780": { + "pred": "The bird has a long, slender neck and a pointed beak. 
Its body is covered in brown feathers with a slightly lighter underbelly. The wings are outstretched, showing a mix of brown and white feathers with a distinct pattern. The tail feathers are short and pointed.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The position of the wings is mentioned in the description but is not spread or extended.", + -1 + ], + [ + "The position of the wings is not mentioned, but the wings of the wild bird are mentioned.", + 0.5 + ], + [ + "The position of the wings is mentioned in the description and is spread or extended.", + 1 + ], + [ + "The wings or the wild bird are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The position of the wings is mentioned in the description and is spread or extended.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the head is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the head is not mentioned, but the head of the wild bird is mentioned.", + 0.5 + ], + [ + "The color of the head is mentioned in the description and is white.", + 1 + ], + [ + "The head or the wild bird is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the head is mentioned in the description but is not white.", + "pred_index": 0, + "question_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the body is mentioned in the description but is not white, brown or gray.", + -1 + ], + [ + "The color of the body is not mentioned, but the body of the wild bird is mentioned.", + 0.5 + ], + [ + "The color of the body is mentioned in the description and is white, brown or gray.", + 1 + ], + [ + "The body or the wild bird is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the body is mentioned in the description and is white, brown or gray.", + "pred_index": 2, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the beak is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The color of the beak is not mentioned, but the beak of the wild bird is mentioned.", + 0.5 + ], + [ + "The color of the beak is mentioned in the description and is dark or black.", + 1 + ], + [ + "The beak or the wild bird is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the beak is mentioned in the description but is not dark or black.", + "pred_index": 0, + "question_index": 3, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The windows are mentioned in the description.", + -1 + ], + [ + "The windows are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The windows are not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The boats are mentioned in the description.", + -1 + ], + [ + "The boats are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The boats are not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The stone walls are mentioned in the description.", + -1 + ], + [ + "The stone walls are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The stone walls are not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chimneys are mentioned in the description.", + -1 + ], + [ + "The chimneys are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The chimneys are not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The water is mentioned in the description.", + -1 + ], + [ + "The water is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The water is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a wild bird or an object of a similar type? 
Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a long, slender neck and a pointed beak. Its body is covered in brown feathers with a slightly lighter underbelly. The wings are outstretched, showing a mix of brown and white feathers with a distinct pattern. The tail feathers are short and pointed.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a wild bird or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a long, slender neck and a pointed beak. Its body is covered in brown feathers with a slightly lighter underbelly. The wings are outstretched, showing a mix of brown and white feathers with a distinct pattern. 
The tail feathers are short and pointed.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The position of the wings is mentioned in the description but is not spread or extended.\nB. The position of the wings is not mentioned, but the wings of the wild bird are mentioned.\nC. The position of the wings is mentioned in the description and is spread or extended.\nD. The wings or the wild bird are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a long, slender neck and a pointed beak. Its body is covered in brown feathers with a slightly lighter underbelly. The wings are outstretched, showing a mix of brown and white feathers with a distinct pattern. The tail feathers are short and pointed.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the head is mentioned in the description but is not white.\nB. The color of the head is not mentioned, but the head of the wild bird is mentioned.\nC. The color of the head is mentioned in the description and is white.\nD. The head or the wild bird is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a long, slender neck and a pointed beak. Its body is covered in brown feathers with a slightly lighter underbelly. The wings are outstretched, showing a mix of brown and white feathers with a distinct pattern. The tail feathers are short and pointed.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the body is mentioned in the description but is not white, brown or gray.\nB. The color of the body is not mentioned, but the body of the wild bird is mentioned.\nC. The color of the body is mentioned in the description and is white, brown or gray.\nD. The body or the wild bird is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a long, slender neck and a pointed beak. Its body is covered in brown feathers with a slightly lighter underbelly. The wings are outstretched, showing a mix of brown and white feathers with a distinct pattern. The tail feathers are short and pointed.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the beak is mentioned in the description but is not dark or black.\nB. The color of the beak is not mentioned, but the beak of the wild bird is mentioned.\nC. The color of the beak is mentioned in the description and is dark or black.\nD. 
The beak or the wild bird is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a long, slender neck and a pointed beak. Its body is covered in brown feathers with a slightly lighter underbelly. The wings are outstretched, showing a mix of brown and white feathers with a distinct pattern. The tail feathers are short and pointed.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The windows are mentioned in the description.\nB. The windows are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a long, slender neck and a pointed beak. Its body is covered in brown feathers with a slightly lighter underbelly. The wings are outstretched, showing a mix of brown and white feathers with a distinct pattern. The tail feathers are short and pointed.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The boats are mentioned in the description.\nB. 
The boats are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a long, slender neck and a pointed beak. Its body is covered in brown feathers with a slightly lighter underbelly. The wings are outstretched, showing a mix of brown and white feathers with a distinct pattern. The tail feathers are short and pointed.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The stone walls are mentioned in the description.\nB. The stone walls are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a long, slender neck and a pointed beak. Its body is covered in brown feathers with a slightly lighter underbelly. The wings are outstretched, showing a mix of brown and white feathers with a distinct pattern. The tail feathers are short and pointed.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chimneys are mentioned in the description.\nB. 
The chimneys are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a long, slender neck and a pointed beak. Its body is covered in brown feathers with a slightly lighter underbelly. The wings are outstretched, showing a mix of brown and white feathers with a distinct pattern. The tail feathers are short and pointed.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The water is mentioned in the description.\nB. The water is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The position of the wings is mentioned in the description and is spread or extended.", + "A. The color of the head is mentioned in the description but is not white.", + "C. The color of the body is mentioned in the description and is white, brown or gray.", + "A. The color of the beak is mentioned in the description but is not dark or black.", + "B. The windows are not mentioned in the description.", + "B. The boats are not mentioned in the description.", + "B. The stone walls are not mentioned in the description.", + "B. The chimneys are not mentioned in the description.", + "B. The water is not mentioned in the description." + ], + "score": 0.5555555555555556, + "score_pos": 0.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "4502267": { + "pred": "A green bean with a smooth, slightly curved surface, featuring a gradient of light to dark green hues. 
The bean has a tapered end and a small, pointed tip.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green bean is not mentioned.", + 0 + ], + [ + "The shape of the green bean is not mentioned.", + 0 + ], + [ + "The shape of the green bean is mentioned in the description and is elongated.", + 1 + ], + [ + "The shape of the green bean is mentioned in the description but is not elongated.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the green bean is mentioned in the description and is elongated.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green bean is not mentioned.", + 0 + ], + [ + "The color of the green bean is not mentioned.", + 0 + ], + [ + "The color of the green bean is mentioned in the description and is green.", + 1 + ], + [ + "The color of the green bean is mentioned in the description but is not green.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the green bean is mentioned in the description and is green.", + "pred_index": 2, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green bean is not mentioned.", + 0 + ], + [ + "The shape of the green bean is not mentioned.", + 0 + ], + [ + "The shape of the green bean is mentioned in the description and is arc or curved.", + 1 + ], + [ + "The shape of the green bean is mentioned in the description but is not arc or curved.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the green bean is mentioned in the description and is arc or curved.", + "pred_index": 2, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green bean is not mentioned.", + 0 + ], + [ + "The texture of the green bean is not mentioned.", + 0 + ], + [ + "The texture of the green bean is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the green bean is mentioned in the description but is not smooth.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The texture of the green bean is mentioned in the description and is smooth.", + "pred_index": 2, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The apple is not mentioned in the description.", + 1 + ], + [ + "The apple is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The apple is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green bean is not mentioned in the description.", + 0 + ], + [ + "The strings of the green bean are not mentioned in the description.", + 1 + ], + [ + "The strings of the green bean are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The strings of the green bean are not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green bean is not mentioned in the description.", + 0 + ], + [ + "The seeds of the green bean are not mentioned in the description.", + 1 + ], + [ + "The seeds of the green bean are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The seeds of the green bean are not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pear is not mentioned in the description.", + 1 + ], + [ + "The pear is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The pear is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The peach is not mentioned in the description.", + 1 + ], + [ + "The peach is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The peach is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a green bean or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, featuring a gradient of light to dark green hues. The bean has a tapered end and a small, pointed tip.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a green bean or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, featuring a gradient of light to dark green hues. The bean has a tapered end and a small, pointed tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green bean is not mentioned.\nB. The shape of the green bean is not mentioned.\nC. The shape of the green bean is mentioned in the description and is elongated.\nD. The shape of the green bean is mentioned in the description but is not elongated.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, featuring a gradient of light to dark green hues. The bean has a tapered end and a small, pointed tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green bean is not mentioned.\nB. The color of the green bean is not mentioned.\nC. The color of the green bean is mentioned in the description and is green.\nD. The color of the green bean is mentioned in the description but is not green.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, featuring a gradient of light to dark green hues. The bean has a tapered end and a small, pointed tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green bean is not mentioned.\nB. The shape of the green bean is not mentioned.\nC. The shape of the green bean is mentioned in the description and is arc or curved.\nD. The shape of the green bean is mentioned in the description but is not arc or curved.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, featuring a gradient of light to dark green hues. The bean has a tapered end and a small, pointed tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green bean is not mentioned.\nB. The texture of the green bean is not mentioned.\nC. The texture of the green bean is mentioned in the description and is smooth.\nD. The texture of the green bean is mentioned in the description but is not smooth.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, featuring a gradient of light to dark green hues. The bean has a tapered end and a small, pointed tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The apple is not mentioned in the description.\nB. The apple is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, featuring a gradient of light to dark green hues. 
The bean has a tapered end and a small, pointed tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green bean is not mentioned in the description.\nB. The strings of the green bean are not mentioned in the description.\nC. The strings of the green bean are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, featuring a gradient of light to dark green hues. The bean has a tapered end and a small, pointed tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green bean is not mentioned in the description.\nB. The seeds of the green bean are not mentioned in the description.\nC. The seeds of the green bean are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, featuring a gradient of light to dark green hues. 
The bean has a tapered end and a small, pointed tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pear is not mentioned in the description.\nB. The pear is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, featuring a gradient of light to dark green hues. The bean has a tapered end and a small, pointed tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The peach is not mentioned in the description.\nB. The peach is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The shape of the green bean is mentioned in the description and is elongated.", + "C. The color of the green bean is mentioned in the description and is green.", + "C. The shape of the green bean is mentioned in the description and is arc or curved.", + "C. The texture of the green bean is mentioned in the description and is smooth.", + "A. The apple is not mentioned in the description.", + "B. The strings of the green bean are not mentioned in the description.", + "B. The seeds of the green bean are not mentioned in the description.", + "A. The pear is not mentioned in the description.", + "A. The peach is not mentioned in the description." 
+ ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "4604873": { + "pred": "A tall, lattice-style mobile crane with a long, horizontal boom extending to the left. The crane has a rectangular base and a vertical mast with a series of diagonal cross-bracing. The boom is supported by a series of cables and pulleys, and there is a hook at the end of the boom.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the jib is not mentioned, but the jib of the crane is mentioned.", + 0.5 + ], + [ + "The jib or the crane is not mentioned.", + 0 + ], + [ + "The size of the jib is mentioned in the description but is not long.", + -1 + ], + [ + "The size of the jib is mentioned in the description and is long.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The size of the jib is mentioned in the description and is long.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the tower is not mentioned, but the tower of the crane is mentioned.", + 0.5 + ], + [ + "The tower or the crane is not mentioned.", + 0 + ], + [ + "The size of the tower is mentioned in the description but is not tall.", + -1 + ], + [ + "The size of the tower is mentioned in the description and is tall.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The size of the tower is mentioned in the description and is tall.", + "pred_index": 3, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The visibility of the hook is not mentioned, but the hook of the crane is mentioned.", + 0.5 + ], + [ + "The hook or the crane is not mentioned.", + 0 + ], + [ + "The visibility of the hook is mentioned in the description but is not visible.", + -1 + ], + [ + "The visibility of the hook is mentioned in the description and is visible.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The visibility of the hook is not mentioned, but the hook of the crane is mentioned.", + "pred_index": 0, + "question_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the crane is not mentioned.", + 0 + ], + [ + "The crane is not mentioned.", + 0 + ], + [ + "The material of the crane is mentioned in the description but is not metal or steel.", + -1 + ], + [ + "The material of the crane is mentioned in the description and is metal or steel.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the crane is not mentioned.", + "pred_index": 0, + "question_index": 3, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the jib is not mentioned, but the jib of the crane is mentioned.", + 0.5 + ], + [ + "The jib or the crane is not mentioned.", + 0 + ], + [ + "The shape of the jib is mentioned in the description but is not horizontal beam.", + -1 + ], + [ + "The shape of the jib is mentioned in the description and is horizontal beam.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The shape of the jib is not mentioned, but the jib of the crane is mentioned.", + "pred_index": 0, + "question_index": 4, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The telescoping sections of the crane are not mentioned in the description.", + 1 + ], + [ + "The crane is not mentioned in the description.", + 0 + ], + [ + "The telescoping sections of the crane are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The telescoping sections of the crane are not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tracks of the crane are not mentioned in the description.", + 1 + ], + [ + "The crane is not mentioned in the description.", + 0 + ], + [ + "The tracks of the crane are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The tracks of the crane are not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wheels of the crane are not mentioned in the description.", + 1 + ], + [ + "The crane is not mentioned in the description.", + 0 + ], + [ + "The wheels of the crane are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The wheels of the crane are not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The silhouettes of structures are not mentioned in the description.", + 1 + ], + [ + "The silhouettes of structures are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The silhouettes of structures are not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clouds are not mentioned in the description.", + 1 + ], + [ + "The clouds are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The clouds are not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a crane or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, lattice-style mobile crane with a long, horizontal boom extending to the left. 
The crane has a rectangular base and a vertical mast with a series of diagonal cross-bracing. The boom is supported by a series of cables and pulleys, and there is a hook at the end of the boom.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a crane or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, lattice-style mobile crane with a long, horizontal boom extending to the left. The crane has a rectangular base and a vertical mast with a series of diagonal cross-bracing. The boom is supported by a series of cables and pulleys, and there is a hook at the end of the boom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the jib is not mentioned, but the jib of the crane is mentioned.\nB. The jib or the crane is not mentioned.\nC. The size of the jib is mentioned in the description but is not long.\nD. The size of the jib is mentioned in the description and is long.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA tall, lattice-style mobile crane with a long, horizontal boom extending to the left. The crane has a rectangular base and a vertical mast with a series of diagonal cross-bracing. The boom is supported by a series of cables and pulleys, and there is a hook at the end of the boom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the tower is not mentioned, but the tower of the crane is mentioned.\nB. The tower or the crane is not mentioned.\nC. The size of the tower is mentioned in the description but is not tall.\nD. The size of the tower is mentioned in the description and is tall.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, lattice-style mobile crane with a long, horizontal boom extending to the left. The crane has a rectangular base and a vertical mast with a series of diagonal cross-bracing. The boom is supported by a series of cables and pulleys, and there is a hook at the end of the boom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The visibility of the hook is not mentioned, but the hook of the crane is mentioned.\nB. The hook or the crane is not mentioned.\nC. The visibility of the hook is mentioned in the description but is not visible.\nD. 
The visibility of the hook is mentioned in the description and is visible.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, lattice-style mobile crane with a long, horizontal boom extending to the left. The crane has a rectangular base and a vertical mast with a series of diagonal cross-bracing. The boom is supported by a series of cables and pulleys, and there is a hook at the end of the boom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the crane is not mentioned.\nB. The crane is not mentioned.\nC. The material of the crane is mentioned in the description but is not metal or steel.\nD. The material of the crane is mentioned in the description and is metal or steel.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, lattice-style mobile crane with a long, horizontal boom extending to the left. The crane has a rectangular base and a vertical mast with a series of diagonal cross-bracing. 
The boom is supported by a series of cables and pulleys, and there is a hook at the end of the boom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the jib is not mentioned, but the jib of the crane is mentioned.\nB. The jib or the crane is not mentioned.\nC. The shape of the jib is mentioned in the description but is not horizontal beam.\nD. The shape of the jib is mentioned in the description and is horizontal beam.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, lattice-style mobile crane with a long, horizontal boom extending to the left. The crane has a rectangular base and a vertical mast with a series of diagonal cross-bracing. The boom is supported by a series of cables and pulleys, and there is a hook at the end of the boom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The telescoping sections of the crane are not mentioned in the description.\nB. The crane is not mentioned in the description.\nC. The telescoping sections of the crane are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA tall, lattice-style mobile crane with a long, horizontal boom extending to the left. The crane has a rectangular base and a vertical mast with a series of diagonal cross-bracing. The boom is supported by a series of cables and pulleys, and there is a hook at the end of the boom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tracks of the crane are not mentioned in the description.\nB. The crane is not mentioned in the description.\nC. The tracks of the crane are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, lattice-style mobile crane with a long, horizontal boom extending to the left. The crane has a rectangular base and a vertical mast with a series of diagonal cross-bracing. The boom is supported by a series of cables and pulleys, and there is a hook at the end of the boom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wheels of the crane are not mentioned in the description.\nB. The crane is not mentioned in the description.\nC. The wheels of the crane are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, lattice-style mobile crane with a long, horizontal boom extending to the left. The crane has a rectangular base and a vertical mast with a series of diagonal cross-bracing. The boom is supported by a series of cables and pulleys, and there is a hook at the end of the boom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The silhouettes of structures are not mentioned in the description.\nB. The silhouettes of structures are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, lattice-style mobile crane with a long, horizontal boom extending to the left. The crane has a rectangular base and a vertical mast with a series of diagonal cross-bracing. The boom is supported by a series of cables and pulleys, and there is a hook at the end of the boom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The clouds are not mentioned in the description.\nB. The clouds are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The size of the jib is mentioned in the description and is long.", + "D. The size of the tower is mentioned in the description and is tall.", + "A. The visibility of the hook is not mentioned, but the hook of the crane is mentioned.", + "A. 
The material of the crane is not mentioned.", + "A. The shape of the jib is not mentioned, but the jib of the crane is mentioned.", + "A. The telescoping sections of the crane are not mentioned in the description.", + "A. The tracks of the crane are not mentioned in the description.", + "A. The wheels of the crane are not mentioned in the description.", + "A. The silhouettes of structures are not mentioned in the description.", + "A. The clouds are not mentioned in the description." + ], + "score": 0.8, + "score_pos": 0.6, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "4916799": { + "pred": "A spherical sculpture composed of numerous small, white, dome-shaped elements arranged in a grid pattern. The sphere is mounted on a cylindrical base and features a blue band with the word \"Pune\" in blue letters. There are also green and yellow accents on the sphere.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the globe is mentioned in the description and is silver and blue.", + 1 + ], + [ + "The globe is not mentioned.", + 0 + ], + [ + "The color of the globe is not mentioned.", + 0 + ], + [ + "The color of the globe is mentioned in the description but is not silver and blue.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the globe is mentioned in the description but is not silver and blue.", + "pred_index": 3, + "question_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the sphere is mentioned in the description and is metal and plastic.", + 1 + ], + [ + "The sphere or the globe is not mentioned.", + 0 + ], + [ + "The material of the sphere is not mentioned, but the sphere of the globe is mentioned.", + 0.5 + ], + [ + "The material of the sphere is mentioned in the description but is not metal and plastic.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The sphere or the globe is not mentioned.", + "pred_index": 1, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the base is mentioned in the description and is circular.", + 1 + ], + [ + "The base or the globe is not mentioned.", + 0 + ], + [ + "The shape of the base is not mentioned, but the base of the globe is mentioned.", + 0.5 + ], + [ + "The shape of the base is mentioned in the description but is not circular.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the base is not mentioned, but the base of the globe is mentioned.", + "pred_index": 2, + "question_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the base is mentioned in the description and is gray.", + 1 + ], + [ + "The base or the globe is not mentioned.", + 0 + ], + [ + "The color of the base is not mentioned, but the base of the globe is mentioned.", + 0.5 + ], + [ + "The color of the base is mentioned in the description but is not gray.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the base is not mentioned, but the base of the globe is mentioned.", + "pred_index": 2, + "question_index": 3, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sidewalk is mentioned in the description.", + -1 + ], + [ + "The sidewalk is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The sidewalk is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The trees are mentioned in the description.", + -1 + ], + [ + "The trees are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The trees are not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sky is mentioned in the description.", + -1 + ], + [ + "The sky is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The sky is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The person is mentioned in the description.", + -1 + ], + [ + "The person is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The person is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bushes are mentioned in the description.", + -1 + ], + [ + "The bushes are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bushes are not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a globe or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture composed of numerous small, white, dome-shaped elements arranged in a grid pattern. The sphere is mounted on a cylindrical base and features a blue band with the word \"Pune\" in blue letters. There are also green and yellow accents on the sphere.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a globe or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture composed of numerous small, white, dome-shaped elements arranged in a grid pattern. The sphere is mounted on a cylindrical base and features a blue band with the word \"Pune\" in blue letters. There are also green and yellow accents on the sphere.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the globe is mentioned in the description and is silver and blue.\nB. The globe is not mentioned.\nC. The color of the globe is not mentioned.\nD. The color of the globe is mentioned in the description but is not silver and blue.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture composed of numerous small, white, dome-shaped elements arranged in a grid pattern. The sphere is mounted on a cylindrical base and features a blue band with the word \"Pune\" in blue letters. There are also green and yellow accents on the sphere.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the sphere is mentioned in the description and is metal and plastic.\nB. The sphere or the globe is not mentioned.\nC. 
The material of the sphere is not mentioned, but the sphere of the globe is mentioned.\nD. The material of the sphere is mentioned in the description but is not metal and plastic.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture composed of numerous small, white, dome-shaped elements arranged in a grid pattern. The sphere is mounted on a cylindrical base and features a blue band with the word \"Pune\" in blue letters. There are also green and yellow accents on the sphere.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the base is mentioned in the description and is circular.\nB. The base or the globe is not mentioned.\nC. The shape of the base is not mentioned, but the base of the globe is mentioned.\nD. The shape of the base is mentioned in the description but is not circular.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture composed of numerous small, white, dome-shaped elements arranged in a grid pattern. The sphere is mounted on a cylindrical base and features a blue band with the word \"Pune\" in blue letters. 
There are also green and yellow accents on the sphere.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the base is mentioned in the description and is gray.\nB. The base or the globe is not mentioned.\nC. The color of the base is not mentioned, but the base of the globe is mentioned.\nD. The color of the base is mentioned in the description but is not gray.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture composed of numerous small, white, dome-shaped elements arranged in a grid pattern. The sphere is mounted on a cylindrical base and features a blue band with the word \"Pune\" in blue letters. There are also green and yellow accents on the sphere.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sidewalk is mentioned in the description.\nB. The sidewalk is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture composed of numerous small, white, dome-shaped elements arranged in a grid pattern. 
The sphere is mounted on a cylindrical base and features a blue band with the word \"Pune\" in blue letters. There are also green and yellow accents on the sphere.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The trees are mentioned in the description.\nB. The trees are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture composed of numerous small, white, dome-shaped elements arranged in a grid pattern. The sphere is mounted on a cylindrical base and features a blue band with the word \"Pune\" in blue letters. There are also green and yellow accents on the sphere.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sky is mentioned in the description.\nB. The sky is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture composed of numerous small, white, dome-shaped elements arranged in a grid pattern. The sphere is mounted on a cylindrical base and features a blue band with the word \"Pune\" in blue letters. 
There are also green and yellow accents on the sphere.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The person is mentioned in the description.\nB. The person is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture composed of numerous small, white, dome-shaped elements arranged in a grid pattern. The sphere is mounted on a cylindrical base and features a blue band with the word \"Pune\" in blue letters. There are also green and yellow accents on the sphere.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bushes are mentioned in the description.\nB. The bushes are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the globe is mentioned in the description but is not silver and blue.", + "B. The sphere or the globe is not mentioned.", + "C. The shape of the base is not mentioned, but the base of the globe is mentioned.", + "C. The color of the base is not mentioned, but the base of the globe is mentioned.", + "B. The sidewalk is not mentioned in the description.", + "B. The trees are not mentioned in the description.", + "B. The sky is not mentioned in the description.", + "B. The person is not mentioned in the description.", + "B. The bushes are not mentioned in the description." 
+ ], + "score": 0.5555555555555556, + "score_pos": 0.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "5718415": { + "pred": "The tent has a yellow canopy with a slightly curved edge. The visible part of the tent includes a vertical metal pole supporting the canopy.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The canopy or the tent is not mentioned.", + 0 + ], + [ + "The material of the canopy is mentioned in the description but is not fabric.", + -1 + ], + [ + "The material of the canopy is mentioned in the description and is fabric.", + 1 + ], + [ + "The material of the canopy is not mentioned, but the canopy of the tent is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the canopy is not mentioned, but the canopy of the tent is mentioned.", + "pred_index": 3, + "question_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pole or the tent is not mentioned.", + 0 + ], + [ + "The material of the pole is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the pole is mentioned in the description and is metal.", + 1 + ], + [ + "The material of the pole is not mentioned, but the pole of the tent is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. 
The material of the pole is mentioned in the description and is metal.", + "pred_index": 2, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The canopy or the tent is not mentioned.", + 0 + ], + [ + "The color of the canopy is mentioned in the description but is not yellow.", + -1 + ], + [ + "The color of the canopy is mentioned in the description and is yellow.", + 1 + ], + [ + "The color of the canopy is not mentioned, but the canopy of the tent is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the canopy is mentioned in the description and is yellow.", + "pred_index": 2, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The windows of the tent are mentioned in the description.", + -1 + ], + [ + "The tent is not mentioned in the description.", + 0 + ], + [ + "The windows of the tent are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The windows of the tent are not mentioned in the description.", + "pred_index": 2, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The buildings are mentioned in the description.", + -1 + ], + [ + "The buildings are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The buildings are not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The walls of the tent are mentioned in the description.", + -1 + ], + [ + "The tent is not mentioned in the description.", + 0 + ], + [ + "The walls of the tent are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The walls of the tent are not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The door of the tent is mentioned in the description.", + -1 + ], + [ + "The tent is not mentioned in the description.", + 0 + ], + [ + "The door of the tent is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The door of the tent is not mentioned in the description.", + "pred_index": 2, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The floor of the tent is mentioned in the description.", + -1 + ], + [ + "The tent is not mentioned in the description.", + 0 + ], + [ + "The floor of the tent is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The floor of the tent is not mentioned in the description.", + "pred_index": 2, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a tent or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a slightly curved edge. The visible part of the tent includes a vertical metal pole supporting the canopy.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a tent or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a slightly curved edge. The visible part of the tent includes a vertical metal pole supporting the canopy.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The canopy or the tent is not mentioned.\nB. The material of the canopy is mentioned in the description but is not fabric.\nC. The material of the canopy is mentioned in the description and is fabric.\nD. The material of the canopy is not mentioned, but the canopy of the tent is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a slightly curved edge. The visible part of the tent includes a vertical metal pole supporting the canopy.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pole or the tent is not mentioned.\nB. The material of the pole is mentioned in the description but is not metal.\nC. The material of the pole is mentioned in the description and is metal.\nD. The material of the pole is not mentioned, but the pole of the tent is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a slightly curved edge. The visible part of the tent includes a vertical metal pole supporting the canopy.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The canopy or the tent is not mentioned.\nB. The color of the canopy is mentioned in the description but is not yellow.\nC. The color of the canopy is mentioned in the description and is yellow.\nD. The color of the canopy is not mentioned, but the canopy of the tent is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a slightly curved edge. The visible part of the tent includes a vertical metal pole supporting the canopy.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The windows of the tent are mentioned in the description.\nB. The tent is not mentioned in the description.\nC. The windows of the tent are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a slightly curved edge. The visible part of the tent includes a vertical metal pole supporting the canopy.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The buildings are mentioned in the description.\nB. The buildings are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a slightly curved edge. The visible part of the tent includes a vertical metal pole supporting the canopy.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The walls of the tent are mentioned in the description.\nB. The tent is not mentioned in the description.\nC. The walls of the tent are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a slightly curved edge. The visible part of the tent includes a vertical metal pole supporting the canopy.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The door of the tent is mentioned in the description.\nB. The tent is not mentioned in the description.\nC. The door of the tent is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a slightly curved edge. 
The visible part of the tent includes a vertical metal pole supporting the canopy.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The floor of the tent is mentioned in the description.\nB. The tent is not mentioned in the description.\nC. The floor of the tent is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The material of the canopy is not mentioned, but the canopy of the tent is mentioned.", + "C. The material of the pole is mentioned in the description and is metal.", + "C. The color of the canopy is mentioned in the description and is yellow.", + "C. The windows of the tent are not mentioned in the description.", + "B. The buildings are not mentioned in the description.", + "C. The walls of the tent are not mentioned in the description.", + "C. The door of the tent is not mentioned in the description.", + "C. The floor of the tent is not mentioned in the description." + ], + "score": 0.9375, + "score_pos": 0.8333333333333334, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "6012878": { + "pred": "A square traffic light with a black background and a red illuminated hand symbol on the left side.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The symbol or the traffic light is not mentioned.", + 0 + ], + [ + "The color of the symbol is mentioned in the description but is not red or orange.", + -1 + ], + [ + "The color of the symbol is mentioned in the description and is red or orange.", + 1 + ], + [ + "The color of the symbol is not mentioned, but the symbol of the traffic light is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the symbol is mentioned in the description and is red or orange.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The symbol or the traffic light is not mentioned.", + 0 + ], + [ + "The shape of the symbol is mentioned in the description but is not hand outline.", + -1 + ], + [ + "The shape of the symbol is mentioned in the description and is hand outline.", + 1 + ], + [ + "The shape of the symbol is not mentioned, but the symbol of the traffic light is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the symbol is mentioned in the description and is hand outline.", + "pred_index": 2, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The background or the traffic light is not mentioned.", + 0 + ], + [ + "The texture of the background is mentioned in the description but is not matte.", + -1 + ], + [ + "The texture of the background is mentioned in the description and is matte.", + 1 + ], + [ + "The texture of the background is not mentioned, but the background of the traffic light is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. 
The texture of the background is not mentioned, but the background of the traffic light is mentioned.", + "pred_index": 3, + "question_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The background or the traffic light is not mentioned.", + 0 + ], + [ + "The color of the background is mentioned in the description but is not gray or black.", + -1 + ], + [ + "The color of the background is mentioned in the description and is gray or black.", + 1 + ], + [ + "The color of the background is not mentioned, but the background of the traffic light is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the background is mentioned in the description and is gray or black.", + "pred_index": 2, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The reflective surface or the traffic light is not mentioned.", + 0 + ], + [ + "The material of the reflective surface is mentioned in the description but is not glass or plastic.", + -1 + ], + [ + "The material of the reflective surface is mentioned in the description and is glass or plastic.", + 1 + ], + [ + "The material of the reflective surface is not mentioned, but the reflective surface of the traffic light is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. 
The reflective surface or the traffic light is not mentioned.", + "pred_index": 0, + "question_index": 4, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The walking person symbol of the traffic light is not mentioned in the description.", + 1 + ], + [ + "The walking person symbol of the traffic light is mentioned in the description.", + -1 + ], + [ + "The traffic light is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The walking person symbol of the traffic light is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pole of the traffic light is not mentioned in the description.", + 1 + ], + [ + "The pole of the traffic light is mentioned in the description.", + -1 + ], + [ + "The traffic light is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The pole of the traffic light is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bicycles are not mentioned in the description.", + 1 + ], + [ + "The bicycles are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The bicycles are not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sidewalk is not mentioned in the description.", + 1 + ], + [ + "The sidewalk is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The sidewalk is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green light of the traffic light is not mentioned in the description.", + 1 + ], + [ + "The green light of the traffic light is mentioned in the description.", + -1 + ], + [ + "The traffic light is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The green light of the traffic light is not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a traffic light or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA square traffic light with a black background and a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a traffic light or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square traffic light with a black background and a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The symbol or the traffic light is not mentioned.\nB. The color of the symbol is mentioned in the description but is not red or orange.\nC. The color of the symbol is mentioned in the description and is red or orange.\nD. The color of the symbol is not mentioned, but the symbol of the traffic light is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA square traffic light with a black background and a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The symbol or the traffic light is not mentioned.\nB. The shape of the symbol is mentioned in the description but is not hand outline.\nC. The shape of the symbol is mentioned in the description and is hand outline.\nD. The shape of the symbol is not mentioned, but the symbol of the traffic light is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square traffic light with a black background and a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The background or the traffic light is not mentioned.\nB. The texture of the background is mentioned in the description but is not matte.\nC. The texture of the background is mentioned in the description and is matte.\nD. The texture of the background is not mentioned, but the background of the traffic light is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA square traffic light with a black background and a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The background or the traffic light is not mentioned.\nB. The color of the background is mentioned in the description but is not gray or black.\nC. The color of the background is mentioned in the description and is gray or black.\nD. The color of the background is not mentioned, but the background of the traffic light is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square traffic light with a black background and a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The reflective surface or the traffic light is not mentioned.\nB. The material of the reflective surface is mentioned in the description but is not glass or plastic.\nC. The material of the reflective surface is mentioned in the description and is glass or plastic.\nD. The material of the reflective surface is not mentioned, but the reflective surface of the traffic light is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square traffic light with a black background and a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The walking person symbol of the traffic light is not mentioned in the description.\nB. The walking person symbol of the traffic light is mentioned in the description.\nC. The traffic light is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square traffic light with a black background and a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pole of the traffic light is not mentioned in the description.\nB. The pole of the traffic light is mentioned in the description.\nC. The traffic light is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA square traffic light with a black background and a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bicycles are not mentioned in the description.\nB. The bicycles are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square traffic light with a black background and a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sidewalk is not mentioned in the description.\nB. The sidewalk is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square traffic light with a black background and a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green light of the traffic light is not mentioned in the description.\nB. 
The green light of the traffic light is mentioned in the description.\nC. The traffic light is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the symbol is mentioned in the description and is red or orange.", + "C. The shape of the symbol is mentioned in the description and is hand outline.", + "D. The texture of the background is not mentioned, but the background of the traffic light is mentioned.", + "C. The color of the background is mentioned in the description and is gray or black.", + "A. The reflective surface or the traffic light is not mentioned.", + "A. The walking person symbol of the traffic light is not mentioned in the description.", + "A. The pole of the traffic light is not mentioned in the description.", + "A. The bicycles are not mentioned in the description.", + "A. The sidewalk is not mentioned in the description.", + "A. The green light of the traffic light is not mentioned in the description." + ], + "score": 0.85, + "score_pos": 0.7, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "6820595": { + "pred": "A cat with a white face, black ears, and a black patch over its left eye. The body is predominantly black with a white underbelly and a white patch on its right side. The tail is black.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the ear is not mentioned, but the ear of the cat is mentioned.", + 0.5 + ], + [ + "The shape of the ear is mentioned in the description but is not triangular.", + -1 + ], + [ + "The ear or the cat is not mentioned.", + 0 + ], + [ + "The shape of the ear is mentioned in the description and is triangular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The shape of the ear is not mentioned, but the ear of the cat is mentioned.", + "pred_index": 0, + "question_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the fur is not mentioned, but the fur of the cat is mentioned.", + 0.5 + ], + [ + "The texture of the fur is mentioned in the description but is not fluffy.", + -1 + ], + [ + "The fur or the cat is not mentioned.", + 0 + ], + [ + "The texture of the fur is mentioned in the description and is fluffy.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The texture of the fur is not mentioned, but the fur of the cat is mentioned.", + "pred_index": 0, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the fur is not mentioned, but the fur of the cat is mentioned.", + 0.5 + ], + [ + "The color of the fur is mentioned in the description but is not black and white.", + -1 + ], + [ + "The fur or the cat is not mentioned.", + 0 + ], + [ + "The color of the fur is mentioned in the description and is black and white.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the fur is mentioned in the description and is black and white.", + "pred_index": 3, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the back is not mentioned, but the back of the cat is mentioned.", + 0.5 + ], + [ + "The shape of the back is mentioned in the description but is not arched.", + -1 + ], + [ + "The back or the cat is not mentioned.", + 0 + ], + [ + "The shape of the back is mentioned in the description and is arched.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The shape of the back is not mentioned, but the back of the cat is mentioned.", + "pred_index": 0, + "question_index": 3, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the underbelly is not mentioned, but the underbelly of the cat is mentioned.", + 0.5 + ], + [ + "The color of the underbelly is mentioned in the description but is not white.", + -1 + ], + [ + "The underbelly or the cat is not mentioned.", + 0 + ], + [ + "The color of the underbelly is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the underbelly is mentioned in the description and is white.", + "pred_index": 3, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The door is mentioned in the description.", + -1 + ], + [ + "The door is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The door is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mirror is mentioned in the description.", + -1 + ], + [ + "The mirror is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The mirror is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bathroom cabinet is mentioned in the description.", + -1 + ], + [ + "The bathroom cabinet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The bathroom cabinet is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bathroom sink is mentioned in the description.", + -1 + ], + [ + "The bathroom sink is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bathroom sink is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The hairbrush is mentioned in the description.", + -1 + ], + [ + "The hairbrush is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The hairbrush is not mentioned in the description.", + "pred_index": 1, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a cat or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face, black ears, and a black patch over its left eye. The body is predominantly black with a white underbelly and a white patch on its right side. 
The tail is black.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a cat or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face, black ears, and a black patch over its left eye. The body is predominantly black with a white underbelly and a white patch on its right side. The tail is black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the ear is not mentioned, but the ear of the cat is mentioned.\nB. The shape of the ear is mentioned in the description but is not triangular.\nC. The ear or the cat is not mentioned.\nD. The shape of the ear is mentioned in the description and is triangular.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face, black ears, and a black patch over its left eye. The body is predominantly black with a white underbelly and a white patch on its right side. 
The tail is black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the fur is not mentioned, but the fur of the cat is mentioned.\nB. The texture of the fur is mentioned in the description but is not fluffy.\nC. The fur or the cat is not mentioned.\nD. The texture of the fur is mentioned in the description and is fluffy.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face, black ears, and a black patch over its left eye. The body is predominantly black with a white underbelly and a white patch on its right side. The tail is black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the fur is not mentioned, but the fur of the cat is mentioned.\nB. The color of the fur is mentioned in the description but is not black and white.\nC. The fur or the cat is not mentioned.\nD. The color of the fur is mentioned in the description and is black and white.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face, black ears, and a black patch over its left eye. 
The body is predominantly black with a white underbelly and a white patch on its right side. The tail is black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the back is not mentioned, but the back of the cat is mentioned.\nB. The shape of the back is mentioned in the description but is not arched.\nC. The back or the cat is not mentioned.\nD. The shape of the back is mentioned in the description and is arched.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face, black ears, and a black patch over its left eye. The body is predominantly black with a white underbelly and a white patch on its right side. The tail is black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the underbelly is not mentioned, but the underbelly of the cat is mentioned.\nB. The color of the underbelly is mentioned in the description but is not white.\nC. The underbelly or the cat is not mentioned.\nD. The color of the underbelly is mentioned in the description and is white.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face, black ears, and a black patch over its left eye. The body is predominantly black with a white underbelly and a white patch on its right side. The tail is black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The door is mentioned in the description.\nB. The door is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face, black ears, and a black patch over its left eye. The body is predominantly black with a white underbelly and a white patch on its right side. The tail is black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mirror is mentioned in the description.\nB. The mirror is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face, black ears, and a black patch over its left eye. The body is predominantly black with a white underbelly and a white patch on its right side. 
The tail is black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bathroom cabinet is mentioned in the description.\nB. The bathroom cabinet is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face, black ears, and a black patch over its left eye. The body is predominantly black with a white underbelly and a white patch on its right side. The tail is black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bathroom sink is mentioned in the description.\nB. The bathroom sink is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face, black ears, and a black patch over its left eye. The body is predominantly black with a white underbelly and a white patch on its right side. The tail is black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The hairbrush is mentioned in the description.\nB. 
The hairbrush is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The shape of the ear is not mentioned, but the ear of the cat is mentioned.", + "A. The texture of the fur is not mentioned, but the fur of the cat is mentioned.", + "D. The color of the fur is mentioned in the description and is black and white.", + "A. The shape of the back is not mentioned, but the back of the cat is mentioned.", + "D. The color of the underbelly is mentioned in the description and is white.", + "B. The door is not mentioned in the description.", + "B. The mirror is not mentioned in the description.", + "B. The bathroom cabinet is not mentioned in the description.", + "B. The bathroom sink is not mentioned in the description.", + "B. The hairbrush is not mentioned in the description." + ], + "score": 0.85, + "score_pos": 0.7, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "8556674": { + "pred": "A single, round orange with a smooth, glossy surface. The orange has a vibrant, bright orange color with a small, lighter patch near the top left.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned.", + 0 + ], + [ + "The color of the orange/tangerine is mentioned in the description but is not orange.", + -1 + ], + [ + "The color of the orange/tangerine is not mentioned.", + 0 + ], + [ + "The color of the orange/tangerine is mentioned in the description and is orange.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the orange/tangerine is mentioned in the description and is orange.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The surface or the orange/tangerine is not mentioned.", + 0 + ], + [ + "The color of the surface is mentioned in the description but is not bright orange.", + -1 + ], + [ + "The color of the surface is not mentioned, but the surface of the orange/tangerine is mentioned.", + 0.5 + ], + [ + "The color of the surface is mentioned in the description and is bright orange.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the surface is mentioned in the description and is bright orange.", + "pred_index": 3, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned.", + 0 + ], + [ + "The texture of the orange/tangerine is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the orange/tangerine is not mentioned.", + 0 + ], + [ + "The texture of the orange/tangerine is mentioned in the description and is smooth.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The texture of the orange/tangerine is mentioned in the description and is smooth.", + "pred_index": 3, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned.", + 0 + ], + [ + "The shape of the orange/tangerine is mentioned in the description but is not round.", + -1 + ], + [ + "The shape of the orange/tangerine is not mentioned.", + 0 + ], + [ + "The shape of the orange/tangerine is mentioned in the description and is round.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The shape of the orange/tangerine is mentioned in the description and is round.", + "pred_index": 3, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The surface or the orange/tangerine is not mentioned.", + 0 + ], + [ + "The texture of the surface is mentioned in the description but is not glossy.", + -1 + ], + [ + "The texture of the surface is not mentioned, but the surface of the orange/tangerine is mentioned.", + 0.5 + ], + [ + "The texture of the surface is mentioned in the description and is glossy.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The texture of the surface is mentioned in the description and is glossy.", + "pred_index": 3, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned in the description.", + 0 + ], + [ + "The stem of the orange/tangerine is mentioned in the description.", + -1 + ], + [ + "The stem of the orange/tangerine is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The stem of the orange/tangerine is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned in the description.", + 0 + ], + [ + "The leaves of the orange/tangerine are mentioned in the description.", + -1 + ], + [ + "The leaves of the orange/tangerine are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The leaves of the orange/tangerine are not mentioned in the description.", + "pred_index": 2, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned in the description.", + 0 + ], + [ + "The segments of the orange/tangerine are mentioned in the description.", + -1 + ], + [ + "The segments of the orange/tangerine are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The segments of the orange/tangerine are not mentioned in the description.", + "pred_index": 2, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ceiling lights are mentioned in the description.", + -1 + ], + [ + "The ceiling lights are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The ceiling lights are not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned in the description.", + 0 + ], + [ + "The flesh of the orange/tangerine is mentioned in the description.", + -1 + ], + [ + "The flesh of the orange/tangerine is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The orange/tangerine is not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 0 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is an orange/tangerine or an object of a similar type? 
Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, round orange with a smooth, glossy surface. The orange has a vibrant, bright orange color with a small, lighter patch near the top left.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is an orange/tangerine or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, round orange with a smooth, glossy surface. The orange has a vibrant, bright orange color with a small, lighter patch near the top left.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orange/tangerine is not mentioned.\nB. The color of the orange/tangerine is mentioned in the description but is not orange.\nC. The color of the orange/tangerine is not mentioned.\nD. 
The color of the orange/tangerine is mentioned in the description and is orange.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, round orange with a smooth, glossy surface. The orange has a vibrant, bright orange color with a small, lighter patch near the top left.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The surface or the orange/tangerine is not mentioned.\nB. The color of the surface is mentioned in the description but is not bright orange.\nC. The color of the surface is not mentioned, but the surface of the orange/tangerine is mentioned.\nD. The color of the surface is mentioned in the description and is bright orange.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, round orange with a smooth, glossy surface. The orange has a vibrant, bright orange color with a small, lighter patch near the top left.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orange/tangerine is not mentioned.\nB. The texture of the orange/tangerine is mentioned in the description but is not smooth.\nC. 
The texture of the orange/tangerine is not mentioned.\nD. The texture of the orange/tangerine is mentioned in the description and is smooth.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, round orange with a smooth, glossy surface. The orange has a vibrant, bright orange color with a small, lighter patch near the top left.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orange/tangerine is not mentioned.\nB. The shape of the orange/tangerine is mentioned in the description but is not round.\nC. The shape of the orange/tangerine is not mentioned.\nD. The shape of the orange/tangerine is mentioned in the description and is round.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, round orange with a smooth, glossy surface. The orange has a vibrant, bright orange color with a small, lighter patch near the top left.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The surface or the orange/tangerine is not mentioned.\nB. The texture of the surface is mentioned in the description but is not glossy.\nC. 
The texture of the surface is not mentioned, but the surface of the orange/tangerine is mentioned.\nD. The texture of the surface is mentioned in the description and is glossy.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, round orange with a smooth, glossy surface. The orange has a vibrant, bright orange color with a small, lighter patch near the top left.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orange/tangerine is not mentioned in the description.\nB. The stem of the orange/tangerine is mentioned in the description.\nC. The stem of the orange/tangerine is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, round orange with a smooth, glossy surface. The orange has a vibrant, bright orange color with a small, lighter patch near the top left.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orange/tangerine is not mentioned in the description.\nB. The leaves of the orange/tangerine are mentioned in the description.\nC. 
The leaves of the orange/tangerine are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, round orange with a smooth, glossy surface. The orange has a vibrant, bright orange color with a small, lighter patch near the top left.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orange/tangerine is not mentioned in the description.\nB. The segments of the orange/tangerine are mentioned in the description.\nC. The segments of the orange/tangerine are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, round orange with a smooth, glossy surface. The orange has a vibrant, bright orange color with a small, lighter patch near the top left.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ceiling lights are mentioned in the description.\nB. The ceiling lights are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, round orange with a smooth, glossy surface. The orange has a vibrant, bright orange color with a small, lighter patch near the top left.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orange/tangerine is not mentioned in the description.\nB. The flesh of the orange/tangerine is mentioned in the description.\nC. The flesh of the orange/tangerine is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the orange/tangerine is mentioned in the description and is orange.", + "D. The color of the surface is mentioned in the description and is bright orange.", + "D. The texture of the orange/tangerine is mentioned in the description and is smooth.", + "D. The shape of the orange/tangerine is mentioned in the description and is round.", + "D. The texture of the surface is mentioned in the description and is glossy.", + "C. The stem of the orange/tangerine is not mentioned in the description.", + "C. The leaves of the orange/tangerine are not mentioned in the description.", + "C. The segments of the orange/tangerine are not mentioned in the description.", + "B. The ceiling lights are not mentioned in the description.", + "A. The orange/tangerine is not mentioned in the description." 
+ ], + "score": 0.9, + "score_pos": 1.0, + "score_neg": 0.8, + "neg_valid_num": 5, + "recognition_result": true + }, + "8906172": { + "pred": "A black, curved earphone with a smooth, glossy finish.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the earphone is not mentioned.", + 0 + ], + [ + "The color of the earphone is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The earphone is not mentioned.", + 0 + ], + [ + "The color of the earphone is mentioned in the description and is dark or black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the earphone is mentioned in the description and is dark or black.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the earphone is not mentioned.", + 0 + ], + [ + "The shape of the earphone is mentioned in the description but is not curved.", + -1 + ], + [ + "The earphone is not mentioned.", + 0 + ], + [ + "The shape of the earphone is mentioned in the description and is curved.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the earphone is mentioned in the description and is curved.", + "pred_index": 3, + "question_index": 1, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The control buttons of the earphone are mentioned in the description.", + -1 + ], + [ + "The control buttons of the earphone are not mentioned in the description.", + 1 + ], + [ + "The earphone is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The control buttons of the earphone are not mentioned in the description.", + "pred_index": 1, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mirror is mentioned in the description.", + -1 + ], + [ + "The mirror is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The mirror is not mentioned in the description.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cell phone is mentioned in the description.", + -1 + ], + [ + "The cell phone is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The cell phone is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The faucet is mentioned in the description.", + -1 + ], + [ + "The faucet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The faucet is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sink is mentioned in the description.", + -1 + ], + [ + "The sink is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The sink is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is an earphone or an object of a similar type? 
Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, curved earphone with a smooth, glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is an earphone or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, curved earphone with a smooth, glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the earphone is not mentioned.\nB. The color of the earphone is mentioned in the description but is not dark or black.\nC. The earphone is not mentioned.\nD. The color of the earphone is mentioned in the description and is dark or black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, curved earphone with a smooth, glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the earphone is not mentioned.\nB. The shape of the earphone is mentioned in the description but is not curved.\nC. The earphone is not mentioned.\nD. The shape of the earphone is mentioned in the description and is curved.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, curved earphone with a smooth, glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The control buttons of the earphone are mentioned in the description.\nB. The control buttons of the earphone are not mentioned in the description.\nC. The earphone is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black, curved earphone with a smooth, glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mirror is mentioned in the description.\nB. The mirror is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, curved earphone with a smooth, glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cell phone is mentioned in the description.\nB. The cell phone is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, curved earphone with a smooth, glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The faucet is mentioned in the description.\nB. The faucet is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, curved earphone with a smooth, glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sink is mentioned in the description.\nB. The sink is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the earphone is mentioned in the description and is dark or black.", + "D. The shape of the earphone is mentioned in the description and is curved.", + "B. The control buttons of the earphone are not mentioned in the description.", + "B. The mirror is not mentioned in the description.", + "B. The cell phone is not mentioned in the description.", + "B. The faucet is not mentioned in the description.", + "B. The sink is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "10666665": { + "pred": "A round wall clock with a black frame and a white face. The clock features black Arabic numerals at each hour mark, with the numbers 12, 3, 6, and 9 in larger font. The clock has three black hands: an hour hand, a minute hand, and a second hand. 
The hour hand is pointing between the 10 and 11, the minute hand is pointing at 12, and the second hand is pointing at 6.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clock is not mentioned.", + 0 + ], + [ + "The shape of the clock is not mentioned.", + 0 + ], + [ + "The shape of the clock is mentioned in the description but is not circular.", + -1 + ], + [ + "The shape of the clock is mentioned in the description and is circular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the clock is mentioned in the description and is circular.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clock face or the clock is not mentioned.", + 0 + ], + [ + "The color of the clock face is not mentioned, but the clock face of the clock is mentioned.", + 0.5 + ], + [ + "The color of the clock face is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the clock face is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the clock face is mentioned in the description and is white.", + "pred_index": 3, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The hour hand or the clock is not mentioned.", + 0 + ], + [ + "The color of the hour hand is not mentioned, but the hour hand of the clock is mentioned.", + 0.5 + ], + [ + "The color of the hour hand is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the hour hand is mentioned in the description and is black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the hour hand is mentioned in the description and is black.", + "pred_index": 3, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The numbers or the clock are not mentioned.", + 0 + ], + [ + "The color of the numbers is not mentioned, but the numbers of the clock are mentioned.", + 0.5 + ], + [ + "The color of the numbers is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the numbers is mentioned in the description and is black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the numbers is mentioned in the description and is black.", + "pred_index": 3, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The frame or the clock is not mentioned.", + 0 + ], + [ + "The color of the frame is not mentioned, but the frame of the clock is mentioned.", + 0.5 + ], + [ + "The color of the frame is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the frame is mentioned in the description and is black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the frame is mentioned in the description and is black.", + "pred_index": 3, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bamboo blind is not mentioned in the description.", + 1 + ], + [ + "The bamboo blind is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The bamboo blind is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The microwave is not mentioned in the description.", + 1 + ], + [ + "The microwave is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The microwave is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The digital display of the clock is not mentioned in the description.", + 1 + ], + [ + "The clock is not mentioned in the description.", + 0 + ], + [ + "The digital display of the clock is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The digital display of the clock is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pendulum of the clock is not mentioned in the description.", + 1 + ], + [ + "The clock is not mentioned in the description.", + 0 + ], + [ + "The pendulum of the clock is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The pendulum of the clock is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The faucet is not mentioned in the description.", + 1 + ], + [ + "The faucet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The faucet is not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a clock or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals at each hour mark, with the numbers 12, 3, 6, and 9 in larger font. The clock has three black hands: an hour hand, a minute hand, and a second hand. The hour hand is pointing between the 10 and 11, the minute hand is pointing at 12, and the second hand is pointing at 6.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a clock or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. 
The clock features black Arabic numerals at each hour mark, with the numbers 12, 3, 6, and 9 in larger font. The clock has three black hands: an hour hand, a minute hand, and a second hand. The hour hand is pointing between the 10 and 11, the minute hand is pointing at 12, and the second hand is pointing at 6.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The clock is not mentioned.\nB. The shape of the clock is not mentioned.\nC. The shape of the clock is mentioned in the description but is not circular.\nD. The shape of the clock is mentioned in the description and is circular.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals at each hour mark, with the numbers 12, 3, 6, and 9 in larger font. The clock has three black hands: an hour hand, a minute hand, and a second hand. The hour hand is pointing between the 10 and 11, the minute hand is pointing at 12, and the second hand is pointing at 6.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The clock face or the clock is not mentioned.\nB. The color of the clock face is not mentioned, but the clock face of the clock is mentioned.\nC. The color of the clock face is mentioned in the description but is not white.\nD. 
The color of the clock face is mentioned in the description and is white.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals at each hour mark, with the numbers 12, 3, 6, and 9 in larger font. The clock has three black hands: an hour hand, a minute hand, and a second hand. The hour hand is pointing between the 10 and 11, the minute hand is pointing at 12, and the second hand is pointing at 6.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The hour hand or the clock is not mentioned.\nB. The color of the hour hand is not mentioned, but the hour hand of the clock is mentioned.\nC. The color of the hour hand is mentioned in the description but is not black.\nD. The color of the hour hand is mentioned in the description and is black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals at each hour mark, with the numbers 12, 3, 6, and 9 in larger font. The clock has three black hands: an hour hand, a minute hand, and a second hand. 
The hour hand is pointing between the 10 and 11, the minute hand is pointing at 12, and the second hand is pointing at 6.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The numbers or the clock are not mentioned.\nB. The color of the numbers is not mentioned, but the numbers of the clock are mentioned.\nC. The color of the numbers is mentioned in the description but is not black.\nD. The color of the numbers is mentioned in the description and is black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals at each hour mark, with the numbers 12, 3, 6, and 9 in larger font. The clock has three black hands: an hour hand, a minute hand, and a second hand. The hour hand is pointing between the 10 and 11, the minute hand is pointing at 12, and the second hand is pointing at 6.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The frame or the clock is not mentioned.\nB. The color of the frame is not mentioned, but the frame of the clock is mentioned.\nC. The color of the frame is mentioned in the description but is not black.\nD. The color of the frame is mentioned in the description and is black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals at each hour mark, with the numbers 12, 3, 6, and 9 in larger font. The clock has three black hands: an hour hand, a minute hand, and a second hand. The hour hand is pointing between the 10 and 11, the minute hand is pointing at 12, and the second hand is pointing at 6.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bamboo blind is not mentioned in the description.\nB. The bamboo blind is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals at each hour mark, with the numbers 12, 3, 6, and 9 in larger font. The clock has three black hands: an hour hand, a minute hand, and a second hand. The hour hand is pointing between the 10 and 11, the minute hand is pointing at 12, and the second hand is pointing at 6.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The microwave is not mentioned in the description.\nB. The microwave is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals at each hour mark, with the numbers 12, 3, 6, and 9 in larger font. The clock has three black hands: an hour hand, a minute hand, and a second hand. The hour hand is pointing between the 10 and 11, the minute hand is pointing at 12, and the second hand is pointing at 6.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The digital display of the clock is not mentioned in the description.\nB. The clock is not mentioned in the description.\nC. The digital display of the clock is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals at each hour mark, with the numbers 12, 3, 6, and 9 in larger font. The clock has three black hands: an hour hand, a minute hand, and a second hand. The hour hand is pointing between the 10 and 11, the minute hand is pointing at 12, and the second hand is pointing at 6.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The pendulum of the clock is not mentioned in the description.\nB. The clock is not mentioned in the description.\nC. The pendulum of the clock is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals at each hour mark, with the numbers 12, 3, 6, and 9 in larger font. The clock has three black hands: an hour hand, a minute hand, and a second hand. The hour hand is pointing between the 10 and 11, the minute hand is pointing at 12, and the second hand is pointing at 6.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The faucet is not mentioned in the description.\nB. The faucet is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The shape of the clock is mentioned in the description and is circular.", + "D. The color of the clock face is mentioned in the description and is white.", + "D. The color of the hour hand is mentioned in the description and is black.", + "D. The color of the numbers is mentioned in the description and is black.", + "D. The color of the frame is mentioned in the description and is black.", + "A. The bamboo blind is not mentioned in the description.", + "A. The microwave is not mentioned in the description.", + "A. The digital display of the clock is not mentioned in the description.", + "A. The pendulum of the clock is not mentioned in the description.", + "A. The faucet is not mentioned in the description." 
+ ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "10811497": { + "pred": "The mouse is a dark green, oval-shaped device with a smooth surface. It has a small, circular indentation near the bottom edge.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the key is not mentioned.", + 0 + ], + [ + "The color of the key is mentioned in the description and is dark green, gray, or black.", + 1 + ], + [ + "The color of the key is mentioned in the description but is not dark green, gray, or black.", + -1 + ], + [ + "The key is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "D. The key is not mentioned.", + "pred_index": 3, + "question_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the key is not mentioned.", + 0 + ], + [ + "The material of the key is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the key is mentioned in the description but is not plastic.", + -1 + ], + [ + "The key is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "D. The key is not mentioned.", + "pred_index": 3, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the key is not mentioned.", + 0 + ], + [ + "The texture of the key is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the key is mentioned in the description but is not smooth.", + -1 + ], + [ + "The key is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The texture of the key is not mentioned.", + "pred_index": 0, + "question_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the key is not mentioned.", + 0 + ], + [ + "The shape of the key is mentioned in the description and is rounded, circular, or oval.", + 1 + ], + [ + "The shape of the key is mentioned in the description but is not rounded, circular, or oval.", + -1 + ], + [ + "The key is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the key is mentioned in the description and is rounded, circular, or oval.", + "pred_index": 1, + "question_index": 3, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The key is not mentioned in the description.", + 0 + ], + [ + "The key bow of the key is not mentioned in the description.", + 1 + ], + [ + "The key bow of the key is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The key is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The key is not mentioned in the description.", + 0 + ], + [ + "The key teeth of the key are not mentioned in the description.", + 1 + ], + [ + "The key teeth of the key are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The key is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sticky note is not mentioned in the description.", + 1 + ], + [ + "The sticky note is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The sticky note is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The smartphone is not mentioned in the description.", + 1 + ], + [ + "The smartphone is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The smartphone is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The black fabric item is not mentioned in the description.", + 1 + ], + [ + "The black fabric item is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The black fabric item is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 0 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a key or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "B. No", + "pred_index": 1, + "eval_result": "incorrect" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse is a dark green, oval-shaped device with a smooth surface. It has a small, circular indentation near the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a key or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse is a dark green, oval-shaped device with a smooth surface. It has a small, circular indentation near the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the key is not mentioned.\nB. The color of the key is mentioned in the description and is dark green, gray, or black.\nC. The color of the key is mentioned in the description but is not dark green, gray, or black.\nD. The key is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe mouse is a dark green, oval-shaped device with a smooth surface. It has a small, circular indentation near the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the key is not mentioned.\nB. The material of the key is mentioned in the description and is plastic.\nC. The material of the key is mentioned in the description but is not plastic.\nD. The key is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse is a dark green, oval-shaped device with a smooth surface. It has a small, circular indentation near the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the key is not mentioned.\nB. The texture of the key is mentioned in the description and is smooth.\nC. The texture of the key is mentioned in the description but is not smooth.\nD. The key is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse is a dark green, oval-shaped device with a smooth surface. 
It has a small, circular indentation near the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the key is not mentioned.\nB. The shape of the key is mentioned in the description and is rounded, circular, or oval.\nC. The shape of the key is mentioned in the description but is not rounded, circular, or oval.\nD. The key is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse is a dark green, oval-shaped device with a smooth surface. It has a small, circular indentation near the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The key is not mentioned in the description.\nB. The key bow of the key is not mentioned in the description.\nC. The key bow of the key is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse is a dark green, oval-shaped device with a smooth surface. 
It has a small, circular indentation near the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The key is not mentioned in the description.\nB. The key teeth of the key are not mentioned in the description.\nC. The key teeth of the key are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse is a dark green, oval-shaped device with a smooth surface. It has a small, circular indentation near the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sticky note is not mentioned in the description.\nB. The sticky note is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse is a dark green, oval-shaped device with a smooth surface. It has a small, circular indentation near the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The smartphone is not mentioned in the description.\nB. 
The smartphone is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse is a dark green, oval-shaped device with a smooth surface. It has a small, circular indentation near the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The black fabric item is not mentioned in the description.\nB. The black fabric item is mentioned in the description.\n" + ], + "response": [ + "B. No", + "D. The key is not mentioned.", + "D. The key is not mentioned.", + "A. The texture of the key is not mentioned.", + "B. The shape of the key is mentioned in the description and is rounded, circular, or oval.", + "A. The key is not mentioned in the description.", + "A. The key is not mentioned in the description.", + "A. The sticky note is not mentioned in the description.", + "A. The smartphone is not mentioned in the description.", + "A. The black fabric item is not mentioned in the description." + ], + "score": 0.0, + "score_pos": 0.0, + "score_neg": 0.0, + "neg_valid_num": 5, + "recognition_result": false + }, + "11021562": { + "pred": "The microwave oven has a white exterior with a rectangular shape. It features a prominent, curved handle on the front door, which is also white. The control panel is located on the right side of the door, with a series of buttons and a small display screen. 
The top of the microwave has a vented section for ventilation.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the handle is not mentioned, but the handle of the microwave is mentioned.", + 0.5 + ], + [ + "The shape of the handle is mentioned in the description but is not curved.", + -1 + ], + [ + "The shape of the handle is mentioned in the description and is curved.", + 1 + ], + [ + "The handle or the microwave is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the handle is mentioned in the description and is curved.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the casing is not mentioned, but the casing of the microwave is mentioned.", + 0.5 + ], + [ + "The texture of the casing is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the casing is mentioned in the description and is smooth.", + 1 + ], + [ + "The casing or the microwave is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The texture of the casing is not mentioned, but the casing of the microwave is mentioned.", + "pred_index": 0, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orientation of the handle is not mentioned, but the handle of the microwave is mentioned.", + 0.5 + ], + [ + "The orientation of the handle is mentioned in the description but is not vertical.", + -1 + ], + [ + "The orientation of the handle is mentioned in the description and is vertical.", + 1 + ], + [ + "The handle or the microwave is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The orientation of the handle is not mentioned, but the handle of the microwave is mentioned.", + "pred_index": 0, + "question_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the microwave is not mentioned.", + 0 + ], + [ + "The color of the microwave is mentioned in the description but is not white, beige, or yellow.", + -1 + ], + [ + "The color of the microwave is mentioned in the description and is white, beige, or yellow.", + 1 + ], + [ + "The microwave is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the microwave is mentioned in the description and is white, beige, or yellow.", + "pred_index": 2, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The position of the vent is not mentioned, but the vent of the microwave is mentioned.", + 0.5 + ], + [ + "The position of the vent is mentioned in the description but is not top.", + -1 + ], + [ + "The position of the vent is mentioned in the description and is top.", + 1 + ], + [ + "The vent or the microwave is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The position of the vent is mentioned in the description and is top.", + "pred_index": 2, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fire extinguisher is mentioned in the description.", + -1 + ], + [ + "The fire extinguisher is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The fire extinguisher is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The turntable of the microwave is mentioned in the description.", + -1 + ], + [ + "The turntable of the microwave is not mentioned in the description.", + 1 + ], + [ + "The microwave is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The turntable of the microwave is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The interior light of the microwave is mentioned in the description.", + -1 + ], + [ + "The interior light of the microwave is not mentioned in the description.", + 1 + ], + [ + "The microwave is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The interior light of the microwave is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The windows are mentioned in the description.", + -1 + ], + [ + "The windows are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The windows are not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rug is mentioned in the description.", + -1 + ], + [ + "The rug is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The rug is not mentioned in the description.", + "pred_index": 1, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a microwave or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven has a white exterior with a rectangular shape. It features a prominent, curved handle on the front door, which is also white. The control panel is located on the right side of the door, with a series of buttons and a small display screen. The top of the microwave has a vented section for ventilation.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a microwave or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven has a white exterior with a rectangular shape. 
It features a prominent, curved handle on the front door, which is also white. The control panel is located on the right side of the door, with a series of buttons and a small display screen. The top of the microwave has a vented section for ventilation.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the handle is not mentioned, but the handle of the microwave is mentioned.\nB. The shape of the handle is mentioned in the description but is not curved.\nC. The shape of the handle is mentioned in the description and is curved.\nD. The handle or the microwave is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven has a white exterior with a rectangular shape. It features a prominent, curved handle on the front door, which is also white. The control panel is located on the right side of the door, with a series of buttons and a small display screen. The top of the microwave has a vented section for ventilation.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the casing is not mentioned, but the casing of the microwave is mentioned.\nB. The texture of the casing is mentioned in the description but is not smooth.\nC. The texture of the casing is mentioned in the description and is smooth.\nD. The casing or the microwave is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven has a white exterior with a rectangular shape. It features a prominent, curved handle on the front door, which is also white. The control panel is located on the right side of the door, with a series of buttons and a small display screen. The top of the microwave has a vented section for ventilation.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orientation of the handle is not mentioned, but the handle of the microwave is mentioned.\nB. The orientation of the handle is mentioned in the description but is not vertical.\nC. The orientation of the handle is mentioned in the description and is vertical.\nD. The handle or the microwave is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven has a white exterior with a rectangular shape. It features a prominent, curved handle on the front door, which is also white. The control panel is located on the right side of the door, with a series of buttons and a small display screen. The top of the microwave has a vented section for ventilation.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The color of the microwave is not mentioned.\nB. The color of the microwave is mentioned in the description but is not white, beige, or yellow.\nC. The color of the microwave is mentioned in the description and is white, beige, or yellow.\nD. The microwave is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven has a white exterior with a rectangular shape. It features a prominent, curved handle on the front door, which is also white. The control panel is located on the right side of the door, with a series of buttons and a small display screen. The top of the microwave has a vented section for ventilation.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The position of the vent is not mentioned, but the vent of the microwave is mentioned.\nB. The position of the vent is mentioned in the description but is not top.\nC. The position of the vent is mentioned in the description and is top.\nD. The vent or the microwave is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven has a white exterior with a rectangular shape. 
It features a prominent, curved handle on the front door, which is also white. The control panel is located on the right side of the door, with a series of buttons and a small display screen. The top of the microwave has a vented section for ventilation.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fire extinguisher is mentioned in the description.\nB. The fire extinguisher is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven has a white exterior with a rectangular shape. It features a prominent, curved handle on the front door, which is also white. The control panel is located on the right side of the door, with a series of buttons and a small display screen. The top of the microwave has a vented section for ventilation.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The turntable of the microwave is mentioned in the description.\nB. The turntable of the microwave is not mentioned in the description.\nC. The microwave is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven has a white exterior with a rectangular shape. It features a prominent, curved handle on the front door, which is also white. The control panel is located on the right side of the door, with a series of buttons and a small display screen. The top of the microwave has a vented section for ventilation.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The interior light of the microwave is mentioned in the description.\nB. The interior light of the microwave is not mentioned in the description.\nC. The microwave is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven has a white exterior with a rectangular shape. It features a prominent, curved handle on the front door, which is also white. The control panel is located on the right side of the door, with a series of buttons and a small display screen. The top of the microwave has a vented section for ventilation.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The windows are mentioned in the description.\nB. The windows are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven has a white exterior with a rectangular shape. It features a prominent, curved handle on the front door, which is also white. The control panel is located on the right side of the door, with a series of buttons and a small display screen. The top of the microwave has a vented section for ventilation.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rug is mentioned in the description.\nB. The rug is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The shape of the handle is mentioned in the description and is curved.", + "A. The texture of the casing is not mentioned, but the casing of the microwave is mentioned.", + "A. The orientation of the handle is not mentioned, but the handle of the microwave is mentioned.", + "C. The color of the microwave is mentioned in the description and is white, beige, or yellow.", + "C. The position of the vent is mentioned in the description and is top.", + "B. The fire extinguisher is not mentioned in the description.", + "B. The turntable of the microwave is not mentioned in the description.", + "B. The interior light of the microwave is not mentioned in the description.", + "B. The windows are not mentioned in the description.", + "B. The rug is not mentioned in the description." + ], + "score": 0.9, + "score_pos": 0.8, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "11021563": { + "pred": "A stainless steel gas stove with a black control panel featuring four knobs. The stove has a rectangular shape with a slightly raised back panel. 
The control panel is positioned at the back, and the stove has a smooth, reflective surface.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The control panel or the stove is not mentioned.", + 0 + ], + [ + "The location of the control panel is mentioned in the description but is not back.", + -1 + ], + [ + "The location of the control panel is mentioned in the description and is back.", + 1 + ], + [ + "The location of the control panel is not mentioned, but the control panel of the stove is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. The location of the control panel is mentioned in the description and is back.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The burners or the stove are not mentioned.", + 0 + ], + [ + "The color of the burners is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the burners is mentioned in the description and is black.", + 1 + ], + [ + "The color of the burners is not mentioned, but the burners of the stove are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The burners or the stove are not mentioned.", + "pred_index": 0, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The burners or the stove are not mentioned.", + 0 + ], + [ + "The number of the burners is mentioned in the description but is not 4.", + -1 + ], + [ + "The number of the burners is mentioned in the description and is 4.", + 1 + ], + [ + "The number of the burners is not mentioned, but the burners of the stove are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. 
The number of the burners is mentioned in the description and is 4.", + "pred_index": 2, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The burners or the stove are not mentioned.", + 0 + ], + [ + "The shape of the burners is mentioned in the description but is not coiled.", + -1 + ], + [ + "The shape of the burners is mentioned in the description and is coiled.", + 1 + ], + [ + "The shape of the burners is not mentioned, but the burners of the stove are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The burners or the stove are not mentioned.", + "pred_index": 0, + "question_index": 3, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The countertop is mentioned in the description.", + -1 + ], + [ + "The countertop is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The countertop is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The kitchen cabinets are mentioned in the description.", + -1 + ], + [ + "The kitchen cabinets are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The kitchen cabinets are not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The broom is mentioned in the description.", + -1 + ], + [ + "The broom is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The broom is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The induction cooktop surface of the stove is mentioned in the description.", + -1 + ], + [ + "The induction cooktop surface of the stove is not mentioned in the description.", + 1 + ], + [ + "The stove is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The induction cooktop surface of the stove is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The small table is mentioned in the description.", + -1 + ], + [ + "The small table is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The small table is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a stove or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel gas stove with a black control panel featuring four knobs. 
The stove has a rectangular shape with a slightly raised back panel. The control panel is positioned at the back, and the stove has a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a stove or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel gas stove with a black control panel featuring four knobs. The stove has a rectangular shape with a slightly raised back panel. The control panel is positioned at the back, and the stove has a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The control panel or the stove is not mentioned.\nB. The location of the control panel is mentioned in the description but is not back.\nC. The location of the control panel is mentioned in the description and is back.\nD. The location of the control panel is not mentioned, but the control panel of the stove is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel gas stove with a black control panel featuring four knobs. The stove has a rectangular shape with a slightly raised back panel. The control panel is positioned at the back, and the stove has a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The burners or the stove are not mentioned.\nB. The color of the burners is mentioned in the description but is not black.\nC. The color of the burners is mentioned in the description and is black.\nD. The color of the burners is not mentioned, but the burners of the stove are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel gas stove with a black control panel featuring four knobs. The stove has a rectangular shape with a slightly raised back panel. The control panel is positioned at the back, and the stove has a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The burners or the stove are not mentioned.\nB. The number of the burners is mentioned in the description but is not 4.\nC. The number of the burners is mentioned in the description and is 4.\nD. The number of the burners is not mentioned, but the burners of the stove are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel gas stove with a black control panel featuring four knobs. The stove has a rectangular shape with a slightly raised back panel. The control panel is positioned at the back, and the stove has a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The burners or the stove are not mentioned.\nB. The shape of the burners is mentioned in the description but is not coiled.\nC. The shape of the burners is mentioned in the description and is coiled.\nD. The shape of the burners is not mentioned, but the burners of the stove are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel gas stove with a black control panel featuring four knobs. The stove has a rectangular shape with a slightly raised back panel. The control panel is positioned at the back, and the stove has a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The countertop is mentioned in the description.\nB. The countertop is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel gas stove with a black control panel featuring four knobs. The stove has a rectangular shape with a slightly raised back panel. The control panel is positioned at the back, and the stove has a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The kitchen cabinets are mentioned in the description.\nB. The kitchen cabinets are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel gas stove with a black control panel featuring four knobs. The stove has a rectangular shape with a slightly raised back panel. The control panel is positioned at the back, and the stove has a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The broom is mentioned in the description.\nB. The broom is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel gas stove with a black control panel featuring four knobs. The stove has a rectangular shape with a slightly raised back panel. The control panel is positioned at the back, and the stove has a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The induction cooktop surface of the stove is mentioned in the description.\nB. The induction cooktop surface of the stove is not mentioned in the description.\nC. The stove is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel gas stove with a black control panel featuring four knobs. The stove has a rectangular shape with a slightly raised back panel. The control panel is positioned at the back, and the stove has a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The small table is mentioned in the description.\nB. The small table is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The location of the control panel is mentioned in the description and is back.", + "A. The burners or the stove are not mentioned.", + "C. The number of the burners is mentioned in the description and is 4.", + "A. The burners or the stove are not mentioned.", + "B. 
The countertop is not mentioned in the description.", + "B. The kitchen cabinets are not mentioned in the description.", + "B. The broom is not mentioned in the description.", + "B. The induction cooktop surface of the stove is not mentioned in the description.", + "B. The small table is not mentioned in the description." + ], + "score": 0.7777777777777778, + "score_pos": 0.5, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "12348078": { + "pred": "A woman with dark hair tied up in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is holding a baby in her arms.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clothes or the person are not mentioned.", + 0 + ], + [ + "The style of the clothes is mentioned in the description and is t-shirt.", + 1 + ], + [ + "The style of the clothes is mentioned in the description but is not t-shirt.", + -1 + ], + [ + "The style of the clothes is not mentioned, but the clothes of the person are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The style of the clothes is mentioned in the description and is t-shirt.", + "pred_index": 1, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clothes or the person are not mentioned.", + 0 + ], + [ + "The color of the clothes is mentioned in the description and is white.", + 1 + ], + [ + "The color of the clothes is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the clothes is not mentioned, but the clothes of the person are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the clothes is mentioned in the description and is white.", + "pred_index": 1, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The person is not mentioned.", + 0 + ], + [ + "The hairstyle of the person is mentioned in the description and is bun.", + 1 + ], + [ + "The hairstyle of the person is mentioned in the description but is not bun.", + -1 + ], + [ + "The hairstyle of the person is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The hairstyle of the person is mentioned in the description and is bun.", + "pred_index": 1, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The person is not mentioned.", + 0 + ], + [ + "The hair color of the person is mentioned in the description and is dark or black.", + 1 + ], + [ + "The hair color of the person is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The hair color of the person is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The hair color of the person is mentioned in the description and is dark or black.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pants or the person are not mentioned.", + 0 + ], + [ + "The color of the pants is mentioned in the description and is black.", + 1 + ], + [ + "The color of the pants is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the pants is not mentioned, but the pants of the person are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the pants is mentioned in the description and is black.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouth of the person is mentioned in the description.", + -1 + ], + [ + "The person is not mentioned in the description.", + 0 + ], + [ + "The mouth of the person is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The mouth of the person is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The face of the person is mentioned in the description.", + -1 + ], + [ + "The person is not mentioned in the description.", + 0 + ], + [ + "The face of the person is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The face of the person is not mentioned in the description.", + "pred_index": 2, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The nose of the person is mentioned in the description.", + -1 + ], + [ + "The person is not mentioned in the description.", + 0 + ], + [ + "The nose of the person is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The nose of the person is not mentioned in the description.", + "pred_index": 2, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wall is mentioned in the description.", + -1 + ], + [ + "The wall is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The wall is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bicycle cart is mentioned in the description.", + -1 + ], + [ + "The bicycle cart is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bicycle cart is not mentioned in the description.", + "pred_index": 1, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a person or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied up in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. 
She is holding a baby in her arms.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a person or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied up in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is holding a baby in her arms.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The clothes or the person are not mentioned.\nB. The style of the clothes is mentioned in the description and is t-shirt.\nC. The style of the clothes is mentioned in the description but is not t-shirt.\nD. The style of the clothes is not mentioned, but the clothes of the person are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied up in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. 
She is holding a baby in her arms.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The clothes or the person are not mentioned.\nB. The color of the clothes is mentioned in the description and is white.\nC. The color of the clothes is mentioned in the description but is not white.\nD. The color of the clothes is not mentioned, but the clothes of the person are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied up in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is holding a baby in her arms.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The person is not mentioned.\nB. The hairstyle of the person is mentioned in the description and is bun.\nC. The hairstyle of the person is mentioned in the description but is not bun.\nD. The hairstyle of the person is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied up in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. 
She is holding a baby in her arms.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The person is not mentioned.\nB. The hair color of the person is mentioned in the description and is dark or black.\nC. The hair color of the person is mentioned in the description but is not dark or black.\nD. The hair color of the person is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied up in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is holding a baby in her arms.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pants or the person are not mentioned.\nB. The color of the pants is mentioned in the description and is black.\nC. The color of the pants is mentioned in the description but is not black.\nD. The color of the pants is not mentioned, but the pants of the person are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied up in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. 
She is holding a baby in her arms.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mouth of the person is mentioned in the description.\nB. The person is not mentioned in the description.\nC. The mouth of the person is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied up in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is holding a baby in her arms.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The face of the person is mentioned in the description.\nB. The person is not mentioned in the description.\nC. The face of the person is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied up in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is holding a baby in her arms.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The nose of the person is mentioned in the description.\nB. The person is not mentioned in the description.\nC. The nose of the person is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied up in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is holding a baby in her arms.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wall is mentioned in the description.\nB. The wall is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied up in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is holding a baby in her arms.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bicycle cart is mentioned in the description.\nB. The bicycle cart is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The style of the clothes is mentioned in the description and is t-shirt.", + "B. 
The color of the clothes is mentioned in the description and is white.", + "B. The hairstyle of the person is mentioned in the description and is bun.", + "B. The hair color of the person is mentioned in the description and is dark or black.", + "B. The color of the pants is mentioned in the description and is black.", + "C. The mouth of the person is not mentioned in the description.", + "C. The face of the person is not mentioned in the description.", + "C. The nose of the person is not mentioned in the description.", + "B. The wall is not mentioned in the description.", + "B. The bicycle cart is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "13138178": { + "pred": "A blue plastic stool with a smooth, curved seat and rounded legs. The stool has a simple, sturdy design with a slightly glossy finish.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the stool is mentioned in the description and is blue.", + 1 + ], + [ + "The stool is not mentioned.", + 0 + ], + [ + "The color of the stool is mentioned in the description but is not blue.", + -1 + ], + [ + "The color of the stool is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the stool is mentioned in the description and is blue.", + "pred_index": 0, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the stool is mentioned in the description and is plastic.", + 1 + ], + [ + "The stool is not mentioned.", + 0 + ], + [ + "The material of the stool is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the stool is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The material of the stool is mentioned in the description and is plastic.", + "pred_index": 0, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the seat is mentioned in the description and is flat.", + 1 + ], + [ + "The seat or the stool is not mentioned.", + 0 + ], + [ + "The shape of the seat is mentioned in the description but is not flat.", + -1 + ], + [ + "The shape of the seat is not mentioned, but the seat of the stool is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the seat is mentioned in the description but is not flat.", + "pred_index": 2, + "question_index": 2, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The armrest of the stool is mentioned in the description.", + -1 + ], + [ + "The armrest of the stool is not mentioned in the description.", + 1 + ], + [ + "The stool is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The armrest of the stool is not mentioned in the description.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The backrest of the stool is mentioned in the description.", + -1 + ], + [ + "The backrest of the stool is not mentioned in the description.", + 1 + ], + [ + "The stool is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The backrest of the stool is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Cooking grate is mentioned in the description.", + -1 + ], + [ + "The Cooking grate is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The Cooking grate is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The footrest of the stool is mentioned in the description.", + -1 + ], + [ + "The footrest of the stool is not mentioned in the description.", + 1 + ], + [ + "The stool is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The footrest of the stool is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The swivel base of the stool is mentioned in the description.", + -1 + ], + [ + "The swivel base of the stool is not mentioned in the description.", + 1 + ], + [ + "The stool is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The swivel base of the stool is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a stool or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue plastic stool with a smooth, curved seat and rounded legs. The stool has a simple, sturdy design with a slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a stool or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue plastic stool with a smooth, curved seat and rounded legs. The stool has a simple, sturdy design with a slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the stool is mentioned in the description and is blue.\nB. The stool is not mentioned.\nC. The color of the stool is mentioned in the description but is not blue.\nD. The color of the stool is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue plastic stool with a smooth, curved seat and rounded legs. The stool has a simple, sturdy design with a slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the stool is mentioned in the description and is plastic.\nB. The stool is not mentioned.\nC. The material of the stool is mentioned in the description but is not plastic.\nD. The material of the stool is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue plastic stool with a smooth, curved seat and rounded legs. The stool has a simple, sturdy design with a slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the seat is mentioned in the description and is flat.\nB. The seat or the stool is not mentioned.\nC. The shape of the seat is mentioned in the description but is not flat.\nD. The shape of the seat is not mentioned, but the seat of the stool is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue plastic stool with a smooth, curved seat and rounded legs. The stool has a simple, sturdy design with a slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The armrest of the stool is mentioned in the description.\nB. The armrest of the stool is not mentioned in the description.\nC. The stool is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue plastic stool with a smooth, curved seat and rounded legs. The stool has a simple, sturdy design with a slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The backrest of the stool is mentioned in the description.\nB. The backrest of the stool is not mentioned in the description.\nC. The stool is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue plastic stool with a smooth, curved seat and rounded legs. 
The stool has a simple, sturdy design with a slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Cooking grate is mentioned in the description.\nB. The Cooking grate is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue plastic stool with a smooth, curved seat and rounded legs. The stool has a simple, sturdy design with a slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The footrest of the stool is mentioned in the description.\nB. The footrest of the stool is not mentioned in the description.\nC. The stool is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue plastic stool with a smooth, curved seat and rounded legs. The stool has a simple, sturdy design with a slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The swivel base of the stool is mentioned in the description.\nB. 
The swivel base of the stool is not mentioned in the description.\nC. The stool is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The color of the stool is mentioned in the description and is blue.", + "A. The material of the stool is mentioned in the description and is plastic.", + "C. The shape of the seat is mentioned in the description but is not flat.", + "B. The armrest of the stool is not mentioned in the description.", + "B. The backrest of the stool is not mentioned in the description.", + "B. The Cooking grate is not mentioned in the description.", + "B. The footrest of the stool is not mentioned in the description.", + "B. The swivel base of the stool is not mentioned in the description." + ], + "score": 0.75, + "score_pos": 0.3333333333333333, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "13187927": { + "pred": "The motorcycle is a white scooter with a sleek, modern design. It features a black seat and a rear storage compartment with a red reflector. The rear light is integrated into the storage compartment, and the scooter has a visible license plate mounted below the light. The handlebars are equipped with rearview mirrors, and the front section includes a headlight and a windshield.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the exterior is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the exterior is mentioned in the description and is white.", + 1 + ], + [ + "The exterior or the motorcycle is not mentioned.", + 0 + ], + [ + "The color of the exterior is not mentioned, but the exterior of the motorcycle is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the exterior is mentioned in the description and is white.", + "pred_index": 1, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the seat is mentioned in the description but is not leather or vinyl.", + -1 + ], + [ + "The material of the seat is mentioned in the description and is leather or vinyl.", + 1 + ], + [ + "The seat or the motorcycle is not mentioned.", + 0 + ], + [ + "The material of the seat is not mentioned, but the seat of the motorcycle is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the seat is not mentioned, but the seat of the motorcycle is mentioned.", + "pred_index": 3, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the seat is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the seat is mentioned in the description and is black.", + 1 + ], + [ + "The seat or the motorcycle is not mentioned.", + 0 + ], + [ + "The color of the seat is not mentioned, but the seat of the motorcycle is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the seat is mentioned in the description and is black.", + "pred_index": 1, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the taillight is mentioned in the description but is not red.", + -1 + ], + [ + "The color of the taillight is mentioned in the description and is red.", + 1 + ], + [ + "The taillight or the motorcycle is not mentioned.", + 0 + ], + [ + "The color of the taillight is not mentioned, but the taillight of the motorcycle is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the taillight is mentioned in the description and is red.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the license plate is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the license plate is mentioned in the description and is rectangular.", + 1 + ], + [ + "The license plate or the motorcycle is not mentioned.", + 0 + ], + [ + "The shape of the license plate is not mentioned, but the license plate of the motorcycle is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the license plate is not mentioned, but the license plate of the motorcycle is mentioned.", + "pred_index": 3, + "question_index": 4, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The motorcycle is not mentioned in the description.", + 0 + ], + [ + "The windshield of the motorcycle is not mentioned in the description.", + 1 + ], + [ + "The windshield of the motorcycle is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The windshield of the motorcycle is mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The balconies are not mentioned in the description.", + 1 + ], + [ + "The balconies are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The balconies are not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Pepsi advertisements are not mentioned in the description.", + 1 + ], + [ + "The Pepsi advertisements are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The Pepsi advertisements are not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The windows are not mentioned in the description.", + 1 + ], + [ + "The windows are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The windows are mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The decorative metal grill is not mentioned in the description.", + 1 + ], + [ + "The decorative metal grill is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The decorative metal grill is not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a motorcycle or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a sleek, modern design. It features a black seat and a rear storage compartment with a red reflector. The rear light is integrated into the storage compartment, and the scooter has a visible license plate mounted below the light. The handlebars are equipped with rearview mirrors, and the front section includes a headlight and a windshield.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a motorcycle or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a sleek, modern design. It features a black seat and a rear storage compartment with a red reflector. The rear light is integrated into the storage compartment, and the scooter has a visible license plate mounted below the light. The handlebars are equipped with rearview mirrors, and the front section includes a headlight and a windshield.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the exterior is mentioned in the description but is not white.\nB. 
The color of the exterior is mentioned in the description and is white.\nC. The exterior or the motorcycle is not mentioned.\nD. The color of the exterior is not mentioned, but the exterior of the motorcycle is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a sleek, modern design. It features a black seat and a rear storage compartment with a red reflector. The rear light is integrated into the storage compartment, and the scooter has a visible license plate mounted below the light. The handlebars are equipped with rearview mirrors, and the front section includes a headlight and a windshield.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the seat is mentioned in the description but is not leather or vinyl.\nB. The material of the seat is mentioned in the description and is leather or vinyl.\nC. The seat or the motorcycle is not mentioned.\nD. The material of the seat is not mentioned, but the seat of the motorcycle is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a sleek, modern design. 
It features a black seat and a rear storage compartment with a red reflector. The rear light is integrated into the storage compartment, and the scooter has a visible license plate mounted below the light. The handlebars are equipped with rearview mirrors, and the front section includes a headlight and a windshield.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the seat is mentioned in the description but is not black.\nB. The color of the seat is mentioned in the description and is black.\nC. The seat or the motorcycle is not mentioned.\nD. The color of the seat is not mentioned, but the seat of the motorcycle is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a sleek, modern design. It features a black seat and a rear storage compartment with a red reflector. The rear light is integrated into the storage compartment, and the scooter has a visible license plate mounted below the light. The handlebars are equipped with rearview mirrors, and the front section includes a headlight and a windshield.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the taillight is mentioned in the description but is not red.\nB. The color of the taillight is mentioned in the description and is red.\nC. The taillight or the motorcycle is not mentioned.\nD. 
The color of the taillight is not mentioned, but the taillight of the motorcycle is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a sleek, modern design. It features a black seat and a rear storage compartment with a red reflector. The rear light is integrated into the storage compartment, and the scooter has a visible license plate mounted below the light. The handlebars are equipped with rearview mirrors, and the front section includes a headlight and a windshield.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the license plate is mentioned in the description but is not rectangular.\nB. The shape of the license plate is mentioned in the description and is rectangular.\nC. The license plate or the motorcycle is not mentioned.\nD. The shape of the license plate is not mentioned, but the license plate of the motorcycle is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a sleek, modern design. It features a black seat and a rear storage compartment with a red reflector. 
The rear light is integrated into the storage compartment, and the scooter has a visible license plate mounted below the light. The handlebars are equipped with rearview mirrors, and the front section includes a headlight and a windshield.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The motorcycle is not mentioned in the description.\nB. The windshield of the motorcycle is not mentioned in the description.\nC. The windshield of the motorcycle is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a sleek, modern design. It features a black seat and a rear storage compartment with a red reflector. The rear light is integrated into the storage compartment, and the scooter has a visible license plate mounted below the light. The handlebars are equipped with rearview mirrors, and the front section includes a headlight and a windshield.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The balconies are not mentioned in the description.\nB. The balconies are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a sleek, modern design. It features a black seat and a rear storage compartment with a red reflector. The rear light is integrated into the storage compartment, and the scooter has a visible license plate mounted below the light. The handlebars are equipped with rearview mirrors, and the front section includes a headlight and a windshield.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Pepsi advertisements are not mentioned in the description.\nB. The Pepsi advertisements are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a sleek, modern design. It features a black seat and a rear storage compartment with a red reflector. The rear light is integrated into the storage compartment, and the scooter has a visible license plate mounted below the light. The handlebars are equipped with rearview mirrors, and the front section includes a headlight and a windshield.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The windows are not mentioned in the description.\nB. The windows are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a sleek, modern design. It features a black seat and a rear storage compartment with a red reflector. The rear light is integrated into the storage compartment, and the scooter has a visible license plate mounted below the light. The handlebars are equipped with rearview mirrors, and the front section includes a headlight and a windshield.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The decorative metal grill is not mentioned in the description.\nB. The decorative metal grill is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the exterior is mentioned in the description and is white.", + "D. The material of the seat is not mentioned, but the seat of the motorcycle is mentioned.", + "B. The color of the seat is mentioned in the description and is black.", + "B. The color of the taillight is mentioned in the description and is red.", + "D. The shape of the license plate is not mentioned, but the license plate of the motorcycle is mentioned.", + "C. The windshield of the motorcycle is mentioned in the description.", + "A. The balconies are not mentioned in the description.", + "A. The Pepsi advertisements are not mentioned in the description.", + "B. The windows are mentioned in the description.", + "A. The decorative metal grill is not mentioned in the description." + ], + "score": 0.5, + "score_pos": 0.8, + "score_neg": 0.2, + "neg_valid_num": 5, + "recognition_result": true + }, + "14490578": { + "pred": "The harbor seal has a sleek, elongated body with a dark, almost black coloration. 
Its skin appears smooth and slightly glossy, with a subtle gradient of lighter shades along its back. The seal's head is rounded, and its body tapers towards the tail.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the seal is mentioned in the description but is not smooth.", + -1 + ], + [ + "The seal is not mentioned.", + 0 + ], + [ + "The texture of the seal is not mentioned.", + 0 + ], + [ + "The texture of the seal is mentioned in the description and is smooth.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The texture of the seal is mentioned in the description and is smooth.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the skin is mentioned in the description but is not gray, black, or dark.", + -1 + ], + [ + "The skin or the seal is not mentioned.", + 0 + ], + [ + "The color of the skin is not mentioned, but the skin of the seal is mentioned.", + 0.5 + ], + [ + "The color of the skin is mentioned in the description and is gray, black, or dark.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the skin is mentioned in the description and is gray, black, or dark.", + "pred_index": 3, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the body is mentioned in the description but is not elongated.", + -1 + ], + [ + "The body or the seal is not mentioned.", + 0 + ], + [ + "The shape of the body is not mentioned, but the body of the seal is mentioned.", + 0.5 + ], + [ + "The shape of the body is mentioned in the description and is elongated.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The shape of the body is mentioned in the description and is elongated.", + "pred_index": 3, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The teeth of the seal are not mentioned in the description.", + 1 + ], + [ + "The teeth of the seal are mentioned in the description.", + -1 + ], + [ + "The seal is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The teeth of the seal are not mentioned in the description.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The claws of the seal are not mentioned in the description.", + 1 + ], + [ + "The claws of the seal are mentioned in the description.", + -1 + ], + [ + "The seal is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The claws of the seal are not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ears of the seal are not mentioned in the description.", + 1 + ], + [ + "The ears of the seal are mentioned in the description.", + -1 + ], + [ + "The seal is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The ears of the seal are not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sand is not mentioned in the description.", + 1 + ], + [ + "The sand is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The sand is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rocks are not mentioned in the description.", + 1 + ], + [ + "The rocks are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The rocks are not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a seal or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark, almost black coloration. Its skin appears smooth and slightly glossy, with a subtle gradient of lighter shades along its back. 
The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a seal or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark, almost black coloration. Its skin appears smooth and slightly glossy, with a subtle gradient of lighter shades along its back. The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the seal is mentioned in the description but is not smooth.\nB. The seal is not mentioned.\nC. The texture of the seal is not mentioned.\nD. The texture of the seal is mentioned in the description and is smooth.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark, almost black coloration. Its skin appears smooth and slightly glossy, with a subtle gradient of lighter shades along its back. 
The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the skin is mentioned in the description but is not gray, black, or dark.\nB. The skin or the seal is not mentioned.\nC. The color of the skin is not mentioned, but the skin of the seal is mentioned.\nD. The color of the skin is mentioned in the description and is gray, black, or dark.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark, almost black coloration. Its skin appears smooth and slightly glossy, with a subtle gradient of lighter shades along its back. The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the body is mentioned in the description but is not elongated.\nB. The body or the seal is not mentioned.\nC. The shape of the body is not mentioned, but the body of the seal is mentioned.\nD. The shape of the body is mentioned in the description and is elongated.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark, almost black coloration. Its skin appears smooth and slightly glossy, with a subtle gradient of lighter shades along its back. The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The teeth of the seal are not mentioned in the description.\nB. The teeth of the seal are mentioned in the description.\nC. The seal is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark, almost black coloration. Its skin appears smooth and slightly glossy, with a subtle gradient of lighter shades along its back. The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The claws of the seal are not mentioned in the description.\nB. The claws of the seal are mentioned in the description.\nC. The seal is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark, almost black coloration. Its skin appears smooth and slightly glossy, with a subtle gradient of lighter shades along its back. The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ears of the seal are not mentioned in the description.\nB. The ears of the seal are mentioned in the description.\nC. The seal is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark, almost black coloration. Its skin appears smooth and slightly glossy, with a subtle gradient of lighter shades along its back. The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sand is not mentioned in the description.\nB. The sand is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark, almost black coloration. Its skin appears smooth and slightly glossy, with a subtle gradient of lighter shades along its back. The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rocks are not mentioned in the description.\nB. The rocks are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The texture of the seal is mentioned in the description and is smooth.", + "D. The color of the skin is mentioned in the description and is gray, black, or dark.", + "D. The shape of the body is mentioned in the description and is elongated.", + "A. The teeth of the seal are not mentioned in the description.", + "A. The claws of the seal are not mentioned in the description.", + "A. The ears of the seal are not mentioned in the description.", + "A. The sand is not mentioned in the description.", + "A. The rocks are not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "14640483": { + "pred": "A rectangular wooden chopping board with a smooth surface and a natural wood grain pattern. 
The board has a slightly rounded edge and a visible handle on one side.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the surface is not mentioned, but the surface of the cutting/chopping board is mentioned.", + 0.5 + ], + [ + "The surface or the cutting/chopping board is not mentioned.", + 0 + ], + [ + "The texture of the surface is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the surface is mentioned in the description and is smooth.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The texture of the surface is mentioned in the description and is smooth.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the cutting/chopping board is not mentioned.", + 0 + ], + [ + "The cutting/chopping board is not mentioned.", + 0 + ], + [ + "The color of the cutting/chopping board is mentioned in the description but is not light brown or wooden.", + -1 + ], + [ + "The color of the cutting/chopping board is mentioned in the description and is light brown or wooden.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the cutting/chopping board is not mentioned.", + "pred_index": 0, + "question_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the cutting/chopping board is not mentioned.", + 0 + ], + [ + "The cutting/chopping board is not mentioned.", + 0 + ], + [ + "The shape of the cutting/chopping board is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the cutting/chopping board is mentioned in the description and is rectangular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The shape of the cutting/chopping board is mentioned in the description and is rectangular.", + "pred_index": 3, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the cutting/chopping board is not mentioned.", + 0 + ], + [ + "The cutting/chopping board is not mentioned.", + 0 + ], + [ + "The material of the cutting/chopping board is mentioned in the description but is not wood.", + -1 + ], + [ + "The material of the cutting/chopping board is mentioned in the description and is wood.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the cutting/chopping board is mentioned in the description and is wood.", + "pred_index": 3, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The hanging hole of the cutting/chopping board is mentioned in the description.", + -1 + ], + [ + "The cutting/chopping board is not mentioned in the description.", + 0 + ], + [ + "The hanging hole of the cutting/chopping board is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The hanging hole of the cutting/chopping board is not mentioned in the description.", + "pred_index": 2, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The juice groove of the cutting/chopping board is mentioned in the description.", + -1 + ], + [ + "The cutting/chopping board is not mentioned in the description.", + 0 + ], + [ + "The juice groove of the cutting/chopping board is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The juice groove of the cutting/chopping board is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle of the cutting/chopping board is mentioned in the description.", + -1 + ], + [ + "The cutting/chopping board is not mentioned in the description.", + 0 + ], + [ + "The handle of the cutting/chopping board is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The handle of the cutting/chopping board is mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The whisk is mentioned in the description.", + -1 + ], + [ + "The whisk is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The whisk is not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The utensils are mentioned in the description.", + -1 + ], + [ + "The utensils are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The utensils are not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a cutting/chopping board or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and a natural wood grain pattern. The board has a slightly rounded edge and a visible handle on one side.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a cutting/chopping board or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and a natural wood grain pattern. The board has a slightly rounded edge and a visible handle on one side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the surface is not mentioned, but the surface of the cutting/chopping board is mentioned.\nB. The surface or the cutting/chopping board is not mentioned.\nC. The texture of the surface is mentioned in the description but is not smooth.\nD. 
The texture of the surface is mentioned in the description and is smooth.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and a natural wood grain pattern. The board has a slightly rounded edge and a visible handle on one side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the cutting/chopping board is not mentioned.\nB. The cutting/chopping board is not mentioned.\nC. The color of the cutting/chopping board is mentioned in the description but is not light brown or wooden.\nD. The color of the cutting/chopping board is mentioned in the description and is light brown or wooden.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and a natural wood grain pattern. The board has a slightly rounded edge and a visible handle on one side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the cutting/chopping board is not mentioned.\nB. The cutting/chopping board is not mentioned.\nC. 
The shape of the cutting/chopping board is mentioned in the description but is not rectangular.\nD. The shape of the cutting/chopping board is mentioned in the description and is rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and a natural wood grain pattern. The board has a slightly rounded edge and a visible handle on one side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the cutting/chopping board is not mentioned.\nB. The cutting/chopping board is not mentioned.\nC. The material of the cutting/chopping board is mentioned in the description but is not wood.\nD. The material of the cutting/chopping board is mentioned in the description and is wood.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and a natural wood grain pattern. The board has a slightly rounded edge and a visible handle on one side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The hanging hole of the cutting/chopping board is mentioned in the description.\nB. The cutting/chopping board is not mentioned in the description.\nC. The hanging hole of the cutting/chopping board is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and a natural wood grain pattern. The board has a slightly rounded edge and a visible handle on one side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The juice groove of the cutting/chopping board is mentioned in the description.\nB. The cutting/chopping board is not mentioned in the description.\nC. The juice groove of the cutting/chopping board is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and a natural wood grain pattern. The board has a slightly rounded edge and a visible handle on one side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The handle of the cutting/chopping board is mentioned in the description.\nB. The cutting/chopping board is not mentioned in the description.\nC. The handle of the cutting/chopping board is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and a natural wood grain pattern. The board has a slightly rounded edge and a visible handle on one side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The whisk is mentioned in the description.\nB. The whisk is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and a natural wood grain pattern. The board has a slightly rounded edge and a visible handle on one side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The utensils are mentioned in the description.\nB. The utensils are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. 
The texture of the surface is mentioned in the description and is smooth.", + "A. The color of the cutting/chopping board is not mentioned.", + "D. The shape of the cutting/chopping board is mentioned in the description and is rectangular.", + "D. The material of the cutting/chopping board is mentioned in the description and is wood.", + "C. The hanging hole of the cutting/chopping board is not mentioned in the description.", + "C. The juice groove of the cutting/chopping board is not mentioned in the description.", + "A. The handle of the cutting/chopping board is mentioned in the description.", + "B. The whisk is not mentioned in the description.", + "B. The utensils are not mentioned in the description." + ], + "score": 0.6666666666666666, + "score_pos": 0.75, + "score_neg": 0.6, + "neg_valid_num": 5, + "recognition_result": true + }, + "16010041": { + "pred": "A pair of light-colored wooden chopsticks with a smooth, polished surface. The tips of the chopsticks are slightly tapered and have a subtle orange hue.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the chopsticks is mentioned in the description but is not wood.", + -1 + ], + [ + "The material of the chopsticks is not mentioned.", + 0 + ], + [ + "The material of the chopsticks is mentioned in the description and is wood.", + 1 + ], + [ + "The chopsticks are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The material of the chopsticks is mentioned in the description and is wood.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the chopsticks is mentioned in the description but is not long cylindrical.", + -1 + ], + [ + "The shape of the chopsticks is not mentioned.", + 0 + ], + [ + "The shape of the chopsticks is mentioned in the description and is long cylindrical.", + 1 + ], + [ + "The chopsticks are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the chopsticks is mentioned in the description but is not long cylindrical.", + "pred_index": 0, + "question_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the body is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the body is not mentioned, but the body of the chopsticks is mentioned.", + 0.5 + ], + [ + "The texture of the body is mentioned in the description and is smooth.", + 1 + ], + [ + "The body or the chopsticks is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The texture of the body is mentioned in the description and is smooth.", + "pred_index": 2, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lemon slices are not mentioned in the description.", + 1 + ], + [ + "The lemon slices are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The lemon slices are not mentioned in the description.", + "pred_index": 0, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The decorative elements of the chopsticks are not mentioned in the description.", + 1 + ], + [ + "The chopsticks are not mentioned in the description.", + 0 + ], + [ + "The decorative elements of the chopsticks are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The decorative elements of the chopsticks are mentioned in the description.", + "pred_index": 2, + "question_index": 4, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The man is not mentioned in the description.", + 1 + ], + [ + "The man is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The man is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sliced meat is not mentioned in the description.", + 1 + ], + [ + "The sliced meat is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The sliced meat is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green garnish is not mentioned in the description.", + 1 + ], + [ + "The green garnish is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The green garnish is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the objects in the description are chopsticks or objects of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored wooden chopsticks with a smooth, polished surface. The tips of the chopsticks are slightly tapered and have a subtle orange hue.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the objects in the description are chopsticks or objects of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored wooden chopsticks with a smooth, polished surface. 
The tips of the chopsticks are slightly tapered and have a subtle orange hue.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the chopsticks is mentioned in the description but is not wood.\nB. The material of the chopsticks is not mentioned.\nC. The material of the chopsticks is mentioned in the description and is wood.\nD. The chopsticks are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored wooden chopsticks with a smooth, polished surface. The tips of the chopsticks are slightly tapered and have a subtle orange hue.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the chopsticks is mentioned in the description but is not long cylindrical.\nB. The shape of the chopsticks is not mentioned.\nC. The shape of the chopsticks is mentioned in the description and is long cylindrical.\nD. The chopsticks are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored wooden chopsticks with a smooth, polished surface. 
The tips of the chopsticks are slightly tapered and have a subtle orange hue.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the body is mentioned in the description but is not smooth.\nB. The texture of the body is not mentioned, but the body of the chopsticks is mentioned.\nC. The texture of the body is mentioned in the description and is smooth.\nD. The body or the chopsticks is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored wooden chopsticks with a smooth, polished surface. The tips of the chopsticks are slightly tapered and have a subtle orange hue.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lemon slices are not mentioned in the description.\nB. The lemon slices are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored wooden chopsticks with a smooth, polished surface. 
The tips of the chopsticks are slightly tapered and have a subtle orange hue.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The decorative elements of the chopsticks are not mentioned in the description.\nB. The chopsticks are not mentioned in the description.\nC. The decorative elements of the chopsticks are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored wooden chopsticks with a smooth, polished surface. The tips of the chopsticks are slightly tapered and have a subtle orange hue.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The man is not mentioned in the description.\nB. The man is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored wooden chopsticks with a smooth, polished surface. The tips of the chopsticks are slightly tapered and have a subtle orange hue.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The sliced meat is not mentioned in the description.\nB. The sliced meat is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored wooden chopsticks with a smooth, polished surface. The tips of the chopsticks are slightly tapered and have a subtle orange hue.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green garnish is not mentioned in the description.\nB. The green garnish is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The material of the chopsticks is mentioned in the description and is wood.", + "A. The shape of the chopsticks is mentioned in the description but is not long cylindrical.", + "C. The texture of the body is mentioned in the description and is smooth.", + "A. The lemon slices are not mentioned in the description.", + "C. The decorative elements of the chopsticks are mentioned in the description.", + "A. The man is not mentioned in the description.", + "A. The sliced meat is not mentioned in the description.", + "A. The green garnish is not mentioned in the description." + ], + "score": 0.5, + "score_pos": 0.3333333333333333, + "score_neg": 0.6, + "neg_valid_num": 5, + "recognition_result": true + }, + "17072759": { + "pred": "A black belt with a smooth texture, featuring a silver rectangular buckle. 
The belt has a single prong and a loop near the buckle for securing the tail end.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the belt is not mentioned.", + 0 + ], + [ + "The color of the belt is mentioned in the description but is not gray or black.", + -1 + ], + [ + "The color of the belt is mentioned in the description and is gray or black.", + 1 + ], + [ + "The belt is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the belt is mentioned in the description and is gray or black.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the strap is not mentioned, but the strap of the belt is mentioned.", + 0.5 + ], + [ + "The material of the strap is mentioned in the description but is not leather.", + -1 + ], + [ + "The material of the strap is mentioned in the description and is leather.", + 1 + ], + [ + "The strap or the belt is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the strap is not mentioned, but the strap of the belt is mentioned.", + "pred_index": 0, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the strap is not mentioned, but the strap of the belt is mentioned.", + 0.5 + ], + [ + "The texture of the strap is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the strap is mentioned in the description and is smooth.", + 1 + ], + [ + "The strap or the belt is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The texture of the strap is mentioned in the description and is smooth.", + "pred_index": 2, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The belt is not mentioned in the description.", + 0 + ], + [ + "The tip of the belt is mentioned in the description.", + -1 + ], + [ + "The tip of the belt is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The tip of the belt is mentioned in the description.", + "pred_index": 1, + "question_index": 3, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chess board is mentioned in the description.", + -1 + ], + [ + "The chess board is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The chess board is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The blanket is mentioned in the description.", + -1 + ], + [ + "The blanket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The blanket is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sky is mentioned in the description.", + -1 + ], + [ + "The sky is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The sky is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The belt is not mentioned in the description.", + 0 + ], + [ + "The keeper of the belt is mentioned in the description.", + -1 + ], + [ + "The keeper of the belt is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The keeper of the belt is mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": -1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a belt or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle. The belt has a single prong and a loop near the buckle for securing the tail end.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a belt or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle. The belt has a single prong and a loop near the buckle for securing the tail end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the belt is not mentioned.\nB. The color of the belt is mentioned in the description but is not gray or black.\nC. The color of the belt is mentioned in the description and is gray or black.\nD. The belt is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle. The belt has a single prong and a loop near the buckle for securing the tail end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the strap is not mentioned, but the strap of the belt is mentioned.\nB. The material of the strap is mentioned in the description but is not leather.\nC. The material of the strap is mentioned in the description and is leather.\nD. The strap or the belt is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle. The belt has a single prong and a loop near the buckle for securing the tail end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the strap is not mentioned, but the strap of the belt is mentioned.\nB. The texture of the strap is mentioned in the description but is not smooth.\nC. The texture of the strap is mentioned in the description and is smooth.\nD. The strap or the belt is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle. The belt has a single prong and a loop near the buckle for securing the tail end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The belt is not mentioned in the description.\nB. The tip of the belt is mentioned in the description.\nC. The tip of the belt is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle. The belt has a single prong and a loop near the buckle for securing the tail end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chess board is mentioned in the description.\nB. The chess board is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle. The belt has a single prong and a loop near the buckle for securing the tail end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The blanket is mentioned in the description.\nB. The blanket is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle. 
The belt has a single prong and a loop near the buckle for securing the tail end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sky is mentioned in the description.\nB. The sky is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle. The belt has a single prong and a loop near the buckle for securing the tail end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The belt is not mentioned in the description.\nB. The keeper of the belt is mentioned in the description.\nC. The keeper of the belt is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the belt is mentioned in the description and is gray or black.", + "A. The material of the strap is not mentioned, but the strap of the belt is mentioned.", + "C. The texture of the strap is mentioned in the description and is smooth.", + "B. The tip of the belt is mentioned in the description.", + "B. The chess board is not mentioned in the description.", + "B. The blanket is not mentioned in the description.", + "B. The sky is not mentioned in the description.", + "B. The keeper of the belt is mentioned in the description." 
+ ], + "score": 0.4375, + "score_pos": 0.8333333333333334, + "score_neg": 0.2, + "neg_valid_num": 5, + "recognition_result": true + }, + "17072764": { + "pred": "A pear with a smooth, light green skin, featuring a slight yellowish hue on the upper right side. The pear has a short, brown stem attached to its top.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the pear is mentioned in the description but is not smooth.", + -1 + ], + [ + "The pear is not mentioned.", + 0 + ], + [ + "The texture of the pear is not mentioned.", + 0 + ], + [ + "The texture of the pear is mentioned in the description and is smooth.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The texture of the pear is mentioned in the description and is smooth.", + "pred_index": 3, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the stem is mentioned in the description but is not short.", + -1 + ], + [ + "The stem or the pear is not mentioned.", + 0 + ], + [ + "The size of the stem is not mentioned, but the stem of the pear is mentioned.", + 0.5 + ], + [ + "The size of the stem is mentioned in the description and is short.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The size of the stem is mentioned in the description and is short.", + "pred_index": 3, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the stem is mentioned in the description but is not brown.", + -1 + ], + [ + "The stem or the pear is not mentioned.", + 0 + ], + [ + "The color of the stem is not mentioned, but the stem of the pear is mentioned.", + 0.5 + ], + [ + "The color of the stem is mentioned in the description and is brown.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the stem is mentioned in the description and is brown.", + "pred_index": 3, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the skin is mentioned in the description but is not yellow or green.", + -1 + ], + [ + "The skin or the pear is not mentioned.", + 0 + ], + [ + "The color of the skin is not mentioned, but the skin of the pear is mentioned.", + 0.5 + ], + [ + "The color of the skin is mentioned in the description and is yellow or green.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the skin is mentioned in the description and is yellow or green.", + "pred_index": 3, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cup is not mentioned in the description.", + 1 + ], + [ + "The cup is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The cup is not mentioned in the description.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pear is not mentioned in the description.", + 0 + ], + [ + "The core of the pear is not mentioned in the description.", + 1 + ], + [ + "The core of the pear is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The core of the pear is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The white top is not mentioned in the description.", + 1 + ], + [ + "The white top is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The white top is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The grass are not mentioned in the description.", + 1 + ], + [ + "The grass are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The grass are not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pear is not mentioned in the description.", + 0 + ], + [ + "The leaf of the pear is not mentioned in the description.", + 1 + ], + [ + "The leaf of the pear is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The leaf of the pear is not mentioned in the description.", + "pred_index": 1, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a pear or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pear with a smooth, light green skin, featuring a slight yellowish hue on the upper right side. The pear has a short, brown stem attached to its top.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a pear or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pear with a smooth, light green skin, featuring a slight yellowish hue on the upper right side. The pear has a short, brown stem attached to its top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the pear is mentioned in the description but is not smooth.\nB. The pear is not mentioned.\nC. The texture of the pear is not mentioned.\nD. The texture of the pear is mentioned in the description and is smooth.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA pear with a smooth, light green skin, featuring a slight yellowish hue on the upper right side. The pear has a short, brown stem attached to its top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the stem is mentioned in the description but is not short.\nB. The stem or the pear is not mentioned.\nC. The size of the stem is not mentioned, but the stem of the pear is mentioned.\nD. The size of the stem is mentioned in the description and is short.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pear with a smooth, light green skin, featuring a slight yellowish hue on the upper right side. The pear has a short, brown stem attached to its top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the stem is mentioned in the description but is not brown.\nB. The stem or the pear is not mentioned.\nC. The color of the stem is not mentioned, but the stem of the pear is mentioned.\nD. The color of the stem is mentioned in the description and is brown.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA pear with a smooth, light green skin, featuring a slight yellowish hue on the upper right side. The pear has a short, brown stem attached to its top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the skin is mentioned in the description but is not yellow or green.\nB. The skin or the pear is not mentioned.\nC. The color of the skin is not mentioned, but the skin of the pear is mentioned.\nD. The color of the skin is mentioned in the description and is yellow or green.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pear with a smooth, light green skin, featuring a slight yellowish hue on the upper right side. The pear has a short, brown stem attached to its top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cup is not mentioned in the description.\nB. The cup is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pear with a smooth, light green skin, featuring a slight yellowish hue on the upper right side. 
The pear has a short, brown stem attached to its top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pear is not mentioned in the description.\nB. The core of the pear is not mentioned in the description.\nC. The core of the pear is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pear with a smooth, light green skin, featuring a slight yellowish hue on the upper right side. The pear has a short, brown stem attached to its top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The white top is not mentioned in the description.\nB. The white top is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pear with a smooth, light green skin, featuring a slight yellowish hue on the upper right side. The pear has a short, brown stem attached to its top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The grass are not mentioned in the description.\nB. 
The grass are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pear with a smooth, light green skin, featuring a slight yellowish hue on the upper right side. The pear has a short, brown stem attached to its top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pear is not mentioned in the description.\nB. The leaf of the pear is not mentioned in the description.\nC. The leaf of the pear is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The texture of the pear is mentioned in the description and is smooth.", + "D. The size of the stem is mentioned in the description and is short.", + "D. The color of the stem is mentioned in the description and is brown.", + "D. The color of the skin is mentioned in the description and is yellow or green.", + "A. The cup is not mentioned in the description.", + "B. The core of the pear is not mentioned in the description.", + "A. The white top is not mentioned in the description.", + "A. The grass are not mentioned in the description.", + "B. The leaf of the pear is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "18301585": { + "pred": "The bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest consists of three horizontal slats, while the seat has two horizontal slats. 
The bench is supported by white concrete legs that are rectangular in shape and have a slightly tapered design.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the backrest is mentioned in the description and is black.", + 1 + ], + [ + "The backrest or the bench is not mentioned.", + 0 + ], + [ + "The color of the backrest is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the backrest is not mentioned, but the backrest of the bench is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the backrest is mentioned in the description and is black.", + "pred_index": 0, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the legs is mentioned in the description and is white.", + 1 + ], + [ + "The legs or the bench are not mentioned.", + 0 + ], + [ + "The color of the legs is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the legs is not mentioned, but the legs of the bench are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the legs is mentioned in the description and is white.", + "pred_index": 0, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the backrest is mentioned in the description and is slatted.", + 1 + ], + [ + "The backrest or the bench is not mentioned.", + 0 + ], + [ + "The shape of the backrest is mentioned in the description but is not slatted.", + -1 + ], + [ + "The shape of the backrest is not mentioned, but the backrest of the bench is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. 
The shape of the backrest is mentioned in the description and is slatted.", + "pred_index": 0, + "question_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bollards are mentioned in the description.", + -1 + ], + [ + "The bollards are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bollards are not mentioned in the description.", + "pred_index": 1, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The storage compartment of the bench is mentioned in the description.", + -1 + ], + [ + "The storage compartment of the bench is not mentioned in the description.", + 1 + ], + [ + "The bench is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The storage compartment of the bench is not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The footrest of the bench is mentioned in the description.", + -1 + ], + [ + "The footrest of the bench is not mentioned in the description.", + 1 + ], + [ + "The bench is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The footrest of the bench is not mentioned in the description.", + "pred_index": 1, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The headrest of the bench is mentioned in the description.", + -1 + ], + [ + "The headrest of the bench is not mentioned in the description.", + 1 + ], + [ + "The bench is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The headrest of the bench is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The trees are mentioned in the description.", + -1 + ], + [ + "The trees are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The trees are not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a bench or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. 
The backrest consists of three horizontal slats, while the seat has two horizontal slats. The bench is supported by white concrete legs that are rectangular in shape and have a slightly tapered design.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a bench or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest consists of three horizontal slats, while the seat has two horizontal slats. The bench is supported by white concrete legs that are rectangular in shape and have a slightly tapered design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the backrest is mentioned in the description and is black.\nB. The backrest or the bench is not mentioned.\nC. The color of the backrest is mentioned in the description but is not black.\nD. The color of the backrest is not mentioned, but the backrest of the bench is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest consists of three horizontal slats, while the seat has two horizontal slats. The bench is supported by white concrete legs that are rectangular in shape and have a slightly tapered design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the legs is mentioned in the description and is white.\nB. The legs or the bench are not mentioned.\nC. The color of the legs is mentioned in the description but is not white.\nD. The color of the legs is not mentioned, but the legs of the bench are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest consists of three horizontal slats, while the seat has two horizontal slats. The bench is supported by white concrete legs that are rectangular in shape and have a slightly tapered design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the backrest is mentioned in the description and is slatted.\nB. The backrest or the bench is not mentioned.\nC. The shape of the backrest is mentioned in the description but is not slatted.\nD. 
The shape of the backrest is not mentioned, but the backrest of the bench is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest consists of three horizontal slats, while the seat has two horizontal slats. The bench is supported by white concrete legs that are rectangular in shape and have a slightly tapered design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bollards are mentioned in the description.\nB. The bollards are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest consists of three horizontal slats, while the seat has two horizontal slats. The bench is supported by white concrete legs that are rectangular in shape and have a slightly tapered design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The storage compartment of the bench is mentioned in the description.\nB. 
The storage compartment of the bench is not mentioned in the description.\nC. The bench is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest consists of three horizontal slats, while the seat has two horizontal slats. The bench is supported by white concrete legs that are rectangular in shape and have a slightly tapered design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The footrest of the bench is mentioned in the description.\nB. The footrest of the bench is not mentioned in the description.\nC. The bench is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest consists of three horizontal slats, while the seat has two horizontal slats. 
The bench is supported by white concrete legs that are rectangular in shape and have a slightly tapered design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The headrest of the bench is mentioned in the description.\nB. The headrest of the bench is not mentioned in the description.\nC. The bench is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest consists of three horizontal slats, while the seat has two horizontal slats. The bench is supported by white concrete legs that are rectangular in shape and have a slightly tapered design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The trees are mentioned in the description.\nB. The trees are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The color of the backrest is mentioned in the description and is black.", + "A. The color of the legs is mentioned in the description and is white.", + "A. The shape of the backrest is mentioned in the description and is slatted.", + "B. The bollards are not mentioned in the description.", + "B. The storage compartment of the bench is not mentioned in the description.", + "B. The footrest of the bench is not mentioned in the description.", + "B. The headrest of the bench is not mentioned in the description.", + "B. 
The trees are not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "18680641": { + "pred": "A rectangular, plush, red carpet with a slightly textured surface and a dark gray border along the edges.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the carpet is mentioned in the description but is not fabric.", + -1 + ], + [ + "The material of the carpet is not mentioned.", + 0 + ], + [ + "The material of the carpet is mentioned in the description and is fabric.", + 1 + ], + [ + "The carpet is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the carpet is mentioned in the description and is fabric.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the edge is mentioned in the description but is not straight.", + -1 + ], + [ + "The shape of the edge is not mentioned, but the edge of the carpet is mentioned.", + 0.5 + ], + [ + "The shape of the edge is mentioned in the description and is straight.", + 1 + ], + [ + "The edge or the carpet is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the edge is not mentioned, but the edge of the carpet is mentioned.", + "pred_index": 1, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the carpet is mentioned in the description but is not orange or red.", + -1 + ], + [ + "The color of the carpet is not mentioned.", + 0 + ], + [ + "The color of the carpet is mentioned in the description and is orange or red.", + 1 + ], + [ + "The carpet is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the carpet is mentioned in the description and is orange or red.", + "pred_index": 2, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the carpet is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the carpet is not mentioned.", + 0 + ], + [ + "The shape of the carpet is mentioned in the description and is rectangular.", + 1 + ], + [ + "The carpet is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the carpet is mentioned in the description and is rectangular.", + "pred_index": 2, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The carpet is not mentioned in the description.", + 0 + ], + [ + "The tassels of the carpet are not mentioned in the description.", + 1 + ], + [ + "The tassels of the carpet are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The tassels of the carpet are not mentioned in the description.", + "pred_index": 1, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The drainage pipe is not mentioned in the description.", + 1 + ], + [ + "The drainage pipe is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The drainage pipe is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The carpet is not mentioned in the description.", + 0 + ], + [ + "The pattern of the carpet is not mentioned in the description.", + 1 + ], + [ + "The pattern of the carpet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The pattern of the carpet is not mentioned in the description.", + "pred_index": 1, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shop sign is not mentioned in the description.", + 1 + ], + [ + "The shop sign is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The shop sign is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The candy display is not mentioned in the description.", + 1 + ], + [ + "The candy display is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The candy display is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a carpet or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly textured surface and a dark gray border along the edges.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a carpet or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly textured surface and a dark gray border along the edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the carpet is mentioned in the description but is not fabric.\nB. The material of the carpet is not mentioned.\nC. The material of the carpet is mentioned in the description and is fabric.\nD. The carpet is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly textured surface and a dark gray border along the edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the edge is mentioned in the description but is not straight.\nB. The shape of the edge is not mentioned, but the edge of the carpet is mentioned.\nC. The shape of the edge is mentioned in the description and is straight.\nD. The edge or the carpet is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly textured surface and a dark gray border along the edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the carpet is mentioned in the description but is not orange or red.\nB. The color of the carpet is not mentioned.\nC. The color of the carpet is mentioned in the description and is orange or red.\nD. The carpet is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly textured surface and a dark gray border along the edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the carpet is mentioned in the description but is not rectangular.\nB. The shape of the carpet is not mentioned.\nC. The shape of the carpet is mentioned in the description and is rectangular.\nD. The carpet is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly textured surface and a dark gray border along the edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The carpet is not mentioned in the description.\nB. The tassels of the carpet are not mentioned in the description.\nC. The tassels of the carpet are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly textured surface and a dark gray border along the edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The drainage pipe is not mentioned in the description.\nB. The drainage pipe is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly textured surface and a dark gray border along the edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The carpet is not mentioned in the description.\nB. The pattern of the carpet is not mentioned in the description.\nC. The pattern of the carpet is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly textured surface and a dark gray border along the edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The shop sign is not mentioned in the description.\nB. The shop sign is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly textured surface and a dark gray border along the edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The candy display is not mentioned in the description.\nB. The candy display is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The material of the carpet is mentioned in the description and is fabric.", + "B. The shape of the edge is not mentioned, but the edge of the carpet is mentioned.", + "C. The color of the carpet is mentioned in the description and is orange or red.", + "C. The shape of the carpet is mentioned in the description and is rectangular.", + "B. The tassels of the carpet are not mentioned in the description.", + "A. The drainage pipe is not mentioned in the description.", + "B. The pattern of the carpet is not mentioned in the description.", + "A. The shop sign is not mentioned in the description.", + "A. The candy display is not mentioned in the description." + ], + "score": 0.9444444444444444, + "score_pos": 0.875, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "25273528": { + "pred": "The balloon features a vibrant pattern with alternating vertical stripes of red, yellow, and green. The red stripes are the most prominent, with yellow and green stripes creating a striking contrast. 
The balloon has a teardrop shape with a small black basket attached at the bottom.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the hot air balloon is mentioned in the description and is multicolored.", + 1 + ], + [ + "The hot air balloon is not mentioned.", + 0 + ], + [ + "The color of the hot air balloon is mentioned in the description but is not multicolored.", + -1 + ], + [ + "The color of the hot air balloon is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the hot air balloon is mentioned in the description and is multicolored.", + "pred_index": 0, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the envelope is mentioned in the description and is nylon or polyester.", + 1 + ], + [ + "The envelope or the hot air balloon is not mentioned.", + 0 + ], + [ + "The material of the envelope is mentioned in the description but is not nylon or polyester.", + -1 + ], + [ + "The material of the envelope is not mentioned, but the envelope of the hot air balloon is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the envelope is not mentioned, but the envelope of the hot air balloon is mentioned.", + "pred_index": 3, + "question_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The position of the basket is mentioned in the description and is bottom.", + 1 + ], + [ + "The basket or the hot air balloon is not mentioned.", + 0 + ], + [ + "The position of the basket is mentioned in the description but is not bottom.", + -1 + ], + [ + "The position of the basket is not mentioned, but the basket of the hot air balloon is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. 
The position of the basket is mentioned in the description and is bottom.", + "pred_index": 0, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the basket is mentioned in the description and is small.", + 1 + ], + [ + "The basket or the hot air balloon is not mentioned.", + 0 + ], + [ + "The size of the basket is mentioned in the description but is not small.", + -1 + ], + [ + "The size of the basket is not mentioned, but the basket of the hot air balloon is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The size of the basket is not mentioned, but the basket of the hot air balloon is mentioned.", + "pred_index": 3, + "question_index": 3, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the basket is mentioned in the description and is dark or black.", + 1 + ], + [ + "The basket or the hot air balloon is not mentioned.", + 0 + ], + [ + "The color of the basket is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The color of the basket is not mentioned, but the basket of the hot air balloon is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the basket is mentioned in the description and is dark or black.", + "pred_index": 0, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fuel tanks of the hot air balloon are not mentioned in the description.", + 1 + ], + [ + "The fuel tanks of the hot air balloon are mentioned in the description.", + -1 + ], + [ + "The hot air balloon is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The fuel tanks of the hot air balloon are not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The burner of the hot air balloon is not mentioned in the description.", + 1 + ], + [ + "The burner of the hot air balloon is mentioned in the description.", + -1 + ], + [ + "The hot air balloon is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The burner of the hot air balloon is not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ground is not mentioned in the description.", + 1 + ], + [ + "The ground is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The ground is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The people are not mentioned in the description.", + 1 + ], + [ + "The people are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The people are not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The trees are not mentioned in the description.", + 1 + ], + [ + "The trees are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The trees are not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a hot air balloon or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe balloon features a vibrant pattern with alternating vertical stripes of red, yellow, and green. The red stripes are the most prominent, with yellow and green stripes creating a striking contrast. The balloon has a teardrop shape with a small black basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a hot air balloon or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe balloon features a vibrant pattern with alternating vertical stripes of red, yellow, and green. 
The red stripes are the most prominent, with yellow and green stripes creating a striking contrast. The balloon has a teardrop shape with a small black basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the hot air balloon is mentioned in the description and is multicolored.\nB. The hot air balloon is not mentioned.\nC. The color of the hot air balloon is mentioned in the description but is not multicolored.\nD. The color of the hot air balloon is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe balloon features a vibrant pattern with alternating vertical stripes of red, yellow, and green. The red stripes are the most prominent, with yellow and green stripes creating a striking contrast. The balloon has a teardrop shape with a small black basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the envelope is mentioned in the description and is nylon or polyester.\nB. The envelope or the hot air balloon is not mentioned.\nC. The material of the envelope is mentioned in the description but is not nylon or polyester.\nD. The material of the envelope is not mentioned, but the envelope of the hot air balloon is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe balloon features a vibrant pattern with alternating vertical stripes of red, yellow, and green. The red stripes are the most prominent, with yellow and green stripes creating a striking contrast. The balloon has a teardrop shape with a small black basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The position of the basket is mentioned in the description and is bottom.\nB. The basket or the hot air balloon is not mentioned.\nC. The position of the basket is mentioned in the description but is not bottom.\nD. The position of the basket is not mentioned, but the basket of the hot air balloon is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe balloon features a vibrant pattern with alternating vertical stripes of red, yellow, and green. The red stripes are the most prominent, with yellow and green stripes creating a striking contrast. The balloon has a teardrop shape with a small black basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the basket is mentioned in the description and is small.\nB. The basket or the hot air balloon is not mentioned.\nC. 
The size of the basket is mentioned in the description but is not small.\nD. The size of the basket is not mentioned, but the basket of the hot air balloon is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe balloon features a vibrant pattern with alternating vertical stripes of red, yellow, and green. The red stripes are the most prominent, with yellow and green stripes creating a striking contrast. The balloon has a teardrop shape with a small black basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the basket is mentioned in the description and is dark or black.\nB. The basket or the hot air balloon is not mentioned.\nC. The color of the basket is mentioned in the description but is not dark or black.\nD. The color of the basket is not mentioned, but the basket of the hot air balloon is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe balloon features a vibrant pattern with alternating vertical stripes of red, yellow, and green. The red stripes are the most prominent, with yellow and green stripes creating a striking contrast. 
The balloon has a teardrop shape with a small black basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fuel tanks of the hot air balloon are not mentioned in the description.\nB. The fuel tanks of the hot air balloon are mentioned in the description.\nC. The hot air balloon is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe balloon features a vibrant pattern with alternating vertical stripes of red, yellow, and green. The red stripes are the most prominent, with yellow and green stripes creating a striking contrast. The balloon has a teardrop shape with a small black basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The burner of the hot air balloon is not mentioned in the description.\nB. The burner of the hot air balloon is mentioned in the description.\nC. The hot air balloon is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe balloon features a vibrant pattern with alternating vertical stripes of red, yellow, and green. The red stripes are the most prominent, with yellow and green stripes creating a striking contrast. The balloon has a teardrop shape with a small black basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ground is not mentioned in the description.\nB. The ground is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe balloon features a vibrant pattern with alternating vertical stripes of red, yellow, and green. The red stripes are the most prominent, with yellow and green stripes creating a striking contrast. The balloon has a teardrop shape with a small black basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The people are not mentioned in the description.\nB. The people are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe balloon features a vibrant pattern with alternating vertical stripes of red, yellow, and green. The red stripes are the most prominent, with yellow and green stripes creating a striking contrast. The balloon has a teardrop shape with a small black basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The trees are not mentioned in the description.\nB. The trees are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The color of the hot air balloon is mentioned in the description and is multicolored.", + "D. The material of the envelope is not mentioned, but the envelope of the hot air balloon is mentioned.", + "A. The position of the basket is mentioned in the description and is bottom.", + "D. The size of the basket is not mentioned, but the basket of the hot air balloon is mentioned.", + "A. The color of the basket is mentioned in the description and is dark or black.", + "A. The fuel tanks of the hot air balloon are not mentioned in the description.", + "A. The burner of the hot air balloon is not mentioned in the description.", + "A. The ground is not mentioned in the description.", + "A. The people are not mentioned in the description.", + "A. The trees are not mentioned in the description." + ], + "score": 0.9, + "score_pos": 0.8, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "25419509": { + "pred": "A metallic fork with a slightly curved handle and four evenly spaced tines. 
The handle has a smooth, reflective surface with a gentle upward curve near the tines.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fork is not mentioned.", + 0 + ], + [ + "The color of the fork is not mentioned.", + 0 + ], + [ + "The color of the fork is mentioned in the description and is metallic.", + 1 + ], + [ + "The color of the fork is mentioned in the description but is not metallic.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the fork is not mentioned.", + "pred_index": 1, + "question_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle or the fork is not mentioned.", + 0 + ], + [ + "The material of the handle is not mentioned, but the handle of the fork is mentioned.", + 0.5 + ], + [ + "The material of the handle is mentioned in the description and is metal.", + 1 + ], + [ + "The material of the handle is mentioned in the description but is not metal.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the handle is mentioned in the description and is metal.", + "pred_index": 2, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle or the fork is not mentioned.", + 0 + ], + [ + "The shape of the handle is not mentioned, but the handle of the fork is mentioned.", + 0.5 + ], + [ + "The shape of the handle is mentioned in the description and is curved.", + 1 + ], + [ + "The shape of the handle is mentioned in the description but is not curved.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the handle is mentioned in the description and is curved.", + "pred_index": 2, + "question_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle or the fork is not mentioned.", + 0 + ], + [ + "The texture of the handle is not mentioned, but the handle of the fork is mentioned.", + 0.5 + ], + [ + "The texture of the handle is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the handle is mentioned in the description but is not smooth.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The texture of the handle is mentioned in the description and is smooth.", + "pred_index": 2, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tines or the fork are not mentioned.", + 0 + ], + [ + "The number of parts of the tines is not mentioned, but the tines of the fork are mentioned.", + 0.5 + ], + [ + "The number of parts of the tines is mentioned in the description and is 4.", + 1 + ], + [ + "The number of parts of the tines is mentioned in the description but is not 4.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The number of parts of the tines is mentioned in the description and is 4.", + "pred_index": 2, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bolster of the fork is not mentioned in the description.", + 1 + ], + [ + "The bolster of the fork is mentioned in the description.", + -1 + ], + [ + "The fork is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The bolster of the fork is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plates are not mentioned in the description.", + 1 + ], + [ + "The plates are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The plates are not mentioned in the description.", + "pred_index": 0, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The end cap of the fork is not mentioned in the description.", + 1 + ], + [ + "The end cap of the fork is mentioned in the description.", + -1 + ], + [ + "The fork is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The end cap of the fork is not mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The drinks are not mentioned in the description.", + 1 + ], + [ + "The drinks are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The drinks are not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ferrule of the fork is not mentioned in the description.", + 1 + ], + [ + "The ferrule of the fork is mentioned in the description.", + -1 + ], + [ + "The fork is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The ferrule of the fork is not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a fork or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a gentle upward curve near the tines.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a fork or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. 
The handle has a smooth, reflective surface with a gentle upward curve near the tines.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fork is not mentioned.\nB. The color of the fork is not mentioned.\nC. The color of the fork is mentioned in the description and is metallic.\nD. The color of the fork is mentioned in the description but is not metallic.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a gentle upward curve near the tines.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handle or the fork is not mentioned.\nB. The material of the handle is not mentioned, but the handle of the fork is mentioned.\nC. The material of the handle is mentioned in the description and is metal.\nD. The material of the handle is mentioned in the description but is not metal.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. 
The handle has a smooth, reflective surface with a gentle upward curve near the tines.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handle or the fork is not mentioned.\nB. The shape of the handle is not mentioned, but the handle of the fork is mentioned.\nC. The shape of the handle is mentioned in the description and is curved.\nD. The shape of the handle is mentioned in the description but is not curved.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a gentle upward curve near the tines.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handle or the fork is not mentioned.\nB. The texture of the handle is not mentioned, but the handle of the fork is mentioned.\nC. The texture of the handle is mentioned in the description and is smooth.\nD. The texture of the handle is mentioned in the description but is not smooth.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a gentle upward curve near the tines.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tines or the fork are not mentioned.\nB. The number of parts of the tines is not mentioned, but the tines of the fork are mentioned.\nC. The number of parts of the tines is mentioned in the description and is 4.\nD. The number of parts of the tines is mentioned in the description but is not 4.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a gentle upward curve near the tines.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bolster of the fork is not mentioned in the description.\nB. The bolster of the fork is mentioned in the description.\nC. The fork is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a gentle upward curve near the tines.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The plates are not mentioned in the description.\nB. The plates are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a gentle upward curve near the tines.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The end cap of the fork is not mentioned in the description.\nB. The end cap of the fork is mentioned in the description.\nC. The fork is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. 
The handle has a smooth, reflective surface with a gentle upward curve near the tines.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The drinks are not mentioned in the description.\nB. The drinks are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a gentle upward curve near the tines.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ferrule of the fork is not mentioned in the description.\nB. The ferrule of the fork is mentioned in the description.\nC. The fork is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the fork is not mentioned.", + "C. The material of the handle is mentioned in the description and is metal.", + "C. The shape of the handle is mentioned in the description and is curved.", + "C. The texture of the handle is mentioned in the description and is smooth.", + "C. The number of parts of the tines is mentioned in the description and is 4.", + "A. The bolster of the fork is not mentioned in the description.", + "A. The plates are not mentioned in the description.", + "A. The end cap of the fork is not mentioned in the description.", + "A. The drinks are not mentioned in the description.", + "A. The ferrule of the fork is not mentioned in the description." 
+ ], + "score": 0.9, + "score_pos": 0.8, + "score_neg": 1.0, + "neg_valid_num": 5, + "recognition_result": true + }, + "25612310": { + "pred": "A dark brown wicker basket with a woven pattern, featuring a slightly curved edge and a visible portion of the basket's side.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The basket is not mentioned.", + 0 + ], + [ + "The texture of the basket is not mentioned.", + 0 + ], + [ + "The texture of the basket is mentioned in the description and is woven.", + 1 + ], + [ + "The texture of the basket is mentioned in the description but is not woven.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The texture of the basket is mentioned in the description and is woven.", + "pred_index": 2, + "question_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The basket is not mentioned.", + 0 + ], + [ + "The material of the basket is not mentioned.", + 0 + ], + [ + "The material of the basket is mentioned in the description and is wicker.", + 1 + ], + [ + "The material of the basket is mentioned in the description but is not wicker.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the basket is mentioned in the description and is wicker.", + "pred_index": 2, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The basket is not mentioned.", + 0 + ], + [ + "The type of the basket is not mentioned.", + 0 + ], + [ + "The type of the basket is mentioned in the description and is interlaced.", + 1 + ], + [ + "The type of the basket is mentioned in the description but is not interlaced.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The type of the basket is not mentioned.", + "pred_index": 1, + "question_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The basket is not mentioned.", + 0 + ], + [ + "The color of the basket is not mentioned.", + 0 + ], + [ + "The color of the basket is mentioned in the description and is brown or wooden.", + 1 + ], + [ + "The color of the basket is mentioned in the description but is not brown or wooden.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the basket is mentioned in the description and is brown or wooden.", + "pred_index": 2, + "question_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The base of the basket is mentioned in the description.", + -1 + ], + [ + "The basket is not mentioned in the description.", + 0 + ], + [ + "The base of the basket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The base of the basket is not mentioned in the description.", + "pred_index": 2, + "question_index": 4, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle of the basket is mentioned in the description.", + -1 + ], + [ + "The basket is not mentioned in the description.", + 0 + ], + [ + "The handle of the basket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The handle of the basket is not mentioned in the description.", + "pred_index": 2, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lid of the basket is mentioned in the description.", + -1 + ], + [ + "The basket is not mentioned in the description.", + 0 + ], + [ + "The lid of the basket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The lid of the basket is not mentioned in the description.", + "pred_index": 2, + "question_index": 6, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The decorative elements of the basket are mentioned in the description.", + -1 + ], + [ + "The basket is not mentioned in the description.", + 0 + ], + [ + "The decorative elements of the basket are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The decorative elements of the basket are mentioned in the description.", + "pred_index": 0, + "question_index": 7, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lining of the basket is mentioned in the description.", + -1 + ], + [ + "The basket is not mentioned in the description.", + 0 + ], + [ + "The lining of the basket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The lining of the basket is not mentioned in the description.", + "pred_index": 2, + "question_index": 8, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a basket or an object of a similar type? 
Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wicker basket with a woven pattern, featuring a slightly curved edge and a visible portion of the basket's side.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a basket or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wicker basket with a woven pattern, featuring a slightly curved edge and a visible portion of the basket's side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The basket is not mentioned.\nB. The texture of the basket is not mentioned.\nC. The texture of the basket is mentioned in the description and is woven.\nD. 
The texture of the basket is mentioned in the description but is not woven.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wicker basket with a woven pattern, featuring a slightly curved edge and a visible portion of the basket's side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The basket is not mentioned.\nB. The material of the basket is not mentioned.\nC. The material of the basket is mentioned in the description and is wicker.\nD. The material of the basket is mentioned in the description but is not wicker.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wicker basket with a woven pattern, featuring a slightly curved edge and a visible portion of the basket's side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The basket is not mentioned.\nB. The type of the basket is not mentioned.\nC. The type of the basket is mentioned in the description and is interlaced.\nD. 
The type of the basket is mentioned in the description but is not interlaced.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wicker basket with a woven pattern, featuring a slightly curved edge and a visible portion of the basket's side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The basket is not mentioned.\nB. The color of the basket is not mentioned.\nC. The color of the basket is mentioned in the description and is brown or wooden.\nD. The color of the basket is mentioned in the description but is not brown or wooden.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wicker basket with a woven pattern, featuring a slightly curved edge and a visible portion of the basket's side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The base of the basket is mentioned in the description.\nB. The basket is not mentioned in the description.\nC. The base of the basket is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wicker basket with a woven pattern, featuring a slightly curved edge and a visible portion of the basket's side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handle of the basket is mentioned in the description.\nB. The basket is not mentioned in the description.\nC. The handle of the basket is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wicker basket with a woven pattern, featuring a slightly curved edge and a visible portion of the basket's side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lid of the basket is mentioned in the description.\nB. The basket is not mentioned in the description.\nC. The lid of the basket is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wicker basket with a woven pattern, featuring a slightly curved edge and a visible portion of the basket's side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The decorative elements of the basket are mentioned in the description.\nB. The basket is not mentioned in the description.\nC. The decorative elements of the basket are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wicker basket with a woven pattern, featuring a slightly curved edge and a visible portion of the basket's side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lining of the basket is mentioned in the description.\nB. The basket is not mentioned in the description.\nC. The lining of the basket is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The texture of the basket is mentioned in the description and is woven.", + "C. The material of the basket is mentioned in the description and is wicker.", + "B. The type of the basket is not mentioned.", + "C. The color of the basket is mentioned in the description and is brown or wooden.", + "C. The base of the basket is not mentioned in the description.", + "C. The handle of the basket is not mentioned in the description.", + "C. The lid of the basket is not mentioned in the description.", + "A. 
The decorative elements of the basket are mentioned in the description.", + "C. The lining of the basket is not mentioned in the description." + ], + "score": 0.6666666666666666, + "score_pos": 0.75, + "score_neg": 0.6, + "neg_valid_num": 5, + "recognition_result": true + }, + "17265253": { + "pred": "A black rickshaw with a black canopy, featuring a single visible wheel with a silver rim and black tire. The wheel is attached to a black frame with a visible pedal mechanism.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the spokes is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the spokes is not mentioned, but the spokes of the rickshaw are mentioned.", + 0.5 + ], + [ + "The spokes or the rickshaw are not mentioned.", + 0 + ], + [ + "The material of the spokes is mentioned in the description and is metal.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. The spokes or the rickshaw are not mentioned.", + "pred_index": 2, + "question_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the tire is mentioned in the description but is not circular.", + -1 + ], + [ + "The shape of the tire is not mentioned, but the tire of the rickshaw is mentioned.", + 0.5 + ], + [ + "The tire or the rickshaw is not mentioned.", + 0 + ], + [ + "The shape of the tire is mentioned in the description and is circular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The shape of the tire is mentioned in the description and is circular.", + "pred_index": 3, + "question_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the tire is mentioned in the description but is not rubber.", + -1 + ], + [ + "The material of the tire is not mentioned, but the tire of the rickshaw is mentioned.", + 0.5 + ], + [ + "The tire or the rickshaw is not mentioned.", + 0 + ], + [ + "The material of the tire is mentioned in the description and is rubber.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. The material of the tire is not mentioned, but the tire of the rickshaw is mentioned.", + "pred_index": 1, + "question_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the spokes is mentioned in the description but is not silver.", + -1 + ], + [ + "The color of the spokes is not mentioned, but the spokes of the rickshaw are mentioned.", + 0.5 + ], + [ + "The spokes or the rickshaw are not mentioned.", + 0 + ], + [ + "The color of the spokes is mentioned in the description and is silver.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the spokes is mentioned in the description and is silver.", + "pred_index": 3, + "question_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the tire is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the tire is not mentioned, but the tire of the rickshaw is mentioned.", + 0.5 + ], + [ + "The tire or the rickshaw is not mentioned.", + 0 + ], + [ + "The color of the tire is mentioned in the description and is black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the tire is mentioned in the description and is black.", + "pred_index": 3, + "question_index": 4, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The path is not mentioned in the description.", + 1 + ], + [ + "The path is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The path is not mentioned in the description.", + "pred_index": 0, + "question_index": 5, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rickshaw is not mentioned in the description.", + 0 + ], + [ + "The rickshaw canopy of the rickshaw is not mentioned in the description.", + 1 + ], + [ + "The rickshaw canopy of the rickshaw is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The rickshaw canopy of the rickshaw is mentioned in the description.", + "pred_index": 2, + "question_index": 6, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rickshaw is not mentioned in the description.", + 0 + ], + [ + "The rickshaw handlebars of the rickshaw are not mentioned in the description.", + 1 + ], + [ + "The rickshaw handlebars of the rickshaw are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The rickshaw handlebars of the rickshaw are not mentioned in the description.", + "pred_index": 1, + "question_index": 7, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fence is not mentioned in the description.", + 1 + ], + [ + "The fence is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The fence is not mentioned in the description.", + "pred_index": 0, + "question_index": 8, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bamboo trees are not mentioned in the description.", + 1 + ], + [ + "The bamboo trees are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The bamboo trees are not mentioned in the description.", + "pred_index": 0, + "question_index": 9, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "Is it likely that the object in the description is a rickshaw or an object of a similar type? Again, It does not have to be an exact match.", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a black canopy, featuring a single visible wheel with a silver rim and black tire. 
The wheel is attached to a black frame with a visible pedal mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nIs it likely that the object in the description is a rickshaw or an object of a similar type? Again, It does not have to be an exact match.\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a black canopy, featuring a single visible wheel with a silver rim and black tire. The wheel is attached to a black frame with a visible pedal mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the spokes is mentioned in the description but is not metal.\nB. The material of the spokes is not mentioned, but the spokes of the rickshaw are mentioned.\nC. The spokes or the rickshaw are not mentioned.\nD. The material of the spokes is mentioned in the description and is metal.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a black canopy, featuring a single visible wheel with a silver rim and black tire. 
The wheel is attached to a black frame with a visible pedal mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the tire is mentioned in the description but is not circular.\nB. The shape of the tire is not mentioned, but the tire of the rickshaw is mentioned.\nC. The tire or the rickshaw is not mentioned.\nD. The shape of the tire is mentioned in the description and is circular.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a black canopy, featuring a single visible wheel with a silver rim and black tire. The wheel is attached to a black frame with a visible pedal mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the tire is mentioned in the description but is not rubber.\nB. The material of the tire is not mentioned, but the tire of the rickshaw is mentioned.\nC. The tire or the rickshaw is not mentioned.\nD. The material of the tire is mentioned in the description and is rubber.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a black canopy, featuring a single visible wheel with a silver rim and black tire. The wheel is attached to a black frame with a visible pedal mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the spokes is mentioned in the description but is not silver.\nB. The color of the spokes is not mentioned, but the spokes of the rickshaw are mentioned.\nC. The spokes or the rickshaw are not mentioned.\nD. The color of the spokes is mentioned in the description and is silver.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a black canopy, featuring a single visible wheel with a silver rim and black tire. The wheel is attached to a black frame with a visible pedal mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the tire is mentioned in the description but is not black.\nB. The color of the tire is not mentioned, but the tire of the rickshaw is mentioned.\nC. The tire or the rickshaw is not mentioned.\nD. The color of the tire is mentioned in the description and is black.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a black canopy, featuring a single visible wheel with a silver rim and black tire. The wheel is attached to a black frame with a visible pedal mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The path is not mentioned in the description.\nB. The path is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a black canopy, featuring a single visible wheel with a silver rim and black tire. The wheel is attached to a black frame with a visible pedal mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rickshaw is not mentioned in the description.\nB. The rickshaw canopy of the rickshaw is not mentioned in the description.\nC. The rickshaw canopy of the rickshaw is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a black canopy, featuring a single visible wheel with a silver rim and black tire. The wheel is attached to a black frame with a visible pedal mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rickshaw is not mentioned in the description.\nB. The rickshaw handlebars of the rickshaw are not mentioned in the description.\nC. The rickshaw handlebars of the rickshaw are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a black canopy, featuring a single visible wheel with a silver rim and black tire. The wheel is attached to a black frame with a visible pedal mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fence is not mentioned in the description.\nB. The fence is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in an image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a black canopy, featuring a single visible wheel with a silver rim and black tire. 
The wheel is attached to a black frame with a visible pedal mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bamboo trees are not mentioned in the description.\nB. The bamboo trees are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The spokes or the rickshaw are not mentioned.", + "D. The shape of the tire is mentioned in the description and is circular.", + "B. The material of the tire is not mentioned, but the tire of the rickshaw is mentioned.", + "D. The color of the spokes is mentioned in the description and is silver.", + "D. The color of the tire is mentioned in the description and is black.", + "A. The path is not mentioned in the description.", + "C. The rickshaw canopy of the rickshaw is mentioned in the description.", + "B. The rickshaw handlebars of the rickshaw are not mentioned in the description.", + "A. The fence is not mentioned in the description.", + "A. The bamboo trees are not mentioned in the description." + ], + "score": 0.65, + "score_pos": 0.7, + "score_neg": 0.6, + "neg_valid_num": 5, + "recognition_result": true + }, + "avg_pos": 0.5015833333333335, + "avg_neg": 0.8459999999999998 +} \ No newline at end of file diff --git a/evaluation/DLC-Bench/model_outputs/gar_8b_eval_gpt.json b/evaluation/DLC-Bench/model_outputs/gar_8b_eval_gpt.json new file mode 100644 index 0000000000000000000000000000000000000000..95d559b4df1e866b0ebfdb4c487a1cad0c50dc26 --- /dev/null +++ b/evaluation/DLC-Bench/model_outputs/gar_8b_eval_gpt.json @@ -0,0 +1,25034 @@ +{ + "2391781": { + "pred": "The bird has a predominantly white body with a mix of gray and brown feathers on its wings and back. Its wings are outstretched, showing a gradient from white at the base to darker shades towards the tips. 
The bird's head is slightly turned, with a small, pointed beak visible.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wing or the wild bird is not mentioned.", + 0 + ], + [ + "The color of the wing is not mentioned, but the wing of the wild bird is mentioned.", + 0.5 + ], + [ + "The color of the wing is mentioned in the description but is not grey or brown.", + -1 + ], + [ + "The color of the wing is mentioned in the description and is grey or brown.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the wing is mentioned in the description and is grey or brown.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The feathers or the wild bird are not mentioned.", + 0 + ], + [ + "The color of the feathers is not mentioned, but the feathers of the wild bird are mentioned.", + 0.5 + ], + [ + "The color of the feathers is mentioned in the description but is not white, grey, or brown.", + -1 + ], + [ + "The color of the feathers is mentioned in the description and is white, grey, or brown.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the feathers is mentioned in the description and is white, grey, or brown.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tail or the wild bird is not mentioned.", + 0 + ], + [ + "The shape of the tail is not mentioned, but the tail of the wild bird is mentioned.", + 0.5 + ], + [ + "The shape of the tail is mentioned in the description but is not fan-like.", + -1 + ], + [ + "The shape of the tail is mentioned in the description and is fan-like.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The shape of the tail is not mentioned, but the tail of the wild bird is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The null or the wild bird is not mentioned.", + 0 + ], + [ + "The action of the null is not mentioned, but the null of the wild bird is mentioned.", + 0.5 + ], + [ + "The action of the null is mentioned in the description but is not flying.", + -1 + ], + [ + "The action of the null is mentioned in the description and is flying.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The action of the null is mentioned in the description and is flying.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wing or the wild bird is not mentioned.", + 0 + ], + [ + "The position of the wing is not mentioned, but the wing of the wild bird is mentioned.", + 0.5 + ], + [ + "The position of the wing is mentioned in the description but is not extended or outstretched.", + -1 + ], + [ + "The position of the wing is mentioned in the description and is extended or outstretched.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The position of the wing is mentioned in the description and is extended or outstretched.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The claws of the wild bird are not mentioned in the description.", + 1 + ], + [ + "The claws of the wild bird are mentioned in the description.", + -1 + ], + [ + "The wild bird is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The claws of the wild bird are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The boats are not mentioned in the description.", + 1 + ], + [ + "The boats are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The boats are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chimneys are not mentioned in the description.", + 1 + ], + [ + "The chimneys are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The chimneys are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bridge is not mentioned in the description.", + 1 + ], + [ + "The bridge is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The bridge is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The windows are not mentioned in the description.", + 1 + ], + [ + "The windows are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The windows are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is wild bird. 
Based on the image, is it likely that the object in the description is given class: wild bird or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly white body with a mix of gray and brown feathers on its wings and back. Its wings are outstretched, showing a gradient from white at the base to darker shades towards the tips. The bird's head is slightly turned, with a small, pointed beak visible.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is wild bird. Based on the image, is it likely that the object in the description is given class: wild bird or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly white body with a mix of gray and brown feathers on its wings and back. Its wings are outstretched, showing a gradient from white at the base to darker shades towards the tips. 
The bird's head is slightly turned, with a small, pointed beak visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wing or the wild bird is not mentioned.\nB. The color of the wing is not mentioned, but the wing of the wild bird is mentioned.\nC. The color of the wing is mentioned in the description but is not grey or brown.\nD. The color of the wing is mentioned in the description and is grey or brown.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly white body with a mix of gray and brown feathers on its wings and back. Its wings are outstretched, showing a gradient from white at the base to darker shades towards the tips. The bird's head is slightly turned, with a small, pointed beak visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The feathers or the wild bird are not mentioned.\nB. The color of the feathers is not mentioned, but the feathers of the wild bird are mentioned.\nC. The color of the feathers is mentioned in the description but is not white, grey, or brown.\nD. The color of the feathers is mentioned in the description and is white, grey, or brown.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly white body with a mix of gray and brown feathers on its wings and back. Its wings are outstretched, showing a gradient from white at the base to darker shades towards the tips. The bird's head is slightly turned, with a small, pointed beak visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tail or the wild bird is not mentioned.\nB. The shape of the tail is not mentioned, but the tail of the wild bird is mentioned.\nC. The shape of the tail is mentioned in the description but is not fan-like.\nD. The shape of the tail is mentioned in the description and is fan-like.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly white body with a mix of gray and brown feathers on its wings and back. Its wings are outstretched, showing a gradient from white at the base to darker shades towards the tips. The bird's head is slightly turned, with a small, pointed beak visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The null or the wild bird is not mentioned.\nB. The action of the null is not mentioned, but the null of the wild bird is mentioned.\nC. The action of the null is mentioned in the description but is not flying.\nD. 
The action of the null is mentioned in the description and is flying.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly white body with a mix of gray and brown feathers on its wings and back. Its wings are outstretched, showing a gradient from white at the base to darker shades towards the tips. The bird's head is slightly turned, with a small, pointed beak visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wing or the wild bird is not mentioned.\nB. The position of the wing is not mentioned, but the wing of the wild bird is mentioned.\nC. The position of the wing is mentioned in the description but is not extended or outstretched.\nD. The position of the wing is mentioned in the description and is extended or outstretched.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly white body with a mix of gray and brown feathers on its wings and back. Its wings are outstretched, showing a gradient from white at the base to darker shades towards the tips. 
The bird's head is slightly turned, with a small, pointed beak visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The claws of the wild bird are not mentioned in the description.\nB. The claws of the wild bird are mentioned in the description.\nC. The wild bird is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly white body with a mix of gray and brown feathers on its wings and back. Its wings are outstretched, showing a gradient from white at the base to darker shades towards the tips. The bird's head is slightly turned, with a small, pointed beak visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The boats are not mentioned in the description.\nB. The boats are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly white body with a mix of gray and brown feathers on its wings and back. Its wings are outstretched, showing a gradient from white at the base to darker shades towards the tips. 
The bird's head is slightly turned, with a small, pointed beak visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chimneys are not mentioned in the description.\nB. The chimneys are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly white body with a mix of gray and brown feathers on its wings and back. Its wings are outstretched, showing a gradient from white at the base to darker shades towards the tips. The bird's head is slightly turned, with a small, pointed beak visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bridge is not mentioned in the description.\nB. The bridge is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a predominantly white body with a mix of gray and brown feathers on its wings and back. Its wings are outstretched, showing a gradient from white at the base to darker shades towards the tips. 
The bird's head is slightly turned, with a small, pointed beak visible.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The windows are not mentioned in the description.\nB. The windows are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the wing is mentioned in the description and is grey or brown.", + "D. The color of the feathers is mentioned in the description and is white, grey, or brown.", + "B. The shape of the tail is not mentioned, but the tail of the wild bird is mentioned.", + "D. The action of the null is mentioned in the description and is flying.", + "D. The position of the wing is mentioned in the description and is extended or outstretched.", + "A. The claws of the wild bird are not mentioned in the description.", + "A. The boats are not mentioned in the description.", + "A. The chimneys are not mentioned in the description.", + "A. The bridge is not mentioned in the description.", + "A. The windows are not mentioned in the description." + ], + "score": 0.95, + "score_pos": 0.9, + "score_neg": 1.0, + "recognition_result": true + }, + "2580323": { + "pred": "A rectangular wooden picture frame with a light brown finish, containing a detailed architectural floor plan and elevation drawings. The drawings are monochrome and feature various rooms, furniture, and structural elements. 
The frame has a simple, smooth design with slightly rounded edges.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the frame is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the frame is not mentioned, but the frame of the picture/frame is mentioned.", + 0.5 + ], + [ + "The frame or the picture/frame is not mentioned.", + 0 + ], + [ + "The shape of the frame is mentioned in the description but is not rectangular.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the frame is mentioned in the description and is rectangular.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The background color of the picture is mentioned in the description and is white.", + 1 + ], + [ + "The background color of the picture is not mentioned, but the picture of the picture/frame is mentioned.", + 0.5 + ], + [ + "The picture or the picture/frame is not mentioned.", + 0 + ], + [ + "The background color of the picture is mentioned in the description but is not white.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The background color of the picture is mentioned in the description and is white.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the picture is mentioned in the description and is diagram, schematic, or blueprint.", + 1 + ], + [ + "The type of the picture is not mentioned, but the picture of the picture/frame is mentioned.", + 0.5 + ], + [ + "The picture or the picture/frame is not mentioned.", + 0 + ], + [ + "The type of the picture is mentioned in the description but is not diagram, schematic, or blueprint.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The type of the picture is mentioned in the description and is diagram, schematic, or blueprint.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the frame is mentioned in the description and is wood.", + 1 + ], + [ + "The material of the frame is not mentioned, but the frame of the picture/frame is mentioned.", + 0.5 + ], + [ + "The frame or the picture/frame is not mentioned.", + 0 + ], + [ + "The material of the frame is mentioned in the description but is not wood.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the frame is mentioned in the description and is wood.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The radio is mentioned in the description.", + -1 + ], + [ + "The radio is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The radio is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The picture/frame is not mentioned in the description.", + 0 + ], + [ + "The glass of the picture/frame are mentioned in the description.", + -1 + ], + [ + "The glass of the picture/frame are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The glass of the picture/frame are not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The keyboard is mentioned in the description.", + -1 + ], + [ + "The keyboard is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The keyboard is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The light switch is mentioned in the description.", + -1 + ], + [ + "The light switch is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The light switch is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The curtain is mentioned in the description.", + -1 + ], + [ + "The curtain is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The curtain is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is picture/frame. Based on the image, is it likely that the object in the description is given class: picture/frame or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden picture frame with a light brown finish, containing a detailed architectural floor plan and elevation drawings. The drawings are monochrome and feature various rooms, furniture, and structural elements. The frame has a simple, smooth design with slightly rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is picture/frame. Based on the image, is it likely that the object in the description is given class: picture/frame or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden picture frame with a light brown finish, containing a detailed architectural floor plan and elevation drawings. The drawings are monochrome and feature various rooms, furniture, and structural elements. The frame has a simple, smooth design with slightly rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the frame is mentioned in the description and is rectangular.\nB. The shape of the frame is not mentioned, but the frame of the picture/frame is mentioned.\nC. The frame or the picture/frame is not mentioned.\nD. 
The shape of the frame is mentioned in the description but is not rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden picture frame with a light brown finish, containing a detailed architectural floor plan and elevation drawings. The drawings are monochrome and feature various rooms, furniture, and structural elements. The frame has a simple, smooth design with slightly rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The background color of the picture is mentioned in the description and is white.\nB. The background color of the picture is not mentioned, but the picture of the picture/frame is mentioned.\nC. The picture or the picture/frame is not mentioned.\nD. The background color of the picture is mentioned in the description but is not white.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden picture frame with a light brown finish, containing a detailed architectural floor plan and elevation drawings. The drawings are monochrome and feature various rooms, furniture, and structural elements. 
The frame has a simple, smooth design with slightly rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The type of the picture is mentioned in the description and is diagram, schematic, or blueprint.\nB. The type of the picture is not mentioned, but the picture of the picture/frame is mentioned.\nC. The picture or the picture/frame is not mentioned.\nD. The type of the picture is mentioned in the description but is not diagram, schematic, or blueprint.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden picture frame with a light brown finish, containing a detailed architectural floor plan and elevation drawings. The drawings are monochrome and feature various rooms, furniture, and structural elements. The frame has a simple, smooth design with slightly rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the frame is mentioned in the description and is wood.\nB. The material of the frame is not mentioned, but the frame of the picture/frame is mentioned.\nC. The frame or the picture/frame is not mentioned.\nD. The material of the frame is mentioned in the description but is not wood.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden picture frame with a light brown finish, containing a detailed architectural floor plan and elevation drawings. The drawings are monochrome and feature various rooms, furniture, and structural elements. The frame has a simple, smooth design with slightly rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The radio is mentioned in the description.\nB. The radio is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden picture frame with a light brown finish, containing a detailed architectural floor plan and elevation drawings. The drawings are monochrome and feature various rooms, furniture, and structural elements. The frame has a simple, smooth design with slightly rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The picture/frame is not mentioned in the description.\nB. The glass of the picture/frame are mentioned in the description.\nC. The glass of the picture/frame are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden picture frame with a light brown finish, containing a detailed architectural floor plan and elevation drawings. The drawings are monochrome and feature various rooms, furniture, and structural elements. The frame has a simple, smooth design with slightly rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The keyboard is mentioned in the description.\nB. The keyboard is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden picture frame with a light brown finish, containing a detailed architectural floor plan and elevation drawings. The drawings are monochrome and feature various rooms, furniture, and structural elements. The frame has a simple, smooth design with slightly rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The light switch is mentioned in the description.\nB. The light switch is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden picture frame with a light brown finish, containing a detailed architectural floor plan and elevation drawings. The drawings are monochrome and feature various rooms, furniture, and structural elements. The frame has a simple, smooth design with slightly rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The curtain is mentioned in the description.\nB. The curtain is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The shape of the frame is mentioned in the description and is rectangular.", + "A. The background color of the picture is mentioned in the description and is white.", + "A. The type of the picture is mentioned in the description and is diagram, schematic, or blueprint.", + "A. The material of the frame is mentioned in the description and is wood.", + "B. The radio is not mentioned in the description.", + "C. The glass of the picture/frame are not mentioned in the description.", + "B. The keyboard is not mentioned in the description.", + "B. The light switch is not mentioned in the description.", + "B. The curtain is not mentioned in the description." 
+ ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "4782942": { + "pred": "A dark-colored, conical-shaped horn with a wide, flared opening and a smooth, cylindrical body.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The megaphone/speaker is not mentioned.", + 0 + ], + [ + "The shape of the megaphone/speaker is not mentioned.", + 0 + ], + [ + "The shape of the megaphone/speaker is mentioned in the description but is not conical.", + -1 + ], + [ + "The shape of the megaphone/speaker is mentioned in the description and is conical.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the megaphone/speaker is mentioned in the description and is conical.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The megaphone/speaker is not mentioned.", + 0 + ], + [ + "The color of the megaphone/speaker is not mentioned.", + 0 + ], + [ + "The color of the megaphone/speaker is mentioned in the description but is not gray.", + -1 + ], + [ + "The color of the megaphone/speaker is mentioned in the description and is gray.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the megaphone/speaker is mentioned in the description and is gray.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The front/opening or the megaphone/speaker is not mentioned.", + 0 + ], + [ + "The shape of the front/opening is not mentioned, but the front/opening of the megaphone/speaker is mentioned.", + 0.5 + ], + [ + "The shape of the front/opening is mentioned in the description but is not round.", + -1 + ], + [ + "The shape of the front/opening is mentioned in the description and is round.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The shape of the front/opening is mentioned in the description and is round.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The megaphone/speaker is not mentioned in the description.", + 0 + ], + [ + "The siren button of the megaphone/speaker is not mentioned in the description.", + 1 + ], + [ + "The siren button of the megaphone/speaker is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The siren button of the megaphone/speaker is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fence is not mentioned in the description.", + 1 + ], + [ + "The fence is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The fence is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The megaphone/speaker is not mentioned in the description.", + 0 + ], + [ + "The strap of the megaphone/speaker is not mentioned in the description.", + 1 + ], + [ + "The strap of the megaphone/speaker is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The strap of the megaphone/speaker is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The megaphone/speaker is not mentioned in the description.", + 0 + ], + [ + "The battery compartment of the megaphone/speaker is not mentioned in the description.", + 1 + ], + [ + "The battery compartment of the megaphone/speaker is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The battery compartment of the megaphone/speaker is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The houses are not mentioned in the description.", + 1 + ], + [ + "The houses are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The houses are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is megaphone/speaker. Based on the image, is it likely that the object in the description is given class: megaphone/speaker or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA dark-colored, conical-shaped horn with a wide, flared opening and a smooth, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is megaphone/speaker. Based on the image, is it likely that the object in the description is given class: megaphone/speaker or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark-colored, conical-shaped horn with a wide, flared opening and a smooth, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The megaphone/speaker is not mentioned.\nB. The shape of the megaphone/speaker is not mentioned.\nC. The shape of the megaphone/speaker is mentioned in the description but is not conical.\nD. The shape of the megaphone/speaker is mentioned in the description and is conical.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA dark-colored, conical-shaped horn with a wide, flared opening and a smooth, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The megaphone/speaker is not mentioned.\nB. The color of the megaphone/speaker is not mentioned.\nC. The color of the megaphone/speaker is mentioned in the description but is not gray.\nD. The color of the megaphone/speaker is mentioned in the description and is gray.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark-colored, conical-shaped horn with a wide, flared opening and a smooth, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The front/opening or the megaphone/speaker is not mentioned.\nB. The shape of the front/opening is not mentioned, but the front/opening of the megaphone/speaker is mentioned.\nC. The shape of the front/opening is mentioned in the description but is not round.\nD. The shape of the front/opening is mentioned in the description and is round.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA dark-colored, conical-shaped horn with a wide, flared opening and a smooth, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The megaphone/speaker is not mentioned in the description.\nB. The siren button of the megaphone/speaker is not mentioned in the description.\nC. The siren button of the megaphone/speaker is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark-colored, conical-shaped horn with a wide, flared opening and a smooth, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fence is not mentioned in the description.\nB. The fence is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark-colored, conical-shaped horn with a wide, flared opening and a smooth, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The megaphone/speaker is not mentioned in the description.\nB. The strap of the megaphone/speaker is not mentioned in the description.\nC. The strap of the megaphone/speaker is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark-colored, conical-shaped horn with a wide, flared opening and a smooth, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The megaphone/speaker is not mentioned in the description.\nB. The battery compartment of the megaphone/speaker is not mentioned in the description.\nC. The battery compartment of the megaphone/speaker is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark-colored, conical-shaped horn with a wide, flared opening and a smooth, cylindrical body.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The houses are not mentioned in the description.\nB. The houses are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. 
The shape of the megaphone/speaker is mentioned in the description and is conical.", + "D. The color of the megaphone/speaker is mentioned in the description and is gray.", + "D. The shape of the front/opening is mentioned in the description and is round.", + "B. The siren button of the megaphone/speaker is not mentioned in the description.", + "A. The fence is not mentioned in the description.", + "B. The strap of the megaphone/speaker is not mentioned in the description.", + "B. The battery compartment of the megaphone/speaker is not mentioned in the description.", + "A. The houses are not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "6037269": { + "pred": "A vintage-style shower head with a curved, metallic arm and a cylindrical, cream-colored handle. The shower head itself is round and metallic, with a slightly domed top and a flat bottom.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the showerhead is mentioned in the description but is not silver and/or metallic.", + -1 + ], + [ + "The color of the showerhead is not mentioned.", + 0 + ], + [ + "The color of the showerhead is mentioned in the description and is silver and/or metallic.", + 1 + ], + [ + "The showerhead is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the showerhead is mentioned in the description and is silver and/or metallic.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the showerhead is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the showerhead is not mentioned.", + 0 + ], + [ + "The texture of the showerhead is mentioned in the description and is smooth.", + 1 + ], + [ + "The showerhead is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The texture of the showerhead is mentioned in the description and is smooth.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the showerhead is mentioned in the description but is not circular.", + -1 + ], + [ + "The shape of the showerhead is not mentioned.", + 0 + ], + [ + "The shape of the showerhead is mentioned in the description and is circular.", + 1 + ], + [ + "The showerhead is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the showerhead is mentioned in the description and is circular.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the showerhead is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the showerhead is not mentioned.", + 0 + ], + [ + "The material of the showerhead is mentioned in the description and is metal.", + 1 + ], + [ + "The showerhead is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The material of the showerhead is mentioned in the description and is metal.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the handle is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the handle is not mentioned, but the handle of the showerhead is mentioned.", + 0.5 + ], + [ + "The color of the handle is mentioned in the description and is white.", + 1 + ], + [ + "The handle or the showerhead is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the handle is mentioned in the description but is not white.", + "pred_index": 0, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shower hose of the showerhead is mentioned in the description.", + -1 + ], + [ + "The showerhead is not mentioned in the description.", + 0 + ], + [ + "The shower hose of the showerhead is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The shower hose of the showerhead is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bath caddy is mentioned in the description.", + -1 + ], + [ + "The bath caddy is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The bath caddy is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bathtub is mentioned in the description.", + -1 + ], + [ + "The bathtub is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bathtub is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The showerhead filter of the showerhead is mentioned in the description.", + -1 + ], + [ + "The showerhead is not mentioned in the description.", + 0 + ], + [ + "The showerhead filter of the showerhead is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The showerhead filter of the showerhead is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet paper holder is mentioned in the description.", + -1 + ], + [ + "The toilet paper holder is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The toilet paper holder is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is showerhead. Based on the image, is it likely that the object in the description is given class: showerhead or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style shower head with a curved, metallic arm and a cylindrical, cream-colored handle. The shower head itself is round and metallic, with a slightly domed top and a flat bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is showerhead. Based on the image, is it likely that the object in the description is given class: showerhead or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style shower head with a curved, metallic arm and a cylindrical, cream-colored handle. The shower head itself is round and metallic, with a slightly domed top and a flat bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the showerhead is mentioned in the description but is not silver and/or metallic.\nB. The color of the showerhead is not mentioned.\nC. The color of the showerhead is mentioned in the description and is silver and/or metallic.\nD. The showerhead is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style shower head with a curved, metallic arm and a cylindrical, cream-colored handle. The shower head itself is round and metallic, with a slightly domed top and a flat bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the showerhead is mentioned in the description but is not smooth.\nB. The texture of the showerhead is not mentioned.\nC. The texture of the showerhead is mentioned in the description and is smooth.\nD. The showerhead is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style shower head with a curved, metallic arm and a cylindrical, cream-colored handle. The shower head itself is round and metallic, with a slightly domed top and a flat bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the showerhead is mentioned in the description but is not circular.\nB. The shape of the showerhead is not mentioned.\nC. The shape of the showerhead is mentioned in the description and is circular.\nD. The showerhead is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style shower head with a curved, metallic arm and a cylindrical, cream-colored handle. The shower head itself is round and metallic, with a slightly domed top and a flat bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the showerhead is mentioned in the description but is not metal.\nB. The material of the showerhead is not mentioned.\nC. The material of the showerhead is mentioned in the description and is metal.\nD. The showerhead is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style shower head with a curved, metallic arm and a cylindrical, cream-colored handle. The shower head itself is round and metallic, with a slightly domed top and a flat bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the handle is mentioned in the description but is not white.\nB. The color of the handle is not mentioned, but the handle of the showerhead is mentioned.\nC. The color of the handle is mentioned in the description and is white.\nD. The handle or the showerhead is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style shower head with a curved, metallic arm and a cylindrical, cream-colored handle. The shower head itself is round and metallic, with a slightly domed top and a flat bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shower hose of the showerhead is mentioned in the description.\nB. The showerhead is not mentioned in the description.\nC. The shower hose of the showerhead is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style shower head with a curved, metallic arm and a cylindrical, cream-colored handle. The shower head itself is round and metallic, with a slightly domed top and a flat bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bath caddy is mentioned in the description.\nB. The bath caddy is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style shower head with a curved, metallic arm and a cylindrical, cream-colored handle. The shower head itself is round and metallic, with a slightly domed top and a flat bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bathtub is mentioned in the description.\nB. The bathtub is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style shower head with a curved, metallic arm and a cylindrical, cream-colored handle. The shower head itself is round and metallic, with a slightly domed top and a flat bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The showerhead filter of the showerhead is mentioned in the description.\nB. The showerhead is not mentioned in the description.\nC. The showerhead filter of the showerhead is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style shower head with a curved, metallic arm and a cylindrical, cream-colored handle. The shower head itself is round and metallic, with a slightly domed top and a flat bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toilet paper holder is mentioned in the description.\nB. The toilet paper holder is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the showerhead is mentioned in the description and is silver and/or metallic.", + "C. The texture of the showerhead is mentioned in the description and is smooth.", + "C. The shape of the showerhead is mentioned in the description and is circular.", + "C. The material of the showerhead is mentioned in the description and is metal.", + "A. The color of the handle is mentioned in the description but is not white.", + "C. The shower hose of the showerhead is not mentioned in the description.", + "B. The bath caddy is not mentioned in the description.", + "B. The bathtub is not mentioned in the description.", + "C. The showerhead filter of the showerhead is not mentioned in the description.", + "B. The toilet paper holder is not mentioned in the description." + ], + "score": 0.8, + "score_pos": 0.6, + "score_neg": 1.0, + "recognition_result": true + }, + "7050495": { + "pred": "A black leather handbag with a smooth, slightly glossy finish. 
The visible part shows a rectangular shape with a subtle seam along the bottom edge.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the handbag is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the handbag is not mentioned.", + 0 + ], + [ + "The texture of the handbag is mentioned in the description but is not smooth.", + -1 + ], + [ + "The handbag is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The texture of the handbag is mentioned in the description and is smooth.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the handbag is mentioned in the description and is black or glossy.", + 1 + ], + [ + "The color of the handbag is not mentioned.", + 0 + ], + [ + "The color of the handbag is mentioned in the description but is not black or glossy.", + -1 + ], + [ + "The handbag is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the handbag is mentioned in the description and is black or glossy.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the handbag is mentioned in the description and is leather.", + 1 + ], + [ + "The material of the handbag is not mentioned.", + 0 + ], + [ + "The material of the handbag is mentioned in the description but is not leather.", + -1 + ], + [ + "The handbag is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The material of the handbag is mentioned in the description and is leather.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the handbag is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the handbag is not mentioned.", + 0 + ], + [ + "The shape of the handbag is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The handbag is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the handbag is mentioned in the description and is rectangular.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The seam of the handbag is mentioned in the description and is visible.", + 1 + ], + [ + "The seam of the handbag is not mentioned.", + 0 + ], + [ + "The seam of the handbag is mentioned in the description but is not visible.", + -1 + ], + [ + "The handbag is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The seam of the handbag is mentioned in the description and is visible.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The price tags are not mentioned in the description.", + 1 + ], + [ + "The price tags are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The price tags are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handbag is not mentioned in the description.", + 0 + ], + [ + "The logo of the handbag is not mentioned in the description.", + 1 + ], + [ + "The logo of the handbag is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The logo of the handbag is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handbag is not mentioned in the description.", + 0 + ], + [ + "The handle of the handbag is not mentioned in the description.", + 1 + ], + [ + "The handle of the handbag is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The handle of the handbag is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handbag is not mentioned in the description.", + 0 + ], + [ + "The pocket of the handbag is not mentioned in the description.", + 1 + ], + [ + "The pocket of the handbag is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The pocket of the handbag is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handbag is not mentioned in the description.", + 0 + ], + [ + "The zipper of the handbag is not mentioned in the description.", + 1 + ], + [ + "The zipper of the handbag is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The zipper of the handbag is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is handbag. Based on the image, is it likely that the object in the description is given class: handbag or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth, slightly glossy finish. The visible part shows a rectangular shape with a subtle seam along the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is handbag. Based on the image, is it likely that the object in the description is given class: handbag or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth, slightly glossy finish. 
The visible part shows a rectangular shape with a subtle seam along the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the handbag is mentioned in the description and is smooth.\nB. The texture of the handbag is not mentioned.\nC. The texture of the handbag is mentioned in the description but is not smooth.\nD. The handbag is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth, slightly glossy finish. The visible part shows a rectangular shape with a subtle seam along the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the handbag is mentioned in the description and is black or glossy.\nB. The color of the handbag is not mentioned.\nC. The color of the handbag is mentioned in the description but is not black or glossy.\nD. The handbag is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth, slightly glossy finish. 
The visible part shows a rectangular shape with a subtle seam along the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the handbag is mentioned in the description and is leather.\nB. The material of the handbag is not mentioned.\nC. The material of the handbag is mentioned in the description but is not leather.\nD. The handbag is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth, slightly glossy finish. The visible part shows a rectangular shape with a subtle seam along the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the handbag is mentioned in the description and is rectangular.\nB. The shape of the handbag is not mentioned.\nC. The shape of the handbag is mentioned in the description but is not rectangular.\nD. The handbag is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth, slightly glossy finish. 
The visible part shows a rectangular shape with a subtle seam along the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The seam of the handbag is mentioned in the description and is visible.\nB. The seam of the handbag is not mentioned.\nC. The seam of the handbag is mentioned in the description but is not visible.\nD. The handbag is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth, slightly glossy finish. The visible part shows a rectangular shape with a subtle seam along the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The price tags are not mentioned in the description.\nB. The price tags are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth, slightly glossy finish. 
The visible part shows a rectangular shape with a subtle seam along the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handbag is not mentioned in the description.\nB. The logo of the handbag is not mentioned in the description.\nC. The logo of the handbag is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth, slightly glossy finish. The visible part shows a rectangular shape with a subtle seam along the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handbag is not mentioned in the description.\nB. The handle of the handbag is not mentioned in the description.\nC. The handle of the handbag is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth, slightly glossy finish. 
The visible part shows a rectangular shape with a subtle seam along the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handbag is not mentioned in the description.\nB. The pocket of the handbag is not mentioned in the description.\nC. The pocket of the handbag is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black leather handbag with a smooth, slightly glossy finish. The visible part shows a rectangular shape with a subtle seam along the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handbag is not mentioned in the description.\nB. The zipper of the handbag is not mentioned in the description.\nC. The zipper of the handbag is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The texture of the handbag is mentioned in the description and is smooth.", + "A. The color of the handbag is mentioned in the description and is black or glossy.", + "A. The material of the handbag is mentioned in the description and is leather.", + "A. The shape of the handbag is mentioned in the description and is rectangular.", + "A. The seam of the handbag is mentioned in the description and is visible.", + "A. The price tags are not mentioned in the description.", + "B. The logo of the handbag is not mentioned in the description.", + "B. The handle of the handbag is not mentioned in the description.", + "B. 
The pocket of the handbag is not mentioned in the description.", + "B. The zipper of the handbag is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "8331699": { + "pred": "The visible part of the waste container is black with a smooth surface and a slightly curved edge.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The computer box is not mentioned.", + 0 + ], + [ + "The shape of the computer box is not mentioned.", + 0 + ], + [ + "The shape of the computer box is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the computer box is mentioned in the description but is not rectangular.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the computer box is not mentioned.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The computer box is not mentioned.", + 0 + ], + [ + "The color of the computer box is not mentioned.", + 0 + ], + [ + "The color of the computer box is mentioned in the description and is black or gray.", + 1 + ], + [ + "The color of the computer box is mentioned in the description but is not black or gray.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The computer box is not mentioned.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The side panel or the computer box is not mentioned.", + 0 + ], + [ + "The color of the side panel is not mentioned, but the side panel of the computer box is mentioned.", + 0.5 + ], + [ + "The color of the side panel is mentioned in the description and is gray.", + 1 + ], + [ + "The color of the side panel is mentioned in the description but is not gray.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The side panel or the computer box is not mentioned.", + "pred_index": 0, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chair is not mentioned in the description.", + 1 + ], + [ + "The chair is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The chair is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rubber duck is not mentioned in the description.", + 1 + ], + [ + "The rubber duck is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The rubber duck is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The power button of the computer box is not mentioned in the description.", + 1 + ], + [ + "The computer box is not mentioned in the description.", + 0 + ], + [ + "The power button of the computer box is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The computer box is not mentioned in the description.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sticky notes are not mentioned in the description.", + 1 + ], + [ + "The sticky notes are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The sticky notes are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The USB ports of the computer box are not mentioned in the description.", + 1 + ], + [ + "The computer box is not mentioned in the description.", + 0 + ], + [ + "The USB ports of the computer box are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The computer box is not mentioned in the description.", + "pred_index": 1, + "eval_result": 0 + } + ], + "details_recognition": [ + { + "question": "The object in the image is computer box. Based on the image, is it likely that the object in the description is given class: computer box or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the waste container is black with a smooth surface and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is computer box. Based on the image, is it likely that the object in the description is given class: computer box or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the waste container is black with a smooth surface and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The computer box is not mentioned.\nB. The shape of the computer box is not mentioned.\nC. The shape of the computer box is mentioned in the description and is rectangular.\nD. The shape of the computer box is mentioned in the description but is not rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the waste container is black with a smooth surface and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The computer box is not mentioned.\nB. The color of the computer box is not mentioned.\nC. The color of the computer box is mentioned in the description and is black or gray.\nD. The color of the computer box is mentioned in the description but is not black or gray.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the waste container is black with a smooth surface and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The side panel or the computer box is not mentioned.\nB. The color of the side panel is not mentioned, but the side panel of the computer box is mentioned.\nC. The color of the side panel is mentioned in the description and is gray.\nD. The color of the side panel is mentioned in the description but is not gray.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the waste container is black with a smooth surface and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chair is not mentioned in the description.\nB. The chair is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the waste container is black with a smooth surface and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rubber duck is not mentioned in the description.\nB. The rubber duck is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the waste container is black with a smooth surface and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The power button of the computer box is not mentioned in the description.\nB. 
The computer box is not mentioned in the description.\nC. The power button of the computer box is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the waste container is black with a smooth surface and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sticky notes are not mentioned in the description.\nB. The sticky notes are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the waste container is black with a smooth surface and a slightly curved edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The USB ports of the computer box are not mentioned in the description.\nB. The computer box is not mentioned in the description.\nC. The USB ports of the computer box are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The shape of the computer box is not mentioned.", + "A. The computer box is not mentioned.", + "A. The side panel or the computer box is not mentioned.", + "A. 
The chair is not mentioned in the description.", + "A. The rubber duck is not mentioned in the description.", + "B. The computer box is not mentioned in the description.", + "A. The sticky notes are not mentioned in the description.", + "B. The computer box is not mentioned in the description." + ], + "score": 0.375, + "score_pos": 0.0, + "score_neg": 0.6, + "recognition_result": true + }, + "8556676": { + "pred": "A deep red apple with a smooth, glossy surface. The apple has a slightly irregular shape with a prominent bulge on the left side and a smaller bulge on the right side. The bottom part of the apple is slightly darker, almost black, with a few small, reflective spots.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the apple is mentioned in the description and is small.", + 1 + ], + [ + "The size of the apple is not mentioned.", + 0 + ], + [ + "The size of the apple is mentioned in the description but is not small.", + -1 + ], + [ + "The apple is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The size of the apple is not mentioned.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the apple is mentioned in the description and is smooth or glossy.", + 1 + ], + [ + "The texture of the apple is not mentioned.", + 0 + ], + [ + "The texture of the apple is mentioned in the description but is not smooth or glossy.", + -1 + ], + [ + "The apple is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The texture of the apple is mentioned in the description and is smooth or glossy.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the apple is mentioned in the description and is red.", + 1 + ], + [ + "The color of the apple is not mentioned.", + 0 + ], + [ + "The color of the apple is mentioned in the description but is not red.", + -1 + ], + [ + "The apple is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the apple is mentioned in the description and is red.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lemon is mentioned in the description.", + -1 + ], + [ + "The lemon is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The lemon is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange is mentioned in the description.", + -1 + ], + [ + "The orange is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The orange is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pineapple is mentioned in the description.", + -1 + ], + [ + "The pineapple is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The pineapple is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The seeds of the apple are mentioned in the description.", + -1 + ], + [ + "The seeds of the apple are not mentioned in the description.", + 1 + ], + [ + "The apple is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The seeds of the apple are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The stem of the apple is mentioned in the description.", + -1 + ], + [ + "The stem of the apple is not mentioned in the description.", + 1 + ], + [ + "The apple is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The stem of the apple is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is apple. Based on the image, is it likely that the object in the description is given class: apple or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a smooth, glossy surface. 
The apple has a slightly irregular shape with a prominent bulge on the left side and a smaller bulge on the right side. The bottom part of the apple is slightly darker, almost black, with a few small, reflective spots.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is apple. Based on the image, is it likely that the object in the description is given class: apple or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a smooth, glossy surface. The apple has a slightly irregular shape with a prominent bulge on the left side and a smaller bulge on the right side. The bottom part of the apple is slightly darker, almost black, with a few small, reflective spots.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the apple is mentioned in the description and is small.\nB. The size of the apple is not mentioned.\nC. The size of the apple is mentioned in the description but is not small.\nD. The apple is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a smooth, glossy surface. 
The apple has a slightly irregular shape with a prominent bulge on the left side and a smaller bulge on the right side. The bottom part of the apple is slightly darker, almost black, with a few small, reflective spots.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the apple is mentioned in the description and is smooth or glossy.\nB. The texture of the apple is not mentioned.\nC. The texture of the apple is mentioned in the description but is not smooth or glossy.\nD. The apple is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a smooth, glossy surface. The apple has a slightly irregular shape with a prominent bulge on the left side and a smaller bulge on the right side. The bottom part of the apple is slightly darker, almost black, with a few small, reflective spots.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the apple is mentioned in the description and is red.\nB. The color of the apple is not mentioned.\nC. The color of the apple is mentioned in the description but is not red.\nD. The apple is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a smooth, glossy surface. The apple has a slightly irregular shape with a prominent bulge on the left side and a smaller bulge on the right side. The bottom part of the apple is slightly darker, almost black, with a few small, reflective spots.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lemon is mentioned in the description.\nB. The lemon is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a smooth, glossy surface. The apple has a slightly irregular shape with a prominent bulge on the left side and a smaller bulge on the right side. The bottom part of the apple is slightly darker, almost black, with a few small, reflective spots.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orange is mentioned in the description.\nB. The orange is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a smooth, glossy surface. The apple has a slightly irregular shape with a prominent bulge on the left side and a smaller bulge on the right side. The bottom part of the apple is slightly darker, almost black, with a few small, reflective spots.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pineapple is mentioned in the description.\nB. The pineapple is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a smooth, glossy surface. The apple has a slightly irregular shape with a prominent bulge on the left side and a smaller bulge on the right side. The bottom part of the apple is slightly darker, almost black, with a few small, reflective spots.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The seeds of the apple are mentioned in the description.\nB. The seeds of the apple are not mentioned in the description.\nC. The apple is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA deep red apple with a smooth, glossy surface. The apple has a slightly irregular shape with a prominent bulge on the left side and a smaller bulge on the right side. The bottom part of the apple is slightly darker, almost black, with a few small, reflective spots.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The stem of the apple is mentioned in the description.\nB. The stem of the apple is not mentioned in the description.\nC. The apple is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The size of the apple is not mentioned.", + "A. The texture of the apple is mentioned in the description and is smooth or glossy.", + "A. The color of the apple is mentioned in the description and is red.", + "B. The lemon is not mentioned in the description.", + "B. The orange is not mentioned in the description.", + "B. The pineapple is not mentioned in the description.", + "B. The seeds of the apple are not mentioned in the description.", + "B. The stem of the apple is not mentioned in the description." 
+ ], + "score": 0.875, + "score_pos": 0.6666666666666666, + "score_neg": 1.0, + "recognition_result": true + }, + "11012500": { + "pred": "A burrito filled with fresh green arugula, a slice of ripe tomato, shredded lettuce, and a layer of seasoned ground meat, all wrapped in a soft, lightly toasted tortilla.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the lettuce is not mentioned, but the lettuce of the taco is mentioned.", + 0.5 + ], + [ + "The shape of the lettuce is mentioned in the description but is not shredded.", + -1 + ], + [ + "The lettuce or the taco is not mentioned.", + 0 + ], + [ + "The shape of the lettuce is mentioned in the description and is shredded.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the lettuce is mentioned in the description and is shredded.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the tomato is not mentioned, but the tomato of the taco is mentioned.", + 0.5 + ], + [ + "The shape of the tomato is mentioned in the description but is not sliced.", + -1 + ], + [ + "The tomato or the taco is not mentioned.", + 0 + ], + [ + "The shape of the tomato is mentioned in the description and is sliced.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The shape of the tomato is mentioned in the description and is sliced.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the arugula is not mentioned, but the arugula of the taco is mentioned.", + 0.5 + ], + [ + "The color of the arugula is mentioned in the description but is not green.", + -1 + ], + [ + "The arugula or the taco is not mentioned.", + 0 + ], + [ + "The color of the arugula is mentioned in the description and is green.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the arugula is mentioned in the description and is green.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the tortilla is not mentioned, but the tortilla of the taco is mentioned.", + 0.5 + ], + [ + "The color of the tortilla is mentioned in the description but is not white.", + -1 + ], + [ + "The tortilla or the taco is not mentioned.", + 0 + ], + [ + "The color of the tortilla is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the tortilla is mentioned in the description but is not white.", + "pred_index": 1, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The taco is not mentioned in the description.", + 0 + ], + [ + "The whipped cream of the taco is not mentioned in the description.", + 1 + ], + [ + "The whipped cream of the taco is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The whipped cream of the taco is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The two glasses of lemonade with lemon slices and straws are not mentioned in the description.", + 1 + ], + [ + "The two glasses of lemonade with lemon slices and straws are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The two glasses of lemonade with lemon slices and straws are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The taco is not mentioned in the description.", + 0 + ], + [ + "The nuts of the taco are not mentioned in the description.", + 1 + ], + [ + "The nuts of the taco are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The nuts of the taco are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sandwich with vegetables are not mentioned in the description.", + 1 + ], + [ + "The sandwich with vegetables are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The sandwich with vegetables are mentioned in the description.", + "pred_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The taco is not mentioned in the description.", + 0 + ], + [ + "The chocolate of the taco is not mentioned in the description.", + 1 + ], + [ + "The chocolate of the taco is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The chocolate of the taco is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is taco. Based on the image, is it likely that the object in the description is given class: taco or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA burrito filled with fresh green arugula, a slice of ripe tomato, shredded lettuce, and a layer of seasoned ground meat, all wrapped in a soft, lightly toasted tortilla.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is taco. Based on the image, is it likely that the object in the description is given class: taco or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA burrito filled with fresh green arugula, a slice of ripe tomato, shredded lettuce, and a layer of seasoned ground meat, all wrapped in a soft, lightly toasted tortilla.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the lettuce is not mentioned, but the lettuce of the taco is mentioned.\nB. The shape of the lettuce is mentioned in the description but is not shredded.\nC. The lettuce or the taco is not mentioned.\nD. The shape of the lettuce is mentioned in the description and is shredded.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA burrito filled with fresh green arugula, a slice of ripe tomato, shredded lettuce, and a layer of seasoned ground meat, all wrapped in a soft, lightly toasted tortilla.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the tomato is not mentioned, but the tomato of the taco is mentioned.\nB. The shape of the tomato is mentioned in the description but is not sliced.\nC. The tomato or the taco is not mentioned.\nD. The shape of the tomato is mentioned in the description and is sliced.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA burrito filled with fresh green arugula, a slice of ripe tomato, shredded lettuce, and a layer of seasoned ground meat, all wrapped in a soft, lightly toasted tortilla.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the arugula is not mentioned, but the arugula of the taco is mentioned.\nB. The color of the arugula is mentioned in the description but is not green.\nC. The arugula or the taco is not mentioned.\nD. The color of the arugula is mentioned in the description and is green.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA burrito filled with fresh green arugula, a slice of ripe tomato, shredded lettuce, and a layer of seasoned ground meat, all wrapped in a soft, lightly toasted tortilla.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the tortilla is not mentioned, but the tortilla of the taco is mentioned.\nB. The color of the tortilla is mentioned in the description but is not white.\nC. The tortilla or the taco is not mentioned.\nD. The color of the tortilla is mentioned in the description and is white.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA burrito filled with fresh green arugula, a slice of ripe tomato, shredded lettuce, and a layer of seasoned ground meat, all wrapped in a soft, lightly toasted tortilla.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The taco is not mentioned in the description.\nB. The whipped cream of the taco is not mentioned in the description.\nC. The whipped cream of the taco is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA burrito filled with fresh green arugula, a slice of ripe tomato, shredded lettuce, and a layer of seasoned ground meat, all wrapped in a soft, lightly toasted tortilla.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The two glasses of lemonade with lemon slices and straws are not mentioned in the description.\nB. The two glasses of lemonade with lemon slices and straws are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA burrito filled with fresh green arugula, a slice of ripe tomato, shredded lettuce, and a layer of seasoned ground meat, all wrapped in a soft, lightly toasted tortilla.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The taco is not mentioned in the description.\nB. The nuts of the taco are not mentioned in the description.\nC. The nuts of the taco are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA burrito filled with fresh green arugula, a slice of ripe tomato, shredded lettuce, and a layer of seasoned ground meat, all wrapped in a soft, lightly toasted tortilla.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sandwich with vegetables are not mentioned in the description.\nB. The sandwich with vegetables are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA burrito filled with fresh green arugula, a slice of ripe tomato, shredded lettuce, and a layer of seasoned ground meat, all wrapped in a soft, lightly toasted tortilla.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The taco is not mentioned in the description.\nB. The chocolate of the taco is not mentioned in the description.\nC. The chocolate of the taco is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The shape of the lettuce is mentioned in the description and is shredded.", + "D. The shape of the tomato is mentioned in the description and is sliced.", + "D. The color of the arugula is mentioned in the description and is green.", + "B. The color of the tortilla is mentioned in the description but is not white.", + "B. The whipped cream of the taco is not mentioned in the description.", + "A. The two glasses of lemonade with lemon slices and straws are not mentioned in the description.", + "B. The nuts of the taco are not mentioned in the description.", + "B. The sandwich with vegetables are mentioned in the description.", + "B. The chocolate of the taco is not mentioned in the description." + ], + "score": 0.5555555555555556, + "score_pos": 0.5, + "score_neg": 0.6, + "recognition_result": true + }, + "12348080": { + "pred": "A pair of scissors with bright red plastic handles and metallic blades. The handles are oval-shaped with a smooth, glossy finish. 
The blades are straight and sharp, with a slight taper towards the tips.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the handles is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the handles is mentioned in the description and is plastic.", + 1 + ], + [ + "The handles or the scissors are not mentioned.", + 0 + ], + [ + "The material of the handles is not mentioned, but the handles of the scissors are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The material of the handles is mentioned in the description and is plastic.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the blades is mentioned in the description but is not silver or metallic.", + -1 + ], + [ + "The color of the blades is mentioned in the description and is silver or metallic.", + 1 + ], + [ + "The blades or the scissors are not mentioned.", + 0 + ], + [ + "The color of the blades is not mentioned, but the blades of the scissors are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the blades is mentioned in the description and is silver or metallic.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the blades is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the blades is mentioned in the description and is metal.", + 1 + ], + [ + "The blades or the scissors are not mentioned.", + 0 + ], + [ + "The material of the blades is not mentioned, but the blades of the scissors are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The material of the blades is mentioned in the description and is metal.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the handles is mentioned in the description but is not red.", + -1 + ], + [ + "The color of the handles is mentioned in the description and is red.", + 1 + ], + [ + "The handles or the scissors are not mentioned.", + 0 + ], + [ + "The color of the handles is not mentioned, but the handles of the scissors are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the handles is mentioned in the description and is red.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The adjustment screw of the scissors is not mentioned in the description.", + 1 + ], + [ + "The adjustment screw of the scissors is mentioned in the description.", + -1 + ], + [ + "The scissors are not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The adjustment screw of the scissors is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The blade guard of the scissors is not mentioned in the description.", + 1 + ], + [ + "The blade guard of the scissors is mentioned in the description.", + -1 + ], + [ + "The scissors are not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The blade guard of the scissors is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tricycle cart is not mentioned in the description.", + 1 + ], + [ + "The tricycle cart is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The tricycle cart is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The baskets of fruit are not mentioned in the description.", + 1 + ], + [ + "The baskets of fruit are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The baskets of fruit are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The scale is not mentioned in the description.", + 1 + ], + [ + "The scale is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The scale is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is scissors. Based on the image, is it likely that the object in the description is given class: scissors or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red plastic handles and metallic blades. The handles are oval-shaped with a smooth, glossy finish. The blades are straight and sharp, with a slight taper towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is scissors. Based on the image, is it likely that the object in the description is given class: scissors or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red plastic handles and metallic blades. The handles are oval-shaped with a smooth, glossy finish. The blades are straight and sharp, with a slight taper towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the handles is mentioned in the description but is not plastic.\nB. The material of the handles is mentioned in the description and is plastic.\nC. The handles or the scissors are not mentioned.\nD. The material of the handles is not mentioned, but the handles of the scissors are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red plastic handles and metallic blades. The handles are oval-shaped with a smooth, glossy finish. The blades are straight and sharp, with a slight taper towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the blades is mentioned in the description but is not silver or metallic.\nB. The color of the blades is mentioned in the description and is silver or metallic.\nC. The blades or the scissors are not mentioned.\nD. The color of the blades is not mentioned, but the blades of the scissors are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red plastic handles and metallic blades. The handles are oval-shaped with a smooth, glossy finish. The blades are straight and sharp, with a slight taper towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the blades is mentioned in the description but is not metal.\nB. The material of the blades is mentioned in the description and is metal.\nC. The blades or the scissors are not mentioned.\nD. 
The material of the blades is not mentioned, but the blades of the scissors are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red plastic handles and metallic blades. The handles are oval-shaped with a smooth, glossy finish. The blades are straight and sharp, with a slight taper towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the handles is mentioned in the description but is not red.\nB. The color of the handles is mentioned in the description and is red.\nC. The handles or the scissors are not mentioned.\nD. The color of the handles is not mentioned, but the handles of the scissors are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red plastic handles and metallic blades. The handles are oval-shaped with a smooth, glossy finish. The blades are straight and sharp, with a slight taper towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The adjustment screw of the scissors is not mentioned in the description.\nB. The adjustment screw of the scissors is mentioned in the description.\nC. The scissors are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red plastic handles and metallic blades. The handles are oval-shaped with a smooth, glossy finish. The blades are straight and sharp, with a slight taper towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The blade guard of the scissors is not mentioned in the description.\nB. The blade guard of the scissors is mentioned in the description.\nC. The scissors are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red plastic handles and metallic blades. The handles are oval-shaped with a smooth, glossy finish. The blades are straight and sharp, with a slight taper towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tricycle cart is not mentioned in the description.\nB. 
The tricycle cart is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red plastic handles and metallic blades. The handles are oval-shaped with a smooth, glossy finish. The blades are straight and sharp, with a slight taper towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The baskets of fruit are not mentioned in the description.\nB. The baskets of fruit are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of scissors with bright red plastic handles and metallic blades. The handles are oval-shaped with a smooth, glossy finish. The blades are straight and sharp, with a slight taper towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The scale is not mentioned in the description.\nB. The scale is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The material of the handles is mentioned in the description and is plastic.", + "B. 
The color of the blades is mentioned in the description and is silver or metallic.", + "B. The material of the blades is mentioned in the description and is metal.", + "B. The color of the handles is mentioned in the description and is red.", + "A. The adjustment screw of the scissors is not mentioned in the description.", + "A. The blade guard of the scissors is not mentioned in the description.", + "A. The tricycle cart is not mentioned in the description.", + "A. The baskets of fruit are not mentioned in the description.", + "A. The scale is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "16951734": { + "pred": "A wedge of cantaloupe with a smooth, light orange flesh and a thin, pale rind.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the potato is not mentioned.", + 0 + ], + [ + "The color of the potato is mentioned in the description and is yellow, golden, or brown.", + 1 + ], + [ + "The potato is not mentioned.", + 0 + ], + [ + "The color of the potato is mentioned in the description but is not yellow, golden, or brown.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The potato is not mentioned.", + "pred_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the potato is not mentioned.", + 0 + ], + [ + "The texture of the potato is mentioned in the description and is smooth.", + 1 + ], + [ + "The potato is not mentioned.", + 0 + ], + [ + "The texture of the potato is mentioned in the description but is not smooth.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The potato is not mentioned.", + "pred_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the potato is not mentioned.", + 0 + ], + [ + "The shape of the potato is mentioned in the description and is irregular.", + 1 + ], + [ + "The potato is not mentioned.", + 0 + ], + [ + "The shape of the potato is mentioned in the description but is not irregular.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The potato is not mentioned.", + "pred_index": 2, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cup is not mentioned in the description.", + 1 + ], + [ + "The cup is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The cup is not mentioned in the description.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sprouts of the potato are not mentioned in the description.", + 1 + ], + [ + "The potato is not mentioned in the description.", + 0 + ], + [ + "The sprouts of the potato are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The potato is not mentioned in the description.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bowl is not mentioned in the description.", + 1 + ], + [ + "The bowl is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The bowl is not mentioned in the description.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The roots of the potato are not mentioned in the description.", + 1 + ], + [ + "The potato is not mentioned in the description.", + 0 + ], + [ + "The roots of the potato are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The potato is not mentioned in the description.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The liquid is not mentioned in the description.", + 1 + ], + [ + "The liquid is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The liquid is not mentioned in the description.", + "pred_index": 0, + "eval_result": 0 + } + ], + "details_recognition": [ + { + "question": "The object in the image is potato. Based on the image, is it likely that the object in the description is given class: potato or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "B. No", + "pred_index": 1, + "eval_result": "incorrect" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wedge of cantaloupe with a smooth, light orange flesh and a thin, pale rind.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is potato. 
Based on the image, is it likely that the object in the description is given class: potato or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wedge of cantaloupe with a smooth, light orange flesh and a thin, pale rind.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the potato is not mentioned.\nB. The color of the potato is mentioned in the description and is yellow, golden, or brown.\nC. The potato is not mentioned.\nD. The color of the potato is mentioned in the description but is not yellow, golden, or brown.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wedge of cantaloupe with a smooth, light orange flesh and a thin, pale rind.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the potato is not mentioned.\nB. The texture of the potato is mentioned in the description and is smooth.\nC. The potato is not mentioned.\nD. 
The texture of the potato is mentioned in the description but is not smooth.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wedge of cantaloupe with a smooth, light orange flesh and a thin, pale rind.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the potato is not mentioned.\nB. The shape of the potato is mentioned in the description and is irregular.\nC. The potato is not mentioned.\nD. The shape of the potato is mentioned in the description but is not irregular.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wedge of cantaloupe with a smooth, light orange flesh and a thin, pale rind.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cup is not mentioned in the description.\nB. The cup is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wedge of cantaloupe with a smooth, light orange flesh and a thin, pale rind.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sprouts of the potato are not mentioned in the description.\nB. The potato is not mentioned in the description.\nC. The sprouts of the potato are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wedge of cantaloupe with a smooth, light orange flesh and a thin, pale rind.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bowl is not mentioned in the description.\nB. The bowl is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wedge of cantaloupe with a smooth, light orange flesh and a thin, pale rind.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The roots of the potato are not mentioned in the description.\nB. 
The potato is not mentioned in the description.\nC. The roots of the potato are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wedge of cantaloupe with a smooth, light orange flesh and a thin, pale rind.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The liquid is not mentioned in the description.\nB. The liquid is mentioned in the description.\n" + ], + "response": [ + "B. No", + "C. The potato is not mentioned.", + "C. The potato is not mentioned.", + "C. The potato is not mentioned.", + "A. The cup is not mentioned in the description.", + "B. The potato is not mentioned in the description.", + "A. The bowl is not mentioned in the description.", + "B. The potato is not mentioned in the description.", + "A. The liquid is not mentioned in the description." 
+ ], + "score": 0.0, + "score_pos": 0.0, + "score_neg": 0.0, + "recognition_result": false + }, + "17265254": { + "pred": "A traditional rickshaw with a black frame and a red seat, featuring a curved handlebar and a single front wheel with spokes and a rubber tire.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the wheels is not mentioned, but the wheels of the rickshaw are mentioned.", + 0.5 + ], + [ + "The wheels or the rickshaw are not mentioned.", + 0 + ], + [ + "The shape of the wheels is mentioned in the description and is circular or spoked.", + 1 + ], + [ + "The shape of the wheels is mentioned in the description but is not circular or spoked.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the wheels is mentioned in the description and is circular or spoked.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the seat is not mentioned, but the seat of the rickshaw is mentioned.", + 0.5 + ], + [ + "The seat or the rickshaw is not mentioned.", + 0 + ], + [ + "The color of the seat is mentioned in the description and is black.", + 1 + ], + [ + "The color of the seat is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the seat is mentioned in the description but is not black.", + "pred_index": 3, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The number of parts of the wheels is not mentioned, but the wheels of the rickshaw are mentioned.", + 0.5 + ], + [ + "The wheels or the rickshaw are not mentioned.", + 0 + ], + [ + "The number of parts of the wheels is mentioned in the description and is 2.", + 1 + ], + [ + "The number of parts of the wheels is mentioned in the description but is not 2.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The number of parts of the wheels is not mentioned, but the wheels of the rickshaw are mentioned.", + "pred_index": 0, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lights of the rickshaw are not mentioned in the description.", + 1 + ], + [ + "The lights of the rickshaw are mentioned in the description.", + -1 + ], + [ + "The rickshaw is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The lights of the rickshaw are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The people are not mentioned in the description.", + 1 + ], + [ + "The people are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The people are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The storage compartment of the rickshaw is not mentioned in the description.", + 1 + ], + [ + "The storage compartment of the rickshaw is mentioned in the description.", + -1 + ], + [ + "The rickshaw is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The storage compartment of the rickshaw is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bamboo forest is not mentioned in the description.", + 1 + ], + [ + "The bamboo forest is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The bamboo forest is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The horn of the rickshaw is not mentioned in the description.", + 1 + ], + [ + "The horn of the rickshaw is mentioned in the description.", + -1 + ], + [ + "The rickshaw is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The horn of the rickshaw is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is rickshaw. Based on the image, is it likely that the object in the description is given class: rickshaw or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat, featuring a curved handlebar and a single front wheel with spokes and a rubber tire.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is rickshaw. Based on the image, is it likely that the object in the description is given class: rickshaw or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat, featuring a curved handlebar and a single front wheel with spokes and a rubber tire.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the wheels is not mentioned, but the wheels of the rickshaw are mentioned.\nB. The wheels or the rickshaw are not mentioned.\nC. The shape of the wheels is mentioned in the description and is circular or spoked.\nD. 
The shape of the wheels is mentioned in the description but is not circular or spoked.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat, featuring a curved handlebar and a single front wheel with spokes and a rubber tire.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the seat is not mentioned, but the seat of the rickshaw is mentioned.\nB. The seat or the rickshaw is not mentioned.\nC. The color of the seat is mentioned in the description and is black.\nD. The color of the seat is mentioned in the description but is not black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat, featuring a curved handlebar and a single front wheel with spokes and a rubber tire.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The number of parts of the wheels is not mentioned, but the wheels of the rickshaw are mentioned.\nB. The wheels or the rickshaw are not mentioned.\nC. 
The number of parts of the wheels is mentioned in the description and is 2.\nD. The number of parts of the wheels is mentioned in the description but is not 2.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat, featuring a curved handlebar and a single front wheel with spokes and a rubber tire.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lights of the rickshaw are not mentioned in the description.\nB. The lights of the rickshaw are mentioned in the description.\nC. The rickshaw is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat, featuring a curved handlebar and a single front wheel with spokes and a rubber tire.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The people are not mentioned in the description.\nB. The people are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat, featuring a curved handlebar and a single front wheel with spokes and a rubber tire.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The storage compartment of the rickshaw is not mentioned in the description.\nB. The storage compartment of the rickshaw is mentioned in the description.\nC. The rickshaw is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat, featuring a curved handlebar and a single front wheel with spokes and a rubber tire.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bamboo forest is not mentioned in the description.\nB. The bamboo forest is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA traditional rickshaw with a black frame and a red seat, featuring a curved handlebar and a single front wheel with spokes and a rubber tire.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The horn of the rickshaw is not mentioned in the description.\nB. The horn of the rickshaw is mentioned in the description.\nC. The rickshaw is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The shape of the wheels is mentioned in the description and is circular or spoked.", + "D. The color of the seat is mentioned in the description but is not black.", + "A. The number of parts of the wheels is not mentioned, but the wheels of the rickshaw are mentioned.", + "A. The lights of the rickshaw are not mentioned in the description.", + "A. The people are not mentioned in the description.", + "A. The storage compartment of the rickshaw is not mentioned in the description.", + "A. The bamboo forest is not mentioned in the description.", + "A. The horn of the rickshaw is not mentioned in the description." + ], + "score": 0.6875, + "score_pos": 0.16666666666666666, + "score_neg": 1.0, + "recognition_result": true + }, + "18845103": { + "pred": "A metallic spoon with a slightly curved handle and a shallow, oval-shaped bowl. 
The handle has a smooth, reflective surface with a subtle taper towards the bowl.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the handle is not mentioned, but the handle of the spoon is mentioned.", + 0.5 + ], + [ + "The color of the handle is mentioned in the description and is metallic.", + 1 + ], + [ + "The handle or the spoon is not mentioned.", + 0 + ], + [ + "The color of the handle is mentioned in the description but is not metallic.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the handle is not mentioned, but the handle of the spoon is mentioned.", + "pred_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the bowl is not mentioned, but the bowl of the spoon is mentioned.", + 0.5 + ], + [ + "The material of the bowl is mentioned in the description and is metal.", + 1 + ], + [ + "The bowl or the spoon is not mentioned.", + 0 + ], + [ + "The material of the bowl is mentioned in the description but is not metal.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the bowl is not mentioned, but the bowl of the spoon is mentioned.", + "pred_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the bowl is not mentioned, but the bowl of the spoon is mentioned.", + 0.5 + ], + [ + "The color of the bowl is mentioned in the description and is metallic.", + 1 + ], + [ + "The bowl or the spoon is not mentioned.", + 0 + ], + [ + "The color of the bowl is mentioned in the description but is not metallic.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the bowl is not mentioned, but the bowl of the spoon is mentioned.", + "pred_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the bowl is not mentioned, but the bowl of the spoon is mentioned.", + 0.5 + ], + [ + "The shape of the bowl is mentioned in the description and is round or oval.", + 1 + ], + [ + "The bowl or the spoon is not mentioned.", + 0 + ], + [ + "The shape of the bowl is mentioned in the description but is not round or oval.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the bowl is mentioned in the description and is round or oval.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the handle is not mentioned, but the handle of the spoon is mentioned.", + 0.5 + ], + [ + "The shape of the handle is mentioned in the description and is long, elongated, straight, or slender.", + 1 + ], + [ + "The handle or the spoon is not mentioned.", + 0 + ], + [ + "The shape of the handle is mentioned in the description but is not long, elongated, straight, or slender.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the handle is mentioned in the description and is long, elongated, straight, or slender.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The spoon is not mentioned in the description.", + 0 + ], + [ + "The engraved handle of the spoon is mentioned in the description.", + -1 + ], + [ + "The engraved handle of the spoon is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The engraved handle of the spoon is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cutting board is mentioned in the description.", + -1 + ], + [ + "The cutting board is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The cutting board is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The kitchen cabinets are mentioned in the description.", + -1 + ], + [ + "The kitchen cabinets are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The kitchen cabinets are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sink is mentioned in the description.", + -1 + ], + [ + "The sink is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The sink is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The spoon is not mentioned in the description.", + 0 + ], + [ + "The twisted handle of the spoon is mentioned in the description.", + -1 + ], + [ + "The twisted handle of the spoon is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The twisted handle of the spoon is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is spoon. 
Based on the image, is it likely that the object in the description is given class: spoon or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved handle and a shallow, oval-shaped bowl. The handle has a smooth, reflective surface with a subtle taper towards the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is spoon. Based on the image, is it likely that the object in the description is given class: spoon or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved handle and a shallow, oval-shaped bowl. The handle has a smooth, reflective surface with a subtle taper towards the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the handle is not mentioned, but the handle of the spoon is mentioned.\nB. 
The color of the handle is mentioned in the description and is metallic.\nC. The handle or the spoon is not mentioned.\nD. The color of the handle is mentioned in the description but is not metallic.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved handle and a shallow, oval-shaped bowl. The handle has a smooth, reflective surface with a subtle taper towards the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the bowl is not mentioned, but the bowl of the spoon is mentioned.\nB. The material of the bowl is mentioned in the description and is metal.\nC. The bowl or the spoon is not mentioned.\nD. The material of the bowl is mentioned in the description but is not metal.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved handle and a shallow, oval-shaped bowl. The handle has a smooth, reflective surface with a subtle taper towards the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The color of the bowl is not mentioned, but the bowl of the spoon is mentioned.\nB. The color of the bowl is mentioned in the description and is metallic.\nC. The bowl or the spoon is not mentioned.\nD. The color of the bowl is mentioned in the description but is not metallic.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved handle and a shallow, oval-shaped bowl. The handle has a smooth, reflective surface with a subtle taper towards the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the bowl is not mentioned, but the bowl of the spoon is mentioned.\nB. The shape of the bowl is mentioned in the description and is round or oval.\nC. The bowl or the spoon is not mentioned.\nD. The shape of the bowl is mentioned in the description but is not round or oval.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved handle and a shallow, oval-shaped bowl. 
The handle has a smooth, reflective surface with a subtle taper towards the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the handle is not mentioned, but the handle of the spoon is mentioned.\nB. The shape of the handle is mentioned in the description and is long, elongated, straight, or slender.\nC. The handle or the spoon is not mentioned.\nD. The shape of the handle is mentioned in the description but is not long, elongated, straight, or slender.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved handle and a shallow, oval-shaped bowl. The handle has a smooth, reflective surface with a subtle taper towards the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The spoon is not mentioned in the description.\nB. The engraved handle of the spoon is mentioned in the description.\nC. The engraved handle of the spoon is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved handle and a shallow, oval-shaped bowl. 
The handle has a smooth, reflective surface with a subtle taper towards the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cutting board is mentioned in the description.\nB. The cutting board is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved handle and a shallow, oval-shaped bowl. The handle has a smooth, reflective surface with a subtle taper towards the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The kitchen cabinets are mentioned in the description.\nB. The kitchen cabinets are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved handle and a shallow, oval-shaped bowl. The handle has a smooth, reflective surface with a subtle taper towards the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sink is mentioned in the description.\nB. 
The sink is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic spoon with a slightly curved handle and a shallow, oval-shaped bowl. The handle has a smooth, reflective surface with a subtle taper towards the bowl.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The spoon is not mentioned in the description.\nB. The twisted handle of the spoon is mentioned in the description.\nC. The twisted handle of the spoon is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The color of the handle is not mentioned, but the handle of the spoon is mentioned.", + "A. The material of the bowl is not mentioned, but the bowl of the spoon is mentioned.", + "A. The color of the bowl is not mentioned, but the bowl of the spoon is mentioned.", + "B. The shape of the bowl is mentioned in the description and is round or oval.", + "B. The shape of the handle is mentioned in the description and is long, elongated, straight, or slender.", + "C. The engraved handle of the spoon is not mentioned in the description.", + "B. The cutting board is not mentioned in the description.", + "B. The kitchen cabinets are not mentioned in the description.", + "B. The sink is not mentioned in the description.", + "C. The twisted handle of the spoon is not mentioned in the description." 
+ ], + "score": 0.85, + "score_pos": 0.7, + "score_neg": 1.0, + "recognition_result": true + }, + "20993402": { + "pred": "A roll of translucent adhesive tape with a smooth, glossy surface and a slightly reflective finish. The tape is wound tightly around a central cardboard core, which is visible at the top.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the tape is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the tape is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the tape is not mentioned.", + 0 + ], + [ + "The tape is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The texture of the tape is mentioned in the description and is smooth.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the tape roll is mentioned in the description but is not beige, white, or transparent.", + -1 + ], + [ + "The color of the tape roll is mentioned in the description and is beige, white, or transparent.", + 1 + ], + [ + "The color of the tape roll is not mentioned, but the tape roll of the tape is mentioned.", + 0.5 + ], + [ + "The tape roll or the tape is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the tape roll is mentioned in the description and is beige, white, or transparent.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the tape is mentioned in the description but is not adhesive tape.", + -1 + ], + [ + "The type of the tape is mentioned in the description and is adhesive tape.", + 1 + ], + [ + "The type of the tape is not mentioned.", + 0 + ], + [ + "The tape is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The type of the tape is mentioned in the description and is adhesive tape.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the core is mentioned in the description but is not cardboard.", + -1 + ], + [ + "The material of the core is mentioned in the description and is cardboard.", + 1 + ], + [ + "The material of the core is not mentioned, but the core of the tape is mentioned.", + 0.5 + ], + [ + "The core or the tape is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The material of the core is mentioned in the description and is cardboard.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the tape roll is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the tape roll is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the tape roll is not mentioned, but the tape roll of the tape is mentioned.", + 0.5 + ], + [ + "The tape roll or the tape is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the tape roll is not mentioned, but the tape roll of the tape is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tape is not mentioned in the description.", + 0 + ], + [ + "The dispenser of the tape is not mentioned in the description.", + 1 + ], + [ + "The dispenser of the tape is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The dispenser of the tape is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The trees are not mentioned in the description.", + 1 + ], + [ + "The trees are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The trees are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The window is not mentioned in the description.", + 1 + ], + [ + "The window is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The window is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The stack of plates are not mentioned in the description.", + 1 + ], + [ + "The stack of plates are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The stack of plates are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tape is not mentioned in the description.", + 0 + ], + [ + "The cutting edge of the tape is not mentioned in the description.", + 1 + ], + [ + "The cutting edge of the tape is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The cutting edge of the tape is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is tape. 
Based on the image, is it likely that the object in the description is given class: tape or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of translucent adhesive tape with a smooth, glossy surface and a slightly reflective finish. The tape is wound tightly around a central cardboard core, which is visible at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is tape. Based on the image, is it likely that the object in the description is given class: tape or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of translucent adhesive tape with a smooth, glossy surface and a slightly reflective finish. The tape is wound tightly around a central cardboard core, which is visible at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The texture of the tape is mentioned in the description but is not smooth.\nB. The texture of the tape is mentioned in the description and is smooth.\nC. The texture of the tape is not mentioned.\nD. The tape is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of translucent adhesive tape with a smooth, glossy surface and a slightly reflective finish. The tape is wound tightly around a central cardboard core, which is visible at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the tape roll is mentioned in the description but is not beige, white, or transparent.\nB. The color of the tape roll is mentioned in the description and is beige, white, or transparent.\nC. The color of the tape roll is not mentioned, but the tape roll of the tape is mentioned.\nD. The tape roll or the tape is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of translucent adhesive tape with a smooth, glossy surface and a slightly reflective finish. 
The tape is wound tightly around a central cardboard core, which is visible at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The type of the tape is mentioned in the description but is not adhesive tape.\nB. The type of the tape is mentioned in the description and is adhesive tape.\nC. The type of the tape is not mentioned.\nD. The tape is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of translucent adhesive tape with a smooth, glossy surface and a slightly reflective finish. The tape is wound tightly around a central cardboard core, which is visible at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the core is mentioned in the description but is not cardboard.\nB. The material of the core is mentioned in the description and is cardboard.\nC. The material of the core is not mentioned, but the core of the tape is mentioned.\nD. The core or the tape is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA roll of translucent adhesive tape with a smooth, glossy surface and a slightly reflective finish. The tape is wound tightly around a central cardboard core, which is visible at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the tape roll is mentioned in the description but is not plastic.\nB. The material of the tape roll is mentioned in the description and is plastic.\nC. The material of the tape roll is not mentioned, but the tape roll of the tape is mentioned.\nD. The tape roll or the tape is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of translucent adhesive tape with a smooth, glossy surface and a slightly reflective finish. The tape is wound tightly around a central cardboard core, which is visible at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tape is not mentioned in the description.\nB. The dispenser of the tape is not mentioned in the description.\nC. The dispenser of the tape is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA roll of translucent adhesive tape with a smooth, glossy surface and a slightly reflective finish. The tape is wound tightly around a central cardboard core, which is visible at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The trees are not mentioned in the description.\nB. The trees are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of translucent adhesive tape with a smooth, glossy surface and a slightly reflective finish. The tape is wound tightly around a central cardboard core, which is visible at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The window is not mentioned in the description.\nB. The window is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of translucent adhesive tape with a smooth, glossy surface and a slightly reflective finish. 
The tape is wound tightly around a central cardboard core, which is visible at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The stack of plates are not mentioned in the description.\nB. The stack of plates are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA roll of translucent adhesive tape with a smooth, glossy surface and a slightly reflective finish. The tape is wound tightly around a central cardboard core, which is visible at the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tape is not mentioned in the description.\nB. The cutting edge of the tape is not mentioned in the description.\nC. The cutting edge of the tape is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The texture of the tape is mentioned in the description and is smooth.", + "B. The color of the tape roll is mentioned in the description and is beige, white, or transparent.", + "B. The type of the tape is mentioned in the description and is adhesive tape.", + "B. The material of the core is mentioned in the description and is cardboard.", + "C. The material of the tape roll is not mentioned, but the tape roll of the tape is mentioned.", + "B. The dispenser of the tape is not mentioned in the description.", + "A. The trees are not mentioned in the description.", + "A. The window is not mentioned in the description.", + "A. 
The stack of plates are not mentioned in the description.", + "B. The cutting edge of the tape is not mentioned in the description." + ], + "score": 0.95, + "score_pos": 0.9, + "score_neg": 1.0, + "recognition_result": true + }, + "21529954": { + "pred": "A cylindrical can with a white cap, featuring a vibrant design. The top half is orange with a small white logo, while the bottom half is green with a large, stylized white text. The can has a slightly curved shape and a glossy finish.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The can/container is not mentioned.", + 0 + ], + [ + "The material of the can/container is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the can/container is not mentioned.", + 0 + ], + [ + "The material of the can/container is mentioned in the description and is plastic.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the can/container is not mentioned.", + "pred_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cap/lid or the can/container is not mentioned.", + 0 + ], + [ + "The shape of the cap/lid is mentioned in the description but is not circular.", + -1 + ], + [ + "The shape of the cap/lid is not mentioned, but the cap/lid of the can/container is mentioned.", + 0.5 + ], + [ + "The shape of the cap/lid is mentioned in the description and is circular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The shape of the cap/lid is mentioned in the description and is circular.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cap/lid or the can/container is not mentioned.", + 0 + ], + [ + "The color of the cap/lid is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the cap/lid is not mentioned, but the cap/lid of the can/container is mentioned.", + 0.5 + ], + [ + "The color of the cap/lid is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the cap/lid is mentioned in the description and is white.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The body or the can/container is not mentioned.", + 0 + ], + [ + "The shape of the body is mentioned in the description but is not cylindrical.", + -1 + ], + [ + "The shape of the body is not mentioned, but the body of the can/container is mentioned.", + 0.5 + ], + [ + "The shape of the body is mentioned in the description and is cylindrical.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the body is mentioned in the description and is cylindrical.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The label or the can/container is not mentioned.", + 0 + ], + [ + "The color of the label is mentioned in the description but is not green, white, yellow.", + -1 + ], + [ + "The color of the label is not mentioned, but the label of the can/container is mentioned.", + 0.5 + ], + [ + "The color of the label is mentioned in the description and is green, white, yellow.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the label is mentioned in the description and is green, white, yellow.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The raspberries are mentioned in the description.", + -1 + ], + [ + "The raspberries are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The raspberries are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The red bell peppers are mentioned in the description.", + -1 + ], + [ + "The red bell peppers are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The red bell peppers are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ginger is mentioned in the description.", + -1 + ], + [ + "The ginger is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The ginger is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sweet potato is mentioned in the description.", + -1 + ], + [ + "The sweet potato is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The sweet potato is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The base of the can/container is mentioned in the description.", + -1 + ], + [ + "The base of the can/container is not mentioned in the description.", + 1 + ], + [ + "The can/container is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The base of the can/container is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is can/container. Based on the image, is it likely that the object in the description is given class: can/container or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a white cap, featuring a vibrant design. The top half is orange with a small white logo, while the bottom half is green with a large, stylized white text. The can has a slightly curved shape and a glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is can/container. Based on the image, is it likely that the object in the description is given class: can/container or object of a similar type?\nA. Yes\nB. 
No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a white cap, featuring a vibrant design. The top half is orange with a small white logo, while the bottom half is green with a large, stylized white text. The can has a slightly curved shape and a glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The can/container is not mentioned.\nB. The material of the can/container is mentioned in the description but is not plastic.\nC. The material of the can/container is not mentioned.\nD. The material of the can/container is mentioned in the description and is plastic.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a white cap, featuring a vibrant design. The top half is orange with a small white logo, while the bottom half is green with a large, stylized white text. The can has a slightly curved shape and a glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cap/lid or the can/container is not mentioned.\nB. 
The shape of the cap/lid is mentioned in the description but is not circular.\nC. The shape of the cap/lid is not mentioned, but the cap/lid of the can/container is mentioned.\nD. The shape of the cap/lid is mentioned in the description and is circular.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a white cap, featuring a vibrant design. The top half is orange with a small white logo, while the bottom half is green with a large, stylized white text. The can has a slightly curved shape and a glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cap/lid or the can/container is not mentioned.\nB. The color of the cap/lid is mentioned in the description but is not white.\nC. The color of the cap/lid is not mentioned, but the cap/lid of the can/container is mentioned.\nD. The color of the cap/lid is mentioned in the description and is white.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a white cap, featuring a vibrant design. The top half is orange with a small white logo, while the bottom half is green with a large, stylized white text. 
The can has a slightly curved shape and a glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The body or the can/container is not mentioned.\nB. The shape of the body is mentioned in the description but is not cylindrical.\nC. The shape of the body is not mentioned, but the body of the can/container is mentioned.\nD. The shape of the body is mentioned in the description and is cylindrical.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a white cap, featuring a vibrant design. The top half is orange with a small white logo, while the bottom half is green with a large, stylized white text. The can has a slightly curved shape and a glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The label or the can/container is not mentioned.\nB. The color of the label is mentioned in the description but is not green, white, yellow.\nC. The color of the label is not mentioned, but the label of the can/container is mentioned.\nD. The color of the label is mentioned in the description and is green, white, yellow.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a white cap, featuring a vibrant design. The top half is orange with a small white logo, while the bottom half is green with a large, stylized white text. The can has a slightly curved shape and a glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The raspberries are mentioned in the description.\nB. The raspberries are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a white cap, featuring a vibrant design. The top half is orange with a small white logo, while the bottom half is green with a large, stylized white text. The can has a slightly curved shape and a glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The red bell peppers are mentioned in the description.\nB. The red bell peppers are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a white cap, featuring a vibrant design. 
The top half is orange with a small white logo, while the bottom half is green with a large, stylized white text. The can has a slightly curved shape and a glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ginger is mentioned in the description.\nB. The ginger is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a white cap, featuring a vibrant design. The top half is orange with a small white logo, while the bottom half is green with a large, stylized white text. The can has a slightly curved shape and a glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sweet potato is mentioned in the description.\nB. The sweet potato is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical can with a white cap, featuring a vibrant design. The top half is orange with a small white logo, while the bottom half is green with a large, stylized white text. 
The can has a slightly curved shape and a glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The base of the can/container is mentioned in the description.\nB. The base of the can/container is not mentioned in the description.\nC. The can/container is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The material of the can/container is not mentioned.", + "D. The shape of the cap/lid is mentioned in the description and is circular.", + "D. The color of the cap/lid is mentioned in the description and is white.", + "D. The shape of the body is mentioned in the description and is cylindrical.", + "D. The color of the label is mentioned in the description and is green, white, yellow.", + "B. The raspberries are not mentioned in the description.", + "B. The red bell peppers are not mentioned in the description.", + "B. The ginger is not mentioned in the description.", + "B. The sweet potato is not mentioned in the description.", + "B. The base of the can/container is not mentioned in the description." + ], + "score": 0.9, + "score_pos": 0.8, + "score_neg": 1.0, + "recognition_result": true + }, + "22879790": { + "pred": "A single, large, white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a few thin, papery layers visible near the top. The root end is dark brown and slightly shriveled, with a few small roots extending from it.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the garlic is mentioned in the description and is white.", + 1 + ], + [ + "The color of the garlic is not mentioned.", + 0 + ], + [ + "The garlic is not mentioned.", + 0 + ], + [ + "The color of the garlic is mentioned in the description but is not white.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The garlic is not mentioned.", + "pred_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the skin is mentioned in the description and is papery.", + 1 + ], + [ + "The texture of the skin is not mentioned, but the skin of the garlic is mentioned.", + 0.5 + ], + [ + "The skin or the garlic is not mentioned.", + 0 + ], + [ + "The texture of the skin is mentioned in the description but is not papery.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The texture of the skin is mentioned in the description and is papery.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the root is mentioned in the description and is brown.", + 1 + ], + [ + "The color of the root is not mentioned, but the root of the garlic is mentioned.", + 0.5 + ], + [ + "The root or the garlic is not mentioned.", + 0 + ], + [ + "The color of the root is mentioned in the description but is not brown.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the root is mentioned in the description and is brown.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The leaf of the garlic is not mentioned in the description.", + 1 + ], + [ + "The garlic is not mentioned in the description.", + 0 + ], + [ + "The leaf of the garlic is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The garlic is not mentioned in the description.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ceramic rooster and hen salt and pepper shakers are not mentioned in the description.", + 1 + ], + [ + "The ceramic rooster and hen salt and pepper shakers are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The ceramic rooster and hen salt and pepper shakers are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The stem of the garlic is not mentioned in the description.", + 1 + ], + [ + "The garlic is not mentioned in the description.", + 0 + ], + [ + "The stem of the garlic is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The garlic is not mentioned in the description.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ceramic hen salt shaker is not mentioned in the description.", + 1 + ], + [ + "The ceramic hen salt shaker is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The ceramic hen salt shaker is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green glass rooster is not mentioned in the description.", + 1 + ], + [ + "The green glass rooster is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The green glass rooster is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is garlic. Based on the image, is it likely that the object in the description is given class: garlic or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, large, white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a few thin, papery layers visible near the top. The root end is dark brown and slightly shriveled, with a few small roots extending from it.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is garlic. Based on the image, is it likely that the object in the description is given class: garlic or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, large, white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a few thin, papery layers visible near the top. 
The root end is dark brown and slightly shriveled, with a few small roots extending from it.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the garlic is mentioned in the description and is white.\nB. The color of the garlic is not mentioned.\nC. The garlic is not mentioned.\nD. The color of the garlic is mentioned in the description but is not white.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, large, white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a few thin, papery layers visible near the top. The root end is dark brown and slightly shriveled, with a few small roots extending from it.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the skin is mentioned in the description and is papery.\nB. The texture of the skin is not mentioned, but the skin of the garlic is mentioned.\nC. The skin or the garlic is not mentioned.\nD. The texture of the skin is mentioned in the description but is not papery.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA single, large, white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a few thin, papery layers visible near the top. The root end is dark brown and slightly shriveled, with a few small roots extending from it.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the root is mentioned in the description and is brown.\nB. The color of the root is not mentioned, but the root of the garlic is mentioned.\nC. The root or the garlic is not mentioned.\nD. The color of the root is mentioned in the description but is not brown.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, large, white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a few thin, papery layers visible near the top. The root end is dark brown and slightly shriveled, with a few small roots extending from it.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The leaf of the garlic is not mentioned in the description.\nB. The garlic is not mentioned in the description.\nC. The leaf of the garlic is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, large, white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a few thin, papery layers visible near the top. The root end is dark brown and slightly shriveled, with a few small roots extending from it.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ceramic rooster and hen salt and pepper shakers are not mentioned in the description.\nB. The ceramic rooster and hen salt and pepper shakers are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, large, white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a few thin, papery layers visible near the top. The root end is dark brown and slightly shriveled, with a few small roots extending from it.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The stem of the garlic is not mentioned in the description.\nB. The garlic is not mentioned in the description.\nC. The stem of the garlic is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, large, white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a few thin, papery layers visible near the top. The root end is dark brown and slightly shriveled, with a few small roots extending from it.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ceramic hen salt shaker is not mentioned in the description.\nB. The ceramic hen salt shaker is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, large, white onion with a smooth, slightly shiny surface. The onion has a bulbous shape with a few thin, papery layers visible near the top. The root end is dark brown and slightly shriveled, with a few small roots extending from it.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green glass rooster is not mentioned in the description.\nB. The green glass rooster is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The garlic is not mentioned.", + "A. The texture of the skin is mentioned in the description and is papery.", + "A. The color of the root is mentioned in the description and is brown.", + "B. The garlic is not mentioned in the description.", + "A. 
The ceramic rooster and hen salt and pepper shakers are not mentioned in the description.", + "B. The garlic is not mentioned in the description.", + "A. The ceramic hen salt shaker is not mentioned in the description.", + "A. The green glass rooster is not mentioned in the description." + ], + "score": 0.625, + "score_pos": 0.6666666666666666, + "score_neg": 0.6, + "recognition_result": true + }, + "24010373": { + "pred": "The guitar has a dark, glossy body with a cutaway design. The neck is dark with white dot inlays on the fretboard. The headstock is also dark, matching the neck, and features tuning pegs. The body has a circular soundhole and a bridge with white bridge pins.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The neck or the guitar is not mentioned.", + 0 + ], + [ + "The shape of the neck is mentioned in the description and is straight or slightly curved.", + 1 + ], + [ + "The shape of the neck is not mentioned, but the neck of the guitar is mentioned.", + 0.5 + ], + [ + "The shape of the neck is mentioned in the description but is not straight or slightly curved.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the neck is not mentioned, but the neck of the guitar is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The body or the guitar is not mentioned.", + 0 + ], + [ + "The texture of the body is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the body is not mentioned, but the body of the guitar is mentioned.", + 0.5 + ], + [ + "The texture of the body is mentioned in the description but is not smooth.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The texture of the body is not mentioned, but the body of the guitar is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The strings or the guitar are not mentioned.", + 0 + ], + [ + "The number of parts of the strings is mentioned in the description and is 6.", + 1 + ], + [ + "The number of parts of the strings is not mentioned, but the strings of the guitar are mentioned.", + 0.5 + ], + [ + "The number of parts of the strings is mentioned in the description but is not 6.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The number of parts of the strings is not mentioned, but the strings of the guitar are mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sound hole or the guitar is not mentioned.", + 0 + ], + [ + "The shape of the sound hole is mentioned in the description and is round.", + 1 + ], + [ + "The shape of the sound hole is not mentioned, but the sound hole of the guitar is mentioned.", + 0.5 + ], + [ + "The shape of the sound hole is mentioned in the description but is not round.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the sound hole is mentioned in the description and is round.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The guitar is not mentioned.", + 0 + ], + [ + "The color of the guitar is mentioned in the description and is black.", + 1 + ], + [ + "The color of the guitar is not mentioned.", + 0 + ], + [ + "The color of the guitar is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the guitar is mentioned in the description but is not black.", + "pred_index": 3, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The violin is not mentioned in the description.", + 1 + ], + [ + "The violin is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The violin is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The guitar is not mentioned in the description.", + 0 + ], + [ + "The pickguard of the guitar is not mentioned in the description.", + 1 + ], + [ + "The pickguard of the guitar is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The pickguard of the guitar is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sign is not mentioned in the description.", + 1 + ], + [ + "The sign is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The sign is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The music stands are not mentioned in the description.", + 1 + ], + [ + "The music stands are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The music stands are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The books are not mentioned in the description.", + 1 + ], + [ + "The books are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The books are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is guitar. Based on the image, is it likely that the object in the description is given class: guitar or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy body with a cutaway design. The neck is dark with white dot inlays on the fretboard. The headstock is also dark, matching the neck, and features tuning pegs. The body has a circular soundhole and a bridge with white bridge pins.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is guitar. Based on the image, is it likely that the object in the description is given class: guitar or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy body with a cutaway design. The neck is dark with white dot inlays on the fretboard. The headstock is also dark, matching the neck, and features tuning pegs. The body has a circular soundhole and a bridge with white bridge pins.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The neck or the guitar is not mentioned.\nB. The shape of the neck is mentioned in the description and is straight or slightly curved.\nC. The shape of the neck is not mentioned, but the neck of the guitar is mentioned.\nD. The shape of the neck is mentioned in the description but is not straight or slightly curved.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy body with a cutaway design. The neck is dark with white dot inlays on the fretboard. The headstock is also dark, matching the neck, and features tuning pegs. The body has a circular soundhole and a bridge with white bridge pins.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The body or the guitar is not mentioned.\nB. The texture of the body is mentioned in the description and is smooth.\nC. 
The texture of the body is not mentioned, but the body of the guitar is mentioned.\nD. The texture of the body is mentioned in the description but is not smooth.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy body with a cutaway design. The neck is dark with white dot inlays on the fretboard. The headstock is also dark, matching the neck, and features tuning pegs. The body has a circular soundhole and a bridge with white bridge pins.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The strings or the guitar are not mentioned.\nB. The number of parts of the strings is mentioned in the description and is 6.\nC. The number of parts of the strings is not mentioned, but the strings of the guitar are mentioned.\nD. The number of parts of the strings is mentioned in the description but is not 6.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy body with a cutaway design. The neck is dark with white dot inlays on the fretboard. The headstock is also dark, matching the neck, and features tuning pegs. 
The body has a circular soundhole and a bridge with white bridge pins.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sound hole or the guitar is not mentioned.\nB. The shape of the sound hole is mentioned in the description and is round.\nC. The shape of the sound hole is not mentioned, but the sound hole of the guitar is mentioned.\nD. The shape of the sound hole is mentioned in the description but is not round.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy body with a cutaway design. The neck is dark with white dot inlays on the fretboard. The headstock is also dark, matching the neck, and features tuning pegs. The body has a circular soundhole and a bridge with white bridge pins.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The guitar is not mentioned.\nB. The color of the guitar is mentioned in the description and is black.\nC. The color of the guitar is not mentioned.\nD. The color of the guitar is mentioned in the description but is not black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy body with a cutaway design. The neck is dark with white dot inlays on the fretboard. The headstock is also dark, matching the neck, and features tuning pegs. The body has a circular soundhole and a bridge with white bridge pins.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The violin is not mentioned in the description.\nB. The violin is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy body with a cutaway design. The neck is dark with white dot inlays on the fretboard. The headstock is also dark, matching the neck, and features tuning pegs. The body has a circular soundhole and a bridge with white bridge pins.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The guitar is not mentioned in the description.\nB. The pickguard of the guitar is not mentioned in the description.\nC. The pickguard of the guitar is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy body with a cutaway design. The neck is dark with white dot inlays on the fretboard. The headstock is also dark, matching the neck, and features tuning pegs. The body has a circular soundhole and a bridge with white bridge pins.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sign is not mentioned in the description.\nB. The sign is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy body with a cutaway design. The neck is dark with white dot inlays on the fretboard. The headstock is also dark, matching the neck, and features tuning pegs. The body has a circular soundhole and a bridge with white bridge pins.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The music stands are not mentioned in the description.\nB. The music stands are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe guitar has a dark, glossy body with a cutaway design. 
The neck is dark with white dot inlays on the fretboard. The headstock is also dark, matching the neck, and features tuning pegs. The body has a circular soundhole and a bridge with white bridge pins.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The books are not mentioned in the description.\nB. The books are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The shape of the neck is not mentioned, but the neck of the guitar is mentioned.", + "C. The texture of the body is not mentioned, but the body of the guitar is mentioned.", + "C. The number of parts of the strings is not mentioned, but the strings of the guitar are mentioned.", + "B. The shape of the sound hole is mentioned in the description and is round.", + "D. The color of the guitar is mentioned in the description but is not black.", + "A. The violin is not mentioned in the description.", + "B. The pickguard of the guitar is not mentioned in the description.", + "A. The sign is not mentioned in the description.", + "A. The music stands are not mentioned in the description.", + "A. The books are not mentioned in the description." + ], + "score": 0.65, + "score_pos": 0.3, + "score_neg": 1.0, + "recognition_result": true + }, + "24694197": { + "pred": "A ripe avocado with a bumpy, dark green skin and a central pit cavity filled with a reddish-brown, creamy substance.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The skin or the avocado is not mentioned.", + 0 + ], + [ + "The color of the skin is mentioned in the description but is not dark green.", + -1 + ], + [ + "The color of the skin is mentioned in the description and is dark green.", + 1 + ], + [ + "The color of the skin is not mentioned, but the skin of the avocado is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the skin is mentioned in the description and is dark green.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The flesh or the avocado is not mentioned.", + 0 + ], + [ + "The texture of the flesh is mentioned in the description but is not creamy.", + -1 + ], + [ + "The texture of the flesh is mentioned in the description and is creamy.", + 1 + ], + [ + "The texture of the flesh is not mentioned, but the flesh of the avocado is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. The texture of the flesh is mentioned in the description and is creamy.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pit or the avocado is not mentioned.", + 0 + ], + [ + "The shape of the pit is mentioned in the description but is not oval.", + -1 + ], + [ + "The shape of the pit is mentioned in the description and is oval.", + 1 + ], + [ + "The shape of the pit is not mentioned, but the pit of the avocado is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the pit is mentioned in the description and is oval.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The flesh or the avocado is not mentioned.", + 0 + ], + [ + "The color of the flesh is mentioned in the description but is not light green.", + -1 + ], + [ + "The color of the flesh is mentioned in the description and is light green.", + 1 + ], + [ + "The color of the flesh is not mentioned, but the flesh of the avocado is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the flesh is not mentioned, but the flesh of the avocado is mentioned.", + "pred_index": 3, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The avocado is not mentioned in the description.", + 0 + ], + [ + "The leaves of the avocado are mentioned in the description.", + -1 + ], + [ + "The leaves of the avocado are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The leaves of the avocado are not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The utility pole is mentioned in the description.", + -1 + ], + [ + "The utility pole is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The utility pole is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The avocado is not mentioned in the description.", + 0 + ], + [ + "The stem of the avocado is mentioned in the description.", + -1 + ], + [ + "The stem of the avocado is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The stem of the avocado is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The building is mentioned in the description.", + -1 + ], + [ + "The building is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The building is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The people are mentioned in the description.", + -1 + ], + [ + "The people are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The people are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is avocado. Based on the image, is it likely that the object in the description is given class: avocado or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green skin and a central pit cavity filled with a reddish-brown, creamy substance.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is avocado. Based on the image, is it likely that the object in the description is given class: avocado or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green skin and a central pit cavity filled with a reddish-brown, creamy substance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The skin or the avocado is not mentioned.\nB. The color of the skin is mentioned in the description but is not dark green.\nC. The color of the skin is mentioned in the description and is dark green.\nD. The color of the skin is not mentioned, but the skin of the avocado is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green skin and a central pit cavity filled with a reddish-brown, creamy substance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The flesh or the avocado is not mentioned.\nB. The texture of the flesh is mentioned in the description but is not creamy.\nC. The texture of the flesh is mentioned in the description and is creamy.\nD. The texture of the flesh is not mentioned, but the flesh of the avocado is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green skin and a central pit cavity filled with a reddish-brown, creamy substance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pit or the avocado is not mentioned.\nB. The shape of the pit is mentioned in the description but is not oval.\nC. The shape of the pit is mentioned in the description and is oval.\nD. The shape of the pit is not mentioned, but the pit of the avocado is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green skin and a central pit cavity filled with a reddish-brown, creamy substance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The flesh or the avocado is not mentioned.\nB. The color of the flesh is mentioned in the description but is not light green.\nC. The color of the flesh is mentioned in the description and is light green.\nD. The color of the flesh is not mentioned, but the flesh of the avocado is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green skin and a central pit cavity filled with a reddish-brown, creamy substance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The avocado is not mentioned in the description.\nB. The leaves of the avocado are mentioned in the description.\nC. The leaves of the avocado are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green skin and a central pit cavity filled with a reddish-brown, creamy substance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The utility pole is mentioned in the description.\nB. The utility pole is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green skin and a central pit cavity filled with a reddish-brown, creamy substance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The avocado is not mentioned in the description.\nB. The stem of the avocado is mentioned in the description.\nC. The stem of the avocado is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green skin and a central pit cavity filled with a reddish-brown, creamy substance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The building is mentioned in the description.\nB. The building is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA ripe avocado with a bumpy, dark green skin and a central pit cavity filled with a reddish-brown, creamy substance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The people are mentioned in the description.\nB. The people are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the skin is mentioned in the description and is dark green.", + "C. The texture of the flesh is mentioned in the description and is creamy.", + "C. The shape of the pit is mentioned in the description and is oval.", + "D. The color of the flesh is not mentioned, but the flesh of the avocado is mentioned.", + "C. The leaves of the avocado are not mentioned in the description.", + "B. The utility pole is not mentioned in the description.", + "C. The stem of the avocado is not mentioned in the description.", + "B. The building is not mentioned in the description.", + "B. The people are not mentioned in the description." + ], + "score": 0.9444444444444444, + "score_pos": 0.875, + "score_neg": 1.0, + "recognition_result": true + }, + "279135": { + "pred": "The ski features a predominantly black base with intricate orange and white geometric patterns. The design includes a series of interconnected shapes and lines, creating a dynamic and modern appearance. 
The tip of the ski is slightly curved and tapers to a point, with the pattern continuing seamlessly along the length of the ski.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The binding or the ski is not mentioned.", + 0 + ], + [ + "The color of the binding is not mentioned, but the binding of the ski is mentioned.", + 0.5 + ], + [ + "The color of the binding is mentioned in the description and is black or orange.", + 1 + ], + [ + "The color of the binding is mentioned in the description but is not black or orange.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the binding is not mentioned, but the binding of the ski is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The deck or the ski is not mentioned.", + 0 + ], + [ + "The color of the deck is not mentioned, but the deck of the ski is mentioned.", + 0.5 + ], + [ + "The color of the deck is mentioned in the description and is black, white, or orange.", + 1 + ], + [ + "The color of the deck is mentioned in the description but is not black, white, or orange.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the deck is mentioned in the description and is black, white, or orange.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The binding or the ski is not mentioned.", + 0 + ], + [ + "The material of the binding is not mentioned, but the binding of the ski is mentioned.", + 0.5 + ], + [ + "The material of the binding is mentioned in the description and is metal and plastic.", + 1 + ], + [ + "The material of the binding is mentioned in the description but is not metal and plastic.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The material of the binding is not mentioned, but the binding of the ski is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The deck or the ski is not mentioned.", + 0 + ], + [ + "The shape of the deck is not mentioned, but the deck of the ski is mentioned.", + 0.5 + ], + [ + "The shape of the deck is mentioned in the description and is slightly curved.", + 1 + ], + [ + "The shape of the deck is mentioned in the description but is not slightly curved.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the deck is mentioned in the description and is slightly curved.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tail or the ski is not mentioned.", + 0 + ], + [ + "The texture/pattern of the tail is not mentioned, but the tail of the ski is mentioned.", + 0.5 + ], + [ + "The texture/pattern of the tail is mentioned in the description and is geometric shapes.", + 1 + ], + [ + "The texture/pattern of the tail is mentioned in the description but is not geometric shapes.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The tail or the ski is not mentioned.", + "pred_index": 0, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wheels of the ski are not mentioned in the description.", + 1 + ], + [ + "The ski is not mentioned in the description.", + 0 + ], + [ + "The wheels of the ski are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The wheels of the ski are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wooden post is not mentioned in the description.", + 1 + ], + [ + "The wooden post is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The wooden post is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The glass window is not mentioned in the description.", + 1 + ], + [ + "The glass window is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The glass window is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ski base of the ski is not mentioned in the description.", + 1 + ], + [ + "The ski is not mentioned in the description.", + 0 + ], + [ + "The ski base of the ski is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The ski base of the ski is mentioned in the description.", + "pred_index": 2, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ski poles are not mentioned in the description.", + 1 + ], + [ + "The ski poles are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The ski poles are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is ski. 
Based on the image, is it likely that the object in the description is given class: ski or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black base with intricate orange and white geometric patterns. The design includes a series of interconnected shapes and lines, creating a dynamic and modern appearance. The tip of the ski is slightly curved and tapers to a point, with the pattern continuing seamlessly along the length of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is ski. Based on the image, is it likely that the object in the description is given class: ski or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black base with intricate orange and white geometric patterns. The design includes a series of interconnected shapes and lines, creating a dynamic and modern appearance. 
The tip of the ski is slightly curved and tapers to a point, with the pattern continuing seamlessly along the length of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The binding or the ski is not mentioned.\nB. The color of the binding is not mentioned, but the binding of the ski is mentioned.\nC. The color of the binding is mentioned in the description and is black or orange.\nD. The color of the binding is mentioned in the description but is not black or orange.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black base with intricate orange and white geometric patterns. The design includes a series of interconnected shapes and lines, creating a dynamic and modern appearance. The tip of the ski is slightly curved and tapers to a point, with the pattern continuing seamlessly along the length of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The deck or the ski is not mentioned.\nB. The color of the deck is not mentioned, but the deck of the ski is mentioned.\nC. The color of the deck is mentioned in the description and is black, white, or orange.\nD. The color of the deck is mentioned in the description but is not black, white, or orange.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black base with intricate orange and white geometric patterns. The design includes a series of interconnected shapes and lines, creating a dynamic and modern appearance. The tip of the ski is slightly curved and tapers to a point, with the pattern continuing seamlessly along the length of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The binding or the ski is not mentioned.\nB. The material of the binding is not mentioned, but the binding of the ski is mentioned.\nC. The material of the binding is mentioned in the description and is metal and plastic.\nD. The material of the binding is mentioned in the description but is not metal and plastic.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black base with intricate orange and white geometric patterns. The design includes a series of interconnected shapes and lines, creating a dynamic and modern appearance. The tip of the ski is slightly curved and tapers to a point, with the pattern continuing seamlessly along the length of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The deck or the ski is not mentioned.\nB. 
The shape of the deck is not mentioned, but the deck of the ski is mentioned.\nC. The shape of the deck is mentioned in the description and is slightly curved.\nD. The shape of the deck is mentioned in the description but is not slightly curved.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black base with intricate orange and white geometric patterns. The design includes a series of interconnected shapes and lines, creating a dynamic and modern appearance. The tip of the ski is slightly curved and tapers to a point, with the pattern continuing seamlessly along the length of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tail or the ski is not mentioned.\nB. The texture/pattern of the tail is not mentioned, but the tail of the ski is mentioned.\nC. The texture/pattern of the tail is mentioned in the description and is geometric shapes.\nD. The texture/pattern of the tail is mentioned in the description but is not geometric shapes.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black base with intricate orange and white geometric patterns. 
The design includes a series of interconnected shapes and lines, creating a dynamic and modern appearance. The tip of the ski is slightly curved and tapers to a point, with the pattern continuing seamlessly along the length of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wheels of the ski are not mentioned in the description.\nB. The ski is not mentioned in the description.\nC. The wheels of the ski are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black base with intricate orange and white geometric patterns. The design includes a series of interconnected shapes and lines, creating a dynamic and modern appearance. The tip of the ski is slightly curved and tapers to a point, with the pattern continuing seamlessly along the length of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wooden post is not mentioned in the description.\nB. The wooden post is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black base with intricate orange and white geometric patterns. The design includes a series of interconnected shapes and lines, creating a dynamic and modern appearance. The tip of the ski is slightly curved and tapers to a point, with the pattern continuing seamlessly along the length of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The glass window is not mentioned in the description.\nB. The glass window is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black base with intricate orange and white geometric patterns. The design includes a series of interconnected shapes and lines, creating a dynamic and modern appearance. The tip of the ski is slightly curved and tapers to a point, with the pattern continuing seamlessly along the length of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ski base of the ski is not mentioned in the description.\nB. The ski is not mentioned in the description.\nC. The ski base of the ski is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe ski features a predominantly black base with intricate orange and white geometric patterns. The design includes a series of interconnected shapes and lines, creating a dynamic and modern appearance. The tip of the ski is slightly curved and tapers to a point, with the pattern continuing seamlessly along the length of the ski.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ski poles are not mentioned in the description.\nB. The ski poles are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the binding is not mentioned, but the binding of the ski is mentioned.", + "C. The color of the deck is mentioned in the description and is black, white, or orange.", + "B. The material of the binding is not mentioned, but the binding of the ski is mentioned.", + "C. The shape of the deck is mentioned in the description and is slightly curved.", + "A. The tail or the ski is not mentioned.", + "A. The wheels of the ski are not mentioned in the description.", + "A. The wooden post is not mentioned in the description.", + "A. The glass window is not mentioned in the description.", + "C. The ski base of the ski is mentioned in the description.", + "A. The ski poles are not mentioned in the description." 
+ ], + "score": 0.6, + "score_pos": 0.6, + "score_neg": 0.6, + "recognition_result": true + }, + "622329": { + "pred": "A rectangular, flat, beige-colored eraser with a slightly rough texture and rounded edges.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The eraser is not mentioned.", + 0 + ], + [ + "The color of the eraser is mentioned in the description but is not brown.", + -1 + ], + [ + "The color of the eraser is mentioned in the description and is brown.", + 1 + ], + [ + "The color of the eraser is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the eraser is mentioned in the description but is not brown.", + "pred_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The eraser is not mentioned.", + 0 + ], + [ + "The material of the eraser is mentioned in the description but is not rubber.", + -1 + ], + [ + "The material of the eraser is mentioned in the description and is rubber.", + 1 + ], + [ + "The material of the eraser is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the eraser is not mentioned.", + "pred_index": 3, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The corner or the eraser is not mentioned.", + 0 + ], + [ + "The shape of the corner is mentioned in the description but is not rounded.", + -1 + ], + [ + "The shape of the corner is mentioned in the description and is rounded.", + 1 + ], + [ + "The shape of the corner is not mentioned, but the corner of the eraser is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the corner is mentioned in the description and is rounded.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The desk organizer is not mentioned in the description.", + 1 + ], + [ + "The desk organizer is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The desk organizer is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The paper sleeve of the eraser is not mentioned in the description.", + 1 + ], + [ + "The eraser is not mentioned in the description.", + 0 + ], + [ + "The paper sleeve of the eraser is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The paper sleeve of the eraser is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The phone is not mentioned in the description.", + 1 + ], + [ + "The phone is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The phone is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sticky notes are not mentioned in the description.", + 1 + ], + [ + "The sticky notes are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The sticky notes are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tape is not mentioned in the description.", + 1 + ], + [ + "The tape is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The tape is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is eraser. Based on the image, is it likely that the object in the description is given class: eraser or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige-colored eraser with a slightly rough texture and rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is eraser. Based on the image, is it likely that the object in the description is given class: eraser or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige-colored eraser with a slightly rough texture and rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The eraser is not mentioned.\nB. The color of the eraser is mentioned in the description but is not brown.\nC. The color of the eraser is mentioned in the description and is brown.\nD. The color of the eraser is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige-colored eraser with a slightly rough texture and rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The eraser is not mentioned.\nB. The material of the eraser is mentioned in the description but is not rubber.\nC. The material of the eraser is mentioned in the description and is rubber.\nD. The material of the eraser is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige-colored eraser with a slightly rough texture and rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The corner or the eraser is not mentioned.\nB. The shape of the corner is mentioned in the description but is not rounded.\nC. The shape of the corner is mentioned in the description and is rounded.\nD. The shape of the corner is not mentioned, but the corner of the eraser is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige-colored eraser with a slightly rough texture and rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The desk organizer is not mentioned in the description.\nB. The desk organizer is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige-colored eraser with a slightly rough texture and rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The paper sleeve of the eraser is not mentioned in the description.\nB. The eraser is not mentioned in the description.\nC. The paper sleeve of the eraser is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige-colored eraser with a slightly rough texture and rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The phone is not mentioned in the description.\nB. The phone is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige-colored eraser with a slightly rough texture and rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sticky notes are not mentioned in the description.\nB. 
The sticky notes are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, flat, beige-colored eraser with a slightly rough texture and rounded edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tape is not mentioned in the description.\nB. The tape is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the eraser is mentioned in the description but is not brown.", + "D. The material of the eraser is not mentioned.", + "C. The shape of the corner is mentioned in the description and is rounded.", + "A. The desk organizer is not mentioned in the description.", + "A. The paper sleeve of the eraser is not mentioned in the description.", + "A. The phone is not mentioned in the description.", + "A. The sticky notes are not mentioned in the description.", + "A. The tape is not mentioned in the description." + ], + "score": 0.625, + "score_pos": 0.0, + "score_neg": 1.0, + "recognition_result": true + }, + "622332": { + "pred": "A black, rectangular stapler with a glossy finish. The top surface features a white logo and text. 
The front edge has a slightly raised, horizontal groove.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the base plate is mentioned in the description and is metallic.", + 1 + ], + [ + "The base plate or the stapler is not mentioned.", + 0 + ], + [ + "The material of the base plate is mentioned in the description but is not metallic.", + -1 + ], + [ + "The material of the base plate is not mentioned, but the base plate of the stapler is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the base plate is not mentioned, but the base plate of the stapler is mentioned.", + "pred_index": 3, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the stapler is mentioned in the description and is rectangular.", + 1 + ], + [ + "The stapler is not mentioned.", + 0 + ], + [ + "The shape of the stapler is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the stapler is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the stapler is mentioned in the description and is rectangular.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the logo is mentioned in the description and is white.", + 1 + ], + [ + "The logo or the stapler is not mentioned.", + 0 + ], + [ + "The color of the logo is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the logo is not mentioned, but the logo of the stapler is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the logo is mentioned in the description and is white.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the top cover is mentioned in the description and is black.", + 1 + ], + [ + "The top cover or the stapler is not mentioned.", + 0 + ], + [ + "The color of the top cover is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the top cover is not mentioned, but the top cover of the stapler is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the top cover is mentioned in the description and is black.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the stapler is mentioned in the description and is black.", + 1 + ], + [ + "The stapler is not mentioned.", + 0 + ], + [ + "The color of the stapler is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the stapler is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the stapler is mentioned in the description and is black.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tape is mentioned in the description.", + -1 + ], + [ + "The tape is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The tape is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The staple remover of the stapler is mentioned in the description.", + -1 + ], + [ + "The stapler is not mentioned in the description.", + 0 + ], + [ + "The staple remover of the stapler is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The staple remover of the stapler is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The paper clips are mentioned in the description.", + -1 + ], + [ + "The paper clips are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The paper clips are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The paintbrushes are mentioned in the description.", + -1 + ], + [ + "The paintbrushes are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The paintbrushes are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The staple storage compartment of the stapler is mentioned in the description.", + -1 + ], + [ + "The stapler is not mentioned in the description.", + 0 + ], + [ + "The staple storage compartment of the stapler is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The staple storage compartment of the stapler is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is stapler. Based on the image, is it likely that the object in the description is given class: stapler or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, rectangular stapler with a glossy finish. The top surface features a white logo and text. The front edge has a slightly raised, horizontal groove.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is stapler. Based on the image, is it likely that the object in the description is given class: stapler or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, rectangular stapler with a glossy finish. The top surface features a white logo and text. 
The front edge has a slightly raised, horizontal groove.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the base plate is mentioned in the description and is metallic.\nB. The base plate or the stapler is not mentioned.\nC. The material of the base plate is mentioned in the description but is not metallic.\nD. The material of the base plate is not mentioned, but the base plate of the stapler is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, rectangular stapler with a glossy finish. The top surface features a white logo and text. The front edge has a slightly raised, horizontal groove.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the stapler is mentioned in the description and is rectangular.\nB. The stapler is not mentioned.\nC. The shape of the stapler is mentioned in the description but is not rectangular.\nD. The shape of the stapler is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, rectangular stapler with a glossy finish. 
The top surface features a white logo and text. The front edge has a slightly raised, horizontal groove.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the logo is mentioned in the description and is white.\nB. The logo or the stapler is not mentioned.\nC. The color of the logo is mentioned in the description but is not white.\nD. The color of the logo is not mentioned, but the logo of the stapler is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, rectangular stapler with a glossy finish. The top surface features a white logo and text. The front edge has a slightly raised, horizontal groove.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the top cover is mentioned in the description and is black.\nB. The top cover or the stapler is not mentioned.\nC. The color of the top cover is mentioned in the description but is not black.\nD. The color of the top cover is not mentioned, but the top cover of the stapler is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black, rectangular stapler with a glossy finish. The top surface features a white logo and text. The front edge has a slightly raised, horizontal groove.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the stapler is mentioned in the description and is black.\nB. The stapler is not mentioned.\nC. The color of the stapler is mentioned in the description but is not black.\nD. The color of the stapler is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, rectangular stapler with a glossy finish. The top surface features a white logo and text. The front edge has a slightly raised, horizontal groove.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tape is mentioned in the description.\nB. The tape is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, rectangular stapler with a glossy finish. The top surface features a white logo and text. 
The front edge has a slightly raised, horizontal groove.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The staple remover of the stapler is mentioned in the description.\nB. The stapler is not mentioned in the description.\nC. The staple remover of the stapler is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, rectangular stapler with a glossy finish. The top surface features a white logo and text. The front edge has a slightly raised, horizontal groove.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The paper clips are mentioned in the description.\nB. The paper clips are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, rectangular stapler with a glossy finish. The top surface features a white logo and text. The front edge has a slightly raised, horizontal groove.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The paintbrushes are mentioned in the description.\nB. The paintbrushes are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, rectangular stapler with a glossy finish. The top surface features a white logo and text. The front edge has a slightly raised, horizontal groove.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The staple storage compartment of the stapler is mentioned in the description.\nB. The stapler is not mentioned in the description.\nC. The staple storage compartment of the stapler is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The material of the base plate is not mentioned, but the base plate of the stapler is mentioned.", + "A. The shape of the stapler is mentioned in the description and is rectangular.", + "A. The color of the logo is mentioned in the description and is white.", + "A. The color of the top cover is mentioned in the description and is black.", + "A. The color of the stapler is mentioned in the description and is black.", + "B. The tape is not mentioned in the description.", + "C. The staple remover of the stapler is not mentioned in the description.", + "B. The paper clips are not mentioned in the description.", + "B. The paintbrushes are not mentioned in the description.", + "C. The staple storage compartment of the stapler is not mentioned in the description." 
+ ], + "score": 0.95, + "score_pos": 0.9, + "score_neg": 1.0, + "recognition_result": true + }, + "1075308": { + "pred": "A vintage-style television set with a boxy, black frame and a slightly curved screen. The top of the television features a series of control buttons and a small display screen.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the screen is mentioned in the description and is rectangular or rounded rectangular.", + 1 + ], + [ + "The shape of the screen is mentioned in the description but is not rectangular or rounded rectangular.", + -1 + ], + [ + "The screen or the monitor/tv is not mentioned.", + 0 + ], + [ + "The shape of the screen is not mentioned, but the screen of the monitor/tv is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the screen is mentioned in the description and is rectangular or rounded rectangular.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the frame is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the frame is mentioned in the description but is not plastic.", + -1 + ], + [ + "The frame or the monitor/tv is not mentioned.", + 0 + ], + [ + "The material of the frame is not mentioned, but the frame of the monitor/tv is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. 
The material of the frame is not mentioned, but the frame of the monitor/tv is mentioned.", + "pred_index": 3, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the monitor/tv is mentioned in the description and is black.", + 1 + ], + [ + "The color of the monitor/tv is mentioned in the description but is not black.", + -1 + ], + [ + "The monitor/tv is not mentioned.", + 0 + ], + [ + "The color of the monitor/tv is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the monitor/tv is mentioned in the description and is black.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the screen is mentioned in the description and is glass.", + 1 + ], + [ + "The material of the screen is mentioned in the description but is not glass.", + -1 + ], + [ + "The screen or the monitor/tv is not mentioned.", + 0 + ], + [ + "The material of the screen is not mentioned, but the screen of the monitor/tv is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the screen is not mentioned, but the screen of the monitor/tv is mentioned.", + "pred_index": 3, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chairs are mentioned in the description.", + -1 + ], + [ + "The chairs are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The chairs are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The remote control of the monitor/tv is mentioned in the description.", + -1 + ], + [ + "The monitor/tv is not mentioned in the description.", + 0 + ], + [ + "The remote control of the monitor/tv is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The remote control of the monitor/tv is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ports of the monitor/tv are mentioned in the description.", + -1 + ], + [ + "The monitor/tv is not mentioned in the description.", + 0 + ], + [ + "The ports of the monitor/tv are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The ports of the monitor/tv are not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The glass are mentioned in the description.", + -1 + ], + [ + "The glass are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The glass are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The range hood is mentioned in the description.", + -1 + ], + [ + "The range hood is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The range hood is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is monitor/tv. Based on the image, is it likely that the object in the description is given class: monitor/tv or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black frame and a slightly curved screen. The top of the television features a series of control buttons and a small display screen.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is monitor/tv. Based on the image, is it likely that the object in the description is given class: monitor/tv or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black frame and a slightly curved screen. 
The top of the television features a series of control buttons and a small display screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the screen is mentioned in the description and is rectangular or rounded rectangular.\nB. The shape of the screen is mentioned in the description but is not rectangular or rounded rectangular.\nC. The screen or the monitor/tv is not mentioned.\nD. The shape of the screen is not mentioned, but the screen of the monitor/tv is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black frame and a slightly curved screen. The top of the television features a series of control buttons and a small display screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the frame is mentioned in the description and is plastic.\nB. The material of the frame is mentioned in the description but is not plastic.\nC. The frame or the monitor/tv is not mentioned.\nD. The material of the frame is not mentioned, but the frame of the monitor/tv is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black frame and a slightly curved screen. The top of the television features a series of control buttons and a small display screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the monitor/tv is mentioned in the description and is black.\nB. The color of the monitor/tv is mentioned in the description but is not black.\nC. The monitor/tv is not mentioned.\nD. The color of the monitor/tv is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black frame and a slightly curved screen. The top of the television features a series of control buttons and a small display screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the screen is mentioned in the description and is glass.\nB. The material of the screen is mentioned in the description but is not glass.\nC. The screen or the monitor/tv is not mentioned.\nD. The material of the screen is not mentioned, but the screen of the monitor/tv is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black frame and a slightly curved screen. The top of the television features a series of control buttons and a small display screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chairs are mentioned in the description.\nB. The chairs are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black frame and a slightly curved screen. The top of the television features a series of control buttons and a small display screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The remote control of the monitor/tv is mentioned in the description.\nB. The monitor/tv is not mentioned in the description.\nC. The remote control of the monitor/tv is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black frame and a slightly curved screen. 
The top of the television features a series of control buttons and a small display screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ports of the monitor/tv are mentioned in the description.\nB. The monitor/tv is not mentioned in the description.\nC. The ports of the monitor/tv are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black frame and a slightly curved screen. The top of the television features a series of control buttons and a small display screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The glass are mentioned in the description.\nB. The glass are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA vintage-style television set with a boxy, black frame and a slightly curved screen. 
The top of the television features a series of control buttons and a small display screen.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The range hood is mentioned in the description.\nB. The range hood is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The shape of the screen is mentioned in the description and is rectangular or rounded rectangular.", + "D. The material of the frame is not mentioned, but the frame of the monitor/tv is mentioned.", + "A. The color of the monitor/tv is mentioned in the description and is black.", + "D. The material of the screen is not mentioned, but the screen of the monitor/tv is mentioned.", + "B. The chairs are not mentioned in the description.", + "C. The remote control of the monitor/tv is not mentioned in the description.", + "C. The ports of the monitor/tv are not mentioned in the description.", + "B. The glass are not mentioned in the description.", + "B. The range hood is not mentioned in the description." + ], + "score": 0.8888888888888888, + "score_pos": 0.75, + "score_neg": 1.0, + "recognition_result": true + }, + "1770866": { + "pred": "A white tag with handwritten text in blue and red marker. The blue text reads \"LIBRA\" and \"my tabouts\" in a cursive style. Below, in red marker, the text \"Add $50\" is written in a bold, sans-serif font.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the text is mentioned in the description but is not handwritten.", + -1 + ], + [ + "The type of the text is not mentioned, but the text of the sign/banner is mentioned.", + 0.5 + ], + [ + "The text or the sign/banner is not mentioned.", + 0 + ], + [ + "The type of the text is mentioned in the description and is handwritten.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The type of the text is mentioned in the description and is handwritten.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the board is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the board is not mentioned, but the board of the sign/banner is mentioned.", + 0.5 + ], + [ + "The board or the sign/banner is not mentioned.", + 0 + ], + [ + "The shape of the board is mentioned in the description and is rectangular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the board is not mentioned, but the board of the sign/banner is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The background color of the sign/banner is mentioned in the description but is not white.", + -1 + ], + [ + "The background color of the sign/banner is not mentioned.", + 0 + ], + [ + "The sign/banner is not mentioned.", + 0 + ], + [ + "The background color of the sign/banner is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The background color of the sign/banner is mentioned in the description and is white.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the text is mentioned in the description but is not black or blue and red.", + -1 + ], + [ + "The color of the text is not mentioned, but the text of the sign/banner is mentioned.", + 0.5 + ], + [ + "The text or the sign/banner is not mentioned.", + 0 + ], + [ + "The color of the text is mentioned in the description and is black or blue and red.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the text is mentioned in the description and is black or blue and red.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The salami is not mentioned in the description.", + 1 + ], + [ + "The salami is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The salami is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sliced meats are not mentioned in the description.", + 1 + ], + [ + "The sliced meats are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The sliced meats are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sign/banner is not mentioned in the description.", + 0 + ], + [ + "The duster of the sign/banner is not mentioned in the description.", + 1 + ], + [ + "The duster of the sign/banner is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The duster of the sign/banner is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sign/banner is not mentioned in the description.", + 0 + ], + [ + "The marker of the sign/banner is not mentioned in the description.", + 1 + ], + [ + "The marker of the sign/banner is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The marker of the sign/banner is mentioned in the description.", + "pred_index": 2, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The price tags are not mentioned in the description.", + 1 + ], + [ + "The price tags are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The price tags are mentioned in the description.", + "pred_index": 1, + "eval_result": -1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is sign/banner. Based on the image, is it likely that the object in the description is given class: sign/banner or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white tag with handwritten text in blue and red marker. The blue text reads \"LIBRA\" and \"my tabouts\" in a cursive style. Below, in red marker, the text \"Add $50\" is written in a bold, sans-serif font.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is sign/banner. Based on the image, is it likely that the object in the description is given class: sign/banner or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white tag with handwritten text in blue and red marker. The blue text reads \"LIBRA\" and \"my tabouts\" in a cursive style. Below, in red marker, the text \"Add $50\" is written in a bold, sans-serif font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The type of the text is mentioned in the description but is not handwritten.\nB. The type of the text is not mentioned, but the text of the sign/banner is mentioned.\nC. The text or the sign/banner is not mentioned.\nD. The type of the text is mentioned in the description and is handwritten.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white tag with handwritten text in blue and red marker. The blue text reads \"LIBRA\" and \"my tabouts\" in a cursive style. Below, in red marker, the text \"Add $50\" is written in a bold, sans-serif font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the board is mentioned in the description but is not rectangular.\nB. The shape of the board is not mentioned, but the board of the sign/banner is mentioned.\nC. The board or the sign/banner is not mentioned.\nD. 
The shape of the board is mentioned in the description and is rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white tag with handwritten text in blue and red marker. The blue text reads \"LIBRA\" and \"my tabouts\" in a cursive style. Below, in red marker, the text \"Add $50\" is written in a bold, sans-serif font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The background color of the sign/banner is mentioned in the description but is not white.\nB. The background color of the sign/banner is not mentioned.\nC. The sign/banner is not mentioned.\nD. The background color of the sign/banner is mentioned in the description and is white.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white tag with handwritten text in blue and red marker. The blue text reads \"LIBRA\" and \"my tabouts\" in a cursive style. Below, in red marker, the text \"Add $50\" is written in a bold, sans-serif font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The color of the text is mentioned in the description but is not black or blue and red.\nB. The color of the text is not mentioned, but the text of the sign/banner is mentioned.\nC. The text or the sign/banner is not mentioned.\nD. The color of the text is mentioned in the description and is black or blue and red.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white tag with handwritten text in blue and red marker. The blue text reads \"LIBRA\" and \"my tabouts\" in a cursive style. Below, in red marker, the text \"Add $50\" is written in a bold, sans-serif font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The salami is not mentioned in the description.\nB. The salami is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white tag with handwritten text in blue and red marker. The blue text reads \"LIBRA\" and \"my tabouts\" in a cursive style. Below, in red marker, the text \"Add $50\" is written in a bold, sans-serif font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The sliced meats are not mentioned in the description.\nB. The sliced meats are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white tag with handwritten text in blue and red marker. The blue text reads \"LIBRA\" and \"my tabouts\" in a cursive style. Below, in red marker, the text \"Add $50\" is written in a bold, sans-serif font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sign/banner is not mentioned in the description.\nB. The duster of the sign/banner is not mentioned in the description.\nC. The duster of the sign/banner is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white tag with handwritten text in blue and red marker. The blue text reads \"LIBRA\" and \"my tabouts\" in a cursive style. Below, in red marker, the text \"Add $50\" is written in a bold, sans-serif font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sign/banner is not mentioned in the description.\nB. The marker of the sign/banner is not mentioned in the description.\nC. 
The marker of the sign/banner is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white tag with handwritten text in blue and red marker. The blue text reads \"LIBRA\" and \"my tabouts\" in a cursive style. Below, in red marker, the text \"Add $50\" is written in a bold, sans-serif font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The price tags are not mentioned in the description.\nB. The price tags are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The type of the text is mentioned in the description and is handwritten.", + "B. The shape of the board is not mentioned, but the board of the sign/banner is mentioned.", + "D. The background color of the sign/banner is mentioned in the description and is white.", + "D. The color of the text is mentioned in the description and is black or blue and red.", + "A. The salami is not mentioned in the description.", + "A. The sliced meats are not mentioned in the description.", + "B. The duster of the sign/banner is not mentioned in the description.", + "C. The marker of the sign/banner is mentioned in the description.", + "B. The price tags are mentioned in the description." + ], + "score": 0.5, + "score_pos": 0.875, + "score_neg": 0.2, + "recognition_result": true + }, + "2391761": { + "pred": "The canoe features a blue tarpaulin cover secured over its wooden frame. The visible part of the canoe's hull is made of wooden planks, with a natural brown finish. 
The canoe has a pointed bow and a slightly raised stern. A white fender is attached to the side, and a red and white lifebuoy is visible inside the canoe.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The hull or the boat is not mentioned.", + 0 + ], + [ + "The color of the hull is mentioned in the description and is brown.", + 1 + ], + [ + "The color of the hull is mentioned in the description but is not brown.", + -1 + ], + [ + "The color of the hull is not mentioned, but the hull of the boat is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the hull is mentioned in the description and is brown.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cover or the boat is not mentioned.", + 0 + ], + [ + "The color of the cover is mentioned in the description and is blue.", + 1 + ], + [ + "The color of the cover is mentioned in the description but is not blue.", + -1 + ], + [ + "The color of the cover is not mentioned, but the cover of the boat is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the cover is mentioned in the description and is blue.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The life preserver/life buoy or the boat is not mentioned.", + 0 + ], + [ + "The color of the life preserver/life buoy is mentioned in the description and is red or white.", + 1 + ], + [ + "The color of the life preserver/life buoy is mentioned in the description but is not red or white.", + -1 + ], + [ + "The color of the life preserver/life buoy is not mentioned, but the life preserver/life buoy of the boat is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the life preserver/life buoy is mentioned in the description and is red or white.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The motor or the boat is not mentioned.", + 0 + ], + [ + "The color of the motor is mentioned in the description and is black.", + 1 + ], + [ + "The color of the motor is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the motor is not mentioned, but the motor of the boat is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the motor is not mentioned, but the motor of the boat is mentioned.", + "pred_index": 3, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rudder of the boat is mentioned in the description.", + -1 + ], + [ + "The boat is not mentioned in the description.", + 0 + ], + [ + "The rudder of the boat is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The rudder of the boat is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sail of the boat is mentioned in the description.", + -1 + ], + [ + "The boat is not mentioned in the description.", + 0 + ], + [ + "The sail of the boat is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The sail of the boat is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cabin of the boat is mentioned in the description.", + -1 + ], + [ + "The boat is not mentioned in the description.", + 0 + ], + [ + "The cabin of the boat is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The cabin of the boat is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The anchor of the boat is mentioned in the description.", + -1 + ], + [ + "The boat is not mentioned in the description.", + 0 + ], + [ + "The anchor of the boat is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The anchor of the boat is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ducks are mentioned in the description.", + -1 + ], + [ + "The ducks are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The ducks are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is boat. Based on the image, is it likely that the object in the description is given class: boat or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe features a blue tarpaulin cover secured over its wooden frame. The visible part of the canoe's hull is made of wooden planks, with a natural brown finish. The canoe has a pointed bow and a slightly raised stern. A white fender is attached to the side, and a red and white lifebuoy is visible inside the canoe.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is boat. Based on the image, is it likely that the object in the description is given class: boat or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe features a blue tarpaulin cover secured over its wooden frame. The visible part of the canoe's hull is made of wooden planks, with a natural brown finish. The canoe has a pointed bow and a slightly raised stern. A white fender is attached to the side, and a red and white lifebuoy is visible inside the canoe.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The hull or the boat is not mentioned.\nB. The color of the hull is mentioned in the description and is brown.\nC. The color of the hull is mentioned in the description but is not brown.\nD. 
The color of the hull is not mentioned, but the hull of the boat is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe features a blue tarpaulin cover secured over its wooden frame. The visible part of the canoe's hull is made of wooden planks, with a natural brown finish. The canoe has a pointed bow and a slightly raised stern. A white fender is attached to the side, and a red and white lifebuoy is visible inside the canoe.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cover or the boat is not mentioned.\nB. The color of the cover is mentioned in the description and is blue.\nC. The color of the cover is mentioned in the description but is not blue.\nD. The color of the cover is not mentioned, but the cover of the boat is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe features a blue tarpaulin cover secured over its wooden frame. The visible part of the canoe's hull is made of wooden planks, with a natural brown finish. The canoe has a pointed bow and a slightly raised stern. 
A white fender is attached to the side, and a red and white lifebuoy is visible inside the canoe.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The life preserver/life buoy or the boat is not mentioned.\nB. The color of the life preserver/life buoy is mentioned in the description and is red or white.\nC. The color of the life preserver/life buoy is mentioned in the description but is not red or white.\nD. The color of the life preserver/life buoy is not mentioned, but the life preserver/life buoy of the boat is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe features a blue tarpaulin cover secured over its wooden frame. The visible part of the canoe's hull is made of wooden planks, with a natural brown finish. The canoe has a pointed bow and a slightly raised stern. A white fender is attached to the side, and a red and white lifebuoy is visible inside the canoe.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The motor or the boat is not mentioned.\nB. The color of the motor is mentioned in the description and is black.\nC. The color of the motor is mentioned in the description but is not black.\nD. The color of the motor is not mentioned, but the motor of the boat is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe features a blue tarpaulin cover secured over its wooden frame. The visible part of the canoe's hull is made of wooden planks, with a natural brown finish. The canoe has a pointed bow and a slightly raised stern. A white fender is attached to the side, and a red and white lifebuoy is visible inside the canoe.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rudder of the boat is mentioned in the description.\nB. The boat is not mentioned in the description.\nC. The rudder of the boat is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe features a blue tarpaulin cover secured over its wooden frame. The visible part of the canoe's hull is made of wooden planks, with a natural brown finish. The canoe has a pointed bow and a slightly raised stern. A white fender is attached to the side, and a red and white lifebuoy is visible inside the canoe.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sail of the boat is mentioned in the description.\nB. The boat is not mentioned in the description.\nC. 
The sail of the boat is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe features a blue tarpaulin cover secured over its wooden frame. The visible part of the canoe's hull is made of wooden planks, with a natural brown finish. The canoe has a pointed bow and a slightly raised stern. A white fender is attached to the side, and a red and white lifebuoy is visible inside the canoe.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cabin of the boat is mentioned in the description.\nB. The boat is not mentioned in the description.\nC. The cabin of the boat is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe features a blue tarpaulin cover secured over its wooden frame. The visible part of the canoe's hull is made of wooden planks, with a natural brown finish. The canoe has a pointed bow and a slightly raised stern. 
A white fender is attached to the side, and a red and white lifebuoy is visible inside the canoe.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The anchor of the boat is mentioned in the description.\nB. The boat is not mentioned in the description.\nC. The anchor of the boat is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe canoe features a blue tarpaulin cover secured over its wooden frame. The visible part of the canoe's hull is made of wooden planks, with a natural brown finish. The canoe has a pointed bow and a slightly raised stern. A white fender is attached to the side, and a red and white lifebuoy is visible inside the canoe.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ducks are mentioned in the description.\nB. The ducks are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the hull is mentioned in the description and is brown.", + "B. The color of the cover is mentioned in the description and is blue.", + "B. The color of the life preserver/life buoy is mentioned in the description and is red or white.", + "D. The color of the motor is not mentioned, but the motor of the boat is mentioned.", + "C. The rudder of the boat is not mentioned in the description.", + "C. The sail of the boat is not mentioned in the description.", + "C. The cabin of the boat is not mentioned in the description.", + "C. 
The anchor of the boat is not mentioned in the description.", + "B. The ducks are not mentioned in the description." + ], + "score": 0.9444444444444444, + "score_pos": 0.875, + "score_neg": 1.0, + "recognition_result": true + }, + "2580318": { + "pred": "The mouse has a smooth, metallic surface with a slightly curved, ergonomic shape. The visible part is a triangular section with a gradient of light and dark shades, giving it a sleek and modern appearance.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouse is not mentioned.", + 0 + ], + [ + "The color of the mouse is not mentioned.", + 0 + ], + [ + "The color of the mouse is mentioned in the description but is not gray.", + -1 + ], + [ + "The color of the mouse is mentioned in the description and is gray.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the mouse is mentioned in the description and is gray.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouse is not mentioned.", + 0 + ], + [ + "The texture of the mouse is not mentioned.", + 0 + ], + [ + "The texture of the mouse is mentioned in the description but is not matte.", + -1 + ], + [ + "The texture of the mouse is mentioned in the description and is matte.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The texture of the mouse is mentioned in the description but is not matte.", + "pred_index": 2, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouse is not mentioned.", + 0 + ], + [ + "The shape of the mouse is not mentioned.", + 0 + ], + [ + "The shape of the mouse is mentioned in the description but is not ergonomic.", + -1 + ], + [ + "The shape of the mouse is mentioned in the description and is ergonomic.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the mouse is mentioned in the description and is ergonomic.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Paper is mentioned in the description.", + -1 + ], + [ + "The Paper is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The Paper is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouse is not mentioned in the description.", + 0 + ], + [ + "The cable of the mouse is mentioned in the description.", + -1 + ], + [ + "The cable of the mouse is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The cable of the mouse is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Framed diagram is mentioned in the description.", + -1 + ], + [ + "The Framed diagram is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The Framed diagram is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Pen is mentioned in the description.", + -1 + ], + [ + "The Pen is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The Pen is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouse is not mentioned in the description.", + 0 + ], + [ + "The side buttons of the mouse are mentioned in the description.", + -1 + ], + [ + "The side buttons of the mouse are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The side buttons of the mouse are not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is mouse. Based on the image, is it likely that the object in the description is given class: mouse or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a smooth, metallic surface with a slightly curved, ergonomic shape. 
The visible part is a triangular section with a gradient of light and dark shades, giving it a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is mouse. Based on the image, is it likely that the object in the description is given class: mouse or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a smooth, metallic surface with a slightly curved, ergonomic shape. The visible part is a triangular section with a gradient of light and dark shades, giving it a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mouse is not mentioned.\nB. The color of the mouse is not mentioned.\nC. The color of the mouse is mentioned in the description but is not gray.\nD. The color of the mouse is mentioned in the description and is gray.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a smooth, metallic surface with a slightly curved, ergonomic shape. 
The visible part is a triangular section with a gradient of light and dark shades, giving it a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mouse is not mentioned.\nB. The texture of the mouse is not mentioned.\nC. The texture of the mouse is mentioned in the description but is not matte.\nD. The texture of the mouse is mentioned in the description and is matte.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a smooth, metallic surface with a slightly curved, ergonomic shape. The visible part is a triangular section with a gradient of light and dark shades, giving it a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mouse is not mentioned.\nB. The shape of the mouse is not mentioned.\nC. The shape of the mouse is mentioned in the description but is not ergonomic.\nD. The shape of the mouse is mentioned in the description and is ergonomic.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a smooth, metallic surface with a slightly curved, ergonomic shape. The visible part is a triangular section with a gradient of light and dark shades, giving it a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Paper is mentioned in the description.\nB. The Paper is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a smooth, metallic surface with a slightly curved, ergonomic shape. The visible part is a triangular section with a gradient of light and dark shades, giving it a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mouse is not mentioned in the description.\nB. The cable of the mouse is mentioned in the description.\nC. The cable of the mouse is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a smooth, metallic surface with a slightly curved, ergonomic shape. 
The visible part is a triangular section with a gradient of light and dark shades, giving it a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Framed diagram is mentioned in the description.\nB. The Framed diagram is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a smooth, metallic surface with a slightly curved, ergonomic shape. The visible part is a triangular section with a gradient of light and dark shades, giving it a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Pen is mentioned in the description.\nB. The Pen is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse has a smooth, metallic surface with a slightly curved, ergonomic shape. 
The visible part is a triangular section with a gradient of light and dark shades, giving it a sleek and modern appearance.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mouse is not mentioned in the description.\nB. The side buttons of the mouse are mentioned in the description.\nC. The side buttons of the mouse are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the mouse is mentioned in the description and is gray.", + "C. The texture of the mouse is mentioned in the description but is not matte.", + "D. The shape of the mouse is mentioned in the description and is ergonomic.", + "B. The Paper is not mentioned in the description.", + "C. The cable of the mouse is not mentioned in the description.", + "B. The Framed diagram is not mentioned in the description.", + "B. The Pen is not mentioned in the description.", + "C. The side buttons of the mouse are not mentioned in the description." + ], + "score": 0.75, + "score_pos": 0.3333333333333333, + "score_neg": 1.0, + "recognition_result": true + }, + "2588513": { + "pred": "A rectangular wooden block with a light beige color and visible wood grain texture. The block has a black base and a white band wrapped around its middle.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the wood block is not mentioned.", + 0 + ], + [ + "The color of the wood block is mentioned in the description but is not wooden or brown.", + -1 + ], + [ + "The color of the wood block is mentioned in the description and is wooden or brown.", + 1 + ], + [ + "The wood block is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the wood block is mentioned in the description and is wooden or brown.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the top is not mentioned, but the top of the wood block is mentioned.", + 0.5 + ], + [ + "The texture of the top is mentioned in the description but is not grain.", + -1 + ], + [ + "The texture of the top is mentioned in the description and is grain.", + 1 + ], + [ + "The top or the wood block is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The texture of the top is mentioned in the description and is grain.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wood block is not mentioned in the description.", + 0 + ], + [ + "The metal clip of the wood block is not mentioned in the description.", + 1 + ], + [ + "The metal clip of the wood block is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The metal clip of the wood block is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The machine with a motor are not mentioned in the description.", + 1 + ], + [ + "The machine with a motor are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The machine with a motor are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pickup is not mentioned in the description.", + 1 + ], + [ + "The pickup is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The pickup is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wood block is not mentioned in the description.", + 0 + ], + [ + "The plastic handle of the wood block is not mentioned in the description.", + 1 + ], + [ + "The plastic handle of the wood block is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The plastic handle of the wood block is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The copper wire spool is not mentioned in the description.", + 1 + ], + [ + "The copper wire spool is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The copper wire spool is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is wood block. Based on the image, is it likely that the object in the description is given class: wood block or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden block with a light beige color and visible wood grain texture. The block has a black base and a white band wrapped around its middle.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is wood block. Based on the image, is it likely that the object in the description is given class: wood block or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden block with a light beige color and visible wood grain texture. The block has a black base and a white band wrapped around its middle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the wood block is not mentioned.\nB. The color of the wood block is mentioned in the description but is not wooden or brown.\nC. The color of the wood block is mentioned in the description and is wooden or brown.\nD. The wood block is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden block with a light beige color and visible wood grain texture. The block has a black base and a white band wrapped around its middle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the top is not mentioned, but the top of the wood block is mentioned.\nB. The texture of the top is mentioned in the description but is not grain.\nC. The texture of the top is mentioned in the description and is grain.\nD. The top or the wood block is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden block with a light beige color and visible wood grain texture. The block has a black base and a white band wrapped around its middle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wood block is not mentioned in the description.\nB. The metal clip of the wood block is not mentioned in the description.\nC. The metal clip of the wood block is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden block with a light beige color and visible wood grain texture. The block has a black base and a white band wrapped around its middle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The machine with a motor are not mentioned in the description.\nB. The machine with a motor are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden block with a light beige color and visible wood grain texture. The block has a black base and a white band wrapped around its middle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pickup is not mentioned in the description.\nB. The pickup is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden block with a light beige color and visible wood grain texture. 
The block has a black base and a white band wrapped around its middle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wood block is not mentioned in the description.\nB. The plastic handle of the wood block is not mentioned in the description.\nC. The plastic handle of the wood block is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden block with a light beige color and visible wood grain texture. The block has a black base and a white band wrapped around its middle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The copper wire spool is not mentioned in the description.\nB. The copper wire spool is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the wood block is mentioned in the description and is wooden or brown.", + "C. The texture of the top is mentioned in the description and is grain.", + "B. The metal clip of the wood block is not mentioned in the description.", + "A. The machine with a motor are not mentioned in the description.", + "A. The pickup is not mentioned in the description.", + "B. The plastic handle of the wood block is not mentioned in the description.", + "A. The copper wire spool is not mentioned in the description." 
+ ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "3993075": { + "pred": "A white pen with a red cap and a green and blue design on the barrel.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the cap is mentioned in the description but is not orange.", + -1 + ], + [ + "The color of the cap is mentioned in the description and is orange.", + 1 + ], + [ + "The cap or the marker is not mentioned.", + 0 + ], + [ + "The color of the cap is not mentioned, but the cap of the marker is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the cap is mentioned in the description but is not orange.", + "pred_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the body is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the body is mentioned in the description and is white.", + 1 + ], + [ + "The body or the marker is not mentioned.", + 0 + ], + [ + "The color of the body is not mentioned, but the body of the marker is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the body is mentioned in the description and is white.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the marker is mentioned in the description but is not cylindrical.", + -1 + ], + [ + "The shape of the marker is mentioned in the description and is cylindrical.", + 1 + ], + [ + "The marker is not mentioned.", + 0 + ], + [ + "The shape of the marker is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "D. 
The shape of the marker is not mentioned.", + "pred_index": 3, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the body is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the body is mentioned in the description and is plastic.", + 1 + ], + [ + "The body or the marker is not mentioned.", + 0 + ], + [ + "The material of the body is not mentioned, but the body of the marker is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the body is not mentioned, but the body of the marker is mentioned.", + "pred_index": 3, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The children are mentioned in the description.", + -1 + ], + [ + "The children are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The children are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The table is mentioned in the description.", + -1 + ], + [ + "The table is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The table is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clip of the marker is mentioned in the description.", + -1 + ], + [ + "The marker is not mentioned in the description.", + 0 + ], + [ + "The clip of the marker is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The clip of the marker is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The label of the marker is mentioned in the description.", + -1 + ], + [ + "The marker is not mentioned in the description.", + 0 + ], + [ + "The label of the marker is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The label of the marker is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chairs are mentioned in the description.", + -1 + ], + [ + "The chairs are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The chairs are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is marker. Based on the image, is it likely that the object in the description is given class: marker or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA white pen with a red cap and a green and blue design on the barrel.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is marker. Based on the image, is it likely that the object in the description is given class: marker or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white pen with a red cap and a green and blue design on the barrel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the cap is mentioned in the description but is not orange.\nB. The color of the cap is mentioned in the description and is orange.\nC. The cap or the marker is not mentioned.\nD. The color of the cap is not mentioned, but the cap of the marker is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white pen with a red cap and a green and blue design on the barrel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The color of the body is mentioned in the description but is not white.\nB. The color of the body is mentioned in the description and is white.\nC. The body or the marker is not mentioned.\nD. The color of the body is not mentioned, but the body of the marker is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white pen with a red cap and a green and blue design on the barrel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the marker is mentioned in the description but is not cylindrical.\nB. The shape of the marker is mentioned in the description and is cylindrical.\nC. The marker is not mentioned.\nD. The shape of the marker is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white pen with a red cap and a green and blue design on the barrel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the body is mentioned in the description but is not plastic.\nB. The material of the body is mentioned in the description and is plastic.\nC. The body or the marker is not mentioned.\nD. 
The material of the body is not mentioned, but the body of the marker is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white pen with a red cap and a green and blue design on the barrel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The children are mentioned in the description.\nB. The children are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white pen with a red cap and a green and blue design on the barrel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The table is mentioned in the description.\nB. The table is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA white pen with a red cap and a green and blue design on the barrel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The clip of the marker is mentioned in the description.\nB. The marker is not mentioned in the description.\nC. The clip of the marker is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white pen with a red cap and a green and blue design on the barrel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The label of the marker is mentioned in the description.\nB. The marker is not mentioned in the description.\nC. The label of the marker is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white pen with a red cap and a green and blue design on the barrel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chairs are mentioned in the description.\nB. 
The chairs are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The color of the cap is mentioned in the description but is not orange.", + "B. The color of the body is mentioned in the description and is white.", + "D. The shape of the marker is not mentioned.", + "D. The material of the body is not mentioned, but the body of the marker is mentioned.", + "B. The children are not mentioned in the description.", + "B. The table is not mentioned in the description.", + "C. The clip of the marker is not mentioned in the description.", + "C. The label of the marker is not mentioned in the description.", + "B. The chairs are not mentioned in the description." + ], + "score": 0.6111111111111112, + "score_pos": 0.125, + "score_neg": 1.0, + "recognition_result": true + }, + "4027486": { + "pred": "The bus is predominantly blue with a white section near the bottom. It has a rectangular window with a black frame and a visible license plate that reads \"SABF.\" The bus features a sleek, modern design with a slightly curved roof and a small, white, triangular logo near the bottom.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the license plate is not mentioned, but the license plate of the truck is mentioned.", + 0.5 + ], + [ + "The license plate or the truck is not mentioned.", + 0 + ], + [ + "The shape of the license plate is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the license plate is mentioned in the description and is rectangular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The shape of the license plate is mentioned in the description and is rectangular.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the front is not mentioned, but the front of the truck is mentioned.", + 0.5 + ], + [ + "The front or the truck is not mentioned.", + 0 + ], + [ + "The color of the front is mentioned in the description but is not blue.", + -1 + ], + [ + "The color of the front is mentioned in the description and is blue.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the front is not mentioned, but the front of the truck is mentioned.", + "pred_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the grille is not mentioned, but the grille of the truck is mentioned.", + 0.5 + ], + [ + "The grille or the truck is not mentioned.", + 0 + ], + [ + "The color of the grille is mentioned in the description but is not black, green, or blue.", + -1 + ], + [ + "The color of the grille is mentioned in the description and is black, green, or blue.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the grille is not mentioned, but the grille of the truck is mentioned.", + "pred_index": 0, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The truck is not mentioned in the description.", + 0 + ], + [ + "The door handle of the truck is mentioned in the description.", + -1 + ], + [ + "The door handle of the truck is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The door handle of the truck is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The truck is not mentioned in the description.", + 0 + ], + [ + "The rear part of the truck is mentioned in the description.", + -1 + ], + [ + "The rear part of the truck is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The rear part of the truck is mentioned in the description.", + "pred_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The truck is not mentioned in the description.", + 0 + ], + [ + "The exhaust pipe of the truck is mentioned in the description.", + -1 + ], + [ + "The exhaust pipe of the truck is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The exhaust pipe of the truck is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The truck is not mentioned in the description.", + 0 + ], + [ + "The tail light of the truck is mentioned in the description.", + -1 + ], + [ + "The tail light of the truck is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The tail light of the truck is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The truck is not mentioned in the description.", + 0 + ], + [ + "The cargo area of the truck is mentioned in the description.", + -1 + ], + [ + "The cargo area of the truck is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The cargo area of the truck is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is truck. Based on the image, is it likely that the object in the description is given class: truck or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section near the bottom. It has a rectangular window with a black frame and a visible license plate that reads \"SABF.\" The bus features a sleek, modern design with a slightly curved roof and a small, white, triangular logo near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is truck. Based on the image, is it likely that the object in the description is given class: truck or object of a similar type?\nA. Yes\nB. 
No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section near the bottom. It has a rectangular window with a black frame and a visible license plate that reads \"SABF.\" The bus features a sleek, modern design with a slightly curved roof and a small, white, triangular logo near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the license plate is not mentioned, but the license plate of the truck is mentioned.\nB. The license plate or the truck is not mentioned.\nC. The shape of the license plate is mentioned in the description but is not rectangular.\nD. The shape of the license plate is mentioned in the description and is rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section near the bottom. 
It has a rectangular window with a black frame and a visible license plate that reads \"SABF.\" The bus features a sleek, modern design with a slightly curved roof and a small, white, triangular logo near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the front is not mentioned, but the front of the truck is mentioned.\nB. The front or the truck is not mentioned.\nC. The color of the front is mentioned in the description but is not blue.\nD. The color of the front is mentioned in the description and is blue.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section near the bottom. It has a rectangular window with a black frame and a visible license plate that reads \"SABF.\" The bus features a sleek, modern design with a slightly curved roof and a small, white, triangular logo near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the grille is not mentioned, but the grille of the truck is mentioned.\nB. The grille or the truck is not mentioned.\nC. The color of the grille is mentioned in the description but is not black, green, or blue.\nD. The color of the grille is mentioned in the description and is black, green, or blue.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section near the bottom. It has a rectangular window with a black frame and a visible license plate that reads \"SABF.\" The bus features a sleek, modern design with a slightly curved roof and a small, white, triangular logo near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The truck is not mentioned in the description.\nB. The door handle of the truck is mentioned in the description.\nC. The door handle of the truck is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section near the bottom. It has a rectangular window with a black frame and a visible license plate that reads \"SABF.\" The bus features a sleek, modern design with a slightly curved roof and a small, white, triangular logo near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The truck is not mentioned in the description.\nB. The rear part of the truck is mentioned in the description.\nC. 
The rear part of the truck is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section near the bottom. It has a rectangular window with a black frame and a visible license plate that reads \"SABF.\" The bus features a sleek, modern design with a slightly curved roof and a small, white, triangular logo near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The truck is not mentioned in the description.\nB. The exhaust pipe of the truck is mentioned in the description.\nC. The exhaust pipe of the truck is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section near the bottom. It has a rectangular window with a black frame and a visible license plate that reads \"SABF.\" The bus features a sleek, modern design with a slightly curved roof and a small, white, triangular logo near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The truck is not mentioned in the description.\nB. The tail light of the truck is mentioned in the description.\nC. The tail light of the truck is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bus is predominantly blue with a white section near the bottom. It has a rectangular window with a black frame and a visible license plate that reads \"SABF.\" The bus features a sleek, modern design with a slightly curved roof and a small, white, triangular logo near the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The truck is not mentioned in the description.\nB. The cargo area of the truck is mentioned in the description.\nC. The cargo area of the truck is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The shape of the license plate is mentioned in the description and is rectangular.", + "A. The color of the front is not mentioned, but the front of the truck is mentioned.", + "A. The color of the grille is not mentioned, but the grille of the truck is mentioned.", + "C. The door handle of the truck is not mentioned in the description.", + "B. The rear part of the truck is mentioned in the description.", + "C. The exhaust pipe of the truck is not mentioned in the description.", + "C. The tail light of the truck is not mentioned in the description.", + "C. The cargo area of the truck is not mentioned in the description." 
+ ], + "score": 0.625, + "score_pos": 0.6666666666666666, + "score_neg": 0.6, + "recognition_result": true + }, + "4243725": { + "pred": "A curved, elongated object with a gradient of colors ranging from light yellow to dark brown, featuring a smooth, glossy surface.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the soap is mentioned in the description and is bar soap.", + 1 + ], + [ + "The type of the soap is mentioned in the description but is not bar soap.", + -1 + ], + [ + "The type of the soap is not mentioned.", + 0 + ], + [ + "The soap is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "D. The soap is not mentioned.", + "pred_index": 3, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the soap is mentioned in the description and is yellow or brown.", + 1 + ], + [ + "The color of the soap is mentioned in the description but is not yellow or brown.", + -1 + ], + [ + "The color of the soap is not mentioned.", + 0 + ], + [ + "The soap is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the soap is mentioned in the description and is yellow or brown.", + "pred_index": 0, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The decorative elements of the soap are not mentioned in the description.", + 1 + ], + [ + "The decorative elements of the soap are mentioned in the description.", + -1 + ], + [ + "The soap is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The soap is not mentioned in the description.", + "pred_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cutting board is not mentioned in the description.", + 1 + ], + [ + "The cutting board is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The cutting board is not mentioned in the description.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The dishwasher is not mentioned in the description.", + 1 + ], + [ + "The dishwasher is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The dishwasher is not mentioned in the description.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The decorative peppers are not mentioned in the description.", + 1 + ], + [ + "The decorative peppers are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The decorative peppers are mentioned in the description.", + "pred_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The logo of the soap is not mentioned in the description.", + 1 + ], + [ + "The logo of the soap is mentioned in the description.", + -1 + ], + [ + "The soap is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The logo of the soap is not mentioned in the description.", + "pred_index": 0, + "eval_result": 0 + } + ], + "details_recognition": [ + { + "question": "The object in the image is soap. 
Based on the image, is it likely that the object in the description is given class: soap or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "B. No", + "pred_index": 1, + "eval_result": "incorrect" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA curved, elongated object with a gradient of colors ranging from light yellow to dark brown, featuring a smooth, glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is soap. Based on the image, is it likely that the object in the description is given class: soap or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA curved, elongated object with a gradient of colors ranging from light yellow to dark brown, featuring a smooth, glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The type of the soap is mentioned in the description and is bar soap.\nB. The type of the soap is mentioned in the description but is not bar soap.\nC. 
The type of the soap is not mentioned.\nD. The soap is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA curved, elongated object with a gradient of colors ranging from light yellow to dark brown, featuring a smooth, glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the soap is mentioned in the description and is yellow or brown.\nB. The color of the soap is mentioned in the description but is not yellow or brown.\nC. The color of the soap is not mentioned.\nD. The soap is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA curved, elongated object with a gradient of colors ranging from light yellow to dark brown, featuring a smooth, glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The decorative elements of the soap are not mentioned in the description.\nB. The decorative elements of the soap are mentioned in the description.\nC. 
The soap is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA curved, elongated object with a gradient of colors ranging from light yellow to dark brown, featuring a smooth, glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cutting board is not mentioned in the description.\nB. The cutting board is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA curved, elongated object with a gradient of colors ranging from light yellow to dark brown, featuring a smooth, glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The dishwasher is not mentioned in the description.\nB. The dishwasher is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA curved, elongated object with a gradient of colors ranging from light yellow to dark brown, featuring a smooth, glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The decorative peppers are not mentioned in the description.\nB. The decorative peppers are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA curved, elongated object with a gradient of colors ranging from light yellow to dark brown, featuring a smooth, glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The logo of the soap is not mentioned in the description.\nB. The logo of the soap is mentioned in the description.\nC. The soap is not mentioned in the description.\n" + ], + "response": [ + "B. No", + "D. The soap is not mentioned.", + "A. The color of the soap is mentioned in the description and is yellow or brown.", + "C. The soap is not mentioned in the description.", + "A. The cutting board is not mentioned in the description.", + "A. The dishwasher is not mentioned in the description.", + "B. The decorative peppers are mentioned in the description.", + "A. The logo of the soap is not mentioned in the description." 
+ ], + "score": -0.14285714285714285, + "score_pos": 0.0, + "score_neg": -0.2, + "recognition_result": false + }, + "4781902": { + "pred": "A dark brown wooden stool with a triangular seat and four legs, each leg angling outward and connected by a lower horizontal support beam.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the stool is not mentioned.", + 0 + ], + [ + "The stool is not mentioned.", + 0 + ], + [ + "The material of the stool is mentioned in the description but is not wood.", + -1 + ], + [ + "The material of the stool is mentioned in the description and is wood.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the stool is mentioned in the description and is wood.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the leg is not mentioned, but the leg of the stool is mentioned.", + 0.5 + ], + [ + "The leg or the stool is not mentioned.", + 0 + ], + [ + "The material of the leg is mentioned in the description but is not wood.", + -1 + ], + [ + "The material of the leg is mentioned in the description and is wood.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the leg is mentioned in the description and is wood.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the crossbar is not mentioned, but the crossbar of the stool is mentioned.", + 0.5 + ], + [ + "The crossbar or the stool is not mentioned.", + 0 + ], + [ + "The shape of the crossbar is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the crossbar is mentioned in the description and is rectangular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The shape of the crossbar is not mentioned, but the crossbar of the stool is mentioned.", + "pred_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the crossbar is not mentioned, but the crossbar of the stool is mentioned.", + 0.5 + ], + [ + "The crossbar or the stool is not mentioned.", + 0 + ], + [ + "The material of the crossbar is mentioned in the description but is not wood.", + -1 + ], + [ + "The material of the crossbar is mentioned in the description and is wood.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the crossbar is mentioned in the description and is wood.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the seat is not mentioned, but the seat of the stool is mentioned.", + 0.5 + ], + [ + "The seat or the stool is not mentioned.", + 0 + ], + [ + "The shape of the seat is mentioned in the description but is not flat.", + -1 + ], + [ + "The shape of the seat is mentioned in the description and is flat.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the seat is mentioned in the description and is flat.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sky is mentioned in the description.", + -1 + ], + [ + "The sky is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The sky is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The building in the background are mentioned in the description.", + -1 + ], + [ + "The building in the background are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The building in the background are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The swing set is mentioned in the description.", + -1 + ], + [ + "The swing set is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The swing set is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The slide is mentioned in the description.", + -1 + ], + [ + "The slide is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The slide is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The climbing wall is mentioned in the description.", + -1 + ], + [ + "The climbing wall is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The climbing wall is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is stool. 
Based on the image, is it likely that the object in the description is given class: stool or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wooden stool with a triangular seat and four legs, each leg angling outward and connected by a lower horizontal support beam.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is stool. Based on the image, is it likely that the object in the description is given class: stool or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wooden stool with a triangular seat and four legs, each leg angling outward and connected by a lower horizontal support beam.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the stool is not mentioned.\nB. The stool is not mentioned.\nC. The material of the stool is mentioned in the description but is not wood.\nD. 
The material of the stool is mentioned in the description and is wood.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wooden stool with a triangular seat and four legs, each leg angling outward and connected by a lower horizontal support beam.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the leg is not mentioned, but the leg of the stool is mentioned.\nB. The leg or the stool is not mentioned.\nC. The material of the leg is mentioned in the description but is not wood.\nD. The material of the leg is mentioned in the description and is wood.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wooden stool with a triangular seat and four legs, each leg angling outward and connected by a lower horizontal support beam.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the crossbar is not mentioned, but the crossbar of the stool is mentioned.\nB. The crossbar or the stool is not mentioned.\nC. The shape of the crossbar is mentioned in the description but is not rectangular.\nD. 
The shape of the crossbar is mentioned in the description and is rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wooden stool with a triangular seat and four legs, each leg angling outward and connected by a lower horizontal support beam.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the crossbar is not mentioned, but the crossbar of the stool is mentioned.\nB. The crossbar or the stool is not mentioned.\nC. The material of the crossbar is mentioned in the description but is not wood.\nD. The material of the crossbar is mentioned in the description and is wood.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wooden stool with a triangular seat and four legs, each leg angling outward and connected by a lower horizontal support beam.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the seat is not mentioned, but the seat of the stool is mentioned.\nB. The seat or the stool is not mentioned.\nC. The shape of the seat is mentioned in the description but is not flat.\nD. 
The shape of the seat is mentioned in the description and is flat.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wooden stool with a triangular seat and four legs, each leg angling outward and connected by a lower horizontal support beam.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sky is mentioned in the description.\nB. The sky is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wooden stool with a triangular seat and four legs, each leg angling outward and connected by a lower horizontal support beam.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The building in the background are mentioned in the description.\nB. The building in the background are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wooden stool with a triangular seat and four legs, each leg angling outward and connected by a lower horizontal support beam.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The swing set is mentioned in the description.\nB. The swing set is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wooden stool with a triangular seat and four legs, each leg angling outward and connected by a lower horizontal support beam.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The slide is mentioned in the description.\nB. The slide is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wooden stool with a triangular seat and four legs, each leg angling outward and connected by a lower horizontal support beam.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The climbing wall is mentioned in the description.\nB. The climbing wall is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The material of the stool is mentioned in the description and is wood.", + "D. The material of the leg is mentioned in the description and is wood.", + "A. The shape of the crossbar is not mentioned, but the crossbar of the stool is mentioned.", + "D. The material of the crossbar is mentioned in the description and is wood.", + "D. The shape of the seat is mentioned in the description and is flat.", + "B. The sky is not mentioned in the description.", + "B. The building in the background are not mentioned in the description.", + "B. The swing set is not mentioned in the description.", + "B. The slide is not mentioned in the description.", + "B. The climbing wall is not mentioned in the description." + ], + "score": 0.95, + "score_pos": 0.9, + "score_neg": 1.0, + "recognition_result": true + }, + "4782949": { + "pred": "A cylindrical drum with a dark brown, textured surface and a metallic rim. The drum has a blue and white striped pattern on the side.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the drum is mentioned in the description but is not round or circular.", + -1 + ], + [ + "The shape of the drum is mentioned in the description and is round or circular.", + 1 + ], + [ + "The shape of the drum is not mentioned.", + 0 + ], + [ + "The drum is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The shape of the drum is mentioned in the description and is round or circular.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the body is mentioned in the description but is not red.", + -1 + ], + [ + "The color of the body is mentioned in the description and is red.", + 1 + ], + [ + "The color of the body is not mentioned, but the body of the drum is mentioned.", + 0.5 + ], + [ + "The body or the drum is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the body is mentioned in the description but is not red.", + "pred_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the rim is mentioned in the description but is not metallic.", + -1 + ], + [ + "The material of the rim is mentioned in the description and is metallic.", + 1 + ], + [ + "The material of the rim is not mentioned, but the rim of the drum is mentioned.", + 0.5 + ], + [ + "The rim or the drum is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The material of the rim is mentioned in the description and is metallic.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The drum stand of the drum is mentioned in the description.", + -1 + ], + [ + "The drum is not mentioned in the description.", + 0 + ], + [ + "The drum stand of the drum is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The drum stand of the drum is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Trees are mentioned in the description.", + -1 + ], + [ + "The Trees are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The Trees are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The drum pedal of the drum is mentioned in the description.", + -1 + ], + [ + "The drum is not mentioned in the description.", + 0 + ], + [ + "The drum pedal of the drum is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The drum pedal of the drum is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Superior Foods sign is mentioned in the description.", + -1 + ], + [ + "The Superior Foods sign is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The Superior Foods sign is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The People are mentioned in the description.", + -1 + ], + [ + "The People are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The People are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is drum. 
Based on the image, is it likely that the object in the description is given class: drum or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical drum with a dark brown, textured surface and a metallic rim. The drum has a blue and white striped pattern on the side.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is drum. Based on the image, is it likely that the object in the description is given class: drum or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical drum with a dark brown, textured surface and a metallic rim. The drum has a blue and white striped pattern on the side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the drum is mentioned in the description but is not round or circular.\nB. The shape of the drum is mentioned in the description and is round or circular.\nC. 
The shape of the drum is not mentioned.\nD. The drum is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical drum with a dark brown, textured surface and a metallic rim. The drum has a blue and white striped pattern on the side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the body is mentioned in the description but is not red.\nB. The color of the body is mentioned in the description and is red.\nC. The color of the body is not mentioned, but the body of the drum is mentioned.\nD. The body or the drum is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical drum with a dark brown, textured surface and a metallic rim. The drum has a blue and white striped pattern on the side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the rim is mentioned in the description but is not metallic.\nB. The material of the rim is mentioned in the description and is metallic.\nC. The material of the rim is not mentioned, but the rim of the drum is mentioned.\nD. 
The rim or the drum is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical drum with a dark brown, textured surface and a metallic rim. The drum has a blue and white striped pattern on the side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The drum stand of the drum is mentioned in the description.\nB. The drum is not mentioned in the description.\nC. The drum stand of the drum is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical drum with a dark brown, textured surface and a metallic rim. The drum has a blue and white striped pattern on the side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Trees are mentioned in the description.\nB. The Trees are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical drum with a dark brown, textured surface and a metallic rim. The drum has a blue and white striped pattern on the side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The drum pedal of the drum is mentioned in the description.\nB. The drum is not mentioned in the description.\nC. The drum pedal of the drum is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical drum with a dark brown, textured surface and a metallic rim. The drum has a blue and white striped pattern on the side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Superior Foods sign is mentioned in the description.\nB. The Superior Foods sign is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical drum with a dark brown, textured surface and a metallic rim. 
The drum has a blue and white striped pattern on the side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The People are mentioned in the description.\nB. The People are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The shape of the drum is mentioned in the description and is round or circular.", + "A. The color of the body is mentioned in the description but is not red.", + "B. The material of the rim is mentioned in the description and is metallic.", + "C. The drum stand of the drum is not mentioned in the description.", + "B. The Trees are not mentioned in the description.", + "C. The drum pedal of the drum is not mentioned in the description.", + "B. The Superior Foods sign is not mentioned in the description.", + "B. The People are not mentioned in the description." + ], + "score": 0.75, + "score_pos": 0.3333333333333333, + "score_neg": 1.0, + "recognition_result": true + }, + "5211280": { + "pred": "A stainless steel crock pot with a curved, dark gray handle on top. The control panel features a digital display in the center, surrounded by various buttons and indicators. The buttons are arranged in a semi-circular pattern around the display, with labels in both English and another language. The crock pot has a smooth, reflective surface and a slightly tapered design towards the base.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the rice cooker is mentioned in the description and is silver and black.", + 1 + ], + [ + "The color of the rice cooker is mentioned in the description but is not silver and black.", + -1 + ], + [ + "The color of the rice cooker is not mentioned.", + 0 + ], + [ + "The rice cooker is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the rice cooker is mentioned in the description and is silver and black.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the handle is mentioned in the description and is curved.", + 1 + ], + [ + "The shape of the handle is mentioned in the description but is not curved.", + -1 + ], + [ + "The shape of the handle is not mentioned, but the handle of the rice cooker is mentioned.", + 0.5 + ], + [ + "The handle or the rice cooker is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the handle is mentioned in the description and is curved.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the display is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the display is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the display is not mentioned, but the display of the rice cooker is mentioned.", + 0.5 + ], + [ + "The display or the rice cooker is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the display is not mentioned, but the display of the rice cooker is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the body is mentioned in the description and is silver.", + 1 + ], + [ + "The color of the body is mentioned in the description but is not silver.", + -1 + ], + [ + "The color of the body is not mentioned, but the body of the rice cooker is mentioned.", + 0.5 + ], + [ + "The body or the rice cooker is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the body is mentioned in the description and is silver.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the base is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the base is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the base is not mentioned, but the base of the rice cooker is mentioned.", + 0.5 + ], + [ + "The base or the rice cooker is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the base is not mentioned, but the base of the rice cooker is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The red crates are mentioned in the description.", + -1 + ], + [ + "The red crates are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The red crates are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The generator is mentioned in the description.", + -1 + ], + [ + "The generator is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The generator is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The steam vent of the rice cooker is mentioned in the description.", + -1 + ], + [ + "The steam vent of the rice cooker is not mentioned in the description.", + 1 + ], + [ + "The rice cooker is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The steam vent of the rice cooker is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The inner pot of the rice cooker is mentioned in the description.", + -1 + ], + [ + "The inner pot of the rice cooker is not mentioned in the description.", + 1 + ], + [ + "The rice cooker is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The inner pot of the rice cooker is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The measuring cup of the rice cooker is mentioned in the description.", + -1 + ], + [ + "The measuring cup of the rice cooker is not mentioned in the description.", + 1 + ], + [ + "The rice cooker is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The measuring cup of the rice cooker is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is rice cooker. 
Based on the image, is it likely that the object in the description is given class: rice cooker or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel crock pot with a curved, dark gray handle on top. The control panel features a digital display in the center, surrounded by various buttons and indicators. The buttons are arranged in a semi-circular pattern around the display, with labels in both English and another language. The crock pot has a smooth, reflective surface and a slightly tapered design towards the base.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is rice cooker. Based on the image, is it likely that the object in the description is given class: rice cooker or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel crock pot with a curved, dark gray handle on top. The control panel features a digital display in the center, surrounded by various buttons and indicators. 
The buttons are arranged in a semi-circular pattern around the display, with labels in both English and another language. The crock pot has a smooth, reflective surface and a slightly tapered design towards the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the rice cooker is mentioned in the description and is silver and black.\nB. The color of the rice cooker is mentioned in the description but is not silver and black.\nC. The color of the rice cooker is not mentioned.\nD. The rice cooker is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel crock pot with a curved, dark gray handle on top. The control panel features a digital display in the center, surrounded by various buttons and indicators. The buttons are arranged in a semi-circular pattern around the display, with labels in both English and another language. The crock pot has a smooth, reflective surface and a slightly tapered design towards the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the handle is mentioned in the description and is curved.\nB. The shape of the handle is mentioned in the description but is not curved.\nC. The shape of the handle is not mentioned, but the handle of the rice cooker is mentioned.\nD. The handle or the rice cooker is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel crock pot with a curved, dark gray handle on top. The control panel features a digital display in the center, surrounded by various buttons and indicators. The buttons are arranged in a semi-circular pattern around the display, with labels in both English and another language. The crock pot has a smooth, reflective surface and a slightly tapered design towards the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the display is mentioned in the description and is rectangular.\nB. The shape of the display is mentioned in the description but is not rectangular.\nC. The shape of the display is not mentioned, but the display of the rice cooker is mentioned.\nD. The display or the rice cooker is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel crock pot with a curved, dark gray handle on top. The control panel features a digital display in the center, surrounded by various buttons and indicators. The buttons are arranged in a semi-circular pattern around the display, with labels in both English and another language. 
The crock pot has a smooth, reflective surface and a slightly tapered design towards the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the body is mentioned in the description and is silver.\nB. The color of the body is mentioned in the description but is not silver.\nC. The color of the body is not mentioned, but the body of the rice cooker is mentioned.\nD. The body or the rice cooker is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel crock pot with a curved, dark gray handle on top. The control panel features a digital display in the center, surrounded by various buttons and indicators. The buttons are arranged in a semi-circular pattern around the display, with labels in both English and another language. The crock pot has a smooth, reflective surface and a slightly tapered design towards the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the base is mentioned in the description and is plastic.\nB. The material of the base is mentioned in the description but is not plastic.\nC. The material of the base is not mentioned, but the base of the rice cooker is mentioned.\nD. The base or the rice cooker is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel crock pot with a curved, dark gray handle on top. The control panel features a digital display in the center, surrounded by various buttons and indicators. The buttons are arranged in a semi-circular pattern around the display, with labels in both English and another language. The crock pot has a smooth, reflective surface and a slightly tapered design towards the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The red crates are mentioned in the description.\nB. The red crates are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel crock pot with a curved, dark gray handle on top. The control panel features a digital display in the center, surrounded by various buttons and indicators. The buttons are arranged in a semi-circular pattern around the display, with labels in both English and another language. The crock pot has a smooth, reflective surface and a slightly tapered design towards the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The generator is mentioned in the description.\nB. 
The generator is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel crock pot with a curved, dark gray handle on top. The control panel features a digital display in the center, surrounded by various buttons and indicators. The buttons are arranged in a semi-circular pattern around the display, with labels in both English and another language. The crock pot has a smooth, reflective surface and a slightly tapered design towards the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The steam vent of the rice cooker is mentioned in the description.\nB. The steam vent of the rice cooker is not mentioned in the description.\nC. The rice cooker is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel crock pot with a curved, dark gray handle on top. The control panel features a digital display in the center, surrounded by various buttons and indicators. The buttons are arranged in a semi-circular pattern around the display, with labels in both English and another language. 
The crock pot has a smooth, reflective surface and a slightly tapered design towards the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The inner pot of the rice cooker is mentioned in the description.\nB. The inner pot of the rice cooker is not mentioned in the description.\nC. The rice cooker is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel crock pot with a curved, dark gray handle on top. The control panel features a digital display in the center, surrounded by various buttons and indicators. The buttons are arranged in a semi-circular pattern around the display, with labels in both English and another language. The crock pot has a smooth, reflective surface and a slightly tapered design towards the base.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The measuring cup of the rice cooker is mentioned in the description.\nB. The measuring cup of the rice cooker is not mentioned in the description.\nC. The rice cooker is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The color of the rice cooker is mentioned in the description and is silver and black.", + "A. The shape of the handle is mentioned in the description and is curved.", + "C. The shape of the display is not mentioned, but the display of the rice cooker is mentioned.", + "A. 
The color of the body is mentioned in the description and is silver.", + "C. The material of the base is not mentioned, but the base of the rice cooker is mentioned.", + "B. The red crates are not mentioned in the description.", + "B. The generator is not mentioned in the description.", + "B. The steam vent of the rice cooker is not mentioned in the description.", + "B. The inner pot of the rice cooker is not mentioned in the description.", + "B. The measuring cup of the rice cooker is not mentioned in the description." + ], + "score": 0.9, + "score_pos": 0.8, + "score_neg": 1.0, + "recognition_result": true + }, + "5718392": { + "pred": "The box is a rectangular prism with a woven pattern of interlocking dark brown and light brown strips. The surface has a textured appearance, with the weave creating a series of small, diamond-shaped openings.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the storage box is mentioned in the description but is not wicker/woven.", + -1 + ], + [ + "The storage box is not mentioned.", + 0 + ], + [ + "The material of the storage box is not mentioned.", + 0 + ], + [ + "The material of the storage box is mentioned in the description and is wicker/woven.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the storage box is mentioned in the description and is wicker/woven.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the storage box is mentioned in the description but is not woven.", + -1 + ], + [ + "The storage box is not mentioned.", + 0 + ], + [ + "The texture of the storage box is not mentioned.", + 0 + ], + [ + "The texture of the storage box is mentioned in the description and is woven.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The texture of the storage box is mentioned in the description and is woven.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the storage box is mentioned in the description but is not brown.", + -1 + ], + [ + "The storage box is not mentioned.", + 0 + ], + [ + "The color of the storage box is not mentioned.", + 0 + ], + [ + "The color of the storage box is mentioned in the description and is brown.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the storage box is mentioned in the description and is brown.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wheels of the storage box are mentioned in the description.", + -1 + ], + [ + "The wheels of the storage box are not mentioned in the description.", + 1 + ], + [ + "The storage box is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The wheels of the storage box are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The utensils are mentioned in the description.", + -1 + ], + [ + "The utensils are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The utensils are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle of the storage box is mentioned in the description.", + -1 + ], + [ + "The handle of the storage box is not mentioned in the description.", + 1 + ], + [ + "The storage box is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The handle of the storage box is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lock of the storage box is mentioned in the description.", + -1 + ], + [ + "The lock of the storage box is not mentioned in the description.", + 1 + ], + [ + "The storage box is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The lock of the storage box is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The blue canopy is mentioned in the description.", + -1 + ], + [ + "The blue canopy is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The blue canopy is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is storage box. Based on the image, is it likely that the object in the description is given class: storage box or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe box is a rectangular prism with a woven pattern of interlocking dark brown and light brown strips. The surface has a textured appearance, with the weave creating a series of small, diamond-shaped openings.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is storage box. Based on the image, is it likely that the object in the description is given class: storage box or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe box is a rectangular prism with a woven pattern of interlocking dark brown and light brown strips. The surface has a textured appearance, with the weave creating a series of small, diamond-shaped openings.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the storage box is mentioned in the description but is not wicker/woven.\nB. The storage box is not mentioned.\nC. The material of the storage box is not mentioned.\nD. 
The material of the storage box is mentioned in the description and is wicker/woven.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe box is a rectangular prism with a woven pattern of interlocking dark brown and light brown strips. The surface has a textured appearance, with the weave creating a series of small, diamond-shaped openings.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the storage box is mentioned in the description but is not woven.\nB. The storage box is not mentioned.\nC. The texture of the storage box is not mentioned.\nD. The texture of the storage box is mentioned in the description and is woven.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe box is a rectangular prism with a woven pattern of interlocking dark brown and light brown strips. The surface has a textured appearance, with the weave creating a series of small, diamond-shaped openings.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the storage box is mentioned in the description but is not brown.\nB. 
The storage box is not mentioned.\nC. The color of the storage box is not mentioned.\nD. The color of the storage box is mentioned in the description and is brown.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe box is a rectangular prism with a woven pattern of interlocking dark brown and light brown strips. The surface has a textured appearance, with the weave creating a series of small, diamond-shaped openings.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wheels of the storage box are mentioned in the description.\nB. The wheels of the storage box are not mentioned in the description.\nC. The storage box is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe box is a rectangular prism with a woven pattern of interlocking dark brown and light brown strips. The surface has a textured appearance, with the weave creating a series of small, diamond-shaped openings.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The utensils are mentioned in the description.\nB. 
The utensils are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe box is a rectangular prism with a woven pattern of interlocking dark brown and light brown strips. The surface has a textured appearance, with the weave creating a series of small, diamond-shaped openings.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handle of the storage box is mentioned in the description.\nB. The handle of the storage box is not mentioned in the description.\nC. The storage box is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe box is a rectangular prism with a woven pattern of interlocking dark brown and light brown strips. The surface has a textured appearance, with the weave creating a series of small, diamond-shaped openings.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lock of the storage box is mentioned in the description.\nB. The lock of the storage box is not mentioned in the description.\nC. 
The storage box is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe box is a rectangular prism with a woven pattern of interlocking dark brown and light brown strips. The surface has a textured appearance, with the weave creating a series of small, diamond-shaped openings.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The blue canopy is mentioned in the description.\nB. The blue canopy is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The material of the storage box is mentioned in the description and is wicker/woven.", + "D. The texture of the storage box is mentioned in the description and is woven.", + "D. The color of the storage box is mentioned in the description and is brown.", + "B. The wheels of the storage box are not mentioned in the description.", + "B. The utensils are not mentioned in the description.", + "B. The handle of the storage box is not mentioned in the description.", + "B. The lock of the storage box is not mentioned in the description.", + "B. The blue canopy is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "6037272": { + "pred": "A green, cylindrical shampoo bottle with a slightly tapered end. 
The bottle has a smooth surface with a small, circular, orange and white label near the top.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle or the bottle is not mentioned.", + 0 + ], + [ + "The material of the bottle is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the bottle is not mentioned, but the bottle of the bottle is mentioned.", + 0.5 + ], + [ + "The material of the bottle is mentioned in the description but is not plastic.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the bottle is not mentioned, but the bottle of the bottle is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cap or the bottle is not mentioned.", + 0 + ], + [ + "The color of the cap is mentioned in the description and is green.", + 1 + ], + [ + "The color of the cap is not mentioned, but the cap of the bottle is mentioned.", + 0.5 + ], + [ + "The color of the cap is mentioned in the description but is not green.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the cap is not mentioned, but the cap of the bottle is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The text on the label or the bottle are not mentioned.", + 0 + ], + [ + "The color of the text on the label is mentioned in the description and is white.", + 1 + ], + [ + "The color of the text on the label is not mentioned, but the text on the label of the bottle are mentioned.", + 0.5 + ], + [ + "The color of the text on the label is mentioned in the description but is not white.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The text on the label or the bottle are not mentioned.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle is not mentioned.", + 0 + ], + [ + "The color of the bottle is mentioned in the description and is green.", + 1 + ], + [ + "The color of the bottle is not mentioned.", + 0 + ], + [ + "The color of the bottle is mentioned in the description but is not green.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the bottle is mentioned in the description and is green.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cap/top or the bottle is not mentioned.", + 0 + ], + [ + "The shape of the cap/top is mentioned in the description and is flat or tapered.", + 1 + ], + [ + "The shape of the cap/top is not mentioned, but the cap/top of the bottle is mentioned.", + 0.5 + ], + [ + "The shape of the cap/top is mentioned in the description but is not flat or tapered.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the cap/top is mentioned in the description and is flat or tapered.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The nozzle of the bottle is mentioned in the description.", + -1 + ], + [ + "The bottle is not mentioned in the description.", + 0 + ], + [ + "The nozzle of the bottle is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The nozzle of the bottle is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shower curtain is mentioned in the description.", + -1 + ], + [ + "The shower curtain is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The shower curtain is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bathtub is mentioned in the description.", + -1 + ], + [ + "The bathtub is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bathtub is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle of the bottle is mentioned in the description.", + -1 + ], + [ + "The bottle is not mentioned in the description.", + 0 + ], + [ + "The handle of the bottle is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The handle of the bottle is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pump of the bottle is mentioned in the description.", + -1 + ], + [ + "The bottle is not mentioned in the description.", + 0 + ], + [ + "The pump of the bottle is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The pump of the bottle is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is bottle. Based on the image, is it likely that the object in the description is given class: bottle or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green, cylindrical shampoo bottle with a slightly tapered end. The bottle has a smooth surface with a small, circular, orange and white label near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is bottle. Based on the image, is it likely that the object in the description is given class: bottle or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green, cylindrical shampoo bottle with a slightly tapered end. 
The bottle has a smooth surface with a small, circular, orange and white label near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bottle or the bottle is not mentioned.\nB. The material of the bottle is mentioned in the description and is plastic.\nC. The material of the bottle is not mentioned, but the bottle of the bottle is mentioned.\nD. The material of the bottle is mentioned in the description but is not plastic.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green, cylindrical shampoo bottle with a slightly tapered end. The bottle has a smooth surface with a small, circular, orange and white label near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cap or the bottle is not mentioned.\nB. The color of the cap is mentioned in the description and is green.\nC. The color of the cap is not mentioned, but the cap of the bottle is mentioned.\nD. The color of the cap is mentioned in the description but is not green.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA green, cylindrical shampoo bottle with a slightly tapered end. The bottle has a smooth surface with a small, circular, orange and white label near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The text on the label or the bottle are not mentioned.\nB. The color of the text on the label is mentioned in the description and is white.\nC. The color of the text on the label is not mentioned, but the text on the label of the bottle are mentioned.\nD. The color of the text on the label is mentioned in the description but is not white.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green, cylindrical shampoo bottle with a slightly tapered end. The bottle has a smooth surface with a small, circular, orange and white label near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bottle is not mentioned.\nB. The color of the bottle is mentioned in the description and is green.\nC. The color of the bottle is not mentioned.\nD. The color of the bottle is mentioned in the description but is not green.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green, cylindrical shampoo bottle with a slightly tapered end. The bottle has a smooth surface with a small, circular, orange and white label near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cap/top or the bottle is not mentioned.\nB. The shape of the cap/top is mentioned in the description and is flat or tapered.\nC. The shape of the cap/top is not mentioned, but the cap/top of the bottle is mentioned.\nD. The shape of the cap/top is mentioned in the description but is not flat or tapered.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green, cylindrical shampoo bottle with a slightly tapered end. The bottle has a smooth surface with a small, circular, orange and white label near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The nozzle of the bottle is mentioned in the description.\nB. The bottle is not mentioned in the description.\nC. The nozzle of the bottle is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA green, cylindrical shampoo bottle with a slightly tapered end. The bottle has a smooth surface with a small, circular, orange and white label near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shower curtain is mentioned in the description.\nB. The shower curtain is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green, cylindrical shampoo bottle with a slightly tapered end. The bottle has a smooth surface with a small, circular, orange and white label near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bathtub is mentioned in the description.\nB. The bathtub is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green, cylindrical shampoo bottle with a slightly tapered end. 
The bottle has a smooth surface with a small, circular, orange and white label near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handle of the bottle is mentioned in the description.\nB. The bottle is not mentioned in the description.\nC. The handle of the bottle is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green, cylindrical shampoo bottle with a slightly tapered end. The bottle has a smooth surface with a small, circular, orange and white label near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pump of the bottle is mentioned in the description.\nB. The bottle is not mentioned in the description.\nC. The pump of the bottle is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The material of the bottle is not mentioned, but the bottle of the bottle is mentioned.", + "C. The color of the cap is not mentioned, but the cap of the bottle is mentioned.", + "A. The text on the label or the bottle are not mentioned.", + "B. The color of the bottle is mentioned in the description and is green.", + "B. The shape of the cap/top is mentioned in the description and is flat or tapered.", + "C. The nozzle of the bottle is not mentioned in the description.", + "B. The shower curtain is not mentioned in the description.", + "B. The bathtub is not mentioned in the description.", + "C. 
The handle of the bottle is not mentioned in the description.", + "C. The pump of the bottle is not mentioned in the description." + ], + "score": 0.8, + "score_pos": 0.6, + "score_neg": 1.0, + "recognition_result": true + }, + "6820594": { + "pred": "A medium-sized cat with a predominantly white face and underbelly, featuring a mix of dark brown and black patches on its back and sides. The cat has large, round, light green eyes and a pink nose. Its ears are upright, with the left ear having a light brown patch and the right ear being mostly white. The cat's fur is short and smooth.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the ears is mentioned in the description but is not triangular or pointed.", + -1 + ], + [ + "The shape of the ears is not mentioned, but the ears of the cat are mentioned.", + 0.5 + ], + [ + "The shape of the ears is mentioned in the description and is triangular or pointed.", + 1 + ], + [ + "The ears or the cat are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the ears is mentioned in the description and is triangular or pointed.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the eyes is mentioned in the description but is not yellow or green.", + -1 + ], + [ + "The color of the eyes is not mentioned, but the eyes of the cat are mentioned.", + 0.5 + ], + [ + "The color of the eyes is mentioned in the description and is yellow or green.", + 1 + ], + [ + "The eyes or the cat are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the eyes is mentioned in the description and is yellow or green.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the legs is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the legs is not mentioned, but the legs of the cat are mentioned.", + 0.5 + ], + [ + "The color of the legs is mentioned in the description and is white.", + 1 + ], + [ + "The legs or the cat are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the legs is not mentioned, but the legs of the cat are mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the eyes is mentioned in the description but is not round.", + -1 + ], + [ + "The shape of the eyes is not mentioned, but the eyes of the cat are mentioned.", + 0.5 + ], + [ + "The shape of the eyes is mentioned in the description and is round.", + 1 + ], + [ + "The eyes or the cat are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the eyes is mentioned in the description and is round.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the back is mentioned in the description but is not brown or black.", + -1 + ], + [ + "The color of the back is not mentioned, but the back of the cat is mentioned.", + 0.5 + ], + [ + "The color of the back is mentioned in the description and is brown or black.", + 1 + ], + [ + "The back or the cat is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the back is mentioned in the description and is brown or black.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toothpaste is mentioned in the description.", + -1 + ], + [ + "The toothpaste is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The toothpaste is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toothbrush is mentioned in the description.", + -1 + ], + [ + "The toothbrush is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The toothbrush is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mirror is mentioned in the description.", + -1 + ], + [ + "The mirror is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The mirror is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tail of the cat is mentioned in the description.", + -1 + ], + [ + "The tail of the cat is not mentioned in the description.", + 1 + ], + [ + "The cat is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The tail of the cat is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The faucet is mentioned in the description.", + -1 + ], + [ + "The faucet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The faucet is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is cat. Based on the image, is it likely that the object in the description is given class: cat or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and underbelly, featuring a mix of dark brown and black patches on its back and sides. The cat has large, round, light green eyes and a pink nose. Its ears are upright, with the left ear having a light brown patch and the right ear being mostly white. The cat's fur is short and smooth.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is cat. Based on the image, is it likely that the object in the description is given class: cat or object of a similar type?\nA. Yes\nB. 
No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and underbelly, featuring a mix of dark brown and black patches on its back and sides. The cat has large, round, light green eyes and a pink nose. Its ears are upright, with the left ear having a light brown patch and the right ear being mostly white. The cat's fur is short and smooth.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the ears is mentioned in the description but is not triangular or pointed.\nB. The shape of the ears is not mentioned, but the ears of the cat are mentioned.\nC. The shape of the ears is mentioned in the description and is triangular or pointed.\nD. The ears or the cat are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and underbelly, featuring a mix of dark brown and black patches on its back and sides. The cat has large, round, light green eyes and a pink nose. Its ears are upright, with the left ear having a light brown patch and the right ear being mostly white. 
The cat's fur is short and smooth.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the eyes is mentioned in the description but is not yellow or green.\nB. The color of the eyes is not mentioned, but the eyes of the cat are mentioned.\nC. The color of the eyes is mentioned in the description and is yellow or green.\nD. The eyes or the cat are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and underbelly, featuring a mix of dark brown and black patches on its back and sides. The cat has large, round, light green eyes and a pink nose. Its ears are upright, with the left ear having a light brown patch and the right ear being mostly white. The cat's fur is short and smooth.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the legs is mentioned in the description but is not white.\nB. The color of the legs is not mentioned, but the legs of the cat are mentioned.\nC. The color of the legs is mentioned in the description and is white.\nD. The legs or the cat are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and underbelly, featuring a mix of dark brown and black patches on its back and sides. The cat has large, round, light green eyes and a pink nose. Its ears are upright, with the left ear having a light brown patch and the right ear being mostly white. The cat's fur is short and smooth.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the eyes is mentioned in the description but is not round.\nB. The shape of the eyes is not mentioned, but the eyes of the cat are mentioned.\nC. The shape of the eyes is mentioned in the description and is round.\nD. The eyes or the cat are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and underbelly, featuring a mix of dark brown and black patches on its back and sides. The cat has large, round, light green eyes and a pink nose. Its ears are upright, with the left ear having a light brown patch and the right ear being mostly white. The cat's fur is short and smooth.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the back is mentioned in the description but is not brown or black.\nB. The color of the back is not mentioned, but the back of the cat is mentioned.\nC. 
The color of the back is mentioned in the description and is brown or black.\nD. The back or the cat is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and underbelly, featuring a mix of dark brown and black patches on its back and sides. The cat has large, round, light green eyes and a pink nose. Its ears are upright, with the left ear having a light brown patch and the right ear being mostly white. The cat's fur is short and smooth.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toothpaste is mentioned in the description.\nB. The toothpaste is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and underbelly, featuring a mix of dark brown and black patches on its back and sides. The cat has large, round, light green eyes and a pink nose. Its ears are upright, with the left ear having a light brown patch and the right ear being mostly white. 
The cat's fur is short and smooth.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toothbrush is mentioned in the description.\nB. The toothbrush is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and underbelly, featuring a mix of dark brown and black patches on its back and sides. The cat has large, round, light green eyes and a pink nose. Its ears are upright, with the left ear having a light brown patch and the right ear being mostly white. The cat's fur is short and smooth.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mirror is mentioned in the description.\nB. The mirror is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and underbelly, featuring a mix of dark brown and black patches on its back and sides. The cat has large, round, light green eyes and a pink nose. 
Its ears are upright, with the left ear having a light brown patch and the right ear being mostly white. The cat's fur is short and smooth.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tail of the cat is mentioned in the description.\nB. The tail of the cat is not mentioned in the description.\nC. The cat is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA medium-sized cat with a predominantly white face and underbelly, featuring a mix of dark brown and black patches on its back and sides. The cat has large, round, light green eyes and a pink nose. Its ears are upright, with the left ear having a light brown patch and the right ear being mostly white. The cat's fur is short and smooth.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The faucet is mentioned in the description.\nB. The faucet is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The shape of the ears is mentioned in the description and is triangular or pointed.", + "C. The color of the eyes is mentioned in the description and is yellow or green.", + "B. The color of the legs is not mentioned, but the legs of the cat are mentioned.", + "C. The shape of the eyes is mentioned in the description and is round.", + "C. The color of the back is mentioned in the description and is brown or black.", + "B. The toothpaste is not mentioned in the description.", + "B. 
The toothbrush is not mentioned in the description.", + "B. The mirror is not mentioned in the description.", + "B. The tail of the cat is not mentioned in the description.", + "B. The faucet is not mentioned in the description." + ], + "score": 0.95, + "score_pos": 0.9, + "score_neg": 1.0, + "recognition_result": true + }, + "5718424": { + "pred": "A rugged, dark-colored shoe with a thick, textured sole and a prominent, rounded toe. The shoe features a light-colored trim around the opening and a visible lace-up design.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the laces is not mentioned, but the laces of the sneakers are mentioned.", + 0.5 + ], + [ + "The color of the laces is mentioned in the description but is not white or green.", + -1 + ], + [ + "The color of the laces is mentioned in the description and is white or green.", + 1 + ], + [ + "The laces or the sneakers are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the laces is not mentioned, but the laces of the sneakers are mentioned.", + "pred_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the sneakers is not mentioned.", + 0 + ], + [ + "The color of the sneakers is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the sneakers is mentioned in the description and is black.", + 1 + ], + [ + "The sneakers are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the sneakers is mentioned in the description but is not black.", + "pred_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the sneakers is not mentioned.", + 0 + ], + [ + "The material of the sneakers is mentioned in the description but is not rubber.", + -1 + ], + [ + "The material of the sneakers is mentioned in the description and is rubber.", + 1 + ], + [ + "The sneakers are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the sneakers is not mentioned.", + "pred_index": 0, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The table is not mentioned in the description.", + 1 + ], + [ + "The table is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The table is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The base of the sneakers is not mentioned in the description.", + 1 + ], + [ + "The sneakers are not mentioned in the description.", + 0 + ], + [ + "The base of the sneakers is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The base of the sneakers is mentioned in the description.", + "pred_index": 2, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The heel of the sneakers is not mentioned in the description.", + 1 + ], + [ + "The sneakers are not mentioned in the description.", + 0 + ], + [ + "The heel of the sneakers is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The heel of the sneakers is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The person is not mentioned in the description.", + 1 + ], + [ + "The person is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The person is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The food is not mentioned in the description.", + 1 + ], + [ + "The food is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The food is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is sneakers. Based on the image, is it likely that the object in the description is given class: sneakers or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rugged, dark-colored shoe with a thick, textured sole and a prominent, rounded toe. 
The shoe features a light-colored trim around the opening and a visible lace-up design.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is sneakers. Based on the image, is it likely that the object in the description is given class: sneakers or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rugged, dark-colored shoe with a thick, textured sole and a prominent, rounded toe. The shoe features a light-colored trim around the opening and a visible lace-up design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the laces is not mentioned, but the laces of the sneakers are mentioned.\nB. The color of the laces is mentioned in the description but is not white or green.\nC. The color of the laces is mentioned in the description and is white or green.\nD. The laces or the sneakers are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rugged, dark-colored shoe with a thick, textured sole and a prominent, rounded toe. 
The shoe features a light-colored trim around the opening and a visible lace-up design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the sneakers is not mentioned.\nB. The color of the sneakers is mentioned in the description but is not black.\nC. The color of the sneakers is mentioned in the description and is black.\nD. The sneakers are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rugged, dark-colored shoe with a thick, textured sole and a prominent, rounded toe. The shoe features a light-colored trim around the opening and a visible lace-up design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the sneakers is not mentioned.\nB. The material of the sneakers is mentioned in the description but is not rubber.\nC. The material of the sneakers is mentioned in the description and is rubber.\nD. The sneakers are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rugged, dark-colored shoe with a thick, textured sole and a prominent, rounded toe. 
The shoe features a light-colored trim around the opening and a visible lace-up design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The table is not mentioned in the description.\nB. The table is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rugged, dark-colored shoe with a thick, textured sole and a prominent, rounded toe. The shoe features a light-colored trim around the opening and a visible lace-up design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The base of the sneakers is not mentioned in the description.\nB. The sneakers are not mentioned in the description.\nC. The base of the sneakers is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rugged, dark-colored shoe with a thick, textured sole and a prominent, rounded toe. 
The shoe features a light-colored trim around the opening and a visible lace-up design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The heel of the sneakers is not mentioned in the description.\nB. The sneakers are not mentioned in the description.\nC. The heel of the sneakers is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rugged, dark-colored shoe with a thick, textured sole and a prominent, rounded toe. The shoe features a light-colored trim around the opening and a visible lace-up design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The person is not mentioned in the description.\nB. The person is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rugged, dark-colored shoe with a thick, textured sole and a prominent, rounded toe. 
The shoe features a light-colored trim around the opening and a visible lace-up design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The food is not mentioned in the description.\nB. The food is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The color of the laces is not mentioned, but the laces of the sneakers are mentioned.", + "B. The color of the sneakers is mentioned in the description but is not black.", + "A. The material of the sneakers is not mentioned.", + "A. The table is not mentioned in the description.", + "C. The base of the sneakers is mentioned in the description.", + "A. The heel of the sneakers is not mentioned in the description.", + "A. The person is not mentioned in the description.", + "A. The food is not mentioned in the description." + ], + "score": 0.3125, + "score_pos": -0.16666666666666666, + "score_neg": 0.6, + "recognition_result": true + }, + "6055310": { + "pred": "A wooden measuring stick with a natural finish, featuring black measurement markings in centimeters and millimeters. The stick has a slightly tapered end and a metal tip at the opposite end.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the blade is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the blade is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + 0.5 + ], + [ + "The blade or the tape measure/ruler is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the blade is mentioned in the description and is yellow or golden.", + 1 + ], + [ + "The color of the blade is mentioned in the description but is not yellow or golden.", + -1 + ], + [ + "The color of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + 0.5 + ], + [ + "The blade or the tape measure/ruler is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the blade is mentioned in the description and is long.", + 1 + ], + [ + "The size of the blade is mentioned in the description but is not long.", + -1 + ], + [ + "The size of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + 0.5 + ], + [ + "The blade or the tape measure/ruler is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The size of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the blade is mentioned in the description and is metal.", + 1 + ], + [ + "The material of the blade is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + 0.5 + ], + [ + "The blade or the tape measure/ruler is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The material of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The case of the tape measure/ruler is not mentioned in the description.", + 1 + ], + [ + "The tape measure/ruler is not mentioned in the description.", + 0 + ], + [ + "The case of the tape measure/ruler is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The case of the tape measure/ruler is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bathtub is not mentioned in the description.", + 1 + ], + [ + "The bathtub is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The bathtub is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lock of the tape measure/ruler is not mentioned in the description.", + 1 + ], + [ + "The tape measure/ruler is not mentioned in the description.", + 0 + ], + [ + "The lock of the tape measure/ruler is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The lock of the tape measure/ruler is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The door is not mentioned in the description.", + 1 + ], + [ + "The door is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The door is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The box is not mentioned in the description.", + 1 + ], + [ + "The box is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The box is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is tape measure/ruler. Based on the image, is it likely that the object in the description is given class: tape measure/ruler or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden measuring stick with a natural finish, featuring black measurement markings in centimeters and millimeters. The stick has a slightly tapered end and a metal tip at the opposite end.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is tape measure/ruler. Based on the image, is it likely that the object in the description is given class: tape measure/ruler or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden measuring stick with a natural finish, featuring black measurement markings in centimeters and millimeters. The stick has a slightly tapered end and a metal tip at the opposite end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the blade is mentioned in the description and is rectangular.\nB. The shape of the blade is mentioned in the description but is not rectangular.\nC. The shape of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.\nD. The blade or the tape measure/ruler is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden measuring stick with a natural finish, featuring black measurement markings in centimeters and millimeters. The stick has a slightly tapered end and a metal tip at the opposite end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the blade is mentioned in the description and is yellow or golden.\nB. The color of the blade is mentioned in the description but is not yellow or golden.\nC. The color of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.\nD. 
The blade or the tape measure/ruler is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden measuring stick with a natural finish, featuring black measurement markings in centimeters and millimeters. The stick has a slightly tapered end and a metal tip at the opposite end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the blade is mentioned in the description and is long.\nB. The size of the blade is mentioned in the description but is not long.\nC. The size of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.\nD. The blade or the tape measure/ruler is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden measuring stick with a natural finish, featuring black measurement markings in centimeters and millimeters. The stick has a slightly tapered end and a metal tip at the opposite end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the blade is mentioned in the description and is metal.\nB. 
The material of the blade is mentioned in the description but is not metal.\nC. The material of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.\nD. The blade or the tape measure/ruler is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden measuring stick with a natural finish, featuring black measurement markings in centimeters and millimeters. The stick has a slightly tapered end and a metal tip at the opposite end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The case of the tape measure/ruler is not mentioned in the description.\nB. The tape measure/ruler is not mentioned in the description.\nC. The case of the tape measure/ruler is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden measuring stick with a natural finish, featuring black measurement markings in centimeters and millimeters. The stick has a slightly tapered end and a metal tip at the opposite end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The bathtub is not mentioned in the description.\nB. The bathtub is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden measuring stick with a natural finish, featuring black measurement markings in centimeters and millimeters. The stick has a slightly tapered end and a metal tip at the opposite end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lock of the tape measure/ruler is not mentioned in the description.\nB. The tape measure/ruler is not mentioned in the description.\nC. The lock of the tape measure/ruler is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden measuring stick with a natural finish, featuring black measurement markings in centimeters and millimeters. The stick has a slightly tapered end and a metal tip at the opposite end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The door is not mentioned in the description.\nB. 
The door is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden measuring stick with a natural finish, featuring black measurement markings in centimeters and millimeters. The stick has a slightly tapered end and a metal tip at the opposite end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The box is not mentioned in the description.\nB. The box is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The shape of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + "C. The color of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + "C. The size of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + "C. The material of the blade is not mentioned, but the blade of the tape measure/ruler is mentioned.", + "A. The case of the tape measure/ruler is not mentioned in the description.", + "A. The bathtub is not mentioned in the description.", + "A. The lock of the tape measure/ruler is not mentioned in the description.", + "A. The door is not mentioned in the description.", + "A. The box is not mentioned in the description." + ], + "score": 0.7777777777777778, + "score_pos": 0.5, + "score_neg": 1.0, + "recognition_result": true + }, + "8201777": { + "pred": "A black van with a rear window displaying the word \"TAXI\" in yellow letters. The van has a yellow license plate with black text and a small white sticker below it. 
The rear lights are vertically aligned on both sides, and the van has a small emblem above the license plate.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the van is mentioned in the description but is not black.", + -1 + ], + [ + "The van is not mentioned.", + 0 + ], + [ + "The color of the van is not mentioned.", + 0 + ], + [ + "The color of the van is mentioned in the description and is black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the van is mentioned in the description and is black.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the sticker is mentioned in the description but is not white.", + -1 + ], + [ + "The sticker or the van is not mentioned.", + 0 + ], + [ + "The color of the sticker is not mentioned, but the sticker of the van is mentioned.", + 0.5 + ], + [ + "The color of the sticker is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the sticker is mentioned in the description and is white.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the taillight is mentioned in the description but is not plastic.", + -1 + ], + [ + "The taillight or the van is not mentioned.", + 0 + ], + [ + "The material of the taillight is not mentioned, but the taillight of the van is mentioned.", + 0.5 + ], + [ + "The material of the taillight is mentioned in the description and is plastic.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The material of the taillight is not mentioned, but the taillight of the van is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the license plate is mentioned in the description but is not yellow.", + -1 + ], + [ + "The license plate or the van is not mentioned.", + 0 + ], + [ + "The color of the license plate is not mentioned, but the license plate of the van is mentioned.", + 0.5 + ], + [ + "The color of the license plate is mentioned in the description and is yellow.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the license plate is mentioned in the description and is yellow.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The text of the sign is mentioned in the description but is not taxi.", + -1 + ], + [ + "The sign or the van is not mentioned.", + 0 + ], + [ + "The text of the sign is not mentioned, but the sign of the van is mentioned.", + 0.5 + ], + [ + "The text of the sign is mentioned in the description and is taxi.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The text of the sign is mentioned in the description and is taxi.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The van is not mentioned in the description.", + 0 + ], + [ + "The grill of the van is not mentioned in the description.", + 1 + ], + [ + "The grill of the van is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The grill of the van is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The building is not mentioned in the description.", + 1 + ], + [ + "The building is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The building is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The van is not mentioned in the description.", + 0 + ], + [ + "The front bumper of the van is not mentioned in the description.", + 1 + ], + [ + "The front bumper of the van is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The front bumper of the van is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The van is not mentioned in the description.", + 0 + ], + [ + "The antenna of the van is not mentioned in the description.", + 1 + ], + [ + "The antenna of the van is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The antenna of the van is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The person is not mentioned in the description.", + 1 + ], + [ + "The person is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The person is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is van. 
Based on the image, is it likely that the object in the description is given class: van or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in yellow letters. The van has a yellow license plate with black text and a small white sticker below it. The rear lights are vertically aligned on both sides, and the van has a small emblem above the license plate.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is van. Based on the image, is it likely that the object in the description is given class: van or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in yellow letters. The van has a yellow license plate with black text and a small white sticker below it. 
The rear lights are vertically aligned on both sides, and the van has a small emblem above the license plate.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the van is mentioned in the description but is not black.\nB. The van is not mentioned.\nC. The color of the van is not mentioned.\nD. The color of the van is mentioned in the description and is black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in yellow letters. The van has a yellow license plate with black text and a small white sticker below it. The rear lights are vertically aligned on both sides, and the van has a small emblem above the license plate.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the sticker is mentioned in the description but is not white.\nB. The sticker or the van is not mentioned.\nC. The color of the sticker is not mentioned, but the sticker of the van is mentioned.\nD. The color of the sticker is mentioned in the description and is white.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in yellow letters. The van has a yellow license plate with black text and a small white sticker below it. The rear lights are vertically aligned on both sides, and the van has a small emblem above the license plate.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the taillight is mentioned in the description but is not plastic.\nB. The taillight or the van is not mentioned.\nC. The material of the taillight is not mentioned, but the taillight of the van is mentioned.\nD. The material of the taillight is mentioned in the description and is plastic.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in yellow letters. The van has a yellow license plate with black text and a small white sticker below it. The rear lights are vertically aligned on both sides, and the van has a small emblem above the license plate.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the license plate is mentioned in the description but is not yellow.\nB. The license plate or the van is not mentioned.\nC. The color of the license plate is not mentioned, but the license plate of the van is mentioned.\nD. 
The color of the license plate is mentioned in the description and is yellow.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in yellow letters. The van has a yellow license plate with black text and a small white sticker below it. The rear lights are vertically aligned on both sides, and the van has a small emblem above the license plate.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The text of the sign is mentioned in the description but is not taxi.\nB. The sign or the van is not mentioned.\nC. The text of the sign is not mentioned, but the sign of the van is mentioned.\nD. The text of the sign is mentioned in the description and is taxi.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in yellow letters. The van has a yellow license plate with black text and a small white sticker below it. 
The rear lights are vertically aligned on both sides, and the van has a small emblem above the license plate.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The van is not mentioned in the description.\nB. The grill of the van is not mentioned in the description.\nC. The grill of the van is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in yellow letters. The van has a yellow license plate with black text and a small white sticker below it. The rear lights are vertically aligned on both sides, and the van has a small emblem above the license plate.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The building is not mentioned in the description.\nB. The building is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in yellow letters. The van has a yellow license plate with black text and a small white sticker below it. 
The rear lights are vertically aligned on both sides, and the van has a small emblem above the license plate.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The van is not mentioned in the description.\nB. The front bumper of the van is not mentioned in the description.\nC. The front bumper of the van is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in yellow letters. The van has a yellow license plate with black text and a small white sticker below it. The rear lights are vertically aligned on both sides, and the van has a small emblem above the license plate.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The van is not mentioned in the description.\nB. The antenna of the van is not mentioned in the description.\nC. The antenna of the van is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black van with a rear window displaying the word \"TAXI\" in yellow letters. 
The van has a yellow license plate with black text and a small white sticker below it. The rear lights are vertically aligned on both sides, and the van has a small emblem above the license plate.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The person is not mentioned in the description.\nB. The person is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the van is mentioned in the description and is black.", + "D. The color of the sticker is mentioned in the description and is white.", + "C. The material of the taillight is not mentioned, but the taillight of the van is mentioned.", + "D. The color of the license plate is mentioned in the description and is yellow.", + "D. The text of the sign is mentioned in the description and is taxi.", + "B. The grill of the van is not mentioned in the description.", + "A. The building is not mentioned in the description.", + "B. The front bumper of the van is not mentioned in the description.", + "B. The antenna of the van is not mentioned in the description.", + "A. The person is not mentioned in the description." + ], + "score": 0.95, + "score_pos": 0.9, + "score_neg": 1.0, + "recognition_result": true + }, + "8331685": { + "pred": "A black over-ear headphone with a curved headband and a cushioned earcup. The earcup has a circular shape with a smooth, matte finish. 
The headband is attached to the earcup with a visible hinge mechanism.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The headphone is not mentioned.", + 0 + ], + [ + "The color of the headphone is not mentioned.", + 0 + ], + [ + "The color of the headphone is mentioned in the description and is black.", + 1 + ], + [ + "The color of the headphone is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the headphone is mentioned in the description and is black.", + "pred_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ear cup or the headphone is not mentioned.", + 0 + ], + [ + "The type of the ear cup is not mentioned, but the ear cup of the headphone is mentioned.", + 0.5 + ], + [ + "The type of the ear cup is mentioned in the description and is cushioned.", + 1 + ], + [ + "The type of the ear cup is mentioned in the description but is not cushioned.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The type of the ear cup is mentioned in the description and is cushioned.", + "pred_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The headband or the headphone is not mentioned.", + 0 + ], + [ + "The shape of the headband is not mentioned, but the headband of the headphone is mentioned.", + 0.5 + ], + [ + "The shape of the headband is mentioned in the description and is curved.", + 1 + ], + [ + "The shape of the headband is mentioned in the description but is not curved.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the headband is mentioned in the description and is curved.", + "pred_index": 2, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The microphone of the headphone is not mentioned in the description.", + 1 + ], + [ + "The microphone of the headphone is mentioned in the description.", + -1 + ], + [ + "The headphone is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The microphone of the headphone is not mentioned in the description.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The keyboard is not mentioned in the description.", + 1 + ], + [ + "The keyboard is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The keyboard is not mentioned in the description.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plant is not mentioned in the description.", + 1 + ], + [ + "The plant is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The plant is not mentioned in the description.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clock is not mentioned in the description.", + 1 + ], + [ + "The clock is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The clock is not mentioned in the description.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle is not mentioned in the description.", + 1 + ], + [ + "The bottle is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The bottle is not mentioned in the description.", + "pred_index": 0, + "eval_result": 0 + } + ], + "details_recognition": [ + { + "question": "The object in the image is headphone. Based on the image, is it likely that the object in the description is given class: headphone or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "B. No", + "pred_index": 1, + "eval_result": "incorrect" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black over-ear headphone with a curved headband and a cushioned earcup. The earcup has a circular shape with a smooth, matte finish. The headband is attached to the earcup with a visible hinge mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is headphone. Based on the image, is it likely that the object in the description is given class: headphone or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black over-ear headphone with a curved headband and a cushioned earcup. The earcup has a circular shape with a smooth, matte finish. The headband is attached to the earcup with a visible hinge mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The headphone is not mentioned.\nB. The color of the headphone is not mentioned.\nC. The color of the headphone is mentioned in the description and is black.\nD. The color of the headphone is mentioned in the description but is not black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black over-ear headphone with a curved headband and a cushioned earcup. The earcup has a circular shape with a smooth, matte finish. The headband is attached to the earcup with a visible hinge mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ear cup or the headphone is not mentioned.\nB. The type of the ear cup is not mentioned, but the ear cup of the headphone is mentioned.\nC. The type of the ear cup is mentioned in the description and is cushioned.\nD. The type of the ear cup is mentioned in the description but is not cushioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black over-ear headphone with a curved headband and a cushioned earcup. The earcup has a circular shape with a smooth, matte finish. The headband is attached to the earcup with a visible hinge mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The headband or the headphone is not mentioned.\nB. The shape of the headband is not mentioned, but the headband of the headphone is mentioned.\nC. The shape of the headband is mentioned in the description and is curved.\nD. The shape of the headband is mentioned in the description but is not curved.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black over-ear headphone with a curved headband and a cushioned earcup. The earcup has a circular shape with a smooth, matte finish. The headband is attached to the earcup with a visible hinge mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The microphone of the headphone is not mentioned in the description.\nB. The microphone of the headphone is mentioned in the description.\nC. 
The headphone is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black over-ear headphone with a curved headband and a cushioned earcup. The earcup has a circular shape with a smooth, matte finish. The headband is attached to the earcup with a visible hinge mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The keyboard is not mentioned in the description.\nB. The keyboard is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black over-ear headphone with a curved headband and a cushioned earcup. The earcup has a circular shape with a smooth, matte finish. The headband is attached to the earcup with a visible hinge mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The plant is not mentioned in the description.\nB. The plant is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black over-ear headphone with a curved headband and a cushioned earcup. The earcup has a circular shape with a smooth, matte finish. The headband is attached to the earcup with a visible hinge mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The clock is not mentioned in the description.\nB. The clock is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black over-ear headphone with a curved headband and a cushioned earcup. The earcup has a circular shape with a smooth, matte finish. The headband is attached to the earcup with a visible hinge mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bottle is not mentioned in the description.\nB. The bottle is mentioned in the description.\n" + ], + "response": [ + "B. No", + "C. The color of the headphone is mentioned in the description and is black.", + "C. The type of the ear cup is mentioned in the description and is cushioned.", + "C. The shape of the headband is mentioned in the description and is curved.", + "A. The microphone of the headphone is not mentioned in the description.", + "A. The keyboard is not mentioned in the description.", + "A. 
The plant is not mentioned in the description.", + "A. The clock is not mentioned in the description.", + "A. The bottle is not mentioned in the description." + ], + "score": 0.0, + "score_pos": 0.0, + "score_neg": 0.0, + "recognition_result": false + }, + "8331718": { + "pred": "A black spiral-bound notebook with a white cover featuring the word \"Xtreme\" in a stylized font.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the notebook is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the notebook is mentioned in the description and is black.", + 1 + ], + [ + "The notebook is not mentioned.", + 0 + ], + [ + "The color of the notebook is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the notebook is mentioned in the description and is black.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the notebook is mentioned in the description but is not spiral-bound.", + -1 + ], + [ + "The type of the notebook is mentioned in the description and is spiral-bound.", + 1 + ], + [ + "The notebook is not mentioned.", + 0 + ], + [ + "The type of the notebook is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The type of the notebook is mentioned in the description and is spiral-bound.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The text of the cover is mentioned in the description but is not YAHOO.", + -1 + ], + [ + "The text of the cover is mentioned in the description and is YAHOO.", + 1 + ], + [ + "The cover or the notebook is not mentioned.", + 0 + ], + [ + "The text of the cover is not mentioned, but the cover of the notebook is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The text of the cover is mentioned in the description but is not YAHOO.", + "pred_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the logo/text on the cover is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the logo/text on the cover is mentioned in the description and is white.", + 1 + ], + [ + "The logo/text on the cover or the notebook are not mentioned.", + 0 + ], + [ + "The color of the logo/text on the cover is not mentioned, but the logo/text on the cover of the notebook are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the logo/text on the cover is mentioned in the description and is white.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chair is mentioned in the description.", + -1 + ], + [ + "The chair is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The chair is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle is mentioned in the description.", + -1 + ], + [ + "The bottle is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bottle is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bookmark of the notebook is mentioned in the description.", + -1 + ], + [ + "The notebook is not mentioned in the description.", + 0 + ], + [ + "The bookmark of the notebook is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The bookmark of the notebook is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouse is mentioned in the description.", + -1 + ], + [ + "The mouse is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The mouse is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The keyboard is mentioned in the description.", + -1 + ], + [ + "The keyboard is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The keyboard is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is notebook. 
Based on the image, is it likely that the object in the description is given class: notebook or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover featuring the word \"Xtreme\" in a stylized font.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is notebook. Based on the image, is it likely that the object in the description is given class: notebook or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover featuring the word \"Xtreme\" in a stylized font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the notebook is mentioned in the description but is not black.\nB. The color of the notebook is mentioned in the description and is black.\nC. The notebook is not mentioned.\nD. 
The color of the notebook is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover featuring the word \"Xtreme\" in a stylized font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The type of the notebook is mentioned in the description but is not spiral-bound.\nB. The type of the notebook is mentioned in the description and is spiral-bound.\nC. The notebook is not mentioned.\nD. The type of the notebook is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover featuring the word \"Xtreme\" in a stylized font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The text of the cover is mentioned in the description but is not YAHOO.\nB. The text of the cover is mentioned in the description and is YAHOO.\nC. The cover or the notebook is not mentioned.\nD. 
The text of the cover is not mentioned, but the cover of the notebook is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover featuring the word \"Xtreme\" in a stylized font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the logo/text on the cover is mentioned in the description but is not white.\nB. The color of the logo/text on the cover is mentioned in the description and is white.\nC. The logo/text on the cover or the notebook are not mentioned.\nD. The color of the logo/text on the cover is not mentioned, but the logo/text on the cover of the notebook are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover featuring the word \"Xtreme\" in a stylized font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chair is mentioned in the description.\nB. The chair is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover featuring the word \"Xtreme\" in a stylized font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bottle is mentioned in the description.\nB. The bottle is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover featuring the word \"Xtreme\" in a stylized font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bookmark of the notebook is mentioned in the description.\nB. The notebook is not mentioned in the description.\nC. The bookmark of the notebook is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover featuring the word \"Xtreme\" in a stylized font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mouse is mentioned in the description.\nB. The mouse is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black spiral-bound notebook with a white cover featuring the word \"Xtreme\" in a stylized font.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The keyboard is mentioned in the description.\nB. The keyboard is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the notebook is mentioned in the description and is black.", + "B. The type of the notebook is mentioned in the description and is spiral-bound.", + "A. The text of the cover is mentioned in the description but is not YAHOO.", + "B. The color of the logo/text on the cover is mentioned in the description and is white.", + "B. The chair is not mentioned in the description.", + "B. The bottle is not mentioned in the description.", + "C. The bookmark of the notebook is not mentioned in the description.", + "B. The mouse is not mentioned in the description.", + "B. The keyboard is not mentioned in the description." 
+ ], + "score": 0.7777777777777778, + "score_pos": 0.5, + "score_neg": 1.0, + "recognition_result": true + }, + "8557176": { + "pred": "The watch features a rectangular gold case with a white dial. The strap is black with a textured pattern and a gold buckle.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The case or the watch is not mentioned.", + 0 + ], + [ + "The color of the case is mentioned in the description but is not golden.", + -1 + ], + [ + "The color of the case is not mentioned, but the case of the watch is mentioned.", + 0.5 + ], + [ + "The color of the case is mentioned in the description and is golden.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the case is mentioned in the description and is golden.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The case or the watch is not mentioned.", + 0 + ], + [ + "The material of the case is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the case is not mentioned, but the case of the watch is mentioned.", + 0.5 + ], + [ + "The material of the case is mentioned in the description and is metal.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the case is mentioned in the description and is metal.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The strap or the watch is not mentioned.", + 0 + ], + [ + "The color of the strap is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the strap is not mentioned, but the strap of the watch is mentioned.", + 0.5 + ], + [ + "The color of the strap is mentioned in the description and is black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the strap is mentioned in the description and is black.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The strap or the watch is not mentioned.", + 0 + ], + [ + "The material of the strap is mentioned in the description but is not leather.", + -1 + ], + [ + "The material of the strap is not mentioned, but the strap of the watch is mentioned.", + 0.5 + ], + [ + "The material of the strap is mentioned in the description and is leather.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the strap is not mentioned, but the strap of the watch is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The food is not mentioned in the description.", + 1 + ], + [ + "The food is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The food is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cup is not mentioned in the description.", + 1 + ], + [ + "The cup is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The cup is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The oven is not mentioned in the description.", + 1 + ], + [ + "The oven is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The oven is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The crab cracker is not mentioned in the description.", + 1 + ], + [ + "The crab cracker is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The crab cracker is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The jar is not mentioned in the description.", + 1 + ], + [ + "The jar is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The jar is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is watch. Based on the image, is it likely that the object in the description is given class: watch or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold case with a white dial. The strap is black with a textured pattern and a gold buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is watch. 
Based on the image, is it likely that the object in the description is given class: watch or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold case with a white dial. The strap is black with a textured pattern and a gold buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The case or the watch is not mentioned.\nB. The color of the case is mentioned in the description but is not golden.\nC. The color of the case is not mentioned, but the case of the watch is mentioned.\nD. The color of the case is mentioned in the description and is golden.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold case with a white dial. The strap is black with a textured pattern and a gold buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The case or the watch is not mentioned.\nB. The material of the case is mentioned in the description but is not metal.\nC. 
The material of the case is not mentioned, but the case of the watch is mentioned.\nD. The material of the case is mentioned in the description and is metal.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold case with a white dial. The strap is black with a textured pattern and a gold buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The strap or the watch is not mentioned.\nB. The color of the strap is mentioned in the description but is not black.\nC. The color of the strap is not mentioned, but the strap of the watch is mentioned.\nD. The color of the strap is mentioned in the description and is black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold case with a white dial. The strap is black with a textured pattern and a gold buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The strap or the watch is not mentioned.\nB. The material of the strap is mentioned in the description but is not leather.\nC. 
The material of the strap is not mentioned, but the strap of the watch is mentioned.\nD. The material of the strap is mentioned in the description and is leather.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold case with a white dial. The strap is black with a textured pattern and a gold buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The food is not mentioned in the description.\nB. The food is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold case with a white dial. The strap is black with a textured pattern and a gold buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cup is not mentioned in the description.\nB. The cup is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold case with a white dial. The strap is black with a textured pattern and a gold buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The oven is not mentioned in the description.\nB. The oven is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold case with a white dial. The strap is black with a textured pattern and a gold buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The crab cracker is not mentioned in the description.\nB. The crab cracker is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe watch features a rectangular gold case with a white dial. 
The strap is black with a textured pattern and a gold buckle.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The jar is not mentioned in the description.\nB. The jar is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the case is mentioned in the description and is golden.", + "D. The material of the case is mentioned in the description and is metal.", + "D. The color of the strap is mentioned in the description and is black.", + "C. The material of the strap is not mentioned, but the strap of the watch is mentioned.", + "A. The food is not mentioned in the description.", + "A. The cup is not mentioned in the description.", + "A. The oven is not mentioned in the description.", + "A. The crab cracker is not mentioned in the description.", + "A. The jar is not mentioned in the description." + ], + "score": 0.9444444444444444, + "score_pos": 0.875, + "score_neg": 1.0, + "recognition_result": true + }, + "8557195": { + "pred": "A beige, rectangular bread maker with a smooth surface and slightly rounded edges. The top edge has a small, dark opening.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the body is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the body is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the body is not mentioned, but the body of the toaster is mentioned.", + 0.5 + ], + [ + "The body or the toaster is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The material of the body is not mentioned, but the body of the toaster is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the toaster is mentioned in the description and is white or beige.", + 1 + ], + [ + "The color of the toaster is mentioned in the description but is not white or beige.", + -1 + ], + [ + "The color of the toaster is not mentioned.", + 0 + ], + [ + "The toaster is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the toaster is mentioned in the description and is white or beige.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the body is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the body is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the body is not mentioned, but the body of the toaster is mentioned.", + 0.5 + ], + [ + "The body or the toaster is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The texture of the body is mentioned in the description and is smooth.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lever of the toaster is not mentioned in the description.", + 1 + ], + [ + "The lever of the toaster is mentioned in the description.", + -1 + ], + [ + "The toaster is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The lever of the toaster is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The knife is not mentioned in the description.", + 1 + ], + [ + "The knife is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The knife is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cord of the toaster is not mentioned in the description.", + 1 + ], + [ + "The cord of the toaster is mentioned in the description.", + -1 + ], + [ + "The toaster is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The cord of the toaster is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The knob of the toaster is not mentioned in the description.", + 1 + ], + [ + "The knob of the toaster is mentioned in the description.", + -1 + ], + [ + "The toaster is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The knob of the toaster is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The utensil is not mentioned in the description.", + 1 + ], + [ + "The utensil is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The utensil is mentioned in the description.", + "pred_index": 1, + "eval_result": -1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is toaster. 
Based on the image, is it likely that the object in the description is given class: toaster or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA beige, rectangular bread maker with a smooth surface and slightly rounded edges. The top edge has a small, dark opening.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is toaster. Based on the image, is it likely that the object in the description is given class: toaster or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA beige, rectangular bread maker with a smooth surface and slightly rounded edges. The top edge has a small, dark opening.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the body is mentioned in the description and is plastic.\nB. The material of the body is mentioned in the description but is not plastic.\nC. 
The material of the body is not mentioned, but the body of the toaster is mentioned.\nD. The body or the toaster is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA beige, rectangular bread maker with a smooth surface and slightly rounded edges. The top edge has a small, dark opening.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the toaster is mentioned in the description and is white or beige.\nB. The color of the toaster is mentioned in the description but is not white or beige.\nC. The color of the toaster is not mentioned.\nD. The toaster is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA beige, rectangular bread maker with a smooth surface and slightly rounded edges. The top edge has a small, dark opening.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the body is mentioned in the description and is smooth.\nB. The texture of the body is mentioned in the description but is not smooth.\nC. 
The texture of the body is not mentioned, but the body of the toaster is mentioned.\nD. The body or the toaster is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA beige, rectangular bread maker with a smooth surface and slightly rounded edges. The top edge has a small, dark opening.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lever of the toaster is not mentioned in the description.\nB. The lever of the toaster is mentioned in the description.\nC. The toaster is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA beige, rectangular bread maker with a smooth surface and slightly rounded edges. The top edge has a small, dark opening.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The knife is not mentioned in the description.\nB. The knife is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA beige, rectangular bread maker with a smooth surface and slightly rounded edges. The top edge has a small, dark opening.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cord of the toaster is not mentioned in the description.\nB. The cord of the toaster is mentioned in the description.\nC. The toaster is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA beige, rectangular bread maker with a smooth surface and slightly rounded edges. The top edge has a small, dark opening.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The knob of the toaster is not mentioned in the description.\nB. The knob of the toaster is mentioned in the description.\nC. The toaster is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA beige, rectangular bread maker with a smooth surface and slightly rounded edges. The top edge has a small, dark opening.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The utensil is not mentioned in the description.\nB. The utensil is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The material of the body is not mentioned, but the body of the toaster is mentioned.", + "A. The color of the toaster is mentioned in the description and is white or beige.", + "A. The texture of the body is mentioned in the description and is smooth.", + "A. The lever of the toaster is not mentioned in the description.", + "A. The knife is not mentioned in the description.", + "A. The cord of the toaster is not mentioned in the description.", + "A. The knob of the toaster is not mentioned in the description.", + "B. The utensil is mentioned in the description." + ], + "score": 0.6875, + "score_pos": 0.8333333333333334, + "score_neg": 0.6, + "recognition_result": true + }, + "9766617": { + "pred": "The goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The beak is black, and the legs and feet are also black. 
The underbelly is white, and the tail feathers are black with a white tip.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The head or the duck/goose is not mentioned.", + 0 + ], + [ + "The color of the head is mentioned in the description and is black.", + 1 + ], + [ + "The color of the head is not mentioned, but the head of the duck/goose is mentioned.", + 0.5 + ], + [ + "The color of the head is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the head is mentioned in the description and is black.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The beak or the duck/goose is not mentioned.", + 0 + ], + [ + "The color of the beak is mentioned in the description and is black.", + 1 + ], + [ + "The color of the beak is not mentioned, but the beak of the duck/goose is mentioned.", + 0.5 + ], + [ + "The color of the beak is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the beak is mentioned in the description and is black.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The body or the duck/goose is not mentioned.", + 0 + ], + [ + "The color of the body is mentioned in the description and is grey or brown.", + 1 + ], + [ + "The color of the body is not mentioned, but the body of the duck/goose is mentioned.", + 0.5 + ], + [ + "The color of the body is mentioned in the description but is not grey or brown.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the body is mentioned in the description and is grey or brown.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tail or the duck/goose is not mentioned.", + 0 + ], + [ + "The color of the tail is mentioned in the description and is black and white.", + 1 + ], + [ + "The color of the tail is not mentioned, but the tail of the duck/goose is mentioned.", + 0.5 + ], + [ + "The color of the tail is mentioned in the description but is not black and white.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the tail is mentioned in the description and is black and white.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wings or the duck/goose are not mentioned.", + 0 + ], + [ + "The shape of the wings is mentioned in the description and is folded.", + 1 + ], + [ + "The shape of the wings is not mentioned, but the wings of the duck/goose are mentioned.", + 0.5 + ], + [ + "The shape of the wings is mentioned in the description but is not folded.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the wings is not mentioned, but the wings of the duck/goose are mentioned.", + "pred_index": 2, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The feet of the duck/goose are mentioned in the description.", + -1 + ], + [ + "The feet of the duck/goose are not mentioned in the description.", + 1 + ], + [ + "The duck/goose is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The feet of the duck/goose are mentioned in the description.", + "pred_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mud of the duck/goose is mentioned in the description.", + -1 + ], + [ + "The mud of the duck/goose is not mentioned in the description.", + 1 + ], + [ + "The duck/goose is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The mud of the duck/goose is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The grass are mentioned in the description.", + -1 + ], + [ + "The grass are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The grass are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pigeon is mentioned in the description.", + -1 + ], + [ + "The pigeon is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The pigeon is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tree is mentioned in the description.", + -1 + ], + [ + "The tree is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The tree is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is duck/goose. 
Based on the image, is it likely that the object in the description is given class: duck/goose or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The beak is black, and the legs and feet are also black. The underbelly is white, and the tail feathers are black with a white tip.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is duck/goose. Based on the image, is it likely that the object in the description is given class: duck/goose or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The beak is black, and the legs and feet are also black. 
The underbelly is white, and the tail feathers are black with a white tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The head or the duck/goose is not mentioned.\nB. The color of the head is mentioned in the description and is black.\nC. The color of the head is not mentioned, but the head of the duck/goose is mentioned.\nD. The color of the head is mentioned in the description but is not black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The beak is black, and the legs and feet are also black. The underbelly is white, and the tail feathers are black with a white tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The beak or the duck/goose is not mentioned.\nB. The color of the beak is mentioned in the description and is black.\nC. The color of the beak is not mentioned, but the beak of the duck/goose is mentioned.\nD. The color of the beak is mentioned in the description but is not black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The beak is black, and the legs and feet are also black. The underbelly is white, and the tail feathers are black with a white tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The body or the duck/goose is not mentioned.\nB. The color of the body is mentioned in the description and is grey or brown.\nC. The color of the body is not mentioned, but the body of the duck/goose is mentioned.\nD. The color of the body is mentioned in the description but is not grey or brown.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The beak is black, and the legs and feet are also black. The underbelly is white, and the tail feathers are black with a white tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tail or the duck/goose is not mentioned.\nB. The color of the tail is mentioned in the description and is black and white.\nC. The color of the tail is not mentioned, but the tail of the duck/goose is mentioned.\nD. 
The color of the tail is mentioned in the description but is not black and white.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The beak is black, and the legs and feet are also black. The underbelly is white, and the tail feathers are black with a white tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wings or the duck/goose are not mentioned.\nB. The shape of the wings is mentioned in the description and is folded.\nC. The shape of the wings is not mentioned, but the wings of the duck/goose are mentioned.\nD. The shape of the wings is mentioned in the description but is not folded.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The beak is black, and the legs and feet are also black. 
The underbelly is white, and the tail feathers are black with a white tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The feet of the duck/goose are mentioned in the description.\nB. The feet of the duck/goose are not mentioned in the description.\nC. The duck/goose is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The beak is black, and the legs and feet are also black. The underbelly is white, and the tail feathers are black with a white tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mud of the duck/goose is mentioned in the description.\nB. The mud of the duck/goose is not mentioned in the description.\nC. The duck/goose is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. 
Its head is black with a white patch on the side of its neck. The beak is black, and the legs and feet are also black. The underbelly is white, and the tail feathers are black with a white tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The grass are mentioned in the description.\nB. The grass are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. The beak is black, and the legs and feet are also black. The underbelly is white, and the tail feathers are black with a white tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pigeon is mentioned in the description.\nB. The pigeon is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe goose has a predominantly brown body with a pattern of darker brown and black feathers on its back. Its head is black with a white patch on the side of its neck. 
The beak is black, and the legs and feet are also black. The underbelly is white, and the tail feathers are black with a white tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tree is mentioned in the description.\nB. The tree is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the head is mentioned in the description and is black.", + "B. The color of the beak is mentioned in the description and is black.", + "B. The color of the body is mentioned in the description and is grey or brown.", + "B. The color of the tail is mentioned in the description and is black and white.", + "C. The shape of the wings is not mentioned, but the wings of the duck/goose are mentioned.", + "A. The feet of the duck/goose are mentioned in the description.", + "B. The mud of the duck/goose is not mentioned in the description.", + "B. The grass are not mentioned in the description.", + "B. The pigeon is not mentioned in the description.", + "B. The tree is not mentioned in the description." + ], + "score": 0.75, + "score_pos": 0.9, + "score_neg": 0.6, + "recognition_result": true + }, + "11021544": { + "pred": "A metallic, curved faucet with a polished finish, featuring a single lever handle and a long, slender spout.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the faucet is mentioned in the description but is not metallic.", + -1 + ], + [ + "The texture of the faucet is not mentioned.", + 0 + ], + [ + "The faucet is not mentioned.", + 0 + ], + [ + "The texture of the faucet is mentioned in the description and is metallic.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The texture of the faucet is mentioned in the description and is metallic.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the spout is mentioned in the description but is not curved or arc.", + -1 + ], + [ + "The shape of the spout is not mentioned, but the spout of the faucet is mentioned.", + 0.5 + ], + [ + "The spout or the faucet is not mentioned.", + 0 + ], + [ + "The shape of the spout is mentioned in the description and is curved or arc.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the spout is mentioned in the description and is curved or arc.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the faucet is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the faucet is not mentioned.", + 0 + ], + [ + "The faucet is not mentioned.", + 0 + ], + [ + "The material of the faucet is mentioned in the description and is metal.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the faucet is mentioned in the description and is metal.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The soap dispenser of the faucet is not mentioned in the description.", + 1 + ], + [ + "The faucet is not mentioned in the description.", + 0 + ], + [ + "The soap dispenser of the faucet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The soap dispenser of the faucet is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The side spray of the faucet is not mentioned in the description.", + 1 + ], + [ + "The faucet is not mentioned in the description.", + 0 + ], + [ + "The side spray of the faucet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The side spray of the faucet is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chair is not mentioned in the description.", + 1 + ], + [ + "The chair is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The chair is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sprayer of the faucet is not mentioned in the description.", + 1 + ], + [ + "The faucet is not mentioned in the description.", + 0 + ], + [ + "The sprayer of the faucet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The sprayer of the faucet is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cabinet door is not mentioned in the description.", + 1 + ], + [ + "The cabinet door is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The cabinet door is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is faucet. Based on the image, is it likely that the object in the description is given class: faucet or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic, curved faucet with a polished finish, featuring a single lever handle and a long, slender spout.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is faucet. Based on the image, is it likely that the object in the description is given class: faucet or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic, curved faucet with a polished finish, featuring a single lever handle and a long, slender spout.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The texture of the faucet is mentioned in the description but is not metallic.\nB. The texture of the faucet is not mentioned.\nC. The faucet is not mentioned.\nD. The texture of the faucet is mentioned in the description and is metallic.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic, curved faucet with a polished finish, featuring a single lever handle and a long, slender spout.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the spout is mentioned in the description but is not curved or arc.\nB. The shape of the spout is not mentioned, but the spout of the faucet is mentioned.\nC. The spout or the faucet is not mentioned.\nD. The shape of the spout is mentioned in the description and is curved or arc.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic, curved faucet with a polished finish, featuring a single lever handle and a long, slender spout.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the faucet is mentioned in the description but is not metal.\nB. 
The material of the faucet is not mentioned.\nC. The faucet is not mentioned.\nD. The material of the faucet is mentioned in the description and is metal.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic, curved faucet with a polished finish, featuring a single lever handle and a long, slender spout.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The soap dispenser of the faucet is not mentioned in the description.\nB. The faucet is not mentioned in the description.\nC. The soap dispenser of the faucet is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic, curved faucet with a polished finish, featuring a single lever handle and a long, slender spout.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The side spray of the faucet is not mentioned in the description.\nB. The faucet is not mentioned in the description.\nC. The side spray of the faucet is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic, curved faucet with a polished finish, featuring a single lever handle and a long, slender spout.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chair is not mentioned in the description.\nB. The chair is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic, curved faucet with a polished finish, featuring a single lever handle and a long, slender spout.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sprayer of the faucet is not mentioned in the description.\nB. The faucet is not mentioned in the description.\nC. The sprayer of the faucet is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA metallic, curved faucet with a polished finish, featuring a single lever handle and a long, slender spout.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cabinet door is not mentioned in the description.\nB. The cabinet door is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The texture of the faucet is mentioned in the description and is metallic.", + "D. The shape of the spout is mentioned in the description and is curved or arc.", + "D. The material of the faucet is mentioned in the description and is metal.", + "A. The soap dispenser of the faucet is not mentioned in the description.", + "A. The side spray of the faucet is not mentioned in the description.", + "A. The chair is not mentioned in the description.", + "A. The sprayer of the faucet is not mentioned in the description.", + "A. The cabinet door is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "11775390": { + "pred": "A green rubber shoe with a textured sole and multiple circular holes on the side. The shoe features a black and white design on the upper part, with green laces threaded through the eyelets.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sandals are not mentioned.", + 0 + ], + [ + "The color of the sandals is mentioned in the description but is not green.", + -1 + ], + [ + "The color of the sandals is not mentioned.", + 0 + ], + [ + "The color of the sandals is mentioned in the description and is green.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the sandals is mentioned in the description and is green.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sandals are not mentioned.", + 0 + ], + [ + "The style of the sandals is mentioned in the description but is not perforated.", + -1 + ], + [ + "The style of the sandals is not mentioned.", + 0 + ], + [ + "The style of the sandals is mentioned in the description and is perforated.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The style of the sandals is mentioned in the description and is perforated.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sandals are not mentioned.", + 0 + ], + [ + "The material of the sandals is mentioned in the description but is not rubber.", + -1 + ], + [ + "The material of the sandals is not mentioned.", + 0 + ], + [ + "The material of the sandals is mentioned in the description and is rubber.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the sandals is mentioned in the description and is rubber.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The strap or the sandals is not mentioned.", + 0 + ], + [ + "The number of parts of the strap is mentioned in the description but is not one.", + -1 + ], + [ + "The number of parts of the strap is not mentioned, but the strap of the sandals is mentioned.", + 0.5 + ], + [ + "The number of parts of the strap is mentioned in the description and is one.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The number of parts of the strap is not mentioned, but the strap of the sandals is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The trailer is not mentioned in the description.", + 1 + ], + [ + "The trailer is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The trailer is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tire is not mentioned in the description.", + 1 + ], + [ + "The tire is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The tire is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pumpkins are not mentioned in the description.", + 1 + ], + [ + "The pumpkins are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The pumpkins are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The box is not mentioned in the description.", + 1 + ], + [ + "The box is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The box is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The boy is not mentioned in the description.", + 1 + ], + [ + "The boy is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The boy is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is sandals. Based on the image, is it likely that the object in the description is given class: sandals or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a textured sole and multiple circular holes on the side. The shoe features a black and white design on the upper part, with green laces threaded through the eyelets.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is sandals. Based on the image, is it likely that the object in the description is given class: sandals or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a textured sole and multiple circular holes on the side. The shoe features a black and white design on the upper part, with green laces threaded through the eyelets.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sandals are not mentioned.\nB. The color of the sandals is mentioned in the description but is not green.\nC. The color of the sandals is not mentioned.\nD. The color of the sandals is mentioned in the description and is green.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a textured sole and multiple circular holes on the side. The shoe features a black and white design on the upper part, with green laces threaded through the eyelets.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sandals are not mentioned.\nB. The style of the sandals is mentioned in the description but is not perforated.\nC. The style of the sandals is not mentioned.\nD. The style of the sandals is mentioned in the description and is perforated.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a textured sole and multiple circular holes on the side. The shoe features a black and white design on the upper part, with green laces threaded through the eyelets.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sandals are not mentioned.\nB. The material of the sandals is mentioned in the description but is not rubber.\nC. The material of the sandals is not mentioned.\nD. The material of the sandals is mentioned in the description and is rubber.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a textured sole and multiple circular holes on the side. The shoe features a black and white design on the upper part, with green laces threaded through the eyelets.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The strap or the sandals is not mentioned.\nB. The number of parts of the strap is mentioned in the description but is not one.\nC. The number of parts of the strap is not mentioned, but the strap of the sandals is mentioned.\nD. The number of parts of the strap is mentioned in the description and is one.\n", + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a textured sole and multiple circular holes on the side. The shoe features a black and white design on the upper part, with green laces threaded through the eyelets.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The trailer is not mentioned in the description.\nB. The trailer is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a textured sole and multiple circular holes on the side. The shoe features a black and white design on the upper part, with green laces threaded through the eyelets.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tire is not mentioned in the description.\nB. The tire is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a textured sole and multiple circular holes on the side. The shoe features a black and white design on the upper part, with green laces threaded through the eyelets.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pumpkins are not mentioned in the description.\nB. The pumpkins are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a textured sole and multiple circular holes on the side. The shoe features a black and white design on the upper part, with green laces threaded through the eyelets.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The box is not mentioned in the description.\nB. The box is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green rubber shoe with a textured sole and multiple circular holes on the side. 
The shoe features a black and white design on the upper part, with green laces threaded through the eyelets.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The boy is not mentioned in the description.\nB. The boy is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the sandals is mentioned in the description and is green.", + "D. The style of the sandals is mentioned in the description and is perforated.", + "D. The material of the sandals is mentioned in the description and is rubber.", + "C. The number of parts of the strap is not mentioned, but the strap of the sandals is mentioned.", + "A. The trailer is not mentioned in the description.", + "A. The tire is not mentioned in the description.", + "A. The pumpkins are not mentioned in the description.", + "A. The box is not mentioned in the description.", + "A. The boy is not mentioned in the description." + ], + "score": 0.9444444444444444, + "score_pos": 0.875, + "score_neg": 1.0, + "recognition_result": true + }, + "11950619": { + "pred": "The dumbbell features a white, rectangular handle with rounded edges and a smooth surface. The handle is attached to a metallic, rectangular weight plate with a series of evenly spaced, vertical slots. 
The weight plate is secured to the handle with a visible screw.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the handle is mentioned in the description and is white, beige, or wooden color.", + 1 + ], + [ + "The color of the handle is not mentioned, but the handle of the table tennis paddle is mentioned.", + 0.5 + ], + [ + "The handle or the table tennis paddle is not mentioned.", + 0 + ], + [ + "The color of the handle is mentioned in the description but is not white, beige, or wooden color.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the handle is mentioned in the description and is white, beige, or wooden color.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the handle is mentioned in the description and is wood.", + 1 + ], + [ + "The material of the handle is not mentioned, but the handle of the table tennis paddle is mentioned.", + 0.5 + ], + [ + "The handle or the table tennis paddle is not mentioned.", + 0 + ], + [ + "The material of the handle is mentioned in the description but is not wood.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The material of the handle is not mentioned, but the handle of the table tennis paddle is mentioned.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the cover is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the cover is not mentioned, but the cover of the table tennis paddle is mentioned.", + 0.5 + ], + [ + "The cover or the table tennis paddle is not mentioned.", + 0 + ], + [ + "The material of the cover is mentioned in the description but is not plastic.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The cover or the table tennis paddle is not mentioned.", + "pred_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the head is mentioned in the description and is rounded.", + 1 + ], + [ + "The shape of the head is not mentioned, but the head of the table tennis paddle is mentioned.", + 0.5 + ], + [ + "The head or the table tennis paddle is not mentioned.", + 0 + ], + [ + "The shape of the head is mentioned in the description but is not rounded.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the head is mentioned in the description but is not rounded.", + "pred_index": 3, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The dumbbell is not mentioned in the description.", + 1 + ], + [ + "The dumbbell is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The dumbbell is not mentioned in the description.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The table tennis paddle is not mentioned in the description.", + 0 + ], + [ + "The edge tape of the table tennis paddle is not mentioned in the description.", + 1 + ], + [ + "The edge tape of the table tennis paddle is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The edge tape of the table tennis paddle is not mentioned in the description.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The table tennis paddle is not mentioned in the description.", + 0 + ], + [ + "The logo of the table tennis paddle is not mentioned in the description.", + 1 + ], + [ + "The logo of the table tennis paddle is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The logo of the table tennis paddle is not mentioned in the description.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mat is not mentioned in the description.", + 1 + ], + [ + "The mat is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The mat is not mentioned in the description.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cabinet is not mentioned in the description.", + 1 + ], + [ + "The cabinet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The cabinet is not mentioned in the description.", + "pred_index": 0, + "eval_result": 0 + } + ], + "details_recognition": [ + { + "question": "The object in the image is table tennis paddle. Based on the image, is it likely that the object in the description is given class: table tennis paddle or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "B. No", + "pred_index": 1, + "eval_result": "incorrect" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe dumbbell features a white, rectangular handle with rounded edges and a smooth surface. The handle is attached to a metallic, rectangular weight plate with a series of evenly spaced, vertical slots. The weight plate is secured to the handle with a visible screw.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is table tennis paddle. Based on the image, is it likely that the object in the description is given class: table tennis paddle or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe dumbbell features a white, rectangular handle with rounded edges and a smooth surface. The handle is attached to a metallic, rectangular weight plate with a series of evenly spaced, vertical slots. The weight plate is secured to the handle with a visible screw.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the handle is mentioned in the description and is white, beige, or wooden color.\nB. The color of the handle is not mentioned, but the handle of the table tennis paddle is mentioned.\nC. The handle or the table tennis paddle is not mentioned.\nD. 
The color of the handle is mentioned in the description but is not white, beige, or wooden color.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe dumbbell features a white, rectangular handle with rounded edges and a smooth surface. The handle is attached to a metallic, rectangular weight plate with a series of evenly spaced, vertical slots. The weight plate is secured to the handle with a visible screw.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the handle is mentioned in the description and is wood.\nB. The material of the handle is not mentioned, but the handle of the table tennis paddle is mentioned.\nC. The handle or the table tennis paddle is not mentioned.\nD. The material of the handle is mentioned in the description but is not wood.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe dumbbell features a white, rectangular handle with rounded edges and a smooth surface. The handle is attached to a metallic, rectangular weight plate with a series of evenly spaced, vertical slots. 
The weight plate is secured to the handle with a visible screw.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the cover is mentioned in the description and is plastic.\nB. The material of the cover is not mentioned, but the cover of the table tennis paddle is mentioned.\nC. The cover or the table tennis paddle is not mentioned.\nD. The material of the cover is mentioned in the description but is not plastic.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe dumbbell features a white, rectangular handle with rounded edges and a smooth surface. The handle is attached to a metallic, rectangular weight plate with a series of evenly spaced, vertical slots. The weight plate is secured to the handle with a visible screw.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the head is mentioned in the description and is rounded.\nB. The shape of the head is not mentioned, but the head of the table tennis paddle is mentioned.\nC. The head or the table tennis paddle is not mentioned.\nD. The shape of the head is mentioned in the description but is not rounded.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe dumbbell features a white, rectangular handle with rounded edges and a smooth surface. The handle is attached to a metallic, rectangular weight plate with a series of evenly spaced, vertical slots. The weight plate is secured to the handle with a visible screw.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The dumbbell is not mentioned in the description.\nB. The dumbbell is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe dumbbell features a white, rectangular handle with rounded edges and a smooth surface. The handle is attached to a metallic, rectangular weight plate with a series of evenly spaced, vertical slots. The weight plate is secured to the handle with a visible screw.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The table tennis paddle is not mentioned in the description.\nB. The edge tape of the table tennis paddle is not mentioned in the description.\nC. The edge tape of the table tennis paddle is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe dumbbell features a white, rectangular handle with rounded edges and a smooth surface. The handle is attached to a metallic, rectangular weight plate with a series of evenly spaced, vertical slots. The weight plate is secured to the handle with a visible screw.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The table tennis paddle is not mentioned in the description.\nB. The logo of the table tennis paddle is not mentioned in the description.\nC. The logo of the table tennis paddle is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe dumbbell features a white, rectangular handle with rounded edges and a smooth surface. The handle is attached to a metallic, rectangular weight plate with a series of evenly spaced, vertical slots. The weight plate is secured to the handle with a visible screw.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mat is not mentioned in the description.\nB. The mat is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe dumbbell features a white, rectangular handle with rounded edges and a smooth surface. The handle is attached to a metallic, rectangular weight plate with a series of evenly spaced, vertical slots. The weight plate is secured to the handle with a visible screw.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cabinet is not mentioned in the description.\nB. The cabinet is mentioned in the description.\n" + ], + "response": [ + "B. No", + "A. The color of the handle is mentioned in the description and is white, beige, or wooden color.", + "B. The material of the handle is not mentioned, but the handle of the table tennis paddle is mentioned.", + "C. The cover or the table tennis paddle is not mentioned.", + "D. The shape of the head is mentioned in the description but is not rounded.", + "A. The dumbbell is not mentioned in the description.", + "B. The edge tape of the table tennis paddle is not mentioned in the description.", + "B. The logo of the table tennis paddle is not mentioned in the description.", + "A. The mat is not mentioned in the description.", + "A. The cabinet is not mentioned in the description." 
+ ], + "score": -0.1111111111111111, + "score_pos": -0.25, + "score_neg": 0.0, + "recognition_result": false + }, + "12178946": { + "pred": "A yellow bottle with a blue label featuring white text.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle is not mentioned.", + 0 + ], + [ + "The shape of the bottle is mentioned in the description and is cylindrical.", + 1 + ], + [ + "The shape of the bottle is not mentioned.", + 0 + ], + [ + "The shape of the bottle is mentioned in the description but is not cylindrical.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the bottle is not mentioned.", + "pred_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cap or the bottle is not mentioned.", + 0 + ], + [ + "The color of the cap is mentioned in the description and is yellow.", + 1 + ], + [ + "The color of the cap is not mentioned, but the cap of the bottle is mentioned.", + 0.5 + ], + [ + "The color of the cap is mentioned in the description but is not yellow.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the cap is mentioned in the description and is yellow.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The label or the bottle is not mentioned.", + 0 + ], + [ + "The color of the label is mentioned in the description and is blue.", + 1 + ], + [ + "The color of the label is not mentioned, but the label of the bottle is mentioned.", + 0.5 + ], + [ + "The color of the label is mentioned in the description but is not blue.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the label is mentioned in the description and is blue.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The text or the bottle is not mentioned.", + 0 + ], + [ + "The color of the text is mentioned in the description and is white.", + 1 + ], + [ + "The color of the text is not mentioned, but the text of the bottle is mentioned.", + 0.5 + ], + [ + "The color of the text is mentioned in the description but is not white.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the text is mentioned in the description and is white.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The apple is mentioned in the description.", + -1 + ], + [ + "The apple is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The apple is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle is not mentioned in the description.", + 0 + ], + [ + "The handle of the bottle is mentioned in the description.", + -1 + ], + [ + "The handle of the bottle is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The handle of the bottle is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottle is not mentioned in the description.", + 0 + ], + [ + "The trigger of the bottle is mentioned in the description.", + -1 + ], + [ + "The trigger of the bottle is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The trigger of the bottle is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The measuring cup is mentioned in the description.", + -1 + ], + [ + "The measuring cup is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The measuring cup is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cutting board is mentioned in the description.", + -1 + ], + [ + "The cutting board is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The cutting board is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is bottle. Based on the image, is it likely that the object in the description is given class: bottle or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA yellow bottle with a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is bottle. Based on the image, is it likely that the object in the description is given class: bottle or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA yellow bottle with a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bottle is not mentioned.\nB. The shape of the bottle is mentioned in the description and is cylindrical.\nC. The shape of the bottle is not mentioned.\nD. The shape of the bottle is mentioned in the description but is not cylindrical.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA yellow bottle with a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cap or the bottle is not mentioned.\nB. The color of the cap is mentioned in the description and is yellow.\nC. The color of the cap is not mentioned, but the cap of the bottle is mentioned.\nD. The color of the cap is mentioned in the description but is not yellow.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA yellow bottle with a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The label or the bottle is not mentioned.\nB. The color of the label is mentioned in the description and is blue.\nC. The color of the label is not mentioned, but the label of the bottle is mentioned.\nD. The color of the label is mentioned in the description but is not blue.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA yellow bottle with a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The text or the bottle is not mentioned.\nB. The color of the text is mentioned in the description and is white.\nC. The color of the text is not mentioned, but the text of the bottle is mentioned.\nD. The color of the text is mentioned in the description but is not white.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA yellow bottle with a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The apple is mentioned in the description.\nB. The apple is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA yellow bottle with a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bottle is not mentioned in the description.\nB. 
The handle of the bottle is mentioned in the description.\nC. The handle of the bottle is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA yellow bottle with a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bottle is not mentioned in the description.\nB. The trigger of the bottle is mentioned in the description.\nC. The trigger of the bottle is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA yellow bottle with a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The measuring cup is mentioned in the description.\nB. The measuring cup is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA yellow bottle with a blue label featuring white text.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cutting board is mentioned in the description.\nB. The cutting board is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The shape of the bottle is not mentioned.", + "B. The color of the cap is mentioned in the description and is yellow.", + "B. The color of the label is mentioned in the description and is blue.", + "B. The color of the text is mentioned in the description and is white.", + "B. The apple is not mentioned in the description.", + "C. The handle of the bottle is not mentioned in the description.", + "C. The trigger of the bottle is not mentioned in the description.", + "B. The measuring cup is not mentioned in the description.", + "B. The cutting board is not mentioned in the description." + ], + "score": 0.8888888888888888, + "score_pos": 0.75, + "score_neg": 1.0, + "recognition_result": true + }, + "12348079": { + "pred": "A rectangular digital weighing scale with a metallic blue weighing platform. The scale has a white base with a control panel on the left side, featuring several buttons and a small display screen. 
The edges of the scale are slightly rounded, and the weighing platform has a textured surface.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The weighing platform or the scale is not mentioned.", + 0 + ], + [ + "The shape of the weighing platform is not mentioned, but the weighing platform of the scale is mentioned.", + 0.5 + ], + [ + "The shape of the weighing platform is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the weighing platform is mentioned in the description but is not rectangular.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the weighing platform is mentioned in the description and is rectangular.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The weighing platform or the scale is not mentioned.", + 0 + ], + [ + "The color of the weighing platform is not mentioned, but the weighing platform of the scale is mentioned.", + 0.5 + ], + [ + "The color of the weighing platform is mentioned in the description and is blue.", + 1 + ], + [ + "The color of the weighing platform is mentioned in the description but is not blue.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the weighing platform is mentioned in the description and is blue.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The keypad or the scale is not mentioned.", + 0 + ], + [ + "The shape of the keypad is not mentioned, but the keypad of the scale is mentioned.", + 0.5 + ], + [ + "The shape of the keypad is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the keypad is mentioned in the description but is not rectangular.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The shape of the keypad is not mentioned, but the keypad of the scale is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The display screen or the scale is not mentioned.", + 0 + ], + [ + "The color of the display screen is not mentioned, but the display screen of the scale is mentioned.", + 0.5 + ], + [ + "The color of the display screen is mentioned in the description and is black.", + 1 + ], + [ + "The color of the display screen is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the display screen is not mentioned, but the display screen of the scale is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The body or the scale is not mentioned.", + 0 + ], + [ + "The color of the body is not mentioned, but the body of the scale is mentioned.", + 0.5 + ], + [ + "The color of the body is mentioned in the description and is white.", + 1 + ], + [ + "The color of the body is mentioned in the description but is not white.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the body is mentioned in the description and is white.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The power cord of the scale is not mentioned in the description.", + 1 + ], + [ + "The scale is not mentioned in the description.", + 0 + ], + [ + "The power cord of the scale is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The power cord of the scale is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The usb cable of the scale is not mentioned in the description.", + 1 + ], + [ + "The scale is not mentioned in the description.", + 0 + ], + [ + "The usb cable of the scale is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The usb cable of the scale is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bicycle is not mentioned in the description.", + 1 + ], + [ + "The bicycle is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The bicycle is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The child is not mentioned in the description.", + 1 + ], + [ + "The child is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The child is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The woman is not mentioned in the description.", + 1 + ], + [ + "The woman is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The woman is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is scale. 
Based on the image, is it likely that the object in the description is given class: scale or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular digital weighing scale with a metallic blue weighing platform. The scale has a white base with a control panel on the left side, featuring several buttons and a small display screen. The edges of the scale are slightly rounded, and the weighing platform has a textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is scale. Based on the image, is it likely that the object in the description is given class: scale or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular digital weighing scale with a metallic blue weighing platform. The scale has a white base with a control panel on the left side, featuring several buttons and a small display screen. 
The edges of the scale are slightly rounded, and the weighing platform has a textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The weighing platform or the scale is not mentioned.\nB. The shape of the weighing platform is not mentioned, but the weighing platform of the scale is mentioned.\nC. The shape of the weighing platform is mentioned in the description and is rectangular.\nD. The shape of the weighing platform is mentioned in the description but is not rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular digital weighing scale with a metallic blue weighing platform. The scale has a white base with a control panel on the left side, featuring several buttons and a small display screen. The edges of the scale are slightly rounded, and the weighing platform has a textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The weighing platform or the scale is not mentioned.\nB. The color of the weighing platform is not mentioned, but the weighing platform of the scale is mentioned.\nC. The color of the weighing platform is mentioned in the description and is blue.\nD. The color of the weighing platform is mentioned in the description but is not blue.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular digital weighing scale with a metallic blue weighing platform. The scale has a white base with a control panel on the left side, featuring several buttons and a small display screen. The edges of the scale are slightly rounded, and the weighing platform has a textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The keypad or the scale is not mentioned.\nB. The shape of the keypad is not mentioned, but the keypad of the scale is mentioned.\nC. The shape of the keypad is mentioned in the description and is rectangular.\nD. The shape of the keypad is mentioned in the description but is not rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular digital weighing scale with a metallic blue weighing platform. The scale has a white base with a control panel on the left side, featuring several buttons and a small display screen. The edges of the scale are slightly rounded, and the weighing platform has a textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The display screen or the scale is not mentioned.\nB. The color of the display screen is not mentioned, but the display screen of the scale is mentioned.\nC. 
The color of the display screen is mentioned in the description and is black.\nD. The color of the display screen is mentioned in the description but is not black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular digital weighing scale with a metallic blue weighing platform. The scale has a white base with a control panel on the left side, featuring several buttons and a small display screen. The edges of the scale are slightly rounded, and the weighing platform has a textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The body or the scale is not mentioned.\nB. The color of the body is not mentioned, but the body of the scale is mentioned.\nC. The color of the body is mentioned in the description and is white.\nD. The color of the body is mentioned in the description but is not white.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular digital weighing scale with a metallic blue weighing platform. The scale has a white base with a control panel on the left side, featuring several buttons and a small display screen. 
The edges of the scale are slightly rounded, and the weighing platform has a textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The power cord of the scale is not mentioned in the description.\nB. The scale is not mentioned in the description.\nC. The power cord of the scale is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular digital weighing scale with a metallic blue weighing platform. The scale has a white base with a control panel on the left side, featuring several buttons and a small display screen. The edges of the scale are slightly rounded, and the weighing platform has a textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The usb cable of the scale is not mentioned in the description.\nB. The scale is not mentioned in the description.\nC. The usb cable of the scale is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular digital weighing scale with a metallic blue weighing platform. 
The scale has a white base with a control panel on the left side, featuring several buttons and a small display screen. The edges of the scale are slightly rounded, and the weighing platform has a textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bicycle is not mentioned in the description.\nB. The bicycle is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular digital weighing scale with a metallic blue weighing platform. The scale has a white base with a control panel on the left side, featuring several buttons and a small display screen. The edges of the scale are slightly rounded, and the weighing platform has a textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The child is not mentioned in the description.\nB. The child is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular digital weighing scale with a metallic blue weighing platform. 
The scale has a white base with a control panel on the left side, featuring several buttons and a small display screen. The edges of the scale are slightly rounded, and the weighing platform has a textured surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The woman is not mentioned in the description.\nB. The woman is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The shape of the weighing platform is mentioned in the description and is rectangular.", + "C. The color of the weighing platform is mentioned in the description and is blue.", + "B. The shape of the keypad is not mentioned, but the keypad of the scale is mentioned.", + "B. The color of the display screen is not mentioned, but the display screen of the scale is mentioned.", + "C. The color of the body is mentioned in the description and is white.", + "A. The power cord of the scale is not mentioned in the description.", + "A. The usb cable of the scale is not mentioned in the description.", + "A. The bicycle is not mentioned in the description.", + "A. The child is not mentioned in the description.", + "A. The woman is not mentioned in the description." + ], + "score": 0.9, + "score_pos": 0.8, + "score_neg": 1.0, + "recognition_result": true + }, + "14832137": { + "pred": "A cylindrical, light purple plastic bucket with a smooth surface and a slightly flared rim. 
The bucket has a small, curved handle attached near the top.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The barrel/bucket is not mentioned.", + 0 + ], + [ + "The shape of the barrel/bucket is mentioned in the description and is round or cylindrical.", + 1 + ], + [ + "The shape of the barrel/bucket is mentioned in the description but is not round or cylindrical.", + -1 + ], + [ + "The shape of the barrel/bucket is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the barrel/bucket is mentioned in the description and is round or cylindrical.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle or the barrel/bucket is not mentioned.", + 0 + ], + [ + "The shape of the handle is mentioned in the description and is curved.", + 1 + ], + [ + "The shape of the handle is mentioned in the description but is not curved.", + -1 + ], + [ + "The shape of the handle is not mentioned, but the handle of the barrel/bucket is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the handle is mentioned in the description and is curved.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The barrel/bucket is not mentioned.", + 0 + ], + [ + "The color of the barrel/bucket is mentioned in the description and is purple.", + 1 + ], + [ + "The color of the barrel/bucket is mentioned in the description but is not purple.", + -1 + ], + [ + "The color of the barrel/bucket is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the barrel/bucket is mentioned in the description and is purple.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The barrel/bucket is not mentioned in the description.", + 0 + ], + [ + "The bottom of the barrel/bucket is mentioned in the description.", + -1 + ], + [ + "The bottom of the barrel/bucket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The bottom of the barrel/bucket is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The heart is mentioned in the description.", + -1 + ], + [ + "The heart is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The heart is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The barrel/bucket is not mentioned in the description.", + 0 + ], + [ + "The lid of the barrel/bucket is mentioned in the description.", + -1 + ], + [ + "The lid of the barrel/bucket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The lid of the barrel/bucket is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cake is mentioned in the description.", + -1 + ], + [ + "The cake is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The cake is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plate is mentioned in the description.", + -1 + ], + [ + "The plate is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The plate is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is barrel/bucket. Based on the image, is it likely that the object in the description is given class: barrel/bucket or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, light purple plastic bucket with a smooth surface and a slightly flared rim. The bucket has a small, curved handle attached near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is barrel/bucket. Based on the image, is it likely that the object in the description is given class: barrel/bucket or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, light purple plastic bucket with a smooth surface and a slightly flared rim. The bucket has a small, curved handle attached near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The barrel/bucket is not mentioned.\nB. The shape of the barrel/bucket is mentioned in the description and is round or cylindrical.\nC. The shape of the barrel/bucket is mentioned in the description but is not round or cylindrical.\nD. The shape of the barrel/bucket is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, light purple plastic bucket with a smooth surface and a slightly flared rim. The bucket has a small, curved handle attached near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handle or the barrel/bucket is not mentioned.\nB. The shape of the handle is mentioned in the description and is curved.\nC. The shape of the handle is mentioned in the description but is not curved.\nD. The shape of the handle is not mentioned, but the handle of the barrel/bucket is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, light purple plastic bucket with a smooth surface and a slightly flared rim. The bucket has a small, curved handle attached near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The barrel/bucket is not mentioned.\nB. The color of the barrel/bucket is mentioned in the description and is purple.\nC. The color of the barrel/bucket is mentioned in the description but is not purple.\nD. The color of the barrel/bucket is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, light purple plastic bucket with a smooth surface and a slightly flared rim. The bucket has a small, curved handle attached near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The barrel/bucket is not mentioned in the description.\nB. The bottom of the barrel/bucket is mentioned in the description.\nC. The bottom of the barrel/bucket is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, light purple plastic bucket with a smooth surface and a slightly flared rim. The bucket has a small, curved handle attached near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The heart is mentioned in the description.\nB. The heart is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, light purple plastic bucket with a smooth surface and a slightly flared rim. The bucket has a small, curved handle attached near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The barrel/bucket is not mentioned in the description.\nB. The lid of the barrel/bucket is mentioned in the description.\nC. The lid of the barrel/bucket is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, light purple plastic bucket with a smooth surface and a slightly flared rim. The bucket has a small, curved handle attached near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cake is mentioned in the description.\nB. The cake is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cylindrical, light purple plastic bucket with a smooth surface and a slightly flared rim. The bucket has a small, curved handle attached near the top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The plate is mentioned in the description.\nB. The plate is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The shape of the barrel/bucket is mentioned in the description and is round or cylindrical.", + "B. The shape of the handle is mentioned in the description and is curved.", + "B. The color of the barrel/bucket is mentioned in the description and is purple.", + "C. The bottom of the barrel/bucket is not mentioned in the description.", + "B. The heart is not mentioned in the description.", + "C. The lid of the barrel/bucket is not mentioned in the description.", + "B. The cake is not mentioned in the description.", + "B. The plate is not mentioned in the description." 
+ ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "15050320": { + "pred": "A dark brown wine glass with a wide, flat base and a slender stem.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the wine glass is not mentioned.", + 0 + ], + [ + "The wine glass are not mentioned.", + 0 + ], + [ + "The material of the wine glass is mentioned in the description and is glass.", + 1 + ], + [ + "The material of the wine glass is mentioned in the description but is not glass.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the wine glass is not mentioned.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the wine glass is not mentioned.", + 0 + ], + [ + "The wine glass are not mentioned.", + 0 + ], + [ + "The color of the wine glass is mentioned in the description and is transparent.", + 1 + ], + [ + "The color of the wine glass is mentioned in the description but is not transparent.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the wine glass is mentioned in the description but is not transparent.", + "pred_index": 3, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the base is not mentioned, but the base of the wine glass is mentioned.", + 0.5 + ], + [ + "The base or the wine glass is not mentioned.", + 0 + ], + [ + "The shape of the base is mentioned in the description and is round or flat.", + 1 + ], + [ + "The shape of the base is mentioned in the description but is not round or flat.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the base is mentioned in the description and is round or flat.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the stem is not mentioned, but the stem of the wine glass is mentioned.", + 0.5 + ], + [ + "The stem or the wine glass is not mentioned.", + 0 + ], + [ + "The shape of the stem is mentioned in the description and is slender.", + 1 + ], + [ + "The shape of the stem is mentioned in the description but is not slender.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the stem is mentioned in the description and is slender.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bowl of the wine glass is not mentioned in the description.", + 1 + ], + [ + "The bowl of the wine glass is mentioned in the description.", + -1 + ], + [ + "The wine glass are not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The bowl of the wine glass is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plate is not mentioned in the description.", + 1 + ], + [ + "The plate is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The plate is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rim of the wine glass is not mentioned in the description.", + 1 + ], + [ + "The rim of the wine glass is mentioned in the description.", + -1 + ], + [ + "The wine glass are not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The rim of the wine glass is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fork is not mentioned in the description.", + 1 + ], + [ + "The fork is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The fork is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The napkin is not mentioned in the description.", + 1 + ], + [ + "The napkin is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The napkin is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is wine glass. Based on the image, is it likely that the object in the description is given class: wine glass or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, flat base and a slender stem.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is wine glass. Based on the image, is it likely that the object in the description is given class: wine glass or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, flat base and a slender stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the wine glass is not mentioned.\nB. The wine glass are not mentioned.\nC. The material of the wine glass is mentioned in the description and is glass.\nD. The material of the wine glass is mentioned in the description but is not glass.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, flat base and a slender stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the wine glass is not mentioned.\nB. The wine glass are not mentioned.\nC. The color of the wine glass is mentioned in the description and is transparent.\nD. The color of the wine glass is mentioned in the description but is not transparent.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, flat base and a slender stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the base is not mentioned, but the base of the wine glass is mentioned.\nB. The base or the wine glass is not mentioned.\nC. The shape of the base is mentioned in the description and is round or flat.\nD. The shape of the base is mentioned in the description but is not round or flat.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, flat base and a slender stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the stem is not mentioned, but the stem of the wine glass is mentioned.\nB. The stem or the wine glass is not mentioned.\nC. The shape of the stem is mentioned in the description and is slender.\nD. The shape of the stem is mentioned in the description but is not slender.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, flat base and a slender stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bowl of the wine glass is not mentioned in the description.\nB. The bowl of the wine glass is mentioned in the description.\nC. The wine glass are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, flat base and a slender stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The plate is not mentioned in the description.\nB. The plate is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, flat base and a slender stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rim of the wine glass is not mentioned in the description.\nB. The rim of the wine glass is mentioned in the description.\nC. The wine glass are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, flat base and a slender stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fork is not mentioned in the description.\nB. 
The fork is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wine glass with a wide, flat base and a slender stem.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The napkin is not mentioned in the description.\nB. The napkin is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The material of the wine glass is not mentioned.", + "D. The color of the wine glass is mentioned in the description but is not transparent.", + "C. The shape of the base is mentioned in the description and is round or flat.", + "C. The shape of the stem is mentioned in the description and is slender.", + "A. The bowl of the wine glass is not mentioned in the description.", + "A. The plate is not mentioned in the description.", + "A. The rim of the wine glass is not mentioned in the description.", + "A. The fork is not mentioned in the description.", + "A. The napkin is not mentioned in the description." 
+ ], + "score": 0.6666666666666666, + "score_pos": 0.25, + "score_neg": 1.0, + "recognition_result": true + }, + "16957916": { + "pred": "Fresh green lettuce leaves with ruffled edges and a crisp texture, exhibiting a gradient of color from pale green at the base to a darker green towards the tips.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lettuce is not mentioned.", + 0 + ], + [ + "The color of the lettuce is not mentioned.", + 0 + ], + [ + "The color of the lettuce is mentioned in the description and is white/green.", + 1 + ], + [ + "The color of the lettuce is mentioned in the description but is not white/green.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the lettuce is mentioned in the description and is white/green.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lettuce is not mentioned.", + 0 + ], + [ + "The shape of the lettuce is not mentioned.", + 0 + ], + [ + "The shape of the lettuce is mentioned in the description and is curved or irregular.", + 1 + ], + [ + "The shape of the lettuce is mentioned in the description but is not curved or irregular.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the lettuce is mentioned in the description and is curved or irregular.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The edge or the lettuce is not mentioned.", + 0 + ], + [ + "The shape of the edge is not mentioned, but the edge of the lettuce is mentioned.", + 0.5 + ], + [ + "The shape of the edge is mentioned in the description and is jagged, ruffled, or rough.", + 1 + ], + [ + "The shape of the edge is mentioned in the description but is not jagged, ruffled, or rough.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the edge is mentioned in the description and is jagged, ruffled, or rough.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tartar sauce is mentioned in the description.", + -1 + ], + [ + "The tartar sauce is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The tartar sauce is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fried fish are mentioned in the description.", + -1 + ], + [ + "The fried fish are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The fried fish are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plate is mentioned in the description.", + -1 + ], + [ + "The plate is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The plate is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The french fries are mentioned in the description.", + -1 + ], + [ + "The french fries are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The french fries are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The utensil is mentioned in the description.", + -1 + ], + [ + "The utensil is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The utensil is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is lettuce. Based on the image, is it likely that the object in the description is given class: lettuce or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nFresh green lettuce leaves with ruffled edges and a crisp texture, exhibiting a gradient of color from pale green at the base to a darker green towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is lettuce. Based on the image, is it likely that the object in the description is given class: lettuce or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nFresh green lettuce leaves with ruffled edges and a crisp texture, exhibiting a gradient of color from pale green at the base to a darker green towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lettuce is not mentioned.\nB. The color of the lettuce is not mentioned.\nC. The color of the lettuce is mentioned in the description and is white/green.\nD. The color of the lettuce is mentioned in the description but is not white/green.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nFresh green lettuce leaves with ruffled edges and a crisp texture, exhibiting a gradient of color from pale green at the base to a darker green towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lettuce is not mentioned.\nB. The shape of the lettuce is not mentioned.\nC. The shape of the lettuce is mentioned in the description and is curved or irregular.\nD. The shape of the lettuce is mentioned in the description but is not curved or irregular.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nFresh green lettuce leaves with ruffled edges and a crisp texture, exhibiting a gradient of color from pale green at the base to a darker green towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The edge or the lettuce is not mentioned.\nB. The shape of the edge is not mentioned, but the edge of the lettuce is mentioned.\nC. The shape of the edge is mentioned in the description and is jagged, ruffled, or rough.\nD. The shape of the edge is mentioned in the description but is not jagged, ruffled, or rough.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nFresh green lettuce leaves with ruffled edges and a crisp texture, exhibiting a gradient of color from pale green at the base to a darker green towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tartar sauce is mentioned in the description.\nB. The tartar sauce is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nFresh green lettuce leaves with ruffled edges and a crisp texture, exhibiting a gradient of color from pale green at the base to a darker green towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fried fish are mentioned in the description.\nB. The fried fish are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nFresh green lettuce leaves with ruffled edges and a crisp texture, exhibiting a gradient of color from pale green at the base to a darker green towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The plate is mentioned in the description.\nB. The plate is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nFresh green lettuce leaves with ruffled edges and a crisp texture, exhibiting a gradient of color from pale green at the base to a darker green towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The french fries are mentioned in the description.\nB. The french fries are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nFresh green lettuce leaves with ruffled edges and a crisp texture, exhibiting a gradient of color from pale green at the base to a darker green towards the tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The utensil is mentioned in the description.\nB. The utensil is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the lettuce is mentioned in the description and is white/green.", + "C. The shape of the lettuce is mentioned in the description and is curved or irregular.", + "C. The shape of the edge is mentioned in the description and is jagged, ruffled, or rough.", + "B. The tartar sauce is not mentioned in the description.", + "B. The fried fish are not mentioned in the description.", + "B. The plate is not mentioned in the description.", + "B. The french fries are not mentioned in the description.", + "B. The utensil is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "17385866": { + "pred": "A scoop of vanilla ice cream topped with a slice of red strawberry, resting on a bed of green mint leaves.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the scoop is not mentioned, but the scoop of the ice cream is mentioned.", + 0.5 + ], + [ + "The scoop or the ice cream is not mentioned.", + 0 + ], + [ + "The shape of the scoop is mentioned in the description but is not round.", + -1 + ], + [ + "The shape of the scoop is mentioned in the description and is round.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The shape of the scoop is not mentioned, but the scoop of the ice cream is mentioned.", + "pred_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the cone is not mentioned, but the cone of the ice cream is mentioned.", + 0.5 + ], + [ + "The cone or the ice cream is not mentioned.", + 0 + ], + [ + "The material of the cone is mentioned in the description but is not waffle.", + -1 + ], + [ + "The material of the cone is mentioned in the description and is waffle.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. The cone or the ice cream is not mentioned.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the cone is not mentioned, but the cone of the ice cream is mentioned.", + 0.5 + ], + [ + "The cone or the ice cream is not mentioned.", + 0 + ], + [ + "The color of the cone is mentioned in the description but is not yellow.", + -1 + ], + [ + "The color of the cone is mentioned in the description and is yellow.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. The cone or the ice cream is not mentioned.", + "pred_index": 1, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ice cream is not mentioned in the description.", + 0 + ], + [ + "The cherry of the ice cream is mentioned in the description.", + -1 + ], + [ + "The cherry of the ice cream is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The cherry of the ice cream is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The woman is mentioned in the description.", + -1 + ], + [ + "The woman is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The woman is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ice cream is not mentioned in the description.", + 0 + ], + [ + "The sprinkles of the ice cream are mentioned in the description.", + -1 + ], + [ + "The sprinkles of the ice cream are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The sprinkles of the ice cream are not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The man is mentioned in the description.", + -1 + ], + [ + "The man is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The man is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Turkish flag is mentioned in the description.", + -1 + ], + [ + "The Turkish flag is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The Turkish flag is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is ice cream. 
Based on the image, is it likely that the object in the description is given class: ice cream or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream topped with a slice of red strawberry, resting on a bed of green mint leaves.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is ice cream. Based on the image, is it likely that the object in the description is given class: ice cream or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream topped with a slice of red strawberry, resting on a bed of green mint leaves.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the scoop is not mentioned, but the scoop of the ice cream is mentioned.\nB. The scoop or the ice cream is not mentioned.\nC. 
The shape of the scoop is mentioned in the description but is not round.\nD. The shape of the scoop is mentioned in the description and is round.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream topped with a slice of red strawberry, resting on a bed of green mint leaves.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the cone is not mentioned, but the cone of the ice cream is mentioned.\nB. The cone or the ice cream is not mentioned.\nC. The material of the cone is mentioned in the description but is not waffle.\nD. The material of the cone is mentioned in the description and is waffle.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream topped with a slice of red strawberry, resting on a bed of green mint leaves.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the cone is not mentioned, but the cone of the ice cream is mentioned.\nB. The cone or the ice cream is not mentioned.\nC. 
The color of the cone is mentioned in the description but is not yellow.\nD. The color of the cone is mentioned in the description and is yellow.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream topped with a slice of red strawberry, resting on a bed of green mint leaves.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ice cream is not mentioned in the description.\nB. The cherry of the ice cream is mentioned in the description.\nC. The cherry of the ice cream is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream topped with a slice of red strawberry, resting on a bed of green mint leaves.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The woman is mentioned in the description.\nB. The woman is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream topped with a slice of red strawberry, resting on a bed of green mint leaves.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ice cream is not mentioned in the description.\nB. The sprinkles of the ice cream are mentioned in the description.\nC. The sprinkles of the ice cream are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream topped with a slice of red strawberry, resting on a bed of green mint leaves.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The man is mentioned in the description.\nB. The man is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA scoop of vanilla ice cream topped with a slice of red strawberry, resting on a bed of green mint leaves.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Turkish flag is mentioned in the description.\nB. The Turkish flag is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The shape of the scoop is not mentioned, but the scoop of the ice cream is mentioned.", + "B. The cone or the ice cream is not mentioned.", + "B. The cone or the ice cream is not mentioned.", + "C. The cherry of the ice cream is not mentioned in the description.", + "B. The woman is not mentioned in the description.", + "C. The sprinkles of the ice cream are not mentioned in the description.", + "B. The man is not mentioned in the description.", + "B. The Turkish flag is not mentioned in the description." + ], + "score": 0.6875, + "score_pos": 0.16666666666666666, + "score_neg": 1.0, + "recognition_result": true + }, + "17404769": { + "pred": "The car is a white minivan with a rear design featuring a large, dark-tinted rear window and a smaller, rectangular window on the side. The rear lights are vertically aligned and wrap around the side of the vehicle. The car has a visible rear wheel with a five-spoke alloy rim. 
There is a small, square fuel cap located on the side panel near the rear wheel.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the gas cap is mentioned in the description but is not square.", + -1 + ], + [ + "The shape of the gas cap is not mentioned, but the gas cap of the suv is mentioned.", + 0.5 + ], + [ + "The gas cap or the suv is not mentioned.", + 0 + ], + [ + "The shape of the gas cap is mentioned in the description and is square.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the gas cap is mentioned in the description and is square.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the taillight is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the taillight is not mentioned, but the taillight of the suv is mentioned.", + 0.5 + ], + [ + "The taillight or the suv is not mentioned.", + 0 + ], + [ + "The material of the taillight is mentioned in the description and is plastic.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. The material of the taillight is not mentioned, but the taillight of the suv is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the suv is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the suv is not mentioned.", + 0 + ], + [ + "The suv is not mentioned.", + 0 + ], + [ + "The color of the suv is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The suv is not mentioned.", + "pred_index": 2, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The front bumper of the suv is not mentioned in the description.", + 1 + ], + [ + "The suv is not mentioned in the description.", + 0 + ], + [ + "The front bumper of the suv is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The suv is not mentioned in the description.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The front wheel of the suv is not mentioned in the description.", + 1 + ], + [ + "The suv is not mentioned in the description.", + 0 + ], + [ + "The front wheel of the suv is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The suv is not mentioned in the description.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The grille of the suv is not mentioned in the description.", + 1 + ], + [ + "The suv is not mentioned in the description.", + 0 + ], + [ + "The grille of the suv is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The grille of the suv is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The building is not mentioned in the description.", + 1 + ], + [ + "The building is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The building is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tree is not mentioned in the description.", + 1 + ], + [ + "The tree is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The tree is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is suv. Based on the image, is it likely that the object in the description is given class: suv or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white minivan with a rear design featuring a large, dark-tinted rear window and a smaller, rectangular window on the side. The rear lights are vertically aligned and wrap around the side of the vehicle. The car has a visible rear wheel with a five-spoke alloy rim. There is a small, square fuel cap located on the side panel near the rear wheel.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is suv. Based on the image, is it likely that the object in the description is given class: suv or object of a similar type?\nA. Yes\nB. 
No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white minivan with a rear design featuring a large, dark-tinted rear window and a smaller, rectangular window on the side. The rear lights are vertically aligned and wrap around the side of the vehicle. The car has a visible rear wheel with a five-spoke alloy rim. There is a small, square fuel cap located on the side panel near the rear wheel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the gas cap is mentioned in the description but is not square.\nB. The shape of the gas cap is not mentioned, but the gas cap of the suv is mentioned.\nC. The gas cap or the suv is not mentioned.\nD. The shape of the gas cap is mentioned in the description and is square.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white minivan with a rear design featuring a large, dark-tinted rear window and a smaller, rectangular window on the side. The rear lights are vertically aligned and wrap around the side of the vehicle. The car has a visible rear wheel with a five-spoke alloy rim. 
There is a small, square fuel cap located on the side panel near the rear wheel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the taillight is mentioned in the description but is not plastic.\nB. The material of the taillight is not mentioned, but the taillight of the suv is mentioned.\nC. The taillight or the suv is not mentioned.\nD. The material of the taillight is mentioned in the description and is plastic.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white minivan with a rear design featuring a large, dark-tinted rear window and a smaller, rectangular window on the side. The rear lights are vertically aligned and wrap around the side of the vehicle. The car has a visible rear wheel with a five-spoke alloy rim. There is a small, square fuel cap located on the side panel near the rear wheel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the suv is mentioned in the description but is not white.\nB. The color of the suv is not mentioned.\nC. The suv is not mentioned.\nD. The color of the suv is mentioned in the description and is white.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white minivan with a rear design featuring a large, dark-tinted rear window and a smaller, rectangular window on the side. The rear lights are vertically aligned and wrap around the side of the vehicle. The car has a visible rear wheel with a five-spoke alloy rim. There is a small, square fuel cap located on the side panel near the rear wheel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The front bumper of the suv is not mentioned in the description.\nB. The suv is not mentioned in the description.\nC. The front bumper of the suv is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white minivan with a rear design featuring a large, dark-tinted rear window and a smaller, rectangular window on the side. The rear lights are vertically aligned and wrap around the side of the vehicle. The car has a visible rear wheel with a five-spoke alloy rim. There is a small, square fuel cap located on the side panel near the rear wheel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The front wheel of the suv is not mentioned in the description.\nB. The suv is not mentioned in the description.\nC. 
The front wheel of the suv is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white minivan with a rear design featuring a large, dark-tinted rear window and a smaller, rectangular window on the side. The rear lights are vertically aligned and wrap around the side of the vehicle. The car has a visible rear wheel with a five-spoke alloy rim. There is a small, square fuel cap located on the side panel near the rear wheel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The grille of the suv is not mentioned in the description.\nB. The suv is not mentioned in the description.\nC. The grille of the suv is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white minivan with a rear design featuring a large, dark-tinted rear window and a smaller, rectangular window on the side. The rear lights are vertically aligned and wrap around the side of the vehicle. The car has a visible rear wheel with a five-spoke alloy rim. 
There is a small, square fuel cap located on the side panel near the rear wheel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The building is not mentioned in the description.\nB. The building is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe car is a white minivan with a rear design featuring a large, dark-tinted rear window and a smaller, rectangular window on the side. The rear lights are vertically aligned and wrap around the side of the vehicle. The car has a visible rear wheel with a five-spoke alloy rim. There is a small, square fuel cap located on the side panel near the rear wheel.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tree is not mentioned in the description.\nB. The tree is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The shape of the gas cap is mentioned in the description and is square.", + "B. The material of the taillight is not mentioned, but the taillight of the suv is mentioned.", + "C. The suv is not mentioned.", + "B. The suv is not mentioned in the description.", + "B. The suv is not mentioned in the description.", + "A. The grille of the suv is not mentioned in the description.", + "A. The building is not mentioned in the description.", + "A. The tree is not mentioned in the description." 
+ ], + "score": 0.5625, + "score_pos": 0.5, + "score_neg": 0.6, + "recognition_result": true + }, + "18217373": { + "pred": "The spectacles feature a round, gold-colored frame with a thin, dark brown temple arm. The lens is a light, translucent yellow.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The frame or the glasses is not mentioned.", + 0 + ], + [ + "The color of the frame is not mentioned, but the frame of the glasses is mentioned.", + 0.5 + ], + [ + "The color of the frame is mentioned in the description and is black or metallic.", + 1 + ], + [ + "The color of the frame is mentioned in the description but is not black or metallic.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the frame is mentioned in the description and is black or metallic.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The frame or the glasses is not mentioned.", + 0 + ], + [ + "The shape of the frame is not mentioned, but the frame of the glasses is mentioned.", + 0.5 + ], + [ + "The shape of the frame is mentioned in the description and is square or rounded.", + 1 + ], + [ + "The shape of the frame is mentioned in the description but is not square or rounded.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the frame is mentioned in the description and is square or rounded.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lens or the glasses are not mentioned.", + 0 + ], + [ + "The color of the lens is not mentioned, but the lens of the glasses are mentioned.", + 0.5 + ], + [ + "The color of the lens is mentioned in the description and is clear.", + 1 + ], + [ + "The color of the lens is mentioned in the description but is not clear.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the lens is mentioned in the description and is clear.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The temple arm or the glasses is not mentioned.", + 0 + ], + [ + "The shape of the temple arm is not mentioned, but the temple arm of the glasses is mentioned.", + 0.5 + ], + [ + "The shape of the temple arm is mentioned in the description and is curved.", + 1 + ], + [ + "The shape of the temple arm is mentioned in the description but is not curved.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the temple arm is not mentioned, but the temple arm of the glasses is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fruit is mentioned in the description.", + -1 + ], + [ + "The fruit is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The fruit is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The women are mentioned in the description.", + -1 + ], + [ + "The women are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The women are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The vegetables are mentioned in the description.", + -1 + ], + [ + "The vegetables are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The vegetables are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The produce is mentioned in the description.", + -1 + ], + [ + "The produce is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The produce is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The food is mentioned in the description.", + -1 + ], + [ + "The food is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The food is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is glasses. 
Based on the image, is it likely that the object in the description is given class: glasses or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a round, gold-colored frame with a thin, dark brown temple arm. The lens is a light, translucent yellow.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is glasses. Based on the image, is it likely that the object in the description is given class: glasses or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a round, gold-colored frame with a thin, dark brown temple arm. The lens is a light, translucent yellow.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The frame or the glasses is not mentioned.\nB. The color of the frame is not mentioned, but the frame of the glasses is mentioned.\nC. 
The color of the frame is mentioned in the description and is black or metallic.\nD. The color of the frame is mentioned in the description but is not black or metallic.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a round, gold-colored frame with a thin, dark brown temple arm. The lens is a light, translucent yellow.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The frame or the glasses is not mentioned.\nB. The shape of the frame is not mentioned, but the frame of the glasses is mentioned.\nC. The shape of the frame is mentioned in the description and is square or rounded.\nD. The shape of the frame is mentioned in the description but is not square or rounded.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a round, gold-colored frame with a thin, dark brown temple arm. The lens is a light, translucent yellow.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lens or the glasses are not mentioned.\nB. 
The color of the lens is not mentioned, but the lens of the glasses are mentioned.\nC. The color of the lens is mentioned in the description and is clear.\nD. The color of the lens is mentioned in the description but is not clear.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a round, gold-colored frame with a thin, dark brown temple arm. The lens is a light, translucent yellow.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The temple arm or the glasses is not mentioned.\nB. The shape of the temple arm is not mentioned, but the temple arm of the glasses is mentioned.\nC. The shape of the temple arm is mentioned in the description and is curved.\nD. The shape of the temple arm is mentioned in the description but is not curved.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a round, gold-colored frame with a thin, dark brown temple arm. The lens is a light, translucent yellow.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fruit is mentioned in the description.\nB. 
The fruit is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a round, gold-colored frame with a thin, dark brown temple arm. The lens is a light, translucent yellow.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The women are mentioned in the description.\nB. The women are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a round, gold-colored frame with a thin, dark brown temple arm. The lens is a light, translucent yellow.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The vegetables are mentioned in the description.\nB. The vegetables are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a round, gold-colored frame with a thin, dark brown temple arm. The lens is a light, translucent yellow.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The produce is mentioned in the description.\nB. The produce is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe spectacles feature a round, gold-colored frame with a thin, dark brown temple arm. The lens is a light, translucent yellow.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The food is mentioned in the description.\nB. The food is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the frame is mentioned in the description and is black or metallic.", + "C. The shape of the frame is mentioned in the description and is square or rounded.", + "C. The color of the lens is mentioned in the description and is clear.", + "B. The shape of the temple arm is not mentioned, but the temple arm of the glasses is mentioned.", + "B. The fruit is not mentioned in the description.", + "B. The women are not mentioned in the description.", + "B. The vegetables are not mentioned in the description.", + "B. The produce is not mentioned in the description.", + "B. The food is not mentioned in the description." 
+ ], + "score": 0.9444444444444444, + "score_pos": 0.875, + "score_neg": 1.0, + "recognition_result": true + }, + "19455186": { + "pred": "A blue metal cart with a rectangular frame and four black wheels. The cart has two horizontal blue bars across the front, with a small white label affixed to the upper bar.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the cart/trolley is mentioned in the description and is blue.", + 1 + ], + [ + "The color of the cart/trolley is not mentioned.", + 0 + ], + [ + "The color of the cart/trolley is mentioned in the description but is not blue.", + -1 + ], + [ + "The cart/trolley is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the cart/trolley is mentioned in the description and is blue.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the bars is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the bars is not mentioned, but the bars of the cart/trolley are mentioned.", + 0.5 + ], + [ + "The shape of the bars is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The bars or the cart/trolley are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The shape of the bars is not mentioned, but the bars of the cart/trolley are mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the wheels is mentioned in the description and is small.", + 1 + ], + [ + "The size of the wheels is not mentioned, but the wheels of the cart/trolley are mentioned.", + 0.5 + ], + [ + "The size of the wheels is mentioned in the description but is not small.", + -1 + ], + [ + "The wheels or the cart/trolley are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The size of the wheels is not mentioned, but the wheels of the cart/trolley are mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the wheels is mentioned in the description and is black.", + 1 + ], + [ + "The color of the wheels is not mentioned, but the wheels of the cart/trolley are mentioned.", + 0.5 + ], + [ + "The color of the wheels is mentioned in the description but is not black.", + -1 + ], + [ + "The wheels or the cart/trolley are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the wheels is mentioned in the description and is black.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lid of the cart/trolley is mentioned in the description.", + -1 + ], + [ + "The cart/trolley is not mentioned in the description.", + 0 + ], + [ + "The lid of the cart/trolley is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The lid of the cart/trolley is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The basket of the cart/trolley is mentioned in the description.", + -1 + ], + [ + "The cart/trolley is not mentioned in the description.", + 0 + ], + [ + "The basket of the cart/trolley is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The basket of the cart/trolley is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shelves are mentioned in the description.", + -1 + ], + [ + "The shelves are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The shelves are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The drawers of the cart/trolley are mentioned in the description.", + -1 + ], + [ + "The cart/trolley is not mentioned in the description.", + 0 + ], + [ + "The drawers of the cart/trolley are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The drawers of the cart/trolley are not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lights are mentioned in the description.", + -1 + ], + [ + "The lights are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The lights are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is cart/trolley. Based on the image, is it likely that the object in the description is given class: cart/trolley or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal cart with a rectangular frame and four black wheels. The cart has two horizontal blue bars across the front, with a small white label affixed to the upper bar.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is cart/trolley. Based on the image, is it likely that the object in the description is given class: cart/trolley or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal cart with a rectangular frame and four black wheels. 
The cart has two horizontal blue bars across the front, with a small white label affixed to the upper bar.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the cart/trolley is mentioned in the description and is blue.\nB. The color of the cart/trolley is not mentioned.\nC. The color of the cart/trolley is mentioned in the description but is not blue.\nD. The cart/trolley is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal cart with a rectangular frame and four black wheels. The cart has two horizontal blue bars across the front, with a small white label affixed to the upper bar.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the bars is mentioned in the description and is rectangular.\nB. The shape of the bars is not mentioned, but the bars of the cart/trolley are mentioned.\nC. The shape of the bars is mentioned in the description but is not rectangular.\nD. The bars or the cart/trolley are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA blue metal cart with a rectangular frame and four black wheels. The cart has two horizontal blue bars across the front, with a small white label affixed to the upper bar.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the wheels is mentioned in the description and is small.\nB. The size of the wheels is not mentioned, but the wheels of the cart/trolley are mentioned.\nC. The size of the wheels is mentioned in the description but is not small.\nD. The wheels or the cart/trolley are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal cart with a rectangular frame and four black wheels. The cart has two horizontal blue bars across the front, with a small white label affixed to the upper bar.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the wheels is mentioned in the description and is black.\nB. The color of the wheels is not mentioned, but the wheels of the cart/trolley are mentioned.\nC. The color of the wheels is mentioned in the description but is not black.\nD. The wheels or the cart/trolley are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal cart with a rectangular frame and four black wheels. The cart has two horizontal blue bars across the front, with a small white label affixed to the upper bar.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lid of the cart/trolley is mentioned in the description.\nB. The cart/trolley is not mentioned in the description.\nC. The lid of the cart/trolley is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal cart with a rectangular frame and four black wheels. The cart has two horizontal blue bars across the front, with a small white label affixed to the upper bar.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The basket of the cart/trolley is mentioned in the description.\nB. The cart/trolley is not mentioned in the description.\nC. The basket of the cart/trolley is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal cart with a rectangular frame and four black wheels. The cart has two horizontal blue bars across the front, with a small white label affixed to the upper bar.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shelves are mentioned in the description.\nB. The shelves are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal cart with a rectangular frame and four black wheels. The cart has two horizontal blue bars across the front, with a small white label affixed to the upper bar.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The drawers of the cart/trolley are mentioned in the description.\nB. The cart/trolley is not mentioned in the description.\nC. The drawers of the cart/trolley are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue metal cart with a rectangular frame and four black wheels. 
The cart has two horizontal blue bars across the front, with a small white label affixed to the upper bar.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lights are mentioned in the description.\nB. The lights are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The color of the cart/trolley is mentioned in the description and is blue.", + "B. The shape of the bars is not mentioned, but the bars of the cart/trolley are mentioned.", + "B. The size of the wheels is not mentioned, but the wheels of the cart/trolley are mentioned.", + "A. The color of the wheels is mentioned in the description and is black.", + "C. The lid of the cart/trolley is not mentioned in the description.", + "C. The basket of the cart/trolley is not mentioned in the description.", + "B. The shelves are not mentioned in the description.", + "C. The drawers of the cart/trolley are not mentioned in the description.", + "B. The lights are not mentioned in the description." + ], + "score": 0.8888888888888888, + "score_pos": 0.75, + "score_neg": 1.0, + "recognition_result": true + }, + "19610023": { + "pred": "A bright green, frog-shaped slipper with a smooth, rounded body and a wide, open mouth. The slipper has a small, raised bump on the top of its head, resembling an eye.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the slippers is mentioned in the description but is not green.", + -1 + ], + [ + "The slippers are not mentioned.", + 0 + ], + [ + "The color of the slippers is mentioned in the description and is green.", + 1 + ], + [ + "The color of the slippers is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the slippers is mentioned in the description and is green.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the toe box is mentioned in the description but is not round.", + -1 + ], + [ + "The toe box or the slippers is not mentioned.", + 0 + ], + [ + "The shape of the toe box is mentioned in the description and is round.", + 1 + ], + [ + "The shape of the toe box is not mentioned, but the toe box of the slippers is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the toe box is mentioned in the description and is round.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the slippers is mentioned in the description but is not frog-shaped.", + -1 + ], + [ + "The slippers are not mentioned.", + 0 + ], + [ + "The shape of the slippers is mentioned in the description and is frog-shaped.", + 1 + ], + [ + "The shape of the slippers is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the slippers is mentioned in the description and is frog-shaped.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plant is not mentioned in the description.", + 1 + ], + [ + "The plant is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The plant is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lining of the slippers is not mentioned in the description.", + 1 + ], + [ + "The slippers are not mentioned in the description.", + 0 + ], + [ + "The lining of the slippers is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The lining of the slippers is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shirt is not mentioned in the description.", + 1 + ], + [ + "The shirt is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The shirt is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The buckle of the slippers is not mentioned in the description.", + 1 + ], + [ + "The slippers are not mentioned in the description.", + 0 + ], + [ + "The buckle of the slippers is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The buckle of the slippers is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wall is not mentioned in the description.", + 1 + ], + [ + "The wall is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The wall is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is slippers. 
Based on the image, is it likely that the object in the description is given class: slippers or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green, frog-shaped slipper with a smooth, rounded body and a wide, open mouth. The slipper has a small, raised bump on the top of its head, resembling an eye.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is slippers. Based on the image, is it likely that the object in the description is given class: slippers or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green, frog-shaped slipper with a smooth, rounded body and a wide, open mouth. The slipper has a small, raised bump on the top of its head, resembling an eye.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the slippers is mentioned in the description but is not green.\nB. 
The slippers are not mentioned.\nC. The color of the slippers is mentioned in the description and is green.\nD. The color of the slippers is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green, frog-shaped slipper with a smooth, rounded body and a wide, open mouth. The slipper has a small, raised bump on the top of its head, resembling an eye.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the toe box is mentioned in the description but is not round.\nB. The toe box or the slippers is not mentioned.\nC. The shape of the toe box is mentioned in the description and is round.\nD. The shape of the toe box is not mentioned, but the toe box of the slippers is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green, frog-shaped slipper with a smooth, rounded body and a wide, open mouth. The slipper has a small, raised bump on the top of its head, resembling an eye.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The shape of the slippers is mentioned in the description but is not frog-shaped.\nB. The slippers are not mentioned.\nC. The shape of the slippers is mentioned in the description and is frog-shaped.\nD. The shape of the slippers is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green, frog-shaped slipper with a smooth, rounded body and a wide, open mouth. The slipper has a small, raised bump on the top of its head, resembling an eye.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The plant is not mentioned in the description.\nB. The plant is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green, frog-shaped slipper with a smooth, rounded body and a wide, open mouth. The slipper has a small, raised bump on the top of its head, resembling an eye.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lining of the slippers is not mentioned in the description.\nB. The slippers are not mentioned in the description.\nC. 
The lining of the slippers is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green, frog-shaped slipper with a smooth, rounded body and a wide, open mouth. The slipper has a small, raised bump on the top of its head, resembling an eye.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shirt is not mentioned in the description.\nB. The shirt is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green, frog-shaped slipper with a smooth, rounded body and a wide, open mouth. The slipper has a small, raised bump on the top of its head, resembling an eye.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The buckle of the slippers is not mentioned in the description.\nB. The slippers are not mentioned in the description.\nC. The buckle of the slippers is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA bright green, frog-shaped slipper with a smooth, rounded body and a wide, open mouth. The slipper has a small, raised bump on the top of its head, resembling an eye.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wall is not mentioned in the description.\nB. The wall is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the slippers is mentioned in the description and is green.", + "C. The shape of the toe box is mentioned in the description and is round.", + "C. The shape of the slippers is mentioned in the description and is frog-shaped.", + "A. The plant is not mentioned in the description.", + "A. The lining of the slippers is not mentioned in the description.", + "A. The shirt is not mentioned in the description.", + "A. The buckle of the slippers is not mentioned in the description.", + "A. The wall is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "19610025": { + "pred": "A white rabbit with upright ears, wearing a yellow shirt and blue pants, is holding a brown basket on its back.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the ear is mentioned in the description and is large.", + 1 + ], + [ + "The size of the ear is not mentioned, but the ear of the rabbit is mentioned.", + 0.5 + ], + [ + "The ear or the rabbit is not mentioned.", + 0 + ], + [ + "The size of the ear is mentioned in the description but is not large.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The size of the ear is not mentioned, but the ear of the rabbit is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the ear is mentioned in the description and is pink.", + 1 + ], + [ + "The color of the ear is not mentioned, but the ear of the rabbit is mentioned.", + 0.5 + ], + [ + "The ear or the rabbit is not mentioned.", + 0 + ], + [ + "The color of the ear is mentioned in the description but is not pink.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the ear is not mentioned, but the ear of the rabbit is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the eye is mentioned in the description and is black.", + 1 + ], + [ + "The color of the eye is not mentioned, but the eye of the rabbit is mentioned.", + 0.5 + ], + [ + "The eye or the rabbit is not mentioned.", + 0 + ], + [ + "The color of the eye is mentioned in the description but is not black.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the eye is not mentioned, but the eye of the rabbit is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the shirt is mentioned in the description and is yellow.", + 1 + ], + [ + "The color of the shirt is not mentioned, but the shirt of the rabbit is mentioned.", + 0.5 + ], + [ + "The shirt or the rabbit is not mentioned.", + 0 + ], + [ + "The color of the shirt is mentioned in the description but is not yellow.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the shirt is mentioned in the description and is yellow.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the skirt is mentioned in the description and is blue.", + 1 + ], + [ + "The color of the skirt is not mentioned, but the skirt of the rabbit is mentioned.", + 0.5 + ], + [ + "The skirt or the rabbit is not mentioned.", + 0 + ], + [ + "The color of the skirt is mentioned in the description but is not blue.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the skirt is not mentioned, but the skirt of the rabbit is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rabbit is not mentioned in the description.", + 0 + ], + [ + "The whisker of the rabbit is not mentioned in the description.", + 1 + ], + [ + "The whisker of the rabbit is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The whisker of the rabbit is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rabbit is not mentioned in the description.", + 0 + ], + [ + "The teeth of the rabbit are not mentioned in the description.", + 1 + ], + [ + "The teeth of the rabbit are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The teeth of the rabbit are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plant is not mentioned in the description.", + 1 + ], + [ + "The plant is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The plant is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rabbit is not mentioned in the description.", + 0 + ], + [ + "The tail of the rabbit is not mentioned in the description.", + 1 + ], + [ + "The tail of the rabbit is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The tail of the rabbit is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The poster is not mentioned in the description.", + 1 + ], + [ + "The poster is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The poster is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is rabbit. Based on the image, is it likely that the object in the description is given class: rabbit or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with upright ears, wearing a yellow shirt and blue pants, is holding a brown basket on its back.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is rabbit. Based on the image, is it likely that the object in the description is given class: rabbit or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with upright ears, wearing a yellow shirt and blue pants, is holding a brown basket on its back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the ear is mentioned in the description and is large.\nB. The size of the ear is not mentioned, but the ear of the rabbit is mentioned.\nC. The ear or the rabbit is not mentioned.\nD. The size of the ear is mentioned in the description but is not large.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with upright ears, wearing a yellow shirt and blue pants, is holding a brown basket on its back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the ear is mentioned in the description and is pink.\nB. The color of the ear is not mentioned, but the ear of the rabbit is mentioned.\nC. The ear or the rabbit is not mentioned.\nD. The color of the ear is mentioned in the description but is not pink.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with upright ears, wearing a yellow shirt and blue pants, is holding a brown basket on its back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the eye is mentioned in the description and is black.\nB. The color of the eye is not mentioned, but the eye of the rabbit is mentioned.\nC. The eye or the rabbit is not mentioned.\nD. The color of the eye is mentioned in the description but is not black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with upright ears, wearing a yellow shirt and blue pants, is holding a brown basket on its back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the shirt is mentioned in the description and is yellow.\nB. The color of the shirt is not mentioned, but the shirt of the rabbit is mentioned.\nC. The shirt or the rabbit is not mentioned.\nD. The color of the shirt is mentioned in the description but is not yellow.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with upright ears, wearing a yellow shirt and blue pants, is holding a brown basket on its back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the skirt is mentioned in the description and is blue.\nB. The color of the skirt is not mentioned, but the skirt of the rabbit is mentioned.\nC. The skirt or the rabbit is not mentioned.\nD. The color of the skirt is mentioned in the description but is not blue.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with upright ears, wearing a yellow shirt and blue pants, is holding a brown basket on its back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rabbit is not mentioned in the description.\nB. The whisker of the rabbit is not mentioned in the description.\nC. The whisker of the rabbit is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with upright ears, wearing a yellow shirt and blue pants, is holding a brown basket on its back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rabbit is not mentioned in the description.\nB. The teeth of the rabbit are not mentioned in the description.\nC. The teeth of the rabbit are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with upright ears, wearing a yellow shirt and blue pants, is holding a brown basket on its back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The plant is not mentioned in the description.\nB. The plant is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with upright ears, wearing a yellow shirt and blue pants, is holding a brown basket on its back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rabbit is not mentioned in the description.\nB. The tail of the rabbit is not mentioned in the description.\nC. The tail of the rabbit is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA white rabbit with upright ears, wearing a yellow shirt and blue pants, is holding a brown basket on its back.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The poster is not mentioned in the description.\nB. The poster is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The size of the ear is not mentioned, but the ear of the rabbit is mentioned.", + "B. The color of the ear is not mentioned, but the ear of the rabbit is mentioned.", + "B. The color of the eye is not mentioned, but the eye of the rabbit is mentioned.", + "A. The color of the shirt is mentioned in the description and is yellow.", + "B. The color of the skirt is not mentioned, but the skirt of the rabbit is mentioned.", + "B. The whisker of the rabbit is not mentioned in the description.", + "B. The teeth of the rabbit are not mentioned in the description.", + "A. The plant is not mentioned in the description.", + "B. The tail of the rabbit is not mentioned in the description.", + "A. The poster is not mentioned in the description." + ], + "score": 0.8, + "score_pos": 0.6, + "score_neg": 1.0, + "recognition_result": true + }, + "20568676": { + "pred": "A stainless steel bowl filled with a mixture of chopped nuts and a yellow spatula resting on top.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the rim is mentioned in the description and is round.", + 1 + ], + [ + "The rim or the pot is not mentioned.", + 0 + ], + [ + "The shape of the rim is mentioned in the description but is not round.", + -1 + ], + [ + "The shape of the rim is not mentioned, but the rim of the pot is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. 
The shape of the rim is not mentioned, but the rim of the pot is mentioned.", + "pred_index": 3, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the content is mentioned in the description and is yellow or brown.", + 1 + ], + [ + "The content or the pot is not mentioned.", + 0 + ], + [ + "The color of the content is mentioned in the description but is not yellow or brown.", + -1 + ], + [ + "The color of the content is not mentioned, but the content of the pot is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the content is not mentioned, but the content of the pot is mentioned.", + "pred_index": 3, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the pot is mentioned in the description and is metal.", + 1 + ], + [ + "The pot is not mentioned.", + 0 + ], + [ + "The material of the pot is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the pot is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the pot is mentioned in the description and is metal.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The spout of the pot is mentioned in the description.", + -1 + ], + [ + "The spout of the pot is not mentioned in the description.", + 1 + ], + [ + "The pot is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The spout of the pot is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bowl is mentioned in the description.", + -1 + ], + [ + "The bowl is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The bowl is mentioned in the description.", + "pred_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bottom of the pot is mentioned in the description.", + -1 + ], + [ + "The bottom of the pot is not mentioned in the description.", + 1 + ], + [ + "The pot is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The bottom of the pot is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lid of the pot is mentioned in the description.", + -1 + ], + [ + "The lid of the pot is not mentioned in the description.", + 1 + ], + [ + "The pot is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The lid of the pot is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The stove is mentioned in the description.", + -1 + ], + [ + "The stove is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The stove is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is pot. 
Based on the image, is it likely that the object in the description is given class: pot or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel bowl filled with a mixture of chopped nuts and a yellow spatula resting on top.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is pot. Based on the image, is it likely that the object in the description is given class: pot or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel bowl filled with a mixture of chopped nuts and a yellow spatula resting on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the rim is mentioned in the description and is round.\nB. The rim or the pot is not mentioned.\nC. The shape of the rim is mentioned in the description but is not round.\nD. 
The shape of the rim is not mentioned, but the rim of the pot is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel bowl filled with a mixture of chopped nuts and a yellow spatula resting on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the content is mentioned in the description and is yellow or brown.\nB. The content or the pot is not mentioned.\nC. The color of the content is mentioned in the description but is not yellow or brown.\nD. The color of the content is not mentioned, but the content of the pot is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel bowl filled with a mixture of chopped nuts and a yellow spatula resting on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the pot is mentioned in the description and is metal.\nB. The pot is not mentioned.\nC. The material of the pot is mentioned in the description but is not metal.\nD. 
The material of the pot is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel bowl filled with a mixture of chopped nuts and a yellow spatula resting on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The spout of the pot is mentioned in the description.\nB. The spout of the pot is not mentioned in the description.\nC. The pot is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel bowl filled with a mixture of chopped nuts and a yellow spatula resting on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bowl is mentioned in the description.\nB. The bowl is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel bowl filled with a mixture of chopped nuts and a yellow spatula resting on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bottom of the pot is mentioned in the description.\nB. The bottom of the pot is not mentioned in the description.\nC. The pot is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel bowl filled with a mixture of chopped nuts and a yellow spatula resting on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lid of the pot is mentioned in the description.\nB. The lid of the pot is not mentioned in the description.\nC. The pot is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel bowl filled with a mixture of chopped nuts and a yellow spatula resting on top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The stove is mentioned in the description.\nB. The stove is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The shape of the rim is not mentioned, but the rim of the pot is mentioned.", + "D. The color of the content is not mentioned, but the content of the pot is mentioned.", + "A. The material of the pot is mentioned in the description and is metal.", + "B. The spout of the pot is not mentioned in the description.", + "A. The bowl is mentioned in the description.", + "B. The bottom of the pot is not mentioned in the description.", + "B. The lid of the pot is not mentioned in the description.", + "B. The stove is not mentioned in the description." + ], + "score": 0.625, + "score_pos": 0.6666666666666666, + "score_neg": 0.6, + "recognition_result": true + }, + "21107974": { + "pred": "A wooden gavel with a cylindrical head and a smooth, slightly tapered handle. The head features a prominent, rounded end and a series of horizontal grooves near the top. The handle is uniformly cylindrical and extends straight from the head.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the head is not mentioned, but the head of the gavel/mallet is mentioned.", + 0.5 + ], + [ + "The shape of the head is mentioned in the description and is round or cylindrical.", + 1 + ], + [ + "The head or the gavel/mallet is not mentioned.", + 0 + ], + [ + "The shape of the head is mentioned in the description but is not round or cylindrical.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The shape of the head is mentioned in the description and is round or cylindrical.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the handle is not mentioned, but the handle of the gavel/mallet is mentioned.", + 0.5 + ], + [ + "The shape of the handle is mentioned in the description and is cylindrical.", + 1 + ], + [ + "The handle or the gavel/mallet is not mentioned.", + 0 + ], + [ + "The shape of the handle is mentioned in the description but is not cylindrical.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the handle is mentioned in the description and is cylindrical.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the gavel/mallet is not mentioned.", + 0 + ], + [ + "The color of the gavel/mallet is mentioned in the description and is brown.", + 1 + ], + [ + "The gavel/mallet is not mentioned.", + 0 + ], + [ + "The color of the gavel/mallet is mentioned in the description but is not brown.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the gavel/mallet is not mentioned.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the gavel/mallet is not mentioned.", + 0 + ], + [ + "The material of the gavel/mallet is mentioned in the description and is wood.", + 1 + ], + [ + "The gavel/mallet is not mentioned.", + 0 + ], + [ + "The material of the gavel/mallet is mentioned in the description but is not wood.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The material of the gavel/mallet is mentioned in the description and is wood.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The microphone is mentioned in the description.", + -1 + ], + [ + "The microphone is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The microphone is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The man is mentioned in the description.", + -1 + ], + [ + "The man is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The man is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The podium is mentioned in the description.", + -1 + ], + [ + "The podium is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The podium is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The gavel/mallet is not mentioned in the description.", + 0 + ], + [ + "The neck of the gavel/mallet is mentioned in the description.", + -1 + ], + [ + "The neck of the gavel/mallet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The neck of the gavel/mallet is mentioned in the description.", + "pred_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sign is mentioned in the description.", + -1 + ], + [ + "The sign is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The sign is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is gavel/mallet. Based on the image, is it likely that the object in the description is given class: gavel/mallet or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head and a smooth, slightly tapered handle. The head features a prominent, rounded end and a series of horizontal grooves near the top. The handle is uniformly cylindrical and extends straight from the head.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is gavel/mallet. Based on the image, is it likely that the object in the description is given class: gavel/mallet or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head and a smooth, slightly tapered handle. The head features a prominent, rounded end and a series of horizontal grooves near the top. The handle is uniformly cylindrical and extends straight from the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the head is not mentioned, but the head of the gavel/mallet is mentioned.\nB. The shape of the head is mentioned in the description and is round or cylindrical.\nC. The head or the gavel/mallet is not mentioned.\nD. The shape of the head is mentioned in the description but is not round or cylindrical.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head and a smooth, slightly tapered handle. The head features a prominent, rounded end and a series of horizontal grooves near the top. The handle is uniformly cylindrical and extends straight from the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the handle is not mentioned, but the handle of the gavel/mallet is mentioned.\nB. The shape of the handle is mentioned in the description and is cylindrical.\nC. 
The handle or the gavel/mallet is not mentioned.\nD. The shape of the handle is mentioned in the description but is not cylindrical.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head and a smooth, slightly tapered handle. The head features a prominent, rounded end and a series of horizontal grooves near the top. The handle is uniformly cylindrical and extends straight from the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the gavel/mallet is not mentioned.\nB. The color of the gavel/mallet is mentioned in the description and is brown.\nC. The gavel/mallet is not mentioned.\nD. The color of the gavel/mallet is mentioned in the description but is not brown.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head and a smooth, slightly tapered handle. The head features a prominent, rounded end and a series of horizontal grooves near the top. 
The handle is uniformly cylindrical and extends straight from the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the gavel/mallet is not mentioned.\nB. The material of the gavel/mallet is mentioned in the description and is wood.\nC. The gavel/mallet is not mentioned.\nD. The material of the gavel/mallet is mentioned in the description but is not wood.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head and a smooth, slightly tapered handle. The head features a prominent, rounded end and a series of horizontal grooves near the top. The handle is uniformly cylindrical and extends straight from the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The microphone is mentioned in the description.\nB. The microphone is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head and a smooth, slightly tapered handle. The head features a prominent, rounded end and a series of horizontal grooves near the top. 
The handle is uniformly cylindrical and extends straight from the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The man is mentioned in the description.\nB. The man is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head and a smooth, slightly tapered handle. The head features a prominent, rounded end and a series of horizontal grooves near the top. The handle is uniformly cylindrical and extends straight from the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The podium is mentioned in the description.\nB. The podium is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head and a smooth, slightly tapered handle. The head features a prominent, rounded end and a series of horizontal grooves near the top. 
The handle is uniformly cylindrical and extends straight from the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The gavel/mallet is not mentioned in the description.\nB. The neck of the gavel/mallet is mentioned in the description.\nC. The neck of the gavel/mallet is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA wooden gavel with a cylindrical head and a smooth, slightly tapered handle. The head features a prominent, rounded end and a series of horizontal grooves near the top. The handle is uniformly cylindrical and extends straight from the head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sign is mentioned in the description.\nB. The sign is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The shape of the head is mentioned in the description and is round or cylindrical.", + "B. The shape of the handle is mentioned in the description and is cylindrical.", + "A. The color of the gavel/mallet is not mentioned.", + "B. The material of the gavel/mallet is mentioned in the description and is wood.", + "B. The microphone is not mentioned in the description.", + "B. The man is not mentioned in the description.", + "B. The podium is not mentioned in the description.", + "B. The neck of the gavel/mallet is mentioned in the description.", + "B. The sign is not mentioned in the description." 
+ ], + "score": 0.6666666666666666, + "score_pos": 0.75, + "score_neg": 0.6, + "recognition_result": true + }, + "22064315": { + "pred": "The visible part of the gazelle shows a pair of long, curved horns with a dark, almost black coloration. The horns are smooth and taper to a point. The base of the horns is attached to a light brown, slightly textured head.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the horn is mentioned in the description and is long, curved, or pointed.", + 1 + ], + [ + "The shape of the horn is mentioned in the description but is not long, curved, or pointed.", + -1 + ], + [ + "The shape of the horn is not mentioned, but the horn of the antelope is mentioned.", + 0.5 + ], + [ + "The horn or the antelope is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the horn is mentioned in the description and is long, curved, or pointed.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the horn is mentioned in the description and is grey, black, or dark.", + 1 + ], + [ + "The color of the horn is mentioned in the description but is not grey, black, or dark.", + -1 + ], + [ + "The color of the horn is not mentioned, but the horn of the antelope is mentioned.", + 0.5 + ], + [ + "The horn or the antelope is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the horn is mentioned in the description and is grey, black, or dark.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tail of the antelope is mentioned in the description.", + -1 + ], + [ + "The tail of the antelope is not mentioned in the description.", + 1 + ], + [ + "The antelope is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The tail of the antelope is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The deer are mentioned in the description.", + -1 + ], + [ + "The deer are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The deer are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The car is mentioned in the description.", + -1 + ], + [ + "The car is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The car is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tree is mentioned in the description.", + -1 + ], + [ + "The tree is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The tree is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The grass are mentioned in the description.", + -1 + ], + [ + "The grass are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The grass are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is antelope. Based on the image, is it likely that the object in the description is given class: antelope or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the gazelle shows a pair of long, curved horns with a dark, almost black coloration. The horns are smooth and taper to a point. The base of the horns is attached to a light brown, slightly textured head.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is antelope. Based on the image, is it likely that the object in the description is given class: antelope or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the gazelle shows a pair of long, curved horns with a dark, almost black coloration. The horns are smooth and taper to a point. The base of the horns is attached to a light brown, slightly textured head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the horn is mentioned in the description and is long, curved, or pointed.\nB. The shape of the horn is mentioned in the description but is not long, curved, or pointed.\nC. The shape of the horn is not mentioned, but the horn of the antelope is mentioned.\nD. The horn or the antelope is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the gazelle shows a pair of long, curved horns with a dark, almost black coloration. The horns are smooth and taper to a point. The base of the horns is attached to a light brown, slightly textured head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the horn is mentioned in the description and is grey, black, or dark.\nB. The color of the horn is mentioned in the description but is not grey, black, or dark.\nC. The color of the horn is not mentioned, but the horn of the antelope is mentioned.\nD. 
The horn or the antelope is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the gazelle shows a pair of long, curved horns with a dark, almost black coloration. The horns are smooth and taper to a point. The base of the horns is attached to a light brown, slightly textured head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tail of the antelope is mentioned in the description.\nB. The tail of the antelope is not mentioned in the description.\nC. The antelope is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the gazelle shows a pair of long, curved horns with a dark, almost black coloration. The horns are smooth and taper to a point. The base of the horns is attached to a light brown, slightly textured head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The deer are mentioned in the description.\nB. 
The deer are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the gazelle shows a pair of long, curved horns with a dark, almost black coloration. The horns are smooth and taper to a point. The base of the horns is attached to a light brown, slightly textured head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The car is mentioned in the description.\nB. The car is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the gazelle shows a pair of long, curved horns with a dark, almost black coloration. The horns are smooth and taper to a point. The base of the horns is attached to a light brown, slightly textured head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tree is mentioned in the description.\nB. The tree is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe visible part of the gazelle shows a pair of long, curved horns with a dark, almost black coloration. The horns are smooth and taper to a point. The base of the horns is attached to a light brown, slightly textured head.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The grass are mentioned in the description.\nB. The grass are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The shape of the horn is mentioned in the description and is long, curved, or pointed.", + "A. The color of the horn is mentioned in the description and is grey, black, or dark.", + "B. The tail of the antelope is not mentioned in the description.", + "B. The deer are not mentioned in the description.", + "B. The car is not mentioned in the description.", + "B. The tree is not mentioned in the description.", + "B. The grass are not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "22107522": { + "pred": "A black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the bow tie is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the bow tie is mentioned in the description and is smooth.", + 1 + ], + [ + "The bow tie is not mentioned.", + 0 + ], + [ + "The texture of the bow tie is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The texture of the bow tie is mentioned in the description and is smooth.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the bow tie is mentioned in the description but is not butterfly-shaped.", + -1 + ], + [ + "The shape of the bow tie is mentioned in the description and is butterfly-shaped.", + 1 + ], + [ + "The bow tie is not mentioned.", + 0 + ], + [ + "The shape of the bow tie is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the bow tie is mentioned in the description and is butterfly-shaped.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the bow tie is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the bow tie is mentioned in the description and is black.", + 1 + ], + [ + "The bow tie is not mentioned.", + 0 + ], + [ + "The color of the bow tie is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the bow tie is mentioned in the description and is black.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the bow tie is mentioned in the description but is not fabric.", + -1 + ], + [ + "The material of the bow tie is mentioned in the description and is fabric.", + 1 + ], + [ + "The bow tie is not mentioned.", + 0 + ], + [ + "The material of the bow tie is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The material of the bow tie is mentioned in the description and is fabric.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bow tie is not mentioned in the description.", + 0 + ], + [ + "The neck band of the bow tie is mentioned in the description.", + -1 + ], + [ + "The neck band of the bow tie is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The neck band of the bow tie is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The trumpet is mentioned in the description.", + -1 + ], + [ + "The trumpet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The trumpet is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The suit is mentioned in the description.", + -1 + ], + [ + "The suit is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The suit is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The person is mentioned in the description.", + -1 + ], + [ + "The person is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The person is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The glasses are mentioned in the description.", + -1 + ], + [ + "The glasses are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The glasses are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is bow tie. Based on the image, is it likely that the object in the description is given class: bow tie or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is bow tie. Based on the image, is it likely that the object in the description is given class: bow tie or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the bow tie is mentioned in the description but is not smooth.\nB. The texture of the bow tie is mentioned in the description and is smooth.\nC. The bow tie is not mentioned.\nD. The texture of the bow tie is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the bow tie is mentioned in the description but is not butterfly-shaped.\nB. The shape of the bow tie is mentioned in the description and is butterfly-shaped.\nC. The bow tie is not mentioned.\nD. The shape of the bow tie is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the bow tie is mentioned in the description but is not black.\nB. The color of the bow tie is mentioned in the description and is black.\nC. The bow tie is not mentioned.\nD. The color of the bow tie is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the bow tie is mentioned in the description but is not fabric.\nB. The material of the bow tie is mentioned in the description and is fabric.\nC. The bow tie is not mentioned.\nD. The material of the bow tie is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bow tie is not mentioned in the description.\nB. The neck band of the bow tie is mentioned in the description.\nC. The neck band of the bow tie is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The trumpet is mentioned in the description.\nB. The trumpet is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The suit is mentioned in the description.\nB. The suit is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The person is mentioned in the description.\nB. The person is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black bow tie with a smooth, satin-like finish, featuring a classic butterfly shape with pointed tips.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The glasses are mentioned in the description.\nB. The glasses are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The texture of the bow tie is mentioned in the description and is smooth.", + "B. The shape of the bow tie is mentioned in the description and is butterfly-shaped.", + "B. The color of the bow tie is mentioned in the description and is black.", + "B. 
The material of the bow tie is mentioned in the description and is fabric.", + "C. The neck band of the bow tie is not mentioned in the description.", + "B. The trumpet is not mentioned in the description.", + "B. The suit is not mentioned in the description.", + "B. The person is not mentioned in the description.", + "B. The glasses are not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "24017816": { + "pred": "The van is white with a large, rectangular side window and a side mirror. The window is tinted, and the side mirror is black. The van has a sleek, modern design with smooth lines and a slightly curved roof.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the windshield is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The windshield or the car is not mentioned.", + 0 + ], + [ + "The shape of the windshield is not mentioned, but the windshield of the car is mentioned.", + 0.5 + ], + [ + "The shape of the windshield is mentioned in the description and is rectangular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the windshield is not mentioned, but the windshield of the car is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the car is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The car is not mentioned.", + 0 + ], + [ + "The color of the car is not mentioned.", + 0 + ], + [ + "The color of the car is mentioned in the description and is dark or black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the car is mentioned in the description but is not dark or black.", + "pred_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the window is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The window or the car is not mentioned.", + 0 + ], + [ + "The color of the window is not mentioned, but the window of the car is mentioned.", + 0.5 + ], + [ + "The color of the window is mentioned in the description and is dark or black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the window is mentioned in the description and is dark or black.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the side mirror is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The side mirror or the car is not mentioned.", + 0 + ], + [ + "The color of the side mirror is not mentioned, but the side mirror of the car is mentioned.", + 0.5 + ], + [ + "The color of the side mirror is mentioned in the description and is dark or black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the side mirror is mentioned in the description and is dark or black.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fender of the car is not mentioned in the description.", + 1 + ], + [ + "The fender of the car is mentioned in the description.", + -1 + ], + [ + "The car is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The fender of the car is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The taillight of the car is not mentioned in the description.", + 1 + ], + [ + "The taillight of the car is mentioned in the description.", + -1 + ], + [ + "The car is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The taillight of the car is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tire of the car is not mentioned in the description.", + 1 + ], + [ + "The tire of the car is mentioned in the description.", + -1 + ], + [ + "The car is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The tire of the car is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The exhaust pipe of the car is not mentioned in the description.", + 1 + ], + [ + "The exhaust pipe of the car is mentioned in the description.", + -1 + ], + [ + "The car is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The exhaust pipe of the car is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The hood of the car is not mentioned in the description.", + 1 + ], + [ + "The hood of the car is mentioned in the description.", + -1 + ], + [ + "The car is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The hood of the car is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is car. Based on the image, is it likely that the object in the description is given class: car or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe van is white with a large, rectangular side window and a side mirror. The window is tinted, and the side mirror is black. The van has a sleek, modern design with smooth lines and a slightly curved roof.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is car. Based on the image, is it likely that the object in the description is given class: car or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe van is white with a large, rectangular side window and a side mirror. The window is tinted, and the side mirror is black. 
The van has a sleek, modern design with smooth lines and a slightly curved roof.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the windshield is mentioned in the description but is not rectangular.\nB. The windshield or the car is not mentioned.\nC. The shape of the windshield is not mentioned, but the windshield of the car is mentioned.\nD. The shape of the windshield is mentioned in the description and is rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe van is white with a large, rectangular side window and a side mirror. The window is tinted, and the side mirror is black. The van has a sleek, modern design with smooth lines and a slightly curved roof.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the car is mentioned in the description but is not dark or black.\nB. The car is not mentioned.\nC. The color of the car is not mentioned.\nD. The color of the car is mentioned in the description and is dark or black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe van is white with a large, rectangular side window and a side mirror. The window is tinted, and the side mirror is black. The van has a sleek, modern design with smooth lines and a slightly curved roof.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the window is mentioned in the description but is not dark or black.\nB. The window or the car is not mentioned.\nC. The color of the window is not mentioned, but the window of the car is mentioned.\nD. The color of the window is mentioned in the description and is dark or black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe van is white with a large, rectangular side window and a side mirror. The window is tinted, and the side mirror is black. The van has a sleek, modern design with smooth lines and a slightly curved roof.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the side mirror is mentioned in the description but is not dark or black.\nB. The side mirror or the car is not mentioned.\nC. The color of the side mirror is not mentioned, but the side mirror of the car is mentioned.\nD. The color of the side mirror is mentioned in the description and is dark or black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe van is white with a large, rectangular side window and a side mirror. The window is tinted, and the side mirror is black. The van has a sleek, modern design with smooth lines and a slightly curved roof.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fender of the car is not mentioned in the description.\nB. The fender of the car is mentioned in the description.\nC. The car is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe van is white with a large, rectangular side window and a side mirror. The window is tinted, and the side mirror is black. The van has a sleek, modern design with smooth lines and a slightly curved roof.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The taillight of the car is not mentioned in the description.\nB. The taillight of the car is mentioned in the description.\nC. The car is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe van is white with a large, rectangular side window and a side mirror. The window is tinted, and the side mirror is black. The van has a sleek, modern design with smooth lines and a slightly curved roof.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tire of the car is not mentioned in the description.\nB. The tire of the car is mentioned in the description.\nC. The car is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe van is white with a large, rectangular side window and a side mirror. The window is tinted, and the side mirror is black. The van has a sleek, modern design with smooth lines and a slightly curved roof.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The exhaust pipe of the car is not mentioned in the description.\nB. The exhaust pipe of the car is mentioned in the description.\nC. The car is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe van is white with a large, rectangular side window and a side mirror. The window is tinted, and the side mirror is black. The van has a sleek, modern design with smooth lines and a slightly curved roof.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The hood of the car is not mentioned in the description.\nB. The hood of the car is mentioned in the description.\nC. The car is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The shape of the windshield is not mentioned, but the windshield of the car is mentioned.", + "A. The color of the car is mentioned in the description but is not dark or black.", + "D. The color of the window is mentioned in the description and is dark or black.", + "D. The color of the side mirror is mentioned in the description and is dark or black.", + "A. The fender of the car is not mentioned in the description.", + "A. The taillight of the car is not mentioned in the description.", + "A. The tire of the car is not mentioned in the description.", + "A. The exhaust pipe of the car is not mentioned in the description.", + "A. The hood of the car is not mentioned in the description." + ], + "score": 0.7222222222222222, + "score_pos": 0.375, + "score_neg": 1.0, + "recognition_result": true + }, + "24498027": { + "pred": "A tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. 
The pole has a rectangular, box-like structure attached near the top, and a smaller, horizontal arm extending from the middle section.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the pole is mentioned in the description but is not black or dark.", + -1 + ], + [ + "The color of the pole is not mentioned, but the pole of the street lights is mentioned.", + 0.5 + ], + [ + "The color of the pole is mentioned in the description and is black or dark.", + 1 + ], + [ + "The pole or the street lights is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the pole is mentioned in the description and is black or dark.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the lamp is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the lamp is not mentioned, but the lamp of the street lights is mentioned.", + 0.5 + ], + [ + "The color of the lamp is mentioned in the description and is white.", + 1 + ], + [ + "The lamp or the street lights is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the lamp is mentioned in the description but is not white.", + "pred_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The direction of the bars is mentioned in the description but is not horizontal.", + -1 + ], + [ + "The direction of the bars is not mentioned, but the bars of the street lights are mentioned.", + 0.5 + ], + [ + "The direction of the bars is mentioned in the description and is horizontal.", + 1 + ], + [ + "The bars or the street lights are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The direction of the bars is mentioned in the description and is horizontal.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The street lights are not mentioned in the description.", + 0 + ], + [ + "The cable of the street lights is mentioned in the description.", + -1 + ], + [ + "The cable of the street lights is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The cable of the street lights is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The street lights are not mentioned in the description.", + 0 + ], + [ + "The wire of the street lights is mentioned in the description.", + -1 + ], + [ + "The wire of the street lights is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The wire of the street lights is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bus is mentioned in the description.", + -1 + ], + [ + "The bus is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bus is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bike is mentioned in the description.", + -1 + ], + [ + "The bike is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The bike is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ambulance is mentioned in the description.", + -1 + ], + [ + "The ambulance is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The ambulance is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is street lights. Based on the image, is it likely that the object in the description is given class: street lights or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a rectangular, box-like structure attached near the top, and a smaller, horizontal arm extending from the middle section.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is street lights. Based on the image, is it likely that the object in the description is given class: street lights or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a rectangular, box-like structure attached near the top, and a smaller, horizontal arm extending from the middle section.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the pole is mentioned in the description but is not black or dark.\nB. The color of the pole is not mentioned, but the pole of the street lights is mentioned.\nC. The color of the pole is mentioned in the description and is black or dark.\nD. The pole or the street lights is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a rectangular, box-like structure attached near the top, and a smaller, horizontal arm extending from the middle section.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the lamp is mentioned in the description but is not white.\nB. The color of the lamp is not mentioned, but the lamp of the street lights is mentioned.\nC. The color of the lamp is mentioned in the description and is white.\nD. 
The lamp or the street lights is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a rectangular, box-like structure attached near the top, and a smaller, horizontal arm extending from the middle section.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The direction of the bars is mentioned in the description but is not horizontal.\nB. The direction of the bars is not mentioned, but the bars of the street lights are mentioned.\nC. The direction of the bars is mentioned in the description and is horizontal.\nD. The bars or the street lights are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a rectangular, box-like structure attached near the top, and a smaller, horizontal arm extending from the middle section.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The street lights are not mentioned in the description.\nB. The cable of the street lights is mentioned in the description.\nC. The cable of the street lights is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a rectangular, box-like structure attached near the top, and a smaller, horizontal arm extending from the middle section.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The street lights are not mentioned in the description.\nB. The wire of the street lights is mentioned in the description.\nC. The wire of the street lights is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a rectangular, box-like structure attached near the top, and a smaller, horizontal arm extending from the middle section.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The bus is mentioned in the description.\nB. The bus is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a rectangular, box-like structure attached near the top, and a smaller, horizontal arm extending from the middle section.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bike is mentioned in the description.\nB. The bike is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, slender black pole with a decorative, ornate top featuring a small, pointed finial. The pole has a rectangular, box-like structure attached near the top, and a smaller, horizontal arm extending from the middle section.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ambulance is mentioned in the description.\nB. The ambulance is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. 
The color of the pole is mentioned in the description and is black or dark.", + "A. The color of the lamp is mentioned in the description but is not white.", + "C. The direction of the bars is mentioned in the description and is horizontal.", + "C. The cable of the street lights is not mentioned in the description.", + "C. The wire of the street lights is not mentioned in the description.", + "B. The bus is not mentioned in the description.", + "B. The bike is not mentioned in the description.", + "B. The ambulance is not mentioned in the description." + ], + "score": 0.75, + "score_pos": 0.3333333333333333, + "score_neg": 1.0, + "recognition_result": true + }, + "24581953": { + "pred": "A large, light-colored carnivore with a robust, muscular body and a thick, short coat. It has a broad head with small, rounded ears and a long, tapering tail. The legs are sturdy and strong, with large paws.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the dog is mentioned in the description but is not white or gray.", + -1 + ], + [ + "The color of the dog is mentioned in the description and is white or gray.", + 1 + ], + [ + "The color of the dog is not mentioned.", + 0 + ], + [ + "The dog is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the dog is mentioned in the description and is white or gray.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The posture of the dog is mentioned in the description but is not lying down.", + -1 + ], + [ + "The posture of the dog is mentioned in the description and is lying down.", + 1 + ], + [ + "The posture of the dog is not mentioned.", + 0 + ], + [ + "The dog is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The posture of the dog is mentioned in the description and is lying down.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the tail is mentioned in the description but is not large or long.", + -1 + ], + [ + "The size of the tail is mentioned in the description and is large or long.", + 1 + ], + [ + "The size of the tail is not mentioned, but the tail of the dog is mentioned.", + 0.5 + ], + [ + "The tail or the dog is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The size of the tail is mentioned in the description and is large or long.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the dog is mentioned in the description but is not large.", + -1 + ], + [ + "The size of the dog is mentioned in the description and is large.", + 1 + ], + [ + "The size of the dog is not mentioned.", + 0 + ], + [ + "The dog is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The size of the dog is mentioned in the description and is large.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the coat is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the coat is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the coat is not mentioned, but the coat of the dog is mentioned.", + 0.5 + ], + [ + "The coat or the dog is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The texture of the coat is not mentioned, but the coat of the dog is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The temple is not mentioned in the description.", + 1 + ], + [ + "The temple is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The temple is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouth of the dog is not mentioned in the description.", + 1 + ], + [ + "The mouth of the dog is mentioned in the description.", + -1 + ], + [ + "The dog is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The mouth of the dog is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The eye of the dog is not mentioned in the description.", + 1 + ], + [ + "The eye of the dog is mentioned in the description.", + -1 + ], + [ + "The dog is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The eye of the dog is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The nose of the dog is not mentioned in the description.", + 1 + ], + [ + "The nose of the dog is mentioned in the description.", + -1 + ], + [ + "The dog is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The nose of the dog is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bird is not mentioned in the description.", + 1 + ], + [ + "The bird is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The bird is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is dog. Based on the image, is it likely that the object in the description is given class: dog or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light-colored carnivore with a robust, muscular body and a thick, short coat. It has a broad head with small, rounded ears and a long, tapering tail. The legs are sturdy and strong, with large paws.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is dog. Based on the image, is it likely that the object in the description is given class: dog or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light-colored carnivore with a robust, muscular body and a thick, short coat. It has a broad head with small, rounded ears and a long, tapering tail. The legs are sturdy and strong, with large paws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the dog is mentioned in the description but is not white or gray.\nB. The color of the dog is mentioned in the description and is white or gray.\nC. The color of the dog is not mentioned.\nD. The dog is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light-colored carnivore with a robust, muscular body and a thick, short coat. It has a broad head with small, rounded ears and a long, tapering tail. The legs are sturdy and strong, with large paws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The posture of the dog is mentioned in the description but is not lying down.\nB. The posture of the dog is mentioned in the description and is lying down.\nC. The posture of the dog is not mentioned.\nD. The dog is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light-colored carnivore with a robust, muscular body and a thick, short coat. It has a broad head with small, rounded ears and a long, tapering tail. The legs are sturdy and strong, with large paws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the tail is mentioned in the description but is not large or long.\nB. The size of the tail is mentioned in the description and is large or long.\nC. The size of the tail is not mentioned, but the tail of the dog is mentioned.\nD. The tail or the dog is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light-colored carnivore with a robust, muscular body and a thick, short coat. It has a broad head with small, rounded ears and a long, tapering tail. The legs are sturdy and strong, with large paws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the dog is mentioned in the description but is not large.\nB. The size of the dog is mentioned in the description and is large.\nC. The size of the dog is not mentioned.\nD. The dog is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light-colored carnivore with a robust, muscular body and a thick, short coat. It has a broad head with small, rounded ears and a long, tapering tail. The legs are sturdy and strong, with large paws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the coat is mentioned in the description but is not smooth.\nB. The texture of the coat is mentioned in the description and is smooth.\nC. The texture of the coat is not mentioned, but the coat of the dog is mentioned.\nD. The coat or the dog is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light-colored carnivore with a robust, muscular body and a thick, short coat. It has a broad head with small, rounded ears and a long, tapering tail. The legs are sturdy and strong, with large paws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The temple is not mentioned in the description.\nB. The temple is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light-colored carnivore with a robust, muscular body and a thick, short coat. It has a broad head with small, rounded ears and a long, tapering tail. The legs are sturdy and strong, with large paws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mouth of the dog is not mentioned in the description.\nB. The mouth of the dog is mentioned in the description.\nC. The dog is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light-colored carnivore with a robust, muscular body and a thick, short coat. It has a broad head with small, rounded ears and a long, tapering tail. The legs are sturdy and strong, with large paws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The eye of the dog is not mentioned in the description.\nB. The eye of the dog is mentioned in the description.\nC. The dog is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light-colored carnivore with a robust, muscular body and a thick, short coat. It has a broad head with small, rounded ears and a long, tapering tail. The legs are sturdy and strong, with large paws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The nose of the dog is not mentioned in the description.\nB. The nose of the dog is mentioned in the description.\nC. The dog is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA large, light-colored carnivore with a robust, muscular body and a thick, short coat. It has a broad head with small, rounded ears and a long, tapering tail. The legs are sturdy and strong, with large paws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bird is not mentioned in the description.\nB. The bird is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the dog is mentioned in the description and is white or gray.", + "B. The posture of the dog is mentioned in the description and is lying down.", + "B. The size of the tail is mentioned in the description and is large or long.", + "B. The size of the dog is mentioned in the description and is large.", + "C. The texture of the coat is not mentioned, but the coat of the dog is mentioned.", + "A. 
The temple is not mentioned in the description.", + "A. The mouth of the dog is not mentioned in the description.", + "A. The eye of the dog is not mentioned in the description.", + "A. The nose of the dog is not mentioned in the description.", + "A. The bird is not mentioned in the description." + ], + "score": 0.95, + "score_pos": 0.9, + "score_neg": 1.0, + "recognition_result": true + }, + "24786060": { + "pred": "A light gray towel with a soft, slightly wrinkled texture, hanging loosely with a gentle curve.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the towel is mentioned in the description but is not gray or white.", + -1 + ], + [ + "The color of the towel is not mentioned.", + 0 + ], + [ + "The color of the towel is mentioned in the description and is gray or white.", + 1 + ], + [ + "The towel is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the towel is mentioned in the description and is gray or white.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the towel is mentioned in the description but is not plush.", + -1 + ], + [ + "The texture of the towel is not mentioned.", + 0 + ], + [ + "The texture of the towel is mentioned in the description and is plush.", + 1 + ], + [ + "The towel is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The texture of the towel is mentioned in the description but is not plush.", + "pred_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the towel is mentioned in the description but is not irregular.", + -1 + ], + [ + "The shape of the towel is not mentioned.", + 0 + ], + [ + "The shape of the towel is mentioned in the description and is irregular.", + 1 + ], + [ + "The towel is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the towel is mentioned in the description and is irregular.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The loop of the towel is mentioned in the description.", + -1 + ], + [ + "The loop of the towel is not mentioned in the description.", + 1 + ], + [ + "The towel is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The loop of the towel is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The vanity is mentioned in the description.", + -1 + ], + [ + "The vanity is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The vanity is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sink is mentioned in the description.", + -1 + ], + [ + "The sink is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The sink is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The design of the towel is mentioned in the description.", + -1 + ], + [ + "The design of the towel is not mentioned in the description.", + 1 + ], + [ + "The towel is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The design of the towel is mentioned in the description.", + "pred_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet is mentioned in the description.", + -1 + ], + [ + "The toilet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The toilet is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is towel. Based on the image, is it likely that the object in the description is given class: towel or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, slightly wrinkled texture, hanging loosely with a gentle curve.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is towel. Based on the image, is it likely that the object in the description is given class: towel or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, slightly wrinkled texture, hanging loosely with a gentle curve.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the towel is mentioned in the description but is not gray or white.\nB. The color of the towel is not mentioned.\nC. The color of the towel is mentioned in the description and is gray or white.\nD. The towel is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, slightly wrinkled texture, hanging loosely with a gentle curve.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the towel is mentioned in the description but is not plush.\nB. The texture of the towel is not mentioned.\nC. The texture of the towel is mentioned in the description and is plush.\nD. The towel is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, slightly wrinkled texture, hanging loosely with a gentle curve.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the towel is mentioned in the description but is not irregular.\nB. The shape of the towel is not mentioned.\nC. The shape of the towel is mentioned in the description and is irregular.\nD. The towel is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, slightly wrinkled texture, hanging loosely with a gentle curve.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The loop of the towel is mentioned in the description.\nB. The loop of the towel is not mentioned in the description.\nC. The towel is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, slightly wrinkled texture, hanging loosely with a gentle curve.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The vanity is mentioned in the description.\nB. The vanity is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, slightly wrinkled texture, hanging loosely with a gentle curve.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sink is mentioned in the description.\nB. 
The sink is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, slightly wrinkled texture, hanging loosely with a gentle curve.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The design of the towel is mentioned in the description.\nB. The design of the towel is not mentioned in the description.\nC. The towel is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA light gray towel with a soft, slightly wrinkled texture, hanging loosely with a gentle curve.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toilet is mentioned in the description.\nB. The toilet is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the towel is mentioned in the description and is gray or white.", + "A. The texture of the towel is mentioned in the description but is not plush.", + "C. The shape of the towel is mentioned in the description and is irregular.", + "B. 
The loop of the towel is not mentioned in the description.", + "B. The vanity is not mentioned in the description.", + "B. The sink is not mentioned in the description.", + "A. The design of the towel is mentioned in the description.", + "B. The toilet is not mentioned in the description." + ], + "score": 0.5, + "score_pos": 0.3333333333333333, + "score_neg": 0.6, + "recognition_result": true + }, + "25054869": { + "pred": "The toilet features a smooth, rounded lid with a glossy finish, seamlessly integrated into the tank. The tank has a slightly curved, angular design with a uniform, light beige color.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lid or the toilet is not mentioned.", + 0 + ], + [ + "The material of the lid is mentioned in the description but is not ceramic.", + -1 + ], + [ + "The material of the lid is mentioned in the description and is ceramic.", + 1 + ], + [ + "The material of the lid is not mentioned, but the lid of the toilet is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the lid is not mentioned, but the lid of the toilet is mentioned.", + "pred_index": 3, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lid or the toilet is not mentioned.", + 0 + ], + [ + "The texture of the lid is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the lid is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the lid is not mentioned, but the lid of the toilet is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. 
The texture of the lid is mentioned in the description and is smooth.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tank or the toilet is not mentioned.", + 0 + ], + [ + "The shape of the tank is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the tank is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the tank is not mentioned, but the tank of the toilet is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the tank is mentioned in the description but is not rectangular.", + "pred_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet is not mentioned.", + 0 + ], + [ + "The color of the toilet is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the toilet is mentioned in the description and is white.", + 1 + ], + [ + "The color of the toilet is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the toilet is mentioned in the description but is not white.", + "pred_index": 1, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet is not mentioned in the description.", + 0 + ], + [ + "The flush lever of the toilet is mentioned in the description.", + -1 + ], + [ + "The flush lever of the toilet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The flush lever of the toilet is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet is not mentioned in the description.", + 0 + ], + [ + "The flush button of the toilet is mentioned in the description.", + -1 + ], + [ + "The flush button of the toilet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The flush button of the toilet is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet is not mentioned in the description.", + 0 + ], + [ + "The toilet bowl of the toilet is mentioned in the description.", + -1 + ], + [ + "The toilet bowl of the toilet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The toilet bowl of the toilet is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toothbrush is mentioned in the description.", + -1 + ], + [ + "The toothbrush is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The toothbrush is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The toilet is not mentioned in the description.", + 0 + ], + [ + "The toilet seat of the toilet is mentioned in the description.", + -1 + ], + [ + "The toilet seat of the toilet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The toilet seat of the toilet is mentioned in the description.", + "pred_index": 1, + "eval_result": -1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is toilet. Based on the image, is it likely that the object in the description is given class: toilet or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toilet features a smooth, rounded lid with a glossy finish, seamlessly integrated into the tank. The tank has a slightly curved, angular design with a uniform, light beige color.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is toilet. Based on the image, is it likely that the object in the description is given class: toilet or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toilet features a smooth, rounded lid with a glossy finish, seamlessly integrated into the tank. 
The tank has a slightly curved, angular design with a uniform, light beige color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lid or the toilet is not mentioned.\nB. The material of the lid is mentioned in the description but is not ceramic.\nC. The material of the lid is mentioned in the description and is ceramic.\nD. The material of the lid is not mentioned, but the lid of the toilet is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toilet features a smooth, rounded lid with a glossy finish, seamlessly integrated into the tank. The tank has a slightly curved, angular design with a uniform, light beige color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lid or the toilet is not mentioned.\nB. The texture of the lid is mentioned in the description but is not smooth.\nC. The texture of the lid is mentioned in the description and is smooth.\nD. The texture of the lid is not mentioned, but the lid of the toilet is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe toilet features a smooth, rounded lid with a glossy finish, seamlessly integrated into the tank. The tank has a slightly curved, angular design with a uniform, light beige color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tank or the toilet is not mentioned.\nB. The shape of the tank is mentioned in the description but is not rectangular.\nC. The shape of the tank is mentioned in the description and is rectangular.\nD. The shape of the tank is not mentioned, but the tank of the toilet is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toilet features a smooth, rounded lid with a glossy finish, seamlessly integrated into the tank. The tank has a slightly curved, angular design with a uniform, light beige color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toilet is not mentioned.\nB. The color of the toilet is mentioned in the description but is not white.\nC. The color of the toilet is mentioned in the description and is white.\nD. The color of the toilet is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toilet features a smooth, rounded lid with a glossy finish, seamlessly integrated into the tank. The tank has a slightly curved, angular design with a uniform, light beige color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toilet is not mentioned in the description.\nB. The flush lever of the toilet is mentioned in the description.\nC. The flush lever of the toilet is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toilet features a smooth, rounded lid with a glossy finish, seamlessly integrated into the tank. The tank has a slightly curved, angular design with a uniform, light beige color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toilet is not mentioned in the description.\nB. The flush button of the toilet is mentioned in the description.\nC. The flush button of the toilet is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe toilet features a smooth, rounded lid with a glossy finish, seamlessly integrated into the tank. The tank has a slightly curved, angular design with a uniform, light beige color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toilet is not mentioned in the description.\nB. The toilet bowl of the toilet is mentioned in the description.\nC. The toilet bowl of the toilet is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toilet features a smooth, rounded lid with a glossy finish, seamlessly integrated into the tank. The tank has a slightly curved, angular design with a uniform, light beige color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toothbrush is mentioned in the description.\nB. The toothbrush is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe toilet features a smooth, rounded lid with a glossy finish, seamlessly integrated into the tank. 
The tank has a slightly curved, angular design with a uniform, light beige color.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The toilet is not mentioned in the description.\nB. The toilet seat of the toilet is mentioned in the description.\nC. The toilet seat of the toilet is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The material of the lid is not mentioned, but the lid of the toilet is mentioned.", + "C. The texture of the lid is mentioned in the description and is smooth.", + "B. The shape of the tank is mentioned in the description but is not rectangular.", + "B. The color of the toilet is mentioned in the description but is not white.", + "C. The flush lever of the toilet is not mentioned in the description.", + "C. The flush button of the toilet is not mentioned in the description.", + "C. The toilet bowl of the toilet is not mentioned in the description.", + "B. The toothbrush is not mentioned in the description.", + "B. The toilet seat of the toilet is mentioned in the description." + ], + "score": 0.2777777777777778, + "score_pos": -0.125, + "score_neg": 0.6, + "recognition_result": true + }, + "25273553": { + "pred": "A black tripod with a central column and three legs, each leg featuring a rubber foot for stability. The legs are connected to a central hub, which is part of the tripod's support structure.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the legs is mentioned in the description but is not slender.", + -1 + ], + [ + "The shape of the legs is mentioned in the description and is slender.", + 1 + ], + [ + "The legs or the tripod are not mentioned.", + 0 + ], + [ + "The shape of the legs is not mentioned, but the legs of the tripod are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The shape of the legs is mentioned in the description and is slender.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the tripod is mentioned in the description but is not plastic or metal.", + -1 + ], + [ + "The material of the tripod is mentioned in the description and is plastic or metal.", + 1 + ], + [ + "The tripod is not mentioned.", + 0 + ], + [ + "The material of the tripod is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the tripod is not mentioned.", + "pred_index": 3, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the tripod is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the tripod is mentioned in the description and is black.", + 1 + ], + [ + "The tripod is not mentioned.", + 0 + ], + [ + "The color of the tripod is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the tripod is mentioned in the description and is black.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tripod is not mentioned in the description.", + 0 + ], + [ + "The leg locks of the tripod are not mentioned in the description.", + 1 + ], + [ + "The leg locks of the tripod are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The leg locks of the tripod are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tripod is not mentioned in the description.", + 0 + ], + [ + "The quick release plate of the tripod is not mentioned in the description.", + 1 + ], + [ + "The quick release plate of the tripod is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The quick release plate of the tripod is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The hot air balloon is not mentioned in the description.", + 1 + ], + [ + "The hot air balloon is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The hot air balloon is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tree is not mentioned in the description.", + 1 + ], + [ + "The tree is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The tree is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sky is not mentioned in the description.", + 1 + ], + [ + "The sky is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The sky is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is tripod. 
Based on the image, is it likely that the object in the description is given class: tripod or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with a central column and three legs, each leg featuring a rubber foot for stability. The legs are connected to a central hub, which is part of the tripod's support structure.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is tripod. Based on the image, is it likely that the object in the description is given class: tripod or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with a central column and three legs, each leg featuring a rubber foot for stability. The legs are connected to a central hub, which is part of the tripod's support structure.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The shape of the legs is mentioned in the description but is not slender.\nB. The shape of the legs is mentioned in the description and is slender.\nC. The legs or the tripod are not mentioned.\nD. The shape of the legs is not mentioned, but the legs of the tripod are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with a central column and three legs, each leg featuring a rubber foot for stability. The legs are connected to a central hub, which is part of the tripod's support structure.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the tripod is mentioned in the description but is not plastic or metal.\nB. The material of the tripod is mentioned in the description and is plastic or metal.\nC. The tripod is not mentioned.\nD. The material of the tripod is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with a central column and three legs, each leg featuring a rubber foot for stability. 
The legs are connected to a central hub, which is part of the tripod's support structure.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the tripod is mentioned in the description but is not black.\nB. The color of the tripod is mentioned in the description and is black.\nC. The tripod is not mentioned.\nD. The color of the tripod is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with a central column and three legs, each leg featuring a rubber foot for stability. The legs are connected to a central hub, which is part of the tripod's support structure.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tripod is not mentioned in the description.\nB. The leg locks of the tripod are not mentioned in the description.\nC. The leg locks of the tripod are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with a central column and three legs, each leg featuring a rubber foot for stability. 
The legs are connected to a central hub, which is part of the tripod's support structure.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tripod is not mentioned in the description.\nB. The quick release plate of the tripod is not mentioned in the description.\nC. The quick release plate of the tripod is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with a central column and three legs, each leg featuring a rubber foot for stability. The legs are connected to a central hub, which is part of the tripod's support structure.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The hot air balloon is not mentioned in the description.\nB. The hot air balloon is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with a central column and three legs, each leg featuring a rubber foot for stability. 
The legs are connected to a central hub, which is part of the tripod's support structure.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tree is not mentioned in the description.\nB. The tree is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black tripod with a central column and three legs, each leg featuring a rubber foot for stability. The legs are connected to a central hub, which is part of the tripod's support structure.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sky is not mentioned in the description.\nB. The sky is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The shape of the legs is mentioned in the description and is slender.", + "D. The material of the tripod is not mentioned.", + "B. The color of the tripod is mentioned in the description and is black.", + "B. The leg locks of the tripod are not mentioned in the description.", + "B. The quick release plate of the tripod is not mentioned in the description.", + "A. The hot air balloon is not mentioned in the description.", + "A. The tree is not mentioned in the description.", + "A. The sky is not mentioned in the description." + ], + "score": 0.875, + "score_pos": 0.6666666666666666, + "score_neg": 1.0, + "recognition_result": true + }, + "25419495": { + "pred": "The tongs have a metallic, slightly curved arm with a black rubberized grip handle. 
The handle is ergonomically designed with a smooth, matte finish. The tongs are open, showing the inner surfaces of the arms, which are also metallic and slightly curved.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the gripping ends/claws is mentioned in the description and is black.", + 1 + ], + [ + "The color of the gripping ends/claws is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the gripping ends/claws is not mentioned, but the gripping ends/claws of the tong are mentioned.", + 0.5 + ], + [ + "The gripping ends/claws or the tong are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the gripping ends/claws is not mentioned, but the gripping ends/claws of the tong are mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the gripping ends/claws is mentioned in the description and is scalloped.", + 1 + ], + [ + "The shape of the gripping ends/claws is mentioned in the description but is not scalloped.", + -1 + ], + [ + "The shape of the gripping ends/claws is not mentioned, but the gripping ends/claws of the tong are mentioned.", + 0.5 + ], + [ + "The gripping ends/claws or the tong are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the gripping ends/claws is not mentioned, but the gripping ends/claws of the tong are mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the handle/arm is mentioned in the description and is black or metallic.", + 1 + ], + [ + "The color of the handle/arm is mentioned in the description but is not black or metallic.", + -1 + ], + [ + "The color of the handle/arm is not mentioned, but the handle/arm of the tong is mentioned.", + 0.5 + ], + [ + "The handle/arm or the tong is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the handle/arm is mentioned in the description and is black or metallic.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the handle/arm is mentioned in the description and is metal or rubber.", + 1 + ], + [ + "The material of the handle/arm is mentioned in the description but is not metal or rubber.", + -1 + ], + [ + "The material of the handle/arm is not mentioned, but the handle/arm of the tong is mentioned.", + 0.5 + ], + [ + "The handle/arm or the tong is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the handle/arm is mentioned in the description and is metal or rubber.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The spoon is mentioned in the description.", + -1 + ], + [ + "The spoon is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The spoon is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The spring of the tong is mentioned in the description.", + -1 + ], + [ + "The tong is not mentioned in the description.", + 0 + ], + [ + "The spring of the tong is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The spring of the tong is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plate is mentioned in the description.", + -1 + ], + [ + "The plate is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The plate is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cup is mentioned in the description.", + -1 + ], + [ + "The cup is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The cup is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fork is mentioned in the description.", + -1 + ], + [ + "The fork is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The fork is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is tong. 
Based on the image, is it likely that the object in the description is given class: tong or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a metallic, slightly curved arm with a black rubberized grip handle. The handle is ergonomically designed with a smooth, matte finish. The tongs are open, showing the inner surfaces of the arms, which are also metallic and slightly curved.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is tong. Based on the image, is it likely that the object in the description is given class: tong or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a metallic, slightly curved arm with a black rubberized grip handle. The handle is ergonomically designed with a smooth, matte finish. 
The tongs are open, showing the inner surfaces of the arms, which are also metallic and slightly curved.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the gripping ends/claws is mentioned in the description and is black.\nB. The color of the gripping ends/claws is mentioned in the description but is not black.\nC. The color of the gripping ends/claws is not mentioned, but the gripping ends/claws of the tong are mentioned.\nD. The gripping ends/claws or the tong are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a metallic, slightly curved arm with a black rubberized grip handle. The handle is ergonomically designed with a smooth, matte finish. The tongs are open, showing the inner surfaces of the arms, which are also metallic and slightly curved.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the gripping ends/claws is mentioned in the description and is scalloped.\nB. The shape of the gripping ends/claws is mentioned in the description but is not scalloped.\nC. The shape of the gripping ends/claws is not mentioned, but the gripping ends/claws of the tong are mentioned.\nD. The gripping ends/claws or the tong are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a metallic, slightly curved arm with a black rubberized grip handle. The handle is ergonomically designed with a smooth, matte finish. The tongs are open, showing the inner surfaces of the arms, which are also metallic and slightly curved.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the handle/arm is mentioned in the description and is black or metallic.\nB. The color of the handle/arm is mentioned in the description but is not black or metallic.\nC. The color of the handle/arm is not mentioned, but the handle/arm of the tong is mentioned.\nD. The handle/arm or the tong is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a metallic, slightly curved arm with a black rubberized grip handle. The handle is ergonomically designed with a smooth, matte finish. The tongs are open, showing the inner surfaces of the arms, which are also metallic and slightly curved.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the handle/arm is mentioned in the description and is metal or rubber.\nB. The material of the handle/arm is mentioned in the description but is not metal or rubber.\nC. 
The material of the handle/arm is not mentioned, but the handle/arm of the tong is mentioned.\nD. The handle/arm or the tong is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a metallic, slightly curved arm with a black rubberized grip handle. The handle is ergonomically designed with a smooth, matte finish. The tongs are open, showing the inner surfaces of the arms, which are also metallic and slightly curved.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The spoon is mentioned in the description.\nB. The spoon is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a metallic, slightly curved arm with a black rubberized grip handle. The handle is ergonomically designed with a smooth, matte finish. The tongs are open, showing the inner surfaces of the arms, which are also metallic and slightly curved.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The spring of the tong is mentioned in the description.\nB. The tong is not mentioned in the description.\nC. 
The spring of the tong is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a metallic, slightly curved arm with a black rubberized grip handle. The handle is ergonomically designed with a smooth, matte finish. The tongs are open, showing the inner surfaces of the arms, which are also metallic and slightly curved.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The plate is mentioned in the description.\nB. The plate is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a metallic, slightly curved arm with a black rubberized grip handle. The handle is ergonomically designed with a smooth, matte finish. The tongs are open, showing the inner surfaces of the arms, which are also metallic and slightly curved.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cup is mentioned in the description.\nB. The cup is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tongs have a metallic, slightly curved arm with a black rubberized grip handle. The handle is ergonomically designed with a smooth, matte finish. The tongs are open, showing the inner surfaces of the arms, which are also metallic and slightly curved.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fork is mentioned in the description.\nB. The fork is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the gripping ends/claws is not mentioned, but the gripping ends/claws of the tong are mentioned.", + "C. The shape of the gripping ends/claws is not mentioned, but the gripping ends/claws of the tong are mentioned.", + "A. The color of the handle/arm is mentioned in the description and is black or metallic.", + "A. The material of the handle/arm is mentioned in the description and is metal or rubber.", + "B. The spoon is not mentioned in the description.", + "C. The spring of the tong is not mentioned in the description.", + "B. The plate is not mentioned in the description.", + "B. The cup is not mentioned in the description.", + "B. The fork is not mentioned in the description." 
+ ], + "score": 0.8888888888888888, + "score_pos": 0.75, + "score_neg": 1.0, + "recognition_result": true + }, + "25419516": { + "pred": "A plush toy with a blue face, large white eyes with black pupils, and two pointed ears.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the stuffed toy is mentioned in the description but is not plush.", + -1 + ], + [ + "The material of the stuffed toy is not mentioned.", + 0 + ], + [ + "The material of the stuffed toy is mentioned in the description and is plush.", + 1 + ], + [ + "The stuffed toy is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the stuffed toy is mentioned in the description and is plush.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the eyes is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the eyes is not mentioned, but the eyes of the stuffed toy are mentioned.", + 0.5 + ], + [ + "The color of the eyes is mentioned in the description and is black.", + 1 + ], + [ + "The eyes or the stuffed toy are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the eyes is mentioned in the description and is black.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the nose is mentioned in the description but is not blue.", + -1 + ], + [ + "The color of the nose is not mentioned, but the nose of the stuffed toy is mentioned.", + 0.5 + ], + [ + "The color of the nose is mentioned in the description and is blue.", + 1 + ], + [ + "The nose or the stuffed toy is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the nose is not mentioned, but the nose of the stuffed toy is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the body is mentioned in the description but is not blue.", + -1 + ], + [ + "The color of the body is not mentioned, but the body of the stuffed toy is mentioned.", + 0.5 + ], + [ + "The color of the body is mentioned in the description and is blue.", + 1 + ], + [ + "The body or the stuffed toy is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the body is mentioned in the description and is blue.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the nose is mentioned in the description but is not round.", + -1 + ], + [ + "The shape of the nose is not mentioned, but the nose of the stuffed toy is mentioned.", + 0.5 + ], + [ + "The shape of the nose is mentioned in the description and is round.", + 1 + ], + [ + "The nose or the stuffed toy is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the nose is not mentioned, but the nose of the stuffed toy is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tail of the stuffed toy is not mentioned in the description.", + 1 + ], + [ + "The stuffed toy is not mentioned in the description.", + 0 + ], + [ + "The tail of the stuffed toy is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The tail of the stuffed toy is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The feet of the stuffed toy are not mentioned in the description.", + 1 + ], + [ + "The stuffed toy is not mentioned in the description.", + 0 + ], + [ + "The feet of the stuffed toy are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The feet of the stuffed toy are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The door is not mentioned in the description.", + 1 + ], + [ + "The door is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The door is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chair is not mentioned in the description.", + 1 + ], + [ + "The chair is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The chair is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fork is not mentioned in the description.", + 1 + ], + [ + "The fork is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The fork is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is stuffed toy. 
Based on the image, is it likely that the object in the description is given class: stuffed toy or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA plush toy with a blue face, large white eyes with black pupils, and two pointed ears.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is stuffed toy. Based on the image, is it likely that the object in the description is given class: stuffed toy or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA plush toy with a blue face, large white eyes with black pupils, and two pointed ears.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the stuffed toy is mentioned in the description but is not plush.\nB. The material of the stuffed toy is not mentioned.\nC. The material of the stuffed toy is mentioned in the description and is plush.\nD. 
The stuffed toy is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA plush toy with a blue face, large white eyes with black pupils, and two pointed ears.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the eyes is mentioned in the description but is not black.\nB. The color of the eyes is not mentioned, but the eyes of the stuffed toy are mentioned.\nC. The color of the eyes is mentioned in the description and is black.\nD. The eyes or the stuffed toy are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA plush toy with a blue face, large white eyes with black pupils, and two pointed ears.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the nose is mentioned in the description but is not blue.\nB. The color of the nose is not mentioned, but the nose of the stuffed toy is mentioned.\nC. The color of the nose is mentioned in the description and is blue.\nD. 
The nose or the stuffed toy is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA plush toy with a blue face, large white eyes with black pupils, and two pointed ears.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the body is mentioned in the description but is not blue.\nB. The color of the body is not mentioned, but the body of the stuffed toy is mentioned.\nC. The color of the body is mentioned in the description and is blue.\nD. The body or the stuffed toy is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA plush toy with a blue face, large white eyes with black pupils, and two pointed ears.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the nose is mentioned in the description but is not round.\nB. The shape of the nose is not mentioned, but the nose of the stuffed toy is mentioned.\nC. The shape of the nose is mentioned in the description and is round.\nD. 
The nose or the stuffed toy is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA plush toy with a blue face, large white eyes with black pupils, and two pointed ears.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tail of the stuffed toy is not mentioned in the description.\nB. The stuffed toy is not mentioned in the description.\nC. The tail of the stuffed toy is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA plush toy with a blue face, large white eyes with black pupils, and two pointed ears.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The feet of the stuffed toy are not mentioned in the description.\nB. The stuffed toy is not mentioned in the description.\nC. The feet of the stuffed toy are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA plush toy with a blue face, large white eyes with black pupils, and two pointed ears.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The door is not mentioned in the description.\nB. The door is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA plush toy with a blue face, large white eyes with black pupils, and two pointed ears.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chair is not mentioned in the description.\nB. The chair is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA plush toy with a blue face, large white eyes with black pupils, and two pointed ears.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fork is not mentioned in the description.\nB. 
The fork is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The material of the stuffed toy is mentioned in the description and is plush.", + "C. The color of the eyes is mentioned in the description and is black.", + "B. The color of the nose is not mentioned, but the nose of the stuffed toy is mentioned.", + "C. The color of the body is mentioned in the description and is blue.", + "B. The shape of the nose is not mentioned, but the nose of the stuffed toy is mentioned.", + "A. The tail of the stuffed toy is not mentioned in the description.", + "A. The feet of the stuffed toy are not mentioned in the description.", + "A. The door is not mentioned in the description.", + "A. The chair is not mentioned in the description.", + "A. The fork is not mentioned in the description." + ], + "score": 0.9, + "score_pos": 0.8, + "score_neg": 1.0, + "recognition_result": true + }, + "25579493": { + "pred": "A small, square-shaped bowl with rounded edges, featuring a light blue exterior and a white interior. The bowl contains a mixture of white rice and a small piece of red food item in the center.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bowl is not mentioned.", + 0 + ], + [ + "The shape of the bowl is not mentioned.", + 0 + ], + [ + "The shape of the bowl is mentioned in the description and is square or rounded.", + 1 + ], + [ + "The shape of the bowl is mentioned in the description but is not square or rounded.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the bowl is mentioned in the description and is square or rounded.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bowl is not mentioned.", + 0 + ], + [ + "The texture of the bowl is not mentioned.", + 0 + ], + [ + "The texture of the bowl is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the bowl is mentioned in the description but is not smooth.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The texture of the bowl is not mentioned.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bowl is not mentioned.", + 0 + ], + [ + "The color of the bowl is not mentioned.", + 0 + ], + [ + "The color of the bowl is mentioned in the description and is white or beige.", + 1 + ], + [ + "The color of the bowl is mentioned in the description but is not white or beige.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the bowl is mentioned in the description but is not white or beige.", + "pred_index": 3, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The content or the bowl is not mentioned.", + 0 + ], + [ + "The color of the content is not mentioned, but the content of the bowl is mentioned.", + 0.5 + ], + [ + "The color of the content is mentioned in the description and is red, white, or yellow.", + 1 + ], + [ + "The color of the content is mentioned in the description but is not red, white, or yellow.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the content is mentioned in the description and is red, white, or yellow.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The base of the bowl is not mentioned in the description.", + 1 + ], + [ + "The base of the bowl is mentioned in the description.", + -1 + ], + [ + "The bowl is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The base of the bowl is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lid of the bowl is not mentioned in the description.", + 1 + ], + [ + "The lid of the bowl is mentioned in the description.", + -1 + ], + [ + "The bowl is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The lid of the bowl is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The knife is not mentioned in the description.", + 1 + ], + [ + "The knife is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The knife is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cabinet is not mentioned in the description.", + 1 + ], + [ + "The cabinet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The cabinet is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The burner is not mentioned in the description.", + 1 + ], + [ + "The burner is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The burner is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is bowl. Based on the image, is it likely that the object in the description is given class: bowl or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small, square-shaped bowl with rounded edges, featuring a light blue exterior and a white interior. The bowl contains a mixture of white rice and a small piece of red food item in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is bowl. Based on the image, is it likely that the object in the description is given class: bowl or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small, square-shaped bowl with rounded edges, featuring a light blue exterior and a white interior. The bowl contains a mixture of white rice and a small piece of red food item in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bowl is not mentioned.\nB. The shape of the bowl is not mentioned.\nC. The shape of the bowl is mentioned in the description and is square or rounded.\nD. The shape of the bowl is mentioned in the description but is not square or rounded.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small, square-shaped bowl with rounded edges, featuring a light blue exterior and a white interior. The bowl contains a mixture of white rice and a small piece of red food item in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bowl is not mentioned.\nB. The texture of the bowl is not mentioned.\nC. The texture of the bowl is mentioned in the description and is smooth.\nD. The texture of the bowl is mentioned in the description but is not smooth.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small, square-shaped bowl with rounded edges, featuring a light blue exterior and a white interior. The bowl contains a mixture of white rice and a small piece of red food item in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bowl is not mentioned.\nB. The color of the bowl is not mentioned.\nC. The color of the bowl is mentioned in the description and is white or beige.\nD. The color of the bowl is mentioned in the description but is not white or beige.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small, square-shaped bowl with rounded edges, featuring a light blue exterior and a white interior. The bowl contains a mixture of white rice and a small piece of red food item in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The content or the bowl is not mentioned.\nB. The color of the content is not mentioned, but the content of the bowl is mentioned.\nC. The color of the content is mentioned in the description and is red, white, or yellow.\nD. The color of the content is mentioned in the description but is not red, white, or yellow.\n", + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small, square-shaped bowl with rounded edges, featuring a light blue exterior and a white interior. The bowl contains a mixture of white rice and a small piece of red food item in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The base of the bowl is not mentioned in the description.\nB. The base of the bowl is mentioned in the description.\nC. The bowl is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small, square-shaped bowl with rounded edges, featuring a light blue exterior and a white interior. The bowl contains a mixture of white rice and a small piece of red food item in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lid of the bowl is not mentioned in the description.\nB. The lid of the bowl is mentioned in the description.\nC. The bowl is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small, square-shaped bowl with rounded edges, featuring a light blue exterior and a white interior. The bowl contains a mixture of white rice and a small piece of red food item in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The knife is not mentioned in the description.\nB. The knife is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small, square-shaped bowl with rounded edges, featuring a light blue exterior and a white interior. The bowl contains a mixture of white rice and a small piece of red food item in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cabinet is not mentioned in the description.\nB. The cabinet is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA small, square-shaped bowl with rounded edges, featuring a light blue exterior and a white interior. The bowl contains a mixture of white rice and a small piece of red food item in the center.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The burner is not mentioned in the description.\nB. The burner is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The shape of the bowl is mentioned in the description and is square or rounded.", + "B. The texture of the bowl is not mentioned.", + "D. The color of the bowl is mentioned in the description but is not white or beige.", + "C. The color of the content is mentioned in the description and is red, white, or yellow.", + "A. The base of the bowl is not mentioned in the description.", + "A. The lid of the bowl is not mentioned in the description.", + "A. The knife is not mentioned in the description.", + "A. The cabinet is not mentioned in the description.", + "A. The burner is not mentioned in the description." + ], + "score": 0.6666666666666666, + "score_pos": 0.25, + "score_neg": 1.0, + "recognition_result": true + }, + "297718": { + "pred": "A piece of sushi with a bed of white rice wrapped in a dark seaweed sheet, filled with a generous portion of pink and white crab meat. 
The top is sprinkled with sesame seeds and a light drizzle of soy sauce.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The position of the rice is mentioned in the description and is outer layer.", + 1 + ], + [ + "The position of the rice is mentioned in the description but is not outer layer.", + -1 + ], + [ + "The position of the rice is not mentioned, but the rice of the sushi is mentioned.", + 0.5 + ], + [ + "The rice or the sushi is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The position of the rice is mentioned in the description and is outer layer.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the wrap is mentioned in the description and is seaweed sheet.", + 1 + ], + [ + "The type of the wrap is mentioned in the description but is not seaweed sheet.", + -1 + ], + [ + "The type of the wrap is not mentioned, but the wrap of the sushi is mentioned.", + 0.5 + ], + [ + "The wrap or the sushi is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The type of the wrap is mentioned in the description and is seaweed sheet.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the sesame seeds is mentioned in the description and is black, yellow, or light brown.", + 1 + ], + [ + "The color of the sesame seeds is mentioned in the description but is not black, yellow, or light brown.", + -1 + ], + [ + "The color of the sesame seeds is not mentioned, but the sesame seeds of the sushi are mentioned.", + 0.5 + ], + [ + "The sesame seeds or the sushi are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the sesame seeds is not mentioned, but the sesame seeds of the sushi are mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the filling is mentioned in the description and is fish or crab meat.", + 1 + ], + [ + "The type of the filling is mentioned in the description but is not fish or crab meat.", + -1 + ], + [ + "The type of the filling is not mentioned, but the filling of the sushi is mentioned.", + 0.5 + ], + [ + "The filling or the sushi is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The type of the filling is mentioned in the description and is fish or crab meat.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The type of the topping is mentioned in the description and is sesame seeds.", + 1 + ], + [ + "The type of the topping is mentioned in the description but is not sesame seeds.", + -1 + ], + [ + "The type of the topping is not mentioned, but the topping of the sushi is mentioned.", + 0.5 + ], + [ + "The topping or the sushi is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The type of the topping is mentioned in the description and is sesame seeds.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wasabi of the sushi is not mentioned in the description.", + 1 + ], + [ + "The sushi is not mentioned in the description.", + 0 + ], + [ + "The wasabi of the sushi is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The wasabi of the sushi is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The soy sauce of the sushi is not mentioned in the description.", + 1 + ], + [ + "The sushi is not mentioned in the description.", + 0 + ], + [ + "The soy sauce of the sushi is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The soy sauce of the sushi is mentioned in the description.", + "pred_index": 2, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The avocado of the sushi is not mentioned in the description.", + 1 + ], + [ + "The sushi is not mentioned in the description.", + 0 + ], + [ + "The avocado of the sushi is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The avocado of the sushi is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pickled ginger of the sushi is not mentioned in the description.", + 1 + ], + [ + "The sushi is not mentioned in the description.", + 0 + ], + [ + "The pickled ginger of the sushi is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The pickled ginger of the sushi is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pineapple chunks are not mentioned in the description.", + 1 + ], + [ + "The pineapple chunks are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The pineapple chunks are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is sushi. Based on the image, is it likely that the object in the description is given class: sushi or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice wrapped in a dark seaweed sheet, filled with a generous portion of pink and white crab meat. The top is sprinkled with sesame seeds and a light drizzle of soy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is sushi. Based on the image, is it likely that the object in the description is given class: sushi or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice wrapped in a dark seaweed sheet, filled with a generous portion of pink and white crab meat. 
The top is sprinkled with sesame seeds and a light drizzle of soy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The position of the rice is mentioned in the description and is outer layer.\nB. The position of the rice is mentioned in the description but is not outer layer.\nC. The position of the rice is not mentioned, but the rice of the sushi is mentioned.\nD. The rice or the sushi is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice wrapped in a dark seaweed sheet, filled with a generous portion of pink and white crab meat. The top is sprinkled with sesame seeds and a light drizzle of soy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The type of the wrap is mentioned in the description and is seaweed sheet.\nB. The type of the wrap is mentioned in the description but is not seaweed sheet.\nC. The type of the wrap is not mentioned, but the wrap of the sushi is mentioned.\nD. The wrap or the sushi is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice wrapped in a dark seaweed sheet, filled with a generous portion of pink and white crab meat. The top is sprinkled with sesame seeds and a light drizzle of soy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the sesame seeds is mentioned in the description and is black, yellow, or light brown.\nB. The color of the sesame seeds is mentioned in the description but is not black, yellow, or light brown.\nC. The color of the sesame seeds is not mentioned, but the sesame seeds of the sushi are mentioned.\nD. The sesame seeds or the sushi are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice wrapped in a dark seaweed sheet, filled with a generous portion of pink and white crab meat. The top is sprinkled with sesame seeds and a light drizzle of soy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The type of the filling is mentioned in the description and is fish or crab meat.\nB. The type of the filling is mentioned in the description but is not fish or crab meat.\nC. The type of the filling is not mentioned, but the filling of the sushi is mentioned.\nD. The filling or the sushi is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice wrapped in a dark seaweed sheet, filled with a generous portion of pink and white crab meat. The top is sprinkled with sesame seeds and a light drizzle of soy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The type of the topping is mentioned in the description and is sesame seeds.\nB. The type of the topping is mentioned in the description but is not sesame seeds.\nC. The type of the topping is not mentioned, but the topping of the sushi is mentioned.\nD. The topping or the sushi is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice wrapped in a dark seaweed sheet, filled with a generous portion of pink and white crab meat. The top is sprinkled with sesame seeds and a light drizzle of soy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wasabi of the sushi is not mentioned in the description.\nB. The sushi is not mentioned in the description.\nC. 
The wasabi of the sushi is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice wrapped in a dark seaweed sheet, filled with a generous portion of pink and white crab meat. The top is sprinkled with sesame seeds and a light drizzle of soy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The soy sauce of the sushi is not mentioned in the description.\nB. The sushi is not mentioned in the description.\nC. The soy sauce of the sushi is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice wrapped in a dark seaweed sheet, filled with a generous portion of pink and white crab meat. The top is sprinkled with sesame seeds and a light drizzle of soy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The avocado of the sushi is not mentioned in the description.\nB. The sushi is not mentioned in the description.\nC. 
The avocado of the sushi is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice wrapped in a dark seaweed sheet, filled with a generous portion of pink and white crab meat. The top is sprinkled with sesame seeds and a light drizzle of soy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pickled ginger of the sushi is not mentioned in the description.\nB. The sushi is not mentioned in the description.\nC. The pickled ginger of the sushi is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA piece of sushi with a bed of white rice wrapped in a dark seaweed sheet, filled with a generous portion of pink and white crab meat. The top is sprinkled with sesame seeds and a light drizzle of soy sauce.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pineapple chunks are not mentioned in the description.\nB. The pineapple chunks are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. 
The position of the rice is mentioned in the description and is outer layer.", + "A. The type of the wrap is mentioned in the description and is seaweed sheet.", + "C. The color of the sesame seeds is not mentioned, but the sesame seeds of the sushi are mentioned.", + "A. The type of the filling is mentioned in the description and is fish or crab meat.", + "A. The type of the topping is mentioned in the description and is sesame seeds.", + "A. The wasabi of the sushi is not mentioned in the description.", + "C. The soy sauce of the sushi is mentioned in the description.", + "A. The avocado of the sushi is not mentioned in the description.", + "A. The pickled ginger of the sushi is not mentioned in the description.", + "A. The pineapple chunks are not mentioned in the description." + ], + "score": 0.75, + "score_pos": 0.9, + "score_neg": 0.6, + "recognition_result": true + }, + "361105": { + "pred": "A small cluster of fresh, vibrant green leaves with a smooth texture, attached to a thin, green stem. The leaves are broad and slightly serrated at the edges, with a glossy surface.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The herb is not mentioned.", + 0 + ], + [ + "The color of the herb is mentioned in the description and is green.", + 1 + ], + [ + "The color of the herb is mentioned in the description but is not green.", + -1 + ], + [ + "The color of the herb is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the herb is mentioned in the description and is green.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The stems or the herb are not mentioned.", + 0 + ], + [ + "The shape of the stems is mentioned in the description and is thin or slender.", + 1 + ], + [ + "The shape of the stems is mentioned in the description but is not thin or slender.", + -1 + ], + [ + "The shape of the stems is not mentioned, but the stems of the herb are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the stems is mentioned in the description and is thin or slender.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The leaves or the herb are not mentioned.", + 0 + ], + [ + "The dark spots of the leaves is mentioned in the description and is visible.", + 1 + ], + [ + "The dark spots of the leaves is mentioned in the description but is not visible.", + -1 + ], + [ + "The dark spots of the leaves is not mentioned, but the leaves of the herb are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The dark spots of the leaves is not mentioned, but the leaves of the herb are mentioned.", + "pred_index": 3, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The flowers of the herb are not mentioned in the description.", + 1 + ], + [ + "The herb is not mentioned in the description.", + 0 + ], + [ + "The flowers of the herb are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The flowers of the herb are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The seeds of the herb are not mentioned in the description.", + 1 + ], + [ + "The herb is not mentioned in the description.", + 0 + ], + [ + "The seeds of the herb are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The seeds of the herb are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The roots of the herb are not mentioned in the description.", + 1 + ], + [ + "The herb is not mentioned in the description.", + 0 + ], + [ + "The roots of the herb are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The roots of the herb are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cream sauce is not mentioned in the description.", + 1 + ], + [ + "The cream sauce is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The cream sauce is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The scallops are not mentioned in the description.", + 1 + ], + [ + "The scallops are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The scallops are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is herb. 
Based on the image, is it likely that the object in the description is given class: herb or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a thin, green stem. The leaves are broad and slightly serrated at the edges, with a glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is herb. Based on the image, is it likely that the object in the description is given class: herb or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a thin, green stem. The leaves are broad and slightly serrated at the edges, with a glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The herb is not mentioned.\nB. 
The color of the herb is mentioned in the description and is green.\nC. The color of the herb is mentioned in the description but is not green.\nD. The color of the herb is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a thin, green stem. The leaves are broad and slightly serrated at the edges, with a glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The stems or the herb are not mentioned.\nB. The shape of the stems is mentioned in the description and is thin or slender.\nC. The shape of the stems is mentioned in the description but is not thin or slender.\nD. The shape of the stems is not mentioned, but the stems of the herb are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a thin, green stem. The leaves are broad and slightly serrated at the edges, with a glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The leaves or the herb are not mentioned.\nB. The dark spots of the leaves is mentioned in the description and is visible.\nC. The dark spots of the leaves is mentioned in the description but is not visible.\nD. The dark spots of the leaves is not mentioned, but the leaves of the herb are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a thin, green stem. The leaves are broad and slightly serrated at the edges, with a glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The flowers of the herb are not mentioned in the description.\nB. The herb is not mentioned in the description.\nC. The flowers of the herb are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a thin, green stem. The leaves are broad and slightly serrated at the edges, with a glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The seeds of the herb are not mentioned in the description.\nB. The herb is not mentioned in the description.\nC. The seeds of the herb are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a thin, green stem. The leaves are broad and slightly serrated at the edges, with a glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The roots of the herb are not mentioned in the description.\nB. The herb is not mentioned in the description.\nC. The roots of the herb are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a thin, green stem. The leaves are broad and slightly serrated at the edges, with a glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cream sauce is not mentioned in the description.\nB. 
The cream sauce is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA small cluster of fresh, vibrant green leaves with a smooth texture, attached to a thin, green stem. The leaves are broad and slightly serrated at the edges, with a glossy surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The scallops are not mentioned in the description.\nB. The scallops are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the herb is mentioned in the description and is green.", + "B. The shape of the stems is mentioned in the description and is thin or slender.", + "D. The dark spots of the leaves is not mentioned, but the leaves of the herb are mentioned.", + "A. The flowers of the herb are not mentioned in the description.", + "A. The seeds of the herb are not mentioned in the description.", + "A. The roots of the herb are not mentioned in the description.", + "A. The cream sauce is not mentioned in the description.", + "A. The scallops are not mentioned in the description." + ], + "score": 0.9375, + "score_pos": 0.8333333333333334, + "score_neg": 1.0, + "recognition_result": true + }, + "1196168": { + "pred": "A rectangular, wall-mounted air conditioner with a large circular vent on the left side, featuring a grid pattern. 
The right side of the unit has a smooth surface with a small rectangular panel and a few visible screws.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the grille is mentioned in the description and is smooth with ridges.", + 1 + ], + [ + "The texture of the grille is mentioned in the description but is not smooth with ridges.", + -1 + ], + [ + "The texture of the grille is not mentioned, but the grille of the air conditioner is mentioned.", + 0.5 + ], + [ + "The grille or the air conditioner is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The texture of the grille is not mentioned, but the grille of the air conditioner is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the air conditioner is mentioned in the description and is rectangular.", + 1 + ], + [ + "The shape of the air conditioner is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the air conditioner is not mentioned.", + 0 + ], + [ + "The air conditioner is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the air conditioner is mentioned in the description and is rectangular.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the air conditioner is mentioned in the description and is white.", + 1 + ], + [ + "The color of the air conditioner is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the air conditioner is not mentioned.", + 0 + ], + [ + "The air conditioner is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the air conditioner is not mentioned.", + "pred_index": 2, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the vent is mentioned in the description and is circular.", + 1 + ], + [ + "The shape of the vent is mentioned in the description but is not circular.", + -1 + ], + [ + "The shape of the vent is not mentioned, but the vent of the air conditioner is mentioned.", + 0.5 + ], + [ + "The vent or the air conditioner is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the vent is mentioned in the description and is circular.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the fan is mentioned in the description and is black, grey, silver, or dark.", + 1 + ], + [ + "The color of the fan is mentioned in the description but is not black, grey, silver, or dark.", + -1 + ], + [ + "The color of the fan is not mentioned, but the fan of the air conditioner is mentioned.", + 0.5 + ], + [ + "The fan or the air conditioner is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the fan is not mentioned, but the fan of the air conditioner is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The air conditioner is not mentioned in the description.", + 0 + ], + [ + "The remote control of the air conditioner is not mentioned in the description.", + 1 + ], + [ + "The remote control of the air conditioner is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The remote control of the air conditioner is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Hotel Shilaza sign is not mentioned in the description.", + 1 + ], + [ + "The Hotel Shilaza sign is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The Hotel Shilaza sign is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The air conditioner is not mentioned in the description.", + 0 + ], + [ + "The display of the air conditioner is not mentioned in the description.", + 1 + ], + [ + "The display of the air conditioner is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The display of the air conditioner is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The UCO Bank branch is not mentioned in the description.", + 1 + ], + [ + "The UCO Bank branch is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The UCO Bank branch is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The motorcycles are not mentioned in the description.", + 1 + ], + [ + "The motorcycles are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The motorcycles are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is air conditioner. Based on the image, is it likely that the object in the description is given class: air conditioner or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, wall-mounted air conditioner with a large circular vent on the left side, featuring a grid pattern. The right side of the unit has a smooth surface with a small rectangular panel and a few visible screws.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is air conditioner. Based on the image, is it likely that the object in the description is given class: air conditioner or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, wall-mounted air conditioner with a large circular vent on the left side, featuring a grid pattern. 
The right side of the unit has a smooth surface with a small rectangular panel and a few visible screws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the grille is mentioned in the description and is smooth with ridges.\nB. The texture of the grille is mentioned in the description but is not smooth with ridges.\nC. The texture of the grille is not mentioned, but the grille of the air conditioner is mentioned.\nD. The grille or the air conditioner is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, wall-mounted air conditioner with a large circular vent on the left side, featuring a grid pattern. The right side of the unit has a smooth surface with a small rectangular panel and a few visible screws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the air conditioner is mentioned in the description and is rectangular.\nB. The shape of the air conditioner is mentioned in the description but is not rectangular.\nC. The shape of the air conditioner is not mentioned.\nD. The air conditioner is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, wall-mounted air conditioner with a large circular vent on the left side, featuring a grid pattern. The right side of the unit has a smooth surface with a small rectangular panel and a few visible screws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the air conditioner is mentioned in the description and is white.\nB. The color of the air conditioner is mentioned in the description but is not white.\nC. The color of the air conditioner is not mentioned.\nD. The air conditioner is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, wall-mounted air conditioner with a large circular vent on the left side, featuring a grid pattern. The right side of the unit has a smooth surface with a small rectangular panel and a few visible screws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the vent is mentioned in the description and is circular.\nB. The shape of the vent is mentioned in the description but is not circular.\nC. The shape of the vent is not mentioned, but the vent of the air conditioner is mentioned.\nD. The vent or the air conditioner is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, wall-mounted air conditioner with a large circular vent on the left side, featuring a grid pattern. The right side of the unit has a smooth surface with a small rectangular panel and a few visible screws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the fan is mentioned in the description and is black, grey, silver, or dark.\nB. The color of the fan is mentioned in the description but is not black, grey, silver, or dark.\nC. The color of the fan is not mentioned, but the fan of the air conditioner is mentioned.\nD. The fan or the air conditioner is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, wall-mounted air conditioner with a large circular vent on the left side, featuring a grid pattern. The right side of the unit has a smooth surface with a small rectangular panel and a few visible screws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The air conditioner is not mentioned in the description.\nB. The remote control of the air conditioner is not mentioned in the description.\nC. 
The remote control of the air conditioner is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, wall-mounted air conditioner with a large circular vent on the left side, featuring a grid pattern. The right side of the unit has a smooth surface with a small rectangular panel and a few visible screws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Hotel Shilaza sign is not mentioned in the description.\nB. The Hotel Shilaza sign is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, wall-mounted air conditioner with a large circular vent on the left side, featuring a grid pattern. The right side of the unit has a smooth surface with a small rectangular panel and a few visible screws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The air conditioner is not mentioned in the description.\nB. The display of the air conditioner is not mentioned in the description.\nC. 
The display of the air conditioner is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, wall-mounted air conditioner with a large circular vent on the left side, featuring a grid pattern. The right side of the unit has a smooth surface with a small rectangular panel and a few visible screws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The UCO Bank branch is not mentioned in the description.\nB. The UCO Bank branch is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, wall-mounted air conditioner with a large circular vent on the left side, featuring a grid pattern. The right side of the unit has a smooth surface with a small rectangular panel and a few visible screws.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The motorcycles are not mentioned in the description.\nB. The motorcycles are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. 
The texture of the grille is not mentioned, but the grille of the air conditioner is mentioned.", + "A. The shape of the air conditioner is mentioned in the description and is rectangular.", + "C. The color of the air conditioner is not mentioned.", + "A. The shape of the vent is mentioned in the description and is circular.", + "C. The color of the fan is not mentioned, but the fan of the air conditioner is mentioned.", + "B. The remote control of the air conditioner is not mentioned in the description.", + "A. The Hotel Shilaza sign is not mentioned in the description.", + "B. The display of the air conditioner is not mentioned in the description.", + "A. The UCO Bank branch is not mentioned in the description.", + "A. The motorcycles are not mentioned in the description." + ], + "score": 0.8, + "score_pos": 0.6, + "score_neg": 1.0, + "recognition_result": true + }, + "1894089": { + "pred": "A metallic screwdriver with a flathead tip and a cylindrical shaft. The handle is textured for grip and has a slight taper towards the tip.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the screwdriver is mentioned in the description but is not silver or metallic.", + -1 + ], + [ + "The screwdriver is not mentioned.", + 0 + ], + [ + "The color of the screwdriver is mentioned in the description and is silver or metallic.", + 1 + ], + [ + "The color of the screwdriver is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the screwdriver is mentioned in the description and is silver or metallic.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the screwdriver is mentioned in the description but is not metal.", + -1 + ], + [ + "The screwdriver is not mentioned.", + 0 + ], + [ + "The material of the screwdriver is mentioned in the description and is metal.", + 1 + ], + [ + "The material of the screwdriver is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the screwdriver is mentioned in the description and is metal.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the grip is mentioned in the description but is not rough or textured.", + -1 + ], + [ + "The grip or the screwdriver is not mentioned.", + 0 + ], + [ + "The texture of the grip is mentioned in the description and is rough or textured.", + 1 + ], + [ + "The texture of the grip is not mentioned, but the grip of the screwdriver is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. The texture of the grip is mentioned in the description and is rough or textured.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The CD is mentioned in the description.", + -1 + ], + [ + "The CD is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The CD is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wire is mentioned in the description.", + -1 + ], + [ + "The wire is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The wire is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The interchangeable bits of the screwdriver are mentioned in the description.", + -1 + ], + [ + "The interchangeable bits of the screwdriver are not mentioned in the description.", + 1 + ], + [ + "The screwdriver is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The interchangeable bits of the screwdriver are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wire cutters are mentioned in the description.", + -1 + ], + [ + "The wire cutters are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The wire cutters are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The flashlight is mentioned in the description.", + -1 + ], + [ + "The flashlight is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The flashlight is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is screwdriver. 
Based on the image, is it likely that the object in the description is given class: screwdriver or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a flathead tip and a cylindrical shaft. The handle is textured for grip and has a slight taper towards the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is screwdriver. Based on the image, is it likely that the object in the description is given class: screwdriver or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a flathead tip and a cylindrical shaft. The handle is textured for grip and has a slight taper towards the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the screwdriver is mentioned in the description but is not silver or metallic.\nB. The screwdriver is not mentioned.\nC. 
The color of the screwdriver is mentioned in the description and is silver or metallic.\nD. The color of the screwdriver is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a flathead tip and a cylindrical shaft. The handle is textured for grip and has a slight taper towards the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the screwdriver is mentioned in the description but is not metal.\nB. The screwdriver is not mentioned.\nC. The material of the screwdriver is mentioned in the description and is metal.\nD. The material of the screwdriver is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a flathead tip and a cylindrical shaft. The handle is textured for grip and has a slight taper towards the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the grip is mentioned in the description but is not rough or textured.\nB. The grip or the screwdriver is not mentioned.\nC. 
The texture of the grip is mentioned in the description and is rough or textured.\nD. The texture of the grip is not mentioned, but the grip of the screwdriver is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a flathead tip and a cylindrical shaft. The handle is textured for grip and has a slight taper towards the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The CD is mentioned in the description.\nB. The CD is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a flathead tip and a cylindrical shaft. The handle is textured for grip and has a slight taper towards the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wire is mentioned in the description.\nB. The wire is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a flathead tip and a cylindrical shaft. The handle is textured for grip and has a slight taper towards the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The interchangeable bits of the screwdriver are mentioned in the description.\nB. The interchangeable bits of the screwdriver are not mentioned in the description.\nC. The screwdriver is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a flathead tip and a cylindrical shaft. The handle is textured for grip and has a slight taper towards the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wire cutters are mentioned in the description.\nB. The wire cutters are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA metallic screwdriver with a flathead tip and a cylindrical shaft. The handle is textured for grip and has a slight taper towards the tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The flashlight is mentioned in the description.\nB. The flashlight is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the screwdriver is mentioned in the description and is silver or metallic.", + "C. The material of the screwdriver is mentioned in the description and is metal.", + "C. The texture of the grip is mentioned in the description and is rough or textured.", + "B. The CD is not mentioned in the description.", + "B. The wire is not mentioned in the description.", + "B. The interchangeable bits of the screwdriver are not mentioned in the description.", + "B. The wire cutters are not mentioned in the description.", + "B. The flashlight is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "2391780": { + "pred": "The bird has a long, slender neck and a pointed beak. Its body is covered in brown feathers with a slightly lighter underbelly. The wings are outstretched, showing a mix of brown and white feathers with a distinct pattern. 
The tail feathers are short and pointed.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The position of the wings is mentioned in the description but is not spread or extended.", + -1 + ], + [ + "The position of the wings is not mentioned, but the wings of the wild bird are mentioned.", + 0.5 + ], + [ + "The position of the wings is mentioned in the description and is spread or extended.", + 1 + ], + [ + "The wings or the wild bird are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The position of the wings is mentioned in the description and is spread or extended.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the head is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the head is not mentioned, but the head of the wild bird is mentioned.", + 0.5 + ], + [ + "The color of the head is mentioned in the description and is white.", + 1 + ], + [ + "The head or the wild bird is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the head is not mentioned, but the head of the wild bird is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the body is mentioned in the description but is not white, brown or gray.", + -1 + ], + [ + "The color of the body is not mentioned, but the body of the wild bird is mentioned.", + 0.5 + ], + [ + "The color of the body is mentioned in the description and is white, brown or gray.", + 1 + ], + [ + "The body or the wild bird is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the body is mentioned in the description and is white, brown or gray.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the beak is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The color of the beak is not mentioned, but the beak of the wild bird is mentioned.", + 0.5 + ], + [ + "The color of the beak is mentioned in the description and is dark or black.", + 1 + ], + [ + "The beak or the wild bird is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the beak is not mentioned, but the beak of the wild bird is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The windows are mentioned in the description.", + -1 + ], + [ + "The windows are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The windows are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The boats are mentioned in the description.", + -1 + ], + [ + "The boats are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The boats are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The stone walls are mentioned in the description.", + -1 + ], + [ + "The stone walls are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The stone walls are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chimneys are mentioned in the description.", + -1 + ], + [ + "The chimneys are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The chimneys are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The water is mentioned in the description.", + -1 + ], + [ + "The water is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The water is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is wild bird. Based on the image, is it likely that the object in the description is given class: wild bird or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a long, slender neck and a pointed beak. Its body is covered in brown feathers with a slightly lighter underbelly. The wings are outstretched, showing a mix of brown and white feathers with a distinct pattern. 
The tail feathers are short and pointed.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is wild bird. Based on the image, is it likely that the object in the description is given class: wild bird or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a long, slender neck and a pointed beak. Its body is covered in brown feathers with a slightly lighter underbelly. The wings are outstretched, showing a mix of brown and white feathers with a distinct pattern. The tail feathers are short and pointed.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The position of the wings is mentioned in the description but is not spread or extended.\nB. The position of the wings is not mentioned, but the wings of the wild bird are mentioned.\nC. The position of the wings is mentioned in the description and is spread or extended.\nD. The wings or the wild bird are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a long, slender neck and a pointed beak. 
Its body is covered in brown feathers with a slightly lighter underbelly. The wings are outstretched, showing a mix of brown and white feathers with a distinct pattern. The tail feathers are short and pointed.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the head is mentioned in the description but is not white.\nB. The color of the head is not mentioned, but the head of the wild bird is mentioned.\nC. The color of the head is mentioned in the description and is white.\nD. The head or the wild bird is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a long, slender neck and a pointed beak. Its body is covered in brown feathers with a slightly lighter underbelly. The wings are outstretched, showing a mix of brown and white feathers with a distinct pattern. The tail feathers are short and pointed.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the body is mentioned in the description but is not white, brown or gray.\nB. The color of the body is not mentioned, but the body of the wild bird is mentioned.\nC. The color of the body is mentioned in the description and is white, brown or gray.\nD. The body or the wild bird is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a long, slender neck and a pointed beak. Its body is covered in brown feathers with a slightly lighter underbelly. The wings are outstretched, showing a mix of brown and white feathers with a distinct pattern. The tail feathers are short and pointed.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the beak is mentioned in the description but is not dark or black.\nB. The color of the beak is not mentioned, but the beak of the wild bird is mentioned.\nC. The color of the beak is mentioned in the description and is dark or black.\nD. The beak or the wild bird is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a long, slender neck and a pointed beak. Its body is covered in brown feathers with a slightly lighter underbelly. The wings are outstretched, showing a mix of brown and white feathers with a distinct pattern. The tail feathers are short and pointed.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The windows are mentioned in the description.\nB. The windows are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a long, slender neck and a pointed beak. Its body is covered in brown feathers with a slightly lighter underbelly. The wings are outstretched, showing a mix of brown and white feathers with a distinct pattern. The tail feathers are short and pointed.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The boats are mentioned in the description.\nB. The boats are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a long, slender neck and a pointed beak. Its body is covered in brown feathers with a slightly lighter underbelly. The wings are outstretched, showing a mix of brown and white feathers with a distinct pattern. The tail feathers are short and pointed.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The stone walls are mentioned in the description.\nB. The stone walls are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a long, slender neck and a pointed beak. Its body is covered in brown feathers with a slightly lighter underbelly. The wings are outstretched, showing a mix of brown and white feathers with a distinct pattern. The tail feathers are short and pointed.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chimneys are mentioned in the description.\nB. The chimneys are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bird has a long, slender neck and a pointed beak. Its body is covered in brown feathers with a slightly lighter underbelly. The wings are outstretched, showing a mix of brown and white feathers with a distinct pattern. The tail feathers are short and pointed.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The water is mentioned in the description.\nB. The water is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The position of the wings is mentioned in the description and is spread or extended.", + "B. The color of the head is not mentioned, but the head of the wild bird is mentioned.", + "C. The color of the body is mentioned in the description and is white, brown or gray.", + "B. 
The color of the beak is not mentioned, but the beak of the wild bird is mentioned.", + "B. The windows are not mentioned in the description.", + "B. The boats are not mentioned in the description.", + "B. The stone walls are not mentioned in the description.", + "B. The chimneys are not mentioned in the description.", + "B. The water is not mentioned in the description." + ], + "score": 0.8888888888888888, + "score_pos": 0.75, + "score_neg": 1.0, + "recognition_result": true + }, + "4502267": { + "pred": "A green bean with a smooth, slightly curved surface, featuring a gradient of light to dark green hues. The bean has a tapered end and a small, pointed tip.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green bean is not mentioned.", + 0 + ], + [ + "The shape of the green bean is not mentioned.", + 0 + ], + [ + "The shape of the green bean is mentioned in the description and is elongated.", + 1 + ], + [ + "The shape of the green bean is mentioned in the description but is not elongated.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the green bean is mentioned in the description and is elongated.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green bean is not mentioned.", + 0 + ], + [ + "The color of the green bean is not mentioned.", + 0 + ], + [ + "The color of the green bean is mentioned in the description and is green.", + 1 + ], + [ + "The color of the green bean is mentioned in the description but is not green.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the green bean is mentioned in the description and is green.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green bean is not mentioned.", + 0 + ], + [ + "The shape of the green bean is not mentioned.", + 0 + ], + [ + "The shape of the green bean is mentioned in the description and is arc or curved.", + 1 + ], + [ + "The shape of the green bean is mentioned in the description but is not arc or curved.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the green bean is mentioned in the description and is arc or curved.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green bean is not mentioned.", + 0 + ], + [ + "The texture of the green bean is not mentioned.", + 0 + ], + [ + "The texture of the green bean is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the green bean is mentioned in the description but is not smooth.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The texture of the green bean is mentioned in the description and is smooth.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The apple is not mentioned in the description.", + 1 + ], + [ + "The apple is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The apple is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green bean is not mentioned in the description.", + 0 + ], + [ + "The strings of the green bean are not mentioned in the description.", + 1 + ], + [ + "The strings of the green bean are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The strings of the green bean are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green bean is not mentioned in the description.", + 0 + ], + [ + "The seeds of the green bean are not mentioned in the description.", + 1 + ], + [ + "The seeds of the green bean are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The seeds of the green bean are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pear is not mentioned in the description.", + 1 + ], + [ + "The pear is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The pear is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The peach is not mentioned in the description.", + 1 + ], + [ + "The peach is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The peach is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is green bean. 
Based on the image, is it likely that the object in the description is given class: green bean or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, featuring a gradient of light to dark green hues. The bean has a tapered end and a small, pointed tip.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is green bean. Based on the image, is it likely that the object in the description is given class: green bean or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, featuring a gradient of light to dark green hues. The bean has a tapered end and a small, pointed tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green bean is not mentioned.\nB. The shape of the green bean is not mentioned.\nC. 
The shape of the green bean is mentioned in the description and is elongated.\nD. The shape of the green bean is mentioned in the description but is not elongated.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, featuring a gradient of light to dark green hues. The bean has a tapered end and a small, pointed tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green bean is not mentioned.\nB. The color of the green bean is not mentioned.\nC. The color of the green bean is mentioned in the description and is green.\nD. The color of the green bean is mentioned in the description but is not green.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, featuring a gradient of light to dark green hues. The bean has a tapered end and a small, pointed tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green bean is not mentioned.\nB. The shape of the green bean is not mentioned.\nC. 
The shape of the green bean is mentioned in the description and is arc or curved.\nD. The shape of the green bean is mentioned in the description but is not arc or curved.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, featuring a gradient of light to dark green hues. The bean has a tapered end and a small, pointed tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green bean is not mentioned.\nB. The texture of the green bean is not mentioned.\nC. The texture of the green bean is mentioned in the description and is smooth.\nD. The texture of the green bean is mentioned in the description but is not smooth.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, featuring a gradient of light to dark green hues. The bean has a tapered end and a small, pointed tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The apple is not mentioned in the description.\nB. 
The apple is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, featuring a gradient of light to dark green hues. The bean has a tapered end and a small, pointed tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green bean is not mentioned in the description.\nB. The strings of the green bean are not mentioned in the description.\nC. The strings of the green bean are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, featuring a gradient of light to dark green hues. The bean has a tapered end and a small, pointed tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green bean is not mentioned in the description.\nB. The seeds of the green bean are not mentioned in the description.\nC. The seeds of the green bean are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, featuring a gradient of light to dark green hues. The bean has a tapered end and a small, pointed tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pear is not mentioned in the description.\nB. The pear is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA green bean with a smooth, slightly curved surface, featuring a gradient of light to dark green hues. The bean has a tapered end and a small, pointed tip.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The peach is not mentioned in the description.\nB. The peach is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The shape of the green bean is mentioned in the description and is elongated.", + "C. The color of the green bean is mentioned in the description and is green.", + "C. The shape of the green bean is mentioned in the description and is arc or curved.", + "C. The texture of the green bean is mentioned in the description and is smooth.", + "A. The apple is not mentioned in the description.", + "B. 
The strings of the green bean are not mentioned in the description.", + "B. The seeds of the green bean are not mentioned in the description.", + "A. The pear is not mentioned in the description.", + "A. The peach is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "4604873": { + "pred": "A tall, lattice-style mobile crane with a long, horizontal boom extending to the left. The crane has a rectangular base and a vertical mast with a series of diagonal cross-bracing. The boom is supported by a series of cables and pulleys, and there is a hook at the end of the boom.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the jib is not mentioned, but the jib of the crane is mentioned.", + 0.5 + ], + [ + "The jib or the crane is not mentioned.", + 0 + ], + [ + "The size of the jib is mentioned in the description but is not long.", + -1 + ], + [ + "The size of the jib is mentioned in the description and is long.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The size of the jib is not mentioned, but the jib of the crane is mentioned.", + "pred_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the tower is not mentioned, but the tower of the crane is mentioned.", + 0.5 + ], + [ + "The tower or the crane is not mentioned.", + 0 + ], + [ + "The size of the tower is mentioned in the description but is not tall.", + -1 + ], + [ + "The size of the tower is mentioned in the description and is tall.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The size of the tower is mentioned in the description and is tall.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The visibility of the hook is not mentioned, but the hook of the crane is mentioned.", + 0.5 + ], + [ + "The hook or the crane is not mentioned.", + 0 + ], + [ + "The visibility of the hook is mentioned in the description but is not visible.", + -1 + ], + [ + "The visibility of the hook is mentioned in the description and is visible.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The visibility of the hook is not mentioned, but the hook of the crane is mentioned.", + "pred_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the crane is not mentioned.", + 0 + ], + [ + "The crane is not mentioned.", + 0 + ], + [ + "The material of the crane is mentioned in the description but is not metal or steel.", + -1 + ], + [ + "The material of the crane is mentioned in the description and is metal or steel.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the crane is not mentioned.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the jib is not mentioned, but the jib of the crane is mentioned.", + 0.5 + ], + [ + "The jib or the crane is not mentioned.", + 0 + ], + [ + "The shape of the jib is mentioned in the description but is not horizontal beam.", + -1 + ], + [ + "The shape of the jib is mentioned in the description and is horizontal beam.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The shape of the jib is mentioned in the description and is horizontal beam.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The telescoping sections of the crane are not mentioned in the description.", + 1 + ], + [ + "The crane is not mentioned in the description.", + 0 + ], + [ + "The telescoping sections of the crane are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The telescoping sections of the crane are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tracks of the crane are not mentioned in the description.", + 1 + ], + [ + "The crane is not mentioned in the description.", + 0 + ], + [ + "The tracks of the crane are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The tracks of the crane are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wheels of the crane are not mentioned in the description.", + 1 + ], + [ + "The crane is not mentioned in the description.", + 0 + ], + [ + "The wheels of the crane are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The wheels of the crane are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The silhouettes of structures are not mentioned in the description.", + 1 + ], + [ + "The silhouettes of structures are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The silhouettes of structures are mentioned in the description.", + "pred_index": 1, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clouds are not mentioned in the description.", + 1 + ], + [ + "The clouds are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The clouds are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is crane. Based on the image, is it likely that the object in the description is given class: crane or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, lattice-style mobile crane with a long, horizontal boom extending to the left. The crane has a rectangular base and a vertical mast with a series of diagonal cross-bracing. 
The boom is supported by a series of cables and pulleys, and there is a hook at the end of the boom.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is crane. Based on the image, is it likely that the object in the description is given class: crane or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, lattice-style mobile crane with a long, horizontal boom extending to the left. The crane has a rectangular base and a vertical mast with a series of diagonal cross-bracing. The boom is supported by a series of cables and pulleys, and there is a hook at the end of the boom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the jib is not mentioned, but the jib of the crane is mentioned.\nB. The jib or the crane is not mentioned.\nC. The size of the jib is mentioned in the description but is not long.\nD. The size of the jib is mentioned in the description and is long.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, lattice-style mobile crane with a long, horizontal boom extending to the left. 
The crane has a rectangular base and a vertical mast with a series of diagonal cross-bracing. The boom is supported by a series of cables and pulleys, and there is a hook at the end of the boom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the tower is not mentioned, but the tower of the crane is mentioned.\nB. The tower or the crane is not mentioned.\nC. The size of the tower is mentioned in the description but is not tall.\nD. The size of the tower is mentioned in the description and is tall.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, lattice-style mobile crane with a long, horizontal boom extending to the left. The crane has a rectangular base and a vertical mast with a series of diagonal cross-bracing. The boom is supported by a series of cables and pulleys, and there is a hook at the end of the boom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The visibility of the hook is not mentioned, but the hook of the crane is mentioned.\nB. The hook or the crane is not mentioned.\nC. The visibility of the hook is mentioned in the description but is not visible.\nD. The visibility of the hook is mentioned in the description and is visible.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, lattice-style mobile crane with a long, horizontal boom extending to the left. The crane has a rectangular base and a vertical mast with a series of diagonal cross-bracing. The boom is supported by a series of cables and pulleys, and there is a hook at the end of the boom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the crane is not mentioned.\nB. The crane is not mentioned.\nC. The material of the crane is mentioned in the description but is not metal or steel.\nD. The material of the crane is mentioned in the description and is metal or steel.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, lattice-style mobile crane with a long, horizontal boom extending to the left. The crane has a rectangular base and a vertical mast with a series of diagonal cross-bracing. The boom is supported by a series of cables and pulleys, and there is a hook at the end of the boom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the jib is not mentioned, but the jib of the crane is mentioned.\nB. The jib or the crane is not mentioned.\nC. The shape of the jib is mentioned in the description but is not horizontal beam.\nD. 
The shape of the jib is mentioned in the description and is horizontal beam.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, lattice-style mobile crane with a long, horizontal boom extending to the left. The crane has a rectangular base and a vertical mast with a series of diagonal cross-bracing. The boom is supported by a series of cables and pulleys, and there is a hook at the end of the boom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The telescoping sections of the crane are not mentioned in the description.\nB. The crane is not mentioned in the description.\nC. The telescoping sections of the crane are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, lattice-style mobile crane with a long, horizontal boom extending to the left. The crane has a rectangular base and a vertical mast with a series of diagonal cross-bracing. The boom is supported by a series of cables and pulleys, and there is a hook at the end of the boom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The tracks of the crane are not mentioned in the description.\nB. The crane is not mentioned in the description.\nC. The tracks of the crane are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, lattice-style mobile crane with a long, horizontal boom extending to the left. The crane has a rectangular base and a vertical mast with a series of diagonal cross-bracing. The boom is supported by a series of cables and pulleys, and there is a hook at the end of the boom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wheels of the crane are not mentioned in the description.\nB. The crane is not mentioned in the description.\nC. The wheels of the crane are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, lattice-style mobile crane with a long, horizontal boom extending to the left. The crane has a rectangular base and a vertical mast with a series of diagonal cross-bracing. 
The boom is supported by a series of cables and pulleys, and there is a hook at the end of the boom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The silhouettes of structures are not mentioned in the description.\nB. The silhouettes of structures are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA tall, lattice-style mobile crane with a long, horizontal boom extending to the left. The crane has a rectangular base and a vertical mast with a series of diagonal cross-bracing. The boom is supported by a series of cables and pulleys, and there is a hook at the end of the boom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The clouds are not mentioned in the description.\nB. The clouds are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The size of the jib is not mentioned, but the jib of the crane is mentioned.", + "D. The size of the tower is mentioned in the description and is tall.", + "A. The visibility of the hook is not mentioned, but the hook of the crane is mentioned.", + "A. The material of the crane is not mentioned.", + "D. The shape of the jib is mentioned in the description and is horizontal beam.", + "A. The telescoping sections of the crane are not mentioned in the description.", + "A. The tracks of the crane are not mentioned in the description.", + "A. The wheels of the crane are not mentioned in the description.", + "B. 
The silhouettes of structures are mentioned in the description.", + "A. The clouds are not mentioned in the description." + ], + "score": 0.6, + "score_pos": 0.6, + "score_neg": 0.6, + "recognition_result": true + }, + "4916799": { + "pred": "A spherical sculpture composed of numerous small, white, dome-shaped elements arranged in a grid pattern. The sphere is mounted on a cylindrical base and features a blue band with the word \"Pune\" in blue letters. There are also green and yellow accents on the sphere.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the globe is mentioned in the description and is silver and blue.", + 1 + ], + [ + "The globe is not mentioned.", + 0 + ], + [ + "The color of the globe is not mentioned.", + 0 + ], + [ + "The color of the globe is mentioned in the description but is not silver and blue.", + -1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the globe is mentioned in the description but is not silver and blue.", + "pred_index": 3, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the sphere is mentioned in the description and is metal and plastic.", + 1 + ], + [ + "The sphere or the globe is not mentioned.", + 0 + ], + [ + "The material of the sphere is not mentioned, but the sphere of the globe is mentioned.", + 0.5 + ], + [ + "The material of the sphere is mentioned in the description but is not metal and plastic.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The material of the sphere is not mentioned, but the sphere of the globe is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the base is mentioned in the description and is circular.", + 1 + ], + [ + "The base or the globe is not mentioned.", + 0 + ], + [ + "The shape of the base is not mentioned, but the base of the globe is mentioned.", + 0.5 + ], + [ + "The shape of the base is mentioned in the description but is not circular.", + -1 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the base is mentioned in the description and is circular.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the base is mentioned in the description and is gray.", + 1 + ], + [ + "The base or the globe is not mentioned.", + 0 + ], + [ + "The color of the base is not mentioned, but the base of the globe is mentioned.", + 0.5 + ], + [ + "The color of the base is mentioned in the description but is not gray.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the base is not mentioned, but the base of the globe is mentioned.", + "pred_index": 2, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sidewalk is mentioned in the description.", + -1 + ], + [ + "The sidewalk is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The sidewalk is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The trees are mentioned in the description.", + -1 + ], + [ + "The trees are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The trees are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sky is mentioned in the description.", + -1 + ], + [ + "The sky is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The sky is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The person is mentioned in the description.", + -1 + ], + [ + "The person is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The person is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bushes are mentioned in the description.", + -1 + ], + [ + "The bushes are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bushes are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is globe. Based on the image, is it likely that the object in the description is given class: globe or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture composed of numerous small, white, dome-shaped elements arranged in a grid pattern. The sphere is mounted on a cylindrical base and features a blue band with the word \"Pune\" in blue letters. There are also green and yellow accents on the sphere.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is globe. Based on the image, is it likely that the object in the description is given class: globe or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture composed of numerous small, white, dome-shaped elements arranged in a grid pattern. The sphere is mounted on a cylindrical base and features a blue band with the word \"Pune\" in blue letters. There are also green and yellow accents on the sphere.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the globe is mentioned in the description and is silver and blue.\nB. The globe is not mentioned.\nC. 
The color of the globe is not mentioned.\nD. The color of the globe is mentioned in the description but is not silver and blue.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture composed of numerous small, white, dome-shaped elements arranged in a grid pattern. The sphere is mounted on a cylindrical base and features a blue band with the word \"Pune\" in blue letters. There are also green and yellow accents on the sphere.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the sphere is mentioned in the description and is metal and plastic.\nB. The sphere or the globe is not mentioned.\nC. The material of the sphere is not mentioned, but the sphere of the globe is mentioned.\nD. The material of the sphere is mentioned in the description but is not metal and plastic.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture composed of numerous small, white, dome-shaped elements arranged in a grid pattern. The sphere is mounted on a cylindrical base and features a blue band with the word \"Pune\" in blue letters. 
There are also green and yellow accents on the sphere.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the base is mentioned in the description and is circular.\nB. The base or the globe is not mentioned.\nC. The shape of the base is not mentioned, but the base of the globe is mentioned.\nD. The shape of the base is mentioned in the description but is not circular.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture composed of numerous small, white, dome-shaped elements arranged in a grid pattern. The sphere is mounted on a cylindrical base and features a blue band with the word \"Pune\" in blue letters. There are also green and yellow accents on the sphere.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the base is mentioned in the description and is gray.\nB. The base or the globe is not mentioned.\nC. The color of the base is not mentioned, but the base of the globe is mentioned.\nD. The color of the base is mentioned in the description but is not gray.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture composed of numerous small, white, dome-shaped elements arranged in a grid pattern. The sphere is mounted on a cylindrical base and features a blue band with the word \"Pune\" in blue letters. There are also green and yellow accents on the sphere.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sidewalk is mentioned in the description.\nB. The sidewalk is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture composed of numerous small, white, dome-shaped elements arranged in a grid pattern. The sphere is mounted on a cylindrical base and features a blue band with the word \"Pune\" in blue letters. There are also green and yellow accents on the sphere.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The trees are mentioned in the description.\nB. The trees are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture composed of numerous small, white, dome-shaped elements arranged in a grid pattern. The sphere is mounted on a cylindrical base and features a blue band with the word \"Pune\" in blue letters. There are also green and yellow accents on the sphere.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sky is mentioned in the description.\nB. The sky is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture composed of numerous small, white, dome-shaped elements arranged in a grid pattern. The sphere is mounted on a cylindrical base and features a blue band with the word \"Pune\" in blue letters. There are also green and yellow accents on the sphere.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The person is mentioned in the description.\nB. The person is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA spherical sculpture composed of numerous small, white, dome-shaped elements arranged in a grid pattern. The sphere is mounted on a cylindrical base and features a blue band with the word \"Pune\" in blue letters. There are also green and yellow accents on the sphere.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bushes are mentioned in the description.\nB. The bushes are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the globe is mentioned in the description but is not silver and blue.", + "C. The material of the sphere is not mentioned, but the sphere of the globe is mentioned.", + "A. The shape of the base is mentioned in the description and is circular.", + "C. The color of the base is not mentioned, but the base of the globe is mentioned.", + "B. The sidewalk is not mentioned in the description.", + "B. The trees are not mentioned in the description.", + "B. The sky is not mentioned in the description.", + "B. The person is not mentioned in the description.", + "B. The bushes are not mentioned in the description." + ], + "score": 0.6666666666666666, + "score_pos": 0.25, + "score_neg": 1.0, + "recognition_result": true + }, + "5718415": { + "pred": "The tent has a yellow canopy with a slightly curved edge. 
The visible part of the tent includes a vertical metal pole supporting the canopy.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The canopy or the tent is not mentioned.", + 0 + ], + [ + "The material of the canopy is mentioned in the description but is not fabric.", + -1 + ], + [ + "The material of the canopy is mentioned in the description and is fabric.", + 1 + ], + [ + "The material of the canopy is not mentioned, but the canopy of the tent is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the canopy is not mentioned, but the canopy of the tent is mentioned.", + "pred_index": 3, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pole or the tent is not mentioned.", + 0 + ], + [ + "The material of the pole is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the pole is mentioned in the description and is metal.", + 1 + ], + [ + "The material of the pole is not mentioned, but the pole of the tent is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the pole is mentioned in the description and is metal.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The canopy or the tent is not mentioned.", + 0 + ], + [ + "The color of the canopy is mentioned in the description but is not yellow.", + -1 + ], + [ + "The color of the canopy is mentioned in the description and is yellow.", + 1 + ], + [ + "The color of the canopy is not mentioned, but the canopy of the tent is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the canopy is mentioned in the description and is yellow.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The windows of the tent are mentioned in the description.", + -1 + ], + [ + "The tent is not mentioned in the description.", + 0 + ], + [ + "The windows of the tent are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The windows of the tent are not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The buildings are mentioned in the description.", + -1 + ], + [ + "The buildings are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The buildings are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The walls of the tent are mentioned in the description.", + -1 + ], + [ + "The tent is not mentioned in the description.", + 0 + ], + [ + "The walls of the tent are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The walls of the tent are not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The door of the tent is mentioned in the description.", + -1 + ], + [ + "The tent is not mentioned in the description.", + 0 + ], + [ + "The door of the tent is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The door of the tent is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The floor of the tent is mentioned in the description.", + -1 + ], + [ + "The tent is not mentioned in the description.", + 0 + ], + [ + "The floor of the tent is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The floor of the tent is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is tent. Based on the image, is it likely that the object in the description is given class: tent or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a slightly curved edge. The visible part of the tent includes a vertical metal pole supporting the canopy.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is tent. Based on the image, is it likely that the object in the description is given class: tent or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a slightly curved edge. The visible part of the tent includes a vertical metal pole supporting the canopy.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The canopy or the tent is not mentioned.\nB. The material of the canopy is mentioned in the description but is not fabric.\nC. The material of the canopy is mentioned in the description and is fabric.\nD. The material of the canopy is not mentioned, but the canopy of the tent is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a slightly curved edge. The visible part of the tent includes a vertical metal pole supporting the canopy.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pole or the tent is not mentioned.\nB. The material of the pole is mentioned in the description but is not metal.\nC. The material of the pole is mentioned in the description and is metal.\nD. The material of the pole is not mentioned, but the pole of the tent is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a slightly curved edge. The visible part of the tent includes a vertical metal pole supporting the canopy.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The canopy or the tent is not mentioned.\nB. The color of the canopy is mentioned in the description but is not yellow.\nC. The color of the canopy is mentioned in the description and is yellow.\nD. The color of the canopy is not mentioned, but the canopy of the tent is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a slightly curved edge. The visible part of the tent includes a vertical metal pole supporting the canopy.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The windows of the tent are mentioned in the description.\nB. The tent is not mentioned in the description.\nC. The windows of the tent are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a slightly curved edge. The visible part of the tent includes a vertical metal pole supporting the canopy.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The buildings are mentioned in the description.\nB. The buildings are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a slightly curved edge. The visible part of the tent includes a vertical metal pole supporting the canopy.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The walls of the tent are mentioned in the description.\nB. The tent is not mentioned in the description.\nC. The walls of the tent are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a slightly curved edge. 
The visible part of the tent includes a vertical metal pole supporting the canopy.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The door of the tent is mentioned in the description.\nB. The tent is not mentioned in the description.\nC. The door of the tent is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe tent has a yellow canopy with a slightly curved edge. The visible part of the tent includes a vertical metal pole supporting the canopy.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The floor of the tent is mentioned in the description.\nB. The tent is not mentioned in the description.\nC. The floor of the tent is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The material of the canopy is not mentioned, but the canopy of the tent is mentioned.", + "C. The material of the pole is mentioned in the description and is metal.", + "C. The color of the canopy is mentioned in the description and is yellow.", + "C. The windows of the tent are not mentioned in the description.", + "B. The buildings are not mentioned in the description.", + "C. The walls of the tent are not mentioned in the description.", + "C. The door of the tent is not mentioned in the description.", + "C. The floor of the tent is not mentioned in the description." 
+ ], + "score": 0.9375, + "score_pos": 0.8333333333333334, + "score_neg": 1.0, + "recognition_result": true + }, + "6012878": { + "pred": "A square traffic light with a black background and a red illuminated hand symbol on the left side.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The symbol or the traffic light is not mentioned.", + 0 + ], + [ + "The color of the symbol is mentioned in the description but is not red or orange.", + -1 + ], + [ + "The color of the symbol is mentioned in the description and is red or orange.", + 1 + ], + [ + "The color of the symbol is not mentioned, but the symbol of the traffic light is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the symbol is mentioned in the description and is red or orange.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The symbol or the traffic light is not mentioned.", + 0 + ], + [ + "The shape of the symbol is mentioned in the description but is not hand outline.", + -1 + ], + [ + "The shape of the symbol is mentioned in the description and is hand outline.", + 1 + ], + [ + "The shape of the symbol is not mentioned, but the symbol of the traffic light is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the symbol is mentioned in the description and is hand outline.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The background or the traffic light is not mentioned.", + 0 + ], + [ + "The texture of the background is mentioned in the description but is not matte.", + -1 + ], + [ + "The texture of the background is mentioned in the description and is matte.", + 1 + ], + [ + "The texture of the background is not mentioned, but the background of the traffic light is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The texture of the background is not mentioned, but the background of the traffic light is mentioned.", + "pred_index": 3, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The background or the traffic light is not mentioned.", + 0 + ], + [ + "The color of the background is mentioned in the description but is not gray or black.", + -1 + ], + [ + "The color of the background is mentioned in the description and is gray or black.", + 1 + ], + [ + "The color of the background is not mentioned, but the background of the traffic light is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the background is mentioned in the description and is gray or black.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The reflective surface or the traffic light is not mentioned.", + 0 + ], + [ + "The material of the reflective surface is mentioned in the description but is not glass or plastic.", + -1 + ], + [ + "The material of the reflective surface is mentioned in the description and is glass or plastic.", + 1 + ], + [ + "The material of the reflective surface is not mentioned, but the reflective surface of the traffic light is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the reflective surface is not mentioned, but the reflective surface of the traffic light is mentioned.", + "pred_index": 3, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The walking person symbol of the traffic light is not mentioned in the description.", + 1 + ], + [ + "The walking person symbol of the traffic light is mentioned in the description.", + -1 + ], + [ + "The traffic light is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The walking person symbol of the traffic light is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pole of the traffic light is not mentioned in the description.", + 1 + ], + [ + "The pole of the traffic light is mentioned in the description.", + -1 + ], + [ + "The traffic light is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The pole of the traffic light is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bicycles are not mentioned in the description.", + 1 + ], + [ + "The bicycles are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The bicycles are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sidewalk is not mentioned in the description.", + 1 + ], + [ + "The sidewalk is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The sidewalk is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green light of the traffic light is not mentioned in the description.", + 1 + ], + [ + "The green light of the traffic light is mentioned in the description.", + -1 + ], + [ + "The traffic light is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The green light of the traffic light is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is traffic light. Based on the image, is it likely that the object in the description is given class: traffic light or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square traffic light with a black background and a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is traffic light. Based on the image, is it likely that the object in the description is given class: traffic light or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square traffic light with a black background and a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The symbol or the traffic light is not mentioned.\nB. The color of the symbol is mentioned in the description but is not red or orange.\nC. The color of the symbol is mentioned in the description and is red or orange.\nD. The color of the symbol is not mentioned, but the symbol of the traffic light is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square traffic light with a black background and a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The symbol or the traffic light is not mentioned.\nB. The shape of the symbol is mentioned in the description but is not hand outline.\nC. The shape of the symbol is mentioned in the description and is hand outline.\nD. The shape of the symbol is not mentioned, but the symbol of the traffic light is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square traffic light with a black background and a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The background or the traffic light is not mentioned.\nB. The texture of the background is mentioned in the description but is not matte.\nC. The texture of the background is mentioned in the description and is matte.\nD. The texture of the background is not mentioned, but the background of the traffic light is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square traffic light with a black background and a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The background or the traffic light is not mentioned.\nB. The color of the background is mentioned in the description but is not gray or black.\nC. The color of the background is mentioned in the description and is gray or black.\nD. The color of the background is not mentioned, but the background of the traffic light is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square traffic light with a black background and a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The reflective surface or the traffic light is not mentioned.\nB. The material of the reflective surface is mentioned in the description but is not glass or plastic.\nC. The material of the reflective surface is mentioned in the description and is glass or plastic.\nD. The material of the reflective surface is not mentioned, but the reflective surface of the traffic light is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square traffic light with a black background and a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The walking person symbol of the traffic light is not mentioned in the description.\nB. The walking person symbol of the traffic light is mentioned in the description.\nC. The traffic light is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square traffic light with a black background and a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pole of the traffic light is not mentioned in the description.\nB. The pole of the traffic light is mentioned in the description.\nC. The traffic light is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA square traffic light with a black background and a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bicycles are not mentioned in the description.\nB. The bicycles are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square traffic light with a black background and a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sidewalk is not mentioned in the description.\nB. The sidewalk is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA square traffic light with a black background and a red illuminated hand symbol on the left side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green light of the traffic light is not mentioned in the description.\nB. 
The green light of the traffic light is mentioned in the description.\nC. The traffic light is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the symbol is mentioned in the description and is red or orange.", + "C. The shape of the symbol is mentioned in the description and is hand outline.", + "D. The texture of the background is not mentioned, but the background of the traffic light is mentioned.", + "C. The color of the background is mentioned in the description and is gray or black.", + "D. The material of the reflective surface is not mentioned, but the reflective surface of the traffic light is mentioned.", + "A. The walking person symbol of the traffic light is not mentioned in the description.", + "A. The pole of the traffic light is not mentioned in the description.", + "A. The bicycles are not mentioned in the description.", + "A. The sidewalk is not mentioned in the description.", + "A. The green light of the traffic light is not mentioned in the description." + ], + "score": 0.9, + "score_pos": 0.8, + "score_neg": 1.0, + "recognition_result": true + }, + "6820595": { + "pred": "A cat with a white face, black ears, and a black patch over its left eye. The body is predominantly black with a white underbelly and a white patch on its right side. The tail is black.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the ear is not mentioned, but the ear of the cat is mentioned.", + 0.5 + ], + [ + "The shape of the ear is mentioned in the description but is not triangular.", + -1 + ], + [ + "The ear or the cat is not mentioned.", + 0 + ], + [ + "The shape of the ear is mentioned in the description and is triangular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The shape of the ear is not mentioned, but the ear of the cat is mentioned.", + "pred_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the fur is not mentioned, but the fur of the cat is mentioned.", + 0.5 + ], + [ + "The texture of the fur is mentioned in the description but is not fluffy.", + -1 + ], + [ + "The fur or the cat is not mentioned.", + 0 + ], + [ + "The texture of the fur is mentioned in the description and is fluffy.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. The texture of the fur is not mentioned, but the fur of the cat is mentioned.", + "pred_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the fur is not mentioned, but the fur of the cat is mentioned.", + 0.5 + ], + [ + "The color of the fur is mentioned in the description but is not black and white.", + -1 + ], + [ + "The fur or the cat is not mentioned.", + 0 + ], + [ + "The color of the fur is mentioned in the description and is black and white.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the fur is mentioned in the description and is black and white.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the back is not mentioned, but the back of the cat is mentioned.", + 0.5 + ], + [ + "The shape of the back is mentioned in the description but is not arched.", + -1 + ], + [ + "The back or the cat is not mentioned.", + 0 + ], + [ + "The shape of the back is mentioned in the description and is arched.", + 1 + ] + ], + "type": "positive", + "pred_answer": "A. 
The shape of the back is not mentioned, but the back of the cat is mentioned.", + "pred_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the underbelly is not mentioned, but the underbelly of the cat is mentioned.", + 0.5 + ], + [ + "The color of the underbelly is mentioned in the description but is not white.", + -1 + ], + [ + "The underbelly or the cat is not mentioned.", + 0 + ], + [ + "The color of the underbelly is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the underbelly is mentioned in the description and is white.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The door is mentioned in the description.", + -1 + ], + [ + "The door is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The door is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mirror is mentioned in the description.", + -1 + ], + [ + "The mirror is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The mirror is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bathroom cabinet is mentioned in the description.", + -1 + ], + [ + "The bathroom cabinet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The bathroom cabinet is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bathroom sink is mentioned in the description.", + -1 + ], + [ + "The bathroom sink is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bathroom sink is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The hairbrush is mentioned in the description.", + -1 + ], + [ + "The hairbrush is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The hairbrush is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is cat. Based on the image, is it likely that the object in the description is given class: cat or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face, black ears, and a black patch over its left eye. The body is predominantly black with a white underbelly and a white patch on its right side. 
The tail is black.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is cat. Based on the image, is it likely that the object in the description is given class: cat or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face, black ears, and a black patch over its left eye. The body is predominantly black with a white underbelly and a white patch on its right side. The tail is black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the ear is not mentioned, but the ear of the cat is mentioned.\nB. The shape of the ear is mentioned in the description but is not triangular.\nC. The ear or the cat is not mentioned.\nD. The shape of the ear is mentioned in the description and is triangular.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face, black ears, and a black patch over its left eye. The body is predominantly black with a white underbelly and a white patch on its right side. 
The tail is black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the fur is not mentioned, but the fur of the cat is mentioned.\nB. The texture of the fur is mentioned in the description but is not fluffy.\nC. The fur or the cat is not mentioned.\nD. The texture of the fur is mentioned in the description and is fluffy.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face, black ears, and a black patch over its left eye. The body is predominantly black with a white underbelly and a white patch on its right side. The tail is black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the fur is not mentioned, but the fur of the cat is mentioned.\nB. The color of the fur is mentioned in the description but is not black and white.\nC. The fur or the cat is not mentioned.\nD. The color of the fur is mentioned in the description and is black and white.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face, black ears, and a black patch over its left eye. 
The body is predominantly black with a white underbelly and a white patch on its right side. The tail is black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the back is not mentioned, but the back of the cat is mentioned.\nB. The shape of the back is mentioned in the description but is not arched.\nC. The back or the cat is not mentioned.\nD. The shape of the back is mentioned in the description and is arched.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face, black ears, and a black patch over its left eye. The body is predominantly black with a white underbelly and a white patch on its right side. The tail is black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the underbelly is not mentioned, but the underbelly of the cat is mentioned.\nB. The color of the underbelly is mentioned in the description but is not white.\nC. The underbelly or the cat is not mentioned.\nD. The color of the underbelly is mentioned in the description and is white.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face, black ears, and a black patch over its left eye. The body is predominantly black with a white underbelly and a white patch on its right side. The tail is black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The door is mentioned in the description.\nB. The door is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face, black ears, and a black patch over its left eye. The body is predominantly black with a white underbelly and a white patch on its right side. The tail is black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mirror is mentioned in the description.\nB. The mirror is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face, black ears, and a black patch over its left eye. The body is predominantly black with a white underbelly and a white patch on its right side. 
The tail is black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bathroom cabinet is mentioned in the description.\nB. The bathroom cabinet is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face, black ears, and a black patch over its left eye. The body is predominantly black with a white underbelly and a white patch on its right side. The tail is black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bathroom sink is mentioned in the description.\nB. The bathroom sink is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA cat with a white face, black ears, and a black patch over its left eye. The body is predominantly black with a white underbelly and a white patch on its right side. The tail is black.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The hairbrush is mentioned in the description.\nB. 
The hairbrush is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The shape of the ear is not mentioned, but the ear of the cat is mentioned.", + "A. The texture of the fur is not mentioned, but the fur of the cat is mentioned.", + "D. The color of the fur is mentioned in the description and is black and white.", + "A. The shape of the back is not mentioned, but the back of the cat is mentioned.", + "D. The color of the underbelly is mentioned in the description and is white.", + "B. The door is not mentioned in the description.", + "B. The mirror is not mentioned in the description.", + "B. The bathroom cabinet is not mentioned in the description.", + "B. The bathroom sink is not mentioned in the description.", + "B. The hairbrush is not mentioned in the description." + ], + "score": 0.85, + "score_pos": 0.7, + "score_neg": 1.0, + "recognition_result": true + }, + "8556674": { + "pred": "A single, round orange with a smooth, glossy surface. The orange has a vibrant, bright orange color with a small, lighter patch near the top left.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned.", + 0 + ], + [ + "The color of the orange/tangerine is mentioned in the description but is not orange.", + -1 + ], + [ + "The color of the orange/tangerine is not mentioned.", + 0 + ], + [ + "The color of the orange/tangerine is mentioned in the description and is orange.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the orange/tangerine is mentioned in the description and is orange.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The surface or the orange/tangerine is not mentioned.", + 0 + ], + [ + "The color of the surface is mentioned in the description but is not bright orange.", + -1 + ], + [ + "The color of the surface is not mentioned, but the surface of the orange/tangerine is mentioned.", + 0.5 + ], + [ + "The color of the surface is mentioned in the description and is bright orange.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the surface is mentioned in the description and is bright orange.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned.", + 0 + ], + [ + "The texture of the orange/tangerine is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the orange/tangerine is not mentioned.", + 0 + ], + [ + "The texture of the orange/tangerine is mentioned in the description and is smooth.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The texture of the orange/tangerine is mentioned in the description and is smooth.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned.", + 0 + ], + [ + "The shape of the orange/tangerine is mentioned in the description but is not round.", + -1 + ], + [ + "The shape of the orange/tangerine is not mentioned.", + 0 + ], + [ + "The shape of the orange/tangerine is mentioned in the description and is round.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The shape of the orange/tangerine is mentioned in the description and is round.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The surface or the orange/tangerine is not mentioned.", + 0 + ], + [ + "The texture of the surface is mentioned in the description but is not glossy.", + -1 + ], + [ + "The texture of the surface is not mentioned, but the surface of the orange/tangerine is mentioned.", + 0.5 + ], + [ + "The texture of the surface is mentioned in the description and is glossy.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The texture of the surface is mentioned in the description and is glossy.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned in the description.", + 0 + ], + [ + "The stem of the orange/tangerine is mentioned in the description.", + -1 + ], + [ + "The stem of the orange/tangerine is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The stem of the orange/tangerine is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned in the description.", + 0 + ], + [ + "The leaves of the orange/tangerine are mentioned in the description.", + -1 + ], + [ + "The leaves of the orange/tangerine are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The leaves of the orange/tangerine are not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned in the description.", + 0 + ], + [ + "The segments of the orange/tangerine are mentioned in the description.", + -1 + ], + [ + "The segments of the orange/tangerine are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The segments of the orange/tangerine are not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ceiling lights are mentioned in the description.", + -1 + ], + [ + "The ceiling lights are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The ceiling lights are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orange/tangerine is not mentioned in the description.", + 0 + ], + [ + "The flesh of the orange/tangerine is mentioned in the description.", + -1 + ], + [ + "The flesh of the orange/tangerine is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The flesh of the orange/tangerine is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is orange/tangerine. Based on the image, is it likely that the object in the description is given class: orange/tangerine or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. 
Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, round orange with a smooth, glossy surface. The orange has a vibrant, bright orange color with a small, lighter patch near the top left.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is orange/tangerine. Based on the image, is it likely that the object in the description is given class: orange/tangerine or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, round orange with a smooth, glossy surface. The orange has a vibrant, bright orange color with a small, lighter patch near the top left.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orange/tangerine is not mentioned.\nB. The color of the orange/tangerine is mentioned in the description but is not orange.\nC. The color of the orange/tangerine is not mentioned.\nD. 
The color of the orange/tangerine is mentioned in the description and is orange.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, round orange with a smooth, glossy surface. The orange has a vibrant, bright orange color with a small, lighter patch near the top left.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The surface or the orange/tangerine is not mentioned.\nB. The color of the surface is mentioned in the description but is not bright orange.\nC. The color of the surface is not mentioned, but the surface of the orange/tangerine is mentioned.\nD. The color of the surface is mentioned in the description and is bright orange.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, round orange with a smooth, glossy surface. The orange has a vibrant, bright orange color with a small, lighter patch near the top left.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orange/tangerine is not mentioned.\nB. The texture of the orange/tangerine is mentioned in the description but is not smooth.\nC. 
The texture of the orange/tangerine is not mentioned.\nD. The texture of the orange/tangerine is mentioned in the description and is smooth.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, round orange with a smooth, glossy surface. The orange has a vibrant, bright orange color with a small, lighter patch near the top left.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orange/tangerine is not mentioned.\nB. The shape of the orange/tangerine is mentioned in the description but is not round.\nC. The shape of the orange/tangerine is not mentioned.\nD. The shape of the orange/tangerine is mentioned in the description and is round.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, round orange with a smooth, glossy surface. The orange has a vibrant, bright orange color with a small, lighter patch near the top left.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The surface or the orange/tangerine is not mentioned.\nB. The texture of the surface is mentioned in the description but is not glossy.\nC. 
The texture of the surface is not mentioned, but the surface of the orange/tangerine is mentioned.\nD. The texture of the surface is mentioned in the description and is glossy.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, round orange with a smooth, glossy surface. The orange has a vibrant, bright orange color with a small, lighter patch near the top left.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orange/tangerine is not mentioned in the description.\nB. The stem of the orange/tangerine is mentioned in the description.\nC. The stem of the orange/tangerine is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, round orange with a smooth, glossy surface. The orange has a vibrant, bright orange color with a small, lighter patch near the top left.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orange/tangerine is not mentioned in the description.\nB. The leaves of the orange/tangerine are mentioned in the description.\nC. 
The leaves of the orange/tangerine are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, round orange with a smooth, glossy surface. The orange has a vibrant, bright orange color with a small, lighter patch near the top left.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orange/tangerine is not mentioned in the description.\nB. The segments of the orange/tangerine are mentioned in the description.\nC. The segments of the orange/tangerine are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, round orange with a smooth, glossy surface. The orange has a vibrant, bright orange color with a small, lighter patch near the top left.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ceiling lights are mentioned in the description.\nB. The ceiling lights are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA single, round orange with a smooth, glossy surface. The orange has a vibrant, bright orange color with a small, lighter patch near the top left.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orange/tangerine is not mentioned in the description.\nB. The flesh of the orange/tangerine is mentioned in the description.\nC. The flesh of the orange/tangerine is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the orange/tangerine is mentioned in the description and is orange.", + "D. The color of the surface is mentioned in the description and is bright orange.", + "D. The texture of the orange/tangerine is mentioned in the description and is smooth.", + "D. The shape of the orange/tangerine is mentioned in the description and is round.", + "D. The texture of the surface is mentioned in the description and is glossy.", + "C. The stem of the orange/tangerine is not mentioned in the description.", + "C. The leaves of the orange/tangerine are not mentioned in the description.", + "C. The segments of the orange/tangerine are not mentioned in the description.", + "B. The ceiling lights are not mentioned in the description.", + "C. The flesh of the orange/tangerine is not mentioned in the description." 
+ ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "8906172": { + "pred": "A black, curved earphone with a smooth, glossy finish.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the earphone is not mentioned.", + 0 + ], + [ + "The color of the earphone is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The earphone is not mentioned.", + 0 + ], + [ + "The color of the earphone is mentioned in the description and is dark or black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the earphone is mentioned in the description and is dark or black.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the earphone is not mentioned.", + 0 + ], + [ + "The shape of the earphone is mentioned in the description but is not curved.", + -1 + ], + [ + "The earphone is not mentioned.", + 0 + ], + [ + "The shape of the earphone is mentioned in the description and is curved.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the earphone is mentioned in the description and is curved.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The control buttons of the earphone are mentioned in the description.", + -1 + ], + [ + "The control buttons of the earphone are not mentioned in the description.", + 1 + ], + [ + "The earphone is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The control buttons of the earphone are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mirror is mentioned in the description.", + -1 + ], + [ + "The mirror is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The mirror is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cell phone is mentioned in the description.", + -1 + ], + [ + "The cell phone is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The cell phone is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The faucet is mentioned in the description.", + -1 + ], + [ + "The faucet is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The faucet is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sink is mentioned in the description.", + -1 + ], + [ + "The sink is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The sink is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is earphone. 
Based on the image, is it likely that the object in the description is given class: earphone or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, curved earphone with a smooth, glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is earphone. Based on the image, is it likely that the object in the description is given class: earphone or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, curved earphone with a smooth, glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the earphone is not mentioned.\nB. The color of the earphone is mentioned in the description but is not dark or black.\nC. The earphone is not mentioned.\nD. 
The color of the earphone is mentioned in the description and is dark or black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, curved earphone with a smooth, glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the earphone is not mentioned.\nB. The shape of the earphone is mentioned in the description but is not curved.\nC. The earphone is not mentioned.\nD. The shape of the earphone is mentioned in the description and is curved.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, curved earphone with a smooth, glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The control buttons of the earphone are mentioned in the description.\nB. The control buttons of the earphone are not mentioned in the description.\nC. The earphone is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, curved earphone with a smooth, glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mirror is mentioned in the description.\nB. The mirror is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, curved earphone with a smooth, glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cell phone is mentioned in the description.\nB. The cell phone is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, curved earphone with a smooth, glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The faucet is mentioned in the description.\nB. 
The faucet is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black, curved earphone with a smooth, glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sink is mentioned in the description.\nB. The sink is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The color of the earphone is mentioned in the description and is dark or black.", + "D. The shape of the earphone is mentioned in the description and is curved.", + "B. The control buttons of the earphone are not mentioned in the description.", + "B. The mirror is not mentioned in the description.", + "B. The cell phone is not mentioned in the description.", + "B. The faucet is not mentioned in the description.", + "B. The sink is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "10666665": { + "pred": "A round wall clock with a black frame and a white face. The clock features black Arabic numerals at each hour mark, with the numbers 12, 3, 6, and 9 in larger font. The clock has three black hands: an hour hand, a minute hand, and a second hand. 
The hour hand is pointing between the 10 and 11, the minute hand is pointing at 12, and the second hand is pointing at 6.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clock is not mentioned.", + 0 + ], + [ + "The shape of the clock is not mentioned.", + 0 + ], + [ + "The shape of the clock is mentioned in the description but is not circular.", + -1 + ], + [ + "The shape of the clock is mentioned in the description and is circular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the clock is mentioned in the description and is circular.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clock face or the clock is not mentioned.", + 0 + ], + [ + "The color of the clock face is not mentioned, but the clock face of the clock is mentioned.", + 0.5 + ], + [ + "The color of the clock face is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the clock face is mentioned in the description and is white.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the clock face is mentioned in the description and is white.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The hour hand or the clock is not mentioned.", + 0 + ], + [ + "The color of the hour hand is not mentioned, but the hour hand of the clock is mentioned.", + 0.5 + ], + [ + "The color of the hour hand is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the hour hand is mentioned in the description and is black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the hour hand is mentioned in the description and is black.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The numbers or the clock are not mentioned.", + 0 + ], + [ + "The color of the numbers is not mentioned, but the numbers of the clock are mentioned.", + 0.5 + ], + [ + "The color of the numbers is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the numbers is mentioned in the description and is black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the numbers is mentioned in the description and is black.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The frame or the clock is not mentioned.", + 0 + ], + [ + "The color of the frame is not mentioned, but the frame of the clock is mentioned.", + 0.5 + ], + [ + "The color of the frame is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the frame is mentioned in the description and is black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the frame is mentioned in the description and is black.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bamboo blind is not mentioned in the description.", + 1 + ], + [ + "The bamboo blind is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The bamboo blind is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The microwave is not mentioned in the description.", + 1 + ], + [ + "The microwave is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The microwave is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The digital display of the clock is not mentioned in the description.", + 1 + ], + [ + "The clock is not mentioned in the description.", + 0 + ], + [ + "The digital display of the clock is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The digital display of the clock is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pendulum of the clock is not mentioned in the description.", + 1 + ], + [ + "The clock is not mentioned in the description.", + 0 + ], + [ + "The pendulum of the clock is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The pendulum of the clock is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The faucet is not mentioned in the description.", + 1 + ], + [ + "The faucet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The faucet is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is clock. Based on the image, is it likely that the object in the description is given class: clock or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals at each hour mark, with the numbers 12, 3, 6, and 9 in larger font. The clock has three black hands: an hour hand, a minute hand, and a second hand. The hour hand is pointing between the 10 and 11, the minute hand is pointing at 12, and the second hand is pointing at 6.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is clock. Based on the image, is it likely that the object in the description is given class: clock or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals at each hour mark, with the numbers 12, 3, 6, and 9 in larger font. The clock has three black hands: an hour hand, a minute hand, and a second hand. The hour hand is pointing between the 10 and 11, the minute hand is pointing at 12, and the second hand is pointing at 6.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The clock is not mentioned.\nB. The shape of the clock is not mentioned.\nC. The shape of the clock is mentioned in the description but is not circular.\nD. The shape of the clock is mentioned in the description and is circular.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals at each hour mark, with the numbers 12, 3, 6, and 9 in larger font. The clock has three black hands: an hour hand, a minute hand, and a second hand. The hour hand is pointing between the 10 and 11, the minute hand is pointing at 12, and the second hand is pointing at 6.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The clock face or the clock is not mentioned.\nB. The color of the clock face is not mentioned, but the clock face of the clock is mentioned.\nC. The color of the clock face is mentioned in the description but is not white.\nD. 
The color of the clock face is mentioned in the description and is white.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals at each hour mark, with the numbers 12, 3, 6, and 9 in larger font. The clock has three black hands: an hour hand, a minute hand, and a second hand. The hour hand is pointing between the 10 and 11, the minute hand is pointing at 12, and the second hand is pointing at 6.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The hour hand or the clock is not mentioned.\nB. The color of the hour hand is not mentioned, but the hour hand of the clock is mentioned.\nC. The color of the hour hand is mentioned in the description but is not black.\nD. The color of the hour hand is mentioned in the description and is black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals at each hour mark, with the numbers 12, 3, 6, and 9 in larger font. The clock has three black hands: an hour hand, a minute hand, and a second hand. 
The hour hand is pointing between the 10 and 11, the minute hand is pointing at 12, and the second hand is pointing at 6.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The numbers or the clock are not mentioned.\nB. The color of the numbers is not mentioned, but the numbers of the clock are mentioned.\nC. The color of the numbers is mentioned in the description but is not black.\nD. The color of the numbers is mentioned in the description and is black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals at each hour mark, with the numbers 12, 3, 6, and 9 in larger font. The clock has three black hands: an hour hand, a minute hand, and a second hand. The hour hand is pointing between the 10 and 11, the minute hand is pointing at 12, and the second hand is pointing at 6.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The frame or the clock is not mentioned.\nB. The color of the frame is not mentioned, but the frame of the clock is mentioned.\nC. The color of the frame is mentioned in the description but is not black.\nD. The color of the frame is mentioned in the description and is black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals at each hour mark, with the numbers 12, 3, 6, and 9 in larger font. The clock has three black hands: an hour hand, a minute hand, and a second hand. The hour hand is pointing between the 10 and 11, the minute hand is pointing at 12, and the second hand is pointing at 6.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bamboo blind is not mentioned in the description.\nB. The bamboo blind is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals at each hour mark, with the numbers 12, 3, 6, and 9 in larger font. The clock has three black hands: an hour hand, a minute hand, and a second hand. The hour hand is pointing between the 10 and 11, the minute hand is pointing at 12, and the second hand is pointing at 6.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The microwave is not mentioned in the description.\nB. The microwave is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals at each hour mark, with the numbers 12, 3, 6, and 9 in larger font. The clock has three black hands: an hour hand, a minute hand, and a second hand. The hour hand is pointing between the 10 and 11, the minute hand is pointing at 12, and the second hand is pointing at 6.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The digital display of the clock is not mentioned in the description.\nB. The clock is not mentioned in the description.\nC. The digital display of the clock is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals at each hour mark, with the numbers 12, 3, 6, and 9 in larger font. The clock has three black hands: an hour hand, a minute hand, and a second hand. The hour hand is pointing between the 10 and 11, the minute hand is pointing at 12, and the second hand is pointing at 6.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The pendulum of the clock is not mentioned in the description.\nB. The clock is not mentioned in the description.\nC. The pendulum of the clock is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA round wall clock with a black frame and a white face. The clock features black Arabic numerals at each hour mark, with the numbers 12, 3, 6, and 9 in larger font. The clock has three black hands: an hour hand, a minute hand, and a second hand. The hour hand is pointing between the 10 and 11, the minute hand is pointing at 12, and the second hand is pointing at 6.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The faucet is not mentioned in the description.\nB. The faucet is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The shape of the clock is mentioned in the description and is circular.", + "D. The color of the clock face is mentioned in the description and is white.", + "D. The color of the hour hand is mentioned in the description and is black.", + "D. The color of the numbers is mentioned in the description and is black.", + "D. The color of the frame is mentioned in the description and is black.", + "A. The bamboo blind is not mentioned in the description.", + "A. The microwave is not mentioned in the description.", + "A. The digital display of the clock is not mentioned in the description.", + "A. The pendulum of the clock is not mentioned in the description.", + "A. The faucet is not mentioned in the description." 
+ ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "10811497": { + "pred": "The mouse is a dark green, oval-shaped device with a smooth surface. It has a small, circular indentation near the bottom edge.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the key is not mentioned.", + 0 + ], + [ + "The color of the key is mentioned in the description and is dark green, gray, or black.", + 1 + ], + [ + "The color of the key is mentioned in the description but is not dark green, gray, or black.", + -1 + ], + [ + "The key is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "D. The key is not mentioned.", + "pred_index": 3, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the key is not mentioned.", + 0 + ], + [ + "The material of the key is mentioned in the description and is plastic.", + 1 + ], + [ + "The material of the key is mentioned in the description but is not plastic.", + -1 + ], + [ + "The key is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the key is not mentioned.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the key is not mentioned.", + 0 + ], + [ + "The texture of the key is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the key is mentioned in the description but is not smooth.", + -1 + ], + [ + "The key is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The texture of the key is not mentioned.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the key is not mentioned.", + 0 + ], + [ + "The shape of the key is mentioned in the description and is rounded, circular, or oval.", + 1 + ], + [ + "The shape of the key is mentioned in the description but is not rounded, circular, or oval.", + -1 + ], + [ + "The key is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "D. The key is not mentioned.", + "pred_index": 3, + "eval_result": 0 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The key is not mentioned in the description.", + 0 + ], + [ + "The key bow of the key is not mentioned in the description.", + 1 + ], + [ + "The key bow of the key is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The key is not mentioned in the description.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The key is not mentioned in the description.", + 0 + ], + [ + "The key teeth of the key are not mentioned in the description.", + 1 + ], + [ + "The key teeth of the key are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The key teeth of the key are not mentioned in the description.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sticky note is not mentioned in the description.", + 1 + ], + [ + "The sticky note is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The sticky note is not mentioned in the description.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The smartphone is not mentioned in the description.", + 1 + ], + [ + "The smartphone is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The smartphone is not mentioned in the description.", + "pred_index": 0, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The black fabric item is not mentioned in the description.", + 1 + ], + [ + "The black fabric item is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The black fabric item is not mentioned in the description.", + "pred_index": 0, + "eval_result": 0 + } + ], + "details_recognition": [ + { + "question": "The object in the image is key. Based on the image, is it likely that the object in the description is given class: key or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "B. No", + "pred_index": 1, + "eval_result": "incorrect" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse is a dark green, oval-shaped device with a smooth surface. It has a small, circular indentation near the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is key. 
Based on the image, is it likely that the object in the description is given class: key or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse is a dark green, oval-shaped device with a smooth surface. It has a small, circular indentation near the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the key is not mentioned.\nB. The color of the key is mentioned in the description and is dark green, gray, or black.\nC. The color of the key is mentioned in the description but is not dark green, gray, or black.\nD. The key is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse is a dark green, oval-shaped device with a smooth surface. It has a small, circular indentation near the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the key is not mentioned.\nB. The material of the key is mentioned in the description and is plastic.\nC. The material of the key is mentioned in the description but is not plastic.\nD. 
The key is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse is a dark green, oval-shaped device with a smooth surface. It has a small, circular indentation near the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the key is not mentioned.\nB. The texture of the key is mentioned in the description and is smooth.\nC. The texture of the key is mentioned in the description but is not smooth.\nD. The key is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse is a dark green, oval-shaped device with a smooth surface. It has a small, circular indentation near the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the key is not mentioned.\nB. The shape of the key is mentioned in the description and is rounded, circular, or oval.\nC. The shape of the key is mentioned in the description but is not rounded, circular, or oval.\nD. 
The key is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse is a dark green, oval-shaped device with a smooth surface. It has a small, circular indentation near the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The key is not mentioned in the description.\nB. The key bow of the key is not mentioned in the description.\nC. The key bow of the key is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse is a dark green, oval-shaped device with a smooth surface. It has a small, circular indentation near the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The key is not mentioned in the description.\nB. The key teeth of the key are not mentioned in the description.\nC. The key teeth of the key are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse is a dark green, oval-shaped device with a smooth surface. It has a small, circular indentation near the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sticky note is not mentioned in the description.\nB. The sticky note is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse is a dark green, oval-shaped device with a smooth surface. It has a small, circular indentation near the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The smartphone is not mentioned in the description.\nB. The smartphone is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe mouse is a dark green, oval-shaped device with a smooth surface. 
It has a small, circular indentation near the bottom edge.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The black fabric item is not mentioned in the description.\nB. The black fabric item is mentioned in the description.\n" + ], + "response": [ + "B. No", + "D. The key is not mentioned.", + "A. The material of the key is not mentioned.", + "A. The texture of the key is not mentioned.", + "D. The key is not mentioned.", + "A. The key is not mentioned in the description.", + "B. The key teeth of the key are not mentioned in the description.", + "A. The sticky note is not mentioned in the description.", + "A. The smartphone is not mentioned in the description.", + "A. The black fabric item is not mentioned in the description." + ], + "score": 0.0, + "score_pos": 0.0, + "score_neg": 0.0, + "recognition_result": false + }, + "11021562": { + "pred": "The microwave oven has a white exterior with a rectangular shape. It features a prominent, curved handle on the front door, which is also white. The control panel is located on the right side of the door, with a series of buttons and a small display screen. The top of the microwave has a vented section for ventilation.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the handle is not mentioned, but the handle of the microwave is mentioned.", + 0.5 + ], + [ + "The shape of the handle is mentioned in the description but is not curved.", + -1 + ], + [ + "The shape of the handle is mentioned in the description and is curved.", + 1 + ], + [ + "The handle or the microwave is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the handle is mentioned in the description and is curved.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the casing is not mentioned, but the casing of the microwave is mentioned.", + 0.5 + ], + [ + "The texture of the casing is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the casing is mentioned in the description and is smooth.", + 1 + ], + [ + "The casing or the microwave is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The texture of the casing is not mentioned, but the casing of the microwave is mentioned.", + "pred_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The orientation of the handle is not mentioned, but the handle of the microwave is mentioned.", + 0.5 + ], + [ + "The orientation of the handle is mentioned in the description but is not vertical.", + -1 + ], + [ + "The orientation of the handle is mentioned in the description and is vertical.", + 1 + ], + [ + "The handle or the microwave is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The orientation of the handle is not mentioned, but the handle of the microwave is mentioned.", + "pred_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the microwave is not mentioned.", + 0 + ], + [ + "The color of the microwave is mentioned in the description but is not white, beige, or yellow.", + -1 + ], + [ + "The color of the microwave is mentioned in the description and is white, beige, or yellow.", + 1 + ], + [ + "The microwave is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the microwave is mentioned in the description and is white, beige, or yellow.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The position of the vent is not mentioned, but the vent of the microwave is mentioned.", + 0.5 + ], + [ + "The position of the vent is mentioned in the description but is not top.", + -1 + ], + [ + "The position of the vent is mentioned in the description and is top.", + 1 + ], + [ + "The vent or the microwave is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The position of the vent is mentioned in the description and is top.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fire extinguisher is mentioned in the description.", + -1 + ], + [ + "The fire extinguisher is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The fire extinguisher is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The turntable of the microwave is mentioned in the description.", + -1 + ], + [ + "The turntable of the microwave is not mentioned in the description.", + 1 + ], + [ + "The microwave is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The turntable of the microwave is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The interior light of the microwave is mentioned in the description.", + -1 + ], + [ + "The interior light of the microwave is not mentioned in the description.", + 1 + ], + [ + "The microwave is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The interior light of the microwave is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The windows are mentioned in the description.", + -1 + ], + [ + "The windows are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The windows are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rug is mentioned in the description.", + -1 + ], + [ + "The rug is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The rug is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is microwave. Based on the image, is it likely that the object in the description is given class: microwave or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven has a white exterior with a rectangular shape. It features a prominent, curved handle on the front door, which is also white. The control panel is located on the right side of the door, with a series of buttons and a small display screen. The top of the microwave has a vented section for ventilation.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is microwave. Based on the image, is it likely that the object in the description is given class: microwave or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven has a white exterior with a rectangular shape. It features a prominent, curved handle on the front door, which is also white. The control panel is located on the right side of the door, with a series of buttons and a small display screen. The top of the microwave has a vented section for ventilation.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the handle is not mentioned, but the handle of the microwave is mentioned.\nB. The shape of the handle is mentioned in the description but is not curved.\nC. 
The shape of the handle is mentioned in the description and is curved.\nD. The handle or the microwave is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven has a white exterior with a rectangular shape. It features a prominent, curved handle on the front door, which is also white. The control panel is located on the right side of the door, with a series of buttons and a small display screen. The top of the microwave has a vented section for ventilation.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the casing is not mentioned, but the casing of the microwave is mentioned.\nB. The texture of the casing is mentioned in the description but is not smooth.\nC. The texture of the casing is mentioned in the description and is smooth.\nD. The casing or the microwave is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven has a white exterior with a rectangular shape. It features a prominent, curved handle on the front door, which is also white. The control panel is located on the right side of the door, with a series of buttons and a small display screen. 
The top of the microwave has a vented section for ventilation.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The orientation of the handle is not mentioned, but the handle of the microwave is mentioned.\nB. The orientation of the handle is mentioned in the description but is not vertical.\nC. The orientation of the handle is mentioned in the description and is vertical.\nD. The handle or the microwave is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven has a white exterior with a rectangular shape. It features a prominent, curved handle on the front door, which is also white. The control panel is located on the right side of the door, with a series of buttons and a small display screen. The top of the microwave has a vented section for ventilation.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the microwave is not mentioned.\nB. The color of the microwave is mentioned in the description but is not white, beige, or yellow.\nC. The color of the microwave is mentioned in the description and is white, beige, or yellow.\nD. The microwave is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven has a white exterior with a rectangular shape. It features a prominent, curved handle on the front door, which is also white. The control panel is located on the right side of the door, with a series of buttons and a small display screen. The top of the microwave has a vented section for ventilation.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The position of the vent is not mentioned, but the vent of the microwave is mentioned.\nB. The position of the vent is mentioned in the description but is not top.\nC. The position of the vent is mentioned in the description and is top.\nD. The vent or the microwave is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven has a white exterior with a rectangular shape. It features a prominent, curved handle on the front door, which is also white. The control panel is located on the right side of the door, with a series of buttons and a small display screen. The top of the microwave has a vented section for ventilation.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fire extinguisher is mentioned in the description.\nB. The fire extinguisher is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven has a white exterior with a rectangular shape. It features a prominent, curved handle on the front door, which is also white. The control panel is located on the right side of the door, with a series of buttons and a small display screen. The top of the microwave has a vented section for ventilation.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The turntable of the microwave is mentioned in the description.\nB. The turntable of the microwave is not mentioned in the description.\nC. The microwave is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven has a white exterior with a rectangular shape. It features a prominent, curved handle on the front door, which is also white. The control panel is located on the right side of the door, with a series of buttons and a small display screen. The top of the microwave has a vented section for ventilation.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The interior light of the microwave is mentioned in the description.\nB. 
The interior light of the microwave is not mentioned in the description.\nC. The microwave is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven has a white exterior with a rectangular shape. It features a prominent, curved handle on the front door, which is also white. The control panel is located on the right side of the door, with a series of buttons and a small display screen. The top of the microwave has a vented section for ventilation.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The windows are mentioned in the description.\nB. The windows are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe microwave oven has a white exterior with a rectangular shape. It features a prominent, curved handle on the front door, which is also white. The control panel is located on the right side of the door, with a series of buttons and a small display screen. 
The top of the microwave has a vented section for ventilation.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rug is mentioned in the description.\nB. The rug is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The shape of the handle is mentioned in the description and is curved.", + "A. The texture of the casing is not mentioned, but the casing of the microwave is mentioned.", + "A. The orientation of the handle is not mentioned, but the handle of the microwave is mentioned.", + "C. The color of the microwave is mentioned in the description and is white, beige, or yellow.", + "C. The position of the vent is mentioned in the description and is top.", + "B. The fire extinguisher is not mentioned in the description.", + "B. The turntable of the microwave is not mentioned in the description.", + "B. The interior light of the microwave is not mentioned in the description.", + "B. The windows are not mentioned in the description.", + "B. The rug is not mentioned in the description." + ], + "score": 0.9, + "score_pos": 0.8, + "score_neg": 1.0, + "recognition_result": true + }, + "11021563": { + "pred": "A stainless steel gas stove with a black control panel featuring four knobs. The stove has a rectangular shape with a slightly raised back panel. 
The control panel is positioned at the back, and the stove has a smooth, reflective surface.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The control panel or the stove is not mentioned.", + 0 + ], + [ + "The location of the control panel is mentioned in the description but is not back.", + -1 + ], + [ + "The location of the control panel is mentioned in the description and is back.", + 1 + ], + [ + "The location of the control panel is not mentioned, but the control panel of the stove is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. The location of the control panel is mentioned in the description and is back.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The burners or the stove are not mentioned.", + 0 + ], + [ + "The color of the burners is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the burners is mentioned in the description and is black.", + 1 + ], + [ + "The color of the burners is not mentioned, but the burners of the stove are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the burners is not mentioned, but the burners of the stove are mentioned.", + "pred_index": 3, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The burners or the stove are not mentioned.", + 0 + ], + [ + "The number of the burners is mentioned in the description but is not 4.", + -1 + ], + [ + "The number of the burners is mentioned in the description and is 4.", + 1 + ], + [ + "The number of the burners is not mentioned, but the burners of the stove are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. 
The number of the burners is mentioned in the description and is 4.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The burners or the stove are not mentioned.", + 0 + ], + [ + "The shape of the burners is mentioned in the description but is not coiled.", + -1 + ], + [ + "The shape of the burners is mentioned in the description and is coiled.", + 1 + ], + [ + "The shape of the burners is not mentioned, but the burners of the stove are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the burners is not mentioned, but the burners of the stove are mentioned.", + "pred_index": 3, + "eval_result": 0.5 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The countertop is mentioned in the description.", + -1 + ], + [ + "The countertop is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The countertop is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The kitchen cabinets are mentioned in the description.", + -1 + ], + [ + "The kitchen cabinets are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The kitchen cabinets are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The broom is mentioned in the description.", + -1 + ], + [ + "The broom is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The broom is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The induction cooktop surface of the stove is mentioned in the description.", + -1 + ], + [ + "The induction cooktop surface of the stove is not mentioned in the description.", + 1 + ], + [ + "The stove is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The induction cooktop surface of the stove is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The small table is mentioned in the description.", + -1 + ], + [ + "The small table is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The small table is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is stove. Based on the image, is it likely that the object in the description is given class: stove or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel gas stove with a black control panel featuring four knobs. 
The stove has a rectangular shape with a slightly raised back panel. The control panel is positioned at the back, and the stove has a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is stove. Based on the image, is it likely that the object in the description is given class: stove or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel gas stove with a black control panel featuring four knobs. The stove has a rectangular shape with a slightly raised back panel. The control panel is positioned at the back, and the stove has a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The control panel or the stove is not mentioned.\nB. The location of the control panel is mentioned in the description but is not back.\nC. The location of the control panel is mentioned in the description and is back.\nD. The location of the control panel is not mentioned, but the control panel of the stove is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel gas stove with a black control panel featuring four knobs. The stove has a rectangular shape with a slightly raised back panel. The control panel is positioned at the back, and the stove has a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The burners or the stove are not mentioned.\nB. The color of the burners is mentioned in the description but is not black.\nC. The color of the burners is mentioned in the description and is black.\nD. The color of the burners is not mentioned, but the burners of the stove are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel gas stove with a black control panel featuring four knobs. The stove has a rectangular shape with a slightly raised back panel. The control panel is positioned at the back, and the stove has a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The burners or the stove are not mentioned.\nB. The number of the burners is mentioned in the description but is not 4.\nC. The number of the burners is mentioned in the description and is 4.\nD. The number of the burners is not mentioned, but the burners of the stove are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel gas stove with a black control panel featuring four knobs. The stove has a rectangular shape with a slightly raised back panel. The control panel is positioned at the back, and the stove has a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The burners or the stove are not mentioned.\nB. The shape of the burners is mentioned in the description but is not coiled.\nC. The shape of the burners is mentioned in the description and is coiled.\nD. The shape of the burners is not mentioned, but the burners of the stove are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel gas stove with a black control panel featuring four knobs. The stove has a rectangular shape with a slightly raised back panel. The control panel is positioned at the back, and the stove has a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The countertop is mentioned in the description.\nB. The countertop is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel gas stove with a black control panel featuring four knobs. The stove has a rectangular shape with a slightly raised back panel. The control panel is positioned at the back, and the stove has a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The kitchen cabinets are mentioned in the description.\nB. The kitchen cabinets are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel gas stove with a black control panel featuring four knobs. The stove has a rectangular shape with a slightly raised back panel. The control panel is positioned at the back, and the stove has a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The broom is mentioned in the description.\nB. The broom is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel gas stove with a black control panel featuring four knobs. The stove has a rectangular shape with a slightly raised back panel. The control panel is positioned at the back, and the stove has a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The induction cooktop surface of the stove is mentioned in the description.\nB. The induction cooktop surface of the stove is not mentioned in the description.\nC. The stove is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA stainless steel gas stove with a black control panel featuring four knobs. The stove has a rectangular shape with a slightly raised back panel. The control panel is positioned at the back, and the stove has a smooth, reflective surface.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The small table is mentioned in the description.\nB. The small table is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The location of the control panel is mentioned in the description and is back.", + "D. The color of the burners is not mentioned, but the burners of the stove are mentioned.", + "C. The number of the burners is mentioned in the description and is 4.", + "D. 
The shape of the burners is not mentioned, but the burners of the stove are mentioned.", + "B. The countertop is not mentioned in the description.", + "B. The kitchen cabinets are not mentioned in the description.", + "B. The broom is not mentioned in the description.", + "B. The induction cooktop surface of the stove is not mentioned in the description.", + "B. The small table is not mentioned in the description." + ], + "score": 0.8888888888888888, + "score_pos": 0.75, + "score_neg": 1.0, + "recognition_result": true + }, + "12348078": { + "pred": "A woman with dark hair tied up in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is holding a baby in her arms.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clothes or the person are not mentioned.", + 0 + ], + [ + "The style of the clothes is mentioned in the description and is t-shirt.", + 1 + ], + [ + "The style of the clothes is mentioned in the description but is not t-shirt.", + -1 + ], + [ + "The style of the clothes is not mentioned, but the clothes of the person are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The style of the clothes is mentioned in the description and is t-shirt.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The clothes or the person are not mentioned.", + 0 + ], + [ + "The color of the clothes is mentioned in the description and is white.", + 1 + ], + [ + "The color of the clothes is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the clothes is not mentioned, but the clothes of the person are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the clothes is mentioned in the description and is white.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The person is not mentioned.", + 0 + ], + [ + "The hairstyle of the person is mentioned in the description and is bun.", + 1 + ], + [ + "The hairstyle of the person is mentioned in the description but is not bun.", + -1 + ], + [ + "The hairstyle of the person is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The hairstyle of the person is mentioned in the description and is bun.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The person is not mentioned.", + 0 + ], + [ + "The hair color of the person is mentioned in the description and is dark or black.", + 1 + ], + [ + "The hair color of the person is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The hair color of the person is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The hair color of the person is mentioned in the description and is dark or black.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pants or the person are not mentioned.", + 0 + ], + [ + "The color of the pants is mentioned in the description and is black.", + 1 + ], + [ + "The color of the pants is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the pants is not mentioned, but the pants of the person are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The color of the pants is mentioned in the description and is black.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The mouth of the person is mentioned in the description.", + -1 + ], + [ + "The person is not mentioned in the description.", + 0 + ], + [ + "The mouth of the person is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The mouth of the person is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The face of the person is mentioned in the description.", + -1 + ], + [ + "The person is not mentioned in the description.", + 0 + ], + [ + "The face of the person is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The face of the person is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The nose of the person is mentioned in the description.", + -1 + ], + [ + "The person is not mentioned in the description.", + 0 + ], + [ + "The nose of the person is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The nose of the person is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The wall is mentioned in the description.", + -1 + ], + [ + "The wall is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The wall is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bicycle cart is mentioned in the description.", + -1 + ], + [ + "The bicycle cart is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The bicycle cart is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is person. Based on the image, is it likely that the object in the description is given class: person or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied up in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is holding a baby in her arms.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is person. Based on the image, is it likely that the object in the description is given class: person or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied up in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is holding a baby in her arms.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The clothes or the person are not mentioned.\nB. The style of the clothes is mentioned in the description and is t-shirt.\nC. The style of the clothes is mentioned in the description but is not t-shirt.\nD. The style of the clothes is not mentioned, but the clothes of the person are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied up in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is holding a baby in her arms.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The clothes or the person are not mentioned.\nB. The color of the clothes is mentioned in the description and is white.\nC. The color of the clothes is mentioned in the description but is not white.\nD. The color of the clothes is not mentioned, but the clothes of the person are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied up in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is holding a baby in her arms.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The person is not mentioned.\nB. The hairstyle of the person is mentioned in the description and is bun.\nC. The hairstyle of the person is mentioned in the description but is not bun.\nD. The hairstyle of the person is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied up in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is holding a baby in her arms.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The person is not mentioned.\nB. The hair color of the person is mentioned in the description and is dark or black.\nC. The hair color of the person is mentioned in the description but is not dark or black.\nD. The hair color of the person is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied up in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is holding a baby in her arms.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pants or the person are not mentioned.\nB. The color of the pants is mentioned in the description and is black.\nC. The color of the pants is mentioned in the description but is not black.\nD. The color of the pants is not mentioned, but the pants of the person are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied up in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is holding a baby in her arms.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The mouth of the person is mentioned in the description.\nB. The person is not mentioned in the description.\nC. The mouth of the person is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied up in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is holding a baby in her arms.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The face of the person is mentioned in the description.\nB. The person is not mentioned in the description.\nC. The face of the person is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied up in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is holding a baby in her arms.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The nose of the person is mentioned in the description.\nB. The person is not mentioned in the description.\nC. The nose of the person is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied up in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is holding a baby in her arms.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The wall is mentioned in the description.\nB. The wall is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA woman with dark hair tied up in a bun, wearing a white t-shirt with red text and graphics on the front, and black pants. She is holding a baby in her arms.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bicycle cart is mentioned in the description.\nB. The bicycle cart is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The style of the clothes is mentioned in the description and is t-shirt.", + "B. The color of the clothes is mentioned in the description and is white.", + "B. The hairstyle of the person is mentioned in the description and is bun.", + "B. The hair color of the person is mentioned in the description and is dark or black.", + "B. The color of the pants is mentioned in the description and is black.", + "C. The mouth of the person is not mentioned in the description.", + "C. The face of the person is not mentioned in the description.", + "C. The nose of the person is not mentioned in the description.", + "B. 
The wall is not mentioned in the description.", + "B. The bicycle cart is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "13138178": { + "pred": "A blue plastic stool with a smooth, curved seat and rounded legs. The stool has a simple, sturdy design with a slightly glossy finish.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the stool is mentioned in the description and is blue.", + 1 + ], + [ + "The stool is not mentioned.", + 0 + ], + [ + "The color of the stool is mentioned in the description but is not blue.", + -1 + ], + [ + "The color of the stool is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the stool is mentioned in the description and is blue.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the stool is mentioned in the description and is plastic.", + 1 + ], + [ + "The stool is not mentioned.", + 0 + ], + [ + "The material of the stool is mentioned in the description but is not plastic.", + -1 + ], + [ + "The material of the stool is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the stool is mentioned in the description and is plastic.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the seat is mentioned in the description and is flat.", + 1 + ], + [ + "The seat or the stool is not mentioned.", + 0 + ], + [ + "The shape of the seat is mentioned in the description but is not flat.", + -1 + ], + [ + "The shape of the seat is not mentioned, but the seat of the stool is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the seat is mentioned in the description but is not flat.", + "pred_index": 2, + "eval_result": -1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The armrest of the stool is mentioned in the description.", + -1 + ], + [ + "The armrest of the stool is not mentioned in the description.", + 1 + ], + [ + "The stool is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The armrest of the stool is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The backrest of the stool is mentioned in the description.", + -1 + ], + [ + "The backrest of the stool is not mentioned in the description.", + 1 + ], + [ + "The stool is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The backrest of the stool is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Cooking grate is mentioned in the description.", + -1 + ], + [ + "The Cooking grate is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The Cooking grate is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The footrest of the stool is mentioned in the description.", + -1 + ], + [ + "The footrest of the stool is not mentioned in the description.", + 1 + ], + [ + "The stool is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. 
The footrest of the stool is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The swivel base of the stool is mentioned in the description.", + -1 + ], + [ + "The swivel base of the stool is not mentioned in the description.", + 1 + ], + [ + "The stool is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The swivel base of the stool is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is stool. Based on the image, is it likely that the object in the description is given class: stool or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue plastic stool with a smooth, curved seat and rounded legs. The stool has a simple, sturdy design with a slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is stool. Based on the image, is it likely that the object in the description is given class: stool or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue plastic stool with a smooth, curved seat and rounded legs. The stool has a simple, sturdy design with a slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the stool is mentioned in the description and is blue.\nB. The stool is not mentioned.\nC. The color of the stool is mentioned in the description but is not blue.\nD. The color of the stool is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue plastic stool with a smooth, curved seat and rounded legs. The stool has a simple, sturdy design with a slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the stool is mentioned in the description and is plastic.\nB. The stool is not mentioned.\nC. The material of the stool is mentioned in the description but is not plastic.\nD. The material of the stool is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue plastic stool with a smooth, curved seat and rounded legs. The stool has a simple, sturdy design with a slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the seat is mentioned in the description and is flat.\nB. The seat or the stool is not mentioned.\nC. The shape of the seat is mentioned in the description but is not flat.\nD. The shape of the seat is not mentioned, but the seat of the stool is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue plastic stool with a smooth, curved seat and rounded legs. The stool has a simple, sturdy design with a slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The armrest of the stool is mentioned in the description.\nB. The armrest of the stool is not mentioned in the description.\nC. The stool is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA blue plastic stool with a smooth, curved seat and rounded legs. The stool has a simple, sturdy design with a slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The backrest of the stool is mentioned in the description.\nB. The backrest of the stool is not mentioned in the description.\nC. The stool is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue plastic stool with a smooth, curved seat and rounded legs. The stool has a simple, sturdy design with a slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Cooking grate is mentioned in the description.\nB. The Cooking grate is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue plastic stool with a smooth, curved seat and rounded legs. 
The stool has a simple, sturdy design with a slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The footrest of the stool is mentioned in the description.\nB. The footrest of the stool is not mentioned in the description.\nC. The stool is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA blue plastic stool with a smooth, curved seat and rounded legs. The stool has a simple, sturdy design with a slightly glossy finish.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The swivel base of the stool is mentioned in the description.\nB. The swivel base of the stool is not mentioned in the description.\nC. The stool is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The color of the stool is mentioned in the description and is blue.", + "A. The material of the stool is mentioned in the description and is plastic.", + "C. The shape of the seat is mentioned in the description but is not flat.", + "B. The armrest of the stool is not mentioned in the description.", + "B. The backrest of the stool is not mentioned in the description.", + "B. The Cooking grate is not mentioned in the description.", + "B. The footrest of the stool is not mentioned in the description.", + "B. The swivel base of the stool is not mentioned in the description." 
+ ], + "score": 0.75, + "score_pos": 0.3333333333333333, + "score_neg": 1.0, + "recognition_result": true + }, + "13187927": { + "pred": "The motorcycle is a white scooter with a sleek, modern design. It features a black seat and a rear storage compartment with a red reflector. The rear light is integrated into the storage compartment, and the scooter has a visible license plate mounted below the light. The handlebars are equipped with rearview mirrors, and the front section includes a headlight and a windshield.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the exterior is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the exterior is mentioned in the description and is white.", + 1 + ], + [ + "The exterior or the motorcycle is not mentioned.", + 0 + ], + [ + "The color of the exterior is not mentioned, but the exterior of the motorcycle is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the exterior is mentioned in the description and is white.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the seat is mentioned in the description but is not leather or vinyl.", + -1 + ], + [ + "The material of the seat is mentioned in the description and is leather or vinyl.", + 1 + ], + [ + "The seat or the motorcycle is not mentioned.", + 0 + ], + [ + "The material of the seat is not mentioned, but the seat of the motorcycle is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. 
The material of the seat is not mentioned, but the seat of the motorcycle is mentioned.", + "pred_index": 3, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the seat is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the seat is mentioned in the description and is black.", + 1 + ], + [ + "The seat or the motorcycle is not mentioned.", + 0 + ], + [ + "The color of the seat is not mentioned, but the seat of the motorcycle is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the seat is mentioned in the description and is black.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the taillight is mentioned in the description but is not red.", + -1 + ], + [ + "The color of the taillight is mentioned in the description and is red.", + 1 + ], + [ + "The taillight or the motorcycle is not mentioned.", + 0 + ], + [ + "The color of the taillight is not mentioned, but the taillight of the motorcycle is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the taillight is mentioned in the description and is red.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the license plate is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the license plate is mentioned in the description and is rectangular.", + 1 + ], + [ + "The license plate or the motorcycle is not mentioned.", + 0 + ], + [ + "The shape of the license plate is not mentioned, but the license plate of the motorcycle is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "B. 
The shape of the license plate is mentioned in the description and is rectangular.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The motorcycle is not mentioned in the description.", + 0 + ], + [ + "The windshield of the motorcycle is not mentioned in the description.", + 1 + ], + [ + "The windshield of the motorcycle is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The windshield of the motorcycle is mentioned in the description.", + "pred_index": 2, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The balconies are not mentioned in the description.", + 1 + ], + [ + "The balconies are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The balconies are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The Pepsi advertisements are not mentioned in the description.", + 1 + ], + [ + "The Pepsi advertisements are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The Pepsi advertisements are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The windows are not mentioned in the description.", + 1 + ], + [ + "The windows are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The windows are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The decorative metal grill is not mentioned in the description.", + 1 + ], + [ + "The decorative metal grill is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The decorative metal grill is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is motorcycle. Based on the image, is it likely that the object in the description is given class: motorcycle or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a sleek, modern design. It features a black seat and a rear storage compartment with a red reflector. The rear light is integrated into the storage compartment, and the scooter has a visible license plate mounted below the light. The handlebars are equipped with rearview mirrors, and the front section includes a headlight and a windshield.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is motorcycle. 
Based on the image, is it likely that the object in the description is given class: motorcycle or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a sleek, modern design. It features a black seat and a rear storage compartment with a red reflector. The rear light is integrated into the storage compartment, and the scooter has a visible license plate mounted below the light. The handlebars are equipped with rearview mirrors, and the front section includes a headlight and a windshield.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the exterior is mentioned in the description but is not white.\nB. The color of the exterior is mentioned in the description and is white.\nC. The exterior or the motorcycle is not mentioned.\nD. The color of the exterior is not mentioned, but the exterior of the motorcycle is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a sleek, modern design. It features a black seat and a rear storage compartment with a red reflector. 
The rear light is integrated into the storage compartment, and the scooter has a visible license plate mounted below the light. The handlebars are equipped with rearview mirrors, and the front section includes a headlight and a windshield.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the seat is mentioned in the description but is not leather or vinyl.\nB. The material of the seat is mentioned in the description and is leather or vinyl.\nC. The seat or the motorcycle is not mentioned.\nD. The material of the seat is not mentioned, but the seat of the motorcycle is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a sleek, modern design. It features a black seat and a rear storage compartment with a red reflector. The rear light is integrated into the storage compartment, and the scooter has a visible license plate mounted below the light. The handlebars are equipped with rearview mirrors, and the front section includes a headlight and a windshield.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the seat is mentioned in the description but is not black.\nB. The color of the seat is mentioned in the description and is black.\nC. The seat or the motorcycle is not mentioned.\nD. 
The color of the seat is not mentioned, but the seat of the motorcycle is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a sleek, modern design. It features a black seat and a rear storage compartment with a red reflector. The rear light is integrated into the storage compartment, and the scooter has a visible license plate mounted below the light. The handlebars are equipped with rearview mirrors, and the front section includes a headlight and a windshield.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the taillight is mentioned in the description but is not red.\nB. The color of the taillight is mentioned in the description and is red.\nC. The taillight or the motorcycle is not mentioned.\nD. The color of the taillight is not mentioned, but the taillight of the motorcycle is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a sleek, modern design. It features a black seat and a rear storage compartment with a red reflector. 
The rear light is integrated into the storage compartment, and the scooter has a visible license plate mounted below the light. The handlebars are equipped with rearview mirrors, and the front section includes a headlight and a windshield.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the license plate is mentioned in the description but is not rectangular.\nB. The shape of the license plate is mentioned in the description and is rectangular.\nC. The license plate or the motorcycle is not mentioned.\nD. The shape of the license plate is not mentioned, but the license plate of the motorcycle is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a sleek, modern design. It features a black seat and a rear storage compartment with a red reflector. The rear light is integrated into the storage compartment, and the scooter has a visible license plate mounted below the light. The handlebars are equipped with rearview mirrors, and the front section includes a headlight and a windshield.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The motorcycle is not mentioned in the description.\nB. The windshield of the motorcycle is not mentioned in the description.\nC. The windshield of the motorcycle is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a sleek, modern design. It features a black seat and a rear storage compartment with a red reflector. The rear light is integrated into the storage compartment, and the scooter has a visible license plate mounted below the light. The handlebars are equipped with rearview mirrors, and the front section includes a headlight and a windshield.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The balconies are not mentioned in the description.\nB. The balconies are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a sleek, modern design. It features a black seat and a rear storage compartment with a red reflector. The rear light is integrated into the storage compartment, and the scooter has a visible license plate mounted below the light. The handlebars are equipped with rearview mirrors, and the front section includes a headlight and a windshield.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The Pepsi advertisements are not mentioned in the description.\nB. 
The Pepsi advertisements are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a sleek, modern design. It features a black seat and a rear storage compartment with a red reflector. The rear light is integrated into the storage compartment, and the scooter has a visible license plate mounted below the light. The handlebars are equipped with rearview mirrors, and the front section includes a headlight and a windshield.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The windows are not mentioned in the description.\nB. The windows are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe motorcycle is a white scooter with a sleek, modern design. It features a black seat and a rear storage compartment with a red reflector. The rear light is integrated into the storage compartment, and the scooter has a visible license plate mounted below the light. 
The handlebars are equipped with rearview mirrors, and the front section includes a headlight and a windshield.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The decorative metal grill is not mentioned in the description.\nB. The decorative metal grill is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the exterior is mentioned in the description and is white.", + "D. The material of the seat is not mentioned, but the seat of the motorcycle is mentioned.", + "B. The color of the seat is mentioned in the description and is black.", + "B. The color of the taillight is mentioned in the description and is red.", + "B. The shape of the license plate is mentioned in the description and is rectangular.", + "C. The windshield of the motorcycle is mentioned in the description.", + "A. The balconies are not mentioned in the description.", + "A. The Pepsi advertisements are not mentioned in the description.", + "A. The windows are not mentioned in the description.", + "A. The decorative metal grill is not mentioned in the description." + ], + "score": 0.75, + "score_pos": 0.9, + "score_neg": 0.6, + "recognition_result": true + }, + "14490578": { + "pred": "The harbor seal has a sleek, elongated body with a dark, almost black coloration. Its skin appears smooth and slightly glossy, with a subtle gradient of lighter shades along its back. 
The seal's head is rounded, and its body tapers towards the tail.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the seal is mentioned in the description but is not smooth.", + -1 + ], + [ + "The seal is not mentioned.", + 0 + ], + [ + "The texture of the seal is not mentioned.", + 0 + ], + [ + "The texture of the seal is mentioned in the description and is smooth.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The texture of the seal is mentioned in the description and is smooth.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the skin is mentioned in the description but is not gray, black, or dark.", + -1 + ], + [ + "The skin or the seal is not mentioned.", + 0 + ], + [ + "The color of the skin is not mentioned, but the skin of the seal is mentioned.", + 0.5 + ], + [ + "The color of the skin is mentioned in the description and is gray, black, or dark.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the skin is mentioned in the description and is gray, black, or dark.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the body is mentioned in the description but is not elongated.", + -1 + ], + [ + "The body or the seal is not mentioned.", + 0 + ], + [ + "The shape of the body is not mentioned, but the body of the seal is mentioned.", + 0.5 + ], + [ + "The shape of the body is mentioned in the description and is elongated.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The shape of the body is mentioned in the description and is elongated.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The teeth of the seal are not mentioned in the description.", + 1 + ], + [ + "The teeth of the seal are mentioned in the description.", + -1 + ], + [ + "The seal is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The teeth of the seal are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The claws of the seal are not mentioned in the description.", + 1 + ], + [ + "The claws of the seal are mentioned in the description.", + -1 + ], + [ + "The seal is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The claws of the seal are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ears of the seal are not mentioned in the description.", + 1 + ], + [ + "The ears of the seal are mentioned in the description.", + -1 + ], + [ + "The seal is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The ears of the seal are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sand is not mentioned in the description.", + 1 + ], + [ + "The sand is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The sand is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rocks are not mentioned in the description.", + 1 + ], + [ + "The rocks are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The rocks are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is seal. Based on the image, is it likely that the object in the description is given class: seal or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark, almost black coloration. Its skin appears smooth and slightly glossy, with a subtle gradient of lighter shades along its back. The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is seal. Based on the image, is it likely that the object in the description is given class: seal or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark, almost black coloration. Its skin appears smooth and slightly glossy, with a subtle gradient of lighter shades along its back. The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the seal is mentioned in the description but is not smooth.\nB. The seal is not mentioned.\nC. The texture of the seal is not mentioned.\nD. The texture of the seal is mentioned in the description and is smooth.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark, almost black coloration. Its skin appears smooth and slightly glossy, with a subtle gradient of lighter shades along its back. The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the skin is mentioned in the description but is not gray, black, or dark.\nB. The skin or the seal is not mentioned.\nC. The color of the skin is not mentioned, but the skin of the seal is mentioned.\nD. 
The color of the skin is mentioned in the description and is gray, black, or dark.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark, almost black coloration. Its skin appears smooth and slightly glossy, with a subtle gradient of lighter shades along its back. The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the body is mentioned in the description but is not elongated.\nB. The body or the seal is not mentioned.\nC. The shape of the body is not mentioned, but the body of the seal is mentioned.\nD. The shape of the body is mentioned in the description and is elongated.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark, almost black coloration. Its skin appears smooth and slightly glossy, with a subtle gradient of lighter shades along its back. The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The teeth of the seal are not mentioned in the description.\nB. The teeth of the seal are mentioned in the description.\nC. The seal is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark, almost black coloration. Its skin appears smooth and slightly glossy, with a subtle gradient of lighter shades along its back. The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The claws of the seal are not mentioned in the description.\nB. The claws of the seal are mentioned in the description.\nC. The seal is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark, almost black coloration. Its skin appears smooth and slightly glossy, with a subtle gradient of lighter shades along its back. The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The ears of the seal are not mentioned in the description.\nB. The ears of the seal are mentioned in the description.\nC. The seal is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark, almost black coloration. Its skin appears smooth and slightly glossy, with a subtle gradient of lighter shades along its back. The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sand is not mentioned in the description.\nB. The sand is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe harbor seal has a sleek, elongated body with a dark, almost black coloration. Its skin appears smooth and slightly glossy, with a subtle gradient of lighter shades along its back. The seal's head is rounded, and its body tapers towards the tail.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rocks are not mentioned in the description.\nB. 
The rocks are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The texture of the seal is mentioned in the description and is smooth.", + "D. The color of the skin is mentioned in the description and is gray, black, or dark.", + "D. The shape of the body is mentioned in the description and is elongated.", + "A. The teeth of the seal are not mentioned in the description.", + "A. The claws of the seal are not mentioned in the description.", + "A. The ears of the seal are not mentioned in the description.", + "A. The sand is not mentioned in the description.", + "A. The rocks are not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "14640483": { + "pred": "A rectangular wooden chopping board with a smooth surface and a natural wood grain pattern. The board has a slightly rounded edge and a visible handle on one side.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the surface is not mentioned, but the surface of the cutting/chopping board is mentioned.", + 0.5 + ], + [ + "The surface or the cutting/chopping board is not mentioned.", + 0 + ], + [ + "The texture of the surface is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the surface is mentioned in the description and is smooth.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The texture of the surface is mentioned in the description and is smooth.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the cutting/chopping board is not mentioned.", + 0 + ], + [ + "The cutting/chopping board is not mentioned.", + 0 + ], + [ + "The color of the cutting/chopping board is mentioned in the description but is not light brown or wooden.", + -1 + ], + [ + "The color of the cutting/chopping board is mentioned in the description and is light brown or wooden.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the cutting/chopping board is mentioned in the description and is light brown or wooden.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the cutting/chopping board is not mentioned.", + 0 + ], + [ + "The cutting/chopping board is not mentioned.", + 0 + ], + [ + "The shape of the cutting/chopping board is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the cutting/chopping board is mentioned in the description and is rectangular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the cutting/chopping board is mentioned in the description and is rectangular.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the cutting/chopping board is not mentioned.", + 0 + ], + [ + "The cutting/chopping board is not mentioned.", + 0 + ], + [ + "The material of the cutting/chopping board is mentioned in the description but is not wood.", + -1 + ], + [ + "The material of the cutting/chopping board is mentioned in the description and is wood.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The material of the cutting/chopping board is mentioned in the description and is wood.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The hanging hole of the cutting/chopping board is mentioned in the description.", + -1 + ], + [ + "The cutting/chopping board is not mentioned in the description.", + 0 + ], + [ + "The hanging hole of the cutting/chopping board is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The hanging hole of the cutting/chopping board is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The juice groove of the cutting/chopping board is mentioned in the description.", + -1 + ], + [ + "The cutting/chopping board is not mentioned in the description.", + 0 + ], + [ + "The juice groove of the cutting/chopping board is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The juice groove of the cutting/chopping board is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle of the cutting/chopping board is mentioned in the description.", + -1 + ], + [ + "The cutting/chopping board is not mentioned in the description.", + 0 + ], + [ + "The handle of the cutting/chopping board is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The handle of the cutting/chopping board is mentioned in the description.", + "pred_index": 0, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The whisk is mentioned in the description.", + -1 + ], + [ + "The whisk is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The whisk is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The utensils are mentioned in the description.", + -1 + ], + [ + "The utensils are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The utensils are mentioned in the description.", + "pred_index": 0, + "eval_result": -1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is cutting/chopping board. Based on the image, is it likely that the object in the description is given class: cutting/chopping board or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and a natural wood grain pattern. 
The board has a slightly rounded edge and a visible handle on one side.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is cutting/chopping board. Based on the image, is it likely that the object in the description is given class: cutting/chopping board or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and a natural wood grain pattern. The board has a slightly rounded edge and a visible handle on one side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the surface is not mentioned, but the surface of the cutting/chopping board is mentioned.\nB. The surface or the cutting/chopping board is not mentioned.\nC. The texture of the surface is mentioned in the description but is not smooth.\nD. The texture of the surface is mentioned in the description and is smooth.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and a natural wood grain pattern. 
The board has a slightly rounded edge and a visible handle on one side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the cutting/chopping board is not mentioned.\nB. The cutting/chopping board is not mentioned.\nC. The color of the cutting/chopping board is mentioned in the description but is not light brown or wooden.\nD. The color of the cutting/chopping board is mentioned in the description and is light brown or wooden.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and a natural wood grain pattern. The board has a slightly rounded edge and a visible handle on one side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the cutting/chopping board is not mentioned.\nB. The cutting/chopping board is not mentioned.\nC. The shape of the cutting/chopping board is mentioned in the description but is not rectangular.\nD. The shape of the cutting/chopping board is mentioned in the description and is rectangular.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and a natural wood grain pattern. The board has a slightly rounded edge and a visible handle on one side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the cutting/chopping board is not mentioned.\nB. The cutting/chopping board is not mentioned.\nC. The material of the cutting/chopping board is mentioned in the description but is not wood.\nD. The material of the cutting/chopping board is mentioned in the description and is wood.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and a natural wood grain pattern. The board has a slightly rounded edge and a visible handle on one side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The hanging hole of the cutting/chopping board is mentioned in the description.\nB. The cutting/chopping board is not mentioned in the description.\nC. The hanging hole of the cutting/chopping board is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and a natural wood grain pattern. The board has a slightly rounded edge and a visible handle on one side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The juice groove of the cutting/chopping board is mentioned in the description.\nB. The cutting/chopping board is not mentioned in the description.\nC. The juice groove of the cutting/chopping board is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and a natural wood grain pattern. The board has a slightly rounded edge and a visible handle on one side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handle of the cutting/chopping board is mentioned in the description.\nB. The cutting/chopping board is not mentioned in the description.\nC. The handle of the cutting/chopping board is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and a natural wood grain pattern. The board has a slightly rounded edge and a visible handle on one side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The whisk is mentioned in the description.\nB. The whisk is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular wooden chopping board with a smooth surface and a natural wood grain pattern. The board has a slightly rounded edge and a visible handle on one side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The utensils are mentioned in the description.\nB. The utensils are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The texture of the surface is mentioned in the description and is smooth.", + "D. The color of the cutting/chopping board is mentioned in the description and is light brown or wooden.", + "D. The shape of the cutting/chopping board is mentioned in the description and is rectangular.", + "D. The material of the cutting/chopping board is mentioned in the description and is wood.", + "C. The hanging hole of the cutting/chopping board is not mentioned in the description.", + "C. The juice groove of the cutting/chopping board is not mentioned in the description.", + "A. 
The handle of the cutting/chopping board is mentioned in the description.", + "B. The whisk is not mentioned in the description.", + "A. The utensils are mentioned in the description." + ], + "score": 0.5555555555555556, + "score_pos": 1.0, + "score_neg": 0.2, + "recognition_result": true + }, + "16010041": { + "pred": "A pair of light-colored wooden chopsticks with a smooth, polished surface. The tips of the chopsticks are slightly tapered and have a subtle orange hue.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the chopsticks is mentioned in the description but is not wood.", + -1 + ], + [ + "The material of the chopsticks is not mentioned.", + 0 + ], + [ + "The material of the chopsticks is mentioned in the description and is wood.", + 1 + ], + [ + "The chopsticks are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The material of the chopsticks is mentioned in the description and is wood.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the chopsticks is mentioned in the description but is not long cylindrical.", + -1 + ], + [ + "The shape of the chopsticks is not mentioned.", + 0 + ], + [ + "The shape of the chopsticks is mentioned in the description and is long cylindrical.", + 1 + ], + [ + "The chopsticks are not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the chopsticks is mentioned in the description and is long cylindrical.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the body is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the body is not mentioned, but the body of the chopsticks is mentioned.", + 0.5 + ], + [ + "The texture of the body is mentioned in the description and is smooth.", + 1 + ], + [ + "The body or the chopsticks is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The texture of the body is mentioned in the description and is smooth.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lemon slices are not mentioned in the description.", + 1 + ], + [ + "The lemon slices are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The lemon slices are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The decorative elements of the chopsticks are not mentioned in the description.", + 1 + ], + [ + "The chopsticks are not mentioned in the description.", + 0 + ], + [ + "The decorative elements of the chopsticks are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The decorative elements of the chopsticks are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The man is not mentioned in the description.", + 1 + ], + [ + "The man is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The man is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sliced meat is not mentioned in the description.", + 1 + ], + [ + "The sliced meat is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The sliced meat is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The green garnish is not mentioned in the description.", + 1 + ], + [ + "The green garnish is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The green garnish is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is chopsticks. Based on the image, is it likely that the object in the description is given class: chopsticks or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored wooden chopsticks with a smooth, polished surface. The tips of the chopsticks are slightly tapered and have a subtle orange hue.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is chopsticks. Based on the image, is it likely that the object in the description is given class: chopsticks or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored wooden chopsticks with a smooth, polished surface. The tips of the chopsticks are slightly tapered and have a subtle orange hue.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the chopsticks is mentioned in the description but is not wood.\nB. The material of the chopsticks is not mentioned.\nC. The material of the chopsticks is mentioned in the description and is wood.\nD. The chopsticks are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored wooden chopsticks with a smooth, polished surface. The tips of the chopsticks are slightly tapered and have a subtle orange hue.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the chopsticks is mentioned in the description but is not long cylindrical.\nB. The shape of the chopsticks is not mentioned.\nC. The shape of the chopsticks is mentioned in the description and is long cylindrical.\nD. The chopsticks are not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored wooden chopsticks with a smooth, polished surface. The tips of the chopsticks are slightly tapered and have a subtle orange hue.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the body is mentioned in the description but is not smooth.\nB. The texture of the body is not mentioned, but the body of the chopsticks is mentioned.\nC. The texture of the body is mentioned in the description and is smooth.\nD. The body or the chopsticks is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored wooden chopsticks with a smooth, polished surface. The tips of the chopsticks are slightly tapered and have a subtle orange hue.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lemon slices are not mentioned in the description.\nB. The lemon slices are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored wooden chopsticks with a smooth, polished surface. The tips of the chopsticks are slightly tapered and have a subtle orange hue.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The decorative elements of the chopsticks are not mentioned in the description.\nB. The chopsticks are not mentioned in the description.\nC. The decorative elements of the chopsticks are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored wooden chopsticks with a smooth, polished surface. 
The tips of the chopsticks are slightly tapered and have a subtle orange hue.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The man is not mentioned in the description.\nB. The man is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored wooden chopsticks with a smooth, polished surface. The tips of the chopsticks are slightly tapered and have a subtle orange hue.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sliced meat is not mentioned in the description.\nB. The sliced meat is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pair of light-colored wooden chopsticks with a smooth, polished surface. The tips of the chopsticks are slightly tapered and have a subtle orange hue.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The green garnish is not mentioned in the description.\nB. 
The green garnish is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The material of the chopsticks is mentioned in the description and is wood.", + "C. The shape of the chopsticks is mentioned in the description and is long cylindrical.", + "C. The texture of the body is mentioned in the description and is smooth.", + "A. The lemon slices are not mentioned in the description.", + "A. The decorative elements of the chopsticks are not mentioned in the description.", + "A. The man is not mentioned in the description.", + "A. The sliced meat is not mentioned in the description.", + "A. The green garnish is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "17072759": { + "pred": "A black belt with a smooth texture, featuring a silver rectangular buckle. The belt has a single prong and a loop near the buckle for securing the tail end.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the belt is not mentioned.", + 0 + ], + [ + "The color of the belt is mentioned in the description but is not gray or black.", + -1 + ], + [ + "The color of the belt is mentioned in the description and is gray or black.", + 1 + ], + [ + "The belt is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The color of the belt is mentioned in the description and is gray or black.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the strap is not mentioned, but the strap of the belt is mentioned.", + 0.5 + ], + [ + "The material of the strap is mentioned in the description but is not leather.", + -1 + ], + [ + "The material of the strap is mentioned in the description and is leather.", + 1 + ], + [ + "The strap or the belt is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. The material of the strap is not mentioned, but the strap of the belt is mentioned.", + "pred_index": 0, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the strap is not mentioned, but the strap of the belt is mentioned.", + 0.5 + ], + [ + "The texture of the strap is mentioned in the description but is not smooth.", + -1 + ], + [ + "The texture of the strap is mentioned in the description and is smooth.", + 1 + ], + [ + "The strap or the belt is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The texture of the strap is mentioned in the description and is smooth.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The belt is not mentioned in the description.", + 0 + ], + [ + "The tip of the belt is mentioned in the description.", + -1 + ], + [ + "The tip of the belt is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The tip of the belt is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The chess board is mentioned in the description.", + -1 + ], + [ + "The chess board is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The chess board is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The blanket is mentioned in the description.", + -1 + ], + [ + "The blanket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The blanket is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The sky is mentioned in the description.", + -1 + ], + [ + "The sky is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. The sky is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The belt is not mentioned in the description.", + 0 + ], + [ + "The keeper of the belt is mentioned in the description.", + -1 + ], + [ + "The keeper of the belt is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The keeper of the belt is mentioned in the description.", + "pred_index": 1, + "eval_result": -1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is belt. 
Based on the image, is it likely that the object in the description is given class: belt or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle. The belt has a single prong and a loop near the buckle for securing the tail end.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is belt. Based on the image, is it likely that the object in the description is given class: belt or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle. The belt has a single prong and a loop near the buckle for securing the tail end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the belt is not mentioned.\nB. 
The color of the belt is mentioned in the description but is not gray or black.\nC. The color of the belt is mentioned in the description and is gray or black.\nD. The belt is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle. The belt has a single prong and a loop near the buckle for securing the tail end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the strap is not mentioned, but the strap of the belt is mentioned.\nB. The material of the strap is mentioned in the description but is not leather.\nC. The material of the strap is mentioned in the description and is leather.\nD. The strap or the belt is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle. The belt has a single prong and a loop near the buckle for securing the tail end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The texture of the strap is not mentioned, but the strap of the belt is mentioned.\nB. The texture of the strap is mentioned in the description but is not smooth.\nC. The texture of the strap is mentioned in the description and is smooth.\nD. The strap or the belt is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle. The belt has a single prong and a loop near the buckle for securing the tail end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The belt is not mentioned in the description.\nB. The tip of the belt is mentioned in the description.\nC. The tip of the belt is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle. The belt has a single prong and a loop near the buckle for securing the tail end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The chess board is mentioned in the description.\nB. 
The chess board is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle. The belt has a single prong and a loop near the buckle for securing the tail end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The blanket is mentioned in the description.\nB. The blanket is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle. The belt has a single prong and a loop near the buckle for securing the tail end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The sky is mentioned in the description.\nB. The sky is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black belt with a smooth texture, featuring a silver rectangular buckle. The belt has a single prong and a loop near the buckle for securing the tail end.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The belt is not mentioned in the description.\nB. The keeper of the belt is mentioned in the description.\nC. The keeper of the belt is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The color of the belt is mentioned in the description and is gray or black.", + "A. The material of the strap is not mentioned, but the strap of the belt is mentioned.", + "C. The texture of the strap is mentioned in the description and is smooth.", + "C. The tip of the belt is not mentioned in the description.", + "B. The chess board is not mentioned in the description.", + "B. The blanket is not mentioned in the description.", + "B. The sky is not mentioned in the description.", + "B. The keeper of the belt is mentioned in the description." + ], + "score": 0.6875, + "score_pos": 0.8333333333333334, + "score_neg": 0.6, + "recognition_result": true + }, + "17072764": { + "pred": "A pear with a smooth, light green skin, featuring a slight yellowish hue on the upper right side. The pear has a short, brown stem attached to its top.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The texture of the pear is mentioned in the description but is not smooth.", + -1 + ], + [ + "The pear is not mentioned.", + 0 + ], + [ + "The texture of the pear is not mentioned.", + 0 + ], + [ + "The texture of the pear is mentioned in the description and is smooth.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The texture of the pear is mentioned in the description and is smooth.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the stem is mentioned in the description but is not short.", + -1 + ], + [ + "The stem or the pear is not mentioned.", + 0 + ], + [ + "The size of the stem is not mentioned, but the stem of the pear is mentioned.", + 0.5 + ], + [ + "The size of the stem is mentioned in the description and is short.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The size of the stem is mentioned in the description and is short.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the stem is mentioned in the description but is not brown.", + -1 + ], + [ + "The stem or the pear is not mentioned.", + 0 + ], + [ + "The color of the stem is not mentioned, but the stem of the pear is mentioned.", + 0.5 + ], + [ + "The color of the stem is mentioned in the description and is brown.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the stem is mentioned in the description and is brown.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the skin is mentioned in the description but is not yellow or green.", + -1 + ], + [ + "The skin or the pear is not mentioned.", + 0 + ], + [ + "The color of the skin is not mentioned, but the skin of the pear is mentioned.", + 0.5 + ], + [ + "The color of the skin is mentioned in the description and is yellow or green.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. 
The color of the skin is mentioned in the description and is yellow or green.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The cup is not mentioned in the description.", + 1 + ], + [ + "The cup is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The cup is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pear is not mentioned in the description.", + 0 + ], + [ + "The core of the pear is not mentioned in the description.", + 1 + ], + [ + "The core of the pear is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The core of the pear is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The white top is not mentioned in the description.", + 1 + ], + [ + "The white top is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The white top is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The grass are not mentioned in the description.", + 1 + ], + [ + "The grass are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The grass are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The pear is not mentioned in the description.", + 0 + ], + [ + "The leaf of the pear is not mentioned in the description.", + 1 + ], + [ + "The leaf of the pear is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The leaf of the pear is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is pear. Based on the image, is it likely that the object in the description is given class: pear or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pear with a smooth, light green skin, featuring a slight yellowish hue on the upper right side. The pear has a short, brown stem attached to its top.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is pear. Based on the image, is it likely that the object in the description is given class: pear or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pear with a smooth, light green skin, featuring a slight yellowish hue on the upper right side. The pear has a short, brown stem attached to its top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The texture of the pear is mentioned in the description but is not smooth.\nB. The pear is not mentioned.\nC. The texture of the pear is not mentioned.\nD. The texture of the pear is mentioned in the description and is smooth.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pear with a smooth, light green skin, featuring a slight yellowish hue on the upper right side. The pear has a short, brown stem attached to its top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the stem is mentioned in the description but is not short.\nB. The stem or the pear is not mentioned.\nC. The size of the stem is not mentioned, but the stem of the pear is mentioned.\nD. The size of the stem is mentioned in the description and is short.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pear with a smooth, light green skin, featuring a slight yellowish hue on the upper right side. The pear has a short, brown stem attached to its top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the stem is mentioned in the description but is not brown.\nB. The stem or the pear is not mentioned.\nC. The color of the stem is not mentioned, but the stem of the pear is mentioned.\nD. The color of the stem is mentioned in the description and is brown.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pear with a smooth, light green skin, featuring a slight yellowish hue on the upper right side. The pear has a short, brown stem attached to its top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the skin is mentioned in the description but is not yellow or green.\nB. The skin or the pear is not mentioned.\nC. The color of the skin is not mentioned, but the skin of the pear is mentioned.\nD. The color of the skin is mentioned in the description and is yellow or green.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pear with a smooth, light green skin, featuring a slight yellowish hue on the upper right side. The pear has a short, brown stem attached to its top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The cup is not mentioned in the description.\nB. The cup is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pear with a smooth, light green skin, featuring a slight yellowish hue on the upper right side. The pear has a short, brown stem attached to its top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pear is not mentioned in the description.\nB. The core of the pear is not mentioned in the description.\nC. The core of the pear is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA pear with a smooth, light green skin, featuring a slight yellowish hue on the upper right side. The pear has a short, brown stem attached to its top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The white top is not mentioned in the description.\nB. The white top is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pear with a smooth, light green skin, featuring a slight yellowish hue on the upper right side. The pear has a short, brown stem attached to its top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The grass are not mentioned in the description.\nB. The grass are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA pear with a smooth, light green skin, featuring a slight yellowish hue on the upper right side. 
The pear has a short, brown stem attached to its top.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The pear is not mentioned in the description.\nB. The leaf of the pear is not mentioned in the description.\nC. The leaf of the pear is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "D. The texture of the pear is mentioned in the description and is smooth.", + "D. The size of the stem is mentioned in the description and is short.", + "D. The color of the stem is mentioned in the description and is brown.", + "D. The color of the skin is mentioned in the description and is yellow or green.", + "A. The cup is not mentioned in the description.", + "B. The core of the pear is not mentioned in the description.", + "A. The white top is not mentioned in the description.", + "A. The grass are not mentioned in the description.", + "B. The leaf of the pear is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "18301585": { + "pred": "The bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest consists of three horizontal slats, while the seat has two horizontal slats. The bench is supported by white concrete legs that are rectangular in shape and have a slightly tapered design.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the backrest is mentioned in the description and is black.", + 1 + ], + [ + "The backrest or the bench is not mentioned.", + 0 + ], + [ + "The color of the backrest is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the backrest is not mentioned, but the backrest of the bench is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the backrest is mentioned in the description and is black.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the legs is mentioned in the description and is white.", + 1 + ], + [ + "The legs or the bench are not mentioned.", + 0 + ], + [ + "The color of the legs is mentioned in the description but is not white.", + -1 + ], + [ + "The color of the legs is not mentioned, but the legs of the bench are mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the legs is mentioned in the description and is white.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the backrest is mentioned in the description and is slatted.", + 1 + ], + [ + "The backrest or the bench is not mentioned.", + 0 + ], + [ + "The shape of the backrest is mentioned in the description but is not slatted.", + -1 + ], + [ + "The shape of the backrest is not mentioned, but the backrest of the bench is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The shape of the backrest is mentioned in the description and is slatted.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bollards are mentioned in the description.", + -1 + ], + [ + "The bollards are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The bollards are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The storage compartment of the bench is mentioned in the description.", + -1 + ], + [ + "The storage compartment of the bench is not mentioned in the description.", + 1 + ], + [ + "The bench is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The storage compartment of the bench is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The footrest of the bench is mentioned in the description.", + -1 + ], + [ + "The footrest of the bench is not mentioned in the description.", + 1 + ], + [ + "The bench is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The footrest of the bench is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The headrest of the bench is mentioned in the description.", + -1 + ], + [ + "The headrest of the bench is not mentioned in the description.", + 1 + ], + [ + "The bench is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The headrest of the bench is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The trees are mentioned in the description.", + -1 + ], + [ + "The trees are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "B. 
The trees are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is bench. Based on the image, is it likely that the object in the description is given class: bench or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest consists of three horizontal slats, while the seat has two horizontal slats. The bench is supported by white concrete legs that are rectangular in shape and have a slightly tapered design.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is bench. Based on the image, is it likely that the object in the description is given class: bench or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. 
The backrest consists of three horizontal slats, while the seat has two horizontal slats. The bench is supported by white concrete legs that are rectangular in shape and have a slightly tapered design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the backrest is mentioned in the description and is black.\nB. The backrest or the bench is not mentioned.\nC. The color of the backrest is mentioned in the description but is not black.\nD. The color of the backrest is not mentioned, but the backrest of the bench is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest consists of three horizontal slats, while the seat has two horizontal slats. The bench is supported by white concrete legs that are rectangular in shape and have a slightly tapered design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the legs is mentioned in the description and is white.\nB. The legs or the bench are not mentioned.\nC. The color of the legs is mentioned in the description but is not white.\nD. The color of the legs is not mentioned, but the legs of the bench are mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest consists of three horizontal slats, while the seat has two horizontal slats. The bench is supported by white concrete legs that are rectangular in shape and have a slightly tapered design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the backrest is mentioned in the description and is slatted.\nB. The backrest or the bench is not mentioned.\nC. The shape of the backrest is mentioned in the description but is not slatted.\nD. The shape of the backrest is not mentioned, but the backrest of the bench is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest consists of three horizontal slats, while the seat has two horizontal slats. The bench is supported by white concrete legs that are rectangular in shape and have a slightly tapered design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bollards are mentioned in the description.\nB. 
The bollards are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest consists of three horizontal slats, while the seat has two horizontal slats. The bench is supported by white concrete legs that are rectangular in shape and have a slightly tapered design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The storage compartment of the bench is mentioned in the description.\nB. The storage compartment of the bench is not mentioned in the description.\nC. The bench is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest consists of three horizontal slats, while the seat has two horizontal slats. The bench is supported by white concrete legs that are rectangular in shape and have a slightly tapered design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The footrest of the bench is mentioned in the description.\nB. The footrest of the bench is not mentioned in the description.\nC. The bench is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest consists of three horizontal slats, while the seat has two horizontal slats. The bench is supported by white concrete legs that are rectangular in shape and have a slightly tapered design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The headrest of the bench is mentioned in the description.\nB. The headrest of the bench is not mentioned in the description.\nC. The bench is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe bench features a black metal frame with horizontal slats forming the backrest and seat. The backrest consists of three horizontal slats, while the seat has two horizontal slats. 
The bench is supported by white concrete legs that are rectangular in shape and have a slightly tapered design.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The trees are mentioned in the description.\nB. The trees are not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The color of the backrest is mentioned in the description and is black.", + "A. The color of the legs is mentioned in the description and is white.", + "A. The shape of the backrest is mentioned in the description and is slatted.", + "B. The bollards are not mentioned in the description.", + "B. The storage compartment of the bench is not mentioned in the description.", + "B. The footrest of the bench is not mentioned in the description.", + "B. The headrest of the bench is not mentioned in the description.", + "B. The trees are not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "18680641": { + "pred": "A rectangular, plush, red carpet with a slightly textured surface and a dark gray border along the edges.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the carpet is mentioned in the description but is not fabric.", + -1 + ], + [ + "The material of the carpet is not mentioned.", + 0 + ], + [ + "The material of the carpet is mentioned in the description and is fabric.", + 1 + ], + [ + "The carpet is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. 
The material of the carpet is not mentioned.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the edge is mentioned in the description but is not straight.", + -1 + ], + [ + "The shape of the edge is not mentioned, but the edge of the carpet is mentioned.", + 0.5 + ], + [ + "The shape of the edge is mentioned in the description and is straight.", + 1 + ], + [ + "The edge or the carpet is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "B. The shape of the edge is not mentioned, but the edge of the carpet is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the carpet is mentioned in the description but is not orange or red.", + -1 + ], + [ + "The color of the carpet is not mentioned.", + 0 + ], + [ + "The color of the carpet is mentioned in the description and is orange or red.", + 1 + ], + [ + "The carpet is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the carpet is mentioned in the description and is orange or red.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the carpet is mentioned in the description but is not rectangular.", + -1 + ], + [ + "The shape of the carpet is not mentioned.", + 0 + ], + [ + "The shape of the carpet is mentioned in the description and is rectangular.", + 1 + ], + [ + "The carpet is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "C. 
The shape of the carpet is mentioned in the description and is rectangular.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The carpet is not mentioned in the description.", + 0 + ], + [ + "The tassels of the carpet are not mentioned in the description.", + 1 + ], + [ + "The tassels of the carpet are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The tassels of the carpet are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The drainage pipe is not mentioned in the description.", + 1 + ], + [ + "The drainage pipe is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The drainage pipe is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The carpet is not mentioned in the description.", + 0 + ], + [ + "The pattern of the carpet is not mentioned in the description.", + 1 + ], + [ + "The pattern of the carpet is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The pattern of the carpet is not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shop sign is not mentioned in the description.", + 1 + ], + [ + "The shop sign is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The shop sign is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The candy display is not mentioned in the description.", + 1 + ], + [ + "The candy display is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The candy display is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is carpet. Based on the image, is it likely that the object in the description is given class: carpet or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly textured surface and a dark gray border along the edges.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is carpet. Based on the image, is it likely that the object in the description is given class: carpet or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly textured surface and a dark gray border along the edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the carpet is mentioned in the description but is not fabric.\nB. The material of the carpet is not mentioned.\nC. The material of the carpet is mentioned in the description and is fabric.\nD. The carpet is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly textured surface and a dark gray border along the edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the edge is mentioned in the description but is not straight.\nB. The shape of the edge is not mentioned, but the edge of the carpet is mentioned.\nC. The shape of the edge is mentioned in the description and is straight.\nD. The edge or the carpet is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly textured surface and a dark gray border along the edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the carpet is mentioned in the description but is not orange or red.\nB. The color of the carpet is not mentioned.\nC. The color of the carpet is mentioned in the description and is orange or red.\nD. The carpet is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly textured surface and a dark gray border along the edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the carpet is mentioned in the description but is not rectangular.\nB. The shape of the carpet is not mentioned.\nC. The shape of the carpet is mentioned in the description and is rectangular.\nD. The carpet is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly textured surface and a dark gray border along the edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The carpet is not mentioned in the description.\nB. The tassels of the carpet are not mentioned in the description.\nC. The tassels of the carpet are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly textured surface and a dark gray border along the edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The drainage pipe is not mentioned in the description.\nB. The drainage pipe is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly textured surface and a dark gray border along the edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. 
The carpet is not mentioned in the description.\nB. The pattern of the carpet is not mentioned in the description.\nC. The pattern of the carpet is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly textured surface and a dark gray border along the edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shop sign is not mentioned in the description.\nB. The shop sign is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA rectangular, plush, red carpet with a slightly textured surface and a dark gray border along the edges.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The candy display is not mentioned in the description.\nB. The candy display is mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The material of the carpet is not mentioned.", + "B. The shape of the edge is not mentioned, but the edge of the carpet is mentioned.", + "C. 
The color of the carpet is mentioned in the description and is orange or red.", + "C. The shape of the carpet is mentioned in the description and is rectangular.", + "B. The tassels of the carpet are not mentioned in the description.", + "A. The drainage pipe is not mentioned in the description.", + "B. The pattern of the carpet is not mentioned in the description.", + "A. The shop sign is not mentioned in the description.", + "A. The candy display is not mentioned in the description." + ], + "score": 0.8333333333333334, + "score_pos": 0.625, + "score_neg": 1.0, + "recognition_result": true + }, + "25273528": { + "pred": "The balloon features a vibrant pattern with alternating vertical stripes of red, yellow, and green. The red stripes are the most prominent, with yellow and green stripes creating a striking contrast. The balloon has a teardrop shape with a small black basket attached at the bottom.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the hot air balloon is mentioned in the description and is multicolored.", + 1 + ], + [ + "The hot air balloon is not mentioned.", + 0 + ], + [ + "The color of the hot air balloon is mentioned in the description but is not multicolored.", + -1 + ], + [ + "The color of the hot air balloon is not mentioned.", + 0 + ] + ], + "type": "positive", + "pred_answer": "A. 
The color of the hot air balloon is mentioned in the description and is multicolored.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the envelope is mentioned in the description and is nylon or polyester.", + 1 + ], + [ + "The envelope or the hot air balloon is not mentioned.", + 0 + ], + [ + "The material of the envelope is mentioned in the description but is not nylon or polyester.", + -1 + ], + [ + "The material of the envelope is not mentioned, but the envelope of the hot air balloon is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "D. The material of the envelope is not mentioned, but the envelope of the hot air balloon is mentioned.", + "pred_index": 3, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The position of the basket is mentioned in the description and is bottom.", + 1 + ], + [ + "The basket or the hot air balloon is not mentioned.", + 0 + ], + [ + "The position of the basket is mentioned in the description but is not bottom.", + -1 + ], + [ + "The position of the basket is not mentioned, but the basket of the hot air balloon is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The position of the basket is mentioned in the description and is bottom.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The size of the basket is mentioned in the description and is small.", + 1 + ], + [ + "The basket or the hot air balloon is not mentioned.", + 0 + ], + [ + "The size of the basket is mentioned in the description but is not small.", + -1 + ], + [ + "The size of the basket is not mentioned, but the basket of the hot air balloon is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. 
The size of the basket is mentioned in the description and is small.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the basket is mentioned in the description and is dark or black.", + 1 + ], + [ + "The basket or the hot air balloon is not mentioned.", + 0 + ], + [ + "The color of the basket is mentioned in the description but is not dark or black.", + -1 + ], + [ + "The color of the basket is not mentioned, but the basket of the hot air balloon is mentioned.", + 0.5 + ] + ], + "type": "positive", + "pred_answer": "A. The color of the basket is mentioned in the description and is dark or black.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fuel tanks of the hot air balloon are not mentioned in the description.", + 1 + ], + [ + "The fuel tanks of the hot air balloon are mentioned in the description.", + -1 + ], + [ + "The hot air balloon is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The fuel tanks of the hot air balloon are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The burner of the hot air balloon is not mentioned in the description.", + 1 + ], + [ + "The burner of the hot air balloon is mentioned in the description.", + -1 + ], + [ + "The hot air balloon is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. 
The burner of the hot air balloon is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ground is not mentioned in the description.", + 1 + ], + [ + "The ground is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The ground is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The people are not mentioned in the description.", + 1 + ], + [ + "The people are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The people are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The trees are not mentioned in the description.", + 1 + ], + [ + "The trees are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The trees are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is hot air balloon. Based on the image, is it likely that the object in the description is given class: hot air balloon or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. 
Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe balloon features a vibrant pattern with alternating vertical stripes of red, yellow, and green. The red stripes are the most prominent, with yellow and green stripes creating a striking contrast. The balloon has a teardrop shape with a small black basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is hot air balloon. Based on the image, is it likely that the object in the description is given class: hot air balloon or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe balloon features a vibrant pattern with alternating vertical stripes of red, yellow, and green. The red stripes are the most prominent, with yellow and green stripes creating a striking contrast. The balloon has a teardrop shape with a small black basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the hot air balloon is mentioned in the description and is multicolored.\nB. The hot air balloon is not mentioned.\nC. The color of the hot air balloon is mentioned in the description but is not multicolored.\nD. The color of the hot air balloon is not mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. 
Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe balloon features a vibrant pattern with alternating vertical stripes of red, yellow, and green. The red stripes are the most prominent, with yellow and green stripes creating a striking contrast. The balloon has a teardrop shape with a small black basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the envelope is mentioned in the description and is nylon or polyester.\nB. The envelope or the hot air balloon is not mentioned.\nC. The material of the envelope is mentioned in the description but is not nylon or polyester.\nD. The material of the envelope is not mentioned, but the envelope of the hot air balloon is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe balloon features a vibrant pattern with alternating vertical stripes of red, yellow, and green. The red stripes are the most prominent, with yellow and green stripes creating a striking contrast. The balloon has a teardrop shape with a small black basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The position of the basket is mentioned in the description and is bottom.\nB. 
The basket or the hot air balloon is not mentioned.\nC. The position of the basket is mentioned in the description but is not bottom.\nD. The position of the basket is not mentioned, but the basket of the hot air balloon is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe balloon features a vibrant pattern with alternating vertical stripes of red, yellow, and green. The red stripes are the most prominent, with yellow and green stripes creating a striking contrast. The balloon has a teardrop shape with a small black basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The size of the basket is mentioned in the description and is small.\nB. The basket or the hot air balloon is not mentioned.\nC. The size of the basket is mentioned in the description but is not small.\nD. The size of the basket is not mentioned, but the basket of the hot air balloon is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe balloon features a vibrant pattern with alternating vertical stripes of red, yellow, and green. The red stripes are the most prominent, with yellow and green stripes creating a striking contrast. 
The balloon has a teardrop shape with a small black basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the basket is mentioned in the description and is dark or black.\nB. The basket or the hot air balloon is not mentioned.\nC. The color of the basket is mentioned in the description but is not dark or black.\nD. The color of the basket is not mentioned, but the basket of the hot air balloon is mentioned.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe balloon features a vibrant pattern with alternating vertical stripes of red, yellow, and green. The red stripes are the most prominent, with yellow and green stripes creating a striking contrast. The balloon has a teardrop shape with a small black basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fuel tanks of the hot air balloon are not mentioned in the description.\nB. The fuel tanks of the hot air balloon are mentioned in the description.\nC. The hot air balloon is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nThe balloon features a vibrant pattern with alternating vertical stripes of red, yellow, and green. The red stripes are the most prominent, with yellow and green stripes creating a striking contrast. The balloon has a teardrop shape with a small black basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The burner of the hot air balloon is not mentioned in the description.\nB. The burner of the hot air balloon is mentioned in the description.\nC. The hot air balloon is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe balloon features a vibrant pattern with alternating vertical stripes of red, yellow, and green. The red stripes are the most prominent, with yellow and green stripes creating a striking contrast. The balloon has a teardrop shape with a small black basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ground is not mentioned in the description.\nB. The ground is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe balloon features a vibrant pattern with alternating vertical stripes of red, yellow, and green. The red stripes are the most prominent, with yellow and green stripes creating a striking contrast. The balloon has a teardrop shape with a small black basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The people are not mentioned in the description.\nB. The people are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nThe balloon features a vibrant pattern with alternating vertical stripes of red, yellow, and green. The red stripes are the most prominent, with yellow and green stripes creating a striking contrast. The balloon has a teardrop shape with a small black basket attached at the bottom.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The trees are not mentioned in the description.\nB. The trees are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "A. The color of the hot air balloon is mentioned in the description and is multicolored.", + "D. The material of the envelope is not mentioned, but the envelope of the hot air balloon is mentioned.", + "A. The position of the basket is mentioned in the description and is bottom.", + "A. 
The size of the basket is mentioned in the description and is small.", + "A. The color of the basket is mentioned in the description and is dark or black.", + "A. The fuel tanks of the hot air balloon are not mentioned in the description.", + "A. The burner of the hot air balloon is not mentioned in the description.", + "A. The ground is not mentioned in the description.", + "A. The people are not mentioned in the description.", + "A. The trees are not mentioned in the description." + ], + "score": 0.95, + "score_pos": 0.9, + "score_neg": 1.0, + "recognition_result": true + }, + "25419509": { + "pred": "A metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a gentle upward curve near the tines.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fork is not mentioned.", + 0 + ], + [ + "The color of the fork is not mentioned.", + 0 + ], + [ + "The color of the fork is mentioned in the description and is metallic.", + 1 + ], + [ + "The color of the fork is mentioned in the description but is not metallic.", + -1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the fork is not mentioned.", + "pred_index": 1, + "eval_result": 0 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle or the fork is not mentioned.", + 0 + ], + [ + "The material of the handle is not mentioned, but the handle of the fork is mentioned.", + 0.5 + ], + [ + "The material of the handle is mentioned in the description and is metal.", + 1 + ], + [ + "The material of the handle is mentioned in the description but is not metal.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The material of the handle is mentioned in the description and is metal.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle or the fork is not mentioned.", + 0 + ], + [ + "The shape of the handle is not mentioned, but the handle of the fork is mentioned.", + 0.5 + ], + [ + "The shape of the handle is mentioned in the description and is curved.", + 1 + ], + [ + "The shape of the handle is mentioned in the description but is not curved.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The shape of the handle is mentioned in the description and is curved.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle or the fork is not mentioned.", + 0 + ], + [ + "The texture of the handle is not mentioned, but the handle of the fork is mentioned.", + 0.5 + ], + [ + "The texture of the handle is mentioned in the description and is smooth.", + 1 + ], + [ + "The texture of the handle is mentioned in the description but is not smooth.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The texture of the handle is mentioned in the description and is smooth.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The tines or the fork are not mentioned.", + 0 + ], + [ + "The number of parts of the tines is not mentioned, but the tines of the fork are mentioned.", + 0.5 + ], + [ + "The number of parts of the tines is mentioned in the description and is 4.", + 1 + ], + [ + "The number of parts of the tines is mentioned in the description but is not 4.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The number of parts of the tines is mentioned in the description and is 4.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bolster of the fork is not mentioned in the description.", + 1 + ], + [ + "The bolster of the fork is mentioned in the description.", + -1 + ], + [ + "The fork is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The bolster of the fork is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The plates are not mentioned in the description.", + 1 + ], + [ + "The plates are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The plates are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The end cap of the fork is not mentioned in the description.", + 1 + ], + [ + "The end cap of the fork is mentioned in the description.", + -1 + ], + [ + "The fork is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The end cap of the fork is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The drinks are not mentioned in the description.", + 1 + ], + [ + "The drinks are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The drinks are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The ferrule of the fork is not mentioned in the description.", + 1 + ], + [ + "The ferrule of the fork is mentioned in the description.", + -1 + ], + [ + "The fork is not mentioned in the description.", + 0 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "A. The ferrule of the fork is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is fork. Based on the image, is it likely that the object in the description is given class: fork or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a gentle upward curve near the tines.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is fork. Based on the image, is it likely that the object in the description is given class: fork or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a gentle upward curve near the tines.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fork is not mentioned.\nB. The color of the fork is not mentioned.\nC. The color of the fork is mentioned in the description and is metallic.\nD. The color of the fork is mentioned in the description but is not metallic.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a gentle upward curve near the tines.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handle or the fork is not mentioned.\nB. The material of the handle is not mentioned, but the handle of the fork is mentioned.\nC. The material of the handle is mentioned in the description and is metal.\nD. The material of the handle is mentioned in the description but is not metal.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a gentle upward curve near the tines.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handle or the fork is not mentioned.\nB. The shape of the handle is not mentioned, but the handle of the fork is mentioned.\nC. The shape of the handle is mentioned in the description and is curved.\nD. The shape of the handle is mentioned in the description but is not curved.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a gentle upward curve near the tines.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handle or the fork is not mentioned.\nB. The texture of the handle is not mentioned, but the handle of the fork is mentioned.\nC. The texture of the handle is mentioned in the description and is smooth.\nD. The texture of the handle is mentioned in the description but is not smooth.\n", + "Answer the multiple-choice question based on the text description of an object in this image. 
You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a gentle upward curve near the tines.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The tines or the fork are not mentioned.\nB. The number of parts of the tines is not mentioned, but the tines of the fork are mentioned.\nC. The number of parts of the tines is mentioned in the description and is 4.\nD. The number of parts of the tines is mentioned in the description but is not 4.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a gentle upward curve near the tines.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bolster of the fork is not mentioned in the description.\nB. The bolster of the fork is mentioned in the description.\nC. The fork is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a gentle upward curve near the tines.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The plates are not mentioned in the description.\nB. The plates are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a gentle upward curve near the tines.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The end cap of the fork is not mentioned in the description.\nB. The end cap of the fork is mentioned in the description.\nC. The fork is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a gentle upward curve near the tines.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The drinks are not mentioned in the description.\nB. The drinks are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA metallic fork with a slightly curved handle and four evenly spaced tines. The handle has a smooth, reflective surface with a gentle upward curve near the tines.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The ferrule of the fork is not mentioned in the description.\nB. The ferrule of the fork is mentioned in the description.\nC. The fork is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The color of the fork is not mentioned.", + "C. The material of the handle is mentioned in the description and is metal.", + "C. The shape of the handle is mentioned in the description and is curved.", + "C. The texture of the handle is mentioned in the description and is smooth.", + "C. The number of parts of the tines is mentioned in the description and is 4.", + "A. The bolster of the fork is not mentioned in the description.", + "A. The plates are not mentioned in the description.", + "A. 
The end cap of the fork is not mentioned in the description.", + "A. The drinks are not mentioned in the description.", + "A. The ferrule of the fork is not mentioned in the description." + ], + "score": 0.9, + "score_pos": 0.8, + "score_neg": 1.0, + "recognition_result": true + }, + "25612310": { + "pred": "A dark brown wicker basket with a woven pattern, featuring a slightly curved edge and a visible portion of the basket's side.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The basket is not mentioned.", + 0 + ], + [ + "The texture of the basket is not mentioned.", + 0 + ], + [ + "The texture of the basket is mentioned in the description and is woven.", + 1 + ], + [ + "The texture of the basket is mentioned in the description but is not woven.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The texture of the basket is mentioned in the description and is woven.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The basket is not mentioned.", + 0 + ], + [ + "The material of the basket is not mentioned.", + 0 + ], + [ + "The material of the basket is mentioned in the description and is wicker.", + 1 + ], + [ + "The material of the basket is mentioned in the description but is not wicker.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. 
The material of the basket is mentioned in the description and is wicker.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The basket is not mentioned.", + 0 + ], + [ + "The type of the basket is not mentioned.", + 0 + ], + [ + "The type of the basket is mentioned in the description and is interlaced.", + 1 + ], + [ + "The type of the basket is mentioned in the description but is not interlaced.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The type of the basket is mentioned in the description and is interlaced.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The basket is not mentioned.", + 0 + ], + [ + "The color of the basket is not mentioned.", + 0 + ], + [ + "The color of the basket is mentioned in the description and is brown or wooden.", + 1 + ], + [ + "The color of the basket is mentioned in the description but is not brown or wooden.", + -1 + ] + ], + "type": "positive", + "pred_answer": "C. The color of the basket is mentioned in the description and is brown or wooden.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The base of the basket is mentioned in the description.", + -1 + ], + [ + "The basket is not mentioned in the description.", + 0 + ], + [ + "The base of the basket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The base of the basket is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The handle of the basket is mentioned in the description.", + -1 + ], + [ + "The basket is not mentioned in the description.", + 0 + ], + [ + "The handle of the basket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The handle of the basket is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lid of the basket is mentioned in the description.", + -1 + ], + [ + "The basket is not mentioned in the description.", + 0 + ], + [ + "The lid of the basket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The lid of the basket is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The decorative elements of the basket are mentioned in the description.", + -1 + ], + [ + "The basket is not mentioned in the description.", + 0 + ], + [ + "The decorative elements of the basket are not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. 
The decorative elements of the basket are not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The lining of the basket is mentioned in the description.", + -1 + ], + [ + "The basket is not mentioned in the description.", + 0 + ], + [ + "The lining of the basket is not mentioned in the description.", + 1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The lining of the basket is not mentioned in the description.", + "pred_index": 2, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is basket. Based on the image, is it likely that the object in the description is given class: basket or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wicker basket with a woven pattern, featuring a slightly curved edge and a visible portion of the basket's side.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is basket. Based on the image, is it likely that the object in the description is given class: basket or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. 
Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wicker basket with a woven pattern, featuring a slightly curved edge and a visible portion of the basket's side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The basket is not mentioned.\nB. The texture of the basket is not mentioned.\nC. The texture of the basket is mentioned in the description and is woven.\nD. The texture of the basket is mentioned in the description but is not woven.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wicker basket with a woven pattern, featuring a slightly curved edge and a visible portion of the basket's side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The basket is not mentioned.\nB. The material of the basket is not mentioned.\nC. The material of the basket is mentioned in the description and is wicker.\nD. The material of the basket is mentioned in the description but is not wicker.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. 
There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wicker basket with a woven pattern, featuring a slightly curved edge and a visible portion of the basket's side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The basket is not mentioned.\nB. The type of the basket is not mentioned.\nC. The type of the basket is mentioned in the description and is interlaced.\nD. The type of the basket is mentioned in the description but is not interlaced.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wicker basket with a woven pattern, featuring a slightly curved edge and a visible portion of the basket's side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The basket is not mentioned.\nB. The color of the basket is not mentioned.\nC. The color of the basket is mentioned in the description and is brown or wooden.\nD. The color of the basket is mentioned in the description but is not brown or wooden.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wicker basket with a woven pattern, featuring a slightly curved edge and a visible portion of the basket's side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The base of the basket is mentioned in the description.\nB. The basket is not mentioned in the description.\nC. The base of the basket is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wicker basket with a woven pattern, featuring a slightly curved edge and a visible portion of the basket's side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The handle of the basket is mentioned in the description.\nB. The basket is not mentioned in the description.\nC. The handle of the basket is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wicker basket with a woven pattern, featuring a slightly curved edge and a visible portion of the basket's side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lid of the basket is mentioned in the description.\nB. The basket is not mentioned in the description.\nC. The lid of the basket is not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wicker basket with a woven pattern, featuring a slightly curved edge and a visible portion of the basket's side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The decorative elements of the basket are mentioned in the description.\nB. The basket is not mentioned in the description.\nC. The decorative elements of the basket are not mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA dark brown wicker basket with a woven pattern, featuring a slightly curved edge and a visible portion of the basket's side.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The lining of the basket is mentioned in the description.\nB. The basket is not mentioned in the description.\nC. The lining of the basket is not mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "C. The texture of the basket is mentioned in the description and is woven.", + "C. The material of the basket is mentioned in the description and is wicker.", + "C. The type of the basket is mentioned in the description and is interlaced.", + "C. The color of the basket is mentioned in the description and is brown or wooden.", + "C. The base of the basket is not mentioned in the description.", + "C. The handle of the basket is not mentioned in the description.", + "C. The lid of the basket is not mentioned in the description.", + "C. The decorative elements of the basket are not mentioned in the description.", + "C. The lining of the basket is not mentioned in the description." + ], + "score": 1.0, + "score_pos": 1.0, + "score_neg": 1.0, + "recognition_result": true + }, + "17265253": { + "pred": "A black rickshaw with a black canopy, featuring a single visible wheel with a silver rim and black tire. 
The wheel is attached to a black frame with a visible pedal mechanism.", + "details_positives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the spokes is mentioned in the description but is not metal.", + -1 + ], + [ + "The material of the spokes is not mentioned, but the spokes of the rickshaw are mentioned.", + 0.5 + ], + [ + "The spokes or the rickshaw are not mentioned.", + 0 + ], + [ + "The material of the spokes is mentioned in the description and is metal.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. The material of the spokes is not mentioned, but the spokes of the rickshaw are mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The shape of the tire is mentioned in the description but is not circular.", + -1 + ], + [ + "The shape of the tire is not mentioned, but the tire of the rickshaw is mentioned.", + 0.5 + ], + [ + "The tire or the rickshaw is not mentioned.", + 0 + ], + [ + "The shape of the tire is mentioned in the description and is circular.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The shape of the tire is mentioned in the description and is circular.", + "pred_index": 3, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The material of the tire is mentioned in the description but is not rubber.", + -1 + ], + [ + "The material of the tire is not mentioned, but the tire of the rickshaw is mentioned.", + 0.5 + ], + [ + "The tire or the rickshaw is not mentioned.", + 0 + ], + [ + "The material of the tire is mentioned in the description and is rubber.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. 
The material of the tire is not mentioned, but the tire of the rickshaw is mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the spokes is mentioned in the description but is not silver.", + -1 + ], + [ + "The color of the spokes is not mentioned, but the spokes of the rickshaw are mentioned.", + 0.5 + ], + [ + "The spokes or the rickshaw are not mentioned.", + 0 + ], + [ + "The color of the spokes is mentioned in the description and is silver.", + 1 + ] + ], + "type": "positive", + "pred_answer": "B. The color of the spokes is not mentioned, but the spokes of the rickshaw are mentioned.", + "pred_index": 1, + "eval_result": 0.5 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The color of the tire is mentioned in the description but is not black.", + -1 + ], + [ + "The color of the tire is not mentioned, but the tire of the rickshaw is mentioned.", + 0.5 + ], + [ + "The tire or the rickshaw is not mentioned.", + 0 + ], + [ + "The color of the tire is mentioned in the description and is black.", + 1 + ] + ], + "type": "positive", + "pred_answer": "D. The color of the tire is mentioned in the description and is black.", + "pred_index": 3, + "eval_result": 1 + } + ], + "details_negatives": [ + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The path is not mentioned in the description.", + 1 + ], + [ + "The path is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The path is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rickshaw is not mentioned in the description.", + 0 + ], + [ + "The rickshaw canopy of the rickshaw is not mentioned in the description.", + 1 + ], + [ + "The rickshaw canopy of the rickshaw is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "C. The rickshaw canopy of the rickshaw is mentioned in the description.", + "pred_index": 2, + "eval_result": -1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The rickshaw is not mentioned in the description.", + 0 + ], + [ + "The rickshaw handlebars of the rickshaw are not mentioned in the description.", + 1 + ], + [ + "The rickshaw handlebars of the rickshaw are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "negative part", + "pred_answer": "B. The rickshaw handlebars of the rickshaw are not mentioned in the description.", + "pred_index": 1, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The fence is not mentioned in the description.", + 1 + ], + [ + "The fence is mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. The fence is not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + }, + { + "question": "Which of the following is applicable to the description?", + "choices": [ + [ + "The bamboo trees are not mentioned in the description.", + 1 + ], + [ + "The bamboo trees are mentioned in the description.", + -1 + ] + ], + "type": "negative", + "subtype": "salient negative", + "pred_answer": "A. 
The bamboo trees are not mentioned in the description.", + "pred_index": 0, + "eval_result": 1 + } + ], + "details_recognition": [ + { + "question": "The object in the image is rickshaw. Based on the image, is it likely that the object in the description is given class: rickshaw or object of a similar type?", + "choices": [ + [ + "Yes", + "correct" + ], + [ + "No", + "incorrect" + ] + ], + "type": "recognition", + "pred_answer": "A. Yes", + "pred_index": 0, + "eval_result": "correct" + } + ], + "prompt": [ + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a black canopy, featuring a single visible wheel with a silver rim and black tire. The wheel is attached to a black frame with a visible pedal mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nThe object in the image is rickshaw. Based on the image, is it likely that the object in the description is given class: rickshaw or object of a similar type?\nA. Yes\nB. No\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a black canopy, featuring a single visible wheel with a silver rim and black tire. 
The wheel is attached to a black frame with a visible pedal mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the spokes is mentioned in the description but is not metal.\nB. The material of the spokes is not mentioned, but the spokes of the rickshaw are mentioned.\nC. The spokes or the rickshaw are not mentioned.\nD. The material of the spokes is mentioned in the description and is metal.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a black canopy, featuring a single visible wheel with a silver rim and black tire. The wheel is attached to a black frame with a visible pedal mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The shape of the tire is mentioned in the description but is not circular.\nB. The shape of the tire is not mentioned, but the tire of the rickshaw is mentioned.\nC. The tire or the rickshaw is not mentioned.\nD. The shape of the tire is mentioned in the description and is circular.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a black canopy, featuring a single visible wheel with a silver rim and black tire. The wheel is attached to a black frame with a visible pedal mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The material of the tire is mentioned in the description but is not rubber.\nB. The material of the tire is not mentioned, but the tire of the rickshaw is mentioned.\nC. The tire or the rickshaw is not mentioned.\nD. The material of the tire is mentioned in the description and is rubber.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a black canopy, featuring a single visible wheel with a silver rim and black tire. The wheel is attached to a black frame with a visible pedal mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the spokes is mentioned in the description but is not silver.\nB. The color of the spokes is not mentioned, but the spokes of the rickshaw are mentioned.\nC. The spokes or the rickshaw are not mentioned.\nD. The color of the spokes is mentioned in the description and is silver.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. 
Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a black canopy, featuring a single visible wheel with a silver rim and black tire. The wheel is attached to a black frame with a visible pedal mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The color of the tire is mentioned in the description but is not black.\nB. The color of the tire is not mentioned, but the tire of the rickshaw is mentioned.\nC. The tire or the rickshaw is not mentioned.\nD. The color of the tire is mentioned in the description and is black.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a black canopy, featuring a single visible wheel with a silver rim and black tire. The wheel is attached to a black frame with a visible pedal mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The path is not mentioned in the description.\nB. The path is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a black canopy, featuring a single visible wheel with a silver rim and black tire. The wheel is attached to a black frame with a visible pedal mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rickshaw is not mentioned in the description.\nB. The rickshaw canopy of the rickshaw is not mentioned in the description.\nC. The rickshaw canopy of the rickshaw is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a black canopy, featuring a single visible wheel with a silver rim and black tire. The wheel is attached to a black frame with a visible pedal mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The rickshaw is not mentioned in the description.\nB. The rickshaw handlebars of the rickshaw are not mentioned in the description.\nC. The rickshaw handlebars of the rickshaw are mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. 
Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a black canopy, featuring a single visible wheel with a silver rim and black tire. The wheel is attached to a black frame with a visible pedal mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The fence is not mentioned in the description.\nB. The fence is mentioned in the description.\n", + "Answer the multiple-choice question based on the text description of an object in this image. You need to follow these rules:\n1. Do not output any reasoning. Do not perform correction. Please output exactly one answer from the choices for each question. Do not repeat the question.\n2. There is no need for exact matching. Please choose the closest option based on the description.\n\nThe description is:\nA black rickshaw with a black canopy, featuring a single visible wheel with a silver rim and black tire. The wheel is attached to a black frame with a visible pedal mechanism.\n\nFrom the description above, please answer the following question with one of the choices:\nWhich of the following is applicable to the description?\nA. The bamboo trees are not mentioned in the description.\nB. The bamboo trees are mentioned in the description.\n" + ], + "response": [ + "A. Yes", + "B. The material of the spokes is not mentioned, but the spokes of the rickshaw are mentioned.", + "D. The shape of the tire is mentioned in the description and is circular.", + "B. The material of the tire is not mentioned, but the tire of the rickshaw is mentioned.", + "B. The color of the spokes is not mentioned, but the spokes of the rickshaw are mentioned.", + "D. The color of the tire is mentioned in the description and is black.", + "A. The path is not mentioned in the description.", + "C. The rickshaw canopy of the rickshaw is mentioned in the description.", + "B. 
The rickshaw handlebars of the rickshaw are not mentioned in the description.", + "A. The fence is not mentioned in the description.", + "A. The bamboo trees are not mentioned in the description." + ], + "score": 0.65, + "score_pos": 0.7, + "score_neg": 0.6, + "recognition_result": true + } +} \ No newline at end of file diff --git a/evaluation/DLC-Bench/serve_judge.sh b/evaluation/DLC-Bench/serve_judge.sh new file mode 100644 index 0000000000000000000000000000000000000000..57b0ec271d64be4c84980079a5774246e4474b38 --- /dev/null +++ b/evaluation/DLC-Bench/serve_judge.sh @@ -0,0 +1,12 @@ +MODEL_PATH=meta-llama/Meta-Llama-3.1-8B-Instruct + +vllm serve $MODEL_PATH \ + --served-model-name llama3.1-8b \ + --api-key sk-abc123 \ + --tensor-parallel-size 1 \ + --pipeline-parallel-size 1 \ + --trust-remote-code \ + --dtype bfloat16 \ + --gpu-memory-utilization 0.85 \ + --port 8007 \ + --host localhost diff --git a/evaluation/EVALUATION.md b/evaluation/EVALUATION.md new file mode 100644 index 0000000000000000000000000000000000000000..0fc3354031ccf7b7173f13e84d2297089c01a734 --- /dev/null +++ b/evaluation/EVALUATION.md @@ -0,0 +1,306 @@ +# Evaluation of GAR + +## 1. GARBench + +### 1.1 GARBench-Caption-Simple + +First, perform inference, e.g., using GAR-8B. + +```bash +torchrun --nproc-per-node=1 --master-port=9811 \ + evaluation/GAR-Bench/inference.py \ + --model_name_or_path HaochenWang/GAR-8B \ + --anno_file evaluation/GAR-Bench/annotations/GAR-Bench-Caption-Simple.json \ + --mode simple \ + --cache_name ${CACHE_NAME} \ + --data_type bf16 \ + --seed 42 +``` + +The generated descriptions will be saved to ```evaluation/GAR-Bench/model_outputs/${CACHE_NAME}_simple.json``` + +Next, perform evaluation (with images using GPT-4o). 
+ +```bash +export AZURE_OPENAI_ENDPOINT=YOUR_AZURE_OPENAI_ENDPOINT +export AZURE_OPENAI_KEY=YOUR_AZURE_OPENAI_KEY + +python3 evaluation/GAR-Bench/eval_simple.py --pred evaluation/GAR-Bench/model_outputs/${CACHE_NAME}_simple.json +``` + +Reference cache (including model predictions and evaluation results) are stored in ```model_outputs/```. Due to the randomness during LLM-Judge, the final performance may slightly differ even with the same predictions (even with ```temperature=0```). + +To re-run the evaluation, you could change to your own ```CACHE_NAME```. + +Reference results: + +```bash +# GAR-1B +Accuracy: 0.5567010309278351 + +# GAR-8B +Accuracy: 0.6391752577319587 +``` + +### 1.2 GARBench-Caption-Detailed + +First, perform inference, e.g., using GAR-8B. + +```bash +torchrun --nproc-per-node=1 --master-port=9811 \ + evaluation/GAR-Bench/inference.py \ + --model_name_or_path HaochenWang/GAR-8B \ + --anno_file evaluation/GAR-Bench/annotations/GAR-Bench-Caption-Detailed.json \ + --mode detailed \ + --cache_name ${CACHE_NAME} \ + --data_type bf16 \ + --seed 42 +``` + +The generated descriptions will be saved to ```evaluation/GAR-Bench/model_outputs/${CACHE_NAME}_detailed.json``` + +Next, perform evaluation (with images using GPT-4o). + +```bash +export AZURE_OPENAI_ENDPOINT=YOUR_AZURE_OPENAI_ENDPOINT +export AZURE_OPENAI_KEY=YOUR_AZURE_OPENAI_KEY + +python3 evaluation/GAR-Bench/eval_detailed.py --pred evaluation/GAR-Bench/model_outputs/${CACHE_NAME}_detailed.json +``` + +Reference cache (including model predictions and evaluation results) are stored in ```model_outputs/```. Due to the randomness during LLM-Judge, the final performance may slightly differ even with the same predictions (even with ```temperature=0```). + +To re-run the evaluation, you could change to your own ```CACHE_NAME```.
+ +Reference results: + +```bash +# GAR-1B +Accuracy: 0.6635514018691588 + +# GAR-8B +Accuracy: 0.6915887850467289 +``` + +### 1.3 GARBench-VQA + +Perform inference, e.g., using GAR-8B. + +```bash +torchrun --nproc-per-node=1 --master-port=9811 \ + evaluation/GAR-Bench/inference.py \ + --model_name_or_path HaochenWang/GAR-8B \ + --anno_file evaluation/GAR-Bench/annotations/GAR-Bench-VQA.json \ + --mode vqa \ + --cache_name ${CACHE_NAME} \ + --data_type bf16 \ + --seed 42 +``` + +Reference cache (including model predictions and evaluation results) are stored in ```model_outputs/```. + +To re-run the evaluation, you could change to your own ```CACHE_NAME```. + +Reference results: +``` +# GAR-1B +color: [34/69]=49.3 +texture/pattern: [17/29]=58.6 +mirror: [36/61]=59.0 +ordering: [13/64]=20.3 +material: [14/36]=38.9 +shape: [32/64]=50.0 +relation: [57/101]=56.4 +=> overall: [203/424]=47.9 + +# GAR-8B +texture/pattern: [22/29]=75.9 +material: [19/36]=52.8 +mirror: [36/61]=59.0 +relation: [66/101]=65.4 +shape: [34/64]=53.1 +ordering: [28/64]=43.8 +color: [40/69]=58.0 +=> overall: [245/424]=57.8 +``` + +## 2. DLC-Bench + +First, download images of DLC-Bench and put the ```images``` folder in the ```annotations``` directory: +```bash +cd evaluation/DLC-Bench/annotations +hf download nvidia/DLC-Bench --repo-type dataset --include "images/*" --exclude "*" --local-dir ./ +``` + +The overall structure should be: +```bash +evaluation/DLC-Bench/annotations +├── annotations.json +├── class_names.json +├── images +│ └── objects365_v2_*.jpg +└── qa.json +``` + +Next, perform inference to obtain detailed descriptions, e.g., using GAR-8B. 
+ +```bash +torchrun --nproc-per-node=1 --master-port=8841 \ + evaluation/DLC-Bench/inference.py \ + --model_name_or_path HaochenWang/GAR-8B \ + --cache_name ${CACHE_NAME} \ + --data_type bf16 \ + --seed 42 +``` + +The generated descriptions will be saved to ```evaluation/DLC-Bench/model_outputs/${CACHE_NAME}.json``` + +Finally, perform evaluation (with images using GPT-4o or without images using Llama3.1-8B). + +**Option 1. Using GPT-4o *with* images (Recommended)** + +```bash +export AZURE_OPENAI_ENDPOINT=YOUR_AZURE_OPENAI_ENDPOINT +export AZURE_OPENAI_KEY=YOUR_AZURE_OPENAI_KEY + +python3 evaluation/DLC-Bench/eval_gpt_with_image.py --pred evaluation/DLC-Bench/model_outputs/${CACHE_NAME}.json +``` + +**Option 2. Using Llama3.1-8B *without* images** + +First, we need to serve Llama3.1-8B using vLLM. + +```bash +bash evaluation/DLC-Bench/serve_judge.sh +``` + +Next, on the *same* node, run evaluation. + +```bash +python3 eval_llama_without_image.py --pred ../model_outputs/${CACHE_NAME}.json --base_url http://localhost:8007/v1 +``` + +For more details on the differences between these two evaluation settings, please refer to Appendix F of our paper. + +Reference cache (including model predictions and evaluation results) are stored in ```model_outputs/```. Due to the randomness during LLM-Judge, the final performance may slightly differ even with the same predictions (even with ```temperature=0```). + +To re-run the evaluation, you could change to your own ```CACHE_NAME```. + +Reference results: + +```bash +# GAR-1B +# By GPT-4o (with images): +Summary (Pos Neg Avg(Pos, Neg)): 0.662, 0.880, 0.771 +# By Llama3.1-8B (without images): +Summary (Pos Neg Avg(Pos, Neg)): 0.489, 0.870, 0.679 + +# GAR-8B +# By GPT-4o (with images): +Summary (Pos Neg Avg(Pos, Neg)): 0.680, 0.860, 0.770 +# By Llama3.1-8B (without images): +Summary (Pos Neg Avg(Pos, Neg)): 0.502, 0.846, 0.674 +``` + +## 3.
Ferret-Bench + +First, perform inference to obtain detailed descriptions, e.g., using GAR-8B. + +```bash +torchrun --nproc-per-node=1 --master-port=8841 \ + evaluation/Ferret-Bench/inference.py \ + --model_name_or_path HaochenWang/GAR-8B \ + --cache_name ${CACHE_NAME} \ + --data_type bf16 \ + --seed 42 +``` + +The generated descriptions will be saved to ```evaluation/Ferret-Bench/model_outputs/${CACHE_NAME}.json``` + + +Then, perform evaluation using GPT-4o. + +```bash +export AZURE_OPENAI_ENDPOINT=YOUR_AZURE_OPENAI_ENDPOINT +export AZURE_OPENAI_KEY=YOUR_AZURE_OPENAI_KEY + +cd evaluation/Ferret-Bench +bash eval.sh ${CACHE_NAME} +``` + +Reference model predictions are stored in ```model_outputs/```, and reference evaluation results are stored in ```gpt4_result/```. Due to the randomness during LLM-Judge, the final performance may slightly differ even with the same predictions (even with ```temperature=0```). + +To re-run the evaluation, you could change to your own ```CACHE_NAME```. + +Reference results: +```bash +# GAR-1B +review_refer_desc +all 56.0 +refer_desc 56.0 +================================= + +# GAR-8B +review_refer_desc +all 64.8 +refer_desc 64.8 +================================= +``` + + +## 4. MDVP-Bench + +First, perform inference to obtain detailed descriptions, e.g., using GAR-8B. + +```bash +torchrun --nproc-per-node=1 --master-port=8841 \ + evaluation/MDVP-Bench/inference.py \ + --model_name_or_path HaochenWang/GAR-8B \ + --cache_name ${CACHE_NAME} \ + --data_type bf16 \ + --seed 42 +``` + +The generated descriptions will be saved to ```evaluation/MDVP-Bench/model_outputs/${CACHE_NAME}.json``` + + +Then, perform evaluation using GPT-4o. + +```bash +export AZURE_OPENAI_ENDPOINT=YOUR_AZURE_OPENAI_ENDPOINT +export AZURE_OPENAI_KEY=YOUR_AZURE_OPENAI_KEY + +cd evaluation/MDVP-Bench +bash eval.sh model_outputs/${CACHE_NAME}.json +``` + +Reference model predictions are stored in ```model_outputs/```.
Due to the randomness during LLM-Judge, the final performance may slightly differ even with the same predictions (even with ```temperature=0```). + +To re-run the evaluation, you could change to your own ```CACHE_NAME```. + +Reference results: +```bash +# GAR-1B +android_detailed_caption_box 80.65 +multipanel_detailed_caption_box 103.7 +natural_detailed_caption_box 152.63 +ocr_doc_detailed_caption_box 146.87 +ocr_spotting_detailed_caption_box 152.38 +web_detailed_caption_box 150.0 +# Natural = natural_detailed_caption_box = 152.6 +# OCR = (ocr_doc_detailed_caption_box + ocr_spotting_detailed_caption_box) / 2 = 149.6 +# Multi-Panel = multipanel_detailed_caption_box = 103.7 +# Screenshot = (android_detailed_caption_box + web_detailed_caption_box) / 2 = 115.3 + +# GAR-8B +android_detailed_caption_box 113.79 +multipanel_detailed_caption_box 117.24 +natural_detailed_caption_box 178.57 +ocr_doc_detailed_caption_box 138.10 +ocr_spotting_detailed_caption_box 160.0 +web_detailed_caption_box 132.26 +# Natural = natural_detailed_caption_box = 178.6 +# OCR = (ocr_doc_detailed_caption_box + ocr_spotting_detailed_caption_box) / 2 = 149.1 +# Multi-Panel = multipanel_detailed_caption_box = 117.2 +# Screenshot = (android_detailed_caption_box + web_detailed_caption_box) / 2 = 123.0 +``` \ No newline at end of file diff --git a/evaluation/Ferret-Bench/annotations/box_refer_caption.json b/evaluation/Ferret-Bench/annotations/box_refer_caption.json new file mode 100644 index 0000000000000000000000000000000000000000..c26fd317feaa1369c84ea3ff77d8be5db5be3a5a --- /dev/null +++ b/evaluation/Ferret-Bench/annotations/box_refer_caption.json @@ -0,0 +1 @@ +[{"question_id": 0, "image": "000000069138.jpg", "category": "description", "text": "What is the interaction between the object and its surroundings?", "annotation": {"bbox": [230.39, 52.48, 56.02, 32.0], "segmentation": [[230.39, 52.48, 286.41, 52.48, 286.41, 84.48, 230.39, 84.48]]}}, {"question_id": 1, "image": "000000131138.jpg", "category":
"description", "text": "What is the interaction between the object and its surrounding?", "annotation": {"bbox": [117.12, 383.52, 91.52, 46.56], "segmentation": [[117.12, 383.52, 208.64, 383.52, 208.64, 430.08, 117.12, 430.08]]}}, {"question_id": 2, "image": "000000475150.jpg", "category": "description", "text": "What can you tell about the region and its interaction with the surrounding areas?", "annotation": {"bbox": [184.32, 138.35, 181.76, 138.77], "segmentation": [[184.32, 138.35, 366.08, 138.35, 366.08, 277.12, 184.32, 277.12]]}}, {"question_id": 3, "image": "000000356424.jpg", "category": "description", "text": "What is happening in the region and what is its relationship to the surrounding objects?", "annotation": {"bbox": [253.44, 162.56, 90.72, 263.68], "segmentation": [[253.44, 162.56, 344.16, 162.56, 344.16, 426.24, 253.44, 426.24]]}}, {"question_id": 4, "image": "000000491090.jpg", "category": "description", "text": "What can be said about the region in relation to nearby objects or elements?", "annotation": {"bbox": [48.76, 318.72, 108.51, 124.16], "segmentation": [[48.76, 318.72, 157.26, 318.72, 157.26, 442.88, 48.76, 442.88]]}}, {"question_id": 5, "image": "000000484415.jpg", "category": "description", "text": "What can be observed in the region and how does it interact with the surroundings?", "annotation": {"bbox": [229.12, 46.08, 56.96, 85.92], "segmentation": [[229.12, 46.08, 286.08, 46.08, 286.08, 132.0, 229.12, 132.0]]}}, {"question_id": 7, "image": "000000184324.jpg", "category": "description", "text": "What is happening within the region and how is it related to the nearby objects?", "annotation": {"bbox": [360.96, 327.68, 199.68, 93.5], "segmentation": [[360.96, 327.68, 560.64, 327.68, 560.64, 421.18, 360.96, 421.18]]}}, {"question_id": 8, "image": "000000341058.jpg", "category": "description", "text": "What is the object and what is its relationship with nearby objects?", "annotation": {"bbox": [222.22, 536.32, 5.03, 7.68], "segmentation": 
[[222.22, 536.32, 227.25, 536.32, 227.25, 544.0, 222.22, 544.0]]}}, {"question_id": 9, "image": "000000184384.jpg", "category": "description", "text": "What can you tell about the objects found in the region ?", "annotation": {"bbox": [401.92, 57.6, 236.8, 129.12], "segmentation": [[401.92, 57.6, 638.72, 57.6, 638.72, 186.72, 401.92, 186.72]]}}, {"question_id": 10, "image": "000000259097.jpg", "category": "description", "text": "What can be said about the region in relation to the surrounding areas?", "annotation": {"bbox": [6.0, 173.16, 492.0, 36.96], "segmentation": [[6.0, 173.16, 498.0, 173.16, 498.0, 210.12, 6.0, 210.12]]}}, {"question_id": 11, "image": "000000377882.jpg", "category": "description", "text": "What can you tell about the region and its surrounding context?", "annotation": {"bbox": [154.88, 101.28, 38.4, 373.44], "segmentation": [[154.88, 101.28, 193.28, 101.28, 193.28, 474.72, 154.88, 474.72]]}}, {"question_id": 12, "image": "000000415748.jpg", "category": "description", "text": "What can you tell about the object and its interaction with nearby objects?", "annotation": {"bbox": [35.78, 280.32, 273.92, 330.24], "segmentation": [[35.78, 280.32, 309.7, 280.32, 309.7, 610.56, 35.78, 610.56]]}}, {"question_id": 13, "image": "000000408120.jpg", "category": "description", "text": "What can you see within the region and what is its interaction with nearby objects?", "annotation": {"bbox": [252.16, 241.25, 112.64, 65.33], "segmentation": [[252.16, 241.25, 364.8, 241.25, 364.8, 306.59, 252.16, 306.59]]}}, {"question_id": 14, "image": "000000184400.jpg", "category": "description", "text": "What is the interaction between the object and its surrounding objects?", "annotation": {"bbox": [385.28, 401.76, 60.16, 76.8], "segmentation": [[385.28, 401.76, 445.44, 401.76, 445.44, 478.56, 385.28, 478.56]]}}, {"question_id": 15, "image": "000000276018.jpg", "category": "description", "text": "What can you tell me about the region and its interactions with nearby 
objects?", "annotation": {"bbox": [29.54, 241.92, 177.63, 296.96], "segmentation": [[29.54, 241.92, 207.17, 241.92, 207.17, 538.88, 29.54, 538.88]]}}, {"question_id": 16, "image": "000000376322.jpg", "category": "description", "text": "What is the interaction between objects in the region ?", "annotation": {"bbox": [319.3, 161.28, 115.2, 236.8], "segmentation": [[319.3, 161.28, 434.5, 161.28, 434.5, 398.08, 319.3, 398.08]]}}, {"question_id": 17, "image": "000000125472.jpg", "category": "description", "text": "What can you tell about the region and its interaction with surrounding objects?", "annotation": {"bbox": [85.83, 1.28, 315.55, 483.84], "segmentation": [[85.83, 1.28, 401.38, 1.28, 401.38, 485.12, 85.83, 485.12]]}}, {"question_id": 18, "image": "000000361551.jpg", "category": "description", "text": "Can you tell me about the interaction happening in the region and its context?", "annotation": {"bbox": [459.36, 394.24, 19.2, 34.56], "segmentation": [[459.36, 394.24, 478.56, 394.24, 478.56, 428.8, 459.36, 428.8]]}}, {"question_id": 19, "image": "000000412240.jpg", "category": "description", "text": "What can be said about the region in terms of the surrounding objects and their interactions?", "annotation": {"bbox": [1.0, 163.88, 359.0, 131.25], "segmentation": [[1.0, 163.88, 360.0, 163.88, 360.0, 295.12, 1.0, 295.12]]}}, {"question_id": 20, "image": "000000130566.jpg", "category": "description", "text": "What can you tell about the region and its interaction with the surrounding environment?", "annotation": {"bbox": [403.2, 201.12, 33.28, 33.73], "segmentation": [[403.2, 201.12, 436.48, 201.12, 436.48, 234.85, 403.2, 234.85]]}}, {"question_id": 21, "image": "000000421923.jpg", "category": "description", "text": "What is the relationship between the object ?", "annotation": {"bbox": [339.1, 295.68, 79.24, 56.32], "segmentation": [[339.1, 295.68, 418.33, 295.68, 418.33, 352.0, 339.1, 352.0]]}}, {"question_id": 22, "image": "000000513567.jpg", "category": 
"description", "text": "What is happening in the region ?", "annotation": {"bbox": [592.64, 121.44, 46.08, 188.16], "segmentation": [[592.64, 121.44, 638.72, 121.44, 638.72, 309.6, 592.64, 309.6]]}}, {"question_id": 23, "image": "000000543300.jpg", "category": "description", "text": "What can you tell about the region and how it relates to the surroundings?", "annotation": {"bbox": [264.96, 331.68, 158.72, 16.32], "segmentation": [[264.96, 331.68, 423.68, 331.68, 423.68, 348.0, 264.96, 348.0]]}}, {"question_id": 24, "image": "000000241668.jpg", "category": "description", "text": "What is happening in the region ?", "annotation": {"bbox": [224.02, 87.04, 147.66, 544.0], "segmentation": [[224.02, 87.04, 371.68, 87.04, 371.68, 631.04, 224.02, 631.04]]}}, {"question_id": 25, "image": "000000535578.jpg", "category": "description", "text": "What can you tell about the region and its surrounding areas?", "annotation": {"bbox": [80.7, 20.48, 219.48, 93.44], "segmentation": [[80.7, 20.48, 300.18, 20.48, 300.18, 113.92, 80.7, 113.92]]}}, {"question_id": 26, "image": "000000277051.jpg", "category": "description", "text": "Describe the bird and its interactions with surrounding objects?", "annotation": {"bbox": [245.76, 158.84, 200.96, 177.21], "segmentation": [[245.76, 158.84, 446.72, 158.84, 446.72, 336.05, 245.76, 336.05]]}}, {"question_id": 27, "image": "000000018519.jpg", "category": "description", "text": "What are the details of the region and how does it relate to the nearby objects?", "annotation": {"bbox": [143.69, 335.36, 31.93, 29.44], "segmentation": [[143.69, 335.36, 175.62, 335.36, 175.62, 364.8, 143.69, 364.8]]}}, {"question_id": 28, "image": "000000106048.jpg", "category": "description", "text": "Can you describe what's happening in the region ?", "annotation": {"bbox": [142.08, 61.63, 382.72, 262.36], "segmentation": [[142.08, 61.63, 524.8, 61.63, 524.8, 324.0, 142.08, 324.0]]}}, {"question_id": 29, "image": "000000058393.jpg", "category": "description", 
"text": "What can you say about the interaction between objects in the region ?", "annotation": {"bbox": [346.88, 166.7, 172.8, 72.9], "segmentation": [[346.88, 166.7, 519.68, 166.7, 519.68, 239.6, 346.88, 239.6]]}}, {"question_id": 30, "image": "000000010764.jpg", "category": "description", "text": "Referencing the region , can you describe what you see and how it interacts with the surrounding context?", "annotation": {"bbox": [349.44, 265.0, 51.2, 74.62], "segmentation": [[349.44, 265.0, 400.64, 265.0, 400.64, 339.62, 349.44, 339.62]]}}, {"question_id": 31, "image": "000000271402.jpg", "category": "description", "text": "What can you tell me about the region and its relation to nearby objects?", "annotation": {"bbox": [25.44, 55.04, 170.45, 526.08], "segmentation": [[25.44, 55.04, 195.89, 55.04, 195.89, 581.12, 25.44, 581.12]]}}, {"question_id": 32, "image": "000000273493.jpg", "category": "description", "text": "What is happening in the region with regard to its surroundings?", "annotation": {"bbox": [294.0, 108.89, 131.0, 125.21], "segmentation": [[294.0, 108.89, 425.0, 108.89, 425.0, 234.1, 294.0, 234.1]]}}, {"question_id": 33, "image": "000000360960.jpg", "category": "description", "text": "Can you describe the region and its interaction with the surroundings?", "annotation": {"bbox": [223.22, 473.6, 89.46, 74.24], "segmentation": [[223.22, 473.6, 312.68, 473.6, 312.68, 547.84, 223.22, 547.84]]}}, {"question_id": 34, "image": "000000452122.jpg", "category": "description", "text": "What is happening in the region ?", "annotation": {"bbox": [416.0, 182.76, 133.12, 73.44], "segmentation": [[416.0, 182.76, 549.12, 182.76, 549.12, 256.2, 416.0, 256.2]]}}, {"question_id": 35, "image": "000000134722.jpg", "category": "description", "text": "What can you say about the region and its relation with nearby objects?", "annotation": {"bbox": [204.8, 216.48, 89.6, 67.68], "segmentation": [[204.8, 216.48, 294.4, 216.48, 294.4, 284.16, 204.8, 284.16]]}}, {"question_id": 36, 
"image": "000000039484.jpg", "category": "description", "text": "What is happening in the region and how does this relate to the surrounding area?", "annotation": {"bbox": [540.16, 339.55, 72.96, 52.44], "segmentation": [[540.16, 339.55, 613.12, 339.55, 613.12, 391.99, 540.16, 391.99]]}}, {"question_id": 37, "image": "000000159311.jpg", "category": "description", "text": "What can you tell about the region considering the surrounding entities and their interactions?", "annotation": {"bbox": [103.0, 284.05, 75.0, 42.96], "segmentation": [[103.0, 284.05, 178.0, 284.05, 178.0, 327.01, 103.0, 327.01]]}}, {"question_id": 38, "image": "000000326174.jpg", "category": "description", "text": "Can you describe the interaction or relationship between the objects in the region ?", "annotation": {"bbox": [284.16, 220.32, 69.12, 189.12], "segmentation": [[284.16, 220.32, 353.28, 220.32, 353.28, 409.44, 284.16, 409.44]]}}, {"question_id": 39, "image": "000000562207.jpg", "category": "description", "text": "Can you describe what's happening in the region and how it relates to nearby objects or individuals?", "annotation": {"bbox": [98.56, 166.6, 93.44, 243.1], "segmentation": [[98.56, 166.6, 192.0, 166.6, 192.0, 409.7, 98.56, 409.7]]}}, {"question_id": 40, "image": "000000332318.jpg", "category": "description", "text": "What can you tell about the region and how does it relate to the rest of the scene?", "annotation": {"bbox": [279.04, 368.94, 11.52, 12.87], "segmentation": [[279.04, 368.94, 290.56, 368.94, 290.56, 381.81, 279.04, 381.81]]}}] \ No newline at end of file diff --git a/evaluation/Ferret-Bench/eval.sh b/evaluation/Ferret-Bench/eval.sh new file mode 100644 index 0000000000000000000000000000000000000000..43c95611d1b70395e124be5937fecfca389ed012 --- /dev/null +++ b/evaluation/Ferret-Bench/eval.sh @@ -0,0 +1,17 @@ +CHECKPOINT_FILE=$1 + +mkdir -p gpt4_result/${CHECKPOINT_FILE} +mkdir -p gpt4_result/${CHECKPOINT_FILE}/refer_desc + +python3 eval_gpt.py \ + --question 
"""GPT-based pairwise review of Ferret-Bench answers.

The script converts a model-output JSON file into the JSONL answer format,
then asks an Azure OpenAI chat model to score the reference answer
(answer 1) against the model prediction (answer 2) for every question.
Reviews are appended to the output file, so an interrupted run can be
resumed: entries that already have a review line are skipped.
"""

import argparse
import contextlib
import json
import os
import re
import time

NUM_SECONDS_TO_SLEEP = 0.5
VOCAB_IMAGE_W = 1000
VOCAB_IMAGE_H = 1000

# Define Azure OpenAI details
model_name = "gpt-4o-2024-11-20"
# NOTE: this module-level value is not read below; get_eval() uses the
# ``max_tokens`` argument it is given (the CLI default is 1024).
max_tokens = 1000  # range: [1, 4095]

# Matches the shortest "[...]" span on a single line.
_BRACKET_PATTERN = re.compile(r"\[.*?\]")

# Azure client, created on first use by _get_client(). Deferring construction
# keeps the pure helpers importable without the openai package or credentials.
client = None


def _get_client():
    """Return the shared Azure OpenAI client, creating it on first use."""
    global client
    if client is None:
        import openai

        client = openai.AzureOpenAI(
            azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
            api_key=os.getenv("AZURE_OPENAI_KEY"),
            api_version="2024-03-01-preview",
        )
    return client


def get_eval(content: str, max_tokens: int):
    """Send ``content`` to the chat model and return the text of its reply.

    The request is retried forever, sleeping ``NUM_SECONDS_TO_SLEEP`` between
    attempts: rate-limit errors are retried silently, any other error is
    printed and retried as well (deliberate best-effort behavior).

    Args:
        content: The user message holding context, question, both answers
            and the judging rule.
        max_tokens: Upper bound on the number of tokens in the reply.

    Returns:
        The ``message.content`` of the first choice of the completion.
    """
    import openai

    api = _get_client()
    while True:
        try:
            completion = api.chat.completions.create(
                model=model_name,
                messages=[
                    {
                        "role": "system",
                        "content": "You are a helpful and precise assistant for checking the quality of the answer.",
                    },
                    {
                        "role": "user",
                        "content": content,
                    },
                ],
                max_tokens=max_tokens,
                temperature=0,
            )
            return completion.choices[0].message.content
        # The v1 SDK (required for AzureOpenAI) exposes the exception at the
        # package top level; the old ``openai.error`` module no longer exists.
        except openai.RateLimitError:
            pass
        except Exception as e:
            print(e)
        time.sleep(NUM_SECONDS_TO_SLEEP)


def postprocess_answer(answer, category):
    """Normalize bracketed coordinates in a predicted answer.

    * ``refer_desc`` / ``refer_reason``: every ``" [..]"`` span (bracket plus
      the space before it) is removed.
    * ``ground_conv``: every ``[a, b, c, d]`` span is rescaled by
      ``VOCAB_IMAGE_W`` / ``VOCAB_IMAGE_H`` and rewritten with three decimals.
      A bracket that does not hold exactly four numbers is reported and
      removed instead of aborting the run.
    * any other category: the answer is returned unchanged.

    Args:
        answer: The raw answer text.
        category: The question category.

    Returns:
        The processed answer text.
    """
    if category in ("refer_desc", "refer_reason"):
        for match in _BRACKET_PATTERN.findall(answer):
            answer = answer.replace(" " + match, "")
    elif category == "ground_conv":
        for match in _BRACKET_PATTERN.findall(answer):
            raw_parts = match.replace("[", "").replace("]", "").split(",")
            try:
                coords = [float(part.strip()) for part in raw_parts]
            except ValueError:
                coords = []
            if len(coords) != 4:
                print("Found a exception when parsing coordinates")
                answer = answer.replace(match, "")
                continue
            scaled = (
                coords[0] / VOCAB_IMAGE_W,
                coords[1] / VOCAB_IMAGE_H,
                coords[2] / VOCAB_IMAGE_W,
                coords[3] / VOCAB_IMAGE_H,
            )
            answer = answer.replace(
                match,
                "[" + ", ".join(f"{value:.3f}" for value in scaled) + "]",
            )

    return answer


def parse_score(review):
    """Extract the two scores from the first line of a review.

    The first line is expected to hold two numbers separated by commas
    and/or whitespace, e.g. ``"8 9"``, ``"8,9"`` or ``"8, 9"``.

    Args:
        review: The reply text returned by the judge model.

    Returns:
        ``[score1, score2]`` as floats, or ``[-1, -1]`` when the first line
        cannot be parsed (the problem is printed).
    """
    try:
        score_pair = review.split("\n")[0]
        # Split on any run of whitespace so "8, 9" does not produce an empty
        # token between the two numbers.
        sp = score_pair.replace(",", " ").split()
        print("score:", sp)
        return [float(sp[0]), float(sp[1])]
    except (AttributeError, IndexError, TypeError, ValueError) as e:
        print(e)
        print("error", review)
        return [-1, -1]


def _read_jsonl(path):
    """Return the parsed objects of a JSONL file, ignoring blank lines."""
    with open(path, encoding="utf-8") as file:
        return [json.loads(line) for line in file if line.strip()]


def _convert_predictions(source_path, target_path):
    """Rewrite the model-output JSON list as a ``refer_desc`` answer JSONL."""
    with open(source_path, "r", encoding="utf-8") as file:
        predictions = json.load(file)
    with open(target_path, "w", encoding="utf-8") as file:
        for idx, item in enumerate(predictions):
            info = {
                "question_id": idx,
                "image": item["image_path"].split("/")[-1],
                "category": "refer_desc",
                "text": item["caption"],
            }
            json.dump(info, file, ensure_ascii=False)
            file.write("\n")


def main():
    """Parse the command line and review every question/answer pair."""
    parser = argparse.ArgumentParser(description="ChatGPT-based QA evaluation.")
    parser.add_argument("-q", "--question")
    parser.add_argument("-c", "--context")
    parser.add_argument("-a", "--answer-list", nargs="+", default=[])
    parser.add_argument("-r", "--rule")
    parser.add_argument("-o", "--output")
    parser.add_argument(
        "--max-tokens",
        type=int,
        default=1024,
        help="maximum number of tokens produced in the output",
    )
    parser.add_argument("--source-file", type=str, required=True)
    args = parser.parse_args()
    if len(args.answer_list) < 2:
        parser.error(
            "--answer-list needs two paths: the reference answers and the "
            "file the converted predictions are written to"
        )

    # Imported here so the helpers above stay importable without tqdm.
    from tqdm import tqdm

    question_path = os.path.expanduser(args.question)
    reference_path = os.path.expanduser(args.answer_list[0])
    prediction_path = os.path.expanduser(args.answer_list[1])
    output_path = os.path.expanduser(args.output)

    # convert first
    _convert_predictions(os.path.expanduser(args.source_file), prediction_path)

    with open(os.path.expanduser(args.rule), "r", encoding="utf-8") as file:
        rule_dict = json.load(file)

    # Reviews already on disk; entries below this count are skipped on resume.
    num_existing = len(_read_jsonl(output_path)) if os.path.isfile(output_path) else 0

    context_list = _read_jsonl(os.path.expanduser(args.context))
    image_to_context = {context["image"]: context for context in context_list}

    with contextlib.ExitStack() as stack:
        f_q = stack.enter_context(open(question_path, encoding="utf-8"))
        f_ans1 = stack.enter_context(open(reference_path, encoding="utf-8"))
        f_ans2 = stack.enter_context(open(prediction_path, encoding="utf-8"))
        # Same expanded path as the isfile() check above.
        review_file = stack.enter_context(open(output_path, "a", encoding="utf-8"))

        # The three files are aligned by line position, not by question_id.
        for idx, (ques_js, ans1_js, ans2_js) in enumerate(
            tqdm(zip(f_q, f_ans1, f_ans2))
        ):
            ques = json.loads(ques_js)
            ans1 = json.loads(ans1_js)
            ans2 = json.loads(ans2_js)

            inst = image_to_context[ques["image"]]

            category = ques["category"]
            if category not in rule_dict:
                raise ValueError(
                    f"Visual QA category not found in rule file: {category}."
                )
            rule = rule_dict[category]

            # Assume ans2 is the predicted one.
            ans2["text"] = postprocess_answer(ans2["text"], category)

            prompt = rule["prompt"]
            role = rule["role"]
            # "[Context]" is followed by a newline like every other section
            # header (the original "\{" emitted a stray backslash instead).
            content = (
                f'[Context]\n{inst["text"]}\n\n'
                f'[Question]\n{ques["text"]}\n\n'
                f'[{role} 1]\n{ans1["text"]}\n\n[End of {role} 1]\n\n'
                f'[{role} 2]\n{ans2["text"]}\n\n[End of {role} 2]\n\n'
                f"[System]\n{prompt}\n\n"
            )
            cur_js = {
                "id": idx + 1,
                "question_id": ques["question_id"],
                "answer1_id": ans1.get("answer_id", ans1["question_id"]),
                "answer2_id": ans2.get("answer_id", ans2["question_id"]),
                "category": category,
            }
            if idx >= num_existing:
                review = get_eval(content, args.max_tokens)
                scores = parse_score(review)
                cur_js["content"] = review
                cur_js["tuple"] = scores
                cur_js["answer1"] = ans1["text"]
                cur_js["answer2"] = ans2["text"]
                review_file.write(json.dumps(cur_js) + "\n")
                # Flush per review so an interrupted run loses nothing.
                review_file.flush()
            else:
                print(f"Skipping {idx} as we already have it.")
            print(idx + 1)


if __name__ == "__main__":
    main()
The sign is directly related to the building, contributing to the information about the building's function to the overall scene."} +{"question_id": 1, "image": "000000131138.jpg", "category": "refer_desc", "text": "The object is a plate, and on top of this plate is a fork. It's located on a desk. Nearby, there's also a cup and a computer mouse."} +{"question_id": 2, "image": "000000475150.jpg", "category": "refer_desc", "text": "The region is primarily occupied by the face of a giraffe, which is white and brown in color. This portion of the giraffe's body is in close proximity to its neck, which is covered with wrinkles. The giraffe's eye and ear are also located within this region. The giraffe is standing in front of a tree, and its face and neck are spotted with tan and brown spots. The background is filled with branches of the tree and the sky."} +{"question_id": 3, "image": "000000356424.jpg", "category": "refer_desc", "text": "In the region, there's a woman who seems to be having lunch. She is sitting near a table where there are several objects, including a plate of food that contains raspberries, a bottle, and a glass of water. There is also a man nearby, who is wearing glasses and a striped orange and black shirt. Both of them seem to be engaged in a meal."} +{"question_id": 4, "image": "000000491090.jpg", "category": "refer_desc", "text": "The region corresponds to the back wheel of a motorcycle, which is an integral part of the black motorcycle with silver accessories. Close to the back wheel, you can find the tail pipe of the motorcycle located on the left. Above the back wheel, there's a person sitting on the motorcycle, wearing a sweater, jeans, and sneakers."} +{"question_id": 5, "image": "000000484415.jpg", "category": "refer_desc", "text": "In the region, there is a container and a toilet brush cleaner. This region is right next to the toilet bowl, indicating that the brush cleaner is accessible for bathroom cleaning. 
It's crucial for maintaining the cleanliness of the toilet nearby."} +{"question_id": 7, "image": "000000184324.jpg", "category": "refer_desc", "text": "The region is a crosswalk on a busy city street, highlighted by white stripes. It's being used by a cyclist and a group of people who are crossing the street. There are bikes on the road, and cars are also visible within the vicinity. A large white vehicle with a big windshield is also nearby. This bustling scene is typical for a city intersection."} +{"question_id": 8, "image": "000000341058.jpg", "category": "refer_desc", "text": "The object is a salt shaker. It is located on a table along with a napkin and another shaker, which contains pepper. The table appears to be set for dining at a restaurant, as indicated by the presence of these objects."} +{"question_id": 9, "image": "000000184384.jpg", "category": "refer_desc", "text": "In the region, there is a plate with various types of food on it. This includes a sausage, an egg, and a few other unidentified items. The food is attractively arranged on the plate, which is positioned towards the back of the table. The plate and its contents seem to be part of a larger meal setup on the table."} +{"question_id": 10, "image": "000000259097.jpg", "category": "refer_desc", "text": "Region is full of trees and there is a village on a hill in the distance. These trees and buildings are located behind a grassy field where a man is seen jumping to catch a frisbee. The man's shadow can be seen on the grass."} +{"question_id": 11, "image": "000000377882.jpg", "category": "refer_desc", "text": "The region contains a black fence pole, which seems to be part of a chain-link fence enclosing the area. This fence is next to a water way and encloses several boats and surfboards. 
There are buildings on the horizon, and some green shrubs growing along the side of the lake."} +{"question_id": 12, "image": "000000415748.jpg", "category": "refer_desc", "text": "The region contains an elephant, which is quite large. There's a man riding on the back of the elephant, and they are moving close to a building. The shadow of the elephant can be seen on the ground. Additionally, the elephant's face and trunk are painted, which indicates some cultural significance."} +{"question_id": 13, "image": "000000408120.jpg", "category": "refer_desc", "text": "In the region, there is a concrete surface which is part of the alley. It is placed alongside the curb and the road, and there is a car parked on it. Also, nearby, there is a girl holding an umbrella walking along this path."} +{"question_id": 14, "image": "000000184400.jpg", "category": "refer_desc", "text": "In the region, there is a metal support column. This column is providing support for a bridge above it, which a train is passing over. The column also features a red line on it. This region is part of a larger scene that includes a train track on an elevated bridge."} +{"question_id": 15, "image": "000000276018.jpg", "category": "refer_desc", "text": "The region is occupied by a boy who is wearing a black jacket. He is holding a brown stuffed dog with a red and white collar. The boy seems to be part of a larger group of children who are all holding various stuffed animals and dolls. They seem to be walking across some grassy area, possibly in some kind of event or gathering."} +{"question_id": 16, "image": "000000376322.jpg", "category": "refer_desc", "text": "In the region, there is a man wearing a green shirt. He is sitting at a table, presumably in a social setting, along with other people. The table is full of items such as plates, glasses, and a decanter. 
One of the significant interactions is that the man is engaged in a conversation with the people around him."} +{"question_id": 17, "image": "000000125472.jpg", "category": "refer_desc", "text": "This region is primarily occupied by a man, who appears to be in mid-air, performing a trick on a skateboard. The skateboard is beneath him. He is wearing jeans and shoes with laces, and has a bracelet on his wrist. In the background of this region, there are trees, a building, and a fence. The scene seems to be taking place in a stadium, as there are stadium lights on poles in the vicinity."} +{"question_id": 18, "image": "000000361551.jpg", "category": "refer_desc", "text": "This region features a woman, who is dressed in a sleeveless black top. She is bending over her luggage, possibly preparing or checking something inside it. The woman is wearing a black and white headband as well. She is located in the service area of an airport, where there are other people standing around as well, some of them are holding their luggage. This scene is quite typical in an airport setting where passengers are usually seen handling their luggage."} +{"question_id": 19, "image": "000000412240.jpg", "category": "refer_desc", "text": "This region primarily contains a shoe. The shoe appears to be placed on a floor, and light is reflecting off of it. A dog is sitting nearby on the floor as well, and the shoe is positioned next to the dog. The shoe features several distinct elements like laces, a heel, and a toe."} +{"question_id": 20, "image": "000000130566.jpg", "category": "refer_desc", "text": "The region features windows on the side of a train engine. The train itself is traveling down a set of tracks, which are part of a larger railway system that includes multiple sets of tracks on the ground. Nearby, there are also electric lines hanging above the tracks. 
Further off, there are buildings, trees, and a wall, which add to the overall rural setting."} +{"question_id": 21, "image": "000000421923.jpg", "category": "refer_desc", "text": "The object is a vase, and the object is a flower. The flower is in the vase, suggesting it is a decorative element within the room."} +{"question_id": 22, "image": "000000513567.jpg", "category": "refer_desc", "text": "A woman, who is wearing a brown shirt and jeans, is crossing the street."} +{"question_id": 23, "image": "000000543300.jpg", "category": "refer_desc", "text": "The region is displaying red letters. These letters are on the side of a large, white boat that's sitting in the water. The boat has two levels and there is a set of long, black windows on its side. A silver railing is present on the top level of the boat. Close to the boat, there are buildings with red roofs and outdoor canopies. There's also a blue container on the dock, and a gray sea wall next to the ship."} +{"question_id": 24, "image": "000000241668.jpg", "category": "refer_desc", "text": "In the region, there is a woman with red hair. She's wearing a tie and a suit jacket, and is holding a plate with a piece of cake. The woman is dressed in formal attire, suggesting that she's attending a special occasion like a wedding."} +{"question_id": 25, "image": "000000535578.jpg", "category": "refer_desc", "text": "The region contains rocks and grass, providing a background for the pasture. Nearby, there are white sheep grazing in the green grassy field. There are also trees and a bush in the vicinity. A stone wall is running across the grassy field, bordering it. Besides, there's a hill in the field where some sheep and a rock are located."} +{"question_id": 26, "image": "000000277051.jpg", "category": "refer_desc", "text": "In this region, a bird is standing on the edge of a table. 
The table is covered with a red tablecloth and there are several objects on it, including a plate with food and crumbs, a bottle, and a steak knife. The bird is close to the knife and the plate with food. There's also a chair next to the table."} +{"question_id": 27, "image": "000000018519.jpg", "category": "refer_desc", "text": "The region contains a black wrist guard that the skater is wearing. This wrist guard is part of the safety gear that the skater has on, which also includes a black helmet, elbow pad, knee pad, and a pair of roller skates. The skater is performing a trick at the skate park, his shadow is cast on the cement ramp, and there is a grey post to a metal fence at the top of the ramp nearby. Overall, this region is an important part of the scene, showing the skater's safety equipment."} +{"question_id": 28, "image": "000000106048.jpg", "category": "refer_desc", "text": "This is a large decorated white bus. It seems to be driving past a tall building. You can see \"Divine Transportation\" written on the front of the bus. There's also a bus identification number on top. The bus features a design, including stripes, and there are headlights at the front. You can also see the side mirrors and wheels. Behind the bus, there's a gray trash can next to some large green bushes."} +{"question_id": 29, "image": "000000058393.jpg", "category": "refer_desc", "text": "The region includes a man who is sitting on a bench. He has his arm around a woman, indicating a close relationship between them. They are both looking towards the ocean, suggesting that they are enjoying the view together. The bench they are sitting on is in front of the ocean."} +{"question_id": 30, "image": "000000010764.jpg", "category": "refer_desc", "text": "This region is occupied by a baseball player wearing knee and leg pads. These pads are a part of the player's protective gear. The player, dressed as a catcher, is crouched on the field, ready to catch a ball. 
He is in a white uniform, which includes pants with a line on them, and he's wearing sneakers. His gloved hand is extended, prepared to receive. We can also see a black and red wrist band on his wrist. The field beneath him is brown dirt, contrasting with the green grass in the rest of the baseball field. Nearby, there are white chalk lines painted on the field."} +{"question_id": 31, "image": "000000271402.jpg", "category": "refer_desc", "text": "This region contains a little girl who is standing near a scooter. The scooter has an orange board and black handles, and it's specifically located to the right of her. The girl has blonde hair and she's wearing white socks. She is also standing on the pavement."} +{"question_id": 32, "image": "000000273493.jpg", "category": "refer_desc", "text": "In this region, a man in white clothing is preparing to hit a yellow tennis ball with his racket. He is on a tennis court with white boundary lines and a net in front of him. Behind him, there are a fence, trimmed bushes, and tall trees in the distance."} +{"question_id": 33, "image": "000000360960.jpg", "category": "refer_desc", "text": "The region is where a man is found wearing a pair of pants. This man is also wearing a long black coat. He seems to be walking on a sidewalk or decorative square, which fills the background of the image."} +{"question_id": 34, "image": "000000452122.jpg", "category": "refer_desc", "text": "In the region, there is an airplane's engine. The airplane seems to be in mid-flight, given the sky that surrounds it. The front door of the airplane is also visible in this region. The plane appears to be a commercial airline, as indicated by visible letters and windows. Notably, the landing gear of the airplane is lowered, suggesting that it's preparing to land."} +{"question_id": 35, "image": "000000134722.jpg", "category": "refer_desc", "text": "The region contains the front window of a train, which has windshield wipers. 
This window is part of the front of the train, which is painted yellow and white. Also, the region is located near the headlights of the train."} +{"question_id": 36, "image": "000000039484.jpg", "category": "refer_desc", "text": "In this region, there are people sitting at a table, likely dining or socializing outside a restaurant. This area is part of a bustling city street, filled with various cars, some parked and others potentially in motion. There are numerous buildings nearby, with diverse businesses and stores. One notable building nearby even has a marquee sign indicating \"for lease\". This scene suggests that the region is in a vibrant urban setting, where people are engaging in day-to-day activities such as dining outdoors and commuting by car."} +{"question_id": 37, "image": "000000159311.jpg", "category": "refer_desc", "text": "The region is a patch of grass. There are two zebras standing in and grazing on this grass. They are feeding themselves and are near bushes and a tree."} +{"question_id": 38, "image": "000000326174.jpg", "category": "refer_desc", "text": "In the region, there's a man and a little girl, they seem to be having a conversation. The man is looking back to the girl, who is pulling a surfboard, probably getting ready to surf. They are part of a larger group of people who are heading to the water with their surfboards."} +{"question_id": 39, "image": "000000562207.jpg", "category": "refer_desc", "text": "In the region, there's a man standing wearing shorts. He is standing on the side of a lake, next to an elephant. The elephant is emerging from the water and seems to be interacting with the man and two other individuals not far from him. All three people appear to be tourists posing for a picture with the elephant. 
The surroundings include water, and some mountains and trees in the far distance, creating a serene and natural setting."} +{"question_id": 40, "image": "000000332318.jpg", "category": "refer_desc", "text": "Within the region, there is a cow. This cow is in a pasture, which is located near a mountainous area. The mountain is partially covered in snow. There are also multiple trailers in the pasture, and one of them appears to be storage for animal equipment. The pasture and its surroundings provide a peaceful and natural living environment for the cows."} diff --git a/evaluation/Ferret-Bench/ferret_gpt4_data/refer_desc/context.jsonl b/evaluation/Ferret-Bench/ferret_gpt4_data/refer_desc/context.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..fc6570d16a4f14025feb7fd8c95d89e7ad407a7d --- /dev/null +++ b/evaluation/Ferret-Bench/ferret_gpt4_data/refer_desc/context.jsonl @@ -0,0 +1,40 @@ +{"question_id": 0, "image": "000000069138.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : arrows at [0.000, 0.616, 0.214, 0.644].\nObject 1 : awning at [0.159, 0.260, 0.293, 0.336].\nObject 2 : building at [0.000, 0.000, 1.000, 0.466].\nObject 3 : bushes at [0.693, 0.342, 1.000, 0.512].\nObject 4 : door at [0.110, 0.370, 0.266, 0.518].\nObject 5 : face at [0.390, 0.256, 0.614, 0.392].\nObject 6 : greenery at [0.824, 0.154, 0.997, 0.384].\nObject 7 : hitch at [0.221, 0.520, 0.259, 0.542].\nObject 8 : ladder at [0.110, 0.342, 0.283, 0.364].\nObject 9 : license plate at [0.141, 0.460, 0.234, 0.500].\nObject 10 : line at [0.017, 0.700, 0.266, 0.756].\nObject 11 : picture at [0.155, 0.378, 0.259, 0.442].\nObject 12 : plant barrier at [0.672, 0.482, 1.000, 0.606].\nObject 13 : planter at [0.676, 0.152, 1.000, 0.510].\nObject 14 : pole at [0.328, 0.068, 0.483, 0.994].\nObject 15 : road at [0.000, 0.490, 1.000, 1.000].\nObject 16 : roof at [0.117, 0.360, 0.283, 0.382].\nObject 17 : sad face at [0.383, 0.244, 0.614, 0.384].\nObject 18 : short term at 
[0.624, 0.040, 0.769, 0.080].\nObject 19 : sidewalk at [0.666, 0.572, 0.993, 0.618].\nObject 20 : sign at [0.621, 0.082, 0.772, 0.132].\nObject 21 : sign at [0.007, 0.144, 0.069, 0.204].\nObject 22 : signal at [0.266, 0.210, 0.679, 0.848].\nObject 23 : stop light at [0.366, 0.236, 0.638, 0.394].\nObject 24 : tail light at [0.100, 0.446, 0.121, 0.472].\nObject 25 : van at [0.076, 0.326, 0.297, 0.556].\nObject 26 : wall at [0.676, 0.500, 0.997, 0.604].\nObject 27 : window at [0.903, 0.000, 1.000, 0.086].\n\nRelationships:\nobject 23 : stop light -> with -> object 17 : sad face.\nobject 0 : arrows -> on -> object 15 : road.\nobject 12 : plant barrier -> beside -> object 15 : road.\nobject 11 : picture -> on -> object 4 : door.\nobject 10 : line -> painted in -> object 15 : road.\nobject 19 : sidewalk -> next to -> object 15 : road.\nobject 2 : building -> for -> object 18 : short term.\nobject 23 : stop light -> making -> object 5 : face.\nobject 3 : bushes -> just above -> object 26 : wall.\nobject 22 : signal -> on -> object 14 : pole.\nobject 25 : van -> has -> object 16 : roof.\nobject 25 : van -> has -> object 8 : ladder.\nobject 8 : ladder -> on -> object 16 : roof.\nobject 13 : planter -> by -> object 15 : road.\nobject 23 : stop light -> on -> object 22 : signal.\n\nRegion Description:\nRegion Description at [0.331, 0.852, 0.472, 0.996] : Pole holding traffic light on street.\nRegion Description at [0.600, 0.036, 0.793, 0.084] : Building offers short term office space.\nRegion Description at [0.603, 0.074, 0.776, 0.120] : Office space as small as 2,500 sq. ft. 
available.\nRegion Description at [0.003, 0.008, 0.972, 0.356] : an office building is in the background.\n\nGlobal Caption:\nA red traffic light with a sad face drawn over it.\nA street scene with a close of of a stop light.\nA red stoplight with a street in the background.\nA stop sign gives traffic a frown face.\nThe sign is now at a red light."} +{"question_id": 1, "image": "000000131138.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : computer mouse at [0.414, 0.753, 0.470, 0.811].\nObject 1 : cup at [0.350, 0.783, 0.417, 0.906].\nObject 2 : desk at [0.000, 0.488, 0.998, 0.999].\nObject 3 : fork at [0.203, 0.794, 0.270, 0.857].\nObject 4 : glass at [0.277, 0.703, 0.345, 0.816].\nObject 5 : head phones at [0.872, 0.556, 0.993, 0.634].\nObject 6 : keyboard at [0.415, 0.620, 0.650, 0.783].\nObject 7 : lamp at [0.000, 0.302, 0.214, 0.430].\nObject 8 : laptop at [0.491, 0.296, 0.703, 0.540].\nObject 9 : picture at [0.795, 0.204, 0.898, 0.358].\nObject 10 : plant at [0.192, 0.201, 0.391, 0.461].\nObject 11 : plate at [0.183, 0.799, 0.326, 0.896].\nObject 12 : screen at [0.237, 0.249, 0.504, 0.628].\nObject 13 : stand at [0.506, 0.531, 0.663, 0.617].\nObject 14 : window at [0.606, 0.000, 1.000, 0.346].\n\nRelationships:\nobject 0 : computer mouse -> on -> object 2 : desk.\nobject 8 : laptop -> on -> object 13 : stand.\nobject 6 : keyboard -> on -> object 2 : desk.\nobject 9 : picture -> near -> object 14 : window.\nobject 3 : fork -> on -> object 11 : plate.\n\nRegion Description:\n\nGlobal Caption:\na desk with a cup plate laptop monitor and keyboard\nA laptop sitting next to a monitor, keyboard and a mouse.\nA laptop and a desktop monitor are displayed on top of the desk.\nLarge office desk with computers near a window.\nA desk with a laptop, second monitor and keyboard."} +{"question_id": 2, "image": "000000475150.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : animal at [0.220, 0.105, 1.006, 0.997].\nObject 1 : branches at [0.000, 
0.000, 1.000, 1.000].\nObject 2 : ear at [0.402, 0.288, 0.452, 0.378].\nObject 3 : eye at [0.332, 0.396, 0.378, 0.429].\nObject 4 : foliage at [0.584, 0.093, 0.748, 0.255].\nObject 5 : giraffe`s neck at [0.476, 0.264, 1.000, 1.003].\nObject 6 : head at [0.216, 0.102, 0.476, 0.706].\nObject 7 : mane at [0.576, 0.502, 0.836, 0.811].\nObject 8 : nose at [0.222, 0.640, 0.266, 0.703].\nObject 9 : sky at [0.000, 0.000, 1.000, 0.562].\nObject 10 : spot at [0.562, 0.535, 0.616, 0.625].\nObject 11 : spot at [0.560, 0.447, 0.592, 0.508].\nObject 12 : spot at [0.592, 0.444, 0.670, 0.556].\nObject 13 : spot at [0.622, 0.565, 0.694, 0.664].\nObject 14 : spot at [0.514, 0.483, 0.570, 0.571].\nObject 15 : spots at [0.700, 0.640, 0.806, 0.817].\nObject 16 : spots at [0.706, 0.823, 0.776, 0.943].\nObject 17 : spots at [0.852, 0.829, 0.984, 0.997].\nObject 18 : spots at [0.674, 0.547, 0.758, 0.655].\nObject 19 : spots at [0.774, 0.700, 0.902, 0.913].\nObject 20 : tree at [0.000, 0.000, 1.000, 1.000].\nObject 21 : wrinkles at [0.466, 0.468, 0.554, 0.586].\n\nRelationships:\nobject 20 : tree -> has -> object 4 : foliage.\nobject 21 : wrinkles -> on -> object 5 : giraffe`s neck.\nobject 3 : eye -> on a -> object 0 : animal.\nobject 4 : foliage -> in -> object 20 : tree.\nobject 1 : branches -> behind -> object 0 : animal.\nobject 14 : spot -> on -> object 0 : animal.\nobject 11 : spot -> on -> object 0 : animal.\nobject 10 : spot -> on -> object 0 : animal.\nobject 12 : spot -> on -> object 0 : animal.\nobject 13 : spot -> on -> object 0 : animal.\nobject 5 : giraffe`s neck -> on -> object 0 : animal.\nobject 3 : eye -> of -> object 0 : animal.\nobject 2 : ear -> of -> object 0 : animal.\nobject 6 : head -> of -> object 0 : animal.\n\nRegion Description:\nRegion Description at [0.616, 0.565, 0.956, 0.958] : the giraffe is spotted tan and brown.\nRegion Description at [0.288, 0.324, 0.572, 0.649] : the giraffes face is white and brown.\n\nGlobal Caption:\nA giraffe stands near a tree in 
the wilderness. \nA giraffe standing in front of a group of trees.\nA giraffe standing next to a leaf free tree.\nHead and neck of a giraffe in natural feeding habitat.\nA giraffe walking near a tree with very few leaves."} +{"question_id": 3, "image": "000000356424.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : bottle at [0.048, 0.712, 0.195, 1.002].\nObject 1 : chair at [0.696, 0.500, 1.003, 0.718].\nObject 2 : cork at [0.053, 0.712, 0.139, 0.776].\nObject 3 : cup at [0.043, 0.736, 0.240, 0.916].\nObject 4 : dish at [0.416, 0.726, 0.856, 0.904].\nObject 5 : fruit at [0.629, 0.834, 0.675, 0.880].\nObject 6 : glass at [0.275, 0.716, 0.501, 0.998].\nObject 7 : glasses at [0.179, 0.242, 0.464, 0.322].\nObject 8 : hair at [0.536, 0.258, 0.656, 0.320].\nObject 9 : man at [0.075, 0.102, 0.704, 0.716].\nObject 10 : rasberries at [0.499, 0.750, 0.544, 0.786].\nObject 11 : raspberries at [0.664, 0.828, 0.741, 0.864].\nObject 12 : sauce at [0.565, 0.752, 0.715, 0.824].\nObject 13 : shirt at [0.600, 0.350, 0.645, 0.494].\nObject 14 : shirt at [0.635, 0.282, 0.997, 0.654].\nObject 15 : sign at [0.419, 0.134, 0.509, 0.184].\nObject 16 : sweater at [0.072, 0.288, 0.704, 0.718].\nObject 17 : table at [0.000, 0.592, 0.997, 1.000].\nObject 18 : window at [0.328, 0.000, 0.600, 0.298].\nObject 19 : woman at [0.531, 0.258, 0.768, 0.688].\n\nRelationships:\nobject 9 : man -> wearing -> object 7 : glasses.\nobject 0 : bottle -> on -> object 17 : table.\nobject 6 : glass -> on -> object 17 : table.\nobject 11 : raspberries -> on -> object 4 : dish.\nobject 9 : man -> wearing -> object 7 : glasses.\n\nRegion Description:\nRegion Description at [0.640, 0.180, 0.989, 0.530] : Man wearing a black and orange stripe shirt.\nRegion Description at [0.413, 0.136, 0.512, 0.184] : Yellow closed sign with brown letters.\nRegion Description at [0.629, 0.186, 0.995, 0.706] : a man wearing and orange and black striped shirt.\nRegion Description at [0.528, 0.254, 0.717, 0.666] : a woman 
with a ponytail eating lunch.\nRegion Description at [0.152, 0.238, 0.459, 0.322] : a pair of black wire rimmed eye glasses.\nRegion Description at [0.029, 0.716, 0.243, 0.922] : empty cup that used to contain coffee.\nRegion Description at [0.264, 0.708, 0.867, 0.994] : A plate of food with a glass of water.\n\nGlobal Caption:\nA man sitting in front of a plate of food.\nA man at a wooden table looking at a plate of food.\na man smiling while looking at his plate of food\nA man sitting at a table with a plate filled with food.\nA man looking happily at some dish in front of him."} +{"question_id": 4, "image": "000000491090.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : arm at [0.313, 0.238, 0.567, 0.512].\nObject 1 : back wheel at [0.107, 0.502, 0.307, 0.720].\nObject 2 : face at [0.430, 0.118, 0.535, 0.218].\nObject 3 : floor at [0.003, 0.380, 0.997, 0.998].\nObject 4 : front light at [0.765, 0.514, 0.890, 0.634].\nObject 5 : front wheel at [0.642, 0.706, 0.997, 0.996].\nObject 6 : garage door at [0.532, 0.002, 0.858, 0.096].\nObject 7 : glasses at [0.422, 0.140, 0.548, 0.168].\nObject 8 : hand at [0.457, 0.450, 0.561, 0.518].\nObject 9 : indicator light at [0.666, 0.578, 0.722, 0.620].\nObject 10 : jeans at [0.241, 0.438, 0.465, 0.712].\nObject 11 : lettering at [0.003, 0.062, 0.302, 0.146].\nObject 12 : license plate at [0.939, 0.594, 1.000, 0.654].\nObject 13 : mirrors at [0.428, 0.320, 0.559, 0.384].\nObject 14 : motorcycle at [0.067, 0.358, 0.989, 1.000].\nObject 15 : person at [0.227, 0.086, 0.765, 0.758].\nObject 16 : sneaker at [0.243, 0.646, 0.342, 0.758].\nObject 17 : sweater at [0.243, 0.192, 0.676, 0.486].\nObject 18 : tail pipe at [0.059, 0.524, 0.257, 0.706].\n\nRelationships:\nobject 15 : person -> has -> object 7 : glasses.\nobject 15 : person -> has -> object 16 : sneaker.\nobject 15 : person -> has -> object 17 : sweater.\nobject 15 : person -> has -> object 17 : sweater.\nobject 15 : person -> has on -> object 10 : jeans.\nobject 
14 : motorcycle -> has -> object 5 : front wheel.\nobject 14 : motorcycle -> has -> object 1 : back wheel.\nobject 4 : front light -> on -> object 14 : motorcycle.\nobject 15 : person -> on -> object 14 : motorcycle.\nobject 14 : motorcycle -> has -> object 18 : tail pipe.\nobject 15 : person -> sitting on -> object 14 : motorcycle.\nobject 15 : person -> wearing -> object 17 : sweater.\nobject 4 : front light -> on -> object 14 : motorcycle.\nobject 15 : person -> has -> object 8 : hand.\nobject 15 : person -> has -> object 7 : glasses.\nobject 13 : mirrors -> are on -> object 14 : motorcycle.\nobject 1 : back wheel -> on -> object 14 : motorcycle.\nobject 5 : front wheel -> on -> object 14 : motorcycle.\nobject 4 : front light -> on -> object 14 : motorcycle.\nobject 15 : person -> has -> object 2 : face.\nobject 15 : person -> has -> object 0 : arm.\nobject 15 : person -> sitting on -> object 14 : motorcycle.\nobject 15 : person -> has -> object 7 : glasses.\n\nRegion Description:\nRegion Description at [0.444, 0.138, 0.521, 0.168] : The eyeglasses the person on the motorcycle is wearing..\nRegion Description at [0.230, 0.640, 0.361, 0.760] : The person on the motorcycle's sneaker..\nRegion Description at [0.297, 0.216, 0.449, 0.404] : The left sleeve of the person's sweater..\nRegion Description at [0.545, 0.254, 0.738, 0.404] : The right sleeve of the person's sweater..\nRegion Description at [0.644, 0.706, 0.997, 0.994] : The front wheel of the motorcycle the person is on..\nRegion Description at [0.102, 0.498, 0.329, 0.692] : The back wheel of the motorcycle the person is on..\nRegion Description at [0.775, 0.518, 0.896, 0.626] : The front light of the motorcycle the person is on..\nRegion Description at [0.439, 0.432, 0.751, 0.522] : The handle bars on the motorcycle the person is on..\nRegion Description at [0.059, 0.516, 0.310, 0.708] : The tail pipe of the motorcycle the person is on..\nRegion Description at [0.663, 0.568, 0.733, 0.634] : small circular 
orange indicator light.\nRegion Description at [0.056, 0.522, 0.257, 0.706] : stainless steel motorcycle tailpipe .\nRegion Description at [0.067, 0.318, 0.992, 0.992] : Black motorcycle with silver accessories.\nRegion Description at [0.636, 0.690, 0.989, 0.992] : Black front wheel and fender of motorcycle.\nRegion Description at [0.243, 0.640, 0.353, 0.754] : Black and white shoe of man on motorcycle.\n\nGlobal Caption:\nA man sitting on one of a group of motorcycles.\nA MAN IS SMILING SITTING ON A MOTOR BIKE \nA middle-aged man leans on a sports bike, smiling\nA person sits on top of a motorcycle with others.\nA woman riding on the back of a motorcycle."} +{"question_id": 5, "image": "000000484415.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : arm at [0.000, 0.125, 0.609, 0.988].\nObject 1 : bathroom tile at [0.009, 0.008, 0.994, 0.446].\nObject 2 : blue jeans at [0.369, 0.558, 0.722, 0.979].\nObject 3 : brush at [0.681, 0.208, 0.878, 0.500].\nObject 4 : brush holder at [0.716, 0.279, 0.891, 0.554].\nObject 5 : button at [0.519, 0.113, 0.584, 0.171].\nObject 6 : flusher at [0.534, 0.092, 0.628, 0.300].\nObject 7 : hand at [0.281, 0.125, 0.603, 0.562].\nObject 8 : holder at [0.713, 0.283, 0.903, 0.558].\nObject 9 : lid at [0.028, 0.046, 0.694, 0.446].\nObject 10 : man at [0.000, 0.133, 0.600, 0.992].\nObject 11 : seat at [0.138, 0.583, 0.722, 0.992].\nObject 12 : tank at [0.019, 0.021, 0.706, 0.579].\nObject 13 : tile at [0.794, 0.000, 1.000, 0.200].\nObject 14 : tile at [0.000, 0.000, 0.278, 0.129].\nObject 15 : toilet at [0.016, 0.042, 0.719, 0.996].\nObject 16 : toilet scrubber at [0.744, 0.192, 0.844, 0.521].\nObject 17 : toilet seat at [0.103, 0.517, 0.728, 0.996].\nObject 18 : wall at [0.659, 0.000, 0.978, 0.392].\nObject 19 : water at [0.369, 0.738, 0.500, 0.921].\n\nRelationships:\nobject 15 : toilet -> has -> object 11 : seat.\nobject 4 : brush holder -> by -> object 15 : toilet.\nobject 19 : water -> in -> object 15 : toilet.\nobject 6 : 
flusher -> on -> object 15 : toilet.\nobject 9 : lid -> on -> object 15 : toilet.\nobject 10 : man -> by -> object 15 : toilet.\nobject 10 : man -> by -> object 15 : toilet.\nobject 10 : man -> has -> object 7 : hand.\nobject 0 : arm -> on -> object 15 : toilet.\nobject 14 : tile -> on -> object 18 : wall.\n\nRegion Description:\nRegion Description at [0.000, 0.046, 0.716, 0.987] : the arm reaching for the white toilet bowl.\nRegion Description at [0.716, 0.192, 0.894, 0.550] : the container and the toilet brush cleaner.\nRegion Description at [0.009, 0.042, 0.894, 0.992] : the toilet bowl next to the toilet bowl cleaner.\nRegion Description at [0.534, 0.087, 0.666, 0.329] : The hand is on the flusher in the image .\nRegion Description at [0.053, 0.158, 0.903, 0.875] : Porcelain toilet with flusher on top of the lid .\nRegion Description at [0.094, 0.154, 0.856, 0.942] : Man flushing the toilet in the bathroom .\n\nGlobal Caption:\nA hand is reaching out to the top if a toilet. \nA person flushing a toilet with a motion sensor.\nA person's hand flushing a toilet with a button on top of the tank. 
\na persons hand reaching for the top of a toilet\nA hand is reaching over a white toilet."} +{"question_id": 7, "image": "000000184324.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : awning at [0.514, 0.500, 0.736, 0.545].\nObject 1 : bag at [0.086, 0.723, 0.124, 0.777].\nObject 2 : bicycle at [0.716, 0.660, 0.756, 0.738].\nObject 3 : bikes at [0.710, 0.753, 0.864, 0.934].\nObject 4 : black jacket at [0.052, 0.569, 0.120, 0.723].\nObject 5 : blue jeans at [0.654, 0.678, 0.672, 0.729].\nObject 6 : building at [0.540, 0.042, 0.760, 0.617].\nObject 7 : building at [0.706, 0.000, 0.998, 0.726].\nObject 8 : bus at [0.186, 0.491, 0.246, 0.608].\nObject 9 : car at [0.432, 0.557, 0.538, 0.636].\nObject 10 : cars at [0.130, 0.491, 0.756, 0.630].\nObject 11 : coat at [0.128, 0.602, 0.212, 0.798].\nObject 12 : cross walk at [0.428, 0.750, 0.954, 1.000].\nObject 13 : cyclist at [0.752, 0.614, 0.860, 0.792].\nObject 14 : lines at [0.432, 0.608, 0.948, 1.000].\nObject 15 : man at [0.052, 0.518, 0.132, 0.898].\nObject 16 : people at [0.000, 0.515, 0.212, 1.000].\nObject 17 : people at [0.754, 0.605, 0.858, 0.756].\nObject 18 : pole at [0.954, 0.699, 0.970, 0.777].\nObject 19 : road at [0.004, 0.545, 1.000, 1.000].\nObject 20 : scarf at [0.032, 0.873, 0.134, 0.997].\nObject 21 : sidewalk at [0.536, 0.572, 0.668, 0.623].\nObject 22 : sign at [0.482, 0.470, 0.494, 0.494].\nObject 23 : sign at [0.810, 0.407, 0.970, 0.497].\nObject 24 : sign at [0.584, 0.434, 0.614, 0.494].\nObject 25 : store at [0.806, 0.395, 0.968, 0.720].\nObject 26 : street light at [0.640, 0.461, 0.652, 0.485].\nObject 27 : stripes at [0.452, 0.620, 0.944, 0.982].\nObject 28 : tires at [0.712, 0.747, 0.864, 0.931].\nObject 29 : tree at [0.280, 0.358, 0.340, 0.569].\nObject 30 : van at [0.460, 0.545, 0.488, 0.566].\nObject 31 : window at [0.820, 0.217, 0.884, 0.358].\nObject 32 : windshield at [0.192, 0.512, 0.242, 0.548].\nObject 33 : woman at [0.128, 0.569, 0.212, 0.913].\nObject 34 : woman at 
[0.650, 0.593, 0.688, 0.729].\nObject 35 : woman at [0.020, 0.765, 0.168, 1.000].\nObject 36 : writing at [0.838, 0.422, 0.948, 0.482].\n\nRelationships:\nobject 3 : bikes -> are on -> object 19 : road.\nobject 3 : bikes -> are on -> object 19 : road.\nobject 17 : people -> are riding -> object 3 : bikes.\nobject 3 : bikes -> are on -> object 19 : road.\nobject 17 : people -> are on -> object 19 : road.\nobject 8 : bus -> on -> object 19 : road.\nobject 8 : bus -> on -> object 19 : road.\nobject 8 : bus -> on -> object 19 : road.\nobject 12 : cross walk -> being used by a -> object 13 : cyclist.\nobject 17 : people -> are using -> object 12 : cross walk.\nobject 0 : awning -> above -> object 21 : sidewalk.\nobject 10 : cars -> are on -> object 19 : road.\nobject 26 : street light -> on -> object 6 : building.\nobject 27 : stripes -> on -> object 12 : cross walk.\nobject 7 : building -> has a -> object 31 : window.\nobject 3 : bikes -> have -> object 28 : tires.\nobject 35 : woman -> wearing a -> object 20 : scarf.\nobject 23 : sign -> for -> object 25 : store.\nobject 33 : woman -> wearing a -> object 11 : coat.\nobject 34 : woman -> wearing -> object 5 : blue jeans.\nobject 3 : bikes -> are on -> object 19 : road.\nobject 14 : lines -> are on -> object 19 : road.\nobject 15 : man -> wearing a -> object 4 : black jacket.\nobject 30 : van -> on -> object 19 : road.\nobject 15 : man -> has a -> object 1 : bag.\nobject 8 : bus -> has a -> object 32 : windshield.\nobject 7 : building -> has a -> object 31 : window.\nobject 31 : window -> above -> object 23 : sign.\nobject 14 : lines -> are on -> object 19 : road.\nobject 18 : pole -> near -> object 7 : building.\nobject 35 : woman -> wearing a -> object 20 : scarf.\n\nRegion Description:\nRegion Description at [0.822, 0.395, 0.968, 0.500] : red writing above buisness along the street.\nRegion Description at [0.564, 0.771, 0.876, 0.991] : white stripes painted to indicate cross walk.\nRegion Description at [0.184, 
0.485, 0.244, 0.605] : large white vehicle with big windshield.\nRegion Description at [0.478, 0.464, 0.492, 0.491] : blue street sign with a white P on it.\nRegion Description at [0.820, 0.220, 0.886, 0.370] : window on the building above red sign.\n\nGlobal Caption:\nA group of people walking across a busy city street.\nA fish eye lens shows the corner of a busy city street with bikes, people and buildings.\na number of people and cars on a city street\nAn oddly taken photo of some buildings and shops.\nA picture of a city intersection with period buildings and store fronts. "} +{"question_id": 8, "image": "000000341058.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : napkins at [0.541, 0.818, 0.601, 0.858].\nObject 1 : pepper at [0.598, 0.836, 0.623, 0.860].\nObject 2 : post at [0.673, 0.494, 0.712, 0.926].\nObject 3 : restaurant sign at [0.548, 0.180, 0.779, 0.344].\nObject 4 : salt at [0.619, 0.838, 0.633, 0.850].\nObject 5 : shaker at [0.594, 0.822, 0.619, 0.854].\nObject 6 : shaker at [0.612, 0.824, 0.637, 0.854].\nObject 7 : table at [0.448, 0.834, 0.925, 0.998].\n\nRelationships:\nobject 4 : salt -> in -> object 6 : shaker.\nobject 0 : napkins -> on -> object 7 : table.\nobject 3 : restaurant sign -> on -> object 2 : post.\n\nRegion Description:\n\nGlobal Caption:\nThis is an empty table at a restaurant with ships in the background.\nThis table is covered by a blue Sam Adams umbrella\nAdvertising sign above a patio umbrella on sunny day.\nA lamp post stands next to an umbrella and table.\nAn umbrella is opened over an outdoor table."} +{"question_id": 9, "image": "000000184384.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : blueberry at [0.306, 0.312, 0.400, 0.429].\nObject 1 : butter at [0.454, 0.024, 0.638, 0.288].\nObject 2 : cake at [0.238, 0.093, 0.786, 0.787].\nObject 3 : cup at [0.002, 0.000, 0.202, 0.667].\nObject 4 : cup at [0.140, 0.008, 0.336, 0.456].\nObject 5 : egg at [0.636, 0.125, 0.880, 0.267].\nObject 6 : food at 
[0.632, 0.123, 0.996, 0.336].\nObject 7 : lemon at [0.514, 0.728, 0.798, 0.997].\nObject 8 : melon at [0.308, 0.768, 0.658, 0.997].\nObject 9 : orange at [0.514, 0.733, 0.794, 0.997].\nObject 10 : parsley at [0.372, 0.515, 0.762, 0.965].\nObject 11 : plate at [0.166, 0.453, 1.000, 1.000].\nObject 12 : plate at [0.628, 0.120, 0.998, 0.389].\nObject 13 : sausage at [0.766, 0.248, 0.984, 0.333].\nObject 14 : spot at [0.766, 0.600, 0.790, 0.637].\nObject 15 : table at [0.002, 0.365, 0.998, 0.997].\nObject 16 : water at [0.000, 0.000, 0.202, 0.667].\n\nRelationships:\nobject 7 : lemon -> on -> object 11 : plate.\nobject 10 : parsley -> on -> object 11 : plate.\nobject 6 : food -> on -> object 12 : plate.\nobject 1 : butter -> on -> object 2 : cake.\nobject 11 : plate -> has -> object 14 : spot.\nobject 1 : butter -> on -> object 2 : cake.\nobject 9 : orange -> on -> object 11 : plate.\nobject 13 : sausage -> on -> object 12 : plate.\nobject 0 : blueberry -> on -> object 2 : cake.\nobject 5 : egg -> on -> object 12 : plate.\nobject 8 : melon -> on -> object 11 : plate.\nobject 1 : butter -> on -> object 2 : cake.\nobject 9 : orange -> on -> object 11 : plate.\nobject 2 : cake -> on -> object 11 : plate.\nobject 16 : water -> in -> object 3 : cup.\nobject 13 : sausage -> on -> object 12 : plate.\n\nRegion Description:\nRegion Description at [0.678, 0.104, 0.942, 0.424] : There is food on the plate in the back.\nRegion Description at [0.456, 0.013, 0.636, 0.307] : White frosting on top of a piece of cake.\nRegion Description at [0.322, 0.752, 0.650, 0.997] : square of honey dew on a white plate.\n\nGlobal Caption:\nA bluebery cake is on a plate and is topped with butter.\nA piece of cake with butter on it sits next to an orange slice. 
\nA large piece of blueberry cake on a plate.\nA plate of food attractively arranged on a table.\nA plate of blueberry coffee cake with butter and an orange slice on a table with breakfast foods."} +{"question_id": 10, "image": "000000259097.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : abs at [0.392, 0.628, 0.426, 0.664].\nObject 1 : arm at [0.416, 0.459, 0.432, 0.565].\nObject 2 : buildings at [0.242, 0.532, 0.640, 0.580].\nObject 3 : frisbee at [0.400, 0.354, 0.446, 0.381].\nObject 4 : grass at [0.000, 0.610, 0.998, 0.997].\nObject 5 : hand at [0.418, 0.423, 0.438, 0.474].\nObject 6 : legs at [0.420, 0.703, 0.456, 0.811].\nObject 7 : man at [0.390, 0.432, 0.466, 0.793].\nObject 8 : pants at [0.390, 0.658, 0.424, 0.763].\nObject 9 : shadow at [0.492, 0.724, 0.622, 0.994].\nObject 10 : shirt at [0.402, 0.468, 0.458, 0.649].\nObject 11 : sky at [0.002, 0.003, 0.996, 0.556].\nObject 12 : trees at [0.002, 0.498, 0.998, 0.646].\n\nRelationships:\nobject 7 : man -> tossing -> object 3 : frisbee.\nobject 7 : man -> has -> object 6 : legs.\nobject 7 : man -> playing -> object 3 : frisbee.\nobject 2 : buildings -> near -> object 12 : trees.\nobject 7 : man -> wearing -> object 10 : shirt.\nobject 7 : man -> wearing -> object 8 : pants.\nobject 7 : man -> catching -> object 3 : frisbee.\nobject 7 : man -> has -> object 5 : hand.\nobject 3 : frisbee -> in -> object 11 : sky.\nobject 7 : man -> wearing -> object 10 : shirt.\nobject 7 : man -> catching -> object 3 : frisbee.\nobject 7 : man -> wearing -> object 8 : pants.\nobject 9 : shadow -> in -> object 4 : grass.\nobject 7 : man -> jumping -> object 4 : grass.\nobject 2 : buildings -> behind -> object 4 : grass.\nobject 7 : man -> catching -> object 3 : frisbee.\nobject 7 : man -> catching -> object 3 : frisbee.\nobject 2 : buildings -> near -> object 12 : trees.\nobject 7 : man -> extending -> object 1 : arm.\nobject 9 : shadow -> in -> object 4 : grass.\nobject 7 : man -> exposing -> object 0 : 
abs.\nobject 7 : man -> catching -> object 3 : frisbee.\nobject 3 : frisbee -> in -> object 11 : sky.\nobject 9 : shadow -> in -> object 4 : grass.\nobject 2 : buildings -> near -> object 12 : trees.\nobject 1 : arm -> reaching for -> object 3 : frisbee.\n\nRegion Description:\nRegion Description at [0.394, 0.658, 0.480, 0.826] : A person wearing black color trouser.\nRegion Description at [0.394, 0.435, 0.460, 0.796] : man in a red sweatshirt and jeans jumping.\nRegion Description at [0.390, 0.357, 0.464, 0.823] : man catching a frisbee in a wheat field.\nRegion Description at [0.012, 0.520, 0.996, 0.631] : trees and a village on a hill in the distance.\nRegion Description at [0.390, 0.423, 0.464, 0.649] : arm straight up and arm bent at elbow.\n\nGlobal Caption:\nA person trying to reach a Frisbee in a field with high brown grass.\nA young boy in a red top is playing with a red object tossed in the sky.\nA young man in a red jacket jumping for a Frizbee in a field.\nA guy is jumping to catch a frisbee in tall grass.\nA man jumps to catch a Frisbee flying through the air."} +{"question_id": 11, "image": "000000377882.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : blue sky at [0.000, 0.000, 0.998, 0.317].\nObject 1 : boat at [0.000, 0.461, 0.354, 0.579].\nObject 2 : boat at [0.348, 0.501, 0.874, 0.789].\nObject 3 : boat at [0.302, 0.461, 0.684, 0.611].\nObject 4 : buildings at [0.692, 0.195, 0.718, 0.248].\nObject 5 : buildings at [0.888, 0.173, 0.922, 0.227].\nObject 6 : buildings at [0.582, 0.211, 0.610, 0.256].\nObject 7 : buildings at [0.180, 0.259, 0.202, 0.293].\nObject 8 : buildings at [0.466, 0.208, 0.518, 0.272].\nObject 9 : chain-link fence at [0.002, 0.176, 0.996, 0.995].\nObject 10 : cord at [0.412, 0.587, 0.626, 1.000].\nObject 11 : fence pole at [0.230, 0.227, 0.336, 1.000].\nObject 12 : grass at [0.000, 0.667, 0.756, 0.997].\nObject 13 : horizon at [0.000, 0.187, 1.000, 0.336].\nObject 14 : mast at [0.570, 0.000, 0.722, 0.571].\nObject 
15 : rack at [0.754, 0.168, 1.000, 0.901].\nObject 16 : sail post at [0.586, 0.000, 0.628, 0.568].\nObject 17 : section at [0.272, 0.179, 0.994, 0.992].\nObject 18 : shelf at [0.762, 0.355, 1.000, 0.387].\nObject 19 : sky line at [0.012, 0.173, 0.994, 0.195].\nObject 20 : surfboard at [0.830, 0.448, 0.996, 0.552].\nObject 21 : surfboard at [0.420, 0.384, 0.502, 0.411].\nObject 22 : surfboard at [0.910, 0.768, 0.998, 0.877].\nObject 23 : surfboard at [0.430, 0.344, 0.508, 0.371].\nObject 24 : surfboard at [0.830, 0.565, 1.000, 0.712].\nObject 25 : surfboard at [0.322, 0.307, 0.450, 0.341].\nObject 26 : surfboard at [0.766, 0.251, 0.998, 0.368].\nObject 27 : surfboard at [0.764, 0.704, 0.998, 0.829].\nObject 28 : water at [0.000, 0.259, 1.000, 0.469].\nObject 29 : water way at [0.008, 0.272, 0.996, 0.432].\n\nRelationships:\nobject 25 : surfboard -> stacked on -> object 18 : shelf.\nobject 24 : surfboard -> stacked on -> object 18 : shelf.\nobject 20 : surfboard -> stacked on -> object 18 : shelf.\nobject 26 : surfboard -> stacked on -> object 18 : shelf.\nobject 15 : rack -> of -> object 20 : surfboard.\nobject 8 : buildings -> on -> object 13 : horizon.\nobject 6 : buildings -> on -> object 13 : horizon.\nobject 4 : buildings -> on -> object 13 : horizon.\nobject 7 : buildings -> on -> object 13 : horizon.\nobject 5 : buildings -> on -> object 13 : horizon.\nobject 14 : mast -> on -> object 2 : boat.\nobject 9 : chain-link fence -> near -> object 29 : water way.\nobject 17 : section -> of -> object 9 : chain-link fence.\n\nRegion Description:\nRegion Description at [0.020, 0.187, 0.972, 0.963] : boats and surfboards behind wire fencing.\nRegion Description at [0.000, 0.160, 0.990, 0.349] : trees and buildings on other side of water.\nRegion Description at [0.340, 0.493, 0.852, 0.613] : white covering pulled over top of boat.\nRegion Description at [0.010, 0.667, 0.516, 0.995] : green bushes beside the chain link fence.\nRegion Description at [0.018, 0.213, 0.992, 
0.995] : Black chain link fence enclosing boats..\nRegion Description at [0.242, 0.211, 0.302, 0.989] : Black fence pole holding chain link fence..\nRegion Description at [0.374, 0.499, 0.804, 0.803] : Yellow and white boat with sail pole..\nRegion Description at [0.014, 0.181, 0.998, 0.296] : Skyline of gray buildings in the background..\nRegion Description at [0.000, 0.664, 0.994, 0.976] : Green shrubs growing along side of a lake..\nRegion Description at [0.774, 0.216, 0.996, 0.944] : Boat parts on an outdoor shelving unit..\nRegion Description at [0.006, 0.013, 0.150, 0.285] : Sail masks with no flag attached to them..\n\nGlobal Caption:\nBoats docked on land sitting side by side next to a lake.\nA small harbor with boats docked and on racks\nA collection of boats behind a fence by a body of water.\nBoats and surfboards docked at a harbor bay.\n\nMany boats as seen through a chain link fence."} +{"question_id": 12, "image": "000000415748.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : building at [0.000, 0.086, 0.697, 0.516].\nObject 1 : elephant at [0.084, 0.438, 0.727, 0.954].\nObject 2 : face at [0.411, 0.442, 0.670, 0.664].\nObject 3 : ground at [0.000, 0.742, 0.165, 0.998].\nObject 4 : man at [0.186, 0.246, 0.631, 0.516].\nObject 5 : shadow at [0.477, 0.812, 1.000, 0.958].\nObject 6 : sky at [0.006, 0.000, 0.228, 0.200].\nObject 7 : toe at [0.372, 0.900, 0.411, 0.924].\nObject 8 : tusk at [0.462, 0.670, 0.489, 0.692].\n\nRelationships:\nobject 4 : man -> on -> object 1 : elephant.\nobject 7 : toe -> of -> object 1 : elephant.\nobject 4 : man -> near -> object 0 : building.\nobject 4 : man -> on -> object 1 : elephant.\nobject 4 : man -> near -> object 1 : elephant.\nobject 8 : tusk -> on -> object 2 : face.\nobject 5 : shadow -> of -> object 1 : elephant.\nobject 5 : shadow -> on -> object 3 : ground.\nobject 4 : man -> close to -> object 0 : building.\nobject 0 : building -> close to -> object 1 : elephant.\n\nRegion Description:\nRegion 
Description at [0.411, 0.482, 0.634, 0.788] : elephant's face and trunk are painted.\n\nGlobal Caption:\nA man riding on the back of an elephant through a city street.\nMan riding on the back of a painted elephant. \nA man in colorful clothing riding a painted elephant.\na man in a white shirt is riding an elephant and some buildings\nAn old decorated elephant and its colorful rider"} +{"question_id": 13, "image": "000000408120.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : alley at [0.052, 0.261, 0.948, 0.997].\nObject 1 : bars at [0.050, 0.000, 0.400, 0.682].\nObject 2 : black tire at [0.500, 0.219, 0.522, 0.249].\nObject 3 : brick at [0.784, 0.105, 0.818, 0.144].\nObject 4 : bricks at [0.926, 0.165, 0.946, 0.195].\nObject 5 : building at [0.742, 0.000, 0.954, 0.796].\nObject 6 : car at [0.418, 0.168, 0.526, 0.240].\nObject 7 : concrete at [0.394, 0.565, 0.570, 0.718].\nObject 8 : corner at [0.850, 0.934, 0.950, 1.000].\nObject 9 : curb at [0.050, 0.264, 0.396, 0.868].\nObject 10 : fence at [0.686, 0.252, 0.826, 0.565].\nObject 11 : flower at [0.580, 0.078, 0.608, 0.123].\nObject 12 : flowers at [0.598, 0.072, 0.634, 0.105].\nObject 13 : girl at [0.444, 0.249, 0.500, 0.480].\nObject 14 : photo at [0.044, 0.000, 0.956, 0.997].\nObject 15 : plants at [0.040, 0.324, 0.224, 0.685].\nObject 16 : polka dot at [0.430, 0.231, 0.450, 0.261].\nObject 17 : road at [0.048, 0.243, 0.954, 0.994].\nObject 18 : shirt at [0.456, 0.279, 0.496, 0.390].\nObject 19 : shoe at [0.484, 0.441, 0.496, 0.459].\nObject 20 : shoe at [0.452, 0.459, 0.470, 0.489].\nObject 21 : umbrella at [0.404, 0.189, 0.528, 0.297].\nObject 22 : wall at [0.738, 0.003, 0.950, 0.760].\nObject 23 : wall window at [0.524, 0.000, 0.538, 0.060].\nObject 24 : window at [0.570, 0.003, 0.586, 0.051].\nObject 25 : window at [0.524, 0.102, 0.538, 0.150].\n\nRelationships:\nobject 13 : girl -> with -> object 19 : shoe.\nobject 13 : girl -> with -> object 20 : shoe.\nobject 13 : girl -> with -> object 18 : 
shirt.\nobject 4 : bricks -> on -> object 5 : building.\nobject 15 : plants -> are near -> object 0 : alley.\nobject 6 : car -> on -> object 17 : road.\nobject 8 : corner -> of an -> object 0 : alley.\nobject 15 : plants -> in front of -> object 14 : photo.\nobject 21 : umbrella -> on -> object 13 : girl.\nobject 9 : curb -> built alongside -> object 17 : road.\n\nRegion Description:\nRegion Description at [0.038, 0.426, 0.162, 0.526] : patch of green plants in front of photo.\nRegion Description at [0.586, 0.060, 0.678, 0.138] : purple flowers inside of bush on right.\n\nGlobal Caption:\nA little girl that is standing with an umbrella.\nA little girl walking down a driveway carrying a pink umbrella.\nA LITTLE GIRL DRESSED IN PINK ALSO HAS A PINK UMBRELLA\nA small girl is holding an umbrella over her head\nA young girl carries and open unbrella while walking down an alley."} +{"question_id": 14, "image": "000000184400.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : air conditioner at [0.004, 0.261, 0.018, 0.293].\nObject 1 : balcony at [0.048, 0.037, 0.100, 0.077].\nObject 2 : beam at [0.616, 0.621, 0.664, 0.824].\nObject 3 : beam at [0.490, 0.640, 0.532, 0.832].\nObject 4 : beam at [0.426, 0.640, 0.462, 0.835].\nObject 5 : bridge at [0.002, 0.608, 0.988, 0.877].\nObject 6 : bridge at [0.004, 0.453, 1.000, 0.867].\nObject 7 : building at [0.000, 0.000, 0.252, 0.469].\nObject 8 : bushes at [0.000, 0.939, 0.072, 0.997].\nObject 9 : colors at [0.194, 0.480, 0.330, 0.661].\nObject 10 : column at [0.618, 0.824, 0.676, 0.997].\nObject 11 : guard rails at [0.000, 0.496, 1.000, 0.624].\nObject 12 : light at [0.606, 0.192, 0.724, 0.243].\nObject 13 : light at [0.864, 0.947, 0.916, 1.000].\nObject 14 : metal support at [0.002, 0.603, 0.976, 0.995].\nObject 15 : pole at [0.700, 0.205, 0.724, 0.995].\nObject 16 : red line at [0.632, 0.851, 0.648, 0.995].\nObject 17 : sky at [0.250, 0.013, 1.000, 0.467].\nObject 18 : south west at [0.338, 0.616, 0.442, 
0.651].\nObject 19 : street at [0.002, 0.861, 1.000, 0.997].\nObject 20 : train at [0.002, 0.408, 1.000, 0.683].\nObject 21 : window at [0.144, 0.013, 0.182, 0.064].\nObject 22 : window at [0.430, 0.485, 0.534, 0.595].\nObject 23 : window at [0.134, 0.091, 0.182, 0.155].\nObject 24 : window at [0.340, 0.504, 0.424, 0.613].\nObject 25 : window at [0.116, 0.944, 0.168, 1.000].\nObject 26 : windows at [0.762, 0.437, 0.920, 0.613].\nObject 27 : windows at [0.004, 0.000, 0.096, 0.088].\n\nRelationships:\nobject 10 : column -> supporting -> object 6 : bridge.\nobject 10 : column -> has -> object 16 : red line.\nobject 12 : light -> on -> object 15 : pole.\nobject 7 : building -> behind -> object 20 : train.\nobject 21 : window -> on -> object 7 : building.\nobject 1 : balcony -> on -> object 7 : building.\nobject 25 : window -> visible under -> object 5 : bridge.\nobject 12 : light -> on -> object 19 : street.\nobject 2 : beam -> of -> object 5 : bridge.\nobject 20 : train -> in -> object 9 : colors.\nobject 24 : window -> of -> object 20 : train.\nobject 22 : window -> of train -> object 20 : train.\nobject 5 : bridge -> on -> object 20 : train.\nobject 7 : building -> beside -> object 20 : train.\nobject 23 : window -> of -> object 7 : building.\nobject 12 : light -> on a -> object 15 : pole.\nobject 12 : light -> on -> object 15 : pole.\nobject 20 : train -> says -> object 18 : south west.\nobject 8 : bushes -> are in -> object 19 : street.\nobject 7 : building -> has many -> object 27 : windows.\nobject 7 : building -> has -> object 0 : air conditioner.\nobject 20 : train -> on -> object 6 : bridge.\nobject 12 : light -> in -> object 19 : street.\nobject 5 : bridge -> has -> object 11 : guard rails.\nobject 26 : windows -> on -> object 20 : train.\nobject 20 : train -> has -> object 18 : south west.\nobject 6 : bridge -> has -> object 14 : metal support.\nobject 9 : colors -> to -> object 20 : train.\n\nRegion Description:\nRegion Description at [0.602, 0.837, 0.696, 
0.997] : a metal support column for the bridge.\n\nGlobal Caption:\nA train as it travels down the tracks over a bridge.\na colorful train going along an elevated track \nA train rides on a bridge past a building.\nA subway train that is passing over a train bridge.\na train on a train track on an elevated bridge"} +{"question_id": 15, "image": "000000276018.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : animal at [0.717, 0.042, 0.831, 0.152].\nObject 1 : animal at [0.114, 0.582, 0.348, 0.840].\nObject 2 : baby at [0.385, 0.034, 0.643, 0.434].\nObject 3 : baby at [0.911, 0.028, 1.000, 0.250].\nObject 4 : bear at [0.391, 0.506, 0.622, 0.714].\nObject 5 : bear at [0.695, 0.356, 0.868, 0.580].\nObject 6 : bear hand at [0.114, 0.630, 0.175, 0.660].\nObject 7 : black sock at [0.800, 0.796, 0.858, 0.834].\nObject 8 : blonde boy at [0.166, 0.170, 0.351, 0.460].\nObject 9 : boy at [0.102, 0.388, 0.498, 1.000].\nObject 10 : boy at [0.717, 0.188, 1.000, 0.864].\nObject 11 : child at [0.342, 0.390, 0.622, 1.000].\nObject 12 : coat at [0.077, 0.520, 0.495, 0.910].\nObject 13 : coat at [0.775, 0.296, 1.000, 0.616].\nObject 14 : coat at [0.397, 0.090, 0.634, 0.262].\nObject 15 : flip flops at [0.434, 0.756, 0.606, 0.910].\nObject 16 : girl at [0.372, 0.196, 0.603, 0.922].\nObject 17 : glasses at [0.191, 0.236, 0.308, 0.250].\nObject 18 : grass at [0.637, 0.652, 0.754, 0.788].\nObject 19 : hand at [0.714, 0.094, 0.788, 0.160].\nObject 20 : hands at [0.763, 0.380, 0.877, 0.430].\nObject 21 : hat at [0.757, 0.030, 0.889, 0.078].\nObject 22 : jacket at [0.357, 0.500, 0.622, 0.782].\nObject 23 : jacket at [0.422, 0.286, 0.603, 0.550].\nObject 24 : jacket at [0.163, 0.296, 0.320, 0.462].\nObject 25 : jacket at [0.911, 0.106, 1.000, 0.224].\nObject 26 : lady at [0.286, 0.000, 0.683, 0.560].\nObject 27 : man at [0.628, 0.030, 0.951, 0.742].\nObject 28 : shirt at [0.831, 0.306, 0.957, 0.404].\nObject 29 : shirt at [0.197, 0.296, 0.298, 0.370].\nObject 30 : shoe at [0.717, 
0.804, 0.871, 0.864].\nObject 31 : sidewalk at [0.628, 0.574, 0.769, 0.632].\nObject 32 : stuffed animal at [0.286, 0.298, 0.517, 0.422].\n\nRelationships:\nobject 10 : boy -> wearing -> object 28 : shirt.\nobject 3 : baby -> wearing -> object 25 : jacket.\nobject 22 : jacket -> carrying -> object 4 : bear.\nobject 8 : blonde boy -> wears -> object 17 : glasses.\nobject 8 : blonde boy -> wears -> object 24 : jacket.\nobject 11 : child -> holding up -> object 32 : stuffed animal.\nobject 10 : boy -> holding up -> object 5 : bear.\nobject 30 : shoe -> with a -> object 7 : black sock.\nobject 10 : boy -> wearing -> object 7 : black sock.\nobject 26 : lady -> holding -> object 2 : baby.\nobject 16 : girl -> wearing -> object 15 : flip flops.\nobject 9 : boy -> wearing -> object 12 : coat.\nobject 10 : boy -> wearing a -> object 13 : coat.\nobject 4 : bear -> on -> object 20 : hands.\nobject 26 : lady -> carrying -> object 2 : baby.\nobject 0 : animal -> in -> object 19 : hand.\n\nRegion Description:\nRegion Description at [0.905, 0.020, 0.997, 0.272] : blonde haired baby wearing yellow jacket.\nRegion Description at [0.357, 0.388, 0.640, 0.730] : girl in blue jacket carrying blue dog.\nRegion Description at [0.071, 0.378, 0.498, 0.842] : boy in black jacket holding stuffed dog.\nRegion Description at [0.055, 0.572, 0.375, 0.846] : brown stuffed dog with red and white collar.\nRegion Description at [0.283, 0.194, 0.603, 0.400] : girl in pink jacket holding white stuffed animal.\nRegion Description at [0.695, 0.356, 0.874, 0.576] : White stuffed animal wearing a red jacket..\nRegion Description at [0.332, 0.394, 0.618, 0.992] : Little girl holding a grey stuffed dog..\nRegion Description at [0.372, 0.476, 0.723, 0.786] : little girl holding blue and white stuffed animal.\nRegion Description at [0.062, 0.556, 0.422, 0.840] : little boy holding brown and white stuffed animal.\n\nGlobal Caption:\na bunch of kids walking through some grass\nA group of children are holding 
various stuffed animals and dolls.\nKids walking while holding their stuffed animals. \nA group of kids holding teddy bears and looking happy.\nA group of children carrying stuffed animals walks across the grass. "} +{"question_id": 16, "image": "000000376322.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : beer at [0.404, 0.568, 0.505, 0.724].\nObject 1 : cell phone at [0.128, 0.726, 0.332, 0.784].\nObject 2 : decanter at [0.417, 0.416, 0.503, 0.574].\nObject 3 : fork at [0.142, 0.852, 0.553, 0.964].\nObject 4 : fork at [0.174, 0.616, 0.414, 0.660].\nObject 5 : fork at [0.107, 0.882, 0.545, 0.998].\nObject 6 : glass at [0.401, 0.568, 0.508, 0.726].\nObject 7 : glass at [0.773, 0.622, 0.880, 0.796].\nObject 8 : glasses at [0.013, 0.342, 0.139, 0.376].\nObject 9 : green shirt at [0.698, 0.376, 0.909, 0.620].\nObject 10 : hair at [0.607, 0.336, 0.743, 0.422].\nObject 11 : hair at [0.824, 0.244, 1.000, 0.474].\nObject 12 : man at [0.668, 0.252, 0.909, 0.622].\nObject 13 : man at [0.000, 0.304, 0.136, 0.808].\nObject 14 : plate at [0.102, 0.780, 0.404, 0.898].\nObject 15 : silver spoon at [0.698, 0.882, 0.799, 0.998].\nObject 16 : table at [0.000, 0.428, 0.997, 0.998].\nObject 17 : wall at [0.535, 0.194, 0.997, 0.370].\nObject 18 : watch at [0.570, 0.482, 0.596, 0.508].\nObject 19 : watch at [0.888, 0.486, 0.949, 0.514].\nObject 20 : white plate at [0.361, 0.712, 0.805, 0.860].\nObject 21 : woman at [0.813, 0.242, 1.000, 0.582].\nObject 22 : woman at [0.532, 0.338, 0.765, 0.550].\n\nRelationships:\nobject 21 : woman -> with -> object 11 : hair.\nobject 9 : green shirt -> on -> object 12 : man.\nobject 14 : plate -> on -> object 16 : table.\nobject 1 : cell phone -> on -> object 16 : table.\nobject 5 : fork -> on -> object 16 : table.\nobject 5 : fork -> on -> object 16 : table.\nobject 3 : fork -> on -> object 16 : table.\nobject 4 : fork -> on -> object 16 : table.\nobject 2 : decanter -> on -> object 16 : table.\nobject 12 : man -> wearing a -> object 9 
: green shirt.\nobject 21 : woman -> wearing a -> object 19 : watch.\nobject 22 : woman -> wearing a -> object 18 : watch.\nobject 13 : man -> wearing -> object 8 : glasses.\nobject 10 : hair -> on -> object 22 : woman.\nobject 22 : woman -> at -> object 16 : table.\n\nRegion Description:\nRegion Description at [0.353, 0.700, 0.802, 0.860] : a round plate with six pieces of bread and two butter pats.\nRegion Description at [0.096, 0.778, 0.404, 0.892] : a plate with one slice of bread and one butter pat.\nRegion Description at [0.890, 0.698, 0.997, 0.992] : glass of red wine closest to the camera.\nRegion Description at [0.366, 0.710, 0.805, 0.856] : the round white plate under the bread and butter.\n\nGlobal Caption:\nA group of people are reading a menu at the table\nA group of people sit at a large table while talking.\nPeople sitting on the long table with plates of food. \nA long table full of people on both sides.\nA long table accommodating many people while eating"} +{"question_id": 17, "image": "000000125472.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : axle at [0.447, 0.814, 0.535, 0.856].\nObject 1 : background at [0.003, 0.744, 0.994, 0.988].\nObject 2 : bracelet at [0.820, 0.444, 0.859, 0.470].\nObject 3 : building at [0.012, 0.888, 0.099, 0.994].\nObject 4 : corner at [0.027, 0.890, 0.117, 0.992].\nObject 5 : fence at [0.030, 0.886, 1.000, 1.000].\nObject 6 : hair at [0.486, 0.078, 0.712, 0.216].\nObject 7 : jean pants at [0.246, 0.380, 0.841, 0.632].\nObject 8 : laces at [0.168, 0.562, 0.850, 0.674].\nObject 9 : logo at [0.429, 0.232, 0.583, 0.364].\nObject 10 : man at [0.201, 0.002, 0.940, 0.758].\nObject 11 : name at [0.000, 0.960, 0.321, 1.000].\nObject 12 : picture at [0.003, 0.004, 1.000, 0.998].\nObject 13 : poles at [0.180, 0.886, 0.432, 0.990].\nObject 14 : shirt at [0.324, 0.124, 0.694, 0.392].\nObject 15 : shoes at [0.189, 0.606, 0.946, 0.792].\nObject 16 : skateboard at [0.012, 0.746, 0.664, 0.886].\nObject 17 : sky at 
[0.012, 0.002, 1.000, 0.918].\nObject 18 : stadium lights at [0.147, 0.860, 0.456, 0.994].\nObject 19 : stitching at [0.312, 0.408, 0.754, 0.638].\nObject 20 : strip at [0.279, 0.770, 0.529, 0.802].\nObject 21 : top at [0.024, 0.830, 0.420, 0.936].\nObject 22 : trees at [0.024, 0.846, 1.000, 1.000].\nObject 23 : wheels at [0.012, 0.808, 0.586, 0.904].\nObject 24 : wrist at [0.802, 0.434, 0.856, 0.484].\n\nRelationships:\nobject 2 : bracelet -> on mans -> object 24 : wrist.\nobject 23 : wheels -> on a -> object 16 : skateboard.\nobject 14 : shirt -> has a -> object 9 : logo.\nobject 10 : man -> doing trick on -> object 16 : skateboard.\nobject 3 : building -> behind a -> object 5 : fence.\nobject 11 : name -> on -> object 12 : picture.\nobject 11 : name -> has a -> object 11 : name.\nobject 10 : man -> performing on a -> object 16 : skateboard.\nobject 4 : corner -> of -> object 3 : building.\nobject 18 : stadium lights -> are on -> object 13 : poles.\nobject 16 : skateboard -> has -> object 23 : wheels.\nobject 2 : bracelet -> on mans -> object 24 : wrist.\nobject 11 : name -> on -> object 12 : picture.\nobject 16 : skateboard -> under -> object 10 : man.\nobject 10 : man -> wearing -> object 15 : shoes.\nobject 3 : building -> behind -> object 5 : fence.\nobject 22 : trees -> in -> object 1 : background.\nobject 15 : shoes -> have -> object 8 : laces.\nobject 18 : stadium lights -> on -> object 13 : poles.\nobject 5 : fence -> behind -> object 10 : man.\nobject 20 : strip -> on -> object 16 : skateboard.\nobject 19 : stitching -> on -> object 7 : jean pants.\nobject 9 : logo -> on -> object 14 : shirt.\nobject 23 : wheels -> on -> object 16 : skateboard.\nobject 0 : axle -> on -> object 16 : skateboard.\nobject 21 : top -> of -> object 22 : trees.\n\nRegion Description:\nRegion Description at [0.030, 0.774, 0.643, 0.912] : a black skateboard with black wheels.\n\nGlobal Caption:\nA man flying through the air while riding a skateboard.\nA man is doing tricks on a 
skateboard.\nA skateboarder jumps while trying to perform a trick.\na man in the air standing above the skateboard\na person attempting a jump with a skateboard"} +{"question_id": 18, "image": "000000361551.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : baggage at [0.107, 0.662, 0.179, 0.750].\nObject 1 : baggage at [0.368, 0.706, 0.456, 0.782].\nObject 2 : building at [0.000, 0.000, 0.997, 0.326].\nObject 3 : cap at [0.784, 0.544, 0.824, 0.568].\nObject 4 : duffel bag at [0.584, 0.702, 0.643, 0.768].\nObject 5 : ground at [0.000, 0.282, 1.000, 0.976].\nObject 6 : hair at [0.920, 0.614, 0.973, 0.640].\nObject 7 : headband at [0.923, 0.628, 0.952, 0.646].\nObject 8 : jacket at [0.776, 0.568, 0.840, 0.642].\nObject 9 : line at [0.696, 0.750, 0.989, 0.794].\nObject 10 : lines at [0.000, 0.436, 0.851, 0.486].\nObject 11 : luggage at [0.907, 0.706, 0.973, 0.786].\nObject 12 : luggage at [0.368, 0.702, 0.456, 0.780].\nObject 13 : man at [0.008, 0.554, 0.139, 0.800].\nObject 14 : man at [0.659, 0.572, 0.920, 0.844].\nObject 15 : man at [0.771, 0.538, 0.843, 0.640].\nObject 16 : pavement at [0.003, 0.308, 0.992, 0.566].\nObject 17 : people at [0.005, 0.562, 0.616, 0.824].\nObject 18 : pillars at [0.211, 0.130, 0.235, 0.240].\nObject 19 : ramp at [0.179, 0.158, 0.707, 0.408].\nObject 20 : service area at [0.003, 0.416, 0.995, 0.996].\nObject 21 : stairs at [0.352, 0.676, 1.000, 0.994].\nObject 22 : sweater at [0.667, 0.634, 0.920, 0.824].\nObject 23 : top at [0.960, 0.626, 1.000, 0.668].\nObject 24 : truck at [0.781, 0.278, 0.997, 0.366].\nObject 25 : walls at [0.608, 0.000, 0.989, 0.320].\nObject 26 : wheel at [0.843, 0.338, 0.875, 0.366].\nObject 27 : woman at [0.917, 0.610, 1.000, 0.724].\n\nRelationships:\nobject 17 : people -> in -> object 20 : service area.\nobject 27 : woman -> bends over -> object 11 : luggage.\nobject 14 : man -> walks down -> object 21 : stairs.\nobject 12 : luggage -> on -> object 5 : ground.\nobject 13 : man -> carries -> object 0 
: baggage.\nobject 14 : man -> wears -> object 22 : sweater.\nobject 15 : man -> wears -> object 3 : cap.\nobject 24 : truck -> in -> object 20 : service area.\nobject 15 : man -> wears -> object 8 : jacket.\nobject 10 : lines -> on -> object 16 : pavement.\nobject 14 : man -> walks down -> object 21 : stairs.\nobject 9 : line -> on -> object 16 : pavement.\nobject 24 : truck -> has -> object 26 : wheel.\nobject 2 : building -> has -> object 25 : walls.\nobject 15 : man -> on -> object 20 : service area.\nobject 13 : man -> holds -> object 0 : baggage.\nobject 14 : man -> walks down -> object 21 : stairs.\nobject 13 : man -> holds -> object 0 : baggage.\nobject 27 : woman -> wears -> object 7 : headband.\nobject 1 : baggage -> on -> object 20 : service area.\n\nRegion Description:\nRegion Description at [0.443, 0.528, 0.992, 0.850] : People standing in service area of airport..\nRegion Description at [0.648, 0.564, 0.960, 0.892] : Man walking down stairs of unloading ramp..\nRegion Description at [0.229, 0.698, 0.381, 0.776] : Black and red luggage sitting on ground..\nRegion Description at [0.957, 0.616, 0.997, 0.670] : Woman dressed in sleeveless black top..\nRegion Description at [0.011, 0.548, 0.211, 0.750] : Man holding his luggage and bending over.\nRegion Description at [0.893, 0.578, 0.995, 0.678] : woman with a black and white head band.\nRegion Description at [0.235, 0.684, 0.973, 0.816] : Rainbow of colors in the form of luggage.\n\nGlobal Caption:\nSome are standing outside a building with suitcases.\nA few people are getting of a plane.\nA group of people and luggage on a airport tarmac.\nSome people who are placing luggage on a runway.\nAn airport and plane unloading passengers with luggage."} +{"question_id": 19, "image": "000000412240.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : characters at [0.528, 0.251, 0.992, 0.395].\nObject 1 : date at [0.646, 0.869, 0.824, 0.923].\nObject 2 : dog at [0.292, 0.131, 0.820, 0.771].\nObject 3 : 
eyes at [0.332, 0.219, 0.354, 0.243].\nObject 4 : floor at [0.002, 0.715, 1.000, 0.997].\nObject 5 : head at [0.290, 0.117, 0.500, 0.392].\nObject 6 : heel at [0.218, 0.629, 0.324, 0.741].\nObject 7 : laces at [0.398, 0.464, 0.540, 0.608].\nObject 8 : left eye at [0.420, 0.245, 0.446, 0.283].\nObject 9 : light at [0.320, 0.493, 0.608, 0.720].\nObject 10 : mouth at [0.318, 0.320, 0.392, 0.373].\nObject 11 : nose at [0.348, 0.283, 0.392, 0.328].\nObject 12 : panel at [0.690, 0.544, 1.000, 0.779].\nObject 13 : photo at [0.000, 0.003, 0.996, 0.997].\nObject 14 : shoe at [0.002, 0.437, 0.250, 0.720].\nObject 15 : shoe at [0.212, 0.445, 0.720, 0.787].\nObject 16 : symbol at [0.750, 0.600, 0.828, 0.699].\nObject 17 : tail at [0.734, 0.720, 0.824, 0.768].\nObject 18 : time at [0.852, 0.872, 0.938, 0.923].\nObject 19 : toe at [0.564, 0.643, 0.724, 0.776].\nObject 20 : year at [0.752, 0.877, 0.834, 0.923].\n\nRelationships:\nobject 3 : eyes -> of -> object 2 : dog.\nobject 1 : date -> of -> object 13 : photo.\nobject 6 : heel -> of -> object 15 : shoe.\nobject 2 : dog -> sitting on -> object 4 : floor.\nobject 15 : shoe -> next to -> object 2 : dog.\nobject 15 : shoe -> reflecting -> object 9 : light.\nobject 0 : characters -> playing -> object 0 : characters.\nobject 0 : characters -> playing -> object 0 : characters.\nobject 2 : dog -> has a -> object 8 : left eye.\nobject 5 : head -> of -> object 2 : dog.\nobject 3 : eyes -> of -> object 2 : dog.\nobject 11 : nose -> on a -> object 2 : dog.\nobject 10 : mouth -> on a -> object 2 : dog.\nobject 15 : shoe -> has -> object 7 : laces.\nobject 17 : tail -> of -> object 2 : dog.\nobject 15 : shoe -> has a -> object 6 : heel.\nobject 19 : toe -> of -> object 15 : shoe.\n\nRegion Description:\nRegion Description at [0.838, 0.837, 0.976, 0.968] : the time written in bottom right corner.\n\nGlobal Caption:\nA dog sitting behind a pair of black shoes.\nA dog sits on the floor next to some shoes. 
\nA puppy is sitting behind a pair of shoes.\na close up of a small dog near a pair of shoes\nA small black dog sits beside a pair of shoes."} +{"question_id": 20, "image": "000000130566.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : buds at [0.130, 0.814, 0.334, 0.883].\nObject 1 : building at [0.622, 0.213, 0.708, 0.273].\nObject 2 : building at [0.708, 0.222, 0.994, 0.294].\nObject 3 : building at [0.472, 0.240, 0.602, 0.282].\nObject 4 : cars at [0.628, 0.411, 0.912, 0.739].\nObject 5 : electric lines at [0.000, 0.000, 0.912, 0.126].\nObject 6 : gravel at [0.382, 0.381, 0.878, 0.907].\nObject 7 : leaves at [0.736, 0.357, 0.764, 0.390].\nObject 8 : pole at [0.550, 0.589, 0.558, 0.724].\nObject 9 : sky at [0.322, 0.093, 0.852, 0.162].\nObject 10 : tracks at [0.382, 0.429, 0.502, 0.511].\nObject 11 : tracks at [0.374, 0.408, 0.692, 0.709].\nObject 12 : tracks at [0.706, 0.775, 0.942, 0.922].\nObject 13 : train at [0.016, 0.273, 0.906, 0.733].\nObject 14 : train tracks at [0.024, 0.291, 0.996, 0.997].\nObject 15 : tree at [0.760, 0.279, 0.998, 0.426].\nObject 16 : wall at [0.556, 0.721, 0.790, 0.991].\nObject 17 : windshield at [0.850, 0.523, 0.898, 0.583].\nObject 18 : windshield at [0.796, 0.526, 0.846, 0.580].\n\nRelationships:\nobject 18 : windshield -> on a -> object 13 : train.\nobject 12 : tracks -> for a -> object 13 : train.\nobject 15 : tree -> with -> object 7 : leaves.\nobject 5 : electric lines -> on -> object 14 : train tracks.\nobject 8 : pole -> beside -> object 13 : train.\nobject 16 : wall -> beside -> object 13 : train.\nobject 13 : train -> traveling down -> object 11 : tracks.\n\nRegion Description:\nRegion Description at [0.022, 0.258, 0.632, 0.679] : THESE CARS ARE FOR CARGO NOT PASSENGERS.\nRegion Description at [0.630, 0.471, 0.682, 0.550] : THE WINDOWS ARE ON THE SIDE OF THE ENGINE.\nRegion Description at [0.000, 0.024, 0.448, 0.144] : electric lines hanging above train tracks.\nRegion Description at [0.532, 0.571, 0.568, 
0.727] : black metal pole beside train tracks.\nRegion Description at [0.782, 0.586, 0.918, 0.667] : yellow paint on the front of the train.\nRegion Description at [0.062, 0.300, 0.996, 0.997] : multiple sets of tracks on the ground.\nRegion Description at [0.026, 0.114, 0.950, 0.970] : a freight train travelling down the tracks.\nRegion Description at [0.054, 0.685, 0.684, 0.991] : wildflowers on the side of a train track.\nRegion Description at [0.002, 0.129, 0.998, 0.991] : the grass and trees around the tracks.\n\nGlobal Caption:\nA yellow train on the tracks with several cars\nA train pulls past an intersection in the rail in a rural area.\na long cargo train going down a track by some trees \nA train with a red and yellow engine on a railroad track.\nA train pulls a large number of cars through a junction."} +{"question_id": 21, "image": "000000421923.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : block at [0.156, 0.630, 0.357, 0.822].\nObject 1 : book at [0.414, 0.208, 0.538, 0.364].\nObject 2 : book at [0.360, 0.202, 0.417, 0.360].\nObject 3 : book at [0.426, 0.484, 0.691, 0.522].\nObject 4 : book at [0.399, 0.404, 0.520, 0.554].\nObject 5 : bowl at [0.072, 0.030, 0.288, 0.076].\nObject 6 : center at [0.850, 0.732, 0.886, 0.766].\nObject 7 : eye at [0.282, 0.506, 0.327, 0.532].\nObject 8 : eye at [0.189, 0.506, 0.237, 0.534].\nObject 9 : flower at [0.796, 0.462, 0.982, 0.550].\nObject 10 : flower at [0.817, 0.528, 0.976, 0.612].\nObject 11 : flower at [0.760, 0.678, 0.946, 0.824].\nObject 12 : flower at [0.691, 0.608, 0.838, 0.722].\nObject 13 : flower at [0.913, 0.680, 1.000, 0.770].\nObject 14 : object at [0.213, 0.840, 0.583, 0.972].\nObject 15 : picture at [0.778, 0.060, 1.000, 0.352].\nObject 16 : shelf at [0.324, 0.528, 0.997, 0.624].\nObject 17 : shelf at [0.207, 0.334, 0.997, 0.380].\nObject 18 : shelf at [0.000, 0.028, 0.607, 0.202].\nObject 19 : stack at [0.435, 0.480, 0.712, 0.578].\nObject 20 : statue at [0.147, 0.404, 0.372, 
0.652].\nObject 21 : table at [0.000, 0.690, 1.003, 0.998].\nObject 22 : vase at [0.838, 0.774, 0.994, 0.974].\nObject 23 : water at [0.847, 0.864, 0.997, 0.984].\n\nRelationships:\nobject 20 : statue -> on -> object 0 : block.\nobject 14 : object -> on -> object 21 : table.\nobject 1 : book -> on -> object 17 : shelf.\nobject 4 : book -> on -> object 16 : shelf.\nobject 5 : bowl -> on -> object 18 : shelf.\nobject 22 : vase -> has -> object 23 : water.\nobject 20 : statue -> has -> object 8 : eye.\nobject 20 : statue -> has -> object 7 : eye.\nobject 20 : statue -> on -> object 0 : block.\nobject 9 : flower -> in -> object 22 : vase.\nobject 10 : flower -> in -> object 22 : vase.\nobject 12 : flower -> in -> object 22 : vase.\nobject 13 : flower -> in -> object 22 : vase.\nobject 3 : book -> in -> object 19 : stack.\nobject 11 : flower -> has -> object 6 : center.\nobject 1 : book -> on -> object 17 : shelf.\nobject 2 : book -> on -> object 17 : shelf.\nobject 11 : flower -> has -> object 6 : center.\nobject 3 : book -> on -> object 19 : stack.\nobject 19 : stack -> on -> object 16 : shelf.\nobject 20 : statue -> on -> object 0 : block.\n\nRegion Description:\n\nGlobal Caption:\na glass vase with some flowers coming out of it \nA room witb a statue, bookshelves, books and a vase with flowers in it.\nA desk with a vase containing flowers, a sculpture of a man's head and shelves behind it.\nA statue next to a vase of flowers on a shelf. 
\nThe bust of a man's head is next to a vase of flowers."} +{"question_id": 22, "image": "000000513567.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : bag at [0.428, 0.435, 0.476, 0.528].\nObject 1 : bag at [0.322, 0.923, 0.498, 0.997].\nObject 2 : building at [0.000, 0.003, 0.158, 0.413].\nObject 3 : face at [0.246, 0.240, 0.374, 0.483].\nObject 4 : flag at [0.044, 0.013, 0.090, 0.149].\nObject 5 : girl at [0.538, 0.019, 0.968, 0.949].\nObject 6 : hand at [0.176, 0.680, 0.304, 0.821].\nObject 7 : hands at [0.660, 0.344, 0.756, 0.517].\nObject 8 : head at [0.560, 0.003, 0.822, 0.339].\nObject 9 : hot dog at [0.676, 0.315, 0.882, 0.408].\nObject 10 : hot dogs at [0.190, 0.587, 0.350, 0.741].\nObject 11 : jeans at [0.586, 0.843, 0.916, 0.995].\nObject 12 : lady at [0.572, 0.045, 0.952, 0.984].\nObject 13 : logo at [0.920, 0.069, 0.996, 0.165].\nObject 14 : man at [0.486, 0.235, 0.564, 0.509].\nObject 15 : man at [0.456, 0.213, 0.520, 0.317].\nObject 16 : maroon shirt at [0.546, 0.333, 0.928, 0.944].\nObject 17 : mouth at [0.288, 0.408, 0.356, 0.440].\nObject 18 : people at [0.552, 0.029, 0.876, 0.995].\nObject 19 : post at [0.104, 0.005, 0.138, 0.533].\nObject 20 : purse at [0.842, 0.661, 0.980, 0.888].\nObject 21 : purse strap at [0.270, 0.893, 0.390, 0.992].\nObject 22 : shadow at [0.934, 0.067, 0.996, 0.141].\nObject 23 : side at [0.922, 0.875, 0.998, 0.997].\nObject 24 : street at [0.042, 0.403, 0.092, 0.520].\nObject 25 : sunglasses at [0.630, 0.005, 0.794, 0.048].\nObject 26 : woman at [0.502, 0.000, 0.982, 0.997].\nObject 27 : woman at [0.102, 0.099, 0.486, 0.984].\nObject 28 : woman's shirt at [0.518, 0.320, 0.944, 0.949].\n\nRelationships:\nobject 0 : bag -> on -> object 15 : man.\nobject 13 : logo -> on -> object 2 : building.\nobject 25 : sunglasses -> on -> object 26 : woman.\nobject 25 : sunglasses -> on -> object 8 : head.\nobject 4 : flag -> on -> object 19 : post.\nobject 6 : hand -> holds -> object 10 : hot dogs.\nobject 27 : woman -> 
has -> object 17 : mouth.\nobject 12 : lady -> holding -> object 9 : hot dog.\nobject 9 : hot dog -> in -> object 7 : hands.\nobject 18 : people -> crossing -> object 24 : street.\nobject 27 : woman -> wearing -> object 11 : jeans.\nobject 5 : girl -> wears -> object 16 : maroon shirt.\n\nRegion Description:\nRegion Description at [0.038, 0.173, 0.540, 0.995] : Laughing girl in a green shirt holding a hotdog..\nRegion Description at [0.504, 0.000, 0.954, 0.989] : Black haired girl in maroon shirt wearing sunglasses on her head..\nRegion Description at [0.508, 0.000, 0.960, 0.979] : Girl looking at the hot dog she's holding in her hands.\nRegion Description at [0.040, 0.173, 0.536, 0.981] : Girl holding hot dog in her right hand.\nRegion Description at [0.926, 0.253, 0.998, 0.645] : Woman in a brown shirt and jeans crossing the street.\nRegion Description at [0.202, 0.563, 0.334, 0.995] : Blue purse strap around woman's shoulder.\nRegion Description at [0.146, 0.587, 0.370, 0.787] : woman holding hot dog in white napkin.\nRegion Description at [0.682, 0.229, 0.742, 0.315] : woman's mouth open looking at hot dog.\nRegion Description at [0.234, 0.213, 0.396, 0.507] : woman's face smiling with eyes closed.\n\nGlobal Caption:\nTwo Asian women eating chili dogs while standing on a street.\nTwo women preparing to eat a hot dog on a city side.\nThe woman are eating their hot dogs while walking.\nTwo young women are eating hot dogs while walking down the sidewalk.\nTwo women eat chili dogs on a city sidewalk. 
"} +{"question_id": 23, "image": "000000543300.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : boat at [0.048, 0.552, 0.928, 0.819].\nObject 1 : building at [0.328, 0.493, 0.538, 0.613].\nObject 2 : building at [0.000, 0.467, 0.338, 0.651].\nObject 3 : building at [0.534, 0.096, 0.998, 0.637].\nObject 4 : canopies at [0.452, 0.504, 0.620, 0.600].\nObject 5 : container at [0.858, 0.643, 0.948, 0.712].\nObject 6 : dolphin at [0.282, 0.691, 0.344, 0.773].\nObject 7 : flag at [0.322, 0.563, 0.340, 0.597].\nObject 8 : ground at [0.822, 0.696, 0.880, 0.715].\nObject 9 : leaves at [0.002, 0.483, 0.080, 0.659].\nObject 10 : level at [0.000, 0.709, 1.000, 0.829].\nObject 11 : level at [0.068, 0.616, 0.852, 0.688].\nObject 12 : outdoor seating at [0.502, 0.579, 0.532, 0.624].\nObject 13 : pink writing at [0.414, 0.693, 0.654, 0.725].\nObject 14 : pole at [0.282, 0.416, 0.292, 0.515].\nObject 15 : railing at [0.094, 0.557, 0.728, 0.624].\nObject 16 : railing at [0.238, 0.597, 0.744, 0.627].\nObject 17 : reflection at [0.174, 0.808, 0.922, 0.848].\nObject 18 : roof at [0.000, 0.469, 0.280, 0.523].\nObject 19 : roof at [0.348, 0.509, 0.482, 0.568].\nObject 20 : roof at [0.920, 0.264, 0.980, 0.344].\nObject 21 : row at [0.700, 0.499, 0.878, 0.573].\nObject 22 : sea wall at [0.878, 0.712, 0.998, 0.819].\nObject 23 : shore at [0.000, 0.627, 0.996, 0.816].\nObject 24 : sky at [0.006, 0.000, 1.000, 0.517].\nObject 25 : steeple at [0.918, 0.088, 0.936, 0.237].\nObject 26 : symbol at [0.268, 0.688, 0.350, 0.779].\nObject 27 : symbol at [0.702, 0.693, 0.752, 0.725].\nObject 28 : tree at [0.472, 0.491, 0.592, 0.597].\nObject 29 : trees at [0.948, 0.573, 1.000, 0.691].\nObject 30 : trees at [0.000, 0.488, 0.080, 0.675].\nObject 31 : vehicle at [0.968, 0.653, 0.998, 0.693].\nObject 32 : water at [0.004, 0.813, 0.998, 0.992].\nObject 33 : water at [0.008, 0.717, 0.998, 0.981].\nObject 34 : window at [0.374, 0.733, 0.790, 0.765].\nObject 35 : window at [0.800, 0.491, 0.868, 
0.576].\nObject 36 : window at [0.928, 0.512, 0.950, 0.576].\nObject 37 : window at [0.892, 0.395, 0.912, 0.443].\nObject 38 : window at [0.894, 0.517, 0.910, 0.571].\nObject 39 : window at [0.630, 0.493, 0.652, 0.565].\nObject 40 : windows at [0.384, 0.637, 0.724, 0.685].\n\nRelationships:\nobject 40 : windows -> on -> object 0 : boat.\nobject 17 : reflection -> in -> object 33 : water.\nobject 29 : trees -> growing on -> object 23 : shore.\nobject 30 : trees -> growing on -> object 23 : shore.\nobject 28 : tree -> growing on -> object 23 : shore.\nobject 18 : roof -> on -> object 2 : building.\nobject 5 : container -> on -> object 22 : sea wall.\nobject 0 : boat -> in -> object 32 : water.\nobject 0 : boat -> has -> object 15 : railing.\n\nRegion Description:\nRegion Description at [0.414, 0.691, 0.662, 0.725] : the are red letters on the side of the cruise ship.\nRegion Description at [0.370, 0.707, 0.780, 0.763] : there is a long set of black windows on the side of the cruise ship.\nRegion Description at [0.870, 0.243, 0.992, 0.357] : there is a red roof on this building.\nRegion Description at [0.538, 0.400, 0.712, 0.549] : there is red and gray building in the background.\nRegion Description at [0.054, 0.595, 0.312, 0.821] : there is two levels on this cruise ship.\nRegion Description at [0.370, 0.587, 0.664, 0.621] : there is a silver railing on the top level of the cruise ship.\nRegion Description at [0.858, 0.621, 0.952, 0.717] : there is a blue container on the dock.\nRegion Description at [0.876, 0.707, 0.996, 0.787] : there is a gray sea wall beside the ship.\nRegion Description at [0.268, 0.723, 0.346, 0.787] : there are blue water symbols on the side of the cruise ship.\nRegion Description at [0.000, 0.619, 0.024, 0.712] : there is a blue and white sign on the dock.\nRegion Description at [0.662, 0.533, 0.904, 0.603] : An outdoor canopy creates shade for customers. 
.\n\nGlobal Caption:\nA boat sits on the side of the dock.\nA large white boat in the open water.\nA white double decker boat n water next to buildings.\nA large cruise ship is traveling on the ocean. \nA Port River Dolphin Cruise ship sits in the water."} +{"question_id": 24, "image": "000000241668.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : boutonniere at [0.710, 0.574, 0.799, 0.660].\nObject 1 : cake at [0.630, 0.670, 0.772, 0.750].\nObject 2 : cake crumb at [0.710, 0.348, 0.721, 0.356].\nObject 3 : crown at [0.370, 0.006, 0.549, 0.056].\nObject 4 : dress at [0.000, 0.574, 0.582, 1.000].\nObject 5 : eye at [0.649, 0.244, 0.699, 0.272].\nObject 6 : eye at [0.735, 0.264, 0.769, 0.280].\nObject 7 : eyebrow at [0.655, 0.230, 0.710, 0.250].\nObject 8 : eyebrow at [0.741, 0.252, 0.780, 0.264].\nObject 9 : finger at [0.721, 0.772, 0.816, 0.800].\nObject 10 : finger at [0.535, 0.740, 0.685, 0.826].\nObject 11 : ground at [0.003, 0.888, 0.997, 1.000].\nObject 12 : hair at [0.507, 0.142, 0.791, 0.642].\nObject 13 : hair at [0.189, 0.044, 0.652, 0.374].\nObject 14 : hand at [0.721, 0.720, 0.822, 0.818].\nObject 15 : hand at [0.493, 0.710, 0.685, 0.826].\nObject 16 : head at [0.209, 0.048, 0.652, 0.360].\nObject 17 : mouth at [0.646, 0.310, 0.724, 0.352].\nObject 18 : neck at [0.560, 0.344, 0.663, 0.460].\nObject 19 : necklace at [0.357, 0.334, 0.471, 0.484].\nObject 20 : necktie at [0.571, 0.442, 0.674, 0.936].\nObject 21 : paper at [0.760, 0.792, 0.914, 0.934].\nObject 22 : person at [0.490, 0.136, 0.825, 0.998].\nObject 23 : plate at [0.579, 0.734, 0.816, 0.768].\nObject 24 : purse at [0.774, 0.792, 0.883, 0.840].\nObject 25 : ring at [0.786, 0.780, 0.794, 0.796].\nObject 26 : shirt at [0.554, 0.376, 0.691, 0.950].\nObject 27 : suit jacket at [0.490, 0.422, 0.799, 0.998].\nObject 28 : table at [0.696, 0.816, 0.997, 0.916].\nObject 29 : toilet at [0.000, 0.656, 0.997, 0.936].\nObject 30 : wallpaper at [0.003, 0.000, 0.916, 
0.656].\n\nRelationships:\nobject 21 : paper -> on top of -> object 11 : ground.\nobject 21 : paper -> by -> object 29 : toilet.\nobject 21 : paper -> on top of -> object 11 : ground.\nobject 21 : paper -> on top of -> object 11 : ground.\nobject 21 : paper -> by -> object 29 : toilet.\nobject 21 : paper -> by -> object 29 : toilet.\nobject 21 : paper -> sitting by -> object 29 : toilet.\nobject 21 : paper -> lying by -> object 29 : toilet.\nobject 21 : paper -> on top of -> object 11 : ground.\nobject 21 : paper -> lying by -> object 29 : toilet.\nobject 2 : cake crumb -> on side of -> object 17 : mouth.\nobject 24 : purse -> on top of -> object 28 : table.\nobject 5 : eye -> of a -> object 22 : person.\nobject 6 : eye -> of a -> object 22 : person.\nobject 7 : eyebrow -> of -> object 22 : person.\nobject 8 : eyebrow -> of -> object 22 : person.\nobject 10 : finger -> of -> object 15 : hand.\nobject 10 : finger -> of -> object 15 : hand.\nobject 10 : finger -> of -> object 15 : hand.\nobject 10 : finger -> of -> object 15 : hand.\nobject 3 : crown -> on top of -> object 16 : head.\nobject 20 : necktie -> worn on -> object 22 : person.\nobject 22 : person -> holding -> object 1 : cake.\nobject 14 : hand -> holding -> object 1 : cake.\nobject 22 : person -> wearing -> object 27 : suit jacket.\nobject 22 : person -> wearing -> object 4 : dress.\nobject 20 : necktie -> worn on -> object 18 : neck.\nobject 13 : hair -> on top of -> object 16 : head.\nobject 1 : cake -> on top of -> object 23 : plate.\nobject 25 : ring -> worn on -> object 9 : finger.\n\nRegion Description:\nRegion Description at [0.022, 0.020, 0.203, 0.312] : A green and yellow striped wallpaper.\nRegion Description at [0.000, 0.048, 0.613, 0.996] : woman wearing a strapless white wedding dress .\nRegion Description at [0.487, 0.136, 0.808, 0.986] : woman white red hair holding a piece of cake on a plate.\nRegion Description at [0.543, 0.674, 0.813, 0.826] : woman's hands holding a plate of 
cake.\nRegion Description at [0.579, 0.124, 0.788, 0.524] : red haired woman wearing a tie and suit jacket .\nRegion Description at [0.000, 0.012, 0.819, 0.996] : two people wearing formal wedding attire .\n\nGlobal Caption:\nThere are two people enjoying a wedding reception\nA woman in a wedding dress with another woman in a suit behind\nA woman in a wedding dress with another lady holding a piece of cake.\nA red head girl holding a piece of cake\nA bride is with a long red haired person with cake."} +{"question_id": 25, "image": "000000535578.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : bush at [0.480, 0.000, 0.748, 0.084].\nObject 1 : ear at [0.544, 0.544, 0.571, 0.562].\nObject 2 : field at [0.000, 0.002, 0.994, 0.998].\nObject 3 : hill at [0.000, 0.000, 0.997, 0.998].\nObject 4 : plant at [0.000, 0.764, 0.601, 0.998].\nObject 5 : rock at [0.727, 0.410, 0.808, 0.470].\nObject 6 : sheep at [0.532, 0.546, 0.646, 0.662].\nObject 7 : sheep at [0.532, 0.666, 0.817, 0.810].\nObject 8 : tail at [0.565, 0.572, 0.604, 0.610].\nObject 9 : tree at [0.649, 0.000, 0.997, 0.334].\nObject 10 : trees at [0.736, 0.036, 0.835, 0.100].\nObject 11 : wall at [0.000, 0.000, 0.769, 0.180].\nObject 12 : weed at [0.417, 0.346, 0.492, 0.390].\n\nRelationships:\nobject 7 : sheep -> in a -> object 2 : field.\nobject 7 : sheep -> grazing in -> object 2 : field.\nobject 7 : sheep -> grazing in a -> object 2 : field.\nobject 7 : sheep -> grazing in a -> object 2 : field.\nobject 7 : sheep -> grazing in a -> object 2 : field.\nobject 7 : sheep -> grazing in a -> object 2 : field.\nobject 11 : wall -> borders -> object 2 : field.\nobject 7 : sheep -> grazing in a -> object 2 : field.\nobject 5 : rock -> in -> object 2 : field.\nobject 7 : sheep -> grazing in -> object 2 : field.\nobject 7 : sheep -> grazing in -> object 2 : field.\nobject 5 : rock -> in -> object 2 : field.\nobject 0 : bush -> in -> object 2 : field.\nobject 10 : trees -> in -> object 2 : field.\nobject 6 : 
sheep -> has an -> object 1 : ear.\nobject 6 : sheep -> has a -> object 8 : tail.\nobject 12 : weed -> growing in -> object 2 : field.\nobject 7 : sheep -> on -> object 3 : hill.\nobject 4 : plant -> on -> object 2 : field.\nobject 5 : rock -> on -> object 3 : hill.\nobject 7 : sheep -> are in -> object 2 : field.\nobject 11 : wall -> running across -> object 2 : field.\nobject 0 : bush -> in -> object 2 : field.\nobject 0 : bush -> in -> object 2 : field.\nobject 5 : rock -> in -> object 2 : field.\nobject 6 : sheep -> has a -> object 8 : tail.\nobject 5 : rock -> in -> object 2 : field.\n\nRegion Description:\nRegion Description at [0.000, 0.072, 0.760, 0.160] : A stone wall boarding a field of sheep.\nRegion Description at [0.189, 0.032, 0.703, 0.178] : rocks and grass in the background of the pasture.\nRegion Description at [0.541, 0.662, 0.823, 0.802] : white sheep grazing in green grassy field.\nRegion Description at [0.538, 0.544, 0.646, 0.656] : white sheep grazing in green grassy field.\nRegion Description at [0.228, 0.374, 0.357, 0.436] : white sheep grazing in green grassy field.\nRegion Description at [0.607, 0.380, 0.712, 0.456] : white sheep grazing in green grassy field.\nRegion Description at [0.811, 0.296, 0.937, 0.338] : two white sheep grazing in green grassy field.\nRegion Description at [0.048, 0.200, 0.249, 0.242] : group of white sheep grazing in green grassy field.\nRegion Description at [0.213, 0.164, 0.336, 0.192] : group of white sheep grazing in green grassy field.\nRegion Description at [0.000, 0.006, 0.997, 0.172] : two long gray stone walls across field.\nRegion Description at [0.453, 0.000, 0.730, 0.062] : a stand of trees outside the stone fence.\n\nGlobal Caption:\nA group of sheep grazing in a grassy valley.\nSheep graze in a lushly green mountain meadow\nA flock of sheep walking along a grassy hillside grazing.\nA flock of sheep are grazing on a grassy slope.\nA group of sheep grazing in a grassy field."} +{"question_id": 26, 
"image": "000000277051.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : bird at [0.400, 0.408, 0.688, 0.775].\nObject 1 : bird at [0.110, 0.468, 0.576, 0.820].\nObject 2 : bottle at [0.080, 0.003, 0.296, 0.721].\nObject 3 : chair at [0.678, 0.177, 0.882, 0.408].\nObject 4 : crumbs at [0.098, 0.835, 0.434, 1.000].\nObject 5 : feet at [0.514, 0.724, 0.562, 0.769].\nObject 6 : food at [0.000, 0.877, 0.180, 1.000].\nObject 7 : foot at [0.474, 0.706, 0.514, 0.733].\nObject 8 : ground at [0.518, 0.183, 0.620, 0.402].\nObject 9 : handle at [0.488, 0.796, 0.800, 0.940].\nObject 10 : knife at [0.000, 0.793, 0.800, 1.000].\nObject 11 : label at [0.080, 0.000, 0.260, 0.598].\nObject 12 : leg at [0.552, 0.652, 0.578, 0.742].\nObject 13 : leg at [0.508, 0.646, 0.540, 0.685].\nObject 14 : liquid at [0.092, 0.114, 0.294, 0.721].\nObject 15 : paper at [0.000, 0.658, 0.762, 1.003].\nObject 16 : placemat at [0.000, 0.658, 0.766, 1.000].\nObject 17 : plate at [0.000, 0.748, 0.618, 1.000].\nObject 18 : table at [0.742, 0.261, 1.002, 0.883].\nObject 19 : table at [0.000, 0.658, 1.000, 1.003].\nObject 20 : tablecloth at [0.000, 0.664, 1.002, 1.003].\nObject 21 : tablecloth at [0.596, 0.267, 1.000, 0.883].\n\nRelationships:\nobject 6 : food -> on -> object 17 : plate.\nobject 4 : crumbs -> on -> object 17 : plate.\nobject 3 : chair -> next to -> object 18 : table.\nobject 3 : chair -> beside -> object 18 : table.\n\nRegion Description:\nRegion Description at [0.050, 0.769, 0.804, 0.979] : a steak knife resting on the edge of a plate.\nRegion Description at [0.008, 0.724, 0.628, 0.994] : a white plate with food and crumbs on it.\nRegion Description at [0.040, 0.685, 0.380, 0.925] : a blue and white paper placemat underneath a plate.\nRegion Description at [0.636, 0.147, 0.906, 0.492] : a bird on a table with a chair behind it.\nRegion Description at [0.384, 0.372, 0.698, 0.787] : a bird standing on the edge of a table.\n\nGlobal Caption:\ntwo little sparrows standing on a 
table by a knife\ntwo gray white and brown birds a knife and a red table\nA couple of small birds standing on top of a table.\nTwo sparrows sit n a table with a red tablecloth at an outdoor cafe. \nTwo birds perched on a table near a plate of food."} +{"question_id": 27, "image": "000000018519.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : concrete at [0.000, 0.576, 1.002, 0.998].\nObject 1 : elbow at [0.403, 0.538, 0.433, 0.552].\nObject 2 : fence at [0.000, 0.314, 0.998, 0.600].\nObject 3 : graffiti at [0.470, 0.856, 0.794, 0.998].\nObject 4 : grass at [0.000, 0.154, 1.002, 0.448].\nObject 5 : helmet at [0.358, 0.354, 0.448, 0.422].\nObject 6 : knee at [0.525, 0.608, 0.545, 0.622].\nObject 7 : knee pad at [0.450, 0.542, 0.512, 0.598].\nObject 8 : pad at [0.540, 0.362, 0.595, 0.420].\nObject 9 : pad at [0.512, 0.578, 0.592, 0.624].\nObject 10 : pad at [0.376, 0.512, 0.443, 0.554].\nObject 11 : park at [0.007, 0.006, 1.000, 0.578].\nObject 12 : pipe at [0.657, 0.300, 0.687, 0.578].\nObject 13 : pipe at [0.177, 0.324, 0.211, 0.590].\nObject 14 : rail at [0.000, 0.310, 1.000, 0.334].\nObject 15 : ramp at [0.000, 0.592, 1.002, 0.998].\nObject 16 : rock at [0.100, 0.302, 0.154, 0.326].\nObject 17 : shadow at [0.415, 0.642, 0.754, 0.912].\nObject 18 : shirt at [0.438, 0.376, 0.637, 0.514].\nObject 19 : shorts at [0.460, 0.500, 0.664, 0.580].\nObject 20 : skate at [0.647, 0.490, 0.709, 0.584].\nObject 21 : skater at [0.234, 0.352, 0.719, 0.624].\nObject 22 : sticker at [0.408, 0.358, 0.438, 0.368].\nObject 23 : tree at [0.122, 0.008, 0.677, 0.322].\nObject 24 : wheels at [0.689, 0.496, 0.721, 0.526].\nObject 25 : wrist brace at [0.279, 0.524, 0.338, 0.564].\n\nRelationships:\nobject 21 : skater -> has a -> object 17 : shadow.\nobject 20 : skate -> has -> object 24 : wheels.\nobject 23 : tree -> standing in a -> object 11 : park.\nobject 21 : skater -> wearing a -> object 5 : helmet.\nobject 10 : pad -> protecting an -> object 1 : elbow.\nobject 9 : pad -> 
protecting a -> object 6 : knee.\nobject 17 : shadow -> of a -> object 21 : skater.\nobject 15 : ramp -> has a -> object 3 : graffiti.\nobject 21 : skater -> has a -> object 5 : helmet.\nobject 16 : rock -> in -> object 4 : grass.\nobject 5 : helmet -> has a -> object 22 : sticker.\nobject 21 : skater -> wearing -> object 20 : skate.\nobject 21 : skater -> wearing a -> object 10 : pad.\nobject 25 : wrist brace -> on -> object 21 : skater.\nobject 21 : skater -> has a -> object 20 : skate.\nobject 17 : shadow -> on -> object 15 : ramp.\nobject 21 : skater -> has a -> object 5 : helmet.\nobject 21 : skater -> has a -> object 8 : pad.\nobject 21 : skater -> has a -> object 18 : shirt.\nobject 21 : skater -> has -> object 19 : shorts.\nobject 23 : tree -> behind -> object 21 : skater.\nobject 25 : wrist brace -> on -> object 21 : skater.\nobject 21 : skater -> has a -> object 9 : pad.\nobject 7 : knee pad -> for a -> object 21 : skater.\nobject 17 : shadow -> on -> object 0 : concrete.\nobject 3 : graffiti -> on -> object 0 : concrete.\n\nRegion Description:\nRegion Description at [0.391, 0.630, 0.776, 0.962] : Skater's shadow while performing a trick.\nRegion Description at [0.346, 0.342, 0.475, 0.440] : Man is wearing a black safety helmet.\nRegion Description at [0.184, 0.320, 0.741, 0.700] : a man roller skating at a skate park.\nRegion Description at [0.448, 0.636, 0.779, 0.940] : the shadow of the man cast on the cement ramp.\nRegion Description at [0.465, 0.856, 0.803, 0.996] : light blue painted graffiti on the cement ramp.\nRegion Description at [0.279, 0.524, 0.341, 0.570] : a black wrist guard on the man's wrist.\nRegion Description at [0.353, 0.352, 0.460, 0.422] : black helmet with several stickers on it.\nRegion Description at [0.644, 0.488, 0.719, 0.574] : the black rollerskate the man is wearing.\nRegion Description at [0.142, 0.314, 0.234, 0.604] : a grey post to the metal fence that is at the top of the ramp.\nRegion Description at [0.363, 0.500, 
0.453, 0.566] : a black elbow pad the man is wearing.\nRegion Description at [0.405, 0.642, 0.746, 0.916] : shadow of a roller skater on concrete.\n\nGlobal Caption:\nA young man riding a skateboard down the side of a ramp.\nA man doing a trick on roller-skates in a skate park.\nA skateboarder performing a jump off the side of a ramp.\na man wearing roller skates doing a jump on the side of a wall \nThe man in the helmet is jumping while wearing roller skates. "} +{"question_id": 28, "image": "000000106048.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : book at [0.218, 0.105, 0.834, 0.754].\nObject 1 : building at [0.050, 0.000, 1.000, 0.713].\nObject 2 : bus at [0.222, 0.144, 0.820, 0.757].\nObject 3 : bushes at [0.810, 0.401, 1.000, 0.680].\nObject 4 : design at [0.228, 0.422, 0.438, 0.560].\nObject 5 : ground at [0.000, 0.629, 1.002, 0.994].\nObject 6 : headlight at [0.738, 0.590, 0.796, 0.632].\nObject 7 : headlight at [0.522, 0.596, 0.610, 0.629].\nObject 8 : light at [0.604, 0.201, 0.706, 0.222].\nObject 9 : pavement at [0.002, 0.629, 0.996, 0.994].\nObject 10 : pipe at [0.172, 0.147, 0.208, 0.617].\nObject 11 : pipe at [0.438, 0.096, 0.458, 0.192].\nObject 12 : roof at [0.118, 0.000, 0.896, 0.174].\nObject 13 : side mirror at [0.488, 0.314, 0.530, 0.428].\nObject 14 : side mirror at [0.790, 0.332, 0.818, 0.455].\nObject 15 : street at [0.002, 0.611, 0.992, 0.991].\nObject 16 : stripe at [0.228, 0.428, 0.516, 0.569].\nObject 17 : trash can at [0.790, 0.569, 0.822, 0.662].\nObject 18 : wall at [0.858, 0.368, 0.920, 0.419].\nObject 19 : wheel at [0.266, 0.545, 0.294, 0.677].\nObject 20 : wheel at [0.248, 0.551, 0.264, 0.668].\nObject 21 : wheel at [0.444, 0.578, 0.472, 0.751].\nObject 22 : windows at [0.510, 0.216, 0.796, 0.548].\nObject 23 : windshield at [0.518, 0.222, 0.782, 0.545].\n\nRelationships:\nobject 10 : pipe -> running from -> object 12 : roof.\nobject 12 : roof -> to -> object 5 : ground.\nobject 17 : trash can -> next to -> object 3 
: bushes.\nobject 3 : bushes -> by -> object 15 : street.\n\nRegion Description:\nRegion Description at [0.568, 0.524, 0.770, 0.599] : Divine Transportation written on front of bus.\nRegion Description at [0.162, 0.129, 0.212, 0.623] : black drain pipe running from the roof to the ground.\nRegion Description at [0.712, 0.177, 0.762, 0.240] : bus identification number on top of bus.\nRegion Description at [0.790, 0.557, 0.820, 0.647] : gray trash can next to bushes behind bus.\nRegion Description at [0.810, 0.407, 0.990, 0.692] : large green bushes in front of building.\nRegion Description at [0.670, 0.317, 0.740, 0.527] : black windshield wiper on windshield.\n\nGlobal Caption:\nA white bus driving past a tall building.\na black and white bus some bushes and building\nA white decorated bus is next to a building.\na large white bus that is by a building\nA large bus parked in a parking lot "} +{"question_id": 29, "image": "000000058393.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : arm at [0.658, 0.462, 0.828, 0.496].\nObject 1 : bench at [0.070, 0.493, 0.932, 0.960].\nObject 2 : concrete at [0.030, 0.810, 0.974, 0.997].\nObject 3 : foot at [0.724, 0.784, 0.782, 0.844].\nObject 4 : hair at [0.646, 0.367, 0.754, 0.472].\nObject 5 : hair at [0.564, 0.338, 0.652, 0.462].\nObject 6 : man at [0.542, 0.343, 0.812, 0.493].\nObject 7 : ocean at [0.028, 0.319, 0.972, 0.821].\nObject 8 : post at [0.090, 0.641, 0.102, 0.734].\nObject 9 : post at [0.924, 0.652, 0.944, 0.836].\nObject 10 : rail at [0.028, 0.620, 0.974, 0.660].\nObject 11 : seat at [0.072, 0.728, 0.928, 0.786].\nObject 12 : shoe at [0.720, 0.789, 0.782, 0.855].\nObject 13 : sky at [0.028, 0.037, 0.974, 0.325].\nObject 14 : slat at [0.072, 0.749, 0.928, 0.781].\nObject 15 : slat at [0.112, 0.499, 0.912, 0.522].\nObject 16 : slat at [0.126, 0.702, 0.912, 0.728].\nObject 17 : slat at [0.108, 0.594, 0.908, 0.625].\nObject 18 : slat at [0.106, 0.525, 0.908, 0.554].\nObject 19 : woman at [0.644, 0.377, 
0.834, 0.863].\n\nRelationships:\nobject 6 : man -> sitting on -> object 1 : bench.\nobject 6 : man -> sitting with -> object 19 : woman.\nobject 6 : man -> has -> object 0 : arm.\nobject 0 : arm -> around -> object 19 : woman.\nobject 3 : foot -> wearing -> object 12 : shoe.\nobject 19 : woman -> has -> object 3 : foot.\nobject 3 : foot -> inside -> object 12 : shoe.\nobject 19 : woman -> looking at -> object 7 : ocean.\nobject 6 : man -> looking at -> object 7 : ocean.\nobject 19 : woman -> has -> object 4 : hair.\nobject 6 : man -> has -> object 5 : hair.\nobject 1 : bench -> in front of -> object 7 : ocean.\nobject 1 : bench -> in front of -> object 7 : ocean.\nobject 1 : bench -> backs up to -> object 1 : bench.\nobject 19 : woman -> sitting on -> object 1 : bench.\nobject 6 : man -> sitting on -> object 1 : bench.\nobject 19 : woman -> relaxing on -> object 1 : bench.\nobject 6 : man -> relaxing on -> object 1 : bench.\nobject 19 : woman -> facing -> object 7 : ocean.\nobject 6 : man -> facing -> object 7 : ocean.\nobject 19 : woman -> looking at -> object 7 : ocean.\nobject 6 : man -> looking at -> object 7 : ocean.\nobject 6 : man -> relaxing with -> object 19 : woman.\nobject 6 : man -> on bench with -> object 19 : woman.\nobject 19 : woman -> resting on -> object 1 : bench.\nobject 6 : man -> resting on -> object 1 : bench.\nobject 1 : bench -> near -> object 7 : ocean.\nobject 1 : bench -> near -> object 7 : ocean.\nobject 11 : seat -> part of -> object 1 : bench.\nobject 9 : post -> supporting -> object 10 : rail.\nobject 8 : post -> supporting -> object 10 : rail.\nobject 19 : woman -> has -> object 3 : foot.\nobject 12 : shoe -> belongs to -> object 19 : woman.\nobject 19 : woman -> has -> object 3 : foot.\nobject 2 : concrete -> under -> object 1 : bench.\nobject 2 : concrete -> under -> object 1 : bench.\nobject 7 : ocean -> in front of -> object 1 : bench.\nobject 6 : man -> sitting next to -> object 19 : woman.\nobject 6 : man -> cuddling with -> 
object 19 : woman.\nobject 0 : arm -> around -> object 19 : woman.\nobject 6 : man -> silhouetted with -> object 19 : woman.\nobject 18 : slat -> part of -> object 1 : bench.\n\nRegion Description:\nRegion Description at [0.502, 0.309, 0.892, 0.512] : a man and woman looking at the ocean.\n\nGlobal Caption:\nTwo people sitting on a bench silhouetted against the sea.\nTwo people are sitting on a bench together in front of water.\nThe silhouette of two people sitting on a bench in front of the water.\nA couple is sitting on a bench in front of the water. \nA couple sits on a park bench and watches the water"} +{"question_id": 30, "image": "000000010764.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : catcher at [0.334, 0.193, 0.756, 0.940].\nObject 1 : field at [0.000, 0.000, 0.998, 0.997].\nObject 2 : glove at [0.660, 0.492, 0.764, 0.674].\nObject 3 : hand at [0.666, 0.498, 0.748, 0.665].\nObject 4 : helmet at [0.472, 0.187, 0.610, 0.444].\nObject 5 : jersey at [0.340, 0.332, 0.556, 0.695].\nObject 6 : line at [0.396, 0.656, 0.560, 0.731].\nObject 7 : lines at [0.866, 0.927, 1.000, 0.997].\nObject 8 : lines at [0.754, 0.837, 0.998, 0.867].\nObject 9 : pads at [0.562, 0.668, 0.634, 0.782].\nObject 10 : pants at [0.336, 0.640, 0.612, 0.858].\nObject 11 : sneakers at [0.406, 0.834, 0.544, 0.946].\nObject 12 : stripe at [0.608, 0.737, 0.998, 0.795].\nObject 13 : wrist band at [0.586, 0.583, 0.604, 0.640].\n\nRelationships:\nobject 0 : catcher -> in -> object 1 : field.\nobject 2 : glove -> on -> object 3 : hand.\nobject 6 : line -> on -> object 10 : pants.\n\nRegion Description:\nRegion Description at [0.546, 0.625, 0.626, 0.801] : The player is wearing knee and leg pads..\nRegion Description at [0.018, 0.665, 0.280, 0.825] : A brown dirt ground surface on a baseball field.\nRegion Description at [0.676, 0.701, 0.974, 0.979] : White chalk lines painted on a baseball field.\nRegion Description at [0.062, 0.130, 0.370, 0.535] : A green grass ground surface of 
a baseball field.\nRegion Description at [0.566, 0.580, 0.620, 0.656] : A black and red bracelet on a man's wrist.\n\nGlobal Caption:\nA catches crouches on a patch of dirt.\nA catcher squatting at a base with his gloved hand extended.\nA baseball catcher stands ready to catch a ball.\na catcher kneeling at the mound waiting for a baseball \nA catcher in white uniform during a baseball game."} +{"question_id": 31, "image": "000000271402.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : blonde hair at [0.193, 0.100, 0.375, 0.176].\nObject 1 : building at [0.804, 0.200, 0.906, 0.318].\nObject 2 : dress at [0.378, 0.284, 0.804, 0.652].\nObject 3 : fence at [0.607, 0.282, 0.997, 0.378].\nObject 4 : girl at [0.329, 0.148, 0.973, 0.892].\nObject 5 : girl at [0.057, 0.102, 0.456, 0.898].\nObject 6 : ground at [0.000, 0.374, 1.000, 0.916].\nObject 7 : hair at [0.320, 0.148, 0.517, 0.286].\nObject 8 : handle at [0.329, 0.432, 0.508, 0.480].\nObject 9 : handle at [0.091, 0.450, 0.299, 0.502].\nObject 10 : head at [0.335, 0.152, 0.508, 0.314].\nObject 11 : insignia at [0.447, 0.350, 0.502, 0.390].\nObject 12 : orange platform at [0.181, 0.816, 0.489, 0.998].\nObject 13 : orange wheel at [0.193, 0.820, 0.248, 0.876].\nObject 14 : pavement at [0.009, 0.370, 0.994, 0.996].\nObject 15 : racket at [0.462, 0.480, 0.713, 0.840].\nObject 16 : right shoe at [0.465, 0.778, 0.610, 0.886].\nObject 17 : scooter at [0.097, 0.424, 0.592, 0.996].\nObject 18 : shoe at [0.060, 0.794, 0.202, 0.902].\nObject 19 : shoe at [0.302, 0.780, 0.453, 0.874].\nObject 20 : skirt at [0.471, 0.514, 0.804, 0.654].\nObject 21 : sneaker at [0.849, 0.738, 0.970, 0.886].\nObject 22 : sock at [0.317, 0.776, 0.347, 0.798].\nObject 23 : sock at [0.130, 0.790, 0.184, 0.810].\n\nRelationships:\nobject 4 : girl -> on -> object 14 : pavement.\nobject 5 : girl -> wearing -> object 22 : sock.\nobject 5 : girl -> wearing -> object 23 : sock.\nobject 4 : girl -> wearing -> object 20 : skirt.\nobject 4 : girl -> 
holding -> object 15 : racket.\nobject 5 : girl -> with -> object 0 : blonde hair.\nobject 17 : scooter -> with -> object 8 : handle.\nobject 1 : building -> with -> object 3 : fence.\nobject 4 : girl -> with -> object 11 : insignia.\nobject 13 : orange wheel -> of -> object 17 : scooter.\n\nRegion Description:\nRegion Description at [0.858, 0.760, 0.970, 0.852] : Girl is wearing blue, white, pink, and gray shoes..\nRegion Description at [0.293, 0.136, 0.976, 0.884] : a little girl holding a tennis racket..\nRegion Description at [0.060, 0.086, 0.462, 0.908] : A little girl standing near a scooter..\nRegion Description at [0.308, 0.146, 0.985, 0.892] : young girl wearing velcro strapped tennis shoes.\nRegion Description at [0.082, 0.436, 0.601, 0.996] : orange scooter board with black handles.\nRegion Description at [0.755, 0.184, 0.973, 0.372] : a tall building with fence in foreground.\nRegion Description at [0.021, 0.096, 0.988, 0.928] : two young girls wearing white outfits.\nRegion Description at [0.311, 0.136, 0.991, 0.886] : young girl with insignia on white outfit.\nRegion Description at [0.175, 0.814, 0.266, 0.888] : orange colored back wheel of a scooter board.\nRegion Description at [0.453, 0.478, 0.725, 0.848] : lavender, yellow and pink colored tennis racket.\n\nGlobal Caption:\ntwo little girls in tennis uniforms standing next to a scooter\nTwo young girls with a tennis racket and a scooter.\nTwo little girls posing for a picture, on a tennis court.\nTwo young girls on a tennis court with a racquet and a scooter\nTwo cute girls with a scooter and tennis raquet."} +{"question_id": 32, "image": "000000273493.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : ball at [0.640, 0.399, 0.648, 0.411].\nObject 1 : border at [0.040, 0.502, 1.000, 0.556].\nObject 2 : boundary lines at [0.030, 0.661, 1.000, 1.000].\nObject 3 : bushes at [0.020, 0.186, 0.104, 0.517].\nObject 4 : fence at [0.008, 0.366, 0.994, 0.565].\nObject 5 : fence at [0.024, 0.502, 
0.996, 0.709].\nObject 6 : grass at [0.004, 0.529, 0.994, 0.997].\nObject 7 : man at [0.144, 0.360, 0.246, 0.736].\nObject 8 : man at [0.730, 0.474, 0.780, 0.613].\nObject 9 : pants at [0.732, 0.529, 0.778, 0.604].\nObject 10 : shirt at [0.164, 0.411, 0.222, 0.547].\nObject 11 : shorts at [0.162, 0.535, 0.220, 0.628].\nObject 12 : sign at [0.916, 0.405, 0.934, 0.438].\nObject 13 : sky at [0.006, 0.021, 0.990, 0.279].\nObject 14 : sneakers at [0.180, 0.709, 0.216, 0.739].\nObject 15 : sneakers at [0.762, 0.598, 0.776, 0.613].\nObject 16 : tennis at [0.012, 0.384, 0.984, 0.934].\nObject 17 : tennis court at [0.000, 0.372, 0.988, 0.979].\nObject 18 : tennis racket at [0.768, 0.526, 0.808, 0.556].\nObject 19 : tennis racket at [0.214, 0.574, 0.238, 0.619].\nObject 20 : trees at [0.586, 0.282, 0.692, 0.420].\nObject 21 : white at [0.734, 0.492, 0.778, 0.601].\n\nRelationships:\nobject 7 : man -> in -> object 10 : shirt.\nobject 7 : man -> with -> object 19 : tennis racket.\nobject 7 : man -> plays -> object 16 : tennis.\nobject 7 : man -> wears -> object 14 : sneakers.\nobject 8 : man -> wears -> object 15 : sneakers.\nobject 7 : man -> wears -> object 11 : shorts.\nobject 8 : man -> wears -> object 9 : pants.\nobject 5 : fence -> has -> object 1 : border.\nobject 20 : trees -> behind -> object 3 : bushes.\nobject 2 : boundary lines -> on -> object 17 : tennis court.\nobject 2 : boundary lines -> on -> object 6 : grass.\nobject 3 : bushes -> behind -> object 4 : fence.\nobject 20 : trees -> behind -> object 4 : fence.\nobject 7 : man -> has -> object 19 : tennis racket.\nobject 8 : man -> wears -> object 21 : white.\nobject 4 : fence -> around -> object 17 : tennis court.\nobject 20 : trees -> behind -> object 8 : man.\nobject 6 : grass -> on -> object 17 : tennis court.\nobject 8 : man -> has -> object 18 : tennis racket.\nobject 8 : man -> hitting -> object 0 : ball.\nobject 5 : fence -> on -> object 17 : tennis court.\n\nRegion Description:\nRegion Description at 
[0.024, 0.489, 0.998, 0.730] : The tennis net separating the sides of the players..\nRegion Description at [0.144, 0.652, 0.234, 0.745] : The black sneakers the player is wearing..\nRegion Description at [0.720, 0.577, 0.784, 0.613] : The white sneakers the player is wearing..\nRegion Description at [0.158, 0.544, 0.230, 0.628] : The gray shorts the player is wearing..\nRegion Description at [0.006, 0.402, 0.998, 0.574] : The trimmed bushes behind the player..\nRegion Description at [0.008, 0.168, 0.998, 0.402] : The trees behind the trimmed bushes behind the player..\nRegion Description at [0.006, 0.604, 0.998, 0.985] : The white boundary lines on the tennis court..\nRegion Description at [0.020, 0.447, 0.994, 0.760] : A black and white net stretches across the field.\nRegion Description at [0.060, 0.526, 0.984, 0.985] : The field has green grass with white lines.\nRegion Description at [0.016, 0.369, 0.978, 0.595] : A tall green shrub is behind the fence.\nRegion Description at [0.034, 0.150, 0.984, 0.393] : Trees are seen behind the fence and shrub.\nRegion Description at [0.588, 0.327, 0.850, 0.703] : The yellow ball is flying towards the man.\nRegion Description at [0.902, 0.378, 0.956, 0.529] : A black circular sign with the number five.\nRegion Description at [0.142, 0.354, 0.248, 0.736] : male in white t-shirt playing tennis.\nRegion Description at [0.200, 0.565, 0.244, 0.625] : Head of tennis racket of man playing.\nRegion Description at [0.726, 0.465, 0.786, 0.631] : Man in white preparing to hit tennis ball.\n\nGlobal Caption:\nTwo men playing a game of tennis on a court.\ntwo people playing tennis with rackets on a grass court\nTwo young men playing a game of tennis.\nPeople playing tennis on a court surrounded by green hedges.\ntHERE ARE TWO MEN PLAYING TENNIS ON THE TENNIS COURT"} +{"question_id": 33, "image": "000000360960.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : coat at [0.405, 0.332, 0.835, 0.746].\nObject 1 : decorative square 
at [0.000, 0.382, 1.000, 1.000].\nObject 2 : hat at [0.006, 0.162, 0.072, 0.198].\nObject 3 : jacket at [0.078, 0.222, 0.318, 0.430].\nObject 4 : jeans at [0.853, 0.422, 1.000, 0.632].\nObject 5 : leg at [0.853, 0.456, 0.928, 0.610].\nObject 6 : leg at [0.210, 0.458, 0.303, 0.638].\nObject 7 : leg at [0.000, 0.458, 0.060, 0.630].\nObject 8 : man at [0.066, 0.162, 0.318, 0.686].\nObject 9 : man at [0.850, 0.156, 1.000, 0.652].\nObject 10 : man at [0.390, 0.344, 0.838, 0.894].\nObject 11 : pants at [0.523, 0.736, 0.739, 0.858].\nObject 12 : person at [0.000, 0.162, 0.135, 0.668].\nObject 13 : person at [0.853, 0.154, 1.000, 0.650].\nObject 14 : section at [0.000, 0.134, 1.000, 1.000].\nObject 15 : sidewalk at [0.000, 0.388, 1.000, 1.000].\nObject 16 : umbrella at [0.168, 0.106, 0.910, 0.366].\nObject 17 : uniform at [0.000, 0.222, 0.126, 0.646].\nObject 18 : uniform at [0.105, 0.218, 0.318, 0.628].\n\nRelationships:\nobject 10 : man -> wearing -> object 11 : pants.\nobject 10 : man -> wearing -> object 0 : coat.\nobject 9 : man -> wearing -> object 4 : jeans.\nobject 8 : man -> wearing -> object 2 : hat.\nobject 8 : man -> wearing -> object 3 : jacket.\nobject 16 : umbrella -> has -> object 14 : section.\nobject 5 : leg -> of -> object 13 : person.\nobject 7 : leg -> of -> object 12 : person.\nobject 12 : person -> in -> object 17 : uniform.\n\nRegion Description:\nRegion Description at [0.066, 0.164, 0.318, 0.686] : the back of a man in a black uniform.\nRegion Description at [0.393, 0.324, 0.871, 0.766] : THIS MAN IS WEARING A LONG BLACK COAT.\nRegion Description at [0.468, 0.142, 0.634, 0.356] : THIS IS A RED SECTION ON THE UMBRELLA.\nRegion Description at [0.168, 0.140, 0.523, 0.292] : THIS IS A YELLOW SECTION ON THE UMBRELLA.\nRegion Description at [0.568, 0.138, 0.919, 0.232] : THIS IS A GREEN SECTION OF THE UMBRELLA.\n\nGlobal Caption:\nSeveral people walking on a sidewalk, with one man holding an umbrella.\nA person walking while carrying a rainbow 
umbrella\nA person is holding up a large colorful umbrella\na person walking down the street carrying a rainbow colored umbrella\nA person walking in a square carrying a rainbow colored umbrella."} +{"question_id": 34, "image": "000000452122.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : airline at [0.408, 0.420, 0.758, 0.502].\nObject 1 : airplane at [0.112, 0.300, 0.858, 0.640].\nObject 2 : engine at [0.652, 0.529, 0.730, 0.592].\nObject 3 : engine at [0.494, 0.502, 0.574, 0.577].\nObject 4 : fin at [0.208, 0.303, 0.320, 0.492].\nObject 5 : fin at [0.116, 0.480, 0.284, 0.526].\nObject 6 : front door at [0.752, 0.435, 0.772, 0.483].\nObject 7 : gear at [0.450, 0.592, 0.600, 0.643].\nObject 8 : letters at [0.694, 0.489, 0.732, 0.520].\nObject 9 : name at [0.398, 0.426, 0.760, 0.489].\nObject 10 : sky at [0.000, 0.000, 0.998, 1.000].\nObject 11 : window at [0.806, 0.438, 0.844, 0.456].\nObject 12 : windows at [0.326, 0.450, 0.750, 0.532].\nObject 13 : wing at [0.152, 0.426, 0.598, 0.538].\nObject 14 : wing at [0.116, 0.492, 0.282, 0.538].\n\nRelationships:\nobject 6 : front door -> of -> object 1 : airplane.\n\nRegion Description:\n\nGlobal Caption:\nAn airplane flying in the air during the day.\nA large aircraft is shown in the air.\nThe large jumbo jet has it's landing gear lowered.\nA large white airplane flies in the gray sky.\nAn airplane in route with a cloudy sky behind it."} +{"question_id": 35, "image": "000000134722.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : awning at [0.886, 0.000, 1.000, 0.240].\nObject 1 : awning at [0.000, 0.299, 0.132, 0.403].\nObject 2 : bench at [0.000, 0.592, 0.066, 0.683].\nObject 3 : building at [0.000, 0.299, 0.204, 0.659].\nObject 4 : canopy at [0.000, 0.301, 0.136, 0.400].\nObject 5 : car at [0.290, 0.400, 0.998, 0.784].\nObject 6 : clouds at [0.374, 0.067, 0.920, 0.312].\nObject 7 : door opening at [0.658, 0.501, 0.682, 0.680].\nObject 8 : door opening at [0.678, 0.509, 0.710, 0.675].\nObject 
9 : exterior at [0.000, 0.400, 0.200, 0.669].\nObject 10 : front at [0.294, 0.400, 0.494, 0.739].\nObject 11 : gravel at [0.090, 0.837, 0.334, 0.997].\nObject 12 : headlights at [0.416, 0.624, 0.446, 0.656].\nObject 13 : headlights at [0.300, 0.624, 0.324, 0.651].\nObject 14 : markings at [0.606, 0.821, 0.770, 0.928].\nObject 15 : panel at [0.304, 0.421, 0.450, 0.677].\nObject 16 : pole at [0.030, 0.419, 0.062, 0.656].\nObject 17 : railway tracks at [0.000, 0.752, 0.520, 0.944].\nObject 18 : side walk at [0.192, 0.712, 1.000, 0.997].\nObject 19 : sky at [0.000, 0.000, 0.998, 0.560].\nObject 20 : train stop at [0.000, 0.000, 1.000, 1.000].\nObject 21 : trees at [0.208, 0.253, 0.322, 0.653].\nObject 22 : trim at [0.000, 0.333, 0.132, 0.403].\nObject 23 : wall at [0.000, 0.392, 0.206, 0.611].\nObject 24 : wheel at [0.844, 0.669, 0.884, 0.728].\nObject 25 : wheel at [0.792, 0.675, 0.840, 0.747].\nObject 26 : wheel at [0.516, 0.691, 0.620, 0.808].\nObject 27 : window at [0.316, 0.451, 0.458, 0.595].\nObject 28 : windows at [0.700, 0.547, 0.848, 0.632].\nObject 29 : windsheild wipers at [0.348, 0.499, 0.410, 0.584].\n\nRelationships:\nobject 6 : clouds -> in -> object 19 : sky.\nobject 2 : bench -> in -> object 4 : canopy.\nobject 22 : trim -> on -> object 1 : awning.\nobject 11 : gravel -> next to -> object 17 : railway tracks.\nobject 14 : markings -> on side of -> object 18 : side walk.\nobject 5 : car -> on -> object 17 : railway tracks.\n\nRegion Description:\nRegion Description at [0.288, 0.392, 0.510, 0.741] : the front of the train is yellow and white.\nRegion Description at [0.320, 0.451, 0.460, 0.592] : the front window of the train has windshield wipers.\nRegion Description at [0.292, 0.592, 0.456, 0.739] : the headlights are on front of the train.\nRegion Description at [0.010, 0.405, 0.220, 0.736] : a red brick wall is near the platform.\nRegion Description at [0.000, 0.288, 0.128, 0.707] : an aluminum canopy is on the platform.\nRegion Description at 
[0.016, 0.325, 0.100, 0.672] : a red steel pole is holding up the awning.\nRegion Description at [0.306, 0.395, 0.998, 0.733] : the train has windowed passenger cars.\nRegion Description at [0.300, 0.427, 0.492, 0.693] : the yellow and white front of a train.\nRegion Description at [0.510, 0.744, 0.834, 0.891] : white painted line beside a train track.\nRegion Description at [0.298, 0.408, 0.468, 0.661] : a yellow panel on the front of the train.\nRegion Description at [0.002, 0.397, 0.210, 0.675] : a red brick building on the side of the tracks.\nRegion Description at [0.844, 0.000, 0.998, 0.248] : an awning of a structure next to the train tracks.\nRegion Description at [0.294, 0.360, 0.516, 0.787] : front of a train car in yellow, white and blue.\nRegion Description at [0.194, 0.221, 0.286, 0.901] : trees on the side of a train station.\nRegion Description at [0.580, 0.821, 0.764, 0.931] : markings on the side of railway tracks.\nRegion Description at [0.632, 0.491, 0.726, 0.691] : white, blue and grey doors on the side of a train car.\nRegion Description at [0.500, 0.096, 0.916, 0.531] : skyline on the side of a train station.\n\nGlobal Caption:\nFast commuter train moving past an outdoor platform.\nA train on the track pulling by a train station.\nA train pulling into a station outside during the day.\nA passenger train moving through a rail yard\na long passenger train pulling up to a station"} +{"question_id": 36, "image": "000000039484.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : area at [0.002, 0.000, 0.998, 1.000].\nObject 1 : awning at [0.804, 0.625, 1.000, 0.718].\nObject 2 : billboard at [0.172, 0.367, 0.306, 0.484].\nObject 3 : building at [0.854, 0.246, 0.998, 0.815].\nObject 4 : building at [0.428, 0.537, 0.536, 0.801].\nObject 5 : building at [0.670, 0.481, 0.860, 0.827].\nObject 6 : building at [0.124, 0.531, 0.350, 0.798].\nObject 7 : car at [0.488, 0.774, 0.650, 0.900].\nObject 8 : car at [0.276, 0.762, 0.334, 0.850].\nObject 9 
: car at [0.002, 0.804, 0.180, 1.000].\nObject 10 : car at [0.608, 0.768, 0.702, 0.894].\nObject 11 : car at [0.366, 0.783, 0.472, 0.871].\nObject 12 : car at [0.326, 0.757, 0.424, 0.865].\nObject 13 : car at [0.216, 0.786, 0.280, 0.839].\nObject 14 : car at [0.054, 0.833, 0.418, 1.000].\nObject 15 : car at [0.670, 0.768, 0.820, 0.909].\nObject 16 : crown at [0.176, 0.680, 0.210, 0.713].\nObject 17 : for lease at [0.542, 0.639, 0.602, 0.657].\nObject 18 : hat at [0.762, 0.525, 0.790, 0.554].\nObject 19 : lamp post at [0.604, 0.196, 0.738, 0.777].\nObject 20 : light at [0.718, 0.531, 0.740, 0.560].\nObject 21 : light at [0.612, 0.199, 0.650, 0.223].\nObject 22 : man at [0.756, 0.528, 0.794, 0.598].\nObject 23 : marquee at [0.526, 0.613, 0.644, 0.686].\nObject 24 : omnifest at [0.188, 0.431, 0.266, 0.463].\nObject 25 : outside at [0.000, 0.000, 0.998, 0.997].\nObject 26 : people at [0.816, 0.783, 0.990, 0.880].\nObject 27 : person at [0.856, 0.798, 0.890, 0.827].\nObject 28 : person at [0.880, 0.789, 0.910, 0.827].\nObject 29 : person at [0.944, 0.783, 0.966, 0.880].\nObject 30 : person at [0.956, 0.783, 0.984, 0.877].\nObject 31 : person at [0.906, 0.789, 0.924, 0.830].\nObject 32 : person at [0.368, 0.762, 0.402, 0.871].\nObject 33 : person at [0.916, 0.795, 0.942, 0.880].\nObject 34 : plane at [0.944, 0.026, 0.988, 0.053].\nObject 35 : pole at [0.002, 0.648, 0.076, 0.657].\nObject 36 : shirt at [0.856, 0.809, 0.892, 0.830].\nObject 37 : sign at [0.872, 0.525, 0.972, 0.595].\nObject 38 : sign at [0.738, 0.522, 0.796, 0.613].\nObject 39 : sign at [0.652, 0.326, 0.692, 0.372].\nObject 40 : sign at [0.432, 0.484, 0.522, 0.531].\nObject 41 : sky at [0.002, 0.000, 1.000, 0.630].\nObject 42 : street at [0.326, 0.853, 1.000, 1.000].\nObject 43 : street sign at [0.018, 0.645, 0.068, 0.666].\nObject 44 : table at [0.888, 0.824, 0.918, 0.880].\nObject 45 : van at [0.328, 0.754, 0.420, 0.865].\nObject 46 : wheel at [0.486, 0.839, 0.510, 0.891].\nObject 47 : wheel at [0.530, 
0.842, 0.556, 0.894].\nObject 48 : wheel at [0.616, 0.865, 0.642, 0.903].\nObject 49 : wheel at [0.572, 0.880, 0.592, 0.900].\nObject 50 : window at [0.746, 0.777, 0.810, 0.812].\nObject 51 : window at [0.904, 0.413, 0.930, 0.516].\n\nRelationships:\nobject 7 : car -> on -> object 42 : street.\nobject 33 : person -> sitting -> object 25 : outside.\nobject 29 : person -> sitting -> object 25 : outside.\nobject 27 : person -> sitting -> object 25 : outside.\nobject 28 : person -> sitting -> object 25 : outside.\nobject 30 : person -> sitting -> object 25 : outside.\nobject 1 : awning -> on -> object 3 : building.\nobject 37 : sign -> on -> object 3 : building.\nobject 2 : billboard -> at top of -> object 6 : building.\nobject 32 : person -> getting out of -> object 11 : car.\nobject 19 : lamp post -> next to -> object 42 : street.\nobject 27 : person -> sitting at -> object 44 : table.\nobject 33 : person -> sitting at -> object 44 : table.\nobject 28 : person -> sitting at -> object 44 : table.\nobject 31 : person -> sitting at -> object 44 : table.\nobject 27 : person -> wearing -> object 36 : shirt.\nobject 2 : billboard -> on -> object 6 : building.\nobject 32 : person -> getting out of -> object 11 : car.\nobject 27 : person -> sitting -> object 25 : outside.\nobject 28 : person -> sitting -> object 25 : outside.\nobject 31 : person -> sitting -> object 25 : outside.\nobject 33 : person -> sitting -> object 25 : outside.\nobject 29 : person -> sitting -> object 25 : outside.\nobject 17 : for lease -> on -> object 23 : marquee.\nobject 47 : wheel -> on -> object 7 : car.\nobject 46 : wheel -> on -> object 7 : car.\nobject 48 : wheel -> on -> object 7 : car.\nobject 49 : wheel -> on -> object 7 : car.\nobject 51 : window -> on -> object 3 : building.\nobject 9 : car -> on -> object 42 : street.\nobject 39 : sign -> above -> object 5 : building.\nobject 14 : car -> parked on -> object 42 : street.\nobject 10 : car -> parked on -> object 42 : street.\nobject 13 : 
car -> parked on -> object 42 : street.\nobject 8 : car -> parked on -> object 42 : street.\nobject 12 : car -> parked on -> object 42 : street.\nobject 43 : street sign -> on -> object 35 : pole.\nobject 24 : omnifest -> on -> object 2 : billboard.\nobject 50 : window -> on -> object 15 : car.\nobject 17 : for lease -> on -> object 23 : marquee.\nobject 20 : light -> on -> object 19 : lamp post.\nobject 21 : light -> on -> object 19 : lamp post.\nobject 34 : plane -> in -> object 41 : sky.\nobject 32 : person -> getting out of -> object 11 : car.\nobject 24 : omnifest -> on -> object 2 : billboard.\nobject 45 : van -> in front of -> object 4 : building.\nobject 22 : man -> wearing -> object 18 : hat.\nobject 22 : man -> on -> object 38 : sign.\n\nRegion Description:\nRegion Description at [0.844, 0.777, 0.958, 0.897] : people sitting at table in front of restaurant.\nRegion Description at [0.530, 0.625, 0.640, 0.683] : a sign on a marquee that says \"for lease\".\nRegion Description at [0.166, 0.352, 0.296, 0.472] : large omnifest billboard advertisement.\nRegion Description at [0.074, 0.833, 0.428, 0.994] : parked silver car with tinted windshield.\nRegion Description at [0.088, 0.760, 0.178, 0.824] : Van in front of red building is purple.\n\nGlobal Caption:\nA variety of cars on a street with buildings.\nA variety of shops and stores on a busy city street.\nA busy street crammed with businesses and parked cars.\nA street full of cars that are driving and parked and a group of people sitting outside eating.\nA city has colorful buildings and signs among cars."} +{"question_id": 37, "image": "000000159311.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : eye at [0.566, 0.526, 0.592, 0.565].\nObject 1 : grass at [0.004, 0.808, 0.118, 0.991].\nObject 2 : grass at [0.206, 0.853, 0.356, 0.982].\nObject 3 : leg at [0.232, 0.375, 0.312, 0.805].\nObject 4 : plant at [0.500, 0.736, 0.618, 0.796].\nObject 5 : sitck at [0.746, 0.042, 0.912, 0.339].\nObject 6 : 
zebra at [0.000, 0.000, 0.622, 0.790].\nObject 7 : zebra at [0.002, 0.000, 0.684, 0.682].\n\nRelationships:\nobject 7 : zebra -> eating -> object 4 : plant.\nobject 6 : zebra -> standing in -> object 1 : grass.\nobject 7 : zebra -> standing in -> object 1 : grass.\nobject 7 : zebra -> grazing in -> object 1 : grass.\nobject 6 : zebra -> grazing in -> object 1 : grass.\n\nRegion Description:\nRegion Description at [0.352, 0.093, 0.602, 0.393] : thin line of hair running down the neck.\n\nGlobal Caption:\nA pair of zebra's leaning over eating grass in a field.\nTwo zebra stand near bushes and tall grass.\nTwo zebras grazing from grass next to a tree.\nTwo zebra standing next to each other on a lush green field.\nTwo zebras are feeding on the grass by themselves."} +{"question_id": 38, "image": "000000326174.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : beach at [0.000, 0.720, 0.998, 1.000].\nObject 1 : boy at [0.792, 0.480, 0.938, 0.853].\nObject 2 : child at [0.322, 0.587, 0.376, 0.835].\nObject 3 : child at [0.320, 0.587, 0.374, 0.835].\nObject 4 : girl at [0.444, 0.539, 0.534, 0.856].\nObject 5 : man at [0.140, 0.443, 0.216, 0.845].\nObject 6 : man at [0.434, 0.459, 0.500, 0.760].\nObject 7 : man at [0.578, 0.459, 0.682, 0.845].\nObject 8 : ocean waters at [0.590, 0.419, 0.892, 0.629].\nObject 9 : people at [0.206, 0.456, 0.352, 0.851].\nObject 10 : person at [0.792, 0.480, 0.936, 0.851].\nObject 11 : shirt at [0.592, 0.496, 0.670, 0.629].\nObject 12 : shore at [0.000, 0.360, 0.998, 0.997].\nObject 13 : surfboard at [0.306, 0.709, 0.538, 0.853].\nObject 14 : surfboard at [0.790, 0.587, 0.960, 0.691].\nObject 15 : water at [0.384, 0.368, 0.544, 0.435].\nObject 16 : waves at [0.656, 0.709, 0.794, 0.779].\nObject 17 : wetsuit at [0.326, 0.629, 0.372, 0.773].\nObject 18 : woman at [0.208, 0.499, 0.304, 0.629].\n\nRelationships:\nobject 1 : boy -> holding -> object 14 : surfboard.\nobject 5 : man -> and -> object 18 : woman.\nobject 18 : woman -> and -> 
object 3 : child.\nobject 16 : waves -> coming to -> object 12 : shore.\nobject 7 : man -> looking down to -> object 15 : water.\nobject 2 : child -> with -> object 17 : wetsuit.\nobject 6 : man -> looking back to -> object 4 : girl.\nobject 4 : girl -> pulling -> object 13 : surfboard.\nobject 9 : people -> on -> object 0 : beach.\nobject 7 : man -> wearing -> object 11 : shirt.\n\nRegion Description:\nRegion Description at [0.096, 0.437, 0.970, 0.872] : Seven people headed to the water to surf..\nRegion Description at [0.390, 0.531, 0.540, 0.851] : Girl in yellow shirt and pony tail. .\nRegion Description at [0.312, 0.581, 0.374, 0.851] : Small child with red and black wetsuit..\nRegion Description at [0.578, 0.443, 0.688, 0.856] : Man with white shirt and grey wetsuit pants..\nRegion Description at [0.436, 0.440, 0.534, 0.872] : Man looking back to girl pulling surfboard..\nRegion Description at [0.444, 0.459, 0.552, 0.853] : A man and a little girl having a conversation.\nRegion Description at [0.104, 0.419, 0.314, 0.851] : A man and a woman walking toward the water.\n\nGlobal Caption:\nA group of people are taking surfing lessons.\nA group of men, women and children walking toward the water with surfboards.\nA mixed age group is going toward the ocean with surfboards.\nA group of surfers are carrying their surf boards into the ocean.\nSeveral people are getting ready to enter the water for surfing."} +{"question_id": 39, "image": "000000562207.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : body at [0.166, 0.539, 0.296, 0.997].\nObject 1 : boot at [0.594, 0.753, 0.620, 0.870].\nObject 2 : boot at [0.620, 0.744, 0.658, 0.858].\nObject 3 : bucket at [0.268, 0.744, 0.322, 0.828].\nObject 4 : clouds at [0.156, 0.000, 0.968, 0.328].\nObject 5 : ear at [0.590, 0.226, 0.638, 0.410].\nObject 6 : ear at [0.368, 0.208, 0.448, 0.434].\nObject 7 : elephant at [0.328, 0.157, 0.638, 0.967].\nObject 8 : eye at [0.476, 0.319, 0.504, 0.346].\nObject 9 : foot at 
[0.436, 0.901, 0.516, 0.958].\nObject 10 : grass at [0.950, 0.759, 0.996, 0.807].\nObject 11 : leg at [0.498, 0.572, 0.548, 0.898].\nObject 12 : leg at [0.408, 0.512, 0.516, 0.955].\nObject 13 : man at [0.582, 0.476, 0.662, 0.870].\nObject 14 : man at [0.164, 0.455, 0.292, 0.997].\nObject 15 : mountains at [0.000, 0.265, 0.376, 0.470].\nObject 16 : rock at [0.736, 0.895, 0.762, 0.934].\nObject 17 : sand at [0.240, 0.687, 0.998, 1.000].\nObject 18 : shirt at [0.582, 0.521, 0.650, 0.681].\nObject 19 : shorts at [0.174, 0.699, 0.254, 0.864].\nObject 20 : side at [0.236, 0.675, 0.994, 0.997].\nObject 21 : skirt at [0.298, 0.687, 0.360, 0.810].\nObject 22 : sky at [0.004, 0.000, 0.998, 0.355].\nObject 23 : top at [0.302, 0.539, 0.358, 0.696].\nObject 24 : tree at [0.012, 0.407, 0.076, 0.500].\nObject 25 : trunk at [0.506, 0.392, 0.600, 0.964].\nObject 26 : watch at [0.172, 0.711, 0.192, 0.732].\nObject 27 : water at [0.000, 0.488, 0.994, 1.000].\nObject 28 : woman at [0.288, 0.473, 0.420, 0.967].\n\nRelationships:\nobject 7 : elephant -> on -> object 20 : side.\nobject 28 : woman -> touching -> object 7 : elephant.\nobject 14 : man -> standing on -> object 20 : side.\nobject 14 : man -> standing beside -> object 7 : elephant.\nobject 10 : grass -> on -> object 20 : side.\nobject 28 : woman -> wearing -> object 23 : top.\nobject 13 : man -> wearing -> object 18 : shirt.\nobject 13 : man -> wearing -> object 1 : boot.\nobject 13 : man -> wearing -> object 2 : boot.\nobject 28 : woman -> touching -> object 7 : elephant.\nobject 7 : elephant -> has -> object 25 : trunk.\nobject 14 : man -> wearing -> object 19 : shorts.\nobject 28 : woman -> petting -> object 7 : elephant.\nobject 14 : man -> with -> object 7 : elephant.\nobject 28 : woman -> with -> object 7 : elephant.\nobject 13 : man -> with -> object 7 : elephant.\nobject 25 : trunk -> of -> object 7 : elephant.\nobject 25 : trunk -> of -> object 7 : elephant.\nobject 9 : foot -> of an -> object 7 : elephant.\nobject 
25 : trunk -> of -> object 7 : elephant.\nobject 11 : leg -> of -> object 7 : elephant.\nobject 12 : leg -> of -> object 7 : elephant.\nobject 5 : ear -> of -> object 7 : elephant.\nobject 6 : ear -> of -> object 7 : elephant.\nobject 8 : eye -> of -> object 7 : elephant.\nobject 27 : water -> behind -> object 7 : elephant.\n\nRegion Description:\nRegion Description at [0.338, 0.139, 0.618, 0.967] : the elephant standing on the lake side.\nRegion Description at [0.154, 0.392, 0.300, 0.964] : a man standing on the lake side with shorts.\nRegion Description at [0.574, 0.422, 0.686, 0.910] : the man standing beside the elephant.\nRegion Description at [0.292, 0.485, 0.378, 0.705] : this lady is wearing a blue tank top.\nRegion Description at [0.722, 0.768, 0.988, 0.964] : the sand is brown with green grass growing in it.\nRegion Description at [0.156, 0.669, 0.270, 0.910] : the man is wearing grey black and white shorts.\nRegion Description at [0.504, 0.560, 0.568, 0.898] : The front right leg of the elephant..\nRegion Description at [0.310, 0.536, 0.358, 0.690] : The light blue tank top the girl is wearing..\nRegion Description at [0.262, 0.732, 0.326, 0.825] : The black bucket in the girl's hand..\nRegion Description at [0.002, 0.443, 0.992, 0.994] : The water behind the people and the elephant..\n\nGlobal Caption:\nA group of people are standing next to an elephant emerging from the water.\na group of people stand beside of a giant elephant \nThree tourists pose for a picture next to an elephant.\nThree people stand with an elephant in front of a stream.\nThree people standing next to an elephant along a river."} +{"question_id": 40, "image": "000000332318.jpg", "category": "refer_desc", "text": "Objects:\nObject 0 : background at [0.000, 0.000, 1.002, 0.997].\nObject 1 : bench at [0.604, 0.967, 0.672, 0.997].\nObject 2 : cow at [0.548, 0.860, 0.574, 0.896].\nObject 3 : cow at [0.436, 0.860, 0.454, 0.890].\nObject 4 : fence at [0.698, 0.949, 0.852, 0.997].\nObject 
5 : moutain at [0.000, 0.057, 0.992, 0.782].\nObject 6 : pasture at [0.000, 0.815, 0.984, 1.000].\nObject 7 : peak at [0.744, 0.042, 0.898, 0.119].\nObject 8 : sky at [0.000, 0.000, 1.002, 0.257].\nObject 9 : snow at [0.210, 0.036, 0.962, 0.445].\nObject 10 : trailer at [0.796, 0.910, 0.894, 0.997].\nObject 11 : trailer at [0.632, 0.899, 0.742, 0.994].\nObject 12 : tree at [0.740, 0.409, 1.000, 0.982].\nObject 13 : tree at [0.638, 0.284, 0.652, 0.301].\n\nRelationships:\nobject 11 : trailer -> in -> object 6 : pasture.\nobject 5 : moutain -> has -> object 9 : snow.\nobject 6 : pasture -> near -> object 5 : moutain.\nobject 3 : cow -> in -> object 6 : pasture.\nobject 2 : cow -> in -> object 6 : pasture.\nobject 9 : snow -> on -> object 5 : moutain.\nobject 5 : moutain -> covered in -> object 9 : snow.\nobject 5 : moutain -> has -> object 7 : peak.\nobject 2 : cow -> in -> object 6 : pasture.\nobject 5 : moutain -> in -> object 0 : background.\nobject 5 : moutain -> has -> object 9 : snow.\nobject 11 : trailer -> near -> object 12 : tree.\nobject 5 : moutain -> has -> object 13 : tree.\nobject 7 : peak -> covered with -> object 9 : snow.\n\nRegion Description:\nRegion Description at [0.784, 0.901, 0.934, 0.991] : storage container for animal equipment.\nRegion Description at [0.828, 0.060, 0.880, 0.125] : The mountain is partially covered in snow..\nRegion Description at [0.840, 0.899, 0.920, 0.997] : horse trailer or cow trailer is silvertone, rectangular.\nRegion Description at [0.606, 0.919, 0.640, 0.982] : smaller trailer, white w/ brown+orange stripe.\nRegion Description at [0.060, 0.472, 0.540, 0.806] : a bare patch of earth amid lush green growth.\nRegion Description at [0.034, 0.839, 0.812, 0.973] : tiny cattle-containing fenceposts in the distance.\nRegion Description at [0.902, 0.827, 0.990, 0.997] : a split tree trunk in shadow, beneath leaves, shadow on ground.\nRegion Description at [0.734, 0.919, 0.802, 0.994] : an older station wagon/suv-type van 
thing.\nRegion Description at [0.090, 0.854, 0.124, 0.904] : a black & white animal stands alone, away from brown brethren, in the far distance.\n\nGlobal Caption:\nCows lounge in a field with a mountain backdrop.\nA VERY BIG MOUNTAIN AND ANIMALS SPREAD ACROSS A FARM.\nSeveral herd animals are on the grass by a mountain.\nCattle on a level pasture in a mountainous area.\nA bunch of cattle relax in a pasture located in the mountains"} diff --git a/evaluation/Ferret-Bench/ferret_gpt4_data/refer_desc/question.jsonl b/evaluation/Ferret-Bench/ferret_gpt4_data/refer_desc/question.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..1141a4e4ab49418302a4666b0aa5345a4e487b35 --- /dev/null +++ b/evaluation/Ferret-Bench/ferret_gpt4_data/refer_desc/question.jsonl @@ -0,0 +1,40 @@ +{"question_id": 0, "image": "000000069138.jpg", "category": "refer_desc", "text": "What is the interaction between the object [0.621, 0.082, 0.772, 0.132] and its surroundings?"} +{"question_id": 1, "image": "000000131138.jpg", "category": "refer_desc", "text": "What is the interaction between the object [0.183, 0.799, 0.326, 0.896] and its surrounding?"} +{"question_id": 2, "image": "000000475150.jpg", "category": "refer_desc", "text": "What can you tell about the region [0.288, 0.324, 0.572, 0.649] and its interaction with the surrounding areas?"} +{"question_id": 3, "image": "000000356424.jpg", "category": "refer_desc", "text": "What is happening in the region [0.528, 0.254, 0.717, 0.666] and what is its relationship to the surrounding objects?"} +{"question_id": 4, "image": "000000491090.jpg", "category": "refer_desc", "text": "What can be said about the region [0.102, 0.498, 0.329, 0.692] in relation to nearby objects or elements?"} +{"question_id": 5, "image": "000000484415.jpg", "category": "refer_desc", "text": "What can be observed in the region [0.716, 0.192, 0.894, 0.550] and how does it interact with the surroundings?"} +{"question_id": 7, "image": 
"000000184324.jpg", "category": "refer_desc", "text": "What is happening within the region [0.564, 0.771, 0.876, 0.991] and how is it related to the nearby objects?"} +{"question_id": 8, "image": "000000341058.jpg", "category": "refer_desc", "text": "What is the object [0.619, 0.838, 0.633, 0.850] and what is its relationship with nearby objects?"} +{"question_id": 9, "image": "000000184384.jpg", "category": "refer_desc", "text": "What can you tell about the objects found in the region [0.628, 0.120, 0.998, 0.389]?"} +{"question_id": 10, "image": "000000259097.jpg", "category": "refer_desc", "text": "What can be said about the region [0.012, 0.520, 0.996, 0.631] in relation to the surrounding areas?"} +{"question_id": 11, "image": "000000377882.jpg", "category": "refer_desc", "text": "What can you tell about the region [0.242, 0.211, 0.302, 0.989] and its surrounding context?"} +{"question_id": 12, "image": "000000415748.jpg", "category": "refer_desc", "text": "What can you tell about the object [0.084, 0.438, 0.727, 0.954] and its interaction with nearby objects?"} +{"question_id": 13, "image": "000000408120.jpg", "category": "refer_desc", "text": "What can you see within the region [0.394, 0.565, 0.570, 0.718] and what is its interaction with nearby objects?"} +{"question_id": 14, "image": "000000184400.jpg", "category": "refer_desc", "text": "What is the interaction between the object [0.602, 0.837, 0.696, 0.997] and its surrounding objects?"} +{"question_id": 15, "image": "000000276018.jpg", "category": "refer_desc", "text": "What can you tell me about the region [0.071, 0.378, 0.498, 0.842] and its interactions with nearby objects?"} +{"question_id": 16, "image": "000000376322.jpg", "category": "refer_desc", "text": "What is the interaction between objects in the region [0.668, 0.252, 0.909, 0.622]?"} +{"question_id": 17, "image": "000000125472.jpg", "category": "refer_desc", "text": "What can you tell about the region [0.201, 0.002, 0.940, 0.758] and its 
interaction with surrounding objects?"} +{"question_id": 18, "image": "000000361551.jpg", "category": "refer_desc", "text": "Can you tell me about the interaction happening in the region [0.957, 0.616, 0.997, 0.670] and its context?"} +{"question_id": 19, "image": "000000412240.jpg", "category": "refer_desc", "text": "What can be said about the region [0.002, 0.437, 0.720, 0.787] in terms of the surrounding objects and their interactions?"} +{"question_id": 20, "image": "000000130566.jpg", "category": "refer_desc", "text": "What can you tell about the region [0.630, 0.471, 0.682, 0.550] and its interaction with the surrounding environment?"} +{"question_id": 21, "image": "000000421923.jpg", "category": "refer_desc", "text": "What is the relationship between the object [0.838, 0.774, 0.994, 0.974] and object [0.796, 0.462, 0.982, 0.550]?"} +{"question_id": 22, "image": "000000513567.jpg", "category": "refer_desc", "text": "What is happening in the region [0.926, 0.253, 0.998, 0.645]?"} +{"question_id": 23, "image": "000000543300.jpg", "category": "refer_desc", "text": "What can you tell about the region [0.414, 0.691, 0.662, 0.725] and how it relates to the surroundings?"} +{"question_id": 24, "image": "000000241668.jpg", "category": "refer_desc", "text": "What is happening in the region [0.487, 0.136, 0.808, 0.986]?"} +{"question_id": 25, "image": "000000535578.jpg", "category": "refer_desc", "text": "What can you tell about the region [0.189, 0.032, 0.703, 0.178] and its surrounding areas?"} +{"question_id": 26, "image": "000000277051.jpg", "category": "refer_desc", "text": "Describe the bird [0.384, 0.372, 0.698, 0.787] and its interactions with surrounding objects?"} +{"question_id": 27, "image": "000000018519.jpg", "category": "refer_desc", "text": "What are the details of the region [0.279, 0.524, 0.341, 0.570] and how does it relate to the nearby objects?"} +{"question_id": 28, "image": "000000106048.jpg", "category": "refer_desc", "text": "Can you describe 
what's happening in the region [0.222, 0.144, 0.820, 0.757]?"} +{"question_id": 29, "image": "000000058393.jpg", "category": "refer_desc", "text": "What can you say about the interaction between objects in the region [0.542, 0.343, 0.812, 0.493]?"} +{"question_id": 30, "image": "000000010764.jpg", "category": "refer_desc", "text": "Referencing the region [0.546, 0.625, 0.626, 0.801], can you describe what you see and how it interacts with the surrounding context?"} +{"question_id": 31, "image": "000000271402.jpg", "category": "refer_desc", "text": "What can you tell me about the region [0.060, 0.086, 0.462, 0.908] and its relation to nearby objects?"} +{"question_id": 32, "image": "000000273493.jpg", "category": "refer_desc", "text": "What is happening in the region [0.588, 0.327, 0.850, 0.703] with regard to its surroundings?"} +{"question_id": 33, "image": "000000360960.jpg", "category": "refer_desc", "text": "Can you describe the region [0.524, 0.740, 0.734, 0.856] and its interaction with the surroundings?"} +{"question_id": 34, "image": "000000452122.jpg", "category": "refer_desc", "text": "What is happening in the region [0.650, 0.428, 0.858, 0.600]?"} +{"question_id": 35, "image": "000000134722.jpg", "category": "refer_desc", "text": "What can you say about the region [0.320, 0.451, 0.460, 0.592] and its relation with nearby objects?"} +{"question_id": 36, "image": "000000039484.jpg", "category": "refer_desc", "text": "What is happening in the region [0.844, 0.777, 0.958, 0.897] and how does this relate to the surrounding area?"} +{"question_id": 37, "image": "000000159311.jpg", "category": "refer_desc", "text": "What can you tell about the region [0.206, 0.853, 0.356, 0.982] considering the surrounding entities and their interactions?"} +{"question_id": 38, "image": "000000326174.jpg", "category": "refer_desc", "text": "Can you describe the interaction or relationship between the objects in the region [0.444, 0.459, 0.552, 0.853]?"} +{"question_id": 39, 
"image": "000000562207.jpg", "category": "refer_desc", "text": "Can you describe what's happening in the region [0.154, 0.392, 0.300, 0.964] and how it relates to nearby objects or individuals?"} +{"question_id": 40, "image": "000000332318.jpg", "category": "refer_desc", "text": "What can you tell about the region [0.436, 0.860, 0.454, 0.890] and how does it relate to the rest of the scene?"} diff --git a/evaluation/Ferret-Bench/ferret_gpt4_data/rule.json b/evaluation/Ferret-Bench/ferret_gpt4_data/rule.json new file mode 100644 index 0000000000000000000000000000000000000000..7294372c37a477fa82125f1b2ba3ae7c3df0e000 --- /dev/null +++ b/evaluation/Ferret-Bench/ferret_gpt4_data/rule.json @@ -0,0 +1,5 @@ +{ + "refer_desc": {"role": "Assistant", "prompt": "We would like to request your feedback on the performance of two AI assistants in response to the user question displayed above. The user asks the question about specific region of an image. For your reference, the visual content in the image is represented with five descriptive sentences describing the same image. In addition, specific object locations within the image are given, along with detailed coordinates. These coordinates are in the form of bounding boxes, represented as (x1, y1, x2, y2) with floating numbers ranging from 0 to 1. These values correspond to the top left x, top left y, bottom right x, and bottom right y. Also, the relationships between pairs of objects are provided, in the format of object -> relationship -> subject, where the object/subject are indexed by object id from previous object lists as well as the object names. Also, several region description are given, each describing a box region of image, with detailed coordinates. \nPlease rate the spatial correspondence, helpfulness, relevance, accuracy, level of details of their responses. 
Each assistant receives an overall score on a scale of 1 to 10, where a higher score indicates better overall performance.\nPlease first output a single line containing only two values indicating the scores for Assistant 1 and 2, respectively. The two scores are separated by a space.\nIn the subsequent line, please provide a comprehensive explanation of your evaluation, avoiding any potential bias and ensuring that the order in which the responses were presented does not affect your judgment."}, + "refer_reason": {"role": "Assistant", "prompt": "We would like to request your feedback on the performance of two AI assistants in response to the user question displayed above. The user asks the question about specific region of an image. For your reference, the visual content in the image is represented with five descriptive sentences describing the same image. In addition, specific object locations within the image are given, along with detailed coordinates. These coordinates are in the form of bounding boxes, represented as (x1, y1, x2, y2) with floating numbers ranging from 0 to 1. These values correspond to the top left x, top left y, bottom right x, and bottom right y. Also, the relationships between pairs of objects are provided, in the format of object -> relationship -> subject, where the object/subject are indexed by object id from previous object lists as well as the object names. Also, several region description are given, each describing a box region of image, with detailed coordinates. \nPlease rate the spatial correspondence, helpfulness, relevance, accuracy, level of details of their responses. Each assistant receives an overall score on a scale of 1 to 10, where a higher score indicates better overall performance.\nPlease first output a single line containing only two values indicating the scores for Assistant 1 and 2, respectively. 
The two scores are separated by a space.\nIn the subsequent line, please provide a comprehensive explanation of your evaluation, avoiding any potential bias and ensuring that the order in which the responses were presented does not affect your judgment."}, + "ground_conv": {"role": "Assistant", "prompt": "We would like to request your feedback on the performance of two AI assistants in response to the user question displayed above. The user asks the question that requires model to predict the coordinates of relevant object. For your reference, the visual content in the image is represented with five descriptive sentences describing the same image. In addition, specific object locations within the image are given, along with detailed coordinates. These coordinates are in the form of bounding boxes, represented as (x1, y1, x2, y2) with floating numbers ranging from 0 to 1. These values correspond to the top left x, top left y, bottom right x, and bottom right y. Also, the relationships between pairs of objects are provided, in the format of object -> relationship -> subject, where the object/subject are indexed by object id from previous object lists as well as the object names. Also, several region description are given, each describing a box region of image, with detailed coordinates. \nPlease rate the predicted coordinates, helpfulness, relevance, accuracy, level of details of their responses. Specifically, pay your attention to the precision of the coordinates and whether it matches the object. Small deviation (<20% of ground-truth box width or height) of coordinates is allowed and shouldn't be punished. More than that, the degree of deviation should be reflected in scoring too. Each assistant receives an overall score on a scale of 1 to 10, where a higher score indicates better overall performance.\nPlease first output a single line containing only two values indicating the scores for Assistant 1 and 2, respectively. 
The two scores are separated by a space.\nIn the subsequent line, please provide a comprehensive explanation of your evaluation, avoiding any potential bias and ensuring that the order in which the responses were presented does not affect your judgment."} +} \ No newline at end of file diff --git a/evaluation/Ferret-Bench/gpt4_result/gar_1b/refer_desc/ferret_answer.jsonl b/evaluation/Ferret-Bench/gpt4_result/gar_1b/refer_desc/ferret_answer.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..2ec5d9a591b11bd7d479586388db3e795fe6e02d --- /dev/null +++ b/evaluation/Ferret-Bench/gpt4_result/gar_1b/refer_desc/ferret_answer.jsonl @@ -0,0 +1,40 @@ +{"question_id": 0, "image": "000000069138.jpg", "category": "refer_desc", "text": "A rectangular signboard with a dark background and capitalized, bold, sans-serif font text that reads \"FROM 2,500 SQ FT IMMEDIATELY AVAILABLE.\""} +{"question_id": 1, "image": "000000131138.jpg", "category": "refer_desc", "text": "A white ceramic plate with a glossy finish, featuring a smudge of a dark brown substance, possibly a sauce or food residue, and a small, dark-colored object, likely a utensil, resting on the plate."} +{"question_id": 2, "image": "000000475150.jpg", "category": "refer_desc", "text": "The giraffe has a pattern of large, irregular brown patches separated by cream-colored lines on its skin. Its eyes are large with long lashes, and the giraffe's ears are pointed and tufted with hair. The visible part of the neck shows a continuation of the same patterned skin."} +{"question_id": 3, "image": "000000356424.jpg", "category": "refer_desc", "text": "A person with dark hair tied back, wearing a dark-colored top with a visible collar and a patterned garment underneath. 
The individual appears to be seated and is looking downwards."} +{"question_id": 4, "image": "000000491090.jpg", "category": "refer_desc", "text": "A black motorcycle with a visible rear wheel featuring a multi-spoke design and a disc brake system. The exhaust system includes a cylindrical chrome muffler with a heat shield. The rear suspension is partially visible, showcasing a telescopic fork. The motorcycle's bodywork includes a black fender over the wheel and a portion of the frame with a visible bolt pattern."} +{"question_id": 5, "image": "000000484415.jpg", "category": "refer_desc", "text": "A white ceramic toilet with an open black seat and lid, featuring a visible flush handle on the left side."} +{"question_id": 6, "image": "000000184324.jpg", "category": "refer_desc", "text": "Two bicycles with black tires and silver rims, positioned parallel to each other on a crosswalk. The bicycle on the left has a visible rear wheel and part of the frame, while the bicycle on the right has a visible front wheel and part of the frame."} +{"question_id": 7, "image": "000000341058.jpg", "category": "refer_desc", "text": "A cylindrical, transparent glass saltshaker with a metal screw-on lid featuring multiple small holes for dispensing salt."} +{"question_id": 8, "image": "000000184384.jpg", "category": "refer_desc", "text": "A plate with a grilled sausage, scrambled eggs, and a side of hash browns."} +{"question_id": 9, "image": "000000259097.jpg", "category": "refer_desc", "text": "A long, continuous hill with a dense covering of dark green trees and vegetation, extending horizontally across the image. The hill has a slightly uneven, jagged top edge and is characterized by a mix of tall and short trees, creating a textured appearance."} +{"question_id": 10, "image": "000000377882.jpg", "category": "refer_desc", "text": "A vertical, cylindrical pole with a textured surface, possibly metallic, exhibiting a pattern of diagonal ridges that create a diamond-like appearance. 
The pole has a consistent diameter throughout its visible length and is topped with a flat, circular cap."} +{"question_id": 11, "image": "000000415748.jpg", "category": "refer_desc", "text": "An adult elephant with a predominantly dark gray skin tone, adorned with colorful paint markings on its forehead and trunk. The elephant's ears are partially visible, and it has a long, curved trunk with visible wrinkles and a pair of ivory tusks protruding from the upper jaw. The elephant's eyes are small with long lashes, and it has a robust body with thick, pillar-like legs. The feet are broad with visible toenails."} +{"question_id": 12, "image": "000000408120.jpg", "category": "refer_desc", "text": "A rectangular concrete curb cut with a rough, textured surface and slightly uneven edges."} +{"question_id": 13, "image": "000000184400.jpg", "category": "refer_desc", "text": "A cylindrical, vertical pole with a flared base transitioning into a narrower shaft, topped by a flat, horizontal element with a series of evenly spaced, small, rectangular protrusions along its upper edge."} +{"question_id": 14, "image": "000000276018.jpg", "category": "refer_desc", "text": "A young boy with short, wavy brown hair, wearing a black jacket with a hood, is holding a plush toy resembling a brown and white dog. He has a slight smile on his face and is looking slightly to his left."} +{"question_id": 15, "image": "000000376322.jpg", "category": "refer_desc", "text": "An elderly man with short gray hair and glasses, wearing a light green button-up shirt with a name tag on the left side of his chest. 
He is holding a clear glass in his right hand and appears to be looking slightly to his left."} +{"question_id": 16, "image": "000000125472.jpg", "category": "refer_desc", "text": "The sky is a pale, almost white, light gray with a smooth gradient, transitioning from a slightly darker shade at the top to a lighter shade towards the bottom."} +{"question_id": 17, "image": "000000361551.jpg", "category": "refer_desc", "text": "A person with dark hair tied back, wearing a black top with a scoop neckline and a visible strap over the shoulder."} +{"question_id": 18, "image": "000000412240.jpg", "category": "refer_desc", "text": "Two black leather shoes with a low heel and a rounded toe. The shoes have a smooth, polished finish and are positioned side by side."} +{"question_id": 19, "image": "000000130566.jpg", "category": "refer_desc", "text": "The passenger car is a modern rail vehicle with a predominantly maroon and cream color scheme. It features a series of windows with black frames, some of which are partially open, and others are closed. The side of the car displays a pattern of vertical and horizontal lines, creating a grid-like texture. The lower portion of the car has a yellow stripe running horizontally, and there are visible steps leading up to the entrance. 
The roof of the car is equipped with a pantograph for electrical power collection."} +{"question_id": 20, "image": "000000421923.jpg", "category": "refer_desc", "text": "A white daisy with a dense cluster of elongated petals radiating from a central yellow disc, accompanied by green leaves with jagged edges."} +{"question_id": 21, "image": "000000513567.jpg", "category": "refer_desc", "text": "A person with dark hair wearing a brown top and blue jeans, walking with their left arm slightly bent and their right arm hanging down."} +{"question_id": 22, "image": "000000543300.jpg", "category": "refer_desc", "text": "A white boat with the name \"Port River Dolphin Cruises\" written in a stylized font on its side."} +{"question_id": 23, "image": "000000241668.jpg", "category": "refer_desc", "text": "A smiling woman with long, straight red hair, wearing a black blazer over a white shirt and a dark tie. She is holding a slice of cake with both hands."} +{"question_id": 24, "image": "000000535578.jpg", "category": "refer_desc", "text": "The hill is gently sloping with a mix of green grass and patches of exposed soil. It features a winding stone path that ascends from the bottom left to the top right. The hill is dotted with small bushes and scattered rocks, adding texture to the landscape. The terrain is uneven, with slight undulations and a few small mounds."} +{"question_id": 25, "image": "000000277051.jpg", "category": "refer_desc", "text": "A small bird with a brownish-grey plumage, displaying a subtle gradient from lighter shades on the chest to darker tones on the back and wings. The bird has a distinct white eyebrow stripe above its eye, a short, conical beak, and a visible eye with a dark pupil. 
Its legs are thin and pinkish, with delicate toes adapted for perching."} +{"question_id": 26, "image": "000000018519.jpg", "category": "refer_desc", "text": "A black, textured strap with a series of evenly spaced, circular indentations along its length, and a metallic buckle with a prong at one end."} +{"question_id": 27, "image": "000000106048.jpg", "category": "refer_desc", "text": "A white coach bus with a curved front windshield and a large, tinted side window. The bus features a purple and blue wave-like design on the side, with the text \"Divine Transportation\" written in elegant script. The vehicle number \"7006\" is displayed at the top front corner. The bus has a black front bumper, side mirrors, and a visible door towards the front. The wheels are not visible in the image."} +{"question_id": 28, "image": "000000058393.jpg", "category": "refer_desc", "text": "Two individuals with short hair, one with a side-swept fringe and the other with a more pronounced side-swept fringe, both with visible earlobes and a portion of their shoulders and upper backs."} +{"question_id": 29, "image": "000000010764.jpg", "category": "refer_desc", "text": "The knee pad features a hard, black, rounded protective shell with a glossy finish, designed to cover the knee. It is attached to a black, adjustable strap system with a buckle for securing the pad around the leg. The shell has a contoured design to fit the shape of the knee, with a central ridge running vertically down the middle. The strap system includes a red adjustment buckle for size customization."} +{"question_id": 30, "image": "000000271402.jpg", "category": "refer_desc", "text": "Two young girls with light skin and blonde hair, wearing white short-sleeved shirts with a circular emblem on the left chest, white pleated skirts, and white sneakers with yellow socks. 
The girl on the left is holding a black handlebar with a silver shaft, while the girl on the right is holding a similar handlebar."} +{"question_id": 31, "image": "000000273493.jpg", "category": "refer_desc", "text": "A tennis net with a white tape running along the top edge, supported by a white post on the right side. The net is composed of a grid of black mesh squares."} +{"question_id": 32, "image": "000000360960.jpg", "category": "refer_desc", "text": "Black trousers with a straight-leg cut, featuring a smooth texture and a regular fit."} +{"question_id": 33, "image": "000000452122.jpg", "category": "refer_desc", "text": "A commercial jet with a white fuselage featuring the word \"ANA\" in blue letters, a blue and white logo, and a partially visible engine under the wing."} +{"question_id": 34, "image": "000000134722.jpg", "category": "refer_desc", "text": "The windshield is a large, curved, and slightly tinted glass panel with a smooth surface, seamlessly integrated into the train's front. It features a dark wiper at the bottom and is bordered by a thin, yellow frame that matches the train's exterior color."} +{"question_id": 35, "image": "000000039484.jpg", "category": "refer_desc", "text": "A group of people are seated at a table, engaged in conversation. The table is covered with a green tablecloth, and the individuals are dressed in casual attire. 
The scene suggests a relaxed and social atmosphere."} +{"question_id": 36, "image": "000000159311.jpg", "category": "refer_desc", "text": "A dense cluster of green grass blades with varying lengths, some standing upright while others are slightly bent, interspersed with thin, dry, brownish strands."} +{"question_id": 37, "image": "000000326174.jpg", "category": "refer_desc", "text": "A person with long hair tied back, wearing a yellow hoodie with a graphic on the back, black shorts, and brown shoes, is holding a blue surfboard with a black traction pad on the tail."} +{"question_id": 38, "image": "000000562207.jpg", "category": "refer_desc", "text": "A smiling man with short hair, wearing dark gray shorts with white stripes on the sides, stands with his left hand on his hip and his right arm extended outward. He has a watch on his left wrist and is barefoot."} +{"question_id": 39, "image": "000000332318.jpg", "category": "refer_desc", "text": "A brown cow with a white pattern on its forehead and snout, dark eyes, and a pair of curved horns."} diff --git a/evaluation/Ferret-Bench/gpt4_result/gar_1b/review_refer_desc.jsonl b/evaluation/Ferret-Bench/gpt4_result/gar_1b/review_refer_desc.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..54f48a3dcaa3c7be449c2b3576958f6b90d73803 --- /dev/null +++ b/evaluation/Ferret-Bench/gpt4_result/gar_1b/review_refer_desc.jsonl @@ -0,0 +1,40 @@ +{"id": 1, "question_id": 0, "answer1_id": 0, "answer2_id": 0, "category": "refer_desc", "content": "7 4\n\nAssistant 1 provided a response that was moderately helpful and relevant to the question. It correctly identified the object as a sign and connected it to the building offering short-term office space, which aligns with the region description at [0.600, 0.036, 0.793, 0.084]. However, the explanation lacked specificity regarding the sign's exact text or visual details, and the spatial correspondence to the surroundings was only partially addressed. 
The assistant did not fully utilize the bounding box coordinates or relationships to describe the interaction between the sign and its surroundings in detail.\n\nAssistant 2, on the other hand, focused on the textual content of the sign but failed to address the interaction between the sign and its surroundings, which was the core of the user's question. While the description of the sign's text was accurate, it did not provide any spatial or contextual information about how the sign relates to the building or the overall scene. This made the response less relevant and helpful in answering the user's query.\n\nOverall, Assistant 1 demonstrated better spatial correspondence, relevance, and accuracy, earning a score of 7. Assistant 2, while accurate in describing the sign's text, lacked relevance and detail, resulting in a lower score of 4.", "tuple": [7.0, 4.0], "answer1": "The object is a sign that is placed on a building. The building is offering short term office space, which is stated on the sign. This sign is part of the overall street scene, with other objects nearby, such as a stop light on a pole and a road. The sign is directly related to the building, contributing to the information about the building's function to the overall scene.", "answer2": "A rectangular signboard with a dark background and capitalized, bold, sans-serif font text that reads \"FROM 2,500 SQ FT IMMEDIATELY AVAILABLE.\""} +{"id": 2, "question_id": 1, "answer1_id": 1, "answer2_id": 1, "category": "refer_desc", "content": "7 4\n\nAssistant 1 provided a response that is spatially accurate and relevant to the question. It correctly identified the object as a plate and noted the interaction with the fork resting on it, which aligns with the provided relationship data (\"fork -> on -> plate\"). Additionally, it mentioned nearby objects like the cup and computer mouse, which are consistent with the bounding box coordinates and the spatial arrangement described in the context. 
However, it could have included more detail about the plate's appearance or its surroundings to enhance the response.\n\nAssistant 2, while attempting to describe the plate in detail, introduced speculative and irrelevant information about its appearance (e.g., \"glossy finish,\" \"dark brown substance,\" \"possibly a sauce or food residue\") that is not supported by the provided context or data. Furthermore, it did not address the spatial interaction or relationships between the plate and its surroundings, making the response less accurate and helpful for the user's question. It failed to utilize the bounding box coordinates or relationships effectively, leading to a lower score.", "tuple": [7.0, 4.0], "answer1": "The object is a plate, and on top of this plate is a fork. It's located on a desk. Nearby, there's also a cup and a computer mouse.", "answer2": "A white ceramic plate with a glossy finish, featuring a smudge of a dark brown substance, possibly a sauce or food residue, and a small, dark-colored object, likely a utensil, resting on the plate."} +{"id": 3, "question_id": 2, "answer1_id": 2, "answer2_id": 2, "category": "refer_desc", "content": "7 5\n\nAssistant 1 provides a more comprehensive and spatially accurate response to the question about the region [0.288, 0.324, 0.572, 0.649]. It identifies the region as the giraffe's face, mentions its white and brown coloration, and correctly associates nearby features such as the neck with wrinkles, the eye, and the ear. Assistant 1 also contextualizes the giraffe's position relative to the tree and mentions the background elements like branches and the sky. While it could have been more concise, its response is detailed, relevant, and spatially aligned with the given data.\n\nAssistant 2, on the other hand, provides a more generalized description of the giraffe's appearance, focusing on the pattern of its skin, eyes, and ears. 
While this information is accurate and relevant to the giraffe, it does not specifically address the region in question or its interaction with surrounding areas. The response lacks spatial correspondence and detail, making it less helpful in answering the user's query about the specific region.\n\nOverall, Assistant 1 scores higher for its detailed and spatially accurate response, while Assistant 2 scores lower due to its lack of specificity and focus on the region.", "tuple": [7.0, 5.0], "answer1": "The region is primarily occupied by the face of a giraffe, which is white and brown in color. This portion of the giraffe's body is in close proximity to its neck, which is covered with wrinkles. The giraffe's eye and ear are also located within this region. The giraffe is standing in front of a tree, and its face and neck are spotted with tan and brown spots. The background is filled with branches of the tree and the sky.", "answer2": "The giraffe has a pattern of large, irregular brown patches separated by cream-colored lines on its skin. Its eyes are large with long lashes, and the giraffe's ears are pointed and tufted with hair. The visible part of the neck shows a continuation of the same patterned skin."} +{"id": 4, "question_id": 3, "answer1_id": 3, "answer2_id": 3, "category": "refer_desc", "content": "7 4 \nAssistant 1 provides a more comprehensive and spatially relevant response to the question. It correctly identifies the woman in the region [0.528, 0.254, 0.717, 0.666] as eating lunch and connects her presence to nearby objects such as the table, plate of food, raspberries, bottle, and glass of water. Additionally, it mentions the man nearby, wearing glasses and a striped shirt, which aligns with the provided object relationships and region descriptions. However, it could have been more precise in detailing the exact spatial relationships between the woman and the surrounding objects. \n\nAssistant 2's response is vague and lacks relevance to the question. 
While it describes the appearance of the individual in the region, it does not address the activity (\"eating lunch\") or connect the region to surrounding objects or relationships. It misses key details provided in the context, such as the table, plate of food, and raspberries, and does not integrate the spatial or relational information effectively. Consequently, its response is less helpful and accurate compared to Assistant 1.", "tuple": [7.0, 4.0], "answer1": "In the region, there's a woman who seems to be having lunch. She is sitting near a table where there are several objects, including a plate of food that contains raspberries, a bottle, and a glass of water. There is also a man nearby, who is wearing glasses and a striped orange and black shirt. Both of them seem to be engaged in a meal.", "answer2": "A person with dark hair tied back, wearing a dark-colored top with a visible collar and a patterned garment underneath. The individual appears to be seated and is looking downwards."} +{"id": 5, "question_id": 4, "answer1_id": 4, "answer2_id": 4, "category": "refer_desc", "content": "7 5\n\nAssistant 1 provided a response that was more aligned with the context and question. It correctly identified the region as corresponding to the back wheel of the motorcycle and described its relationship to nearby objects, such as the tail pipe and the person sitting on the motorcycle. The spatial correspondence was accurate, and the response was helpful and relevant to the question. However, it could have included more specific details about the back wheel itself, such as its design or features.\n\nAssistant 2, while offering a detailed description of a motorcycle's rear wheel and related components, did not directly address the specific region or its relation to nearby objects as requested. The response included technical details (e.g., multi-spoke design, disc brake system, telescopic fork) that were not supported by the provided context or visual descriptions. 
This made the answer less accurate and relevant to the question. Additionally, Assistant 2's response lacked spatial correspondence to the bounding box coordinates and nearby elements.\n\nOverall, Assistant 1 demonstrated better spatial correspondence, relevance, and accuracy, earning a higher score. Assistant 2's response, while detailed, was less helpful and lacked alignment with the provided context, resulting in a lower score.", "tuple": [7.0, 5.0], "answer1": "The region corresponds to the back wheel of a motorcycle, which is an integral part of the black motorcycle with silver accessories. Close to the back wheel, you can find the tail pipe of the motorcycle located on the left. Above the back wheel, there's a person sitting on the motorcycle, wearing a sweater, jeans, and sneakers.", "answer2": "A black motorcycle with a visible rear wheel featuring a multi-spoke design and a disc brake system. The exhaust system includes a cylindrical chrome muffler with a heat shield. The rear suspension is partially visible, showcasing a telescopic fork. The motorcycle's bodywork includes a black fender over the wheel and a portion of the frame with a visible bolt pattern."} +{"id": 6, "question_id": 5, "answer1_id": 5, "answer2_id": 5, "category": "refer_desc", "content": "8 3\n\nAssistant 1 provided a response that aligns well with the region description at [0.716, 0.192, 0.894, 0.550], which mentions \"the container and the toilet brush cleaner.\" The assistant correctly identifies the objects in this region and explains their interaction with the surroundings, specifically their proximity to the toilet bowl and their role in maintaining cleanliness. The response is spatially accurate, relevant, and helpful, with a good level of detail.\n\nAssistant 2, on the other hand, describes a \"white ceramic toilet with an open black seat and lid, featuring a visible flush handle on the left side,\" which does not correspond to the specified region. 
This description seems to refer to a different part of the image, possibly the toilet itself, rather than the container and toilet brush cleaner in the region [0.716, 0.192, 0.894, 0.550]. The response is inaccurate, irrelevant to the question, and lacks spatial correspondence, resulting in a lower score.", "tuple": [8.0, 3.0], "answer1": "In the region, there is a container and a toilet brush cleaner. This region is right next to the toilet bowl, indicating that the brush cleaner is accessible for bathroom cleaning. It's crucial for maintaining the cleanliness of the toilet nearby.", "answer2": "A white ceramic toilet with an open black seat and lid, featuring a visible flush handle on the left side."} +{"id": 7, "question_id": 7, "answer1_id": 7, "answer2_id": 6, "category": "refer_desc", "content": "7 4 \nAssistant 1 provides a more comprehensive and accurate response to the question. It correctly identifies the region as a crosswalk with white stripes and relates it to nearby objects, such as the cyclist and people using the crosswalk, as well as the presence of bikes and cars on the road. This aligns well with the provided relationships and descriptions, demonstrating good spatial correspondence, relevance, and helpfulness. However, it could have been more detailed in explicitly connecting the bounding box coordinates to the described scene.\n\nAssistant 2, on the other hand, focuses solely on bicycles within the region and provides a description that is not supported by the provided data or relationships. It does not address the crosswalk or its connection to other nearby objects, such as the cyclist or people, and fails to incorporate the broader context of the scene. This response lacks relevance and accuracy, and its level of detail is insufficient for the given question. 
Consequently, it scores lower due to its limited helpfulness and poor spatial correspondence.", "tuple": [7.0, 4.0], "answer1": "The region is a crosswalk on a busy city street, highlighted by white stripes. It's being used by a cyclist and a group of people who are crossing the street. There are bikes on the road, and cars are also visible within the vicinity. A large white vehicle with a big windshield is also nearby. This bustling scene is typical for a city intersection.", "answer2": "Two bicycles with black tires and silver rims, positioned parallel to each other on a crosswalk. The bicycle on the left has a visible rear wheel and part of the frame, while the bicycle on the right has a visible front wheel and part of the frame."} +{"id": 8, "question_id": 8, "answer1_id": 8, "answer2_id": 7, "category": "refer_desc", "content": "6 3\n\nAssistant 1 provided a partially correct and relevant response but lacked precision and clarity. While it correctly identified the object as a salt shaker and mentioned its relationship with nearby objects, it inaccurately stated that the shaker contains pepper and failed to directly reference the bounding box coordinates or the explicit relationship \"salt -> in -> shaker\" provided in the context. Additionally, it generalized the table setting without leveraging the detailed relationships and object data provided.\n\nAssistant 2, on the other hand, gave an overly generic and speculative description of the object as a \"cylindrical, transparent glass saltshaker with a metal screw-on lid,\" which is not supported by the given context. The assistant did not address the bounding box coordinates, the object's relationship with nearby objects, or any of the provided contextual information, making the response irrelevant and unhelpful.\n\nOverall, Assistant 1 demonstrated a better understanding of the question and context, though it still had notable shortcomings. 
Assistant 2's response was largely disconnected from the provided data and failed to address the user's query effectively.", "tuple": [6.0, 3.0], "answer1": "The object is a salt shaker. It is located on a table along with a napkin and another shaker, which contains pepper. The table appears to be set for dining at a restaurant, as indicated by the presence of these objects.", "answer2": "A cylindrical, transparent glass saltshaker with a metal screw-on lid featuring multiple small holes for dispensing salt."} +{"id": 9, "question_id": 9, "answer1_id": 9, "answer2_id": 8, "category": "refer_desc", "content": "7 4\n\nAssistant 1 provides a more detailed and spatially accurate response to the question. The region [0.628, 0.120, 0.998, 0.389] corresponds to Object 12, which is a plate, and the relationships indicate that food items such as sausage (Object 13) and egg (Object 5) are on this plate. Assistant 1 correctly identifies the plate and mentions that it contains various types of food, including sausage and egg, and notes that the plate is positioned towards the back of the table, which aligns with the region description \"There is food on the plate in the back.\" However, Assistant 1 could improve by specifying the exact food items (sausage and egg) rather than vaguely referring to \"a few other unidentified items.\"\n\nAssistant 2, on the other hand, provides an inaccurate response. While it mentions a plate with food items, it incorrectly identifies scrambled eggs and hash browns, which are not mentioned in the object list or relationships. This response lacks spatial correspondence and accuracy, as it introduces elements not supported by the provided data. Additionally, Assistant 2 does not acknowledge the positioning of the plate or its relation to the table, making the response less helpful and relevant.\n\nOverall, Assistant 1 demonstrates better spatial correspondence, relevance, and accuracy, earning a score of 7. 
Assistant 2's inaccuracies and lack of alignment with the provided data result in a lower score of 4.", "tuple": [7.0, 4.0], "answer1": "In the region, there is a plate with various types of food on it. This includes a sausage, an egg, and a few other unidentified items. The food is attractively arranged on the plate, which is positioned towards the back of the table. The plate and its contents seem to be part of a larger meal setup on the table.", "answer2": "A plate with a grilled sausage, scrambled eggs, and a side of hash browns."} +{"id": 10, "question_id": 10, "answer1_id": 10, "answer2_id": 9, "category": "refer_desc", "content": "7 5\n\nAssistant 1 provided a response that is moderately accurate and relevant to the question. It correctly identifies that the region contains trees and a village on a hill in the distance, which aligns with the region description provided in the context. Additionally, it attempts to relate the region to the surrounding areas by mentioning the grassy field and the man jumping to catch a frisbee. However, the mention of the man and his shadow is not directly supported by the bounding box coordinates of the region in question ([0.012, 0.520, 0.996, 0.631]), which is focused on trees and buildings rather than the grassy field or the man. This slightly detracts from the spatial correspondence and accuracy of the response. Overall, the response is helpful and provides a fair level of detail, but it could have been more precise in its spatial interpretation.\n\nAssistant 2, on the other hand, provides a detailed description of the hill and trees within the region but fails to establish any meaningful relationship between the region and the surrounding areas, as requested by the question. While the description of the hill and trees is accurate and detailed, it does not address the relational aspect of the question, making the response less relevant and helpful. 
Additionally, the response lacks context from the provided data, such as the mention of the village or the grassy field, which could have enhanced its relevance and completeness.\n\nOverall, Assistant 1 scores higher due to its attempt to address the relational aspect of the question, despite some inaccuracies, while Assistant 2, though detailed, fails to adequately respond to the user's query about the region's relation to surrounding areas.", "tuple": [7.0, 5.0], "answer1": "Region is full of trees and there is a village on a hill in the distance. These trees and buildings are located behind a grassy field where a man is seen jumping to catch a frisbee. The man's shadow can be seen on the grass.", "answer2": "A long, continuous hill with a dense covering of dark green trees and vegetation, extending horizontally across the image. The hill has a slightly uneven, jagged top edge and is characterized by a mix of tall and short trees, creating a textured appearance."} +{"id": 11, "question_id": 11, "answer1_id": 11, "answer2_id": 10, "category": "refer_desc", "content": "7 4 \nAssistant 1 provides a response that is moderately accurate and relevant to the question. It correctly identifies the black fence pole as part of a chain-link fence and mentions its role in enclosing boats and surfboards. Additionally, it incorporates surrounding context, such as the nearby waterway, buildings on the horizon, and green shrubs by the lake, which aligns with the provided data. However, the description could have been more detailed in terms of the pole's physical characteristics or its exact spatial relationship with other objects. \n\nAssistant 2, on the other hand, focuses exclusively on the physical appearance of the pole, describing its texture and shape in detail. 
While this level of detail is helpful in understanding the pole itself, the response fails to address the broader context of the region, such as its relationship to the chain-link fence, nearby objects, or the surrounding environment. This makes the answer less relevant and helpful in addressing the user's question about the region and its context. \n\nOverall, Assistant 1 scores higher due to its relevance, spatial correspondence, and inclusion of contextual information, while Assistant 2 scores lower for its lack of contextual awareness despite providing detailed physical characteristics.", "tuple": [7.0, 4.0], "answer1": "The region contains a black fence pole, which seems to be part of a chain-link fence enclosing the area. This fence is next to a water way and encloses several boats and surfboards. There are buildings on the horizon, and some green shrubs growing along the side of the lake.", "answer2": "A vertical, cylindrical pole with a textured surface, possibly metallic, exhibiting a pattern of diagonal ridges that create a diamond-like appearance. The pole has a consistent diameter throughout its visible length and is topped with a flat, circular cap."} +{"id": 12, "question_id": 12, "answer1_id": 12, "answer2_id": 11, "category": "refer_desc", "content": "7 5\n\nAssistant 1 provided a response that was more aligned with the user's question, which asked about the object at coordinates [0.084, 0.438, 0.727, 0.954] (the elephant) and its interaction with nearby objects. The assistant correctly identified the elephant and mentioned its interaction with the man riding on its back, the nearby building, and the shadow on the ground. Additionally, it referenced the painted face and trunk of the elephant, which aligns with the region description provided. 
However, the response lacked some detail about the elephant's physical features and could have been more specific about the interactions, such as the \"close to\" relationship with the building.\n\nAssistant 2, while providing a detailed physical description of the elephant, failed to address the core of the user's question, which was about the elephant's interaction with nearby objects. The response focused entirely on the elephant's appearance and omitted any mention of the relationships or interactions described in the context, such as the man riding the elephant or the shadow on the ground. This made the response less relevant and less helpful in answering the user's specific query.\n\nOverall, Assistant 1 scored higher (7) for being more relevant, accurate, and helpful in addressing the user's question, though it could have included more detail. Assistant 2 scored lower (5) due to its lack of relevance to the user's query, despite providing a detailed description of the elephant.", "tuple": [7.0, 5.0], "answer1": "The region contains an elephant, which is quite large. There's a man riding on the back of the elephant, and they are moving close to a building. The shadow of the elephant can be seen on the ground. Additionally, the elephant's face and trunk are painted, which indicates some cultural significance.", "answer2": "An adult elephant with a predominantly dark gray skin tone, adorned with colorful paint markings on its forehead and trunk. The elephant's ears are partially visible, and it has a long, curved trunk with visible wrinkles and a pair of ivory tusks protruding from the upper jaw. The elephant's eyes are small with long lashes, and it has a robust body with thick, pillar-like legs. 
The feet are broad with visible toenails."} +{"id": 13, "question_id": 13, "answer1_id": 13, "answer2_id": 12, "category": "refer_desc", "content": "6 3\n\nAssistant 1 provided a response that partially aligns with the spatial correspondence and relevance of the question. The region [0.394, 0.565, 0.570, 0.718] contains the object \"concrete,\" as indicated in the object list. Assistant 1 correctly identifies the concrete surface within the region but inaccurately states that a car is parked on it, which is not supported by the provided object relationships or coordinates. Furthermore, the mention of the girl holding an umbrella walking along the path is not directly relevant to the region in question, as the girl is located elsewhere ([0.444, 0.249, 0.500, 0.480]). While the response is somewhat helpful and accurate in identifying the concrete, it lacks precision and introduces irrelevant details.\n\nAssistant 2's response is vague and does not correspond well to the spatial details provided. The description of a \"rectangular concrete curb cut\" is incorrect, as the object in the region is labeled as \"concrete,\" not a curb. Additionally, the response lacks interaction details with nearby objects, which the user specifically requested. The level of detail is minimal, and the description does not align with the provided data.\n\nOverall, Assistant 1 scores higher for partially addressing the question and identifying the correct object, despite inaccuracies and irrelevant details. Assistant 2 scores lower due to its lack of relevance, accuracy, and detail.", "tuple": [6.0, 3.0], "answer1": "In the region, there is a concrete surface which is part of the alley. It is placed alongside the curb and the road, and there is a car parked on it. 
Also, nearby, there is a girl holding an umbrella walking along this path.", "answer2": "A rectangular concrete curb cut with a rough, textured surface and slightly uneven edges."} +{"id": 14, "question_id": 14, "answer1_id": 14, "answer2_id": 13, "category": "refer_desc", "content": "8 3 \nAssistant 1 provides a response that is spatially accurate, relevant, and helpful. It correctly identifies the object in the region ([0.602, 0.837, 0.696, 0.997]) as a metal support column and describes its interaction with surrounding objects, such as supporting the bridge and featuring a red line. This aligns well with the provided relationships and region description. The response is detailed and ties the object to the broader scene involving the train and bridge, making it highly accurate and useful for understanding the context.\n\nAssistant 2, on the other hand, offers a vague and overly generic description of the object, referring to it as a \"cylindrical, vertical pole\" without connecting it to the surrounding objects or the scene. It fails to address the specific interactions or relationships provided in the context, such as the column supporting the bridge or the presence of the red line. The response lacks relevance and accuracy, and its level of detail does not contribute meaningfully to answering the user's question.", "tuple": [8.0, 3.0], "answer1": "In the region, there is a metal support column. This column is providing support for a bridge above it, which a train is passing over. The column also features a red line on it. 
This region is part of a larger scene that includes a train track on an elevated bridge.", "answer2": "A cylindrical, vertical pole with a flared base transitioning into a narrower shaft, topped by a flat, horizontal element with a series of evenly spaced, small, rectangular protrusions along its upper edge."} +{"id": 15, "question_id": 15, "answer1_id": 15, "answer2_id": 14, "category": "refer_desc", "content": "7 5 \nAssistant 1 provides a more accurate and relevant response to the user's question about the specified region. It correctly identifies the boy wearing a black jacket and holding a brown stuffed dog with a red and white collar, which aligns with the region description and object relationships provided in the context. The spatial correspondence is strong, as the assistant ties the region coordinates to the correct objects and interactions. However, it could have included more detail about the boy's appearance or the surrounding context, such as the grassy area or group of children, to enhance the level of detail.\n\nAssistant 2, while offering a description of the boy's appearance, does not directly address the spatial correspondence or interactions with nearby objects. It fails to mention the black jacket or the stuffed dog with a red and white collar, which are key details from the region description and object relationships. This omission reduces the accuracy and relevance of the response. Additionally, the assistant's description of the boy's hair and facial expression is not supported by the provided context, making it speculative and less helpful. Overall, Assistant 2's response lacks the depth and precision needed to fully address the user's query.", "tuple": [7.0, 5.0], "answer1": "The region is occupied by a boy who is wearing a black jacket. He is holding a brown stuffed dog with a red and white collar. The boy seems to be part of a larger group of children who are all holding various stuffed animals and dolls. 
They seem to be walking across some grassy area, possibly in some kind of event or gathering.", "answer2": "A young boy with short, wavy brown hair, wearing a black jacket with a hood, is holding a plush toy resembling a brown and white dog. He has a slight smile on his face and is looking slightly to his left."} +{"id": 16, "question_id": 16, "answer1_id": 16, "answer2_id": 15, "category": "refer_desc", "content": "4 2\n\nAssistant 1 provided a response that was somewhat relevant but lacked specificity and accuracy. The question explicitly asked about the interaction between objects in the region [0.668, 0.252, 0.909, 0.622], which corresponds to \"object 12: man\" wearing \"object 9: green shirt.\" However, Assistant 1's response was vague, mentioning a social setting and items on the table without directly addressing the specific interactions involving the man in the green shirt. While it correctly identified the man wearing the green shirt, it failed to elaborate on the interaction between these objects or provide any meaningful details about the region.\n\nAssistant 2's response was even less accurate and relevant. It described an elderly man with gray hair and glasses, which is not supported by the provided context. The description of the man holding a glass and looking to his left is fabricated and does not align with the given data. There is no mention of glasses or gray hair for \"object 12: man,\" nor is there any indication that he is holding a glass. This response is entirely disconnected from the provided information and the specific question.\n\nOverall, Assistant 1 scored slightly higher for at least partially addressing the correct objects and their relationship, albeit in a vague manner. Assistant 2 scored lower due to its fabricated and irrelevant response. Neither assistant provided a satisfactory answer to the question.", "tuple": [4.0, 2.0], "answer1": "In the region, there is a man wearing a green shirt. 
He is sitting at a table, presumably in a social setting, along with other people. The table is full of items such as plates, glasses, and a decanter. One of the significant interactions is that the man is engaged in a conversation with the people around him.", "answer2": "An elderly man with short gray hair and glasses, wearing a light green button-up shirt with a name tag on the left side of his chest. He is holding a clear glass in his right hand and appears to be looking slightly to his left."} +{"id": 17, "question_id": 17, "answer1_id": 17, "answer2_id": 16, "category": "refer_desc", "content": "8 3 \nAssistant 1 provided a detailed and relevant response to the user's question about the region [0.201, 0.002, 0.940, 0.758]. It accurately identified the man as the primary subject in the region, described his interaction with the skateboard, and mentioned other surrounding objects such as jeans, shoes, laces, bracelet, trees, building, and fence. The assistant also inferred the context of the scene, mentioning the stadium lights and poles, which aligns with the relationships provided. The spatial correspondence was strong, as the bounding box coordinates matched the described objects and interactions. The response was helpful, accurate, and detailed, earning it a score of 8.\n\nAssistant 2's response, on the other hand, was highly irrelevant to the user's question. It focused on the sky's color and gradient, which is not directly related to the region specified in the query. The assistant failed to address the man or the skateboard, which are the central elements of the region in question. The response lacked spatial correspondence, relevance, and accuracy, and did not provide any helpful information about the specified region or its interactions with surrounding objects. As a result, it received a score of 3.", "tuple": [8.0, 3.0], "answer1": "This region is primarily occupied by a man, who appears to be in mid-air, performing a trick on a skateboard. 
The skateboard is beneath him. He is wearing jeans and shoes with laces, and has a bracelet on his wrist. In the background of this region, there are trees, a building, and a fence. The scene seems to be taking place in a stadium, as there are stadium lights on poles in the vicinity.", "answer2": "The sky is a pale, almost white, light gray with a smooth gradient, transitioning from a slightly darker shade at the top to a lighter shade towards the bottom."} +{"id": 18, "question_id": 18, "answer1_id": 18, "answer2_id": 17, "category": "refer_desc", "content": "8 4\n\nAssistant 1 provided a detailed and contextually accurate response about the interaction happening in the specified region. It correctly identified the woman dressed in a sleeveless black top and wearing a black and white headband, which aligns with the object descriptions and relationships provided in the context. Additionally, Assistant 1 incorporated relevant contextual information about the airport service area and the typical activities involving luggage, which enhanced the helpfulness and relevance of the response. The spatial correspondence was strong, as the description matched the bounding box coordinates and the objects within it. However, the response could have been slightly more concise.\n\nAssistant 2, on the other hand, provided a minimal and vague description that lacked sufficient detail and context. While it mentioned the black top and a strap, it did not address the headband, the luggage interaction, or the broader context of the airport service area. This made the response less helpful and relevant to the user's query. The spatial correspondence was weak, as the description did not fully utilize the provided bounding box information or relationships. Overall, Assistant 2's response was incomplete and less accurate compared to Assistant 1's.", "tuple": [8.0, 4.0], "answer1": "This region features a woman, who is dressed in a sleeveless black top. 
She is bending over her luggage, possibly preparing or checking something inside it. The woman is wearing a black and white headband as well. She is located in the service area of an airport, where there are other people standing around as well, some of them are holding their luggage. This scene is quite typical in an airport setting where passengers are usually seen handling their luggage.", "answer2": "A person with dark hair tied back, wearing a black top with a scoop neckline and a visible strap over the shoulder."} +{"id": 19, "question_id": 19, "answer1_id": 19, "answer2_id": 18, "category": "refer_desc", "content": "8 4\n\nAssistant 1 provides a detailed and accurate response that aligns well with the context of the question. It identifies the primary object (the shoe) within the specified region and discusses its interactions with surrounding objects, such as the floor, light, and the nearby dog. It also mentions specific features of the shoe, like the laces, heel, and toe, which are supported by the provided relationships and object descriptions. The spatial correspondence is strong, as the region [0.002, 0.437, 0.720, 0.787] indeed encompasses the bounding boxes of the shoes (objects 14 and 15). The response is relevant, helpful, and provides sufficient detail to address the question comprehensively.\n\nAssistant 2, on the other hand, provides a vague and overly generalized description of the shoes without addressing the interactions or surrounding objects. While it describes the shoes' appearance, this information is not explicitly supported by the provided data (e.g., there is no mention of \"black leather\" or \"polished finish\" in the context). Furthermore, it fails to acknowledge the relationships and interactions involving the shoes, such as their proximity to the dog, their placement on the floor, or the light reflecting off them. 
The response lacks spatial correspondence, relevance, and depth, making it less helpful in answering the question.\n\nOverall, Assistant 1 demonstrates a much better understanding of the context and provides a more thorough and accurate response, while Assistant 2 falls short in multiple aspects.", "tuple": [8.0, 4.0], "answer1": "This region primarily contains a shoe. The shoe appears to be placed on a floor, and light is reflecting off of it. A dog is sitting nearby on the floor as well, and the shoe is positioned next to the dog. The shoe features several distinct elements like laces, a heel, and a toe.", "answer2": "Two black leather shoes with a low heel and a rounded toe. The shoes have a smooth, polished finish and are positioned side by side."} +{"id": 20, "question_id": 20, "answer1_id": 20, "answer2_id": 19, "category": "refer_desc", "content": "7 3\n\nAssistant 1 provided a response that is moderately accurate and relevant to the question. It correctly identifies the region as featuring windows on the side of a train engine, which aligns with the region description at [0.630, 0.471, 0.682, 0.550]. The assistant also connects the train to its surrounding environment, mentioning the tracks and nearby elements like electric lines, buildings, trees, and a wall. However, the response could have been more detailed and precise in describing the specific interaction of the windows with the environment, such as their role in the train's design or function. Additionally, some of the mentioned elements, like buildings and trees, are not directly relevant to the specific region in question, which slightly detracts from the focus.\n\nAssistant 2's response is entirely inaccurate and irrelevant to the question. It describes a passenger car with a maroon and cream color scheme, pantograph, and other features that are not present in the provided context. 
The region in question is specifically about windows on the side of a train engine, and there is no mention of a passenger car or any of the details provided by Assistant 2 in the image's descriptions, objects, or relationships. This response demonstrates a significant lack of spatial correspondence, relevance, and accuracy, leading to a much lower score.\n\nOverall, Assistant 1 receives a score of 7 for its moderately helpful and relevant response, while Assistant 2 receives a score of 3 for its entirely inaccurate and irrelevant answer.", "tuple": [7.0, 3.0], "answer1": "The region features windows on the side of a train engine. The train itself is traveling down a set of tracks, which are part of a larger railway system that includes multiple sets of tracks on the ground. Nearby, there are also electric lines hanging above the tracks. Further off, there are buildings, trees, and a wall, which add to the overall rural setting.", "answer2": "The passenger car is a modern rail vehicle with a predominantly maroon and cream color scheme. It features a series of windows with black frames, some of which are partially open, and others are closed. The side of the car displays a pattern of vertical and horizontal lines, creating a grid-like texture. The lower portion of the car has a yellow stripe running horizontally, and there are visible steps leading up to the entrance. The roof of the car is equipped with a pantograph for electrical power collection."} +{"id": 21, "question_id": 21, "answer1_id": 21, "answer2_id": 20, "category": "refer_desc", "content": "7 2\n\nAssistant 1 provided a response that was moderately accurate and relevant to the question. The user asked about the relationship between two specific objects, identified by their bounding box coordinates. 
Assistant 1 correctly identified the objects as a vase and a flower and stated the relationship (\"the flower is in the vase\"), which aligns with the provided relationship data (\"object 9 : flower -> in -> object 22 : vase\"). However, the response lacked spatial correspondence and detail, as it did not explicitly confirm the bounding box coordinates or directly reference the objects' IDs. While helpful, the answer could have been more precise and detailed.\n\nAssistant 2's response was entirely irrelevant to the question. Instead of addressing the relationship between the vase and the flower, it described the appearance of a flower (a white daisy) without any connection to the bounding box coordinates, object IDs, or relationships provided in the context. This response failed in terms of relevance, accuracy, and spatial correspondence, and it did not address the user's query in any meaningful way.\n\nOverall, Assistant 1 demonstrated a reasonable understanding of the context and provided a helpful answer, albeit with room for improvement in detail and precision. Assistant 2's response was unrelated to the question and did not utilize the provided context effectively.", "tuple": [7.0, 2.0], "answer1": "The object is a vase, and the object is a flower. The flower is in the vase, suggesting it is a decorative element within the room.", "answer2": "A white daisy with a dense cluster of elongated petals radiating from a central yellow disc, accompanied by green leaves with jagged edges."} +{"id": 22, "question_id": 22, "answer1_id": 22, "answer2_id": 21, "category": "refer_desc", "content": "8 6\n\nAssistant 1 provides a concise and accurate description of the activity occurring in the specified region ([0.926, 0.253, 0.998, 0.645]). 
The response aligns well with the region description \"Woman in a brown shirt and jeans crossing the street\" and the relationship \"people -> crossing -> street.\" The spatial correspondence is accurate, and the response is relevant and helpful. However, the level of detail could be slightly improved by mentioning additional visual cues, such as the woman's posture or other contextual elements.\n\nAssistant 2, while offering a more detailed description of the person's appearance and posture, introduces inaccuracies and lacks alignment with the provided region description and relationships. For instance, the mention of \"dark hair\" and the specific arm positions are not corroborated by the available data. This reduces the accuracy and relevance of the response. Additionally, the description does not explicitly state the action of \"crossing the street,\" which is central to the question. While the response attempts to provide detail, it ultimately sacrifices accuracy and relevance, leading to a lower score.", "tuple": [8.0, 6.0], "answer1": "A woman, who is wearing a brown shirt and jeans, is crossing the street.", "answer2": "A person with dark hair wearing a brown top and blue jeans, walking with their left arm slightly bent and their right arm hanging down."} +{"id": 23, "question_id": 23, "answer1_id": 23, "answer2_id": 22, "category": "refer_desc", "content": "7 4\n\nAssistant 1 provided a detailed response that incorporated spatial correspondence, helpfulness, and relevance to the question. It accurately described the region [0.414, 0.691, 0.662, 0.725] as displaying red letters on the side of a boat and connected this to the surroundings, such as the boat's features (two levels, black windows, silver railing) and nearby objects (buildings, container, sea wall). 
While the answer was thorough, it could have been more concise and focused specifically on the region's relationship to its surroundings.\n\nAssistant 2 gave a very brief response, identifying the boat and mentioning the name \"Port River Dolphin Cruises\" written on its side. While this is somewhat relevant, it lacks spatial correspondence to the specific region coordinates and does not address the surroundings or relationships between objects. The response is overly simplistic and misses key details provided in the context, such as the red letters and their connection to the boat's features and nearby objects.\n\nOverall, Assistant 1 demonstrated better accuracy, relevance, and level of detail, earning a higher score. Assistant 2's response was less helpful and lacked depth, resulting in a lower score.", "tuple": [7.0, 4.0], "answer1": "The region is displaying red letters. These letters are on the side of a large, white boat that's sitting in the water. The boat has two levels and there is a set of long, black windows on its side. A silver railing is present on the top level of the boat. Close to the boat, there are buildings with red roofs and outdoor canopies. There's also a blue container on the dock, and a gray sea wall next to the ship.", "answer2": "A white boat with the name \"Port River Dolphin Cruises\" written in a stylized font on its side."} +{"id": 24, "question_id": 24, "answer1_id": 24, "answer2_id": 23, "category": "refer_desc", "content": "7 6\n\nAssistant 1 provided a response that is generally accurate and aligns with the context of the region in question. The description of the woman with red hair wearing a tie and suit jacket and holding a plate with a piece of cake is consistent with the provided region description and object relationships. However, the response could have been more detailed, such as mentioning the specific action of holding the cake with both hands, as indicated in the region description. 
Additionally, the assistant could have explicitly tied the description to the formal wedding setting mentioned in the global caption for added context.\n\nAssistant 2, while also accurate in describing the red-haired woman and her attire, introduces unnecessary details like \"smiling,\" which is not supported by the provided data. Furthermore, the assistant omits the mention of the formal occasion or wedding context, which is relevant to the scene. The response is less comprehensive and lacks the contextual depth provided by Assistant 1, making it slightly less helpful and relevant.\n\nOverall, Assistant 1 scores higher due to better alignment with the provided data and a more accurate depiction of the scene, though both responses could improve in detail and contextual integration.", "tuple": [7.0, 6.0], "answer1": "In the region, there is a woman with red hair. She's wearing a tie and a suit jacket, and is holding a plate with a piece of cake. The woman is dressed in formal attire, suggesting that she's attending a special occasion like a wedding.", "answer2": "A smiling woman with long, straight red hair, wearing a black blazer over a white shirt and a dark tie. She is holding a slice of cake with both hands."} +{"id": 25, "question_id": 25, "answer1_id": 25, "answer2_id": 24, "category": "refer_desc", "content": "7 3\n\nAssistant 1 provided a response that was moderately relevant and accurate to the question. It correctly identified that the region [0.189, 0.032, 0.703, 0.178] contains rocks and grass, as described in the region description. It also mentioned nearby elements like sheep grazing in the field, trees, a bush, and a stone wall bordering the field, which are consistent with the provided data. However, the response included some inaccuracies, such as mentioning a hill in the field where sheep and a rock are located, which is not directly supported by the given data for this specific region or its immediate surroundings. 
The level of detail was reasonable but could have been more precise in focusing on the specific region and its immediate context. Overall, Assistant 1 demonstrated a fair understanding of the spatial correspondence and relationships but included some extraneous or unsupported details.\n\nAssistant 2's response was largely irrelevant and inaccurate. It described a hill with a winding stone path, patches of soil, and small bushes, none of which are mentioned in the provided data for the region [0.189, 0.032, 0.703, 0.178] or its surroundings. The description appears to be fabricated and does not align with the objects, relationships, or region descriptions provided. As a result, the response lacked helpfulness, relevance, and accuracy, and it failed to address the user's question effectively. The level of detail was high but entirely misplaced, as it did not correspond to the given context.\n\nThus, Assistant 1 receives a score of 7 for its moderately accurate and relevant response, while Assistant 2 receives a score of 3 for its irrelevant and fabricated answer.", "tuple": [7.0, 3.0], "answer1": "The region contains rocks and grass, providing a background for the pasture. Nearby, there are white sheep grazing in the green grassy field. There are also trees and a bush in the vicinity. A stone wall is running across the grassy field, bordering it. Besides, there's a hill in the field where some sheep and a rock are located.", "answer2": "The hill is gently sloping with a mix of green grass and patches of exposed soil. It features a winding stone path that ascends from the bottom left to the top right. The hill is dotted with small bushes and scattered rocks, adding texture to the landscape. 
The terrain is uneven, with slight undulations and a few small mounds."} +{"id": 26, "question_id": 26, "answer1_id": 26, "answer2_id": 25, "category": "refer_desc", "content": "6 4\n\nAssistant 1 provided a response that was moderately helpful and relevant to the question, but it lacked precision and detail. While it correctly identified the bird standing on the edge of a table and mentioned surrounding objects like the knife and plate with food, it did not describe the bird itself or its specific interactions with these objects. Additionally, the spatial correspondence was somewhat vague, as it did not explicitly tie the bird's position to the given coordinates or elaborate on its proximity to the knife and plate. The mention of a chair next to the table was accurate but not directly relevant to the bird's interactions.\n\nAssistant 2 focused entirely on describing the bird's physical appearance, which was detailed and accurate but did not address the bird's interactions with surrounding objects or its spatial context within the image. This omission made the response less relevant to the user's question, which specifically asked about the bird's interactions with objects. While the description of the bird was thorough, it did not align with the user's request for information about its interactions with the environment.\n\nOverall, Assistant 1 scored higher (6) for providing a more relevant and spatially aware response, albeit lacking in detail. Assistant 2 scored lower (4) for failing to address the core aspect of the question, despite offering a detailed description of the bird's appearance.", "tuple": [6.0, 4.0], "answer1": "In this region, a bird is standing on the edge of a table. The table is covered with a red tablecloth and there are several objects on it, including a plate with food and crumbs, a bottle, and a steak knife. The bird is close to the knife and the plate with food. 
There's also a chair next to the table.", "answer2": "A small bird with a brownish-grey plumage, displaying a subtle gradient from lighter shades on the chest to darker tones on the back and wings. The bird has a distinct white eyebrow stripe above its eye, a short, conical beak, and a visible eye with a dark pupil. Its legs are thin and pinkish, with delicate toes adapted for perching."} +{"id": 27, "question_id": 27, "answer1_id": 27, "answer2_id": 26, "category": "refer_desc", "content": "8 3\n\nAssistant 1 provides a detailed and accurate description of the region [0.279, 0.524, 0.341, 0.570], identifying it as a black wrist guard worn by the skater. The response connects the wrist guard to the skater and other safety equipment, such as the helmet, elbow pad, knee pad, and roller skates, which are relevant objects in the scene. Assistant 1 also mentions the skater's activity and the surrounding environment, such as the skate park and ramp, demonstrating spatial correspondence and relevance. The response is helpful, accurate, and provides sufficient detail to address the user's question comprehensively.\n\nAssistant 2, on the other hand, provides an overly generic and vague description of the wrist guard, focusing on its physical appearance without connecting it to the skater or the context of the scene. The response lacks spatial correspondence, relevance, and accuracy, as it fails to relate the wrist guard to nearby objects or the skater's activity. It does not address the user's question effectively and provides minimal detail, making it less helpful overall.\n\nThus, Assistant 1 scores an 8 for its well-rounded and contextually appropriate response, while Assistant 2 scores a 3 for its lack of relevance, detail, and connection to the scene.", "tuple": [8.0, 3.0], "answer1": "The region contains a black wrist guard that the skater is wearing. 
This wrist guard is part of the safety gear that the skater has on, which also includes a black helmet, elbow pad, knee pad, and a pair of roller skates. The skater is performing a trick at the skate park, his shadow is cast on the cement ramp, and there is a grey post to a metal fence at the top of the ramp nearby. Overall, this region is an important part of the scene, showing the skater's safety equipment.", "answer2": "A black, textured strap with a series of evenly spaced, circular indentations along its length, and a metallic buckle with a prong at one end."} +{"id": 28, "question_id": 28, "answer1_id": 28, "answer2_id": 27, "category": "refer_desc", "content": "7 6\n\nAssistant 1 provided a more comprehensive and spatially accurate description of the region [0.222, 0.144, 0.820, 0.757], which corresponds to the bus. It included details about the bus's appearance (\"large decorated white bus\"), its features (\"Divine Transportation\" written on the front, headlights, side mirrors, wheels), and its surroundings (gray trash can next to bushes). This aligns well with the given region descriptions and object data. However, Assistant 1's response could have been slightly more concise and focused on the specific region without overly generalizing the surroundings.\n\nAssistant 2 offered a less spatially accurate description. While it mentioned the bus's features (\"curved front windshield,\" \"purple and blue wave-like design,\" \"Divine Transportation\" text, and vehicle number \"7006\"), it incorrectly stated that the wheels were not visible, which contradicts the object data showing wheels within the region coordinates. Additionally, Assistant 2's description lacked references to the surrounding objects (e.g., trash can, bushes) and omitted some relevant details like the headlights and side mirrors, which are part of the region.\n\nOverall, Assistant 1 demonstrated better spatial correspondence, relevance, and accuracy, earning a higher score of 7. 
Assistant 2 provided helpful information but lacked precision and completeness, resulting in a score of 6.", "tuple": [7.0, 6.0], "answer1": "This is a large decorated white bus. It seems to be driving past a tall building. You can see \"Divine Transportation\" written on the front of the bus. There's also a bus identification number on top. The bus features a design, including stripes, and there are headlights at the front. You can also see the side mirrors and wheels. Behind the bus, there's a gray trash can next to some large green bushes.", "answer2": "A white coach bus with a curved front windshield and a large, tinted side window. The bus features a purple and blue wave-like design on the side, with the text \"Divine Transportation\" written in elegant script. The vehicle number \"7006\" is displayed at the top front corner. The bus has a black front bumper, side mirrors, and a visible door towards the front. The wheels are not visible in the image."} +{"id": 29, "question_id": 29, "answer1_id": 29, "answer2_id": 28, "category": "refer_desc", "content": "8 3 \nAssistant 1 provided a detailed and accurate response that aligns well with the spatial correspondence of the region [0.542, 0.343, 0.812, 0.493], which includes the man and his interactions with the woman and the bench. The response is helpful, relevant, and captures the relationships described in the context, such as the man sitting on the bench, having his arm around the woman, and both looking at the ocean. It also integrates the broader context of the bench being in front of the ocean, which is consistent with the provided relationships and descriptions. The level of detail is appropriate and contributes to a clear understanding of the scene.\n\nAssistant 2's response is vague, overly focused on physical attributes (e.g., hair and earlobes) that are not directly relevant to the question about interactions between objects in the specified region. 
It fails to address the relationships and spatial dynamics described in the context, such as the man and woman sitting on the bench, their interaction with each other, and their shared activity of looking at the ocean. The response lacks accuracy, relevance, and helpfulness, and does not provide meaningful insights into the scene. Consequently, it receives a significantly lower score.", "tuple": [8.0, 3.0], "answer1": "The region includes a man who is sitting on a bench. He has his arm around a woman, indicating a close relationship between them. They are both looking towards the ocean, suggesting that they are enjoying the view together. The bench they are sitting on is in front of the ocean.", "answer2": "Two individuals with short hair, one with a side-swept fringe and the other with a more pronounced side-swept fringe, both with visible earlobes and a portion of their shoulders and upper backs."} +{"id": 30, "question_id": 30, "answer1_id": 30, "answer2_id": 29, "category": "refer_desc", "content": "7 4\n\nAssistant 1 provides a more comprehensive and contextually relevant description of the region in question. It accurately identifies the player wearing knee and leg pads and integrates this observation with the surrounding context, such as the player's role as a catcher, his uniform, the dirt field, and nearby objects like the wrist band and chalk lines. This response demonstrates good spatial correspondence, relevance, and accuracy, while also being helpful in understanding the scene holistically. However, it could have included more specific details about the pads themselves.\n\nAssistant 2, on the other hand, focuses exclusively on the knee pad, describing its physical features in great detail. While this level of detail is impressive, it lacks relevance to the broader context of the image and fails to address the interaction of the pads with the surrounding elements or the player's role. 
The response does not align well with the user's question, which asks for a description of the region and its interaction with the context. This limits its helpfulness and spatial correspondence, resulting in a lower score.", "tuple": [7.0, 4.0], "answer1": "This region is occupied by a baseball player wearing knee and leg pads. These pads are a part of the player's protective gear. The player, dressed as a catcher, is crouched on the field, ready to catch a ball. He is in a white uniform, which includes pants with a line on them, and he's wearing sneakers. His gloved hand is extended, prepared to receive. We can also see a black and red wrist band on his wrist. The field beneath him is brown dirt, contrasting with the green grass in the rest of the baseball field. Nearby, there are white chalk lines painted on the field.", "answer2": "The knee pad features a hard, black, rounded protective shell with a glossy finish, designed to cover the knee. It is attached to a black, adjustable strap system with a buckle for securing the pad around the leg. The shell has a contoured design to fit the shape of the knee, with a central ridge running vertically down the middle. The strap system includes a red adjustment buckle for size customization."} +{"id": 31, "question_id": 31, "answer1_id": 31, "answer2_id": 30, "category": "refer_desc", "content": "7 4 \nAssistant 1 provides a more accurate and relevant response to the user's question. It correctly identifies the region as containing a little girl standing near a scooter and incorporates spatial correspondence by mentioning the scooter's position relative to the girl. Additionally, it includes details about the girl's blonde hair, white socks, and her position on the pavement, which aligns with the provided object relationships and descriptions. However, it could have elaborated further on the scooter's features and the girl's outfit for a more comprehensive answer.\n\nAssistant 2's response is less accurate and relevant. 
It describes two young girls with detailed clothing and accessory features, which does not correspond specifically to the region [0.060, 0.086, 0.462, 0.908]. The region description and object relationships clearly indicate that the region contains one girl near a scooter, not two girls. Furthermore, Assistant 2 introduces details about the handlebar that are not directly tied to the region in question, making its response less spatially and contextually aligned. While the level of detail is high, the relevance and accuracy are significantly lacking.", "tuple": [7.0, 4.0], "answer1": "This region contains a little girl who is standing near a scooter. The scooter has an orange board and black handles, and it's specifically located to the right of her. The girl has blonde hair and she's wearing white socks. She is also standing on the pavement.", "answer2": "Two young girls with light skin and blonde hair, wearing white short-sleeved shirts with a circular emblem on the left chest, white pleated skirts, and white sneakers with yellow socks. The girl on the left is holding a black handlebar with a silver shaft, while the girl on the right is holding a similar handlebar."} +{"id": 32, "question_id": 32, "answer1_id": 32, "answer2_id": 31, "category": "refer_desc", "content": "8 3\n\nAssistant 1 provides a detailed and accurate description of the region in question ([0.588, 0.327, 0.850, 0.703]) and its surroundings. The response correctly identifies the yellow tennis ball flying toward the man in white clothing, as described in the region descriptions and object relationships. It also incorporates relevant contextual elements, such as the tennis court, white boundary lines, net, fence, trimmed bushes, and tall trees, which are all part of the surroundings. This demonstrates strong spatial correspondence, relevance, and accuracy. 
However, the response could have been slightly more concise, which is why it does not receive a perfect score.\n\nAssistant 2, on the other hand, provides a description that is entirely unrelated to the specified region. Instead of addressing the yellow tennis ball and the man in white clothing, it describes a tennis net with white tape and black mesh squares, which is not relevant to the region in question. This response fails in spatial correspondence, relevance, and accuracy, as it does not address the correct region or its surroundings. The level of detail provided is also irrelevant to the user's question. Consequently, Assistant 2 receives a low score.", "tuple": [8.0, 3.0], "answer1": "In this region, a man in white clothing is preparing to hit a yellow tennis ball with his racket. He is on a tennis court with white boundary lines and a net in front of him. Behind him, there are a fence, trimmed bushes, and tall trees in the distance.", "answer2": "A tennis net with a white tape running along the top edge, supported by a white post on the right side. The net is composed of a grid of black mesh squares."} +{"id": 33, "question_id": 33, "answer1_id": 33, "answer2_id": 32, "category": "refer_desc", "content": "7 3\n\nAssistant 1 provided a response that aligns with the spatial correspondence of the region [0.524, 0.740, 0.734, 0.856], which corresponds to the object \"pants\" (object 11) worn by the man (object 10). The assistant correctly identified the interaction between the pants and the man wearing them, as well as the surrounding context of the sidewalk or decorative square. However, the response could have been more detailed in describing the pants themselves and their visual characteristics. While the answer is helpful and relevant, it lacks precision in describing the specific visual features of the region.\n\nAssistant 2's response is overly generic and does not address the spatial correspondence or interaction with the surroundings. 
The description of \"black trousers with a straight-leg cut\" is not supported by the provided context or bounding box data, and it fails to incorporate the relationships or global caption information. The response lacks relevance and accuracy, as it does not connect to the specific region or the surrounding context described in the question. Additionally, it does not provide any meaningful interaction details, making it less helpful overall.\n\nThus, Assistant 1 receives a higher score for its relevance, spatial correspondence, and contextual accuracy, while Assistant 2 scores lower due to its lack of alignment with the provided data and insufficient detail.", "tuple": [7.0, 3.0], "answer1": "The region is where a man is found wearing a pair of pants. This man is also wearing a long black coat. He seems to be walking on a sidewalk or decorative square, which fills the background of the image.", "answer2": "Black trousers with a straight-leg cut, featuring a smooth texture and a regular fit."} +{"id": 34, "question_id": 34, "answer1_id": 34, "answer2_id": 33, "category": "refer_desc", "content": "7 5\n\nAssistant 1 provided a more detailed and spatially accurate response to the question about the region [0.650, 0.428, 0.858, 0.600]. It correctly identified the presence of the airplane's engine and mentioned other relevant features such as the front door, letters, and windows, which are consistent with the objects located within or near the specified region. Assistant 1 also contextualized the scene by referring to the airplane's landing gear being lowered, which aligns with the global caption describing the airplane in flight. However, it could have been more concise and focused specifically on the region rather than including broader observations about the airplane.\n\nAssistant 2, while concise, lacked spatial correspondence and detail. 
It described the airplane's fuselage and logo, which are not explicitly tied to the specified region based on the provided object coordinates. The mention of \"ANA\" and the logo is speculative and not supported by the given data. Assistant 2 did not address the engine or other objects within the region, making its response less relevant and accurate.\n\nOverall, Assistant 1 demonstrated better spatial correspondence, relevance, and helpfulness, earning a higher score of 7. Assistant 2, while concise, lacked accuracy and detail, resulting in a lower score of 5.", "tuple": [7.0, 5.0], "answer1": "In the region, there is an airplane's engine. The airplane seems to be in mid-flight, given the sky that surrounds it. The front door of the airplane is also visible in this region. The plane appears to be a commercial airline, as indicated by visible letters and windows. Notably, the landing gear of the airplane is lowered, suggesting that it's preparing to land.", "answer2": "A commercial jet with a white fuselage featuring the word \"ANA\" in blue letters, a blue and white logo, and a partially visible engine under the wing."} +{"id": 35, "question_id": 35, "answer1_id": 35, "answer2_id": 34, "category": "refer_desc", "content": "7 5 \nAssistant 1 provides a response that is spatially accurate and relevant to the region in question. It correctly identifies the front window of the train with windshield wipers and mentions its relation to the train's front, which is painted yellow and white. Additionally, it notes the proximity of the region to the train's headlights, which is consistent with the provided data. However, the response could have been more detailed in describing the window itself or its integration into the train's design. \n\nAssistant 2, while attempting to provide a detailed description of the windshield, introduces speculative details that are not supported by the provided context or data. 
For example, it mentions the windshield being \"curved\" and \"slightly tinted,\" along with a \"yellow frame,\" none of which are explicitly stated or implied in the given information. This reduces the accuracy and relevance of the response. Furthermore, Assistant 2 does not address the spatial relationship between the region and nearby objects, which was a key aspect of the user's question. \n\nOverall, Assistant 1 delivers a more accurate and helpful response, while Assistant 2's speculative details and lack of spatial correspondence diminish its effectiveness.", "tuple": [7.0, 5.0], "answer1": "The region contains the front window of a train, which has windshield wipers. This window is part of the front of the train, which is painted yellow and white. Also, the region is located near the headlights of the train.", "answer2": "The windshield is a large, curved, and slightly tinted glass panel with a smooth surface, seamlessly integrated into the train's front. It features a dark wiper at the bottom and is bordered by a thin, yellow frame that matches the train's exterior color."} +{"id": 36, "question_id": 36, "answer1_id": 36, "answer2_id": 35, "category": "refer_desc", "content": "8 5\n\nAssistant 1 provided a detailed and contextually relevant response, accurately describing the activity in the specified region ([0.844, 0.777, 0.958, 0.897]) as people sitting at a table, likely dining or socializing outside a restaurant. The assistant effectively connected this activity to the surrounding area, mentioning the bustling city street, nearby buildings, and the presence of cars. The response demonstrated strong spatial correspondence, as it aligned well with the provided region description and object relationships. However, the response could have been slightly more concise and avoided redundant phrasing.\n\nAssistant 2, on the other hand, gave a much less detailed and somewhat vague response. 
While it correctly identified people sitting at a table and suggested a relaxed atmosphere, it lacked spatial correspondence to the surrounding area and failed to incorporate the broader context of the city street, cars, and buildings. The response was less helpful and relevant, as it did not provide a comprehensive understanding of the scene or its relationship to the surrounding environment. Additionally, the mention of a green tablecloth and casual attire was not supported by the provided data, reducing its accuracy.\n\nOverall, Assistant 1 demonstrated a stronger grasp of the context and provided a more complete and accurate answer, while Assistant 2's response was overly simplistic and lacked sufficient detail.", "tuple": [8.0, 5.0], "answer1": "In this region, there are people sitting at a table, likely dining or socializing outside a restaurant. This area is part of a bustling city street, filled with various cars, some parked and others potentially in motion. There are numerous buildings nearby, with diverse businesses and stores. One notable building nearby even has a marquee sign indicating \"for lease\". This scene suggests that the region is in a vibrant urban setting, where people are engaging in day-to-day activities such as dining outdoors and commuting by car.", "answer2": "A group of people are seated at a table, engaged in conversation. The table is covered with a green tablecloth, and the individuals are dressed in casual attire. The scene suggests a relaxed and social atmosphere."} +{"id": 37, "question_id": 37, "answer1_id": 37, "answer2_id": 36, "category": "refer_desc", "content": "7 5 \nAssistant 1 provides a response that is moderately accurate and helpful. It correctly identifies the region as a patch of grass and connects it to the interactions of the zebras (objects 6 and 7) grazing and standing in the grass. However, it introduces some inaccuracies by mentioning bushes and a tree, which are not explicitly described in the provided data. 
Additionally, the response lacks detailed spatial correspondence to the specific region coordinates [0.206, 0.853, 0.356, 0.982]. \n\nAssistant 2 focuses on describing the physical appearance of the grass in the region, which aligns with the object type and coordinates provided. However, it does not incorporate the interactions or relationships involving the zebras, which are crucial for understanding the context of the region. While the description is detailed, it is less relevant to the user's question, which asks for information considering surrounding entities and their interactions. \n\nOverall, Assistant 1 scores higher due to its inclusion of contextual interactions, but it loses points for inaccuracies and lack of precise spatial correspondence. Assistant 2 scores lower because, while detailed, its response is less relevant and does not address the interactions or broader context.", "tuple": [7.0, 5.0], "answer1": "The region is a patch of grass. There are two zebras standing in and grazing on this grass. They are feeding themselves and are near bushes and a tree.", "answer2": "A dense cluster of green grass blades with varying lengths, some standing upright while others are slightly bent, interspersed with thin, dry, brownish strands."} +{"id": 38, "question_id": 38, "answer1_id": 38, "answer2_id": 37, "category": "refer_desc", "content": "8 4 \nAssistant 1 provided a response that aligns well with the spatial correspondence of the region [0.444, 0.459, 0.552, 0.853], accurately describing the interaction between the man and the little girl as indicated in the region description and object relationships. The response is helpful, relevant, and detailed, incorporating the context of the girl pulling the surfboard and the man looking back at her, which matches the provided data. However, it could have explicitly mentioned the coordinates to further strengthen the spatial correspondence.\n\nAssistant 2's response lacks relevance and accuracy. 
It describes a person with long hair tied back, wearing a yellow hoodie and holding a surfboard, which does not correspond to the objects or relationships in the specified region. The description seems disconnected from the provided data and does not address the interaction or relationship between the man and the little girl in the region. Additionally, it fails to utilize the contextual information about the objects and relationships effectively. As a result, the response is less helpful and detailed compared to Assistant 1.", "tuple": [8.0, 4.0], "answer1": "In the region, there's a man and a little girl, they seem to be having a conversation. The man is looking back to the girl, who is pulling a surfboard, probably getting ready to surf. They are part of a larger group of people who are heading to the water with their surfboards.", "answer2": "A person with long hair tied back, wearing a yellow hoodie with a graphic on the back, black shorts, and brown shoes, is holding a blue surfboard with a black traction pad on the tail."} +{"id": 39, "question_id": 39, "answer1_id": 39, "answer2_id": 38, "category": "refer_desc", "content": "7 4 \nAssistant 1 provides a more comprehensive and relevant description of the region [0.154, 0.392, 0.300, 0.964]. It accurately identifies the man standing on the lake side wearing shorts and connects this observation to nearby objects and individuals, such as the elephant and other people. The response also incorporates contextual details from the global caption and relationships, such as the serene setting and the interaction between the man and the elephant. However, it could have been more precise about the man's appearance and actions, as these details are missing. Spatial correspondence is adequate, as the description aligns with the bounding box and relationships provided.\n\nAssistant 2, on the other hand, focuses solely on the man's appearance and actions, describing his clothing, posture, and accessories in detail. 
While this level of detail is helpful, the response lacks relevance and broader context, as it does not address the relationship between the man and nearby objects or individuals, nor does it connect to the scene described in the global caption or relationships. Spatial correspondence is weaker, as the description does not fully utilize the information about the region or its surroundings. Overall, Assistant 2's response is less helpful and accurate in addressing the user's question.", "tuple": [7.0, 4.0], "answer1": "In the region, there's a man standing wearing shorts. He is standing on the side of a lake, next to an elephant. The elephant is emerging from the water and seems to be interacting with the man and two other individuals not far from him. All three people appear to be tourists posing for a picture with the elephant. The surroundings include water, and some mountains and trees in the far distance, creating a serene and natural setting.", "answer2": "A smiling man with short hair, wearing dark gray shorts with white stripes on the sides, stands with his left hand on his hip and his right arm extended outward. He has a watch on his left wrist and is barefoot."} +{"id": 40, "question_id": 40, "answer1_id": 40, "answer2_id": 39, "category": "refer_desc", "content": "6 3\n\nAssistant 1 provided a response that was somewhat helpful and relevant but lacked precision and clarity. While it correctly identified that the region contains a cow and mentioned its relationship to the pasture and the mountain, the description was overly general and did not directly address the specific region's details or its unique characteristics. Additionally, it introduced unrelated elements like trailers and storage, which were not directly tied to the region in question. This diluted the accuracy and relevance of the response.\n\nAssistant 2, on the other hand, provided a very brief and vague description of a cow's appearance, which was not supported by the provided data. 
The response did not address the spatial context or relationships of the region [0.436, 0.860, 0.454, 0.890] within the scene, making it unhelpful and irrelevant to the user's question. Furthermore, the description of the cow's physical traits (e.g., \"white pattern on its forehead and snout\") was speculative and not grounded in the given information.\n\nOverall, Assistant 1 scored higher due to its partial relevance and attempt to connect the region to the broader scene, despite its inaccuracies and lack of focus. Assistant 2 scored lower because it failed to provide meaningful or accurate insights about the region and its relation to the scene.", "tuple": [6.0, 3.0], "answer1": "Within the region, there is a cow. This cow is in a pasture, which is located near a mountainous area. The mountain is partially covered in snow. There are also multiple trailers in the pasture, and one of them appears to be storage for animal equipment. The pasture and its surroundings provide a peaceful and natural living environment for the cows.", "answer2": "A brown cow with a white pattern on its forehead and snout, dark eyes, and a pair of curved horns."} diff --git a/evaluation/Ferret-Bench/gpt4_result/gar_8b/refer_desc/ferret_answer.jsonl b/evaluation/Ferret-Bench/gpt4_result/gar_8b/refer_desc/ferret_answer.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..694239faa19939409fdb42a26cc31b0df40742e7 --- /dev/null +++ b/evaluation/Ferret-Bench/gpt4_result/gar_8b/refer_desc/ferret_answer.jsonl @@ -0,0 +1,40 @@ +{"question_id": 0, "image": "000000069138.jpg", "category": "refer_desc", "text": "A rectangular signboard with a dark background and light-colored text. 
The text is arranged in three lines, with the first line reading \"FROM 2,500 SQ FT,\" the second line \"IMMEDIATELY,\" and the third line \"AVAILABLE.\" The text is capitalized and there is a border around the signboard."} +{"question_id": 1, "image": "000000131138.jpg", "category": "refer_desc", "text": "A white, round saucer with a slightly raised rim, featuring a metallic spoon resting on its surface. The saucer has a smooth, glossy finish with some light brown stains and smudges near the center."} +{"question_id": 2, "image": "000000475150.jpg", "category": "refer_desc", "text": "A giraffe with a pattern of large, irregular brown patches separated by lighter, beige lines on its skin. The giraffe has two small horn-like structures, called ossicones, atop its head, which are covered with fur and have a rounded appearance. Its eyes are large with long eyelashes, and the giraffe's ears are pointed and tufted with hair. The visible part of the neck shows a continuation of the same patterned skin, and the giraffe's snout is elongated with prominent nostrils and a slightly darker coloration around the mouth area."} +{"question_id": 3, "image": "000000356424.jpg", "category": "refer_desc", "text": "A person with dark hair tied back, wearing a dark-colored jacket, is seated and looking down at an object in their hands."} +{"question_id": 4, "image": "000000491090.jpg", "category": "refer_desc", "text": "A black motorcycle wheel with a silver exhaust pipe attached to the left side, featuring a visible brake disc and caliper assembly."} +{"question_id": 5, "image": "000000484415.jpg", "category": "refer_desc", "text": "A white ceramic toilet bowl with an open black seat cover, featuring a visible water surface within the bowl."} +{"question_id": 6, "image": "000000184324.jpg", "category": "refer_desc", "text": "The floor consists of alternating black and white rectangular stripes, forming a pedestrian crosswalk pattern. 
The stripes are evenly spaced and run parallel to each other, creating a clear path for pedestrians."} +{"question_id": 7, "image": "000000341058.jpg", "category": "refer_desc", "text": "A white, rectangular saltshaker with a textured surface and a black, perforated top featuring multiple small holes for dispensing salt."} +{"question_id": 8, "image": "000000184384.jpg", "category": "refer_desc", "text": "A plate with a grilled sausage, a portion of scrambled eggs, and a side of potatoes."} +{"question_id": 9, "image": "000000259097.jpg", "category": "refer_desc", "text": "A long, continuous hill with a gentle slope, covered in dense green trees and vegetation. The hill extends horizontally across the entire image, with a slightly uneven tree line at the top."} +{"question_id": 10, "image": "000000377882.jpg", "category": "refer_desc", "text": "A vertical, cylindrical pole with a textured surface, featuring a series of diagonal, crisscrossing lines that create a diamond pattern across its entirety."} +{"question_id": 11, "image": "000000415748.jpg", "category": "refer_desc", "text": "The ground is a flat, light brown surface with a slightly rough texture. It appears to be made of compacted dirt or sand, with some areas showing faint, irregular patterns. The ground is relatively even, with no significant elevation changes visible."} +{"question_id": 12, "image": "000000408120.jpg", "category": "refer_desc", "text": "The ground is a rough, uneven surface with a mix of small pebbles and fine gravel. The texture is coarse, with a predominantly gray color interspersed with occasional darker and lighter patches. 
The surface appears slightly compacted, with some areas showing minor indentations and irregularities."} +{"question_id": 13, "image": "000000184400.jpg", "category": "refer_desc", "text": "A cylindrical pole with a flared base and a decorative capital featuring a series of concentric rings and a flat, overhanging element with a scalloped edge."} +{"question_id": 14, "image": "000000276018.jpg", "category": "refer_desc", "text": "A young boy with short, curly brown hair, wearing a black jacket, is holding a brown and white stuffed monkey."} +{"question_id": 15, "image": "000000376322.jpg", "category": "refer_desc", "text": "A man with short gray hair and glasses, wearing a light green button-up shirt with a name tag on the left side of his chest, is holding a glass of white wine in his right hand."} +{"question_id": 16, "image": "000000125472.jpg", "category": "refer_desc", "text": "The sky is a clear, bright white with no visible clouds, creating a stark contrast against the person in the foreground."} +{"question_id": 17, "image": "000000361551.jpg", "category": "refer_desc", "text": "A person with short, dark hair, wearing a light-colored top with a dark strap over the shoulder."} +{"question_id": 18, "image": "000000412240.jpg", "category": "refer_desc", "text": "A pair of black leather dress shoes with a low heel and black laces, positioned side by side."} +{"question_id": 19, "image": "000000130566.jpg", "category": "refer_desc", "text": "The railcar is a modern passenger model with a predominantly maroon and cream livery. It features a streamlined design with a curved front end, large windows on the sides for passenger viewing, and a set of black doors for entry and exit. The lower portion of the railcar has a yellow stripe running horizontally, and there are visible vents and grilles on the side, likely for engine ventilation. 
The roofline is smooth and slightly overhangs the body of the railcar."} +{"question_id": 20, "image": "000000421923.jpg", "category": "refer_desc", "text": "A lush bouquet of white daisies with prominent yellow centers, complemented by green foliage."} +{"question_id": 21, "image": "000000513567.jpg", "category": "refer_desc", "text": "A person with dark hair tied back, wearing a brown top and blue jeans, is walking."} +{"question_id": 22, "image": "000000543300.jpg", "category": "refer_desc", "text": "A pink banner with the text \"Port River Dolphin Cruises\" in a serif font, with the word \"Port\" in a larger size than the rest of the text."} +{"question_id": 23, "image": "000000241668.jpg", "category": "refer_desc", "text": "A woman with long, straight red hair, wearing a black suit jacket over a white dress shirt and a dark tie. She is holding a slice of chocolate cake with white frosting on a plate."} +{"question_id": 24, "image": "000000535578.jpg", "category": "refer_desc", "text": "A gently sloping hill covered in lush green grass, with a winding stone path running horizontally across its midsection. The hill is dotted with small bushes and patches of exposed soil, and a few scattered rocks are visible along the path."} +{"question_id": 25, "image": "000000277051.jpg", "category": "refer_desc", "text": "A small sparrow with brown and gray feathers, a short beak, and a slightly rounded body. It has a light brown head with darker streaks and a small, dark eye. The wings are brown with darker streaks, and the tail is short and brown. 
The bird is standing on thin, pinkish legs with small claws."} +{"question_id": 26, "image": "000000018519.jpg", "category": "refer_desc", "text": "A black, rectangular pad with rounded corners, featuring a central white logo consisting of a stylized letter 'S' within a square outline."} +{"question_id": 27, "image": "000000106048.jpg", "category": "refer_desc", "text": "A white coach bus with a curved front windshield and a large side window. The bus features a purple and blue wave-like design on its side, with the text \"Divine Transportation\" written in elegant script above the design. The vehicle number \"7006\" is displayed on the top front corner. The bus has a black front bumper, side mirrors, and a visible door towards the front."} +{"question_id": 28, "image": "000000058393.jpg", "category": "refer_desc", "text": "A person with short, curly hair is attached to another person with long, wavy hair."} +{"question_id": 29, "image": "000000010764.jpg", "category": "refer_desc", "text": "The knee pad features a hard, black plastic shell with a glossy finish, designed to cover and protect the knee. It has a contoured shape to fit the knee, with a series of ridges and indentations that provide structural reinforcement. The edges of the shell appear to be smooth and rounded for comfort. There are two adjustable straps with fastening mechanisms, likely Velcro, for securing the pad around the leg. The straps are black and blend with the overall color scheme of the knee pad."} +{"question_id": 30, "image": "000000271402.jpg", "category": "refer_desc", "text": "Two young girls with light skin and blonde hair tied back, wearing white short-sleeved shirts with a circular emblem on the left side of the chest, white pleated skirts, and white sneakers with yellow socks. 
The girl on the left is holding a silver scooter with black grips and an orange handle."} +{"question_id": 31, "image": "000000273493.jpg", "category": "refer_desc", "text": "A tennis net with a white tape running along the top edge, spanning horizontally across the image. The net is supported by a white post on the left side and another white post on the right side. The netting is made of black mesh material."} +{"question_id": 32, "image": "000000360960.jpg", "category": "refer_desc", "text": "Black trousers with a straight-leg cut, featuring a smooth texture and a visible hem at the bottom."} +{"question_id": 33, "image": "000000452122.jpg", "category": "refer_desc", "text": "A commercial jet with a white fuselage featuring the word \"ANA\" in blue letters, a dark blue and white logo to the right of the text, and a partially visible landing gear with a single wheel."} +{"question_id": 34, "image": "000000134722.jpg", "category": "refer_desc", "text": "A curved, dark-tinted windshield with a visible wiper on the lower right side, reflecting a hint of the surrounding environment."} +{"question_id": 35, "image": "000000039484.jpg", "category": "refer_desc", "text": "A rectangular table with a white tablecloth, surrounded by multiple chairs. The tablecloth has a subtle pattern and is draped over the table, with the edges hanging down the sides. 
The chairs are arranged around the table, with some facing the table and others turned slightly."} +{"question_id": 36, "image": "000000159311.jpg", "category": "refer_desc", "text": "A dense cluster of green grass blades with varying lengths, some standing upright while others are bent or leaning, interspersed with small patches of bare soil."} +{"question_id": 37, "image": "000000326174.jpg", "category": "refer_desc", "text": "A person with long blonde hair tied in a ponytail, wearing a bright yellow jacket with a black design on the back, black pants, and brown shoes, is holding a blue surfboard."} +{"question_id": 38, "image": "000000562207.jpg", "category": "refer_desc", "text": "A smiling man with short hair, wearing dark gray shorts with white stripes on the sides, and a black wristwatch on his left wrist."} +{"question_id": 39, "image": "000000332318.jpg", "category": "refer_desc", "text": "A brown cow with a white pattern on its forehead and snout, white lower legs, and a visible udder."} diff --git a/evaluation/Ferret-Bench/gpt4_result/gar_8b/review_refer_desc.jsonl b/evaluation/Ferret-Bench/gpt4_result/gar_8b/review_refer_desc.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..78d1359119460ab26ee9a494b0004e92b79444cc --- /dev/null +++ b/evaluation/Ferret-Bench/gpt4_result/gar_8b/review_refer_desc.jsonl @@ -0,0 +1,40 @@ +{"id": 1, "question_id": 0, "answer1_id": 0, "answer2_id": 0, "category": "refer_desc", "content": "7 6\n\nAssistant 1 provided a generally accurate response. It correctly identified the sign's relationship with the building by mentioning that it offers short-term office space, which aligns with the information in the region description. Additionally, Assistant 1 connected the sign to the overall street scene, mentioning the stop light and the road, which adds relevance to the context. 
However, it could have benefitted from more specific details regarding the sign's appearance or text content.\n\nAssistant 2 focused more on describing the physical characteristics of the sign itself, including its text layout and color scheme. While this detailed description is somewhat helpful, it lacks context and does not address the interaction of the sign with its surroundings or its significance in the scene. Therefore, it fails to contextualize the object as well as Assistant 1 did, leading to a lower score. \n\nOverall, Assistant 1 was more effective in answering the question about the interaction with surroundings, while Assistant 2 provided useful detail but missed essential context.", "tuple": [7.0, 6.0], "answer1": "The object is a sign that is placed on a building. The building is offering short term office space, which is stated on the sign. This sign is part of the overall street scene, with other objects nearby, such as a stop light on a pole and a road. The sign is directly related to the building, contributing to the information about the building's function to the overall scene.", "answer2": "A rectangular signboard with a dark background and light-colored text. The text is arranged in three lines, with the first line reading \"FROM 2,500 SQ FT,\" the second line \"IMMEDIATELY,\" and the third line \"AVAILABLE.\" The text is capitalized and there is a border around the signboard."} +{"id": 2, "question_id": 1, "answer1_id": 1, "answer2_id": 1, "category": "refer_desc", "content": "7 4\n\nAssistant 1 provides a clear and relevant response. It accurately identifies the object as a plate and notes that a fork is on top of it. It also mentions the plate's location on a desk and includes nearby objects like a cup and a computer mouse. 
This response demonstrates a good understanding of spatial relationships and details, contributing to its helpfulness.\n\nOn the other hand, Assistant 2 describes a saucer with a metallic spoon and provides details about its aesthetic and condition (light brown stains and smudges). However, this description is inaccurate, as the object in question is specifically identified as a plate, not a saucer, and Assistant 2 has incorrectly described the presence of a spoon instead of a fork. This information detracts from its relevance and accuracy, as it does not correctly reflect the context of the image provided. Furthermore, this response lacks the insight into surrounding objects and relationships, making it less comprehensive than Assistant 1\u2019s.\n\nOverall, while Assistant 1 delivers a relevant and mostly accurate response with adequate detail, Assistant 2\u2019s description deviates from the key facts and lacks meaningful context, leading to much lower scores.", "tuple": [7.0, 4.0], "answer1": "The object is a plate, and on top of this plate is a fork. It's located on a desk. Nearby, there's also a cup and a computer mouse.", "answer2": "A white, round saucer with a slightly raised rim, featuring a metallic spoon resting on its surface. The saucer has a smooth, glossy finish with some light brown stains and smudges near the center."} +{"id": 3, "question_id": 2, "answer1_id": 2, "answer2_id": 2, "category": "refer_desc", "content": "7 6\n\nAssistant 1 provides a clear response that specifically addresses the region in question, which contains the face of the giraffe. The assistant accurately describes the colors of the face and mentions its proximity to the neck, which is consistent with the object relationships provided. 
However, the mention of branches and the sky as background elements is vague and lacks specific context, which detracts slightly from the overall relevance and detail of the response.\n\nOn the other hand, Assistant 2 gives a more detailed description of the giraffe's features, including its skin pattern and specific anatomical details like ossicones and long eyelashes. However, it does not directly reference the specific region [0.288, 0.324, 0.572, 0.649], which diminishes the relevance and focus on the user's question. While the details about the giraffe are helpful and informative, their lack of spatial correspondence to the region mentioned results in a lower score.\n\nOverall, Assistant 1 is more focused on the specific region requested, while Assistant 2 provides interesting, detailed information but fails to tie it back to the specified area, impacting its effectiveness in answering the question.", "tuple": [7.0, 6.0], "answer1": "The region is primarily occupied by the face of a giraffe, which is white and brown in color. This portion of the giraffe's body is in close proximity to its neck, which is covered with wrinkles. The giraffe's eye and ear are also located within this region. The giraffe is standing in front of a tree, and its face and neck are spotted with tan and brown spots. The background is filled with branches of the tree and the sky.", "answer2": "A giraffe with a pattern of large, irregular brown patches separated by lighter, beige lines on its skin. The giraffe has two small horn-like structures, called ossicones, atop its head, which are covered with fur and have a rounded appearance. Its eyes are large with long eyelashes, and the giraffe's ears are pointed and tufted with hair. 
The visible part of the neck shows a continuation of the same patterned skin, and the giraffe's snout is elongated with prominent nostrils and a slightly darker coloration around the mouth area."} +{"id": 4, "question_id": 3, "answer1_id": 3, "answer2_id": 3, "category": "refer_desc", "content": "8 4\n\nAssistant 1 provides a detailed and accurate description of the activity occurring in the specified region, noting that a woman is having lunch. It also connects her to the surrounding objects on the table, such as the plate of food containing raspberries, a bottle, and a glass of water. Additionally, it mentions the presence of a nearby man wearing glasses and a striped shirt, suggesting a social dining context. This depth of detail, spatial awareness, and relevance to the question contributes to its high score.\n\nIn contrast, Assistant 2 offers a vaguer description that lacks specific details about the objects in the region and does not identify the woman's activity clearly. There is an attempt to describe her appearance, but it introduces ambiguity by stating she is \"looking down at an object,\" without specifying what that object is. This lack of clarity and connection to the relevant objects and actions results in a significantly lower score, as the response does not address the user's question comprehensively or accurately.", "tuple": [8.0, 4.0], "answer1": "In the region, there's a woman who seems to be having lunch. She is sitting near a table where there are several objects, including a plate of food that contains raspberries, a bottle, and a glass of water. There is also a man nearby, who is wearing glasses and a striped orange and black shirt. 
Both of them seem to be engaged in a meal.", "answer2": "A person with dark hair tied back, wearing a dark-colored jacket, is seated and looking down at an object in their hands."} +{"id": 5, "question_id": 4, "answer1_id": 4, "answer2_id": 4, "category": "refer_desc", "content": "7 5\n\nAssistant 1 provides a detailed description of the region, identifying it as the back wheel of the motorcycle. It effectively relates the back wheel to nearby elements, namely the tail pipe on the left and the person sitting on the motorcycle above. This response demonstrates a good understanding of spatial relationships and provides context about the objects in relation to the region in question.\n\nAssistant 2, while describing the region as a black motorcycle wheel with a silver exhaust pipe, lacks detail about the spatial arrangement or the interaction between the back wheel and other nearby elements. It does not mention the person sitting on the motorcycle or other relevant aspects that would add value and context for the user. The response feels more mechanical and less insightful compared to Assistant 1.\n\nOverall, Assistant 1 displays a better understanding of the context and relationships, justifying a higher score, while Assistant 2's response misses critical elements and connections, leading to a lower score.", "tuple": [7.0, 5.0], "answer1": "The region corresponds to the back wheel of a motorcycle, which is an integral part of the black motorcycle with silver accessories. Close to the back wheel, you can find the tail pipe of the motorcycle located on the left. 
Above the back wheel, there's a person sitting on the motorcycle, wearing a sweater, jeans, and sneakers.", "answer2": "A black motorcycle wheel with a silver exhaust pipe attached to the left side, featuring a visible brake disc and caliper assembly."} +{"id": 6, "question_id": 5, "answer1_id": 5, "answer2_id": 5, "category": "refer_desc", "content": "8 5\n\nAssistant 1 provides a detailed and contextually relevant observation regarding the container and toilet brush cleaner found in the specified region. It notes their proximity to the toilet bowl, highlighting their purpose in maintaining cleanliness, which is insightful and shows an understanding of the scene's dynamics. The assistant's response indicates how the objects interact with their surroundings by emphasizing their utility in the bathroom setting. This response is both accurate and provides a good level of detail.\n\nOn the other hand, Assistant 2's response is less relevant and lacks detail. It describes a white ceramic toilet bowl with an open black seat cover and a visible water surface, but it does not address the specific container and toilet brush cleaner located in the area of interest. This response does not directly answer how the objects interact with the surroundings, which is a crucial part of the user's question. The overall lack of focus on the specific region requested in the question diminishes the helpfulness and accuracy of the response. \n\nThus, while Assistant 1 gives a strong and relevant answer, Assistant 2 falls short in both relevance and detail regarding the specific requested observation of the region.", "tuple": [8.0, 5.0], "answer1": "In the region, there is a container and a toilet brush cleaner. This region is right next to the toilet bowl, indicating that the brush cleaner is accessible for bathroom cleaning. 
It's crucial for maintaining the cleanliness of the toilet nearby.", "answer2": "A white ceramic toilet bowl with an open black seat cover, featuring a visible water surface within the bowl."} +{"id": 7, "question_id": 7, "answer1_id": 7, "answer2_id": 6, "category": "refer_desc", "content": "8 5\n\nAssistant 1 provides a rich description of the activities happening in the region, mentioning both the crosswalk and the presence of cyclists and pedestrians. It contextualizes the scene by noting the presence of bikes and cars, emphasizing the busy nature of the city intersection, which aligns well with the provided region description. This response reflects spatial correspondence, relevance, and a high level of detail.\n\nConversely, Assistant 2 focuses solely on describing the appearance of the crosswalk stripes without addressing the activities occurring in the region. While it accurately describes the alternating colors of the stripes, it overlooks the broader context of people and vehicles interacting with the crosswalk. This lack of engagement with nearby objects and activities diminishes its helpfulness and relevance to the user's question about what is happening in the specified region. The response lacks depth compared to Assistant 1's comprehensive account.", "tuple": [8.0, 5.0], "answer1": "The region is a crosswalk on a busy city street, highlighted by white stripes. It's being used by a cyclist and a group of people who are crossing the street. There are bikes on the road, and cars are also visible within the vicinity. A large white vehicle with a big windshield is also nearby. This bustling scene is typical for a city intersection.", "answer2": "The floor consists of alternating black and white rectangular stripes, forming a pedestrian crosswalk pattern. 
The stripes are evenly spaced and run parallel to each other, creating a clear path for pedestrians."} +{"id": 8, "question_id": 8, "answer1_id": 8, "answer2_id": 7, "category": "refer_desc", "content": "8 6\n\nAssistant 1 provides a clear identification of the object as a salt shaker and correctly relates it to other nearby objects, specifically mentioning its presence on a dining table along with a napkin and another shaker containing pepper. This response effectively captures the relevant information about the object and its relationships, making it highly helpful and relevant to the user's query. The level of detail is appropriate for understanding the context in which the salt shaker is located.\n\nIn contrast, Assistant 2 offers a detailed description of the salt shaker's appearance but fails to address its relationship with nearby objects or provide context about its location in relation to the table and other items. Although the description of the salt shaker is accurate, it lacks the spatial correspondence and helpfulness that Assistant 1 provided. As a result, the response is less relevant and informative, leading to a lower score. \n\nOverall, Assistant 1 successfully identifies the object and explains its surroundings, while Assistant 2 focuses too much on description without connecting to the context of the query.", "tuple": [8.0, 6.0], "answer1": "The object is a salt shaker. It is located on a table along with a napkin and another shaker, which contains pepper. The table appears to be set for dining at a restaurant, as indicated by the presence of these objects.", "answer2": "A white, rectangular saltshaker with a textured surface and a black, perforated top featuring multiple small holes for dispensing salt."} +{"id": 9, "question_id": 9, "answer1_id": 9, "answer2_id": 8, "category": "refer_desc", "content": "7 5\n\nAssistant 1 provided a more detailed and accurate description of the contents found in the specified region. 
It correctly identified that there is a plate with food, mentioning the sausage and egg specifically while highlighting the arrangement of the food and the context of being part of a meal setting on a table. This shows good spatial correspondence to the objects and relationships given in the context.\n\nAssistant 2's response was less detailed and incorrectly described \"a portion of scrambled eggs and a side of potatoes,\" when the information about the specific contents (egg and sausage) did not include potatoes at all. This reduced its accuracy and relevance to the question asked. \n\nOverall, Assistant 1's comprehensive response and emphasis on the organization of the food on the plate makes it the more effective assistant, while Assistant 2's vague and partially incorrect description limits its effectiveness. Thus, the ratings reflect the difference in quality and detail provided in their responses.", "tuple": [7.0, 5.0], "answer1": "In the region, there is a plate with various types of food on it. This includes a sausage, an egg, and a few other unidentified items. The food is attractively arranged on the plate, which is positioned towards the back of the table. The plate and its contents seem to be part of a larger meal setup on the table.", "answer2": "A plate with a grilled sausage, a portion of scrambled eggs, and a side of potatoes."} +{"id": 10, "question_id": 10, "answer1_id": 10, "answer2_id": 9, "category": "refer_desc", "content": "7 6\n\nAssistant 1 provides a relatively accurate description of the region in question, mentioning the presence of trees and a village on a hill in the distance, which aligns well with the provided region description. This assistant also connects the region to the surrounding areas by indicating that they are behind a grassy field where a man is jumping to catch a frisbee, mentioning the man's shadow as well. 
The overall response is spatially relevant and accurate, and it includes a reasonable level of detail about the relationship between the described region and its surroundings.\n\nOn the other hand, Assistant 2's response lacks a strong connection to the specific region described and fails to mention key details like the village and the man in the grassy field. While it describes the hill covered in dense green trees and vegetation, it does not tie this back to the visible activities or elements around it. The description is more generic and does not adequately address the user's inquiry about the relationship with the surrounding areas.\n\nTherefore, Assistant 1 performed better due to its relevant insights and structured connections, while Assistant 2's response, while descriptive, did not sufficiently relate to the requested context, leading to a lower score.", "tuple": [7.0, 6.0], "answer1": "Region is full of trees and there is a village on a hill in the distance. These trees and buildings are located behind a grassy field where a man is seen jumping to catch a frisbee. The man's shadow can be seen on the grass.", "answer2": "A long, continuous hill with a gentle slope, covered in dense green trees and vegetation. The hill extends horizontally across the entire image, with a slightly uneven tree line at the top."} +{"id": 11, "question_id": 11, "answer1_id": 11, "answer2_id": 10, "category": "refer_desc", "content": "7 3\n\nAssistant 1 provides a well-rounded response that accurately describes the context of the region [0.242, 0.211, 0.302, 0.989]. It references the black fence pole as part of the chain-link fence and appropriately connects this detail to the surrounding elements such as the waterway, boats, and greenery. 
This assistant effectively synthesizes information from the specific objects, relationships, and region descriptions to offer a comprehensive view of the spatial context.\n\nIn contrast, Assistant 2's response is much more focused but lacks relevance to the user's question about the overall context of the specified region. While it describes the black fence pole's features, it does not provide insight into its surroundings or how it interacts with other objects and areas in the image. Therefore, it falls short in terms of helpfulness and contextual awareness, leading to a lower score. \n\nOverall, Assistant 1 demonstrates a more thorough understanding and articulation of the requested information while effectively considering the spatial relationships and context, whereas Assistant 2's contribution is limited to a basic physical description without broader context.", "tuple": [7.0, 3.0], "answer1": "The region contains a black fence pole, which seems to be part of a chain-link fence enclosing the area. This fence is next to a water way and encloses several boats and surfboards. There are buildings on the horizon, and some green shrubs growing along the side of the lake.", "answer2": "A vertical, cylindrical pole with a textured surface, featuring a series of diagonal, crisscrossing lines that create a diamond pattern across its entirety."} +{"id": 12, "question_id": 12, "answer1_id": 12, "answer2_id": 11, "category": "refer_desc", "content": "8 4\n\nAssistant 1 provides a well-rounded and relevant answer to the user's question about the elephant, identifying it as a large object and noting its significant interaction with nearby objects, specifically mentioning the man riding it and the proximity to a building. It also includes details about the painted appearance of the elephant, hinting at cultural aspects, which adds depth to the response. 
The spatial correspondence of the information is appropriate and accurately reflects the relationships described in the provided data.\n\nOn the other hand, Assistant 2 focuses solely on describing the ground, which does not address the user's question about the elephant or its interactions. While the description may be accurate, it lacks relevance to the question and does not provide insights into the object in question or its surrounding context. This response suffers from a lack of helpfulness, as it fails to engage with the specifics of the image or the entities involved.\n\nOverall, Assistant 1 is rated significantly higher due to its comprehensive and contextual understanding of the elephant and its relationships with nearby objects, while Assistant 2's response lacks relation to the user's inquiry.", "tuple": [8.0, 4.0], "answer1": "The region contains an elephant, which is quite large. There's a man riding on the back of the elephant, and they are moving close to a building. The shadow of the elephant can be seen on the ground. Additionally, the elephant's face and trunk are painted, which indicates some cultural significance.", "answer2": "The ground is a flat, light brown surface with a slightly rough texture. It appears to be made of compacted dirt or sand, with some areas showing faint, irregular patterns. The ground is relatively even, with no significant elevation changes visible."} +{"id": 13, "question_id": 13, "answer1_id": 13, "answer2_id": 12, "category": "refer_desc", "content": "8 5\n\nAssistant 1 provides a clear and relevant description of the objects within the specified region, identifying the concrete surface and its interactions with nearby objects, such as the curb, road, and a girl holding an umbrella. 
The assistant accurately acknowledges the relationship between the observed elements and gives a coherent summary of their spatial context within the image, making it easier for the user to visualize the scene.\n\nOn the other hand, Assistant 2's response focuses on detailing the texture and appearance of the ground in the specified region but fails to mention the key objects or their interactions in relation to the girl with the umbrella, car, and other elements nearby. While it provides some descriptive detail, the lack of contextual relevance to the user's inquiry concerning interactions reduces the helpfulness and effectiveness of the answer. Consequently, while the information is accurate regarding the ground\u2019s texture, it falls short of answering the question fully, leading to a lower score. \n\nOverall, while both responses provide some valuable information, Assistant 1 offers a more comprehensive and relevant perspective that aligns better with the user's question.", "tuple": [8.0, 5.0], "answer1": "In the region, there is a concrete surface which is part of the alley. It is placed alongside the curb and the road, and there is a car parked on it. Also, nearby, there is a girl holding an umbrella walking along this path.", "answer2": "The ground is a rough, uneven surface with a mix of small pebbles and fine gravel. The texture is coarse, with a predominantly gray color interspersed with occasional darker and lighter patches. The surface appears slightly compacted, with some areas showing minor indentations and irregularities."} +{"id": 14, "question_id": 14, "answer1_id": 14, "answer2_id": 13, "category": "refer_desc", "content": "8 4\n\nAssistant 1 provides a well-rounded answer that accurately describes the relationship between the metal support column and its surrounding objects, particularly the bridge and the train passing over it. It specifically mentions that the column supports the bridge and notes the presence of a red line on it. 
The assistant also contextualizes the region as part of a larger scene that includes the train track on an elevated bridge, making the response spatially coherent and relevant to the user's inquiry.\n\nIn contrast, Assistant 2 gives a description that focuses solely on the physical attributes of the column, mentioning its cylindrical shape, flared base, decorative features, and lack of context regarding its function or interaction with surrounding objects. This response lacks spatial correspondence and relevance since it does not address the surrounding context or the interaction between the column and other objects in the scene, such as the bridge and the train.\n\nOverall, Assistant 1's response is more helpful, accurate, and detailed in relation to the interaction between the objects, while Assistant 2's response is not only less relevant but also lacks necessary detail. Hence, the scores reflect this disparity in performance.", "tuple": [8.0, 4.0], "answer1": "In the region, there is a metal support column. This column is providing support for a bridge above it, which a train is passing over. The column also features a red line on it. This region is part of a larger scene that includes a train track on an elevated bridge.", "answer2": "A cylindrical pole with a flared base and a decorative capital featuring a series of concentric rings and a flat, overhanging element with a scalloped edge."} +{"id": 15, "question_id": 15, "answer1_id": 15, "answer2_id": 14, "category": "refer_desc", "content": "8 5\n\nAssistant 1 provided a more comprehensive and accurate description of the region in question. It correctly identified the boy as wearing a black jacket and holding a brown stuffed dog, aligning with the context provided. 
Additionally, it contextualized the setting by mentioning a group of children carrying stuffed animals and potentially participating in an event, which enhances the relevance and depth of the description.\n\nOn the other hand, Assistant 2 inaccurately described the stuffed animal as a \"brown and white stuffed monkey\" instead of the correct \"brown stuffed dog with a red and white collar.\" This significant detail impacts the accuracy and relevance of the response. The description of the boy's appearance was also less detailed than in Assistant 1's response. Overall, while Assistant 2 acknowledged the boy's attire, it failed to capture important details about the stuffed animal and the context of the region, resulting in a less effective and informative answer.", "tuple": [8.0, 5.0], "answer1": "The region is occupied by a boy who is wearing a black jacket. He is holding a brown stuffed dog with a red and white collar. The boy seems to be part of a larger group of children who are all holding various stuffed animals and dolls. They seem to be walking across some grassy area, possibly in some kind of event or gathering.", "answer2": "A young boy with short, curly brown hair, wearing a black jacket, is holding a brown and white stuffed monkey."} +{"id": 16, "question_id": 16, "answer1_id": 16, "answer2_id": 15, "category": "refer_desc", "content": "7 5\n\nIn evaluating the responses, Assistant 1 provides a good overview of the interaction in the specified region, correctly identifying that it is a man who is engaged in a social setting, presumably in conversation with others. This response highlights the communal atmosphere and mentions that the table is full of various items, maintaining relevance to the question. 
However, it lacks specific details about the man\u2019s appearance or specific objects he is interacting with, which could enhance the response.\n\nOn the other hand, Assistant 2, while providing a description of the man's appearance, focuses particularly on the man wearing a light green shirt and holding a glass of white wine. However, the detail about the name tag might be a specific interpretation that is not universally applicable based on the provided context. Additionally, it does not address the broader interaction or context at the table, leading to a lack of cohesiveness with the overall scene described.\n\nOverall, Assistant 1's response is more helpful and relevant to the question about interaction within the specified region, while Assistant 2 provides less context and fails to connect the details back to the social interactions taking place at the table. Thus, Assistant 1 receives a higher score for being more comprehensive in its explanation.", "tuple": [7.0, 5.0], "answer1": "In the region, there is a man wearing a green shirt. He is sitting at a table, presumably in a social setting, along with other people. The table is full of items such as plates, glasses, and a decanter. One of the significant interactions is that the man is engaged in a conversation with the people around him.", "answer2": "A man with short gray hair and glasses, wearing a light green button-up shirt with a name tag on the left side of his chest, is holding a glass of white wine in his right hand."} +{"id": 17, "question_id": 17, "answer1_id": 17, "answer2_id": 16, "category": "refer_desc", "content": "8 4\n\nAssistant 1 provides a well-rounded and detailed response that accurately describes the contents of the specified region, emphasizing the man performing a trick on the skateboard and mentioning details about his clothing and the surrounding environment. 
This assistant effectively incorporates information about the relationships with nearby objects, such as the skateboard being beneath the man and the presence of trees, buildings, and the fence in the background. The description aligns well with the visual context given.\n\nIn contrast, Assistant 2's response lacks relevance and detail. While it mentions the sky, it does not provide substantial information about the man, skateboard, or other surrounding objects, which are critical to answering the user's question. This assistant fails to connect the sky description to the action occurring in the specified region, which reduces its overall helpfulness and accuracy. \n\nOverall, Assistant 1 demonstrated a strong understanding of the task by maintaining spatial correspondence, relevance, accuracy, and detail, while Assistant 2 fell short in these aspects.", "tuple": [8.0, 4.0], "answer1": "This region is primarily occupied by a man, who appears to be in mid-air, performing a trick on a skateboard. The skateboard is beneath him. He is wearing jeans and shoes with laces, and has a bracelet on his wrist. In the background of this region, there are trees, a building, and a fence. The scene seems to be taking place in a stadium, as there are stadium lights on poles in the vicinity.", "answer2": "The sky is a clear, bright white with no visible clouds, creating a stark contrast against the person in the foreground."} +{"id": 18, "question_id": 18, "answer1_id": 18, "answer2_id": 17, "category": "refer_desc", "content": "8 3\n\nAssistant 1 provides a well-rounded response to the user's question, detailing the interaction occurring in the specified region. They accurately describe the woman\u2019s actions (bending over her luggage), her appearance (wearing a sleeveless black top and a black and white headband), and situates her within the airport context surrounded by other people. 
This response demonstrates good spatial correspondence with the provided descriptive region and relationships listed in the context, making it relevant and helpful for understanding the scene.\n\nOn the other hand, Assistant 2 offers a vague and incomplete description that focuses solely on the woman\u2019s hair and attire but lacks any context about her actions or the environment she's in. The response does not address the interaction taking place, missing crucial details that would be necessary for a clear understanding of the situation. This lack of detail, relevance, and helpful commentary leads to a significantly lower score. Overall, Assistant 1 effectively captures the essence of the scene, while Assistant 2 fails to provide a meaningful and informative response.", "tuple": [8.0, 3.0], "answer1": "This region features a woman, who is dressed in a sleeveless black top. She is bending over her luggage, possibly preparing or checking something inside it. The woman is wearing a black and white headband as well. She is located in the service area of an airport, where there are other people standing around as well, some of them are holding their luggage. This scene is quite typical in an airport setting where passengers are usually seen handling their luggage.", "answer2": "A person with short, dark hair, wearing a light-colored top with a dark strap over the shoulder."} +{"id": 19, "question_id": 19, "answer1_id": 19, "answer2_id": 18, "category": "refer_desc", "content": "8 5\n\nAssistant 1 provided a thorough and detailed response regarding the region in question. It identified the shoe as the primary object and described its context, including its position on the floor and its relationship with the surrounding dog and light. The mention of specific features of the shoe (laces, heel, toe) indicates a strong attention to detail and enhances the understanding of the visual content. 
Overall, this answer effectively captures the essential interactions and surroundings of the shoe.\n\nAssistant 2, on the other hand, offered a brief description of the shoes without mentioning the surrounding environment or the interactions with other objects, such as the dog or the floor. This response lacks the depth and context provided by Assistant 1, making it less informative regarding the relationship between the shoe and other elements within the scene. Thus, it received a lower score as it fails to adequately address the user's question about the interactions and surroundings of the specified region.", "tuple": [8.0, 5.0], "answer1": "This region primarily contains a shoe. The shoe appears to be placed on a floor, and light is reflecting off of it. A dog is sitting nearby on the floor as well, and the shoe is positioned next to the dog. The shoe features several distinct elements like laces, a heel, and a toe.", "answer2": "A pair of black leather dress shoes with a low heel and black laces, positioned side by side."} +{"id": 20, "question_id": 20, "answer1_id": 20, "answer2_id": 19, "category": "refer_desc", "content": "8 5\n\nAssistant 1 provides a relevant and accurate description of the region [0.630, 0.471, 0.682, 0.550], focusing on the presence of windows on the side of a train engine and its interaction with the surrounding environment. The mention of the train traveling down the tracks, the electric lines above, and the general rural setting with buildings and trees is helpful and presents a clear spatial context. The response effectively ties the region to its surroundings, highlighting important objects and their relationships, making it a strong answer.\n\nOn the other hand, Assistant 2 presents a detailed description of a railcar, which appears to be less relevant to the specific region asked about. 
The focus on the design details of the railcar, such as its color scheme and features, detracts from the request for information regarding the specific region and its interactions with the environment. While it shows some level of detail, it does not address the surrounding context effectively, leading to a lower score for relevance and overall helpfulness. Thus, while Assistant 1 successfully answers the question with spatial and contextual relevance, Assistant 2 diverts from the main topic, resulting in a less effective response.", "tuple": [8.0, 5.0], "answer1": "The region features windows on the side of a train engine. The train itself is traveling down a set of tracks, which are part of a larger railway system that includes multiple sets of tracks on the ground. Nearby, there are also electric lines hanging above the tracks. Further off, there are buildings, trees, and a wall, which add to the overall rural setting.", "answer2": "The railcar is a modern passenger model with a predominantly maroon and cream livery. It features a streamlined design with a curved front end, large windows on the sides for passenger viewing, and a set of black doors for entry and exit. The lower portion of the railcar has a yellow stripe running horizontally, and there are visible vents and grilles on the side, likely for engine ventilation. The roofline is smooth and slightly overhangs the body of the railcar."} +{"id": 21, "question_id": 21, "answer1_id": 21, "answer2_id": 20, "category": "refer_desc", "content": "7 4\n\nAssistant 1 provides a response that clearly indicates the relationship between the vase (object at [0.838, 0.774, 0.994, 0.974]) and the flower (object at [0.796, 0.462, 0.982, 0.550]), stating that the flower is in the vase. This directly answers the user's question and accurately reflects the established relationship from the provided information. The assistant's response is relevant and straightforward, making it easy to understand. 
However, Assistant 1 could have included a bit more context about the decorative aspect or significance of these objects within the room, which is why it did not score a perfect 10.\n\nOn the other hand, Assistant 2 fails to address the question directly. Instead of explaining the relationship or confirming that the flower is in the vase, it describes the flowers in a vague and subjective manner, referencing specific colors without tying them back to the objects or their relationships. This response lacks relevance to the user's inquiry and does not reflect the relevant relationships outlined in the provided information, resulting in a lower score. Overall, Assistant 2 seems less helpful and less accurate in relation to the question asked.", "tuple": [7.0, 4.0], "answer1": "The object is a vase, and the object is a flower. The flower is in the vase, suggesting it is a decorative element within the room.", "answer2": "A lush bouquet of white daisies with prominent yellow centers, complemented by green foliage."} +{"id": 22, "question_id": 22, "answer1_id": 22, "answer2_id": 21, "category": "refer_desc", "content": "8 7\n\nAssistant 1 provided a clear and accurate description of the activity happening in the specified region, stating that a woman in a brown shirt and jeans is crossing the street. The information aligns well with the details provided in the region description, which identifies a woman in a brown shirt and jeans crossing the street. The response demonstrates strong relevance and is factually correct, thus earning a high score.\n\nAssistant 2 also offered a relevant response, indicating that a person with dark hair tied back, wearing a brown top and blue jeans, is walking. However, the phrase \"walking\" is less precise than \"crossing the street,\" as the first response specifies the action more clearly. Therefore, while Assistant 2's answer is reasonably accurate, it lacks the specificity that would enhance its relevance to the user's question. 
Consequently, this leads to a slightly lower score compared to Assistant 1. \n\nOverall, both responses were valuable, but Assistant 1 was more precise in its depiction of the action taking place in the region specified, which is reflected in the scoring.", "tuple": [8.0, 7.0], "answer1": "A woman, who is wearing a brown shirt and jeans, is crossing the street.", "answer2": "A person with dark hair tied back, wearing a brown top and blue jeans, is walking."} +{"id": 23, "question_id": 23, "answer1_id": 23, "answer2_id": 22, "category": "refer_desc", "content": "8 6\n\nAssistant 1 provides a detailed description of the region in question, mentioning the red letters, their location on the side of the white boat, the presence of two levels on the boat, and other surrounding elements such as the buildings, outdoor canopies, and sea wall. This response effectively relates the specific region to its context and includes relevant details that paint a clear picture of the surroundings. It also addresses the user\u2019s question directly, providing a strong correlation with the descriptive context provided.\n\nOn the other hand, Assistant 2's response is more limited in scope and detail. While it identifies the specific text in the region, it lacks the broader context by not describing its surroundings or how it connects with other objects in the image, such as the boat's structure or other nearby elements. The response is focused solely on the text rather than the overall scene, which diminishes its helpfulness and relevance to the inquiry. \n\nOverall, Assistant 1\u2019s response is more comprehensive, accurate, and relevant, while Assistant 2\u2019s response, although correct in identifying the text, falls short in providing sufficient context and detail. Thus, the ratings reflect this difference in performance.", "tuple": [8.0, 6.0], "answer1": "The region is displaying red letters. These letters are on the side of a large, white boat that's sitting in the water. 
The boat has two levels and there is a set of long, black windows on its side. A silver railing is present on the top level of the boat. Close to the boat, there are buildings with red roofs and outdoor canopies. There's also a blue container on the dock, and a gray sea wall next to the ship.", "answer2": "A pink banner with the text \"Port River Dolphin Cruises\" in a serif font, with the word \"Port\" in a larger size than the rest of the text."} +{"id": 24, "question_id": 24, "answer1_id": 24, "answer2_id": 23, "category": "refer_desc", "content": "7 8\n\nAssistant 1 provides a description of the woman with red hair holding a plate with a piece of cake; however, it incorrectly identifies her as wearing a tie when the reference indicates that the woman in this region is not described as having a tie. The mention of her attire is somewhat generic and lacks precise detail about her clothing, which could lead to ambiguity about her actual appearance. While noting the context of a wedding reception is relevant, there is room for improvement in specifying the type of cake or further details about the woman's appearance.\n\nAssistant 2 offers a more precise description, accurately detailing that the woman has long, straight red hair and specifying her attire, including that she is wearing a black suit jacket with a white dress shirt and a dark tie. By also mentioning the slice of chocolate cake with white frosting, Assistant 2 adds a valuable detail that enhances the understanding of both the food item and the subject's action. This level of detail improves clarity and assists the user in visualizing the scene accurately.\n\nOverall, both assistants provide relevant descriptions of the region, but Assistant 2 presents a more detailed and accurate account. 
Assistant 1's vagueness about the attire and some inaccuracies regarding the woman's presentation lead to a lower score compared to Assistant 2, which clearly articulates the woman's physical attributes and actions.", "tuple": [7.0, 8.0], "answer1": "In the region, there is a woman with red hair. She's wearing a tie and a suit jacket, and is holding a plate with a piece of cake. The woman is dressed in formal attire, suggesting that she's attending a special occasion like a wedding.", "answer2": "A woman with long, straight red hair, wearing a black suit jacket over a white dress shirt and a dark tie. She is holding a slice of chocolate cake with white frosting on a plate."} +{"id": 25, "question_id": 25, "answer1_id": 25, "answer2_id": 24, "category": "refer_desc", "content": "7 4\n\nAssistant 1 provides a detailed and relevant description of the region [0.189, 0.032, 0.703, 0.178]. It accurately identifies the presence of rocks and grass as stated in the region description, and mentions the nearby white sheep grazing in the field, which corresponds well with the overall image context. Additionally, it notes the presence of trees, a bush, and a stone wall bordering the field, thereby capturing the spatial relationships and the context of the surrounding areas effectively. The level of detail is appropriate for the user\u2019s question.\n\nOn the other hand, Assistant 2 presents a description that strays from the specific details given in the prompt. While it mentions a hill and grass, it introduces elements such as a \"winding stone path\" and \"exposed soil\" that are not supported by the context or objects listed. This deviation from the factual information makes the response less helpful and relevant. 
The details provided are not inline with the region's description, resulting in a lower score for accuracy and helpfulness.\n\nOverall, Assistant 1 performs better due to its higher relevance and accuracy regarding the specific region and surrounding context.", "tuple": [7.0, 4.0], "answer1": "The region contains rocks and grass, providing a background for the pasture. Nearby, there are white sheep grazing in the green grassy field. There are also trees and a bush in the vicinity. A stone wall is running across the grassy field, bordering it. Besides, there's a hill in the field where some sheep and a rock are located.", "answer2": "A gently sloping hill covered in lush green grass, with a winding stone path running horizontally across its midsection. The hill is dotted with small bushes and patches of exposed soil, and a few scattered rocks are visible along the path."} +{"id": 26, "question_id": 26, "answer1_id": 26, "answer2_id": 25, "category": "refer_desc", "content": "8 7\n\nAssistant 1 provides a more coherent and contextually relevant description of the bird's surroundings and its interactions with other objects. It notes the bird's position on the edge of the table and mentions important nearby objects such as the plate with food and crumbs, the steak knife, and the chair next to the table. This response demonstrates a good understanding of the scene context and the bird's relationship with other objects, which makes it more helpful for the user.\n\nAssistant 2, while offering a detailed description of the bird's physical characteristics, fails to incorporate any interaction or surrounding objects, which is critical for the user\u2019s original request. The response is more about the bird's appearance than providing insight into how it relates to its environment. 
Therefore, this response is less relevant and not as helpful in addressing the user's question.\n\nOverall, Assistant 1 is rated higher for its spatial awareness and contextual relevance, while Assistant 2, despite its accurate description of the bird, lacks in understanding the task at hand regarding interactions with surrounding objects.", "tuple": [8.0, 7.0], "answer1": "In this region, a bird is standing on the edge of a table. The table is covered with a red tablecloth and there are several objects on it, including a plate with food and crumbs, a bottle, and a steak knife. The bird is close to the knife and the plate with food. There's also a chair next to the table.", "answer2": "A small sparrow with brown and gray feathers, a short beak, and a slightly rounded body. It has a light brown head with darker streaks and a small, dark eye. The wings are brown with darker streaks, and the tail is short and brown. The bird is standing on thin, pinkish legs with small claws."} +{"id": 27, "question_id": 27, "answer1_id": 27, "answer2_id": 26, "category": "refer_desc", "content": "8 3\n\nAssistant 1 provided a detailed and accurate description of the region at [0.279, 0.524, 0.341, 0.570], noting that it contains a black wrist guard worn by the skater. Furthermore, the assistant effectively connected this detail to the broader context of the skater's safety gear, mentioning other items he is wearing and the environment (i.e., the skate park and the presence of a shadow). This response demonstrates high relevance and helpfulness, as it captures both the specifics of the region and their relationship to surrounding objects.\n\nIn contrast, Assistant 2's response was much less relevant to the user's question. It failed to mention the region's contents or provide context related to other nearby objects. The description of a black pad with a logo does not accurately represent the wrist guard in the specified region, which diminishes its accuracy. 
The lack of detail and connection to the scene makes this response unhelpful in understanding the region and its relation to the overall image context.\n\nOverall, while Assistant 1's response is informative and relevant, Assistant 2's lack of pertinent information leads to a significantly lower score.", "tuple": [8.0, 3.0], "answer1": "The region contains a black wrist guard that the skater is wearing. This wrist guard is part of the safety gear that the skater has on, which also includes a black helmet, elbow pad, knee pad, and a pair of roller skates. The skater is performing a trick at the skate park, his shadow is cast on the cement ramp, and there is a grey post to a metal fence at the top of the ramp nearby. Overall, this region is an important part of the scene, showing the skater's safety equipment.", "answer2": "A black, rectangular pad with rounded corners, featuring a central white logo consisting of a stylized letter 'S' within a square outline."} +{"id": 28, "question_id": 28, "answer1_id": 28, "answer2_id": 27, "category": "refer_desc", "content": "8 7\n\nAssistant 1 provides a detailed description of the bus, accurately identifying it as a large decorated white bus and mentioning the text \"Divine Transportation\" as well as the bus identification number. It effectively conveys the scene by including relevant details about the bushes and gray trash can behind the bus. The response captures the overall context and visual elements pertinent to the specified region well, showing a good spatial correspondence to the coordinates given.\n\nAssistant 2 also gives a fairly detailed description, specifically noting the design on the bus and the display of the vehicle number. However, while it mentions the elegant script and wave-like design, it lacks the broader context about the bus being in front of a building, as noted in the existing data. 
Additionally, it presents less information about the surrounding elements (e.g., the bushes or trash can), which diminishes the overall relevance and completeness compared to Assistant 1's response.\n\nOverall, Assistant 1's response is more comprehensive and contextually rich, leading to a higher score, while Assistant 2's response, while accurate, is slightly less detailed and relevant to the asked question.", "tuple": [8.0, 7.0], "answer1": "This is a large decorated white bus. It seems to be driving past a tall building. You can see \"Divine Transportation\" written on the front of the bus. There's also a bus identification number on top. The bus features a design, including stripes, and there are headlights at the front. You can also see the side mirrors and wheels. Behind the bus, there's a gray trash can next to some large green bushes.", "answer2": "A white coach bus with a curved front windshield and a large side window. The bus features a purple and blue wave-like design on its side, with the text \"Divine Transportation\" written in elegant script above the design. The vehicle number \"7006\" is displayed on the top front corner. The bus has a black front bumper, side mirrors, and a visible door towards the front."} +{"id": 29, "question_id": 29, "answer1_id": 29, "answer2_id": 28, "category": "refer_desc", "content": "9 4\n\nAssistant 1 provides a thorough and accurate description of the interaction between the man and woman within the specified region. It mentions their seating arrangement, the intimate gesture of the man having his arm around the woman, and their shared activity of looking at the ocean. This response directly relates to the objects and relationships identified in the context, making it highly relevant and helpful.\n\nOn the other hand, Assistant 2\u2019s response is vague and lacks detail. 
While it touches upon the relationship between the two individuals by mentioning their hair types, it does not elaborate on the context of their interaction or describe any of their actions, such as sitting or looking at the ocean. The lack of specificity and the absence of clear connections to the other identified objects and relationships in the context diminish its relevance and usefulness.\n\nOverall, Assistant 1 scores high for its clarity and detail, while Assistant 2 falls short due to its lack of context and descriptive depth.", "tuple": [9.0, 4.0], "answer1": "The region includes a man who is sitting on a bench. He has his arm around a woman, indicating a close relationship between them. They are both looking towards the ocean, suggesting that they are enjoying the view together. The bench they are sitting on is in front of the ocean.", "answer2": "A person with short, curly hair is attached to another person with long, wavy hair."} +{"id": 30, "question_id": 30, "answer1_id": 30, "answer2_id": 29, "category": "refer_desc", "content": "8 5\n\nAssistant 1 provides a thorough and accurate description of the region in question, clearly identifying the player wearing knee and leg pads along with their context in the baseball game. The details such as the player's crouched position, white uniform, gloved hand, and the contrasting ground surfaces are well articulated. Furthermore, Assistant 1 effectively ties the relevant objects and relationships to the context, enhancing the completeness of the response.\n\nOn the other hand, Assistant 2 offers a detailed description of the knee pad itself, focusing on its design features and materials. While this information is interesting, it is less relevant to the user's question about the broader interaction of the player and the surrounding context. 
Assistant 2's response lacks a connection to how the knee pads relate to the player\u2019s actions or the baseball field, thereby failing to fully address the user's inquiry regarding the visual scene.\n\nOverall, Assistant 1's response is far more helpful and aligned with the user's question, while Assistant 2, although detailed, does not sufficiently engage with the context, resulting in a lower score.", "tuple": [8.0, 5.0], "answer1": "This region is occupied by a baseball player wearing knee and leg pads. These pads are a part of the player's protective gear. The player, dressed as a catcher, is crouched on the field, ready to catch a ball. He is in a white uniform, which includes pants with a line on them, and he's wearing sneakers. His gloved hand is extended, prepared to receive. We can also see a black and red wrist band on his wrist. The field beneath him is brown dirt, contrasting with the green grass in the rest of the baseball field. Nearby, there are white chalk lines painted on the field.", "answer2": "The knee pad features a hard, black plastic shell with a glossy finish, designed to cover and protect the knee. It has a contoured shape to fit the knee, with a series of ridges and indentations that provide structural reinforcement. The edges of the shell appear to be smooth and rounded for comfort. There are two adjustable straps with fastening mechanisms, likely Velcro, for securing the pad around the leg. The straps are black and blend with the overall color scheme of the knee pad."} +{"id": 31, "question_id": 31, "answer1_id": 31, "answer2_id": 30, "category": "refer_desc", "content": "8 7\n\nAssistant 1 provides a clear and straightforward description of the region [0.060, 0.086, 0.462, 0.908], accurately noting that it has a little girl standing near a scooter. The mention of the girl's blonde hair and white socks aligns well with the provided object and region descriptions. 
Furthermore, Assistant 1 effectively connects the girl to the surrounding objects, specifically noting that she is on the pavement and correctly describes the scooter's attributes.\n\nOn the other hand, Assistant 2 presents a more detailed and descriptive account, mentioning two young girls with specific clothing and how they are positioned. However, this response diverges from the original focus on the specific region in question, which only depicts one girl. While it provides a richer description involving color and a specific emblem, it does not correctly emphasize the context of the region being evaluated, which may lead to confusion about which subjects are present.\n\nOverall, Assistant 1 completes the task with greater accuracy and relevance to the specific region while Assistant 2, though providing rich detail, strays from focusing on the specific region in question. Thus, Assistant 1 scores higher for being more focused and relevant.", "tuple": [8.0, 7.0], "answer1": "This region contains a little girl who is standing near a scooter. The scooter has an orange board and black handles, and it's specifically located to the right of her. The girl has blonde hair and she's wearing white socks. She is also standing on the pavement.", "answer2": "Two young girls with light skin and blonde hair tied back, wearing white short-sleeved shirts with a circular emblem on the left side of the chest, white pleated skirts, and white sneakers with yellow socks. The girl on the left is holding a silver scooter with black grips and an orange handle."} +{"id": 32, "question_id": 32, "answer1_id": 32, "answer2_id": 31, "category": "refer_desc", "content": "8 4\n\nAssistant 1 provided a thorough and accurate description of the events happening in the specified region. 
It gave a clear account of the man preparing to hit the yellow tennis ball and included relevant details about the surroundings, such as the tennis court, the boundary lines, the net, and the features behind him (fence, bushes, and trees). The information aligns well with the provided context, making it highly relevant and helpful for understanding the scene.\n\nIn contrast, Assistant 2 focused exclusively on the description of the tennis net without addressing the man, the ball, or the overall activity taking place, which is the primary focus of the user's question. The information about the net, while informative, lacks relevance to the specific action occurring in the requested region and does not provide a comprehensive view of the surrounding environment.\n\nOverall, Assistant 1's response stands out for its spatial correspondence, helpfulness, and level of detail, while Assistant 2 falls short in relevance and accuracy to the user's inquiry.", "tuple": [8.0, 4.0], "answer1": "In this region, a man in white clothing is preparing to hit a yellow tennis ball with his racket. He is on a tennis court with white boundary lines and a net in front of him. Behind him, there are a fence, trimmed bushes, and tall trees in the distance.", "answer2": "A tennis net with a white tape running along the top edge, spanning horizontally across the image. The net is supported by a white post on the left side and another white post on the right side. The netting is made of black mesh material."} +{"id": 33, "question_id": 33, "answer1_id": 33, "answer2_id": 32, "category": "refer_desc", "content": "8 4\n\nAssistant 1 provides a relevant description of the region [0.524, 0.740, 0.734, 0.856], noting that it is where a man is found wearing pants and a long black coat. This response aligns well with the question about the specific region and offers some context about the surroundings (sidewalk or decorative square), which enhances the spatial correspondence and level of detail. 
However, the description could have included more specific information about the appearance of the pants themselves or additional details about the man's activity.\n\nAssistant 2 gives a very focused description of black trousers, highlighting their characteristics such as the straight-leg cut and smooth texture. While this is accurate and straightforward, it lacks context regarding the overall interaction of the trousers with their environment or the person wearing them. Thus, it misses the bigger picture that the user might be looking for in response to the question about the interaction with the surroundings.\n\nOverall, Assistant 1 is more helpful and relevant due to its contextualization and broader scope, whereas Assistant 2, while accurate, is limited in its response and lacks sufficient interaction with the surroundings.", "tuple": [8.0, 4.0], "answer1": "The region is where a man is found wearing a pair of pants. This man is also wearing a long black coat. He seems to be walking on a sidewalk or decorative square, which fills the background of the image.", "answer2": "Black trousers with a straight-leg cut, featuring a smooth texture and a visible hem at the bottom."} +{"id": 34, "question_id": 34, "answer1_id": 34, "answer2_id": 33, "category": "refer_desc", "content": "7 8\n\nBoth Assistant 1 and Assistant 2 provide information about the region specified in the question. However, there are distinctions in detail and accuracy that influence their scores.\n\n**Assistant 1:**\n- **Spatial Correspondence:** The assistant identifies an airplane's engine and mentions the front door, which is relevant since the engine and front door are within the specified coordinates [0.650, 0.428, 0.858, 0.600]. 
However, it does not clearly specify which engine it is referring to, nor does it clearly distinguish the specific objects more precisely.\n- **Helpfulness:** The assistant provides context about the airplane being in mid-flight and about the landing gear being lowered, which adds useful context. It also mentions that it appears to be a commercial airline.\n- **Relevance:** While the assistant discusses elements relevant to aviation, it might not be directly capturing the main focus of the region since the query is about a more limited area where specific features reside.\n- **Accuracy:** Although the information provided is generally accurate, the assistant's reference to the engine could create ambiguity since it involves multiple engines visible in the image.\n- **Level of Detail:** While it covers several aspects of the airplane and its flight status, the description could have been more precise regarding the objects explicitly within the specified region.\n\n**Assistant 2:**\n- **Spatial Correspondence:** The assistant accurately identifies the commercial jet, details the visible elements (text \"ANA\" and the logo), and mentions the landing gear. 
This corresponds well with the region described.\n- **Helpfulness:** The level of specificity adds to the helpfulness of the response, especially as it names the airline and describes its colors, which are essential identifiers.\n- **Relevance:** The focus remains appropriately on the airplane and its visible characteristics, maintaining a direct relevance to the user's inquiry about the specific region.\n- **Accuracy:** The details provided closely match the bounding box description and the overall scene description, indicating a good understanding of the content.\n- **Level of Detail:** While slightly more succinct than Assistant 1, the specifics mentioned (the airline name and logo) enhance clarity and assertiveness in the context of airplane identification.\n\nOverall, Assistant 2 provides a more precise and detailed response regarding the specific region in question, leading to a higher score, whereas Assistant 1, while informative, lacks some clarity and specificity.", "tuple": [7.0, 8.0], "answer1": "In the region, there is an airplane's engine. The airplane seems to be in mid-flight, given the sky that surrounds it. The front door of the airplane is also visible in this region. The plane appears to be a commercial airline, as indicated by visible letters and windows. Notably, the landing gear of the airplane is lowered, suggesting that it's preparing to land.", "answer2": "A commercial jet with a white fuselage featuring the word \"ANA\" in blue letters, a dark blue and white logo to the right of the text, and a partially visible landing gear with a single wheel."} +{"id": 35, "question_id": 35, "answer1_id": 35, "answer2_id": 34, "category": "refer_desc", "content": "8 6\n\nIn evaluating Assistant 1, the response effectively describes the region [0.320, 0.451, 0.460, 0.592], stating that it contains the front window of a train with windshield wipers. 
It accurately relates the window to the rest of the train, noting the yellow and white colors and its proximity to the headlights. This response demonstrates spatial correspondence, relevance, and accuracy regarding surrounding objects.\n\nIn contrast, Assistant 2 offers a less detailed description that focuses on the aesthetic aspect of the windshield, mentioning it is dark-tinted with a wiper visible. However, it lacks clear references to the specific objects or features nearby, such as the train's headlights or the color scheme of the train's front. The response feels more vague and less informative compared to Assistant 1.\n\nOverall, while both responses address the question, Assistant 1 provides a more complete and relevant description that indicates a better understanding of the region's context within the image and its relationship to nearby objects.", "tuple": [8.0, 6.0], "answer1": "The region contains the front window of a train, which has windshield wipers. This window is part of the front of the train, which is painted yellow and white. Also, the region is located near the headlights of the train.", "answer2": "A curved, dark-tinted windshield with a visible wiper on the lower right side, reflecting a hint of the surrounding environment."} +{"id": 36, "question_id": 36, "answer1_id": 36, "answer2_id": 35, "category": "refer_desc", "content": "9 4\n\nAssistant 1 provided a well-rounded response that accurately described the activities happening in the specified region, noting that people are sitting at a table, likely dining or socializing. This description aligns well with the region's context, highlighting its vibrancy and connection to the surrounding area filled with cars and businesses. The mention of the marquee sign saying \"for lease\" added relevant detail, illustrating the urban environment in which this scene is set. 
The spatial correspondence is strong as it accurately relates the specific activities to the broader context of the busy street.\n\nOn the other hand, Assistant 2 focused solely on the details of a table and chairs without connecting this description to the people or the surrounding area. While it provided a specific image of the table setup, it lacked context about the people in the region and failed to relate this back to the activity happening on the street or within the larger urban environment. This makes its response less helpful and relevant to the user's question about what is happening in the region and how it relates to the surrounding area. The lack of broader context significantly diminishes the quality of its response. \n\nOverall, Assistant 1 demonstrated a deeper understanding of the scene, effectively capturing both the spatial and social dynamics present, earning a score of 9. Assistant 2's response, while descriptively accurate, failed to provide necessary context, resulting in a score of 4.", "tuple": [9.0, 4.0], "answer1": "In this region, there are people sitting at a table, likely dining or socializing outside a restaurant. This area is part of a bustling city street, filled with various cars, some parked and others potentially in motion. There are numerous buildings nearby, with diverse businesses and stores. One notable building nearby even has a marquee sign indicating \"for lease\". This scene suggests that the region is in a vibrant urban setting, where people are engaging in day-to-day activities such as dining outdoors and commuting by car.", "answer2": "A rectangular table with a white tablecloth, surrounded by multiple chairs. The tablecloth has a subtle pattern and is draped over the table, with the edges hanging down the sides. 
The chairs are arranged around the table, with some facing the table and others turned slightly."} +{"id": 37, "question_id": 37, "answer1_id": 37, "answer2_id": 36, "category": "refer_desc", "content": "8 6\n\nAssistant 1's response accurately identifies the region as a patch of grass and incorporates the relevant interactions involving the zebras, correctly stating their activities of grazing and standing in the grass. The mention of bushes and trees provides additional context that aligns well with the relationships described. The overall response is helpful, relevant, and detailed, which demonstrates a good understanding of the question and the provided context.\n\nOn the other hand, Assistant 2 provides a description of the grass but lacks specific reference to the zebras and their interactions, which are crucial elements in understanding the dynamics within that region. While it does describe the physical characteristics of the grass, it does not address the relationships between the entities involved, making the response less relevant and contextualized compared to Assistant 1. The level of detail is lower in this case, leading to a less informative answer overall.", "tuple": [8.0, 6.0], "answer1": "The region is a patch of grass. There are two zebras standing in and grazing on this grass. They are feeding themselves and are near bushes and a tree.", "answer2": "A dense cluster of green grass blades with varying lengths, some standing upright while others are bent or leaning, interspersed with small patches of bare soil."} +{"id": 38, "question_id": 38, "answer1_id": 38, "answer2_id": 37, "category": "refer_desc", "content": "8 4\n\nAssistant 1 provides a clear and relevant description of the interaction in the specified region, accurately identifying the elements present: a man and a little girl who are conversing, along with the context of the girl pulling a surfboard. 
This response effectively ties in the relationships indicated in the object list and makes connections to the broader activity observed in the larger group.\n\nIn contrast, Assistant 2's response primarily focuses on the description of a person with a specific physical appearance, including their clothing and surfboard, without addressing the interaction or relationship explicitly as requested. While it provides some details, it lacks context about the interaction between objects in the specified region and is not directly relevant to the question. This results in a lower score for Assistant 2, as the nature of the question called for an explanation of relationships and interactions, which was not met. \n\nOverall, Assistant 1 offered a more comprehensive and contextually accurate response than Assistant 2.", "tuple": [8.0, 4.0], "answer1": "In the region, there's a man and a little girl, they seem to be having a conversation. The man is looking back to the girl, who is pulling a surfboard, probably getting ready to surf. They are part of a larger group of people who are heading to the water with their surfboards.", "answer2": "A person with long blonde hair tied in a ponytail, wearing a bright yellow jacket with a black design on the back, black pants, and brown shoes, is holding a blue surfboard."} +{"id": 39, "question_id": 39, "answer1_id": 39, "answer2_id": 38, "category": "refer_desc", "content": "8 4\n\nAssistant 1 provides a comprehensive description of the region specified by the user, accurately identifying the subject (a man standing with shorts) and detailing his interaction with the nearby elephant as well as the context involving water and the group of people. The response effectively captures the setting and relationships among the objects, making it relevant and helpful for the query.\n\nIn contrast, Assistant 2's response lacks depth and does not provide any contextual information about the actions or relationships in the specified region. 
It only describes the man, but fails to mention the elephant or the surrounding elements that enrich the narrative of the scene. This makes its response less informative and less relevant to the user's request regarding the specific region of interest. Overall, Assistant 1's higher score reflects its detailed and contextually rich explanation compared to Assistant 2's brief and limited description.", "tuple": [8.0, 4.0], "answer1": "In the region, there's a man standing wearing shorts. He is standing on the side of a lake, next to an elephant. The elephant is emerging from the water and seems to be interacting with the man and two other individuals not far from him. All three people appear to be tourists posing for a picture with the elephant. The surroundings include water, and some mountains and trees in the far distance, creating a serene and natural setting.", "answer2": "A smiling man with short hair, wearing dark gray shorts with white stripes on the sides, and a black wristwatch on his left wrist."} +{"id": 40, "question_id": 40, "answer1_id": 40, "answer2_id": 39, "category": "refer_desc", "content": "8 4\n\nAssistant 1 effectively identifies the primary object within the specified region \u2014 a cow \u2014 and situates it within the broader context of the scene, noting its presence in a pasture near a mountain and the existence of trailers. This response provides a clear understanding of the relationship between the cow and its environment and includes relevant details such as the mountainous backdrop and the peaceful living conditions for the cows. However, it could have provided more specific visual details about the cow\u2019s appearance or behavior to enhance the descriptive richness.\n\nIn contrast, Assistant 2 provides a very specific and detailed description of the cow\u2019s coloring and physical characteristics but lacks contextual information about the cow's setting or how it relates to the surrounding environment. 
This answer does not address the broader scene or the relationships between objects, which limits its relevance and comprehensiveness.\n\nOverall, Assistant 1 presents a more helpful and relevant response by framing the cow within its environment and considering the broader scene, while Assistant 2 is accurate in detailing the cow but fails to connect that information to the larger context, resulting in a lower score for clarity and usefulness.", "tuple": [8.0, 4.0], "answer1": "Within the region, there is a cow. This cow is in a pasture, which is located near a mountainous area. The mountain is partially covered in snow. There are also multiple trailers in the pasture, and one of them appears to be storage for animal equipment. The pasture and its surroundings provide a peaceful and natural living environment for the cows.", "answer2": "A brown cow with a white pattern on its forehead and snout, white lower legs, and a visible udder."} diff --git a/evaluation/Ferret-Bench/inference.py b/evaluation/Ferret-Bench/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..7b083293457ea8dd3334386abd4e6e877e740f00 --- /dev/null +++ b/evaluation/Ferret-Bench/inference.py @@ -0,0 +1,163 @@ +# -------------------------------------------------------- +# Copyright (2025) Bytedance Ltd. 
and/or its affiliates +# Licensed under the Apache License, Version 2.0 (the "License") +# Grasp Any Region Project +# Written by Haochen Wang +# -------------------------------------------------------- + +import argparse +import json +import os + +import numpy as np +import torch +from PIL import Image +from pycocotools import mask as mask_utils +from pycocotools.coco import COCO +from tqdm import tqdm +from transformers import AutoModel, AutoProcessor, GenerationConfig + +from evaluation.eval_dataset import SingleRegionCaptionDataset + +TORCH_DTYPE_MAP = dict(fp16=torch.float16, bf16=torch.bfloat16, fp32=torch.float32) + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Inference of Grasp Any Region models on Ferret-Bench." + ) + + parser.add_argument( + "--model_name_or_path", + help="HF model name or path", + default="HaochenWang/GAR-1B", + ) + parser.add_argument( + "--cache_name", + help="cache name to save model outputs.", + default="gar_1b", + ) + parser.add_argument( + "--data_type", + help="data dtype", + type=str, + choices=["fp16", "bf16", "fp32"], + default="bf16", + ) + parser.add_argument( + "--anno_file", + help="path to the annotation file.", + default="evaluation/Ferret-Bench/annotations/box_refer_caption.json", + ) + parser.add_argument( + "--image_folder", + help="the folder of images", + default="evaluation/Ferret-Bench/annotations/coco/val2017", + ) + parser.add_argument( + "--seed", + type=int, + default=0, + help="Random seed for reproducible text generation", + ) + args = parser.parse_args() + return args + + +def annToMask(ann, h, w): + rles = mask_utils.frPyObjects(ann, h, w) + rle = mask_utils.merge(rles) + m = mask_utils.decode(rle) + return m + + +def main(): + args = parse_args() + data_dtype = TORCH_DTYPE_MAP[args.data_type] + torch.manual_seed(args.seed) + + # init ditribution for dispatch_modules in LLM + torch.cuda.set_device(0) + torch.distributed.init_process_group(backend="nccl") + + # build HF model + 
model = AutoModel.from_pretrained( + args.model_name_or_path, + trust_remote_code=True, + torch_dtype=data_dtype, + ) + model.cuda() + model.eval() + + processor = AutoProcessor.from_pretrained( + args.model_name_or_path, + trust_remote_code=True, + ) + model_outputs = [] + cache_name = args.cache_name + + with open(args.anno_file, "r") as file: + data = json.load(file) + + for idx, item in enumerate(tqdm(data)): + image_path = os.path.join(args.image_folder, item["image"]) + img = Image.open(image_path).convert("RGB") + width, height = img.size + + mask_r = item["annotation"]["segmentation"] + mask = ( + annToMask(mask_r, height, width) + if isinstance(mask_r, list) + else mask_utils.decode(mask_r) + ) + mask = (mask.astype(np.uint8) * 255).astype(np.uint8) + + prompt_number = model.config.prompt_numbers + prompt_tokens = [f"" for i_p in range(prompt_number)] + [ + "" + ] + dataset = SingleRegionCaptionDataset( + image=img, + mask=mask, + processor=processor, + prompt_number=prompt_number, + visual_prompt_tokens=prompt_tokens, + data_dtype=data_dtype, + ) + + data_sample = dataset[0] + + with torch.no_grad(): + generate_ids = model.generate( + **data_sample, + generation_config=GenerationConfig( + max_new_tokens=1024, + do_sample=False, + eos_token_id=processor.tokenizer.eos_token_id, + pad_token_id=processor.tokenizer.pad_token_id, + ), + return_dict=True, + ) + + outputs = processor.tokenizer.decode( + generate_ids.sequences[0], skip_special_tokens=True + ).strip() + + print(outputs) # Print model output for this image + + model_outputs.append( + { + "image_path": image_path, + "annotation": item["annotation"], + "caption": outputs, + } + ) + + with open(f"evaluation/Ferret-Bench/model_outputs/{cache_name}.json", "w") as file: + json.dump(model_outputs, file, indent=4, ensure_ascii=False) + + print(f"Cache name: {cache_name}") + + +if __name__ == "__main__": + main() diff --git a/evaluation/Ferret-Bench/model_outputs/gar_1b.json 
b/evaluation/Ferret-Bench/model_outputs/gar_1b.json new file mode 100644 index 0000000000000000000000000000000000000000..9f3153230bb4c16debb8e5d2ed183d1a6345dd59 --- /dev/null +++ b/evaluation/Ferret-Bench/model_outputs/gar_1b.json @@ -0,0 +1,962 @@ +[ + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000069138.jpg", + "annotation": { + "bbox": [ + 230.39, + 52.48, + 56.02, + 32.0 + ], + "segmentation": [ + [ + 230.39, + 52.48, + 286.41, + 52.48, + 286.41, + 84.48, + 230.39, + 84.48 + ] + ] + }, + "caption": "A rectangular signboard with a dark background and capitalized, bold, sans-serif font text that reads \"FROM 2,500 SQ FT IMMEDIATELY AVAILABLE.\"" + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000131138.jpg", + "annotation": { + "bbox": [ + 117.12, + 383.52, + 91.52, + 46.56 + ], + "segmentation": [ + [ + 117.12, + 383.52, + 208.64, + 383.52, + 208.64, + 430.08, + 117.12, + 430.08 + ] + ] + }, + "caption": "A white ceramic plate with a glossy finish, featuring a smudge of a dark brown substance, possibly a sauce or food residue, and a small, dark-colored object, likely a utensil, resting on the plate." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000475150.jpg", + "annotation": { + "bbox": [ + 184.32, + 138.35, + 181.76, + 138.77 + ], + "segmentation": [ + [ + 184.32, + 138.35, + 366.08, + 138.35, + 366.08, + 277.12, + 184.32, + 277.12 + ] + ] + }, + "caption": "The giraffe has a pattern of large, irregular brown patches separated by cream-colored lines on its skin. Its eyes are large with long lashes, and the giraffe's ears are pointed and tufted with hair. The visible part of the neck shows a continuation of the same patterned skin." 
+ }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000356424.jpg", + "annotation": { + "bbox": [ + 253.44, + 162.56, + 90.72, + 263.68 + ], + "segmentation": [ + [ + 253.44, + 162.56, + 344.16, + 162.56, + 344.16, + 426.24, + 253.44, + 426.24 + ] + ] + }, + "caption": "A person with dark hair tied back, wearing a dark-colored top with a visible collar and a patterned garment underneath. The individual appears to be seated and is looking downwards." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000491090.jpg", + "annotation": { + "bbox": [ + 48.76, + 318.72, + 108.51, + 124.16 + ], + "segmentation": [ + [ + 48.76, + 318.72, + 157.26, + 318.72, + 157.26, + 442.88, + 48.76, + 442.88 + ] + ] + }, + "caption": "A black motorcycle with a visible rear wheel featuring a multi-spoke design and a disc brake system. The exhaust system includes a cylindrical chrome muffler with a heat shield. The rear suspension is partially visible, showcasing a telescopic fork. The motorcycle's bodywork includes a black fender over the wheel and a portion of the frame with a visible bolt pattern." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000484415.jpg", + "annotation": { + "bbox": [ + 229.12, + 46.08, + 56.96, + 85.92 + ], + "segmentation": [ + [ + 229.12, + 46.08, + 286.08, + 46.08, + 286.08, + 132.0, + 229.12, + 132.0 + ] + ] + }, + "caption": "A white ceramic toilet with an open black seat and lid, featuring a visible flush handle on the left side." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000184324.jpg", + "annotation": { + "bbox": [ + 360.96, + 327.68, + 199.68, + 93.5 + ], + "segmentation": [ + [ + 360.96, + 327.68, + 560.64, + 327.68, + 560.64, + 421.18, + 360.96, + 421.18 + ] + ] + }, + "caption": "Two bicycles with black tires and silver rims, positioned parallel to each other on a crosswalk. 
The bicycle on the left has a visible rear wheel and part of the frame, while the bicycle on the right has a visible front wheel and part of the frame." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000341058.jpg", + "annotation": { + "bbox": [ + 222.22, + 536.32, + 5.03, + 7.68 + ], + "segmentation": [ + [ + 222.22, + 536.32, + 227.25, + 536.32, + 227.25, + 544.0, + 222.22, + 544.0 + ] + ] + }, + "caption": "A cylindrical, transparent glass saltshaker with a metal screw-on lid featuring multiple small holes for dispensing salt." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000184384.jpg", + "annotation": { + "bbox": [ + 401.92, + 57.6, + 236.8, + 129.12 + ], + "segmentation": [ + [ + 401.92, + 57.6, + 638.72, + 57.6, + 638.72, + 186.72, + 401.92, + 186.72 + ] + ] + }, + "caption": "A plate with a grilled sausage, scrambled eggs, and a side of hash browns." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000259097.jpg", + "annotation": { + "bbox": [ + 6.0, + 173.16, + 492.0, + 36.96 + ], + "segmentation": [ + [ + 6.0, + 173.16, + 498.0, + 173.16, + 498.0, + 210.12, + 6.0, + 210.12 + ] + ] + }, + "caption": "A long, continuous hill with a dense covering of dark green trees and vegetation, extending horizontally across the image. The hill has a slightly uneven, jagged top edge and is characterized by a mix of tall and short trees, creating a textured appearance." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000377882.jpg", + "annotation": { + "bbox": [ + 154.88, + 101.28, + 38.4, + 373.44 + ], + "segmentation": [ + [ + 154.88, + 101.28, + 193.28, + 101.28, + 193.28, + 474.72, + 154.88, + 474.72 + ] + ] + }, + "caption": "A vertical, cylindrical pole with a textured surface, possibly metallic, exhibiting a pattern of diagonal ridges that create a diamond-like appearance. 
The pole has a consistent diameter throughout its visible length and is topped with a flat, circular cap." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000415748.jpg", + "annotation": { + "bbox": [ + 35.78, + 280.32, + 273.92, + 330.24 + ], + "segmentation": [ + [ + 35.78, + 280.32, + 309.7, + 280.32, + 309.7, + 610.56, + 35.78, + 610.56 + ] + ] + }, + "caption": "An adult elephant with a predominantly dark gray skin tone, adorned with colorful paint markings on its forehead and trunk. The elephant's ears are partially visible, and it has a long, curved trunk with visible wrinkles and a pair of ivory tusks protruding from the upper jaw. The elephant's eyes are small with long lashes, and it has a robust body with thick, pillar-like legs. The feet are broad with visible toenails." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000408120.jpg", + "annotation": { + "bbox": [ + 252.16, + 241.25, + 112.64, + 65.33 + ], + "segmentation": [ + [ + 252.16, + 241.25, + 364.8, + 241.25, + 364.8, + 306.59, + 252.16, + 306.59 + ] + ] + }, + "caption": "A rectangular concrete curb cut with a rough, textured surface and slightly uneven edges." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000184400.jpg", + "annotation": { + "bbox": [ + 385.28, + 401.76, + 60.16, + 76.8 + ], + "segmentation": [ + [ + 385.28, + 401.76, + 445.44, + 401.76, + 445.44, + 478.56, + 385.28, + 478.56 + ] + ] + }, + "caption": "A cylindrical, vertical pole with a flared base transitioning into a narrower shaft, topped by a flat, horizontal element with a series of evenly spaced, small, rectangular protrusions along its upper edge." 
+ }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000276018.jpg", + "annotation": { + "bbox": [ + 29.54, + 241.92, + 177.63, + 296.96 + ], + "segmentation": [ + [ + 29.54, + 241.92, + 207.17, + 241.92, + 207.17, + 538.88, + 29.54, + 538.88 + ] + ] + }, + "caption": "A young boy with short, wavy brown hair, wearing a black jacket with a hood, is holding a plush toy resembling a brown and white dog. He has a slight smile on his face and is looking slightly to his left." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000376322.jpg", + "annotation": { + "bbox": [ + 319.3, + 161.28, + 115.2, + 236.8 + ], + "segmentation": [ + [ + 319.3, + 161.28, + 434.5, + 161.28, + 434.5, + 398.08, + 319.3, + 398.08 + ] + ] + }, + "caption": "An elderly man with short gray hair and glasses, wearing a light green button-up shirt with a name tag on the left side of his chest. He is holding a clear glass in his right hand and appears to be looking slightly to his left." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000125472.jpg", + "annotation": { + "bbox": [ + 85.83, + 1.28, + 315.55, + 483.84 + ], + "segmentation": [ + [ + 85.83, + 1.28, + 401.38, + 1.28, + 401.38, + 485.12, + 85.83, + 485.12 + ] + ] + }, + "caption": "The sky is a pale, almost white, light gray with a smooth gradient, transitioning from a slightly darker shade at the top to a lighter shade towards the bottom." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000361551.jpg", + "annotation": { + "bbox": [ + 459.36, + 394.24, + 19.2, + 34.56 + ], + "segmentation": [ + [ + 459.36, + 394.24, + 478.56, + 394.24, + 478.56, + 428.8, + 459.36, + 428.8 + ] + ] + }, + "caption": "A person with dark hair tied back, wearing a black top with a scoop neckline and a visible strap over the shoulder." 
+ }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000412240.jpg", + "annotation": { + "bbox": [ + 1.0, + 163.88, + 359.0, + 131.25 + ], + "segmentation": [ + [ + 1.0, + 163.88, + 360.0, + 163.88, + 360.0, + 295.12, + 1.0, + 295.12 + ] + ] + }, + "caption": "Two black leather shoes with a low heel and a rounded toe. The shoes have a smooth, polished finish and are positioned side by side." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000130566.jpg", + "annotation": { + "bbox": [ + 403.2, + 201.12, + 33.28, + 33.73 + ], + "segmentation": [ + [ + 403.2, + 201.12, + 436.48, + 201.12, + 436.48, + 234.85, + 403.2, + 234.85 + ] + ] + }, + "caption": "The passenger car is a modern rail vehicle with a predominantly maroon and cream color scheme. It features a series of windows with black frames, some of which are partially open, and others are closed. The side of the car displays a pattern of vertical and horizontal lines, creating a grid-like texture. The lower portion of the car has a yellow stripe running horizontally, and there are visible steps leading up to the entrance. The roof of the car is equipped with a pantograph for electrical power collection." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000421923.jpg", + "annotation": { + "bbox": [ + 339.1, + 295.68, + 79.24, + 56.32 + ], + "segmentation": [ + [ + 339.1, + 295.68, + 418.33, + 295.68, + 418.33, + 352.0, + 339.1, + 352.0 + ] + ] + }, + "caption": "A white daisy with a dense cluster of elongated petals radiating from a central yellow disc, accompanied by green leaves with jagged edges." 
+ }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000513567.jpg", + "annotation": { + "bbox": [ + 592.64, + 121.44, + 46.08, + 188.16 + ], + "segmentation": [ + [ + 592.64, + 121.44, + 638.72, + 121.44, + 638.72, + 309.6, + 592.64, + 309.6 + ] + ] + }, + "caption": "A person with dark hair wearing a brown top and blue jeans, walking with their left arm slightly bent and their right arm hanging down." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000543300.jpg", + "annotation": { + "bbox": [ + 264.96, + 331.68, + 158.72, + 16.32 + ], + "segmentation": [ + [ + 264.96, + 331.68, + 423.68, + 331.68, + 423.68, + 348.0, + 264.96, + 348.0 + ] + ] + }, + "caption": "A white boat with the name \"Port River Dolphin Cruises\" written in a stylized font on its side." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000241668.jpg", + "annotation": { + "bbox": [ + 224.02, + 87.04, + 147.66, + 544.0 + ], + "segmentation": [ + [ + 224.02, + 87.04, + 371.68, + 87.04, + 371.68, + 631.04, + 224.02, + 631.04 + ] + ] + }, + "caption": "A smiling woman with long, straight red hair, wearing a black blazer over a white shirt and a dark tie. She is holding a slice of cake with both hands." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000535578.jpg", + "annotation": { + "bbox": [ + 80.7, + 20.48, + 219.48, + 93.44 + ], + "segmentation": [ + [ + 80.7, + 20.48, + 300.18, + 20.48, + 300.18, + 113.92, + 80.7, + 113.92 + ] + ] + }, + "caption": "The hill is gently sloping with a mix of green grass and patches of exposed soil. It features a winding stone path that ascends from the bottom left to the top right. The hill is dotted with small bushes and scattered rocks, adding texture to the landscape. The terrain is uneven, with slight undulations and a few small mounds." 
+ }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000277051.jpg", + "annotation": { + "bbox": [ + 245.76, + 158.84, + 200.96, + 177.21 + ], + "segmentation": [ + [ + 245.76, + 158.84, + 446.72, + 158.84, + 446.72, + 336.05, + 245.76, + 336.05 + ] + ] + }, + "caption": "A small bird with a brownish-grey plumage, displaying a subtle gradient from lighter shades on the chest to darker tones on the back and wings. The bird has a distinct white eyebrow stripe above its eye, a short, conical beak, and a visible eye with a dark pupil. Its legs are thin and pinkish, with delicate toes adapted for perching." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000018519.jpg", + "annotation": { + "bbox": [ + 143.69, + 335.36, + 31.93, + 29.44 + ], + "segmentation": [ + [ + 143.69, + 335.36, + 175.62, + 335.36, + 175.62, + 364.8, + 143.69, + 364.8 + ] + ] + }, + "caption": "A black, textured strap with a series of evenly spaced, circular indentations along its length, and a metallic buckle with a prong at one end." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000106048.jpg", + "annotation": { + "bbox": [ + 142.08, + 61.63, + 382.72, + 262.36 + ], + "segmentation": [ + [ + 142.08, + 61.63, + 524.8, + 61.63, + 524.8, + 324.0, + 142.08, + 324.0 + ] + ] + }, + "caption": "A white coach bus with a curved front windshield and a large, tinted side window. The bus features a purple and blue wave-like design on the side, with the text \"Divine Transportation\" written in elegant script. The vehicle number \"7006\" is displayed at the top front corner. The bus has a black front bumper, side mirrors, and a visible door towards the front. The wheels are not visible in the image." 
+ }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000058393.jpg", + "annotation": { + "bbox": [ + 346.88, + 166.7, + 172.8, + 72.9 + ], + "segmentation": [ + [ + 346.88, + 166.7, + 519.68, + 166.7, + 519.68, + 239.6, + 346.88, + 239.6 + ] + ] + }, + "caption": "Two individuals with short hair, one with a side-swept fringe and the other with a more pronounced side-swept fringe, both with visible earlobes and a portion of their shoulders and upper backs." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000010764.jpg", + "annotation": { + "bbox": [ + 349.44, + 265.0, + 51.2, + 74.62 + ], + "segmentation": [ + [ + 349.44, + 265.0, + 400.64, + 265.0, + 400.64, + 339.62, + 349.44, + 339.62 + ] + ] + }, + "caption": "The knee pad features a hard, black, rounded protective shell with a glossy finish, designed to cover the knee. It is attached to a black, adjustable strap system with a buckle for securing the pad around the leg. The shell has a contoured design to fit the shape of the knee, with a central ridge running vertically down the middle. The strap system includes a red adjustment buckle for size customization." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000271402.jpg", + "annotation": { + "bbox": [ + 25.44, + 55.04, + 170.45, + 526.08 + ], + "segmentation": [ + [ + 25.44, + 55.04, + 195.89, + 55.04, + 195.89, + 581.12, + 25.44, + 581.12 + ] + ] + }, + "caption": "Two young girls with light skin and blonde hair, wearing white short-sleeved shirts with a circular emblem on the left chest, white pleated skirts, and white sneakers with yellow socks. The girl on the left is holding a black handlebar with a silver shaft, while the girl on the right is holding a similar handlebar." 
+ }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000273493.jpg", + "annotation": { + "bbox": [ + 294.0, + 108.89, + 131.0, + 125.21 + ], + "segmentation": [ + [ + 294.0, + 108.89, + 425.0, + 108.89, + 425.0, + 234.1, + 294.0, + 234.1 + ] + ] + }, + "caption": "A tennis net with a white tape running along the top edge, supported by a white post on the right side. The net is composed of a grid of black mesh squares." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000360960.jpg", + "annotation": { + "bbox": [ + 223.22, + 473.6, + 89.46, + 74.24 + ], + "segmentation": [ + [ + 223.22, + 473.6, + 312.68, + 473.6, + 312.68, + 547.84, + 223.22, + 547.84 + ] + ] + }, + "caption": "Black trousers with a straight-leg cut, featuring a smooth texture and a regular fit." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000452122.jpg", + "annotation": { + "bbox": [ + 416.0, + 182.76, + 133.12, + 73.44 + ], + "segmentation": [ + [ + 416.0, + 182.76, + 549.12, + 182.76, + 549.12, + 256.2, + 416.0, + 256.2 + ] + ] + }, + "caption": "A commercial jet with a white fuselage featuring the word \"ANA\" in blue letters, a blue and white logo, and a partially visible engine under the wing." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000134722.jpg", + "annotation": { + "bbox": [ + 204.8, + 216.48, + 89.6, + 67.68 + ], + "segmentation": [ + [ + 204.8, + 216.48, + 294.4, + 216.48, + 294.4, + 284.16, + 204.8, + 284.16 + ] + ] + }, + "caption": "The windshield is a large, curved, and slightly tinted glass panel with a smooth surface, seamlessly integrated into the train's front. It features a dark wiper at the bottom and is bordered by a thin, yellow frame that matches the train's exterior color." 
+ }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000039484.jpg", + "annotation": { + "bbox": [ + 540.16, + 339.55, + 72.96, + 52.44 + ], + "segmentation": [ + [ + 540.16, + 339.55, + 613.12, + 339.55, + 613.12, + 391.99, + 540.16, + 391.99 + ] + ] + }, + "caption": "A group of people are seated at a table, engaged in conversation. The table is covered with a green tablecloth, and the individuals are dressed in casual attire. The scene suggests a relaxed and social atmosphere." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000159311.jpg", + "annotation": { + "bbox": [ + 103.0, + 284.05, + 75.0, + 42.96 + ], + "segmentation": [ + [ + 103.0, + 284.05, + 178.0, + 284.05, + 178.0, + 327.01, + 103.0, + 327.01 + ] + ] + }, + "caption": "A dense cluster of green grass blades with varying lengths, some standing upright while others are slightly bent, interspersed with thin, dry, brownish strands." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000326174.jpg", + "annotation": { + "bbox": [ + 284.16, + 220.32, + 69.12, + 189.12 + ], + "segmentation": [ + [ + 284.16, + 220.32, + 353.28, + 220.32, + 353.28, + 409.44, + 284.16, + 409.44 + ] + ] + }, + "caption": "A person with long hair tied back, wearing a yellow hoodie with a graphic on the back, black shorts, and brown shoes, is holding a blue surfboard with a black traction pad on the tail." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000562207.jpg", + "annotation": { + "bbox": [ + 98.56, + 166.6, + 93.44, + 243.1 + ], + "segmentation": [ + [ + 98.56, + 166.6, + 192.0, + 166.6, + 192.0, + 409.7, + 98.56, + 409.7 + ] + ] + }, + "caption": "A smiling man with short hair, wearing dark gray shorts with white stripes on the sides, stands with his left hand on his hip and his right arm extended outward. He has a watch on his left wrist and is barefoot." 
+ }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000332318.jpg", + "annotation": { + "bbox": [ + 279.04, + 368.94, + 11.52, + 12.87 + ], + "segmentation": [ + [ + 279.04, + 368.94, + 290.56, + 368.94, + 290.56, + 381.81, + 279.04, + 381.81 + ] + ] + }, + "caption": "A brown cow with a white pattern on its forehead and snout, dark eyes, and a pair of curved horns." + } +] \ No newline at end of file diff --git a/evaluation/Ferret-Bench/model_outputs/gar_8b.json b/evaluation/Ferret-Bench/model_outputs/gar_8b.json new file mode 100644 index 0000000000000000000000000000000000000000..fbc621006c529114242c08776f7f786b2856b01b --- /dev/null +++ b/evaluation/Ferret-Bench/model_outputs/gar_8b.json @@ -0,0 +1,962 @@ +[ + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000069138.jpg", + "annotation": { + "bbox": [ + 230.39, + 52.48, + 56.02, + 32.0 + ], + "segmentation": [ + [ + 230.39, + 52.48, + 286.41, + 52.48, + 286.41, + 84.48, + 230.39, + 84.48 + ] + ] + }, + "caption": "A rectangular signboard with a dark background and light-colored text. The text is arranged in three lines, with the first line reading \"FROM 2,500 SQ FT,\" the second line \"IMMEDIATELY,\" and the third line \"AVAILABLE.\" The text is capitalized and there is a border around the signboard." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000131138.jpg", + "annotation": { + "bbox": [ + 117.12, + 383.52, + 91.52, + 46.56 + ], + "segmentation": [ + [ + 117.12, + 383.52, + 208.64, + 383.52, + 208.64, + 430.08, + 117.12, + 430.08 + ] + ] + }, + "caption": "A white, round saucer with a slightly raised rim, featuring a metallic spoon resting on its surface. The saucer has a smooth, glossy finish with some light brown stains and smudges near the center." 
+ }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000475150.jpg", + "annotation": { + "bbox": [ + 184.32, + 138.35, + 181.76, + 138.77 + ], + "segmentation": [ + [ + 184.32, + 138.35, + 366.08, + 138.35, + 366.08, + 277.12, + 184.32, + 277.12 + ] + ] + }, + "caption": "A giraffe with a pattern of large, irregular brown patches separated by lighter, beige lines on its skin. The giraffe has two small horn-like structures, called ossicones, atop its head, which are covered with fur and have a rounded appearance. Its eyes are large with long eyelashes, and the giraffe's ears are pointed and tufted with hair. The visible part of the neck shows a continuation of the same patterned skin, and the giraffe's snout is elongated with prominent nostrils and a slightly darker coloration around the mouth area." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000356424.jpg", + "annotation": { + "bbox": [ + 253.44, + 162.56, + 90.72, + 263.68 + ], + "segmentation": [ + [ + 253.44, + 162.56, + 344.16, + 162.56, + 344.16, + 426.24, + 253.44, + 426.24 + ] + ] + }, + "caption": "A person with dark hair tied back, wearing a dark-colored jacket, is seated and looking down at an object in their hands." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000491090.jpg", + "annotation": { + "bbox": [ + 48.76, + 318.72, + 108.51, + 124.16 + ], + "segmentation": [ + [ + 48.76, + 318.72, + 157.26, + 318.72, + 157.26, + 442.88, + 48.76, + 442.88 + ] + ] + }, + "caption": "A black motorcycle wheel with a silver exhaust pipe attached to the left side, featuring a visible brake disc and caliper assembly." 
+ }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000484415.jpg", + "annotation": { + "bbox": [ + 229.12, + 46.08, + 56.96, + 85.92 + ], + "segmentation": [ + [ + 229.12, + 46.08, + 286.08, + 46.08, + 286.08, + 132.0, + 229.12, + 132.0 + ] + ] + }, + "caption": "A white ceramic toilet bowl with an open black seat cover, featuring a visible water surface within the bowl." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000184324.jpg", + "annotation": { + "bbox": [ + 360.96, + 327.68, + 199.68, + 93.5 + ], + "segmentation": [ + [ + 360.96, + 327.68, + 560.64, + 327.68, + 560.64, + 421.18, + 360.96, + 421.18 + ] + ] + }, + "caption": "The floor consists of alternating black and white rectangular stripes, forming a pedestrian crosswalk pattern. The stripes are evenly spaced and run parallel to each other, creating a clear path for pedestrians." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000341058.jpg", + "annotation": { + "bbox": [ + 222.22, + 536.32, + 5.03, + 7.68 + ], + "segmentation": [ + [ + 222.22, + 536.32, + 227.25, + 536.32, + 227.25, + 544.0, + 222.22, + 544.0 + ] + ] + }, + "caption": "A white, rectangular saltshaker with a textured surface and a black, perforated top featuring multiple small holes for dispensing salt." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000184384.jpg", + "annotation": { + "bbox": [ + 401.92, + 57.6, + 236.8, + 129.12 + ], + "segmentation": [ + [ + 401.92, + 57.6, + 638.72, + 57.6, + 638.72, + 186.72, + 401.92, + 186.72 + ] + ] + }, + "caption": "A plate with a grilled sausage, a portion of scrambled eggs, and a side of potatoes." 
+ }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000259097.jpg", + "annotation": { + "bbox": [ + 6.0, + 173.16, + 492.0, + 36.96 + ], + "segmentation": [ + [ + 6.0, + 173.16, + 498.0, + 173.16, + 498.0, + 210.12, + 6.0, + 210.12 + ] + ] + }, + "caption": "A long, continuous hill with a gentle slope, covered in dense green trees and vegetation. The hill extends horizontally across the entire image, with a slightly uneven tree line at the top." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000377882.jpg", + "annotation": { + "bbox": [ + 154.88, + 101.28, + 38.4, + 373.44 + ], + "segmentation": [ + [ + 154.88, + 101.28, + 193.28, + 101.28, + 193.28, + 474.72, + 154.88, + 474.72 + ] + ] + }, + "caption": "A vertical, cylindrical pole with a textured surface, featuring a series of diagonal, crisscrossing lines that create a diamond pattern across its entirety." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000415748.jpg", + "annotation": { + "bbox": [ + 35.78, + 280.32, + 273.92, + 330.24 + ], + "segmentation": [ + [ + 35.78, + 280.32, + 309.7, + 280.32, + 309.7, + 610.56, + 35.78, + 610.56 + ] + ] + }, + "caption": "The ground is a flat, light brown surface with a slightly rough texture. It appears to be made of compacted dirt or sand, with some areas showing faint, irregular patterns. The ground is relatively even, with no significant elevation changes visible." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000408120.jpg", + "annotation": { + "bbox": [ + 252.16, + 241.25, + 112.64, + 65.33 + ], + "segmentation": [ + [ + 252.16, + 241.25, + 364.8, + 241.25, + 364.8, + 306.59, + 252.16, + 306.59 + ] + ] + }, + "caption": "The ground is a rough, uneven surface with a mix of small pebbles and fine gravel. The texture is coarse, with a predominantly gray color interspersed with occasional darker and lighter patches. 
The surface appears slightly compacted, with some areas showing minor indentations and irregularities." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000184400.jpg", + "annotation": { + "bbox": [ + 385.28, + 401.76, + 60.16, + 76.8 + ], + "segmentation": [ + [ + 385.28, + 401.76, + 445.44, + 401.76, + 445.44, + 478.56, + 385.28, + 478.56 + ] + ] + }, + "caption": "A cylindrical pole with a flared base and a decorative capital featuring a series of concentric rings and a flat, overhanging element with a scalloped edge." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000276018.jpg", + "annotation": { + "bbox": [ + 29.54, + 241.92, + 177.63, + 296.96 + ], + "segmentation": [ + [ + 29.54, + 241.92, + 207.17, + 241.92, + 207.17, + 538.88, + 29.54, + 538.88 + ] + ] + }, + "caption": "A young boy with short, curly brown hair, wearing a black jacket, is holding a brown and white stuffed monkey." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000376322.jpg", + "annotation": { + "bbox": [ + 319.3, + 161.28, + 115.2, + 236.8 + ], + "segmentation": [ + [ + 319.3, + 161.28, + 434.5, + 161.28, + 434.5, + 398.08, + 319.3, + 398.08 + ] + ] + }, + "caption": "A man with short gray hair and glasses, wearing a light green button-up shirt with a name tag on the left side of his chest, is holding a glass of white wine in his right hand." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000125472.jpg", + "annotation": { + "bbox": [ + 85.83, + 1.28, + 315.55, + 483.84 + ], + "segmentation": [ + [ + 85.83, + 1.28, + 401.38, + 1.28, + 401.38, + 485.12, + 85.83, + 485.12 + ] + ] + }, + "caption": "The sky is a clear, bright white with no visible clouds, creating a stark contrast against the person in the foreground." 
+ }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000361551.jpg", + "annotation": { + "bbox": [ + 459.36, + 394.24, + 19.2, + 34.56 + ], + "segmentation": [ + [ + 459.36, + 394.24, + 478.56, + 394.24, + 478.56, + 428.8, + 459.36, + 428.8 + ] + ] + }, + "caption": "A person with short, dark hair, wearing a light-colored top with a dark strap over the shoulder." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000412240.jpg", + "annotation": { + "bbox": [ + 1.0, + 163.88, + 359.0, + 131.25 + ], + "segmentation": [ + [ + 1.0, + 163.88, + 360.0, + 163.88, + 360.0, + 295.12, + 1.0, + 295.12 + ] + ] + }, + "caption": "A pair of black leather dress shoes with a low heel and black laces, positioned side by side." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000130566.jpg", + "annotation": { + "bbox": [ + 403.2, + 201.12, + 33.28, + 33.73 + ], + "segmentation": [ + [ + 403.2, + 201.12, + 436.48, + 201.12, + 436.48, + 234.85, + 403.2, + 234.85 + ] + ] + }, + "caption": "The railcar is a modern passenger model with a predominantly maroon and cream livery. It features a streamlined design with a curved front end, large windows on the sides for passenger viewing, and a set of black doors for entry and exit. The lower portion of the railcar has a yellow stripe running horizontally, and there are visible vents and grilles on the side, likely for engine ventilation. The roofline is smooth and slightly overhangs the body of the railcar." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000421923.jpg", + "annotation": { + "bbox": [ + 339.1, + 295.68, + 79.24, + 56.32 + ], + "segmentation": [ + [ + 339.1, + 295.68, + 418.33, + 295.68, + 418.33, + 352.0, + 339.1, + 352.0 + ] + ] + }, + "caption": "A lush bouquet of white daisies with prominent yellow centers, complemented by green foliage." 
+ }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000513567.jpg", + "annotation": { + "bbox": [ + 592.64, + 121.44, + 46.08, + 188.16 + ], + "segmentation": [ + [ + 592.64, + 121.44, + 638.72, + 121.44, + 638.72, + 309.6, + 592.64, + 309.6 + ] + ] + }, + "caption": "A person with dark hair tied back, wearing a brown top and blue jeans, is walking." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000543300.jpg", + "annotation": { + "bbox": [ + 264.96, + 331.68, + 158.72, + 16.32 + ], + "segmentation": [ + [ + 264.96, + 331.68, + 423.68, + 331.68, + 423.68, + 348.0, + 264.96, + 348.0 + ] + ] + }, + "caption": "A pink banner with the text \"Port River Dolphin Cruises\" in a serif font, with the word \"Port\" in a larger size than the rest of the text." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000241668.jpg", + "annotation": { + "bbox": [ + 224.02, + 87.04, + 147.66, + 544.0 + ], + "segmentation": [ + [ + 224.02, + 87.04, + 371.68, + 87.04, + 371.68, + 631.04, + 224.02, + 631.04 + ] + ] + }, + "caption": "A woman with long, straight red hair, wearing a black suit jacket over a white dress shirt and a dark tie. She is holding a slice of chocolate cake with white frosting on a plate." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000535578.jpg", + "annotation": { + "bbox": [ + 80.7, + 20.48, + 219.48, + 93.44 + ], + "segmentation": [ + [ + 80.7, + 20.48, + 300.18, + 20.48, + 300.18, + 113.92, + 80.7, + 113.92 + ] + ] + }, + "caption": "A gently sloping hill covered in lush green grass, with a winding stone path running horizontally across its midsection. The hill is dotted with small bushes and patches of exposed soil, and a few scattered rocks are visible along the path." 
+ }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000277051.jpg", + "annotation": { + "bbox": [ + 245.76, + 158.84, + 200.96, + 177.21 + ], + "segmentation": [ + [ + 245.76, + 158.84, + 446.72, + 158.84, + 446.72, + 336.05, + 245.76, + 336.05 + ] + ] + }, + "caption": "A small sparrow with brown and gray feathers, a short beak, and a slightly rounded body. It has a light brown head with darker streaks and a small, dark eye. The wings are brown with darker streaks, and the tail is short and brown. The bird is standing on thin, pinkish legs with small claws." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000018519.jpg", + "annotation": { + "bbox": [ + 143.69, + 335.36, + 31.93, + 29.44 + ], + "segmentation": [ + [ + 143.69, + 335.36, + 175.62, + 335.36, + 175.62, + 364.8, + 143.69, + 364.8 + ] + ] + }, + "caption": "A black, rectangular pad with rounded corners, featuring a central white logo consisting of a stylized letter 'S' within a square outline." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000106048.jpg", + "annotation": { + "bbox": [ + 142.08, + 61.63, + 382.72, + 262.36 + ], + "segmentation": [ + [ + 142.08, + 61.63, + 524.8, + 61.63, + 524.8, + 324.0, + 142.08, + 324.0 + ] + ] + }, + "caption": "A white coach bus with a curved front windshield and a large side window. The bus features a purple and blue wave-like design on its side, with the text \"Divine Transportation\" written in elegant script above the design. The vehicle number \"7006\" is displayed on the top front corner. The bus has a black front bumper, side mirrors, and a visible door towards the front." 
+ }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000058393.jpg", + "annotation": { + "bbox": [ + 346.88, + 166.7, + 172.8, + 72.9 + ], + "segmentation": [ + [ + 346.88, + 166.7, + 519.68, + 166.7, + 519.68, + 239.6, + 346.88, + 239.6 + ] + ] + }, + "caption": "A person with short, curly hair is attached to another person with long, wavy hair." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000010764.jpg", + "annotation": { + "bbox": [ + 349.44, + 265.0, + 51.2, + 74.62 + ], + "segmentation": [ + [ + 349.44, + 265.0, + 400.64, + 265.0, + 400.64, + 339.62, + 349.44, + 339.62 + ] + ] + }, + "caption": "The knee pad features a hard, black plastic shell with a glossy finish, designed to cover and protect the knee. It has a contoured shape to fit the knee, with a series of ridges and indentations that provide structural reinforcement. The edges of the shell appear to be smooth and rounded for comfort. There are two adjustable straps with fastening mechanisms, likely Velcro, for securing the pad around the leg. The straps are black and blend with the overall color scheme of the knee pad." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000271402.jpg", + "annotation": { + "bbox": [ + 25.44, + 55.04, + 170.45, + 526.08 + ], + "segmentation": [ + [ + 25.44, + 55.04, + 195.89, + 55.04, + 195.89, + 581.12, + 25.44, + 581.12 + ] + ] + }, + "caption": "Two young girls with light skin and blonde hair tied back, wearing white short-sleeved shirts with a circular emblem on the left side of the chest, white pleated skirts, and white sneakers with yellow socks. The girl on the left is holding a silver scooter with black grips and an orange handle." 
+ }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000273493.jpg", + "annotation": { + "bbox": [ + 294.0, + 108.89, + 131.0, + 125.21 + ], + "segmentation": [ + [ + 294.0, + 108.89, + 425.0, + 108.89, + 425.0, + 234.1, + 294.0, + 234.1 + ] + ] + }, + "caption": "A tennis net with a white tape running along the top edge, spanning horizontally across the image. The net is supported by a white post on the left side and another white post on the right side. The netting is made of black mesh material." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000360960.jpg", + "annotation": { + "bbox": [ + 223.22, + 473.6, + 89.46, + 74.24 + ], + "segmentation": [ + [ + 223.22, + 473.6, + 312.68, + 473.6, + 312.68, + 547.84, + 223.22, + 547.84 + ] + ] + }, + "caption": "Black trousers with a straight-leg cut, featuring a smooth texture and a visible hem at the bottom." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000452122.jpg", + "annotation": { + "bbox": [ + 416.0, + 182.76, + 133.12, + 73.44 + ], + "segmentation": [ + [ + 416.0, + 182.76, + 549.12, + 182.76, + 549.12, + 256.2, + 416.0, + 256.2 + ] + ] + }, + "caption": "A commercial jet with a white fuselage featuring the word \"ANA\" in blue letters, a dark blue and white logo to the right of the text, and a partially visible landing gear with a single wheel." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000134722.jpg", + "annotation": { + "bbox": [ + 204.8, + 216.48, + 89.6, + 67.68 + ], + "segmentation": [ + [ + 204.8, + 216.48, + 294.4, + 216.48, + 294.4, + 284.16, + 204.8, + 284.16 + ] + ] + }, + "caption": "A curved, dark-tinted windshield with a visible wiper on the lower right side, reflecting a hint of the surrounding environment." 
+ }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000039484.jpg", + "annotation": { + "bbox": [ + 540.16, + 339.55, + 72.96, + 52.44 + ], + "segmentation": [ + [ + 540.16, + 339.55, + 613.12, + 339.55, + 613.12, + 391.99, + 540.16, + 391.99 + ] + ] + }, + "caption": "A rectangular table with a white tablecloth, surrounded by multiple chairs. The tablecloth has a subtle pattern and is draped over the table, with the edges hanging down the sides. The chairs are arranged around the table, with some facing the table and others turned slightly." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000159311.jpg", + "annotation": { + "bbox": [ + 103.0, + 284.05, + 75.0, + 42.96 + ], + "segmentation": [ + [ + 103.0, + 284.05, + 178.0, + 284.05, + 178.0, + 327.01, + 103.0, + 327.01 + ] + ] + }, + "caption": "A dense cluster of green grass blades with varying lengths, some standing upright while others are bent or leaning, interspersed with small patches of bare soil." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000326174.jpg", + "annotation": { + "bbox": [ + 284.16, + 220.32, + 69.12, + 189.12 + ], + "segmentation": [ + [ + 284.16, + 220.32, + 353.28, + 220.32, + 353.28, + 409.44, + 284.16, + 409.44 + ] + ] + }, + "caption": "A person with long blonde hair tied in a ponytail, wearing a bright yellow jacket with a black design on the back, black pants, and brown shoes, is holding a blue surfboard." + }, + { + "image_path": "evaluation/Ferret-Bench/annotations/coco/val2017000000562207.jpg", + "annotation": { + "bbox": [ + 98.56, + 166.6, + 93.44, + 243.1 + ], + "segmentation": [ + [ + 98.56, + 166.6, + 192.0, + 166.6, + 192.0, + 409.7, + 98.56, + 409.7 + ] + ] + }, + "caption": "A smiling man with short hair, wearing dark gray shorts with white stripes on the sides, and a black wristwatch on his left wrist." 
"""Summarize GPT review scores stored in JSON-lines review files.

Each non-empty line of a review file is a JSON object.  The script reads
these keys from it:

* ``"question_id"`` -- only read when ``--ignore`` is given.
* ``"category"``    -- optional; when present the record is also counted
  under that category (and ``"tuple"`` must be present).
* ``"tuple"`` / ``"score"`` -- the score record; ``"score"`` is only used
  when the record has neither ``"category"`` nor ``"tuple"``.

For every group the reported number is
``mean(record[1]) / mean(record[0]) * 100`` rounded to one decimal.
NOTE(review): presumably ``record[0]`` is the reference score and
``record[1]`` the evaluated model's score -- confirm against the script
that writes the review files.
"""

import argparse
import json
import os
from collections import defaultdict

import numpy as np

# File-name prefixes that mark a ``.jsonl`` file as a review file when the
# input files are discovered by scanning a directory.
REVIEW_PREFIXES = ("gpt4_text", "reviews_", "review_")


def parse_args(argv=None):
    """Parse command-line options.

    Args:
        argv: Optional list of argument strings.  ``None`` (the default)
            makes argparse read ``sys.argv[1:]``, which is what the
            original zero-argument call did.

    Returns:
        argparse.Namespace with ``dir``, ``files``, ``ignore`` (list of
        ``int`` or ``None``) and ``save``.
    """
    parser = argparse.ArgumentParser(description="ChatGPT-based QA evaluation.")
    parser.add_argument("-d", "--dir", default=None)
    parser.add_argument("-f", "--files", nargs="*", default=None)
    # Question ids are compared against the integer ids stored in the review
    # records, so convert them while parsing.
    parser.add_argument("-i", "--ignore", nargs="*", type=int, default=None)
    parser.add_argument("-s", "--save", action="store_true")
    return parser.parse_args(argv)


def find_review_files(directory=None):
    """Return the names of the review ``.jsonl`` files inside *directory*.

    Args:
        directory: Directory to scan; ``None`` scans the current directory.

    Returns:
        List of bare file names (not joined with *directory*) that end in
        ``.jsonl`` and start with one of ``REVIEW_PREFIXES``.
    """
    scan_dir = os.curdir if directory is None else directory
    return [
        name
        for name in os.listdir(scan_dir)
        if name.endswith(".jsonl") and name.startswith(REVIEW_PREFIXES)
    ]


def summarize_review_file(path, ignore=None):
    """Compute the relative score of every group found in one review file.

    Args:
        path: Path of the JSON-lines review file.
        ignore: Optional collection of ``question_id`` values to skip.

    Returns:
        dict mapping group name (each category plus ``"all"``) to the
        relative score in percent, with keys in sorted order.

    Raises:
        KeyError: if a record with ``"category"`` has no ``"tuple"``, or a
            record has neither ``"tuple"`` nor ``"score"``.
        ZeroDivisionError: if the mean of the first score of a group is 0.
    """
    scores = defaultdict(list)
    with open(path, encoding="utf-8") as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file)
            # instead of crashing inside json.loads.
            if not line.strip():
                continue
            review = json.loads(line)
            if ignore is not None and review["question_id"] in ignore:
                continue
            if "category" in review:
                scores[review["category"]].append(review["tuple"])
                scores["all"].append(review["tuple"])
            elif "tuple" in review:
                scores["all"].append(review["tuple"])
            else:
                scores["all"].append(review["score"])

    summary = {}
    for group, records in sorted(scores.items()):
        # Column-wise mean over the records of this group, rounded before
        # the division exactly as the reported numbers always were.
        stats = [round(x, 3) for x in np.asarray(records).mean(0).tolist()]
        summary[group] = round(stats[1] / stats[0] * 100, 1)
    return summary


def main(argv=None):
    """Print the summary of every review file and optionally save it.

    Args:
        argv: Optional argument list forwarded to :func:`parse_args`.

    Returns:
        List with one summary dict per review file, in sorted file order.
    """
    args = parse_args(argv)

    if args.files:
        review_files = args.files
    else:
        review_files = find_review_files(args.dir)

    metrics = []
    for review_file in sorted(review_files):
        config = (
            os.path.basename(review_file)
            .replace("gpt4_text_", "")
            .replace(".jsonl", "")
        )
        print(config)
        path = (
            os.path.join(args.dir, review_file)
            if args.dir is not None
            else review_file
        )
        summary = summarize_review_file(path, args.ignore)
        for group, value in summary.items():
            print(group, value)
        print("=================================")
        metrics.append(summary)

    if args.save:
        # --save used to crash with a TypeError when --dir was omitted
        # (os.path.join(None, ...)); fall back to the current directory.
        out_dir = os.curdir if args.dir is None else args.dir
        with open(os.path.join(out_dir, "metric.json"), "w", encoding="utf-8") as f:
            json.dump(metrics, f, indent=2)

    return metrics


if __name__ == "__main__":
    main()
"hT\\63W=1N3M2O1O2N1O1O2O0O1O001O00O10O10O001000O011N1O1O10000O1O1000000\\MCZH>e7GTFGk1b0Q81mGOS83kGMU85hGKY86fGJZ88cGJ\\88`GIb88[GIe8:WGGj8:PGIQ99lFHU99dFK]96\\F0d92WF0k90SF0o90PF0P:1nENU:b12O1O1O1M3N2M3L4N2O2N2O1O001N1OZFoMT8P2kGUNS8k1jGYNU8c1aFRNY1>V8^1aFTNX1a0d0@j5b1ZH]NW1d0a0@n5^1ZH^NV1g0>@Q6[1\\H]NT1k0=]OT6[1`I\\O9ZOV6\\1^I]O:WOX6]1\\I_O:UOZ6\\1ZIB:SO[6[1ZIE8QO^6Z1YIH7nN`6[1VIK8jNb6]1SIL8iNd6\\1RIN0ZN_O<`7[1PI2OWN@=a7Z1oH6LUNB=Ic0^O7J4K2N2O0O2O0O10O010TOhBOY=KoBAGNg=a0d0O0101XOAlB`0Q=ElB=Q=FmB=P=FmBbNXO]:8QG`0bNXO_:6oFb0j9\\OVFb0m9]OSFa0R:\\OnE?Y:^OiE?\\C1N100O100O10O0000010O0100O000000000000000000000O10000O100VNiCj0WRSN`0\\9RO`H?TN?[9RO^Hc0XN9\\9SOZHf0\\N3\\9WOVHg0aNOZ9ZOTHh0cNLZ9[OSHj0dNIZ9[OSHm0fNCY9]ORHQ1hN^OX9_ORHS1iNXOX9EoGS1jNUOX9HoGR1lNROW9KnGS1QOeNX95jGV1Y9hNhFX1X9gNiFX1X9hNhFX1X9gNhFZ1X9eNhF\\1X9dNfF]1[9cNcF_1]9aNbF`1]9aN`Fb1a9]N_Fc1a9]N_Fc1a9]N_Fc1a9]N^Fd1b9\\N^Fd1b9\\N]Fd1d9\\N[Fe1e9[NZFf1f9ZNYFg1g9YNXFh1h9XNXFh1S3XNj10RKi1R3\\Ni1KUKh1Q3`Ni1HVKh1Q3aNg1HXKf1o2fNh1DZKe1i2mNl1^O\\Kc1g2SOk1ZO_Kb1e2VOk1XO`Ka1f2XOi1WOaK`1f2[Oh1UO_K`1l2[Oe1UO]K_1Q3\\Oc1TO[K`1R3^Oa1TO\\K]1T3_O`1UO[K[1V3_O`1XOWKY1Z3_O_1ZOUKU1]3B^1ZOTKS1_3C]1\\ORKQ1a3C]1]OQKo0c3E[1]OQKn0c3G[1_OnJj0g3H[1_OlJi0i3HZ1AlJg0j3IY1BkJe0k3JZ1DhJb0n3JZ1GeJ?Q4JZ1IcJ>R4I[1KaJM2\\2^OgMb0MO^2\\OfMe0LO^2\\OeMg0LM`2ZOdMjN^NP1_1k0c2VO`MfNjNX1S1k0g2RO]MjNkNY1P1k0k2nNZMlNoNZ1m0k0m2kNXMnNQO\\1j0j0m2lNWMnNTO\\1h0i0n2_M`KY1f1SOWO[1d0j0n2`MbKV1f1TOXO[1c0j0l2aMgKR1b1XOZO[1`0j0W3aNoLZO\\OZ1?j0X3_NnL]O[O[1`0g0aNbMa4n0RM_O]OZ1>g0Z3^NkLA]O\\1>d0\\3\\NjLD\\O\\1>d0]3ZNiLF]O]1c3QNdL2]O_1==f3lMaL9[O`1=:kNjM]6<[Ja1=8kNoMX69`Ja1=6lNoMV6:dJ_1:8nNmMT6ZMdNl1i5i1SKeM0Z1`0F[4\\1PKSLYOc1k0W1`0I\\4[1nJjM6P1`0M[4Y1kJoM9j0a00Z4W1jJRN;f0`03Z4U1iJUN=?b09W4S1cJSLKU2i09c0>U4R1bJTLKU2j07c0cMcNi2b5i3TKnK1^MMP3m4e3SKnK2^MNP3l4g3fJTL?XMNo2l4i6SK[F1m2k4h6RK^F1l2l4f6RK_F2l2k4e6QKaF4l2i4c6SKaF4m2h4b6SKcF4l2h4a6RKXFF7a0R3d4_6UKfF7k2c4`6TKfF9l2a4_6TKfFk2^4Z7bKgH]4Y7bKiH]4W7cKiH]4W7_KnH`4R7`KnH`4Q7aKQI]4o6aKXEEh3i4P7aKZEFf3h4Q7aKUI_4k6aKUI_4l6`KUI
_4k6`KVI`4k6^KWIa4j6^KWIa4k6\\KWIc4T;O001O001O001O00001OL`Ki@_4]?O1O002N2N1O2N2N2N2N1O2N2N1O2N2N2N2N1K6M5K6J0O10000O2YOo^O^NQa0X20O100O1[On^O]NRa0V220000N20000N3O1N2^Od^O]N`a0b1`^O]Naa0b1a^OSNia0j18N2N2O1O1N2N2N2O1N2N2N2O1O1O1M3O1N2O1N1O2O0000MXOP]Of0Rc0ZOn\\Od0Xc0N2OIAo\\O=Sc0Cl\\O=Uc0Ck\\O;Wc0Eh\\O;Yc0Eg\\O:Zc0Ff\\O8[c0Gf\\O9[c0Ge\\O7^c0Gb\\O8`c0H`\\O6ec001N[KM`E3W?N2OPPi1" + } + ] + }, + { + "image": "images/caption_detailed_4.png", + "subject_name": "person", + "object_name": "pavement", + "predicate_name": "standing on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "al_6=^>8I7AWO[BS1a=:M1N04MmNfN_DZ1];kNaDU1[;POdDQ1X;ROhDn0S;WOmDi0Q;YOPEf0m:]OSEc0k:_OUEb0h:@ZE>d:FZE:e:HhDoNIM8[1V;KhDmNIN9Z1U;MgDPO4T1l0Ne73WGgNOM9^1`0f0_7YOkGbNO08b16j0f7RORHbN3e1ES1U8gNaHQ3_7nLbHS3\\7nLdHR3[7oLeHQ3Z7PMeH`NYO^4Q8TMeH]N[O_4^7aKQIc1I]N20F_4Z7eKUI_19l2_6iKVI\\1;k2\\6mKXIX1=j2Y6PLZIV1=j2X6PL[IW1=i2V6RL]IU1>i2R6ULVI@1b1g0i2P6WLUI\\1l0\\2n5\\NRJd1m5]NSJc1l5^NTJb1l5]NUJc1j5^NVJb1j5]NWJc1i5]NWJc1h5^NWJc1i5]NWJc1i5\\NWJe1i5[NVJf1j5ZNVJf1j5ZNRJj1n5UNPJn1P6RNnIP2R6PNiIU2W6kMcI[2^6eM\\I`14^L`6]6eIaI[6[6mIaIS6e0[I\\4i0lJm5f0]IW4l0RKg5e0`IS4o0UKe5d0_IR4Q1VKe5d0]Im3V1^K_5c0]Ii3Y1dK[5a0^Ia3`1nKY56]Ic3b1ULU53\\Ia3^:K4L3K6G\\DRMg;m28M3N2N6I6K5Ic0J5M2L5L3N3N2M4M2M3N3M2N2M2O2N1O2N1O1O1O2O1N1O001O00WO]CaN1?c2O0L4N00100N2O1O0]OFaB<^=J\\B7e=KVB7j=>`BSOP=l0QCVOnRCCo<=PCCR==kBCX=>cBCb=k03M2N1N2O1N2NTCXOd;g0\\D\\Ob;d0]D^Oa;i0XDXOh;m0RDTOn;S1hCPOX6K4L3N2M3N2N1O2N100O2N100O100O1000O01000O1O010O1O00001O1N2O0O1O2M3C=DeA4Y>LjA4T>LmA7o=ISB5m=KTB3m=MUB0l=1a000O2Oln00SQO2O0N4L4L3N1M4F9M3O1O2O00O0100000O100O101N1O2N2M2O3L3M6GTl]6" + } + ] + }, + { + "image": "images/caption_detailed_8.png", + "subject_name": "surfboard", + "object_name": "person", + "predicate_name": "attached to", + "mask_rles": [ + { + "size": [ + 408, + 640 + ], + "counts": "Ved18VO11O000000001O1N2N2N2N2N2N2N2N2O12N1O2NjN]E9b:EgE6W:IkE9R:DSF;m9DUF;_OQO[:>[Fc0e9[O_Fa0o:N2M3M1O4M:F000O1O0O2O010O1O1O1OhV^3" + } + ] + }, + { + 
"image": "images/caption_detailed_9.png", + "subject_name": "person", + "object_name": "grass", + "predicate_name": "standing on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "d45k>0N200O1O2N2O1N1O1O1O010O2O3L4M000O100O010OO2N1NI9O10O1010O0100O1O2M2O2N2FfAK^>OfANg>OjSi8" + }, + { + "size": [ + 480, + 640 + ], + "counts": "i8U6k80O10001O0000000001O1O2N2M6K6K1N2M2O3M2O0O1O0O2O001O1O1O001O10O00O10000000000000000000001O0000001O000000001O000000000000001O000000000000000000O1000000O2O00O02N1O1O001O1O1O1O100O1O1O1N2O1O1N2O1O1N2N2N2010N2O001O001O00001O00001O001O001O1O000000001O0000000000001O00001O00001O0000001O000000001O00001O1O3M1O1O2N2N2N001O2N1O100O0O2O1O001O001O0000001O0001O000001O00O010000000000000O100000000O10000O100000000000000001O000000000000O1O10000001O0000000000000000001O00000000001O00001O000000001O0000001O000000001O0000010O000O10001O0000001O00001O000000001O0000001O00001O0000001O00001O000000001O000000001O00000000001O00000000001O0000001O000000001O00001O000000001O00000000000000001O0000010N2O3M2N4L2N3M3M2N1O2N1O1O001O1O1O1O0000001O0000000000000000001O000000000000O10000000000O10000O10000O10000O100O100O1O100O100O1O100O100O1O1N2O1N2N2N2O1M3O1M3N2M3L4M3K5K5M3G9L4L4N2000000000000001OO10000O1O100O1O100O1O1O100O100O100O1O100O1O10000O100O100O100O100O100O100O100O100O1O10000O100O100O100O1O10000O100O1O100O1O10000O100O1O10000O10000O10000001O00001O2N001O001O001O001O0000001O000000000000000000O10000O1O100O100O1O1N2O1O2K3K6H8I7O1O100000001O00O1000000000000000O1FfHmH[7Q7iHlHX7S7;O10000O1O100L4M3ZOQHkI31R8S6a00000O100001OO10000000000O100001OO10000000000000000000000000000000000001O0000000000000000000000000000001O0000000000mI" + } + ] + }, + { + "image": "images/caption_detailed_10.png", + "subject_name": "umbrella", + "object_name": "pavement", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 640, + 428 + ], + "counts": 
"b>;j1JWN3[a05f^OE106OO6Jm0Xa0d1M2O1O0O2O00001O0O2O1O00001N2O00001N101O001N101N10001O000O2O001O1N101O00001N101N101O0O3N001O0O2O001O1O0O2O1N101O001N2N101O1O0O2O001O000O2O0O2O000O2O000O100O2O00000O101N10000O100O100O10000O01000O100O010O1000O1000000000O0101O00O10O10000O100000000O100000000O1000000O3N7I7dK[@o3U`0F9G5KcNmL]Bc2b=fMZBV2e=UN[BXOaNS2T?QOXBWOeNS1S?0WBbNiN]1P?[1i@^Mg0X1_>f1XAZNh>i1UAWNj>l1RAkLIY1U?`3000000000001O0O1VHUK?k4AUK?k4[71O0000000000000O11O0000O0100O1O100O1O100O10O010000O1O1O1O10000O1O010O1O100O10000O100O00101OO0100O100000O010000O10O1001O1N100O2O0000001N100000001N10001N1O100O101O0O2O0O100O1O2N10000O100O1O1O2O0O1O100O2N1O1O1O100L5O0O100O101M2N2O1O1O1O1N2O1O1Oo_O]Mh>a2ZA_Me>^2`1O100O1N2N2O1O1O1O1N2O1O1O1N1O2O1O1O2Nf0YO7J5L3L7H3M3N1O1NUiV2" + }, + { + "size": [ + 640, + 428 + ], + "counts": "cj04413L20gb0j7Q@gJa3Z5[LiJd3X5]LgJd3W5^LhJb3X5_LgJ`3Z5`LfJ`3Z5aLeJ_3Z5bLfJ^3Z5cLeJ]3[5cLeJ]3[5cLeJ]3[5cLeJ]3[5cLeJ]3[5dLdJ\\3[5fLdJZ3\\5fLdJZ3[5gLeJY3[5hLdJX3\\5hLdJX3\\5hLdJX3\\5iLcJW3\\5jLdJV3\\5jLdJV3\\5kLcJU3]5kLcJU3\\5mLcJS3]5nLbJR3^5nLbJR3^5oLaJQ3^5QMaJo2_5QMaJo2_5QMaJo2^5RMbJn2^5SMaJm2_5SMaJm2^5UMaJk2_5VM`Jj2`5VM`Ji2a5WM_Ji2`5XM`Jh2`5YM_Jg2a5YM_Jg2`5[M_Je2a5\\M^Jd2b5]M]Jc2c5]M]Jc2c5^M\\Jb2c5_M]Ja2c5`M\\J`2d5aM[J_2e5aM[J_2d5cM[J]2e5cM[J]2e5dMZJ\\2e5fMZJZ2f5fMZJZ2e5hMZJX2f5iMYJW2g5iMYJW2g5jMXJV2g5kMYJU2g5lMXJT2h5lMXJT2h5mMWJS2h5nMXJR2h5oMWJQ2h5QNWJo1i5QNWJn1j5SNUJm1j5TNVJl1j5TNVJl1i5UNWJk1i5VNVJj1i5WNWJi1i5WNWJi1i5XNVJh1j5XNVJh1j5XNVJh1i5YNWJg1i5YNWJg1i5YNWJg1h5[NWJe1i5[NWJe1h5\\NXJd1h5\\NXJd1h5\\NXJe1f5\\NZJd1f5\\NZJc1f5^NZJb1f5^NZJb1e5_N[Ja1e5_N[Ja1e5_N[Ja1d5`N\\J`1d5`N\\Ja1cNcIj6m4cJ`1cNhIe6h4hJl1U5TNlJP2P5PNPKU2i4mMWKS2g4oMYKQ2f4PNZKP2f4oM[KQ2iNUIX5j4oKQ2fN[IY5c4QLR2eN^IY5^4SLU2bN`IZ5Z4TLR3oNVHR4h4oLT3fN]HY4_4QMU3aN`H]4\\4RMl3n2TLRMm3l2TLTMl3h2WLYMo3i1WGlLo4[1Q4`1dL`Nd3U1_LkNe3m0_LSOj3OjL2h3RObLn0P4_NPLb1W;00000O100000000O1000000000000000000000000O11O0000iJ]NQHc1m7cNoG]1`7]NhC=g4V1U7AkH?o6GQI9n6HRI8n6IQI7n6JRI6l6KUI5j6LVI4i6MWI3e61[IO`66`IJZ64L2O1N3N2M2O2M2O1
O1N3N2N2M3N1O1N3N1O1N2O1N3N2M3N1N102M2O2M2N2O1O2M2N3M2O1N2N2O2M2N3M2N2N2O1N3M2N1O3M1O3M2N1O2N2O1O1N2N2N2N2O1N2M3OiL^EQ2`:oMcEP2[:PNiEn1V:QNnEn1o9SNUFj1i9WNYFi1e9VN_Fh1_9YNeFe1X9[NlFd1Q9\\NSGb1k8]NZGa1e8_N^G_1`8aNcG^1[8cNhG[1V8eNmGZ1Q8fNSHX1k7hNXHW1g7iN[HV1c7jN`HU1^7kNeHT1Z7kNiHS1V7nNkHR1S7nNPIP1o6QOSIn0l6QOVIn0i6ROZIm0d6TO]Ik0b6UOaIj0]6WOdIh0[6YOgIf0X6YOjIg0T6ZOmIe0R6[OPJe0n5\\OTJb0k5^OWJb0h5^OZJa0d5_O^J`0b5_OaJ`0\\5BfJiLAW3`0hL_OX3b0hL^OW3c0iL\\OW3e0hL\\OW3e0iLZOW3g0iLXOX3h0hLWOX3j0hLUOY3k0gLUOZ3k0dLUO]3l0bLSO`3n0^LQOd3o0[LPOg3Q1VLoNl3R1RLmNP4S1oKlNT4T1iKlNY4U1eKjN]4V1aKjNa4V1^KiNd4W1[KgNi4X1VKfNl4[1RKeNQ5Z1nJdNU5\\1iJdNY5\\1eJdN]5[1bJfN_5Y1aJgNa5X1^JhNc5W1\\JkNd5U1[JkNf5U1XJlNi5T1VGTNg2i0S6R1SGZNg2e0W6^1eIcN\\6^1bIaNa6^1]IcNc6^1\\IaNf6_1XIbNj6^1TIaNn6^1QIbNR7\\1nHdNR7]1lHcNW7\\1gHeNZ7Z1eHfN]7X1cHhN_7W1`HiNb7V1]HkNd7T1\\HkNg7U1_FaNi09j8W1YFdNd0\\OWOi0n9X1RFhNf07Y9Y2[FhMh9Y2SFhMo9d31N2N3N1N2N2N3M2N2N1O3M2O1N2O1N2O1N2O1N2N2O1N2N2N2N3M2O0O2N2O1N2N2N2N2N2N2N2O1N2N3M1O2N3M1O2O1N2N3M1O3M2N102M1O2O1M3O1N2O1N3M1O2O1N2N2O1N2O1N2N2N2O0O2O1N2N2N2O1N2N2N3N1Mocg0" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"2l>400001OO1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000oJb0`J^O\\5k0_JUO]5S1_JmN]5[1_JeN^5`1`J`N[5i1aJWN]5m1aJSN[5S2cJmMZ5Y2cJgMZ5]2eJcMW5c2gJ]MV5h2hJXMV5k2iJUMT5P3jJPMR5U3mJkLP5Z3nJfLn4_3QKaLm4c3QK]Lk4h3TKXLi4l3VKTLf4R4XKnKg4T4XKlKa4\\4^KdKc31XJ]4U2bKb36SJ[4Z2_Kc38nI^4]2ZKd3=hI]4c2VKe3a5[L_Je3b5ZL^Jf3b5ZL^Je3d5ZL\\Jf3e5YL[Jg3f5XLZJg3h5XLXJh3h5XLXJe3k5[LUJb3o5]LQJ`3R6`LnI]3V6bLjI\\3Y6cLgIX3^6hLbIV3a6iL_IS3e6mL[IP3i6oLWIn2l6RMTIk2o6UMQIf2U7YMkHe2W7[MiHa2[7_MeH^2_7aMaH\\2b7dM^HY2f7fMZHW2i7iMWHS2m7mMSHQ2P8nMPHm1U8SNkGk1X8TNhGi1[8WNeGf1^8ZNbGc1a8]N_G`1d8`N\\G]1g8cNYG[1j8dNVGY1m8gNSGX1o8gNQGW1Q9iNoFU1S9kNmFT1U9kNkFS1W2gLd4V2UIR1X9nNhFP1Z9POfFo0\\9POdFo0^9PObFn0`9RO`Fm0b9RO^Fm0c9SO]Fl0d9TO\\Fk0f9TOZFk0g9UOYFk0g9UOYFj0h9VOXFi0i9WOWFh0k9WOUFh0l9XOTFg0m9YOSFf0n9ZORFf0o9YOQFf0P:ZOPFf0P:ZOPFe0R:ZOnEe0S:[OmEd0T:\\OlEd0T:\\OlEc0V:\\OjEd0V:\\OjEc0W:]OiEc0W:]OiEb0Y:]OgEc0Y:]OgEb0[:]OeEc0[:]OeEb0\\:^OdEa0]:_OcEa0]:_OcEa0^:^ObEa0_:_OaEa0`:^O`Eb0`:^O`Ea0a:_O_Ea0a:_O_Ea0b:^O^Ea0c:_O]Ea0c:_O]Ea0c:_O]Ea0c:_O]E`0d:@\\E`0e:_O[Ea0e:_O[Ea0e:_O[Ea0f:^OZEb0f:^OZEa0g:_OYEa0h:^OXEb0h:^OXEb0h:^OXEb0i:]OWEc0i:]OWEc0i:]OWEb0j:]OWEc0i:]OWEc0i:]OWEc0j:\\OVEd0j:\\OVEd0k:[OUEe0k:\\OTEd0m:[OSEe0m:[OSEf0l:ZOTEf0l:ZOTEf0m:YOSEg0m:YOSEg0n:XOREh0n:XOREh0n:XOREi0m:WOSEi0m:WOSEi0n:VOREj0n:UOSEk0m:UOSEk0m:UOSEk0m:UOSEk0n:TOREl0n:TOREl0n:TOREm0m:SOSEm0n:SOQEn0n:ROREn0n:QOSEo0m:QOSEo0m:QOSEo0n:POREQ1m:oNSEQ1m:oNSER1l:oNSEQ1m:oNSEQ1m:oNSEQ1m:oNSER1l:mNUES1k:mNUES1k:mNUES1k:mNUES1k:mNUET1k:kNUEU1k:kNUEU1k:kNUEV1j:kNUEU1j:lNVET1j:lNVET1j:lNVET1j:lNVEU1i:kNWEU1i:kNWEV1h:jNXEV1g:kNYEU1g:kNYEV1f:jNZEV1f:jNZEV1f:jNZEV1e:kN[EU1e:kN[EV1d:jN\\EV1c:kN]EU1c:kN]EU1c:kN]EU1c:kN]EV1a:kN_EU1a:kN_EV1`:jN`EV1_:kNaEU1_:kNaEU1^:lNbET1^:lNbEU1]:kNcEU1\\:lNdET1[:mNeES1[:mNeET1Y:mNgES1Y:mNgES1Y:mNgES1X:nNhES1V:nNjER1U:oNkEQ1U:oNkER1S:oNmEQ1S:oNmEQ1S:oNmEQ1R:POnEQ1P:POPFP1P:POPFP1o9QOQFP1m9QOSFo0k9SOUFm0k9SOUFn0i9SOWFm0h9TOXFl0g9UOYFk0g9UOYFk0f9VOZFk0d
9VO\\Fj0c9WO]Fi0b9XO^Fh0`9ZO`Fg0^9ZObFf0]9[OcFe0\\9\\OdFd0[9]OeFd0X9^OhFb0W9_OiFb0T9@lF`0R9BnF>P9DPGRHCl7>THBl7>THBk7?UHBj7>VHBj7>VHBj7>VHCi7=WHDh7UKAk4?VKAi4?WKAi4?XK@h4`0YK_Of4b0ZK_Oe4a0\\K^Od4b0\\K^Od4b0]K]Oc4c0^K]Oa4c0`K\\O`4d0`K\\O_4e0bKZO^4f0bK[O]4e0dKZO\\4f0eKZOZ4f0fKZOZ4f0gKYOX4h0iKXOV4h0kKWOU4i0kKXOS4i0nKVOR4j0nKVOR4j0nKWOP4j0QLUOn3l0SLSOm3m0SLTOl3l0ULTOi3m0XLROh3n0YLROf3n0ZLROf3n0[LROd3n0\\LROd3n0]LQOb3P1_LPO`3P1aLoN]3S1cLnN\\3R1eLnNY3S1gLnNX3R1iLmNW3S1jLmNU3S1kLmNT3T1lLmNR3T1PMkNm2V1TMkNk2U1VMkNh2U1YMlNe2U1[MkNd2U1^MkN`2U1cMiN\\2V1gMiNY2U1iMmNT2S1mMmNR2S1PNmNl1U1UNlNj1R1YNnNe1R1]NmNa1U1_NlN_1T1cNmNZ1P1kNPOT1m0POROP1m0QOTOm0m0SOUOk0k0VOUOh0i0\\OWO`0j0CUO=i0EXO9h0IYO4g0MZO1b05^OJa08_OF?=C@=a0D]O=e0BZO=h0DRO=Q1ElN;U1FhN;Z1FbN;_1F`N7d1JZN4i1LVN1n11nMOT22eMWMdLg2h54bMO`22]MMe24XMMi27QMJQ36lLKV36gLH]3:_LCf3?WL@j3b0SL_On3b0PL^OP4g0eIaLQ2f2^4S1YKlNk4Z1\\IQL^1e2W5_1gJ`N[5`1cJ`N_5`1`J_Nb5b1[J\\Nh5e1VJZNl5g1SJYNn5g1oI[NQ6f1nIXNU6h1iIVN[6j1dIUN^6k1`IVN`6k1^IVNc6i1\\IUNg6l1WIPNo6P2PIoMR7R2kHoMU7R2jHnMW7R2gHmM\\7S2bHjMb7V2]HhMg7X2WHiMi7X2VHhMk7X2SHhMo7X2PHeMT8[2jGdMX8a42N1O2L4N2N101M3K5N10000O2M3M200O3L3L4M2O2N1N3M3M201O0O2M3N2N1O2M2M4N2M2N3M3M3M2L4N3N1O2N101N10_J" + } + ] + }, + { + "image": "images/caption_detailed_13.png", + "subject_name": "person", + "object_name": "pavement", + "predicate_name": "walking on", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "hcW31Z=0ea91odJ5QiK7RCN^L5O0100O0102M2N4M2M:E7HTl0OZbk3" + }, + { + "size": [ + 427, + 640 + ], + "counts": 
"\\n_13W=1N2N3M2N2N2N2N2N2N2N2O1M3N2N2N2O1N2N2N2N2O1N2N2N2N2O1N2N2N2O1N2N2N2N2N2O1N2N2N2M3[NbM\\H_2c7cMZH_2d7cMZH_2e7bMXHa2h7_MWHb2h7^MWHd2i7]MUHd2j7]MTHe2l7[MRHg2n7YMPHi2P8WMnGk2R8UMmGl2T8SMjGo2U8RMiGP3W8QMgGP3Y8PMeGR3[8nLbGU3^8lL_GV3a8j00000001OO100000000O10000O100000000000000000000000000000000O10000O100O100O1N2N2N2N2O1N200O1O100O1O1O1O1N2jNlJSJV5k5mJSJT5l5nJXIO:Z5]6UK[IP5e6k0N2O1O1N2ZO\\IcJf6\\5_I^Jc6b5_I[Jb6b5e0K5OaI[JZ5c5hJ]JX5b5hJ_JX5_5hJdJW5Y5gJmJX5R5dJSK\\5k4bJgJRO?\\6i4bJ[K^5d4aJ^K_5b4\\JcKd5\\4ZJgKf5Y4XJhKi5X4VJiKj5W4UJjKk5V4SJkKn5U4oInKP6S4oImKS6R4lIoKT6Q4kIoKV6P4iIQLX6o3fIRL[6n3cITL]6k3cIUL^6j3bIWL^6h3cIWL^6i3aIWL`6i3_IWLb6i3]IXLc6W5100O10000O100O1O10000002N5K;E3M4L4VLlHU1X7eNRIS1R7eNSIZ1o6bNTI]1n6_NYI\\1j6^N[I_1i6[NZIf1g6TN]Il1e6lMaIT2a6dMeI\\2`6XMgIh2^80001O000000O100DTMWFn2d9]MTFe2l9=0000O10O1001N0lNfLYHZ3`7nL^HS3Y7YM]Hn2a7WMZHk2d7XMXHk2f7XMXHi2f7^MSHd2k7_1O2A>ASJ^Io5`6TJ_Il5]6ZJcId5X6bJiI\\5W6fJhIY5W6jJiIT5W6mJiIR5V6PKjIo4V6SKiIl4V6UKkIj4U6UKlIk4S6dJcI7;T5S6cJdI89U5T6aJeI97V5_6jJ`IW5`6jJ_IV5b6jJ\\IW5d6kJYIV5g6g000000000000000000000000000000000cNgIcJLQ1P3nN:P5hLTKa0h0\\2WO9m4jLTKj0?T2^O9o4iLTKl01SO3o2GTM0Q3Q5mLTKl01TO1o2JRM1P3o4oLSKm02XOKk22nL2Q3l4QMSKm03\\29@a4XMRKm03[2;_O`4YMRKo02Y24fLMg2k4[MRKo03W2>^O]4\\MUK2Ma04a2?^O[4^M_Kc0H_2a0^OZ4]M^Kg0G]2a0_Om4UNaJ\\2a0@o4RNaJ^2h0XOW4fMkJN?=Hg2f0XOX4fMeK3^Oo2e0XOj5g0VJYOk5f0UJZOk5f0VJYOk5f0UJZOk5f0UJZOl5e0TJ[Ol5e0TJ[Om5d0SJ\\On5c0RJ]Oo5b0QJ^OP6b0oI^OQ6b0oI^OR6a0nI_OS6`0mI@U6>kIBV6=jICV6=jICW6mGZO@O123O17_8QEBo:N1000000O100O2O1N2NQW_7" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"f;Y3g;000000000000000001O0000000000000000O100000000000000O100O1O1000000000000000000001O001O1O001O001O1O3M1O1O000O100000000000O10001O0000000O10000O1000000O1000000M3O1O1N2O1L4J6N2O100J6O1OISE]Lm:k3000000000000O1O100O100N2O10000000000000000001O0000000000VOjKjFV4P:01O00000000001O0000001O00001O0000001O00000000001O000000000^ORERMo:_3000000001O00000000001O000000001O00000000001O0000001O000000001O0000000000001O00000000001O0000000000001O0000001O00000000001O0000001O000000001O000000001O000000000000001O000000001O00000000001O0000001O00000000001N11O000001O000000001O00000000001O0000001O000000001O00001O001O0000001O00001O0000001O0000001O0000001O0000001O00001O000000001O0000001O0000001O0000000000001O0000001O00001O00001O001O00001O00001O000000001O00001O0000001O00001O00001O0000001O00001O00001O0000001O00001O0000001O0000001O00001O0000001O00001O0000001O00001O00001O0000001O00001O00001O000000001O000000001O00001O0000000000001O001O0XNlBd1U=[NkBe1U=ZNlBf1W=000O11O000000000000O1000000000000000000O10000000000000000000000O10000000000000000000000O100000000000000O10000000000000000O10000000000000000000000000000O1000000000000000000000000000000000000O10000000000000000000000000000000000000000O100000000000000000000000000000000000000O1000000000000000000000000000000O100000000000000000000O10000000000000000000000000000000000O1000000000000000000UC" + } + ] + }, + { + "image": "images/caption_detailed_18.png", + "subject_name": "person", + "object_name": "baseball glove", + "predicate_name": "wearing", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": 
"YRa01Z=000Uh?4eW@8I3L5L2N3M3N1\\DTOf:n0XEVOc:m0YEWOe:k0XEWO2Jl9Q1oEWO5Hk9S1mEXO7EEOk9U1VFYO9ECOm9U1TFYO4E^9N`F:3I\\9MeF50NZ9NgF3OOY9OjF0IUOJk0b91mFNE7]9LnFME7[9MjF3HnNNR1_9NhF5J1]9KdF9NL^9d20OO2O010O0001O001O00000O2N2N10100O001O0N3O010O000M3001O001000000O1O1O1O1O2N0G^LiFd3R9=N2N2O1N2O010O100O01000O010O10000^GhKQ8X4nGjKQ8V4nGlKQ8T4jGQLV8P4gGSLX8`400010O01O01O000010O000010O0dMiGEW85oGJQ81UHNk70VH1j7NWH1k7MUH4k7KVH5k7JUH7j7JUH6l7JSH6m7KRH5o7LoG4R8LmG4V8JiG6X8JgG6Z8lM`G22U12n0T9SOjFm0V9TOiFl0X9UOfFk0[9b11O1O2N5J4XOYFfMh9X2YFhMh9V2ZFiMg9`0TFc06mNg9>TFd07mNf9=TFe07mNf9>SFd08nNf9oEd0m:\\OTEa0n:_ORE`0P;_OPE`0R;_OnD`0T;@kD>X;BgD;];EbD:_;FbD8`;H`D5b;M]D1e;N^DMe;3^DFf;:\\DZOn;e0<10O100O100O10lC]O_O0k;d0dDC[;=cDE\\;;cDG^;8[D1c;n0N00O110O002M2N2N100O1O100O2N1O0O1O2N3M3L3N3K4L4L5M3KjSi4" + }, + { + "size": [ + 427, + 640 + ], + "counts": "V\\o23Z=N^n25[QM3N2N2N2ZOEXD>e;EXD?e;CWDb0f;AVDb0i;a0O1N110O01O1O2N1O010O1O101M2O1O0O1O2N2N3K6K3L4L4N3L4L^fh4" + } + ] + }, + { + "image": "images/caption_detailed_19.png", + "subject_name": "person", + "object_name": "wall", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 640, + 425 + ], + "counts": "j8o01UO03m0J\\16iM0N12JX>b1gA]N=;BM00000:0A1N11O50:OB1N10l=P6XBSJN?af4=00000O100001O00001O1O001O001QLi@S3X?lLh@T3X?lLi@S3X?kLi@U3X?jLh@W3X?hLh@X3X?gLi@Z3W?dLk@e3l>XLVAl3h>gKbA\\4P?102M1O100O1O1O1O1O100O001O010O1O001O10O01O1O00100O1O1O10O01O1O1O00001O010O1O1O1O0000010O00001O0000O1O2SNYAZNl>d1XAVNl>h1WASNm>m1TAnMQ?P2QAkMS?U2n@fMW?X2k@dMX?\\2i@_M[?`2h@ZM\\?f2g@SM^?l2k00001N101O1O0O2N101N2N101N1O2O1O1O1O1O1O1O1O2N1O1N2O2O1N1O1O1O1O1O2N1O01O001N3N2N1N3M2N2O1N1O2O1O1N00O20O103K2O1O1O1O003M1O10N11n^OXMl`0n2O1OK5N2100O1O1O101O000O100000002NO1O1001O7I0O2O0O100O2N101OO1000O2O00O1000O010000O10O1000O10O100000O10000O1000000O10001O00001O0O1000001O000O2O001O001O1O0O2O001O001O1O0O2O00001N2O001O1N3N3L1000001N2O0O3N2N2N3M1N3N1N3M3M2O2M4L5J6K5J6J6Hh\\b0" + }, + { + "size": [ + 640, + 425 + ], + "counts": 
"Yi0;6HWb09f]OH6V3a?m0O00000000O10O1000O100000000O10000000O1000O1000000O100000O100000O1000000O100O10000000000O10000O10O1000O10000O100000000O10O100000O100000001O000O0100000O1000000O100000O100000O100000000O0100000000000O10000000O10000000O10000001O00000000000O10000000000000000O100000001O0010O0001O0101O1N2O1O2N2M2O2N1O3MROfLWAW3h>jL[AT3d>nL]AX3Z>jLgAU3W>lLnAP3P>RMQBl2n=UMXBd2h=^MYB`2f=aM]B]2a=eMdBV2Y=mMkBo1S=RNRCi1mVC^Oj01OO1WHRIS7n6mHRIS7n6g0O003M00_HVI`6i6`IWI`6i6_IXIa6h6_IXIa6h6TIWI_O020[7i6oHWID00020\\7h6_IXIa6h6^IYIa6h6_IXIa6h6^IYIc6f6nHXIN2T7f6\\I[Ic6f6[I\\Ie6d6]IZIc6f6oHWID051Z7g6mHXID0j7h6RIYIb6g6^IYIb6g6]IZId6e6[IZIh6e6XI[Ih6e6VI[Il6e6RI]Io6b6QI]IP7c6oH^IQ7b6oH]IR7c6nH]IS7b6mH]IT7c6lH]IT7c6lH]IU7b6lH]IT7c6k0000001O0000001O1O0000001OeGbIg7^6YHbIh7]6XHbIi7^6c00001O00000000aGcIo7\\6PHeIP8[6a00000000]GdIV8[6iGfIW8Z6iGfIW8Z6jGeIV8[6>000001O01O00000000000000000000000000000000000000000O2O00000000000\\GcIX8]6hGcIX8]6<0000000000000000000000000000001O000001O0000000000001OO101O1O00001O1Oj3_9fLSGX3m8jLQGV3o8mLnFS3R9nLlFS3T9\\M\\FT22iLb9[1SFc1f0QMW9]4n0O2K7ULnDA=f2X;^MRELXO4>5Pjm1" + }, + { + "size": [ + 493, + 640 + ], + "counts": 
"V;P11T2W;lMiDT2W;lMjDS2V;mMjDS2V;mMjDS2V;mMjDS2V;mMjDS2V;mMjDS2V;mMjDS2V;mMjDS2V;lMkDT2U;kMlDU2T;jMmDV2S;jMmDV2S;jMmDV2S;jMmDV2S;jMmDV2S;jMmDV2R;kMnDU2R;kMnDU2S;jMmDV2R;kMnDU2R;kMnDU2R;kMnDU2R;kMnDU2R;kMnDU2R;lMmDT2S;mMlDS2T;mMlDS2T;mMlDS2T;mMlDS2T;lMmDT2S;mMlDS2T;mMlDS2T;mMkDT2U;kMlDU2T;kMlDU2T;kMlDU2T;lMjDU2V;Q100000000000000O10000000000O10000000000000000O100000000000000000000O1000000000000000000000000000000000000000000000000001O00000000000000000000000000001O0000001O0000001O0000000000000000001O000000000000001O0000001O0000001O000000001O0000001O0000001O00001O00001O001O001O001O1gL^De2b;ZM`De2`;ZMbDe2_;YMcDf2^;WMeDh2[;WMfDi2[;UMgDj2Y;UMhDk2Y;SMiDl2X;RMiDn2W;RMjDm2l;O1O001O00001O001O0000001O001O001O001O000000001O1O001O00000000001O00001O1O00001O001O00001O000000001O0000001O001O1O1O00000000001O001O001O001O001O00001O001O001O001O001O00000000001O0000001O00001O00001O0001O01O0O101O001O001O001O001O00001O0000001O001O00001O001O000000001O001O002N00001O00000000001O0000001O001O00001O001O0000001O00001O001O1O1O00000000001O00001O00010O0O101O001O000010O0001O001O001O1O00000000001O0000001O001O00001O00001O00001O001O001O001O001O0000001O0000001O00001O001O0000001O001O001O1O001O001N2O00hhP3" + } + ] + }, + { + "image": "images/caption_detailed_21.png", + "subject_name": "sandwich", + "object_name": "sandwich", + "predicate_name": "leaning on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": 
"lg_32m>2O1N101N2O1M4M2O1N2O0N3N101N1O2N1SOUOdCm0Z:G8J3O3I5M4N1N2M3M4L3N1O2M3M3N2N1O2M3N2M3O1N2N2N101O0O2O0O2O1N101N101O1N101O1N101N2O001N2N101O001N2O1O0O2O001N2O001N2O1N2O1O001N101O001N101N10001O0O101O1O0O101O000O101O00000O101O0O10001N10000O1000000O10000000O010O10000O01000O100000O010000000000O100000O10O100000000O0100000000000O1000000000O10O10000000000O10000O100O1000000O1000000O1000000000000000000O10000O1000000O10010O0O10000000000O11O00000000O11O00O100001O00O10000O100O2N11O1O0000O2N1000000O20O000001O0000010O0O11O010OO10001O00000O1100O0000O1O10001N1O1001O00O2O0O100O1O10001O000O1O10001N1L4N21O1O00O2L3001O01N100003NO000O10010O000000010O000O11O01O00001O0000001O0O100000000010O01N101O001N101O00001O00000001O01O00001O00010O00000O101O000000001O00001O00001O001N10001O0000001O0000001O0000001O00001O00001O01O0001O00001O001O001O001O1O001O100O2N2N2M3N2M3N3L3N3L3N1O1N2O1O3L2N2O2M4M3M2M3N4K5Ib]f2" + } + ] + }, + { + "image": "images/caption_detailed_22.png", + "subject_name": "cat", + "object_name": "sand", + "predicate_name": "sitting on", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": 
"g^S67T=3N3M101N3]CBS5b0@W30oKf5ImJ=1a0]OY30PL\\62aIHj0j0XO]32nK[6MfJe0nN_32oKZ6LiJd0jNb32oKh7=VHe32nKi7;UHh32mKk79SHj32mKl77SHl31mKm76QHn32lKo72PHS40lKR8NoGV4OlKT8JoGY4MnK^8R4bGnK^8S4_GoKa8^4001O00001O1O001OOQM^GV1b8hNaGW1^8hNdGX1\\8hNeGW1[8jNdGV1\\8lNaGU1^8nN`GR1`8m1VMXKPMh4n2\\KZJNb2f4S3fKhLZ4W3jKfLV4Z3mKcLS4]3RL]Lo3c3ULXLl3h3TLVLn3j3RLTLP4l3PLRLR4n3nKnKV4S4iKjKZ4V4eKjK\\4V4dK^Kh4b4VK\\Kn4e4jJ[J@h0P6m4`J\\J^Oe0U6P5\\JPKh5P5WJmJm5S5SJkJo5V5PJiJQ6W5oIhJR6X5nIcJW6]5hI]JEKl00h4h5gJ[Jg0Oa4f5fJ\\Jj0N`4f5fJZJf6e5[I\\Jd6b5_I]Ja6c5_I\\Jb6d5c000000000000QJeJkNI\\4b5dK\\J`0<\\OI_4_5eK]JO`KTIb2l6\\MXIb2h6]MYIc2h6[MZId2f6[M[Ie2e6[M[Ie2e6ZM\\If2d6ZM\\If2d6ZM\\If2d6ZM\\If2d6ZM\\If2d6ZM\\If2d6ZM\\If2d6ZM\\If2e6YM[Ig2e6ZMZIf2f6[MXIf2h6o10000000mIRIH3j5k6a000000gITIR6l6mIUI5Me5n6VJUIS6k6lIVIT6Q700RJkHe5U7UJmH00j5S7]JlHb5Q7bJnH^5R7bJnH^5R7aJoH_5P7aJQI_5o6bJPI^5P7bJPI^5Q7bJnH^5S7aJmH_5S7aJmH_5S7aJmH_5S7=0000O10000000000001O0RKmHc3S7\\LQIa3o6`KPIa01o3o6`KPIa01o3o6_KQIb00o3P7^KQIb0OP4P7^KQIa01P4o6^KRI`0OR4o6_KQIa0MQ4R7_KnH`5R7=000000000000001O001O00000000000000000YJiHY5X7=0000O100000000000000001O00O11O0000000000000000001O00000000001YKjHV3V7iLoHS3Q7lLQIS3o6mLRIR3n6nLTIP3l6oL[Ik2f6TMhI^2X6bMiI]2W6dMlIX2T6hM[Ji1f5WNdJ^1\\5bNeJ]1[5dNeJ[1[5gNdJX1\\5iNdJV1\\5kNcJU1]5kNdJT1\\5mNcJS1]5mNcJS1]5mNcJWOSNT1[7DaJUOYNV1W7D`JUO\\NU1T7G_JSO_NU1R7I]JROcNT1P7J[JSOfNS1P7H[JSOgNU1n6H[JROiNU1m6HZJSOiNU1m6HZJROjNV1m6F]JoNhN[1k6F\\K:e4D\\KcJC]5>bJB^5a0_J_Ob5a0\\J@d5a0[J_Oe5c0YJ]Og5g0UJYOk5k0oIWOR6S1mH]MMa1V7]1\\H]M8W1[7T2_HmMa7o3001O001O000000001O00O100000000O100000aJ`HR5_7nJcHQ5]7oJdHP5]7oJdHP5\\7oJeHQ5[7PKdHP5\\7PKdHP5]7oJcHQ5]7oJcHQ5]7PKbHP5^7SK^Hn4b7>1O3M1O001O0000000000001aLTH\\1m7aNVH^1k7VMSHg02S2l7TMSHh03S2j7UMSHh03S2j7UMTHh02R2k7TMTH:0C2n2j7UMTH;OB4m2i7VMTH;0A3n2i7VMTH;0A3n2j7UMSH;1B3m2j7UMRHm0Qc04N000O100000000000000000000000000000000000000000000000000000000000000000000000O100000O10000000000000000000000000000000000000000000000000000000000000000000000000000O10000001N4M7Ib0\\OjSQ10WlnN3X\\OLSRS6" + 
} + ] + }, + { + "image": "images/caption_detailed_24.png", + "subject_name": "airplane", + "object_name": "sea", + "predicate_name": "flying over", + "mask_rles": [ + { + "size": [ + 488, + 640 + ], + "counts": "kjg73T?2O00000O1K5VAH]>NfA:MI0OT>1mA8OHO1T>OnA?NBT>OnA?NBT>OnAf0R>8O7E=D6M100000O1O1L4I7J>Cc0]O8L10000O10000000000000000000O100N5K7G6K4N10O1M4Ha0E2N2O001N_nn0" + }, + { + "size": [ + 488, + 640 + ], + "counts": "Y7k0Q1R4^7nKbHR4_7mKbHR4^7nKbHS4]7mKcHS4]7mKcHS4\\7mKfHR4Z7jKeG[OQ1k4Z7fKnH[4Q7cKQI]4o6bKSI]4m6bKTI^4l6bKTI_4k6aKUI_4k6`KVIa4i6_KWIa4i6^KXIb4h6^KXIc4g6\\KZId4f6\\KYIf4f6ZKZIf4f6ZKYIg4g6ZKWIh4h6YKhHGWOU5P8UKfHY5Y7iJcHY5]7hJaHZ5^7gJaHZ5_7fJ`H[5^7eJbH\\5^7dJbH\\5^7dJbH\\5^7dJ_H`5`7`J^Hc5a7^J^Hb5b7bJYH_5g7eJUH[5k7hJPH[5o7o0000001O001O0000000O110O00001O00gIYHd2ObNh7fN`Hc2KfNe7dNgH_OA0OL7h1N9d7bNlH[OC0=f1C0001OO10000O100O2N1O1O1PLc0kEUO^19b8k0dE[Ob1Kg8V2mFkMW9>^ESONj1i0hNW;^1XDhNi;[22N2N002N1O1O1O001O1O001O001O001O00001O001O1OmNaM]E_2Z:mMdE7[Ol0j:UOkEKAm0\\:ASF]OFQ1n9L_FgNL\\1Q9b0PH^Oo7d0oG]OP8d0oG]Oa5gNXJ2iNl1]1[Ob5iNUJ5eNl1a1WOe5kNRJb28cNf5oNnI_2:cNh5POlI_2:aNj5TOjI[2:aNl5VOkIW28cNm5XOkI\\OZOS2k0ZOP6XOeJZ1ZO^OQ6ZOfJV1XO@R6]OeJR1WOBS6^OeJP1VODU6^OcJo0VODX6]OaJQ1ROE]6[O`JR1nNFb6]O[JX1dN_MIl1X7@WJa3i5jLkIW3U6kLhIV3X6mLeIS3Z6SM_Io2a6VMWIm2i6YMlHl2U7c2000000000000000000O100001O0000000000000000000000000000000000O10000001O00O1000000000000000000000000001O0000000000000000000000000O10000O1SOSI^In6[6S1001O1O00001O00001OmIeGY5[8cJkG\\5T8cJnG\\5R8dJnG\\5R8cJoG^5P8cJoG]5Q8cJnG^5R8dJlG\\5T8hJfG[5Y8j0000bJiGeN0n4W8TL\\Hj3d7PLbHQ4]7nKeHQ4[7mKgHT4X7kKiHU4W7jKjHW4V7gKkHZ4T7fKlHZ4T7`KRIa4m6[KXIe4g6[KYIe4g6ZKZIh4d6XK\\Ii4c6WK]Ij4a6WK_Ii4a6WK_Ii4a6WK_Ij4`6VK`Ik4_6TKbIm4]6SKcIm4^6PKcIQ5]6nJdIS5[6mJdIT5\\6lJaIW5_6iJRIf5n6ZJQIh5n6ZJPIf5P7\\JmHe5R7^JlHb5T7_JjHb5V7_JiHa5W7^JiHc5X7\\JgHe5Y7[JeHg5Z7ZJfHf5Z7ZJeHg5[7ZJdHg5[7ZJdHf5\\7aJ\\H`5d7bJZH^5f7cJYH]5g7R101O00aIZH[5g7dJ`HV5`7iJbH=Kh0M9e7bNdH9Oi0J;c7aNPIUOB6=e1A>`7_N_Jo0UNa0\\7]NcJP1RNc0[7XNjJR1lMf0Z7VNnJP1kMi0X7PNWK7]Ni1b;000O1O
10000O1O2O0O100N2_ObMUDa2b;g02YMQDQ2Z<_MRDZ2a00001O0O1000000O1N2O100N2]LDXH>[7nMdAT2\\>PN^AR2a>PNVAX2i>nMY@k2b=oLUCe4j<_KfBALT5\\=W1O1O1M3O100O1N2O1O100O1M3O100O1O1O100O100O10000000000000000000000000000001O00aNUIiEk6U:XI`EJXOo6W;a1N2I7N2O1O100O1O1O100O1O100O1O100O10000O1O1O1O100O1O100TOfFQG[9l8iFSGW9j8oFdFD9]9Q9XGmFi8R9S1O100O100oM^EdJb:[5`EcJa:\\5aEbJ`:^5aE_Ja:`5aE\\Jb:c5_E\\Jb:c5bEWJa:i5aESJa:m5`ESIAa0R;[6_ESI_OI2129P;j6\\EXIAE5ON0Q;T7ZE_IE^OX;R7SElIj:T6VEmIi:S6WEmIi:R6XEmIi:S6WEmIi:R6XEmIi:S6XEkIi:T6XElIh:T6YEjIh:V6YEgIi:X6ZEbIj:^6WE`Ij:`6YE]Ig:c6ZE_Ic:a6fEXIX:h6fEXI\\:i6`EZI`:W6aDiIU1J[:\\6eDcIk<\\6=000000000000O10hBgIiYL^A?GoN2k2j>eMjA1IT2`>eMlA3HV2_>bMnA4GW2h`0N3M1O2J6XOZ^OQOia0h0b^ORO_a0j0X1D;E8Hm]T2N^njM=Ji00i7ZN]Ag1c>]NWAe1i>bNo@_1Q?h1O10000000000O10000O10000O100O1O100O100O1O100O100O1O100O1O1O1O1O1O1O1O1N2M3lKeJ`I^5_6dJ^I^5`6eJZI`5e6aJoHk5P7VJfHT6Z7mIYGM`NKh0`6^9iIXGc0gNg5P:gIVGb7i8_HVGb7i8^HVGd7i8]HUGe7k8[HTGf7k8[HUGe7j8\\HVGd7i8]HWGc7i8^HVGb7i8_HVGb7j8^HVGb7i8`HUGa7k8bHRG^7o8cHnF^7R9gHiFY7X9jHbFX7_9lHZFV7f9mHSFW7n9]1001O1O1O1O2TGlEh7V:QHSFk7U;L2N2N2N2`LXDdNi;X1^DdNc;X1bDfN_;W1dDhN];V1dDjN];S1fDlN[;P1hDPOY;m0iDSOX;k0jDTOW;h0lDXOU;c0PE\\OY;ZMeC2?[1j0Y1oYLmAg3Q>\\LnAd3o=_LQBa3n=bLoA_3o=dLPB\\3n=gLQBY3n=jLoAW3n=mLPBT3n=oLPBR3o=PMmAS3l=d1J6J6E;D3M101N2O2M3N1O1N0100O010O01O0001000O010O001M201O100O1O1O1O1N2N3N1N2O101O000O2O0O101O0O2O1O1O1N2N2O1N3N1O2M3N2N1N4M2LLXCWNeoDA]12d94_]A4YAIc>;N2O1O10000O01O0102NO0020aA@[>c000O00100O10O1O001O001O1O001O001O001N2O1M3O1O10OO3M2O00Y`g5" + } + ] + }, + { + "image": "images/caption_detailed_28.png", + "subject_name": "bird", + "object_name": "floor", + "predicate_name": "walking on", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": 
"\\WR3=k<5L4K5M3L3N3M3L8H>C3M2N2M3N3MAVEdNd:\\1^EdN`:\\1bEdN]:\\1dEdNZ:]1gEbNY:]1jEaNV:_1kE`NT:a1nE]NR:c1oE\\Nj9JgEk1`0ZNg9NhEi1a0XNf90iEh1b0WNBLj96RFg1c0VN@Oi96SFe1e0VN^OOi98SFc1g0UN]O1f99VFa1g0UN]O1e99XFa1g0SN]O3`9=\\F^1l0VNd8`0`FZ1k0WN]8f0iFS1j0WN[8h0kFQ1j0WNZ8h0mFQ1h0XNZ8k2fGUMZ8k2eGVM[8j2eGVMZ8k2eGVM[8j2dGWM\\8i2cGWM^8i2aGWM`8h2_GZMa8f2\\G]Md8c2TGeMl8[2oFkMP9j2TG^Ll8]3YGbLg8[3]GdLd8Z3^GeLd8X3j0dMPFW1U:]NTFa1Q:YNTFb1FYNi:c1[EaN`:^1cEaN]:]1fEbNZ:\\1hEcNX:\\1kEbNU:[1oEdNR:Z1PFeNP:Z1QFfNP:X1RFgNo9V1SFjNP:o0UFPOm9k0VFUOk9i0VFWOl9e0VF[Om9a0TF_Oo9=RFCR:7QFHi;000QZY4" + }, + { + "size": [ + 427, + 640 + ], + "counts": "\\7m5^7000000000000000000000000000000000000000000000000000001O00O10000001O000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001O000000000000000000000000000000000000000000000000000000000000000000000000000000001O00O1001O00000000000000000000000000000000000000000000000000001O1O4L3M2N2N1O2N1O2N2N1O3M1O2NcJYOYOV1g0jNQO^1R5b0\\Od0A?@`0YOg0TOl0L4O1M3N2N2N2N2N2N2M3N2M3O1N2O1N2O1N2N2N2N2N2O1N2O1O1O21M3N3M2N2N3M3M2N1O3M102MGRJjJl5f5K3M3M2N3M2N3M2N3M3M1O2N2N4L5L7H9G9F9H7I7IgLoLUNh2k1XM[Nc2e1\\MdN[2\\1eMlNS2T1mMUOi1k0XN\\Ob1c0^NDZ1=fNJS16mN3j0MVOc03DMS61O1^DN];2cDN\\;3dDM];2cDM^;2cDN\\;4bDN];1eDM];0_V20giM1O0O11O0O10O11000SlV2" + }, + { + "size": [ + 375, + 500 + ], + "counts": 
"nnW35a;2N2ZEJd9=XFFf9=UFFj9=nEJQ:7lEKT:l000000000001O001O001O000O1001O0O100O10000O10000O10000000O1000001N10001O0O10O1O100000O0100000000O100000010O0100O010O01O00000000000O100000000000000000000dNeN\\H[1c7gN\\HY1a7kN^HU1b7kN^HU1a7lN_HT1a7lN^HU1a7lN_HT1`7mN`HS1_7nNaHR1]7PO[HDVO]1]8ROZHEWOY1^8UOWHV1i7lNSHV1l7lNoGX1Q8iNlGY1T8iNhGY1X8iNeGX1[8R10000oMoL]KP3b4SM\\Km2c4VM[Kj2e4XMYKh2f4ZMYKf2g4[MWKf2i4]MnI^Oj0U3X5]MkICk0P3Z5_MhICl0o2]5jMbJW2^5iM`JY2`5gM]J\\2d5cMYJ`2g5`MXJa2i5_MUJb2k5^MTJc2n5[MRJe2Q6XMnIi2U6TMjIm2W6SMhIm2Z6QMfIo2]6nLcIR3^6nLbIQ3_6nLaIR3`6nL_IW1DNn6kN^IQ3c6oL^Io2d6PM[IP3f6PMZIo2g6QMYIn2i6QMWIn2k6QMUIn2l6QMTIo2m6QMRIo2n6QMSIn2m6TMPIm2Q7TMeH@1_3Z7i01O00001O0000O10000O1000000O1XOhKQJX4o5hKPJY4o5iKoIX4Q6hKmIZ4S6gKiI\\4X6eKaI`4`6;1O00000000001O2N1O1O000000O1N2O100O1000000O100000000001O2N6J3M2N4L1O1OO100O1O100O1O10000001O1O001O1O1O1O1O2N1OdH" + } + ] + }, + { + "image": "images/caption_detailed_30.png", + "subject_name": "grass", + "object_name": "stop sign", + "predicate_name": "on back of", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "\\:c4^:O000000001O1O001O1O001O1O1O2N1O001O00001O000000O10000O100O1O100O1O1O1O100O100O10000001O001O001O00O100000000001O1O1OO1O1000000O1O1000000000000001O001O1O1O001O0000001O0000001O1O0000001O1O001O1O1O1O1O001O1O1O1O001O001O1O1O1O1O1O1O1O1O1O001O1O002N001O1O001O1O1O001O1N2O1O001O1O002N010O1O1O1O001O1O1O1O001O1O1O1O1O001O1O1O1O001O002N1O001O001O1O1O1O001O001O1O001O1O1O001O1O1O1O1O001O1O1O1O1O1O001O1O1O1O1O001O1O1O1O1O001O1O1O1O001O1O1O001O1O001O1O001O1O001O001O00001O00001O00000000000000000000000000000000O1000000000000000000000000O1000000000000000000000000000O2DoAEogX5" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"abe1h1V=Z1kBQMh;c3@l0TOf0ZO?A4L2N2N2N2N2N2M201N2N2N2N2N1O2N2N2N2N2N2N1O2N2N2N2N1O2N2N2N2O1N2M2O2N3M1O2N[MPI]Mn6b2VI\\Mj6d2VI]Mh6d2WI_Mf6b2XI`Mg6a2YI`Me6a2[I`Mc6a2\\IaMa6a5OnL`IeM^6[2dIdM[6]2eIQKNS2[6m2gIcMX6^2hIcMV6^2jIcMT6^2lIcMR6]2oIdMo5^2PJcMn5i5N2N2N2N]MXJPLg5P4ZJQLd5P4\\JPLc5R4\\JPLa5i6N2N2N1OYMgJjKW5W4iJjKU5W4kJjKS5W4mJjKQ5S7OUMPKiKn4X4SKgKl4Z4SKhKk4Y4UKiKh4Y7NPMZKhKe4j3[KdI0c2c4Y4]KgKb4Z4^KhK`4]7NlLbKhK]4Y4cKhK[4Y4dKhK[4`7N2N2N2N2N2N1O2N2NfIWL`1g3bNYL^1e3cN\\L]1b3Q5O2O1MbIbL_1]3aNcL_MOa1\\3Q1eL^MOa1[3c3eL^LX3g7N3N1N1OcJoLmNP3c6NkJUMWNj2j1XMUIN6M]4l2Y2TMYI0j4i2V7N3N2N3N1N1O2N1O2N2O0O1O2N1000O1000000001OO1000001OO10000001O0O100000000000000000001N1000000000000000000000001O000O1000000000000000000O1000001O00000000000000001N100000000000000000000O2O0000000000000000000O1000001O00000000000O10000000001O00000O10000000000O100000001O00000000000O11O00O10001O000000000000000O2O000000000000000O101N1O1O1O1O1O1O1O100N2O1O2N1O1N2O100O2N1O2N2N2N2N1N3N2N3N3L2N1O3M2N2N2M3N4L2O1N2M3N3M2N2N2N2M4N0O2N3M3M2N2N2M3N2N2O1N2N3M3L3N3M1O3M2N2N2O3L2M3N2N3M1O2N3M3M2O1N2M4L3N3M1O3M2O1N2N2N3M2M4L3N2O1N2N2O1M4N2L3M3N3M1O2N2K5H:]Oa0H9^Oa0Df2`1000001O1MgN" + } + ] + }, + { + "image": "images/caption_detailed_32.png", + "subject_name": "truck", + "object_name": "road", + "predicate_name": "parked on", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "Q7V1U<1O1O1O00001O000O0100O1000001O000O101N10001O1O1O1O00000001O1O2N1O1O000O2OOO2O1000000000O100O10000O10000001O1O1O000000000000000001M2O1O1000000O1O1O100000000000O10O100L4O1O2N2WObC4a[V7" + }, + { + "size": [ + 427, + 640 + ], + "counts": 
"\\8m08E\\;;dDE\\;;dDF[;:eDF[;:eDF\\;9dDG\\;9dDH[;8eDH[;8eDH[;8eDH[;8eDHZ;9fDHY;8gDHX;9hDHW;8iDHW;8iDHW;8iDIV;8iDHV;9jDGV;9jDGV;9kDGT;9lDGT;9lDHS;8mDHS;8mDHS;9lDGU;8kDIT;7lDIU;6kDKU;4lDKU;4kDLU;4kDLU;4kDLU;4kDLV;4iDLW;4iDMV;3jDMV;3jDMV;3jDMV;3jDMV;3kDMT;3lDMT;3lDMT;4kDLU;4kDMT;3mDLR;5nDKR;5nDLQ;4oDLQ;4oDLQ;4oDLQ;5oDKP;5PEKP;5PEKQ;4oDMP;3PEMP;3PENo:2QENo:2QENo:2QEOn:1REOn:1REOm:2SENm:3RENm:2SENm:2TEMl:3TENj:3VEMi:4WELi:4WEMh:3XEMh:3XEMg:4YELg:4YELh:4WEMi:2XEMm:NSE3m:LSE4m:LTE3m:LSE4m:LRE5o:JQE6o:JQE7n:JRE5n:KRE5n:KRE6m:JSE6m:JSE6m:JSE7l:IUE7k:HUE8k:HVE7j:IVE7j:IVE7j:IVE8i:HWE8i:HWE8i:IVE7j:IVE8i:HWE8i:HWE8j:GVE9j:GVE:i:FWE:i:GWE8i:HWE9h:GXE9h:GXE9h:HXE8g:HYE8h:GXE9h:GXE9h:GYE9g:FYE@O6k:8VE_O37h:9VE[O7;c::lEES:=mECR:=oEBQ:>oEBR:=oEBR:>nEBR:=nECR:>mEBS:>mECR:>]EXON9e:`0ZEZO07e:a0WE[O34f:c0SE[O72g:R1YEnNg:S1XEmNh:S1XEmNh:S1XEnNg:R1YEnNg:R1YEnNg:R1YEnNf:T1YEmNf:S1ZEmNf:S1ZEnNf:P1[EPOe:P1\\EoNd:Q1\\EoNe:P1\\EoNd:1oDa0=_Oc:P1]EPOc:P1]EQOb:n0`EQO`:S1\\EmNd:T1ZEmNf:S1[EmNd:S1\\EmNd:T1[ElNe:T1[ElNe:T1[EmNc:T1^EkNb:U1^EkNb:U1^EkNb:V1]EkNb:U1_EkN_:V1aEkN^:U1bEkN]:V1cEjN]:V1cEkN\\:U1dElN[:T1eElN[:U1dEkN\\:U1dEkN\\:U1dElN[:K\\Ei09\\OY:V1gEjNU:Z1kEfNU:e0bE[O90U:Z1kEfNU:`0bEF8JU:\\1kEdNU:\\1kEdNU:\\1kEdNU:\\1lEcNT:]1lEdNS:\\1mEdNS:\\1mEeNR:[1nEeNS:Z1mEfNS:Z1mEgNR:Y1nEgNR:Y1nEgNQ:Z1oEgNP:Y1PFgNP:Y1QFgNm9Z1SFfNm9Z1SFfNm9Z1SFfNm9Z1SFfNm9Z1SFhNk9X1UFhNk9X1UFhNk9X1UFiNj9W1VFiNj9W1VFiNj9W1VFiNj9W1VFiNj9W1VFjNi9V1WFjNi9V1WFkNh9U1XFkNg9V1YFiNh92QF47Kg9V1YFjNh94oEO9Ng93PFO9Ng92QF08Ng9OTFXOKf0:3g99oED:3g9:UF\\O4;f9S1ZFmNf9S1ZFmNf9S1ZFmNf9S1ZFnNe9R1[FnNk9l0UFTOn9i0RFWOQ:f0nE[OT:c0lE]OV:a0jE_O]::cEG_:6aEJ`:5`EK`:5`EKa:4_EM_:4aEL_:4aEL_:4aEM^:3bEN]:2cEN]:2cEN^:nNYEP193\\:mN]Eo065o9POnE0Kk094m9UOlEMOh087h9XOQFIOh078d9]OTFe08Nc9h0]FXOc9h0]FYOb9g0^FYOb9g0^FYOb9g0^FYOb9g0^FYOb9f0_FZOa9f0_F[O`9e0`F[O`9e0`F[O`9e0`F[Oa9d0_F]O`9c0`F]O`9c0`F]O`9c0`F]O`9c0_F_Oa9`0_F@a9`0_F@a9`0_F@a9@XF27>a9@XF]O285k0b9JZFlNL;8P1a9J]FVO2P1b9JWFZO7l0b9<_FDa9]OXF27a0a9^OVF29a0`9;`FEa9:_FFb99^FHc96\\FKf93ZFNf91ZFOh9OXF1i9NWF
2j9MVF3j9MVF3j9MVF3j9MVF4i9LWF4i9KXF5h9KXF6g9JXF8g9HYF8g9HZF6g9JYF6g9YOQFC7U1h9gNRF3O17V1g9fNSFd06g0f9eNTFd06h0e9dNTFe07g0e9F[F:e9VOXF]O3]1e9UO[F\\O0`1d9TO\\F\\O0`1d9TO\\F[O1a1c9VOUF^O8\\1c9D]Fc9A^F?b9A^F?b9YNVFR18f0c9VNXFR15h0j9_NmE=9T1m9TOSFl0m9TOSFl0m9TOSFm0k9TOUFm0j9SOVFn0j9TNnEh09T1n9SNjEd08X1S:]NiEJ4i1S:]NjEI3j1S:]NeEG127j1S:]NeEH018k1f9hNRF\\OO29k1e9hNUF^O5l1e9fNWF\\O5o1c9eNXF\\O5o1d9cNYF]O3P2e9bNXF^O3P2n9YNoEG3P2j9]NRFD3Q2f9nMQF`02C7o1f9iNZFX1d9iN\\FW1d9`NSFC9m1e9_NRFD9m1i9[NnEH9n1g9[NPFG9n1e9]NRFD:o1c9^NTF^O1O8U2d9]NSF^O207U2e9\\NbFe1_9^NRF\\O7V2g9bNYF^1h9bNWF^1i9bNWF_1i9`NWFa1k9\\NUFd1l9[NTFd1m9[NTFe1h9jMPFa08e1g9`NYF`1[9iMgFf0Na1[9iMjFc0Kd1[9jMiFb0Lg1X9gMjFd0Mf1Y9fMhFf0Od1Y9fMiFe0Ne1Y9fMoF>Il1X9fMPG=Hl1Y9gMnF>Ik1Y9gMgFe00d1Y9gMgFe0Of1Y9eMhFe0Og1X9dMiFe0Oh1W9cMjFe0Oh1W9cMjFe0Oi1V9bMkFe0Oi1V9bMkFd00j1U9bMkFd0Om1T9`MlFc00m1T9`MlFc00m1T9`MlFc00m1T9`MlFc00m1T9_MmFd0Om1T9_MmFd0No1`9QN`Fo1`9QN`Fo1`9PNaFP2_9PNaFo1a9PN_FQ2`9oM_FR2a9nM_FR2V9[MlFc0OQ2T9^MlFa00P2V9^MjFb0OQ2W9]MjFa00R2a9nM_FR2a9nM_FR2a9nM_FR2a9nM_FR2a9nM_FR2a9nM_FR2a9nM_FR2a9nM_FR2a9nM^FS2b9lM_FT2a9lM_FT2a9lM_FT2a9lM^FV2a9jM_FV2a9iM`FW2a9hM_FX2a9hM_FX2a9hM_FX2a9hM_FW2b9iM^FW2b9iM^FX2a9hM_FX2a9gM_FZ2a9fM_FZ2a9fM_FZ2a9fM_FZ2a9fM_FZ2a9fM_FZ2a9fM_FZ2a9eM`F[2`9eM_F\\2a9dM_F\\2a9dM_F\\2a9dM_F\\2a9cM`F]2`9cM`F]2a9bM_F^2a9bM_F^2a9bM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b9`M_F`2a9`M^Fa2b9_M^Fa2b9_M^Fa2b9_M^Fa2b9_M]Fb2c9^M]Fc2c9\\M]Fc2d9\\M]Fd2c9\\M]Fd2c9\\M\\Fe2d9[M\\Fe2d9[M\\Fe2d9[M\\Fe2e9YM\\Fg2c9ZM]Ff2c9ZM]Ff2c9ZM]Ff2d9YM[Fh2e9XM[Fh2e9XM[Fh2e9XM[Fh2e9WM\\Fi2d9WM\\Fi2d9WM\\Fi2d9WM\\Fi2d9WM[Fk2d9TM]Fk2d9UM\\Fk2d9UM\\Fk2d9UM\\Fk2d9UM\\Fk2d9UM\\Fk2e9SM\\Fm2d9SM\\Fm2d9SM[Fn2e9RM[Fo2d9PM\\FP3e9QMZFo2f9PM[FP3e9oL\\FQ3n91000000O101OO010000O100O100O1FiL_FX3`9jL^FW3b9900O100000001N100O1O0100000O10000O2M200O0100000000000O101OO1000001O1O1O000000O1O1N200O100000001N11O3L5L1O01O00O11N1O1K5O10O11O00000O1000000000000001O0000000000O100000000O10000O1000000000000O100000000O100001OO1000000O
11O1O00000TOjFcMW9Y2SG_MQ9]2Q1N2N3M2M5L4L00000000O100C=O1000O100000000001O0000000000000000O11O000dF" + } + ] + }, + { + "image": "images/caption_detailed_33.png", + "subject_name": "paper", + "object_name": "pavement", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 500, + 333 + ], + "counts": "R`?3`?2N3N1O2M2O1N103M1N3N1O1N2O1O0O2O0O0100O2VOZAd0k>00O2OO010000O01000O100O100O2O00000O2N1N2N3HS?0TA0^Ul3" + }, + { + "size": [ + 500, + 333 + ], + "counts": "Q>e0U=\\OmBd2OaM100LS<_3M100O100O100O100O1O100O1O10000O11O00OBYDQMf;P3]DmLd;Q3^DnLb;R3_DmLa;S3`DlL_;T3cDkL[;W3gDgLY;3ZDn2>oLY;V3iDiLX;V3e0L3M4N1N2N2N3M3L3N2N3L3M2L5J6K4O1O1O1000O010O10O10O010O0100O001O001O001O10O10O10N101O001O1G^N`Bb1^=dN^B\\1b=;2M2O1O1O2N100O2N1000010N10000000O1O2O0O00001O1O1O2N6J2ZCoMe;Q2WDVNf;k1WDZNf;f1YD^Nd;e1VD^Nj;f1mCeMMg0W?VA@a>o0F9F:H8I7F:H9F9H8CDH8EE;K6B=K5N3N100010O01PLkI_NV6c0gJ]OZ58PKHQ57oJIS55lJLU53kJMU53kJMV53hJNX52gJOY52fJN[51eJO[51dJ0]50bJ1]5OcJ1^5NbJ2_5N_J3a5M_J3b5L^J4b5L]J5c5K]J5d5K[J5e5KZJ6g5JXJ6h5JWJ7j5HVJ8j5IUJ7l5HTJ8l5oMPHn0T2S1l5oMQHm0R2U1n5mMRHm0o1W1n5lMTHl0m1Y1P6jMTHm0k1Y1Q6kMTHl0i1Z1T6iMSHm0i1Z1T6jMSHk0i1[1U6iMSHk0g1]1V6hMSHk0g1]1V6hMSHl0f1\\1X6hMRHk0e1^1Y6gMRHk0e1^1Z6gMPHk0f1_1Y6fMRHj0d1a1Z6eMRHk0c1`1\\6eMPHk0d1`1\\6eMQHj0b1b1^6cMPHk0b1b1^6dMoGk0a1c1_6bMQHk0_1c1a6aMPHl0_1c1a6bMoGk0`1c1b6aMoGk0^1e1c6`MoGl0]1d1d6`MoGl0]1e1d6^MPHl0[1g1e6^MoGk0\\1g1f6^MnGk0Z1h1h6]MnGk0Z1h1h6]MoGj0Y1i1i6]MmGj0Y1j1R4nLmL>hMk0X1i1e3]MZMOiMk0X1i1X3jMhMBhMj0W1l1k2UNVNUOhMk0V1k1\\2eNfNeNhMk0U1l1m1TOWOTNhMk0T1m1_1BEfMhMk0T1m1S1N1[MgMk0S1m1d0>c0iLgMk0R1n1NT1Y1SLgMk0R1n1F\\1a1lKfMj0R1P2ZOf1o1_KfMj0Q1k7Y1\\GeMj0Q1j7[1[GdMj0R1k7Z1[GdMk0P1k7\\1ZGdMk0P1k7\\1[GdMi0P1l7]1ZGcMk0o0k7^1ZGcMk0n0l7_1ZGcMi0n0m7`1ZGaMi0o0m7`1ZGaMj0m0m7b1ZG`Mi0n0m7b1[G_Mh0o0m7c1]GZMg0S1l7c1oH\\NR7d1nH\\NR7e1nHYNS7g1mHZNR7f1nHZNR7g1mHYNS7g1mHYNS7g1mHYNS7g1mHXNT7h1mHWNS7j1lHVNT7j1lHVNT7j1lHVNT7j1mHUNS7l1lHTNT7l1lHTNT7l1lHTNT7m1kHSNT7n1lHRNd6^2\\IbM]6e2cI[M\\6g2RIYLiNQ1V8e2iHfLkNg0\\8c2dHXN\\7h1aH[N_7e1^H[Nd7f1[HZNg7e1XH]Ng7
c1XH_Ng7`1YHbNe7_1ZHdNd7\\1[HeNe7\\1YHfNf7Z1YHhNf7X1ZHhNe7Y1ZHhNf7Y1YHgNf7Z1WHiN`7`1`HaNV7i1iHWNj6V2UIkMa6`2^I`MZ6h2eIYMP6R3PJnLe5^3YJcLP5U4nJlKd4b4\\KXKE`Mf4c7cKlJ[4[8\\KXER4Z;F;I6N3N1O101O000000001O00001O0000001O0000001O0000000O2O0000001O0000001O0000001O0000001O00000000001O1O0O1000001O0000001O0000001O000000001O000O11O01O000O2O000000001O1O0000000000001O000000001O0O11O01O000101N4Lb0^Oc0^OV1\\DoJW:U6RO`0@W1jN;D000001O00001O000000001O000000001O0000001fHnHP6R7mI[Ii5f6RJnI\\OfNn5\\7eJ]JQ5c5nJhJh4^=" + } + ] + }, + { + "image": "images/caption_detailed_35.png", + "subject_name": "person", + "object_name": "giraffe", + "predicate_name": "feeding", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "mgV58^;4L1O100O100O01000O100O1000000O1000001O1O001O1O2N1O1O2O1N2O00N1N3M3M3M4L3MaT1GhI" + }, + { + "size": [ + 375, + 500 + ], + "counts": "bil0=U;:H6J5K3N2O0010O001O100O001O010O00100O001O010O10O0100O010O10O10O100O10O10O10O010O10O10O1000000O10O01000000O10O10O10000O1000O10O101OO10O@aEJ_:5fEF[::hE@FOb:a0a00O10000O10000000001N100O10001N10000O10001O00001N100O2O1O00001O0010O010O0101N003N2N1N2O1N101N1O101O0O2O0000000001OO2O0O101N8H1O1O1N2L3O1O2O000000O1RNjF>2IV9HlF5;Jl8NnF1W:MkE0mRT3" + } + ] + }, + { + "image": "images/caption_detailed_36.png", + "subject_name": "person", + "object_name": "pavement", + "predicate_name": "walking on", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "Wme21kc06N000[O6Q]OMkb0d01O14T]OXOYb0[1L3N2N3L5K1O1DSNd^Om1[a0=O0100000001O1kM_^Oh1da0PNb^OP2ha0N1G9F:K6O0O2O1O2N3Mf0SO[\\O1gVP6" + }, + { + "size": [ + 640, + 480 + ], + "counts": 
"hU18d?LVDZ2i9nMPFR2o9lMTFT2l9jMVFV2i9jMXFV2h9jMXFV2h9iMYFW2g9dM^F\\2c=0TJXN`Ih1[6bNaDQOl4]2^6QO[Io0d6UOYIk0f6XOXIh0h6XOXIh0h6WOYIi0g6WOYIi0Y7_NmHa1Z;QNm@o1o>YNm@g1P?aNk@_1S?eNk@[1n>oNo@Q1n>UOo@k0P?XOn@h0P?[On@f0o>_Oo@a0n>El@>Q?Z2M3N2N2O1M3N2N2N2O1O1O1O100001O1O1O1O1O1O1O1O1O1O1O1O1O001O1O1O00001O001O0000POXKhBh4U=^KYBD7n4_=iK_BW4`=kK_BU4_=nK`BR4_=PL_BQ4`=RL]Bo3c=SLYBo3f=V100M3O1N200O1001gIbBJ5k5S>I2N1O1O00002N;E:F7iLl@T1Y?cNm@[1[?YNi@g1b?lMa@S2a?hMc@W2_?eMc@[2^?`Mg@_2]`00001O00001O00000000000000001O000ZO`^OlN`a0S1a^OmN^:EWL_1ZIlN]:LTLn1l3RNULm1j3WNSLi1m3XNRLh1m3]NoKc1P4`NfETOU6\\2T4eNkK[1U4fNjKZ1k3iMSFj0OROa23k0X2a6lNSGhNQO\\1Y1`7lNTG]O1e0S1R1h7lNTG]O2j0l0m0n7iNXG_O0a136e8iNYG@4c1G4l8hN[G_O4d1E5l8_NVFVOZ1a00e1C5n8YNjGLFf1A5o8UNPHMBi1^O5Q9oMUH2]Oj1\\O5S9nMUH1^Ol1YO5T9mMVH1^Om1XO5T9mMVH1_Om1VO5V9lMVH1^OQ2SO2[9iMUH4]OR2RO1j;mMTER2RO1k;lMSES2RO1k;lMREU2ROOm;kMPEW2SONm;jMoD[2SOKo;iMmD^2SOIP`0\\2n0]:eNiBN=>^2o0X:lNgBM`06d2P1o9DYCZOj2R1k9GZCUOn2S1h9HZCTOP3S1e9J[CQOR3U1c9KZCnNU3W1a9KZCmNV3X1`9K[CkNW3Y1^9L[CjNX3Z1]9L[CiNh1M\\O^1a;L[ChNa1OVNk0d0b0ZB1O00O1J6L4[OSNU_Oo1o?gM]@S3`?TMR@J1S3m?g0001O1O00001O1O2TMk_OTO0`2W`0PNc@e1Va0F5K1O1O0000IZNS^Og1Sb0000002YNh]O[1cb0L3NO2001O2N2M3DM4O010N2O1N101O001O01O00001N100O2O1O1N10100O_OnI_KR6`4PJ_KQ6Q4^JoKb5k3dJUL\\5k3oIhK?;e5k3bJUL^5k3bJUL_5j3aJVL`5i3`JWL`5i3`JVLa5j3_JVLb5h3^JYLb5g3^JYLc5g3\\JXLe5h3[JXLf5n3gIhK4:U6`3dIVL3O2129W6^3UJYLD9W6\\3WJ[LB9W6[3XJ\\LA9W6Z3ZJ\\L_O:X6W3fJiLZ5V3gJiLZ5W3fJiLZ5V3gJjLY5U3hJkLX5T3iJlLW5T3\\JcL]O9W6T3\\JcL]O9W6S3jJmLV5S3jJmLV5S3iJnLW5R3hJoLX5Q3^JeLUO<]6n2^J[Mc5d2\\J]Mc5d2[J^Me5b2ZJ_Mf5`2ZJaMe5`2YJbMg5^2XJcMh5\\2XJeMg5\\2XJeMh5Z2XJhMg5X2XJiMg5X2XJiMh5V2WJlMh5T2WJoMh5P2XJQNg5P2XJQNh5o1WJSNg5m1YJTNg5l1WJVNh5j1XJXNf5h1ZJYNf5g1XJ[Nh5d1XJ^Nf5c1YJ^Ne5f1nIiL1c1o5k3N3L3K6I6K5N2M3L4LdMTKUNh4o1YKPNe4Q2\\KPN`4S2`KmM_4T2aKmM[4V2eKkMY4V2gKRMNoNY4P4jKoLNROW4o3lKlL0VOP4P4QLiLOYOm3o3TLgL1]Of3m3YLeL3AQ3QOPMk4LbL4Hd1lNoN]5XOnK5NT1GdNd43gK53l0`5nN]J67g0^5RO[J7h<01M20100000O1nNEXES;BmD>S;BmD=T;ClD=T;DjD=V;EfD=Z;DeD<[;DeD<[;DeD
;];EbD<^;DaD=_;C`D>T;]OhD44`0R;^OkD12a0R;_OmDO2b0o:@oDO1`0Q;AnDO1`0Q;AoDOO`0S;@nD1Ma0U;^OnD0Mb0U;@lDONa0V;3gDMZ;P10XNjD^1c;N1O1O1O000O_DgNT;Y1hDoNT;P1kDTOS;l0lDWO7DZ:V1\\EYO8CZ:l1dEUNP:LoEQ20SNo91nEm12SNn9\\2PFeMQ:h2001O0\\MmET2T:jMPFR2Q:mMPFS2Q:lMQFR2R:jMQFS2Q:jMQFV2`:0O01DUE\\Nk:c1WE\\Ni:c1XE\\Nh:c1ZE]Ne:c1\\E]N_:f1dEWN^:f1a0XOgDGV;9mDFR;;oDCQ;=RE@n:a0UEZOl:g0k0N2O2ROjCg0[<000O2N2N2N1O2N2O1N2OO0O110N2O00000L400100O20N100N2O0010O01O5^OeC0a1O10000001O3M3M1O001O00000000000000000000000000O100^NSMWB1A2=j2^>_MbAa2^>_MbAa2]>aMbA_2l=SMSB?1^2l=SMSB?0_2^>`McA`2]>`McA`2]>`McA`2]>`McA`2]>`McA`2]>`McA`2]>`McA`2]>`MbAa2^>_MaAb2^>_MbAa2^>^McAb2]>^McAb2]>^McAb2^>]MbAc2^>\\McAd2\\>\\MdAe2\\>XMgAh2Y>VMiAj2V>VMkAj2U>UMlAk2R>WMnAi2Q>XMoAh2o=ZMRBe2l=\\MUBd2i=^MWBb2i=^MWBb2i=^MWBb2h=_MYB`2g=`MZB_2f=aM[B^2e=bM[B^2e=bM\\B]2d=cM\\B]2d=cM\\B]2d=cM\\B]2e=bM\\B]2d=cM\\B]2d=cM\\B]2d=cM\\B]2d=cM\\B]2d=cM\\B]2d=bM]B^2c=bM]B^2c=bM\\B_2d=`M]B`2c=_M^Ba2c=]M^Bc2b=\\M_Bd2a=\\M_Bd2a=\\M_Bd2a=\\M_Bd2a=\\M_Bd2a=\\M_Bd2a=\\M_Bd2a=\\M_Bd2a=]M^Bc2b=^M]Bb2c=`M[B`2f=_MZBa2f=`MYB`2g=`MZB_2f=aMZB_2f=aMZB_2f=aM[B^2e=bM[B^2e=cMZB]2f=cMZB]2f=cMnAEFh2\\>dMlAFGf2]>eMjAGGe2_>dMjAHFd2`>dMjAi2V>WMiAj2W>VMfAm2Z>TMdAm2\\>SMdAm2]>RMbAo2^>RM`Ao2`>QM_AP3a>f00000000000001O00000000000000000000001O001O4L2N1O1O1O001O1O1O1O2N1O2N1O1O1O2N2N3M1O1O1O2N1O1O1O1O1O1O1O001O1O1O1O001O1O001O1O001O1O001O1O1O1O1O001O1O1O001O0000001O00001O000000001O000000001O0000000000001O000000000000000000000000000000000000000000000000000000000000000000000000000000000000O10000000000000000O100000000O10000000000O1000000O1000000O10000O100O1O100O1O100O1N2O1O100O1O1O100000000001O00001O0000001O0000000000\\OaMj@_2U?cMi@^2V?eMg@\\2Y?eMe@\\2[?c0O10000000000O1000000O10000O100O100O10000O1000000O100O10000O1O10000O10000000000000000000000000000000000000000000000000000000000O100000000O10000O100O100O100O10000O1O100O100O100000000001OP1PO5K3M2N:F8H=C;E3M2N1O1O001O1O001O001O001O0000001O000000000000000000001O0000000000000000000000000000O100000000000000O100000000O10000O1O1L4
K5hLmN]BN\\O0R1j1T=ZNUBb3k=^LUBb3k=]LWBb3i=^LWBb3i=^LXBa3h=\\L]Bb3c=]LaB`3b=\\LbBa3j>aLe@NF3NX2]a0SO?A6J4L3M001O1O001O001O001O0000001O000000001O0000000000001O0001OO100001O00000000000000O1000O2O000000000000O100000O2O000000000O0100000N2O1nK1TE1l:3lD1S;1lCHnMd0V>EkCn0UdDA\\;`0dD_OUM" + } + ] + }, + { + "image": "images/caption_detailed_41.png", + "subject_name": "sports ball", + "object_name": "person", + "predicate_name": "touched", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "kiR13W=2EOYC2d<3YCNe<<00000O10000001O00O20O0001O1ObCDn;=PDHm;8RDKl;3UDOi;2VDOj;0VD1j;0TD1k;0TD2k;f010000000000O1000VDiN`;W1_DjNa;V1_DjNa;V1_DjNa;U1_DlNa;T1_DlNb;S1^DmNb;S1]DnNc;Q1^DoNc;P1]DoNd;Q1\\DoNe;P1ZDQOf;o0ZDQOf;n0[DROf;m0YDSOi;m0TDUOm;T101O10O1O2N1N2O1N2M5J5M4L5K4L3K4NST\\6" + }, + { + "size": [ + 427, + 640 + ], + "counts": "bQT12Y=1J0oB01Nm<0`U:8QjEd0E:XDPOl:n1G7G9I6J8J5L5J5G:K4L4L5K6G7L5K5L4K3VLeKgKUOe2Y5b1mKjKjN\\11\\OY5]3nKiKjN[13[OW5_3RLUMnNTOR5e3SLTMROiN^O2a5P4RLRMi4l2[KQMf4n2[KoLh4P3YKWLWN`0a6Y3ZKTLWNa0`6Z3bKeL^4[3cKbL_4]3bKcL^4\\3cKdL]4\\3fK`L[4_3lKZLU4e3mKYLT4g3lKYLT4g3mKXLa27RL`3d6eLZIZ3d6kLZIT3d6PM[IP3d6RM[Im2e6VMYIi2i6XMUIh2k6\\MQId2o6]MPIb2Q7_MnHa2R7`MmH`2S7cMjHc0K[NAn0k79dH;>VOo6`0bH7d0VOk6h0[H2n0ROh6R1SHKZ1POc6h2dIRM]6m2gIoL[6P3iIkLY6S3P2O2N1O10000O2O0O10002N5K4K4M4L2N4L2N2N4L2N3N2M5K9H0O0010O00001O2N1O001dEfNe8[1UGkNj8W1RGlNm8U1PGnNo8S1mFPOS9S1eFRO[9o0`FVO_9k0oE_N1n0o9U2O00lNTFjNk9[201VNSMTIn2i6VMUIj2j6YMTIh2k6ZMSIf2l6`MoHa2X3fLkNLRM^1f0Q2Z3kLaNe1RNa1[3kL\\Nl1XNZ1Z3lLSMCNd2_Oo0X3XNQMX1[Ob0a3_NaLW5]3]2O1O1O1O1O1O2N001N2O1O1O1O1O1N2O1O1M4L3O1M4M2M8G6J5UNcD]1o;aNUDi0c3N2N2O1O0O101O00000000000000000000000000000000000000000000000O1000O10000000000000000000000000QJ" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"0[63nJV7R5jHnJV7R5jHnJV7R5jHnJV7R5iHWJO6W7c5jHWJO6W7c5jHUJ19T7b5kHTJ2:S7b5kHSJ3V1l0VOVOG_13`N6a0HQ>R1VBB;YOSOe0K[O89b=L^BU1IR3OhKR=2nBf10g2c<_K]CO1o6`T3^A^MT>c2jAmMg=S2XBVN`=k1^BbNV=^1iBQOij;BSDc07\\Le:R3REc08cL_:k2WEc09mLV:k6gEbIn9_6mEoIi9\\8H5K5K00000000000000O10000O100O100O1O1O1N2O100O100O1000000O1000000O1000000001O1O00001O0000001O002N1O1O00001O1O1O001O001O00001O001O1O1O001O1O1O1O001O00001O00001O1O001O1O001O001O1O1O1O0bKiDD1_LW;k3iDE2_LR9J6K4L5K4L3N1O2M2O2M101N2O1N3M2M3M3M2O2N2O1O1N1O2O001O0O1OO21O0O0XORN`Do1_;WNZDl1f;UNXDk1i;f01N2N101N1O2N2O0O2O1N2N2N1O2N100O2N1O1O100O1O1O100O2O0O1O10000O2O000000010O01O00001O0010O00010O01O1O100O1O010O1O010O1O10O01O100O1O1O10000O1O1O100O100O0010000O2N2N2O2N1N2N1O2O2M2N1O100O2O1N10000O100O1O1O100O1O0010O0101N1O3N3L5K3M3M3RHnKi4U4RKlKn4V4oJkKR5W4gJmKY5X4aJiK_5[4[JfKf5d4nI^KS6g4fI[KY6k4^IXKb6l4YIUKg6n4TISKm6S5jHPKV7]60O1000000GUH\\Il7b6YHZIh7d6;O0O2O1iNeGhK\\8T4RHbKn7[4\\H]Ke7d4[HZKf7h4YHWKg7n4SHRKn7T5gGSKW8l5OO1O2N11O1OnLiGUO]8j0XH_Nk7a1bHfLkNR1d8X2PIaMQ7_2UI[Mk6e2[ISMg6m2[IPMf6P3^IjLe6U3c21O0000001O001O010O00001O00001O00001O00010O001O00001O001O000000001O00000O10001O0000000000001O0000000000000O1000000000000000O01000000000O1000000000O01000000O10000O10O010000O100O10O10O1O1O1O100O100O100O1O\\GjL`5U3`JmL_5R3^JSMa5l2\\JXMd5h2[JYMe5g2YJZMh5f2UJ\\Ml5e2PJ^Mo5e2lI]MU6e2hI\\MX6e2eI]M[6f2aIZM`6h2\\IWMg6k2VIPMP7R3lHnLV7T3gHmLY7T3eHkL]7W3`HjL`7X3\\HiLd7Y3ZHgLg7[3VHdLl7]3QHcLQ8`3kG_LW8c3eG]L]8e3`GZLb8i3YGXLh8l3RGTLP9l42O1N2O1O1N3N1O1O1N2O1NmGnJ\\6Q5dIPK]6m4bIWK]6h4cIZK\\6d4dI_K[6`4dIbK]6\\4cIeK]6Y4eIgK\\6W4dIjK\\6U4bInK_6Q4_IQLa6R4YIQLg6Q4SIRLo6P4gHVLZ7m3\\HWLh7]53N2O2N1O2O0O2N1O1M4XM[GPOh8n0^GbNo8[1VGWNU9g1PGRNT9l1PGlMW9Q2mFgMZ9V2jFbM]9Z2iF^M]9_2d1M2M4L3N3M3M3L3M3N3N2M2M4M3L4PO_B9g=F_BOh=0d00001O00001O001O1O0O3NTef1" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"V6e17W5R6hJnIX5R6hJnIX5R6hJnIX5R6hJnIX5R6hJnIX5R6hJnIX5R6k1000000000000000000000000000gHiIi5W6WJiIi5W6WJjIh5V6XJjIh5V6XJjIh5V6XJjIh5W6WJhIj5X6UJiIk5W6VJhIj5W6`100001O5K0kKcIG^6RMaIo12o0]67cII]67cII^67`IJb6Y4000000001gH^IR6b6mI`Ii1N[1b6lL_Ii11Z1`6mL_Ii11Z1_6oL^Ih12Z1`6nL^Ih11[1a6U400000000000000O_H_Ie6`6\\IbIb6]6_IdIj1Mm1_6YLcIa6]6_IcIa6]6_IcIa6]6_IbIb6]6Q1000UKcIlLOd3^62dIN\\6dLbIb23j0Z6eLcIa23j0Z62eIO[60fI0Z60fI0Z6dLcI`23l0Y6eLdI_23l0Z60eI1[60dI0\\6OeI1Z6kKfIo306Z6jKhIo3N7Y6kKiIn3N7Y6jKjIo3M7X6kKkIn3M7X6lKjIX8V6hGjIT4LKZ6SLgIR40KY6TLfIP43KV6>jIBV6?iIAV6`0jI@U6b0iI_OV6f4O1O1OTNoIkJQ6S5RJlJm5S5UJmJk5S5VJlJj5S5WJmJi5S5WJmJi5R5XJnJh5R5XJnJh5S5VJnJj5R5VJnJj5S5SJoJm5Q5SJoJm5Q5SJoJm5T5nInJR6P700000000000000O100000000001_GRJn7n5mG^Jl7b5RHbJl7\\6M2N2N1O001O1O000000001O0000000000000000O1000000O10000O100N2O1N2N2O1O1O100O100O100O100N200O1O1O100O100O1N200O1O1O1O1O1O100000000O10000O11O00O1000000001O00001O0000001O1O1O2N001O1O001O1O1O001O001O1O2N001O2N1O1O1O001O1O1O1O001O2N1O1O3M2N2N2N1O2N3M2N3M001O002N2N1O1O2N1O1O1O1O1O1O1O00001O2N2N3M3M4L2TMVG^On8O1O11O0000000000000000O100000000O1kLRMXJn2h5_MkGYOW13`NX3^8eM`G_OS1V3]7UNZHl1f7VN`GWNa0d3o7\\NoGe1Q8]NkGe1V8b200001O001O001O0000001O001O00001O001O001O0000001O0000001O00001O00000000001O00001O000000000000000000000000000000000000O1000000000000O100000000O100000000O10000O10000O10000O1O10000O100O1O100O1O\\L_Gk0a8SOkGc0T8[OPHd0P8YOTHf0k7YOWHg0i7XOXHh0g7WO\\Hh0c7WO_Hi0b7QObHP1^7oNcHQ1^7kNeHT1]7iNeHW1\\7dN_GVNU1W3^7aNeH`1\\7^NcHc1`7YN`Hh1b7UN^Hl1c7QN^HP2d7mM[HU2f7iMZHX2g7fMQHnNQO_3Q9^MnGj2S8TMnGl2S8RMmGo2W8lLjGT3X8hLhGZ3[8aLeGa3\\8]LcGe3k91N2M3O1N2O1O1N2NPMcEa1\\:\\NiEc1W:[NlEd1S:[NoEe1P:ZNSFe1i9^NXFb1g9^NZFb1e9^N\\Fb1c9^N^Fb1`9_NbF`1]9`NdF`1[9`NfF`1Z9^NhFb1W9^NjFb1U9[NoFe1P9WNUGi1l8TNVGl1l8jM[GW2g8]McGc2\\:100O1O1O1H8O1O1O100O1N2N2O1M3F:bM]LTIn3i6ZLlHl3S7WLgHm3V7YL`Hn3^7UL\\HP4a7WLTHP4i7VLPHn3P8a1N2N2L4M3N2L4O1M3N2O1M3O1K5N2H8@`0O1O1O1000000O100000000N2O10000O1N2O100000000000000000\\HWJQ6i5mIYJS6g5lIZJT6g5kIYJU6g5kIZJT6e5mI[JS6e5mI[JS6f5lIZJT6
f5lIZJT6f5lIZJT6f5lIZJT6f5lIZJT6V4cHPMY1jNT6V4cHPMY1jNT6V4cHPMY1jNT6V4cHQMX1bNhN3^7k5jIQJjN3\\7[4bHQM\\1dNR6[4cHPM^1bNo5^4dHoLm8Q3TGnLl8R3UGmLk8S3TGnLl8R3SGeKNU1o8V3SGPMl8R3RGnLn8Q3TGfK1n0k8\\3TGfK0o0l8Z3UGgKNP1m8Y3UGoLk8Q3VG\\LMEm8o3VGcKLe02Jk8n3ZGYLJIl8n3ZGlLf8T3ZGlLf8T3YGnLf8R3YGoLg8Q3XGQMg8o2YGQMg8o2WGSMi8R4VGiJk8\\4WG`K00OJN6l8`4WGaKOO0IO7k8`4VGbK0OOH08j8T4VGkK1NNL20a8T4oFmK`0Oa8T4oFmK?0^ONj8U4YGnK=0@Mj8O" + } + ] + }, + { + "image": "images/caption_detailed_47.png", + "subject_name": "cake", + "object_name": "person", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "PYR41h01[=2cBO[=4dBL[=6cBK\\=7bBJblKBT4=mKCT4fFGU9;dF@cN7f:=aFN]92dE^O=d0n9OcE@b5n600000O10000000000000000O100001OO10000fH" + } + ] + }, + { + "image": "images/caption_detailed_49.png", + "subject_name": "donut", + "object_name": "person", + "predicate_name": "held", + "mask_rles": [ + { + "size": [ + 612, + 612 + ], + "counts": "_Ql1l0oa0c0A8H7J6I:D;K5J5M6H7H6L2N2M4K7I5L3M3N4I6L2N4L4K0020001O1N2N110O1N2N1O2O1O1O2M2O2N1O1N2O001O100N2N3OO10O1N2O011N1O1O1N201O0O1O1O1O1O101N2O0O1O1O1O10000O2N1O101N1O100O100O1O1O101O0O100O10000O1O1O10000O101N100O100O2N1000000O10000O101O0O10000O1000lLhD\\NW;a1PE\\NP;a1VE]Ni:b1ZE\\Ne:e1\\EZNd:f1]EYNc:f1^EZNa:g1aEWN^:i1dEVN[:k1eEUN[:j1fEVNZ:i1hEUNY:j1hEVNW:k1jETNV:l1jETNV:l1jEUNU:k1kEUNU:j1mEUNR:l1nETNR:l1oESNQ:m1oESNQ:m1oESNP:n1QFRNn9n1RFRNn9n1RFRNn9n1RFRNn9n1RFRNm9o1TFPNl9P2TFPNl9P2TFPNl9P2TFQNk9o1UFRNj9n1VFRNj9o1UFRNj9n1VFRNj9n1VFRNj9n1WFRNh9n1XFRNh9n1XFRNh9n1XFRNg9o1YFQNg9P2XFQNg9o1YFQNg9P2XFPNh9P2XFQNg9P2YFPNf9Q2YFoMg9Q2YFPNf9Q2YFoMg9Q2YFPNe9S2ZFlMf9U2YFlMf9U2YFlMf9T2[FkMe9V2ZFjMf9W2XFjMh9X2VFhMj9Y2TFiMk9X2TFhMl9Z2PFhMP:Z2mEhMR:]2cEjM\\:_50001O001O001O1O1O01O0001O1O001O00100O00002N10O01O001O100O010O1O100O1O010O100O100O2OO01O011O0O01O010O10O01N101N2O0O2N2O0O101N1O2O1N2N1O2O1N2N1O2N1O2N2O0O2N2N2N2N1N3M3N2N1N3N2N2N2N1O2N2O1N2N2N1O3M2N2N2N2L5J5L4N3M2M3M5K3N2M3N2M3M3N2M3N3L3N2N2M3N3K5M2M4K6J5J5L7I5K8F9I6K9F6JZb0`0F7G8H8G:B
=F:B>N2O1000000000000O10001O00000000000000O02O00O100000000O100O1O1O1O1O1O1O100O1O001O1O100O1O1O1O1O10O010000O1ROf_OROZ`0h0o_OUOQ`0g0T@XOk?g0W@YOi?e0Z@ZOe?f0]@YOc?e0`@ZO`?e0b@ZO^?d0d@\\O\\?c0f@\\OZ?c0h@[OY?d0h@[OY?d0i@YOY?g0h@VOZ?i0j@POZ?P1]100O1000001O000010O3M10O00010O000O101N1O1O2N1O2N1O1Ok_O_Oi=`0TBEk=:TBIl=4TBNl=1oA5Q>JkA;V>DeAa0[>^OeAc0\\>ZOcAi0]>VO[AS1f>lNTAZ1m>dNPA`1Q?]Nn@f1U`03H8N2K5L4N3K3M4K5L5K4I7L3L6L3J6K4J7M3M4L2M4H8G9N2L4N2N200O100O1O100O100O1000000O10000000000000000000000001O000000010O0O2O00010O1O001O001O1N2O1O001O2L3@`0H8J7J5I7K5K5H:F9I7J5K6J8UNY_Om0ea0H6L4K6J6K4L4M2MdgZ6" + } + ] + }, + { + "image": "images/caption_detailed_50.png", + "subject_name": "laptop", + "object_name": "table", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "0R8n6000000O10000O100O10000O10YLSH;l7]3000`LTHLk75VHJj73[HKd7i3000000O100O100O1000000O10000O10002M102N2M5L3L5L3M3L5L5K3L4M1N2O00001N101N2O0O2O00000O2O001N2O001N2O001N2O1O0O2O1O1N2O0O2O1O0O2O1N102N001N2N101O1O1N101O1N2N2O1O001N2O0O2O1N2O1O1N101N2O1O0O2O1O001N3N0O2O1O0O2O1N2O001N2O1N2O1O0O2O1O001N2N2O1O001O1N101N2O1O1O0O2O1N2O0O2O1O001O2M101N2O001N3N001N2O0O2O1N2O1O1O0O101O1O1N2O1O0O2O1N2O1O1O0O2O0O2O1O0O2O1O0O2O1N3N1N101O001N2O1O001N2O1O1N2O1O0O2O001O1N2O1O0O2O2N001N2O1N2Nf]Q6" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"U8i6W8000O100O10000O10000O10000O100O1000000O100O100O1000000O100O10000O100O10000O10000O100^HlHX7T7gHnHW7S7iHQIS7o6lHVIP7j6PIWIn6j6RIYIk6g6UI_Ie6a6[IaIb6`6^IaIa6_6_IaIa6_6`I`I_6a6aI_I_6a6`I`I`6`6`I`I_6a6aI_I_6a6bI^I]6c6bI^I^6b6bI^I^6b6bI^I]6c6cI]I]6c6dI\\I\\6d6dI\\I[6e6dI\\I\\6d6dI\\I\\6d6eI[I8VOe5_7SJ[I8VOd5`7TJZI7XOd5^7TJ[I8ZOa5[7WJ[I7\\O`5Z7ZJYI6]O`5Z7ZJYI6^O_5Y7[JXI6@_5X7[JXI6@_5X7[JXI6A^5W7\\JXI5C]5V7^JWI5D\\5U7_JWI5EZ5U7aJVI5FY5T7bJVI5GX5S7cJVI5GW5T7dJUI5HV5S7eJUI5IT5S7gJTI5JS5R7hJTI5KR5Q7iJTI4LR5Q7jJSI4LR5Q7jJRI5NP5P7kJQI60m4P7mJPI53l4m6oJPI54j4m6QKoH54j4m6QKoH55h4m6SKmH66g4m6SKmH58g4k6TKmH59e4k6VKlH5:c4k6XKkH5:c4k6XKkH4`4i6[KiH5>`4i6\\KhH4`0^4i6]KgH6a0\\4h6^KgH6b0Z4h6`KfH6c0Y4g6aKfH6c0Y4g6aKTHL>:g0X4h6bKSHL>9j0W4e6dKdH6h0U4d6eKcH7j0R4d6gKaH8l0P4c6hKRHL:=Q1n3d6iKQHL:=R1m3c6jK`H7n0o3b6jK`H7o0m3b6lKPHL;;T1l3a6mKPHL;:V1k3`6oKoGL;:W1j3_6PL^H6T1h3_6RL]H6T1h3_6RL\\H7V1f3^6SL\\H6X1f3\\6TL\\H6Y1d3\\6VL[H6Z1c3[6WL[H5[1c3[6XLZH5\\1b3Z6YLkGM;8`1b3Z6YLlGL:9a1`3Z6[LYH4`1_3W6]LYH4a1^3V6^LYH4a1]3W6_LXH4b1\\3V6`LWH5c1Z3W6aLVH5e1X3U6cLVH5f1W3T6dLVH5f1W3T6dLhGM99l1T3T6fLhGL89m1U3S6fLSH6k1R3S6hLRH6k1R3S6hLQH7n1o2Q6jLQH6o1o2Q6kLPH6P2n2P6lLoG7R2l2o5mLoG7S2j2o5oLnG7S2j2o5PMcGL7:X2i2n5QMmG6V2g2n5RMlG7X2f2l5SMlG7Y2e2k5TMhGNJ9c2d2l5UMgG;^2_2k5VMgG:`2^2j5XMfG:a2]2i5YMgG9a2]2h5ZMfG:c2[2g5\\M`GL2=g2Z2h5]MdG9e2Y2g5]MeG:e2X2f5^MeG:f2V2f5`MdG9h2V2d5bMbG9j2U2d5aMcG:j2S2d5cMcG9i2T2d5cMcG9j2S2c5eM]GL2j4BWK=i4BYK=g4BZK>f4BfHEf0h0d6CbHLh0?g6E_H1h08j6F\\H7i02k6GYHk0Jm6HWH`0k0Go6ITHc0n0Bn6JTHe0o0_On6LRHg0P1\\On6MQHh0Q1ZOo6NoGj0R1WOo6OnGk0S1UOP70mGl0T1ROo62lGm0V1oNo64kGn0W1kNo67jGn0Y1gNo6:iGo0i:QOWEP1h:POXEQ1g:oNYEQ1g:oNYER1f:nNZER1f:nNZES1e:lN\\EU1c:kN]EU1c:kN]EV1b:iN_EW1a:iN_EX1`:hN`EY1_:gNaEZ1^:fNcEY1]:fNdE[1[:eNeE[1[:eNeE\\1Z:dNeE^1Z:bNfE_1Y:`NhE`1X:`NhEa1W:_NiEb1V:^NjEb1V:^NjEc1U:\\NlEd1T:[NmEf1R:ZNmEh1R:WNoEi1Q:WNoEj1P:VNPFk1o9UNQFk1o9UNQFl1n9TNRFm1m9SNRFo1m9QNSFo1m9PNUFP2j9PNVFQ2i9oMVFR2j9nMVFS2i9mMWFT2h9lMXFU2g9kMXFV2h9jMXFW2g9iMXFX2h9gMYFZ2f9fMZF[2e9eM[F[2e9eM[F\\2d9dM\\F]2c9bM^F^2b9bM^F_2
a9aM_F`2`9`M_Fb2`9^M`Fb2`9^M`Fc2_9]MaFd2^9\\MbFe2]9[McFe2]9[McFf2\\9ZMdFf2\\9ZMdFg2[9YMdFi2[9WMeFj2Z9VMfFj2Z9VMfFk2Y9UMgFl2X9TMgFm2Y9SMgFn2X9RMhFo2W9QMiFP3V9PMjFQ3U9oLkFQ3U9oLkFR3T9nLkFT3T9lLlFU3S9jLnFW3Q9iLoFW3Q9iLoFX3P9iLnFX3R9gLoFZ3P9fLPG[3o8eLQG\\3n8dLRG\\3n8dLRG]3m8cLSG^3l8bLSG`3l8`LTGa3k8_LUGa3k8_LUGb3j8^LVGc3i8]LWGd3h8\\LXGe3g8[LXGf3h8ZLXGg3g8YLYGh3f8XLZGh3f8XLZGi3e8WL[Gj3d8VL[Gk3e8UL[Gl3d8TL\\Gm3c8SL]Gm3c8SL]Gn3b8RL]GP4b8PL^GP4b8PL^GQ4a8oK^GR4b8nK^GS4a8mK_GT4`8lK`GT4`8lK`GU4_8kK`GW4_8jK`GW4_8iKaGW4_8iKaGX4^8hKbGY4]8gKcGY4]8gKbG[4]8fKbG[4]8eKcG\\4\\8cKeG]4[8cKeG^4Z8cKeG^4Z8bKeG_4[8`KfGa4Y8_KgGb4X8^KhGc4W8\\KjGd4V8\\KiGf4V8ZKjGg4U8YKjGi4U8WKkGj4T8VKlGj4T8VKlGk4S8UKmGl4R8TKnGl4R8TKmGn4R8RKnGo4Q8QKoGo4Q8QKoGP5P8PKPHQ5o7nJRHS5m7mJSHS5m7nJRHS5m7mJSHS5m7lJSHV5l7jJTHW5k7jJTHV5l7jJTHW5k7iJUHX5i7iJVHY5i7gJWHY5i7gJWHZ5h7fJXH[5g7eJXH]5g7cJYH]5g7cJXH_5g7aJYH`5f7`JZH`5f7`JZHa5e7_J[Hb5d7]J\\Hd5d7]J[Hd5c7]J]Hd5b7\\J^Hd5b7\\J^He5a7[J^Hg5`7ZJ`Hg5_7YJaHg5_7YJaHh5]7YJcHh5\\7XJdHi5[7WJeHi5[7WJeHj5Y7WJfHk5Y7UJgHl5X7TJiHk5W7UJiHl5T7VJlHk5R7VJoHi5Q7WJPIi5o6WJQIj5n6WJQIj5m6WJTIh5k6ZJVIe5i6[JWIe5i6\\JVIe5i6[JWIf5g6[JYIf5f6ZJ[If5d6[J[Ie5e6[J[If5d6[J[If5c6\\J\\Id5c6]J^Ic5`6^J`Ic5_6^J`Ib5`6^JaIb5^6^JbIb5]6`JcI`5\\6`JeI`5Z6`JgI_5Y6bJfI_5X6bJhI_5V6bJjI_5U6bJjI_5T6bJmI^5Q6dJnI]5P6dJPJ]5o5dJQJ[5n5gJRJY5m5gJTJX5k5iJUJX5j5iJUJX5i5iJWJW5i5iJXJW5f5kJYJV5f5kJYJV5e5kJ[JU5d5lJ\\JU5b5mJ^JS5a5mJ`JR5_5oJaJR5^5nJbJS5\\5oJcJQ5\\5PKeJP5Y5QKgJP5X5PKiJP5U5QKkJP5T5QKlJn4S5SKmJn4Q5TKoJXNYOa6f5XKQKk4n4WKQKj4m4WKUKg4j4ZKWKf4g4\\KXKd4g4]KYKd4e4]K[Kd4c4^K\\Kb4d4^K]Kb4b4^K^Kb4b4_K]Kb4a4_K_Kb4_4_KaKa4_4`KaKa4\\4aKcK_4]4bKbK_4\\4bKdK_4Z4bKgK^4W4cKjK\\4V4eKjK[4U4eKlK[4R4fKnKZ4Q4hKnKY4P4hKPLY4o3hKQLX4m3iKSLW4m3iKSLX4k3jKULU4k3lKVLS4h3nKXLR4h3oKWLR4g3oKYLQ4f3PLZLP4e3QL\\Ln3d3SL[Ln3c3SL]Ll3c3VL\\Lj3c3WL]Li3c3XL]Lg3c3YL]Lg3b3[L]Le3b3\\L^Lc3c3]L]Lc3b3^L_La3a3`L_L^3b3bL^L^3a3cL`L[3`3fLaLY3_3gLcLV3]3lLcLS3\\3nLdLQ3]3oLcLQ3\\3PMeLn2\\3RMdLn2Z3TMfLl2Z3TMfLk2Z3VMfLi2Z3XMgLg2Y3YMgLg2X3ZMhLe2Y3^MdLb2[3_MeL`2[3aMeL_2[3aMeL^2[3a
MhL^2W3dMhL[2X3fMmKXLg0R6\\3iMeLV2[3jMgLU2X3lMhLS2X3oMgLQ2Y3PNeLP2[3QNfLn1Z3QNhLm1X3TNiLk1W3WNgLi1X3YNgLf1Y3SNQLTLh0h5W3TNPMk1P3VNPMj1o2YNoLg1Q3\\NlLc1T3]NmLb1T3]NnLb1Q3_NPM_1P3bNPM^1o2dNPM\\1o2eNQMZ1P3eNRMY1n2hNSMW1m2jNRMU1n2lNRMT1m2mNSMR1n2oNQMQ1n2POSMn0m2TORMl0m2VORMi0o2VORMj0m2WOSMh0m2YOSMg0m2ZORMe0n2\\ORMd0m2^ORMa0o2_OQMa0n2@RM?o2BPM>P3BPM=P3EoL;P3FQM8P3HQM7n2KRM3n2NPMPKUOQ5k3OSM0m22RMNm23SMLm25SMKm26RMIn28SMGl2;TMCl2>UMAk2`0TM_Ol2c0SM]Om2c0SM\\Om2e0TMZOk2g0VMWOj2k0UMUOk2l0TMSOl2n0TMROl2n0TMQOk2R1TMnNk2S1UMlNl2T1TMlNk2V1PMYJZO`4f3X1PMXJZO`4f3Y1oLWJ[O_4g3Z1nLWJ[O_4f3[1oLVJ[O^4f3^1oLSJ\\O]4f3`1SMSJVOY4f3f1VMZNi2h1VMWNk2j1gLoIH0KV4f3l1gLnIIOKV4e3n1fLmI0S4Y3Q2gLkI2R4W3U2fLjIE09Q4[3V2gLiIE19n3[3Z2fLfIH08o3[3[2eLfI5o3U3\\2fLeIL2I10k3f3^2dLeILU4a3W2aLeIMU4a3W2bLeIHM2V4d3Y2cLfIHQ4d3Y2eLgIFP4d3Z2fLfIFo3e3[2eLfIGm3d3^2eLaILNKo3d3c2RM[Mm2h2SMWMm2i2cL]I0NKl3b3k2bL\\I0MLl3a3l2cLZI00Li3a3m2lL[IFd3^3R3kL[IHb3]3T3jL[IH`3_3W3fL[IJ]3`3X3gL\\IH\\3`3Y3iLZIG\\3a3Z3hLZIG[3`3]3iLYIFZ3a3^3hLXIGZ3`3_3hLYIGW3b3a3gLWIGX3a3b3hLVIGW3a3e3gLgLX3Z3iLdLW3]3iLcLW3o0" + } + ] + }, + { + "image": "images/caption_detailed_51.png", + "subject_name": "person", + "object_name": "person", + "predicate_name": "beside", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "bS\\1SEGg:=VEFg:;XEGf:;XEHd:4N2M2O2N1O101L3O1M3O1O0\\OWORCj0jI7I7L4M2O2M3N2M3N2N2M3O1M3N2L4L5WIiIc4[6SKTJb4R6]KQJ\\4T6cKoIV4V6jKlIn3[6QLfI^3k6_LWI`3l6]LVIb3R7ULPIi3T7bKoGOo0_4R9O2N1O2M2O2N1O2O1M2O2M2O1N3N1N3M3N1O2L3N3L4M3M2N3L4L5L5K4K6J6I8J6I7KU1iN_Rj0" + }, + { + "size": [ + 480, + 640 + ], + "counts": "nj_66l03V@`Rj0" + } + ] + }, + { + "image": "images/caption_detailed_53.png", + "subject_name": "handbag", + "object_name": "person", + "predicate_name": "held", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": 
"Qha0a0f=F^BY1j2N10000O0100O0001O0O101_NMXC074=FQ;;VD070a0Jm:;WDg0j0ROl:c1TE_Nj:a1UEbNj:^1UEdNi:\\1VEfNj:Y1VEjNh:U1WEnNh:R1WEPOh:P1WEROh:n0TEWOk:j0nD]OQ;d0lD_OT;a0iDBV;U200001cLgDP3[;lLgDT3b;2O0O2N2O1N2_EdL[9]3aFeL_9^3]FcLd9`3WFaLi9b3SF_Lm9\\2iE]N2\\OU:U2nEROR:l0QFROQ:=aFA`9fF_O\\9?gF^OZ9UOcEg0Y10T9YOgE`0[13P9\\OiEj8BmEM]1?h8CfE2m11^8j0dGSO^8k0dGSO^8l0cGSO]8l0dGSO^8k0dGSO^8j0eGUO\\8d0kGYOY8b0U3M3Jgh>" + }, + { + "size": [ + 480, + 640 + ], + "counts": "Y3e;[3000001O000000001O0000001O0000001O00000000001O0000000000001O0cKbLXM^3f2iLUMW3i2mLUMS3i2QMUMP3i2SMUMm2j2ZMPMf2[2eLYJk0W3`2_2hLWJl0W3]2`2jLWJk0W3]2_2kLVJk0Z3]2[2ZNdMl1V2UNiMl1V2TNjMQ2P2PNPNW2i1iMWNX2h1hMXNZ2f1gMYN[2e1eM[N\\2d1dM\\Nd2\\1]MbNk2W1VMhNk2X1UMhNj2X1XMfNi2Z1\\M`Nh2]1cMWN`2g1cMUN_2k1cMQN^2P2fMjM]2V2hMbMk2m1XMnMj2Q2VMnMk2R2TMnMl2R2UMmMl2R2UMmMk2S2WMkMi2U2XMjMh2V2]MeMc2[2bM`M^2`2eM]M[2c2gM[MY2e2iMYMW2g2jMXMV2h2kMWMT2j2mMUMS2k2nMTMQ2m2QNQMn1P3SNoLm1R3TNlLn1R3SNmLo1R3RN`LmKLQ6d3WNWLoK3j5f3dNZL\\1f3eNYL^OOaMi3P3XLZO5fMc3V7^LjHc3U7^LjHc3U7]LkHd3S7gLbHZ3^7QMWHP3^2VLa1Q1kKi2d2YL^1P1lKh2e2ZL]1o0lKj2f2nKbL6j4S1WKgN`0T4R3PLV1\\1eKg2T8]MgGf2W8\\MfGh2X8XMhGi2R4bKgKOm3b6aMmIh0Di1a6ZM`J;oNZ2d6XMbJ9kN_2f6UMbJ7jNe2g6PMdJ5gNj2i6mLdJ4dNo2m6hLbJ6aNS3n6dLcJ7`NU3n6cLeJ4^NY3n6QLhH9Q2N`3[4SKXLm0A>Ld3Z4RKYLi0C`0Ke3X4SK[Ld0Ed0He3X4SK]L`0Ff0Fg3W4TK]L>Fg0Fg3W4TK]L=Gh0Eg3W4TK^L;Hh0Cj3X4RK^L:Ii0Ak3X4RK`L5Km0\\Om3Y4QKbL0LR1XOn3Z4PKcLNMS1VOo3[4oJcLMNT1SOQ4\\4nJSNP1bMR4[4oJRNo0cMR4[4oJRNn0cMT4[4nJRNn0cMT4\\4lJSNo0`MV4]4kJSNn0aMW4\\4kJSNn0aMW4\\4kJTNc0SM^O=T5\\4kJTN?oMe4n3kJTN>oMg4m3kJTN=QNg4k3lJUN;RNh4j3lJTN;VNf4f3oJTN;WNe4e3PKTN;XNd4d3QKUN:XNg2fNgNn4WNTN:ZNe2gNiNk4XNUN9[Nc2gNkNi4YNWN7YNd2hNlNh4ZNWN5ZNc2iNmNg4[NVN4\\NQ1bN36=f4[NXN2[Nn0gN60?f4\\NXN0\\Nl0iN7Na0f4[N0l0_K6Lb0e4]N0h0bK7Jd0c4bNO?fK:Ie0b4cNO;iK;Gg0a4dNO7lK;Fi0`4fNN4mK^1BaNc0[1]OcNg0[1YOdNm0W1SOhNR1S1POkNU1Q1kNoNV1Q1iNoNX1YOVId1b5SO\\1l0dNTO\\N\\OfLN\\O110l1[1f3YOXNAdLIAa0n1_OWNP1^5F[N^OaLh0ESOg1n0h3J[N\\OaLR1[1Fi3L[N\\O`LU1\\1Ai3OZN[OaLX1Z1]Ok31ZNYOaL[1X1[On31XNYObL^1U1XOQ42WNXO
cL`1S1VOS43VNWOdLa1R1UOT45aJSO[32nLa12]N=h0g47^JSO]31nL`1GkNg0:i4?lMiNoLa1IlNa0;k4`0]N8YLlNID`0h0Q5c0ZN5[LWO52W5b0YNnN\\L?03OO\\5b0ZNiNbL>I:LN_5b0YNgNeL>G_1[5]OXNfNgL=G_1Z5_OWNdNlL;Cb1Z5_OXN`NPM<_Oe1Y5@XOiN_Kg1Y5@YOgN_Ki1X5AZOaNaKn1V5@N`02CK=6FF::GE9;HD8dLR5kN`Jb4=cLS5kN`Jd4;aLU5lN_Jd4<_LV5oN[Je4=ZLY5ROYJe4>XLY5SOYJf4=WLZ5TOXJf4U7bMdIV2RO9[7bM]I_5c6bJ[I_5e6bJVIb5k6`JkHg5U7Q11O100O1O101O000O10010O0O100000010JZHoHg7Q7ZHnHf7R74O100O2O000O10XHQI_7n6:O[HSIZ7l6eHUI?" + } + ] + }, + { + "image": "images/caption_detailed_55.png", + "subject_name": "bowl", + "object_name": "table", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "ejb06Q=7I7I5K4N3M2M3M4M1O2N2N2N1O2N1O2N1O2N1O2N101N1O100O1O2O0O1O2O0O1O101N1O10001N100O2O0O2O000O100O2O0O100O101O0O100O1000000O10000O10000O10000000000O100000O100000000O10O1000O10000O10000O100O100O100O100O100O2N100O1O1O2O0O1O1O1O1O2O0O1N3N1O1O2N1N3M2O2M3M2N3N2M2O2M3N3M2M3N3L4L4K6HZQn5" + }, + { + "size": [ + 426, + 640 + ], + "counts": 
"d1Z29Bb0f0b4IkJBc0e0a4JjJDd0b0a4JjJFe0`0a4Q2_KPN_4Q2bKnM^4R2bKnM\\4U2dKjM\\4V2dKjM[4W2eKiMY4Y2hKgMV4DoJ`0k0LU4\\2jKdMU4]2kKdMS4]2mKcMS4]2nKbMQ4_2oKaMP4`2PL`Mo3b2PL^Mo3c2RL]Mk3e2UL[Mj3g2VLXMi3i2XLVMg3k2YLVMf3j2ZLVMe3k2[LUMd3m2\\LSMb3n2^LRMa3o2_LQM`3P3`LPM^3R3cLmL\\3U3cLlL\\3T3dLlL[3U3eLlLY3U3gLkLY3U3hLjLW3W3iLiLW3X3hLiLU3Y3lLfLS3[3mLeLR3\\3oLcLo2_3QMaLn2`3RMaLm2_3TM`Lk2i0[L[Oi0Lk2f0dLZOa00j2e0kLWO;5h2b0RMXO56h2`0XMXO08g2?]MWOM9d2a0aMUOL9b2a0eMUOI;a2?hMUOH;_2`0kMTOF=^2>nMUOD<^2=QNVOA=^2;TNWO^O>^2;TNWO^O>^2:VNXO\\O=^2:XNXOZO>^29YNYOYO?]27\\NYOXO?\\27]N[OVO>]26_N[OTO?]26_N[OTO`0\\24aN\\OSO`0\\23cN]OQO`0[22eN^OPO`0[21fN_OPO?Z22gN^OoN`0[20gN@nNa0[2nMdLc1S2OnN?o2ZOTN7mN`0o2XOUN7lNa0Q3UOTN:lN`0R3SOSN>jN?T3QOTN?hN`0U3POSN`0hNa0V3lNSNc0gNa0X3jNRNd0gNa0\\3eNnMk0eN`0a3`NkMP1eN`0a3gM^Ka0\\2X1eN`0b3fM_K?\\2Z1cNa0l3RNaM]1dN`0n3oM_Mb1bN`0P4kM`Md1`Na0P4kM`Md1aN`0P4kM_Me1aN`0Q4jM^Mf1aN`0R4hM_Mh1_N`0R4eMaMk1]N`0j6@VI`0j6@VIa0i6@VI`0k6_OUIa0k6_OVI`0j6@VIa0i6_OWIa0i6@WI?j6@VI`0j6@VI`0j6@VIa0i6@VI`0j6@WI?i6AWI`0h6@YI?g6AYI?g6BXI>h6AZI?e6A[I?e6A[I?j2fMIj1^Ma0h2iMFd1eMb0e2kMEb1gMc0d2lMD_1jMe0b2nMB\\1mMf0a2PN_OZ1RNf0^2RN^OV1VNh0\\2TNWLEk2^1dNh0Z2_NROg0fNj0X2_NROf0hNj0[2[NmNi0jNm0Y2YNlNj0lNm0Z2WNjNj0oNn0X2WNiNi0QOQ1V2UNiNi0ROR1d5lN^JT1a5lNaJS1_5kNcJU1]5jNdJV1\\5hNgJX1W5hNjJX1V5gNkJZ1T5dNoJ[1P5eNRKZ1n4eNSK[1l4eNVKZ1j4fNVKZ1i4gNWKZ1h4gNWKY1i4gNXKX1g4iNYKW1f4jNZKV1f4jNZKW1d4jN]KU1b4lN^KU1a4lN^KT1a4mN_KS1a4mNaKQ1^4PObKQ1\\4POeKo0[4QOeKo0Z4SOeKn0Y4SOgKm0X4TOhKl0W4UOjKj0U4WOkKj0l0nM]OY1Gi0j0QN]OW1Jg0i0SN[OX1Ke0j0SNZOY1Le0i0SNQMGd1b1b0d0h0UNoLHe1`1d0c0h0VNmLIe1_1g0a0g0XNjLJg1_1g0?h0YNhLKf1_1j0>g0\\NcLIj1_1l00n3d2dMXOPNTN>Oo3e2cMYOoMSN`0MP4f2aMZOoMSN`0LQ4g2aMXOoMVN?IR4j2_MWOPNVN`0FS4m2]MWOQNUNY5d2gLVOPNVN=IV4k2\\MVOQNVN60]4d2\\MVOQNWN32_4a2]MWOnMXN53^4^2_MWOlMZN65\\4Z2cMGPNQN\\4X2dMHnMSNo2@QNd2S1g0e0PMSNZ2W1g0b0UMTNT2Z1i0=YMVNn1]1j0;[MVN:]OOS2\\27_MXN3@3P2\\26aMXNMF5l1^23cMZNHI7j1^21gMZNCL8j1e0iMGU2K[N^O0;g1d0kMGR2N[N[O2=e1c0lMHQ2M]NXO4?b1d0lMIo1O]NSO8b0`1b0mMIm11^NPO:d0^1b0nMIk12^NmN>e0[1c0oMHi15^
NiNa0g0Y1c0PNHf16aNeNb0k0V1b0RNGd19aNaNe0m0T1b0TNEa1=aN]Nh0o0S1a0UNE^1?aNZNl0P1P1b0VND\\1a0bNXNm0Q1o0b0VND\\1b0aNUNP1S1m0b0WNCZ1e0bNoMS1X1i0a0YNBY1f0bNmMV1Y1g0a0XNCY1g04UO;a0XNCX1h06SO:b0YNBW1i06TO9a0ZNBW1i06TO9a0ZNCU1i08TO9?ZNDU1i09SO8`0ZNDU1j08RO9`0ZNDU1j08RO:?YNDV1k08RO8?ZNDU1l09QO8?ZNDU1l09QO8?ZNDU1l09QO9>YNEU1l0:QO7>ZNEU1l0:QO7>ZNEU1l0:QO7>YNFV1k0:QO8=XNGV1k0;PO7>WNHW1j0;PO7>VNHY1j0:QO7Y2a7VNQHA>Z2`7VNVIj1j6VNVIj1j6WNUIi1k6WNUIi1k6XNTIh1l6XNTIh1l6XNTIh1l6XNTIi1k6XNTIh1l6XNTIh1l6XNSH^O9[2OfMc6a0UI^O9[2NgMd6`0UI^O:Z2MhMd6a0UI\\O:[2MiMc6`0VI\\O:\\2LjMc6>VI^O:Y2MmMa6gM]O:W2MQOX2oNJ_OA]NSO?S2IiNl1c0\\O_O`NSO>R2JiNj1h0ZO[OeNQO=S2JiNi1m0VOWOjNPO=T2IgNj1Q1TOUOkNoN>T2IgNi1S1ROUOnNmN?S2HgNh1W1oNUOROjN?S2HgNg1c1dNjN_OhN>T2HfNg1l1ZNfNIcN>U2HeNg1S2QNdN2`N=T2IeNf1Z4SNmJ>T2IdNf1\\4TNkJ=U2IdNd1^4VNiJ>T2HeNb1a4XNeJ>U2HeNa1b4YNdJ>V2HbNa1f4XNbJ?U2IcN_1g4YNaJ?U2IcN^1h4[N_J>V2IcN]1j4\\N\\J>W2IcN[1l4_OaLVOcN[1l4@`LUOcN[1n4@_LUOcNY1Q5A\\LWObNW1S5B[LWObNU1V5CXLXObNT1W5EVLWOaNU1Z5DULWOaNS1]5ERLXO`NS1_5EQLXO`NR1`5FPLXO_NR1c5EnKYO_NQ1d5GlKYO^No0i5GiKZO^Nn0j5IgKYO^No0k5HhKXO]No0l5IgKXO\\Nn0o5JeKXO[Nn0R6IcKYO[Nl0U6K_KYO\\Nk0V6L]KZO]Ni0W6M\\KZO\\Ni0Y6M[K[O[Ng0[6NZK[O[Nf0]6OXKZO[Ne0_61VKZO[Nd0`62UKZO[Nc0a64RKZO\\Nb0d63PK[OZNb0h64nJYOZNb0j64lJ[OXNa0m64jJ\\OYN?n66hJ[OYN?Q75fJ\\OYN=S77dJ\\OYN_IDZNMX8`0]IC[NLY8a0\\IC[NJ[8c0ZIDZNH]8c0ZIEZNF^8d0XIFZND`8f0VIGZNBa8f0UIH[N_Ob8i0SIG]N^Ob8j0QIH]N\\Oe8k0nHJ[N\\Og8j0nHJZN\\Oj8i0lHK[NYOl8k0jHKZNYOn8k0hHL[NWOf0M[7P1cIL[OVOR7n0cILZOXOR7l0dILXO[OT7i0cILXO^OS7f0eILWO@S7e0eIKWOBS7c0fILVOBS7b0gILUODT7?gIMTOGS7=hILTOHT7W2SAjMk>V2UAkMj>V2UAmMh>S2WAPNh>Q2PAWNn>_3N2O1N2N2N2O0O2N1O2N2N2O0O2O0O2N2O0O2N101N1O101N1O1O100O2O1N1O2O0O101N1O100O2N100O1O100O100O101N100O1O100O2O0O100O1O100O1O100O1O10O01O100O010O1O010O00100O010O0010O01O1O00100O001O01O10O010O010O01O010O0010O00100O0010O0010O01O010O01O010O01O01O010O0010O001O010O01O010O01O0010O1O010O01O010O001O010O010O001O010O10O01O01O01O10O1O01O0010O01O010O010O010O001O10O010O01O010O1O0010O10O1O001O010O1O10O01O001O010O1UKbBS3_=f1
1O001O100O1O00100O100O00100O1O1O010O1O1O1O1O101N1O1O1O101N1O1O2N1O2O0O2N1O2O0O2N2N2N1O2N1O2N2O1N1O2N2N1O2N3M2N4L4M4K7I2N2N2N2N1O2O1N2N2N2O1N2N2N2N2O2M4L3N2M6J:F3M1O3N1N2N3M3Mf6" + } + ] + }, + { + "image": "images/caption_detailed_57.png", + "subject_name": "laptop", + "object_name": "table", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "0V5j90WFlJf9W501O1O4L4L1O2N2N5K2N3M1O3M2N3M2N3M2N3M6J1O002N4L3M1O12O0100NO2N2O3M1O01O02M7J2M1O2O2L3N3L2O1N5J:E4N2O004N2NO2N0101O1N8H0O00003N1N2N2N2M3N1O0000O1O1O2O0O1M3O1O0100000O001O01M3O1N2JkE_KW:_46N2O00100O10001O0001eEdKP:\\4oEfKP:[4PFdKP:]491iEfKf9]4QFiKo9d40000000000000000000000001O00001O1O001O000iKRF\\3o9`LUF_3k9^LYFa3g9]L[Fc3e9\\L\\Fd3d9[L]Fe3d9XL_Fg3a9XL`Fh3`9VLbFj3Z:0O10000000000M3O1000000O1HmK`E0NT4e:mK[ES4e:nKZEQ4f:5L4000000LeKbE\\4\\:5N2000XMkEe0T:XOWFa0h9\\ObF>Z9lMnEc1R1:T9CQG;o8\\OjElN[1f1k8ZOaGe0^8ZOdGf0\\8WOhGh0U8YNmEn0o1i0T8XOnGh0R8WOPHh0o7UOUHk0k7QOYHo0g7POZHP1f7nN]HQ1c7mN_HS1`7jNcHW1U7ZN[F=c2Y1Y7gNgHY1Y7fNhHZ1Y7dNhH\\1X7bNiH_1W7`NfHIZMg1P:]NiHLWMg1P:[NoHe1U:1M3JVNUCk1P=00000K5LoMYCQ2j<100M3O100000000O100000000O1O1001O00000O2O1O1O2N2N001O1O1O1O001O001O1O0000001O1O00000000MYNmBg1S=YNmBg1S=YNmBg1S=YNmBg1S=YNmBg1V=000001O000000000000001O00000000001O00001O1O00000O2O00001O001O001O1OYMfNhGZ1W8gNjGX1V8iNiGW1W8iNjGV1V8kNjGT1V8lNkGS1U8nNkGQ1U8oNkGQ1T8QOkGo0U8QOlGn0S8TOmGk0S8UOnGj0R8VOnGj0R8VOoGi0P8YOPHf0o7[OQHe0o7[OQHe0o7[ORHd0n7\\ORHd0m7^OSHa0l7@TH`0l7@UH?j7CUH=k7CUH=k7DUH;i7GWH8i7JWH5h7LXH4h7LXH4g7MZH2d70\\H0c72\\HNc73^HLb74^HLa75_HKa75`HJ^78bHH]7:bHF^7:cHE\\7=cHC[7?eHBZ7?eHA[7?eHA[7?eHA[7?eHAZ7a0eH_O[7a0eH_O[7a0eH_O\\7`0eH_O[7a0eH_O\\7a0bH@^7`0bH@_7?bH@^7`0bH@^7a0aH_O_7a0aH_O`7`0`H@`7`0`H@a7`0^H@b7`0^H_Oc7a0\\H@e7`0YHAg7?YHAh7>WHCi7>VHBj7>VHBj7>VHAl7>THBl7>THBm7>RHBo7=QHZOeMB[:T1PHUO^8k0bGSO_8m0aGSO_8n0aGPOa8o0`GPO`8Q1_GoNb8P1^GoNc8Q1^GlNd8U1[GjNf8V1ZGjNg8U1ZGjNf8V1c2O10O01O01O0010O01O01O1O010O001O001O01O000000010O00100O010O0001O3M2O0O2N2N2O2M1O2N3N0O3N1N3M2N3N1NTgZ3" + }, + 
{ + "size": [ + 480, + 640 + ], + "counts": "[5e2=`1KdM`5l0eJ`1KdMa5k0cJb1LcMc5i0`Je1MbMe5h0]Jf1NbMg5f0[Jh1NbMk5b0WJl1NbMm5`0UJm1OcMn5>SJo1OcMP6R1V8]O]G@>R1U8]O^G@`0Q1S8]O^GBa0o0Q8^O`GAb0o0n7@`GAd0m0l7AaGBd0l0k7BbG@d0n0j7AcGAd0m0h7CdG_Of0m0f7CfG^Of0n0d7DfG^Og0m0d7DeG_Oh0l0P2eNj1o0_K_Oh0m0b7DfG^Oj0m0`7DgG_Oj0l0_7EgG_Oj0l0^7EiG^Ok0l0\\7EiG@k0k0\\7EiG_Ol0l0[7DjG@k0l0[7DjG@l0k0l1mN]1g0kKAm0k0k1POYLLh4g0XLBm0j0j1ROY1b0PLAn0k0j1QOX1b0QLBm0k0j1SOW1>SLCm0l0i1YOQ18XLDn0k0j1XOP18YLEm0k0j1YOo06[LEm0l0j1XOn06\\LFl0l0j1_Og0ObLFn0l0i1Bd0KfLEP1m0g1C7ZOPLa0R1DR1n0e1G3WORLa0T1CR1n0e1H2VORLb0T1AU1o0d1JNNYMYOU1o0d1KMLZMZOV1o0d1JLMZMYOW1P1c1JKM[MYOY1o0b1KIM\\MYOY1o0c1JHM\\MZOZ1o0b1KFM\\M[O\\1m0b1LEL\\M[O^1m0a1LEK]M\\O]1m0b1KDL]M\\O\\1n0c1KCJ^M]O]1n0c1JBK]M^O^1m0c1K@K^M]O`1m0c1J_OL^M]O`1m0c1J@J^M^O`1n0c1I_OK^M^O`1n0c1J^OJ_M]Oa1o0b1J^OI`M^O`1o0c1I]OJ`M\\Ob1Q1a1J\\OHbM]Oa1Q1a1J\\OHbM\\Ob1R1a1JYOIdMZOc1S1`1KXOGc0>5KXOGb0?6JXOFc0`05JXOFc0`05KWOEd0`05KWODe0a04KWODe0a04LVOBg0b04KUOBh0c03KUOBh0c03LTOAi0c03LTO@j0d02MSO_Ok0d03LRO_Ol0e01NRO]Om0e01OQO\\On0e02NPO\\Oo0f01NPO\\Oo0f01OoN[OP1f01OoNZOQ1g00OoNZOQ1g00OPOYOP1h000POVOP1k00OPOVOP1k000POTOP1l000QOSOo0m001POQOQ1n0O1POQOQ1n0O1QOPOP1o0O2POoNQ1o0O2QOmNQ1Q1N2ROkNP1T1O0ROkNo0U1N1TOhNo0W1M2d3N\\L2d3N\\L2d3N\\L2d3N\\L3c3M]L3c3M]L4b3L^L4b3L^L4b3L^L4b3L^L5a3K^L6b3J_L5a3K_L6`3J`L6`3J_L7a3I_L8`3H`L8b3F_L9a3G^L:c3E]L;e3C[L>e3A[L?e3A[L`0e3_O[La0e3_O[Lb0d3^O\\Lb0d3^O\\Lb0e3]O[Lc0e3]O\\Lb0d3^O\\Lc0b3^O^Lb0b3^O_La0a3_O_La0a3_O`L`0`3@bL?]3AeL=[3CfLhM@X2`0kM]OU2c0lM\\OT2d0nMZOQ2g0PNYOo1g0SNWOm1i0UNUOj1l0WNSOi1m0YNQOg1o0_NjNa1W1T60000000O100000000O1000000O10000O1000000000000O100O100000000000000O100000000O100000000O1000000O1O100O1O100O100O1O10000O1000000O1000000000000000000000000001O00001O000000001O00001O000000001O000000000000001O00000000001O00eB" + } + ] + }, + { + "image": "images/caption_detailed_58.png", + "subject_name": "person", + "object_name": "surfboard", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 432, + 640 + ], + "counts": 
"_nd22\\=3N2N100O11O000000QFJe65ZINd62ZI0f60XI2i6MTI6m6JnH9S7GkH;U7EhHj7BTH?m7BoG?S8AkG?X8@fGa0[8@bG`0`8@_G?c8A\\G>g8BWG=k8CTG;o8EPG9T9GkF7W9JgF6[9IdF7]9JaF6a9J]F7c9I]F6d9J[F7f9IXF7i9JUF7k9IUF6m9JQF7o9JoE7Q:KlE5MBP9lFO0ET9=kF2JD[9>fFR1Y9QOeFo0[9SObFn0^9SO`Fm0a9UO]Fk0c9WOZFj0g9XOUF?U:Y16WO`EZNh:c1d0O2M101O0O101O001N2O1O1O001N2O1O1N2O2M4L=C3L5K6Hejf4" + }, + { + "size": [ + 432, + 640 + ], + "counts": "c\\k24\\=001O00000000001O0O100000001O000000000000000O1000001O0000000000000000000000000O1000000000000000O10000000O1000O100000O01000000O10O0100O10O0100O1O100O2Meab4" + } + ] + }, + { + "image": "images/caption_detailed_59.png", + "subject_name": "horse", + "object_name": "sand", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "VSR21[=0ZS_16]l`N5K4M3M4L5QDUOT;R1bDVOZ;`1M3L4M3N2N3M\\OPE@\\:=jEBU:8QFHn98QFJo97oEJQ:h10]OkEPNT:n1SFmMn9U2c02N00]OWEcNj:\\1[EYNI1m:d1d0N2N30\\EaNZ9b1bF`N]9b1`F_Na9`1^FaNc9^1[FdNe9[1YFhNg9X1UFlNl9S1SFlNP:S1oEkNU:V1eElN]:o12N2M4L5M3L2B`0H7L5M3N2M3N001N101O0@eC1[d3^8bLaG^3_8aLUGM6b3e8aLUGN6`3e8oLTGS3l8f000O1000000000000000000000O11O000000000000000000000000000000bLTG]2m8QM[GDI10Z3l8mLcGX3]8dLhG[3X8cLjG]3R9000000000004L0000000000001OO1M3N2000000000000001O000000001O1O001O00000000001O00O1WOdLcG\\3T92002N1O000000000000O1O100O1O2N00101N1O1O1L4001O1O2N3M1O1O0000000000000000001O2N1O0000001O00mNYMaGh2\\8_M`Ga2\\8dMcG\\2\\8fMcGZ2]8Z11OO101N1000000000000O10000000000000000000000000000000000001aK`GQ4Q9L2N3M2N2N2N8SLaF5KR3R:00O2O000001O0000O01AQMbFo2]9TM`Fm2`9TM_Fl2a9UM^Fk2a9UM`Fk2_9RMfFm2Y9oL`FM8T3h8mLPGO682l2g8cMZG]2f8`M]G`2c8^M_Gb2`8ZMeGf2[8YMfGg2Y8ZMgGf2^8PMnFKf0U3\\91M3M3O1L4N2M3N2N2N2J6E;M3O10000O1001O3M>B>oK^F]3l9K2O1O00001O0000lNmLRG>:e2_8jM]GV2`8PN\\GQ2^8kL`GW10n1_8a110000000000O1000000O10000O10000O10000000000O1000000000000000000O100000001O0000000O100000000000000O1000001O0O10000000000000000O2O0000000000001O0O1000001O00000000000O1000000000000000O100000000000O1000O1000000000000000000000000000000000O11N100000O11O000000
O11O00000000000000000000000000000000000000000000000000000000000O100000001O0000000000000000000000000000000000000000000000000O2O01O00O1000000000000000000000000000001O000000000000000000000000000000000000000000001O00000000000000000000000000000000000000000000000000000001O0001O0000000000000000000000000000000000001O00_OWG`Li8`3XG_Lh8a3YG^Lg8b3YG^Lg8a3ZG_Lf8a3[G^Le8b3[G^Le8b3[G^Le8a3]G^Lb8c3^G]Lc8b3]G^Lc8a3^G_Lb8a3_G^LW1" + } + ] + }, + { + "image": "images/caption_detailed_60.png", + "subject_name": "banana", + "object_name": "bowl", + "predicate_name": "beside", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "c`]5c0\\>`0@3N1N2O1N2O0O2O1N101N2O0O2O1N101O0O2O0O2O001N10001O000O2O00001N100000001O0000001O0000001O0000000O11O000001O00000000001O0000001O00000001O0001O00000001O0001O00000000010O00000001O01O00000001O01O000000010O0000010O000001O01O0000010O0001O01O00010O0001O01O01O01O00010O000010O00010O00010O00010O0100O0001O0001O10O001O00010O010O02O0O1O010O101N010O1O10O02O0O1O100O100O100O2O0O101N101N1eGZM[4f2aK`MbLK[7g2PLaMaL1Y7_2SL[Ng3g1TL_Nj3a1SLdNj3^1QLeNP4]1kKeNV4\\1dKhN\\4\\1\\KgNf4[1SKgNo4\\1gJgN^5\\1YJgNj5_1gIfN\\6c1RIdNQ7c42O2N1N3M3N1N3N2N1O2N2M3N1O2N2N1O2O1N1O2N2N2N2N2O1N1O2O1N2O1N2O1O1N2O1N2O1N2O1N2O1N2O1N3M2N2O1N2N3M2N3M2N3M2N2N3M2N3M3M2N3L4M3M3L4L3OXJ" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"hZ^55j>3N1M3eI9YMJe2k0iGmNe4:`3_1UL`Ni3c1VL\\Ni3e1YLZNe3g1[LZNc3g1^LYN`3h1`LXN_3i1aLWN^3j1cLVN[3k1eLUNZ3l1hLRNW32aHV1Z4gNS31hHV1W4gNP33nHR1T4iNm26RIn0Q4mNk25XIk0n3oNi27ZIi0m3POh27]Ih0k3QOg28_If0j3ROf29aI7VOAc4Oe29dI5WOA`41d2:fI2WOC_41b2lIIV4Jl1=QJFT4Mj1>SJDS4Ni1?TJCS4Nh1`0VJAR4Og1a0WJ@R4Ng1c0XJ]OQ41f1c0ZJ[OP42e1c0\\JZOP43c1d0^JWOP45a1e0O[O0f00ZOOf02ZOMg03YOLh04XOLh04XOKh05YOKg05YOJh06XOIh07YOIg07YOHh08XOHh07YOIf08ZOGg09YOGg08YOIg07YOIg07YOIf07[OHf08ZOHf07[OIe06\\OJd06\\OJc06^OJb05_OKa05_OKa05_OK`06@J`06@J`07_OI`09_OGa09_OGa09_OGa0:^OFa0<^ODb0<^ODb0=]OCc0=]OCc0=]OCc0>\\OBc0?]OAc0?]OAc0?]OAc0?]OAc0?]OAc0?]OAb0a0]O_Oc0a0]O_Oc0a0]O_Oc0a0]O_Ob0b0^O^Ob0b0^O^Ob0a0^O@a0a0_O_Oa0a0_O_Oa0a0_O_Oa0`0@@?a0A_O?a0A_O?a0A_O?a0A_O?a0A_O?`0B@>`0B@>`0B@=a0C_O=a0C_O=a0C_OX15nJ_Oh3=Z14nJ_Og3>[13mJ@g3=]13kJAh3<]12kJCg3;_12iJDg3;`11hJEg3;a10gJFg3:c10eJGg3:d1OeJFg3;e1NdJHf3;f1McJIg39g1NaJJf39j1M_JKf38l1M^JKe38n1M\\JLe38o1L[JMd38R2JYJ0d36T2JWJ0e36U2JUJ1e35W2JRJ3f33Y2JPJ4f32[2JnI5NZOg2f0_3KjI7OZOg2c0a3MfI81YOf2c0d3LbI:3YOf2a0f3M^I;5XOf2`0h3NYI=7WOg2>j33nH=?TOg2=n3a1ZISNf2=Q4`1WIUNg2:T4a1SIWNg29X4_1oH[Ng26[4_1lH]Ng24`4^1fHaNg23d4\\1bHdNi20g4[1]HiNi2Mk4_2RKbMQ5^2lJcMU5]2gJfM[5Z2bJfMa5[2ZJgMh5Z2RJiMP6Y2jIiMX6Y2_IlMd6V2RIoMP7X2_HoMe7d42M3N4L3M4L9GA;E?B=B;D9GmeT1" + } + ] + }, + { + "image": "images/caption_detailed_61.png", + "subject_name": "person", + "object_name": "wall", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 482, + 640 + ], + "counts": 
"W]o31o>4M2N2[D1T81gG9R8IiG?R8CkGm0f7UOWHR1c7oNYHV1e7lNXHW1g7kNkGa1T8bN_Gj1^8\\NUGm1j8VNkFS2U9PNbFV2]9h1O10000O1O1O10000O1000000O1000000hMiJbH1ONk1X5Z5^KdJb4X5dKfJ\\4m4SLQKm3n4XLnJh3R5ZLkJg3T5aLeJ_3X5gLeJY3Y5mLcJS3Z5SMcJm2\\5XM_Ji2`5[M]Je2\\5eMaJ[2Y5nMdJR2W5UNfJl1W5ZNfJf1W5_NgJa1V5dNgJ]1V5hNgJY1S5^4O1N2O1M3M3N2N2O1O1L400O1O1O1O1O1O1O100N200OoGiKY5W4i2O1O10000O100O100O10000O1000000O1000001O0O0100001OO01000000001O00000000001O00001O1O1O0000001O0000010O0O2O1O1O001O000010OO2O1O1O1O1eIfKl1[4RNiKk1X4TNkKi1V4UNnKh1S4WNnKh1S4VNPLh1R4UNPLj1R4SNQLk1P4SNQLm1Q4PNQLo1Q4mMQLS2Q4iMQLW2Q4fMPLZ2S4aMPL^2S4]MoKd2U4UMnKj2Z4jLjKU3a4`JSJ11O0_1_1P4]5]KfJb4a5UKaJk4`5PKdJP5^5nJcJQ5^5mJcJS5^5kJdJU5\\5iJeJW5^5dJdJ\\5b7N100010OO100010O01O001N110O0O1000000jJWFS5i9mJWFS5i9lJXFT5h9lJXFT5h9mJWFS5i9mJWFS5i9mJWFS5i9lJYFS5k900001O01O1O001O1O0O1000O2O0O10000000O101O0001O01O0O101O00001O0000001O001O1O1O1O00001O001O001O1O00001O0000001O1O1O1O1O1O1O1O1O1O001O1O1O1O1O001O001O1O1O001O002N1O1O001O001O1O1O1O1O1O001O1O1O1O001O2M2O0O2O001O100O1N2O010O1O1O100N101O1O1O1O1O00100O1O1N2O00011N2M110O002N1O1N110O1O2N1O001O1O1O2N001O002N1O1O001O2N1O1O1O2N2N1O1O2N1O1O1O2N1O1N4L2N2O2L4L5K:DRo0" + }, + { + "size": [ + 482, + 640 + ], + "counts": 
"Zn071200001J0005O11J0O2O50K0:0K0J10001O0O5^2IiM7JKR7h4WJSMb5o2ZJUMe5l2WJWMi5l2SJUMm5m2PJTMP6o2iIUMW6n2bIVM^6f510000000000O100000000000000O1000000000O010000000000000000O10O11O1O0O2ROaIWI_6a6U1K5K5L4N102N1O1O00010O10000O1O1N200O100000000000000O10000O2O000O10O1TH^JX6b5fIfJT6Z5lIhJR6W5nIlJP6U5oIkJQ6U5oIkJR6U5lImJS6S5lInJT6R5lInJS6S5lImJU6S5jInJV6R5iIoJW6R5hInJX6R5gIoJY6Q5fIoJ[6e3RI`La0L]6a3XI`L:O_6_3[I`L61_6^3]I`L33`6\\3`I`LO3c6[3aIaLH7g6W3cIaLAVOES1X7U3cIbLB9n6R3aIfL_O9R7o2`IhLZOg0QO_OKKB2H7CJo11f2S1XKiN1c1h0_O]O^OLK0GW34^1k2PLUN^OYONMNFo0LW19_2i2QLWN^OWOONNF<`0f1Fb2f2PL\\N_OROO1NE;c0e1Dc2d2PL_N@POO1OD9f0e1Dc2e0]Ke0e0H_OnNO4OA9h0d1Ce2c0]Ke0e0L]OlN05N_O:j0d1Cd2`0`Kf0c0N^OiN07;:`1Cc2?^Kh0e0O^OeN298:b1Bc2e0^K;f08\\OaN3=5:d1Bd2b0^K9h0=YO_N4>49e1Cc2n1XLZOWO^N6>2;e1Bb28`Kg0j0=XOTN6d00:f1Bb28`Kc0l0c0WOPN5h0O9e1Cd25`K>Q1i0]OiNH9f1Ba2[OdKh01?n0m0]OgNH8g1Cc20dKd0i0m0_OeNJ8e1Dc2DeKI3V1e0m0AdNI9e1Ec2BgKH0Y1e0n0BbNH:f1Cc2@hK_1d0c0]OeMOX2c1cNd2^OhKa1c0b0_OdMOX2b1eNc2_1XL3CaM0Y2a1eNb2a1UL4G\\M2[2^1eNc2h1kK41C_1cNc2IoKl3NkM^1bNc2HQL_6[1kIc2V7[MlHd2U7ZMlHf2V7VMmHi2T7UMnHk2S7QMPIn2R7nLQIQ3Q7kLRIT3Q7eLSI[3Q7_LRI`3Y9O1O1O1O001O1O1O1O1O001O1O1O1O1O1O1O1O1O001O001O001O2N001O1O1O1O1O001O1O1O1O1O1O1O1O1O1O1O001O1O1O2N002N001O1OhMRN]Gm1`8YN^Gf1`8_N^G`1`8dN_G[1m2WNa0a0aLW1l2^N=>gLS1i2cN>=iLn0g2jN<:mLj0f2oN;9oLg0d2SO;8QMd0c2WO98TM?b2[O87XM=]2_O:4ZM<[2A:5[M:Y2C;4]M8V2F<3^M7T2H<2aM6R2H=3aM4Q2K=1dM2o1M=1eM1n1O<1gMMm13<0iMJl16:1kMKg15>0lMKe15`0OlMLb17`0OnMJ`19ROWNbM1h0h1UOVNc1_o0" + } + ] + }, + { + "image": "images/caption_detailed_62.png", + "subject_name": "person", + "object_name": "wall", + "predicate_name": "sitting on", + "mask_rles": [ + { + "size": [ + 444, + 640 + ], + "counts": 
"mm[43g=3N1O2O001O001N1010O01O0aJD]M1=Q1]OS2I_Me4a0bKoNa0n0@S3\\4lMnKROHQ3Y4QNiKPOOo2W4UNbKPO8k2U4NlK3S4LmK5R4KoK5P4LPL4o3MQL3o31mKOS43iK0U4l40000O10000000YHfKl5Y4l101O00000O10QOnKTHR4g7XLTHh3f7cLTH^3d7PMUHQ3b7i1^OmIVIX6g6a0M2O20O000001O0001O000001O00010O0000001[JTIX4l6`K`I\\4a6bKaI]4_6bKcI]4]6cKdI\\4\\6cKfI\\4[6nJQI9h0h4W6mJTI9f0j4W6kJVI4i0P5R6kJ]JU5V7O1O010O001O001O1O1O1O1O1O2N1O001O1O2N001O1O1O1O1O1O002O0O1N200O2N1O1O1O2[N[FoNf9c0YFSN0L4\\1d9l0XFhM3]1g9i0VFjM2^1j9f0TFlM2^1l9h0UFWOm9h0RFXOo9Z23M2M4M2N3L3N4L6H4M4L3N3L3L6J4M3M4J:D8I6M5GUCWOZ=6c__1" + }, + { + "size": [ + 444, + 640 + ], + "counts": "o_l5:P1Ib9?gFNjND?3n93L3O1O1O1N1O2O001N2O2N2M4M1O1N2N0O2O1O1O1O001O2O000O010O2O000000010O1O:F2N2N1O001OO100010OO1O011O002M5L1O1O1O2N1O2N1O1O1N2O1N2N2N8G6D`ALcml02WaSO2M2O2N1O2M2O2K6L3N1O2N1O3M2N2O0O2N4L2O0YHeNi1\\1UNiNg1X1UNmNh1V1VMeNQL?b6n0YMjNiKe0i6b0\\M:_2H]M`0^2B_Mc0\\2@[Mj0a2ZOYMm0c2TO[Mo0c2ROZMQ1e2SOUMR1g2SORMR1k2QOQMS1l2ROnLR1o2TOkLP1R3QOlLR1Q3POkLT1S3nNkLT1S3lNlLW1R3jNlLZ1R3eNnL^1o2aNPMd1n2\\NQMf1m2ZNSMh1l2VNUMl1j2RNWMo1h2PNWMT2g2kMTM\\2l2dMQM`2n2_MPMd2o2[MmLk2S3TMgLS3Y3lL`L\\3_3dL\\Lb3d3^LWLg3i3YLTLj3k3WLPLn3P4SLjKR4V4oKUK[M200P7h4S3000000O1O100O0010000O2N100O100O00100O100O100O1O2O0O001O100O100O1nMiEnMFLNO;b0Z:\\1\\ESN_1>U9Z1XGcNi8Y1_GcN`8[1eGaN]8V1nGfNT8Y1j2N2O1O1O2O000000O01000001M200O100O10O01O1O2O0O10000O10000O1000000O100000nFXOl3h0V501O0000O0101O0000000000000000000001O00001O00000YFXOV5i0`4001O001O001O1O001O001O001O1O1O1O001O001O1O1O1O1O2N2N1O1YEcNi7^1VHbNj7_1THcNk7^1gFiNFKc9^1VF`N@i03^OV:Z1oEgNEZ1\\:1iEjNIV1^:3aElN0S1^:g2O1O00001O001O1lMeKaI\\4Z6iKeIX4V6oKgIR4S6VLjIj3Q6\\LnId3o5bLPIQOCF:5M5JYbd2" + } + ] + }, + { + "image": "images/caption_detailed_64.png", + "subject_name": "car", + "object_name": "road", + "predicate_name": "driving on", + "mask_rles": [ + { + "size": [ + 543, + 640 + ], + "counts": 
"kRZ83h`09H9a_OQ7l7jIdGWOkNgAT1^>bNhA]1n>O001O00000000O100000000O100O100O1O1SO]NXBi1g=h0M300O100O1M3O100M3O1O1N2O1O1O1N2N2N2M3O100L]LkBd3Y=O1O3M;cLWBn2n=O00cMQMkFn2U9RMSGf2j;ZMdA[2d>L1O5K4L6J2N1O1O1O1O2N0000000000O1O100N2H^Nn@g1m>8FPN_A10P2W>c000001O1O7`M^AU2h>N4L1O002N1O1O00000000000000O100O1lKTNSIn1h6YNTIi1l6aNiH`1V7YO_FeNYOOd0S2d9B^E\\NOU1l0m0h9T1TFmNl9X1nEiNQ:Y1nEgNR:Z1lEgNS:\\1jEeNV:_1eEbN[:b1]EbNc:m3O1O100O1O100O1O1O11O1O1O1O001O1O0000001O1O0000001O1O1OO1O13M2N1O001O1O001O0000001O2N001O001O001O002N1O001O00001O000000001O1O000000000000O11O001O0000000000O1001O001O000000000lJhDa4W;_KkD`4U;_KlDa4T;_KmD`4T;^KoD`4Q;`KPE_4P;aKQE^4o:bKQE^4o:aKSE^4m:aKVE]4j:cKWE\\4i:eKVE[4i:fKXEY4h:hKWEX4j:gKXEW4h:iKXEW4h:hKZEW4f:iK[EV4e:jK[EV4e:jK\\EU4d:jK]EV4c:kK\\EU4d:kK]ET4c:lK]ET4c:lK_ER4a:nK`EQ4`:nKbEQ4^:oKbEQ4^:nKdEQ4]:nKdEQ4\\:oKdEQ4\\:oKdEQ4\\:oKeEP4[:PLfEo3Z:QLgEn3Y:RLgEn3Y:QLhEo3X:QLiEn3e;O1O1O00lNRLWEn3i:RLXEm3h:SLYE9A5]OHNl1k;nM[E4c0g1R:UNaGd1o;N8H3fN[@T1j;lNRHS1n7mNSHR1l7oNUHP1k7POWHn0j7QOYHl0g7TO\\Hi0m;01O00000000001O1O0000001O000000O11O01O000000O100000000001O00000O1N110O1O1L5L3O1O1K50ON3O100O1100O001O001O001O000000001O2N5QOT@a0b`0B9FYoi01P`UO0``02]_O1_`09JDh_O=V`0Fh_O;V`09N10000000000000000m_O" + } + ] + }, + { + "image": "images/caption_detailed_65.png", + "subject_name": "cup", + "object_name": "bed", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 436, + 640 + ], + "counts": 
"oTo37[=5L3M2N2N2O1N101N100ODYCNf<1[COe<1\\CNd<2\\CNd<2\\COc<1]COc<1]COd<0\\C0d<0\\C0d<0\\C0d<1\\CNe<1[COe<2[CMe<4ZCLg<3ZCL^S5DnJYIAi6;ZIDg69\\IFf64_IKb63cII_65bIJ_65aIK_64bIL_63aIM`62aIM_62bIN^62bIN_60bI0^6OcI1]6NeI1\\6MhIOY60iIOX6OiI1W6NjI2W6LkI3U6LnI2S6LgIHoL=Y9JiIInL=Z9IPJ6P6IQJ7o5HSJ7n5GSJ9m5GSJ:l5DVJgJ\\O_L6h8`0iJZO_L6o75WH?Z3VO`L5n7]1bK]NaL5n7^1fKaN[4_1]K_NeL2o7^1[KaNeL1Q8_1YK^NhL2P8`1XK]NiL2P8c1TK^NjLOR8d1SKdNn4]1QKbNP5`1nJ_NS5b1lJ^NT5c1kJ]NU5d1jJ\\NV5e1iJ[NX5d1hJ[NY5f1gJYNZ5g1eJYN[5f1`JUNVM5[8f1_JVNUM4]8f1]JUNWM5\\8g1\\JTNXM5\\8f1aJ[N`5b1bJ]N`5b1eGXNc26h5i1YJWNh5h1jGTNLNg17d6h1RJZNk1J^Om1dNZNn1K^Oj1cN\\No1I_Ol1`N\\NQ2H@l1]N]NS2G@m1\\N[NT2J^Om1\\NZNV2I^On1ZNZNW2I^Oo1ZNWNY2J]OP2XNWNZ2J]OQ2UNXN]2H]OR2TNWN_2H\\OS2RNVNb2G[OV2oMTNg2FYOY2mMRNj2EXO[2lMQNk2EXO]2iMoMP3DWO_2fMnMS3DUOa2dMmMW3BUOe2_MiM]3BSOi2ZMhMa3ATOj2UMiMf3_OSOl2QMhMl3\\OSOQ3dLjMZ4UOQOc4o0^KoNc4Q1]KoNb4R1_KlNb4S1_KmNa4R1`KnN_4Q1cKnN^4Q1dKnN\\4Q1eKoNZ4R1gKlNZ4T1fKkN[4T1fKlNZ4T1gKjNY4W1hKhNX4X1hKhNW4Y1iKfNX4Z1iKeNW4Z1jKeNV4Z1mKeNS4[1nKcNS4]1nKbNR4^1nKaNR4`1oK^NR4b1oK\\NR4d1nK\\NR4d1nK[NR4f1oKYNQ4f1PLYNQ4g1PLWNQ4j1oKTNQ4m1oKSNQ4n1oKPNR4P2oKnMQ4T2nKkMS4V2mKhMT4X2lKgMT4[2mKbMT4^2mK`MT4a2mK\\MS4e2nKYMS4h2mKVMT4k2lKQMV4Q3jKmLW4S3jKkLV4W3lKdLV4]3jK`LX4`3lKYLV4j3iKRLZ4o3hKkKZ4V4jKbKZ4^4iKYK]4h4V22N2O001N2O001N100O2O0O2O1O0O2O000O2O1O00100O001O001O001N102N1O1O1O1O1O1O1O1O1O101N1O1O1O100O2N1O2N2N3M4L2N4L3L5L3L4K5DH9B=D;G9C=H9G8M3L6J4M5J6J5L5I7H?WOhmk0" + } + ] + }, + { + "image": "images/caption_detailed_66.png", + "subject_name": "person", + "object_name": "pavement", + "predicate_name": "standing on", + "mask_rles": [ + { + "size": [ + 640, + 427 + ], + "counts": "n[k523VOa@l0R1N]>VO]@S1S1G`>l0\\ATOd>o0YAROf>R1VAnNk>U1QAkNo>V1PAkNn>X1PAhNP?Y1o@gNo>^1n@cNo>a1o@_N:\\OY=Y2ZB[N4MAQOa=j2gBYN2;D`N]=m2lBXNOc0Q=Z1kBVN2b0R=l4N4L2N3M1OCUC]I05k^571O2N1O2O1N]OjA_KT>b4lA_KR>Y5O001N2N101O001N1O101O000O1O2[KmIWGJJ241JO50i2]6f4PM^HlLg1V6i5l5O0001O7I:Fa0_Od0\\O:F=C;E<\\F\\ET9Z;^O;E4jGWDl7Sl0K4000H8134JN2BXAIP?K^`e4" + } + ] + 
}, + { + "image": "images/caption_detailed_68.png", + "subject_name": "cow", + "object_name": "grass", + "predicate_name": "walking on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "TVU22m>2O0O2O0000000O2L9\\ADX>c0N2N1000001O01O01O010O100O1O2M2O2M2O4K4M6I;F3M1O00YOg0O1O1O1O10000O11O001O2N2N4L2N2NCiBROV=n0kBSOS=m0oBUOmn0L4K5O13N6IWOiBFY=6iQn5" + }, + { + "size": [ + 480, + 640 + ], + "counts": "m7R7n7000000000001O00001O0000000000000000001O00000000001O000000000000000000000000001O0000000000001O0000000000000000000000001O0000000000000000001O000000001O000000000O1000000001O0000O1001O0000000000000000O10000001O00000000O100000000001O0000O2O0001O00000000000000O20O00000000000001O0000000O1UJfGQ5Y8oJhGP5X8PKiGo4W8QKiGo4W8QKjGn4W8PKjGP5V8PKkGo4U8QKkGo4U8PKlGP5T8PKmGo4T8PKlGP5T8mJoGS5Q8lJSHQ5m7oJ]Hg4c7YK^Hf4b7YK`Hf4`7ZKaHe4_7ZKbHf4^7ZKdHd4\\7\\KeHc4[7]KeHc4[7]KeHc4[7^KdHb4\\7^KcHc4]7]KcHc4^7\\KcHc4]7]KcHc4]7]KfH`4Z7`KgH_4X7bKjH\\4W7cKkH[4U7eKmHY4S7gKPIV4P7iKSIU4m6kK_Ii3a6VLfId3[6[LfId3Y6\\LiIc3W6^LhIb3X6^LhIb3X6]LhId3Y6[LXIT4h6lKQI[4o6eKoH]4Q7dKmH]4S7bKnH^4R7bKnH]4S7cKmH]4S7cKnH\\4R7eKnHZ4R7eKQIY4o6gKSIW4m6iKVIU4i6kKXIT4h6lKZIR4g6mK[IP4f6PL]Im3c6SL`Ij3`6WL`Ih3`6XLlHFWOR4l7WLmHLSOm3P8WLlH3nNf3V8WLlH5lNd3X8WLlH6jNd3Z8VLlH`4T7`KlH`4T7`KkH`4W7_KiHa4W7^KkHa4U7_KkHb4T7^KmHa4S7_KmHa4S7_KnH;eN^3]8WLnH7iNb3Y8VLRI1iNi3U8VLfIj3Z6VLfIi3[6WLfIh3Z6XLfIh3[6WLeIj3Z6VLfIj3Z6WLeIi3[6WLeIi3[6WLeIi3[6WLeIi3[6XLoH]4Q7cKiHc4W7^KiHa4X7`KgH_4Y7bKgH]4Y7dKeH]4[7eKjGT5V8nJcGW5]8f000000000000O11O0000000000O10000001O01O00000O10001O000001O00000000O2O00000000001O000000000000000000000000000000000000000000000000000000000001O00O1000000000000000000000001O0000000000000000000000000001O00O100000000000000000000000001O00000000000000001O0000001O00001O00000000001O000000000000001O0000000000001O00000000001O0000000000000000001O00O1001O00000000000001O00O100000001O000000000000000000001O0000000000000000000000001O00000000000000001O00000000000000001O0000000000000000000000000000000000000000000000001O0000000000000
000000aJjFT5W9iJkFW5V9hJjFX5V9hJjFX5W9fJjFZ5V9fJjFZ5V9fJjFZ5V9eJjF\\5V9cJkF]5\\900000001O00001O0000001O0000001O001O001O0000000000001O00000001O00O10000000000000000000001O0000001O00000000001O00000000O1000000001O00O10000000000000000000000O10000O100_OiJYGW5g8jJWGW5i8lJSGU5m8lJQGU5o8mJlFV5T9;000000000000000000000000001O0000000000001O00000000000000001O000000000000000001O0001O0000gF" + } + ] + }, + { + "image": "images/caption_detailed_69.png", + "subject_name": "wine glass", + "object_name": "person", + "predicate_name": "held", + "mask_rles": [ + { + "size": [ + 640, + 427 + ], + "counts": "WUb4:ec02M3N2N2O1N2O1N101]N\\Oh_Oe0T`0_Ok_Oa0R`0Cl_O?Q`0Co_O>n?EQ@;l?IS@7k?KV@5g?N]_OWO=j0T`02]_OUO?i0R`05^_ORO>k0S`05^_OQO:o0U`03^_OPO=m0P`0e0o_O[OP`0g0o_OYOP`0h0P@XOP`0i0V@POi?R1W@mNi?S1W@lNj?U1U@kNk?V1U@iNk?W1U@hNl?X1U@fNl?Z1U@dNk?]1V@bNj?^1W@`Nj?`1W@^Nk?`1W@^Nj?b1X@[Ni?e1Y@VNj?j1V1O2O000O2O0O2O0O2N1O2M3M4L4iN]Rg2" + }, + { + "size": [ + 640, + 427 + ], + "counts": "_jT11VSc25a`]M9H5K6VIM\\I9]6M`I7\\6KbI:Y6JbI1RO=Y2Y5X2X5?VKj4TO`H9NGZ2e5c0RKd4\\O^H<2AT2d5m0nJm2@_J0h0\\1h4S1jJf2L^JH=k0\\1g4T1iJd2OZKa0n0g4V1fJ[2MZJ5k0a1`4V7nKWGBc1\\4W7VLUG^OAROo1Z5Z7WLWG\\O@SOo1Z5Y7XLXG[Oc1]4T7YLYGZOc1\\4Q7aLXGTODROo1Y5S7dLYGQOEROo1Y5S7`L]GUOAROo1Y5S7`L^GTO@SOo1X5S7bLmFVN`0k0AUOo1W5S7kL[GgNCWOo1W5S7PMWGaNGYOn1V5T7nLZGbNDYOo1W5S7lL\\GdNe1`4o6kL_GcNb1b4o6iLcGbN_1e4n6iLdGaN^1f4n6iLdG^NXOCV2W5m6hLfG\\Na1l4i6hLiGYN^1o4i6gLlGSNSONY2Y5g6fLmGSN`1W5c6fL`JZ3`5fL`JZ3a5eL_J[3a5dL`J]3_5cLaJ]3`5bL`J^3a5aL_J_3a5^LbJb3_5]LaJd3^5[LcJe3^5YLcJg3]5YLcJh3]5SLgJm3[5lKjJU4W5_KjFUOT4\\5[:1O00001O00001O1O1O001O001O2N1bDZJ`8f5_G[Ja8f5WGcJg8^5QGkJm8W5mFnJR9R5oFmJQ9T5PGkJo8V5mFnJR9U5jFmJU9Y5eFgJ[9Y5eFgJ[9Y5fFfJZ9\\5cFeJ]9j5nD]Il0n0U:l5hEVJX:k5bEYJ_:g5`EWIBg0n:T6[EYIEd0P;Y6PEZJP;[700000000000000000001O0000001O2N00000000001O00001O0000001O001O0000001O000000001O0000001O000PKlF`KNZ2V9l1[GdK@`2U9T1YHULcNg2T9n0gHoKYNS3Q9h0oHPLlMGOb3V9?eIPLTMa3X93oIaLhLX3Y92TJdLeLZ3X9MVJjLbLY3Y9GUJmL`Lf3[9YOYJk0h5ROYJoNbLJU9V1YJoNhLGP9Y1XJkNVMCb8b1XJeN
_MFZ8d1UJfNdMEW8g1SJcNhMEV8h1PJcNnMBR8k1oIcNRN@P8m1jIfNYNZOn7P2gIbNiLaNe1l0k7R2fIaNbN]Oh7S2dIaNfNZOg7U2aI`NnNWOb7Y2^I`NTOTO_7\\2[I`NXOSO^7]2YI^N\\OTO[7_2WI]NBQOX7b2TI^NFnNW7e2mH`N0hNU7g2jHaN4eNS7i2hHcN6cNR7k2gHbN9aNQ7n2bHbN_:_1]EcNc:_1WEeNj:[1TEfNn:Y1QEfNQ;Y1mDhNW;V1eDmN[;U1cDkN^;U1`DkNb;T1]DmNd;T1XDmNk;S1QDmNRoNZAE2O4KNOL323ij0" + } + ] + }, + { + "image": "images/caption_detailed_70.png", + "subject_name": "person", + "object_name": "snow", + "predicate_name": "over", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "kTY55c0Nj;9oCJP<;kCFTU8R6THnIm7o5VHoIk7h4jEiKf2SOb7Q5oEZKCGf=n4iBVKb=i4l0O0SO\\A]Le>\\3gA\\L]>a3gAZL\\>a3T1N2OhAcLkM9f=P3_D_M[;^2bDjM];S2bDQNVNlNH1ne0f2T6g4oHVH8b6C]I=c6D\\IOn0d4j5[NjHoLe0Ng0g4k5\\NiHmLj2f4^4]NiHhLm2j4[4^NhHeLP3m4X4^NXLa1j3^NVLb1j3bNRL]1o3dNPL[1Q4eNoK[1Q4eNoKZ1R4fNnKZ1R4fNnKY1S4fNnKZ1R4fNnKY1T4fNlKZ1T4fNlKY1V4fNjKY1X4fNhKZ1[4cNeK\\1^4aNcK_1_4_NaKa1a4]N^Kc1m4RNTKn1Q5YLaG7_3`3T5SL`G;]3b3^5_K_Gn0S3c3`6ZLaId3d6WL]Ii3d6VL\\Ij3d6VL\\Ij3e6TL[Im3f6RLZIm3l6nKTIR4n6kKSIU4n6jKRIU4P7jKPIV4R7hKoHW4T7eKmH[4V7bKjH^4X7`KhH_4];0000OeEcKU5]4kJbKV5^4V50cEfKT5Z4kJmKn4T4RKmKn4R4RKmKo4S4QKmKn4T4SKjKmMoAR2R>oMmAP2e8TMgKm0dKo1`8\\MhKf0hKm1_8aMfKe0iKj1]8gMgKa0kKh1\\8jMgK?mKf1\\8mMeK?nKc1\\8RNcK=PL`1]8UNaK=QL^1Z8ZNcK:RL\\1W8_NeK7SLY1X8cNbK6ULW1W8fNcK3VLW1V8hNbK3WLU1W8iN_K5YLR1X8kN\\K5[Lo0Y8nNZK5\\Lm0Y8QOWK5^Lk0[8SORK5cLg0[8UOQK5cLf0\\8VOoJ6dLd0]8XOkJ8fL`0_8[OeJ9kL<_8_ObJ7nL:a8_O^J:PM7b8A[J:RM5b8EWJ9VM2c8IoI:]MMd8KlI:_MKe8NgI9dMIe8OdI:gMGe83]I9nMCf89SI8WN_Of8=lH7^N\\Of8a0cH8gNWOf8e0[H7POTOe8n0gG:CgNg8a5YG_Jg8a5YG_Jg8a5YG^Jh8b5XG^Jh8b5XG]Ji8c5WG]Ji8c5WG]Ji8c5WG]Ji8CVGd31iLh8A_GoMC^56QMi8@fG]3ASMh8^OnG[3ZOWMh8]OUHV3SO]Mh8ZO`HQ3hNeMh8WOeHR3cNfMh8SOmHU3[NhMh8ROPIT3XNjMh8POVIR3RNnMh8POVIR3RNnMg8mN\\IT3mMoMg8kN_IU3iMQNh8gNcIW3fMQNg8fNgIW3bMSNg8cNlIX3]MUNg8\\NWJ[3RMXNg8YN]J^3kLZNh8VN_J`3jLZNe8UNdJ`3gL[Ne8QNiJc3bL\\Ne8oMlJd3_L^Nd8jMRKg3YL`Ne8gMUKh3WL`Ne8cMYKl3RLaNo<_1QCaNQ=]1oBcNQ=]1oBcNR=\\1nBeNQ=[1oBeNQ=[1oBeNQ=[1oBeNQ=[1oBeNR=Z1nBfNR=Z1nBfNR=Z1nBfNR=Z1
nBfNR=Z1nBfNR=Z1nBgNR=X1nBhNR=W1oBiNQ=W1oBjNP=W1oBiNQ=W1oBiNQ=V1PCkNP=T1PClNQ=S1oBnNo:hM\\FX3fNPOh:oMbFo2gNSOd:QNdFl2hNTOb:SNfFe2jNZO]:TNfFc2mNZOY:XNgF^2QOZOW:ZNgF[2SO\\OS:\\NhFY2UO\\OQ:]NiFV2WO^Om9_NkFQ2ZOAg9bNnFl1\\OCb9eNQGg1^OE]9gNUGd1^OE\\9iNUGa1@GY9iNVG_1CIS9lNXG\\1EIQ9lNZGZ1FKl8oN]GV1GKk8PO^GT1HMh8QO^GR1KOa8SOdGl0M2]8TOeGi0O4Z8UOfGg004Y8VOgGe016U8XOiGb027R8ZOjG>69n7ZOlG=6:l7[OmG:8a7BSH0=`0\\7DUHK`0b0X7FWHFc0e0T7GWHDf0f0n6KYHAi0e0h6]3XIeLa6a3`I`L\\6a3eIaLU6b3lI`Ln5c3SJ`Lg5b3ZJ`LZ2PL_Oc7WN_LW2RL[Ob7_N]LU2]LlNY7PO[Lo1cLiNU7YOZLk1gLgNo6_OZLg1kLhNl6BXLf1mLVMPOF0Q1m7=WLd1mLUM]O1CNR8Y1QL_1hKSMP;_1XI]1k6eNVIX1k6iNUIU1m6kNTIT1k6mNVIQ1j6oNXIn0h6TOYIj0h6WOXIg0h6ZOYIe0g6[O[Ib0e6_O\\I?d6B]I;d6F]I9b6H`I5`6L`I3a6M`I2_6ObIO^62cIL^64dII[69eIF[6;fIDZ6hIBW6?jI@U6a0mI\\OS6e0nIYOR6h0oIWOQ6i0PJVOP6j0QJTOo5m0SJoNn5R1WJ_NQ6c1YJlMn5T2n5100O1O1O100O1O1O1O1O1O1O100O100O1O1N20Dn^OjMQa0c2O1O1O1O10000N2O1O1O100O1O1O1O1O100O002N1O1O010O2O0O1N10101O0O1O0O201N100O1N2O2N1O2Lm5Z@^GN^O2YOO3N44oa1" + } + ] + }, + { + "image": "images/caption_detailed_72.png", + "subject_name": "fence", + "object_name": "person", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "bbe32T=9K2N3M3M2N1000000000000000000000000001OO10000001O000000000000000000000000000000000000000000000000000000000000001O00O1000000001O0000000000000000001O0000000000000000000000000000000000000000000000000000000000000000O10000000000001O0000000000000000000000000000000000000000000000000000001O00000000000000000000000000000000000000000000000000001O00000000O10000000000000000000000001O000000000000000000000000000000000000000000000000000000001O000000000000000000000000000000000000000000000000000000001O0000000000000000000000000000000000000000000000000000001O0000000000000000000000000000000000000000000000001O0000000000000000000000000000000000000000000000000000000000000000000000001O0000000000000000000000000000000000000000000000" + }, + { + "size": [ + 427, + 640 + ], + "counts": 
"e`l0i0[<=B`0D9J4PJWN6o1@ZNeMYOH8d2?dMYOH8d2?dMYOH8d2`0cMYOH7e2`0cMYOH6f2a0bMXOI7e2a0bMYOH6f2`0cMZOF7g2?cMZOF7g2?dMYOE8g2`0cMXOF8g2`0cMXOF7h2a0bMXOF7h2a0bMXOE8i2`0bMXOE8i2`0bMYOD7j2`0bMYOD7j2`0bMYOD7j2`0bMYOD7j2`0bMYOD7j2`0bMYOD6k2a0aMZOC5l2a0aMZOC5l2b0`MYOC6m2a0aMXOB7m2a0aMXOB7m2a0aMXOB7m2a0aMYOA5o2b0`MYOA5o2b0`MYOA5o2b0aMXO@6o2b0aMXO_O7P3a0`MYO@6P3a0`MYO@6P3a0aMXO_O7P3a0aMXO_O7P3a0aMXO^O8Q3`0aMYO]O7R3`0aMYO^N]O:i0h3a0aMXO\\NB8e0k3a0aMXO\\ND6c0m3a0aMXO[NG5`0o3a0aMXOZNI5>P4b0`MWO[NJ4=Q4b0`MWO[NJ3=S4b0_MWOZNK4XLBh3?WLAi3a0TL@l3b0RL_Om3b0RL^On3d0PL\\OP4e0oK[OQ4g0mKYOS4j0iKWOW4j0hKVOX4l0fKTOZ4m0eKSO[4o0cKQO]4P1bKPO^4Q1`KPO`4S1]KmNb4U1]KkNc4X1TKnNm4f30000000000000000000001O000000000000000000001O00000000001O0000001O0000000000001O000000001O000000001O000000001O00001O001O1O1O1O1O2N2N2N1O2N2N2N2N2N1O2N2N2N2N1O3M2N1O1O2N1O2N2N1O3M1O1O1O2N3M4RMeGg2`8N2N2N1O3M2N1O1O1O1O2N3M2N2N2N2N1O1O1O1O2N2N2N1O2N2N2N001O1O2N1O1O2M3N1N7[ObENg:N^EN[Y\\2" + } + ] + }, + { + "image": "images/caption_detailed_74.png", + "subject_name": "person", + "object_name": "person", + "predicate_name": "looking at", + "mask_rles": [ + { + "size": [ + 640, + 296 + ], + "counts": "\\aT38cc06J5N2N4n\\OEZb0Q1G9O1OHn]OcNSb0[1Q^ObNPb0\\1;2N;E00010O`Nk]OU1Vb0iNl]Oa0<]Oia00m]Oa0<]OUb0=l0GQ[W2" + }, + { + "size": [ + 640, + 296 + ], + "counts": "QXZ28cc07d\\OLeb0i0N3N1O2[]OUOa0Ek`0`2M3M100O1O100O100N1O2OYO]_OQNc`0o1f0201d^OhMm`0S3C001O0001O1TMY_O]2i`0_M[_O_2Va0M3M1O3N4XNZ^Ol0oa0jNV^O:8CZc0O1O0O3NfYf2" + } + ] + }, + { + "image": "images/caption_detailed_75.png", + "subject_name": "motorcycle", + "object_name": "grass", + "predicate_name": "parked on", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "gmT72Y=2N001O1O1O2N1O2N1O2N1O6J:F1O1O00000000000000000000O100O100O100O100O1O100N2O100N2O1O1O1N2N2N2M3Lb`a0" + }, + { + "size": [ + 427, + 640 + ], + "counts": 
"1o1a0\\5o2dJQM]5n2aJTMa5j2_JVMb5i2^JVMd5i2\\JWMf5g2ZJYMg5f2YJZMh5e2XJ[Mi5d2VJ\\Mm5b2SJ^Mo5`2QJ`MP6_2PJaMQ6^2oIaMT6]2kIdMV6[2jIeMW6Z2iIfMY6X2gIhM[6V2eIjM[6V2eIhM_6V2`IkMb6S2^IlMd6S2\\ImMe6R2[InMf6Q2YIPNi6n1WIQNk6n1UIRNl6m1TISNm6l1RITNP7k1PIUNR7j1lHVNV7i1jHWNW7h1hHYNY7e1hH[NY7d1gH[N\\7c1dH]N]7b1cH^N_7`1`HaNa7^1_HbNb7]1^HbNc7_1[HbNg7\\1YHdNh7[1WHfNk7W1VHiNk7V1UHiNn7U1QHlNP8S1PHmNQ8R1oGnNS8P1mGoNV8o0jGQOW8n0hGRO[8l0eGTO]8j0cGVO_8h0aGWOa8h0_GXOc8f0]GYOe8f0ZG[Oh8UOn0V9dNeG>[Om0l8dNiG>_OP1c8bNnG>AQ1^8aNQH=CT1Y8_NSH>IP1R8bNUH>KQ1m7aNXH=OP1h7cNXH>0o0h7cNXH>5k0b7gNYH>:g0\\7kNZH=?e0V7nNZH>`0e0U7mN[H=a0g0`NQOV8KiH=a0h0]NSOX8HiH=c0V1c6]NjH=c0O[Ng0X8mNjH=b00^Nd0V8oNjHD5HU1n7XNfH>B7IS1S1lNh4]OjJ=A9IQ1P1VOf4TOnJgNf4]NXK>C_2=kNe4XN[K>C_2;POd4SN^K=D`28TOd4oM`K=D_26[Oc4iMcK:]12S2V3RL^L>:]12T2U3QL_L=;_10T2U3PL_L>=^1NT2V3PL_L=>`1LT2V3oK`L=>a1JU2W3mKaL=>a1JU2W3mKaLi2X5_1iJcNW5\\1iJeNV5[1jJeNV5[1jJfNV5Y1jJhNV5W1iJjNW5V1iJkNV5U1jJkNW5T1hJoNV5Q1iJROV5m0jJTOU5l0kJTOU5l0jJVOV5i0jJXOU5h0jJZOV5e0jJ\\OV5c0jJ^OU5a0lJ_OT5a0lJAS5>lJDS5[J^Oj5a0VJ^Ok5b0TJ]On5b0SJ\\Oo5d0QJZOR6f0mIXOU6h0kIXOU6h0jIXOX6f0jIXOW6i0iIUOY6j0kIQOV6o0PJjNQ6V1RJfNP6Y1RJeNn5[1ZJ\\Nh5c1[JYNf5g1^JTNc5l1dJlM]5S2hJiMY5V2oJaMR5`2VIeLb1e0Y5f2UIhL`1a0\\5f2TIoL[1:a5g2TISMX14e5i2SIXMT1Ni5j2SI[MQ1Jm5k2RI^Mn0GQ6j2QIhMe0]O[6k2PInM>WOc6k2oHQN:UOg6j2oHVN5oNm6k2nHWN3oNP7h2nHUOR7k0nHUOS7j0mHUOT7l0kHTOU7l0kHTOV7k0jHTOW7k0jHUOV7k0jHUOW7k0hHlL7g1R7\\1gHlL9g1Q7[1gHmL9h1P7[1gHmL:g1P7[1fHmL;g1P7\\1eHlL2O1J6OM21LHcA6Vm0KbA1N21OZhb0Mnh\\O0k>1]AOc>000fm01_cN1d>N\\A0k>0bP17``NJ\\95`K;Y4L`K:[4MXK>d4HlJg0P5[OmJh0Q5\\OgJi0W5ZOdJk0Z5ZO^Jl0[2QNbMW1Lm0ISO_NZOc1h02n0GP13UN1n0JQ1EbLYNa1T2P1IR1GbLXN^1T2T1GQ1J[N:i0IP1JYN:j0Jo0KWN:l0Io0KWN9n0Hm0OVN6o0In0OUN6n0Jo0OTN5n0JQ1hN[L7f1W1n0JR1dN_L9a1`0dMH[3j0U2mNlLY1P1HV2nNkLY1o0IW2mNkLY1n0IY2mNiLZ1n0IZ2lNiLZ1n0H\\2kNhL\\1m0HU57lJGU59lJFT5:lJEU5;lJDU5;mJCT5V5BjJ>V5BiJ?W5AiJ?W5AiJ`0V5@iJa0W5_OiJa0V5@iJb0V5^OjJb0V5^OiJc0W5]OiJc0W5]OhJd0X5\\OfJg0Y5YOUH3`0e0[7XOSH6?d0^7VOQH9>d0`7SORH;6h0h7mNRH=2i0k7jNSH>0i0m7iNSH>Ok0m7gNTH?Mk
0P8eNSHb0Il0R8cNVHa0^OV1[8YNWHb0dNA5f1P9WNWHc0bNC4d1S9VNWHe0^NE4c1W9RNWH[1^Ne0[9PNWHd3g7]LYHc3g7]LYHc3g7]LYHc3g7]LYHb3h7^LXHa3j7^LVHa3k7_LTHb3l7^LTHa3m7_LSH_3o7aLQH]3P8dLPH[3i2gLfK0l06Y2H\\NZ3l2VMSL3a2]O`NX3o2fN^NRNbNX3P3hN]NPNcNW3f2iLiKU2l2kMeNV3f2lLgKT2e1lM]OMa0U3l2YOQMjM_OHd0U3l2[OmLkMBEe0T3j2BiLjMF@g0T3?QMMk2mNeMM[Oj0S3=_MPN3j0`2KdM2XOl0R3=WNjNn1HcM5VOl0R3;YNjN\\2XOWMg0ROl0R3:ZNiNd3NPKn0R3;\\NeNf31lJo0R39_N^N^NIV5a0kJo0R38aNcNd36iJn0S38bNbNc38hJn0S38bNaNd39gJn0R37fN_Nc3i4dNXHn0o2>i4dNXHn0n2?j4cNXHm0o2`0j4bNWHl0Q3a0i4cNVHk0Q3c0j4aNUHl0Q3c0j4aNUHl0Q3b0m4`NRHm0P3d0Q5]NoGm0Q3g0S5XNmGQ1o2h0Y5RNhGU1P3i0_5VObJj0^5UOaJl0a5RO_Jn0c5PO]JP1d5nN]JS1c5lN^JT1c5hN_JX1e5cN[J^1h5_NXJb1k5XNXJh1`90O1O010[GkM]4W2bKkM\\4V2_KoMa4Q2^KPNa4Q2\\KRNd45hGQ1c3kNd46hGo0d3kNd46hGo0d3kNd44jGQ1b3jNe43lGS1G`Nb3;j41oGY1V3eNl40PHZ1U3fNk40PHY1U3hNj40QHV1T3lNl4MQHV1o2^NeL?\\8MPHV1o2_NcL>_8MoGV1o2_NcL=`8OmGU1P3^NdL=`8OmGV1o2_NcL;c8MnGX1l2lNY5JlGZ1k2kNZ5KkGZ1k2kNZ5JlG[1j2kNZ5JlGZ1k2kN[5IlGU1m2SOY5GjGT1g2iNhL>g8EjGS1m2XO[5DiGS1k2YOW6f0jIZOW6d0hI^OY6a0fI@Z6?fIA\\6=eIC[6;fIE]64hIK[61gIOZ6NhI1[6KgI4^6DeI<^:11O:DaQl2" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"l;S3m;0O10000O10000O1O100O1000000O10000O100O100O10000O1000000O100O100O100O1000000O100O1000_OoDRMR;k2b0N3M2O2N1O2O001O1O10O01000O010O1O01OTO_MZE`2g:bMXE\\2i:eMWEY2k:hMTEV2n:jMQEV2o:lMPEQ2S;PNlDn1V;SNiDk1Y;UNfDj1[;WNeDg1];YNcDf1^;[N`Dd1a;]N^Dd1a;]N^Dc1c;^N\\Da1d;`N\\D_1e;bNZD\\1g;fNWDZ1j;gNSD[1m;fNQDY1QO100O100O1O2QOhB3Z=ETC4n=M2O2N2MdTh27njWM9B_OTBj0j=7O100000O03N7I9PM]NPHf1k7\\NTHe1k7\\NUHd1j7^NUHa1k7_NUHb1j7_NVH`1j7`NWH_1i7bNVH_1i7aNXH^1h7bNXH_1h7`NXH`1h7`NYH_1g7aNYH`1f7_N[Ha1e7]N]Hc1d7YN`Hf1]:0000000001O0001N1000001N100YDaNl9a1PFiNh9X1UFnNg9T1UFPOj9[1eEkNZ:n2O1O2N1N2O1O100O10000000000000000O10O02O0O100O1O1O1O1O1O100O1O1O1O100O100O100O100O1O100O1O1O100O1N2O1M3C=N2O1M3O1O1O1N2N2N2N2N2N2O1O100001O00000000001N1001O0001O000O10001O00000000001O00001O00001O00000000001O0000000000001O00000000001O000000001O00000000001O000000000000001O0000000000000010OO10000000001O00000000000000001O0000000000000000000000001O0000000000000000000000000000000000000000000000000000001O0000000000000000000000001O000000000000000000000000000000000000001O00000000000000000000001O00iF" + } + ] + }, + { + "image": "images/caption_detailed_77.png", + "subject_name": "person", + "object_name": "road", + "predicate_name": "crossing", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "`gm36Q=4kNIjDc0P;GhD>LWOm:?QE?J\\OR;6PEP1e0_N`9Y2]FhMb9[2[FgMd9\\2YFdMh9_2SFbMm9m22O1O5K4L1O1kNdEYO]:e0eEZO]:e0cEYO_:f0cEWO`:g0bEWO`:c0eE[O_:LXEI?9]:FYF9`;L1O1N3N2M_PQ4" + }, + { + "size": [ + 427, + 640 + ], + "counts": 
"c9g3c910000O100O01000O2N100O10000O1000000O100O1000000O100O100O10000O10000N2O10000O1O10000000000002N8H1O001O00000000001O000000O100000000000000FVLRGk3^8i0J6O1N2F:O100000000O1000000001O000000000000000000O100000000000000000000000000000000000000O1000000001O0000000000000000OYLWHn1i7mM]HR2c7iMcHV2]7iMdHW2]7gMeHX2[7fMgHZ2Y7dMiH\\2W7aMlH_2S7`MoH`2Q7^MQIb2o6\\MSId2n6WMVIi2k6PM[IP3f6oLZIQ3g6nLYIR3i6kLXIU3k6cLZI]3Z8000O100O100O100O100001O0000000000000000O10000001O1O00000000UOdLfG]3Y8eLfG[3W8jLgGV3X8kLgGV3X8lLgGT3X8mLgGT3Y8lLgGT3Z8kLeGV3\\8iLdGW3_8fLaGZ3h8\\LXGe3i8ZLVGg3W901O2N[N_LaI`3^6cL`I]3`6dL_I\\3a6fL]IZ3b6kLZIU3f6lLYIT3g6mLXIS3h6nLWIR3h6[MlHe2S7aMhH_2W7fMeHZ2[7iMbHW2^7lM_HT2a7mM^HS2d7kM\\HU2j7fMUHZ2S8^MlGc2W8ZMiGf2Y8ZMdGg2\\8T100000000000000000000000000000000000000O100O1LUKgGl4U87O1O100000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000001O000000002N4L00001O1O0000001O0000001O004L1O00000000000000000000000000O100O1O1O1O100O100M3N2M3N2O1N2O1N21O001O00001O1O1O00000000O1N200O100000000001O1O2N1O00000000O100O1001O001O00001O=lJ_Gd4Q9C2N001O0000000000000000O100001O0000O100000000000000O1000000000000O100M3FeKcG\\4X8eKaGf4^84O1001OO10000001O000000000000001O0000001O1O1O00000000000000001O0000000000000000000000000000000000O100O1001OO1O100000000O10000001O0000000000001O1O2N2N5K4L002N000000000000000000000000000000001O2N4L3M3M2N2N000O1001O0000000000000000000QMYLZLg3e6000O1L4O1LSLiFn3W93001O1O6J5K3M5K4L1O_HiLo4U3d21O00000000000000000000000000000O100O100N2N2O1O100N2O1O100O1001O000000000000O100001O00000000000000O10000000000000000000000000000000000000000000000000000000000000000000000000000000000ZF" + } + ] + }, + { + "image": "images/caption_detailed_78.png", + "subject_name": "person", + "object_name": "bench", + "predicate_name": "sitting on", + "mask_rles": [ + { + "size": [ + 429, + 640 + ], + "counts": 
"WS_24X=2N1O2N1O100O1O100N2O1O1O1O1[OBYD`0c;FZD;c;KYD6f;MWD4g;OVD3i;h0O2O0O2M2O1O1O10001O001N1L4N2O2M2O1O2N100O1O1N2O100000000000010O01O001BTEZNm:^1TE[N06n:]1^E[Nf:d1b000E\\DmNd;S1\\DnNd;]1010[O]DoN37a;e0iD[OY;KXD`0b0D\\;:fDF\\;7eDH_;3bDN^;1cDN];2cDN_;ObD1_;MbD3`;JbD5[<01O0000001OO10O10Sli4" + }, + { + "size": [ + 429, + 640 + ], + "counts": "R\\Y1i0a<3O1O1O1O0OG7M221101O1O1O1L4J6J6OGPDWOPOmgd4" + } + ] + }, + { + "image": "images/caption_detailed_79.png", + "subject_name": "person", + "object_name": "tie", + "predicate_name": "wearing", + "mask_rles": [ + { + "size": [ + 640, + 428 + ], + "counts": "TjW56fc05L4K4M4N2J7G8N2N3N0O3N3L2O1N2O1O1N1000O1O001O1O10O01O1O1O00100O10O0100O00_OfN^^OY1\\a0QO`^Oo0]a0WOa^Oh0]a0m002N20O0[Ob^OiN_a0l1O1O101N1000001O001O1O1O1O1O1O010O000010O1000O10O1O2N1QNk^OP1Xa0iNn^OV1na0O1O2N1O2N1O2O001N100O2O0O2N100010O0010O010O01O01O001O001O010O10O1O010000O4M5K2N4L2O1O2N0N3N1M4M1O3LYae0" + }, + { + "size": [ + 640, + 428 + ], + "counts": "l]V63_c0Ol\\O3Qc00m\\O2Rc0Ol\\O3Sc0Nj\\O5Uc0;0O1O1O1O2N3M4Ca\\O1oam1" + } + ] + }, + { + "image": "images/caption_detailed_80.png", + "subject_name": "dog", + "object_name": "elephant", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "mce59b0Le;9PD4k;NQD7i;^O[D3Jc0h;XOcDR1m;L01M4M^OZDMZ;2jDKV;5lDIT;8SE@m:`0n0:F2O1QDPOa;R1[DROc;P1[DPOe;Y1HdNdD\\1];fN`Dm0O\\Oa;c0iD[OW;c0lD\\OS;d0QEXOP;g0SEVOl:j0j0000000002N3M3N[OQD0m;JnCI=8eWIAh6?YI@g6`0]I\\Oc6d0aIXO^6i0cIVO]6k0eIRO[6n0fIQOZ6o0hIoNX6Q1kIlNU6T1lIkNT6U1nIiNR6X1PJeNP6\\1RJaNn5_1UJ^Nk5b1VJ]Nj5d1WJZNi5f1WJZNj5e1WJZNi5f1XJYNi5g1WJWNk5h1WJVNi5j1XJUNi5j1XJTNi5l1k2O010O1O002N001O1O2N1O101N1O2EZDoNh;o0YDoNj;o0XDoNi;P1XDoNj;o0VDPOl;P17011OO0O3N4L11N01O1O10O100O00012NO01001N1N2O1N3O1N3N001O0O2M2N5JbPP4" + } + ] + }, + { + "image": "images/caption_detailed_81.png", + "subject_name": "potted plant", + "object_name": "vase", + "predicate_name": "in", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": 
"Sa\\13k>`0A?B:F>B;E:FP3R4PMlMNbM7=k2U4PMlMNaM:9d2\\4oLlM0\\M?j2j2oL[LCj2l2mL\\LA;i02m04U3P4aKkK=On06T3o3aKmK>Mm06U3o3`KPL?Hm09T3n3`KRL`0El0;U3l3aKUL=Cn0a0c5S2VLlMj3V2VLhMj3[2VLbMj3_2XL^Mh3e2XLXMh3i2ZLSMg3o2T43dEoLc8S3[GoLc8R3\\GPMb8P3^GQMa8o2_GRM`8m2aGSM_8l2bGTM^8k2cGTM^8k2cGSM_8l2cGRM^8l2dGRM^8m2dI`Mn3_2mKgMS4X2nKhMR4W2nKkMjMZOf4i2bMnMfM_Oc4a2iMQNcMNT4n1\\NTN`MOS4l1^NVN^MNT4j1`NXN\\MNT4h1bN[NWMNX4f1aN]NUMOZ4c1bN_NRMO\\4a1cNaNoLO^4`1cNaNoLN_4`1aNdNmLOb4]1_NUNYLFe0i0c4[1_NVN\\LCc0k0c4\\1^NUN_LAa0n0b4\\1]NVNbM=R4]1WNZNhM9Q4\\1UN^NiM6S4\\1SN_NjM5S4\\1RN_NkM5T4\\1PN`NlM4T4\\1nMbNlM4V4[1lMaNnM5V4Z1lMaNmM6W4Y1lMaNmM4Y4[1jMaNmM4Y4\\1jM_NiM9]4X1jM_N]Me0i4l0kM^N[Mg0j4k0lM]NYMi0k4j0lM]NYMh0l4k0mMZNXMk0k4j0QNWNTMo0l4j0WNPNmLV1l4j0[NkMkLY1k4k0AUO?l0@TO`0l0@SOa0m0_OSOa0n0^OROb0o0]OQOc0P1]OnNd0S1\\OkNe0V1[OoMTKNa5S2^OgMXK3[5U2=fMDZ2a600000000O10000000000AaC]N_5CJk0CXO>f70O001O1001N1O2N8FhRj3" + }, + { + "size": [ + 480, + 640 + ], + "counts": "mki51]4o0T5ROTJk1e5ZNfIY2X6iM\\Hc3b7\\LkFJ]1P4f7WLmFI[1S4g7TLnFIZ1U4g7ULWHn3i7mK[HS4e7jK^HW4U91O001N2O0O1N3N1N2O2O0O1O2O0O10000O2O00000O10000001O0000001O00001O001O1O2N1O1O1N3NnNbKcG]4\\8dKfGZ4S8mKnGR4P8PLQHn3P8RLRHl3n7TLSHk3m7ULSHj3n7WLRHh3n7XLRHh3o7XLPHg3Q8ZLnGf3R8ZLnGe3T8ZLlGe3U8[LjGf3V8[LiGd3Y8\\LeGd3\\8]LbGc3_8_L^Ga3c8aL[G^3g8bLWG^3j8cLTG\\3n8eLoF\\3S9iLeFW3]9U11N3M3L4N2L6K3MXL`KVM[4j6M4K6J4L4K5K5J5L5K4M4I8K6H8H:F:E?]OeVS2" + } + ] + }, + { + "image": "images/caption_detailed_82.png", + "subject_name": "person", + "object_name": "wall", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 478, + 640 + ], + "counts": 
"^[b35g>3N1N3N1N2N2O1N2N2N2N2N2O1N2O1N2O1N2O1O1N2N2O1O1O1O1O1O1O100N2O1O1N2N2N2O1O1O002M200O1[OSNXDn1e;YNVDh1f;`NTDb1j;j0M3N2M3L4M3O1N2N2O1N2O1M3O100O10000000000000O10010O0O10000gLQEd2n:[MZE^2f:aM[E_2e:_M^E`2c:YMcEg2]:XMdEh2]:WM[EQ3e:nLRE[3V;3J6O1O1O1O1N201N1O1O002N100O1O100O1O100O100O10000O1000000000O1001N10O100001O0O01000000000O10000000000000001OO100000010N100001O00O100010O00O100010O00O101O010O000O1010O0001N2O010O001N110O001N101O100O001N2O010O001N2O010O1O0O11O00000000000000000000000O10000O10000O10000O100000000O10000000000O10000000000001O0000000000000000001O0000000^ORETMn:k2WEPMj:P3]EgLf:W3a000O10001N1000000O2O0O101N1O1O101N1O1O2O0O2N1O2N1N3N2N2N2N3M2N3M2N2N2M3M3N2N2M3M3M3M3M3M4L4K6J8Eon[1" + }, + { + "size": [ + 478, + 640 + ], + "counts": "0\\6b8000000000000O101O0000O10000000O1000000000000000000O100000000000000O100000000000000000000O10001OO100000000000O1000000000000O2OO1000000000O100000000O10000000000O1000000O1000001N1000000000000000000000000000000001O0000O100000001O00O1000000O1000O100000000000001O0000000O10000000000000000O2O00O010000O1N2K5M3N2O1N2O1O10000O10OO3N1O1O1N200O1O00100O1O2N1O010O101O0O1O100O100O100O1O1O1N2O1O2O0O100O10000O01000O2O0O10O010001N010O100O2O00O0100O1O100O100000000000000000000O101O0O00100O1O100O10001O0O010O10000O10000O2O00O10000000000000000000O1001O00000000000000000000000O1001O0001O0O20O0001N2O10O0001O1N200O001N6K3M010O1N1010N1000O100001O0O010O2O0000O100001O0000O11O0001N1001O01O000O11O0000O2O01O0001O0O11O0000O2O0010O01N1010O01O1N10100O00001N11O00jE\\KR:k4L2O10O0000O2O1OO2O00O2O010O00O1000000O10O2O01O0000000O10000000O11O0O1000000000000000000000000000000000O0200OO10O10000000O1001O000000O11O000O10O11O00000000000O100000001O00O100001OO01000001O0000O11N10O10000001O000O011O00O10000001O000O10000000O100001O000O011O00O10000001O00O1000O10000000001O0000000O1000O1001O0000000O100000O1001O00000O10000000O1001O000O1000000000O1001O000000000O10000000000O100000000000000000000O1000000000000000000O10000000000000000O1000000000000000000O10000000000000000O10000000000
000000000000O10000000000O1000000000000O1000N" + } + ] + }, + { + "image": "images/caption_detailed_83.png", + "subject_name": "sky", + "object_name": "road", + "predicate_name": "over", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "0d2h:0000000000M300M3L4N2O100O1UOlD^OT;=`ETO`:k0cEQO_:n0P1O1O1N2O1O1000000001O001O001O1O1O1O001O0000000000EjC@V_CCad1_O\\5d1RI]Ob1oN]5j2cJVM]5j2cJVM]5j2dJUM\\5l2bJUM^5k2bJUM^5k2cJTM]5l2bJUM^5l2aJTM`5l2_JTMb5k2^JUMc5j2]JVMi5e2WJZMm50eH\\2^1dMY6V2gIjM]6R2cIoM^6P2aIoMb6P2]IPNe6m1\\ISNe6m1ZISNi6j1WIVNj6i1VIWNl6T1eG\\O_1@n6e1RI[No6e1PI[NR7c1nH]NR7c1nH]NS7b1mH^NT7`1mH`NT7^1mHbNT7]1kHdNW7Y1jHgNV7X1kHhNV7V1lHiNT7W1lHiNU7U1lHkNT7U1lHkNT7T1mHlNU7R1kHnNT7S1lHmNT7S1lHmNU7R1kHnNU7R1kHnNU7R1kHnNU7Q1lHoNT7P1mHPOT7o0lHQOS7o0nHQOQ7o0PIQOP7o0nHSOR7m0mHTOS7j0PIUOP7k0PIUOQ7j0oHVOQ7i0PIWOn6k0TISOl6m0TISOl6m0SITOm6l0SITOm6l0SITOn6k0RIUOo6j0QIVOm6l0SITOm6m0RISOn6m0RISOn6l0TISOm6l0SITOm6m0QITOP7k0PIUOQ7j0PIUOQ7j0oHVOS75SG:i1BV7OTG?g1A\\7>eHB[7>eHB\\7>bHC`7:aHF_7:bHE^7;bHE\\7=cHD_7:bHE^7;bHE^7;aHF`79`HG`79`HG`79aHF_7:aHF`79_HHb77_HHa77`HI`78^HIb77^HIc76]HJc76^HIb77]HJc76]HJd74]HLc74]HLc75\\HKd75\\HKd74]HLc74]HLc74]HLd73\\HMe72\\HMe72[HNg70YH0f9100O1TMMTJ3h800000000000001OO1O11O1O00001O000000O100O11O00001O00O1000000000000000000000000O100001O1O000O01001OO100001OO10000000000001OO1kK0fIOPO1^20k41eI0QOO_21j41eI1POMb21k4OeI0QOO^22l4OfIOPO0^22k4NgI0g12h8OkLOeJ0[50eJ0Y50XH1i1Of61ZIOQ60eJ0[50eJ0a80000000jJ2dI3M0a4KT70000000000000000000000000000000000000000000000000O1mN;TFEk9l0dEUO[:T1000000000000000000000000000000O1000000000000000000000000000000O11O0000O11O00O1000000000000000000000000000000000000O100000000000000000001O1O3M4L;Eb0^O>jFSMf8Y3F?A`0_Ok0VO?A5mIZJ35MM[5S7fJoHV4o7ZO1N4MRI" + } + ] + }, + { + "image": "images/caption_detailed_85.png", + "subject_name": "mirror", + "object_name": "wall", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": 
"k5b4^:1O3L3N3M3M3L3N3M2N2N2N0O10O100O10O01000O010O010000O10O0100O100O010O100O1000O0100O10O0100O10O10O100O10O10O100O100O010O100O10O01000O010O10O10O0100O10O0100O10000O010O100O10O010O10O100O10O10O100O100O01000O101N3N5J4M5J5L5J6K2M6K3M5J5L3L6K4K5L3M3L6K3M4K5K5L5K4K5L4K4M4K4M5K4K5L4KfP[7" + }, + { + "size": [ + 480, + 640 + ], + "counts": "1_2]1\\O_3W1_L]O2\\O`3W1]L\\O3^Oa3W1ZL[O5^Oc3W1VL[O7^Oe3W1RL[O9]Oi3W1lK\\O;]Ok3W1hK\\O=]On3V1cK]O?]Oo3X1_K[Ob0]OR4W1ZK\\Oc0]OV4V1VK]Od0]OW4W1SK\\Of0]OV4Z1RKYOh0]OV4\\1PKVOk0]OV4_1mJTOl0^OV4a1lJQOn0^OV4c1jJoNP1^OV4e1hJmNR1]OW4g1fJlNS1]OV4k1dJhNV1]OU4m1dJfNW1]OT4n1eJeNW1]OT4o1dJdNX1\\OU4Q2bJcNX1]OU4Q2cJbNX1]OU4Q2cJbNX1]OT4R2dJaNX1\\OU4S2cJ`NY1]OT4S2cJ`NX1^OT4S2dJ_NX1^OT4T2cJ^NY1^OS4U2dJ]NY1]OT4V2cJ]NX1^OT4U2eJ]NW1^OT4U2eJ]NW1^OT4U2eJ]NW1]OT4V2fJ\\NW1^OS4V2fJ\\NW1^OS4V2fJ\\NW1^OR4V2hJ\\NV1^OQ4U2kJ]NT1]OR4R2nJaNo0^OR4Q2PKaNn0^OR4o1RKcNl0^OQ4o1TKcNk0]OR4o1TKdNj0]OQ4P2UKcNj0]OQ4o1VKdNh0^OR4n1VKcNi0^OQ4o1WKcNh0^OQ4n1XKdNg0^OP4o1YKcNg0^OP4n1ZKdNf0^OP4n1ZKdNf0^Oo3n1\\KdNe0]Oo3P2\\KcNe0]Oo3o1]KdNd0\\OP4P2\\KdNc0]OQ4n1]KdNc0^Oo3o1^KcNc0^Oo3n1_KdNb0^On3n1aKdNa0^On3n1aKdN`0^Oo3n1bKdN?^On3o1cKcN?^On3n1dKdN>^On3n1dKdN>]On3o1eKdN=]On3n1fKdN<_On3m1fKdN<_Om3m1hKdN;_Ol3m1jKdN:^Om3n1iKdN:^Om3m1jKeN9^Ol3n1kKdN9^Ol3m1lKeN8^Ok3m1nKeN7]Ol3n1mKeN6^Om3l1nKfN5^Ol3l1PLfN4]Om3m1oKeN5^Ok3m1QLeN4^Ok3m1QLeN4^Ok3l1RLfN2_Ok3k1TLfN1_Ok3j1ULgN0^Ok3l1ULfN0^Ok3k1VLgNO^Oj3l1WLfNO^Oj3k1XLfNO_Oh3l1YLeNO^Oi3l1YLfNN^Oh3l1[LfNM^Oh3l1[LfNL_Oh3k1]LfNK_Oh3j1^LgNJ^Oi3k1]LgNJ^Oi3k1]LgNJ^Oh3k1_LgNI^Og3k1aLfNI_Of3k1aLfNI_Oe3k1cLfNG_Og3j1cLgNF_Og3j1cLgNF@e3i1fLgNEAd3h1gLgNECa3f1kLgNDG]3b1oLgNCLZ3\\1TMgNC2S3W1[MgNB6n2S1aMgNA:j2n0fMhN@=f2l0jMgN@`0c2i0mMgN@f0]2b0TNhN_Oi0Y2`0XNgN_On0T2:^NhN^OR1o16dNhN]OV1k12hNhN\\O[1h1LmNiN[O_1c1IROhN[Oc1^1EXOhNZOf1[1B[OhNYOj1Y1]O_OiNXOP2R1XOFhNXOS2o0TOJiNWOW2j0QOOhNWO\\2e0kN5hNWOa2`0fN:jNUOd2=bN>jNUOh28^Nd0kNROl25ZNi0nNnNl25UNn0TOhNj26SNR1[O_Ni29kMY1@ZNj27fM`1DTNl27_Mf1LlMg2<]Mh1m5TNSJm1Q6oMoIQ2`90O100O1O100O100O100O1O100O1O100O100O1O10000O1O100001O2N1O4L
2N2N2N001O1O1O0000N2O1N2O1O1O1O1O1O100O1O2OO0100O2N1O010O100O1O1O100O1O10000O1O1O1O100O100O100O100O1O1O10000O1O100O1O100O1O100O1O1O1O100O100O1O1O100O100O100O1O1TH`LW4a3hKaLW4_3hKcLV4^3jKbLV4^3jKbLU4_3kKbLT4^3lKbLS4_3mKaLS4_3mKaLR4`3nK`LQ4a3oK_LQ4a3oK_LP4b3QL]Lo3c3QL]Lo3c3QL]L`M1W5b3ZM\\L^M`0k4T3gM\\L]Mm0`4g2SN\\L]MT1Y4`2ZN\\L]M]1P4W2cN\\L]Mh1f3k1mN^L[MS2]3_1XO^L[M`2Q3Q1D_L[Mg2k2i0J`LZMR3c2=3aLZM]3Y21>aLYMh3o1Gh0aLYMV4b1XOU1bLYM]4[1QO\\1cLWMg4T1eNe1dLWMP5m0ZNl1fLWMZ5d0oMU2gLVMg5:aM`2hLVMn53ZMg2hLVM]6^OZI0c3\\3gLTMX:l2hETMX:l2hETMX:l2hETMX:m2gERMZ:n2fERMZ:n2fERMZ:n2fERMZ:n2fERMZ:n2fERMZ:n2fEQM[:o2fEPMZ:P3fEPMZ:Q3eEoL[:Q3eEnL\\:R3dEnL\\:R3dEnL\\:R3dEnL\\:R3dEnL\\:S3cElL^:T3bElL^:T3cEkL]:U3cEjL^:V3bEjL^:V3bEjL^:V3bEjL^:V3bEjL^:V3bEiL_:X3`EhL`:X3`EhL`:W3aEiL^:X3cEfL^:m2[ESM70^:k2_ESM32^:j2dEPMN5_:j2RFVMn9i2TFUMm9j2TFVMl9j2TFVMl9j2TFVMl9j2UFTMl9l2TFSMm9m2SFRMo9n2Q11O1N2O1O001O01O04L000O4M1O1O001O00000000003M2N1O001O00000010O00O1001O001O0M4eLoDNGO3Mh04]O^Ob0K5N2N2N2O1O2N002N100O1O011N10O0101N10O0101N100O100O100O1O100O001O1O100O1O1O1O2N1O1O1O1O001O0O20OO2N2N1N3M3J5@a0L4N100M3O2N1M3O2L4^OlE[OY:b0?M4N2N1N201N2N100O101N2N3K[Vf0" + } + ] + }, + { + "image": "images/caption_detailed_87.png", + "subject_name": "teddy bear", + "object_name": "teddy bear", + "predicate_name": "beside", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "^ZV39l<8K6I5L3N1N2O1N1G:M2N2N2N3M2O1N2N2O1N2N2O1N2N2VOlM^FU2`9oM]FR2b9QN[FP2c9SNZFn1f9SNZFm1e9UNYFk1h9VNVFj1j9WNVFi1j9XNTFh1KfMn9d0VFd1o9\\NQFa1R:_NnE`1S:aNkE_1V:cNhE[1Z:k0O1O001O1N010O01O001O0010O101O001O00001O2N3M2N1O1O0O2O1O1O0010O011N1O10O0100O1O1^MUFd1k9[NWFe1h9ZNZFe1g9XN[Fh1e9WN\\Fi1d9WN]Fh1d9WN\\Fh1e9XN\\Ff1f9YN[Fe1f9[N[Fd1f9ZN\\Fe1e9YN^Fd1d9ZN_Fd1c9WNcFe1e:K1N10O[O`DD_;=cDA^;?cD_O];b0eD[O\\;d0fDYO\\;g0fDSO^;l0`0O2O0O2O1N2N2M4Hfee3" + }, + { + "size": [ + 425, + 640 + ], + "counts": 
"[UW31Y\\W10_\\lN1]dL6K3M5^ODoC=m;GQD;m;GRD:l;GTD9l;GTD:j;GVD9i;IVD7i;JWD6h;KXD5g;LXD5e:CPF9YO5e:EPFQ1o9SOnEm0Q:WOlEi0T:[OgEf0Y:]OdEc0\\:_OaEa0_:A`E?`:B]E?e:R1001O2M3N001OO1O100O10000O1000001N1000000001O001O1O000010O001O001M20[N\\Ee0c:ZO`Ef0_:YObEg0^:UOfELJ8_:HkE0I5]:HmE2G5]:FnE5G4Z:ERF7E2Z:_OZE2i0=D2Z;MgD2Y;NgD2Z;MfD3Z;MgD2Z;MgD2Y;NgD1[;NfD1[;NfD0\\;OeDO];0eDL^;2P1LWnf2" + } + ] + }, + { + "image": "images/caption_detailed_88.png", + "subject_name": "snowboard", + "object_name": "car", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Pdj24k>3N2N1O2N2N1O1O1O1O001O1O001O001O0000000000000000000000000000000O1lA\\Ol=l0O1000O010L301O100O110O2N001O000O10000001O00000000000000000000000000O10000000O100000000O101O00002N1O00000000O20mAXOl=n00000000O1N1O2O101N101O00000000O10000000000000000000000000000001O00001O00001O1O001O1O1O1O3DcA1f>2M010O0000I700O\\k]4" + }, + { + "size": [ + 480, + 640 + ], + "counts": "aa`17f>8I5VOFgB=S=KdB;X=j0L3M3N2N1O1O2N2N2O001O0O2O00001O001O000O2O001O00001N101O001O1O3L6K3M2N2M3M3N2M2O1N2N2O1N1O2O0O2N100O1O1O2N1O1O100N200O10000O1O00100O01O0010O01O010M210O0O20OO2O1O03N001O0O100O2O00000POlKTGT4l8TLlFl3S9VLlFk3S9TLmFm3T9RLlFn3T9SLWFHa0U4W9XLgFi3Y9YLeFg3\\9WLeFi3[9WLeFi3[9XLdFh3[9[LcFe3]9[LbFf3^9ZLbFg3]9YLcFg3]9YLcFg3]9YLcFg3]9YLcFg3]9YLcFg3]9YLcFg3]9YLcFg3]9YLcFg3]9YLcFh3\\9YLbFh3^9XLbFh3^9XLaFi3_9WLaFi3_9WL`Fj3_9XL^Fk3a9j000O1000_K_Fd3_9[LdFd3\\9\\LdFd3\\9\\LcFe3^9ZLbFf3_9XLbFi3]9WLcFi3]9WLcFi3]9XLaFi3_9WL`Fj3a9TL`Fl3`9TL_Fm3a9SL_Fm3a9SL_Fn3`9SL^Fn3b9RL^Fn3b9RL_Fm3a9RL_Fo3a9QL_FP4`9PL`FP4`9PL`FP4`9PL`FP4`9PL`FP4a9oK_FQ4a9oK_FR4`9nK`FR4`9nK`FR4_9nKbFR4^9nKbFR4^9mKcFS4]9mKcFS4]9mKcFT4\\9lKeFS4[9mKeFS4Z9nKfFR4Z9nKfFR4Z9nKfFR4Z9oKeFQ4Z9PLfFQ4Y9oKgFQ4Y9oKgFQ4Y9PLfFP4Z9PLfFP4Z9QLfFn3Z9TLeFl3Z9ULeFk3[9m0000WKfFl3Z9SLgFm3Z9l00001O01OO10YKeFi3[9n0O1100O1OO10000000001O00000000001O00000000QKhFV4X9jKhFV4X9jKgFW4Y9h00QKgFX4Y9gKhFX4X9hKhFX4X9hKhFX4X9hKhFX4Y9gKgFZ4Y9eKgF[4Y9eKgF[4Y9eKgF[4Z9dKeF]4]9aKeF]4\\9bKgF\\4Y9[KfFK2j4d9WK[
Fi4e9WK[Fi4n90001O01O01O000010O0000010O0010O01O01O00001O000000001O00001O0011O2M1O2O0O2O0O2N10002N0O00010O100O0010O100O00010O1O1O010O1O001O1O010O1O001O001O1O1O001O1O001O0O2O2N1N100O2O001N2N2O1N1O3N1N1O2N1N3N1O1O1O2N1O2O001O010O001O00001O1O0O10001O001O00001O0O101O001O1O00001O00001O000000001N101O1O1O000O10001O00000O101N101O000O2O1O0O101O001N101O1N1O2O000O2O000O2O1N2N3M2O1O2M3M4M3L3M2M3N2N1N3M4J6I:IB1O001N2N2O2M2N2Nkol4" + }, + { + "size": [ + 427, + 640 + ], + "counts": "V2S;X200000000000000000000000001O2N2N1O001O2N00000000001OO1001O0000000000000000000000000000000000000000000000000000000000000000001O00000000000000000000000000000000O10000O100O100001O000000000000000000000000000000000jIaM`1_2`NbM_1^2aNcM^1]2bNdM]1]2bNdM]1[2dNfM[1Z2eNgMZ1Z2eNfM[1Z2eNgMZ1Y2fNgMZ1Y2eNjMY1[1VJSOa4CX1U2gNlMY1T2gNlMY1T2gNlMY1T2fNmMZ1S2fNmMZ1S2fNmMZ1S2fNnMY1R2fNoMZ1Q2eNPN[1P2eNPN[1P2eNPN[1P2dNQN\\1o1dNQN\\1n1dNSN\\1m1dNSN\\1m1dNSN\\1m1dNSN\\1m1dNSN\\1m1cNTN]1l1cNTN]1k1dNUN\\1i1eNXN[1h1eNXN[1g1fNYNZ1g1fNYNZ1g1fNYNZ1g1fNYNZ1g1fNYNZ1g1fNYNZ1g1fNYNZ1g1fNYNZ1g1gNWNZ1i1fNWNZ1i1fNWNZ1i1fNWNZ1i1fNWNZ1i1fNWNZ1i1gNVNY1j1gNVNY1j1gNVNY1j1gNUNZ1l1eNSN\\1m1eNRN[1n1eNRN[1n1eNRN[1n1fNQNZ1o1fNQNZ1o1gNPNY1P2gNPNY1P2hNoMX1Q2hNnMY1S2fNmMZ1S2fNmMZ1S2gNlMY1T2fNmMZ1S2eNnM[1R2ZNYNf1g1RNaNn1_1oMdNQ2]1lMeNT2[1kMeNV2[1iMfNW2Z1hMgNX2Y1gMhNY2W1gMjNY2V1gMmMfL5d5n1eMmMmL0^5S2eMmMoLL^5W2bMmMZ3S2fLmMZ3S2fLmMZ3S2fLmMZ3R2gLnMY3R2gLoMX3Q2iLnMW3Q2jLoMV3m1VIhMf3;S3h1VInMl3;m2e1YM\\Ne2d1]MZNc2f1bMUN^2l1eMnM]2R2^4O100001OO100001O00000000000000O100001O00O10lFRNk6o1TIRNk6n1^HRNTO2]8l1_HTNRO2]8j1aHUNPO2_8h1cHXNjN2b8f1dHYNiN1c8f1eHXNgN3d8e1dHYNhN2d8e1dHZNgN1e8e1dHZNgN1e8e1cH[NgN1f8c1dH\\NeN2V2GQ4k1TK\\NeN2T2KQ4g1WK[NeN2R2NQ4d1YK\\NdN2P20S4b1YK\\NdN2P20S4b1YK\\NcN3P20T4`1ZK^NaN2P21U4_1[K]N`N3=Em0?j4\\1\\KoNmNXOj0?l4Z1\\KPOnNYOg0>o4X1]KQOlN\\Of049f0\\5l0SKnNlNWO=98f0\\5l0SKnNA@0f0\\5l0RKoNB_O0f0\\5l0RKPOA^O2e0[5m0SKoN@_O3d0Z5n0SKPO_O^O>:P5X1VKUNfNc0f0Fb06l4\\1]KgNTOFe05k4^1^KRN`N1N9d01f04j4_1gK\\NhN2h01j4a1fK^OAPOi4c1eK]OCnNi4e1dK]OEkNh4h1b
K^OHeNi4m1_K^OW6b0hI@W6`0iIbNXNA0O1`0n7^1iI_NkNL]7e1gI_NnNJ\\7g1gI^NPOGZ7k1fI^NY7b1gH_NX7a1hH_NX7a1hH^NY7c1gH]NX7e1gHZNYNEl8R2kH]NT7d1jHXN[NGj8Q2`HcNfN^Oh8P2^HnNh1ZO[2i1lKmNe1_O^2e1kKnNe1_O_2c1lKnNd1_OdM]O^4V2ZLQNlNc0g2JdM_O]4S2]LPNnNa0d2LbME]4n1_LPNQO2J1f27dML[4j1aLoMRO0J3d27bM0]4g1dLlMXO7U23cM5\\4e1eLjMXO9T23`M8_4b1ZMSNh0o0n1o0XMTNi0m0o1o0XMTNi0m0o1o0XMTNj0l0n1P1XMTNk0k0m1Q1XMTNl0j0l1R1YMSNn0h0i1U1YMSNR1d0e1Y1YMSNX1>_1_1YMSNY1=^1`1ZMSNW1=_1`1ZMSNW1=_1`1ZMSNW1=_1`1[MRNU1>a1`1ZMRNU1>a1`1[MQNT1?a1`1\\MPNR1`0c1`1\\MoMQ1a0c1`1\\MoMQ1`0d1a1[MoMQ1`0d1a1[MoMQ1`0d1a1ZMPNR1?d1a1ZMPNR1>e1b1XMQNS1=e1b1XMQNS1=e1b1XMPNS1?e1a1XMPNQ1a0g1_1XMPNP1b0h1^1YMoMn0c0j1^1YMnMm0d0j1`1XMjMo0f0i1a1>_NBb1>]NBc1>\\NCd1=\\NCe13R6T2jJUNgND=3Q6U2kJTNgNC>4P6U2lJSNfND>4P6U2lJoMkNG95P6U2nJhMnNN44R6U2mJgMnN034R6U2nJfMmN134Q6V2PKdMlN234Q6V2PKdMlN234Q6V2PKdMmN125Q6V2PKdMlN314S6U2PKdMlN314S6U2PKdMlN304V6T2oJeMjN314V6T2oJeMjN313W6U2_J]MUOV15WOX6V2]JRO[OgNX6X2]JQO[OfNY6Z2[JQO[OeNZ6Z2ZJRO\\OdNZ6Z2ZJRO]OcNY6[2YJSOB]NV6`2XJ_NROYOb0CX6d2TJ[NAROV7c2XIgMWOH_8a2ZHfM[OC^8g2XHeMS8\\2kGeMV8[2gGhMY8Y2dGjM[8V2dGjM]8V2cGjM]8V2cGiM^8W2bGiM_8V2bGhM_8X2mGZMU8f2W11_IYMTLNU6i2eM`M[2`2dMaM\\2_2bMcM^2]2aMdMRMMV4_2gNeMQMOW4]2fNdMSM1V4[2gNcMSM3W4Y2eNcMUM8R4W2hNaMUM>n3R2lN`MUM`0n3P2mN`MTMb0n3n1oN_MSMf0k3k1ROcMmLf0AoNV4h2]OcMhLV1f3W1bMTM:b0VNX1m3R1bMVM:a2T29bMWM:_2T2:aMXM;^2T2:aMYMZ3`M`M]O_Of2D>]3_M^M_OAZ6Q3WJkLQOGO6c07V6Q3WJiL]OH6?U6Q3XJhL37d5R3ZJfL28c5S3]JbL1;c5R3gKnLY4Y2ZImM]2JY4Y2\\IkM[2LX4Z2]IjM[2LX4Z2^IhM[2OV4Y2_IhM[2OV4Y2`IfM[21U4Y2`IeM]21R4[2bIcM\\22R4[2cIaM\\24Q4[2cIaM\\24R4Z2cIaM[25R4Z2dI`MZ27P4[2fI\\M\\28n3\\2hIXM\\2k0`JWOR5N=l0`JWOS5M=l0_JXOT5L=l0^JYOU5K[NBe1fJZN_51Kc1gJ^N]5OLa1iJ`N[5OKb1iJ`N\\5NKb1hJbN\\5LKb1jJbN[5LKb1iJcN\\5KJb1kJcN[5KJa1jJgN[5HKa1jJgN[5HJb1jJgN\\5GJa1kJhN[5GJa1kJhN[5GJV25jMKV25jMJX25hMKX25hMJY26gMJZ25fMKZ25fMKY26gMJY26gMIh1hJeN_5CIh1hJcNa5EGh1iJbN`5FGh1jJ`N`5HEh1mJ^N_5JDh1nJ\\N_5LCh1oJYN`5OAh1QKUN`53_Oh1k0XNUOh1k0XNUOh1k0XNUOi1j0WNUOj1k0VNUOi1l0WNUOh1k0XNUOg1l0YNTOg1l0YNTOf1m0ZNSOd1o0\\NQOc1P1]NPO
9iJc02VOU6NQO6jJf0^6TOjN2jJj0\\6TOUOk0l0UOUOj0k0VOUOj0k0VOUOj0k0VOUOj0k0VOUOj0k0VOUOj0k0VOUOi0l0WOdN2SKg0Y6WOcN7PKb0]6WOcN:lJ?b6WOcNW1^1iNcNU1^1kNcNS1^1mNfNl0]1TOfNl0W1TOnNi0P1WOROi0l0WOUOk0h0UOYOm0d0SO]Om0b0SO_Ol0a0TO_O7XJ7Y6B_O5]J6T6E@3bJ2o5K_O2Y1NR60000000000000000000000000000000000000000000O1000000001O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000N" + } + ] + }, + { + "image": "images/caption_detailed_91.png", + "subject_name": "tree", + "object_name": "rock", + "predicate_name": "beside", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": 
"0]=c100000000000000000000000000000000000000000000000000000000O1000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000000O11O00O100000000000000000000000000000000000000000000000000000000000000hI[NVI2d5c16cNWJd0`5i06iNUJc0b5d082lJ_Nh3`1[16dJaNo3Y1[1;WJPN1e0[4P1\\1\\1bNdN]1]1cNcN]1^1bNbN]1b1`N^N_1d1`N\\N_1e1aN[N_1e1aN[N_1e1aN[N^1g1`NZN`1f1`NZN`1f1`NZN`1g1_NYNa1i1\\NXNd1i1[NWNe1i1[NWNf1h1YNYNg1g1YNYNg1h1XNXNh1h1XNXNh1h1XNXNh1h1XNXNh1h1WNYNi1g1WNYNh1i1WNXNh1g1YNXNg1i1XNXNh1h1XNXNg1h1ZNXNf1h1ZNXNe1i1ZNXNf1g1[NYNe1g1[NYNd1g1\\NZNd1f1\\NZNd1e1]N[Nb1e1_N[Na1e1_N[N`1f1_N[N`1f1`NZN_1g1aNYN^1h1aNYN_1h1`NXN_1i1aNWN^1i1cNWN\\1j1cNWN[1k1eNUNZ1k1gNUNY1k1gNUNZ1j1fNVNZ1i1gNWNY1i1fNXNY1h1hNXNW1h1jNXNV1g1kNYNT1g1lNZNT1f1lNZNT1f1kN[NU1d1lN\\NS1e1lN\\NT1c1lN^NS1c1iNaNW1_1gNcNY1\\1gNeNX1[1fNhNZ1X1eNiN[1W1dNjN[1V1eNkN[1U1dNlN\\1T1dNlN\\1T1cNmN\\1T1dNlN\\1T1dNlN[1T1eNmNY1U1gNkNX1V1gNkNX1U1iNkNV1V1jNjNU1W1jNjNV1V1jNjNU1W1kNiNU1X1jNhNV1X1jNhNU1Y1kNgNS1[1mNeNR1]1mNcNS1]1mNcNR1]1oNcNP1^1oNcNP1^1PObNo0`1PO`No0d1nN\\NQ1g1mNYNR1o1gNQNY1R2cNoM]1S2aNmM_1V2^NjMb1W2\\NjMc1d2oM]MQ2h2iMYMV2k2fMVMZ2l2aMWM^2k2_MWMa2k2YMYMg2i2PM^Mo2f2dLnIWOa3U4U70000O10000000000O100000000000000001O00000000000000001O000000001O1O001OaNgEWNY:o1]EUNc:U3]MfK^JZ4U5VLhJk3U5XLjJh3T5ZLlJf3S5[LmJe3S5[LmJe3R5\\LnJd3R5\\LnJd3R5\\LnJd3R5\\LnJd3R5]LmJc3R5^LnJb3S5]LmJc3S5]LmJc3T5\\LlJd3U5[LkJd3V5\\LjJd3W5[LiJe3X5ZLhJf3Y5YLgJg3Y5YLgJg3Z5XLfJh3Z5XLfJg3[5YLeJg3\\5XLdJh3\\5XLdJg3^5XLbJh3_5WLaJi3_5WLaJi3`5VL`Ji3b5VL^Jj3b5VL^Jj3b5VL^Jj3b5VL^Ji3d5VL\\Jj3d5VL\\Jj3e5UL[Jk3e5UL[Jj3g5ULYJk3g5ULYJk3g5ULYJk3h5TLXJl3h5ULWJk3j5TLVJl3j5TLVJk3k5ULUJk3k5TLVJl3j5TLVJl3j5TLVJl3k5SLUJm3l5RLTJn3l5RLTJn3l5RLTJm3m5SLSJm3m5SLSJm3n5RLRJo3m5QLSJo3m5QLSJo3n5oKSJQ4e4ULmII^1R4a4ZLQJC^1S4]4aLTJZO_1T4^4bL`L^3`3bL`L^3_3dL`L[3]3iLcLW3\\3jLdLV3\\3kLcLU3\\3lLdLT3Z3oLeLQ3R3YMmLg2P3]MoLc2P3^MPMb2m2aMSM_2k2cMUM]2d2jM\\MV2T2mKmJV2o2m1S2[NmMe1o1nKmJ^2T3d1k1cNUN]1k1aNWN_1i1`NXN`1g1`NZN`1f1_N[N`1f1^N\\Nb1e1[N]Ne1c1WNPKhM^3R4b1`MVO`2k0UM_Ok2c0bLn
JDd4j3V6O10000O1000000000000000000000000001O0000O1000000001O0000000000001O00001O000\\MnKZJS4c5oK]JR4^5RLbJn3Z5WLeJi3X5ZLhJf3V5\\LjJe3T5\\LlJd3P5`LPKa3m4aLSK_3k4cLUK]3i4eLWK\\3g4eLYK\\3d4fL\\K[3b4fL^KZ3a4gL_KY3`4hL`KX3^4jLbKV3]4kLcKV3[4jLfKV3Z4jLfKV3Y4kLgKU3V4nLjKS3R4PMnKP3P4RMPLn2o3SMQLn2m3SMSLm2l3TMTLm2j3TMVLl2i3UMWLk2h3VMXLj2g3WMYLj2c3YM]Lg2`3\\M`Ld2^3^MbLc2\\3^MdLb2[3_MeLa2Y3aMgL_2X3bMhL_2W3aMiL_2V3bMjL_2U3aMkL_2S3cMmL^2o2eMQM\\2k2gMUMY2k2gMUMY2j2hMVMX2g2kMYMU2d2nM\\MS2b2mM_MS2a2mM_MT2]2oMcMQ2]2oMcMR2]2mMcMT2]2kMcMV2]2iMcMW2^2hMbMX2_2gMaMZ2_2eMaM]2^2bMbM_2_2_MaMa2_2_MaMb2e2WM[Mj2g2SMYMm2g2RMZMo2f2PMZMQ3e2oL[MQ3f2nLZMS3e2mL[MT3d2lL\\MT3e2kL[MV3e2iL[MX3f2fLZM\\3g2aLYM`3g2^LZMb3f2^LZMc3e2]L[Md3e2[L[Mg3c2YL]Mh3e2UL[Ml3e2RL\\Mn3e2QL[MP4h2lKXMV4g2eK]M\\4b2cK_M]4c2`K^Ma4b2^K^Mb4c2\\K^Mf4`2YKaMh4_2WKaMi4a2UK_Ml4a2RK`Mn4a2QK_MP5a2nJ`MS5_2mJaMT5_2kJaMU5a2iJ_MX5`2gJaMZ5_2eJaM\\5^2dJbM]5^2bJbM_5^2`JbMa5_2]JaMe5^2ZJbMi5\\2SJgMm5Y2SJgMn5Y2QJgMP6Z2mIgMV6`51O1O2N3M7I7I5K3M5K3M3M2N3M3M5K3M1O2N9G7I2N3M2N6J5K3M1O4L4L2N3M2N2N3M5K4L002N5K3M1O0\\M]Ek0c:ROaEm0g:jNZEV1h:gNYEY1k:bNWE]1i:bNXE^1j:^NXEb1k:PNiDH>X2Q<000001O00000000000000001O0000000000000000000000000000000000O1001O000000O11O0000O100000000O10000000000000000O1000000000000000000O100O100000000O1000000000000000000O10000000000000000O100000000" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"`=_1a=0000000000000000000000000000000000000000000000000O1000000000000O10000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000000O1000000O1000000O10000000000O1000000O100000000O1000000O1000000O1000000O10000O100000000O1000000O10000O1000000O10000000000O1000000000000001O00000000001O00000000001O0000001O00000000000000000000000000000000000000000000000000000000000000O10000O100O1O1O1O100O100O1000000O1000000000000O100000000000000000000000000000000001O00000000000000000000000000000000000000000000000000000000000000000000000000000000O10000000000O1000000000000O1000000000000000000000000O10000000000000000000000000000O10000000000000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000O100000000000000000000000000000000000000000000O1000000000000000000O100000000000000000000000000000000000000000000001O0000000000000000001O00000000000000000000001O0000000000000000000000000000000000001O0000000000001O000000001O0000000000001O0000001O0000000000000000O100000000000000000000000000000000000000000000O100O100000000O1000000O10000000000000000O1000000000000000000O10000000000O100000000O1000000000000000000cC" + } + ] + }, + { + "image": "images/caption_detailed_92.png", + "subject_name": "bear", + "object_name": "rock", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": 
"\\fk0h0]<9J5C>XO^NoDV2f::G5M1N3M2N2N3N1N2O1N2O1O1O1N2O1O2N001O1O00100O1O1N2O10O01O1O1O1O0000000000001OO10000001O00001O1O0000001O001O2N001O1O002N1O001O1O001O1O1O1O001O1O1O1O100O1O00100O00100O1000O10O1000O10O100000O0100O1000O100O01000O1O1O0003NO1O10O011O2gCaMk;l2O0O10000O01000O2O0O1O101N10010N2O1O001O0O101O0O1000000000O1000000000O0100O100O00100O1O001O1O1O001O1N2O1O00001O1N2O001O1O1O1N101O1O1O001N2O001O1N2O001N2O1N101O1O1O001N2O1O1O001N2O001O1O001N2O002N002M101O001N2O1O1O001N2O1O0O2N2N101N2N2N1N3N2N1O2N2N2M3N2M3N2L6G_ab0" + }, + { + "size": [ + 480, + 640 + ], + "counts": "T6j8V60O1O2O0O010N2N2O1N2O1O2N1O1N101O1O1O1O100N2O1O1O1O100O1O1O101N1O1OWGYKZ7g4eH`KV7_4jHhKP7W4PInKl6Q4TITLh6k3XIYLe6f3\\I^L`6a3`IdL\\6[3eIiLW6V3jImLS6R3mIRMP6m2QJXMj5h2UJ^Mf5`2\\JbMb5]2_JfM^5Z2bJjMZ5U2gJmMW5R2iJSNS5l1nJVNP5i1QKZNl4e1UK_Ng4a1YKaNe4^1[KgNa4X1`KjN^4U1cKmN[4S1eKoNY4P1hKSOU4l0kKWOS4i0mKZOP4f0PL[Oo3d0RL^Ol3a0ULAi3>XLDf3P3AQM`0n2@RMb0l2]OUMe0i2ZOXMg0g2XOZMk0c2UO]Mm0a2RO`Mo0_2PObMR1\\2nNdMT1Z2kNfMW1Y2iNgMZ1V2eNkM]1S2bNnMa1o1^NRNd1l1[NUNh1h1XNXNj1f1UN[Nm1c1RN^NP2`1oM`NS2_1lMbNX2Z1gMgNZ2X1eMiN_2S1`MnNa2Q1^MPOe2m0ZMSOi2k0VMVOm2g0RMZOo2e0PM\\OR3b0mL_OV3>iLC[39dLH^36aLKb32]LNf30YL1i3MVL4k3KTL6m3ISL7n3HQL9o3GPL:Q4EnKV4BjK>W4AhK`0X4@hK`0Y4_OfKb0Z4^OfKb0Z4^OeKc0\\4\\OdKd0\\4\\OcKe0]4[OcKd0^4\\OaKe0_4[OaKe0_4[O`Kf0`4ZO_Kg0a4YO]Ki0c4WO\\Kj0d4VO[Kk0e4UOZKl0f4TOYKm0g4SOXKm0i4SOVKn0j4ROUKo0k4QOTKP1k4QOSKQ1m4oNRKR1n4nNRKR1m4oNRKQ1o4oNPKR1P5nNPKR1P5nNoJS1Q5mNnJT1R5lNmJU1R5lNmJU1S5kNmJU1S5kNlJV1T5jNkJW1U5iNjJW1V5jNjJV1V5jNiJW1W5iNhJX1W5iNiJW1W5iNhJX1X5hNgJY1Y5gNgJY1X5hNgJY1Y5gNgJX1Z5hNeJY1Z5hNeJY1[5gNeJY1[5gNdJZ1\\5fNdJZ1[5gNdJZ1\\5fNcJ[1]5eNcJ[1]5eNbJ\\1^5dNbJ[1_5eN`J\\1_5eN`J\\1`5dN`J\\1`5dN_J]1a5cN_J]1`5dN_J]1a5cN^J^1b5bN^J^1b5bN]J^1c5cN\\J^1d5bN\\J^1d5bN[J_1e5aN[J_1d5bN[J_1e5aNZJ`1f5`NZJ`1f5`NYJa1g5_NYJa1f5`NYJ`1h5`NXJ`1h5`NWJa1i5_NWJa1h5`NWJa1i5_NWJa1i5_NVJb1j5^NWJa1h5`NXJ`1h5`NXJ`1g5aNZJ^1f5bNZJ]1g5cNYJ]1f5dNZJ\\1f5dN[J[1e5eN[J[1e5eN[J[1d5fN\\JZ1d5fN]JY1c5gN]JY1c5gN]JY1c5gN]JY1b5hN_JV1b5jN^JV1b5jN^
JV1b5jN_JU1`5lN`JT1`5lN`JT1`5lNaJS1^5nNbJR1^5nNbJR1^5nNbJR1^5nNbJR1]5oNdJP1\\5POdJP1\\5POdJo0]5QOdJn0[5SOeJm0[5SOfJl0Z5TOfJl0Z5TOfJl0Y5UOgJk0Y5UOgJk0X5VOiJi0W5WOiJi0W5WOiJi0V5XOkJf0V5ZOjJf0V5ZOjJf0U5[OkJe0U5[OkJe0U5[OlJd0S5]OmJc0S5]OmJc0S5]OnJb0R5^OnJb0R5^OnJb0Q5_OoJa0Q5_OPK`0P5@PK`0o4ARK>n4BRK>n4BRK>n4BRK>m4CTKhKBX4>iKAW4?iKAW4?iKAV4`0kK^OV4b0jK^OU4c0kK]OU4c0kK]OT4d0mK[OS4e0mKZOT4f0lKZOT4f0lKZOS4g0nKXOR4h0nKXOQ4i0oKWOQ4i0oKWOQ4i0PLUOQ4k0oKUOQ4k0oKUOP4l0PLSOQ4m0PLROP4n0PLROP4n0PLQOP4P1QLoNo3Q1QLoNn3R1RLnNn3R1RLnNn3R1SLlNn3T1RLlNm3U1SLkNm3U1TLjNl3V1TLjNl3V1TLiNl3X1TLhNl3X1ULgNk3Y1ULgNj3Z1VLeNk3[1ULeNj3\\1WLcNi3]1WLcNi3]1WLcNh3^1YLaNg3_1YL`Nh3`1XL`Ng3a1ZL^Nf3b1ZL^Ne3c1[L]Ne3c1[L\\Nf3d1[L[Ne3e1[L[Ne3e1[L[Ne3e1[L[Nd3f1]LXNd3h1\\LXNc3i1]LWNc3i1]LWNc3i1^LbLfMh0k5g2_L^LlMh0e5j2_L\\LQNg0`5m2`LZLSNf0]5Q3`LXLVNe0Z5S3`LWLXNe0X5T3aLULYNf0V5U3aLTL[Nf0S5W3bLRL^Ne0P5Y3cLQL_Nc0o4\\3lMdLT2\\3mMcLR2^3PN`LP2`3QN_Lo1a3RN^Ln1b3SN]Ll1d3UNZLl1f3UNYLj1h3XNVLh1j3XNVLg1k3ZNTLf1l3[NSLe1m3\\NRLd1n3]NPLc1Q4_NmKa1S4`NkKa1U4e40O100O1000000O1000000O10000O100000000O100O1000000O1000000O10000O100000000O10000O100O1000000O100000000O10000000000O10000O100O100000000O1000000O10000000000O1000000O1000000O10000O100000000O100000000O100O100O1000000O10000O1000000O10000TJmJj1T5VNnJh1R5XNoJg1Q5YNPKe1Q5[NPKd1P5\\NQKc1o4^NPKa1Q5_NPK`1P5aNPK^1P5bNQK\\1P5dNQK[1o4eNRKZ1n4fNSKY1m4hNSKV1n4jNSKU1m4kNTKS1m4mNSKS1m4nNSKP1n4POSKo0m4QOTKn0l4SOSKl0n4TOSKk0m4UOTKi0m4WOTKh0l4XOUKg0k4ZOTKe0m4[OTKd0l4\\OUKc0k4^OUK`0l4@TK`0l4@UK>l4BUK=k4DUK:l4FTK:l4FUK8l4ISK6n4JSK4n4LSK2n4ORKKS55nJHT58lJEW5;jJCW5=jJBV5?jJ_OW5a0iJ]OY5d0gJZOZ5f0gJXOZ5h0fJWO[5j0eJSO]5m0dJPO^5P1cJlN`5T1aJiNa5X1_JYNTOdM^6S4^JXNP6i1PJUNQ6k1PJTNP6l1PJTNP6l1QJRNP6n1QJQNo5P2QJoMo5Q2QJnMP6R2QJmMo5T2QJkMo5U2RJjMn5V2RJjMn5W2RJgMo5Y2RJfMn5Z2RJfMn5Z2SJeMm5[2TJcMm5^2SJaMm5_2TJ`Ml5`2TJ`Ml5`2UJ_Mk5b2UJ\\Ml5d2UJ[Mk5e2UJ[Mk5f2UJYMk5g2VJXMj5h2VJWMk5j2UJUMk5k2VJTMj5l2WJSMi5m2XJQMi5P3VJPMj5P3WJoLi5Q3XJmLi5S3XJlLh5U3WJjLj5V3WJiLi5W3XJgLi5Z3WJdLj5\\3WJbLj5^3WJ`Lj5`3VJ_Lk5b3UJ\\Ll5d3UJZLl5
f3TJYLm5h3SJVLn5j3SJSLo5m3RJQLo5P4PJoKQ6Q4PJlKR6T4oIjKR6V4oIgKT6Y4mIcKU6]4mI]KV6d4S2001O0000001O001O000000001O000010O0000O2O00010O00001O00001O0O110O000000001O00kJ" + } + ] + }, + { + "image": "images/caption_detailed_94.png", + "subject_name": "pillow", + "object_name": "couch", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "kc05gc04M3O1O1O1O100O100O1O100O1O1O100O1O100O10000O1O100O100O100O100O1O100O1O100O100O100O100O1O100O1O1O1O10000O100O10000O10000O100O1N2O10000O10000001O1O1O1O001O1O1O1O1O1O1O2N1O1O1O1O1O1O1O1O1O1O1M3O1N2N2N2N2O2M2N2N2N2N3L`lW7" + }, + { + "size": [ + 640, + 480 + ], + "counts": "o`02d03AMeb0T1M5Z]OhN1NZb0f1L1OO1O1001N10001O10OO2O1O1O1N2O1O1O1O1N2O1O1O100N2002YN\\^Om0fa0PO]^Oo0ca0PO^^OQ1aa0lNb^OT1_a0jNb^OV1^a0hNd^OX1]a0eNe^O[1[a0dNf^O\\1oa0000000000O1000000000000000000O10000000000000000O10000000O100000000000O1000000000000O1000000000O1000O100000000000000O100000000000000000000O10O100000000000000000000000000000000000V_O^NdN2W`0a1SAiNm>W1k@]NlN>Y`0U1h@TOX?l0c@YO]?g0\\@@d?`0\\@@d?`0\\@@c?a0[@Ae??W@Ei?;m_OOS`01j_O2V`0Ni_O3W`0k100000000000000000000000000000000000000eM]LYDc3`;fL^DZ3_;iLaDW3Z;nLfDR3Q;\\LkBg0T2m2b:fM^EZ2a:gM_EY2a:gM_EY2a:gM^EZ2b:fM[E]2e:cMeDS3Z;nL]D[3c;dLPDj3PnLAQ3b0mL^OS3c0mL\\OR3f0mLZOR3h0nLWOR3i0nLWOQ3m0mLROR3Q1lLoNP3W1mIbMh2W1Z3Y1kIdMj2R1[3\\1gIgMk2m0^3h1aLXN_3j1_LVNa3l1]LTNc3n1[LRNe3P2YLPNg3R2WLnMi3S2VLmMj3U2TLkMl3W2RLiMn3Y2PLgMP4[2nKeMR4^2jKcMV4_2hKaMX4`2fKaMZ4b2bK_M^4Y1gIgNf12c4W1gIjNc1Of4g2WKZMa2dNUNV4TOXMg2bNUNY4POVMk2aNUN\\4lNTMW5P3bJSM^5Q3[JRMe5T3RJoLn5W500000000000000000000000000000000000000000eKgId1Y6\\NgId1Y6\\NgId1Y6g20eKgId1Y6\\NgId1Y6\\NgId1c5gKQKe2\\Od1c5gKQKe2\\Od1Y6\\NgId1Y6\\NgId1Y6\\NgId1X6]NhIc1X6]NhIc1X6]NhIc1X6]NhIc1X6]NhIc1W6^NiIb1W6^NiIb1X6]NhIc1X6]NhIc1X6]NhIc1X6]NhIc1X6h21O0000O1001O0ZIgIZ6Y6<0000000000000000000000000000000000000000O100001OO100O11O0000O1O1O1O1O1N2N2001OFkH^JU7a5PI[JP7d5QI]Jn6d5RI[Jo6d5<0eH\\Jo6c5QI^Jo6b5QI^Jo6c5PI]JP7c5PI]JP7c5PI]Jo6d5QI\\Jo6d5QI\\Jo6d5Q
I\\Jo6d5RI\\Jn6c5RI\\Jo6d5QI\\Jo6d5<0O10000000000000000000001O000000000000000kHZJe6f5[IZJe6f5[IZJe6g5ZIYJf6g5ZIYJf6g5?00000000000000001O0000000000001O00000000000000001O000000000000000000000000000000001O0001OO100000000000001O000000001O`LdHQ1\\7oNdHQ1\\7_20000TLSJ:m5WLTJ[3O>R6AnI?S6@mI`0T6_OlIa0U6^OkIb0U6^OkIb0U6Z300001O001O001O0000001O00000000000000000001O00000001O1O001O001O00000000O10000001O000000000000001OQLUI^1k6bNTI_1l6a2O00PLUI`1k6`NUI`1k6`200000000000000000000000001O1O00000000001O00000000001O000O1000000000000001O000000000001O00000O100001O00000]I" + } + ] + }, + { + "image": "images/caption_detailed_97.png", + "subject_name": "bench", + "object_name": "tree", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "k\\T1221P=`0E2M5M5J2O1O1O2N2N3L2O1O1O5K3M6J1O3L`0_O3`E_MS:c2hEdMU:Z2nEdMQ:Q2kEPNd:Q2\\EoMd:]2N2O0O1O1O1O2lEUMe9j2>00001N3N1N2N2O1N2O1O1N2N2O1O1N101O1O100N101O10O1O1M20100O1O1O010O10O01O1O1000000O1000O100000000000000001O00000000O1O1000000000000001O00002N001O00000000000000000001O000000000O100O1O0010000000000000000000001O000000000000000000000000001O000001O00000000001O000000000000O011N1O11O00O1O001000000000000000000001O1O000000000001N100O10O11N101O00000O2O0O2O0O1N2O1O1O1O1O1O100O10000O101O0001O000000001O00001O001O1O1O010N4M1N2O01O01O00001O0O101N110O000O2O000O110O001O000O1O1O11N1O101O1O0O11O0000000000001O1O1O001O0000000000000000O100000000000000O10000000000000000001O0O100001OO1001O00000O10001O01O00001O0O10001O00001O0000O100O10000O100000000O10000001O001O0O2O000O2O00000000OgMQFX1o9hNQFX1o9S1O001O1O00001O002OO01O1O1O3M00010O002N3NO010O0000001O10O01O1N4M1O1O2N1O2O0O1OfHmL\\M2f6o2PLmLZM40Hd6U3TLlLZM6i6k2TLQMR4n2W3N2N2N3N0O101M2N101000OO2O2N0JZEgMg:U2:L3O2K5I6O1L3DXDTOl;k0SDQOR[4^OPL:R4DRL9n3FVL7j3HZL6e3I^L5b3K_L4a3L`L3`3MbL1^3NdL1\\3OfLiMkNa0^4g1jLbMoNb0Y4k1iLaMQOb0V4m1mL[MSOd0P4Q2RMUMPOj0m3Q2WMoLROl0g3U2YMlLSOm0d3W2\\MgLVOm0_3\\2UO]Ml0b2ZOYMf0g2ARM?m22cLN\\3Q5O1O1O002O0O1O010O2N1O1O1N102N1M3O001N2O2M2N2O1O1O1N20nJ_MYO`2
d0fM[OY2e0jMZOT2g0lMYOR2i0nMWOQ2j0oMUOR2j0oMVOP2k0PNVOo1k0oMVOQ2j0nMWOQ2k0mMUOT2k0jMWOV2P6N1000O10O101O00O0100000000000000000`JjMKV2J]NNc1NdNO\\11dNO\\11dNO\\10fNOZ11fNOZ11fNOZ11fNOZ11fNOZ11fNOZ11eN0[10eN0Z10gN0Y10gN0X11hNOX11hNOW12iNNW12hNOW12iNNW12iNNX10iN0W10jNOW10jNOV10kN0V1OkN0W1NjN1W1NjN1V1OiN2W1MjN3V1MjN3V1MkN2U1NkN2U1NjN3V1MjN3U1NjN3V1MjN3V1MeNTKTOP5W2LdNUKUOo4W2KcN<]1DdN;[1FgN8Y1HhN7W1JjN5V1KkN4T1MmN2S1NmN2S1NmN2S1MmN4S1LlN5T1KkN6U1JkN6U1JjN7V1IkN6U1JkN6U1IlN7T1IkN8U1HkN8U1HkN8U1HkN8U1HkN8U1GlN9T1GkN:U1ElN;T1ElN;T1ElN;T1ElN;T1DmNU1BjN?V1AjN?V1@kN`0U1@kN`0U1@kN`0U1@kN`0V1_OjNa0V1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1]OlNc0S1^OmNb0S1^OmNb0R1^OoNb0P1_OPOa0o0@RO?m0BSO>m0BSO>m0BRO?m0AUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>l0@UO`0k0@UO`0l0_OTOa0m0^OSOb0m0^OROc0n0]OROc0n0]OROc0n0\\OSOd0m0\\OSOd0m0\\OSOd0m0\\OSOd0m0\\OSOd0l0]OTOc0l0]OTOc0l0]OSOd0l0]OTOc0l0]OSOd0m0\\OSOd0m0\\OSOd0l0]OTOc0l0]OTOc0k0^OUOb0k0^OUOb0j0_OVOa0h0BWO>h0CXO=g0DYOl0ATO?n0_OROa0o0^OQOb0o0^OQOb0P1]OPOc0P1]OPOc0P1]OPOc0Q1\\OoNd0Q1\\OnNe0R1\\OmNd0S1\\OmNd0T1[OlNe0T1[OlNe0T1[OlNe0U1ZOkNf0U1ZOjNg0V1YOkNf0V1YOjNg0V1YOjNg0W1YOiNf0W1ZOiNf0W1[OhNe0Y1ZOmM[Kc0[5`1ZOmMZKd0\\5_1YOiNf0V1ZOkNf0U1_OfNa0Y1@fNa0Z1_OfNa0Y1@gN`0Y1_OhNa0X1_OhNa0X1_OhNa0W1@iN`0X1_OgNb0Y1^OgNb0Y1^OgNb0Y1_OfNa0Z1_OfNa0[1^OdNc0\\1]OdNc0]1\\OcNd0]1\\OcNd0^1[ObNe0_1]O^Nc0b1]O^Nc0b1]O^Nc0b1]O^Nc0b1]O^Nc0b1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O\\Nc0d1]O\\Nc0d1]O\\Nc0d1^O[Nb0e1^O[Nb0d1_OZNc0f1]OYNd0g1\\OZNc0f1]OnMSK6^5l1_OmMUK3_5P2\\OmMVK1_5R2[OlMQ1T2^OiM6W2IjM7V2_5000QJmMf0S2ZOmMf0S2ZOmMf0S2ZOmMUKMV5V2EmMQKLN1\\5W2DlMRKML1^5V2DkMSKNK1^5V2DjMTKOJ1^5V22iMNW2HkMoJNY5V2ImMmJMZ5V2ImMmJNY5U2IoMlJM[5T2FYN:g1FYN:g1FYN:g1FYN:g1FTNmJG^5V2DXN;h1EXN;i1DXN;h1EXN;h1EXN;i1HhMQK2W5V22hMOX2b51O00O100O100O10000O1000000000000000000001O1O001O001O00O100000000000000000000000000001O000000000000001O001O00001O00001O001O002N2N1O001O2N1O1O2N2N3M2N2N3M1O3M1O1O2N2N1O1O001O1O001O1O1O1PJYLZ2g3]L^LfNNl4e3ZLaLeNNQ5a3XLeLdNKT5`3TLjLdNJ
W5\\3RLnLdNHZ5Z3nKTMcNE_5W3kKVNU4j1iKYNV4g1hK[NX4e1fK]NZ4c1dK_N\\4a1aKbN_4^1`KcN`4]1^KfNa4[1\\KgNd4Y1ZKiNf4W1XKkNh4V1UKmNj4S1TKoNl4R1QKoNP5S1hJTOW5Q1^JUOb5Z40001O001O000000001O0000001O1O00001O001O1O001O1O1O1O1O00001O00000000001OO1000000000000000000000000000000000000000000O1000000O100001O00000000000000001O000000000000000000000000000000000000O1000000000000O1000000" + } + ] + }, + { + "image": "images/caption_detailed_98.png", + "subject_name": "cell phone", + "object_name": "person", + "predicate_name": "held", + "mask_rles": [ + { + "size": [ + 640, + 443 + ], + "counts": "Rlm33mc0O01O1O010O3N1O000O101N1N20O100L4O10000001O0010O01O000001MVXT4" + }, + { + "size": [ + 640, + 443 + ], + "counts": "WcY19dc07J5J5N2M3M3M2N2N2O1N2O0O2N2N101O0O2O1O001O001O1N2O001O1O0KWNR^Oj1ga0^NX^Oc1T?eNiBi1m[OmAP1n=ZOhAj0T>c2M3M4M2M3O1N3M2O1N2O1O1O1O1O100O1O100O10000O100O1[OaIhC`6SjJ\\AW5f>d0]O1OUJmAj5P>UJRB2Ne5o=YJSBk5m=XJRBg5[>H6^JaAX5k>JN14M8I2O1N1M4K5L7HTNnKSDj3Z>G5L5K5K3M2MXLSM`Fk2^=L5XNe^Oa0da0TO`^Oj0ga0nN\\^OP1\\b0N5J2O3M4K2O2N1O2M2O1O4K2N2O3L2J_R`2" + } + ] + }, + { + "image": "images/caption_detailed_99.png", + "subject_name": "person", + "object_name": "wall", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "c]n25g>5M2N101O001O001O001N100O2O1O1N2O3M4L3QBROg=[1H4M4K6K9F5L2M3N000O1ZOgMeDY2Y;nMaDR2^;SN_Dm1`;j0O1O1O_M_De1a;l001O100000O010ZO`DZM0=a;X2jD[ME2b;a2h0O0O100O101O1N2N2XNbCk0aN4J_Y\\5" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"0W4i:00000000O10000001O00000000001O001O001O1O1O001O002N1O1O1O3MW1iN2N1O1O2N8H1OO1L4H8O1O100O10000001O001O1O1O000000O100O100_OWGiJk8i4m0M3N2N2O1O1O1_MfE;[:BmEZNGc1]:1PFYNFc1[:3\\FKe94_FIa95bFJ^92hFLX91lFMU91mFNT90oFOQ91oFOQ90PG1o8OQG1o8NRG2n8NRG2n8NRG2n8NRG3m8MSG3m8NSG1m8OSG1m81PG1o80PG0P91oF0P91nF2P91mF0R91mF0R91mF0R93kFMU94kFNR92nFOQ91oFOQ91nF1Q9OoF2P9OnF5o8LoF8ROPN6OS9j1bGm0]8TOaGm0_8UOQGTNLj2R9ClF>T9h2O001O1O2N1O2N1O0000O1O1M3O1N200O100N2O100O100O1oNPGRLP9l3TGQLm8n3[GiKg8V4\\GgKe8X4^GeKc8[4Q1O100000000000000000000000000001O00001O1O1O1O1O3Md0\\O1O1O1O2N001O00001O00001O00001O001O00001O001O002N1O1O2N1O2N2N2N2N1O1O1O1O0000000000000000O100O1O1O1O1N2O1N2O1RKTGf3n8VL\\Gc3e8\\L]Gb3g8ZL[Gd3P9QL]Gb3W:I3M3N1NlMPMZHP3f7VMUHh2l7ZMQHf2P8[MhGl2X8UMXG[O[Oi3^9mLUGY3k8hLTGc2BQM[9=QGa2NiLR9o0gFU2Q:lMmET2T:nMjEQ2W:QNhEm1Y:TNfEl1Z:\\11O100O10000O1000000000000001O000000001O1O1O2N2XF^Kn8d4PG^Kn8d4lFbKR9`4kFbKT9V5O1O1O001O00000000O1O1N2N2N2O1O1O1O1O1O100O100O100F:L4O1N2N2N2M3N2O100O1O1O10000O1000000O100000000001O0000001O00001O1O001O2N1O1O1O3M6J4YFQKU9^5O1O003M1O0000O1K5N2O1O1O1N2N200O1O1O1K5O1O1L4O100O1O1O1O1O1O10000O100000000O1000000001O001O001O001O2N1O001O2N002N2Nc0QFdJT9e5OFmFgJS9U5>L4O1O1O1N2O1]Oc0N2O100O10000O100000000001O1O1oEjKV9W4eFfKA:d9Q4jFRLV9Q4cFSL]9o3]FTLd9d40000000O100nN[FfLh9Z3XFeLP:GoEh23lLO9PN201N2JhZ^3" + }, + { + "size": [ + 375, + 500 + ], + "counts": "lTe21e;101O001O001N2O001O1O1O1O1O2N1O2N1O2O0O1O1O1O10O01O000001O000010O01O00010O001O010O00100O21N5K4L1O01O0O10001N1O101N10000O2O001N10001O001O0O101O1N101O1O001O1O001O1O1O1O100O1O001NWYQ2" + } + ] + }, + { + "image": "images/caption_detailed_101.png", + "subject_name": "sand", + "object_name": "playingfield", + "predicate_name": "attached to", + "mask_rles": [ + { + "size": [ + 424, + 640 + ], + "counts": 
"b8d4d80000000000000000001O000000O10000001O00O10000000O100000001O0000000000O100001OO01000000000000000000O100000000000000000000O1001OO0100000000000000000O10000000000000000000000O11O00O10000000000000000000000000001O0O1000000000000000000000000000000000000O100000001OO100000000000000000O10000000000000001O0000O100001O0O01001O00O100000O100000000000000000001O00O1001O0O10O100001O00000000000000O11O000O1000000000O10000000000000000000000001N010000QLPHg2o7TMYHi2h7dLlHZ3c8N7I6J4L2N2N1O2N1O2N2N2N1O1O002N1O1O001O1O1O1O001O001O00001O001O00001O001_NUEc0k:SO`El0`:POdEP1]:nNeEQ1[:lNiES1X;N00001O2N1O2N1O001O1O001O00001O000000001O00000000000000000000000000001O0000001O000000O1000000001O000000O11O1O0000O1000000O100O1000000000000O1000000000000POZO]Eg0b;00000oN[O_Ee0_:]OaEc0]:@bE`0]:AbE`0]:AcE?]:AbE`0]:BaE?^:B^Eb0a:_O^Eb0a:@^E`0b:B[E?e:BZE>f:DXEC4L2M3N1O1O1O10OO2O1O2N1O1O0010O01O001O0001O00000000A?L4O1N1M3N3O01O1O01O01O1O1O1M4J6_IdJQ5a5eJhJW5h6UNnHWLLb0Y7R3oHZLN=W7V3nHXL?Oh6e3j1M5K4lMeF0d9IhFL^9KoF^Oe9OoFIYX9" + }, + { + "size": [ + 425, + 640 + ], + "counts": "`UW31X=0Vb23f]M2nL0iH1X7KiH6T:2M2N2O1O2O000O10000O1000000O1O2N100O1000000O1^FYOa6h0^I\\O_6d0\\ICb6=^ID`6=`IC`6=`IC_6>aIB_6>aIB_6>aIC[MBb33c1i0`MB]6>cIB:Bo2m0gLBZ6?fIDW6dLC]36iLJk4XMSIn0Z2j1S801O00000000000000000000000000000000O1000000000000000000O10000[NWMXG4Z1f2W7lMaHT2W7[NYHoN_Og2R8[NnGkN>U3\\7WNXHcN7OGY3d7ZNRIc1i6`N[I^1`6gN`IY1[6kNgIT1U6POkIP1Q6SOPJm0m5VOSJj0i5XOYJh0d5ZO]Jf0`5]O`Jc0^5^OcJb0]5]OdJc0]5YOfJg0b5oN`JQ1g5eN\\J[1k5YNZJg1P6jMUJV2_800000000000001O00000000000000000000000000000000nFYMa0E[5R3oIbM=F_5h2TJfM8Dc5f2TJoMO^Ok5d2RJXNIZOH]OHNQ6S3dJ?\\3jLUMg2ROn0[3mL]MU2XOn0o2^MdMd1]OnLGl3h2WNoMo0@U1T2UNZN\\5R1YKjNi4m0eKkN`41UHLo31l8N102LUel2" + } + ] + }, + { + "image": "images/caption_detailed_103.png", + "subject_name": "umbrella", + "object_name": "person", + "predicate_name": "attached to", + "mask_rles": [ + { + "size": [ + 640, + 640 + ], + "counts": 
"gok54hc05I7L3K5M3L4L4M3L4N2N2N3M2O1O0O2O1N3N1O1N2O1O2L3O0N3M4K5M1M3M3N4K5I6G8DVC@l<>YC]Oi200O1N3M2O2M4M4L2N2N2N2M2O0O2O000O1M4O000001OO2O00010O0O100010O000000010O0010O00001N1M5M3M3M3[OZNnCi1f;gNTD[1i;P1L3N4L4L5K4M1N2N1000000O1000O10000O101N100O2O0O2O1O1O1O1O2N1O3M`0@3M3M001O001O002N2N0O100LYCPNiRC]OR=a0g0I7M4M3Moob5" + }, + { + "size": [ + 478, + 640 + ], + "counts": "2i>500000000000000000000000000000000000000000000000000001O0000O100001O000000000000O1000000001O0000000000O100001O0000000000000000O100000000001O000000jHMA3e7000O100nHOWO1h03UOMk06ROJm09QOGn0O^H6c6Ko0NaH7^6KZ16dNJ_15`NJb16\\NJf15YNKi14VNLk15SNKm17QNIP28nMHT27kMIV28hMHZ27eMI^26`MJa26]MKd25\\MJf24ZMLh23WMMk22TMNn22PMNR30mL1V3OgL1[3MeL3S90[J0\\L0d31\\LNd35YLKg36XLJj34VLLm31SLO^90O11O00fJ0fK0Z4NhK2X4MiK3W4MiK3`91SKLPK4m90PKMUK3j4OUK1j4M[F0j43k4L\\F1i43m9000iJNbFOj43d47YKIg47YKIf49YKGf4mFGe3K]5`0eF\\O2?h3E`5a0fF\\O0?j3D_5a0hF[OOb0i3B^5b0mF0c3^O`5d0jF0e3\\O`5f0hF1g3YO`5g0iF1f3XO`5g1_JYN`5i1_JWN_5k0oFNa3WO_5k0RGN^3WO_5l0SGM^3WO^5l0VGL\\3XO]5m0XGKZ3XO\\5n0[GKX3WO\\5n0]GLV3VO\\5n0_GMT3UO\\5o0aGKS3VO[5o0cGLQ3UOZ5Q1fGIP3VOX5R1jGGn2WOW5R1lGHl2VOV5S1oGGk2VOS5U1THEh2VOS5U1WHDf2WOR5U1ZHCd2XOQ5U1\\HCc2XOP5U1_HBa2YOo4U1bHA_2ZOn4V1cH@_2ZOm4V1fH@\\2ZOm4W1hH^O[2[Ol4W1jH]O[2\\Oj4W1mH\\OY2]Oi4W1PI[OW2^Oh4W1SIYOV2@g4V1TIZOU2@f4W1VIXOT2Ae4W1YIVOS2Cc4W1\\IUOQ2Db4W1_ITOo1Ea4W1bIROn1G_4W1dIROm1G_4W1eIPOm1I]4X1fInNn1J\\4W1hInNl1K[4W1kIlNk1MX4Y1mIiNl1NQ4_1TJbNk1OP4a1UJ_Nk10m3e1XJYNl12T3MkIk1W1TNj14o2c2kN]MQ1h2nNXMQ1k2mNUMQ1R3jNnLU1T3jNlLV1Y3eNgLZ1^3bNbL]1`3bN`L]1b3bN^L]1f3`NZL`1f3`NZL`1g3_NYL`1h3`NXL`1h3`NXL`1i3_NWLa1i3_NWLa1i3_NWLa1i3_NWLa1i3_NWLa1h3`NXL`1h3`NXL`1h3`NXL`1h3`NXL`1h3`NXLa1h3jIULa43e1k3[NULe1j3\\NVLe1i3[NWLf1f3\\NZLe1d3\\N\\Ld1d3\\N\\Le1c3[N]Lf1a3[N_Lf1]3^NbLd1\\3\\NdLf1Z3ZNfLi1Y3UNgLS2o2oMQM\\2d2dM\\M]2b2dM^M]2a2cM_M]2a2cM_M^2`2bM`M_2_2`MbMa2]2_McMb2\\2^MdMc2[2]MeMd2Z2\\MfMd2Z2\\MfMd2Z2\\MfMe2X2\\MhMd2X2\\MhMd2W2]MiMc2V2^MjMc2T2^MlMb2U2]MkMd2T2\\MlMe2S2[MmMe2R2\\MnMe2P2\\MPNe2n1\\MRNe2j1^MVNc2g1_MYNa2d1bM\\N_2a1cM_N]
2`1dM`N]2^1dMbN^2[1cMeN_2Y1aMgN`2W1aMiNa2U1_MkNb2T1^MlNc2R1^MnNc2n0nGoN_53c2n0`MROa2m0_MSOa2l0`MTOa2k0_MUOb2j0^MVOb2i0_MWOb2g0_MZO`2e0aMZO`2f0`MZOa2d0`M\\Oa2b0`M^Oa2a0_M_Ob2`0^M@c2>^MBd2<\\MDi24ZMLl2LVM4j8000000000000000000000000000000000000001O0000000000O10000000000000000000000000000000000000000000000000000000000000000000000000aM7iEIV::hEFW:=gECX:`0fE@Y:b0fE^OY:c0gE]OY:d0fE\\OY:e0gE[OY:e0gE[OY:e0gE[OY:e0gE[OY:d0hE\\OX:d0hE\\OX:c0iE]OW:b0jE^OW:?kEAU:?kEAV:=kECV:0nA2P>MQB3o=MQB4n=LQB6n=JRB6n=JRB7m=IRB8n=IQB7o=JmA:R>EoAc0i=\\OXBf0f=ZOZBg0e=YO[Bg0e=YO[Bg0e=YO[Bh0d=YOZBh0f=XOZBh0f=YOYBh0f=XOZBh0f=XOYBk0e=SO]Bn0b=RO^BP1`=PO_BQ1a=oN_BQ1a=oN_BR1`=nN`BR1`=nN`BR1`=nNaBR1^=nNbBR1^=nNbBR1^=nNbBS1]=mNdBR1\\=nNdBR1\\=nNdBS1[=mNfBR1Z=nNhBQ1W=POiBo0W=QOiBP1V=oNkBQ1U=oNlBP1T=POlBQ1S=oNnBP1R=POnBP1R=POnBQ1Q=oNPCP1P=POPCP1P=POQCo0o\\O8[[Ld0m;QOQEMVOc1i;`NQEMVOd1h;_NRENUOc1i;_NoD3UO_1l;^NoD=VOi0l;jNmD4VOO3S1i;jNnD4WON2T1i;jNnD4WOL4V1g;jNnD5AP1b;kNnD5_OP1c;kNnD6^OP1c;jNoD8[Oo0f;iNoD8[Oo0f;iNoD8ZOGLR1l;oNnD8YOGNa0NKm;EnD8WOI0?NKm;EnD9UOI2=OLl;EnD9UOI2>NJn;FnD8YOE14N`0j;_OnD8\\ODM4O`0k;@mD8\\ODM4Oc0h;]OPE8\\ODL6Ob0h;\\OQE8[OFM4Nb0i;\\OQE8ZOGN4Lb0k;[ORE7XOI0h0LVOj;2SE6WOI04L;Pl:F^F8b9H^F9a9G`F8`9HbF6^9JdF4\\9LfF3Y9MhF2X9NiF0X90hF0X90hF0X90hF0X90hF0X90hFOY91gFOY91hFNX92j200000000000000000000000000000000001O000000000000O100001O0000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001O00000000000000000N" + } + ] + } +] \ No newline at end of file diff --git a/evaluation/GAR-Bench/annotations/GAR-Bench-Caption-Simple.json b/evaluation/GAR-Bench/annotations/GAR-Bench-Caption-Simple.json new file mode 100644 index 0000000000000000000000000000000000000000..c89c1c988a35a806057e7712d98012f62ad88f1c --- /dev/null +++ b/evaluation/GAR-Bench/annotations/GAR-Bench-Caption-Simple.json @@ -0,0 +1,2143 @@ +[ + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000227491.jpg", + "mask_rles": [ + { + "size": [ + 480, + 
640 + ], + "counts": "[f`46i>3N001fA3`0DQ<9^CU1_4M1O2O000O22M0000O010O00001O0QDJ7FT6a0bI6N\\O_6>ZIf0OmNg6>VIn0IgNR7;RIU1GaNV7;QIY1D_NY7:oHl1_N^M>?U8:iHS3Y7mLcHU3]7mL`HT3a7lL]HU3d7kLZHW3f7iLXHX3i7hLVHX3k7hLSHY3n7gLPHZ3P8gLnG[3R8eLmG[3S8eLlG\\3T8eLkG[3U8gLhGZ3X8gLgGY3X8jLfGV3Y8lLfGT3X8oLfGQ3T8WMkGj2m7^MRHb2i7cMWH]2f7gMXHZ2e7jMZHV2b7nM^HR2_7RN_Hn1`7TN`Hl1^7VNbHi1]7ZNbHe1^7\\NbHd1\\7^NdHa1\\7aNcH^1\\7dNdHnNKaNc0ENc0R7[2aHgNo1bNc5g2]HgNV2[N^5o2[HgNQ:T3O1O1O1O102N2M2O3K4aHhJc42^I[5i1gJo33XJl5c1UJd3P7YLQIe3U7ULnHj3U7PLnHo3W7kKkHT4^7ZKlHe4\\7nJjHQ5c8O100O1O100O100O100000000000000000000000000000000001O000dMWKaJi4[5ULjIm3S6XLUInNJk4P7UMjHo2T7_20001O001N2XI[HS6g7hI_HU6b7mI]HQ6c7PJ^Hn5d7RJ[Hm5f7SJ[Hk5f7TJ[Hk5f7SJ^Hj5c7UJ_Hh5c7WJ_Hg5^8O001N2N2O1O1O2L3M5L4L2N4L2M3O1O001N101O001N101O0O2O1O0O2N101N2O0N3L4M3M3M3L4B>M3M3M3DUD^MP<_2:N2O2N2M2O2N2L5J6WOlBUOc=;YWf4" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_0.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000029397.jpg", + "mask_rles": [ + { + "size": [ + 449, + 640 + ], + "counts": "Zl95l=1O1O001O01O0000000010O012MU6CaIg0^6ZOVIQ1j6S301O001O0001O00000000000000001O002N3M:QIiH`6^7N100O1O00001O000000001O00001O00001O0001O0000000000000000000000000O100O100O10000O1O1N2O1N2N2N2O1O1O1O100O101O0O1000000O10000O100O100O1O1O1O10001O0O10000000000000O11O00000000001O000000O2O00000000000O1O1O1N2J6J6K5J6L4J6J6L4J6K5I7J6K5K5K5I7I7K5K5M3J6K6L3J6O10001O1O2N2N2N2N3M2N2N3M2N2N2N3M3M2N2N2N3M4L1N102M5I6H7J8G9Edm<" + }, + { + "size": [ + 449, + 640 + ], + "counts": 
"Qa_4?_=6L2L5L3M3N2N1O3M1O2O0O100L5ZOe0L5J5M3N3M2O1N2O1O2L4N4lN`M^Fd2k8f1E;@`0H7L301O1O1O001O1O00001O000000001O00000000000000000000000000000000O1000000000000O100N2O1M3CXH\\Jm7]4`HeKA5Z8S4W1M3]Oc0M3L4L4M3K5L4I7F:I7H8J6E;H8EXCVOTR;5a[E6H7L4RLBSJa0h5LnI7o56dI7n5g0UI_Of6e0UI^Oi6h0PIK^68oH9P7LcH=\\7T30001O001O0000000000001O00000000001O000000000000000000O1000000O10000O\\NlI]KS6Y6O1O1O1N2N2N2O1N2N2O1O10000O10000O10000000000N2gNgGjLY8U3iGiLX8U3kGjLU8T3PHgLR8V3SHgLn7V3WHfLk7X3XHcLl7X3[H`Lk7[3`1L4\\MQG0U9M\\G_Oj8?]2O1O1O1O1N2O1O1O1N2O1O1O1N2NSaZ1" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is standing on .", + "image": "images/caption_simple_1.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000163117.jpg", + "mask_rles": [ + { + "size": [ + 500, + 376 + ], + "counts": "[o0Q82QH\\7V8N000O2HhGRIX8U7001O01O000000000O1O1O1N2N2dL`HcNa7Z1cHdN^7Z1eHeN[7[1gHcNY7]1kH_NU7a1oH[NQ7f1nH[NQ7e1oH]No6d1oH`Nn6a1PIaNo6`1PI`NP7`1PI`NP7a1PI_No6d1nH]NQ7d1jHVKOV3W7e1hHWK0T3X7S5000O1O100000000PNgHfKX7Y4jHfKV7Z4kHeKU7Y4PIeKo6Z4UIoIE^1U7b4^I^Kb6b4^I_Ka6a4_I^Kb6b4]I_Kc6a4]I^Kd6b4ZI_Kg6b4XI]Ki6d4XIYKi6h4WITKl6m4SIQKo6P5QInJP7S5gHhIMU1\\7S5aHhI120S1^7T5_HjI022o0_7l601O2N4L1O1O`HhHc6X7]IjHa6n0iHT5c0RJb6T7[IUIA@h6[7eIYI@]Ok6Z7eI]I[6c6dI]I]6c6cI]I]6b6dI\\I^6d6cI[I^6d6bI[I`6d6`I\\Ib6b6`I]Ib6`6_IaI`6_6`IZI]OES7?nHl5c0VJ_6NmHk5f0VJ]6_6dIQI[O3LMU7P7cIoH@0J0T7P7[1O02M2L4WKhHQ1\\7nNhHn0[7POfHP1Z7POdHeLMX4_7TOhHl0X7TOhHl0X7VOfHj0Z7YObHi0]7i3M2O01O01N11O0000O100000001O01OO3N3M001O1O2N00TMVIoLi6P3ZInLf6R3[ImLe6R3\\InLd6Q3]InLd6R3\\InLd6R3\\ImLe6T3ZIlLf6U3ZIiLg6X3WIiLa5cMoJN@g5InLi5\\MgJNA47\\8[6cGeIX8b6fG]IY8P7O2N1O01ZOlGmIR8k6O1O01001N0100001OO01UMgH[MY7d2gH\\MZ7d2fH\\MZ7d2gH[MY7e2gH[MY7f2fHZM[7f2bH\\M]7^501O0000O1UKaH`1a7^N_Ha1c7^N^H`1c7\\N\\HQM3a4b7]NcHb1_7\\NcH`1`7`NbH\\1_7dNcHY1_7gN]H\\1e7dNUH^1d0fK^6n2iH]1j0fK\\6X7gIhHZ6U7U1M2O00101aNgG^JIo0_8d4kG]KU8a4mG_KS8^3fGZL89R8\\3gGZL7 and ?", + "answer": " is flying over .", + 
"image": "images/caption_simple_2.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000170613.jpg", + "mask_rles": [ + { + "size": [ + 640, + 439 + ], + "counts": "eiT57gc03M2N2O2K4N2K5L4O1O1N2jLL^B6^=n1UAjL0`1k>`3N1OkLeAj0X>UOmAj0R>TORBj0n=SO\\Bg0c=WOeBc0[=YOmBd0S=YORCe0mVS<@oC`0P<@RD>o;@SD`0m;@TD>l;AWD>i;AYD=i;AYD;j;DXD4o;MQDKV<5kCD\\<l8BTG8S9HkFGg99YF]OQ:c0nEYOX:f0hEUO^:j0bEPOd:P1]EhNk:2\\BMk2HQ;8YBM_a03a^OM_a02c^OM]a02d^ON]a00d^O1[a0Ng^O1Za0Lh^O4Xa0Jj^O6Va0Il^O6Ta0Hn^O8Sa0Eo^O;Qa0DQ_O;o`0Cj^OCDj0ca0_Oj^OJBg0ea0ZOl^O1^Of0bb0ZO^]Of0\\b0WOa]O34e0Zb0ZO`]O26d0Zb0Cf]O[MgNIg2e;UNVG`0ZMeNJf2e;VNTGc0[M[1a;RNRGg0[M^NNe2d;WNQGl0bMh0\\;]NoFn0jM`0W;aNmFR1lM=W;aNkFU1mM:W;bNhF\\1oMOZ;fNoDH5j1\\O^M1Y2^;8iD:FVM4V2^;c3]DWJ4V2`;e3_DVJMR2d;j3_DRJOQ2d;o3aDnK`;S4aDgKc;Z4\\DcKg;W63L5gNjCbJ\\<^5eC[Jb^6d;]JYD`5h;dJVDX5m;jJQDQ5S`0VBUOP>k0UBlNo=S1YB^No=a1ZBoMm=P2S2O1N1O2N3L3K5K6J7H9G8H;EVhV4" + } + ], + "question": "What is the relationship between and ?", + "answer": " is talking to .", + "image": "images/caption_simple_3.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000465822.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "dh^31e;1ig72RXH4J7I8J4M3N3L4]OROVFR1h9RORFR1k9?M3N3L2O100O2N1O1O1O1O1O2N1O100O2N101N1O1O1O2O0O101N1O1O101O0O2O0O100_HQMZ6P3dIQM[6P3bISM^6m2`IUM`6k2_IWM`6i2]IZMc6f2QIgMn6Z2oHhMR7W2kHmMT7[3010O0mLfHi1Z7VNiHh1X7WNlHf1S7YNQId1o6\\NTIa1m6]NWIa1i6]NZIa1g6]N\\Ia1e6]N]Ic1c6XNcIf1_6UNfIi1Z6TNjIk1W6RNlIbNh0`7dNRJc0`Nh0W9WOkFh0T9XOnFg0R9XOPGg0P9VOTGj0j8VOXGi0h8VO[Gh0f8WO\\Gh0c8WOaGg0^8YOcGg0ZOnNi8:oGS1o7kNSHU1n7hNSHX1o7dNTH\\1S9001O010O10O00010O01O010O001O010O00010O10O010O01O010O0100O010O0010O0101N1O2M2I8YOl0BcPb0" + }, + { + "size": [ + 375, + 500 + ], + "counts": "f[b41c;5M3M3N3L2N3N3M2N2N10O01O000000O2N100O1O2N100O101N1O1O2O0O1O1O1O1O101N1O1O101N1O2N`[e0" + } + ], + "question": "What is doing with the ?", + "answer": " is looking at the .", + "image": 
"images/caption_simple_4.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000278353.jpg", + "mask_rles": [ + { + "size": [ + 640, + 596 + ], + "counts": "32kc0k0XO2N001O000000001O000000001O000000001O0000001O00000000001O0000001O00000000001O0000001O0000001O0000000000001O00000000001O00000000001O000000001O000000001O000000001O0000001O0000001O0000001O0000001O000000001O0000001O000000001O0000000000O10000O100O100O1O1O100O1O100O100O100O1O1000000O100000000000000000000001O00001O000000001O0000001O00001O0000000000001O0000000000000000000000000000000000001O0000000000001O0000001O001O0000001O00000000000000001O0000001O1O001O001O1O001O1O001O001O001O1O001O00001O001O001O00001O001O00001O00000000001O00001O00001O00001O0000001O0000001O0000001O001O0000001O000000001O000000001O001O0000001O00O1000000O100O100O10000O100O100O10000O1000000O100000000000000O10000O10000000000000000001O0000000000001O000000001O000000000000001O000000000001O001O000000000000000000001O00000O1000001O0O100O1N2O100O1N3M2O1O2N1N2N3N1O1O1N3M2O1N3N100O100O2O0000000000d^OROg?m0V@WOi?i0V@YOi?h0V@XOj?h0V@YOj?g0T@ZOl?g0S@ZOl?g0R@[Om?f0R@[Om?f0`_OoNG=i`0a1P_OcNo`0X200O1O1O1O1O1O010O001O1O1O2N1O1O1O0001O01O0000001O01O0000010O0000000001O0000001O00000001O01O00000000001O0001O01O000000001O01O000001O01O00010O00000010O0001O0001O000001O0000010O000001O00000010O00000001O000000010O00000001O01O00001O01O0000010O000000000010O00001O0000000010O0001O0000000010O00000001O00000001O00001O;bLX_O46KO67HL001Je0[?" 
+ }, + { + "size": [ + 640, + 596 + ], + "counts": "[me1k0gb0d0C;CO1O1O1O10O01O001O0000jMhJdEX5[:nJ`ER5_:SK]Em4c:UK[Ek4f:WKWEi4h:\\KTEd4l:^KREb4n:`KPE`4P;bKmD_4S;cKkD]4d8bKgG5AZ4W8YLPHBCV4o7kLZHROBT4P8RMZHlNCS4Q8VMYHjN@S4U8XMXHh4e7^KXHb4g7`KYH`4e7cKYH]4g7dKXH\\4i7fKTHZ4n7gKnG[4R8eKmG[4U8eKhG\\4Y8jK^GX4c8c3O0100O002N1O2N000010O01O1O001O2NnHQET5o:lJoDV5P;m110O101OO01000O0100O1000_HREo5m:QJTEo5l:PJTEQ6k:oITER6m:mISET6l:lITEU6l:`1O00ZHUEX6k:gIUEY6k:gIUE[6j:dIVE\\6j:dIVE]6j:bIVE^6j:bIVE_6j:`IVE`6j:\\10SHVEb6j:]IWEd6h:\\IXEd6h:\\IXEe6h:YIYEh6f:YIXED1W6g:TJXEB6Y6b:UJXEB6Z6a:TJYEE3W6d:TJYEE3VOOn6e:VJYEC0[O100l6f:VJYEC0@1g6f:VJYEL1n5g:UJXEI1TO1o6e:RK^En4b:RKYEPN2n6f:TKZEm4e:PKZEPN1P7e:oJ]ES5d:lJ[EU5e:S201O01O0010O0001cGWEa7i:^HXEb7h:_HWEa7j:j001O01O000010O\\GWEQ8i:oGWEQ8i:d0OWGWE[8j:cGWE]8k:aGUE_8l:`GTEa8l:9005K0001N1M4N2O1N2N2O2M3N6J2M5L5J6J5L1O1N2O2N1N3N2N2M2O2M3N1O2N1O1O1O1O1N2O1O1O1O1N2O2N3M3M2N4L3M4L5fKlC>X00000O1010O1O001O10O0000000jIoL]KQ3^4YM]Kg2b4]M[Kc2e4cMUK]2j4lMnJT2R5RNhJn1Y5UNdJj1]5[NnGWNUN1l1]3Q8_N`G@GP2j8dNVGGIe1Q9gNnFLN]1T9nN^F39o0Y9U2dFjM]9X2_FiMa9Z2[FgMe9\\2XFdMh9_2SFcMm9X600001O0000000O20O00O100O1O100N2N2O1O100N2M3N2O1O1O1N2O1N2N3N1N\\FZGS8d8mG]GS8c8lG_GS8`8mGaGS8_8kGdGU8Z8jGhGW8W8hGkGX8T8gGmG_8m7_GUHj8b7TGaHR9Y7kFiH[9Q7bFSIV:U6iEkIb:j5^EVJg:g5PEaJS;\\5gDjJ\\;U5^DnJe;f63M4gIVDS4Y and ?", + "answer": " is in front of .", + "image": "images/caption_simple_5.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000128051.jpg", + "mask_rles": [ + { + "size": [ + 360, + 640 + ], + "counts": "U4V4R7000000000000000001O00000000000000000000000000000001O000000001O00001O2N1O001O00O10000O1O100O1010O00000O1001O01O1O0000000000O1L4O2N10000000000000001O0O2O00O11O000001O010O001O00001O3M4M2M1O1O2N1O001O01O00001O0010O01N10001O1O001O1N3M2N3M2M4K5J6E;K5K4O3L3N3M2N2M5L2N4K3L5J8SO^FFi9DXF8 located relative to ?", + "answer": " is in front of .", + "image": "images/caption_simple_6.png" + }, + { + "image_path": 
"/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000560266.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "ocP1335iZLAf3>\\LAd3>^L@c3`0_L^Ob3`0aL]O`3b0cL\\O^3b0dL\\O_3a0cL^O_3?dL^Oe39]LFe36`LFd35hPY4" + } + ], + "question": "What are and doing to each other?", + "answer": " is looking at and biting .", + "image": "images/caption_simple_7.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000024919.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "iah21bYQ14bSoN0O2O0O1O1O000UN8nE1k9 and doing to each other?", + "answer": " and are pushing each other.", + "image": "images/caption_simple_8.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000231169.jpg", + "mask_rles": [ + { + "size": [ + 454, + 640 + ], + "counts": "WaS2=f=g0SOb0E9H4L3M2N4K6K3N1O1N3M4M3L2O2M4M3L4M1O1O2M4M2N1N2O2M4M2M2O1O2N1O2N1N2O2N1N2O1O2N1O2M3N001OSLmFi2P9WMSGi2k8VMWGj2g8WMZGj2d8UM]Gl2b8TM_Gl2`8TMaGl2^8SMdGn2Y8RMiGm2W8SMjGn2T8QMmGP3R8oLPHQ3o7oLRHR3l7nLUHQ3k7nLVHS3i7mLXHT3f7kL\\HU3b7kL`HV3^7jLbHW3]7hLeHY3Y7fLiHZ3U7fLlH[3S7cLPI]3o6bLRI_3m6aLTI`3j6_LWIb3g6^L[Ic3c6[L`Ie3_6ZLcIf3\\6YLeIh3Z6TLkIk3U6SLnIo3o5nKUJT4h5jKZJW4e5gK]JY4i7000O10000O10000O100O2O001N1O10000O2O0O01001N1O10O010001N10O11O1N1O1O10001N0100O02N2N1O1O1O1O2N1O1O1O100O2N1O001O2O2L10100O3M10O00010O10O10O0100O10O001000O01O0001O010O0001O1O010O000010O0000O2O001N2O001O001O000O2O1O1O001O0010O0000O3OO01O1O001O001O1O001O00000010O2N1O2NO2M2000001O00001N101N1001000O0000010O001O002N001NZGTKT8l4c000N3N2O001N2O1N2O001O1O2M1O2N102L3L4M3O1O1O2M2N2UObEkMb:P2bEmMi;AVDLW1>^NnA1O0000Oea1" + }, + { + "size": [ + 454, + 640 + ], + "counts": 
"^_^51U>00000000001O2N2N010N2O1O1O1O1O001O2cFCm4>RKBo4>\\41O2N1O1O1O1O3eB0OXOg04XO064W:b3L0O3jEZLg9S4O0100O1O3Ng0XO2N1O10O6J100cE]LU:d37O02N1O2O0O010O01O0000001O001O00000000001O000000000000000000000001O00000000O11O000000O1000000000000001O00001O00001O001O000000000000hMeEe0[:WOlEe0U:[O^FeNVO0f00ZO:\\:R1Z2O10000000000000000000000000000000000000hFdNf5\\1ZJdNf5\\1ZJdNf5\\1b30000000000000hFeNe5[1[JeNe5[1[JeNe5[1[JeNe5[1c300000000000000000000001O001O001^FaN[6_1dIbN\\6^1aIeN_6\\1[IoN_6R1^IQOa6o0^ITO`6m0UG`NK1?N>d0S8Z3RGfLm8^3nFeLQ9X40000000000000000000000000000000000000001O0000000000O11O\\OnFhK22P9i3fGUL[8j3gGULY8k3hGTLX8l3iGRLX8n3P100000000000000000000001O00001O1O:F1OO010000000O10000000000O10000000000000000O1000nF" + } + ], + "question": "Where is located relative to ?", + "answer": " is beside .", + "image": "images/caption_simple_9.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000551822.jpg", + "mask_rles": [ + { + "size": [ + 453, + 640 + ], + "counts": "Y_P3e0Z=]OhBn0o and ?", + "answer": " is attached to .", + "image": "images/caption_simple_10.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000498463.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "j`T7a0]>201O000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001O0000000000000000000000000000000O100000000000000000000000000000000000000000001O001O000000001O000000000000000000000000000000000000000000TH" + }, + { + "size": [ + 480, + 640 + ], + "counts": "l_T7i0V>2O000000000O1O1bNVOdDl0Y;YOaCKd0V1k;YO_CY1` and ?", + "answer": " is on top of .", + "image": "images/caption_simple_11.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000275198.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": 
"^[P34j>5J5M1M4M2N2N2M3N2N2M3N2N2O1N1O2N2O1N2O1O1O1O1O1O2O1N1O010O10000VO\\NWDf1c;jNRDU1l;POUDm0j;UOVDj0i;XOTDYOKX1P[O1K[NoM9Oo00W1j2WO\\O2KbNPNn00V1h2YO]O5HTNRN21N0X12iNMX2g2@^O5G[NUNQ1OhN0X2b1eM`N6e0e1^17EX2lNnLNd0_1:EQNXNY10bN1X2nM;ZMWO_3P1nMQO[3;QN;i0[O_Lg0]1XOeNN`4b0^LQOW64R1l0gHPOY17k:h0mCSOU17n:f0kCUOV15R;k0mDVOmNGO3i;o0[E^OkNCj;n0Q2F[BXOf=h09O00100O1O10O10O10O0010O1O1O10O01O001O01N11O101N00100O010O1OO2N1100O00O2O1O100O0O2N2001OO100O10000O10ON32N0WOQB`0X>O1N2O1O1L6IoRe6" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is looking at .", + "image": "images/caption_simple_12.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000257896.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "`fV46dc0`0D:F9H4M1N1O2O2M101O0001N2O0O1O2N2N1O2N2N2M3N1O2N2N1O2N2N2N1O2N2N1O2M2N3N1O2N2N1O\\Q[4" + }, + { + "size": [ + 640, + 480 + ], + "counts": "ZT`21mc04M4K4M2O1N2O1N100O101M5M1N101N2O2M2O000O2N102N1N101N101[CnNZ6S1cIVOV6j0hIYOW6h0eI_OV6d0eIDV6 and ?", + "answer": " is wearing .", + "image": "images/caption_simple_13.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000034417.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "hc[4?n:=I:G4N1O101N10000J6O10000O2O00000000lNQF`0o9^OTFa0m9^OTFb0k9\\OWFc0k9VO\\Fh0[:A>O2O3N1O100OO02O0O100O010O0010O01O0010O01O000O101NVUi0" + }, + { + "size": [ + 375, + 500 + ], + "counts": "kZm35\\;7I6K6L3M3N3N2M4iEkNg9d1N2O0O1O1F:O1O01O0O2C=N2O1ROmE12\\OT:?SFBI4\\:9SFFn97TFIn94SFLo91RFNQ:MRF3i:O100O2NTf]1" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_14.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000047585.jpg", + "mask_rles": [ + { + "size": [ + 640, + 424 + ], + "counts": 
"V91Z2>_1C`L0f00000O100O10000000000O10000000000000000O1O10000000000001O001O00001O000000001eNd]OP1\\b0lNi]OS1Wb0kNk]OU1`b001O000000000000000000O100O1O1O1O1O1O1KeNd]O\\1\\b0dNc]O]1]b0dNb]O\\1ab00001OO10000000000000000000000001O0000000000LdNd]O20T1\\b0jNk]OU1ab0O001O00000000000000O10000000000000000j]OlN`a0T1f0000O10000O100O1O1N2O1O1O100000000000000000000000000000000bGiNSNW1m1oNmMQ1S2XOXFJ`6n0X35YF_No5\\1g37WFbNo5W1j3k0SLUOm3n0oKSOQ4o0mKQOS4S1iKmNW4T1hKlNX4V1eKkN[4V1dKjN\\4X1bKhN^4[1_KeNa4\\1^KdNb4^1\\KbNc4c1YK]Ng4g1TKZNl4l1nJTNn4`NUF_3j4QNQ5`NUFc3e4nMV5_NUFh3`4iM`5kNnER3\\4TNP6^NkE\\3R4VNQ7X1lHhNd7l0XHTOZ8=cGC_8?]GAe8a0XG^Oi8e0SG[On8i0mFWOU9n0dFRO]9b1hD]MWOQ1QlFIQ9:jFJT99fFLW97dFNY96aFO\\93aF1\\91]FhK[OX4X:1]FhKYOX4Y:0aFeKXOZ4U:MmF2S9OmF1R90nF0R90oFOP92PGNo83RGLm85RGKo86oFKP95QGKo84SGKl85WGIi87TGKl85TGLk85UGKk85UGJk87UGIk87UGIj88VGHj88WGGV7nKoH]4JEU7SLnHX4MET7ZLhHS43DS7[LiHR42DT7[LjHQ42DQ7aLiHm31FT7dLeHg34IT7dLfHd34IU7hLbH`36Km6^KbHY1:`36Jm6PMkH[36Cn6TMjH[37Am6WMjHZ37Al6YMiHY39_Om6j2mHZMQ7m2hHTMW7k7O2kH\\Cl6m<0O0O3N1O010N1O101O000000000O1O1O1N200O100001O0002N1O001O1O0000001SEfHR9[7kFiHS9W7jFlHV9U7gFoHW9R7eFRIZ9o6dFSI[9n6dFSI[9n6eFRIZ9P7eFoH[9T7cFlH\\9V7mESH and ?", + "answer": " is standing on .", + "image": "images/caption_simple_15.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000234757.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Y\\Z52k>6J5K4L5K5K4M3N2O1N2O100O1000000000001O00000O100O0O1O2N10O0jN`Bn0`=ROaBm0_=ROcBn0\\=ROeBP1X=POhBP1X=POiBP1V=QOiBo0W=ROhBn0Y=SOeBm0[=<1O1\\OoBWOQ=e0VCXOk1iA0V>OjA3U>MkA3U>MkA3U>MkA4T>MkA3U>NjA2W>NiA1W>;2O001N2N2N4JbRT3" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"oXT4:e>2O1N3N1O0aE>QLUOZ9=UJg1V5ZN]J[2[5eM^Jl2W5VMbJX3V5hLaJg3X5ZL]JV4^5jKZJd4`5\\KYJT5^5lJ^J_5[5bJ`Jf5\\5[J^Jk5a5VJYJo5g5QJUJS6k5mISJU6l5lIoIZ6P6fIlI^6T6bIhIb6X6]IdIh6[6YIcIi6]6m000O10000000O00100O1000O1000O1000O100000000O101O001O1N1000000O100000SNXKaIi4m5^2B>iNW1kNU1iNV1_Nb1O1O2M201N2N2N2M4L[bT4" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is in front of .", + "image": "images/caption_simple_16.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000008899.jpg", + "mask_rles": [ + { + "size": [ + 539, + 640 + ], + "counts": "Z>e1V?0000000000000O1000000O10000O1O1O100O100O100000000O100000000001O000000000O101O00JZNXAe1h>[NXAe1g>[NZAe1e>]NYAd1e>^N[Ab1c>`N]A`1d>^N]Ab1c>\\N_Ad1a>\\N^Ae1b>YN_Ah1i>10000L4N2000000KoM`AQ2`>oM`AQ2`>oM`AQ2d>1O100O1O100O11O000000O1O1O100O01000O10000O1000XNbAS1^>kNeAT1[>lNeAT1[>lNeAT1[>lNeAT1[>kNfAU1Z>jNgAV1Y>iNhAW1X>hNiAX1W>gNjAY1V>gNjAY1V>fNkAZ1U>eNlA[1T>dNmA\\1S>cNnA]1R>bNoA^1Q>aNPB_1P>aNPB_1P>`NQB`1o=`NQB`1o=_NRBa1n=]NTBc1m=[NTBe1a>0000000000000000000000O10000000000001O00O100000000000000000001O2N:FY2PBfMP>[2PBdMQ>\\2oAeMP>[2QBeMn=[2RBeMn=[2RBeMn=[2PBfMQ>Z2PBeMP>[28000iAeMn=[2QBgMn=Y2SBeMo=Y2:000000000000000O10000000000000000jAeMl=[2SBfMm=Z2:000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000000001O0000O100O100000000O1000000000000000000000000000000kAbMm=^2SBbMm=^2800000000000000000000000000000O100000000001O0000O1000000000000000000000000000000000000000O11O0000000000000000O11O0000O1000000000000000000000000001OO0101O0001O0O10000000000000000000001O000000000000O1000000000000000000000O1001O00O1000000000000000000000000001OO10000001OO100001O00nNfACZ>=hA@Y>`0hA_OX>`0kA_OT>a0mA^OS>b0mA]OT>c0mA\\OS>d0nA[OR>e0oA[OP>e0RBYOn=g0SBWOn=i0SBVOm=j0TBUOk=l0WBROi=n0WBSOh=m0YBQOh=o0YBPOg=P1ZBoNf=Q1[BnNe=R1\\BmNd=S1\\BmNd=S1]BlNc=T1^BkNb=U1`BiN`=W1aBhN_=X1aBhN_=X1aBhN_=X1aBhN_=X1bBgN^=Y1bBgN^=Y1bBgN^=Y1aBhN`=W1aBhN_=X1
aBhN_=X1aBgN`=Y1`BgN`=Y1`BgN`=Y1`BgN_=Z1bBeN^=[1cBdN]=\\1Q10001O00O100000000001O0000000000`AeN`=[1o0100001O00000000000000O11O00000000000000O1001O00000000000000000000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000000001O0O1M4ZO]@Aih5h0PgJ=H2O000001O000000001O00001O00001O00001O00^B" + }, + { + "size": [ + 539, + 640 + ], + "counts": "VTZ22e`08J4M2N2O0O1_OAf@`0X?Cf@=Y?Ee@i0N1O1J6O1N3L3N200O100000O102N1O2N2N2N6I2O1O3mNbAC`>_OfA6?Fm=3dA6e0Ai=7dA6^?He@4^?If@2^ea7" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is on .", + "image": "images/caption_simple_17.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000260261.jpg", + "mask_rles": [ + { + "size": [ + 640, + 426 + ], + "counts": "Xi`33lc02M3N2M2O2N2N2N101O1O10O1000000O10000O10000O10O1000O1O001O0O2M3O0O2O001N2O1M2O]^P4" + }, + { + "size": [ + 640, + 426 + ], + "counts": "dbZ32mc03N2N6J2O0O001O01O01O1O001O01O01O00000001O0000000O10O1O100O1O1O1N2O01000O1gK@QEa0n:DmD=Q;GlD:S;IjD8U;IjD8U;JjD6U;KjD5W;KhD6W;KhD6W;KgD7W;KfD7[;HaD=^;BaDa0_;_O^Dd0a;]OZDg0g;YOVDj0h;WOUCCeN^1U>POQCP2nNZE5_;I]4Objb1" + } + ], + "question": "What is doing with ?", + "answer": " is holding .", + "image": "images/caption_simple_18.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000301563.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": 
"^2m:_20000000000000000O2O000001O01O0000001O0O1000010O000000000O10000001O01O00000O101O00001O00001O001O000]KXMbNi2]1XMbNh2^1WMdNi2o50000001O000O2O2cE_Mk9Q300010O0001O000O2O001O00001O1O00001O001]KZL^Og3`0ZL@f3`0ZL@g3?YLAg3R50000001O000^KWLAj3>VLBj3>VLBk3=ULCk3o4@cFQM]9n2dFRMi4EDY3cKRMi4EDS3kKWM`4FER3mKWM^4FFS3mKWM\\4FGS3mKWM\\4FGS3mKXMn8h2RGXMn8h2TGVMl8j2UGUMk8k2UGUMk8l2TGTMl8l2UGSMk8m2VGSMi8m2XGRM^OJW9T3[GRM^OJW9T3\\GQM\\OLX9S3\\GSMe8n2ZGRMf8n2ZGRMg8m2YGTMe8m2[GSMe8n2ZGQMg8o2YGQMg8o2ZGPMf8Q3YGoLg8Q3YGoLg8Q3YGoLg8Q3YGnLh8R3YGmLg8T3YGjLh8V3g00000001O000000001O0000001O001O00000000001O0000000000001O000000000000001O00000000001O000000001O0000001O0010OO101O000000000000001O0000000000002N1O001O0000000000001O0000000bHgLi4Y3eJ^MV5b2aJnMX5S2]J\\NCkMX5i3SK`N_OSNV5]3WK_Og4a0XK@h4`0WKBh4?UKDj44d4`6lK`IT4Z6SLfIl3T6[LkIe3T6]LlIb3S6`LmI_3S6bLmI]3S6cLnI\\3R6eLnIZ3S6fLlIZ3U6fLkIY3X6eLhIZ3Z6dLgI[3Z6eLfIZ3[6fLeIY3]6fLcIY3_6fLaIY3b6eL]I[3e6dL[I[3f6dL[I[3f6eLZIoNOg3i6YMWIoN2g3h6ZMWIkN5i3e6\\MUIjN8i3c6^MUIgN:j3b6_MiIa2Y6^MgIa2Z6^MgIa2Z6_MeIa2_6\\M`Id2b6[M^Id2e6ZMZIf2g6ZMXIf2i6ZMUIg2l6XMTIh2m6XMRIh2Q7VMmHk2V7SMiHm2Y7QMgHo2[7PMdHP3]7oLeHo2\\7PMeHo2]7PMbHP3`7nL`HR3a7mL_HS3c7lL\\HT3f7jLZHV3g7iLYHW3h7hLYHW3h7iLWHW3j7hLVHX3k7gLUHY3l7gLSHY3n7fLRHZ3o7eLQH[3P8dLPH\\3Q8cLoG]3Q8dLnG\\3S8cLmG]3S8cLmG]3T8cLkG]3V8bLjG^3W8aLiG_3X8`LhG`3Y8_LgGa3Z8^LfGb3[8^LdGb3]8]LdGb3]8]LcGc3]8]LcGc3^8\\LbGd3^8]LaGc3`8\\L`Gd3`8\\L_Ge3b8ZL^Gf3c8XL^Gh3S901O000000O1000000O100000000hK\\LgNe3X1^LeNc3Z1aLcN_3\\1dLbN\\3e0XLUL`0T3X3f0[LSLa0T3T3i0_LmKa0Y3o2j0hMVOV2j0kMVOT2j0mMVOR2i0PNWOo1j0QNVOn1j0SNVOl1j0VNSLWN`2c3]1WNRLWN`2b3^1XNQLWNCOk2a3a1YNQLZN^2]3a1ZNQLYN^2\\3a1\\NPLXN_2\\3b1\\NoKYN]2[3d1]NnKYN]2Z3e1^NnKXN\\2Z3g1^NmKXN[2Z3h1_NmKWNZ2Z3i1`NmKVNY2Z3j1bNlKUNX2Y3m1bNkKTNW2[3n1bNkKSNV2[3o1cNkKRNT2\\3Q2cNkKQNS2\\3R2dNkKoMR2^3T2bNkKPNP2^3U2cNkKoMo1^3V2eNjKmMo1^3X2eNiKlMn1`3Y2eNiKjMm1b3Z2eNiKiMl1b3[2fNiKiMj1a3]2gNiKhMi1a3^2hNiKgMh1a3_2iNiKfMg1a3a2iNhKfMe1b3c2iNhKfMc1a3e2jNhKfMa1`3g2kNhKfM^1`3j2kNhKeM]1`3k2lNhKdM\\1`3l2mNhKbM\\1a3l2nNh
KaM[1a3m2oNhK`MZ1a3n2POhK_MX1b3Q3oNgK_MW1b3R3QOfK]MW1b3S3ROfK[MW1c3S3SOfKZMV1c3T3TOfKXMV1d3T3UOfKWMU1d3U3VOfKVMT1d3W3VOeKVMS1d3X3WOfKSMR1f3X3XOfKQMQ1h3Y3XOfKPMQ1g3Y3ZOfKnLQ1h3Y3[OSMe0m2]OQMc0o2^OoLc0Q3^OnLb0R3^OnLb0R3_OgKjLe0h3d3_OkLa0U3@jL`0W3@gLa0Y3AdL`0\\3AcL?]3AhKbL;n3m3AgKfL7i3S4@fKiL5g3U4AeKiL4g3W4@eKjL2g3Y4@eKjL0f3\\4@cKkLOf3^4AbKjLMf3a4C_KQ1b4PO\\KP1d4l300001O00000000000000001O00000000000000001O00000000000000001O0000000000001O00000000001O000000000000001O00000000000000001O0000001O0000000000001O000000001O000000001O000000001O0000000000001O00000000000000000000001O00000000001O0000000000000000001O00000000001O0000000000001O0000001O001O000000000000001O0000000000000000000000001O0001O0000000000O101O00000001O0000O10001O00000000000000001O000000000000000000000000000000000000000000000000001O01O0001O0000]J" + }, + { + "size": [ + 428, + 640 + ], + "counts": "ff[21[=00000SX<3egC8oBJSL4L4H8E<0O102N3M4L1O3M3M3M1O2N1O101N3N2M1100O001N2O4L1O000001N5L1N1O1O2O0O2N3M2O2L3N1O1O2N101N2N1O1O100O1O100O1000O010_LfGo1Z8PNfGQ2Y8nMhGR2Y8nMhGQ2W8oMfGdN2\\3X8nMmGR2S8mMmGT2S8mMlGS2T8PNiGP2W8PNiGo1X8mMmGR2R8nMnGS2R8mMgGgN3\\3V8QNkGm1U8SNmGl1S8SNmGm1T8QNnGo1Q8RNnGo1R8QNnGo1Q8RNPHm1P8RNQHo1n7PNRHQ2n7PNQHo1P8RNPHm1o7WNoGg1R8WNoGi1S8UNnGi1T8mMcGUO9m2U8kMeGZO3j2Z8kMeGZO2i2Z8kMgGYO1k2Y8jMXHV2j7gMXHY2i7dMYH\\2h7aMZH_2g7]M\\Hb2i7TMSGNW1m2V9O2N2N2M4L3M2O3L3N4L1O1O1O1O1O1O1O1O1O2N1O010O0O101N2N101N2O1O1O2N1O1N2N6K1O0O2O1O1O1O1N101O1O1O0O2N1O201L2O2M4MZ>InA0N4M0O2L41O01ON11O10O101N3ImB0Pko2" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is on .", + "image": "images/caption_simple_19.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000016598.jpg", + "mask_rles": [ + { + "size": [ + 640, + 478 + ], + "counts": 
"Sme635l0Ni0OYNe`0Z3K3N2O1O010000O100O001M3N2O1O1O1O100O10000O10000O10000YOZ_OTNf`0j1^_OSNc`0l1`_ORN``0m1b_ORN^`0n1c_OQN]`0n1d_OQN]`0o1d_OPN\\`0o1f_OPNZ`0P2f_OPNZ`0P2g_OoMY`0Q2g_OoMY`0P2i_OoMW`0Q2i_OoMW`0Q2i_OoMW`0Q2j_OnMV`0R2j_OnMV`0Q2k_OoMU`0Q2k_OoMU`0Q2k_OoMU`0Q2k_OoMU`0Q2k_OoMU`0Q2k_OnMV`0R2k_OmMU`0S2k_OmMU`0S2l00000000000O10000010O01O001O1O1N2L5iNj]O_OXc06g\\OLb^X1" + }, + { + "size": [ + 640, + 478 + ], + "counts": "Vj133Nac0a0^\\OAPc0l0Fg5mAYJV>Z5jAlJ]>S4fA^Lg>^3[AnKM9l>g3WA_Lm>_3SA`LQ?]3QAaLR?\\3UA[Lo>b3RA]LR?]3`@]L?6S?Z3PAfLR?W3P1YO\\_OeMi\\1" + } + ], + "question": "What is doing with ?", + "answer": " is holding ", + "image": "images/caption_simple_20.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000439854.jpg", + "mask_rles": [ + { + "size": [ + 333, + 500 + ], + "counts": "bhP11[:2O0O101N1O100O1O100O1O010O2O0O3N0OLTFIl9650010O0010O010O010O0010O0010O010O000010O0100O100O2OMRFHl98UFHk97WFHh996O1O100O2MaY`3" + }, + { + "size": [ + 333, + 500 + ], + "counts": "hmT19o0JR8=gGDX8`0SGG3Jj8o0SGROl8X100ZOjNUHV1i7nNUHQ1k7QOTHo0k7SOTHm0k7UOTHk0l7VORHL26l70QHJ54j73PHI73i75oGH;0f78oGH=Me7 and ?", + "answer": " is on .", + "image": "images/caption_simple_21.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000012062.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "l`k7>k<4L4[C\\O^ and ?", + "answer": " is in front of .", + "image": "images/caption_simple_22.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000121586.jpg", + "mask_rles": [ + { + "size": [ + 478, + 640 + ], + "counts": "VTZ191IY>g0KO=YOaA2OO4Oi=0]B0H002OOk=2SB20:1Bk=k0VBTOT<;VEHdN:3EV5]A0R>a0M2N2N101N2O1N1O000O1O100O100O100O10000O100010O01O4L1O0001O00001O1O001N2N2M3N2O2M2N3M4L4L2M6Inok5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is inside .", + "image": "images/caption_simple_23.png" + }, + { + "image_path": 
"/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000476704.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "V8T5X8001O000000O10000000000010O02N1O00003L3N0000010O5K2N0O2O00000001O00000000O01O1L4M30000O10001OO1000000001O0000000000O1000000000000000000000O1000001O000000O100000000001O00000000O2O00101N3M3M1O0000000O11O00000000RL\\GT3d8jL_GU3a8gLcGDJ[3d8nLTHl2l7RMWHm2U9O00000010O0001N100000001O0000000000000000001O001O000000001O0000O10000001O0000000000000000001O00000000000000001O0000000000000001O0001O0O100000000001O0001O0O100000000001O00O100000001O00000000000000001O00000000000000001O0000000001O0000O100000001O0000000000001O00000000001O1O001O000000000000000000000000000001O0000000000000001O000000O100001O000000000O100000000000001O01O00000000000000000001O00000000000000000000000000001O0000000000000000001O000O10000000000000000001O0001N010000000000000001O00001O00000000O101O000000000000000000001O00000000000000000000000000000000000000001O000000O10000O10000O10000O10000000000001O4L1O001O001O0000001O000000000000O10000O1O1O1O1N2N2N2N2N2M3N2L4O1O1O1O10000O10000O100O10000O100O10000O10000000000O100O100O100O100O1O1O1O1O1O100O10000000eL_FP1K`0i9[N`Fb0IF1[1U:nNmEA2a1X:]N`Fb1e:0000000001O001O00002N3M1O002N1O2N001O1O1O1O00O10O10O2N100O100L4O1O1O1N2O100YLoNSKQ1_4^O`Kb0_4_O`Kb0`4^O`Kb0`4^O`Kb0`4^O`Kb0`4]ORISOf1`1X5]OQIVOe1]1Y5^OQIXOd1Z1[5^OPI\\Ob1V1^5^OPI^O`1T1`5^OPI_O`1R1_5POcHI?4_1S1_5oNfHG?2_1X1\\5oNULQ1k3oNULQ1k3nNVLR1j3mNWLS1j3jNXLV1h3gNTKMYN]1c6eNUK2TNY1g6eNUK6PNU1k6dNVK=hMP1R7bNWKb0bMm0X7^NXKg0]Ml0[7ZN[Kl0WMk0^:Q100001O00000000000000001O000000000000000000O100000000O1O1O1N2N2O1SOPMgFg3^91FYLoFg3P9\\LmFe3Q9 and ?", + "answer": " is parked on .", + "image": "images/caption_simple_24.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000211042.jpg", + "mask_rles": [ + { + "size": [ + 640, + 458 + ], + "counts": 
"YdX34ic05L3N2N2N2N101N1O2N1O2N1O2O1N101N2O0O2N100O1O0001O0O1O2O001O001N10O20O0001O01O01O1O01O0O2O0O1O100O10001O01O01O001O1O001M3N2K7Ghbf0L_]YO5`N3n]OK;7ea0b0n]OFQb0U1N2N2N2I7K5K5N201O00000000000001O00001O000000001O0000001O00001O00001O00001O00001O00001O001O0000001N10001O00001O001O00001O0O2O001O001O001N101O001O1N101O001O0O2O1O0O2O1O1N101N2O1N101N2O1O1N2N2O2M1O2O1N3L3N3L5Jbcc1" + }, + { + "size": [ + 640, + 458 + ], + "counts": "]dW3;bc04M2L4L4N2oK^OaDe0^:X1RB_Nd1?U<_2WChMc^3m0N3M2O1N2O1O1O1O1N2O1O1O1N1O2N1_Ng_OYO]`0f0h_OnN``0P1X10001M2M4J5E;K6L5JnX=0`SB3WhQ3" + } + ], + "question": "What is the relationship between and ?", + "answer": " is attached to and leaning on .", + "image": "images/caption_simple_25.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000055299.jpg", + "mask_rles": [ + { + "size": [ + 429, + 640 + ], + "counts": "e and ?", + "answer": " is sitting on .", + "image": "images/caption_simple_26.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000435206.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": 
"b531`7c5`H]J`7c54000\\H]J`7c5_H^Ja7b5_H^Ja7b5_H^Ja7b5_H^Ja7b5_H^Ja7b5400000000000000O11O0000O10000001O00O11O0000O1000000000000001O00O1000000000000001O00O1000000000000001O00O10000001OO1001O000000O11O0000000000000000000000000000001O0000O1000000000000001ZH]Jb7b54000000000000000000000000000[H^Ja7b5_H^Ja7b5_H^Ja7b5_H^Ja7b541O000[H\\Jc7f501O000000O100O11O0000N]H\\Jc7c530000000000000000000000O1001O00O1000000O100001O0000000000O1000000000000000000000000O10000001O00O1O1O1000000001O00O100O100O10000000000O100N200001O1O001O000000000000000000000000000000000000000000000000000000001O00001O001O001O001O1O0000001O00001O001O000mH^J]6c5`I_J`6a5`I`JCLg6e5dIeJ\\6\\5bIfJ]6[5_IhJa6X5^IiJb6X5]IhJc6X5]IjJa6W5]IkJb6U5^IlJa6T5^ImJb6o5O000000001O00001O001UJ^Ik4b6TK`Ik4`6UK`Ik4`6TKbIk4_6SKbIm4^6RKcIn4]6RKcIn4^6PKcIP5^6oJbIQ5^6mJdIS5\\6kJfIU5[6iJfIW5[6gJfIY5[6eJfI[5[6dJfI[5U701O1O1O1O1O4L000000O11O00001O0000001O0000O10000000000000000000[LjGU2W8hMkGX2U8hMkGX2U8hMkGX2U8gMlGY2U8fMkGZ2U8fMkGZ2U8fMkGZ2V8eMjG[2V8dMkG\\2V8cMjG]2W8bMjG]2X8gLfG<2m2Y8fLeG=1n2[8cLfG?Nn2\\8bLhG?Lo2^8_LgGa0LP3i8oLYGP3h8nLYGR3g8mL\\GQ3e8mL\\GS3f8jL]GT3e8iL]GV3e8gL\\GY3\\9O00001O2N001O00001O00001O001O001O1O1O001O1O1O001O001O1O2N001O001O1O001O1O1O1O000WN[El0f:mNaER1`:kNcET1^:kNbEU1^:jNdEU1\\:jNeEV1\\:gNgEX1Z:dNjE[1X:`NkE`1Q;eNXDQ1h;nN]Dn0P<01O00001O100O2N0000001O00003MM3O100O1O100O10000002N6J00001O1O3M2N1O2N2M5L3M1O1O0Oml`2" + }, + { + "size": [ + 427, + 640 + ], + "counts": "Vbn3:e< doing in relation to the ?", + "answer": " is looking at the .", + "image": "images/caption_simple_27.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000137950.jpg", + "mask_rles": [ + { + "size": [ + 415, + 640 + ], + "counts": "WW1231N11O0O11N10?0<4 and ?", + "answer": " is flying over .", + "image": "images/caption_simple_28.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000435208.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": 
"o`02d03AMeb0T1M5Z]OhN1NZb0f1L1OO1O1001N10001O10OO2O1O1O1N2O1O1O1O1N2O1O1O100N2002YN\\^Om0fa0PO]^Oo0ca0PO^^OQ1aa0lNb^OT1_a0jNb^OV1^a0hNd^OX1]a0eNe^O[1[a0dNf^O\\1oa0000000000O1000000000000000000O10000000000000000O10000000O100000000000O1000000000000O1000000000O1000O100000000000000O100000000000000000000O10O100000000000000000000000000000000000V_O^NdN2W`0a1SAiNm>W1k@]NlN>Y`0U1h@TOX?l0c@YO]?g0\\@@d?`0\\@@d?`0\\@@c?a0[@Ae??W@Ei?;m_OOS`01j_O2V`0Ni_O3W`0k100000000000000000000000000000000000000eM]LYDc3`;fL^DZ3_;iLaDW3Z;nLfDR3Q;\\LkBg0T2m2b:fM^EZ2a:gM_EY2a:gM_EY2a:gM^EZ2b:fM[E]2e:cMeDS3Z;nL]D[3c;dLPDj3PjNXAb1c>`NWAg1g>ZNVAk1g>VNTAR2g>PNVAW2e>jMXA^2b>`M_Ae2\\>[MdAg2[>SMhAR3V>jLlA[3Q>dLoA^3o=dLnA`3P>`LoAb3P>_LnAb3Q>_LnAc3P>_LmAd3P>^LnAd3o=`LPB`3k=jLnAY3i=PMTBS3g=RMWBo2h=SMVBn2i=VMRBk2n=d1O1O1O10O010O0100000O01000O10O010000000001O0000001O000000001O001O000O2eKPBd2Q>YMRBf2n=ZMTBd2m=ZMUBe2k=[MWBc2j=[MXBd2j=ZMXBd2l=XMXBd2i=ZMXBf2m=YL_Ag0e0P3U>iL`Bd2a=PMQCi2V?L6K2N2N1O1O0O100000001O0O102N1N01N1101O1N01000O01N110O1O101N101O0O2N2N100O100O1000O10O1O10O1@i^OWNVa0f1P_OWNQa0a1X_O]Ni`0a1l0O1N2N2N2N2N2M4M2L4L5I7J6N6CWgZ6" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is sitting on .", + "image": "images/caption_simple_29.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000137576.jpg", + "mask_rles": [ + { + "size": [ + 563, + 640 + ], + "counts": 
"0g15UOf>k0ZAUOf>k0ZAUOf>k0ZAUOf>k0ZAUOf>k0ZAUOf>k0ZAUOf>j0[AVOe>j0[AVOe>j0[AVOe>j0[AVOe>j0[AVOe>j0[AVOe>j0[AUOf>k0ZAUOf>l0YATOg>l0YATOg>l0YATOg>k0ZATOg>l0YATOg>l0YATOg>l0YATOg>l0YASOh>m0XASOh>m0XASOh>m0XASOh>m0XASOh>m0XAROi>n0WAROi>n0WAROi>n0WAROi>n0WAQOj>o0UAROk>n0UAROk>n0UAQOl>P1TAoNl>Q1TAoNl>Q1TAnNm>Q1SAPOm>P1TAoNl>Q1TAoNl>Q1TAoNl>Q1TAnNm>R1SAnNm>R1SAnNm>R1SAnNm>S1RAmNn>R1SAmNn>S1RAmNn>S1RAmNn>S1RAmNn>S1RAlNo>T1QAlNo>T1QAkNP?U1PAkNP?U1PAkNP?V1o@iNR?V1o@jNQ?V1o@jNQ?V1o@jNS?T1n@jNS?V1m@jNS?V1m@jNS?V1m@iNT?W1l@iNU?V1k@jNU?W1j@iNV?W1j@iNV?W1j@hNX?W1h@iNY?V1g@iN[?V1e@jN[?V1e@jN\\?U1d@kN]?T1c@kN^?U1b@kN_?T1a@lN_?T1b@kN]?V1c@jN]?V1c@iN^?W1b@iN^?W1b@iN^?W1b@hN^?Y1b@gN^?Y1b@gN^?Y1b@gN^?Y1b@gN^?Y1b@fN^?[1b@eN^?[1c@dN]?\\1c@cN]?^1c@bN]?^1c@bN]?^1c@bN]?^1c@aN^?_1;000000001O000001O1O000010O00000001O0001O0001O000000001O001O00001O001N10001N10001O0O2O001O001N101O001O1N10001O0O101O000O101O1N101O1O1O0O2O2N1Ao^O9Wa0NS_S8" + }, + { + "size": [ + 563, + 640 + ], + "counts": "adj0X1P`0?Q@`N]?e1e@ZNZ?g1f@YNX?i1h@TNY?m1h@QNY?m16M4K5J7F:F:F;CY\\34ncL04HWc62lfM2dTKM26Kn01PO4N000NV>[3I100O1O0000000001O001N2O001N2O1O1O1O1O1N2O1N2O2N1O1O001N1O101O000000K5M4N10000001O001O1M4I6N2M5fMk@P2X?oMh@Q2Y?32OO02O000O1O3N0O10001N100O101O0O1000001O000O2O00001O00001O1O001O001O001O001O0O2O001O001O0O2O0000001O000O2O000010O0001O00001O0O101O001N100O1O2N1O3L2N3N2N3N2M9H1O001O00000000001N1O10gon6" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is over .", + "image": "images/caption_simple_30.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000126137.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "bQ`1`0]<`0L2J7OO2mNmCl0SQE6c;I_D6b;H`D6b;H`D7a;HaD5a;IcD4a;FbD5aQZ5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_31.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000573943.jpg", 
+ "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Ya`84l>5Kb0^O6J1O1O000O01N2N2M3N200O1000001O000000001O000000001O0000000000001O002Ldd9" + }, + { + "size": [ + 480, + 640 + ], + "counts": "doj7g0U>5K5N10000000000000O010O10000001O1O1O1O001O0000000000O2O0O1001O01O000O1000010O0000000O1O2N1O6I_`j0" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_32.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000225532.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "h_d21f;1O1O2N1O1N2O1O1O1O001O001O001O0oDBi:?VEBi:?VECh:=XEDg:=ZEBe:>\\ECb:>]EDa:=_ED_:=aEC^:=bED]:=cED[:=eECZ:=gEDW:=hEEV:SHAl7?UHAj7a0UH^Ok7c0UH\\Ok7e0TH[Ol7f0THYOl7g0VHWOj7j0VHUOj7l0VHSOj7n0VHQOj7P1VHoNj7R1VHmNj7S1WHlNi7U1WHjNi7V1XHiNh7Y1WHfNi7[1WHdNi7\\1XHcNh7_1WH`Ni7a1WH^Ni7c1WH\\Ni7d1XH[Nh7f1YHXNf7j1ZHUNf7l1ZHSNf7n1ZHQNg7o1YHPNg7Q2YHnMg7S2YHlMg7U2XHkMh7V2XHiMh7X2XHgMh7Z2XHeMh7[2YHdMg7]2YHbMf7`2ZH_Mf7b2ZH]Mf7d2ZH[Mf7f2ZHYMf7h2ZHWMf7j2ZHUMf7l2ZHSMg7m2ZHPMg7P3`01O1O1O100O002N001O001O2N001O1O1O1O1O1O0000000000000000000000000000cMQHR1o7eN_HV1a7eNgHX1X7fNPIU1Q7hNXIQ1h6nNbIi0]6VOiIf0W6YOmId0S6\\OnIc0R6\\OQJb0o5^ORJ`0o5@SJ>n5@TJ2fMOU80WJKdML14OLU89WJ and ?", + "answer": " is driving on .", + "image": "images/caption_simple_33.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000424349.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": 
"XlW48l<>`CBX3L^4a1YKdNb4e1UK_Ng4g1RK^Nl4f1oJ]NP5g1jJ\\NU5h1fJZNX5j1dJXNZ5l1bJVN^5l1_JUN`5n1WIdMLa0m6V3nHlLR7X3bHPM]7_4O1O100O100O10000O100O100O100O10000O10000O100O100O10000001O00000000O10000O1_NoIiKQ6V4[J^Kf5_4cJYK_5`4mJYKU5e4oJSKgNK[6P5j1N2N2O1O1O1N2N2O1O1O1O1O100O100O100O100000000O1000000O10000001O0000000000001O001O001O001O000eIcKi3^4nKlKP4V4fK\\KXNc0R6Q4dKTL\\4m3aKVL^4k3`KVL`4k3^KWLa4j3]KWLc4k3[KVLd4k3ZKWLe4j3XKYLg4j3TKXLf3kNZLP5IYLk3nNWLl4G[LP4mNVLn6i3TIVLl6i3VIVLj6i3[ISLf6k3\\ITLd6k3_ISLa6l3bIRL^6n3dIPL\\6o3iIlKX6T4mIgKR6Y4`100000000O10001O0O101O001N101N1O1N3J5H9EZFiLm9g2c0L5L3N201N101O0000001N2O1O1N101O1O1O00001O001O1O1O1O001O00000001O0001N110O00000001O0001O000000001N10000O101N1O2O001O000O2O001O1O001O1N2N2O0O2O1O2N1O2M2O2N1O1N2O0O2O1N2N1N5L6GcbP1" + }, + { + "size": [ + 426, + 640 + ], + "counts": "^j7=g<6I8L4`ETO[8Q1bGRO[8R1cGoNY8f1UG[Ng8l1RGXNg8Q2UGQNf8U2XGlMf8V2YGkMd8Z2XGhMc8^2[GcMc8`2ZGbMd8a2ZG`Mc8d2[G]Mb8g2\\GZMa8k2\\GVMb8j3O1O2L3QM^KUKJW1j4[3^LcLc3[3`LbLb3[3aLdL`3Y3cLcLa3\\3`LbLb3Z3bLeL_3Z3bLdL`3[3aLdL`3Z3cLdL^3[3fLbLZ3]3lL]LU3c3lL[LU3e3kLZLV3d3PMWLQ3h3SMTLn2l3TMRLl2n3TMQLm2n3UMQLk2o3UMPLl2P4VMmKk2R4XMkKi2U4WMiKk2W4UMhKl2X4UMeKm2[4Q300O1O100O1000000001O00O10000O100000000O1000000000000000000000000000000O11O000000000000O1000000000000001O001O001O0eJeKg1[4WNhKh1Y4SNlKl1T4QNoKo1R4nMQLQ2o3nMRLR2o3lMSLS2n3jMTLV2m3hMULW2l3fMVLZ2l3dMUL[2l3cMUL]2l3bMUL]2m3aMTL^2m3`MUL_2m3_MVL^2P4\\MTL`2R4YMRLd2T4VMQLe2R4WMPLh2R4UMPLj2R4TMoKk2Z4eKTJP1e1Z3_4_LmKY3\\4XLlKf3n6N001O0000001O1O2N3M4L3M2M2O1O2PO[FVNi9f1_FQNd9l1aFPNb9m1o0N3L5K4M3M3L4L4K5M3L4K8I_ko5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_34.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000173302.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": 
"en\\43U=10000O101O00000001NVDMfo19k[N4O100000001QOBQE>l:HQEd0a:@]ET1n9nNQFR1n9QOPFo0P:ROoENE7]:LmELS;5lDKS;7lDIT;8kDHU;9jDGV;9jDGV;:jDEV;gDBY;>gDBY;>gDBY;>gDBY;>hDAX;?hD@Y;`0gD@Y;`0gD@Y;?iD@J3g:=_E7`:I[EmEClNOW;>mEBmN1U;=mECnN0U;=mECnN0U;>lEBoN0T;?mEAPOOS;`0mEAPOOS;`0mE@QO0m96jF:8@QO0k9:jF6:@RO0i9;jF5;@RO0h9>iF3;AROOc03X8nDAR;?nDAR;?nDAR;?nDAR;?nDAR;?nDA6Ln9c0lE`0S:@mE`0S:@mE?T:AlEOHROh:o0aE_OR;`0oD_OR;a0oD^OQ;b0oD^OQ;a0oDG9D\\jd3" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_35.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000352760.jpg", + "mask_rles": [ + { + "size": [ + 640, + 544 + ], + "counts": "PX1`bW16YQiN6J3N3L2O3M3M1O2N2N2O1N2N2N1O2O1O1N200O001O1001O0O2O1O0O100O11OO100000000O10O10O01000O100O010O100O1O02OO10000O010O100O00100O01000O010O00100O010O010O01O010hMQO\\Ao0a>TO`Ak0_>VOaAk0\\>XOa_OJm1m0a>@_Aa0^>AbA?]>BdA=Z>DgA=W>DjAEnA;Q>GnA:o=HRB7l=KTB5k=LVB3g=O[B1a=1`BO^=2cBN]j7`1`EPNg2?i7\\1hERNa2b0d7Z1QFPN]2g0`7S1[FTNW2h0\\7d0SG]Nd1o0X7`0WKAf4>^KAa4>aKB]4>eKBZ4=hKDU4`1o_OYNg12Y>]1\\2N2N1O2N101O001O0010O2O0O2O2[OlNZ^OV1_a0TO[^On0^a0o0\\EaMa3c2]L^Ma3e2\\L]Ma3g2]L[Ma3g2]LZMa3j2\\LWMb3l2\\GPM\\36V5n2oF\\Ne2iNY6Q3dFQO`2PNk6\\6jHeIU7d6\\FSIe1;m7e7eFWHL8]9b9N2O1O2M2O1O2N002N1O2N1O010O00O100O2M2O2K4N3M3C`FVFd9c9`0K5J6L4fM[FQJm9h5`FmIe9m5eFeIh9R6_2B7]O`0E;J6J6M2N3N2N2O1O10001O000001O1O00100O2N1O100O2N1O2O1N2N3M2O2M3N1NZLVAY2g>fM_AX2_>gMdAX2Z>iMiAU2U>lMmAS2R>lMQBS2n=mMSBS2k=mMYBQ2f=PNZBo1f=QN[Bn1d=RN^Bm1c=RN^Bn1`=SNaBm1^=SNcBm1]=RNdBn1[=RNfBo1X=PNjBQ2T=oMmBR2R=mMoBT2o and ?", + "answer": " is below .", + "image": "images/caption_simple_36.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000344614.jpg", + "mask_rles": [ + { + "size": [ + 640, + 478 + ], + "counts": 
"Qag32kc04N2M2O100N201M2O1O2K6K4W@VOUXOa0lJj@00Y4Ta0[N8H3K5J6J6K5K5oNk]O6Vb0In]O3Sb0Lo]O0Tb0LU^OGSb06m0M3M3NRXj02ngUO001O000000000000001OO10000001O00001O001O1O001O001O001O0000001O0000001O0000000000001O0000001O000000O100001O1O00001O001O000000000000001O00001O1O000000001O000000001O0000001O00000000001OO1001O1O00001O0000001O00000000001O00000000001O001O00000000001O001O1O2N00OV]OnNhb0Z1K7I7I1O1O1O1O2N000000O100O1N2001O1OO1O1O1M30000000000000000003M1O1O001O00000000001O0R^ORN60J1010N10k`0n1h_OTN``0l1a_OTN@Ne`0o1^_OUNG4ONl`0m2o@oLUN61JO11NS>S3nBlLPOW2Y=l0lCoLiNS2Y=0iBeNU1XOiN;1c1m<6TCcNT1IjNc1jLGc8K_GX4`8iK^GX4b8iK\\GX4d8c3kGfEb6\\:WIgFk5[9lIdDOZ40gK2n01RONO150;OF0Lm0lc0" + }, + { + "size": [ + 640, + 478 + ], + "counts": "W_W6e0Vc0:F9I6K4M4K4L4M3M3M3M2N3M2N3M2N3M2O1N3N0O2N2O1O1N2O1N2O1O1O001O1N101O1O1O1O001O001O001O001O001O001O0001O0001O01O010O001O00001O001O001O001O001O001O001O1N2O1O001N2O1O0O2O1N2O1N3M2O1N2N3M2O1N3M2N3M2N3M3L3N4K4L4M3K7I6J8H=]On_n0" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is on .", + "image": "images/caption_simple_37.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000272148.jpg", + "mask_rles": [ + { + "size": [ + 378, + 640 + ], + "counts": 
"V4c7W40000000000O100000000000000001O00O1000000000000001O00O1000O1O11O00001O2N001O0000JbHPL^7P4bHoK_7Q4aHoK_7V4001O0K`HPL_7Q4aHoK_7U41O100001O0001O00O1000000000000001O00O100000000000000001OO1000000000000000000000000000000000010OO0101O000000000000000000001O0kJfKj2Z4UMhKj2X4VMiKi2W4VMjKj2V4VMjKj2V4TMlKl2T4UMkKk2U4UMkKk2U4WMhKj2X4\\2000lIfKh4Z4VKhKj4X4VKgKk4Y4[100000RJgK[4Y4eKgK[4Z4b10000000000001O2N00000000000001O]IeKe00RO0l3[4_LcKb08mNKQ4[4nMkKPNJR4[4oMjKoMKR4[4PNhKoMMQ4[4QNgKnMNQ4[4RNfKlM0R4Z4RNfKlM0R4Z4RNeKlM2S4X4SNcKkM4S4X4]NhKd1Y4YNiKg1W4XNjKg1W4XNjKh1V4WNkKi1U4WNkKi1U4WNjKj1V4oMbKjM7W4W4oMbKlM4V4Z4nMbKa2]4_McKa2]4]21000000000000001O00O10000000000000O1000000001O000000000000000[LbKN^41gKKY45gKKY43iKMW40lK0T4NnK2R4NnK2R4NoK1Q4OoK1Q40nK0R42kKOU42jKNV42jKNW42gKOY44cKM]4h30000000000000000001O00O1001O0000000000001O000oI`Kh4`4XK_Ki4a4X1000000000000000000000000001O000000O11O0dNeKYK[4f4gKYKY4g4gKYKX4g4jKXKV4h4jKXKW4f4jKZKV4f4jKZKV4f4jKZKV4f4jKZKV4f4jKZKV4g4jKXKV4h4jKXKW4f4jKZKV4f4jKZKV4g4iKYKW4g4iKYKW4g4iKYKW4h4hKXKX4h4hKYKX4i2]KRN:TOZ4h2_KSN7UOZ4h2_KSN7UOZ4g2`KUN5TO[4g2aKYL0d14\\O[4g2hKmMM\\O[4g2hKmMN[OZ4h2hKmMN[OZ46]KZ1;TON^OY44^KY1;VON]OY44^KX1ZOL[OZ4b2kKSNK[OZ4a2kKUNLYOY4b2kKUNLZOX4a2kKVNMYOX4b2jKTNOZOV4e2hKRN2YOW4h2aKPN8YOW4d4jK\\KV4c4kK]KV4b4jK]KW4b4iK_KW4`4jKaKV4]4kKcKU4]4kKbKV4]4kKcKU4]4kKcKU4]4kKcKU4^4iKcKT4nN^Ka5 and ?", + "answer": " is over .", + "image": "images/caption_simple_38.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000222317.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": 
"kc^1g0T>>E7J2N2O0O1fCQOnN_O10[;`1fEFlNkNn:_1WF2`9N`F9Y9GgFl0f8SO[GX1Z8hNgG]1S8cNmGb1n7^NRHf1j7ZNVHj1e7WN[Hk1c7UN^Hl1`7TN`Hm1_7RNbHo1]7QNcHQ2[7oMeHR2Z7nMfHS2Y7mMgHT2X7lMhHT2X7kMiHV2V7jMjHW2U7iMkHX2T7hMlHY2S7gMmHZ2R7fMnH[2P7fMPI[2o6dMRI\\2n6dMRI]2m6cMSI^2l6bMTI_2k6aMUI`2j6`MVIa2i6_MVIc2i6]MWId2g6]MYId2f6\\MZIe2e6[M[If2d6YM]Ih2b6XM^Ih2b6XM^Ij2`6VM`Ik2_6TMbIm2]6SMcIm2]6SMcIn2\\6RMdIYOkNK2P2]7mNfISOTOG0Y2T7mNhIkN@GJ`2m6nNiIiN:Z2l5mNjIgN=\\2h5mNlIdN?_2d5mNoKR1P4nNQLR1n3nNSLR1l3nNTLS1k3mNVLS1i3mNZLQ1d3PO]LP1b3PO_LP1`3PO`LQ1_3oNdLo0[3QOfLo0Y3QOhLo0W3QOjLo0U3QOlLn0T3ROmLn0Q3SOPMn0n2ROSMm0m2SOTMm0k2SOUMn0j2ROWMn0h2ROYMo0e2QO\\Mo0c2QO]Mo0c2QO]MP1b2PO^MQ1a2oN_MR1_2nNcMR1\\2nNdMS1[2mNeMS1[2mNeMT1Z2kNgMV1X2iNiMX1V2hNjMY1U2fNlM[1R2eNoM\\1P2cNQN^1n1aNSN_1m1aNSN`1l1_NUNa1k1]NWNc1h1^NXNb1h1^NXNb1h1]NYNc1g1\\NYNe1g1ZNZNf1f1YN[Ng1e1XN\\Nh1d1XN\\Nh1d1WN]Ni1b1XN^Nh1b1XN^Nh1b1XN^Nh1b1XN]Ni1c1WN]Ni1c1WN]Ni1c1WN]Ni1b1YN]Ng1c1YN\\Nh1d1XN\\Nh1d1XN\\Nh1d1YN[Ng1e1YNZNh1f1YNYNg1g1YNYNg1g1ZNXNf1h1ZNWNg1i1YNWNg1h1ZNXNf1h1[NVNf1j1YNWNg1i1WNYNi1g1VNZNj1f1VNZNj1f1UN[Nk1e1TN\\Nl1d1SN^Nl1a1TNaNk1_1TNcNk1]1UNdNj1\\1UNeNk1[1TNgNk1Y1TNhNl1X1TNiNk1W1TNjNl1V1SNmNk1S1UNoNi1Q1VNQOi1o0WNQOi1o0WNSOg1l0YNXOd1h0[N]Oa1c0_N@^1`0aNC]1=cND\\1P2@SN>n1^OVNb0j1\\OXNd0h1[OYNe0g1YO\\Nf0d1YO]Ng0c1WO_Ni0a1TObNl0^1SOcNl0^1ROeNm0[1ROgNm0Y1ROiNm0W1QOkNo0U1POmNo0S1oNPOP1P1oNQOQ1o0nNTOP1l0nNWOP1j0oNXOP1h0oNZOP1f0POZOP1f0nN^OP1b0oN@P1`0POAo0?POBP1>POCo0=PODo0=QOCo0=QOCo0=QOCn0>ROBn0>ROBn0>QOCo0=QOCo0=QOCo0=QOCo0=ROBn0>ROBn0>ROBm0?SOAm0?SOAm0?SOAm0?SOAm0?SOBk0?UOAk0?UOBj0>WOAi0?WOAh0`0XO@h0`0XOAg0?YOAg0?YOBf0>ZOAg0?ZO@f0`0ZO@f0`0ZO_Og0a0YO_Og0a0ZO]Of0d0ZO[Og0e0YO[Of0f0ZOZOf0f0[OYOe0g0[OXOf0h0ZOXOf0h0[OVOf0j0ZOVOf0j0ZOVOUN^LV1\\4e0VOSNaLW1Y4g0TOoMhLY1T4h0TOPNgLX1U4h0TOSNdLU1X4h0TORNgLS1V4l0ROoMlLS1R4n0ROnMnLS1P4o0SOiMRMV1l3Q1ROhMSMW1k3R1QOfMVMW1i3S1QOcMYMZ1f3S1QOaM\\MZ1d3U1oN_MaM[1`3V1nN`MbMZ1`3W1lN_MfMY1^3X1kN_MhMY1]3X1lNhLfLc0R1\\1\\3Y1POYMfM]1Z3[1POVMgM^1Z3\\1POTMiM^1W3^1QORMjM^1V3`1POQMlM^1T3a1QOoLmM_1R3c1QOkLPNa1o2d1ROjLRN_1l
2g1SOhLSN_1k2i1o0UNQOl1Q1QNoNo1S1nMnNR2U1kMkNV2X1fMhNZ2[1cMeN]2_1^MbNc2h1iLfIK0Ob4]3m6001O000000001O00001O0000001O000000001O0000001O0000001O0000001O0000001O00000O2N1L4K5F:J7H7M3L4L5M2N2O2M2N2O1O3M2M2O2N1O100O1O101N1O1O2O0O10000O1000001N10001N1000000O2O00001O000O101O00001N100N2J7L3N2O2N1O1N200O2N101N1O100O101O000O101O0O101O000O101O0O10001N1000000[JgHc3Z7ZLkHc3U7\\LVIY3k6gL_Io2a6PMaIo2`6PMbIn2^6QMaJQ2_5oM`KeNiL0O07h1b7CfMcNa2\\1c5O0000001O000O2O00000000001O00001N1000001N1000001O00001O00000000001N1000001O0000VK" + }, + { + "size": [ + 480, + 640 + ], + "counts": "`UZ33k>2O2N2N1O2N1O2O0O2N1O101N00100O1O010O01000O010O101N10000O2O00001O0O101O001O0O2O001O001N101O001O001O1O2N0000001O000000001O0000001O0010O01mBfN^ and ?", + "answer": " is lying on .", + "image": "images/caption_simple_39.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000231088.jpg", + "mask_rles": [ + { + "size": [ + 640, + 425 + ], + "counts": "cna02jc05L5L3M3M2M4L4L4L4L4K5J6J6H8N1O2N2O2N1O1O1N3N1O1O1N2O3L2O1O2N2M3N2M2O1O1N2O001N10000O10000O10000O1000000O101N10000O100O101N10000O10000O2O0O101N100O101O0O101Z@XLQ?i3PAVLP?j3QATLo>m3b01O0000O1O1B\\@`Ld?[3a@fL^?Q3g@nLZO5m?e2g@gMY?T2f@SNY?d1j@cNV?X1e@ROZ?e0]@SNO`1d?:_@1a?@V@PO4DKo1l?[OY@X1f?hNZ@X1f?a101O01O0000000O1O101N100O102M4M5J4M5J6K2M[OPMj_OOc0m2d?ZM[@b2f?aM[@[2e?kMY@P2i?RNX@j1h?YNZ@b1g?dNU@X1l?lNT@P1m?UOR@f0n?]OS@>n?FR@6o?LR@0o?4o_OHR`0 and ?", + "answer": " is attached to .", + "image": "images/caption_simple_40.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000421923.jpg", + "mask_rles": [ + { + "size": [ + 640, + 426 + ], + "counts": "_`Q7f0Yc0=D`0@`0_O>C4L3L2O1M3M3O1N2O10O01O0000000O2N100M32N2N00O1O001O1O1N2N2I6L5O100O10O10O010000N20O0100000O2N1001O000000000000O1O2N100004VMm_O_1e0" + }, + { + "size": [ + 640, + 426 + ], + "counts": 
"ogo42jc06L2O1L5N1O1d^OFR?:m@HR?8m@KP?7n@LXNA``0e0SAO[N^O``0d0TA7k>JTA6k>KUA5k>JUA7k>ITA8l>Hl@d0P?\\Oo@e0Q?[On@g0R?YOm@f0T?]Oh@d0W?^Oh@a0X?@i@>X?Cg@ and ?", + "answer": " is inside .", + "image": "images/caption_simple_41.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000057149.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "_hW4 and ?", + "answer": " is in front of .", + "image": "images/caption_simple_42.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000577932.jpg", + "mask_rles": [ + { + "size": [ + 543, + 640 + ], + "counts": "VTR5 and ?", + "answer": " is touching .", + "image": "images/caption_simple_43.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000311002.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "ZWb02W=3O0N20001N10001N10000O2O0000000O10000O100000000000O01000O1K6Mmh[1MWWdN5K3M2O2N1O1O1O1O001O0O101O001O1O1O2N1O1O1N2O1O1O1O0010O103L10O01O3M6I5K8B[b97e]F2N3L6J3M4K5L4K6K4L4L2O3M3]D_NV;c2XO0O2O0000000000001O0000000000000000000000000000001O0000001O00000000000000000000001O0001O00001ZMkET2U:jMoET2R:hMRFX2`:00000010N100000O10O01N101K5F:N2O10O10000O10000000000O11N10000000000000O10000000000000000001O000001O00000000000000000000000000000O2O000O100000000O101O000O1000000O2O000000000O2O00000O101nNYE]Oh:c0YE\\Og:c0ZE]Of:c0ZE]Of:c0[E[Og:d0[EZOe:f0[EZOe:f0\\EYOd:g0]EWOd:i0\\EWOd:g0^EYOc:f0^EYOb:f0_EZOa:f0_EZOa:f0_EYOb:g0^EYOb:g0_EXOb:g0^EYOb:g0_EWOb:i0^EWOb:i0_EVOa:j0_EUOc:j0^EUOb:k0_ETOa:l0`ESO`:m0`ESO`:m0aEQO`:o0aEoN`:Q1m001O000000000000000000000000000000001O000000000O100000000O100000001O0O100000000O100O100O100O1O1O2J5K5N2O100O10000000000000000000001O000O100O5IT[k1" + }, + { + "size": [ + 427, + 640 + ], + "counts": 
"\\gc3:n<4M3L4L3L4N2L4L5M2M3M3N2N3H7O2N1O1]OSNfEa1BkNe:JbE[1MjN_:0_EV15kNY:d1iE`NR:_1QFbNm9\\1UFgNh9Y1YFjNb9Y1\\FhNc9`1UFaNj9`2jFjLW8V3cGQM\\8P3aGRM_8W3VGkLj8l3O000000O100O2N1_OnFfLU9W3b0L3jNV1J4M2EmD^NV;d18000O11O1O10O10O100nN^D:d;D^D:c;D`D9c;DaD8b;F`D8b;DeD6_;Dcol3" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is standing on .", + "image": "images/caption_simple_44.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000525600.jpg", + "mask_rles": [ + { + "size": [ + 326, + 640 + ], + "counts": "TeV33o99H7J5L5I6L4K5L4G:I:_HnM^6W2YIRNb6l2L2O100001\\O^IbMc6W2iIbMZ6X2m0K5K5L5K4N3M2M4N16J5L2M3MSOnGIo70ZH1e7GhH5W7@VI>\\8M1M2M6K^Yh2" + }, + { + "size": [ + 326, + 640 + ], + "counts": "oTe2:i9:ZFBT9P1H3M2O2M2N2N3M2OO0M3M4L3O2M2N2O2O0O100O10O2O001N2O4J9G?_O8Hk0VOYN_I1U6dNeIZ1a0Ge5g0hJjN\\5V1i1O3O1XOPHYOQ8a0l0H9N3F`oX3" + } + ], + "question": "What is the relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_45.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000378139.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "VRa17R=4M2M5L2O1O0O2O1N2O0O100O2O0O10000000000O11O000000O100O1O1N200O1O1O1O1O1O1O2N1O2N1O2N1O1O1O2N1O2N2N2N1O101N100O100O1000000O10000O0100000O100000O100000000O1000O1000O10000000000O10000000001O00O10O2O00O100000O10000000000O1000O1000000000O100000000O100O11O0000O100000000000000O100000O10O10000O10000000000O1000O10000000O1000000O10000000O01000000000000O10000000O10O100000000O1000O10000000O10000000O10O100O10001O00000000O010001O000O01000000000O100000000O100000000O10000000O10O10000000000O10O100000O10000O1O100000O10O10000000000000000O100000O1000O100000000O1000O100000O10000000000O10O100000O10000000000O10000O100000O01000000000000O1000001O000O100000000O10000O100000000O100000000O10000O2O000O101N2N101N2N1N3N1O2N100O101N100O1O2O0O101N100O2N2ORjP2" + }, + { + "size": [ + 427, + 640 + ], + 
"counts": "_]l23W=2O1O007I00000O1O00100O1O1O010000O106H4J]fU5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is inside .", + "image": "images/caption_simple_46.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000189806.jpg", + "mask_rles": [ + { + "size": [ + 400, + 500 + ], + "counts": "d\\W32[<6I7^OK_D:[;b0K4M3N2N2M3N2N2O1N2N1O1O1O1O100O1M2000N2O010N200O1001O1O001OO100O100100O1ONSFWNX9h1hF[NV9e1kF\\NT9b1mF_NR9_1QGaNn8_1RGaNn8_1SG`Nm8`1SG_Nm8c1SG\\Nm8e1SGYNo8g1QGXNo8i1QGWNo8i1QGWNn8k1PGUNQ9k1nFVNQ9l1lFUNU9m1dFWN\\9m1]FWNc9[200O100000O100000000O100000000O1O1M3N2N2N2N2N2O10PFlMd9T2ZFoMe9]210000O10000O100O10O0100O2O0O100O1CUFTNl9l1Y:N^E5\\:R1M4M3M2O2N1O1O2N2N1O1O1O2N1000O01O100O10O01000O010O001N1DUFSNm9j1WFSNk9m1 and ?", + "answer": " is looking at .", + "image": "images/caption_simple_47.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000515445.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "fm_7:[>d0A doing with ?", + "answer": " is leaning on .", + "image": "images/caption_simple_48.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000203580.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "Pc]43V=3N2N100O1O1O1O1O100000000O100000000000000000000000000000O100000000000000000000000000000000000000000000000000000000O100000000000000000000000000000000000000000000000000000000000000000000000000000000O10001O1O2O0ON101O100O10001O000000O100000000000000000000000000000O100000000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000000000000000000000O1000000000O100000000000000000000001N2O_TV1" + }, + { + "size": [ + 427, + 640 + ], + "counts": 
"dYT47S=2N3L4N1N2O1N2N2O0O100O1O2N100O1O101N1O101N2N101N1O2O0O1O10O01O1O100O100O1O100O100O1O100O01000O1O100O010O01G]DhNe;W171O2O1N101N2O1N2O1M3N2N101O00000O100000000O100000O01000000O10000O010O1000000O10O10O1000O010O1N1K6N1101O2M3N0O2O1O0O100O10000O100000000001OTElMa:S2]GlMg6T2[HlMPO0K00 and ?", + "answer": " is over .", + "image": "images/caption_simple_49.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000499622.jpg", + "mask_rles": [ + { + "size": [ + 456, + 412 + ], + "counts": "Qh>1W>0O2O0O100O1000001N10000000000O1000000000000O10000O100O1O1O1O1O1O1O1O10000N3O000O1000000O1O100O10000O1000000O10000O100O100O100O100O101N100000000O100O10000O1000000O100000000O100000000O2O00000O10000000000O1000001O000000000O100000000O10000000001O0O100000000000000O1000000000000000000000001O000000000000O010N2L4N101N2N2N2O1O00100O001O010O10O010O01000O10O011OO10000O100O1O100O10001N10000O2O0O2O001O001O1O1O1O1O1O2N3M2N2N1O3M3M1O1O00001O0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000003fNjC[O3g0UTEBl:=UECk:=UECl: located relative to ?", + "answer": " is on .", + "image": "images/caption_simple_50.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000135872.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": 
"ZYl01Z=0iR20hfb01^V[O001O01NmW10Pkb08kl[O5J5L7I3K2O20O1OjCSON050_;l0\\D\\O3Jb;i0[D[O1Oi;b0UD[O0N16Q<;nC_O1;U<2PDMR<1nCOSCa0_O;E=D7H5L4K2N1O100O1O100[N_M_Hb2a7_M]Hb2b7_M]Hb2c7_M\\Ha2d7_M[Hb2e7_MYHb2g7_MXHa2h7aMUH`2j7eMQH\\2o7^NVGc1j8_100000000000000O100000000O100000000O100000000O100O100O10000O1O1O100O100O1O1O100O100O1O1O1O100O100O1O1O100O1O1O100O100O1O100O1O1O1O100O1O1O1@eJnH]5o6a0O1O1O100O1O100O1O1O10000O1O10aJQIb4n6^KVI_4j6`KWI`4i6_KYIj1OKh6lMUIIi6bMWIo1O`0a7@\\Hc0d7\\O[Hf0e7ZOYHh0S6ZMRKm1kNi0R6[MbIO]1l1POj0Q6[MaI0^1k1POj0Q6[MaI0^1k1POj0Q6[MaI0^1k1oNk0Q6\\MaIN`1i1oNm0P6_MQK:bNf0=a1P6_MRK4gNk06c1P6XMdI3`14iNm03d1n5ZMfI0a14jNm01e1n5XMhI2U2o0VNg1V6XMcKQ1WNg1V6XMcKP1XNh1`8fMPGE52;c2R6ZMiK7kMG238e2R6[MiK4WNKNf2R6[MjK2XNLLg2f5YMRJ3U20WNLMh2e5YMRJ3U20jNd2o4ZMQJ2W2OiNe2o4YMSJ2U20iNe2]5YMlK0gNg2]5YMmKOfNh2]5YMmK0eNg2^5YMmK1dNf2`5XMkK3dNf2a5WMkK4cNe2T8[MlGe2T8\\MkGd2U8\\MkGd2V8\\MiGd2W8\\MiGd2W8\\MiGd2k4XM]M5gMc2l4XM^M4eMe2l4XM_M4eMc2l4YM_M5dMb2l4ZM_M5eMo0XO3d5iN_M6dMc0ZOUO3V1`5kN`M7cM91e0l4kN_M8eMO9m0d4kN^M:WOj0\\3kN\\MX7BXG1_1=Y7DVG0a1;Y7FVGOa15^7NoFNc12_71mFOb11a7OnF0a10a71mF0b1Nb73jF1c1Lc73jF1c1Lc73iF2c1Lc73jF2b1Je74hF3c1Ie74gF4c1If73fF59^Of0:[83eF67Ah06\\83dF85Bj03]83dF84Dj00`83aF;2Dl0Oa82`F=0Do0Ma82_F?MFR1Ib82_FT1c0bN@8^93]FV1:_NF336`92\\FY16jNMKa93[FX17kNLJb9m1aFYNLKc9l1aFZNKJd9m1aFXNKKd9m1bFWNJLd9n1aFVNJMe9m1aFVNJMf9l1`FgN`9Y1aFfN_9Z1aFTNI1g9k1`FSNJ2g9j1`FSNJ2f9l1_FRNK2f9l1`FRNH3h9k1aFbN_9^1aFbN_9_1aF`N_9`1aF`N`9_1`FaN`9_1aF`N_9`1aF`N_9_1cF`No0\\Ol6T2UH`Nc0^ORO2T8o1YH`N93_7[1YHcNM>h7n0]H9b7G^H9b7F_H:a7F^H and ?", + "answer": " is on .", + "image": "images/caption_simple_51.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000439994.jpg", + "mask_rles": [ + { + "size": [ + 640, + 428 + ], + "counts": 
"T?12b0OE2I114O100075d`0j2K2O0001O00001O000O10001O00001O0000001O0O101O0000001O0O11O010O0001O001O00001O00001O001O001O0000001O010O00001N10010O00001O0000001O00001O00001O01O0001N100001O00O20O001O01O00001O000001O0001O00001O00001O001O1O1O1O01OO1001N100000000000000001O1O1O0000O1N200O1000001O00000000000000000000000001O00000001OO100000010O2M2O2O1N1O1O3M>B001O001O0001O000000000VOfNn^OZ1Qa0lNk^OS1Ua0oNi^OR1Va0oNi^OQ1Va0POj^OP1Va0QOi^Oo0Wa0ROh^On0Xa0ROh^On0Xa0SOg^Om0Ya0SOg^Om0Ya0TOe^Om0[a0TOd^Ol0]a0TOa^Om0_a0TO`^Ol0`a0XO[^Oi0ea0i00000000000000000000000000000001O000001O0000000000000000000000000001O0000000001O0000000000000001O0000000001O0000O100000001O000001OO100000000000000001OO2O00000000000000000000000000001O01O001OO1000000000000001O0001O000000000000000000001O0000000000000000010O000O1000000000000010OO100001O00000000]OU^OQOka0k0\\^OROea0l0]^OSOca0l0_^OSOaa0l0a^ORO`a0l0e^OQO[a0m0k^OnNVa0m0o^OTOPa0h0T_OXOl`0e0W_O[Oj`0:`_OEa`09a_OGa`04c_OKYb000000O2O00000O1D<00K6L2M4N2M3O1J6I7O0101O`0^OZk;UOlaC7KJk05Ra0j1O1N10001O0000001O000000001O000000001O000000000000001O00000001O0O1000000N]3" + }, + { + "size": [ + 640, + 428 + ], + "counts": "oUc62kc06VMN_A3\\>6`AK[><`AG]><_AG^>>]AG`>9VAMh>5T@oN=P1^?2U@nN6X1c?KU@PO4X1g?HT@QOL`1o?]OU@U1j?jNS@Z1n?gNo_OV1MgMT`0b35O2M3M2N3M3N1N1M1O003N3N3M3N110;E7I>Ba0k_OkK^?]4N1O2OO01O01O1O1O1O1O1O1N2nL_@^1g?_N^@NJ@>^O^?f0i@MRa0NT_ONo`0OT_OOn`00X_OJh`06Z_OHg`07Z_OHg`06\\_OGf`08b1N2N2M4MmQ3NVnL1ag<" + } + ], + "question": "What is doing on ?", + "answer": " is walking on .", + "image": "images/caption_simple_52.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000468501.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": 
"`h_11d;3HOfD3Z;5O101O00001O000000O100O10000O100000000000000000000000O10000000000O10000000O10O10000YGJVOLk4:mKMVOKl48lK2UOGm47mK7SOCn47nK;oN_OS56lK`0nNZOU57lKb0lNXOW57kKe0lNTOY57iKj0jNQO[56jKl0iNnN]55jKP1gNkN]57jKR1fNhN_57jKT1eNeNa57iKW1dNbNb58iKY1bN_Ne59hK[1aN\\Ng59fK`1_NXNi5:gKb1[NVNm59fKY2Y4hMfKY2Y4hMfKY2Y4iMdKX2[4jMdKW2[4nM_KT2c3VM_Ko0e06oN0l4nN]K]17EELf4TO[Kg1OYO4I_4[O\\Ka1IkM7_1c2f5kMPJZOa0j2`5jMSKU2n4jMSKU2o4fMVKY2j4`M_IIi1g2i4`M[K`2f4_M[Ka2f4]MZKd2f4XM^Kh2h6100fGVMe7Q2mGbN=^Og7j1THfN4Aj7d1VHjNOBS8Z1QHSOKDX8S1PHYOGD]8n0nG^OCEb8h0mGD@Df8b0nGJ[ODQ96gG5XOEW9JhGa0oNFb:;\\EEe:f01O1N2O2N2N1O1O1O2L3N3L4Kl[h2" + }, + { + "size": [ + 375, + 500 + ], + "counts": "ccR42d;3N2N1O1O1O2O01OO1O10000N2O1O1O1O0L4M3O01N2010O3M2N2N1N2N1O2N1O01O0N2001O1N200O1O1O1O1O001N100gNhNTHY1j7jNTHV1k7mNSHS1n7mNQHT1n7mNPHT1P8mNoGT1Q8lNoGS1S8lNlGU1T8lNkGS1W8lNiGS1X8oNfGQ1[8oNdGP1^8oNbGQ1_8nNaGQ1`8PO_Go0c8SOZGm0g8QOZGn0h8QOWGo0j8ROUGn0k8ROUGm0l8SOTGm0l8SOTGl0n8TOQGl0n8UORGj0o8VOQGi0P9WOPGi0o8XOQGg0o8ZOQGf0^1nN]5YIc0_1nN[5?WIb0m8^OSGb0l8^OUGa0k8_OVG`0i8AXG>h8BYG=g8CZG and ?", + "answer": " is looking at .", + "image": "images/caption_simple_53.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000171190.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "mW_8d03Ej=m0J3O1O2O0O101N10001N100O2O01O100O001O0iNcBl0]=QOcBQ1^=mNaBU1_=610O00001O001O001O10O000N2J6K5G:J5L4K5K5N3M2OnB0m<7M2N2O100O0O2001O1N101O_NF\\D9d;J[D5e;L[D3e;NZD2k1" + }, + { + "size": [ + 480, + 640 + ], + "counts": "[k_71m>2O2N2N100O1O2N1O101N1O1O1O2N100O1O2N1O1O101N1O1O1O1N2N2O2N1N2XOTOYCo0dk0V;@[DE?k0W;AZDB>m0Y;2fDN[;1eDO[;2dDM^;3aDM`;3_DMb;3[DOg;0TD4m;_1002N1O2N1O1O1O1O1O1O1O`NfMZFZ2d9lMYFS2f9QNWFo1h9SNVFn1j9SNTFn1k9SNTFn1l9RNTFn1Q5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is sitting on .", + "image": "images/caption_simple_54.png" + }, + { + "image_path": 
"/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000565391.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "e6173a2M\\M0R>3nBM13QO14O[1NbN3NNM0e=n2\\BiNOOOZN218Z4f7f7K1N2O001O1O001O001O001O001O1N101O00001O001O001O1O1O00001O001O001O1O001O001O00001O1N2O1O001\\LPC6P=]3001O1O1O1O001O00002N001O1O001O1O1O1O1O001O002N1O001O1O2N2N1O2N1O1O1O3M2N2N4L2N1O001O2RKaAW4b>eK`AHNL0W4f>n04L4L3M5K6J;`KZ@P4m?N1O1O3M1O1O00003M1O1O00O1002N01O03M0O100001O00O1001O0000O100001O0000000000000000000000000000000000O11O00O10000000000000000000000000000000000000000000000001O00000000000000000jE`Ll3`3UL_Lk3a3QL_LS4a3V6000dE_LY4a3S6000bE_L]4a3cK_L]4a3cK_L]4a3Q600000000000000000000000000000000000000000000000000000000000000000000001O00000000000000000000000000000000000000000000000000000000000000000000000000001O0000000000000000001O000000000000000000001O0000001O001O0000001O0000001O001O00001O1O001O001O001O1O001O1O1O1O1O2X@lK`?\\4N2N2N2N2N2N3M2N2N2N3M3M2N2[EdJb6_5n36J3M2N2N3M3M3M2N1O3M5K3M2N2N3M3L4M3N1N3M3M3M1O2N2N3M3M1O2N2N4L2N2N2eEjGh8X8UGjGj8Z8nFbGPO4R:]8kF_GSO4R:_8mFbGR9a8jF`GV9b8eFbGZ9b8_FaGa9b8\\F^Gd9c8ZF^Gf9d8QFoF09NIP:d9PF\\FP:c9QF^Fn9b9RF^Fn9j901O000000001O00000N31N00LoEYFR:f9oEYFQ:g940lEXFP:h9oEYFQ:g940000000001O00000O11O000000000001O000O11O0aM_FYHNo1c9h5_FYHOn1d9g5]F[HOn1f9]5XFfH5NOm1d9\\5]FgH:1Db1b9i5TGdH[Oc1a9i5^GVJa8g5ZFgHV1c1_8]N\\FS7X1bJ\\8[N\\FS7W1cJ]8`5dG`J]8`5bG`J_8_5aGaJa8]5_GdJ`8\\5`GdJ_8]5aGcJ^8^5bGbJ`8\\5_GfJ`8Y5_GiJa8W5YGoJd8T5\\GlJd8T5\\GlJe8S5[GmJg8Q5YG^IQOm0f9e5YGZIUOQ1`9h5YGXIWOP1`9h5YGXIWOQ1`9f5YGUI[OU1]9e5XGUI]OU1W9i5]GQI]OV1V9i5]GQI]OV1T9k5_GoH]OV1T9k5_GPI[OV1V9j5_GoH\\OW1V9j5VGgHA27\\1R9k5UGiHAN9_1Q9j5VGhH2^1h8j5VGgH3`1f8i5VGiH3^1c8m5YGfH5\\1_8R6lF_HO<3D:_1g8S6lFRIL^O2O0O9a1k8R6nFUIM^O:[1k8S6mFUIM^O:Z1k8V6kF\\I4nNOa1P9c6mFaH3]OMb1Q9Z9XKWE]LKV12GP;^NoD0Ob0o5_O^J5c0Iac0" + }, + { + "size": [ + 640, + 480 + ], + "counts": 
"n??240_OO30N11OO171H4<21KEi?o2Z@_MLBj?o3001O001O001O001O00001O001O001O1O001O001O001O001O1O001O001O001O001O1O001O1O001O001O1O001O1O001O1O001O001O001O1O1O1O001O1O1O001O1O1O2N1O001O1O1O1O1O1O2N2N1O1O2N2N1O1O2N2N1O2N2N3M2N2N3M3M4L5K6YOW]OK`c0O3M2N1O1O1O2NQhY7" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_55.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000322829.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "1b8i4001O5K0000001O0000M3N200000000000000001O000000O100000000001O00000000O1001O000000O1000000000000001O001OO10000000000000000000000000O2OO100001O00O10000000001N10O2O01N1000OZKeK>[4^OPL:R4DRL9n3FVL7j3HZL6e3I^L5b3K_L4a3L`L3`3MbL1^3NdL1\\3OfLiMkNa0^4g1jLbMoNb0Y4k1iLaMQOb0V4m1mL[MSOd0P4Q2RMUMPOj0m3Q2WMoLROl0g3U2YMlLSOm0d3W2\\MgLVOm0_3\\2UO]Ml0b2ZOYMf0g2ARM?m22cLN\\3Q5O1O1O002O0O1O010O2N1O1O1N102N1M3O001N2O2M2N2O1O1O1N20nJ_MYO`2d0fM[OY2e0jMZOT2g0lMYOR2i0nMWOQ2j0oMUOR2j0oMVOP2k0PNVOo1k0oMVOQ2j0nMWOQ2k0mMUOT2k0jMWOV2P6N1000O10O101O00O0100000000000000000`JjMKV2J]NNc1NdNO\\11dNO\\11dNO\\10fNOZ11fNOZ11fNOZ11fNOZ11fNOZ11fNOZ11eN0[10eN0Z10gN0Y10gN0X11hNOX11hNOW12iNNW12hNOW12iNNW12iNNX10iN0W10jNOW10jNOV10kN0V1OkN0W1NjN1W1NjN1V1OiN2W1MjN3V1MjN3V1MkN2U1NkN2U1NjN3V1MjN3U1NjN3V1MjN3V1MeNTKTOP5W2LdNUKUOo4W2KcN<]1DdN;[1FgN8Y1HhN7W1JjN5V1KkN4T1MmN2S1NmN2S1NmN2S1MmN4S1LlN5T1KkN6U1JkN6U1JjN7V1IkN6U1JkN6U1IlN7T1IkN8U1HkN8U1HkN8U1HkN8U1HkN8U1GlN9T1GkN:U1ElN;T1ElN;T1ElN;T1ElN;T1DmNU1BjN?V1AjN?V1@kN`0U1@kN`0U1@kN`0U1@kN`0V1_OjNa0V1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1]OlNc0S1^OmNb0S1^OmNb0R1^OoNb0P1_OPOa0o0@RO?m0BSO>m0BSO>m0BRO?m0AUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>l0@UO`0k0@UO`0l0_OTOa0m0^OSOb0m0^OROc0n0]OROc0n0]OROc0n0\\OSOd0m0\\OSOd0m0\\OSOd0m0\\OSOd0m0\\OSOd0l0]OTOc0l0]OTOc0l0]OSOd0l0]OTOc0l0]OSOd0m0\\OSOd0m0\\OSOd0l0]OTOc0l0]OTOc0k0^OUOb0k0^OUOb0j0_OVOa0h0BWO>h0CXO=g0DYOl0ATO?n0_OROa0o0^OQOb
0o0^OQOb0P1]OPOc0P1]OPOc0P1]OPOc0Q1\\OoNd0Q1\\OnNe0R1\\OmNd0S1\\OmNd0T1[OlNe0T1[OlNe0T1[OlNe0U1ZOkNf0U1ZOjNg0V1YOkNf0V1YOjNg0V1YOjNg0W1YOiNf0W1ZOiNf0W1[OhNe0Y1ZOmM[Kc0[5`1ZOmMZKd0\\5_1YOiNf0V1ZOkNf0U1_OfNa0Y1@fNa0Z1_OfNa0Y1@gN`0Y1_OhNa0X1_OhNa0X1_OhNa0W1@iN`0X1_OgNb0Y1^OgNb0Y1^OgNb0Y1_OfNa0Z1_OfNa0[1^OdNc0\\1]OdNc0]1\\OcNd0]1\\OcNd0^1[ObNe0_1]O^Nc0b1]O^Nc0b1]O^Nc0b1]O^Nc0b1]O^Nc0b1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O\\Nc0d1]O\\Nc0d1]O\\Nc0d1^O[Nb0e1^O[Nb0d1_OZNc0f1]OYNd0g1\\OZNc0f1]OnMSK6^5l1_OmMUK3_5P2\\OmMVK1_5R2[OlMQ1T2^OiM6W2IjM7V2_5000QJmMf0S2ZOmMf0S2ZOmMf0S2ZOmMUKMV5V2EmMQKLN1\\5W2DlMRKML1^5V2DkMSKNK1^5V2DjMTKOJ1^5V22iMNW2HkMoJNY5V2ImMmJMZ5V2ImMmJNY5U2IoMlJM[5T2FYN:g1FYN:g1FYN:g1FYN:g1FTNmJG^5V2DXN;h1EXN;i1DXN;h1EXN;h1EXN;i1HhMQK2W5V22hMOX2b51O00O100O100O10000O1000000000000000000001O1O001O001O00O100000000000000000000000000001O000000000000001O001O00001O00001O001O002N2N1O001O2N1O1O2N2N3M2N2N3M1O3M1O1O2N2N1O1O001O1O001O1O1O1PJYLZ2g3]L^LfNNl4e3ZLaLeNNQ5a3XLeLdNKT5`3TLjLdNJW5\\3RLnLdNHZ5Z3nKTMcNE_5W3kKVNU4j1iKYNV4g1hK[NX4e1fK]NZ4c1dK_N\\4a1aKbN_4^1`KcN`4]1^KfNa4[1\\KgNd4Y1ZKiNf4W1XKkNh4V1UKmNj4S1TKoNl4R1QKoNP5S1hJTOW5Q1^JUOb5Z40001O001O000000001O0000001O1O00001O001O1O001O1O1O1O1O00001O00000000001OO1000000000000000000000000000000000000000000O1000000O100001O00000000000000001O000000000000000000000000000000000000O1000000000000O1000000" + }, + { + "size": [ + 427, + 640 + ], + "counts": 
"k\\T1221P=`0E2M5M5J2O1O1O2N2N3L2O1O1O5K3M6J1O3L`0_O3`E_MS:c2hEdMU:Z2nEdMQ:Q2kEPNd:Q2\\EoMd:]2N2O0O1O1O1O2lEUMe9j2>00001N3N1N2N2O1N2O1O1N2N2O1O1N101O1O100N101O10O1O1M20100O1O1O010O10O01O1O1000000O1000O100000000000000001O00000000O1O1000000000000001O00002N001O00000000000000000001O000000000O100O1O0010000000000000000000001O000000000000000000000000001O000001O00000000001O000000000000O011N1O11O00O1O001000000000000000000001O1O000000000001N100O10O11N101O00000O2O0O2O0O1N2O1O1O1O1O1O100O10000O101O0001O000000001O00001O001O1O1O010N4M1N2O01O01O00001O0O101N110O000O2O000O110O001O000O1O1O11N1O101O1O0O11O0000000000001O1O1O001O0000000000000000O100000000000000O10000000000000000001O0O100001OO1001O00000O10001O01O00001O0O10001O00001O0000O100O10000O100000000O10000001O001O0O2O000O2O00000000OgMQFX1o9hNQFX1o9S1O001O1O00001O002OO01O1O1O3M00010O002N3NO010O0000001O10O01O1N4M1O1O2N1O2O0O1OfHmL\\M2f6o2PLmLZM40Hd6U3TLlLZM6i6k2TLQMR4n2W3N2N2N3N0O101M2N101000OO2O2N0JZEgMg:U2:L3O2K5I6O1L3DXDTOl;k0SDQOR and ?", + "answer": " is in front of .", + "image": "images/caption_simple_56.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000535523.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "Zeh52Y=2iB0P=600O2O001N101O1O1N2O3M1O001O1O3M0O10001O01O010O01O1O10001OO1000O01O010O101OO1cCTOY^:CbE<^:DdE:]:FbE;]:FbE;]:FcE:\\:GcE9\\:IcE9[:GeE:Z:GdEX:CcEa0Y:CeE?V:GiE:Q:LnE4k94SFMj97UFJj96VFJ\\9gNeF`1MJV9oNlFX1LJU9QOoFj2P9WMoFi2Q9WMPGh2Q9XMmFi2T9WMkFh2W9f0001O001O1O1O001O001O1O001O1O001O00001O1O001O001O00001O001O001O001O1O001O00001O1O001O001O00001O001O001O001O001O001O001O001O00001O1O00001O001O001O001O001O00001O001O001O001O001O00001O001O00001O0000001O000000000000000000TE" + }, + { + "size": [ + 428, + 640 + ], + "counts": 
"eiR73X=1O2N2N1O2N100O2@GjC:h;3YDM`;:_DH_;8aDI`;5`DLa;2_D0`;O`D2`;M`D5_;JaD7_;HaD9`;DaD=_;B`Da0_;^OaDc0_;\\OaDe0`;XO`Dj0`;UO`Dl0`;ROaDo0_;oNbDR1j;2\\OmNmDU1k:hNmD47U1k:hNoD07Y1i:hN^EZ1b:fN\\E\\1d:dN[E]1d:cN[E`1W;1M3M3O1M3N1N3N2M102O0010O2O0O10000O2O0O101N1O2N1O1O2M2O2L4M2M4L4L4K5I7K5J8GeW:" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is over .", + "image": "images/caption_simple_57.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000276018.jpg", + "mask_rles": [ + { + "size": [ + 640, + 416 + ], + "counts": "Una05kc01N102N1O2N1O0O2O0O2M2M3M3K6K4J6L4N2O1O101O000d@cNU<]1cCmN[WOeAo0[>QO]AW1c>iNWA]1R>VMVB\\1Dc1U>RMXBY1_Oi1X>oLZBX1[Ol1[>kLZBR2\\O9[>dMZBQ2_O9V>fM\\BP2^OdM`BT1CROM\\2j0jM[;D[DQ1oLAQ3`0oL_OQ3b0mL^OS3c0hHUL32\\2T3j4h0^IZLc1n2P5i0YI_Ld1d2X5m0nHhL5AMj2S7m0lHhL4\\OOf2\\7U1bHiL\\1b1e6e1oGiLY1[1R7k1fGkLS1]1W7i1fGjLo0`1\\7g1dGiLn0b1^7e1cGkLe0h1i7^1aGjL?o1P8W1bGiLk7TM_H^2C`0n7RM`H_2_O`0R8oLaH_7`7_HbH`7_7^HdH`7]7_HcHa7^7]HdHb7]7\\HdHd7^7YHeHe7]7XHeHg7\\7WHeHi7]7SHeHm7^7YGjGJk0m8e9O00001bNTEfIl:Y6]E_Id:^6bE^I_:`6dE^I]:_6iE]IW:^6PF`IQ:\\6TFbIm9\\6YF_Ig9a6^FZIc9e6n1M101O1O1O1N2O1O1O1N2N3N1M4A?]M[B_Nn=YNcAk2U1UNg=h1]2L1N2N2O2M3N2M2O2N2M4K6I4L5K6J6K5J5M5I7J7EejR4" + }, + { + "size": [ + 640, + 416 + ], + "counts": "9Y33PNd0hN]O0c01]O4O0f0KZO7OK0O1o0NXO0K`0R1@a05\\42^7LbH3a7J`H5f=00O1O1000000O100O100Oig19fWNId\\O`0Sc0?QOYOT^OQ1ca0k0C=L5L4K9H9G6JR5@=C:F3L10000O1O1M3L4L4J6L4J6M3K5K5O1O1O1O1O1O1lEcHb7`7]HcH`7^7_HdH^7^7aHeHZ7^7dHeHZ7\\7dHhHW7[7hHhHT7Z7kHhHS7Y7lHjHP7X7oHlHk6W7UIkHe6Y7ZIlH`6V7_IPIYNmN_7W8WJhIUOYOa3T7YMQJ\\NBl3b6gMmLd1X3[NQMT1X3kNPMg0W3XOXMXN\\IT12ROj9a1\\M`N\\3_1c700O1O1O1O1O1O1O1O1O1O100O1O1N2O100O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1N2O1O1O1O1O1O1O1O1O1N2O1N2O1NRTg5" + } + ], + "question": "What is the relationship between and ?", + "answer": " is guiding .", + "image": "images/caption_simple_58.png" + }, + { + "image_path": 
"/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000404249.jpg", + "mask_rles": [ + { + "size": [ + 640, + 427 + ], + "counts": "Y[i17hc03M3M1O2O0O2O1n\\OD^b0<`]OH]b09b]OI]b07b]OK]b06b]OJ^b07a]OI^b08a]OI_b08`]OH`b01_]OC1<`b00a]ODNd:n0cHg0gMmNf9<_HP1eMeNk9>[HR1gMaNm9i0nGk0PN_NQ:R1\\EgMj0f0CU4U:gMoEYN^O]4c:\\MkEc3T:cLcEa3]:Q300O100O001dNiFnGW9n7cFQG;l0R9o7WGQHh8m7\\GRHd8k7`GSH`8m7bGQH_8n7cGQH\\8n7gGPHZ8o7hGoGY8o7mGlGS8T8SH_GU8`8e101OO2H701O010lMYGnHg8P7bGhH_8Y7[HkGf7T8_HeGe7Z8Q2N2ZOSEmGJ0Z;l7f0N2H9M2M3L5O_FQIQ6o6ZIiHiMj0d8h6cH_JX7i5RHlJNZMm6U8fHgJ7ZMR7Z;kHgDV7Y;hHhDX7X;eHlDZ7T;bHPE_7h;0000001O1XE_Ho8a7PGaHo8`7mFcHS9_7hFeHW9\\7bFjH^9Y7^FiHa9X7ZFlHf9W7UFkHk9V7RFmHm9[7iEgHW:b8O001N101O0O1O1O2N1N2O2L4M3eLoE_LW:`NVFn2HUN]:jNQFY2AaL9X2V:nNTFR1_1H_8TOZF`0a2]OX73bFLb2Ko68jKBY4>lKXOX4h0a7O10X]OjNeb0Y100Z]OhNcb0W1^]OhNbb0X14O12ON10O01O1002ON001O1000\\]OkN\\b0T193M1O1O0000O1O0106KM2N1100004K1O2O0002N10N11O0011NO21M13MO101N2O11OO000O21ON011O100O21M10N101O01O02M1O2N2M1O2N2N2N3M2O3J]bf2" + } + ], + "question": "What is doing on ?", + "answer": " is riding .", + "image": "images/caption_simple_59.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000098287.jpg", + "mask_rles": [ + { + "size": [ + 640, + 415 + ], + "counts": 
"m;4o2O^MNa22cMNk;0hC02:KHa0Nb03RO9K07FI1140KO:7FO0d04WO02KO001OO106OK0_;7`Dm7];;000000000000O1000000001O000000O1iNaGPFOc0a8V9_GQF`0g0Q8X9`GPFa0f0o7Z9SHgFm7Y9PHjFP8V9jGPGV8P9hGRGX8n8gGSGY8l8hGTGX8[:O2N1O1O1O2N1O001O001O1O1O00000000000000000000O10000O1J6000000gNgGjFZ8T9iGjFX8U9jGiFW8V9jGiFW8W9iGiFW8X9iGfFX8Z9hGfFX8Z9hGfFX8Z9hGfFX8Z9hGeFY8[9hGdFX8\\9hGdFX8\\9hGcFY8\\9hGdFX8\\9iGbFX8]9iGbFX8^9iG`FX8_9iG`FX8_9iG`FX8^9kG_FW8a9iG]FY8b9jGYFY8f9Q10000001OO1O1001O00000000000000O100000000O100000000O1O1000000000000001O0000000000000000O100000000000000O100001O00000000O10000O100000000000000000000O1000000000000000[GYFQ7g9cHYFlN;^8\\9fHZFjN;`8Z9gHiFY7W9gHiFY7W9cHmF]7T9\\HRGd7n8WHYFZOk0_8l8SH[Gm7e8QH]Go7f8gGaGY8R:100O1K_GjDb8S;7N2M3N2O1L4I7M3O1M31O1O002N1O2N4L1O3M1O5K5K1O2N1O]N^GmG]8R8iGjGV8V8mGhGR8W8RHfGn7Z8VHbGj7^8YH_Gg7a8`HXG`7g8fHSG[7m8iHoFW7Q9kHmFU7S9nHjFR7W9oHfFR7Z9RIaFo6_9RI_Fo6a9h1O100000000000000000000O100000000000000000000O1000000000000001O00O100O1000000000000000000000000000000000000000000O10000000000O1000000000000001OO100000000O1O10000001O000000000000000000O100000000001O000000O100001O0000O10000O1000000000000000000000000000000000000000000O11O0000O1000000000000000000O10000000000000000000000002N001O000000000000000000O100O100N2M3O10000001O00000000O100000000001O001`FlFlN100j3NXL0T4Wc0dKk\\O1?" 
+ }, + { + "size": [ + 640, + 415 + ], + "counts": "UeX31T22a?4U@4h?NV@4h?NV@4i?NR@6l?LP@8o?In_O:P`0Hm_O:S`0Je_O:\\`0LX_O3n`00e^O8^a0R1201O1N2LJPNd^Ok1[a0=2M3O100O21O002N1O2N2O1N2O1O3McMe^OK2g04 and ?", + "answer": " is standing on .", + "image": "images/caption_simple_60.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000278973.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "Sfn13W=1N2O0000001O1O1O1N102N1O1O000O2O001O1O0O101N1000000O10000000001O1O00001O001O00001O00001O00000O10000O100000000000O01001O00O10000O10000O1000O100000000000000O10000000000O2O0000001O0O2O001O001O0O2O001O001O000O10000O10001O000O2O00001N101O001O001O0O2O001O00001O0010O010O010O10O010O010O1O2MiQ`4" + }, + { + "size": [ + 426, + 640 + ], + "counts": "fjT12V=?A5M1N100O0001O010O010001O0O2O0O10001CW9k1O2O0O01_OkFkLU9S3nFlLS9Q3PGnLP9o2TGPMm8m2VGRMj8m2XGRMi8h2]GWMd8f2_GYMb8d2aG[M`8`2dGaM\\8\\2gGcMZ8[2hGdMY8Z2iGeMY8W2`1L5J;E7J6J7H8@>Ghdg4" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is on top of .", + "image": "images/caption_simple_61.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000104198.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": 
"dS2b0P;6J6N3M3N2N1O2N2M101IfNnE\\1P:6O20O2ON1O101O001XG\\N[7e1WHjNg7c1bGgN_8X2O1000000O1001O00000000O1O11O1O000jNaGSO_8S200O2N20O1O000O02O1N10O100000O10000lNkLfIP3TOjLT7=fIe2]6^McIT2VO^M_7a0ZIe1S7`NkHZ1W7kNhHU1V7nNhHR1X7ROdHo0\\7ROcHn0^7SO_Hn0b7SO\\Hm0d7UOYHl0h7TOSHP1n7\\11O00O1O1O100O1O1O10000001O001OO1O1O10000O1000000O100O100O10O1001N100O1O1O1POTLUJl3h5XLVJi3f5^LVJc3h5`LVJa3j5`LUJ`3j5cLTJ]3k5fLQJ\\3o5gLkI\\3U6S11O2N0000O100O1N2N2O2N1O1O10O10001O2N0010O0O100O1N2N2N2M3M3M3M3L4N2K5J6L4N20000O100O1GQIjKo6U4TIhKm6e3VIbLNHl6Z3SIfL88HHm6Z3SIeL89EG01P7Y3TIfL7b0e6g2UIeL8e0b6W3RIkLn6o301O00001O3M4L9G4L5K5K3M3M3M2NO100O100O1001O001O1O1O001O2N1O1O1N1001N10O10O2O00O200_KoIZ3R6dLQJZ3o5dLTJ[3l5cLWJ\\3j5bLWJ^3j5`LXJ_3j5^LXJa3j5\\LXJc3n601O1O1O1O001O001O001O1O001O1O1O1O00000000OgMSHj0l7VOXHg0g7YOZHg0f7WO\\Hi0d7UO^Hk0b7TO`Hk0a7TO_Hl0b7POaHP1`7mNbHS1`7kN`HU1b7hN_HY16XNU6=gIb2X6YMlIh2Z6lLkIT3]7O001O001O001O1O1O00001O001O001O000000000000001J5ZO\\GPNn8k1b0D;N2O01O1N1O1100OO10O2N2O1N2O1N3M3N3K?BZTW1\\O[lH" + }, + { + "size": [ + 375, + 500 + ], + "counts": 
"2g15N1aNd7a1[HN0bNe7a1ZHM1bNe7a1ZHM1bNe7a1[HL1bNd7b1[HL0cNe7b1ZHK1cNe7b1[HJ0dNe7b1[HJ0dNe7b1[HJ0dNe7c1[HH0eNe7c1[HH0eNe7c1[HH0eNe7c1[HH0eNe7d1ZHG1eNe7d1[HF0fNe7d1[HE1gNd7d1[HE1fNe7f1YHD2gNd7e1[HC1gNe7f1ZHC1gNe7g1YHB1hNf7f1YHB1hNf7f1YHB1hNf7f1ZHA0iNf7f1ZHA0iNf7g1YH@1iNf7g1ZH_O0jNf7g1ZH_OOkNg7f1ZH_OOkNg7g1YH]O1lNf7g1YH]O1lNf7g1ZH\\O0mNf7g1ZH\\O0mNf7g1ZH\\O0mNf7h1YH[O1mNf7h1YH[O1mNf7h1ZHZO0nNf7h1ZHZO0nNf7i1YHYO1mNg7j1XHYO1mNg7j1YHXO0nNg7j1YHXO0nNg7j1YHXO0nNg7k1XHWO0oNh7j1YHVOOPOh7j1YHUO0QOg7j1YHUO0QOg7k1YHSO0ROg7k1YHSO0ROg7k1YHSO0ROg7l1XHRO1ROg7l1XHRO1ROg7l1YHQO0SOg7l1YHQO0SOg7l1YHQO0SOg7m1XHPO1SOg7m1XHPO1SOg7m1YHoN0TOg7m1YHoN0SOh7n1XHoN0SOh7o1XHmNOUOi7n1XHmNOUOi7n1XHmN0TOh7o1XHmNOTOj7o1WHlN0UOi7P2VHkN2SOi7R2VHjN1SOj7S2UHjN1SOj7S2UHjN1QOl7V2RHiNV8W1jGiNV8W1jGiNV8W1kGcNZ8]1fG_N^8b1aG]N`8c1`G\\Na8d1_GZNc8f1^GXNc8h1]GWNd8j1[GVNj0He6R2aHUNa09h6b1hHTN6ELm0n6Z1PISN6HJP1l6U1TISN6HJU1g6Q1XIQN8II\\1`6j0_ImMbJkMYOl1Q69dJmM[Oo1l55eJoM_OQ2g50ZJlMM92P2b5K_J_NOh1`5IaJ_NOi1_5IaJ^N0i1_5IaJ^N0j1^5HbJ^N0j1^5HcJ]NOk1^5HcJ\\N0l1]5IbJ[N1l1]5IbJ[N1l1]5IcJZN0m1]5IcJZNOn1^5HcJZNOn1^5HcJZNOo1]5HcJYN0o1]5HcJYN0o1]5HdJXNOP2]5HdJXNOo1^5IcJXNOo1^5IcJWN0P2]5IcJWN0P2]5JbJVN1o1^5KbJUN0R12oN]5j1aJUN0o07oNY5m1`JUN0m0:POV5n1`JUN0h0a0SOo4Q2_JTN1f0f0QOk4U2_JSN0g0g0POj4V2_JSN0f0j0mNi4Z2]JRN0g0m0iNh4_2ZJQN2f0i6Y1VIPN1g0i6Y1VIPN0i0i6W1WIPN0j0h6V1XIPN0j0h6W1WIoM1k0g6U1YIPN0j0h6W1WIoM1j0h6W1XInM0j0i6X1WInM0h0k6Z1UInM0g0l6\\1SImM1e0n6^1QIlM2e0n6_1PIlM2e0n6_1PIlM1e0P7_1PIkM0e0Q7a1nHjM1e0Q7a1nHjM1e0Q7a1nHjM1f0P7`1oHjM1f0P7`1oHjM1g0o6_1PIjM1h0n6^1QIjM0j0n6\\1SIiMOj0o6]1RIiMOj0o6^1QIgM1j0o6_1PIgM1i0P7`1oHgM1g0R7b1mHgM1d0U7e1jHgM1a0X7h1gHgM1>[7k1eHfM0;_7o1aHfM09a7R2^HeM07e7T2[HeM04h7W2XHeM1Ok7\\2THeM1FT8e2kGeM\\8[2dGeM\\8[2dGdM]8\\2cGdM]8\\2dGbM]8^2cG`M_8_2=N2F:E;G9C=N2O100002N1lEiNf9e2oN2VG]M\\8c2cG^M]8b2bG_M^8a2bG_M1MS8d2lG_M1OQ8c2nG]M13n7`2QH]M1[N]O1c5i2bJmN;[N_O0c5h2cJoN7[NCNc5h2cJRO2ZNHMb5g2dJFJdMa5f2eJFIbMe5i2aJEJbMe5i2aJDKcMd5i2`JEL`MNNf5m2`JEL`MNOe5l2aJEKaMONe5l2aJDLbMNNe5l2aJDLeMc5g2aJDLbMNNe5l2aJDKbM0Nd5l
2aJDKcMOMe5l2aJDKbM0Nd5l2aJXOMoM1Nf5k2[JXO0lM11d5j2\\JYO8mM\\5j2\\JYO8lM]5l2ZJXOW6h0iIXOW6g0jIYOV6g0jIYOV6g0jIYOV6g0jIYOV6g0iIZOW6f0iIZOW6g0hIYOX6g0hIYOX6f0iIZOW6f0iIZOW6f0iIZOW6f0iI[OV6e0jI\\OU6d0kI^OS6b0mI@Q6`0oIAP6?oIBQ6>oIBQ6>PJAP6?oIBQ6>oIAR6?nIAR6?nIAR6?nIAR6?nIAR6?nIAR6?nIAR6>nIBS6>mIBS6>mIBS6>mIAT6?lIAT6?lIAT6?lIAT6?lIAT6?lIAT6>lICT6>kIBU6>kIAV6>kIBU6>kIBU6>kIBU6>kIBU6=lICT6=lICT6=lICT6=kICV6=jICV6=jICV6=jIBW6>iIBW6=jIBW6>iIAW6`0iI_OX6a0hI^OZ6a0eI_O\\6`0eI_O[6b0eI]O\\6c0dI\\O^6c0bI\\O^6e0bIZO_6f0aIYO`6g0`IXOa6g0`IYO`6g0_IYOb6g0^IWOd6i0\\IVOe6j0[IVOe6j0[IUOf6k0ZISOh6l0YISOh6m0XIROi6n0WIQOj6o0VIPOk6P1UIoNl6P1TIPOm6P1TIeNJjNT7b2SI[M\\8R2n0I7M3J6I7J6L4I7J6K5K5LPic0NTW\\OS1cDhNY:\\1mEiNd9W1[FkNd9S1\\FoNd9P1[FQOf9l0\\FUOd9j0[FXOe9f0[F\\Oe9b0\\F_Od9?\\FCd9;\\FHd95\\FMd91\\F?U9@kFb08kNFMZ8d0hGf0LfN55JO[8>jGj0KeN63KO[8=jGm0JdN63KO0MU8?oGP1LaN54JO2MT8Ih7m0hG\\O?Gj7i0gGD?Cj7g0eGJa0@h7d0jGL=Ch7>iG3?@B^Oo7l0RH71kN6m0GVOf7i0^H:OjN6U1[7F`HRIPN0c1m6=SIPN0d1l6 located relative to ?", + "answer": " is over .", + "image": "images/caption_simple_62.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000224051.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": 
"WeS3451j4o0j3R4QLbJ1<6n0k3Q4mKhJ199l0k3Q4jKkJ37f0j3V4aKoJ93=g0k3f4gKcJ>L_O6_4Y5eKdJ?EE8CDf4j5cKeJ`0AK6X4c5^KeJ]6Y5cIgJ^6Y5bIfJ`6X5aIgJa6W5_IiJb6W5^IgJc6Y5]IgJd6X5\\IhJ=H]OOe5a5`JhJ7=U5j4dJiJ6?U5h4eJiJ5`0W5f4dJjJ5?Y5f4bJmJ3=\\5f4_JPK38a5g4\\JRK25e5h4ZJSK04g5i4YJTKN4i5h4YJRLi5m3WJQLk5P4TJoKn5P4RJoKo5Q4QJnKP6S4oIkKS6U4mIiKV6V4jIiKW6V4kIhKV6X4jIhKV6X4kIdKX6\\4hI\\K`6d4aImJm6S5SIlJn6U5RIjJn6V5RIjJn6W5RIgJo6Z5QIdJP7`5mH]JU7d591QI^J\\O?o5S5aJ_KY5b4eJdKU5]4iJgKU5Y4jJiKU5V4kJlKT5T4kJnKS5S4lJnKT5S4kJmKU5S4jJnKU5S4kJmKT5T4kJnKT5S4kJmKT5S4nJlKR5R4PKoKn4Q4SKZLb4e3`K^L[4b3fK_LY4a3gK_LX4b3hK^LW4c3iK]LW4c3hK^LW4c3iK^LU4c3jK]LW4c3iKULCoM11b4l5jK]LT4d3lK]LR4d3mK]LR4e3nK[LP4f3PLZLo3h3PLXLo3l3oKTLo3Q4nKnKQ4S4oKnKP4S4QLlKl3V4VLhKi3Z4VLgKg3[4YLgKd3Z4\\LfKb3\\4^LeK`3\\4`LeK^3\\4bLeK\\3\\4dLdKZ3_4eLbKY3_4gLcKT3a4lL_KQ3c4oL_Km2d4SM\\Ki2g4WM[Kf2e4\\M\\K_2h4`MYK^2i4`M[K[2g4dM[KX2i4hMWKT2l4lMYKl1j4TNYKe1k4\\NYK[1k4eNXKT1m4lNYKk0i4UOg4001O001O0O100O00O010OO200001N2N200O3LhTi1" + }, + { + "size": [ + 428, + 640 + ], + "counts": "T:X3T:000O100OoMRFmNMc1n9ROSFD5CM657H30EQ:MPF50K1N08NJ7NL584J4KHV:NoEO15LI60>2[O069JJQ:KPF33O15LJ5OL1OO:2E158KJk99VFG42JL77JH3;0MMKi9 and ?", + "answer": " is parked on .", + "image": "images/caption_simple_63.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000530099.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": 
"T\\Y11d;3N1O100O1O1O1O1O1O100O1O1O100O1O1O1O1O1O1O100O1O1O1O100O1O1O1O1O1O100O1O1O1O1O1O100O1O1O1O100O1O1O1O1O100O1O1O1O1O010O1O1O2N1O100O1O1O1O100O1O1O1O100N200O1O1O100O1O1O1O100O1O1O1O100O1O1O1O1O100O1O1O1O1O1N2O1O100O100O1O1O100O100O1O100O1O10000O10000O100O100O10000O10000O10000O10000O1000000O100O10000O100O10000O100O100O10000000000O100O10000O10000O1000000O100O10000O10000O1000000000000O1OVM[Hd1e7ZN\\Hg1d7VN_Hj1a7UN_Hl1a7SN_Hn1a7QN_HP2`7PN`HQ2`7nM`HS2`7mM_HT2`7kMaHV2_7iMaHX2^7hMaHZ2_7eMaH\\2_7bMbH_2]7aMcH`2]7_McHb2]7]MdHc2\\7\\McHf2\\7[McHf2]7XMdHi2[7VMfHk2Z7TMfHm2Z7RMfHo2Y7QMgHP3Y7oLhHQ3X7nLgHT3Y7kLgHV3X7jLhHW3X7hLhHY3X7fLhH[3X7dLhH]3X7bLiH^3W7aLiH`3V7`LjHa3W7\\LiHf3b72O1O1O1N2O1O1O1O1O1O1O1O100O100000000000000O10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000GhKmH11X4R7gKmH21V4R7hKmH21V4R7kKPIU4P7kKPIU4Q7jKoHV4R7gKPIY4W700000001O000000cH" + }, + { + "size": [ + 375, + 500 + ], + "counts": "Zel11f;1N2O3L2O0O2O0O2O1N2N3N3L3M102M1O3L2N3O1N2O1N3M2N2M2O1O1O1O1O1N2O1O001O1O10O00010O01O100O001O100O100O100O1N3O1N2N1O2OO10O0100O010O010O10O01O10000O010O1000000O10O1000O1000O010000O10000O1O01000O1O1000O01O1000O0100000O01O1000O10O1000O01000000O01000O1000O1000O0100000O1000000O0100000O1000000000000O10000O1000000O100O1000000O101O0O10001N101N101O001O000O2O001O001N10001N1O1O1O2N1O1N3N1OaEROX:l0gEVOY:i0gEYOY:e0gE[OZ:d0gE\\OZ:a0fEA[:=eED[: and ?", + "answer": " is sitting on .", + "image": "images/caption_simple_64.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000202339.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "Vk_18cc06I6J7J5J7I6J6J7J6J5M3M4L3M3M3M3O1000000000000000000000O100000000O100000000O10000000000O011O001Og0XOc0^O9G:F4K6Kohi6" + }, + { + "size": [ + 640, + 480 + ], + "counts": 
"koo18gc03J6L3L3L5L3L4K4O2M3O1H8L2O0J3QN`NP@IY1m1k>j1O10M3O1O1010o_O_Lg0LSOe0n=h3[BRMEcNR34U7^4kEYMf2]N\\7d6[H^Id7j6SHXIk7P7mGQIR8[7aGhH^8^7\\GbHc8c7YG]Hg8g7UGYHP7H[IS8AVHn63[Ii7DUHP79UIf7HPHR7a0QIb7JmGS7f0PI^7KmGT7h0nH_7JjGW7l0kH[7LjGX7m0kH[7JiGZ7o0jH[7HfG_7P1hHZ8W7iGfHWNIj9`7SHbHUN4a9[7[H]HXN3a9`7XH[HW8e7R20O2N1001OO10O0100O1O2O0RFWHQN6JJ2No8l7SIWHoMj0i4[OVLNU1f7LYHmMk0f4l0\\MP61oHmM_O2F`4P2aKVN_1h7`0cHnMDa0KR4]2lLa5b0kHZ3e1nKa5d0mHe3e1\\K`5m0kH[NTOX5b2[K`5Q1jHP4e1kJd5P1kHX4a1cJg5P1mH^4^1UJn5Y1hHf4m9TKXFn4h9nJZFV5c9dJoDTO_1[6a9UJPE@i1]6V9QJoFQ6Q9jISGW6Z;002O0O2N010O1O2OO01O10O02N001O100O1O2O1N2N4L4L3N1N3M3N1NTJiCP18iMI\\1Ud600000O10O01O1O1000O01O1[OVOR^Ok0la0\\OP^Oc0Pb0@m]Oa0Pb0Cn]O>Qb0Dn]O;Sb0Fk]O;Tb0Gj]O9Wb0h0000O101N1O101N1O2O2M2^O_]OBcb08h]O_O\\b0=f0K6KggS4" + } + ], + "question": "What is the relationship between and ?", + "answer": " is carrying .", + "image": "images/caption_simple_65.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000172396.jpg", + "mask_rles": [ + { + "size": [ + 351, + 640 + ], + "counts": 
"3_2`82M5L3M3M6J3M2N5K5K4L2N4L3M3M6J4L2N5K6J3M2N3M5K2N8H4L6J1O3M3N1M5L3M3N7H3L5L2N4L3M3M3M4L4L5K2N3M4L4L4M1M5L3M4L5K1O2N6J3M4M3K3N4L4L4M1M2O5K3M5K2N2N4L2N2N2N00001O000000000001OO10000001OO100001O00O100001O00O100001O00O100000000000000001OO10000001OO10000001OO10000000000000000000000000O1001O00000000000000O1001O000000O1001O00O1001O000000O10000000000001O00O1000000000000001O00O1000000000000001OO100000000000000001O00O1000000000000001O00O10000000000001O000000O1001OO10000001O00O1000000000000001O00O1000000000000001OO1000000000000001O0000O1000000000000001OO10000000001O0000O1001O00O100000000000000000cILo14^40000000000000000000000000O1001O000001N11N1001N10O2O0001O0000O1001N10O2O0001O0000O1001O00O10000001OO10000001OO10000001OO100001O00O100001O00O100001O00O100001O0000O1001O00O10000001OO1000000000000001O0000O10000000000001O00O1000000000001OO100000000000000001O00O1000000000000001O00O2OO101OO100001O000000O10000000000001O00O10000000000001O000000O11O0000O1001O000000000000O10000001O0000O1001OO100QJMR13n4O00000000000000O1001O0000000000000000O11O0000000000000000O11O000000000000000000000000000000000000O11O00O100N2001OO1L4N20000O1O1N2H@mEd0P:7I7K5N2O1O1L4L4O1N2D]NXGg1f8:F:M3O1N2N2N3L2N[MSH^2a7`M]H65Y2]7h0G9K5N2M3M3N3N`LXIj2h6WMXIi2V6bLlINUO:01f4h2gKjLB6l0MVO20M93[4m2lKnL_O5k0NVO12M20K4e4i2PLRM[O2ONm02VOO<2@33K^4j2XLWMXO00Nl03BNB6O02J_4f2\\L[MSO5l0KC:BJ1K_4b2`L_MoN5l0JD;@K2J_4_2cLbMlN4m0LB:AJ3K^4W2kKiMF4b0M\\O20Lm02VON7d0I]O^4V2SLjM21ZO49K78H and ?", + "answer": " is inside .", + "image": "images/caption_simple_66.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000404839.jpg", + "mask_rles": [ + { + "size": [ + 640, + 427 + ], + "counts": 
"_l02g15\\NK?0E0b`07j@S1S?POj@S1U?lNi@FdN^1b`0kNa@IXOY1X`0mN`@LXOV1W`0oNX@HWO74HM[1``0oNT@7]OP1``0hNQ@:^Oo0b`0eNP@T2Q`0kMo_OT2R`0mMj_OV2V`0jMi_OW2X`0hMh_OX2Y`0fMi_OX2Y`0fMh_O4@j1i`0SNf_O3Ai1l`0RNb_O6Bh1l`0TN`_O4Aj1Pa0VN[_ONGl1n`0XNX_OMJk1n`0YNW_OQ2j`0nMT_OT2l`0lMR_OV2Ua0cMk^O]2Wa0bMg^O7On1Xa0`0M32N2N0O01000OgN\\MXAc2h>^M]@0;b2W?_M]@14OD`2j?aM^@3FL42M^2]`0_Mg_OO03M_2k5iMQ5U2hJYNS5g1hJbNT5^1hJlNR5T1kJYOk4f0RKKa44XK:b4ETJfMZLO40?k2k8^OmITOhLb1e7oLgIX2b1P3V6nLdI_3W6]LeIW4o5gKnI`4P6`KiIg4W6YKeIk4Z6VKbIn4]6SK_IQ5a6oJ[IU5e6kJYIW5h6iJUIY5k6gJPI^5i6`HfGR2^1b5l6\\HfGR2U1oMgNi7^8VHfGT2l0P6d7RJWHQ6h7\\JdGl5[8m20000000000LdDdG\\;h7dDlH\\;g6hDiHN?Z;m70002NLgDaGY;^8hDbGX;]86ZLRE]KIo0G4^;\\3mE^KcNh03=^;[3WFULYNa0_;X3[FVLVNb03hNV;]4fFbM`9]2eF_M[9`2kFgKgM^1d1aNZN3a9U4TIUMQOfNn7T4RIUMPOgNo7S4TITMmNiNo7Q4ZIRMhNlNn7P4\\IPLQMm0\\1ROa8P4SIUMXNnNf8m3QIVMWNoNh8j3SIVMTNoNk8j3RIWMRNnNn8j3QIYMjMROV9d3QIZMjMmNY9h3nH[MlMiNW9l3nHYMhMkN^9k3kHYMfMnN_9i3kHYMdMPOa9g3kHYMdMPOa9g3kHYMcMPOc9f3lHYM_MQOg9e3kHZM]MQO]84kG`3]2YM[MSOU8e0bG\\OAc3]3ZM[MQOP8o0`GQ3V3oL[MPOm7a5iJ^KT6c4nI[KQ6g4oIXKUMQOR8i5iJWKTMPOR8j5jJVKTMPOR8j5jJVKTMPOS8h5jJXKP6f4PJYKW6a4iI_K[6\\4fIeK]6W4cIhKa6T4`IlKb6Q4`InKc6o3\\ISLg6i3YIXLoLoNQ9h4PJYLj6d3VI\\LkLQO]9`4iI_LfLTOd9Z4fIcLgLQOe9Y4eIfLgLQOd9X4eIiLaLlNR:Z4]IRMXLfN\\:V4\\IXMh6d2XI[Mk6c2UI]Mm6a2SIVMZ7f2fHWM`7f2`HYMd7e2[H[Mi7a2WH`Mh7`2XH`Mh7`2XH`Mh7a2WH_Mi7a2WH_Mi7a2VHaMi7_2WHaMi7`2VH`Mj7`2VHaMi7`2VH`Mj7`2UHaM2kNQ4e3lK`M4jNP4f3mK_Mk7b2TH^Ml7c2SH^Ml7c2RH^M5hNT4k3gK\\Mn7e2RHZMn7g2QHYMo7i2oGXMP8i2oGWMb0iNa3R4kKUMS8l2lGTMa0kNg3S4gKQMb0lNg3T4fKQMb0kNh3T4fKQMb0kNh3U4dKQMf0hNf3X4TKhKkMX1k:R3WGjKjMU1`3iNa3Y4SKlKjMS1`3hNd3Z4oJoKkMo0b3hNd3[4lJSLkMj0U;U3kFdMT9]2iFeMW9]2fFdMZ9^2_FgMa9\\2WFiMi9b2bEhM^:m501O1iHYGS3g8S40hN\\GTGe8f8cGWG^8`8kG_GU8]8PHbGQ8Z8SHeGm7X8WHgGi7l7fHRHZ7j7kHUHV7o5SGcJ;iNa1a0Q7R6UG^Jc2[OX6V6YG\\I1a0j2Gl5k3SGZNh0hMf;m3bCZN^=e1dBYN]=U41N100O1hM_BQNb=]1bBoN_=o0gBiLEh1e=^1iBgLGg1`=a1lBdLGi1^=a1ZC^MPO3g=]2eDSM^;j2jDnLX;n2o2L4M3M3Ll^O[MUa0R2R_OVNSa0h1Q_OTNPa0k1c000O2N101N101O1O0O3[Ol]OZO
bc0JZmm3" + }, + { + "size": [ + 640, + 427 + ], + "counts": "nY:1nc02N1a]70^bH3N1O2M8H4M2M3N2M5K6K1O1N2O1O0OI`]OoNab0Q1`]OnN`b0S1_]OmN^b0V1a]OkN^b0\\100O010O2OO1000OO2M3M2O200003M00N2O10000N\\]OiNcb0X1201O3ROo]OHQb05Q^OKoa05Q^OKoa05Q^OKTb0OgQ?6nn@6N000O020O00000004LBHU]O7kb0IX]O4hb0MY]OOib02V]ONib03b01O1O001O1O1O1O00001O4L2N1O2N2N3M1O1O001O1O0000001O0O1000O1100O1O0000001O001O00001O000000001O000000000O2O0000010O00001O0O101O01O01N2N2M201O000000001O00000000001O0000000010O000001O0000001O0j@_Nm;a1QDbNo;]1QDeNm;[1SDgNk;Z1UDfNj;Z1VDhNh;X1YDiNe;X1]DfNa;[1`DfN^;[1aDfN^;Z1bDhN\\;X1eDiNY;X1gDhNX;X1iDhNV;X1jDiNU;X1jDiNU;W1kDiNU;V1lDkNS;U1mDkNS;U1lDmNS;T1kDnNT;R1kDoNU;Q1iDROV;n0iDSOW;m0hDTOX;l0YD\\NgMi0P>k0YDaNaMe0V>j0YDCg;=YDCh;=WDBj;>VDBj;>WDAj;>VD^On;c0QDQO`MZO1Na>g1nCQO`MZOf>f1iCPOaMZOg>f1gCTOZeDBZ;>fDBY;?iD_OW;a0kD\\OV;d0kD[OU;e0kD^OR;b0oD^OP;b0PEBl:>UEBk:>TECf9lNWDa1S2C`9WOXDV1Y2D_8XOeD6?n0^2DW8n0lD]On2ER8n2nGRMn7R3RHoLj7T3VHmLf7X3ZHfLc7^3]HbLa7_3`HaL]7`3dH`L[7b3\\FaKF31k0i9d3[FmK[O21RO2\\1U:d3[FXM_OTOT:g3XFmNf9T1XFoNf9S1VFQOh9e5N1O1O101_KSFlNQ:n0RFQOU:g0mEYOT:c0oE]OR:a0oE^OY:8jEHX:OoE0T:KoE4U:FnE:U:BlE>X:mKoDT3m0n0g:^NdEb1a:UNcEk1`:\\MQDmNd1g3]:fLhFZ3^9ZLhFf3i and ?", + "answer": " is beside .", + "image": "images/caption_simple_67.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000069138.jpg", + "mask_rles": [ + { + "size": [ + 640, + 371 + ], + "counts": 
"o96b0NDNM72KQb04[^OLD;056@Hj00SO48LJNOV?9XAKB=35NYO1]7i9nHVFL1d00`7i9YIWFg6i9ZIVFf6j9ZIVFf6j9ZIVFf6j9ZIVFf6j9[IUFe6j9i10000001O000UJUFa1k9Z4000VJUF_1k9`NWF_1i9aNWF_1i9bNUF_1k9[4001O00000XJTF\\1m9cJSFf31e1m9cJTFg30e1W:bJgEb32l1X:[NhEd1Y:[NgEe1`:TN`El1d:PN\\EP2e:oM[EQ2e:PN[Eo1f:_KXE`11Q3j:\\KUEc11Q3n:XKQEh10P3T;SKlD`7T;f0000000001O0000OaJlDT2T;lMmDS2S;]300O1O1OlHoD[5P;fJPEZ5m:iJSEI2e3j:[NVEd1g:_NYEa1g:_NZE`1f:aNYE_1g:aNZE^1f:bNZE^1f:bNZE^1g:aNYE_1g:bNXE^1i:aNWE_1i:bNVE^1k:aNUE_1l:aNSE_1m:aNSE_1n:`NSE_1n:i30iJQE]1P;bNPE^1P;i31O0jJoD]1Q;bNPE^1P;bNPE^1P;cNoD]1Q;i3O10lJPEX1o:iNREV1m:kNTET1k:mNUES1k:mNVER1i:oNWEQ1h:QOWEo0i:QOXEn0i:QOWEo0i:QOWEo0i:QOXEn0h:SOWEm0i:SOWEm0i:SOXE_LV1b2b9WNUEo03PNm1i2k8VNZE\\O2_OL7S3h2e7iM\\JV2g;00000000000001O001O00O11O00000000001YNQ_OGea0VOZ^O1olo0NoRPOV1Ya0]1N2O100000000O1000000O1N200000000000000O10000000000001O0000O100000000001O00000000O10000001O000000000000O10000001O00000000O1O1000000O1000000000000YMRMkA0Z2o2Qh?S1dB`NcM1i?_1hB`NX=`1iB_NW=`1iBaNW=_1iBaNW=_1iBaNW=_1iBaNW=_1eB`NdM1g?_1eBaNcM0h?_1dBbNdMOh?`1cBaNeMOh?`1cBaNeMOh?`1bBbNfMNh?`1bBgN]=Y1cBgN]=Y1dBfN\\=Z1dBfN\\=Y1dBhN\\=Y1cBgN]=Y1cBgN]=Y1cBcNeMNh?_1bBhN^=X1bBhN^=X1aBiN_=W1aBiN_=W1aBiN_=W1aBiN_=W1bBhN^=X1bBhN^=X1aBiN_=W1aBiN_=W1aBiN_=X1_BiNa=W1_BiNa=W1_BiNa=V1`BjN`=V1_BlN_=U1aBkNV<^2jCbMQ;c3oD]Lf:n3YESLf:R4ZC`Kb1`0T;e4^D\\Kb;n4SDSKm;`5\\CdJd<\\600O100O1O1O10000000000O101O0000000000000000000000000000000001O00O1001O0000000000000000000000000001O0000000O10000O100000000000001N10000O2[Od0oMnG_F0a0j8R3\\Gj0m:oN^E[OBZM5Gc;Y2YDhMi1T1TNiNR>[1n[Z2" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is over .", + "image": "images/caption_simple_68.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000342367.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": 
"QnQ62n>1m>NYRO1e>0ZA4a>M`A3`>L_A6`>I`A9c>I\\A2l0MVO2V=MiB2N4c0=e<]OkB9Nm0f;jNgE1cNb1d;UO[DB<^1X;BbD>];EaD;\\;IcD8Z;h1M3M3M4L4L3N3N11O000000000010O0001O0000001O000O101O00001O001O00001O0000010O0001O01O00001O010O001O010O0000001O0jLQE\\2o:]MYEFI`2o:fM[EII_2m:aMaEOC_2h;`MZD_2e;aM\\D^2d;bM^D\\2b;dM_D[2U5K4N2O2N1O1O1O100O2N1N2O2O0O100O2N1QNZOdEf0V:EdE and ?", + "answer": " is in front of .", + "image": "images/caption_simple_69.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000263796.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "[i;a0Tc0>L2N3N3M1N4K3O2N3f]O`Nma0l1M8I1\\OhM]_O[2``0hM]_O]2^`0fM__O^2^`0e0L5L5K3L2O0O2O000O10000O101O001O002N0002N2N3M1O3M2N6J4L1O1O2N1O001O000000TOoLi@Q3V?SMf@n2Z?UMc@k2]?XM]@k2b?XM[@i2e?[MU@g2k?g0O10000O1001O00O1O1O100O1000000000000002N2N1O2N6J4L1O1O0000000000VO^LVAb3h>dLTA\\3k>gLRAZ3n>hLo@Y3P?lLi@W3W?mLc@U3\\?h000O10000O100O100O1000000000000000000O10000000000O10000O10000O10000O1000000O100O11O1O00O1O10000000000N2O100N200000000O10000001O1O5K3M8H2N6J3M2N3M3M2N2N1O1O1O2N1O1O1O001O1O1O1O1O001O1O2N1VM__OS2a`0gMk_OS2W`0gMQ@U2P`0hMT@V2l`0O1O2N2N1O2N1O1O1O1O001O001O1O001O001O001O001O0000001O000000001O0000001O00000000000000000000000000000000000000000000O10000O100O10000O1O1O1O100O1N2O1O1O1O1O1O1001O002N3MO1O1C=N2TOhMY_O;7m1[`0^Nd_Ob1[`0`Nd_O`1[`0bNc_O_1]`0R1BhLT@X3k?iLU@W3k?jLT@V3l?jLT@V3l?jLT@V3l?jLT@V3l?jLT@V3l?jLT@V3m?iLS@W3l?jLS@W3m?kLQ@U3o?>O1N2N2K5A?O1O100O100001O001O0000XMZKWFe4h9^KaCMc2e4l9bKRF^4n9bKRF^4n9cKRF\\4n9cKSF]4m9bKUF]4l9^KaC0c2b4l9]KbC1b2b4l9]KbC1b2b4g<0O1O100O100O100O10000000000001O00000000000000O10000000000000000O1000000000000001O001O00001O001O1O001O1O3M2N1O1O2N1O1O001O00001O1O002N3M2N2N1O2N3M1O2fMi_Og0W`0UOQ@g0Q`0TOT@j0o?_N`_O2i0\\1i?`N__OLQ1b1b?`N^_OLS1b1a?^Nj@`1n`0M4L1O002N2N3M1O001O1O2N2N2N1O2N3M1O2N1O2N3M1O2N1O2N5KQl`0" + }, + { + "size": [ + 640, + 480 + ], + "counts": 
"dad3a0\\c0:G8G8H7J5K6K3L7J4M1N4L2O2M4M1N2O2N2^_OWMh?k2T@ZMj?g2S@\\Ml?f2Q@\\Mm?f2Q@\\Mn?e2Q@[Mn?g2P@ZMP`0\\3N1O100O1O1O1O2Z@nK\\?\\4N001O100O00100O10O01O010O10O0100h@\\KS?`4RA`Kk20o8`4VD`Kk20o8`4\\31000000O10001O000O10000000000000001O000000000000001O0000000001O000000000000001O00000000001O000001h@\\KS?c4l@`KR?g41O0100O01O00010O10O01O0O2J6L4N101O1O1O1O001O2N010N2O1O1ZOT@VMm?h2U@WMk?g2X@WMj?h2W@WMj?g2W@XMk?g2X@UMi?j2[@nLk?n2f0N1M4N2N1O2L3M4L3M4N2K6K4I8J6M4\\Ombf2" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is on .", + "image": "images/caption_simple_70.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000119828.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "`oY1a0P;:C;I8I5M4K4N3L3M3N1O2N1O2M3N1N3N2M3N2N1O2M2O2N1O2N1O2N1O2N2N1O2M2O2N1N3N2N1O1O2N1O1O1O2N100O101N1O101N1O1O2O0O101N10000O100O101N1000000O10000000000O1000000O10000000O100000O10000000O100O10000O10000O100O100O1O1O100O10000000000O1000000000000000001O0000000010O00000000001O0001O01O01O0010O00010O010O2O1N2O1N2N4M3M3L4L2O0O1fJoJl3T5lKlJnN2W5T5fKjJUO3T5U5eKiJTO4W5V5aKnJ`4V5ZKkJf4W5YKhJg4Y5UK^JD9X5S6001O00010O001O001O00001O001O001O0000001O00000000001O0000000000010O00001O00001N100010OO2O001O1O0O4MOLUJ^Jj40QLc5WOZJg4:lKU5[OfJm49dKP5CdJj4]6VKbIk4_6SKcI1Ne4_6[KbI0Of4_6ZK`I20e4_6YKaI20f4^6XKbIo4_65O1OjJcIS5`61O1O1O1NgJeIY5V67M2N3K401M2N2O2H7B>^Ob0K5N2N2N2O1O2N002N100O1O011N10O0101N10O0101N100O100O100O1O100O001O1O100O1O1O1O2N1O1O1O1O001O0O20OO2N2N1N3M3J5@a0L4N100M3O2N1M3O2L4^OlE[OY:b0?M4N2N1N201N2N100O101N2N3K[Vf0" + }, + { + "size": [ + 375, + 500 + ], + "counts": "_Sg31h;0N0bh01[WO4M5K3N3MCPE4n:LUE4i:K[E3d:M]E3a:NaE1^:OeEOZ:0hEOX:1g00PE2V:0gE3X:g001M2O1N101N2O001O001O00001N100O100O100O10000000000000000000000000000000000001O0000001O0000001O001O0O2O0O2O0J7K5J5L6I8H\\eR1" + } + ], + "question": "Where is located relative to ?", + "answer": " is attached to .", + "image": "images/caption_simple_71.png" + }, + { + 
"image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000001993.jpg", + "mask_rles": [ + { + "size": [ + 419, + 640 + ], + "counts": "]8a0a<1O2O1O0O10001N10000O2O0O10000O2O000O10001O0O100000000O2O00000O100001POTDg0Vo:CoD>P;]3fDiKc8]4^OcK\\H]4d7dKZH]4e7fKWH\\4i7gKRHf2c9Gf0N1010O10O100O010O100O10O10O100O0100000O10O100000O100001N2O3M3M2N2NGnEcMP:]2RFdMk9\\2WFdMg9\\2[FdMb9]2`FdM]9]2dFcMZ9_2gF`MW9b2iF^MU9c2lF]MR9d2PG[Mn8g2SGXMk8[2fFkM>Jk8W2nFnM7Ki8U2TGPN4Ig8V2ZGoM0Hf8X2]GoMV9P2mFPNQ9d1XF]Ni00n8]1aF^Ne05h8[1jGdNU8[1nGeNP8X1THhNk7T1[HlNc7P1cHPO\\7k0jHUOT7g0RIYOm6b0XI_Of6=`IB`69fIGY65kILT6OSJOm5LYJ4[9010O1000O1N2O1MbVX5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_72.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000221502.jpg", + "mask_rles": [ + { + "size": [ + 320, + 640 + ], + "counts": "nR63g99K3O1N1O2O1N2O1O0jFZOR9k0N5YGTOQ8n0mGUOk0Nk5n0YITOj00l5m0ZITOg01YOGY6W1gIROLL>5CJ[6S1`IjN4f0;OP6b0aIkN2g0NWO7g0X6h1dIZN[6h1cIYN]6e20DcIWM[6i2gIUMY6m2eIRM]6n2dImL_6n2n1g5QNkI0?n1f5TNjIMa0o1d5VNjIKb0o1d5UNjILc0o1d5TNiIMc0n1d5WNhIKd0n1d5XNhIHe0P2c5YNhIFe0Q2d5WNiIEe0U2c5TNdJl1]5SNcJn1]5WN^Ji1b5VN_Ji1a5WN_Jj1`5VNaJi1_5WNaJi1_5WNaJi1`5VN`Jk1_5UNaJk1`5TN`Jl1b5QN_JP2c5mM]JT2c5jM_JU2a5kM_JV2a5iM_JW2e602N2N2M5K8I;E5K8G;F9Febm4" + }, + { + "size": [ + 320, + 640 + ], + "counts": 
"Qgj0131f91XF1g95N2N1O101N1O100O1000000O1O10000O10000O100O100O2O0mN_OgHa0i5I`Ie0b0Bn5T1RJmNl5U1RJlNm5U1SJkNm5U1SJkNm5U1SJkNm5U1SJkNm5U1SJkNm5V1RJkNm5U1RJlNn5S1SJmNm5T1RJlNn5T1RJlNn5T1RJmNm5S1TJnNj5R1VJQOg5o0YJSOe5m0[J]O[5c0eJC^O_Nc5n1oJOd0UNi2l1cL8:nMS3i1cLa00hM]3h1bLg0DZMK:o3i1\\LZ1d3a201O00O1001O000000[MXLROh3n0`LiNa3V1eLeN[3[1mL]NS3c1WMRNj2n1YMnMh2R2aMcMa2]2cM^M^2X1XLmN]1F\\2\\1[LkN]1CZ2b1YLkN`1]OZ2h1VLkN`1\\O[2i1ULkNa1ZO[2k1TLkNS6U1mIkNS6U1mIkNS6U1mIkNS6U1mIkNS6U1mIkNS6U1mIkNR6V1nIjNS6U1mIkNS6U1mIkNS6U1mIkNS6U1mIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1lIjNT6V1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIjNV6V1jIjNV6V1jIjNV6V1jIjNV6V1jIjNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6T1jIlNV6T1jIlNV6T1jIlNV6T1jIlNV6T1iImNW6S1iImNW6S1iImNW6S1iIlNX6T1hImNW6T1hIlNX6S1iImNW6S1iIlNX6T1hIlNX6T1hImNW6S1iImNW6S1iImNW6S1iIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6T1hIlNX6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1
gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1hIjNX6V1gIkNY6U1gIkNY6U1gIkNY6U1hIiNY6W1gIiNY6W1gIiNY6W1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1eIkN[6U1eIkN[6U1eIkN[6U1eIkN[6U1eIkN[6U1fIjNZ6V1fIiN[6W1eIiN[6W1dIjN\\6V1eIiN[6W1eIiN[6W1eIiN[6W1dIjN\\6V1dIjN\\6V1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIhN\\6X1dIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIhN\\6X1dIhN\\6X1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1dIhN\\6X1dIhN\\6W1fIhNZ6Y1eIgN[6Y1eIgN[6Z1bIhN^6]200000000000000000001O00:Fg0`JnKd3Y4TLkKi3X5NlLaKi0]4jNRLU1m3jNULW1i3hNXLY1g3gNXL[1g3eNXL\\1h3dNXL]1g3cNZL]1e3cN[L]1e3cN[L]1e3cN[L]1e3cN[L^1d3bN\\L^1d3bN\\L^1d3bN\\LjN6@YO]1U49]LiNg0d0l2c0\\LjNR19b2l0]LkN]1MW2W1\\LkNc1IQ2[1^LkNe1Fn1X1cLRO`1Da2f0PLF`1CT32]K;`1BZ4>iK^OX4b0c2O1000001O00001O00000000002N001O00000O100000001N5Hgm?N]R@21Oci:" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_73.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000312586.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "`eo63X=2N1N3L3N3HD]C>b<61N101O000O100O2O00O1O101N2O103`CXOT and ?", + "answer": " is in front of .", + "image": "images/caption_simple_74.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000187236.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + 
"counts": "Y6c0n0i1U:XNkEg1T:^NjE`1U:cNjE\\1V:fNjEX1V:iNlES1U:oNjEP1U:SOjEl0V:UOiEk0W:WOhEh0W:\\OfEd0Z:]OeEb0\\:@aEa0_:A_E?`:C_E=a:E\\E;e:GYE9g:IVE8j:KSE5m:o100O\\LTEV3m:gLVEX3j:fLYEY3e:hL]EW3c:iL]EW3d:gL^EX3d:eL]E[3[7dLnK[3l7000O1000000000YM_Do1a;oMbDo1`;oMcDo1_;nMbDR2a;iMbDV2_;gMcDY2_;cMcD]2P<0O100000O010O1000O01000O10O010O1O01WObMPE^2o:cMPE]2P;cMQE]2o:cMPE]2]:bMXF]2i9cMVF]2k9cM[E16\\2_:eMZE06Z2a:gMWEO8[2a:gMVEN8[2b:jMREM;Z2c:kMoDMoBDP=;QCFn<:QCGo<8RCIm<7RCJn<6RCJn<5RCMl<4TCLl<4SCMm<2SCOm<1SCOm<0SC1m and ?", + "answer": " is lying on .", + "image": "images/caption_simple_75.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000255749.jpg", + "mask_rles": [ + { + "size": [ + 424, + 640 + ], + "counts": "W_U1e0Z2H]7Q1bG]OZ8j0[G]Oc8g0WG]Oh8f0QG_Om8f0eFEZ9U2O2N100O100O1N2M3NAXGbLd8`3`G^L`8_3cGaL]8\\3gGcLY8Z3jGeLW8X3mGgLS8W3oGhLR8U3R1I7_Ob0WN]E`0n:]OZEi]Y2" + }, + { + "size": [ + 424, + 640 + ], + "counts": "m`T28i<;I4M3N1O1O2N1O1O1O1N200O1O1O1O2N1O100O1L4M4L3N2O1O10000O10O100nNlDGKD[;b0nDGIEZ;b0PEGY;9hDD[;:gDC\\;;j0M3N2N4JhSc5" + } + ], + "question": "What are and doing in relation to ?", + "answer": " and are entering .", + "image": "images/caption_simple_76.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000575243.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "ZiV34Q=6M3L4L4N2M2N2O1O2O0N3O001N10001O000O2O010O001O00001O10O0001O001O010O001M3O1O010000O10O01O100O1O00100O1O1O010O10O01OO20O0100O01O100O100O00100O100O010O1O1O100O010O100O101N1O100O1000001N1O100O1N2O2O0O1O2N100O1O2O0O2O001N100O1O101N101O001Ni`b3" + }, + { + "size": [ + 427, + 640 + ], + "counts": "hcW31Z=0ea91odJ5QiK7RCN^L5O0100O0102M2N4M2M:E7HTl0OZbk3" + } + ], + "question": "What is doing with ?", + "answer": " is holding .", + "image": "images/caption_simple_77.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000473118.jpg", + "mask_rles": [ + { + "size": [ 
+ 500, + 346 + ], + "counts": "Uod01c?00000\\im00cVRO9_@Kh>f0G9D<\\ObNgBi1W=;K5M3M3O1000000001OAaMhC`2Th80bIBfM>h80aIDfM]2KYN:m8]O]F`0U2O`N4n8<]HCeN2n8;ZHFfNOR9;UHJgNJV9=lG0lNCY9=bFkNc0Y1@@\\9<_FROXORB7X>GmAMchm1" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is jumping from .", + "image": "images/caption_simple_78.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000527215.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "i8h02n2h8RMXGn2h8RMXGn2h8RMXGn2h8j0O100000000001O00000000000000000000000000000000O11O0000000000000000000000000000000000O1000000O100O100O1O10000O1000000O10000O1O10bKaGQ4_8oKaGQ4^8PLcGo3]8PLdGP4`7fK`H0a0:@P4_7gK^H1`09Do3^7gK]H2>;Gl3^7oKgH6Kk3]7RLeH4Nj3]7ULaH22i3]7kLcHU3^7jLcHU3]7jLdHV3\\7gLgHY3Y7gLfHZ3Z7fLfHZ3Z7eLgH[3Y7eLgH[3Y7eLgH[3Y7eLgH[3Y7lK]Ha0:c3Y7eLgH[3Z7dLeH]3[7kK\\H`09e3[7jK]Hb07d3]7hK]Hd06d3]7gK_Hd04e3^7eK`He02f3]7hK_Hb05e3[7kK^Ha06e3\\7kK\\Hb07c3]7eLcH[3]7fLbHZ3_7eLaH[3_7eL`H\\3`7dL`H\\3`7eL_H[3a7fL^HZ3b7gL]HY3c7W100001O000000001O000000000000000000001O000000000000001O00000000000000000000001O00000000000000000000000000001O001O001O1O001O00001O00001O001O001O00000000000000O100O100O100O100000000000000O1O100O1000000O1000000000000001O0000000000000000001O0000000000000000000000000000001O0000000000000000001O0000O1000000001O00000000000000000000001N11O00000000000001O00000hKUHQ3k7iL^HT3b7hLhHR3X7lLkHS3U7mLkHS3U7mLkHS3U7nLjHR3V7oLiHQ3W7QMgHo2Y7RMfHn2Z7SMeHm2[7YM_Hg2a7aMWH_2i7bMVH^2j7eMSH[2n7fMoG[2Q8_1000000000000000000000000000000000000000000000000000000000000000000000000000lKnGP3S8oLmGQ3R8PMnGP3R8PMnGP3R8PMnGP3R8oLoGQ3Q8oLoGQ3Q8oLoGQ3Q8T1000000000000000000000kKoGQ3Q8oLoGQ3Q8nLPHR3P8nLPHR3P8nLQHQ3o7oLQHQ3o7oLQHQ3P8nLPHR3P8nLPHR3o7oLQHQ3o7oLQHQ3o7nLRHR3n7nLSHQ3m7oLSHQ3n7nLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLRHR3n7nLRHR3n7nLRHR3n7nLRHR3n7oLQHQ3o7oLQHQ3o7oLQHP3P8QMoGo2Q8QMoGo2Q8QMoGo2Q8QMoGo2Q8QMoGo2Q8RMmGo2S8QMmGo2S8QMlGP3T8S1
000000000000001OO100001O000001O0O01001O00000000000001O0O11O00000000000000O100001OO100000000000001O000iKmGX3R8cLZHV3f7gLaHU3_7kLbHT3^7kLcHU3]7lLbHT3^7lLbHT3^7oL_HP3b7RMYHQ3g7QMSHT3l7nLoGU3Q8mLlGT3T8Q100001O0_KXH`3h7[L^Hd3c7VLcHd0YOV2V9gMXGn1X:J4L1OO1000nNSEGn:7SEIm:7UEGk:8WEGi:8XEHh:8XEHi:7WEIk:4VELn:0SEOS;JnD6U;GkD9Y;BhD>\\;]OeDc0a;UOaDk0n;1000000O1000000000000O100O1\\O]OaDc0Y;j0@`0L4O1O1L4WOi0^OgLmF_3b8UMYGm2^8\\LaGe4]85L4N2O10000000kKlGT3T8kLnGT3R8lLnGT3R8lLnGT3R8kLoGU3R8jLnGV3R8jLnGV3R8jLnGV3R8jLnGV3R8jLnGV3R8kLmGU3S8kLmGU3S8kLmGU3S8lLlGT3T8mLiGU3W8o01O00O10000000000000000000000000000001O0000000RLmGe2S8ZMnGf2R8XMPHh2P8XMPHh2P8XMPHh2P8XMoGi2Q8XMjGl2V8T1000000000000000nKiGQ3W8oLkGo2V8oLnGn2R8QMTHj2l7UMUHk2k7TMVHl2j7TMVHl2j7UMTHl2l7UMlGR3T8R1001O000000000000000000001O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000eG" + }, + { + "size": [ + 426, + 640 + ], + "counts": "l]f33U=4COZC;^<:O02N2N1N6J:F3O01OOi\\^4" + } + ], + "question": "What is doing on the ?", + "answer": " is walking on the .", + "image": "images/caption_simple_79.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000509131.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "mVb55T=2N2M2N4L3M4M2M4L5L3M1O2O1O2N3M3ZDfNU;k1N2L2O00100O0010O1O1O010O2N1O0O10001N1O2O1N2N2O1N2N2N2N2N3M2N4L5K4K4K7HS[Q2" + }, + { + "size": [ + 425, + 640 + ], + "counts": "^_^58P=e1\\N7I5K6J5Ld0\\O0001O001N2O1O2M2O2N3L4L101N2N2M3N4QNPE]1V;WNTEe1];J5K5J6J6K6H8Hc[]2" + } + ], + "question": "What is the relationship between and ?", + "answer": " is attached to .", + "image": "images/caption_simple_80.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000167902.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": 
"[9Y1^:0O1000O10000000O100000O10000000O100000000000O1000000O1000000000O100000O10O100000000000O1000O1000000000O1000O1000000000000O01000000O1000O1000000000O1000O1000000000O10000000O1000O100000000000O1000O10000000O100000O10000000000O10O100000000000O100000O1000000000O100000O100000O10000000O100000O10000000000000O0100000000000000O10O1001O00O1000000000O10O10000000000000O10O100000000000O10000000O100000O100000000000O10000000O100000O10O10000000000000000O10O100000000000000O0100000000000O10000000O0100000O2O00001O001O001O1N2O2N2N3M2N6J8H4Kk]10UbN3N3L5VEFo9>lECDL]:f0dEE^:j00000000O01000000000O1000000000O1000O100000000O1000O10O1000000000000O1000001O00001O1O2M2O2N3M2N1O001N4M001O[ODSF8l9JTF5l9NQF2o91mE0S:4gEMZ:h0O1000000000O0100000000000O010000000O10000000000000O010000000000000O010000000000O10O10000000O100000000O10O100000O1001OO1000000000O01000000000000O0100000000000000O010000000000000O100000O1000000000000000O100000O100000000000000O1000O100000000O10000000O1000000000000000O010000000000000000O10O1000000000O1000000000O010000000000000O1000000\\H" + }, + { + "size": [ + 375, + 500 + ], + "counts": "anV15Z;MhD;o: and ?", + "answer": " is over .", + "image": "images/caption_simple_81.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000097924.jpg", + "mask_rles": [ + { + "size": [ + 400, + 600 + ], + "counts": 
"n7a4o70000000000@VHZLj7e3XHYLh7g3ZHYLe7c0WHS24ZMe7b0eHg1FgMe7b0gHe1DiMf7a0jHb1@mMf7`0lHc1\\OnMh7?lHd1[OmMi7?mHd1YOmMj7?mHc1]OjMg7b0mHb1_OkMd7c0mHb1_OkMd7c0mHb1_OkMd7c0mHb1_OkMd7b0nHb1_OlMc7b0nHb1_OlMd7a0mHd1^OkMg7?kHg1\\OkMj7=jHh1\\OkMk7k1[9UNfFj1[9TNgFk1o901O000000O1SOUN^F2OO:j1Y9UN]F95g1c9TNcFk1]9QNhFn1Y9SNeFm1[9SNfFl1Z9TNfFl1P:O1O0000001O00000000001O1N1001O000O10O10O100O100001O00000001O000O100O100O1001O1O001O0000O1YOWNUFN?k1\\9ZNaFi1^9XNaFi1b9SN`Fl1S:O1O1O001OXNZNYHNTOh1a8]NgHc1Y7^NgHa1W7eNeH[1Z7gNfHX1Y7kNfHT1Z7mNfHR1Y7ROeHm0[7UOdHj0]7UOcHk0_7SOaHm0g7gN^HX1]9000000000000000O100N2L400@`0000000000oMlEH0k1d:N00O10000TNXNPIh1P7YNZH\\2f7hMUHY2k7mMmGL_Oj1c8bNeGDNf1]8[OfGd0Z8[OhGd0W8\\OkGc0U8\\OmGc0S8\\OnGd0Q8]OPHb0P8^OPHb0P8]OQHc0o7]OQHc0n7^ORHb0n7^ORHb0n7^ORHb0o7^OPHb0Q8]OoGc0R8]OmGc0U8[OjGf0W8YOiGg0Z8VOfGj0\\8TOcGm0^8RO]GS1f8iNTG^1m8aN^GT1d8iN^GV1e8fN]GY1e8dN]G[1e8bN\\G^1f8`NZG`1i8\\NYGc1j8[NUGe1l8\\NbF10c1_9\\N_F21c1m9]NSFc1m9]NRFd1m9_NPFb1m9b0fNaMcH_2\\7eMaH[2_7fM`HZ2`7iM^HV2b7kM]HU2b7oM[HQ2e7QNZHn1f7SNYHm1g7TNXHl1h7VNVHj1j7ZNRHf1m7\\NRHd1n7^NPHb1P8`NnG`1R8bNkG_1T8eNiG[1W8gNfGZ1Z8hNcGY1]8]100O1000000000000O100000000000000000000O1O1001O0000000000000000001OO10000001O000000000000000000000000000000000000000000000000001O0000000000000000000000000000001OO100000000000000000000000000000000O1001O00000000`G" + }, + { + "size": [ + 400, + 600 + ], + "counts": "R]^16T<;H9G4oKXOULk0f3CoK?n3HkK;R4KiK8T4MfK6Y4NbK4\\4O`K4_41ZK2d45TKNf4hJDX5 and ?", + "answer": " is standing on .", + "image": "images/caption_simple_82.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000509656.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": 
"0b6^800001O00O1001O000000000000001O000000000000001O00000000000000000000000000000000000000000000O1001O00O1001O00001O00N2bNmGPKJ=O@^8S5jGkJO0l8c5oFbJn8Y6_O4L2N0000000000000000001O00000000000000000000O1000000000;\\IbG?In11a0Y[JVOf5R1_JaNa5d1dJRN\\5V2gJ\\M\\5l2X36J?A9G:F:F:F:F:F>B7I5K5K1O0000000000000000000000O1lJeGc3[8[LhGc3Y8\\LiGc3W8\\LoG]3S8XLhGRO8_4U8_LcGQO9Z4Z8eL]GQO9X4\\8eLPHZ3P8eLQH`3j7`LWHc3e7]L[He3c7[L]Hh3`7WLaHj3^7SLeHn3Z7QLhHo3W7QLiHo3W7PLjHP4V7PLjHQ4U7oKlHP4T7mKgG]OV1e4S7QLkHQ4U7oKkHQ4U7lKgG]OU1g4U7]KfG0e1]4d6`KkG1b1_4c6aKjG0c1_4c6aKkGO_1N^Nb4X8aKjG0`1b4f6]KjG2_1b4h6[KhG4`1a4h6[KhG4`1a4h6[KhG2b1b4n6]KQIc4o6]KPIe4P7ZKoHg4e8O101O0O100000000O10000O1010O11N1O1O100O1O002N3N1N1N2O00001O000000O10O1O1O1N2O1N3M3M45L0OOO01O2O0O1O001O1O0O2O00001O001O00001O001dLmDi2T;TMoDi2i;K3L3N2N1O100O1O2O0O1O1O1O2O0O100O101N100O10O0101O0O2N1N2O2L4M3L3N3N2K4L5N3J6L4I6D^A12OPUP5" + }, + { + "size": [ + 480, + 640 + ], + "counts": "aah1c0U>?C`0B;F:E>D5K7H6oDlLm9Z4I4L5K5K9H3M2N0O01O1O1O2O0O10000O1O2O0O1O101K41ON3M2NiE]KW:b431O1OH9O0O2N1M4M201N2O001O1O1O001000bFWLe7i3YHZLf7h3UH\\Lj7f3QH^Ln7d3mG`LQ8c3iGaLW8`3fGbLZ8a3aGbL^8c3UGiLh8j4L4M2N2N000000001OO100O1O1L4N2M3M4L3O1O1N2N2N2O2M2O1N2O1O1M3N2N2N2L4L4L5L3000000O110O001O0000001O000000010O00001O00000001O0000000000000O1000000O103cEbKR:m4I3M9F5L2N1O:F2N1N2N100O2O000000001N101O000O10OFjIeG02V6^8kI`GZ5`0lJQ8J_GP5`9721M21OO200_Ob0M2N2N3O0O2J501O0O2L4N3O0O2M5L3N3M>\\HaHk6o7J6I5K and ?", + "answer": " is in front of .", + "image": "images/caption_simple_83.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000140658.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": 
"fb076LN2M;[b02g]O=Qb0m0O1G9K5J6H`Ml^Od2Sa05B>DZLWAN_Oj3Z?ZLRAl3m>d000O100O1O1N2N200O1O100O10000O1G9C=K5N2L4I7lNjITD`6k;bIRD`6m;cInC`6Rn2o10000O^ARMTk2eATM\\>k2eAVMZ>j2eAWMD0e=h2_BVMTO3h0Oe=h2hBXMZ>h2fAXMZ>h2fAWM[>h2_10U@XMFOf=i2fCXMZg2iAZMV>e2kAZMV>f2b1O1000000OTA\\M^l2PBTMP>n2nARMR>P3^11O1O002N3Z@hL^>Y3_@iLa`0[300ZDcLi6^3l4001O1O2N3RD[L[7g3eDZLX?g3g@YLY?g3g@YLY?g3`000001O001O3Q@TLd?P4W@RLh?S400000O1O100O10000001OO100000000000000000000O10000000kCnK\\8R4cGoK]8Q4cGoK]8R4cGmK]8T4g3OlCmK]8S4bGnK^8R4h31O00001O0jClK`8U4_GkKa8U4_GlK^L5i;o3kGlK]LOk;U4iGkK]LOj;V4iGkK]LOj;W4k3000000001O00000000001O00001O00001O0000001O0000001O0000000000001O00001O001O00000000001O00001O00000000001O001O00001O000000001O0000001O0000000000001O001O001O0000000000001O000000001O00000000001O00001O000000001O00001O00000000000000001O001O00000000001O001O00000000001O000000001O0000000000001O000000001O0000001O00001O00000000000000001O1O00002N3M2N001O0\\H^JbIN00O1Y41bKj0l0Sc0" + }, + { + "size": [ + 640, + 480 + ], + "counts": "dhj26fc07L2M3M2O2N2N1O1O1O1O1O1O1O100O010O010O0100O010O1O100O1O1O1O1O1O1O2M2O2M3M4LPlh5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is on .", + "image": "images/caption_simple_84.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000427160.jpg", + "mask_rles": [ + { + "size": [ + 512, + 640 + ], + "counts": 
"o=P2P>0000000000000000UOXB^Og=a0^B\\Ob=c0bBZO^=f0cBYO]=f0dBZO\\=f0eBYO[=f0gBYOY=g0gBXOZ=g0hBYOW=f0kBYOU=f0mBXOT=h0nBVOR=i0S10000O10OO2O10000O10000000000O100M3O1O1M3000000O10lAEc<:^CGa<9_CGa<6SCLUOOh=3RC>n<_OSCd0l<\\OTCd0l<[OUCe0k<[OTCf0l0O100M3O100O100O100O1O1O1O1O10000000000M300O1O1O010O100O2O0O100O1O1N11001N100O1O1O1O10O0100000000O101O0N101O1O100O1O100O10000O10000000001O0000000000000000000000mNXBNh=M`B0`=OcBO]=NgB0Z=NjB0V=MWB[Od0h0U=MmB3S=LoB4P=LPC4P=LQC3oN1N2O00102M000000001O0O11O01O0000oNHZB8d=LZB4a=5cADd07g==WBBg=c0XB\\Oh=d0XB\\Oh=e0WB[Oh=f0XBZOg=i0VBXOh=l0VBUOh=l0XBTOh=m0WBSOh=o0VBROi=Q1TBoNl=c100O1O100O2O0O1M3O0010000000000O10000O1O100000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000O11O00000001O000000O100000000001O000000O100001O000O10000000O10000001O0000000000O100001O000000000000O100001O00000O100000O10000001O0000000000O100001O00000000000000O1001O0000000000000000000000000000000000000000000000000000O10000N2N2N2M3O1O1M300O1O1O100O101O0^NmAW1_>N2O00000O2@^A@c>6hAGZ>7hAHY>3WAHb02[>NnA1Qoi18h_VN3M1O1O1O1O2N2M4M2N1N1N2M3K5K5M3O1O100O1000000O11O1O001O001O1O001O00001O00000000O100O1O1M3O1N2O1O1N2N2O1O1O1M3N2N2O1O1L4N2O1O1000000000000000000000000000000000000000000000000000O11O0000000O10000000O11O00000000000000O1001O000000000bB" + }, + { + "size": [ + 512, + 640 + ], + "counts": 
"Sjn73k?3N2N1O2N2N2N102M2N2N1N3O1N2N2M3N2O0O2M3N2M3N2N2O1N2N2O001N2O2FUN[Bn1_=XN\\Bl1`==L3[E^M[7S3_HVMY7n2aHWM\\7m2aHVM\\7n2aHSM^7o2`HRM^7Q3`HQM^7>`F]1Q2VN^7P3bHQM\\7P3dHPM[7P3fHQMY7o2gHRMW7o2hHRMW7o2iHRMV7n2jHRMU7o2kHQMU7n2lHSMS7m2lHTMS7T2oFcMT1]OoN26j0h8R2RGdMP1U1m7W1TGcMP1U1m7W1TGdMP1S1m7W1UGfMn0Q1o7W1TGhMo0n0n7Y1UGiMn0l0n7Y1VGjMn0j0n7Z1VGlMm0h0n7[1UGmMo0f0m7[1VGoMm0d0o7[1VGPNm0b0o7\\1UGSNm0?o7\\1VGTNm0>n7]1UGUNo0;n7^1UGVNo09n7_1UGXNn06o7a1SGYNQ12n7c1SG[NQ1Nn7e1RG]NR1Kn7g1QG^NS1EP8l1mF_N];`1dD_N];`1cDaN^;]1bDcN_;]1`DdN`;[1aDdN`;[1`DeNb;Y1]DiNc;W1\\DiNe;V1ZDlNg;S1XDmNi;R1WDnNk;Q1SDQOm;h0XCgNl0`0n;f0YD[Oh;c0YD]Oh;c0WD]Oj;c0UD\\Om;c0SD]Oo;b0oC_OT<>lCBY<:fCE_<7bCH`<6`CIa and ?", + "answer": " is running on .", + "image": "images/caption_simple_85.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000106048.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "^9l3`90000[MaF^1^9X10000000ZMbF^1^9aNcF_1]9aNcF_1]9aNcF_1]9aNcF_1]9aNcF_1]9aNcF_1]9aNcF_1]9aNcF_1\\9Y1000000000000000YMdF^1\\9bNeF]1[9cNeF]1[9cNeF]1[9cNeF]1[9cNeF]1[9cNeF]1[9cNeF]1g8]MiGV1@]1g8]MiGV1@]1f8^MiGV1A\\1f8^MiGV1A\\1f8^MiGV1A\\1f8_MhGU1B\\1f8_MhGU1B\\1f8_MhGV1A[1g8_MhGV1A[1g8_MhGV1A[1g8`MgGU1B[1g8`MgGU1B[1f8aMhGT1B[1f8aMhGT1B[1f8aMhGT1B[1f8aMhGT1A\\1g8`MhGT1A\\1g8aMgGS1B\\1g8aMgGS1B\\1g8aMgGS1B\\1g8aMgGS1B\\1g8aMgGS1C[1f8bMgGS1C[1f8bMfGT1DZ1f8bMfGT1DZ1f8bMfGT1C[1g8bMeGS1D[1f8cMfGh3Z8XLfGh3Z8XLfGR1D[1f8cMfGR1EZ1e8dMfGR1EZ1e8dMfGh3Z8XLfGh3Z8XLfGR1D[1f8dMeGQ1E[1f8dMeGg3[8YLdGh3\\8XLdGh3\\8XLdGh3\\8XLeGQ1E[1e8fMdGf3\\8ZLdGf3\\8ZLdGf3\\8ZLdGf3\\8ZLdGf3\\8[LcGe3]8[LcGe3]8\\LbGd3^8[LcGe3]8[LcGe3]8\\LbGd3^8\\LbGP1IV1e8jMbGP1IV1e8jMbGP1IV1e8jMbGd3^8\\LbGd3^8\\LbGd3^8]LaGc3_8]LaGc3_8]LaGc3_8]LaGo0JV1e8kMaGo0JV1e8kMaGo0JV1e8kMaGo0JV1e8kMaGo0JV1d8lMbGn0JV1d8lMbGn0JV1d8mMaGm0KV1d8mMaGa3_8_LaGa3_8_LaGa3_8_LaGa3_8`L`G`3`8`L_Ga3a8_L_Ga3a8_L_Ga3a8_L_Ga3a8_L_Ga3a8_L_Ga3a8_L_Ga3a8_L_Ga3`8aL_G_3a8aL_G_3a8bL^G^3c8aL]G_3c8aL]G_3c8aL]G_3c8aL]G_3c8e000O100001O000000O100
0000000000000000000000O100000000O10000001O0000001O1O4L1O1O1O1O1O001O0000001O0O20O01O1O000000000000000000000000000000O10000O1O1O1000000O100000000001O000000001O0000001O00001O0000001O000000001O000000001O00001O001O000000001O00000000001O00000000001O001O00001O00001O000000001O0000001O00001O00001O1O1O6J2N1O00001O00001O1O2N1O1O1O1O1O000000000000000000000000O10000000000O1M3L4O100001O00000000000000001O001OO1001O00000000000000000000O1001O1O1O00O1O1O1001O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000001O0000000000000000000000000000000000001O1O00001O001O001O001O0000O1001O0000O11O000000O1000000O1O1O100O100O100000000O100000000000000000000000000000000O100000000000000000000000000O10000000000000000GgL^FZ3Z9`0N2O1001O0000000000000000001O00O1001O00000000000000000000000000001O00000000000000000000000000001O0000000000000000000000001O000000000000001O00O11O00000000000000000000000TLcFi3]9WLcFi3\\9XLdFh3\\9XLdFh3\\9XLdFh3]9WLdFh3\\9WLeFi3[9WLeFi3[9WLeFi3[9WLeFi3[9WLeFi3[9WLeFi3[9WLeFi3[9WLeFi3[9WLeFi3_900000000000000000000000O1000000000000000000000000O100aF" + }, + { + "size": [ + 428, + 640 + ], + "counts": "[jl16XJ500O1O10000O2O00003L3N2N1N2O1O1O1N1000000O101O001N2O0O2OO01O0100000O10O1M3O11N100O100O100O101O000O1000001N101N100O101O000O10000O2O0000000O2O000O2O0O1O101O000O101O00000O2O0O100O101O0O10001O0O101O000O2O000O100O2O0O1000001N10001O0O101O0O100O2N1000001N10000000001N3N1O002L4M2O01OO3N3L2O1O1O1O1N10001N1O10O0100000O010O1O1O10OO2O1N13M1000000O100000001O0O100000001O000000001O000000000000000000000O1000000000000000000000000N20000O10000000000000000000000000000000000000000000000000000001O1OO1O1001O1O001O0000000000000000000000000000001O00000000000O100001O00O1001O0001N100001O000000000001O0000000000001O00000000001O00000000001O0000000000001O000000001O0000000O2O00001O1O001O001O2N1O001O1O3L3N2N1O6J:F4L5K4L9G7I6J4L8H5K2cK]Gl3e8oKaGU2H^Og8]NaGo1OC`8^NbG`0L]O1=a0V1k7hNeG3j1o0Y6WOnGCQ2R1j5DUHVOU2R1i5HRHUOW2j0n52jGUO[2?R6 and ?", + 
"answer": " is parked on .", + "image": "images/caption_simple_86.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000052565.jpg", + "mask_rles": [ + { + "size": [ + 458, + 640 + ], + "counts": "e;d2g;O00001O00001O000000001O00000000000000000000001O00001O0000000000001O001O00O10000001O000000000000000000000000000000000000001O00000000001O000000000000001OO100000000000000001O000000O100001O00000000000nMQDi1P4N1O0O2O1N2O0O2O1O1N100010O000000100N100O100000O1O101O1N4L7H7J5J;F8]CVNWUO_B0_f\\2" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is standing on .", + "image": "images/caption_simple_87.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000165039.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "[8m1^;001O1N2O0001O1O01OnNfD3Z;LhD3X;MhD3X;MhD3X;MhD30ROS;k0lD32ROR;j0mD41SOQ;i0nD40UOQ;g0nD51UOP;f0oD51VOn:f0QE40XOn:d0RE40XOm:d0TE4OYO^:MgEf0L30[O\\:NbEh02O0[O]:o0cEF0\\O\\:n0eEEO^O[:m0gEDN_O[:m0hECM@[:m0iEBKC[:k0jEBKC[:j0kECJDY:j0nEAIFX:i0oE@JGW:i0oE@JHU:h0RFAHHU:g0SFAGIU:g0TF6k9JUF6k9JUF7j9IVF7j9IVF7j9IUF9j9FWF:j9EVFk9fNlE>8l0m9eNkE`07l0n9cNkEa07m0n9`NlEc06n0m9_NmEc06n0n9^NlEc07P1m9\\NlEd07P1m9\\NlEd06R1n9YNlEe06R1n9YNlEe06S1n9WNlEf05T1o9VNlEe06o0T:\\NfEe06i0`:WO`Ef0d:YO\\Ed0S;POmDn0i;N2N;E;FgV`0D\\i_O0XC9_aHB_7>`HC`7=`HC`7=`HC`7=`HC`7B8J3N213OL00O000001O000000000000O100O0@cNRE_1m:dNPE\\1Q;?0O1N2O1O01000000O0100000O10000O10000O100001N100000O10000000000O1000O1000000000O10000000000000O10O100000000000000000001O0O200O1O1O1O000000001O000001N11O000O01000000O10000O1O2N1N200O1O2N11O00O10001O0000001O000011N0010O10O00000001O00001N100O2O001O0O2N1000001N100O1O1O100O1000000000001O001N2O00001N101O3L6K2M3K5K5L3MP`o0" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is driving on .", + "image": "images/caption_simple_88.png" + }, + { + "image_path": 
"/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000370270.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "bc01oc01N=D0000000000000O10000O1O1O1O100O1O100O1O100O1O1O1O1O1O100O1O1O1O1O100O1O1O1O1O100O100O1O1O1O100O1O1O1O1O1L_Nh]Ob1ha0c0N2N2O1J6J6O1000000000000001O2NO100001O0000O1lN_Mj@`2V?eMh@X2W?jMi@U2W?lMh@T2X?lMh@T2X?lMh@T2X?mMg@S2Y?mMg@S2Y?PNd@P2\\?PNd@P2\\?TN_@m1a?TN\\@n1e?RNY@o1g?QNY@o1g?QNY@o1g?QNY@o1h?PNX@P2h?PNX@P2h?PNW@Q2i?oMV@R2j?bMQ@O0`2S`0YMR@V3]`0O0000001O0000O100000000000000O100O100O1O1O1N2O1O1O1N2N2M3O1N200O11O0000000000000000000000000[Lk_O_3U`0aLl_O^3T`0aLm_O_3S`0bLl_O^3T`0bLk_O_3V`0]Lm_Oc3W`012N1O1O5K2N1O001O000000001OO1000000O1000000O100O100O100O1O1O100O100O10000O100O100I_Lm_Oc3S`0500O1000N3N1O1O002O000O100001O002N4L0000000000000000000000000000000000000000000001OO1000000000000000000000000000000000000O1I7N2000000000000001OO10000000000000000000000001O0000000000000DU@cLk?S3f0O1O1O1N2O2M2O10000O10000O1O1HS_OYMQa0c28N2M3N2N2M3N2O2M2O1M3O1O2N1N2E;E;N`0QOok83^TG1N2N2O100O1N2L4N2O100O1L4M3O1O1O1M3O1O1M3N2E;N2N2M3N2I7N2L4O1L4L4J6N2L4L4H8L4M3O1OUMc@U1\\?eNcA@`?>T2DmSl1" + }, + { + "size": [ + 640, + 480 + ], + "counts": "Zdf151Mdc0e0B:G7I`0A1O1N2N2mN]Ng_Od1U`0aNi_O`1R`0gNj_O[1T`0gNk_OZ1R`0jNk_OX1R`0lNk_OU1R6PN`3m0]FT1P6RNa3k0_FS1n5TNb3k0^FR1o5UNa3n0ZFo0`1WN?GoN8]ON[9Y3cG^M3l0iN^N`9Y3dFZL?`18X1e8n0`FoLLV19XNOg2\\9kN\\FZN:S8Z9bIoFY6P9hIWGP6j8PJVGP6j8PJUGQ6k8oIVGP6j8oIXGP6h8PJYGo5g8QJYGn5h8SJWGm5i8SJWGl5j8SJWGl5j8TJVGk5k8UJUGi5n8VJRGh5P9XJoFg5S9ZJjFg5X9XJgFh5Z9XJdFh5_9WJ]Fh5i9XJUFc5R:`JhEi3j0gJk9c1UEe3\\Bk3g>b05K5N2K5J5O2O13M4L4L3L5L3M3M6J4L4M4L4K4M5J4M3M4M2`DcIV9`6fF_I[9d6cFZI`9d6aF[Ia9c6P2N1N2N3N2M3N1RK]B\\3f=bL\\BZ3i=cL[BX3h=fLZBCFj2S>aMYBAKi2o=bMYBBLh2m=eMYBAMDAn2]>iMXBB4_2h=lMVBB8\\2d=PNVBA;l0QO7a>YOTD@YNb0g=Igkh5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is standing on .", + "image": "images/caption_simple_89.png" + }, + { + "image_path": 
"/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000481413.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "loi3?g<:F9I6K4L4K4M4M2N2M3O1M3N2N2N2O0O2N101N1O100O2O0O100000000001O1N2O3M0O10000O1O010O001O100O2N2M3N3M2M3M3N3L4L4L4K6I9HWYh3" + }, + { + "size": [ + 427, + 640 + ], + "counts": "mkd29P=3L4M2N2K5L4L4N2N1M3O1O1ON2NdEQOb8g0cG_OZ8b0bGC]8<`GJ]87`GM_82^G3`8N]G6b8I\\G doing with the purple frisbee ?", + "answer": " is holding .", + "image": "images/caption_simple_90.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000092839.jpg", + "mask_rles": [ + { + "size": [ + 517, + 640 + ], + "counts": "a:a5`:4J6N3O0001O001O001O2N1O001O1O001O2N1O2N3M1O001O2N1O1O1O3M2N2N1O1O2N1O2N2N2N5K2N1O1O1O1O001O00001O2N2N001O1O1O2N001O2N2N1O1O002N3M1O2N2N2N1O2N1O2N1O2N2N2N2N2N2N2N4L1O2N2N1O1O002M5L2N101N4L5J5L1O2N2N2N1N200O2N1N3N0010O00001O000000001O000O1000O11O000000000O1000000001O0000000000000000000000000000O1000000000000000000000000000000001O0000000000000000000000001O0000000000000000000000001O0000000000001O0000000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000000000000000000000000000001O00000000001O000000001O000000001O000000001O0000000000001O00000000000000000000001O00000000000000000000000000000000000000000000000000001O0000O1000000000000000000000O010000O2O000O01000O100O10000O10000O100O10000O1O10000O100O10000O100O10000O10000O10000O100O100O100O10000O100O100O1000000O100O10000O100O100O10000O100O10000O100O10000O10000O2O0O01000O10000O2O0O1O1000O01000000O2O000O1000O01000000O100O1000000O100O10000O1O100O100O100O10000O10000O10000O10000O10000O10000O10000O100O100O100O10000O10000O10000O10000O1000000O100O2O000O01000O2O000O100O1000O10O2O0000000O100000O100001O0002NN2N20O1gLZC1N6O]2i<\\McC1G4NU2h located relative to ?", + 
"answer": " is sitting on .", + "image": "images/caption_simple_91.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000336209.jpg", + "mask_rles": [ + { + "size": [ + 432, + 640 + ], + "counts": "e6]2S;00000000000000001N1000000000000000000000000O1000000000001O000O1000000000000000000O10000000000000000000O10000000O10001O1O000000000000001O0000000O100000000000000000001O00O1001O00000O1000001O002N2N006I9H1O0000000000000000001O0000000000000O10000000000000O100001O00001O000O100000001O0000000000000O1000000000000000000000O100O1O10O10O1N2O100O100O1O1O010O1000000000000001O00001O001O0O2O001O2N2N2N2M2O000000000000000000000000000000000000000000000O101O0000000O10000000000O100000000000001O001O0000000000001O00000000000000001O0O100000000000O1000000000000000000000000000O10000000N2O1O10000O1O2N3M3N6I5KUFAk9`0TF_Om9a0SF_Om9a0TF^Ol9c0UF[Ok9f0UFXOl9h0TFXOl9k0RFTOn9m0QFSOo9n0QFPOP:S1nElNR:U1oEiNQ:W1oEiNQ:[1lEdNT:]1mEaNS:`1oE]NQ:d1oEZNR:h1nEVNR:n1kEQNU:Q2jElMX:U2RFiMe9Y2XFiMg9X2XFiMg9X2XFhMg9Z2WFgMi9Y2WFgMi9Y2WFgMi9Y2WFgMi9Z2WFeMi9\\2VFdMj9]2VFcMh9`2VFfMd9\\2ZFfMd9\\2ZFeMe9\\2YFfMf9Z2ZFfMf9Z2ZFfMf9Z2YFgMg9X2WFkMi9U2VFlMj9S2WFmM:^OA6R9`2RGmM;Fc8\\2SGoM8Hc8Y2UGoM8Hc8Y2RG[MNd0_8^3oGeLQ8[3oGeLQ8[3oGeLQ8[3oGdLR8\\3nGdLR8\\3nGeLQ8[3oGeLQ8[3oGeLQ8[3oGeLQ8[3oGeLQ8[3PHdLP8\\3RHbLm7_3QHcLo7]3PHdLP8\\3PHdLP8\\3PHdLP8\\3PHdLP8\\3oGeLP8\\3PHdLP8\\3PHeLo7[3QHeLo7[3QHeLo7[3QHeLo7\\3PHdLo7]3PHdLP8\\3PHdLP8\\3oGeLQ8[3oGeLQ8[3PHdLP8\\3PHdLP8\\3oGeLP8\\3oGeLQ8[3nGfLR8Z3mGhLR8X3mGiLS8W3mGiLS8W3mGiLS8X3lGhLT8\\3hGdLX8]3gGcLY8[3iGeLV8[3kGeLU8]3iGcLW8\\4O00O1O1001O000O2O1O1O1O00003M001N10000000001O001N2O1O00001O0O10000000001O00000O10000000000000000O10000000001O000000O10gJ" + }, + { + "size": [ + 432, + 640 + ], + "counts": 
"2[5U800000000O10000000000000000000000O10UHiJ]7W5>0000O1000000000000O10000000000000000O1000000000000000000O100000000000000000000O10000000000O10000000000O10000000000000000O1000000000000000000O100000000000000O100001O0000O1000000000000O1000000000000O10000000000000000O10000000000000000O10000000000O10000000000O1000000000000000000O100000000O1G]GdKd8[4900O1O1O1O1N2N2O1O1O1O1O1O1001O1dFTLR9_4F1O000000000000O1000000000000O1000000000000O10000000000000000O1000000000000000000O100000000000000O10000000000O100000000000000O1000000000000O10000000000O100000000000000000000O1000000O]GhKP8X4PHhKP8X4d00000O10_GkKg7U4UHiK^O8Y8o3XHVLf7j3\\HULc7k3^HULa7k3`HUL_7k3cHTL\\7l3fHSLY7m3U100O100O100000000000000O1O1N2N2N2O1N2N200O1O100000000000000000000001O000XG]LUOOU8e3bHlLZ7T3eHRMV7o2iHRMV7n2iHTMV7m2iHTMV7m2dHZMZ7h2aH\\M^7e2`H]M_7[4O1O001O00000000B[HPKf7l4_HSKa7j4cHTK^7k4dHRK^7k4e0N2N20000O10000O100O100O1000000O10000000000001O001O0000001OO100O100O1000000O1000000000000O10000O1O100000000001O001O001O:F1O1O3M6J1O000000O11O1O1O001O1O1O001O0000000000000000000000000000000000000000000000O1000000000000000000O1O1O1000000000000000000000000000000000000000000000000001O000000001O0000001O00000000000000000000000000000000000000O10000000000000000000000000000000000O100000000000000000000000000O1000000000000000000000000O10000000000O10000000000000000O100000000000000000000O1000000000000O1000000000000000000O10000000000000000O1000000000000O100O1L4RNn1M3O100001_FSMSON01Q9Q5YO1O1O0000000000000000000N" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is over .", + "image": "images/caption_simple_92.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000458325.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "]ca33Q17k:MkD;S;K^D?\\;j01L3O2M4N2O4L2M2N202N1O1XOlC6g doing on the ?", + "answer": " is crossing the .", + "image": "images/caption_simple_93.png" + }, + { + "image_path": 
"/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000350122.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "lPT5g0U>7H8J5K4L5L2O2M2N2O1O1O1O1O1O100N12N3M1O1O0O2O2N2N5J3M3M5K:D3M101O001O0O2O001O1N1O2O00O010000O01O01O01000O000O2O1N2WO_OTCa0lfDJV;Q2O0OO2H8L4N2N2N2N2O1000YD]NR:c1iEeNMgNi9k4O01VK[FW4g9eK\\FZ4f9bK]F\\4V:N1N2M3M3^N`E[Ng:`1`1J5L5\\OSCoNR=m0UCmNo doing with the bicycle ?", + "answer": " is pushing .", + "image": "images/caption_simple_94.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000295809.jpg", + "mask_rles": [ + { + "size": [ + 512, + 640 + ], + "counts": "d547k9_5UFaJk9_5UFaJk9_5UFaJk9^5VFaJk9_5UFaJk9\\5XFcJi9]5WFcJi9]5WFcJi9]5WFcJi9]5WFbJj9]5WFbJj9^5WFcJg9]5YFcJg9]5XFdJh9\\5XFcJi9]5WFbJj9^5VFbJj9^5WF`Jj9`5VFaJi9_5WFaJi9_5WFaJi9_5WFaJi9_5WFaJi9_5WFaJi9_5WF`Jj9`5;O10000O10000000000000000O1000000O100000000O1000000000000000000000000000000000000O10000000000000000000000000000000000000000000000000000O1N200002NN2000000000000O11O00O10000O1000000000000O1000000000000000000O100000000000000O10000000000000000000000O1000000000000000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1001O000000O11O0000O100001O00O100001OO10000000000000000000000O11O000000O11O00O100LPEXKP;l401OO1000000000000LPEXKP;l4000000000000000000000000000000000000000001O0000001O000000O11O000000ISEZKn:l40IQE^Kn:i410000001O00HRE]Ko:b4SE]Km:d4RE]Km:c4SE^Kl:b4TE]Km:c4RE^Kn:i411OMRETKn:i4UEWKk:i4TEWKm:h4TEXKl:h4TEWKm:i4SEWKm:i4SEWKm:i45000000O10000000000000000O11O003MM300001O00O1O11O2N1cMZK^If4n8MmDZKR;f4nDZKR;j4000001O00O100001OO100001O0000O10\\NTKTHl4`91OO10000001O0000000000000000000000000bLTKhKl4f70000000000000000000000000000001O000000000000O1001O000000000000000000000dNSKeGm4[8SKeGm4g9000000000000000000000UNRKdHn4W90000000000000000000000000000000000000000000000000000000000000000000000000000O11O0000LQEVKP;j4PEVKP;n4O00000000000000000000000
00000000000LPEWKQ;i4PEVKP;n4O0000LQEWKo:i4QEWKo:i4QEWKo:m4000LPEWKQ;i4PEVKP;n4O0000MPEVKP;i4QEVKP;k42001O00001O1O00LQEWKo:i4QEWKo:l4100001O1O001O00001O0000001O0000001O1O1O3M001O1O0000001O1O1O00001O1O00001O1O3M1O001O001O0000001O1O1O00001O1O001O001O001O000000001O00001O1O1O2N1O1O1O1O001O001O5K00001O6J1O001O1O1O001O1O001O1O000000RI" + }, + { + "size": [ + 512, + 640 + ], + "counts": "PT[86h?3O0O2N10000O1000001N10001O00000O101O00000000000O10000000O1O010O0100O100O100O100O1O1O100O1000000O100000000000000000000000000000000000000000N2D=G]`86e_G5K4L000001O00000000000O100000[L" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is driving on .", + "image": "images/caption_simple_95.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000511760.jpg", + "mask_rles": [ + { + "size": [ + 640, + 296 + ], + "counts": "i?9V3HZ=d0a_OD00000:0F0J5Q1MQOe?S400000000000000000000O1001O00O10000000000000000000000000000000000000000000000000000000000000000001O00O1001O000000O10000000000000000001O000000000000O11O0000000000000000000000000000000000O100000000000000000000000000000000O100001O0QLb@\\3^?bLe@4Ih2b?oL]Ao2k?O1O>B:gM^^O`1Tb0M1O1O000000000000000000000000001O0000000000000000ZM^NnBb1R=`NlB`1S=eNiB[1W=iNeBW1[=lNbBT1^=mNPAB2M7Om0_?n0`@RO_?P1_@QOa?Q1\\@POe?e20ZMe@g0[?QOn@n0R?QOPAXO[OR1e?ERAmN]OK1b1`?BkA=U>[OSBe0m=YOUBg0k=XOVBh0j=WOWBi0h=XOXBh0h=XOXBh0i=WOWBi0i=WOWBi0i=WOWBi0i=WOWBi0i=WOWBi0i=WOWBi0i=WOWBi0i=XOVBh0j=XOcAAnNW1_?ZOf@TO and ?", + "answer": " is standing on .", + "image": "images/caption_simple_96.png" + } +] \ No newline at end of file diff --git a/evaluation/GAR-Bench/annotations/GAR-Bench-VQA.json b/evaluation/GAR-Bench/annotations/GAR-Bench-VQA.json new file mode 100644 index 0000000000000000000000000000000000000000..70b56d69db5d8b0a7b54df976e0db50ba32622e8 --- /dev/null +++ b/evaluation/GAR-Bench/annotations/GAR-Bench-VQA.json @@ -0,0 +1,11742 @@ +[ + { + "image_path": 
"/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1582.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "`fS2:bo0=C9J3M3M1O1O001N1010O2N2N3N8G4L2O1N3N3M6I4Mgn3NYQL8J4K5L2N2M3N2M2O2M3N2M:G000O1000000O100O10000O1000001O0O10000O100O101N1O100O2O0O2N101N100O101O0O2O0O2O0O2O001N2N101O0O1000O100O100O010O1O100O010O10O10O10O010000O10000O1O100O00100O100O10000O100O100O100O100O100O100O100O1O100O10000O100O1O100O2O000O10000O10001N100O100O100O1O1O2N1N2N4KY[cP1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "igR2`02:`n0j0iQOhNWm0^1cROmNTm0Q2N2N2N1O1O1O10O01O1O100O00100O10000O1O10O001O0O2O11N2O1O1O1N3N1O1N101O2N1O1N101OO10000O1O0KdSOfL^l0V38M3N201O01O0000O1O1N2O2N100O101N101O0O1O2N1O1O2N1N3N1O2O001O1O0010O1000O100O1O1O2N2O03ON2M5K3M4L3M4K6K6J8G3N1N2N2N4KjVaR1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "`i[d0`0i0BLl0bj0WOVVOY3bi0nLZUO2KIMa3hj0a1M4cUOnJfi0T5VVOQKl1Bhe0S6[XOnI[1;Xf0m7N000O2O00000000000000000000000O10000000000000000000O1000000000000000000000000000001O1fJTYO^1nf0]N`YOX1Rg0PLmWOa1\\1^2Wg0mLYYOS3lf0SL_WOLk1P4ei001O001O00010O000O100000001O00000000001O0000000001O000O100000O1000001O00000000001O000000001O00000000001O000000000000000000000000000000000000000000IXLTTOj3hj0PMZUOX3Pj0g1K5J6N2O1O1_OhIQWOa6mh0:N2H8G9I7EZHVXOl7ig06O100000000000000000000000000001O00000000000000000000001O000000001O000001N1000000000O101O001N100O2N1RKTXORNMX2Yh0QNhWO[OX1@]Ok1_O[Ngh05mWOOk1`0\\NVOdi0L[ZO1Pl_=" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": 
"Sla<6Xo00jPOQ3TMo_OjC\\O03R1j`0_9WBlEmLBRa0[9[5nLbYORJ6J211Ijf0c4S4M3O101O00001N101O001O0O2O2N2dM^TO7kk0lNnTOn0Xm0J4K3N1O1O00001OO10000001O00000000000000000000000O100001O00000O1001N10000000000000000000000000000000O10000001O000000000000000000000000WHmNe@S1W?TOf@l0X?WOf@j0X?ZOf@f0Y?\\Of@d0X?^Oh@b0W?@h@`0V?Bj@>S?Fm@9P?JPA6n>LRA4m>NRA2l>0TA0j>2VANi>3WAMh>4XALh>4XALg>5YAKg>5YAKg>5YAKf>6ZAJf>6ZAJf>5[AKd>6[AKe>5[AKe>4\\ALd>4\\ALd>3^AKc>4^ALb>4^ALc>2^ANb>2^ANb>2^AOb>O_A1a>O_A1a>O_A0c>O]A1c>0\\A0d>0\\A0e>O[A1f>NZA2h>LXA4i>JWA7j>HVA8k>FVA:k>EUA;m>CSA=n>CRAo5a3gGRLZ2l_OBT`0?j_OBV`0?i_OAW`0`0h_O@Y`0?g_OAY`0?g_OAZ`0?f_O@Z`0`0f_O@[`0?e_OA[`0?e_OA\\`0>d_OB]`0=c_OC^`0l`0_OV_O`0k`0]OW_Oc0l`0YOU_Og0Qa0QOQ_Oo0og010000001O0000000000O10000000O10001O00O2O00000001O0O100000O1000000000000000000000000000000001O0000O1000000000000000000000000000000001O0000O10000000000001O00O1000000O1000000O1YNoN[SO4OJKV1ck0lNbTO^3ij0W1ZOf0gNY1WNeIaXOg7ef0W1[Oe0\\Od0jN`EP\\O\\;Rc0fDi\\O[b3TNl1UNU]OXETd0`9j[OaF`e0V8j1\\Od0XOl0mIVWO\\OolS5" + } + ], + "question": "Which one among , , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. " + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_0.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2925.jpg", + "mask_rles": [ + { + "size": [ + 460, + 620 + ], + "counts": "^hb19Q>>D7I7I7I7I6J6J6J2O1N2O1N2M3N2M3M3M3M3N2M3M3L3L5L4K5G9F:H7O200001O1N3N2N2M4L4L4L5J8H:^OT]R1" + } + ], + "question": "Among , , and , which mask is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
None of the above are in the mirror" + ], + "answer": "D", + "type": "mirror", + "image": "images/vqa_1.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/49.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "Y_Rb05;6jn0KTQO6kn0MQQO5on0>000001O00000000O101O000O4YOXa\\e0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "hbS:46o0kl0]ORSOS2`j0k2[Oe0cNQJPXOO2Q6kg0UJQXOJ1U6lg0SJQXOI2U6lg0TJQXOG3V6kg0ZJjWO@;W6kg0aJUXO`5jg0`JVXO`5jg0aJTXO`5lg0aJSXO`5lg0aJoWOc5Qh0W1O001O0XXO]HRg0f7hXObHTg0a7hXOhHPg0T80001O0001O000001O00000001O000O100O1WNlXOeJUg0Z5mXOdJSg0]5mXOcJRg0^5nXOaJSg0_5mXO`JTg0a5jXO`JUg0a5kXO_JUg0a5kXO^JVg0a5kXO^JUg0b5lXO^JTg0a5mXO^JTg0a5mXO_JSg0a5mXO_JSg0a5mXO_JSg0a5mXO_JSg0a5mXO_JSg0a5mXO_JSg0a5mXO_JSg0a5mXO_JSg0`5mXOaJSg0_5mXObJRg0^5nXObJRg0^5nXObJRg0^5nXObJRg0^5nXObJRg0^5mXObJTg0^5lXObJTg0^5lXObJTg0]5mXObJTg0^5lXObJTg0]5nXObJRg0]5oXOcJQg0]5oXObJRg0^5nXObJRg0^5nXObJRg0]5oXOcJQg0^5hXOgJYg0Y5eXOiJ[g0m6100O1000000O10000O10O100000O10001N10000O1O002WNdXOkI^h0Z5`WOeJ^i0U5j0kNkUOmKfj0^2\\2]M`ecj0" + } + ], + "question": "Could you confirm whether and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_2.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2905.jpg", + "mask_rles": [ + { + "size": [ + 460, + 620 + ], + "counts": "T[l342NP>f0C9G0O2O03L2O000000000000O2OO10O100000000O10O1000O100000000O10O100000O10000000000O010000000O100000O1000O1000000O100000O10O10O1O001O1O1O1O1O001O1O1O001O1O1O1O1O001O10O01O1O1O1O001O1O1O001O1O1O1O1O001O1O1N2O002Nn^Z3" + }, + { + "size": [ + 460, + 620 + ], + "counts": "cQ_5b0i=2OO010O01O001O01O01O010O0010O01O010O10O01O10O002Mfik2" + } + ], + "question": "Can you tell me if and are inside the mirror?", + "choices": [ + "A. 
and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_3.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1496.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "0Q5oj0000000000000000000000000000000000000000000O10000000000000000000000000000000000O100000000000000000000000000000000O100000000000000000000000000O10000000000000000O100000000000000000000O1000000000000000000000000000000000000O10000000000000000000000000000000000O100000000000000000000O10000000000000000000000O1000000000000000000000000000000000000O1000000000000000000000000000000000000O100000000000000000000000000O10000000000000000000000000000O10000000000000000000000O100000000000000000000O1000000000000000000000000000000000000O100000000000000000000O100000000000000O1000000O10000000000000000000000O1000000000000O100N2]Oc0O1O10000O1O100000000000000001O00002eSObLPl0j3M3M2N2N1O1O00001O00000000O10000000000000000000000000000000000O100000000000000000000000000000000000000000000O1000000000000000000000000000000O100000000000000000000000000000000O1000000000000000000000000000000000000000000O1000000000000000000000000000000O1000000000000000000000000O1000000000000000000000000000000000000O10000000000000000000000000000000000O1000000000000O1000004L2M:GP1aLgROU2dn0nNYmlg0" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. 
No" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_4.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/515.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "YnmT1c1bm0m0L201O000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000dRO" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "][gS1:7HUo0j0H2M101O00001O0000000000000000000000O100000000001O0000000000O100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001O01N10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000dD" + } + ], + "question": "Are and located within the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_5.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1132.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "^Tfo0;ao06M101O00O10000000000000000000O1000000000000000000000000000000000000000O10001N4JgkP7" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "XjZU1Z1lm0k0L4M2O10O01O00010O000001O01O0010O000010O01O010O0010OO2M3YOj0mNTXf1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": 
"dQV:6ho05K4M1O2N1O101N1000000O10000O100000000O1000000O100O010O100O10000O100O1O1O100O100O100O1O001O100O1O1O10O01O100O1O1O100O1O1O1O001O1O1O1O1O001O1N2O1M30000000000000000000O100000000000000000000000000000000O100000000000000000000000000000000O100000000000000000000000000O100000000000000000000000000000000000000O100000000000000000000000000000000000000000000O100000000000000000000000000000000O100000000000000O10000000000000000000000000000O100000000000000000000000000O1000000000000000000000000O10000000000000000000000000000O1000000000000000000O1000000000000O10000000000000000000000O1000000000000O1000000000000000000O100hNdQOl0`o0XO3M1N2O0000fo[10[ocN=[QOMan04\\QOOcn0E]QO117on0IQQO6Qo0IoPO5So0NjPO2jn0JZQOe0cn0]O[QOe0cn0\\O[QOe0en0\\OZQOe0en0\\OYQOe0gn0:0O1]O\\QOJdn04_QOKan06^QOJbn07]QOIcn0;YQOFfn0j0000000000000000O10000000000001O0000000000000000000000000000O10000000000000000000000000000000000O1000000000000000000000000000000O100000000000000000000000000O1000000000000000000000000O1000000000000000000O100000000000000000000000000O1000000000000000000000000O1000000000000000000000000O1000000000000000000000000O1000000000000000000O10000000000000000000000O100000000000000000000000000O1000000000000000000000000000000O1000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000O1000000000000000000000000000000000000O1000000000000000000000000O100000000000000000000000000O100000000000000000000000000000000000000O10000000000000000000000000000O10000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000O100000000000000000000000000O1000000000000O100000000000000000000000000000000O1000000000000O1000000000000O1000000O1O1LTPh3" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": 
"TQa73a00l0P=jNYC6Ga40WL0e0Nl4mk0]IVTO239O0Od04oNLd0Yn0A\\RO;fm0B]ROCjC4QN12ML6d0e;W9UD_Gmc0n6S\\O`GRg0Z6XYOiI2Lmg0j1eWOd13eLSj0^1TVOc1_j0[NdUOb1^j0\\NdUOc1]j0\\NdUOb1^j0]NdUOa1]j0^NdUOa1]j0^NdUOa1]j0^NeUOa1[j0^NfUOa1[j0_NeUOa1[j0^NfUOb1Zj0^NfUOb1Zj0^NfUOb1Zj0^NfUOb1Zj0^NfUOb1Zj0^NfUOb1Zj0]NgUOc1Yj0]NgUOb1Zj0]NhUOb1Xj0^NhUOb1Xj0^NhUOb1Xj0^NhUOb1Xj0]NiUOc1Wj0]NiUOb1Xj0^NhUOb1Xj0^NgUOc1Yj0\\NhUOd1Xj0\\NhUOd1Xj0\\NhUOd1Xj0[NiUOe1Wj0[NiUOe1Wj0[NiUOe1Wj0[NhUOf1Xj0YNiUOg1Wj0YNiUOg1Wj0XNjUOh1Vj0VNlUOj1Tj0UNmUOk1Sj0TNnUOl1Rj0TNoUOk1Qj0TNoUOm1Qj0SNoUOl1Rj0TNnUOl1Rj0SNoUOm1Qj0SNoUOm1Qj0SNoUOm1Qj0RNPVOn1Pj0QNQVOo1oi0PNQVOQ2oi0oMQVOQ2oi0nMRVOR2ni0mMSVOS2mi0mMSVOS2mi0lMTVOT2li0lMTVOT2li0kMUVOU2ki0jMVVOV2ji0iMWVOW2ii0hMXVOX2hi0gMYVOX2hi0gMYVOY2gi0fMZVOZ2fi0eM[VO[2ei0dM\\VO\\2di0cM]VO\\2di0cM\\VO^2di0`M^VO`2bi0_M_VOb2`i0]MaVOc2_i0[McVOe2]i0YMdVOg2[k000000000000000000000O1000000000O10O1000000000000000O10000000000000000000O1000O1000000000000000000000000000000O010000O1001O0O100001O000O100001O0O100000O100O100000O2O00000000O100000000000000000000O1000O10000000000001O00O10000000000000000O01000000000000001O00000000O01000000000000000O1000000001O0O10000000O1000000000000000000O100000O2O000000000000O10000000000000000O100000O101O000000O1000000000000000O10000000000000000O100000000000O1000O10000000000O1000000000000000000000000000000O1000000O1001O000O100000O1000000000000O10O11O00000000O10000000000000000000O10O1000000001O0000O10000O1000000000O1000O10000000000000000000000O100000000000O101O000000001O000000O10000000000000000000O1000000000O100000000O100000000O1000O1000O10000000O2O000000O10000000000000O10000000O10O10001O00000000O10000000000001N010000000000000000000000000O1000000000000000O1000O100000001OO1000000O1001O0000000000O100000O10001O0000O100000000001O000O1000O100000000000000000000O0100000001O00O10000000O100000000000000000O10O101O0000O10000000O1000000000000000000000000O100000000000000000000000000000000000O1000O10000000000000000000000000O100000000000000000000O10000000O100001O000000000000O100000O10
0000000000000000000O100000O1000001OO1000000000000000000000000O01000001O000000000000O1000000000O10000000000000000000000000O100000O101O0000O1000000000000000000000O100U^OmLaE4MOHNd03ZON238M3NG0jP3jFlL_Lg0dNEH1b0JB015MHm0KS>T3YHQMXINHNY10gN3a`0P3i;0000001N100000O100O1000000001N10O101O00iNmLaUOS3_j0nL_UOS3bj0kL_UOU3fk0000000000000O10000000000000000000WYOnLVJN02O1OOYj0_O^UO1e0OO1]OOO11ON31Z1ik0eNWTON0=L3Ne1ok0PNQTO01X3lk0c0_Oa0iNW1hMYJaXO4H7OP6me0cI^ZO3M4JQ8Ve0jGoZO6L3Mb8Te0[GnZOl9`d0QFa[Oc:hc0V1YOg0ZOf0[OkBT^O`=Ya0^Bh^OV>\\?hARAN10Nd>[>_AZA1M31K238OH0Lb>W>[AWBS1A^O0h>c=i@bBOOga0l, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. " + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_6.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2897.jpg", + "mask_rles": [ + { + "size": [ + 460, + 620 + ], + "counts": "ZVg37Q>8I5M3M2NDaBM]=OnBMP=1TCOj<1XCOg<0[COd<2]CMc<1_C0`<0aCO^<1cCO]<1cCO\\<3bCNY<:cCGU<\\1N5M5K1O11N2N2N10O000000000000000001O000O100000000O100O2L5eN\\CNH411ZdZ4" + }, + { + "size": [ + 460, + 620 + ], + "counts": "hh\\45U>4M4M2TBBf=c0O001N1O1O1000000001G\\BGe=8]BFe=`00001O3M1O0002N10O0000000001O1N2O000N22N1N2N2O0O10000000000000000001O000_OaB4a=Ib0OTQa3" + }, + { + "size": [ + 460, + 620 + ], + "counts": "i`[7:i=:L300O01O1O1O101N1O1O1O1L6JaXT1" + } + ], + "question": "Which one among , , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. None of the above are in the mirror" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_7.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1116.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "dbnh07ho02N2O2cPOKjn0f0000O10001O000000O11O0000001O0O2O00001O0O1CPQOHRo06>N1O3NX]S>" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "egX`04jo02O2K4M4M2N2O1B>O1O10000001TOVQO`0jn0_O\\QO and are in the mirror?", + "choices": [ + "A. 
and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_8.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/16.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "dk0Y3gl000O1O1O100O1O100O1O100O1O100O2N100O100O100O1O100O100O1O1O1H8N2O1@`0VOQRO_OSn0`0g0O2N1O100O1O100O1O100O1O1O2N1O2L6F\\TZV1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "PP]63mo01O1O1O1O1O1O001O1O1O1O1O1O1O001O1O1O1O1O001O1O1O1O1O001O1O1O1O1O1O001O1O1O001O1O1O1O1O1O1O1O001O1O1O1O1O1O1O1O001O11O000O0100000O1000001O00000O100000O1000O1000000000000O1000000O1000000000000O10O02N1O1O1O1N2O0O3N1N2O1N1O2O2M2O1N101N2O1N2N2N2O1N3M2N2Nnmdm0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "nla:d0]o0Annc0k0dP\\OZ2gM1O1O00001O0000000001O0000000000000000000001O000000000000000000000000000000001O0000000000010O0000O10000M3M3N2O1O1O1O1O1O1O101N100O100O100O10000O10000O100O1N2M3N2N2N2O1O1O1O1N2mNS1O1O10000O1000000000001O00000000O1000O100O100O10000O100N2G9N3N1O3M3M[Tlh0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": 
"0Yd0Y2d@X2\\?hMd@X2_?eMa@[2b?aM_@_2d?]M^@b2i?VMX@j2l?QMU@o2P`0iLT@V3R`0`LR@`3]`0gKm_OY4me0000001O000000000000000000000000000000001O00000000000bMdTO8\\k0QO`UOj0`j0ROfUOl0Zj0SOhUOl0Xj0ROkUOm0Uj0QOnUOn0Rj0QOPVOn0Qj0POQVOo0oi0QOQVOo0oi0POSVOo0mi0POTVOP1li0POTVOP1mi0nNUVOQ1li0mNTVOT1mi0iNUVOW1ki0gNWVOY1ki0bNXVO^1ki0\\NXVOd1Sl0O0000000000000000001O0000000000000000001O00000000000000000000O1\\I\\NbXO1M0i0MYO4_4b1hb0i0o\\OWOPc0k0n\\OVOQc0n0iXO\\Ml3g1[c0]2Y[OeMgd0b3mXOcMSg0e500O1000000O100O1nJ[GR^ON01;OU3g8^>\\Go]O10O2N021M01l2f8Q?]Go]O1001N101N101Na2i8[?\\Go]O2OO2N101N101Na2i8[?\\Go]O2OO2N101N101Na2i8[?\\Go]O2OO2N101O0O2Na2i8[?]Gn]O1O03M101O0O2N`2j8\\?\\Gm]O;3D0O101N10`2h8U?WGZ^Oh0M]O001N101Na2h8U?WGY^Oi0N]ON03N0O10a2f8U?YGY^Oh0NE1F001Na2g8V?XGY^Oh0NE2EO11Na2g8W?VGY^Oi0NE3DO11N`2m8Z?WGQ^O:4E0M^3T9X>nFY^O0fim0CfSO4iW\\2JafdM0iodQ1" + } + ], + "question": "Which one among , , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. " + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_9.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2307.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "m]U61m>3^OORB2n=OQB1o=0oA1P>1nA1Q>0nA0R>1lA0T>0kA17I`=7XB05LT=KRCf0GBV=NnB`0JCY=0jB=KE[=0gBi0Z==101N101O01O5K4L2L1L4L4O11O2N2N2N3N1O101OO1N3M2N1N2O00O002I602O6K5J01O001O0OCdBYOZ=g0iBWOW=i0kBTOU=3hB;g=D[B0K0i=O^B0K0g=LbB3I0V>0jA0V>OlA0T>OmA0T>OnA0R>OQBOo=0RBOo=0ZeW2" + }, + { + "size": [ + 480, + 640 + ], + "counts": "Qo_84d>9I6H7L5H7J7L4L4M3M2K6F]N[C_1_10000001O001O1O1O1O1O1O1O1O001O0O2O1O3LaSR4" + } + ], + "question": "Among , , and , which mask is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
None of the above are in the mirror" + ], + "answer": "D", + "type": "mirror", + "image": "images/vqa_10.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1468.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "_`Uo0a0]o02N2N3N1O1O1O1M3G0\\QOVO27mm0Q1nQOSOPn0_101N100000000000000001O01O000000000000000000000000000000000000000000000000000000000000000000001O00000000000000000O100000000000000000000000000000000000000000000000000000001O00000000000000000000000O10000001O00000000000000001O000000000000001O0O3[NSROH`_f4" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "RoU3k0So03N2N1O1O100O1O100O1O10000O100O100O10001N100O100O1O100O100O100O10000O100O100O10000O100O100O10000O100O100O100O1O10000O1000000O100000000O100000000O100O100O1O1O1O1O100O100O100O100O100O1O100O100O100O1O100O100O1O1O010O1O1YOjMbSOY2Vl0l0M3N2N2M3N2O1O1O1N2O1N2O1O1O1O00100O100O100O100O10000000000O101OO2O000000001O001O001O001O001O1O2N2N2N2N1O3M4L2N1O001O00001O000000VOiLkTOW3oj0QMoTOo2Pk0SMoTOm2Pk0UMoTOk2Pk0VMPUOj2mj0ZMRUOf2kj0]MUUOc2jj0^MVUOb2ij0_MWUOa2hj0`MXUO`2fj0bMZUO^2ej0dMZUO\\2ej0eMZUO\\2ej0fMZUOZ2ej0hMYUOY2fj0a1O1O100O100000000000WL]UOU2cj0kM]UOU2cj0d1000000001O00000]K]UO4Oh2ej0PM_UO6Nj2cj0oL`UO5Ol2bj0nL_UO5On2cj0lL^UO50o2cj0kL]UO60o2ej0iL[UO70Q3hj0eLUUO=3n2jj0cLQUOa05l2Sk0VMlTOj2Tk0VMkTOk2Vk0o000001O0bLfTOZO4V2Vk0`NhTOUO, , and , which mask is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. " + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_11.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1555.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "]hl=7ao09N2N2N2O1M3M2L5N2N2O1O2N1O1O2N3^QOmNQn0d1M2N1O1O00000000000000000001O01O000000000000000000000000000000000000O1O2UOPY]h0" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. 
No" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_12.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1503.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "gdf05go08J4L4L4M2N2N2N2N2O1N101N101N10000O2O000O1000000000000000O1000000O2O000O100O1O10O1O10O01O1N2O1O100O101O0000001OO100000001O0O2O1N10000001O00000O01000001O000O10001N101N101O0O2N2N2N1O2N3M3L4K8Gd[YT1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "SYc01ho0=G7J4M3M2N2O0O2O0O2O1N101N2O001O000O2O00001O00000000000001O001O00000001O1O001O02N3M00O2N1O100000000001O00001O0001O000O100N2M3L4N2O1O1N2O1O1O1O100O100O01000O10O1O10O0100O1O2O0N2M3J6N2000001N10001O0001O01O00000010O00101N1O1O00000O100000000000000000O200O001O001O1O2N1O2N2N100O0O3N1O1O001N2O1N101O1N2N101N1N3N1O2M3N3LRg^R1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "Zjd1Z1\\n0iNiQO^1Qn0cNoQO^1Pn0bNPRO_1om0aNQRO`1nm0`NRRO`1nm0`NRRO`1nm0`NQROb1nm0_NQROa1om0_NPROb1Pn0710O00001O00001O001O00001O00001O00001O00001O00001N10001O0O2O000O2O00001O0001O8Hl0SOS1mNmSgT1" + } + ], + "question": "Which of the following mask :, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. None of the above are in the mirror" + ], + "answer": "D", + "type": "mirror", + "image": "images/vqa_13.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/136.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "bPk1=ao05K5L3L4L5K4L3M4K4O1O1O001O00000000O10000O100O100O1O100O1O10000O10000O100O1O1N2N2N2N2N2O1N2N2N2O1N2O1N2O1N2N2N2O1N2N2N2O1OQPWT1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "c\\VV1R3kl0c2[SO]J03:Nh07Xf0]8H3L2O2N1O1O100O100000000O1N2E" + } + ], + "question": "Among , , and , which mask is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
" + ], + "answer": "D", + "type": "mirror", + "image": "images/vqa_14.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/130.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "nml8l0Po0;F9H=B;E5L4L2O2N1O1O1N2[NcMfUO^2Xj0hMaUO[2ai0WMVVOc03Y2ei0\\MPVOb02W2li0kNlUOX1Tj0Z2O10000000000001O1O1Oe0[O3M001O00001O1N101O1O6J`0@4L5K4L8H4L7I6J2N2N2N3M2N4L4L4L3M5K5K4L4L4L3M2N2N2N2N2O1N2N2N2N1N3N2N1O2N2N1N3NVPbl0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "Q[k8c0Pm0^2N101O0000000000000000000000000000000001O0000000000000000000001O0010O0001O00000000O10001N100O1O100000000000000001O0001O000001O000000000000000000000001O001O>AX2cMhdcl0" + } + ], + "question": "Can you tell me if and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_15.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1900.jpg", + "mask_rles": [ + { + "size": [ + 530, + 730 + ], + "counts": "SSR24[`04L4L3M4M2N3M3N1O100O2N101N100O2O001N101O001O0O2O00001O00001N10001O001N101O00001N101O001O001N101O001N1000001O001O001O0O101O001O1N1000001N101O0O2O001N2O000O101O000O2O0O1O2O0O2O0O101O00001O00000O2O001O1O1O1O00001O000O2O001O000O2O001O1O0O2O00001O0O2O001O001O0O2O00001O00001N10001O001N2O001N10001N10001O001N101O1O000O2O00001O000O2O001O1N10001O000O2O00001N101O001O0O2O001O0000001N10001O001N101O1O0O2O00001N101O001O1O0010O01O012M2N1O01O000O2O0O1O2O0N201N100O2O0O1O1O2N1O1O2N100O1O2N100O2O0O1O1O2N1O1O100O2O0O1O1O010OO2N1001O1O100O1O101N100O1O2N1O100O101N1O1O100O2N1O101N1O101N1O1O101N1O1O101N1O100O2O0O1O101N1O1O2N1O101N100O2N1O100O2N1O100O2N1O100O2O0O1O1O2N100O2O0O101N1O1O1O2N100O1O101N1O100O1O1O1O0001O001O000O2O01O100O1O2N1N3M2N3K5L4K5K9EVih3" + }, + { + "size": [ + 530, + 730 + ], + "counts": 
"nZW55U`09H8H8H8K4I7H8H8K5K5K5N2O2N100O1000001O00000000010O00000000000000001O0000000000001O0001O00000001O00000000001O000001O000001O000000000000001O000000000001O01O00000000001O0000000000010O0000000000001O0000000000001O01O000000000000000O1M3K5J6K5J6J7J5J6K5J6J6L4K5H9KjX_4" + }, + { + "size": [ + 530, + 730 + ], + "counts": "P1l3f<010O000001O0fM]ClN5S1^, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. " + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_16.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1108.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "joc12mo02N2O0O1O1O100O2O0O1000000O10000O10000O10000O10000O1000000O10000O10000O100O1000000O1000000O10000O100O10000O10000O1000000O1000000O10000O100O1000000O100000000O10000O100O10000O10000O1000000O10000O100O100O10000O100000000O10000O100O10000O10000O1000000O10O11O0O100O10000O10000000000O100000000O10000O100O100O1000000O10000O100O100O10000O01001O0O100000000O10000O1O10000O1000O01000000O2O0O10000O10000O1000000O100000000O10O02N1O10000O100000000O100O00100O1000000O10001O0O10000O1000O0101O0O10000O1000000O10001N100O10000O1000000O1000000O10000O100O100O100O100000000O100O100O100O1000000O1000000O10000O100O10000O10000O100000000O100O100O100O1000000O10000O100O100O10000O10000O10000O1000000O100O100O100O100000000O1000000O100O10000O10000O10000O1000000O1O100O10000O1000000000000O100001O1OS2mM1O1O001O00001O00001O1O1O1O0000001O000000001O001O00001O0000001O0000001O0000001O001O1O1O001O00001O0000001O001O00001O00001O001O001O001O001O001O00001O0000001O000000001O00001O001O0000001O001O001O1O00001O00001N101O000O2N1O2N2K_PT1JjokN2O0001N20O010O1O000001O0O1OXPZe0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "mhVg02; and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. 
Neither nor is in the mirror" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_17.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1080.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "noV41no0100O2O0O100O10000O100O10000O10000O10000O10000O100O10000O10000O1000000O10000O1000000O100O10000O1000000O1000000O1000000O1000000000000O100001O000000001O00001O0000001O00001O0000001O001O001O00001O00001O00001O001O00001N1000001N1000001N101N2NXPRP1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "ajd7Y3`k0[1F7L3O1O10O0100001N10001O0O10001N10000O2O000O10001N10000O101O0O101O0O10001N10001O0O10001N100O2O000O2O000O10001N1000000O2O000O101N10000O2O00001N10000O2O000O101O0O101O0O100O2O000O101O000O2O000O101O0O10001N10000O2O000O101N10000O2O000O2O000O101O0O10001N100O10001O0O10001O0O100O2O000O101O0O10001N10001N10001N100O10001N10000O2O1N2Ng0YOSUVk0" + } + ], + "question": "Could you confirm whether and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_18.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/640.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "Xl0b0^o00000O10O10O1000O10O10O10000000O10O1000O0100000O0100000O10O10O1000O100000O10O100O10O1000O10O10000O1000O10O1000O100000O01000O10O10O10000O01000O100O2NXTdU1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "Wm0[2em0000000O01000O10O1000O10O10O10000O0100000O0100000O0100000O01000O10O10O1000O01000O010000O10O10O100O01000O10O10O100O10O10O100O100O10O02aNZROa0gm0[O^ROc0an0N2N2N2_Oa0N2O1N2N2O2MPR`U1" + } + ], + "question": "Can you tell me if and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. 
Neither nor is in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_19.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1004.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "fTZj0X1[m0`1fRO^Mjl0S3I6J4N2N100O1O1O1O1O10O01OM3L5L3L4O1100O10O100000O0001N110O01]YOiLUJ4he0T3n_OeMh?\\2V@iMg?W2X@lMe?U2Y@nMf?Q2Z@RNd?n1Z@UNe?k1Z@WNe?i1Z@YNe?g1Y@\\Nf?d1Y@]Ng?c1V@aN^KUN\\c0Z3TAdN]KUN^c0W3TAfN\\KTN`c0V3SAgN[KUNbc0U3Q\\OfLN4X4n1TLUNec0S3l[O_MW1VO_1V2fMTNhc0[4h\\O^Ke1V2iMSNjc0_4a\\OZKh1X2kMPNkc0a4_\\OYKh1X2mMoMlc0a4]\\OZKg1Y2nMmMnc0a4[\\OZKf1[2PNkMoc0`4Z\\O\\Kc0JCd2@fMPd0a4Y\\O\\Kb0M_Od2DcMRd0a4W\\O_K;a3[O_LSd0a4V\\OT1jc0mNS\\OU1mc0mNn[OV1Rd0\\5000000O1001O00000000000O100000001O0]Im[OY1Sd0fNP\\OX1Pd0fNS\\OY1mc0SMj[OSN;i4kc0SMR\\OkM6Q5hc0SM^\\O^MMI2^4cc0[Nl]OSMiN]4[c0_Nn]ORMjN]4Yc0`Nm]ORMlN9QO\\3Vd0ZOm]OoLoN3YO^3lc0@n]OlLPO1^O^3ec0D\\@:d?F_@7a?Ib@4_?Kd@2\\?Nf@0Z?1g@MZ?2h@LX?4i@UOlJVN\\d0d2i@ROoJYNYd0d2j@POoJZNXd0g2i@nNPKZNXd0g2i@mNQK[NVd0h2j@kNQK\\NWd0h2j@iNRK^NSd0i2l@gNSK_NQd0k2l@dNVK_Noc0k2m@dNVK`Nmc0k2o@cNVK`Nmc0_1_[O@c5>SKbNlc0c0YCh0nHbNnc0OiC[1Qf0M3L4J6M4K4K6JVdP:" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "_VhQ1k0So0g0[Ob0]O=D8H3M1N2O00000O10O1000O101O0000000O10O10000O01000O1000O1000O1000O01000000000O10O100000O010000000O10000000O0100000000O10O100000O1000000O010000000O1000000O10000000O0100000000O100000000O1000000O1000000000O10O100000000O1000000O1000000000O010000000000O01000O1000000O100000O0100000000O0100000O1000000O10O1000O100000O10O100000O10O1000000O010000000O01000000000O010000000O0100000O10O100000O01000000000lJ" + } + ], + "question": "Are and located within the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. 
Neither nor is in the mirror" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_20.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1004.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "\\^]Q17go0:H1N2O00000000000000000001O00000000O100000001O000001N10000O101N2M3N2M4Lba]5" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "mVjl04lo00amT2h0RSjMZOEO1OT3d0^N^O^OMae0]3[YOgL3OOm1d0[OBYOYf0d5iYO^LVf0g6N2O1O001O0000000010O001O2N2O2M4L7Hk1VNT1jNQ1dNSUOeK^k0a1P3ROk0B=CUfR8" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "]\\nm06go04N10001O00100O010O10O0100O010O10O0100O010O10O10O10O0100O0100O1O010O1N6GUcl8" + } + ], + "question": "Which of the following mask :, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. None of the above are in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_21.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1621.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "iom24ko02O0O1N2O1O1O1O100O1O1O100O1O1O1O1O1O2OO0100O1O1O1O1N2O1O1O100O1O1O1O100O1O1O1O1O100O1O1O1O1O1O2N100O1O1O1O1O1O1O1O010O1O1O1O2N1O001O1O100O1O1O1O1O1O1O1O1O1O1O1O100O1O1O1O1O1O010O1O1O1O100O1O0000001O000000000000000000O2N100O1001O00O10000001O00O20O000000O2M2O1O100001O1O1O1O1O100O2M2O1O1O1O1O100O1O100O1O1O1O1O1O1O1O100O1O1O1O1O1N200O1O1O100O1O1O1O1O1O1O100O1O1O100O1O1O1O1N200O1O1O1O2O0O1O100O1O1O1O1O1O1O1O1O1O1O1O1O1O2N1O100O1O1O1O1O1O1O1O1O1O1O1O1O100O1O1O1O1N200O1O1O1O1O100O1O1N2O1O1O1O100O1O100O1O1O1O0101M3I6B?[Od0B`0VOj0\\Of0[O`Wjl0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "[bfb085MXo0d0I4L3N2M10010O000000000O02O000000000000000O2N2Gn]`d0" + } + ], + "question": "Can you tell me if and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. 
Neither nor is in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_22.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2584.jpg", + "mask_rles": [ + { + "size": [ + 460, + 620 + ], + "counts": "VP5;P>7I4M9G5K2N2N001O5K1O001O010O000010O01O10O01O010O001O010O0101Nc0]Od0[OVVR8" + }, + { + "size": [ + 460, + 620 + ], + "counts": "]:`1X=G33L4J7I3O3N2M2N5L13H4J6J6J and are in the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_23.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2130.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "]PQ2:F9G4L2O0O1O01O01O010O01O00010O0001O01O001UOYB=f=B`B9`=FfB6Z=IlB1T=OQCLo<5m001O00010O002O1MZTj6" + }, + { + "size": [ + 480, + 640 + ], + "counts": "ScY2b0[>:I1O1O001O000000000000001O00000000000000001O01O1O01O010O00010O01O01O01O000010O0001O010O02N010O00010O01O01O01O010O000010O01O000010O01O00010O001O000010O01O01O01O1O002O4KQaj5" + }, + { + "size": [ + 480, + 640 + ], + "counts": "nRg84g>6I6M3K5M4K4K5L4K5K6I6I7I7J6I7G9G9J6G9oNgLSFa3k9l0O0O100000000000000000000000O1O1O1H8G9I7D, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
None of the above are in the mirror" + ], + "answer": "D", + "type": "mirror", + "image": "images/vqa_24.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/297.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "aRgg0;_o0>C7H7H9I7I6K5K5L3L5L4M2L5L3N3K4L4N3N100O2O000O101O0000000O1000000001O00010O000O10O100001O00000000000000000000001O00O10O100000000O10O100000O10000O10000O100O100O10000O100000001O0O10000000000000010O01O001O2N3M5K1O10O01O000000001O01N100000000O2O0000000O1000000O10000O10001O000001O000000000000000000O2O001N2O1N2O2M3M3N2M3M3M3L4M2M4L4M3L5K4L4K7J5K5J6J5K6J9CQng:" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. No" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_25.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/552.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "\\kSS12ko05M2N1010O00010O00010O01O010O1O01O01O0010O0001O001O010O001O00001O00001O0000000000O2N3ITP=6goB5M3N100O010O1O1O100O1O10O0100O1001O0O2O0000001O000O2O00001O0O101O00001O0O101O000O101O000O10001O00001N1000001N100000001N10001NbTS1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "djYj03mo00O2ANnPO2Qo02kPOOTo03kPOMUo04iPOMWo04hPOLWo0?000001N3N2N1O2N3M1N2O1O000O101O000000000O10000000000O10000000000O10000000O100O1N2O1O1N2M3N2N2N2N2M3L5HZVj;" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "]Zm, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
" + ], + "answer": "D", + "type": "mirror", + "image": "images/vqa_26.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/46.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "djY9k0So05L3M2O0O100O10000O10001N1000000O10000000001O000O101O00001O000001O01O00001O101N5K:F6K0O1O10O010O2O10O4M0O02@YROaNkm0k0gROSO\\m0`0PSO]OXm0J`Vll0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "lS`o06go05L3I7H8N2N2N2O1O1O001O1N2O10O01O1O1O1O1O001O100O1O001O1O1O1O001O1O1O0010O010N`0^NSRO1VjW7" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "dPib03ko03N1O2N1O2M2O2N101N101N101O001N2O001N101O0O2O001N101O00001O010O010000O01000O100O010O100O010O100O001O1N1B?L4N2O1N3MWo\\c0" + } + ], + "question": "Among , , and , which mask is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. None of the above are in the mirror" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_27.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1258.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "bfgR13?4NO^j00dUOL3001N1O4Lc1ma0SOQE_OmHI9OO102L7L_1na0SOSE\\OoHI9OO2O9E11_1Pb0ROSE[OQIH81N2Nm0Gk0Qb0SOX^OnN^6<^II63N4Kk2ka0kMUESOnHL4373L5Kk2ka0kMUEROZIN03N1N9F12`1ma0PO_FQORH1N2b08TOP1ka0CoHUO_E1O2J0ha0h0QIROUF0ZO4_a0j0mJVOR5j0nJVOR5j0nJVOR5j0oJUOQ5k0oJUOQ5k0oJUOR5j0nJVOR5j0mJXOR5h0nJXOR5h0mJZOR5f0jHROaE2^1;T`0a0jHWOaEOZ1a0Y`09PCTOl09_JMN6a0DAi0ca04oBWOi0k2VRLR_OQOP2Q2^NQN\\OT6Ta0jJk^O\\OKLX1a2jN[4Wa0Y7O1O1O1O010O00001O001O001O000010O01O0000000000O1N2[Of0kNT1fNZ1cMf[OZG4K^e0V5ZZOnJ9M3M^h0b2\\4dN\\1F:N2O1O100O2O00000001O01N10000O101N1K5O2O1N3MTee2" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": 
"jj0h2n0fNni0Z1RVOfNmi0\\1QVOeNni0_1oUOaNoi0d1mUO]NRj0f1kUO[NTj0g1jUOZNVj0g1hUOZNWj0i1fUOXNYj0m1bUOTN^j0g3O1O1O1O1O100O100O1O100O100O1O100O100O1O1O100O100O010O100O101N100O100O100O100O100O100O100O101O000O1000000O1000000O1000000000000000000000000001N101O1O001O001O1O1O001O1O1O1O001O1O001O1O001O1O1O1O001O1O1O0O2O1O1O010O1O1O1O1O100O1O1O1N2O1O1O1O1O2N1O1O2N1O1O1O1O1O2O0O1O1O2N1O2N1O1O2N1O1O2N1O2N1O2N2N1O1O2N1O2N2N1O2N2N3M2N1O1O2N1O1O00O10oLYSOi2gl0WM\\SOf2dl0ZM^SOd2bl0\\M`SOb2al0]MbSO`2^l0`MeSO]2[l0dMhSOX2Xl0iMiSOU2Wl0lMiSOS2Wl0nMiSOQ2Wl0oMkSOo1Vl0QNjSOn1Vl0RNjSOn1Wl0RNhSOn1Yl0SNcSOo1^l0QN`SOP2`l0SNYSOR2gl0b02O1O1UNPSOd0Qm0UOVSOj0ll0QOXSOn0kl0lNYSOS1ll0eNXSOZ1hm0N3N1O1O1N1F_QOUObn0h0bQOUOan0f0`0I8IZQ\\Q1" + } + ], + "question": "Are and located within the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_28.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1843.jpg", + "mask_rles": [ + { + "size": [ + 530, + 730 + ], + "counts": "UnP88U`06M3M3M2O100O1M3N2N2M3N2M3O2N10000O1O1N2O21NfASOQ=l0bBA^=i11O1O4L1O00010N4M1O1O0O2O0O1O1N2L4M201O1O1O1N101^OXAYOi>8kAFV>3RBKo=0WBMn=MVB0]\\m2" + }, + { + "size": [ + 530, + 730 + ], + "counts": "Zgo:4^`05K2N0100O00O2J5J6I8I60001O00000010O00010O01O001O1O001O3M2M3N2N1O001O00001O00O10000O1000000O20O00000YG" + } + ], + "question": "Can you tell me if and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. 
Neither nor is in the mirror" + ], + "answer": "D", + "type": "mirror", + "image": "images/vqa_29.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1012.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "0i7Wh00000O1O1O1O1O1O1O100O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1N2O1O1O1O1O1O1O100O1O1O1O1O1O1O1O1O1O1O1N2O1O1O1O1O1O100O1O1O1O1O1O1O1O1O1O1O1N2O1O1O1O1O1O1O100O1O1O1O1O1O1O1O1O1O1N2O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1N2ZOcTOfL`k0W3f0M3O1N2O1O1O1O100O1O1O1N2O1N2O1O100O1O1O1N2O1O1O1O1O100O1O1O1O1N2O1O1O1O1O100O1N2N2O1O1O1O100O1O1O1O1N2O1O1O1O1O1O100N2O1N2O1O1O1O100O1O1N2O1O1O1N2O1O1O100O1O1N2O1O1O100O1O1O1O1O1N2O1O1O1O1O1O100O1O1O1N2O1O1O1N2O1O1NRPWQ1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "`Ph61cPi:2lnWE2iPOOVo02jPOOUo00lPO1To0MlPO4To0LlPO5So0JnPO7Qo0InPO9Ro0FlPO=Zo01O2N2O1O00001O0O2M3N2BWOcQOl0Yn0VOfQOk0Yn0UOgQOl0Xn0SOhQOo0Xn0POhQOQ1bn02O1N3M2N2K5N102gQOaNmm0l1M5USOmM^O1M8IH^j0X2QVOi0^OYM2F0K50T44kK9LN_1FR?\\9]_ORHn0f2TORLU?Ra0\\O`2`MQ1oNj0VO3@[ZO^Fie0Y1WZOeNOO2;O00O4B94EMb21^M021LT1Pf0SOSZOG030Kj7d0^g0M1N2O0O2M2N20TQOIom07m0O10000O1001O00000000O100M30001O03M1O00000O10L4000000000000000000000000002M3N000000O1M3000001O0000000000000000000002N1O0000O1N20000000ZXO1[?1W@k0Y?UOd@Q1Y?oNe@U1W4TO[DCW:4VMV1T4G]NSO_MW1Q4J^NoN`MX1Q4J_NoN_MX1P4K`NnN]MY1R4JaNnNVLKhJb1_:GbNlNaK6aJ`2Z;_NdNkN_Kf3^5aMQOkN^KR5mJPKNO5J4O1LZ12hNO`3_1j5jNZKa;mJdD1GOL7:J14NLX1f9hNZK[`0d4f_O\\K]`0TKe_OJ30O2M3Fc6:XNVd0SKa[O\\5:AZg0QKeXO6E^1>[3]j0O00010O2O1O001OO100O1O1O1000000O01O1N2O010O1O100O0100N110000O1000000O100O\\UOVKTj0j4lUOWKSj0h4nUOXKRj0g4d00000O10000O1000000O1000000O1O1000000O100O1O100O100O10000O1000fUOcKQi0\\4PWOdKPi0\\4oVOfKPi0Z4PWOfKPi0Z4PWOfKPi0Z4PWOfKPi0Y4QWOgKoh0Y4QWOgKoh0Y4QWOgKoh0X4SWOgKmh0Y4]1O100O10000O1000iUOjKdh0V4[WOlKdh0T4\\WOlKdh0S4]WOnKbh0R4^WOnKbh0R4^WOnKbh0Q4_WOoKah0Q4`WOnK`h0Q4bWOmK_h0R4h10000O10000O10000O1000PVOPLPh0o3QXOQLog0o3Q2O100O1000000O100O1O100O100O1000000000000O10000O10000O10000O10
00000O100O100O10000O1000000O10000O10000O1O100O1000000O100O100O100O100O100O10000O1000000O1000000O100O1O10000O10000O100000000O100O100O100000oEVMlFi2T9fM^FZ2b9gM]FY2b9iM]FW2d9hM\\FW2e9iM[FW2f9gM[FY2j9bMVF]2m9aMSF_2n9^MTFb2ec0O100O100001O004Le1PSOkKkj0P;WUOjD:Ld035Nh?e=Y_O]B92HKW`0]`0__OR@3c01ROW=ib0eB`]O7Em;gd0lCg[O2GS:Yh0nEcWOQ7kk0QI\\TOX3[o0kL3M1O001O000000000000001O0000000000000000O1000000000000000000000000000000000000000000000000001O000000000000000000000000000000000000000000000000000000000000000000000000O10000001O00000000000000000000000000O100000000001O0000000000000000O100000000000000000000001O0000000000O10000000000000000000000001O0000000000]J1lUO83JU4Mge0[1UZOeNje0^1TZObNle0`1RZO`Nne0a1QZO_Noe0b1PZO^NPf0b1PZO^NPf0c1oYO]NQf0c1oYO]NQf0c1oYO]NQf0c1oYO]NQf0c1oYO]NQf0c1oYO]NQf0c1oYO]NRf0b1nYO^NRf0b1nYO^NRf0b1nYO^NSf0a1mYO_NSf0a1mYO_NSf0a1mYO_NSf0a1mYO_NSf0b1lYO^NTf0b1lYO^NUf0a1kYO_NUf0a1kYO_NUf0a1kYO_NUf0a1kYO_NUf0a1kYO_NVf0`1jYO`NVf0a1iYO_NWf0a1iYO_NWf0b1hYO^NYf0b1fYO^NZf0b1fYO^NZf0b1fYO^NZf0b1fYO^NZf0b1fYO^N[f0a1eYO_N[f0b1dYO^N\\f0b1dYO^N\\f0b1dYO^N\\f0c1cYO]N]f0c1cYO]N]f0c1cYO]N^f0b1bYO^N^f0b1bYO^N^f0b1aYO_N_f0b1`YO^N`f0b1`YO^N`f0c1_YO]Nbf0b1^YO^Nbf0c1]YO]Ncf0c1]YO]Ndf0c1[YO]Nef0c1[YO]Nef0c1[YO]Nef0c1[YO]Nff0b1ZYO^Nff0b1ZYO^Nff0a1[YO_Nef0k0cUOFg3@ff0h0fUOFe3Bff00bUO>:Ee3Mhf0N_UO0;O`33ik00000000001O000000000000001O0000O100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "]hg==_o06H8K5K3N3M3M2O2N3L4L4M1O2N1O1O2M3N7I4L7I7I7J1N1O2O1N101N1O1O101N100O101O000O101N10001N1000001O00001O00001O001O001O001O001O00001O010O001O000010O01O001O0010O010O010O1O01O01O01O00010O0010O0001O01O01O0010O01O010O001O010O0010O10O01O10O010O000010O0001O010O0010O01O1O01O01O010O0010O010O010O010O010O01O01O01O0010O01O00010O02Mj1VLbRO^1lSXe0" + }, + { + "size": [ + 1024, + 1280 + ], + 
"counts": "VT_3;5Oen0Q1cQOROF3011a0Xk0e3K4L4M3L3N2M3N1O1O2O0O1O2N1O1O1O2N1O1O2N1O2M2O2N1O001O1O001O1O1O1O2N1O1O1O1O1O1O1O2\\WO^Igg0c6XXO^Igg0c6YXO^Ieg0c6[XO]Idg0d6\\XO\\Icg0e6]XOZIcg0g6^XOXI`g0j6j01O1O1O1O1O1O101N1N2O1O1O2N1O1O1O1O2N1O1N2O2N1O1OoNkWOYJUh0g5lWOXJSh0i5mWOWJRh0i5PXOVJog0k5QXOUJng0l5SXOSJkg0o5UXOQJjg0o5WXOQJhg0P6YXOPJeg0Q6[XOoIdg0R6\\XOnIdg0R6\\XOnIcg0T6\\XOlIcg0U6]XOkIag0W6_XOiI`g0Y6^XOhIag0Y6_XOgI`g0[6^XOgI`g0]7O1O100O1O1O1N2O1N2O2O0O1O1O1O1O1O1O1O10001N100O100N2000000000001O000000000000000000001O000000000000001O000000001O00000000001O0000000000000000001O000000001O00000000000000001O000000001O00001O00001O001O00001O00O10002N0000000001O0001VOcYOnG^f0Q8fYOlG[f0R8nYOeGTf0Z8PZOaGdi06mUO6N\\3gn0^L_QO8Xg9DaXF860M2NO4M6Flk0]6kSOoI4D\\h0^8iN5N1XYOcGke0]8TZOfGje0Z8UZOgGke0Y8UZOhGje0X8VZOhGje0X8UZOjGje0V8UZOkGke0U8UZOkGke0U8TZOlGle0T8TZOlGle0U8SZOlGle0T8TZOlGle0T8TZOlGle0T8TZOlGle0T8TZOlGle0T8TZOlGle0U8SZOkGme0U8SZOkGme0U8RZOlGne0T8RZOlGne0T8RZOmGme0T8RZOlGne0T8RZOlGne0T8RZOlGne0T8RZOlGne0T8RZOlGne0U8QZOkGoe0U8QZOkGoe0U8QZOlGne0U8PZOlGPf0T8PZOlGPf0T8oYOmGQf0S8oYOmGQf0S8oYOmGQf0S8nYOnGRf0S8mYOnGRf0j3PZO@KgLUf0h3RZOhe0CWZO=ge0EXZOc6\\c0WIW\\O5, , and , which mask is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_30.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1390.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "fPho09do08XOC_QOc0_n0?000000000000000000000000000000000000000001O000000000000000000O1000000001O000000000000000000000000000000001O000000000000000000000000001O000000000000O1O1O1N2M3M3K5L4K5L4M3JVP_5" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "YSQR13io05J6K4M3N2L4K5M3M3M3M3K5L4K5M3M2M4J6L3N3M2M4L3L4N1N3N1O12N2N2O1N2O1N2N2N2N2N2M3N2N2N2N2N2M3M3K5M3M3N2L4M3N2K5M3M3N2N2N2M3N2M3N2N2O1NRPl3" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "PPW43mo05K3M3M3M3M2N3M3M2N3M2N4L6J4L3M4L4L3M3M4L4L4L4L3M5K3M4L3M4L4L3M3M3M4L1O1O00000000001O000000000000000000_NPTORORl0j0RTOTOQl0h0TTOTOok0h0UTOUOok0f0TTOXOPl0c0STO[OQl0`0TTO[OQl0`0RTO^ORl0=PTOBTl09oSOETl07QTOESl07PTOEVl05mSOIWl02nSOGYl04m1K[ndQ1" + } + ], + "question": "Which one among , , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. None of the above are in the mirror" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_31.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/602.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "]c]Q17bo08WMDfSO4EONm1]l0^1eUOWM^g0k2[XO^Mbg0b2[XObMdg0^2YXOeMgg0[2TXOkMkg0V2PWOiL7T1ih0S2nVOnL1T1Qi0n1nVORMGV1[i0h1mVOQOSi0o0mVOQOSi0o0lVOROTi0n0lVOROTi0o0jVOROVi0n0iVOSOWi0m0iVOSOWi0o0fVOROZi0j3000000000O1000000000O1000O1M3oNR1gNX1eM[TO`N6Hml0V1iROmNcn0:^QOHk^\\5" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. 
No" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_32.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/130.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "i`TQ1;Zo0>H`0@>C`0_O;E:H?A6I]MoSOa1jk0aNYTO`1ck0\\NdTOe1Wk0[NmTOf1oj0XNVUOh1hj0XNZUOi1Pj0gNUVOZ1hi0cN]VO^1ai0aNaVO`1]i0aNcVOa1[i0_NeVOb1Zi0_NfVOa1Xi0`NhVOa1Wi0`NhVO`1Xi0aNgVO_1Xi0dNfVOZOUOn0Uj0KbVOUO]Oo0Qj0N`VOoNDR1li01^VOkNIR1ii08ZVOdN1Q1fi0?UVO]N9R1ci0e0PVOWN`0R1ai0m0jUOoMg0S1_i0P1hUOlMk0R1^i0V1fUOcMn0V1^i0\\1hVOdNYi0OcUOLT13[i0NiUOIl09\\i0LmUOFh0=]i0KPVOCe0a0]i0HTVOB`0e0^i0GYVO[O, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. " + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_33.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1944.jpg", + "mask_rles": [ + { + "size": [ + 530, + 730 + ], + "counts": "j9`0[3\\2`6dMaI[2_6fMaIY2_6gMbIX2^6hMcIW2]6iMdIU2]6jMdHFZN`2R9jMbHLYNY2U9kMaH1VNT2Y9kMaH3UNQ2[9jMaH:oMl1`9jMbHRO`Bh0i>L3M2N1O0000000000000000000000000000O1M3M3M3O100N2O1O1N2O1O1000000O100000000001O000000000000O1000000001O0000000000000000000000O1001O0000O1000000001O000000000000000000O100000000001OO1000000000000001O00000000O10000000000000000000000000000000000000000000000000000000000000000000000000O11O00000000O11O0000O100001O000000000000000O01001O000000000000000000O11O000000000000000000000000000000000000000000O11O000000000000000000000000000000000000000000O11O0000000000000000000000000000000000000000000000000000000000000000O11O00000000000000000000O11O000000000000000000O11O0000000000000000O100001O0000000000000000O1001O0000000000000000O100001O0000000000000000O1001O000000000000000000O11O00000000000000000000000000000000000000000001O01O0O10O1000001O00000001O000000_Oo@AQ?9ZABg>9^AFb>7bAH_>2gAM[>LkA3[?O1O10O01NUaZ2" + }, + { + "size": [ + 530, + 730 + ], + "counts": 
"PmY31Y`01j_O3P`0;M3@a0L4J7K6J6K2O1O2M3M3N2N1O1O1N7J4L1O1O103L111M0101N12O2M001OO0AaBlM`=d2OjNdBUO\\=Q2O0101N1O100O1O1O100O1O1O001O1O1O2O0O1O001O1O1O1O1O1O2O0O1O1O1O1O1O1O001O010OaNnM^DR2b;nM^DR2b;nM_DQ2a;oM_DQ2Q=00000001O000000001O000000000000000000001O0000000000cNRNVDn1j;RNVDn1j;RNVDn1j;SNUDm1k;UNSDl1l;UNTDj1l;WNSDi1m;WNSDi1m;XNRDh1n;YNQDg1Pc0eAB[>=_AIa>T11O0000001O0001O000001N1001O000001O000000000000000000000000001O000000000000000O2O000O101M2N2N3K6C and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "D", + "type": "mirror", + "image": "images/vqa_34.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2347.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "k`[33k>2F:001O1O1O1O1O2N2N0O2O00Wmi5" + }, + { + "size": [ + 480, + 640 + ], + "counts": "jVj15f>6L4J5K6I6L5G9^Ob0ROn0M2O2O0O1000000O100000000O100000000000000000000000000000000000000000000000000000000000000cEZM`8f2]GeM[8[2bGkM[8U2bGQN[8o1aG[NY8e1fGcNS8\\1mGlNl7T1THSOe7m0ZH\\O^7d0aHBZ7>kGoLd0j2[77QHPMa0n2[72SHQMa0R3_OTM\\7Q3RI;m6m3O1O001O000010O00000000000000000\\KiH_1W7aNiHSM7m3P7POjHQM and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. 
Neither nor is in the mirror" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_35.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/118.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "PPf64lo04L1O2N1O1O1O1O1O1O2N3M5K1O1O1O1O001O0000000000000000001O00000000001O00001O00001O00001O00000000001O00001O0000001O000000001O0000001O000000001O00001O0000001O00000000001O0000001O00001O001O1O1O2N1O1O000000000000000000O100000000001O000000000000001O0000000000001O001O00001O00001O0000001O000000001O00000000000000000000O10000000000O10000000002M3N1O2N1O2N1O1O2N2N1O1O1O1O1O1O1O2N1O1O1O1O001O01O01O0000000000O2N1O100O1O1O1N3O0O10000O2O000O100000000000000001O1O1O1N1O2N1N2N3M2O2M3M3N3K6KPoji0" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. No" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_36.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2158.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "[jX5R2n3900O1N20WA0a>1521O1O00000000000000O11O00SO0jB0U=7eBI[=b0XB@h=l000000001O9G=CB`0@8H1O0000000000000000000O100000000000000000000O100000000000O2O1O1O7I`0A5I5La0jMXC5_Of0V>M4L5K4LWgR7" + }, + { + "size": [ + 480, + 640 + ], + "counts": "hbj44i>:VCI\\:;^EK^::\\EIb::[EHc:, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. None of the above are in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_37.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/105.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "dlQS14_o0S1hNS1ROg0_O=E and located within the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. 
Neither nor is in the mirror" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_38.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/869.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "`V`9=4H2Min0f2XN7I2N1O100O10000O100O10O10O100000O0100O100000O0100000000O10O10000000O10O1000O10000O0100000O1000O10O1000000O01000000000O10O1000O100000O10O100000O1000O100000O10O1000000O010000000O10000O01000000000O100000O01000000O1000O01000000O10O100000O100000O01000000O10O10O1000O10O100000O1000O1000O01000000O01000O10000O100000O10O1000000O1000O100000O1000O010000O1000O1000O10000O1000O1000O1000000O01000O1000O010000O10O01000000O0100000000O10000000000O10001O00000O1000000O2O0000000O101O000000001N100000001N10001O00000O2O0000001N1000001O000O101O001O0O101O00001O001O0O2O001O001N101O001O001N101O001O0O2O001O001O0O2O001O001O001N101O1O001O1O1O2Mg0YOo0nNfhed0" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. No" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_39.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2369.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "o7o6Q81O00000000O2O3N2L5L4L2O3L7I4L2M3N2O0N10kN[JPId5o6cJkH]5S7hJjHX5U7kJiHU5W7oJeHQ5[7SKaHm4_7VK^Hj4a7[KZHf4f7Z100O100001O2N2N3M4L1O001O0000O1O1D7G6L3N2N3N2N00000002N2N3L2M3M2N3O0O3M3LTT`5" + } + ], + "question": "Are and located within the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. 
Neither nor is in the mirror" + ], + "answer": "D", + "type": "mirror", + "image": "images/vqa_40.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/48.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "`Zi09ao08I5J7M2O1000001OO1000000001O000000000001O0000000000000000000010O0000001O000000000000000000000001O000000000001O00000000000001O0000000000000000001O00000000000000000001O000000000001O000O3LneXT1" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. No" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_41.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1081.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "go[b06`n0\\1SOk0XOh0H8K6L3VN[MWVOg2bi0jMQVOY2ji0QNoUOQ2mi0S2N2M3L4N2O1OdJbVO\\Oe0f3hh0lLPXOP3og0gL\\XOX3cg0gL`XOX3_g0hLcXOW3]g0iLcXOW3\\g0jLeXOU3[g0jLgXOU3Yg0kLhXOT3Wg0mLiXOS3Wg0nLiXOQ3Wg0PMiXOo2Wg0SMhXOl2Xg0XMfXOf2Zg0]McXOc2]g0`MaXO^2`g0fM]XOY2cg0kMYXOU2gg0WNnWOh1Qh0]NlWOb1Th0cNfWO^1Zh0kN[WOW1eh0Q31KdVOfI[i0f5fVOfJbi0Z5^VOeJ5CUi0f0QWOa4EkJYi0b0[WOc4eh0\\KeWO\\4Yh0eKPXOR4og0oK\\XO3oNV2ch0jMmYOS2ke0VNXZOe1fe0_N`ZOZ1^e0hNmZOl0Se0VOS\\OmN]KNfh0U1d52O00O2TO\\QOOPo0JmWjc0" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. 
No" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_42.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/778.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "ZPa=l0jn0=M1O2N1O1O1O1O001O1O001O1O001O001O00001O001O00001O0000001O00001O000000001O00000000001O000000000000000000000000000000O10000000000O100000000O1000000O100O100O100O100O1O1O1O1N2N2A?D\\Peg0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "XQV98Ro0Q1D3M2M3O1N2O1N2O0O2O1O0O2O001O001N10001O001O00001N10001O0000001O0O100000001O0O1000000000000O2O000000000O10000000000O100000O1000000000O10O100000N20O10000O100O100O100O1O1O3M2L7hNmPQl0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "Vl`21jo09J4L4L3N2N2N2aSOWOci0l0YVOZOci0g0YVO_Odi0c0YVO@ei0a0ZVOAkh0@`UOP1d1Bfh0J]UOg0l1@ch00\\UOa0P2Abh01[UO?R2B`h04ZUO;U2B`h0_1ZWOgNdh0Z1[WOgNdh0Z1ZWOhNeh0Y1ZWOhNeh0Y1ZWOhNeh0Y1ZWOhNeh0Y1YWOiNfh0X1XWOjNgh0V1XWOlNgh0U1WWOlNih0U1VWOlNih0T1WWOmNhh0T1WWOmNhh0S1VWOQOhh0P1SWOUOlh0k0QWOYOoh0g0oVO[OPi0e0oVO^OPi0b0nVO@Qi0`0nVOCPi0=nVOFRi0:lVOHTi09gVOKXi06fVOLZi04dVON\\i06_VOL`i06]VOLai06]VOKci07ZVOKei08WVOIhi0Y3N100O1O1O1O10001N1000000O1000000O010O100O01O001M3K4O2O10O10O10O10000000001O0000000001O01O01O010O010OO3N2M3N1N1N3N1O2N1O2N101N1O1O1O101O0O1000000000O1000000kLVWOTOjh0i0]WOSOch0m0_WORO`h0m0dWOPO\\h0iNTWOoN1R1a0T1Zh0jNiWO00T1Vh0lNlWOOOT1Uh0mNmWONOT1Uh0lNmWOO0T1Sh0mNmWOO0T1Sh0mNnWON0T1Rh0mNoWOO0S1Qh0nNoWOO1R1Ph0nNQXON0T1Ph0mNPXOO1S1og0mNQXO01Q1og0nNQXO11P1og0nNPXO22o0ng0nNQXO32n0mg0nNSXO30o0mg0mNTXO40n0lg0mNUXO51l0jg0oNVXO33l0gg0POWXO44j0fg0POWXO56i0cg0QOYXO57g0ag0QOZXO7, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
None of the above are in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_43.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2237.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "neU17h>3N1O00001O00000000000001O0001O000000000010O000000000000001NQig7" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. No" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_44.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1341.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "^`Rc0 and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_45.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1004.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "bm0^2bm00001O1O1O1O1O1O1O1O1O1O1O1O2N1O1O1N2O2N1O1O1O1O1O1O2N101N1O1O3M7I6I4LkPkV1" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. 
No" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_46.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1555.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "VlYb02ko06J4L3M4L4L3N3M2L4N3L3N2N2M2O1O1N1N200O0001O1O10N100100O010O0001O10O1O01N1100O000010O1O0^FkN]DT1b;8TCGna=TOQ\\OL[6o0f=nNY\\O1o5Q1j=lN_\\ONc5V1P>lN`\\ON\\5T1W>nN_\\OMX5U1Z>nN`\\OMR5U1`>nN`\\OMk4W1f>lN`\\ONf4V1m>kN_\\OOa4U1R?mN]\\ON^4U1W?mN]\\ONY4T1\\?oN[\\ONV4R1a?QOY\\ONS4P1e?UOU\\OOQ4l0k?YOP\\OOP4h0h?T2Q@mMP`0X2j_OhMW`0[2d_OeM_`0[2^_OeMe`0[2Y_OeMi`0[2U_OdMn`0\\2P_OcMSa0]2k^ObMXa0_2d^OaM_a0a2[^OaMga0`2T^OaMoa0`2m]OaMUb0_2i]OaMYb0_2_]OfMdb0Z2V]OiMmb0W2P]OhMTc0X2h\\OiM[c0W2b\\OjM`c0V2^\\OiMec0W2Y\\OiMic0W2U\\OgMoc0Y2n[OhMTd0X2j[OhMXd0X2g[OgM[d0Y2c[OgM_d0Y2`[OfMbd0Z2gYOXL9\\1Sf0\\2cYOZL8Y1Wf0]2`YO]L5W1\\f0[2`YO`L2R1af0^2]YObLOo0ff0_2[YOcLNl0if0a2YYOeLLi0lf0b2XYOgLIf0Qg0d2VYOfLId0Rg0f2UYOgLHa0Tg0j2bXOoKNg0;=Xg0m2YXOWL1b0<7Y7POP8R4R@[L9b050`7UOn7a4\\@^L2Lc7YOm7]4^@_L0K[5AoKLW>Y4^@aLNI\\5K`KOf>l3`@lMU5`0X:e1b@nMn3h1Z;:h@TN[1f4W=UM]Ab7^>^HbAd7\\>\\HcAf7\\>[HbAg7]>n400O100000M3TOkAc_O_>V`0i0N2O1TLRAYDR1Km=Z:WEaEk:i8lFTGV9l7o7jLUXORNPh0l1TXOoMog0P2TXOlMng0T2SXOiMog0V2SXO^LMLog0f3WXOZLL1kg0e3hXO]KRO6>j0eg0c3kYO_LSf0a3nYOaLoe0_3QZOcLme0\\3TZOfLje0Z3VZOhLhe0W3ZZOiLee0h2YXOVLS2S1ce0f2_XOQLP2\\1^e0b2P[OaMmd0_2T[OcMid0]2X[OdMfd0[2[[OhMbd0X2_[OjM^d0U2d[OmMYd0S2h[OPNTd0P2n[OQNoc0o1S\\ORNjc0n1W\\OTNfc0l1\\\\OTNbc0l1b\\OSN[c0n1i\\OPNTc0P2R]OmMkb0S2X]OmMeb0S2_]OmM]b0S2k]OgMSb0Y2P^OgMma0Y2V^OgMga0Y2\\^OgMaa0Y2a^OhM\\a0X2g^OhMWa0X2m^OfMPa0Z2S_OfMj`0Z2W_OgMg`0Y2\\_OgMa`0Y2b_OhMZ`0X2h_OiMU`0W2n_OiMo?W2U@hMh?W2]@hM`?X2c@gM[?Y2i@fMT?Z2n@gMo>Y2SAgMk>Y2WAhMf>X2]AiM_>W2eAhMX>X2kAgMS>Z2oAfMn=Z2UBfMh=Z2\\BfM`=Z2dBeMY=[2jBdMT=\\2nBeMo<[2TCdMj<\\2YCdMd<\\2_CeM]<[2gCdMV<\\2mCcMQ<]2QDdMl;\\2WDeMe;[2_DcM_;]2dDcMY;]2iDdMT;\\2nDeMo:[2UEdMh:\\2[EdMb:\\2`EeM^:Z2eEeMZ:Y2jEgM[::Q^OVOh7a0U=lNPCS1Pf00O0010O10010N10011NO1O1O02O01N2OO2N04N2NON10O3L3M2O22O
N11OO10O1N2000O00O2N1O1N2O0002N1N1O1jNjQO`0Vn0_OPRO, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. None of the above are in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_47.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1090.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "^m0a0_o000000O10000000000000000000O10O1000000O10001O00000001O001O001O1O000001O000000000O01000000000ZROBjk0>STOFlk0:RTOImk06STOLlk04RTONnk02QTOOok01PTO1ok0OPTO2Pl0NoSO3Ql0MnSO4Rl0LmSO5Sl0KlSO6Tl0IkSO9Ul0GiSO;Xl0DeSO?[l0AcSOa0]l0_OaSOc0_l0]O^SOf0bl0[OZSOh0fl0XOVSOl0jl0TOSSOo0ml0ROnROR1Rm0QObROX1^m0e00000000000O10000001O0000001O3M;E5K4L5K3M3M5K3M3L2O2N2N1O1N2N1O2N101N1O2N1O2O1N2M5KdP`T1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "hVdQ12ko04M3M3M3I7L4L4M3M2L5K5M3L4N1O2N1N3N2N7I8H:F9G9XYO]Mc?f6XZOhK[e0b7L2O0`]OZEe?g:i2001O1O1O001O010O1O1O1O1O010O00O1O1YIV[O_NMgM>FF9N0j0JZd0o3^[OPM@oNYi0Q4Y21O01O0001O001O01O01O010O001O010O001O001O01O01O001O01O01O00010O001O0010O01O00010O001O00001O010O001O010O00001O010O001O00010O00001O010O001O00001O010O00001O10O01O00001O001O010O1O00010O00001O001O0010O0001O00100N101O00010O00010O00010O001O0010O01O00100O00001O0010O000001O0010O00010O01O001O00010O001O10O010O000010O010XO" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "fRj, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_48.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1090.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "fnSf0T1Wn0h0J4eKWN[ZOl1ae0gNgYOWOmLS2[i0nNiYOQ1Vf0SOiYOk0Wf0WOjYOf0Vf0\\OjYOb0Vf0CiYO9Wf0LfYO2Zf02eYOK[f08fYODZf0?eYO_O[f0e0eYOWO[f0n0dYOnN\\f0V1dYOfN\\f0^1eYO]N\\f0e1eYOWN[f0n1cYOoM]f0V2aYOgM_f0]2`YO`Maf0c2^YOYMcf0j2]YORMdf0Q3T35L4K4L4L4M4K4L3M3M5L4K4L3N2M4M3L4M2N3N13MN3M2M4L3M3N1N0010O0000001O00001O001O00001O1O001O1O001O1O001O1O1O1O1O1O2N1O001O001O1O1O1O1O1O001O1O1O2N5jKPUOR3Tl0K3M2N1O1O1O1O1O001O1O1O1O001O1O001O001O001O1O001O001O00001O001O001O1O001O1O00001O000000000000000000000000000O10001N1M3I7WOi0D>_Ob0_OPSj<" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "^j>6go05L3N2O1O00001O0O1O100O1O1N2O1N2O1O1O1O100O1O0001N1O2O001O1O10O01O100O1O00100O1O1O10000O100000O100000000000000000001O010O001O010O00100O0010001N100O1O1O100O0001O001N2O0O2O1N2O1N2O0O2O1N2N2N2NjUaT1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "mckT12Zo07oPOOkn04TQO6an0K^QO=[n0CeQOl0km0UOUROQ1em0oN[ROR1dm0oN[ROR1dm0nN\\ROR1cm0oN]ROQ1cm0oN]ROR1bm0nN^ROR1am0oN_ROQ1am0PO^ROQ1am0oN_ROQ1`m0PO`ROQ1_m0oNaROR1]m0oNcROS1[m0mNeROT1Zm0lNeROW1Ym0iNgROX1Xm0hNhROZ1Um0hNjROY1Um0gNkROY1Um0gNkROY1Um0gNkROY1Um0gNkROX1Vm0gNkROY1Um0gNkROX1Vm0hNjROW1Vm0jNjROU1Wm0kNjROR1Xm0nNhROR1Xm0mNiROR1Ym0mNgROR1\\m0kNeROU1dm0aN^RO]1Sn0O3Nd0[O6I?AoZj1" + } + ], + "question": "Among , , and , which mask is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
None of the above are in the mirror" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_49.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1072.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "nbk;5?M25om0^1K2M2O0000001O0000001O000010O00000000000O1000000000000000000000000001O0000001O001O000001O000000000001O0000000000000000O1O2K4Aa0oNo]Sj0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "kf0V7jh0000000001O0000000000000000100O000000O101N10000001O00001O01O0001ObMZWO[KYf0iMhZOo1lN8\\f0iMdZOV2mN1_f0jM_ZO[2oNLbf0lMZZO\\2ROHdf0mMVZO`2SODgf0mMSZOb2UOAhf0oMmYOf2XO\\Okf0PNfYOk2\\OVOnf0PNaYOR3\\OoNSg0QN[YOW3^OiNVg0VNRYOX3FbNXg0o2dXORM\\g0Q3`XOPM`g0S3]XOmLcg0T3[XOmLeg0g50\\M[XOmLeg0S3[XOmLeg0R3\\XOnLdg0R3\\XOnLdg0Q3^XOnLbg0Q3_XOoLbg0n2`XORM`g0l2cXOSM]g0Z2dXOVK7_2Vg0e0mZOYOSe0e0T[OVOld0h0W[OWOid0h0Y[OWOhd0g0Y[OYOgd0f0Z[OZOfd0f0[[OYOed0f0\\[OZOdd0f0][OYOcd0f0^[OZObd0f0^[OZObd0e0_[O\\O`d0d0`[O\\O`d0d0`[O\\O`d0d0`[O\\Oad0c0_[O]Oad0b0`[O^O`d0b0`[O^O`d0b0`[O^O`d0b0_[O^Obd0b0^[O^Obd0b0^[O^Obd0b0^[O^Obd0b0^[O^Obd0b0^[O^Obd0b0^[O^Ocd0a0\\[O@dd0`0\\[O@dd0?][OAcd0?\\[OBdd0?[[OAed0?[[OAed0`0Z[O@fd0`0Y[OAgd0`0X[OAgd0?W[OChd0?U[OCkd0>Q[OEod0UOPoAV1\\O;N2O1O001O2N010O01O01O0000001O00000O100000001N11N101M4[NmQOP1ZZo9" + } + ], + "question": "Which one among , , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_50.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/67.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "lhaf06ho04M3M3M3M3M3N2M3N3L2O2N2N3M2N2N102M2O1O1000O1N2N2N2N2M3N2M4M2N3L3N2M3N3M2M4L3MQVV`0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "YmTR11lo05M3N1O1N2O1N2O9G5K7H7J1O1O1O001O001O001O001M2I8L3O1mNQNUTOP2ik0RNUTOo1kk0SNRTOo1lk0SNQTOo1ok0SNmSOo1Sl0TNjSOm1Ul0XNeSOi1[l0ZNbSOf1^l0[NaSOf1^l0]N^SOd1cl0l00001O01O010O00100XM`SOn1al0nMcSOQ2^l0jMfSOW2Rm0O00100O2N2O0O1O100O4M7H1O01O00010O0001O01O01O00001O000O2M3K4G:E;F:H8LbRW3" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "TPTV1`12oNim0a3UN_1aNk1UN=Ch0PXOhGhf0j9nNn0ROa1_NX2d\\OgA9GILd?Qb0^@o]O[=cd0bB^[On:dj0dF^UObN:\\2Zn0bNe0[O;E2N1O00001O00000000001O000000O10000000000000000O100`M2gRO<0A[12gk0g1eSOYNYl0X2nRORNQm0_2O1O1O1000000O10000001O00000000" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": 
"0j;Vd0000000000000001O000000000000000000000000000000001O000000000000000000001O0000000000000000000000001O000000000000000000000000000000001O00000000000000000000001O0000000000000000000000000000001O00000000000000000000000000001O0000000000000000000000000000001O000000000000000000000000000000001O00000000000000jIX\\O4hc0I^\\O4bc0K`\\O4`c0Kb\\O4^c0Lc\\O3]c0Le\\O3[c0Mf\\O3Yc0Mh\\O2Xc0Mj\\O2Vc0Nj\\O2Vc0Nk\\O1Uc0Ol\\O0Tc0On\\O0Rc00n\\O0Rc00o\\OOQc01o\\OOQc01o\\OOQc01P]ONPc02P]ONPc02Q]OMob03Q]ONnb02S]OMmb03S]OMmb03S]OMmb03T]OLlb04T]OLlb04T]OLlb04T]OLlb05T]OJlb06T]OJlb06U]OIkb07U]OIkb07V]OIib07W]OIib08V]OHjb08W]OGib09W]OGib0:V]OFjb0:W]OEib0V]OBlb0R]OBnb0>R]OBob0>P]OBPc0>P]OBPc0?o\\OAQc0?o\\OARc0>n\\OBRc0?m\\OBRc0>n\\OBSc0=m\\OCSc0=m\\OCSc0>l\\OBUc0=k\\OCUc0>j\\OBVc0?i\\OAWc0?i\\OAXc0?g\\OAYc0`0f\\O@Zc0`0f\\O@Zc0a0e\\O_O\\c0a0c\\O_O]c0a0d\\O_O[c0b0d\\O^O\\c0b0e\\O]O[c0d0e\\O[O[c0e0e\\O[O\\c0d0e\\O[O[c0f0e\\OYO[c0h0d\\OXO\\c0h0e\\OWO\\c0i0c\\OWO]c0i0d\\OVO\\c0k0c\\OUO]c0l0c\\OSO]c0m0d\\ORO]c0n0b\\ORO^c0o0b\\OPO^c0Q1b\\OoN^c0Q1b\\OnN^c0R1c\\OmN]c0T1d\\OjN]c0U1d\\OjN\\c0W1c\\OiN]c0W1d\\OhN\\c0Y1d\\OfN]c0Y1d\\OfN\\c0[1c\\OeN]c0\\1c\\OcN^c0]1b\\ObN^c0_1b\\O`N^c0`1c\\O_N]c0b1d\\O\\N]c0c1d\\O\\N\\c0e1d\\OZN\\c0f1e\\OYN[c0h1f\\OVN[c0j1f\\OTNZc0l1g\\OSNYc0n1h\\OPNYc0P2g\\OoMYc0Q2h\\OnMXc0R2i\\OmMXc0Q2j\\OnMVc0P2m\\OnMUc0gNk[OTMS4T4cg0M2I7D;GdSOjL`l0R3:G9E]Ob0cN`QO3O2^R:JdmE1Qo00VQO8in0IVQO7ln0GTQO5\\n0MVRO3cm0`0QROAfm0HoQOk09\\Ogm0n0WROoNjm0c1POYNYSO5Gh1Pm0SNXSOZ2Um01O01O1[O^ROlNdm0L_ROd00^Oam0O_RO;L[O5:am0n0\\ROgN3;bm0m0\\ROfN3, , and , which mask is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_51.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1383.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "k4d3\\l00O1N2N101O1N1N3N101N101M2O2N101GnKbTOT4Zk0;N1N3K401O010O10O10000O1O101N1O1O2N1O2N2N101N100O2O00001N10001O0O101O001N101O001O0O2O001O1O1O001O1O0O2O1O1O0O2O1O001N2O1O1N101O0O2O0O2O1O1N2O2M2O1N2O1O0O2O1N2O1N2O2M2O2M3N2N2M3N2M3N3L4M2M4L4M3L3M5L;D5K3M3M4L3M^iTT1" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. No" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_52.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1258.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "]d]k0=^o08I8fQOVOSm0P1fRO]Ool0f0fROlNHh0d0WOok0d2mSO^Mok0m2hSOTMUl0^3M4M2N4M7HAc0^O3N>A8Hn0UAnBbKLmNA40:1J[c0V:d\\OcE4`1Uc0W9h\\OkFWc0P;O2i\\O]Ca1Le11mL1a=oS8Pa0mHU^Oc1LlMk0f7Ta0jHY^OVOHMO`2j0e5Ua0gH[^OWO0\\2?h5Va0eHX^O\\OLc10bNd0m7Wa0cHX^O[OMHMb15^NK31I;f8aa0\\HW^OROM020M`18]NK3O2MX9^a0dGg^OROM011O]18^NKR:Sa0UGm^OWOMGO^19^NIR:Ra0VGP_OVOL_17]8k`0oFR_OX12k7j`0nFR_OTOMc15]8l`0lFR_OSONe1O`8Pa0iFS_OSOMZl8cAYGX>\\=L2bCU^Ob:ma0\\EU^ObNNg;oa0fES^OcNNe;Rb0iEm]O_:Vb0c13M3L6K7HnGRC]MhjNgAlIS1L[ON03D90R7Z>kNeAjIT1OYON04B:0o6^>WOaA]I87003I3W8S>bNeAaIN?6_7[>aN_AeJ3f6e2lGn8j6XDS3_2QFgM6a;e6YDU22fG\\2NZ9V6XDl1>hGn1>]9n5WDi1a0hGk1c0^9XOUDY64o1c0iGd1j0n9^5fCm1`3eHl8g=XGWBl8c=TGZBT9a=eFiB^9];cCcDo20`9P;oClDb24a9`:]DZER26c9c8RCiG^1\\Oo15d9V8aClGW1Bk18b9HhBU7V1UHkNj0m1Ji19b9IiBR7^1mHa0Ne17h9MbBQ7b1bH`0ERNc0a3:j9b7ZDbGb00gMb0`3T:j4gHfJPM^ObLQ1i=a4mHlJkLk0Y:n3VIVKdLh0X:d3_IgK[L=X:T2[BjMc7ElK:Q:T2hKbMXJ8n9R2kKjM[J1h9m1ULSNYHYODf0n;]1[BPNR:e0oIMd9W1YBUN_:f0`IJUL@d=_1]B`Nb:g0UILV:k0oLWOlHLY:gHOY:O`M0[H0[f0AbUO=W42Uf04lYOLRf05oYOJoe07XYO_ObLM8=ji09\\YOHiLNhi0:cYOHeLMii08dYOMbLJki07eYOO`LJii05kYO1if00YYO3df0LTYOB\\Lf0Uj0OZYOB`L`0Sj0NZYO<`LUOTj0>nXOYO\\
MQ1FVOQj0>oXO[OZMP1HUOPj09UYOBSMP1JQOPj0:UYOGQMl0Vj0\\OjXOGPMn0Wj0YOkXOHnLo0Wj0WO]YOj0cf0QO_YOS1bj0G_QO\\O_n0d0dQOZO[n0g0fQOXOXn0i0;5N20O010M2O1O11\\QOPOUn0R1;1oQOoNTm0f0URO@f0LTm0b0\\RO]Oa01Rm0a0]SO_Obl0`0`SO@_l0`0dSO]O\\l0b0a1O000K50000010O1O1O100CESQO>mn0:1O1K6KeRO@^k0>dTOB\\k0=fTOA[k0>S20100O1OIHkPO4Uo0NlPONUo03;1O0O100lMJgROM_19jk0LfROL<3KM07Um0LdROM<4IM16Xm0JZSO0_OO05\\n0JVRO2`n0O<4M1gNJVRO2F7Rn0NQRO2G1Xn03hQO1MM]m0HZSO^n0AgQO:[n0^OPROok0@STO1aN1^m0MRTO1`N2^m0NPTO1bN1^m02kSO;Vl0DjSOPo0_OTQOa0kn0^OUQOe0in0[OVQOh0hn0XOWQOi0in0WOVQOk0in0TOXQOl0hn0TOXQOl0on0O00000001N1JSOYQOn0en0TOZQOl0fn0TOZQOm0en0SO[QOm0fn0QO[QOP1jn01O1O1O1O2M2N3K7WROcNgl0`1SSOeNll0[1QSOhNol0X1oROjNPm0W1mROkNSm0W1jROkNSm0S2O2M5L2M3N1O2eSOPMA0ik0T3_TOZM^k0j2ZTO[Mek0`30001N10000000001O00000O1O1N2O1001O001O00O1O1N2O1O1N21O000000O100O1JZKQUOg4oj0ZKPUOf4Pk0XKRUOh4Tk0O1O000000O100IZKSUOg4mj0ZKRUOf4nj0[KQUOe4Pk0YKQUOg4oj0XKRUOh4Sk00O1M3O100O100000000000000000000000000000000O1O1O1O1N21O1O001O00O1O1N2N2O10000001O00O1O1N2N2O11O2N1O001O00O1000000000000000000O1O1N2O10000000000O10000000000000000O100O100O10000000O100O1000O100O101O000000001M2O1O1O1000001O000000O01000O010000000001N100O10000000000000O01O1O1O100001O000000O100O10000100O001OO001N2O1O10000001O0000O1N2L40000001O2N1OO1O1O1O1000000000000O1O1O1000000010O0000000O1000O10000001O00001O1O1O4L4L3M2N1O1O2N2N2N2N2N9G4L:\\KSUOa3jk0LgMUL`XOh3_g0ZLbXOd3^g0\\LdXOb3\\g0`LSWONa0a3[h0cLeVOKH9W1W3]h0eL`VOh0k0b2gh0cL_VOS1c0Y2]i0iMbVOV2^i0lMaVOS2`i0mM_VOS2ai0nM^VOR2bi0nM_VOQ2ai0mMaVOS2_i0lMcVOR2_i0lMcVOS2]i0nMcVOQ2lh0kL_VOU1f0o1ih0PM_VOT1f0k1\\h0nLlVO7OS1h0g1\\h0[OcWOe0]h0\\OcWOc0Uh0lLSWOb2h0c0Sh0mLTWOa2i0a0Rh0PMTWOa2h0?Sh00hWO0Xh03eWOM[h05dWOJ\\h08bWOH^h09bWOF^h0<`WOCah0>_WOAah0?_WOAah0`0_WO_Oah0b0_WO]Oah0d0_WO[Oah0f0^WO[Oah0e0`WOZO`h0g0`WOXO`h0i0`WOVO`h0hMUWOZ2;N`h0gMVWO\\2:L_h0m0bWORO^h0o0aWOQO_h0^MZWOS36^O_h0_M]WOS34]O_h0`M]WOT34[O^h0bM^WOS36YO\\h0eM\\WOT38VO\\h0hMYWOT3;SO\\h0_1dWO`N\\h0a1dWO^N\\h0b1eWO\\N]h0c1dWO\\N]h0c1fWOZN]h0c1eWO[N]h0b1fWO\\N[h0b1hWO[NYh0fNPWOl1
n0ZOSh0\\NRWOE9l1V1O`g0WNoWOe1h0ZNUN^1Vi0`NTXO5hNQ1T2lNnM1OON`0Pk0AoTO=1FY2IlM100Ma0Vk0NiTOF`2LcM1M?Yk0LhTOG\\20eM1K=^k0H[WONZMc0]k0\\OZWOR1`g0hNiVO3i1U1]g0jNiVOOl13RMm0Wj0TOjVOJo13QMo0gi0UO]VO7NE`30_Ln0di0XO_VO?\\3\\O`Ln0gi0TO^VOc0Y3c0[f0cN`VOj0T3d0hf0]OgXOUO_M`1ji0]O^XOX1bg0hN\\XOZ1dg0eN_XOY1ag0gNaXOV1ag0WNPVONa2j1_g0UNTVOO^2l1^g0RNZYOl1`j0M`0@`0@103K6KVPf1JaoYN`0G8ROo0H7J6J6M3M3K5J6L4M3M3N2M3L4M3N2N2O1O1O100O1O1O1O1O1O1O1O1O1O1N2O1O1kN^KkVOc4ah0hKcVOGk0a4ah0iKdVOFi0c4ch0gKeVODh0f4ch0fKeWO[4[h0eKdWO\\4]h0ZK`VO2R1e4eh0[KZWOf4fh0ZKYWOg4ah0UK_VO5n0h4bh0_K\\WOb4dh0^K\\WOb4eh0]KZWOd4fh0\\KXWOf4ih0YKUWOi4kh0VKTWOl4lh0TKRWOn4nh0RKPWOP5Pi0oJPWOR5Pi0mJPWOT5oh0lJQWOU5oh0jJRWOV5mh0kJRWOV5nh0iJRWOX5nh0gJQWO[5jh0bJ\\WO`5ch0bJ[WO_5fh0_J[WOa5bi00O100000000O1N2N2N2N2N2O1O1O1O1N2O1O1O1O1O100O100N2O1N2N2N2O1O1O100O100O100O100000000001O001O1O001O0000000000000000O100001O000000001O0000001O001O001O001O001O2N3M1O1O0000O1K500O1000000000000001O00001O000000001O00000000000000001O00000000010O000O2O001O00000000000O110O000000001O000000000000001O00001O0000000000001O00001O0000000000001O00000000001O0000001O001O0000001O0000000000001O0000001O001O2N001O00001O1O001O0000O100O100000000O10000001O0000001O00001O000000O11O0000000000001O0000001O00000000001O001O000000001O0000001OgJbJP@^5o?dJP@\\5o?eJQ@[5n?fJR@Z5Y?cJS\\O4d4X5X?fJR\\O3f4W5X?gJQ\\O2g4W5W?iJR\\ONh4Y5T?kJU\\OKg4Z5R?nJW\\OGh4Z5P?PKY\\ODh4\\5o>PKY\\ODh4\\5o>PKY\\ODh4\\5o>PKX\\OEi4[5o>oJY\\OGh4Y5o>PKX\\OHi4X5o>PKX\\OHi4X5o>PKX\\OHi4X5n>QKZ\\OEi4Z5m>QK[\\ODi4Z5k>SKiAm4W>SKiAm4W>nJnAR5R>mJoAS5Q>lJPBT5P>kJQBU5o=iJTBV5l=iJUBW5m=hJRBX5n=hJRBX5o=gJQBY5o=gJQBY5n=hJRBX5n=iJQBW5o=iJQBW5o=iJQBW5n=jJRBV5n=iJTBV5l=jJTBV5k=hJXBX5h=hJYBW5f=jJ[BU5d=lJX\\OMg5W5o=oJY\\OJh5W5n=PKY\\OJj5U5l=RKZ\\OIj5U5l=RKZ\\OIj5U5k=SK[\\OHk5T5k=RK[\\OKi5S5l=PK]\\OMg5S5l=lJa\\O1c5S5l=lJa\\O1d5R5j=nJb\\O0d5R5g=RKd\\OLe5R5f=TKd\\OJf5R5f=UKb\\OJi5P5e=VKb\\OJi5P5e=UKb\\OLi5o4e=TKc\\OMh5o4e=SKc\\OOh5n4h=oJ`\\O4h5m4Y>SKgAm4Y>SKhAl4W>UKiAk4W>UKjAj4V>VKjAj4V>VKjAj4V>VKjAj4W>UKiAk4_=oJS]O1`5P5
[=TK^Cl4a, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. " + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_53.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/45.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "dbc6460O2N2N`30[Lhi0V6jCPJhB0O11O1NW15aN33NO110N06KL40000OHM3N:2V6S9bIoFQf0Y8W1M4L300000001O0000000000000000000000000000000000000000O1001O0000000000O1000000001O000000O1000000000000000000000000000000000000O1O1gFiGbJ^8]5gGVB5n6`8e4XGPEMQOd3JnL0[=^;c60000001O001O00001O000000000000001O000000001O000000001O0000001O0000000000001O0000001O0000001O001O00000000000000001O000000000000001O00000000O100O1O1J6L4H`0iET[O^1Mk16QL67Fh0_o0RO3N1O2O001N1OWe_l0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "V]oT15fo05M3N2N2N2O1O10`0@1O0000000000O2O001O3ROSQOc0\\o0I4L6J\\bY2" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "T_Qi0a03GO7^n0o1gQOeMPm0P4PSOYL4M3Ldi0[8RWObGVe0a:RO3N1N2O0O2O0O101O000010O000000000000001O01O000O101O00001O00001O000000000000000001O00000000O11O000001O000000000000000000000000000000001O0000000000000O1000001O000000000000000000000000001O0000000000001O00000000000000000O2O0000001O001O001O00001O0000001O01O0001O00000000000000000O1000000000000000000000000000000000000000000000000000001O000O100000000000000000001O000000000000001O00000000000O100000000000000000000000001O000O100000001O0000001O0O10000000002N3M3M2N1O1O00001O001O00001O00000000001O00001O2N1O1O00000000O1O100000O10000000001O001O001O00001O001O0010O01OO100O100O10000000000O10O01O100001O00000000000O1000001O01O00000000001O010O010O000000000001O01O001O00000001O01O0001O001O00000000001O000000000000000001O000001O00000000000000000001O000000O1000001O00000000001O000000000000000000000000000N2N2O1O10000000001O0001O00000000000000O10000O10001O00000001O0000000000000000000001O0O1000000001O0000000000000000000000000000O10000000000001O000000000001O000000000YA" + } + ], + "question": "Which of the 
following mask :, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. None of the above are in the mirror" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_54.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/3005.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "PPU>2no000001O00001O001O0000001O00001O00001O001O0000001O00001O0000001O0000001O001O0000001O0000001O00001O0000001O00001O0000001O0000001O0000001O0000001O000000001O000000001O000000001O00001O00001O000000001O000000001O000000001O000000001O0000001O0000001O00001O000000000000O10000O1O1O100O1O1O1001O00001O001O001O0000000000001O001O1O1O0000O1001O001O001O1O00O1000000001O0000000000000000001O00001O000000000000001O001O0000001O0000001O0000000000000000O1NRPQc0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "Tea1?_o05M1N101O00001O00000000000000000000000000000000O1000000000000000000O100000000000O100O2N1O2M4I6J7J6M4L4M3L2O000O101O0000O2O001N2N4K5K7Gb0UOX[YT1" + } + ], + "question": "Are and located within the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. 
Neither nor is in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_55.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1626.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "k`gk0:`o07K4L5K4K5M3M3K5K5L4K5L4L4J6M3N2L4L4L4M3K5K5M3J7L3M3M3L4J6L4L4L4L4L4M3M3L4K5L4L4L4K5M3K5L4M3M3K5L4M3L4K5K5L4M3M3L4L4L4L4L5K4L4L4L4M3L4M3L4M3M3L4N2N2O1O1O1O100O1000010O0000000000000000000001O000000000000000001O00000000000000000000000000000000000001O000000000000000000000000000000000000000001O00000000000001O0000000000000000000000000000000000000000000000001O0000000000O2O01O0000000000000000000000000O1001O001O00000000000000000000000000000000000000000001O0000000000000000000000001O0000000001O0000000000000000000000000000O10000001O000000O2O0000000001O00000000O100001O0000000000000001O0001O000000000000O10000001O0000000001O00000000O1001O000000000000000000000000001O00O10000000000000000001O000001O0000000000000000000000000001O0000000000000000000000000000000000001O000000O2O000001O0000000000oG" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. No" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_56.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2291.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "`_n52k>6L2O2nAHX=<]BKc=j0^OmNUCS1hP1h9]OmEA=7_OGW:`0PF_O;8a:7WE@97a:8cFF^99cFF^9:cFB`9=b20000001O000000010O0013Dd^V4" + }, + { + "size": [ + 480, + 640 + ], + "counts": "edk84k>2N2gALP=MRC9LLo, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
None of the above are in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_57.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1584.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "\\Z[93mo02M101N10001O00000O101O00000000001O0000000000000000000000000000000000000000000000000001O000000000O101O000O101N101N1Mfeil0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "RbVU11f0a0om0KaQO<^n0d0O1N2O2N8H1O000000001O00000001O01O001O0000O1N2N2O1N2N2N2N2O1N2N2M3N2O1N2N2MoQOVORm0i0mROYOSm0e0lRO^OTm0`0kROCUm0:mROGSm07mROKSm02oROOQm0OPSO2Pm0LoRO6Rm0ImRO9Sm0EmRO=Sm0BmRO?Sm0_OmROd0Tn03N2N2N2O1M3N2N3N1N2N2N2N2O1N2N2N2N2O001O1001O001O3M4K4[OoQOQO_n05Y^8" + } + ], + "question": "Can you tell me if and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_58.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/647.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "d_d5`04h0Wi0h5A6M2OO010O002N1N3M3M3M2O101N1O101O00001O00000000000001O0000000001O00000000000000O101O001O010O4L3M2N4L10O0O1O100O101N1M4]Mi2kMhf\\P1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": 
"bidl01e0;D1mm0_1N2O0O1O100O1O1O001O100O1O1O100O1O1O1O100O1O1O100O100O10O01000000000000O0100000O10000000000O100O0100000000000O1000O1000O100000000000000O00100O100O100O10O10000000O100O100000O10O100000000000000O010O100O100O100O100000O10O1O100O1O100O001O100O1DSM`SOQ3]l0;N2O1O1O010O101N10O010000O1lNXLoUOh3Tk001000O100O10000O1000O010000O1000000[KTLT]Ol3lb0TLU]Ol3`g00O10O0100O01O1O100O00001O001O001O1O001O1O1O1O1O1O001O1O1O1O001O001O00001O1O100O1O1O001O1N2O001O1O001O001O1O001O001O100O1O00001N101O1O001O1O00001O001O1O00100O001O1O1O1O1O1O001O1O1N2O00001O001O1O10O01O1O001O1N2O001O001O1O1O001O1O001O001O1O1O001O001O001O1O001O001O001O001O0010O01O00001O3M2N001O1O1O1N10001O00001O1O1O1O10O01O000O101O001O0000001O00000^OgPO=`o0N1O001O00000O010O1O1O2Nho:L]PE0O2O01N1000eP2" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "bii11`03hn0n0E?Bi0WOP1`ROoLQl0f3J7I>C?@1O010O00010O01O00001O00010O0010O000001O00010O001O010O00010O01O0100O010O01O01O00001O00100O1O010O000010O00001O1O010O0001O001O010O01O01O01O0001O001O1O10O01O001O00001O001O010O00001O01O01O001000O10O00010O0000010O01O0010O00010O000010O010O0010O001O010O0100O1O10O01O0001O01O010O0010O1O001O0010O01O1O1O1O1O0000000000O10001O01O001O00001O00001O0000001O00001O1O001O1O001O0000001O000O10001O001O001O001O001O001O001O100N101O00001O00001O00000000001O00001O00001O002N5J2O1O00000001O0McLdSO[3]l0eLcSO[3]l0eLcSO[3]l0eLdSOZ3\\l0dLfSO\\3`l0O00001O001O1O1O1N11O000000000000000000000001O00000000O1001O0000001O00001O001O1O1O1O1O00001O000000000000000000000000001O1O1O001O0000000ZMkROa2Vm0\\MmROc2Xm0000001OO10000O1000000001O00001O0000001O00001O00001O00001O01O01O001O00001O0000001O0000001O1O1O0000001O001O00001O1O1O001O001O001O0000O10000001O00001O001O001O0000001O00001O001O1O00001O00001O00001O00001O001O1O00001O00000000001O1O1O1O00000000001O001O00001O000000001O00001O00001O0000001O1O1O001O00001O00000O2O00001O1O0000001O00001O001O00001O0O2O00001O00001O000000001O001O001O0000001O00001O00010O1O001O00001O0000001O00001O00001O00001O00001O0000001O001O1N101O00000000001O001N100O
2O00VPld0" + } + ], + "question": "Which of the following mask :, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. None of the above are in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_59.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/256.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "o_e4;bo05L4L4L3N1O2N2N101N100O1O2O0O100O100000O0100000O100O10000O100O100O1O1O2N100O2N2N1O2N2M3M4L5J\\`kQ1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "Zoi86io01O1O100O010O100O100O100N20O01O1000O01O10O0100O100O10O0100O1O100O100O10000O010O100O1O100O10000O100O1O100O010O10000O100O100O100O100O10000O2O0O100O100O100O10000O10000O100000000O10000000001N10000OROmQO1Sn0LTROOkm0O[ROOdm0OaROO^m01eROL\\m03gROI[m07W1O2O0NVRm7NmmRH1O2N1N200O1N2O1O1O1O1O1O100O1O1O1O1N2N2N2K5M3L4N1O2N2O2N100O1001O00000O101O00001O0O10000O2O000O100O2O0O100O1000000O2O000000O010000O1000000O10000O0100000O10O100000000O010O100O100O00100O1O1O0O2L4M3O1N2N110O100000000001O0000001O00001O000O2O001O001O0010O01O001O00001O001O00001O0010O01O001O010O00100O001O001O010O001O001O010O001O1O1O0010O01O00100O001O010O1O00100O001O1O010O00100O001O1O01000O100O1O100O001O1O001O2N1O1O1O1O1O1OWP[=" + } + ], + "question": "Could you confirm whether and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_60.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00912042.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "RdY35hg05`YOMKa0Wc0Fh\\ONO>Vc0He\\OO2?", + "choices": [ + "A. Red and white.", + "B. Blue and white.", + "C. Black and orange.", + "D. White and black." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_61.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00912042.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "RdY35hg05`YOMKa0Wc0Fh\\ONO>Vc0He\\OO2?", + "choices": [ + "A. It is a mix of red, black, and white.", + "B. It is a mix of orange, black, and white.", + "C. It is a mix of blue, black, and white.", + "D. It is a mix of green, black, and white." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_62.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00912042.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "RdY35hg05`YOMKa0Wc0Fh\\ONO>Vc0He\\OO2 primarily composed of?", + "choices": [ + "A. Entirely made of high-strength plastic.", + "B. Primarily composed of aluminum and rubber.", + "C. A mix of carbon fiber and fiberglass.", + "D. A combination of metal and plastic." + ], + "answer": "D", + "type": "material", + "image": "images/vqa_63.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00912042.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "RdY35hg05`YOMKa0Wc0Fh\\ONO>Vc0He\\OO2?", + "choices": [ + "A. It features a solid orange color with black lettering.", + "B. It has a wavy blue and black design.", + "C. It is decorated with a pattern of geometric shapes.", + "D. It has a colorful floral and vine-like pattern." 
+ ], + "answer": "C", + "type": "texture/pattern", + "image": "images/vqa_64.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00912850.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "iWm>2\\g01RYO5hf0MnXO?MM`f0EbYOR1OkNle0P2[ZOWNLMhd0l2O1G711O100OI_[OlL2O^d0W31kL`[OV3`d0jL`[OW3_d050000O2O00O01O001O01O01O1O1000O01O01000O1O1O100O1001O1O0O2O001O001N10000O1000O1000O1L31O01O010001O1O001O0000000L4O001O1O1O1001O0O1O10000O21OO1O000O100000N2O100000000O2O1O00001O00000001O001O100O01OO1001O5L0O00100O01O0O2OO2N101O1O2N1O00012N0000000O02N001O5K1O3M2N1N3M2M5F8N1XMW[OW2ld0gMW[OV2kd0gMW[OV2md0hMV[OS2^e0M7aNPZOa0Tf0]OoYO>Uf0@mYOYf0^OiYO?Xf0BkYO9Wf0GiYO8Xf0GiYO8Xf0GjYOM]O0Qg02[aU5" + } + ], + "question": "What is the primary material of the filling in ?", + "choices": [ + "A. Crab meat.", + "B. Spicy tuna.", + "C. Avocado and carrot.", + "D. Tofu." + ], + "answer": "A", + "type": "material", + "image": "images/vqa_65.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00915597.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": 
"[bP55jo02N2O1N1O1O2N101N1O100O1O100O100O1O1YRODjk0Vk0AjTOb0Sk0_OlTOd0Qk0\\OoTOg0oj0YOPUOj0mj0VOSUOn0jj0ROUUOQ1hj0oNXUOU1dj0kN\\UOY1aj0fN_UO]1^j0cNbUOa1Zj0_NfUOd1Xj0[NhUOi1Tj0XNkUOl1Qj0TNoUOo1ni0QNRVOS2ji0mMVVOV2gi0lMWVOX2ei0iMYVO[2di0fMZVO_2bi0bM[VOd2ai0]M]VOg2`i0ZM]VOl2_i0VM^VOP3]i0QMaVOU3Zi0lLcVOX3[i0iLcVOZ3[i0gLcVO\\3[i0eLbVO^3]i0dL`VO^3`i0bL]VOa3bi0`L\\VOb3ci0_L[VOb3ei0\\101O000010ZJ]VOn4ci0RK_VOl4bi0SK`VOl4`i0SKbVOk4_i0TKdVOi4]i0VKeVOh4\\i0WKfVOh4Zi0WKiVOf4Xi0YKhVOg4Yi0XKhVOg4Yi0XKgVOh4[i0VKeVOk4\\i0SKdVOm4^i0QKbVOo4Wj0nK[UOj2ej0WMeUO^2\\j0bMhUOZ2Wj0VMVUONc0j2Wj0XMWUONc0h2Vj0ZMXUONc0f2Uj0\\MYUOOb0c2Uj0^MZUOOc0a2Sj0_M[UO0c0_2Rj0aM\\UO0c0]2Qj0cM]UO1b0Z2Qj0eM^UO1c0W2oi0hM_UO1d0U2li0jMaUO2d0Q2ki0mMcUO1d0o1ji0oMcUO2e0l1hi0RNdUO3e0h1gi0UNeUO3f0e1ei0XNfUOG@3W1i1ei0]NeUOG_O4V1f1gi0_NeUOG^O4T1f1ji0_NeUOG^O3R1f1mi0_NdUOH]O4P1d1Pj0`NdUOH\\O4n0d1Sj0`NdUOH\\O3l0d1Vj0aNbUOH\\O4j0b1Zj0aNaUOI[O4h06XOf0Wk0VO_UOKZO3f06\\Oc0Vk0YO`UOJXO4e06B=Sk0^O_UOKWO5c04K7mj0E_UOKWO4`071Okj0J^UOLWO4=87Hhj01]UOKWO5;9=_Ofj07\\UOLWO48Xl0A_TOMUO51?[l0^O_TOOVO3Oa0[l0^O_TOOWO3Mb0\\l0[OaTO0WO2Je0]l0ZOaTO0YOT1Vl0kNbTO2WOS1Vl0lNcTO1XOR1Ul0lNdTO2WOR1Tl0mNeTO1XOQ1Sl0mNfTO3VOP1Sl0nNfTO3XOm0Sl0oNfTO4WOm0Rl0POgTO4WOj0Sl0QOgTO5VOj0Rl0ROhTO4UOk0Sl0POiTO6SOj0Sl0QOiTO6SOi0Ul0POgTO9SOh0Ul0POfTO>POc0Zl0oNdTOm1\\k0SNcTOo1]k0QNaTOP2_k0PN`TOR2`k0nM^TOT2ak0lM^TOU2ck0kM[TOW2dk0jMZTOW2gk0hMXTOY2hk0hMWTOX2jk0gMUTOo0AOYl0SOTTOh0ASO3P1Yl0TOSTOj0BPO4O0g0Wl0ASTOh0EmNb0d0gk0GQTOi0d0TO`k03nSOh0^m0XObROg0_m0XOcROg0\\m0ZOeROe0[m0ZOgROd0Ym0]OiROa0Wm0_OjRO`0Um0@nRO>Rm0BoRO]ROBdm0a0YRO_Ohm0b0URO^Omm0c0QRO]Oom0e0nQO\\ORn0g0kQOXOVn0j0gQOWOYn0W110RROnNol0V2K3M2N1O1O2N1K6N1N2N2O0O1O100O00iLbSOm2_l0RMbSOn2]l0SMcSOl2]l0TMcSOl2^l0SMcSOl2]l0UMcSOj2]l0VMdSOj2\\l0UMdSOk2\\l0VMdSOi2\\l0WMeSOh2\\l0WMeSOi2[l0WMeSOh2\\l0XMdSOg2]l0ZMcSOd2^l0\\McSOb2\\l0aMdSO^2Zl0dMhSOY2Xl0iMhSOU2Yl0kMiSOR2Xl0nMiSOOF`1bl0aNiSOKIc1]l0cNlSOFIf1\\l0dN[TOZ1fk0fN[TOX1fk0hNZTOW1fk0jN[TO4PO:el0B\\TO3oN:fl0C[TO2PO:fl0D[TO1PO9fl0F[TOOPO:gl0FYTO0PO:gl0FZTOOPO9gl0G[TONoN:gl
0G[TOOnN9hl0H[TOMoN:gl0G\\TONmN:hl0G\\TONmN:hl0H\\TOMmN9il0H\\TOMlN:il0H]TOLkN?", + "choices": [ + "A. White.", + "B. Yellow.", + "C. Green.", + "D. Brown." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_66.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00915597.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "[bP55jo02N2O1N1O1O2N101N1O100O1O100O100O1O1YRODjk0Vk0AjTOb0Sk0_OlTOd0Qk0\\OoTOg0oj0YOPUOj0mj0VOSUOn0jj0ROUUOQ1hj0oNXUOU1dj0kN\\UOY1aj0fN_UO]1^j0cNbUOa1Zj0_NfUOd1Xj0[NhUOi1Tj0XNkUOl1Qj0TNoUOo1ni0QNRVOS2ji0mMVVOV2gi0lMWVOX2ei0iMYVO[2di0fMZVO_2bi0bM[VOd2ai0]M]VOg2`i0ZM]VOl2_i0VM^VOP3]i0QMaVOU3Zi0lLcVOX3[i0iLcVOZ3[i0gLcVO\\3[i0eLbVO^3]i0dL`VO^3`i0bL]VOa3bi0`L\\VOb3ci0_L[VOb3ei0\\101O000010ZJ]VOn4ci0RK_VOl4bi0SK`VOl4`i0SKbVOk4_i0TKdVOi4]i0VKeVOh4\\i0WKfVOh4Zi0WKiVOf4Xi0YKhVOg4Yi0XKhVOg4Yi0XKgVOh4[i0VKeVOk4\\i0SKdVOm4^i0QKbVOo4Wj0nK[UOj2ej0WMeUO^2\\j0bMhUOZ2Wj0VMVUONc0j2Wj0XMWUONc0h2Vj0ZMXUONc0f2Uj0\\MYUOOb0c2Uj0^MZUOOc0a2Sj0_M[UO0c0_2Rj0aM\\UO0c0]2Qj0cM]UO1b0Z2Qj0eM^UO1c0W2oi0hM_UO1d0U2li0jMaUO2d0Q2ki0mMcUO1d0o1ji0oMcUO2e0l1hi0RNdUO3e0h1gi0UNeUO3f0e1ei0XNfUOG@3W1i1ei0]NeUOG_O4V1f1gi0_NeUOG^O4T1f1ji0_NeUOG^O3R1f1mi0_NdUOH]O4P1d1Pj0`NdUOH\\O4n0d1Sj0`NdUOH\\O3l0d1Vj0aNbUOH\\O4j0b1Zj0aNaUOI[O4h06XOf0Wk0VO_UOKZO3f06\\Oc0Vk0YO`UOJXO4e06B=Sk0^O_UOKWO5c04K7mj0E_UOKWO4`071Okj0J^UOLWO4=87Hhj01]UOKWO5;9=_Ofj07\\UOLWO48Xl0A_TOMUO51?[l0^O_TOOVO3Oa0[l0^O_TOOWO3Mb0\\l0[OaTO0WO2Je0]l0ZOaTO0YOT1Vl0kNbTO2WOS1Vl0lNcTO1XOR1Ul0lNdTO2WOR1Tl0mNeTO1XOQ1Sl0mNfTO3VOP1Sl0nNfTO3XOm0Sl0oNfTO4WOm0Rl0POgTO4WOj0Sl0QOgTO5VOj0Rl0ROhTO4UOk0Sl0POiTO6SOj0Sl0QOiTO6SOi0Ul0POgTO9SOh0Ul0POfTO>POc0Zl0oNdTOm1\\k0SNcTOo1]k0QNaTOP2_k0PN`TOR2`k0nM^TOT2ak0lM^TOU2ck0kM[TOW2dk0jMZTOW2gk0hMXTOY2hk0hMWTOX2jk0gMUTOo0AOYl0SOTTOh0ASO3P1Yl0TOSTOj0BPO4O0g0Wl0ASTOh0EmNb0d0gk0GQTOi0d0TO`k03nSOh0^m0XObROg0_m0XOcROg0\\m0ZOeROe0[m0ZOgROd0Ym0]OiROa0Wm0_OjRO`0Um0@nRO>Rm0BoRO]ROBdm0a0YRO_Ohm0b0URO^Omm0c0QRO]Oom0e0nQO\\ORn0g0kQOXOVn0j
0gQOWOYn0W110RROnNol0V2K3M2N1O1O2N1K6N1N2N2O0O1O100O00iLbSOm2_l0RMbSOn2]l0SMcSOl2]l0TMcSOl2^l0SMcSOl2]l0UMcSOj2]l0VMdSOj2\\l0UMdSOk2\\l0VMdSOi2\\l0WMeSOh2\\l0WMeSOi2[l0WMeSOh2\\l0XMdSOg2]l0ZMcSOd2^l0\\McSOb2\\l0aMdSO^2Zl0dMhSOY2Xl0iMhSOU2Yl0kMiSOR2Xl0nMiSOOF`1bl0aNiSOKIc1]l0cNlSOFIf1\\l0dN[TOZ1fk0fN[TOX1fk0hNZTOW1fk0jN[TO4PO:el0B\\TO3oN:fl0C[TO2PO:fl0D[TO1PO9fl0F[TOOPO:gl0FYTO0PO:gl0FZTOOPO9gl0G[TONoN:gl0G[TOOnN9hl0H[TOMoN:gl0G\\TONmN:hl0G\\TONmN:hl0H\\TOMmN9il0H\\TOMlN:il0H]TOLkN in the image?", + "choices": [ + "A. The masked object has a smooth, glossy surface.", + "B. The masked object has serrated or jagged edges.", + "C. The masked object has small dark spots on its surface.", + "D. The masked object is covered in tiny black specks." + ], + "answer": "C", + "type": "texture/pattern", + "image": "images/vqa_67.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00926777.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "oX[=4hg06L4N1N100O2O001O001N101O010O010O01000O0100O010O1N1N3N1N3M3N1N3Nonk9" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Black.", + "B. Red.", + "C. White.", + "D. Brown." + ], + "answer": "D", + "type": "color", + "image": "images/vqa_68.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00926777.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "oX[=4hg06L4N1N100O2O001O001N101O010O010O01000O0100O010O1N1N3N1N3M3N1N3Nonk9" + } + ], + "question": "Which of the following best describes the shape of ?", + "choices": [ + "A. A rectangular object with sharp corners.", + "B. A cylindrical object.", + "C. A rectangular object with rounded corners.", + "D. A flat, circular object." 
+ ], + "answer": "C", + "type": "shape", + "image": "images/vqa_69.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00926777.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "XXW78ag07I7J7O0K5N201O000O2O0000000000000O101O0000000000000000000000000000000000000000000010OO10000000000000000000000000000000000000000000000000000O10000000001O00000000000000000000000001O000000000000000000001O000000000000000000001O000000002N1O1O1O1O1O3M1O001O001OVPh=" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. Metallic.", + "B. Plastic.", + "C. Wooden.", + "D. Ceramic." + ], + "answer": "A", + "type": "material", + "image": "images/vqa_70.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00926777.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "XXW78ag07I7J7O0K5N201O000O2O0000000000000O101O0000000000000000000000000000000000000000000010OO10000000000000000000000000000000000000000000000000000O10000000001O00000000000000000000000001O000000000000000000001O000000000000000000001O000000002N1O1O1O1O1O3M1O001O001OVPh=" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Red.", + "B. Black.", + "C. Brown.", + "D. White." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_71.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00946186.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "gk_25ig02J7N?", + "choices": [ + "A. Circular.", + "B. Square.", + "C. Rounded rectangular.", + "D. Oval." + ], + "answer": "C", + "type": "shape", + "image": "images/vqa_72.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00946186.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "gk_25ig02J7N shown in the image?", + "choices": [ + "A. Wood.", + "B. Glass.", + "C. 
Metal.", + "D. Plastic." + ], + "answer": "D", + "type": "material", + "image": "images/vqa_73.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00946186.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "gk_25ig02J7N?", + "choices": [ + "A. White.", + "B. Pink.", + "C. Brown.", + "D. Black." + ], + "answer": "D", + "type": "color", + "image": "images/vqa_74.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00951281.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "[_Y36eg06J6I8I6J5M3HiNeYOY1Zf0iNbYOZ1\\f061O00O100001O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1M3J6I7J6I7J6JRbjb0" + } + ], + "question": "What is the texture or pattern of ?", + "choices": [ + "A. A grille with concentric ridges and radial spokes.", + "B. A pattern of small, rectangular tiles.", + "C. A grid pattern of small, square panes.", + "D. A perforated lattice pattern." + ], + "answer": "A", + "type": "texture/pattern", + "image": "images/vqa_75.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00951281.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "[_Y36eg06J6I8I6J5M3HiNeYOY1Zf0iNbYOZ1\\f061O00O100001O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1M3J6I7J6I7J6JRbjb0" + } + ], + "question": "What is the shape of ?", + "choices": [ + "A. The masked object is circular.", + "B. The masked object is square.", + "C. The masked object is rectangular.", + "D. The masked object is arched." 
+ ], + "answer": "C", + "type": "shape", + "image": "images/vqa_76.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00951281.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "[_Y36eg06J6I8I6J5M3HiNeYOY1Zf0iNbYOZ1\\f061O00O100001O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1M3J6I7J6I7J6JRbjb0" + } + ], + "question": "What is the shape of the vent on ?", + "choices": [ + "A. Rectangular.", + "B. Circular.", + "C. Arched.", + "D. Square." + ], + "answer": "B", + "type": "shape", + "image": "images/vqa_77.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00951281.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "[_Y36eg06J6I8I6J5M3HiNeYOY1Zf0iNbYOZ1\\f061O00O100001O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1M3J6I7J6I7J6JRbjb0" + } + ], + "question": "What is the color of the fan inside ?", + "choices": [ + "A. White.", + "B. Red.", + "C. Orange.", + "D. Black." + ], + "answer": "D", + "type": "color", + "image": "images/vqa_78.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00975971.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "j\\P6T1lf0;E1O1O000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004L`0@`0@Rbh`0" + } + ], + "question": "Which of the following best describes the texture/pattern of in the image?", + "choices": [ + "A. It is a price tag with printed text.", + "B. It is a price tag with handwritten text.", + "C. It is a blank piece of paper without any text.", + "D. It is a sticker with a barcode on it." 
+ ], + "answer": "B", + "type": "texture/pattern", + "image": "images/vqa_79.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00975971.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "j\\P6T1lf0;E1O1O000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004L`0@`0@Rbh`0" + } + ], + "question": "Based on the image, what is the shape of ?", + "choices": [ + "A. It is a rectangular object.", + "B. It is a square object.", + "C. It is a circular object.", + "D. It is a triangular object." + ], + "answer": "A", + "type": "shape", + "image": "images/vqa_80.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00975971.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "j\\P6T1lf0;E1O1O000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004L`0@`0@Rbh`0" + } + ], + "question": "What is the color of in the image?", + "choices": [ + "A. Red.", + "B. White.", + "C. Black.", + "D. Brown." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_81.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00981094.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "R_]>4lg0000000000000O10O1000000000000000000000O10000000O1000000000000O1O01000000000000000000000O10O100000000001O000000000000O10O1000000000000000000O10O1000000000000000O1000000000O10000000O1000000000000000O10O1000000000O2O0000000000O10000000000000O100000000000O10000000000000000L5O22L6JSaY6" + } + ], + "question": "What is the primary color of 's shaft?", + "choices": [ + "A. Black.", + "B. Red.", + "C. Silver.", + "D. Blue." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_82.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00981094.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "R_]>4lg0000000000000O10O1000000000000000000000O10000000O1000000000000O1O01000000000000000000000O10O100000000001O000000000000O10O1000000000000000000O10O1000000000000000O1000000000O10000000O1000000000000000O10O1000000000O2O0000000000O10000000000000O100000000000O10000000000000000L5O22L6JSaY6" + } + ], + "question": "Which of the following descriptions accurately represents the texture or pattern of ?", + "choices": [ + "A. The handle is smooth and made of two different colors of plastic.", + "B. The handle features a series of parallel grooves running along its length.", + "C. The metallic grip area has a knurled, cross-hatched pattern for a better hold.", + "D. The entire surface of the object is smooth and polished metal." + ], + "answer": "C", + "type": "texture/pattern", + "image": "images/vqa_83.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01002306.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 902 + ], + "counts": "nie;5go06J5K5K5K5K5L4K5M3M3O0O2O1N2O1O1O1O10O0100O1N2O1O1O1N2O001N2O1O1O1N2O1O1O0O2O1O1O1N2O1O1O1N101O1N101O000O1000000O1000000O10O10O1000000O1000000O1000000O0100000O10000O1000000O1000O10O1000000O10000O0100O1O001N2DK5K6J6K5J6JeWi1" + } + ], + "question": "What is the color of the main body of ?", + "choices": [ + "A. Blue.", + "B. White.", + "C. Brown.", + "D. Blue and white." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_84.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01002306.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 902 + ], + "counts": "nie;5go06J5K5K5K5K5L4K5M3M3O0O2O1N2O1O1O1O10O0100O1N2O1O1O1N2O001N2O1O1O1N2O1O1O0O2O1O1O1N2O1O1O1N101O1N101O000O1000000O1000000O10O10O1000000O1000000O1000000O0100000O10000O1000000O1000O10O1000000O10000O0100O1O001N2DK5K6J6K5J6JeWi1" + } + ], + "question": "Which of the following statements about the color of is correct?", + "choices": [ + "A. The hull of the object is painted dark blue.", + "B. The entire object is covered by a large blue tarp.", + "C. The nameplate on the side features white lettering.", + "D. There is a red life preserver attached to its side." + ], + "answer": "D", + "type": "color", + "image": "images/vqa_85.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01002306.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 902 + ], + "counts": "nie;5go06J5K5K5K5K5L4K5M3M3O0O2O1N2O1O1O1O10O0100O1N2O1O1O1N2O001N2O1O1O1N2O1O1O0O2O1O1O1N2O1O1O1N101O1N101O000O1000000O1000000O10O10O1000000O1000000O1000000O0100000O10000O1000000O1000O10O1000000O10000O0100O1O001N2DK5K6J6K5J6JeWi1" + } + ], + "question": "What is the color of the component mounted at the rear of ?", + "choices": [ + "A. Blue.", + "B. White.", + "C. Black.", + "D. Brown." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_86.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01002306.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 902 + ], + "counts": "Vfg<1oo0101N1O10O100000000000_POOQo02nPONSo03kPOMVo0?05KO1O12N1kPO[Oln0e0QQO]Oon0j01M3N4K20001N100O1OO2O001N10001N02O0O2N10100O0100O010O10O00100N1O2D`PO5bo032N2MiYd=" + } + ], + "question": "What is the color of the head of ?", + "choices": [ + "A. Gray.", + "B. Blue.", + "C. 
White.", + "D. Black." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_87.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01002306.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 902 + ], + "counts": "Vfg<1oo0101N1O10O100000000000_POOQo02nPONSo03kPOMVo0?05KO1O12N1kPO[Oln0e0QQO]Oon0j01M3N4K20001N100O1OO2O001N10001N02O0O2N10100O0100O010O10O00100N1O2D`PO5bo032N2MiYd=" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Blue and white.", + "B. Pure white.", + "C. A mix of white, brown, and gray.", + "D. Black and brown." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_88.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01002306.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 902 + ], + "counts": "Vfg<1oo0101N1O10O100000000000_POOQo02nPONSo03kPOMVo0?05KO1O12N1kPO[Oln0e0QQO]Oon0j01M3N4K20001N100O1OO2O001N10001N02O0O2N10100O0100O010O10O00100N1O2D`PO5bo032N2MiYd=" + } + ], + "question": "What color is the beak of ?", + "choices": [ + "A. The beak is yellow.", + "B. The beak is black.", + "C. The beak is white.", + "D. The beak is grey." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_89.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01002306.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 902 + ], + "counts": "[f^>3lo01O1O001O0100001O1O001O1^Oa001M4UQOSO_n0Q1]QOoNbn0S1\\QOnNdn0W100O0100000001O003MO\\O\\QONcn01`QON_n01dQON[n02hQOJYn05jQOHWn07lQOEVn0;g0O1O100000001O1O1O1O1O1O1O2N1OhZU<" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Pure white.", + "B. A combination of grey and brown.", + "C. Black and white.", + "D. Blue and grey." 
+ ], + "answer": "B", + "type": "color", + "image": "images/vqa_90.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01002306.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 902 + ], + "counts": "[f^>3lo01O1O001O0100001O1O001O1^Oa001M4UQOSO_n0Q1]QOoNbn0S1\\QOnNdn0W100O0100000001O003MO\\O\\QONcn01`QON_n01dQON[n02hQOJYn05jQOHWn07lQOEVn0;g0O1O100000001O1O1O1O1O1O1O2N1OhZU<" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. It is primarily blue and white.", + "B. It has a combination of white, grey, and brown feathers.", + "C. It is completely black.", + "D. It is mostly white with some black markings." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_91.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01002306.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 902 + ], + "counts": "[f^>3lo01O1O001O0100001O1O001O1^Oa001M4UQOSO_n0Q1]QOoNbn0S1\\QOnNdn0W100O0100000001O003MO\\O\\QONcn01`QON_n01dQON[n02hQOJYn05jQOHWn07lQOEVn0;g0O1O100000001O1O1O1O1O1O1O2N1OhZU<" + } + ], + "question": "Which of the following statements correctly describes a shape characteristic of in the image?", + "choices": [ + "A. The wings of the object are fully folded against its body.", + "B. The tail of the object is spread out in a fan shape.", + "C. The beak of the object is noticeably curved upwards.", + "D. The entire body of the object forms a straight, horizontal line." 
+ ], + "answer": "B", + "type": "shape", + "image": "images/vqa_92.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01002306.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 902 + ], + "counts": "[f^>3lo01O1O001O0100001O1O001O1^Oa001M4UQOSO_n0Q1]QOoNbn0S1\\QOnNdn0W100O0100000001O003MO\\O\\QONcn01`QON_n01dQON[n02hQOJYn05jQOHWn07lQOEVn0;g0O1O100000001O1O1O1O1O1O1O2N1OhZU<" + } + ], + "question": "What is shown in the image?", + "choices": [ + "A. A bird swimming in the water.", + "B. A bird perched on a boat cover.", + "C. A flying bird.", + "D. A bird sitting on the roof of a boat." + ], + "answer": "C", + "type": "shape", + "image": "images/vqa_93.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01010195.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "nWob0Q1of01O2N1O1O001O000000000000000001O00001N2O1O001N2O1O0O2O1O1N101O1N2O1O1N2O1O1N2O1O1N3N1M\\go3" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Gold.", + "B. Gray.", + "C. White.", + "D. Red." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_94.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01010195.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "nWob0Q1of01O2N1O1O001O000000000000000001O00001N2O1O001N2O1O0O2O1O1N101O1N2O1O1N2O1O1N2O1O1N3N1M\\go3" + } + ], + "question": "What is the texture of ?", + "choices": [ + "A. Glossy.", + "B. Matte.", + "C. Rough.", + "D. Ribbed." 
+ ], + "answer": "B", + "type": "texture/pattern", + "image": "images/vqa_95.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01010195.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "nWob0Q1of01O2N1O1O001O000000000000000001O00001N2O1O001N2O1O0O2O1O1N101O1N2O1O1N2O1O1N2O1O1N3N1M\\go3" + } + ], + "question": "To which device does belong?", + "choices": [ + "A. It is a red alarm button on the console.", + "B. It is a spherical paperweight used to hold down papers.", + "C. It is the trackball of an ergonomic mouse.", + "D. It is a hold-indicator light for the telephone." + ], + "answer": "C", + "type": "shape", + "image": "images/vqa_96.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01010195.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "URc=?`g0d0]Od0\\Oc0]Od0\\Od0[Od0]O:F1O00000O1000000000O100000O100000000000000O01000000000000000O100000O1000000000O10000000000000O10O10000000000000000O10O10000000000000O10000000O10000000O10000000000000O10O10000000000000000O10O10000000000000O10000000O10000000O1000000000000000O010000000000000000O1000O100000000000O1000000000O100000O1000000000000000O010000000000000000O1000O100000000000O100000000000O2O:Fe0[Od0\\Od0\\Oe0[Od0\\Oe0ZO\\Ri5" + } + ], + "question": "Which of the following best describes the shape of ?", + "choices": [ + "A. It is a square.", + "B. It is rectangular.", + "C. It is a trapezoid.", + "D. It is a parallelogram." 
+ ], + "answer": "B", + "type": "shape", + "image": "images/vqa_97.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01010195.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "URc=?`g0d0]Od0\\Oc0]Od0\\Od0[Od0]O:F1O00000O1000000000O100000O100000000000000O01000000000000000O100000O1000000000O10000000000000O10O10000000000000000O10O10000000000000O10000000O10000000O10000000000000O10O10000000000000000O10O10000000000000O10000000O10000000O1000000000000000O010000000000000000O1000O100000000000O1000000000O100000O1000000000000000O010000000000000000O1000O100000000000O100000000000O2O:Fe0[Od0\\Od0\\Oe0[Od0\\Oe0ZO\\Ri5" + } + ], + "question": "What is the primary material of ?", + "choices": [ + "A. Plastic.", + "B. Wood.", + "C. Metal.", + "D. Glass." + ], + "answer": "B", + "type": "material", + "image": "images/vqa_98.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01070155.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "id_89fo0100000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000Y[m<" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Orange.", + "B. Blue.", + "C. White.", + "D. Black." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_99.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01070155.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "id_89fo0100000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000Y[m<" + } + ], + "question": "What is the shape of ?", + "choices": [ + "A. Cuboid.", + "B. Cylindrical.", + "C. Conical.", + "D. Rectangular." 
+ ], + "answer": "B", + "type": "shape", + "image": "images/vqa_100.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01070155.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "id_89fo0100000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000Y[m<" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. Paper.", + "B. Cloth.", + "C. Wood.", + "D. Plastic." + ], + "answer": "D", + "type": "material", + "image": "images/vqa_101.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01071650.jpg", + "mask_rles": [ + { + "size": [ + 821, + 1024 + ], + "counts": "XR1:Vi0_1eN]4cKR3nL22N2N1N100O1O001N1O2N3_JTCSNi>1c6iNko<;dnBY1XXOkNfe0W3hNY1fNY1gNX1hNY1ROm001O00000000O100O11O2N2N001O1O0O10001N010O1O1O0O1N3N02N2M300O1O1O1000000000000005K2N1O2N1O1O001O00aNTAUHl>i4mAoK[OWOg>X4`C`KkM7e>k3\\ETLc:^3kEbLU:P3YFQMf9a2iF^MW9S2XGmMe8h1iGXNU8\\1ZHcNe7P1jHoNh6QOmAc1j7[Oj5BZBf0[8GP3QOYFQ1[NHk86`2UOhE]1kNUOQ99[2VOZEk1ZOcNU9;W2XOkDX2IUNU9;V2S2gD`MU9?", + "choices": [ + "A. The object is rectangular.", + "B. The object is circular.", + "C. The object has a checkerboard pattern of squares.", + "D. The object is oval." 
+ ], + "answer": "A", + "type": "shape", + "image": "images/vqa_102.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01071650.jpg", + "mask_rles": [ + { + "size": [ + 821, + 1024 + ], + "counts": "XR1:Vi0_1eN]4cKR3nL22N2N1N100O1O001N1O2N3_JTCSNi>1c6iNko<;dnBY1XXOkNfe0W3hNY1fNY1gNX1hNY1ROm001O00000000O100O11O2N2N001O1O0O10001N010O1O1O0O1N3N02N2M300O1O1O1000000000000005K2N1O2N1O1O001O00aNTAUHl>i4mAoK[OWOg>X4`C`KkM7e>k3\\ETLc:^3kEbLU:P3YFQMf9a2iF^MW9S2XGmMe8h1iGXNU8\\1ZHcNe7P1jHoNh6QOmAc1j7[Oj5BZBf0[8GP3QOYFQ1[NHk86`2UOhE]1kNUOQ99[2VOZEk1ZOcNU9;W2XOkDX2IUNU9;V2S2gD`MU9?", + "choices": [ + "A. Red.", + "B. Yellow.", + "C. Blue.", + "D. Green." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_103.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01071650.jpg", + "mask_rles": [ + { + "size": [ + 821, + 1024 + ], + "counts": "XR1:Vi0_1eN]4cKR3nL22N2N1N100O1O001N1O2N3_JTCSNi>1c6iNko<;dnBY1XXOkNfe0W3hNY1fNY1gNX1hNY1ROm001O00000000O100O11O2N2N001O1O0O10001N010O1O1O0O1N3N02N2M300O1O1O1000000000000005K2N1O2N1O1O001O00aNTAUHl>i4mAoK[OWOg>X4`C`KkM7e>k3\\ETLc:^3kEbLU:P3YFQMf9a2iF^MW9S2XGmMe8h1iGXNU8\\1ZHcNe7P1jHoNh6QOmAc1j7[Oj5BZBf0[8GP3QOYFQ1[NHk86`2UOhE]1kNUOQ99[2VOZEk1ZOcNU9;W2XOkDX2IUNU9;V2S2gD`MU9?", + "choices": [ + "A. The object is predominantly yellow with a green checkered pattern.", + "B. The object is primarily white.", + "C. The object has a black body and an illuminated red light.", + "D. The object is dark gray." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_104.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01080826.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "k`[19eo04M2N101N1O100O10000O1O100O10O10O01M3N101O1O10O01000O010000O10O100000000000010O0001O0O102M3M4J^_Te0" + } + ], + "question": "What is in the image?", + "choices": [ + "A. 
It is a kitchen sponge.", + "B. It is a bar of soap.", + "C. It is a bottle of dish soap.", + "D. It is the handle of a kitchen utensil." + ], + "answer": "B", + "type": "shape", + "image": "images/vqa_105.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01080826.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "k`[19eo04M2N101N1O100O10000O1O100O10O10O01M3N101O1O10O01000O010000O10O100000000000010O0001O0O102M3M4J^_Te0" + } + ], + "question": "Which of the following options accurately describes located near the sink?", + "choices": [ + "A. A yellow and brown bar of soap.", + "B. A yellow cleaning sponge.", + "C. A part of a silver faucet.", + "D. A slice of a banana." + ], + "answer": "A", + "type": "color", + "image": "images/vqa_106.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01091580.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Vco23lo02O00100N101O1O001O001O1O001O1O001O1O001O001N2O001O1O001O1O001O010O1O001O1O001O1O001O001O1O001O1O001O000001O00001O0000010O01O0010O01O0010O0001O010O0010O01O0010O0000001N1O100O2N100O2N100O1O2O0O1O101N1O100O2O]QO@fm0?ZROCem0<\\ROEdm0:[ROGfm07ZROKem04\\ROMcm03\\ROOcm00^RO1am0N_RO3am0M^RO5bm0I_RO8`m0G`RO;_m0EaRO;_m0DaRO=_m0C`RO=bm0A_RO>an0O10001O000O2O00000010O00O2N10000010O0001O0000010O01O01O101N1O100O2O0O2N2Nj[T`0" + } + ], + "question": "What is the shape of the object indicated by ?", + "choices": [ + "A. The object is elongated and thin.", + "B. The object is round and smooth.", + "C. The object is bell-shaped with multiple lobes.", + "D. The object is bulbous and tapers at one end." 
+ ], + "answer": "A", + "type": "shape", + "image": "images/vqa_107.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01091580.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Vco23lo02O00100N101O1O001O001O1O001O1O001O1O001O001N2O001O1O001O1O001O010O1O001O1O001O1O001O001O1O001O1O001O000001O00001O0000010O01O0010O01O0010O0001O010O0010O01O0010O0000001N1O100O2N100O2N100O1O2O0O1O101N1O100O2O]QO@fm0?ZROCem0<\\ROEdm0:[ROGfm07ZROKem04\\ROMcm03\\ROOcm00^RO1am0N_RO3am0M^RO5bm0I_RO8`m0G`RO;_m0EaRO;_m0DaRO=_m0C`RO=bm0A_RO>an0O10001O000O2O00000010O00O2N10000010O0001O0000010O01O01O101N1O100O2O0O2N2Nj[T`0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Orange.", + "B. Red.", + "C. Green.", + "D. Brown." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_108.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01091580.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Vco23lo02O00100N101O1O001O001O1O001O1O001O1O001O001N2O001O1O001O1O001O010O1O001O1O001O1O001O001O1O001O1O001O000001O00001O0000010O01O0010O01O0010O0001O010O0010O01O0010O0000001N1O100O2N100O2N100O1O2O0O1O101N1O100O2O]QO@fm0?ZROCem0<\\ROEdm0:[ROGfm07ZROKem04\\ROMcm03\\ROOcm00^RO1am0N_RO3am0M^RO5bm0I_RO8`m0G`RO;_m0EaRO;_m0DaRO=_m0C`RO=bm0A_RO>an0O10001O000O2O00000010O00O2N10000010O0001O0000010O01O01O101N1O100O2O0O2N2Nj[T`0" + } + ], + "question": "Based on the image, what is the shape of ?", + "choices": [ + "A. The object is spherical.", + "B. The object is curved.", + "C. The object is bell-shaped.", + "D. The object is cylindrical." 
+ ], + "answer": "B", + "type": "shape", + "image": "images/vqa_109.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01095871.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "ob<`0_o02N2O000000O1000001O0001O000001O000001O000001O0001O000001O000001O000001O000001O0001O00000001O0001O000001O0001O00000001O0001O00000001O01O00000001O00012M2\\XO[Oi?f0d_ONZ`02S_Oa0m`0@_^OS1aa0kNn]Oh1Sb0VN\\]O\\2db0cMj\\Oo2Wc0QM\\\\OBWLa3]g0lLQ\\Oi3oc0WLe[OU4[d0kKY[Oa4gd0_KmZOm4Te0RKcXOCd1g5ie0fJbXOENN`0N0U6Pg0ZJbXOL;HF^6]g0nIbXO07\\6Wg0dIbXO14]6Zg0XJdXOj5]g0XJ_XOi5ag0[1000000000_OjXOWHXg0o6aXORI^h0e6c0C=lN_VOPKZj0e4f0YOXUORLmj0`3P1D?", + "choices": [ + "A. The masked object is a large, rectangular block.", + "B. The masked object is part of a long, horizontal structure.", + "C. The masked object is the main vertical support of the structure.", + "D. The masked object consists primarily of crisscrossing diagonal lines." + ], + "answer": "B", + "type": "shape", + "image": "images/vqa_110.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01095871.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "ob<`0_o02N2O000000O1000001O0001O000001O000001O000001O0001O000001O000001O000001O000001O0001O00000001O0001O000001O0001O00000001O0001O00000001O01O00000001O00012M2\\XO[Oi?f0d_ONZ`02S_Oa0m`0@_^OS1aa0kNn]Oh1Sb0VN\\]O\\2db0cMj\\Oo2Wc0QM\\\\OBWLa3]g0lLQ\\Oi3oc0WLe[OU4[d0kKY[Oa4gd0_KmZOm4Te0RKcXOCd1g5ie0fJbXOENN`0N0U6Pg0ZJbXOL;HF^6]g0nIbXO07\\6Wg0dIbXO14]6Zg0XJdXOj5]g0XJ_XOi5ag0[1000000000_OjXOWHXg0o6aXORI^h0e6c0C=lN_VOPKZj0e4f0YOXUORLmj0`3P1D is correct?", + "choices": [ + "A. The main arm is a solid beam, not a lattice structure.", + "B. A hook is visible hanging from the object's arm.", + "C. It is the tallest structure in the image.", + "D. There is no operator's cab attached to the tower." 
+ ], + "answer": "B", + "type": "shape", + "image": "images/vqa_111.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01103219.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "dSi>2lg03N1M3L3N3M3M1O1N101O2N3MIoXOEmf0IXiA0000000lXO6Rf0JnYO6Rf0>ZYOBff0`0XYO@hf0a0WYO_Ojf0a0UYO_Omf0i0?", + "choices": [ + "A. Triangular.", + "B. Cylindrical.", + "C. Irregular.", + "D. Rectangular." + ], + "answer": "D", + "type": "shape", + "image": "images/vqa_112.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01103219.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "dSi>2lg03N1M3L3N3M3M1O1N101O2N3MIoXOEmf0IXiA0000000lXO6Rf0JnYO6Rf0>ZYOBff0`0XYO@hf0a0WYO_Ojf0a0UYO_Omf0i0?", + "choices": [ + "A. The object is made of plastic.", + "B. The object is made of wood.", + "C. The object is made of metal.", + "D. The object is made of stone." + ], + "answer": "B", + "type": "material", + "image": "images/vqa_113.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01103219.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "dSi>2lg03N1M3L3N3M3M1O1N101O2N3MIoXOEmf0IXiA0000000lXO6Rf0JnYO6Rf0>ZYOBff0`0XYO@hf0a0WYO_Ojf0a0UYO_Omf0i0 in the image?", + "choices": [ + "A. A structural support for the playground.", + "B. A curved wooden bench.", + "C. A flat-topped wooden stool.", + "D. A wooden toolbox." + ], + "answer": "C", + "type": "shape", + "image": "images/vqa_114.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01103275.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "gb_f03lg09[OESYO>Rg05I^OnXOe0Qg06O1O1O0001O1O001N2N2L4O001N003M2O000O2N1000000000001N100O1O1O110O001N\\]b0" + } + ], + "question": "What is the shape of ?", + "choices": [ + "A. It is cylindrical.", + "B. It is rectangular.", + "C. 
It is conical.", + "D. It is spherical." + ], + "answer": "C", + "type": "shape", + "image": "images/vqa_115.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01103275.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "gb_f03lg09[OESYO>Rg05I^OnXOe0Qg06O1O1O0001O1O001N2N2L4O001N003M2O000O2N1000000000001N100O1O1O110O001N\\]b0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. White.", + "B. Gray.", + "C. Brown.", + "D. Black." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_116.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01103275.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cTR4a0]g03M4N4K3N0OATYOMkf03VYOMhf04XYOLhf03ZYOMef03\\YOMcf04\\YOLcf06]YOJbf06^YOJbf06^YOJaf08^YOHbf07^YOIcf07]YOIcf07]YOIbf09]YOGcf08]YOIcf04YYO_O5=bf03aYOM_f02bYON^f0OeYO1[f0NfYO2Yf0NhYOGcf08c0N3N2O2N1N10c[Tc0" + } + ], + "question": "What is the overall shape of ?", + "choices": [ + "A. Crescent-shaped.", + "B. Rectangular.", + "C. Circular.", + "D. Triangular." + ], + "answer": "C", + "type": "shape", + "image": "images/vqa_117.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01103275.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cTR4a0]g03M4N4K3N0OATYOMkf03VYOMhf04XYOLhf03ZYOMef03\\YOMcf04\\YOLcf06]YOJbf06^YOJbf06^YOJaf08^YOHbf07^YOIcf07]YOIcf07]YOIbf09]YOGcf08]YOIcf04YYO_O5=bf03aYOM_f02bYON^f0OeYO1[f0NfYO2Yf0NhYOGcf08c0N3N2O2N1N10c[Tc0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Blue.", + "B. White.", + "C. Red.", + "D. Green." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_118.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01103275.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cTR4a0]g03M4N4K3N0OATYOMkf03VYOMhf04XYOLhf03ZYOMef03\\YOMcf04\\YOLcf06]YOJbf06^YOJbf06^YOJaf08^YOHbf07^YOIcf07]YOIcf07]YOIbf09]YOGcf08]YOIcf04YYO_O5=bf03aYOM_f02bYON^f0OeYO1[f0NfYO2Yf0NhYOGcf08c0N3N2O2N1N10c[Tc0" + } + ], + "question": "What is in the image?", + "choices": [ + "A. A drum rim made of plastic.", + "B. A buckle made of silver.", + "C. A drum rim made of metal.", + "D. A drumhead made of hide." + ], + "answer": "C", + "type": "material", + "image": "images/vqa_119.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01108895.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "WUk23lg04K5K6K4gXO\\Oif0U1K4L5U]OnN^>X1PAVOm>Q1k@SOS?S1i@mNU?Y1f@hNW?_1U^OcMZ2P1^?e1a@[N\\?i1b@XN]?j1b@VN]?l1a@UN^?m1a@SN^?o1`@RN_?P2`@PN_?Q2a@oM^?S2`@nM_?T2`@lM_?V2_@kM`?W2_@iM`?Y2_@gM`?[2^@gM`?[2_@eM`?]2_@cM`?_2_@aM`?a2^@`Ma?b2]@_Mb?c2]@]Mb?d2]@]Mb?e2\\@\\Mc?f2\\@ZMc?h2\\@XMc?j2[@WMd?k2Z@VMe?l2Y@UMf?m2Y@SMg?m2X@UMf?m2Y@SMg?n2X@RMg?o2Y@QMf?Q3X@PMh?P3X@PMg?R3X@nLh?S3W@mLh?T3W@mLi?T3V@lLi?U3W@kLh?W3V@jLj?W3U@gLl?Z3T@fLl?[3S@eLl?]3S@cLm?]3R@dLm?^3R@bLm?_3R@bLn?_3Q@aLn?a3Q@_Lo?a3Q@_Ln?c3R@\\Ln?e3R@ZLm?g3T@XLk?j3U@ULj?l3W@SLi?n3V@RLi?o3X@PLi?P4V@PLj?P4V@PLk?o3T@RLl?n3S@SLm?m3S@SLm?m3R@TLn?l3R@TLn?l3R@TLn?m3Q@SLo?m3Q@SLo?m3Q@SLo?m3Q@SLo?m3Q@SLo?m3Q@SLo?m3Q@SLo?n3P@RLP`0n3P@RLo?o3Q@QLo?o3Q@QLo?o3Q@QLo?o3Q@QLo?o3R@PLn?Q4Q@oKo?Q4Q@oKo?Q4Q@oKo?Q4e_O^J9a1R`0Q4Q@oKo?Q4Q@oKo?Q4Q@oKo?^4d_ObK[`0R60000000O01O1O1ON3M4UNo_OfKP`0Z4o_OhKP`0X4P@jKn?V4R@kKm?U4S@lKl?T4U@kKl?T4T@lKl?T4T@mKk?S4U@mKk?S4U@mKk?S4U@mKl?R4T@nKl?R4U@mKk?S4U@mKk?R4V@nKj?R4V@nKk?P4V@PLj?P4V@hKR`0X4n_OiKQ`0V4P@kKo?U4Q@kKP`0S4Q@nKn?R4R@oKm?Q4S@PLl?o3U@RLj?n3V@SLi?m3W@SLj?k3W@ULi?k3W@SLk?l3W@QLk?o3U@nKn?R4R@oKn?o3S@Q
Lm?o3S@RLl?m3U@SLk?m3U@TLj?l3V@ULj?i3W@WLi?i3W@WLj?h3V@XLj?g3W@WLl?h3T@VLo?h3R@YLn?f3R@[Lm?e3S@\\Ll?c3U@^Ll?fNW_Og4n0cLk?fNW_Oe4o0eLj?fNW_Od4P1fLj?fNV_Oc4P1gLk?eNU_Oc4Q1hLk?eNT_Ob4Q1iLk?eNT_Oa4R1jLk?eNS_O_4S1lLl?T3T@lLm?R3T@nLl?R3T@mLn?Q3T@nLl?Q3U@oLl?P3T@mLo?R3R@aL\\`0^3d_OcL\\`0[3e_OeL\\`0Y3e_OhLZ`0X3f_OhL[`0V3f_OkLZ`0S3g_OnLY`0P3h_OPMY`0m2j_OSMW`0i2k_OXMV`0d2l_O\\MU`0a2m_O`MT`0\\2n_OdMT`0X2n_OiMR`0T2P@lMR`0Q2P@nMR`0n1P@RNR`0j1P@UNR`0i1o_OWNS`0f1n_OYNU`0b1n_O^NS`0^1P@aNS`0[1P@dNR`0W1Q@hNR`0S1Q@jNS`0S1`3K5K4M4K5K3N2M4N1O2N2N0M5L3M4MQcV`0" + } + ], + "question": "What are the primary colors of ?", + "choices": [ + "A. Green and yellow.", + "B. Silver and blue.", + "C. Light blue and white.", + "D. Solid silver." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_120.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01108895.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "WUk23lg04K5K6K4gXO\\Oif0U1K4L5U]OnN^>X1PAVOm>Q1k@SOS?S1i@mNU?Y1f@hNW?_1U^OcMZ2P1^?e1a@[N\\?i1b@XN]?j1b@VN]?l1a@UN^?m1a@SN^?o1`@RN_?P2`@PN_?Q2a@oM^?S2`@nM_?T2`@lM_?V2_@kM`?W2_@iM`?Y2_@gM`?[2^@gM`?[2_@eM`?]2_@cM`?_2_@aM`?a2^@`Ma?b2]@_Mb?c2]@]Mb?d2]@]Mb?e2\\@\\Mc?f2\\@ZMc?h2\\@XMc?j2[@WMd?k2Z@VMe?l2Y@UMf?m2Y@SMg?m2X@UMf?m2Y@SMg?n2X@RMg?o2Y@QMf?Q3X@PMh?P3X@PMg?R3X@nLh?S3W@mLh?T3W@mLi?T3V@lLi?U3W@kLh?W3V@jLj?W3U@gLl?Z3T@fLl?[3S@eLl?]3S@cLm?]3R@dLm?^3R@bLm?_3R@bLn?_3Q@aLn?a3Q@_Lo?a3Q@_Ln?c3R@\\Ln?e3R@ZLm?g3T@XLk?j3U@ULj?l3W@SLi?n3V@RLi?o3X@PLi?P4V@PLj?P4V@PLk?o3T@RLl?n3S@SLm?m3S@SLm?m3R@TLn?l3R@TLn?l3R@TLn?m3Q@SLo?m3Q@SLo?m3Q@SLo?m3Q@SLo?m3Q@SLo?m3Q@SLo?m3Q@SLo?n3P@RLP`0n3P@RLo?o3Q@QLo?o3Q@QLo?o3Q@QLo?o3Q@QLo?o3R@PLn?Q4Q@oKo?Q4Q@oKo?Q4Q@oKo?Q4e_O^J9a1R`0Q4Q@oKo?Q4Q@oKo?Q4Q@oKo?^4d_ObK[`0R60000000O01O1O1ON3M4UNo_OfKP`0Z4o_OhKP`0X4P@jKn?V4R@kKm?U4S@lKl?T4U@kKl?T4T@lKl?T4T@mKk?S4U@mKk?S4U@mKk?S4U@mKl?R4T@nKl?R4U@mKk?S4U@mKk?R4V@nKj?R4V@nKk?P4V@PLj?P4V@hKR`0X4n_OiKQ`0V4P@kKo?U4Q@kKP`0S4Q@nKn?R4R@oKm?Q4S@PLl?o3U@RLj?n3V@S
Li?m3W@SLj?k3W@ULi?k3W@SLk?l3W@QLk?o3U@nKn?R4R@oKn?o3S@QLm?o3S@RLl?m3U@SLk?m3U@TLj?l3V@ULj?i3W@WLi?i3W@WLj?h3V@XLj?g3W@WLl?h3T@VLo?h3R@YLn?f3R@[Lm?e3S@\\Ll?c3U@^Ll?fNW_Og4n0cLk?fNW_Oe4o0eLj?fNW_Od4P1fLj?fNV_Oc4P1gLk?eNU_Oc4Q1hLk?eNT_Ob4Q1iLk?eNT_Oa4R1jLk?eNS_O_4S1lLl?T3T@lLm?R3T@nLl?R3T@mLn?Q3T@nLl?Q3U@oLl?P3T@mLo?R3R@aL\\`0^3d_OcL\\`0[3e_OeL\\`0Y3e_OhLZ`0X3f_OhL[`0V3f_OkLZ`0S3g_OnLY`0P3h_OPMY`0m2j_OSMW`0i2k_OXMV`0d2l_O\\MU`0a2m_O`MT`0\\2n_OdMT`0X2n_OiMR`0T2P@lMR`0Q2P@nMR`0n1P@RNR`0j1P@UNR`0i1o_OWNS`0f1n_OYNU`0b1n_O^NS`0^1P@aNS`0[1P@dNR`0W1Q@hNR`0S1Q@jNS`0S1`3K5K4M4K5K3N2M4N1O2N2N0M5L3M4MQcV`0" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. The object is made of ceramic tiles and grout.", + "B. The object is made of metal and plastic.", + "C. The object is made of painted wood.", + "D. The object is made of woven fabric and straw." + ], + "answer": "B", + "type": "material", + "image": "images/vqa_121.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01121205.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "^QU1m0nf09J2M3N2N2M2O2N:G=C>C=B=B7H6L4L4M2N2M3N1N3N1N3N2M2O2L4N1N3N1N2O0O2O0O2O001N101N101N101N2O0O2O001O0O2O001O001N101O001O0O2O1O0000000O101O00000O100000000O100000000O100000000000000000000O1000000000000000000000000000000000000000O10000000O10000000000000000000O100000000000000000000000000O10000000O100000O1000000O100000000O100000000O100000000O100000000O1000000O100000000O100000000O10001O000O100000000O100000001N101O0O1mJg]O_4Zb0`Kh]O^4Xb0bKi]O\\4Yb0dKg]O[4Yb0eKh]OZ4Xb0fKi]OX4Yb0gKh]OX4Xb0hKj]OV4Wb0jKi]OT4Xb0lKi]OS4Xb0lKi]OS4Wb0nKi]OP4Xb0PLi]Oo3Xb0QLh]Om3Yb0SLh]Ol3Yb0TLg]Ok3Yb0ULg]Oj3[b0VLc]Ok3_b0SL`]On3ab0RL\\]Oo3fb0e02L3L5L4bLg\\On1`c0lMh\\Ok1]c0mMP]Oi1Xc0mM_]O^1gb0YNijZa0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Red and white.", + "B. White and black.", + "C. Silver and black.", + "D. Orange and silver." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_122.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01121205.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "^QU1m0nf09J2M3N2N2M2O2N:G=C>C=B=B7H6L4L4M2N2M3N1N3N1N3N2M2O2L4N1N3N1N2O0O2O0O2O001N101N101N101N2O0O2O001O0O2O001O001N101O001O0O2O1O0000000O101O00000O100000000O100000000O100000000000000000000O1000000000000000000000000000000000000000O10000000O10000000000000000000O100000000000000000000000000O10000000O100000O1000000O100000000O100000000O100000000O100000000O1000000O100000000O100000000O10001O000O100000000O100000001N101O0O1mJg]O_4Zb0`Kh]O^4Xb0bKi]O\\4Yb0dKg]O[4Yb0eKh]OZ4Xb0fKi]OX4Yb0gKh]OX4Xb0hKj]OV4Wb0jKi]OT4Xb0lKi]OS4Xb0lKi]OS4Wb0nKi]OP4Xb0PLi]Oo3Xb0QLh]Om3Yb0SLh]Ol3Yb0TLg]Ok3Yb0ULg]Oj3[b0VLc]Ok3_b0SL`]On3ab0RL\\]Oo3fb0e02L3L5L4bLg\\On1`c0lMh\\Ok1]c0mMP]Oi1Xc0mM_]O^1gb0YNijZa0" + } + ], + "question": "Which of the following accurately describes a feature of ?", + "choices": [ + "A. The object has a curved handle on top.", + "B. The object has a large, gray, overarching handle.", + "C. The object is primarily cylindrical in shape.", + "D. The object has a square base." 
+ ], + "answer": "A", + "type": "shape", + "image": "images/vqa_123.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01121205.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "^QU1m0nf09J2M3N2N2M2O2N:G=C>C=B=B7H6L4L4M2N2M3N1N3N1N3N2M2O2L4N1N3N1N2O0O2O0O2O001N101N101N101N2O0O2O001O0O2O001O001N101O001O0O2O1O0000000O101O00000O100000000O100000000O100000000000000000000O1000000000000000000000000000000000000000O10000000O10000000000000000000O100000000000000000000000000O10000000O100000O1000000O100000000O100000000O100000000O100000000O1000000O100000000O100000000O10001O000O100000000O100000001N101O0O1mJg]O_4Zb0`Kh]O^4Xb0bKi]O\\4Yb0dKg]O[4Yb0eKh]OZ4Xb0fKi]OX4Yb0gKh]OX4Xb0hKj]OV4Wb0jKi]OT4Xb0lKi]OS4Xb0lKi]OS4Wb0nKi]OP4Xb0PLi]Oo3Xb0QLh]Om3Yb0SLh]Ol3Yb0TLg]Ok3Yb0ULg]Oj3[b0VLc]Ok3_b0SL`]On3ab0RL\\]Oo3fb0e02L3L5L4bLg\\On1`c0lMh\\Ok1]c0mMP]Oi1Xc0mM_]O^1gb0YNijZa0" + } + ], + "question": "Which statement correctly describes a shape-related feature of ?", + "choices": [ + "A. The display screen on its front is rectangular.", + "B. The top of the object is completely flat.", + "C. It has a large, curved handle on top for carrying.", + "D. The main body of the object is a perfect cube." 
+ ], + "answer": "A", + "type": "shape", + "image": "images/vqa_124.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01121205.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "^QU1m0nf09J2M3N2N2M2O2N:G=C>C=B=B7H6L4L4M2N2M3N1N3N1N3N2M2O2L4N1N3N1N2O0O2O0O2O001N101N101N101N2O0O2O001O0O2O001O001N101O001O0O2O1O0000000O101O00000O100000000O100000000O100000000000000000000O1000000000000000000000000000000000000000O10000000O10000000000000000000O100000000000000000000000000O10000000O100000O1000000O100000000O100000000O100000000O100000000O1000000O100000000O100000000O10001O000O100000000O100000001N101O0O1mJg]O_4Zb0`Kh]O^4Xb0bKi]O\\4Yb0dKg]O[4Yb0eKh]OZ4Xb0fKi]OX4Yb0gKh]OX4Xb0hKj]OV4Wb0jKi]OT4Xb0lKi]OS4Xb0lKi]OS4Wb0nKi]OP4Xb0PLi]Oo3Xb0QLh]Om3Yb0SLh]Ol3Yb0TLg]Ok3Yb0ULg]Oj3[b0VLc]Ok3_b0SL`]On3ab0RL\\]Oo3fb0e02L3L5L4bLg\\On1`c0lMh\\Ok1]c0mMP]Oi1Xc0mM_]O^1gb0YNijZa0" + } + ], + "question": "What is the primary color of the body of ?", + "choices": [ + "A. White.", + "B. Red.", + "C. Silver.", + "D. Orange." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_125.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01121205.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "^QU1m0nf09J2M3N2N2M2O2N:G=C>C=B=B7H6L4L4M2N2M3N1N3N1N3N2M2O2L4N1N3N1N2O0O2O0O2O001N101N101N101N2O0O2O001O0O2O001O001N101O001O0O2O1O0000000O101O00000O100000000O100000000O100000000000000000000O1000000000000000000000000000000000000000O10000000O10000000000000000000O100000000000000000000000000O10000000O100000O1000000O100000000O100000000O100000000O100000000O1000000O100000000O100000000O10001O000O100000000O100000001N101O0O1mJg]O_4Zb0`Kh]O^4Xb0bKi]O\\4Yb0dKg]O[4Yb0eKh]OZ4Xb0fKi]OX4Yb0gKh]OX4Xb0hKj]OV4Wb0jKi]OT4Xb0lKi]OS4Xb0lKi]OS4Wb0nKi]OP4Xb0PLi]Oo3Xb0QLh]Om3Yb0SLh]Ol3Yb0TLg]Ok3Yb0ULg]Oj3[b0VLc]Ok3_b0SL`]On3ab0RL\\]Oo3fb0e02L3L5L4bLg\\On1`c0lMh\\Ok1]c0mMP]Oi1Xc0mM_]O^1gb0YNijZa0" + } + ], + "question": "What material is primarily made of?", + "choices": [ + "A. Metal.", + "B. Wood.", + "C. Plastic.", + "D. Concrete." + ], + "answer": "C", + "type": "material", + "image": "images/vqa_126.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01142493.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "VPc51ng03N1N2N2O2M2N2N2N3O000O10000O1N2O1O1N2O1N2O2N1N200O2O000O101O0O100 made of?", + "choices": [ + "A. Woven.", + "B. Plastic.", + "C. Cardboard.", + "D. Leather." + ], + "answer": "A", + "type": "material", + "image": "images/vqa_127.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01142493.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "VPc51ng03N1N2N2O2M2N2N2N3O000O10000O1N2O1O1N2O1N2O2N1N200O2O000O101O0O100?", + "choices": [ + "A. Woven.", + "B. Plaid.", + "C. Polka-dotted.", + "D. Striped." 
+ ], + "answer": "A", + "type": "texture/pattern", + "image": "images/vqa_128.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01142493.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "VPc51ng03N1N2N2O2M2N2N2N3O000O10000O1N2O1O1N2O1N2O2N1N200O2O000O101O0O100?", + "choices": [ + "A. White.", + "B. Red.", + "C. Black.", + "D. Brown." + ], + "answer": "D", + "type": "color", + "image": "images/vqa_129.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01142493.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "PP`:1og02N000000001O00000000001O0000000000001O00000P\\Ok1U>UNkAm1S>SNmAn1R>SNmAm1fMoMk>4_Cm1dMVOh=mNdDh1iM`0^TNYOR2h0_NaNh0^2ZOZMe0m2[OlLf0Z3[O_Ld0h3[ORLf0T4ZOfKe0a4YO[Kh0j4UOTKk0Q5ROmJm0Y5oNfJQ1_5lN_JT1f5iNXJV1n5fNQJZ1T6cNiI]1]6`N`Ia1e6]NXIb1n6^NlHc1Y7]NbHb1d7^NVHc1o7]NkGc1[8]N^Ge1g8[NSGe1S9[NgFe1_9[N[Ff1j9ZNPFf1V:ZNeEe1a:\\NYEd1l:\\NnDd1X;\\NcDc1c;]NXDc1m;]NmCc1Y<]NbCb1d<^NWCb1n<^NlBc1Y=^NaBb1d=^NWBb1n=^NlAc1Y>]NbAc1c>]NXAc1m>]Nn@c1W?]Nd@b1b?^NY@b1l?_Nk_Od1Z`0\\Nd_Oa1a`0_N__O\\1f`0cNZ_OY1k`0gNV_OR1Pa0nNP_Ol0Va0TOj^Oe0]a0\\Oc^O=ca0C]^O7ia0IW^O1oa00Q^OMQb03o]OKSb05n]OHTb09i200001O00001O00001O001O0000001O00001O00001O00001O0000010N10001O001O00001O00001O00001O001O00001O00001O001O0[YOUOUf0k0eYO]OYf0V1O0000O1000000000000000000O100000000000000O10000000000000000O10000000000000000O10000000000000000O10000000000000000O10000000000O100000000O10000000000O2OO100000O10000000000O100000000O10000000000O10000O10000O100O10000O10000O10000O10000000000O10000000000O1N2F:J6JQa70m^H:[OEXYOd0hf0;O00O11O00001O00001O00001O0000001O00001O00001O00001O00001O0000001O00001O00001O00001O00001O0000001O0000001O0000001O000000001O0000001O0000001O000000001O0000001O0000001O000000001O0000001O0000001O00" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. It is made of plastic.", + "B. It is made of metal.", + "C. 
It is made of wood.", + "D. It is made of fabric." + ], + "answer": "D", + "type": "material", + "image": "images/vqa_130.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01142493.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "PP`:1og02N000000001O00000000001O0000000000001O00000P\\Ok1U>UNkAm1S>SNmAn1R>SNmAm1fMoMk>4_Cm1dMVOh=mNdDh1iM`0^TNYOR2h0_NaNh0^2ZOZMe0m2[OlLf0Z3[O_Ld0h3[ORLf0T4ZOfKe0a4YO[Kh0j4UOTKk0Q5ROmJm0Y5oNfJQ1_5lN_JT1f5iNXJV1n5fNQJZ1T6cNiI]1]6`N`Ia1e6]NXIb1n6^NlHc1Y7]NbHb1d7^NVHc1o7]NkGc1[8]N^Ge1g8[NSGe1S9[NgFe1_9[N[Ff1j9ZNPFf1V:ZNeEe1a:\\NYEd1l:\\NnDd1X;\\NcDc1c;]NXDc1m;]NmCc1Y<]NbCb1d<^NWCb1n<^NlBc1Y=^NaBb1d=^NWBb1n=^NlAc1Y>]NbAc1c>]NXAc1m>]Nn@c1W?]Nd@b1b?^NY@b1l?_Nk_Od1Z`0\\Nd_Oa1a`0_N__O\\1f`0cNZ_OY1k`0gNV_OR1Pa0nNP_Ol0Va0TOj^Oe0]a0\\Oc^O=ca0C]^O7ia0IW^O1oa00Q^OMQb03o]OKSb05n]OHTb09i200001O00001O00001O001O0000001O00001O00001O00001O0000010N10001O001O00001O00001O00001O001O00001O00001O001O0[YOUOUf0k0eYO]OYf0V1O0000O1000000000000000000O100000000000000O10000000000000000O10000000000000000O10000000000000000O10000000000000000O10000000000O100000000O10000000000O2OO100000O10000000000O100000000O10000000000O10000O10000O100O10000O10000O10000O10000000000O10000000000O1N2F:J6JQa70m^H:[OEXYOd0hf0;O00O11O00001O00001O00001O0000001O00001O00001O00001O00001O0000001O00001O00001O00001O00001O0000001O0000001O0000001O000000001O0000001O0000001O000000001O0000001O0000001O000000001O0000001O0000001O00" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Blue.", + "B. Red.", + "C. Yellow.", + "D. Grey." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_131.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01142493.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "hebe0 is correct?", + "choices": [ + "A. The object is a combination of blue and white.", + "B. 
The object is a combination of black and green.", + "C. The object is entirely brown.", + "D. The object is a combination of red and yellow." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_132.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01142493.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "hebe0?", + "choices": [ + "A. Rubber.", + "B. Leather.", + "C. Canvas.", + "D. Plastic." + ], + "answer": "A", + "type": "material", + "image": "images/vqa_133.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01155009.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "WRn23Sg0ObYO7Wf0l0K5N2O1M3O1O000100O000O10000O10000000000000000000000000000000000000000000000000000000001O0O2O001O2J7nNiYO0Pmjc0" + } + ], + "question": "What is the background color of ?", + "choices": [ + "A. Red.", + "B. Yellow.", + "C. Black.", + "D. White." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_134.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01156032.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 980 + ], + "counts": "dZ\\76ho05L2N2N1O1O2O0O1O1O1L4M3L4H8N2N2O1O1O10kNgQOc0Yn0[OkQOc0Tn0]OnQOb0Rn0]OSRO?lm0AVRO=km0CVRO?", + "choices": [ + "A. White.", + "B. Silver.", + "C. Gold.", + "D. Brown." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_135.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01156032.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 980 + ], + "counts": "dZ\\76ho05L2N2N1O1O2O0O1O1O1L4M3L4H8N2N2O1O1O10kNgQOc0Yn0[OkQOc0Tn0]OnQOb0Rn0]OSRO?lm0AVRO=km0CVRO?", + "choices": [ + "A. It is a hook-shaped object.", + "B. It is a circular object.", + "C. It is a rectangular object.", + "D. It is an oval object." 
+ ], + "answer": "B", + "type": "shape", + "image": "images/vqa_136.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01156032.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 980 + ], + "counts": "dZ\\76ho05L2N2N1O1O2O0O1O1O1L4M3L4H8N2N2O1O1O10kNgQOc0Yn0[OkQOc0Tn0]OnQOb0Rn0]OSRO?lm0AVRO=km0CVRO?", + "choices": [ + "A. The masked object is silver.", + "B. The masked object is beige.", + "C. The masked object is white.", + "D. The masked object is brown." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_137.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01156032.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 980 + ], + "counts": "ga0<8Lkn05UQOKjn06VQOJin08VQOHin09WQOGin09WQOGhn0:XQOFgn0;YQOEfn0ZQOBen0?[QOAdn0`0\\QO@cn0a0]QO_Ocn0a0^QO^Oan0d0^QO\\Oan0e0_QO[O`n0f0`QOYOan0g0_QOYO`n0h0`QOWO`n0j0`QOUO`n0m0_QOSOan0m0_QOROan0o071O1O101N1O1O100O1O0001O00000010O000000010O000001O0001O01O000001O01O000000010O1O1O1O010O1O1O001O100O1O2N1O100O1O1O2N1O100O2N2N2N2N3M2M3M3Mb_fk0" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. Metal.", + "B. Plastic.", + "C. Glass.", + "D. Ceramic." + ], + "answer": "B", + "type": "material", + "image": "images/vqa_138.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01156032.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 980 + ], + "counts": "ga0<8Lkn05UQOKjn06VQOJin08VQOHin09WQOGin09WQOGhn0:XQOFgn0;YQOEfn0ZQOBen0?[QOAdn0`0\\QO@cn0a0]QO_Ocn0a0^QO^Oan0d0^QO\\Oan0e0_QO[O`n0f0`QOYOan0g0_QOYO`n0h0`QOWO`n0j0`QOUO`n0m0_QOSOan0m0_QOROan0o071O1O101N1O1O100O1O0001O00000010O000000010O000001O0001O01O000001O01O000000010O1O1O1O010O1O1O001O100O1O2N1O100O1O1O2N1O100O2N2N2N2N3M2M3M3Mb_fk0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Blue.", + "B. Green.", + "C. White.", + "D. Silver." 
+ ], + "answer": "B", + "type": "color", + "image": "images/vqa_139.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01156032.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 980 + ], + "counts": "ga0<8Lkn05UQOKjn06VQOJin08VQOHin09WQOGin09WQOGhn0:XQOFgn0;YQOEfn0ZQOBen0?[QOAdn0`0\\QO@cn0a0]QO_Ocn0a0^QO^Oan0d0^QO\\Oan0e0_QO[O`n0f0`QOYOan0g0_QOYO`n0h0`QOWO`n0j0`QOUO`n0m0_QOSOan0m0_QOROan0o071O1O101N1O1O100O1O0001O00000010O000000010O000001O0001O01O000001O01O000000010O1O1O1O010O1O1O001O100O1O2N1O100O1O1O2N1O100O2N2N2N2N3M2M3M3Mb_fk0" + } + ], + "question": "What is the shape of the top of ?", + "choices": [ + "A. It has a pump-action dispenser.", + "B. It has a rounded, dome-like shape.", + "C. It is flat and tapered.", + "D. It is a simple, cylindrical screw-top." + ], + "answer": "C", + "type": "shape", + "image": "images/vqa_140.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01156833.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Zc]a09go08H9G8H9G8H:F9G:E:GW1iN0000O1O1O1O=D9Fe0[O6J7I6Kn0QOQZj5" + } + ], + "question": "What is the shape of in the image?", + "choices": [ + "A. Dome-shaped.", + "B. Cylindrical.", + "C. Irregular.", + "D. Rectangular." + ], + "answer": "D", + "type": "shape", + "image": "images/vqa_141.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01156833.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Zc]a09go08H9G8H9G8H:F9G:E:GW1iN0000O1O1O1O=D9Fe0[O6J7I6Kn0QOQZj5" + } + ], + "question": "Which of the following best describes the shape of in the image?", + "choices": [ + "A. A long, thin cylinder.", + "B. A dome shape.", + "C. A short, wide cylinder.", + "D. A long, narrow rectangle." 
+ ], + "answer": "D", + "type": "shape", + "image": "images/vqa_142.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01156833.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Zc]a09go08H9G8H9G8H:F9G:E:GW1iN0000O1O1O1O=D9Fe0[O6J7I6Kn0QOQZj5" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. Wood.", + "B. Plastic.", + "C. Metal.", + "D. Cardboard." + ], + "answer": "C", + "type": "material", + "image": "images/vqa_143.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01189415.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "j^T=385S10_l0NYRO69HNf0]m0N`ROe0\\m0R1O2M3M2O2M2O2N1O1O101M101N100O2O0O100O2O0O100O2O0O010O010O01O001O00000000O100O101O000000000000O10O10O10000001O1O2N2N1O2N1O001O1O1O1O1O1O1O001O1O1O1O1O1O001O001O0000000O2O0O10000O101N100O10001N100O100O2O000O2O0O2O1N2O1O1N2O1N2O0O2O1O1N2O1N2O1N2N1O2O1N2N2N2N2O1N2N2N2N3N1N2N2N2N2O1N3M2N1O2O0O2N1O10O01O1O00100O001O10O01O1O010O1O001O10O01O1O010O1O001O010O00O1O2F9N200O2O000O1O2M2O2N10001O001O001O000O2O001O00001O001O0N3L300100O2O0O100O1O101N100O1O101N1000000O2O000O101O000O101O000O101O0O10O1000O0100O10O0100O010O100O010O1000O01000O01000000O10000O1000000O10000O1000000O10000O10000O100BWVOeJii0[5WVOdJji0[5XVOcJii0]5ZVO]Jii0b5;O1O100O1O2O0O1O2O0O2N100O2N1O2O0O1O2O0O2N100O2N101N1O101N1O2O0O1O2O0O2N100O2N2O1N1O2O1N2N2O1N1O2O1NVC" + } + ], + "question": "Which of the following statements accurately describes the shape of the ears of ?", + "choices": [ + "A. The ears are rounded at the tips.", + "B. The ears are pointed and triangular.", + "C. The ears are floppy and folded downwards.", + "D. The ears are not visible in the image." 
+ ], + "answer": "B", + "type": "shape", + "image": "images/vqa_144.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01189415.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "V\\_:2U10`m04\\QONj04fm06VRONhm05SRO0jm03RRO1km01QRO3mm00oQO2Qn00jQO3Vn0l0000O1N101O1N2O0O2O1N2O0O2O1N2TOZQO;gn0E]QO5fn0Ii0d0\\O3M3M2N1O0O2O00001O2N1O1N3M3N1N2N2O0N2O001O1O1O1O1O100O02O001O2N5K3M2ZNiQO^1]n00000000004L1O001N010O0000100O1O1O00001OO1000O2O001O001N1O3D;M4M4KRP55koJ2N2O1HCjPO>Uo0c0B6K2N2N2M2O1O2N3L3N2N3M2M4M2N2N2N2O0O1O1O100O1O100O1O00100O1O100O1O[OlRO`NSm0a1mRO_NSm0a1nRO]NRm0d1oROZNRm0f1RSOUNnl0l1c01N10000_ROQNQm0o1nROSNPm0o1nROSNQm0m1oROTNPm0l1oROUNPm0l1PSOTNPm0^200O10000O10000O10000O1000O010000O100O10000O10000O100O01000O10000O100O1000000001O0O100000001O00000000001N1000001O001O0O2O001O1O001O1O0O2O001O1O001O1N2O1O1O1O1O1O1O1N2O1O1O1O1O1O1N2O1O1O1O2N2N2N2M3N2N1O1O0O2O0O2O0O2O0O2O0O2O0O2O001N100O2O0O2N1O2N1O1N2O1O1O1N200O1O1O1O100O1O1000O100001N2O001O00O1O1O10O01O1O1O00100O001O1O1O0O2O001N101N1O2N1O2O000100O1O010O1O1O1H8K5M3J7L`dm2" + } + ], + "question": "What is the shape of the ear of ?", + "choices": [ + "A. Rounded.", + "B. Floppy.", + "C. Triangular.", + "D. Pointed." 
+ ], + "answer": "C", + "type": "shape", + "image": "images/vqa_145.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01189415.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "V\\_:2U10`m04\\QONj04fm06VRONhm05SRO0jm03RRO1km01QRO3mm00oQO2Qn00jQO3Vn0l0000O1N101O1N2O0O2O1N2O0O2O1N2TOZQO;gn0E]QO5fn0Ii0d0\\O3M3M2N1O0O2O00001O2N1O1N3M3N1N2N2O0N2O001O1O1O1O1O100O02O001O2N5K3M2ZNiQO^1]n00000000004L1O001N010O0000100O1O1O00001OO1000O2O001O001N1O3D;M4M4KRP55koJ2N2O1HCjPO>Uo0c0B6K2N2N2M2O1O2N3L3N2N3M2M4M2N2N2N2O0O1O1O100O1O100O1O00100O1O100O1O[OlRO`NSm0a1mRO_NSm0a1nRO]NRm0d1oROZNRm0f1RSOUNnl0l1c01N10000_ROQNQm0o1nROSNPm0o1nROSNQm0m1oROTNPm0l1oROUNPm0l1PSOTNPm0^200O10000O10000O10000O1000O010000O100O10000O10000O100O01000O10000O100O1000000001O0O100000001O00000000001N1000001O001O0O2O001O1O001O1O0O2O001O1O001O1N2O1O1O1O1O1O1O1N2O1O1O1O1O1O1N2O1O1O1O2N2N2N2M3N2N1O1O0O2O0O2O0O2O0O2O0O2O0O2O001N100O2O0O2N1O2N1O1N2O1O1O1N200O1O1O1O100O1O1000O100001N2O001O00O1O1O10O01O1O1O00100O001O1O1O0O2O001N101N1O2N1O2O000100O1O010O1O1O1H8K5M3J7L`dm2" + } + ], + "question": "What is the shape of in the image?", + "choices": [ + "A. Its back is arched.", + "B. Its back is completely straight.", + "C. Its tail is curled up.", + "D. Its head is tilted downwards." + ], + "answer": "A", + "type": "shape", + "image": "images/vqa_146.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01198997.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 795 + ], + "counts": "RPTc0`2_m02O00000O2OO100000000000000000000000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O10000O1000000O10000O1000000O101OO010001N?B8H3M4K5L5K0O2NinS1" + } + ], + "question": "Which of the following best describes the texture of ?", + "choices": [ + "A. 
It has a smooth texture.", + "B. It has a woven texture.", + "C. It has a crinkled texture.", + "D. It has a rough texture." + ], + "answer": "A", + "type": "texture/pattern", + "image": "images/vqa_147.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01198997.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 795 + ], + "counts": "RPTc0`2_m02O00000O2OO100000000000000000000000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O10000O1000000O10000O1000000O101OO010001N?B8H3M4K5L5K0O2NinS1" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Orange.", + "B. Green.", + "C. Black.", + "D. White." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_148.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01198997.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 795 + ], + "counts": "RPTc0`2_m02O00000O2OO100000000000000000000000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O10000O1000000O10000O1000000O101OO010001N?B8H3M4K5L5K0O2NinS1" + } + ], + "question": "Which of the following statements accurately describes ?", + "choices": [ + "A. It is a canvas handbag.", + "B. It is a leather handbag.", + "C. It is a nylon satchel.", + "D. It is part of a leather jacket." 
+ ], + "answer": "B", + "type": "material", + "image": "images/vqa_149.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01198997.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 795 + ], + "counts": "RPTc0`2_m02O00000O2OO100000000000000000000000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O10000O1000000O10000O1000000O101OO010001N?B8H3M4K5L5K0O2NinS1" + } + ], + "question": "What is a characteristic of ?", + "choices": [ + "A. It has a visible seam.", + "B. It has a metal zipper.", + "C. It is made of woven fabric.", + "D. It has a leather strap." + ], + "answer": "A", + "type": "texture/pattern", + "image": "images/vqa_150.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01246937.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "RnV2R1bf0a0I7J7mYOPNie0V2N2MO[ZOjMde0Y2O1O100000000O1000O100L4O1000000O10000002N2N1N2O000000000000JaZOkM_e0U26O100O11OO10O10000000001O00000000000000000000001O01O000000000O12O1N1O0001O0000001O001O1N1K6M3O001O1O11100N3L3M1OO1O10gNhYOj0Xf0VOkYOg0Vf0XOnYOb0Uf0\\OPZO8Yf0FR1Mki[c0" + } + ], + "question": "Which statement accurately describes a color-related feature of ?", + "choices": [ + "A. The object is orange.", + "B. The object has white text on its rear.", + "C. The object is entirely black.", + "D. The object is red." 
+ ], + "answer": "B", + "type": "color", + "image": "images/vqa_151.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01246937.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "RnV2R1bf0a0I7J7mYOPNie0V2N2MO[ZOjMde0Y2O1O100000000O1000O100L4O1000000O10000002N2N1N2O000000000000JaZOkM_e0U26O100O11OO10O10000000001O00000000000000000000001O01O000000000O12O1N1O0001O0000001O001O1N1K6M3O001O1O11100N3L3M1OO1O10gNhYOj0Xf0VOkYOg0Vf0XOnYOb0Uf0\\OPZO8Yf0FR1Mki[c0" + } + ], + "question": "What is the material of the taillight on ?", + "choices": [ + "A. Glass.", + "B. Metal.", + "C. Plastic.", + "D. Rubber." + ], + "answer": "C", + "type": "material", + "image": "images/vqa_152.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01246937.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "RnV2R1bf0a0I7J7mYOPNie0V2N2MO[ZOjMde0Y2O1O100000000O1000O100L4O1000000O10000002N2N1N2O000000000000JaZOkM_e0U26O100O11OO10O10000000001O00000000000000000000001O01O000000000O12O1N1O0001O0000001O001O1N1K6M3O001O1O11100N3L3M1OO1O10gNhYOj0Xf0VOkYOg0Vf0XOnYOb0Uf0\\OPZO8Yf0FR1Mki[c0" + } + ], + "question": "What text, which indicates its purpose, is displayed on ?", + "choices": [ + "A. TAXI.", + "B. Norwich.", + "C. 21 22.", + "D. city & rail station." + ], + "answer": "A", + "type": "texture/pattern", + "image": "images/vqa_153.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01252367.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cig7n0kf08G9H8L4N2N2N1O1OSOYZOCfe07aZOI^e00kZOOUe0JR[O6md0FY[O9fd0H[[O7ed0H\\[O8dd0G^[O8bd0G_[O9ad0Fa[O9`d0Eb[O:_d0Db[O?", + "choices": [ + "A. Gray.", + "B. Black.", + "C. White.", + "D. Silver." 
+ ], + "answer": "B", + "type": "color", + "image": "images/vqa_154.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01252367.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cig7n0kf08G9H8L4N2N2N1O1OSOYZOCfe07aZOI^e00kZOOUe0JR[O6md0FY[O9fd0H[[O7ed0H\\[O8dd0G^[O8bd0G_[O9ad0Fa[O9`d0Eb[O:_d0Db[O?", + "choices": [ + "A. Hard, smooth plastic.", + "B. Polished metal.", + "C. A soft, cushioned material.", + "D. Flexible rubber." + ], + "answer": "C", + "type": "material", + "image": "images/vqa_155.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01252367.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cig7n0kf08G9H8L4N2N2N1O1OSOYZOCfe07aZOI^e00kZOOUe0JR[O6md0FY[O9fd0H[[O7ed0H\\[O8dd0G^[O8bd0G_[O9ad0Fa[O9`d0Eb[O:_d0Db[O in the image?", + "choices": [ + "A. A curved handle of a pair of scissors.", + "B. A curved telephone receiver.", + "C. A curved headset.", + "D. A curved arm of a desk lamp." 
+ ], + "answer": "C", + "type": "shape", + "image": "images/vqa_156.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01252367.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "[jT54kg06I4M000000O2O00lYOIad05a[O1Xd0Ok[O5nc0KT\\O9gc0F[\\O?^c0@e\\Oe0Tc0[Oo\\Oi0jb0WOX]Om0bb0SO`]OR1Yb0nNi]OW1Qb0iNP^O[1ja0eNY^O[1da0fN]^OZ1aa0fNa^OY1^a0gNd^OX1[a0iNf^OV1Za0iNi^OV1Ua0kNl^OT1Sa0lNo^OS1Pa0nNQ_OQ1n`0oNT_OQ1j`0POX_On0h`0QOZ_On0e`0RO]_Om0b`0TO__Ok0``0UOb_Ok0\\`0VOe_Oi0Z`0WOh_Oh0X`0XOj_Of0U`0ZOm_Of0Q`0[OP@d0o?\\OR@d0m?\\OU@c0j?^OV@c0h?]OZ@b0f?^O[@a0d?@]@?b?A`@?^?Bc@=\\?De@;Z?Eg@;Y?Eh@;W?Ei@;W?Dj@T?Bl@>T?Bm@>R?Ao@?Q?Ao@?Q?Ao@?Q?@QA`0o>_OQAa0o>_OQAa0o>^ORAb0n>^OSAb0l>^OTAb0l>^OTAb0l>]OUAc0l>\\OTAe0k>[OUAe0k>ZOVAf0j>ZOWAe0i>[OWAe0i>ZOXAd0j>\\OVAc0l>\\OUAb0l>^OUALmKa0ob0BYADYA8j>HWAMS?3]401O0000000000001O0000000O10001O00gn``0" + } + ], + "question": "What is the shape of ?", + "choices": [ + "A. The object has a cylindrical shape.", + "B. The object has a square shape.", + "C. The object has a rectangular shape.", + "D. The object has an irregular shape." 
+ ], + "answer": "C", + "type": "shape", + "image": "images/vqa_157.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01252367.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "[jT54kg06I4M000000O2O00lYOIad05a[O1Xd0Ok[O5nc0KT\\O9gc0F[\\O?^c0@e\\Oe0Tc0[Oo\\Oi0jb0WOX]Om0bb0SO`]OR1Yb0nNi]OW1Qb0iNP^O[1ja0eNY^O[1da0fN]^OZ1aa0fNa^OY1^a0gNd^OX1[a0iNf^OV1Za0iNi^OV1Ua0kNl^OT1Sa0lNo^OS1Pa0nNQ_OQ1n`0oNT_OQ1j`0POX_On0h`0QOZ_On0e`0RO]_Om0b`0TO__Ok0``0UOb_Ok0\\`0VOe_Oi0Z`0WOh_Oh0X`0XOj_Of0U`0ZOm_Of0Q`0[OP@d0o?\\OR@d0m?\\OU@c0j?^OV@c0h?]OZ@b0f?^O[@a0d?@]@?b?A`@?^?Bc@=\\?De@;Z?Eg@;Y?Eh@;W?Ei@;W?Dj@T?Bl@>T?Bm@>R?Ao@?Q?Ao@?Q?Ao@?Q?@QA`0o>_OQAa0o>_OQAa0o>^ORAb0n>^OSAb0l>^OTAb0l>^OTAb0l>]OUAc0l>\\OTAe0k>[OUAe0k>ZOVAf0j>ZOWAe0i>[OWAe0i>ZOXAd0j>\\OVAc0l>\\OUAb0l>^OUALmKa0ob0BYADYA8j>HWAMS?3]401O0000000000001O0000000O10001O00gn``0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Blue.", + "B. White.", + "C. Gray.", + "D. Black." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_158.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01252367.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "[jT54kg06I4M000000O2O00lYOIad05a[O1Xd0Ok[O5nc0KT\\O9gc0F[\\O?^c0@e\\Oe0Tc0[Oo\\Oi0jb0WOX]Om0bb0SO`]OR1Yb0nNi]OW1Qb0iNP^O[1ja0eNY^O[1da0fN]^OZ1aa0fNa^OY1^a0gNd^OX1[a0iNf^OV1Za0iNi^OV1Ua0kNl^OT1Sa0lNo^OS1Pa0nNQ_OQ1n`0oNT_OQ1j`0POX_On0h`0QOZ_On0e`0RO]_Om0b`0TO__Ok0``0UOb_Ok0\\`0VOe_Oi0Z`0WOh_Oh0X`0XOj_Of0U`0ZOm_Of0Q`0[OP@d0o?\\OR@d0m?\\OU@c0j?^OV@c0h?]OZ@b0f?^O[@a0d?@]@?b?A`@?^?Bc@=\\?De@;Z?Eg@;Y?Eh@;W?Ei@;W?Dj@T?Bl@>T?Bm@>R?Ao@?Q?Ao@?Q?Ao@?Q?@QA`0o>_OQAa0o>_OQAa0o>^ORAb0n>^OSAb0l>^OTAb0l>^OTAb0l>]OUAc0l>\\OTAe0k>[OUAe0k>ZOVAf0j>ZOWAe0i>[OWAe0i>ZOXAd0j>\\OVAc0l>\\OUAb0l>^OUALmKa0ob0BYADYA8j>HWAMS?3]401O0000000000001O0000000O10001O00gn``0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Blue.", + "B. 
Gray.", + "C. Black.", + "D. Beige." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_159.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01252367.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cl_d01mg04KLYXO7dg04O2N1O1O2N1O1O2O0O1O2O0O1O2N1O1O2O0O1O2N1O1O2O0O1O2NA[YODdf0l01O00000001O00000000000010O00000000000001O000000000000001O0000000000001O00000001O000001O0000000000000000000000000000001O000001O001O1O1O1O1O1O2N1O1O1O1O1O101N1O1O1O1O1O1O2N00100O1O1O1O1O1O1O001O1O1O100O1O1N2I[XO0iY<" + } + ], + "question": "What is the binding style of ?", + "choices": [ + "A. It is bound with a spiral wire.", + "B. It is held together by staples in the center.", + "C. Its pages are glued together at the spine.", + "D. It uses a three-ring binder mechanism." + ], + "answer": "A", + "type": "texture/pattern", + "image": "images/vqa_160.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01252367.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cl_d01mg04KLYXO7dg04O2N1O1O2N1O1O2O0O1O2O0O1O2N1O1O2O0O1O2N1O1O2O0O1O2NA[YODdf0l01O00000001O00000000000010O00000000000001O000000000000001O0000000000001O00000001O000001O0000000000000000000000000000001O000001O001O1O1O1O1O1O2N1O1O1O1O1O101N1O1O1O1O1O1O2N00100O1O1O1O1O1O1O001O1O1O100O1O1N2I[XO0iY<" + } + ], + "question": "Which of the following descriptions accurately portrays the pattern on ?", + "choices": [ + "A. The object has a purple \"Y!\" logo on its cover.", + "B. The object has the text \"Microsoft\" printed on its cover.", + "C. The object has the text \"YAHOO\" printed on its cover.", + "D. The object is plain black with no text or logos." 
+ ], + "answer": "C", + "type": "texture/pattern", + "image": "images/vqa_161.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01252367.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cl_d01mg04KLYXO7dg04O2N1O1O2N1O1O2O0O1O2O0O1O2N1O1O2O0O1O2N1O1O2O0O1O2NA[YODdf0l01O00000001O00000000000010O00000000000001O000000000000001O0000000000001O00000001O000001O0000000000000000000000000000001O000001O001O1O1O1O1O1O2N1O1O1O1O1O101N1O1O1O1O1O1O2N00100O1O1O1O1O1O1O001O1O1O100O1O1N2I[XO0iY<" + } + ], + "question": "What is the pattern on the surface of ?", + "choices": [ + "A. It has a purple logo on the cover.", + "B. It has white text on the cover.", + "C. It is plain black with no markings.", + "D. It has black text on a white cover." + ], + "answer": "B", + "type": "texture/pattern", + "image": "images/vqa_162.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01261883.jpg", + "mask_rles": [ + { + "size": [ + 1536, + 2304 + ], + "counts": "flf[28b_1:G8H9G8K6J4M2M3N3L3N2M4M2M3N3M2N2N3N1N2O2M2O1N3M2O1N3N0O2O0O2N101N101N101N1O2O0O2M2O2N1O2N101N101N101O00001O001N1O100000000000O100000000O1000000O2O0000000O10001O0O1000001O0O2O001O1N2O001O1O0O2O1O1O0O2O1N1O2N2N2N2M4M2M3M3M3M3N2M3N2N2N2N3L3N2M3M3M3M3M3L4K6SOYaN9m^1CXaN8[_1L5K\\SSj0" + } + ], + "question": "What is the texture of ?", + "choices": [ + "A. The object has a rough and bumpy surface with a cross-hatch pattern.", + "B. The object has a smooth surface.", + "C. The object has a leathery and wrinkled texture.", + "D. The object is covered in a layer of fine fuzz." 
+ ], + "answer": "B", + "type": "texture/pattern", + "image": "images/vqa_163.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01261883.jpg", + "mask_rles": [ + { + "size": [ + 1536, + 2304 + ], + "counts": "flf[28b_1:G8H9G8K6J4M2M3N3L3N2M4M2M3N3M2N2N3N1N2O2M2O1N3M2O1N3N0O2O0O2N101N101N101N1O2O0O2M2O2N1O2N101N101N101O00001O001N1O100000000000O100000000O1000000O2O0000000O10001O0O1000001O0O2O001O1N2O001O1O0O2O1O1O0O2O1N1O2N2N2N2M4M2M3M3M3M3N2M3N2N2N2N3L3N2M3M3M3M3M3L4K6SOYaN9m^1CXaN8[_1L5K\\SSj0" + } + ], + "question": "What is the texture of ?", + "choices": [ + "A. It has a rough and scaly texture.", + "B. It has a dimpled texture.", + "C. It has a matte and dull texture.", + "D. It has a glossy texture." + ], + "answer": "D", + "type": "texture/pattern", + "image": "images/vqa_164.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01261883.jpg", + "mask_rles": [ + { + "size": [ + 1536, + 2304 + ], + "counts": "Q`kc27h_13N7I4L2N1O2N3M6J6J7I7I2N2N1O2N2N001O00001O0000001O00001O0000000000000000001O2N2N2N001O1O001O1O00000000O1000000O1000000O1000000O1000000O100O100O100O100O100O100O1O100O100O100O100O100O1O1O1O1O1O1O1O1O100O1N2H8G9K5L4K5K6NP`ob0" + } + ], + "question": "Which of the following best describes ?", + "choices": [ + "A. A pomegranate.", + "B. A large cherry.", + "C. A small red apple.", + "D. A red plum." + ], + "answer": "C", + "type": "shape", + "image": "images/vqa_165.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01276645.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "maPe0=ao06J6L4K5M2N3L4M2N2N1N3N2N101N100O100O1O100O10O01OO2O0O2O9F7J1N101N1O2O0O2O0O2N100O1O001O1O1N2O0O2O2L4LR\\[1" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Silver.", + "B. Tan.", + "C. White.", + "D. Black." 
+ ], + "answer": "D", + "type": "color", + "image": "images/vqa_166.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01312527.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "`ja>3lg02O1N1000O010000O1N101N2O1O100O1O1O1O10000O10O100O10O10O14L2M101O000O101N101O001O4L1O100O0001O000010O00O2H7N4M101N10000O1O100011O0O10O000O10003M2N1N2O1N1O2O0NcUb7" + } + ], + "question": "What is the primary color of the body of ?", + "choices": [ + "A. Black and white.", + "B. Grey and brown.", + "C. Solid grey.", + "D. White and grey." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_167.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01312527.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "`ja>3lg02O1N1000O010000O1N101N2O1O100O1O1O1O10000O10O100O10O10O14L2M101O000O101N101O001O4L1O100O0001O000010O00O2H7N4M101N10000O1O100011O0O10O000O10003M2N1N2O1N1O2O0NcUb7" + } + ], + "question": "Based on its shape, what is in the image?", + "choices": [ + "A. The folded wings of a bird.", + "B. A frog sitting on the grassy bank.", + "C. A piece of a decaying tree stump.", + "D. A fish jumping out of the water." 
+ ], + "answer": "A", + "type": "shape", + "image": "images/vqa_168.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01350089.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "fnQf0?]g06J7I6L5L3LKaYOoN[f0R1gYOnNVf0T1jYOmNSf0U1mYOlNQf0U1oYOmNme0U1QZOnNne0R1QZOPOme0Q1SZOPOke0Q1UZOPOie0Q1WZOPOge0P1ZZOgNN0fe0Z1\\ZOfNO0ce0[1^ZOeNO0be0\\1_ZOdNO1`e0\\1bZOaNO3^e0]1mZOdNQe0]1nZOeNQe0[1nZOfNQe0[1nZOgNQe0Y1nZOhNQe0Y1nZOiNQe0W1nZOjNQe0W1nZOkNQe0T1oZOmNQe0S1nZOnNQe0S1nZOnNRe0R1lZOQORe0P1mZOQOSe0o0mZOQOSe0o0mZOQOSe0o0mZOQORe0P1oZO\\N=Jdd0j1oZOZNfe0f1:00O1L4MlZO^NPd0X1oZOiNd0?]d0g0g[O]OYd0c0d[O@\\d0`0d[O@\\d0`0d[O@\\d0?[[OlNFe0od0?X[OoNIb0od0?T[OSOM>od0?Q[OVO0;od0?Q[OVO0;od0?Q[OVO0;od0?Q[OVO1:nd0`0Q[OVO29md0a0Q[OVO38ld0b0Q[OVO47kd0c0Q[OUO67id0d0Q[OUO85gd0f0Q[OUO94fd0g0Q[OUO:3ed0h0Q[OUO;2dd0i0Q[OUO<1cd0j0Q[OUO0F4:kd0k0Q[OUO0G47ld0m0P[OUO0H45md0m0oZOVO0J41nd0n0nZOWO0K4Ond0o0nZOWO0L4LPe0P1lZOXO0M4JQe0P1kZOYO0N3ISe0o0jZOZO0O2HTe0o0jZOZO001FWe0P1fZO[O200EYe0a1gZOkNMF\\e0_1gZORO[e0m0eZOSO[e0m0eZOSO[e0n0cZORO^e0g110\\J" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. The masked object is black.", + "B. The masked object is white.", + "C. The masked object is brown.", + "D. The masked object is silver." + ], + "answer": "A", + "type": "color", + "image": "images/vqa_169.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01356234.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "Wcde0P1nf08I4L2N2H8M3H8M2O2N1O2O0O2O2N1N2N2O1N2N2O1O00001O001O1O1O1O1O001O00000000O100O10000O100O100O100O1O10001O0O1O2O1N2O1O1N101N2N1O2L4L4M3L3M4M3N2N3M3L3I7L4M5E:GaXOK^l`0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Black.", + "B. Brown.", + "C. Silver.", + "D. Dark green." 
+ ], + "answer": "D", + "type": "color", + "image": "images/vqa_170.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01356234.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "Wcde0P1nf08I4L2N2H8M3H8M2O2N1O2O0O2O2N1N2N2O1N2N2O1O00001O001O1O1O1O1O001O00000000O100O10000O100O100O100O1O10001O0O1O2O1N2O1O1N101N2N1O2L4L4M3L3M4M3N2N3M3L3I7L4M5E:GaXOK^l`0" + } + ], + "question": "What is the texture of ?", + "choices": [ + "A. Metallic.", + "B. Pinstriped.", + "C. Grooved.", + "D. Smooth." + ], + "answer": "D", + "type": "texture/pattern", + "image": "images/vqa_171.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01356234.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "Wcde0P1nf08I4L2N2H8M3H8M2O2N1O2O0O2O2N1N2N2O1N2N2O1O00001O001O1O1O1O1O001O00000000O100O10000O100O100O100O1O10001O0O1O2O1N2O1O1N101N2N1O2L4L4M3L3M4M3N2N3M3L3I7L4M5E:GaXOK^l`0" + } + ], + "question": "What is the shape of ?", + "choices": [ + "A. Oval-shaped.", + "B. Circular.", + "C. Square.", + "D. Rectangular." 
+ ], + "answer": "A", + "type": "shape", + "image": "images/vqa_172.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01364554.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 820 + ], + "counts": "PPa7;eo0;E2N1O1O1O2N1O1O1O1O1O1O2N1O1`QONPm0k1F7I7I3M3M3M3M3M2N1O001O1O001O001O1O001O001O001O1O001O001O1O001O001O001O1O001O001O1O001O001O001O001O001O00001O000000001O0000001O000000001O0000001O00000oTOUKmj0Q5N2N1OO100O1N2O1N20000000TUO[K]j0T501ON2N2000000O12N2N3M2N2N001OO100N2N2N2N2N2N2N2N2N21O00001O001OO100O100O1O100O1O100O1O100O100O1O100O1O100O1O100O100O10SUO`KZj0`4eUObKZj0]4gUOcKYj0]4gUOcKYj0\\4gUOeKYj0[4gUOeKYj0Z4gUOgKYj0Y4gUOfKZj0Z4eUOgK[j0X4eUOiK[j0W4eUOiK[j0V4eUOjK\\j0V4dUOjK\\j0U4dUOlK\\j0T4dUOlK\\j0T4cUOlK^j0S4bUOnK^j0R4bUOnK^j0Q4bUOPL^j0P4bUOoK_j0P4aUOQL_j0o3aUOQL_j0o3`UORL`j0m3aUORL`j0n3_UOSLaj0l3`UOTL`j0l3_UOULaj0j3`UOULaj0k3_UOULaj0k3^UOVLbj0i3_UOWL5Jbi0o3YVOVLO3gi0f3ZVOXLG:oi0^3ZVOXLE?", + "choices": [ + "A. The lettuce inside is shredded.", + "B. The tomato is diced into small cubes.", + "C. It contains whole, intact lettuce leaves.", + "D. The tortilla is rolled into a closed cylinder." 
+ ], + "answer": "A", + "type": "shape", + "image": "images/vqa_173.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01364554.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 820 + ], + "counts": "PPa7;eo0;E2N1O1O1O2N1O1O1O1O1O1O2N1O1`QONPm0k1F7I7I3M3M3M3M3M2N1O001O1O001O001O1O001O001O001O1O001O001O1O001O001O001O1O001O001O1O001O001O001O001O001O00001O000000001O0000001O000000001O0000001O00000oTOUKmj0Q5N2N1OO100O1N2O1N20000000TUO[K]j0T501ON2N2000000O12N2N3M2N2N001OO100N2N2N2N2N2N2N2N2N21O00001O001OO100O100O1O100O1O100O1O100O100O1O100O1O100O1O100O100O10SUO`KZj0`4eUObKZj0]4gUOcKYj0]4gUOcKYj0\\4gUOeKYj0[4gUOeKYj0Z4gUOgKYj0Y4gUOfKZj0Z4eUOgK[j0X4eUOiK[j0W4eUOiK[j0V4eUOjK\\j0V4dUOjK\\j0U4dUOlK\\j0T4dUOlK\\j0T4cUOlK^j0S4bUOnK^j0R4bUOnK^j0Q4bUOPL^j0P4bUOoK_j0P4aUOQL_j0o3aUOQL_j0o3`UORL`j0m3aUORL`j0n3_UOSLaj0l3`UOTL`j0l3_UOULaj0j3`UOULaj0k3_UOULaj0k3^UOVLbj0i3_UOWL5Jbi0o3YVOVLO3gi0f3ZVOXLG:oi0^3ZVOXLE?", + "choices": [ + "A. It is a full circular slice.", + "B. It is a sliced piece.", + "C. It is whole and round.", + "D. It has a jagged, leafy shape." + ], + "answer": "B", + "type": "shape", + "image": "images/vqa_174.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01364931.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "^dca01ng02N000QYO5ke0KTZO7je0IWZO8ie0HUZO9ke0GVZO8ke0HTZO7ne0HRZO8ne0IPZO8Qf0HnYO8Rf0HmYO9Tf0GkYO9Vf0FkYO8Vf0IiYO6Yf0IgYO6Zf0KfYO1]f0OcYO2]f0MdYO2\\f0OcYO1^f0NbYO3]f0NcYO1]f0OcYO2]f0MdYO2\\f0OcYO1^f0NbYO3]f0NcYO1]f0OcYO2oe0LhYO1:2me0OhYO190oe01gYOO91oe01hYON90Pf0M]YO3:17OUf00eYO051Wf0NdYO150Xf00bYO14O[f00bYO021\\f0ObYO020]f00aYO10Oaf00_YO0O0cf01]YOOO1ef00UYO0M08Ohf01PYO30O4Mnf09nXOK2MQg0a0nXO^OQg0d0nXO\\OQg0k0N02O1N5L=YO_XO5fg0O101N2N^bQ5" + } + ], + "question": "What is the shape of in the image?", + "choices": [ + "A. It has a high, gooseneck-style spout.", + "B. It has a straight, right-angled spout.", + "C. 
It is composed of two separate cross-shaped handles and a central spout.", + "D. It has a curved or arc-shaped spout." + ], + "answer": "D", + "type": "shape", + "image": "images/vqa_175.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01364931.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "f`g?^1Tf0>N3oYORNme0Q200O2O0O100O10000O2O0O10000O10001O000O100O1O1O2N1000000000010O0001O1hMY\\O5ic0GY\\O9he0O100O1O1O000001O000000000001O0000000000000000001O000O100000000000001O000000000000000O101O00000000O10000001O001O002Neeo5" + } + ], + "question": "What is the shape of in the image?", + "choices": [ + "A. A straight, rectangular bar.", + "B. A circular knob.", + "C. A curved handle.", + "D. A T-shaped pull handle." + ], + "answer": "C", + "type": "shape", + "image": "images/vqa_176.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01364931.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "f`g?^1Tf0>N3oYORNme0Q200O2O0O100O10000O2O0O10000O10001O000O100O1O1O2N1000000000010O0001O1hMY\\O5ic0GY\\O9he0O100O1O1O000001O000000000001O0000000000000000001O000O100000000000001O000000000000000O101O00000000O10000001O001O002Neeo5" + } + ], + "question": "Which of the following descriptions about the texture of is correct?", + "choices": [ + "A. The masked object has a smooth surface.", + "B. The masked object has a grooved texture from the wooden planks.", + "C. The masked object features a distinct wood grain pattern.", + "D. The masked object has a slatted texture." 
+ ], + "answer": "A", + "type": "texture/pattern", + "image": "images/vqa_177.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01364931.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "f`g?^1Tf0>N3oYORNme0Q200O2O0O100O10000O2O0O10000O10001O000O100O1O1O2N1000000000010O0001O1hMY\\O5ic0GY\\O9he0O100O1O1O000001O000000000001O0000000000000000001O000O100000000000001O000000000000000O101O00000000O10000001O001O002Neeo5" + } + ], + "question": "What is a defining characteristic of in the image?", + "choices": [ + "A. It is a vertically oriented mount.", + "B. It is a horizontally positioned dispenser.", + "C. It has a cylindrical shape.", + "D. It is an L-shaped bracket." + ], + "answer": "A", + "type": "shape", + "image": "images/vqa_178.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01364931.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "f`g?^1Tf0>N3oYORNme0Q200O2O0O100O10000O2O0O10000O10001O000O100O1O1O2N1000000000010O0001O1hMY\\O5ic0GY\\O9he0O100O1O1O000001O000000000001O0000000000000000001O000O100000000000001O000000000000000O101O00000000O10000001O001O002Neeo5" + } + ], + "question": "Which of the following statements accurately describes in the image?", + "choices": [ + "A. The control panel is located on the left side of its door.", + "B. It has ventilation slots located on its top surface.", + "C. The object is the same color as the wooden cabinets above it.", + "D. It has a large, vertical handle for opening the door." 
+ ], + "answer": "B", + "type": "shape", + "image": "images/vqa_179.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01364931.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "bdf>5ig03N101N2O1O001O1O001O1O1O001O1O0000O1N2O100O1000000O1000000O100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1nN1SZOOje0;oYOEPf0V1N2N2N200O10000000000000000000001O001O000000000000000001O000000000000000000000000000O1O0F;O101N2No\\P6" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. White.", + "B. Silver.", + "C. Brown.", + "D. Black." + ], + "answer": "D", + "type": "color", + "image": "images/vqa_180.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01364931.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "bdf>5ig03N101N2O1O001O1O001O1O1O001O1O0000O1N2O100O1000000O1000000O100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1nN1SZOOje0;oYOEPf0V1N2N2N200O10000000000000000000001O001O000000000000000001O000000000000000000000000000O1O0F;O101N2No\\P6" + } + ], + "question": "Which of the following best describes the shape of ?", + "choices": [ + "A. It is a high-arc gooseneck faucet with a pull-down sprayer.", + "B. It has two separate handles for hot and cold water.", + "C. It has a single handle and a curved spout.", + "D. It is a wall-mounted faucet positioned above the sink." 
+ ], + "answer": "C", + "type": "shape", + "image": "images/vqa_181.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01396529.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Pkd`09fo02O1M4M2O1O1N2N2N2O1N2N2N2N101N100O2O0O100O2O1N1O010N2N2NObQOcN^n0]13011OO1O2N1O0O2O001L310O1O1N1O2O0O2M20001O0O1O1O1O1O0011O0O100ZO[OPROf0Pn0[OnQOf0Qn0[OmQOf0Tn0[OjQOf0Vn0ZOiQOf0Xn0VO]QO2:h0\\n0XOcQOi0^n0<001N6K1N6J2N2N2O2N1N2O1O5J2MXUg4" + } + ], + "question": "What is the primary material of ?", + "choices": [ + "A. Plastic.", + "B. Leather.", + "C. Rubber.", + "D. Canvas." + ], + "answer": "C", + "type": "material", + "image": "images/vqa_182.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01403825.jpg", + "mask_rles": [ + { + "size": [ + 1152, + 2048 + ], + "counts": "fX[\\1b0]S12M3L3O2N101N101N101N101N101N101N101N101N101N1O2O0O2O0O2O1N101N100O01O001O001O00001O010O010O010O0010O010O0mN^NjoNb1TP1aNjoN_1UP1dNjoN[1UP1hNioNY1TP1kNjoNU1UP1mNkoNS1SP1POkoNP1SP1TOloNl0SP1VOkoNj0UP1XOjoNh0VP1YOhoNg0XP1[OgoNd0YP1^OeoNc0[P1^O`oNe0`P1\\O_oNe0`P1]O]oNd0dP1]OYoNe0fP1Z10010O001O010O00010O010O0100O1O010O100O10O0100O1O010O100O010O100000O1000000O0100000O10000000O0100000000O100000001O0O2O00001O001N110O00001O001O001O01O01O001O001O001O0000O1O1O1O10O0100O100O10000O010O100O100O10O010O10O10O1000O010000O02O0000000O1000000O101O00000O100000000O10001O0O100000000O1000001O0O1O100O1O100O1O101mNSoNPOnP1o0boNaN_P1]1S1O1N2N2O1N2O2M2O1N200000000O10001O000O101O00001O0O101O00001N10001O000O2O00001O0O10001N10001N10001N10001N10001N10001N10001N10001O0O2NXfVa0" + } + ], + "question": "What is the material of the cover of ?", + "choices": [ + "A. Wood.", + "B. Rubber.", + "C. Plastic.", + "D. Cardboard." 
+ ], + "answer": "C", + "type": "material", + "image": "images/vqa_183.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01403825.jpg", + "mask_rles": [ + { + "size": [ + 1152, + 2048 + ], + "counts": "fX[\\1b0]S12M3L3O2N101N101N101N101N101N101N101N101N101N1O2O0O2O0O2O1N101N100O01O001O001O00001O010O010O010O0010O010O0mN^NjoNb1TP1aNjoN_1UP1dNjoN[1UP1hNioNY1TP1kNjoNU1UP1mNkoNS1SP1POkoNP1SP1TOloNl0SP1VOkoNj0UP1XOjoNh0VP1YOhoNg0XP1[OgoNd0YP1^OeoNc0[P1^O`oNe0`P1\\O_oNe0`P1]O]oNd0dP1]OYoNe0fP1Z10010O001O010O00010O010O0100O1O010O100O10O0100O1O010O100O010O100000O1000000O0100000O10000000O0100000000O100000001O0O2O00001O001N110O00001O001O001O01O01O001O001O001O0000O1O1O1O10O0100O100O10000O010O100O100O10O010O10O10O1000O010000O02O0000000O1000000O101O00000O100000000O10001O0O100000000O1000001O0O1O100O1O100O1O101mNSoNPOnP1o0boNaN_P1]1S1O1N2N2O1N2O2M2O1N200000000O10001O000O101O00001O0O101O00001N10001O000O2O00001O0O10001N10001N10001N10001N10001N10001N10001N10001O0O2NXfVa0" + } + ], + "question": "Which of the following best describes a shape characteristic of ?", + "choices": [ + "A. The object is rectangular in shape.", + "B. The handle of the object is curved.", + "C. The object has a pointed tip.", + "D. The head of the object is rounded." + ], + "answer": "D", + "type": "shape", + "image": "images/vqa_184.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01413369.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "Y`g43lg06J6J6J6K4K6K5K5K5eYO]Nne0h1nYOXNRf0k100O10000000001O00001O0000O100O_ZOVNnd0i1R[O\\Njd0d1U[O`Nid0^1W[OeNjd0X1U[OiNnd0S1R[OnNPe0P1P[OoNTe0n0lZOROWe0l0gZOTOOAPe0Y1Q[OVOMHPe0o0R[OZOMOmd0d0V[O\\OM4ld0>W[O^OM4od0Ve03^[OMfd00Z[O0hd0OV[O2md0KR[O5Re0IR[O2Pe0LT[O0od0MV[OMnd01V[OJld04c1N2MoePb0" + } + ], + "question": "What is the shape of ?", + "choices": [ + "A. It is rectangular.", + "B. It is conical.", + "C. It is cylindrical.", + "D. 
It is spherical." + ], + "answer": "C", + "type": "shape", + "image": "images/vqa_185.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01413369.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "Y`g43lg06J6J6J6K4K6K5K5K5eYO]Nne0h1nYOXNRf0k100O10000000001O00001O0000O100O_ZOVNnd0i1R[O\\Njd0d1U[O`Nid0^1W[OeNjd0X1U[OiNnd0S1R[OnNPe0P1P[OoNTe0n0lZOROWe0l0gZOTOOAPe0Y1Q[OVOMHPe0o0R[OZOMOmd0d0V[O\\OM4ld0>W[O^OM4od0Ve03^[OMfd00Z[O0hd0OV[O2md0KR[O5Re0IR[O2Pe0LT[O0od0MV[OMnd01V[OJld04c1N2MoePb0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Blue.", + "B. Red.", + "C. Green.", + "D. Yellow." + ], + "answer": "D", + "type": "color", + "image": "images/vqa_186.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01413369.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "Y`g43lg06J6J6J6K4K6K5K5K5eYO]Nne0h1nYOXNRf0k100O10000000001O00001O0000O100O_ZOVNnd0i1R[O\\Njd0d1U[O`Nid0^1W[OeNjd0X1U[OiNnd0S1R[OnNPe0P1P[OoNTe0n0lZOROWe0l0gZOTOOAPe0Y1Q[OVOMHPe0o0R[OZOMOmd0d0V[O\\OM4ld0>W[O^OM4od0Ve03^[OMfd00Z[O0hd0OV[O2md0KR[O5Re0IR[O2Pe0LT[O0od0MV[OMnd01V[OJld04c1N2MoePb0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. It is blue.", + "B. It has white text.", + "C. It has red measurement markings.", + "D. It is solid white." 
+ ], + "answer": "B", + "type": "color", + "image": "images/vqa_187.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01420513.jpg", + "mask_rles": [ + { + "size": [ + 803, + 1024 + ], + "counts": "kkY63b12ee03UZO6de0LXZOde0YO]ZO:NM4ge0^O\\ZO?M3ge0BWZO=11ge0j0YZOUOhe0k0XZOUOhe0j0YZOUOge0l0YZOTOge0k0ZZOTOge0k0ZZOUO6ROod0i1lZOTO3WOnd0f1oZOSO2ZOmd0b1S[ORO0^Old0`1U[OQONAld0]1W[OROMBkd0[1Y[OSOKDjd0Y1][OQOJHgd0W1_[OQOIJgd0U1a[OPOHLfd0T1c[OnNHOdd0S1e[OnNE2cd0Q1j[OkNC5bd0P1n[OhN@a0e0QObb0e1\\]OfN]Of0c0QOcb0c1_]OdNZOj0b0QOcb0b1c]O`NYOn0?ROdb0`1f]O]NWOR1?ROcb0_1j]OXNVOX1;TO^a0Do^Oh1?@TOVOYa0GR_Oc1b0_OSOXOUa0JT_O`1d0]OUOYOPa0MV_O]1f0\\OTO[Ol`00Y_OY1h0ZOUO\\Oi`03Y_OW1j0XOVO^Oe`05Z_OT1m0WOVO_Oa`09Z_OR1Q1SOVOA^`0nc0CS]OOoN>mc0DU]ONnN?", + "choices": [ + "A. Solid color.", + "B. Checkered pattern.", + "C. Striped pattern.", + "D. Polka dot pattern." + ], + "answer": "B", + "type": "texture/pattern", + "image": "images/vqa_188.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01420513.jpg", + "mask_rles": [ + { + "size": [ + 803, + 1024 + ], + "counts": "kkY63b12ee03UZO6de0LXZOde0YO]ZO:NM4ge0^O\\ZO?M3ge0BWZO=11ge0j0YZOUOhe0k0XZOUOhe0j0YZOUOge0l0YZOTOge0k0ZZOTOge0k0ZZOUO6ROod0i1lZOTO3WOnd0f1oZOSO2ZOmd0b1S[ORO0^Old0`1U[OQONAld0]1W[OROMBkd0[1Y[OSOKDjd0Y1][OQOJHgd0W1_[OQOIJgd0U1a[OPOHLfd0T1c[OnNHOdd0S1e[OnNE2cd0Q1j[OkNC5bd0P1n[OhN@a0e0QObb0e1\\]OfN]Of0c0QOcb0c1_]OdNZOj0b0QOcb0b1c]O`NYOn0?ROdb0`1f]O]NWOR1?ROcb0_1j]OXNVOX1;TO^a0Do^Oh1?@TOVOYa0GR_Oc1b0_OSOXOUa0JT_O`1d0]OUOYOPa0MV_O]1f0\\OTO[Ol`00Y_OY1h0ZOUO\\Oi`03Y_OW1j0XOVO^Oe`05Z_OT1m0WOVO_Oa`09Z_OR1Q1SOVOA^`0nc0CS]OOoN>mc0DU]ONnN in the image?", + "choices": [ + "A. White.", + "B. Green.", + "C. Red.", + "D. Black." 
+ ], + "answer": "D", + "type": "color", + "image": "images/vqa_189.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01420513.jpg", + "mask_rles": [ + { + "size": [ + 803, + 1024 + ], + "counts": "gX]=:fh04L4L4L3M4L3M4L3M4L3M3M3M2N3M2N3M2N3M3M3K5N2O1O1O1O1O100O1O1O1O100O1O1O1O100O1O1O1O100O1O1O1O100O100O100O1O100O1000000000001O000001O00000000000000000000001O000000000000000000001O00000000000000000000001O000000000000000000001O000O100O100O100O1O1O1N3J5M3N2M3M3N2M3M4M2M4L4M3L5K7J6I7I7I5LmdU8" + } + ], + "question": "What is the shape of the platform on ?", + "choices": [ + "A. Trapezoidal.", + "B. Rectangular.", + "C. Square.", + "D. Circular." + ], + "answer": "B", + "type": "shape", + "image": "images/vqa_190.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01420513.jpg", + "mask_rles": [ + { + "size": [ + 803, + 1024 + ], + "counts": "gX]=:fh04L4L4L3M4L3M4L3M4L3M3M3M2N3M2N3M2N3M3M3K5N2O1O1O1O1O100O1O1O1O100O1O1O1O100O1O1O1O100O1O1O1O100O100O100O1O100O1000000000001O000001O00000000000000000000001O000000000000000000001O00000000000000000000001O000000000000000000001O000O100O100O100O1O1O1N3J5M3N2M3M3N2M3M4M2M4L4M3L5K7J6I7I7I5LmdU8" + } + ], + "question": "What is the shape of ?", + "choices": [ + "A. The masked object is circular.", + "B. The masked object is triangular.", + "C. The masked object is rectangular.", + "D. The masked object is oval." 
+ ], + "answer": "C", + "type": "shape", + "image": "images/vqa_191.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01420513.jpg", + "mask_rles": [ + { + "size": [ + 803, + 1024 + ], + "counts": "gX]=:fh04L4L4L3M4L3M4L3M4L3M3M3M2N3M2N3M2N3M3M3K5N2O1O1O1O1O100O1O1O1O100O1O1O1O100O1O1O1O100O1O1O1O100O100O100O1O100O1000000000001O000001O00000000000000000000001O000000000000000000001O00000000000000000000001O000000000000000000001O000O100O100O100O1O1O1N3J5M3N2M3M3N2M3M4M2M4L4M3L5K7J6I7I7I5LmdU8" + } + ], + "question": "Which of the following statements accurately describes a color feature of ?", + "choices": [ + "A. The top surface of the object is gray.", + "B. The object has a black display screen.", + "C. The main body of the object is light green.", + "D. The entire object is a uniform color." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_192.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01420513.jpg", + "mask_rles": [ + { + "size": [ + 803, + 1024 + ], + "counts": "`hla02Pi02N2O1N1OKWWO3hh0NXWO2bh02`WON_h02bWON]h02cWON]h03cWOL]h04cWOL]h05bWOK^h05bWOK^h06aWOJ_h0O`WO21O_h0OaWO2OO`h0OaWO2OO`h0ObWO7^h0IbWO7^h0IcWO6^h0JaWO6_h0JbWO5_h0JaWO6_h0K`WO5ah0L]WO4ch061O1O1O00001O2N2O0O2OO01O01O01O010O0010O0001Ndio5" + } + ], + "question": "What is the material of the handles of ?", + "choices": [ + "A. Metal.", + "B. Wood.", + "C. Rubber.", + "D. Plastic." 
+ ], + "answer": "D", + "type": "material", + "image": "images/vqa_193.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01453850.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "R[X<;dg02N2N1O1O1O3N2N2N101N2N1O2N2N3M4L3L5L3N2M2O0O00O101O0O10000O100O101O0O10000O100O101O0O1000O01E_YOVObf0i0;N2O1N2O001O100O1000O010000O100000O010000O1000O10O1000000O10O10O1000O10O10O10O10O1000O01000O010000O01000O01000O100O1O1O1O2N1O1O1O1O1O2N1O2N3M3M3M2N3M3M3M3N6I6K20N110O01O010O520OJ6J7H7I8G8H:G;D;EWSZ8" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Silver.", + "B. Blue.", + "C. Yellow.", + "D. White." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_194.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01453850.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "R[X<;dg02N2N1O1O1O3N2N2N101N2N1O2N2N3M4L3L5L3N2M2O0O00O101O0O10000O100O101O0O10000O100O101O0O1000O01E_YOVObf0i0;N2O1N2O001O100O1000O010000O100000O010000O1000O10O1000000O10O10O1000O10O10O10O10O1000O01000O010000O01000O01000O100O1O1O1O2N1O1O1O1O1O2N1O2N3M3M3M2N3M3M3M3N6I6K20N110O01O010O520OJ6J7H7I8G8H:G;D;EWSZ8" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. It is made of metal.", + "B. It is made of plastic.", + "C. It is made of wood.", + "D. It is made of stone." 
+ ], + "answer": "B", + "type": "material", + "image": "images/vqa_195.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01453850.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "R[X<;dg02N2N1O1O1O3N2N2N101N2N1O2N2N3M4L3L5L3N2M2O0O00O101O0O10000O100O101O0O10000O100O101O0O1000O01E_YOVObf0i0;N2O1N2O001O100O1000O010000O100000O010000O1000O10O1000000O10O10O1000O10O10O10O10O1000O01000O010000O01000O01000O100O1O1O1O2N1O1O1O1O1O2N1O2N3M3M3M2N3M3M3M3N6I6K20N110O01O010O520OJ6J7H7I8G8H:G;D;EWSZ8" + } + ], + "question": "Which of the following describes the shape of ?", + "choices": [ + "A. A flat-topped object with four legs.", + "B. A cylindrical object with a rounded top.", + "C. A series of connected, curled tentacles.", + "D. A rectangular frame with a grid pattern." + ], + "answer": "A", + "type": "shape", + "image": "images/vqa_196.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01455911.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "P_k23Tf0Nb[O5[d0Nb[O4]d0Lb[O6]d0JeZO315Ze0H]ZOa02I_e0GYZOk02@4E^d06\\[OJKV11UO4Nad00\\[Of1NaNbd0k2K4M3L3M2O1N2O1O1O1O1O1O1O1O1O1O1000000O1000000O10000000O01000000O0100000O101O\\Lc\\Oe2]c0YMd\\Oh2[c0YMe\\Og2[c0XMf\\Oi2Yc0VMh\\Oj2Xc0VMh\\Oj2Xc0UMh\\Ol2Xc0SMi\\Om2Xc0RMh\\On2[c0nLf\\OR3^c0iLd\\OV3\\c0jLe\\OU3[c0jLf\\OV3Zc0fLk\\OY3Tc0hLm\\OW3Sc0hLo\\OW3Pc0jLP]O>[OR2ec0_MR]O2E_2Xc0`Mh]O`2Xb0_Mi]Oa2Vb0`Mk]O_2Ub0`Ml]O`2Sb0aMm]O_2Sb0`Mn]O`2Rb0`Mn]O`2Sb0^Mn]Ob2hc000000O100O100O100O100O1000000O1000000O100000000O1000000O1000000O100001O1O1O1O1O1O1O1O1bMgZOS2Ze0kMgZOU2Ze0gMjZOX2ae0N1O1O2N1O1O2N1O2N1O1O2N1O:F>B3M0000O10O10000O100O00100O1O100O1O1O101N2N2O1N2N2Ml`\\a0" + } + ], + "question": "Which of the following descriptions about a part of is correct?", + "choices": [ + "A. It has a rectangular license plate.", + "B. The rearview mirrors are circular.", + "C. The seat is triangular.", + "D. 
The headlight is square." + ], + "answer": "A", + "type": "shape", + "image": "images/vqa_197.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01511060.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "Rb0e0Zg01O100000001O00000000000000000000001O00000000000000000000001O000000000000000000001O0001O00001O00001O000010O0001O00001O00000O2O0O101N100O2O0O101N101N101N2O1NcUTf0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Light beige.", + "B. Light gray.", + "C. Dark gray.", + "D. Off-white." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_198.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01525619.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1024 + ], + "counts": "ZQ\\l0g0P5[OVe0h0_ZOC^e0?YZOJfe07PZO3ne0OiYO:Wf0F_YOe0_f0]OWYOn0gf0ROoXOY1Pg0gNgXOc1dg0RNQXOY2ng0gMiWOc2Xh0[M^WOQ3ah0nLVWO\\3lh0aLjVOi3Ui0XLbVOQ4Wi0WL^VOS4Zi0VL\\VOR4ci0V1O2O1N20O01000gIoVOb5hi001O2M10O1001O000O2O000O1O0010O01O1O10O01O1O00100O001O10O01O1O010O1O1O010O1O00100O001O10O01O1O1N101O001O0O2N101O1O0010O10M2N3M2N3M2M4M2N3M2N3N002K5K5L4K4L5K4L5K3M4M3L4L4L4L4L4L4M3L5KoK" + } + ], + "question": "Which of the following descriptions about the shape of is correct?", + "choices": [ + "A. The masked object has a curved handle.", + "B. The main body of is a perfect cylinder.", + "C. The masked object is heart-shaped.", + "D. The top rim of has a scalloped edge." 
+ ], + "answer": "A", + "type": "shape", + "image": "images/vqa_199.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01534987.jpg", + "mask_rles": [ + { + "size": [ + 786, + 1178 + ], + "counts": "Z`d27Yh04L4L4L4L4L5K4M2N2N2N2N2N2N1O2N2N2N2N1O2O1N2N101N101N100O2N100O100O2O0fNkMV\\OV2ic0mMT\\OT2lc0oMQ\\OR2nc0QNn[OP2Rd0SNk[Om1Ud0UNh[Ol1Xd0WNd[Oj1\\d0YNa[Oh1^d0ZN_[Og1ad0\\N\\[Od1dd0_NX[Ob1hd0U10O3M3M3M2O2M3M3M2N3L4M2N000000000O10000000000O10000000000O2O0000000O10000O101N100O100O10000O2O0O100O100O100O2O000O2O0O1O2N1O2N2N1O2N1N3L3N4J5J6J7I6J7I7J`adf0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Transparent.", + "B. White.", + "C. Brown.", + "D. Black." + ], + "answer": "A", + "type": "color", + "image": "images/vqa_200.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01575962.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Xh0>bo010000000O0100O10000O1O101N100O2O0O1010O1O1N2N2LoQP2E^noM1O100N101O1N2O1O1O10O10O100000000O0100000O100O1O001N2O1O0001O1N1L500O1000O010000O10O01O1MdThc0" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. The masked object is made of metal.", + "B. The masked object is made of ceramic.", + "C. The masked object is made of wood.", + "D. The masked object is made of plastic." + ], + "answer": "C", + "type": "material", + "image": "images/vqa_201.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01575962.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Xh0>bo010000000O0100O10000O1O101N100O2O0O1010O1O1N2N2LoQP2E^noM1O100N101O1N2O1O1O10O10O100000000O0100000O100O1O001N2O1O0001O1N1L500O1000O010000O10O01O1MdThc0" + } + ], + "question": "Which of the following best describes the shape of ?", + "choices": [ + "A. The masked object is long and cylindrical.", + "B. 
The masked object is a set of two long, flat metal strips.", + "C. The masked object is a thin, flat slice.", + "D. The masked object is a folded piece of cloth." + ], + "answer": "A", + "type": "shape", + "image": "images/vqa_202.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01616394.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1024 + ], + "counts": "^bh31io09G7J7PRO]OZl0i0cSO^OSl0i0jSO]Olk0j0PTO]Ofk0i0XTO\\O_k0k0^TO[OXk0l0eTOZOQk0l0lTO[Oij0m0TUOYObj0n0[UOXO[j0n0cUOXOSj0o0iUOXOmi0o0PVOVOgi0P1WVOTOai0S1\\VOoN^i0m3J6I7J6J6J5N3M3N2O1N2_N[HeZOf7Pe0eHoZO\\7md0hHR[OY7ld0lHP[OU7od0lHoZOV7od0lHP[Om2^OOae0UMP[O^21:nd0YMP[O\\277id0]MP[O[2<5cd0aMP[OZ2a0YNcN4ke0ZOoZOY2i0QNcN:ee0\\OnZOX2o0jMeN`0^e0^OmZOW2U1cMgNf0We0_OmZOW2X1^MkNj0Ze0WObZO`2[1ZMPOm0Re0ZObZO^2^1VMUOP1jd0]ObZO\\2b1QMYOT1bd0_ObZO\\2^3SNoa0BbZOZ2c3RNja0EbZOX2h3QNea0HbZOV2m3oMaa0LaZOT2Q4oM]a0MbZOS2T4oMYa0ObZOQ2W4PNWa0O`ZOQ2\\4oMSa01`ZOo1`4oMo`03`ZOm1d4nMl`05`ZOl1g4mMi`08_ZOj1k4lMf`0;^ZOi1m4kMe`0=\\ZOh1R5iMb`0?\\ZOg1U5hM_`0b0[ZOe1Y5fM]`0f0YZOc1]5dM[`0j0WZOa1`5cMZ`0n0TZOi06ZNa5KV`0U1PZOc0gl0Y12O1O1O1O1O100O1OU]OVMb8i2^GZM`8e2`G\\M`8d2`G]M_8b2aG`M\\8a2eGaMW8`2jGaMS8`2oGaMn7`2RHaMk7`2WH`Mf7a2[HaMa7`2aH`M]7`2eHaMW7e0a^O:Y:ROS7d0k^O5S:WOo6d0T_O0n9^Ok6b0\\_OKk9Cf6b0g_ODe9Ka6`0P@@`91_6=X@]OZ97\\6;`@XOU9>Z69f@SOS9d0V68mKIQ46PLKo35RLLm32TLOl3OVL1k3LWL4i3JXL6j3GXL9h3FXL:j3CXL`0e3^O]Le0^3\\ObLi0X3WOiLm0R3TOnLQ1k2POVMR1g2nNZMS1d2nN\\MS1b2mN_MT1_2lNbMU1\\2lNdMU1Y2lNhMU1V2kNkMV1S2jNnMW1P2jNPNW1n1iNSNX1j1iNVNY1h1hNXNY1f1gN[NZ1c1fN^N[1`1fN`N[1^1eNcN\\1\\1cNeN^1Y1bNiM^OkCQ2[>_NgMGlCk1\\>[NfM1lCf1]>UNeMQNdMe0mC[1_>lMbMP1mCU1b>fM_M\\1mCo0Qk0YOlTOi0nj0^OPUOc0jj0DTUO>hj0GUUO:ij0`2N2O00000001N2N2M3K5K5K5K5dM`TO2ek0EcTO7bk0@gTO;^k0BfTO0hk0M\\TOL2WO\\j0k0gUOGO@Zj0g0kUODMFXj0d0oUOAKLVj0a0TVO]OG4Uj0>WVOYOF9Tj0<[VOTODa0Qj09iWOHWh06jWOJWh04iWONXh0OiWO2Wh0LjWO4Wh0JjWO7Vh0HiWO:Wh0DjWO?", + "choices": [ + "A. Golden brown.", + "B. A mix of green and white.", + "C. 
A mix of purple and orange.", + "D. Creamy white." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_203.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01616394.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1024 + ], + "counts": "^bh31io09G7J7PRO]OZl0i0cSO^OSl0i0jSO]Olk0j0PTO]Ofk0i0XTO\\O_k0k0^TO[OXk0l0eTOZOQk0l0lTO[Oij0m0TUOYObj0n0[UOXO[j0n0cUOXOSj0o0iUOXOmi0o0PVOVOgi0P1WVOTOai0S1\\VOoN^i0m3J6I7J6J6J5N3M3N2O1N2_N[HeZOf7Pe0eHoZO\\7md0hHR[OY7ld0lHP[OU7od0lHoZOV7od0lHP[Om2^OOae0UMP[O^21:nd0YMP[O\\277id0]MP[O[2<5cd0aMP[OZ2a0YNcN4ke0ZOoZOY2i0QNcN:ee0\\OnZOX2o0jMeN`0^e0^OmZOW2U1cMgNf0We0_OmZOW2X1^MkNj0Ze0WObZO`2[1ZMPOm0Re0ZObZO^2^1VMUOP1jd0]ObZO\\2b1QMYOT1bd0_ObZO\\2^3SNoa0BbZOZ2c3RNja0EbZOX2h3QNea0HbZOV2m3oMaa0LaZOT2Q4oM]a0MbZOS2T4oMYa0ObZOQ2W4PNWa0O`ZOQ2\\4oMSa01`ZOo1`4oMo`03`ZOm1d4nMl`05`ZOl1g4mMi`08_ZOj1k4lMf`0;^ZOi1m4kMe`0=\\ZOh1R5iMb`0?\\ZOg1U5hM_`0b0[ZOe1Y5fM]`0f0YZOc1]5dM[`0j0WZOa1`5cMZ`0n0TZOi06ZNa5KV`0U1PZOc0gl0Y12O1O1O1O1O100O1OU]OVMb8i2^GZM`8e2`G\\M`8d2`G]M_8b2aG`M\\8a2eGaMW8`2jGaMS8`2oGaMn7`2RHaMk7`2WH`Mf7a2[HaMa7`2aH`M]7`2eHaMW7e0a^O:Y:ROS7d0k^O5S:WOo6d0T_O0n9^Ok6b0\\_OKk9Cf6b0g_ODe9Ka6`0P@@`91_6=X@]OZ97\\6;`@XOU9>Z69f@SOS9d0V68mKIQ46PLKo35RLLm32TLOl3OVL1k3LWL4i3JXL6j3GXL9h3FXL:j3CXL`0e3^O]Le0^3\\ObLi0X3WOiLm0R3TOnLQ1k2POVMR1g2nNZMS1d2nN\\MS1b2mN_MT1_2lNbMU1\\2lNdMU1Y2lNhMU1V2kNkMV1S2jNnMW1P2jNPNW1n1iNSNX1j1iNVNY1h1hNXNY1f1gN[NZ1c1fN^N[1`1fN`N[1^1eNcN\\1\\1cNeN^1Y1bNiM^OkCQ2[>_NgMGlCk1\\>[NfM1lCf1]>UNeMQNdMe0mC[1_>lMbMP1mCU1b>fM_M\\1mCo0Qk0YOlTOi0nj0^OPUOc0jj0DTUO>hj0GUUO:ij0`2N2O00000001N2N2M3K5K5K5K5dM`TO2ek0EcTO7bk0@gTO;^k0BfTO0hk0M\\TOL2WO\\j0k0gUOGO@Zj0g0kUODMFXj0d0oUOAKLVj0a0TVO]OG4Uj0>WVOYOF9Tj0<[VOTODa0Qj09iWOHWh06jWOJWh04iWONXh0OiWO2Wh0LjWO4Wh0JjWO7Vh0HiWO:Wh0DjWO in the image?", + "choices": [ + "A. A smooth, curved arc.", + "B. A ruffled or scalloped edge.", + "C. A collection of separate, sharp fragments.", + "D. A complete and perfect circle." 
+ ], + "answer": "B", + "type": "shape", + "image": "images/vqa_204.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01621320.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "icS?5jg02N101O00000O1O10000O1000000O101O0O5J3N2O1M2O100O1O1N2O1cKSOgAm0W>VOgAk0X>WOeAk0[>VOcAk0]>VOaAk0^>XO_Ai0a>ZO\\Af0e>\\OWAe0i>]OUAc0k>^OTAb0m>^OQAc0P?^Oj@\\OfLV1`b0^Oh@h0X?YOf@g0[?ZOd@f0\\?[Oc@e0]?\\O`@e0`?^O[@d0f?]OW@e0j?[OT@e0m?\\OQ@e0o?[Om_Oi0S`0ZOi_Of0X`0\\Oe_Od0\\`0]Ob_Oc0_`0^O__Oa0c`0BT_Oc0m`0_O_^OR1ca0POY^OP1ha0TOS^Ol0na0VOP^Oh0Rb01U]ONlb04R]OIQc0b22N2M3PNk\\OPOYc0o0g\\OPO[c0P1d\\OoN`c0n0`\\OQOdc0l0\\\\OSOgc0k0Z\\OSOjc0j0V\\OUOmc0i0S\\OUOPd0j0P\\OUORd0j0n[OUOUd0i0k[OWOXd0f0h[OYOZd0f0f[OYO\\d0f0d[OXO`d0f0a[OXObd0f0^[OYOfd0d0Z[OZOld0b0T[O]ORe0>nZOAYe09gZOE_e07aZOGde06\\ZOIje01WZOMle02TZOMne01TZOMZf0EhYO9\\f0BhYO:QUh6" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Blue.", + "B. Brown.", + "C. White.", + "D. Black." + ], + "answer": "D", + "type": "color", + "image": "images/vqa_205.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01621320.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "icS?5jg02N101O00000O1O10000O1000000O101O0O5J3N2O1M2O100O1O1N2O1cKSOgAm0W>VOgAk0X>WOeAk0[>VOcAk0]>VOaAk0^>XO_Ai0a>ZO\\Af0e>\\OWAe0i>]OUAc0k>^OTAb0m>^OQAc0P?^Oj@\\OfLV1`b0^Oh@h0X?YOf@g0[?ZOd@f0\\?[Oc@e0]?\\O`@e0`?^O[@d0f?]OW@e0j?[OT@e0m?\\OQ@e0o?[Om_Oi0S`0ZOi_Of0X`0\\Oe_Od0\\`0]Ob_Oc0_`0^O__Oa0c`0BT_Oc0m`0_O_^OR1ca0POY^OP1ha0TOS^Ol0na0VOP^Oh0Rb01U]ONlb04R]OIQc0b22N2M3PNk\\OPOYc0o0g\\OPO[c0P1d\\OoN`c0n0`\\OQOdc0l0\\\\OSOgc0k0Z\\OSOjc0j0V\\OUOmc0i0S\\OUOPd0j0P\\OUORd0j0n[OUOUd0i0k[OWOXd0f0h[OYOZd0f0f[OYO\\d0f0d[OXO`d0f0a[OXObd0f0^[OYOfd0d0Z[OZOld0b0T[O]ORe0>nZOAYe09gZOE_e07aZOGde06\\ZOIje01WZOMle02TZOMne01TZOMZf0EhYO9\\f0BhYO:QUh6" + } + ], + "question": "What material is at the person's waist made of?", + "choices": [ + "A. 
Elastic.", + "B. Leather.", + "C. Cotton.", + "D. Nylon." + ], + "answer": "B", + "type": "material", + "image": "images/vqa_206.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01621320.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "icS?5jg02N101O00000O1O10000O1000000O101O0O5J3N2O1M2O100O1O1N2O1cKSOgAm0W>VOgAk0X>WOeAk0[>VOcAk0]>VOaAk0^>XO_Ai0a>ZO\\Af0e>\\OWAe0i>]OUAc0k>^OTAb0m>^OQAc0P?^Oj@\\OfLV1`b0^Oh@h0X?YOf@g0[?ZOd@f0\\?[Oc@e0]?\\O`@e0`?^O[@d0f?]OW@e0j?[OT@e0m?\\OQ@e0o?[Om_Oi0S`0ZOi_Of0X`0\\Oe_Od0\\`0]Ob_Oc0_`0^O__Oa0c`0BT_Oc0m`0_O_^OR1ca0POY^OP1ha0TOS^Ol0na0VOP^Oh0Rb01U]ONlb04R]OIQc0b22N2M3PNk\\OPOYc0o0g\\OPO[c0P1d\\OoN`c0n0`\\OQOdc0l0\\\\OSOgc0k0Z\\OSOjc0j0V\\OUOmc0i0S\\OUOPd0j0P\\OUORd0j0n[OUOUd0i0k[OWOXd0f0h[OYOZd0f0f[OYO\\d0f0d[OXO`d0f0a[OXObd0f0^[OYOfd0d0Z[OZOld0b0T[O]ORe0>nZOAYe09gZOE_e07aZOGde06\\ZOIje01WZOMle02TZOMne01TZOMZf0EhYO9\\f0BhYO:QUh6" + } + ], + "question": "Which of the following best describes the texture of ?", + "choices": [ + "A. It has a ribbed texture.", + "B. It has a smooth surface.", + "C. It has a denim-like texture.", + "D. It has a braided texture." + ], + "answer": "B", + "type": "texture/pattern", + "image": "images/vqa_207.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01621320.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "nm=5jg02N2O00000001N10000001O00000001O00000000O1L4`XOISg0e0L3O2O0O2N1O1O1O2N1O1O101N1O1O1O1OD`YO[O^f0d0dYO\\O[f0e0fYOZOZf0f0fYOZOYf0h0gYOXOXf0h0hYOXOWf0j0iYOUOVf0l0jYOTOVf0l0kYOSOTf0o0lYOQORf0P1nYOPORf0Q1nYOnNQf0S1oYOmNQf0T1oYOlNoe0U1QZOkNne0V1SZOiNme0X1RZOhNme0Y1TZOfNle0[1SZOeNle0\\1UZObNke0`1:0O2O0O101N101O001N10000O10O1N2O001O1O1O1O1O1O2N1O1O1O102M10001O0O2O001N101O001N101O0O2O001O0O2O1O1N2O001N2O1O2M101O1N4M3M4K`jed0" + } + ], + "question": "Which statement accurately describes a feature of ?", + "choices": [ + "A. 
The masked object is long and curved.", + "B. The masked object is perfectly spherical.", + "C. The masked object has a short stem.", + "D. The masked object has a long, prominent stem." + ], + "answer": "C", + "type": "shape", + "image": "images/vqa_208.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01629547.jpg", + "mask_rles": [ + { + "size": [ + 1600, + 1200 + ], + "counts": "mP1`8P1jHAUOdU1S8ijNjIUU1X6jjNiITU1X6ljNiISU1X6ljNjIRU1V6njNkIQU1T6PkNmIoT1S6QkNnInT1R6RkNoImT1P6TkNPJmT1o5SkNPJoT1o5QkNQJoT1o5QkNQJoT1o5RkNPJnT1P6SkNoImT1Q6UkNlIlT1T6_kNaIaT1_6^kNjGoNQ1cU1U7^kNjGB8VU1n7XkNjGZV1V8fiNjGZV1V8fiNjGZV1V8fiNjGZV1V8fiNjGZV1V8fiNjG[V1U8eiNjG\\V1V8diNjG\\V1V8diNjG\\V1V8diNjG\\V1V8diNjG\\V1V8diNjG\\V1V8diNjG\\V1V8diNjG\\V1U8eiNkG[V1U8eiNkG[V1U8eiNkG[V1U8eiNkG[V1U8eiNkG[V1U8eiNkG[V1U8eiNkG[V1U8eiNkG[V1U8diNlG\\V1T8diNlG\\V1T8diNlG\\V1T8ciNmG]V1S8ciNmG]V1S8ciNmG]V1S8biNnG^V1R8biNnG^V1R8biNoG]V1Q8biNQH]V1o7ciNQH]V1o7ciNRH\\V1n7ciNSH]V1m7ciNTH\\V1l7ahNdGi0b0fV1h7_hNmGg0;jV1h7]hNSHe06mV1]8nhNdGRW1`8jhNaGUW1c8ghN]GYW1g8bhNZG^W1d900000000000a0_O6J6J6J6J6J6J6J6J6J6J6J3M3M4L4L9G5L2M1O1O1O2N2N001O1O001O001O1O001O0000oE^lNd5bS1ZJalNe5_S1ZJdlNd5\\S1ZJglNd5ZS1ZJjlNd5VS1ZJmlNe5SS1YJQmNe5oR1YJTmNe5mR1YJVmNf5jR1XJZmNf5fR1XJ^mNf5bR1XJemNb5\\R1]JjmN^5WR1_JomN]5QR1aJVnNY5kQ1eJ[nNW5fQ1fJanNU5`Q1hJgnNR5[Q1kJknNo4\\Q1jJknNn4YQ1oJmnNj4WQ1VKmnNc4VQ1\\KonN\\4UQ1cKonNV4UQ1jKonNn3UQ1QLonNi3TQ1WLPoNa3TQ1^LQoNZ3SQ1fLQoNR3SQ1mLToNj2nP1WMYoN^2kP1aM\\oNU2hP1\\L]jN7R5R3eP1eL[jN8^5[2bP1[MRjN:^6:_P1ZOUiN;V_1Dk`NS_1@o`N?R_1_OPaN`0R_1]OPaNa0R_1^Oo`N`0T_1]On`Na0T_1]On`Na0U_1\\Oo`N`0S_1^OQaN>R_1_ORaN=Q_1_OTaN=[`1K4M2N3KWblb1" + } + ], + "question": "What is a primary material of ?", + "choices": [ + "A. Bamboo.", + "B. Metal.", + "C. Wood.", + "D. Rubber." 
+ ], + "answer": "D", + "type": "material", + "image": "images/vqa_209.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01629547.jpg", + "mask_rles": [ + { + "size": [ + 1600, + 1200 + ], + "counts": "_Sji02la14L3N2N2N2N2N2N2N2N2N2N2N2N2N2N2N2DnNg_NT1V`1=N1O1O1O1O1O1O100O1O1O1O1O010O1O1O1O1O100O1O001O3M2O2M2N3M2N3M2WdNZMVX1h2fgN\\MWX1f2ggN[MXX1h2egNZMYX1h2fgNXMEk1eS1P1dlNUMBd2US19XmNRM_OT3oR1L`mNPM\\O\\3PS1GamNmL[Oc3RS1AamNmLXOj3SS1\\OcmNiLVOS4TS1UOdmNhLVOX4TS1ROdmNfLVO]4SS1nNemNeLWOb4QS1kNfmNcLVOg4RS1gNfmNbLWOl4PS1dNfmNaLXOP5oR1`NhmN_LWOW5nR1\\NhmN^LXOZ5nR1YNimN\\LXO`5lR1VNimNZLYOf5kR1QNkmNYLXOj5kR1oMjmNWLZOo5iR1kMkmNWLYOT6iR1fMlmNVLZOX6hR1cMmmNULXO^6hR1]MomNULXOc6fR1YMPnNTLYOg6eR1UMQnNTLYOj6eR1SMQnNSLXOm6fR1PMQnNSLXOQ7eR1mLQnNRLZOS7dR1kLQnNRL[OV7bR1iLRnNQL[OX7bR1gLRnNQL\\OZ7aR1hLomNnK@]7_R1hLmmNkKD_7^R1gLmmNjKDb7]R1eLmmNiKFd7\\R1dLlmNhKHf7[R1bLlmNhKIi7YR1`LmmNgKIk7YR1^LmmNgKIn7YR1\\LlmNfKKQ8WR1YLmmNgKKR8WR1WLnmNfKKV8UR1TLomNfKLX8TR1QLPnNgKMY8RR1PLPnNhKM[8QR1mKRnNgKN]8oQ1lKRnNgKO`8mQ1iKTnNfK0b8kQ1hKTnNgK0c8kQ1fKUnNfK1f8hQ1dKVnNfK2h8gQ1[IbmNId0SN4j8eQ1ZIdmNHc0TN3m8dQ1WIhmNG?VN5m8cQ1VIkmNE=WN5Q9aQ1SIomND9YN7Q9`Q1RIQnNC8ZN6S9`Q1PITnNB5ZN8U9^Q1oHWnNA1\\N9U9_Q1nHXnNAO[N;V9^Q1nHZnN@L\\N;X9^Q1lH]nN_OH^N=W9^Q1lH^nN^OG^N=Y9^Q1kH`nN]OC`N>Y9_Q1jHbnN\\O@`N`0[9]Q1iHdnN\\O^O`N`0\\9^Q1hHfnN[OZObNb0\\9]Q1gHinNZOWObNc0^9]Q1fHknNYOTOcNc0_9^Q1eHlnNDBl7aQ1`HonNA\\OT8eQ1[HQoN_OVOZ8iQ1WHRoN]OnNe8oQ1nGUoNm9kP1SFWoNk9iP1UFXoNk9gP1UF[oNi9eP1WF]oNg9cP1XF_oNh9`P1XFboNf9^P1YFeoNe9\\P1ZFaoNj9^P1YDVnNX1]1`:]P1WD_nNGER1`1P;]P1VDgnNh0l0S;]P1SDinNi0j0T;^P1PDknNk0g0U;bQ1jD^nNW;cQ1gD]nNY;dQ1fD\\nNZ;eQ1eD[nN\\;eQ1cD[nN];gQ1aDYnN`;gQ1_DYnNa;hQ1^DYnNa;hQ1^DXnNc;iQ1[DWnNe;jQ1ZDWnNe;jQ1ZDVnNf;kQ1YDUnNg;mQ1VDTnNj;mQ1UDTnNj;mQ1TDTnNl;mQ1RDTnNn;nQ1oCTnNPZ1L]Nl9fQ1kE]nNb0`1G^Nj9gQ1mEXnNd0d1D]Nk9gQ1oEVnNb0g1C\\Nk9hQ1RFRnN`0k1C\\Ni9hQ1UFomN`0n1@\\Nk9gQ1WFlmN?Q2_O^Nh9fQ1cGmoNdN^Ng9fQ1eGmoNbN_Ng9eQ1gGUQOW8ln0iGTQOU8mn0kGUQOR8ln0nGUQOQ8kn0oGWQOn7jn0RHWQOl7jn0
THXQOj7hn0VHYQOh7hn0WH[QOf7fn0YH\\QOe7en0ZH^QOb7dn0\\H_QOb7bn0UHiQOh7Xn0XHiQOf7Xn0YHkQOd7Vn0\\HkQOa7Wn0^HkQO`7Vn0`HlQO]7Un0bHmQO\\7Tn0cHoQOZ7Rn0fHoQOW7Sn0hHPROU7Rn0jHoQOT7Rn0kHPROS7Qn0mHPROQ7Rn0mHQROo6Rn0PIoQOn6Sn0PIoQOm6Un0PImQOm6fn0`H^QO\\7eT1M4L4L4L3M4L4L4L3M4L4L4L4L3M4L4L4@?^Oc0UNRcNQN_]1l1WcN_Mk\\1_2UcNaMm\\1]2ScNcMo\\1[2QcNeMR]1X2nbNhMT]1U2lbNkMW]1S2ibNmMZ]1P2fbNPN\\]1n1dbNRN^]1W33M2O2bL^aNY3d^11O001O000bHdLXPO\\3fo0gLYPOY3eo0jLZPOV3do0lL\\POU3bo0lL^POU3_o0nL`POR3^o0PMbPOQ3[o0QMePOo2Zo0RMfPOn2Xo0UMgPOk2Xo0VMhPOj2Xo0VMhPOY1cIeNgU12ePOY1dIeNgU12ePOZ1cIcNiU13dPOZ1cIaNkU16]PO^1gIXNPV1:XPO_1hIWNQV19WPO`1hIWNQV18XPOb1fIVNRV18XPOb1fIVNRV17YPOc1eIVNRV16ZPOe1cIUNTV14ZPOg1bIUNTV12\\POj1_ITNVV1O]POm1]ITNVV1N_POm1[IUNWV1K`POQ2XISNYV1KaPOQ2VITNQ^1k1naNVNS^1j1laNVNU^1i1kaNWNV^1i1iaNWNW^1l1faNTN[^1m1caNSN]^1P2`aNPN_^1S2_aNmMa^1V2\\aNjMc^1Y2[aNgMe^1[2YaNeMg^1j2000002N4L5K5K3M2N1O2N1O2iJTLVlNm3gS1VLXlNl3fS1ULXlNm3fS1ULYlNm3eS1TLZlNm3eS1TLZlNn3cS1TL\\lNm3cS1TL\\lNn3bS1SL]lNn3bS1SL]lNo3bS1QL]lNP4cS1PL\\lNR4dS1mK[lNT4iS1hKVlNZ4iS1fKVlN[4iS1eKWlN\\4hS1eKWlN\\4hS1dKXlN]4gS1cKYlN^4fS1bKZlN_4dS1bK\\lN_4cS1aK]lN`4bS1`K]lNb4bS1_K]lNb4bS1^K^lNb4bS1UKRhN0\\4l4aS1TKShN0\\4m4aS1RKShN1\\4n4`S1QKThN1\\4o4_S1ZK`lNg4`S1XK`lNi4_S1WKalNj4^S1VKblNj4_S1UKalNl4^S1TKblNm4]S1SKclNn4]S1RKblNo4]S1QKclNP5\\S1PKdlNQ5\\S1nJdlNS5[S1mJelNS5\\S1lJdlNU5[S1kJelNV5[S1jJclNX5\\S1hJdlNY5\\S1fJdlN[5[S1eJelN[5\\S1dJdlN]5[S1cJelN]5\\S1bJdlN_5[S1bJdlN^5]S1aJclN_5]S1aJclN_5^S1`JblNa5]S1_JclNa5^S1^JehNN\\3d5PT1\\JchN3\\3b5QT1YJdhN6Z3a5`T1aJ_kN_5aT1cJ]kN^5bT1cJ]kN]5cT1eJ[kN[5eT1gJYkNY5gT1iJWkNX5hT1iJVkNX5kT1iJSkNW5mT1kJQkNV5nT1lJPkNT5PU1mJojNS5RU1nJljNR5TU1PKjjNQ5UU1QKijNo4XU1RKfjNn4ZU1TKdjNm4[U1UKcjNk4^U1WK_jNi4aU1YK]jNh4bU1ZK\\jNf4eU1[KYjNe4gU1^KVjNb4jU1`KTjNa4lU1`KRjN`4nU1cKoiN]4QV1eKmiN\\4SV1eKkiN[4UV1gKhiNZ4XV1iKeiNW4\\V1jKbiNV4_V1kK_iNU4aV1mK]iNS4dV1oKYiNQ4hV1PLViNP4jV1RLTiNn3mV1VLnhNi3TW1[LghNe3ZW1^LbhNb3^W1_LahNa3`W1_L_hNa3bW1`L\\hN`3dW1aL[hN^3gW1bLXhN^3iW1bLVhN^3kW1bLThN^3RX1]LmgNc3TX1]LkgNc3VX1]LhgNc3YX1^LfgNb
3[X1^LdgNb3\\X1_LcgNa3]X1`LbgN`3^X1aLagN_3^X1cLagN\\3`X1eL_gN[3aX1gL]gNY3dX1gL[gNY3fX1gLYgNY3hX1gLWgNY3iX1hLVgNW3lX1iLSgNW3nX1iLQgNW3PY1iLofNV3SY1jLlfNU3UY1k21N2O1N2N101N2N2O1N3N1N3M2O2M2N3M2N2N3M2N3M2N3M2M4M2N3M3TKodNd2T[1R24L3M3iLUeNTOoZ1h0SeNVOP[1h0PeNWOS[1g0mdNWOX[1e0jdNYOY[1e0gdNZO\\[1d0ddN[Ob[1`0_dN^Oh[1;YdNDo[15QdNIX\\10icNL]\\11ccNLc\\10^cNMh\\10YcNKn\\12RcNKT]11mbNKY]13hbNI^]14bbNId]14\\bNHk]14WbNHl]18TbNDP^1;RbNAR^1>oaN]OU^1b0maNZOV^1d0oaNVOT^1:d`NLo`13?000001O00001N10c_NGU_15k`N0S_1Nm`N4^`12N2O00000O20O000000010O000000mNDb`N=Z_1He`N7Z_1Ke`N5Z_1Me`N3[_1Nd`N3Z_1Ng`N1Y_10f`N0Z_10f`N0Z_10f`N0Z_10f`N1Y_10g`NOY_11g`NOZ_10f`N0Z_11e`NO[_11f`NOZ_10f`N0Z_11e`NO[_11e`NO[_11e`N0[_1Of`N0Z_11e`NO[_11e`NO[_11e`NO\\_11c`N0\\_10e`NO[_11e`NO\\_11c`NO]_11c`NO]_11d`NO\\_10d`N0\\_11c`NO]_11c`NO^_10b`N1]_1Od`N0]_1Nd`N2\\_1Nd`N2]_1Ld`N4]_1Jd`N7\\_1Fg`N9``10000010O00000010O000000010O0000010O00000O2O1N2N2NRi\\4" + } + ], + "question": "What is the color of the seat on ?", + "choices": [ + "A. Red.", + "B. Orange.", + "C. Black.", + "D. Green." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_210.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01634579.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 853 + ], + "counts": "Vlg>3io07K3M2O2L5M2N3M2N2N2N2O1N2O000XQOlNdn0[1M1O1O1O001O0000001O00000000000000010O0010O0010O01O1O3M1O3M2N1O001O1O1mNWQOl0Qo0N2M2O0O2N2O0O3N3K7IWSo9" + } + ], + "question": "What is the material of the item from which most likely originated?", + "choices": [ + "A. Plastic.", + "B. Waffle.", + "C. Stainless steel.", + "D. Ceramic." 
+ ], + "answer": "B", + "type": "material", + "image": "images/vqa_211.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01635395.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1024 + ], + "counts": "Rcfi03ho07I7I6J6K4K6J5_Nb1L4M2M4L3M4M3L3M4L3N]OQTOZMkk0`3M2N3N2N1O2O0N2O2N1O001000O100N2fNQUObMPk0Q2nUOZMVj0X2Q2B>A?B>ATSk00knSOEgl0]1hROfNel0d2^Ob0\\ObLSTOP4gk08J6J6J5K5L4K6J4L5K5D;01O1O1O2O000O100001O01O01O1O2N0001O00000000001O0O100000001O00000000000O2O00000000001O00000O10001O00000000001O0O100000001O000000001N2O1O1O001O1O1O1O1O001O1O1O1O1O1O001O001O00001O0000001O00001O0000001O00001N1O1O2N1000001O002N10O010O101NZB" + } + ], + "question": "What is the shape of ?", + "choices": [ + "A. The object is circular.", + "B. The object is rectangular.", + "C. The object is oval-shaped.", + "D. The object is square-shaped." + ], + "answer": "D", + "type": "shape", + "image": "images/vqa_212.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01635395.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1024 + ], + "counts": "Rcfi03ho07I7I6J6K4K6J5_Nb1L4M2M4L3M4M3L3M4L3N]OQTOZMkk0`3M2N3N2N1O2O0N2O2N1O001000O100N2fNQUObMPk0Q2nUOZMVj0X2Q2B>A?B>ATSk00knSOEgl0]1hROfNel0d2^Ob0\\ObLSTOP4gk08J6J6J5K5L4K6J4L5K5D;01O1O1O2O000O100001O01O01O1O2N0001O00000000001O0O100000001O00000000000O2O00000000001O00000O10001O00000000001O0O100000001O000000001N2O1O1O001O1O1O1O1O001O1O1O1O1O1O001O001O00001O0000001O00001O0000001O00001N1O1O2N1000001O002N10O010O101NZB" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. The masked object is made of glass.", + "B. The masked object is made of plastic.", + "C. The masked object is made of metal.", + "D. The masked object is made of rubber." 
+ ], + "answer": "B", + "type": "material", + "image": "images/vqa_213.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01689730.jpg", + "mask_rles": [ + { + "size": [ + 855, + 1024 + ], + "counts": "`dg>:\\j0a0@f1ZN?A01O00001O000000000000000000000000000000000000000000000000000000000000000O10000000000000000000O100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000000000000000000000000000000000000001O000O10000000000000000000000000000O100000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000O1O1O1O1O1O1O1O1O100O00100O1O1N2O1O1O1O100O1O1O100O1O1000000000000O1000000000000O1O11O2N1O1O1O1O1O1O2M2O1O2N1O2N1O2N2N1O2N2N2N2N2N2N1O2N2N2N2N2N1O2N2N2N2N2N1O4L4L4L5K4L4L3M1O1O1O4L1O1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O1O2N1O1O1O2MReS2" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. It is a painted wooden board.", + "B. It is made of red plastic.", + "C. It is made of fabric.", + "D. It is a metal sheet painted red." + ], + "answer": "C", + "type": "material", + "image": "images/vqa_214.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01696718.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Sbfd04lo02N3M2N010O101O0O0100O001000O10O1O001O1N101O1N202M0000000000000000000O11O010O010O01O01O000001O01O000001O01O0001O01O0001O01O0001O01O00010O00000010O000010O00010O00010O0010O01O01O01O01O000001O001O010O0001O001Nj\\1" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. Plastic.", + "B. Metal.", + "C. Wood.", + "D. Ceramic." 
+ ], + "answer": "B", + "type": "material", + "image": "images/vqa_215.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01696718.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Sbfd04lo02N3M2N010O101O0O0100O001000O10O1O001O1N101O1N202M0000000000000000000O11O010O010O01O01O000001O01O000001O01O0001O01O0001O01O0001O01O00010O00000010O000010O00010O00010O0010O01O01O01O01O000001O001O010O0001O001Nj\\1" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. Metallic.", + "B. Wooden.", + "C. Plastic.", + "D. Ceramic." + ], + "answer": "A", + "type": "material", + "image": "images/vqa_216.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01696718.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Sbfd04lo02N3M2N010O101O0O0100O001000O10O1O001O1N101O1N202M0000000000000000000O11O010O010O01O01O000001O01O000001O01O0001O01O0001O01O0001O01O00010O00000010O000010O00010O00010O0010O01O01O01O01O000001O001O010O0001O001Nj\\1" + } + ], + "question": "Based on its shape, what is ?", + "choices": [ + "A. A spoon with an oval head.", + "B. A pie server with a triangular blade.", + "C. A knife with a long, rectangular blade.", + "D. A fork with four tines." + ], + "answer": "A", + "type": "shape", + "image": "images/vqa_217.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01729425.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "dUZa05jg06J2N2N2N2O1N1O2O000O101N1O1O100O2O000O1O1O1O1O100O1O1N2O1O100O100000001O00000000O2N2OO11O2N2M4L3M3N3M2N2M2O1N2N2O1O001N10000000O100O10000O100O100104JYjl4" + } + ], + "question": "Which of the following statements correctly describes the shape of ?", + "choices": [ + "A. It has a pointed toe.", + "B. It has a square toe.", + "C. It is an open-toed object.", + "D. It features a round toe box." 
+ ], + "answer": "D", + "type": "shape", + "image": "images/vqa_218.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01729425.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "fcc55jg03M2POKRZO8ke0JRZO:ke0HRZOke0EQZO?me0BoYOc0oe0]OPZOf0ne0ZORZOi0le0VOSZOl0le0TOTZOl0me0SOSZOm0me0SOSZOm0ne0SOQZOm0oe0SORZOl0ne0TORZOl0ne0TORZOl0ne0TOPZOn0oe0SOPZOn0Pf0a0O100O10000001O1O1O1O1UNmYOg1Vf001O00001N10001O001N101ROcYO7^f0GcYO9^f0EdYO9^f0EcYO8af0G_YO6df0I]YO3hf0H\\YO5Yg0O2N3N2MW\\Ra0" + } + ], + "question": "Based on the image, which of the following statements about the color of is correct?", + "choices": [ + "A. The clothes it is wearing are green.", + "B. Its main body is yellow.", + "C. The ears are pink.", + "D. It is holding a red basket." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_219.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01770249.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": 
"h:k6`0]Il1T1X:a5jC\\In1S1V:e5jCXIP2S1U:h5hCUIT2S1S:k5fCSIW2R1R:n5eCoHZ2S1P:P6dCnH\\2R1n9^6RFbIm9^6TFbIk9_6UFaIj9`6VF`Ii9`6XF`If9b6ZF^Ie9b6\\F]Id9e6[F[Id9g6[FYIc9k6[FUId9m6[FSId9P7ZFPIe9R7ZFnHe9T7ZFlHd9W7[FiHd9Y7[FgHd9[7[FeHd9^7ZFbHe9`7ZF`Hd9c7[F]Hd9e7[F[Hd9g7[FYHd9i7[FWHc9m7[FSHd9o7[FQHd9Q8[FoGe9R8ZFnGe9T8ZFlGe9V8YFkGf9W8YFiGf9Z8WFfGi9\\8VFdGi9^8UFcGj9_8UFaGj9a8UF_Gj9c8TF^Gk9\\9\\EdFd:a9VE`Fi:g9QEYFn:];O1O1O1O1O1O1O100O1O1O1O1O1O1O001O1O100O1O1O1O1O1O1O1O1O1O2O0O1O1O1O2N1O1O1O1O2O0O2N3M2N3M2O1N3M2N3N1N3M2O2M2N2O2M2N3N1N3N1N3N1N101N2O1N2N2O1N2O1N2O1N101N2O1N2N2O1N2O1N2O0O2O1N2N2O1N2O1N2O1N100O100O1O100O100O100O100O100O1O100O100O100O100O1O100O10000O100O10000O100O10000O100O100O10000O100O10000O100O100O10000O100O10000O100O100O10000O100O10000O100O100O10000O10000O1000000O10000O10000O1000000O10000O10000O1000000O10000O10000O1000000O10000O10000O1000000O1000000O100000000O100000000O100000000O100000000O100000000O1000000O100000000O100000000O100000000O100000000O100000000O100000000O1000000O100000000O10000000000O1000000000000O1000000000000O1000000000000O1000000000000O1000000000000O10000000000O1000000000000O1000000000000O1000000000000O1000000000000O1000000000000O1000000000000O1000000000000O100000000000000O100000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000001O00000000000000000000000000000000000000000000000000001O00000000000000000000000000000000001O000000000000000000001O00000000000000000000001O000000000000000000001O0000000000000000001O0000000000001O0000000000001O000000000000001O0000000000001O0000000000001O0000000000001O000000000000001O0000000000001O0000000000001O00000000001O0000001O0000001O0000001O0000001O00001O0000001O0000001O0000001O0000001O00001O00O100O100O100O100O100O100O100O10000O100O100O100O100O100O100O100O10000000\\]O[Kma0e4R^O\\Kna0d4R^O\\Kna0d4R^O\\Kna0d4Q^O^Kna0b4R^O^Kna0b4R^O^Kna0b4Q^O`Kna0`4R^O`Kna0`4R^O`Kna0`4Q^ObKna0^4R^ObKna0^4R^ObKna0^4Q^OdKna0\\4R^OdKna0\\4R^OdKna0\\4Q^OfKna0Z4R^OfKna0Z4R^OfKna0Z4R^OgKma0Y
4R^OhKna0X4R^OhKna0X4R^OhKna0X4R^OiKma0W4S^OiKma0W4S^OiKma0W4S^OjKla0V4T^OjKla0V4T^OjKla0V4T^OkKka0U4U^OkKka0U4U^OkKka0U4U^OlKja0T4W^OkKia0U4W^OkKia0U4W^OlKha0T4X^OlKha0T4X^OlKha0T4X^OmKga0S4Y^OmKga0S4Y^OmKga0S4Z^OlKfa0T4Z^OmKea0S4[^OmKea0S4[^OmKea0T4Z^OmKea0S4[^OmKea0S4[^OmKea0S4\\^OmKca0T4\\^OlKda0T4]^OkKca0U4]^OlKba0T4_^OkKaa0U4_^OkKaa0V4_^OjK`a0V4`^OjK`a0V4a^OjK^a0V4b^OjK^a0V4b^OkK]a0V4c^OiK]a0W4c^OiK]a0W4d^OiK[a0W4e^OiK[a0X4e^OhKZa0X4f^OhKZa0X4f^OiKYa0W4h^OhKXa0X4h^OhKXa0Y4h^OgKWa0Y4i^OgKWa0Y4i^OhKVa0X4k^OgKUa0Y4k^OhKTa0Y4l^OfKTa0Z4l^OfKTa0Z4l^OgKSa0Y4n^OfKRa0[4m^OfKRa0Z4n^OfKRa0Z4o^OeKQa0[4o^OfKPa0Z4Q_OeKo`0\\4P_OeKo`0[4Q_OeKo`0[4R_OeKm`0[4S_OeKm`0[4T_OdKl`0]4S_OdKl`0\\4T_OdKl`0\\4U_OdKj`0\\4V_OdKj`0]4U_OdKj`0\\4W_OcKi`0]4W_OcKi`0^4V_OcKi`0]4X_ObKh`0^4X_OcKg`0^4X_ObKh`0^4Y_OaKg`0_4Y_ObKf`0_4Y_OaKg`0_4Z_OaKe`0_4[_OaKe`0`4Z_OaKe`0_4\\_O`Kd`0`4\\_OaKc`0`4\\_O`Kd`0`4]_O`Kb`0`4^_O`Kb`0a4]_O`Kb`0`4^_O`Kb`0`4__O`K``0a4__O_Ka`0a4__O_Ka`0a4`_O_K_`0b4`_O^K``0b4`_O_K_`0a4b_O^K^`0c4a_O^K^`0b4b_O^K^`0b4c_O^K\\`0c4c_O]K]`0c4c_O^K\\`0b4e_O]K[`0d4d_O]K[`0c4e_O]K[`0c4f_O]KY`0d4f_O\\KZ`0d4f_O\\KZ`0d4g_O\\KX`0e4g_O[KY`0e4g_O\\KX`0d4i_O[KW`0f4h_O[KW`0e4j_OZKV`0f4j_O[KU`0f4j_OZKV`0f4k_OZKT`0f4l_OZKT`0g4k_OZKT`0f4m_OYKS`0h4l_OXKT`0h4l_OYKS`0g4n_OXKQ`0j4n_OWKP`0j4P@VKo?k4R@UKl?m4S@SKl?n4T@SKj?n4W@QKh?Q5W@PKg?Q5Y@oJf?R5[@nJc?T5\\@lJb?V5^@kJ`?V5a@iJ^?Y5a@hJ\\?Z5e@eJZ?\\5f@eJW?^5i@aJV?`5j@aJT?`5m@_JQ?d5n@]JP?d5QA\\Jm>e5SA[Jk>h5UAXJi>i5XAVJg>k5ZAUJc>n5^APJa>R6_AkIb>V6_AgIa>\\6_AcI`>_6eAZI[>g6]13N1N3N2N1N3N1O2M3N1O2L3M4L4K4M4L3O2N2O0O2N1O2_BXGh;h8VD[Gh;g8UD[Gj;h8SDZGk;h8RD[Gm;g8PDZGo;h8nC[GP?", + "choices": [ + "A. The object is entirely black, matching the stove it is on.", + "B. The object is uniformly silver in color.", + "C. The object contains a bright yellow utensil.", + "D. The object is filled with red-colored food." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_220.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01788343.jpg", + "mask_rles": [ + { + "size": [ + 1009, + 1024 + ], + "counts": "_oo78\\n0n0N2M2O100O2N100O100O2O0000001N11OO10001O000000001O00000000001O000000001O00000000001O00O100000000000000000001N10001O00001O00001O0O101O00001O00001O0O1O2I7Hh0XOUVSe0" + } + ], + "question": "What material is a component of ?", + "choices": [ + "A. Plastic.", + "B. Cardboard.", + "C. Wood.", + "D. Metal." + ], + "answer": "B", + "type": "material", + "image": "images/vqa_221.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01811034.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "hPYf06gg04K6J5K5K5K4L5K5K4M4K4L5K4L4L4L4L3M4L4L3M4L3M4K4M4L3M4L3M4K4M4L3M4TOmKg]OV4Yb0mKa]OW4^b0lK\\]OW4db0c000O010O0100O001O1N1N3K5L4L4L4L4L4L4L4L4L4L4L5K4L4L5K4L4L5K4L5K4L4L5K4L5K]D" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. Metal.", + "B. Glass.", + "C. Cardboard.", + "D. Plastic." + ], + "answer": "D", + "type": "material", + "image": "images/vqa_222.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01811034.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "hPYf06gg04K6J5K5K5K4L5K5K4M4K4L5K4L4L4L4L3M4L4L3M4L3M4K4M4L3M4L3M4K4M4L3M4TOmKg]OV4Yb0mKa]OW4^b0lK\\]OW4db0c000O010O0100O001O1N1N3K5L4L4L4L4L4L4L4L4L4L4L5K4L4L5K4L4L5K4L5K4L4L5K4L5K]D" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. A combination of orange and white.", + "B. A combination of blue and white.", + "C. A combination of green, yellow, and white.", + "D. Primarily red and black." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_223.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01835389.jpg", + "mask_rles": [ + { + "size": [ + 2472, + 2832 + ], + "counts": "c\\U_29l\\26K3N3L3M3M3M3M2O2M3N1N2O1N3M2L4M3L4M3M3N1O2J6L4M2N2O2M2B4WfMVMgY2j2;0100000000010O01M2O[Od0L6N200AdLRgM]3jX2mLPgMS3oX2c0O2M201O001O1O10O0100O01000O0100000000003N1N1O3N1N101N1000O10001N102`KbgMP4OlKgX2Z41O100O2N10001O0010O01N1N3N1N101O001O1O1O2N2ON1O1N2N1O2N2N2N1O2O1N2N2N2N2N2O1N1O2N101N1O1O2O0O1O101N100O101N100O101N100N10O100N2N2M3K6L3O1O2N2N1N3L3L4K5M3N2O2M2N3L3L5I7H7N3M3M3M3M3M3M4L3M3M4K4M4L3M4K5L4L4K5K6I9GX`Yh3" + } + ], + "question": "What is the texture/pattern of ?", + "choices": [ + "A. Ribbed.", + "B. Pleated.", + "C. Smooth.", + "D. Velvet." + ], + "answer": "C", + "type": "texture/pattern", + "image": "images/vqa_224.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01835389.jpg", + "mask_rles": [ + { + "size": [ + 2472, + 2832 + ], + "counts": "c\\U_29l\\26K3N3L3M3M3M3M2O2M3N1N2O1N3M2L4M3L4M3M3N1O2J6L4M2N2O2M2B4WfMVMgY2j2;0100000000010O01M2O[Od0L6N200AdLRgM]3jX2mLPgMS3oX2c0O2M201O001O1O10O0100O01000O0100000000003N1N1O3N1N101N1000O10001N102`KbgMP4OlKgX2Z41O100O2N10001O0010O01N1N3N1N101O001O1O1O2N2ON1O1N2N1O2N2N2N1O2O1N2N2N2N2N2O1N1O2N101N1O1O2O0O1O101N100O101N100O101N100N10O100N2N2M3K6L3O1O2N2N1N3L3L4K5M3N2O2M2N3L3L5I7H7N3M3M3M3M3M3M4L3M3M4K4M4L3M4K5L4L4K5K6I9GX`Yh3" + } + ], + "question": "What is in the image?", + "choices": [ + "A. A knotted tie.", + "B. A bow tie with a butterfly shape.", + "C. A decorative epaulet on the shoulder.", + "D. The leaf of the poppy pin." 
+ ], + "answer": "B", + "type": "shape", + "image": "images/vqa_225.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01835389.jpg", + "mask_rles": [ + { + "size": [ + 2472, + 2832 + ], + "counts": "c\\U_29l\\26K3N3L3M3M3M3M2O2M3N1N2O1N3M2L4M3L4M3M3N1O2J6L4M2N2O2M2B4WfMVMgY2j2;0100000000010O01M2O[Od0L6N200AdLRgM]3jX2mLPgMS3oX2c0O2M201O001O1O10O0100O01000O0100000000003N1N1O3N1N101N1000O10001N102`KbgMP4OlKgX2Z41O100O2N10001O0010O01N1N3N1N101O001O1O1O2N2ON1O1N2N1O2N2N2N1O2O1N2N2N2N2N2O1N1O2N101N1O1O2O0O1O101N100O101N100O101N100N10O100N2N2M3K6L3O1O2N2N1N3L3L4K5M3N2O2M2N3L3L5I7H7N3M3M3M3M3M3M4L3M3M4K4M4L3M4K5L4L4K5K6I9GX`Yh3" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. White.", + "B. Red.", + "C. Black.", + "D. Light blue." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_226.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01867731.jpg", + "mask_rles": [ + { + "size": [ + 1023, + 806 + ], + "counts": "^_d:8do0?B4eQO[OUm0j0]ROCam0?UROKim0V1000O001O0O2O1O001N2O1O0O2O1O1O1N101N12M3L4N2M3N2M3M4M3L4M3L4M3L3N2N1N2O1O2N1O1O2N1O1O1O2N1O1O000000O2O0O2O000O101O0O2O001O10000000O01O1O1O1O1O100O1O1O002N1O1O1O1O2N1O1O1O1O1O2N1O1O1O2N2N2N1O2O1N2N2O0O2N010O010O2N2O1N2N2O2M2O1cNfQOR1\\n0lNfQOR1\\n0mNdQOS1en0N2N4M2N3M4L3M3M2N1O00000000O1N21O1O2N2N2N2N1N1OTlo9" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Red.", + "B. Green.", + "C. White.", + "D. Blue." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_227.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01867731.jpg", + "mask_rles": [ + { + "size": [ + 1023, + 806 + ], + "counts": "^_d:8do0?B4eQO[OUm0j0]ROCam0?UROKim0V1000O001O0O2O1O001N2O1O0O2O1O1O1N101N12M3L4N2M3N2M3M4M3L4M3L4M3L3N2N1N2O1O2N1O1O2N1O1O1O2N1O1O000000O2O0O2O000O101O0O2O001O10000000O01O1O1O1O1O100O1O1O002N1O1O1O1O2N1O1O1O1O1O2N1O1O1O2N2N2N1O2O1N2N2O0O2N010O010O2N2O1N2N2O2M2O1cNfQOR1\\n0lNfQOR1\\n0mNdQOS1en0N2N4M2N3M4L3M3M2N1O00000000O1N21O1O2N2N2N2N1N1OTlo9" + } + ], + "question": "What is the texture of in the image?", + "choices": [ + "A. It has a smooth, glossy surface.", + "B. It has a papery skin.", + "C. It has a grainy, wooden texture.", + "D. It is embroidered with colorful threads." + ], + "answer": "B", + "type": "texture/pattern", + "image": "images/vqa_228.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01867731.jpg", + "mask_rles": [ + { + "size": [ + 1023, + 806 + ], + "counts": "^_d:8do0?B4eQO[OUm0j0]ROCam0?UROKim0V1000O001O0O2O1O001N2O1O0O2O1O1O1N101N12M3L4N2M3N2M3M4M3L4M3L4M3L3N2N1N2O1O2N1O1O2N1O1O1O2N1O1O000000O2O0O2O000O101O0O2O001O10000000O01O1O1O1O1O100O1O1O002N1O1O1O1O2N1O1O1O1O1O2N1O1O1O2N2N2N1O2O1N2N2O0O2N010O010O2N2O1N2N2O2M2O1cNfQOR1\\n0lNfQOR1\\n0mNdQOS1en0N2N4M2N3M4L3M3M2N1O00000000O1N21O1O2N2N2N2N1N1OTlo9" + } + ], + "question": "What is the color of the root of ?", + "choices": [ + "A. Red.", + "B. White.", + "C. Brown.", + "D. Green." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_229.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01915694.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "mP]83ig08H8H9G8G8I6J7I7J6UZOlM`e0T1bZO0Pc0mNQ_OR1VN0eb0Aa^O?POO^b0E[^O`0YOI\\b0GW^Oc0_OE^b0Dl]On0H[O`b0Db]OV11SO_b0F_]OX14oN`b0G[]O[17jNab0JX]O[1;dNbb00R]O]1?]Ndb04m\\O^1b0ZNdb07j\\O_1bd0`N^[O_1cc0[Nf\\O4Ga1cc0\\Nf\\O2Gb1cc0\\Ng\\O1Fb1cc0_Nh\\OMEd1cc0`N`]O_1ab0aN_]O_1ab0bN_]O]1ab0cN`]O[1ab0eN`]OY1ab0fNl]Om0Tb0TOY^O_YOCaf0>\\YODdf0<[YOEVd00i]Ok0jNiN`b0k0W^OZc0Bf\\O`0OYN]b0W1d]Oc0G]Ndb0o0e]Of0_ObNkb0h0f]Oh0XOfNQc0b0g]Oa1Yb0_Ng]Ob1Xb0^Nh]Ob1Xb0^Nh]Ob1Yb0]Ng]Oc1Yb0]Ng]Oc1Yb0]Ng]Oc1Yb0]Nf]Od1Zb0\\Nf]Od1Zb0\\Nf]Oe1Zb0ZNf]Of1Zb0ZNf]Og1Yb0YNg]Og1Zb0XNf]Oh1Zb0XNf]Oi1Zb0VNf]Oj1[b0UNe]Ok1[b0UNe]Ok1\\b0TNd]Ol1]b0SNc]Om1]b0SNc]Om1^b0RNb]On1_b0QNa]Oo1_b0QNa]Oo1`b0PNa]Oo1`b0PN`]OP2bb0nM^]OR2cb0mM]]OS2eb0kM[]OU2gb0iMY]OW2hb0hMX]OX2jb0fMX]OW2]d0L3M4M2M3M2N2N2N3M2N2N3M3M3M4L3M2M4M3K5H9D?", + "choices": [ + "A. The object has a smooth, polished surface.", + "B. It has a prominent, visible wood grain pattern.", + "C. The surface is painted a solid, matte color.", + "D. It is covered with a striped pattern." 
+ ], + "answer": "A", + "type": "texture/pattern", + "image": "images/vqa_230.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01915694.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "mP]83ig08H8H9G8G8I6J7I7J6UZOlM`e0T1bZO0Pc0mNQ_OR1VN0eb0Aa^O?POO^b0E[^O`0YOI\\b0GW^Oc0_OE^b0Dl]On0H[O`b0Db]OV11SO_b0F_]OX14oN`b0G[]O[17jNab0JX]O[1;dNbb00R]O]1?]Ndb04m\\O^1b0ZNdb07j\\O_1bd0`N^[O_1cc0[Nf\\O4Ga1cc0\\Nf\\O2Gb1cc0\\Ng\\O1Fb1cc0_Nh\\OMEd1cc0`N`]O_1ab0aN_]O_1ab0bN_]O]1ab0cN`]O[1ab0eN`]OY1ab0fNl]Om0Tb0TOY^O_YOCaf0>\\YODdf0<[YOEVd00i]Ok0jNiN`b0k0W^OZc0Bf\\O`0OYN]b0W1d]Oc0G]Ndb0o0e]Of0_ObNkb0h0f]Oh0XOfNQc0b0g]Oa1Yb0_Ng]Ob1Xb0^Nh]Ob1Xb0^Nh]Ob1Yb0]Ng]Oc1Yb0]Ng]Oc1Yb0]Ng]Oc1Yb0]Nf]Od1Zb0\\Nf]Od1Zb0\\Nf]Oe1Zb0ZNf]Of1Zb0ZNf]Og1Yb0YNg]Og1Zb0XNf]Oh1Zb0XNf]Oi1Zb0VNf]Oj1[b0UNe]Ok1[b0UNe]Ok1\\b0TNd]Ol1]b0SNc]Om1]b0SNc]Om1^b0RNb]On1_b0QNa]Oo1_b0QNa]Oo1`b0PNa]Oo1`b0PN`]OP2bb0nM^]OR2cb0mM]]OS2eb0kM[]OU2gb0iMY]OW2hb0hMX]OX2jb0fMX]OW2]d0L3M4M2M3M2N2N2N3M2N2N3M3M3M4L3M2M4M3K5H9D is correct?", + "choices": [ + "A. It has six strings.", + "B. It is a four-stringed instrument.", + "C. It is painted bright red.", + "D. It is standing on a pink stand on the floor." 
+ ], + "answer": "A", + "type": "shape", + "image": "images/vqa_231.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01915694.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "mP]83ig08H8H9G8G8I6J7I7J6UZOlM`e0T1bZO0Pc0mNQ_OR1VN0eb0Aa^O?POO^b0E[^O`0YOI\\b0GW^Oc0_OE^b0Dl]On0H[O`b0Db]OV11SO_b0F_]OX14oN`b0G[]O[17jNab0JX]O[1;dNbb00R]O]1?]Ndb04m\\O^1b0ZNdb07j\\O_1bd0`N^[O_1cc0[Nf\\O4Ga1cc0\\Nf\\O2Gb1cc0\\Ng\\O1Fb1cc0_Nh\\OMEd1cc0`N`]O_1ab0aN_]O_1ab0bN_]O]1ab0cN`]O[1ab0eN`]OY1ab0fNl]Om0Tb0TOY^O_YOCaf0>\\YODdf0<[YOEVd00i]Ok0jNiN`b0k0W^OZc0Bf\\O`0OYN]b0W1d]Oc0G]Ndb0o0e]Of0_ObNkb0h0f]Oh0XOfNQc0b0g]Oa1Yb0_Ng]Ob1Xb0^Nh]Ob1Xb0^Nh]Ob1Yb0]Ng]Oc1Yb0]Ng]Oc1Yb0]Ng]Oc1Yb0]Nf]Od1Zb0\\Nf]Od1Zb0\\Nf]Oe1Zb0ZNf]Of1Zb0ZNf]Og1Yb0YNg]Og1Zb0XNf]Oh1Zb0XNf]Oi1Zb0VNf]Oj1[b0UNe]Ok1[b0UNe]Ok1\\b0TNd]Ol1]b0SNc]Om1]b0SNc]Om1^b0RNb]On1_b0QNa]Oo1_b0QNa]Oo1`b0PNa]Oo1`b0PN`]OP2bb0nM^]OR2cb0mM]]OS2eb0kM[]OU2gb0iMY]OW2hb0hMX]OX2jb0fMX]OW2]d0L3M4M2M3M2N2N2N3M2N2N3M3M3M4L3M2M4M3K5H9D?", + "choices": [ + "A. f-shaped.", + "B. Oval.", + "C. Round.", + "D. Diamond-shaped." 
+ ], + "answer": "C", + "type": "shape", + "image": "images/vqa_232.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01915694.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "mP]83ig08H8H9G8G8I6J7I7J6UZOlM`e0T1bZO0Pc0mNQ_OR1VN0eb0Aa^O?POO^b0E[^O`0YOI\\b0GW^Oc0_OE^b0Dl]On0H[O`b0Db]OV11SO_b0F_]OX14oN`b0G[]O[17jNab0JX]O[1;dNbb00R]O]1?]Ndb04m\\O^1b0ZNdb07j\\O_1bd0`N^[O_1cc0[Nf\\O4Ga1cc0\\Nf\\O2Gb1cc0\\Ng\\O1Fb1cc0_Nh\\OMEd1cc0`N`]O_1ab0aN_]O_1ab0bN_]O]1ab0cN`]O[1ab0eN`]OY1ab0fNl]Om0Tb0TOY^O_YOCaf0>\\YODdf0<[YOEVd00i]Ok0jNiN`b0k0W^OZc0Bf\\O`0OYN]b0W1d]Oc0G]Ndb0o0e]Of0_ObNkb0h0f]Oh0XOfNQc0b0g]Oa1Yb0_Ng]Ob1Xb0^Nh]Ob1Xb0^Nh]Ob1Yb0]Ng]Oc1Yb0]Ng]Oc1Yb0]Ng]Oc1Yb0]Nf]Od1Zb0\\Nf]Od1Zb0\\Nf]Oe1Zb0ZNf]Of1Zb0ZNf]Og1Yb0YNg]Og1Zb0XNf]Oh1Zb0XNf]Oi1Zb0VNf]Oj1[b0UNe]Ok1[b0UNe]Ok1\\b0TNd]Ol1]b0SNc]Om1]b0SNc]Om1^b0RNb]On1_b0QNa]Oo1_b0QNa]Oo1`b0PNa]Oo1`b0PN`]OP2bb0nM^]OR2cb0mM]]OS2eb0kM[]OU2gb0iMY]OW2hb0hMX]OX2jb0fMX]OW2]d0L3M4M2M3M2N2N2N3M2N2N3M3M3M4L3M2M4M3K5H9D in the image?", + "choices": [ + "A. Blue.", + "B. Black.", + "C. Red.", + "D. Light brown." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_233.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01916008.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cdg63kg03M3L3M3M2O1N2N3M2N200O1O100O100010OO100001O1O1O001O000000000010O01O1O1O1O1O1O1O1O1O1O1O1O100O1O1O1O1O1O1N2000000O1O1O1O2Noa=6g]B5K5K5K5L4K5K5N2O1O1O1O1O1O1O0O2O1O1O1O1O1O1O1N2O1O1O1O1O001O1O11O00000000000000000O1000000000000000001O00000O101N1E;D in the image?", + "choices": [ + "A. Trapezoidal.", + "B. Triangular.", + "C. Rectangular.", + "D. Curved." 
+ ], + "answer": "C", + "type": "shape", + "image": "images/vqa_234.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01916008.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cdg63kg03M3L3M3M2O1N2N3M2N200O1O100O100010OO100001O1O1O001O000000000010O01O1O1O1O1O1O1O1O1O1O1O1O100O1O1O1O1O1O1N2000000O1O1O1O2Noa=6g]B5K5K5K5L4K5K5N2O1O1O1O1O1O1O0O2O1O1O1O1O1O1O1N2O1O1O1O1O001O1O11O00000000000000000O1000000000000000001O00000O101N1E;D?", + "choices": [ + "A. The masked object is a component of the white van.", + "B. The masked object is a component of the black car.", + "C. The masked object is a component of the blue building.", + "D. The masked object is a component of the gray road surface." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_235.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01916008.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cdg63kg03M3L3M3M2O1N2N3M2N200O1O100O100010OO100001O1O1O001O000000000010O01O1O1O1O1O1O1O1O1O1O1O1O100O1O1O1O1O1O1N2000000O1O1O1O2Noa=6g]B5K5K5K5L4K5K5N2O1O1O1O1O1O1O0O2O1O1O1O1O1O1O1N2O1O1O1O1O001O1O11O00000000000000000O1000000000000000001O00000O101N1E;D?", + "choices": [ + "A. White.", + "B. Black.", + "C. Gray.", + "D. Blue." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_236.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01936287.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "l\\me01ng02N3N000ROKQZO5ne04jYOKWf06hYOJYf05hYOJb01bd0MnZO06`0id0@Q[O0Lk0Re0UOR[O0DR1]d0SOo[OKOn0DSObd06j[OIDo0Ye0YOS[OI[Ol0le0\\OYZOn0Pf079DIPOVZOc0Xf0;7CTYOGlf03\\YOKef0McYO2_f0KdYO3P\\a1" + } + ], + "question": "What is the color of in the image?", + "choices": [ + "A. Black.", + "B. Yellow.", + "C. White.", + "D. Green." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_237.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01939853.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Sin91mo04M2O1N1000O10000000O1000O100000O1000O0100O010O010O01000N1O2N110O2O1N2O1N2O1N2O1O1N2O1N10001N10O1000O10000O1000000O100000O01000000O1000000O10O100GTQO]Oln0c0WQOZOjn0e0901O001O0000001O00001O0000001O00001O0000001O00001O0000001O00001O00001N100O2O2L4L]W]:" + } + ], + "question": "Which of the following statements accurately describes the shape of ?", + "choices": [ + "A. The masked object is a large animal lying on the ground.", + "B. The masked object is one of the small pigeons walking on the ground.", + "C. The masked object is a small animal standing on its feet.", + "D. The masked object is a decorative golden statue at the base of the temple." + ], + "answer": "A", + "type": "shape", + "image": "images/vqa_238.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01939853.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Sin91mo04M2O1N1000O10000000O1000O100000O1000O0100O010O010O01000N1O2N110O2O1N2O1N2O1N2O1O1N2O1N10001N10O1000O10000O1000000O100000O01000000O1000000O10O100GTQO]Oln0c0WQOZOjn0e0901O001O0000001O00001O0000001O00001O0000001O00001O0000001O00001O00001N100O2O2L4L]W]:" + } + ], + "question": "Which of the following descriptions about the texture of is correct?", + "choices": [ + "A. It has a shaggy coat.", + "B. It has a smooth coat.", + "C. It is covered in feathers.", + "D. It has a spotted pattern." 
+ ], + "answer": "B", + "type": "texture/pattern", + "image": "images/vqa_239.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01944558.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "\\]n5i0Ug05J4H8M3N1O1O100O10000000000001N1O2N2H8M4L;EiZ`a0" + } + ], + "question": "What is a characteristic texture of ?", + "choices": [ + "A. Creamy.", + "B. Hard and woody.", + "C. Woven fabric.", + "D. Smooth and metallic." + ], + "answer": "A", + "type": "texture/pattern", + "image": "images/vqa_240.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01948375.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "Plm=166[g0;L3N3kYOG]d0;S[O3md0`100O1000O100000O1000O1000O1000O100O002N1O2O1N0000001O1L4]Oc0N2M2N3M3N2M3N2O001O1O1O1O1N2O101\\OjXO;_g0OO1O001O00O102N2HZTe8" + } + ], + "question": "What is the color of in the image?", + "choices": [ + "A. Grayish-white.", + "B. Teal.", + "C. Brown.", + "D. Beige." + ], + "answer": "A", + "type": "color", + "image": "images/vqa_241.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01948375.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "Plm=166[g0;L3N3kYOG]d0;S[O3md0`100O1000O100000O1000O1000O1000O100O002N1O2O1N0000001O1L4]Oc0N2M2N3M3N2M3N2O001O1O1O1O1N2O101\\OjXO;_g0OO1O001O00O102N2HZTe8" + } + ], + "question": "Which of the following best describes the texture of ?", + "choices": [ + "A. Smooth.", + "B. Ribbed.", + "C. Plush.", + "D. Waxy." 
+ ], + "answer": "C", + "type": "texture/pattern", + "image": "images/vqa_242.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01959650.jpg", + "mask_rles": [ + { + "size": [ + 1200, + 1600 + ], + "counts": "]k0S:]k00O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O1000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000O10000001O2N2N1O1O1O1O001O001O00001O1O2N2N1O1O1O1O1O1O001O001O001O00001O00001O001O1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1PJgSOg2[l0TMkSOi2Vl0SMoSOk2Rl0QMSTOm2nk0PMUTOo2mk0lLWTOS3jk0jLYTOU3hk0hL[TOW3fk0eL]TO[3ek0aL`TO\\3ak0`LdTO^3]k0_LcTOc3^k0YLdTOh3^k0TLdTOl3]k0PLgTOo3Zk0nKhTOR4Yk0kKiTOU4Yk0fKjTOZ4Wk0cKkTO]4Vk0_KmTOa4Tk0\\KnTOd4Tk0WKnTOj4Tk0RKnTOn4Vk0kJmTOU5fm0100O100O1O1O1001O001O001O1O1O1O2N1O2N1O2N1O2N2N1O2N1O2N1O2N1O2N2N1O2N1O2N1O2N1O2N1O2N2N1O2N1O2N1O2N1O2N1O2M3N1N3M2N3M2M4L3L5F:F9F;F9G:F:F?", + "choices": [ + "A. Plastic.", + "B. Ceramic.", + "C. Fiberglass.", + "D. Porcelain." 
+ ], + "answer": "B", + "type": "material", + "image": "images/vqa_243.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01959650.jpg", + "mask_rles": [ + { + "size": [ + 1200, + 1600 + ], + "counts": "]k0S:]k00O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O1000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000O10000001O2N2N1O1O1O1O001O001O00001O1O2N2N1O1O1O1O1O1O001O001O001O00001O00001O001O1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1PJgSOg2[l0TMkSOi2Vl0SMoSOk2Rl0QMSTOm2nk0PMUTOo2mk0lLWTOS3jk0jLYTOU3hk0hL[TOW3fk0eL]TO[3ek0aL`TO\\3ak0`LdTO^3]k0_LcTOc3^k0YLdTOh3^k0TLdTOl3]k0PLgTOo3Zk0nKhTOR4Yk0kKiTOU4Yk0fKjTOZ4Wk0cKkTO]4Vk0_KmTOa4Tk0\\KnTOd4Tk0WKnTOj4Tk0RKnTOn4Vk0kJmTOU5fm0100O100O1O1O1001O001O001O1O1O1O2N1O2N1O2N1O2N2N1O2N1O2N1O2N1O2N2N1O2N1O2N1O2N1O2N1O2N2N1O2N1O2N1O2N1O2N1O2M3N1N3M2N3M2M4L3L5F:F9F;F9G:F:F?", + "choices": [ + "A. Patterned.", + "B. Matte.", + "C. Smooth.", + "D. Rough." 
+ ], + "answer": "C", + "type": "texture/pattern", + "image": "images/vqa_244.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01959650.jpg", + "mask_rles": [ + { + "size": [ + 1200, + 1600 + ], + "counts": "]k0S:]k00O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O1000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000O10000001O2N2N1O1O1O1O001O001O00001O1O2N2N1O1O1O1O1O1O001O001O001O00001O00001O001O1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1PJgSOg2[l0TMkSOi2Vl0SMoSOk2Rl0QMSTOm2nk0PMUTOo2mk0lLWTOS3jk0jLYTOU3hk0hL[TOW3fk0eL]TO[3ek0aL`TO\\3ak0`LdTO^3]k0_LcTOc3^k0YLdTOh3^k0TLdTOl3]k0PLgTOo3Zk0nKhTOR4Yk0kKiTOU4Yk0fKjTOZ4Wk0cKkTO]4Vk0_KmTOa4Tk0\\KnTOd4Tk0WKnTOj4Tk0RKnTOn4Vk0kJmTOU5fm0100O100O1O1O1001O001O001O1O1O1O2N1O2N1O2N1O2N2N1O2N1O2N1O2N1O2N2N1O2N1O2N1O2N1O2N1O2N2N1O2N1O2N1O2N1O2N1O2M3N1N3M2N3M2M4L3L5F:F9F;F9G:F:F in the image?", + "choices": [ + "A. The masked object is beige.", + "B. The masked object is white.", + "C. The masked object is light brown.", + "D. The masked object is blue." 
+ ], + "answer": "B", + "type": "color", + "image": "images/vqa_245.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01968981.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "al\\<:ag0g0[O6K4L5K4L3N4K3N2M4M2M4M3L4M3M3M2N2N3M2N2N2N2O1N2O1N2N2O1N2O0O2N2O1N101N2O1O1O0O2O1N101O1N101N2O001N2O1O001O1O1O1O1O1N2O1O1O1O1O0O101n\\O`Kbb0`4]]OdK`b0\\4]]OgKcb0Y4\\]OhKdb0X4\\]OhKdb0k4O001O01[O]]OkKcb0U4_]OhKbb0X4_]OeKcb0[4`]O`Kbb0_4`000O2O0O1N2M3N2O1O2N1O1O2N1O2N2N1O1O2N100O2N1O1O2N1O2N2N1O2N1O2N2N2N2N2N1O2N2M3M3N2M3N3L3N3L3M3M3M4M6I5K5J5J7J8Ec[\\8" + } + ], + "question": "What is the primary material of the envelope of ?", + "choices": [ + "A. Canvas.", + "B. Rubber.", + "C. Polyester.", + "D. Plastic sheeting." + ], + "answer": "C", + "type": "material", + "image": "images/vqa_246.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01968981.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "al\\<:ag0g0[O6K4L5K4L3N4K3N2M4M2M4M3L4M3M3M2N2N3M2N2N2N2O1N2O1N2N2O1N2O0O2N2O1N101N2O1O1O0O2O1N101O1N101N2O001N2O1O001O1O1O1O1O1N2O1O1O1O1O0O101n\\O`Kbb0`4]]OdK`b0\\4]]OgKcb0Y4\\]OhKdb0X4\\]OhKdb0k4O001O01[O]]OkKcb0U4_]OhKbb0X4_]OeKcb0[4`]O`Kbb0_4`000O2O0O1N2M3N2O1O2N1O1O2N1O2N2N1O1O2N100O2N1O1O2N1O2N2N1O2N1O2N2N2N2N2N1O2N2M3M3N2M3N3L3N3L3M3M3M4M6I5K5J5J7J8Ec[\\8" + } + ], + "question": "Which statement accurately describes a feature of in the image?", + "choices": [ + "A. It has a simple horizontal striped pattern.", + "B. It has a small basket hanging underneath.", + "C. It is primarily colored green and white.", + "D. There is no visible basket attached to it." 
+ ], + "answer": "B", + "type": "shape", + "image": "images/vqa_247.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01975150.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "obW95jo03M3M3N2N1N2O1N100O1000001O01O0000O100000O2N1N2L4M3000010O0001O001O010O001O0010O010O010O01O1O010O001O010O000001O0000001O0000001O01O00000000000000000000000mNF`RO:_m0H`RO8_m0J`RO6`m0K_RO5`m0L`RO4`m0M_RO3am0M_RO3am0N^RO2cm0N\\RO2dm0N\\RO3dm0M[RO3fm0LZRO4fm0MYRO3hm0LXRO4hm0MWRO3jm0MURO3km0MURO3lm0MSRO3nm0LRRO4nm0MQRO3Pn0LPRO4Pn0MoQO3Rn0MmQO3Sn0MmQO3Tn0MkQO3Vn0LjQO4Vn0MiQO3Xn0LhQO4Xn0MgQO3Zn0MeQO3[n0MeQO3\\n0McQO4]n0KcQO5]n0LbQO4_n0KaQO5_n0L`QO4an0L^QO4bn0L^QO4cn0L\\QO4en0K[QO5en0LZQO4gn0KYQO5gn0LXQO4in0LVQO4jn0LVQO4kn0LTQO4mn0KSQO5mn0LRQO4on0KQQO5on0LPQO4Qo0=001O10O01O001O001O1O001O001O001O1N1O2N1N3L4M3Mm\\P:" + } + ], + "question": "What is the shape of in the image?", + "choices": [ + "A. It has spoon-like, cupped ends.", + "B. It has scalloped gripping ends.", + "C. It is a single, straight utensil with a pointed tip.", + "D. It has flat, spatula-like ends." 
+ ], + "answer": "B", + "type": "shape", + "image": "images/vqa_248.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01975150.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "obW95jo03M3M3N2N1N2O1N100O1000001O01O0000O100000O2N1N2L4M3000010O0001O001O010O001O0010O010O010O01O1O010O001O010O000001O0000001O0000001O01O00000000000000000000000mNF`RO:_m0H`RO8_m0J`RO6`m0K_RO5`m0L`RO4`m0M_RO3am0M_RO3am0N^RO2cm0N\\RO2dm0N\\RO3dm0M[RO3fm0LZRO4fm0MYRO3hm0LXRO4hm0MWRO3jm0MURO3km0MURO3lm0MSRO3nm0LRRO4nm0MQRO3Pn0LPRO4Pn0MoQO3Rn0MmQO3Sn0MmQO3Tn0MkQO3Vn0LjQO4Vn0MiQO3Xn0LhQO4Xn0MgQO3Zn0MeQO3[n0MeQO3\\n0McQO4]n0KcQO5]n0LbQO4_n0KaQO5_n0L`QO4an0L^QO4bn0L^QO4cn0L\\QO4en0K[QO5en0LZQO4gn0KYQO5gn0LXQO4in0LVQO4jn0LVQO4kn0LTQO4mn0KSQO5mn0LRQO4on0KQQO5on0LPQO4Qo0=001O10O01O001O001O1O001O001O001O1N1O2N1N3L4M3Mm\\P:" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. It is made of plastic.", + "B. It is made of metal.", + "C. It is made of wood.", + "D. It is made of ceramic." + ], + "answer": "B", + "type": "material", + "image": "images/vqa_249.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01975150.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "lfoa03mo0006KO0001O010O1O010O0010O01O010O1O010O0010O01O10O01O010O0001O00O10000O10000001O000000001O0O1000001O000N20001O0000O01000000O0100000O10O1000O1000O10O100000O10000000000000000000000O100000000000000000O1000000000000000000000O1000000000000001O000000000001O000000000001O0001O00000000000001O01O00000001O00000000010O000000000010O000010O0000010O00010O00010O000010O00010O05K00^X8" + } + ], + "question": "Which of the following descriptions best fits the shape of ?", + "choices": [ + "A. The masked object is perfectly straight from end to end.", + "B. The masked object has a distinctly curved handle.", + "C. The tines of are blunt and rounded.", + "D. 
The handle of is cylindrical and thick." + ], + "answer": "B", + "type": "shape", + "image": "images/vqa_250.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01975150.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "lfoa03mo0006KO0001O010O1O010O0010O01O010O1O010O0010O01O10O01O010O0001O00O10000O10000001O000000001O0O1000001O000N20001O0000O01000000O0100000O10O1000O1000O10O100000O10000000000000000000000O100000000000000000O1000000000000000000000O1000000000000001O000000000001O000000000001O0001O00000000000001O01O00000001O00000000010O000000000010O000010O0000010O00010O00010O000010O00010O05K00^X8" + } + ], + "question": "What is the texture of ?", + "choices": [ + "A. Smooth.", + "B. Grooved.", + "C. Brushed.", + "D. Hammered." + ], + "answer": "A", + "type": "texture/pattern", + "image": "images/vqa_251.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01975150.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "lfoa03mo0006KO0001O010O1O010O0010O01O010O1O010O0010O01O10O01O010O0001O00O10000O10000001O000000001O0O1000001O000N20001O0000O01000000O0100000O10O1000O1000O10O100000O10000000000000000000000O100000000000000000O1000000000000000000000O1000000000000001O000000000001O000000000001O0001O00000000000001O01O00000001O00000000010O000000000010O000010O0000010O00010O00010O000010O00010O05K00^X8" + } + ], + "question": "Which of the following correctly describes a feature of ?", + "choices": [ + "A. It has two tines.", + "B. It has three tines.", + "C. It has five tines.", + "D. It has four tines." 
+ ], + "answer": "D", + "type": "shape", + "image": "images/vqa_252.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01975150.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "e\\X;2jo06J5L4N1N2O1N2O0gNZ1N2O1O100O11O001O2M4M4K5K3K5K4L5K5M3N1OO1O1O1O100O1O101N2`QOnNPn0R1mQOROQn0Q1kQOROSn0^101N1M2N3N10100O0100O10000O100000001O001O1O2N2N001O001O001O001O001O001O001O001N100O2M3B=O1O00O1O100O2O1N101O1O1O00GXQO[Oin0c0[QOYOfn0e0;00O10O0100O2O0O2O2M102M2O2H\\PO0QeV9" + } + ], + "question": "Which of the following statements accurately describes a feature of in the image?", + "choices": [ + "A. The masked object has a large, round nose.", + "B. The masked object has long, floppy ears.", + "C. The masked object has small, beady eyes.", + "D. The masked object has a slender body." + ], + "answer": "A", + "type": "shape", + "image": "images/vqa_253.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01981955.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "Zm\\8a0^g03M1O2N1O2N2N2N2N1O2N1O1O2N2N2N4M0O2N2N1O2O0O2O0LYNnYOi1Qf0WNoYOi1Tf01KVNRZOj1me0VNSZOl1oe0100O3N1O3M00001O00001MjMZZOV2fe0jMZZOV2he000000000000000O101N2O0O2K400N200O2N1M4M2N3L301N1O1O1O2N1O2M3N2N1O4L3M1O9F3M3Jjbe=" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Light blue.", + "B. White or beige.", + "C. Red and yellow.", + "D. Blue with white polka dots." 
+ ], + "answer": "B", + "type": "color", + "image": "images/vqa_254.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01983311.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Zk0f4Yk0100O10000O10000O10000O100O10000O10000O10000O100O10000O10000O100O1O11O1RK`UOZ4bj0bKiUOU4Tk0M2N3M2N3M2N3M2N2N2N2N3M2N2N2N2N3M2N1O1O1O1O1O2N1O1O1O1O1O1O1O2N1O1O00001O001O1iMXROT2jm001O1O000000001O001O001O1O1O1O001O1O001O1O1O001O1O1O1O1O1O001O00001O0000001O00001O00001O00001O001O001O00001O001O001O001O001O00001O001O001O001O00001O00001O001O00001O001O00001O001O00001O001O00001O001O00001O001O00001O00001O00001O0000001O00001O0000001O00001O0000001O0000001O00001O0000001O00001O001O00001O00001O00001O001O00001O00001O00000000000000QPV`0" + } + ], + "question": "What is the texture of ?", + "choices": [ + "A. The object has a wood grain texture.", + "B. The object has a smooth, metallic texture.", + "C. The object has a woven texture.", + "D. The object has a porous, baked texture." + ], + "answer": "C", + "type": "texture/pattern", + "image": "images/vqa_255.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01983311.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Zk0f4Yk0100O10000O10000O10000O100O10000O10000O10000O100O10000O10000O100O1O11O1RK`UOZ4bj0bKiUOU4Tk0M2N3M2N3M2N3M2N2N2N2N3M2N2N2N2N3M2N1O1O1O1O1O2N1O1O1O1O1O1O1O2N1O1O00001O001O1iMXROT2jm001O1O000000001O001O001O1O1O1O001O1O001O1O1O001O1O1O1O1O1O001O00001O0000001O00001O00001O00001O001O001O00001O001O001O001O001O00001O001O001O001O00001O00001O001O00001O001O00001O001O00001O001O00001O001O00001O001O00001O00001O00001O0000001O00001O0000001O00001O0000001O0000001O00001O0000001O00001O001O00001O00001O00001O001O00001O00001O00000000000000QPV`0" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. Wood.", + "B. Woven fabric.", + "C. Plastic.", + "D. Wicker." 
+ ], + "answer": "D", + "type": "material", + "image": "images/vqa_256.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01983311.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Zk0f4Yk0100O10000O10000O10000O100O10000O10000O10000O100O10000O10000O100O1O11O1RK`UOZ4bj0bKiUOU4Tk0M2N3M2N3M2N3M2N2N2N2N3M2N2N2N2N3M2N1O1O1O1O1O2N1O1O1O1O1O1O1O2N1O1O00001O001O1iMXROT2jm001O1O000000001O001O001O1O1O1O001O1O001O1O1O001O1O1O1O1O1O001O00001O0000001O00001O00001O00001O001O001O00001O001O001O001O001O00001O001O001O001O00001O00001O001O00001O001O00001O001O00001O001O00001O001O00001O001O00001O00001O00001O0000001O00001O0000001O00001O0000001O0000001O00001O0000001O00001O001O00001O00001O00001O001O00001O00001O00000000000000QPV`0" + } + ], + "question": "What is the texture/pattern of in the image?", + "choices": [ + "A. Smooth with wood grain.", + "B. Woven.", + "C. Smooth and metallic.", + "D. Porous and baked." + ], + "answer": "B", + "type": "texture/pattern", + "image": "images/vqa_257.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01983311.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Zk0f4Yk0100O10000O10000O10000O100O10000O10000O10000O100O10000O10000O100O1O11O1RK`UOZ4bj0bKiUOU4Tk0M2N3M2N3M2N3M2N2N2N2N3M2N2N2N2N3M2N1O1O1O1O1O2N1O1O1O1O1O1O1O2N1O1O00001O001O1iMXROT2jm001O1O000000001O001O001O1O1O1O001O1O001O1O1O001O1O1O1O1O1O001O00001O0000001O00001O00001O00001O001O001O00001O001O001O001O001O00001O001O001O001O00001O00001O001O00001O001O00001O001O00001O001O00001O001O00001O001O00001O00001O00001O0000001O00001O0000001O00001O0000001O0000001O00001O0000001O00001O001O00001O00001O00001O001O00001O00001O00000000000000QPV`0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Silver.", + "B. Yellow.", + "C. Brown.", + "D. White." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_258.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/1886.jpg", + "mask_rles": [ + { + "size": [ + 384, + 683 + ], + "counts": "f^g3g0V;5K4K4L4N11B=01001O2N2N:E3N1O1O000001AhESOZ:m0<3N00003M_O\\ECi:2Xbn3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The fourth one counting from the left", + "B. The fifth one counting from the right", + "C. The sixth one counting from the left", + "D. The fifth one counting from the left" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_259.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/1888.jpg", + "mask_rles": [ + { + "size": [ + 384, + 512 + ], + "counts": "PbT4:c;5M3M2O0O10O0O2M2103M3L3N4L2N2N1O0000I7N200O100O1001O0oDYOj:Q1M00L4M3M4O1O2O4L9G5K^i\\1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The second one counting from the right", + "B. The third one counting from the left", + "C. The fourth one counting from the right", + "D. The third one counting from the right" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_260.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/2032.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "bQf2a0^;6J2N5L2N1N3N2N1O001O0O100000000O1000000O100000000O10001O000000O1000000000O1000000000001OO101O000000000000000O10000000000O1O1O1M3M300M3M3O1M3M3N2N2N2O100O1001O1O002N2N2N1O2N3M1O1N3N1O2N002N1O2N1O1N10001O000000000O100000000000000000000000000000000000000O100000000O1000O100000O100000000001O000000001O001O00001N101O0O2N1G:G8I8Jij5" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The fourth one counting from the top in the left column", + "B. 
The third one counting from the top in the right column", + "C. The fourth one counting from the top in the right column", + "D. The third one counting from the top in the left column" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_261.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/2051.jpg", + "mask_rles": [ + { + "size": [ + 384, + 461 + ], + "counts": "X]V13j;4K5L3N2K6M2O1O2O001O001O00001O001O00001O0000010O00000000O2O000O2O0O2O0O101O1N110O001O001002M3NO010O001O010O00010O01O001O010O001O00100O001O010O001O10O010O01O100O101N0001N2N1O2O0O2N2O1N101N1N3N2O1O001O001O1O1O2L6JUZn2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The third one from the top in the left column", + "B. The fourth one from the top in the right column", + "C. The third one from the top in the right column", + "D. The fourth one from the top in the left column" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_262.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/2179.jpg", + "mask_rles": [ + { + "size": [ + 384, + 526 + ], + "counts": "a_Y4`0^;5K7K4K3N3L2O2M3N2M2O2M2O2M2O1N2N2N2N2N2N2O0O2N1O2N1O2O0O2M2O1N3L3N2O2M2N2M3O100O1000000000000000000001O0000000O101O000O10001O0O101O0O100O2O0O2N1O2N1O2N1O2N2O0N3N2N2N2M2O3L3M3N2M4K5K5K5K5J:@Pig0" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The third one counting from the left in the first row from the top", + "B. The second one counting from the left in the last row from the top", + "C. The third one counting from the left in the last row from the top", + "D. 
The fourth one counting from the left in the last row from the top" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_263.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/2613.jpg", + "mask_rles": [ + { + "size": [ + 384, + 684 + ], + "counts": "SW:5i;3N1O2N0O2O1O0O1O1O2N2N3M5K2O1ON2100O1O1O101N2N2O1N1O1O100OJ]EROb:m0bEnN`:P1801O10O01O10O010000O10000O10000001O100O2N2N2O0O1O2N014K10O001O1O0OO_Ob0N1L5N3O1N3M4JTbj6" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one from the left", + "B. The second one from the left", + "C. The first one from the right", + "D. The second one from the right" + ], + "answer": "A", + "type": "ordering", + "image": "images/vqa_264.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/2614.jpg", + "mask_rles": [ + { + "size": [ + 384, + 604 + ], + "counts": "QeZ61k;6M2N22N2OO0000001N11O2OO01O00O01000O1N2N2N2oDGX:=aEGj9I]FV1b9mNZF1Fi0P:XOWFOIj0Q:XOSFNMj0R:BnE`0S:]OnEd0R:[OPFc0Q:]OPFb0o9_OQFa0l9CRF=c91YF1f9FZFc0g9[O]Fc0e9ZO]Fe0f9VO\\Fj0f9PO_Fo0Y:0ORO^Ea0`:^OcEa0\\:_OeEa0Y:@iE`0U:_OmEa0Q:@PF`0n9@TF`0V9ZOhF93>T92mFOT9NnF3R9JPG6R9GoF9T9BmF`0S9^OnFb0R9]OoFc0S9ZOnFg0T:1O101O1N`0A6I2\\J" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one counting from the left", + "B. The first one counting from the right", + "C. The second one counting from the right", + "D. 
The second one counting from the left" + ], + "answer": "B", + "type": "ordering", + "image": "images/vqa_265.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/286.jpg", + "mask_rles": [ + { + "size": [ + 384, + 576 + ], + "counts": "Q`n1=_;8I6J5M2L6K5L3M2N4L2N3L4N2L4M2N3M2N2N3M101N2N2N1O2O1N1O2O1N100O10001N1000001N100000001O000O100010O00001O00000000000O101O0000001N2O0O2O0O2O001N2N2O0O2N2N3M1O2N3K5M2L4M4K5M2M3E;K`0UOoDLUSh3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The 1st one from the left", + "B. The 2nd one from the left", + "C. The 3rd one from the left", + "D. The 2nd one from the right" + ], + "answer": "B", + "type": "ordering", + "image": "images/vqa_266.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/290.jpg", + "mask_rles": [ + { + "size": [ + 384, + 512 + ], + "counts": "[oZ49e;4K5L2M4J5I8M3N1N3H7O2M201N2N2O1O1O1O1N2O1O1O1O1O1N3OO01O000O1O100001O1O001OO1000O10001N10000O1O2O0O2N1O2O0O2N2O1N1O3M2N2N4L3M2N1O2M5L3L4L4J:FPQj0" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one counting from the left", + "B. The second one counting from the right", + "C. The first one counting from the right", + "D. The second one counting from the left" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_267.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/291.jpg", + "mask_rles": [ + { + "size": [ + 384, + 576 + ], + "counts": "mR[35i;5E8G9M3N2M3N2N2M3N2N2N2O1N2N3M2N2O1O1O001O1O1O1JRNYFo1g9RNWFn1i95000O10001O000O2O00001N100O01D;M3O2O00eMkFm1U9QNPGl1P9RNTGk1b9O1N2O0O2O0O10O001N100000000000O1O1O2N1UOfE0[:OgEO[:OgE0Z:MiE2W:LlE5S:JnE6R:IoE7Q:GQF9o9FRF:n9ETF9m9FTF:l9EUF in the full image?", + "choices": [ + "A. 
The second one counting from the left in the middle row", + "B. The third one counting from the left in the top row", + "C. The fourth one counting from the left in the middle row", + "D. The third one counting from the left in the middle row" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_268.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/2910.jpg", + "mask_rles": [ + { + "size": [ + 384, + 1210 + ], + "counts": "`X[:1Q86aHo3Y6Q1L3O0O2O0O1001O1O00000000000000000000000000000000000000001O00O100000000001O000000000001O00O10000000001O0000O1000000000000001O00O1000000000000000000000000000000000000001O0000O10000000000001O00000001N10000001N100000O101O0000000001O00O1000000000000O10000001O00O10000O2O00O2O00000O1000000000001N2N5KV1PK[K3XSR2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The second one from the left in the first row.", + "B. The first one from the right in the first row.", + "C. The second one from the right in the first row.", + "D. The second one from the right in the second row." + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_269.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/2922.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "eRY1122g;OXD3g;MYD000g;2ZD50>1]OO3V:d1M000O100000001OO1001O00000000O11O00000000O1001OO11O00O1000000001O01N11O000O1001O00O11O00O1000000000000001O000000O1001O00000000O1000O11O00000000O100001O0000O2O000000O11O00001O004LWe\\2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The 2nd one from the left in the 2nd row", + "B. The 3rd one from the left in the 2nd row", + "C. The 2nd one from the left in the 3rd row", + "D. 
The 3rd one from the left in the 3rd row" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_270.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/2938.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "h_6:]:Z1N2O00000000000000000000000000000000000000000000000000001O000001OO1001O00000000000000000000000000000000000000000000000000000000000O2XOWRi3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The last one from the left", + "B. The first one from the right", + "C. The first one from the left", + "D. The second one from the left" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_271.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/2941.jpg", + "mask_rles": [ + { + "size": [ + 384, + 484 + ], + "counts": "[Sg23j;Z2iM1O000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O100000000000000000000000000000001O0000000000000000000000000000O11O0000O11O0000000000O1001O00O11O0000000000000000000000000000000000000000001O3LdT_1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The second one counting from the left in the second row counting from the top.", + "B. The second one counting from the right in the first row counting from the top.", + "C. The first one counting from the right in the second row counting from the top.", + "D. The second one counting from the right in the second row counting from the top." 
+ ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_272.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/3.jpg", + "mask_rles": [ + { + "size": [ + 384, + 512 + ], + "counts": "gae37i;2M2O1O1O1ON2O2M3NYfV2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The third one counting from the left", + "B. The second one counting from the right", + "C. The fourth one counting from the right", + "D. The third one counting from the right" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_273.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/317.jpg", + "mask_rles": [ + { + "size": [ + 384, + 579 + ], + "counts": "jje12l;3M3N2O2M2N2N2O1O1O1O000100O1O001O1O2N3MT]k4" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The third one counting from the left.", + "B. The fourth one counting from the left.", + "C. The fifth one counting from the left.", + "D. The fourth one counting from the right." + ], + "answer": "B", + "type": "ordering", + "image": "images/vqa_274.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/3269.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "ZYP35h;4HIdD9Z;IeD7[;IeD8Z;IdD8\\;601O00000000000002N1O00001O1O2N1N101N2O1Mhb^1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one counting from the top in the second column counting from the right.", + "B. The second one counting from the top in the first column counting from the right.", + "C. The second one counting from the top in the second column counting from the right.", + "D. The second one counting from the top in the second column counting from the left." 
+ ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_275.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/3281.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "lU_35i;3M2O1N3M2N2O10000O101O000O100O1I]OPEd0P;600O2O000O100O101O0O10000O2N1N200O100O10000O101O0000000000O1000O100N1O2000O10000000001N10000O1O100O100O1O1O2O1N2N2N1N3N1O2O0O2N1O2O1N5K2N2Nkj8" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one from the top in the first column from the right.", + "B. The second one from the top in the second column from the right.", + "C. The second one from the top in the first column from the right.", + "D. The second one from the bottom in the first column from the right." + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_276.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/3284.jpg", + "mask_rles": [ + { + "size": [ + 384, + 512 + ], + "counts": "eWR29f;2N1N2O100O1O1O010O100O010O100O010O10O100O10O010O01000O0100O10O01O1000O010000O010O01O1O001O1O001WO^ORFc0i90iE1U:n0M3M201O1N2O1O1O1O1O001O1O001O100O2N101O1N1O2O1O0O2O0O2O000O101O0O2O0000001O0O2O000000000000O1O1M3O1O1O1O1O1O1O1O100O10001N10000O10001N10001N10001O0O2O00001N101O10OO2O0O2O1N2O1N2N2N2M3M3N3M2M4L5J6J9@ZVU2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The second one counting from the left in the third row counting from the left.", + "B. The third one counting from the right in the second row counting from the left.", + "C. The second one counting from the right in the third row counting from the left.", + "D. The third one counting from the right in the third row counting from the left." 
+ ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_277.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/3500.jpg", + "mask_rles": [ + { + "size": [ + 384, + 613 + ], + "counts": "Q^e2:c;6K5K4L4M3L4M3L4M2N2N3M2N3M2N3M2N3M3N2M2N101N102M2O1O1N101O1O001O001O010O000010O00001O001O1O001O001N2O1N101O1O1N2O1N2N3N1N2N3M2N2N3M2N3M2N3L4M3M3L5L3L6K6F_Uc3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one counting from the top", + "B. The first one counting from the bottom", + "C. The second one counting from the bottom", + "D. The first one counting from the left" + ], + "answer": "B", + "type": "ordering", + "image": "images/vqa_278.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/3644.jpg", + "mask_rles": [ + { + "size": [ + 384, + 577 + ], + "counts": "]UW25g;8H6K4L4M3M2N3M2N2O2M2O1O1N2O001O1O1O1O1O1O1O10O01O001O1O01000O010O000010O0100O0100O0010O01000O01O10O0100O00100O010O1O10O01000O0100000O010000000O10O1000O1000000000000O100000O10O100000000O100000000000000000000000O1000000000000O2O00000O101O001O0O10001O0O10001N1O2O1N1O2N2N2N1O2N2N3L3N3L4M6GPlj2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one counting from the top", + "B. The first one counting from the bottom", + "C. The second one counting from the bottom", + "D. 
The last one counting from the bottom" + ], + "answer": "B", + "type": "ordering", + "image": "images/vqa_279.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/3696.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "UeR1?];:F8J4J7J4M3L4M4M2M3N2O0O3M2N2N2M3O0O2O2M2O0O2N2O1N2N2O1N2N101N2N2O0O1O2O0O2O0O2O0O2O001N2O0O101O001O001N10000000001N1000001O000000001N10000000000000001O00000000000000000001O000000000000000000001O00000000001O000O101O001O00001N101O0O2O00000O2O000O2O000O2O000O2O000O2O0O2O1N2O0O2O1N101N101N101N1O2O0O2O1N102M1O2N2N2N2N1O2N2N1O2N2N2N2M4M2M4L3N3L3M6H in the full image?", + "choices": [ + "A. The second one counting from the right in the top row", + "B. The third one counting from the right in the middle row", + "C. The second one counting from the right in the middle row", + "D. The second one counting from the left in the middle row" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_280.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4.jpg", + "mask_rles": [ + { + "size": [ + 384, + 575 + ], + "counts": "afW36h;4M2O1O1N2O1O1OO2N1O1O2M3M5K_YZ3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The third one counting from the left", + "B. The second one counting from the right", + "C. The fourth one counting from the right", + "D. The third one counting from the right" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_281.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4063.jpg", + "mask_rles": [ + { + "size": [ + 384, + 384 + ], + "counts": "_ZW2=_;7K4L3N3L3N2N3M1O2N2N2O0O101N101N10001O000O10000000000000000001O0O10001N101N101N2N101N2N2N2N2M3N3L4L5J8Fdid1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. 
The second one from the top", + "B. The first one from the bottom", + "C. The first one from the top", + "D. The last one from the top" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_282.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4066.jpg", + "mask_rles": [ + { + "size": [ + 384, + 531 + ], + "counts": "^R`1d0X;8J5K5J5M3L4L4M2N2N2N3L3O1N2N2M3O1N1O2N2O0O2O1N101N100O2O000O101O0O101O0O10001O0000000O10000000000000000001O00000000001N10000O101N10001N101O0O2O001N2N101N2O1N2N2N2N2N2N2N3M2N3M2N3L4L4M3L4L4J8Egeb3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one from the top", + "B. The third one from the top", + "C. The second one from the bottom", + "D. The second one from the top" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_283.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4405.jpg", + "mask_rles": [ + { + "size": [ + 384, + 580 + ], + "counts": "lPR45i;3N3M101N2O1O1O1O00O1O1O2N1N4L4LVSa2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one in the top row", + "B. The last one in the bottom row", + "C. The second one in the bottom row", + "D. The first one in the bottom row" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_284.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4406.jpg", + "mask_rles": [ + { + "size": [ + 384, + 604 + ], + "counts": "^c\\38e;7K3M3L4N1N2N3M2N2O1N2O1O1O1N2O1O2N5K1O00000000KfEcN[:[16O2N1N2O1O2N1O2N1O2N2N2M3M3M3N3LfTV3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The 4th one counting from the right", + "B. The 5th one counting from the left", + "C. The 6th one counting from the right", + "D. 
The 5th one counting from the right" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_285.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4412.jpg", + "mask_rles": [ + { + "size": [ + 384, + 576 + ], + "counts": "Q[a19e;5L2N3M2O1O2M2O1N2O1O2N1O2N3M3M00I7N2N2O2N1O2N1N2O2N2N1O3L3LRQk4" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The second one counting from the left", + "B. The fourth one counting from the left", + "C. The third one counting from the left", + "D. The third one counting from the right" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_286.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4493.jpg", + "mask_rles": [ + { + "size": [ + 384, + 577 + ], + "counts": "kiS35a;`1fNb1jEjLh3k1WOX7K4L2N2O1N2O00000001O1O0O2N3M2N4\\I5k0V7WN[_\\3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The 8th one counting from the left", + "B. The 9th one counting from the right", + "C. The 9th one counting from the left", + "D. The 10th one counting from the left" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_287.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4495.jpg", + "mask_rles": [ + { + "size": [ + 384, + 576 + ], + "counts": "V^c03l;101N101O001O000O10000O1O1O1O1O1O00100O0O100O20OO1001O01O00O01010OO10O1010O0O0010010NO2010kDAh:?XECf:=ZEDd:<^EDa:<_EE`:<`EE_::aEG_:7bEJ_:4aEM_:3`EN`:2^E0c:N\\E4d:I]E8d:H[E9e:F[E;e:EZE in the full image?", + "choices": [ + "A. The 2nd one from the left", + "B. The 1st one from the right", + "C. The 1st one from the left", + "D. 
The last one from the right" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_288.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4497.jpg", + "mask_rles": [ + { + "size": [ + 384, + 570 + ], + "counts": "dVk433N9g0^8_OUHJTOl1Q4UNmK;0E2Og00V1>bNj2S3b1_LaN^3R5O1N2O1O100O10000O01O1M3DBWE]Oo:3UY`1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The third one counting from the right", + "B. The fourth one counting from the left", + "C. The fifth one counting from the right", + "D. The fourth one counting from the right" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_289.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4895.jpg", + "mask_rles": [ + { + "size": [ + 384, + 532 + ], + "counts": "m]Y43h;9J6K2N4L3N2M3N2M3N2M2O2M3N1O2N1O2N1O1O2N3M1O1O2N1O1O1O100O1O10O01O10O01O0010O01O001O01O02O3L5K2O0O2OO010O0O2O1N1O2N1O2N2N2M2O2N2O010O1O010]OVFnNk9P1YFmNh9Q1ZFnNf9R1ZFnNg9Q1ZFnNg9P1ZFoNh9P1YFnNj9P1e0M3M2O0BPEKR;3oDMQ;1RENQ;KSE5\\;0O01000001O0000001O0O101O1O2Mj`h0" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The third one counting from the right", + "B. The fourth one counting from the left", + "C. The fifth one counting from the right", + "D. The fourth one counting from the right" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_290.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4904.jpg", + "mask_rles": [ + { + "size": [ + 384, + 522 + ], + "counts": "ccR2;b;5M3L4L3O1M3N2N2N1O2N1O2N1O1O2N2N1O1O101N1O101N100O101O0O2O000O10000O101O0000000000000000000000000000O2O00001N101N2O1N2O1N2N3N1N3M2N3M2N3M3M4K5KbhU3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. 
The 2nd one in the 2nd row", + "B. The 3rd one in the 2nd row", + "C. The 1st one in the 3rd row", + "D. The 2nd one in the 3rd row" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_291.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4922.jpg", + "mask_rles": [ + { + "size": [ + 384, + 513 + ], + "counts": "ZSV17d; in the full image?", + "choices": [ + "A. The first one from the left", + "B. The second one from the right", + "C. The third one from the left", + "D. The second one from the left" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_292.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4934.jpg", + "mask_rles": [ + { + "size": [ + 384, + 483 + ], + "counts": "hiW3131g01XO2i:NlE:T:EjE7]OK]fm0m9VOfEM=n0k9VOhEL=n0j9WOXFk0g9TOYFn0CnNm95`FT1^9lNaFV1]9kNbFV1^9jNaFW1^9jNaFW1_9hNaFY1^9hNbFX1^9gNbFZ1^9gN`FZ1`9gN^FZ1b9fN]F[1b9b000000000000000001O0000001O0000001O0O2O0GSF_Nn9`1RF_NP:_1RF`No9^1RFbNo9\\1RFcNQ:[1oEeNR:Y1oEfNT:X1kEhNW:U1=M2G9K5M5K5JWfY1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The second item from the right in the bottom row", + "B. The third item from the left in the bottom row", + "C. The third item from the right in the bottom row", + "D. The third item from the right in the top row" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_293.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4939.jpg", + "mask_rles": [ + { + "size": [ + 384, + 480 + ], + "counts": "kfj4a0[;7K3L6K4M2N3M2N2N3N1N3M2O2M4M2M2O1N2O00001N10000000000000O0100000000000O00100O1O010O1O10O01N2O1O2O0O2M2O2N2M3O1M2O2N2M3N3K4M4K4M4J6K_5" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The bottom one in the rightmost column", + "B. 
The second one from the bottom in the rightmost column", + "C. The second one from the top in the rightmost column", + "D. The second one from the bottom in the middle column" + ], + "answer": "B", + "type": "ordering", + "image": "images/vqa_294.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4948.jpg", + "mask_rles": [ + { + "size": [ + 384, + 576 + ], + "counts": "chh2j0Q;9D in the full image?", + "choices": [ + "A. The third from the left in the second row", + "B. The second from the left in the third row", + "C. The third from the left in the third row", + "D. The fourth from the left in the third row" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_295.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5145.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "gUd3 in the full image?", + "choices": [ + "A. The first from the top down", + "B. The second from the bottom up", + "C. The first from the bottom up", + "D. The second from the top down" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_296.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5362.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "bnS34j;2O2O0O10000O1001O01O0001O1N2N`Q_1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The third item from the left", + "B. The second item from the right", + "C. The third item from the right", + "D. 
The fourth item from the right" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_297.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5375.jpg", + "mask_rles": [ + { + "size": [ + 384, + 580 + ], + "counts": "\\co11o;2N0nSg1OokXN9H7J4L4M4M3L3M2O2M2N2N2N2N2O1N2N1O2O1N2N101N2N101N101N2O0O1O2O001N1O1O1O100O100O10000000O0100000O10O1000000000O0100000000O100000O010000O100000000000000000000000000000O10001O0000000000001O00000001O000000000001O00000000010O00001O0000001O00001O0O101N10000O1O2O001N100O2N101N1O101N2N2O2M3M2O1N2M101O1O2M2O2M4L4J9GTUW1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The 3rd one in the 3rd row", + "B. The 4th one in the 3rd row", + "C. The 3rd one in the 4th row", + "D. The 4th one in the 4th row" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_298.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5461.jpg", + "mask_rles": [ + { + "size": [ + 384, + 384 + ], + "counts": "\\hS15h;6L4L2O2M2N2O1N2N2O1N1O2O0O2O0O1O10000O100000000000O10000O101O0O2O0O2N1O2O2M2M3N2N3L4Lhgk2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one in the second row", + "B. The second one in the first row", + "C. The second one in the second row", + "D. The first one in the first row" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_299.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5571.jpg", + "mask_rles": [ + { + "size": [ + 384, + 512 + ], + "counts": "cZd4<`;:H6J4L4L4M3L3N2N3M2N2N2O1N2O1N1O2O0O2O0O2O000O2O0O101O0O1000001O000000000000000000O10001O0O101O0O101O0O101N2O0O2O1N101N2N2N2N2N2N2N3M2M4M4K4L4L4K6J]a?" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. 
The second row from the top, in the first column from the left.", + "B. The first row from the top, in the first column from the right.", + "C. The second row from the top, in the first column from the right.", + "D. The second row from the top, in the second column from the right." + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_300.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5572.jpg", + "mask_rles": [ + { + "size": [ + 384, + 614 + ], + "counts": "oYT4`0Y;:]Ob0I6K6L4K5M2M4L3N2N1O2N2N1N3N2N2N101N2N2N101N101N101N101O0O10001N100O101O000O1000001O000000000000000000O10001O000O10001O000O101O0O2N101N1O2O0O2O1N101N2N2N1O2O1N2N2O1N3M3L4M3L4K5L5J6J6^Od0AoZn1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The third row from the top, the second one from the right", + "B. The second row from the top, the first one from the right", + "C. The third row from the top, the first one from the right", + "D. The first row from the top, the third one from the right" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_301.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5575.jpg", + "mask_rles": [ + { + "size": [ + 384, + 384 + ], + "counts": "gP`36R;j0J4M3M5M1N3N2M2O1O2M2O1O1O1O2N100O3M100O2O1N101N100O2O0O101O1O0O101O000000001O000000000000000001O0O1000001O000O2O00001N101O0O2O0O2O1N2N2O1N1O2N2N2N101M4M3M2N3L5K3M5I7JU4" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first column from the right, the second item from the bottom upwards.", + "B. The second column from the right, the first item from the bottom upwards.", + "C. The first column from the left, the first item from the bottom upwards.", + "D. The first column from the right, the first item from the bottom upwards." 
+ ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_302.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5813.jpg", + "mask_rles": [ + { + "size": [ + 384, + 512 + ], + "counts": "g[X26g;5N01O1O1O1O0000I7K5M3L4L4M3N2N2N2L4N2N2O1N2O1O1O1O1O1O1O1O1O1O1O1N2O1O1O1O100O2N1O1O1O100O001O2N100O1O100O010O2O0O10000O1000O11O0O100000O100000001O000O101O00010O001O001O1O1O001O1O1O1O1O1O1O001O2N1O1O2N1O2N2N2N2N3M3M4L3M4L3M5J7Fk`\\2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The one in the upper part", + "B. The one on the left side", + "C. The one in the bottom part", + "D. The one in the center" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_303.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5819.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "V]V1154_;:L4M2N2N2O0O1O1N2M3M3M3N2N2M3N1O2M201N2N1O101N1O1O101N10000O2O0O100000001O000000000000000000000001O0000001O000000000000ORNQFm1o9SNQFm1o92O10O10O11SNQFg1U:O1N100O2N100O2O0O100O1O2N2O1N100O2O1O2M101N101O0O2N100O2O0O2O2M2N2M4K6JhV[2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The 1st one in the 1st row", + "B. The 2nd one in a row of two", + "C. The 1st one in the 2nd row", + "D. The 2nd one in the 1st row" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_304.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5918.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "Qj><`;7K5K4L3M4M2N2N1O2N101N100O1O100O10000O100O101O0000000O1000000000000000000000000000000001O0001O01O1O001O1O001O001O001O001O1O1O1O001N2M4L3M3N3I7K8FQ^^3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. 
The 1st one in the 2nd row", + "B. The 2nd one in the 1st row", + "C. The 2nd one in the 2nd row", + "D. The 1st one in the 1st row" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_305.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5930.jpg", + "mask_rles": [ + { + "size": [ + 384, + 514 + ], + "counts": "hm]4:^;>H5J5K5L5L4K5L2M4M2N2M3N3M2N2O1N3N1N2N2O1N2O001N2O1O0O2O1O001O001O001O1O001O001O00001O000001O0000001O001N101N101N1O2N2N1O2N2N1O2N2N2M4M3L3M3M4L4L5J6J6H9H=ZO^Ve0" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one from the left to the right", + "B. The second one from the right to the left", + "C. The second one from the left to the right", + "D. The first one from the right to the left" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_306.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/6.jpg", + "mask_rles": [ + { + "size": [ + 384, + 575 + ], + "counts": "R[d38f;4M3N1N2N2O1O2N1N2O2N2N00M3O1O2N1N2O2N1O2M3N3LPYj2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The fourth one from the left to the right", + "B. The fifth one from the left to the right", + "C. The sixth one from the left to the right", + "D. 
The fifth one from the right to the left" + ], + "answer": "B", + "type": "ordering", + "image": "images/vqa_307.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/6646.jpg", + "mask_rles": [ + { + "size": [ + 384, + 384 + ], + "counts": "_b`06d;:K2O0000000O1000001O0001O00000000000000000000000000000000000000000000000000000000000000001OO100001O000001O000000000000000000000000000000000000000000000000000000000000000000001O0001O000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001O00O10000000001O00O10000001O0000O100001O0000O100001O0000O10000001O0000O1001O0001O00000O10002Mdig1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The fourth one from the top down", + "B. The sixth one from the top down", + "C. The fifth one from the top down", + "D. The fifth one from the bottom up" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_308.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/6650.jpg", + "mask_rles": [ + { + "size": [ + 384, + 480 + ], + "counts": "TRo0b0S;g0^O=F8H6K4L4M2N3M2N1O2N2N2N101O001O001O00000000000000000001O00000000000000000000O1000000000000000000000000000000O10000000000000000000000001O0000000O10001N100O1O2O0O1O2N2N2N1O2N2N2N2M5L3K6K5J9Dk0POkU`3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The fourth row from the top, the first one from the left", + "B. The third row from the top, the second one from the left", + "C. The fourth row from the top, the second one from the left", + "D. 
The second row from the top, the fourth one from the left" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_309.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/6651.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "jUj11132MY;W1eDlN_:f1C4M3M3N1N=@i0iFfL00\\8Q4_O100O100O2O000000001N101O001O1O1O3M4L3M1O2N01N100O101gMkHBY7\\OSJE_NROo7S1l2H9^Oa__2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The 1st one in the 1st row", + "B. The 2nd one in the 2nd row", + "C. The 2nd one in the 1st row", + "D. The 3rd one in the 1st row" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_310.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/6660.jpg", + "mask_rles": [ + { + "size": [ + 384, + 384 + ], + "counts": "gmm012013[;b0J3O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1N2O101N1O100000000000000000000000000000000000000000001O000000O1001O000000O100001O00O1000000000000000000001OO100000000000000000000000000000000000000000000001OO1001OO1001OO1000000001OO1000000000000000000001OO1000000001OO10000001O0000000001O00O100001O0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O100000000000O11O00O2O000000000000002M_R<" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The second from the top down", + "B. The third from the bottom up", + "C. The second from the bottom up", + "D. 
The bottom one" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_311.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/6743.jpg", + "mask_rles": [ + { + "size": [ + 384, + 384 + ], + "counts": "`Yn24g;;H4L4N3L3M2N2O1N2N101N1O10001O0O1000000000000000000001O001O0O2O001N2N101N2N3M2M4L4L5HeVQ1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first column from the right, the first row from the top", + "B. The second column from the right, the second row from the top", + "C. The first column from the right, the second row from the top", + "D. The first column from the left, the third row from the top" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_312.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/680.jpg", + "mask_rles": [ + { + "size": [ + 384, + 931 + ], + "counts": "ega71a;g0bDDO0178AN2]8c3@4K4N00000000000000000000000000000000000000000001OO10000000000000000000000001O00O10O1000000001O0000000001O00O10000000000000000000000001O00O1000000000001OO100000000O100001O0000001O0000000000000000O1000000000000QMXHELW1m7TOUHf1l7[NRHf1o7U11N2oNlGbM00]8W2gG^MN31ON0e8O^G0N12NNb0OBOO]bP2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one from the bottom and the fourth one from the left", + "B. The second one from the bottom and the fifth one from the left", + "C. The first one from the top and the fifth one from the left", + "D. 
The first one from the bottom and the fifth one from the left" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_313.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/6804.jpg", + "mask_rles": [ + { + "size": [ + 384, + 550 + ], + "counts": "Vf\\29e;4L4L4L5K3N1N2O1O1O1O1O1O1O2N100O1O2O0O101N2O0O2O001N2O001O1O1N2O1O1O1O00O2M2N2M4M2N2M4M2N3L3N3M3M2O2M3N2M3O1N2O1O2M201N2O2MomY3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The second line from the bottom, the second one from the left.", + "B. The third line from the bottom, the third one from the left.", + "C. The second line from the bottom, the third one from the left.", + "D. The third line from the bottom, the second one from the left." + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_314.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/6813.jpg", + "mask_rles": [ + { + "size": [ + 384, + 413 + ], + "counts": "^[_2<^;>E5L3M4L4M2N3M2N2N2N1O2N2N2N101N2N1O2O0O2O001O0O2O001O0000001O000O11N10000000000O1000001O0O2O0O2N101N2N1O2O0O101N101N101N2N2O1N3M4L4K5K5K8H5J```1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The 3rd one from the right in the 4th row.", + "B. The 2nd one from the left in the 4th row.", + "C. The 2nd one from the right in the 3rd row.", + "D. The 2nd one from the right in the 4th row." 
+ ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_315.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/682.jpg", + "mask_rles": [ + { + "size": [ + 384, + 485 + ], + "counts": "dbT1=];8I7J6L4L5K5K5K4L3O1N2M7bFkMb8o2I4L4N3N3N001O3M5LO0001O00001OO100000000001OO101O001O3M9G4gN`GjNa8P1jGjNY8o0PHlNR8o0UHlNo7o0d1K7J7H6J5J6J`]m3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The second one in the second row", + "B. The third one in the second row", + "C. The second one in the third row", + "D. The third one in the third row" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_316.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/6827.jpg", + "mask_rles": [ + { + "size": [ + 384, + 512 + ], + "counts": "iX\\1?`;4L2N5K:F9H4L4K5L4L5K6J6J5J6K4L5K8H4L4L1O1O1O001O001O0O2O001O0000001O0000000000000000001O00000000000000000O11N100000O1000O100000000O1000001N10000O10000O2O0O2N1O2N6aL^G81J;U2l9iMfE`1k:@d0UOk\\c3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first row, the first one from left to right.", + "B. The first row, the second one from left to right.", + "C. The second row, the second one from left to right.", + "D. The first row, the third one from left to right." + ], + "answer": "B", + "type": "ordering", + "image": "images/vqa_317.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/6960.jpg", + "mask_rles": [ + { + "size": [ + 384, + 384 + ], + "counts": "_lj2:b;9I3M4L4L3N2N2N2M3N2O1N2N101N100O2O0O2O0O101O000O1000000O2O00O10000000000000000O2O00000O2O000O2O0O100O2O1N1O2NBeNmE0008Y1l9gNYFX1Y:N2M3N2N3LkNUOZGh0S:K5J_n>CQRA0X=0c91TYO0UT10lkN0nDOTk8" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. 
The third row from the top and the second column from the right.", + "B. The second row from the top and the second column from the right.", + "C. The second row from the top and the third column from the right.", + "D. The second row from the bottom and the second column from the left." + ], + "answer": "B", + "type": "ordering", + "image": "images/vqa_318.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/7153.jpg", + "mask_rles": [ + { + "size": [ + 384, + 557 + ], + "counts": "bSl14h08o9KkE in the full image?", + "choices": [ + "A. The 1st one from the left to the right", + "B. The 2nd one from the left to the right in the 2nd row", + "C. The 3rd one from the left to the right", + "D. The 2nd one from the left to the right" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_319.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/7370.jpg", + "mask_rles": [ + { + "size": [ + 384, + 576 + ], + "counts": "od^2121c;a0C8J6K3M4L4M2M3N2N2N2N1O2O1N1O2O0O101N100000001O00000000O1000001N10001N2O0O2N2O0O2N2N3M2N2N2M4L4L6J5I8IY_e3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The 4th one from the left.", + "B. The 6th one from the left.", + "C. The 5th one from the left.", + "D. The 5th one from the right." + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_320.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/7384.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "nmU2Q2n94M1O0000000000001O000000000000001O0001O000000000001O000000001O0001O0000000001O0000000000001O000001O000001O0000000000001O000000000001O00000O1]Od0nNofg1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The second one from the left in the third row", + "B. 
The third one from the left in the second row", + "C. The fourth one from the left in the third row", + "D. The third one from the left in the third row" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_321.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/7488.jpg", + "mask_rles": [ + { + "size": [ + 384, + 683 + ], + "counts": "mee3l0R;4M3L3N2O2M2N101N2O0O2O0O2O0O2O1N2O0O2O001N10001N10001O00001O00000000001O000000000000000001O00000000001O00000O10001O00000O101O001N100O2O001N101O1N1O2O1N1O2N2N2N2M4L3M4L5@YVZ3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. From the bottom up, the first row; from the left to the right, the second element", + "B. From the top down, the first row; from the left to the right, the first element", + "C. From the bottom up, the second row; from the left to the right, the first element", + "D. From the bottom up, the first row; from the left to the right, the first element" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_322.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000242934.jpg", + "mask_rles": [ + { + "size": [ + 227, + 500 + ], + "counts": "VfP1b0`69H5kIQOl5[1G4M5K1O2N1O2N2N1O1O2N2N1O1O3L[N]Km0b4SO_K_OI[1g4VOcKk0\\4VOcKj0]4VOdKi0\\4WOdKi0\\4VOgKh0Y4WOiKh0W4XOjKg0V4YOjKf0W4YOkKf0T4[OnKb0S4^OmKb0S4^OmKb0S4^OoK`0Q4@QL=P4DPL;P4FPL:o3FSL7n3JQL6o3JQL6o3KPL5o3MPL3P4NoK2Q4NoK2Q4OnK1R40mKOT43jKMW42iKNW43hKMX45_K1b41[K0e41ZKNg44WKLh4X10000000O1000O10O1000001N10000O0100000O100O1O2N10O1001UNPKa1^5J3N1O0O10000O101O0O10001O00001N100O101O0O10001N10001O001N1000O1103L1O000010N2O000000O1O1N2DhIEd\\b1" + }, + { + "size": [ + 227, + 500 + ], + "counts": 
"Rn`1221g6;K2N2O1O1N2O1O1O0O2O1O1O3L2O001O2M3N2N3M1N2O1O1O1N2O1O1O1O1N2O1O1O1O1O1O00001O01O0kNoJ8R5GoJ8Q5GQK8P5GPK9P5FRK9o4DSKb4A\\Ka0d4@[K`0e4@ZKa0f4@YK`0h4_OfJL:f0P5_OeJK?b0m4DYK:g4D]K:d4D^K;b4E^K;b4D`K;a4D_K\\5E[Jb0f5=001O00001O0O2N1O1O2M2O2N1O1N3N1N2O2L3M5Hlo1N]PNNXe20iZM10O_Rk0" + }, + { + "size": [ + 227, + 500 + ], + "counts": "Qfi03n66L4K2O00001N9H4L2N0ODoIHQ6e0000DoIFQ6:PJEo5?", + "choices": [ + "A. ", + "B. ", + "C. None of the above", + "D. Both and " + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_323.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000376093.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "noc51n>1O1O100O100O101N00101N1O100O1O100O1O100O1O100O1O100O1O1N2O1O100000000000000000O1001O0000O02O000000O1001O00O10000001OO1000O1001O7I5M4K6K3M3N2M4M3L3N1O1O1N101O2N1N3N3L4M2N2M5L3L3N2N2M3N1O1N10000O2N1000001N1000001N100O1000000O10000O1000000O1000O0100O10O1000O10O10O10O01000O10000O100O100O00100O10000O00100O01O01O001O10O01O1O1O00001O1O1O1O0O3N1O1O1O1O3L3N1O3L3M4J8I4M5K6H6J6Jf>AlA4OO3LZg`6" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"QfX36i>3L4N1N3M2N101O2N0O2O001N101N2O2N1O1O1N3N1O1O1O1N2O00G9O100O100N2O10000N2N1100RFXOd5h0[JZOd5e0YJ_Og5`0\\IEYMMZ9=YIMZMG]9;UI6XMCa98UI8XMBb94bG^O2k1[8fNdG@HR2d8^NcGB_OY2m8UNeGBVOa2U9lMeGHQO^2Y9jMeGm2\\ObLX8`0]Hn2ZOlLP86eHo2YOoLP81hHP3WOPMQ80gHQ3XOoLS8MfHT3WOnLT8MeHV3WOkLW8NbHW3WOkLW8McHY3UOjLY8KbH\\3UOiLZ8IaH_3TOiL\\8G_Hb3TOgL^8E_Hd3SOgL^8E^Hf3SOeL`8C^Hi3QOdLm9]3RFdLm9^3RFbLm9a3nEbLR:V400O100O100O100O1N20O10O10000kFWKR8j4oGUKQ8j4QHVKn7j4RHVKn7j4SHTKn7l4S10000ZLRFZ2n9fMXFT2T9hLnFX13l1n8dNTGZ1m8eNUGY1k8gNWGW1i8iNXGW1Q8TMjGd15UN@P3^8CRH`M]Oo2Z8FXH\\M^Oo2W8FWH`MCk2Q8H\\H]MCl2n7IZHRMI:0j2k7LZHTMI^3k7@WHnL1g3i7[O[Hc0f7^OYHb0g7_OXHb0g7_OVHd0j7]OTHd0k7]OUHb0l7Y300O10M3L4I7N2M3]ObG_Jc8`5?TOPGgKQ9Y4SGbKn8T1nF`15ZMn8T1oFb14nLI2V9]1oFb1h9\\N\\Fb1e9]N\\Fb1d9]N^Fb1]1oLY6^1]Ha1Z1QMZ6\\1^Hb1X1RM[6Y1bHa1T1VM[6W1bHc1S1VM\\6V1bHc1R1WM\\6T1dHQ1UO\\Nl1_O\\6R1dHf1P1XM]6P1eHf1[9XNgFg1Z9VNjFh1W9TNmFk1U9PNoFn1U9hMSGW2m:L2O3M2K5J7E=D0O100O1O100O100O100O100O100O100O100O100O1O100O100O100O100O100O100O100004L5K4LWmj6" + }, + { + "size": [ + 480, + 640 + ], + "counts": "X]c44l>3L5L5K4L4L1N1001ClALS>4oAKQ>5SBGm=4nAI71k=5PBI52k=5PBI61j=6PBI]>77JYA1g>Noe_4" + }, + { + "size": [ + 480, + 640 + ], + "counts": "WbU64l>4K2OO1000000O1000O10000000000000001O000001Nkdk2" + } + ], + "question": "Which statement correctly describes the actions and position of ?", + "choices": [ + "A. is holding and standing on .", + "B. is standing on and holding .", + "C. is looking at while standing on .", + "D. is holding and looking at ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_324.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000376093.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "^aQ1>[>9ZOZOgBP1T=b0K5K3N2N5K4K4K5K7K4L4K7J3L4M2Nf:]O^Ec0b:YOaEg0]5M4K6K3M3N2M4M3L3N1O1O1N101O2N1N3N3L4M2N2M5L3L3N2N2M3N1O1N10000O2N1000001N1000001N100O1000000O10000O1000000O1000O0100O10O1000O10O10O10O01000O10000O100O100O00100O10000O00100O01O01O001O10O01O1O1O00001O1O1O1O0O3N1O1O1O1O3L3N1O3L3M4J8I4M5K6H6J6Jf>AlA4OO3LZg`6" + }, + { + "size": [ + 480, + 640 + ], + "counts": "nn[31n>2OO001001O00N\\d`20a[_M;F6M2N1O2O1O0O2O0O101O0oCWOj9j0PF@l9`0SFCj9=WFEf9a2Q1[MaN4=b2R1YMcN49e2S1XMdN36h2V1UMeN23k2X1SMeN21m2Z1QMeN3Mo2^1nLfN2IS3`1lLgN1HT3a1kLhN1EU3c1jLhN2\\N]OHh3T3iLiN2WNBHe3X3hLiN1SNFZOR4j3WLjN1oMd4W3[KjN2jMg4]3WKiN2gMi4`3UKiN3bMl4e3RKhN3`Mm4i3PKgN4[Mo4n3nJfN5XMo4S4lJfNV6Z1kIfNT6[1lIeNS6[1nIdNR6]1nIcNQ6^1oIbNP6^1QJbNn5_1RJ`Nn5`1RJ_No5b1PJYNU6h1PJjMX6W2kI_M[6a2kISM[6n2iIgL]6Z3cI_Lc6b3]IZLf6g3ZITLj6m3WInKl6R4XIeKm6\\4k1001O1O1O001O1O001O1O001O001O00YOnEaLQ:Z3YFbLf9[3_FeL_9Z3eFaL]9]3lFXLX9g3Q1O1N2O1O100O1O100O1O100O100O100O100O100OnNgL_FY3Z9WM\\Fi2b9`MWFa2e9_1_O^K^Fh4^9?L4O0N3\\McJcK_5[4kJ_I^Of1k5j4UKiJn4V5YKcJi4\\5`KXJd4h5bKlId4S6U20O01N2M3M3O1N2N2O1N2N2O1O2L3O1O2M3N1O2gLWG8j8F[G4i8KZG1g8N]GNd81^GKf83]GJe83^GIf85^GEe8:`G@c8>bGXOUNTO]:a1dGmNj8n0g2N2L6K4J9F8ISgh0" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"QfX36i>3L4N1N3M2N101O2N0O2O001N101N2O2N1O1O1N3N1O1O1O1N2O00G9O100O100N2O10000N2N1100RFXOd5h0[JZOd5e0YJ_Og5`0\\IEYMMZ9=YIMZMG]9;UI6XMCa98UI8XMBb94bG^O2k1[8fNdG@HR2d8^NcGB_OY2m8UNeGBVOa2U9lMeGHQO^2Y9jMeGm2\\ObLX8`0]Hn2ZOlLP86eHo2YOoLP81hHP3WOPMQ80gHQ3XOoLS8MfHT3WOnLT8MeHV3WOkLW8NbHW3WOkLW8McHY3UOjLY8KbH\\3UOiLZ8IaH_3TOiL\\8G_Hb3TOgL^8E_Hd3SOgL^8E^Hf3SOeL`8C^Hi3QOdLm9]3RFdLm9^3RFbLm9a3nEbLR:V400O100O100O100O1N20O10O10000kFWKR8j4oGUKQ8j4QHVKn7j4RHVKn7j4SHTKn7l4S10000ZLRFZ2n9fMXFT2T9hLnFX13l1n8dNTGZ1m8eNUGY1k8gNWGW1i8iNXGW1Q8TMjGd15UN@P3^8CRH`M]Oo2Z8FXH\\M^Oo2W8FWH`MCk2Q8H\\H]MCl2n7IZHRMI:0j2k7LZHTMI^3k7@WHnL1g3i7[O[Hc0f7^OYHb0g7_OXHb0g7_OVHd0j7]OTHd0k7]OUHb0l7Y300O10M3L4I7N2M3]ObG_Jc8`5?TOPGgKQ9Y4SGbKn8T1nF`15ZMn8T1oFb14nLI2V9]1oFb1h9\\N\\Fb1e9]N\\Fb1d9]N^Fb1]1oLY6^1]Ha1Z1QMZ6\\1^Hb1X1RM[6Y1bHa1T1VM[6W1bHc1S1VM\\6V1bHc1R1WM\\6T1dHQ1UO\\Nl1_O\\6R1dHf1P1XM]6P1eHf1[9XNgFg1Z9VNjFh1W9TNmFk1U9PNoFn1U9hMSGW2m:L2O3M2K5J7E=D4K2OO1000000O1000O10000000000000001O000001Nkdk2" + }, + { + "size": [ + 480, + 640 + ], + "counts": "lS`2=^>7J6Jb0^O7K3K4M3M3L4N2M3N2M3N2L4K5O1K5L4M3N2M3N2N2N2N2N2O1N2O1O1O1O1O100O1O1O1O100O100O100O100O10000000000000000000000001O00O10000000000O1000000000000000000001O001O0000001O0000001O001O001O001O1O00001O1O1O001O001O1O1O1O2N1O2N2N1O1O2N3M2N2N3M3M5K3M4L4L3M4L5Km0SO;E6J2N2N2L4N2N3M2N2NTfj4" + } + ], + "question": "Which person is holding , and which person is looking at ?", + "choices": [ + "A. is holding , and is looking at .", + "B. is holding , and is looking at .", + "C. is holding , and is looking at .", + "D. is holding , and is looking at ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_325.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000222317.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "kc^1g0T>>E7J2N2O0O1fCQOnN_O10[;`1fEFlNkNn:_1WF2`9N`F9Y9GgFl0f8SO[GX1Z8hNgG]1S8cNmGb1n7^NRHf1j7ZNVHj1e7WN[Hk1c7UN^Hl1`7TN`Hm1_7RNbHo1]7QNcHQ2[7oMeHR2Z7nMfHS2Y7mMgHT2X7lMhHT2X7kMiHV2V7jMjHW2U7iMkHX2T7hMlHY2S7gMmHZ2R7fMnH[2P7fMPI[2o6dMRI\\2n6dMRI]2m6cMSI^2l6bMTI_2k6aMUI`2j6`MVIa2i6_MVIc2i6]MWId2g6]MYId2f6\\MZIe2e6[M[If2d6YM]Ih2b6XM^Ih2b6XM^Ij2`6VM`Ik2_6TMbIm2]6SMcIm2]6SMcIn2\\6RMdIYOkNK2P2]7mNfISOTOG0Y2T7mNhIkN@GJ`2m6nNiIiN:Z2l5mNjIgN=\\2h5mNlIdN?_2d5mNoKR1P4nNQLR1n3nNSLR1l3nNTLS1k3mNVLS1i3mNZLQ1d3PO]LP1b3PO_LP1`3PO`LQ1_3oNdLo0[3QOfLo0Y3QOhLo0W3QOjLo0U3QOlLn0T3ROmLn0Q3SOPMn0n2ROSMm0m2SOTMm0k2SOUMn0j2ROWMn0h2ROYMo0e2QO\\Mo0c2QO]Mo0c2QO]MP1b2PO^MQ1a2oN_MR1_2nNcMR1\\2nNdMS1[2mNeMS1[2mNeMT1Z2kNgMV1X2iNiMX1V2hNjMY1U2fNlM[1R2eNoM\\1P2cNQN^1n1aNSN_1m1aNSN`1l1_NUNa1k1]NWNc1h1^NXNb1h1^NXNb1h1]NYNc1g1\\NYNe1g1ZNZNf1f1YN[Ng1e1XN\\Nh1d1XN\\Nh1d1WN]Ni1b1XN^Nh1b1XN^Nh1b1XN^Nh1b1XN]Ni1c1WN]Ni1c1WN]Ni1c1WN]Ni1b1YN]Ng1c1YN\\Nh1d1XN\\Nh1d1XN\\Nh1d1YN[Ng1e1YNZNh1f1YNYNg1g1YNYNg1g1ZNXNf1h1ZNWNg1i1YNWNg1h1ZNXNf1h1[NVNf1j1YNWNg1i1WNYNi1g1VNZNj1f1VNZNj1f1UN[Nk1e1TN\\Nl1d1SN^Nl1a1TNaNk1_1TNcNk1]1UNdNj1\\1UNeNk1[1TNgNk1Y1TNhNl1X1TNiNk1W1TNjNl1V1SNmNk1S1UNoNi1Q1VNQOi1o0WNQOi1o0WNSOg1l0YNXOd1h0[N]Oa1c0_N@^1`0aNC]1=cND\\1P2@SN>n1^OVNb0j1\\OXNd0h1[OYNe0g1YO\\Nf0d1YO]Ng0c1WO_Ni0a1TObNl0^1SOcNl0^1ROeNm0[1ROgNm0Y1ROiNm0W1QOkNo0U1POmNo0S1oNPOP1P1oNQOQ1o0nNTOP1l0nNWOP1j0oNXOP1h0oNZOP1f0POZOP1f0nN^OP1b0oN@P1`0POAo0?POBP1>POCo0=PODo0=QOCo0=QOCo0=QOCn0>ROBn0>ROBn0>QOCo0=QOCo0=QOCo0=QOCo0=ROBn0>ROBn0>ROBm0?SOAm0?SOAm0?SOAm0?SOAm0?SOBk0?UOAk0?UOBj0>WOAi0?WOAh0`0XO@h0`0XOAg0?YOAg0?YOBf0>ZOAg0?ZO@f0`0ZO@f0`0ZO_Og0a0YO_Og0a0ZO]Of0d0ZO[Og0e0YO[Of0f0ZOZOf0f0[OYOe0g0[OXOf0h0ZOXOf0h0[OVOf0j0ZOVOf0j0ZOVOUN^LV1\\4e0VOSNaLW1Y4g0TOoMhLY1T4h0TOPNgLX1U4h0TOSNdLU1X4h0TORNgLS1V4l0ROo
MlLS1R4n0ROnMnLS1P4o0SOiMRMV1l3Q1ROhMSMW1k3R1QOfMVMW1i3S1QOcMYMZ1f3S1QOaM\\MZ1d3U1oN_MaM[1`3V1nN`MbMZ1`3W1lN_MfMY1^3X1kN_MhMY1]3X1lNhLfLc0R1\\1\\3Y1POYMfM]1Z3[1POVMgM^1Z3\\1POTMiM^1W3^1QORMjM^1V3`1POQMlM^1T3a1QOoLmM_1R3c1QOkLPNa1o2d1ROjLRN_1l2g1SOhLSN_1k2i1o0UNQOl1Q1QNoNo1S1nMnNR2U1kMkNV2X1fMhNZ2[1cMeN]2_1^MbNc2h1iLfIK0Ob4]3m6001O000000001O00001O0000001O000000001O0000001O0000001O0000001O0000001O00000O2N1L4K5F:J7H7M3L4L5M2N2O2M2N2O1O3M2M2O2N1O100O1O101N1O1O2O0O10000O1000001N10001N1000000O2O00001O000O101O00001N100N2J7L3N2O2N1O1N200O2N101N1O100O101O000O101O0O101O000O101O0O10001N1000000[JgHc3Z7ZLkHc3U7\\LVIY3k6gL_Io2a6PMaIo2`6PMbIn2^6QMaJQ2_5oM`KeNiL0O07h1b7CfMcNa2\\1c5O0000001O000O2O00000000001O00001N1000001N1000001O00001O00000000001N1000001O0000VK" + }, + { + "size": [ + 480, + 640 + ], + "counts": "oRk03k>4aAJP>7mAKS>6lAKS>6lAJU>6iAJX>8dAI:Lh=`0WBBi==WBEg=;YBEh=:XBGg=9YBHf=8ZBIf=6ZBKe=5[BLd=4\\BMd=2\\BOc=1]B0c=O]B3b=L^B5a=K^B7a=I_B8a=G^B;a=E^B=b=B]B`0b=AYBd0g=;O101N1O100O100O1O1O1O100O1O1O1O1O2N100O1O1O1O2O0O1O1O1O11O1O2N10O00O2O01N100001O3M00OXNkBe1X=05KM_NhB`1X=aNgB_1Y=bNgB]1_=O000000O11O000000001O1O1O1O002N1O1O1O002N1O2N001O1O1O1O1O1O2N1O1O2N1O00001O1O001O3M1O001O1O1O1O001O1N2K[Ye6" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"X3n6R8000000001O1N2O0000001O001O1O002M101O00001O00001O001O000O10000000O1O100O010O100000000000O01N2L4K5I7M3O1O100O0100000O10000O1000000O10O10O10000O10000O100O00100O100O100O100O1UOcGRK^8m4dGRK[8n4kGmJU8Q5oGlJR8S5PHkJQ8U5PHjJP8U5RHhJP8X5l0O1O1O0oNZFjLf9l2SGdLn8X3\\GaLe8]3`1O1O1O1000000O01000000000000000O1000O100000000000000000O100000O100000000000O10O1000000000000O10O1000000000000000O1000O1000000000000000O10O100000000000000O10O100000000000O100000O10000000O10000000O100000O10\\MiLYIGQN`3f8RMUIo2k6aMeH_2[7lMYHU2g7nMZHn1f7XN_H]1a7cNaH[1_7aNVG_N]1n2^7gNbHX1^7mN^HR1b7TOeG[NF`2e8_OXGUN3[2e86\\GHd8:[GEe8<[GCe8?[G_Oe8b0\\G\\Od8e0\\GYOe8h0ZGXOf8i0ZGVOf8l0YGSOg8m0cGfLFj1g8a1RHXNn7j1QHUNo7l1QHSNo7o1QHoMo7g2\\GWMe8k2ZGTMf8n2YGQMg8P3YGoLg8R3YGmLg8T3ZGjLf8W3ZGhLf8Z3YGeLg8\\3ZGbLf8_3ZG_Lg8c3XG\\Lh8e3XGZLh8g3XGXLh8i3ZGoKk8R4\\G`Kj8b4i01O3M5K6J00O1O1O104K2O1N2O3M;DZ1B:D=E?A;E;E:F9GB=Cg0YOe0cLmCc2Q=WOh0WOc0^O_dP5" + }, + { + "size": [ + 480, + 640 + ], + "counts": "`UZ33k>2O2N2N1O2N1O2O0O2N1O101N00100O1O010O01000O010O101N10000O2O00001O0O101O001O0O2O001O001N101O001O001O1O2N0000001O000000001O0000001O0010O01mBfN^a0Aa0_Oo0QOa0_Oc0]Of0ZO?", + "choices": [ + "A. Lying on , which is on .", + "B. Lying on , which is attached to .", + "C. Attached to .", + "D. Lying on ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_326.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000148719.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "0]3c;001O00000000001O001O1O1O00001O2N1O1O000000000000O1O100O100000000001O001O1O00001O001O0000O1O1O1O1K5N204L3N1M4M1O7I4L2N1O1N9H4MgNkMiEP2X:YNbEd1^:aN^E^1b:cNeEU1[:oNdEn0\\:TOeEi0[:[OaEe0_:^O_Ea0a:BbE8^:KcE1]:3`EL`:9\\EFd:<\\EBd:a0\\E\\Od:f0\\EXOd:i0\\EVOd:m0YESOg:n0ZEPOf:S1[EiNe:Y1^EbNb:a1]E]Nc:h1YEWNg:j1YEUNg:n1[ElMf:V2Q12N2N2N1O2N2N4L2N3M4L2N2N1O2N1O00000000000000000000000000000000000000000000000000000004WMYDQ2gA:G8Ha0_O6J8H3LQX2o0ofM;D:I8H`0@7I6J;E1O1O001O000000001O0000O1001O0000001O0000000000000000000000001O0000001O001O2N1O001O00000000001O000000000000000000O1001O0000001O1O5K4L3M4L3M3M1O1O1O0aEbK\\:]4dEdK\\:\\4eEcK[:]44L4]Oc0A?001Oj0VON2O1000000O1O1O1N2O1N2N2N2M3M3001ON200O11O00000000O100O1O1O100000000000000M300N20000N20000O1O100O100O10000000000O1001O01O1O00010TNPDf0R<^NlCLj0d1`4dNObC3l;GYCj0?Dm;AdCW1;\\Ol;l0SDWOj;k0UDVOg;o0UDSOi;R1oCTOo;P2N2N1O1O2M200O2O001O0O101O001JbLhD_3W;aLiD_3[;10001O0000000L_LiDa3W;_LiDa3W;_LhDb3X;^LhDc3Z;00001O00001N1O101O001O0000001O00002N2M3N101N2N1N200O1O2M101N200O0O101O00001O000000001O0O11O0O1GdEnK\\:[40O10000000000O0O2O10000001N11O01N1O010O1O101O001O00O10001OO100000001O0O100O01000O2O01O00O101O00001O000000001O0O10000O101N100000001N1O100O2L3M3N3M2O101M2N2O100N2O2M2N2O1N3N1M3O1N2N3N1O1K6M2B[I^Hi6a7WI]Hk6b7:0fJXH\\3g7dLZH\\3e7fLZHZ3f7dLZH_3e7n1O100000000O2N11O00O2O001O00000000000O2O000000001O00O1001O0O101O001O00001N1001OO2O0000001O00O1001O00000O11O0000O105K1O1O5K3M3M100O2M3N10O01N1O2O1O10O0001O00000000000000001O000O1000000000O100000001O000O01000O2O0O1O100O10000O1O1O2O0O1O10000O1O1N2O2M2O1N2L4N2O1N2N1K6N2N2M3K5C=E;O1O20O00000000000000000000000001N100O1O100O1O2O000O100O1N3N1000000O100O2O0O1O2N1000000O10001N1O1O1O101O000O2N102N0O2N3M6K1O3L7J5J5L1N7I;E2O1O3M1N6J5K2O1N10O01O100O100O100O10000O10000000O01
001O1O2N3M1O1N101O00000O101N1000000000O1000000O1O10000O0O2N1N3O1O1F:F:L41O0000000000000M3OM400O100M[CkMe:fANU>1mAGZ>0cA35Kmi7" + } + ], + "question": "Which of the following statements accurately describes the arrangement involving , , and ?", + "choices": [ + "A. is beside and in front of .", + "B. is beside and in front of .", + "C. is beside and in front of .", + "D. is parked on and beside ." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_327.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000148719.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "U5[3e;00O1000000000O100000O100O10O102]DoLS;d3K4L3L4M1O2N1O1O2J`KhE`4X:`KhEa4[:1DeKQF\\4m94dNObC3l;GYCj0?Dm;AdCW1;\\Ol;l0SDWOj;k0UDVOg;o0UDSOi;R1oCTOo;P2N2N1O1O2M200O2O001O0O101O001JbLhD_3W;aLiD_3[;10001O0000000L_LiDa3W;_LiDa3W;_LhDb3X;^LhDc3Z;00001O00001N1O101O001O0000001O00002N2M3N101N2N1N200O1O2M101N200O0O101O00001O000000001O0O11O0O1GdEnK\\:[40O10000000000O0O2O10000001N11O01N1O010O1O101O001O00O10001OO100000001O0O100O01000O2O01O00O101O00001O000000001O0O10000O101N100000001N1O100O2L3M3N3M2O101M2N2O100N2O2M2N2O1N3N1M3O1N2N3N1O1K6M2B[I^Hi6a7WI]Hk6b7:0fJXH\\3g7dLZH\\3e7fLZHZ3f7dLZH_3e7n1O100000000O2N11O00O2O001O00000000000O2O000000001O00O1001O0O101O001O00001N1001OO2O0000001O00O1001O00000O11O0000O105K1O1O5K3M3M100O2M3N10O01N1O2O1O10O0001O00000000000000001O000O1000000000O100000001O000O01000O2O0O1O100O10000O1O1O2O0O1O10000O1O1N2O2M2O1N2L4N2O1N2N1K6N2N2M3K5C=E;O1O20O00000000000000000000000001N100O1O100O1O2O000O100O1N3N1000000O100O2O0O1O2N1000000O10001N1O1O1O101O000O2N102N0O2N3M6K1O3L7J5J5L1N7I;E2O1O3M1N6J5K2O1N10O01O100O100O100O10000O10000000O01001O1O2N3M1O1N101O00000O101N1000000000O1000000O1O10000O0O2N1N3O1O1F:F:L41O0000000000000M3OM400O100M[CkMe:fANU>1mAGZ>0cA35Kmi7" + }, + { + "size": [ + 480, + 640 + ], + "counts": "d45k>0N200O1O2N2O1N1O1O1O010O2O3L4M000O100O010OO2N1NI9O10O1010O0100O1O2M2O2N2FfAK^>OfANg>OjSi8" + }, + { + "size": [ + 480, 
+ 640 + ], + "counts": "0]3c;001O00000000001O001O1O1O00001O2N1O1O000000000000O1O100O100000000001O001O1O00001O001O0000O1O1O1O1K5N204L3N1M4M1O7I4L2N1O1N9H4MgNkMiEP2X:YNbEd1^:aN^E^1b:cNeEU1[:oNdEn0\\:TOeEi0[:[OaEe0_:^O_Ea0a:BbE8^:KcE1]:3`EL`:9\\EFd:<\\EBd:a0\\E\\Od:f0\\EXOd:i0\\EVOd:m0YESOg:n0ZEPOf:S1[EiNe:Y1^EbNb:a1]E]Nc:h1YEWNg:j1YEUNg:n1[ElMf:V2Q12N2N2N1O2N2N4L2N3M4L2N2N1O2N1O00000000000000000000000000000000000000000000000000000004WMYDQ2gA:G8Ha0_O6J8H3LQX2o0ofM;D:I8H`0@7I6J;E1O1O001O000000001O0000O1001O0000001O0000000000000000000000001O0000001O001O2N1O001O00000000001O000000000000000000O1001O0000001O1O5K4L3M4L3M3M1O1O1O0aEbK\\:]4dEdK\\:\\4eEcK[:]44L4]Oc0A?001Oj0VON2O1000000O1O1O1N2O1N2N2N2M3M3001ON200O11O00000000O100O1O1O100000000000000M300N20000N20000O1O100O100O10000000000O1001O01O1O00010TNPDf0R<^NlCLj0d1`?", + "choices": [ + "A. and ", + "B. , , and ", + "C. and ", + "D. , , and " + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_328.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000098716.jpg", + "mask_rles": [ + { + "size": [ + 359, + 640 + ], + "counts": "nh59l::G1O1O1O1O1H8M3O100O100001O000YOVF1k9Mj0K`nc6" + }, + { + "size": [ + 359, + 640 + ], + "counts": "]kd0:j:6K2O1O1J]OgEe0V:700O1N200O100O@mENS:0QFMP:2SFLn93RFNm90VFOl9NUF1m9MTF3b:JnTT6" + }, + { + "size": [ + 359, + 640 + ], + "counts": "Qn]6l0Z:Y1hFZN^6_3I1O00000001O001O0SMhHTO3]2W7mMgIk1[6TNfIi1\\6VNeIj1[6VNeIj1[6WNdIj1[6UNfIk1Z6UNfIj1[6VNdIk1\\6VNaIk1`6iNaHQO0\\2`7`0g0N2O1O1O1M3N2O2O0lM_Gb1a8\\N_Hf0a7ZO`Hd0a7[O`He0`7\\O_Hd0a7\\O^He0b7[O^He0b7[O^He0b7\\O]Hd0c7\\O]Hd0c7\\O]Hd0c7]OdGE;n0Q8`1000000000000000000YK" + } + ], + "question": "What is the spatial relationship between and ?", + "choices": [ + "A. is in front of .", + "B. is in front of .", + "C. is sitting on with .", + "D. is over ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_329.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000203317.jpg", + "mask_rles": [ + { + "size": [ + 423, + 500 + ], + "counts": "YUi2b0_<:I7I6K4L4L4L4L3N3L3N2N2M3N2N2N2N2N2N2O0O2N2N101N1O2O0O2O0O101N100O101O0O1000001N1000000000000000O100000000O100000O0N3N2N1N3O0N3O0M4M2M4N1RNXL\\Jl0\\Nj1Y7]MVJb0kNo1P7dMnI=TOo1m6SN_INGm1j6[NWIJ0j1i6`NoHL8b1h6dNnHM:]1g6iNkH^O1A=V2f6nM[Ie0JVO5U2f6oM^I`0\\OTO=82T2h6nM`I=OAIR2j6gMfIb0LEEo1l7WNgHJ]Om1j7ZNlHIZOk1g7^NRIGWOh1e7cNWIEUOe1c7eN\\IGQO_1e7hN^IInN\\1d7iNbIKjNY1f7gNfI0eNU1g7jNfI1eNo0h7oNeI2dNk0j7ROdI2dNe0m7XO`I3gN;n7A]I4V8KkG5T8JnG7P8IQH7n7HTH8l7GUH9j7GXH8\\:1O0000O11O00000XCH`A?o8SO^G?B`0P9ROZGa0D?R9QOUGc0H`0Q9mNUGe0Ha0R9jNSGh0Ib0R9gNPGc0GWO6b1Q9eNnF40IKL5e1P9bNmF3:0G]1P9QO^GA_Ob1R9iNdGC[Of1o8dNiGDXOk1n8_NmGDVOo1k8ZNRHT2m7iMWHW2h7iMXHX2g7gMZHZ2e7fM[H[2d7eM[H]2d7cM[H_2d7aM[H`2e7`MZHb2d7_M[Hc2d7]M\\Hc2d7\\M\\Hf2c7ZM]Hg2b7YM^Hg2b7XM^Hj2a7UM`Hk2`7TMaHm2_7PMcHP3^7nLcHS3h800001N10001O0001O0001O00000000000000000000000000O1000000hLVFR3k9nLUFR3k9nLUFR3k9mLVFR3k9oLTFQ3l9oLTFQ3l94100O100O1O100O100O1O1O1O100\\MkEU2W:iMlET2V:iMmEU2U:gMoEW2a:N4M1O2M3N3L3M4L5`N_DQ1f;lN]DP1Qk;BUD>k;BUD>Z<0EBmC>So;ChC09=^<000001OO1001O00O10000O1N2N2O1O1O1N2O1N2O10O0101O00000000O1001O00000000O011O00000000000000000000000000000000000000000000001O0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001O0000000000000000000000000000000000000000000000000000000000000000000000001O001O1O1O2N1O1O1O1O1O1O1O2N001O001O1O1O00010N101O1O00001O01OO1000001O00000000001O000000O10000000000O1000000O100O10000O100O1000000O1O1O1O10000N2O100O100O1N2O1O1O1O1O100O100000000000000001O000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001OO100000000001O00000000O100001O001O001O1O1O1O001O1O001O001O1O1O0000001O001O00001O0000001O0000000000000000000000O10000000000O10000
O10000O10000O1O100O1O10000O1O1O100O1O1O1000000000000000000001O00O11O00000000000000000000000000000000O11O000000000000O11O0000000000_C" + }, + { + "size": [ + 423, + 500 + ], + "counts": "0m;Z1000000001O00O100000000000000000000000000000000001O000000000000001O00001O00000000000000000000000000001O00001O0000001O0000000000O100O100O1N2N2KgNZD[1d;5O1O1M3O1O11O0000001O00O1001O00O11O00000000001O000000000000000000001O00O11O000000001OO1ZK]NhMc1n6000RK\\NYNd1f1]NZNc1f1\\N[Nd1c1^N]Nb1c1^NVJN_3d1\\2]N\\Nc1e6O00PK_NZNa1S5_N`Ga1S:0dM_N^GOm0b1d7eNkFK:OV1a1h7]NnF240V1a1R:01O_M_NfG1i0a1a7]NgG1i0b1]7^NkG0i0b1Z7aNlGMj0b1Z7bNkGNh0a1]7]NoG1f0a1Q:OPK^N[Nb1d1_NdK1>`1X6^NiIb1W7^NiGb1W7_NQH0f0b1Y7_NPHOTO0`1b1]7]NgIb1o80PK^N[Nb1d1_NfKOB]Ob2o6lMfIA\\Ob2n6kMfHUOP1?\\O`2m6lMkIEWO^2l6kMRJGRO]2k6lMVJEoN_2j6kMZJFlN^2j6hM_JKiNY2h6cMiJ4`NW2g6eMjJ3aNV2e6gMjJ6_NR2h6gMjJ6aNi0_OhN0Q1X7UOlJ7_Nd0FNP7UOmJ9\\N`0^OSO9P1P7TOnJ:_N8G:l6SOPK:Db0]5POTK=_Ob0]5ROTK>\\O`0`5SOTK=[O`0a5ROVK=YOa0V8^OiGb0b5QOYK>TOa0c5ROYK=SOa0[8_OdGa0^8@nEC\\1m0f8HVFYOf0o0U9GUF[Oe0n0X9FTG9m8GRG9n8GRG9n8HoE[Oh0n0\\9EjE_Oi0l0_9ChEBh0k0a9BgEEf0i0d9EbEBk0h0d9L]F4d9L[F4f9LYF4h91RFOP:3lEMU:3jEMW:4gELY:5fEK[:5dEK]:4cEL]:4cEL^:3bEM^:4`EM`:4_ELa:4`EK`:6_EJa:6_EJb:6\\EKd:5\\EKf8XOhHn0bNJf8\\OdHj0fNJe8]OeHi0fNJe8]OeHi0fNJe8A`Hf0kNIf8@_Hg0kNIf8A\\Hg0oNHe8CWHh0TOEf8FmGj0]O@g8GgGl0B]Og8GcGP1FYOh8G^GS1KUOg8JYGT10ROh8o1WGRNi8n1WGRNi8n1WGRNk7WOeHf2@SNj7YOfH\\1ZO@7Ji7\\OdH`2ESNg7^OcH_2FSNg7^OcH^2GTNf7^OcH]2HUNe7^ObH_2HSNf7_O`H_2ISNg7@\\Ha2JPNj7_O[Hd2ImMl7_OZHh2HhMn7ASHn2MbMP8U400O10VLQH\\2o7XM_Hf2a7WMPHUO?d3a7WMbHi2^7SMPHYOc0d3]7oLkHP3U7oLlHQ3T7nLmHR3S7lLoHT3Q7lLoHT3Q7lLnHU3R7jLoHV3Q7jLnHW3R7iLnHW3Q7jLnHW3R7iLmHX3S7hLmHX3S7hLmHX3S7hLlHY3S7hLmHX3T7gLlHY3U7fLkHZ3U7dLmH\\3S7bLnH_3W7VLoHj3W800000001OO1001O00000000000000000000000000000000000000000000000000000000000000000000000000001O001O1eLaFl2_9QMeFn2]9nLeFR3\\9jLfFW3e903M3MM30lLSFn2U:hNhE]OY:`0jE_OW:=lECX:oNgEo032g:^ObEa0b;0O11O00000000000000000000O11O000000O10000000000000000000000" + } + ], + "question": 
"Which statement accurately describes the position of ?", + "choices": [ + "A. is parked on and attached to .", + "B. is attached to and parked on .", + "C. is parked on and beside .", + "D. is parked on and attached to ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_330.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000542856.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "mWY66R=5L4N1N20O00001O0O2O1O5HdVl1" + }, + { + "size": [ + 427, + 640 + ], + "counts": "W6b5h7001O10O100000000O001O100O100O1O10000M3N2N10100O100000O1000000O100001O00O10000000O1000000O100001O00000000000000O02O000001O00O1001O0O0101OO100000000O1000000000000000000000000000000000000000000000O100000000000000000000000001O000000000000001O000000000000000000000000000000000000000000VNoGZNQ8`30000000000000000000000000000O100001O000000000000000000000000000000O1000000001OO10000000000000000000000000000000000000000O10000000000000000O100000000000000000000O10000000000000000000000000000000000O100N2N2McJ[HZ5c7hJ]HX5c780000OaJ^HW5b7hJ`HW5`7jJ^HW5b79000_J`HW5`7iJ`HW5`7jJ_HV5a7jJ^HW5b79O10000000000000000002N1_J\\HY5d77O10[J]Hb5c7]J_Hb5a7^J_H2N\\5b7dJ^Ha5b74004L1O01O00O100000O11O0`JYH\\5h7cJXH]5h7cJXH]5g740`JYH\\5h7bJYH^5g7bJYH^5f7dJYH\\5g7dJYH\\5g7dJYH\\5g7dJYH\\5g7dJYH\\5g740000000`JYH\\5g7400000000000000000`JYH\\5g7400000000000000000000000000000000000000O10000001OO100001O00O100H]JhHc5X7]JiHb5\\3^J60^Lb5U3`JgLNf30^Lb5R3dJiLJo3c5ZO_Jm0`5_3000G^JhHc5X3^JcL1g3O]Lb5Y7^JgHb5Y7]JhHc5a7N10O110O1O1N1000O1000000001O1O1O1GXJmHi5Z71O2N1O1O001O1N1N3N23M00O1O00001O00000O20O1O0000000000000000000000000000000O10000000000O1000O01O10000O1O100O1O1O1O100O1N2O1N2O1N2O1O10000O100000000000000000000O10O2O00010N101O0001OO10000001N0101O00O100000000000000000000000000000000000O11O0000O1O100000000001O0000O100000000000000000000O100000000001O00O10O1000000001O000000001O000000O100000000O101O00O03N1O0000001O8dJoG7:o3P8jKWHS4j7mK\\Hm3d7RLaHj3c8M:E7J4jLlEm2Z:O5K9F7J
2N1oMmDg1^;N8aNaDFIi0g;]ObDJH0Od0k;]O_D4F>m;]O_Di0T and the other objects?", + "choices": [ + "A. is driving on and contains .", + "B. contains and is being driven by .", + "C. is parked on away from .", + "D. is an advertisement seen on and ." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_331.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000333745.jpg", + "mask_rles": [ + { + "size": [ + 640, + 427 + ], + "counts": "RTR1=ac03K5K5N1M4M3L3M3dKVO`Em0Z:BZE`0b:NPE6n:6gDKX;?^DBa;e0YD[Of;h0UD[Oj;h0oC]OP interacting with and ?", + "choices": [ + "A. is holding and standing next to .", + "B. is holding and carrying .", + "C. is on top of , and is walking towards them.", + "D. is carrying and wearing ." + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_332.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000189698.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": 
"2Z4R90O2O000000O1001OO1O1O1001OO100O100000000O11O00O1O1O100O10000O11O0000O1001O0000O1000000000000001O001O0VGXLEF1NQ8U4VHeLg7[3YHfLf7d4LNVHdJh7]5WHdJh7a5O2N1O001N102N00100N010001M2001N10O2N1O1001O00001O0O2O10O01O2N1O1O2N5K2N1nHoIg6^6MO1001O2\\IZI^6j60N200002N00001O001O1O1O3M1O1O0000O11O1OM300000000O1N2O10000000000O1O100000O10010O1O1fIPIV6R71001O1O1N2O1O1O000000O11O1O001O1O1O1O001O001O1O1OdKYJT1f5iLmLmN_Mf3c5]MUMb2j2^MWMd2f2\\MZMe2e2[M[Mf2XMYLT5Q1dMf2WM[LT5o0eMh2TMZLX5m0dMk2UMULW5P1dMk2VMTL0OQ5S1iMU3U2kLkMW3UMhKi4Q1RNW3S2iLmMk2TMXLS5m0iMk2UMSLV5R1eMQ3Y2oLgMQ3Y2oLgMn2\\2RMdMk2_2UMaMj2`2VM`Mf2d2ZM\\Mc2g2]MYMa2i2_MWM`2j2`MVM_2k2aMUM_2k2aMUM_2k2aMUM_2k2aMUM_2k2aMUM`2j2`MVM`2j2`MVM`2j2`MVMa2i2_MWMe2e2[M[Mg2c2YM]Mh2b2XM^Mi2a2WM_Mj2`2VM`Mj2`2VM`Mj2`2VM`Mk2_2UMaMk2_2UMaMl2^2TMbMm2]2SMcMm2]2SMcMn2\\2RMdMn2\\2RMdMo2[2QMeMP3Z2PMfMQ3Y2oLgMR3X2oLgMT3V2kLkMW3S2iLmMX3R2hLnMY3Q2hLnMX3R2hLnMX3R2hLnMX3R2hLnMX3R2hLnMX3R2hLnMX3R2hLnMX3R2hLnMX3R2hLnMX3R2hLnMX3R2hLnMY3Q2gLoMY3Q2gLoMY3Q2gLoMY3Q2gLoMZ3P2fLPN[3o1eLQN[3o1eLQN[3o1eLQN[3o1eLQN[3o1eLQN[3o1eLQN[3o1eLQN[3o1eLQN[3o1eLQN\\3n1dLRN\\3n1dLRN\\3n1dLRN\\3n1dLRN]3m1cLSN]3m1cLSN]3m1cLSN]3m1cLSN]3m1cLSN]3m1cLSN]3m1cLSN]3n1bLRN^3n1bLRN^3m1cLSN]3m1cLSN]3m1cLSN]3m1cLSN]3m1dLRN\\3n1dLRN\\3n1dLRN\\3n1dLRN]3n1bLRN^3m1cLSN^3l1bLTN`3j1`LVNa3i1_LWNa3i1_LWNb3h1^LXNb3h1^LXNa3i1_LWNa3i1_LWNa3i1_LWNa3i1_LWNa3i1_LWNb3WMYK_4T1[Nb3UM\\K^4S1\\Nb3h1^LXNb3h1^LXNc3g1]LYNe3e1[L[Ne3e1[L[Nf3d1ZL\\Nf3d1ZL\\Ng3c1YL]Nh3b1XL^Nh3b1XL^Ni3a1WL_Ni3a1WL_Ni3a1WL_Ni3a1WL_Ni3a1WL_Ni3a1WL_Ni3a1WL_Nj3`1VL`Nj3`1VL`Nj3`1VL`Nj3`1VL`Nj3`1VL`Nj3`1VL`Nk3_1ULbNj3^1VLaNk3_1ULbMkNROP5\\3ULbMkNROQ5[3TLbNl3^1TLbNm3]1SLcNm3]1SLcNm3]1SLcNm3]1SLcNl3^1TLbNl3^1TLbNl3_1SLaNm3^1TLbNl3^1TLbNl3^1TLcNk3]1ULbNl3^1TLbNk3_1ULaNk3_1ULbNj3^1VLbNi3_1WLaNh3`1XL_Nh3b1XL^Ng3c1YL^Ne3c1\\L\\Nc3e1\\L[Ne3e1[L[Ne3e1[L[Nd3f1]LZNa3g1_LYNa3g1_LYNa3g1_LXNb3h1^LXNb3h1^LYNa3g1_LYNa3g1_LXNb3h1^LXNb3h1^LXNb3h1^LXNc3g1]LYNc3g1\\LZNd3f1\\LZNe3e1\\LZNd3f1\\LZNe3e1[L[Ne3e1[L[Ne3e1[L[Nf3d1ZL\\NQ1WM<]4cN]NoNHn1j1SO^NlNLQ2e1S
O_NkNMR2d1ROZ1o0eNRO[1m0eNSO[1n0dNRO\\1n0dNRO\\1o0cNQO]1o0cNPO^1P1bNPO`1n0`NQOn7M3O10000O1001O00O10000000000001O002N3mFlNWM6D:2En0d0ROXO?3P4Q1mLkNK;mNU13Z1d3[NbMkNIaNU12]1f3VNoNXOYMV10^1g3TNPOYOWMV11^1h3SNPOY4o0gKQOZ4n0fKROZ4n0fKROZ4n0fKRO[4m0eKRO\\4n0dKRO\\4n0dKRO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO^4l0bKTO^4l0bKTO^4l0bKTO^4l0bKTO^4l0bKTO^4l0bKTO^4l0bKTO^4l0bKTO]4m0cKSO]4m0dKRO\\4n0dKRO\\4n0dKPNjNYOb5g2dKPNjNYOb5g2dKPNjNYOc5f2cKQNjNYOc5f2cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKQMiNB0^1d5o1cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO^4l0bKTO^4l0bKTO^4l0bKTO^4l0bKTO_4k0aKUO_4k0aKUO_4k0aKUO`4j0`KVO`4j0`KVO`4j0`KVO`4j0_KWOa4i0`KVO_4k0aKUO^4l0bKTO^4l0bKTO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO\\4n0dKRO\\4n0cKQMjN?0Bc5n2cKSO\\4n0eKQOZ4P1fKPOY4Q1gKoNY4Q1gKoNY4Q1gKoNY4Q1fKPOY4Q1gKoNY4Q1gKoNY4Q1gKoNY4Q1gKoNY4Q1gKoNY4Q1gKoNY4Q1gKoNY4Q1gKoNY4Q1gKoNY4Q1gKoNZ4P1fKPO\\4n0dKRO]4m0cKSO^4l0bKTOa4i0`KVOk4?UKAn4TKBk4?UKAj4`0VK@i4a0WK_Ob2dMoMn2_O^O]2jMSNh2@^O\\2lMSNf2A^O[2mMTNe2A^OX2SNSN`2E]OS2YNWNZ2E^OR2[NXNW2F^OQ2^NVNU2I]OQ2_NUNT2J]Oo1bNVNQ2K]Om1gNSNn10[Ol1jNQNl13ZOk1lNPNk15YOj1mNQNj15YOi1oNQNh16YOh1ROoMf19XOf1TOPNe1:WOe1UOQNd1:WOd1VORNc1:WOb1XOSNb1;VO_1\\OTN_1=UO^1]OUN^1=UO^1]OTN_1>TO]1^OUN^1>TO[1@WN\\1>TOZ1AWN\\1?SOY1BWN\\1`0ROY1BWN\\1`0ROX1CWN\\1a0QOV1EXN[1b0POU1FXN[1c0oNU1GWNZ1d0oNU1GVN[1e0nNU1GVN[1e0nNT1HVN[1f0mNS1IUN\\1h0kNR1JUN\\1i0jNQ1KVN[1i0oMkNjNV2l1VN[1i0jNQ1MTNY1k0jNP1OSNX1m0iNP10RNW1n0iNo01SNV1n0iNo01SNV1n0iNo01RNW1o0hNn02RNW1P1gNm03RNW1Q1fNm03QNX1R1eNm03QNX1R1eNl05PNW1T1dNk06oMX1V1bNk0_4UOaKk0_4UOaKk0_4UOaKj0`4VO`Kj0`4VO`Kj0`4VO`Kj0`4VO`Kj0`4VO`Kj0`4VO`Kj0`4VO`Kj0`4VO`Kj0`4VO`Ki0a4WO_Ki0a4WO_Ki0a4WO_Kh0b4XO^Kh0b4XO^Kh0b4XO^Kh0b4XO^Kg0c4YO]Kg0c4YO]Kg0c4YO]Kg0c4YO]Kg0c4YO]Kg0c4YO]Kg0c4YO]Kf0d4YO]Kg0c4ZO\\Kg0c4YO]Kg0c4YO]Kg0c4YO]Kg0c4YO]Kg0c4YOZKj0f4VOXKm0g4TOTKP1l4h30001O000000001O1O0000000000000000001O00000000001O00001O0000001O000000001O00000000
000000000000001O00000000" + }, + { + "size": [ + 428, + 640 + ], + "counts": "Uj[12m in relation to ?", + "choices": [ + "A. It is driving on .", + "B. It is located under .", + "C. It is parked beside .", + "D. It is positioned behind ." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_333.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000362434.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "PjU3`1Q:60000000000000000000000000000000001O000000000000000000O100O1O10000O100001O00001O1O00001O1O001O001O00001O001O1O1O001O2N1O1O2N2N1O3M3M2N3M2aG]Mf7f2hGoMR8m2N1O3M2N2N1O1O3M1O1O1O0000001O0000001O00001O001O0000000000001O00000000000000000000000000001O000000O10000000000001O00000000000000000000000000000000000000001O000000000000000000000000000000000000001O000000000000000000000000000000000000000000000000001O000000000000000000000000000000000000000000000000000000" + }, + { + "size": [ + 375, + 500 + ], + "counts": "U6a5V6000000000000000O1000000000000000PMoI:Q6WMPJV21b0o5WMRJU22b0l5XMVJR20e0W6ZOkId0V6ZOlIe0V6^M]If1>k0i6^MTIi05g1[7`MYH;>T2d7hM_HV2d7gM]HX2b8O2N1O2N4L2N1O1O2N001O3M001O3M1O3M4L2N5K2N001O00001O00001O00001O00001O1O00000000001O00001O001O003M001O2N1O00001O2N001O001O1O001O1O1O1O1O1O001O000000O1000000000000000000O100000000001OO100000000000000000000000000O10000000000000000000000000000O100000000000000000000000000000000000000O1000000000000000000000000000000O1000O11O00000000000O10000000000000000O10000O1000000000000000000000000000000000000000000O100001O00O1000000000O1O11O1O0O11O0000000000000000000000000000000000001O00000000000000001O00001O0000001O001O00000000000000000000000000000000001O000000000000000000001O0000000000DGZE9e:H\\E7c:J]E6c:J]E6c:J]E6c:K\\E5c:L]E4c:L]E4c:L^E3a:N_E2a:N_E2a:N_E2`:0ZEHO8g:0ZEH07f:1ZEH07f:1ZEH07f:1ZEH16e:2ZEH16e:2ZEH16e:2ZEH16e:2aEN^:3bEM^:3bEM^:3cEL\\:6cEJ\\:7dEI\\:6eEJ[:7dEI\\:7dEI\\:7dEI[:8eEHZ:9fEGi92RF96De9n0[FROb9Q1^FoN`9S1`FmN^9U1bFkN[9X1eFhNY9Z1gFfNW9\\1iFdNU9^1kF
bNR9a1nF_NQ9b1oF^NP9c1PG]No8d1QG\\NW82jGe1OYNT85kGd11WNR87kGd13UNn7;nGa14TNj7?QH^15SNh7a0RH\\17SNh7`0PH_17QNi7?PHa1OlM24P8>oGb17PNj7>nGc18oMk7, , and ?", + "choices": [ + "A. is on and attached to .", + "B. is on and beside .", + "C. is inside and on .", + "D. is beside and on ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_334.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000052565.jpg", + "mask_rles": [ + { + "size": [ + 458, + 640 + ], + "counts": "_W\\23U>4N1O0O2O1N2O0O2O1O1N100010O000000100N100O100000O1O101O1N4L7H7J5J;F8]CVNWUO_B0_f\\2" + }, + { + "size": [ + 458, + 640 + ], + "counts": "e;d2g;O00001O00001O000000001O00000000000000000000001O00001O0000000000001O001O00O10000001O000000000000000000000000000000000000001O00000000001O000000000000001OO100000000000000001O000000O100001O00000000000nMQDi1P positioned relative to and ?", + "choices": [ + "A. is standing on , with behind it.", + "B. is standing between and .", + "C. is standing on and is positioned in front of .", + "D. is partially submerged in , next to ." 
+ ], + "answer": "C", + "type": "relation", + "image": "images/vqa_335.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000304396.jpg", + "mask_rles": [ + { + "size": [ + 640, + 425 + ], + "counts": "kTW4;bc07I6K5L3M2N3M2O2M2N2O1N2O1O1O2N001N2O1O010O1O001O0010O01O010O00010O01O001O001O10O01O1O1N2O1O1O1N2O1O2M2N2N3M3M2N4K6I:Bdnl2" + }, + { + "size": [ + 640, + 425 + ], + "counts": "\\no1c0Vc0;H7H7K5K4L4M2N2N3M2N1O2N2O001N100000000O11N100O2O0O2N1O2O1N2N2M4M2M4K5K6I7I8K;\\OQS`5" + }, + { + "size": [ + 640, + 425 + ], + "counts": "R>V1l0VOVOG_13`N6a0HQ>R1VBB;YOSOe0K[O89b=L^BU1IR3OhKR=2nBf10g2c<_K]CO1o6`T3^A^MT>c2jAmMg=S2XBVN`=k1^BbNV=^1iBQOij;BSDc07\\Le:R3REc08cL_:k2WEc09mLV:k6gEbIn9_6mEoIi9\\8H5K5K00000000000000O10000O100O100O1O1O1N2O100O100O1000000O1000000O1000000001O1O00001O0000001O002N1O1O00001O1O1O001O001O00001O001O1O1O001O1O1O1O001O00001O00001O1O001O1O001O001O1O1O1O0bKiDD1_LW;k3iDE2_LR and , and the tower, ?", + "choices": [ + "A. Both and are on , and is beside .", + "B. is on , and is beside .", + "C. Only is on , with located beside it.", + "D. is on , which is on ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_336.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000304396.jpg", + "mask_rles": [ + { + "size": [ + 640, + 425 + ], + "counts": "\\no1c0Vc0;H7H7K5K4L4M2N2N3M2N1O2N2O001N100000000O11N100O2O0O2N1O2O1N2N2M4M2M4K5K6I7I8K;\\OQS`5" + }, + { + "size": [ + 640, + 425 + ], + "counts": "R>V1l0VOVOG_13`N6a0HQ>R1VBB;YOSOe0K[O89b=L^BU1IR3OhKR=2nBf10g2c<_K]CO1o6`T3^A^MT>c2jAmMg=S2XBVN`=k1^BbNV=^1iBQOij;BSDc07\\Le:R3REc08cL_:k2WEc09mLV:k6gEbIn9_6mEoIi9\\8H5K5K00000000000000O10000O100O100O1O1O1N2O100O100O1000000O1000000O1000000001O1O00001O0000001O002N1O1O00001O1O1O001O001O00001O001O1O1O001O1O1O1O001O00001O00001O1O001O1O001O001O1O1O1O0bKiDD1_LW;k3iDE2_LRZ]OCeb0=Z]ODfb0;Z]OGeb09[]OGeb09[]OGeb08\\]OHdb08[]OHfb08Z]OHfb07[]OIeb07b0N2N21O001O0e\\OLjb04R]O2lb0OQ]O4nb0`0O1O00001O00001O002N1O3M3M1O1O002N4L1O1O001O00001O001O00001O00001O1O00001O00001O00001O00001O001O00001O00001O001O00001O00001O001O001O00001O00001O00001O00001O001O00001O00001O1O1O0000O100O100O10000001O000000000000001O2N001g^OhMh`0m2K1O001O001O000Z@gLe>Z3XAUM[>k2dAhMj=X2UBTN`=m1^BbNT=^1kBkNm?", + "choices": [ + "A. , , and .", + "B. Only and .", + "C. Only and .", + "D. and ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_337.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000184324.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "Ula52U=3N2N4L4M2M10001O00000000N2O1N2J6O1000000012M4L010000O11OO1O1O1N`QZ2" + }, + { + "size": [ + 425, + 640 + ], + "counts": "fka44T=1iNMXE3h:0UE1k:MWE2j:LWE4o;00N21oBNi<3TC2iV4`5eLZJNi0\\3Q5XLoICZ2U4f601O1ON2O2OO0100N200N2M3K5M3M3O100O10000O1O1O1K5O1N21O000000O10000001O1O0010O01O1O1O001O000hKQHV3o7jLRHU3o7jLQHV3o7iLSHV3o7bLWH^3j7^LYHb3i7XL[Hh3g7TL[Hl3d8M1O1O000000O100001OO1ROfLhG_3S8n0O1O1N2N2N2O100O1O1O1O100001O1O1O1O1O2N1O000O10O100N2O1O100O1N2O1O100001O1O2N1iKZHTOCa3U8YM\\Hg2f7VM\\Hi2f7UMZHk2o7aLfGM7_90UFEc0NkNOb:=Y2O0O0O2N100N2O2L\\Ya6" + }, + { + "size": [ + 425, + 640 + ], + "counts": "i:S2V;1O2N2O0O2N1O0100O5gMQEo1Z;UNcD^1`;_N`Da1e;O2fNiD>[ and ?", + "choices": [ + "A. is carrying .", + "B. is on .", + "C. is attached to .", + "D. is inside ." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_338.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000184324.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "fka44T=1iNMXE3h:0UE1k:MWE2j:LWE4o;00N21oBNi<3TC2iV4`5eLZJNi0\\3Q5XLoICZ2U4f601O1ON2O2OO0100N200N2M3K5M3M3O100O10000O1O1O1K5O1N21O000000O10000001O1O0010O01O1O1O001O000hKQHV3o7jLRHU3o7jLQHV3o7iLSHV3o7bLWH^3j7^LYHb3i7XL[Hh3g7TL[Hl3d8M1O1O000000O100001OO1ROfLhG_3S8n0O1O1N2N2N2O100O1O1O1O100001O1O1O1O1O2N1O000O10O100N2O1O100O1N2O1O100001O1O2N1iKZHTOCa3U8YM\\Hg2f7VM\\Hi2f7UMZHk2o7aLfGM[ and positioned relative to ?", + "choices": [ + "A. Both and are over .", + "B. is attached to and is over .", + "C. is on and is inside .", + "D. is over and is attached to ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_339.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000169996.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Qjl5j0S>7J4L3N2M3N2N2O0O2O0O10000001O00000O2O001N1O2N3M1N5K2MLTOWBm0k=03L5NL601O2N2N1O00TOaB7`=BfB=P>N2M2K401O2O0O18H5\\NB\\Db0_;_OaDb0^;^ObDd0[;^OcDe0[;[OeDf0Z;^OcCJM34M20010000O100001O1N3N8H9G9G10O5K1O0O101N101O0O101O1O00001N103M1O001O3K:A\\Z2n0mdM4K2O000O100000001O00000000000000000000000000001O000000000000001O000000001O001O1O001O1O0010O01O2N1O1O001O001O001O1O001O1O001O1O001O1O001O001O1O001O1O001O1O2N1O1O1O1O1O001O100O010O01O1O1O001O1O1O1O1O1O1O2N1O3M1O1O1O1O2O0O1N3M2M3M4J6J6M3L3K5N1N2N4K4L`fX3" + }, + { + "size": [ + 480, + 640 + ], + "counts": "UQ[83j>5J5M3N2O0O2O001N3N1O1N10001O0O1000O10O1000O1N1010O100O1O1O100N200O1O1O100O100O1O100O101N2O1O1O01Nia9" + }, + { + "size": [ + 480, + 640 + ], + "counts": "Y=:9h0R=XOnBh0Q=ZOmBg0R=_OhBc0V=d0O1O100O1000000000000000000000000O10000000000O10O2O010O001O001O1O1O1N101O000000000000001OO1000000000001O0000000000000000000000000000000000000O100O1O1O100O1N2O1O1O1O10000001O000000000000000000O1000000O1000000000000000000000000000000000000O11O000000O1000000000000001O00001O1O00001O00000000001O0O10010O1O4K2O102M3M3L2O1O010O1N2O1O01OO2O000001OO2O00000000O100000000O100O100O1O1O1O100N2N2N200O100O10000O100O100O100O1O1O10000O1O1O100001O6J1O1O001O1O001O001O2N1N110O00001O0000000000000000000000O10000O100O1N2O100O100O100O020O01O00000000000O100010N1000000000000000000O1O1O100O1O1M3M3N2O10000001O001O1O1O00001O0000000000000000000000O1O1O1O1O1O1O1O100000000O10000000000000000000000000O11O000000O11O0000O1001O000O10O11O0000O1001O0000O1001O0000O1001O01O2N2M200O1O1O0O11O01O000O11O00000O10000000000O100O1O1001O002N1O1O001O1O1O00000000000000000000O100O1O1O100O1N2N2M3M3N200O1O100dNhMPFX2P:hMPFX2\\;00000O100M3N2O11O3M4L3MdMmMmGQ2S8PNmGo1U8PNlGn1d:O1O1O1O1O0000hMXNYGg1g8XNZGh1f8XNZGh1n:100gMVN\\Gj1d8WN[Gi1d8[NYGe1g8\\NYEK
i1i1o8cNnF^1R9bNmF_1S9aNlF`1T9`NkFa1U9_NjFb1U9_NhFd1X9\\NfFf1Y9[NeFg1Y9[NeFg1[9YNUFCTO^2f:PNSFY2j9jMVFV2j9jMVFV2j9jMVFV2j9jMVFV2j9jMUFW2k9iMUFW2k9iMUFW2g9TMbEf0g0V2o9eMQF[2^:UMcEk2Y;00000000000O10000000000000000iNVMXFj2h9VMXFj2i9TMXFl2f9XM\\EMb0l2j9iMUFW2h9lMXFT2h9lMXFT2h9lMXFT2h9lMXFT2h9lMXFT2h9lMWFU2i9kMWFU2i9kMWFU2i9kMWFU2i9kMWFU2i9kMWFU2i9kMWFU2i9kMWFU2i9kMWFU2i9kMWFU2i9kMVFV2j9jMVFV2j9jMVFV2j9jMVFV2o9QM`E5NK00d0o2Y;00000000000000000O1aN_MfEC`0n2j9hMVFX2k9gMUFY2k9gMUFY2j9hMVFX2j9iMUFW2k9iMUFW2k9iMUFW2k9iMUFW2k9iMUFW2k9iMUFW2k9iMUFW2k9iMTFX2l9hMTFX2l9hMUFW2k9iMUFW2k9iMTFX2l9hMTFX2l9hMTFX2k9iMUFW2k9jMTFV2l9iMUFW2k9iMUFW2k9iMUFW2k9iMUFW2k9iMUFW2k9iMUFW2k9iMUFW2m9gMSFY2n9fMQF[2P:mL`Ed0`0_2P:nL_Ec0a0_2P:dMPF\\2S:aMmE_2S:aMmE_2S:aMmE_2T:`MlE`2T:`MkEa2V:^MjEb2V:^M\\E^O:T3Z:^M\\E^O:T3Z:^M\\E^O:T3Z:^M\\E^O9U3[:^M[E]O:U3[:^MZE^O;T3[:^MZE_O:S3\\:^MZE_O:S3\\:^MZE_O:S3\\:cMbE^2^:fM^EZ2b:fM]E[2c:fM\\EZ2c:hM\\EX2d:R10000O10000O1O10RNfEVO[:d0nEXOR:g0oEYOQ:g0oEYOQ:f0PFZOo9g0QFYOo9f0RFZOo9e0QF[OP:_O\\E3f0=o9UOiEEB7i0o0l9TO^F@J\\1h9TO`Gl0`8SOaGm0`8oNcGQ1]8mNeGS1]8bNlG^1k:0000000000000000000000000000000000000000000bB" + }, + { + "size": [ + 480, + 640 + ], + "counts": "hUn54h>7H6L4M3M3L4M4K4M4K=D2M3O2M2O2N2N1O1O1OI\\CQNf>1O1O1O1O1O1O2N2N1N`a\\2" + }, + { + "size": [ + 480, + 640 + ], + "counts": "R:S3m;O001O2N1O1O1M3N2O100000O2O00001O000000O100001OO10000O1O11O011N3M1O2N1O1O1O1O1O001O00001O00000000000000000000O100001O00000000000000O10000000000000000O1O1O100O1O1N2M3N2M3H8L40001O0O100001O001O00001O00000000000000O100000000000000000000O2OO2O00000000001O1O00001O001O0000001O3M100O1O1O001O1O001O000000000000O1M3J6I7O1N2O1N2GUC[Nm2O1N2O1O1O1N2O002N001O1O1O1O1O1O001O000gBDn;?", + "choices": [ + "A. , , and ", + "B. , , and ", + "C. , , , and ", + "D. 
, , and " + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_340.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000245026.jpg", + "mask_rles": [ + { + "size": [ + 424, + 640 + ], + "counts": "Y[Z2>d<7J7J5J5N3J5K6L4J6J6I7J6fEbMe9c2WF`Mf9d2VF_Mf9S3N3N2M3N3M5J4L3NO11OO1000010OO2ON3N101O001O10O0100O000O2000O100000000O10000O10000O1000001N010000000000OWNVLUJk3_5bL`J^3\\5lL^JT3_5RM^Jn2_5WM_Ji2^5]M_Jc2_5cM]J]2b5hMYJY2e5kMYJT2f5PNXJP2f5TNXJl1g5WNVJj1g5[NWJf1e5_NYJa1d5eNXJ\\1g5gNVJZ1i5iNUJW1j5T300O1O100000O100000O10000O100O10O01000000O1000001O0O2O0000000000000001N1000000010OO101O1O00001O00001O00100O1O1aKPJd1Q6WNTJh1m5TNWJk1j5RNXJn1j5oMXJP2i5mMYJS2i5jMYJT2k5hMVJX2l5dMVJ\\2l5aMVJ^2l5`MTJ`2m5^MTJb2m5\\MTJd2m5ZMUJe2l5XMVJh2j5WMWJh2k5VMWJi2j5oKYIN;a0e0b3h5mKjI>?e3`6WLaIi3a6RLcIm3l7001O01O0000001O001O00000O100000001O0001O01O1O001N101O00001O0001O0001O01O01O1O001O001O1O001N3N001O1O3N0O2N1O2M4M2N4M0O3M5K2N4M0O3N3O0N2O1N1N4M2O0O2N2N1N3M2N3M3M5ROWD3n;F[D3i;G]D4T\\j2" + }, + { + "size": [ + 424, + 640 + ], + "counts": "bY]54bHHc9^O[Fl09MIJ]9h0hF_OKI]9i0gF^OLJ[9j0hF\\OLK\\9j0fF\\ONJ\\9l0bF\\O2H\\9o1eFQN[9o1dFRN\\9n1dFRN\\9n1dFRN\\9m1dFSN^9l1bFUN]9k1cFUN]9k1cFUN]9k1cFUN]9j1dFWN[9i1eFWN[9?_FO6B[9>eFK0G[9=jFHJK]9=kFFHM]9[EE0K^:n0]EWO6Ik9e1lEcN9Gi9[2XFbMj9^2VFaMk9_2?0000001O00000000000000000000000000hN^EJb:5bEjNIj0e:;XFTOh9h0]FWOd97mFIY;1O0bCHl;7TDKS;7iDO3KS;9gDM6Jf:3nD95J7J^:o0[EWO7JQ:^1eEiN9Je9\\2ZFdMf9\\2YFfMf9\\2WFTN[9l1cFUN\\9m1cFSN]9m1cFSN]9m1cFSN^9m1aFSN_9m1bFRN^9n1bFSN]9m1cFSN]9m1dFRN[9P2dFPN\\9P2dFPN]9o1cFQN]9o1cFQN]9P2bFPN]9Q2dFnM\\9R2dFnMf6\\OYI1U2e2lMnMf6\\OYI1U2e2mMnMe6[OZI0V2f2lMmM[9S2fFlMa9V1[FgN`0A_O0Ta1" + } + ], + "question": "What are the relationships of and with respect to ?", + "choices": [ + "A. is in front of , and is attached to .", + "B. Both and are in front of .", + "C. Both and are attached to .", + "D. is in front of , and is attached to ." 
+ ], + "answer": "D", + "type": "relation", + "image": "images/vqa_341.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000115885.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "RWo35_;7K2N2L5M4L3L3N2N3M2M3M4L3M3K5K5L4L4L3J8I6M2N3M4M2N2N1O2N2N1O1O2N1O1O1O2N101O0O1O1O10000jM[LYLf3l5OnM^LPLc3P4aLkK`3U4bLhK^3Z4bLeK^3[4dLbK\\3`4fL\\K[3e4dLZK]3g4cLWK^3i4gLPK[3P5mLfJU3[5kLdJU3\\5mLaJT3`5PMZJQ3f5PMXJQ3i5nLWJR3j5nLSJT3m5[1001O001O1O001O01O01O001O001O10O000001O1O010O1O00010O001O010O10O100O01O10O01O010O0010O01O000O2O00001N2N1O2N101L3O1O2N2O1N1O2L4M3N1O2O01O01N2N3K300O2O001O0O1N3NaIcL_NOo5]3dKcL]N0P6[3eKcL\\N2o5[3fKbL\\N4l5Y3iKcLZN8j5U3mKbLYN9j5T3kKeL\\N8i5Q3kKiL[N6k5o2lKiL[N7i5o2nKhLZN1N1j5V3VLhLV2" + }, + { + "size": [ + 375, + 500 + ], + "counts": "]bP33b;3O1N1N3N2N1N3N1N3N1N2N3L3N3M2M4L3M4M2N2N3M2N3L3N3L3N3N1N3N1O2L3M4M2N3M2M4N1N2O2N1O2N2N1O101N101O0O2O0O2O00001O001O0010O010O001000O0100O1O100O100O10O001O01O01O0010O01O010O010O0010O00010O01O0010O01O0100O010O01O1O1dH`L`6b3[IaLd6Y40010O100O1O10000O0100O02O0O010O01O00101N001O1O00001O00001O00001O001N3N1O1O1N2O1O2M2O2M2N2N2N3M2O2M2O2N2N1N2N2M3M4M1O3N1N2O2M2N3N1N2O1N3M2N2N3N0N4M2N2N2N3L3N3L4L4M3M2N3N2L4M3L4K6Ifo=" + }, + { + "size": [ + 375, + 500 + ], + "counts": "]1Y:^100000O100000000000000000000O100000000000000000000000000000000000000O1001O00O100000000000000000000000000000000O100000000000000000000000000000O11O0O1000000000O11O00O1001O00O1001O00O1001N10O11O0000O11O0000O11O0000O11O0000O11O0O10000000O1001O00O0101O00O100000000001O00O1001O00O11O0O10O1001O00O11O0000O1001OO1001O00O1001O00O1001O0000O11O000000000O10000000000000000000000O02O00000O1000O2O1O01O0O1000010O001O2N7H8I7J:Ea8h1^GkM0>c8`20\\O]GTNd8f1^GhM0001N8f8o1_GhMM0O:d8P2aGTN`8k1`GUN`8i1[GiM51J9f8P2_GTNb8j1]GXNc8h1_GVNb8g1aGPNG1i8o1_GXNa8i1^GWNb8b1XGTN7:b8a1WGTN7;c8a1VGVN5:d8`1VGWN5:f8^1VGUN7;e8_1UGTN7=e8^1TGVN6 and ?", + "choices": [ + "A. is on .", + "B. is looking at .", + "C. is beside .", + "D. 
is looking at ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_342.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000289659.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "i_e2?e<>E8J5K4J6K5L3M3M2N3L5K4M3M4K6Ka0_O5L2M3M7I;EY1gN7I100O1OUNZISMg6g2eIQM\\6P3jIeL[6\\3ZHYLZ9R4J4K9H5K7I6J6J3M00iNYHjLh7Q3Z1J7M3L3M4N2K5M2N201O001000O1O1O1O2N1O2N3M7I:G4K7I6I7K7cGSK71X7e5L3N2NO2O01O0O20O00001POUI`Kk6\\4`IXKUO2\\7a4X1I6N3N2O010O11O1N4YGcKKo0]7i4100O1O100O100O100O1O0C_HPKd7j4a0L4N2N2M3M2O2M3N2J6J5RNlFBZ9=iF\\O\\9b0gFYO]9f0fFSO^9m0gFiN_9V1gF\\Nb9c1R10100O100O010O1O10O0100O010O010O100O010O00100O10O01O10O01O0010O01O10O01O010O01O01O10O01O1O00100OO20O010O10O01O10O01O01O0100O0010000O10O010O0100O010O10O01O01O1O10O1000O001000O010O0010000O001O1O10O1O01O1O10O100000O001O100O010000O10000000100O1O00O20O1O1O1O002M2N2N1O1E;O2N1O101O1O100O000100O1O101O2M2O002M2N3M2O0O1O1O0101N0010O1O2O1N2O1O1N2O1O0O2O1O010O000O100000000O1O2N1O1O1O2N2M2O0O3M2O1O001O2N2M4M2M5Kl]^1" + }, + { + "size": [ + 426, + 640 + ], + "counts": "kRZ37R=3N2[CKQ<6mC0m;2PD2n;f0N2M201O00O11O0000001O00001O001O1O000000O1000000001O001O1O1O1O2N1O1O4L5K2N2N;E001O1O3M2N4L2N2N6J;E2NgNYFoNg9l0jFmM@h0f9Z1RGbNn8]1VG`Nj8^1^G[Nc8d1b1N2N2O1N200O1O100O1O1O100O100O100O100O100O100000000O1000000O101L3O1O2N1N201N2N6I3NX8o2kGcLM=Y8P3jGdLL?", + "choices": [ + "A. is standing on and eating it.", + "B. is in front of and standing on .", + "C. is standing on and eating .", + "D. is standing on and eating ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_343.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000210032.jpg", + "mask_rles": [ + { + "size": [ + 401, + 640 + ], + "counts": "Yja12\\<3L4M3M3J6M3L5L3N2L4N2O1O1N2N20O01O2OO0100000000O1000000000000jNaNYG`1c8gNZGY1e8kNhFJL\\1[9_OdFa0\\9_OcFb0]9_ObFa0^9_ObFa0^9@`Fa0`9X10000000000000000000O10000000000000000000000000000000000000000O1001O00000VHWMl5i2UJVMk5j2n102N1aM_Fl1V:UNdE[1^:aNdE_1g:0001O00001O000O1001O00O2O00000O2O0O1N3K5N11O002O0O11O0O101M2N2M4M2N2O2M2O1O2N2N2N1O1O1O1O1O1O1001N3N000O01O001O1O00001O1O001O1O1O1O1O2MRY]4" + }, + { + "size": [ + 401, + 640 + ], + "counts": "jZY17P<=K5K4L3M3N2M200N3N2N2N100O2N100O1M4M2O1O10000N200O1O101N1O1N3L3O1N2O2N1O100O2O0O100O10001O0O10000000000O1000001OO1001N100O101N1O1O10000O1000000000000O1000001O01O0000000000000001O001O001O001O000O11O00000O101O0000000001O0000000001O00000O1000000000001O000000001O00000000001O0000001O00000O10000000000000000O100000001N1000000001O1O0O1000000O0100O1000000O10000000000000000000000000000000001N10000000001N1000001O001O00001O00001O00001O001O00000000001O0O101N10001O001O1N101O0O2N2O0O2N2N2O0O2N2N2N2N:F3M2N3M3J6L4K5L4K5LbQ]3" + } + ], + "question": "What is the relationship between and ?", + "choices": [ + "A. is eating .", + "B. is beside .", + "C. is looking at .", + "D. is holding ." 
+ ], + "answer": "C", + "type": "relation", + "image": "images/vqa_344.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000144114.jpg", + "mask_rles": [ + { + "size": [ + 400, + 600 + ], + "counts": "iU_31Y<7M3N1O1O010O1O1O1IDYD=e;800O1N2O100O1000000001N2O2N3M3]OWD9QgDC1003\\;h04M3K4H9N2O01000O010O1000O010O010O10O1000000O001O010000O010O1000O0010000O0100000O01O0100O01000O10O0100O0100000O0100O10O1O0100O01000O010000O010O0100O01O1000O10O0100N1100O1000O1KHQD8n;50100O10O1000O10O10O010O10000O1000000O010000000000000002M4M1O0O3K_i8" + } + ], + "question": "Based on the provided information, what is the relationship between and ?", + "choices": [ + "A. is in .", + "B. is over .", + "C. is driving .", + "D. is under ." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_345.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000144114.jpg", + "mask_rles": [ + { + "size": [ + 400, + 600 + ], + "counts": "iU_31Y<7M3N1O1O010O1O1O1IDYD=e;800O1N2O100O1000000001N2O2N3M3]OWD9QgDC1003\\;h04M3K4H9N2O01000O010O1000O010O010O10O1000000O001O010000O010O1000O0010000O0100000O01O0100O01000O10O0100O0100000O0100O10O1O0100O01000O010000O010O0100O01O1000O10O0100N1100O1000O1KHQD8n;50100O10O1000O10O10O010O10000O1000000O010000000000000002M4M1O0O3K_i8" + }, + { + "size": [ + 400, + 600 + ], + "counts": 
"2\\<400kMOkG1U80jG0W8OiG1[:0mMMiG3W8NhG2W80hG0X80hG0X81gGOY81gGOY81gGOY81gGOY81gGOZ80fG0Y81gGOY81gGOY81gGOZ80fG0Y81gGOY81gG0W81iGNW82jGNR86nGJP89oGGP8:PHFP8;oGEP8PHCn7>RHAo7?QHAn7a0QH_Oo7a0QH_On7b0RH^On7b0RH^On7b0RH^On7a0SH_Ol7b0TH_Ok7a0UH_Ok7a0UH^Ol7b0TH^Ol7b0TH^Ok7c0UH^Oj7b0VH^Oj7b0VH]Ok7c0UH]Ok7c0UH^Oj7b0VH^Oj7b0VH^Oj7a0WH^Ok7`0VH@j7?WHBj7:XHFk74XHKh76XHJh76XHKg75YHKg75YHKg74ZHKf76ZHJf76ZHKe75[HKX6LnH9j0KW6NnH7k0JW60nH5l0LU6OnH6m0KT60oH5m0KS61PI4m0KS62oH3n0JT63mH4o0JS61oH5n0JS61oH5n0JS61oH4o0JR63oH3o0JR63oH3o0KQ62PI3o0KP63QI2o0KP63QI2o0JQ63QI2o0KP63PI3P1Ko52QI3P1Ko52QI3P1Ko52QI3P1JP63PI3P1Ko51RI4o0Kn52RI4P1Jn52RI3Q1Ke32RL0XO3Q1Je38nKJ]O4P1Kc3>jKCC4P1Kc3b0fK_OF5Q1Jb3g0bK[OK4Q1Jb3Q1XKQO53R1K`3U1VKmN74S1J`3Y1QKjN<3S1J_3`1lJbNc04R1J^3g1gJ[Ni05Q1H`3Q2\\JSNS14Q1I^3_2aKhMQ1I^3_2aKhMQ1I^3_2aKhMQ1I^3`2`KgMR1I_3b2[KfMV1H`3b2YKeMX1I`3c2VKdMZ1Ib3c2RKdM\\1Ic3c2PKcM^1If3a2kJgM_1If3`2jJgM`1Ig3_2iJgMa1Jh3]2gJiMa1Ij3^2dJiMb1Jj3\\2dJjMb1Jk3\\2aJkMd1Im3Z2_JmMd1Io3Y2\\JnMe1HS4W2XJQNe1IS4U2WJSNf1HT4U2UJSNg1HU4U2SJSNh1GX4U2oISNj1HY4S2mIUNj1IY4Q2lIWNk1HZ4P2kIXNk1H\\4n1iIYNl1H_4l1eI\\Nl1H`4k1dI]Nl1I`4i1dI^Nl1Ib4g1bI`Nl1Hd4n1YIYNT2Id4n1WIYNU2If4m1SI[NW2If4k1SI\\NW2Ii4h1PI_NW2Hl4f1nHbNV2Hn4c1mHeNU2Ho4a1mHgNT2Io4^1nHhNT2IP5^1kHjNU2HQ5\\1kHlNT2HR5Y1lHoNR2IR5T1oHSOo1IS5R1oHUOn1HU5R1mHUOo1IT5S1lHTOP2IT5S1kHUOQ2IT5Q1kHVOQ2IT5Q1kHVOQ2HU5S1hHVOS2GU5S1hHVOS2HT5R1iHVOS2HU5Q1hHVOT2IT5Q1hHVOT2HU5R1gHVOT2HU5R1gHVOT2HU5R1fHWOU2HT5Q1gHWOU2GT5S1gHVOU2GT5T1fHTOW2IR5S1gHTOW2IR5S1gHTOW2IR5S1gHTOW2HS5T1eHUOX2GS5T1eHUOX2GR5U1fHTOX2HQ5T1gHTOX2HQ5T1gHSOY2HQ5U1fHSOY2Io4U1gHSO?Hl00n5U1gHTO50V1Gn5U1gH3Z1hNo5U1gH3Z1gNP6V1fH3Z1gNP6V1eH3\\1hNn5V1eHFJROi12g5W1fHYOU2_OV5X1eHQO]2Gn4X1eHQO]2Hm4W1eHRO^2Gl4X1fHPO_2Hk4X1fHPO_2Gk4Z1fHoN_2Gk4Z1fHoN_2Hj4Y1gHoN_2Hj4Y1gHoN_2Hi4Z1hHnN_2Gj4[1fHoN`2Gi4Z1gHoN`2Gh4[1hHnN`2Gh4[1hHmNa2Hg4[1hHmNa2Gh4\\1fHnNb2Gg4[1gHnNb2Gg4[1gHmNc2Hf4[1gHmNc2Gf4]1gHlNc2Gf4]1fHmNd2Ge4\\1gHmNd2Ge4\\1gHmNd2Gd4]1hHlNd2Fe4_1fHkNe2Gc4_1gHjNg2Gb4_1gHjNg2Gb4_1fHkNh2Fb4_1gHjNg2Fc4`1fHjNg2Ga4`1hHiNg2Ga4_1hHkNg2F
a4`1gHjNh2Fa4`1gHiNi2G`4`1gHiNi2G_4a1hHhNi2G_4a1gHiNj2F_4a1gHiNj2E`4f0gH0OEj2F_4e0iHONFj2F^4j0iHGOIj2F^4j0iH6j2PO\\4R2dKmM]4S2cKnM\\4R2dKnM\\4R2dKnM[4S2eKlM\\4T2dKlM[4U2eKlMZ4S2gKmMY4S2gKmMY4S2gKlMZ4T2fKlMZ4S2gKnMW4S2iKmMW4S2iKlMX4T2hKlMX4T2hKlMW4U2iKlMV4o0nHOl2ROV4T2jKlMU4U2kKjMV4V2jKkMU4T2lKlMT4T2lKkMT4V2lKjMT4V2lKjMT4V2lKkMS4U2mKkMR4V2nKjMR4U2oKjMR4V2nKjMR4V2nKkMQ4U2oKkMQ4U2oKjMQ4W2oKiMQ4W2oKiMQ4W2oKjMP4V2PLjMo3W2QLhMP4W2QLiMn3X2RLiMm3W2SLiMm3W2SLhMn3X2RLhMn3W2SLiMl3X2TLiMj3X2VLhMj3X2VLgMk3Y2ULgMk3Y2ULgMk3X2VLiMh3X2XLgMi3Y2WLgMi3Y2WLgMi3Y2WLhMh3X2XLhMh3X2XLhMg3Y2YLfMg3Z2ZLfMf3Z2ZLgMe3Y2[LgMe3Y2[LfMf3Z2ZLfMf3Z2ZLgMe3X2\\LhMd3X2\\LhMc3Y2]LfMd3Z2\\LfMd3Z2\\LfMd3Z2\\LgMc3Y2]LfMd3Z2\\LfMd3Z2\\LfMd3Z2\\LgMc3Y2]LgMc3Y2]LgMc3Y2]LfMd3Z2\\LfMd3Z2\\LfMc3[2]LeMc3[2]LeMd3Z2\\LfMd3Z2\\LgMc3Y2]LgMc3Y2]LgMc3Y2]LfMd3Y2]LgMc3Y2]LgMc3Y2]LgMc3Y2]LgMc3Y2]LgMc3Y2]LgMc3Y2]LhMb3X2^LhMb3X2^LgMd3W2]LiMc3W2]LiMc3V2^LjMb3V2^LjMb3U2_LkMa3U2_LkMb3T2^LmMa3S2_LmMa3S2_LmMa3R2`LmMb3R2^LnMc3Q2]LPNb3o1_LPNb3P2^LPNb3P2^LPNc3o1]LRNb3n1^LRNb3m1_LSNb3l1^LSNc3m1]LSNd3l1\\LTNd3l1\\LUNd3j1\\LUNf3j1ZLVNg3i1YLWNg3h1ZLYNe3g1[LYNf3f1ZLYNg3f1ZLZNg3e1YL[Ng3e1YL[Ng3d1ZL\\Ng3c1YL]Ng3c1YL]Ng3c1YL]Ng3c1YL^Nf3b1ZL]Nf3c1[L]Nd3d1\\L\\Nd3d1\\L\\Ne3b1\\L^Nd3b1\\L^Nd3b1\\L^Ne3a1[L_Ne3a1[L_Ne3a1[L_Ne3`1\\L`Ne3_1[LaNe3_1[LaNe3_1[LaNe3_1[LaNe3_1[LaNd3`1\\L`Nd3`1\\L`Nd3`1\\L`Nd3`1\\L`Nd3_1]LaNc3^1^LbNb3^1^LbNa3_1_LaNa3_1_LaNa3^1`LbNa3]1_LcNa3]1_LcNa3]1_LcNb3[1_LeNa3[1_LeNa3Z1`LfN`3Z1`LfN`3Z1`LfN`3Y1aLgN`3W1aLiN_3V1bLjN^3U1cLkN]3T1dLlN\\3S1eLmN[3R1fLnNZ3R1fLnNZ3Q1gLoNY3Q1gLoNY3P1hLPOX3P1hLQOW3n0jLROW3l0jLSOW3l0jLTOV3k0kLUOV3j0jLVOV3i0kLWOU3i0kLWOV3g0kLYOU3g0kLYOU3f0lL[OS3e0mL[OT3d0lL[OU3d0lL\\OT3d0lL\\OT3d0lL\\OU3c0kL]OU3b0lL^OT3a0mL_OU3?kLBU3JB5>LB4>LB3?MA3?MA3>NB1?OA1?OA1>0B0>0BO?1AO?1AN`02@N?3AM?3AL`04@L`04@L`04@Ka05_OKa05_OKa05_OKa05_OK`06@J`06@J`06@J`06@J`06@J`06@J`06@K>6BK=5CN:2F071I015OL040LO51KO51LN42LN42KN62IO71IN82HM93GM93GM93R6000cIN<2CO=1CO=1CO=1CN>2BN>2P60`INb02^ONb00" + } + ], + "question": "What is the spatial 
relationship between and the other objects?", + "choices": [ + "A. is over .", + "B. is over .", + "C. is over .", + "D. is in ." + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_346.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000369370.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "dje28g>3M2M4N1O2M3M4M2M2N3N3Ma0^O4M3L3N2N1N2N2N2N3N2M2N2O1N2N1O100O2O0O100O1O10000O2O0O1O100O1O1O11O1O000000O10000O10000O10000000O010000O1000O10O1000O01000O01000O010_OWDjMj;T2ZDiMf;W2\\DgMe;X2\\DgMe;Y2]DcMd;]2`000O010O1O100O10O10O10O0100O010O1O10000O10001O00000O10O10O001O1O1N101N2O1O0N3N2N3L3N3L5mNfB8W>CUXi4" + }, + { + "size": [ + 480, + 640 + ], + "counts": "b01o>0nYX50PfgJ4N7WAF2ON2S>i000O00000O101O000000000000000000001O00000000000000001O0000001O001O00010O0000001O0000001O001O000000001O0001O000001N1000000000001O00000000000000001O0000000000001O00001O001O001O001O2N1N2O001O1O1O1O1O1O1O1O1O001O1O001O1O000O2O001O001N10000O2O001N101O0O2O0O2N1O2N1O2N101M3N2N100O1N3N1O1O1O2N1O1O1N2O1O1O1O1N2O1O1O1O1O1O1O1O1O1O100O100000000O1000000000000001O00000000000000000000000000000001O0O11O000000000000000000000000000000000001O000000000000000000000000000000001N100001O00000000O100000001O01O000000O1000000001OO100000001O00000000000000000000000000000000" + }, + { + "size": [ + 480, + 640 + ], + "counts": "h1i4W:0001OO1KXKQFi4j9WKVF2Ng4m9WKUFm4k9TKSFm4m93O1LnJZFR5j9000O001N21O000O0100000000O010000000O10O100000N20O010000O10000000O0100O001O1O10000000O100O010000N1100000O1O1O1000O101N101O0000O1000O1000O10O0100O1O01000000O10O100000O1000000O10O10O100000O10O1000000O10000O100000000O10000O100000000O10O1000O10000000O010000O100000000O10000O10000O10O010000O1000O0100O10O10O101N10001N10000O101O0O101O0O2XLhEf2Y:WMlEe2V:YMmEe2T:XMoEf2S:XMPFf2S:iLaE3b0Q3T;M2O3L4M1N3N2N2M2O2aMjCo1f, , and ?", + "choices": [ + "A. is on and beside .", + "B. is on and beside .", + "C. is on and beside .", + "D. is on and beside ." 
+ ], + "answer": "D", + "type": "relation", + "image": "images/vqa_347.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000369370.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "lg_32m>2O1N101N2O1M4M2O1N2O0N3N101N1O2N1SOUOdCm0Z:G8J3O3I5M4N1N2M3M4L3N1O2M3M3N2N1O2M3N2M3O1N2N2N101O0O2O0O2O1N101N101O1N101O1N101N2O001N2N101O001N2O1O0O2O001N2O001N2O1N2O1O001N101O001N101N10001O0O101O1O0O101O000O101O00000O101O0O10001N10000O1000000O10000000O010O10000O01000O100000O010000000000O100000O10O100000000O0100000000000O1000000000O10O10000000000O10000O100O1000000O1000000O1000000000000000000O10000O1000000O10010O0O10000000000O11O00000000O11O00O100001O00O10000O100O2N11O1O0000O2N1000000O20O000001O0000010O0O11O010OO10001O00000O1100O0000O1O10001N1O1001O00O2O0O100O1O10001O000O1O10001N1L4N21O1O00O2L3001O01N100003NO000O10010O000000010O000O11O01O00001O0000001O0O100000000010O01N101O001N101O00001O00000001O01O00001O00010O00000O101O000000001O00001O00001O001N10001O0000001O0000001O0000001O00001O00001O01O0001O00001O001O001O001O1O001O100O2N2N2M3N2M3N3L3N3L3N1O1N2O1O3L2N2O2M4M3M2M3N4K5Ib]f2" + }, + { + "size": [ + 480, + 640 + ], + "counts": "b01o>0nYX50PfgJ4N7WAF2ON2S>i000O00000O101O000000000000000000001O00000000000000001O0000001O001O00010O0000001O0000001O001O000000001O0001O000001N1000000000001O00000000000000001O0000000000001O00001O001O001O001O2N1N2O001O1O1O1O1O1O1O1O1O001O1O001O1O000O2O001O001N10000O2O001N101O0O2O0O2N1O2N1O2N101M3N2N100O1N3N1O1O1O2N1O1O1N2O1O1O1O1N2O1O1O1O1O1O1O1O1O1O100O100000000O1000000000000001O00000000000000000000000000000001O0O11O000000000000000000000000000000000001O000000000000000000000000000000001N100001O00000000O100000001O01O000000O1000000001OO100000001O00000000000000000000000000000000" + } + ], + "question": "What is the relationship between the sandwich and the slice of bread ?", + "choices": [ + "A. is on .", + "B. is leaning on .", + "C. is leaning on .", + "D. They are beside each other on ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_348.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000527215.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "i8h02n2h8RMXGn2h8RMXGn2h8RMXGn2h8j0O100000000001O00000000000000000000000000000000O11O0000000000000000000000000000000000O1000000O100O100O1O10000O1000000O10000O1O10bKaGQ4_8oKaGQ4^8PLcGo3]8PLdGP4`7fK`H0a0:@P4_7gK^H1`09Do3^7gK]H2>;Gl3^7oKgH6Kk3]7RLeH4Nj3]7ULaH22i3]7kLcHU3^7jLcHU3]7jLdHV3\\7gLgHY3Y7gLfHZ3Z7fLfHZ3Z7eLgH[3Y7eLgH[3Y7eLgH[3Y7eLgH[3Y7lK]Ha0:c3Y7eLgH[3Z7dLeH]3[7kK\\H`09e3[7jK]Hb07d3]7hK]Hd06d3]7gK_Hd04e3^7eK`He02f3]7hK_Hb05e3[7kK^Ha06e3\\7kK\\Hb07c3]7eLcH[3]7fLbHZ3_7eLaH[3_7eL`H\\3`7dL`H\\3`7eL_H[3a7fL^HZ3b7gL]HY3c7W100001O000000001O000000000000000000001O000000000000001O00000000000000000000001O00000000000000000000000000001O001O001O1O001O00001O00001O001O001O00000000000000O100O100O100O100000000000000O1O100O1000000O1000000000000001O0000000000000000001O0000000000000000000000000000001O0000000000000000001O0000O1000000001O00000000000000000000001N11O00000000000001O00000hKUHQ3k7iL^HT3b7hLhHR3X7lLkHS3U7mLkHS3U7mLkHS3U7nLjHR3V7oLiHQ3W7QMgHo2Y7RMfHn2Z7SMeHm2[7YM_Hg2a7aMWH_2i7bMVH^2j7eMSH[2n7fMoG[2Q8_1000000000000000000000000000000000000000000000000000000000000000000000000000lKnGP3S8oLmGQ3R8PMnGP3R8PMnGP3R8PMnGP3R8oLoGQ3Q8oLoGQ3Q8oLoGQ3Q8T1000000000000000000000kKoGQ3Q8oLoGQ3Q8nLPHR3P8nLPHR3P8nLQHQ3o7oLQHQ3o7oLQHQ3P8nLPHR3P8nLPHR3o7oLQHQ3o7oLQHQ3o7nLRHR3n7nLSHQ3m7oLSHQ3n7nLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLRHR3n7nLRHR3n7nLRHR3n7nLRHR3n7oLQHQ3o7oLQHQ3o7oLQHP3P8QMoGo2Q8QMoGo2Q8QMoGo2Q8QMoGo2Q8QMoGo2Q8RMmGo2S8QMmGo2S8QMlGP3T8S1000000000000001OO100001O000001O0O01001O00000000000001O0O11O00000000000000O100001OO100000000000001O000iKmGX3R8cLZHV3f7gLaHU3_7kLbHT3^7kLcHU3]7lLbHT3^7lLbHT3^7oL_HP3b7RMYHQ3g7QMSHT3l7nLoGU3Q8mLlGT3T8Q100001O0_KXH`3h7[L^Hd3c7VLcHd0YOV2V9gMXGn1X:J4L1OO1000nNSEGn:7SEIm:7UEGk:8WEGi:8XEHh:8XEH
i:7WEIk:4VELn:0SEOS;JnD6U;GkD9Y;BhD>\\;]OeDc0a;UOaDk0n;1000000O1000000000000O100O1\\O]OaDc0Y;j0@`0L4O1O1L4WOi0^OgLmF_3b8UMYGm2^8\\LaGe4]85L4N2O10000000kKlGT3T8kLnGT3R8lLnGT3R8lLnGT3R8kLoGU3R8jLnGV3R8jLnGV3R8jLnGV3R8jLnGV3R8jLnGV3R8kLmGU3S8kLmGU3S8kLmGU3S8lLlGT3T8mLiGU3W8o01O00O10000000000000000000000000000001O0000000RLmGe2S8ZMnGf2R8XMPHh2P8XMPHh2P8XMPHh2P8XMoGi2Q8XMjGl2V8T1000000000000000nKiGQ3W8oLkGo2V8oLnGn2R8QMTHj2l7UMUHk2k7TMVHl2j7TMVHl2j7UMTHl2l7UMlGR3T8R1001O000000000000000000001O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000eG" + }, + { + "size": [ + 426, + 640 + ], + "counts": "l]f33U=4COZC;^<:O02N2N1N6J:F3O01OOi\\^4" + }, + { + "size": [ + 426, + 640 + ], + "counts": "ojk57R=4K3N2N2O0O2M3N4L3J4QEVO\\98_E1n0Mb90dE2c06h9FgE59<8@`8MRG7Oc0H8n8lN]G:D^2k8WMcG]3^8aLcG_3_8]LcGc3S9O0000000000F^LlFc3m8b0L6L4K6oG^Ko6h4kHYKU7j4gHWKY7m4bHUK]7o4^HRKa7\\5000000001O001N2O0O2SKYHe36nKb7;ZH_3^8_LcGn2MlLg83_Gn2R9nLPGo2g9O1N2jNkE`N4FN2Y:c1jE[NQ;P1eEmN]:h0Y1L4N2M4N2L6IVPe1" + }, + { + "size": [ + 426, + 640 + ], + "counts": "ldT13V=100O3M1O2N2N2O2O1O2N1O2LBgCOW and ?", + "choices": [ + "A. is looking at and playing with .", + "B. is flying over .", + "C. and are both playing with .", + "D. is walking on towards ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_349.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000527215.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "i8h02n2h8RMXGn2h8RMXGn2h8RMXGn2h8j0O100000000001O00000000000000000000000000000000O11O0000000000000000000000000000000000O1000000O100O100O1O10000O1000000O10000O1O10bKaGQ4_8oKaGQ4^8PLcGo3]8PLdGP4`7fK`H0a0:@P4_7gK^H1`09Do3^7gK]H2>;Gl3^7oKgH6Kk3]7RLeH4Nj3]7ULaH22i3]7kLcHU3^7jLcHU3]7jLdHV3\\7gLgHY3Y7gLfHZ3Z7fLfHZ3Z7eLgH[3Y7eLgH[3Y7eLgH[3Y7eLgH[3Y7lK]Ha0:c3Y7eLgH[3Z7dLeH]3[7kK\\H`09e3[7jK]Hb07d3]7hK]Hd06d3]7gK_Hd04e3^7eK`He02f3]7hK_Hb05e3[7kK^Ha06e3\\7kK\\Hb07c3]7eLcH[3]7fLbHZ3_7eLaH[3_7eL`H\\3`7dL`H\\3`7eL_H[3a7fL^HZ3b7gL]HY3c7W100001O000000001O000000000000000000001O000000000000001O00000000000000000000001O00000000000000000000000000001O001O001O1O001O00001O00001O001O001O00000000000000O100O100O100O100000000000000O1O100O1000000O1000000000000001O0000000000000000001O0000000000000000000000000000001O0000000000000000001O0000O1000000001O00000000000000000000001N11O00000000000001O00000hKUHQ3k7iL^HT3b7hLhHR3X7lLkHS3U7mLkHS3U7mLkHS3U7nLjHR3V7oLiHQ3W7QMgHo2Y7RMfHn2Z7SMeHm2[7YM_Hg2a7aMWH_2i7bMVH^2j7eMSH[2n7fMoG[2Q8_1000000000000000000000000000000000000000000000000000000000000000000000000000lKnGP3S8oLmGQ3R8PMnGP3R8PMnGP3R8PMnGP3R8oLoGQ3Q8oLoGQ3Q8oLoGQ3Q8T1000000000000000000000kKoGQ3Q8oLoGQ3Q8nLPHR3P8nLPHR3P8nLQHQ3o7oLQHQ3o7oLQHQ3P8nLPHR3P8nLPHR3o7oLQHQ3o7oLQHQ3o7nLRHR3n7nLSHQ3m7oLSHQ3n7nLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLRHR3n7nLRHR3n7nLRHR3n7nLRHR3n7oLQHQ3o7oLQHQ3o7oLQHP3P8QMoGo2Q8QMoGo2Q8QMoGo2Q8QMoGo2Q8QMoGo2Q8RMmGo2S8QMmGo2S8QMlGP3T8S1000000000000001OO100001O000001O0O01001O00000000000001O0O11O00000000000000O100001OO100000000000001O000iKmGX3R8cLZHV3f7gLaHU3_7kLbHT3^7kLcHU3]7lLbHT3^7lLbHT3^7oL_HP3b7RMYHQ3g7QMSHT3l7nLoGU3Q8mLlGT3T8Q100001O0_KXH`3h7[L^Hd3c7VLcHd0YOV2V9gMXGn1X:J4L1OO1000nNSEGn:7SEIm:7UEGk:8WEGi:8XEHh:8XEH
i:7WEIk:4VELn:0SEOS;JnD6U;GkD9Y;BhD>\\;]OeDc0a;UOaDk0n;1000000O1000000000000O100O1\\O]OaDc0Y;j0@`0L4O1O1L4WOi0^OgLmF_3b8UMYGm2^8\\LaGe4]85L4N2O10000000kKlGT3T8kLnGT3R8lLnGT3R8lLnGT3R8kLoGU3R8jLnGV3R8jLnGV3R8jLnGV3R8jLnGV3R8jLnGV3R8kLmGU3S8kLmGU3S8kLmGU3S8lLlGT3T8mLiGU3W8o01O00O10000000000000000000000000000001O0000000RLmGe2S8ZMnGf2R8XMPHh2P8XMPHh2P8XMPHh2P8XMoGi2Q8XMjGl2V8T1000000000000000nKiGQ3W8oLkGo2V8oLnGn2R8QMTHj2l7UMUHk2k7TMVHl2j7TMVHl2j7UMTHl2l7UMlGR3T8R1001O000000000000000000001O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000eG" + }, + { + "size": [ + 426, + 640 + ], + "counts": "l]f33U=4COZC;^<:O02N2N1N6J:F3O01OOi\\^4" + }, + { + "size": [ + 426, + 640 + ], + "counts": "ojk57R=4K3N2N2O0O2M3N4L3J4QEVO\\98_E1n0Mb90dE2c06h9FgE59<8@`8MRG7Oc0H8n8lN]G:D^2k8WMcG]3^8aLcG_3_8]LcGc3S9O0000000000F^LlFc3m8b0L6L4K6oG^Ko6h4kHYKU7j4gHWKY7m4bHUK]7o4^HRKa7\\5000000001O001N2O0O2SKYHe36nKb7;ZH_3^8_LcGn2MlLg83_Gn2R9nLPGo2g9O1N2jNkE`N4FN2Y:c1jE[NQ;P1eEmN]:h0Y1L4N2M4N2L6IVPe1" + }, + { + "size": [ + 426, + 640 + ], + "counts": "ldT13V=100O3M1O2N2N2O2O1O2N1O2LBgCOW and ?", + "choices": [ + "A. is playing with , while is walking on .", + "B. is looking at , who is walking on .", + "C. Both and are flying over .", + "D. is walking on , while is playing with ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_350.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000404128.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "aX?2W=5M1N2O001O00001O0000001O00000000001O00001O000000001O0000001O000000001O0000000000001O01O000001O000000001O0000001O00000000000010O0000000001O000000001O00000000000010O0000000001O00000000001O0000000eC]On;d0oC^OQW:[10O01OFeEeM[:W2>1O0O201N100O1O3UNoMXHR2f7UNSHm1k7YNPHi1n7]NlGf1R8aNdGe1X8m1LO5K4M2013L06F9H7H2_OlFeLW9X3?N2O1O1O101C relative to and ?", + "choices": [ + "A. is enclosing .", + "B. is parked on .", + "C. 
is under and over .", + "D. is over both and ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_351.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000493905.jpg", + "mask_rles": [ + { + "size": [ + 640, + 571 + ], + "counts": "TYf43R?0eE2Y:OgE2U:1kEOU:2jENV:4hEMW:6fEJX:9gEGY::fEEZ:=dED\\:=cED[:`0aEA_:`0]ECc:>[ECf:RDBo;?PD@Rg0T@[Ob1O[>f0o_O_Oe1J^>]1m_OcNS11P?P2j@PNW?T2d@mM]?T2`@lMa?W2[@iMg?X2V@iMj?X2U@gMl?Z2R@fMn?[2Q@fMo?Z2P@fMQ`0Z2n_OfMS`0Z2m_OeMU`0Z2i_OhMW`0X2h_OhMY`0o22OO01OV@gLn>X3m@TM]ODZ?X3UAaMBYOm>V3^APN]>P2cAaNl=`1RBdNk=]1TBfNh=\\1UBiNh=j3`DQJk8Q6RGQJm8Q6PGPJP9Q6nFPJR9S6jFnIV9T6hFmIX9V6bFlI^9W6^FiIc9Z6YFgIh9\\6RFfIn9]6lEfIT:\\6hEfIZ:[6ZETI^Ob0X;\\6VEUI_Oa0\\;[6SElIo:W6lDjIU;`71O1N2O3nLZDdMR^41O1N2\\LmAX1U>PNdBm1_=nMgBn1\\=mMjBP2Y=jMlBS2Z=^Mo@No1a2^?M4K5Mk0TO7I4L4K4M2N1N3N1O1O2N1O2N2N2N1O2M3L_bb3" + }, + { + "size": [ + 640, + 571 + ], + "counts": "SRm34jc04M2O0O2O001O000000O2N2O1NoQo6" + }, + { + "size": [ + 640, + 571 + ], + "counts": 
"hP11Z`0[3k_OaM0]5m8TMRG_M0^5n8SMQGaM0\\5o8SMQGaM0\\5o8SMQGaM0\\5o8SMQGaM0\\5o8SMQGaM0\\5o8SMQGaM0\\5o8SMQGaM0\\5o8SMQGbMO[5P9SMQGaM0\\5o8SMQGaM0\\5o8SMQGaM0\\5o8SMQGbMO[5P9SMQGbMN\\5Q9RMQGbMO[5P9SMQGbMO[5P9SMQGaM0\\5o8SMQGaM0\\5o8SMQGaM0\\5o8SMQGaM0\\5o8SMQGaM0\\5o8SMQGaM0\\5o8SMQGbMN\\5Q9RMQGo3o8QLQGo3o8QLQGaMO]5P9RMQGaMO]5P9RMQGaMO]5P9RMQGaMO]5P9RMQGaMO]5P9QMRGaMO^5o8QMRGbMM^5Q9QMQGo3o8QLQGo3o8PLRGP4n8PLRGbMM^5Q9PMRGaMN_5P9PMRGaMN_5P9PMRGaMN_5P9PMRGaMN_5P9PMRGaMN_5P9PMRGaMN_5P9PMRGaMN_5P9PMQGbMN_5Q9oLQGbMO^5P9PMRGaMN_5o8QMSG`MN_5o8QMSG`MN_5o8QMSG`MN_5P9PMRGaMN_5P9PMRGaMN_5o8QMRGaMO^5o8QMRGaMO^5o8QMRGaMN_5P9PMRGaMO^5o8QMRGaMO^5o8QMRGaMO^5o8QMRGaMO^5o8QMRGaMO^5o8QMRGaMO^5o8PMSGbMN^5o8PMSGbMN^5o8PMSGbMM_5P9oLSGbMM_5P9oLSGbMN^5o8PMSGbMN^5o8PMSGbMN^5o8PMSGbMN^5o8PMSGbMN^5o8PMSGbMN^5o8PMSGbMN^5o8PMSGbMN^5o8PMSGbMN^5o8PMSGbMM_5P9oLSGbMN^5o8PMSGaMO_5n8PMSGaMO_5n8PMSGaMO_5n8PMSGaMO_5n8PMSGaMO_5n8PMSGaMO_5n8PMSGaMO_5n8PMSGaMO_5n8PMSGaMN`5o8oLSGaMO_5n8PMSGaMO_5n8PMSGaMO_5n8PMSGaMN`5o8oLSGaMN`5o8oLSGaMN`5o8oLSGaMN`5o8oLSGaMN`5o8oLSGbMM_5P9oLSGbMM_5P9oLSGbMM_5P9oLRGbMO_5o8oLRGcMN^5P9oLRGcMN^5P9oLRGR4n8nKRGcMN^5P9oLRGcMN^5P9oLRGcMN^5P9oLRGcMN^5P9oLRGcMN^5P9oLRGcMN^5P9oLRGcMN^5P9oLRGcMN^5P9oLRGcMN^5P9oLRGcMN^5P9nLSGdMM^5P9nLSGdMM^5o8oLTGcMM^5o8oLTGR4l8oKSGQ4m8nKTGR4l8nKTGR4l8nKTGR4l8nKTGR4l8nKTGR4l8nKTGR4l8nKTGR4l8nKTGR4l8nKTGR4l8nKTGR4l8nKTGaMMb5o8mLTGaMMb5o8mLTGaMMb5o8mLTGaMMb5o8mLTGR4l8nKTGaMMb5o8mLTGaMMb5o8mLTGaMMb5o8mLTGaMMb5o8mLSGbMNa5o8mLSGbMNa5o8mLSGbMNa5o8mLSGbMNa5o8mLSGbMNa5o8mLSGbMNa5o8mLSGbMNa5n8nLTGaMNa5n8nLTGaMNa5n8nLTGaMNa5n8nLTGaMNa5n8nLTGaMNa5n8nLTGaMNa5n8nLTGaMNa5n8nLTGaMNa5n8nLTGaMNa5n8mLUGbMMa5n8mLUGbMMa5n8mLUGbMMa5n8nLTGaMNa5n8nLTGaMNa5n8nLTGaMNa5n8nLTG`MOb5m8nLTGaMNa5n8nLTGaMNa5n8nLTGaMNa5n8mLUGbMMa5n8mLVGaMLb5n8mLVGaMLb5n8mLVGaMLb5n8nLVG_MLc5n8mLWG`MKc5n8mLXG^MKe5m8mLXG^MKe5m8nLXG]MJe5n8nLXG]MJe5n8nLYG\\MIf5n8nLZGZMIh5m8nLZG[MHg5n8nL[Gk3e8UL\\Gj3d8WL[GXMGi5n8oL\\GVMGk5m8oL]GVMEk5n8PM\\Gf3d8ZL]Ge3c8\\L\\Gd3c8]L^Gb3b8_L]Ga3c8_L^G`3b8aL]G_3c8bL]G]3c8cL^G\\3b
8eL^GZ3b8fL^GZ3b8gL^GX3b8hL^GX3b8iL^GV3b8jL_GU3a8lL^GT3b8mL^GR3b8nL^GR3b8oL^GP3b8QM]Go2c8QM^Gn2b8RM^Gn2b8SM^Gl2b8TM^Gl2b8UM^Gj2b8WM^Gh2b8YM^Gf2b8ZM`Gd2`8]M`Gb2_8_MbG`2^8aMaG_2_8bMaG]2_8cMaG]2_8cMbG\\2^8eMaG[2_8fM`GZ2`8gM_GY2a8gM_GY2a8hM_GW2a8iM_GW2a8jM_GU2a8kM_GU2a8lM_GS2a8mM`GR2`8oM_GQ2a8PN_Go1a8RN^Gn1b8RN_Gm1a8SN`Gl1`8UN_Gk1a8UN`Gj1`8WN_Gi1a8XN_Gg1a8ZN_Ge1a8[N_Ge1a8\\N_Gc1a8]N_Gc1b8oIVGV48j1b8nIYGW45k1b8nI[GV43k1b8PJ\\GS42m1b8oI]GU41k1P9UNPGj1P9WNoFi1Q9WNPGh1P9YNoFg1Q9YNPGf1P9[NPGd1P9]NPGb1P9^NPGb1Q9]NPGb1P9_NoFa1Q9_NPG`1P9aNPG^1P9bNPG^1P9cNPG\\1P9eNoF[1Q9eNPGZ1P9gNPGX1P9hNPGX1P9iNPGV1P9kNoFU1Q9kNPGT1P9lNPGT1Q9lNoFS1Q9nNnFR1S9mNnFR1R9oNnFP1R9QOmFo0T9POlFP1T9QOlFn0b8`JcGb4Kn0b8aJcGa4Km0b8bJcGa4Km0a8dJcG`4Lk0a8eJcG`4Mj0`8fJdG`4Kj0a8gJcG`4Lh0a8hJcGa4Lf0a8jJcG_4Lg0`8kJeG]4Lg0_8lJeG^4Kf0`8mJeG]4Ke0`8nJeG]4Ld0_8oJeG^4Lb0_8PKfG^4Jb0`8PKfG^4Ka0_8RKfG]4Ja0`8RKfG]4K`0_8TKeG\\4L`0_8TKfG\\4J`0`8TKfG\\4J`0`8TKgG\\4I?`8UKgG\\4J>`8UKgG]4H>a8UKgG^4HfK]AY4c>hK\\AX4c>iK^AV4b>kK^AT4b>lK^AT4a>nK_AQ4a>PL^AP4a>RL_Am3`>TLaAk3_>VL`Aj3b;oKgF8gMi3a;SLdF6jMg3c;TLaF5lMg3d;WL[F4QNd3h;ULVF8QNc3j;YLoE6WN`3l;lLXEFkN^3SkLeAU3Z>mLfAR3Z>oLeAQ3Z>PMgAo2Y>QMgAo2X>SMhAl2_8ULkLo0gJk2]8ZLiLl0jJi2]8]LUIKRN1V3P1VMf2]8`L[Hl0l1O]Md2\\8cLWHm0n1N_Ma2\\8fLRHQ1o1JcM^2[8jLoGR1P2HfM[2[8mLiGU1T2DiMY2Z8UM]GV1\\2^OlMW2[8@eIYNPNW2[8D`IVNUNV2[84oHhMeNT2\\8>dH`MoNR2]8c0]H^MVOn1]8i0WH\\MZOk1_8k0TH\\M]Oh1_8m0QH^M_Oe1`8n0oG^MBc1_8P1lG_MF`1^8Q1lG`ME_1_8R1jG`MH]1^8S1iGbMIZ1^8U1gGbMKY1^8V1eGbMNW1]8W1cGeMOT1^8X1aGfM1Q1^8Z1_GgM3n0^8]1[GhM6k0_8S4bGlK^8U4aGkK_8U4aGkK_8U4bGjK^8V4bGjK^8W4bGhK^8Y4aGgK_8Z4`GfK`8[4`GdK`8\\4aGcK_8]4bGbK^8_4bG`K^8`4bG`K^8`4bG`K^8a4bG^K^8b4bG^K^8d4`G\\K`8d4`G\\K`8e4`GZK`8g4_GYKa8g4_GYKa8h4aGUK_8k4bGTK^8l4bGTK]8n4cGQK]8P5cGoJ^8Q5bGnJ^8R5bGnJ^8S5bGlJ^8T5cGkJ]8V5cGiJ\\8X5eGgJ[8Y5eGgJ[8Y5fGfJZ8[5fGdJZ8]5eGcJ[8^5eGaJ[8_5fG`JZ8a5eG_J[8b5eG]J[8d5eG[J[8e5eG[J[8f5eGYJ[8g5eGYJ[8h5eGWJ[8i5fGVJZ8k5fGTJZ8l5fGTJZ8m5fGRJZ8o5eGQJ[8P6eGoI[8Q6eGoI[8Q6fGnIZ8S6eGmI[8S6fGlIZ8U6fGjIZ8W6eGiI[8X6eGgI[8Y6fGfIZ8[6fGdIZ8\\6hGbIX8_
6gGaIY8_6gGaIY8`6fG`IZ8`6gG_IY8b6gG]IY8d6fG\\IZ8e6fGZIZ8g6eGYI[8h6eGWI[8j6eGUI[8k6eGUI[8k6eGUI[8k6fGTIZ8m6fGRIZ8o6eGQI[8o6fGPIZ8Q7eGoH[8R7eGmH[8T7eGkH[8U7fGjHZ8W7eGiH[8W7eGiH[8X7eGgH[8Y7eGgH[8Z7eGeH[8[7eGeH[8\\7eGcH[8]7eGcH[8^7eGaH[8_7fG`HZ8a7eG_H[8a7fG^HZ8b7fG^HZ8c7fG\\HZ8e7eG[H[8e7fGZHY8h7fGXHZ8i7fGVHZ8k7eGUH[8l7eGSH[8m7eGSH[8m7fGRHZ8o7eGQH[8o7fGPHZ8Q8eGoG[8Q8fGnGZ8S8eGmG[8S8fGlGZ8U8fGjGZ8W8fGhGZ8X8fGhGZ8X8gGgGY8Z8gGeGY8[8gGeGY8\\8gGcGY8]8hGbGX8_8gGaGY8`8gG_GY8b8fG^GZ8b8gG]GY8d8fG\\GZ8d8gG[GY8e8hGZGX8g8gGYGY8g8hGXGX8i8gGWGY8j8gGUGY8l8fGTGZ8l8fGTGZ8m8fGRGZ8n8gGQGY8o8gGQGY8P9gGoFY8Q9hGnFX8S9hGlFX8U9gGkFY8V9gGiFY8W9gGiFY8X9gGgFY8Z9fGfFZ8Z9gGeFX8\\9hGdFX8\\9iGcFX8]9hGbFX8_9gGaFY8`9gG_FX8b9hG^FX8c9gG]FZ8c9fG\\FZ8d9gG[FY8f9gGYFY8g9gGYFY8g9hGXFX8i9gGWFY8j9gGUFY8l9fGTFY8m9hGRFX8o9gGQFY8o9hGPFX8Q:hGnEX8R:iGmEW8T:hGlEX8U:hGjEX8W:hGhEX8X:hGhEX8Y:hGfEX8Z:hGfEX8[:hGdEX8\\:hGdEX8]:gGcEY8^:gGaEY8`:fG`EZ8a:fG^EZ8b:fG^EZ8c:fG\\EZ8d:gG[EY8f:gGYEY8g:`010Od:YEg0" + } + ], + "question": "What is the relationship between , , and ?", + "choices": [ + "A. is playing with and jumping over .", + "B. is playing with and standing on .", + "C. is talking to and is beside .", + "D. is holding and standing on ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_352.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000272212.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "PVZ73l>2O1M2O2M1O2N200N2O101N1kA]Om=c0SB]Om=c0RB_On=a0QB_Oo=b0oA_OQ>h01O0\\OPB8P>EUB9Z>N101O010N2OjTe1" + }, + { + "size": [ + 480, + 640 + ], + "counts": "TVU22m>2O0O2O0000000O2L9\\ADX>c0N2N1000001O01O01O010O100O1O2M2O2M2O4K4M6I;F3M1O00YOg0O1O1O1O10000O11O001O2N2N4L2N2NCiBROV=n0kBSOS=m0oBUOmn0L4K5O13N6IWOiBFY=6iQn5" + }, + { + "size": [ + 480, + 640 + ], + "counts": "m7R7n7000000000001O00001O0000000000000000001O00000000001O000000000000000000000000001O0000000000001O0000000000000000000000001O0000000000000000001O000000001O000000000O1000000001O0000O1001O0000000000000000O10000001O00000000O100000000001O0000O2O0001O00000000000000O20O00000000000001O0000000O1UJfGQ5Y8oJhGP5X8PKiGo4W8QKiGo4W8QKjGn4W8PKjGP5V8PKkGo4U8QKkGo4U8PKlGP5T8PKmGo4T8PKlGP5T8mJoGS5Q8lJSHQ5m7oJ]Hg4c7YK^Hf4b7YK`Hf4`7ZKaHe4_7ZKbHf4^7ZKdHd4\\7\\KeHc4[7]KeHc4[7]KeHc4[7^KdHb4\\7^KcHc4]7]KcHc4^7\\KcHc4]7]KcHc4]7]KfH`4Z7`KgH_4X7bKjH\\4W7cKkH[4U7eKmHY4S7gKPIV4P7iKSIU4m6kK_Ii3a6VLfId3[6[LfId3Y6\\LiIc3W6^LhIb3X6^LhIb3X6]LhId3Y6[LXIT4h6lKQI[4o6eKoH]4Q7dKmH]4S7bKnH^4R7bKnH]4S7cKmH]4S7cKnH\\4R7eKnHZ4R7eKQIY4o6gKSIW4m6iKVIU4i6kKXIT4h6lKZIR4g6mK[IP4f6PL]Im3c6SL`Ij3`6WL`Ih3`6XLlHFWOR4l7WLmHLSOm3P8WLlH3nNf3V8WLlH5lNd3X8WLlH6jNd3Z8VLlH`4T7`KlH`4T7`KkH`4W7_KiHa4W7^KkHa4U7_KkHb4T7^KmHa4S7_KmHa4S7_KnH;eN^3]8WLnH7iNb3Y8VLRI1iNi3U8VLfIj3Z6VLfIi3[6WLfIh3Z6XLfIh3[6WLeIj3Z6VLfIj3Z6WLeIi3[6WLeIi3[6WLeIi3[6WLeIi3[6XLoH]4Q7cKiHc4W7^KiHa4X7`KgH_4Y7bKgH]4Y7dKeH]4[7eKjGT5V8nJcGW5]8f000000000000O11O0000000000O10000001O01O00000O10001O000001O00000000O2O00000000001O000000000000000000000000000000000000000000000000000000000001O00O1000000000000000000000001O0000000000000000000000000001O00O100000000000000000000000001O00000000000000001O0000001O00001O00000000001O000000000000001O0000000000001O00000000001O00000
00000000000001O00O1001O00000000000001O00O100000001O000000000000000000001O0000000000000000000000001O00000000000000001O00000000000000001O0000000000000000000000000000000000000000000000001O0000000000000000000aJjFT5W9iJkFW5V9hJjFX5V9hJjFX5W9fJjFZ5V9fJjFZ5V9fJjFZ5V9eJjF\\5V9cJkF]5\\900000001O00001O0000001O0000001O001O001O0000000000001O00000001O00O10000000000000000000001O0000001O00000000001O00000000O1000000001O00O10000000000000000000000O10000O100_OiJYGW5g8jJWGW5i8lJSGU5m8lJQGU5o8mJlFV5T9;000000000000000000000000001O0000000000001O00000000000000001O000000000000000001O0001O0000gF" + }, + { + "size": [ + 480, + 640 + ], + "counts": "PeZ74j>3N1O1N2N2N2O1O101N10000jA^On=b0QB@n=a0QB_Oo=b0oA@Q>g00000000O01O1O012N3M00000010O001O006K0O00000000001O0000000000000000000000000001O01O00010O00001O001O000000001O000000001O001O1O1O1O1N2O00001O0O101O0O2O1N3Lege0" + } + ], + "question": "Which statement accurately describes the state of the cows on ?", + "choices": [ + "A. All three cows, , , and , are walking.", + "B. All three cows, , , and , are lying down.", + "C. is walking while and are lying down.", + "D. is walking while and are lying down." 
+ ], + "answer": "D", + "type": "relation", + "image": "images/vqa_353.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000574520.jpg", + "mask_rles": [ + { + "size": [ + 399, + 640 + ], + "counts": "g`_32Y<4N3N1PDGj;9TDIl;=00000O1O101O1N[EIU96iFOV9OhF5W9JgF9Y9GeF;Z9EeF=Z9CeF`0Y9@gFa0Y9^OfFd0Y9]OfFd0Y9\\OfFf0Y9ZOgFf0Y9ZOgFg0X9YOhFg0Y9YOfFh0Y9XOgFh0Y9XOhFg0X9ZOgFg0X9YOiFe0X9[OiFd0W9]OiFa0X9_OiF`0W9AhF>Y9BiFEd0[OoN2Ei4c0TMGPO`0Db0l4WOaLFnNc0Ca0n4VOaLGmNb0C`0Q5WO`LFlNd0A?T5WO_LFlNd0AOB@c57^LFlNe0_ONFTOl5c0SLFlNf0^OKj5IlKFlNf0^OIl5KjKFkNh0^OGm5KjKFkNh0^OGGWOS6d0mKGkNg0]OJj5IoKEjNg0^OKi5IoKFjNe0^OLi5IoKFjNe0^OLj5HnKHjNb0_ONi5HnKHkNa0^OLl5KkKIkN?^OLm5LjKIlN?\\OLn5LjKImNe0TOFU6LjKJmN`1Y5fNkKIlN`1Z5gNjKJdNd1d5bNhKKUNFKP2Y6_NhKc2X4]MhKb2Y4^MhKa2X4_MhKa2X4_MiKa2V4_MjKa2V4_MjKa2V4_MjKa2V4_MkK`2U4`MlK_2T4aMlK`2S4`MnKa2P4_MTL]2l3cMYLX2g3hM]LT2c3lM`LR2_3nMdLo1\\3QNhLl1W3TNoLg1P3YNSMd1m2\\NTMd1k2\\NVMd1i2\\NYMc1f2]N[Mc1d2]N]Mc1b2]N^Mc1b2]N^Md1a2\\N`Md1_2\\NbMc1^2]NcMb1]2^NdMa1\\2_NeM`1dLlM]5d0PN_1bLoM]5b0TN\\1]LWN\\5=XN[1\\LYN[5iNAX1`0iN]OX1c0Z500000000O10000001O00O10000000000001O00000000000000000000000000000000000000000000000000000001O0000O10000000000001O000000O100000000001O000000O1000000001O00000000O1000000001O0000O1000000000000001O0000000000000000000O11O00000000000000O100001O00O10O11O0000000000000000000000000000000000O1001O00000000000000O1010O0O100000O2O00000001O00000O11O0000O100O1001O001O001O0000O100O100000000000000000000000000000000O100001O00001O000000000000O10000001O0000_O" + } + ], + "question": "Which statement accurately describes the relationship between , , and ?", + "choices": [ + "A. is in , while is over .", + "B. and are both in but are not touching.", + "C. is standing on , and both are beside .", + "D. is standing on , and is on ." 
+ ], + "answer": "D", + "type": "relation", + "image": "images/vqa_354.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000289417.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "gXn43a;4N2N00M3001N110SE4n9MoE7P:h0G:N1OI8M3N2O12O6J10O001N2\\OdEA1N\\:=SF@o9" + }, + { + "size": [ + 375, + 500 + ], + "counts": "SRf21f;000]j<0bUC1bG5P5KmJ, , and ?", + "choices": [ + "A. and are both beside .", + "B. is in front of , who is in front of .", + "C. is in front of , who is in front of .", + "D. is beside , and is behind ." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_355.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000439854.jpg", + "mask_rles": [ + { + "size": [ + 333, + 500 + ], + "counts": "hmT19o0JR8=gGDX8`0SGG3Jj8o0SGROl8X100ZOjNUHV1i7nNUHQ1k7QOTHo0k7SOTHm0k7UOTHk0l7VORHL26l70QHJ54j73PHI73i75oGH;0f78oGH=Me7 and ?", + "choices": [ + "A. is on and is riding .", + "B. is riding and is on .", + "C. is beside and is on .", + "D. is on and is riding ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_356.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000474164.jpg", + "mask_rles": [ + { + "size": [ + 640, + 633 + ], + "counts": "h`b03kc04M2N2N1O1O2N1O100O1O011O2N3M3L2O1O1O000000000O2OO1000000000O0100N1O1O110O01O1O010O100O1O100O100O01J7]O^d:l0W[E101O0000000000000000O1O1N2J6J6K6L3M3O1N3NTXP:" + }, + { + "size": [ + 640, + 633 + ], + "counts": "^i1h13jN^a0R2J1O1L4O1000000O1000000000000TNUMcBk2Y?01O00000000iMVMYCi2g00000O1001O^MXMlCh2T000^MWMlCj2f>O0000TMYM_Dg2a;YM^Dh2a;YM`Df2a;XM`Dh2]>OO1oLWMkDi2U;WMkDi2S;YMmDg2R;YMoDg2P;[MoDe2P;]MnDd2R;XMSEg2S>00000O11O0000`LXMhEh2X:XMhEh2i=O3M0000M3003M00M30000000YMj^Od2Va0300000YMj^Od2Ya0000M[Mk^Oe2Ua0300000000000000I7L4ZOmLZ@X3c?QNP@VOk0AUO1`00_O0k?a1W@^NQ21mM0R?m4_O>I7J6K5H8L4J6M3L4K5J6N2H8L4K5N2N2O1O1O1O1N2O1O100O1N2J6M300O100O100N20000O1000000O1000000000000O100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O100000000000000001O00000000000000O10000000000000000000000001O00000000000000000000000000000000000000O11O000000000000001O00000000001O0000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000001OO10000001O0000000000000000000000000000001O00000000000000O1000000000000000000000000000000000000000000001O0000000000000000000000000000001OO10000001OO1001O00000000000000000000000000000000000000001O000000000000000000001O000000000000000000001O000000000000000000000000000000001O0000000000000000001O0000000000000000001O000000000000001O000000001O000000000000001O000000001O000000001O001O000000001O00001O001O00002N001O001O2N1O001O002N1O001O1O3M1O1O001O2N1O1O002N2N1O001O1O2N1O001O2N2N1O00003M2N1O001O2N1O1O1O1O1O2N1O001O1O2N2N1O1O1O2N1O001O1O3M1O002N1O1O1O1O1O1O2N2N1O001O2N1O001O1O3M1O1O001O2N1O001O1O2N1O1O001O3M1O00001O3M1O001O1O2N1O001O1O2`MW@\\NO2N?3_ON303Ra06n^OGW1Nad0" + }, + { + "size": [ + 640, + 633 + ], + "counts": 
"WUm:1WP50Q\\O1fWL2N1O1O1O2N1O1O100O1O2N100O1O1O1O2N1O1O1O1O2N100O1O1O1O101N1O1O1O1O001O100O1O2N1O1O100O1O1O1O100O1O1O1O1O1O1O1O1O1O10NQB" + }, + { + "size": [ + 640, + 633 + ], + "counts": "RZn44jc05g@I`K1n>;\\EKb:8YEMd:4\\ELc:6[ELc:9YEId:?TECk:>PEHm:;PEGn:;XDWOaMb0V>:ZCiNoNf1g=CXBiNO8Bi1V>YOUBkNLd2n=cNQBnNHf2V>_NnAV2Q>m1000000000000001O000O100O10000000000000000000000000000000000000000000O1O1O2O000001O001O1O1O1O1O1O1O1O001O00000dKiAo2W>kLUBP3k=eLdBV3\\=iLiBS3W=jLoBS3R=kLQCS3oL9G4L3M2hNP_O\\O[a06l^OFXa0DX^O0o02eb0Kl`e5" + } + ], + "question": "What is the location of ?", + "choices": [ + "A. Inside .", + "B. Sitting on .", + "C. Sitting on .", + "D. Sitting on ." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_357.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000565391.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "e6173a2M\\M0R>3nBM13QO14O[1NbN3NNM0e=n2\\BiNOOOZN218Z4f7f7K1N2O001O1O001O001O001O001O1N101O00001O001O001O1O1O00001O001O001O1O001O001O00001O1N2O1O001\\LPC6P=]3001O1O1O1O001O00002N001O1O001O1O1O1O1O001O002N1O001O1O2N2N1O2N1O1O1O3M2N2N4L2N1O001O2RKaAW4b>eK`AHNL0W4f>n04L4L3M5K6J;`KZ@P4m?N1O1O3M1O1O00003M1O1O00O1002N01O03M0O100001O00O1001O0000O100001O0000000000000000000000000000000000O11O00O10000000000000000000000000000000000000000000000001O00000000000000000jE`Ll3`3UL_Lk3a3QL_LS4a3V6000dE_LY4a3S6000bE_L]4a3cK_L]4a3cK_L]4a3Q600000000000000000000000000000000000000000000000000000000000000000000001O00000000000000000000000000000000000000000000000000000000000000000000000000001O0000000000000000001O000000000000000000001O0000001O001O0000001O0000001O001O00001O1O001O001O001O1O001O1O1O1O1O2X@lK`?\\4N2N2N2N2N2N3M2N2N2N3M3M2N2[EdJb6_5n36J3M2N2N3M3M3M2N1O3M5K3M2N2N3M3L4M3N1N3M3M3M1O2N2N3M3M1O2N2N4L2N2N2eEjGh8X8UGjGj8Z8nFbGPO4R:]8kF_GSO4R:_8mFbGR9a8jF`GV9b8eFbGZ9b8_FaGa9b8\\F^Gd9c8ZF^Gf9d8QFoF09NIP:d9PF\\FP:c9QF^Fn9b9RF^Fn9j901O000000001O00000N31N00LoEYFR:f9oEYFQ:g940lEXFP:h9oEYFQ:g9400000
00001O00000O11O000000000001O000O11O0aM_FYHNo1c9h5_FYHOn1d9g5]F[HOn1f9]5XFfH5NOm1d9\\5]FgH:1Db1b9i5TGdH[Oc1a9i5^GVJa8g5ZFgHV1c1_8]N\\FS7X1bJ\\8[N\\FS7W1cJ]8`5dG`J]8`5bG`J_8_5aGaJa8]5_GdJ`8\\5`GdJ_8]5aGcJ^8^5bGbJ`8\\5_GfJ`8Y5_GiJa8W5YGoJd8T5\\GlJd8T5\\GlJe8S5[GmJg8Q5YG^IQOm0f9e5YGZIUOQ1`9h5YGXIWOP1`9h5YGXIWOQ1`9f5YGUI[OU1]9e5XGUI]OU1W9i5]GQI]OV1V9i5]GQI]OV1T9k5_GoH]OV1T9k5_GPI[OV1V9j5_GoH\\OW1V9j5VGgHA27\\1R9k5UGiHAN9_1Q9j5VGhH2^1h8j5VGgH3`1f8i5VGiH3^1c8m5YGfH5\\1_8R6lF_HO<3D:_1g8S6lFRIL^O2O0O9a1k8R6nFUIM^O:[1k8S6mFUIM^O:Z1k8V6kF\\I4nNOa1P9c6mFaH3]OMb1Q9Z9XKWE]LKV12GP;^NoD0Ob0o5_O^J5c0Iac0" + }, + { + "size": [ + 640, + 480 + ], + "counts": "aX`74lc04K3N2N2N1O:F3M00000000O1O10000000O01000000O100OX]OAQb0?k]OEUb0:k]OGUb08k]OIUb07k]OIUb06k]OKUb05k]OKUb04k]ONTb01l]O0Tb0Om]O1Sb0Ol]O2Tb0Mm]O3Sb0Ml]O4Tb0Km]O5Sb0Km]O6Rb0In]O8Rb0Go]O9Rb0Fm]O;Vb0Aj]O`0Yb0\\Og]Oe0[b0XOf]Oh0[b0VOe]Ok0hb00O1O1O1O2O0O10000O2M200O1000001O000O11O0001O0000000000001O000000000000001O000000001O000000000O1001ZO]]OKh9" + }, + { + "size": [ + 640, + 480 + ], + "counts": "i]f34jc02O100O2N1O1O1@GY]O9fb0JX]O6hb0LV]O4jb0MT]O4lb0LT]O4lb0MS]O3nb0Nm\\O5Sc0<0O1000O100O100O1O100O10O1001O0000000000000O100O10O010000O10000002N1O0000000000000000001O00O1O101N1O101O0O1000000O100000000000000001O00001O00000O11O000010O000001O00000001O0000000000000000010O00000000000O101O000000000000000O10000000001O0O1000001O001O00001O001O0O3N002N1O2N1O1O2M2O1O2M5K\\jb2" + }, + { + "size": [ + 640, + 480 + ], + "counts": "n??240_OO30N11OO171H4<21KEi?o2Z@_MLBj?o3001O001O001O001O00001O001O001O1O001O001O001O001O1O001O001O001O001O1O001O1O001O001O1O001O1O001O1O001O001O001O1O1O1O001O1O1O001O1O1O2N1O001O1O1O1O1O1O2N2N1O1O2N2N1O1O2N2N1O2N2N3M2N2N3M3M4L5K6YOW]OK`c0O3M2N1O1O1O2NQhY7" + }, + { + "size": [ + 640, + 480 + ], + "counts": 
"T^h0191N11Od`0\\3F0000000000001O000000000000000000000000000000000000000000000000000000O100001O000000O1000000000000000000000000000000000000000000001O0000000000000000000000000000000000000000000000000000000000O10000001O00000000000000000000000000000000000000000000000000000000000000000000000000000000001O000000000000O11O00000000000000000000000000000000O10000001O0000000000O10000000000000000000000001O00000000000000000000000000000000O1000000001O00000000000000000000000000000000001O000000000000001O0000000000001O0000000000001O00000000000000001O00001O00001O00001O001O00001O001O001O1O001O00001O1O1O1O001O1O1O3M7I3M3M2N2N2N1O4L2N3M2N3M4L2N1O1O2N8H3M2N2N2N4L2N2N3M2N5K2N2N2N2N6J2N1O001O2N5K5K1O3M3M3M1O2N2N2N3M1O1O00000000001O001O0000000000000000000000000000000000000000000000000000000000000000O1O1F:GSDYHS and ?", + "choices": [ + "A. ", + "B. No object is between them.", + "C. ", + "D. " + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_358.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000319607.jpg", + "mask_rles": [ + { + "size": [ + 640, + 640 + ], + "counts": "cim2g1:gNVa0f3ZNe0Z@ZKg0OV=b6ZOQ1hB]HO2k;U8I3M1O1M4N10001O0O1O100O10001N100000000000000001O00001O010O1O001O010O2N1O1O1O1O2N1O1O0O1000O10000001N1000000O2O00001O0O2O001N105JW1jNR1hJiAb2V?TLi@N=h3o?^Lk_Od2n`0I7[Nd^O_O24aa0OS2Bh]o0MXiQ6" + }, + { + "size": [ + 640, + 640 + ], + "counts": 
"WlQ1<_c07L2N2M3M3L4M4M3M2M4L5K4N2O1N3M3M4M1O1N2O3L2O002N3L3N1O2N1N2N3N2N1O1O2M4M1O1O1O2N1N2O1O003M4L1N2O2N1N3M3N1O1O0O4M1O1O2N1N3N1O0O3N2N3L3N1O1O2M3M2O2N001N5L1O1O2M3M2O1O2N1N3N3M1O2M2O1O003NjI]Bl5f>RJdAODa4h?Bm1YLe^OF:=QY`0@YZ@0Pd05k[O3L20O00O100]\\OF`c0:_\\OJ]c07c\\OJ\\c03e\\OI14Wc0c0M2O1N1O100O1O2N1O1N4M2N4L001N3N1M3O001N4L5K3M101O1O1O3M1N102N1jGcNgM^1W2EgFnMl5U1SJ0Z9T1`LkNXJ0W9Y1nFdMV5R1eJ1V9j1PL^Oo3e0PLZOP4f0QLVNhJHX9S2oKTNkJHU9V2SLPOm3R1TLiNeJQNW9V3WLeNm3\\1SLaNo3_1RL^No3c1SL[Nm3f1SLYNl3h1ULVNl3j1ULUNk3k1ULTNk3n1XLnMh3R2]LiMc3X2]LhMb3X2aLeM_3[2bLdM^3\\2fL`MY3a2gL_MX3b2iL^MV3b2kL]MU3c2lL[MU3d2PMXMo2i2XM`LcI:T9V3bMiL]2W3cMiL]2V3dMkL[2U3gMiLX2Y3iMfLU2[3mMcLS2\\3QN`LP2`3W7101N1000O11N101O001N2O0fDQMc5P3[JUMa5l2]JXM_5j2`JXM^5i2aJYM\\5j2_JYMa5i2\\JYMb5j2\\JVMd5m2YJSMg5P3UJQMk5S3PJWMf5l2VJZMf5h2XJYMg5i2WJXMh5j2VJWMh5n2TJVMg5n2VJSMi5n2VJRMj5o2UJSMi5P3UJUMd5P3[JoLkLjM]8Y5gJYMW5i2gJXMW5m2eJTMZ5n2gJQMV5R3hJPMV5S3gJnLX5S3hJlLX5U3gJkLX5Y3eJgL[5[3dJdL\\5`3`J`L`5a3`J^L`5l3VJSLj5Q4TJnKk5T4UJkKk5W4SJiKm5X4RJhKn5Z4PJfKo5^4nIbKR6a4jI`KV6c4fI^KY6d4eI\\K\\6e4cI[K]6g4`IYK`6j4^IUKc6l4[ITKe6m4[ISKe6m4\\ISKc6n4\\ISKc6o4nI_JP6d5oI[JQ6f5nImISNcNo7b7mIiIYN]NKOn7m7mIgIa6Z6fHRHE`1f7`6bHQHI_1c7b6bI]I]6d6cI]I[6e6dI[I[6h6dIVI\\6k6fIRIY6o6hIPIX6P7iIoHW6Q7jInHU6T7lIjHT6W7lIhHS6Y7mIgHS6X7oIfHQ6Y7TJbHn5]7TJaHm5_7TJ`Hk5`7VJ_Hk5`7VJ`Hj5_7XJ_Hh5b7WJ^Hj5a7XJ]Hi5c7YJ[Hf5f7ZJYHg5g7YJZHe5g7WJ]Hh5d7TJcGfM62>o7i7ZJkGkM6m7P8WJiGmM4o7S8SJkGmM2JER8c8SJiGnM2n7V8SJkGkM2Q8S8UJiGlM2o7U8VJiGlMNNEP8e8VJVHkMUOn7g8WJSHkMXOk7h8YJoGmME]7[8hJnGPNCX7^8iJnGSN]OEN_7g8jJnGQNAU7a8mJlGmMDU7`8PKlGjMDU7_8VKiGfMFU7`8WKhGhMEQ7d8^K^GcMMo6f8WL_Ga3c8]L^Gb3c8`LZG`3g8aLVG`3l8_LRGb3Q9e31O00004L5K4L1O004L1O2N1O1O1O3M2QFWFc9T:O004L4L001O00002N2N1O2N1O1O1O00FQGbEP9_:PG`EP9a:oF_EQ9a:oF_EQ9`:PG`EP9`:PG_EQ9Q:nFYF2DQ9R:PG[FS9e9nFZFR9f9PGXFP9h9SGSFo8l9c0O1O1N2N2O1O100O100N2O1N200N2O1O10000O100M3O100000000O100O1N2N2O1O1O1000000O10000O100O1O1O1O10000O100N2O10000O10000O1O1O1O1O100O10000O1O100O10000O10000001O0000002N1O00001O1O3M001O00001OO10000
00O1O1O1O1O100O100O100O100O1O10000O1O1O1O100O100O1O1O1O10000O100O1O100O1N20000O100O100O1O100O100O1O100O1O100O1O1N200O100O100O100O100O1O1O100O1O100O100O3M2O1N3M2N3M3N2M2O1N2N2NBeI[CY6cC\\BY2i=hMXBT2j=mMVB>D_OW>3VBo1k=TNSB=PB;GVOZ>" + } + ], + "question": "What is the spatial relationship between and ?", + "choices": [ + "A. is in front of .", + "B. is in front of .", + "C. is on top of .", + "D. is behind ." + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_359.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000427160.jpg", + "mask_rles": [ + { + "size": [ + 512, + 640 + ], + "counts": "Sjn73k?3N2N1O2N2N2N102M2N2N1N3O1N2N2M3N2O0O2M3N2M3N2N2O1N2N2O001N2O2FUN[Bn1_=XN\\Bl1`==L3[E^M[7S3_HVMY7n2aHWM\\7m2aHVM\\7n2aHSM^7o2`HRM^7Q3`HQM^7>`F]1Q2VN^7P3bHQM\\7P3dHPM[7P3fHQMY7o2gHRMW7o2hHRMW7o2iHRMV7n2jHRMU7o2kHQMU7n2lHSMS7m2lHTMS7T2oFcMT1]OoN26j0h8R2RGdMP1U1m7W1TGcMP1U1m7W1TGdMP1S1m7W1UGfMn0Q1o7W1TGhMo0n0n7Y1UGiMn0l0n7Y1VGjMn0j0n7Z1VGlMm0h0n7[1UGmMo0f0m7[1VGoMm0d0o7[1VGPNm0b0o7\\1UGSNm0?o7\\1VGTNm0>n7]1UGUNo0;n7^1UGVNo09n7_1UGXNn06o7a1SGYNQ12n7c1SG[NQ1Nn7e1RG]NR1Kn7g1QG^NS1EP8l1mF_N];`1dD_N];`1cDaN^;]1bDcN_;]1`DdN`;[1aDdN`;[1`DeNb;Y1]DiNc;W1\\DiNe;V1ZDlNg;S1XDmNi;R1WDnNk;Q1SDQOm;h0XCgNl0`0n;f0YD[Oh;c0YD]Oh;c0WD]Oj;c0UD\\Om;c0SD]Oo;b0oC_OT<>lCBY<:fCE_<7bCH`<6`CIaIaAoN^>P1:O000J60010J41KZOQAf0T?0O1010O01N20Mn@XOQ?h03O02M3O000000001O10O000000100O0100O0010O01N110O010O010O00O1001N1O2O000000001O1O0nLPOTGR1g8UOUGm0j8VOoFn0n8WOPGi0P9YOlFi0S9ZOiFi0T9\\OhFe0U9AgFa0R9GkF:U9GiF;T9HjF9T9JiF9U9IjF7S9LjF7U9KhF7X9JcF;[9F_F`0a9BWFd0h9kN_ETOf0S2k9eNeEVO?U2l9fNhEQO;[2m9bNhFb1W9\\NlFc1U9[NmFc1T9[NmFf1S9XNPGg1P9ZNoFg1Q9XNoFg1R9XNoFg1R9VNQGj1P9UNPGk1P9TNoFm1S9RNnFm1R9SNnFn1R9RNoFl1Q9VN^FYOUO^2`:YNWF_OWOW2c:ZNTFAUOX2h:VNQFFSOV2m:UNmE]2T1kLn6h0jG`2T1oLo6c0iG`2R1fL[N=g8>iG`2X1TMo6=fG_2^1SMl6`0bG^2b1SMl6a0^G\\2g1TMk6T4TInKl6Q4UIPLj6o3WIRLh6m3XITLh6i3ZIYLe6f3[I]Lc6b3]I_Lc6`3]I`Ld6_3\\IcLc6\\3\\IgLc6Y3]IgLc6Y3[IiLe6X3ZIhLf6X3YIiLg6X3WIjLh6W3UIlL
j6T3UIoLi6R3TIPMl6P3SIQMm6P3PISMo6m2PITMP7m2nHUMQ7k2nHWMQ7j2mHWMS7j2jHZMT7f2iH^MV7b2iH_MW7a2hH\\LTO0T8e3fHZLYO0Q8f3fHYLCHg7o3eHYLHEd7c2lGSNf0GJCd7a2oGUNa0HMAc7`2SHUN;H6@[7d2THUN7H>]OW7e2VHVN2Jb0[OV7d2WHWNOJf0ZOT7e2WHWNOIh0ZOR7e2YHWNLJk0YOP7d2[HZNGJo0WOX6NPIf24[NBLS1TOS63TI]29aN\\OKT1TOQ65VIT2a0hNROLW1ROo5?XABh>>WADg>=XADh>=SAFn>f0O00ESADm>=TACm>?QA@P?g01O100O1O1O2O1N2N3L4I8KPdn3MU\\QL5U@Ka?46N2O100O11OOg?1Y@1OOgo30ZPL00ObbZ1" + }, + { + "size": [ + 512, + 640 + ], + "counts": "o=P2P>0000000000000000UOXB^Og=a0^B\\Ob=c0bBZO^=f0cBYO]=f0dBZO\\=f0eBYO[=f0gBYOY=g0gBXOZ=g0hBYOW=f0kBYOU=f0mBXOT=h0nBVOR=i0S10000O10OO2O10000O10000000000O100M3O1O1M3000000O10lAEc<:^CGa<9_CGa<6SCLUOOh=3RC>n<_OSCd0l<\\OTCd0l<[OUCe0k<[OTCf0l0O100M3O100O100O100O1O1O1O1O10000000000M300O1O1O010O100O2O0O100O1O1N11001N100O1O1O1O10O0100000000O101O0N101O1O100O1O100O10000O10000000001O0000000000000000000000mNXBNh=M`B0`=OcBO]=NgB0Z=NjB0V=MWB[Od0h0U=MmB3S=LoB4P=LPC4P=LQC3oN1N2O00102M000000001O0O11O01O0000oNHZB8d=LZB4a=5cADd07g==WBBg=c0XB\\Oh=d0XB\\Oh=e0WB[Oh=f0XBZOg=i0VBXOh=l0VBUOh=l0XBTOh=m0WBSOh=o0VBROi=Q1TBoNl=c100O1O100O2O0O1M3O0010000000000O10000O1O100000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000O11O00000001O000000O100000000001O000000O100001O000O10000000O10000001O0000000000O100001O000000000000O100001O00000O100000O10000001O0000000000O100001O00000000000000O1001O0000000000000000000000000000000000000000000000000000O10000N2N2N2M3O1O1M300O1O1O100O101O0^NmAW1_>N2O00000O2@^A@c>6hAGZ>7hAHY>3WAHb02[>NnA1Qoi18h_VN3M1O1O1O1O2N2M4M2N1N1N2M3K5K5M3O1O100O1000000O11O1O001O001O1O001O00001O00000000O100O1O1M3O1N2O1O1N2N2O1O1O1M3N2N2O1O1L4N2O1O1000000000000000000000000000000000000000000000000000O11O0000000O10000000O11O00000000000000O1001O000000000bB" + }, + { + "size": [ + 512, + 640 + ], + "counts": 
"Vlo54j?3M2N3N1N2N3M2O1N3N2M2O2M6K2M4M2N2N3L4M3L3N3M6J5dCkMd:X2UEoMh:R2REUNl:k1PE[No:d1WDeM;R1\\;a0WDTO2YO6X1_;;[DRO0]O0[1d;7[Dk0d;VO[Dl0d;h1O1O1O10O0100N2O0O2iDSLZ:o3_EXL_:e4O100O1O100O100O100O10000O10000O10000O10000O100O100O10000O100O100O100O1O1iNTJ[G2e0j5i7iJRHX5i7oJUHQ5h7TKVHl4g7XKWHi4h7YKWHg4g7[KYHe4e7_KXHb4g7`KXH`4g7m1O100O1O1000000O100O100O10000O100000000000000001O000000001O001O001SJVHb3j7]LYHa3g7\\L\\Hd3d7XLaHg3`7SLfHl3Z7QLiHo3X7mKlHR4U7mKkHS4V7kKlHT4U7kKlHd2TO\\MQ8OlHc2UO^MP8LmHW4T7hKmHW4U7eKnHZ4U7bKmH]4V7`KkH[3nNgKMk0a8JgHn2hNQM, , and interacting with each other and the environment?", + "choices": [ + "A. is looking at , while is running on .", + "B. and are running on , while is looking at them.", + "C. is looking at , while runs on .", + "D. is looking at , and both and are running on ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_360.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000427160.jpg", + "mask_rles": [ + { + "size": [ + 512, + 640 + ], + "counts": 
"To59f?2N2N2O1N2O1O1N1O3M3N1O1O1O1O1O0O2O0N2O2O000O100O1O10000O1JhNbAY1b>IaAoN^>P1:O000J60010J41KZOQAf0T?0O1010O01N20Mn@XOQ?h03O02M3O000000001O10O000000100O0100O0010O01N110O010O010O00O1001N1O2O000000001O1O0nLPOTGR1g8UOUGm0j8VOoFn0n8WOPGi0P9YOlFi0S9ZOiFi0T9\\OhFe0U9AgFa0R9GkF:U9GiF;T9HjF9T9JiF9U9IjF7S9LjF7U9KhF7X9JcF;[9F_F`0a9BWFd0h9kN_ETOf0S2k9eNeEVO?U2l9fNhEQO;[2m9bNhFb1W9\\NlFc1U9[NmFc1T9[NmFf1S9XNPGg1P9ZNoFg1Q9XNoFg1R9XNoFg1R9VNQGj1P9UNPGk1P9TNoFm1S9RNnFm1R9SNnFn1R9RNoFl1Q9VN^FYOUO^2`:YNWF_OWOW2c:ZNTFAUOX2h:VNQFFSOV2m:UNmE]2T1kLn6h0jG`2T1oLo6c0iG`2R1fL[N=g8>iG`2X1TMo6=fG_2^1SMl6`0bG^2b1SMl6a0^G\\2g1TMk6T4TInKl6Q4UIPLj6o3WIRLh6m3XITLh6i3ZIYLe6f3[I]Lc6b3]I_Lc6`3]I`Ld6_3\\IcLc6\\3\\IgLc6Y3]IgLc6Y3[IiLe6X3ZIhLf6X3YIiLg6X3WIjLh6W3UIlLj6T3UIoLi6R3TIPMl6P3SIQMm6P3PISMo6m2PITMP7m2nHUMQ7k2nHWMQ7j2mHWMS7j2jHZMT7f2iH^MV7b2iH_MW7a2hH\\LTO0T8e3fHZLYO0Q8f3fHYLCHg7o3eHYLHEd7c2lGSNf0GJCd7a2oGUNa0HMAc7`2SHUN;H6@[7d2THUN7H>]OW7e2VHVN2Jb0[OV7d2WHWNOJf0ZOT7e2WHWNOIh0ZOR7e2YHWNLJk0YOP7d2[HZNGJo0WOX6NPIf24[NBLS1TOS63TI]29aN\\OKT1TOQ65VIT2a0hNROLW1ROo5?XABh>>WADg>=XADh>=SAFn>f0O00ESADm>=TACm>?QA@P?g01O100O1O1O2O1N2N3L4I8KPdn3MU\\QL5U@Ka?46N2O100O11OOg?1Y@1OOgo30ZPL00ObbZ1" + }, + { + "size": [ + 512, + 640 + ], + "counts": 
"o=P2P>0000000000000000UOXB^Og=a0^B\\Ob=c0bBZO^=f0cBYO]=f0dBZO\\=f0eBYO[=f0gBYOY=g0gBXOZ=g0hBYOW=f0kBYOU=f0mBXOT=h0nBVOR=i0S10000O10OO2O10000O10000000000O100M3O1O1M3000000O10lAEc<:^CGa<9_CGa<6SCLUOOh=3RC>n<_OSCd0l<\\OTCd0l<[OUCe0k<[OTCf0l0O100M3O100O100O100O1O1O1O1O10000000000M300O1O1O010O100O2O0O100O1O1N11001N100O1O1O1O10O0100000000O101O0N101O1O100O1O100O10000O10000000001O0000000000000000000000mNXBNh=M`B0`=OcBO]=NgB0Z=NjB0V=MWB[Od0h0U=MmB3S=LoB4P=LPC4P=LQC3oN1N2O00102M000000001O0O11O01O0000oNHZB8d=LZB4a=5cADd07g==WBBg=c0XB\\Oh=d0XB\\Oh=e0WB[Oh=f0XBZOg=i0VBXOh=l0VBUOh=l0XBTOh=m0WBSOh=o0VBROi=Q1TBoNl=c100O1O100O2O0O1M3O0010000000000O10000O1O100000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000O11O00000001O000000O100000000001O000000O100001O000O10000000O10000001O0000000000O100001O000000000000O100001O00000O100000O10000001O0000000000O100001O00000000000000O1001O0000000000000000000000000000000000000000000000000000O10000N2N2N2M3O1O1M300O1O1O100O101O0^NmAW1_>N2O00000O2@^A@c>6hAGZ>7hAHY>3WAHb02[>NnA1Qoi18h_VN3M1O1O1O1O2N2M4M2N1N1N2M3K5K5M3O1O100O1000000O11O1O001O001O1O001O00001O00000000O100O1O1M3O1N2O1O1N2N2O1O1O1M3N2N2O1O1L4N2O1O1000000000000000000000000000000000000000000000000000O11O0000000O10000000O11O00000000000000O1001O000000000bB" + }, + { + "size": [ + 512, + 640 + ], + "counts": 
"Vlo54j?3M2N3N1N2N3M2O1N3N2M2O2M6K2M4M2N2N3L4M3L3N3M6J5dCkMd:X2UEoMh:R2REUNl:k1PE[No:d1WDeM;R1\\;a0WDTO2YO6X1_;;[DRO0]O0[1d;7[Dk0d;VO[Dl0d;h1O1O1O10O0100N2O0O2iDSLZ:o3_EXL_:e4O100O1O100O100O100O10000O10000O10000O10000O100O100O10000O100O100O100O1O1iNTJ[G2e0j5i7iJRHX5i7oJUHQ5h7TKVHl4g7XKWHi4h7YKWHg4g7[KYHe4e7_KXHb4g7`KXH`4g7m1O100O1O1000000O100O100O10000O100000000000000001O000000001O001O001SJVHb3j7]LYHa3g7\\L\\Hd3d7XLaHg3`7SLfHl3Z7QLiHo3X7mKlHR4U7mKkHS4V7kKlHT4U7kKlHd2TO\\MQ8OlHc2UO^MP8LmHW4T7hKmHW4U7eKnHZ4U7bKmH]4V7`KkH[3nNgKMk0a8JgHn2hNQMS1Z9X3jFbL]OVOi9Y4iFaL]9^3dFbL^OUOh9Y4mFfLR9Y3SGgLi8V3[FlKS1d0c8[3aFPLR1=^8b3bFPLX;o3=O1N2N2O100N200O1O1O100000000000000O1000000000000001OO1O100000000O1000000000000000000O1001O0000O1001O00O100001O0000001O2N1O00000000O11O001O1O4L00N22N1O1O00000000000000O100000000O100YMnC_1SgNmAW1S>iNPBT1a>M2N2N2M3N3M1N2O2N2M6K4K_\\b3" + }, + { + "size": [ + 512, + 640 + ], + "counts": "\\jj33i?5O0O1j@K`>6^AM_>6^AM_>5`AL_>5`AIGIh>`0_AHKHb>b0bAFd>:[AGd>:[AFf>:XAIf>8XAKg>d02O2O0O2O1O1O00100O101N2N2M3N2N2M4K5Loed5" + } + ], + "question": "Which statement accurately describes the actions of ?", + "choices": [ + "A. is running on alongside .", + "B. is wearing and running on .", + "C. is running on while wearing .", + "D. is wearing and is looking at ." 
+ ], + "answer": "C", + "type": "relation", + "image": "images/vqa_361.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000500477.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "0001O1O1O1O2N1O1O1O1O2N1O001O2N1O1O001O2N2N001O2N1O1O1O1O2N1O1O100O1N2O1O00101N1O1O001O1O1O1O001O1O1O1O1O001O1O1O001O1O001O1O1O1O001O001O1O1O1O1O1O001O1O1O1O2N1O002N001O1O1O1O1O1O1O1O1O1O1O1O1O1N2O1O1O1O1O1O1O1O100O1O010000000002N00O11O00001O01O00010_EmLR9T3fFTMY9m2bFXM]9i2ZF_Mg9m302O0O2N10N101O0O1O1O2O0O2O0O1O2O0O1O1O100O1O1N2O1O1O2N01gEhKi9X4TFkKl9e41OO100010O001O0O100O1O2N1O1O1O1O3M001N2O2N100O1O1O2O1N2O02N02N1OI6M4M2N3MTLfEV3Y:j0M3N3L3OSLQFj2l9W1M3MUL[F`2b9`MaF_2^9aMfF]2W9dMkF[2R9gMPGX2n8iMTGV2j8lMYGQ2f8nM]GQ2b8oM^GS2`8lMaGU2^8jM\\G_2b8aM^G`2a8`M[GUODm2P9oMVGk2h8i1N1O1N101O0O01L4E:N3N101L4bMfFnNb9Q1_FnNa9Q1aFoN_9o0bFRO]9l0dFWO[9d0iFaMBd1c9j0gFFW99jFIU97gFMY93eF0Y9Q1cFTN\\9i3O1N10001N1000dNkFnLS9S3RGiLm8W3WGfLh8[3YGcLf8_3[G`Ld8`3_G^L_8c3dGZL\\8f3gGXLX8i3iGVLV8j3lGULR8l3PHRLP8n3QHRLm7n3VHPLi7Q4XHoKg7P4[HoKd7Q4^HoK`7Q4bHoK\\7Q4fHnKZ7Q4hHoKV7Q4kHoKT7Q4nHnKQ7S4PIlKo6T4SIkKl6V4TIiKm6W4TIhKk6Y4VIeKi6]4YI`Kg6b4YI\\Kg6e4j10M3O1O1O1N2O1O1N1O20OO2O0O11O0O1O001O002M2O2M3M3L3O2N2O1O1O1O101M2O1M2O2M3N2N2N2N3M2N2M3M2L5M3I7L4K5J7L3M3N2N101N2O1O2M2O1O1O1O1O1O001O1O1O2N1O1O1O1N2O1O1O0O3N1O1O1O1N2O1N101N2O1N3L3N2N2O1N2N1N2O2O2M3K4N2Oo\\i3" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"i3132O0O]6R8aIPH2Nc6Q88O11O4ZIkGZ6T8aIRH0JZ6T8iInG]6P8:M301N10000010O0PISH70]6l7=0001O1O001O0021ON1O002O2MfH_HX7_7gHaH[7]7eHcH^7Z7cHdHa7Y7_HgHd7V7[HlHh7P7XHPIj7n6VHRIk7Q71JUHTIn7j6SHUIn7j66N00011NiGZIU8i6OO1010O010O01O00001O0001O101lGVIo7n61M2O101N2N1O10010002NN1O011N4M1N1O2O02OO`GbI]8`600010O010O001O1O1O1O00001O2N000O110O1O1O0001O1O1O001O101N100O00002M2O001O1N5L2N1O0O3O3L1O1O002N6J3M0O100O2N1O001O2O0O2OO1001O1O1O00001O1oHYHg6h7VI[Hi6e7WI\\Hh6d7XI]Hg6o7O010O1O001O1O1O001O2N13M0O1O00100O1O1O0O4M1O1O1O1O1O100O0010O2O0O100O02O00O10O0100O100000O0101N10O00100O1O0010O00001O002M2O2N20OO001O1O00001O1N1010O100000O001O001O00001O00001O0000O1000000O10000000O1001N100000O100000O100O1O2N1000001N1O100001O001O0000O10000000000000000000000O10001N010O11O00O100O10O10001O1N2O1O0100O0001O1O001O001O000000000000N2O10000O100O1000000O1O10000O100O1000000O1000000000000001O0000001O00000YJkGc4U8[KnGd4R8[KPHd4P8[KRHd4o7[KRHd4n7[KTHd4l7\\KUHc4k7\\KVHd4j7ZKYHe4g7WK]Hi4d7TK^Hl4c7QK`Hn4a7QK_Ho4d7mJ]HS5f7gJ]HY5e7dJ]H[5f8N1O001O001O00000000000000000000001O1O1O0000001O00001O000000000000001O00000000001O001O00001O001O001O00000000001O000000001O0000001O00000000000iKoE_3R:_LQFBHj3W:bLXF\\3h9cL[F[3f9cL\\F\\3f9`L]F_3d9_L^F`3c9\\LaFc3_:O1O1O001O1O1O2N1O1O2M2O1O1O2N1N2O8G3[MkC[12[O] and ?", + "choices": [ + "A. is attached to .", + "B. is cleaning .", + "C. is lying on .", + "D. is cleaning ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_362.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000485844.jpg", + "mask_rles": [ + { + "size": [ + 396, + 576 + ], + "counts": "Pmi21[<0RU12kjN4L8I2M4M2M3N0O2O0O2O0O101O000O10000O1O1L4O0SFmNa8U1]GoN_8R1_GSO]Oh0\\76VIUOVOn0_7OXIn0e6SOYIR1c6oNZIV1c6kN[IY1b6hN\\I\\1a6eN[I_1d6bNZIa1c6aN\\Ib1_6aN_Ib1]6`NcIf1V6\\NhIU2d5QNZJS2`5PN^Ja2Q5aMnJd2l4_MRKe2f4e2K4M\\L_KA`4W4M2N200O1O1K5O1O1N2O1O1O1N2O1M3O1O001N2O1OM3O1L4O2O1O1N3M4K4L4M3M2N3M2N2M3N3L3WOQKdIO6U5R6[KdIk4W6h0M3L4L4K6J6K7I5L4K6K6K4K5J4M5J:F5K7TNRG0j:K4fNSE>VXj2" + }, + { + "size": [ + 396, + 576 + ], + "counts": "n^Z26T<3M2N3M2O2M2O1O1O1N3N1O1N2O1O001O1O1O101OO10O10O1N2K4N3N2N2O1N1O20000000O1000000O100001O1O2N6J3M3M1N]bP4" + } + ], + "question": "What is the relationship between and ?", + "choices": [ + "A. is holding .", + "B. is holding .", + "C. is on .", + "D. is beside ." + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_363.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000485844.jpg", + "mask_rles": [ + { + "size": [ + 396, + 576 + ], + "counts": "QWd21Y<4N00001O000000O100000000001O000000001O0000001O0000000O100001O0000001O0O3Lhjk3" + }, + { + "size": [ + 396, + 576 + ], + "counts": "Vhk07S<4N1O1O1N2O1N1O2N1O2O0O101N101N10001N101O0O1000001N1000000O2O00000000001N1000000010O00O11O01O00000000000000000000000000000O1000000000O100000000O2O0000000000001N100000000O2O1O001N1000001N101O0O2O0O2O0O2O000O2N1O2N2O0O1N2M2O20001O01O0000010O00010O1O010O00100O00010O0001O001O001N`f2MdYM00003M000[OLRE4n:LRE4n:LRE4n:LRE4c;000000000000001O0000001O00000000000000MlCNU<1kCOU<40000001N3MRWl3" + }, + { + "size": [ + 396, + 576 + ], + "counts": 
"Pmi21[<0RU12kjN4L8I2M4M2M3N0O2O0O2O0O101O000O10000O1O1L4O0SFmNa8U1]GoN_8R1_GSO]Oh0\\76VIUOVOn0_7OXIn0e6SOYIR1c6oNZIV1c6kN[IY1b6hN\\I\\1a6eN[I_1d6bNZIa1c6aN\\Ib1_6aN_Ib1]6`NcIf1V6\\NhIU2d5QNZJS2`5PN^Ja2Q5aMnJd2l4_MRKe2f4e2K4M\\L_KA`4W4M2N200O1O1K5O1O1N2O1O1O1N2O1M3O1O001N2O1OM3O1L4O2O1O1N3M4K4L4M3M2N3M2N2M3N3L3WOQKdIO6U5R6[KdIk4W6h0M3L4L4K6J6K7I5L4K6K6K4K5J4M5J:F5K7TNRG0j:K4fNSE>VXj2" + }, + { + "size": [ + 396, + 576 + ], + "counts": "n^Z26T<3M2N3M2O2M2O1O1O1N3N1O1N2O1O001O1O1O101OO10O10O1N2K4N3N2N2O1N1O20000000O1000000O100001O1O2N6J3M3M1N]bP4" + }, + { + "size": [ + 396, + 576 + ], + "counts": "QT6b1f9aN`F[2Z9?K4M3N2O1N2O1N2O1N2K5M3O1O1N_MbGX1\\8[NTHd1l7YNWHg1h7YNZHf1g7UN]Hk1d7PN`HP2`7mMcHS2]7kMeHU2\\7iMeHW2]7cMgH]2d8000000000000000001O00000000000000000O100001O000000000001O1O5K1O1O1O1O1O0O2O01lNUNfGj1W8]NfGb1Y8`NfG`1Z8`NfG`1Z8`NgG_1X8bNhG^1X8aNjG^1V8aNkG_1U8`NlG`1S8aNnG^1R8bNnG^1R8bNnG^1R8bNnG^1R8bNoG]1Q8cNoG]1P8cNQH]1o7cNQH]1o7cNQH\\1P8dNQH\\1n7dNRH\\1n7cNSH]1m7cNSH]1m7cNSH]1n7bNSH]1m7bNTH^1m7[NYHe1W900000001O0O10001O0000000001O0000000000O1000000000000000000000000000O100000000000000O1gN[NkGe1T8\\NlGd1T8\\NlGd1S8^NlGb1S8_NmGa1R8`NmGa1R8aNmG_1S8aNmG_1Q8cNoG]1P8eNoG[1P8fNoG[1Q8fNnGZ1Q8gNoGY1P8iNnGX1Q8jNnGV1Q8kNoGU1P8lNPHT1m7oNRHR1m7PORHP1n7POQHQ1o7oNQHQ1o7POPHP1P8POoGQ1Q8POnGP1R8QOlGP1T8POlGP1T8QOjGP1V8POiGQ1W8POgGQ1Y8oNgGQ1Y8oNfGR1Z8nNdGT1]8kNaGW1_8iN_GY1a8fN`GZ1`8fN_G[1a8\\N]G\\O2X2a8ZNjGf1V8YNkGg1U8XNmGg1S8XNnGh1R8WNoGi1Q8VNQHi1o7WNQHi1o7VNSHi1m7WNSHi1m7WNTHh1l7XNTHh1l7WNVHh1j7YNVHf1j7ZNVHf1j7YNWHg1i7YNXHf1h7ZNXHg1g7YNZHf1f7ZNZHe1g7[NYHe1g7[NYHe1g7[NXHf1h7ZNXHf1h7ZNXHf1h7ZNVHh1j7XNTHj1l7VNQHm1o7SNQHm1o7SNRHl1n7TNRHl1n7UNQHk1o7UNQHk1o7UNQHk1o7TNSHk1m7VNRHj1n7VNRHj1n7VNRHj1n7VNRHj1n7VNRHj1n7VNRHj1n7VNRHj1n7VNRHj1n7VNRHj1n7WNQHi1o7XNPHi1o7WNQHh1P8XNPHh1P8YNPHf1o7[NQHe1o7\\NPHd1P8]NoGc1Q8_NmGa1S8`NmG_1T8aNkG_1U8aNkG_1U8aNkG_1W8bNfG^1\\8aNcG_1_8_NaGa1b8\\N_Gc1e8YN[Gg1g8XNXGh1j8WNTGj1n8UNQGk1Q9XNjFh1X9WNgFi1\\9TNdFl1^9RNaFo1d9lM[FU2k931O002N2N4L3M2N2N3M3M2N2N3M6J9G7I6I
;Da]d3" + } + ], + "question": "Based on the scene, where is located?", + "choices": [ + "A. It is being held by .", + "B. It is on .", + "C. It is inside .", + "D. It is beside ." + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_364.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000151480.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Z\\`26d>=E4M3K6L2M3O1O1O3]BmNV=g1G7I6J6K3L;E5L5_DlLl:Y3lDkLS;a300O1BWLdEj3Y:_L`Eb3_:KgEbLY:Y3nEfLR:_3hEbLX:R4100000001N102oK]EQ3OQMU;h2TESMn:a2_E\\Mc:U2]1G7N3M20100O2N004L2M4H;GWVT6" + }, + { + "size": [ + 480, + 640 + ], + "counts": "i^_23N11O0000000000002N8FQmf6" + }, + { + "size": [ + 480, + 640 + ], + "counts": "YTd26f>6L2L4K5K5AWOfBj0Y=YOdBg0]=ZObBf0^=]O^Bc0c=<2O3M3M2NO10000000001O2O0O10O00O2N1NQO\\Bf0c=ZO^Bf0a=ZOaBe0^=[OdBc0]=\\O[CLg<2T1O3NePW6" + } + ], + "question": "What is the relationship between , , and ?", + "choices": [ + "A. is wearing both and .", + "B. is wearing and carrying .", + "C. is carrying both and .", + "D. is carrying and wearing ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_365.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000026204.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "\\ga75R=8J4L3DY7DeH<[7EdH;\\7EdH;]7EbH:_7H_H9a7H]H8d7G\\H9d7H[H8f7GZH9i7FUH:P8BoG>W8]OhGc0[8[OdGe0]8ZOcGf0^8\\O_Gd0b8\\O]Gd0d8\\O[Gd0e8\\O[Gd0e8]OZGc0f8_OXGa0h8@WG`0i8@WG`0h8BWG>i8CVG=j8ETG;j8HTG9k8JSG6m8KRG5m8MRG3o8NoF2R9OkF2W9NgF2[9MdF3]9MaF4a9K^F5d9KZF5g9JXF7h9IXF7h9JVF7k9HSF:n9FoEd?", + "choices": [ + "A. and ", + "B. and ", + "C. and ", + "D. 
and " + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_366.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000530099.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "T\\Y11d;3N1O100O1O1O1O1O1O100O1O1O100O1O1O1O1O1O1O100O1O1O1O100O1O1O1O1O1O100O1O1O1O1O1O100O1O1O1O100O1O1O1O1O100O1O1O1O1O010O1O1O2N1O100O1O1O1O100O1O1O1O100N200O1O1O100O1O1O1O100O1O1O1O100O1O1O1O1O100O1O1O1O1O1N2O1O100O100O1O1O100O100O1O100O1O10000O10000O100O100O10000O10000O10000O10000O1000000O100O10000O100O10000O100O100O10000000000O100O10000O10000O1000000O100O10000O10000O1000000000000O1OVM[Hd1e7ZN\\Hg1d7VN_Hj1a7UN_Hl1a7SN_Hn1a7QN_HP2`7PN`HQ2`7nM`HS2`7mM_HT2`7kMaHV2_7iMaHX2^7hMaHZ2_7eMaH\\2_7bMbH_2]7aMcH`2]7_McHb2]7]MdHc2\\7\\McHf2\\7[McHf2]7XMdHi2[7VMfHk2Z7TMfHm2Z7RMfHo2Y7QMgHP3Y7oLhHQ3X7nLgHT3Y7kLgHV3X7jLhHW3X7hLhHY3X7fLhH[3X7dLhH]3X7bLiH^3W7aLiH`3V7`LjHa3W7\\LiHf3b72O1O1O1N2O1O1O1O1O1O1O1O100O100000000000000O10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000GhKmH11X4R7gKmH21V4R7hKmH21V4R7kKPIU4P7kKPIU4Q7jKoHV4R7gKPIY4W700000001O000000cH" + }, + { + "size": [ + 375, + 500 + ], + "counts": "Zel11f;1N2O3L2O0O2O0O2O1N2N3N3L3M102M1O3L2N3O1N2O1N3M2N2M2O1O1O1O1O1N2O1O001O1O10O00010O01O100O001O100O100O100O1N3O1N2N1O2OO10O0100O010O010O10O01O10000O010O1000000O10O1000O1000O010000O10000O1O01000O1O1000O01O1000O0100000O01O1000O10O1000O01000000O01000O1000O1000O0100000O1000000O0100000O1000000000000O10000O1000000O100O1000000O101O0O10001N101N101O001O000O2O001O001N10001N1O1O1O2N1O1N3N1OaEROX:l0gEVOY:i0gEYOY:e0gE[OZ:d0gE\\OZ:a0fEA[:=eED[: and ?", + "choices": [ + "A. is sitting on .", + "B. is inside .", + "C. is under .", + "D. is sitting on ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_367.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000237864.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "mce59b0Le;9PD4k;NQD7i;^O[D3Jc0h;XOcDR1m;L01M4M^OZDMZ;2jDKV;5lDIT;8SE@m:`0n0:F2O1QDPOa;R1[DROc;P1[DPOe;Y1HdNdD\\1];fN`Dm0O\\Oa;c0iD[OW;c0lD\\OS;d0QEXOP;g0SEVOl:j0j0000000002N3M3N[OQD0m;JnCI=8eWIAh6?YI@g6`0]I\\Oc6d0aIXO^6i0cIVO]6k0eIRO[6n0fIQOZ6o0hIoNX6Q1kIlNU6T1lIkNT6U1nIiNR6X1PJeNP6\\1RJaNn5_1UJ^Nk5b1VJ]Nj5d1WJZNi5f1WJZNj5e1WJZNi5f1XJYNi5g1WJWNk5h1WJVNi5j1XJUNi5j1XJTNi5l1k2O010O1O002N001O1O2N1O101N1O2EZDoNh;o0YDoNj;o0XDoNi;P1XDoNj;o0VDPOl;P17011OO0O3N4L11N01O1O10O100O00012NO01001N1N2O1N3O1N3N001O0O2M2N5JbPP4" + } + ], + "question": "What is the spatial relationship between and ?", + "choices": [ + "A. is over .", + "B. is in front of .", + "C. is behind .", + "D. is beside ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_368.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000237864.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": 
"1f2c:00001O0000001O00001O001O001O000000001O00000000O10000O1000000O1010O0O1000000000O10001O01N10000000001O0001OO100000000000O10001N10O1000000000000001O01N10000000000010O01O001O0000000O100001O01N1000001O000001O01O000O110O00000000O100000000O100000000001O001O1O001O00000000001O000000N2N1O201O0O1001N110O1O2N1O1OO100O100O10000O001O2O0O1000O11O0001O001O1O001O000000000000O10000001O00000000000000O10000000000000000O1000000000000000000000000000000000000000000001O00001O001O000000001O00000000000000000000O1N2O1O1N2O100O1000000001O00001O0000000000O100O1O100O100O100O1000000001O001O001O1O001O2N1O1O1O1O1O1O1OO1000000O10000O11O00O10000000000000000O10000O10000000000O10000O10000000000000000000000000000000000000000000000001O000000001O00000000001O0O110O0000000000O1000O11O0000000001O00000O11O000O01001O000000O10O2O0000000000000O10000O10O10O2O0O10O1000O2OO10O101O0000O10O101O0000000O1000000O100O100000000000000001O000000001O0000O011O000O01001O000000O1001O00O1001O0001N1000010OO010O2O000000O1001OO10001O10O01N2O1O1O0000001O010N10001O0000001O1O10OO2O1O0000001O00001O00000000001O000000000000001O00[OPFQNP:o1PFQNP:d201OO100000000000000YOoEVNQ:j1oEVNQ:a200000000000VOoE\\NQ:^2000000000O100O100000000O10000O10000UOkEaNV:_1jE`NW:`1iE`NW:Y2O02O000O100O1UOgEfNY:Z1gEfNY:Z1gEeNZ:U2O02N10000O10000000O1001O00000000000000O100010mNcEXO]:k1ON2OdEXMY:l20kNfE[OZ:k1N1000010OhNgE@Y:`0gE@Y:?hE@Y:`0gE@X:a0gE@Y:?iE@W:`0iE@W:`0iEAV:?iEBW:>hEEV::kEAZ:?gE_OZ:a0gEdNM41L\\:\\1fE[O\\:e0dEeNN9_:Q1dEfNM4OF`:" + }, + { + "size": [ + 425, + 640 + ], + "counts": 
"mce59b0Le;9PD4k;NQD7i;^O[D3Jc0h;XOcDR1m;L01M4M^OZDMZ;2jDKV;5lDIT;8SE@m:`0n0:F2O1QDPOa;R1[DROc;P1[DPOe;Y1HdNdD\\1];fN`Dm0O\\Oa;c0iD[OW;c0lD\\OS;d0QEXOP;g0SEVOl:j0j0000000002N3M3N[OQD0m;JnCI=8eWIAh6?YI@g6`0]I\\Oc6d0aIXO^6i0cIVO]6k0eIRO[6n0fIQOZ6o0hIoNX6Q1kIlNU6T1lIkNT6U1nIiNR6X1PJeNP6\\1RJaNn5_1UJ^Nk5b1VJ]Nj5d1WJZNi5f1WJZNj5e1WJZNi5f1XJYNi5g1WJWNk5h1WJVNi5j1XJUNi5j1XJTNi5l1k2O010O1O002N001O1O2N1O101N1O2EZDoNh;o0YDoNj;o0XDoNi;P1XDoNj;o0VDPOl;P17011OO0O3N4L11N01O1O10O100O00012NO01001N1N2O1N3O1N3N001O0O2M2N5JbPP4" + }, + { + "size": [ + 425, + 640 + ], + "counts": "i2T5U801O0000000001O01O1O10O01O001O00000001O00001O1O0O2O1O000O10O100000000000000000000000O11O000000001O0000001O0000O100000000O10000O100O10jLRHQ1n7iN]HR1c7jNeHR1\\7iNjHU1W7fNTIS1l6jNXIU1i6iNXIW1h6gNZIY1f6eN]IY1d6fN]IX1e6fNeHoNVO2a0W2e7eNaH_OHj1i7gN[HDMa1k7iNUHNOU1m7lNSH21n0n7oNRH4Ol0Q8nNPH8Oh0R8POoG8Og0S8POmG<0c0S8QOlG>1`0S8QOlG`01>T8ROkG`02=S8ROlGa04:P8UOkGb0=0i7]OjGc0a0Lf7@jGc0f0H`7EiGd0k0C]7HhGd0Q1_OW7MhGc0T1^OU7NfGd0\\1WOo65eGc0d1nNj6?bGb0f:_OWEa0j:DQE;P;FnD:S;GkD:U;l01O1O2N100O1O1O1O100O100O1O1O10O02O0O10001N2O1N100O100O100O101O0O2N10000O2O001N2O1OO0100000O001O10000O1O1N11000O100O10O010000001N101OVGmNa5P1_3M00001O1O00001O001O00001O00010O00000010O0O101N100O2O00000O2O0000001O0O10010N1O1N200O1O2O0O11O001O1O10O0001O0000000000000O1000N2O01000O10O1000000O100O2O00O0O2M2YFZOj6i0TIYOl6f0SI]Ok6d0SI^Om6b0RI_On6`0RIAm6`0SI@n6>SIBm6>SIAo6>QIBo6=RICn6=SICl6=TIBm6>SIBn6=RICn6?", + "choices": [ + "A. is in front of and under .", + "B. is in front of and under .", + "C. is beside and behind .", + "D. is behind and over ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_369.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000237864.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "1f2c:00001O0000001O00001O001O001O000000001O00000000O10000O1000000O1010O0O1000000000O10001O01N10000000001O0001OO100000000000O10001N10O1000000000000001O01N10000000000010O01O001O0000000O100001O01N1000001O000001O01O000O110O00000000O100000000O100000000001O001O1O001O00000000001O000000N2N1O201O0O1001N110O1O2N1O1OO100O100O10000O001O2O0O1000O11O0001O001O1O001O000000000000O10000001O00000000000000O10000000000000000O1000000000000000000000000000000000000000000001O00001O001O000000001O00000000000000000000O1N2O1O1N2O100O1000000001O00001O0000000000O100O1O100O100O100O1000000001O001O001O1O001O2N1O1O1O1O1O1O1OO1000000O10000O11O00O10000000000000000O10000O10000000000O10000O10000000000000000000000000000000000000000000000001O000000001O00000000001O0O110O0000000000O1000O11O0000000001O00000O11O000O01001O000000O10O2O0000000000000O10000O10O10O2O0O10O1000O2OO10O101O0000O10O101O0000000O1000000O100O100000000000000001O000000001O0000O011O000O01001O000000O1001O00O1001O0001N1000010OO010O2O000000O1001OO10001O10O01N2O1O1O0000001O010N10001O0000001O1O10OO2O1O0000001O00001O00000000001O000000000000001O00[OPFQNP:o1PFQNP:d201OO100000000000000YOoEVNQ:j1oEVNQ:a200000000000VOoE\\NQ:^2000000000O100O100000000O10000O10000UOkEaNV:_1jE`NW:`1iE`NW:Y2O02O000O100O1UOgEfNY:Z1gEfNY:Z1gEeNZ:U2O02N10000O10000000O1001O00000000000000O100010mNcEXO]:k1ON2OdEXMY:l20kNfE[OZ:k1N1000010OhNgE@Y:`0gE@Y:?hE@Y:`0gE@X:a0gE@Y:?iE@W:`0iE@W:`0iEAV:?iEBW:>hEEV::kEAZ:?gE_OZ:a0gEdNM41L\\:\\1fE[O\\:e0dEeNN9_:Q1dEfNM4OF`:" + }, + { + "size": [ + 425, + 640 + ], + "counts": 
"mce59b0Le;9PD4k;NQD7i;^O[D3Jc0h;XOcDR1m;L01M4M^OZDMZ;2jDKV;5lDIT;8SE@m:`0n0:F2O1QDPOa;R1[DROc;P1[DPOe;Y1HdNdD\\1];fN`Dm0O\\Oa;c0iD[OW;c0lD\\OS;d0QEXOP;g0SEVOl:j0j0000000002N3M3N[OQD0m;JnCI=8eWIAh6?YI@g6`0]I\\Oc6d0aIXO^6i0cIVO]6k0eIRO[6n0fIQOZ6o0hIoNX6Q1kIlNU6T1lIkNT6U1nIiNR6X1PJeNP6\\1RJaNn5_1UJ^Nk5b1VJ]Nj5d1WJZNi5f1WJZNj5e1WJZNi5f1XJYNi5g1WJWNk5h1WJVNi5j1XJUNi5j1XJTNi5l1k2O010O1O002N001O1O2N1O101N1O2EZDoNh;o0YDoNj;o0XDoNi;P1XDoNj;o0VDPOl;P17011OO0O3N4L11N01O1O10O100O00012NO01001N1N2O1N3O1N3N001O0O2M2N5JbPP4" + }, + { + "size": [ + 425, + 640 + ], + "counts": "i2T5U801O0000000001O01O1O10O01O001O00000001O00001O1O0O2O1O000O10O100000000000000000000000O11O000000001O0000001O0000O100000000O10000O100O10jLRHQ1n7iN]HR1c7jNeHR1\\7iNjHU1W7fNTIS1l6jNXIU1i6iNXIW1h6gNZIY1f6eN]IY1d6fN]IX1e6fNeHoNVO2a0W2e7eNaH_OHj1i7gN[HDMa1k7iNUHNOU1m7lNSH21n0n7oNRH4Ol0Q8nNPH8Oh0R8POoG8Og0S8POmG<0c0S8QOlG>1`0S8QOlG`01>T8ROkG`02=S8ROlGa04:P8UOkGb0=0i7]OjGc0a0Lf7@jGc0f0H`7EiGd0k0C]7HhGd0Q1_OW7MhGc0T1^OU7NfGd0\\1WOo65eGc0d1nNj6?bGb0f:_OWEa0j:DQE;P;FnD:S;GkD:U;l01O1O2N100O1O1O1O100O100O1O1O10O02O0O10001N2O1N100O100O100O101O0O2N10000O2O001N2O1OO0100000O001O10000O1O1N11000O100O10O010000001N101OVGmNa5P1_3M00001O1O00001O001O00001O00010O00000010O0O101N100O2O00000O2O0000001O0O10010N1O1N200O1O2O0O11O001O1O10O0001O0000000000000O1000N2O01000O10O1000000O100O2O00O0O2M2YFZOj6i0TIYOl6f0SI]Ok6d0SI^Om6b0RI_On6`0RIAm6`0SI@n6>SIBm6>SIAo6>QIBo6=RICn6=SICl6=TIBm6>SIBn6=RICn6?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. No object is in the background." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_370.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000099810.jpg", + "mask_rles": [ + { + "size": [ + 332, + 500 + ], + "counts": "jmi04V:3M2M3M3L4I7L4J6K5H8D]L@d3a0\\L\\Of3e0ZLXOh3h0YLVOh3j0ZLROh3h0`LUOa3i0bLTO`3i0cLVO^3GXK@_1g0Z3E`K]OY1k0Y3GbKWOY1P1W3HgKoNU1X1U3FjM8X2GiM8X2FjM8X2GiM8X2FjM8X2FjM8X2FjM9W2FkM7W2HlM4V2KmM1U2NnMOS20PNLR23QNIQ26RNEQ2:Q400O100000000O100000000000000O10000000000000000000000000000001O00000O11O000000000O10SNFhI9V800O1O1000000O01[NJTI6i6MXI2e62ZIMe65[IKc67\\IJc67]IId66\\IJc67]IIc67]IJb66^IJb67]IIc67]IIc67YICeN6R87XIDfN5R87XIDeN6S86XIDeN7R85YIDfN6Q87WIDhN5Q87VIEiN4Q87VIEiN5P86WIEiN6o76VIEkN5o76VI3i6MVI4k6LTI5k6KTI6l6JTI7k6JTI6m6IZH@=h0Y7IXHA>g0X7JWHA`0g0Y72[HUONj0g71ZHWOLk0i7OXH>f7U100O10013NO140LN2OO00O2N2O1M3N2M3N1O2M1oJkLk2U3RMoLm2P3RMSMm2k2RMXMm2h2RMZMn2c2RMaMl2^2nLjMR3U2`LSMgNU1h4h1VLgNh3Z1QLmNo3S1lKSOS4m0hKYOV4i0eK[O[4\\3N2N2O001M3N2M4L3O3L5L4L9G8H4L4L4L6I8I5K2N4L1O3M2N3M1N2O1O000O10000O1000O0100O1O0100O010O0000010O001O10OO2N101M2M30000000000001O2N2N2N2M4L4M2O1N1O1O10O010N2K5M3N1O2L400O10000000O101O001N2O1O2M2O1O1N3M2N2N3M8F6KfXZ1" + }, + { + "size": [ + 332, + 500 + ], + "counts": "bTP21X:3N2N2O1O1O100O2O0000000000001O0000000010O00010O000001O001O1[FBZ9?cFD\\9f001O01N2N3M4L5Llod2" + } + ], + "question": "What is the relationship between and ?", + "choices": [ + "A. is standing in front of .", + "B. is holding .", + "C. is beside .", + "D. is holding ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_371.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000191013.jpg", + "mask_rles": [ + { + "size": [ + 640, + 474 + ], + "counts": "Tia22kc03N3O0N2O1N2O2O0O000O2O100O1O1O1O10O00001001O0O10000000000000000000001O002N3M00000000001O00000000001O000000001O001O001O001O1O001O1O1O1O2N1O1O1O2N1O2N2N1O002N00O10000O1001O001O1O7I1O2N3M2N5K2N1ON2N2004L0000000000000001N100O1N2O1N3N1O101O00001N2O010N3N3M=C8fNY]Oj0dc0YOiUU4" + }, + { + "size": [ + 640, + 474 + ], + "counts": "^bS68fc03K5M3N2O1N101O0O101O001N101O1O00000O1000000O2O0O10001M2O100O1O001N2O1O01O00O11O0010O2M2O100O10O01000000O100O1000000O100000000O100000000000000000000O100001O000000000000000000O10000000001O0000001O1O1O100O0000000001O01O0000O2O1O2M2N2N5H:H7J4M2M:G3KdUg0" + }, + { + "size": [ + 640, + 474 + ], + "counts": "Xc0;dc01J7M2H8L4L4M2N3O100000000000O10001O00000000001O01O00O100O2O0001O00O100O10001O0OEPOh]Oo0Yb0QOg]Om0\\b0ROd]Om0hb0N3I6NG]OY]O=mb0DR]O9]c0K`a[30X^dL?", + "choices": [ + "A. is driving on , while and are parked.", + "B. is parked on , while and are driving on it.", + "C. All vehicles, , , and , are driving on .", + "D. is driving on , while and are parked." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_372.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000191013.jpg", + "mask_rles": [ + { + "size": [ + 640, + 474 + ], + "counts": "Q_b1:fc01N3N1O010O1O0O2O00001O0000001O000000000K6J5K5L4H8J6L5J5I8E;H8I6J6I7J5K6L5I6I:C7OfJcAS5Z>PKfAo4Z>RKfAn4Y><1000O010O00000100O1VKeAn3\\>QLeAP4Z>PLgAP4X>oKiAQ4W>PLhAP4W>QLiAo3T>XKkAj0On3W>XKjAd5Y>0N10100000O0100O001O001O010O00100N20^JmAV5S><0OO2O001O10O100O10O1000O1N1gJmAg4S>c000O1O1OfJRBd4n=]KPBd4P>^KPB_4Q>aKoA_4Q>aKoA_4Q>`KPB`4o=aKRB^4n=aKSB_4m=k000O000O2O001N10010O1N101O00001O1O0001dJZB]4e=cKZB^4e=P1OUK\\B[3c=eL]B[3c=a101O1O00010O001OkJaBi3_=WL`Bk3_=[101O000010O01O001O0010O010O2O0O1O2N2O0O8H204J4L2XJjAb5^>MO2N010N101O0O2O1O0GYJXBi5g=801O0O2O0O2O0O2N101O0N3O001N101N1O2O00001UN^ISEO8i6k6VITJ0Oc8j5]H_HQO514O10Fc00g9c5QF]JP1OPO2N0S5" + }, + { + "size": [ + 640, + 474 + ], + "counts": "mei2`0Xc0a0B9H7I6L5J5L5K4L4M2M4L3N3L3L4M2N5L2N2M2M5M2N2N1O2N1O3M1O2N2N1N3N2N101M3N1O1O2N1O2N101N1O2N1O2N1O2N100O1O2N101N1O10001N1O1O2N10000O1O101N100O100O100O10000O100O10000O1000000O100000000O1000000O100O01000000O01000O10000O10000O10000O100O1O100O10000O10001N100O100O101N1O101N100O2N1O101N101N101N1O2N2N1N3N1O101N2N2O1M2O2N3M1N3N1O2O1M2O2N3M2M5K3N1O2M6K3L4M2M4M3L5J5L3L5L5J4L7I7H8G>@YSi2" + } + ], + "question": "What is the relationship between and ?", + "choices": [ + "A. is driving on .", + "B. is hanging from .", + "C. is hanging from .", + "D. is parked on ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_373.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000057027.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "moe05R=6I6L3M3N2N1N3J5M4L4M2O1L5N2M2O1N3L3O1N3N1O2M2N2O1O2M2M3O1N2N2N2IZMfEj2Q9lMoF[O5S3V8mLiGY1Oj1h6WMZIGLX21j0S6g0kIYOe5n0ZJQL0V3a5m0aJlKNW3\\5Q1gJSOW5l0lJTOo4f4L4N2MWJVKaM3_5f4nLdKP3Y4mLYKeM?`5V4iLnKZ3P4fLPL\\3n3iLbKd3\\4f2M3N2N2NROTLbHk3[7\\LbHa3_7cL_H\\3_7hL`HV3`7nL]HR3`7UM\\Hk2b7_10ON300N200O0N3M30000O002N1O1O2N01001N1O2O0O1000[LVIl0h6TO]Ih0e6TO_Ij0c6ROaIm0_6jNkIT1X6aNQJ_1Q6ZNUJd1o5nM\\JQ2`8N100O100O100O2N100O100O10000O1000000O100O1O100O10000O100O1O1OlNjMVGV2h8PNTGP2i8TNVGk1i8WNWGi1f8ZNZGf1c8]N]Gc1a8_N_G`1^8dNbG\\1[8gNeGX1_7RNSHl0>R1V7MjH3S7NVHiMf0Z2Q7OSI1l64iH2V71_H9`7J\\H7d7h2N2O1O1O0O2O1O1O0O2O0O200N1O2O1O1O10O010000O100O10000000O10000000O01000000O1000000O100000000O10O100000O1O100O1000000000000O100000000000000000000000000000000000000000000O10000000000000000000000O101O00000O101O00000O100000000O100000000000000000O11O00000000000000000000000000O101O000000000010O000000000lMlHSNS7j1QIUNo6i1TIVNl6i1VIVNj6i1WIWNi6g1YIYNh6f1XIZNh6e1YI\\Nf6f1XIZNh6h1VIXNk6h1nHaLHg1[7R4001O001O0001O01bJTIb0J]2T7kL\\I<4[2b9eM_E]1f:]N_E_1c:VNhEi1l:0001O00000000000001O0000001O000000000oNZN^Ff1_9_N_Fa1`9bN^F^1`9eN_F[1`9gN_FY1_9jN`FW1]9lNbFT1]9mNcFS1[9POdFP1Z9SOeFm0Y9VOfFj0W9ZOgFg0V9^OhFb0V9AiF?U9FgF;V9n1N2N2M3I7K5L5L3N2N2N2M3M3N2N2N2N2O101O01O2N2N1O1O2O2M3M2O0O4L4M0O3M002N5K4L1O2O2M3M1O1O1O5K3M2iMZFf0i9TO^Fh0c9VO_Fi0d9QO`Fo0b9lNaFS1f9cN^F\\1m9UNWFk1e:O0000000000000000001O0000000000TJ\\N2d1HeN5[1GmN5S1FVO6j0H]O3c0LB0>OFN:2KI561EO;8^OGc0`0VO@i0e0SO[Ol0j0POVOP1l0nNTOR1Q1TKXM`3g1Z1]1aNcN]1h1ZNWNe1l1ZNTNe1m1[NSNd1n1[K[Lm2g1h1T2iKcL]1Y1j2Q4YLoKg3T4VLlKk3T4TLlKl3U4SLjKn3X4PLhKP4Z4nKfKR4\\4lKdKU4[4kKeKU4\\4jKcKX4]4gKbKZ4_4eKaK\\4_4cKaK]4`4bK`K^4b4`K]Ka4e4\\K\\Kd4i4UKYKk4c600001N2O1O00001O000000000O2O0000000000000000010O000000010O0001001O0O2N10O01oLjJmMV5R2mJ
kMU5o1eJYK9f2S5P2dJZK9g2S5o1PKPNT5l1lJUNU5h1_JaK:h2Y5d1]JdK3O1j2`5j40iL]JhNb5W1`JhNb5U1aJiKMj2b5eM[JT3`0TOW5`McJY37UO[5YMdJh1D[NNZ1=]1U7[MfI;^O[2e93M1O2N2M3N1O2M2O6I:F?A2O1N3M3K5L4M2M4H;XNf1F:G8H:Hb0YOc0@;E:F=A>UOk0A=H:Aknb0" + }, + { + "size": [ + 426, + 640 + ], + "counts": "m\\Z2:o<4K2K5M0_C]O_Q9BRG`0g8EYG>c8C]G?`8C]Ga0a8]O`Gf0]8\\ObGf0\\8\\OWGQ1g8ROmFY1Q9jNhF\\1W9\\1O1N3N1O1O1N2O1O_OjFPMV9m2PGoLP9n2g0N2N2N3M2O100O101OSF_MS9b2cFkMX9Y3K6J1N2O0O2O000O10000000000O010000QORGfL1h0m8\\2cG\\M^8_2jG^MV8_2nG_MS8_2PH`MP8^2SHaMm7]2VHaMk7\\2YHcMg7[2\\HcMe7\\2_H`Mb7`2d1000000001N1000O100000000000000O01000000000O2O00000O10000O100O10PGgMo6Y2nHlMP7T2mHPNR7T2hHnMX7V2aHmM`7U2ZHnMf7X2jGSNU8a3000000000000001O0000000CWGYLj8a3^G\\Lb8b3bG[L`8c3cGZL^8d3e0N3M3N1012cF_Lk8R4N01O1O00001O001nMfF2Z9MkFOU90oFMR92VGGj89^G^Ob8c0aGYO_8g0gGSOZ8m0lGlNT8U1VH`Nk7_1WH_Ni7c1VH\\Nj7e1VHYNl7g1THXNl7i1UHUNl7k1UHRNl7o1g1100O100O011N10000O100O4M2O1N1O3M3N0]E^MN0Y:Q3MN3M3oMlEe0Z:PO\\F?l9UO`Fb0Z;GZYc3" + } + ], + "question": "Which statement best synthesizes the relationships involving and ?", + "choices": [ + "A. is guiding from a position above it.", + "B. is positioned over and is guiding it.", + "C. is walking in front of .", + "D. is guiding while walking on it." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_374.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000498463.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "YQQ24j>o0ROd0]Oh0XOl0TO>B00000000000000YMTEZ1l:eNVEZ1j:eN_ESO_Oh1R;TOcF1]9OeFO[90gFOY90hF0X9OiF1W9NjF2V9MkF3U9LlF4T9LlF4T9KnF4R9KoF5P9LPG4P9KQG5o8JRG6n8ISG7m8FVG:j8DXGe8B\\G>d8A]G?e8_O[Ga0f8]O[Gc0e8]O[Gc0e8\\O[Ge0f8YO[Gg0e8YO[Gg0_;0000000001O7J3M3N2N1O2N2N2O1O0O101N100O10000O1000000000000000000001O00001O00001N10YI" + }, + { + "size": [ + 480, + 640 + ], + "counts": "ZUW28f>3N2NY1hN4K2N2N1O1O1O100O100O1O1O1O1O1N2M3O2N10O01O1O10O10O2O03Me0[Oi0VOd0ZO`\\d6" + }, + { + "size": [ + 480, + 640 + ], + "counts": "l_T7i0V>2O000000000O1O1bNVOdDl0Y;YOaCKd0V1k;YO_CY1`001O000000OUE^OZ7b0dHBZ7>cHG[7:bHI]77bHK]75bHL^74bHL^74bHL^73cHN\\72dHN\\72dHO[71eHO[71eHO[71eHO[70fH0Z70fH0Z70fH0Z70fH0Z70fH0Z7OgH1Y7OgH1Y7OgH1Y7OgH1Y7OgH1Y7OgH1Y7OfH2Z7OeH1?[OV3e0[L0;2g2MoL18h0U2WOcM17]1b1bNWN16R2o0mMkN25\\2e0bMWOd0A^2d0mLL[1iNY2k0\\L\\Ng1U2^Nd2e0dJeNj0b0Hg1V2[Nf2\\2dJPM0O4Y2c2[Nf2Z5ZMeJg2^5`KRJ`0?Q4Q7`J^Ho0?b4^7^JTH`0NB`0`5h7^JjG50M=`5l8^JgF]5W:WOh0XOg0YOb0^Of0ZOd0TN]BR1^>nNeA2Q^Y7" + }, + { + "size": [ + 480, + 640 + ], + "counts": "j`T7a0]>201O000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001O0000000000000000000000000000000O100000000000000000000000000000000000000000001O001O000000001O000000000000000000000000000000000000000000TH" + } + ], + "question": "Which statement accurately describes the arrangement around the microwave, ?", + "choices": [ + "A. is on and is beside .", + "B. is attached to .", + "C. is on and is beside .", + "D. is on and is beside ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_375.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000581062.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "X]`11d21Z63_I2a6NZI8d6JjG1[OJl0d0o7AdGc000[8^O_GR2a8oMVGY2j8:OO2N1E and ?", + "choices": [ + "A. is on top of .", + "B. is holding .", + "C. is standing on .", + "D. is sitting on ." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_376.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000029640.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "h\\R51T=9H6M3L3N2I[OfCf0X<8M2O1N3N1O2N2O010O0000010O00010O00010O01N1O1M3O2N1000000O1O1O1O2O010O01O000001O0000001O001O0O1O2M2L5M2N3M3Mf_^2" + }, + { + "size": [ + 426, + 640 + ], + "counts": "Pim43V=3N2N1N3N1O2M3N1N2O1O000O_OeC4\\ and ?", + "choices": [ + "A. is positioned next to but not touching.", + "B. , the carrot with a slight curvature, is attached to .", + "C. , the carrot with a green stem end, is attached to .", + "D. is larger than ." 
+ ], + "answer": "C", + "type": "relation", + "image": "images/vqa_377.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000370486.jpg", + "mask_rles": [ + { + "size": [ + 640, + 421 + ], + "counts": "h4Z31W4Q9]3N2O00fN^GkFDHP9X9nGbF^8g8h1J7L4K5L4K5L4H8GhCdH^L2N1O0O2O002N1O1N3M``k6" + }, + { + "size": [ + 640, + 421 + ], + "counts": "[f97fc07J3M3N2N2M3J6M22O0000O010O001O000010O00000000001O00001N10000O2O0O2N4L5K9G_eU7" + }, + { + "size": [ + 640, + 421 + ], + "counts": "eob22nc00a_:OVdFg0^kN9G8l@lNT;X1^DVO^;m0ZD]Ob;e0XDAe;a0WDGb;=YDMc0^Nf7g1dG2a0XNi7h1cG5a0SN[72QFk1^1VN`NU2a2hMU7g2aG;U1nLW7l2`G=k0dLTN3_9o2oFR1W1SLTNMb9S3hF[1Y1_K_N<_9n2`Fc1W1mKT8j7]GfHX8j9F8YGdD]8e;H:G:E7I5J7J5L5XIkBd07g4`=lJeBS5]=gJhBX5P>001O2NBdAXK\\>f4hAXKX>f4b0ObAUKi=k4WBZKd=e4]B\\Kc=b4^B`K`=_4dB^K]=`4fBbKW=]4jBdKT=\\4mBbKU=]4oB^KR=a4W10QB^KX9i0hGi3oNaKW1MQ5X1dJZ3SObKd8d1\\F]NV1\\4JcK_8]2eGo1i9XNTFh18dKd7l2QHa1:eK^7R3VHY1[^OCea0>W^OHea0[1N5L1O1N5L3L4M3j@^Mh?AFOK1d00j?`3b_ORMc?f300O1K5O1ZOaKjA`4k>01O0000000000000000000000001OO10000000000AaK\\A`4]>oKYAS4k>?2N9G2N2N9G2N1O001O00000000000000000000000000000000000000O10000000000O100O1K5O100O100WOdK]AL=`4k=TLTBl3j=WLTBj3l=XLmAm3n<\\KlCf5o;PK_CQ5a01O00000000000000000000000000000000O10000000000hN]LSBc3cgKfAX4S?N1O001O0000001O1O6J:F;E6J7I5K3M3M2\\Nk^O9Ya0@n^OS1QAmNm>V1RAjNl>Y1SAgNh>^1XAbNg>`1XA`Nh>a1WA_Ni>a1WA_Ni>b1VA^Nj>b1VA^Nj>b1VA^Nj>c1UA]Nk>c1UA]Nk>d1SA]Nm>c1SA]Nm>d1QA]No>d1PA\\No>f1n@\\NR?e1l@\\NT?e1j@\\NV?W3O1O1WO\\KVBd4j=]KUBc4j=_KTBb4l=^KTBb4l=_KSBa4l=`KTB`4l=aKSB_4m=aKSB_4m=aKSB_4m=aKSB_4m=aKSB_4m=bKRB^4n=bKRB^4n=cKQB]4o=cKQB]4o=cKQB]4o=dKPB\\4P>eKnA\\4R>eKmA[4S>fKlAZ4T>gKjAZ4V>hKfAZ4Z>f000000000O10000000000000000001O00O1001O0000000gJdAn4]>lJjAR5c>O1O00001O0000000000000000000000000000001O00000000001O;E2N2N1O1O1O000000001O00000000000000000000000000000000000000000000001O001O00000000000000000000001O00O100000000000000000000001O3kKh@Mm2" + }, + { + "size": [ + 640, + 421 + ], + "counts": 
"Ybf18cc06UOj0J7K4L3O2M3M3N2M3N1O2O1N2N2O001O1O100O1O00100O1O100O1O1O001O100O1O1O1O1O1O1O1O1O1O1O1O1N2O1O1O1O2M2O1O001N101N101O000010O001O10O0100O0010O010O01Ec_ORM^`0l2g_OPMY`0o2=I7K4M4N101N2N1O2M201O0N3K4M4L4O0El]OiNWb0W18100O1OO2O01O0010O00001O10O010O010O1O00100O100O0010000O101O01O0000000000010O000001O0000001O001O001O0O2O00001N3N2N3Q^OPNha0\\2H7I1N101O00O010O1O1O1O1O1O1O100O1O1O1O1O1O1O2N100O1O1O101N1O1N200O1O1O2N1O1O100O1O1O2O0O1O1O2O0O1O100O2@?L4J6L5L3M3N2NSV\\2" + } + ], + "question": "Which object are both and standing on?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. " + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_378.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000087875.jpg", + "mask_rles": [ + { + "size": [ + 487, + 640 + ], + "counts": "X6n8Y60O1000O100000001O000000000000000000000000000000000000000001O00O10000001OO1001O000000O11O00000O1000000000RNiIlJW6R700000000000QNjImJV6S5jImJV6R700001O000000O10000000O2O0001OO010001O0000O1001O00000000000000O20O0O100000000000000000000000000000000000O100000000001O0000O1001O0000O11O000000O1001O0000O1000000000000000000000000fJlIa1T6]NRJ_1n5_NUJ`1k5`NUJ`1k5_NWJ`1i5_NXJa1h5_NXJa1h5_NXJa1h5^NYJb1g5\\N\\Jc1d5\\N^Jd1a5ZNbJd1_5ZNeJd1[5[NkJ`1U5_NoJ^1Q5aNSK\\1m4dNVKY1j4fNYKX1g4hN[KW1d4hN_KV1a4jNaKT1_4_MlIEg1j2]4]MSJFb1k2[4]MVJGa1j2Y4_MWJFb1i2W4`MXJGc1Z1_N]Nf5b0XJGd1Y1bNZNb5e0YJHj0ROZOT23[N`5h0YJGi0TO[OP25]N]5g0[JHh0VO[Ol16^N\\5h0\\JGg0XO\\Oh17`NZ5i0\\JGg0ZO[Oe19`NY5i0]JHe0]O[Oa1:aNY5i0_JFd0_OZO`1;aNX5j0aJCc0BZO^1;bNX5j0cJB`0E[O[1;cNW5k0dJ@`0G[OZ1:dNW5k0gJZOa0LXO[18dNX5j0mLQOkM]11hNW5h0\\N<_LkNU5>c1XOaN>Y8K5N2N2O1O1O1O1O1Nbjf00`UYO1N3N1O001O1O1O1O000aAHo=8PB6c=J]B7b=I^B8a=H_B8a=H_B9`=G`B9`0Dl2K]L3]84kG:;Gh2S1fMcNlN3\\66oI;8Hg2W1eMmNV5HWJ<7Gg2Y1dMmNW5GXJ;5Hi2Y1cMmNW5FYJ<4Gj2Z1bMmNW5FYJ<4Gl2Y1_MnNX5EZJ>3Dm2[1^MoNW5D\\J=2El1K_N^10QOW5B^J?0Dj13^NW11ROY5@`J`0NBj18]NS12SOS61cI@LL_1a0kNo02TOV6ObI@KO_1a0lNm00UOX6ObI\\OM4[1?nNm0OVOZ61mI_Ok0=oNm0NWO[60mI@j0;QOo0KWO^6NlIB
i08SO`2X6fMlICh01ZOf2R6fMlIDh0L]Oj2Q6dMjIHQ2d2V4cMhIKP2c2Y4aMgIMn10XMo1d8RNPJ2\\Ml1d8TNlI2`Mj1d8UNiI2eMh1b8XNfI1hMg1b8ZN`I2PNIIV1g8POYI7WNBJW1f8QORI<_NZOJY1e8QOjG[O=U1[OTOJ[1d8ROhG^O2[1IgNK]1c8SOfGHDW19`NJ^1c8SOeGg1MgML_1b8TOdGf1NfMM`1`8VOcGe11dMLa1`8VObGf13aMMb1^8XOaGe14aMM=I`0e8N_Ge17_MK?J?f8M^Gg16]MM?J`0e8L_Gh17[MK`0Ka0d8K`Gi1a0jM\\Ob0c8K_Gk1a0gM^Oc0h6oNkJj0_Nm1`0fM^Oe0h6nNkJ8WN88W2`0eM_Of0c8SOZG85Z2?eM_Of0d8QO[G84[2`0dM^Og0]6PO`K0RN93Z2c0eM\\Oh0\\6POlK6jMY2f0hMXOf0U9WOnFV2k0kMSOg0V9VOmFV2m0kMQOh0h9\\1YGkMnNj0h9Z1\\GkMmNk0g9Y1]GkMlNn0f9W1lGiNT8W1lGhNT8Z1jGhNU8Z1iGhNU8Z1iGfNW8b1`G`N_8g1ZGZNe87TF6R1Cj86VF6P1Eh85YF6n0Fi83ZF;i0Cl81\\F=h0Ak82^F>f0@l81_F`0d0_Om81_Fb0a0cNXOe9MZFI6\\1b0\\N[Oc0c9K[FI6]1b0ZN[Oe0b9J\\FJ5]1n0oNa8I^FI4_1m0oNa8I^FI4_1m0nNb8J\\FI5`1l0nNc8I\\FI5`1l0mNd8J[FH6_1l0nNe8JZFI5^1m0mNf8KYFI5]1m0oNf8JYFI5]1l0oNh8KWFI4\\1o0nNh8MUFH5Z1Q1POf8MkFR1?QOh8LiFQ1a0ROg8LiFo0c0TOe8LiFn0d0VOd8KhFm0e0ZOb8HjFl0f0_O]8DoF3lN;i10[8@RG2nN:g17V8]OVGMQO>d1:V8WOdG<7a0l9]OVFd0i9[OXFe0h9ZOYFf0h9XOYFh0g9UO\\Fk0f9QO]Fn0n;0001O00001O000000001O0GlA^OU>a0mA^OT>`0:O2N100O1O2M4M3LU[h3" + }, + { + "size": [ + 487, + 640 + ], + "counts": "[RY24Q?5K3N100O101N1O1N3N1M4N2N2N2O1N3N2N0O100dNkNfDV1T;ROiDn0U;UOjDk0V;UOjDk0T2SO\\53`Hj0S2\\OU5JhHj0S2]OT5JhHi0S2_OT5HjHh0R2AS5GkHh0Q2CR5FmHf0R2DQ5FmHf0R2DQ5GlHe0R2FP5GlHd0T2EP5JiHb0W2CP5LhHa0X2Dn4NgH?[2Cn40bHa0_2_Oo4T2QKlMn4Y3mGYM?A[7]6^OS1[O5M3N1O1O100O1O100O1]O_G_Jb8`5aG\\Ja8d5a000O10000000000O100O1K5N2O10000000000000000001O1O7I00001O0000000000001O0kF]Jf8d5VGaJh8P6O1O1O1O0]GgIX8Z6gGfIY8[6fGfIY8[6fGeIZ8d600gH[IP6f6oI[IP6f6nI\\IQ6e6nI[IR6i6iIYIV6X7YIhHg6Z7WIfHi6[7VIeHi6\\7WIeHh6[7XIeHN7S6T7oIfHJ9V6R7PJ\\IT2QOdM12d1V1`6POjHl1c1gNe5\\OhHi1g1kNa5[OiHg1i1nN_5ZOhHh1i1nN_5ZOiHg1h1nN`5[OhHg1h1nNa5ZOgHg1j1nN_5[OhHf1i1oN_5[OhHf1j1nN^5\\OhHe1l1mN^5]OgHe1k1nNh6P1ZInNg6Q1ZImNi6R1YIbNnLKo9a1c3N2N2N2N2N2N3M2L5M3M3J5L4L4N2N2N2M3NlkQ5" + }, + { + "size": [ + 487, + 640 + ], + "counts": 
"\\gm21V?1O4K2O00001O0O1000000000000001O00iNK\\C5clCZO98k;b0hCVO=8k;R1UDnNk;R1UDnNd1@n7c1\\FnNd1AP8c1ZFlNe1AR8f1VFiNh1AR8g1UFhN^1_O_N2o9g1TFhN]1@_N2P:f1TFhN]1B\\N2R:e1TFgN^1La8[1QFiN^1Lb8\\1nEhN`1La8]1oEgN`1La8^1mEgNa1Lc8V2]GiMe8V2ZGkMf8U2ZGkMf8U2YGlMg8T2YGlMf8V2YGjMf8W2ZGhMe8Z2[GfMe8Z2ZGgMf8Z2YGfMg8[2XGeMh8]2VGcM7Ak6l2cGRMV1a0E_OKQ6Q3RJ_M=F_OLQ6P3RJ]M?H]OKR6Q3PJ]Ma0G]OKR6T3mIZMd0G]OKANk5W3bJYMd0H^OJAMl5Y3`JXMe0H^OJ_OOn5Z3\\JVMg0J_OH^OOR6Y3XJVMh0K_OH_ONR6Y3WJWMh0K@H\\OOU6Y3UJUMj0K@H\\OOU6Z3jIaLMc0W1MAE[O1Y6W3eIeLNa0X1M@F[O0a6KUI=9P2LjML;R1;JZO[OO`7[3VIbMNTO[OOb7[3XI_MKWO[OOb7\\3WI_MKXOZOMe7[3WI_MJZOWONi7X3VIaMIFR7g2VIcMHFR7h2TIcMJEQ7i2UIcMICU7h2RIeMHDX7e2PIjMROTO4=k7d2nH^NSOoNn7d2PIYOo6h0QIXOn6i0RIWOk6l0UITOk6l0UITOj6m0VISOj6P1SIPOm6m0VISOj6m0VISOb3\\MbN`3mMSOS3_MTL0k2_3mMROU3^MSL1k2_3lMSOV3bMlKNQ3a3jMPOX3jMmNX3jMkLUO^O1O1f1R4jNkNZ3lMhLBR1g3lNkN[3kMgLBS1h3kNjN]3kMeLXO^O6f1m3kNiN_3]MULI>:Q1f3mNjNd3XMiL>a0?`Nl1c0Nf3VMgLc0>QN_O]1Q6aNfLU1]Mk0>mM_OO0c1P6aNfLU1]Mi0a0mM^Of1o5_NfLS1\\Ml0g0AY5]NeLV1\\Mi0h0C;`N\\6T1aHh0i0D1^NTO4a7?\\HK6\\1l0G4eN]6=^HJ5[1n0F2lN\\68`HK4Z1o0FLSOa62`HJ5X1P1JnNjN6=X7KaHJ4Y1P1JmNmN4]7GcHJ2S1V14fNjNO`0b7DaHJ3S1V15fNiNKc0f7AbHI3Q1W19cNhNL<2]Oc7m7H[HEGN:U1Z1;bNeNJc0n7C\\HG5Q1V1=aNfNHc0Q8_O\\HK5l0X1a0_N@i7VO\\HL6O_Of0c0]O8]1@ZOe7YO]HM:f0J@9Z1AZOe7YO^HL:e0GEg0@@P:J`Ff0@@P:HbFh0^OAo9FdFi0]OAo9EfFi0[OBo9DgFk0YOAS:AeF5lNO=;S:_OdF5POO9=Y:XO_F;RON6?[:UO^F>QON6>j;CQDO5>k;AQD24fY0000000000000000000001O000000000000000000000000000000000000000000000000000000000000000000000000001O0000000000O10000001O0000000000000000O100000000000000000000000000000000000000000O1001O00000000O100000000000000000001O0O2M3K=Am^j7" + } + ], + "question": "Which object is positioned between and ?", + "choices": [ + "A. No object is between them", + "B. ", + "C. Both and ", + "D. 
" + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_379.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000542089.jpg", + "mask_rles": [ + { + "size": [ + 500, + 375 + ], + "counts": "i`P17\\?3M3N3L4L3N1N2O1O1N2O1N3N1O1O1N2O001O5K2N1O1N101O00001O00001O0O1001OO2O000000000000000000001OO100000001O000000000O100000000O101O0O100O100O2N1M3M3O2N1O1O2N1O1OdNROTDm0m;SOTDk0m;UOSDj0\\=ObNXOSDf0n;ZORDe0`=M3M2O2M3M3M4L]UY3" + }, + { + "size": [ + 500, + 375 + ], + "counts": "WgQ14_?9G2O00000O1O1N2M3K5A?N2N2O1O1001O2N5K6J1O2N2N4L4L6K1N00000000000000O1L4K6L3I7K5M3M3001O2N6J4L1O2L4K7L6IU`k3" + }, + { + "size": [ + 500, + 375 + ], + "counts": "RW73^?b0d@]Oe>0[Ak1a=UN^Bn1`=4OnMaBo1_=QNaBo1_=QNaBo1`=PN`BP2_=QNaBo1_=QNaBo1`=PN`BP2`=2O010000000O010O10000O1O1O1O10O10000000000O01000000O01000000000O1000O10000000000000O01000000000000O100000O100000O10000000O01000000000O10O101O1O2N1O1O1N2O00001O000O1000O100O0O2O1M3M210O10000000O1000000000O1000O10000000O1000O1000000000O010000O2OO10000000O1000O10O10000000O1000O1000000000O01000000000000O1000O1000O1000000N2^Ob0XOXoj2" + } + ], + "question": "Based on the arrangement of the objects, which statement accurately describes the vertical positions of , , and ?", + "choices": [ + "A. is over , which is on .", + "B. is on , which is over .", + "C. is on , which is over .", + "D. is over , which is on ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_380.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000229858.jpg", + "mask_rles": [ + { + "size": [ + 555, + 640 + ], + "counts": "gPT5PNhAL;\\2n=eMWB[2\\>2EcMbAa2Q>_MlAR3Q>nLoAV3S>7H4J4SOZL_C05i3P and ?", + "choices": [ + "A. is in front of .", + "B. is looking at .", + "C. is standing behind .", + "D. is eating ." 
+ ], + "answer": "D", + "type": "relation", + "image": "images/vqa_381.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000229858.jpg", + "mask_rles": [ + { + "size": [ + 555, + 640 + ], + "counts": "gPT5d1h0I7L5K5L:E3M100O010O001O1O001O1N2N2L4M4HbSU5" + }, + { + "size": [ + 555, + 640 + ], + "counts": "0Q:Z7000000O10000O10000O1O10000O10000000000O1O100O10000000000000000O10000O10000M300000000001O1O1O1O00001O00O100001O000000002N1O001O002N1O1O001O5K2N001O001O1O010O0O2O00001O001O0000001O1O001O00001O00001O0000001O0000O1000001N100O10000001O0000010O000000001O1N101O000010O00O10000001N1000000O10000O1O1O10000O1O100O100O100O11O0000000000001O1O1OO1001O1O1O00001O2N1O0000000O1001O000000000O11O0000000000O100O10000O1O1O1O1O1O1N2O1O1O100O1000000O10000O1000000O100O1SLmHaMT7\\2XIYMj6e2[IWMf6g2^IVMc6i2`ISMb6j2iIiL\\6i2YJnLi5n2\\JPMe5o2]JoLd5P3^JmLd5R3_JiLd5V3_JfLc5Y3`JdLa5Z3dJcL\\5\\3gJ\\L_5c3cJSLf5m3]JjKi5U4YJjKg5V4YJjKg5U4[JiKf5V4\\JhKe5W4^JeKd5[4\\JaKh5^4ZJ_Kh5`4b300UOfCeLZPNhAL;\\2n=eMWB[2\\>2EcMbAa2Q>_MlAR3Q>nLoAV3S>7H4J4SOZL_C05i3P and ?", + "choices": [ + "A. is in front of .", + "B. They are both eating from .", + "C. They are looking at each other.", + "D. is eating ." 
+ ], + "answer": "C", + "type": "relation", + "image": "images/vqa_382.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000313182.jpg", + "mask_rles": [ + { + "size": [ + 424, + 640 + ], + "counts": "i^b04R=4M2N1WCHa\\;BeDa0X;^OhDg0S;XOnDj0P;VOoDk0Q;UOoDl0Q;SOoDm0R;SOmDm0T;ROlDn0U;POlDP1U;oNkDQ1W;mNjDR1W;mNiDS1X;lNhDT1Z;jNfDV1[;iNeDW1\\;hNdDX1];gNcDY1_;eNaD[1`;cNaD]1e;00000001O00O1001O0000001O0000000000000000000000001O0000000000O10000000000O10000O100O1O1O100O100O100O100001O00000000000000001O00000000001O0000001O00001O00000000001O000000001O0000001O00000000001O00001O0000001O000000001O0000000000001O00000000000000001O00000000000000O100000000000000O100O10000O1O1O1O1O1000000000000GeNeD[1Y;gNcDM0\\1Z;nNeDS1Z;>N2N`NlDP1S;POnDP1P;ROQEm0m:VOREj0l:XOTEh0k:YOUEg0i:ZOXEf0f:]OXEd0f:^OZEb0c:A\\E`0a8dNaG50IU1Q1jN<^8iNaG8R1f0PO9\\8]OZHc0[OO[8_OhGS1L_O\\8^OfGU1N]O\\8^OfGU1O\\O[8@eGS11]OZ8@eGT1O]O\\8_OeGT1O]O\\8_OeGU1N\\O^8^OdGU1O]O]8^OdGU1O]O]8^OdGU1O]O^8^ObGU10]O^8^ObGU10]O^8^ObGT11^O]8^ObGT11^O]8^OaGV11\\O^8_O`GU12\\O^8e1aG[N_8d1bG\\N^8d1bG\\N_8b1bG^N^8b1bG^N^8b1bG^N^8b1bG^N^8b1bG^N^8b1bG^N^8c1aG]N_8c1`G^N`8b1aG]N`8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8b1`G^N`8b1`G^N`8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8b1`G^Na8a1^G`Nb8_1`G`N`8`1`G`N`8`1`G`N`8`1`G`N`8`1`G`N`8`1`G`Na8_1_GaNa8_1_GaNa8`1]GaNc8_1]GaNc8_1^G`Nb8_1_GaNb8^1^GbNb8^1^GbNb8^1^GbNb8^1^GbNb8^1^GbNb8^1^GbNb8^1]GcNd8]1[GcNe8\\1\\GdNd8\\1\\GdNd8\\1\\GdNM" + }, + { + "size": [ + 424, + 640 + ], + "counts": "[96o20n60RI0n60RI0n61QIOn62RINn62QIOo61PI0P70PI0P70oH1Q7OoH0R7OnH2R7NmH3S7MdG1@2k8NcG5^ONo8LbG:\\OJR9M^Ga0\\OBV9NZGg0[O\\OZ9NYGl0XOWO^9OfFU2Y9mMaFY2^9hM`FZ2_9gM_F[2a8_MnG7_O\\2a8bMlGS3T8oLjGR3U8PMiGQ3W8PMfGQ3Z8QM_Gh12jN_8m1gGnMY8R2YH]Mg7a2`HYMa7g2`HXM`7l2\\HSMe7o2XHQMi7Q3SHQMm7Q3kGUMU8o30000001O002M2O2M5CQGXLS9[2TGUNb9d1jFnM^9k1U1I7M2M3K5M=B7Ia0^OQSa7" + }, + { + "size": [ + 424, + 640 + ], + "counts": 
"f[75n<6N1O2O1N1N3L3M3M4L3L4N29H6I1O1O1O1O0000000O01O1O0O1fNHbE<`:OQE1Y;FiDKGHPde7" + } + ], + "question": "Which of the following statements correctly describes the actions of and ?", + "choices": [ + "A. is driving and is on .", + "B. is carrying and is carrying .", + "C. is carrying and is carrying .", + "D. Both and are carrying ." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_383.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000313182.jpg", + "mask_rles": [ + { + "size": [ + 424, + 640 + ], + "counts": "\\T_11Y\\a08\\P_O;]CBS\\;BeDa0X;^OhDg0S;XOnDj0P;VOoDk0Q;UOoDl0Q;SOoDm0R;SOmDm0T;ROlDn0U;POlDP1U;oNkDQ1W;mNjDR1W;mNiDS1X;lNhDT1Z;jNfDV1[;iNeDW1\\;hNdDX1];gNcDY1_;eNaD[1`;cNaD]1e;00000001O00O1001O0000001O0000000000000000000000001O0000000000O10000000000O10000O100O1O1O100O100O100O100001O00000000000000001O00000000001O0000001O00001O00000000001O000000001O0000001O00000000001O00001O0000001O000000001O0000000000001O00000000000000001O00000000000000O100000000000000O100O10000O1O1O1O1O1000000000000GeNeD[1Y;gNcDM0\\1Z;nNeDS1Z;>N2N`NlDP1S;POnDP1P;ROQEm0m:VOREj0l:XOTEh0k:YOUEg0i:ZOXEf0f:]OXEd0f:^OZEb0c:A\\E`0a8dNaG50IU1Q1jN<^8iNaG8R1f0PO9\\8]OZHc0[OO[8_OhGS1L_O\\8^OfGU1N]O\\8^OfGU1O\\O[8@eGS11]OZ8@eGT1O]O\\8_OeGT1O]O\\8_OeGU1N\\O^8^OdGU1O]O]8^OdGU1O]O]8^OdGU1O]O^8^ObGU10]O^8^ObGU10]O^8^ObGT11^O]8^ObGT11^O]8^OaGV11\\O^8_O`GU12\\O^8e1aG[N_8d1bG\\N^8d1bG\\N_8b1bG^N^8b1bG^N^8b1bG^N^8b1bG^N^8b1bG^N^8b1bG^N^8c1aG]N_8c1`G^N`8b1aG]N`8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8b1`G^N`8b1`G^N`8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8b1`G^Na8a1^G`Nb8_1`G`N`8`1`G`N`8`1`G`N`8`1`G`N`8`1`G`N`8`1`G`Na8_1_GaNa8_1_GaNa8`1]GaNc8_1]GaNc8_1^G`Nb8_1_GaNb8^1^GbNb8^1^GbNb8^1^GbNb8^1^GbNb8^1^GbNb8^1^GbNb8^1]GcNd8]1[GcNe8\\1\\GdNd8\\1\\GdNd8\\1\\GdNM" + }, + { + "size": [ + 424, + 640 + ], + "counts": 
"eig02Q=7M7H5L1000001O001O000kC[OLNc;i0_DYONNc;o0<4mCmN52e9c0aFj1U9X1M6J2N0000000000000000000000000000000000000000O1000000000000000000000000000000000000001O0000O2aNhFkN[;iN_EMf[2l0_dMUO8OV80jFR1IB4I;M@4S9BUGi3U90O11O000O10000O100001O000000000000O1N2000000003M0000O1O1O101O0000001O001bN_FSOb9k0eFnN]9P1lFfNW9Y1iFgNW9Y1eFRNAd0m9W1cFmN]9R1cFoN^9n0cFRO^9n0`FTOa9k0ZF[Oe9e0ZF\\Og9c0YF\\Oh9d0WF\\Oj9c0WF\\Oj9b0aF]NX:^1l0L4N2O1O1O1N3M2DlCAX<:lCBW<9`0Lemh5" + }, + { + "size": [ + 424, + 640 + ], + "counts": "Z96R=00000000O1O11O0001NW3N]Q12\\kN100001NV`0Oh]j02RRUO1X^l02]bZOK]oH001N1PC1i:OfF0]Nh0^:\\OiEMIZ2Q:l0K2N2O3L10001M200O1O100O6K4L001N100N2O100000O10ON301N10000001O0O1000000O1000001O000O2O0O101N1O100O10001O0O1000000O1O2O000O1M5L2N3O0O1000001N100000001O000O100O2O1O000O1000000O2O0000000O2O0O10000O100O2O000O2O001O00000O2O0010OO100O11O0001M11001O0O10000000000000001O00001O0001O01N11O0010O00000001O00O1010O0001O00000010O01O00001O00000001OO100O2O001O001O00001O0O101O000000000O1000001O0O10000O100000O10O01001O1O2N2M2O2N1O1O2N1O1O1N2O1N2O1O1O1O1N101O001O1N2O0000000O1000000000001N10000O1000000O10O10O0100O100O100O1O1O1O1O1O010O1O1O03N1N2O1O000O1000001O0O1000001O000O100000001O001O0O100O2O00000O101O000O2O000000000O2O000000000O2O1O000O101O0000001N100001O0000001O01O01O001O00010OO1000000O10000O1O10000000ZHiJ[7W5cHlJ\\7T5dHmJ[7S5dHoJZ7R5fHoJY7Q5fHQKY7o4fHSKY7m4gHSKY7m4gHTKW7m4iHTKV7l4iHVKU7k4jHWKU7i4kHXKS7_5M4N1L6N1N2O1N4M3L2M5K6J5L;E5J;E7Ic0\\O`0B:E4M6G5RMUF[2g:iMmDc1k;ZNVDn0Y, , and ?", + "choices": [ + "A. is beside , and is in front of .", + "B. is driving on , and is beside .", + "C. is driving on , and is beside .", + "D. is on , which is beside ." 
+ ], + "answer": "C", + "type": "relation", + "image": "images/vqa_384.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000463174.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "fmU11X=2000000001O00O12N[da11][^N8TJG[I3_4l0`3jMnMc0PNk0a0i0^3UOhLVOXOn0b0g0\\3\\OdLQO]Ol0d0f0Z3B`Ln08_OV3G_Lk0;^OT3OWLg0d0[OT3]3lLcLS3^3nLnIYO`2i3b3RM]Lm2d3TM[Lk2f3UMZLj2g3VMYLi2h3WMXLi2h3WMXLi2h3WMXLh2i3XMWLh2i3WMWLi2j3XMULh2k3XMTLh2m3XMQLi2P4WMmKk2T4UMiKn2W4RMgKo2Z4QMbKS3^4mL`KT3a4kL]KX3d4gLXK]3h4cLUK`3j4bLRK`3o4`LnJa1oNUOV6XOiJc1WOQOQ6\\OeJc1GfNe5GbJa1NgN`5I^Ja16cN]5K\\Ja1`Im1d1dMm4?\\Io1k1\\Mm4d0WIP2U9PNfFU2Z9m010O10O101OO0101O0001N100000O11N1000O1000000001O00000000001O0O2O2N001OmLmFn1S9RNnFm1R9SNoFm1Q9QNQGo1n8oMUGS2h8mMYGT2e8lM]GT2`8nM`GX2S1PM_5g0`IZ2i0UMf5`0cI[2HjLa0_67eIi2GTMd63fIj2_OYMk6LgIn3Y6QLiIn3W6RLjIm3V6TLjIk3U4^KnLg0nNj3R4eKlLa0SOi3o3kKkL;XOi3j3QLlL6[Oh3h3ULjL4_Of3g3XLhL3Ae3d3\\LjLMDf3a3`LiLFgN[OP1^4_3dLiLBhN\\OP1^4^3fLhLELc3\\3jLfLDNb3V3PMkL^O0b3l2bKRLc1S1XO0c3c2`M\\MmN2b3a2cM\\MkN5a3]2gM\\MiN7_3[2kM]MeN:`3e1gK`MV2`0cN<`3a1QLWMQ2k0]N>b3`1bNPNmMa0`3^1dNPNmMb0_3_1cNnMmMf0`3\\1bNmMnMh0`3Z1cNmMlMk0b3W1bNnMlMk0h3Q1]NRNkMn0S4e0RN\\NjMQ1X4`0mM^NgM]O]Of1P5>mM]NfM@\\Of1S5;kM^NjMX1^46jM`NhM\\1_43hM`NiM^1a41eMaNhM`1d4NeM`NgMc1d4LfM`NfMe1e4JgM^NdMi1f4HgM\\NcMo1f4CbN`0_1_O`Nb0a1\\O`Ne0a1XOaNg0a1VO_Nk0c1RO[NQ1f1mNYNU1g1jNZNW1e1gN]NY1c1fN\\N]1c1aN]Na1b1_N^Nb1a1]NaNb1`1]NaNb1_1]NbNc1]1^NdNa1\\1^NeNb1[1^NgN`1Y1`NiN^1W1bNjN]1V1bNmN\\1S1dNoNZ1Q1eNSOX1m0hNUOV1k0iNXOU1h0jN[OT1e0lN]OR1c0nN@o0?RODk0GB;, , and ?", + "choices": [ + "A. has already hit with .", + "B. is holding while looking away from .", + "C. is using to point at .", + "D. is swinging and is about to hit ." 
+ ], + "answer": "D", + "type": "relation", + "image": "images/vqa_385.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000192904.jpg", + "mask_rles": [ + { + "size": [ + 436, + 640 + ], + "counts": "oTo37[=5L3M2N2N2O1N101N100ODYCNf<1[COe<1\\CNd<2\\CNd<2\\COc<1]COc<1]COd<0\\C0d<0\\C0d<0\\C0d<1\\CNe<1[COe<2[CMe<4ZCLg<3ZCL^S5DnJYIAi6;ZIDg69\\IFf64_IKb63cII_65bIJ_65aIK_64bIL_63aIM`62aIM_62bIN^62bIN_60bI0^6OcI1]6NeI1\\6MhIOY60iIOX6OiI1W6NjI2W6LkI3U6LnI2S6LgIHoL=Y9JiIInL=Z9IPJ6P6IQJ7o5HSJ7n5GSJ9m5GSJ:l5DVJgJ\\O_L6h8`0iJZO_L6o75WH?Z3VO`L5n7]1bK]NaL5n7^1fKaN[4_1]K_NeL2o7^1[KaNeL1Q8_1YK^NhL2P8`1XK]NiL2P8c1TK^NjLOR8d1SKdNn4]1QKbNP5`1nJ_NS5b1lJ^NT5c1kJ]NU5d1jJ\\NV5e1iJ[NX5d1hJ[NY5f1gJYNZ5g1eJYN[5f1`JUNVM5[8f1_JVNUM4]8f1]JUNWM5\\8g1\\JTNXM5\\8f1aJ[N`5b1bJ]N`5b1eGXNc26h5i1YJWNh5h1jGTNLNg17d6h1RJZNk1J^Om1dNZNn1K^Oj1cN\\No1I_Ol1`N\\NQ2H@l1]N]NS2G@m1\\N[NT2J^Om1\\NZNV2I^On1ZNZNW2I^Oo1ZNWNY2J]OP2XNWNZ2J]OQ2UNXN]2H]OR2TNWN_2H\\OS2RNVNb2G[OV2oMTNg2FYOY2mMRNj2EXO[2lMQNk2EXO]2iMoMP3DWO_2fMnMS3DUOa2dMmMW3BUOe2_MiM]3BSOi2ZMhMa3ATOj2UMiMf3_OSOl2QMhMl3\\OSOQ3dLjMZ4UOQOc4o0^KoNc4Q1]KoNb4R1_KlNb4S1_KmNa4R1`KnN_4Q1cKnN^4Q1dKnN\\4Q1eKoNZ4R1gKlNZ4T1fKkN[4T1fKlNZ4T1gKjNY4W1hKhNX4X1hKhNW4Y1iKfNX4Z1iKeNW4Z1jKeNV4Z1mKeNS4[1nKcNS4]1nKbNR4^1nKaNR4`1oK^NR4b1oK\\NR4d1nK\\NR4d1nK[NR4f1oKYNQ4f1PLYNQ4g1PLWNQ4j1oKTNQ4m1oKSNQ4n1oKPNR4P2oKnMQ4T2nKkMS4V2mKhMT4X2lKgMT4[2mKbMT4^2mK`MT4a2mK\\MS4e2nKYMS4h2mKVMT4k2lKQMV4Q3jKmLW4S3jKkLV4W3lKdLV4]3jK`LX4`3lKYLV4j3iKRLZ4o3hKkKZ4V4jKbKZ4^4iKYK]4h4V22N2O001N2O001N100O2O0O2O1O0O2O000O2O1O00100O001O001O001N102N1O1O1O1O1O1O1O1O1O101N1O1O1O100O2N1O2N2N3M4L2N4L3L5L3L4K5DH9B=D;G9C=H9G8M3L6J4M5J6J5L5I7H?WOhmk0" + }, + { + "size": [ + 436, + 640 + ], + "counts": "]hn35^=2O0O2O000000001O00000000001O0001O000000000000001O0000000000001O0000000000001O0000000000001O00000000001O000O10000000001N1001O000001O000001O01O0000O1000000000100O2N1N1O2O1N2O1N101N101N100O100O100O100O1000O1000000O10000O10000O100O1O100O101N1O101N1O2N2NkRl2" + } + ], + 
"question": "What is the spatial relationship between and ?", + "choices": [ + "A. is on .", + "B. is inside .", + "C. is on .", + "D. is beside ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_386.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000015335.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "fYc2=`>5K5L3L4N1O3K3O1O1M300O1O1N2O2N1O1M2001O01M3O001O1O1N2O001N110O1O0O101O1O0O2O001O1O1O1O001fDiMi9X2VFmMfNKl:X2^FSN_9n1bFSN[9o1dFSNZ9n1eFUNX9l1gFWNW9h1iF[NS9g1kF\\NS9e1iFaNT9_1jFfNT9Y1hFoNT9S1`FZO]9h0[FAb9R3M2M3N1N4M1O2M4M2M4M2N2M3N2N1O2N2M5L1O3M3M1N101O0gN\\IVJd6f5dIVJ]6g5hIVJX6i5jIUJX6i5lITJT6S5TIZJO;0Ek0b0R6Q5kI[J5c0o5l4SKSKm4i4YKTKh4h4]KWKc4f4aKYK_4f4cKYK]4f4eKYK[4f4gKYKZ4c4jK[KW4d4kK[KU4e4lKYKU4f4mKYKS4d4V3O1000000O1O1O1O100O100O10000O1O100O1000000O1000000000000000000000000001O000000001O0000001O001O0000001O0O110O1O1O1O1O001O0O3N1O10O01cMTKdJm4k0UKW1OmMl4j0YKb3i4ZLZK^1GiMQ5d0[Kd1ChMS5a0\\Kj1C`MR5c0]Km1A`MS5?_KR2]O_MU5;aKV2ZO_MW55dK\\2UO^MX5NjKd2oN\\MZ5JkKd1iNjM4e0Y5GPLT3dNSM^5ZO\\LV2UNTN1:e5oN]La2oMUNO:`8`1bGVNN7c8`1cGXNKiNOU1g8h1_GYN4K`8k1]GYN4Ci8e1mF`M`0X1KAi8d1dGiNH[Oi8h1cGkNKTOe8m1eGlNKoNf8P2dGRNiNe0h:U1cFTNgN1N2O9j:]1\\2K5M2N3M3N2M3M3K5K6K5F:IWVh3" + }, + { + "size": [ + 480, + 640 + ], + "counts": "g4l1T=0O101OO01N101000O1J6N20000O2O1N101OOL5M3O10000O101L3N2O4Jocj1WOh\\UN6N2M2N4M2M3N2N1O2L3O2N2N1N3L4N1O2O001O1O1O00001O1O00001O10O01O001O001O1O1O001O1O1O2N1O1O2N3M2N3M4L4L4L5K5K7I6J4L4L3M4M3L1O0000O1M3O100N2O1O1N20O01O1O1O100O2N1O1O101O0O10001N100O100O10000O100000000O100O1000000iN[DSOe;i0eDPO\\;l0jDROV;`0YDlNh0b0o:6hEHX:2oELS:NTFOm9NWF1i9L[F3e9HaF6W43000000O10000000001N1000jiU1" + } + ], + "question": "Which statement accurately describes the relationship between and ?", + "choices": [ + "A. is pushing .", + "B. is positioned on top of .", + "C. is sitting on .", + "D. is standing in front of ." 
+ ], + "answer": "C", + "type": "relation", + "image": "images/vqa_387.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000060932.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "Ydg0=h<:I5L4J5M3N3N1gDfN_:\\1REWOi:k1Ja0@T1kN9H3L7J6I3M3@mJcHT5T7XKhHh4V7]KcHh4Z7f000O100000nNQIgKn6m3fIlKZ6R4kIkKV6R4nIlKR6S4QJkKo5T4SJjKn5V4VJeKk5Z4d1010O20ZLVGj2g8TM\\Gn2a8oLbG_OEb3h8kLlGT3g9AOTMfEMa0o1P;JNWNdD`1f;1N1M3N01N3N4J7J6J6J6I3N2M5LVSk6" + }, + { + "size": [ + 428, + 640 + ], + "counts": "m7o0]<0000001O1O1O0000O1O1O1O1O1O100001O001O000000O1O1O1O10000000O2O01O0000O01O1O100O10000001O001O00O1O100O010O11O1O1O0010O0O101M2F;HmR`7" + }, + { + "size": [ + 428, + 640 + ], + "counts": "QQZ1?gh4Y7eK`Ha4Z7aKcHc4[7j0L3M2O1N1000001O0000000001oNmHWK00T7f4QISK23n6f4V1oKcGo2_8QMcGm2^8QMhGk2X8RMlGn2U8lLoGU3P8hLTHX3T8[L\\H1PO1JU3Y:K8H5ZM[EY2g:dM\\E[2k:0O2M1O1102gMTEm1j;lMXD[1R, , and ?", + "choices": [ + "A. is in front of , who is beside .", + "B. is looking at , who is beside .", + "C. and are both in front of .", + "D. is beside , and is also beside ." 
+ ], + "answer": "D", + "type": "relation", + "image": "images/vqa_388.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000076417.jpg", + "mask_rles": [ + { + "size": [ + 478, + 640 + ], + "counts": "oXg29`>5M3N3K4M3N2N101M4M2N2O1O1N2O1O1O1O1O1N200O100O100O10000O1O100O100000000O10000000000000000000000000000O100000000000000O11O0000O100001O00O1001O000000000000000000000000001O0000000000O1001O000000000000000000001N10000O100001O1O0000VOlBJU=3nBLR=P100001O001O001O00O1N2N2M3M3N2M3M3M3N2N2M3M3M3M3N2M3N2M3N2N2M3N2L4N2M3M3M3N1N3N2M3N2M3M3N2N2N2M3N2N3M2N2N2N2N2N2O1M3N2O1N2N2M3N2M3N2O1M3M3O1N2N2M3N2N2O1N2M3N101N3N1M3N2O1N2N2M2O2O1N2N2M3N2N2N2N2N2N2N2O1N2N2N2O1N2O1N2M3O1N2O2M2N101N2O2M2O001O1O2M101O1O1O2N001O1O100O1O1O100O2OO0100O1O2O0O00101N1000O100001O0000O02O0000O01000000000001O00O10O100O2OO10O10001OO10000001O00O1001N1000O100001O000O011O0000O100001O0000O02O0000O10000000O10001O000000000000000000000000000O100000000000000000000O100000000000000000000000000000000000000O10000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000WK" + }, + { + "size": [ + 478, + 640 + ], + "counts": "R\\_6Q1Z and in relation to ?", + "choices": [ + "A. and are both over .", + "B. is beside , and is over .", + "C. is over , and is beside .", + "D. is over , and is beside ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_389.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000326627.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "VYY45g>2N3M:F2N1O0000001O00001OO2N1O2O1N4M2N4LQmQ4" + }, + { + "size": [ + 480, + 640 + ], + "counts": "Q>n0R>000O10000000000O100O10000001O0000O100O1000000000000000000O10000000000000000O100000000000000O10000000000000000000000000000O100000000O10000000000000000000000O100000000000000O10000O10000000000O100000000O10000000000000000000000000O100000000O1000000O10000000O010000000000O100000O0100000O1000O100000O10O01O100O1N2O1O10O10000000000O1000O0100O101O0O10001O00000O2O001O0O10001N10000O100O2O000O01000O10O100O10O10000O010O0100000000O0100000000O01000O1000000O10000O10000000000O10O1000O100000O01000000O010000000O1000000000000O1000000O100O100000000O1000000O010000000O1000000O1000000O100O10001NndS5" + }, + { + "size": [ + 480, + 640 + ], + "counts": "R>m0S>0000000000000O1000000000000O1000000O10000000000O10000000000O1000000000000000000000000O10000O10000000000000000000000000000O1000000O10000000000O10000000000000000O10000000000000000O100000000O100000000O10000O10000000000001O0000O1000000000000000000000000000000000000O1000000000000O100000000O100000000O100O1N2N2O1O100000000001O0000O1000000000000000000001O001O001O0000001O0000001O0000000000000000O1000000O1000000O1O1O100O100000000000000O10000000000001O000000001OO100O1000000O100000000O100000000O100000000000000000000O10000000000000000000000000000000000000000000000O10000000000000000001TNPC0Ob1b=H1O1O00000000000000O1O1000000000000000000000000O100000000000000001O001O000000000000O1O10000000000000000O1N2O1O100000000001O00O100O100O100O100000000O10000O1O1J6M3M3O11O3M1O1O0000001OO10000000000000000000000000000000000O10000O100O1000000O100001O1O00000000O1001O00O100001O0000000000O1CSNcCQ2\\<:O1O1O10000001O0000000000001O00000000000000000000O100000000001O001O0000000000000000O10000002N10O0001N1001O0000O100000O1O10001O0
1O1O1O1O0O10001O00000000Q1oNU1iNQ`69d_I?E;mAoNa=m1`BQNe and , and the road, ?", + "choices": [ + "A. is parked on , but is parked on .", + "B. Both and are parked on .", + "C. Only is parked on .", + "D. Both and are enclosing ." + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_390.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000077460.jpg", + "mask_rles": [ + { + "size": [ + 640, + 428 + ], + "counts": "Sj[55ic07G5N2O3M4N1ON2Ci\\OMXc0Om\\OOUc0Nn\\OOn\\i2" + }, + { + "size": [ + 640, + 428 + ], + "counts": "gQo17gc07\\\\ODYc0n0^OQOZ]OZ1[b07N2N11Bf]OSO[b0k0f0NK7H10oN_]Oa0cb0\\O`]Ob0db0YOh]O;]dS6" + }, + { + "size": [ + 640, + 428 + ], + "counts": "[a093Jh0028^`09g^O@0M13N\\1Ya0V10N2N2O10e^O`MVa0a2g^OaMYa0c20000000000000000000000001O001O0[Ni^Oa0Wa0^Oj^Ob0Va0\\Om^Oc0Sa0]Om^Oc0Sa0]Om^Oc0Sa0]Om^Oc0Sa0]Om^Oc0Sa0]Om^Oc0Sa0\\On^Od0Sa0[Om^Oe0Sa0[Om^Oe0Sa0\\Ol^Od0Ta0\\Ol^Od0Ta0\\Ol^Od0Ta0]Ok^Oc0Ua0^Oj^Ob0Va0_Oi^Oa0Xa0^Oi^Oa0Wa0_Oi^Oa0Wa0_Oi^Oa0Wa0_Oi^Oa0Wa0@h^O`0Xa0@h^O`0Xa0_Oi^Oa0Wa0_Oh^Ob0Xa0^Oi^Oa0Xa0^Oh^Ob0Xa0^Oh^Ob0Xa0^Oh^Ob0Xa0_Og^Oa0Ya0^Oh^Ob0Xa0^Oh^Ob0Xa0^Og^Oc0Ya0]Og^Oc0Ya0^Of^Ob0Za0^Oe^Oc0\\a0P1000000001O000gMg^Ok1Za0RNh^On1Ya0PNi^Oo1da0UNR^Od1Yb0L4`Ne]OS1db001O0000O1M31O1O1O001OO100[OPOP^OZ1ka0`0M3O1L4O100001O0000002N:F5TNk]Oc1_b0J=C2N1OO10000O11O1O0000O1ROQOk^Oo0Sa0ZOf^Of0Ra05Z^OLfa0W100000000001O00000000000000001O0000000000000000000000O100001O000000000eN[^O;ea0SOm^Om0Sa0POQ_Oo0o`0POR_OP1n`0oNR_OR1Qa0kNo^OU1ma01O1BlNl]OX1Rb0<00O1O100O100000000001O0000O1JWNS^Ok1ka06O1000000001O00000000000000^OoMV_OP2^a0O1O1O1O00O100O100O100LPNW^OQ2ia0oMW^OQ2ma0O000000002N1O0000000000001O000000001O0000O100000000O1O10000000000001O0000O100000000001O000000000000O100000000001O00000000000000O100JRNZ^On1ma0O7I4L2N1O3M3M2N1O1O0000O100ElNi]OU1na0jNQ^O7KP1Sb0mNm]O_1Sb070000000000000000000000000000000000000000O1001O001O1O1O0000O1O1O10000000000001O00O1001O0000O100000000000000001O000000O11O001O2N1O001O0000000cNk]Oo0Ub0POm]Oo0Sb0QO
Q^Ok0oa0UOR^Oj0na0VOS^Oi0ma0VOT^Oj0la0VOS^Ok0la0VOS^Ok0ma0UOS^Ok0ma0VOQ^Ok0oa0TOS^Ok0na0TOR^Ol0bb0O1O1O00001OO10000UO[OZ^Of0ea0@V^O`0ja0_OV^Ob0ja0^OV^Ob0ja0]OW^Oc0ia0]OW^Oc0ia0]OX^Ob0ha0_OX^O`0ha0@X^O`0ha0@X^O`0ha0@W^Oa0ja0^OS^Oe0ma0\\OQ^Oe0oa0\\Oo]Oe0Pb0]Oo]Oc0Qb0f0000000000000000000000000000000000000000000000000000000000000001^Nm]OH[1" + } + ], + "question": "What are and doing on ?", + "choices": [ + "A. Both and are running on .", + "B. is running on and is standing on .", + "C. Both and are standing on .", + "D. is standing on and is running on ." + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_391.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000077460.jpg", + "mask_rles": [ + { + "size": [ + 640, + 428 + ], + "counts": "gQo17gc07\\\\ODYc0n0^OQOZ]OZ1[b07N2N11Bf]OSO[b0k0f0NK7H10oN_]Oa0cb0\\O`]Ob0db0YOh]O;]dS6" + }, + { + "size": [ + 640, + 428 + ], + "counts": "[a093Jh0028^`09g^O@0M13N\\1Ya0V10N2N2O10e^O`MVa0a2g^OaMYa0c20000000000000000000000001O001O0[Ni^Oa0Wa0^Oj^Ob0Va0\\Om^Oc0Sa0]Om^Oc0Sa0]Om^Oc0Sa0]Om^Oc0Sa0]Om^Oc0Sa0]Om^Oc0Sa0\\On^Od0Sa0[Om^Oe0Sa0[Om^Oe0Sa0\\Ol^Od0Ta0\\Ol^Od0Ta0\\Ol^Od0Ta0]Ok^Oc0Ua0^Oj^Ob0Va0_Oi^Oa0Xa0^Oi^Oa0Wa0_Oi^Oa0Wa0_Oi^Oa0Wa0_Oi^Oa0Wa0@h^O`0Xa0@h^O`0Xa0_Oi^Oa0Wa0_Oh^Ob0Xa0^Oi^Oa0Xa0^Oh^Ob0Xa0^Oh^Ob0Xa0^Oh^Ob0Xa0_Og^Oa0Ya0^Oh^Ob0Xa0^Oh^Ob0Xa0^Og^Oc0Ya0]Og^Oc0Ya0^Of^Ob0Za0^Oe^Oc0\\a0P1000000001O000gMg^Ok1Za0RNh^On1Ya0PNi^Oo1da0UNR^Od1Yb0L4`Ne]OS1db001O0000O1M31O1O1O001OO100[OPOP^OZ1ka0`0M3O1L4O100001O0000002N:F5TNk]Oc1_b0J=C2N1OO10000O11O1O0000O1ROQOk^Oo0Sa0ZOf^Of0Ra05Z^OLfa0W100000000001O00000000000000001O0000000000000000000000O100001O000000000eN[^O;ea0SOm^Om0Sa0POQ_Oo0o`0POR_OP1n`0oNR_OR1Qa0kNo^OU1ma01O1BlNl]OX1Rb0<00O1O100O100000000001O0000O1JWNS^Ok1ka06O1000000001O00000000000000^OoMV_OP2^a0O1O1O1O00O100O100O100LPNW^OQ2ia0oMW^OQ2ma0O000000002N1O0000000000001O000000001O0000O100000000O1O10000000000001O0000O100000000001O000000000000O100000000001O00000000000000O100JRNZ^On1m
a0O7I4L2N1O3M3M2N1O1O0000O100ElNi]OU1na0jNQ^O7KP1Sb0mNm]O_1Sb070000000000000000000000000000000000000000O1001O001O1O1O0000O1O1O10000000000001O00O1001O0000O100000000000000001O000000O11O001O2N1O001O0000000cNk]Oo0Ub0POm]Oo0Sb0QOQ^Ok0oa0UOR^Oj0na0VOS^Oi0ma0VOT^Oj0la0VOS^Ok0la0VOS^Ok0ma0UOS^Ok0ma0VOQ^Ok0oa0TOS^Ok0na0TOR^Ol0bb0O1O1O00001OO10000UO[OZ^Of0ea0@V^O`0ja0_OV^Ob0ja0^OV^Ob0ja0]OW^Oc0ia0]OW^Oc0ia0]OX^Ob0ha0_OX^O`0ha0@X^O`0ha0@X^O`0ha0@W^Oa0ja0^OS^Oe0ma0\\OQ^Oe0oa0\\Oo]Oe0Pb0]Oo]Oc0Qb0f0000000000000000000000000000000000000000000000000000000000000001^Nm]OH[1" + }, + { + "size": [ + 640, + 428 + ], + "counts": "RRR62mc03N0O4M1N3N1OIb\\OM]c02f\\OMXc04h\\OLXc03j\\ONTc02m\\ONSc01n\\O0Pc00Q]O0ac0M_QP2" + }, + { + "size": [ + 640, + 428 + ], + "counts": "Sj[55ic07G5N2O3M4N1ON2Ci\\OMXc0Om\\OOUc0Nn\\OOn\\i2" + }, + { + "size": [ + 640, + 428 + ], + "counts": "0^21]N2[O1001N020OO101O0O1000e0M]O3Oc00\\O91GOO2O000Mh0OB5CZ1O;RMiAm2X>SMhAl2Y>TMgAk2Z>TMjAh2V>XMjAh2W>VMjAj2f?000000000000000000000O1001O00001O00000000000000mKYMmFg2T9XMkFi2U9XMjFh2V9XMjFh2V9YMiFg2W9ZMhFf2Y9XMhFh2Y=O0000000000O1O1001O1O1O001O001O1O00O1O100O1001O2N1O1O1OO100O10000O10000]O_Mg_Oa2Y`0`M[_OK8e2]`0dMb_O\\2_`0cMa_O]2d`0]M]_Oc2c`0]M]_Oc2c`0]M]_Oc2b`0^M__Oa2b`0^M^_Ob2b`0^Mb_O^2Ra0N001O1O1O0000001O001OO100O1H8O1O1iM\\MRCd2m<]MSCc2l<]MVCb2j<^MWCa2V?N3MN2N2O1O1O10000C[M^_Of2b`0ZM__Oe2a`0\\M^_Od2o`00L40000000ZMm^O_2[a0O00O1O100001O3M3M2N1gM[^OU2]`0jMi@0kNV2d>kMWDY2h;hMWDY2j;fMVDZ2f>2`MdM[C]2d1O1TMnMiCS2P?200O1000000001O001O1O00000000O1O100O100000000000000M3O100O1O1000000O11O00^HfM^MZ2b2gM]MY2b2hM]MY2c2gM]MY2b2hM^MX2b2hMWFKP7]2h2hMYFKo6]2h2hMYFMm6[2j2hM[FKl6\\2i2iMbMV2_2iM]FLg6[2l2iM]FLg6[2l2iM]FLg6[2P3eM]M[2T:0000000O1000000lJgM`HZ2_7gMaHY2_7gMaHY2`7fM`HZ2d0O100aMeMYC[2fS1@UN1OOR3`0nLOW3BmL3M20Li0NZO3N103L12LNM3:Y;" + } + ], + "question": "Based on the provided relationships, where is located?", + "choices": [ + "A. Being held by .", + "B. Next to .", + "C. On .", + "D. In ." 
+ ], + "answer": "D", + "type": "relation", + "image": "images/vqa_392.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000287545.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "[jm05n=1gB2]OM4LV=;RC8FAT=:RC7JCo<9RC7O@oVCBj<>UCCl<=SCBn<>QCCo<=QCCo<=PCDP=0O0001O1O2N1N3N^b\\3" + }, + { + "size": [ + 480, + 640 + ], + "counts": "\\]^34l>2N1jA1Y=0dB2\\=NWBJ5:e=2YBOh=2SB1n=?2O0O1OO0O2O2O02N2O0O1O[CYOS;g0iD^OV;b0gDBX;n0UDUOk;n0dCcN5c0W interacting with ?", + "choices": [ + "A. is looking at .", + "B. is standing on .", + "C. is beside and looking at it.", + "D. They are standing on different objects and looking away from each other." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_393.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000545219.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "\\Zm12j>8J3N2M3M5K5L2M3N3L5K4M3M2M4L8I2M2O2N1M3O2N1O1N2N2O1OiNgCIX<7kCGS<:PDDo;XD@g;a0ZD^Oe;c0\\D\\Oc;f0]DYOc;h0^DVOa;l0_DSOa;P1]DnNc;T1]DkNc;[1WDeNh;e1PDXNQoG\\OQ8e0RHUOa7NoEl0S=VOlBj0S=YOjBh0U=]OfBc0Z=b0O010O0010O100O100O1O101N1O2N2N2N3M3N1N1O2N2O1N3M3M3M4L2NkQo4" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"aag1>_>;eBC`;X2C7F:I6I7J7K4M2O2N1O2M3O0O100O2O0O101N3M4M3L2O1O1N1O2N100O1O101N101N2N2N1000O1O0M301O01O010O10000O10O010O01O10O010000O01O001O0010O001000O10O10O1000O01O10O010OdNgFQM[9j2_1F;1O1O0O101O1N100O1O1O1000O10O0100000O0001000O1O2O00000O2O1N1O101N1O100000000O1O2O000XFWLX8j3gGVLY8l3fGQL\\8P4dGPL\\8Q4cGRLZ8o3dGTLY8P4dGRLZ8V4TGULj8Y4hFjKV9W4iFiKW9W4iFiKV9X4jFhKU9T4lFVK1e0R9V4nFTK0f0R9Z4nFgKQ9Z4mFgKS9X4oFfKQ9W4nFSK=6i8f4k00O101N3M101O000O2O1O1N2O0O2O0O100O1O1O2O0O1O1O1O1000O10OL4M40O100O01000O100O00011O0O2N101O000O7J001O0O2O001N100000000O10000O101O00000O3N1O1O1N102M4L3N1O1N2N2N2cLVIbNk6i41N100O2O00002N4L1ZHnH:Na6[7b0011`HdHZ7a700jH^Hn6b7RI^Hm6d7TI^Hi6V8F1\\IPHV6_8N3L5L9H3L1O000010O000000001O001O0000O2O00000O100O2M2O100000000001O0000001O000000000000000000010O0000000000001OO1001O000O10001O00000O100O101O00000010O0000002OO01O1O1O1O001O0001O2N1O0O10000O1O1O101M2O100O2O1O002M2O000O101OO1001O0O1000001O0O10001O001aN]J_Ic5`6_J^Ic5`6`J^Ia5]6fJQIUO6V6e6XKSIj4j6i1M001O1O2M2N3O0O2O1kNmGoJB1c8j4\\HTKe7j4\\HUKf7h4]HVKe7h4[1N2N3M2N3N2M2O2N1N3N2L4N1O2N1O2M3N2N1O2N2N2N2N2N2N3M2N2N2N1O2O1N3M3M2O2M3N1O1N105L1N1O2M2O3O00SCoMO2OO`<[24M2O4K4L3M3L5K7J5J5L5J;CXlR1" + } + ], + "question": "What is the relationship between and ?", + "choices": [ + "A. is looking at .", + "B. is wearing .", + "C. is inside .", + "D. is moving towards ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_394.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000096001.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "ejb06Q=7I7I5K4N3M2M3M4M1O2N2N2N1O2N1O2N1O2N1O2N101N1O100O1O2O0O1O2O0O1O101N1O10001N100O2O0O2O000O100O2O0O100O101O0O100O1000000O10000O10000O10000000000O100000O100000000O10O1000O10000O10000O100O100O100O100O100O2N100O1O1O2O0O1O1O1O1O2O0O1N3N1O1O2N1N3M2O2M3M2N3N2M2O2M3N3M2M3N3L4L4K6HZQn5" + }, + { + "size": [ + 426, + 640 + ], + "counts": "Pfm37R=3M2N3M2N2N2O2M3M2N3N1N2N3M2N2O2N1N3M5L1N3M2N3N1N2N2N2N3M2O2N1M3N3M2O1N3M2N2O1N2N2O2M2N2O2M2N2N2N3M2O1N3M3M2O1N2O1N3M3N1N3M2N2N2O2M2O1O1O1N2N200O2N1O1O2N1N2O1O1O2N1O2O0O1O1O001O001O1O0000000000000000O1O100^OVHcKk7P4eHlK\\7R4hHkKY7Q4QIiKo6U4T1XOTGoLo8Q3SGiLP9X3a0O11O000000000000001O00001O00001O00010N1001O010O010O100O2N100O100O10001N100O10000O10001O0O1O100O10TFoLM0W9P3i0O11O2mEWMa9Y30O1O1O2N2M2O2N1N3N3M2M5L4L7I000O01O001O10O0100O1O01O1O001O001O1O0O2O1OgHbMg3^2c3O001N101O1O001N101O001O0O101O1O0O2O1O1O1O1O0O2O1O1O1O0O101O1O001N101O1O000O2O001O1O0O200O1N101O0O2O1O1O00001O0000O0101O001O0O2O1O2N1O1N3N101M3N1N3N1O1O2N001O2N1N4LRkf0" + }, + { + "size": [ + 426, + 640 + ], + "counts": "oc51Y=000Pm<0PSC0\\R<0Wd10[YB3M2O1N2O1O2M2N2O1N2O1N2O2M2O1O1N2N3N1O1N3N1N2O2N1000001O0001O0001O000001O01O00000001O01O00000010O000001O000001O01O00000001O01O0001O0001O00010O00001O0001O01O0000010O0000001O0001O01O00000010O00000010O0000001O000001O0001O00010O00001O01O01O000000O2M2N2N2N2M4M2N2O1N2N3M2N2N2O1M3N3M2O1N2N2Niao4" + }, + { + "size": [ + 426, + 640 + ], + "counts": 
"d1Z29Bb0f0b4IkJBc0e0a4JjJDd0b0a4JjJFe0`0a4Q2_KPN_4Q2bKnM^4R2bKnM\\4U2dKjM\\4V2dKjM[4W2eKiMY4Y2hKgMV4DoJ`0k0LU4\\2jKdMU4]2kKdMS4]2mKcMS4]2nKbMQ4_2oKaMP4`2PL`Mo3b2PL^Mo3c2RL]Mk3e2UL[Mj3g2VLXMi3i2XLVMg3k2YLVMf3j2ZLVMe3k2[LUMd3m2\\LSMb3n2^LRMa3o2_LQM`3P3`LPM^3R3cLmL\\3U3cLlL\\3T3dLlL[3U3eLlLY3U3gLkLY3U3hLjLW3W3iLiLW3X3hLiLU3Y3lLfLS3[3mLeLR3\\3oLcLo2_3QMaLn2`3RMaLm2_3TM`Lk2i0[L[Oi0Lk2f0dLZOa00j2e0kLWO;5h2b0RMXO56h2`0XMXO08g2?]MWOM9d2a0aMUOL9b2a0eMUOI;a2?hMUOH;_2`0kMTOF=^2>nMUOD<^2=QNVOA=^2;TNWO^O>^2;TNWO^O>^2:VNXO\\O=^2:XNXOZO>^29YNYOYO?]27\\NYOXO?\\27]N[OVO>]26_N[OTO?]26_N[OTO`0\\24aN\\OSO`0\\23cN]OQO`0[22eN^OPO`0[21fN_OPO?Z22gN^OoN`0[20gN@nNa0[2nMdLc1S2OnN?o2ZOTN7mN`0o2XOUN7lNa0Q3UOTN:lN`0R3SOSN>jN?T3QOTN?hN`0U3POSN`0hNa0V3lNSNc0gNa0X3jNRNd0gNa0\\3eNnMk0eN`0a3`NkMP1eN`0a3gM^Ka0\\2X1eN`0b3fM_K?\\2Z1cNa0l3RNaM]1dN`0n3oM_Mb1bN`0P4kM`Md1`Na0P4kM`Md1aN`0P4kM_Me1aN`0Q4jM^Mf1aN`0R4hM_Mh1_N`0R4eMaMk1]N`0j6@VI`0j6@VIa0i6@VI`0k6_OUIa0k6_OVI`0j6@VIa0i6_OWIa0i6@WI?j6@VI`0j6@VI`0j6@VIa0i6@VI`0j6@WI?i6AWI`0h6@YI?g6AYI?g6BXI>h6AZI?e6A[I?e6A[I?j2fMIj1^Ma0h2iMFd1eMb0e2kMEb1gMc0d2lMD_1jMe0b2nMB\\1mMf0a2PN_OZ1RNf0^2RN^OV1VNh0\\2TNWLEk2^1dNh0Z2_NROg0fNj0X2_NROf0hNj0[2[NmNi0jNm0Y2YNlNj0lNm0Z2WNjNj0oNn0X2WNiNi0QOQ1V2UNiNi0ROR1d5lN^JT1a5lNaJS1_5kNcJU1]5jNdJV1\\5hNgJX1W5hNjJX1V5gNkJZ1T5dNoJ[1P5eNRKZ1n4eNSK[1l4eNVKZ1j4fNVKZ1i4gNWKZ1h4gNWKY1i4gNXKX1g4iNYKW1f4jNZKV1f4jNZKW1d4jN]KU1b4lN^KU1a4lN^KT1a4mN_KS1a4mNaKQ1^4PObKQ1\\4POeKo0[4QOeKo0Z4SOeKn0Y4SOgKm0X4TOhKl0W4UOjKj0U4WOkKj0l0nM]OY1Gi0j0QN]OW1Jg0i0SN[OX1Ke0j0SNZOY1Le0i0SNQMGd1b1b0d0h0UNoLHe1`1d0c0h0VNmLIe1_1g0a0g0XNjLJg1_1g0?h0YNhLKf1_1j0>g0\\NcLIj1_1l00n3d2dMXOPNTN>Oo3e2cMYOoMSN`0MP4f2aMZOoMSN`0LQ4g2aMXOoMVN?IR4j2_MWOPNVN`0FS4m2]MWOQNUNY5d2gLVOPNVN=IV4k2\\MVOQNVN60]4d2\\MVOQNWN32_4a2]MWOnMXN53^4^2_MWOlMZN65\\4Z2cMGPNQN\\4X2dMHnMSNo2@QNd2S1g0e0PMSNZ2W1g0b0UMTNT2Z1i0=YMVNn1]1j0;[MVN:]OOS2\\27_MXN3@3P2\\26aMXNMF5l1^23cMZNHI7j1^21gMZNCL8j1e0iMGU2K[N^O0;g1d0kMGR2N[N[O2=e1c0lMHQ2M]NXO4?b1d0lMIo1O]NSO8b0`1b0mMIm11^NPO:d0^1b0nMIk12^NmN>e0[1c0oMHi15^
NiNa0g0Y1c0PNHf16aNeNb0k0V1b0RNGd19aNaNe0m0T1b0TNEa1=aN]Nh0o0S1a0UNE^1?aNZNl0P1P1b0VND\\1a0bNXNm0Q1o0b0VND\\1b0aNUNP1S1m0b0WNCZ1e0bNoMS1X1i0a0YNBY1f0bNmMV1Y1g0a0XNCY1g04UO;a0XNCX1h06SO:b0YNBW1i06TO9a0ZNBW1i06TO9a0ZNCU1i08TO9?ZNDU1i09SO8`0ZNDU1j08RO9`0ZNDU1j08RO:?YNDV1k08RO8?ZNDU1l09QO8?ZNDU1l09QO8?ZNDU1l09QO9>YNEU1l0:QO7>ZNEU1l0:QO7>ZNEU1l0:QO7>YNFV1k0:QO8=XNGV1k0;PO7>WNHW1j0;PO7>VNHY1j0:QO7Y2a7VNQHA>Z2`7VNVIj1j6VNVIj1j6WNUIi1k6WNUIi1k6XNTIh1l6XNTIh1l6XNTIh1l6XNTIi1k6XNTIh1l6XNTIh1l6XNSH^O9[2OfMc6a0UI^O9[2NgMd6`0UI^O:Z2MhMd6a0UI\\O:[2MiMc6`0VI\\O:\\2LjMc6>VI^O:Y2MmMa6gM]O:W2MQOX2oNJ_OA]NSO?S2IiNl1c0\\O_O`NSO>R2JiNj1h0ZO[OeNQO=S2JiNi1m0VOWOjNPO=T2IgNj1Q1TOUOkNoN>T2IgNi1S1ROUOnNmN?S2HgNh1W1oNUOROjN?S2HgNg1c1dNjN_OhN>T2HfNg1l1ZNfNIcN>U2HeNg1S2QNdN2`N=T2IeNf1Z4SNmJ>T2IdNf1\\4TNkJ=U2IdNd1^4VNiJ>T2HeNb1a4XNeJ>U2HeNa1b4YNdJ>V2HbNa1f4XNbJ?U2IcN_1g4YNaJ?U2IcN^1h4[N_J>V2IcN]1j4\\N\\J>W2IcN[1l4_OaLVOcN[1l4@`LUOcN[1n4@_LUOcNY1Q5A\\LWObNW1S5B[LWObNU1V5CXLXObNT1W5EVLWOaNU1Z5DULWOaNS1]5ERLXO`NS1_5EQLXO`NR1`5FPLXO_NR1c5EnKYO_NQ1d5GlKYO^No0i5GiKZO^Nn0j5IgKYO^No0k5HhKXO]No0l5IgKXO\\Nn0o5JeKXO[Nn0R6IcKYO[Nl0U6K_KYO\\Nk0V6L]KZO]Ni0W6M\\KZO\\Ni0Y6M[K[O[Ng0[6NZK[O[Nf0]6OXKZO[Ne0_61VKZO[Nd0`62UKZO[Nc0a64RKZO\\Nb0d63PK[OZNb0h64nJYOZNb0j64lJ[OXNa0m64jJ\\OYN?n66hJ[OYN?Q75fJ\\OYN=S77dJ\\OYN_IDZNMX8`0]IC[NLY8a0\\IC[NJ[8c0ZIDZNH]8c0ZIEZNF^8d0XIFZND`8f0VIGZNBa8f0UIH[N_Ob8i0SIG]N^Ob8j0QIH]N\\Oe8k0nHJ[N\\Og8j0nHJZN\\Oj8i0lHK[NYOl8k0jHKZNYOn8k0hHL[NWOf0M[7P1cIL[OVOR7n0cILZOXOR7l0dILXO[OT7i0cILXO^OS7f0eILWO@S7e0eIKWOBS7c0fILVOBS7b0gILUODT7?gIMTOGS7=hILTOHT7) and the rectangular tag ()?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
" + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_395.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000322829.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "k\\T1221P=`0E2M5M5J2O1O1O2N2N3L2O1O1O5K3M6J1O3L`0_O3`E_MS:c2hEdMU:Z2nEdMQ:Q2kEPNd:Q2\\EoMd:]2N2O0O1O1O1O2lEUMe9j2>00001N3N1N2N2O1N2O1O1N2N2O1O1N101O1O100N101O10O1O1M20100O1O1O010O10O01O1O1000000O1000O100000000000000001O00000000O1O1000000000000001O00002N001O00000000000000000001O000000000O100O1O0010000000000000000000001O000000000000000000000000001O000001O00000000001O000000000000O011N1O11O00O1O001000000000000000000001O1O000000000001N100O10O11N101O00000O2O0O2O0O1N2O1O1O1O1O1O100O10000O101O0001O000000001O00001O001O1O1O010N4M1N2O01O01O00001O0O101N110O000O2O000O110O001O000O1O1O11N1O101O1O0O11O0000000000001O1O1O001O0000000000000000O100000000000000O10000000000000000001O0O100001OO1001O00000O10001O01O00001O0O10001O00001O0000O100O10000O100000000O10000001O001O0O2O000O2O00000000OgMQFX1o9hNQFX1o9S1O001O1O00001O002OO01O1O1O3M00010O002N3NO010O0000001O10O01O1N4M1O1O2N1O2O0O1OfHmL\\M2f6o2PLmLZM40Hd6U3TLlLZM6i6k2TLQMR4n2W3N2N2N3N0O101M2N101000OO2O2N0JZEgMg:U2:L3O2K5I6O1L3DXDTOl;k0SDQOR[4^OPL:R4DRL9n3FVL7j3HZL6e3I^L5b3K_L4a3L`L3`3MbL1^3NdL1\\3OfLiMkNa0^4g1jLbMoNb0Y4k1iLaMQOb0V4m1mL[MSOd0P4Q2RMUMPOj0m3Q2WMoLROl0g3U2YMlLSOm0d3W2\\MgLVOm0_3\\2UO]Ml0b2ZOYMf0g2ARM?m22cLN\\3Q5O1O1O002O0O1O010O2N1O1O1N102N1M3O001N2O2M2N2O1O1O1N20nJ_MYO`2d0fM[OY2e0jMZOT2g0lMYOR2i0nMWOQ2j0oMUOR2j0oMVOP2k0PNVOo1k0oMVOQ2j0nMWOQ2k0mMUOT2k0jMWOV2P6N1000O10O101O00O0100000000000000000`JjMKV2J]NNc1NdNO\\11dNO\\11dNO\\10fNOZ11fNOZ11fNOZ11fNOZ11fNOZ11fNOZ11eN0[10eN0Z10gN0Y10gN0X11hNOX11hNOW12iNNW12hNOW12iNNW12iNNX10iN0W10jNOW10jNOV10kN0V1OkN0W1NjN1W1NjN1V1OiN2W1MjN3V1MjN3V1MkN2U1NkN2U1NjN3V1MjN3U1NjN3V1MjN3V1MeNTKTOP5W2LdNUKUOo4W2KcN<]1DdN;[1FgN8Y1HhN7W1JjN5V1KkN4T1MmN2S1NmN2S1NmN2S1MmN4S1LlN5T1KkN6U1JkN6U1JjN7V1IkN6U1JkN6U1IlN7T1IkN8U1HkN8U1HkN8U1HkN8U1HkN8U1GlN9T1GkN:U1ElN;T1ElN
;T1ElN;T1ElN;T1DmNU1BjN?V1AjN?V1@kN`0U1@kN`0U1@kN`0U1@kN`0V1_OjNa0V1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1]OlNc0S1^OmNb0S1^OmNb0R1^OoNb0P1_OPOa0o0@RO?m0BSO>m0BSO>m0BRO?m0AUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>l0@UO`0k0@UO`0l0_OTOa0m0^OSOb0m0^OROc0n0]OROc0n0]OROc0n0\\OSOd0m0\\OSOd0m0\\OSOd0m0\\OSOd0m0\\OSOd0l0]OTOc0l0]OTOc0l0]OSOd0l0]OTOc0l0]OSOd0m0\\OSOd0m0\\OSOd0l0]OTOc0l0]OTOc0k0^OUOb0k0^OUOb0j0_OVOa0h0BWO>h0CXO=g0DYOl0ATO?n0_OROa0o0^OQOb0o0^OQOb0P1]OPOc0P1]OPOc0P1]OPOc0Q1\\OoNd0Q1\\OnNe0R1\\OmNd0S1\\OmNd0T1[OlNe0T1[OlNe0T1[OlNe0U1ZOkNf0U1ZOjNg0V1YOkNf0V1YOjNg0V1YOjNg0W1YOiNf0W1ZOiNf0W1[OhNe0Y1ZOmM[Kc0[5`1ZOmMZKd0\\5_1YOiNf0V1ZOkNf0U1_OfNa0Y1@fNa0Z1_OfNa0Y1@gN`0Y1_OhNa0X1_OhNa0X1_OhNa0W1@iN`0X1_OgNb0Y1^OgNb0Y1^OgNb0Y1_OfNa0Z1_OfNa0[1^OdNc0\\1]OdNc0]1\\OcNd0]1\\OcNd0^1[ObNe0_1]O^Nc0b1]O^Nc0b1]O^Nc0b1]O^Nc0b1]O^Nc0b1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O\\Nc0d1]O\\Nc0d1]O\\Nc0d1^O[Nb0e1^O[Nb0d1_OZNc0f1]OYNd0g1\\OZNc0f1]OnMSK6^5l1_OmMUK3_5P2\\OmMVK1_5R2[OlMQ1T2^OiM6W2IjM7V2_5000QJmMf0S2ZOmMf0S2ZOmMf0S2ZOmMUKMV5V2EmMQKLN1\\5W2DlMRKML1^5V2DkMSKNK1^5V2DjMTKOJ1^5V22iMNW2HkMoJNY5V2ImMmJMZ5V2ImMmJNY5U2IoMlJM[5T2FYN:g1FYN:g1FYN:g1FYN:g1FTNmJG^5V2DXN;h1EXN;i1DXN;h1EXN;h1EXN;i1HhMQK2W5V22hMOX2b51O00O100O100O10000O1000000000000000000001O1O001O001O00O100000000000000000000000000001O000000000000001O001O00001O00001O001O002N2N1O001O2N1O1O2N2N3M2N2N3M1O3M1O1O2N2N1O1O001O1O001O1O1O1PJYLZ2g3]L^LfNNl4e3ZLaLeNNQ5a3XLeLdNKT5`3TLjLdNJW5\\3RLnLdNHZ5Z3nKTMcNE_5W3kKVNU4j1iKYNV4g1hK[NX4e1fK]NZ4c1dK_N\\4a1aKbN_4^1`KcN`4]1^KfNa4[1\\KgNd4Y1ZKiNf4W1XKkNh4V1UKmNj4S1TKoNl4R1QKoNP5S1hJTOW5Q1^JUOb5Z40001O001O000000001O0000001O1O00001O001O1O001O1O1O1O1O00001O00000000001OO1000000000000000000000000000000000000000000O1000000O100001O00000000000000001O000000000000000000000000000000000000O1000000000000O1000000" + }, + { + "size": [ + 427, + 640 + ], + "counts": 
"`:j2a:0000000O10000000000000000001O00001O0000001O00001O0000000000001O000000000000000000000000000000O10000000000000000001O000000000000000000000000000000001O000000000000001O00000000001O000000000000000000000000000000001O000000001O001O1O001O1O0gMSET2m:lMSET2l:mMUER2k:nMUER2k:nMUER2k:mMWER2i:oMWEP2h:QNXEo1h:QNYEn1g:RNYEn1g:QN[En1e:RN\\Em1c:TN^Ek1b:TN`Ek1a:SNgEf1P;O001O1O00000000001O000000000000WO\\NkEd1T:]NlEc1S:^NmEb1S:^NmEb1R:_NnEa1S:]NnEc1R:\\NoEd1l:0001O000000000000001O000000O10000000000000000O100000000000000O100000000O10000O1]OYNbEg1]:[NbEe1^:[NbEe1^:\\N`Ee1_:]N`Ec1`:^N_Eb1a:^N^Ec1b:]N\\Ee1d:\\NZEe1f:[NYEf1g:[NWEf1i:[NVEe1j:\\NTEe1k:>00000000000000000iMWEl1i:TNWEl1i:;00000iMZEi1f:UN\\Ek1e:QN^Eo1o:000000000000000000000O10000O100O10000O1000000O1O1O1O100O100OWNPEY1n:c0O1O100O1000000O100001O00O1000000000000001O00000000000000000000O1000000000000001O000000000000000000000000000000000000000000000000000000001O000000000000000000000000000000000000000000000000O10000000SNXEW1h:gNZEY1f:fN[EZ1e:dN^E[1b:bNaE^1_:`NdE_1\\:aNdE_1\\:`NeE`1\\:_NdEa1[:`NeE`1[:_NfEa1Z:_NgE`1Z:_NfEa1Z:_NfEa1Z:_NfEa1Z:_NfEa1Z:^NgEb1Y:^NhEa1Q;00000000000000000000000000000000000000000000000O1000000O10000O1O10000000000O1000000O1000000001O_OYN]Eh1c:XN]Eh1c:YN\\Eg1d:ZNYEh1g:ZNREk1n:9000000000000000000000000000000O1000000000000001O000000000000O100000000000000001O000000000000000000000000000000000000000000000000000000O100000000000000000000000000000000O100000000000000O1O1O1N200O1O100000000001O000000000000001O00000000000000001O0000000000000000001O001O00001O000000000000000000O1000000001O00001O0000000000001O0000000000000000000000000000000000001O0000000000000000000000000000001O00O1001O0000O1000000000000000000QE" + } + ], + "question": "Which statement accurately describes the relationship between and the other objects?", + "choices": [ + "A. is in front of and behind .", + "B. is in front of both and .", + "C. is behind and in front of .", + "D. is behind both and ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_396.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000261116.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "PoT51`01`:4\\EOa:LUE68Oc:KTE86Ng:JSEf0l:30O100O010O00100O010O1O100O100O1O101N1O2OO0100O100O1O1O2N1O10000O010000000O0100000000O100jJ" + }, + { + "size": [ + 375, + 500 + ], + "counts": "bUP58];6K3N3M2M2O1N101N100O100O10O10O100000O10O100O1000000O100OO1010000000O0100O101O001O002N3L4M1N2N3L6Kjc4" + } + ], + "question": "Based on the provided information, what is the relationship between and ?", + "choices": [ + "A. is stuck inside .", + "B. is resting on top of .", + "C. is on top of .", + "D. is beside ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_397.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000404534.jpg", + "mask_rles": [ + { + "size": [ + 500, + 386 + ], + "counts": "o841300040G1R14oNj<[4TN4M10000O1000000000000000000000O100000000000000000000000000O10000000000000000000000000000O2O00000000000000O1000000000O100000000000001OO10000000O100000000000000000000000000O100000000000000000000000000O1000000000001O000000000O1000000000000000000O1000000000000000001O1O2M3O3L1N10000000000000000O10O11OO1000000N1L5M30O00010O3N00O0011M30O1O01O1O0PL^Dn3c;01O001O2O001M2ZLWDO00000U3j;lLeDQ3Z;PMfDP3Z;QMeDn2\\;RMYDL21JQ3k;RMdDm2];SMcDl2^;[M\\Dc2e;]M`D]2b;\\MRDM>f2`;\\MTDM=e2`;eM_DZ2b;fM_DX2b;hM^DX2b;hM^DW2b;jM^DU2c;lMlCD60Oa2o;kMTDDM`2P1lk[2" + }, + { + "size": [ + 500, + 386 + ], + "counts": 
"g?a4P;4O000000O1001O00O11O00001O00001O00001O0000M3N2N20000O10000001O00001O001O1O001O00002N:F1O1O1O00000000O1000000O10000O10000O10000O100000000O1000000O1000000O100000000O10000000000O10000000000O1000000O100O1FUEjKl:]42ESEoKm:o3SEQLo:m3RESLo:k3QEXLn:g3REZLn:e3QE^Ln:a3REaLm:\\3SEhLl:W3UEiLk:U3UEnLj:Q3UERMj:l2VEVMj:h2WEYMi:f2VE\\Mj:c2WE]Mi:a2UEcMk:[2VEfMj:X2VEjMj:U2VElMj:R2VEPNj:o1VERNj:l1VEVNj:i1UEYNk:e1VE\\Nj:c1VE^Nj:_1VEdNj:[1VEfNj:W1XEjNh:U1WEmNi:R1VEPOj:P1TEROl:k0VEVOj:h0WEYOi:e0WE]Oi:b0VE@j:>WECi:;VEHj:7WEIi:6WEKi:3VE0j:OUE4j:JWE7i:GWE;i:DWE=i:BUEa0k:T2000UMWEk0i:SOWEo0i:POWEQ1i:m10QMUEU1k:fNgEm0Y:nNmEQ1S:hNTFX1l9gNUFY1k9gNUFZ1j9eNWF[1i9eNWFZ1j9fNVFY1k9gNUFV1QOeMi:U1VFV1POgMi:S1WFQ1PORNh:m0XFP1POUNg:j0ZFl0ROjNV::gFl0ROkNX:9fFi0ROPOY:7eFh0QOQO\\:6dFf0oNWO8POk9S1nFf0nNWO7SOm9P1nF`0QO_O4ROm9o0nF`0PO^O6TOl9n0nF;ROE4SOl9m0nF:ROE7nNn9S1iFEPOG23M_O0OP;S1TFoNQ351U>m0jAL\\=8aBI_=V1000000000000O10000000000000000000000000000O10000000000000000O1000000000000000000000000000000000000O1000000000000000000O1000000000000000000000000000000O10000000000O1000000000000000000000000O1000000000000000000000000O1000000000000000000000000000000O100000000000000000000O1000000000000000000O1000000000000O10000000000000000O10000O100O10000O100O1O1O100O1O10000O1O100O1O10000O1O100O1O1O1O1O100O100O100O100O100O1O1O100O100O1O100O100O1O1O100O1O1O100O100O100O1O1O100HXLcDi3];ZL`Df3`;71O0000004L1OJVLbDj3];XLbDh3\\;ZLcDg3X;^LhDb3W;_LiDa3V;?O100O100O100O1O100O100O10000O1O10000O10000O10000000000000000000000000000000000000000000000000000000000001O000000000000000000000000000000000000O1000000000000000000000000001O00001O0000O100O1O100]Oc00000O1000000O10000000000000000001O1O1O000000000000000000O100000000003M4L1O1O1O3PK^E0N051O002NQ3NYMY and ?", + "choices": [ + "A. is mounted on .", + "B. is over .", + "C. is over .", + "D. is attached to ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_398.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000404534.jpg", + "mask_rles": [ + { + "size": [ + 500, + 386 + ], + "counts": "o841300040G1R14oNj<[4TN4M10000O1000000000000000000000O100000000000000000000000000O10000000000000000000000000000O2O00000000000000O1000000000O100000000000001OO10000000O100000000000000000000000000O100000000000000000000000000O1000000000001O000000000O1000000000000000000O1000000000000000001O1O2M3O3L1N10000000000000000O10O11OO1000000N1L5M30O00010O3N00O0011M30O1O01O1O0PL^Dn3c;01O001O2O001M2ZLWDO00000U3j;lLeDQ3Z;PMfDP3Z;QMeDn2\\;RMYDL21JQ3k;RMdDm2];SMcDl2^;[M\\Dc2e;]M`D]2b;\\MRDM>f2`;\\MTDM=e2`;eM_DZ2b;fM_DX2b;hM^DX2b;hM^DW2b;jM^DU2c;lMlCD60Oa2o;kMTDDM`2P1lk[2" + }, + { + "size": [ + 500, + 386 + ], + "counts": "g?a4P;4O000000O1001O00O11O00001O00001O00001O0000M3N2N20000O10000001O00001O001O1O001O00002N:F1O1O1O00000000O1000000O10000O10000O10000O100000000O1000000O1000000O100000000O10000000000O10000000000O1000000O100O1FUEjKl:]42ESEoKm:o3SEQLo:m3RESLo:k3QEXLn:g3REZLn:e3QE^Ln:a3REaLm:\\3SEhLl:W3UEiLk:U3UEnLj:Q3UERMj:l2VEVMj:h2WEYMi:f2VE\\Mj:c2WE]Mi:a2UEcMk:[2VEfMj:X2VEjMj:U2VElMj:R2VEPNj:o1VERNj:l1VEVNj:i1UEYNk:e1VE\\Nj:c1VE^Nj:_1VEdNj:[1VEfNj:W1XEjNh:U1WEmNi:R1VEPOj:P1TEROl:k0VEVOj:h0WEYOi:e0WE]Oi:b0VE@j:>WECi:;VEHj:7WEIi:6WEKi:3VE0j:OUE4j:JWE7i:GWE;i:DWE=i:BUEa0k:T2000UMWEk0i:SOWEo0i:POWEQ1i:m10QMUEU1k:fNgEm0Y:nNmEQ1S:hNTFX1l9gNUFY1k9gNUFZ1j9eNWF[1i9eNWFZ1j9fNVFY1k9gNUFV1QOeMi:U1VFV1POgMi:S1WFQ1PORNh:m0XFP1POUNg:j0ZFl0ROjNV::gFl0ROkNX:9fFi0ROPOY:7eFh0QOQO\\:6dFf0oNWO8POk9S1nFf0nNWO7SOm9P1nF`0QO_O4ROm9o0nF`0PO^O6TOl9n0nF;ROE4SOl9m0nF:ROE7nNn9S1iFEPOG23M_O0OP;S1TFoNQ351U>m0jAL\\=8aBI_=V1000000000000O10000000000000000000000000000O10000000000000000O1000000000000000000000000000000000000O1000000000000000000O1000000000000000000000000000000O10000000000O1000000000000000000000000O1000000000000000000000000O1000000000000000000000000000000O1000000000000
00000000O1000000000000000000O1000000000000O10000000000000000O10000O100O10000O100O1O1O100O1O10000O1O100O1O10000O1O100O1O1O1O1O100O100O100O100O100O1O1O100O100O1O100O100O1O1O100O1O1O100O100O100O1O1O100HXLcDi3];ZL`Df3`;71O0000004L1OJVLbDj3];XLbDh3\\;ZLcDg3X;^LhDb3W;_LiDa3V;?O100O100O100O1O100O100O10000O1O10000O10000O10000000000000000000000000000000000000000000000000000000000001O000000000000000000000000000000000000O1000000000000000000000000001O00001O0000O100O1O100]Oc00000O1000000O10000000000000000001O1O1O000000000000000000O100000000003M4L1O1O1O3PK^E0N051O002NQ3NYMY is attached to?", + "choices": [ + "A. ", + "B. The gate secured with a latch mechanism.", + "C. ", + "D. " + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_399.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000548780.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "W[R33W=1O2N1N201N1O100O1O100O100000000O100N2O1O1M3N2N1O2bLMbI5o5j0]IZO_6U1RIoNk6[1jHiNU7a1_HbNa7d3O1O1O1O010O1000000000000000N1TOlGTLd8i3`0000001O001O0]NnFmNS9l0WGPOj7kNbHb1;]OR7TObH\\1c0\\Oi6ZOdHX1i0YOb6AeHV1k0gNbNLQ88cHT1e9oNZFP1d9SO]Fl0`9WOaFh0\\9[OeFd0Y9^OfFb0W9BiF>U9DkFfGeJg7b5N1O1O00000001O1O001O1N104L1O1O1O2N2`LPHiN7k1l7POTIAeNUOd0d0j7:RIKD^Oo9?`1N1O1N2M7ITcm5" + }, + { + "size": [ + 427, + 640 + ], + "counts": "_kX38R=2L5M1L5N2M200O100O0100O010O010O1O0010O01000O10O1000000000000001O0000001O1O001O001O0010O01O001O000000000000001OO1000001OO10000001N2O00000O101O0O2O2N2N2M2O0O3N3L3J_jo3" + }, + { + "size": [ + 427, + 640 + ], + "counts": 
"n_T51X=2O1O2O0O3M5L7H5L1O1O0O10000000O010001O000O1O2N1O2N2O1N3L5M3YDdNX;i1M2O1FQNWEo1i:QNWEP2Q;1O1O1BnMbER2]:PNbEQ2\\:QNcEP2[:RNeEm1Z:TNfEm1W:VNhEj1X:VNhEk1U:XNjEh1U:YNkEh1T:XNmEg1S:YNmEh1R:XNnEh1R:XNoEh1Q:XNnEh1S:WNmEj1S:UNmEl1R:TNoEl1Q:SNPFm1P:SNoEn1S:oMmEQ2V:mMiET2X:jMiEV2X:hMhEY2Z:dMfE]2c:010O]OXEUN29f:^1eE`N[:^1hE`NX:^1lEaNS:^1oEaNR:^1oEaNQ:^1QFbNo9]1RFbNn9]1TFcNk9\\1WFcNi9\\1ZFcNf9\\1T101O1O1O10O02O0O10001N1O2N101O1O1O1N102N1O1O1N2O2M3M4L4M4KaVj1" + } + ], + "question": "Which object is being carried by ?", + "choices": [ + "A. , the dark-colored, structured handbag.", + "B. , the tan and teal handbag.", + "C. , the handbag with a multicolored abstract print.", + "D. , the woman with long blonde hair." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_400.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000008211.jpg", + "mask_rles": [ + { + "size": [ + 459, + 640 + ], + "counts": "j[i53V>2I8L4H7M3M7fBTOi<[1L8mCYNT;m1YD[O1gNP:0cFb3Y9bLdF`3Z9dLbF]3]9dLbF^3D`L_92lFj3S9VLlFk3S9VLmFj3S9VLlFk3S9ULnFk3R9XLjFi3V9[LdFg3[98eFfK^9[48O1I8UOUFeLM4T:S3h001JeFmLj7S3SHRMj7o2oGYMP8h2fFmLn0b0\\8f2\\GaMd8`2VGeMj8]2nFjMQ9i30O001O1O001O1bKdFk3_9PLeFn3^9mKdFR4k901H8N2@eEhLa:o2b0H9H7H9H7N3K6K5E`CeNdGX9HXFm02C=IZ9DZFQ1MC9M`9_O[FQ1LEM4b:6`Em0b:RO^Em0d:QO\\Eo0f:ROXEm0j:UOREl0o:UOoDj0S;V11O1AjDhM^;o1cDPNa;l1`DSNb;k1_DTNf;HYDi12^NP<]1QDbNR<[1nCeNS to the other elements?", + "choices": [ + "A. It is beside .", + "B. It is on but behind .", + "C. It is on .", + "D. It is on and beside ." 
+ ], + "answer": "D", + "type": "relation", + "image": "images/vqa_401.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000156643.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "j`l2<`>5N2M5L3M3M2N2N3M2N2O2M2O3L4M3M1O7I3M4L1O1O3M2N2N2N3M2N4L2N1O1O100O100O1O0010^NTDNl;1VDNk;1VDNj;2VDNk;0WDOm;MTD2k;NVD2j;NWD0j;OXDOj;0VDOk;0WDNj;2WDMi;3XDKi;5XDIi;6YDHi;7\\DBf;=]DAc;?_D_Oa;a0`D]Oa;c0`D\\O`;d0cDXO_;g0cDVO^;j0fDPO];o0[101N00010O001O01O1O100PCQOm;P1QDROn;P1QDoNo;R1o010O01O1N3N1O001O000010OQDiNU:X1hEQO`NE\\;Z1SF\\Oh9e0VFAe9?ZFI`97_FN\\93bFN^92_F1a9O\\F4d9MXF6h9LTF6l9KRF6n9KnE8R:IkE9V:GgE\\:CaE`0^:BZEd0g:k1O1O100O000000000000XO^ERMb:k2eEQM\\:l2gESMY:k2lERMT:l2PFQMQ:n2QFQMP:n2QFPMP:o2QFoLR:o2P1O101O1O1O1O10O01O0000100O1O1O2N1iN^DnNe;d0iDYO_=08GR]10mbN7I3M4N2N0O2N2M4M2M3M4L3N2N1O2N2M2O1M4N2N1N3N2N4L2N2M5L2N2N2N1O00001O1O1O1O1O1O1O1O1O1O2M3L5L3L4L3N3M3O0O11O01O2O5J2O0O01O000000000000000000001O001O01O01O001O1O001O1O1O2N001O1O2N1O1O2N1O1O1O1O001O2N2O0O2N2N101N2N1O2O1N4L2N1O2O2M2N4L4L2N2N2N101N1O2N4L2O0O2N2M3N2M4L6J6I9EdVZ2" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"kV43h>5N2N2M4M2N2L4O1L4L4K5M3M2M5M2L4M3N2K5N2J6N1O2K6K4K5mDbNoNfN]9l2ZGYOW8i0fGD]O]Mg02e6S3VIm0[6UOcIY1_OdKU6Y3ZJf1T5bNkJj1AbJh1?;@YNP43b26d0JEBWI2X7;V700N2O10000O1O10000O100O100O0100001O00000000000O100000000000000000000000000000000000000000000000000000000000000O100001O000000000000000000000000000000000O1001O01O000000001O001O001O001O0000001O001O001O00001O1O001N2O1O0010O01N100010OO3N1O010O1O1O2N1O2N8aBhND0T30`6\\3YF5o7h3F2N3RJdHR4k7oJTIb4U9@g0YO7I7I3M3M4L2N4L1O2N3M2N4L2N2M3N3M3M2N3M4L4J8_Ob0_O:J7J5N1N2M4K5N2M2N3N2K5_Oa0A>O2L3nMXMkGi2T8ZMiGg2U8[MkGe2T8]MjGd2U8^MgGe2X8\\MgGe2Y8[MfGf2X8\\MhGd2W8]MgGd2Y8]MhGb2W8`MiG^2W8cMiG\\2W8fMhGY2W8lMfGT2Y8oMeGP2X8WNfGe1Y8cNcG[1\\8lN`GS1_8QO_Gl0d8XOXGf0h8^OVG4X9OeFL`96^FJb98\\FGe9;ZF@j9a0UFZOP:g0PFWOQ:j0nETOT:n0iEQOY:P1fEoN[:S1bEmN_:U1_EjNb:W1\\EjNd:W1[EhNf:Y1YEgNg:h21000000O100000000O100000000O10000000000000000000000000000000000001O001O0000001O0000001O001O2N001O001fGgKk5[4QJhKn5Y4oIjKP6W4nIjKR6Z4iIhKV6Z4dIkK[6X4aIiK_6Y4\\IkKc6X4WIkKi6Z4`HnJHP1h7n5000000000001O0000001O00001O1O1O1O001O1O1O1O1O1O1O2N1O1O2N1O1O001O1OWNjHjKT7R4`I_K_6d1oHKh0\\NX6h1RIKj0ZNS6k1TIJn0XNl5m1XIKV1kMd5Y2XIKZ1bMb5b2WIKe84\\GLd83^GMa82`GN`81bGN^80fGNZ80iGOW80kGOU80mGOS8OSHLn72XHJh75ZHJf74]HJd74`HIa76cHG]76hHGY78jHFV78mHGS77SIDn6:WIBj6;[IBf6=[IAg6<]IBd6<_IAd6:cI_Ob6>aI_Oa6=eI@\\6;lIAU6;RJ]OS6a0QJXOT6d0V401O1O1O001O1O1O0O2N10001N2N2N3LdaR2" + }, + { + "size": [ + 480, + 640 + ], + "counts": "Z`l31l>;G5J6K4K4M4M2M3N8H2N001ODdBTO\\=l0gBPOY=Q1hBnNX=R1jBkNW=U1:0;F34KM4L8GUWT5" + } + ], + "question": "Which statement accurately describes the spatial arrangement of the objects relative to ?", + "choices": [ + "A. is holding in front of .", + "B. is in front of , which is holding .", + "C. is in front of , which is holding .", + "D. is in front of , which is in front of ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_402.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000252332.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "abe1h1V=Z1kBQMh;c3@l0TOf0ZO?A4L2N2N2N2N2N2M201N2N2N2N2N1O2N2N2N2N2N2N1O2N2N2N2N1O2N2N2N2O1N2M2O2N3M1O2N[MPI]Mn6b2VI\\Mj6d2VI]Mh6d2WI_Mf6b2XI`Mg6a2YI`Me6a2[I`Mc6a2\\IaMa6a5OnL`IeM^6[2dIdM[6]2eIQKNS2[6m2gIcMX6^2hIcMV6^2jIcMT6^2lIcMR6]2oIdMo5^2PJcMn5i5N2N2N2N]MXJPLg5P4ZJQLd5P4\\JPLc5R4\\JPLa5i6N2N2N1OYMgJjKW5W4iJjKU5W4kJjKS5W4mJjKQ5S7OUMPKiKn4X4SKgKl4Z4SKhKk4Y4UKiKh4Y7NPMZKhKe4j3[KdI0c2c4Y4]KgKb4Z4^KhK`4]7NlLbKhK]4Y4cKhK[4Y4dKhK[4`7N2N2N2N2N2N1O2N2NfIWL`1g3bNYL^1e3cN\\L]1b3Q5O2O1MbIbL_1]3aNcL_MOa1\\3Q1eL^MOa1[3c3eL^LX3g7N3N1N1OcJoLmNP3c6NkJUMWNj2j1XMUIN6M]4l2Y2TMYI0j4i2V7N3N2N3N1N1O2N1O2N2O0O1O2N1000O1000000001OO1000001OO10000001O0O100000000000000000001N1000000000000000000000001O000O1000000000000000000O1000001O00000000000000001N100000000000000000000O2O0000000000000000000O1000001O00000000000O10000000001O00000O10000000000O100000001O00000000000O11O00O10001O000000000000000O2O000000000000000O101N1O1O1O1O1O1O1O100N2O1O2N1O1N2O100O2N1O2N2N2N2N1N3N2N3N3L2N1O3M2N2N2M3N4L2O1N2M3N3M2N2N2N2M4N0O2N3M3M2N2N2M3N2N2O1N2N3M3L3N3M1O3M2N2N2O3L2M3N2N3M1O2N3M3M2O1N2M4L3N3M1O3M2O1N2N2N3M2M4L3N2O1N2N2O1M4N2L3M3N3M1O2N2K5H:]Oa0H9^Oa0D2010O1001O00000001O0000O10000000000000001OO1000000000000000000000000000000000000000000000001OO100001O00O10000001OO10000001OO10000001OO10000001OO1000000WET1_5kN]JQ2j4PNTKk2S4UMkKY3i3gLULS4YLlKe61QMm4[2SKdM\\5n1dJQNS6Y1mIfN]6Q1cImNe6m0[IROi6l0VISOm6j0TIUOn6j0RIUOQ7i0oHUOT7j0lHUOV7k0iHUOX7i0iHVOY7i0gHVOZ7j0fHUO]7i0cHVO_7j0`HUOb7i0_HVOc7i0]HVOd7j0\\HUOf7k0ZHROi7m0VHTOk7j0VHUOl7j0THUOm7k0SHTOo7l0PHSOQ8m0oGROS8l0nGSOT8l0lGSOV8l0jGROY8n0fGQO\\8n0dGRO]8l0dGSO^8l0bGTO_8k0aGSOa8m0_GROc8m0]GROe8m0[GSOf8l0ZGSOg8m0YGQOj8n0VGQOl8n0TGQOn8n0QGSOP9l0QGROQ9m0oFQOT9n0lFQOU9P1jFoNX9o0iFQOX9n0hFQOZ9n0fFPO\\9P1dFoN^9P1bFPO^9P1bFoN`9o0aFPOa9o0_FPO
c9P1\\FoNf9P1ZFoNg9Q1YFnNi9Q1WFnNk9R1TFmNn9Q1SFnNo9Q1QFnNQ:Q1oEnNR:R1mEoNT:P1lEoNV:Q1iEnNY:Q1gEnN[:Q1eEnN]:Q1cEnN_:Q1aEnNa:Q1_EmNd:R1\\EmNf:R1ZEmNg:S1YEmNh:R1XEmNi:S1XEkNj:T1UElNm:S1SElNo:S1QElNQ;S1oDlNS;S1mDlNT;T1lDkNV;T1jDkNX;T1hDkNZ;T1fDkN\\;T1dDkN];U1cDjN_;U1aDjNa;U1_DjNc;U1]DjNe;U1[DjNf;V1ZDiNh;V1XDjNi;U1WDjNj;V1VDiNl;V1TDiNn;V1RDiNo;W1QDhNQM3O10001N1000000000000O1000000000000000000O10000000000000000O100000000O100001OO1000000000000000000O1000000O1000000O1000000O10000O100O1O1O100O1O1O1N2O1O1O100O1O1O1O1O1N2O1O1O1O1O100O1O1N2ZCkM` relative to the other objects?", + "choices": [ + "A. is located behind .", + "B. is located between and .", + "C. is positioned in front of both and .", + "D. is on the back of ." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_403.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000530061.jpg", + "mask_rles": [ + { + "size": [ + 455, + 640 + ], + "counts": "lSU13S>2L7I4N1O4L1N3O1N2N3N0O2N2O000O1O0LZOTC;b]NEb1;]NHa1;kLaKo0Y4U27iLbKMNd0a4e21hL`KIO110Nc0c4k2OgL`KKN02g0b4f2NhL`KO0`0f4h2LgL^KMN02OO`0k4j2KgLZK0001OO`0n4h2GiL[K0001OO2O50JT5R3BoLZKO101OO;1DV5Q3^OWMZKJ2OO;\\5e2^OPMWKO1001N1b5n2XOPMgK0T5o2a5O1O2O1N3M2N2N3N2M5K3M1O3N2M3N0O100O2N1O10O0001O3N0O002N101N3N0O1O3N3L2N010O3M2N3NO01O1O3N0O0000010O1O010O100O1O01O01O2N10K\\OeBd0Z=^OgB`0Z=^OjB?`=00OO2OA[B5I7B>H8M3M2O2N3M2N1O1O2N001O1O010O1O1O1O1O1O2N2O0O1O1O2N2N2O1N2N2N1O3M1O01O0O1O100000001O0001O01O00000001O010O0000001O00010bEeMn8[2iFPNU9P2iFSNV9n1\\FaNb9_1[FfNc9[1TFbMMW1n9n2N1O1O_NVFgNh9Y1XFiNg9V1YFkNf9U1ZFjNg9d27M4K4iNjE[NX:a29K5L4G:E:O1O10001O01O00001eF_Mn6a2PIcMn6]2hHnMX7R2bHUN\\7k1_H[N`7e1[HaNd7`1UHZMoNY1k8]1oGPOP8P1jGVOU8k0`GA^8`0YGXM1\\2e8a0YGAg8Q3N2N2O1N9G001O1O10O0001O00010O00000001N1O1K6I6N2K5MWH" + } + ], + "question": "What is the primary action of in relation to and ?", + "choices": [ + "A. is holding and sitting on .", + "B. is sitting on and eating from .", + "C. is looking at and holding .", + "D. 
is holding and sitting on ." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_404.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000530061.jpg", + "mask_rles": [ + { + "size": [ + 455, + 640 + ], + "counts": "lSU13S>2L7I4N1O4L1N3O1N2N3N0O2N2O000O1O0LZOTC;b]NEb1;]NHa1;kLaKo0Y4U27iLbKMNd0a4e21hL`KIO110Nc0c4k2OgL`KKN02g0b4f2NhL`KO0`0f4h2LgL^KMN02OO`0k4j2KgLZK0001OO`0n4h2GiL[K0001OO2O50JT5R3BoLZKO101OO;1DV5Q3^OWMZKJ2OO;\\5e2^OPMWKO1001N1b5n2XOPMgK0T5o2a5O1O2O1N3M2N2N3N2M5K3M1O3N2M3N0O100O2N1O10O0001O3N0O002N101N3N0O1O3N3L2N010O3M2N3NO01O1O3N0O0000010O1O010O100O1O01O01O2N10K\\OeBd0Z=^OgB`0Z=^OjB?`=00OO2OA[B_OcX[4" + } + ], + "question": "Which statement correctly describes the interaction between , , and ?", + "choices": [ + "A. is using to eat from .", + "B. is looking at while holding .", + "C. is holding who is looking at .", + "D. is eating from inside ." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_405.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000530061.jpg", + "mask_rles": [ + { + "size": [ + 455, + 640 + ], + "counts": "glZ29n=3N1O02O1OO101O00010O6J2N4L00N2N3M3MJ7M2N3J6L4M3L4L3M4K5L3M5J6I6J8F9K5FiDZM];`28L4K4N3M2O4K3L3O1N4L4N1N1O2M4K5L3N3K4M4J6G]BHg=2goV1" + }, + { + "size": [ + 455, + 640 + ], + "counts": "]XX18n=8G:G8H7I4L5K7I4M3L3M3M3N1O2M3M3N1O2M3N1O1N2O2M2O1O1N2O1O1O1O1O2N1O1O1O1O2N1O2N10O01O1O100O10O01O001O100O1O001O001O1O1N2O001O2N1O1O1O1O1O1O1O001O001O0010O2OO000L5O001J5O1O1000gKbFi3^9`0O2O00000O1O1O101O0O1O100O10000O101O0000000O11O0000O1000000000000000000000O10000000000000000O100000000OjNPKWIP5i6PKWIP5i6PKWIP5i6PKWIP5P80001N1000000O100000000O100O10000O2O00000O100O100O2O0O100O100O101N100O1O1O100O1N2O2N1N2N2O1O1O1O1O2N1O100O2N1O1O100O2N1N200O1O2N100O2N1O1O2N1O1N3N2M2O2M2N2N3N1N3N1L5L4K4N3L4L4K5I7D>_OcX[4" + }, + { + "size": [ + 455, + 640 + ], + "counts": 
"lSU13S>2L7I4N1O4L1N3O1N2N3N0O2N2O000O1O0LZOTC;b]NEb1;]NHa1;kLaKo0Y4U27iLbKMNd0a4e21hL`KIO110Nc0c4k2OgL`KKN02g0b4f2NhL`KO0`0f4h2LgL^KMN02OO`0k4j2KgLZK0001OO`0n4h2GiL[K0001OO2O50JT5R3BoLZKO101OO;1DV5Q3^OWMZKJ2OO;\\5e2^OPMWKO1001N1b5n2XOPMgK0T5o2a5O1O2O1N3M2N2N3N2M5K3M1O3N2M3N0O100O2N1O10O0001O3N0O002N101N3N0O1O3N3L2N010O3M2N3NO01O1O3N0O0000010O1O010O100O1O01O01O2N10K\\OeBd0Z=^OgB`0Z=^OjB?`=00OO2OA[B5I7B>H8M3M2O2N3M2N1O1O2N001O1O010O1O1O1O1O1O2N2O0O1O1O2N2N2O1N2N2N1O3M1O01O0O1O100000001O0001O01O00000001O010O0000001O00010bEeMn8[2iFPNU9P2iFSNV9n1\\FaNb9_1[FfNc9[1TFbMMW1n9n2N1O1O_NVFgNh9Y1XFiNg9V1YFkNf9U1ZFjNg9d27M4K4iNjE[NX:a29K5L4G:E:O1O10001O01O00001eF_Mn6a2PIcMn6]2hHnMX7R2bHUN\\7k1_H[N`7e1[HaNd7`1UHZMoNY1k8]1oGPOP8P1jGVOU8k0`GA^8`0YGXM1\\2e8a0YGAg8Q3N2N2O1N9G001O1O10O0001O00010O00000001N1O1K6I6N2K5MWH" + } + ], + "question": "Which object is sitting on, and which object is attached to ?", + "choices": [ + "A. is sitting on and is attached to .", + "B. is sitting on and is attached to .", + "C. is sitting on and is attached to .", + "D. is sitting on and is attached to ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_406.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000481390.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Voj43l>2N1O011N3N2N1O1N10001bABY>c0O0001N101N2N4L3LhUW4" + }, + { + "size": [ + 480, + 640 + ], + "counts": "dRh55i>3O001O01O001O0000100O00000001O0mBMY;3gDNk97`EKd0Ok99_EHAN`02^:;_EF@1a0N`:<^EE@4?Ma:<_ECA6=Kc:=^EBA;:Ff:?^E@A?8Bi:?^E@@b07_Oj:a0^E_O_Od06]Om:`0^E_O^Of06[On:`0^E_O]Oh04[OQ;>^E_O\\Ok00[OW;:\\EA]O]1W;SOYEB@[1X;SOTEFBX1[;SOoD9IOY;IjD90LW;W1mDeNU;Y1oDcNY;U1mDeNT;[1nDbNR;h0WD^Oh0GR;k0VD_Oi0DQ;h0[DDe0CP;h0YDIh0\\OP;j0YDJk0WOn:n0VDMka02N1O1O1O1N2O1O1N2O1N2O1N3M3KSej1" + }, + { + "size": [ + 480, + 640 + ], + "counts": "Y3e;[3000001O000000001O0000001O0000001O00000000001O0000000000001O0cKbLXM^3f2iLUMW3i2mLUMS3i2QMUMP3i2SMUMm2j2ZMPMf2[2eLYJk0W3`2_2hLWJl0W3]2`2jLWJk0W3]2_2kLVJk0Z3]2[2ZNdMl1V2UNiMl1V2TNjMQ2P2PNPNW2i1iMWNX2h1hMXNZ2f1gMYN[2e1eM[N\\2d1dM\\Nd2\\1]MbNk2W1VMhNk2X1UMhNj2X1XMfNi2Z1\\M`Nh2]1cMWN`2g1cMUN_2k1cMQN^2P2fMjM]2V2hMbMk2m1XMnMj2Q2VMnMk2R2TMnMl2R2UMmMl2R2UMmMk2S2WMkMi2U2XMjMh2V2]MeMc2[2bM`M^2`2eM]M[2c2gM[MY2e2iMYMW2g2jMXMV2h2kMWMT2j2mMUMS2k2nMTMQ2m2QNQMn1P3SNoLm1R3TNlLn1R3SNmLo1R3RN`LmKLQ6d3WNWLoK3j5f3dNZL\\1f3eNYL^OOaMi3P3XLZO5fMc3V7^LjHc3U7^LjHc3U7]LkHd3S7gLbHZ3^7QMWHP3^2VLa1Q1kKi2d2YL^1P1lKh2e2ZL]1o0lKj2f2nKbL6j4S1WKgN`0T4R3PLV1\\1eKg2T8]MgGf2W8\\MfGh2X8XMhGi2R4bKgKOm3b6aMmIh0Di1a6ZM`J;oNZ2d6XMbJ9kN_2f6UMbJ7jNe2g6PMdJ5gNj2i6mLdJ4dNo2m6hLbJ6aNS3n6dLcJ7`NU3n6cLeJ4^NY3n6QLhH9Q2N`3[4SKXLm0A>Ld3Z4RKYLi0C`0Ke3X4SK[Ld0Ed0He3X4SK]L`0Ff0Fg3W4TK]L>Fg0Fg3W4TK]L=Gh0Eg3W4TK^L;Hh0Cj3X4RK^L:Ii0Ak3X4RK`L5Km0\\Om3Y4QKbL0LR1XOn3Z4PKcLNMS1VOo3[4oJcLMNT1SOQ4\\4nJSNP1bMR4[4oJRNo0cMR4[4oJRNn0cMT4[4nJRNn0cMT4\\4lJSNo0`MV4]4kJSNn0aMW4\\4kJSNn0aMW4\\4kJTNc0SM^O=T5\\4kJTN?oMe4n3kJTN>oMg4m3kJTN=QNg4k3lJUN;RNh4j3lJTN;VNf4f3oJTN;WNe4e3PKTN;XNd4d3QKUN:XNg2fNgNn4WNTN:ZNe2gNiNk4XNUN9[Nc2gNkNi4YNWN7YNd2hNlNh4ZNWN5ZNc2iNmNg4[NVN4\\NQ1bN36=f4[N
XN2[Nn0gN60?f4\\NXN0\\Nl0iN7Na0f4[N0l0_K6Lb0e4]N0h0bK7Jd0c4bNO?fK:Ie0b4cNO;iK;Gg0a4dNO7lK;Fi0`4fNN4mK^1BaNc0[1]OcNg0[1YOdNm0W1SOhNR1S1POkNU1Q1kNoNV1Q1iNoNX1YOVId1b5SO\\1l0dNTO\\N\\OfLN\\O110l1[1f3YOXNAdLIAa0n1_OWNP1^5F[N^OaLh0ESOg1n0h3J[N\\OaLR1[1Fi3L[N\\O`LU1\\1Ai3OZN[OaLX1Z1]Ok31ZNYOaL[1X1[On31XNYObL^1U1XOQ42WNXOcL`1S1VOS43VNWOdLa1R1UOT45aJSO[32nLa12]N=h0g47^JSO]31nL`1GkNg0:i4?lMiNoLa1IlNa0;k4`0]N8YLlNID`0h0Q5c0ZN5[LWO52W5b0YNnN\\L?03OO\\5b0ZNiNbL>I:LN_5b0YNgNeL>G_1[5]OXNfNgL=G_1Z5_OWNdNlL;Cb1Z5_OXN`NPM<_Oe1Y5@XOiN_Kg1Y5@YOgN_Ki1X5AZOaNaKn1V5@N`02CK=6FF::GE9;HD8dLR5kN`Jb4=cLS5kN`Jd4;aLU5lN_Jd4<_LV5oN[Je4=ZLY5ROYJe4>XLY5SOYJf4=WLZ5TOXJf4U7bMdIV2RO9[7bM]I_5c6bJ[I_5e6bJVIb5k6`JkHg5U7Q11O100O1O101O000O10010O0O100000010JZHoHg7Q7ZHnHf7R74O100O2O000O10XHQI_7n6:O[HSIZ7l6eHUI?" + }, + { + "size": [ + 480, + 640 + ], + "counts": "PTT42m>2O1N100O1O100O1O1O1O1O1O100O1O1N2O1N2L4N2M3M2OF81200OO00M41N2N3O0N20O2O0O1O100M400O001O1O1O1000]NCbD>Z;HdD7[;McD3];NaD3_;O_D1d;MZD3i;LVD3l;NQD3o;OoC1R<1kCOU<2iC1Z4M2M4N110O2N0011O2M1O0O10000000O0010O1O1O000010O000000000001O00O100O10000RCFU;;gDKW;5hDMW;5fDLZ;;_DF_;`0ZDCe;a0UDC];o0\\DSOa;Z1QDjNm;W2M4L6J9HM3O12N2N1O3M2O1N10O2N2O9F3M2N010O1POZDdNf;Y1dDbN[;\\1hDdNW;[1kDdNU;[1lDeNS;Z1oDgNP;W1SEgNn:X1TEgNl:X1TEiNm:U1TEjNm:U1UEiNm:T1VEjNl:T1TElNm:d0ZDSOk08l:b0eE\\O^:`0eE]O]:b0R2N2N3M2N2N2N3M3M3LPko2" + }, + { + "size": [ + 480, + 640 + ], + "counts": "^g<2k>8J2M2O1M4L3N2N2N2N200O1O101O3RBoNg=\\1H:YOaN[Cg1[ and in what state of motion?", + "choices": [ + "A. is running on while holding .", + "B. is jumping from while holding .", + "C. is standing on while holding .", + "D. is running on while holding ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_407.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000481390.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "YZn76i>2N10000O0100O0001O0O101_NMXC074=FQ;;VD070a0Jm:;WDg0j0ROl:c1TE_Nj:a1UEbNj:^1UEdNi:\\1VEfNj:Y1VEjNh:U1WEnNh:R1WEPOh:P1WEROh:n0TEWOk:j0nD]OQ;d0lD_OT;a0iDBV;U200001cLgDP3[;lLgDT3b;2O0O2N2O1N2_EdL[9]3aFeL_9^3]FcLd9`3WFaLi9b3SF_Lm9\\2iE]N2\\OU:U2nEROR:l0QFROQ:=aFA`9fF_O\\9?gF^OZ9UOcEg0Y10T9YOgE`0[13P9\\OiEj8BmEM]1?h8CfE2m11^8j0dGSO^8k0dGSO^8l0cGSO]8l0dGSO^8k0dGSO^8j0eGUO\\8d0kGYOY8b0U3M3Jgh>" + }, + { + "size": [ + 480, + 640 + ], + "counts": "bgP32l>3N101O00001O01O010O10O010O010O001O0010O000001O000XOJeB7W=OfB2X=1dB2[=1bB0^=2_BO`=h0100O1N2O1N3N1O2O101O100O10O0\\CZOl:g0QE]Om:d0PE_Oo:b0mDAT;?hDEW;Q<@`DK@b0T3O001O01O001O0000100O00000001O0mBMY;3gDNk97`EKd0Ok99_EHAN`02^:;_EF@1a0N`:<^EE@4?Ma:<_ECA6=Kc:=^EBA;:Ff:?^E@A?8Bi:?^E@@b07_Oj:a0^E_O_Od06]Om:`0^E_O^Of06[On:`0^E_O]Oh04[OQ;>^E_O\\Ok00[OW;:\\EA]O]1W;SOYEB@[1X;SOTEFBX1[;SOoD9IOY;IjD90LW;W1mDeNU;Y1oDcNY;U1mDeNT;[1nDbNR;h0WD^Oh0GR;k0VD_Oi0DQ;h0[DDe0CP;h0YDIh0\\OP;j0YDJk0WOn:n0VDMka02N1O1O1O1N2O1O1N2O1N2O1N3M3KSej1" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"`i?5h>6L4L4M5J5WOXOQCm0iYEI8d1b:oNlEk0V:RO^F:g9B_F7f9F]F6i9E\\F6h9F\\F6g9G[F6k9DXF9j9EYF9i9DYF:XN`3[4SKXLm0A>Ld3Z4RKYLi0C`0Ke3X4SK[Ld0Ed0He3X4SK]L`0Ff0Fg3W4TK]L>Fg0Fg3W4TK]L=Gh0Eg3W4TK^L;Hh0Cj3X4RK^L:Ii0Ak3X4RK`L5Km0\\Om3Y4QKbL0LR1XOn3Z4PKcLNMS1VOo3[4oJcLMNT1SOQ4\\4nJSNP1bMR4[4oJRNo0cMR4[4oJRNn0cMT4[4nJRNn0cMT4\\4lJSNo0`MV4]4kJSNn0aMW4\\4kJSNn0aMW4\\4kJTNc0SM^O=T5\\4kJTN?oMe4n3kJTN>oMg4m3kJTN=QNg4k3lJUN;RNh4j3lJTN;VNf4f3oJTN;WNe4e3PKTN;XNd4d3QKUN:XNg2fNgNn4WNTN:ZNe2gNiNk4XNUN9[Nc2gNkNi4YNWN7YNd2hNlNh4ZNWN5ZNc2iNmNg4[NVN4\\NQ1bN36=f4[NXN2[Nn0gN60?f4\\NXN0\\Nl0iN7Na0f4[N0l0_K6Lb0e4]N0h0bK7Jd0c4bNO?fK:Ie0b4cNO;iK;Gg0a4dNO7lK;Fi0`4fNN4mK^1BaNc0[1]OcNg0[1YOdNm0W1SOhNR1S1POkNU1Q1kNoNV1Q1iNoNX1YOVId1b5SO\\1l0dNTO\\N\\OfLN\\O110l1[1f3YOXNAdLIAa0n1_OWNP1^5F[N^OaLh0ESOg1n0h3J[N\\OaLR1[1Fi3L[N\\O`LU1\\1Ai3OZN[OaLX1Z1]Ok31ZNYOaL[1X1[On31XNYObL^1U1XOQ42WNXOcL`1S1VOS43VNWOdLa1R1UOT45aJSO[32nLa12]N=h0g47^JSO]31nL`1GkNg0:i4?lMiNoLa1IlNa0;k4`0]N8YLlNID`0h0Q5c0ZN5[LWO52W5b0YNnN\\L?03OO\\5b0ZNiNbL>I:LN_5b0YNgNeL>G_1[5]OXNfNgL=G_1Z5_OWNdNlL;Cb1Z5_OXN`NPM<_Oe1Y5@XOiN_Kg1Y5@YOgN_Ki1X5AZOaNaKn1V5@N`02CK=6FF::GE9;HD8dLR5kN`Jb4=cLS5kN`Jd4;aLU5lN_Jd4<_LV5oN[Je4=ZLY5ROYJe4>XLY5SOYJf4=WLZ5TOXJf4U7bMdIV2RO9[7bM]I_5c6bJ[I_5e6bJVIb5k6`JkHg5U7Q11O100O1O101O000O10010O0O100000010JZHoHg7Q7ZHnHf7R74O100O2O000O10XHQI_7n6:O[HSIZ7l6eHUI?" 
+ }, + { + "size": [ + 480, + 640 + ], + "counts": "PTT42m>2O1N100O1O100O1O1O1O1O1O100O1O1N2O1N2L4N2M3M2OF81200OO00M41N2N3O0N20O2O0O1O100M400O001O1O1O1000]NCbD>Z;HdD7[;McD3];NaD3_;O_D1d;MZD3i;LVD3l;NQD3o;OoC1R<1kCOU<2iC1Z4M2M4N110O2N0011O2M1O0O10000000O0010O1O1O000010O000000000001O00O100O10000RCFU;;gDKW;5hDMW;5fDLZ;;_DF_;`0ZDCe;a0UDC];o0\\DSOa;Z1QDjNm;W2M4L6J9HM3O12N2N1O3M2O1N10O2N2O9F3M2N010O1POZDdNf;Y1dDbN[;\\1hDdNW;[1kDdNU;[1lDeNS;Z1oDgNP;W1SEgNn:X1TEgNl:X1TEiNm:U1TEjNm:U1UEiNm:T1VEjNl:T1TElNm:d0ZDSOk08l:b0eE\\O^:`0eE]O]:b0R2N2N3M2N2N2N3M3M3LPko2" + }, + { + "size": [ + 480, + 640 + ], + "counts": "^g<2k>8J2M2O1M4L3N2N2N2N200O1O101O3RBoNg=\\1H:YOaN[Cg1[OmA3l=e0AVO]BO4l0[=^OcBc0Y=e0N3M4L4M2lCUNo:n1kD[NP;f1mD]NQ;f1lD\\NS;e1jD]NU;f1gD^NV;h2fDaLQ;f32N=C2N1O0O2N1SO[D_Nf;X1fDcN[;\\1hD`NZ;a1fD]N[;d1fDYN[;h1eDVN];k1cDRN_;o1i01O0010O0eNYC=g<^O^Cb0c?", + "choices": [ + "A. and are standing, while is running.", + "B. and are running, while is standing.", + "C. and are standing, while is running.", + "D. and are running, while is standing." + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_408.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000042296.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "jg\\11X=0WgP19WXoN;J7J4L4M3M3M4L3M3M3N3L3N3L3N3M3M3M2N2N2N1O2N1O2O0O3M3N1N1O100O2N100O100O101O0O10000O1000000O1000000000000O01000000000000O1000001O00000O10001O000000001O0O101O001O000O2O001O1O0O2O1N101O1N2O1N2O1O1N101O1N3N1O1O2M2O1O2M1O3M2N2N3M3K5L5K5I9D`0^On`Y4" + }, + { + "size": [ + 425, + 640 + ], + "counts": "QUo172LhA5L1O2N1O1O1O1O1O1O2O0O1O1O010O100O1O10000O100O10O010000O1000O0100000O010000000000000000O2O000000000001O0001O0000001O0001O0000001O01O01O001O00001O0010O01O001O1O001O1O0O2O1O0O2O1N2O1O1O1O2M2M4L3M3N3M2N3N2M3L5L2N4KVbb5" + } + ], + "question": "What is the relationship between and the other objects?", + "choices": [ + "A. is looking at and about to hit .", + "B. 
is about to hit .", + "C. is looking at .", + "D. is attached to ." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_409.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000278353.jpg", + "mask_rles": [ + { + "size": [ + 640, + 596 + ], + "counts": "[me1k0gb0d0C;CO1O1O1O10O01O001O0000jMhJdEX5[:nJ`ER5_:SK]Em4c:UK[Ek4f:WKWEi4h:\\KTEd4l:^KREb4n:`KPE`4P;bKmD_4S;cKkD]4d8bKgG5AZ4W8YLPHBCV4o7kLZHROBT4P8RMZHlNCS4Q8VMYHjN@S4U8XMXHh4e7^KXHb4g7`KYH`4e7cKYH]4g7dKXH\\4i7fKTHZ4n7gKnG[4R8eKmG[4U8eKhG\\4Y8jK^GX4c8c3O0100O002N1O2N000010O01O1O001O2NnHQET5o:lJoDV5P;m110O101OO01000O0100O1000_HREo5m:QJTEo5l:PJTEQ6k:oITER6m:mISET6l:lITEU6l:`1O00ZHUEX6k:gIUEY6k:gIUE[6j:dIVE\\6j:dIVE]6j:bIVE^6j:bIVE_6j:`IVE`6j:\\10SHVEb6j:]IWEd6h:\\IXEd6h:\\IXEe6h:YIYEh6f:YIXED1W6g:TJXEB6Y6b:UJXEB6Z6a:TJYEE3W6d:TJYEE3VOOn6e:VJYEC0[O100l6f:VJYEC0@1g6f:VJYEL1n5g:UJXEI1TO1o6e:RK^En4b:RKYEPN2n6f:TKZEm4e:PKZEPN1P7e:oJ]ES5d:lJ[EU5e:S201O01O0010O0001cGWEa7i:^HXEb7h:_HWEa7j:j001O01O000010O\\GWEQ8i:oGWEQ8i:d0OWGWE[8j:cGWE]8k:aGUE_8l:`GTEa8l:9005K0001N1M4N2O1N2N2O2M3N6J2M5L5J6J5L1O1N2O2N1N3N2N2M2O2M3N1O2N1O1O1O1O1N2O1O1O1O1N2O2N3M3M2N4L3M4L5fKlC>X00000O1010O1O001O10O0000000jIoL]KQ3^4YM]Kg2b4]M[Kc2e4cMUK]2j4lMnJT2R5RNhJn1Y5UNdJj1]5[NnGWNUN1l1]3Q8_N`G@GP2j8dNVGGIe1Q9gNnFLN]1T9nN^F39o0Y9U2dFjM]9X2_FiMa9Z2[FgMe9\\2XFdMh9_2SFcMm9X600001O0000000O20O00O100O1O100N2N2O1O100N2M3N2O1O1O1N2O1N2N3N1N\\FZGS8d8mG]GS8c8lG_GS8`8mGaGS8_8kGdGU8Z8jGhGW8W8hGkGX8T8gGmG_8m7_GUHj8b7TGaHR9Y7kFiH[9Q7bFSIV:U6iEkIb:j5^EVJg:g5PEaJS;\\5gDjJ\\;U5^DnJe;f63M4gIVDS4Y and ?", + "choices": [ + "A. is in front of .", + "B. is in front of .", + "C. is behind .", + "D. is standing on ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_410.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000278353.jpg", + "mask_rles": [ + { + "size": [ + 640, + 596 + ], + "counts": "[me1k0gb0d0C;CO1O1O1O10O01O001O0000jMhJdEX5[:nJ`ER5_:SK]Em4c:UK[Ek4f:WKWEi4h:\\KTEd4l:^KREb4n:`KPE`4P;bKmD_4S;cKkD]4d8bKgG5AZ4W8YLPHBCV4o7kLZHROBT4P8RMZHlNCS4Q8VMYHjN@S4U8XMXHh4e7^KXHb4g7`KYH`4e7cKYH]4g7dKXH\\4i7fKTHZ4n7gKnG[4R8eKmG[4U8eKhG\\4Y8jK^GX4c8c3O0100O002N1O2N000010O01O1O001O2NnHQET5o:lJoDV5P;m110O101OO01000O0100O1000_HREo5m:QJTEo5l:PJTEQ6k:oITER6m:mISET6l:lITEU6l:`1O00ZHUEX6k:gIUEY6k:gIUE[6j:dIVE\\6j:dIVE]6j:bIVE^6j:bIVE_6j:`IVE`6j:\\10SHVEb6j:]IWEd6h:\\IXEd6h:\\IXEe6h:YIYEh6f:YIXED1W6g:TJXEB6Y6b:UJXEB6Z6a:TJYEE3W6d:TJYEE3VOOn6e:VJYEC0[O100l6f:VJYEC0@1g6f:VJYEL1n5g:UJXEI1TO1o6e:RK^En4b:RKYEPN2n6f:TKZEm4e:PKZEPN1P7e:oJ]ES5d:lJ[EU5e:S201O01O0010O0001cGWEa7i:^HXEb7h:_HWEa7j:j001O01O000010O\\GWEQ8i:oGWEQ8i:d0OWGWE[8j:cGWE]8k:aGUE_8l:`GTEa8l:9005K0001N1M4N2O1N2N2O2M3N6J2M5L5J6J5L1O1N2O2N1N3N2N2M2O2M3N1O2N1O1O1O1O1N2O1O1O1O1N2O2N3M3M2N4L3M4L5fKlC>X00000O1010O1O001O10O0000000jIoL]KQ3^4YM]Kg2b4]M[Kc2e4cMUK]2j4lMnJT2R5RNhJn1Y5UNdJj1]5[NnGWNUN1l1]3Q8_N`G@GP2j8dNVGGIe1Q9gNnFLN]1T9nN^F39o0Y9U2dFjM]9X2_FiMa9Z2[FgMe9\\2XFdMh9_2SFcMm9X600001O0000000O20O00O100O1O100N2N2O1O100N2M3N2O1O1O1N2O1N2N3N1N\\FZGS8d8mG]GS8c8lG_GS8`8mGaGS8_8kGdGU8Z8jGhGW8W8hGkGX8T8gGmG_8m7_GUHj8b7TGaHR9Y7kFiH[9Q7bFSIV:U6iEkIb:j5^EVJg:g5PEaJS;\\5gDjJ\\;U5^DnJe;f63M4gIVDS4Y[1CdN41b0c3ZO\\LN6OP2U1hN]Ob1@aMO0O60d00\\O000:1@>1Gc00[O]JRBb5\\>J>B?UKe@S4i?K002N1O1O2N001O1O1O1O1O001O001O000000001OO1jM`LlC`3Te1oAQNU>h1QBUNR>d1SB[No=`1UB_NS>V1QBiNS>n0TBPOo=j0UBUOn=`0ZB@h=5aBKf=_OiBa0[`001OO1001O001O0000001O000000000000O1cITO]C8m5d0f6KoH5P7NoH1Q74jHLV7S1aNQ6]2fHSOY1aNn5d2cHkN_1bNh5n2cH`Ne1bNe5Y3]HVNn1bN_5e3ZHjMW2bN[5j3[HdM[2bNW5m3]HaM\\2dNT5P4\\H\\M`2dNT5Q4[H\\M`2cNT5T4ZHZMb2aNT5W4WHYMe2aNP5]4WHSMh2`NP5`4VHPMj2aNn4b4VHmLl2cNi4e4YHhLn2dNf4g4ZHfLQ3cNa4l4[HbLT3cN_4n4ZH`LW3bN^
4R5WH]L[3cNZ4T5XHYL_3dNV4[7iKfHU4\\7jKeHR4^7nKfHm3[7SLeHl3\\7ULdHg3[7fHoGc3f0f3\\7gHnGc3f0a3a7lHiGc3i0[3e7eL[HZ3f7gLYHY3g7gLZHX3f7hLZHW3g7iLZHV3g7jLYHS3i7mLYHo2i7QMXHl2j7TMVHk2k7VMUHg2n7XMTHe2m7[MSHe2n7ZMTHc2m7]MTH]LmNo5R9dMUH[2k7eMWHX2k7gMWHV2j7kMVHS2k7mMUHQ2m7oMSHP2o7PNTHk1m7UNUHf1n7ZNUHb1l7^NZH[1g7eN]HV1d7jN]HS1e7nN[Hn0h7RO\\Hh0f7YO]Hc0c7]O_H\\LhMP4k9DaH9a7GbH\\LbMe3m9OPICS7=PITORMZMR:b3ZIdNP7]1VIZNl6f1[IRNf6o1^IkMc6U2cITMkKVO06f:_3lI[LPL5T:`3bJ`L^5`3^JZLlJ7f:_3bJ`L^5a3fJZLZ5f3gJYLY5g3gJZLX5f3iJZLV5f3b5ZLi_Oc3W`0]Li_Oc3W`0\\Lj_Od3Y`01O01O0001O00000010O1OO2O000001OO2O1OYLj_Od3V`0\\Lj_Od3V`0[Lk_Oe3U`0[Lk_Oe3U`0[Lk_Oe3X`00O10010O01OdM^LYBNh1e3e;[L_B`0f0B8c3c]LiAc3W>]LiAc3W>[LkAe3X?0000001QOXLdAh3\\>XLdAh3Z?001QOWLeAi3[>XLVAOK0Ij3V?WLl@O42O0Ji3V?WLk@O64LMMi3V?jLl@[3S?j0YAXKC00O0k0O\\O0b11XN5Z1MSOl2A`M]2AfM0On05:_c0PO" + } + ], + "question": "Which statement accurately describes the positions of the objects relative to ?", + "choices": [ + "A. is standing on , which is on .", + "B. is standing on , and is also on .", + "C. Neither nor is on .", + "D. Only is on ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_411.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000217400.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "^6T2l<0000001N100000001O000000000000000000O100O010001O0O2O001O0O1000001N101O00000O2O1O1O2N001N1000000000O11N10000O1000001O000O101O0000O10O100000000000000O2O00001N1000001O001O000O101O000000000O101O0000001N1000001O000O10000000000O10001O000O1000000O1000000O100000000000000000000O1000000O2O0000001N101OO0101O1O1O00001O1O0O2O00001O1O000O2O000O2O0000001N1000001N100000001O0000000O101O0000001O00000O10000O10001O001O000O101O000O10001O00000000001O00000O2O001O00000O1000001N1000000O101O00000O10000000000O2O000000000000000O10000O10001O4K4M2N2N1N2O3M0000000000000O1000O1O100O1O0100000O1000000001O00001N10000O101N100000001O001N1000001O001O1O00000O2O00000O2N100000000O100000001O1O2N1O1O1N3N2N00001O001N2O001O0000000O10O01O1000O100O1000000O10000O1O1M3O010000000000000000000000000001O0O101O1N2N100O2O0O10001O0000000O10001O00000000000000001O00001O001N1O1000O010000000000O0100000000O10000000000O100000O10000000O100O100O11O1O000000O10000001O00000000000O100001O000O1O100000000O10001O000O2O0O2N3L2O1O1N2O2O1N1000000001O0001O1O1O00001O10O0001O001O001O001O0O2O00O11O9F3N001N2N2L3G\\FPKj9Q5210I7M4M1N2N4J9hKYEd00ZOS11GOUOd0[;G\\V`1" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"m5>b>0000000O100000O0101O00O010000O1000O1000O100000O01000000O0100000O1000O01000O10O1000O10O10000000O010000O100000O01000000O01000O10000000O0100000000O100O10O1000O010000000O10O1000O100O00100000O10000000O100O10O10O10O1000O10000000O1000O100000000O1000O01000O100O1000O2N1000000O010000O10O100O1000000O10O1000O10N2O2_OkAJ40R>g0M2O1N1O0_Ob0M3O1O1000O010000O1000O010000O1O10000O10O10O01000O100O10000O010O10000O10O10O10000O10O010000O1000000O01000O100O10O010000O10000O01000O1000O0100O1000O010000O10000O10O1000O10O10O100O01000O10000O10O10O10O010000O10O10O100O10O1000O100O100000O01000N1O2O100O010O10O100OMWCPNj and ?", + "choices": [ + "A. is positioned over .", + "B. is a component attached to the side of .", + "C. is traveling on top of .", + "D. and are parallel and side-by-side." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_412.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000017182.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "]bc57R=4N1O1O1O1000O10O10000O1O10O2O1O000000001N2O1O2N2MmQ]2" + }, + { + "size": [ + 428, + 640 + ], + "counts": "[b]25T=7K5K6I6K4L4L7I4M0O2N10O01O100O100O1000O10O1000000O10000000O10O1000000000O010000000000000O10000000O100000O101Oc0]O[SV5" + }, + { + "size": [ + 428, + 640 + ], + "counts": 
"o^g12Y=3N00001O0O1001OO1O2O1N5SCB0132K=b;S3TN:F1O1O000000000000000O100000000O1000000000000000000O10000000000O10000000000000000O10000000000000000000000O100000000000000O1000000000000000000O100000000O1000000O100000000000000000000000YLoFS3Q9mLoFS3Q9d03nKmFJ2e3b9O000000O1O100N2O100O1O100000000000000000000000000001O000000001O0000000000O100oMULaJk3`700000O10000000000000000O100000000O1000000000000O100000000000000O100000000O1000000000000O1000000O10000000000O10000000000000000O10000000000O1000000A?N2O100O100000000O100000000000000O1000000O1000000000000000000000000001O0000000000001O00000000001O0000000000001O000000000000000000001O0000001O002N3M00000000000000000000O1000000000000O100000000000000O100O100000000000000O100000000000000000000O10000000000000000000000O1000000000000O100O10000O10000O1000000000000000000000000000000O10O2YMcGkM43O1OP1MQOh8g0e3M302N?", + "choices": [ + "A. and are on .", + "B. is on , and both are on .", + "C. and are on .", + "D. Only is on ." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_413.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000017182.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "[b]25T=7K5K6I6K4L4L7I4M0O2N10O01O100O100O1000O10O1000000O10000000O10O1000000000O010000000000000O10000000O100000O101Oc0]O[SV5" + }, + { + "size": [ + 428, + 640 + ], + "counts": 
"o^g12Y=3N00001O0O1001OO1O2O1N5SCB0132K=b;S3TN:F1O1O000000000000000O100000000O1000000000000000000O10000000000O10000000000000000O10000000000000000000000O100000000000000O1000000000000000000O100000000O1000000O100000000000000000000000YLoFS3Q9mLoFS3Q9d03nKmFJ2e3b9O000000O1O100N2O100O1O100000000000000000000000000001O000000001O0000000000O100oMULaJk3`700000O10000000000000000O100000000O1000000000000O100000000000000O100000000O1000000000000O1000000O10000000000O10000000000000000O10000000000O1000000A?N2O100O100000000O100000000000000O1000000O1000000000000000000000000001O0000000000001O00000000001O0000000000001O000000000000000000001O0000001O002N3M00000000000000000000O1000000000000O100000000000000O100O100000000000000O100000000000000000000O10000000000000000000000O1000000000000O100O10000O10000O1000000000000000000000000000000O10O2YMcGkM43O1OP1MQOh8g0e3M302N and ?", + "choices": [ + "A. is in front of .", + "B. is on .", + "C. is in front of .", + "D. is on ." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_414.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000559543.jpg", + "mask_rles": [ + { + "size": [ + 333, + 500 + ], + "counts": "]lV24Q:8oK4RJAX3`0c2i0cL\\O\\3i0]LZOb3k0XLWOf3o0TLSOl3o0oKTOP4n0hKYOX4j0XKCU4Q1fKSOS4ZNTKf2c0VOW4P1_KZO^4k0[KZO`4\\3N2N2I7L4O1N2O001N3N10OO201O0O0100001OO1001O00O1001OO010001OO100001O00O20O0000O10100O1O001O2M3N1O103L2N002N3M5K1O5K6J2eMPKUOR5i0RKSOQ5]NjJk18FS5YNgJINo1=M]5SNVJ71^1=7^5\\NUJ\\1`06h5F]J8e5SOPJVOENj0g1c5nNUKP1l4gN]KX1e4bNaIKm1_1c4fN`IJ10k1]1h4fN\\IM10l1[1i4gN`KW1a4hN`KV1m6O1O2N2M4L3M3N3L3N3L4L3M4KPb0OS^O0gE0ef<6jaP1" + }, + { + "size": [ + 333, + 500 + ], + "counts": 
"led0:R::F7J4L2N1N2O0O2O000O1O100O1O101N1000001N1BdNkG[1S8lNiGT1V8QOfGo0Z8LdG[O\\8c0iGYOX8f0kGYOS8h0nGVOS8i0oGVOQ8j0oGWOP8i0PHXOo7b0gGQO<=m7a0XH_Oh7a0XH_Oh7`0YH@g7`0YH@f7a0ZH_Of7a0ZH_Of7a0YH@g7?ZHAf7?ZHCd7<]H0W70iH8o6HQI`0f6AYIb0e6^O[Ic0d6]O[Ie0d6\\O[Ie0d6[O\\Ie0d6[O[Ig0c6ZO]Ig0b6YO^Ih0a6XO_Ii0`6WO`Ij0_6VOaIk0^6VO`Il0OYNk5k0VJl0N[Nj5k0WJk0I_NP6g0VJk0EbNU6c0VJ_1j5cNTJ\\1m5fNQJX1Q6iNnIR1V6oNjIn0Y6SOfIk0\\6TOeIj0]6VOcIi0^6VOcIi0]6XOcIg0^6YOcIe0^6[OcId0]6\\OcIc0]6]OdIc0\\6\\OeIc0\\6]OeIb0[6^OeIa0\\6^OeIb0Z6_OfIa0Z6^OhI`0Y6@gI`0Y6@gI?Y6BgI>Y6BgI>Y6AhI>Y6BhI=W6DiI8k0i7TOVHm0j7SOUHn0k7SORHn0P8QOoGP1Q8QOmGP1T8oNjGR1W8b0001N1000O010000O10000O1000O01000O010000O100O=D7Hf0ZO6K4J5Mnji2" + }, + { + "size": [ + 333, + 500 + ], + "counts": "mVl25U:3O1O2O0O2O001O2O2M00N3M3M4LW`Q2" + } + ], + "question": "What is the relationship between and ?", + "choices": [ + "A. is on the floor next to .", + "B. is standing next to .", + "C. is placing on .", + "D. is holding ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_415.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000578545.jpg", + "mask_rles": [ + { + "size": [ + 474, + 640 + ], + "counts": "[me2:Y>9K4M2M4J5J6M3N3L3N2N2N2N2N2N3M2O1O1N3M2N3M3N1O2L4N1O2N2M3M3N2N5K6J4`LbL]Kb3_4`L`Kb3]4`LaKf3Y4aLbHDk2o3_4eL\\K_3`4gLZK\\3d4fLZK\\3d4eLZK^3c4fLYK^3d4dLYK_3d4fLXK\\3e4iLVKY3h4iLWKY3f4lLUKX3g4nLTKT3i4RMQKQ3l4QMSKP3k4QMTKQ3k4PMTKQ3k0kJj1U2YMR3l4PMQKR3n4oLPKR3P5oLnJR3R5oLlJR3T5PMiJQ3X5PMeJQ3\\5QM`JP3a5QM\\JP3d5RMYJo2Z1`Jb2d2oKn2_1^Jc2a8k100000O10000oJ^Gj3b8VL^Gj3b8UL_Gk3`8X100N2O1N200N2O1O10kIiG2N2O1N2O1O1N2N2O1N3N1N2O1N2N2N3UCQOg;Q1XDPOg;Q1UDSOj;n0VDSOg;P1WDQOg;Q1XDQOe;R1ZDnNd;T1\\DlNc;V1[DkNd;X1ZDiNc;[1YDgNe;]1WDfNf;\\1XDfNg;\\1WDeNh;W2N2O1O1O100001O00001O00001O00001O0010O0001O00010O0001O01O00001N1O2N1O1O2N1O1O2N1O1O2N1O2N1O2N1O2N1O1O2M2O1O2N2N1O2N1O2N1O2N1O2N2N1O2N1O2N1O2N1O1O2N1O2N1O2M3^OPB7Z>O1O1O1N3Moj12PUN1O10O_>2`AO1Nnj7NVUH0\\Zm1" + } + ], + "question": "What is the 
relationship between and ?", + "choices": [ + "A. is holding .", + "B. is attached to .", + "C. is reading .", + "D. is sitting on ." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_416.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000437898.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "gXR3\\1f;:G9A?kNV1mMaM`HV3f6T2O2O1O1N2O001O001O1O1O1O1O1O1O1O1O001O001O1O1O1O1O1O1O1O001O1O1O1O010O1O1O001O1O1O1O1O001O001O000000001O001O1O1O0000000O10O100000000O101O0eK`Jk0`5UObJJ>TMR5P3bJlMJ\\On1Gf3Q3kJiLn25W2R3^NlLc1T3^NkLb1T3`NjLa1V3aNhL_1X3Z40000000000000000O100000000000000000000000000000000O2O00000000000000001O00000ZOmESNT:l1nESNS:0YFi0FVOT;f0REVOo:?", + "choices": [ + "A. is located between and .", + "B. is over .", + "C. is below both and .", + "D. is above ." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_417.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000225532.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "gdb27_;4K3N1O1O1O011N100O2OO100000000O11O000000O1001O0000O101O3M10O0010O00001L4Ibaf2" + }, + { + "size": [ + 375, + 500 + ], + "counts": "h_d21f;1O1O2N1O1N2O1O1O1O001O001O001O0oDBi:?VEBi:?VECh:=XEDg:=ZEBe:>\\ECb:>]EDa:=_ED_:=aEC^:=bED]:=cED[:=eECZ:=gEDW:=hEEV:SHAl7?UHAj7a0UH^Ok7c0UH\\Ok7e0TH[Ol7f0THYOl7g0VHWOj7j0VHUOj7l0VHSOj7n0VHQOj7P1VHoNj7R1VHmNj7S1WHlNi7U1WHjNi7V1XHiNh7Y1WHfNi7[1WHdNi7\\1XHcNh7_1WH`Ni7a1WH^Ni7c1WH\\Ni7d1XH[Nh7f1YHXNf7j1ZHUNf7l1ZHSNf7n1ZHQNg7o1YHPNg7Q2YHnMg7S2YHlMg7U2XHkMh7V2XHiMh7X2XHgMh7Z2XHeMh7[2YHdMg7]2YHbMf7`2ZH_Mf7b2ZH]Mf7d2ZH[Mf7f2ZHYMf7h2ZHWMf7j2ZHUMf7l2ZHSMg7m2ZHPMg7P3`01O1O1O100O002N001O001O2N001O1O1O1O1O1O0000000000000000000000000000cMQHR1o7eN_HV1a7eNgHX1X7fNPIU1Q7hNXIQ1h6nNbIi0]6VOiIf0W6YOmId0S6\\OnIc0R6\\OQJb0o5^ORJ`0o5@SJ>n5@TJ2fMOU80WJKdML14OLU89WJ and ?", + "choices": [ + "A. is driving on .", + "B. is driving on .", + "C. is driving on .", + "D. 
is parked next to ." + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_418.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000564336.jpg", + "mask_rles": [ + { + "size": [ + 360, + 640 + ], + "counts": "gTV12T;3N2N2I6L4M3M3G9M3M300N1010N101M2IPFVOR:g08O1M4M210000O100O2O3dEFd93ZF6e9J]F6a9JiFLX93nFGR9:n000001O2_E_OW:n0J;F4K1O2AbNoFa1P9j0100O10O1O00001O1Ng]h2" + }, + { + "size": [ + 360, + 640 + ], + "counts": "0[5m500000000000000000000000000000000O100000000000000000000000000O10000K5O1O1000000000000001O0000000000000000O11O1O1O00002N3M3M2N3M2N2N1O001O000000M5aK^JlN0000O12NN3O]10hN14OF2OO11O0Oc81YV7OWlH11OP\\n5" + }, + { + "size": [ + 360, + 640 + ], + "counts": "R_P24kg0NWC:E6K4L4L4L4L4N2O0100000000O1L4L400O11O00000001O01O00000O100010O0000000000000000000000000N2L4L4L4M4L3LQa]4" + } + ], + "question": "What is the relationship between , , and ?", + "choices": [ + "A. is looking at and is positioned in front of .", + "B. is looking at while being beside .", + "C. is looking at and is positioned beside .", + "D. is looking at and is positioned in front of ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_419.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000319935.jpg", + "mask_rles": [ + { + "size": [ + 398, + 640 + ], + "counts": "i9d2j90O1000000O1000000O100O1O1N2N2O100000000O10000O10000O100000000O1000000O10SMkFa2U9]MRG^2n8aMVG\\2j8cMZGZ2e8gM\\GX2d8hM]GW2c8hM^GX2b8hM^GX2a8iM_GW2a8iM_GW2a8iM^GX2b8hM^GX2b8hM]GY2b8hM^GX2b8hM^GX2b8gM_GY2a8gM^GZ2a8fM`GZ2_8gMaGY2_8fMbGZ2^8fMaG[2_8eMaG[2^8fMbGZ2^8fMbGZ2]8fMcG[2]8eMcG[2]8dMdG\\2\\8dMcG]2\\8dMdG\\2\\8dMdG\\2\\8dMdG\\2\\8dMdG\\2\\8cMdG^2\\8bMdG^2\\8aMeG_2T91000000O100000000O10000O1000000O100VO\\MjGd2V8]MiGc2W8^MgGc2Y8]MgGc2Y8]MgGc2Y8]MfGd2Z8\\MfGd2Y8]MgGc2Y8]MgGc2Y8]MfGd2Y8]MgGc2Y8]MgGc2Y8\\MhGd2X8\\MgGe2Y8ZMhGf2P9000O1001O3M6J7I7I7I8H6J4L4L2N1O1O7I1O1O1O1O0000000000000000O1O1O1O1N2O1O1O1O1O1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000000000VLPOVLP1j3TORLl0n3VOPLj0o3XOPLh0o3]OmKc0R4_OmKa0R4@nK`0Q4AoK?Q4APL>P4BRLo5ARJ>n5BSJ=m5CSJ=n5ASJ?n5@RJ`0Q6\\OoIe0S6YOlIh0U6WObIR1^6mNkHk1V7TNhHn1X7QNhHP2X7PNhHP2X7oMjHP2V7PNjHP2V7oMkHQ2U7oMlHP2U7oMlHP2T7PNlHP2T7oMmHQ2S7oMmHQ2S7oMnHP2S7nMnHR2S7mMmHS2S7lMnHT2R7lMoHS2Q7lMPIT2Q7jMQIU2o6kMQIU2o6kMRIT2n6lMSIS2n6kMSIU2m6kMSIU2m6jMUIU2k6kMUIU2k6kMUIU2l6jMUIU2k6jMVIV2j6iMXIV2i6iMWIW2i6iMXIV2h6iMYIW2h6hMYIW2h6hMXIX2h6gMYIY2h6fMYIY2g6gMYIY2g6fMZIZ2f6eM\\IZ2d6fM\\IZ2e6eM\\IZ2\\801O1O00001O00001O001O00001O001O1O001O6Jd0\\Od0\\O7I1O1O00001O0000O1O1O1oKDlJ0VN=m6EZJP2g5oMWJS2Q81O1O100O1bMiMdJX2Y5lMeJU2[5jMfJV2h71O1O100O1O1O1O1eM`MhJ`2Y5_MaH2o1`2c5aM]J_2h71O1O1]N[M\\If2d6[M[Ie2d6\\M[Ie2\\6ZM\\H1X1e2T6cMcHHX1e2^6[MZH0W1e2i6\\MUIh1fN\\Ni8L`H]1U8dNjGU1]8kNbGo0e8QOZGo0g8ROWGn0j8ROVGm0k8SOTGm0m8SORGn0n8RORGm0o8SOPGn0Q9QOnFo0o8mNUF1l0R1o8nNXGR1h8nNWGR1j8oNUGQ1l8mNTGS1S:010O1O1O1000000mNlNlFT1g:lN]D=P6CX;:bDIc;=2L4K5M300001O=D4K2N]OnDKQ;3f00000003MO2Liel2" + }, + { + "size": [ + 398, + 640 + ], + "counts": 
"Q^k433c0P;g0I5J6O1O1O1O1O100O1O100O100O100O10000000000000000001O00000000O1O1O1O1O1O1O1O1O1O1O1O100N2O100O1O1O100O1O100O1O1O1O1O1O1O1O100O1O1O1O1O1O1O100O1O1O1O1O1O1O100O1O100O100O1[Oe0O100O10000O1000000000000000000000000001_LmGe2S8XMRHf2n7YMSHg2m7XMUHg2k7WMXHh2h7UM[HK^Oh2W8[M^HK^Oh2U8[M_HL\\Oi2U8ZMiHe2W7ZMjHf2W7XMkHg2U7XMmHg2T7XMlHh2T7WMnHh2]8O2N1O1O1O1O1O2N1O001O2N2N1O1O2N2N2N1O2N1O1O1O1O1O001O1O1O001O001O0000000000O100O1N2O100O1O1O10000000000001O1OK5M3O1O1O1O1N200O100M3O1O100O1M3N2N2O100O1N2O1O1O1N2N2O100bNQM\\IP3b6QM]IQ3b6PM\\IR3b6PM]IQ3a6QM^IP3b6PM]IQ3b6QM\\IP3d6PMbHJ;X3R7SM]HH?V3T7RM\\HI?V3T7\\MlHd2T7\\MkHe2U7[MjHf2V7ZMgHi2Y7WMgHi2Y7XMfHh2Z7YMdHh2\\7YMbHh2_7XM^Hj2b7Q11O00000000001O00001O00001O00001O1OVH" + }, + { + "size": [ + 398, + 640 + ], + "counts": "nQY1>n;:G8H5L6I9H7I7I7I8G8H8H8H4L5L1N2O1O3M3M1N3N0O100O1O11O00O01O1O001O1N10001000O100O10O1000O100O10000O1O1O1000000000000000000001O1O1O1O1O2N2N3M2N2N3M4L3M3M1O1O0O11N0100000000000000000000000O10000000000O10000O10000O100000000000000000000000001O6J2N1O1O001O000001N1O3L6L4J9_O:N2N2M3M3L4L4L3N3M3K6K4M3L5Ab0BZdS2HW\\lM11O\\b`2" + } + ], + "question": "What spatial relationship do , , and all share with ?", + "choices": [ + "A. They are all next to .", + "B. They are all under .", + "C. They are all on .", + "D. They are all inside ." 
+ ], + "answer": "C", + "type": "relation", + "image": "images/vqa_420.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000116439.jpg", + "mask_rles": [ + { + "size": [ + 640, + 429 + ], + "counts": "fcg49fc0?B2N2N3M3M7I1O001O00O11O000000O10000001O000O1000001O1O1O1O1O1O2N1O1O1O00hb0A\\]O;fb0C^]O6jb0EZ]O7`hT3" + }, + { + "size": [ + 640, + 429 + ], + "counts": "aWd5h0Wc07K0O10O02O00O1O01O01O01O00000000001N10000O1000O10O100O1O1O1O1N2N200O100001N101N2N1O2O1M4M5J5JbKcA1Lj4a>UKcA2Jj4c>SKdA2Kj4j>VKVAj4a>UKcA2Jj4c>TKcAQ5]>PKaAQ5_>oJ]A60e4c>UK]A60e4h>ZKXAf4h>93M1O001O4L2N002N001O001O2N1O1O3M2N001O1O3M3M2N5K0000001O00000000O10000001O00000000000000O11O000000000000000000000000000000000000O11O0000000000000000000000000000000000000000000000000000001O000000O11O00000000000000000000000000000000000000000000000000000000000000001O00000000000000O1000000000000000000000000O100000000000000000000O1000000O100000000000000000000O100000000000000000000O10000O10000O100O10000O10000O1000000O1000000O100000000000000O1000000O100000000O10000O1000000O1O1000000O100O100O1O1O100O100O1O1TO]KYBe4f=`KTBb4k=aKoAc4P>aKkAa4T>f0O1O1N2O100O10000001O0000001O00001O00001O000000001O000000001O00001O000000001O0000A[JdBd5\\=]JcBc5\\=^JdBb5\\=_JcBa5\\=aJcB_5]=aJcB_5]=aJdB^5\\=cJcB]5\\=dJdB\\5\\=eJcB[5]=eJcB[5]=eJcB[5]=eJdBZ5\\=gJcBY5\\=hJdBX5\\=hJeBW5[=iJeBW5[=iJeBW5[=iJeBW5[=iJfBV5Z=kJeBU5[=kJfBT5Z=lJfBT5Z=lJfBT5Y=mJgBS5Y=mJhBR5X=nJhBR5X=oJgBQ5Y=oJgBQ5X=QKgBo4Y=QKgBo4X=RKiBm4W=SKiBm4W=TKhBl4W=UKiBk4W=VKhBj4Y=UKhBj4X=VKhBj4X=VKhBj4Y=UKhBj4X=VKhBj4X=WKgBi4X=XKiBg4W=YKiBg4W=ZKhBf4X=ZKhBf4X=[KgBe4Y=[KhBd4X=\\KhBd4X=\\KhBd4W=^KhBb4Y=]KgBc4Y=]KhBb4X=^KhBb4Y=^KfBb4Z=^KfBb4[=]KfBb4[=]KeBc4[=]KeBc4[=]KfBb4Z=^KfBb4Z=_KeBa4Z=`KgB_4X=bKhB^4W=cKjB\\4R=hKnBX4Q=iKoBW4Q=iKoBW4Q=jKnBV4Q=kKoBU4Q=lKoBS4V=hKjBX4W=fKjBZ4V=gKiBY4W=gKjBX4V=hKjBX4W=hKhBX4X=hKhBX4X=hKiBW4W=jKhBV4X=jKhBV4X=jKiBU4W=kKiBU4V=mKiBS4V=nKkBQ4S=QLmBo3R=SLmBm3hj?BV@>i?DU@>i?CW@=i?CW@=i?DV@ZOSAe0o>XORAh0P?VOPAj0R?TOm@n0V?nNk@ZNLd
2Z?QOn@n0S?POo@o0R?POn@P1R?POn@P1S?oNm@Q1S?oNm@P1T?oNl@`NG]2_?ROj@bNF\\2`?QOk@S1V?lNj@dNE[2c?POi@cNF[2c?QOk@o0V?nNl@Q1V?nNj@R1V?mNk@T1U?kNk@U1V?iNk@nN\\OV2h?kNm@oN[OV2i?iNm@PO\\OU2j?hNl@ROYOW2l?eNo@\\1Q?bNPA^1Q?aNo@_1S?]No@b1S?\\NPAc1P?ZNVAGiNJ1c1R`0fNmAIoMV1T`0oN^Bf0_`0O2N4L3Bg\\OOfc0N100001O0000001O0O1000001O1O1O001O001O01O00004D\\\\O3jc0O1O0Ob^P3" + }, + { + "size": [ + 640, + 429 + ], + "counts": "\\_c51dc0?", + "choices": [ + "A. , , ", + "B. , , ", + "C. , , ", + "D. , , " + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_421.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000116439.jpg", + "mask_rles": [ + { + "size": [ + 640, + 429 + ], + "counts": "cd69fc0hb0A\\]O;fb0C^]O6jb0EZ]O7`hT3" + }, + { + "size": [ + 640, + 429 + ], + "counts": "[`02332OO2ib0W1X]OhN4Li`01T_O^4^>bKcA1Lj4a>UKcA2Jj4c>SKdA2Kj4j>VKVAj4a>UKcA2Jj4c>TKcAQ5]>PKaAQ5_>oJ]A60e4c>UK]A60e4h>ZKXAf4h>93M1O001O4L2N002N001O001O2N1O1O3M2N001O1O3M3M2N5K0000001O00000000O10000001O00000000000000O11O000000000000000000000000000000000000O11O0000000000000000000000000000000000000000000000000000001O000000O11O00000000000000000000000000000000000000000000000000000000000000001O00000000000000O1000000000000000000000000O100000000000000000000O1000000O100000000000000000000O100000000000000000000O10000O10000O100O10000O10000O1000000O1000000O100000000000000O1000000O100000000O10000O1000000O1O1000000O100O100O1O1O100O100O1O1TO]KYBe4f=`KTBb4k=aKoAc4P>aKkAa4T>f0O1O1N2O100O10000001O0000001O00001O00001O000000001O000000001O00001O000000001O0000A[JdBd5\\=]JcBc5\\=^JdBb5\\=_JcBa5\\=aJcB_5]=aJcB_5]=aJdB^5\\=cJcB]5\\=dJdB\\5\\=eJcB[5]=eJcB[5]=eJcB[5]=eJdBZ5\\=gJcBY5\\=hJdBX5\\=hJeBW5[=iJeBW5[=iJeBW5[=iJeBW5[=iJfBV5Z=kJeBU5[=kJfBT5Z=lJfBT5Z=lJfBT5Y=mJgBS5Y=mJhBR5X=nJhBR5X=oJgBQ5Y=oJgBQ5X=QKgBo4Y=QKgBo4X=RKiBm4W=SKiBm4W=TKhBl4W=UKiBk4W=VKhBj4Y=UKhBj4X=VKhBj4X=VKhBj4Y=UKhBj4X=VKhBj4X=WKgBi4X=XKiBg4W=YKiBg4W=ZKhBf4X=ZKhBf4X=[KgBe4Y=[KhBd4X=\\KhBd4X=\\KhBd4W=^KhBb4Y=]KgBc4Y=]KhBb4X
=^KhBb4Y=^KfBb4Z=^KfBb4[=]KfBb4[=]KeBc4[=]KeBc4[=]KfBb4Z=^KfBb4Z=_KeBa4Z=`KgB_4X=bKhB^4W=cKjB\\4R=hKnBX4Q=iKoBW4Q=iKoBW4Q=jKnBV4Q=kKoBU4Q=lKoBS4V=hKjBX4W=fKjBZ4V=gKiBY4W=gKjBX4V=hKjBX4W=hKhBX4X=hKhBX4X=hKiBW4W=jKhBV4X=jKhBV4X=jKiBU4W=kKiBU4V=mKiBS4V=nKkBQ4S=QLmBo3R=SLmBm3h, , and ?", + "choices": [ + "A. is on , but is not.", + "B. is on , which is on .", + "C. Both and are on .", + "D. is on both and ." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_422.png" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000581615.jpg", + "mask_rles": [ + { + "size": [ + 640, + 478 + ], + "counts": "QXi223m1ORNTa0j3eNQ1\\MYKkBM2Z6ob3M3O1N2O0O2N1O2O2N1N101N101N1O2O1N2N2N101O1N2N2O0O2O0O2O0O2N2N2N2O1N100O3N1N1O2N101N2O1N2O0O2N2N1O2O1O1N2O1N1O1O2O1N2N2N1O2N3M2N2M3N3L4I8H:D?BPGYE]9V:c0^Oa0_Oc0]Oe0]Oc0[Of0TOe0D7N2O1O1N101N101N2N101N2N2M4M3N2L4L8Hf0jNX1`NWQh0mMl]WO5MMO520OOWa0Li^O^30eLe?_4[@`K4OR=d6L4L4N2N2N2M3K5M3O1N200O1O1O1O100O1O10000O100O100O10000000000O100000003VDPHK?3Y2n7U9M101O0000000000000000000000000000000000000000000000000nJjESMYNS3m;HSF5m9JUF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF5k9KUF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF5k9KUF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF4l9LUF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF3k9MUF3k9MUF3k9MUF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LUF5k9KUF5k9KUF5k9KUF5k9LSF6l9JQF9o9IaEe0_:^40000000000000000000000000000000001O000000000O101O00001N2O0M3]Nc1iMX2M201O1O001O1O1O1O1O1O2N2N2N1O4LB:A=ZDfGn:R9A>@>F8E and ?", + "choices": [ + "A. is mounted on .", + "B. is leaning against .", + "C. is placed on the floor in front of .", + "D. is behind ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_423.png" + } +] \ No newline at end of file diff --git a/evaluation/GAR-Bench/eval_detailed.py b/evaluation/GAR-Bench/eval_detailed.py new file mode 100644 index 0000000000000000000000000000000000000000..31b33a8284e30d8ae86e84ff334e430f1c67ac52 --- /dev/null +++ b/evaluation/GAR-Bench/eval_detailed.py @@ -0,0 +1,213 @@ +# -------------------------------------------------------- +# Copyright (2025) Bytedance Ltd. and/or its affiliates +# Licensed under the Apache License, Version 2.0 (the "License") +# Grasp Any Region Project +# Written by Haochen Wang and Yuhao Wang +# -------------------------------------------------------- + +import argparse +import base64 +import io +import json +import os +import re + +import numpy as np +import openai +from PIL import Image +from pycocotools import mask as mask_utils +from pycocotools.coco import COCO +from tqdm import tqdm + +# Define Azure OpenAI details +model_name = "gpt-4o-2024-11-20" +max_tokens = 1000 # range: [1, 4095] + +# Initialize the Azure client +client = openai.AzureOpenAI( + azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"), + api_key=os.getenv("AZURE_OPENAI_KEY"), + api_version="2024-03-01-preview", +) + +prompt_ann = """ +You are a language model expert. Your task is to evaluate the following model output based on the provided images, and subject, object, and relationship. + +- subject_name: {subject_name} +- object_name: {object_name} +- predicate_name: {predicate_name} +- model_output: {model_output} + +Task: +1. Check if the model output describes the {subject_name}. +2. Check if the model output conveys the relationship between {subject_name} and {object_name} related to {predicate_name}. + +Note: +- The first task only requires checking if {subject_name} is mentioned in the model output. 
+- The second task asks if the output conveys a relationship related to {predicate_name} between {subject_name} and {object_name}, even if different words or phrases are used. +- If both tasks are successfully completed, return "True" Otherwise, return "False" +- Do not output any reasoning. Do not perform correction. Please output only just one "True" or "False". + +""" + + +def process_questions(outputs): + + pattern = r"^```json\s*|\s*```$" + try: + cleaned_str = re.sub(pattern, "", outputs, flags=re.MULTILINE) + questions_data = json.loads(cleaned_str) + except: + print("Error in parsing JSON") + return [] + return questions_data + + +def encode_pil_image_to_base64(pil_image): + buffered = io.BytesIO() + pil_image.save(buffered, format="PNG") + img_str = base64.b64encode(buffered.getvalue()).decode("utf-8") + return img_str + + +def mask_to_box(mask_np): + mask_coords = np.argwhere(mask_np) + y0, x0 = mask_coords.min(axis=0) + y1, x1 = mask_coords.max(axis=0) + 1 + + h = y1 - y0 + w = x1 - x0 + + return x0, y0, w, h + + +def query(messages): + # Adjusted to use the Azure OpenAI client with the specified parameters + response = client.chat.completions.create( + model=model_name, + messages=[{"role": "user", "content": content}], + max_tokens=max_tokens, + temperature=temperature, + top_p=1, + frequency_penalty=0, + presence_penalty=0, + ) + + message = response.choices[0].message.content + return message + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Evaluate model outputs") + parser.add_argument("--pred", type=str, help="Path to the model") + parser.add_argument("--min_box_w", type=int, help="Minimum width", default=56) + parser.add_argument("--min_box_h", type=int, help="Minimum height", default=56) + parser.add_argument( + "--image_folder", type=str, default="evaluation/GAR-Bench/annotations" + ) + args = parser.parse_args() + + with open(args.pred, "r") as f: + data = json.load(f) + + output_json = [] + total = 0 + true = 0 + 
+ for item in tqdm(data): + total = total + 1 + model_output = item["model_output"] + + subject_name = item["subject_name"] + object_name = item["object_name"] + predicate_name = item["predicate_name"] + model_output = item["model_output"] + prompt = prompt_ann.format( + subject_name=subject_name, + object_name=object_name, + predicate_name=predicate_name, + model_output=model_output, + ) + + img = Image.open(os.path.join(args.image_folder, item["image"])) + + img_np = np.array(img) + base64_image = encode_pil_image_to_base64(img) + content = [ + {"type": "text", "text": "\n1. The original image:\n"}, + { + "type": "image_url", + "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}, + }, + ] + + for mask_idx, mask_rle in enumerate(item["mask_rles"]): + mask_np = mask_utils.decode(mask_rle).astype(np.uint8) + pil_mask = Image.fromarray((mask_np * 255).astype(np.uint8)) + + assert ( + img_np.shape[:2] == mask_np.shape + ), f"image shape mismatches with mask shape: {img_np.shape}, {mask_np.shape}" + img_h, img_w = img_np.shape[:2] + + x0, y0, w, h = mask_to_box(mask_np) + xc, yc = x0 + w / 2, y0 + h / 2 + + # focal_crop: need to have at least min_box_w and min_box_h pixels, otherwise resizing to (384, 384) leads to artifacts that may be OOD + w, h = max(w, args.min_box_w), max(h, args.min_box_h) + x0, y0 = int(xc - w / 2), int(yc - h / 2) + + cropped_mask_np = mask_np[ + max(y0 - h, 0) : min(y0 + 2 * h, img_h), + max(x0 - w, 0) : min(x0 + 2 * w, img_w), + ] + cropped_img_np = img_np[ + max(y0 - h, 0) : min(y0 + 2 * h, img_h), + max(x0 - w, 0) : min(x0 + 2 * w, img_w), + ] + + cropped_pil_img = Image.fromarray(cropped_img_np) + cropped_pil_mask = Image.fromarray((cropped_mask_np * 255).astype(np.uint8)) + + base64_cropped_image = encode_pil_image_to_base64(cropped_pil_img) + base64_cropped_mask = encode_pil_image_to_base64(cropped_pil_mask) + + content.extend( + [ + { + "type": "text", + "text": f"\n{2 * mask_idx + 2}. 
:\n", + }, + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{base64_cropped_image}" + }, + }, + { + "type": "text", + "text": f"\n{2 * mask_idx + 3}. The mask of :\n", + }, + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{base64_cropped_mask}" + }, + }, + ] + ) + + content.append({"type": "text", "text": prompt}) + + messages = [{"role": "user", "content": content}] + + outputs = query(messages) + print(outputs) + if outputs == "True": + true = true + 1 + item.update({"eval_result": outputs}) + output_json.append(item) + + print("Accuracy: ", true / total) + with open(args.pred.replace(".json", "_eval.json"), "w") as f: + json.dump(output_json, f, indent=4) diff --git a/evaluation/GAR-Bench/eval_simple.py b/evaluation/GAR-Bench/eval_simple.py new file mode 100644 index 0000000000000000000000000000000000000000..7706f51b087619effd1091f9ea1ee6c285d04ba0 --- /dev/null +++ b/evaluation/GAR-Bench/eval_simple.py @@ -0,0 +1,199 @@ +# -------------------------------------------------------- +# Copyright (2025) Bytedance Ltd. and/or its affiliates +# Licensed under the Apache License, Version 2.0 (the "License") +# Grasp Any Region Project +# Written by Haochen Wang and Yuhao Wang +# -------------------------------------------------------- + +import argparse +import base64 +import io +import json +import os +import re + +import numpy as np +import openai +from PIL import Image +from pycocotools import mask as mask_utils +from pycocotools.coco import COCO +from tqdm import tqdm + +# Define Azure OpenAI details +model_name = "gpt-4o-2024-11-20" +max_tokens = 1000 # range: [1, 4095] + +# Initialize the Azure client +client = openai.AzureOpenAI( + azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"), + api_key=os.getenv("AZURE_OPENAI_KEY"), + api_version="2024-03-01-preview", +) + +prompt_ann = """ +You are a language model expert. 
Your task is to evaluate the correctness of the model's output based on the provided ground truth and given masks. + +- Ground truth: "{answer}" +- Model Output: "{model_output}" + +Please determine if the model's output conveys the same meaning as the provided ground truth. If the output is semantically correct, return "True", otherwise return "False". + +Attention: +1. The ground truth and model output do not need to match exactly, as long as they convey the same meaning. Synonyms and different phrasings are acceptable. + +2. Do not output any reasoning. Do not perform correction. Please output only "True" or "False". + +""" + + +def process_questions(outputs): + + pattern = r"^```json\s*|\s*```$" + try: + cleaned_str = re.sub(pattern, "", outputs, flags=re.MULTILINE) + questions_data = json.loads(cleaned_str) + except: + print("Error in parsing JSON") + return [] + return questions_data + + +def encode_pil_image_to_base64(pil_image): + buffered = io.BytesIO() + pil_image.save(buffered, format="PNG") + img_str = base64.b64encode(buffered.getvalue()).decode("utf-8") + return img_str + + +def mask_to_box(mask_np): + mask_coords = np.argwhere(mask_np) + y0, x0 = mask_coords.min(axis=0) + y1, x1 = mask_coords.max(axis=0) + 1 + + h = y1 - y0 + w = x1 - x0 + + return x0, y0, w, h + + +def query(messages): + # Adjusted to use the Azure OpenAI client with the specified parameters + response = client.chat.completions.create( + model=model_name, + messages=[{"role": "user", "content": content}], + max_tokens=max_tokens, + temperature=temperature, + top_p=1, + frequency_penalty=0, + presence_penalty=0, + ) + + message = response.choices[0].message.content + return message + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Evaluate model outputs") + parser.add_argument("--pred", type=str, help="Path to the model") + parser.add_argument("--min_box_w", type=int, help="Minimum width", default=56) + parser.add_argument("--min_box_h", type=int, 
help="Minimum height", default=56) + parser.add_argument( + "--image_folder", type=str, default="evaluation/GAR-Bench/annotations" + ) + args = parser.parse_args() + + with open(args.pred, "r") as f: + data = json.load(f) + + output_json = [] + total = 0 + true = 0 + + for item in tqdm(data): + total = total + 1 + answer = item["answer"] + model_output = item["model_output"] + prompt = prompt_ann.format(answer=answer, model_output=model_output) + + img = Image.open(os.path.join(args.image_folder, item["image"])) + + img_np = np.array(img) + base64_image = encode_pil_image_to_base64(img) + content = [ + {"type": "text", "text": "\n1. The original image:\n"}, + { + "type": "image_url", + "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}, + }, + ] + + for mask_idx, mask_rle in enumerate(item["mask_rles"]): + mask_np = mask_utils.decode(mask_rle).astype(np.uint8) + pil_mask = Image.fromarray((mask_np * 255).astype(np.uint8)) + + assert ( + img_np.shape[:2] == mask_np.shape + ), f"image shape mismatches with mask shape: {img_np.shape}, {mask_np.shape}" + img_h, img_w = img_np.shape[:2] + + x0, y0, w, h = mask_to_box(mask_np) + xc, yc = x0 + w / 2, y0 + h / 2 + + # focal_crop: need to have at least min_box_w and min_box_h pixels, otherwise resizing to (384, 384) leads to artifacts that may be OOD + w, h = max(w, args.min_box_w), max(h, args.min_box_h) + x0, y0 = int(xc - w / 2), int(yc - h / 2) + + cropped_mask_np = mask_np[ + max(y0 - h, 0) : min(y0 + 2 * h, img_h), + max(x0 - w, 0) : min(x0 + 2 * w, img_w), + ] + cropped_img_np = img_np[ + max(y0 - h, 0) : min(y0 + 2 * h, img_h), + max(x0 - w, 0) : min(x0 + 2 * w, img_w), + ] + + cropped_pil_img = Image.fromarray(cropped_img_np) + cropped_pil_mask = Image.fromarray((cropped_mask_np * 255).astype(np.uint8)) + + base64_cropped_image = encode_pil_image_to_base64(cropped_pil_img) + base64_cropped_mask = encode_pil_image_to_base64(cropped_pil_mask) + + content.extend( + [ + { + "type": "text", + "text": f"\n{2 
* mask_idx + 2}. :\n", + }, + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{base64_cropped_image}" + }, + }, + { + "type": "text", + "text": f"\n{2 * mask_idx + 3}. The mask of :\n", + }, + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{base64_cropped_mask}" + }, + }, + ] + ) + + content.append({"type": "text", "text": prompt}) + + messages = [{"role": "user", "content": content}] + + outputs = query(messages) + print(outputs) + if outputs == "True": + true = true + 1 + item.update({"eval_result": outputs}) + output_json.append(item) + + print("Accuracy: ", true / total) + with open(args.pred.replace(".json", "_eval.json"), "w") as f: + json.dump(output_json, f, indent=4) diff --git a/evaluation/GAR-Bench/inference.py b/evaluation/GAR-Bench/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..c1fd5682a64bffcad37a29168ceed8a9035f8e8c --- /dev/null +++ b/evaluation/GAR-Bench/inference.py @@ -0,0 +1,207 @@ +# -------------------------------------------------------- +# Copyright (2025) Bytedance Ltd. and/or its affiliates +# Licensed under the Apache License, Version 2.0 (the "License") +# Grasp Any Region Project +# Written by Haochen Wang +# -------------------------------------------------------- + +import argparse +import json +import os + +import numpy as np +import pandas as pd +import torch +from PIL import Image +from pycocotools import mask as mask_utils +from tqdm import tqdm +from transformers import AutoModel, AutoProcessor, GenerationConfig + +from evaluation.eval_dataset import MultiRegionDataset + +TORCH_DTYPE_MAP = dict(fp16=torch.float16, bf16=torch.bfloat16, fp32=torch.float32) + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Inference of Grasp Any Region models on GAR-Bench." 
+ ) + + parser.add_argument( + "--model_name_or_path", + help="HF model name or path", + default="HaochenWang/GAR-8B", + ) + parser.add_argument( + "--cache_name", + help="cache name for saving results", + type=str, + default="gar_8b", + ) + parser.add_argument( + "--anno_file", + help="annotation file path", + required=True, + ) + parser.add_argument( + "--image_folder", + help="the folder of images", + default="evaluation/GAR-Bench/annotations", + ) + parser.add_argument( + "--mode", + help="mode to build questions", + type=str, + choices=["vqa", "simple", "detailed"], + required=True, + ) + parser.add_argument( + "--data_type", + help="data dtype", + type=str, + choices=["fp16", "bf16", "fp32"], + default="bf16", + ) + parser.add_argument( + "--seed", + type=int, + default=0, + help="Random seed for reproducible text generation", + ) + args = parser.parse_args() + return args + + +def select_ann(coco, img_id, area_min=None, area_max=None): + cat_ids = coco.getCatIds() + ann_ids = coco.getAnnIds(imgIds=[img_id], catIds=cat_ids, iscrowd=None) + + if area_min is not None: + ann_ids = [ + ann_id for ann_id in ann_ids if coco.anns[ann_id]["area"] >= area_min + ] + + if area_max is not None: + ann_ids = [ + ann_id for ann_id in ann_ids if coco.anns[ann_id]["area"] <= area_max + ] + + return ann_ids + + +def main(): + args = parse_args() + data_dtype = TORCH_DTYPE_MAP[args.data_type] + torch.manual_seed(args.seed) + + # init ditribution for dispatch_modules in LLM + torch.cuda.set_device(0) + torch.distributed.init_process_group(backend="nccl") + + # build HF model + model = AutoModel.from_pretrained( + args.model_name_or_path, + trust_remote_code=True, + torch_dtype=data_dtype, + device_map="cuda:0", + ).eval() + + processor = AutoProcessor.from_pretrained( + args.model_name_or_path, + trust_remote_code=True, + ) + + model_outputs = [] + cache_name = args.cache_name + + with open(args.anno_file, "r") as file: + data = json.load(file) + + for item in tqdm(data): + img 
= Image.open(os.path.join(args.image_folder, item["image"])) + + # build question for different mode + if args.mode == "vqa": + question_str = f"Question: {item['question']}\nOptions:" + for op in item["choices"]: + question_str += f"\n{op}" + question_str += "\nAnswer with the correct option's letter directly." + elif args.mode == "simple": + question_str = item["question"] + elif args.mode == "detailed": + question_str = "Describe in detail, including the relationship with ." + else: + raise NotImplementedError + + masks = [] + for mask_idx, mask_rle in enumerate(item["mask_rles"]): + mask_np = mask_utils.decode(mask_rle).astype(np.uint8) + masks.append((mask_np * 255).astype(np.uint8)) + + prompt_number = model.config.prompt_numbers + prompt_tokens = [f"" for i_p in range(prompt_number)] + [ + "" + ] + dataset = MultiRegionDataset( + image=img, + masks=masks, + question_str=question_str, + processor=processor, + prompt_number=prompt_number, + visual_prompt_tokens=prompt_tokens, + data_dtype=data_dtype, + ) + + data_sample = dataset[0] + + with torch.no_grad(): + generate_ids = model.generate( + **data_sample, + generation_config=GenerationConfig( + max_new_tokens=1024, + do_sample=False, + eos_token_id=processor.tokenizer.eos_token_id, + pad_token_id=processor.tokenizer.pad_token_id, + ), + return_dict=True, + ) + + outputs = processor.tokenizer.decode( + generate_ids.sequences[0], skip_special_tokens=False + ).strip() + if outputs.endswith("<|eot_id|>"): + outputs = outputs.replace("<|eot_id|>", "") + print(outputs) + + item["model_output"] = outputs + model_outputs.append(item) + + cache_name += f"_{args.mode}" + print(f"Cache name: {cache_name}") + + with open(f"evaluation/GAR-Bench/model_outputs/{cache_name}.json", "w") as file: + json.dump(model_outputs, file, indent=4, ensure_ascii=False) + + if args.mode == "vqa": + # directly compute accuracy using exact-matching + for category in set([x["type"] for x in model_outputs]): + results = [x for x in 
model_outputs if x["type"] == category] + total = len(results) + correct = len( + [x for x in results if x["model_output"].lower() == x["answer"].lower()] + ) + print(f"{category}: [{correct}/{total}]={round(correct / total * 100, 1)}") + + total = len(model_outputs) + correct = len( + [ + x + for x in model_outputs + if x["model_output"].lower() == x["answer"].lower() + ] + ) + print(f"=> overall: [{correct}/{total}]={round(correct / total * 100, 1)}") + + +if __name__ == "__main__": + main() diff --git a/evaluation/GAR-Bench/model_outputs/gar_1b_detailed.json b/evaluation/GAR-Bench/model_outputs/gar_1b_detailed.json new file mode 100644 index 0000000000000000000000000000000000000000..3b58c68af781ee52372c25554512ccb13293e59a --- /dev/null +++ b/evaluation/GAR-Bench/model_outputs/gar_1b_detailed.json @@ -0,0 +1,2463 @@ +[ + { + "image": "images/caption_detailed_0.png", + "subject_name": "person", + "object_name": "skateboard", + "predicate_name": "beside", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": 
"hT\\63W=1N3M2O1O2N1O1O2O0O1O001O00O10O10O001000O011N1O1O10000O1O1000000\\MCZH>e7GTFGk1b0Q81mGOS83kGMU85hGKY86fGJZ88cGJ\\88`GIb88[GIe8:WGGj8:PGIQ99lFHU99dFK]96\\F0d92WF0k90SF0o90PF0P:1nENU:b12O1O1O1M3N2M3L4N2O2N2O1O001N1OZFoMT8P2kGUNS8k1jGYNU8c1aFRNY1>V8^1aFTNX1a0d0@j5b1ZH]NW1d0a0@n5^1ZH^NV1g0>@Q6[1\\H]NT1k0=]OT6[1`I\\O9ZOV6\\1^I]O:WOX6]1\\I_O:UOZ6\\1ZIB:SO[6[1ZIE8QO^6Z1YIH7nN`6[1VIK8jNb6]1SIL8iNd6\\1RIN0ZN_O<`7[1PI2OWN@=a7Z1oH6LUNB=Ic0^O7J4K2N2O0O2O0O10O010TOhBOY=KoBAGNg=a0d0O0101XOAlB`0Q=ElB=Q=FmB=P=FmBbNXO]:8QG`0bNXO_:6oFb0j9\\OVFb0m9]OSFa0R:\\OnE?Y:^OiE?\\C1N100O100O10O0000010O0100O000000000000000000000O10000O100VNiCj0WRSN`0\\9RO`H?TN?[9RO^Hc0XN9\\9SOZHf0\\N3\\9WOVHg0aNOZ9ZOTHh0cNLZ9[OSHj0dNIZ9[OSHm0fNCY9]ORHQ1hN^OX9_ORHS1iNXOX9EoGS1jNUOX9HoGR1lNROW9KnGS1QOeNX95jGV1Y9hNhFX1X9gNiFX1X9hNhFX1X9gNhFZ1X9eNhF\\1X9dNfF]1[9cNcF_1]9aNbF`1]9aN`Fb1a9]N_Fc1a9]N_Fc1a9]N_Fc1a9]N^Fd1b9\\N^Fd1b9\\N]Fd1d9\\N[Fe1e9[NZFf1f9ZNYFg1g9YNXFh1h9XNXFh1S3XNj10RKi1R3\\Ni1KUKh1Q3`Ni1HVKh1Q3aNg1HXKf1o2fNh1DZKe1i2mNl1^O\\Kc1g2SOk1ZO_Kb1e2VOk1XO`Ka1f2XOi1WOaK`1f2[Oh1UO_K`1l2[Oe1UO]K_1Q3\\Oc1TO[K`1R3^Oa1TO\\K]1T3_O`1UO[K[1V3_O`1XOWKY1Z3_O_1ZOUKU1]3B^1ZOTKS1_3C]1\\ORKQ1a3C]1]OQKo0c3E[1]OQKn0c3G[1_OnJj0g3H[1_OlJi0i3HZ1AlJg0j3IY1BkJe0k3JZ1DhJb0n3JZ1GeJ?Q4JZ1IcJ>R4I[1KaJM2\\2^OgMb0MO^2\\OfMe0LO^2\\OeMg0LM`2ZOdMjN^NP1_1k0c2VO`MfNjNX1S1k0g2RO]MjNkNY1P1k0k2nNZMlNoNZ1m0k0m2kNXMnNQO\\1j0j0m2lNWMnNTO\\1h0i0n2_M`KY1f1SOWO[1d0j0n2`MbKV1f1TOXO[1c0j0l2aMgKR1b1XOZO[1`0j0W3aNoLZO\\OZ1?j0X3_NnL]O[O[1`0g0aNbMa4n0RM_O]OZ1>g0Z3^NkLA]O\\1>d0\\3\\NjLD\\O\\1>d0]3ZNiLF]O]1c3QNdL2]O_1==f3lMaL9[O`1=:kNjM]6<[Ja1=8kNoMX69`Ja1=6lNoMV6:dJ_1:8nNmMT6ZMdNl1i5i1SKeM0Z1`0F[4\\1PKSLYOc1k0W1`0I\\4[1nJjM6P1`0M[4Y1kJoM9j0a00Z4W1jJRN;f0`03Z4U1iJUN=?b09W4S1cJSLKU2i09c0>U4R1bJTLKU2j07c0cMcNi2b5i3TKnK1^MMP3m4e3SKnK2^MNP3l4g3fJTL?XMNo2l4i6SK[F1m2k4h6RK^F1l2l4f6RK_F2l2k4e6QKaF4l2i4c6SKaF4m2h4b6SKcF4l2h4a6RKXFF7a0R3d4_6UKfF7k2c4`6TKfF9l2a4_6TKfFk2^4Z7bKgH]4Y7bKiH]4W7cKiH]4W7_KnH`4R7`KnH`4Q7aKQI]4o6aKXEEh3i4P7aKZEFf3h4Q7aKUI_4k6aKUI_4l6`KUI
_4k6`KVI`4k6^KWIa4j6^KWIa4k6\\KWIc4T;O001O001O001O00001OL`Ki@_4]?O1O002N2N1O2N2N2N2N1O2N2N1O2N2N2N2N1K6M5K6J0O10000O2YOo^O^NQa0X20O100O1[On^O]NRa0V220000N20000N3O1N2^Od^O]N`a0b1`^O]Naa0b1a^OSNia0j18N2N2O1O1N2N2N2O1N2N2N2O1O1O1M3O1N2O1N1O2O0000MXOP]Of0Rc0ZOn\\Od0Xc0N2OIAo\\O=Sc0Cl\\O=Uc0Ck\\O;Wc0Eh\\O;Yc0Eg\\O:Zc0Ff\\O8[c0Gf\\O9[c0Ge\\O7^c0Gb\\O8`c0H`\\O6ec001N[KM`E3W?N2OPPi1" + } + ], + "model_output": "The sky is a soft, pale blue with a smooth gradient, transitioning from a slightly darker blue at the top to a lighter blue towards the bottom, with a person over it." + }, + { + "image": "images/caption_detailed_4.png", + "subject_name": "person", + "object_name": "pavement", + "predicate_name": "standing on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "al_6=^>8I7AWO[BS1a=:M1N04MmNfN_DZ1];kNaDU1[;POdDQ1X;ROhDn0S;WOmDi0Q;YOPEf0m:]OSEc0k:_OUEb0h:@ZE>d:FZE:e:HhDoNIM8[1V;KhDmNIN9Z1U;MgDPO4T1l0Ne73WGgNOM9^1`0f0_7YOkGbNO08b16j0f7RORHbN3e1ES1U8gNaHQ3_7nLbHS3\\7nLdHR3[7oLeHQ3Z7PMeH`NYO^4Q8TMeH]N[O_4^7aKQIc1I]N20F_4Z7eKUI_19l2_6iKVI\\1;k2\\6mKXIX1=j2Y6PLZIV1=j2X6PL[IW1=i2V6RL]IU1>i2R6ULVI@1b1g0i2P6WLUI\\1l0\\2n5\\NRJd1m5]NSJc1l5^NTJb1l5]NUJc1j5^NVJb1j5]NWJc1i5]NWJc1h5^NWJc1i5]NWJc1i5\\NWJe1i5[NVJf1j5ZNVJf1j5ZNRJj1n5UNPJn1P6RNnIP2R6PNiIU2W6kMcI[2^6eM\\I`14^L`6]6eIaI[6[6mIaIS6e0[I\\4i0lJm5f0]IW4l0RKg5e0`IS4o0UKe5d0_IR4Q1VKe5d0]Im3V1^K_5c0]Ii3Y1dK[5a0^Ia3`1nKY56]Ic3b1ULU53\\Ia3^:K4L3K6G\\DRMg;m28M3N2N6I6K5Ic0J5M2L5L3N3N2M4M2M3N3M2N2M2O2N1O2N1O1O1O2O1N1O001O00WO]CaN1?c2O0L4N00100N2O1O0]OFaB<^=J\\B7e=KVB7j=>`BSOP=l0QCVOnRCCo<=PCCR==kBCX=>cBCb=k03M2N1N2O1N2NTCXOd;g0\\D\\Ob;d0]D^Oa;i0XDXOh;m0RDTOn;S1hCPOX6K4L3N2M3N2N1O2N100O2N100O100O1000O01000O1O010O1O00001O1N2O0O1O2M3C=DeA4Y>LjA4T>LmA7o=ISB5m=KTB3m=MUB0l=1a000O2Oln00SQO2O0N4L4L3N1M4F9M3O1O2O00O0100000O100O101N1O2N2M2O3L3M6GTl]6" + } + ], + "model_output": "A person wearing a black helmet with a reflective visor, a black long-sleeve top, and black pants is riding a bicycle. 
The individual is in a bent-over posture, gripping the handlebars with both hands, and wearing black gloves. The person's head is slightly tilted downward, and the facial features are not visible." + }, + { + "image": "images/caption_detailed_8.png", + "subject_name": "surfboard", + "object_name": "person", + "predicate_name": "attached to", + "mask_rles": [ + { + "size": [ + 408, + 640 + ], + "counts": "Ved18VO11O000000001O1N2N2N2N2N2N2N2N2O12N1O2NjN]E9b:EgE6W:IkE9R:DSF;m9DUF;_OQO[:>[Fc0e9[O_Fa0o:N2M3M1O4M:F000O1O0O2O010O1O1O1OhV^3" + } + ], + "model_output": "A person with wet, curly hair, wearing a black wetsuit with a red stripe down the side, is captured in a dynamic pose with one arm extended upwards and the other bent at the elbow, legs spread apart in a running or jumping motion, playing with a surfboard." + }, + { + "image": "images/caption_detailed_9.png", + "subject_name": "person", + "object_name": "grass", + "predicate_name": "standing on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "d45k>0N200O1O2N2O1N1O1O1O010O2O3L4M000O100O010OO2N1NI9O10O1010O0100O1O2M2O2N2FfAK^>OfANg>OjSi8" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"i8U6k80O10001O0000000001O1O2N2M6K6K1N2M2O3M2O0O1O0O2O001O1O1O001O10O00O10000000000000000000001O0000001O000000001O000000000000001O000000000000000000O1000000O2O00O02N1O1O001O1O1O1O100O1O1O1N2O1O1N2O1O1N2N2N2010N2O001O001O00001O00001O001O001O1O000000001O0000000000001O00001O00001O0000001O000000001O00001O1O3M1O1O2N2N2N001O2N1O100O0O2O1O001O001O0000001O0001O000001O00O010000000000000O100000000O10000O100000000000000001O000000000000O1O10000001O0000000000000000001O00000000001O00001O000000001O0000001O000000001O0000010O000O10001O0000001O00001O000000001O0000001O00001O0000001O00001O000000001O000000001O00000000001O00000000001O0000001O000000001O00001O000000001O00000000000000001O0000010N2O3M2N4L2N3M3M2N1O2N1O1O001O1O1O1O0000001O0000000000000000001O000000000000O10000000000O10000O10000O10000O100O100O1O100O100O1O100O100O1O1N2O1N2N2N2O1M3O1M3N2M3L4M3K5K5M3G9L4L4N2000000000000001OO10000O1O100O1O100O1O1O100O100O100O1O100O1O10000O100O100O100O100O100O100O100O100O1O10000O100O100O100O1O10000O100O1O100O1O10000O100O1O10000O10000O10000001O00001O2N001O001O001O001O0000001O000000000000000000O10000O1O100O100O1O1N2O1O2K3K6H8I7O1O100000001O00O1000000000000000O1FfHmH[7Q7iHlHX7S7;O10000O1O100L4M3ZOQHkI31R8S6a00000O100001OO10000000000O100001OO10000000000000000000000000000000000001O0000000000000000000000000000001O0000000000mI" + } + ], + "model_output": "A person with short dark hair, wearing a light-colored, short-sleeved shirt with a collar, and a dark-colored undershirt visible at the neckline, is standing on grass." 
+ }, + { + "image": "images/caption_detailed_10.png", + "subject_name": "umbrella", + "object_name": "pavement", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 640, + 428 + ], + "counts": "b>;j1JWN3[a05f^OE106OO6Jm0Xa0d1M2O1O0O2O00001O0O2O1O00001N2O00001N101O001N101N10001O000O2O001O1N101O00001N101N101O0O3N001O0O2O001O1O0O2O1N101O001N2N101O1O0O2O001O000O2O0O2O000O2O000O100O2O00000O101N10000O100O100O10000O01000O100O010O1000O1000000000O0101O00O10O10000O100000000O100000000O1000000O3N7I7dK[@o3U`0F9G5KcNmL]Bc2b=fMZBV2e=UN[BXOaNS2T?QOXBWOeNS1S?0WBbNiN]1P?[1i@^Mg0X1_>f1XAZNh>i1UAWNj>l1RAkLIY1U?`3000000000001O0O1VHUK?k4AUK?k4[71O0000000000000O11O0000O0100O1O100O1O100O10O010000O1O1O1O10000O1O010O1O100O10000O100O00101OO0100O100000O010000O10O1001O1N100O2O0000001N100000001N10001N1O100O101O0O2O0O100O1O2N10000O100O1O1O2O0O1O100O2N1O1O1O100L5O0O100O101M2N2O1O1O1O1N2O1O1Oo_O]Mh>a2ZA_Me>^2`1O100O1N2N2O1O1O1O1N2O1O1O1N1O2O1O1O2Nf0YO7J5L3L7H3M3N1O1NUiV2" + }, + { + "size": [ + 640, + 428 + ], + "counts": 
"cj04413L20gb0j7Q@gJa3Z5[LiJd3X5]LgJd3W5^LhJb3X5_LgJ`3Z5`LfJ`3Z5aLeJ_3Z5bLfJ^3Z5cLeJ]3[5cLeJ]3[5cLeJ]3[5cLeJ]3[5cLeJ]3[5dLdJ\\3[5fLdJZ3\\5fLdJZ3[5gLeJY3[5hLdJX3\\5hLdJX3\\5hLdJX3\\5iLcJW3\\5jLdJV3\\5jLdJV3\\5kLcJU3]5kLcJU3\\5mLcJS3]5nLbJR3^5nLbJR3^5oLaJQ3^5QMaJo2_5QMaJo2_5QMaJo2^5RMbJn2^5SMaJm2_5SMaJm2^5UMaJk2_5VM`Jj2`5VM`Ji2a5WM_Ji2`5XM`Jh2`5YM_Jg2a5YM_Jg2`5[M_Je2a5\\M^Jd2b5]M]Jc2c5]M]Jc2c5^M\\Jb2c5_M]Ja2c5`M\\J`2d5aM[J_2e5aM[J_2d5cM[J]2e5cM[J]2e5dMZJ\\2e5fMZJZ2f5fMZJZ2e5hMZJX2f5iMYJW2g5iMYJW2g5jMXJV2g5kMYJU2g5lMXJT2h5lMXJT2h5mMWJS2h5nMXJR2h5oMWJQ2h5QNWJo1i5QNWJn1j5SNUJm1j5TNVJl1j5TNVJl1i5UNWJk1i5VNVJj1i5WNWJi1i5WNWJi1i5XNVJh1j5XNVJh1j5XNVJh1i5YNWJg1i5YNWJg1i5YNWJg1h5[NWJe1i5[NWJe1h5\\NXJd1h5\\NXJd1h5\\NXJe1f5\\NZJd1f5\\NZJc1f5^NZJb1f5^NZJb1e5_N[Ja1e5_N[Ja1e5_N[Ja1d5`N\\J`1d5`N\\Ja1cNcIj6m4cJ`1cNhIe6h4hJl1U5TNlJP2P5PNPKU2i4mMWKS2g4oMYKQ2f4PNZKP2f4oM[KQ2iNUIX5j4oKQ2fN[IY5c4QLR2eN^IY5^4SLU2bN`IZ5Z4TLR3oNVHR4h4oLT3fN]HY4_4QMU3aN`H]4\\4RMl3n2TLRMm3l2TLTMl3h2WLYMo3i1WGlLo4[1Q4`1dL`Nd3U1_LkNe3m0_LSOj3OjL2h3RObLn0P4_NPLb1W;00000O100000000O1000000000000000000000000O11O0000iJ]NQHc1m7cNoG]1`7]NhC=g4V1U7AkH?o6GQI9n6HRI8n6IQI7n6JRI6l6KUI5j6LVI4i6MWI3e61[IO`66`IJZ64L2O1N3N2M2O2M2O1O1N3N2N2M3N1O1N3N1O1N2O1N3N2M3N1N102M2O2M2N2O1O2M2N3M2O1N2N2O2M2N3M2N2N2O1N3M2N1O3M1O3M2N1O2N2O1O1N2N2N2N2O1N2M3OiL^EQ2`:oMcEP2[:PNiEn1V:QNnEn1o9SNUFj1i9WNYFi1e9VN_Fh1_9YNeFe1X9[NlFd1Q9\\NSGb1k8]NZGa1e8_N^G_1`8aNcG^1[8cNhG[1V8eNmGZ1Q8fNSHX1k7hNXHW1g7iN[HV1c7jN`HU1^7kNeHT1Z7kNiHS1V7nNkHR1S7nNPIP1o6QOSIn0l6QOVIn0i6ROZIm0d6TO]Ik0b6UOaIj0]6WOdIh0[6YOgIf0X6YOjIg0T6ZOmIe0R6[OPJe0n5\\OTJb0k5^OWJb0h5^OZJa0d5_O^J`0b5_OaJ`0\\5BfJiLAW3`0hL_OX3b0hL^OW3c0iL\\OW3e0hL\\OW3e0iLZOW3g0iLXOX3h0hLWOX3j0hLUOY3k0gLUOZ3k0dLUO]3l0bLSO`3n0^LQOd3o0[LPOg3Q1VLoNl3R1RLmNP4S1oKlNT4T1iKlNY4U1eKjN]4V1aKjNa4V1^KiNd4W1[KgNi4X1VKfNl4[1RKeNQ5Z1nJdNU5\\1iJdNY5\\1eJdN]5[1bJfN_5Y1aJgNa5X1^JhNc5W1\\JkNd5U1[JkNf5U1XJlNi5T1VGTNg2i0S6R1SGZNg2e0W6^1eIcN\\6^1bIaNa6^1]IcNc6^1\\IaNf6_1XIbNj6^1TIaNn6^1QIbNR7\\1nHdNR7]1lHcNW7\\1gHeNZ7Z1eHfN]7X1cHhN_
7W1`HiNb7V1]HkNd7T1\\HkNg7U1_FaNi09j8W1YFdNd0\\OWOi0n9X1RFhNf07Y9Y2[FhMh9Y2SFhMo9d31N2N3N1N2N2N3M2N2N1O3M2O1N2O1N2O1N2O1N2N2O1N2N2N2N3M2O0O2N2O1N2N2N2N2N2N2N2O1N2N3M1O2N3M1O2O1N2N3M1O3M2N102M1O2O1M3O1N2O1N3M1O2O1N2N2O1N2O1N2N2N2O0O2O1N2N2N2O1N2N2N3N1Mocg0" + }, + { + "size": [ + 480, + 640 + ], + "counts": "2l>400001OO1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000oJb0`J^O\\5k0_JUO]5S1_JmN]5[1_JeN^5`1`J`N[5i1aJWN]5m1aJSN[5S2cJmMZ5Y2cJgMZ5]2eJcMW5c2gJ]MV5h2hJXMV5k2iJUMT5P3jJPMR5U3mJkLP5Z3nJfLn4_3QKaLm4c3QK]Lk4h3TKXLi4l3VKTLf4R4XKnKg4T4XKlKa4\\4^KdKc31XJ]4U2bKb36SJ[4Z2_Kc38nI^4]2ZKd3=hI]4c2VKe3a5[L_Je3b5ZL^Jf3b5ZL^Je3d5ZL\\Jf3e5YL[Jg3f5XLZJg3h5XLXJh3h5XLXJe3k5[LUJb3o5]LQJ`3R6`LnI]3V6bLjI\\3Y6cLgIX3^6hLbIV3a6iL_IS3e6mL[IP3i6oLWIn2l6RMTIk2o6UMQIf2U7YMkHe2W7[MiHa2[7_MeH^2_7aMaH\\2b7dM^HY2f7fMZHW2i7iMWHS2m7mMSHQ2P8nMPHm1U8SNkGk1X8TNhGi1[8WNeGf1^8ZNbGc1a8]N_G`1d8`N\\G]1g8cNYG[1j8dNVGY1m8gNSGX1o8gNQGW1Q9iNoFU1S9kNmFT1U9kNkFS1W2gLd4V2UIR1X9nNhFP1Z9POfFo0\\9POdFo0^9PObFn0`9RO`Fm0b9RO^Fm0c9SO]Fl0d9TO\\Fk0f9TOZFk0g9UOYFk0g9UOYFj0h9VOXFi0i9WOWFh0k9WOUFh0l9XOTFg0m9YOSFf0n9ZORFf0o9YOQFf0P:ZOPFf0P:ZOPFe0R:ZOnEe0S:[OmEd0T:\\OlEd0T:\\OlEc0V:\\OjEd0V:\\OjEc0W:]OiEc0W:]OiEb0Y:]OgEc0Y:]OgEb0[:]OeEc0[:]OeEb0\\:^OdEa0]:_OcEa0]:_OcEa0^:^ObEa0_:_OaEa0`:^O`Eb0`:^O`Ea0a:_O_Ea0a:_O_Ea0b:^O^Ea0c:_O]Ea0c:_O]Ea0c:_O]Ea0c:_O]E`0d:@\\E`0e:_O[Ea0e:_O[Ea0e:_O[Ea0f:^OZEb0f:^OZEa0g:_OYEa0h:^OXEb0h:^OXEb0h:^OXEb0i:]OWEc0i:]OWEc0i:]OWEb0j:]OWEc0i:]OWEc0i:]OWEc0j:\\OVEd0j:\\OVEd0k:[OUEe0k:\\OTEd0m:[OSEe0m:[OSEf0l:ZOTEf0l:ZOTEf0m:YOSEg0m:YOSEg0n:XOREh0n:XOREh0n:XOREi0m:WOSEi0m:WOSEi0n:VOREj0n:UOSEk0m:UOSEk0m:UOSEk0m:UOSEk0n:TOREl0n:TOREl0n:TOREm0m:SOSEm0n:SOQEn0n:ROREn0n:QOSEo0m:QOSEo0m:QOSEo0n:POREQ1m:oNSEQ1m:oNSER1l:oNSEQ1m:oNSEQ1m:oNSEQ1m:oNSER1l:mNUES1k:mNUES1k:mNUES1k:mNUES1k:mNUET1k:kNUEU1k:kNUEU1k:kNUEV1j:kNUEU1j:lNVET1j:lNVET1j:lNVET1j:lNVEU1i:kNWEU1i:kNWEV1h:jNXEV1g:kNYEU1g:kNYEV1f:jNZEV1f:jNZEV1f:jNZEV1e:kN[EU1e:kN[EV
1d:jN\\EV1c:kN]EU1c:kN]EU1c:kN]EU1c:kN]EV1a:kN_EU1a:kN_EV1`:jN`EV1_:kNaEU1_:kNaEU1^:lNbET1^:lNbEU1]:kNcEU1\\:lNdET1[:mNeES1[:mNeET1Y:mNgES1Y:mNgES1Y:mNgES1X:nNhES1V:nNjER1U:oNkEQ1U:oNkER1S:oNmEQ1S:oNmEQ1S:oNmEQ1R:POnEQ1P:POPFP1P:POPFP1o9QOQFP1m9QOSFo0k9SOUFm0k9SOUFn0i9SOWFm0h9TOXFl0g9UOYFk0g9UOYFk0f9VOZFk0d9VO\\Fj0c9WO]Fi0b9XO^Fh0`9ZO`Fg0^9ZObFf0]9[OcFe0\\9\\OdFd0[9]OeFd0X9^OhFb0W9_OiFb0T9@lF`0R9BnF>P9DPGRHCl7>THBl7>THBk7?UHBj7>VHBj7>VHBj7>VHCi7=WHDh7UKAk4?VKAi4?WKAi4?XK@h4`0YK_Of4b0ZK_Oe4a0\\K^Od4b0\\K^Od4b0]K]Oc4c0^K]Oa4c0`K\\O`4d0`K\\O_4e0bKZO^4f0bK[O]4e0dKZO\\4f0eKZOZ4f0fKZOZ4f0gKYOX4h0iKXOV4h0kKWOU4i0kKXOS4i0nKVOR4j0nKVOR4j0nKWOP4j0QLUOn3l0SLSOm3m0SLTOl3l0ULTOi3m0XLROh3n0YLROf3n0ZLROf3n0[LROd3n0\\LROd3n0]LQOb3P1_LPO`3P1aLoN]3S1cLnN\\3R1eLnNY3S1gLnNX3R1iLmNW3S1jLmNU3S1kLmNT3T1lLmNR3T1PMkNm2V1TMkNk2U1VMkNh2U1YMlNe2U1[MkNd2U1^MkN`2U1cMiN\\2V1gMiNY2U1iMmNT2S1mMmNR2S1PNmNl1U1UNlNj1R1YNnNe1R1]NmNa1U1_NlN_1T1cNmNZ1P1kNPOT1m0POROP1m0QOTOm0m0SOUOk0k0VOUOh0i0\\OWO`0j0CUO=i0EXO9h0IYO4g0MZO1b05^OJa08_OF?=C@=a0D]O=e0BZO=h0DRO=Q1ElN;U1FhN;Z1FbN;_1F`N7d1JZN4i1LVN1n11nMOT22eMWMdLg2h54bMO`22]MMe24XMMi27QMJQ36lLKV36gLH]3:_LCf3?WL@j3b0SL_On3b0PL^OP4g0eIaLQ2f2^4S1YKlNk4Z1\\IQL^1e2W5_1gJ`N[5`1cJ`N_5`1`J_Nb5b1[J\\Nh5e1VJZNl5g1SJYNn5g1oI[NQ6f1nIXNU6h1iIVN[6j1dIUN^6k1`IVN`6k1^IVNc6i1\\IUNg6l1WIPNo6P2PIoMR7R2kHoMU7R2jHnMW7R2gHmM\\7S2bHjMb7V2]HhMg7X2WHiMi7X2VHhMk7X2SHhMo7X2PHeMT8[2jGdMX8a42N1O2L4N2N101M3K5N10000O2M3M200O3L3L4M2O2N1N3M3M201O0O2M3N2N1O2M2M4N2M2N3M3M3M2L4N3N1O2N101N10_J" + } + ], + "model_output": "A piece of white tissue paper with a soft, slightly crumpled texture and irregular, torn edges is on the dining table." 
+ }, + { + "image": "images/caption_detailed_13.png", + "subject_name": "person", + "object_name": "pavement", + "predicate_name": "walking on", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "hcW31Z=0ea91odJ5QiK7RCN^L5O0100O0102M2N4M2M:E7HTl0OZbk3" + }, + { + "size": [ + 427, + 640 + ], + "counts": "\\n_13W=1N2N3M2N2N2N2N2N2N2N2O1M3N2N2N2O1N2N2N2N2O1N2N2N2N2O1N2N2N2O1N2N2N2N2N2O1N2N2N2M3[NbM\\H_2c7cMZH_2d7cMZH_2e7bMXHa2h7_MWHb2h7^MWHd2i7]MUHd2j7]MTHe2l7[MRHg2n7YMPHi2P8WMnGk2R8UMmGl2T8SMjGo2U8RMiGP3W8QMgGP3Y8PMeGR3[8nLbGU3^8lL_GV3a8j00000001OO100000000O10000O100000000000000000000000000000000O10000O100O100O1N2N2N2N2O1N200O1O100O1O1O1O1N2jNlJSJV5k5mJSJT5l5nJXIO:Z5]6UK[IP5e6k0N2O1O1N2ZO\\IcJf6\\5_I^Jc6b5_I[Jb6b5e0K5OaI[JZ5c5hJ]JX5b5hJ_JX5_5hJdJW5Y5gJmJX5R5dJSK\\5k4bJgJRO?\\6i4bJ[K^5d4aJ^K_5b4\\JcKd5\\4ZJgKf5Y4XJhKi5X4VJiKj5W4UJjKk5V4SJkKn5U4oInKP6S4oImKS6R4lIoKT6Q4kIoKV6P4iIQLX6o3fIRL[6n3cITL]6k3cIUL^6j3bIWL^6h3cIWL^6i3aIWL`6i3_IWLb6i3]IXLc6W5100O10000O100O1O10000002N5K;E3M4L4VLlHU1X7eNRIS1R7eNSIZ1o6bNTI]1n6_NYI\\1j6^N[I_1i6[NZIf1g6TN]Il1e6lMaIT2a6dMeI\\2`6XMgIh2^80001O000000O100DTMWFn2d9]MTFe2l9=0000O10O1001N0lNfLYHZ3`7nL^HS3Y7YM]Hn2a7WMZHk2d7XMXHk2f7XMXHi2f7^MSHd2k7_1O2A>ASJ^Io5`6TJ_Il5]6ZJcId5X6bJiI\\5W6fJhIY5W6jJiIT5W6mJiIR5V6PKjIo4V6SKiIl4V6UKkIj4U6UKlIk4S6dJcI7;T5S6cJdI89U5T6aJeI97V5_6jJ`IW5`6jJ_IV5b6jJ\\IW5d6kJYIV5g6g000000000000000000000000000000000cNgIcJLQ1P3nN:P5hLTKa0h0\\2WO9m4jLTKj0?T2^O9o4iLTKl01SO3o2GTM0Q3Q5mLTKl01TO1o2JRM1P3o4oLSKm02XOKk22nL2Q3l4QMSKm03\\29@a4XMRKm03[2;_O`4YMRKo02Y24fLMg2k4[MRKo03W2>^O]4\\MUK2Ma04a2?^O[4^M_Kc0H_2a0^OZ4]M^Kg0G]2a0_Om4UNaJ\\2a0@o4RNaJ^2h0XOW4fMkJN?=Hg2f0XOX4fMeK3^Oo2e0XOj5g0VJYOk5f0UJZOk5f0VJYOk5f0UJZOk5f0UJZOl5e0TJ[Ol5e0TJ[Om5d0SJ\\On5c0RJ]Oo5b0QJ^OP6b0oI^OQ6b0oI^OR6a0nI_OS6`0mI@U6>kIBV6=jICV6=jICW6mGZO@O123O17_8QEBo:N1000000O100O2O1N2NQW_7" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"f;Y3g;000000000000000001O0000000000000000O100000000000000O100O1O1000000000000000000001O001O1O001O001O1O3M1O1O000O100000000000O10001O0000000O10000O1000000O1000000M3O1O1N2O1L4J6N2O100J6O1OISE]Lm:k3000000000000O1O100O100N2O10000000000000000001O0000000000VOjKjFV4P:01O00000000001O0000001O00001O0000001O00000000001O000000000^ORERMo:_3000000001O00000000001O000000001O00000000001O0000001O000000001O0000000000001O00000000001O0000000000001O0000001O00000000001O0000001O000000001O000000001O000000000000001O000000001O00000000001O0000001O00000000001N11O000001O000000001O00000000001O0000001O000000001O00001O001O0000001O00001O0000001O0000001O0000001O0000001O00001O000000001O0000001O0000001O0000000000001O0000001O00001O00001O001O00001O00001O000000001O00001O0000001O00001O00001O0000001O00001O00001O0000001O00001O0000001O0000001O00001O0000001O00001O0000001O00001O00001O0000001O00001O00001O000000001O000000001O00001O0000000000001O001O0XNlBd1U=[NkBe1U=ZNlBf1W=000O11O000000000000O1000000000000000000O10000000000000000000000O10000000000000000000000O100000000000000O10000000000000000O10000000000000000000000000000O1000000000000000000000000000000000000O10000000000000000000000000000000000000000O100000000000000000000000000000000000000O1000000000000000000000000000000O100000000000000000000O10000000000000000000000000000000000O1000000000000000000UC" + } + ], + "model_output": "A dark-colored car with a glossy finish, featuring a curved roofline that slopes gently towards the rear, a visible section of the rear windshield with a defroster grid, and a partial view of a rear windshield wiper, is parked on the road." 
+ }, + { + "image": "images/caption_detailed_18.png", + "subject_name": "person", + "object_name": "baseball glove", + "predicate_name": "wearing", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "YRa01Z=000Uh?4eW@8I3L5L2N3M3N1\\DTOf:n0XEVOc:m0YEWOe:k0XEWO2Jl9Q1oEWO5Hk9S1mEXO7EEOk9U1VFYO9ECOm9U1TFYO4E^9N`F:3I\\9MeF50NZ9NgF3OOY9OjF0IUOJk0b91mFNE7]9LnFME7[9MjF3HnNNR1_9NhF5J1]9KdF9NL^9d20OO2O010O0001O001O00000O2N2N10100O001O0N3O010O000M3001O001000000O1O1O1O1O2N0G^LiFd3R9=N2N2O1N2O010O100O01000O010O10000^GhKQ8X4nGjKQ8V4nGlKQ8T4jGQLV8P4gGSLX8`400010O01O01O000010O000010O0dMiGEW85oGJQ81UHNk70VH1j7NWH1k7MUH4k7KVH5k7JUH7j7JUH6l7JSH6m7KRH5o7LoG4R8LmG4V8JiG6X8JgG6Z8lM`G22U12n0T9SOjFm0V9TOiFl0X9UOfFk0[9b11O1O2N5J4XOYFfMh9X2YFhMh9V2ZFiMg9`0TFc06mNg9>TFd07mNf9=TFe07mNf9>SFd08nNf9oEd0m:\\OTEa0n:_ORE`0P;_OPE`0R;_OnD`0T;@kD>X;BgD;];EbD:_;FbD8`;H`D5b;M]D1e;N^DMe;3^DFf;:\\DZOn;e0<10O100O100O10lC]O_O0k;d0dDC[;=cDE\\;;cDG^;8[D1c;n0N00O110O002M2N2N100O1O100O2N1O0O1O2N3M3L3N3K4L4L5M3KjSi4" + }, + { + "size": [ + 427, + 640 + ], + "counts": "V\\o23Z=N^n25[QM3N2N2N2ZOEXD>e;EXD?e;CWDb0f;AVDb0i;a0O1N110O01O1O2N1O010O1O101M2O1O0O1O2N2N3K6K3L4L4N3L4L^fh4" + } + ], + "model_output": "A baseball catcher in full gear, including a helmet with a face mask, chest protector, shin guards, and a mitt on the left hand, is crouched in a ready position, wearing a dark blue jersey with red and white accents, and a tan baseball glove." 
+ }, + { + "image": "images/caption_detailed_19.png", + "subject_name": "person", + "object_name": "wall", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 640, + 425 + ], + "counts": "j8o01UO03m0J\\16iM0N12JX>b1gA]N=;BM00000:0A1N11O50:OB1N10l=P6XBSJN?af4=00000O100001O00001O1O001O001QLi@S3X?lLh@T3X?lLi@S3X?kLi@U3X?jLh@W3X?hLh@X3X?gLi@Z3W?dLk@e3l>XLVAl3h>gKbA\\4P?102M1O100O1O1O1O1O100O001O010O1O001O10O01O1O00100O1O1O10O01O1O1O00001O010O1O1O1O0000010O00001O0000O1O2SNYAZNl>d1XAVNl>h1WASNm>m1TAnMQ?P2QAkMS?U2n@fMW?X2k@dMX?\\2i@_M[?`2h@ZM\\?f2g@SM^?l2k00001N101O1O0O2N101N2N101N1O2O1O1O1O1O1O1O1O2N1O1N2O2O1N1O1O1O1O1O2N1O01O001N3N2N1N3M2N2O1N1O2O1O1N00O20O103K2O1O1O1O003M1O10N11n^OXMl`0n2O1OK5N2100O1O1O101O000O100000002NO1O1001O7I0O2O0O100O2N101OO1000O2O00O1000O010000O10O1000O10O100000O10000O1000000O10001O00001O0O1000001O000O2O001O001O1O0O2O001O001O1O0O2O00001N2O001O1N3N3L1000001N2O0O3N2N2N3M1N3N1N3M3M2O2M4L5J6K5J6J6Hh\\b0" + }, + { + "size": [ + 640, + 425 + ], + "counts": 
"Yi0;6HWb09f]OH6V3a?m0O00000000O10O1000O100000000O10000000O1000O1000000O100000O100000O1000000O100O10000000000O10000O10O1000O10000O100000000O10O100000O100000001O000O0100000O1000000O100000O100000O100000000O0100000000000O10000000O10000000O10000001O00000000000O10000000000000000O100000001O0010O0001O0101O1N2O1O2N2M2O2N1O3MROfLWAW3h>jL[AT3d>nL]AX3Z>jLgAU3W>lLnAP3P>RMQBl2n=UMXBd2h=^MYB`2f=aM]B]2a=eMdBV2Y=mMkBo1S=RNRCi1mVC^Oj01OO1WHRIS7n6mHRIS7n6g0O003M00_HVI`6i6`IWI`6i6_IXIa6h6_IXIa6h6TIWI_O020[7i6oHWID00020\\7h6_IXIa6h6^IYIa6h6_IXIa6h6^IYIc6f6nHXIN2T7f6\\I[Ic6f6[I\\Ie6d6]IZIc6f6oHWID051Z7g6mHXID0j7h6RIYIb6g6^IYIb6g6]IZId6e6[IZIh6e6XI[Ih6e6VI[Il6e6RI]Io6b6QI]IP7c6oH^IQ7b6oH]IR7c6nH]IS7b6mH]IT7c6lH]IT7c6lH]IU7b6lH]IT7c6k0000001O0000001O1O0000001OeGbIg7^6YHbIh7]6XHbIi7^6c00001O00000000aGcIo7\\6PHeIP8[6a00000000]GdIV8[6iGfIW8Z6iGfIW8Z6jGeIV8[6>000001O01O00000000000000000000000000000000000000000O2O00000000000\\GcIX8]6hGcIX8]6<0000000000000000000000000000001O000001O0000000000001OO101O1O00001O1Oj3_9fLSGX3m8jLQGV3o8mLnFS3R9nLlFS3T9\\M\\FT22iLb9[1SFc1f0QMW9]4n0O2K7ULnDA=f2X;^MRELXO4>5Pjm1" + }, + { + "size": [ + 493, + 640 + ], + "counts": 
"V;P11T2W;lMiDT2W;lMjDS2V;mMjDS2V;mMjDS2V;mMjDS2V;mMjDS2V;mMjDS2V;mMjDS2V;mMjDS2V;lMkDT2U;kMlDU2T;jMmDV2S;jMmDV2S;jMmDV2S;jMmDV2S;jMmDV2S;jMmDV2R;kMnDU2R;kMnDU2S;jMmDV2R;kMnDU2R;kMnDU2R;kMnDU2R;kMnDU2R;kMnDU2R;lMmDT2S;mMlDS2T;mMlDS2T;mMlDS2T;mMlDS2T;lMmDT2S;mMlDS2T;mMlDS2T;mMkDT2U;kMlDU2T;kMlDU2T;kMlDU2T;lMjDU2V;Q100000000000000O10000000000O10000000000000000O100000000000000000000O1000000000000000000000000000000000000000000000000001O00000000000000000000000000001O0000001O0000001O0000000000000000001O000000000000001O0000001O0000001O000000001O0000001O0000001O00001O00001O001O001O001O1gL^De2b;ZM`De2`;ZMbDe2_;YMcDf2^;WMeDh2[;WMfDi2[;UMgDj2Y;UMhDk2Y;SMiDl2X;RMiDn2W;RMjDm2l;O1O001O00001O001O0000001O001O001O001O000000001O1O001O00000000001O00001O1O00001O001O00001O000000001O0000001O001O1O1O00000000001O001O001O001O001O00001O001O001O001O001O00000000001O0000001O00001O00001O0001O01O0O101O001O001O001O001O00001O0000001O001O00001O001O000000001O001O002N00001O00000000001O0000001O001O00001O001O0000001O00001O001O1O1O00000000001O00001O00010O0O101O001O000010O0001O001O001O1O00000000001O0000001O001O00001O00001O00001O001O001O001O001O0000001O0000001O00001O001O0000001O001O001O1O001O001N2O00hhP3" + } + ], + "model_output": "A red and black diesel locomotive numbered 1803, featuring a prominent front headlight cluster with two main lights and additional smaller lights, a yellow front bumper, and a series of grilles and vents on the side, is driving on the railroad. The locomotive has a yellow railing on the front, a pair of windshields with wipers, and a set of steps leading up to a door. The side of the locomotive displays a series of vertical handrails and a ladder leading to a platform with a railing." 
+ }, + { + "image": "images/caption_detailed_21.png", + "subject_name": "sandwich", + "object_name": "sandwich", + "predicate_name": "leaning on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "lg_32m>2O1N101N2O1M4M2O1N2O0N3N101N1O2N1SOUOdCm0Z:G8J3O3I5M4N1N2M3M4L3N1O2M3M3N2N1O2M3N2M3O1N2N2N101O0O2O0O2O1N101N101O1N101O1N101N2O001N2N101O001N2O1O0O2O001N2O001N2O1N2O1O001N101O001N101N10001O0O101O1O0O101O000O101O00000O101O0O10001N10000O1000000O10000000O010O10000O01000O100000O010000000000O100000O10O100000000O0100000000000O1000000000O10O10000000000O10000O100O1000000O1000000O1000000000000000000O10000O1000000O10010O0O10000000000O11O00000000O11O00O100001O00O10000O100O2N11O1O0000O2N1000000O20O000001O0000010O0O11O010OO10001O00000O1100O0000O1O10001N1O1001O00O2O0O100O1O10001O000O1O10001N1L4N21O1O00O2L3001O01N100003NO000O10010O000000010O000O11O01O00001O0000001O0O100000000010O01N101O001N101O00001O00000001O01O00001O00010O00000O101O000000001O00001O00001O001N10001O0000001O0000001O0000001O00001O00001O01O0001O00001O001O001O001O1O001O100O2N2N2M3N2M3N3L3N3L3N1O1N2O1O3L2N2O2M4M3M2M3N4K5Ib]f2" + } + ], + "model_output": "A sandwich with a layer of green pesto spread, topped with slices of tomato and a piece of bread, is attached to a sandwich." 
+ }, + { + "image": "images/caption_detailed_22.png", + "subject_name": "cat", + "object_name": "sand", + "predicate_name": "sitting on", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "g^S67T=3N3M101N3]CBS5b0@W30oKf5ImJ=1a0]OY30PL\\62aIHj0j0XO]32nK[6MfJe0nN_32oKZ6LiJd0jNb32oKh7=VHe32nKi7;UHh32mKk79SHj32mKl77SHl31mKm76QHn32lKo72PHS40lKR8NoGV4OlKT8JoGY4MnK^8R4bGnK^8S4_GoKa8^4001O00001O1O001OOQM^GV1b8hNaGW1^8hNdGX1\\8hNeGW1[8jNdGV1\\8lNaGU1^8nN`GR1`8m1VMXKPMh4n2\\KZJNb2f4S3fKhLZ4W3jKfLV4Z3mKcLS4]3RL]Lo3c3ULXLl3h3TLVLn3j3RLTLP4l3PLRLR4n3nKnKV4S4iKjKZ4V4eKjK\\4V4dK^Kh4b4VK\\Kn4e4jJ[J@h0P6m4`J\\J^Oe0U6P5\\JPKh5P5WJmJm5S5SJkJo5V5PJiJQ6W5oIhJR6X5nIcJW6]5hI]JEKl00h4h5gJ[Jg0Oa4f5fJ\\Jj0N`4f5fJZJf6e5[I\\Jd6b5_I]Ja6c5_I\\Jb6d5c000000000000QJeJkNI\\4b5dK\\J`0<\\OI_4_5eK]JO`KTIb2l6\\MXIb2h6]MYIc2h6[MZId2f6[M[Ie2e6[M[Ie2e6ZM\\If2d6ZM\\If2d6ZM\\If2d6ZM\\If2d6ZM\\If2d6ZM\\If2d6ZM\\If2d6ZM\\If2e6YM[Ig2e6ZMZIf2f6[MXIf2h6o10000000mIRIH3j5k6a000000gITIR6l6mIUI5Me5n6VJUIS6k6lIVIT6Q700RJkHe5U7UJmH00j5S7]JlHb5Q7bJnH^5R7bJnH^5R7aJoH_5P7aJQI_5o6bJPI^5P7bJPI^5Q7bJnH^5S7aJmH_5S7aJmH_5S7aJmH_5S7=0000O10000000000001O0RKmHc3S7\\LQIa3o6`KPIa01o3o6`KPIa01o3o6_KQIb00o3P7^KQIb0OP4P7^KQIa01P4o6^KRI`0OR4o6_KQIa0MQ4R7_KnH`5R7=000000000000001O001O00000000000000000YJiHY5X7=0000O100000000000000001O00O11O0000000000000000001O00000000001YKjHV3V7iLoHS3Q7lLQIS3o6mLRIR3n6nLTIP3l6oL[Ik2f6TMhI^2X6bMiI]2W6dMlIX2T6hM[Ji1f5WNdJ^1\\5bNeJ]1[5dNeJ[1[5gNdJX1\\5iNdJV1\\5kNcJU1]5kNdJT1\\5mNcJS1]5mNcJS1]5mNcJWOSNT1[7DaJUOYNV1W7D`JUO\\NU1T7G_JSO_NU1R7I]JROcNT1P7J[JSOfNS1P7H[JSOgNU1n6H[JROiNU1m6HZJSOiNU1m6HZJROjNV1m6F]JoNhN[1k6F\\K:e4D\\KcJC]5>bJB^5a0_J_Ob5a0\\J@d5a0[J_Oe5c0YJ]Og5g0UJYOk5k0oIWOR6S1mH]MMa1V7]1\\H]M8W1[7T2_HmMa7o3001O001O000000001O00O100000000O100000aJ`HR5_7nJcHQ5]7oJdHP5]7oJdHP5\\7oJeHQ5[7PKdHP5\\7PKdHP5]7oJcHQ5]7oJcHQ5]7PKbHP5^7SK^Hn4b7>1O3M1O001O0000000000001aLTH\\1m7aNVH^1k7VMSHg02S2l7TMSHh03S2j7UMSHh03S2j7UMTHh02R2k7TMTH:0C2n2j7UMTH;OB4m2i7VMTH;0A3n2i7VMTH;0A3n2j7UMSH;1B3m2j7UMRHm0Qc04N000
O100000000000000000000000000000000000000000000000000000000000000000000000O100000O10000000000000000000000000000000000000000000000000000000000000000000000000000O10000001N4M7Ib0\\OjSQ10WlnN3X\\OLSRS6" + } + ], + "model_output": "An individual with a contemplative expression, wearing a light-colored, short-sleeved shirt and dark pants, is sitting on a bench with one leg crossed over the other. The person's arms are bent at the elbows, with hands clasped together in front of the body, and the head is slightly tilted downward." + }, + { + "image": "images/caption_detailed_24.png", + "subject_name": "airplane", + "object_name": "sea", + "predicate_name": "flying over", + "mask_rles": [ + { + "size": [ + 488, + 640 + ], + "counts": "kjg73T?2O00000O1K5VAH]>NfA:MI0OT>1mA8OHO1T>OnA?NBT>OnA?NBT>OnAf0R>8O7E=D6M100000O1O1L4I7J>Cc0]O8L10000O10000000000000000000O100N5K7G6K4N10O1M4Ha0E2N2O001N_nn0" + }, + { + "size": [ + 488, + 640 + ], + "counts": "Y7k0Q1R4^7nKbHR4_7mKbHR4^7nKbHS4]7mKcHS4]7mKcHS4\\7mKfHR4Z7jKeG[OQ1k4Z7fKnH[4Q7cKQI]4o6bKSI]4m6bKTI^4l6bKTI_4k6aKUI_4k6`KVIa4i6_KWIa4i6^KXIb4h6^KXIc4g6\\KZId4f6\\KYIf4f6ZKZIf4f6ZKYIg4g6ZKWIh4h6YKhHGWOU5P8UKfHY5Y7iJcHY5]7hJaHZ5^7gJaHZ5_7fJ`H[5^7eJbH\\5^7dJbH\\5^7dJbH\\5^7dJ_H`5`7`J^Hc5a7^J^Hb5b7bJYH_5g7eJUH[5k7hJPH[5o7o0000001O001O0000000O110O00001O00gIYHd2ObNh7fN`Hc2KfNe7dNgH_OA0OL7h1N9d7bNlH[OC0=f1C0001OO10000O100O2N1O1O1PLc0kEUO^19b8k0dE[Ob1Kg8V2mFkMW9>^ESONj1i0hNW;^1XDhNi;[22N2N002N1O1O1O001O1O001O001O001O00001O001O1OmNaM]E_2Z:mMdE7[Ol0j:UOkEKAm0\\:ASF]OFQ1n9L_FgNL\\1Q9b0PH^Oo7d0oG]OP8d0oG]Oa5gNXJ2iNl1]1[Ob5iNUJ5eNl1a1WOe5kNRJb28cNf5oNnI_2:cNh5POlI_2:aNj5TOjI[2:aNl5VOkIW28cNm5XOkI\\OZOS2k0ZOP6XOeJZ1ZO^OQ6ZOfJV1XO@R6]OeJR1WOBS6^OeJP1VODU6^OcJo0VODX6]OaJQ1ROE]6[O`JR1nNFb6]O[JX1dN_MIl1X7@WJa3i5jLkIW3U6kLhIV3X6mLeIS3Z6SM_Io2a6VMWIm2i6YMlHl2U7c2000000000000000000O100001O0000000000000000000000000000000000O10000001O00O1000000000000000000000000001O0000000000000000000000000O10000O1SOSI^In6[6S1001O1O00001O00001OmIeGY5[8cJkG\\5T8cJnG\\5R8dJnG\\5
R8cJoG^5P8cJoG]5Q8cJnG^5R8dJlG\\5T8hJfG[5Y8j0000bJiGeN0n4W8TL\\Hj3d7PLbHQ4]7nKeHQ4[7mKgHT4X7kKiHU4W7jKjHW4V7gKkHZ4T7fKlHZ4T7`KRIa4m6[KXIe4g6[KYIe4g6ZKZIh4d6XK\\Ii4c6WK]Ij4a6WK_Ii4a6WK_Ii4a6WK_Ij4`6VK`Ik4_6TKbIm4]6SKcIm4^6PKcIQ5]6nJdIS5[6mJdIT5\\6lJaIW5_6iJRIf5n6ZJQIh5n6ZJPIf5P7\\JmHe5R7^JlHb5T7_JjHb5V7_JiHa5W7^JiHc5X7\\JgHe5Y7[JeHg5Z7ZJfHf5Z7ZJeHg5[7ZJdHg5[7ZJdHf5\\7aJ\\H`5d7bJZH^5f7cJYH]5g7R101O00aIZH[5g7dJ`HV5`7iJbH=Kh0M9e7bNdH9Oi0J;c7aNPIUOB6=e1A>`7_N_Jo0UNa0\\7]NcJP1RNc0[7XNjJR1lMf0Z7VNnJP1kMi0X7PNWK7]Ni1b;000O1O10000O1O2O0O100N2_ObMUDa2b;g02YMQDQ2Z<_MRDZ2a00001O0O1000000O1N2O100N2]LDXH>[7nMdAT2\\>PN^AR2a>PNVAX2i>nMY@k2b=oLUCe4j<_KfBALT5\\=W1O1O1M3O100O1N2O1O100O1M3O100O1O1O100O100O10000000000000000000000000000001O00aNUIiEk6U:XI`EJXOo6W;a1N2I7N2O1O100O1O1O100O1O100O1O100O10000O1O1O1O100O1O100TOfFQG[9l8iFSGW9j8oFdFD9]9Q9XGmFi8R9S1O100O100oM^EdJb:[5`EcJa:\\5aEbJ`:^5aE_Ja:`5aE\\Jb:c5_E\\Jb:c5bEWJa:i5aESJa:m5`ESIAa0R;[6_ESI_OI2129P;j6\\EXIAE5ON0Q;T7ZE_IE^OX;R7SElIj:T6VEmIi:S6WEmIi:R6XEmIi:S6WEmIi:R6XEmIi:S6XEkIi:T6XElIh:T6YEjIh:V6YEgIi:X6ZEbIj:^6WE`Ij:`6YE]Ig:c6ZE_Ic:a6fEXIX:h6fEXI\\:i6`EZI`:W6aDiIU1J[:\\6eDcIk<\\6=000000000000O10hBgIiYL^A?GoN2k2j>eMjA1IT2`>eMlA3HV2_>bMnA4GW2h`0N3M1O2J6XOZ^OQOia0h0b^ORO_a0j0X1D;E8Hm]T2N^njM=Ji00i7ZN]Ag1c>]NWAe1i>bNo@_1Q?h1O10000000000O10000O10000O100O1O100O100O1O100O100O1O100O1O1O1O1O1O1O1O1N2M3lKeJ`I^5_6dJ^I^5`6eJZI`5e6aJoHk5P7VJfHT6Z7mIYGM`NKh0`6^9iIXGc0gNg5P:gIVGb7i8_HVGb7i8^HVGd7i8]HUGe7k8[HTGf7k8[HUGe7j8\\HVGd7i8]HWGc7i8^HVGb7i8_HVGb7j8^HVGb7i8`HUGa7k8bHRG^7o8cHnF^7R9gHiFY7X9jHbFX7_9lHZFV7f9mHSFW7n9]1001O1O1O1O2TGlEh7V:QHSFk7U;L2N2N2N2`LXDdNi;X1^DdNc;X1bDfN_;W1dDhN];V1dDjN];S1fDlN[;P1hDPOY;m0iDSOX;k0jDTOW;h0lDXOU;c0PE\\OY;ZMeC2?[1j0Y1oYLmAg3Q>\\LnAd3o=_LQBa3n=bLoA_3o=dLPB\\3n=gLQBY3n=jLoAW3n=mLPBT3n=oLPBR3o=PMmAS3l=d1J6J6E;D3M101N2O2M3N1O1N0100O010O01O0001000O010O001M201O100O1O1O1O1N2N3N1N2O101O000O2O0O101O0O2O1O1O1N2N2O1N3N1O2M3N2N1N4M2LLXCWNeoDA]12d94_]A4YAIc>;N2O1O10000O01O0102NO0020aA@[>c000O00100O10O1O001O001O1O001O001O001
N2O1M3O1O10OO3M2O00Y`g5" + } + ], + "model_output": "A small bird with a stout build, featuring a mix of brown and white plumage, stands on a wall. The bird has a distinctive black eye stripe extending from the beak through the eye, a short, conical beak, and a white underbelly. Its wings display a pattern of brown and white feathers with darker brown edges, and the tail is long and narrow with brown and white feathers. The bird's legs are thin and pinkish, with three forward-facing toes and one backward-facing toe on each foot." + }, + { + "image": "images/caption_detailed_28.png", + "subject_name": "bird", + "object_name": "floor", + "predicate_name": "walking on", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "\\WR3=k<5L4K5M3L3N3M3L8H>C3M2N2M3N3MAVEdNd:\\1^EdN`:\\1bEdN]:\\1dEdNZ:]1gEbNY:]1jEaNV:_1kE`NT:a1nE]NR:c1oE\\Nj9JgEk1`0ZNg9NhEi1a0XNf90iEh1b0WNBLj96RFg1c0VN@Oi96SFe1e0VN^OOi98SFc1g0UN]O1f99VFa1g0UN]O1e99XFa1g0SN]O3`9=\\F^1l0VNd8`0`FZ1k0WN]8f0iFS1j0WN[8h0kFQ1j0WNZ8h0mFQ1h0XNZ8k2fGUMZ8k2eGVM[8j2eGVMZ8k2eGVM[8j2dGWM\\8i2cGWM^8i2aGWM`8h2_GZMa8f2\\G]Md8c2TGeMl8[2oFkMP9j2TG^Ll8]3YGbLg8[3]GdLd8Z3^GeLd8X3j0dMPFW1U:]NTFa1Q:YNTFb1FYNi:c1[EaN`:^1cEaN]:]1fEbNZ:\\1hEcNX:\\1kEbNU:[1oEdNR:Z1PFeNP:Z1QFfNP:X1RFgNo9V1SFjNP:o0UFPOm9k0VFUOk9i0VFWOl9e0VF[Om9a0TF_Oo9=RFCR:7QFHi;000QZY4" + }, + { + "size": [ + 427, + 640 + ], + "counts": 
"\\7m5^7000000000000000000000000000000000000000000000000000001O00O10000001O000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001O000000000000000000000000000000000000000000000000000000000000000000000000000000001O00O1001O00000000000000000000000000000000000000000000000000001O1O4L3M2N2N1O2N1O2N2N1O3M1O2NcJYOYOV1g0jNQO^1R5b0\\Od0A?@`0YOg0TOl0L4O1M3N2N2N2N2N2N2M3N2M3O1N2O1N2O1N2N2N2N2N2O1N2O1O1O21M3N3M2N2N3M3M2N1O3M102MGRJjJl5f5K3M3M2N3M2N3M2N3M3M1O2N2N4L5L7H9G9F9H7I7IgLoLUNh2k1XM[Nc2e1\\MdN[2\\1eMlNS2T1mMUOi1k0XN\\Ob1c0^NDZ1=fNJS16mN3j0MVOc03DMS61O1^DN];2cDN\\;3dDM];2cDM^;2cDN\\;4bDN];1eDM];0_V20giM1O0O11O0O10O11000SlV2" + }, + { + "size": [ + 375, + 500 + ], + "counts": "nnW35a;2N2ZEJd9=XFFf9=UFFj9=nEJQ:7lEKT:l000000000001O001O001O000O1001O0O100O10000O10000O10000000O1000001N10001O0O10O1O100000O0100000000O100000010O0100O010O01O00000000000O100000000000000000000dNeN\\H[1c7gN\\HY1a7kN^HU1b7kN^HU1a7lN_HT1a7lN^HU1a7lN_HT1`7mN`HS1_7nNaHR1]7PO[HDVO]1]8ROZHEWOY1^8UOWHV1i7lNSHV1l7lNoGX1Q8iNlGY1T8iNhGY1X8iNeGX1[8R10000oMoL]KP3b4SM\\Km2c4VM[Kj2e4XMYKh2f4ZMYKf2g4[MWKf2i4]MnI^Oj0U3X5]MkICk0P3Z5_MhICl0o2]5jMbJW2^5iM`JY2`5gM]J\\2d5cMYJ`2g5`MXJa2i5_MUJb2k5^MTJc2n5[MRJe2Q6XMnIi2U6TMjIm2W6SMhIm2Z6QMfIo2]6nLcIR3^6nLbIQ3_6nLaIR3`6nL_IW1DNn6kN^IQ3c6oL^Io2d6PM[IP3f6PMZIo2g6QMYIn2i6QMWIn2k6QMUIn2l6QMTIo2m6QMRIo2n6QMSIn2m6TMPIm2Q7TMeH@1_3Z7i01O00001O0000O10000O1000000O1XOhKQJX4o5hKPJY4o5iKoIX4Q6hKmIZ4S6gKiI\\4X6eKaI`4`6;1O00000000001O2N1O1O000000O1N2O100O1000000O100000000001O2N6J3M2N4L1O1OO100O1O100O1O10000001O1O001O1O1O1O1O2N1OdH" + } + ], + "model_output": "A tree with numerous slender branches covered in small, elongated green leaves stands beside a stop sign. The branches spread out in various directions, creating a dense canopy of foliage." 
+ }, + { + "image": "images/caption_detailed_30.png", + "subject_name": "grass", + "object_name": "stop sign", + "predicate_name": "on back of", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "\\:c4^:O000000001O1O001O1O001O1O1O2N1O001O00001O000000O10000O100O1O100O1O1O1O100O100O10000001O001O001O00O100000000001O1O1OO1O1000000O1O1000000000000001O001O1O1O001O0000001O0000001O1O0000001O1O001O1O1O1O1O001O1O1O1O001O001O1O1O1O1O1O1O1O1O1O001O1O002N001O1O001O1O1O001O1N2O1O001O1O002N010O1O1O1O001O1O1O1O001O1O1O1O1O001O1O1O1O001O002N1O001O001O1O1O1O001O001O1O001O1O1O001O1O1O1O1O001O1O1O1O1O1O001O1O1O1O1O001O1O1O1O1O001O1O1O1O001O1O1O001O1O001O1O001O1O001O001O00001O00001O00000000000000000000000000000000O1000000000000000000000000O1000000000000000000000000000O2DoAEogX5" + }, + { + "size": [ + 480, + 640 + ], + "counts": "abe1h1V=Z1kBQMh;c3@l0TOf0ZO?A4L2N2N2N2N2N2M201N2N2N2N2N1O2N2N2N2N2N2N1O2N2N2N2N1O2N2N2N2O1N2M2O2N3M1O2N[MPI]Mn6b2VI\\Mj6d2VI]Mh6d2WI_Mf6b2XI`Mg6a2YI`Me6a2[I`Mc6a2\\IaMa6a5OnL`IeM^6[2dIdM[6]2eIQKNS2[6m2gIcMX6^2hIcMV6^2jIcMT6^2lIcMR6]2oIdMo5^2PJcMn5i5N2N2N2N]MXJPLg5P4ZJQLd5P4\\JPLc5R4\\JPLa5i6N2N2N1OYMgJjKW5W4iJjKU5W4kJjKS5W4mJjKQ5S7OUMPKiKn4X4SKgKl4Z4SKhKk4Y4UKiKh4Y7NPMZKhKe4j3[KdI0c2c4Y4]KgKb4Z4^KhK`4]7NlLbKhK]4Y4cKhK[4Y4dKhK[4`7N2N2N2N2N2N1O2N2NfIWL`1g3bNYL^1e3cN\\L]1b3Q5O2O1MbIbL_1]3aNcL_MOa1\\3Q1eL^MOa1[3c3eL^LX3g7N3N1N1OcJoLmNP3c6NkJUMWNj2j1XMUIN6M]4l2Y2TMYI0j4i2V7N3N2N3N1N1O2N1O2N2O0O1O2N1000O1000000001OO1000001OO10000001O0O100000000000000000001N1000000000000000000000001O000O1000000000000000000O1000001O00000000000000001N100000000000000000000O2O0000000000000000000O1000001O00000000000O10000000001O00000O10000000000O100000001O00000000000O11O00O10001O000000000000000O2O000000000000000O101N1O1O1O1O1O1O1O100N2O1O2N1O1N2O100O2N1O2N2N2N2N1N3N2N3N3L2N1O3M2N2N2M3N4L2O1N2M3N3M2N2N2N2M4N0O2N3M3M2N2N2M3N2N2O1N2N3M3L3N3M1O3M2N2N2O3L2M3N2N3M1O2N3M3M2O1N2M4L3N3M1O3M2O1N2N2N3M2M4L3N2O1N2N2O1M4N2L3M3N3M1O2N2K5H:]Oa0H9^Oa0Df2`1000001O1MgN" + } + ], + 
"model_output": "A bouquet of deep red roses with velvety petals, tightly clustered together, creating a lush, rounded appearance, sits beside the TV." + }, + { + "image": "images/caption_detailed_32.png", + "subject_name": "truck", + "object_name": "road", + "predicate_name": "parked on", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "Q7V1U<1O1O1O00001O000O0100O1000001O000O101N10001O1O1O1O00000001O1O2N1O1O000O2OOO2O1000000000O100O10000O10000001O1O1O000000000000000001M2O1O1000000O1O1O100000000000O10O100L4O1O2N2WObC4a[V7" + }, + { + "size": [ + 427, + 640 + ], + "counts": "\\8m08E\\;;dDE\\;;dDF[;:eDF[;:eDF\\;9dDG\\;9dDH[;8eDH[;8eDH[;8eDH[;8eDHZ;9fDHY;8gDHX;9hDHW;8iDHW;8iDHW;8iDIV;8iDHV;9jDGV;9jDGV;9kDGT;9lDGT;9lDHS;8mDHS;8mDHS;9lDGU;8kDIT;7lDIU;6kDKU;4lDKU;4kDLU;4kDLU;4kDLU;4kDLV;4iDLW;4iDMV;3jDMV;3jDMV;3jDMV;3jDMV;3kDMT;3lDMT;3lDMT;4kDLU;4kDMT;3mDLR;5nDKR;5nDLQ;4oDLQ;4oDLQ;4oDLQ;5oDKP;5PEKP;5PEKQ;4oDMP;3PEMP;3PENo:2QENo:2QENo:2QEOn:1REOn:1REOm:2SENm:3RENm:2SENm:2TEMl:3TENj:3VEMi:4WELi:4WEMh:3XEMh:3XEMg:4YELg:4YELh:4WEMi:2XEMm:NSE3m:LSE4m:LTE3m:LSE4m:LRE5o:JQE6o:JQE7n:JRE5n:KRE5n:KRE6m:JSE6m:JSE6m:JSE7l:IUE7k:HUE8k:HVE7j:IVE7j:IVE7j:IVE8i:HWE8i:HWE8i:IVE7j:IVE8i:HWE8i:HWE8j:GVE9j:GVE:i:FWE:i:GWE8i:HWE9h:GXE9h:GXE9h:HXE8g:HYE8h:GXE9h:GXE9h:GYE9g:FYE@O6k:8VE_O37h:9VE[O7;c::lEES:=mECR:=oEBQ:>oEBR:=oEBR:>nEBR:=nECR:>mEBS:>mECR:>]EXON9e:`0ZEZO07e:a0WE[O34f:c0SE[O72g:R1YEnNg:S1XEmNh:S1XEmNh:S1XEnNg:R1YEnNg:R1YEnNg:R1YEnNf:T1YEmNf:S1ZEmNf:S1ZEnNf:P1[EPOe:P1\\EoNd:Q1\\EoNe:P1\\EoNd:1oDa0=_Oc:P1]EPOc:P1]EQOb:n0`EQO`:S1\\EmNd:T1ZEmNf:S1[EmNd:S1\\EmNd:T1[ElNe:T1[ElNe:T1[EmNc:T1^EkNb:U1^EkNb:U1^EkNb:V1]EkNb:U1_EkN_:V1aEkN^:U1bEkN]:V1cEjN]:V1cEkN\\:U1dElN[:T1eElN[:U1dEkN\\:U1dEkN\\:U1dElN[:K\\Ei09\\OY:V1gEjNU:Z1kEfNU:e0bE[O90U:Z1kEfNU:`0bEF8JU:\\1kEdNU:\\1kEdNU:\\1kEdNU:\\1lEcNT:]1lEdNS:\\1mEdNS:\\1mEeNR:[1nEeNS:Z1mEfNS:Z1mEgNR:Y1nEgNR:Y1nEgNQ:Z1oEgNP:Y1PFgNP:Y1QFgNm9Z1SFfNm9Z1SFfNm9Z1SFfNm9Z1SFfNm9Z1SFhNk9X1UFhNk9X1UFhNk9X1UFiNj9W1VFiNj9W1VFiNj9W1VFiNj9W1VFiNj9W
1VFjNi9V1WFjNi9V1WFkNh9U1XFkNg9V1YFiNh92QF47Kg9V1YFjNh94oEO9Ng93PFO9Ng92QF08Ng9OTFXOKf0:3g99oED:3g9:UF\\O4;f9S1ZFmNf9S1ZFmNf9S1ZFmNf9S1ZFnNe9R1[FnNk9l0UFTOn9i0RFWOQ:f0nE[OT:c0lE]OV:a0jE_O]::cEG_:6aEJ`:5`EK`:5`EKa:4_EM_:4aEL_:4aEL_:4aEM^:3bEN]:2cEN]:2cEN^:nNYEP193\\:mN]Eo065o9POnE0Kk094m9UOlEMOh087h9XOQFIOh078d9]OTFe08Nc9h0]FXOc9h0]FYOb9g0^FYOb9g0^FYOb9g0^FYOb9g0^FYOb9f0_FZOa9f0_F[O`9e0`F[O`9e0`F[O`9e0`F[Oa9d0_F]O`9c0`F]O`9c0`F]O`9c0`F]O`9c0_F_Oa9`0_F@a9`0_F@a9`0_F@a9@XF27>a9@XF]O285k0b9JZFlNL;8P1a9J]FVO2P1b9JWFZO7l0b9<_FDa9]OXF27a0a9^OVF29a0`9;`FEa9:_FFb99^FHc96\\FKf93ZFNf91ZFOh9OXF1i9NWF2j9MVF3j9MVF3j9MVF3j9MVF4i9LWF4i9KXF5h9KXF6g9JXF8g9HYF8g9HZF6g9JYF6g9YOQFC7U1h9gNRF3O17V1g9fNSFd06g0f9eNTFd06h0e9dNTFe07g0e9F[F:e9VOXF]O3]1e9UO[F\\O0`1d9TO\\F\\O0`1d9TO\\F[O1a1c9VOUF^O8\\1c9D]Fc9A^F?b9A^F?b9YNVFR18f0c9VNXFR15h0j9_NmE=9T1m9TOSFl0m9TOSFl0m9TOSFm0k9TOUFm0j9SOVFn0j9TNnEh09T1n9SNjEd08X1S:]NiEJ4i1S:]NjEI3j1S:]NeEG127j1S:]NeEH018k1f9hNRF\\OO29k1e9hNUF^O5l1e9fNWF\\O5o1c9eNXF\\O5o1d9cNYF]O3P2e9bNXF^O3P2n9YNoEG3P2j9]NRFD3Q2f9nMQF`02C7o1f9iNZFX1d9iN\\FW1d9`NSFC9m1e9_NRFD9m1i9[NnEH9n1g9[NPFG9n1e9]NRFD:o1c9^NTF^O1O8U2d9]NSF^O207U2e9\\NbFe1_9^NRF\\O7V2g9bNYF^1h9bNWF^1i9bNWF_1i9`NWFa1k9\\NUFd1l9[NTFd1m9[NTFe1h9jMPFa08e1g9`NYF`1[9iMgFf0Na1[9iMjFc0Kd1[9jMiFb0Lg1X9gMjFd0Mf1Y9fMhFf0Od1Y9fMiFe0Ne1Y9fMoF>Il1X9fMPG=Hl1Y9gMnF>Ik1Y9gMgFe00d1Y9gMgFe0Of1Y9eMhFe0Og1X9dMiFe0Oh1W9cMjFe0Oh1W9cMjFe0Oi1V9bMkFe0Oi1V9bMkFd00j1U9bMkFd0Om1T9`MlFc00m1T9`MlFc00m1T9`MlFc00m1T9`MlFc00m1T9_MmFd0Om1T9_MmFd0No1`9QN`Fo1`9QN`Fo1`9PNaFP2_9PNaFo1a9PN_FQ2`9oM_FR2a9nM_FR2V9[MlFc0OQ2T9^MlFa00P2V9^MjFb0OQ2W9]MjFa00R2a9nM_FR2a9nM_FR2a9nM_FR2a9nM_FR2a9nM_FR2a9nM_FR2a9nM_FR2a9nM_FR2a9nM^FS2b9lM_FT2a9lM_FT2a9lM_FT2a9lM^FV2a9jM_FV2a9iM`FW2a9hM_FX2a9hM_FX2a9hM_FX2a9hM_FW2b9iM^FW2b9iM^FX2a9hM_FX2a9gM_FZ2a9fM_FZ2a9fM_FZ2a9fM_FZ2a9fM_FZ2a9fM_FZ2a9fM_FZ2a9eM`F[2`9eM_F\\2a9dM_F\\2a9dM_F\\2a9dM_F\\2a9cM`F]2`9cM`F]2a9bM_F^2a9bM_F^2a9bM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b9`M_F`2a9`M^Fa2b9_
M^Fa2b9_M^Fa2b9_M^Fa2b9_M]Fb2c9^M]Fc2c9\\M]Fc2d9\\M]Fd2c9\\M]Fd2c9\\M\\Fe2d9[M\\Fe2d9[M\\Fe2d9[M\\Fe2e9YM\\Fg2c9ZM]Ff2c9ZM]Ff2c9ZM]Ff2d9YM[Fh2e9XM[Fh2e9XM[Fh2e9XM[Fh2e9WM\\Fi2d9WM\\Fi2d9WM\\Fi2d9WM\\Fi2d9WM[Fk2d9TM]Fk2d9UM\\Fk2d9UM\\Fk2d9UM\\Fk2d9UM\\Fk2d9UM\\Fk2e9SM\\Fm2d9SM\\Fm2d9SM[Fn2e9RM[Fo2d9PM\\FP3e9QMZFo2f9PM[FP3e9oL\\FQ3n91000000O101OO010000O100O100O1FiL_FX3`9jL^FW3b9900O100000001N100O1O0100000O10000O2M200O0100000000000O101OO1000001O1O1O000000O1O1N200O100000001N11O3L5L1O01O00O11N1O1K5O10O11O00000O1000000000000001O0000000000O100000000O10000O1000000000000O100000000O100001OO1000000O11O1O00000TOjFcMW9Y2SG_MQ9]2Q1N2N3M2M5L4L00000000O100C=O1000O100000000001O0000000000000000O11O000dF" + } + ], + "model_output": "A red fire engine, featuring a silver front bumper, a set of headlights, and a side compartment with reflective striping, is parked on the road. The vehicle is equipped with a side-view mirror, a visible door handle, and a section of the wheel well with a black tire. The side of the truck displays a reflective strip and a series of compartments with handles, and there is a visible hose reel with a yellow and black striped pattern." 
+ }, + { + "image": "images/caption_detailed_33.png", + "subject_name": "paper", + "object_name": "pavement", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 500, + 333 + ], + "counts": "R`?3`?2N3N1O2M2O1N103M1N3N1O1N2O1O0O2O0O0100O2VOZAd0k>00O2OO010000O01000O100O100O2O00000O2N1N2N3HS?0TA0^Ul3" + }, + { + "size": [ + 500, + 333 + ], + "counts": "Q>e0U=\\OmBd2OaM100LS<_3M100O100O100O100O1O100O1O10000O11O00OBYDQMf;P3]DmLd;Q3^DnLb;R3_DmLa;S3`DlL_;T3cDkL[;W3gDgLY;3ZDn2>oLY;V3iDiLX;V3e0L3M4N1N2N2N3M3L3N2N3L3M2L5J6K4O1O1O1000O010O10O10O010O0100O001O001O001O10O10O10N101O001O1G^N`Bb1^=dN^B\\1b=;2M2O1O1O2N100O2N1000010N10000000O1O2O0O00001O1O1O2N6J2ZCoMe;Q2WDVNf;k1WDZNf;f1YD^Nd;e1VD^Nj;f1mCeMMg0W?VA@a>o0F9F:H8I7F:H9F9H8CDH8EE;K6B=K5N3N100010O01PLkI_NV6c0gJ]OZ58PKHQ57oJIS55lJLU53kJMU53kJMV53hJNX52gJOY52fJN[51eJO[51dJ0]50bJ1]5OcJ1^5NbJ2_5N_J3a5M_J3b5L^J4b5L]J5c5K]J5d5K[J5e5KZJ6g5JXJ6h5JWJ7j5HVJ8j5IUJ7l5HTJ8l5oMPHn0T2S1l5oMQHm0R2U1n5mMRHm0o1W1n5lMTHl0m1Y1P6jMTHm0k1Y1Q6kMTHl0i1Z1T6iMSHm0i1Z1T6jMSHk0i1[1U6iMSHk0g1]1V6hMSHk0g1]1V6hMSHl0f1\\1X6hMRHk0e1^1Y6gMRHk0e1^1Z6gMPHk0f1_1Y6fMRHj0d1a1Z6eMRHk0c1`1\\6eMPHk0d1`1\\6eMQHj0b1b1^6cMPHk0b1b1^6dMoGk0a1c1_6bMQHk0_1c1a6aMPHl0_1c1a6bMoGk0`1c1b6aMoGk0^1e1c6`MoGl0]1d1d6`MoGl0]1e1d6^MPHl0[1g1e6^MoGk0\\1g1f6^MnGk0Z1h1h6]MnGk0Z1h1h6]MoGj0Y1i1i6]MmGj0Y1j1R4nLmL>hMk0X1i1e3]MZMOiMk0X1i1X3jMhMBhMj0W1l1k2UNVNUOhMk0V1k1\\2eNfNeNhMk0U1l1m1TOWOTNhMk0T1m1_1BEfMhMk0T1m1S1N1[MgMk0S1m1d0>c0iLgMk0R1n1NT1Y1SLgMk0R1n1F\\1a1lKfMj0R1P2ZOf1o1_KfMj0Q1k7Y1\\GeMj0Q1j7[1[GdMj0R1k7Z1[GdMk0P1k7\\1ZGdMk0P1k7\\1[GdMi0P1l7]1ZGcMk0o0k7^1ZGcMk0n0l7_1ZGcMi0n0m7`1ZGaMi0o0m7`1ZGaMj0m0m7b1ZG`Mi0n0m7b1[G_Mh0o0m7c1]GZMg0S1l7c1oH\\NR7d1nH\\NR7e1nHYNS7g1mHZNR7f1nHZNR7g1mHYNS7g1mHYNS7g1mHYNS7g1mHXNT7h1mHWNS7j1lHVNT7j1lHVNT7j1lHVNT7j1mHUNS7l1lHTNT7l1lHTNT7l1lHTNT7m1kHSNT7n1lHRNd6^2\\IbM]6e2cI[M\\6g2RIYLiNQ1V8e2iHfLkNg0\\8c2dHXN\\7h1aH[N_7e1^H[Nd7f1[HZNg7e1XH]Ng7c1XH_Ng7`1YHbNe7_1ZHdNd7\\1[HeNe7\\1YHfNf7Z1YHhNf7X1ZHhNe7Y1ZHhNf7Y1YHgNf7Z1WHiN`7`1`HaNV7i1iHWNj6V2UIkMa6`2^
I`MZ6h2eIYMP6R3PJnLe5^3YJcLP5U4nJlKd4b4\\KXKE`Mf4c7cKlJ[4[8\\KXER4Z;F;I6N3N1O101O000000001O00001O0000001O0000001O0000000O2O0000001O0000001O0000001O0000001O00000000001O1O0O1000001O0000001O0000001O000000001O000O11O01O000O2O000000001O1O0000000000001O000000001O0O11O01O000101N4Lb0^Oc0^OV1\\DoJW:U6RO`0@W1jN;D000001O00001O000000001O000000001O0000001fHnHP6R7mI[Ii5f6RJnI\\OfNn5\\7eJ]JQ5c5nJhJh4^=" + } + ], + "model_output": "A brown and white pinto horse with a predominantly white head, a brown patch over the left eye, and a brown mane stands on a chair. The horse's body displays a pattern of large brown patches interspersed with white areas, and it has a white underbelly and legs. The tail is not visible." + }, + { + "image": "images/caption_detailed_35.png", + "subject_name": "person", + "object_name": "giraffe", + "predicate_name": "feeding", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "mgV58^;4L1O100O100O01000O100O1000000O1000001O1O001O1O2N1O1O2O1N2O00N1N3M3M3M4L3MaT1GhI" + }, + { + "size": [ + 375, + 500 + ], + "counts": "bil0=U;:H6J5K3N2O0010O001O100O001O010O00100O001O010O10O0100O010O10O10O100O10O10O10O010O10O10O1000000O10O01000000O10O10O10000O1000O10O101OO10O@aEJ_:5fEF[::hE@FOb:a0a00O10000O10000000001N100O10001N10000O10001O00001N100O2O1O00001O0010O010O0101N003N2N1N2O1N101N1O101O0O2O0000000001OO2O0O101N8H1O1O1N2L3O1O2O000000O1RNjF>2IV9HlF5;Jl8NnF1W:MkE0mRT3" + } + ], + "model_output": "A person with short, light brown hair, wearing a white shirt with a collar, is feeding a giraffe." 
+ }, + { + "image": "images/caption_detailed_36.png", + "subject_name": "person", + "object_name": "pavement", + "predicate_name": "walking on", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "Wme21kc06N000[O6Q]OMkb0d01O14T]OXOYb0[1L3N2N3L5K1O1DSNd^Om1[a0=O0100000001O1kM_^Oh1da0PNb^OP2ha0N1G9F:K6O0O2O1O2N3Mf0SO[\\O1gVP6" + }, + { + "size": [ + 640, + 480 + ], + "counts": "hU18d?LVDZ2i9nMPFR2o9lMTFT2l9jMVFV2i9jMXFV2h9jMXFV2h9iMYFW2g9dM^F\\2c=0TJXN`Ih1[6bNaDQOl4]2^6QO[Io0d6UOYIk0f6XOXIh0h6XOXIh0h6WOYIi0g6WOYIi0Y7_NmHa1Z;QNm@o1o>YNm@g1P?aNk@_1S?eNk@[1n>oNo@Q1n>UOo@k0P?XOn@h0P?[On@f0o>_Oo@a0n>El@>Q?Z2M3N2N2O1M3N2N2N2O1O1O1O100001O1O1O1O1O1O1O1O1O1O1O1O1O001O1O1O00001O001O0000POXKhBh4U=^KYBD7n4_=iK_BW4`=kK_BU4_=nK`BR4_=PL_BQ4`=RL]Bo3c=SLYBo3f=V100M3O1N200O1001gIbBJ5k5S>I2N1O1O00002N;E:F7iLl@T1Y?cNm@[1[?YNi@g1b?lMa@S2a?hMc@W2_?eMc@[2^?`Mg@_2]`00001O00001O00000000000000001O000ZO`^OlN`a0S1a^OmN^:EWL_1ZIlN]:LTLn1l3RNULm1j3WNSLi1m3XNRLh1m3]NoKc1P4`NfETOU6\\2T4eNkK[1U4fNjKZ1k3iMSFj0OROa23k0X2a6lNSGhNQO\\1Y1`7lNTG]O1e0S1R1h7lNTG]O2j0l0m0n7iNXG_O0a136e8iNYG@4c1G4l8hN[G_O4d1E5l8_NVFVOZ1a00e1C5n8YNjGLFf1A5o8UNPHMBi1^O5Q9oMUH2]Oj1\\O5S9nMUH1^Ol1YO5T9mMVH1^Om1XO5T9mMVH1_Om1VO5V9lMVH1^OQ2SO2[9iMUH4]OR2RO1j;mMTER2RO1k;lMSES2RO1k;lMREU2ROOm;kMPEW2SONm;jMoD[2SOKo;iMmD^2SOIP`0\\2n0]:eNiBN=>^2o0X:lNgBM`06d2P1o9DYCZOj2R1k9GZCUOn2S1h9HZCTOP3S1e9J[CQOR3U1c9KZCnNU3W1a9KZCmNV3X1`9K[CkNW3Y1^9L[CjNX3Z1]9L[CiNh1M\\O^1a;L[ChNa1OVNk0d0b0ZB1O00O1J6L4[OSNU_Oo1o?gM]@S3`?TMR@J1S3m?g0001O1O00001O1O2TMk_OTO0`2W`0PNc@e1Va0F5K1O1O0000IZNS^Og1Sb0000002YNh]O[1cb0L3NO2001O2N2M3DM4O010N2O1N101O001O01O00001N100O2O1O1N10100O_OnI_KR6`4PJ_KQ6Q4^JoKb5k3dJUL\\5k3oIhK?;e5k3bJUL^5k3bJUL_5j3aJVL`5i3`JWL`5i3`JVLa5j3_JVLb5h3^JYLb5g3^JYLc5g3\\JXLe5h3[JXLf5n3gIhK4:U6`3dIVL3O2129W6^3UJYLD9W6\\3WJ[LB9W6[3XJ\\LA9W6Z3ZJ\\L_O:X6W3fJiLZ5V3gJiLZ5W3fJiLZ5V3gJjLY5U3hJkLX5T3iJlLW5T3\\JcL]O9W6T3\\JcL]O9W6S3jJmLV5S3jJmLV5S3iJnLW5R3hJoLX5Q3^JeLUO<]6n2^J[Mc5d2\\J]Mc5d2[J^Me5b2ZJ_Mf5`2ZJaMe5`2YJbMg5^2XJcMh5\\2XJeMg5\\2XJe
Mh5Z2XJhMg5X2XJiMg5X2XJiMh5V2WJlMh5T2WJoMh5P2XJQNg5P2XJQNh5o1WJSNg5m1YJTNg5l1WJVNh5j1XJXNf5h1ZJYNf5g1XJ[Nh5d1XJ^Nf5c1YJ^Ne5f1nIiL1c1o5k3N3L3K6I6K5N2M3L4LdMTKUNh4o1YKPNe4Q2\\KPN`4S2`KmM_4T2aKmM[4V2eKkMY4V2gKRMNoNY4P4jKoLNROW4o3lKlL0VOP4P4QLiLOYOm3o3TLgL1]Of3m3YLeL3AQ3QOPMk4LbL4Hd1lNoN]5XOnK5NT1GdNd43gK53l0`5nN]J67g0^5RO[J7h<01M20100000O1nNEXES;BmD>S;BmD=T;ClD=T;DjD=V;EfD=Z;DeD<[;DeD<[;DeD;];EbD<^;DaD=_;C`D>T;]OhD44`0R;^OkD12a0R;_OmDO2b0o:@oDO1`0Q;AnDO1`0Q;AoDOO`0S;@nD1Ma0U;^OnD0Mb0U;@lDONa0V;3gDMZ;P10XNjD^1c;N1O1O1O000O_DgNT;Y1hDoNT;P1kDTOS;l0lDWO7DZ:V1\\EYO8CZ:l1dEUNP:LoEQ20SNo91nEm12SNn9\\2PFeMQ:h2001O0\\MmET2T:jMPFR2Q:mMPFS2Q:lMQFR2R:jMQFS2Q:jMQFV2`:0O01DUE\\Nk:c1WE\\Ni:c1XE\\Nh:c1ZE]Ne:c1\\E]N_:f1dEWN^:f1a0XOgDGV;9mDFR;;oDCQ;=RE@n:a0UEZOl:g0k0N2O2ROjCg0[<000O2N2N2N1O2N2O1N2OO0O110N2O00000L400100O20N100N2O0010O01O5^OeC0a1O10000001O3M3M1O001O00000000000000000000000000O100^NSMWB1A2=j2^>_MbAa2^>_MbAa2]>aMbA_2l=SMSB?1^2l=SMSB?0_2^>`McA`2]>`McA`2]>`McA`2]>`McA`2]>`McA`2]>`McA`2]>`McA`2]>`MbAa2^>_MaAb2^>_MbAa2^>^McAb2]>^McAb2]>^McAb2^>]MbAc2^>\\McAd2\\>\\MdAe2\\>XMgAh2Y>VMiAj2V>VMkAj2U>UMlAk2R>WMnAi2Q>XMoAh2o=ZMRBe2l=\\MUBd2i=^MWBb2i=^MWBb2i=^MWBb2h=_MYB`2g=`MZB_2f=aM[B^2e=bM[B^2e=bM\\B]2d=cM\\B]2d=cM\\B]2d=cM\\B]2e=bM\\B]2d=cM\\B]2d=cM\\B]2d=cM\\B]2d=cM\\B]2d=cM\\B]2d=bM]B^2c=bM]B^2c=bM\\B_2d=`M]B`2c=_M^Ba2c=]M^Bc2b=\\M_Bd2a=\\M_Bd2a=\\M_Bd2a=\\M_Bd2a=\\M_Bd2a=\\M_Bd2a=\\M_Bd2a=\\M_Bd2a=]M^Bc2b=^M]Bb2c=`M[B`2f=_MZBa2f=`MYB`2g=`MZB_2f=aMZB_2f=aMZB_2f=aM[B^2e=bM[B^2e=cMZB]2f=cMZB]2f=cMnAEFh2\\>dMlAFGf2]>eMjAGGe2_>dMjAHFd2`>dMjAi2V>WMiAj2W>VMfAm2Z>TMdAm2\\>SMdAm2]>RMbAo2^>RM`Ao2`>QM_AP3a>f00000000000001O00000000000000000000001O001O4L2N1O1O1O001O1O1O1O2N1O2N1O1O1O2N2N3M1O1O1O2N1O1O1O1O1O1O1O001O1O1O1O001O1O001O1O001O1O001O1O1O1O1O001O1O1O001O0000001O00001O000000001O000000001O0000000000001O000000000000000000000000000000000000000000000000000000000000000000000000000000000000O10000000000000000O100000000O10000000000O1000000O1000000O10000O100O1O100O1O100O1N2O1O100O1O1O1000000000
01O00001O0000001O0000000000\\OaMj@_2U?cMi@^2V?eMg@\\2Y?eMe@\\2[?c0O10000000000O1000000O10000O100O100O10000O1000000O100O10000O1O10000O10000000000000000000000000000000000000000000000000000000000O100000000O10000O100O100O100O10000O1O100O100O100000000001OP1PO5K3M2N:F8H=C;E3M2N1O1O001O1O001O001O001O0000001O000000000000000000001O0000000000000000000000000000O100000000000000O100000000O10000O1O1L4K5hLmN]BN\\O0R1j1T=ZNUBb3k=^LUBb3k=]LWBb3i=^LWBb3i=^LXBa3h=\\L]Bb3c=]LaB`3b=\\LbBa3j>aLe@NF3NX2]a0SO?A6J4L3M001O1O001O001O001O0000001O000000001O0000000000001O0001OO100001O00000000000000O1000O2O000000000000O100000O2O000000000O0100000N2O1nK1TE1l:3lD1S;1lCHnMd0V>EkCn0UdDA\\;`0dD_OUM" + } + ], + "model_output": "A white ceramic bowl with a flared rim and a scalloped edge design, featuring a subtle floral pattern with hints of pink and green, sits on the dining table." + }, + { + "image": "images/caption_detailed_41.png", + "subject_name": "sports ball", + "object_name": "person", + "predicate_name": "touched", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "kiR13W=2EOYC2d<3YCNe<<00000O10000001O00O20O0001O1ObCDn;=PDHm;8RDKl;3UDOi;2VDOj;0VD1j;0TD1k;0TD2k;f010000000000O1000VDiN`;W1_DjNa;V1_DjNa;V1_DjNa;U1_DlNa;T1_DlNb;S1^DmNb;S1]DnNc;Q1^DoNc;P1]DoNd;Q1\\DoNe;P1ZDQOf;o0ZDQOf;n0[DROf;m0YDSOi;m0TDUOm;T101O10O1O2N1N2O1N2M5J5M4L5K4L3K4NST\\6" + }, + { + "size": [ + 427, + 640 + ], + "counts": 
"bQT12Y=1J0oB01Nm<0`U:8QjEd0E:XDPOl:n1G7G9I6J8J5L5J5G:K4L4L5K6G7L5K5L4K3VLeKgKUOe2Y5b1mKjKjN\\11\\OY5]3nKiKjN[13[OW5_3RLUMnNTOR5e3SLTMROiN^O2a5P4RLRMi4l2[KQMf4n2[KoLh4P3YKWLWN`0a6Y3ZKTLWNa0`6Z3bKeL^4[3cKbL_4]3bKcL^4\\3cKdL]4\\3fK`L[4_3lKZLU4e3mKYLT4g3lKYLT4g3mKXLa27RL`3d6eLZIZ3d6kLZIT3d6PM[IP3d6RM[Im2e6VMYIi2i6XMUIh2k6\\MQId2o6]MPIb2Q7_MnHa2R7`MmH`2S7cMjHc0K[NAn0k79dH;>VOo6`0bH7d0VOk6h0[H2n0ROh6R1SHKZ1POc6h2dIRM]6m2gIoL[6P3iIkLY6S3P2O2N1O10000O2O0O10002N5K4K4M4L2N4L2N2N4L2N3N2M5K9H0O0010O00001O2N1O001dEfNe8[1UGkNj8W1RGlNm8U1PGnNo8S1mFPOS9S1eFRO[9o0`FVO_9k0oE_N1n0o9U2O00lNTFjNk9[201VNSMTIn2i6VMUIj2j6YMTIh2k6ZMSIf2l6`MoHa2X3fLkNLRM^1f0Q2Z3kLaNe1RNa1[3kL\\Nl1XNZ1Z3lLSMCNd2_Oo0X3XNQMX1[Ob0a3_NaLW5]3]2O1O1O1O1O1O2N001N2O1O1O1O1O1N2O1O1M4L3O1M4M2M8G6J5UNcD]1o;aNUDi0c3N2N2O1O0O101O00000000000000000000000000000000000000000000000O1000O10000000000000000000000000QJ" + }, + { + "size": [ + 480, + 640 + ], + "counts": "0[63nJV7R5jHnJV7R5jHnJV7R5jHnJV7R5iHWJO6W7c5jHWJO6W7c5jHUJ19T7b5kHTJ2:S7b5kHSJ3V1l0VOVOG_13`N6a0HQ>R1VBB;YOSOe0K[O89b=L^BU1IR3OhKR=2nBf10g2c<_K]CO1o6`T3^A^MT>c2jAmMg=S2XBVN`=k1^BbNV=^1iBQOij;BSDc07\\Le:R3REc08cL_:k2WEc09mLV:k6gEbIn9_6mEoIi9\\8H5K5K00000000000000O10000O100O100O1O1O1N2O100O100O1000000O1000000O1000000001O1O00001O0000001O002N1O1O00001O1O1O001O001O00001O001O1O1O001O1O1O1O001O00001O00001O1O001O1O001O001O1O1O1O0bKiDD1_LW;k3iDE2_LR9J6K4L5K4L3N1O2M2O2M101N2O1N3M2M3M3M2O2N2O1O1N1O2O001O0O1OO21O0O0XORN`Do1_;WNZDl1f;UNXDk1i;f01N2N101N1O2N2O0O2O1N2N2N1O2N100O2N1O1O100O1O1O100O2O0O1O10000O2O000000010O01O00001O0010O00010O01O1O100O1O010O1O010O1O10O01O100O1O1O10000O1O1O100O100O0010000O2N2N2O2N1N2N1O2O2M2N1O100O2O1N10000O100O1O1O100O1O0010O0101N1O3N3L5K3M3M3RHnKi4U4RKlKn4V4oJkKR5W4gJmKY5X4aJiK_5[4[JfKf5d4nI^KS6g4fI[KY6k4^IXKb6l4YIUKg6n4TISKm6S5jHPKV7]60O1000000GUH\\Il7b6YHZIh7d6;O0O2O1iNeGhK\\8T4RHbKn7[4\\H]Ke7d4[HZKf7h4YHWKg7n4SHRKn7T5gGSKW8l5OO1O2N11O1OnLiGUO]8j0XH_Nk7a1bHfLkNR1d8X2PIaMQ7_2UI[Mk6e2[ISMg6m2[IPMf6P3^IjLe6U3c21O0000001O001O010O00001O00001O00001O0001
0O001O00001O001O000000001O00000O10001O0000000000001O0000000000000O1000000000000000O01000000000O1000000000O01000000O10000O10O010000O100O10O10O1O1O1O100O100O100O1O\\GjL`5U3`JmL_5R3^JSMa5l2\\JXMd5h2[JYMe5g2YJZMh5f2UJ\\Ml5e2PJ^Mo5e2lI]MU6e2hI\\MX6e2eI]M[6f2aIZM`6h2\\IWMg6k2VIPMP7R3lHnLV7T3gHmLY7T3eHkL]7W3`HjL`7X3\\HiLd7Y3ZHgLg7[3VHdLl7]3QHcLQ8`3kG_LW8c3eG]L]8e3`GZLb8i3YGXLh8l3RGTLP9l42O1N2O1O1N3N1O1O1N2O1NmGnJ\\6Q5dIPK]6m4bIWK]6h4cIZK\\6d4dI_K[6`4dIbK]6\\4cIeK]6Y4eIgK\\6W4dIjK\\6U4bInK_6Q4_IQLa6R4YIQLg6Q4SIRLo6P4gHVLZ7m3\\HWLh7]53N2O2N1O2O0O2N1O1M4XM[GPOh8n0^GbNo8[1VGWNU9g1PGRNT9l1PGlMW9Q2mFgMZ9V2jFbM]9Z2iF^M]9_2d1M2M4L3N3M3M3L3M3N3N2M2M4M3L4PO_B9g=F_BOh=0d00001O00001O001O1O0O3NTef1" + }, + { + "size": [ + 480, + 640 + ], + "counts": "V6e17W5R6hJnIX5R6hJnIX5R6hJnIX5R6hJnIX5R6hJnIX5R6hJnIX5R6k1000000000000000000000000000gHiIi5W6WJiIi5W6WJjIh5V6XJjIh5V6XJjIh5V6XJjIh5W6WJhIj5X6UJiIk5W6VJhIj5W6`100001O5K0kKcIG^6RMaIo12o0]67cII]67cII^67`IJb6Y4000000001gH^IR6b6mI`Ii1N[1b6lL_Ii11Z1`6mL_Ii11Z1_6oL^Ih12Z1`6nL^Ih11[1a6U400000000000000O_H_Ie6`6\\IbIb6]6_IdIj1Mm1_6YLcIa6]6_IcIa6]6_IcIa6]6_IbIb6]6Q1000UKcIlLOd3^62dIN\\6dLbIb23j0Z6eLcIa23j0Z62eIO[60fI0Z60fI0Z6dLcI`23l0Y6eLdI_23l0Z60eI1[60dI0\\6OeI1Z6kKfIo306Z6jKhIo3N7Y6kKiIn3N7Y6jKjIo3M7X6kKkIn3M7X6lKjIX8V6hGjIT4LKZ6SLgIR40KY6TLfIP43KV6>jIBV6?iIAV6`0jI@U6b0iI_OV6f4O1O1OTNoIkJQ6S5RJlJm5S5UJmJk5S5VJlJj5S5WJmJi5S5WJmJi5R5XJnJh5R5XJnJh5S5VJnJj5R5VJnJj5S5SJoJm5Q5SJoJm5Q5SJoJm5T5nInJR6P700000000000000O100000000001_GRJn7n5mG^Jl7b5RHbJl7\\6M2N2N1O001O1O000000001O0000000000000000O1000000O10000O100N2O1N2N2O1O1O100O100O100O100N200O1O1O100O100O1N200O1O1O1O1O1O100000000O10000O11O00O1000000001O00001O0000001O1O1O2N001O1O001O1O1O001O001O1O2N001O2N1O1O1O001O1O1O1O001O2N1O1O3M2N2N2N1O2N3M2N3M001O002N2N1O1O2N1O1O1O1O1O1O1O00001O2N2N3M3M4L2TMVG^On8O1O11O0000000000000000O100000000O1kLRMXJn2h5_MkGYOW13`NX3^8eM`G_OS1V3]7UNZHl1f7VN`GWNa0d3o7\\NoGe1Q8]NkGe1V8b200001O001O001O0000001O001O00001O001O001O0000001O0000001O00001O00000000001O00001O0000000000000000000
00000000000000000O1000000000000O100000000O100000000O10000O10000O10000O1O10000O100O1O100O1O\\L_Gk0a8SOkGc0T8[OPHd0P8YOTHf0k7YOWHg0i7XOXHh0g7WO\\Hh0c7WO_Hi0b7QObHP1^7oNcHQ1^7kNeHT1]7iNeHW1\\7dN_GVNU1W3^7aNeH`1\\7^NcHc1`7YN`Hh1b7UN^Hl1c7QN^HP2d7mM[HU2f7iMZHX2g7fMQHnNQO_3Q9^MnGj2S8TMnGl2S8RMmGo2W8lLjGT3X8hLhGZ3[8aLeGa3\\8]LcGe3k91N2M3O1N2O1O1N2NPMcEa1\\:\\NiEc1W:[NlEd1S:[NoEe1P:ZNSFe1i9^NXFb1g9^NZFb1e9^N\\Fb1c9^N^Fb1`9_NbF`1]9`NdF`1[9`NfF`1Z9^NhFb1W9^NjFb1U9[NoFe1P9WNUGi1l8TNVGl1l8jM[GW2g8]McGc2\\:100O1O1O1H8O1O1O100O1N2N2O1M3F:bM]LTIn3i6ZLlHl3S7WLgHm3V7YL`Hn3^7UL\\HP4a7WLTHP4i7VLPHn3P8a1N2N2L4M3N2L4O1M3N2O1M3O1K5N2H8@`0O1O1O1000000O100000000N2O10000O1N2O100000000000000000\\HWJQ6i5mIYJS6g5lIZJT6g5kIYJU6g5kIZJT6e5mI[JS6e5mI[JS6f5lIZJT6f5lIZJT6f5lIZJT6f5lIZJT6f5lIZJT6V4cHPMY1jNT6V4cHPMY1jNT6V4cHPMY1jNT6V4cHQMX1bNhN3^7k5jIQJjN3\\7[4bHQM\\1dNR6[4cHPM^1bNo5^4dHoLm8Q3TGnLl8R3UGmLk8S3TGnLl8R3SGeKNU1o8V3SGPMl8R3RGnLn8Q3TGfK1n0k8\\3TGfK0o0l8Z3UGgKNP1m8Y3UGoLk8Q3VG\\LMEm8o3VGcKLe02Jk8n3ZGYLJIl8n3ZGlLf8T3ZGlLf8T3YGnLf8R3YGoLg8Q3XGQMg8o2YGQMg8o2WGSMi8R4VGiJk8\\4WG`K00OJN6l8`4WGaKOO0IO7k8`4VGbK0OOH08j8T4VGkK1NNL20a8T4oFmK`0Oa8T4oFmK?0^ONj8U4YGnK=0@Mj8O" + } + ], + "model_output": "An adult zebra stands with its body profile facing left, showcasing its distinctive black and white striped pattern. The stripes are unique, with varying widths and patterns across its body, legs, and mane. The mane is erect and striped, and the ears are pointed upwards. The zebra's eyes are alert, and its muzzle is partially visible with a discernible mouth and nostrils. The tail is tufted with longer hair at the end, and the hooves are dark and sturdy. The zebra is standing on grass." 
+ }, + { + "image": "images/caption_detailed_47.png", + "subject_name": "cake", + "object_name": "person", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "PYR41h01[=2cBO[=4dBL[=6cBK\\=7bBJblKBT4=mKCT4fFGU9;dF@cN7f:=aFN]92dE^O=d0n9OcE@b5n600000O10000000000000000O100001OO10000fH" + } + ], + "model_output": "An individual wearing a bright yellow ski jacket with a hood, red ski pants, and black gloves, equipped with a black helmet and goggles, is captured in a dynamic pose with one arm extended outward and the other bent at the elbow, suggesting movement, over the snow." + }, + { + "image": "images/caption_detailed_49.png", + "subject_name": "donut", + "object_name": "person", + "predicate_name": "held", + "mask_rles": [ + { + "size": [ + 612, + 612 + ], + "counts": "_Ql1l0oa0c0A8H7J6I:D;K5J5M6H7H6L2N2M4K7I5L3M3N4I6L2N4L4K0020001O1N2N110O1N2N1O2O1O1O2M2O2N1O1N2O001O100N2N3OO10O1N2O011N1O1O1N201O0O1O1O1O1O101N2O0O1O1O1O10000O2N1O101N1O100O100O1O1O101O0O100O10000O1O1O10000O101N100O100O2N1000000O10000O101O0O10000O1000lLhD\\NW;a1PE\\NP;a1VE]Ni:b1ZE\\Ne:e1\\EZNd:f1]EYNc:f1^EZNa:g1aEWN^:i1dEVN[:k1eEUN[:j1fEVNZ:i1hEUNY:j1hEVNW:k1jETNV:l1jETNV:l1jEUNU:k1kEUNU:j1mEUNR:l1nETNR:l1oESNQ:m1oESNQ:m1oESNP:n1QFRNn9n1RFRNn9n1RFRNn9n1RFRNn9n1RFRNm9o1TFPNl9P2TFPNl9P2TFPNl9P2TFQNk9o1UFRNj9n1VFRNj9o1UFRNj9n1VFRNj9n1VFRNj9n1WFRNh9n1XFRNh9n1XFRNh9n1XFRNg9o1YFQNg9P2XFQNg9o1YFQNg9P2XFPNh9P2XFQNg9P2YFPNf9Q2YFoMg9Q2YFPNf9Q2YFoMg9Q2YFPNe9S2ZFlMf9U2YFlMf9U2YFlMf9T2[FkMe9V2ZFjMf9W2XFjMh9X2VFhMj9Y2TFiMk9X2TFhMl9Z2PFhMP:Z2mEhMR:]2cEjM\\:_50001O001O001O1O1O01O0001O1O001O00100O00002N10O01O001O100O010O1O100O1O010O100O100O2OO01O011O0O01O010O10O01N101N2O0O2N2O0O101N1O2O1N2N1O2O1N2N1O2N1O2N2O0O2N2N2N2N1N3M3N2N1N3N2N2N2N1O2N2O1N2N2N1O3M2N2N2N2L5J5L4N3M2M3M5K3N2M3N2M3M3N2M3N3L3N2N2M3N3K5M2M4K6J5J5L7I5K8F9I6K9F6JZb0`0F7G8H8G:B=F:B>N2O1000000000000O10001O00000000000000O02O00O100000000O100O1O1O1O1O1O1O100O1O001O1O100O1O1O1O1O10O010000O1ROf_OROZ`0h0o_OUOQ`0g0T@XO
k?g0W@YOi?e0Z@ZOe?f0]@YOc?e0`@ZO`?e0b@ZO^?d0d@\\O\\?c0f@\\OZ?c0h@[OY?d0h@[OY?d0i@YOY?g0h@VOZ?i0j@POZ?P1]100O1000001O000010O3M10O00010O000O101N1O1O2N1O2N1O1Ok_O_Oi=`0TBEk=:TBIl=4TBNl=1oA5Q>JkA;V>DeAa0[>^OeAc0\\>ZOcAi0]>VO[AS1f>lNTAZ1m>dNPA`1Q?]Nn@f1U`03H8N2K5L4N3K3M4K5L5K4I7L3L6L3J6K4J7M3M4L2M4H8G9N2L4N2N200O100O1O100O100O1000000O10000000000000000000000001O000000010O0O2O00010O1O001O001O1N2O1O001O2L3@`0H8J7J5I7K5K5H:F9I7J5K6J8UNY_Om0ea0H6L4K6J6K4L4M2MdgZ6" + } + ], + "model_output": "A red frosted doughnut, generously sprinkled with white nonpareils, is held by a person." + }, + { + "image": "images/caption_detailed_50.png", + "subject_name": "laptop", + "object_name": "table", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "0R8n6000000O10000O100O10000O10YLSH;l7]3000`LTHLk75VHJj73[HKd7i3000000O100O100O1000000O10000O10002M102N2M5L3L5L3M3L5L5K3L4M1N2O00001N101N2O0O2O00000O2O001N2O001N2O001N2O1O0O2O1O1N2O0O2O1O0O2O1N102N001N2N101O1O1N101O1N2N2O1O001N2O0O2O1N2O1O1N101N2O1O0O2O1O001N3N0O2O1O0O2O1N2O001N2O1N2O1O0O2O1O001N2N2O1O001O1N101N2O1O1O0O2O1N2O0O2O1O001O2M101N2O001N3N001N2O0O2O1N2O1O1O0O101O1O1N2O1O0O2O1N2O1O1O0O2O0O2O1O0O2O1O0O2O1N3N1N101O001N2O1O001N2O1O1N2O1O0O2O001O1N2O1O0O2O2N001N2O1N2Nf]Q6" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"U8i6W8000O100O10000O10000O10000O100O1000000O100O100O1000000O100O10000O100O10000O10000O100^HlHX7T7gHnHW7S7iHQIS7o6lHVIP7j6PIWIn6j6RIYIk6g6UI_Ie6a6[IaIb6`6^IaIa6_6_IaIa6_6`I`I_6a6aI_I_6a6`I`I`6`6`I`I_6a6aI_I_6a6bI^I]6c6bI^I^6b6bI^I^6b6bI^I]6c6cI]I]6c6dI\\I\\6d6dI\\I[6e6dI\\I\\6d6dI\\I\\6d6eI[I8VOe5_7SJ[I8VOd5`7TJZI7XOd5^7TJ[I8ZOa5[7WJ[I7\\O`5Z7ZJYI6]O`5Z7ZJYI6^O_5Y7[JXI6@_5X7[JXI6@_5X7[JXI6A^5W7\\JXI5C]5V7^JWI5D\\5U7_JWI5EZ5U7aJVI5FY5T7bJVI5GX5S7cJVI5GW5T7dJUI5HV5S7eJUI5IT5S7gJTI5JS5R7hJTI5KR5Q7iJTI4LR5Q7jJSI4LR5Q7jJRI5NP5P7kJQI60m4P7mJPI53l4m6oJPI54j4m6QKoH54j4m6QKoH55h4m6SKmH66g4m6SKmH58g4k6TKmH59e4k6VKlH5:c4k6XKkH5:c4k6XKkH4`4i6[KiH5>`4i6\\KhH4`0^4i6]KgH6a0\\4h6^KgH6b0Z4h6`KfH6c0Y4g6aKfH6c0Y4g6aKTHL>:g0X4h6bKSHL>9j0W4e6dKdH6h0U4d6eKcH7j0R4d6gKaH8l0P4c6hKRHL:=Q1n3d6iKQHL:=R1m3c6jK`H7n0o3b6jK`H7o0m3b6lKPHL;;T1l3a6mKPHL;:V1k3`6oKoGL;:W1j3_6PL^H6T1h3_6RL]H6T1h3_6RL\\H7V1f3^6SL\\H6X1f3\\6TL\\H6Y1d3\\6VL[H6Z1c3[6WL[H5[1c3[6XLZH5\\1b3Z6YLkGM;8`1b3Z6YLlGL:9a1`3Z6[LYH4`1_3W6]LYH4a1^3V6^LYH4a1]3W6_LXH4b1\\3V6`LWH5c1Z3W6aLVH5e1X3U6cLVH5f1W3T6dLVH5f1W3T6dLhGM99l1T3T6fLhGL89m1U3S6fLSH6k1R3S6hLRH6k1R3S6hLQH7n1o2Q6jLQH6o1o2Q6kLPH6P2n2P6lLoG7R2l2o5mLoG7S2j2o5oLnG7S2j2o5PMcGL7:X2i2n5QMmG6V2g2n5RMlG7X2f2l5SMlG7Y2e2k5TMhGNJ9c2d2l5UMgG;^2_2k5VMgG:`2^2j5XMfG:a2]2i5YMgG9a2]2h5ZMfG:c2[2g5\\M`GL2=g2Z2h5]MdG9e2Y2g5]MeG:e2X2f5^MeG:f2V2f5`MdG9h2V2d5bMbG9j2U2d5aMcG:j2S2d5cMcG9i2T2d5cMcG9j2S2c5eM]GL2j4BWK=i4BYK=g4BZK>f4BfHEf0h0d6CbHLh0?g6E_H1h08j6F\\H7i02k6GYHk0Jm6HWH`0k0Go6ITHc0n0Bn6JTHe0o0_On6LRHg0P1\\On6MQHh0Q1ZOo6NoGj0R1WOo6OnGk0S1UOP70mGl0T1ROo62lGm0V1oNo64kGn0W1kNo67jGn0Y1gNo6:iGo0i:QOWEP1h:POXEQ1g:oNYEQ1g:oNYER1f:nNZER1f:nNZES1e:lN\\EU1c:kN]EU1c:kN]EV1b:iN_EW1a:iN_EX1`:hN`EY1_:gNaEZ1^:fNcEY1]:fNdE[1[:eNeE[1[:eNeE\\1Z:dNeE^1Z:bNfE_1Y:`NhE`1X:`NhEa1W:_NiEb1V:^NjEb1V:^NjEc1U:\\NlEd1T:[NmEf1R:ZNmEh1R:WNoEi1Q:WNoEj1P:VNPFk1o9UNQFk1o9UNQFl1n9TNRFm1m9SNRFo1m9QNSFo1m9PNUFP2j9PNVFQ2i9oMVFR2j9nMVFS2i9mMWFT2h9lMXFU2g9kMXFV2h9jMXFW2g9iMXFX2h9gMYFZ2f9fMZF[2e9eM[F[2e9eM[F\\2d9dM\\F]2c9bM^F^2b9bM^F_2
a9aM_F`2`9`M_Fb2`9^M`Fb2`9^M`Fc2_9]MaFd2^9\\MbFe2]9[McFe2]9[McFf2\\9ZMdFf2\\9ZMdFg2[9YMdFi2[9WMeFj2Z9VMfFj2Z9VMfFk2Y9UMgFl2X9TMgFm2Y9SMgFn2X9RMhFo2W9QMiFP3V9PMjFQ3U9oLkFQ3U9oLkFR3T9nLkFT3T9lLlFU3S9jLnFW3Q9iLoFW3Q9iLoFX3P9iLnFX3R9gLoFZ3P9fLPG[3o8eLQG\\3n8dLRG\\3n8dLRG]3m8cLSG^3l8bLSG`3l8`LTGa3k8_LUGa3k8_LUGb3j8^LVGc3i8]LWGd3h8\\LXGe3g8[LXGf3h8ZLXGg3g8YLYGh3f8XLZGh3f8XLZGi3e8WL[Gj3d8VL[Gk3e8UL[Gl3d8TL\\Gm3c8SL]Gm3c8SL]Gn3b8RL]GP4b8PL^GP4b8PL^GQ4a8oK^GR4b8nK^GS4a8mK_GT4`8lK`GT4`8lK`GU4_8kK`GW4_8jK`GW4_8iKaGW4_8iKaGX4^8hKbGY4]8gKcGY4]8gKbG[4]8fKbG[4]8eKcG\\4\\8cKeG]4[8cKeG^4Z8cKeG^4Z8bKeG_4[8`KfGa4Y8_KgGb4X8^KhGc4W8\\KjGd4V8\\KiGf4V8ZKjGg4U8YKjGi4U8WKkGj4T8VKlGj4T8VKlGk4S8UKmGl4R8TKnGl4R8TKmGn4R8RKnGo4Q8QKoGo4Q8QKoGP5P8PKPHQ5o7nJRHS5m7mJSHS5m7nJRHS5m7mJSHS5m7lJSHV5l7jJTHW5k7jJTHV5l7jJTHW5k7iJUHX5i7iJVHY5i7gJWHY5i7gJWHZ5h7fJXH[5g7eJXH]5g7cJYH]5g7cJXH_5g7aJYH`5f7`JZH`5f7`JZHa5e7_J[Hb5d7]J\\Hd5d7]J[Hd5c7]J]Hd5b7\\J^Hd5b7\\J^He5a7[J^Hg5`7ZJ`Hg5_7YJaHg5_7YJaHh5]7YJcHh5\\7XJdHi5[7WJeHi5[7WJeHj5Y7WJfHk5Y7UJgHl5X7TJiHk5W7UJiHl5T7VJlHk5R7VJoHi5Q7WJPIi5o6WJQIj5n6WJQIj5m6WJTIh5k6ZJVIe5i6[JWIe5i6\\JVIe5i6[JWIf5g6[JYIf5f6ZJ[If5d6[J[Ie5e6[J[If5d6[J[If5c6\\J\\Id5c6]J^Ic5`6^J`Ic5_6^J`Ib5`6^JaIb5^6^JbIb5]6`JcI`5\\6`JeI`5Z6`JgI_5Y6bJfI_5X6bJhI_5V6bJjI_5U6bJjI_5T6bJmI^5Q6dJnI]5P6dJPJ]5o5dJQJ[5n5gJRJY5m5gJTJX5k5iJUJX5j5iJUJX5i5iJWJW5i5iJXJW5f5kJYJV5f5kJYJV5e5kJ[JU5d5lJ\\JU5b5mJ^JS5a5mJ`JR5_5oJaJR5^5nJbJS5\\5oJcJQ5\\5PKeJP5Y5QKgJP5X5PKiJP5U5QKkJP5T5QKlJn4S5SKmJn4Q5TKoJXNYOa6f5XKQKk4n4WKQKj4m4WKUKg4j4ZKWKf4g4\\KXKd4g4]KYKd4e4]K[Kd4c4^K\\Kb4d4^K]Kb4b4^K^Kb4b4_K]Kb4a4_K_Kb4_4_KaKa4_4`KaKa4\\4aKcK_4]4bKbK_4\\4bKdK_4Z4bKgK^4W4cKjK\\4V4eKjK[4U4eKlK[4R4fKnKZ4Q4hKnKY4P4hKPLY4o3hKQLX4m3iKSLW4m3iKSLX4k3jKULU4k3lKVLS4h3nKXLR4h3oKWLR4g3oKYLQ4f3PLZLP4e3QL\\Ln3d3SL[Ln3c3SL]Ll3c3VL\\Lj3c3WL]Li3c3XL]Lg3c3YL]Lg3b3[L]Le3b3\\L^Lc3c3]L]Lc3b3^L_La3a3`L_L^3b3bL^L^3a3cL`L[3`3fLaLY3_3gLcLV3]3lLcLS3\\3nLdLQ3]3oLcLQ3\\3PMeLn2\\3RMdLn2Z3TMfLl2Z3TMfLk2Z3VMfLi2Z3XMgLg2Y3YMgLg2X3ZMhLe2Y3^MdLb2[3_MeL`2[3aMeL_2[3aMeL^2[3a
MhL^2W3dMhL[2X3fMmKXLg0R6\\3iMeLV2[3jMgLU2X3lMhLS2X3oMgLQ2Y3PNeLP2[3QNfLn1Z3QNhLm1X3TNiLk1W3WNgLi1X3YNgLf1Y3SNQLTLh0h5W3TNPMk1P3VNPMj1o2YNoLg1Q3\\NlLc1T3]NmLb1T3]NnLb1Q3_NPM_1P3bNPM^1o2dNPM\\1o2eNQMZ1P3eNRMY1n2hNSMW1m2jNRMU1n2lNRMT1m2mNSMR1n2oNQMQ1n2POSMn0m2TORMl0m2VORMi0o2VORMj0m2WOSMh0m2YOSMg0m2ZORMe0n2\\ORMd0m2^ORMa0o2_OQMa0n2@RM?o2BPM>P3BPM=P3EoL;P3FQM8P3HQM7n2KRM3n2NPMPKUOQ5k3OSM0m22RMNm23SMLm25SMKm26RMIn28SMGl2;TMCl2>UMAk2`0TM_Ol2c0SM]Om2c0SM\\Om2e0TMZOk2g0VMWOj2k0UMUOk2l0TMSOl2n0TMROl2n0TMQOk2R1TMnNk2S1UMlNl2T1TMlNk2V1PMYJZO`4f3X1PMXJZO`4f3Y1oLWJ[O_4g3Z1nLWJ[O_4f3[1oLVJ[O^4f3^1oLSJ\\O]4f3`1SMSJVOY4f3f1VMZNi2h1VMWNk2j1gLoIH0KV4f3l1gLnIIOKV4e3n1fLmI0S4Y3Q2gLkI2R4W3U2fLjIE09Q4[3V2gLiIE19n3[3Z2fLfIH08o3[3[2eLfI5o3U3\\2fLeIL2I10k3f3^2dLeILU4a3W2aLeIMU4a3W2bLeIHM2V4d3Y2cLfIHQ4d3Y2eLgIFP4d3Z2fLfIFo3e3[2eLfIGm3d3^2eLaILNKo3d3c2RM[Mm2h2SMWMm2i2cL]I0NKl3b3k2bL\\I0MLl3a3l2cLZI00Li3a3m2lL[IFd3^3R3kL[IHb3]3T3jL[IH`3_3W3fL[IJ]3`3X3gL\\IH\\3`3Y3iLZIG\\3a3Z3hLZIG[3`3]3iLYIFZ3a3^3hLXIGZ3`3_3hLYIGW3b3a3gLWIGX3a3b3hLVIGW3a3e3gLgLX3Z3iLdLW3]3iLcLW3o0" + } + ], + "model_output": "A black laptop with a glossy finish, featuring a full keyboard with white lettering on the keys, a touchpad centered below the keyboard, and a series of stickers on the palm rest area, is placed on the table." 
+ }, + { + "image": "images/caption_detailed_51.png", + "subject_name": "person", + "object_name": "person", + "predicate_name": "beside", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "bS\\1SEGg:=VEFg:;XEGf:;XEHd:4N2M2O2N1O101L3O1M3O1O0\\OWORCj0jI7I7L4M2O2M3N2M3N2N2M3O1M3N2L4L5WIiIc4[6SKTJb4R6]KQJ\\4T6cKoIV4V6jKlIn3[6QLfI^3k6_LWI`3l6]LVIb3R7ULPIi3T7bKoGOo0_4R9O2N1O2M2O2N1O2O1M2O2M2O1N3N1N3M3N1O2L3N3L4M3M2N3L4L5L5K4K6J6I8J6I7KU1iN_Rj0" + }, + { + "size": [ + 480, + 640 + ], + "counts": "nj_66l03V@`Rj0" + } + ], + "model_output": "A smiling man with short hair and glasses, wearing a blue and gray zip-up jacket with a logo on the left chest, blue jeans, and brown shoes, is standing with his hands in his pockets, in front of a fence." + }, + { + "image": "images/caption_detailed_53.png", + "subject_name": "handbag", + "object_name": "person", + "predicate_name": "held", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Qha0a0f=F^BY1j2N10000O0100O0001O0O101_NMXC074=FQ;;VD070a0Jm:;WDg0j0ROl:c1TE_Nj:a1UEbNj:^1UEdNi:\\1VEfNj:Y1VEjNh:U1WEnNh:R1WEPOh:P1WEROh:n0TEWOk:j0nD]OQ;d0lD_OT;a0iDBV;U200001cLgDP3[;lLgDT3b;2O0O2N2O1N2_EdL[9]3aFeL_9^3]FcLd9`3WFaLi9b3SF_Lm9\\2iE]N2\\OU:U2nEROR:l0QFROQ:=aFA`9fF_O\\9?gF^OZ9UOcEg0Y10T9YOgE`0[13P9\\OiEj8BmEM]1?h8CfE2m11^8j0dGSO^8k0dGSO^8l0cGSO]8l0dGSO^8k0dGSO^8j0eGUO\\8d0kGYOY8b0U3M3Jgh>" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"Y3e;[3000001O000000001O0000001O0000001O00000000001O0000000000001O0cKbLXM^3f2iLUMW3i2mLUMS3i2QMUMP3i2SMUMm2j2ZMPMf2[2eLYJk0W3`2_2hLWJl0W3]2`2jLWJk0W3]2_2kLVJk0Z3]2[2ZNdMl1V2UNiMl1V2TNjMQ2P2PNPNW2i1iMWNX2h1hMXNZ2f1gMYN[2e1eM[N\\2d1dM\\Nd2\\1]MbNk2W1VMhNk2X1UMhNj2X1XMfNi2Z1\\M`Nh2]1cMWN`2g1cMUN_2k1cMQN^2P2fMjM]2V2hMbMk2m1XMnMj2Q2VMnMk2R2TMnMl2R2UMmMl2R2UMmMk2S2WMkMi2U2XMjMh2V2]MeMc2[2bM`M^2`2eM]M[2c2gM[MY2e2iMYMW2g2jMXMV2h2kMWMT2j2mMUMS2k2nMTMQ2m2QNQMn1P3SNoLm1R3TNlLn1R3SNmLo1R3RN`LmKLQ6d3WNWLoK3j5f3dNZL\\1f3eNYL^OOaMi3P3XLZO5fMc3V7^LjHc3U7^LjHc3U7]LkHd3S7gLbHZ3^7QMWHP3^2VLa1Q1kKi2d2YL^1P1lKh2e2ZL]1o0lKj2f2nKbL6j4S1WKgN`0T4R3PLV1\\1eKg2T8]MgGf2W8\\MfGh2X8XMhGi2R4bKgKOm3b6aMmIh0Di1a6ZM`J;oNZ2d6XMbJ9kN_2f6UMbJ7jNe2g6PMdJ5gNj2i6mLdJ4dNo2m6hLbJ6aNS3n6dLcJ7`NU3n6cLeJ4^NY3n6QLhH9Q2N`3[4SKXLm0A>Ld3Z4RKYLi0C`0Ke3X4SK[Ld0Ed0He3X4SK]L`0Ff0Fg3W4TK]L>Fg0Fg3W4TK]L=Gh0Eg3W4TK^L;Hh0Cj3X4RK^L:Ii0Ak3X4RK`L5Km0\\Om3Y4QKbL0LR1XOn3Z4PKcLNMS1VOo3[4oJcLMNT1SOQ4\\4nJSNP1bMR4[4oJRNo0cMR4[4oJRNn0cMT4[4nJRNn0cMT4\\4lJSNo0`MV4]4kJSNn0aMW4\\4kJSNn0aMW4\\4kJTNc0SM^O=T5\\4kJTN?oMe4n3kJTN>oMg4m3kJTN=QNg4k3lJUN;RNh4j3lJTN;VNf4f3oJTN;WNe4e3PKTN;XNd4d3QKUN:XNg2fNgNn4WNTN:ZNe2gNiNk4XNUN9[Nc2gNkNi4YNWN7YNd2hNlNh4ZNWN5ZNc2iNmNg4[NVN4\\NQ1bN36=f4[NXN2[Nn0gN60?f4\\NXN0\\Nl0iN7Na0f4[N0l0_K6Lb0e4]N0h0bK7Jd0c4bNO?fK:Ie0b4cNO;iK;Gg0a4dNO7lK;Fi0`4fNN4mK^1BaNc0[1]OcNg0[1YOdNm0W1SOhNR1S1POkNU1Q1kNoNV1Q1iNoNX1YOVId1b5SO\\1l0dNTO\\N\\OfLN\\O110l1[1f3YOXNAdLIAa0n1_OWNP1^5F[N^OaLh0ESOg1n0h3J[N\\OaLR1[1Fi3L[N\\O`LU1\\1Ai3OZN[OaLX1Z1]Ok31ZNYOaL[1X1[On31XNYObL^1U1XOQ42WNXOcL`1S1VOS43VNWOdLa1R1UOT45aJSO[32nLa12]N=h0g47^JSO]31nL`1GkNg0:i4?lMiNoLa1IlNa0;k4`0]N8YLlNID`0h0Q5c0ZN5[LWO52W5b0YNnN\\L?03OO\\5b0ZNiNbL>I:LN_5b0YNgNeL>G_1[5]OXNfNgL=G_1Z5_OWNdNlL;Cb1Z5_OXN`NPM<_Oe1Y5@XOiN_Kg1Y5@YOgN_Ki1X5AZOaNaKn1V5@N`02CK=6FF::GE9;HD8dLR5kN`Jb4=cLS5kN`Jd4;aLU5lN_Jd4<_LV5oN[Je4=ZLY5ROYJe4>XLY5SOYJf4=WLZ5TOXJf4U7bMdIV2RO9[7bM]I_5c6bJ[I_5e6bJVIb5k6`JkHg5U7Q11O100O1O101O000O10010O0O100000010JZHoHg7Q7ZHnHf7R74O100O2O000O10XHQI_7n6:O[HSIZ
7l6eHUI?" + } + ], + "model_output": "A person wearing a white basketball uniform with the number 8 on the back, white shorts, and white sneakers with orange accents is running on the playing field. The individual has short dark hair and is captured in a dynamic pose with one leg extended forward and arms slightly bent at the elbows." + }, + { + "image": "images/caption_detailed_55.png", + "subject_name": "bowl", + "object_name": "table", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "ejb06Q=7I7I5K4N3M2M3M4M1O2N2N2N1O2N1O2N1O2N1O2N101N1O100O1O2O0O1O2O0O1O101N1O10001N100O2O0O2O000O100O2O0O100O101O0O100O1000000O10000O10000O10000000000O100000O100000000O10O1000O10000O10000O100O100O100O100O100O2N100O1O1O2O0O1O1O1O1O2O0O1N3N1O1O2N1N3M2O2M3M2N3N2M2O2M3N3M2M3N3L4L4K6HZQn5" + }, + { + "size": [ + 426, + 640 + ], + "counts": "d1Z29Bb0f0b4IkJBc0e0a4JjJDd0b0a4JjJFe0`0a4Q2_KPN_4Q2bKnM^4R2bKnM\\4U2dKjM\\4V2dKjM[4W2eKiMY4Y2hKgMV4DoJ`0k0LU4\\2jKdMU4]2kKdMS4]2mKcMS4]2nKbMQ4_2oKaMP4`2PL`Mo3b2PL^Mo3c2RL]Mk3e2UL[Mj3g2VLXMi3i2XLVMg3k2YLVMf3j2ZLVMe3k2[LUMd3m2\\LSMb3n2^LRMa3o2_LQM`3P3`LPM^3R3cLmL\\3U3cLlL\\3T3dLlL[3U3eLlLY3U3gLkLY3U3hLjLW3W3iLiLW3X3hLiLU3Y3lLfLS3[3mLeLR3\\3oLcLo2_3QMaLn2`3RMaLm2_3TM`Lk2i0[L[Oi0Lk2f0dLZOa00j2e0kLWO;5h2b0RMXO56h2`0XMXO08g2?]MWOM9d2a0aMUOL9b2a0eMUOI;a2?hMUOH;_2`0kMTOF=^2>nMUOD<^2=QNVOA=^2;TNWO^O>^2;TNWO^O>^2:VNXO\\O=^2:XNXOZO>^29YNYOYO?]27\\NYOXO?\\27]N[OVO>]26_N[OTO?]26_N[OTO`0\\24aN\\OSO`0\\23cN]OQO`0[22eN^OPO`0[21fN_OPO?Z22gN^OoN`0[20gN@nNa0[2nMdLc1S2OnN?o2ZOTN7mN`0o2XOUN7lNa0Q3UOTN:lN`0R3SOSN>jN?T3QOTN?hN`0U3POSN`0hNa0V3lNSNc0gNa0X3jNRNd0gNa0\\3eNnMk0eN`0a3`NkMP1eN`0a3gM^Ka0\\2X1eN`0b3fM_K?\\2Z1cNa0l3RNaM]1dN`0n3oM_Mb1bN`0P4kM`Md1`Na0P4kM`Md1aN`0P4kM_Me1aN`0Q4jM^Mf1aN`0R4hM_Mh1_N`0R4eMaMk1]N`0j6@VI`0j6@VIa0i6@VI`0k6_OUIa0k6_OVI`0j6@VIa0i6_OWIa0i6@WI?j6@VI`0j6@VI`0j6@VIa0i6@VI`0j6@WI?i6AWI`0h6@YI?g6AYI?g6BXI>h6AZI?e6A[I?e6A[I?j2fMIj1^Ma0h2iMFd1eMb0e2kMEb1gMc0d2lMD_1jMe0b2nMB\\1mMf0a2PN_OZ1RNf0^2RN^OV1VNh0\\2TNWLE
k2^1dNh0Z2_NROg0fNj0X2_NROf0hNj0[2[NmNi0jNm0Y2YNlNj0lNm0Z2WNjNj0oNn0X2WNiNi0QOQ1V2UNiNi0ROR1d5lN^JT1a5lNaJS1_5kNcJU1]5jNdJV1\\5hNgJX1W5hNjJX1V5gNkJZ1T5dNoJ[1P5eNRKZ1n4eNSK[1l4eNVKZ1j4fNVKZ1i4gNWKZ1h4gNWKY1i4gNXKX1g4iNYKW1f4jNZKV1f4jNZKW1d4jN]KU1b4lN^KU1a4lN^KT1a4mN_KS1a4mNaKQ1^4PObKQ1\\4POeKo0[4QOeKo0Z4SOeKn0Y4SOgKm0X4TOhKl0W4UOjKj0U4WOkKj0l0nM]OY1Gi0j0QN]OW1Jg0i0SN[OX1Ke0j0SNZOY1Le0i0SNQMGd1b1b0d0h0UNoLHe1`1d0c0h0VNmLIe1_1g0a0g0XNjLJg1_1g0?h0YNhLKf1_1j0>g0\\NcLIj1_1l00n3d2dMXOPNTN>Oo3e2cMYOoMSN`0MP4f2aMZOoMSN`0LQ4g2aMXOoMVN?IR4j2_MWOPNVN`0FS4m2]MWOQNUNY5d2gLVOPNVN=IV4k2\\MVOQNVN60]4d2\\MVOQNWN32_4a2]MWOnMXN53^4^2_MWOlMZN65\\4Z2cMGPNQN\\4X2dMHnMSNo2@QNd2S1g0e0PMSNZ2W1g0b0UMTNT2Z1i0=YMVNn1]1j0;[MVN:]OOS2\\27_MXN3@3P2\\26aMXNMF5l1^23cMZNHI7j1^21gMZNCL8j1e0iMGU2K[N^O0;g1d0kMGR2N[N[O2=e1c0lMHQ2M]NXO4?b1d0lMIo1O]NSO8b0`1b0mMIm11^NPO:d0^1b0nMIk12^NmN>e0[1c0oMHi15^NiNa0g0Y1c0PNHf16aNeNb0k0V1b0RNGd19aNaNe0m0T1b0TNEa1=aN]Nh0o0S1a0UNE^1?aNZNl0P1P1b0VND\\1a0bNXNm0Q1o0b0VND\\1b0aNUNP1S1m0b0WNCZ1e0bNoMS1X1i0a0YNBY1f0bNmMV1Y1g0a0XNCY1g04UO;a0XNCX1h06SO:b0YNBW1i06TO9a0ZNBW1i06TO9a0ZNCU1i08TO9?ZNDU1i09SO8`0ZNDU1j08RO9`0ZNDU1j08RO:?YNDV1k08RO8?ZNDU1l09QO8?ZNDU1l09QO8?ZNDU1l09QO9>YNEU1l0:QO7>ZNEU1l0:QO7>ZNEU1l0:QO7>YNFV1k0:QO8=XNGV1k0;PO7>WNHW1j0;PO7>VNHY1j0:QO7Y2a7VNQHA>Z2`7VNVIj1j6VNVIj1j6WNUIi1k6WNUIi1k6XNTIh1l6XNTIh1l6XNTIh1l6XNTIi1k6XNTIh1l6XNTIh1l6XNSH^O9[2OfMc6a0UI^O9[2NgMd6`0UI^O:Z2MhMd6a0UI\\O:[2MiMc6`0VI\\O:\\2LjMc6>VI^O:Y2MmMa6gM]O:W2MQOX2oNJ_OA]NSO?S2IiNl1c0\\O_O`NSO>R2JiNj1h0ZO[OeNQO=S2JiNi1m0VOWOjNPO=T2IgNj1Q1TOUOkNoN>T2IgNi1S1ROUOnNmN?S2HgNh1W1oNUOROjN?S2HgNg1c1dNjN_OhN>T2HfNg1l1ZNfNIcN>U2HeNg1S2QNdN2`N=T2IeNf1Z4SNmJ>T2IdNf1\\4TNkJ=U2IdNd1^4VNiJ>T2HeNb1a4XNeJ>U2HeNa1b4YNdJ>V2HbNa1f4XNbJ?U2IcN_1g4YNaJ?U2IcN^1h4[N_J>V2IcN]1j4\\N\\J>W2IcN[1l4_OaLVOcN[1l4@`LUOcN[1n4@_LUOcNY1Q5A\\LWObNW1S5B[LWObNU1V5CXLXObNT1W5EVLWOaNU1Z5DULWOaNS1]5ERLXO`NS1_5EQLXO`NR1`5FPLXO_NR1c5EnKYO_NQ1d5GlKYO^No0i5GiKZO^Nn0j5IgKYO^No0k5HhKXO]No0l5IgKXO\\Nn0o5JeKXO[Nn0R6IcKYO[Nl0U6K_KYO\\Nk0V6L]KZO]Ni0W
6M\\KZO\\Ni0Y6M[K[O[Ng0[6NZK[O[Nf0]6OXKZO[Ne0_61VKZO[Nd0`62UKZO[Nc0a64RKZO\\Nb0d63PK[OZNb0h64nJYOZNb0j64lJ[OXNa0m64jJ\\OYN?n66hJ[OYN?Q75fJ\\OYN=S77dJ\\OYN_IDZNMX8`0]IC[NLY8a0\\IC[NJ[8c0ZIDZNH]8c0ZIEZNF^8d0XIFZND`8f0VIGZNBa8f0UIH[N_Ob8i0SIG]N^Ob8j0QIH]N\\Oe8k0nHJ[N\\Og8j0nHJZN\\Oj8i0lHK[NYOl8k0jHKZNYOn8k0hHL[NWOf0M[7P1cIL[OVOR7n0cILZOXOR7l0dILXO[OT7i0cILXO^OS7f0eILWO@S7e0eIKWOBS7c0fILVOBS7b0gILUODT7?gIMTOGS7=hILTOHT7W2SAjMk>V2UAkMj>V2UAmMh>S2WAPNh>Q2PAWNn>_3N2O1N2N2N2O0O2N1O2N2N2O0O2O0O2N2O0O2N101N1O101N1O1O100O2O1N1O2O0O101N1O100O2N100O1O100O100O101N100O1O100O2O0O100O1O100O1O100O1O10O01O100O010O1O010O00100O010O0010O01O1O00100O001O01O10O010O010O01O010O0010O00100O0010O0010O01O010O01O010O01O01O010O0010O001O010O01O010O01O0010O1O010O01O010O001O010O010O001O010O10O01O01O01O10O1O01O0010O01O010O010O010O001O10O010O01O010O1O0010O10O1O001O010O1O10O01O001O010O1UKbBS3_=f11O001O100O1O00100O100O00100O1O1O010O1O1O1O1O101N1O1O1O101N1O1O2N1O2O0O2N1O2O0O2N2N2N1O2N1O2N2O1N1O2N2N1O2N3M2N4L4M4K7I2N2N2N2N1O2O1N2N2N2O1N2N2N2N2O2M4L3N2M6J:F3M1O3N1N2N3M3Mf6" + } + ], + "model_output": "A golden-brown hot dog bun with a soft, fluffy interior and a slightly crisp exterior, featuring a split top design to cradle its contents, is attached to a hot dog." 
+ }, + { + "image": "images/caption_detailed_57.png", + "subject_name": "laptop", + "object_name": "table", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "0V5j90WFlJf9W501O1O4L4L1O2N2N5K2N3M1O3M2N3M2N3M2N3M6J1O002N4L3M1O12O0100NO2N2O3M1O01O02M7J2M1O2O2L3N3L2O1N5J:E4N2O004N2NO2N0101O1N8H0O00003N1N2N2N2M3N1O0000O1O1O2O0O1M3O1O0100000O001O01M3O1N2JkE_KW:_46N2O00100O10001O0001eEdKP:\\4oEfKP:[4PFdKP:]491iEfKf9]4QFiKo9d40000000000000000000000001O00001O1O001O000iKRF\\3o9`LUF_3k9^LYFa3g9]L[Fc3e9\\L\\Fd3d9[L]Fe3d9XL_Fg3a9XL`Fh3`9VLbFj3Z:0O10000000000M3O1000000O1HmK`E0NT4e:mK[ES4e:nKZEQ4f:5L4000000LeKbE\\4\\:5N2000XMkEe0T:XOWFa0h9\\ObF>Z9lMnEc1R1:T9CQG;o8\\OjElN[1f1k8ZOaGe0^8ZOdGf0\\8WOhGh0U8YNmEn0o1i0T8XOnGh0R8WOPHh0o7UOUHk0k7QOYHo0g7POZHP1f7nN]HQ1c7mN_HS1`7jNcHW1U7ZN[F=c2Y1Y7gNgHY1Y7fNhHZ1Y7dNhH\\1X7bNiH_1W7`NfHIZMg1P:]NiHLWMg1P:[NoHe1U:1M3JVNUCk1P=00000K5LoMYCQ2j<100M3O100000000O100000000O1O1001O00000O2O1O1O2N2N001O1O1O1O001O001O1O0000001O1O00000000MYNmBg1S=YNmBg1S=YNmBg1S=YNmBg1S=YNmBg1V=000001O000000000000001O00000000001O00001O1O00000O2O00001O001O001O1OYMfNhGZ1W8gNjGX1V8iNiGW1W8iNjGV1V8kNjGT1V8lNkGS1U8nNkGQ1U8oNkGQ1T8QOkGo0U8QOlGn0S8TOmGk0S8UOnGj0R8VOnGj0R8VOoGi0P8YOPHf0o7[OQHe0o7[OQHe0o7[ORHd0n7\\ORHd0m7^OSHa0l7@TH`0l7@UH?j7CUH=k7CUH=k7DUH;i7GWH8i7JWH5h7LXH4h7LXH4g7MZH2d70\\H0c72\\HNc73^HLb74^HLa75_HKa75`HJ^78bHH]7:bHF^7:cHE\\7=cHC[7?eHBZ7?eHA[7?eHA[7?eHA[7?eHAZ7a0eH_O[7a0eH_O[7a0eH_O\\7`0eH_O[7a0eH_O\\7a0bH@^7`0bH@_7?bH@^7`0bH@^7a0aH_O_7a0aH_O`7`0`H@`7`0`H@a7`0^H@b7`0^H_Oc7a0\\H@e7`0YHAg7?YHAh7>WHCi7>VHBj7>VHBj7>VHAl7>THBl7>THBm7>RHBo7=QHZOeMB[:T1PHUO^8k0bGSO_8m0aGSO_8n0aGPOa8o0`GPO`8Q1_GoNb8P1^GoNc8Q1^GlNd8U1[GjNf8V1ZGjNg8U1ZGjNf8V1c2O10O01O01O0010O01O01O1O010O001O001O01O000000010O00100O010O0001O3M2O0O2N2N2O2M1O2N3N0O3N1N3M2N3N1NTgZ3" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"[5e2=`1KdM`5l0eJ`1KdMa5k0cJb1LcMc5i0`Je1MbMe5h0]Jf1NbMg5f0[Jh1NbMk5b0WJl1NbMm5`0UJm1OcMn5>SJo1OcMP6R1V8]O]G@>R1U8]O^G@`0Q1S8]O^GBa0o0Q8^O`GAb0o0n7@`GAd0m0l7AaGBd0l0k7BbG@d0n0j7AcGAd0m0h7CdG_Of0m0f7CfG^Of0n0d7DfG^Og0m0d7DeG_Oh0l0P2eNj1o0_K_Oh0m0b7DfG^Oj0m0`7DgG_Oj0l0_7EgG_Oj0l0^7EiG^Ok0l0\\7EiG@k0k0\\7EiG_Ol0l0[7DjG@k0l0[7DjG@l0k0l1mN]1g0kKAm0k0k1POYLLh4g0XLBm0j0j1ROY1b0PLAn0k0j1QOX1b0QLBm0k0j1SOW1>SLCm0l0i1YOQ18XLDn0k0j1XOP18YLEm0k0j1YOo06[LEm0l0j1XOn06\\LFl0l0j1_Og0ObLFn0l0i1Bd0KfLEP1m0g1C7ZOPLa0R1DR1n0e1G3WORLa0T1CR1n0e1H2VORLb0T1AU1o0d1JNNYMYOU1o0d1KMLZMZOV1o0d1JLMZMYOW1P1c1JKM[MYOY1o0b1KIM\\MYOY1o0c1JHM\\MZOZ1o0b1KFM\\M[O\\1m0b1LEL\\M[O^1m0a1LEK]M\\O]1m0b1KDL]M\\O\\1n0c1KCJ^M]O]1n0c1JBK]M^O^1m0c1K@K^M]O`1m0c1J_OL^M]O`1m0c1J@J^M^O`1n0c1I_OK^M^O`1n0c1J^OJ_M]Oa1o0b1J^OI`M^O`1o0c1I]OJ`M\\Ob1Q1a1J\\OHbM]Oa1Q1a1J\\OHbM\\Ob1R1a1JYOIdMZOc1S1`1KXOGc0>5KXOGb0?6JXOFc0`05JXOFc0`05KWOEd0`05KWODe0a04KWODe0a04LVOBg0b04KUOBh0c03KUOBh0c03LTOAi0c03LTO@j0d02MSO_Ok0d03LRO_Ol0e01NRO]Om0e01OQO\\On0e02NPO\\Oo0f01NPO\\Oo0f01OoN[OP1f01OoNZOQ1g00OoNZOQ1g00OPOYOP1h000POVOP1k00OPOVOP1k000POTOP1l000QOSOo0m001POQOQ1n0O1POQOQ1n0O1QOPOP1o0O2POoNQ1o0O2QOmNQ1Q1N2ROkNP1T1O0ROkNo0U1N1TOhNo0W1M2d3N\\L2d3N\\L2d3N\\L2d3N\\L3c3M]L3c3M]L4b3L^L4b3L^L4b3L^L4b3L^L5a3K^L6b3J_L5a3K_L6`3J`L6`3J_L7a3I_L8`3H`L8b3F_L9a3G^L:c3E]L;e3C[L>e3A[L?e3A[L`0e3_O[La0e3_O[Lb0d3^O\\Lb0d3^O\\Lb0e3]O[Lc0e3]O\\Lb0d3^O\\Lc0b3^O^Lb0b3^O_La0a3_O_La0a3_O`L`0`3@bL?]3AeL=[3CfLhM@X2`0kM]OU2c0lM\\OT2d0nMZOQ2g0PNYOo1g0SNWOm1i0UNUOj1l0WNSOi1m0YNQOg1o0_NjNa1W1T60000000O100000000O1000000O10000O1000000000000O100O100000000000000O100000000O100000000O1000000O1O100O1O100O100O1O10000O1000000O1000000000000000000000000001O00001O000000001O00001O000000001O000000000000001O00000000001O00eB" + } + ], + "model_output": "A black laptop with a visible keyboard featuring white lettering on the keys, a touchpad, and a series of ports on the left side including a USB port, an HDMI port, and possibly a card reader slot, is on the table. 
The laptop's screen is open at an obtuse angle, and the display bezel is partially visible." + }, + { + "image": "images/caption_detailed_58.png", + "subject_name": "person", + "object_name": "surfboard", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 432, + 640 + ], + "counts": "_nd22\\=3N2N100O11O000000QFJe65ZINd62ZI0f60XI2i6MTI6m6JnH9S7GkH;U7EhHj7BTH?m7BoG?S8AkG?X8@fGa0[8@bG`0`8@_G?c8A\\G>g8BWG=k8CTG;o8EPG9T9GkF7W9JgF6[9IdF7]9JaF6a9J]F7c9I]F6d9J[F7f9IXF7i9JUF7k9IUF6m9JQF7o9JoE7Q:KlE5MBP9lFO0ET9=kF2JD[9>fFR1Y9QOeFo0[9SObFn0^9SO`Fm0a9UO]Fk0c9WOZFj0g9XOUF?U:Y16WO`EZNh:c1d0O2M101O0O101O001N2O1O1O001N2O1O1N2O2M4L=C3L5K6Hejf4" + }, + { + "size": [ + 432, + 640 + ], + "counts": "c\\k24\\=001O00000000001O0O100000001O000000000000000O1000001O0000000000000000000000000O1000000000000000O10000000O1000O100000O01000000O10O0100O10O0100O1O100O2Meab4" + } + ], + "model_output": "A person in a full-body black wetsuit with a white logo on the left chest, featuring a high collar, long sleeves, and integrated gloves, is playing with a surfboard." 
+ }, + { + "image": "images/caption_detailed_59.png", + "subject_name": "horse", + "object_name": "sand", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "VSR21[=0ZS_16]l`N5K4M3M4L5QDUOT;R1bDVOZ;`1M3L4M3N2N3M\\OPE@\\:=jEBU:8QFHn98QFJo97oEJQ:h10]OkEPNT:n1SFmMn9U2c02N00]OWEcNj:\\1[EYNI1m:d1d0N2N30\\EaNZ9b1bF`N]9b1`F_Na9`1^FaNc9^1[FdNe9[1YFhNg9X1UFlNl9S1SFlNP:S1oEkNU:V1eElN]:o12N2M4L5M3L2B`0H7L5M3N2M3N001N101O0@eC1[d3^8bLaG^3_8aLUGM6b3e8aLUGN6`3e8oLTGS3l8f000O1000000000000000000000O11O000000000000000000000000000000bLTG]2m8QM[GDI10Z3l8mLcGX3]8dLhG[3X8cLjG]3R9000000000004L0000000000001OO1M3N2000000000000001O000000001O1O001O00000000001O00O1WOdLcG\\3T92002N1O000000000000O1O100O1O2N00101N1O1O1L4001O1O2N3M1O1O0000000000000000001O2N1O0000001O00mNYMaGh2\\8_M`Ga2\\8dMcG\\2\\8fMcGZ2]8Z11OO101N1000000000000O10000000000000000000000000000000000001aK`GQ4Q9L2N3M2N2N2N8SLaF5KR3R:00O2O000001O0000O01AQMbFo2]9TM`Fm2`9TM_Fl2a9UM^Fk2a9UM`Fk2_9RMfFm2Y9oL`FM8T3h8mLPGO682l2g8cMZG]2f8`M]G`2c8^M_Gb2`8ZMeGf2[8YMfGg2Y8ZMgGf2^8PMnFKf0U3\\91M3M3O1L4N2M3N2N2N2J6E;M3O10000O1001O3M>B>oK^F]3l9K2O1O00001O0000lNmLRG>:e2_8jM]GV2`8PN\\GQ2^8kL`GW10n1_8a110000000000O1000000O10000O10000O10000000000O1000000000000000000O100000001O0000000O100000000000000O1000001O0O10000000000000000O2O0000000000001O0O1000001O00000000000O1000000000000000O100000000000O1000O1000000000000000000000000000000000O11N100000O11O000000O11O00000000000000000000000000000000000000000000000000000000000O100000001O0000000000000000000000000000000000000000000000000O2O01O00O1000000000000000000000000000001O000000000000000000000000000000000000000000001O00000000000000000000000000000000000000000000000000000001O0001O0000000000000000000000000000000000001O00_OWG`Li8`3XG_Lh8a3YG^Lg8b3YG^Lg8a3ZG_Lf8a3[G^Le8b3[G^Le8b3[G^Le8a3]G^Lb8c3^G]Lc8b3]G^Lc8a3^G_Lb8a3_G^LW1" + } + ], + "model_output": "A dark bay horse with a sleek coat, black mane, and tail, featuring a white blaze on its forehead and white socks on its front 
legs, is walking on sand." + }, + { + "image": "images/caption_detailed_60.png", + "subject_name": "banana", + "object_name": "bowl", + "predicate_name": "beside", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "c`]5c0\\>`0@3N1N2O1N2O0O2O1N101N2O0O2O1N101O0O2O0O2O001N10001O000O2O00001N100000001O0000001O0000001O0000000O11O000001O00000000001O0000001O00000001O0001O00000001O0001O00000000010O00000001O01O00000001O01O000000010O0000010O000001O01O0000010O0001O01O00010O0001O01O01O01O00010O000010O00010O00010O00010O0100O0001O0001O10O001O00010O010O02O0O1O010O101N010O1O10O02O0O1O100O100O100O2O0O101N101N1eGZM[4f2aK`MbLK[7g2PLaMaL1Y7_2SL[Ng3g1TL_Nj3a1SLdNj3^1QLeNP4]1kKeNV4\\1dKhN\\4\\1\\KgNf4[1SKgNo4\\1gJgN^5\\1YJgNj5_1gIfN\\6c1RIdNQ7c42O2N1N3M3N1N3N2N1O2N2M3N1O2N2N1O2O1N1O2N2N2N2N2O1N1O2O1N2O1N2O1O1N2O1N2O1N2O1N2O1N2O1N3M2N2O1N2N3M2N3M2N3M2N2N3M2N3M3M2N3L4M3M3L4L3OXJ" + }, + { + "size": [ + 480, + 640 + ], + "counts": "hZ^55j>3N1M3eI9YMJe2k0iGmNe4:`3_1UL`Ni3c1VL\\Ni3e1YLZNe3g1[LZNc3g1^LYN`3h1`LXN_3i1aLWN^3j1cLVN[3k1eLUNZ3l1hLRNW32aHV1Z4gNS31hHV1W4gNP33nHR1T4iNm26RIn0Q4mNk25XIk0n3oNi27ZIi0m3POh27]Ih0k3QOg28_If0j3ROf29aI7VOAc4Oe29dI5WOA`41d2:fI2WOC_41b2lIIV4Jl1=QJFT4Mj1>SJDS4Ni1?TJCS4Nh1`0VJAR4Og1a0WJ@R4Ng1c0XJ]OQ41f1c0ZJ[OP42e1c0\\JZOP43c1d0^JWOP45a1e0O[O0f00ZOOf02ZOMg03YOLh04XOLh04XOKh05YOKg05YOJh06XOIh07YOIg07YOHh08XOHh07YOIf08ZOGg09YOGg08YOIg07YOIg07YOIf07[OHf08ZOHf07[OIe06\\OJd06\\OJc06^OJb05_OKa05_OKa05_OK`06@J`06@J`07_OI`09_OGa09_OGa09_OGa0:^OFa0<^ODb0<^ODb0=]OCc0=]OCc0=]OCc0>\\OBc0?]OAc0?]OAc0?]OAc0?]OAc0?]OAc0?]OAb0a0]O_Oc0a0]O_Oc0a0]O_Oc0a0]O_Ob0b0^O^Ob0b0^O^Ob0a0^O@a0a0_O_Oa0a0_O_Oa0a0_O_Oa0`0@@?a0A_O?a0A_O?a0A_O?a0A_O?a0A_O?`0B@>`0B@>`0B@=a0C_O=a0C_O=a0C_OX15nJ_Oh3=Z14nJ_Og3>[13mJ@g3=]13kJAh3<]12kJCg3;_12iJDg3;`11hJEg3;a10gJFg3:c10eJGg3:d1OeJFg3;e1NdJHf3;f1McJIg39g1NaJJf39j1M_JKf38l1M^JKe38n1M\\JLe38o1L[JMd38R2JYJ0d36T2JWJ0e36U2JUJ1e35W2JRJ3f33Y2JPJ4f32[2JnI5NZOg2f0_3KjI7OZOg2c0a3MfI81YOf2c0d3LbI:3YOf2a0f3M^I;5XOf2`0h3NYI=7WOg2>j33nH=?TOg2=n3a1ZISNf2=Q4`1
WIUNg2:T4a1SIWNg29X4_1oH[Ng26[4_1lH]Ng24`4^1fHaNg23d4\\1bHdNi20g4[1]HiNi2Mk4_2RKbMQ5^2lJcMU5]2gJfM[5Z2bJfMa5[2ZJgMh5Z2RJiMP6Y2jIiMX6Y2_IlMd6V2RIoMP7X2_HoMe7d42M3N4L3M4L9GA;E?B=B;D9GmeT1" + } + ], + "model_output": "A ripe banana with a bright yellow peel, exhibiting a gentle curve and a smooth texture with a few small brown speckles, is attached to a bowl." + }, + { + "image": "images/caption_detailed_61.png", + "subject_name": "person", + "object_name": "wall", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 482, + 640 + ], + "counts": "W]o31o>4M2N2[D1T81gG9R8IiG?R8CkGm0f7UOWHR1c7oNYHV1e7lNXHW1g7kNkGa1T8bN_Gj1^8\\NUGm1j8VNkFS2U9PNbFV2]9h1O10000O1O1O10000O1000000O1000000hMiJbH1ONk1X5Z5^KdJb4X5dKfJ\\4m4SLQKm3n4XLnJh3R5ZLkJg3T5aLeJ_3X5gLeJY3Y5mLcJS3Z5SMcJm2\\5XM_Ji2`5[M]Je2\\5eMaJ[2Y5nMdJR2W5UNfJl1W5ZNfJf1W5_NgJa1V5dNgJ]1V5hNgJY1S5^4O1N2O1M3M3N2N2O1O1L400O1O1O1O1O1O1O100N200OoGiKY5W4i2O1O10000O100O100O10000O1000000O1000001O0O0100001OO01000000001O00000000001O00001O1O1O0000001O0000010O0O2O1O1O001O000010OO2O1O1O1O1eIfKl1[4RNiKk1X4TNkKi1V4UNnKh1S4WNnKh1S4VNPLh1R4UNPLj1R4SNQLk1P4SNQLm1Q4PNQLo1Q4mMQLS2Q4iMQLW2Q4fMPLZ2S4aMPL^2S4]MoKd2U4UMnKj2Z4jLjKU3a4`JSJ11O0_1_1P4]5]KfJb4a5UKaJk4`5PKdJP5^5nJcJQ5^5mJcJS5^5kJdJU5\\5iJeJW5^5dJdJ\\5b7N100010OO100010O01O001N110O0O1000000jJWFS5i9mJWFS5i9lJXFT5h9lJXFT5h9mJWFS5i9mJWFS5i9mJWFS5i9lJYFS5k900001O01O1O001O1O0O1000O2O0O10000000O101O0001O01O0O101O00001O0000001O001O1O1O1O00001O001O001O1O00001O0000001O1O1O1O1O1O1O1O1O1O001O1O1O1O1O001O001O1O1O001O002N1O1O001O001O1O1O1O1O1O001O1O1O1O001O2M2O0O2O001O100O1N2O010O1O1O100N101O1O1O1O1O00100O1O1N2O00011N2M110O002N1O1N110O1O2N1O001O1O1O2N001O002N1O1O001O2N1O1O1O2N2N1O1O2N1O1O1O2N1O1N4L2N2O2L4L5K:DRo0" + }, + { + "size": [ + 482, + 640 + ], + "counts": 
"Zn071200001J0005O11J0O2O50K0:0K0J10001O0O5^2IiM7JKR7h4WJSMb5o2ZJUMe5l2WJWMi5l2SJUMm5m2PJTMP6o2iIUMW6n2bIVM^6f510000000000O100000000000000O1000000000O010000000000000000O10O11O1O0O2ROaIWI_6a6U1K5K5L4N102N1O1O00010O10000O1O1N200O100000000000000O10000O2O000O10O1TH^JX6b5fIfJT6Z5lIhJR6W5nIlJP6U5oIkJQ6U5oIkJR6U5lImJS6S5lInJT6R5lInJS6S5lImJU6S5jInJV6R5iIoJW6R5hInJX6R5gIoJY6Q5fIoJ[6e3RI`La0L]6a3XI`L:O_6_3[I`L61_6^3]I`L33`6\\3`I`LO3c6[3aIaLH7g6W3cIaLAVOES1X7U3cIbLB9n6R3aIfL_O9R7o2`IhLZOg0QO_OKKB2H7CJo11f2S1XKiN1c1h0_O]O^OLK0GW34^1k2PLUN^OYONMNFo0LW19_2i2QLWN^OWOONNF<`0f1Fb2f2PL\\N_OROO1NE;c0e1Dc2d2PL_N@POO1OD9f0e1Dc2e0]Ke0e0H_OnNO4OA9h0d1Ce2c0]Ke0e0L]OlN05N_O:j0d1Cd2`0`Kf0c0N^OiN07;:`1Cc2?^Kh0e0O^OeN298:b1Bc2e0^K;f08\\OaN3=5:d1Bd2b0^K9h0=YO_N4>49e1Cc2n1XLZOWO^N6>2;e1Bb28`Kg0j0=XOTN6d00:f1Bb28`Kc0l0c0WOPN5h0O9e1Cd25`K>Q1i0]OiNH9f1Ba2[OdKh01?n0m0]OgNH8g1Cc20dKd0i0m0_OeNJ8e1Dc2DeKI3V1e0m0AdNI9e1Ec2BgKH0Y1e0n0BbNH:f1Cc2@hK_1d0c0]OeMOX2c1cNd2^OhKa1c0b0_OdMOX2b1eNc2_1XL3CaM0Y2a1eNb2a1UL4G\\M2[2^1eNc2h1kK41C_1cNc2IoKl3NkM^1bNc2HQL_6[1kIc2V7[MlHd2U7ZMlHf2V7VMmHi2T7UMnHk2S7QMPIn2R7nLQIQ3Q7kLRIT3Q7eLSI[3Q7_LRI`3Y9O1O1O1O001O1O1O1O1O001O1O1O1O1O1O1O1O1O001O001O001O2N001O1O1O1O1O001O1O1O1O1O1O1O1O1O1O1O001O1O1O2N002N001O1OhMRN]Gm1`8YN^Gf1`8_N^G`1`8dN_G[1m2WNa0a0aLW1l2^N=>gLS1i2cN>=iLn0g2jN<:mLj0f2oN;9oLg0d2SO;8QMd0c2WO98TM?b2[O87XM=]2_O:4ZM<[2A:5[M:Y2C;4]M8V2F<3^M7T2H<2aM6R2H=3aM4Q2K=1dM2o1M=1eM1n1O<1gMMm13<0iMJl16:1kMKg15>0lMKe15`0OlMLb17`0OnMJ`19ROWNbM1h0h1UOVNc1_o0" + } + ], + "model_output": "A woman with short, light brown hair, wearing a black helmet with a badge emblem on the front, a black top with a V-neckline, and a gray knitted cardigan, stands in front of a wall. She has a silver necklace with a circular pendant and is looking slightly to her left with a neutral expression." 
+ }, + { + "image": "images/caption_detailed_62.png", + "subject_name": "person", + "object_name": "wall", + "predicate_name": "sitting on", + "mask_rles": [ + { + "size": [ + 444, + 640 + ], + "counts": "mm[43g=3N1O2O001O001N1010O01O0aJD]M1=Q1]OS2I_Me4a0bKoNa0n0@S3\\4lMnKROHQ3Y4QNiKPOOo2W4UNbKPO8k2U4NlK3S4LmK5R4KoK5P4LPL4o3MQL3o31mKOS43iK0U4l40000O10000000YHfKl5Y4l101O00000O10QOnKTHR4g7XLTHh3f7cLTH^3d7PMUHQ3b7i1^OmIVIX6g6a0M2O20O000001O0001O000001O00010O0000001[JTIX4l6`K`I\\4a6bKaI]4_6bKcI]4]6cKdI\\4\\6cKfI\\4[6nJQI9h0h4W6mJTI9f0j4W6kJVI4i0P5R6kJ]JU5V7O1O010O001O001O1O1O1O1O1O2N1O001O1O2N001O1O1O1O1O1O002O0O1N200O2N1O1O1O2[N[FoNf9c0YFSN0L4\\1d9l0XFhM3]1g9i0VFjM2^1j9f0TFlM2^1l9h0UFWOm9h0RFXOo9Z23M2M4M2N3L3N4L6H4M4L3N3L3L6J4M3M4J:D8I6M5GUCWOZ=6c__1" + }, + { + "size": [ + 444, + 640 + ], + "counts": "o_l5:P1Ib9?gFNjND?3n93L3O1O1O1N1O2O001N2O2N2M4M1O1N2N0O2O1O1O1O001O2O000O010O2O000000010O1O:F2N2N1O001OO100010OO1O011O002M5L1O1O1O2N1O2N1O1O1N2O1N2N2N8G6D`ALcml02WaSO2M2O2N1O2M2O2K6L3N1O2N1O3M2N2O0O2N4L2O0YHeNi1\\1UNiNg1X1UNmNh1V1VMeNQL?b6n0YMjNiKe0i6b0\\M:_2H]M`0^2B_Mc0\\2@[Mj0a2ZOYMm0c2TO[Mo0c2ROZMQ1e2SOUMR1g2SORMR1k2QOQMS1l2ROnLR1o2TOkLP1R3QOlLR1Q3POkLT1S3nNkLT1S3lNlLW1R3jNlLZ1R3eNnL^1o2aNPMd1n2\\NQMf1m2ZNSMh1l2VNUMl1j2RNWMo1h2PNWMT2g2kMTM\\2l2dMQM`2n2_MPMd2o2[MmLk2S3TMgLS3Y3lL`L\\3_3dL\\Lb3d3^LWLg3i3YLTLj3k3WLPLn3P4SLjKR4V4oKUK[M200P7h4S3000000O1O100O0010000O2N100O100O00100O100O100O1O2O0O001O100O100O1nMiEnMFLNO;b0Z:\\1\\ESN_1>U9Z1XGcNi8Y1_GcN`8[1eGaN]8V1nGfNT8Y1j2N2O1O1O2O000000O01000001M200O100O10O01O1O2O0O10000O10000O1000000O100000nFXOl3h0V501O0000O0101O0000000000000000000001O00001O00000YFXOV5i0`4001O001O001O1O001O001O001O1O1O1O001O001O1O1O1O1O2N2N1O1YEcNi7^1VHbNj7_1THcNk7^1gFiNFKc9^1VF`N@i03^OV:Z1oEgNEZ1\\:1iEjNIV1^:3aElN0S1^:g2O1O00001O001O1lMeKaI\\4Z6iKeIX4V6oKgIR4S6VLjIj3Q6\\LnId3o5bLPIQOCF:5M5JYbd2" + } + ], + "model_output": "A white football with a pattern of blue curved stripes and a blue logo is in front of a person." 
+ }, + { + "image": "images/caption_detailed_64.png", + "subject_name": "car", + "object_name": "road", + "predicate_name": "driving on", + "mask_rles": [ + { + "size": [ + 543, + 640 + ], + "counts": "kRZ83h`09H9a_OQ7l7jIdGWOkNgAT1^>bNhA]1n>O001O00000000O100000000O100O100O1O1SO]NXBi1g=h0M300O100O1M3O100M3O1O1N2O1O1O1N2N2N2M3O100L]LkBd3Y=O1O3M;cLWBn2n=O00cMQMkFn2U9RMSGf2j;ZMdA[2d>L1O5K4L6J2N1O1O1O1O2N0000000000O1O100N2H^Nn@g1m>8FPN_A10P2W>c000001O1O7`M^AU2h>N4L1O002N1O1O00000000000000O100O1lKTNSIn1h6YNTIi1l6aNiH`1V7YO_FeNYOOd0S2d9B^E\\NOU1l0m0h9T1TFmNl9X1nEiNQ:Y1nEgNR:Z1lEgNS:\\1jEeNV:_1eEbN[:b1]EbNc:m3O1O100O1O100O1O1O11O1O1O1O001O1O0000001O1O0000001O1O1OO1O13M2N1O001O1O001O0000001O2N001O001O001O002N1O001O00001O000000001O1O000000000000O11O001O0000000000O1001O001O000000000lJhDa4W;_KkD`4U;_KlDa4T;_KmD`4T;^KoD`4Q;`KPE_4P;aKQE^4o:bKQE^4o:aKSE^4m:aKVE]4j:cKWE\\4i:eKVE[4i:fKXEY4h:hKWEX4j:gKXEW4h:iKXEW4h:hKZEW4f:iK[EV4e:jK[EV4e:jK\\EU4d:jK]EV4c:kK\\EU4d:kK]ET4c:lK]ET4c:lK_ER4a:nK`EQ4`:nKbEQ4^:oKbEQ4^:nKdEQ4]:nKdEQ4\\:oKdEQ4\\:oKdEQ4\\:oKeEP4[:PLfEo3Z:QLgEn3Y:RLgEn3Y:QLhEo3X:QLiEn3e;O1O1O00lNRLWEn3i:RLXEm3h:SLYE9A5]OHNl1k;nM[E4c0g1R:UNaGd1o;N8H3fN[@T1j;lNRHS1n7mNSHR1l7oNUHP1k7POWHn0j7QOYHl0g7TO\\Hi0m;01O00000000001O1O0000001O000000O11O01O000000O100000000001O00000O1N110O1O1L5L3O1O1K50ON3O100O1100O001O001O001O000000001O2N5QOT@a0b`0B9FYoi01P`UO0``02]_O1_`09JDh_O=V`0Fh_O;V`09N10000000000000000m_O" + } + ], + "model_output": "A dark-colored minivan with a visible rear section featuring a red and white taillight on the left side, a partially visible rear window with a reflection, and a rear windshield wiper is driving on the road. The vehicle has a visible rear bumper and a partially visible license plate area." 
+ }, + { + "image": "images/caption_detailed_65.png", + "subject_name": "cup", + "object_name": "bed", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 436, + 640 + ], + "counts": "oTo37[=5L3M2N2N2O1N101N100ODYCNf<1[COe<1\\CNd<2\\CNd<2\\COc<1]COc<1]COd<0\\C0d<0\\C0d<0\\C0d<1\\CNe<1[COe<2[CMe<4ZCLg<3ZCL^S5DnJYIAi6;ZIDg69\\IFf64_IKb63cII_65bIJ_65aIK_64bIL_63aIM`62aIM_62bIN^62bIN_60bI0^6OcI1]6NeI1\\6MhIOY60iIOX6OiI1W6NjI2W6LkI3U6LnI2S6LgIHoL=Y9JiIInL=Z9IPJ6P6IQJ7o5HSJ7n5GSJ9m5GSJ:l5DVJgJ\\O_L6h8`0iJZO_L6o75WH?Z3VO`L5n7]1bK]NaL5n7^1fKaN[4_1]K_NeL2o7^1[KaNeL1Q8_1YK^NhL2P8`1XK]NiL2P8c1TK^NjLOR8d1SKdNn4]1QKbNP5`1nJ_NS5b1lJ^NT5c1kJ]NU5d1jJ\\NV5e1iJ[NX5d1hJ[NY5f1gJYNZ5g1eJYN[5f1`JUNVM5[8f1_JVNUM4]8f1]JUNWM5\\8g1\\JTNXM5\\8f1aJ[N`5b1bJ]N`5b1eGXNc26h5i1YJWNh5h1jGTNLNg17d6h1RJZNk1J^Om1dNZNn1K^Oj1cN\\No1I_Ol1`N\\NQ2H@l1]N]NS2G@m1\\N[NT2J^Om1\\NZNV2I^On1ZNZNW2I^Oo1ZNWNY2J]OP2XNWNZ2J]OQ2UNXN]2H]OR2TNWN_2H\\OS2RNVNb2G[OV2oMTNg2FYOY2mMRNj2EXO[2lMQNk2EXO]2iMoMP3DWO_2fMnMS3DUOa2dMmMW3BUOe2_MiM]3BSOi2ZMhMa3ATOj2UMiMf3_OSOl2QMhMl3\\OSOQ3dLjMZ4UOQOc4o0^KoNc4Q1]KoNb4R1_KlNb4S1_KmNa4R1`KnN_4Q1cKnN^4Q1dKnN\\4Q1eKoNZ4R1gKlNZ4T1fKkN[4T1fKlNZ4T1gKjNY4W1hKhNX4X1hKhNW4Y1iKfNX4Z1iKeNW4Z1jKeNV4Z1mKeNS4[1nKcNS4]1nKbNR4^1nKaNR4`1oK^NR4b1oK\\NR4d1nK\\NR4d1nK[NR4f1oKYNQ4f1PLYNQ4g1PLWNQ4j1oKTNQ4m1oKSNQ4n1oKPNR4P2oKnMQ4T2nKkMS4V2mKhMT4X2lKgMT4[2mKbMT4^2mK`MT4a2mK\\MS4e2nKYMS4h2mKVMT4k2lKQMV4Q3jKmLW4S3jKkLV4W3lKdLV4]3jK`LX4`3lKYLV4j3iKRLZ4o3hKkKZ4V4jKbKZ4^4iKYK]4h4V22N2O001N2O001N100O2O0O2O1O0O2O000O2O1O00100O001O001O001N102N1O1O1O1O1O1O1O1O1O101N1O1O1O100O2N1O2N2N3M4L2N4L3L5L3L4K5DH9B=D;G9C=H9G8M3L6J4M5J6J5L5I7H?WOhmk0" + } + ], + "model_output": "A cup of coffee with a frothy, light brown crema on top, served in a white ceramic cup with a visible handle on the right side, is placed on a paper napkin." 
+ }, + { + "image": "images/caption_detailed_66.png", + "subject_name": "person", + "object_name": "pavement", + "predicate_name": "standing on", + "mask_rles": [ + { + "size": [ + 640, + 427 + ], + "counts": "n[k523VOa@l0R1N]>VO]@S1S1G`>l0\\ATOd>o0YAROf>R1VAnNk>U1QAkNo>V1PAkNn>X1PAhNP?Y1o@gNo>^1n@cNo>a1o@_N:\\OY=Y2ZB[N4MAQOa=j2gBYN2;D`N]=m2lBXNOc0Q=Z1kBVN2b0R=l4N4L2N3M1OCUC]I05k^571O2N1O2O1N]OjA_KT>b4lA_KR>Y5O001N2N101O001N1O101O000O1O2[KmIWGJJ241JO50i2]6f4PM^HlLg1V6i5l5O0001O7I:Fa0_Od0\\O:F=C;E<\\F\\ET9Z;^O;E4jGWDl7Sl0K4000H8134JN2BXAIP?K^`e4" + } + ], + "model_output": "A person with short hair, wearing a light-colored shirt with a dark tie, dark pants, and dark shoes, is holding a smartphone in their right hand and appears to be in mid-stride with their left leg forward, pushing a stroller." + }, + { + "image": "images/caption_detailed_68.png", + "subject_name": "cow", + "object_name": "grass", + "predicate_name": "walking on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "TVU22m>2O0O2O0000000O2L9\\ADX>c0N2N1000001O01O01O010O100O1O2M2O2M2O4K4M6I;F3M1O00YOg0O1O1O1O10000O11O001O2N2N4L2N2NCiBROV=n0kBSOS=m0oBUOmn0L4K5O13N6IWOiBFY=6iQn5" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"m7R7n7000000000001O00001O0000000000000000001O00000000001O000000000000000000000000001O0000000000001O0000000000000000000000001O0000000000000000001O000000001O000000000O1000000001O0000O1001O0000000000000000O10000001O00000000O100000000001O0000O2O0001O00000000000000O20O00000000000001O0000000O1UJfGQ5Y8oJhGP5X8PKiGo4W8QKiGo4W8QKjGn4W8PKjGP5V8PKkGo4U8QKkGo4U8PKlGP5T8PKmGo4T8PKlGP5T8mJoGS5Q8lJSHQ5m7oJ]Hg4c7YK^Hf4b7YK`Hf4`7ZKaHe4_7ZKbHf4^7ZKdHd4\\7\\KeHc4[7]KeHc4[7]KeHc4[7^KdHb4\\7^KcHc4]7]KcHc4^7\\KcHc4]7]KcHc4]7]KfH`4Z7`KgH_4X7bKjH\\4W7cKkH[4U7eKmHY4S7gKPIV4P7iKSIU4m6kK_Ii3a6VLfId3[6[LfId3Y6\\LiIc3W6^LhIb3X6^LhIb3X6]LhId3Y6[LXIT4h6lKQI[4o6eKoH]4Q7dKmH]4S7bKnH^4R7bKnH]4S7cKmH]4S7cKnH\\4R7eKnHZ4R7eKQIY4o6gKSIW4m6iKVIU4i6kKXIT4h6lKZIR4g6mK[IP4f6PL]Im3c6SL`Ij3`6WL`Ih3`6XLlHFWOR4l7WLmHLSOm3P8WLlH3nNf3V8WLlH5lNd3X8WLlH6jNd3Z8VLlH`4T7`KlH`4T7`KkH`4W7_KiHa4W7^KkHa4U7_KkHb4T7^KmHa4S7_KmHa4S7_KnH;eN^3]8WLnH7iNb3Y8VLRI1iNi3U8VLfIj3Z6VLfIi3[6WLfIh3Z6XLfIh3[6WLeIj3Z6VLfIj3Z6WLeIi3[6WLeIi3[6WLeIi3[6WLeIi3[6XLoH]4Q7cKiHc4W7^KiHa4X7`KgH_4Y7bKgH]4Y7dKeH]4[7eKjGT5V8nJcGW5]8f000000000000O11O0000000000O10000001O01O00000O10001O000001O00000000O2O00000000001O000000000000000000000000000000000000000000000000000000000001O00O1000000000000000000000001O0000000000000000000000000001O00O100000000000000000000000001O00000000000000001O0000001O00001O00000000001O000000000000001O0000000000001O00000000001O0000000000000000001O00O1001O00000000000001O00O100000001O000000000000000000001O0000000000000000000000001O00000000000000001O00000000000000001O0000000000000000000000000000000000000000000000001O0000000000000000000aJjFT5W9iJkFW5V9hJjFX5V9hJjFX5W9fJjFZ5V9fJjFZ5V9fJjFZ5V9eJjF\\5V9cJkF]5\\900000001O00001O0000001O0000001O001O001O0000000000001O00000001O00O10000000000000000000001O0000001O00000000001O00000000O1000000001O00O10000000000000000000000O10000O100_OiJYGW5g8jJWGW5i8lJSGU5m8lJQGU5o8mJlFV5T9;000000000000000000000000001O0000000000001O00000000000000001O000000000000000001O0001O0000gF" + } + ], + "model_output": "A 
black and white cow with a predominantly white head, a large black patch over the left eye, and a smaller black patch on the right side of the face stands on a gently sloping hill covered in green grass. The cow has a black body with irregular white patches, a visible udder, and is standing with its head turned slightly to the left, showcasing its profile." + }, + { + "image": "images/caption_detailed_69.png", + "subject_name": "wine glass", + "object_name": "person", + "predicate_name": "held", + "mask_rles": [ + { + "size": [ + 640, + 427 + ], + "counts": "WUb4:ec02M3N2N2O1N2O1N101]N\\Oh_Oe0T`0_Ok_Oa0R`0Cl_O?Q`0Co_O>n?EQ@;l?IS@7k?KV@5g?N]_OWO=j0T`02]_OUO?i0R`05^_ORO>k0S`05^_OQO:o0U`03^_OPO=m0P`0e0o_O[OP`0g0o_OYOP`0h0P@XOP`0i0V@POi?R1W@mNi?S1W@lNj?U1U@kNk?V1U@iNk?W1U@hNl?X1U@fNl?Z1U@dNk?]1V@bNj?^1W@`Nj?`1W@^Nk?`1W@^Nj?b1X@[Ni?e1Y@VNj?j1V1O2O000O2O0O2O0O2N1O2M3M4L4iN]Rg2" + }, + { + "size": [ + 640, + 427 + ], + "counts": "_jT11VSc25a`]M9H5K6VIM\\I9]6M`I7\\6KbI:Y6JbI1RO=Y2Y5X2X5?VKj4TO`H9NGZ2e5c0RKd4\\O^H<2AT2d5m0nJm2@_J0h0\\1h4S1jJf2L^JH=k0\\1g4T1iJd2OZKa0n0g4V1fJ[2MZJ5k0a1`4V7nKWGBc1\\4W7VLUG^OAROo1Z5Z7WLWG\\O@SOo1Z5Y7XLXG[Oc1]4T7YLYGZOc1\\4Q7aLXGTODROo1Y5S7dLYGQOEROo1Y5S7`L]GUOAROo1Y5S7`L^GTO@SOo1X5S7bLmFVN`0k0AUOo1W5S7kL[GgNCWOo1W5S7PMWGaNGYOn1V5T7nLZGbNDYOo1W5S7lL\\GdNe1`4o6kL_GcNb1b4o6iLcGbN_1e4n6iLdGaN^1f4n6iLdG^NXOCV2W5m6hLfG\\Na1l4i6hLiGYN^1o4i6gLlGSNSONY2Y5g6fLmGSN`1W5c6fL`JZ3`5fL`JZ3a5eL_J[3a5dL`J]3_5cLaJ]3`5bL`J^3a5aL_J_3a5^LbJb3_5]LaJd3^5[LcJe3^5YLcJg3]5YLcJh3]5SLgJm3[5lKjJU4W5_KjFUOT4\\5[:1O00001O00001O1O1O001O001O2N1bDZJ`8f5_G[Ja8f5WGcJg8^5QGkJm8W5mFnJR9R5oFmJQ9T5PGkJo8V5mFnJR9U5jFmJU9Y5eFgJ[9Y5eFgJ[9Y5fFfJZ9\\5cFeJ]9j5nD]Il0n0U:l5hEVJX:k5bEYJ_:g5`EWIBg0n:T6[EYIEd0P;Y6PEZJP;[700000000000000000001O0000001O2N00000000001O00001O0000001O001O0000001O000000001O0000001O000PKlF`KNZ2V9l1[GdK@`2U9T1YHULcNg2T9n0gHoKYNS3Q9h0oHPLlMGOb3V9?eIPLTMa3X93oIaLhLX3Y92TJdLeLZ3X9MVJjLbLY3Y9GUJmL`Lf3[9YOYJk0h5ROYJoNbLJU9V1YJoNhLGP9Y1XJkNVMCb8b1XJeN_MFZ8d1UJfNdMEW8g1SJcNhMEV8h
1PJcNnMBR8k1oIcNRN@P8m1jIfNYNZOn7P2gIbNiLaNe1l0k7R2fIaNbN]Oh7S2dIaNfNZOg7U2aI`NnNWOb7Y2^I`NTOTO_7\\2[I`NXOSO^7]2YI^N\\OTO[7_2WI]NBQOX7b2TI^NFnNW7e2mH`N0hNU7g2jHaN4eNS7i2hHcN6cNR7k2gHbN9aNQ7n2bHbN_:_1]EcNc:_1WEeNj:[1TEfNn:Y1QEfNQ;Y1mDhNW;V1eDmN[;U1cDkN^;U1`DkNb;T1]DmNd;T1XDmNk;S1QDmNRoNZAE2O4KNOL323ij0" + } + ], + "model_output": "A clear wine glass with a thin stem and a broad, stable base, partially filled with a transparent liquid, reflecting light on its curved surface, is held by a person." + }, + { + "image": "images/caption_detailed_70.png", + "subject_name": "person", + "object_name": "snow", + "predicate_name": "over", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "kTY55c0Nj;9oCJP<;kCFTU8R6THnIm7o5VHoIk7h4jEiKf2SOb7Q5oEZKCGf=n4iBVKb=i4l0O0SO\\A]Le>\\3gA\\L]>a3gAZL\\>a3T1N2OhAcLkM9f=P3_D_M[;^2bDjM];S2bDQNVNlNH1ne0f2T6g4oHVH8b6C]I=c6D\\IOn0d4j5[NjHoLe0Ng0g4k5\\NiHmLj2f4^4]NiHhLm2j4[4^NhHeLP3m4X4^NXLa1j3^NVLb1j3bNRL]1o3dNPL[1Q4eNoK[1Q4eNoKZ1R4fNnKZ1R4fNnKY1S4fNnKZ1R4fNnKY1T4fNlKZ1T4fNlKY1V4fNjKY1X4fNhKZ1[4cNeK\\1^4aNcK_1_4_NaKa1a4]N^Kc1m4RNTKn1Q5YLaG7_3`3T5SL`G;]3b3^5_K_Gn0S3c3`6ZLaId3d6WL]Ii3d6VL\\Ij3d6VL\\Ij3e6TL[Im3f6RLZIm3l6nKTIR4n6kKSIU4n6jKRIU4P7jKPIV4R7hKoHW4T7eKmH[4V7bKjH^4X7`KhH_4];0000OeEcKU5]4kJbKV5^4V50cEfKT5Z4kJmKn4T4RKmKn4R4RKmKo4S4QKmKn4T4SKjKmMoAR2R>oMmAP2e8TMgKm0dKo1`8\\MhKf0hKm1_8aMfKe0iKj1]8gMgKa0kKh1\\8jMgK?mKf1\\8mMeK?nKc1\\8RNcK=PL`1]8UNaK=QL^1Z8ZNcK:RL\\1W8_NeK7SLY1X8cNbK6ULW1W8fNcK3VLW1V8hNbK3WLU1W8iN_K5YLR1X8kN\\K5[Lo0Y8nNZK5\\Lm0Y8QOWK5^Lk0[8SORK5cLg0[8UOQK5cLf0\\8VOoJ6dLd0]8XOkJ8fL`0_8[OeJ9kL<_8_ObJ7nL:a8_O^J:PM7b8A[J:RM5b8EWJ9VM2c8IoI:]MMd8KlI:_MKe8NgI9dMIe8OdI:gMGe83]I9nMCf89SI8WN_Of8=lH7^N\\Of8a0cH8gNWOf8e0[H7POTOe8n0gG:CgNg8a5YG_Jg8a5YG_Jg8a5YG^Jh8b5XG^Jh8b5XG]Ji8c5WG]Ji8c5WG]Ji8c5WG]Ji8CVGd31iLh8A_GoMC^56QMi8@fG]3ASMh8^OnG[3ZOWMh8]OUHV3SO]Mh8ZO`HQ3hNeMh8WOeHR3cNfMh8SOmHU3[NhMh8ROPIT3XNjMh8POVIR3RNnMh8POVIR3RNnMg8mN\\IT3mMoMg8kN_IU3iMQNh8gNcIW3fMQNg8fNgIW3bMSNg8cNlIX3]MUNg8\\NWJ[3RMXNg8YN]J^3kLZNh8VN_J`3jLZNe8UNdJ
`3gL[Ne8QNiJc3bL\\Ne8oMlJd3_L^Nd8jMRKg3YL`Ne8gMUKh3WL`Ne8cMYKl3RLaNo<_1QCaNQ=]1oBcNQ=]1oBcNR=\\1nBeNQ=[1oBeNQ=[1oBeNQ=[1oBeNQ=[1oBeNR=Z1nBfNR=Z1nBfNR=Z1nBfNR=Z1nBfNR=Z1nBfNR=Z1nBgNR=X1nBhNR=W1oBiNQ=W1oBjNP=W1oBiNQ=W1oBiNQ=V1PCkNP=T1PClNQ=S1oBnNo:hM\\FX3fNPOh:oMbFo2gNSOd:QNdFl2hNTOb:SNfFe2jNZO]:TNfFc2mNZOY:XNgF^2QOZOW:ZNgF[2SO\\OS:\\NhFY2UO\\OQ:]NiFV2WO^Om9_NkFQ2ZOAg9bNnFl1\\OCb9eNQGg1^OE]9gNUGd1^OE\\9iNUGa1@GY9iNVG_1CIS9lNXG\\1EIQ9lNZGZ1FKl8oN]GV1GKk8PO^GT1HMh8QO^GR1KOa8SOdGl0M2]8TOeGi0O4Z8UOfGg004Y8VOgGe016U8XOiGb027R8ZOjG>69n7ZOlG=6:l7[OmG:8a7BSH0=`0\\7DUHK`0b0X7FWHFc0e0T7GWHDf0f0n6KYHAi0e0h6]3XIeLa6a3`I`L\\6a3eIaLU6b3lI`Ln5c3SJ`Lg5b3ZJ`LZ2PL_Oc7WN_LW2RL[Ob7_N]LU2]LlNY7PO[Lo1cLiNU7YOZLk1gLgNo6_OZLg1kLhNl6BXLf1mLVMPOF0Q1m7=WLd1mLUM]O1CNR8Y1QL_1hKSMP;_1XI]1k6eNVIX1k6iNUIU1m6kNTIT1k6mNVIQ1j6oNXIn0h6TOYIj0h6WOXIg0h6ZOYIe0g6[O[Ib0e6_O\\I?d6B]I;d6F]I9b6H`I5`6L`I3a6M`I2_6ObIO^62cIL^64dII[69eIF[6;fIDZ6hIBW6?jI@U6a0mI\\OS6e0nIYOR6h0oIWOQ6i0PJVOP6j0QJTOo5m0SJoNn5R1WJ_NQ6c1YJlMn5T2n5100O1O1O100O1O1O1O1O1O1O100O100O1O1N20Dn^OjMQa0c2O1O1O1O10000N2O1O1O100O1O1O1O1O100O002N1O1O010O2O0O1N10101O0O1O0O201N100O1N2O2N1O2Lm5Z@^GN^O2YOO3N44oa1" + } + ], + "model_output": "A small, fluffy white dog with light tan markings on its ears and around its eyes, wearing a silver chain collar with a tag, is sitting on a bench." 
+ }, + { + "image": "images/caption_detailed_72.png", + "subject_name": "fence", + "object_name": "person", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "bbe32T=9K2N3M3M2N1000000000000000000000000001OO10000001O000000000000000000000000000000000000000000000000000000000000001O00O1000000001O0000000000000000001O0000000000000000000000000000000000000000000000000000000000000000O10000000000001O0000000000000000000000000000000000000000000000000000001O00000000000000000000000000000000000000000000000000001O00000000O10000000000000000000000001O000000000000000000000000000000000000000000000000000000001O000000000000000000000000000000000000000000000000000000001O0000000000000000000000000000000000000000000000000000001O0000000000000000000000000000000000000000000000001O0000000000000000000000000000000000000000000000000000000000000000000000001O0000000000000000000000000000000000000000000000" + }, + { + "size": [ + 427, + 640 + ], + "counts": "e`l0i0[<=B`0D9J4PJWN6o1@ZNeMYOH8d2?dMYOH8d2?dMYOH8d2`0cMYOH7e2`0cMYOH6f2a0bMXOI7e2a0bMYOH6f2`0cMZOF7g2?cMZOF7g2?dMYOE8g2`0cMXOF8g2`0cMXOF7h2a0bMXOF7h2a0bMXOE8i2`0bMXOE8i2`0bMYOD7j2`0bMYOD7j2`0bMYOD7j2`0bMYOD7j2`0bMYOD7j2`0bMYOD6k2a0aMZOC5l2a0aMZOC5l2b0`MYOC6m2a0aMXOB7m2a0aMXOB7m2a0aMXOB7m2a0aMYOA5o2b0`MYOA5o2b0`MYOA5o2b0aMXO@6o2b0aMXO_O7P3a0`MYO@6P3a0`MYO@6P3a0aMXO_O7P3a0aMXO_O7P3a0aMXO^O8Q3`0aMYO]O7R3`0aMYO^N]O:i0h3a0aMXO\\NB8e0k3a0aMXO\\ND6c0m3a0aMXO[NG5`0o3a0aMXOZNI5>P4b0`MWO[NJ4=Q4b0`MWO[NJ3=S4b0_MWOZNK4XLBh3?WLAi3a0TL@l3b0RL_Om3b0RL^On3d0PL\\OP4e0oK[OQ4g0mKYOS4j0iKWOW4j0hKVOX4l0fKTOZ4m0eKSO[4o0cKQO]4P1bKPO^4Q1`KPO`4S1]KmNb4U1]KkNc4X1TKnNm4f30000000000000000000001O000000000000000000001O00000000001O0000001O0000000000001O000000001O000000001O000000001O00001O001O1O1O1O1O2N2N2N1O2N2N2N2N2N1O2N2N2N2N1O3M2N1O1O2N1O2N2N1O3M1O1O1O2N3M4RMeGg2`8N2N2N1O3M2N1O1O1O1O2N3M2N2N2N2N1O1O1O1O2N2N2N1O2N2N2N001O1O2N1O1O2M3N1N7[ObENg:N^EN[Y\\2" + } + ], + "model_output": "A baseball catcher in full gear, including a 
blue helmet with a protective face mask, chest protector, and leg guards, is crouched on the playing field, ready to receive a pitch. The catcher is wearing a white uniform with blue and black accents, and is equipped with a mitt on the left hand." + }, + { + "image": "images/caption_detailed_74.png", + "subject_name": "person", + "object_name": "person", + "predicate_name": "looking at", + "mask_rles": [ + { + "size": [ + 640, + 296 + ], + "counts": "\\aT38cc06J5N2N4n\\OEZb0Q1G9O1OHn]OcNSb0[1Q^ObNPb0\\1;2N;E00010O`Nk]OU1Vb0iNl]Oa0<]Oia00m]Oa0<]OUb0=l0GQ[W2" + }, + { + "size": [ + 640, + 296 + ], + "counts": "QXZ28cc07d\\OLeb0i0N3N1O2[]OUOa0Ek`0`2M3M100O1O100O100N1O2OYO]_OQNc`0o1f0201d^OhMm`0S3C001O0001O1TMY_O]2i`0_M[_O_2Va0M3M1O3N4XNZ^Ol0oa0jNV^O:8CZc0O1O0O3NfYf2" + } + ], + "model_output": "A person with short hair, wearing a dark-colored t-shirt and blue jeans, stands with their back to the viewer, beside a young child." + }, + { + "image": "images/caption_detailed_75.png", + "subject_name": "motorcycle", + "object_name": "grass", + "predicate_name": "parked on", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "gmT72Y=2N001O1O1O2N1O2N1O2N1O6J:F1O1O00000000000000000000O100O100O100O100O1O100N2O100N2O1O1O1N2N2N2M3Lb`a0" + }, + { + "size": [ + 427, + 640 + ], + "counts": 
"1o1a0\\5o2dJQM]5n2aJTMa5j2_JVMb5i2^JVMd5i2\\JWMf5g2ZJYMg5f2YJZMh5e2XJ[Mi5d2VJ\\Mm5b2SJ^Mo5`2QJ`MP6_2PJaMQ6^2oIaMT6]2kIdMV6[2jIeMW6Z2iIfMY6X2gIhM[6V2eIjM[6V2eIhM_6V2`IkMb6S2^IlMd6S2\\ImMe6R2[InMf6Q2YIPNi6n1WIQNk6n1UIRNl6m1TISNm6l1RITNP7k1PIUNR7j1lHVNV7i1jHWNW7h1hHYNY7e1hH[NY7d1gH[N\\7c1dH]N]7b1cH^N_7`1`HaNa7^1_HbNb7]1^HbNc7_1[HbNg7\\1YHdNh7[1WHfNk7W1VHiNk7V1UHiNn7U1QHlNP8S1PHmNQ8R1oGnNS8P1mGoNV8o0jGQOW8n0hGRO[8l0eGTO]8j0cGVO_8h0aGWOa8h0_GXOc8f0]GYOe8f0ZG[Oh8UOn0V9dNeG>[Om0l8dNiG>_OP1c8bNnG>AQ1^8aNQH=CT1Y8_NSH>IP1R8bNUH>KQ1m7aNXH=OP1h7cNXH>0o0h7cNXH>5k0b7gNYH>:g0\\7kNZH=?e0V7nNZH>`0e0U7mN[H=a0g0`NQOV8KiH=a0h0]NSOX8HiH=c0V1c6]NjH=c0O[Ng0X8mNjH=b00^Nd0V8oNjHD5HU1n7XNfH>B7IS1S1lNh4]OjJ=A9IQ1P1VOf4TOnJgNf4]NXK>C_2=kNe4XN[K>C_2;POd4SN^K=D`28TOd4oM`K=D_26[Oc4iMcK:]12S2V3RL^L>:]12T2U3QL_L=;_10T2U3PL_L>=^1NT2V3PL_L=>`1LT2V3oK`L=>a1JU2W3mKaL=>a1JU2W3mKaLi2X5_1iJcNW5\\1iJeNV5[1jJeNV5[1jJfNV5Y1jJhNV5W1iJjNW5V1iJkNV5U1jJkNW5T1hJoNV5Q1iJROV5m0jJTOU5l0kJTOU5l0jJVOV5i0jJXOU5h0jJZOV5e0jJ\\OV5c0jJ^OU5a0lJ_OT5a0lJAS5>lJDS5[J^Oj5a0VJ^Ok5b0TJ]On5b0SJ\\Oo5d0QJZOR6f0mIXOU6h0kIXOU6h0jIXOX6f0jIXOW6i0iIUOY6j0kIQOV6o0PJjNQ6V1RJfNP6Y1RJeNn5[1ZJ\\Nh5c1[JYNf5g1^JTNc5l1dJlM]5S2hJiMY5V2oJaMR5`2VIeLb1e0Y5f2UIhL`1a0\\5f2TIoL[1:a5g2TISMX14e5i2SIXMT1Ni5j2SI[MQ1Jm5k2RI^Mn0GQ6j2QIhMe0]O[6k2PInM>WOc6k2oHQN:UOg6j2oHVN5oNm6k2nHWN3oNP7h2nHUOR7k0nHUOS7j0mHUOT7l0kHTOU7l0kHTOV7k0jHTOW7k0jHUOV7k0jHUOW7k0hHlL7g1R7\\1gHlL9g1Q7[1gHmL9h1P7[1gHmL:g1P7[1fHmL;g1P7\\1eHlL2O1J6OM21LHcA6Vm0KbA1N21OZhb0Mnh\\O0k>1]AOc>000fm01_cN1d>N\\A0k>0bP17``NJ\\95`K;Y4L`K:[4MXK>d4HlJg0P5[OmJh0Q5\\OgJi0W5ZOdJk0Z5ZO^Jl0[2QNbMW1Lm0ISO_NZOc1h02n0GP13UN1n0JQ1EbLYNa1T2P1IR1GbLXN^1T2T1GQ1J[N:i0IP1JYN:j0Jo0KWN:l0Io0KWN9n0Hm0OVN6o0In0OUN6n0Jo0OTN5n0JQ1hN[L7f1W1n0JR1dN_L9a1`0dMH[3j0U2mNlLY1P1HV2nNkLY1o0IW2mNkLY1n0IY2mNiLZ1n0IZ2lNiLZ1n0H\\2kNhL\\1m0HU57lJGU59lJFT5:lJEU5;lJDU5;mJCT5V5BjJ>V5BiJ?W5AiJ?W5AiJ`0V5@iJa0W5_OiJa0V5@iJb0V5^OjJb0V5^OiJc0W5]OiJc0W5]OhJd0X5\\OfJg0Y5YOUH3`0e0[7XOSH6?d0^7VOQH9>d0`7SORH;6h0h7mNRH=2i0k7jNSH>0i0m7iNSH>Ok0m7gNTH?Mk
0P8eNSHb0Il0R8cNVHa0^OV1[8YNWHb0dNA5f1P9WNWHc0bNC4d1S9VNWHe0^NE4c1W9RNWH[1^Ne0[9PNWHd3g7]LYHc3g7]LYHc3g7]LYHc3g7]LYHb3h7^LXHa3j7^LVHa3k7_LTHb3l7^LTHa3m7_LSH_3o7aLQH]3P8dLPH[3i2gLfK0l06Y2H\\NZ3l2VMSL3a2]O`NX3o2fN^NRNbNX3P3hN]NPNcNW3f2iLiKU2l2kMeNV3f2lLgKT2e1lM]OMa0U3l2YOQMjM_OHd0U3l2[OmLkMBEe0T3j2BiLjMF@g0T3?QMMk2mNeMM[Oj0S3=_MPN3j0`2KdM2XOl0R3=WNjNn1HcM5VOl0R3;YNjN\\2XOWMg0ROl0R3:ZNiNd3NPKn0R3;\\NeNf31lJo0R39_N^N^NIV5a0kJo0R38aNcNd36iJn0S38bNbNc38hJn0S38bNaNd39gJn0R37fN_Nc3i4dNXHn0o2>i4dNXHn0n2?j4cNXHm0o2`0j4bNWHl0Q3a0i4cNVHk0Q3c0j4aNUHl0Q3c0j4aNUHl0Q3b0m4`NRHm0P3d0Q5]NoGm0Q3g0S5XNmGQ1o2h0Y5RNhGU1P3i0_5VObJj0^5UOaJl0a5RO_Jn0c5PO]JP1d5nN]JS1c5lN^JT1c5hN_JX1e5cN[J^1h5_NXJb1k5XNXJh1`90O1O010[GkM]4W2bKkM\\4V2_KoMa4Q2^KPNa4Q2\\KRNd45hGQ1c3kNd46hGo0d3kNd46hGo0d3kNd44jGQ1b3jNe43lGS1G`Nb3;j41oGY1V3eNl40PHZ1U3fNk40PHY1U3hNj40QHV1T3lNl4MQHV1o2^NeL?\\8MPHV1o2_NcL>_8MoGV1o2_NcL=`8OmGU1P3^NdL=`8OmGV1o2_NcL;c8MnGX1l2lNY5JlGZ1k2kNZ5KkGZ1k2kNZ5JlG[1j2kNZ5JlGZ1k2kN[5IlGU1m2SOY5GjGT1g2iNhL>g8EjGS1m2XO[5DiGS1k2YOW6f0jIZOW6d0hI^OY6a0fI@Z6?fIA\\6=eIC[6;fIE]64hIK[61gIOZ6NhI1[6KgI4^6DeI<^:11O:DaQl2" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"l;S3m;0O10000O10000O1O100O1000000O10000O100O100O10000O1000000O100O100O100O1000000O100O1000_OoDRMR;k2b0N3M2O2N1O2O001O1O10O01000O010O1O01OTO_MZE`2g:bMXE\\2i:eMWEY2k:hMTEV2n:jMQEV2o:lMPEQ2S;PNlDn1V;SNiDk1Y;UNfDj1[;WNeDg1];YNcDf1^;[N`Dd1a;]N^Dd1a;]N^Dc1c;^N\\Da1d;`N\\D_1e;bNZD\\1g;fNWDZ1j;gNSD[1m;fNQDY1QO100O100O1O2QOhB3Z=ETC4n=M2O2N2MdTh27njWM9B_OTBj0j=7O100000O03N7I9PM]NPHf1k7\\NTHe1k7\\NUHd1j7^NUHa1k7_NUHb1j7_NVH`1j7`NWH_1i7bNVH_1i7aNXH^1h7bNXH_1h7`NXH`1h7`NYH_1g7aNYH`1f7_N[Ha1e7]N]Hc1d7YN`Hf1]:0000000001O0001N1000001N100YDaNl9a1PFiNh9X1UFnNg9T1UFPOj9[1eEkNZ:n2O1O2N1N2O1O100O10000000000000000O10O02O0O100O1O1O1O1O1O100O1O1O1O100O100O100O100O1O100O1O1O100O1N2O1M3C=N2O1M3O1O1O1N2N2N2N2N2N2O1O100001O00000000001N1001O0001O000O10001O00000000001O00001O00001O00000000001O0000000000001O00000000001O000000001O00000000001O000000000000001O0000000000000010OO10000000001O00000000000000001O0000000000000000000000001O0000000000000000000000000000000000000000000000000000001O0000000000000000000000001O000000000000000000000000000000000000001O00000000000000000000001O00iF" + } + ], + "model_output": "A young boy with short dark hair, wearing a beige baseball cap and a pink and white striped shirt with a graphic design on the front, is sitting on a bed, holding a yellow cup in his right hand and pointing with his left hand." 
+ }, + { + "image": "images/caption_detailed_77.png", + "subject_name": "person", + "object_name": "road", + "predicate_name": "crossing", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "`gm36Q=4kNIjDc0P;GhD>LWOm:?QE?J\\OR;6PEP1e0_N`9Y2]FhMb9[2[FgMd9\\2YFdMh9_2SFbMm9m22O1O5K4L1O1kNdEYO]:e0eEZO]:e0cEYO_:f0cEWO`:g0bEWO`:c0eE[O_:LXEI?9]:FYF9`;L1O1N3N2M_PQ4" + }, + { + "size": [ + 427, + 640 + ], + "counts": "c9g3c910000O100O01000O2N100O10000O1000000O100O1000000O100O100O10000O10000N2O10000O1O10000000000002N8H1O001O00000000001O000000O100000000000000FVLRGk3^8i0J6O1N2F:O100000000O1000000001O000000000000000000O100000000000000000000000000000000000000O1000000001O0000000000000000OYLWHn1i7mM]HR2c7iMcHV2]7iMdHW2]7gMeHX2[7fMgHZ2Y7dMiH\\2W7aMlH_2S7`MoH`2Q7^MQIb2o6\\MSId2n6WMVIi2k6PM[IP3f6oLZIQ3g6nLYIR3i6kLXIU3k6cLZI]3Z8000O100O100O100O100001O0000000000000000O10000001O1O00000000UOdLfG]3Y8eLfG[3W8jLgGV3X8kLgGV3X8lLgGT3X8mLgGT3Y8lLgGT3Z8kLeGV3\\8iLdGW3_8fLaGZ3h8\\LXGe3i8ZLVGg3W901O2N[N_LaI`3^6cL`I]3`6dL_I\\3a6fL]IZ3b6kLZIU3f6lLYIT3g6mLXIS3h6nLWIR3h6[MlHe2S7aMhH_2W7fMeHZ2[7iMbHW2^7lM_HT2a7mM^HS2d7kM\\HU2j7fMUHZ2S8^MlGc2W8ZMiGf2Y8ZMdGg2\\8T100000000000000000000000000000000000000O100O1LUKgGl4U87O1O100000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000001O000000002N4L00001O1O0000001O0000001O004L1O00000000000000000000000000O100O1O1O1O100O100M3N2M3N2O1N2O1N21O001O00001O1O1O00000000O1N200O100000000001O1O2N1O00000000O100O1001O001O00001O=lJ_Gd4Q9C2N001O0000000000000000O100001O0000O100000000000000O1000000000000O100M3FeKcG\\4X8eKaGf4^84O1001OO10000001O000000000000001O0000001O1O1O00000000000000001O0000000000000000000000000000000000O100O1001OO1O100000000O10000001O0000000000001O1O2N2N5K4L002N000000000000000000000000000000001O2N4L3M3M2N2N000O1001O0000000000000000000QMYLZLg3e6000O1L4O1LSLiFn3W93001O1O6J5K3M5K4L1O_HiLo4U3d21O00000000000000000000000000000O100O100N2N2O1O100N2O1O100O1001O000000000000O100001O00000000000000O1000000000000000000000000
0000000000000000000000000000000000000000000000000000000000ZF" + } + ], + "model_output": "A person wearing a dark jacket with a fur-lined hood, light-colored pants, and dark shoes, with short hair and a dark-colored beanie, is walking on the road." + }, + { + "image": "images/caption_detailed_78.png", + "subject_name": "person", + "object_name": "bench", + "predicate_name": "sitting on", + "mask_rles": [ + { + "size": [ + 429, + 640 + ], + "counts": "WS_24X=2N1O2N1O100O1O100N2O1O1O1O1[OBYD`0c;FZD;c;KYD6f;MWD4g;OVD3i;h0O2O0O2M2O1O1O10001O001N1L4N2O2M2O1O2N100O1O1N2O100000000000010O01O001BTEZNm:^1TE[N06n:]1^E[Nf:d1b000E\\DmNd;S1\\DnNd;]1010[O]DoN37a;e0iD[OY;KXD`0b0D\\;:fDF\\;7eDH_;3bDN^;1cDN];2cDN_;ObD1_;MbD3`;JbD5[<01O0000001OO10O10Sli4" + }, + { + "size": [ + 429, + 640 + ], + "counts": "R\\Y1i0a<3O1O1O1O0OG7M221101O1O1O1L4J6J6OGPDWOPOmgd4" + } + ], + "model_output": "A person with long, wavy hair, wearing a patterned dress with a mix of dark and light tones, and dark shoes, is sitting on a bench with one leg crossed over the other, and the left arm extended forward, resting on the armrest." + }, + { + "image": "images/caption_detailed_79.png", + "subject_name": "person", + "object_name": "tie", + "predicate_name": "wearing", + "mask_rles": [ + { + "size": [ + 640, + 428 + ], + "counts": "TjW56fc05L4K4M4N2J7G8N2N3N0O3N3L2O1N2O1O1N1000O1O001O1O10O01O1O1O00100O10O0100O00_OfN^^OY1\\a0QO`^Oo0]a0WOa^Oh0]a0m002N20O0[Ob^OiN_a0l1O1O101N1000001O001O1O1O1O1O1O010O000010O1000O10O1O2N1QNk^OP1Xa0iNn^OV1na0O1O2N1O2N1O2O001N100O2O0O2N100010O0010O010O01O01O001O001O010O10O1O010000O4M5K2N4L2O1O2N0N3N1M4M1O3LYae0" + }, + { + "size": [ + 640, + 428 + ], + "counts": "l]V63_c0Ol\\O3Qc00m\\O2Rc0Ol\\O3Sc0Nj\\O5Uc0;0O1O1O1O2N3M4Ca\\O1oam1" + } + ], + "model_output": "An older man with a receding hairline and a full head of hair is wearing a dark suit jacket over a light blue dress shirt and a dark tie." 
+ }, + { + "image": "images/caption_detailed_80.png", + "subject_name": "dog", + "object_name": "elephant", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "mce59b0Le;9PD4k;NQD7i;^O[D3Jc0h;XOcDR1m;L01M4M^OZDMZ;2jDKV;5lDIT;8SE@m:`0n0:F2O1QDPOa;R1[DROc;P1[DPOe;Y1HdNdD\\1];fN`Dm0O\\Oa;c0iD[OW;c0lD\\OS;d0QEXOP;g0SEVOl:j0j0000000002N3M3N[OQD0m;JnCI=8eWIAh6?YI@g6`0]I\\Oc6d0aIXO^6i0cIVO]6k0eIRO[6n0fIQOZ6o0hIoNX6Q1kIlNU6T1lIkNT6U1nIiNR6X1PJeNP6\\1RJaNn5_1UJ^Nk5b1VJ]Nj5d1WJZNi5f1WJZNj5e1WJZNi5f1XJYNi5g1WJWNk5h1WJVNi5j1XJUNi5j1XJTNi5l1k2O010O1O002N001O1O2N1O101N1O2EZDoNh;o0YDoNj;o0XDoNi;P1XDoNj;o0VDPOl;P17011OO0O3N4L11N01O1O10O100O00012NO01001N1N2O1N3O1N3N001O0O2M2N5JbPP4" + } + ], + "model_output": "An adult elephant with a brownish-gray skin tone, large ears, and a long trunk extending downwards stands beside a dog. The elephant's tusks are not visible, and its eyes are small in proportion to its head. Its tail is thin with a tuft of hair at the end, and its legs are sturdy and columnar, supporting its massive body." 
+ }, + { + "image": "images/caption_detailed_81.png", + "subject_name": "potted plant", + "object_name": "vase", + "predicate_name": "in", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Sa\\13k>`0A?B:F>B;E:FP3R4PMlMNbM7=k2U4PMlMNaM:9d2\\4oLlM0\\M?j2j2oL[LCj2l2mL\\LA;i02m04U3P4aKkK=On06T3o3aKmK>Mm06U3o3`KPL?Hm09T3n3`KRL`0El0;U3l3aKUL=Cn0a0c5S2VLlMj3V2VLhMj3[2VLbMj3_2XL^Mh3e2XLXMh3i2ZLSMg3o2T43dEoLc8S3[GoLc8R3\\GPMb8P3^GQMa8o2_GRM`8m2aGSM_8l2bGTM^8k2cGTM^8k2cGSM_8l2cGRM^8l2dGRM^8m2dI`Mn3_2mKgMS4X2nKhMR4W2nKkMjMZOf4i2bMnMfM_Oc4a2iMQNcMNT4n1\\NTN`MOS4l1^NVN^MNT4j1`NXN\\MNT4h1bN[NWMNX4f1aN]NUMOZ4c1bN_NRMO\\4a1cNaNoLO^4`1cNaNoLN_4`1aNdNmLOb4]1_NUNYLFe0i0c4[1_NVN\\LCc0k0c4\\1^NUN_LAa0n0b4\\1]NVNbM=R4]1WNZNhM9Q4\\1UN^NiM6S4\\1SN_NjM5S4\\1RN_NkM5T4\\1PN`NlM4T4\\1nMbNlM4V4[1lMaNnM5V4Z1lMaNmM6W4Y1lMaNmM4Y4[1jMaNmM4Y4\\1jM_NiM9]4X1jM_N]Me0i4l0kM^N[Mg0j4k0lM]NYMi0k4j0lM]NYMh0l4k0mMZNXMk0k4j0QNWNTMo0l4j0WNPNmLV1l4j0[NkMkLY1k4k0AUO?l0@TO`0l0@SOa0m0_OSOa0n0^OROb0o0]OQOc0P1]OnNd0S1\\OkNe0V1[OoMTKNa5S2^OgMXK3[5U2=fMDZ2a600000000O10000000000AaC]N_5CJk0CXO>f70O001O1001N1O2N8FhRj3" + }, + { + "size": [ + 480, + 640 + ], + "counts": "mki51]4o0T5ROTJk1e5ZNfIY2X6iM\\Hc3b7\\LkFJ]1P4f7WLmFI[1S4g7TLnFIZ1U4g7ULWHn3i7mK[HS4e7jK^HW4U91O001N2O0O1N3N1N2O2O0O1O2O0O10000O2O00000O10000001O0000001O00001O001O1O2N1O1O1N3NnNbKcG]4\\8dKfGZ4S8mKnGR4P8PLQHn3P8RLRHl3n7TLSHk3m7ULSHj3n7WLRHh3n7XLRHh3o7XLPHg3Q8ZLnGf3R8ZLnGe3T8ZLlGe3U8[LjGf3V8[LiGd3Y8\\LeGd3\\8]LbGc3_8_L^Ga3c8aL[G^3g8bLWG^3j8cLTG\\3n8eLoF\\3S9iLeFW3]9U11N3M3L4N2L6K3MXL`KVM[4j6M4K6J4L4K5K5J5L5K4M4I8K6H8H:F:E?]OeVS2" + } + ], + "model_output": "A brown ceramic vase with a glossy finish and a curved handle on one side, featuring a smooth surface and a slightly tapered base, is in front of the window." 
+ }, + { + "image": "images/caption_detailed_82.png", + "subject_name": "person", + "object_name": "wall", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 478, + 640 + ], + "counts": "^[b35g>3N1N3N1N2N2O1N2N2N2N2N2O1N2O1N2O1N2O1O1N2N2O1O1O1O1O1O1O100N2O1O1N2N2N2O1O1O002M200O1[OSNXDn1e;YNVDh1f;`NTDb1j;j0M3N2M3L4M3O1N2N2O1N2O1M3O100O10000000000000O10010O0O10000gLQEd2n:[MZE^2f:aM[E_2e:_M^E`2c:YMcEg2]:XMdEh2]:WM[EQ3e:nLRE[3V;3J6O1O1O1O1N201N1O1O002N100O1O100O1O100O100O10000O1000000000O1001N10O100001O0O01000000000O10000000000000001OO100000010N100001O00O100010O00O100010O00O101O010O000O1010O0001N2O010O001N110O001N101O100O001N2O010O001N2O010O1O0O11O00000000000000000000000O10000O10000O10000O100000000O10000000000O10000000000001O0000000000000000001O0000000^ORETMn:k2WEPMj:P3]EgLf:W3a000O10001N1000000O2O0O101N1O1O101N1O1O2O0O2N1O2N1N3N2N2N2N3M2N3M2N2N2M3M3N2N2M3M3M3M3M3M4L4K6J8Eon[1" + }, + { + "size": [ + 478, + 640 + ], + "counts": "0\\6b8000000000000O101O0000O10000000O1000000000000000000O100000000000000O100000000000000000000O10001OO100000000000O1000000000000O2OO1000000000O100000000O10000000000O1000000O1000001N1000000000000000000000000000000001O0000O100000001O00O1000000O1000O100000000000001O0000000O10000000000000000O2O00O010000O1N2K5M3N2O1N2O1O10000O10OO3N1O1O1N200O1O00100O1O2N1O010O101O0O1O100O100O100O1O1O1N2O1O2O0O100O10000O01000O2O0O10O010001N010O100O2O00O0100O1O100O100000000000000000000O101O0O00100O1O100O10001O0O010O10000O10000O2O00O10000000000000000000O1001O00000000000000000000000O1001O0001O0O20O0001N2O10O0001O1N200O001N6K3M010O1N1010N1000O100001O0O010O2O0000O100001O0000O11O0001N1001O01O000O11O0000O2O01O0001O0O11O0000O2O0010O01N1010O01O1N10100O00001N11O00jE\\KR:k4L2O10O0000O2O1OO2O00O2O010O00O1000000O10O2O01O0000000O10000000O11O0O1000000000000000000000000000000000O0200OO10O10000000O1001O000000O11O000O10O11O00000000000O100000001O00O100001OO01000001O0000O11N10O10000001O000O011O00O10000001O000O10000000O100001O000O011O00O10000001O00O1000O10000000001O0000
000O1000O1001O0000000O100000O1001O00000O10000000O1001O000O1000000000O1001O000000000O10000000000O100000000000000000000O1000000000000000000O10000000000000000O1000000000000000000O10000000000000000O10000000000000000000000O10000000000O1000000000000O1000N" + } + ], + "model_output": "A close-up of a human finger with a visible thumb and two fingers, showing the skin texture and natural nail color, is in front of the wall." + }, + { + "image": "images/caption_detailed_83.png", + "subject_name": "sky", + "object_name": "road", + "predicate_name": "over", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "0d2h:0000000000M300M3L4N2O100O1UOlD^OT;=`ETO`:k0cEQO_:n0P1O1O1N2O1O1000000001O001O001O1O1O1O001O0000000000EjC@V_CCad1_O\\5d1RI]Ob1oN]5j2cJVM]5j2cJVM]5j2dJUM\\5l2bJUM^5k2bJUM^5k2cJTM]5l2bJUM^5l2aJTM`5l2_JTMb5k2^JUMc5j2]JVMi5e2WJZMm50eH\\2^1dMY6V2gIjM]6R2cIoM^6P2aIoMb6P2]IPNe6m1\\ISNe6m1ZISNi6j1WIVNj6i1VIWNl6T1eG\\O_1@n6e1RI[No6e1PI[NR7c1nH]NR7c1nH]NS7b1mH^NT7`1mH`NT7^1mHbNT7]1kHdNW7Y1jHgNV7X1kHhNV7V1lHiNT7W1lHiNU7U1lHkNT7U1lHkNT7T1mHlNU7R1kHnNT7S1lHmNT7S1lHmNU7R1kHnNU7R1kHnNU7R1kHnNU7Q1lHoNT7P1mHPOT7o0lHQOS7o0nHQOQ7o0PIQOP7o0nHSOR7m0mHTOS7j0PIUOP7k0PIUOQ7j0oHVOQ7i0PIWOn6k0TISOl6m0TISOl6m0SITOm6l0SITOm6l0SITOn6k0RIUOo6j0QIVOm6l0SITOm6m0RISOn6m0RISOn6l0TISOm6l0SITOm6m0QITOP7k0PIUOQ7j0PIUOQ7j0oHVOS75SG:i1BV7OTG?g1A\\7>eHB[7>eHB\\7>bHC`7:aHF_7:bHE^7;bHE\\7=cHD_7:bHE^7;bHE^7;aHF`79`HG`79`HG`79aHF_7:aHF`79_HHb77_HHa77`HI`78^HIb77^HIc76]HJc76^HIb77]HJc76]HJd74]HLc74]HLc75\\HKd75\\HKd74]HLc74]HLc74]HLd73\\HMe72\\HMe72[HNg70YH0f9100O1TMMTJ3h800000000000001OO1O11O1O00001O000000O100O11O00001O00O1000000000000000000000000O100001O1O000O01001OO100001OO10000000000001OO1kK0fIOPO1^20k41eI0QOO_21j41eI1POMb21k4OeI0QOO^22l4OfIOPO0^22k4NgI0g12h8OkLOeJ0[50eJ0Y50XH1i1Of61ZIOQ60eJ0[50eJ0a80000000jJ2dI3M0a4KT70000000000000000000000000000000000000000000000000O1mN;TFEk9l0dEUO[:T1000000000000000000000000000000O1000000000000000000000000000000O11O0000O11O00O10000000000000000000000000000000000
00O100000000000000000001O1O3M4L;Eb0^O>jFSMf8Y3F?A`0_Ok0VO?A5mIZJ35MM[5S7fJoHV4o7ZO1N4MRI" + } + ], + "model_output": "A white bulldog with a broad, wrinkled face, dark eyes, and a black nose sits on a truck. Its ears are small and folded, and it has a red collar with a visible tag. The dog's mouth is open, revealing its tongue and teeth, and it has a muscular build with a broad chest and a deep chest." + }, + { + "image": "images/caption_detailed_85.png", + "subject_name": "mirror", + "object_name": "wall", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "k5b4^:1O3L3N3M3M3L3N3M2N2N2N0O10O100O10O01000O010O010000O10O0100O100O010O100O1000O0100O10O0100O10O10O100O10O10O100O100O010O100O10O01000O010O10O10O0100O10O0100O10000O010O100O10O010O10O100O10O10O100O100O01000O101N3N5J4M5J5L5J6K2M6K3M5J5L3L6K4K5L3M3L6K3M4K5K5L5K4K5L4K4M4K4M5K4K5L4KfP[7" + }, + { + "size": [ + 480, + 640 + ], + "counts": "1_2]1\\O_3W1_L]O2\\O`3W1]L\\O3^Oa3W1ZL[O5^Oc3W1VL[O7^Oe3W1RL[O9]Oi3W1lK\\O;]Ok3W1hK\\O=]On3V1cK]O?]Oo3X1_K[Ob0]OR4W1ZK\\Oc0]OV4V1VK]Od0]OW4W1SK\\Of0]OV4Z1RKYOh0]OV4\\1PKVOk0]OV4_1mJTOl0^OV4a1lJQOn0^OV4c1jJoNP1^OV4e1hJmNR1]OW4g1fJlNS1]OV4k1dJhNV1]OU4m1dJfNW1]OT4n1eJeNW1]OT4o1dJdNX1\\OU4Q2bJcNX1]OU4Q2cJbNX1]OU4Q2cJbNX1]OT4R2dJaNX1\\OU4S2cJ`NY1]OT4S2cJ`NX1^OT4S2dJ_NX1^OT4T2cJ^NY1^OS4U2dJ]NY1]OT4V2cJ]NX1^OT4U2eJ]NW1^OT4U2eJ]NW1^OT4U2eJ]NW1]OT4V2fJ\\NW1^OS4V2fJ\\NW1^OS4V2fJ\\NW1^OR4V2hJ\\NV1^OQ4U2kJ]NT1]OR4R2nJaNo0^OR4Q2PKaNn0^OR4o1RKcNl0^OQ4o1TKcNk0]OR4o1TKdNj0]OQ4P2UKcNj0]OQ4o1VKdNh0^OR4n1VKcNi0^OQ4o1WKcNh0^OQ4n1XKdNg0^OP4o1YKcNg0^OP4n1ZKdNf0^OP4n1ZKdNf0^Oo3n1\\KdNe0]Oo3P2\\KcNe0]Oo3o1]KdNd0\\OP4P2\\KdNc0]OQ4n1]KdNc0^Oo3o1^KcNc0^Oo3n1_KdNb0^On3n1aKdNa0^On3n1aKdN`0^Oo3n1bKdN?^On3o1cKcN?^On3n1dKdN>^On3n1dKdN>]On3o1eKdN=]On3n1fKdN<_On3m1fKdN<_Om3m1hKdN;_Ol3m1jKdN:^Om3n1iKdN:^Om3m1jKeN9^Ol3n1kKdN9^Ol3m1lKeN8^Ok3m1nKeN7]Ol3n1mKeN6^Om3l1nKfN5^Ol3l1PLfN4]Om3m1oKeN5^Ok3m1QLeN4^Ok3m1QLeN4^Ok3l1RLfN2_Ok3k1TLfN1_Ok3j1ULgN0^Ok3l1ULfN0^Ok3k1VLgNO^Oj3l1WLfNO^Oj
3k1XLfNO_Oh3l1YLeNO^Oi3l1YLfNN^Oh3l1[LfNM^Oh3l1[LfNL_Oh3k1]LfNK_Oh3j1^LgNJ^Oi3k1]LgNJ^Oi3k1]LgNJ^Oh3k1_LgNI^Og3k1aLfNI_Of3k1aLfNI_Oe3k1cLfNG_Og3j1cLgNF_Og3j1cLgNF@e3i1fLgNEAd3h1gLgNECa3f1kLgNDG]3b1oLgNCLZ3\\1TMgNC2S3W1[MgNB6n2S1aMgNA:j2n0fMhN@=f2l0jMgN@`0c2i0mMgN@f0]2b0TNhN_Oi0Y2`0XNgN_On0T2:^NhN^OR1o16dNhN]OV1k12hNhN\\O[1h1LmNiN[O_1c1IROhN[Oc1^1EXOhNZOf1[1B[OhNYOj1Y1]O_OiNXOP2R1XOFhNXOS2o0TOJiNWOW2j0QOOhNWO\\2e0kN5hNWOa2`0fN:jNUOd2=bN>jNUOh28^Nd0kNROl25ZNi0nNnNl25UNn0TOhNj26SNR1[O_Ni29kMY1@ZNj27fM`1DTNl27_Mf1LlMg2<]Mh1m5TNSJm1Q6oMoIQ2`90O100O1O100O100O100O1O100O1O100O100O1O10000O1O100001O2N1O4L2N2N2N001O1O1O0000N2O1N2O1O1O1O1O1O100O1O2OO0100O2N1O010O100O1O1O100O1O10000O1O1O1O100O100O100O100O1O1O10000O1O100O1O100O1O100O1O1O1O100O100O1O1O100O100O100O1O1TH`LW4a3hKaLW4_3hKcLV4^3jKbLV4^3jKbLU4_3kKbLT4^3lKbLS4_3mKaLS4_3mKaLR4`3nK`LQ4a3oK_LQ4a3oK_LP4b3QL]Lo3c3QL]Lo3c3QL]L`M1W5b3ZM\\L^M`0k4T3gM\\L]Mm0`4g2SN\\L]MT1Y4`2ZN\\L]M]1P4W2cN\\L]Mh1f3k1mN^L[MS2]3_1XO^L[M`2Q3Q1D_L[Mg2k2i0J`LZMR3c2=3aLZM]3Y21>aLYMh3o1Gh0aLYMV4b1XOU1bLYM]4[1QO\\1cLWMg4T1eNe1dLWMP5m0ZNl1fLWMZ5d0oMU2gLVMg5:aM`2hLVMn53ZMg2hLVM]6^OZI0c3\\3gLTMX:l2hETMX:l2hETMX:l2hETMX:m2gERMZ:n2fERMZ:n2fERMZ:n2fERMZ:n2fERMZ:n2fERMZ:n2fEQM[:o2fEPMZ:P3fEPMZ:Q3eEoL[:Q3eEnL\\:R3dEnL\\:R3dEnL\\:R3dEnL\\:R3dEnL\\:S3cElL^:T3bElL^:T3cEkL]:U3cEjL^:V3bEjL^:V3bEjL^:V3bEjL^:V3bEjL^:V3bEiL_:X3`EhL`:X3`EhL`:W3aEiL^:X3cEfL^:m2[ESM70^:k2_ESM32^:j2dEPMN5_:j2RFVMn9i2TFUMm9j2TFVMl9j2TFVMl9j2TFVMl9j2UFTMl9l2TFSMm9m2SFRMo9n2Q11O1N2O1O001O01O04L000O4M1O1O001O00000000003M2N1O001O00000010O00O1001O001O0M4eLoDNGO3Mh04]O^Ob0K5N2N2N2O1O2N002N100O1O011N10O0101N10O0101N100O100O100O1O100O001O1O100O1O1O1O2N1O1O1O1O001O0O20OO2N2N1N3M3J5@a0L4N100M3O2N1M3O2L4^OlE[OY:b0?M4N2N1N201N2N100O101N2N3K[Vf0" + } + ], + "model_output": "A black cat with a sleek coat and pointed ears, featuring bright, alert eyes and a small, pink nose, is sitting beside a laptop." 
+ }, + { + "image": "images/caption_detailed_87.png", + "subject_name": "teddy bear", + "object_name": "teddy bear", + "predicate_name": "beside", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "^ZV39l<8K6I5L3N1N2O1N1G:M2N2N2N3M2O1N2N2O1N2N2O1N2N2VOlM^FU2`9oM]FR2b9QN[FP2c9SNZFn1f9SNZFm1e9UNYFk1h9VNVFj1j9WNVFi1j9XNTFh1KfMn9d0VFd1o9\\NQFa1R:_NnE`1S:aNkE_1V:cNhE[1Z:k0O1O001O1N010O01O001O0010O101O001O00001O2N3M2N1O1O0O2O1O1O0010O011N1O10O0100O1O1^MUFd1k9[NWFe1h9ZNZFe1g9XN[Fh1e9WN\\Fi1d9WN]Fh1d9WN\\Fh1e9XN\\Ff1f9YN[Fe1f9[N[Fd1f9ZN\\Fe1e9YN^Fd1d9ZN_Fd1c9WNcFe1e:K1N10O[O`DD_;=cDA^;?cD_O];b0eD[O\\;d0fDYO\\;g0fDSO^;l0`0O2O0O2O1N2N2M4Hfee3" + }, + { + "size": [ + 425, + 640 + ], + "counts": "[UW31Y\\W10_\\lN1]dL6K3M5^ODoC=m;GQD;m;GRD:l;GTD9l;GTD:j;GVD9i;IVD7i;JWD6h;KXD5g;LXD5e:CPF9YO5e:EPFQ1o9SOnEm0Q:WOlEi0T:[OgEf0Y:]OdEc0\\:_OaEa0_:A`E?`:B]E?e:R1001O2M3N001OO1O100O10000O1000001N1000000001O001O1O000010O001O001M20[N\\Ee0c:ZO`Ef0_:YObEg0^:UOfELJ8_:HkE0I5]:HmE2G5]:FnE5G4Z:ERF7E2Z:_OZE2i0=D2Z;MgD2Y;NgD2Z;MfD3Z;MgD2Z;MgD2Y;NgD1[;NfD1[;NfD0\\;OeDO];0eDL^;2P1LWnf2" + } + ], + "model_output": "A plush teddy bear with a light brown, textured fur exterior sits beside another teddy bear. It has a rounded head with two small, rounded ears, and a red nose. The bear's eyes are not visible. Its arms are outstretched, and it has a white snout with a black nose at the tip. The bear's legs are short, with the front paws being white, and it appears to be in a seated position." 
+ }, + { + "image": "images/caption_detailed_88.png", + "subject_name": "snowboard", + "object_name": "car", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Pdj24k>3N2N1O2N2N1O1O1O1O001O1O001O001O0000000000000000000000000000000O1lA\\Ol=l0O1000O010L301O100O110O2N001O000O10000001O00000000000000000000000000O10000000O100000000O101O00002N1O00000000O20mAXOl=n00000000O1N1O2O101N101O00000000O10000000000000000000000000000001O00001O00001O1O001O1O1O1O3DcA1f>2M010O0000I700O\\k]4" + }, + { + "size": [ + 480, + 640 + ], + "counts": "aa`17f>8I5VOFgB=S=KdB;X=j0L3M3N2N1O1O2N2N2O001O0O2O00001O001O000O2O001O00001N101O001O1O3L6K3M2N2M3M3N2M2O1N2N2O1N1O2O0O2N100O1O1O2N1O1O100N200O10000O1O00100O01O0010O01O010M210O0O20OO2O1O03N001O0O100O2O00000POlKTGT4l8TLlFl3S9VLlFk3S9TLmFm3T9RLlFn3T9SLWFHa0U4W9XLgFi3Y9YLeFg3\\9WLeFi3[9WLeFi3[9XLdFh3[9[LcFe3]9[LbFf3^9ZLbFg3]9YLcFg3]9YLcFg3]9YLcFg3]9YLcFg3]9YLcFg3]9YLcFg3]9YLcFg3]9YLcFg3]9YLcFh3\\9YLbFh3^9XLbFh3^9XLaFi3_9WLaFi3_9WL`Fj3_9XL^Fk3a9j000O1000_K_Fd3_9[LdFd3\\9\\LdFd3\\9\\LcFe3^9ZLbFf3_9XLbFi3]9WLcFi3]9WLcFi3]9XLaFi3_9WL`Fj3a9TL`Fl3`9TL_Fm3a9SL_Fm3a9SL_Fn3`9SL^Fn3b9RL^Fn3b9RL_Fm3a9RL_Fo3a9QL_FP4`9PL`FP4`9PL`FP4`9PL`FP4`9PL`FP4a9oK_FQ4a9oK_FR4`9nK`FR4`9nK`FR4_9nKbFR4^9nKbFR4^9mKcFS4]9mKcFS4]9mKcFT4\\9lKeFS4[9mKeFS4Z9nKfFR4Z9nKfFR4Z9nKfFR4Z9oKeFQ4Z9PLfFQ4Y9oKgFQ4Y9oKgFQ4Y9PLfFP4Z9PLfFP4Z9QLfFn3Z9TLeFl3Z9ULeFk3[9m0000WKfFl3Z9SLgFm3Z9l00001O01OO10YKeFi3[9n0O1100O1OO10000000001O00000000001O00000000QKhFV4X9jKhFV4X9jKgFW4Y9h00QKgFX4Y9gKhFX4X9hKhFX4X9hKhFX4X9hKhFX4Y9gKgFZ4Y9eKgF[4Y9eKgF[4Y9eKgF[4Z9dKeF]4]9aKeF]4\\9bKgF\\4Y9[KfFK2j4d9WK[Fi4e9WK[Fi4n90001O01O01O000010O0000010O0010O01O01O00001O000000001O00001O0011O2M1O2O0O2O0O2N10002N0O00010O100O0010O100O00010O1O1O010O1O001O1O010O1O001O001O1O1O001O1O001O0O2O2N1N100O2O001N2N2O1N1O3N1N1O2N1N3N1O1O1O2N1O2O001O010O001O00001O1O0O10001O001O00001O0O101O001O1O00001O00001O000000001N101O1O1O000O10001O00000O101N101O000O2O1O0O101O001N101O1N1O2O000O2O000O2O1N2N3M2O1O2M3M4M3L3M2
M3N2N1N3M4J6I:IB1O001N2N2O2M2N2Nkol4" + }, + { + "size": [ + 427, + 640 + ], + "counts": "V2S;X200000000000000000000000001O2N2N1O001O2N00000000001OO1001O0000000000000000000000000000000000000000000000000000000000000000001O00000000000000000000000000000000O10000O100O100001O000000000000000000000000000000000jIaM`1_2`NbM_1^2aNcM^1]2bNdM]1]2bNdM]1[2dNfM[1Z2eNgMZ1Z2eNfM[1Z2eNgMZ1Y2fNgMZ1Y2eNjMY1[1VJSOa4CX1U2gNlMY1T2gNlMY1T2gNlMY1T2fNmMZ1S2fNmMZ1S2fNmMZ1S2fNnMY1R2fNoMZ1Q2eNPN[1P2eNPN[1P2eNPN[1P2dNQN\\1o1dNQN\\1n1dNSN\\1m1dNSN\\1m1dNSN\\1m1dNSN\\1m1dNSN\\1m1cNTN]1l1cNTN]1k1dNUN\\1i1eNXN[1h1eNXN[1g1fNYNZ1g1fNYNZ1g1fNYNZ1g1fNYNZ1g1fNYNZ1g1fNYNZ1g1fNYNZ1g1fNYNZ1g1gNWNZ1i1fNWNZ1i1fNWNZ1i1fNWNZ1i1fNWNZ1i1fNWNZ1i1gNVNY1j1gNVNY1j1gNVNY1j1gNUNZ1l1eNSN\\1m1eNRN[1n1eNRN[1n1eNRN[1n1fNQNZ1o1fNQNZ1o1gNPNY1P2gNPNY1P2hNoMX1Q2hNnMY1S2fNmMZ1S2fNmMZ1S2gNlMY1T2fNmMZ1S2eNnM[1R2ZNYNf1g1RNaNn1_1oMdNQ2]1lMeNT2[1kMeNV2[1iMfNW2Z1hMgNX2Y1gMhNY2W1gMjNY2V1gMmMfL5d5n1eMmMmL0^5S2eMmMoLL^5W2bMmMZ3S2fLmMZ3S2fLmMZ3S2fLmMZ3R2gLnMY3R2gLoMX3Q2iLnMW3Q2jLoMV3m1VIhMf3;S3h1VInMl3;m2e1YM\\Ne2d1]MZNc2f1bMUN^2l1eMnM]2R2^4O100001OO100001O00000000000000O100001O00O10lFRNk6o1TIRNk6n1^HRNTO2]8l1_HTNRO2]8j1aHUNPO2_8h1cHXNjN2b8f1dHYNiN1c8f1eHXNgN3d8e1dHYNhN2d8e1dHZNgN1e8e1dHZNgN1e8e1cH[NgN1f8c1dH\\NeN2V2GQ4k1TK\\NeN2T2KQ4g1WK[NeN2R2NQ4d1YK\\NdN2P20S4b1YK\\NdN2P20S4b1YK\\NcN3P20T4`1ZK^NaN2P21U4_1[K]N`N3=Em0?j4\\1\\KoNmNXOj0?l4Z1\\KPOnNYOg0>o4X1]KQOlN\\Of049f0\\5l0SKnNlNWO=98f0\\5l0SKnNA@0f0\\5l0RKoNB_O0f0\\5l0RKPOA^O2e0[5m0SKoN@_O3d0Z5n0SKPO_O^O>:P5X1VKUNfNc0f0Fb06l4\\1]KgNTOFe05k4^1^KRN`N1N9d01f04j4_1gK\\NhN2h01j4a1fK^OAPOi4c1eK]OCnNi4e1dK]OEkNh4h1bK^OHeNi4m1_K^OW6b0hI@W6`0iIbNXNA0O1`0n7^1iI_NkNL]7e1gI_NnNJ\\7g1gI^NPOGZ7k1fI^NY7b1gH_NX7a1hH_NX7a1hH^NY7c1gH]NX7e1gHZNYNEl8R2kH]NT7d1jHXN[NGj8Q2`HcNfN^Oh8P2^HnNh1ZO[2i1lKmNe1_O^2e1kKnNe1_O_2c1lKnNd1_OdM]O^4V2ZLQNlNc0g2JdM_O]4S2]LPNnNa0d2LbME]4n1_LPNQO2J1f27dML[4j1aLoMRO0J3d27bM0]4g1dLlMXO7U23cM5\\4e1eLjMXO9T23`M8_4b1ZMSNh0o0n1o0XMTNi0m0o1o0XMTNi0m0o1o0XMTNj0l0n1P1XMTNk0k0m1Q1XMTN
l0j0l1R1YMSNn0h0i1U1YMSNR1d0e1Y1YMSNX1>_1_1YMSNY1=^1`1ZMSNW1=_1`1ZMSNW1=_1`1ZMSNW1=_1`1[MRNU1>a1`1ZMRNU1>a1`1[MQNT1?a1`1\\MPNR1`0c1`1\\MoMQ1a0c1`1\\MoMQ1`0d1a1[MoMQ1`0d1a1[MoMQ1`0d1a1ZMPNR1?d1a1ZMPNR1>e1b1XMQNS1=e1b1XMQNS1=e1b1XMPNS1?e1a1XMPNQ1a0g1_1XMPNP1b0h1^1YMoMn0c0j1^1YMnMm0d0j1`1XMjMo0f0i1a1>_NBb1>]NBc1>\\NCd1=\\NCe13R6T2jJUNgND=3Q6U2kJTNgNC>4P6U2lJSNfND>4P6U2lJoMkNG95P6U2nJhMnNN44R6U2mJgMnN034R6U2nJfMmN134Q6V2PKdMlN234Q6V2PKdMlN234Q6V2PKdMmN125Q6V2PKdMlN314S6U2PKdMlN314S6U2PKdMlN304V6T2oJeMjN314V6T2oJeMjN313W6U2_J]MUOV15WOX6V2]JRO[OgNX6X2]JQO[OfNY6Z2[JQO[OeNZ6Z2ZJRO\\OdNZ6Z2ZJRO]OcNY6[2YJSOB]NV6`2XJ_NROYOb0CX6d2TJ[NAROV7c2XIgMWOH_8a2ZHfM[OC^8g2XHeMS8\\2kGeMV8[2gGhMY8Y2dGjM[8V2dGjM]8V2cGjM]8V2cGiM^8W2bGiM_8V2bGhM_8X2mGZMU8f2W11_IYMTLNU6i2eM`M[2`2dMaM\\2_2bMcM^2]2aMdMRMMV4_2gNeMQMOW4]2fNdMSM1V4[2gNcMSM3W4Y2eNcMUM8R4W2hNaMUM>n3R2lN`MUM`0n3P2mN`MTMb0n3n1oN_MSMf0k3k1ROcMmLf0AoNV4h2]OcMhLV1f3W1bMTM:b0VNX1m3R1bMVM:a2T29bMWM:_2T2:aMXM;^2T2:aMYMZ3`M`M]O_Of2D>]3_M^M_OAZ6Q3WJkLQOGO6c07V6Q3WJiL]OH6?U6Q3XJhL37d5R3ZJfL28c5S3]JbL1;c5R3gKnLY4Y2ZImM]2JY4Y2\\IkM[2LX4Z2]IjM[2LX4Z2^IhM[2OV4Y2_IhM[2OV4Y2`IfM[21U4Y2`IeM]21R4[2bIcM\\22R4[2cIaM\\24Q4[2cIaM\\24R4Z2cIaM[25R4Z2dI`MZ27P4[2fI\\M\\28n3\\2hIXM\\2k0`JWOR5N=l0`JWOS5M=l0_JXOT5L=l0^JYOU5K[NBe1fJZN_51Kc1gJ^N]5OLa1iJ`N[5OKb1iJ`N\\5NKb1hJbN\\5LKb1jJbN[5LKb1iJcN\\5KJb1kJcN[5KJa1jJgN[5HKa1jJgN[5HJb1jJgN\\5GJa1kJhN[5GJa1kJhN[5GJV25jMKV25jMJX25hMKX25hMJY26gMJZ25fMKZ25fMKY26gMJY26gMIh1hJeN_5CIh1hJcNa5EGh1iJbN`5FGh1jJ`N`5HEh1mJ^N_5JDh1nJ\\N_5LCh1oJYN`5OAh1QKUN`53_Oh1k0XNUOh1k0XNUOh1k0XNUOi1j0WNUOj1k0VNUOi1l0WNUOh1k0XNUOg1l0YNTOg1l0YNTOf1m0ZNSOd1o0\\NQOc1P1]NPO9iJc02VOU6NQO6jJf0^6TOjN2jJj0\\6TOUOk0l0UOUOj0k0VOUOj0k0VOUOj0k0VOUOj0k0VOUOj0k0VOUOj0k0VOUOi0l0WOdN2SKg0Y6WOcN7PKb0]6WOcN:lJ?b6WOcNW1^1iNcNU1^1kNcNS1^1mNfNl0]1TOfNl0W1TOnNi0P1WOROi0l0WOUOk0h0UOYOm0d0SO]Om0b0SO_Ol0a0TO_O7XJ7Y6B_O5]J6T6E@3bJ2o5K_O2Y1NR60000000000000000000000000000000000000000000O1000000001O0000000000000000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000N" + } + ], + "model_output": "A single-engine propeller aircraft with a high-wing configuration, featuring a tricycle landing gear setup, is flying over the sky. The aircraft has a sleek, aerodynamic design with a pointed nose, a bubble canopy, and a vertical stabilizer with a rudder. The wings are straight and positioned above the fuselage, and the propeller is at the front of the aircraft." 
+ }, + { + "image": "images/caption_detailed_91.png", + "subject_name": "tree", + "object_name": "rock", + "predicate_name": "beside", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "0]=c100000000000000000000000000000000000000000000000000000000O1000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000000O11O00O100000000000000000000000000000000000000000000000000000000000000hI[NVI2d5c16cNWJd0`5i06iNUJc0b5d082lJ_Nh3`1[16dJaNo3Y1[1;WJPN1e0[4P1\\1\\1bNdN]1]1cNcN]1^1bNbN]1b1`N^N_1d1`N\\N_1e1aN[N_1e1aN[N_1e1aN[N^1g1`NZN`1f1`NZN`1f1`NZN`1g1_NYNa1i1\\NXNd1i1[NWNe1i1[NWNf1h1YNYNg1g1YNYNg1h1XNXNh1h1XNXNh1h1XNXNh1h1XNXNh1h1WNYNi1g1WNYNh1i1WNXNh1g1YNXNg1i1XNXNh1h1XNXNg1h1ZNXNf1h1ZNXNe1i1ZNXNf1g1[NYNe1g1[NYNd1g1\\NZNd1f1\\NZNd1e1]N[Nb1e1_N[Na1e1_N[N`1f1_N[N`1f1`NZN_1g1aNYN^1h1aNYN_1h1`NXN_1i1aNWN^1i1cNWN\\1j1cNWN[1k1eNUNZ1k1gNUNY1k1gNUNZ1j1fNVNZ1i1gNWNY1i1fNXNY1h1hNXNW1h1jNXNV1g1kNYNT1g1lNZNT1f1lNZNT1f1kN[NU1d1lN\\NS1e1lN\\NT1c1lN^NS1c1iNaNW1_1gNcNY1\\1gNeNX1[1fNhNZ1X1eNiN[1W1dNjN[1V1eNkN[1U1dNlN\\1T1dNlN\\1T1cNmN\\1T1dNlN\\1T1dNlN[1T1eNmNY1U1gNkNX1V1gNkNX1U1iNkNV1V1jNjNU1W1jNjNV1V1jNjNU1W1kNiNU1X1jNhNV1X1jNhNU1Y1kNgNS1[1mNeNR1]1mNcNS1]1mNcNR1]1oNcNP1^1oNcNP1^1PObNo0`1PO`No0d1nN\\NQ1g1mNYNR1o1gNQNY1R2cNoM]1S2aNmM_1V2^NjMb1W2\\NjMc1d2oM]MQ2h2iMYMV2k2fMVMZ2l2aMWM^2k2_MWMa2k2YMYMg2i2PM^Mo2f2dLnIWOa3U4U70000O10000000000O100000000000000001O00000000000000001O000000001O1O001OaNgEWNY:o1]EUNc:U3]MfK^JZ4U5VLhJk3U5XLjJh3T5ZLlJf3S5[LmJe3S5[LmJe3R5\\LnJd3R5\\LnJd3R5\\LnJd3R5\\LnJd3R5]LmJc3R5^LnJb3S5]LmJc3S5]LmJc3T5\\LlJd3U5[LkJd3V5\\LjJd3W5[LiJe3X5ZLhJf3Y5YLgJg3Y5YLgJg3Z5XLfJh3Z5XLfJg3[5YLeJg3\\5XLdJh3\\5XLdJg3^5XLbJh3_5WLaJi3_5WLaJi3`5VL`Ji3b5VL^Jj3b5VL^Jj3b5VL^Jj3b5VL^Ji3d5VL\\Jj3d5VL\\Jj3e5UL[Jk3e5UL[Jj3g5ULYJk3g5ULYJk3g5ULYJk3h5TLXJl3h5ULWJk3j5TLVJl3j5TLVJk3k5ULUJk3k5TLVJl3j5TLVJl3j5TLVJl3k5SLUJm3l5RLTJn3l5RLTJn3l5RLTJm3m5SLSJm3m5SLSJm3n5RLRJo3m5QLSJo3m5QLSJo3n5oKSJQ4e4ULmII^1R4a4ZLQJC^1S4]4aLTJZO_1T4^4bL`L^3`3bL`L^3_3dL`L[3]3iLcLW3\\3jLdLV
3\\3kLcLU3\\3lLdLT3Z3oLeLQ3R3YMmLg2P3]MoLc2P3^MPMb2m2aMSM_2k2cMUM]2d2jM\\MV2T2mKmJV2o2m1S2[NmMe1o1nKmJ^2T3d1k1cNUN]1k1aNWN_1i1`NXN`1g1`NZN`1f1_N[N`1f1^N\\Nb1e1[N]Ne1c1WNPKhM^3R4b1`MVO`2k0UM_Ok2c0bLnJDd4j3V6O10000O1000000000000000000000000001O0000O1000000001O0000000000001O00001O000\\MnKZJS4c5oK]JR4^5RLbJn3Z5WLeJi3X5ZLhJf3V5\\LjJe3T5\\LlJd3P5`LPKa3m4aLSK_3k4cLUK]3i4eLWK\\3g4eLYK\\3d4fL\\K[3b4fL^KZ3a4gL_KY3`4hL`KX3^4jLbKV3]4kLcKV3[4jLfKV3Z4jLfKV3Y4kLgKU3V4nLjKS3R4PMnKP3P4RMPLn2o3SMQLn2m3SMSLm2l3TMTLm2j3TMVLl2i3UMWLk2h3VMXLj2g3WMYLj2c3YM]Lg2`3\\M`Ld2^3^MbLc2\\3^MdLb2[3_MeLa2Y3aMgL_2X3bMhL_2W3aMiL_2V3bMjL_2U3aMkL_2S3cMmL^2o2eMQM\\2k2gMUMY2k2gMUMY2j2hMVMX2g2kMYMU2d2nM\\MS2b2mM_MS2a2mM_MT2]2oMcMQ2]2oMcMR2]2mMcMT2]2kMcMV2]2iMcMW2^2hMbMX2_2gMaMZ2_2eMaM]2^2bMbM_2_2_MaMa2_2_MaMb2e2WM[Mj2g2SMYMm2g2RMZMo2f2PMZMQ3e2oL[MQ3f2nLZMS3e2mL[MT3d2lL\\MT3e2kL[MV3e2iL[MX3f2fLZM\\3g2aLYM`3g2^LZMb3f2^LZMc3e2]L[Md3e2[L[Mg3c2YL]Mh3e2UL[Ml3e2RL\\Mn3e2QL[MP4h2lKXMV4g2eK]M\\4b2cK_M]4c2`K^Ma4b2^K^Mb4c2\\K^Mf4`2YKaMh4_2WKaMi4a2UK_Ml4a2RK`Mn4a2QK_MP5a2nJ`MS5_2mJaMT5_2kJaMU5a2iJ_MX5`2gJaMZ5_2eJaM\\5^2dJbM]5^2bJbM_5^2`JbMa5_2]JaMe5^2ZJbMi5\\2SJgMm5Y2SJgMn5Y2QJgMP6Z2mIgMV6`51O1O2N3M7I7I5K3M5K3M3M2N3M3M5K3M1O2N9G7I2N3M2N6J5K3M1O4L4L2N3M2N2N3M5K4L002N5K3M1O0\\M]Ek0c:ROaEm0g:jNZEV1h:gNYEY1k:bNWE]1i:bNXE^1j:^NXEb1k:PNiDH>X2Q<000001O00000000000000001O0000000000000000000000000000000000O1001O000000O11O0000O100000000O10000000000000000O1000000000000000000O100O100000000O1000000000000000000O10000000000000000O100000000" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"`=_1a=0000000000000000000000000000000000000000000000000O1000000000000O10000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000000O1000000O1000000O10000000000O1000000O100000000O1000000O1000000O1000000O10000O100000000O1000000O10000O1000000O10000000000O1000000000000001O00000000001O00000000001O0000001O00000000000000000000000000000000000000000000000000000000000000O10000O100O1O1O1O100O100O1000000O1000000000000O100000000000000000000000000000000001O00000000000000000000000000000000000000000000000000000000000000000000000000000000O10000000000O1000000000000O1000000000000000000000000O10000000000000000000000000000O10000000000000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000O100000000000000000000000000000000000000000000O1000000000000000000O100000000000000000000000000000000000000000000001O0000000000000000001O00000000000000000000001O0000000000000000000000000000000000001O0000000000001O000000001O0000000000001O0000001O0000000000000000O100000000000000000000000000000000000000000000O100O100000000O1000000O10000000000000000O1000000000000000000O10000000000O100000000O1000000000000000000cC" + } + ], + "model_output": "A dense cluster of green leaves with a mix of light and dark green hues, featuring elongated, oval-shaped leaves with smooth edges, is attached to a tree. The leaves are arranged alternately along thin, brown branches, with some leaves overlapping each other, creating a lush and vibrant appearance." 
+ }, + { + "image": "images/caption_detailed_92.png", + "subject_name": "bear", + "object_name": "rock", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "\\fk0h0]<9J5C>XO^NoDV2f::G5M1N3M2N2N3N1N2O1N2O1O1O1N2O1O2N001O1O00100O1O1N2O10O01O1O1O1O0000000000001OO10000001O00001O1O0000001O001O2N001O1O002N1O001O1O001O1O1O1O001O1O1O1O100O1O00100O00100O1000O10O1000O10O100000O0100O1000O100O01000O1O1O0003NO1O10O011O2gCaMk;l2O0O10000O01000O2O0O1O101N10010N2O1O001O0O101O0O1000000000O1000000000O0100O100O00100O1O001O1O1O001O1N2O1O00001O1N2O001O1O1O1N101O1O1O001N2O001O1N2O001N2O1N101O1O1O001N2O1O1O001N2O001O1O001N2O002N002M101O001N2O1O1O001N2O1O0O2N2N101N2N2N1N3N2N1O2N2N2M3N2M3N2L6G_ab0" + }, + { + "size": [ + 480, + 640 + ], + "counts": "T6j8V60O1O2O0O010N2N2O1N2O1O2N1O1N101O1O1O1O100N2O1O1O1O100O1O1O101N1O1OWGYKZ7g4eH`KV7_4jHhKP7W4PInKl6Q4TITLh6k3XIYLe6f3\\I^L`6a3`IdL\\6[3eIiLW6V3jImLS6R3mIRMP6m2QJXMj5h2UJ^Mf5`2\\JbMb5]2_JfM^5Z2bJjMZ5U2gJmMW5R2iJSNS5l1nJVNP5i1QKZNl4e1UK_Ng4a1YKaNe4^1[KgNa4X1`KjN^4U1cKmN[4S1eKoNY4P1hKSOU4l0kKWOS4i0mKZOP4f0PL[Oo3d0RL^Ol3a0ULAi3>XLDf3P3AQM`0n2@RMb0l2]OUMe0i2ZOXMg0g2XOZMk0c2UO]Mm0a2RO`Mo0_2PObMR1\\2nNdMT1Z2kNfMW1Y2iNgMZ1V2eNkM]1S2bNnMa1o1^NRNd1l1[NUNh1h1XNXNj1f1UN[Nm1c1RN^NP2`1oM`NS2_1lMbNX2Z1gMgNZ2X1eMiN_2S1`MnNa2Q1^MPOe2m0ZMSOi2k0VMVOm2g0RMZOo2e0PM\\OR3b0mL_OV3>iLC[39dLH^36aLKb32]LNf30YL1i3MVL4k3KTL6m3ISL7n3HQL9o3GPL:Q4EnKV4BjK>W4AhK`0X4@hK`0Y4_OfKb0Z4^OfKb0Z4^OeKc0\\4\\OdKd0\\4\\OcKe0]4[OcKd0^4\\OaKe0_4[OaKe0_4[O`Kf0`4ZO_Kg0a4YO]Ki0c4WO\\Kj0d4VO[Kk0e4UOZKl0f4TOYKm0g4SOXKm0i4SOVKn0j4ROUKo0k4QOTKP1k4QOSKQ1m4oNRKR1n4nNRKR1m4oNRKQ1o4oNPKR1P5nNPKR1P5nNoJS1Q5mNnJT1R5lNmJU1R5lNmJU1S5kNmJU1S5kNlJV1T5jNkJW1U5iNjJW1V5jNjJV1V5jNiJW1W5iNhJX1W5iNiJW1W5iNhJX1X5hNgJY1Y5gNgJY1X5hNgJY1Y5gNgJX1Z5hNeJY1Z5hNeJY1[5gNeJY1[5gNdJZ1\\5fNdJZ1[5gNdJZ1\\5fNcJ[1]5eNcJ[1]5eNbJ\\1^5dNbJ[1_5eN`J\\1_5eN`J\\1`5dN`J\\1`5dN_J]1a5cN_J]1`5dN_J]1a5cN^J^1b5bN^J^1b5bN]J^1c5cN\\J^1d5bN\\J^1d5bN[J_1e5aN[J_1d5bN[J_1e5aNZJ`1f5`NZJ`1f5`NYJa1g5_N
YJa1f5`NYJ`1h5`NXJ`1h5`NWJa1i5_NWJa1h5`NWJa1i5_NWJa1i5_NVJb1j5^NWJa1h5`NXJ`1h5`NXJ`1g5aNZJ^1f5bNZJ]1g5cNYJ]1f5dNZJ\\1f5dN[J[1e5eN[J[1e5eN[J[1d5fN\\JZ1d5fN]JY1c5gN]JY1c5gN]JY1c5gN]JY1b5hN_JV1b5jN^JV1b5jN^JV1b5jN_JU1`5lN`JT1`5lN`JT1`5lNaJS1^5nNbJR1^5nNbJR1^5nNbJR1^5nNbJR1]5oNdJP1\\5POdJP1\\5POdJo0]5QOdJn0[5SOeJm0[5SOfJl0Z5TOfJl0Z5TOfJl0Y5UOgJk0Y5UOgJk0X5VOiJi0W5WOiJi0W5WOiJi0V5XOkJf0V5ZOjJf0V5ZOjJf0U5[OkJe0U5[OkJe0U5[OlJd0S5]OmJc0S5]OmJc0S5]OnJb0R5^OnJb0R5^OnJb0Q5_OoJa0Q5_OPK`0P5@PK`0o4ARK>n4BRK>n4BRK>n4BRK>m4CTKhKBX4>iKAW4?iKAW4?iKAV4`0kK^OV4b0jK^OU4c0kK]OU4c0kK]OT4d0mK[OS4e0mKZOT4f0lKZOT4f0lKZOS4g0nKXOR4h0nKXOQ4i0oKWOQ4i0oKWOQ4i0PLUOQ4k0oKUOQ4k0oKUOP4l0PLSOQ4m0PLROP4n0PLROP4n0PLQOP4P1QLoNo3Q1QLoNn3R1RLnNn3R1RLnNn3R1SLlNn3T1RLlNm3U1SLkNm3U1TLjNl3V1TLjNl3V1TLiNl3X1TLhNl3X1ULgNk3Y1ULgNj3Z1VLeNk3[1ULeNj3\\1WLcNi3]1WLcNi3]1WLcNh3^1YLaNg3_1YL`Nh3`1XL`Ng3a1ZL^Nf3b1ZL^Ne3c1[L]Ne3c1[L\\Nf3d1[L[Ne3e1[L[Ne3e1[L[Ne3e1[L[Nd3f1]LXNd3h1\\LXNc3i1]LWNc3i1]LWNc3i1^LbLfMh0k5g2_L^LlMh0e5j2_L\\LQNg0`5m2`LZLSNf0]5Q3`LXLVNe0Z5S3`LWLXNe0X5T3aLULYNf0V5U3aLTL[Nf0S5W3bLRL^Ne0P5Y3cLQL_Nc0o4\\3lMdLT2\\3mMcLR2^3PN`LP2`3QN_Lo1a3RN^Ln1b3SN]Ll1d3UNZLl1f3UNYLj1h3XNVLh1j3XNVLg1k3ZNTLf1l3[NSLe1m3\\NRLd1n3]NPLc1Q4_NmKa1S4`NkKa1U4e40O100O1000000O1000000O10000O100000000O100O1000000O1000000O10000O100000000O10000O100O1000000O100000000O10000000000O10000O100O100000000O1000000O10000000000O1000000O1000000O10000O100000000O100000000O100O100O1000000O10000O1000000O10000TJmJj1T5VNnJh1R5XNoJg1Q5YNPKe1Q5[NPKd1P5\\NQKc1o4^NPKa1Q5_NPK`1P5aNPK^1P5bNQK\\1P5dNQK[1o4eNRKZ1n4fNSKY1m4hNSKV1n4jNSKU1m4kNTKS1m4mNSKS1m4nNSKP1n4POSKo0m4QOTKn0l4SOSKl0n4TOSKk0m4UOTKi0m4WOTKh0l4XOUKg0k4ZOTKe0m4[OTKd0l4\\OUKc0k4^OUK`0l4@TK`0l4@UK>l4BUK=k4DUK:l4FTK:l4FUK8l4ISK6n4JSK4n4LSK2n4ORKKS55nJHT58lJEW5;jJCW5=jJBV5?jJ_OW5a0iJ]OY5d0gJZOZ5f0gJXOZ5h0fJWO[5j0eJSO]5m0dJPO^5P1cJlN`5T1aJiNa5X1_JYNTOdM^6S4^JXNP6i1PJUNQ6k1PJTNP6l1PJTNP6l1QJRNP6n1QJQNo5P2QJoMo5Q2QJnMP6R2QJmMo5T2QJkMo5U2RJjMn5V2RJjMn5W2RJgMo5Y2RJfMn5Z2RJfMn5Z2SJeMm5[2TJcMm5^2SJaMm5_2TJ`Ml5
`2TJ`Ml5`2UJ_Mk5b2UJ\\Ml5d2UJ[Mk5e2UJ[Mk5f2UJYMk5g2VJXMj5h2VJWMk5j2UJUMk5k2VJTMj5l2WJSMi5m2XJQMi5P3VJPMj5P3WJoLi5Q3XJmLi5S3XJlLh5U3WJjLj5V3WJiLi5W3XJgLi5Z3WJdLj5\\3WJbLj5^3WJ`Lj5`3VJ_Lk5b3UJ\\Ll5d3UJZLl5f3TJYLm5h3SJVLn5j3SJSLo5m3RJQLo5P4PJoKQ6Q4PJlKR6T4oIjKR6V4oIgKT6Y4mIcKU6]4mI]KV6d4S2001O0000001O001O000000001O000010O0000O2O00010O00001O00001O0O110O000000001O00kJ" + } + ], + "model_output": "A black cellular telephone with a glossy finish, featuring a physical keypad with white alphanumeric characters on the keys, a central navigation button surrounded by four function keys, and a small screen above the keypad, is placed on the table." + }, + { + "image": "images/caption_detailed_94.png", + "subject_name": "pillow", + "object_name": "couch", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "kc05gc04M3O1O1O1O100O100O1O100O1O1O100O1O100O10000O1O100O100O100O100O1O100O1O100O100O100O100O1O100O1O1O1O10000O100O10000O10000O100O1N2O10000O10000001O1O1O1O001O1O1O1O1O1O1O2N1O1O1O1O1O1O1O1O1O1O1M3O1N2N2N2N2O2M2N2N2N2N3L`lW7" + }, + { + "size": [ + 640, + 480 + ], + "counts": 
"o`02d03AMeb0T1M5Z]OhN1NZb0f1L1OO1O1001N10001O10OO2O1O1O1N2O1O1O1O1N2O1O1O100N2002YN\\^Om0fa0PO]^Oo0ca0PO^^OQ1aa0lNb^OT1_a0jNb^OV1^a0hNd^OX1]a0eNe^O[1[a0dNf^O\\1oa0000000000O1000000000000000000O10000000000000000O10000000O100000000000O1000000000000O1000000000O1000O100000000000000O100000000000000000000O10O100000000000000000000000000000000000V_O^NdN2W`0a1SAiNm>W1k@]NlN>Y`0U1h@TOX?l0c@YO]?g0\\@@d?`0\\@@d?`0\\@@c?a0[@Ae??W@Ei?;m_OOS`01j_O2V`0Ni_O3W`0k100000000000000000000000000000000000000eM]LYDc3`;fL^DZ3_;iLaDW3Z;nLfDR3Q;\\LkBg0T2m2b:fM^EZ2a:gM_EY2a:gM_EY2a:gM^EZ2b:fM[E]2e:cMeDS3Z;nL]D[3c;dLPDj3PnLAQ3b0mL^OS3c0mL\\OR3f0mLZOR3h0nLWOR3i0nLWOQ3m0mLROR3Q1lLoNP3W1mIbMh2W1Z3Y1kIdMj2R1[3\\1gIgMk2m0^3h1aLXN_3j1_LVNa3l1]LTNc3n1[LRNe3P2YLPNg3R2WLnMi3S2VLmMj3U2TLkMl3W2RLiMn3Y2PLgMP4[2nKeMR4^2jKcMV4_2hKaMX4`2fKaMZ4b2bK_M^4Y1gIgNf12c4W1gIjNc1Of4g2WKZMa2dNUNV4TOXMg2bNUNY4POVMk2aNUN\\4lNTMW5P3bJSM^5Q3[JRMe5T3RJoLn5W500000000000000000000000000000000000000000eKgId1Y6\\NgId1Y6\\NgId1Y6g20eKgId1Y6\\NgId1Y6\\NgId1c5gKQKe2\\Od1c5gKQKe2\\Od1Y6\\NgId1Y6\\NgId1Y6\\NgId1X6]NhIc1X6]NhIc1X6]NhIc1X6]NhIc1X6]NhIc1W6^NiIb1W6^NiIb1X6]NhIc1X6]NhIc1X6]NhIc1X6]NhIc1X6h21O0000O1001O0ZIgIZ6Y6<0000000000000000000000000000000000000000O100001OO100O11O0000O1O1O1O1O1N2N2001OFkH^JU7a5PI[JP7d5QI]Jn6d5RI[Jo6d5<0eH\\Jo6c5QI^Jo6b5QI^Jo6c5PI]JP7c5PI]JP7c5PI]Jo6d5QI\\Jo6d5QI\\Jo6d5QI\\Jo6d5RI\\Jn6c5RI\\Jo6d5QI\\Jo6d5<0O10000000000000000000001O000000000000000kHZJe6f5[IZJe6f5[IZJe6g5ZIYJf6g5ZIYJf6g5?00000000000000001O0000000000001O00000000000000001O000000000000000000000000000000001O0001OO100000000000001O000000001O`LdHQ1\\7oNdHQ1\\7_20000TLSJ:m5WLTJ[3O>R6AnI?S6@mI`0T6_OlIa0U6^OkIb0U6^OkIb0U6Z300001O001O001O0000001O00000000000000000001O00000001O1O001O001O00000000O10000001O000000000000001OQLUI^1k6bNTI_1l6a2O00PLUI`1k6`NUI`1k6`200000000000000000000000001O1O00000000001O00000000001O000O1000000000000001O000000000001O00000O100001O00000]I" + } + ], + "model_output": "A black touring motorcycle with a large front fairing 
featuring a clear windshield and dual rearview mirrors is parked on gravel. The bike is equipped with a black top case and a passenger backrest, both with reflective strips. It has a visible rear suspension system, a black seat, and a license plate mounted on the rear. The motorcycle also has a black exhaust pipe on the right side and a visible rear tire with a multi-spoke wheel." + }, + { + "image": "images/caption_detailed_97.png", + "subject_name": "bench", + "object_name": "tree", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "k\\T1221P=`0E2M5M5J2O1O1O2N2N3L2O1O1O5K3M6J1O3L`0_O3`E_MS:c2hEdMU:Z2nEdMQ:Q2kEPNd:Q2\\EoMd:]2N2O0O1O1O1O2lEUMe9j2>00001N3N1N2N2O1N2O1O1N2N2O1O1N101O1O100N101O10O1O1M20100O1O1O010O10O01O1O1000000O1000O100000000000000001O00000000O1O1000000000000001O00002N001O00000000000000000001O000000000O100O1O0010000000000000000000001O000000000000000000000000001O000001O00000000001O000000000000O011N1O11O00O1O001000000000000000000001O1O000000000001N100O10O11N101O00000O2O0O2O0O1N2O1O1O1O1O1O100O10000O101O0001O000000001O00001O001O1O1O010N4M1N2O01O01O00001O0O101N110O000O2O000O110O001O000O1O1O11N1O101O1O0O11O0000000000001O1O1O001O0000000000000000O100000000000000O10000000000000000001O0O100001OO1001O00000O10001O01O00001O0O10001O00001O0000O100O10000O100000000O10000001O001O0O2O000O2O00000000OgMQFX1o9hNQFX1o9S1O001O1O00001O002OO01O1O1O3M00010O002N3NO010O0000001O10O01O1N4M1O1O2N1O2O0O1OfHmL\\M2f6o2PLmLZM40Hd6U3TLlLZM6i6k2TLQMR4n2W3N2N2N3N0O101M2N101000OO2O2N0JZEgMg:U2:L3O2K5I6O1L3DXDTOl;k0SDQOR[4^OPL:R4DRL9n3FVL7j3HZL6e3I^L5b3K_L4a3L`L3`3MbL1^3NdL1\\3OfLiMkNa0^4g1jLbMoNb0Y4k1iLaMQOb0V4m1mL[MSOd0P4Q2RMUMPOj0m3Q2WMoLROl0g3U2YMlLSOm0d3W2\\MgLVOm0_3\\2UO]Ml0b2ZOYMf0g2ARM?m22cLN\\3Q5O1O1O002O0O1O010O2N1O1O1N102N1M3O001N2O2M2N2O1O1O1N20nJ_MYO`2d0fM[OY2e0jMZOT2g0lMYOR2i0nMWOQ2j0oMUOR2j0oMVOP2k0PNVOo1k0oMVOQ2j0nMWOQ2k0mMUOT2k0jMWOV2P6N1000O10O101O00O0100000000000000000`JjMKV2J]NNc1NdNO\\11dNO\\11dNO\\10fNOZ11fNOZ11fNOZ11fNOZ
11fNOZ11fNOZ11eN0[10eN0Z10gN0Y10gN0X11hNOX11hNOW12iNNW12hNOW12iNNW12iNNX10iN0W10jNOW10jNOV10kN0V1OkN0W1NjN1W1NjN1V1OiN2W1MjN3V1MjN3V1MkN2U1NkN2U1NjN3V1MjN3U1NjN3V1MjN3V1MeNTKTOP5W2LdNUKUOo4W2KcN<]1DdN;[1FgN8Y1HhN7W1JjN5V1KkN4T1MmN2S1NmN2S1NmN2S1MmN4S1LlN5T1KkN6U1JkN6U1JjN7V1IkN6U1JkN6U1IlN7T1IkN8U1HkN8U1HkN8U1HkN8U1HkN8U1GlN9T1GkN:U1ElN;T1ElN;T1ElN;T1ElN;T1DmNU1BjN?V1AjN?V1@kN`0U1@kN`0U1@kN`0U1@kN`0V1_OjNa0V1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1]OlNc0S1^OmNb0S1^OmNb0R1^OoNb0P1_OPOa0o0@RO?m0BSO>m0BSO>m0BRO?m0AUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>l0@UO`0k0@UO`0l0_OTOa0m0^OSOb0m0^OROc0n0]OROc0n0]OROc0n0\\OSOd0m0\\OSOd0m0\\OSOd0m0\\OSOd0m0\\OSOd0l0]OTOc0l0]OTOc0l0]OSOd0l0]OTOc0l0]OSOd0m0\\OSOd0m0\\OSOd0l0]OTOc0l0]OTOc0k0^OUOb0k0^OUOb0j0_OVOa0h0BWO>h0CXO=g0DYOl0ATO?n0_OROa0o0^OQOb0o0^OQOb0P1]OPOc0P1]OPOc0P1]OPOc0Q1\\OoNd0Q1\\OnNe0R1\\OmNd0S1\\OmNd0T1[OlNe0T1[OlNe0T1[OlNe0U1ZOkNf0U1ZOjNg0V1YOkNf0V1YOjNg0V1YOjNg0W1YOiNf0W1ZOiNf0W1[OhNe0Y1ZOmM[Kc0[5`1ZOmMZKd0\\5_1YOiNf0V1ZOkNf0U1_OfNa0Y1@fNa0Z1_OfNa0Y1@gN`0Y1_OhNa0X1_OhNa0X1_OhNa0W1@iN`0X1_OgNb0Y1^OgNb0Y1^OgNb0Y1_OfNa0Z1_OfNa0[1^OdNc0\\1]OdNc0]1\\OcNd0]1\\OcNd0^1[ObNe0_1]O^Nc0b1]O^Nc0b1]O^Nc0b1]O^Nc0b1]O^Nc0b1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O\\Nc0d1]O\\Nc0d1]O\\Nc0d1^O[Nb0e1^O[Nb0d1_OZNc0f1]OYNd0g1\\OZNc0f1]OnMSK6^5l1_OmMUK3_5P2\\OmMVK1_5R2[OlMQ1T2^OiM6W2IjM7V2_5000QJmMf0S2ZOmMf0S2ZOmMf0S2ZOmMUKMV5V2EmMQKLN1\\5W2DlMRKML1^5V2DkMSKNK1^5V2DjMTKOJ1^5V22iMNW2HkMoJNY5V2ImMmJMZ5V2ImMmJNY5U2IoMlJM[5T2FYN:g1FYN:g1FYN:g1FYN:g1FTNmJG^5V2DXN;h1EXN;i1DXN;h1EXN;h1EXN;i1HhMQK2W5V22hMOX2b51O00O100O100O10000O1000000000000000000001O1O001O001O00O100000000000000000000000000001O000000000000001O001O00001O00001O001O002N2N1O001O2N1O1O2N2N3M2N2N3M1O3M1O1O2N2N1O1O001O1O001O1O1O1PJYLZ2g3]L^LfNNl4e3ZLaLeNNQ5a3XLeLdNKT5`3TLjLdNJW5\\3RLnLdNHZ5Z3nKTMcNE_5W3kKVNU4j1iKYNV4g1hK[NX4e1fK]NZ4c1dK_N\\4a1aKbN_4^1`KcN`4]1^KfNa4[1\\KgNd4Y1ZKiNf4W1XKkNh4V1UKmNj4S1TKoNl4R1QKoNP5S1hJTOW5Q1^JUOb5Z40001O001O000000001O000000
1O1O00001O001O1O001O1O1O1O1O00001O00000000001OO1000000000000000000000000000000000000000000O1000000O100001O00000000000000001O000000000000000000000000000000000000O1000000000000O1000000" + } + ], + "model_output": "A white metal bench with an ornate, leaf-like pattern on the backrest, featuring a symmetrical design with a central floral motif flanked by two symmetrical, curved leaf-like elements, stands in front of a tree. The bench has a curved top edge and is supported by two ornate, curved legs with a decorative finial at the top." + }, + { + "image": "images/caption_detailed_98.png", + "subject_name": "cell phone", + "object_name": "person", + "predicate_name": "held", + "mask_rles": [ + { + "size": [ + 640, + 443 + ], + "counts": "Rlm33mc0O01O1O010O3N1O000O101N1N20O100L4O10000001O0010O01O000001MVXT4" + }, + { + "size": [ + 640, + 443 + ], + "counts": "WcY19dc07J5J5N2M3M3M2N2N2O1N2O0O2N2N101O0O2O1O001O001O1N2O001O1O0KWNR^Oj1ga0^NX^Oc1T?eNiBi1m[OmAP1n=ZOhAj0T>c2M3M4M2M3O1N3M2O1N2O1O1O1O1O100O1O100O10000O100O1[OaIhC`6SjJ\\AW5f>d0]O1OUJmAj5P>UJRB2Ne5o=YJSBk5m=XJRBg5[>H6^JaAX5k>JN14M8I2O1N1M4K5L7HTNnKSDj3Z>G5L5K5K3M2MXLSM`Fk2^=L5XNe^Oa0da0TO`^Oj0ga0nN\\^OP1\\b0N5J2O3M4K2O2N1O2M2O1O4K2N2O3L2J_R`2" + } + ], + "model_output": "A woman with shoulder-length brown hair, wearing a teal tank top with thin straps and a ruffled neckline, paired with dark blue jeans and white sneakers with red and blue accents, is seated with one leg crossed over the other, holding a smartphone in her hands, which are positioned in front of her. She has a necklace with a pendant and is looking down at the screen of the cell phone." 
+ }, + { + "image": "images/caption_detailed_99.png", + "subject_name": "person", + "object_name": "wall", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "c]n25g>5M2N101O001O001O001N100O2O1O1N2O3M4L3QBROg=[1H4M4K6K9F5L2M3N000O1ZOgMeDY2Y;nMaDR2^;SN_Dm1`;j0O1O1O_M_De1a;l001O100000O010ZO`DZM0=a;X2jD[ME2b;a2h0O0O100O101O1N2N2XNbCk0aN4J_Y\\5" + }, + { + "size": [ + 480, + 640 + ], + "counts": "0W4i:00000000O10000001O00000000001O001O001O1O1O001O002N1O1O1O3MW1iN2N1O1O2N8H1OO1L4H8O1O100O10000001O001O1O1O000000O100O100_OWGiJk8i4m0M3N2N2O1O1O1_MfE;[:BmEZNGc1]:1PFYNFc1[:3\\FKe94_FIa95bFJ^92hFLX91lFMU91mFNT90oFOQ91oFOQ90PG1o8OQG1o8NRG2n8NRG2n8NRG2n8NRG3m8MSG3m8NSG1m8OSG1m81PG1o80PG0P91oF0P91nF2P91mF0R91mF0R91mF0R93kFMU94kFNR92nFOQ91oFOQ91nF1Q9OoF2P9OnF5o8LoF8ROPN6OS9j1bGm0]8TOaGm0_8UOQGTNLj2R9ClF>T9h2O001O1O2N1O2N1O0000O1O1M3O1N200O100N2O100O100O1oNPGRLP9l3TGQLm8n3[GiKg8V4\\GgKe8X4^GeKc8[4Q1O100000000000000000000000000001O00001O1O1O1O1O3Md0\\O1O1O1O2N001O00001O00001O00001O001O00001O001O002N1O1O2N1O2N2N2N2N1O1O1O1O0000000000000000O100O1O1O1O1N2O1N2O1RKTGf3n8VL\\Gc3e8\\L]Gb3g8ZL[Gd3P9QL]Gb3W:I3M3N1NlMPMZHP3f7VMUHh2l7ZMQHf2P8[MhGl2X8UMXG[O[Oi3^9mLUGY3k8hLTGc2BQM[9=QGa2NiLR9o0gFU2Q:lMmET2T:nMjEQ2W:QNhEm1Y:TNfEl1Z:\\11O100O10000O1000000000000001O000000001O1O1O2N2XF^Kn8d4PG^Kn8d4lFbKR9`4kFbKT9V5O1O1O001O00000000O1O1N2N2N2O1O1O1O1O1O100O100O100F:L4O1N2N2N2M3N2O100O1O1O10000O1000000O100000000001O0000001O00001O1O001O2N1O1O1O3M6J4YFQKU9^5O1O003M1O0000O1K5N2O1O1O1N2N200O1O1O1K5O1O1L4O100O1O1O1O1O1O10000O100000000O1000000001O001O001O001O2N1O001O2N002N2Nc0QFdJT9e5OFmFgJS9U5>L4O1O1O1N2O1]Oc0N2O100O10000O100000000001O1O1oEjKV9W4eFfKA:d9Q4jFRLV9Q4cFSL]9o3]FTLd9d40000000O100nN[FfLh9Z3XFeLP:GoEh23lLO9PN201N2JhZ^3" + }, + { + "size": [ + 375, + 500 + ], + "counts": 
"lTe21e;101O001O001N2O001O1O1O1O1O2N1O2N1O2O0O1O1O1O10O01O000001O000010O01O00010O001O010O00100O21N5K4L1O01O0O10001N1O101N10000O2O001N10001O001O0O101O1N101O1O001O1O001O1O1O1O100O1O001NWYQ2" + } + ], + "model_output": "A person wearing a black jacket with a high collar, black pants, and black shoes, with short dark hair and hands in their pockets, is looking at a kite." + }, + { + "image": "images/caption_detailed_101.png", + "subject_name": "sand", + "object_name": "playingfield", + "predicate_name": "attached to", + "mask_rles": [ + { + "size": [ + 424, + 640 + ], + "counts": "b8d4d80000000000000000001O000000O10000001O00O10000000O100000001O0000000000O100001OO01000000000000000000O100000000000000000000O1001OO0100000000000000000O10000000000000000000000O11O00O10000000000000000000000000001O0O1000000000000000000000000000000000000O100000001OO100000000000000000O10000000000000001O0000O100001O0O01001O00O100000O100000000000000000001O00O1001O0O10O100001O00000000000000O11O000O1000000000O10000000000000000000000001N010000QLPHg2o7TMYHi2h7dLlHZ3c8N7I6J4L2N2N1O2N1O2N2N2N1O1O002N1O1O001O1O1O1O001O001O00001O001O00001O001_NUEc0k:SO`El0`:POdEP1]:nNeEQ1[:lNiES1X;N00001O2N1O2N1O001O1O001O00001O000000001O00000000000000000000000000001O0000001O000000O1000000001O000000O11O1O0000O1000000O100O1000000000000O1000000000000POZO]Eg0b;00000oN[O_Ee0_:]OaEc0]:@bE`0]:AbE`0]:AcE?]:AbE`0]:BaE?^:B^Eb0a:_O^Eb0a:@^E`0b:B[E?e:BZE>f:DXEC4L2M3N1O1O1O10OO2O1O2N1O1O0010O01O001O0001O00000000A?L4O1N1M3N3O01O1O01O01O1O1O1M4J6_IdJQ5a5eJhJW5h6UNnHWLLb0Y7R3oHZLN=W7V3nHXL?Oh6e3j1M5K4lMeF0d9IhFL^9KoF^Oe9OoFIYX9" + }, + { + "size": [ + 425, + 640 + ], + "counts": 
"`UW31X=0Vb23f]M2nL0iH1X7KiH6T:2M2N2O1O2O000O10000O1000000O1O2N100O1000000O1^FYOa6h0^I\\O_6d0\\ICb6=^ID`6=`IC`6=`IC_6>aIB_6>aIB_6>aIC[MBb33c1i0`MB]6>cIB:Bo2m0gLBZ6?fIDW6dLC]36iLJk4XMSIn0Z2j1S801O00000000000000000000000000000000O1000000000000000000O10000[NWMXG4Z1f2W7lMaHT2W7[NYHoN_Og2R8[NnGkN>U3\\7WNXHcN7OGY3d7ZNRIc1i6`N[I^1`6gN`IY1[6kNgIT1U6POkIP1Q6SOPJm0m5VOSJj0i5XOYJh0d5ZO]Jf0`5]O`Jc0^5^OcJb0]5]OdJc0]5YOfJg0b5oN`JQ1g5eN\\J[1k5YNZJg1P6jMUJV2_800000000000001O00000000000000000000000000000000nFYMa0E[5R3oIbM=F_5h2TJfM8Dc5f2TJoMO^Ok5d2RJXNIZOH]OHNQ6S3dJ?\\3jLUMg2ROn0[3mL]MU2XOn0o2^MdMd1]OnLGl3h2WNoMo0@U1T2UNZN\\5R1YKjNi4m0eKkN`41UHLo31l8N102LUel2" + } + ], + "model_output": "A person wearing dark blue denim shorts rolled up to mid-calf, a yellow top, and black flip-flops stands beside a dog." + }, + { + "image": "images/caption_detailed_103.png", + "subject_name": "umbrella", + "object_name": "person", + "predicate_name": "attached to", + "mask_rles": [ + { + "size": [ + 640, + 640 + ], + "counts": "gok54hc05I7L3K5M3L4L4M3L4N2N2N3M2O1O0O2O1N3N1O1N2O1O2L3O0N3M4K5M1M3M3N4K5I6G8DVC@l<>YC]Oi200O1N3M2O2M4M4L2N2N2N2M2O0O2O000O1M4O000001OO2O00010O0O100010O000000010O0010O00001N1M5M3M3M3[OZNnCi1f;gNTD[1i;P1L3N4L4L5K4M1N2N1000000O1000O10000O101N100O2O0O2O1O1O1O1O2N1O3M`0@3M3M001O001O002N2N0O100LYCPNiRC]OR=a0g0I7M4M3Moob5" + }, + { + "size": [ + 478, + 640 + ], + "counts": 
"2i>500000000000000000000000000000000000000000000000000001O0000O100001O000000000000O1000000001O0000000000O100001O0000000000000000O100000000001O000000jHMA3e7000O100nHOWO1h03UOMk06ROJm09QOGn0O^H6c6Ko0NaH7^6KZ16dNJ_15`NJb16\\NJf15YNKi14VNLk15SNKm17QNIP28nMHT27kMIV28hMHZ27eMI^26`MJa26]MKd25\\MJf24ZMLh23WMMk22TMNn22PMNR30mL1V3OgL1[3MeL3S90[J0\\L0d31\\LNd35YLKg36XLJj34VLLm31SLO^90O11O00fJ0fK0Z4NhK2X4MiK3W4MiK3`91SKLPK4m90PKMUK3j4OUK1j4M[F0j43k4L\\F1i43m9000iJNbFOj43d47YKIg47YKIf49YKGf4mFGe3K]5`0eF\\O2?h3E`5a0fF\\O0?j3D_5a0hF[OOb0i3B^5b0mF0c3^O`5d0jF0e3\\O`5f0hF1g3YO`5g0iF1f3XO`5g1_JYN`5i1_JWN_5k0oFNa3WO_5k0RGN^3WO_5l0SGM^3WO^5l0VGL\\3XO]5m0XGKZ3XO\\5n0[GKX3WO\\5n0]GLV3VO\\5n0_GMT3UO\\5o0aGKS3VO[5o0cGLQ3UOZ5Q1fGIP3VOX5R1jGGn2WOW5R1lGHl2VOV5S1oGGk2VOS5U1THEh2VOS5U1WHDf2WOR5U1ZHCd2XOQ5U1\\HCc2XOP5U1_HBa2YOo4U1bHA_2ZOn4V1cH@_2ZOm4V1fH@\\2ZOm4W1hH^O[2[Ol4W1jH]O[2\\Oj4W1mH\\OY2]Oi4W1PI[OW2^Oh4W1SIYOV2@g4V1TIZOU2@f4W1VIXOT2Ae4W1YIVOS2Cc4W1\\IUOQ2Db4W1_ITOo1Ea4W1bIROn1G_4W1dIROm1G_4W1eIPOm1I]4X1fInNn1J\\4W1hInNl1K[4W1kIlNk1MX4Y1mIiNl1NQ4_1TJbNk1OP4a1UJ_Nk10m3e1XJYNl12T3MkIk1W1TNj14o2c2kN]MQ1h2nNXMQ1k2mNUMQ1R3jNnLU1T3jNlLV1Y3eNgLZ1^3bNbL]1`3bN`L]1b3bN^L]1f3`NZL`1f3`NZL`1g3_NYL`1h3`NXL`1h3`NXL`1i3_NWLa1i3_NWLa1i3_NWLa1i3_NWLa1i3_NWLa1h3`NXL`1h3`NXL`1h3`NXL`1h3`NXL`1h3`NXLa1h3jIULa43e1k3[NULe1j3\\NVLe1i3[NWLf1f3\\NZLe1d3\\N\\Ld1d3\\N\\Le1c3[N]Lf1a3[N_Lf1]3^NbLd1\\3\\NdLf1Z3ZNfLi1Y3UNgLS2o2oMQM\\2d2dM\\M]2b2dM^M]2a2cM_M]2a2cM_M^2`2bM`M_2_2`MbMa2]2_McMb2\\2^MdMc2[2]MeMd2Z2\\MfMd2Z2\\MfMd2Z2\\MfMe2X2\\MhMd2X2\\MhMd2W2]MiMc2V2^MjMc2T2^MlMb2U2]MkMd2T2\\MlMe2S2[MmMe2R2\\MnMe2P2\\MPNe2n1\\MRNe2j1^MVNc2g1_MYNa2d1bM\\N_2a1cM_N]2`1dM`N]2^1dMbN^2[1cMeN_2Y1aMgN`2W1aMiNa2U1_MkNb2T1^MlNc2R1^MnNc2n0nGoN_53c2n0`MROa2m0_MSOa2l0`MTOa2k0_MUOb2j0^MVOb2i0_MWOb2g0_MZO`2e0aMZO`2f0`MZOa2d0`M\\Oa2b0`M^Oa2a0_M_Ob2`0^M@c2>^MBd2<\\MDi24ZMLl2LVM4j8000000000000000000000000000000000000001O0000000000O10000000000000000000000000000000000000000000000000000000000000000000000000aM7iEIV::hEFW:=gECX:`0fE@Y:b0fE^OY:c0gE]O
Y:d0fE\\OY:e0gE[OY:e0gE[OY:e0gE[OY:e0gE[OY:d0hE\\OX:d0hE\\OX:c0iE]OW:b0jE^OW:?kEAU:?kEAV:=kECV:0nA2P>MQB3o=MQB4n=LQB6n=JRB6n=JRB7m=IRB8n=IQB7o=JmA:R>EoAc0i=\\OXBf0f=ZOZBg0e=YO[Bg0e=YO[Bg0e=YO[Bh0d=YOZBh0f=XOZBh0f=YOYBh0f=XOZBh0f=XOYBk0e=SO]Bn0b=RO^BP1`=PO_BQ1a=oN_BQ1a=oN_BR1`=nN`BR1`=nN`BR1`=nNaBR1^=nNbBR1^=nNbBR1^=nNbBS1]=mNdBR1\\=nNdBR1\\=nNdBS1[=mNfBR1Z=nNhBQ1W=POiBo0W=QOiBP1V=oNkBQ1U=oNlBP1T=POlBQ1S=oNnBP1R=POnBP1R=POnBQ1Q=oNPCP1P=POPCP1P=POQCo0o\\O8[[Ld0m;QOQEMVOc1i;`NQEMVOd1h;_NRENUOc1i;_NoD3UO_1l;^NoD=VOi0l;jNmD4VOO3S1i;jNnD4WON2T1i;jNnD4WOL4V1g;jNnD5AP1b;kNnD5_OP1c;kNnD6^OP1c;jNoD8[Oo0f;iNoD8[Oo0f;iNoD8ZOGLR1l;oNnD8YOGNa0NKm;EnD8WOI0?NKm;EnD9UOI2=OLl;EnD9UOI2>NJn;FnD8YOE14N`0j;_OnD8\\ODM4O`0k;@mD8\\ODM4Oc0h;]OPE8\\ODL6Ob0h;\\OQE8[OFM4Nb0i;\\OQE8ZOGN4Lb0k;[ORE7XOI0h0LVOj;2SE6WOI04L;Pl:F^F8b9H^F9a9G`F8`9HbF6^9JdF4\\9LfF3Y9MhF2X9NiF0X90hF0X90hF0X90hF0X90hF0X90hFOY91gFOY91hFNX92j200000000000000000000000000000000001O000000000000O100001O0000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001O00000000000000000N" + } + ], + "model_output": "An individual wearing a red ski jacket with a black helmet and goggles, equipped with ski goggles and a black backpack, is going down the snow." 
+ } +] \ No newline at end of file diff --git a/evaluation/GAR-Bench/model_outputs/gar_1b_detailed_eval.json b/evaluation/GAR-Bench/model_outputs/gar_1b_detailed_eval.json new file mode 100644 index 0000000000000000000000000000000000000000..3e60205cb62c0f3fc77d9ad53d866ce0cdb2735b --- /dev/null +++ b/evaluation/GAR-Bench/model_outputs/gar_1b_detailed_eval.json @@ -0,0 +1,2570 @@ +[ + { + "image": "images/caption_detailed_0.png", + "subject_name": "person", + "object_name": "skateboard", + "predicate_name": "beside", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "hT\\63W=1N3M2O1O2N1O1O2O0O1O001O00O10O10O001000O011N1O1O10000O1O1000000\\MCZH>e7GTFGk1b0Q81mGOS83kGMU85hGKY86fGJZ88cGJ\\88`GIb88[GIe8:WGGj8:PGIQ99lFHU99dFK]96\\F0d92WF0k90SF0o90PF0P:1nENU:b12O1O1O1M3N2M3L4N2O2N2O1O001N1OZFoMT8P2kGUNS8k1jGYNU8c1aFRNY1>V8^1aFTNX1a0d0@j5b1ZH]NW1d0a0@n5^1ZH^NV1g0>@Q6[1\\H]NT1k0=]OT6[1`I\\O9ZOV6\\1^I]O:WOX6]1\\I_O:UOZ6\\1ZIB:SO[6[1ZIE8QO^6Z1YIH7nN`6[1VIK8jNb6]1SIL8iNd6\\1RIN0ZN_O<`7[1PI2OWN@=a7Z1oH6LUNB=Ic0^O7J4K2N2O0O2O0O10O010TOhBOY=KoBAGNg=a0d0O0101XOAlB`0Q=ElB=Q=FmB=P=FmBbNXO]:8QG`0bNXO_:6oFb0j9\\OVFb0m9]OSFa0R:\\OnE?Y:^OiE?\\C1N100O100O10O0000010O0100O000000000000000000000O10000O100VNiCj0WRSN`0\\9RO`H?TN?[9RO^Hc0XN9\\9SOZHf0\\N3\\9WOVHg0aNOZ9ZOTHh0cNLZ9[OSHj0dNIZ9[OSHm0fNCY9]ORHQ1hN^OX9_ORHS1iNXOX9EoGS1jNUOX9HoGR1lNROW9KnGS1QOeNX95jGV1Y9hNhFX1X9gNiFX1X9hNhFX1X9gNhFZ1X9eNhF\\1X9dNfF]1[9cNcF_1]9aNbF`1]9aN`Fb1a9]N_Fc1a9]N_Fc1a9]N_Fc1a9]N^Fd1b9\\N^Fd1b9\\N]Fd1d9\\N[Fe1e9[NZFf1f9ZNYFg1g9YNXFh1h9XNXFh1S3XNj10RKi1R3\\Ni1KUKh1Q3`Ni1HVKh1Q3aNg1HXKf1o2fNh1DZKe1i2mNl1^O\\Kc1g2SOk1ZO_Kb1e2VOk1XO`Ka1f2XOi1WOaK`1f2[Oh1UO_K`1l2[Oe1UO]K_1Q3\\Oc1TO[K`1R3^Oa1TO\\K]1T3_O`1UO[K[1V3_O`1XOWKY1Z3_O_1ZOUKU1]3B^1ZOTKS1_3C]1\\ORKQ1a3C]1]OQKo0c3E[1]OQKn0c3G[1_OnJj0g3H[1_OlJi0i3HZ1AlJg0j3IY1BkJe0k3JZ1DhJb0n3JZ1GeJ?Q4JZ1IcJ>R4I[1KaJM2\\2^OgMb0MO^2\\OfMe0LO^2\\OeMg0LM`2ZOdMjN^NP1_1k0c2VO`MfNjNX1S1k0g2RO]MjNkNY1P1k0k2nNZMlNoNZ1m0k0m2kNXMnNQO\\1j0j0m2lNWMnNTO\\1h0i0n2_M`KY1f1SOWO[1d0j0n2`M
bKV1f1TOXO[1c0j0l2aMgKR1b1XOZO[1`0j0W3aNoLZO\\OZ1?j0X3_NnL]O[O[1`0g0aNbMa4n0RM_O]OZ1>g0Z3^NkLA]O\\1>d0\\3\\NjLD\\O\\1>d0]3ZNiLF]O]1c3QNdL2]O_1==f3lMaL9[O`1=:kNjM]6<[Ja1=8kNoMX69`Ja1=6lNoMV6:dJ_1:8nNmMT6ZMdNl1i5i1SKeM0Z1`0F[4\\1PKSLYOc1k0W1`0I\\4[1nJjM6P1`0M[4Y1kJoM9j0a00Z4W1jJRN;f0`03Z4U1iJUN=?b09W4S1cJSLKU2i09c0>U4R1bJTLKU2j07c0cMcNi2b5i3TKnK1^MMP3m4e3SKnK2^MNP3l4g3fJTL?XMNo2l4i6SK[F1m2k4h6RK^F1l2l4f6RK_F2l2k4e6QKaF4l2i4c6SKaF4m2h4b6SKcF4l2h4a6RKXFF7a0R3d4_6UKfF7k2c4`6TKfF9l2a4_6TKfFk2^4Z7bKgH]4Y7bKiH]4W7cKiH]4W7_KnH`4R7`KnH`4Q7aKQI]4o6aKXEEh3i4P7aKZEFf3h4Q7aKUI_4k6aKUI_4l6`KUI_4k6`KVI`4k6^KWIa4j6^KWIa4k6\\KWIc4T;O001O001O001O00001OL`Ki@_4]?O1O002N2N1O2N2N2N2N1O2N2N1O2N2N2N2N1K6M5K6J0O10000O2YOo^O^NQa0X20O100O1[On^O]NRa0V220000N20000N3O1N2^Od^O]N`a0b1`^O]Naa0b1a^OSNia0j18N2N2O1O1N2N2N2O1N2N2N2O1O1O1M3O1N2O1N1O2O0000MXOP]Of0Rc0ZOn\\Od0Xc0N2OIAo\\O=Sc0Cl\\O=Uc0Ck\\O;Wc0Eh\\O;Yc0Eg\\O:Zc0Ff\\O8[c0Gf\\O9[c0Ge\\O7^c0Gb\\O8`c0H`\\O6ec001N[KM`E3W?N2OPPi1" + } + ], + "model_output": "The sky is a soft, pale blue with a smooth gradient, transitioning from a slightly darker blue at the top to a lighter blue towards the bottom, with a person over it.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_4.png", + "subject_name": "person", + "object_name": "pavement", + "predicate_name": "standing on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": 
"al_6=^>8I7AWO[BS1a=:M1N04MmNfN_DZ1];kNaDU1[;POdDQ1X;ROhDn0S;WOmDi0Q;YOPEf0m:]OSEc0k:_OUEb0h:@ZE>d:FZE:e:HhDoNIM8[1V;KhDmNIN9Z1U;MgDPO4T1l0Ne73WGgNOM9^1`0f0_7YOkGbNO08b16j0f7RORHbN3e1ES1U8gNaHQ3_7nLbHS3\\7nLdHR3[7oLeHQ3Z7PMeH`NYO^4Q8TMeH]N[O_4^7aKQIc1I]N20F_4Z7eKUI_19l2_6iKVI\\1;k2\\6mKXIX1=j2Y6PLZIV1=j2X6PL[IW1=i2V6RL]IU1>i2R6ULVI@1b1g0i2P6WLUI\\1l0\\2n5\\NRJd1m5]NSJc1l5^NTJb1l5]NUJc1j5^NVJb1j5]NWJc1i5]NWJc1h5^NWJc1i5]NWJc1i5\\NWJe1i5[NVJf1j5ZNVJf1j5ZNRJj1n5UNPJn1P6RNnIP2R6PNiIU2W6kMcI[2^6eM\\I`14^L`6]6eIaI[6[6mIaIS6e0[I\\4i0lJm5f0]IW4l0RKg5e0`IS4o0UKe5d0_IR4Q1VKe5d0]Im3V1^K_5c0]Ii3Y1dK[5a0^Ia3`1nKY56]Ic3b1ULU53\\Ia3^:K4L3K6G\\DRMg;m28M3N2N6I6K5Ic0J5M2L5L3N3N2M4M2M3N3M2N2M2O2N1O2N1O1O1O2O1N1O001O00WO]CaN1?c2O0L4N00100N2O1O0]OFaB<^=J\\B7e=KVB7j=>`BSOP=l0QCVOnRCCo<=PCCR==kBCX=>cBCb=k03M2N1N2O1N2NTCXOd;g0\\D\\Ob;d0]D^Oa;i0XDXOh;m0RDTOn;S1hCPOX6K4L3N2M3N2N1O2N100O2N100O100O1000O01000O1O010O1O00001O1N2O0O1O2M3C=DeA4Y>LjA4T>LmA7o=ISB5m=KTB3m=MUB0l=1a000O2Oln00SQO2O0N4L4L3N1M4F9M3O1O2O00O0100000O100O101N1O2N2M2O3L3M6GTl]6" + } + ], + "model_output": "A person wearing a black helmet with a reflective visor, a black long-sleeve top, and black pants is riding a bicycle. The individual is in a bent-over posture, gripping the handlebars with both hands, and wearing black gloves. 
The person's head is slightly tilted downward, and the facial features are not visible.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_8.png", + "subject_name": "surfboard", + "object_name": "person", + "predicate_name": "attached to", + "mask_rles": [ + { + "size": [ + 408, + 640 + ], + "counts": "Ved18VO11O000000001O1N2N2N2N2N2N2N2N2O12N1O2NjN]E9b:EgE6W:IkE9R:DSF;m9DUF;_OQO[:>[Fc0e9[O_Fa0o:N2M3M1O4M:F000O1O0O2O010O1O1O1OhV^3" + } + ], + "model_output": "A person with wet, curly hair, wearing a black wetsuit with a red stripe down the side, is captured in a dynamic pose with one arm extended upwards and the other bent at the elbow, legs spread apart in a running or jumping motion, playing with a surfboard.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_9.png", + "subject_name": "person", + "object_name": "grass", + "predicate_name": "standing on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "d45k>0N200O1O2N2O1N1O1O1O010O2O3L4M000O100O010OO2N1NI9O10O1010O0100O1O2M2O2N2FfAK^>OfANg>OjSi8" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"i8U6k80O10001O0000000001O1O2N2M6K6K1N2M2O3M2O0O1O0O2O001O1O1O001O10O00O10000000000000000000001O0000001O000000001O000000000000001O000000000000000000O1000000O2O00O02N1O1O001O1O1O1O100O1O1O1N2O1O1N2O1O1N2N2N2010N2O001O001O00001O00001O001O001O1O000000001O0000000000001O00001O00001O0000001O000000001O00001O1O3M1O1O2N2N2N001O2N1O100O0O2O1O001O001O0000001O0001O000001O00O010000000000000O100000000O10000O100000000000000001O000000000000O1O10000001O0000000000000000001O00000000001O00001O000000001O0000001O000000001O0000010O000O10001O0000001O00001O000000001O0000001O00001O0000001O00001O000000001O000000001O00000000001O00000000001O0000001O000000001O00001O000000001O00000000000000001O0000010N2O3M2N4L2N3M3M2N1O2N1O1O001O1O1O1O0000001O0000000000000000001O000000000000O10000000000O10000O10000O10000O100O100O1O100O100O1O100O100O1O1N2O1N2N2N2O1M3O1M3N2M3L4M3K5K5M3G9L4L4N2000000000000001OO10000O1O100O1O100O1O1O100O100O100O1O100O1O10000O100O100O100O100O100O100O100O100O1O10000O100O100O100O1O10000O100O1O100O1O10000O100O1O10000O10000O10000001O00001O2N001O001O001O001O0000001O000000000000000000O10000O1O100O100O1O1N2O1O2K3K6H8I7O1O100000001O00O1000000000000000O1FfHmH[7Q7iHlHX7S7;O10000O1O100L4M3ZOQHkI31R8S6a00000O100001OO10000000000O100001OO10000000000000000000000000000000000001O0000000000000000000000000000001O0000000000mI" + } + ], + "model_output": "A person with short dark hair, wearing a light-colored, short-sleeved shirt with a collar, and a dark-colored undershirt visible at the neckline, is standing on grass.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_10.png", + "subject_name": "umbrella", + "object_name": "pavement", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 640, + 428 + ], + "counts": 
"b>;j1JWN3[a05f^OE106OO6Jm0Xa0d1M2O1O0O2O00001O0O2O1O00001N2O00001N101O001N101N10001O000O2O001O1N101O00001N101N101O0O3N001O0O2O001O1O0O2O1N101O001N2N101O1O0O2O001O000O2O0O2O000O2O000O100O2O00000O101N10000O100O100O10000O01000O100O010O1000O1000000000O0101O00O10O10000O100000000O100000000O1000000O3N7I7dK[@o3U`0F9G5KcNmL]Bc2b=fMZBV2e=UN[BXOaNS2T?QOXBWOeNS1S?0WBbNiN]1P?[1i@^Mg0X1_>f1XAZNh>i1UAWNj>l1RAkLIY1U?`3000000000001O0O1VHUK?k4AUK?k4[71O0000000000000O11O0000O0100O1O100O1O100O10O010000O1O1O1O10000O1O010O1O100O10000O100O00101OO0100O100000O010000O10O1001O1N100O2O0000001N100000001N10001N1O100O101O0O2O0O100O1O2N10000O100O1O1O2O0O1O100O2N1O1O1O100L5O0O100O101M2N2O1O1O1O1N2O1O1Oo_O]Mh>a2ZA_Me>^2`1O100O1N2N2O1O1O1O1N2O1O1O1N1O2O1O1O2Nf0YO7J5L3L7H3M3N1O1NUiV2" + }, + { + "size": [ + 640, + 428 + ], + "counts": "cj04413L20gb0j7Q@gJa3Z5[LiJd3X5]LgJd3W5^LhJb3X5_LgJ`3Z5`LfJ`3Z5aLeJ_3Z5bLfJ^3Z5cLeJ]3[5cLeJ]3[5cLeJ]3[5cLeJ]3[5cLeJ]3[5dLdJ\\3[5fLdJZ3\\5fLdJZ3[5gLeJY3[5hLdJX3\\5hLdJX3\\5hLdJX3\\5iLcJW3\\5jLdJV3\\5jLdJV3\\5kLcJU3]5kLcJU3\\5mLcJS3]5nLbJR3^5nLbJR3^5oLaJQ3^5QMaJo2_5QMaJo2_5QMaJo2^5RMbJn2^5SMaJm2_5SMaJm2^5UMaJk2_5VM`Jj2`5VM`Ji2a5WM_Ji2`5XM`Jh2`5YM_Jg2a5YM_Jg2`5[M_Je2a5\\M^Jd2b5]M]Jc2c5]M]Jc2c5^M\\Jb2c5_M]Ja2c5`M\\J`2d5aM[J_2e5aM[J_2d5cM[J]2e5cM[J]2e5dMZJ\\2e5fMZJZ2f5fMZJZ2e5hMZJX2f5iMYJW2g5iMYJW2g5jMXJV2g5kMYJU2g5lMXJT2h5lMXJT2h5mMWJS2h5nMXJR2h5oMWJQ2h5QNWJo1i5QNWJn1j5SNUJm1j5TNVJl1j5TNVJl1i5UNWJk1i5VNVJj1i5WNWJi1i5WNWJi1i5XNVJh1j5XNVJh1j5XNVJh1i5YNWJg1i5YNWJg1i5YNWJg1h5[NWJe1i5[NWJe1h5\\NXJd1h5\\NXJd1h5\\NXJe1f5\\NZJd1f5\\NZJc1f5^NZJb1f5^NZJb1e5_N[Ja1e5_N[Ja1e5_N[Ja1d5`N\\J`1d5`N\\Ja1cNcIj6m4cJ`1cNhIe6h4hJl1U5TNlJP2P5PNPKU2i4mMWKS2g4oMYKQ2f4PNZKP2f4oM[KQ2iNUIX5j4oKQ2fN[IY5c4QLR2eN^IY5^4SLU2bN`IZ5Z4TLR3oNVHR4h4oLT3fN]HY4_4QMU3aN`H]4\\4RMl3n2TLRMm3l2TLTMl3h2WLYMo3i1WGlLo4[1Q4`1dL`Nd3U1_LkNe3m0_LSOj3OjL2h3RObLn0P4_NPLb1W;00000O100000000O1000000000000000000000000O11O0000iJ]NQHc1m7cNoG]1`7]NhC=g4V1U7AkH?o6GQI9n6HRI8n6IQI7n6JRI6l6KUI5j6LVI4i6MWI3e61[IO`66`IJZ64L2O1N3N2M2O2M2O1
O1N3N2N2M3N1O1N3N1O1N2O1N3N2M3N1N102M2O2M2N2O1O2M2N3M2O1N2N2O2M2N3M2N2N2O1N3M2N1O3M1O3M2N1O2N2O1O1N2N2N2N2O1N2M3OiL^EQ2`:oMcEP2[:PNiEn1V:QNnEn1o9SNUFj1i9WNYFi1e9VN_Fh1_9YNeFe1X9[NlFd1Q9\\NSGb1k8]NZGa1e8_N^G_1`8aNcG^1[8cNhG[1V8eNmGZ1Q8fNSHX1k7hNXHW1g7iN[HV1c7jN`HU1^7kNeHT1Z7kNiHS1V7nNkHR1S7nNPIP1o6QOSIn0l6QOVIn0i6ROZIm0d6TO]Ik0b6UOaIj0]6WOdIh0[6YOgIf0X6YOjIg0T6ZOmIe0R6[OPJe0n5\\OTJb0k5^OWJb0h5^OZJa0d5_O^J`0b5_OaJ`0\\5BfJiLAW3`0hL_OX3b0hL^OW3c0iL\\OW3e0hL\\OW3e0iLZOW3g0iLXOX3h0hLWOX3j0hLUOY3k0gLUOZ3k0dLUO]3l0bLSO`3n0^LQOd3o0[LPOg3Q1VLoNl3R1RLmNP4S1oKlNT4T1iKlNY4U1eKjN]4V1aKjNa4V1^KiNd4W1[KgNi4X1VKfNl4[1RKeNQ5Z1nJdNU5\\1iJdNY5\\1eJdN]5[1bJfN_5Y1aJgNa5X1^JhNc5W1\\JkNd5U1[JkNf5U1XJlNi5T1VGTNg2i0S6R1SGZNg2e0W6^1eIcN\\6^1bIaNa6^1]IcNc6^1\\IaNf6_1XIbNj6^1TIaNn6^1QIbNR7\\1nHdNR7]1lHcNW7\\1gHeNZ7Z1eHfN]7X1cHhN_7W1`HiNb7V1]HkNd7T1\\HkNg7U1_FaNi09j8W1YFdNd0\\OWOi0n9X1RFhNf07Y9Y2[FhMh9Y2SFhMo9d31N2N3N1N2N2N3M2N2N1O3M2O1N2O1N2O1N2O1N2N2O1N2N2N2N3M2O0O2N2O1N2N2N2N2N2N2N2O1N2N3M1O2N3M1O2O1N2N3M1O3M2N102M1O2O1M3O1N2O1N3M1O2O1N2N2O1N2O1N2N2N2O0O2O1N2N2N2O1N2N2N3N1Mocg0" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"2l>400001OO1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000oJb0`J^O\\5k0_JUO]5S1_JmN]5[1_JeN^5`1`J`N[5i1aJWN]5m1aJSN[5S2cJmMZ5Y2cJgMZ5]2eJcMW5c2gJ]MV5h2hJXMV5k2iJUMT5P3jJPMR5U3mJkLP5Z3nJfLn4_3QKaLm4c3QK]Lk4h3TKXLi4l3VKTLf4R4XKnKg4T4XKlKa4\\4^KdKc31XJ]4U2bKb36SJ[4Z2_Kc38nI^4]2ZKd3=hI]4c2VKe3a5[L_Je3b5ZL^Jf3b5ZL^Je3d5ZL\\Jf3e5YL[Jg3f5XLZJg3h5XLXJh3h5XLXJe3k5[LUJb3o5]LQJ`3R6`LnI]3V6bLjI\\3Y6cLgIX3^6hLbIV3a6iL_IS3e6mL[IP3i6oLWIn2l6RMTIk2o6UMQIf2U7YMkHe2W7[MiHa2[7_MeH^2_7aMaH\\2b7dM^HY2f7fMZHW2i7iMWHS2m7mMSHQ2P8nMPHm1U8SNkGk1X8TNhGi1[8WNeGf1^8ZNbGc1a8]N_G`1d8`N\\G]1g8cNYG[1j8dNVGY1m8gNSGX1o8gNQGW1Q9iNoFU1S9kNmFT1U9kNkFS1W2gLd4V2UIR1X9nNhFP1Z9POfFo0\\9POdFo0^9PObFn0`9RO`Fm0b9RO^Fm0c9SO]Fl0d9TO\\Fk0f9TOZFk0g9UOYFk0g9UOYFj0h9VOXFi0i9WOWFh0k9WOUFh0l9XOTFg0m9YOSFf0n9ZORFf0o9YOQFf0P:ZOPFf0P:ZOPFe0R:ZOnEe0S:[OmEd0T:\\OlEd0T:\\OlEc0V:\\OjEd0V:\\OjEc0W:]OiEc0W:]OiEb0Y:]OgEc0Y:]OgEb0[:]OeEc0[:]OeEb0\\:^OdEa0]:_OcEa0]:_OcEa0^:^ObEa0_:_OaEa0`:^O`Eb0`:^O`Ea0a:_O_Ea0a:_O_Ea0b:^O^Ea0c:_O]Ea0c:_O]Ea0c:_O]Ea0c:_O]E`0d:@\\E`0e:_O[Ea0e:_O[Ea0e:_O[Ea0f:^OZEb0f:^OZEa0g:_OYEa0h:^OXEb0h:^OXEb0h:^OXEb0i:]OWEc0i:]OWEc0i:]OWEb0j:]OWEc0i:]OWEc0i:]OWEc0j:\\OVEd0j:\\OVEd0k:[OUEe0k:\\OTEd0m:[OSEe0m:[OSEf0l:ZOTEf0l:ZOTEf0m:YOSEg0m:YOSEg0n:XOREh0n:XOREh0n:XOREi0m:WOSEi0m:WOSEi0n:VOREj0n:UOSEk0m:UOSEk0m:UOSEk0m:UOSEk0n:TOREl0n:TOREl0n:TOREm0m:SOSEm0n:SOQEn0n:ROREn0n:QOSEo0m:QOSEo0m:QOSEo0n:POREQ1m:oNSEQ1m:oNSER1l:oNSEQ1m:oNSEQ1m:oNSEQ1m:oNSER1l:mNUES1k:mNUES1k:mNUES1k:mNUES1k:mNUET1k:kNUEU1k:kNUEU1k:kNUEV1j:kNUEU1j:lNVET1j:lNVET1j:lNVET1j:lNVEU1i:kNWEU1i:kNWEV1h:jNXEV1g:kNYEU1g:kNYEV1f:jNZEV1f:jNZEV1f:jNZEV1e:kN[EU1e:kN[EV1d:jN\\EV1c:kN]EU1c:kN]EU1c:kN]EU1c:kN]EV1a:kN_EU1a:kN_EV1`:jN`EV1_:kNaEU1_:kNaEU1^:lNbET1^:lNbEU1]:kNcEU1\\:lNdET1[:mNeES1[:mNeET1Y:mNgES1Y:mNgES1Y:mNgES1X:nNhES1V:nNjER1U:oNkEQ1U:oNkER1S:oNmEQ1S:oNmEQ1S:oNmEQ1R:POnEQ1P:POPFP1P:POPFP1o9QOQFP1m9QOSFo0k9SOUFm0k9SOUFn0i9SOWFm0h9TOXFl0g9UOYFk0g9UOYFk0f9VOZFk0d
9VO\\Fj0c9WO]Fi0b9XO^Fh0`9ZO`Fg0^9ZObFf0]9[OcFe0\\9\\OdFd0[9]OeFd0X9^OhFb0W9_OiFb0T9@lF`0R9BnF>P9DPGRHCl7>THBl7>THBk7?UHBj7>VHBj7>VHBj7>VHCi7=WHDh7UKAk4?VKAi4?WKAi4?XK@h4`0YK_Of4b0ZK_Oe4a0\\K^Od4b0\\K^Od4b0]K]Oc4c0^K]Oa4c0`K\\O`4d0`K\\O_4e0bKZO^4f0bK[O]4e0dKZO\\4f0eKZOZ4f0fKZOZ4f0gKYOX4h0iKXOV4h0kKWOU4i0kKXOS4i0nKVOR4j0nKVOR4j0nKWOP4j0QLUOn3l0SLSOm3m0SLTOl3l0ULTOi3m0XLROh3n0YLROf3n0ZLROf3n0[LROd3n0\\LROd3n0]LQOb3P1_LPO`3P1aLoN]3S1cLnN\\3R1eLnNY3S1gLnNX3R1iLmNW3S1jLmNU3S1kLmNT3T1lLmNR3T1PMkNm2V1TMkNk2U1VMkNh2U1YMlNe2U1[MkNd2U1^MkN`2U1cMiN\\2V1gMiNY2U1iMmNT2S1mMmNR2S1PNmNl1U1UNlNj1R1YNnNe1R1]NmNa1U1_NlN_1T1cNmNZ1P1kNPOT1m0POROP1m0QOTOm0m0SOUOk0k0VOUOh0i0\\OWO`0j0CUO=i0EXO9h0IYO4g0MZO1b05^OJa08_OF?=C@=a0D]O=e0BZO=h0DRO=Q1ElN;U1FhN;Z1FbN;_1F`N7d1JZN4i1LVN1n11nMOT22eMWMdLg2h54bMO`22]MMe24XMMi27QMJQ36lLKV36gLH]3:_LCf3?WL@j3b0SL_On3b0PL^OP4g0eIaLQ2f2^4S1YKlNk4Z1\\IQL^1e2W5_1gJ`N[5`1cJ`N_5`1`J_Nb5b1[J\\Nh5e1VJZNl5g1SJYNn5g1oI[NQ6f1nIXNU6h1iIVN[6j1dIUN^6k1`IVN`6k1^IVNc6i1\\IUNg6l1WIPNo6P2PIoMR7R2kHoMU7R2jHnMW7R2gHmM\\7S2bHjMb7V2]HhMg7X2WHiMi7X2VHhMk7X2SHhMo7X2PHeMT8[2jGdMX8a42N1O2L4N2N101M3K5N10000O2M3M200O3L3L4M2O2N1N3M3M201O0O2M3N2N1O2M2M4N2M2N3M3M3M2L4N3N1O2N101N10_J" + } + ], + "model_output": "A piece of white tissue paper with a soft, slightly crumpled texture and irregular, torn edges is on the dining table.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_13.png", + "subject_name": "person", + "object_name": "pavement", + "predicate_name": "walking on", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "hcW31Z=0ea91odJ5QiK7RCN^L5O0100O0102M2N4M2M:E7HTl0OZbk3" + }, + { + "size": [ + 427, + 640 + ], + "counts": 
"\\n_13W=1N2N3M2N2N2N2N2N2N2N2O1M3N2N2N2O1N2N2N2N2O1N2N2N2N2O1N2N2N2O1N2N2N2N2N2O1N2N2N2M3[NbM\\H_2c7cMZH_2d7cMZH_2e7bMXHa2h7_MWHb2h7^MWHd2i7]MUHd2j7]MTHe2l7[MRHg2n7YMPHi2P8WMnGk2R8UMmGl2T8SMjGo2U8RMiGP3W8QMgGP3Y8PMeGR3[8nLbGU3^8lL_GV3a8j00000001OO100000000O10000O100000000000000000000000000000000O10000O100O100O1N2N2N2N2O1N200O1O100O1O1O1O1N2jNlJSJV5k5mJSJT5l5nJXIO:Z5]6UK[IP5e6k0N2O1O1N2ZO\\IcJf6\\5_I^Jc6b5_I[Jb6b5e0K5OaI[JZ5c5hJ]JX5b5hJ_JX5_5hJdJW5Y5gJmJX5R5dJSK\\5k4bJgJRO?\\6i4bJ[K^5d4aJ^K_5b4\\JcKd5\\4ZJgKf5Y4XJhKi5X4VJiKj5W4UJjKk5V4SJkKn5U4oInKP6S4oImKS6R4lIoKT6Q4kIoKV6P4iIQLX6o3fIRL[6n3cITL]6k3cIUL^6j3bIWL^6h3cIWL^6i3aIWL`6i3_IWLb6i3]IXLc6W5100O10000O100O1O10000002N5K;E3M4L4VLlHU1X7eNRIS1R7eNSIZ1o6bNTI]1n6_NYI\\1j6^N[I_1i6[NZIf1g6TN]Il1e6lMaIT2a6dMeI\\2`6XMgIh2^80001O000000O100DTMWFn2d9]MTFe2l9=0000O10O1001N0lNfLYHZ3`7nL^HS3Y7YM]Hn2a7WMZHk2d7XMXHk2f7XMXHi2f7^MSHd2k7_1O2A>ASJ^Io5`6TJ_Il5]6ZJcId5X6bJiI\\5W6fJhIY5W6jJiIT5W6mJiIR5V6PKjIo4V6SKiIl4V6UKkIj4U6UKlIk4S6dJcI7;T5S6cJdI89U5T6aJeI97V5_6jJ`IW5`6jJ_IV5b6jJ\\IW5d6kJYIV5g6g000000000000000000000000000000000cNgIcJLQ1P3nN:P5hLTKa0h0\\2WO9m4jLTKj0?T2^O9o4iLTKl01SO3o2GTM0Q3Q5mLTKl01TO1o2JRM1P3o4oLSKm02XOKk22nL2Q3l4QMSKm03\\29@a4XMRKm03[2;_O`4YMRKo02Y24fLMg2k4[MRKo03W2>^O]4\\MUK2Ma04a2?^O[4^M_Kc0H_2a0^OZ4]M^Kg0G]2a0_Om4UNaJ\\2a0@o4RNaJ^2h0XOW4fMkJN?=Hg2f0XOX4fMeK3^Oo2e0XOj5g0VJYOk5f0UJZOk5f0VJYOk5f0UJZOk5f0UJZOl5e0TJ[Ol5e0TJ[Om5d0SJ\\On5c0RJ]Oo5b0QJ^OP6b0oI^OQ6b0oI^OR6a0nI_OS6`0mI@U6>kIBV6=jICV6=jICW6mGZO@O123O17_8QEBo:N1000000O100O2O1N2NQW_7" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"f;Y3g;000000000000000001O0000000000000000O100000000000000O100O1O1000000000000000000001O001O1O001O001O1O3M1O1O000O100000000000O10001O0000000O10000O1000000O1000000M3O1O1N2O1L4J6N2O100J6O1OISE]Lm:k3000000000000O1O100O100N2O10000000000000000001O0000000000VOjKjFV4P:01O00000000001O0000001O00001O0000001O00000000001O000000000^ORERMo:_3000000001O00000000001O000000001O00000000001O0000001O000000001O0000000000001O00000000001O0000000000001O0000001O00000000001O0000001O000000001O000000001O000000000000001O000000001O00000000001O0000001O00000000001N11O000001O000000001O00000000001O0000001O000000001O00001O001O0000001O00001O0000001O0000001O0000001O0000001O00001O000000001O0000001O0000001O0000000000001O0000001O00001O00001O001O00001O00001O000000001O00001O0000001O00001O00001O0000001O00001O00001O0000001O00001O0000001O0000001O00001O0000001O00001O0000001O00001O00001O0000001O00001O00001O000000001O000000001O00001O0000000000001O001O0XNlBd1U=[NkBe1U=ZNlBf1W=000O11O000000000000O1000000000000000000O10000000000000000000000O10000000000000000000000O100000000000000O10000000000000000O10000000000000000000000000000O1000000000000000000000000000000000000O10000000000000000000000000000000000000000O100000000000000000000000000000000000000O1000000000000000000000000000000O100000000000000000000O10000000000000000000000000000000000O1000000000000000000UC" + } + ], + "model_output": "A dark-colored car with a glossy finish, featuring a curved roofline that slopes gently towards the rear, a visible section of the rear windshield with a defroster grid, and a partial view of a rear windshield wiper, is parked on the road.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_18.png", + "subject_name": "person", + "object_name": "baseball glove", + "predicate_name": "wearing", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": 
"YRa01Z=000Uh?4eW@8I3L5L2N3M3N1\\DTOf:n0XEVOc:m0YEWOe:k0XEWO2Jl9Q1oEWO5Hk9S1mEXO7EEOk9U1VFYO9ECOm9U1TFYO4E^9N`F:3I\\9MeF50NZ9NgF3OOY9OjF0IUOJk0b91mFNE7]9LnFME7[9MjF3HnNNR1_9NhF5J1]9KdF9NL^9d20OO2O010O0001O001O00000O2N2N10100O001O0N3O010O000M3001O001000000O1O1O1O1O2N0G^LiFd3R9=N2N2O1N2O010O100O01000O010O10000^GhKQ8X4nGjKQ8V4nGlKQ8T4jGQLV8P4gGSLX8`400010O01O01O000010O000010O0dMiGEW85oGJQ81UHNk70VH1j7NWH1k7MUH4k7KVH5k7JUH7j7JUH6l7JSH6m7KRH5o7LoG4R8LmG4V8JiG6X8JgG6Z8lM`G22U12n0T9SOjFm0V9TOiFl0X9UOfFk0[9b11O1O2N5J4XOYFfMh9X2YFhMh9V2ZFiMg9`0TFc06mNg9>TFd07mNf9=TFe07mNf9>SFd08nNf9oEd0m:\\OTEa0n:_ORE`0P;_OPE`0R;_OnD`0T;@kD>X;BgD;];EbD:_;FbD8`;H`D5b;M]D1e;N^DMe;3^DFf;:\\DZOn;e0<10O100O100O10lC]O_O0k;d0dDC[;=cDE\\;;cDG^;8[D1c;n0N00O110O002M2N2N100O1O100O2N1O0O1O2N3M3L3N3K4L4L5M3KjSi4" + }, + { + "size": [ + 427, + 640 + ], + "counts": "V\\o23Z=N^n25[QM3N2N2N2ZOEXD>e;EXD?e;CWDb0f;AVDb0i;a0O1N110O01O1O2N1O010O1O101M2O1O0O1O2N2N3K6K3L4L4N3L4L^fh4" + } + ], + "model_output": "A baseball catcher in full gear, including a helmet with a face mask, chest protector, shin guards, and a mitt on the left hand, is crouched in a ready position, wearing a dark blue jersey with red and white accents, and a tan baseball glove.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_19.png", + "subject_name": "person", + "object_name": "wall", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 640, + 425 + ], + "counts": 
"j8o01UO03m0J\\16iM0N12JX>b1gA]N=;BM00000:0A1N11O50:OB1N10l=P6XBSJN?af4=00000O100001O00001O1O001O001QLi@S3X?lLh@T3X?lLi@S3X?kLi@U3X?jLh@W3X?hLh@X3X?gLi@Z3W?dLk@e3l>XLVAl3h>gKbA\\4P?102M1O100O1O1O1O1O100O001O010O1O001O10O01O1O00100O1O1O10O01O1O1O00001O010O1O1O1O0000010O00001O0000O1O2SNYAZNl>d1XAVNl>h1WASNm>m1TAnMQ?P2QAkMS?U2n@fMW?X2k@dMX?\\2i@_M[?`2h@ZM\\?f2g@SM^?l2k00001N101O1O0O2N101N2N101N1O2O1O1O1O1O1O1O1O2N1O1N2O2O1N1O1O1O1O1O2N1O01O001N3N2N1N3M2N2O1N1O2O1O1N00O20O103K2O1O1O1O003M1O10N11n^OXMl`0n2O1OK5N2100O1O1O101O000O100000002NO1O1001O7I0O2O0O100O2N101OO1000O2O00O1000O010000O10O1000O10O100000O10000O1000000O10001O00001O0O1000001O000O2O001O001O1O0O2O001O001O1O0O2O00001N2O001O1N3N3L1000001N2O0O3N2N2N3M1N3N1N3M3M2O2M4L5J6K5J6J6Hh\\b0" + }, + { + "size": [ + 640, + 425 + ], + "counts": "Yi0;6HWb09f]OH6V3a?m0O00000000O10O1000O100000000O10000000O1000O1000000O100000O100000O1000000O100O10000000000O10000O10O1000O10000O100000000O10O100000O100000001O000O0100000O1000000O100000O100000O100000000O0100000000000O10000000O10000000O10000001O00000000000O10000000000000000O100000001O0010O0001O0101O1N2O1O2N2M2O2N1O3MROfLWAW3h>jL[AT3d>nL]AX3Z>jLgAU3W>lLnAP3P>RMQBl2n=UMXBd2h=^MYB`2f=aM]B]2a=eMdBV2Y=mMkBo1S=RNRCi1mVC^Oj01OO1WHRIS7n6mHRIS7n6g0O003M00_HVI`6i6`IWI`6i6_IXIa6h6_IXIa6h6TIWI_O020[7i6oHWID00020\\7h6_IXIa6h6^IYIa6h6_IXIa6h6^IYIc6f6nHXIN2T7f6\\I[Ic6f6[I\\Ie6d6]IZIc6f6oHWID051Z7g6mHXID0j7h6RIYIb6g6^IYIb6g6]IZId6e6[IZIh6e6XI[Ih6e6VI[Il6e6RI]Io6b6QI]IP7c6oH^IQ7b6oH]IR7c6nH]IS7b6mH]IT7c6lH]IT7c6lH]IU7b6lH]IT7c6k0000001O0000001O1O0000001OeGbIg7^6YHbIh7]6XHbIi7^6c00001O00000000aGcIo7\\6PHeIP8[6a00000000]GdIV8[6iGfIW8Z6iGfIW8Z6jGeIV8[6>000001O01O00000000000000000000000000000000000000000O2O00000000000\\GcIX8]6hGcIX8]6<0000000000000000000000000000001O000001O0000000000001OO101O1O00001O1Oj3_9fLSGX3m8jLQGV3o8mLnFS3R9nLlFS3T9\\M\\FT22iLb9[1SFc1f0QMW9]4n0O2K7ULnDA=f2X;^MRELXO4>5Pjm1" + }, + { + "size": [ + 493, + 640 + ], + "counts": 
"V;P11T2W;lMiDT2W;lMjDS2V;mMjDS2V;mMjDS2V;mMjDS2V;mMjDS2V;mMjDS2V;mMjDS2V;mMjDS2V;lMkDT2U;kMlDU2T;jMmDV2S;jMmDV2S;jMmDV2S;jMmDV2S;jMmDV2S;jMmDV2R;kMnDU2R;kMnDU2S;jMmDV2R;kMnDU2R;kMnDU2R;kMnDU2R;kMnDU2R;kMnDU2R;lMmDT2S;mMlDS2T;mMlDS2T;mMlDS2T;mMlDS2T;lMmDT2S;mMlDS2T;mMlDS2T;mMkDT2U;kMlDU2T;kMlDU2T;kMlDU2T;lMjDU2V;Q100000000000000O10000000000O10000000000000000O100000000000000000000O1000000000000000000000000000000000000000000000000001O00000000000000000000000000001O0000001O0000001O0000000000000000001O000000000000001O0000001O0000001O000000001O0000001O0000001O00001O00001O001O001O001O1gL^De2b;ZM`De2`;ZMbDe2_;YMcDf2^;WMeDh2[;WMfDi2[;UMgDj2Y;UMhDk2Y;SMiDl2X;RMiDn2W;RMjDm2l;O1O001O00001O001O0000001O001O001O001O000000001O1O001O00000000001O00001O1O00001O001O00001O000000001O0000001O001O1O1O00000000001O001O001O001O001O00001O001O001O001O001O00000000001O0000001O00001O00001O0001O01O0O101O001O001O001O001O00001O0000001O001O00001O001O000000001O001O002N00001O00000000001O0000001O001O00001O001O0000001O00001O001O1O1O00000000001O00001O00010O0O101O001O000010O0001O001O001O1O00000000001O0000001O001O00001O00001O00001O001O001O001O001O0000001O0000001O00001O001O0000001O001O001O1O001O001N2O00hhP3" + } + ], + "model_output": "A red and black diesel locomotive numbered 1803, featuring a prominent front headlight cluster with two main lights and additional smaller lights, a yellow front bumper, and a series of grilles and vents on the side, is driving on the railroad. The locomotive has a yellow railing on the front, a pair of windshields with wipers, and a set of steps leading up to a door. 
The side of the locomotive displays a series of vertical handrails and a ladder leading to a platform with a railing.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_21.png", + "subject_name": "sandwich", + "object_name": "sandwich", + "predicate_name": "leaning on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "lg_32m>2O1N101N2O1M4M2O1N2O0N3N101N1O2N1SOUOdCm0Z:G8J3O3I5M4N1N2M3M4L3N1O2M3M3N2N1O2M3N2M3O1N2N2N101O0O2O0O2O1N101N101O1N101O1N101N2O001N2N101O001N2O1O0O2O001N2O001N2O1N2O1O001N101O001N101N10001O0O101O1O0O101O000O101O00000O101O0O10001N10000O1000000O10000000O010O10000O01000O100000O010000000000O100000O10O100000000O0100000000000O1000000000O10O10000000000O10000O100O1000000O1000000O1000000000000000000O10000O1000000O10010O0O10000000000O11O00000000O11O00O100001O00O10000O100O2N11O1O0000O2N1000000O20O000001O0000010O0O11O010OO10001O00000O1100O0000O1O10001N1O1001O00O2O0O100O1O10001O000O1O10001N1L4N21O1O00O2L3001O01N100003NO000O10010O000000010O000O11O01O00001O0000001O0O100000000010O01N101O001N101O00001O00000001O01O00001O00010O00000O101O000000001O00001O00001O001N10001O0000001O0000001O0000001O00001O00001O01O0001O00001O001O001O001O1O001O100O2N2N2M3N2M3N3L3N3L3N1O1N2O1O3L2N2O2M4M3M2M3N4K5Ib]f2" + } + ], + "model_output": "A sandwich with a layer of green pesto spread, topped with slices of tomato and a piece of bread, is attached to a sandwich.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_22.png", + "subject_name": "cat", + "object_name": "sand", + "predicate_name": "sitting on", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": 
"g^S67T=3N3M101N3]CBS5b0@W30oKf5ImJ=1a0]OY30PL\\62aIHj0j0XO]32nK[6MfJe0nN_32oKZ6LiJd0jNb32oKh7=VHe32nKi7;UHh32mKk79SHj32mKl77SHl31mKm76QHn32lKo72PHS40lKR8NoGV4OlKT8JoGY4MnK^8R4bGnK^8S4_GoKa8^4001O00001O1O001OOQM^GV1b8hNaGW1^8hNdGX1\\8hNeGW1[8jNdGV1\\8lNaGU1^8nN`GR1`8m1VMXKPMh4n2\\KZJNb2f4S3fKhLZ4W3jKfLV4Z3mKcLS4]3RL]Lo3c3ULXLl3h3TLVLn3j3RLTLP4l3PLRLR4n3nKnKV4S4iKjKZ4V4eKjK\\4V4dK^Kh4b4VK\\Kn4e4jJ[J@h0P6m4`J\\J^Oe0U6P5\\JPKh5P5WJmJm5S5SJkJo5V5PJiJQ6W5oIhJR6X5nIcJW6]5hI]JEKl00h4h5gJ[Jg0Oa4f5fJ\\Jj0N`4f5fJZJf6e5[I\\Jd6b5_I]Ja6c5_I\\Jb6d5c000000000000QJeJkNI\\4b5dK\\J`0<\\OI_4_5eK]JO`KTIb2l6\\MXIb2h6]MYIc2h6[MZId2f6[M[Ie2e6[M[Ie2e6ZM\\If2d6ZM\\If2d6ZM\\If2d6ZM\\If2d6ZM\\If2d6ZM\\If2d6ZM\\If2d6ZM\\If2e6YM[Ig2e6ZMZIf2f6[MXIf2h6o10000000mIRIH3j5k6a000000gITIR6l6mIUI5Me5n6VJUIS6k6lIVIT6Q700RJkHe5U7UJmH00j5S7]JlHb5Q7bJnH^5R7bJnH^5R7aJoH_5P7aJQI_5o6bJPI^5P7bJPI^5Q7bJnH^5S7aJmH_5S7aJmH_5S7aJmH_5S7=0000O10000000000001O0RKmHc3S7\\LQIa3o6`KPIa01o3o6`KPIa01o3o6_KQIb00o3P7^KQIb0OP4P7^KQIa01P4o6^KRI`0OR4o6_KQIa0MQ4R7_KnH`5R7=000000000000001O001O00000000000000000YJiHY5X7=0000O100000000000000001O00O11O0000000000000000001O00000000001YKjHV3V7iLoHS3Q7lLQIS3o6mLRIR3n6nLTIP3l6oL[Ik2f6TMhI^2X6bMiI]2W6dMlIX2T6hM[Ji1f5WNdJ^1\\5bNeJ]1[5dNeJ[1[5gNdJX1\\5iNdJV1\\5kNcJU1]5kNdJT1\\5mNcJS1]5mNcJS1]5mNcJWOSNT1[7DaJUOYNV1W7D`JUO\\NU1T7G_JSO_NU1R7I]JROcNT1P7J[JSOfNS1P7H[JSOgNU1n6H[JROiNU1m6HZJSOiNU1m6HZJROjNV1m6F]JoNhN[1k6F\\K:e4D\\KcJC]5>bJB^5a0_J_Ob5a0\\J@d5a0[J_Oe5c0YJ]Og5g0UJYOk5k0oIWOR6S1mH]MMa1V7]1\\H]M8W1[7T2_HmMa7o3001O001O000000001O00O100000000O100000aJ`HR5_7nJcHQ5]7oJdHP5]7oJdHP5\\7oJeHQ5[7PKdHP5\\7PKdHP5]7oJcHQ5]7oJcHQ5]7PKbHP5^7SK^Hn4b7>1O3M1O001O0000000000001aLTH\\1m7aNVH^1k7VMSHg02S2l7TMSHh03S2j7UMSHh03S2j7UMTHh02R2k7TMTH:0C2n2j7UMTH;OB4m2i7VMTH;0A3n2i7VMTH;0A3n2j7UMSH;1B3m2j7UMRHm0Qc04N000O100000000000000000000000000000000000000000000000000000000000000000000000O100000O10000000000000000000000000000000000000000000000000000000000000000000000000000O10000001N4M7Ib0\\OjSQ10WlnN3X\\OLSRS6" + 
} + ], + "model_output": "An individual with a contemplative expression, wearing a light-colored, short-sleeved shirt and dark pants, is sitting on a bench with one leg crossed over the other. The person's arms are bent at the elbows, with hands clasped together in front of the body, and the head is slightly tilted downward.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_24.png", + "subject_name": "airplane", + "object_name": "sea", + "predicate_name": "flying over", + "mask_rles": [ + { + "size": [ + 488, + 640 + ], + "counts": "kjg73T?2O00000O1K5VAH]>NfA:MI0OT>1mA8OHO1T>OnA?NBT>OnA?NBT>OnAf0R>8O7E=D6M100000O1O1L4I7J>Cc0]O8L10000O10000000000000000000O100N5K7G6K4N10O1M4Ha0E2N2O001N_nn0" + }, + { + "size": [ + 488, + 640 + ], + "counts": "Y7k0Q1R4^7nKbHR4_7mKbHR4^7nKbHS4]7mKcHS4]7mKcHS4\\7mKfHR4Z7jKeG[OQ1k4Z7fKnH[4Q7cKQI]4o6bKSI]4m6bKTI^4l6bKTI_4k6aKUI_4k6`KVIa4i6_KWIa4i6^KXIb4h6^KXIc4g6\\KZId4f6\\KYIf4f6ZKZIf4f6ZKYIg4g6ZKWIh4h6YKhHGWOU5P8UKfHY5Y7iJcHY5]7hJaHZ5^7gJaHZ5_7fJ`H[5^7eJbH\\5^7dJbH\\5^7dJbH\\5^7dJ_H`5`7`J^Hc5a7^J^Hb5b7bJYH_5g7eJUH[5k7hJPH[5o7o0000001O001O0000000O110O00001O00gIYHd2ObNh7fN`Hc2KfNe7dNgH_OA0OL7h1N9d7bNlH[OC0=f1C0001OO10000O100O2N1O1O1PLc0kEUO^19b8k0dE[Ob1Kg8V2mFkMW9>^ESONj1i0hNW;^1XDhNi;[22N2N002N1O1O1O001O1O001O001O001O00001O001O1OmNaM]E_2Z:mMdE7[Ol0j:UOkEKAm0\\:ASF]OFQ1n9L_FgNL\\1Q9b0PH^Oo7d0oG]OP8d0oG]Oa5gNXJ2iNl1]1[Ob5iNUJ5eNl1a1WOe5kNRJb28cNf5oNnI_2:cNh5POlI_2:aNj5TOjI[2:aNl5VOkIW28cNm5XOkI\\OZOS2k0ZOP6XOeJZ1ZO^OQ6ZOfJV1XO@R6]OeJR1WOBS6^OeJP1VODU6^OcJo0VODX6]OaJQ1ROE]6[O`JR1nNFb6]O[JX1dN_MIl1X7@WJa3i5jLkIW3U6kLhIV3X6mLeIS3Z6SM_Io2a6VMWIm2i6YMlHl2U7c2000000000000000000O100001O0000000000000000000000000000000000O10000001O00O1000000000000000000000000001O0000000000000000000000000O10000O1SOSI^In6[6S1001O1O00001O00001OmIeGY5[8cJkG\\5T8cJnG\\5R8dJnG\\5R8cJoG^5P8cJoG]5Q8cJnG^5R8dJlG\\5T8hJfG[5Y8j0000bJiGeN0n4W8TL\\Hj3d7PLbHQ4]7nKeHQ4[7mKgHT4X7kKiHU4W7jKjHW4V7gKkHZ4T7fKlHZ4T7`KRIa4m6[KXIe4g6[KYIe4g6ZKZIh4d6XK\\Ii4c6WK]Ij4a6WK
_Ii4a6WK_Ii4a6WK_Ij4`6VK`Ik4_6TKbIm4]6SKcIm4^6PKcIQ5]6nJdIS5[6mJdIT5\\6lJaIW5_6iJRIf5n6ZJQIh5n6ZJPIf5P7\\JmHe5R7^JlHb5T7_JjHb5V7_JiHa5W7^JiHc5X7\\JgHe5Y7[JeHg5Z7ZJfHf5Z7ZJeHg5[7ZJdHg5[7ZJdHf5\\7aJ\\H`5d7bJZH^5f7cJYH]5g7R101O00aIZH[5g7dJ`HV5`7iJbH=Kh0M9e7bNdH9Oi0J;c7aNPIUOB6=e1A>`7_N_Jo0UNa0\\7]NcJP1RNc0[7XNjJR1lMf0Z7VNnJP1kMi0X7PNWK7]Ni1b;000O1O10000O1O2O0O100N2_ObMUDa2b;g02YMQDQ2Z<_MRDZ2a00001O0O1000000O1N2O100N2]LDXH>[7nMdAT2\\>PN^AR2a>PNVAX2i>nMY@k2b=oLUCe4j<_KfBALT5\\=W1O1O1M3O100O1N2O1O100O1M3O100O1O1O100O100O10000000000000000000000000000001O00aNUIiEk6U:XI`EJXOo6W;a1N2I7N2O1O100O1O1O100O1O100O1O100O10000O1O1O1O100O1O100TOfFQG[9l8iFSGW9j8oFdFD9]9Q9XGmFi8R9S1O100O100oM^EdJb:[5`EcJa:\\5aEbJ`:^5aE_Ja:`5aE\\Jb:c5_E\\Jb:c5bEWJa:i5aESJa:m5`ESIAa0R;[6_ESI_OI2129P;j6\\EXIAE5ON0Q;T7ZE_IE^OX;R7SElIj:T6VEmIi:S6WEmIi:R6XEmIi:S6WEmIi:R6XEmIi:S6XEkIi:T6XElIh:T6YEjIh:V6YEgIi:X6ZEbIj:^6WE`Ij:`6YE]Ig:c6ZE_Ic:a6fEXIX:h6fEXI\\:i6`EZI`:W6aDiIU1J[:\\6eDcIk<\\6=000000000000O10hBgIiYL^A?GoN2k2j>eMjA1IT2`>eMlA3HV2_>bMnA4GW2h`0N3M1O2J6XOZ^OQOia0h0b^ORO_a0j0X1D;E8Hm]T2N^njM=Ji00i7ZN]Ag1c>]NWAe1i>bNo@_1Q?h1O10000000000O10000O10000O100O1O100O100O1O100O100O1O100O1O1O1O1O1O1O1O1N2M3lKeJ`I^5_6dJ^I^5`6eJZI`5e6aJoHk5P7VJfHT6Z7mIYGM`NKh0`6^9iIXGc0gNg5P:gIVGb7i8_HVGb7i8^HVGd7i8]HUGe7k8[HTGf7k8[HUGe7j8\\HVGd7i8]HWGc7i8^HVGb7i8_HVGb7j8^HVGb7i8`HUGa7k8bHRG^7o8cHnF^7R9gHiFY7X9jHbFX7_9lHZFV7f9mHSFW7n9]1001O1O1O1O2TGlEh7V:QHSFk7U;L2N2N2N2`LXDdNi;X1^DdNc;X1bDfN_;W1dDhN];V1dDjN];S1fDlN[;P1hDPOY;m0iDSOX;k0jDTOW;h0lDXOU;c0PE\\OY;ZMeC2?[1j0Y1oYLmAg3Q>\\LnAd3o=_LQBa3n=bLoA_3o=dLPB\\3n=gLQBY3n=jLoAW3n=mLPBT3n=oLPBR3o=PMmAS3l=d1J6J6E;D3M101N2O2M3N1O1N0100O010O01O0001000O010O001M201O100O1O1O1O1N2N3N1N2O101O000O2O0O101O0O2O1O1O1N2N2O1N3N1O2M3N2N1N4M2LLXCWNeoDA]12d94_]A4YAIc>;N2O1O10000O01O0102NO0020aA@[>c000O00100O10O1O001O001O1O001O001O001N2O1M3O1O10OO3M2O00Y`g5" + } + ], + "model_output": "A small bird with a stout build, featuring a mix of brown and white plumage, stands on a wall. 
The bird has a distinctive black eye stripe extending from the beak through the eye, a short, conical beak, and a white underbelly. Its wings display a pattern of brown and white feathers with darker brown edges, and the tail is long and narrow with brown and white feathers. The bird's legs are thin and pinkish, with three forward-facing toes and one backward-facing toe on each foot.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_28.png", + "subject_name": "bird", + "object_name": "floor", + "predicate_name": "walking on", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "\\WR3=k<5L4K5M3L3N3M3L8H>C3M2N2M3N3MAVEdNd:\\1^EdN`:\\1bEdN]:\\1dEdNZ:]1gEbNY:]1jEaNV:_1kE`NT:a1nE]NR:c1oE\\Nj9JgEk1`0ZNg9NhEi1a0XNf90iEh1b0WNBLj96RFg1c0VN@Oi96SFe1e0VN^OOi98SFc1g0UN]O1f99VFa1g0UN]O1e99XFa1g0SN]O3`9=\\F^1l0VNd8`0`FZ1k0WN]8f0iFS1j0WN[8h0kFQ1j0WNZ8h0mFQ1h0XNZ8k2fGUMZ8k2eGVM[8j2eGVMZ8k2eGVM[8j2dGWM\\8i2cGWM^8i2aGWM`8h2_GZMa8f2\\G]Md8c2TGeMl8[2oFkMP9j2TG^Ll8]3YGbLg8[3]GdLd8Z3^GeLd8X3j0dMPFW1U:]NTFa1Q:YNTFb1FYNi:c1[EaN`:^1cEaN]:]1fEbNZ:\\1hEcNX:\\1kEbNU:[1oEdNR:Z1PFeNP:Z1QFfNP:X1RFgNo9V1SFjNP:o0UFPOm9k0VFUOk9i0VFWOl9e0VF[Om9a0TF_Oo9=RFCR:7QFHi;000QZY4" + }, + { + "size": [ + 427, + 640 + ], + "counts": "\\7m5^7000000000000000000000000000000000000000000000000000001O00O10000001O000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001O000000000000000000000000000000000000000000000000000000000000000000000000000000001O00O1001O00000000000000000000000000000000000000000000000000001O1O4L3M2N2N1O2N1O2N2N1O3M1O2NcJYOYOV1g0jNQO^1R5b0\\Od0A?@`0YOg0TOl0L4O1M3N2N2N2N2N2N2M3N2M3O1N2O1N2O1N2N2N2N2N2O1N2O1O1O21M3N3M2N2N3M3M2N1O3M102MGRJjJl5f5K3M3M2N3M2N3M2N3M3M1O2N2N4L5L7H9G9F9H7I7IgLoLUNh2k1XM[Nc2e1\\MdN[2\\1eMlNS2T1mMUOi1k0XN\\Ob1c0^NDZ1=fNJS16mN3j0MVOc03DMS61O1^DN];2cDN\\;3dDM];2cDM^;2cDN\\;4bDN];1eDM];0_V20giM1O0O11O0O10O11000SlV2" + }, + { + "size": [ + 375, + 500 + ], + "counts": 
"nnW35a;2N2ZEJd9=XFFf9=UFFj9=nEJQ:7lEKT:l000000000001O001O001O000O1001O0O100O10000O10000O10000000O1000001N10001O0O10O1O100000O0100000000O100000010O0100O010O01O00000000000O100000000000000000000dNeN\\H[1c7gN\\HY1a7kN^HU1b7kN^HU1a7lN_HT1a7lN^HU1a7lN_HT1`7mN`HS1_7nNaHR1]7PO[HDVO]1]8ROZHEWOY1^8UOWHV1i7lNSHV1l7lNoGX1Q8iNlGY1T8iNhGY1X8iNeGX1[8R10000oMoL]KP3b4SM\\Km2c4VM[Kj2e4XMYKh2f4ZMYKf2g4[MWKf2i4]MnI^Oj0U3X5]MkICk0P3Z5_MhICl0o2]5jMbJW2^5iM`JY2`5gM]J\\2d5cMYJ`2g5`MXJa2i5_MUJb2k5^MTJc2n5[MRJe2Q6XMnIi2U6TMjIm2W6SMhIm2Z6QMfIo2]6nLcIR3^6nLbIQ3_6nLaIR3`6nL_IW1DNn6kN^IQ3c6oL^Io2d6PM[IP3f6PMZIo2g6QMYIn2i6QMWIn2k6QMUIn2l6QMTIo2m6QMRIo2n6QMSIn2m6TMPIm2Q7TMeH@1_3Z7i01O00001O0000O10000O1000000O1XOhKQJX4o5hKPJY4o5iKoIX4Q6hKmIZ4S6gKiI\\4X6eKaI`4`6;1O00000000001O2N1O1O000000O1N2O100O1000000O100000000001O2N6J3M2N4L1O1OO100O1O100O1O10000001O1O001O1O1O1O1O2N1OdH" + } + ], + "model_output": "A tree with numerous slender branches covered in small, elongated green leaves stands beside a stop sign. The branches spread out in various directions, creating a dense canopy of foliage.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_30.png", + "subject_name": "grass", + "object_name": "stop sign", + "predicate_name": "on back of", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "\\:c4^:O000000001O1O001O1O001O1O1O2N1O001O00001O000000O10000O100O1O100O1O1O1O100O100O10000001O001O001O00O100000000001O1O1OO1O1000000O1O1000000000000001O001O1O1O001O0000001O0000001O1O0000001O1O001O1O1O1O1O001O1O1O1O001O001O1O1O1O1O1O1O1O1O1O001O1O002N001O1O001O1O1O001O1N2O1O001O1O002N010O1O1O1O001O1O1O1O001O1O1O1O1O001O1O1O1O001O002N1O001O001O1O1O1O001O001O1O001O1O1O001O1O1O1O1O001O1O1O1O1O1O001O1O1O1O1O001O1O1O1O1O001O1O1O1O001O1O1O001O1O001O1O001O1O001O001O00001O00001O00000000000000000000000000000000O1000000000000000000000000O1000000000000000000000000000O2DoAEogX5" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"abe1h1V=Z1kBQMh;c3@l0TOf0ZO?A4L2N2N2N2N2N2M201N2N2N2N2N1O2N2N2N2N2N2N1O2N2N2N2N1O2N2N2N2O1N2M2O2N3M1O2N[MPI]Mn6b2VI\\Mj6d2VI]Mh6d2WI_Mf6b2XI`Mg6a2YI`Me6a2[I`Mc6a2\\IaMa6a5OnL`IeM^6[2dIdM[6]2eIQKNS2[6m2gIcMX6^2hIcMV6^2jIcMT6^2lIcMR6]2oIdMo5^2PJcMn5i5N2N2N2N]MXJPLg5P4ZJQLd5P4\\JPLc5R4\\JPLa5i6N2N2N1OYMgJjKW5W4iJjKU5W4kJjKS5W4mJjKQ5S7OUMPKiKn4X4SKgKl4Z4SKhKk4Y4UKiKh4Y7NPMZKhKe4j3[KdI0c2c4Y4]KgKb4Z4^KhK`4]7NlLbKhK]4Y4cKhK[4Y4dKhK[4`7N2N2N2N2N2N1O2N2NfIWL`1g3bNYL^1e3cN\\L]1b3Q5O2O1MbIbL_1]3aNcL_MOa1\\3Q1eL^MOa1[3c3eL^LX3g7N3N1N1OcJoLmNP3c6NkJUMWNj2j1XMUIN6M]4l2Y2TMYI0j4i2V7N3N2N3N1N1O2N1O2N2O0O1O2N1000O1000000001OO1000001OO10000001O0O100000000000000000001N1000000000000000000000001O000O1000000000000000000O1000001O00000000000000001N100000000000000000000O2O0000000000000000000O1000001O00000000000O10000000001O00000O10000000000O100000001O00000000000O11O00O10001O000000000000000O2O000000000000000O101N1O1O1O1O1O1O1O100N2O1O2N1O1N2O100O2N1O2N2N2N2N1N3N2N3N3L2N1O3M2N2N2M3N4L2O1N2M3N3M2N2N2N2M4N0O2N3M3M2N2N2M3N2N2O1N2N3M3L3N3M1O3M2N2N2O3L2M3N2N3M1O2N3M3M2O1N2M4L3N3M1O3M2O1N2N2N3M2M4L3N2O1N2N2O1M4N2L3M3N3M1O2N2K5H:]Oa0H9^Oa0Df2`1000001O1MgN" + } + ], + "model_output": "A bouquet of deep red roses with velvety petals, tightly clustered together, creating a lush, rounded appearance, sits beside the TV.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_32.png", + "subject_name": "truck", + "object_name": "road", + "predicate_name": "parked on", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "Q7V1U<1O1O1O00001O000O0100O1000001O000O101N10001O1O1O1O00000001O1O2N1O1O000O2OOO2O1000000000O100O10000O10000001O1O1O000000000000000001M2O1O1000000O1O1O100000000000O10O100L4O1O2N2WObC4a[V7" + }, + { + "size": [ + 427, + 640 + ], + "counts": 
"\\8m08E\\;;dDE\\;;dDF[;:eDF[;:eDF\\;9dDG\\;9dDH[;8eDH[;8eDH[;8eDH[;8eDHZ;9fDHY;8gDHX;9hDHW;8iDHW;8iDHW;8iDIV;8iDHV;9jDGV;9jDGV;9kDGT;9lDGT;9lDHS;8mDHS;8mDHS;9lDGU;8kDIT;7lDIU;6kDKU;4lDKU;4kDLU;4kDLU;4kDLU;4kDLV;4iDLW;4iDMV;3jDMV;3jDMV;3jDMV;3jDMV;3kDMT;3lDMT;3lDMT;4kDLU;4kDMT;3mDLR;5nDKR;5nDLQ;4oDLQ;4oDLQ;4oDLQ;5oDKP;5PEKP;5PEKQ;4oDMP;3PEMP;3PENo:2QENo:2QENo:2QEOn:1REOn:1REOm:2SENm:3RENm:2SENm:2TEMl:3TENj:3VEMi:4WELi:4WEMh:3XEMh:3XEMg:4YELg:4YELh:4WEMi:2XEMm:NSE3m:LSE4m:LTE3m:LSE4m:LRE5o:JQE6o:JQE7n:JRE5n:KRE5n:KRE6m:JSE6m:JSE6m:JSE7l:IUE7k:HUE8k:HVE7j:IVE7j:IVE7j:IVE8i:HWE8i:HWE8i:IVE7j:IVE8i:HWE8i:HWE8j:GVE9j:GVE:i:FWE:i:GWE8i:HWE9h:GXE9h:GXE9h:HXE8g:HYE8h:GXE9h:GXE9h:GYE9g:FYE@O6k:8VE_O37h:9VE[O7;c::lEES:=mECR:=oEBQ:>oEBR:=oEBR:>nEBR:=nECR:>mEBS:>mECR:>]EXON9e:`0ZEZO07e:a0WE[O34f:c0SE[O72g:R1YEnNg:S1XEmNh:S1XEmNh:S1XEnNg:R1YEnNg:R1YEnNg:R1YEnNf:T1YEmNf:S1ZEmNf:S1ZEnNf:P1[EPOe:P1\\EoNd:Q1\\EoNe:P1\\EoNd:1oDa0=_Oc:P1]EPOc:P1]EQOb:n0`EQO`:S1\\EmNd:T1ZEmNf:S1[EmNd:S1\\EmNd:T1[ElNe:T1[ElNe:T1[EmNc:T1^EkNb:U1^EkNb:U1^EkNb:V1]EkNb:U1_EkN_:V1aEkN^:U1bEkN]:V1cEjN]:V1cEkN\\:U1dElN[:T1eElN[:U1dEkN\\:U1dEkN\\:U1dElN[:K\\Ei09\\OY:V1gEjNU:Z1kEfNU:e0bE[O90U:Z1kEfNU:`0bEF8JU:\\1kEdNU:\\1kEdNU:\\1kEdNU:\\1lEcNT:]1lEdNS:\\1mEdNS:\\1mEeNR:[1nEeNS:Z1mEfNS:Z1mEgNR:Y1nEgNR:Y1nEgNQ:Z1oEgNP:Y1PFgNP:Y1QFgNm9Z1SFfNm9Z1SFfNm9Z1SFfNm9Z1SFfNm9Z1SFhNk9X1UFhNk9X1UFhNk9X1UFiNj9W1VFiNj9W1VFiNj9W1VFiNj9W1VFiNj9W1VFjNi9V1WFjNi9V1WFkNh9U1XFkNg9V1YFiNh92QF47Kg9V1YFjNh94oEO9Ng93PFO9Ng92QF08Ng9OTFXOKf0:3g99oED:3g9:UF\\O4;f9S1ZFmNf9S1ZFmNf9S1ZFmNf9S1ZFnNe9R1[FnNk9l0UFTOn9i0RFWOQ:f0nE[OT:c0lE]OV:a0jE_O]::cEG_:6aEJ`:5`EK`:5`EKa:4_EM_:4aEL_:4aEL_:4aEM^:3bEN]:2cEN]:2cEN^:nNYEP193\\:mN]Eo065o9POnE0Kk094m9UOlEMOh087h9XOQFIOh078d9]OTFe08Nc9h0]FXOc9h0]FYOb9g0^FYOb9g0^FYOb9g0^FYOb9g0^FYOb9f0_FZOa9f0_F[O`9e0`F[O`9e0`F[O`9e0`F[Oa9d0_F]O`9c0`F]O`9c0`F]O`9c0`F]O`9c0_F_Oa9`0_F@a9`0_F@a9`0_F@a9@XF27>a9@XF]O285k0b9JZFlNL;8P1a9J]FVO2P1b9JWFZO7l0b9<_FDa9]OXF27a0a9^OVF29a0`9;`FEa9:_FFb99^FHc96\\FKf93ZFNf91ZFOh9OXF1i9NWF
2j9MVF3j9MVF3j9MVF3j9MVF4i9LWF4i9KXF5h9KXF6g9JXF8g9HYF8g9HZF6g9JYF6g9YOQFC7U1h9gNRF3O17V1g9fNSFd06g0f9eNTFd06h0e9dNTFe07g0e9F[F:e9VOXF]O3]1e9UO[F\\O0`1d9TO\\F\\O0`1d9TO\\F[O1a1c9VOUF^O8\\1c9D]Fc9A^F?b9A^F?b9YNVFR18f0c9VNXFR15h0j9_NmE=9T1m9TOSFl0m9TOSFl0m9TOSFm0k9TOUFm0j9SOVFn0j9TNnEh09T1n9SNjEd08X1S:]NiEJ4i1S:]NjEI3j1S:]NeEG127j1S:]NeEH018k1f9hNRF\\OO29k1e9hNUF^O5l1e9fNWF\\O5o1c9eNXF\\O5o1d9cNYF]O3P2e9bNXF^O3P2n9YNoEG3P2j9]NRFD3Q2f9nMQF`02C7o1f9iNZFX1d9iN\\FW1d9`NSFC9m1e9_NRFD9m1i9[NnEH9n1g9[NPFG9n1e9]NRFD:o1c9^NTF^O1O8U2d9]NSF^O207U2e9\\NbFe1_9^NRF\\O7V2g9bNYF^1h9bNWF^1i9bNWF_1i9`NWFa1k9\\NUFd1l9[NTFd1m9[NTFe1h9jMPFa08e1g9`NYF`1[9iMgFf0Na1[9iMjFc0Kd1[9jMiFb0Lg1X9gMjFd0Mf1Y9fMhFf0Od1Y9fMiFe0Ne1Y9fMoF>Il1X9fMPG=Hl1Y9gMnF>Ik1Y9gMgFe00d1Y9gMgFe0Of1Y9eMhFe0Og1X9dMiFe0Oh1W9cMjFe0Oh1W9cMjFe0Oi1V9bMkFe0Oi1V9bMkFd00j1U9bMkFd0Om1T9`MlFc00m1T9`MlFc00m1T9`MlFc00m1T9`MlFc00m1T9_MmFd0Om1T9_MmFd0No1`9QN`Fo1`9QN`Fo1`9PNaFP2_9PNaFo1a9PN_FQ2`9oM_FR2a9nM_FR2V9[MlFc0OQ2T9^MlFa00P2V9^MjFb0OQ2W9]MjFa00R2a9nM_FR2a9nM_FR2a9nM_FR2a9nM_FR2a9nM_FR2a9nM_FR2a9nM_FR2a9nM_FR2a9nM^FS2b9lM_FT2a9lM_FT2a9lM_FT2a9lM^FV2a9jM_FV2a9iM`FW2a9hM_FX2a9hM_FX2a9hM_FX2a9hM_FW2b9iM^FW2b9iM^FX2a9hM_FX2a9gM_FZ2a9fM_FZ2a9fM_FZ2a9fM_FZ2a9fM_FZ2a9fM_FZ2a9fM_FZ2a9eM`F[2`9eM_F\\2a9dM_F\\2a9dM_F\\2a9dM_F\\2a9cM`F]2`9cM`F]2a9bM_F^2a9bM_F^2a9bM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b9`M_F`2a9`M^Fa2b9_M^Fa2b9_M^Fa2b9_M^Fa2b9_M]Fb2c9^M]Fc2c9\\M]Fc2d9\\M]Fd2c9\\M]Fd2c9\\M\\Fe2d9[M\\Fe2d9[M\\Fe2d9[M\\Fe2e9YM\\Fg2c9ZM]Ff2c9ZM]Ff2c9ZM]Ff2d9YM[Fh2e9XM[Fh2e9XM[Fh2e9XM[Fh2e9WM\\Fi2d9WM\\Fi2d9WM\\Fi2d9WM\\Fi2d9WM[Fk2d9TM]Fk2d9UM\\Fk2d9UM\\Fk2d9UM\\Fk2d9UM\\Fk2d9UM\\Fk2e9SM\\Fm2d9SM\\Fm2d9SM[Fn2e9RM[Fo2d9PM\\FP3e9QMZFo2f9PM[FP3e9oL\\FQ3n91000000O101OO010000O100O100O1FiL_FX3`9jL^FW3b9900O100000001N100O1O0100000O10000O2M200O0100000000000O101OO1000001O1O1O000000O1O1N200O100000001N11O3L5L1O01O00O11N1O1K5O10O11O00000O1000000000000001O0000000000O100000000O10000O1000000000000O100000000O100001OO1000000O
11O1O00000TOjFcMW9Y2SG_MQ9]2Q1N2N3M2M5L4L00000000O100C=O1000O100000000001O0000000000000000O11O000dF" + } + ], + "model_output": "A red fire engine, featuring a silver front bumper, a set of headlights, and a side compartment with reflective striping, is parked on the road. The vehicle is equipped with a side-view mirror, a visible door handle, and a section of the wheel well with a black tire. The side of the truck displays a reflective strip and a series of compartments with handles, and there is a visible hose reel with a yellow and black striped pattern.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_33.png", + "subject_name": "paper", + "object_name": "pavement", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 500, + 333 + ], + "counts": "R`?3`?2N3N1O2M2O1N103M1N3N1O1N2O1O0O2O0O0100O2VOZAd0k>00O2OO010000O01000O100O100O2O00000O2N1N2N3HS?0TA0^Ul3" + }, + { + "size": [ + 500, + 333 + ], + "counts": "Q>e0U=\\OmBd2OaM100LS<_3M100O100O100O100O1O100O1O10000O11O00OBYDQMf;P3]DmLd;Q3^DnLb;R3_DmLa;S3`DlL_;T3cDkL[;W3gDgLY;3ZDn2>oLY;V3iDiLX;V3e0L3M4N1N2N2N3M3L3N2N3L3M2L5J6K4O1O1O1000O010O10O10O010O0100O001O001O001O10O10O10N101O001O1G^N`Bb1^=dN^B\\1b=;2M2O1O1O2N100O2N1000010N10000000O1O2O0O00001O1O1O2N6J2ZCoMe;Q2WDVNf;k1WDZNf;f1YD^Nd;e1VD^Nj;f1mCeMMg0W?VA@a>o0F9F:H8I7F:H9F9H8CDH8EE;K6B=K5N3N100010O01PLkI_NV6c0gJ]OZ58PKHQ57oJIS55lJLU53kJMU53kJMV53hJNX52gJOY52fJN[51eJO[51dJ0]50bJ1]5OcJ1^5NbJ2_5N_J3a5M_J3b5L^J4b5L]J5c5K]J5d5K[J5e5KZJ6g5JXJ6h5JWJ7j5HVJ8j5IUJ7l5HTJ8l5oMPHn0T2S1l5oMQHm0R2U1n5mMRHm0o1W1n5lMTHl0m1Y1P6jMTHm0k1Y1Q6kMTHl0i1Z1T6iMSHm0i1Z1T6jMSHk0i1[1U6iMSHk0g1]1V6hMSHk0g1]1V6hMSHl0f1\\1X6hMRHk0e1^1Y6gMRHk0e1^1Z6gMPHk0f1_1Y6fMRHj0d1a1Z6eMRHk0c1`1\\6eMPHk0d1`1\\6eMQHj0b1b1^6cMPHk0b1b1^6dMoGk0a1c1_6bMQHk0_1c1a6aMPHl0_1c1a6bMoGk0`1c1b6aMoGk0^1e1c6`MoGl0]1d1d6`MoGl0]1e1d6^MPHl0[1g1e6^MoGk0\\1g1f6^MnGk0Z1h1h6]MnGk0Z1h1h6]MoGj0Y1i1i6]MmGj0Y1j1R4nLmL>hMk0X1i1e3]MZMOiMk0X1i1X3jMhMBhMj0W1l1k2UNVNUOhMk0V1k1\\2eNfNeNhMk0U1l1m1TOWOTNhMk0T1m1_1BEfM
hMk0T1m1S1N1[MgMk0S1m1d0>c0iLgMk0R1n1NT1Y1SLgMk0R1n1F\\1a1lKfMj0R1P2ZOf1o1_KfMj0Q1k7Y1\\GeMj0Q1j7[1[GdMj0R1k7Z1[GdMk0P1k7\\1ZGdMk0P1k7\\1[GdMi0P1l7]1ZGcMk0o0k7^1ZGcMk0n0l7_1ZGcMi0n0m7`1ZGaMi0o0m7`1ZGaMj0m0m7b1ZG`Mi0n0m7b1[G_Mh0o0m7c1]GZMg0S1l7c1oH\\NR7d1nH\\NR7e1nHYNS7g1mHZNR7f1nHZNR7g1mHYNS7g1mHYNS7g1mHYNS7g1mHXNT7h1mHWNS7j1lHVNT7j1lHVNT7j1lHVNT7j1mHUNS7l1lHTNT7l1lHTNT7l1lHTNT7m1kHSNT7n1lHRNd6^2\\IbM]6e2cI[M\\6g2RIYLiNQ1V8e2iHfLkNg0\\8c2dHXN\\7h1aH[N_7e1^H[Nd7f1[HZNg7e1XH]Ng7c1XH_Ng7`1YHbNe7_1ZHdNd7\\1[HeNe7\\1YHfNf7Z1YHhNf7X1ZHhNe7Y1ZHhNf7Y1YHgNf7Z1WHiN`7`1`HaNV7i1iHWNj6V2UIkMa6`2^I`MZ6h2eIYMP6R3PJnLe5^3YJcLP5U4nJlKd4b4\\KXKE`Mf4c7cKlJ[4[8\\KXER4Z;F;I6N3N1O101O000000001O00001O0000001O0000001O0000000O2O0000001O0000001O0000001O0000001O00000000001O1O0O1000001O0000001O0000001O000000001O000O11O01O000O2O000000001O1O0000000000001O000000001O0O11O01O000101N4Lb0^Oc0^OV1\\DoJW:U6RO`0@W1jN;D000001O00001O000000001O000000001O0000001fHnHP6R7mI[Ii5f6RJnI\\OfNn5\\7eJ]JQ5c5nJhJh4^=" + } + ], + "model_output": "A brown and white pinto horse with a predominantly white head, a brown patch over the left eye, and a brown mane stands on a chair. The horse's body displays a pattern of large brown patches interspersed with white areas, and it has a white underbelly and legs. 
The tail is not visible.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_35.png", + "subject_name": "person", + "object_name": "giraffe", + "predicate_name": "feeding", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "mgV58^;4L1O100O100O01000O100O1000000O1000001O1O001O1O2N1O1O2O1N2O00N1N3M3M3M4L3MaT1GhI" + }, + { + "size": [ + 375, + 500 + ], + "counts": "bil0=U;:H6J5K3N2O0010O001O100O001O010O00100O001O010O10O0100O010O10O10O100O10O10O10O010O10O10O1000000O10O01000000O10O10O10000O1000O10O101OO10O@aEJ_:5fEF[::hE@FOb:a0a00O10000O10000000001N100O10001N10000O10001O00001N100O2O1O00001O0010O010O0101N003N2N1N2O1N101N1O101O0O2O0000000001OO2O0O101N8H1O1O1N2L3O1O2O000000O1RNjF>2IV9HlF5;Jl8NnF1W:MkE0mRT3" + } + ], + "model_output": "A person with short, light brown hair, wearing a white shirt with a collar, is feeding a giraffe.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_36.png", + "subject_name": "person", + "object_name": "pavement", + "predicate_name": "walking on", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "Wme21kc06N000[O6Q]OMkb0d01O14T]OXOYb0[1L3N2N3L5K1O1DSNd^Om1[a0=O0100000001O1kM_^Oh1da0PNb^OP2ha0N1G9F:K6O0O2O1O2N3Mf0SO[\\O1gVP6" + }, + { + "size": [ + 640, + 480 + ], + "counts": 
"hU18d?LVDZ2i9nMPFR2o9lMTFT2l9jMVFV2i9jMXFV2h9jMXFV2h9iMYFW2g9dM^F\\2c=0TJXN`Ih1[6bNaDQOl4]2^6QO[Io0d6UOYIk0f6XOXIh0h6XOXIh0h6WOYIi0g6WOYIi0Y7_NmHa1Z;QNm@o1o>YNm@g1P?aNk@_1S?eNk@[1n>oNo@Q1n>UOo@k0P?XOn@h0P?[On@f0o>_Oo@a0n>El@>Q?Z2M3N2N2O1M3N2N2N2O1O1O1O100001O1O1O1O1O1O1O1O1O1O1O1O1O001O1O1O00001O001O0000POXKhBh4U=^KYBD7n4_=iK_BW4`=kK_BU4_=nK`BR4_=PL_BQ4`=RL]Bo3c=SLYBo3f=V100M3O1N200O1001gIbBJ5k5S>I2N1O1O00002N;E:F7iLl@T1Y?cNm@[1[?YNi@g1b?lMa@S2a?hMc@W2_?eMc@[2^?`Mg@_2]`00001O00001O00000000000000001O000ZO`^OlN`a0S1a^OmN^:EWL_1ZIlN]:LTLn1l3RNULm1j3WNSLi1m3XNRLh1m3]NoKc1P4`NfETOU6\\2T4eNkK[1U4fNjKZ1k3iMSFj0OROa23k0X2a6lNSGhNQO\\1Y1`7lNTG]O1e0S1R1h7lNTG]O2j0l0m0n7iNXG_O0a136e8iNYG@4c1G4l8hN[G_O4d1E5l8_NVFVOZ1a00e1C5n8YNjGLFf1A5o8UNPHMBi1^O5Q9oMUH2]Oj1\\O5S9nMUH1^Ol1YO5T9mMVH1^Om1XO5T9mMVH1_Om1VO5V9lMVH1^OQ2SO2[9iMUH4]OR2RO1j;mMTER2RO1k;lMSES2RO1k;lMREU2ROOm;kMPEW2SONm;jMoD[2SOKo;iMmD^2SOIP`0\\2n0]:eNiBN=>^2o0X:lNgBM`06d2P1o9DYCZOj2R1k9GZCUOn2S1h9HZCTOP3S1e9J[CQOR3U1c9KZCnNU3W1a9KZCmNV3X1`9K[CkNW3Y1^9L[CjNX3Z1]9L[CiNh1M\\O^1a;L[ChNa1OVNk0d0b0ZB1O00O1J6L4[OSNU_Oo1o?gM]@S3`?TMR@J1S3m?g0001O1O00001O1O2TMk_OTO0`2W`0PNc@e1Va0F5K1O1O0000IZNS^Og1Sb0000002YNh]O[1cb0L3NO2001O2N2M3DM4O010N2O1N101O001O01O00001N100O2O1O1N10100O_OnI_KR6`4PJ_KQ6Q4^JoKb5k3dJUL\\5k3oIhK?;e5k3bJUL^5k3bJUL_5j3aJVL`5i3`JWL`5i3`JVLa5j3_JVLb5h3^JYLb5g3^JYLc5g3\\JXLe5h3[JXLf5n3gIhK4:U6`3dIVL3O2129W6^3UJYLD9W6\\3WJ[LB9W6[3XJ\\LA9W6Z3ZJ\\L_O:X6W3fJiLZ5V3gJiLZ5W3fJiLZ5V3gJjLY5U3hJkLX5T3iJlLW5T3\\JcL]O9W6T3\\JcL]O9W6S3jJmLV5S3jJmLV5S3iJnLW5R3hJoLX5Q3^JeLUO<]6n2^J[Mc5d2\\J]Mc5d2[J^Me5b2ZJ_Mf5`2ZJaMe5`2YJbMg5^2XJcMh5\\2XJeMg5\\2XJeMh5Z2XJhMg5X2XJiMg5X2XJiMh5V2WJlMh5T2WJoMh5P2XJQNg5P2XJQNh5o1WJSNg5m1YJTNg5l1WJVNh5j1XJXNf5h1ZJYNf5g1XJ[Nh5d1XJ^Nf5c1YJ^Ne5f1nIiL1c1o5k3N3L3K6I6K5N2M3L4LdMTKUNh4o1YKPNe4Q2\\KPN`4S2`KmM_4T2aKmM[4V2eKkMY4V2gKRMNoNY4P4jKoLNROW4o3lKlL0VOP4P4QLiLOYOm3o3TLgL1]Of3m3YLeL3AQ3QOPMk4LbL4Hd1lNoN]5XOnK5NT1GdNd43gK53l0`5nN]J67g0^5RO[J7h<01M20100000O1nNEXES;BmD>S;BmD=T;ClD=T;DjD=V;EfD=Z;DeD<[;DeD<[;DeD
;];EbD<^;DaD=_;C`D>T;]OhD44`0R;^OkD12a0R;_OmDO2b0o:@oDO1`0Q;AnDO1`0Q;AoDOO`0S;@nD1Ma0U;^OnD0Mb0U;@lDONa0V;3gDMZ;P10XNjD^1c;N1O1O1O000O_DgNT;Y1hDoNT;P1kDTOS;l0lDWO7DZ:V1\\EYO8CZ:l1dEUNP:LoEQ20SNo91nEm12SNn9\\2PFeMQ:h2001O0\\MmET2T:jMPFR2Q:mMPFS2Q:lMQFR2R:jMQFS2Q:jMQFV2`:0O01DUE\\Nk:c1WE\\Ni:c1XE\\Nh:c1ZE]Ne:c1\\E]N_:f1dEWN^:f1a0XOgDGV;9mDFR;;oDCQ;=RE@n:a0UEZOl:g0k0N2O2ROjCg0[<000O2N2N2N1O2N2O1N2OO0O110N2O00000L400100O20N100N2O0010O01O5^OeC0a1O10000001O3M3M1O001O00000000000000000000000000O100^NSMWB1A2=j2^>_MbAa2^>_MbAa2]>aMbA_2l=SMSB?1^2l=SMSB?0_2^>`McA`2]>`McA`2]>`McA`2]>`McA`2]>`McA`2]>`McA`2]>`McA`2]>`MbAa2^>_MaAb2^>_MbAa2^>^McAb2]>^McAb2]>^McAb2^>]MbAc2^>\\McAd2\\>\\MdAe2\\>XMgAh2Y>VMiAj2V>VMkAj2U>UMlAk2R>WMnAi2Q>XMoAh2o=ZMRBe2l=\\MUBd2i=^MWBb2i=^MWBb2i=^MWBb2h=_MYB`2g=`MZB_2f=aM[B^2e=bM[B^2e=bM\\B]2d=cM\\B]2d=cM\\B]2d=cM\\B]2e=bM\\B]2d=cM\\B]2d=cM\\B]2d=cM\\B]2d=cM\\B]2d=cM\\B]2d=bM]B^2c=bM]B^2c=bM\\B_2d=`M]B`2c=_M^Ba2c=]M^Bc2b=\\M_Bd2a=\\M_Bd2a=\\M_Bd2a=\\M_Bd2a=\\M_Bd2a=\\M_Bd2a=\\M_Bd2a=\\M_Bd2a=]M^Bc2b=^M]Bb2c=`M[B`2f=_MZBa2f=`MYB`2g=`MZB_2f=aMZB_2f=aMZB_2f=aM[B^2e=bM[B^2e=cMZB]2f=cMZB]2f=cMnAEFh2\\>dMlAFGf2]>eMjAGGe2_>dMjAHFd2`>dMjAi2V>WMiAj2W>VMfAm2Z>TMdAm2\\>SMdAm2]>RMbAo2^>RM`Ao2`>QM_AP3a>f00000000000001O00000000000000000000001O001O4L2N1O1O1O001O1O1O1O2N1O2N1O1O1O2N2N3M1O1O1O2N1O1O1O1O1O1O1O001O1O1O1O001O1O001O1O001O1O001O1O1O1O1O001O1O1O001O0000001O00001O000000001O000000001O0000000000001O000000000000000000000000000000000000000000000000000000000000000000000000000000000000O10000000000000000O100000000O10000000000O1000000O1000000O10000O100O1O100O1O100O1N2O1O100O1O1O100000000001O00001O0000001O0000000000\\OaMj@_2U?cMi@^2V?eMg@\\2Y?eMe@\\2[?c0O10000000000O1000000O10000O100O100O10000O1000000O100O10000O1O10000O10000000000000000000000000000000000000000000000000000000000O100000000O10000O100O100O100O10000O1O100O100O100000000001OP1PO5K3M2N:F8H=C;E3M2N1O1O001O1O001O001O001O0000001O000000000000000000001O0000000000000000000000000000O100000000000000O100000000O10000O1O1L4
K5hLmN]BN\\O0R1j1T=ZNUBb3k=^LUBb3k=]LWBb3i=^LWBb3i=^LXBa3h=\\L]Bb3c=]LaB`3b=\\LbBa3j>aLe@NF3NX2]a0SO?A6J4L3M001O1O001O001O001O0000001O000000001O0000000000001O0001OO100001O00000000000000O1000O2O000000000000O100000O2O000000000O0100000N2O1nK1TE1l:3lD1S;1lCHnMd0V>EkCn0UdDA\\;`0dD_OUM" + } + ], + "model_output": "A white ceramic bowl with a flared rim and a scalloped edge design, featuring a subtle floral pattern with hints of pink and green, sits on the dining table.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_41.png", + "subject_name": "sports ball", + "object_name": "person", + "predicate_name": "touched", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "kiR13W=2EOYC2d<3YCNe<<00000O10000001O00O20O0001O1ObCDn;=PDHm;8RDKl;3UDOi;2VDOj;0VD1j;0TD1k;0TD2k;f010000000000O1000VDiN`;W1_DjNa;V1_DjNa;V1_DjNa;U1_DlNa;T1_DlNb;S1^DmNb;S1]DnNc;Q1^DoNc;P1]DoNd;Q1\\DoNe;P1ZDQOf;o0ZDQOf;n0[DROf;m0YDSOi;m0TDUOm;T101O10O1O2N1N2O1N2M5J5M4L5K4L3K4NST\\6" + }, + { + "size": [ + 427, + 640 + ], + "counts": "bQT12Y=1J0oB01Nm<0`U:8QjEd0E:XDPOl:n1G7G9I6J8J5L5J5G:K4L4L5K6G7L5K5L4K3VLeKgKUOe2Y5b1mKjKjN\\11\\OY5]3nKiKjN[13[OW5_3RLUMnNTOR5e3SLTMROiN^O2a5P4RLRMi4l2[KQMf4n2[KoLh4P3YKWLWN`0a6Y3ZKTLWNa0`6Z3bKeL^4[3cKbL_4]3bKcL^4\\3cKdL]4\\3fK`L[4_3lKZLU4e3mKYLT4g3lKYLT4g3mKXLa27RL`3d6eLZIZ3d6kLZIT3d6PM[IP3d6RM[Im2e6VMYIi2i6XMUIh2k6\\MQId2o6]MPIb2Q7_MnHa2R7`MmH`2S7cMjHc0K[NAn0k79dH;>VOo6`0bH7d0VOk6h0[H2n0ROh6R1SHKZ1POc6h2dIRM]6m2gIoL[6P3iIkLY6S3P2O2N1O10000O2O0O10002N5K4K4M4L2N4L2N2N4L2N3N2M5K9H0O0010O00001O2N1O001dEfNe8[1UGkNj8W1RGlNm8U1PGnNo8S1mFPOS9S1eFRO[9o0`FVO_9k0oE_N1n0o9U2O00lNTFjNk9[201VNSMTIn2i6VMUIj2j6YMTIh2k6ZMSIf2l6`MoHa2X3fLkNLRM^1f0Q2Z3kLaNe1RNa1[3kL\\Nl1XNZ1Z3lLSMCNd2_Oo0X3XNQMX1[Ob0a3_NaLW5]3]2O1O1O1O1O1O2N001N2O1O1O1O1O1N2O1O1M4L3O1M4M2M8G6J5UNcD]1o;aNUDi0c3N2N2O1O0O101O00000000000000000000000000000000000000000000000O1000O10000000000000000000000000QJ" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"0[63nJV7R5jHnJV7R5jHnJV7R5jHnJV7R5iHWJO6W7c5jHWJO6W7c5jHUJ19T7b5kHTJ2:S7b5kHSJ3V1l0VOVOG_13`N6a0HQ>R1VBB;YOSOe0K[O89b=L^BU1IR3OhKR=2nBf10g2c<_K]CO1o6`T3^A^MT>c2jAmMg=S2XBVN`=k1^BbNV=^1iBQOij;BSDc07\\Le:R3REc08cL_:k2WEc09mLV:k6gEbIn9_6mEoIi9\\8H5K5K00000000000000O10000O100O100O1O1O1N2O100O100O1000000O1000000O1000000001O1O00001O0000001O002N1O1O00001O1O1O001O001O00001O001O1O1O001O1O1O1O001O00001O00001O1O001O1O001O001O1O1O1O0bKiDD1_LW;k3iDE2_LR9J6K4L5K4L3N1O2M2O2M101N2O1N3M2M3M3M2O2N2O1O1N1O2O001O0O1OO21O0O0XORN`Do1_;WNZDl1f;UNXDk1i;f01N2N101N1O2N2O0O2O1N2N2N1O2N100O2N1O1O100O1O1O100O2O0O1O10000O2O000000010O01O00001O0010O00010O01O1O100O1O010O1O010O1O10O01O100O1O1O10000O1O1O100O100O0010000O2N2N2O2N1N2N1O2O2M2N1O100O2O1N10000O100O1O1O100O1O0010O0101N1O3N3L5K3M3M3RHnKi4U4RKlKn4V4oJkKR5W4gJmKY5X4aJiK_5[4[JfKf5d4nI^KS6g4fI[KY6k4^IXKb6l4YIUKg6n4TISKm6S5jHPKV7]60O1000000GUH\\Il7b6YHZIh7d6;O0O2O1iNeGhK\\8T4RHbKn7[4\\H]Ke7d4[HZKf7h4YHWKg7n4SHRKn7T5gGSKW8l5OO1O2N11O1OnLiGUO]8j0XH_Nk7a1bHfLkNR1d8X2PIaMQ7_2UI[Mk6e2[ISMg6m2[IPMf6P3^IjLe6U3c21O0000001O001O010O00001O00001O00001O00010O001O00001O001O000000001O00000O10001O0000000000001O0000000000000O1000000000000000O01000000000O1000000000O01000000O10000O10O010000O100O10O10O1O1O1O100O100O100O1O\\GjL`5U3`JmL_5R3^JSMa5l2\\JXMd5h2[JYMe5g2YJZMh5f2UJ\\Ml5e2PJ^Mo5e2lI]MU6e2hI\\MX6e2eI]M[6f2aIZM`6h2\\IWMg6k2VIPMP7R3lHnLV7T3gHmLY7T3eHkL]7W3`HjL`7X3\\HiLd7Y3ZHgLg7[3VHdLl7]3QHcLQ8`3kG_LW8c3eG]L]8e3`GZLb8i3YGXLh8l3RGTLP9l42O1N2O1O1N3N1O1O1N2O1NmGnJ\\6Q5dIPK]6m4bIWK]6h4cIZK\\6d4dI_K[6`4dIbK]6\\4cIeK]6Y4eIgK\\6W4dIjK\\6U4bInK_6Q4_IQLa6R4YIQLg6Q4SIRLo6P4gHVLZ7m3\\HWLh7]53N2O2N1O2O0O2N1O1M4XM[GPOh8n0^GbNo8[1VGWNU9g1PGRNT9l1PGlMW9Q2mFgMZ9V2jFbM]9Z2iF^M]9_2d1M2M4L3N3M3M3L3M3N3N2M2M4M3L4PO_B9g=F_BOh=0d00001O00001O001O1O0O3NTef1" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"V6e17W5R6hJnIX5R6hJnIX5R6hJnIX5R6hJnIX5R6hJnIX5R6hJnIX5R6k1000000000000000000000000000gHiIi5W6WJiIi5W6WJjIh5V6XJjIh5V6XJjIh5V6XJjIh5W6WJhIj5X6UJiIk5W6VJhIj5W6`100001O5K0kKcIG^6RMaIo12o0]67cII]67cII^67`IJb6Y4000000001gH^IR6b6mI`Ii1N[1b6lL_Ii11Z1`6mL_Ii11Z1_6oL^Ih12Z1`6nL^Ih11[1a6U400000000000000O_H_Ie6`6\\IbIb6]6_IdIj1Mm1_6YLcIa6]6_IcIa6]6_IcIa6]6_IbIb6]6Q1000UKcIlLOd3^62dIN\\6dLbIb23j0Z6eLcIa23j0Z62eIO[60fI0Z60fI0Z6dLcI`23l0Y6eLdI_23l0Z60eI1[60dI0\\6OeI1Z6kKfIo306Z6jKhIo3N7Y6kKiIn3N7Y6jKjIo3M7X6kKkIn3M7X6lKjIX8V6hGjIT4LKZ6SLgIR40KY6TLfIP43KV6>jIBV6?iIAV6`0jI@U6b0iI_OV6f4O1O1OTNoIkJQ6S5RJlJm5S5UJmJk5S5VJlJj5S5WJmJi5S5WJmJi5R5XJnJh5R5XJnJh5S5VJnJj5R5VJnJj5S5SJoJm5Q5SJoJm5Q5SJoJm5T5nInJR6P700000000000000O100000000001_GRJn7n5mG^Jl7b5RHbJl7\\6M2N2N1O001O1O000000001O0000000000000000O1000000O10000O100N2O1N2N2O1O1O100O100O100O100N200O1O1O100O100O1N200O1O1O1O1O1O100000000O10000O11O00O1000000001O00001O0000001O1O1O2N001O1O001O1O1O001O001O1O2N001O2N1O1O1O001O1O1O1O001O2N1O1O3M2N2N2N1O2N3M2N3M001O002N2N1O1O2N1O1O1O1O1O1O1O00001O2N2N3M3M4L2TMVG^On8O1O11O0000000000000000O100000000O1kLRMXJn2h5_MkGYOW13`NX3^8eM`G_OS1V3]7UNZHl1f7VN`GWNa0d3o7\\NoGe1Q8]NkGe1V8b200001O001O001O0000001O001O00001O001O001O0000001O0000001O00001O00000000001O00001O000000000000000000000000000000000000O1000000000000O100000000O100000000O10000O10000O10000O1O10000O100O1O100O1O\\L_Gk0a8SOkGc0T8[OPHd0P8YOTHf0k7YOWHg0i7XOXHh0g7WO\\Hh0c7WO_Hi0b7QObHP1^7oNcHQ1^7kNeHT1]7iNeHW1\\7dN_GVNU1W3^7aNeH`1\\7^NcHc1`7YN`Hh1b7UN^Hl1c7QN^HP2d7mM[HU2f7iMZHX2g7fMQHnNQO_3Q9^MnGj2S8TMnGl2S8RMmGo2W8lLjGT3X8hLhGZ3[8aLeGa3\\8]LcGe3k91N2M3O1N2O1O1N2NPMcEa1\\:\\NiEc1W:[NlEd1S:[NoEe1P:ZNSFe1i9^NXFb1g9^NZFb1e9^N\\Fb1c9^N^Fb1`9_NbF`1]9`NdF`1[9`NfF`1Z9^NhFb1W9^NjFb1U9[NoFe1P9WNUGi1l8TNVGl1l8jM[GW2g8]McGc2\\:100O1O1O1H8O1O1O100O1N2N2O1M3F:bM]LTIn3i6ZLlHl3S7WLgHm3V7YL`Hn3^7UL\\HP4a7WLTHP4i7VLPHn3P8a1N2N2L4M3N2L4O1M3N2O1M3O1K5N2H8@`0O1O1O1000000O100000000N2O10000O1N2O100000000000000000\\HWJQ6i5mIYJS6g5lIZJT6g5kIYJU6g5kIZJT6e5mI[JS6e5mI[JS6f5lIZJT6
f5lIZJT6f5lIZJT6f5lIZJT6f5lIZJT6V4cHPMY1jNT6V4cHPMY1jNT6V4cHPMY1jNT6V4cHQMX1bNhN3^7k5jIQJjN3\\7[4bHQM\\1dNR6[4cHPM^1bNo5^4dHoLm8Q3TGnLl8R3UGmLk8S3TGnLl8R3SGeKNU1o8V3SGPMl8R3RGnLn8Q3TGfK1n0k8\\3TGfK0o0l8Z3UGgKNP1m8Y3UGoLk8Q3VG\\LMEm8o3VGcKLe02Jk8n3ZGYLJIl8n3ZGlLf8T3ZGlLf8T3YGnLf8R3YGoLg8Q3XGQMg8o2YGQMg8o2WGSMi8R4VGiJk8\\4WG`K00OJN6l8`4WGaKOO0IO7k8`4VGbK0OOH08j8T4VGkK1NNL20a8T4oFmK`0Oa8T4oFmK?0^ONj8U4YGnK=0@Mj8O" + } + ], + "model_output": "An adult zebra stands with its body profile facing left, showcasing its distinctive black and white striped pattern. The stripes are unique, with varying widths and patterns across its body, legs, and mane. The mane is erect and striped, and the ears are pointed upwards. The zebra's eyes are alert, and its muzzle is partially visible with a discernible mouth and nostrils. The tail is tufted with longer hair at the end, and the hooves are dark and sturdy. The zebra is standing on grass.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_47.png", + "subject_name": "cake", + "object_name": "person", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "PYR41h01[=2cBO[=4dBL[=6cBK\\=7bBJblKBT4=mKCT4fFGU9;dF@cN7f:=aFN]92dE^O=d0n9OcE@b5n600000O10000000000000000O100001OO10000fH" + } + ], + "model_output": "An individual wearing a bright yellow ski jacket with a hood, red ski pants, and black gloves, equipped with a black helmet and goggles, is captured in a dynamic pose with one arm extended outward and the other bent at the elbow, suggesting movement, over the snow.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_49.png", + "subject_name": "donut", + "object_name": "person", + "predicate_name": "held", + "mask_rles": [ + { + "size": [ + 612, + 612 + ], + "counts": 
"_Ql1l0oa0c0A8H7J6I:D;K5J5M6H7H6L2N2M4K7I5L3M3N4I6L2N4L4K0020001O1N2N110O1N2N1O2O1O1O2M2O2N1O1N2O001O100N2N3OO10O1N2O011N1O1O1N201O0O1O1O1O1O101N2O0O1O1O1O10000O2N1O101N1O100O100O1O1O101O0O100O10000O1O1O10000O101N100O100O2N1000000O10000O101O0O10000O1000lLhD\\NW;a1PE\\NP;a1VE]Ni:b1ZE\\Ne:e1\\EZNd:f1]EYNc:f1^EZNa:g1aEWN^:i1dEVN[:k1eEUN[:j1fEVNZ:i1hEUNY:j1hEVNW:k1jETNV:l1jETNV:l1jEUNU:k1kEUNU:j1mEUNR:l1nETNR:l1oESNQ:m1oESNQ:m1oESNP:n1QFRNn9n1RFRNn9n1RFRNn9n1RFRNn9n1RFRNm9o1TFPNl9P2TFPNl9P2TFPNl9P2TFQNk9o1UFRNj9n1VFRNj9o1UFRNj9n1VFRNj9n1VFRNj9n1WFRNh9n1XFRNh9n1XFRNh9n1XFRNg9o1YFQNg9P2XFQNg9o1YFQNg9P2XFPNh9P2XFQNg9P2YFPNf9Q2YFoMg9Q2YFPNf9Q2YFoMg9Q2YFPNe9S2ZFlMf9U2YFlMf9U2YFlMf9T2[FkMe9V2ZFjMf9W2XFjMh9X2VFhMj9Y2TFiMk9X2TFhMl9Z2PFhMP:Z2mEhMR:]2cEjM\\:_50001O001O001O1O1O01O0001O1O001O00100O00002N10O01O001O100O010O1O100O1O010O100O100O2OO01O011O0O01O010O10O01N101N2O0O2N2O0O101N1O2O1N2N1O2O1N2N1O2N1O2N2O0O2N2N2N2N1N3M3N2N1N3N2N2N2N1O2N2O1N2N2N1O3M2N2N2N2L5J5L4N3M2M3M5K3N2M3N2M3M3N2M3N3L3N2N2M3N3K5M2M4K6J5J5L7I5K8F9I6K9F6JZb0`0F7G8H8G:B=F:B>N2O1000000000000O10001O00000000000000O02O00O100000000O100O1O1O1O1O1O1O100O1O001O1O100O1O1O1O1O10O010000O1ROf_OROZ`0h0o_OUOQ`0g0T@XOk?g0W@YOi?e0Z@ZOe?f0]@YOc?e0`@ZO`?e0b@ZO^?d0d@\\O\\?c0f@\\OZ?c0h@[OY?d0h@[OY?d0i@YOY?g0h@VOZ?i0j@POZ?P1]100O1000001O000010O3M10O00010O000O101N1O1O2N1O2N1O1Ok_O_Oi=`0TBEk=:TBIl=4TBNl=1oA5Q>JkA;V>DeAa0[>^OeAc0\\>ZOcAi0]>VO[AS1f>lNTAZ1m>dNPA`1Q?]Nn@f1U`03H8N2K5L4N3K3M4K5L5K4I7L3L6L3J6K4J7M3M4L2M4H8G9N2L4N2N200O100O1O100O100O1000000O10000000000000000000000001O000000010O0O2O00010O1O001O001O1N2O1O001O2L3@`0H8J7J5I7K5K5H:F9I7J5K6J8UNY_Om0ea0H6L4K6J6K4L4M2MdgZ6" + } + ], + "model_output": "A red frosted doughnut, generously sprinkled with white nonpareils, is held by a person.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_50.png", + "subject_name": "laptop", + "object_name": "table", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": 
"0R8n6000000O10000O100O10000O10YLSH;l7]3000`LTHLk75VHJj73[HKd7i3000000O100O100O1000000O10000O10002M102N2M5L3L5L3M3L5L5K3L4M1N2O00001N101N2O0O2O00000O2O001N2O001N2O001N2O1O0O2O1O1N2O0O2O1O0O2O1N102N001N2N101O1O1N101O1N2N2O1O001N2O0O2O1N2O1O1N101N2O1O0O2O1O001N3N0O2O1O0O2O1N2O001N2O1N2O1O0O2O1O001N2N2O1O001O1N101N2O1O1O0O2O1N2O0O2O1O001O2M101N2O001N3N001N2O0O2O1N2O1O1O0O101O1O1N2O1O0O2O1N2O1O1O0O2O0O2O1O0O2O1O0O2O1N3N1N101O001N2O1O001N2O1O1N2O1O0O2O001O1N2O1O0O2O2N001N2O1N2Nf]Q6" + }, + { + "size": [ + 480, + 640 + ], + "counts": "U8i6W8000O100O10000O10000O10000O100O1000000O100O100O1000000O100O10000O100O10000O10000O100^HlHX7T7gHnHW7S7iHQIS7o6lHVIP7j6PIWIn6j6RIYIk6g6UI_Ie6a6[IaIb6`6^IaIa6_6_IaIa6_6`I`I_6a6aI_I_6a6`I`I`6`6`I`I_6a6aI_I_6a6bI^I]6c6bI^I^6b6bI^I^6b6bI^I]6c6cI]I]6c6dI\\I\\6d6dI\\I[6e6dI\\I\\6d6dI\\I\\6d6eI[I8VOe5_7SJ[I8VOd5`7TJZI7XOd5^7TJ[I8ZOa5[7WJ[I7\\O`5Z7ZJYI6]O`5Z7ZJYI6^O_5Y7[JXI6@_5X7[JXI6@_5X7[JXI6A^5W7\\JXI5C]5V7^JWI5D\\5U7_JWI5EZ5U7aJVI5FY5T7bJVI5GX5S7cJVI5GW5T7dJUI5HV5S7eJUI5IT5S7gJTI5JS5R7hJTI5KR5Q7iJTI4LR5Q7jJSI4LR5Q7jJRI5NP5P7kJQI60m4P7mJPI53l4m6oJPI54j4m6QKoH54j4m6QKoH55h4m6SKmH66g4m6SKmH58g4k6TKmH59e4k6VKlH5:c4k6XKkH5:c4k6XKkH4`4i6[KiH5>`4i6\\KhH4`0^4i6]KgH6a0\\4h6^KgH6b0Z4h6`KfH6c0Y4g6aKfH6c0Y4g6aKTHL>:g0X4h6bKSHL>9j0W4e6dKdH6h0U4d6eKcH7j0R4d6gKaH8l0P4c6hKRHL:=Q1n3d6iKQHL:=R1m3c6jK`H7n0o3b6jK`H7o0m3b6lKPHL;;T1l3a6mKPHL;:V1k3`6oKoGL;:W1j3_6PL^H6T1h3_6RL]H6T1h3_6RL\\H7V1f3^6SL\\H6X1f3\\6TL\\H6Y1d3\\6VL[H6Z1c3[6WL[H5[1c3[6XLZH5\\1b3Z6YLkGM;8`1b3Z6YLlGL:9a1`3Z6[LYH4`1_3W6]LYH4a1^3V6^LYH4a1]3W6_LXH4b1\\3V6`LWH5c1Z3W6aLVH5e1X3U6cLVH5f1W3T6dLVH5f1W3T6dLhGM99l1T3T6fLhGL89m1U3S6fLSH6k1R3S6hLRH6k1R3S6hLQH7n1o2Q6jLQH6o1o2Q6kLPH6P2n2P6lLoG7R2l2o5mLoG7S2j2o5oLnG7S2j2o5PMcGL7:X2i2n5QMmG6V2g2n5RMlG7X2f2l5SMlG7Y2e2k5TMhGNJ9c2d2l5UMgG;^2_2k5VMgG:`2^2j5XMfG:a2]2i5YMgG9a2]2h5ZMfG:c2[2g5\\M`GL2=g2Z2h5]MdG9e2Y2g5]MeG:e2X2f5^MeG:f2V2f5`MdG9h2V2d5bMbG9j2U2d5aMcG:j2S2d5cMcG9i2T2d5cMcG9j2S2c5eM]GL2j4BWK=i4BYK=g4BZK>f4BfHEf0h0d6CbHLh0?g6E_H1h08j6F\\H7i02k6GYHk0Jm6HWH`
0k0Go6ITHc0n0Bn6JTHe0o0_On6LRHg0P1\\On6MQHh0Q1ZOo6NoGj0R1WOo6OnGk0S1UOP70mGl0T1ROo62lGm0V1oNo64kGn0W1kNo67jGn0Y1gNo6:iGo0i:QOWEP1h:POXEQ1g:oNYEQ1g:oNYER1f:nNZER1f:nNZES1e:lN\\EU1c:kN]EU1c:kN]EV1b:iN_EW1a:iN_EX1`:hN`EY1_:gNaEZ1^:fNcEY1]:fNdE[1[:eNeE[1[:eNeE\\1Z:dNeE^1Z:bNfE_1Y:`NhE`1X:`NhEa1W:_NiEb1V:^NjEb1V:^NjEc1U:\\NlEd1T:[NmEf1R:ZNmEh1R:WNoEi1Q:WNoEj1P:VNPFk1o9UNQFk1o9UNQFl1n9TNRFm1m9SNRFo1m9QNSFo1m9PNUFP2j9PNVFQ2i9oMVFR2j9nMVFS2i9mMWFT2h9lMXFU2g9kMXFV2h9jMXFW2g9iMXFX2h9gMYFZ2f9fMZF[2e9eM[F[2e9eM[F\\2d9dM\\F]2c9bM^F^2b9bM^F_2a9aM_F`2`9`M_Fb2`9^M`Fb2`9^M`Fc2_9]MaFd2^9\\MbFe2]9[McFe2]9[McFf2\\9ZMdFf2\\9ZMdFg2[9YMdFi2[9WMeFj2Z9VMfFj2Z9VMfFk2Y9UMgFl2X9TMgFm2Y9SMgFn2X9RMhFo2W9QMiFP3V9PMjFQ3U9oLkFQ3U9oLkFR3T9nLkFT3T9lLlFU3S9jLnFW3Q9iLoFW3Q9iLoFX3P9iLnFX3R9gLoFZ3P9fLPG[3o8eLQG\\3n8dLRG\\3n8dLRG]3m8cLSG^3l8bLSG`3l8`LTGa3k8_LUGa3k8_LUGb3j8^LVGc3i8]LWGd3h8\\LXGe3g8[LXGf3h8ZLXGg3g8YLYGh3f8XLZGh3f8XLZGi3e8WL[Gj3d8VL[Gk3e8UL[Gl3d8TL\\Gm3c8SL]Gm3c8SL]Gn3b8RL]GP4b8PL^GP4b8PL^GQ4a8oK^GR4b8nK^GS4a8mK_GT4`8lK`GT4`8lK`GU4_8kK`GW4_8jK`GW4_8iKaGW4_8iKaGX4^8hKbGY4]8gKcGY4]8gKbG[4]8fKbG[4]8eKcG\\4\\8cKeG]4[8cKeG^4Z8cKeG^4Z8bKeG_4[8`KfGa4Y8_KgGb4X8^KhGc4W8\\KjGd4V8\\KiGf4V8ZKjGg4U8YKjGi4U8WKkGj4T8VKlGj4T8VKlGk4S8UKmGl4R8TKnGl4R8TKmGn4R8RKnGo4Q8QKoGo4Q8QKoGP5P8PKPHQ5o7nJRHS5m7mJSHS5m7nJRHS5m7mJSHS5m7lJSHV5l7jJTHW5k7jJTHV5l7jJTHW5k7iJUHX5i7iJVHY5i7gJWHY5i7gJWHZ5h7fJXH[5g7eJXH]5g7cJYH]5g7cJXH_5g7aJYH`5f7`JZH`5f7`JZHa5e7_J[Hb5d7]J\\Hd5d7]J[Hd5c7]J]Hd5b7\\J^Hd5b7\\J^He5a7[J^Hg5`7ZJ`Hg5_7YJaHg5_7YJaHh5]7YJcHh5\\7XJdHi5[7WJeHi5[7WJeHj5Y7WJfHk5Y7UJgHl5X7TJiHk5W7UJiHl5T7VJlHk5R7VJoHi5Q7WJPIi5o6WJQIj5n6WJQIj5m6WJTIh5k6ZJVIe5i6[JWIe5i6\\JVIe5i6[JWIf5g6[JYIf5f6ZJ[If5d6[J[Ie5e6[J[If5d6[J[If5c6\\J\\Id5c6]J^Ic5`6^J`Ic5_6^J`Ib5`6^JaIb5^6^JbIb5]6`JcI`5\\6`JeI`5Z6`JgI_5Y6bJfI_5X6bJhI_5V6bJjI_5U6bJjI_5T6bJmI^5Q6dJnI]5P6dJPJ]5o5dJQJ[5n5gJRJY5m5gJTJX5k5iJUJX5j5iJUJX5i5iJWJW5i5iJXJW5f5kJYJV5f5kJYJV5e5kJ[JU5d5lJ\\JU5b5mJ^JS5a5mJ`JR5_5oJaJR5^5nJbJS5\\5oJcJQ5\\5PKeJP5Y5QKgJP5X5PKiJP5U5QKkJP5T5QKlJn4S5SKmJn4Q5TK
oJXNYOa6f5XKQKk4n4WKQKj4m4WKUKg4j4ZKWKf4g4\\KXKd4g4]KYKd4e4]K[Kd4c4^K\\Kb4d4^K]Kb4b4^K^Kb4b4_K]Kb4a4_K_Kb4_4_KaKa4_4`KaKa4\\4aKcK_4]4bKbK_4\\4bKdK_4Z4bKgK^4W4cKjK\\4V4eKjK[4U4eKlK[4R4fKnKZ4Q4hKnKY4P4hKPLY4o3hKQLX4m3iKSLW4m3iKSLX4k3jKULU4k3lKVLS4h3nKXLR4h3oKWLR4g3oKYLQ4f3PLZLP4e3QL\\Ln3d3SL[Ln3c3SL]Ll3c3VL\\Lj3c3WL]Li3c3XL]Lg3c3YL]Lg3b3[L]Le3b3\\L^Lc3c3]L]Lc3b3^L_La3a3`L_L^3b3bL^L^3a3cL`L[3`3fLaLY3_3gLcLV3]3lLcLS3\\3nLdLQ3]3oLcLQ3\\3PMeLn2\\3RMdLn2Z3TMfLl2Z3TMfLk2Z3VMfLi2Z3XMgLg2Y3YMgLg2X3ZMhLe2Y3^MdLb2[3_MeL`2[3aMeL_2[3aMeL^2[3aMhL^2W3dMhL[2X3fMmKXLg0R6\\3iMeLV2[3jMgLU2X3lMhLS2X3oMgLQ2Y3PNeLP2[3QNfLn1Z3QNhLm1X3TNiLk1W3WNgLi1X3YNgLf1Y3SNQLTLh0h5W3TNPMk1P3VNPMj1o2YNoLg1Q3\\NlLc1T3]NmLb1T3]NnLb1Q3_NPM_1P3bNPM^1o2dNPM\\1o2eNQMZ1P3eNRMY1n2hNSMW1m2jNRMU1n2lNRMT1m2mNSMR1n2oNQMQ1n2POSMn0m2TORMl0m2VORMi0o2VORMj0m2WOSMh0m2YOSMg0m2ZORMe0n2\\ORMd0m2^ORMa0o2_OQMa0n2@RM?o2BPM>P3BPM=P3EoL;P3FQM8P3HQM7n2KRM3n2NPMPKUOQ5k3OSM0m22RMNm23SMLm25SMKm26RMIn28SMGl2;TMCl2>UMAk2`0TM_Ol2c0SM]Om2c0SM\\Om2e0TMZOk2g0VMWOj2k0UMUOk2l0TMSOl2n0TMROl2n0TMQOk2R1TMnNk2S1UMlNl2T1TMlNk2V1PMYJZO`4f3X1PMXJZO`4f3Y1oLWJ[O_4g3Z1nLWJ[O_4f3[1oLVJ[O^4f3^1oLSJ\\O]4f3`1SMSJVOY4f3f1VMZNi2h1VMWNk2j1gLoIH0KV4f3l1gLnIIOKV4e3n1fLmI0S4Y3Q2gLkI2R4W3U2fLjIE09Q4[3V2gLiIE19n3[3Z2fLfIH08o3[3[2eLfI5o3U3\\2fLeIL2I10k3f3^2dLeILU4a3W2aLeIMU4a3W2bLeIHM2V4d3Y2cLfIHQ4d3Y2eLgIFP4d3Z2fLfIFo3e3[2eLfIGm3d3^2eLaILNKo3d3c2RM[Mm2h2SMWMm2i2cL]I0NKl3b3k2bL\\I0MLl3a3l2cLZI00Li3a3m2lL[IFd3^3R3kL[IHb3]3T3jL[IH`3_3W3fL[IJ]3`3X3gL\\IH\\3`3Y3iLZIG\\3a3Z3hLZIG[3`3]3iLYIFZ3a3^3hLXIGZ3`3_3hLYIGW3b3a3gLWIGX3a3b3hLVIGW3a3e3gLgLX3Z3iLdLW3]3iLcLW3o0" + } + ], + "model_output": "A black laptop with a glossy finish, featuring a full keyboard with white lettering on the keys, a touchpad centered below the keyboard, and a series of stickers on the palm rest area, is placed on the table.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_51.png", + "subject_name": "person", + "object_name": "person", + "predicate_name": "beside", + 
"mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "bS\\1SEGg:=VEFg:;XEGf:;XEHd:4N2M2O2N1O101L3O1M3O1O0\\OWORCj0jI7I7L4M2O2M3N2M3N2N2M3O1M3N2L4L5WIiIc4[6SKTJb4R6]KQJ\\4T6cKoIV4V6jKlIn3[6QLfI^3k6_LWI`3l6]LVIb3R7ULPIi3T7bKoGOo0_4R9O2N1O2M2O2N1O2O1M2O2M2O1N3N1N3M3N1O2L3N3L4M3M2N3L4L5L5K4K6J6I8J6I7KU1iN_Rj0" + }, + { + "size": [ + 480, + 640 + ], + "counts": "nj_66l03V@`Rj0" + } + ], + "model_output": "A smiling man with short hair and glasses, wearing a blue and gray zip-up jacket with a logo on the left chest, blue jeans, and brown shoes, is standing with his hands in his pockets, in front of a fence.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_53.png", + "subject_name": "handbag", + "object_name": "person", + "predicate_name": "held", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Qha0a0f=F^BY1j2N10000O0100O0001O0O101_NMXC074=FQ;;VD070a0Jm:;WDg0j0ROl:c1TE_Nj:a1UEbNj:^1UEdNi:\\1VEfNj:Y1VEjNh:U1WEnNh:R1WEPOh:P1WEROh:n0TEWOk:j0nD]OQ;d0lD_OT;a0iDBV;U200001cLgDP3[;lLgDT3b;2O0O2N2O1N2_EdL[9]3aFeL_9^3]FcLd9`3WFaLi9b3SF_Lm9\\2iE]N2\\OU:U2nEROR:l0QFROQ:=aFA`9fF_O\\9?gF^OZ9UOcEg0Y10T9YOgE`0[13P9\\OiEj8BmEM]1?h8CfE2m11^8j0dGSO^8k0dGSO^8l0cGSO]8l0dGSO^8k0dGSO^8j0eGUO\\8d0kGYOY8b0U3M3Jgh>" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"Y3e;[3000001O000000001O0000001O0000001O00000000001O0000000000001O0cKbLXM^3f2iLUMW3i2mLUMS3i2QMUMP3i2SMUMm2j2ZMPMf2[2eLYJk0W3`2_2hLWJl0W3]2`2jLWJk0W3]2_2kLVJk0Z3]2[2ZNdMl1V2UNiMl1V2TNjMQ2P2PNPNW2i1iMWNX2h1hMXNZ2f1gMYN[2e1eM[N\\2d1dM\\Nd2\\1]MbNk2W1VMhNk2X1UMhNj2X1XMfNi2Z1\\M`Nh2]1cMWN`2g1cMUN_2k1cMQN^2P2fMjM]2V2hMbMk2m1XMnMj2Q2VMnMk2R2TMnMl2R2UMmMl2R2UMmMk2S2WMkMi2U2XMjMh2V2]MeMc2[2bM`M^2`2eM]M[2c2gM[MY2e2iMYMW2g2jMXMV2h2kMWMT2j2mMUMS2k2nMTMQ2m2QNQMn1P3SNoLm1R3TNlLn1R3SNmLo1R3RN`LmKLQ6d3WNWLoK3j5f3dNZL\\1f3eNYL^OOaMi3P3XLZO5fMc3V7^LjHc3U7^LjHc3U7]LkHd3S7gLbHZ3^7QMWHP3^2VLa1Q1kKi2d2YL^1P1lKh2e2ZL]1o0lKj2f2nKbL6j4S1WKgN`0T4R3PLV1\\1eKg2T8]MgGf2W8\\MfGh2X8XMhGi2R4bKgKOm3b6aMmIh0Di1a6ZM`J;oNZ2d6XMbJ9kN_2f6UMbJ7jNe2g6PMdJ5gNj2i6mLdJ4dNo2m6hLbJ6aNS3n6dLcJ7`NU3n6cLeJ4^NY3n6QLhH9Q2N`3[4SKXLm0A>Ld3Z4RKYLi0C`0Ke3X4SK[Ld0Ed0He3X4SK]L`0Ff0Fg3W4TK]L>Fg0Fg3W4TK]L=Gh0Eg3W4TK^L;Hh0Cj3X4RK^L:Ii0Ak3X4RK`L5Km0\\Om3Y4QKbL0LR1XOn3Z4PKcLNMS1VOo3[4oJcLMNT1SOQ4\\4nJSNP1bMR4[4oJRNo0cMR4[4oJRNn0cMT4[4nJRNn0cMT4\\4lJSNo0`MV4]4kJSNn0aMW4\\4kJSNn0aMW4\\4kJTNc0SM^O=T5\\4kJTN?oMe4n3kJTN>oMg4m3kJTN=QNg4k3lJUN;RNh4j3lJTN;VNf4f3oJTN;WNe4e3PKTN;XNd4d3QKUN:XNg2fNgNn4WNTN:ZNe2gNiNk4XNUN9[Nc2gNkNi4YNWN7YNd2hNlNh4ZNWN5ZNc2iNmNg4[NVN4\\NQ1bN36=f4[NXN2[Nn0gN60?f4\\NXN0\\Nl0iN7Na0f4[N0l0_K6Lb0e4]N0h0bK7Jd0c4bNO?fK:Ie0b4cNO;iK;Gg0a4dNO7lK;Fi0`4fNN4mK^1BaNc0[1]OcNg0[1YOdNm0W1SOhNR1S1POkNU1Q1kNoNV1Q1iNoNX1YOVId1b5SO\\1l0dNTO\\N\\OfLN\\O110l1[1f3YOXNAdLIAa0n1_OWNP1^5F[N^OaLh0ESOg1n0h3J[N\\OaLR1[1Fi3L[N\\O`LU1\\1Ai3OZN[OaLX1Z1]Ok31ZNYOaL[1X1[On31XNYObL^1U1XOQ42WNXOcL`1S1VOS43VNWOdLa1R1UOT45aJSO[32nLa12]N=h0g47^JSO]31nL`1GkNg0:i4?lMiNoLa1IlNa0;k4`0]N8YLlNID`0h0Q5c0ZN5[LWO52W5b0YNnN\\L?03OO\\5b0ZNiNbL>I:LN_5b0YNgNeL>G_1[5]OXNfNgL=G_1Z5_OWNdNlL;Cb1Z5_OXN`NPM<_Oe1Y5@XOiN_Kg1Y5@YOgN_Ki1X5AZOaNaKn1V5@N`02CK=6FF::GE9;HD8dLR5kN`Jb4=cLS5kN`Jd4;aLU5lN_Jd4<_LV5oN[Je4=ZLY5ROYJe4>XLY5SOYJf4=WLZ5TOXJf4U7bMdIV2RO9[7bM]I_5c6bJ[I_5e6bJVIb5k6`JkHg5U7Q11O100O1O101O000O10010O0O100000010JZHoHg7Q7ZHnHf7R74O100O2O000O10XHQI_7n6:O[HSIZ
7l6eHUI?" + } + ], + "model_output": "A person wearing a white basketball uniform with the number 8 on the back, white shorts, and white sneakers with orange accents is running on the playing field. The individual has short dark hair and is captured in a dynamic pose with one leg extended forward and arms slightly bent at the elbows.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_55.png", + "subject_name": "bowl", + "object_name": "table", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "ejb06Q=7I7I5K4N3M2M3M4M1O2N2N2N1O2N1O2N1O2N1O2N101N1O100O1O2O0O1O2O0O1O101N1O10001N100O2O0O2O000O100O2O0O100O101O0O100O1000000O10000O10000O10000000000O100000O100000000O10O1000O10000O10000O100O100O100O100O100O2N100O1O1O2O0O1O1O1O1O2O0O1N3N1O1O2N1N3M2O2M3M2N3N2M2O2M3N3M2M3N3L4L4K6HZQn5" + }, + { + "size": [ + 426, + 640 + ], + "counts": "d1Z29Bb0f0b4IkJBc0e0a4JjJDd0b0a4JjJFe0`0a4Q2_KPN_4Q2bKnM^4R2bKnM\\4U2dKjM\\4V2dKjM[4W2eKiMY4Y2hKgMV4DoJ`0k0LU4\\2jKdMU4]2kKdMS4]2mKcMS4]2nKbMQ4_2oKaMP4`2PL`Mo3b2PL^Mo3c2RL]Mk3e2UL[Mj3g2VLXMi3i2XLVMg3k2YLVMf3j2ZLVMe3k2[LUMd3m2\\LSMb3n2^LRMa3o2_LQM`3P3`LPM^3R3cLmL\\3U3cLlL\\3T3dLlL[3U3eLlLY3U3gLkLY3U3hLjLW3W3iLiLW3X3hLiLU3Y3lLfLS3[3mLeLR3\\3oLcLo2_3QMaLn2`3RMaLm2_3TM`Lk2i0[L[Oi0Lk2f0dLZOa00j2e0kLWO;5h2b0RMXO56h2`0XMXO08g2?]MWOM9d2a0aMUOL9b2a0eMUOI;a2?hMUOH;_2`0kMTOF=^2>nMUOD<^2=QNVOA=^2;TNWO^O>^2;TNWO^O>^2:VNXO\\O=^2:XNXOZO>^29YNYOYO?]27\\NYOXO?\\27]N[OVO>]26_N[OTO?]26_N[OTO`0\\24aN\\OSO`0\\23cN]OQO`0[22eN^OPO`0[21fN_OPO?Z22gN^OoN`0[20gN@nNa0[2nMdLc1S2OnN?o2ZOTN7mN`0o2XOUN7lNa0Q3UOTN:lN`0R3SOSN>jN?T3QOTN?hN`0U3POSN`0hNa0V3lNSNc0gNa0X3jNRNd0gNa0\\3eNnMk0eN`0a3`NkMP1eN`0a3gM^Ka0\\2X1eN`0b3fM_K?\\2Z1cNa0l3RNaM]1dN`0n3oM_Mb1bN`0P4kM`Md1`Na0P4kM`Md1aN`0P4kM_Me1aN`0Q4jM^Mf1aN`0R4hM_Mh1_N`0R4eMaMk1]N`0j6@VI`0j6@VIa0i6@VI`0k6_OUIa0k6_OVI`0j6@VIa0i6_OWIa0i6@WI?j6@VI`0j6@VI`0j6@VIa0i6@VI`0j6@WI?i6AWI`0h6@YI?g6AYI?g6BXI>h6AZI?e6A[I?e6A[I?j2fMIj1^Ma0h2iMFd1eMb0e2kMEb1gMc0d2lMD_1jMe0b2nMB\\1mMf0a2PN_OZ
1RNf0^2RN^OV1VNh0\\2TNWLEk2^1dNh0Z2_NROg0fNj0X2_NROf0hNj0[2[NmNi0jNm0Y2YNlNj0lNm0Z2WNjNj0oNn0X2WNiNi0QOQ1V2UNiNi0ROR1d5lN^JT1a5lNaJS1_5kNcJU1]5jNdJV1\\5hNgJX1W5hNjJX1V5gNkJZ1T5dNoJ[1P5eNRKZ1n4eNSK[1l4eNVKZ1j4fNVKZ1i4gNWKZ1h4gNWKY1i4gNXKX1g4iNYKW1f4jNZKV1f4jNZKW1d4jN]KU1b4lN^KU1a4lN^KT1a4mN_KS1a4mNaKQ1^4PObKQ1\\4POeKo0[4QOeKo0Z4SOeKn0Y4SOgKm0X4TOhKl0W4UOjKj0U4WOkKj0l0nM]OY1Gi0j0QN]OW1Jg0i0SN[OX1Ke0j0SNZOY1Le0i0SNQMGd1b1b0d0h0UNoLHe1`1d0c0h0VNmLIe1_1g0a0g0XNjLJg1_1g0?h0YNhLKf1_1j0>g0\\NcLIj1_1l00n3d2dMXOPNTN>Oo3e2cMYOoMSN`0MP4f2aMZOoMSN`0LQ4g2aMXOoMVN?IR4j2_MWOPNVN`0FS4m2]MWOQNUNY5d2gLVOPNVN=IV4k2\\MVOQNVN60]4d2\\MVOQNWN32_4a2]MWOnMXN53^4^2_MWOlMZN65\\4Z2cMGPNQN\\4X2dMHnMSNo2@QNd2S1g0e0PMSNZ2W1g0b0UMTNT2Z1i0=YMVNn1]1j0;[MVN:]OOS2\\27_MXN3@3P2\\26aMXNMF5l1^23cMZNHI7j1^21gMZNCL8j1e0iMGU2K[N^O0;g1d0kMGR2N[N[O2=e1c0lMHQ2M]NXO4?b1d0lMIo1O]NSO8b0`1b0mMIm11^NPO:d0^1b0nMIk12^NmN>e0[1c0oMHi15^NiNa0g0Y1c0PNHf16aNeNb0k0V1b0RNGd19aNaNe0m0T1b0TNEa1=aN]Nh0o0S1a0UNE^1?aNZNl0P1P1b0VND\\1a0bNXNm0Q1o0b0VND\\1b0aNUNP1S1m0b0WNCZ1e0bNoMS1X1i0a0YNBY1f0bNmMV1Y1g0a0XNCY1g04UO;a0XNCX1h06SO:b0YNBW1i06TO9a0ZNBW1i06TO9a0ZNCU1i08TO9?ZNDU1i09SO8`0ZNDU1j08RO9`0ZNDU1j08RO:?YNDV1k08RO8?ZNDU1l09QO8?ZNDU1l09QO8?ZNDU1l09QO9>YNEU1l0:QO7>ZNEU1l0:QO7>ZNEU1l0:QO7>YNFV1k0:QO8=XNGV1k0;PO7>WNHW1j0;PO7>VNHY1j0:QO7Y2a7VNQHA>Z2`7VNVIj1j6VNVIj1j6WNUIi1k6WNUIi1k6XNTIh1l6XNTIh1l6XNTIh1l6XNTIi1k6XNTIh1l6XNTIh1l6XNSH^O9[2OfMc6a0UI^O9[2NgMd6`0UI^O:Z2MhMd6a0UI\\O:[2MiMc6`0VI\\O:\\2LjMc6>VI^O:Y2MmMa6gM]O:W2MQOX2oNJ_OA]NSO?S2IiNl1c0\\O_O`NSO>R2JiNj1h0ZO[OeNQO=S2JiNi1m0VOWOjNPO=T2IgNj1Q1TOUOkNoN>T2IgNi1S1ROUOnNmN?S2HgNh1W1oNUOROjN?S2HgNg1c1dNjN_OhN>T2HfNg1l1ZNfNIcN>U2HeNg1S2QNdN2`N=T2IeNf1Z4SNmJ>T2IdNf1\\4TNkJ=U2IdNd1^4VNiJ>T2HeNb1a4XNeJ>U2HeNa1b4YNdJ>V2HbNa1f4XNbJ?U2IcN_1g4YNaJ?U2IcN^1h4[N_J>V2IcN]1j4\\N\\J>W2IcN[1l4_OaLVOcN[1l4@`LUOcN[1n4@_LUOcNY1Q5A\\LWObNW1S5B[LWObNU1V5CXLXObNT1W5EVLWOaNU1Z5DULWOaNS1]5ERLXO`NS1_5EQLXO`NR1`5FPLXO_NR1c5EnKYO_NQ1d5GlKYO^No0i5GiKZO^Nn0j5IgKYO^No0k5HhKXO]No0l5IgKXO\\Nn0o5JeKXO[Nn0R6IcKYO[Nl
0U6K_KYO\\Nk0V6L]KZO]Ni0W6M\\KZO\\Ni0Y6M[K[O[Ng0[6NZK[O[Nf0]6OXKZO[Ne0_61VKZO[Nd0`62UKZO[Nc0a64RKZO\\Nb0d63PK[OZNb0h64nJYOZNb0j64lJ[OXNa0m64jJ\\OYN?n66hJ[OYN?Q75fJ\\OYN=S77dJ\\OYN_IDZNMX8`0]IC[NLY8a0\\IC[NJ[8c0ZIDZNH]8c0ZIEZNF^8d0XIFZND`8f0VIGZNBa8f0UIH[N_Ob8i0SIG]N^Ob8j0QIH]N\\Oe8k0nHJ[N\\Og8j0nHJZN\\Oj8i0lHK[NYOl8k0jHKZNYOn8k0hHL[NWOf0M[7P1cIL[OVOR7n0cILZOXOR7l0dILXO[OT7i0cILXO^OS7f0eILWO@S7e0eIKWOBS7c0fILVOBS7b0gILUODT7?gIMTOGS7=hILTOHT7W2SAjMk>V2UAkMj>V2UAmMh>S2WAPNh>Q2PAWNn>_3N2O1N2N2N2O0O2N1O2N2N2O0O2O0O2N2O0O2N101N1O101N1O1O100O2O1N1O2O0O101N1O100O2N100O1O100O100O101N100O1O100O2O0O100O1O100O1O100O1O10O01O100O010O1O010O00100O010O0010O01O1O00100O001O01O10O010O010O01O010O0010O00100O0010O0010O01O010O01O010O01O01O010O0010O001O010O01O010O01O0010O1O010O01O010O001O010O010O001O010O10O01O01O01O10O1O01O0010O01O010O010O010O001O10O010O01O010O1O0010O10O1O001O010O1O10O01O001O010O1UKbBS3_=f11O001O100O1O00100O100O00100O1O1O010O1O1O1O1O101N1O1O1O101N1O1O2N1O2O0O2N1O2O0O2N2N2N1O2N1O2N2O1N1O2N2N1O2N3M2N4L4M4K7I2N2N2N2N1O2O1N2N2N2O1N2N2N2N2O2M4L3N2M6J:F3M1O3N1N2N3M3Mf6" + } + ], + "model_output": "A golden-brown hot dog bun with a soft, fluffy interior and a slightly crisp exterior, featuring a split top design to cradle its contents, is attached to a hot dog.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_57.png", + "subject_name": "laptop", + "object_name": "table", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": 
"0V5j90WFlJf9W501O1O4L4L1O2N2N5K2N3M1O3M2N3M2N3M2N3M6J1O002N4L3M1O12O0100NO2N2O3M1O01O02M7J2M1O2O2L3N3L2O1N5J:E4N2O004N2NO2N0101O1N8H0O00003N1N2N2N2M3N1O0000O1O1O2O0O1M3O1O0100000O001O01M3O1N2JkE_KW:_46N2O00100O10001O0001eEdKP:\\4oEfKP:[4PFdKP:]491iEfKf9]4QFiKo9d40000000000000000000000001O00001O1O001O000iKRF\\3o9`LUF_3k9^LYFa3g9]L[Fc3e9\\L\\Fd3d9[L]Fe3d9XL_Fg3a9XL`Fh3`9VLbFj3Z:0O10000000000M3O1000000O1HmK`E0NT4e:mK[ES4e:nKZEQ4f:5L4000000LeKbE\\4\\:5N2000XMkEe0T:XOWFa0h9\\ObF>Z9lMnEc1R1:T9CQG;o8\\OjElN[1f1k8ZOaGe0^8ZOdGf0\\8WOhGh0U8YNmEn0o1i0T8XOnGh0R8WOPHh0o7UOUHk0k7QOYHo0g7POZHP1f7nN]HQ1c7mN_HS1`7jNcHW1U7ZN[F=c2Y1Y7gNgHY1Y7fNhHZ1Y7dNhH\\1X7bNiH_1W7`NfHIZMg1P:]NiHLWMg1P:[NoHe1U:1M3JVNUCk1P=00000K5LoMYCQ2j<100M3O100000000O100000000O1O1001O00000O2O1O1O2N2N001O1O1O1O001O001O1O0000001O1O00000000MYNmBg1S=YNmBg1S=YNmBg1S=YNmBg1S=YNmBg1V=000001O000000000000001O00000000001O00001O1O00000O2O00001O001O001O1OYMfNhGZ1W8gNjGX1V8iNiGW1W8iNjGV1V8kNjGT1V8lNkGS1U8nNkGQ1U8oNkGQ1T8QOkGo0U8QOlGn0S8TOmGk0S8UOnGj0R8VOnGj0R8VOoGi0P8YOPHf0o7[OQHe0o7[OQHe0o7[ORHd0n7\\ORHd0m7^OSHa0l7@TH`0l7@UH?j7CUH=k7CUH=k7DUH;i7GWH8i7JWH5h7LXH4h7LXH4g7MZH2d70\\H0c72\\HNc73^HLb74^HLa75_HKa75`HJ^78bHH]7:bHF^7:cHE\\7=cHC[7?eHBZ7?eHA[7?eHA[7?eHA[7?eHAZ7a0eH_O[7a0eH_O[7a0eH_O\\7`0eH_O[7a0eH_O\\7a0bH@^7`0bH@_7?bH@^7`0bH@^7a0aH_O_7a0aH_O`7`0`H@`7`0`H@a7`0^H@b7`0^H_Oc7a0\\H@e7`0YHAg7?YHAh7>WHCi7>VHBj7>VHBj7>VHAl7>THBl7>THBm7>RHBo7=QHZOeMB[:T1PHUO^8k0bGSO_8m0aGSO_8n0aGPOa8o0`GPO`8Q1_GoNb8P1^GoNc8Q1^GlNd8U1[GjNf8V1ZGjNg8U1ZGjNf8V1c2O10O01O01O0010O01O01O1O010O001O001O01O000000010O00100O010O0001O3M2O0O2N2N2O2M1O2N3N0O3N1N3M2N3N1NTgZ3" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"[5e2=`1KdM`5l0eJ`1KdMa5k0cJb1LcMc5i0`Je1MbMe5h0]Jf1NbMg5f0[Jh1NbMk5b0WJl1NbMm5`0UJm1OcMn5>SJo1OcMP6R1V8]O]G@>R1U8]O^G@`0Q1S8]O^GBa0o0Q8^O`GAb0o0n7@`GAd0m0l7AaGBd0l0k7BbG@d0n0j7AcGAd0m0h7CdG_Of0m0f7CfG^Of0n0d7DfG^Og0m0d7DeG_Oh0l0P2eNj1o0_K_Oh0m0b7DfG^Oj0m0`7DgG_Oj0l0_7EgG_Oj0l0^7EiG^Ok0l0\\7EiG@k0k0\\7EiG_Ol0l0[7DjG@k0l0[7DjG@l0k0l1mN]1g0kKAm0k0k1POYLLh4g0XLBm0j0j1ROY1b0PLAn0k0j1QOX1b0QLBm0k0j1SOW1>SLCm0l0i1YOQ18XLDn0k0j1XOP18YLEm0k0j1YOo06[LEm0l0j1XOn06\\LFl0l0j1_Og0ObLFn0l0i1Bd0KfLEP1m0g1C7ZOPLa0R1DR1n0e1G3WORLa0T1CR1n0e1H2VORLb0T1AU1o0d1JNNYMYOU1o0d1KMLZMZOV1o0d1JLMZMYOW1P1c1JKM[MYOY1o0b1KIM\\MYOY1o0c1JHM\\MZOZ1o0b1KFM\\M[O\\1m0b1LEL\\M[O^1m0a1LEK]M\\O]1m0b1KDL]M\\O\\1n0c1KCJ^M]O]1n0c1JBK]M^O^1m0c1K@K^M]O`1m0c1J_OL^M]O`1m0c1J@J^M^O`1n0c1I_OK^M^O`1n0c1J^OJ_M]Oa1o0b1J^OI`M^O`1o0c1I]OJ`M\\Ob1Q1a1J\\OHbM]Oa1Q1a1J\\OHbM\\Ob1R1a1JYOIdMZOc1S1`1KXOGc0>5KXOGb0?6JXOFc0`05JXOFc0`05KWOEd0`05KWODe0a04KWODe0a04LVOBg0b04KUOBh0c03KUOBh0c03LTOAi0c03LTO@j0d02MSO_Ok0d03LRO_Ol0e01NRO]Om0e01OQO\\On0e02NPO\\Oo0f01NPO\\Oo0f01OoN[OP1f01OoNZOQ1g00OoNZOQ1g00OPOYOP1h000POVOP1k00OPOVOP1k000POTOP1l000QOSOo0m001POQOQ1n0O1POQOQ1n0O1QOPOP1o0O2POoNQ1o0O2QOmNQ1Q1N2ROkNP1T1O0ROkNo0U1N1TOhNo0W1M2d3N\\L2d3N\\L2d3N\\L2d3N\\L3c3M]L3c3M]L4b3L^L4b3L^L4b3L^L4b3L^L5a3K^L6b3J_L5a3K_L6`3J`L6`3J_L7a3I_L8`3H`L8b3F_L9a3G^L:c3E]L;e3C[L>e3A[L?e3A[L`0e3_O[La0e3_O[Lb0d3^O\\Lb0d3^O\\Lb0e3]O[Lc0e3]O\\Lb0d3^O\\Lc0b3^O^Lb0b3^O_La0a3_O_La0a3_O`L`0`3@bL?]3AeL=[3CfLhM@X2`0kM]OU2c0lM\\OT2d0nMZOQ2g0PNYOo1g0SNWOm1i0UNUOj1l0WNSOi1m0YNQOg1o0_NjNa1W1T60000000O100000000O1000000O10000O1000000000000O100O100000000000000O100000000O100000000O1000000O1O100O1O100O100O1O10000O1000000O1000000000000000000000000001O00001O000000001O00001O000000001O000000000000001O00000000001O00eB" + } + ], + "model_output": "A black laptop with a visible keyboard featuring white lettering on the keys, a touchpad, and a series of ports on the left side including a USB port, an HDMI port, and possibly a card reader slot, is on the table. 
The laptop's screen is open at an obtuse angle, and the display bezel is partially visible.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_58.png", + "subject_name": "person", + "object_name": "surfboard", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 432, + 640 + ], + "counts": "_nd22\\=3N2N100O11O000000QFJe65ZINd62ZI0f60XI2i6MTI6m6JnH9S7GkH;U7EhHj7BTH?m7BoG?S8AkG?X8@fGa0[8@bG`0`8@_G?c8A\\G>g8BWG=k8CTG;o8EPG9T9GkF7W9JgF6[9IdF7]9JaF6a9J]F7c9I]F6d9J[F7f9IXF7i9JUF7k9IUF6m9JQF7o9JoE7Q:KlE5MBP9lFO0ET9=kF2JD[9>fFR1Y9QOeFo0[9SObFn0^9SO`Fm0a9UO]Fk0c9WOZFj0g9XOUF?U:Y16WO`EZNh:c1d0O2M101O0O101O001N2O1O1O001N2O1O1N2O2M4L=C3L5K6Hejf4" + }, + { + "size": [ + 432, + 640 + ], + "counts": "c\\k24\\=001O00000000001O0O100000001O000000000000000O1000001O0000000000000000000000000O1000000000000000O10000000O1000O100000O01000000O10O0100O10O0100O1O100O2Meab4" + } + ], + "model_output": "A person in a full-body black wetsuit with a white logo on the left chest, featuring a high collar, long sleeves, and integrated gloves, is playing with a surfboard.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_59.png", + "subject_name": "horse", + "object_name": "sand", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": 
"VSR21[=0ZS_16]l`N5K4M3M4L5QDUOT;R1bDVOZ;`1M3L4M3N2N3M\\OPE@\\:=jEBU:8QFHn98QFJo97oEJQ:h10]OkEPNT:n1SFmMn9U2c02N00]OWEcNj:\\1[EYNI1m:d1d0N2N30\\EaNZ9b1bF`N]9b1`F_Na9`1^FaNc9^1[FdNe9[1YFhNg9X1UFlNl9S1SFlNP:S1oEkNU:V1eElN]:o12N2M4L5M3L2B`0H7L5M3N2M3N001N101O0@eC1[d3^8bLaG^3_8aLUGM6b3e8aLUGN6`3e8oLTGS3l8f000O1000000000000000000000O11O000000000000000000000000000000bLTG]2m8QM[GDI10Z3l8mLcGX3]8dLhG[3X8cLjG]3R9000000000004L0000000000001OO1M3N2000000000000001O000000001O1O001O00000000001O00O1WOdLcG\\3T92002N1O000000000000O1O100O1O2N00101N1O1O1L4001O1O2N3M1O1O0000000000000000001O2N1O0000001O00mNYMaGh2\\8_M`Ga2\\8dMcG\\2\\8fMcGZ2]8Z11OO101N1000000000000O10000000000000000000000000000000000001aK`GQ4Q9L2N3M2N2N2N8SLaF5KR3R:00O2O000001O0000O01AQMbFo2]9TM`Fm2`9TM_Fl2a9UM^Fk2a9UM`Fk2_9RMfFm2Y9oL`FM8T3h8mLPGO682l2g8cMZG]2f8`M]G`2c8^M_Gb2`8ZMeGf2[8YMfGg2Y8ZMgGf2^8PMnFKf0U3\\91M3M3O1L4N2M3N2N2N2J6E;M3O10000O1001O3M>B>oK^F]3l9K2O1O00001O0000lNmLRG>:e2_8jM]GV2`8PN\\GQ2^8kL`GW10n1_8a110000000000O1000000O10000O10000O10000000000O1000000000000000000O100000001O0000000O100000000000000O1000001O0O10000000000000000O2O0000000000001O0O1000001O00000000000O1000000000000000O100000000000O1000O1000000000000000000000000000000000O11N100000O11O000000O11O00000000000000000000000000000000000000000000000000000000000O100000001O0000000000000000000000000000000000000000000000000O2O01O00O1000000000000000000000000000001O000000000000000000000000000000000000000000001O00000000000000000000000000000000000000000000000000000001O0001O0000000000000000000000000000000000001O00_OWG`Li8`3XG_Lh8a3YG^Lg8b3YG^Lg8a3ZG_Lf8a3[G^Le8b3[G^Le8b3[G^Le8a3]G^Lb8c3^G]Lc8b3]G^Lc8a3^G_Lb8a3_G^LW1" + } + ], + "model_output": "A dark bay horse with a sleek coat, black mane, and tail, featuring a white blaze on its forehead and white socks on its front legs, is walking on sand.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_60.png", + "subject_name": "banana", + "object_name": "bowl", + "predicate_name": "beside", + 
"mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "c`]5c0\\>`0@3N1N2O1N2O0O2O1N101N2O0O2O1N101O0O2O0O2O001N10001O000O2O00001N100000001O0000001O0000001O0000000O11O000001O00000000001O0000001O00000001O0001O00000001O0001O00000000010O00000001O01O00000001O01O000000010O0000010O000001O01O0000010O0001O01O00010O0001O01O01O01O00010O000010O00010O00010O00010O0100O0001O0001O10O001O00010O010O02O0O1O010O101N010O1O10O02O0O1O100O100O100O2O0O101N101N1eGZM[4f2aK`MbLK[7g2PLaMaL1Y7_2SL[Ng3g1TL_Nj3a1SLdNj3^1QLeNP4]1kKeNV4\\1dKhN\\4\\1\\KgNf4[1SKgNo4\\1gJgN^5\\1YJgNj5_1gIfN\\6c1RIdNQ7c42O2N1N3M3N1N3N2N1O2N2M3N1O2N2N1O2O1N1O2N2N2N2N2O1N1O2O1N2O1N2O1O1N2O1N2O1N2O1N2O1N2O1N3M2N2O1N2N3M2N3M2N3M2N2N3M2N3M3M2N3L4M3M3L4L3OXJ" + }, + { + "size": [ + 480, + 640 + ], + "counts": "hZ^55j>3N1M3eI9YMJe2k0iGmNe4:`3_1UL`Ni3c1VL\\Ni3e1YLZNe3g1[LZNc3g1^LYN`3h1`LXN_3i1aLWN^3j1cLVN[3k1eLUNZ3l1hLRNW32aHV1Z4gNS31hHV1W4gNP33nHR1T4iNm26RIn0Q4mNk25XIk0n3oNi27ZIi0m3POh27]Ih0k3QOg28_If0j3ROf29aI7VOAc4Oe29dI5WOA`41d2:fI2WOC_41b2lIIV4Jl1=QJFT4Mj1>SJDS4Ni1?TJCS4Nh1`0VJAR4Og1a0WJ@R4Ng1c0XJ]OQ41f1c0ZJ[OP42e1c0\\JZOP43c1d0^JWOP45a1e0O[O0f00ZOOf02ZOMg03YOLh04XOLh04XOKh05YOKg05YOJh06XOIh07YOIg07YOHh08XOHh07YOIf08ZOGg09YOGg08YOIg07YOIg07YOIf07[OHf08ZOHf07[OIe06\\OJd06\\OJc06^OJb05_OKa05_OKa05_OK`06@J`06@J`07_OI`09_OGa09_OGa09_OGa0:^OFa0<^ODb0<^ODb0=]OCc0=]OCc0=]OCc0>\\OBc0?]OAc0?]OAc0?]OAc0?]OAc0?]OAc0?]OAb0a0]O_Oc0a0]O_Oc0a0]O_Oc0a0]O_Ob0b0^O^Ob0b0^O^Ob0a0^O@a0a0_O_Oa0a0_O_Oa0a0_O_Oa0`0@@?a0A_O?a0A_O?a0A_O?a0A_O?a0A_O?`0B@>`0B@>`0B@=a0C_O=a0C_O=a0C_OX15nJ_Oh3=Z14nJ_Og3>[13mJ@g3=]13kJAh3<]12kJCg3;_12iJDg3;`11hJEg3;a10gJFg3:c10eJGg3:d1OeJFg3;e1NdJHf3;f1McJIg39g1NaJJf39j1M_JKf38l1M^JKe38n1M\\JLe38o1L[JMd38R2JYJ0d36T2JWJ0e36U2JUJ1e35W2JRJ3f33Y2JPJ4f32[2JnI5NZOg2f0_3KjI7OZOg2c0a3MfI81YOf2c0d3LbI:3YOf2a0f3M^I;5XOf2`0h3NYI=7WOg2>j33nH=?TOg2=n3a1ZISNf2=Q4`1WIUNg2:T4a1SIWNg29X4_1oH[Ng26[4_1lH]Ng24`4^1fHaNg23d4\\1bHdNi20g4[1]HiNi2Mk4_2RKbMQ5^2lJcMU5]2gJfM[5Z2bJfMa5[2ZJgMh5Z2RJiMP6Y2jIiMX6Y2_IlMd6V2RIoMP7X2_HoMe7d42M3N4L3M
4L9GA;E?B=B;D9GmeT1" + } + ], + "model_output": "A ripe banana with a bright yellow peel, exhibiting a gentle curve and a smooth texture with a few small brown speckles, is attached to a bowl.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_61.png", + "subject_name": "person", + "object_name": "wall", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 482, + 640 + ], + "counts": "W]o31o>4M2N2[D1T81gG9R8IiG?R8CkGm0f7UOWHR1c7oNYHV1e7lNXHW1g7kNkGa1T8bN_Gj1^8\\NUGm1j8VNkFS2U9PNbFV2]9h1O10000O1O1O10000O1000000O1000000hMiJbH1ONk1X5Z5^KdJb4X5dKfJ\\4m4SLQKm3n4XLnJh3R5ZLkJg3T5aLeJ_3X5gLeJY3Y5mLcJS3Z5SMcJm2\\5XM_Ji2`5[M]Je2\\5eMaJ[2Y5nMdJR2W5UNfJl1W5ZNfJf1W5_NgJa1V5dNgJ]1V5hNgJY1S5^4O1N2O1M3M3N2N2O1O1L400O1O1O1O1O1O1O100N200OoGiKY5W4i2O1O10000O100O100O10000O1000000O1000001O0O0100001OO01000000001O00000000001O00001O1O1O0000001O0000010O0O2O1O1O001O000010OO2O1O1O1O1eIfKl1[4RNiKk1X4TNkKi1V4UNnKh1S4WNnKh1S4VNPLh1R4UNPLj1R4SNQLk1P4SNQLm1Q4PNQLo1Q4mMQLS2Q4iMQLW2Q4fMPLZ2S4aMPL^2S4]MoKd2U4UMnKj2Z4jLjKU3a4`JSJ11O0_1_1P4]5]KfJb4a5UKaJk4`5PKdJP5^5nJcJQ5^5mJcJS5^5kJdJU5\\5iJeJW5^5dJdJ\\5b7N100010OO100010O01O001N110O0O1000000jJWFS5i9mJWFS5i9lJXFT5h9lJXFT5h9mJWFS5i9mJWFS5i9mJWFS5i9lJYFS5k900001O01O1O001O1O0O1000O2O0O10000000O101O0001O01O0O101O00001O0000001O001O1O1O1O00001O001O001O1O00001O0000001O1O1O1O1O1O1O1O1O1O001O1O1O1O1O001O001O1O1O001O002N1O1O001O001O1O1O1O1O1O001O1O1O1O001O2M2O0O2O001O100O1N2O010O1O1O100N101O1O1O1O1O00100O1O1N2O00011N2M110O002N1O1N110O1O2N1O001O1O1O2N001O002N1O1O001O2N1O1O1O2N2N1O1O2N1O1O1O2N1O1N4L2N2O2L4L5K:DRo0" + }, + { + "size": [ + 482, + 640 + ], + "counts": 
"Zn071200001J0005O11J0O2O50K0:0K0J10001O0O5^2IiM7JKR7h4WJSMb5o2ZJUMe5l2WJWMi5l2SJUMm5m2PJTMP6o2iIUMW6n2bIVM^6f510000000000O100000000000000O1000000000O010000000000000000O10O11O1O0O2ROaIWI_6a6U1K5K5L4N102N1O1O00010O10000O1O1N200O100000000000000O10000O2O000O10O1TH^JX6b5fIfJT6Z5lIhJR6W5nIlJP6U5oIkJQ6U5oIkJR6U5lImJS6S5lInJT6R5lInJS6S5lImJU6S5jInJV6R5iIoJW6R5hInJX6R5gIoJY6Q5fIoJ[6e3RI`La0L]6a3XI`L:O_6_3[I`L61_6^3]I`L33`6\\3`I`LO3c6[3aIaLH7g6W3cIaLAVOES1X7U3cIbLB9n6R3aIfL_O9R7o2`IhLZOg0QO_OKKB2H7CJo11f2S1XKiN1c1h0_O]O^OLK0GW34^1k2PLUN^OYONMNFo0LW19_2i2QLWN^OWOONNF<`0f1Fb2f2PL\\N_OROO1NE;c0e1Dc2d2PL_N@POO1OD9f0e1Dc2e0]Ke0e0H_OnNO4OA9h0d1Ce2c0]Ke0e0L]OlN05N_O:j0d1Cd2`0`Kf0c0N^OiN07;:`1Cc2?^Kh0e0O^OeN298:b1Bc2e0^K;f08\\OaN3=5:d1Bd2b0^K9h0=YO_N4>49e1Cc2n1XLZOWO^N6>2;e1Bb28`Kg0j0=XOTN6d00:f1Bb28`Kc0l0c0WOPN5h0O9e1Cd25`K>Q1i0]OiNH9f1Ba2[OdKh01?n0m0]OgNH8g1Cc20dKd0i0m0_OeNJ8e1Dc2DeKI3V1e0m0AdNI9e1Ec2BgKH0Y1e0n0BbNH:f1Cc2@hK_1d0c0]OeMOX2c1cNd2^OhKa1c0b0_OdMOX2b1eNc2_1XL3CaM0Y2a1eNb2a1UL4G\\M2[2^1eNc2h1kK41C_1cNc2IoKl3NkM^1bNc2HQL_6[1kIc2V7[MlHd2U7ZMlHf2V7VMmHi2T7UMnHk2S7QMPIn2R7nLQIQ3Q7kLRIT3Q7eLSI[3Q7_LRI`3Y9O1O1O1O001O1O1O1O1O001O1O1O1O1O1O1O1O1O001O001O001O2N001O1O1O1O1O001O1O1O1O1O1O1O1O1O1O1O001O1O1O2N002N001O1OhMRN]Gm1`8YN^Gf1`8_N^G`1`8dN_G[1m2WNa0a0aLW1l2^N=>gLS1i2cN>=iLn0g2jN<:mLj0f2oN;9oLg0d2SO;8QMd0c2WO98TM?b2[O87XM=]2_O:4ZM<[2A:5[M:Y2C;4]M8V2F<3^M7T2H<2aM6R2H=3aM4Q2K=1dM2o1M=1eM1n1O<1gMMm13<0iMJl16:1kMKg15>0lMKe15`0OlMLb17`0OnMJ`19ROWNbM1h0h1UOVNc1_o0" + } + ], + "model_output": "A woman with short, light brown hair, wearing a black helmet with a badge emblem on the front, a black top with a V-neckline, and a gray knitted cardigan, stands in front of a wall. 
She has a silver necklace with a circular pendant and is looking slightly to her left with a neutral expression.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_62.png", + "subject_name": "person", + "object_name": "wall", + "predicate_name": "sitting on", + "mask_rles": [ + { + "size": [ + 444, + 640 + ], + "counts": "mm[43g=3N1O2O001O001N1010O01O0aJD]M1=Q1]OS2I_Me4a0bKoNa0n0@S3\\4lMnKROHQ3Y4QNiKPOOo2W4UNbKPO8k2U4NlK3S4LmK5R4KoK5P4LPL4o3MQL3o31mKOS43iK0U4l40000O10000000YHfKl5Y4l101O00000O10QOnKTHR4g7XLTHh3f7cLTH^3d7PMUHQ3b7i1^OmIVIX6g6a0M2O20O000001O0001O000001O00010O0000001[JTIX4l6`K`I\\4a6bKaI]4_6bKcI]4]6cKdI\\4\\6cKfI\\4[6nJQI9h0h4W6mJTI9f0j4W6kJVI4i0P5R6kJ]JU5V7O1O010O001O001O1O1O1O1O1O2N1O001O1O2N001O1O1O1O1O1O002O0O1N200O2N1O1O1O2[N[FoNf9c0YFSN0L4\\1d9l0XFhM3]1g9i0VFjM2^1j9f0TFlM2^1l9h0UFWOm9h0RFXOo9Z23M2M4M2N3L3N4L6H4M4L3N3L3L6J4M3M4J:D8I6M5GUCWOZ=6c__1" + }, + { + "size": [ + 444, + 640 + ], + "counts": "o_l5:P1Ib9?gFNjND?3n93L3O1O1O1N1O2O001N2O2N2M4M1O1N2N0O2O1O1O1O001O2O000O010O2O000000010O1O:F2N2N1O001OO100010OO1O011O002M5L1O1O1O2N1O2N1O1O1N2O1N2N2N8G6D`ALcml02WaSO2M2O2N1O2M2O2K6L3N1O2N1O3M2N2O0O2N4L2O0YHeNi1\\1UNiNg1X1UNmNh1V1VMeNQL?b6n0YMjNiKe0i6b0\\M:_2H]M`0^2B_Mc0\\2@[Mj0a2ZOYMm0c2TO[Mo0c2ROZMQ1e2SOUMR1g2SORMR1k2QOQMS1l2ROnLR1o2TOkLP1R3QOlLR1Q3POkLT1S3nNkLT1S3lNlLW1R3jNlLZ1R3eNnL^1o2aNPMd1n2\\NQMf1m2ZNSMh1l2VNUMl1j2RNWMo1h2PNWMT2g2kMTM\\2l2dMQM`2n2_MPMd2o2[MmLk2S3TMgLS3Y3lL`L\\3_3dL\\Lb3d3^LWLg3i3YLTLj3k3WLPLn3P4SLjKR4V4oKUK[M200P7h4S3000000O1O100O0010000O2N100O100O00100O100O100O1O2O0O001O100O100O1nMiEnMFLNO;b0Z:\\1\\ESN_1>U9Z1XGcNi8Y1_GcN`8[1eGaN]8V1nGfNT8Y1j2N2O1O1O2O000000O01000001M200O100O10O01O1O2O0O10000O10000O1000000O100000nFXOl3h0V501O0000O0101O0000000000000000000001O00001O00000YFXOV5i0`4001O001O001O1O001O001O001O1O1O1O001O001O1O1O1O1O2N2N1O1YEcNi7^1VHbNj7_1THcNk7^1gFiNFKc9^1VF`N@i03^OV:Z1oEgNEZ1\\:1iEjNIV1^:3aElN0S1^:g2O1O00001O001O1lMeKaI\\4Z6iKeIX4V6oKgIR4S6VLjIj3Q6\\LnId3o5bLPIQOCF:5M5JYbd2" + } + ], + 
"model_output": "A white football with a pattern of blue curved stripes and a blue logo is in front of a person.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_64.png", + "subject_name": "car", + "object_name": "road", + "predicate_name": "driving on", + "mask_rles": [ + { + "size": [ + 543, + 640 + ], + "counts": "kRZ83h`09H9a_OQ7l7jIdGWOkNgAT1^>bNhA]1n>O001O00000000O100000000O100O100O1O1SO]NXBi1g=h0M300O100O1M3O100M3O1O1N2O1O1O1N2N2N2M3O100L]LkBd3Y=O1O3M;cLWBn2n=O00cMQMkFn2U9RMSGf2j;ZMdA[2d>L1O5K4L6J2N1O1O1O1O2N0000000000O1O100N2H^Nn@g1m>8FPN_A10P2W>c000001O1O7`M^AU2h>N4L1O002N1O1O00000000000000O100O1lKTNSIn1h6YNTIi1l6aNiH`1V7YO_FeNYOOd0S2d9B^E\\NOU1l0m0h9T1TFmNl9X1nEiNQ:Y1nEgNR:Z1lEgNS:\\1jEeNV:_1eEbN[:b1]EbNc:m3O1O100O1O100O1O1O11O1O1O1O001O1O0000001O1O0000001O1O1OO1O13M2N1O001O1O001O0000001O2N001O001O001O002N1O001O00001O000000001O1O000000000000O11O001O0000000000O1001O001O000000000lJhDa4W;_KkD`4U;_KlDa4T;_KmD`4T;^KoD`4Q;`KPE_4P;aKQE^4o:bKQE^4o:aKSE^4m:aKVE]4j:cKWE\\4i:eKVE[4i:fKXEY4h:hKWEX4j:gKXEW4h:iKXEW4h:hKZEW4f:iK[EV4e:jK[EV4e:jK\\EU4d:jK]EV4c:kK\\EU4d:kK]ET4c:lK]ET4c:lK_ER4a:nK`EQ4`:nKbEQ4^:oKbEQ4^:nKdEQ4]:nKdEQ4\\:oKdEQ4\\:oKdEQ4\\:oKeEP4[:PLfEo3Z:QLgEn3Y:RLgEn3Y:QLhEo3X:QLiEn3e;O1O1O00lNRLWEn3i:RLXEm3h:SLYE9A5]OHNl1k;nM[E4c0g1R:UNaGd1o;N8H3fN[@T1j;lNRHS1n7mNSHR1l7oNUHP1k7POWHn0j7QOYHl0g7TO\\Hi0m;01O00000000001O1O0000001O000000O11O01O000000O100000000001O00000O1N110O1O1L5L3O1O1K50ON3O100O1100O001O001O001O000000001O2N5QOT@a0b`0B9FYoi01P`UO0``02]_O1_`09JDh_O=V`0Fh_O;V`09N10000000000000000m_O" + } + ], + "model_output": "A dark-colored minivan with a visible rear section featuring a red and white taillight on the left side, a partially visible rear window with a reflection, and a rear windshield wiper is driving on the road. 
The vehicle has a visible rear bumper and a partially visible license plate area.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_65.png", + "subject_name": "cup", + "object_name": "bed", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 436, + 640 + ], + "counts": "oTo37[=5L3M2N2N2O1N101N100ODYCNf<1[COe<1\\CNd<2\\CNd<2\\COc<1]COc<1]COd<0\\C0d<0\\C0d<0\\C0d<1\\CNe<1[COe<2[CMe<4ZCLg<3ZCL^S5DnJYIAi6;ZIDg69\\IFf64_IKb63cII_65bIJ_65aIK_64bIL_63aIM`62aIM_62bIN^62bIN_60bI0^6OcI1]6NeI1\\6MhIOY60iIOX6OiI1W6NjI2W6LkI3U6LnI2S6LgIHoL=Y9JiIInL=Z9IPJ6P6IQJ7o5HSJ7n5GSJ9m5GSJ:l5DVJgJ\\O_L6h8`0iJZO_L6o75WH?Z3VO`L5n7]1bK]NaL5n7^1fKaN[4_1]K_NeL2o7^1[KaNeL1Q8_1YK^NhL2P8`1XK]NiL2P8c1TK^NjLOR8d1SKdNn4]1QKbNP5`1nJ_NS5b1lJ^NT5c1kJ]NU5d1jJ\\NV5e1iJ[NX5d1hJ[NY5f1gJYNZ5g1eJYN[5f1`JUNVM5[8f1_JVNUM4]8f1]JUNWM5\\8g1\\JTNXM5\\8f1aJ[N`5b1bJ]N`5b1eGXNc26h5i1YJWNh5h1jGTNLNg17d6h1RJZNk1J^Om1dNZNn1K^Oj1cN\\No1I_Ol1`N\\NQ2H@l1]N]NS2G@m1\\N[NT2J^Om1\\NZNV2I^On1ZNZNW2I^Oo1ZNWNY2J]OP2XNWNZ2J]OQ2UNXN]2H]OR2TNWN_2H\\OS2RNVNb2G[OV2oMTNg2FYOY2mMRNj2EXO[2lMQNk2EXO]2iMoMP3DWO_2fMnMS3DUOa2dMmMW3BUOe2_MiM]3BSOi2ZMhMa3ATOj2UMiMf3_OSOl2QMhMl3\\OSOQ3dLjMZ4UOQOc4o0^KoNc4Q1]KoNb4R1_KlNb4S1_KmNa4R1`KnN_4Q1cKnN^4Q1dKnN\\4Q1eKoNZ4R1gKlNZ4T1fKkN[4T1fKlNZ4T1gKjNY4W1hKhNX4X1hKhNW4Y1iKfNX4Z1iKeNW4Z1jKeNV4Z1mKeNS4[1nKcNS4]1nKbNR4^1nKaNR4`1oK^NR4b1oK\\NR4d1nK\\NR4d1nK[NR4f1oKYNQ4f1PLYNQ4g1PLWNQ4j1oKTNQ4m1oKSNQ4n1oKPNR4P2oKnMQ4T2nKkMS4V2mKhMT4X2lKgMT4[2mKbMT4^2mK`MT4a2mK\\MS4e2nKYMS4h2mKVMT4k2lKQMV4Q3jKmLW4S3jKkLV4W3lKdLV4]3jK`LX4`3lKYLV4j3iKRLZ4o3hKkKZ4V4jKbKZ4^4iKYK]4h4V22N2O001N2O001N100O2O0O2O1O0O2O000O2O1O00100O001O001O001N102N1O1O1O1O1O1O1O1O1O101N1O1O1O100O2N1O2N2N3M4L2N4L3L5L3L4K5DH9B=D;G9C=H9G8M3L6J4M5J6J5L5I7H?WOhmk0" + } + ], + "model_output": "A cup of coffee with a frothy, light brown crema on top, served in a white ceramic cup with a visible handle on the right side, is placed on a paper napkin.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_66.png", + 
"subject_name": "person", + "object_name": "pavement", + "predicate_name": "standing on", + "mask_rles": [ + { + "size": [ + 640, + 427 + ], + "counts": "n[k523VOa@l0R1N]>VO]@S1S1G`>l0\\ATOd>o0YAROf>R1VAnNk>U1QAkNo>V1PAkNn>X1PAhNP?Y1o@gNo>^1n@cNo>a1o@_N:\\OY=Y2ZB[N4MAQOa=j2gBYN2;D`N]=m2lBXNOc0Q=Z1kBVN2b0R=l4N4L2N3M1OCUC]I05k^571O2N1O2O1N]OjA_KT>b4lA_KR>Y5O001N2N101O001N1O101O000O1O2[KmIWGJJ241JO50i2]6f4PM^HlLg1V6i5l5O0001O7I:Fa0_Od0\\O:F=C;E<\\F\\ET9Z;^O;E4jGWDl7Sl0K4000H8134JN2BXAIP?K^`e4" + } + ], + "model_output": "A person with short hair, wearing a light-colored shirt with a dark tie, dark pants, and dark shoes, is holding a smartphone in their right hand and appears to be in mid-stride with their left leg forward, pushing a stroller.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_68.png", + "subject_name": "cow", + "object_name": "grass", + "predicate_name": "walking on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "TVU22m>2O0O2O0000000O2L9\\ADX>c0N2N1000001O01O01O010O100O1O2M2O2M2O4K4M6I;F3M1O00YOg0O1O1O1O10000O11O001O2N2N4L2N2NCiBROV=n0kBSOS=m0oBUOmn0L4K5O13N6IWOiBFY=6iQn5" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"m7R7n7000000000001O00001O0000000000000000001O00000000001O000000000000000000000000001O0000000000001O0000000000000000000000001O0000000000000000001O000000001O000000000O1000000001O0000O1001O0000000000000000O10000001O00000000O100000000001O0000O2O0001O00000000000000O20O00000000000001O0000000O1UJfGQ5Y8oJhGP5X8PKiGo4W8QKiGo4W8QKjGn4W8PKjGP5V8PKkGo4U8QKkGo4U8PKlGP5T8PKmGo4T8PKlGP5T8mJoGS5Q8lJSHQ5m7oJ]Hg4c7YK^Hf4b7YK`Hf4`7ZKaHe4_7ZKbHf4^7ZKdHd4\\7\\KeHc4[7]KeHc4[7]KeHc4[7^KdHb4\\7^KcHc4]7]KcHc4^7\\KcHc4]7]KcHc4]7]KfH`4Z7`KgH_4X7bKjH\\4W7cKkH[4U7eKmHY4S7gKPIV4P7iKSIU4m6kK_Ii3a6VLfId3[6[LfId3Y6\\LiIc3W6^LhIb3X6^LhIb3X6]LhId3Y6[LXIT4h6lKQI[4o6eKoH]4Q7dKmH]4S7bKnH^4R7bKnH]4S7cKmH]4S7cKnH\\4R7eKnHZ4R7eKQIY4o6gKSIW4m6iKVIU4i6kKXIT4h6lKZIR4g6mK[IP4f6PL]Im3c6SL`Ij3`6WL`Ih3`6XLlHFWOR4l7WLmHLSOm3P8WLlH3nNf3V8WLlH5lNd3X8WLlH6jNd3Z8VLlH`4T7`KlH`4T7`KkH`4W7_KiHa4W7^KkHa4U7_KkHb4T7^KmHa4S7_KmHa4S7_KnH;eN^3]8WLnH7iNb3Y8VLRI1iNi3U8VLfIj3Z6VLfIi3[6WLfIh3Z6XLfIh3[6WLeIj3Z6VLfIj3Z6WLeIi3[6WLeIi3[6WLeIi3[6WLeIi3[6XLoH]4Q7cKiHc4W7^KiHa4X7`KgH_4Y7bKgH]4Y7dKeH]4[7eKjGT5V8nJcGW5]8f000000000000O11O0000000000O10000001O01O00000O10001O000001O00000000O2O00000000001O000000000000000000000000000000000000000000000000000000000001O00O1000000000000000000000001O0000000000000000000000000001O00O100000000000000000000000001O00000000000000001O0000001O00001O00000000001O000000000000001O0000000000001O00000000001O0000000000000000001O00O1001O00000000000001O00O100000001O000000000000000000001O0000000000000000000000001O00000000000000001O00000000000000001O0000000000000000000000000000000000000000000000001O0000000000000000000aJjFT5W9iJkFW5V9hJjFX5V9hJjFX5W9fJjFZ5V9fJjFZ5V9fJjFZ5V9eJjF\\5V9cJkF]5\\900000001O00001O0000001O0000001O001O001O0000000000001O00000001O00O10000000000000000000001O0000001O00000000001O00000000O1000000001O00O10000000000000000000000O10000O100_OiJYGW5g8jJWGW5i8lJSGU5m8lJQGU5o8mJlFV5T9;000000000000000000000000001O0000000000001O00000000000000001O000000000000000001O0001O0000gF" + } + ], + "model_output": "A 
black and white cow with a predominantly white head, a large black patch over the left eye, and a smaller black patch on the right side of the face stands on a gently sloping hill covered in green grass. The cow has a black body with irregular white patches, a visible udder, and is standing with its head turned slightly to the left, showcasing its profile.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_69.png", + "subject_name": "wine glass", + "object_name": "person", + "predicate_name": "held", + "mask_rles": [ + { + "size": [ + 640, + 427 + ], + "counts": "WUb4:ec02M3N2N2O1N2O1N101]N\\Oh_Oe0T`0_Ok_Oa0R`0Cl_O?Q`0Co_O>n?EQ@;l?IS@7k?KV@5g?N]_OWO=j0T`02]_OUO?i0R`05^_ORO>k0S`05^_OQO:o0U`03^_OPO=m0P`0e0o_O[OP`0g0o_OYOP`0h0P@XOP`0i0V@POi?R1W@mNi?S1W@lNj?U1U@kNk?V1U@iNk?W1U@hNl?X1U@fNl?Z1U@dNk?]1V@bNj?^1W@`Nj?`1W@^Nk?`1W@^Nj?b1X@[Ni?e1Y@VNj?j1V1O2O000O2O0O2O0O2N1O2M3M4L4iN]Rg2" + }, + { + "size": [ + 640, + 427 + ], + "counts": "_jT11VSc25a`]M9H5K6VIM\\I9]6M`I7\\6KbI:Y6JbI1RO=Y2Y5X2X5?VKj4TO`H9NGZ2e5c0RKd4\\O^H<2AT2d5m0nJm2@_J0h0\\1h4S1jJf2L^JH=k0\\1g4T1iJd2OZKa0n0g4V1fJ[2MZJ5k0a1`4V7nKWGBc1\\4W7VLUG^OAROo1Z5Z7WLWG\\O@SOo1Z5Y7XLXG[Oc1]4T7YLYGZOc1\\4Q7aLXGTODROo1Y5S7dLYGQOEROo1Y5S7`L]GUOAROo1Y5S7`L^GTO@SOo1X5S7bLmFVN`0k0AUOo1W5S7kL[GgNCWOo1W5S7PMWGaNGYOn1V5T7nLZGbNDYOo1W5S7lL\\GdNe1`4o6kL_GcNb1b4o6iLcGbN_1e4n6iLdGaN^1f4n6iLdG^NXOCV2W5m6hLfG\\Na1l4i6hLiGYN^1o4i6gLlGSNSONY2Y5g6fLmGSN`1W5c6fL`JZ3`5fL`JZ3a5eL_J[3a5dL`J]3_5cLaJ]3`5bL`J^3a5aL_J_3a5^LbJb3_5]LaJd3^5[LcJe3^5YLcJg3]5YLcJh3]5SLgJm3[5lKjJU4W5_KjFUOT4\\5[:1O00001O00001O1O1O001O001O2N1bDZJ`8f5_G[Ja8f5WGcJg8^5QGkJm8W5mFnJR9R5oFmJQ9T5PGkJo8V5mFnJR9U5jFmJU9Y5eFgJ[9Y5eFgJ[9Y5fFfJZ9\\5cFeJ]9j5nD]Il0n0U:l5hEVJX:k5bEYJ_:g5`EWIBg0n:T6[EYIEd0P;Y6PEZJP;[700000000000000000001O0000001O2N00000000001O00001O0000001O001O0000001O000000001O0000001O000PKlF`KNZ2V9l1[GdK@`2U9T1YHULcNg2T9n0gHoKYNS3Q9h0oHPLlMGOb3V9?eIPLTMa3X93oIaLhLX3Y92TJdLeLZ3X9MVJjLbLY3Y9GUJmL`Lf3[9YOYJk0h5ROYJoNbLJU9V1YJoNhLGP9Y1XJkNVMCb8b1XJeN_M
FZ8d1UJfNdMEW8g1SJcNhMEV8h1PJcNnMBR8k1oIcNRN@P8m1jIfNYNZOn7P2gIbNiLaNe1l0k7R2fIaNbN]Oh7S2dIaNfNZOg7U2aI`NnNWOb7Y2^I`NTOTO_7\\2[I`NXOSO^7]2YI^N\\OTO[7_2WI]NBQOX7b2TI^NFnNW7e2mH`N0hNU7g2jHaN4eNS7i2hHcN6cNR7k2gHbN9aNQ7n2bHbN_:_1]EcNc:_1WEeNj:[1TEfNn:Y1QEfNQ;Y1mDhNW;V1eDmN[;U1cDkN^;U1`DkNb;T1]DmNd;T1XDmNk;S1QDmNRoNZAE2O4KNOL323ij0" + } + ], + "model_output": "A clear wine glass with a thin stem and a broad, stable base, partially filled with a transparent liquid, reflecting light on its curved surface, is held by a person.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_70.png", + "subject_name": "person", + "object_name": "snow", + "predicate_name": "over", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "kTY55c0Nj;9oCJP<;kCFTU8R6THnIm7o5VHoIk7h4jEiKf2SOb7Q5oEZKCGf=n4iBVKb=i4l0O0SO\\A]Le>\\3gA\\L]>a3gAZL\\>a3T1N2OhAcLkM9f=P3_D_M[;^2bDjM];S2bDQNVNlNH1ne0f2T6g4oHVH8b6C]I=c6D\\IOn0d4j5[NjHoLe0Ng0g4k5\\NiHmLj2f4^4]NiHhLm2j4[4^NhHeLP3m4X4^NXLa1j3^NVLb1j3bNRL]1o3dNPL[1Q4eNoK[1Q4eNoKZ1R4fNnKZ1R4fNnKY1S4fNnKZ1R4fNnKY1T4fNlKZ1T4fNlKY1V4fNjKY1X4fNhKZ1[4cNeK\\1^4aNcK_1_4_NaKa1a4]N^Kc1m4RNTKn1Q5YLaG7_3`3T5SL`G;]3b3^5_K_Gn0S3c3`6ZLaId3d6WL]Ii3d6VL\\Ij3d6VL\\Ij3e6TL[Im3f6RLZIm3l6nKTIR4n6kKSIU4n6jKRIU4P7jKPIV4R7hKoHW4T7eKmH[4V7bKjH^4X7`KhH_4];0000OeEcKU5]4kJbKV5^4V50cEfKT5Z4kJmKn4T4RKmKn4R4RKmKo4S4QKmKn4T4SKjKmMoAR2R>oMmAP2e8TMgKm0dKo1`8\\MhKf0hKm1_8aMfKe0iKj1]8gMgKa0kKh1\\8jMgK?mKf1\\8mMeK?nKc1\\8RNcK=PL`1]8UNaK=QL^1Z8ZNcK:RL\\1W8_NeK7SLY1X8cNbK6ULW1W8fNcK3VLW1V8hNbK3WLU1W8iN_K5YLR1X8kN\\K5[Lo0Y8nNZK5\\Lm0Y8QOWK5^Lk0[8SORK5cLg0[8UOQK5cLf0\\8VOoJ6dLd0]8XOkJ8fL`0_8[OeJ9kL<_8_ObJ7nL:a8_O^J:PM7b8A[J:RM5b8EWJ9VM2c8IoI:]MMd8KlI:_MKe8NgI9dMIe8OdI:gMGe83]I9nMCf89SI8WN_Of8=lH7^N\\Of8a0cH8gNWOf8e0[H7POTOe8n0gG:CgNg8a5YG_Jg8a5YG_Jg8a5YG^Jh8b5XG^Jh8b5XG]Ji8c5WG]Ji8c5WG]Ji8c5WG]Ji8CVGd31iLh8A_GoMC^56QMi8@fG]3ASMh8^OnG[3ZOWMh8]OUHV3SO]Mh8ZO`HQ3hNeMh8WOeHR3cNfMh8SOmHU3[NhMh8ROPIT3XNjMh8POVIR3RNnMh8POVIR3RNnMg8mN\\IT3mMoMg8kN_IU3iMQNh8gNcIW3fMQNg8fNgIW3bMSNg8cN
lIX3]MUNg8\\NWJ[3RMXNg8YN]J^3kLZNh8VN_J`3jLZNe8UNdJ`3gL[Ne8QNiJc3bL\\Ne8oMlJd3_L^Nd8jMRKg3YL`Ne8gMUKh3WL`Ne8cMYKl3RLaNo<_1QCaNQ=]1oBcNQ=]1oBcNR=\\1nBeNQ=[1oBeNQ=[1oBeNQ=[1oBeNQ=[1oBeNR=Z1nBfNR=Z1nBfNR=Z1nBfNR=Z1nBfNR=Z1nBfNR=Z1nBgNR=X1nBhNR=W1oBiNQ=W1oBjNP=W1oBiNQ=W1oBiNQ=V1PCkNP=T1PClNQ=S1oBnNo:hM\\FX3fNPOh:oMbFo2gNSOd:QNdFl2hNTOb:SNfFe2jNZO]:TNfFc2mNZOY:XNgF^2QOZOW:ZNgF[2SO\\OS:\\NhFY2UO\\OQ:]NiFV2WO^Om9_NkFQ2ZOAg9bNnFl1\\OCb9eNQGg1^OE]9gNUGd1^OE\\9iNUGa1@GY9iNVG_1CIS9lNXG\\1EIQ9lNZGZ1FKl8oN]GV1GKk8PO^GT1HMh8QO^GR1KOa8SOdGl0M2]8TOeGi0O4Z8UOfGg004Y8VOgGe016U8XOiGb027R8ZOjG>69n7ZOlG=6:l7[OmG:8a7BSH0=`0\\7DUHK`0b0X7FWHFc0e0T7GWHDf0f0n6KYHAi0e0h6]3XIeLa6a3`I`L\\6a3eIaLU6b3lI`Ln5c3SJ`Lg5b3ZJ`LZ2PL_Oc7WN_LW2RL[Ob7_N]LU2]LlNY7PO[Lo1cLiNU7YOZLk1gLgNo6_OZLg1kLhNl6BXLf1mLVMPOF0Q1m7=WLd1mLUM]O1CNR8Y1QL_1hKSMP;_1XI]1k6eNVIX1k6iNUIU1m6kNTIT1k6mNVIQ1j6oNXIn0h6TOYIj0h6WOXIg0h6ZOYIe0g6[O[Ib0e6_O\\I?d6B]I;d6F]I9b6H`I5`6L`I3a6M`I2_6ObIO^62cIL^64dII[69eIF[6;fIDZ6hIBW6?jI@U6a0mI\\OS6e0nIYOR6h0oIWOQ6i0PJVOP6j0QJTOo5m0SJoNn5R1WJ_NQ6c1YJlMn5T2n5100O1O1O100O1O1O1O1O1O1O100O100O1O1N20Dn^OjMQa0c2O1O1O1O10000N2O1O1O100O1O1O1O1O100O002N1O1O010O2O0O1N10101O0O1O0O201N100O1N2O2N1O2Lm5Z@^GN^O2YOO3N44oa1" + } + ], + "model_output": "A small, fluffy white dog with light tan markings on its ears and around its eyes, wearing a silver chain collar with a tag, is sitting on a bench.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_72.png", + "subject_name": "fence", + "object_name": "person", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": 
"bbe32T=9K2N3M3M2N1000000000000000000000000001OO10000001O000000000000000000000000000000000000000000000000000000000000001O00O1000000001O0000000000000000001O0000000000000000000000000000000000000000000000000000000000000000O10000000000001O0000000000000000000000000000000000000000000000000000001O00000000000000000000000000000000000000000000000000001O00000000O10000000000000000000000001O000000000000000000000000000000000000000000000000000000001O000000000000000000000000000000000000000000000000000000001O0000000000000000000000000000000000000000000000000000001O0000000000000000000000000000000000000000000000001O0000000000000000000000000000000000000000000000000000000000000000000000001O0000000000000000000000000000000000000000000000" + }, + { + "size": [ + 427, + 640 + ], + "counts": "e`l0i0[<=B`0D9J4PJWN6o1@ZNeMYOH8d2?dMYOH8d2?dMYOH8d2`0cMYOH7e2`0cMYOH6f2a0bMXOI7e2a0bMYOH6f2`0cMZOF7g2?cMZOF7g2?dMYOE8g2`0cMXOF8g2`0cMXOF7h2a0bMXOF7h2a0bMXOE8i2`0bMXOE8i2`0bMYOD7j2`0bMYOD7j2`0bMYOD7j2`0bMYOD7j2`0bMYOD7j2`0bMYOD6k2a0aMZOC5l2a0aMZOC5l2b0`MYOC6m2a0aMXOB7m2a0aMXOB7m2a0aMXOB7m2a0aMYOA5o2b0`MYOA5o2b0`MYOA5o2b0aMXO@6o2b0aMXO_O7P3a0`MYO@6P3a0`MYO@6P3a0aMXO_O7P3a0aMXO_O7P3a0aMXO^O8Q3`0aMYO]O7R3`0aMYO^N]O:i0h3a0aMXO\\NB8e0k3a0aMXO\\ND6c0m3a0aMXO[NG5`0o3a0aMXOZNI5>P4b0`MWO[NJ4=Q4b0`MWO[NJ3=S4b0_MWOZNK4XLBh3?WLAi3a0TL@l3b0RL_Om3b0RL^On3d0PL\\OP4e0oK[OQ4g0mKYOS4j0iKWOW4j0hKVOX4l0fKTOZ4m0eKSO[4o0cKQO]4P1bKPO^4Q1`KPO`4S1]KmNb4U1]KkNc4X1TKnNm4f30000000000000000000001O000000000000000000001O00000000001O0000001O0000000000001O000000001O000000001O000000001O00001O001O1O1O1O1O2N2N2N1O2N2N2N2N2N1O2N2N2N2N1O3M2N1O1O2N1O2N2N1O3M1O1O1O2N3M4RMeGg2`8N2N2N1O3M2N1O1O1O1O2N3M2N2N2N2N1O1O1O1O2N2N2N1O2N2N2N001O1O2N1O1O2M3N1N7[ObENg:N^EN[Y\\2" + } + ], + "model_output": "A baseball catcher in full gear, including a blue helmet with a protective face mask, chest protector, and leg guards, is crouched on the playing field, ready to receive a pitch. 
The catcher is wearing a white uniform with blue and black accents, and is equipped with a mitt on the left hand.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_74.png", + "subject_name": "person", + "object_name": "person", + "predicate_name": "looking at", + "mask_rles": [ + { + "size": [ + 640, + 296 + ], + "counts": "\\aT38cc06J5N2N4n\\OEZb0Q1G9O1OHn]OcNSb0[1Q^ObNPb0\\1;2N;E00010O`Nk]OU1Vb0iNl]Oa0<]Oia00m]Oa0<]OUb0=l0GQ[W2" + }, + { + "size": [ + 640, + 296 + ], + "counts": "QXZ28cc07d\\OLeb0i0N3N1O2[]OUOa0Ek`0`2M3M100O1O100O100N1O2OYO]_OQNc`0o1f0201d^OhMm`0S3C001O0001O1TMY_O]2i`0_M[_O_2Va0M3M1O3N4XNZ^Ol0oa0jNV^O:8CZc0O1O0O3NfYf2" + } + ], + "model_output": "A person with short hair, wearing a dark-colored t-shirt and blue jeans, stands with their back to the viewer, beside a young child.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_75.png", + "subject_name": "motorcycle", + "object_name": "grass", + "predicate_name": "parked on", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "gmT72Y=2N001O1O1O2N1O2N1O2N1O6J:F1O1O00000000000000000000O100O100O100O100O1O100N2O100N2O1O1O1N2N2N2M3Lb`a0" + }, + { + "size": [ + 427, + 640 + ], + "counts": 
"1o1a0\\5o2dJQM]5n2aJTMa5j2_JVMb5i2^JVMd5i2\\JWMf5g2ZJYMg5f2YJZMh5e2XJ[Mi5d2VJ\\Mm5b2SJ^Mo5`2QJ`MP6_2PJaMQ6^2oIaMT6]2kIdMV6[2jIeMW6Z2iIfMY6X2gIhM[6V2eIjM[6V2eIhM_6V2`IkMb6S2^IlMd6S2\\ImMe6R2[InMf6Q2YIPNi6n1WIQNk6n1UIRNl6m1TISNm6l1RITNP7k1PIUNR7j1lHVNV7i1jHWNW7h1hHYNY7e1hH[NY7d1gH[N\\7c1dH]N]7b1cH^N_7`1`HaNa7^1_HbNb7]1^HbNc7_1[HbNg7\\1YHdNh7[1WHfNk7W1VHiNk7V1UHiNn7U1QHlNP8S1PHmNQ8R1oGnNS8P1mGoNV8o0jGQOW8n0hGRO[8l0eGTO]8j0cGVO_8h0aGWOa8h0_GXOc8f0]GYOe8f0ZG[Oh8UOn0V9dNeG>[Om0l8dNiG>_OP1c8bNnG>AQ1^8aNQH=CT1Y8_NSH>IP1R8bNUH>KQ1m7aNXH=OP1h7cNXH>0o0h7cNXH>5k0b7gNYH>:g0\\7kNZH=?e0V7nNZH>`0e0U7mN[H=a0g0`NQOV8KiH=a0h0]NSOX8HiH=c0V1c6]NjH=c0O[Ng0X8mNjH=b00^Nd0V8oNjHD5HU1n7XNfH>B7IS1S1lNh4]OjJ=A9IQ1P1VOf4TOnJgNf4]NXK>C_2=kNe4XN[K>C_2;POd4SN^K=D`28TOd4oM`K=D_26[Oc4iMcK:]12S2V3RL^L>:]12T2U3QL_L=;_10T2U3PL_L>=^1NT2V3PL_L=>`1LT2V3oK`L=>a1JU2W3mKaL=>a1JU2W3mKaLi2X5_1iJcNW5\\1iJeNV5[1jJeNV5[1jJfNV5Y1jJhNV5W1iJjNW5V1iJkNV5U1jJkNW5T1hJoNV5Q1iJROV5m0jJTOU5l0kJTOU5l0jJVOV5i0jJXOU5h0jJZOV5e0jJ\\OV5c0jJ^OU5a0lJ_OT5a0lJAS5>lJDS5[J^Oj5a0VJ^Ok5b0TJ]On5b0SJ\\Oo5d0QJZOR6f0mIXOU6h0kIXOU6h0jIXOX6f0jIXOW6i0iIUOY6j0kIQOV6o0PJjNQ6V1RJfNP6Y1RJeNn5[1ZJ\\Nh5c1[JYNf5g1^JTNc5l1dJlM]5S2hJiMY5V2oJaMR5`2VIeLb1e0Y5f2UIhL`1a0\\5f2TIoL[1:a5g2TISMX14e5i2SIXMT1Ni5j2SI[MQ1Jm5k2RI^Mn0GQ6j2QIhMe0]O[6k2PInM>WOc6k2oHQN:UOg6j2oHVN5oNm6k2nHWN3oNP7h2nHUOR7k0nHUOS7j0mHUOT7l0kHTOU7l0kHTOV7k0jHTOW7k0jHUOV7k0jHUOW7k0hHlL7g1R7\\1gHlL9g1Q7[1gHmL9h1P7[1gHmL:g1P7[1fHmL;g1P7\\1eHlL2O1J6OM21LHcA6Vm0KbA1N21OZhb0Mnh\\O0k>1]AOc>000fm01_cN1d>N\\A0k>0bP17``NJ\\95`K;Y4L`K:[4MXK>d4HlJg0P5[OmJh0Q5\\OgJi0W5ZOdJk0Z5ZO^Jl0[2QNbMW1Lm0ISO_NZOc1h02n0GP13UN1n0JQ1EbLYNa1T2P1IR1GbLXN^1T2T1GQ1J[N:i0IP1JYN:j0Jo0KWN:l0Io0KWN9n0Hm0OVN6o0In0OUN6n0Jo0OTN5n0JQ1hN[L7f1W1n0JR1dN_L9a1`0dMH[3j0U2mNlLY1P1HV2nNkLY1o0IW2mNkLY1n0IY2mNiLZ1n0IZ2lNiLZ1n0H\\2kNhL\\1m0HU57lJGU59lJFT5:lJEU5;lJDU5;mJCT5V5BjJ>V5BiJ?W5AiJ?W5AiJ`0V5@iJa0W5_OiJa0V5@iJb0V5^OjJb0V5^OiJc0W5]OiJc0W5]OhJd0X5\\OfJg0Y5YOUH3`0e0[7XOSH6?d0^7VOQH9>d0`7SORH;6h0h7mNRH=2i0k7jNSH>0i0m7iNSH>Ok0m7gNTH?Mk
0P8eNSHb0Il0R8cNVHa0^OV1[8YNWHb0dNA5f1P9WNWHc0bNC4d1S9VNWHe0^NE4c1W9RNWH[1^Ne0[9PNWHd3g7]LYHc3g7]LYHc3g7]LYHc3g7]LYHb3h7^LXHa3j7^LVHa3k7_LTHb3l7^LTHa3m7_LSH_3o7aLQH]3P8dLPH[3i2gLfK0l06Y2H\\NZ3l2VMSL3a2]O`NX3o2fN^NRNbNX3P3hN]NPNcNW3f2iLiKU2l2kMeNV3f2lLgKT2e1lM]OMa0U3l2YOQMjM_OHd0U3l2[OmLkMBEe0T3j2BiLjMF@g0T3?QMMk2mNeMM[Oj0S3=_MPN3j0`2KdM2XOl0R3=WNjNn1HcM5VOl0R3;YNjN\\2XOWMg0ROl0R3:ZNiNd3NPKn0R3;\\NeNf31lJo0R39_N^N^NIV5a0kJo0R38aNcNd36iJn0S38bNbNc38hJn0S38bNaNd39gJn0R37fN_Nc3i4dNXHn0o2>i4dNXHn0n2?j4cNXHm0o2`0j4bNWHl0Q3a0i4cNVHk0Q3c0j4aNUHl0Q3c0j4aNUHl0Q3b0m4`NRHm0P3d0Q5]NoGm0Q3g0S5XNmGQ1o2h0Y5RNhGU1P3i0_5VObJj0^5UOaJl0a5RO_Jn0c5PO]JP1d5nN]JS1c5lN^JT1c5hN_JX1e5cN[J^1h5_NXJb1k5XNXJh1`90O1O010[GkM]4W2bKkM\\4V2_KoMa4Q2^KPNa4Q2\\KRNd45hGQ1c3kNd46hGo0d3kNd46hGo0d3kNd44jGQ1b3jNe43lGS1G`Nb3;j41oGY1V3eNl40PHZ1U3fNk40PHY1U3hNj40QHV1T3lNl4MQHV1o2^NeL?\\8MPHV1o2_NcL>_8MoGV1o2_NcL=`8OmGU1P3^NdL=`8OmGV1o2_NcL;c8MnGX1l2lNY5JlGZ1k2kNZ5KkGZ1k2kNZ5JlG[1j2kNZ5JlGZ1k2kN[5IlGU1m2SOY5GjGT1g2iNhL>g8EjGS1m2XO[5DiGS1k2YOW6f0jIZOW6d0hI^OY6a0fI@Z6?fIA\\6=eIC[6;fIE]64hIK[61gIOZ6NhI1[6KgI4^6DeI<^:11O:DaQl2" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"l;S3m;0O10000O10000O1O100O1000000O10000O100O100O10000O1000000O100O100O100O1000000O100O1000_OoDRMR;k2b0N3M2O2N1O2O001O1O10O01000O010O1O01OTO_MZE`2g:bMXE\\2i:eMWEY2k:hMTEV2n:jMQEV2o:lMPEQ2S;PNlDn1V;SNiDk1Y;UNfDj1[;WNeDg1];YNcDf1^;[N`Dd1a;]N^Dd1a;]N^Dc1c;^N\\Da1d;`N\\D_1e;bNZD\\1g;fNWDZ1j;gNSD[1m;fNQDY1QO100O100O1O2QOhB3Z=ETC4n=M2O2N2MdTh27njWM9B_OTBj0j=7O100000O03N7I9PM]NPHf1k7\\NTHe1k7\\NUHd1j7^NUHa1k7_NUHb1j7_NVH`1j7`NWH_1i7bNVH_1i7aNXH^1h7bNXH_1h7`NXH`1h7`NYH_1g7aNYH`1f7_N[Ha1e7]N]Hc1d7YN`Hf1]:0000000001O0001N1000001N100YDaNl9a1PFiNh9X1UFnNg9T1UFPOj9[1eEkNZ:n2O1O2N1N2O1O100O10000000000000000O10O02O0O100O1O1O1O1O1O100O1O1O1O100O100O100O100O1O100O1O1O100O1N2O1M3C=N2O1M3O1O1O1N2N2N2N2N2N2O1O100001O00000000001N1001O0001O000O10001O00000000001O00001O00001O00000000001O0000000000001O00000000001O000000001O00000000001O000000000000001O0000000000000010OO10000000001O00000000000000001O0000000000000000000000001O0000000000000000000000000000000000000000000000000000001O0000000000000000000000001O000000000000000000000000000000000000001O00000000000000000000001O00iF" + } + ], + "model_output": "A young boy with short dark hair, wearing a beige baseball cap and a pink and white striped shirt with a graphic design on the front, is sitting on a bed, holding a yellow cup in his right hand and pointing with his left hand.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_77.png", + "subject_name": "person", + "object_name": "road", + "predicate_name": "crossing", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "`gm36Q=4kNIjDc0P;GhD>LWOm:?QE?J\\OR;6PEP1e0_N`9Y2]FhMb9[2[FgMd9\\2YFdMh9_2SFbMm9m22O1O5K4L1O1kNdEYO]:e0eEZO]:e0cEYO_:f0cEWO`:g0bEWO`:c0eE[O_:LXEI?9]:FYF9`;L1O1N3N2M_PQ4" + }, + { + "size": [ + 427, + 640 + ], + "counts": 
"c9g3c910000O100O01000O2N100O10000O1000000O100O1000000O100O100O10000O10000N2O10000O1O10000000000002N8H1O001O00000000001O000000O100000000000000FVLRGk3^8i0J6O1N2F:O100000000O1000000001O000000000000000000O100000000000000000000000000000000000000O1000000001O0000000000000000OYLWHn1i7mM]HR2c7iMcHV2]7iMdHW2]7gMeHX2[7fMgHZ2Y7dMiH\\2W7aMlH_2S7`MoH`2Q7^MQIb2o6\\MSId2n6WMVIi2k6PM[IP3f6oLZIQ3g6nLYIR3i6kLXIU3k6cLZI]3Z8000O100O100O100O100001O0000000000000000O10000001O1O00000000UOdLfG]3Y8eLfG[3W8jLgGV3X8kLgGV3X8lLgGT3X8mLgGT3Y8lLgGT3Z8kLeGV3\\8iLdGW3_8fLaGZ3h8\\LXGe3i8ZLVGg3W901O2N[N_LaI`3^6cL`I]3`6dL_I\\3a6fL]IZ3b6kLZIU3f6lLYIT3g6mLXIS3h6nLWIR3h6[MlHe2S7aMhH_2W7fMeHZ2[7iMbHW2^7lM_HT2a7mM^HS2d7kM\\HU2j7fMUHZ2S8^MlGc2W8ZMiGf2Y8ZMdGg2\\8T100000000000000000000000000000000000000O100O1LUKgGl4U87O1O100000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000001O000000002N4L00001O1O0000001O0000001O004L1O00000000000000000000000000O100O1O1O1O100O100M3N2M3N2O1N2O1N21O001O00001O1O1O00000000O1N200O100000000001O1O2N1O00000000O100O1001O001O00001O=lJ_Gd4Q9C2N001O0000000000000000O100001O0000O100000000000000O1000000000000O100M3FeKcG\\4X8eKaGf4^84O1001OO10000001O000000000000001O0000001O1O1O00000000000000001O0000000000000000000000000000000000O100O1001OO1O100000000O10000001O0000000000001O1O2N2N5K4L002N000000000000000000000000000000001O2N4L3M3M2N2N000O1001O0000000000000000000QMYLZLg3e6000O1L4O1LSLiFn3W93001O1O6J5K3M5K4L1O_HiLo4U3d21O00000000000000000000000000000O100O100N2N2O1O100N2O1O100O1001O000000000000O100001O00000000000000O10000000000000000000000000000000000000000000000000000000000000000000000000000000000ZF" + } + ], + "model_output": "A person wearing a dark jacket with a fur-lined hood, light-colored pants, and dark shoes, with short hair and a dark-colored beanie, is walking on the road.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_78.png", + "subject_name": "person", + "object_name": "bench", + "predicate_name": "sitting on", + 
"mask_rles": [ + { + "size": [ + 429, + 640 + ], + "counts": "WS_24X=2N1O2N1O100O1O100N2O1O1O1O1[OBYD`0c;FZD;c;KYD6f;MWD4g;OVD3i;h0O2O0O2M2O1O1O10001O001N1L4N2O2M2O1O2N100O1O1N2O100000000000010O01O001BTEZNm:^1TE[N06n:]1^E[Nf:d1b000E\\DmNd;S1\\DnNd;]1010[O]DoN37a;e0iD[OY;KXD`0b0D\\;:fDF\\;7eDH_;3bDN^;1cDN];2cDN_;ObD1_;MbD3`;JbD5[<01O0000001OO10O10Sli4" + }, + { + "size": [ + 429, + 640 + ], + "counts": "R\\Y1i0a<3O1O1O1O0OG7M221101O1O1O1L4J6J6OGPDWOPOmgd4" + } + ], + "model_output": "A person with long, wavy hair, wearing a patterned dress with a mix of dark and light tones, and dark shoes, is sitting on a bench with one leg crossed over the other, and the left arm extended forward, resting on the armrest.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_79.png", + "subject_name": "person", + "object_name": "tie", + "predicate_name": "wearing", + "mask_rles": [ + { + "size": [ + 640, + 428 + ], + "counts": "TjW56fc05L4K4M4N2J7G8N2N3N0O3N3L2O1N2O1O1N1000O1O001O1O10O01O1O1O00100O10O0100O00_OfN^^OY1\\a0QO`^Oo0]a0WOa^Oh0]a0m002N20O0[Ob^OiN_a0l1O1O101N1000001O001O1O1O1O1O1O010O000010O1000O10O1O2N1QNk^OP1Xa0iNn^OV1na0O1O2N1O2N1O2O001N100O2O0O2N100010O0010O010O01O01O001O001O010O10O1O010000O4M5K2N4L2O1O2N0N3N1M4M1O3LYae0" + }, + { + "size": [ + 640, + 428 + ], + "counts": "l]V63_c0Ol\\O3Qc00m\\O2Rc0Ol\\O3Sc0Nj\\O5Uc0;0O1O1O1O2N3M4Ca\\O1oam1" + } + ], + "model_output": "An older man with a receding hairline and a full head of hair is wearing a dark suit jacket over a light blue dress shirt and a dark tie.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_80.png", + "subject_name": "dog", + "object_name": "elephant", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": 
"mce59b0Le;9PD4k;NQD7i;^O[D3Jc0h;XOcDR1m;L01M4M^OZDMZ;2jDKV;5lDIT;8SE@m:`0n0:F2O1QDPOa;R1[DROc;P1[DPOe;Y1HdNdD\\1];fN`Dm0O\\Oa;c0iD[OW;c0lD\\OS;d0QEXOP;g0SEVOl:j0j0000000002N3M3N[OQD0m;JnCI=8eWIAh6?YI@g6`0]I\\Oc6d0aIXO^6i0cIVO]6k0eIRO[6n0fIQOZ6o0hIoNX6Q1kIlNU6T1lIkNT6U1nIiNR6X1PJeNP6\\1RJaNn5_1UJ^Nk5b1VJ]Nj5d1WJZNi5f1WJZNj5e1WJZNi5f1XJYNi5g1WJWNk5h1WJVNi5j1XJUNi5j1XJTNi5l1k2O010O1O002N001O1O2N1O101N1O2EZDoNh;o0YDoNj;o0XDoNi;P1XDoNj;o0VDPOl;P17011OO0O3N4L11N01O1O10O100O00012NO01001N1N2O1N3O1N3N001O0O2M2N5JbPP4" + } + ], + "model_output": "An adult elephant with a brownish-gray skin tone, large ears, and a long trunk extending downwards stands beside a dog. The elephant's tusks are not visible, and its eyes are small in proportion to its head. Its tail is thin with a tuft of hair at the end, and its legs are sturdy and columnar, supporting its massive body.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_81.png", + "subject_name": "potted plant", + "object_name": "vase", + "predicate_name": "in", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Sa\\13k>`0A?B:F>B;E:FP3R4PMlMNbM7=k2U4PMlMNaM:9d2\\4oLlM0\\M?j2j2oL[LCj2l2mL\\LA;i02m04U3P4aKkK=On06T3o3aKmK>Mm06U3o3`KPL?Hm09T3n3`KRL`0El0;U3l3aKUL=Cn0a0c5S2VLlMj3V2VLhMj3[2VLbMj3_2XL^Mh3e2XLXMh3i2ZLSMg3o2T43dEoLc8S3[GoLc8R3\\GPMb8P3^GQMa8o2_GRM`8m2aGSM_8l2bGTM^8k2cGTM^8k2cGSM_8l2cGRM^8l2dGRM^8m2dI`Mn3_2mKgMS4X2nKhMR4W2nKkMjMZOf4i2bMnMfM_Oc4a2iMQNcMNT4n1\\NTN`MOS4l1^NVN^MNT4j1`NXN\\MNT4h1bN[NWMNX4f1aN]NUMOZ4c1bN_NRMO\\4a1cNaNoLO^4`1cNaNoLN_4`1aNdNmLOb4]1_NUNYLFe0i0c4[1_NVN\\LCc0k0c4\\1^NUN_LAa0n0b4\\1]NVNbM=R4]1WNZNhM9Q4\\1UN^NiM6S4\\1SN_NjM5S4\\1RN_NkM5T4\\1PN`NlM4T4\\1nMbNlM4V4[1lMaNnM5V4Z1lMaNmM6W4Y1lMaNmM4Y4[1jMaNmM4Y4\\1jM_NiM9]4X1jM_N]Me0i4l0kM^N[Mg0j4k0lM]NYMi0k4j0lM]NYMh0l4k0mMZNXMk0k4j0QNWNTMo0l4j0WNPNmLV1l4j0[NkMkLY1k4k0AUO?l0@TO`0l0@SOa0m0_OSOa0n0^OROb0o0]OQOc0P1]OnNd0S1\\OkNe0V1[OoMTKNa5S2^OgMXK3[5U2=fMDZ2a600000000O10000000000AaC]N_5CJk0CXO>f70O001O1001N1O2N8FhRj3" + }, + { 
+ "size": [ + 480, + 640 + ], + "counts": "mki51]4o0T5ROTJk1e5ZNfIY2X6iM\\Hc3b7\\LkFJ]1P4f7WLmFI[1S4g7TLnFIZ1U4g7ULWHn3i7mK[HS4e7jK^HW4U91O001N2O0O1N3N1N2O2O0O1O2O0O10000O2O00000O10000001O0000001O00001O001O1O2N1O1O1N3NnNbKcG]4\\8dKfGZ4S8mKnGR4P8PLQHn3P8RLRHl3n7TLSHk3m7ULSHj3n7WLRHh3n7XLRHh3o7XLPHg3Q8ZLnGf3R8ZLnGe3T8ZLlGe3U8[LjGf3V8[LiGd3Y8\\LeGd3\\8]LbGc3_8_L^Ga3c8aL[G^3g8bLWG^3j8cLTG\\3n8eLoF\\3S9iLeFW3]9U11N3M3L4N2L6K3MXL`KVM[4j6M4K6J4L4K5K5J5L5K4M4I8K6H8H:F:E?]OeVS2" + } + ], + "model_output": "A brown ceramic vase with a glossy finish and a curved handle on one side, featuring a smooth surface and a slightly tapered base, is in front of the window.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_82.png", + "subject_name": "person", + "object_name": "wall", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 478, + 640 + ], + "counts": "^[b35g>3N1N3N1N2N2O1N2N2N2N2N2O1N2O1N2O1N2O1O1N2N2O1O1O1O1O1O1O100N2O1O1N2N2N2O1O1O002M200O1[OSNXDn1e;YNVDh1f;`NTDb1j;j0M3N2M3L4M3O1N2N2O1N2O1M3O100O10000000000000O10010O0O10000gLQEd2n:[MZE^2f:aM[E_2e:_M^E`2c:YMcEg2]:XMdEh2]:WM[EQ3e:nLRE[3V;3J6O1O1O1O1N201N1O1O002N100O1O100O1O100O100O10000O1000000000O1001N10O100001O0O01000000000O10000000000000001OO100000010N100001O00O100010O00O100010O00O101O010O000O1010O0001N2O010O001N110O001N101O100O001N2O010O001N2O010O1O0O11O00000000000000000000000O10000O10000O10000O100000000O10000000000O10000000000001O0000000000000000001O0000000^ORETMn:k2WEPMj:P3]EgLf:W3a000O10001N1000000O2O0O101N1O1O101N1O1O2O0O2N1O2N1N3N2N2N2N3M2N3M2N2N2M3M3N2N2M3M3M3M3M3M4L4K6J8Eon[1" + }, + { + "size": [ + 478, + 640 + ], + "counts": 
"0\\6b8000000000000O101O0000O10000000O1000000000000000000O100000000000000O100000000000000000000O10001OO100000000000O1000000000000O2OO1000000000O100000000O10000000000O1000000O1000001N1000000000000000000000000000000001O0000O100000001O00O1000000O1000O100000000000001O0000000O10000000000000000O2O00O010000O1N2K5M3N2O1N2O1O10000O10OO3N1O1O1N200O1O00100O1O2N1O010O101O0O1O100O100O100O1O1O1N2O1O2O0O100O10000O01000O2O0O10O010001N010O100O2O00O0100O1O100O100000000000000000000O101O0O00100O1O100O10001O0O010O10000O10000O2O00O10000000000000000000O1001O00000000000000000000000O1001O0001O0O20O0001N2O10O0001O1N200O001N6K3M010O1N1010N1000O100001O0O010O2O0000O100001O0000O11O0001N1001O01O000O11O0000O2O01O0001O0O11O0000O2O0010O01N1010O01O1N10100O00001N11O00jE\\KR:k4L2O10O0000O2O1OO2O00O2O010O00O1000000O10O2O01O0000000O10000000O11O0O1000000000000000000000000000000000O0200OO10O10000000O1001O000000O11O000O10O11O00000000000O100000001O00O100001OO01000001O0000O11N10O10000001O000O011O00O10000001O000O10000000O100001O000O011O00O10000001O00O1000O10000000001O0000000O1000O1001O0000000O100000O1001O00000O10000000O1001O000O1000000000O1001O000000000O10000000000O100000000000000000000O1000000000000000000O10000000000000000O1000000000000000000O10000000000000000O10000000000000000000000O10000000000O1000000000000O1000N" + } + ], + "model_output": "A close-up of a human finger with a visible thumb and two fingers, showing the skin texture and natural nail color, is in front of the wall.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_83.png", + "subject_name": "sky", + "object_name": "road", + "predicate_name": "over", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": 
"0d2h:0000000000M300M3L4N2O100O1UOlD^OT;=`ETO`:k0cEQO_:n0P1O1O1N2O1O1000000001O001O001O1O1O1O001O0000000000EjC@V_CCad1_O\\5d1RI]Ob1oN]5j2cJVM]5j2cJVM]5j2dJUM\\5l2bJUM^5k2bJUM^5k2cJTM]5l2bJUM^5l2aJTM`5l2_JTMb5k2^JUMc5j2]JVMi5e2WJZMm50eH\\2^1dMY6V2gIjM]6R2cIoM^6P2aIoMb6P2]IPNe6m1\\ISNe6m1ZISNi6j1WIVNj6i1VIWNl6T1eG\\O_1@n6e1RI[No6e1PI[NR7c1nH]NR7c1nH]NS7b1mH^NT7`1mH`NT7^1mHbNT7]1kHdNW7Y1jHgNV7X1kHhNV7V1lHiNT7W1lHiNU7U1lHkNT7U1lHkNT7T1mHlNU7R1kHnNT7S1lHmNT7S1lHmNU7R1kHnNU7R1kHnNU7R1kHnNU7Q1lHoNT7P1mHPOT7o0lHQOS7o0nHQOQ7o0PIQOP7o0nHSOR7m0mHTOS7j0PIUOP7k0PIUOQ7j0oHVOQ7i0PIWOn6k0TISOl6m0TISOl6m0SITOm6l0SITOm6l0SITOn6k0RIUOo6j0QIVOm6l0SITOm6m0RISOn6m0RISOn6l0TISOm6l0SITOm6m0QITOP7k0PIUOQ7j0PIUOQ7j0oHVOS75SG:i1BV7OTG?g1A\\7>eHB[7>eHB\\7>bHC`7:aHF_7:bHE^7;bHE\\7=cHD_7:bHE^7;bHE^7;aHF`79`HG`79`HG`79aHF_7:aHF`79_HHb77_HHa77`HI`78^HIb77^HIc76]HJc76^HIb77]HJc76]HJd74]HLc74]HLc75\\HKd75\\HKd74]HLc74]HLc74]HLd73\\HMe72\\HMe72[HNg70YH0f9100O1TMMTJ3h800000000000001OO1O11O1O00001O000000O100O11O00001O00O1000000000000000000000000O100001O1O000O01001OO100001OO10000000000001OO1kK0fIOPO1^20k41eI0QOO_21j41eI1POMb21k4OeI0QOO^22l4OfIOPO0^22k4NgI0g12h8OkLOeJ0[50eJ0Y50XH1i1Of61ZIOQ60eJ0[50eJ0a80000000jJ2dI3M0a4KT70000000000000000000000000000000000000000000000000O1mN;TFEk9l0dEUO[:T1000000000000000000000000000000O1000000000000000000000000000000O11O0000O11O00O1000000000000000000000000000000000000O100000000000000000001O1O3M4L;Eb0^O>jFSMf8Y3F?A`0_Ok0VO?A5mIZJ35MM[5S7fJoHV4o7ZO1N4MRI" + } + ], + "model_output": "A white bulldog with a broad, wrinkled face, dark eyes, and a black nose sits on a truck. Its ears are small and folded, and it has a red collar with a visible tag. 
The dog's mouth is open, revealing its tongue and teeth, and it has a muscular build with a broad chest and a deep chest.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_85.png", + "subject_name": "mirror", + "object_name": "wall", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "k5b4^:1O3L3N3M3M3L3N3M2N2N2N0O10O100O10O01000O010O010000O10O0100O100O010O100O1000O0100O10O0100O10O10O100O10O10O100O100O010O100O10O01000O010O10O10O0100O10O0100O10000O010O100O10O010O10O100O10O10O100O100O01000O101N3N5J4M5J5L5J6K2M6K3M5J5L3L6K4K5L3M3L6K3M4K5K5L5K4K5L4K4M4K4M5K4K5L4KfP[7" + }, + { + "size": [ + 480, + 640 + ], + "counts": "1_2]1\\O_3W1_L]O2\\O`3W1]L\\O3^Oa3W1ZL[O5^Oc3W1VL[O7^Oe3W1RL[O9]Oi3W1lK\\O;]Ok3W1hK\\O=]On3V1cK]O?]Oo3X1_K[Ob0]OR4W1ZK\\Oc0]OV4V1VK]Od0]OW4W1SK\\Of0]OV4Z1RKYOh0]OV4\\1PKVOk0]OV4_1mJTOl0^OV4a1lJQOn0^OV4c1jJoNP1^OV4e1hJmNR1]OW4g1fJlNS1]OV4k1dJhNV1]OU4m1dJfNW1]OT4n1eJeNW1]OT4o1dJdNX1\\OU4Q2bJcNX1]OU4Q2cJbNX1]OU4Q2cJbNX1]OT4R2dJaNX1\\OU4S2cJ`NY1]OT4S2cJ`NX1^OT4S2dJ_NX1^OT4T2cJ^NY1^OS4U2dJ]NY1]OT4V2cJ]NX1^OT4U2eJ]NW1^OT4U2eJ]NW1^OT4U2eJ]NW1]OT4V2fJ\\NW1^OS4V2fJ\\NW1^OS4V2fJ\\NW1^OR4V2hJ\\NV1^OQ4U2kJ]NT1]OR4R2nJaNo0^OR4Q2PKaNn0^OR4o1RKcNl0^OQ4o1TKcNk0]OR4o1TKdNj0]OQ4P2UKcNj0]OQ4o1VKdNh0^OR4n1VKcNi0^OQ4o1WKcNh0^OQ4n1XKdNg0^OP4o1YKcNg0^OP4n1ZKdNf0^OP4n1ZKdNf0^Oo3n1\\KdNe0]Oo3P2\\KcNe0]Oo3o1]KdNd0\\OP4P2\\KdNc0]OQ4n1]KdNc0^Oo3o1^KcNc0^Oo3n1_KdNb0^On3n1aKdNa0^On3n1aKdN`0^Oo3n1bKdN?^On3o1cKcN?^On3n1dKdN>^On3n1dKdN>]On3o1eKdN=]On3n1fKdN<_On3m1fKdN<_Om3m1hKdN;_Ol3m1jKdN:^Om3n1iKdN:^Om3m1jKeN9^Ol3n1kKdN9^Ol3m1lKeN8^Ok3m1nKeN7]Ol3n1mKeN6^Om3l1nKfN5^Ol3l1PLfN4]Om3m1oKeN5^Ok3m1QLeN4^Ok3m1QLeN4^Ok3l1RLfN2_Ok3k1TLfN1_Ok3j1ULgN0^Ok3l1ULfN0^Ok3k1VLgNO^Oj3l1WLfNO^Oj3k1XLfNO_Oh3l1YLeNO^Oi3l1YLfNN^Oh3l1[LfNM^Oh3l1[LfNL_Oh3k1]LfNK_Oh3j1^LgNJ^Oi3k1]LgNJ^Oi3k1]LgNJ^Oh3k1_LgNI^Og3k1aLfNI_Of3k1aLfNI_Oe3k1cLfNG_Og3j1cLgNF_Og3j1cLgNF@e3i1fLgNEAd3h1gLgNECa3f1kLgNDG]3b1oLgNCLZ3\\1TMgNC2S3W1[MgNB6n2S1aMgNA:j2n0fMhN@=f2l0jMgN@`0c2i
0mMgN@f0]2b0TNhN_Oi0Y2`0XNgN_On0T2:^NhN^OR1o16dNhN]OV1k12hNhN\\O[1h1LmNiN[O_1c1IROhN[Oc1^1EXOhNZOf1[1B[OhNYOj1Y1]O_OiNXOP2R1XOFhNXOS2o0TOJiNWOW2j0QOOhNWO\\2e0kN5hNWOa2`0fN:jNUOd2=bN>jNUOh28^Nd0kNROl25ZNi0nNnNl25UNn0TOhNj26SNR1[O_Ni29kMY1@ZNj27fM`1DTNl27_Mf1LlMg2<]Mh1m5TNSJm1Q6oMoIQ2`90O100O1O100O100O100O1O100O1O100O100O1O10000O1O100001O2N1O4L2N2N2N001O1O1O0000N2O1N2O1O1O1O1O1O100O1O2OO0100O2N1O010O100O1O1O100O1O10000O1O1O1O100O100O100O100O1O1O10000O1O100O1O100O1O100O1O1O1O100O100O1O1O100O100O100O1O1TH`LW4a3hKaLW4_3hKcLV4^3jKbLV4^3jKbLU4_3kKbLT4^3lKbLS4_3mKaLS4_3mKaLR4`3nK`LQ4a3oK_LQ4a3oK_LP4b3QL]Lo3c3QL]Lo3c3QL]L`M1W5b3ZM\\L^M`0k4T3gM\\L]Mm0`4g2SN\\L]MT1Y4`2ZN\\L]M]1P4W2cN\\L]Mh1f3k1mN^L[MS2]3_1XO^L[M`2Q3Q1D_L[Mg2k2i0J`LZMR3c2=3aLZM]3Y21>aLYMh3o1Gh0aLYMV4b1XOU1bLYM]4[1QO\\1cLWMg4T1eNe1dLWMP5m0ZNl1fLWMZ5d0oMU2gLVMg5:aM`2hLVMn53ZMg2hLVM]6^OZI0c3\\3gLTMX:l2hETMX:l2hETMX:l2hETMX:m2gERMZ:n2fERMZ:n2fERMZ:n2fERMZ:n2fERMZ:n2fERMZ:n2fEQM[:o2fEPMZ:P3fEPMZ:Q3eEoL[:Q3eEnL\\:R3dEnL\\:R3dEnL\\:R3dEnL\\:R3dEnL\\:S3cElL^:T3bElL^:T3cEkL]:U3cEjL^:V3bEjL^:V3bEjL^:V3bEjL^:V3bEjL^:V3bEiL_:X3`EhL`:X3`EhL`:W3aEiL^:X3cEfL^:m2[ESM70^:k2_ESM32^:j2dEPMN5_:j2RFVMn9i2TFUMm9j2TFVMl9j2TFVMl9j2TFVMl9j2UFTMl9l2TFSMm9m2SFRMo9n2Q11O1N2O1O001O01O04L000O4M1O1O001O00000000003M2N1O001O00000010O00O1001O001O0M4eLoDNGO3Mh04]O^Ob0K5N2N2N2O1O2N002N100O1O011N10O0101N10O0101N100O100O100O1O100O001O1O100O1O1O1O2N1O1O1O1O001O0O20OO2N2N1N3M3J5@a0L4N100M3O2N1M3O2L4^OlE[OY:b0?M4N2N1N201N2N100O101N2N3K[Vf0" + } + ], + "model_output": "A black cat with a sleek coat and pointed ears, featuring bright, alert eyes and a small, pink nose, is sitting beside a laptop.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_87.png", + "subject_name": "teddy bear", + "object_name": "teddy bear", + "predicate_name": "beside", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": 
"^ZV39l<8K6I5L3N1N2O1N1G:M2N2N2N3M2O1N2N2O1N2N2O1N2N2VOlM^FU2`9oM]FR2b9QN[FP2c9SNZFn1f9SNZFm1e9UNYFk1h9VNVFj1j9WNVFi1j9XNTFh1KfMn9d0VFd1o9\\NQFa1R:_NnE`1S:aNkE_1V:cNhE[1Z:k0O1O001O1N010O01O001O0010O101O001O00001O2N3M2N1O1O0O2O1O1O0010O011N1O10O0100O1O1^MUFd1k9[NWFe1h9ZNZFe1g9XN[Fh1e9WN\\Fi1d9WN]Fh1d9WN\\Fh1e9XN\\Ff1f9YN[Fe1f9[N[Fd1f9ZN\\Fe1e9YN^Fd1d9ZN_Fd1c9WNcFe1e:K1N10O[O`DD_;=cDA^;?cD_O];b0eD[O\\;d0fDYO\\;g0fDSO^;l0`0O2O0O2O1N2N2M4Hfee3" + }, + { + "size": [ + 425, + 640 + ], + "counts": "[UW31Y\\W10_\\lN1]dL6K3M5^ODoC=m;GQD;m;GRD:l;GTD9l;GTD:j;GVD9i;IVD7i;JWD6h;KXD5g;LXD5e:CPF9YO5e:EPFQ1o9SOnEm0Q:WOlEi0T:[OgEf0Y:]OdEc0\\:_OaEa0_:A`E?`:B]E?e:R1001O2M3N001OO1O100O10000O1000001N1000000001O001O1O000010O001O001M20[N\\Ee0c:ZO`Ef0_:YObEg0^:UOfELJ8_:HkE0I5]:HmE2G5]:FnE5G4Z:ERF7E2Z:_OZE2i0=D2Z;MgD2Y;NgD2Z;MfD3Z;MgD2Z;MgD2Y;NgD1[;NfD1[;NfD0\\;OeDO];0eDL^;2P1LWnf2" + } + ], + "model_output": "A plush teddy bear with a light brown, textured fur exterior sits beside another teddy bear. It has a rounded head with two small, rounded ears, and a red nose. The bear's eyes are not visible. Its arms are outstretched, and it has a white snout with a black nose at the tip. 
The bear's legs are short, with the front paws being white, and it appears to be in a seated position.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_88.png", + "subject_name": "snowboard", + "object_name": "car", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Pdj24k>3N2N1O2N2N1O1O1O1O001O1O001O001O0000000000000000000000000000000O1lA\\Ol=l0O1000O010L301O100O110O2N001O000O10000001O00000000000000000000000000O10000000O100000000O101O00002N1O00000000O20mAXOl=n00000000O1N1O2O101N101O00000000O10000000000000000000000000000001O00001O00001O1O001O1O1O1O3DcA1f>2M010O0000I700O\\k]4" + }, + { + "size": [ + 480, + 640 + ], + "counts": "aa`17f>8I5VOFgB=S=KdB;X=j0L3M3N2N1O1O2N2N2O001O0O2O00001O001O000O2O001O00001N101O001O1O3L6K3M2N2M3M3N2M2O1N2N2O1N1O2O0O2N100O1O1O2N1O1O100N200O10000O1O00100O01O0010O01O010M210O0O20OO2O1O03N001O0O100O2O00000POlKTGT4l8TLlFl3S9VLlFk3S9TLmFm3T9RLlFn3T9SLWFHa0U4W9XLgFi3Y9YLeFg3\\9WLeFi3[9WLeFi3[9XLdFh3[9[LcFe3]9[LbFf3^9ZLbFg3]9YLcFg3]9YLcFg3]9YLcFg3]9YLcFg3]9YLcFg3]9YLcFg3]9YLcFg3]9YLcFg3]9YLcFh3\\9YLbFh3^9XLbFh3^9XLaFi3_9WLaFi3_9WL`Fj3_9XL^Fk3a9j000O1000_K_Fd3_9[LdFd3\\9\\LdFd3\\9\\LcFe3^9ZLbFf3_9XLbFi3]9WLcFi3]9WLcFi3]9XLaFi3_9WL`Fj3a9TL`Fl3`9TL_Fm3a9SL_Fm3a9SL_Fn3`9SL^Fn3b9RL^Fn3b9RL_Fm3a9RL_Fo3a9QL_FP4`9PL`FP4`9PL`FP4`9PL`FP4`9PL`FP4a9oK_FQ4a9oK_FR4`9nK`FR4`9nK`FR4_9nKbFR4^9nKbFR4^9mKcFS4]9mKcFS4]9mKcFT4\\9lKeFS4[9mKeFS4Z9nKfFR4Z9nKfFR4Z9nKfFR4Z9oKeFQ4Z9PLfFQ4Y9oKgFQ4Y9oKgFQ4Y9PLfFP4Z9PLfFP4Z9QLfFn3Z9TLeFl3Z9ULeFk3[9m0000WKfFl3Z9SLgFm3Z9l00001O01OO10YKeFi3[9n0O1100O1OO10000000001O00000000001O00000000QKhFV4X9jKhFV4X9jKgFW4Y9h00QKgFX4Y9gKhFX4X9hKhFX4X9hKhFX4X9hKhFX4Y9gKgFZ4Y9eKgF[4Y9eKgF[4Y9eKgF[4Z9dKeF]4]9aKeF]4\\9bKgF\\4Y9[KfFK2j4d9WK[Fi4e9WK[Fi4n90001O01O01O000010O0000010O0010O01O01O00001O000000001O00001O0011O2M1O2O0O2O0O2N10002N0O00010O100O0010O100O00010O1O1O010O1O001O1O010O1O001O001O1O1O001O1O001O0O2O2N1N100O2O001N2N2O1N1O3N1N1O2N1N3N1O1O1O2N1O2O001O010O001O00001O1O0O10001O001O0000
1O0O101O001O1O00001O00001O000000001N101O1O1O000O10001O00000O101N101O000O2O1O0O101O001N101O1N1O2O000O2O000O2O1N2N3M2O1O2M3M4M3L3M2M3N2N1N3M4J6I:IB1O001N2N2O2M2N2Nkol4" + }, + { + "size": [ + 427, + 640 + ], + "counts": "V2S;X200000000000000000000000001O2N2N1O001O2N00000000001OO1001O0000000000000000000000000000000000000000000000000000000000000000001O00000000000000000000000000000000O10000O100O100001O000000000000000000000000000000000jIaM`1_2`NbM_1^2aNcM^1]2bNdM]1]2bNdM]1[2dNfM[1Z2eNgMZ1Z2eNfM[1Z2eNgMZ1Y2fNgMZ1Y2eNjMY1[1VJSOa4CX1U2gNlMY1T2gNlMY1T2gNlMY1T2fNmMZ1S2fNmMZ1S2fNmMZ1S2fNnMY1R2fNoMZ1Q2eNPN[1P2eNPN[1P2eNPN[1P2dNQN\\1o1dNQN\\1n1dNSN\\1m1dNSN\\1m1dNSN\\1m1dNSN\\1m1dNSN\\1m1cNTN]1l1cNTN]1k1dNUN\\1i1eNXN[1h1eNXN[1g1fNYNZ1g1fNYNZ1g1fNYNZ1g1fNYNZ1g1fNYNZ1g1fNYNZ1g1fNYNZ1g1fNYNZ1g1gNWNZ1i1fNWNZ1i1fNWNZ1i1fNWNZ1i1fNWNZ1i1fNWNZ1i1gNVNY1j1gNVNY1j1gNVNY1j1gNUNZ1l1eNSN\\1m1eNRN[1n1eNRN[1n1eNRN[1n1fNQNZ1o1fNQNZ1o1gNPNY1P2gNPNY1P2hNoMX1Q2hNnMY1S2fNmMZ1S2fNmMZ1S2gNlMY1T2fNmMZ1S2eNnM[1R2ZNYNf1g1RNaNn1_1oMdNQ2]1lMeNT2[1kMeNV2[1iMfNW2Z1hMgNX2Y1gMhNY2W1gMjNY2V1gMmMfL5d5n1eMmMmL0^5S2eMmMoLL^5W2bMmMZ3S2fLmMZ3S2fLmMZ3S2fLmMZ3R2gLnMY3R2gLoMX3Q2iLnMW3Q2jLoMV3m1VIhMf3;S3h1VInMl3;m2e1YM\\Ne2d1]MZNc2f1bMUN^2l1eMnM]2R2^4O100001OO100001O00000000000000O100001O00O10lFRNk6o1TIRNk6n1^HRNTO2]8l1_HTNRO2]8j1aHUNPO2_8h1cHXNjN2b8f1dHYNiN1c8f1eHXNgN3d8e1dHYNhN2d8e1dHZNgN1e8e1dHZNgN1e8e1cH[NgN1f8c1dH\\NeN2V2GQ4k1TK\\NeN2T2KQ4g1WK[NeN2R2NQ4d1YK\\NdN2P20S4b1YK\\NdN2P20S4b1YK\\NcN3P20T4`1ZK^NaN2P21U4_1[K]N`N3=Em0?j4\\1\\KoNmNXOj0?l4Z1\\KPOnNYOg0>o4X1]KQOlN\\Of049f0\\5l0SKnNlNWO=98f0\\5l0SKnNA@0f0\\5l0RKoNB_O0f0\\5l0RKPOA^O2e0[5m0SKoN@_O3d0Z5n0SKPO_O^O>:P5X1VKUNfNc0f0Fb06l4\\1]KgNTOFe05k4^1^KRN`N1N9d01f04j4_1gK\\NhN2h01j4a1fK^OAPOi4c1eK]OCnNi4e1dK]OEkNh4h1bK^OHeNi4m1_K^OW6b0hI@W6`0iIbNXNA0O1`0n7^1iI_NkNL]7e1gI_NnNJ\\7g1gI^NPOGZ7k1fI^NY7b1gH_NX7a1hH_NX7a1hH^NY7c1gH]NX7e1gHZNYNEl8R2kH]NT7d1jHXN[NGj8Q2`HcNfN^Oh8P2^HnNh1ZO[2i1lKmNe1_O^2e1kKnNe1_O_2c1lKnNd1_OdM]O^4V2ZLQNlNc0g2JdM_O]4S2]LPNnNa0d2LbME]4n1_LPNQO2J
1f27dML[4j1aLoMRO0J3d27bM0]4g1dLlMXO7U23cM5\\4e1eLjMXO9T23`M8_4b1ZMSNh0o0n1o0XMTNi0m0o1o0XMTNi0m0o1o0XMTNj0l0n1P1XMTNk0k0m1Q1XMTNl0j0l1R1YMSNn0h0i1U1YMSNR1d0e1Y1YMSNX1>_1_1YMSNY1=^1`1ZMSNW1=_1`1ZMSNW1=_1`1ZMSNW1=_1`1[MRNU1>a1`1ZMRNU1>a1`1[MQNT1?a1`1\\MPNR1`0c1`1\\MoMQ1a0c1`1\\MoMQ1`0d1a1[MoMQ1`0d1a1[MoMQ1`0d1a1ZMPNR1?d1a1ZMPNR1>e1b1XMQNS1=e1b1XMQNS1=e1b1XMPNS1?e1a1XMPNQ1a0g1_1XMPNP1b0h1^1YMoMn0c0j1^1YMnMm0d0j1`1XMjMo0f0i1a1>_NBb1>]NBc1>\\NCd1=\\NCe13R6T2jJUNgND=3Q6U2kJTNgNC>4P6U2lJSNfND>4P6U2lJoMkNG95P6U2nJhMnNN44R6U2mJgMnN034R6U2nJfMmN134Q6V2PKdMlN234Q6V2PKdMlN234Q6V2PKdMmN125Q6V2PKdMlN314S6U2PKdMlN314S6U2PKdMlN304V6T2oJeMjN314V6T2oJeMjN313W6U2_J]MUOV15WOX6V2]JRO[OgNX6X2]JQO[OfNY6Z2[JQO[OeNZ6Z2ZJRO\\OdNZ6Z2ZJRO]OcNY6[2YJSOB]NV6`2XJ_NROYOb0CX6d2TJ[NAROV7c2XIgMWOH_8a2ZHfM[OC^8g2XHeMS8\\2kGeMV8[2gGhMY8Y2dGjM[8V2dGjM]8V2cGjM]8V2cGiM^8W2bGiM_8V2bGhM_8X2mGZMU8f2W11_IYMTLNU6i2eM`M[2`2dMaM\\2_2bMcM^2]2aMdMRMMV4_2gNeMQMOW4]2fNdMSM1V4[2gNcMSM3W4Y2eNcMUM8R4W2hNaMUM>n3R2lN`MUM`0n3P2mN`MTMb0n3n1oN_MSMf0k3k1ROcMmLf0AoNV4h2]OcMhLV1f3W1bMTM:b0VNX1m3R1bMVM:a2T29bMWM:_2T2:aMXM;^2T2:aMYMZ3`M`M]O_Of2D>]3_M^M_OAZ6Q3WJkLQOGO6c07V6Q3WJiL]OH6?U6Q3XJhL37d5R3ZJfL28c5S3]JbL1;c5R3gKnLY4Y2ZImM]2JY4Y2\\IkM[2LX4Z2]IjM[2LX4Z2^IhM[2OV4Y2_IhM[2OV4Y2`IfM[21U4Y2`IeM]21R4[2bIcM\\22R4[2cIaM\\24Q4[2cIaM\\24R4Z2cIaM[25R4Z2dI`MZ27P4[2fI\\M\\28n3\\2hIXM\\2k0`JWOR5N=l0`JWOS5M=l0_JXOT5L=l0^JYOU5K[NBe1fJZN_51Kc1gJ^N]5OLa1iJ`N[5OKb1iJ`N\\5NKb1hJbN\\5LKb1jJbN[5LKb1iJcN\\5KJb1kJcN[5KJa1jJgN[5HKa1jJgN[5HJb1jJgN\\5GJa1kJhN[5GJa1kJhN[5GJV25jMKV25jMJX25hMKX25hMJY26gMJZ25fMKZ25fMKY26gMJY26gMIh1hJeN_5CIh1hJcNa5EGh1iJbN`5FGh1jJ`N`5HEh1mJ^N_5JDh1nJ\\N_5LCh1oJYN`5OAh1QKUN`53_Oh1k0XNUOh1k0XNUOh1k0XNUOi1j0WNUOj1k0VNUOi1l0WNUOh1k0XNUOg1l0YNTOg1l0YNTOf1m0ZNSOd1o0\\NQOc1P1]NPO9iJc02VOU6NQO6jJf0^6TOjN2jJj0\\6TOUOk0l0UOUOj0k0VOUOj0k0VOUOj0k0VOUOj0k0VOUOj0k0VOUOj0k0VOUOi0l0WOdN2SKg0Y6WOcN7PKb0]6WOcN:lJ?b6WOcNW1^1iNcNU1^1kNcNS1^1mNfNl0]1TOfNl0W1TOnNi0P1WOROi0l0WOUOk0h0UOYOm0d0SO]Om0b0SO_Ol0a0TO_O7XJ7Y6B_O5]J6T6E@3bJ2o5K_O2Y1NR600
00000000000000000000000000000000000000000O1000000001O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000N" + } + ], + "model_output": "A single-engine propeller aircraft with a high-wing configuration, featuring a tricycle landing gear setup, is flying over the sky. The aircraft has a sleek, aerodynamic design with a pointed nose, a bubble canopy, and a vertical stabilizer with a rudder. 
The wings are straight and positioned above the fuselage, and the propeller is at the front of the aircraft.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_91.png", + "subject_name": "tree", + "object_name": "rock", + "predicate_name": "beside", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "0]=c100000000000000000000000000000000000000000000000000000000O1000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000000O11O00O100000000000000000000000000000000000000000000000000000000000000hI[NVI2d5c16cNWJd0`5i06iNUJc0b5d082lJ_Nh3`1[16dJaNo3Y1[1;WJPN1e0[4P1\\1\\1bNdN]1]1cNcN]1^1bNbN]1b1`N^N_1d1`N\\N_1e1aN[N_1e1aN[N_1e1aN[N^1g1`NZN`1f1`NZN`1f1`NZN`1g1_NYNa1i1\\NXNd1i1[NWNe1i1[NWNf1h1YNYNg1g1YNYNg1h1XNXNh1h1XNXNh1h1XNXNh1h1XNXNh1h1WNYNi1g1WNYNh1i1WNXNh1g1YNXNg1i1XNXNh1h1XNXNg1h1ZNXNf1h1ZNXNe1i1ZNXNf1g1[NYNe1g1[NYNd1g1\\NZNd1f1\\NZNd1e1]N[Nb1e1_N[Na1e1_N[N`1f1_N[N`1f1`NZN_1g1aNYN^1h1aNYN_1h1`NXN_1i1aNWN^1i1cNWN\\1j1cNWN[1k1eNUNZ1k1gNUNY1k1gNUNZ1j1fNVNZ1i1gNWNY1i1fNXNY1h1hNXNW1h1jNXNV1g1kNYNT1g1lNZNT1f1lNZNT1f1kN[NU1d1lN\\NS1e1lN\\NT1c1lN^NS1c1iNaNW1_1gNcNY1\\1gNeNX1[1fNhNZ1X1eNiN[1W1dNjN[1V1eNkN[1U1dNlN\\1T1dNlN\\1T1cNmN\\1T1dNlN\\1T1dNlN[1T1eNmNY1U1gNkNX1V1gNkNX1U1iNkNV1V1jNjNU1W1jNjNV1V1jNjNU1W1kNiNU1X1jNhNV1X1jNhNU1Y1kNgNS1[1mNeNR1]1mNcNS1]1mNcNR1]1oNcNP1^1oNcNP1^1PObNo0`1PO`No0d1nN\\NQ1g1mNYNR1o1gNQNY1R2cNoM]1S2aNmM_1V2^NjMb1W2\\NjMc1d2oM]MQ2h2iMYMV2k2fMVMZ2l2aMWM^2k2_MWMa2k2YMYMg2i2PM^Mo2f2dLnIWOa3U4U70000O10000000000O100000000000000001O00000000000000001O000000001O1O001OaNgEWNY:o1]EUNc:U3]MfK^JZ4U5VLhJk3U5XLjJh3T5ZLlJf3S5[LmJe3S5[LmJe3R5\\LnJd3R5\\LnJd3R5\\LnJd3R5\\LnJd3R5]LmJc3R5^LnJb3S5]LmJc3S5]LmJc3T5\\LlJd3U5[LkJd3V5\\LjJd3W5[LiJe3X5ZLhJf3Y5YLgJg3Y5YLgJg3Z5XLfJh3Z5XLfJg3[5YLeJg3\\5XLdJh3\\5XLdJg3^5XLbJh3_5WLaJi3_5WLaJi3`5VL`Ji3b5VL^Jj3b5VL^Jj3b5VL^Jj3b5VL^Ji3d5VL\\Jj3d5VL\\Jj3e5UL[Jk3e5UL[Jj3g5ULYJk3g5ULYJk3g5ULYJk3h5TLXJl3h5ULWJk3j5TLVJl3j5TLVJk3k5ULUJk3k5TLVJl3j5TLVJl3j5TLVJl3k5SLUJm3l5RLTJn3l5R
LTJn3l5RLTJm3m5SLSJm3m5SLSJm3n5RLRJo3m5QLSJo3m5QLSJo3n5oKSJQ4e4ULmII^1R4a4ZLQJC^1S4]4aLTJZO_1T4^4bL`L^3`3bL`L^3_3dL`L[3]3iLcLW3\\3jLdLV3\\3kLcLU3\\3lLdLT3Z3oLeLQ3R3YMmLg2P3]MoLc2P3^MPMb2m2aMSM_2k2cMUM]2d2jM\\MV2T2mKmJV2o2m1S2[NmMe1o1nKmJ^2T3d1k1cNUN]1k1aNWN_1i1`NXN`1g1`NZN`1f1_N[N`1f1^N\\Nb1e1[N]Ne1c1WNPKhM^3R4b1`MVO`2k0UM_Ok2c0bLnJDd4j3V6O10000O1000000000000000000000000001O0000O1000000001O0000000000001O00001O000\\MnKZJS4c5oK]JR4^5RLbJn3Z5WLeJi3X5ZLhJf3V5\\LjJe3T5\\LlJd3P5`LPKa3m4aLSK_3k4cLUK]3i4eLWK\\3g4eLYK\\3d4fL\\K[3b4fL^KZ3a4gL_KY3`4hL`KX3^4jLbKV3]4kLcKV3[4jLfKV3Z4jLfKV3Y4kLgKU3V4nLjKS3R4PMnKP3P4RMPLn2o3SMQLn2m3SMSLm2l3TMTLm2j3TMVLl2i3UMWLk2h3VMXLj2g3WMYLj2c3YM]Lg2`3\\M`Ld2^3^MbLc2\\3^MdLb2[3_MeLa2Y3aMgL_2X3bMhL_2W3aMiL_2V3bMjL_2U3aMkL_2S3cMmL^2o2eMQM\\2k2gMUMY2k2gMUMY2j2hMVMX2g2kMYMU2d2nM\\MS2b2mM_MS2a2mM_MT2]2oMcMQ2]2oMcMR2]2mMcMT2]2kMcMV2]2iMcMW2^2hMbMX2_2gMaMZ2_2eMaM]2^2bMbM_2_2_MaMa2_2_MaMb2e2WM[Mj2g2SMYMm2g2RMZMo2f2PMZMQ3e2oL[MQ3f2nLZMS3e2mL[MT3d2lL\\MT3e2kL[MV3e2iL[MX3f2fLZM\\3g2aLYM`3g2^LZMb3f2^LZMc3e2]L[Md3e2[L[Mg3c2YL]Mh3e2UL[Ml3e2RL\\Mn3e2QL[MP4h2lKXMV4g2eK]M\\4b2cK_M]4c2`K^Ma4b2^K^Mb4c2\\K^Mf4`2YKaMh4_2WKaMi4a2UK_Ml4a2RK`Mn4a2QK_MP5a2nJ`MS5_2mJaMT5_2kJaMU5a2iJ_MX5`2gJaMZ5_2eJaM\\5^2dJbM]5^2bJbM_5^2`JbMa5_2]JaMe5^2ZJbMi5\\2SJgMm5Y2SJgMn5Y2QJgMP6Z2mIgMV6`51O1O2N3M7I7I5K3M5K3M3M2N3M3M5K3M1O2N9G7I2N3M2N6J5K3M1O4L4L2N3M2N2N3M5K4L002N5K3M1O0\\M]Ek0c:ROaEm0g:jNZEV1h:gNYEY1k:bNWE]1i:bNXE^1j:^NXEb1k:PNiDH>X2Q<000001O00000000000000001O0000000000000000000000000000000000O1001O000000O11O0000O100000000O10000000000000000O1000000000000000000O100O100000000O1000000000000000000O10000000000000000O100000000" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"`=_1a=0000000000000000000000000000000000000000000000000O1000000000000O10000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000000O1000000O1000000O10000000000O1000000O100000000O1000000O1000000O1000000O10000O100000000O1000000O10000O1000000O10000000000O1000000000000001O00000000001O00000000001O0000001O00000000000000000000000000000000000000000000000000000000000000O10000O100O1O1O1O100O100O1000000O1000000000000O100000000000000000000000000000000001O00000000000000000000000000000000000000000000000000000000000000000000000000000000O10000000000O1000000000000O1000000000000000000000000O10000000000000000000000000000O10000000000000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000O100000000000000000000000000000000000000000000O1000000000000000000O100000000000000000000000000000000000000000000001O0000000000000000001O00000000000000000000001O0000000000000000000000000000000000001O0000000000001O000000001O0000000000001O0000001O0000000000000000O100000000000000000000000000000000000000000000O100O100000000O1000000O10000000000000000O1000000000000000000O10000000000O100000000O1000000000000000000cC" + } + ], + "model_output": "A dense cluster of green leaves with a mix of light and dark green hues, featuring elongated, oval-shaped leaves with smooth edges, is attached to a tree. 
The leaves are arranged alternately along thin, brown branches, with some leaves overlapping each other, creating a lush and vibrant appearance.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_92.png", + "subject_name": "bear", + "object_name": "rock", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "\\fk0h0]<9J5C>XO^NoDV2f::G5M1N3M2N2N3N1N2O1N2O1O1O1N2O1O2N001O1O00100O1O1N2O10O01O1O1O1O0000000000001OO10000001O00001O1O0000001O001O2N001O1O002N1O001O1O001O1O1O1O001O1O1O1O100O1O00100O00100O1000O10O1000O10O100000O0100O1000O100O01000O1O1O0003NO1O10O011O2gCaMk;l2O0O10000O01000O2O0O1O101N10010N2O1O001O0O101O0O1000000000O1000000000O0100O100O00100O1O001O1O1O001O1N2O1O00001O1N2O001O1O1O1N101O1O1O001N2O001O1N2O001N2O1N101O1O1O001N2O1O1O001N2O001O1O001N2O002N002M101O001N2O1O1O001N2O1O0O2N2N101N2N2N1N3N2N1O2N2N2M3N2M3N2L6G_ab0" + }, + { + "size": [ + 480, + 640 + ], + "counts": "T6j8V60O1O2O0O010N2N2O1N2O1O2N1O1N101O1O1O1O100N2O1O1O1O100O1O1O101N1O1OWGYKZ7g4eH`KV7_4jHhKP7W4PInKl6Q4TITLh6k3XIYLe6f3\\I^L`6a3`IdL\\6[3eIiLW6V3jImLS6R3mIRMP6m2QJXMj5h2UJ^Mf5`2\\JbMb5]2_JfM^5Z2bJjMZ5U2gJmMW5R2iJSNS5l1nJVNP5i1QKZNl4e1UK_Ng4a1YKaNe4^1[KgNa4X1`KjN^4U1cKmN[4S1eKoNY4P1hKSOU4l0kKWOS4i0mKZOP4f0PL[Oo3d0RL^Ol3a0ULAi3>XLDf3P3AQM`0n2@RMb0l2]OUMe0i2ZOXMg0g2XOZMk0c2UO]Mm0a2RO`Mo0_2PObMR1\\2nNdMT1Z2kNfMW1Y2iNgMZ1V2eNkM]1S2bNnMa1o1^NRNd1l1[NUNh1h1XNXNj1f1UN[Nm1c1RN^NP2`1oM`NS2_1lMbNX2Z1gMgNZ2X1eMiN_2S1`MnNa2Q1^MPOe2m0ZMSOi2k0VMVOm2g0RMZOo2e0PM\\OR3b0mL_OV3>iLC[39dLH^36aLKb32]LNf30YL1i3MVL4k3KTL6m3ISL7n3HQL9o3GPL:Q4EnKV4BjK>W4AhK`0X4@hK`0Y4_OfKb0Z4^OfKb0Z4^OeKc0\\4\\OdKd0\\4\\OcKe0]4[OcKd0^4\\OaKe0_4[OaKe0_4[O`Kf0`4ZO_Kg0a4YO]Ki0c4WO\\Kj0d4VO[Kk0e4UOZKl0f4TOYKm0g4SOXKm0i4SOVKn0j4ROUKo0k4QOTKP1k4QOSKQ1m4oNRKR1n4nNRKR1m4oNRKQ1o4oNPKR1P5nNPKR1P5nNoJS1Q5mNnJT1R5lNmJU1R5lNmJU1S5kNmJU1S5kNlJV1T5jNkJW1U5iNjJW1V5jNjJV1V5jNiJW1W5iNhJX1W5iNiJW1W5iNhJX1X5hNgJY1Y5gNgJY1X5hNgJY1Y5gNgJX1Z5hNeJY1Z5hNeJY1[5gNeJY1[5gNdJZ1\\5fNdJZ1[5gNdJZ1\\5fNcJ
[1]5eNcJ[1]5eNbJ\\1^5dNbJ[1_5eN`J\\1_5eN`J\\1`5dN`J\\1`5dN_J]1a5cN_J]1`5dN_J]1a5cN^J^1b5bN^J^1b5bN]J^1c5cN\\J^1d5bN\\J^1d5bN[J_1e5aN[J_1d5bN[J_1e5aNZJ`1f5`NZJ`1f5`NYJa1g5_NYJa1f5`NYJ`1h5`NXJ`1h5`NWJa1i5_NWJa1h5`NWJa1i5_NWJa1i5_NVJb1j5^NWJa1h5`NXJ`1h5`NXJ`1g5aNZJ^1f5bNZJ]1g5cNYJ]1f5dNZJ\\1f5dN[J[1e5eN[J[1e5eN[J[1d5fN\\JZ1d5fN]JY1c5gN]JY1c5gN]JY1c5gN]JY1b5hN_JV1b5jN^JV1b5jN^JV1b5jN_JU1`5lN`JT1`5lN`JT1`5lNaJS1^5nNbJR1^5nNbJR1^5nNbJR1^5nNbJR1]5oNdJP1\\5POdJP1\\5POdJo0]5QOdJn0[5SOeJm0[5SOfJl0Z5TOfJl0Z5TOfJl0Y5UOgJk0Y5UOgJk0X5VOiJi0W5WOiJi0W5WOiJi0V5XOkJf0V5ZOjJf0V5ZOjJf0U5[OkJe0U5[OkJe0U5[OlJd0S5]OmJc0S5]OmJc0S5]OnJb0R5^OnJb0R5^OnJb0Q5_OoJa0Q5_OPK`0P5@PK`0o4ARK>n4BRK>n4BRK>n4BRK>m4CTKhKBX4>iKAW4?iKAW4?iKAV4`0kK^OV4b0jK^OU4c0kK]OU4c0kK]OT4d0mK[OS4e0mKZOT4f0lKZOT4f0lKZOS4g0nKXOR4h0nKXOQ4i0oKWOQ4i0oKWOQ4i0PLUOQ4k0oKUOQ4k0oKUOP4l0PLSOQ4m0PLROP4n0PLROP4n0PLQOP4P1QLoNo3Q1QLoNn3R1RLnNn3R1RLnNn3R1SLlNn3T1RLlNm3U1SLkNm3U1TLjNl3V1TLjNl3V1TLiNl3X1TLhNl3X1ULgNk3Y1ULgNj3Z1VLeNk3[1ULeNj3\\1WLcNi3]1WLcNi3]1WLcNh3^1YLaNg3_1YL`Nh3`1XL`Ng3a1ZL^Nf3b1ZL^Ne3c1[L]Ne3c1[L\\Nf3d1[L[Ne3e1[L[Ne3e1[L[Ne3e1[L[Nd3f1]LXNd3h1\\LXNc3i1]LWNc3i1]LWNc3i1^LbLfMh0k5g2_L^LlMh0e5j2_L\\LQNg0`5m2`LZLSNf0]5Q3`LXLVNe0Z5S3`LWLXNe0X5T3aLULYNf0V5U3aLTL[Nf0S5W3bLRL^Ne0P5Y3cLQL_Nc0o4\\3lMdLT2\\3mMcLR2^3PN`LP2`3QN_Lo1a3RN^Ln1b3SN]Ll1d3UNZLl1f3UNYLj1h3XNVLh1j3XNVLg1k3ZNTLf1l3[NSLe1m3\\NRLd1n3]NPLc1Q4_NmKa1S4`NkKa1U4e40O100O1000000O1000000O10000O100000000O100O1000000O1000000O10000O100000000O10000O100O1000000O100000000O10000000000O10000O100O100000000O1000000O10000000000O1000000O1000000O10000O100000000O100000000O100O100O1000000O10000O1000000O10000TJmJj1T5VNnJh1R5XNoJg1Q5YNPKe1Q5[NPKd1P5\\NQKc1o4^NPKa1Q5_NPK`1P5aNPK^1P5bNQK\\1P5dNQK[1o4eNRKZ1n4fNSKY1m4hNSKV1n4jNSKU1m4kNTKS1m4mNSKS1m4nNSKP1n4POSKo0m4QOTKn0l4SOSKl0n4TOSKk0m4UOTKi0m4WOTKh0l4XOUKg0k4ZOTKe0m4[OTKd0l4\\OUKc0k4^OUK`0l4@TK`0l4@UK>l4BUK=k4DUK:l4FTK:l4FUK8l4ISK6n4JSK4n4LSK2n4ORKKS55nJHT58lJEW5;jJCW5=jJBV5?jJ_OW5a0iJ]OY5d0gJZOZ5f0gJXOZ5h0fJWO[5j0eJSO]5m0dJPO^5P1cJlN`5
T1aJiNa5X1_JYNTOdM^6S4^JXNP6i1PJUNQ6k1PJTNP6l1PJTNP6l1QJRNP6n1QJQNo5P2QJoMo5Q2QJnMP6R2QJmMo5T2QJkMo5U2RJjMn5V2RJjMn5W2RJgMo5Y2RJfMn5Z2RJfMn5Z2SJeMm5[2TJcMm5^2SJaMm5_2TJ`Ml5`2TJ`Ml5`2UJ_Mk5b2UJ\\Ml5d2UJ[Mk5e2UJ[Mk5f2UJYMk5g2VJXMj5h2VJWMk5j2UJUMk5k2VJTMj5l2WJSMi5m2XJQMi5P3VJPMj5P3WJoLi5Q3XJmLi5S3XJlLh5U3WJjLj5V3WJiLi5W3XJgLi5Z3WJdLj5\\3WJbLj5^3WJ`Lj5`3VJ_Lk5b3UJ\\Ll5d3UJZLl5f3TJYLm5h3SJVLn5j3SJSLo5m3RJQLo5P4PJoKQ6Q4PJlKR6T4oIjKR6V4oIgKT6Y4mIcKU6]4mI]KV6d4S2001O0000001O001O000000001O000010O0000O2O00010O00001O00001O0O110O000000001O00kJ" + } + ], + "model_output": "A black cellular telephone with a glossy finish, featuring a physical keypad with white alphanumeric characters on the keys, a central navigation button surrounded by four function keys, and a small screen above the keypad, is placed on the table.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_94.png", + "subject_name": "pillow", + "object_name": "couch", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "kc05gc04M3O1O1O1O100O100O1O100O1O1O100O1O100O10000O1O100O100O100O100O1O100O1O100O100O100O100O1O100O1O1O1O10000O100O10000O10000O100O1N2O10000O10000001O1O1O1O001O1O1O1O1O1O1O2N1O1O1O1O1O1O1O1O1O1O1M3O1N2N2N2N2O2M2N2N2N2N3L`lW7" + }, + { + "size": [ + 640, + 480 + ], + "counts": 
"o`02d03AMeb0T1M5Z]OhN1NZb0f1L1OO1O1001N10001O10OO2O1O1O1N2O1O1O1O1N2O1O1O100N2002YN\\^Om0fa0PO]^Oo0ca0PO^^OQ1aa0lNb^OT1_a0jNb^OV1^a0hNd^OX1]a0eNe^O[1[a0dNf^O\\1oa0000000000O1000000000000000000O10000000000000000O10000000O100000000000O1000000000000O1000000000O1000O100000000000000O100000000000000000000O10O100000000000000000000000000000000000V_O^NdN2W`0a1SAiNm>W1k@]NlN>Y`0U1h@TOX?l0c@YO]?g0\\@@d?`0\\@@d?`0\\@@c?a0[@Ae??W@Ei?;m_OOS`01j_O2V`0Ni_O3W`0k100000000000000000000000000000000000000eM]LYDc3`;fL^DZ3_;iLaDW3Z;nLfDR3Q;\\LkBg0T2m2b:fM^EZ2a:gM_EY2a:gM_EY2a:gM^EZ2b:fM[E]2e:cMeDS3Z;nL]D[3c;dLPDj3PnLAQ3b0mL^OS3c0mL\\OR3f0mLZOR3h0nLWOR3i0nLWOQ3m0mLROR3Q1lLoNP3W1mIbMh2W1Z3Y1kIdMj2R1[3\\1gIgMk2m0^3h1aLXN_3j1_LVNa3l1]LTNc3n1[LRNe3P2YLPNg3R2WLnMi3S2VLmMj3U2TLkMl3W2RLiMn3Y2PLgMP4[2nKeMR4^2jKcMV4_2hKaMX4`2fKaMZ4b2bK_M^4Y1gIgNf12c4W1gIjNc1Of4g2WKZMa2dNUNV4TOXMg2bNUNY4POVMk2aNUN\\4lNTMW5P3bJSM^5Q3[JRMe5T3RJoLn5W500000000000000000000000000000000000000000eKgId1Y6\\NgId1Y6\\NgId1Y6g20eKgId1Y6\\NgId1Y6\\NgId1c5gKQKe2\\Od1c5gKQKe2\\Od1Y6\\NgId1Y6\\NgId1Y6\\NgId1X6]NhIc1X6]NhIc1X6]NhIc1X6]NhIc1X6]NhIc1W6^NiIb1W6^NiIb1X6]NhIc1X6]NhIc1X6]NhIc1X6]NhIc1X6h21O0000O1001O0ZIgIZ6Y6<0000000000000000000000000000000000000000O100001OO100O11O0000O1O1O1O1O1N2N2001OFkH^JU7a5PI[JP7d5QI]Jn6d5RI[Jo6d5<0eH\\Jo6c5QI^Jo6b5QI^Jo6c5PI]JP7c5PI]JP7c5PI]Jo6d5QI\\Jo6d5QI\\Jo6d5QI\\Jo6d5RI\\Jn6c5RI\\Jo6d5QI\\Jo6d5<0O10000000000000000000001O000000000000000kHZJe6f5[IZJe6f5[IZJe6g5ZIYJf6g5ZIYJf6g5?00000000000000001O0000000000001O00000000000000001O000000000000000000000000000000001O0001OO100000000000001O000000001O`LdHQ1\\7oNdHQ1\\7_20000TLSJ:m5WLTJ[3O>R6AnI?S6@mI`0T6_OlIa0U6^OkIb0U6^OkIb0U6Z300001O001O001O0000001O00000000000000000001O00000001O1O001O001O00000000O10000001O000000000000001OQLUI^1k6bNTI_1l6a2O00PLUI`1k6`NUI`1k6`200000000000000000000000001O1O00000000001O00000000001O000O1000000000000001O000000000001O00000O100001O00000]I" + } + ], + "model_output": "A black touring motorcycle with a large front fairing 
featuring a clear windshield and dual rearview mirrors is parked on gravel. The bike is equipped with a black top case and a passenger backrest, both with reflective strips. It has a visible rear suspension system, a black seat, and a license plate mounted on the rear. The motorcycle also has a black exhaust pipe on the right side and a visible rear tire with a multi-spoke wheel.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_97.png", + "subject_name": "bench", + "object_name": "tree", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "k\\T1221P=`0E2M5M5J2O1O1O2N2N3L2O1O1O5K3M6J1O3L`0_O3`E_MS:c2hEdMU:Z2nEdMQ:Q2kEPNd:Q2\\EoMd:]2N2O0O1O1O1O2lEUMe9j2>00001N3N1N2N2O1N2O1O1N2N2O1O1N101O1O100N101O10O1O1M20100O1O1O010O10O01O1O1000000O1000O100000000000000001O00000000O1O1000000000000001O00002N001O00000000000000000001O000000000O100O1O0010000000000000000000001O000000000000000000000000001O000001O00000000001O000000000000O011N1O11O00O1O001000000000000000000001O1O000000000001N100O10O11N101O00000O2O0O2O0O1N2O1O1O1O1O1O100O10000O101O0001O000000001O00001O001O1O1O010N4M1N2O01O01O00001O0O101N110O000O2O000O110O001O000O1O1O11N1O101O1O0O11O0000000000001O1O1O001O0000000000000000O100000000000000O10000000000000000001O0O100001OO1001O00000O10001O01O00001O0O10001O00001O0000O100O10000O100000000O10000001O001O0O2O000O2O00000000OgMQFX1o9hNQFX1o9S1O001O1O00001O002OO01O1O1O3M00010O002N3NO010O0000001O10O01O1N4M1O1O2N1O2O0O1OfHmL\\M2f6o2PLmLZM40Hd6U3TLlLZM6i6k2TLQMR4n2W3N2N2N3N0O101M2N101000OO2O2N0JZEgMg:U2:L3O2K5I6O1L3DXDTOl;k0SDQOR[4^OPL:R4DRL9n3FVL7j3HZL6e3I^L5b3K_L4a3L`L3`3MbL1^3NdL1\\3OfLiMkNa0^4g1jLbMoNb0Y4k1iLaMQOb0V4m1mL[MSOd0P4Q2RMUMPOj0m3Q2WMoLROl0g3U2YMlLSOm0d3W2\\MgLVOm0_3\\2UO]Ml0b2ZOYMf0g2ARM?m22cLN\\3Q5O1O1O002O0O1O010O2N1O1O1N102N1M3O001N2O2M2N2O1O1O1N20nJ_MYO`2d0fM[OY2e0jMZOT2g0lMYOR2i0nMWOQ2j0oMUOR2j0oMVOP2k0PNVOo1k0oMVOQ2j0nMWOQ2k0mMUOT2k0jMWOV2P6N1000O10O101O00O0100000000000000000`JjMKV2J]NNc1NdNO\\11dNO\\11dNO\
\10fNOZ11fNOZ11fNOZ11fNOZ11fNOZ11fNOZ11eN0[10eN0Z10gN0Y10gN0X11hNOX11hNOW12iNNW12hNOW12iNNW12iNNX10iN0W10jNOW10jNOV10kN0V1OkN0W1NjN1W1NjN1V1OiN2W1MjN3V1MjN3V1MkN2U1NkN2U1NjN3V1MjN3U1NjN3V1MjN3V1MeNTKTOP5W2LdNUKUOo4W2KcN<]1DdN;[1FgN8Y1HhN7W1JjN5V1KkN4T1MmN2S1NmN2S1NmN2S1MmN4S1LlN5T1KkN6U1JkN6U1JjN7V1IkN6U1JkN6U1IlN7T1IkN8U1HkN8U1HkN8U1HkN8U1HkN8U1GlN9T1GkN:U1ElN;T1ElN;T1ElN;T1ElN;T1DmNU1BjN?V1AjN?V1@kN`0U1@kN`0U1@kN`0U1@kN`0V1_OjNa0V1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1]OlNc0S1^OmNb0S1^OmNb0R1^OoNb0P1_OPOa0o0@RO?m0BSO>m0BSO>m0BRO?m0AUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>l0@UO`0k0@UO`0l0_OTOa0m0^OSOb0m0^OROc0n0]OROc0n0]OROc0n0\\OSOd0m0\\OSOd0m0\\OSOd0m0\\OSOd0m0\\OSOd0l0]OTOc0l0]OTOc0l0]OSOd0l0]OTOc0l0]OSOd0m0\\OSOd0m0\\OSOd0l0]OTOc0l0]OTOc0k0^OUOb0k0^OUOb0j0_OVOa0h0BWO>h0CXO=g0DYOl0ATO?n0_OROa0o0^OQOb0o0^OQOb0P1]OPOc0P1]OPOc0P1]OPOc0Q1\\OoNd0Q1\\OnNe0R1\\OmNd0S1\\OmNd0T1[OlNe0T1[OlNe0T1[OlNe0U1ZOkNf0U1ZOjNg0V1YOkNf0V1YOjNg0V1YOjNg0W1YOiNf0W1ZOiNf0W1[OhNe0Y1ZOmM[Kc0[5`1ZOmMZKd0\\5_1YOiNf0V1ZOkNf0U1_OfNa0Y1@fNa0Z1_OfNa0Y1@gN`0Y1_OhNa0X1_OhNa0X1_OhNa0W1@iN`0X1_OgNb0Y1^OgNb0Y1^OgNb0Y1_OfNa0Z1_OfNa0[1^OdNc0\\1]OdNc0]1\\OcNd0]1\\OcNd0^1[ObNe0_1]O^Nc0b1]O^Nc0b1]O^Nc0b1]O^Nc0b1]O^Nc0b1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O\\Nc0d1]O\\Nc0d1]O\\Nc0d1^O[Nb0e1^O[Nb0d1_OZNc0f1]OYNd0g1\\OZNc0f1]OnMSK6^5l1_OmMUK3_5P2\\OmMVK1_5R2[OlMQ1T2^OiM6W2IjM7V2_5000QJmMf0S2ZOmMf0S2ZOmMf0S2ZOmMUKMV5V2EmMQKLN1\\5W2DlMRKML1^5V2DkMSKNK1^5V2DjMTKOJ1^5V22iMNW2HkMoJNY5V2ImMmJMZ5V2ImMmJNY5U2IoMlJM[5T2FYN:g1FYN:g1FYN:g1FYN:g1FTNmJG^5V2DXN;h1EXN;i1DXN;h1EXN;h1EXN;i1HhMQK2W5V22hMOX2b51O00O100O100O10000O1000000000000000000001O1O001O001O00O100000000000000000000000000001O000000000000001O001O00001O00001O001O002N2N1O001O2N1O1O2N2N3M2N2N3M1O3M1O1O2N2N1O1O001O1O001O1O1O1PJYLZ2g3]L^LfNNl4e3ZLaLeNNQ5a3XLeLdNKT5`3TLjLdNJW5\\3RLnLdNHZ5Z3nKTMcNE_5W3kKVNU4j1iKYNV4g1hK[NX4e1fK]NZ4c1dK_N\\4a1aKbN_4^1`KcN`4]1^KfNa4[1\\KgNd4Y1ZKiNf4W1XKkNh4V1UKmNj4S1TKoNl4R1QKoNP5S1hJTOW5Q1^JUOb5Z4
0001O001O000000001O0000001O1O00001O001O1O001O1O1O1O1O00001O00000000001OO1000000000000000000000000000000000000000000O1000000O100001O00000000000000001O000000000000000000000000000000000000O1000000000000O1000000" + } + ], + "model_output": "A white metal bench with an ornate, leaf-like pattern on the backrest, featuring a symmetrical design with a central floral motif flanked by two symmetrical, curved leaf-like elements, stands in front of a tree. The bench has a curved top edge and is supported by two ornate, curved legs with a decorative finial at the top.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_98.png", + "subject_name": "cell phone", + "object_name": "person", + "predicate_name": "held", + "mask_rles": [ + { + "size": [ + 640, + 443 + ], + "counts": "Rlm33mc0O01O1O010O3N1O000O101N1N20O100L4O10000001O0010O01O000001MVXT4" + }, + { + "size": [ + 640, + 443 + ], + "counts": "WcY19dc07J5J5N2M3M3M2N2N2O1N2O0O2N2N101O0O2O1O001O001O1N2O001O1O0KWNR^Oj1ga0^NX^Oc1T?eNiBi1m[OmAP1n=ZOhAj0T>c2M3M4M2M3O1N3M2O1N2O1O1O1O1O100O1O100O10000O100O1[OaIhC`6SjJ\\AW5f>d0]O1OUJmAj5P>UJRB2Ne5o=YJSBk5m=XJRBg5[>H6^JaAX5k>JN14M8I2O1N1M4K5L7HTNnKSDj3Z>G5L5K5K3M2MXLSM`Fk2^=L5XNe^Oa0da0TO`^Oj0ga0nN\\^OP1\\b0N5J2O3M4K2O2N1O2M2O1O4K2N2O3L2J_R`2" + } + ], + "model_output": "A woman with shoulder-length brown hair, wearing a teal tank top with thin straps and a ruffled neckline, paired with dark blue jeans and white sneakers with red and blue accents, is seated with one leg crossed over the other, holding a smartphone in her hands, which are positioned in front of her. 
She has a necklace with a pendant and is looking down at the screen of the cell phone.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_99.png", + "subject_name": "person", + "object_name": "wall", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "c]n25g>5M2N101O001O001O001N100O2O1O1N2O3M4L3QBROg=[1H4M4K6K9F5L2M3N000O1ZOgMeDY2Y;nMaDR2^;SN_Dm1`;j0O1O1O_M_De1a;l001O100000O010ZO`DZM0=a;X2jD[ME2b;a2h0O0O100O101O1N2N2XNbCk0aN4J_Y\\5" + }, + { + "size": [ + 480, + 640 + ], + "counts": "0W4i:00000000O10000001O00000000001O001O001O1O1O001O002N1O1O1O3MW1iN2N1O1O2N8H1OO1L4H8O1O100O10000001O001O1O1O000000O100O100_OWGiJk8i4m0M3N2N2O1O1O1_MfE;[:BmEZNGc1]:1PFYNFc1[:3\\FKe94_FIa95bFJ^92hFLX91lFMU91mFNT90oFOQ91oFOQ90PG1o8OQG1o8NRG2n8NRG2n8NRG2n8NRG3m8MSG3m8NSG1m8OSG1m81PG1o80PG0P91oF0P91nF2P91mF0R91mF0R91mF0R93kFMU94kFNR92nFOQ91oFOQ91nF1Q9OoF2P9OnF5o8LoF8ROPN6OS9j1bGm0]8TOaGm0_8UOQGTNLj2R9ClF>T9h2O001O1O2N1O2N1O0000O1O1M3O1N200O100N2O100O100O1oNPGRLP9l3TGQLm8n3[GiKg8V4\\GgKe8X4^GeKc8[4Q1O100000000000000000000000000001O00001O1O1O1O1O3Md0\\O1O1O1O2N001O00001O00001O00001O001O00001O001O002N1O1O2N1O2N2N2N2N1O1O1O1O0000000000000000O100O1O1O1O1N2O1N2O1RKTGf3n8VL\\Gc3e8\\L]Gb3g8ZL[Gd3P9QL]Gb3W:I3M3N1NlMPMZHP3f7VMUHh2l7ZMQHf2P8[MhGl2X8UMXG[O[Oi3^9mLUGY3k8hLTGc2BQM[9=QGa2NiLR9o0gFU2Q:lMmET2T:nMjEQ2W:QNhEm1Y:TNfEl1Z:\\11O100O10000O1000000000000001O000000001O1O1O2N2XF^Kn8d4PG^Kn8d4lFbKR9`4kFbKT9V5O1O1O001O00000000O1O1N2N2N2O1O1O1O1O1O100O100O100F:L4O1N2N2N2M3N2O100O1O1O10000O1000000O100000000001O0000001O00001O1O001O2N1O1O1O3M6J4YFQKU9^5O1O003M1O0000O1K5N2O1O1O1N2N200O1O1O1K5O1O1L4O100O1O1O1O1O1O10000O100000000O1000000001O001O001O001O2N1O001O2N002N2Nc0QFdJT9e5OFmFgJS9U5>L4O1O1O1N2O1]Oc0N2O100O10000O100000000001O1O1oEjKV9W4eFfKA:d9Q4jFRLV9Q4cFSL]9o3]FTLd9d40000000O100nN[FfLh9Z3XFeLP:GoEh23lLO9PN201N2JhZ^3" + }, + { + "size": [ + 375, + 500 + ], + "counts": 
"lTe21e;101O001O001N2O001O1O1O1O1O2N1O2N1O2O0O1O1O1O10O01O000001O000010O01O00010O001O010O00100O21N5K4L1O01O0O10001N1O101N10000O2O001N10001O001O0O101O1N101O1O001O1O001O1O1O1O100O1O001NWYQ2" + } + ], + "model_output": "A person wearing a black jacket with a high collar, black pants, and black shoes, with short dark hair and hands in their pockets, is looking at a kite.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_101.png", + "subject_name": "sand", + "object_name": "playingfield", + "predicate_name": "attached to", + "mask_rles": [ + { + "size": [ + 424, + 640 + ], + "counts": "b8d4d80000000000000000001O000000O10000001O00O10000000O100000001O0000000000O100001OO01000000000000000000O100000000000000000000O1001OO0100000000000000000O10000000000000000000000O11O00O10000000000000000000000000001O0O1000000000000000000000000000000000000O100000001OO100000000000000000O10000000000000001O0000O100001O0O01001O00O100000O100000000000000000001O00O1001O0O10O100001O00000000000000O11O000O1000000000O10000000000000000000000001N010000QLPHg2o7TMYHi2h7dLlHZ3c8N7I6J4L2N2N1O2N1O2N2N2N1O1O002N1O1O001O1O1O1O001O001O00001O001O00001O001_NUEc0k:SO`El0`:POdEP1]:nNeEQ1[:lNiES1X;N00001O2N1O2N1O001O1O001O00001O000000001O00000000000000000000000000001O0000001O000000O1000000001O000000O11O1O0000O1000000O100O1000000000000O1000000000000POZO]Eg0b;00000oN[O_Ee0_:]OaEc0]:@bE`0]:AbE`0]:AcE?]:AbE`0]:BaE?^:B^Eb0a:_O^Eb0a:@^E`0b:B[E?e:BZE>f:DXEC4L2M3N1O1O1O10OO2O1O2N1O1O0010O01O001O0001O00000000A?L4O1N1M3N3O01O1O01O01O1O1O1M4J6_IdJQ5a5eJhJW5h6UNnHWLLb0Y7R3oHZLN=W7V3nHXL?Oh6e3j1M5K4lMeF0d9IhFL^9KoF^Oe9OoFIYX9" + }, + { + "size": [ + 425, + 640 + ], + "counts": 
"`UW31X=0Vb23f]M2nL0iH1X7KiH6T:2M2N2O1O2O000O10000O1000000O1O2N100O1000000O1^FYOa6h0^I\\O_6d0\\ICb6=^ID`6=`IC`6=`IC_6>aIB_6>aIB_6>aIC[MBb33c1i0`MB]6>cIB:Bo2m0gLBZ6?fIDW6dLC]36iLJk4XMSIn0Z2j1S801O00000000000000000000000000000000O1000000000000000000O10000[NWMXG4Z1f2W7lMaHT2W7[NYHoN_Og2R8[NnGkN>U3\\7WNXHcN7OGY3d7ZNRIc1i6`N[I^1`6gN`IY1[6kNgIT1U6POkIP1Q6SOPJm0m5VOSJj0i5XOYJh0d5ZO]Jf0`5]O`Jc0^5^OcJb0]5]OdJc0]5YOfJg0b5oN`JQ1g5eN\\J[1k5YNZJg1P6jMUJV2_800000000000001O00000000000000000000000000000000nFYMa0E[5R3oIbM=F_5h2TJfM8Dc5f2TJoMO^Ok5d2RJXNIZOH]OHNQ6S3dJ?\\3jLUMg2ROn0[3mL]MU2XOn0o2^MdMd1]OnLGl3h2WNoMo0@U1T2UNZN\\5R1YKjNi4m0eKkN`41UHLo31l8N102LUel2" + } + ], + "model_output": "A person wearing dark blue denim shorts rolled up to mid-calf, a yellow top, and black flip-flops stands beside a dog.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_103.png", + "subject_name": "umbrella", + "object_name": "person", + "predicate_name": "attached to", + "mask_rles": [ + { + "size": [ + 640, + 640 + ], + "counts": "gok54hc05I7L3K5M3L4L4M3L4N2N2N3M2O1O0O2O1N3N1O1N2O1O2L3O0N3M4K5M1M3M3N4K5I6G8DVC@l<>YC]Oi200O1N3M2O2M4M4L2N2N2N2M2O0O2O000O1M4O000001OO2O00010O0O100010O000000010O0010O00001N1M5M3M3M3[OZNnCi1f;gNTD[1i;P1L3N4L4L5K4M1N2N1000000O1000O10000O101N100O2O0O2O1O1O1O1O2N1O3M`0@3M3M001O001O002N2N0O100LYCPNiRC]OR=a0g0I7M4M3Moob5" + }, + { + "size": [ + 478, + 640 + ], + "counts": 
"2i>500000000000000000000000000000000000000000000000000001O0000O100001O000000000000O1000000001O0000000000O100001O0000000000000000O100000000001O000000jHMA3e7000O100nHOWO1h03UOMk06ROJm09QOGn0O^H6c6Ko0NaH7^6KZ16dNJ_15`NJb16\\NJf15YNKi14VNLk15SNKm17QNIP28nMHT27kMIV28hMHZ27eMI^26`MJa26]MKd25\\MJf24ZMLh23WMMk22TMNn22PMNR30mL1V3OgL1[3MeL3S90[J0\\L0d31\\LNd35YLKg36XLJj34VLLm31SLO^90O11O00fJ0fK0Z4NhK2X4MiK3W4MiK3`91SKLPK4m90PKMUK3j4OUK1j4M[F0j43k4L\\F1i43m9000iJNbFOj43d47YKIg47YKIf49YKGf4mFGe3K]5`0eF\\O2?h3E`5a0fF\\O0?j3D_5a0hF[OOb0i3B^5b0mF0c3^O`5d0jF0e3\\O`5f0hF1g3YO`5g0iF1f3XO`5g1_JYN`5i1_JWN_5k0oFNa3WO_5k0RGN^3WO_5l0SGM^3WO^5l0VGL\\3XO]5m0XGKZ3XO\\5n0[GKX3WO\\5n0]GLV3VO\\5n0_GMT3UO\\5o0aGKS3VO[5o0cGLQ3UOZ5Q1fGIP3VOX5R1jGGn2WOW5R1lGHl2VOV5S1oGGk2VOS5U1THEh2VOS5U1WHDf2WOR5U1ZHCd2XOQ5U1\\HCc2XOP5U1_HBa2YOo4U1bHA_2ZOn4V1cH@_2ZOm4V1fH@\\2ZOm4W1hH^O[2[Ol4W1jH]O[2\\Oj4W1mH\\OY2]Oi4W1PI[OW2^Oh4W1SIYOV2@g4V1TIZOU2@f4W1VIXOT2Ae4W1YIVOS2Cc4W1\\IUOQ2Db4W1_ITOo1Ea4W1bIROn1G_4W1dIROm1G_4W1eIPOm1I]4X1fInNn1J\\4W1hInNl1K[4W1kIlNk1MX4Y1mIiNl1NQ4_1TJbNk1OP4a1UJ_Nk10m3e1XJYNl12T3MkIk1W1TNj14o2c2kN]MQ1h2nNXMQ1k2mNUMQ1R3jNnLU1T3jNlLV1Y3eNgLZ1^3bNbL]1`3bN`L]1b3bN^L]1f3`NZL`1f3`NZL`1g3_NYL`1h3`NXL`1h3`NXL`1i3_NWLa1i3_NWLa1i3_NWLa1i3_NWLa1i3_NWLa1h3`NXL`1h3`NXL`1h3`NXL`1h3`NXL`1h3`NXLa1h3jIULa43e1k3[NULe1j3\\NVLe1i3[NWLf1f3\\NZLe1d3\\N\\Ld1d3\\N\\Le1c3[N]Lf1a3[N_Lf1]3^NbLd1\\3\\NdLf1Z3ZNfLi1Y3UNgLS2o2oMQM\\2d2dM\\M]2b2dM^M]2a2cM_M]2a2cM_M^2`2bM`M_2_2`MbMa2]2_McMb2\\2^MdMc2[2]MeMd2Z2\\MfMd2Z2\\MfMd2Z2\\MfMe2X2\\MhMd2X2\\MhMd2W2]MiMc2V2^MjMc2T2^MlMb2U2]MkMd2T2\\MlMe2S2[MmMe2R2\\MnMe2P2\\MPNe2n1\\MRNe2j1^MVNc2g1_MYNa2d1bM\\N_2a1cM_N]2`1dM`N]2^1dMbN^2[1cMeN_2Y1aMgN`2W1aMiNa2U1_MkNb2T1^MlNc2R1^MnNc2n0nGoN_53c2n0`MROa2m0_MSOa2l0`MTOa2k0_MUOb2j0^MVOb2i0_MWOb2g0_MZO`2e0aMZO`2f0`MZOa2d0`M\\Oa2b0`M^Oa2a0_M_Ob2`0^M@c2>^MBd2<\\MDi24ZMLl2LVM4j8000000000000000000000000000000000000001O0000000000O10000000000000000000000000000000000000000000000000000000000000000000000000aM7iEIV::hEFW:=gECX:`0fE@Y:b0fE^OY:c0gE]O
Y:d0fE\\OY:e0gE[OY:e0gE[OY:e0gE[OY:e0gE[OY:d0hE\\OX:d0hE\\OX:c0iE]OW:b0jE^OW:?kEAU:?kEAV:=kECV:0nA2P>MQB3o=MQB4n=LQB6n=JRB6n=JRB7m=IRB8n=IQB7o=JmA:R>EoAc0i=\\OXBf0f=ZOZBg0e=YO[Bg0e=YO[Bg0e=YO[Bh0d=YOZBh0f=XOZBh0f=YOYBh0f=XOZBh0f=XOYBk0e=SO]Bn0b=RO^BP1`=PO_BQ1a=oN_BQ1a=oN_BR1`=nN`BR1`=nN`BR1`=nNaBR1^=nNbBR1^=nNbBR1^=nNbBS1]=mNdBR1\\=nNdBR1\\=nNdBS1[=mNfBR1Z=nNhBQ1W=POiBo0W=QOiBP1V=oNkBQ1U=oNlBP1T=POlBQ1S=oNnBP1R=POnBP1R=POnBQ1Q=oNPCP1P=POPCP1P=POQCo0o\\O8[[Ld0m;QOQEMVOc1i;`NQEMVOd1h;_NRENUOc1i;_NoD3UO_1l;^NoD=VOi0l;jNmD4VOO3S1i;jNnD4WON2T1i;jNnD4WOL4V1g;jNnD5AP1b;kNnD5_OP1c;kNnD6^OP1c;jNoD8[Oo0f;iNoD8[Oo0f;iNoD8ZOGLR1l;oNnD8YOGNa0NKm;EnD8WOI0?NKm;EnD9UOI2=OLl;EnD9UOI2>NJn;FnD8YOE14N`0j;_OnD8\\ODM4O`0k;@mD8\\ODM4Oc0h;]OPE8\\ODL6Ob0h;\\OQE8[OFM4Nb0i;\\OQE8ZOGN4Lb0k;[ORE7XOI0h0LVOj;2SE6WOI04L;Pl:F^F8b9H^F9a9G`F8`9HbF6^9JdF4\\9LfF3Y9MhF2X9NiF0X90hF0X90hF0X90hF0X90hF0X90hFOY91gFOY91hFNX92j200000000000000000000000000000000001O000000000000O100001O0000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001O00000000000000000N" + } + ], + "model_output": "An individual wearing a red ski jacket with a black helmet and goggles, equipped with ski goggles and a black backpack, is going down the snow.", + "eval_result": "True" + } +] \ No newline at end of file diff --git a/evaluation/GAR-Bench/model_outputs/gar_1b_simple.json b/evaluation/GAR-Bench/model_outputs/gar_1b_simple.json new file mode 100644 index 0000000000000000000000000000000000000000..52670c7e854469ab6860adfb2c7f73732cb17224 --- /dev/null +++ b/evaluation/GAR-Bench/model_outputs/gar_1b_simple.json @@ -0,0 +1,2240 @@ +[ + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000227491.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": 
"[f`46i>3N001fA3`0DQ<9^CU1_4M1O2O000O22M0000O010O00001O0QDJ7FT6a0bI6N\\O_6>ZIf0OmNg6>VIn0IgNR7;RIU1GaNV7;QIY1D_NY7:oHl1_N^M>?U8:iHS3Y7mLcHU3]7mL`HT3a7lL]HU3d7kLZHW3f7iLXHX3i7hLVHX3k7hLSHY3n7gLPHZ3P8gLnG[3R8eLmG[3S8eLlG\\3T8eLkG[3U8gLhGZ3X8gLgGY3X8jLfGV3Y8lLfGT3X8oLfGQ3T8WMkGj2m7^MRHb2i7cMWH]2f7gMXHZ2e7jMZHV2b7nM^HR2_7RN_Hn1`7TN`Hl1^7VNbHi1]7ZNbHe1^7\\NbHd1\\7^NdHa1\\7aNcH^1\\7dNdHnNKaNc0ENc0R7[2aHgNo1bNc5g2]HgNV2[N^5o2[HgNQ:T3O1O1O1O102N2M2O3K4aHhJc42^I[5i1gJo33XJl5c1UJd3P7YLQIe3U7ULnHj3U7PLnHo3W7kKkHT4^7ZKlHe4\\7nJjHQ5c8O100O1O100O100O100000000000000000000000000000000001O000dMWKaJi4[5ULjIm3S6XLUInNJk4P7UMjHo2T7_20001O001N2XI[HS6g7hI_HU6b7mI]HQ6c7PJ^Hn5d7RJ[Hm5f7SJ[Hk5f7TJ[Hk5f7SJ^Hj5c7UJ_Hh5c7WJ_Hg5^8O001N2N2O1O1O2L3M5L4L2N4L2M3O1O001N101O001N101O0O2O1O0O2N101N2O0N3L4M3M3M3L4B>M3M3M3DUD^MP<_2:N2O2N2M2O2N2L5J6WOlBUOc=;YWf4" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_0.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000029397.jpg", + "mask_rles": [ + { + "size": [ + 449, + 640 + ], + "counts": "Zl95l=1O1O001O01O0000000010O012MU6CaIg0^6ZOVIQ1j6S301O001O0001O00000000000000001O002N3M:QIiH`6^7N100O1O00001O000000001O00001O00001O0001O0000000000000000000000000O100O100O10000O1O1N2O1N2N2N2O1O1O1O100O101O0O1000000O10000O100O100O1O1O1O10001O0O10000000000000O11O00000000001O000000O2O00000000000O1O1O1N2J6J6K5J6L4J6J6L4J6K5I7J6K5K5K5I7I7K5K5M3J6K6L3J6O10001O1O2N2N2N2N3M2N2N3M2N2N2N3M3M2N2N2N3M4L1N102M5I6H7J8G9Edm<" + }, + { + "size": [ + 449, + 640 + ], + "counts": "Qa_4?_=6L2L5L3M3N2N1O3M1O2O0O100L5ZOe0L5J5M3N3M2O1N2O1O2L4N4lN`M^Fd2k8f1E;@`0H7L301O1O1O001O1O00001O000000001O00000000000000000000000000000000O1000000000000O100N2O1M3CXH\\Jm7]4`HeKA5Z8S4W1M3]Oc0M3L4L4M3K5L4I7F:I7H8J6E;H8EXCVOTR;5a[E6H7L4RLBSJa0h5LnI7o56dI7n5g0UI_Of6e0UI^Oi6h0PIK^68oH9P7LcH=\\7T30001O001O0000000000001O00000000001O000000000000000000O1000000O10000O\\NlI]KS6Y6O1O1O1N2N2N2O1N2N2O1O10000O10000O10000000000N2gNgGjLY8U3iGiLX8U3kGjLU8T3PHgLR8V3SHgLn7V3WHfLk7X3XHcLl7X3[H`Lk7[3`1L4\\MQG0U9M\\G_Oj8?]2O1O1O1O1N2O1O1O1N2O1O1O1N2NSaZ1" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is standing on .", + "image": "images/caption_simple_1.png", + "model_output": " is standing on ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000163117.jpg", + "mask_rles": [ + { + "size": [ + 500, + 376 + ], + "counts": "[o0Q82QH\\7V8N000O2HhGRIX8U7001O01O000000000O1O1O1N2N2dL`HcNa7Z1cHdN^7Z1eHeN[7[1gHcNY7]1kH_NU7a1oH[NQ7f1nH[NQ7e1oH]No6d1oH`Nn6a1PIaNo6`1PI`NP7`1PI`NP7a1PI_No6d1nH]NQ7d1jHVKOV3W7e1hHWK0T3X7S5000O1O100000000PNgHfKX7Y4jHfKV7Z4kHeKU7Y4PIeKo6Z4UIoIE^1U7b4^I^Kb6b4^I_Ka6a4_I^Kb6b4]I_Kc6a4]I^Kd6b4ZI_Kg6b4XI]Ki6d4XIYKi6h4WITKl6m4SIQKo6P5QInJP7S5gHhIMU1\\7S5aHhI120S1^7T5_HjI022o0_7l601O2N4L1O1O`HhHc6X7]IjHa6n0iHT5c0RJb6T7[IUIA@h6[7eIYI@]Ok6Z7eI]I[6c6dI]I]6c6cI]I]6b6dI\\I^6d6cI[I^6d6bI[I`6d6`I\\Ib6b6`I]Ib6`6_IaI`6_6`IZI]OES7?nHl5c0VJ_6NmHk5f0VJ]6_6dIQI[O3LMU7P7cIoH@0J0T7P7[1O02M2L4WKhHQ1\\7nNhHn0[7POfHP1Z7POdHeLMX4_7TOhHl0X7TOhHl0X7VOfHj0Z7YObHi0]7i3M2O01O01N11O0000O100000001O01OO3N3M001O1O2N00TMVIoLi6P3ZInLf6R3[ImLe6R3\\InLd6Q3]InLd6R3\\InLd6R3\\ImLe6T3ZIlLf6U3ZIiLg6X3WIiLa5cMoJN@g5InLi5\\MgJNA47\\8[6cGeIX8b6fG]IY8P7O2N1O01ZOlGmIR8k6O1O01001N0100001OO01UMgH[MY7d2gH\\MZ7d2fH\\MZ7d2gH[MY7e2gH[MY7f2fHZM[7f2bH\\M]7^501O0000O1UKaH`1a7^N_Ha1c7^N^H`1c7\\N\\HQM3a4b7]NcHb1_7\\NcH`1`7`NbH\\1_7dNcHY1_7gN]H\\1e7dNUH^1d0fK^6n2iH]1j0fK\\6X7gIhHZ6U7U1M2O00101aNgG^JIo0_8d4kG]KU8a4mG_KS8^3fGZL89R8\\3gGZL7 and ?", + "answer": " is flying over .", + "image": "images/caption_simple_2.png", + "model_output": " is over ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000170613.jpg", + "mask_rles": [ + { + "size": [ + 640, + 439 + ], + "counts": "eiT57gc03M2N2O2K4N2K5L4O1O1N2jLL^B6^=n1UAjL0`1k>`3N1OkLeAj0X>UOmAj0R>TORBj0n=SO\\Bg0c=WOeBc0[=YOmBd0S=YORCe0mVS<@oC`0P<@RD>o;@SD`0m;@TD>l;AWD>i;AYD=i;AYD;j;DXD4o;MQDKV<5kCD\\<l8BTG8S9HkFGg99YF]OQ:c0nEYOX:f0hEUO^:j0bEPOd:P1]EhNk:2\\BMk2HQ;8YBM_a03a^OM_a02c^OM]a02d^ON]a00d^O1[a0Ng^O1Za0Lh^O4Xa0Jj^O6Va0Il^O6Ta0Hn^O8Sa0Eo^O;Qa0DQ_O;o`0Cj^OCDj0ca0_Oj^OJBg0ea0ZOl^O1^Of0bb0ZO^]Of0\\b0WOa]O34e0Zb0ZO`]O26d0Zb0Cf]O[MgNIg2e;UNVG`0ZMeNJf2e;VNTGc0[M[1a;RNRGg0[M^NNe2d;WNQGl0bMh0\\;]NoFn0jM`0W;aNmFR1lM=W;aNkFU1mM:W;bNhF\\1oMOZ;fNoDH5j1\\O^M1Y2^;8iD:FVM4V2^;c3]DWJ4V2`;e3_DVJMR2d;j3_DRJOQ2d;o3aDnK`;S4aDgKc;Z4\\DcKg;W63L5gNjCbJ\\<^5eC[Jb^6d;]JYD`5h;dJVDX5m;jJQDQ5S`0VBUOP>k0UBlNo=S1YB^No=a1ZBoMm=P2S2O1N1O2N3L3K5K6J7H9G8H;EVhV4" + } + ], + "question": "What is the relationship between and ?", + "answer": " is talking to .", + "image": "images/caption_simple_3.png", + "model_output": " is looking at ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000465822.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "dh^31e;1ig72RXH4J7I8J4M3N3L4]OROVFR1h9RORFR1k9?M3N3L2O100O2N1O1O1O1O1O2N1O100O2N101N1O1O1O2O0O101N1O1O101O0O2O0O100_HQMZ6P3dIQM[6P3bISM^6m2`IUM`6k2_IWM`6i2]IZMc6f2QIgMn6Z2oHhMR7W2kHmMT7[3010O0mLfHi1Z7VNiHh1X7WNlHf1S7YNQId1o6\\NTIa1m6]NWIa1i6]NZIa1g6]N\\Ia1e6]N]Ic1c6XNcIf1_6UNfIi1Z6TNjIk1W6RNlIbNh0`7dNRJc0`Nh0W9WOkFh0T9XOnFg0R9XOPGg0P9VOTGj0j8VOXGi0h8VO[Gh0f8WO\\Gh0c8WOaGg0^8YOcGg0ZOnNi8:oGS1o7kNSHU1n7hNSHX1o7dNTH\\1S9001O010O10O00010O01O010O001O010O00010O10O010O01O010O0100O010O0010O0101N1O2M2I8YOl0BcPb0" + }, + { + "size": [ + 375, + 500 + ], + "counts": "f[b41c;5M3M3N3L2N3N3M2N2N10O01O000000O2N100O1O2N100O101N1O1O2O0O1O1O1O1O101N1O1O101N1O2N`[e0" + } + ], + "question": "What is doing with the ?", + "answer": " is looking at the .", + "image": "images/caption_simple_4.png", + "model_output": " is holding the ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000278353.jpg", + "mask_rles": [ + { + "size": [ + 640, + 596 + ], + "counts": 
"32kc0k0XO2N001O000000001O000000001O000000001O0000001O00000000001O0000001O00000000001O0000001O0000001O0000000000001O00000000001O00000000001O000000001O000000001O000000001O0000001O0000001O0000001O0000001O000000001O0000001O000000001O0000000000O10000O100O100O1O1O100O1O100O100O100O1O1000000O100000000000000000000001O00001O000000001O0000001O00001O0000000000001O0000000000000000000000000000000000001O0000000000001O0000001O001O0000001O00000000000000001O0000001O1O001O001O1O001O1O001O001O001O1O001O00001O001O001O00001O001O00001O00000000001O00001O00001O00001O0000001O0000001O0000001O001O0000001O000000001O000000001O001O0000001O00O1000000O100O100O10000O100O100O10000O1000000O100000000000000O10000O10000000000000000001O0000000000001O000000001O000000000000001O000000000001O001O000000000000000000001O00000O1000001O0O100O1N2O100O1N3M2O1O2N1N2N3N1O1O1N3M2O1N3N100O100O2O0000000000d^OROg?m0V@WOi?i0V@YOi?h0V@XOj?h0V@YOj?g0T@ZOl?g0S@ZOl?g0R@[Om?f0R@[Om?f0`_OoNG=i`0a1P_OcNo`0X200O1O1O1O1O1O010O001O1O1O2N1O1O1O0001O01O0000001O01O0000010O0000000001O0000001O00000001O01O00000000001O0001O01O000000001O01O000001O01O00010O00000010O0001O0001O000001O0000010O000001O00000010O00000001O000000010O00000001O01O00001O01O0000010O000000000010O00001O0000000010O0001O0000000010O00000001O00000001O00001O;bLX_O46KO67HL001Je0[?" 
+ }, + { + "size": [ + 640, + 596 + ], + "counts": "[me1k0gb0d0C;CO1O1O1O10O01O001O0000jMhJdEX5[:nJ`ER5_:SK]Em4c:UK[Ek4f:WKWEi4h:\\KTEd4l:^KREb4n:`KPE`4P;bKmD_4S;cKkD]4d8bKgG5AZ4W8YLPHBCV4o7kLZHROBT4P8RMZHlNCS4Q8VMYHjN@S4U8XMXHh4e7^KXHb4g7`KYH`4e7cKYH]4g7dKXH\\4i7fKTHZ4n7gKnG[4R8eKmG[4U8eKhG\\4Y8jK^GX4c8c3O0100O002N1O2N000010O01O1O001O2NnHQET5o:lJoDV5P;m110O101OO01000O0100O1000_HREo5m:QJTEo5l:PJTEQ6k:oITER6m:mISET6l:lITEU6l:`1O00ZHUEX6k:gIUEY6k:gIUE[6j:dIVE\\6j:dIVE]6j:bIVE^6j:bIVE_6j:`IVE`6j:\\10SHVEb6j:]IWEd6h:\\IXEd6h:\\IXEe6h:YIYEh6f:YIXED1W6g:TJXEB6Y6b:UJXEB6Z6a:TJYEE3W6d:TJYEE3VOOn6e:VJYEC0[O100l6f:VJYEC0@1g6f:VJYEL1n5g:UJXEI1TO1o6e:RK^En4b:RKYEPN2n6f:TKZEm4e:PKZEPN1P7e:oJ]ES5d:lJ[EU5e:S201O01O0010O0001cGWEa7i:^HXEb7h:_HWEa7j:j001O01O000010O\\GWEQ8i:oGWEQ8i:d0OWGWE[8j:cGWE]8k:aGUE_8l:`GTEa8l:9005K0001N1M4N2O1N2N2O2M3N6J2M5L5J6J5L1O1N2O2N1N3N2N2M2O2M3N1O2N1O1O1O1O1N2O1O1O1O1N2O2N3M3M2N4L3M4L5fKlC>X00000O1010O1O001O10O0000000jIoL]KQ3^4YM]Kg2b4]M[Kc2e4cMUK]2j4lMnJT2R5RNhJn1Y5UNdJj1]5[NnGWNUN1l1]3Q8_N`G@GP2j8dNVGGIe1Q9gNnFLN]1T9nN^F39o0Y9U2dFjM]9X2_FiMa9Z2[FgMe9\\2XFdMh9_2SFcMm9X600001O0000000O20O00O100O1O100N2N2O1O100N2M3N2O1O1O1N2O1N2N3N1N\\FZGS8d8mG]GS8c8lG_GS8`8mGaGS8_8kGdGU8Z8jGhGW8W8hGkGX8T8gGmG_8m7_GUHj8b7TGaHR9Y7kFiH[9Q7bFSIV:U6iEkIb:j5^EVJg:g5PEaJS;\\5gDjJ\\;U5^DnJe;f63M4gIVDS4Y and ?", + "answer": " is in front of .", + "image": "images/caption_simple_5.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000128051.jpg", + "mask_rles": [ + { + "size": [ + 360, + 640 + ], + "counts": "U4V4R7000000000000000001O00000000000000000000000000000001O000000001O00001O2N1O001O00O10000O1O100O1010O00000O1001O01O1O0000000000O1L4O2N10000000000000001O0O2O00O11O000001O010O001O00001O3M4M2M1O1O2N1O001O01O00001O0010O01N10001O1O001O1N3M2N3M2M4K5J6E;K5K4O3L3N3M2N2M5L2N4K3L5J8SO^FFi9DXF8 located relative to ?", + "answer": " is in front of .", + "image": "images/caption_simple_6.png", + "model_output": " is on ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000560266.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "ocP1335iZLAf3>\\LAd3>^L@c3`0_L^Ob3`0aL]O`3b0cL\\O^3b0dL\\O_3a0cL^O_3?dL^Oe39]LFe36`LFd35hPY4" + } + ], + "question": "What are and doing to each other?", + "answer": " is looking at and biting .", + "image": "images/caption_simple_7.png", + "model_output": " is biting ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000024919.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "iah21bYQ14bSoN0O2O0O1O1O000UN8nE1k9 and doing to each other?", + "answer": " and are pushing each other.", + "image": "images/caption_simple_8.png", + "model_output": " is pushing ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000231169.jpg", + "mask_rles": [ + { + "size": [ + 454, + 640 + ], + "counts": "WaS2=f=g0SOb0E9H4L3M2N4K6K3N1O1N3M4M3L2O2M4M3L4M1O1O2M4M2N1N2O2M4M2M2O1O2N1O2N1N2O2N1N2O1O2N1O2M3N001OSLmFi2P9WMSGi2k8VMWGj2g8WMZGj2d8UM]Gl2b8TM_Gl2`8TMaGl2^8SMdGn2Y8RMiGm2W8SMjGn2T8QMmGP3R8oLPHQ3o7oLRHR3l7nLUHQ3k7nLVHS3i7mLXHT3f7kL\\HU3b7kL`HV3^7jLbHW3]7hLeHY3Y7fLiHZ3U7fLlH[3S7cLPI]3o6bLRI_3m6aLTI`3j6_LWIb3g6^L[Ic3c6[L`Ie3_6ZLcIf3\\6YLeIh3Z6TLkIk3U6SLnIo3o5nKUJT4h5jKZJW4e5gK]JY4i7000O10000O10000O100O2O001N1O10000O2O0O01001N1O10O010001N10O11O1N1O1O10001N0100O02N2N1O1O1O1O2N1O1O1O100O2N1O001O2O2L10100O3M10O00010O10O10O0100O10O001000O01O0001O010O0001O1O010O000010O0000O2O001N2O001O001O000O2O1O1O001O0010O0000O3OO01O1O001O001O1O001O00000010O2N1O2NO2M2000001O00001N101N1001000O0000010O001O002N001NZGTKT8l4c000N3N2O001N2O1N2O001O1O2M1O2N102L3L4M3O1O1O2M2N2UObEkMb:P2bEmMi;AVDLW1>^NnA1O0000Oea1" + }, + { + "size": [ + 454, + 640 + ], + "counts": "^_^51U>00000000001O2N2N010N2O1O1O1O1O001O2cFCm4>RKBo4>\\41O2N1O1O1O1O3eB0OXOg04XO064W:b3L0O3jEZLg9S4O0100O1O3Ng0XO2N1O10O6J100cE]LU:d37O02N1O2O0O010O01O0000001O001O00000000001O000000000000000000000001O00000000O11O000000O1000000000000001O00001O00001O001O000000000000hMeEe0[:WOlEe0U:[O^FeNVO0f00ZO:\\:R1Z2O10000000000000000000000000000000000000hFdNf5\\1ZJdNf5\\1ZJdNf5\\1b30000000000000hFeNe5[1[JeNe5[1[JeNe5[1[JeNe5[1c300000000000000000000001O001O001^FaN[6_1dIbN\\6^1aIeN_6\\1[IoN_6R1^IQOa6o0^ITO`6m0UG`NK1?N>d0S8Z3RGfLm8^3nFeLQ9X40000000000000000000000000000000000000001O0000000000O11O\\OnFhK22P9i3fGUL[8j3gGULY8k3hGTLX8l3iGRLX8n3P100000000000000000000001O00001O1O:F1OO010000000O10000000000O10000000000000000O1000nF" + } + ], + "question": "Where is located relative to ?", + "answer": " is beside .", + "image": "images/caption_simple_9.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000551822.jpg", + "mask_rles": [ + { + "size": [ + 453, + 640 + ], + "counts": "Y_P3e0Z=]OhBn0o and ?", + "answer": " is attached to .", + "image": "images/caption_simple_10.png", + "model_output": " is attached to ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000498463.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "j`T7a0]>201O000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001O0000000000000000000000000000000O100000000000000000000000000000000000000000001O001O000000001O000000000000000000000000000000000000000000TH" + }, + { + "size": [ + 480, + 640 + ], + "counts": "l_T7i0V>2O000000000O1O1bNVOdDl0Y;YOaCKd0V1k;YO_CY1` and ?", + "answer": " is on top of .", + "image": "images/caption_simple_11.png", + "model_output": " is on top of ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000275198.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "^[P34j>5J5M1M4M2N2N2M3N2N2M3N2N2O1N1O2N2O1N2O1O1O1O1O1O2O1N1O010O10000VO\\NWDf1c;jNRDU1l;POUDm0j;UOVDj0i;XOTDYOKX1P[O1K[NoM9Oo00W1j2WO\\O2KbNPNn00V1h2YO]O5HTNRN21N0X12iNMX2g2@^O5G[NUNQ1OhN0X2b1eM`N6e0e1^17EX2lNnLNd0_1:EQNXNY10bN1X2nM;ZMWO_3P1nMQO[3;QN;i0[O_Lg0]1XOeNN`4b0^LQOW64R1l0gHPOY17k:h0mCSOU17n:f0kCUOV15R;k0mDVOmNGO3i;o0[E^OkNCj;n0Q2F[BXOf=h09O00100O1O10O10O10O0010O1O1O10O01O001O01N11O101N00100O010O1OO2N1100O00O2O1O100O0O2N2001OO100O10000O10ON32N0WOQB`0X>O1N2O1O1L6IoRe6" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is looking at .", + "image": "images/caption_simple_12.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000257896.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "`fV46dc0`0D:F9H4M1N1O2O2M101O0001N2O0O1O2N2N1O2N2N2M3N1O2N2N1O2N2N2N1O2N2N1O2M2N3N1O2N2N1O\\Q[4" + }, + { + "size": [ + 640, + 480 + ], + "counts": "ZT`21mc04M4K4M2O1N2O1N100O101M5M1N101N2O2M2O000O2N102N1N101N101[CnNZ6S1cIVOV6j0hIYOW6h0eI_OV6d0eIDV6 and ?", + "answer": " is wearing .", + "image": "images/caption_simple_13.png", + "model_output": " is wearing ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000034417.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "hc[4?n:=I:G4N1O101N10000J6O10000O2O00000000lNQF`0o9^OTFa0m9^OTFb0k9\\OWFc0k9VO\\Fh0[:A>O2O3N1O100OO02O0O100O010O0010O01O0010O01O000O101NVUi0" + }, + { + "size": [ + 375, + 500 + ], + "counts": "kZm35\\;7I6K6L3M3N3N2M4iEkNg9d1N2O0O1O1F:O1O01O0O2C=N2O1ROmE12\\OT:?SFBI4\\:9SFFn97TFIn94SFLo91RFNQ:MRF3i:O100O2NTf]1" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_14.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000047585.jpg", + "mask_rles": [ + { + "size": [ + 640, + 424 + ], + "counts": "V91Z2>_1C`L0f00000O100O10000000000O10000000000000000O1O10000000000001O001O00001O000000001eNd]OP1\\b0lNi]OS1Wb0kNk]OU1`b001O000000000000000000O100O1O1O1O1O1O1KeNd]O\\1\\b0dNc]O]1]b0dNb]O\\1ab00001OO10000000000000000000000001O0000000000LdNd]O20T1\\b0jNk]OU1ab0O001O00000000000000O10000000000000000j]OlN`a0T1f0000O10000O100O1O1N2O1O1O100000000000000000000000000000000bGiNSNW1m1oNmMQ1S2XOXFJ`6n0X35YF_No5\\1g37WFbNo5W1j3k0SLUOm3n0oKSOQ4o0mKQOS4S1iKmNW4T1hKlNX4V1eKkN[4V1dKjN\\4X1bKhN^4[1_KeNa4\\1^KdNb4^1\\KbNc4c1YK]Ng4g1TKZNl4l1nJTNn4`NUF_3j4QNQ5`NUFc3e4nMV5_NUFh3`4iM`5kNnER3\\4TNP6^NkE\\3R4VNQ7X1lHhNd7l0XHTOZ8=cGC_8?]GAe8a0XG^Oi8e0SG[On8i0mFWOU9n0dFRO]9b1hD]MWOQ1QlFIQ9:jFJT99fFLW97dFNY96aFO\\93aF1\\91]FhK[OX4X:1]FhKYOX4Y:0aFeKXOZ4U:MmF2S9OmF1R90nF0R90oFOP92PGNo83RGLm85RGKo86oFKP95QGKo84SGKl85WGIi87TGKl85TGLk85UGKk85UGJk87UGIk87UGIj88VGHj88WGGV7nKoH]4JEU7SLnHX4MET7ZLhHS43DS7[LiHR42DT7[LjHQ42DQ7aLiHm31FT7dLeHg34IT7dLfHd34IU7hLbH`36Km6^KbHY1:`36Jm6PMkH[36Cn6TMjH[37Am6WMjHZ37Al6YMiHY39_Om6j2mHZMQ7m2hHTMW7k7O2kH\\Cl6m<0O0O3N1O010N1O101O000000000O1O1O1N200O100001O0002N1O001O1O0000001SEfHR9[7kFiHS9W7jFlHV9U7gFoHW9R7eFRIZ9o6dFSI[9n6dFSI[9n6eFRIZ9P7eFoH[9T7cFlH\\9V7mESH and ?", + "answer": " is standing on .", + "image": "images/caption_simple_15.png", + "model_output": " is standing on ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000234757.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Y\\Z52k>6J5K4L5K5K4M3N2O1N2O100O1000000000001O00000O100O0O1O2N10O0jN`Bn0`=ROaBm0_=ROcBn0\\=ROeBP1X=POhBP1X=POiBP1V=QOiBo0W=ROhBn0Y=SOeBm0[=<1O1\\OoBWOQ=e0VCXOk1iA0V>OjA3U>MkA3U>MkA3U>MkA4T>MkA3U>NjA2W>NiA1W>;2O001N2N2N4JbRT3" + }, + { + "size": [ + 480, + 640 + ], + "counts": "oXT4:e>2O1N3N1O0aE>QLUOZ9=UJg1V5ZN]J[2[5eM^Jl2W5VMbJX3V5hLaJg3X5ZL]JV4^5jKZJd4`5\\KYJT5^5lJ^J_5[5bJ`Jf5\\5[J^Jk5a5VJYJo5g5QJUJS6k5mISJU6l5lIoIZ6P6fIlI^6T6bIhIb6X6]IdIh6[6YIcIi6]6m000O10000000O00100O1000O1000O1000O100000000O101O001O1N1000000O100000SNXKaIi4m5^2B>iNW1kNU1iNV1_Nb1O1O2M201N2N2N2M4L[bT4" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is in front of .", + "image": "images/caption_simple_16.png", + "model_output": " is beside ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000008899.jpg", + "mask_rles": [ + { + "size": [ + 539, + 640 + ], + "counts": 
"Z>e1V?0000000000000O1000000O10000O1O1O100O100O100000000O100000000001O000000000O101O00JZNXAe1h>[NXAe1g>[NZAe1e>]NYAd1e>^N[Ab1c>`N]A`1d>^N]Ab1c>\\N_Ad1a>\\N^Ae1b>YN_Ah1i>10000L4N2000000KoM`AQ2`>oM`AQ2`>oM`AQ2d>1O100O1O100O11O000000O1O1O100O01000O10000O1000XNbAS1^>kNeAT1[>lNeAT1[>lNeAT1[>lNeAT1[>kNfAU1Z>jNgAV1Y>iNhAW1X>hNiAX1W>gNjAY1V>gNjAY1V>fNkAZ1U>eNlA[1T>dNmA\\1S>cNnA]1R>bNoA^1Q>aNPB_1P>aNPB_1P>`NQB`1o=`NQB`1o=_NRBa1n=]NTBc1m=[NTBe1a>0000000000000000000000O10000000000001O00O100000000000000000001O2N:FY2PBfMP>[2PBdMQ>\\2oAeMP>[2QBeMn=[2RBeMn=[2RBeMn=[2PBfMQ>Z2PBeMP>[28000iAeMn=[2QBgMn=Y2SBeMo=Y2:000000000000000O10000000000000000jAeMl=[2SBfMm=Z2:000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000000001O0000O100O100000000O1000000000000000000000000000000kAbMm=^2SBbMm=^2800000000000000000000000000000O100000000001O0000O1000000000000000000000000000000000000000O11O0000000000000000O11O0000O1000000000000000000000000001OO0101O0001O0O10000000000000000000001O000000000000O1000000000000000000000O1001O00O1000000000000000000000000001OO10000001OO100001O00nNfACZ>=hA@Y>`0hA_OX>`0kA_OT>a0mA^OS>b0mA]OT>c0mA\\OS>d0nA[OR>e0oA[OP>e0RBYOn=g0SBWOn=i0SBVOm=j0TBUOk=l0WBROi=n0WBSOh=m0YBQOh=o0YBPOg=P1ZBoNf=Q1[BnNe=R1\\BmNd=S1\\BmNd=S1]BlNc=T1^BkNb=U1`BiN`=W1aBhN_=X1aBhN_=X1aBhN_=X1aBhN_=X1bBgN^=Y1bBgN^=Y1bBgN^=Y1aBhN`=W1aBhN_=X1aBhN_=X1aBgN`=Y1`BgN`=Y1`BgN`=Y1`BgN_=Z1bBeN^=[1cBdN]=\\1Q10001O00O100000000001O0000000000`AeN`=[1o0100001O00000000000000O11O00000000000000O1001O00000000000000000000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000000001O0O1M4ZO]@Aih5h0PgJ=H2O000001O000000001O00001O00001O00001O00^B" + }, + { + "size": [ + 539, + 640 + ], + "counts": "VTZ22e`08J4M2N2O0O1_OAf@`0X?Cf@=Y?Ee@i0N1O1J6O1N3L3N200O100000O102N1O2N2N2N6I2O1O3mNbAC`>_OfA6?Fm=3dA6e0Ai=7dA6^?He@4^?If@2^ea7" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is on .", + "image": 
"images/caption_simple_17.png", + "model_output": " is beside ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000260261.jpg", + "mask_rles": [ + { + "size": [ + 640, + 426 + ], + "counts": "Xi`33lc02M3N2M2O2N2N2N101O1O10O1000000O10000O10000O10O1000O1O001O0O2M3O0O2O001N2O1M2O]^P4" + }, + { + "size": [ + 640, + 426 + ], + "counts": "dbZ32mc03N2N6J2O0O001O01O01O1O001O01O01O00000001O0000000O10O1O100O1O1O1N2O01000O1gK@QEa0n:DmD=Q;GlD:S;IjD8U;IjD8U;JjD6U;KjD5W;KhD6W;KhD6W;KgD7W;KfD7[;HaD=^;BaDa0_;_O^Dd0a;]OZDg0g;YOVDj0h;WOUCCeN^1U>POQCP2nNZE5_;I]4Objb1" + } + ], + "question": "What is doing with ?", + "answer": " is holding .", + "image": "images/caption_simple_18.png", + "model_output": " is holding ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000301563.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "^2m:_20000000000000000O2O000001O01O0000001O0O1000010O000000000O10000001O01O00000O101O00001O00001O001O000]KXMbNi2]1XMbNh2^1WMdNi2o50000001O000O2O2cE_Mk9Q300010O0001O000O2O001O00001O1O00001O001]KZL^Og3`0ZL@f3`0ZL@g3?YLAg3R50000001O000^KWLAj3>VLBj3>VLBk3=ULCk3o4@cFQM]9n2dFRMi4EDY3cKRMi4EDS3kKWM`4FER3mKWM^4FFS3mKWM\\4FGS3mKWM\\4FGS3mKXMn8h2RGXMn8h2TGVMl8j2UGUMk8k2UGUMk8l2TGTMl8l2UGSMk8m2VGSMi8m2XGRM^OJW9T3[GRM^OJW9T3\\GQM\\OLX9S3\\GSMe8n2ZGRMf8n2ZGRMg8m2YGTMe8m2[GSMe8n2ZGQMg8o2YGQMg8o2ZGPMf8Q3YGoLg8Q3YGoLg8Q3YGoLg8Q3YGnLh8R3YGmLg8T3YGjLh8V3g00000001O000000001O0000001O001O00000000001O0000000000001O000000000000001O00000000001O000000001O0000001O0010OO101O000000000000001O0000000000002N1O001O0000000000001O0000000bHgLi4Y3eJ^MV5b2aJnMX5S2]J\\NCkMX5i3SK`N_OSNV5]3WK_Og4a0XK@h4`0WKBh4?UKDj44d4`6lK`IT4Z6SLfIl3T6[LkIe3T6]LlIb3S6`LmI_3S6bLmI]3S6cLnI\\3R6eLnIZ3S6fLlIZ3U6fLkIY3X6eLhIZ3Z6dLgI[3Z6eLfIZ3[6fLeIY3]6fLcIY3_6fLaIY3b6eL]I[3e6dL[I[3f6dL[I[3f6eLZIoNOg3i6YMWIoN2g3h6ZMWIkN5i3e6\\MUIjN8i3c6^MUIgN:j3b6_MiIa2Y6^MgIa2Z6^MgIa2Z6_MeIa2_6\\M`Id2b6[M^Id2e6ZMZIf2g6ZMXIf2i6ZMUIg2l6XMTIh2m6XM
RIh2Q7VMmHk2V7SMiHm2Y7QMgHo2[7PMdHP3]7oLeHo2\\7PMeHo2]7PMbHP3`7nL`HR3a7mL_HS3c7lL\\HT3f7jLZHV3g7iLYHW3h7hLYHW3h7iLWHW3j7hLVHX3k7gLUHY3l7gLSHY3n7fLRHZ3o7eLQH[3P8dLPH\\3Q8cLoG]3Q8dLnG\\3S8cLmG]3S8cLmG]3T8cLkG]3V8bLjG^3W8aLiG_3X8`LhG`3Y8_LgGa3Z8^LfGb3[8^LdGb3]8]LdGb3]8]LcGc3]8]LcGc3^8\\LbGd3^8]LaGc3`8\\L`Gd3`8\\L_Ge3b8ZL^Gf3c8XL^Gh3S901O000000O1000000O100000000hK\\LgNe3X1^LeNc3Z1aLcN_3\\1dLbN\\3e0XLUL`0T3X3f0[LSLa0T3T3i0_LmKa0Y3o2j0hMVOV2j0kMVOT2j0mMVOR2i0PNWOo1j0QNVOn1j0SNVOl1j0VNSLWN`2c3]1WNRLWN`2b3^1XNQLWNCOk2a3a1YNQLZN^2]3a1ZNQLYN^2\\3a1\\NPLXN_2\\3b1\\NoKYN]2[3d1]NnKYN]2Z3e1^NnKXN\\2Z3g1^NmKXN[2Z3h1_NmKWNZ2Z3i1`NmKVNY2Z3j1bNlKUNX2Y3m1bNkKTNW2[3n1bNkKSNV2[3o1cNkKRNT2\\3Q2cNkKQNS2\\3R2dNkKoMR2^3T2bNkKPNP2^3U2cNkKoMo1^3V2eNjKmMo1^3X2eNiKlMn1`3Y2eNiKjMm1b3Z2eNiKiMl1b3[2fNiKiMj1a3]2gNiKhMi1a3^2hNiKgMh1a3_2iNiKfMg1a3a2iNhKfMe1b3c2iNhKfMc1a3e2jNhKfMa1`3g2kNhKfM^1`3j2kNhKeM]1`3k2lNhKdM\\1`3l2mNhKbM\\1a3l2nNhKaM[1a3m2oNhK`MZ1a3n2POhK_MX1b3Q3oNgK_MW1b3R3QOfK]MW1b3S3ROfK[MW1c3S3SOfKZMV1c3T3TOfKXMV1d3T3UOfKWMU1d3U3VOfKVMT1d3W3VOeKVMS1d3X3WOfKSMR1f3X3XOfKQMQ1h3Y3XOfKPMQ1g3Y3ZOfKnLQ1h3Y3[OSMe0m2]OQMc0o2^OoLc0Q3^OnLb0R3^OnLb0R3_OgKjLe0h3d3_OkLa0U3@jL`0W3@gLa0Y3AdL`0\\3AcL?]3AhKbL;n3m3AgKfL7i3S4@fKiL5g3U4AeKiL4g3W4@eKjL2g3Y4@eKjL0f3\\4@cKkLOf3^4AbKjLMf3a4C_KQ1b4PO\\KP1d4l300001O00000000000000001O00000000000000001O00000000000000001O0000000000001O00000000001O000000000000001O00000000000000001O0000001O0000000000001O000000001O000000001O000000001O0000000000001O00000000000000000000001O00000000001O0000000000000000001O00000000001O0000000000001O0000001O001O000000000000001O0000000000000000000000001O0001O0000000000O101O00000001O0000O10001O00000000000000001O000000000000000000000000000000000000000000000000001O01O0001O0000]J" + }, + { + "size": [ + 428, + 640 + ], + "counts": 
"ff[21[=00000SX<3egC8oBJSL4L4H8E<0O102N3M4L1O3M3M3M1O2N1O101N3N2M1100O001N2O4L1O000001N5L1N1O1O2O0O2N3M2O2L3N1O1O2N101N2N1O1O100O1O100O1000O010_LfGo1Z8PNfGQ2Y8nMhGR2Y8nMhGQ2W8oMfGdN2\\3X8nMmGR2S8mMmGT2S8mMlGS2T8PNiGP2W8PNiGo1X8mMmGR2R8nMnGS2R8mMgGgN3\\3V8QNkGm1U8SNmGl1S8SNmGm1T8QNnGo1Q8RNnGo1R8QNnGo1Q8RNPHm1P8RNQHo1n7PNRHQ2n7PNQHo1P8RNPHm1o7WNoGg1R8WNoGi1S8UNnGi1T8mMcGUO9m2U8kMeGZO3j2Z8kMeGZO2i2Z8kMgGYO1k2Y8jMXHV2j7gMXHY2i7dMYH\\2h7aMZH_2g7]M\\Hb2i7TMSGNW1m2V9O2N2N2M4L3M2O3L3N4L1O1O1O1O1O1O1O1O1O2N1O010O0O101N2N101N2O1O1O2N1O1N2N6K1O0O2O1O1O1O1N101O1O1O0O2N1O201L2O2M4MZ>InA0N4M0O2L41O01ON11O10O101N3ImB0Pko2" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is on .", + "image": "images/caption_simple_19.png", + "model_output": " is on ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000016598.jpg", + "mask_rles": [ + { + "size": [ + 640, + 478 + ], + "counts": "Sme635l0Ni0OYNe`0Z3K3N2O1O010000O100O001M3N2O1O1O1O100O10000O10000O10000YOZ_OTNf`0j1^_OSNc`0l1`_ORN``0m1b_ORN^`0n1c_OQN]`0n1d_OQN]`0o1d_OPN\\`0o1f_OPNZ`0P2f_OPNZ`0P2g_OoMY`0Q2g_OoMY`0P2i_OoMW`0Q2i_OoMW`0Q2i_OoMW`0Q2j_OnMV`0R2j_OnMV`0Q2k_OoMU`0Q2k_OoMU`0Q2k_OoMU`0Q2k_OoMU`0Q2k_OoMU`0Q2k_OnMV`0R2k_OmMU`0S2k_OmMU`0S2l00000000000O10000010O01O001O1O1N2L5iNj]O_OXc06g\\OLb^X1" + }, + { + "size": [ + 640, + 478 + ], + "counts": "Vj133Nac0a0^\\OAPc0l0Fg5mAYJV>Z5jAlJ]>S4fA^Lg>^3[AnKM9l>g3WA_Lm>_3SA`LQ?]3QAaLR?\\3UA[Lo>b3RA]LR?]3`@]L?6S?Z3PAfLR?W3P1YO\\_OeMi\\1" + } + ], + "question": "What is doing with ?", + "answer": " is holding ", + "image": "images/caption_simple_20.png", + "model_output": " is holding ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000439854.jpg", + "mask_rles": [ + { + "size": [ + 333, + 500 + ], + "counts": "bhP11[:2O0O101N1O100O1O100O1O010O2O0O3N0OLTFIl9650010O0010O010O010O0010O0010O010O000010O0100O100O2OMRFHl98UFHk97WFHh996O1O100O2MaY`3" + }, + { + "size": [ + 333, + 500 + ], + "counts": "hmT19o0JR8=gGDX8`0SGG3Jj8o0SGROl8X100ZOjNUHV1i7nNUHQ1k7QOTHo0k7SOTHm0k7UOTHk0l7VORHL26l70QHJ54j73PHI73i75oGH;0f78oGH=Me7 and ?", + "answer": " is on .", + "image": "images/caption_simple_21.png", + "model_output": " is being ridden by ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000012062.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "l`k7>k<4L4[C\\O^ and ?", + "answer": " is in front of .", + "image": "images/caption_simple_22.png", + "model_output": " is beside ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000121586.jpg", + "mask_rles": [ + { + "size": [ + 478, + 640 + ], + "counts": "VTZ191IY>g0KO=YOaA2OO4Oi=0]B0H002OOk=2SB20:1Bk=k0VBTOT<;VEHdN:3EV5]A0R>a0M2N2N101N2O1N1O000O1O100O100O100O10000O100010O01O4L1O0001O00001O1O001N2N2M3N2O2M2N3M4L4L2M6Inok5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is inside .", + "image": "images/caption_simple_23.png", + "model_output": " is in front of ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000476704.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "V8T5X8001O000000O10000000000010O02N1O00003L3N0000010O5K2N0O2O00000001O00000000O01O1L4M30000O10001OO1000000001O0000000000O1000000000000000000000O1000001O000000O100000000001O00000000O2O00101N3M3M1O0000000O11O00000000RL\\GT3d8jL_GU3a8gLcGDJ[3d8nLTHl2l7RMWHm2U9O00000010O0001N100000001O0000000000000000001O001O000000001O0000O10000001O0000000000000000001O00000000000000001O0000000000000001O0001O0O100000000001O0001O0O100000000001O00O100000001O00000000000000001O00000000000000001O0000000001O0000O100000001O0000000000001O00000000001O1O001O000000000000000000000000000001O0000000000000001O000000O100001O000000000O100000000000001O01O00000000000000000001O00000000000000000000000000001O0000000000000000001O000O10000000000000000001O0001N010000000000000001O00001O00000000O101O000000000000000000001O00000000000000000000000000000000000000001O000000O10000O10000O10000O10000000000001O4L1O001O001O0000001O000000000000O10000O1O1O1O1N2N2N2N2N2M3N2L4O1O1O1O10000O10000O100O10000O100O10000O10000000000O100O100O100O100O1O1O1O1O1O100O10000000eL_FP1K`0i9[N`Fb0IF1[1U:nNmEA2a1X:]N`Fb1e:0000000001O001O00002N3M1O002N1O2N001O1O1O1O00O10O10O2N100O100L4O1O1O1N2O100YLoNSKQ1_4^O`Kb0_4_O`Kb0`4^O`Kb0`4^O`Kb0`4^O`Kb0`4]ORISOf1`1X5]OQIVOe1]1Y5^OQIXOd1Z1[5^OPI\\Ob1V1^5^OPI^O`1T1`5^OPI_O`1R1_5POcHI?4_1S1_5oNfHG?2_1X1\\5oNULQ1k3oNULQ1k3nNVLR1j3mNWLS1j3jNXLV1h3gNTKMYN]1c6eNUK2TNY1g6eNUK6PNU1k6dNVK=hMP1R7bNWKb0bMm0X7^NXKg0]Ml0[7ZN[Kl0WMk0^:Q100001O00000000000000001O000000000000000000O100000000O1O1O1N2N2O1SOPMgFg3^91FYLoFg3P9\\LmFe3Q9 and ?", + "answer": " is parked on .", + "image": "images/caption_simple_24.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000211042.jpg", + "mask_rles": [ + { + "size": [ + 640, + 458 + ], + "counts": "YdX34ic05L3N2N2N2N101N1O2N1O2N1O2O1N101N2O0O2N100O1O0001O0O1O2O001O001N10O20O0001O01O01O1O01O0O2O0O1O100O10001O01O01O001O1O001M3N2K7Ghbf0L_]YO5`N3n]OK;7ea0b0n]OFQb0U1N2N2N2I7K5K5N201O00000000000001O00001O000000001O0000001O00001O00001O00001O00001O00001O001O0000001N10001O00001O001O00001O0O2O001O001O001N101O001O1N101O001O0O2O1O0O2O1O1N101N2O1N101N2O1O1N2N2O2M1O2O1N3L3N3L5Jbcc1" + }, + { + "size": [ + 640, + 458 + ], + "counts": "]dW3;bc04M2L4L4N2oK^OaDe0^:X1RB_Nd1?U<_2WChMc^3m0N3M2O1N2O1O1O1O1N2O1O1O1N1O2N1_Ng_OYO]`0f0h_OnN``0P1X10001M2M4J5E;K6L5JnX=0`SB3WhQ3" + } + ], + "question": "What is the relationship between and ?", + "answer": " is attached to and leaning on .", + "image": "images/caption_simple_25.png", + "model_output": " is attached to ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000055299.jpg", + "mask_rles": [ + { + "size": [ + 429, + 640 + ], + "counts": "e and ?", + "answer": " is sitting on .", + "image": "images/caption_simple_26.png", + "model_output": " is standing on ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000435206.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "b531`7c5`H]J`7c54000\\H]J`7c5_H^Ja7b5_H^Ja7b5_H^Ja7b5_H^Ja7b5_H^Ja7b5400000000000000O11O0000O10000001O00O11O0000O1000000000000001O00O1000000000000001O00O1000000000000001O00O10000001OO1001O000000O11O0000000000000000000000000000001O0000O1000000000000001ZH]Jb7b54000000000000000000000000000[H^Ja7b5_H^Ja7b5_H^Ja7b5_H^Ja7b541O000[H\\Jc7f501O000000O100O11O0000N]H\\Jc7c530000000000000000000000O1001O00O1000000O100001O0000000000O1000000000000000000000000O10000001O00O1O1O1000000001O00O100O100O10000000000O100N200001O1O001O000000000000000000000000000000000000000000000000000000001O00001O001O001O001O1O0000001O00001O001O000mH^J]6c5`I_J`6a5`I`JCLg6e5dIeJ\\6\\5bIfJ]6[5_IhJa6X5^IiJb6X5]IhJc6X5]IjJa6W5]IkJb6U5^IlJa6T5^ImJb6o5O000000001O00001O001UJ^Ik4b6TK`Ik4`6UK`Ik4`6TKbIk4_6SKbIm4^6RKcIn4]6RKcIn4^6PKcIP5^6oJbIQ5^6mJdIS5\\6kJfIU5[6iJfIW5[6gJfIY5[6eJfI[5[6dJfI[5U701O1O1O1O1O4L000000O11O00001O0000001O0000O10000000000000000000[LjGU2W8hMkGX2U8hMkGX2U8hMkGX2U8gMlGY2U8fMkGZ2U8fMkGZ2U8fMkGZ2V8eMjG[2V8dMkG\\2V8cMjG]2W8bMjG]2X8gLfG<2m2Y8fLeG=1n2[8cLfG?Nn2\\8bLhG?Lo2^8_LgGa0LP3i8oLYGP3h8nLYGR3g8mL\\GQ3e8mL\\GS3f8jL]GT3e8iL]GV3e8gL\\GY3\\9O00001O2N001O00001O00001O001O001O1O1O001O1O1O001O001O1O2N001O001O1O001O1O1O1O000WN[El0f:mNaER1`:kNcET1^:kNbEU1^:jNdEU1\\:jNeEV1\\:gNgEX1Z:dNjE[1X:`NkE`1Q;eNXDQ1h;nN]Dn0P<01O00001O100O2N0000001O00003MM3O100O1O100O10000002N6J00001O1O3M2N1O2N2M5L3M1O1O0Oml`2" + }, + { + "size": [ + 427, + 640 + ], + "counts": "Vbn3:e< doing in relation to the ?", + "answer": " is looking at the .", + "image": "images/caption_simple_27.png", + "model_output": " is looking at ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000137950.jpg", + "mask_rles": [ + { + "size": [ + 415, + 640 + ], + "counts": "WW1231N11O0O11N10?0<4 and ?", + "answer": " is flying over .", + "image": "images/caption_simple_28.png", + "model_output": " is over ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000435208.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "o`02d03AMeb0T1M5Z]OhN1NZb0f1L1OO1O1001N10001O10OO2O1O1O1N2O1O1O1O1N2O1O1O100N2002YN\\^Om0fa0PO]^Oo0ca0PO^^OQ1aa0lNb^OT1_a0jNb^OV1^a0hNd^OX1]a0eNe^O[1[a0dNf^O\\1oa0000000000O1000000000000000000O10000000000000000O10000000O100000000000O1000000000000O1000000000O1000O100000000000000O100000000000000000000O10O100000000000000000000000000000000000V_O^NdN2W`0a1SAiNm>W1k@]NlN>Y`0U1h@TOX?l0c@YO]?g0\\@@d?`0\\@@d?`0\\@@c?a0[@Ae??W@Ei?;m_OOS`01j_O2V`0Ni_O3W`0k100000000000000000000000000000000000000eM]LYDc3`;fL^DZ3_;iLaDW3Z;nLfDR3Q;\\LkBg0T2m2b:fM^EZ2a:gM_EY2a:gM_EY2a:gM^EZ2b:fM[E]2e:cMeDS3Z;nL]D[3c;dLPDj3PjNXAb1c>`NWAg1g>ZNVAk1g>VNTAR2g>PNVAW2e>jMXA^2b>`M_Ae2\\>[MdAg2[>SMhAR3V>jLlA[3Q>dLoA^3o=dLnA`3P>`LoAb3P>_LnAb3Q>_LnAc3P>_LmAd3P>^LnAd3o=`LPB`3k=jLnAY3i=PMTBS3g=RMWBo2h=SMVBn2i=VMRBk2n=d1O1O1O10O010O0100000O01000O10O010000000001O0000001O000000001O001O000O2eKPBd2Q>YMRBf2n=ZMTBd2m=ZMUBe2k=[MWBc2j=[MXBd2j=ZMXBd2l=XMXBd2i=ZMXBf2m=YL_Ag0e0P3U>iL`Bd2a=PMQCi2V?L6K2N2N1O1O0O100000001O0O102N1N01N1101O1N01000O01N110O1O101N101O0O2N2N100O100O1000O10O1O10O1@i^OWNVa0f1P_OWNQa0a1X_O]Ni`0a1l0O1N2N2N2N2N2M4M2L4L5I7J6N6CWgZ6" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is sitting on .", + "image": "images/caption_simple_29.png", + "model_output": " is sitting on ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000137576.jpg", + "mask_rles": [ + { + "size": [ + 563, + 640 + ], + "counts": "0g15UOf>k0ZAUOf>k0ZAUOf>k0ZAUOf>k0ZAUOf>k0ZAUOf>k0ZAUOf>j0[AVOe>j0[AVOe>j0[AVOe>j0[AVOe>j0[AVOe>j0[AVOe>j0[AUOf>k0ZAUOf>l0YATOg>l0YATOg>l0YATOg>k0ZATOg>l0YATOg>l0YATOg>l0YATOg>l0YASOh>m0XASOh>m0XASOh>m0XASOh>m0XASOh>m0XAROi>n0WAROi>n0WAROi>n0WAROi>n0WAQOj>o0UAROk>n0UAROk>n0UAQOl>P1TAoNl>Q1TAoNl>Q1TAnNm>Q1SAPOm>P1TAoNl>Q1TAoNl>Q1TAoNl>Q1TAnNm>R1SAnNm>R1SAnNm>R1SAnNm>S1RAmNn>R1SAmNn>S1RAmNn>S1RAmNn>S1RAmNn>S1RAlNo>T1QAlNo>T1QAkNP?U1PAkNP?U1PAkNP?V1o@iNR?V1o@jNQ?V1o@jNQ?V1o@jNS?T1n@jNS?V1m@jNS?V1m@jNS?V1m@iNT?W1l@iNU?V1k@jNU?W1j@iNV?W1j@iNV?W1j@hNX?W1h@iNY?V1g@iN[?V1e@jN[?V1e@jN\\?U1d@kN]?T1c@kN^?U1b@kN_?T1a@lN_?T1b@kN]?V1c@jN]?V1c@iN^?W1b@iN^?W1b@iN^?W1b@hN^?Y1b@gN^?Y1b@gN^?Y1b@gN^?Y1b@gN^?Y1b@fN^?[1b@eN^?[1c@dN]?\\1c@cN]?^1c@bN]?^1c@bN]?^1c@bN]?^1c@aN^?_1;000000001O000001O1O000010O00000001O0001O0001O000000001O001O00001O001N10001N10001O0O2O001O001N101O001O1N10001O0O101O000O101O1N101O1O1O0O2O2N1Ao^O9Wa0NS_S8" + }, + { + "size": [ + 563, + 640 + ], + "counts": "adj0X1P`0?Q@`N]?e1e@ZNZ?g1f@YNX?i1h@TNY?m1h@QNY?m16M4K5J7F:F:F;CY\\34ncL04HWc62lfM2dTKM26Kn01PO4N000NV>[3I100O1O0000000001O001N2O001N2O1O1O1O1O1N2O1N2O2N1O1O001N1O101O000000K5M4N10000001O001O1M4I6N2M5fMk@P2X?oMh@Q2Y?32OO02O000O1O3N0O10001N100O101O0O1000001O000O2O00001O00001O1O001O001O001O001O0O2O001O001O0O2O0000001O000O2O000010O0001O00001O0O101O001N100O1O2N1O3L2N3N2N3N2M9H1O001O00000000001N1O10gon6" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is over .", + "image": "images/caption_simple_30.png", + "model_output": " is over ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000126137.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "bQ`1`0]<`0L2J7OO2mNmCl0SQE6c;I_D6b;H`D6b;H`D7a;HaD5a;IcD4a;FbD5aQZ5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_31.png", + "model_output": " is beside ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000573943.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Ya`84l>5Kb0^O6J1O1O000O01N2N2M3N200O1000001O000000001O000000001O0000000000001O002Ldd9" + }, + { + "size": [ + 480, + 640 + ], + "counts": "doj7g0U>5K5N10000000000000O010O10000001O1O1O1O001O0000000000O2O0O1001O01O000O1000010O0000000O1O2N1O6I_`j0" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_32.png", + "model_output": " is beside ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000225532.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "h_d21f;1O1O2N1O1N2O1O1O1O001O001O001O0oDBi:?VEBi:?VECh:=XEDg:=ZEBe:>\\ECb:>]EDa:=_ED_:=aEC^:=bED]:=cED[:=eECZ:=gEDW:=hEEV:SHAl7?UHAj7a0UH^Ok7c0UH\\Ok7e0TH[Ol7f0THYOl7g0VHWOj7j0VHUOj7l0VHSOj7n0VHQOj7P1VHoNj7R1VHmNj7S1WHlNi7U1WHjNi7V1XHiNh7Y1WHfNi7[1WHdNi7\\1XHcNh7_1WH`Ni7a1WH^Ni7c1WH\\Ni7d1XH[Nh7f1YHXNf7j1ZHUNf7l1ZHSNf7n1ZHQNg7o1YHPNg7Q2YHnMg7S2YHlMg7U2XHkMh7V2XHiMh7X2XHgMh7Z2XHeMh7[2YHdMg7]2YHbMf7`2ZH_Mf7b2ZH]Mf7d2ZH[Mf7f2ZHYMf7h2ZHWMf7j2ZHUMf7l2ZHSMg7m2ZHPMg7P3`01O1O1O100O002N001O001O2N001O1O1O1O1O1O0000000000000000000000000000cMQHR1o7eN_HV1a7eNgHX1X7fNPIU1Q7hNXIQ1h6nNbIi0]6VOiIf0W6YOmId0S6\\OnIc0R6\\OQJb0o5^ORJ`0o5@SJ>n5@TJ2fMOU80WJKdML14OLU89WJ and ?", + "answer": " is driving on .", + "image": "images/caption_simple_33.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000424349.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "XlW48l<>`CBX3L^4a1YKdNb4e1UK_Ng4g1RK^Nl4f1oJ]NP5g1jJ\\NU5h1fJZNX5j1dJXNZ5l1bJVN^5l1_JUN`5n1WIdMLa0m6V3nHlLR7X3bHPM]7_4O1O100O100O10000O100O100O100O10000O10000O100O100O10000001O00000000O10000O1_NoIiKQ6V4[J^Kf5_4cJYK_5`4mJYKU5e4oJSKgNK[6P5j1N2N2O1O1O1N2N2O1O1O1O1O100O100O100O100000000O1000000O10000001O0000000000001O001O001O001O000eIcKi3^4nKlKP4V4fK\\KXNc0R6Q4dKTL\\4m3aKVL^4k3`KVL`4k3^KWLa4j3]KWLc4k3[KVLd4k3ZKWLe4j3XKYLg4j3TKXLf3kNZLP5IYLk3nNWLl4G[LP4mNVLn6i3TIVLl6i3VIVLj6i3[ISLf6k3\\ITLd6k3_ISLa6l3bIRL^6n3dIPL\\6o3iIlKX6T4mIgKR6Y4`100000000O10001O0O101O001N101N1O1N3J5H9EZFiLm9g2c0L5L3N201N101O0000001N2O1O1N101O1O1O00001O001O1O1O1O001O00000001O0001N110O00000001O0001O000000001N10000O101N1O2O001O000O2O001O1O001O1N2N2O0O2O1O2N1O2M2O2N1O1N2O0O2O1N2N1N5L6GcbP1" + }, + { + "size": [ + 426, + 640 + ], + "counts": "^j7=g<6I8L4`ETO[8Q1bGRO[8R1cGoNY8f1UG[Ng8l1RGXNg8Q2UGQNf8U2XGlMf8V2YGkMd8Z2XGhMc8^2[GcMc8`2ZGbMd8a2ZG`Mc8d2[G]Mb8g2\\GZMa8k2\\GVMb8j3O1O2L3QM^KUKJW1j4[3^LcLc3[3`LbLb3[3aLdL`3Y3cLcLa3\\3`LbLb3Z3bLeL_3Z3bLdL`3[3aLdL`3Z3cLdL^3[3fLbLZ3]3lL]LU3c3lL[LU3e3kLZLV3d3PMWLQ3h3SMTLn2l3TMRLl2n3TMQLm2n3UMQLk2o3UMPLl2P4VMmKk2R4XMkKi2U4WMiKk2W4UMhKl2X4UMeKm2[4Q300O1O100O1000000001O00O10000O100000000O1000000000000000000000000000000O11O000000000000O1000000000000001O001O001O0eJeKg1[4WNhKh1Y4SNlKl1T4QNoKo1R4nMQLQ2o3nMRLR2o3lMSLS2n3jMTLV2m3hMULW2l3fMVLZ2l3dMUL[2l3cMUL]2l3bMUL]2m3aMTL^2m3`MUL_2m3_MVL^2P4\\MTL`2R4YMRLd2T4VMQLe2R4WMPLh2R4UMPLj2R4TMoKk2Z4eKTJP1e1Z3_4_LmKY3\\4XLlKf3n6N001O0000001O1O2N3M4L3M2M2O1O2PO[FVNi9f1_FQNd9l1aFPNb9m1o0N3L5K4M3M3L4L4K5M3L4K8I_ko5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_34.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000173302.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "en\\43U=10000O101O00000001NVDMfo19k[N4O100000001QOBQE>l:HQEd0a:@]ET1n9nNQFR1n9QOPFo0P:ROoENE7]:LmELS;5lDKS;7lDIT;8kDHU;9jDGV;9jDGV;:jDEV;gDBY;>gDBY;>gDBY;>gDBY;>hDAX;?hD@Y;`0gD@Y;`0gD@Y;?iD@J3g:=_E7`:I[EmEClNOW;>mEBmN1U;=mECnN0U;=mECnN0U;>lEBoN0T;?mEAPOOS;`0mEAPOOS;`0mE@QO0m96jF:8@QO0k9:jF6:@RO0i9;jF5;@RO0h9>iF3;AROOc03X8nDAR;?nDAR;?nDAR;?nDAR;?nDAR;?nDA6Ln9c0lE`0S:@mE`0S:@mE?T:AlEOHROh:o0aE_OR;`0oD_OR;a0oD^OQ;b0oD^OQ;a0oDG9D\\jd3" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_35.png", + "model_output": " is beside ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000352760.jpg", + "mask_rles": [ + { + "size": [ + 640, + 544 + ], + "counts": "PX1`bW16YQiN6J3N3L2O3M3M1O2N2N2O1N2N2N1O2O1O1N200O001O1001O0O2O1O0O100O11OO100000000O10O10O01000O100O010O100O1O02OO10000O010O100O00100O01000O010O00100O010O010O01O010hMQO\\Ao0a>TO`Ak0_>VOaAk0\\>XOa_OJm1m0a>@_Aa0^>AbA?]>BdA=Z>DgA=W>DjAEnA;Q>GnA:o=HRB7l=KTB5k=LVB3g=O[B1a=1`BO^=2cBN]j7`1`EPNg2?i7\\1hERNa2b0d7Z1QFPN]2g0`7S1[FTNW2h0\\7d0SG]Nd1o0X7`0WKAf4>^KAa4>aKB]4>eKBZ4=hKDU4`1o_OYNg12Y>]1\\2N2N1O2N101O001O0010O2O0O2O2[OlNZ^OV1_a0TO[^On0^a0o0\\EaMa3c2]L^Ma3e2\\L]Ma3g2]L[Ma3g2]LZMa3j2\\LWMb3l2\\GPM\\36V5n2oF\\Ne2iNY6Q3dFQO`2PNk6\\6jHeIU7d6\\FSIe1;m7e7eFWHL8]9b9N2O1O2M2O1O2N002N1O2N1O010O00O100O2M2O2K4N3M3C`FVFd9c9`0K5J6L4fM[FQJm9h5`FmIe9m5eFeIh9R6_2B7]O`0E;J6J6M2N3N2N2O1O10001O000001O1O00100O2N1O100O2N1O2O1N2N3M2O2M3N1NZLVAY2g>fM_AX2_>gMdAX2Z>iMiAU2U>lMmAS2R>lMQBS2n=mMSBS2k=mMYBQ2f=PNZBo1f=QN[Bn1d=RN^Bm1c=RN^Bn1`=SNaBm1^=SNcBm1]=RNdBn1[=RNfBo1X=PNjBQ2T=oMmBR2R=mMoBT2o and ?", + "answer": " is below .", + "image": "images/caption_simple_36.png", + "model_output": " is over ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000344614.jpg", + "mask_rles": [ + { + "size": [ + 640, + 478 + ], + "counts": "Qag32kc04N2M2O100N201M2O1O2K6K4W@VOUXOa0lJj@00Y4Ta0[N8H3K5J6J6K5K5oNk]O6Vb0In]O3Sb0Lo]O0Tb0LU^OGSb06m0M3M3NRXj02ngUO001O000000000000001OO10000001O00001O001O1O001O001O001O0000001O0000001O0000000000001O0000001O000000O100001O1O00001O001O000000000000001O00001O1O000000001O000000001O0000001O00000000001OO1001O1O00001O0000001O00000000001O00000000001O001O00000000001O001O1O2N00OV]OnNhb0Z1K7I7I1O1O1O1O2N000000O100O1N2001O1OO1O1O1M30000000000000000003M1O1O001O00000000001O0R^ORN60J1010N10k`0n1h_OTN``0l1a_OTN@Ne`0o1^_OUNG4ONl`0m2o@oLUN61JO11NS>S3nBlLPOW2Y=l0lCoLiNS2Y=0iBeNU1XOiN;1c1m<6TCcNT1IjNc1jLGc8K_GX4`8iK^GX4b8iK\\GX4d8c3kGfEb6\\:WIgFk5[9lIdDOZ40gK2n01RONO150;OF0Lm0lc0" + }, + { + "size": [ + 640, + 478 + ], + "counts": "W_W6e0Vc0:F9I6K4M4K4L4M3M3M3M2N3M2N3M2N3M2O1N3N0O2N2O1O1N2O1N2O1O1O001O1N101O1O1O1O001O001O001O001O001O001O0001O0001O01O010O001O00001O001O001O001O001O001O001O1N2O1O001N2O1O0O2O1N2O1N3M2O1N2N3M2O1N3M2N3M2N3M3L3N4K4L4M3K7I6J8H=]On_n0" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is on .", + "image": "images/caption_simple_37.png", + "model_output": " is over ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000272148.jpg", + "mask_rles": [ + { + "size": [ + 378, + 640 + ], + "counts": "V4c7W40000000000O100000000000000001O00O1000000000000001O00O1000O1O11O00001O2N001O0000JbHPL^7P4bHoK_7Q4aHoK_7V4001O0K`HPL_7Q4aHoK_7U41O100001O0001O00O1000000000000001O00O100000000000000001OO1000000000000000000000000000000000010OO0101O000000000000000000001O0kJfKj2Z4UMhKj2X4VMiKi2W4VMjKj2V4VMjKj2V4TMlKl2T4UMkKk2U4UMkKk2U4WMhKj2X4\\2000lIfKh4Z4VKhKj4X4VKgKk4Y4[100000RJgK[4Y4eKgK[4Z4b10000000000001O2N00000000000001O]IeKe00RO0l3[4_LcKb08mNKQ4[4nMkKPNJR4[4oMjKoMKR4[4PNhKoMMQ4[4QNgKnMNQ4[4RNfKlM0R4Z4RNfKlM0R4Z4RNeKlM2S4X4SNcKkM4S4X4]NhKd1Y4YNiKg1W4XNjKg1W4XNjKh1V4WNkKi1U4WNkKi1U4WNjKj1V4oMbKjM7W4W4oMbKlM4V4Z4nMbKa2]4_McKa2]4]21000000000000001O00O10000000000000O1000000001O000000000000000[LbKN^41gKKY45gKKY43iKMW40lK0T4NnK2R4NnK2R4NoK1Q4OoK1Q40nK0R42kKOU42jKNV42jKNW42gKOY44cKM]4h30000000000000000001O00O1001O0000000000001O000oI`Kh4`4XK_Ki4a4X1000000000000000000000000001O000000O11O0dNeKYK[4f4gKYKY4g4gKYKX4g4jKXKV4h4jKXKW4f4jKZKV4f4jKZKV4f4jKZKV4f4jKZKV4f4jKZKV4g4jKXKV4h4jKXKW4f4jKZKV4f4jKZKV4g4iKYKW4g4iKYKW4g4iKYKW4h4hKXKX4h4hKYKX4i2]KRN:TOZ4h2_KSN7UOZ4h2_KSN7UOZ4g2`KUN5TO[4g2aKYL0d14\\O[4g2hKmMM\\O[4g2hKmMN[OZ4h2hKmMN[OZ46]KZ1;TON^OY44^KY1;VON]OY44^KX1ZOL[OZ4b2kKSNK[OZ4a2kKUNLYOY4b2kKUNLZOX4a2kKVNMYOX4b2jKTNOZOV4e2hKRN2YOW4h2aKPN8YOW4d4jK\\KV4c4kK]KV4b4jK]KW4b4iK_KW4`4jKaKV4]4kKcKU4]4kKbKV4]4kKcKU4]4kKcKU4]4kKcKU4^4iKcKT4nN^Ka5 and ?", + "answer": " is over .", + "image": "images/caption_simple_38.png", + "model_output": " is over ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000222317.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "kc^1g0T>>E7J2N2O0O1fCQOnN_O10[;`1fEFlNkNn:_1WF2`9N`F9Y9GgFl0f8SO[GX1Z8hNgG]1S8cNmGb1n7^NRHf1j7ZNVHj1e7WN[Hk1c7UN^Hl1`7TN`Hm1_7RNbHo1]7QNcHQ2[7oMeHR2Z7nMfHS2Y7mMgHT2X7lMhHT2X7kMiHV2V7jMjHW2U7iMkHX2T7hMlHY2S7gMmHZ2R7fMnH[2P7fMPI[2o6dMRI\\2n6dMRI]2m6cMSI^2l6bMTI_2k6aMUI`2j6`MVIa2i6_MVIc2i6]MWId2g6]MYId2f6\\MZIe2e6[M[If2d6YM]Ih2b6XM^Ih2b6XM^Ij2`6VM`Ik2_6TMbIm2]6SMcIm2]6SMcIn2\\6RMdIYOkNK2P2]7mNfISOTOG0Y2T7mNhIkN@GJ`2m6nNiIiN:Z2l5mNjIgN=\\2h5mNlIdN?_2d5mNoKR1P4nNQLR1n3nNSLR1l3nNTLS1k3mNVLS1i3mNZLQ1d3PO]LP1b3PO_LP1`3PO`LQ1_3oNdLo0[3QOfLo0Y3QOhLo0W3QOjLo0U3QOlLn0T3ROmLn0Q3SOPMn0n2ROSMm0m2SOTMm0k2SOUMn0j2ROWMn0h2ROYMo0e2QO\\Mo0c2QO]Mo0c2QO]MP1b2PO^MQ1a2oN_MR1_2nNcMR1\\2nNdMS1[2mNeMS1[2mNeMT1Z2kNgMV1X2iNiMX1V2hNjMY1U2fNlM[1R2eNoM\\1P2cNQN^1n1aNSN_1m1aNSN`1l1_NUNa1k1]NWNc1h1^NXNb1h1^NXNb1h1]NYNc1g1\\NYNe1g1ZNZNf1f1YN[Ng1e1XN\\Nh1d1XN\\Nh1d1WN]Ni1b1XN^Nh1b1XN^Nh1b1XN^Nh1b1XN]Ni1c1WN]Ni1c1WN]Ni1c1WN]Ni1b1YN]Ng1c1YN\\Nh1d1XN\\Nh1d1XN\\Nh1d1YN[Ng1e1YNZNh1f1YNYNg1g1YNYNg1g1ZNXNf1h1ZNWNg1i1YNWNg1h1ZNXNf1h1[NVNf1j1YNWNg1i1WNYNi1g1VNZNj1f1VNZNj1f1UN[Nk1e1TN\\Nl1d1SN^Nl1a1TNaNk1_1TNcNk1]1UNdNj1\\1UNeNk1[1TNgNk1Y1TNhNl1X1TNiNk1W1TNjNl1V1SNmNk1S1UNoNi1Q1VNQOi1o0WNQOi1o0WNSOg1l0YNXOd1h0[N]Oa1c0_N@^1`0aNC]1=cND\\1P2@SN>n1^OVNb0j1\\OXNd0h1[OYNe0g1YO\\Nf0d1YO]Ng0c1WO_Ni0a1TObNl0^1SOcNl0^1ROeNm0[1ROgNm0Y1ROiNm0W1QOkNo0U1POmNo0S1oNPOP1P1oNQOQ1o0nNTOP1l0nNWOP1j0oNXOP1h0oNZOP1f0POZOP1f0nN^OP1b0oN@P1`0POAo0?POBP1>POCo0=PODo0=QOCo0=QOCo0=QOCn0>ROBn0>ROBn0>QOCo0=QOCo0=QOCo0=QOCo0=ROBn0>ROBn0>ROBm0?SOAm0?SOAm0?SOAm0?SOAm0?SOBk0?UOAk0?UOBj0>WOAi0?WOAh0`0XO@h0`0XOAg0?YOAg0?YOBf0>ZOAg0?ZO@f0`0ZO@f0`0ZO_Og0a0YO_Og0a0ZO]Of0d0ZO[Og0e0YO[Of0f0ZOZOf0f0[OYOe0g0[OXOf0h0ZOXOf0h0[OVOf0j0ZOVOf0j0ZOVOUN^LV1\\4e0VOSNaLW1Y4g0TOoMhLY1T4h0TOPNgLX1U4h0TOSNdLU1X4h0TORNgLS1V4l0ROoMlLS1R4n0ROnMnLS1P4o0SOiMRMV1l3Q1ROhMSMW1k3R1QOfMVMW1i3S1QOcMYMZ1f3S1QOaM\\M
Z1d3U1oN_MaM[1`3V1nN`MbMZ1`3W1lN_MfMY1^3X1kN_MhMY1]3X1lNhLfLc0R1\\1\\3Y1POYMfM]1Z3[1POVMgM^1Z3\\1POTMiM^1W3^1QORMjM^1V3`1POQMlM^1T3a1QOoLmM_1R3c1QOkLPNa1o2d1ROjLRN_1l2g1SOhLSN_1k2i1o0UNQOl1Q1QNoNo1S1nMnNR2U1kMkNV2X1fMhNZ2[1cMeN]2_1^MbNc2h1iLfIK0Ob4]3m6001O000000001O00001O0000001O000000001O0000001O0000001O0000001O0000001O00000O2N1L4K5F:J7H7M3L4L5M2N2O2M2N2O1O3M2M2O2N1O100O1O101N1O1O2O0O10000O1000001N10001N1000000O2O00001O000O101O00001N100N2J7L3N2O2N1O1N200O2N101N1O100O101O000O101O0O101O000O101O0O10001N1000000[JgHc3Z7ZLkHc3U7\\LVIY3k6gL_Io2a6PMaIo2`6PMbIn2^6QMaJQ2_5oM`KeNiL0O07h1b7CfMcNa2\\1c5O0000001O000O2O00000000001O00001N1000001N1000001O00001O00000000001N1000001O0000VK" + }, + { + "size": [ + 480, + 640 + ], + "counts": "`UZ33k>2O2N2N1O2N1O2O0O2N1O101N00100O1O010O01000O010O101N10000O2O00001O0O101O001O0O2O001O001N101O001O001O1O2N0000001O000000001O0000001O0010O01mBfN^ and ?", + "answer": " is lying on .", + "image": "images/caption_simple_39.png", + "model_output": " is lying on ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000231088.jpg", + "mask_rles": [ + { + "size": [ + 640, + 425 + ], + "counts": "cna02jc05L5L3M3M2M4L4L4L4L4K5J6J6H8N1O2N2O2N1O1O1N3N1O1O1N2O3L2O1O2N2M3N2M2O1O1N2O001N10000O10000O10000O1000000O101N10000O100O101N10000O10000O2O0O101N100O101O0O101Z@XLQ?i3PAVLP?j3QATLo>m3b01O0000O1O1B\\@`Ld?[3a@fL^?Q3g@nLZO5m?e2g@gMY?T2f@SNY?d1j@cNV?X1e@ROZ?e0]@SNO`1d?:_@1a?@V@PO4DKo1l?[OY@X1f?hNZ@X1f?a101O01O0000000O1O101N100O102M4M5J4M5J6K2M[OPMj_OOc0m2d?ZM[@b2f?aM[@[2e?kMY@P2i?RNX@j1h?YNZ@b1g?dNU@X1l?lNT@P1m?UOR@f0n?]OS@>n?FR@6o?LR@0o?4o_OHR`0 and ?", + "answer": " is attached to .", + "image": "images/caption_simple_40.png", + "model_output": " is attached to ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000421923.jpg", + "mask_rles": [ + { + "size": [ + 640, + 426 + ], + "counts": "_`Q7f0Yc0=D`0@`0_O>C4L3L2O1M3M3O1N2O10O01O0000000O2N100M32N2N00O1O001O1O1N2N2I6L5O100O10O10O010000N20O0100000O2N1001O000000000000O1O2N100004VMm_O_1e0" + }, + { + "size": [ + 640, + 426 + ], + "counts": "ogo42jc06L2O1L5N1O1d^OFR?:m@HR?8m@KP?7n@LXNA``0e0SAO[N^O``0d0TA7k>JTA6k>KUA5k>JUA7k>ITA8l>Hl@d0P?\\Oo@e0Q?[On@g0R?YOm@f0T?]Oh@d0W?^Oh@a0X?@i@>X?Cg@ and ?", + "answer": " is inside .", + "image": "images/caption_simple_41.png", + "model_output": " is inside ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000057149.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "_hW4 and ?", + "answer": " is in front of .", + "image": "images/caption_simple_42.png", + "model_output": " is in front of ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000577932.jpg", + "mask_rles": [ + { + "size": [ + 543, + 640 + ], + "counts": "VTR5 and ?", + "answer": " is touching .", + "image": "images/caption_simple_43.png", + "model_output": " is attached to ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000311002.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "ZWb02W=3O0N20001N10001N10000O2O0000000O10000O100000000000O01000O1K6Mmh[1MWWdN5K3M2O2N1O1O1O1O001O0O101O001O1O1O2N1O1O1N2O1O1O1O0010O103L10O01O3M6I5K8B[b97e]F2N3L6J3M4K5L4K6K4L4L2O3M3]D_NV;c2XO0O2O0000000000001O0000000000000000000000000000001O0000001O00000000000000000000001O0001O00001ZMkET2U:jMoET2R:hMRFX2`:00000010N100000O10O01N101K5F:N2O10O10000O10000000000O11N10000000000000O10000000000000000001O000001O00000000000000000000000000000O2O000O100000000O101O000O1000000O2O000000000O2O00000O101nNYE]Oh:c0YE\\Og:c0ZE]Of:c0ZE]Of:c0[E[Og:d0[EZOe:f0[EZOe:f0\\EYOd:g0]EWOd:i0\\EWOd:g0^EYOc:f0^EYOb:f0_EZOa:f0_EZOa:f0_EYOb:g0^EYOb:g0_EXOb:g0^EYOb:g0_EWOb:i0^EWOb:i0_EVOa:j0_EUOc:j0^EUOb:k0_ETOa:l0`ESO`:m0`ESO`:m0aEQO`:o0aEoN`:Q1m001O000000000000000000000000000000001O000000000O100000000O100000001O0O100000000O100O100O100O1O1O2J5K5N2O100O10000000000000000000001O000O100O5IT[k1" + }, + { + "size": [ + 427, + 640 + ], + "counts": "\\gc3:n<4M3L4L3L4N2L4L5M2M3M3N2N3H7O2N1O1]OSNfEa1BkNe:JbE[1MjN_:0_EV15kNY:d1iE`NR:_1QFbNm9\\1UFgNh9Y1YFjNb9Y1\\FhNc9`1UFaNj9`2jFjLW8V3cGQM\\8P3aGRM_8W3VGkLj8l3O000000O100O2N1_OnFfLU9W3b0L3jNV1J4M2EmD^NV;d18000O11O1O10O10O100nN^D:d;D^D:c;D`D9c;DaD8b;F`D8b;DeD6_;Dcol3" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is standing on .", + "image": "images/caption_simple_44.png", + "model_output": " is standing on ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000525600.jpg", + "mask_rles": [ + { + "size": [ + 326, + 640 + ], + "counts": "TeV33o99H7J5L5I6L4K5L4G:I:_HnM^6W2YIRNb6l2L2O100001\\O^IbMc6W2iIbMZ6X2m0K5K5L5K4N3M2M4N16J5L2M3MSOnGIo70ZH1e7GhH5W7@VI>\\8M1M2M6K^Yh2" + }, + { + "size": [ + 326, + 640 + ], + "counts": "oTe2:i9:ZFBT9P1H3M2O2M2N2N3M2OO0M3M4L3O2M2N2O2O0O100O10O2O001N2O4J9G?_O8Hk0VOYN_I1U6dNeIZ1a0Ge5g0hJjN\\5V1i1O3O1XOPHYOQ8a0l0H9N3F`oX3" + } + ], + "question": "What is the relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_45.png", + "model_output": " is beside ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000378139.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "VRa17R=4M2M5L2O1O0O2O1N2O0O100O2O0O10000000000O11O000000O100O1O1N200O1O1O1O1O1O1O2N1O2N1O2N1O1O1O2N1O2N2N2N1O101N100O100O1000000O10000O0100000O100000O100000000O1000O1000O10000000000O10000000001O00O10O2O00O100000O10000000000O1000O1000000000O100000000O100O11O0000O100000000000000O100000O10O10000O10000000000O1000O10000000O1000000O10000000O01000000000000O10000000O10O100000000O1000O10000000O10000000O10O100O10001O00000000O010001O000O01000000000O100000000O100000000O10000000O10O10000000000O10O100000O10000O1O100000O10O10000000000000000O100000O1000O100000000O1000O100000O10000000000O10O100000O10000000000O10000O100000O01000000000000O1000001O000O100000000O10000O100000000O100000000O10000O2O000O101N2N101N2N1N3N1O2N100O101N100O1O2O0O101N100O2N2ORjP2" + }, + { + "size": [ + 427, + 640 + ], + "counts": "_]l23W=2O1O007I00000O1O00100O1O1O010000O106H4J]fU5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is inside .", + "image": "images/caption_simple_46.png", + "model_output": " is inside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000189806.jpg", + "mask_rles": [ + { + "size": [ + 400, + 500 + ], + "counts": "d\\W32[<6I7^OK_D:[;b0K4M3N2N2M3N2N2O1N2N1O1O1O1O100O1M2000N2O010N200O1001O1O001OO100O100100O1ONSFWNX9h1hF[NV9e1kF\\NT9b1mF_NR9_1QGaNn8_1RGaNn8_1SG`Nm8`1SG_Nm8c1SG\\Nm8e1SGYNo8g1QGXNo8i1QGWNo8i1QGWNn8k1PGUNQ9k1nFVNQ9l1lFUNU9m1dFWN\\9m1]FWNc9[200O100000O100000000O100000000O1O1M3N2N2N2N2N2O10PFlMd9T2ZFoMe9]210000O10000O100O10O0100O2O0O100O1CUFTNl9l1Y:N^E5\\:R1M4M3M2O2N1O1O2N2N1O1O1O2N1000O01O100O10O01000O010O001N1DUFSNm9j1WFSNk9m1 and ?", + "answer": " is looking at .", + "image": "images/caption_simple_47.png", + "model_output": " is looking at ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000515445.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "fm_7:[>d0A doing with ?", + "answer": " is leaning on .", + "image": "images/caption_simple_48.png", + "model_output": " is holding ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000203580.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "Pc]43V=3N2N100O1O1O1O1O100000000O100000000000000000000000000000O100000000000000000000000000000000000000000000000000000000O100000000000000000000000000000000000000000000000000000000000000000000000000000000O10001O1O2O0ON101O100O10001O000000O100000000000000000000000000000O100000000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000000000000000000000O1000000000O100000000000000000000001N2O_TV1" + }, + { + "size": [ + 427, + 640 + ], + "counts": "dYT47S=2N3L4N1N2O1N2N2O0O100O1O2N100O1O101N1O101N2N101N1O2O0O1O10O01O1O100O100O1O100O100O1O100O01000O1O100O010O01G]DhNe;W171O2O1N101N2O1N2O1M3N2N101O00000O100000000O100000O01000000O10000O010O1000000O10O10O1000O010O1N1K6N1101O2M3N0O2O1O0O100O10000O100000000001OTElMa:S2]GlMg6T2[HlMPO0K00 and ?", + "answer": " is over .", + "image": "images/caption_simple_49.png", + "model_output": " is over ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000499622.jpg", + "mask_rles": [ + { + "size": [ + 456, + 412 + ], + "counts": "Qh>1W>0O2O0O100O1000001N10000000000O1000000000000O10000O100O1O1O1O1O1O1O1O10000N3O000O1000000O1O100O10000O1000000O10000O100O100O100O100O101N100000000O100O10000O1000000O100000000O100000000O2O00000O10000000000O1000001O000000000O100000000O10000000001O0O100000000000000O1000000000000000000000001O000000000000O010N2L4N101N2N2N2O1O00100O001O010O10O010O01000O10O011OO10000O100O1O100O10001N10000O2O0O2O001O001O1O1O1O1O1O2N3M2N2N1O3M3M1O1O00001O0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000003fNjC[O3g0UTEBl:=UECk:=UECl: located relative to ?", + "answer": " is on .", + "image": "images/caption_simple_50.png", + "model_output": " is on ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000135872.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "ZYl01Z=0iR20hfb01^V[O001O01NmW10Pkb08kl[O5J5L7I3K2O20O1OjCSON050_;l0\\D\\O3Jb;i0[D[O1Oi;b0UD[O0N16Q<;nC_O1;U<2PDMR<1nCOSCa0_O;E=D7H5L4K2N1O100O1O100[N_M_Hb2a7_M]Hb2b7_M]Hb2c7_M\\Ha2d7_M[Hb2e7_MYHb2g7_MXHa2h7aMUH`2j7eMQH\\2o7^NVGc1j8_100000000000000O100000000O100000000O100000000O100O100O10000O1O1O100O100O1O1O100O100O1O1O1O100O100O1O1O100O1O1O100O100O1O100O1O1O1O100O1O1O1@eJnH]5o6a0O1O1O100O1O100O1O1O10000O1O10aJQIb4n6^KVI_4j6`KWI`4i6_KYIj1OKh6lMUIIi6bMWIo1O`0a7@\\Hc0d7\\O[Hf0e7ZOYHh0S6ZMRKm1kNi0R6[MbIO]1l1POj0Q6[MaI0^1k1POj0Q6[MaI0^1k1POj0Q6[MaI0^1k1oNk0Q6\\MaIN`1i1oNm0P6_MQK:bNf0=a1P6_MRK4gNk06c1P6XMdI3`14iNm03d1n5ZMfI0a14jNm01e1n5XMhI2U2o0VNg1V6XMcKQ1WNg1V6XMcKP1XNh1`8fMPGE52;c2R6ZMiK7kMG238e2R6[MiK4WNKNf2R6[MjK2XNLLg2f5YMRJ3U20WNLMh2e5YMRJ3U20jNd2o4ZMQJ2W2OiNe2o4YMSJ2U20iNe2]5YMlK0gNg2]5YMmKOfNh2]5YMmK0eNg2^5YMmK1dNf2`5XMkK3dNf2a5WMkK4cNe2T8[MlGe2T8\\MkGd2U8\\MkGd2V8\\MiGd2W8\\MiGd2W8\\MiGd2k4XM]M5gMc2l4XM^M4eMe2l4XM_M4eMc2l4YM_M5dMb2l4ZM_M5eMo0XO3d5iN_M6dMc0ZOUO3V1`5kN`M7cM91e0l4kN_M8eMO9m0d4kN^M:WOj0\\3kN\\MX7BXG1_1=Y7DVG0a1;Y7FVGOa15^7NoFNc12_71mFOb11a7OnF0a10a71mF0b1Nb73jF1c1Lc73jF1c1Lc73iF2c1Lc73jF2b1Je74hF3c1Ie74gF4c1If73fF59^Of0:[83eF67Ah06\\83dF85Bj03]83dF84Dj00`83aF;2Dl0Oa82`F=0Do0Ma82_F?MFR1Ib82_FT1c0bN@8^93]FV1:_NF336`92\\FY16jNMKa93[FX17kNLJb9m1aFYNLKc9l1aFZNKJd9m1aFXNKKd9m1bFWNJLd9n1aFVNJMe9m1aFVNJMf9l1`FgN`9Y1aFfN_9Z1aFTNI1g9k1`FSNJ2g9j1`FSNJ2f9l1_FRNK2f9l1`FRNH3h9k1aFbN_9^1aFbN_9_1aF`N_9`1aF`N`9_1`FaN`9_1aF`N_9`1aF`N_9_1cF`No0\\Ol6T2UH`Nc0^ORO2T8o1YH`N93_7[1YHcNM>h7n0]H9b7G^H9b7F_H:a7F^H and ?", + "answer": " is on .", + "image": "images/caption_simple_51.png", + "model_output": " is on ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000439994.jpg", + "mask_rles": [ + { + "size": [ + 640, + 428 + ], + "counts": "T?12b0OE2I114O100075d`0j2K2O0001O00001O000O10001O00001O0000001O0O101O0000001O0O11O010O0001O001O00001O00001O001O001O0000001O010O00001N10010O00001O0000001O00001O00001O01O0001N100001O00O20O001O01O00001O000001O0001O00001O00001O001O1O1O1O01OO1001N100000000000000001O1O1O0000O1N200O1000001O00000000000000000000000001O00000001OO100000010O2M2O2O1N1O1O3M>B001O001O0001O000000000VOfNn^OZ1Qa0lNk^OS1Ua0oNi^OR1Va0oNi^OQ1Va0POj^OP1Va0QOi^Oo0Wa0ROh^On0Xa0ROh^On0Xa0SOg^Om0Ya0SOg^Om0Ya0TOe^Om0[a0TOd^Ol0]a0TOa^Om0_a0TO`^Ol0`a0XO[^Oi0ea0i00000000000000000000000000000001O000001O0000000000000000000000000001O0000000001O0000000000000001O0000000001O0000O100000001O000001OO100000000000000001OO2O00000000000000000000000000001O01O001OO1000000000000001O0001O000000000000000000001O0000000000000000010O000O1000000000000010OO100001O00000000]OU^OQOka0k0\\^OROea0l0]^OSOca0l0_^OSOaa0l0a^ORO`a0l0e^OQO[a0m0k^OnNVa0m0o^OTOPa0h0T_OXOl`0e0W_O[Oj`0:`_OEa`09a_OGa`04c_OKYb000000O2O00000O1D<00K6L2M4N2M3O1J6I7O0101O`0^OZk;UOlaC7KJk05Ra0j1O1N10001O0000001O000000001O000000001O000000000000001O00000001O0O1000000N]3" + }, + { + "size": [ + 640, + 428 + ], + "counts": "oUc62kc06VMN_A3\\>6`AK[><`AG]><_AG^>>]AG`>9VAMh>5T@oN=P1^?2U@nN6X1c?KU@PO4X1g?HT@QOL`1o?]OU@U1j?jNS@Z1n?gNo_OV1MgMT`0b35O2M3M2N3M3N1N1M1O003N3N3M3N110;E7I>Ba0k_OkK^?]4N1O2OO01O01O1O1O1O1O1O1N2nL_@^1g?_N^@NJ@>^O^?f0i@MRa0NT_ONo`0OT_OOn`00X_OJh`06Z_OHg`07Z_OHg`06\\_OGf`08b1N2N2M4MmQ3NVnL1ag<" + } + ], + "question": "What is doing on ?", + "answer": " is walking on .", + "image": "images/caption_simple_52.png", + "model_output": " is walking on ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000468501.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "`h_11d;3HOfD3Z;5O101O00001O000000O100O10000O100000000000000000000000O10000000000O10000000O10O10000YGJVOLk4:mKMVOKl48lK2UOGm47mK7SOCn47nK;oN_OS56lK`0nNZOU57lKb0lNXOW57kKe0lNTOY57iKj0jNQO[56jKl0iNnN]55jKP1gNkN]57jKR1fNhN_57jKT1eNeNa57iKW1dNbNb58iKY1bN_Ne59hK[1aN\\Ng59fK`1_NXNi5:gKb1[NVNm59fKY2Y4hMfKY2Y4hMfKY2Y4iMdKX2[4jMdKW2[4nM_KT2c3VM_Ko0e06oN0l4nN]K]17EELf4TO[Kg1OYO4I_4[O\\Ka1IkM7_1c2f5kMPJZOa0j2`5jMSKU2n4jMSKU2o4fMVKY2j4`M_IIi1g2i4`M[K`2f4_M[Ka2f4]MZKd2f4XM^Kh2h6100fGVMe7Q2mGbN=^Og7j1THfN4Aj7d1VHjNOBS8Z1QHSOKDX8S1PHYOGD]8n0nG^OCEb8h0mGD@Df8b0nGJ[ODQ96gG5XOEW9JhGa0oNFb:;\\EEe:f01O1N2O2N2N1O1O1O2L3N3L4Kl[h2" + }, + { + "size": [ + 375, + 500 + ], + "counts": "ccR42d;3N2N1O1O1O2O01OO1O10000N2O1O1O1O0L4M3O01N2010O3M2N2N1N2N1O2N1O01O0N2001O1N200O1O1O1O1O001N100gNhNTHY1j7jNTHV1k7mNSHS1n7mNQHT1n7mNPHT1P8mNoGT1Q8lNoGS1S8lNlGU1T8lNkGS1W8lNiGS1X8oNfGQ1[8oNdGP1^8oNbGQ1_8nNaGQ1`8PO_Go0c8SOZGm0g8QOZGn0h8QOWGo0j8ROUGn0k8ROUGm0l8SOTGm0l8SOTGl0n8TOQGl0n8UORGj0o8VOQGi0P9WOPGi0o8XOQGg0o8ZOQGf0^1nN]5YIc0_1nN[5?WIb0m8^OSGb0l8^OUGa0k8_OVG`0i8AXG>h8BYG=g8CZG and ?", + "answer": " is looking at .", + "image": "images/caption_simple_53.png", + "model_output": " is looking at ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000171190.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "mW_8d03Ej=m0J3O1O2O0O101N10001N100O2O01O100O001O0iNcBl0]=QOcBQ1^=mNaBU1_=610O00001O001O001O10O000N2J6K5G:J5L4K5K5N3M2OnB0m<7M2N2O100O0O2001O1N101O_NF\\D9d;J[D5e;L[D3e;NZD2k1" + }, + { + "size": [ + 480, + 640 + ], + "counts": "[k_71m>2O2N2N100O1O2N1O101N1O1O1O2N100O1O2N1O1O101N1O1O1O1N2N2O2N1N2XOTOYCo0dk0V;@[DE?k0W;AZDB>m0Y;2fDN[;1eDO[;2dDM^;3aDM`;3_DMb;3[DOg;0TD4m;_1002N1O2N1O1O1O1O1O1O1O`NfMZFZ2d9lMYFS2f9QNWFo1h9SNVFn1j9SNTFn1k9SNTFn1l9RNTFn1Q5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is sitting on .", + "image": "images/caption_simple_54.png", + "model_output": " is beside ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000565391.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "e6173a2M\\M0R>3nBM13QO14O[1NbN3NNM0e=n2\\BiNOOOZN218Z4f7f7K1N2O001O1O001O001O001O001O1N101O00001O001O001O1O1O00001O001O001O1O001O001O00001O1N2O1O001\\LPC6P=]3001O1O1O1O001O00002N001O1O001O1O1O1O1O001O002N1O001O1O2N2N1O2N1O1O1O3M2N2N4L2N1O001O2RKaAW4b>eK`AHNL0W4f>n04L4L3M5K6J;`KZ@P4m?N1O1O3M1O1O00003M1O1O00O1002N01O03M0O100001O00O1001O0000O100001O0000000000000000000000000000000000O11O00O10000000000000000000000000000000000000000000000001O00000000000000000jE`Ll3`3UL_Lk3a3QL_LS4a3V6000dE_LY4a3S6000bE_L]4a3cK_L]4a3cK_L]4a3Q600000000000000000000000000000000000000000000000000000000000000000000001O00000000000000000000000000000000000000000000000000000000000000000000000000001O0000000000000000001O000000000000000000001O0000001O001O0000001O0000001O001O00001O1O001O001O001O1O001O1O1O1O1O2X@lK`?\\4N2N2N2N2N2N3M2N2N2N3M3M2N2[EdJb6_5n36J3M2N2N3M3M3M2N1O3M5K3M2N2N3M3L4M3N1N3M3M3M1O2N2N3M3M1O2N2N4L2N2N2eEjGh8X8UGjGj8Z8nFbGPO4R:]8kF_GSO4R:_8mFbGR9a8jF`GV9b8eFbGZ9b8_FaGa9b8\\F^Gd9c8ZF^Gf9d8QFoF09NIP:d9PF\\FP:c9QF^Fn9b9RF^Fn9j901O000000001O
00000N31N00LoEYFR:f9oEYFQ:g940lEXFP:h9oEYFQ:g940000000001O00000O11O000000000001O000O11O0aM_FYHNo1c9h5_FYHOn1d9g5]F[HOn1f9]5XFfH5NOm1d9\\5]FgH:1Db1b9i5TGdH[Oc1a9i5^GVJa8g5ZFgHV1c1_8]N\\FS7X1bJ\\8[N\\FS7W1cJ]8`5dG`J]8`5bG`J_8_5aGaJa8]5_GdJ`8\\5`GdJ_8]5aGcJ^8^5bGbJ`8\\5_GfJ`8Y5_GiJa8W5YGoJd8T5\\GlJd8T5\\GlJe8S5[GmJg8Q5YG^IQOm0f9e5YGZIUOQ1`9h5YGXIWOP1`9h5YGXIWOQ1`9f5YGUI[OU1]9e5XGUI]OU1W9i5]GQI]OV1V9i5]GQI]OV1T9k5_GoH]OV1T9k5_GPI[OV1V9j5_GoH\\OW1V9j5VGgHA27\\1R9k5UGiHAN9_1Q9j5VGhH2^1h8j5VGgH3`1f8i5VGiH3^1c8m5YGfH5\\1_8R6lF_HO<3D:_1g8S6lFRIL^O2O0O9a1k8R6nFUIM^O:[1k8S6mFUIM^O:Z1k8V6kF\\I4nNOa1P9c6mFaH3]OMb1Q9Z9XKWE]LKV12GP;^NoD0Ob0o5_O^J5c0Iac0" + }, + { + "size": [ + 640, + 480 + ], + "counts": "n??240_OO30N11OO171H4<21KEi?o2Z@_MLBj?o3001O001O001O001O00001O001O001O1O001O001O001O001O1O001O001O001O001O1O001O1O001O001O1O001O1O001O1O001O001O001O1O1O1O001O1O1O001O1O1O2N1O001O1O1O1O1O1O2N2N1O1O2N2N1O1O2N2N1O2N2N3M2N2N3M3M4L5K6YOW]OK`c0O3M2N1O1O1O2NQhY7" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_55.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000322829.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "1b8i4001O5K0000001O0000M3N200000000000000001O000000O100000000001O00000000O1001O000000O1000000000000001O001OO10000000000000000000000000O2OO100001O00O10000000001N10O2O01N1000OZKeK>[4^OPL:R4DRL9n3FVL7j3HZL6e3I^L5b3K_L4a3L`L3`3MbL1^3NdL1\\3OfLiMkNa0^4g1jLbMoNb0Y4k1iLaMQOb0V4m1mL[MSOd0P4Q2RMUMPOj0m3Q2WMoLROl0g3U2YMlLSOm0d3W2\\MgLVOm0_3\\2UO]Ml0b2ZOYMf0g2ARM?m22cLN\\3Q5O1O1O002O0O1O010O2N1O1O1N102N1M3O001N2O2M2N2O1O1O1N20nJ_MYO`2d0fM[OY2e0jMZOT2g0lMYOR2i0nMWOQ2j0oMUOR2j0oMVOP2k0PNVOo1k0oMVOQ2j0nMWOQ2k0mMUOT2k0jMWOV2P6N1000O10O101O00O0100000000000000000`JjMKV2J]NNc1NdNO\\11dNO\\11dNO\\10fNOZ11fNOZ11fNOZ11fNOZ11fNOZ11fNOZ11eN0[10eN0Z10gN0Y10gN0X11hNOX11hNOW12iNNW12hNOW12iNNW12iNNX10iN0W10jNOW10jNOV10kN0V1OkN0W1NjN1W1NjN1V1OiN2W1MjN3V1MjN3V1MkN2U1NkN2U1NjN3V1MjN3U1NjN3V1MjN3V1MeNTKTOP5W2LdNUKUOo4W2KcN<]1DdN;[1FgN8Y1HhN7W1JjN5V1KkN4T1MmN2S1NmN2S1NmN2S1MmN4S1LlN5T1KkN6U1JkN6U1JjN7V1IkN6U1JkN6U1IlN7T1IkN8U1HkN8U1HkN8U1HkN8U1HkN8U1GlN9T1GkN:U1ElN;T1ElN;T1ElN;T1ElN;T1DmNU1BjN?V1AjN?V1@kN`0U1@kN`0U1@kN`0U1@kN`0V1_OjNa0V1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1]OlNc0S1^OmNb0S1^OmNb0R1^OoNb0P1_OPOa0o0@RO?m0BSO>m0BSO>m0BRO?m0AUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>l0@UO`0k0@UO`0l0_OTOa0m0^OSOb0m0^OROc0n0]OROc0n0]OROc0n0\\OSOd0m0\\OSOd0m0\\OSOd0m0\\OSOd0m0\\OSOd0l0]OTOc0l0]OTOc0l0]OSOd0l0]OTOc0l0]OSOd0m0\\OSOd0m0\\OSOd0l0]OTOc0l0]OTOc0k0^OUOb0k0^OUOb0j0_OVOa0h0BWO>h0CXO=g0DYOl0ATO?n0_OROa0o0^OQOb0o0^OQOb0P1]OPOc0P1]OPOc0P1]OPOc0Q1\\OoNd0Q1\\OnNe0R1\\OmNd0S1\\OmNd0T1[OlNe0T1[OlNe0T1[OlNe0U1ZOkNf0U1ZOjNg0V1YOkNf0V1YOjNg0V1YOjNg0W1YOiNf0W1ZOiNf0W1[OhNe0Y1ZOmM[Kc0[5`1ZOmMZKd0\\5_1YOiNf0V1ZOkNf0U1_OfNa0Y1@fNa0Z1_OfNa0Y1@gN`0Y1_OhNa0X1_OhNa0X1_OhNa0W1@iN`0X1_OgNb0Y1^OgNb0Y1^OgNb0Y1_OfNa0Z1_OfNa0[1^OdNc0\\1]OdNc0]1\\OcNd0]1\\OcNd0^1[ObNe0_1]O^Nc0b1]O^Nc0b1]O^Nc0b1]O^Nc0b1]O^Nc0b1^O]Nb0c1^O]Nb0c1^
O]Nb0c1^O]Nb0c1^O]Nb0c1^O\\Nc0d1]O\\Nc0d1]O\\Nc0d1^O[Nb0e1^O[Nb0d1_OZNc0f1]OYNd0g1\\OZNc0f1]OnMSK6^5l1_OmMUK3_5P2\\OmMVK1_5R2[OlMQ1T2^OiM6W2IjM7V2_5000QJmMf0S2ZOmMf0S2ZOmMf0S2ZOmMUKMV5V2EmMQKLN1\\5W2DlMRKML1^5V2DkMSKNK1^5V2DjMTKOJ1^5V22iMNW2HkMoJNY5V2ImMmJMZ5V2ImMmJNY5U2IoMlJM[5T2FYN:g1FYN:g1FYN:g1FYN:g1FTNmJG^5V2DXN;h1EXN;i1DXN;h1EXN;h1EXN;i1HhMQK2W5V22hMOX2b51O00O100O100O10000O1000000000000000000001O1O001O001O00O100000000000000000000000000001O000000000000001O001O00001O00001O001O002N2N1O001O2N1O1O2N2N3M2N2N3M1O3M1O1O2N2N1O1O001O1O001O1O1O1PJYLZ2g3]L^LfNNl4e3ZLaLeNNQ5a3XLeLdNKT5`3TLjLdNJW5\\3RLnLdNHZ5Z3nKTMcNE_5W3kKVNU4j1iKYNV4g1hK[NX4e1fK]NZ4c1dK_N\\4a1aKbN_4^1`KcN`4]1^KfNa4[1\\KgNd4Y1ZKiNf4W1XKkNh4V1UKmNj4S1TKoNl4R1QKoNP5S1hJTOW5Q1^JUOb5Z40001O001O000000001O0000001O1O00001O001O1O001O1O1O1O1O00001O00000000001OO1000000000000000000000000000000000000000000O1000000O100001O00000000000000001O000000000000000000000000000000000000O1000000000000O1000000" + }, + { + "size": [ + 427, + 640 + ], + "counts": "k\\T1221P=`0E2M5M5J2O1O1O2N2N3L2O1O1O5K3M6J1O3L`0_O3`E_MS:c2hEdMU:Z2nEdMQ:Q2kEPNd:Q2\\EoMd:]2N2O0O1O1O1O2lEUMe9j2>00001N3N1N2N2O1N2O1O1N2N2O1O1N101O1O100N101O10O1O1M20100O1O1O010O10O01O1O1000000O1000O100000000000000001O00000000O1O1000000000000001O00002N001O00000000000000000001O000000000O100O1O0010000000000000000000001O000000000000000000000000001O000001O00000000001O000000000000O011N1O11O00O1O001000000000000000000001O1O000000000001N100O10O11N101O00000O2O0O2O0O1N2O1O1O1O1O1O100O10000O101O0001O000000001O00001O001O1O1O010N4M1N2O01O01O00001O0O101N110O000O2O000O110O001O000O1O1O11N1O101O1O0O11O0000000000001O1O1O001O0000000000000000O100000000000000O10000000000000000001O0O100001OO1001O00000O10001O01O00001O0O10001O00001O0000O100O10000O100000000O10000001O001O0O2O000O2O00000000OgMQFX1o9hNQFX1o9S1O001O1O00001O002OO01O1O1O3M00010O002N3NO010O0000001O10O01O1N4M1O1O2N1O2O0O1OfHmL\\M2f6o2PLmLZM40Hd6U3TLlLZM6i6k2TLQMR4n2W3N2N2N3N0O101M2N101000OO2O2N0JZEgMg:U2:L3O2K5I6O1L3DXDTOl;k0SDQOR and 
?", + "answer": " is in front of .", + "image": "images/caption_simple_56.png", + "model_output": " is beside ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000535523.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "Zeh52Y=2iB0P=600O2O001N101O1O1N2O3M1O001O1O3M0O10001O01O010O01O1O10001OO1000O01O010O101OO1cCTOY^:CbE<^:DdE:]:FbE;]:FbE;]:FcE:\\:GcE9\\:IcE9[:GeE:Z:GdEX:CcEa0Y:CeE?V:GiE:Q:LnE4k94SFMj97UFJj96VFJ\\9gNeF`1MJV9oNlFX1LJU9QOoFj2P9WMoFi2Q9WMPGh2Q9XMmFi2T9WMkFh2W9f0001O001O1O1O001O001O1O001O1O001O00001O1O001O001O00001O001O001O001O1O001O00001O1O001O001O00001O001O001O001O001O001O001O001O00001O1O00001O001O001O001O001O00001O001O001O001O001O00001O001O00001O0000001O000000000000000000TE" + }, + { + "size": [ + 428, + 640 + ], + "counts": "eiR73X=1O2N2N1O2N100O2@GjC:h;3YDM`;:_DH_;8aDI`;5`DLa;2_D0`;O`D2`;M`D5_;JaD7_;HaD9`;DaD=_;B`Da0_;^OaDc0_;\\OaDe0`;XO`Dj0`;UO`Dl0`;ROaDo0_;oNbDR1j;2\\OmNmDU1k:hNmD47U1k:hNoD07Y1i:hN^EZ1b:fN\\E\\1d:dN[E]1d:cN[E`1W;1M3M3O1M3N1N3N2M102O0010O2O0O10000O2O0O101N1O2N1O1O2M2O2L4M2M4L4L4K5I7K5J8GeW:" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is over .", + "image": "images/caption_simple_57.png", + "model_output": " is on ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000276018.jpg", + "mask_rles": [ + { + "size": [ + 640, + 416 + ], + "counts": "Una05kc01N102N1O2N1O0O2O0O2M2M3M3K6K4J6L4N2O1O101O000d@cNU<]1cCmN[WOeAo0[>QO]AW1c>iNWA]1R>VMVB\\1Dc1U>RMXBY1_Oi1X>oLZBX1[Ol1[>kLZBR2\\O9[>dMZBQ2_O9V>fM\\BP2^OdM`BT1CROM\\2j0jM[;D[DQ1oLAQ3`0oL_OQ3b0mL^OS3c0hHUL32\\2T3j4h0^IZLc1n2P5i0YI_Ld1d2X5m0nHhL5AMj2S7m0lHhL4\\OOf2\\7U1bHiL\\1b1e6e1oGiLY1[1R7k1fGkLS1]1W7i1fGjLo0`1\\7g1dGiLn0b1^7e1cGkLe0h1i7^1aGjL?o1P8W1bGiLk7TM_H^2C`0n7RM`H_2_O`0R8oLaH_7`7_HbH`7_7^HdH`7]7_HcHa7^7]HdHb7]7\\HdHd7^7YHeHe7]7XHeHg7\\7WHeHi7]7SHeHm7^7YGjGJk0m8e9O00001bNTEfIl:Y6]E_Id:^6bE^I_:`6dE^I]:_6iE]IW:^6PF`IQ:\\6TFbIm9\\6YF_Ig9a6^FZIc9e6n1M101O1O1O1N2O1O1O1N2N3N1M4A?]M[B_Nn=YNcAk2U1UNg=h1]2L1N2N2O2M3N2M2O2N2M4K6I4L5K6J6K5J5M5I7J7EejR4" + }, + { + "size": [ + 640, + 416 + ], + "counts": "9Y33PNd0hN]O0c01]O4O0f0KZO7OK0O1o0NXO0K`0R1@a05\\42^7LbH3a7J`H5f=00O1O1000000O100O100Oig19fWNId\\O`0Sc0?QOYOT^OQ1ca0k0C=L5L4K9H9G6JR5@=C:F3L10000O1O1M3L4L4J6L4J6M3K5K5O1O1O1O1O1O1lEcHb7`7]HcH`7^7_HdH^7^7aHeHZ7^7dHeHZ7\\7dHhHW7[7hHhHT7Z7kHhHS7Y7lHjHP7X7oHlHk6W7UIkHe6Y7ZIlH`6V7_IPIYNmN_7W8WJhIUOYOa3T7YMQJ\\NBl3b6gMmLd1X3[NQMT1X3kNPMg0W3XOXMXN\\IT12ROj9a1\\M`N\\3_1c700O1O1O1O1O1O1O1O1O1O100O1O1N2O100O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1N2O1O1O1O1O1O1O1O1O1N2O1N2O1NRTg5" + } + ], + "question": "What is the relationship between and ?", + "answer": " is guiding .", + "image": "images/caption_simple_58.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000404249.jpg", + "mask_rles": [ + { + "size": [ + 640, + 427 + ], + "counts": "Y[i17hc03M3M1O2O0O2O1n\\OD^b0<`]OH]b09b]OI]b07b]OK]b06b]OJ^b07a]OI^b08a]OI_b08`]OH`b01_]OC1<`b00a]ODNd:n0cHg0gMmNf9<_HP1eMeNk9>[HR1gMaNm9i0nGk0PN_NQ:R1\\EgMj0f0CU4U:gMoEYN^O]4c:\\MkEc3T:cLcEa3]:Q300O100O001dNiFnGW9n7cFQG;l0R9o7WGQHh8m7\\GRHd8k7`GSH`8m7bGQH_8n7cGQH\\8n7gGPHZ8o7hGoGY8o7mGlGS8T8SH_GU8`8e101OO2H701O010lMYGnHg8P7bGhH_8Y7[HkGf7T8_HeGe7Z8Q2N2ZOSEmGJ0Z;l7f0N2H9M2M3L5O_FQIQ6o6ZIiHiMj0d8h6cH_JX7i5RHlJNZMm6U8fHgJ7ZMR7Z;kHgDV7Y;hHhDX7X;eHlDZ7T;bHPE_7h;0000001O1XE_Ho8a7PGaHo8`7mFcHS9_7hFeHW9\\7bFjH^9Y7^FiHa9X7ZFlHf9W7UFkHk9V7RFmHm9[7iEgHW:b8O001N101O0O1O1O2N1N2O2L4M3eLoE_LW:`NVFn2HUN]:jNQFY2AaL9X2V:nNTFR1_1H_8TOZF`0a2]OX73bFLb2Ko68jKBY4>lKXOX4h0a7O10X]OjNeb0Y100Z]OhNcb0W1^]OhNbb0X14O12ON10O01O1002ON001O1000\\]OkN\\b0T193M1O1O0000O1O0106KM2N1100004K1O2O0002N10N11O0011NO21M13MO101N2O11OO000O21ON011O100O21M10N101O01O02M1O2N2M1O2N2N2N3M2O3J]bf2" + } + ], + "question": "What is doing on ?", + "answer": " is riding .", + "image": "images/caption_simple_59.png", + "model_output": " is playing on ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000098287.jpg", + "mask_rles": [ + { + "size": [ + 640, + 415 + ], + "counts": "m;4o2O^MNa22cMNk;0hC02:KHa0Nb03RO9K07FI1140KO:7FO0d04WO02KO001OO106OK0_;7`Dm7];;000000000000O1000000001O000000O1iNaGPFOc0a8V9_GQF`0g0Q8X9`GPFa0f0o7Z9SHgFm7Y9PHjFP8V9jGPGV8P9hGRGX8n8gGSGY8l8hGTGX8[:O2N1O1O1O2N1O001O001O1O1O00000000000000000000O10000O1J6000000gNgGjFZ8T9iGjFX8U9jGiFW8V9jGiFW8W9iGiFW8X9iGfFX8Z9hGfFX8Z9hGfFX8Z9hGfFX8Z9hGeFY8[9hGdFX8\\9hGdFX8\\9hGcFY8\\9hGdFX8\\9iGbFX8]9iGbFX8^9iG`FX8_9iG`FX8_9iG`FX8^9kG_FW8a9iG]FY8b9jGYFY8f9Q10000001OO1O1001O00000000000000O100000000O100000000O1O1000000000000001O0000000000000000O100000000000000O100001O00000000O10000O100000000000000000000O1000000000000000[GYFQ7g9cHYFlN;^8\\9fHZFjN;`8Z9gHiFY7W9gHiFY7W9cHmF]7T9\\HRGd7n8WHYFZOk0_8l8SH[Gm7e8QH]Go7f8gGaGY8R:100O1K_GjDb8S;7N2M3N2O1L4I7M3O1M31O1O002N1O2N4L1O3M1O5K5K1O2N1O]N^GmG]8R8iGjGV8V8mGhGR8W8RHfGn7Z8VHbGj7^8YH_Gg7a8`HXG`7g8fHSG[7m8iHoFW7Q9kHmFU7S9nHjFR7W9oHfFR7Z9RIaFo6_9RI_Fo6a9h1O100000000000000000000O100000000000000000000O1000000000000001O00O100O1000000000000000000000000000000000000000000O10000000000O1000000000000001OO100000000O1O10000001O000000000000000000O100000000001O000000O100001O0000O10000O1000000000000000000000000000000000000000000O11O0000O1000000000000000000O10000000000000000000000002N001O000000000000000000O100O100N2M3O10000001O00000000O100000000001O001`FlFlN100j3NXL0T4Wc0dKk\\O1?" + }, + { + "size": [ + 640, + 415 + ], + "counts": "UeX31T22a?4U@4h?NV@4h?NV@4i?NR@6l?LP@8o?In_O:P`0Hm_O:S`0Je_O:\\`0LX_O3n`00e^O8^a0R1201O1N2LJPNd^Ok1[a0=2M3O100O21O002N1O2N2O1N2O1O3McMe^OK2g04 and ?", + "answer": " is standing on .", + "image": "images/caption_simple_60.png", + "model_output": " is standing on ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000278973.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "Sfn13W=1N2O0000001O1O1O1N102N1O1O000O2O001O1O0O101N1000000O10000000001O1O00001O001O00001O00001O00000O10000O100000000000O01001O00O10000O10000O1000O100000000000000O10000000000O2O0000001O0O2O001O001O0O2O001O001O000O10000O10001O000O2O00001N101O001O001O0O2O001O00001O0010O010O010O10O010O010O1O2MiQ`4" + }, + { + "size": [ + 426, + 640 + ], + "counts": "fjT12V=?A5M1N100O0001O010O010001O0O2O0O10001CW9k1O2O0O01_OkFkLU9S3nFlLS9Q3PGnLP9o2TGPMm8m2VGRMj8m2XGRMi8h2]GWMd8f2_GYMb8d2aG[M`8`2dGaM\\8\\2gGcMZ8[2hGdMY8Z2iGeMY8W2`1L5J;E7J6J7H8@>Ghdg4" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is on top of .", + "image": "images/caption_simple_61.png", + "model_output": " is on top of ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000104198.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": 
"dS2b0P;6J6N3M3N2N1O2N2M101IfNnE\\1P:6O20O2ON1O101O001XG\\N[7e1WHjNg7c1bGgN_8X2O1000000O1001O00000000O1O11O1O000jNaGSO_8S200O2N20O1O000O02O1N10O100000O10000lNkLfIP3TOjLT7=fIe2]6^McIT2VO^M_7a0ZIe1S7`NkHZ1W7kNhHU1V7nNhHR1X7ROdHo0\\7ROcHn0^7SO_Hn0b7SO\\Hm0d7UOYHl0h7TOSHP1n7\\11O00O1O1O100O1O1O10000001O001OO1O1O10000O1000000O100O100O10O1001N100O1O1O1POTLUJl3h5XLVJi3f5^LVJc3h5`LVJa3j5`LUJ`3j5cLTJ]3k5fLQJ\\3o5gLkI\\3U6S11O2N0000O100O1N2N2O2N1O1O10O10001O2N0010O0O100O1N2N2N2M3M3M3M3L4N2K5J6L4N20000O100O1GQIjKo6U4TIhKm6e3VIbLNHl6Z3SIfL88HHm6Z3SIeL89EG01P7Y3TIfL7b0e6g2UIeL8e0b6W3RIkLn6o301O00001O3M4L9G4L5K5K3M3M3M2NO100O100O1001O001O1O1O001O2N1O1O1N1001N10O10O2O00O200_KoIZ3R6dLQJZ3o5dLTJ[3l5cLWJ\\3j5bLWJ^3j5`LXJ_3j5^LXJa3j5\\LXJc3n601O1O1O1O001O001O001O1O001O1O1O1O00000000OgMSHj0l7VOXHg0g7YOZHg0f7WO\\Hi0d7UO^Hk0b7TO`Hk0a7TO_Hl0b7POaHP1`7mNbHS1`7kN`HU1b7hN_HY16XNU6=gIb2X6YMlIh2Z6lLkIT3]7O001O001O001O1O1O00001O001O001O000000000000001J5ZO\\GPNn8k1b0D;N2O01O1N1O1100OO10O2N2O1N2O1N3M3N3K?BZTW1\\O[lH" + }, + { + "size": [ + 375, + 500 + ], + "counts": 
"2g15N1aNd7a1[HN0bNe7a1ZHM1bNe7a1ZHM1bNe7a1[HL1bNd7b1[HL0cNe7b1ZHK1cNe7b1[HJ0dNe7b1[HJ0dNe7b1[HJ0dNe7c1[HH0eNe7c1[HH0eNe7c1[HH0eNe7c1[HH0eNe7d1ZHG1eNe7d1[HF0fNe7d1[HE1gNd7d1[HE1fNe7f1YHD2gNd7e1[HC1gNe7f1ZHC1gNe7g1YHB1hNf7f1YHB1hNf7f1YHB1hNf7f1ZHA0iNf7f1ZHA0iNf7g1YH@1iNf7g1ZH_O0jNf7g1ZH_OOkNg7f1ZH_OOkNg7g1YH]O1lNf7g1YH]O1lNf7g1ZH\\O0mNf7g1ZH\\O0mNf7g1ZH\\O0mNf7h1YH[O1mNf7h1YH[O1mNf7h1ZHZO0nNf7h1ZHZO0nNf7i1YHYO1mNg7j1XHYO1mNg7j1YHXO0nNg7j1YHXO0nNg7j1YHXO0nNg7k1XHWO0oNh7j1YHVOOPOh7j1YHUO0QOg7j1YHUO0QOg7k1YHSO0ROg7k1YHSO0ROg7k1YHSO0ROg7l1XHRO1ROg7l1XHRO1ROg7l1YHQO0SOg7l1YHQO0SOg7l1YHQO0SOg7m1XHPO1SOg7m1XHPO1SOg7m1YHoN0TOg7m1YHoN0SOh7n1XHoN0SOh7o1XHmNOUOi7n1XHmNOUOi7n1XHmN0TOh7o1XHmNOTOj7o1WHlN0UOi7P2VHkN2SOi7R2VHjN1SOj7S2UHjN1SOj7S2UHjN1QOl7V2RHiNV8W1jGiNV8W1jGiNV8W1kGcNZ8]1fG_N^8b1aG]N`8c1`G\\Na8d1_GZNc8f1^GXNc8h1]GWNd8j1[GVNj0He6R2aHUNa09h6b1hHTN6ELm0n6Z1PISN6HJP1l6U1TISN6HJU1g6Q1XIQN8II\\1`6j0_ImMbJkMYOl1Q69dJmM[Oo1l55eJoM_OQ2g50ZJlMM92P2b5K_J_NOh1`5IaJ_NOi1_5IaJ^N0i1_5IaJ^N0j1^5HbJ^N0j1^5HcJ]NOk1^5HcJ\\N0l1]5IbJ[N1l1]5IbJ[N1l1]5IcJZN0m1]5IcJZNOn1^5HcJZNOn1^5HcJZNOo1]5HcJYN0o1]5HcJYN0o1]5HdJXNOP2]5HdJXNOo1^5IcJXNOo1^5IcJWN0P2]5IcJWN0P2]5JbJVN1o1^5KbJUN0R12oN]5j1aJUN0o07oNY5m1`JUN0m0:POV5n1`JUN0h0a0SOo4Q2_JTN1f0f0QOk4U2_JSN0g0g0POj4V2_JSN0f0j0mNi4Z2]JRN0g0m0iNh4_2ZJQN2f0i6Y1VIPN1g0i6Y1VIPN0i0i6W1WIPN0j0h6V1XIPN0j0h6W1WIoM1k0g6U1YIPN0j0h6W1WIoM1j0h6W1XInM0j0i6X1WInM0h0k6Z1UInM0g0l6\\1SImM1e0n6^1QIlM2e0n6_1PIlM2e0n6_1PIlM1e0P7_1PIkM0e0Q7a1nHjM1e0Q7a1nHjM1e0Q7a1nHjM1f0P7`1oHjM1f0P7`1oHjM1g0o6_1PIjM1h0n6^1QIjM0j0n6\\1SIiMOj0o6]1RIiMOj0o6^1QIgM1j0o6_1PIgM1i0P7`1oHgM1g0R7b1mHgM1d0U7e1jHgM1a0X7h1gHgM1>[7k1eHfM0;_7o1aHfM09a7R2^HeM07e7T2[HeM04h7W2XHeM1Ok7\\2THeM1FT8e2kGeM\\8[2dGeM\\8[2dGdM]8\\2cGdM]8\\2dGbM]8^2cG`M_8_2=N2F:E;G9C=N2O100002N1lEiNf9e2oN2VG]M\\8c2cG^M]8b2bG_M^8a2bG_M1MS8d2lG_M1OQ8c2nG]M13n7`2QH]M1[N]O1c5i2bJmN;[N_O0c5h2cJoN7[NCNc5h2cJRO2ZNHMb5g2dJFJdMa5f2eJFIbMe5i2aJEJbMe5i2aJDKcMd5i2`JEL`MNNf5m2`JEL`MNOe5l2aJEKaMONe5l2aJDLbMNNe5l2aJDLeMc5g2aJDLbMNNe5l2aJDKbM0Nd5l
2aJDKcMOMe5l2aJDKbM0Nd5l2aJXOMoM1Nf5k2[JXO0lM11d5j2\\JYO8mM\\5j2\\JYO8lM]5l2ZJXOW6h0iIXOW6g0jIYOV6g0jIYOV6g0jIYOV6g0jIYOV6g0iIZOW6f0iIZOW6g0hIYOX6g0hIYOX6f0iIZOW6f0iIZOW6f0iIZOW6f0iI[OV6e0jI\\OU6d0kI^OS6b0mI@Q6`0oIAP6?oIBQ6>oIBQ6>PJAP6?oIBQ6>oIAR6?nIAR6?nIAR6?nIAR6?nIAR6?nIAR6?nIAR6>nIBS6>mIBS6>mIBS6>mIAT6?lIAT6?lIAT6?lIAT6?lIAT6?lIAT6>lICT6>kIBU6>kIAV6>kIBU6>kIBU6>kIBU6>kIBU6=lICT6=lICT6=lICT6=kICV6=jICV6=jICV6=jIBW6>iIBW6=jIBW6>iIAW6`0iI_OX6a0hI^OZ6a0eI_O\\6`0eI_O[6b0eI]O\\6c0dI\\O^6c0bI\\O^6e0bIZO_6f0aIYO`6g0`IXOa6g0`IYO`6g0_IYOb6g0^IWOd6i0\\IVOe6j0[IVOe6j0[IUOf6k0ZISOh6l0YISOh6m0XIROi6n0WIQOj6o0VIPOk6P1UIoNl6P1TIPOm6P1TIeNJjNT7b2SI[M\\8R2n0I7M3J6I7J6L4I7J6K5K5LPic0NTW\\OS1cDhNY:\\1mEiNd9W1[FkNd9S1\\FoNd9P1[FQOf9l0\\FUOd9j0[FXOe9f0[F\\Oe9b0\\F_Od9?\\FCd9;\\FHd95\\FMd91\\F?U9@kFb08kNFMZ8d0hGf0LfN55JO[8>jGj0KeN63KO[8=jGm0JdN63KO0MU8?oGP1LaN54JO2MT8Ih7m0hG\\O?Gj7i0gGD?Cj7g0eGJa0@h7d0jGL=Ch7>iG3?@B^Oo7l0RH71kN6m0GVOf7i0^H:OjN6U1[7F`HRIPN0c1m6=SIPN0d1l6 located relative to ?", + "answer": " is over .", + "image": "images/caption_simple_62.png", + "model_output": " is over ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000224051.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "WeS3451j4o0j3R4QLbJ1<6n0k3Q4mKhJ199l0k3Q4jKkJ37f0j3V4aKoJ93=g0k3f4gKcJ>L_O6_4Y5eKdJ?EE8CDf4j5cKeJ`0AK6X4c5^KeJ]6Y5cIgJ^6Y5bIfJ`6X5aIgJa6W5_IiJb6W5^IgJc6Y5]IgJd6X5\\IhJ=H]OOe5a5`JhJ7=U5j4dJiJ6?U5h4eJiJ5`0W5f4dJjJ5?Y5f4bJmJ3=\\5f4_JPK38a5g4\\JRK25e5h4ZJSK04g5i4YJTKN4i5h4YJRLi5m3WJQLk5P4TJoKn5P4RJoKo5Q4QJnKP6S4oIkKS6U4mIiKV6V4jIiKW6V4kIhKV6X4jIhKV6X4kIdKX6\\4hI\\K`6d4aImJm6S5SIlJn6U5RIjJn6V5RIjJn6W5RIgJo6Z5QIdJP7`5mH]JU7d591QI^J\\O?o5S5aJ_KY5b4eJdKU5]4iJgKU5Y4jJiKU5V4kJlKT5T4kJnKS5S4lJnKT5S4kJmKU5S4jJnKU5S4kJmKT5T4kJnKT5S4kJmKT5S4nJlKR5R4PKoKn4Q4SKZLb4e3`K^L[4b3fK_LY4a3gK_LX4b3hK^LW4c3iK]LW4c3hK^LW4c3iK^LU4c3jK]LW4c3iKULCoM11b4l5jK]LT4d3lK]LR4d3mK]LR4e3nK[LP4f3PLZLo3h3PLXLo3l3oKTLo3Q4nKnKQ4S4oKnKP4S4QLlKl3V4VLhKi3Z4VLgKg3[4YLgKd3Z4\\LfKb3\\4^LeK`3\\4`LeK^3\\4bLeK\\3\\4dLdKZ3_4eLbKY3_4gLcKT3a4lL_KQ3c4oL_Km2d4SM\\Ki2g4WM[Kf2e4\\M\\K_2h4`MYK^2i4`M[K[2g4dM[KX2i4hMWKT2l4lMYKl1j4TNYKe1k4\\NYK[1k4eNXKT1m4lNYKk0i4UOg4001O001O0O100O00O010OO200001N2N200O3LhTi1" + }, + { + "size": [ + 428, + 640 + ], + "counts": "T:X3T:000O100OoMRFmNMc1n9ROSFD5CM657H30EQ:MPF50K1N08NJ7NL584J4KHV:NoEO15LI60>2[O069JJQ:KPF33O15LJ5OL1OO:2E158KJk99VFG42JL77JH3;0MMKi9 and ?", + "answer": " is parked on .", + "image": "images/caption_simple_63.png", + "model_output": " is parked on ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000530099.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "T\\Y11d;3N1O100O1O1O1O1O1O100O1O1O100O1O1O1O1O1O1O100O1O1O1O100O1O1O1O1O1O100O1O1O1O1O1O100O1O1O1O100O1O1O1O1O100O1O1O1O1O010O1O1O2N1O100O1O1O1O100O1O1O1O100N200O1O1O100O1O1O1O100O1O1O1O100O1O1O1O1O100O1O1O1O1O1N2O1O100O100O1O1O100O100O1O100O1O10000O10000O100O100O10000O10000O10000O10000O1000000O100O10000O100O10000O100O100O10000000000O100O10000O10000O1000000O100O10000O10000O1000000000000O1OVM[Hd1e7ZN\\Hg1d7VN_Hj1a7UN_Hl1a7SN_Hn1a7QN_HP2`7PN`HQ2`7nM`HS2`7mM_HT2`7kMaHV2_7iMaHX2^7hMaHZ2_7eMaH\\2_7bMbH_2]7aMcH`2]7_McHb2]7]MdHc2\\7\\McHf2\\7[McHf2]7XMdHi2[7VMfHk2Z7TMfHm2Z7RMfHo2Y7QMgHP3Y7oLhHQ3X7nLgHT3Y7kLgHV3X7jLhHW3X7hLhHY3X7fLhH[3X7dLhH]3X7bLiH^3W7aLiH`3V7`LjHa3W7\\LiHf3b72O1O1O1N2O1O1O1O1O1O1O1O100O100000000000000O10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000GhKmH11X4R7gKmH21V4R7hKmH21V4R7kKPIU4P7kKPIU4Q7jKoHV4R7gKPIY4W700000001O000000cH" + }, + { + "size": [ + 375, + 500 + ], + "counts": "Zel11f;1N2O3L2O0O2O0O2O1N2N3N3L3M102M1O3L2N3O1N2O1N3M2N2M2O1O1O1O1O1N2O1O001O1O10O00010O01O100O001O100O100O100O1N3O1N2N1O2OO10O0100O010O010O10O01O10000O010O1000000O10O1000O1000O010000O10000O1O01000O1O1000O01O1000O0100000O01O1000O10O1000O01000000O01000O1000O1000O0100000O1000000O0100000O1000000000000O10000O1000000O100O1000000O101O0O10001N101N101O001O000O2O001O001N10001N1O1O1O2N1O1N3N1OaEROX:l0gEVOY:i0gEYOY:e0gE[OZ:d0gE\\OZ:a0fEA[:=eED[: and ?", + "answer": " is sitting on .", + "image": "images/caption_simple_64.png", + "model_output": " is on top of ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000202339.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "Vk_18cc06I6J7J5J7I6J6J7J6J5M3M4L3M3M3M3O1000000000000000000000O100000000O100000000O10000000000O011O001Og0XOc0^O9G:F4K6Kohi6" + }, + { + "size": [ + 640, + 480 + ], + "counts": "koo18gc03J6L3L3L5L3L4K4O2M3O1H8L2O0J3QN`NP@IY1m1k>j1O10M3O1O1010o_O_Lg0LSOe0n=h3[BRMEcNR34U7^4kEYMf2]N\\7d6[H^Id7j6SHXIk7P7mGQIR8[7aGhH^8^7\\GbHc8c7YG]Hg8g7UGYHP7H[IS8AVHn63[Ii7DUHP79UIf7HPHR7a0QIb7JmGS7f0PI^7KmGT7h0nH_7JjGW7l0kH[7LjGX7m0kH[7JiGZ7o0jH[7HfG_7P1hHZ8W7iGfHWNIj9`7SHbHUN4a9[7[H]HXN3a9`7XH[HW8e7R20O2N1001OO10O0100O1O2O0RFWHQN6JJ2No8l7SIWHoMj0i4[OVLNU1f7LYHmMk0f4l0\\MP61oHmM_O2F`4P2aKVN_1h7`0cHnMDa0KR4]2lLa5b0kHZ3e1nKa5d0mHe3e1\\K`5m0kH[NTOX5b2[K`5Q1jHP4e1kJd5P1kHX4a1cJg5P1mH^4^1UJn5Y1hHf4m9TKXFn4h9nJZFV5c9dJoDTO_1[6a9UJPE@i1]6V9QJoFQ6Q9jISGW6Z;002O0O2N010O1O2OO01O10O02N001O100O1O2O1N2N4L4L3N1N3M3N1NTJiCP18iMI\\1Ud600000O10O01O1O1000O01O1[OVOR^Ok0la0\\OP^Oc0Pb0@m]Oa0Pb0Cn]O>Qb0Dn]O;Sb0Fk]O;Tb0Gj]O9Wb0h0000O101N1O101N1O2O2M2^O_]OBcb08h]O_O\\b0=f0K6KggS4" + } + ], + "question": "What is the relationship between and ?", + "answer": " is carrying .", + "image": "images/caption_simple_65.png", + "model_output": " is being held by ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000172396.jpg", + "mask_rles": [ + { + "size": [ + 351, + 640 + ], + "counts": "3_2`82M5L3M3M6J3M2N5K5K4L2N4L3M3M6J4L2N5K6J3M2N3M5K2N8H4L6J1O3M3N1M5L3M3N7H3L5L2N4L3M3M3M4L4L5K2N3M4L4L4M1M5L3M4L5K1O2N6J3M4M3K3N4L4L4M1M2O5K3M5K2N2N4L2N2N2N00001O000000000001OO10000001OO100001O00O100001O00O100001O00O100000000000000001OO10000001OO10000001OO10000000000000000000000000O1001O00000000000000O1001O000000O1001O00O1001O000000O10000000000001O00O1000000000000001O00O1000000000000001OO100000000000000001O00O1000000000000001O00O10000000000001O000000O1001OO10000001O00O1000000000000001O00O1000000000000001OO1000000000000001O0000O1000000000000001OO10000000001O0000O1001O00O100000000000000000cILo14^40000000000000000000000000O1001O000001N11N1001N10O2O0001O0000O1001N10O2O0001O0000O1001O00O10000001OO10000001OO10000001OO100001O00O100001O00O100001O00O100001O0000O1001O00O10000001OO1000000000000001O0000O10000000000001O00O1000000000001OO100000000000000001O00O1000000000000001O00O2OO101OO100001O000000O10000000000001O00O10000000000001O000000O11O0000O1001O000000000000O10000001O0000O1001OO100QJMR13n4O00000000000000O1001O0000000000000000O11O0000000000000000O11O000000000000000000000000000000000000O11O00O100N2001OO1L4N20000O1O1N2H@mEd0P:7I7K5N2O1O1L4L4O1N2D]NXGg1f8:F:M3O1N2N2N3L2N[MSH^2a7`M]H65Y2]7h0G9K5N2M3M3N3N`LXIj2h6WMXIi2V6bLlINUO:01f4h2gKjLB6l0MVO20M93[4m2lKnL_O5k0NVO12M20K4e4i2PLRM[O2ONm02VOO<2@33K^4j2XLWMXO00Nl03BNB6O02J_4f2\\L[MSO5l0KC:BJ1K_4b2`L_MoN5l0JD;@K2J_4_2cLbMlN4m0LB:AJ3K^4W2kKiMF4b0M\\O20Lm02VON7d0I]O^4V2SLjM21ZO49K78H and ?", + "answer": " is inside .", + "image": "images/caption_simple_66.png", + "model_output": " is inside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000404839.jpg", + "mask_rles": [ + { + "size": [ + 640, + 427 + ], + "counts": "_l02g15\\NK?0E0b`07j@S1S?POj@S1U?lNi@FdN^1b`0kNa@IXOY1X`0mN`@LXOV1W`0oNX@HWO74HM[1``0oNT@7]OP1``0hNQ@:^Oo0b`0eNP@T2Q`0kMo_OT2R`0mMj_OV2V`0jMi_OW2X`0hMh_OX2Y`0fMi_OX2Y`0fMh_O4@j1i`0SNf_O3Ai1l`0RNb_O6Bh1l`0TN`_O4Aj1Pa0VN[_ONGl1n`0XNX_OMJk1n`0YNW_OQ2j`0nMT_OT2l`0lMR_OV2Ua0cMk^O]2Wa0bMg^O7On1Xa0`0M32N2N0O01000OgN\\MXAc2h>^M]@0;b2W?_M]@14OD`2j?aM^@3FL42M^2]`0_Mg_OO03M_2k5iMQ5U2hJYNS5g1hJbNT5^1hJlNR5T1kJYOk4f0RKKa44XK:b4ETJfMZLO40?k2k8^OmITOhLb1e7oLgIX2b1P3V6nLdI_3W6]LeIW4o5gKnI`4P6`KiIg4W6YKeIk4Z6VKbIn4]6SK_IQ5a6oJ[IU5e6kJYIW5h6iJUIY5k6gJPI^5i6`HfGR2^1b5l6\\HfGR2U1oMgNi7^8VHfGT2l0P6d7RJWHQ6h7\\JdGl5[8m20000000000LdDdG\\;h7dDlH\\;g6hDiHN?Z;m70002NLgDaGY;^8hDbGX;]86ZLRE]KIo0G4^;\\3mE^KcNh03=^;[3WFULYNa0_;X3[FVLVNb03hNV;]4fFbM`9]2eF_M[9`2kFgKgM^1d1aNZN3a9U4TIUMQOfNn7T4RIUMPOgNo7S4TITMmNiNo7Q4ZIRMhNlNn7P4\\IPLQMm0\\1ROa8P4SIUMXNnNf8m3QIVMWNoNh8j3SIVMTNoNk8j3RIWMRNnNn8j3QIYMjMROV9d3QIZMjMmNY9h3nH[MlMiNW9l3nHYMhMkN^9k3kHYMfMnN_9i3kHYMdMPOa9g3kHYMdMPOa9g3kHYMcMPOc9f3lHYM_MQOg9e3kHZM]MQO]84kG`3]2YM[MSOU8e0bG\\OAc3]3ZM[MQOP8o0`GQ3V3oL[MPOm7a5iJ^KT6c4nI[KQ6g4oIXKUMQOR8i5iJWKTMPOR8j5jJVKTMPOR8j5jJVKTMPOS8h5jJXKP6f4PJYKW6a4iI_K[6\\4fIeK]6W4cIhKa6T4`IlKb6Q4`InKc6o3\\ISLg6i3YIXLoLoNQ9h4PJYLj6d3VI\\LkLQO]9`4iI_LfLTOd9Z4fIcLgLQOe9Y4eIfLgLQOd9X4eIiLaLlNR:Z4]IRMXLfN\\:V4\\IXMh6d2XI[Mk6c2UI]Mm6a2SIVMZ7f2fHWM`7f2`HYMd7e2[H[Mi7a2WH`Mh7`2XH`Mh7`2XH`Mh7a2WH_Mi7a2WH_Mi7a2VHaMi7_2WHaMi7`2VH`Mj7`2VHaMi7`2VH`Mj7`2UHaM2kNQ4e3lK`M4jNP4f3mK_Mk7b2TH^Ml7c2SH^Ml7c2RH^M5hNT4k3gK\\Mn7e2RHZMn7g2QHYMo7i2oGXMP8i2oGWMb0iNa3R4kKUMS8l2lGTMa0kNg3S4gKQMb0lNg3T4fKQMb0kNh3T4fKQMb0kNh3U4dKQMf0hNf3X4TKhKkMX1k:R3WGjKjMU1`3iNa3Y4SKlKjMS1`3hNd3Z4oJoKkMo0b3hNd3[4lJSLkMj0U;U3kFdMT9]2iFeMW9]2fFdMZ9^2_FgMa9\\2WFiMi9b2bEhM^:m501O1iHYGS3g8S40hN\\GTGe8f8cGWG^8`8kG_GU8]8PHbGQ8Z8SHeGm7X8WHgGi7l7fHRHZ7j7kHUHV7o5SGcJ;iNa1a0Q7R6UG^Jc2[OX6V6YG\\I1a0j2Gl5k3SGZNh0hMf;m3b
CZN^=e1dBYN]=U41N100O1hM_BQNb=]1bBoN_=o0gBiLEh1e=^1iBgLGg1`=a1lBdLGi1^=a1ZC^MPO3g=]2eDSM^;j2jDnLX;n2o2L4M3M3Ll^O[MUa0R2R_OVNSa0h1Q_OTNPa0k1c000O2N101N101O1O0O3[Ol]OZObc0JZmm3" + }, + { + "size": [ + 640, + 427 + ], + "counts": "nY:1nc02N1a]70^bH3N1O2M8H4M2M3N2M5K6K1O1N2O1O0OI`]OoNab0Q1`]OnN`b0S1_]OmN^b0V1a]OkN^b0\\100O010O2OO1000OO2M3M2O200003M00N2O10000N\\]OiNcb0X1201O3ROo]OHQb05Q^OKoa05Q^OKoa05Q^OKTb0OgQ?6nn@6N000O020O00000004LBHU]O7kb0IX]O4hb0MY]OOib02V]ONib03b01O1O001O1O1O1O00001O4L2N1O2N2N3M1O1O001O1O0000001O0O1000O1100O1O0000001O001O00001O000000001O000000000O2O0000010O00001O0O101O01O01N2N2M201O000000001O00000000001O0000000010O000001O0000001O0j@_Nm;a1QDbNo;]1QDeNm;[1SDgNk;Z1UDfNj;Z1VDhNh;X1YDiNe;X1]DfNa;[1`DfN^;[1aDfN^;Z1bDhN\\;X1eDiNY;X1gDhNX;X1iDhNV;X1jDiNU;X1jDiNU;W1kDiNU;V1lDkNS;U1mDkNS;U1lDmNS;T1kDnNT;R1kDoNU;Q1iDROV;n0iDSOW;m0hDTOX;l0YD\\NgMi0P>k0YDaNaMe0V>j0YDCg;=YDCh;=WDBj;>VDBj;>WDAj;>VD^On;c0QDQO`MZO1Na>g1nCQO`MZOf>f1iCPOaMZOg>f1gCTOZeDBZ;>fDBY;?iD_OW;a0kD\\OV;d0kD[OU;e0kD^OR;b0oD^OP;b0PEBl:>UEBk:>TECf9lNWDa1S2C`9WOXDV1Y2D_8XOeD6?n0^2DW8n0lD]On2ER8n2nGRMn7R3RHoLj7T3VHmLf7X3ZHfLc7^3]HbLa7_3`HaL]7`3dH`L[7b3\\FaKF31k0i9d3[FmK[O21RO2\\1U:d3[FXM_OTOT:g3XFmNf9T1XFoNf9S1VFQOh9e5N1O1O101_KSFlNQ:n0RFQOU:g0mEYOT:c0oE]OR:a0oE^OY:8jEHX:OoE0T:KoE4U:FnE:U:BlE>X:mKoDT3m0n0g:^NdEb1a:UNcEk1`:\\MQDmNd1g3]:fLhFZ3^9ZLhFf3i and ?", + "answer": " is beside .", + "image": "images/caption_simple_67.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000069138.jpg", + "mask_rles": [ + { + "size": [ + 640, + 371 + ], + "counts": "o96b0NDNM72KQb04[^OLD;056@Hj00SO48LJNOV?9XAKB=35NYO1]7i9nHVFL1d00`7i9YIWFg6i9ZIVFf6j9ZIVFf6j9ZIVFf6j9ZIVFf6j9[IUFe6j9i10000001O000UJUFa1k9Z4000VJUF_1k9`NWF_1i9aNWF_1i9bNUF_1k9[4001O00000XJTF\\1m9cJSFf31e1m9cJTFg30e1W:bJgEb32l1X:[NhEd1Y:[NgEe1`:TN`El1d:PN\\EP2e:oM[EQ2e:PN[Eo1f:_KXE`11Q3j:\\KUEc11Q3n:XKQEh10P3T;SKlD`7T;f0000000001O0000OaJlDT2T;lMmDS2S;]300O1O1OlHoD[5P;fJPEZ5m:iJSEI2e3j:[NVEd1g:_NYEa1g:_NZE`1f:aNYE_1g:aNZE^1f:bNZE^1f:bNZE^1g:aNYE_1g:bNXE^1i:aNWE_1i:bNVE^1k:aNUE_1l:aNSE_1m:aNSE_1n:`NSE_1n:i30iJQE]1P;bNPE^1P;i31O0jJoD]1Q;bNPE^1P;bNPE^1P;cNoD]1Q;i3O10lJPEX1o:iNREV1m:kNTET1k:mNUES1k:mNVER1i:oNWEQ1h:QOWEo0i:QOXEn0i:QOWEo0i:QOWEo0i:QOXEn0h:SOWEm0i:SOWEm0i:SOXE_LV1b2b9WNUEo03PNm1i2k8VNZE\\O2_OL7S3h2e7iM\\JV2g;00000000000001O001O00O11O00000000001YNQ_OGea0VOZ^O1olo0NoRPOV1Ya0]1N2O100000000O1000000O1N200000000000000O10000000000001O0000O100000000001O00000000O10000001O000000000000O10000001O00000000O1O1000000O1000000000000YMRMkA0Z2o2Qh?S1dB`NcM1i?_1hB`NX=`1iB_NW=`1iBaNW=_1iBaNW=_1iBaNW=_1iBaNW=_1eB`NdM1g?_1eBaNcM0h?_1dBbNdMOh?`1cBaNeMOh?`1cBaNeMOh?`1bBbNfMNh?`1bBgN]=Y1cBgN]=Y1dBfN\\=Z1dBfN\\=Y1dBhN\\=Y1cBgN]=Y1cBgN]=Y1cBcNeMNh?_1bBhN^=X1bBhN^=X1aBiN_=W1aBiN_=W1aBiN_=W1aBiN_=W1bBhN^=X1bBhN^=X1aBiN_=W1aBiN_=W1aBiN_=X1_BiNa=W1_BiNa=W1_BiNa=V1`BjN`=V1_BlN_=U1aBkNV<^2jCbMQ;c3oD]Lf:n3YESLf:R4ZC`Kb1`0T;e4^D\\Kb;n4SDSKm;`5\\CdJd<\\600O100O1O1O10000000000O101O0000000000000000000000000000000001O00O1001O0000000000000000000000000001O0000000O10000O100000000000001N10000O2[Od0oMnG_F0a0j8R3\\Gj0m:oN^E[OBZM5Gc;Y2YDhMi1T1TNiNR>[1n[Z2" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is over .", + "image": "images/caption_simple_68.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000342367.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "QnQ62n>1m>NYRO1e>0ZA4a>M`A3`>L_A6`>I`A9c>I\\A2l0MVO2V=MiB2N4c0=e<]OkB9Nm0f;jNgE1cNb1d;UO[DB<^1X;BbD>];EaD;\\;IcD8Z;h1M3M3M4L4L3N3N11O000000000010O0001O0000001O000O101O00001O001O00001O0000010O0001O01O00001O010O001O010O0000001O0jLQE\\2o:]MYEFI`2o:fM[EII_2m:aMaEOC_2h;`MZD_2e;aM\\D^2d;bM^D\\2b;dM_D[2U5K4N2O2N1O1O1O100O2N1N2O2O0O100O2N1QNZOdEf0V:EdE and ?", + "answer": " is in front of .", + "image": "images/caption_simple_69.png", + "model_output": " is in front of ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000263796.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "[i;a0Tc0>L2N3N3M1N4K3O2N3f]O`Nma0l1M8I1\\OhM]_O[2``0hM]_O]2^`0fM__O^2^`0e0L5L5K3L2O0O2O000O10000O101O001O002N0002N2N3M1O3M2N6J4L1O1O2N1O001O000000TOoLi@Q3V?SMf@n2Z?UMc@k2]?XM]@k2b?XM[@i2e?[MU@g2k?g0O10000O1001O00O1O1O100O1000000000000002N2N1O2N6J4L1O1O0000000000VO^LVAb3h>dLTA\\3k>gLRAZ3n>hLo@Y3P?lLi@W3W?mLc@U3\\?h000O10000O100O100O1000000000000000000O10000000000O10000O10000O10000O1000000O100O11O1O00O1O10000000000N2O100N200000000O10000001O1O5K3M8H2N6J3M2N3M3M2N2N1O1O1O2N1O1O1O001O1O1O1O1O001O1O2N1VM__OS2a`0gMk_OS2W`0gMQ@U2P`0hMT@V2l`0O1O2N2N1O2N1O1O1O1O001O001O1O001O001O001O001O0000001O000000001O0000001O00000000000000000000000000000000000000000000O10000O100O10000O1O1O1O100O1N2O1O1O1O1O1O1001O002N3MO1O1C=N2TOhMY_O;7m1[`0^Nd_Ob1[`0`Nd_O`1[`0bNc_O_1]`0R1BhLT@X3k?iLU@W3k?jLT@V3l?jLT@V3l?jLT@V3l?jLT@V3l?jLT@V3l?jLT@V3m?iLS@W3l?jLS@W3m?kLQ@U3o?>O1N2N2K5A?O1O100O100001O001O0000XMZKWFe4h9^KaCMc2e4l9bKRF^4n9bKRF^4n9cKRF\\4n9cKSF]4m9bKUF]4l9^KaC0c2b4l9]KbC1b2b4l9]KbC1b2b4g<0O1O100O100O100O10000000000001O00000000000000O10000000000000000O1000000000000001O001O00001O001O1O001O1O3M2N1O1O2N1O1O001O00001O1O002N3M2N2N1O2N3M1O2fMi_Og0W`0UOQ@g0Q`0TOT@j0o?_N`_O2i0\\1i?`N__OLQ1b1b?`N^_OLS1b1a?^Nj@`1n`
0M4L1O002N2N3M1O001O1O2N2N2N1O2N3M1O2N1O2N3M1O2N1O2N5KQl`0" + }, + { + "size": [ + 640, + 480 + ], + "counts": "dad3a0\\c0:G8G8H7J5K6K3L7J4M1N4L2O2M4M1N2O2N2^_OWMh?k2T@ZMj?g2S@\\Ml?f2Q@\\Mm?f2Q@\\Mn?e2Q@[Mn?g2P@ZMP`0\\3N1O100O1O1O1O2Z@nK\\?\\4N001O100O00100O10O01O010O10O0100h@\\KS?`4RA`Kk20o8`4VD`Kk20o8`4\\31000000O10001O000O10000000000000001O000000000000001O0000000001O000000000000001O00000000001O000001h@\\KS?c4l@`KR?g41O0100O01O00010O10O01O0O2J6L4N101O1O1O1O001O2N010N2O1O1ZOT@VMm?h2U@WMk?g2X@WMj?h2W@WMj?g2W@XMk?g2X@UMi?j2[@nLk?n2f0N1M4N2N1O2L3M4L3M4N2K6K4I8J6M4\\Ombf2" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is on .", + "image": "images/caption_simple_70.png", + "model_output": " is on ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000119828.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "`oY1a0P;:C;I8I5M4K4N3L3M3N1O2N1O2M3N1N3N2M3N2N1O2M2O2N1O2N1O2N1O2N2N1O2M2O2N1N3N2N1O1O2N1O1O1O2N100O101N1O101N1O1O2O0O101N10000O100O101N1000000O10000000000O1000000O10000000O100000O10000000O100O10000O10000O100O100O1O1O100O10000000000O1000000000000000001O0000000010O00000000001O0001O01O01O0010O00010O010O2O1N2O1N2N4M3M3L4L2O0O1fJoJl3T5lKlJnN2W5T5fKjJUO3T5U5eKiJTO4W5V5aKnJ`4V5ZKkJf4W5YKhJg4Y5UK^JD9X5S6001O00010O001O001O00001O001O001O0000001O00000000001O0000000000010O00001O00001N100010OO2O001O1O0O4MOLUJ^Jj40QLc5WOZJg4:lKU5[OfJm49dKP5CdJj4]6VKbIk4_6SKcI1Ne4_6[KbI0Of4_6ZK`I20e4_6YKaI20f4^6XKbIo4_65O1OjJcIS5`61O1O1O1NgJeIY5V67M2N3K401M2N2O2H7B>^Ob0K5N2N2N2O1O2N002N100O1O011N10O0101N10O0101N100O100O100O1O100O001O1O100O1O1O1O2N1O1O1O1O001O0O20OO2N2N1N3M3J5@a0L4N100M3O2N1M3O2L4^OlE[OY:b0?M4N2N1N201N2N100O101N2N3K[Vf0" + }, + { + "size": [ + 375, + 500 + ], + "counts": "_Sg31h;0N0bh01[WO4M5K3N3MCPE4n:LUE4i:K[E3d:M]E3a:NaE1^:OeEOZ:0hEOX:1g00PE2V:0gE3X:g001M2O1N101N2O001O001O00001N100O100O100O10000000000000000000000000000000000001O0000001O0000001O001O0O2O0O2O0J7K5J5L6I8H\\eR1" + } + 
], + "question": "Where is located relative to ?", + "answer": " is attached to .", + "image": "images/caption_simple_71.png", + "model_output": " is on ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000001993.jpg", + "mask_rles": [ + { + "size": [ + 419, + 640 + ], + "counts": "]8a0a<1O2O1O0O10001N10000O2O0O10000O2O000O10001O0O100000000O2O00000O100001POTDg0Vo:CoD>P;]3fDiKc8]4^OcK\\H]4d7dKZH]4e7fKWH\\4i7gKRHf2c9Gf0N1010O10O100O010O100O10O10O100O0100000O10O100000O100001N2O3M3M2N2NGnEcMP:]2RFdMk9\\2WFdMg9\\2[FdMb9]2`FdM]9]2dFcMZ9_2gF`MW9b2iF^MU9c2lF]MR9d2PG[Mn8g2SGXMk8[2fFkM>Jk8W2nFnM7Ki8U2TGPN4Ig8V2ZGoM0Hf8X2]GoMV9P2mFPNQ9d1XF]Ni00n8]1aF^Ne05h8[1jGdNU8[1nGeNP8X1THhNk7T1[HlNc7P1cHPO\\7k0jHUOT7g0RIYOm6b0XI_Of6=`IB`69fIGY65kILT6OSJOm5LYJ4[9010O1000O1N2O1MbVX5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_72.png", + "model_output": " is beside ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000221502.jpg", + "mask_rles": [ + { + "size": [ + 320, + 640 + ], + "counts": "nR63g99K3O1N1O2O1N2O1O0jFZOR9k0N5YGTOQ8n0mGUOk0Nk5n0YITOj00l5m0ZITOg01YOGY6W1gIROLL>5CJ[6S1`IjN4f0;OP6b0aIkN2g0NWO7g0X6h1dIZN[6h1cIYN]6e20DcIWM[6i2gIUMY6m2eIRM]6n2dImL_6n2n1g5QNkI0?n1f5TNjIMa0o1d5VNjIKb0o1d5UNjILc0o1d5TNiIMc0n1d5WNhIKd0n1d5XNhIHe0P2c5YNhIFe0Q2d5WNiIEe0U2c5TNdJl1]5SNcJn1]5WN^Ji1b5VN_Ji1a5WN_Jj1`5VNaJi1_5WNaJi1_5WNaJi1`5VN`Jk1_5UNaJk1`5TN`Jl1b5QN_JP2c5mM]JT2c5jM_JU2a5kM_JV2a5iM_JW2e602N2N2M5K8I;E5K8G;F9Febm4" + }, + { + "size": [ + 320, + 640 + ], + "counts": 
"Qgj0131f91XF1g95N2N1O101N1O100O1000000O1O10000O10000O100O100O2O0mN_OgHa0i5I`Ie0b0Bn5T1RJmNl5U1RJlNm5U1SJkNm5U1SJkNm5U1SJkNm5U1SJkNm5U1SJkNm5V1RJkNm5U1RJlNn5S1SJmNm5T1RJlNn5T1RJlNn5T1RJmNm5S1TJnNj5R1VJQOg5o0YJSOe5m0[J]O[5c0eJC^O_Nc5n1oJOd0UNi2l1cL8:nMS3i1cLa00hM]3h1bLg0DZMK:o3i1\\LZ1d3a201O00O1001O000000[MXLROh3n0`LiNa3V1eLeN[3[1mL]NS3c1WMRNj2n1YMnMh2R2aMcMa2]2cM^M^2X1XLmN]1F\\2\\1[LkN]1CZ2b1YLkN`1]OZ2h1VLkN`1\\O[2i1ULkNa1ZO[2k1TLkNS6U1mIkNS6U1mIkNS6U1mIkNS6U1mIkNS6U1mIkNS6U1mIkNR6V1nIjNS6U1mIkNS6U1mIkNS6U1mIkNS6U1mIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1lIjNT6V1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIjNV6V1jIjNV6V1jIjNV6V1jIjNV6V1jIjNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6T1jIlNV6T1jIlNV6T1jIlNV6T1jIlNV6T1iImNW6S1iImNW6S1iImNW6S1iIlNX6T1hImNW6T1hIlNX6S1iImNW6S1iIlNX6T1hIlNX6T1hImNW6S1iImNW6S1iImNW6S1iIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6T1hIlNX6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1
gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1hIjNX6V1gIkNY6U1gIkNY6U1gIkNY6U1hIiNY6W1gIiNY6W1gIiNY6W1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1eIkN[6U1eIkN[6U1eIkN[6U1eIkN[6U1eIkN[6U1fIjNZ6V1fIiN[6W1eIiN[6W1dIjN\\6V1eIiN[6W1eIiN[6W1eIiN[6W1dIjN\\6V1dIjN\\6V1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIhN\\6X1dIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIhN\\6X1dIhN\\6X1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1dIhN\\6X1dIhN\\6W1fIhNZ6Y1eIgN[6Y1eIgN[6Z1bIhN^6]200000000000000000001O00:Fg0`JnKd3Y4TLkKi3X5NlLaKi0]4jNRLU1m3jNULW1i3hNXLY1g3gNXL[1g3eNXL\\1h3dNXL]1g3cNZL]1e3cN[L]1e3cN[L]1e3cN[L]1e3cN[L^1d3bN\\L^1d3bN\\L^1d3bN\\LjN6@YO]1U49]LiNg0d0l2c0\\LjNR19b2l0]LkN]1MW2W1\\LkNc1IQ2[1^LkNe1Fn1X1cLRO`1Da2f0PLF`1CT32]K;`1BZ4>iK^OX4b0c2O1000001O00001O00000000002N001O00000O100000001N5Hgm?N]R@21Oci:" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_73.png", + "model_output": " is beside ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000312586.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "`eo63X=2N1N3L3N3HD]C>b<61N101O000O100O2O00O1O101N2O103`CXOT and ?", + "answer": " is in front of .", + "image": "images/caption_simple_74.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000187236.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Y6c0n0i1U:XNkEg1T:^NjE`1U:cNjE\\1V:fNjEX1V:iNlES1U:oNjEP1U:SOjEl0V:UOiEk0W:WOhEh0W:\\OfEd0Z:]OeEb0\\:@aEa0_:A_E?`:C_E=a:E\\E;e:GYE9g:IVE8j:KSE5m:o100O\\LTEV3m:gLVEX3j:fLYEY3e:hL]EW3c:iL]EW3d:gL^EX3d:eL]E[3[7dLnK[3l7000O1000000000YM_Do1a;oMbDo1`;oMcDo1_;nMbDR2a;iMbDV2_;gMcDY2_;cMcD]2P<0O100000O010O1000O01000O10O010O1O01WObMPE^2o:cMPE]2P;cMQE]2o:cMPE]2]:bMXF]2i9cMVF]2k9cM[E16\\2_:eMZE06Z2a:gMWEO8[2a:gMVEN8[2b:jMREM;Z2c:kMoDMoBDP=;QCFn<:QCGo<8RCIm<7RCJn<6RCJn<5RCMl<4TCLl<4SCMm<2SCOm<1SCOm<0SC1m and ?", + "answer": " is lying on .", + "image": "images/caption_simple_75.png", + "model_output": " is on ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000255749.jpg", + "mask_rles": [ + { + "size": [ + 424, + 640 + ], + "counts": "W_U1e0Z2H]7Q1bG]OZ8j0[G]Oc8g0WG]Oh8f0QG_Om8f0eFEZ9U2O2N100O100O1N2M3NAXGbLd8`3`G^L`8_3cGaL]8\\3gGcLY8Z3jGeLW8X3mGgLS8W3oGhLR8U3R1I7_Ob0WN]E`0n:]OZEi]Y2" + }, + { + "size": [ + 424, + 640 + ], + "counts": "m`T28i<;I4M3N1O1O2N1O1O1O1N200O1O1O1O2N1O100O1L4M4L3N2O1O10000O10O100nNlDGKD[;b0nDGIEZ;b0PEGY;9hDD[;:gDC\\;;j0M3N2N4JhSc5" + } + ], + "question": "What are and doing in relation to ?", + "answer": " and are entering .", + "image": "images/caption_simple_76.png", + "model_output": " and are looking at ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000575243.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "ZiV34Q=6M3L4L4N2M2N2O1O2O0N3O001N10001O000O2O010O001O00001O10O0001O001O010O001M3O1O010000O10O01O100O1O00100O1O1O010O10O01OO20O0100O01O100O100O00100O100O010O1O1O100O010O100O101N1O100O1000001N1O100O1N2O2O0O1O2N100O1O2O0O2O001N100O1O101N101O001Ni`b3" + }, + { + "size": [ + 427, + 640 + ], + "counts": "hcW31Z=0ea91odJ5QiK7RCN^L5O0100O0102M2N4M2M:E7HTl0OZbk3" + } + ], + "question": "What is doing with ?", + "answer": " is holding .", + "image": "images/caption_simple_77.png", + "model_output": " is holding ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000473118.jpg", + "mask_rles": [ + { + "size": [ + 500, + 346 + ], + "counts": "Uod01c?00000\\im00cVRO9_@Kh>f0G9D<\\ObNgBi1W=;K5M3M3O1000000001OAaMhC`2Th80bIBfM>h80aIDfM]2KYN:m8]O]F`0U2O`N4n8<]HCeN2n8;ZHFfNOR9;UHJgNJV9=lG0lNCY9=bFkNc0Y1@@\\9<_FROXORB7X>GmAMchm1" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is jumping from .", + "image": "images/caption_simple_78.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000527215.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "i8h02n2h8RMXGn2h8RMXGn2h8RMXGn2h8j0O100000000001O00000000000000000000000000000000O11O0000000000000000000000000000000000O1000000O100O100O1O10000O1000000O10000O1O10bKaGQ4_8oKaGQ4^8PLcGo3]8PLdGP4`7fK`H0a0:@P4_7gK^H1`09Do3^7gK]H2>;Gl3^7oKgH6Kk3]7RLeH4Nj3]7ULaH22i3]7kLcHU3^7jLcHU3]7jLdHV3\\7gLgHY3Y7gLfHZ3Z7fLfHZ3Z7eLgH[3Y7eLgH[3Y7eLgH[3Y7eLgH[3Y7lK]Ha0:c3Y7eLgH[3Z7dLeH]3[7kK\\H`09e3[7jK]Hb07d3]7hK]Hd06d3]7gK_Hd04e3^7eK`He02f3]7hK_Hb05e3[7kK^Ha06e3\\7kK\\Hb07c3]7eLcH[3]7fLbHZ3_7eLaH[3_7eL`H\\3`7dL`H\\3`7eL_H[3a7fL^HZ3b7gL]HY3c7W100001O000000001O000000000000000000001O000000000000001O00000000000000000000001O00000000000000000000000000001O001O001O1O001O00001O00001O001O001O00000000000000O100O100O100O100000000000000O1O100O1000000O1000000000000001O0000000000000000001O0000000000000000000000000000001O0000000000000000001O0000O1000000001O00000000000000000000001N11O00000000000001O00000hKUHQ3k7iL^HT3b7hLhHR3X7lLkHS3U7mLkHS3U7mLkHS3U7nLjHR3V7oLiHQ3W7QMgHo2Y7RMfHn2Z7SMeHm2[7YM_Hg2a7aMWH_2i7bMVH^2j7eMSH[2n7fMoG[2Q8_1000000000000000000000000000000000000000000000000000000000000000000000000000lKnGP3S8oLmGQ3R8PMnGP3R8PMnGP3R8PMnGP3R8oLoGQ3Q8oLoGQ3Q8oLoGQ3Q8T1000000000000000000000kKoGQ3Q8oLoGQ3Q8nLPHR3P8nLPHR3P8nLQHQ3o7oLQHQ3o7oLQHQ3P8nLPHR3P8nLPHR3o7oLQHQ3o7oLQHQ3o7nLRHR3n7nLSHQ3m7oLSHQ3n7nLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLRHR3n7nLRHR3n7nLRHR3n7nLRHR3n7oLQHQ3o7oLQHQ3o7oLQHP3P8QMoGo2Q8QMoGo2Q8QMoGo2Q8QMoGo2Q8QMoGo2Q8RMmGo2S8QMmGo2S8QMlGP3T8S1000000000000001OO100001O000001O0O01001O00000000000001O0O11O00000000000000O100001OO100000000000001O000iKmGX3R8cLZHV3f7gLaHU3_7kLbHT3^7kLcHU3]7lLbHT3^7lLbHT3^7oL_HP3b7RMYHQ3g7QMSHT3l7nLoGU3Q8mLlGT3T8Q100001O0_KXH`3h7[L^Hd3c7VLcHd0YOV2V9gMXGn1X:J4L1OO1000nNSEGn:7SEIm:7UEGk:8WEGi:8XEHh:8XEHi:7WEIk:4VELn:0SEOS;JnD6U;GkD9Y;BhD>\\;]OeDc0a;UOaDk0n;1000000O1000000000000
O100O1\\O]OaDc0Y;j0@`0L4O1O1L4WOi0^OgLmF_3b8UMYGm2^8\\LaGe4]85L4N2O10000000kKlGT3T8kLnGT3R8lLnGT3R8lLnGT3R8kLoGU3R8jLnGV3R8jLnGV3R8jLnGV3R8jLnGV3R8jLnGV3R8kLmGU3S8kLmGU3S8kLmGU3S8lLlGT3T8mLiGU3W8o01O00O10000000000000000000000000000001O0000000RLmGe2S8ZMnGf2R8XMPHh2P8XMPHh2P8XMPHh2P8XMoGi2Q8XMjGl2V8T1000000000000000nKiGQ3W8oLkGo2V8oLnGn2R8QMTHj2l7UMUHk2k7TMVHl2j7TMVHl2j7UMTHl2l7UMlGR3T8R1001O000000000000000000001O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000eG" + }, + { + "size": [ + 426, + 640 + ], + "counts": "l]f33U=4COZC;^<:O02N2N1N6J:F3O01OOi\\^4" + } + ], + "question": "What is doing on the ?", + "answer": " is walking on the .", + "image": "images/caption_simple_79.png", + "model_output": " is walking on the ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000509131.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "mVb55T=2N2M2N4L3M4M2M4L5L3M1O2O1O2N3M3ZDfNU;k1N2L2O00100O0010O1O1O010O2N1O0O10001N1O2O1N2N2O1N2N2N2N2N3M2N4L5K4K4K7HS[Q2" + }, + { + "size": [ + 425, + 640 + ], + "counts": "^_^58P=e1\\N7I5K6J5Ld0\\O0001O001N2O1O2M2O2N3L4L101N2N2M3N4QNPE]1V;WNTEe1];J5K5J6J6K6H8Hc[]2" + } + ], + "question": "What is the relationship between and ?", + "answer": " is attached to .", + "image": "images/caption_simple_80.png", + "model_output": " is attached to ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000167902.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "[9Y1^:0O1000O10000000O100000O10000000O100000000000O1000000O1000000000O100000O10O100000000000O1000O1000000000O1000O1000000000000O01000000O1000O1000000000O1000O1000000000O10000000O1000O100000000000O1000O10000000O100000O10000000000O10O100000000000O100000O1000000000O100000O100000O10000000O100000O10000000000000O0100000000000000O10O1001O00O1000000000O10O10000000000000O10O100000000000O10000000O100000O100000000000O10000000O100000O10O10000000000000000O10O100000000000000O0100000000000O10000000O0100000O2O00001O001O001O1N2O2N2N3M2N6J8H4Kk]10UbN3N3L5VEFo9>lECDL]:f0dEE^:j00000000O01000000000O1000000000O1000O100000000O1000O10O1000000000000O1000001O00001O1O2M2O2N3M2N1O001N4M001O[ODSF8l9JTF5l9NQF2o91mE0S:4gEMZ:h0O1000000000O0100000000000O010000000O10000000000000O010000000000000O010000000000O10O10000000O100000000O10O100000O1001OO1000000000O01000000000000O0100000000000000O010000000000000O100000O1000000000000000O100000O100000000000000O1000O100000000O10000000O1000000000000000O010000000000000000O10O1000000000O1000000000O010000000000000O1000000\\H" + }, + { + "size": [ + 375, + 500 + ], + "counts": "anV15Z;MhD;o: and ?", + "answer": " is over .", + "image": "images/caption_simple_81.png", + "model_output": " is on ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000097924.jpg", + "mask_rles": [ + { + "size": [ + 400, + 600 + ], + "counts": "n7a4o70000000000@VHZLj7e3XHYLh7g3ZHYLe7c0WHS24ZMe7b0eHg1FgMe7b0gHe1DiMf7a0jHb1@mMf7`0lHc1\\OnMh7?lHd1[OmMi7?mHd1YOmMj7?mHc1]OjMg7b0mHb1_OkMd7c0mHb1_OkMd7c0mHb1_OkMd7c0mHb1_OkMd7b0nHb1_OlMc7b0nHb1_OlMd7a0mHd1^OkMg7?kHg1\\OkMj7=jHh1\\OkMk7k1[9UNfFj1[9TNgFk1o901O000000O1SOUN^F2OO:j1Y9UN]F95g1c9TNcFk1]9QNhFn1Y9SNeFm1[9SNfFl1Z9TNfFl1P:O1O0000001O00000000001O1N1001O000O10O10O100O100001O00000001O000O100O100O1001O1O001O0000O1YOWNUFN?k1\\9ZNaFi1^9XNaFi1b9SN`Fl1S:O1O1O001OXNZNYHNTOh1a8]NgHc1Y7^NgHa1W7eNeH[1Z7gNfHX1Y7kNfHT1Z7mNfHR1Y7ROeHm0[7UOdHj0]7UOcHk0_7SOaHm0g7gN^HX1]9000000000000000O100N2L400@`0000000000oMlEH0k1d:N00O10000TNXNPIh1P7YNZH\\2f7hMUHY2k7mMmGL_Oj1c8bNeGDNf1]8[OfGd0Z8[OhGd0W8\\OkGc0U8\\OmGc0S8\\OnGd0Q8]OPHb0P8^OPHb0P8]OQHc0o7]OQHc0n7^ORHb0n7^ORHb0n7^ORHb0o7^OPHb0Q8]OoGc0R8]OmGc0U8[OjGf0W8YOiGg0Z8VOfGj0\\8TOcGm0^8RO]GS1f8iNTG^1m8aN^GT1d8iN^GV1e8fN]GY1e8dN]G[1e8bN\\G^1f8`NZG`1i8\\NYGc1j8[NUGe1l8\\NbF10c1_9\\N_F21c1m9]NSFc1m9]NRFd1m9_NPFb1m9b0fNaMcH_2\\7eMaH[2_7fM`HZ2`7iM^HV2b7kM]HU2b7oM[HQ2e7QNZHn1f7SNYHm1g7TNXHl1h7VNVHj1j7ZNRHf1m7\\NRHd1n7^NPHb1P8`NnG`1R8bNkG_1T8eNiG[1W8gNfGZ1Z8hNcGY1]8]100O1000000000000O100000000000000000000O1O1001O0000000000000000001OO10000001O000000000000000000000000000000000000000000000000001O0000000000000000000000000000001OO100000000000000000000000000000000O1001O00000000`G" + }, + { + "size": [ + 400, + 600 + ], + "counts": "R]^16T<;H9G4oKXOULk0f3CoK?n3HkK;R4KiK8T4MfK6Y4NbK4\\4O`K4_41ZK2d45TKNf4hJDX5 and ?", + "answer": " is standing on .", + "image": "images/caption_simple_82.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000509656.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "0b6^800001O00O1001O000000000000001O000000000000001O00000000000000000000000000000000000000000000O1001O00O1001O00001O00N2bNmGPKJ=O@^8S5jGkJO0l8c5oFbJn8Y6_O4L2N0000000000000000001O00000000000000000000O1000000000;\\IbG?In11a0Y[JVOf5R1_JaNa5d1dJRN\\5V2gJ\\M\\5l2X36J?A9G:F:F:F:F:F>B7I5K5K1O0000000000000000000000O1lJeGc3[8[LhGc3Y8\\LiGc3W8\\LoG]3S8XLhGRO8_4U8_LcGQO9Z4Z8eL]GQO9X4\\8eLPHZ3P8eLQH`3j7`LWHc3e7]L[He3c7[L]Hh3`7WLaHj3^7SLeHn3Z7QLhHo3W7QLiHo3W7PLjHP4V7PLjHQ4U7oKlHP4T7mKgG]OV1e4S7QLkHQ4U7oKkHQ4U7lKgG]OU1g4U7]KfG0e1]4d6`KkG1b1_4c6aKjG0c1_4c6aKkGO_1N^Nb4X8aKjG0`1b4f6]KjG2_1b4h6[KhG4`1a4h6[KhG4`1a4h6[KhG2b1b4n6]KQIc4o6]KPIe4P7ZKoHg4e8O101O0O100000000O10000O1010O11N1O1O100O1O002N3N1N1N2O00001O000000O10O1O1O1N2O1N3M3M45L0OOO01O2O0O1O001O1O0O2O00001O001O00001O001dLmDi2T;TMoDi2i;K3L3N2N1O100O1O2O0O1O1O1O2O0O100O101N100O10O0101O0O2N1N2O2L4M3L3N3N2K4L5N3J6L4I6D^A12OPUP5" + }, + { + "size": [ + 480, + 640 + ], + "counts": "aah1c0U>?C`0B;F:E>D5K7H6oDlLm9Z4I4L5K5K9H3M2N0O01O1O1O2O0O10000O1O2O0O1O101K41ON3M2NiE]KW:b431O1OH9O0O2N1M4M201N2O001O1O1O001000bFWLe7i3YHZLf7h3UH\\Lj7f3QH^Ln7d3mG`LQ8c3iGaLW8`3fGbLZ8a3aGbL^8c3UGiLh8j4L4M2N2N000000001OO100O1O1L4N2M3M4L3O1O1N2N2N2O2M2O1N2O1O1M3N2N2N2L4L4L5L3000000O110O001O0000001O000000010O00001O00000001O0000000000000O1000000O103cEbKR:m4I3M9F5L2N1O:F2N1N2N100O2O000000001N101O000O10OFjIeG02V6^8kI`GZ5`0lJQ8J_GP5`9721M21OO200_Ob0M2N2N3O0O2J501O0O2L4N3O0O2M5L3N3M>\\HaHk6o7J6I5K and ?", + "answer": " is in front of .", + "image": "images/caption_simple_83.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000140658.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "fb076LN2M;[b02g]O=Qb0m0O1G9K5J6H`Ml^Od2Sa05B>DZLWAN_Oj3Z?ZLRAl3m>d000O100O1O1N2N200O1O100O10000O1G9C=K5N2L4I7lNjITD`6k;bIRD`6m;cInC`6Rn2o10000O^ARMTk2eATM\\>k2eAVMZ>j2eAWMD0e=h2_BVMTO3h0Oe=h2hBXMZ>h2fAXMZ>h2fAWM[>h2_10U@XMFOf=i2fCXMZg2iAZMV>e2kAZMV>f2b1O1000000OTA\\M^l2PBTMP>n2nARMR>P3^11O1O002N3Z@hL^>Y3_@iLa`0[300ZDcLi6^3l4001O1O2N3RD[L[7g3eDZLX?g3g@YLY?g3g@YLY?g3`000001O001O3Q@TLd?P4W@RLh?S400000O1O100O10000001OO100000000000000000000O10000000kCnK\\8R4cGoK]8Q4cGoK]8R4cGmK]8T4g3OlCmK]8S4bGnK^8R4h31O00001O0jClK`8U4_GkKa8U4_GlK^L5i;o3kGlK]LOk;U4iGkK]LOj;V4iGkK]LOj;W4k3000000001O00000000001O00001O00001O0000001O0000001O0000000000001O00001O001O00000000001O00001O00000000001O001O00001O000000001O0000001O0000000000001O001O001O0000000000001O000000001O00000000001O00001O000000001O00001O00000000000000001O001O00000000001O001O00000000001O000000001O0000000000001O000000001O0000001O00001O00000000000000001O1O00002N3M2N001O0\\H^JbIN00O1Y41bKj0l0Sc0" + }, + { + "size": [ + 640, + 480 + ], + "counts": "dhj26fc07L2M3M2O2N2N1O1O1O1O1O1O1O100O010O010O0100O010O1O100O1O1O1O1O1O1O2M2O2M3M4LPlh5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is on .", + "image": "images/caption_simple_84.png", + "model_output": " is over ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000427160.jpg", + "mask_rles": [ + { + "size": [ + 512, + 640 + ], + "counts": "o=P2P>0000000000000000UOXB^Og=a0^B\\Ob=c0bBZO^=f0cBYO]=f0dBZO\\=f0eBYO[=f0gBYOY=g0gBXOZ=g0hBYOW=f0kBYOU=f0mBXOT=h0nBVOR=i0S10000O10OO2O10000O10000000000O100M3O1O1M3000000O10lAEc<:^CGa<9_CGa<6SCLUOOh=3RC>n<_OSCd0l<\\OTCd0l<[OUCe0k<[OTCf0l0O100M3O100O100O100O1O1O1O1O10000000000M300O1O1O010O100O2O0O100O1O1N11001N100O1O1O1O10O0100000000O101O0N101O1O100O1O100O10000O10000000001O0000000000000000000000mNXBNh=M`B0`=OcBO]=NgB0Z=NjB0V=MWB[Od0h0U=MmB3S=LoB4P=LPC4P=LQC3oN1N2O00102M000000001O0O11O01O0000oNHZB8d=LZB4a=5cADd07g==WBBg=c0XB\\Oh=d0XB\\Oh=e0WB[Oh=f0XBZOg=i0VBXOh=l0VBUOh=l0XBTOh=m0WBSOh=o0VBROi=Q1TBoNl=c100O1O100O2O0O1M3O0010000000000O10000O1O100000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000O11O00000001O000000O100000000001O000000O100001O000O10000000O10000001O0000000000O100001O000000000000O100001O00000O100000O10000001O0000000000O100001O00000000000000O1001O0000000000000000000000000000000000000000000000000000O10000N2N2N2M3O1O1M300O1O1O100O101O0^NmAW1_>N2O00000O2@^A@c>6hAGZ>7hAHY>3WAHb02[>NnA1Qoi18h_VN3M1O1O1O1O2N2M4M2N1N1N2M3K5K5M3O1O100O1000000O11O1O001O001O1O001O00001O00000000O100O1O1M3O1N2O1O1N2N2O1O1O1M3N2N2O1O1L4N2O1O1000000000000000000000000000000000000000000000000000O11O0000000O10000000O11O00000000000000O1001O000000000bB" + }, + { + "size": [ + 512, + 640 + ], + "counts": 
"Sjn73k?3N2N1O2N2N2N102M2N2N1N3O1N2N2M3N2O0O2M3N2M3N2N2O1N2N2O001N2O2FUN[Bn1_=XN\\Bl1`==L3[E^M[7S3_HVMY7n2aHWM\\7m2aHVM\\7n2aHSM^7o2`HRM^7Q3`HQM^7>`F]1Q2VN^7P3bHQM\\7P3dHPM[7P3fHQMY7o2gHRMW7o2hHRMW7o2iHRMV7n2jHRMU7o2kHQMU7n2lHSMS7m2lHTMS7T2oFcMT1]OoN26j0h8R2RGdMP1U1m7W1TGcMP1U1m7W1TGdMP1S1m7W1UGfMn0Q1o7W1TGhMo0n0n7Y1UGiMn0l0n7Y1VGjMn0j0n7Z1VGlMm0h0n7[1UGmMo0f0m7[1VGoMm0d0o7[1VGPNm0b0o7\\1UGSNm0?o7\\1VGTNm0>n7]1UGUNo0;n7^1UGVNo09n7_1UGXNn06o7a1SGYNQ12n7c1SG[NQ1Nn7e1RG]NR1Kn7g1QG^NS1EP8l1mF_N];`1dD_N];`1cDaN^;]1bDcN_;]1`DdN`;[1aDdN`;[1`DeNb;Y1]DiNc;W1\\DiNe;V1ZDlNg;S1XDmNi;R1WDnNk;Q1SDQOm;h0XCgNl0`0n;f0YD[Oh;c0YD]Oh;c0WD]Oj;c0UD\\Om;c0SD]Oo;b0oC_OT<>lCBY<:fCE_<7bCH`<6`CIa and ?", + "answer": " is running on .", + "image": "images/caption_simple_85.png", + "model_output": " is running on ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000106048.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "^9l3`90000[MaF^1^9X10000000ZMbF^1^9aNcF_1]9aNcF_1]9aNcF_1]9aNcF_1]9aNcF_1]9aNcF_1]9aNcF_1]9aNcF_1\\9Y1000000000000000YMdF^1\\9bNeF]1[9cNeF]1[9cNeF]1[9cNeF]1[9cNeF]1[9cNeF]1[9cNeF]1g8]MiGV1@]1g8]MiGV1@]1f8^MiGV1A\\1f8^MiGV1A\\1f8^MiGV1A\\1f8_MhGU1B\\1f8_MhGU1B\\1f8_MhGV1A[1g8_MhGV1A[1g8_MhGV1A[1g8`MgGU1B[1g8`MgGU1B[1f8aMhGT1B[1f8aMhGT1B[1f8aMhGT1B[1f8aMhGT1A\\1g8`MhGT1A\\1g8aMgGS1B\\1g8aMgGS1B\\1g8aMgGS1B\\1g8aMgGS1B\\1g8aMgGS1C[1f8bMgGS1C[1f8bMfGT1DZ1f8bMfGT1DZ1f8bMfGT1C[1g8bMeGS1D[1f8cMfGh3Z8XLfGh3Z8XLfGR1D[1f8cMfGR1EZ1e8dMfGR1EZ1e8dMfGh3Z8XLfGh3Z8XLfGR1D[1f8dMeGQ1E[1f8dMeGg3[8YLdGh3\\8XLdGh3\\8XLdGh3\\8XLeGQ1E[1e8fMdGf3\\8ZLdGf3\\8ZLdGf3\\8ZLdGf3\\8ZLdGf3\\8[LcGe3]8[LcGe3]8\\LbGd3^8[LcGe3]8[LcGe3]8\\LbGd3^8\\LbGP1IV1e8jMbGP1IV1e8jMbGP1IV1e8jMbGd3^8\\LbGd3^8\\LbGd3^8]LaGc3_8]LaGc3_8]LaGc3_8]LaGo0JV1e8kMaGo0JV1e8kMaGo0JV1e8kMaGo0JV1e8kMaGo0JV1d8lMbGn0JV1d8lMbGn0JV1d8mMaGm0KV1d8mMaGa3_8_LaGa3_8_LaGa3_8_LaGa3_8`L`G`3`8`L_Ga3a8_L_Ga3a8_L_Ga3a8_L_Ga3a8_L_Ga3a8_L_Ga3a8_L_Ga3a8_L_Ga3`8aL_G_3a8aL_G_3a8bL^G^3c8aL]G_3c8aL]G_3c8
aL]G_3c8aL]G_3c8e000O100001O000000O1000000000000000000000000O100000000O10000001O0000001O1O4L1O1O1O1O1O001O0000001O0O20O01O1O000000000000000000000000000000O10000O1O1O1000000O100000000001O000000001O0000001O00001O0000001O000000001O000000001O00001O001O000000001O00000000001O00000000001O001O00001O00001O000000001O0000001O00001O00001O1O1O6J2N1O00001O00001O1O2N1O1O1O1O1O000000000000000000000000O10000000000O1M3L4O100001O00000000000000001O001OO1001O00000000000000000000O1001O1O1O00O1O1O1001O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000001O0000000000000000000000000000000000001O1O00001O001O001O001O0000O1001O0000O11O000000O1000000O1O1O100O100O100000000O100000000000000000000000000000000O100000000000000000000000000O10000000000000000GgL^FZ3Z9`0N2O1001O0000000000000000001O00O1001O00000000000000000000000000001O00000000000000000000000000001O0000000000000000000000001O000000000000001O00O11O00000000000000000000000TLcFi3]9WLcFi3\\9XLdFh3\\9XLdFh3\\9XLdFh3]9WLdFh3\\9WLeFi3[9WLeFi3[9WLeFi3[9WLeFi3[9WLeFi3[9WLeFi3[9WLeFi3[9WLeFi3[9WLeFi3_900000000000000000000000O1000000000000000000000000O100aF" + }, + { + "size": [ + 428, + 640 + ], + "counts": 
"[jl16XJ500O1O10000O2O00003L3N2N1N2O1O1O1N1000000O101O001N2O0O2OO01O0100000O10O1M3O11N100O100O100O101O000O1000001N101N100O101O000O10000O2O0000000O2O000O2O0O1O101O000O101O00000O2O0O100O101O0O10001O0O101O000O2O000O100O2O0O1000001N10001O0O101O0O100O2N1000001N10000000001N3N1O002L4M2O01OO3N3L2O1O1O1O1N10001N1O10O0100000O010O1O1O10OO2O1N13M1000000O100000001O0O100000001O000000001O000000000000000000000O1000000000000000000000000N20000O10000000000000000000000000000000000000000000000000000001O1OO1O1001O1O001O0000000000000000000000000000001O00000000000O100001O00O1001O0001N100001O000000000001O0000000000001O00000000001O00000000001O0000000000001O000000001O0000000O2O00001O1O001O001O2N1O001O1O3L3N2N1O6J:F4L5K4L9G7I6J4L8H5K2cK]Gl3e8oKaGU2H^Og8]NaGo1OC`8^NbG`0L]O1=a0V1k7hNeG3j1o0Y6WOnGCQ2R1j5DUHVOU2R1i5HRHUOW2j0n52jGUO[2?R6 and ?", + "answer": " is parked on .", + "image": "images/caption_simple_86.png", + "model_output": " is beside ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000052565.jpg", + "mask_rles": [ + { + "size": [ + 458, + 640 + ], + "counts": "e;d2g;O00001O00001O000000001O00000000000000000000001O00001O0000000000001O001O00O10000001O000000000000000000000000000000000000001O00000000001O000000000000001OO100000000000000001O000000O100001O00000000000nMQDi1P4N1O0O2O1N2O0O2O1O1N100010O000000100N100O100000O1O101O1N4L7H7J5J;F8]CVNWUO_B0_f\\2" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is standing on .", + "image": "images/caption_simple_87.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000165039.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "[8m1^;001O1N2O0001O1O01OnNfD3Z;LhD3X;MhD3X;MhD3X;MhD30ROS;k0lD32ROR;j0mD41SOQ;i0nD40UOQ;g0nD51UOP;f0oD51VOn:f0QE40XOn:d0RE40XOm:d0TE4OYO^:MgEf0L30[O\\:NbEh02O0[O]:o0cEF0\\O\\:n0eEEO^O[:m0gEDN_O[:m0hECM@[:m0iEBKC[:k0jEBKC[:j0kECJDY:j0nEAIFX:i0oE@JGW:i0oE@JHU:h0RFAHHU:g0SFAGIU:g0TF6k9JUF6k9JUF7j9IVF7j9IVF7j9IUF9j9FWF:j9EVFk9fNlE>8l0m9eNkE`07l0n9cNkEa07m0n9`NlEc06n0m9_NmEc06n0n9^NlEc07P1m9\\NlEd07P1m9\\NlEd06R1n9YNlEe06R1n9YNlEe06S1n9WNlEf05T1o9VNlEe06o0T:\\NfEe06i0`:WO`Ef0d:YO\\Ed0S;POmDn0i;N2N;E;FgV`0D\\i_O0XC9_aHB_7>`HC`7=`HC`7=`HC`7=`HC`7B8J3N213OL00O000001O000000000000O100O0@cNRE_1m:dNPE\\1Q;?0O1N2O1O01000000O0100000O10000O10000O100001N100000O10000000000O1000O1000000000O10000000000000O10O100000000000000000001O0O200O1O1O1O000000001O000001N11O000O01000000O10000O1O2N1N200O1O2N11O00O10001O0000001O000011N0010O10O00000001O00001N100O2O001O0O2N1000001N100O1O1O100O1000000000001O001N2O00001N101O3L6K2M3K5K5L3MP`o0" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is driving on .", + "image": "images/caption_simple_88.png", + "model_output": " is driving on ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000370270.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "bc01oc01N=D0000000000000O10000O1O1O1O100O1O100O1O100O1O1O1O1O1O100O1O1O1O1O100O1O1O1O1O100O100O1O1O1O100O1O1O1O1O1L_Nh]Ob1ha0c0N2N2O1J6J6O1000000000000001O2NO100001O0000O1lN_Mj@`2V?eMh@X2W?jMi@U2W?lMh@T2X?lMh@T2X?lMh@T2X?mMg@S2Y?mMg@S2Y?PNd@P2\\?PNd@P2\\?TN_@m1a?TN\\@n1e?RNY@o1g?QNY@o1g?QNY@o1g?QNY@o1h?PNX@P2h?PNX@P2h?PNW@Q2i?oMV@R2j?bMQ@O0`2S`0YMR@V3]`0O0000001O0000O100000000000000O100O100O1O1O1N2O1O1O1N2N2M3O1N200O11O0000000000000000000000000[Lk_O_3U`0aLl_O^3T`0aLm_O_3S`0bLl_O^3T`0bLk_O_3V`0]Lm_Oc3W`012N1O1O5K2N1O001O000000001OO1000000O1000000O100O100O100O1O1O100O100O10000O100O100I_Lm_Oc3S`0500O1000N3N1O1O002O000O100001O002N4L0000000000000000000000000000000000000000000001OO1000000000000000000000000000000000000O1I7N2000000000000001OO10000000000000000000000001O0000000000000DU@cLk?S3f0O1O1O1N2O2M2O10000O10000O1O1HS_OYMQa0c28N2M3N2N2M3N2O2M2O1M3O1O2N1N2E;E;N`0QOok83^TG1N2N2O100O1N2L4N2O100O1L4M3O1O1O1M3O1O1M3N2E;N2N2M3N2I7N2L4O1L4L4J6N2L4L4H8L4M3O1OUMc@U1\\?eNcA@`?>T2DmSl1" + }, + { + "size": [ + 640, + 480 + ], + "counts": "Zdf151Mdc0e0B:G7I`0A1O1N2N2mN]Ng_Od1U`0aNi_O`1R`0gNj_O[1T`0gNk_OZ1R`0jNk_OX1R`0lNk_OU1R6PN`3m0]FT1P6RNa3k0_FS1n5TNb3k0^FR1o5UNa3n0ZFo0`1WN?GoN8]ON[9Y3cG^M3l0iN^N`9Y3dFZL?`18X1e8n0`FoLLV19XNOg2\\9kN\\FZN:S8Z9bIoFY6P9hIWGP6j8PJVGP6j8PJUGQ6k8oIVGP6j8oIXGP6h8PJYGo5g8QJYGn5h8SJWGm5i8SJWGl5j8SJWGl5j8TJVGk5k8UJUGi5n8VJRGh5P9XJoFg5S9ZJjFg5X9XJgFh5Z9XJdFh5_9WJ]Fh5i9XJUFc5R:`JhEi3j0gJk9c1UEe3\\Bk3g>b05K5N2K5J5O2O13M4L4L3L5L3M3M6J4L4M4L4K4M5J4M3M4M2`DcIV9`6fF_I[9d6cFZI`9d6aF[Ia9c6P2N1N2N3N2M3N1RK]B\\3f=bL\\BZ3i=cL[BX3h=fLZBCFj2S>aMYBAKi2o=bMYBBLh2m=eMYBAMDAn2]>iMXBB4_2h=lMVBB8\\2d=PNVBA;l0QO7a>YOTD@YNb0g=Igkh5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is standing on .", + "image": "images/caption_simple_89.png", + "model_output": " is on ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000481413.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "loi3?g<:F9I6K4L4K4M4M2N2M3O1M3N2N2N2O0O2N101N1O100O2O0O100000000001O1N2O3M0O10000O1O010O001O100O2N2M3N3M2M3M3N3L4L4L4K6I9HWYh3" + }, + { + "size": [ + 427, + 640 + ], + "counts": "mkd29P=3L4M2N2K5L4L4N2N1M3O1O1ON2NdEQOb8g0cG_OZ8b0bGC]8<`GJ]87`GM_82^G3`8N]G6b8I\\G doing with the purple frisbee ?", + "answer": " is holding .", + "image": "images/caption_simple_90.png", + "model_output": " is holding ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000092839.jpg", + "mask_rles": [ + { + "size": [ + 517, + 640 + ], + "counts": "a:a5`:4J6N3O0001O001O001O2N1O001O1O001O2N1O2N3M1O001O2N1O1O1O3M2N2N1O1O2N1O2N2N2N5K2N1O1O1O1O001O00001O2N2N001O1O1O2N001O2N2N1O1O002N3M1O2N2N2N1O2N1O2N1O2N2N2N2N2N2N2N4L1O2N2N1O1O002M5L2N101N4L5J5L1O2N2N2N1N200O2N1N3N0010O00001O000000001O000O1000O11O000000000O1000000001O0000000000000000000000000000O1000000000000000000000000000000001O0000000000000000000000001O0000000000000000000000001O0000000000001O0000000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000000000000000000000000000001O00000000001O000000001O000000001O000000001O0000000000001O00000000000000000000001O00000000000000000000000000000000000000000000000000001O0000O1000000000000000000000O010000O2O000O01000O100O10000O10000O100O10000O1O10000O100O10000O100O10000O10000O10000O100O100O100O10000O100O100O1000000O100O10000O100O100O10000O100O10000O100O10000O10000O2O0O01000O10000O2O0O1O1000O01000000O2O000O1000O01000000O100O1000000O100O10000O1O100O100O100O10000O10000O10000O10000O10000O10000O10000O100O100O100O10000O10000O10000O10000O1000000O100O2O000O01000O2O000O100O1000O10O2O0000000O100000O100001O0002NN2N20
O1gLZC1N6O]2i<\\McC1G4NU2h located relative to ?", + "answer": " is sitting on .", + "image": "images/caption_simple_91.png", + "model_output": " is beside ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000336209.jpg", + "mask_rles": [ + { + "size": [ + 432, + 640 + ], + "counts": "e6]2S;00000000000000001N1000000000000000000000000O1000000000001O000O1000000000000000000O10000000000000000000O10000000O10001O1O000000000000001O0000000O100000000000000000001O00O1001O00000O1000001O002N2N006I9H1O0000000000000000001O0000000000000O10000000000000O100001O00001O000O100000001O0000000000000O1000000000000000000000O100O1O10O10O1N2O100O100O1O1O010O1000000000000001O00001O001O0O2O001O2N2N2N2M2O000000000000000000000000000000000000000000000O101O0000000O10000000000O100000000000001O001O0000000000001O00000000000000001O0O100000000000O1000000000000000000000000000O10000000N2O1O10000O1O2N3M3N6I5KUFAk9`0TF_Om9a0SF_Om9a0TF^Ol9c0UF[Ok9f0UFXOl9h0TFXOl9k0RFTOn9m0QFSOo9n0QFPOP:S1nElNR:U1oEiNQ:W1oEiNQ:[1lEdNT:]1mEaNS:`1oE]NQ:d1oEZNR:h1nEVNR:n1kEQNU:Q2jElMX:U2RFiMe9Y2XFiMg9X2XFiMg9X2XFhMg9Z2WFgMi9Y2WFgMi9Y2WFgMi9Y2WFgMi9Z2WFeMi9\\2VFdMj9]2VFcMh9`2VFfMd9\\2ZFfMd9\\2ZFeMe9\\2YFfMf9Z2ZFfMf9Z2ZFfMf9Z2YFgMg9X2WFkMi9U2VFlMj9S2WFmM:^OA6R9`2RGmM;Fc8\\2SGoM8Hc8Y2UGoM8Hc8Y2RG[MNd0_8^3oGeLQ8[3oGeLQ8[3oGeLQ8[3oGdLR8\\3nGdLR8\\3nGeLQ8[3oGeLQ8[3oGeLQ8[3oGeLQ8[3oGeLQ8[3PHdLP8\\3RHbLm7_3QHcLo7]3PHdLP8\\3PHdLP8\\3PHdLP8\\3PHdLP8\\3oGeLP8\\3PHdLP8\\3PHeLo7[3QHeLo7[3QHeLo7[3QHeLo7\\3PHdLo7]3PHdLP8\\3PHdLP8\\3oGeLQ8[3oGeLQ8[3PHdLP8\\3PHdLP8\\3oGeLP8\\3oGeLQ8[3nGfLR8Z3mGhLR8X3mGiLS8W3mGiLS8W3mGiLS8X3lGhLT8\\3hGdLX8]3gGcLY8[3iGeLV8[3kGeLU8]3iGcLW8\\4O00O1O1001O000O2O1O1O1O00003M001N10000000001O001N2O1O00001O0O10000000001O00000O10000000000000000O10000000001O000000O10gJ" + }, + { + "size": [ + 432, + 640 + ], + "counts": 
"2[5U800000000O10000000000000000000000O10UHiJ]7W5>0000O1000000000000O10000000000000000O1000000000000000000O100000000000000000000O10000000000O10000000000O10000000000000000O1000000000000000000O100000000000000O100001O0000O1000000000000O1000000000000O10000000000000000O10000000000000000O10000000000O10000000000O1000000000000000000O100000000O1G]GdKd8[4900O1O1O1O1N2N2O1O1O1O1O1O1001O1dFTLR9_4F1O000000000000O1000000000000O1000000000000O10000000000000000O1000000000000000000O100000000000000O10000000000O100000000000000O1000000000000O10000000000O100000000000000000000O1000000O]GhKP8X4PHhKP8X4d00000O10_GkKg7U4UHiK^O8Y8o3XHVLf7j3\\HULc7k3^HULa7k3`HUL_7k3cHTL\\7l3fHSLY7m3U100O100O100000000000000O1O1N2N2N2O1N2N200O1O100000000000000000000001O000XG]LUOOU8e3bHlLZ7T3eHRMV7o2iHRMV7n2iHTMV7m2iHTMV7m2dHZMZ7h2aH\\M^7e2`H]M_7[4O1O001O00000000B[HPKf7l4_HSKa7j4cHTK^7k4dHRK^7k4e0N2N20000O10000O100O100O1000000O10000000000001O001O0000001OO100O100O1000000O1000000000000O10000O1O100000000001O001O001O:F1O1O3M6J1O000000O11O1O1O001O1O1O001O0000000000000000000000000000000000000000000000O1000000000000000000O1O1O1000000000000000000000000000000000000000000000000001O000000001O0000001O00000000000000000000000000000000000000O10000000000000000000000000000000000O100000000000000000000000000O1000000000000000000000000O10000000000O10000000000000000O100000000000000000000O1000000000000O1000000000000000000O10000000000000000O1000000000000O100O1L4RNn1M3O100001_FSMSON01Q9Q5YO1O1O0000000000000000000N" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is over .", + "image": "images/caption_simple_92.png", + "model_output": " is over ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000458325.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "]ca33Q17k:MkD;S;K^D?\\;j01L3O2M4N2O4L2M2N202N1O1XOlC6g doing on the ?", + "answer": " is crossing the .", + "image": "images/caption_simple_93.png", + "model_output": " is walking on the ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000350122.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "lPT5g0U>7H8J5K4L5L2O2M2N2O1O1O1O1O1O100N12N3M1O1O0O2O2N2N5J3M3M5K:D3M101O001O0O2O001O1N1O2O00O010000O01O01O01000O000O2O1N2WO_OTCa0lfDJV;Q2O0OO2H8L4N2N2N2N2O1000YD]NR:c1iEeNMgNi9k4O01VK[FW4g9eK\\FZ4f9bK]F\\4V:N1N2M3M3^N`E[Ng:`1`1J5L5\\OSCoNR=m0UCmNo doing with the bicycle ?", + "answer": " is pushing .", + "image": "images/caption_simple_94.png", + "model_output": " is pushing the bicycle ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000295809.jpg", + "mask_rles": [ + { + "size": [ + 512, + 640 + ], + "counts": "d547k9_5UFaJk9_5UFaJk9_5UFaJk9^5VFaJk9_5UFaJk9\\5XFcJi9]5WFcJi9]5WFcJi9]5WFcJi9]5WFbJj9]5WFbJj9^5WFcJg9]5YFcJg9]5XFdJh9\\5XFcJi9]5WFbJj9^5VFbJj9^5WF`Jj9`5VFaJi9_5WFaJi9_5WFaJi9_5WFaJi9_5WFaJi9_5WFaJi9_5WF`Jj9`5;O10000O10000000000000000O1000000O100000000O1000000000000000000000000000000000000O10000000000000000000000000000000000000000000000000000O1N200002NN2000000000000O11O00O10000O1000000000000O1000000000000000000O100000000000000O10000000000000000000000O1000000000000000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1001O000000O11O0000O100001O00O100001OO10000000000000000000000O11O000000O11O00O100LPEXKP;l401OO1000000000000LPEXKP;l4000000000000000000000000000000000000000001O0000001O000000O11O000000ISEZKn:l40IQE^Kn:i410000001O00HRE]Ko:b4SE]Km:d4RE]Km:c4SE^Kl:b4TE]Km:c4RE^Kn:i411OMRETKn:i4UEWKk:i4TEWKm:h4TEXKl:h4TEWKm:i4SEWKm:i4SEWKm:i45000000O10000000000000000O11O003MM300001O00O1O11O2N1cMZK^If4n8MmDZKR;f4nDZKR;j4000001O00O100001OO100001O0000O10\\NTKTHl4`91OO10000001O0000000000000000000000000bLTKhKl4f70000000000000000000000000000001O000000000000O1001O000000000000000000000dNSKeGm4[8SKeGm4g9000000000000000000000UNRKdHn4W900000000000000000000000000000000000000000000000000000
00000000000000000000000O11O0000LQEVKP;j4PEVKP;n4O0000000000000000000000000000000000LPEWKQ;i4PEVKP;n4O0000LQEWKo:i4QEWKo:i4QEWKo:m4000LPEWKQ;i4PEVKP;n4O0000MPEVKP;i4QEVKP;k42001O00001O1O00LQEWKo:i4QEWKo:l4100001O1O001O00001O0000001O0000001O1O1O3M001O1O0000001O1O1O00001O1O00001O1O3M1O001O001O0000001O1O1O00001O1O001O001O001O000000001O00001O1O1O2N1O1O1O1O001O001O5K00001O6J1O001O1O1O001O1O001O1O000000RI" + }, + { + "size": [ + 512, + 640 + ], + "counts": "PT[86h?3O0O2N10000O1000001N10001O00000O101O00000000000O10000000O1O010O0100O100O100O100O1O1O100O1000000O100000000000000000000000000000000000000000N2D=G]`86e_G5K4L000001O00000000000O100000[L" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is driving on .", + "image": "images/caption_simple_95.png", + "model_output": " is beside ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000511760.jpg", + "mask_rles": [ + { + "size": [ + 640, + 296 + ], + "counts": "i?9V3HZ=d0a_OD00000:0F0J5Q1MQOe?S400000000000000000000O1001O00O10000000000000000000000000000000000000000000000000000000000000000001O00O1001O000000O10000000000000000001O000000000000O11O0000000000000000000000000000000000O100000000000000000000000000000000O100001O0QLb@\\3^?bLe@4Ih2b?oL]Ao2k?O1O>B:gM^^O`1Tb0M1O1O000000000000000000000000001O0000000000000000ZM^NnBb1R=`NlB`1S=eNiB[1W=iNeBW1[=lNbBT1^=mNPAB2M7Om0_?n0`@RO_?P1_@QOa?Q1\\@POe?e20ZMe@g0[?QOn@n0R?QOPAXO[OR1e?ERAmN]OK1b1`?BkA=U>[OSBe0m=YOUBg0k=XOVBh0j=WOWBi0h=XOXBh0h=XOXBh0i=WOWBi0i=WOWBi0i=WOWBi0i=WOWBi0i=WOWBi0i=WOWBi0i=WOWBi0i=XOVBh0j=XOcAAnNW1_?ZOf@TO and ?", + "answer": " is standing on .", + "image": "images/caption_simple_96.png", + "model_output": " is standing on ." 
+ } +] \ No newline at end of file diff --git a/evaluation/GAR-Bench/model_outputs/gar_1b_simple_eval.json b/evaluation/GAR-Bench/model_outputs/gar_1b_simple_eval.json new file mode 100644 index 0000000000000000000000000000000000000000..2c3f2e7f5a8c329fe092d4ddae1843ae383ba9b3 --- /dev/null +++ b/evaluation/GAR-Bench/model_outputs/gar_1b_simple_eval.json @@ -0,0 +1,2337 @@ +[ + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000227491.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "[f`46i>3N001fA3`0DQ<9^CU1_4M1O2O000O22M0000O010O00001O0QDJ7FT6a0bI6N\\O_6>ZIf0OmNg6>VIn0IgNR7;RIU1GaNV7;QIY1D_NY7:oHl1_N^M>?U8:iHS3Y7mLcHU3]7mL`HT3a7lL]HU3d7kLZHW3f7iLXHX3i7hLVHX3k7hLSHY3n7gLPHZ3P8gLnG[3R8eLmG[3S8eLlG\\3T8eLkG[3U8gLhGZ3X8gLgGY3X8jLfGV3Y8lLfGT3X8oLfGQ3T8WMkGj2m7^MRHb2i7cMWH]2f7gMXHZ2e7jMZHV2b7nM^HR2_7RN_Hn1`7TN`Hl1^7VNbHi1]7ZNbHe1^7\\NbHd1\\7^NdHa1\\7aNcH^1\\7dNdHnNKaNc0ENc0R7[2aHgNo1bNc5g2]HgNV2[N^5o2[HgNQ:T3O1O1O1O102N2M2O3K4aHhJc42^I[5i1gJo33XJl5c1UJd3P7YLQIe3U7ULnHj3U7PLnHo3W7kKkHT4^7ZKlHe4\\7nJjHQ5c8O100O1O100O100O100000000000000000000000000000000001O000dMWKaJi4[5ULjIm3S6XLUInNJk4P7UMjHo2T7_20001O001N2XI[HS6g7hI_HU6b7mI]HQ6c7PJ^Hn5d7RJ[Hm5f7SJ[Hk5f7TJ[Hk5f7SJ^Hj5c7UJ_Hh5c7WJ_Hg5^8O001N2N2O1O1O2L3M5L4L2N4L2M3O1O001N101O001N101O0O2O1O0O2N101N2O0N3L4M3M3M3L4B>M3M3M3DUD^MP<_2:N2O2N2M2O2N2L5J6WOlBUOc=;YWf4" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_0.png", + "model_output": " is beside .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000029397.jpg", + "mask_rles": [ + { + "size": [ + 449, + 640 + ], + "counts": 
"Zl95l=1O1O001O01O0000000010O012MU6CaIg0^6ZOVIQ1j6S301O001O0001O00000000000000001O002N3M:QIiH`6^7N100O1O00001O000000001O00001O00001O0001O0000000000000000000000000O100O100O10000O1O1N2O1N2N2N2O1O1O1O100O101O0O1000000O10000O100O100O1O1O1O10001O0O10000000000000O11O00000000001O000000O2O00000000000O1O1O1N2J6J6K5J6L4J6J6L4J6K5I7J6K5K5K5I7I7K5K5M3J6K6L3J6O10001O1O2N2N2N2N3M2N2N3M2N2N2N3M3M2N2N2N3M4L1N102M5I6H7J8G9Edm<" + }, + { + "size": [ + 449, + 640 + ], + "counts": "Qa_4?_=6L2L5L3M3N2N1O3M1O2O0O100L5ZOe0L5J5M3N3M2O1N2O1O2L4N4lN`M^Fd2k8f1E;@`0H7L301O1O1O001O1O00001O000000001O00000000000000000000000000000000O1000000000000O100N2O1M3CXH\\Jm7]4`HeKA5Z8S4W1M3]Oc0M3L4L4M3K5L4I7F:I7H8J6E;H8EXCVOTR;5a[E6H7L4RLBSJa0h5LnI7o56dI7n5g0UI_Of6e0UI^Oi6h0PIK^68oH9P7LcH=\\7T30001O001O0000000000001O00000000001O000000000000000000O1000000O10000O\\NlI]KS6Y6O1O1O1N2N2N2O1N2N2O1O10000O10000O10000000000N2gNgGjLY8U3iGiLX8U3kGjLU8T3PHgLR8V3SHgLn7V3WHfLk7X3XHcLl7X3[H`Lk7[3`1L4\\MQG0U9M\\G_Oj8?]2O1O1O1O1N2O1O1O1N2O1O1O1N2NSaZ1" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is standing on .", + "image": "images/caption_simple_1.png", + "model_output": " is standing on .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000163117.jpg", + "mask_rles": [ + { + "size": [ + 500, + 376 + ], + "counts": 
"[o0Q82QH\\7V8N000O2HhGRIX8U7001O01O000000000O1O1O1N2N2dL`HcNa7Z1cHdN^7Z1eHeN[7[1gHcNY7]1kH_NU7a1oH[NQ7f1nH[NQ7e1oH]No6d1oH`Nn6a1PIaNo6`1PI`NP7`1PI`NP7a1PI_No6d1nH]NQ7d1jHVKOV3W7e1hHWK0T3X7S5000O1O100000000PNgHfKX7Y4jHfKV7Z4kHeKU7Y4PIeKo6Z4UIoIE^1U7b4^I^Kb6b4^I_Ka6a4_I^Kb6b4]I_Kc6a4]I^Kd6b4ZI_Kg6b4XI]Ki6d4XIYKi6h4WITKl6m4SIQKo6P5QInJP7S5gHhIMU1\\7S5aHhI120S1^7T5_HjI022o0_7l601O2N4L1O1O`HhHc6X7]IjHa6n0iHT5c0RJb6T7[IUIA@h6[7eIYI@]Ok6Z7eI]I[6c6dI]I]6c6cI]I]6b6dI\\I^6d6cI[I^6d6bI[I`6d6`I\\Ib6b6`I]Ib6`6_IaI`6_6`IZI]OES7?nHl5c0VJ_6NmHk5f0VJ]6_6dIQI[O3LMU7P7cIoH@0J0T7P7[1O02M2L4WKhHQ1\\7nNhHn0[7POfHP1Z7POdHeLMX4_7TOhHl0X7TOhHl0X7VOfHj0Z7YObHi0]7i3M2O01O01N11O0000O100000001O01OO3N3M001O1O2N00TMVIoLi6P3ZInLf6R3[ImLe6R3\\InLd6Q3]InLd6R3\\InLd6R3\\ImLe6T3ZIlLf6U3ZIiLg6X3WIiLa5cMoJN@g5InLi5\\MgJNA47\\8[6cGeIX8b6fG]IY8P7O2N1O01ZOlGmIR8k6O1O01001N0100001OO01UMgH[MY7d2gH\\MZ7d2fH\\MZ7d2gH[MY7e2gH[MY7f2fHZM[7f2bH\\M]7^501O0000O1UKaH`1a7^N_Ha1c7^N^H`1c7\\N\\HQM3a4b7]NcHb1_7\\NcH`1`7`NbH\\1_7dNcHY1_7gN]H\\1e7dNUH^1d0fK^6n2iH]1j0fK\\6X7gIhHZ6U7U1M2O00101aNgG^JIo0_8d4kG]KU8a4mG_KS8^3fGZL89R8\\3gGZL7 and ?", + "answer": " is flying over .", + "image": "images/caption_simple_2.png", + "model_output": " is over .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000170613.jpg", + "mask_rles": [ + { + "size": [ + 640, + 439 + ], + "counts": 
"eiT57gc03M2N2O2K4N2K5L4O1O1N2jLL^B6^=n1UAjL0`1k>`3N1OkLeAj0X>UOmAj0R>TORBj0n=SO\\Bg0c=WOeBc0[=YOmBd0S=YORCe0mVS<@oC`0P<@RD>o;@SD`0m;@TD>l;AWD>i;AYD=i;AYD;j;DXD4o;MQDKV<5kCD\\<l8BTG8S9HkFGg99YF]OQ:c0nEYOX:f0hEUO^:j0bEPOd:P1]EhNk:2\\BMk2HQ;8YBM_a03a^OM_a02c^OM]a02d^ON]a00d^O1[a0Ng^O1Za0Lh^O4Xa0Jj^O6Va0Il^O6Ta0Hn^O8Sa0Eo^O;Qa0DQ_O;o`0Cj^OCDj0ca0_Oj^OJBg0ea0ZOl^O1^Of0bb0ZO^]Of0\\b0WOa]O34e0Zb0ZO`]O26d0Zb0Cf]O[MgNIg2e;UNVG`0ZMeNJf2e;VNTGc0[M[1a;RNRGg0[M^NNe2d;WNQGl0bMh0\\;]NoFn0jM`0W;aNmFR1lM=W;aNkFU1mM:W;bNhF\\1oMOZ;fNoDH5j1\\O^M1Y2^;8iD:FVM4V2^;c3]DWJ4V2`;e3_DVJMR2d;j3_DRJOQ2d;o3aDnK`;S4aDgKc;Z4\\DcKg;W63L5gNjCbJ\\<^5eC[Jb^6d;]JYD`5h;dJVDX5m;jJQDQ5S`0VBUOP>k0UBlNo=S1YB^No=a1ZBoMm=P2S2O1N1O2N3L3K5K6J7H9G8H;EVhV4" + } + ], + "question": "What is the relationship between and ?", + "answer": " is talking to .", + "image": "images/caption_simple_3.png", + "model_output": " is looking at .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000465822.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "dh^31e;1ig72RXH4J7I8J4M3N3L4]OROVFR1h9RORFR1k9?M3N3L2O100O2N1O1O1O1O1O2N1O100O2N101N1O1O1O2O0O101N1O1O101O0O2O0O100_HQMZ6P3dIQM[6P3bISM^6m2`IUM`6k2_IWM`6i2]IZMc6f2QIgMn6Z2oHhMR7W2kHmMT7[3010O0mLfHi1Z7VNiHh1X7WNlHf1S7YNQId1o6\\NTIa1m6]NWIa1i6]NZIa1g6]N\\Ia1e6]N]Ic1c6XNcIf1_6UNfIi1Z6TNjIk1W6RNlIbNh0`7dNRJc0`Nh0W9WOkFh0T9XOnFg0R9XOPGg0P9VOTGj0j8VOXGi0h8VO[Gh0f8WO\\Gh0c8WOaGg0^8YOcGg0ZOnNi8:oGS1o7kNSHU1n7hNSHX1o7dNTH\\1S9001O010O10O00010O01O010O001O010O00010O10O010O01O010O0100O010O0010O0101N1O2M2I8YOl0BcPb0" + }, + { + "size": [ + 375, + 500 + ], + "counts": "f[b41c;5M3M3N3L2N3N3M2N2N10O01O000000O2N100O1O2N100O101N1O1O2O0O1O1O1O1O101N1O1O101N1O2N`[e0" + } + ], + "question": "What is doing with the ?", + "answer": " is looking at the .", + "image": "images/caption_simple_4.png", + "model_output": " is holding the .", + "eval_result": "False" + }, + { + "image_path": 
"/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000278353.jpg", + "mask_rles": [ + { + "size": [ + 640, + 596 + ], + "counts": "32kc0k0XO2N001O000000001O000000001O000000001O0000001O00000000001O0000001O00000000001O0000001O0000001O0000000000001O00000000001O00000000001O000000001O000000001O000000001O0000001O0000001O0000001O0000001O000000001O0000001O000000001O0000000000O10000O100O100O1O1O100O1O100O100O100O1O1000000O100000000000000000000001O00001O000000001O0000001O00001O0000000000001O0000000000000000000000000000000000001O0000000000001O0000001O001O0000001O00000000000000001O0000001O1O001O001O1O001O1O001O001O001O1O001O00001O001O001O00001O001O00001O00000000001O00001O00001O00001O0000001O0000001O0000001O001O0000001O000000001O000000001O001O0000001O00O1000000O100O100O10000O100O100O10000O1000000O100000000000000O10000O10000000000000000001O0000000000001O000000001O000000000000001O000000000001O001O000000000000000000001O00000O1000001O0O100O1N2O100O1N3M2O1O2N1N2N3N1O1O1N3M2O1N3N100O100O2O0000000000d^OROg?m0V@WOi?i0V@YOi?h0V@XOj?h0V@YOj?g0T@ZOl?g0S@ZOl?g0R@[Om?f0R@[Om?f0`_OoNG=i`0a1P_OcNo`0X200O1O1O1O1O1O010O001O1O1O2N1O1O1O0001O01O0000001O01O0000010O0000000001O0000001O00000001O01O00000000001O0001O01O000000001O01O000001O01O00010O00000010O0001O0001O000001O0000010O000001O00000010O00000001O000000010O00000001O01O00001O01O0000010O000000000010O00001O0000000010O0001O0000000010O00000001O00000001O00001O;bLX_O46KO67HL001Je0[?" 
+ }, + { + "size": [ + 640, + 596 + ], + "counts": "[me1k0gb0d0C;CO1O1O1O10O01O001O0000jMhJdEX5[:nJ`ER5_:SK]Em4c:UK[Ek4f:WKWEi4h:\\KTEd4l:^KREb4n:`KPE`4P;bKmD_4S;cKkD]4d8bKgG5AZ4W8YLPHBCV4o7kLZHROBT4P8RMZHlNCS4Q8VMYHjN@S4U8XMXHh4e7^KXHb4g7`KYH`4e7cKYH]4g7dKXH\\4i7fKTHZ4n7gKnG[4R8eKmG[4U8eKhG\\4Y8jK^GX4c8c3O0100O002N1O2N000010O01O1O001O2NnHQET5o:lJoDV5P;m110O101OO01000O0100O1000_HREo5m:QJTEo5l:PJTEQ6k:oITER6m:mISET6l:lITEU6l:`1O00ZHUEX6k:gIUEY6k:gIUE[6j:dIVE\\6j:dIVE]6j:bIVE^6j:bIVE_6j:`IVE`6j:\\10SHVEb6j:]IWEd6h:\\IXEd6h:\\IXEe6h:YIYEh6f:YIXED1W6g:TJXEB6Y6b:UJXEB6Z6a:TJYEE3W6d:TJYEE3VOOn6e:VJYEC0[O100l6f:VJYEC0@1g6f:VJYEL1n5g:UJXEI1TO1o6e:RK^En4b:RKYEPN2n6f:TKZEm4e:PKZEPN1P7e:oJ]ES5d:lJ[EU5e:S201O01O0010O0001cGWEa7i:^HXEb7h:_HWEa7j:j001O01O000010O\\GWEQ8i:oGWEQ8i:d0OWGWE[8j:cGWE]8k:aGUE_8l:`GTEa8l:9005K0001N1M4N2O1N2N2O2M3N6J2M5L5J6J5L1O1N2O2N1N3N2N2M2O2M3N1O2N1O1O1O1O1N2O1O1O1O1N2O2N3M3M2N4L3M4L5fKlC>X00000O1010O1O001O10O0000000jIoL]KQ3^4YM]Kg2b4]M[Kc2e4cMUK]2j4lMnJT2R5RNhJn1Y5UNdJj1]5[NnGWNUN1l1]3Q8_N`G@GP2j8dNVGGIe1Q9gNnFLN]1T9nN^F39o0Y9U2dFjM]9X2_FiMa9Z2[FgMe9\\2XFdMh9_2SFcMm9X600001O0000000O20O00O100O1O100N2N2O1O100N2M3N2O1O1O1N2O1N2N3N1N\\FZGS8d8mG]GS8c8lG_GS8`8mGaGS8_8kGdGU8Z8jGhGW8W8hGkGX8T8gGmG_8m7_GUHj8b7TGaHR9Y7kFiH[9Q7bFSIV:U6iEkIb:j5^EVJg:g5PEaJS;\\5gDjJ\\;U5^DnJe;f63M4gIVDS4Y and ?", + "answer": " is in front of .", + "image": "images/caption_simple_5.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000128051.jpg", + "mask_rles": [ + { + "size": [ + 360, + 640 + ], + "counts": "U4V4R7000000000000000001O00000000000000000000000000000001O000000001O00001O2N1O001O00O10000O1O100O1010O00000O1001O01O1O0000000000O1L4O2N10000000000000001O0O2O00O11O000001O010O001O00001O3M4M2M1O1O2N1O001O01O00001O0010O01N10001O1O001O1N3M2N3M2M4K5J6E;K5K4O3L3N3M2N2M5L2N4K3L5J8SO^FFi9DXF8 located relative to ?", + "answer": " is in front of .", + "image": 
"images/caption_simple_6.png", + "model_output": " is on .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000560266.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "ocP1335iZLAf3>\\LAd3>^L@c3`0_L^Ob3`0aL]O`3b0cL\\O^3b0dL\\O_3a0cL^O_3?dL^Oe39]LFe36`LFd35hPY4" + } + ], + "question": "What are and doing to each other?", + "answer": " is looking at and biting .", + "image": "images/caption_simple_7.png", + "model_output": " is biting .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000024919.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "iah21bYQ14bSoN0O2O0O1O1O000UN8nE1k9 and doing to each other?", + "answer": " and are pushing each other.", + "image": "images/caption_simple_8.png", + "model_output": " is pushing .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000231169.jpg", + "mask_rles": [ + { + "size": [ + 454, + 640 + ], + "counts": "WaS2=f=g0SOb0E9H4L3M2N4K6K3N1O1N3M4M3L2O2M4M3L4M1O1O2M4M2N1N2O2M4M2M2O1O2N1O2N1N2O2N1N2O1O2N1O2M3N001OSLmFi2P9WMSGi2k8VMWGj2g8WMZGj2d8UM]Gl2b8TM_Gl2`8TMaGl2^8SMdGn2Y8RMiGm2W8SMjGn2T8QMmGP3R8oLPHQ3o7oLRHR3l7nLUHQ3k7nLVHS3i7mLXHT3f7kL\\HU3b7kL`HV3^7jLbHW3]7hLeHY3Y7fLiHZ3U7fLlH[3S7cLPI]3o6bLRI_3m6aLTI`3j6_LWIb3g6^L[Ic3c6[L`Ie3_6ZLcIf3\\6YLeIh3Z6TLkIk3U6SLnIo3o5nKUJT4h5jKZJW4e5gK]JY4i7000O10000O10000O100O2O001N1O10000O2O0O01001N1O10O010001N10O11O1N1O1O10001N0100O02N2N1O1O1O1O2N1O1O1O100O2N1O001O2O2L10100O3M10O00010O10O10O0100O10O001000O01O0001O010O0001O1O010O000010O0000O2O001N2O001O001O000O2O1O1O001O0010O0000O3OO01O1O001O001O1O001O00000010O2N1O2NO2M2000001O00001N101N1001000O0000010O001O002N001NZGTKT8l4c000N3N2O001N2O1N2O001O1O2M1O2N102L3L4M3O1O1O2M2N2UObEkMb:P2bEmMi;AVDLW1>^NnA1O0000Oea1" + }, + { + "size": [ + 454, + 640 + ], + "counts": 
"^_^51U>00000000001O2N2N010N2O1O1O1O1O001O2cFCm4>RKBo4>\\41O2N1O1O1O1O3eB0OXOg04XO064W:b3L0O3jEZLg9S4O0100O1O3Ng0XO2N1O10O6J100cE]LU:d37O02N1O2O0O010O01O0000001O001O00000000001O000000000000000000000001O00000000O11O000000O1000000000000001O00001O00001O001O000000000000hMeEe0[:WOlEe0U:[O^FeNVO0f00ZO:\\:R1Z2O10000000000000000000000000000000000000hFdNf5\\1ZJdNf5\\1ZJdNf5\\1b30000000000000hFeNe5[1[JeNe5[1[JeNe5[1[JeNe5[1c300000000000000000000001O001O001^FaN[6_1dIbN\\6^1aIeN_6\\1[IoN_6R1^IQOa6o0^ITO`6m0UG`NK1?N>d0S8Z3RGfLm8^3nFeLQ9X40000000000000000000000000000000000000001O0000000000O11O\\OnFhK22P9i3fGUL[8j3gGULY8k3hGTLX8l3iGRLX8n3P100000000000000000000001O00001O1O:F1OO010000000O10000000000O10000000000000000O1000nF" + } + ], + "question": "Where is located relative to ?", + "answer": " is beside .", + "image": "images/caption_simple_9.png", + "model_output": " is beside .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000551822.jpg", + "mask_rles": [ + { + "size": [ + 453, + 640 + ], + "counts": "Y_P3e0Z=]OhBn0o and ?", + "answer": " is attached to .", + "image": "images/caption_simple_10.png", + "model_output": " is attached to .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000498463.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "j`T7a0]>201O000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001O0000000000000000000000000000000O100000000000000000000000000000000000000000001O001O000000001O000000000000000000000000000000000000000000TH" + }, + { + "size": [ + 480, + 640 + ], + "counts": "l_T7i0V>2O000000000O1O1bNVOdDl0Y;YOaCKd0V1k;YO_CY1` and ?", + "answer": " is on top of .", + "image": "images/caption_simple_11.png", + "model_output": " is on top of .", + "eval_result": "True" + }, + { + "image_path": 
"/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000275198.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "^[P34j>5J5M1M4M2N2N2M3N2N2M3N2N2O1N1O2N2O1N2O1O1O1O1O1O2O1N1O010O10000VO\\NWDf1c;jNRDU1l;POUDm0j;UOVDj0i;XOTDYOKX1P[O1K[NoM9Oo00W1j2WO\\O2KbNPNn00V1h2YO]O5HTNRN21N0X12iNMX2g2@^O5G[NUNQ1OhN0X2b1eM`N6e0e1^17EX2lNnLNd0_1:EQNXNY10bN1X2nM;ZMWO_3P1nMQO[3;QN;i0[O_Lg0]1XOeNN`4b0^LQOW64R1l0gHPOY17k:h0mCSOU17n:f0kCUOV15R;k0mDVOmNGO3i;o0[E^OkNCj;n0Q2F[BXOf=h09O00100O1O10O10O10O0010O1O1O10O01O001O01N11O101N00100O010O1OO2N1100O00O2O1O100O0O2N2001OO100O10000O10ON32N0WOQB`0X>O1N2O1O1L6IoRe6" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is looking at .", + "image": "images/caption_simple_12.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000257896.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "`fV46dc0`0D:F9H4M1N1O2O2M101O0001N2O0O1O2N2N1O2N2N2M3N1O2N2N1O2N2N2N1O2N2N1O2M2N3N1O2N2N1O\\Q[4" + }, + { + "size": [ + 640, + 480 + ], + "counts": "ZT`21mc04M4K4M2O1N2O1N100O101M5M1N101N2O2M2O000O2N102N1N101N101[CnNZ6S1cIVOV6j0hIYOW6h0eI_OV6d0eIDV6 and ?", + "answer": " is wearing .", + "image": "images/caption_simple_13.png", + "model_output": " is wearing .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000034417.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "hc[4?n:=I:G4N1O101N10000J6O10000O2O00000000lNQF`0o9^OTFa0m9^OTFb0k9\\OWFc0k9VO\\Fh0[:A>O2O3N1O100OO02O0O100O010O0010O01O0010O01O000O101NVUi0" + }, + { + "size": [ + 375, + 500 + ], + "counts": "kZm35\\;7I6K6L3M3N3N2M4iEkNg9d1N2O0O1O1F:O1O01O0O2C=N2O1ROmE12\\OT:?SFBI4\\:9SFFn97TFIn94SFLo91RFNQ:MRF3i:O100O2NTf]1" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": 
"images/caption_simple_14.png", + "model_output": " is beside .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000047585.jpg", + "mask_rles": [ + { + "size": [ + 640, + 424 + ], + "counts": "V91Z2>_1C`L0f00000O100O10000000000O10000000000000000O1O10000000000001O001O00001O000000001eNd]OP1\\b0lNi]OS1Wb0kNk]OU1`b001O000000000000000000O100O1O1O1O1O1O1KeNd]O\\1\\b0dNc]O]1]b0dNb]O\\1ab00001OO10000000000000000000000001O0000000000LdNd]O20T1\\b0jNk]OU1ab0O001O00000000000000O10000000000000000j]OlN`a0T1f0000O10000O100O1O1N2O1O1O100000000000000000000000000000000bGiNSNW1m1oNmMQ1S2XOXFJ`6n0X35YF_No5\\1g37WFbNo5W1j3k0SLUOm3n0oKSOQ4o0mKQOS4S1iKmNW4T1hKlNX4V1eKkN[4V1dKjN\\4X1bKhN^4[1_KeNa4\\1^KdNb4^1\\KbNc4c1YK]Ng4g1TKZNl4l1nJTNn4`NUF_3j4QNQ5`NUFc3e4nMV5_NUFh3`4iM`5kNnER3\\4TNP6^NkE\\3R4VNQ7X1lHhNd7l0XHTOZ8=cGC_8?]GAe8a0XG^Oi8e0SG[On8i0mFWOU9n0dFRO]9b1hD]MWOQ1QlFIQ9:jFJT99fFLW97dFNY96aFO\\93aF1\\91]FhK[OX4X:1]FhKYOX4Y:0aFeKXOZ4U:MmF2S9OmF1R90nF0R90oFOP92PGNo83RGLm85RGKo86oFKP95QGKo84SGKl85WGIi87TGKl85TGLk85UGKk85UGJk87UGIk87UGIj88VGHj88WGGV7nKoH]4JEU7SLnHX4MET7ZLhHS43DS7[LiHR42DT7[LjHQ42DQ7aLiHm31FT7dLeHg34IT7dLfHd34IU7hLbH`36Km6^KbHY1:`36Jm6PMkH[36Cn6TMjH[37Am6WMjHZ37Al6YMiHY39_Om6j2mHZMQ7m2hHTMW7k7O2kH\\Cl6m<0O0O3N1O010N1O101O000000000O1O1O1N200O100001O0002N1O001O1O0000001SEfHR9[7kFiHS9W7jFlHV9U7gFoHW9R7eFRIZ9o6dFSI[9n6dFSI[9n6eFRIZ9P7eFoH[9T7cFlH\\9V7mESH and ?", + "answer": " is standing on .", + "image": "images/caption_simple_15.png", + "model_output": " is standing on .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000234757.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Y\\Z52k>6J5K4L5K5K4M3N2O1N2O100O1000000000001O00000O100O0O1O2N10O0jN`Bn0`=ROaBm0_=ROcBn0\\=ROeBP1X=POhBP1X=POiBP1V=QOiBo0W=ROhBn0Y=SOeBm0[=<1O1\\OoBWOQ=e0VCXOk1iA0V>OjA3U>MkA3U>MkA3U>MkA4T>MkA3U>NjA2W>NiA1W>;2O001N2N2N4JbRT3" + }, + { + "size": [ + 480, + 
640 + ], + "counts": "oXT4:e>2O1N3N1O0aE>QLUOZ9=UJg1V5ZN]J[2[5eM^Jl2W5VMbJX3V5hLaJg3X5ZL]JV4^5jKZJd4`5\\KYJT5^5lJ^J_5[5bJ`Jf5\\5[J^Jk5a5VJYJo5g5QJUJS6k5mISJU6l5lIoIZ6P6fIlI^6T6bIhIb6X6]IdIh6[6YIcIi6]6m000O10000000O00100O1000O1000O1000O100000000O101O001O1N1000000O100000SNXKaIi4m5^2B>iNW1kNU1iNV1_Nb1O1O2M201N2N2N2M4L[bT4" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is in front of .", + "image": "images/caption_simple_16.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000008899.jpg", + "mask_rles": [ + { + "size": [ + 539, + 640 + ], + "counts": "Z>e1V?0000000000000O1000000O10000O1O1O100O100O100000000O100000000001O000000000O101O00JZNXAe1h>[NXAe1g>[NZAe1e>]NYAd1e>^N[Ab1c>`N]A`1d>^N]Ab1c>\\N_Ad1a>\\N^Ae1b>YN_Ah1i>10000L4N2000000KoM`AQ2`>oM`AQ2`>oM`AQ2d>1O100O1O100O11O000000O1O1O100O01000O10000O1000XNbAS1^>kNeAT1[>lNeAT1[>lNeAT1[>lNeAT1[>kNfAU1Z>jNgAV1Y>iNhAW1X>hNiAX1W>gNjAY1V>gNjAY1V>fNkAZ1U>eNlA[1T>dNmA\\1S>cNnA]1R>bNoA^1Q>aNPB_1P>aNPB_1P>`NQB`1o=`NQB`1o=_NRBa1n=]NTBc1m=[NTBe1a>0000000000000000000000O10000000000001O00O100000000000000000001O2N:FY2PBfMP>[2PBdMQ>\\2oAeMP>[2QBeMn=[2RBeMn=[2RBeMn=[2PBfMQ>Z2PBeMP>[28000iAeMn=[2QBgMn=Y2SBeMo=Y2:000000000000000O10000000000000000jAeMl=[2SBfMm=Z2:000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000000001O0000O100O100000000O1000000000000000000000000000000kAbMm=^2SBbMm=^2800000000000000000000000000000O100000000001O0000O1000000000000000000000000000000000000000O11O0000000000000000O11O0000O1000000000000000000000000001OO0101O0001O0O10000000000000000000001O000000000000O1000000000000000000000O1001O00O1000000000000000000000000001OO10000001OO100001O00nNfACZ>=hA@Y>`0hA_OX>`0kA_OT>a0mA^OS>b0mA]OT>c0mA\\OS>d0nA[OR>e0oA[OP>e0RBYOn=g0SBWOn=i0SBVOm=j0TBUOk=l0WBROi=n0WBSOh=m0YBQOh=o0YBPOg=P1ZBoNf=Q1[BnNe=R1\\BmNd=S1\\BmNd=S1]BlNc=T1^BkNb=U
1`BiN`=W1aBhN_=X1aBhN_=X1aBhN_=X1aBhN_=X1bBgN^=Y1bBgN^=Y1bBgN^=Y1aBhN`=W1aBhN_=X1aBhN_=X1aBgN`=Y1`BgN`=Y1`BgN`=Y1`BgN_=Z1bBeN^=[1cBdN]=\\1Q10001O00O100000000001O0000000000`AeN`=[1o0100001O00000000000000O11O00000000000000O1001O00000000000000000000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000000001O0O1M4ZO]@Aih5h0PgJ=H2O000001O000000001O00001O00001O00001O00^B" + }, + { + "size": [ + 539, + 640 + ], + "counts": "VTZ22e`08J4M2N2O0O1_OAf@`0X?Cf@=Y?Ee@i0N1O1J6O1N3L3N200O100000O102N1O2N2N2N6I2O1O3mNbAC`>_OfA6?Fm=3dA6e0Ai=7dA6^?He@4^?If@2^ea7" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is on .", + "image": "images/caption_simple_17.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000260261.jpg", + "mask_rles": [ + { + "size": [ + 640, + 426 + ], + "counts": "Xi`33lc02M3N2M2O2N2N2N101O1O10O1000000O10000O10000O10O1000O1O001O0O2M3O0O2O001N2O1M2O]^P4" + }, + { + "size": [ + 640, + 426 + ], + "counts": "dbZ32mc03N2N6J2O0O001O01O01O1O001O01O01O00000001O0000000O10O1O100O1O1O1N2O01000O1gK@QEa0n:DmD=Q;GlD:S;IjD8U;IjD8U;JjD6U;KjD5W;KhD6W;KhD6W;KgD7W;KfD7[;HaD=^;BaDa0_;_O^Dd0a;]OZDg0g;YOVDj0h;WOUCCeN^1U>POQCP2nNZE5_;I]4Objb1" + } + ], + "question": "What is doing with ?", + "answer": " is holding .", + "image": "images/caption_simple_18.png", + "model_output": " is holding .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000301563.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": 
"^2m:_20000000000000000O2O000001O01O0000001O0O1000010O000000000O10000001O01O00000O101O00001O00001O001O000]KXMbNi2]1XMbNh2^1WMdNi2o50000001O000O2O2cE_Mk9Q300010O0001O000O2O001O00001O1O00001O001]KZL^Og3`0ZL@f3`0ZL@g3?YLAg3R50000001O000^KWLAj3>VLBj3>VLBk3=ULCk3o4@cFQM]9n2dFRMi4EDY3cKRMi4EDS3kKWM`4FER3mKWM^4FFS3mKWM\\4FGS3mKWM\\4FGS3mKXMn8h2RGXMn8h2TGVMl8j2UGUMk8k2UGUMk8l2TGTMl8l2UGSMk8m2VGSMi8m2XGRM^OJW9T3[GRM^OJW9T3\\GQM\\OLX9S3\\GSMe8n2ZGRMf8n2ZGRMg8m2YGTMe8m2[GSMe8n2ZGQMg8o2YGQMg8o2ZGPMf8Q3YGoLg8Q3YGoLg8Q3YGoLg8Q3YGnLh8R3YGmLg8T3YGjLh8V3g00000001O000000001O0000001O001O00000000001O0000000000001O000000000000001O00000000001O000000001O0000001O0010OO101O000000000000001O0000000000002N1O001O0000000000001O0000000bHgLi4Y3eJ^MV5b2aJnMX5S2]J\\NCkMX5i3SK`N_OSNV5]3WK_Og4a0XK@h4`0WKBh4?UKDj44d4`6lK`IT4Z6SLfIl3T6[LkIe3T6]LlIb3S6`LmI_3S6bLmI]3S6cLnI\\3R6eLnIZ3S6fLlIZ3U6fLkIY3X6eLhIZ3Z6dLgI[3Z6eLfIZ3[6fLeIY3]6fLcIY3_6fLaIY3b6eL]I[3e6dL[I[3f6dL[I[3f6eLZIoNOg3i6YMWIoN2g3h6ZMWIkN5i3e6\\MUIjN8i3c6^MUIgN:j3b6_MiIa2Y6^MgIa2Z6^MgIa2Z6_MeIa2_6\\M`Id2b6[M^Id2e6ZMZIf2g6ZMXIf2i6ZMUIg2l6XMTIh2m6XMRIh2Q7VMmHk2V7SMiHm2Y7QMgHo2[7PMdHP3]7oLeHo2\\7PMeHo2]7PMbHP3`7nL`HR3a7mL_HS3c7lL\\HT3f7jLZHV3g7iLYHW3h7hLYHW3h7iLWHW3j7hLVHX3k7gLUHY3l7gLSHY3n7fLRHZ3o7eLQH[3P8dLPH\\3Q8cLoG]3Q8dLnG\\3S8cLmG]3S8cLmG]3T8cLkG]3V8bLjG^3W8aLiG_3X8`LhG`3Y8_LgGa3Z8^LfGb3[8^LdGb3]8]LdGb3]8]LcGc3]8]LcGc3^8\\LbGd3^8]LaGc3`8\\L`Gd3`8\\L_Ge3b8ZL^Gf3c8XL^Gh3S901O000000O1000000O100000000hK\\LgNe3X1^LeNc3Z1aLcN_3\\1dLbN\\3e0XLUL`0T3X3f0[LSLa0T3T3i0_LmKa0Y3o2j0hMVOV2j0kMVOT2j0mMVOR2i0PNWOo1j0QNVOn1j0SNVOl1j0VNSLWN`2c3]1WNRLWN`2b3^1XNQLWNCOk2a3a1YNQLZN^2]3a1ZNQLYN^2\\3a1\\NPLXN_2\\3b1\\NoKYN]2[3d1]NnKYN]2Z3e1^NnKXN\\2Z3g1^NmKXN[2Z3h1_NmKWNZ2Z3i1`NmKVNY2Z3j1bNlKUNX2Y3m1bNkKTNW2[3n1bNkKSNV2[3o1cNkKRNT2\\3Q2cNkKQNS2\\3R2dNkKoMR2^3T2bNkKPNP2^3U2cNkKoMo1^3V2eNjKmMo1^3X2eNiKlMn1`3Y2eNiKjMm1b3Z2eNiKiMl1b3[2fNiKiMj1a3]2gNiKhMi1a3^2hNiKgMh1a3_2iNiKfMg1a3a2iNhKfMe1b3c2iNhKfMc1a3e2jNhKfMa1`3g2kNhKfM^1`3j2kNhKeM]1`3k2lNhKdM\\1`3l2mNhKbM\\1a3l2nNh
KaM[1a3m2oNhK`MZ1a3n2POhK_MX1b3Q3oNgK_MW1b3R3QOfK]MW1b3S3ROfK[MW1c3S3SOfKZMV1c3T3TOfKXMV1d3T3UOfKWMU1d3U3VOfKVMT1d3W3VOeKVMS1d3X3WOfKSMR1f3X3XOfKQMQ1h3Y3XOfKPMQ1g3Y3ZOfKnLQ1h3Y3[OSMe0m2]OQMc0o2^OoLc0Q3^OnLb0R3^OnLb0R3_OgKjLe0h3d3_OkLa0U3@jL`0W3@gLa0Y3AdL`0\\3AcL?]3AhKbL;n3m3AgKfL7i3S4@fKiL5g3U4AeKiL4g3W4@eKjL2g3Y4@eKjL0f3\\4@cKkLOf3^4AbKjLMf3a4C_KQ1b4PO\\KP1d4l300001O00000000000000001O00000000000000001O00000000000000001O0000000000001O00000000001O000000000000001O00000000000000001O0000001O0000000000001O000000001O000000001O000000001O0000000000001O00000000000000000000001O00000000001O0000000000000000001O00000000001O0000000000001O0000001O001O000000000000001O0000000000000000000000001O0001O0000000000O101O00000001O0000O10001O00000000000000001O000000000000000000000000000000000000000000000000001O01O0001O0000]J" + }, + { + "size": [ + 428, + 640 + ], + "counts": "ff[21[=00000SX<3egC8oBJSL4L4H8E<0O102N3M4L1O3M3M3M1O2N1O101N3N2M1100O001N2O4L1O000001N5L1N1O1O2O0O2N3M2O2L3N1O1O2N101N2N1O1O100O1O100O1000O010_LfGo1Z8PNfGQ2Y8nMhGR2Y8nMhGQ2W8oMfGdN2\\3X8nMmGR2S8mMmGT2S8mMlGS2T8PNiGP2W8PNiGo1X8mMmGR2R8nMnGS2R8mMgGgN3\\3V8QNkGm1U8SNmGl1S8SNmGm1T8QNnGo1Q8RNnGo1R8QNnGo1Q8RNPHm1P8RNQHo1n7PNRHQ2n7PNQHo1P8RNPHm1o7WNoGg1R8WNoGi1S8UNnGi1T8mMcGUO9m2U8kMeGZO3j2Z8kMeGZO2i2Z8kMgGYO1k2Y8jMXHV2j7gMXHY2i7dMYH\\2h7aMZH_2g7]M\\Hb2i7TMSGNW1m2V9O2N2N2M4L3M2O3L3N4L1O1O1O1O1O1O1O1O1O2N1O010O0O101N2N101N2O1O1O2N1O1N2N6K1O0O2O1O1O1O1N101O1O1O0O2N1O201L2O2M4MZ>InA0N4M0O2L41O01ON11O10O101N3ImB0Pko2" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is on .", + "image": "images/caption_simple_19.png", + "model_output": " is on .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000016598.jpg", + "mask_rles": [ + { + "size": [ + 640, + 478 + ], + "counts": 
"Sme635l0Ni0OYNe`0Z3K3N2O1O010000O100O001M3N2O1O1O1O100O10000O10000O10000YOZ_OTNf`0j1^_OSNc`0l1`_ORN``0m1b_ORN^`0n1c_OQN]`0n1d_OQN]`0o1d_OPN\\`0o1f_OPNZ`0P2f_OPNZ`0P2g_OoMY`0Q2g_OoMY`0P2i_OoMW`0Q2i_OoMW`0Q2i_OoMW`0Q2j_OnMV`0R2j_OnMV`0Q2k_OoMU`0Q2k_OoMU`0Q2k_OoMU`0Q2k_OoMU`0Q2k_OoMU`0Q2k_OnMV`0R2k_OmMU`0S2k_OmMU`0S2l00000000000O10000010O01O001O1O1N2L5iNj]O_OXc06g\\OLb^X1" + }, + { + "size": [ + 640, + 478 + ], + "counts": "Vj133Nac0a0^\\OAPc0l0Fg5mAYJV>Z5jAlJ]>S4fA^Lg>^3[AnKM9l>g3WA_Lm>_3SA`LQ?]3QAaLR?\\3UA[Lo>b3RA]LR?]3`@]L?6S?Z3PAfLR?W3P1YO\\_OeMi\\1" + } + ], + "question": "What is doing with ?", + "answer": " is holding ", + "image": "images/caption_simple_20.png", + "model_output": " is holding .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000439854.jpg", + "mask_rles": [ + { + "size": [ + 333, + 500 + ], + "counts": "bhP11[:2O0O101N1O100O1O100O1O010O2O0O3N0OLTFIl9650010O0010O010O010O0010O0010O010O000010O0100O100O2OMRFHl98UFHk97WFHh996O1O100O2MaY`3" + }, + { + "size": [ + 333, + 500 + ], + "counts": "hmT19o0JR8=gGDX8`0SGG3Jj8o0SGROl8X100ZOjNUHV1i7nNUHQ1k7QOTHo0k7SOTHm0k7UOTHk0l7VORHL26l70QHJ54j73PHI73i75oGH;0f78oGH=Me7 and ?", + "answer": " is on .", + "image": "images/caption_simple_21.png", + "model_output": " is being ridden by .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000012062.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "l`k7>k<4L4[C\\O^ and ?", + "answer": " is in front of .", + "image": "images/caption_simple_22.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000121586.jpg", + "mask_rles": [ + { + "size": [ + 478, + 640 + ], + "counts": 
"VTZ191IY>g0KO=YOaA2OO4Oi=0]B0H002OOk=2SB20:1Bk=k0VBTOT<;VEHdN:3EV5]A0R>a0M2N2N101N2O1N1O000O1O100O100O100O10000O100010O01O4L1O0001O00001O1O001N2N2M3N2O2M2N3M4L4L2M6Inok5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is inside .", + "image": "images/caption_simple_23.png", + "model_output": " is in front of .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000476704.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "V8T5X8001O000000O10000000000010O02N1O00003L3N0000010O5K2N0O2O00000001O00000000O01O1L4M30000O10001OO1000000001O0000000000O1000000000000000000000O1000001O000000O100000000001O00000000O2O00101N3M3M1O0000000O11O00000000RL\\GT3d8jL_GU3a8gLcGDJ[3d8nLTHl2l7RMWHm2U9O00000010O0001N100000001O0000000000000000001O001O000000001O0000O10000001O0000000000000000001O00000000000000001O0000000000000001O0001O0O100000000001O0001O0O100000000001O00O100000001O00000000000000001O00000000000000001O0000000001O0000O100000001O0000000000001O00000000001O1O001O000000000000000000000000000001O0000000000000001O000000O100001O000000000O100000000000001O01O00000000000000000001O00000000000000000000000000001O0000000000000000001O000O10000000000000000001O0001N010000000000000001O00001O00000000O101O000000000000000000001O00000000000000000000000000000000000000001O000000O10000O10000O10000O10000000000001O4L1O001O001O0000001O000000000000O10000O1O1O1O1N2N2N2N2N2M3N2L4O1O1O1O10000O10000O100O10000O100O10000O10000000000O100O100O100O100O1O1O1O1O1O100O10000000eL_FP1K`0i9[N`Fb0IF1[1U:nNmEA2a1X:]N`Fb1e:0000000001O001O00002N3M1O002N1O2N001O1O1O1O00O10O10O2N100O100L4O1O1O1N2O100YLoNSKQ1_4^O`Kb0_4_O`Kb0`4^O`Kb0`4^O`Kb0`4^O`Kb0`4]ORISOf1`1X5]OQIVOe1]1Y5^OQIXOd1Z1[5^OPI\\Ob1V1^5^OPI^O`1T1`5^OPI_O`1R1_5POcHI?4_1S1_5oNfHG?2_1X1\\5oNULQ1k3oNULQ1k3nNVLR1j3mNWLS1j3jNXLV1h3gNTKMYN]1c6eNUK2TNY1g6eNUK6PNU1k6dNVK=hMP1R7bNWKb0bMm0X7^NXKg0]Ml0[7ZN[Kl0WMk0^:Q100001O00000000000000001O000000000000000000O1000
00000O1O1O1N2N2O1SOPMgFg3^91FYLoFg3P9\\LmFe3Q9 and ?", + "answer": " is parked on .", + "image": "images/caption_simple_24.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000211042.jpg", + "mask_rles": [ + { + "size": [ + 640, + 458 + ], + "counts": "YdX34ic05L3N2N2N2N101N1O2N1O2N1O2O1N101N2O0O2N100O1O0001O0O1O2O001O001N10O20O0001O01O01O1O01O0O2O0O1O100O10001O01O01O001O1O001M3N2K7Ghbf0L_]YO5`N3n]OK;7ea0b0n]OFQb0U1N2N2N2I7K5K5N201O00000000000001O00001O000000001O0000001O00001O00001O00001O00001O00001O001O0000001N10001O00001O001O00001O0O2O001O001O001N101O001O1N101O001O0O2O1O0O2O1O1N101N2O1N101N2O1O1N2N2O2M1O2O1N3L3N3L5Jbcc1" + }, + { + "size": [ + 640, + 458 + ], + "counts": "]dW3;bc04M2L4L4N2oK^OaDe0^:X1RB_Nd1?U<_2WChMc^3m0N3M2O1N2O1O1O1O1N2O1O1O1N1O2N1_Ng_OYO]`0f0h_OnN``0P1X10001M2M4J5E;K6L5JnX=0`SB3WhQ3" + } + ], + "question": "What is the relationship between and ?", + "answer": " is attached to and leaning on .", + "image": "images/caption_simple_25.png", + "model_output": " is attached to .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000055299.jpg", + "mask_rles": [ + { + "size": [ + 429, + 640 + ], + "counts": "e and ?", + "answer": " is sitting on .", + "image": "images/caption_simple_26.png", + "model_output": " is standing on .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000435206.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": 
"b531`7c5`H]J`7c54000\\H]J`7c5_H^Ja7b5_H^Ja7b5_H^Ja7b5_H^Ja7b5_H^Ja7b5400000000000000O11O0000O10000001O00O11O0000O1000000000000001O00O1000000000000001O00O1000000000000001O00O10000001OO1001O000000O11O0000000000000000000000000000001O0000O1000000000000001ZH]Jb7b54000000000000000000000000000[H^Ja7b5_H^Ja7b5_H^Ja7b5_H^Ja7b541O000[H\\Jc7f501O000000O100O11O0000N]H\\Jc7c530000000000000000000000O1001O00O1000000O100001O0000000000O1000000000000000000000000O10000001O00O1O1O1000000001O00O100O100O10000000000O100N200001O1O001O000000000000000000000000000000000000000000000000000000001O00001O001O001O001O1O0000001O00001O001O000mH^J]6c5`I_J`6a5`I`JCLg6e5dIeJ\\6\\5bIfJ]6[5_IhJa6X5^IiJb6X5]IhJc6X5]IjJa6W5]IkJb6U5^IlJa6T5^ImJb6o5O000000001O00001O001UJ^Ik4b6TK`Ik4`6UK`Ik4`6TKbIk4_6SKbIm4^6RKcIn4]6RKcIn4^6PKcIP5^6oJbIQ5^6mJdIS5\\6kJfIU5[6iJfIW5[6gJfIY5[6eJfI[5[6dJfI[5U701O1O1O1O1O4L000000O11O00001O0000001O0000O10000000000000000000[LjGU2W8hMkGX2U8hMkGX2U8hMkGX2U8gMlGY2U8fMkGZ2U8fMkGZ2U8fMkGZ2V8eMjG[2V8dMkG\\2V8cMjG]2W8bMjG]2X8gLfG<2m2Y8fLeG=1n2[8cLfG?Nn2\\8bLhG?Lo2^8_LgGa0LP3i8oLYGP3h8nLYGR3g8mL\\GQ3e8mL\\GS3f8jL]GT3e8iL]GV3e8gL\\GY3\\9O00001O2N001O00001O00001O001O001O1O1O001O1O1O001O001O1O2N001O001O1O001O1O1O1O000WN[El0f:mNaER1`:kNcET1^:kNbEU1^:jNdEU1\\:jNeEV1\\:gNgEX1Z:dNjE[1X:`NkE`1Q;eNXDQ1h;nN]Dn0P<01O00001O100O2N0000001O00003MM3O100O1O100O10000002N6J00001O1O3M2N1O2N2M5L3M1O1O0Oml`2" + }, + { + "size": [ + 427, + 640 + ], + "counts": "Vbn3:e< doing in relation to the ?", + "answer": " is looking at the .", + "image": "images/caption_simple_27.png", + "model_output": " is looking at .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000137950.jpg", + "mask_rles": [ + { + "size": [ + 415, + 640 + ], + "counts": "WW1231N11O0O11N10?0<4 and ?", + "answer": " is flying over .", + "image": "images/caption_simple_28.png", + "model_output": " is over .", + "eval_result": "True" + }, + { + "image_path": 
"/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000435208.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "o`02d03AMeb0T1M5Z]OhN1NZb0f1L1OO1O1001N10001O10OO2O1O1O1N2O1O1O1O1N2O1O1O100N2002YN\\^Om0fa0PO]^Oo0ca0PO^^OQ1aa0lNb^OT1_a0jNb^OV1^a0hNd^OX1]a0eNe^O[1[a0dNf^O\\1oa0000000000O1000000000000000000O10000000000000000O10000000O100000000000O1000000000000O1000000000O1000O100000000000000O100000000000000000000O10O100000000000000000000000000000000000V_O^NdN2W`0a1SAiNm>W1k@]NlN>Y`0U1h@TOX?l0c@YO]?g0\\@@d?`0\\@@d?`0\\@@c?a0[@Ae??W@Ei?;m_OOS`01j_O2V`0Ni_O3W`0k100000000000000000000000000000000000000eM]LYDc3`;fL^DZ3_;iLaDW3Z;nLfDR3Q;\\LkBg0T2m2b:fM^EZ2a:gM_EY2a:gM_EY2a:gM^EZ2b:fM[E]2e:cMeDS3Z;nL]D[3c;dLPDj3PjNXAb1c>`NWAg1g>ZNVAk1g>VNTAR2g>PNVAW2e>jMXA^2b>`M_Ae2\\>[MdAg2[>SMhAR3V>jLlA[3Q>dLoA^3o=dLnA`3P>`LoAb3P>_LnAb3Q>_LnAc3P>_LmAd3P>^LnAd3o=`LPB`3k=jLnAY3i=PMTBS3g=RMWBo2h=SMVBn2i=VMRBk2n=d1O1O1O10O010O0100000O01000O10O010000000001O0000001O000000001O001O000O2eKPBd2Q>YMRBf2n=ZMTBd2m=ZMUBe2k=[MWBc2j=[MXBd2j=ZMXBd2l=XMXBd2i=ZMXBf2m=YL_Ag0e0P3U>iL`Bd2a=PMQCi2V?L6K2N2N1O1O0O100000001O0O102N1N01N1101O1N01000O01N110O1O101N101O0O2N2N100O100O1000O10O1O10O1@i^OWNVa0f1P_OWNQa0a1X_O]Ni`0a1l0O1N2N2N2N2N2M4M2L4L5I7J6N6CWgZ6" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is sitting on .", + "image": "images/caption_simple_29.png", + "model_output": " is sitting on .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000137576.jpg", + "mask_rles": [ + { + "size": [ + 563, + 640 + ], + "counts": 
"0g15UOf>k0ZAUOf>k0ZAUOf>k0ZAUOf>k0ZAUOf>k0ZAUOf>k0ZAUOf>j0[AVOe>j0[AVOe>j0[AVOe>j0[AVOe>j0[AVOe>j0[AVOe>j0[AUOf>k0ZAUOf>l0YATOg>l0YATOg>l0YATOg>k0ZATOg>l0YATOg>l0YATOg>l0YATOg>l0YASOh>m0XASOh>m0XASOh>m0XASOh>m0XASOh>m0XAROi>n0WAROi>n0WAROi>n0WAROi>n0WAQOj>o0UAROk>n0UAROk>n0UAQOl>P1TAoNl>Q1TAoNl>Q1TAnNm>Q1SAPOm>P1TAoNl>Q1TAoNl>Q1TAoNl>Q1TAnNm>R1SAnNm>R1SAnNm>R1SAnNm>S1RAmNn>R1SAmNn>S1RAmNn>S1RAmNn>S1RAmNn>S1RAlNo>T1QAlNo>T1QAkNP?U1PAkNP?U1PAkNP?V1o@iNR?V1o@jNQ?V1o@jNQ?V1o@jNS?T1n@jNS?V1m@jNS?V1m@jNS?V1m@iNT?W1l@iNU?V1k@jNU?W1j@iNV?W1j@iNV?W1j@hNX?W1h@iNY?V1g@iN[?V1e@jN[?V1e@jN\\?U1d@kN]?T1c@kN^?U1b@kN_?T1a@lN_?T1b@kN]?V1c@jN]?V1c@iN^?W1b@iN^?W1b@iN^?W1b@hN^?Y1b@gN^?Y1b@gN^?Y1b@gN^?Y1b@gN^?Y1b@fN^?[1b@eN^?[1c@dN]?\\1c@cN]?^1c@bN]?^1c@bN]?^1c@bN]?^1c@aN^?_1;000000001O000001O1O000010O00000001O0001O0001O000000001O001O00001O001N10001N10001O0O2O001O001N101O001O1N10001O0O101O000O101O1N101O1O1O0O2O2N1Ao^O9Wa0NS_S8" + }, + { + "size": [ + 563, + 640 + ], + "counts": "adj0X1P`0?Q@`N]?e1e@ZNZ?g1f@YNX?i1h@TNY?m1h@QNY?m16M4K5J7F:F:F;CY\\34ncL04HWc62lfM2dTKM26Kn01PO4N000NV>[3I100O1O0000000001O001N2O001N2O1O1O1O1O1N2O1N2O2N1O1O001N1O101O000000K5M4N10000001O001O1M4I6N2M5fMk@P2X?oMh@Q2Y?32OO02O000O1O3N0O10001N100O101O0O1000001O000O2O00001O00001O1O001O001O001O001O0O2O001O001O0O2O0000001O000O2O000010O0001O00001O0O101O001N100O1O2N1O3L2N3N2N3N2M9H1O001O00000000001N1O10gon6" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is over .", + "image": "images/caption_simple_30.png", + "model_output": " is over .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000126137.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "bQ`1`0]<`0L2J7OO2mNmCl0SQE6c;I_D6b;H`D6b;H`D7a;HaD5a;IcD4a;FbD5aQZ5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_31.png", + "model_output": " is beside .", + 
"eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000573943.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Ya`84l>5Kb0^O6J1O1O000O01N2N2M3N200O1000001O000000001O000000001O0000000000001O002Ldd9" + }, + { + "size": [ + 480, + 640 + ], + "counts": "doj7g0U>5K5N10000000000000O010O10000001O1O1O1O001O0000000000O2O0O1001O01O000O1000010O0000000O1O2N1O6I_`j0" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_32.png", + "model_output": " is beside .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000225532.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "h_d21f;1O1O2N1O1N2O1O1O1O001O001O001O0oDBi:?VEBi:?VECh:=XEDg:=ZEBe:>\\ECb:>]EDa:=_ED_:=aEC^:=bED]:=cED[:=eECZ:=gEDW:=hEEV:SHAl7?UHAj7a0UH^Ok7c0UH\\Ok7e0TH[Ol7f0THYOl7g0VHWOj7j0VHUOj7l0VHSOj7n0VHQOj7P1VHoNj7R1VHmNj7S1WHlNi7U1WHjNi7V1XHiNh7Y1WHfNi7[1WHdNi7\\1XHcNh7_1WH`Ni7a1WH^Ni7c1WH\\Ni7d1XH[Nh7f1YHXNf7j1ZHUNf7l1ZHSNf7n1ZHQNg7o1YHPNg7Q2YHnMg7S2YHlMg7U2XHkMh7V2XHiMh7X2XHgMh7Z2XHeMh7[2YHdMg7]2YHbMf7`2ZH_Mf7b2ZH]Mf7d2ZH[Mf7f2ZHYMf7h2ZHWMf7j2ZHUMf7l2ZHSMg7m2ZHPMg7P3`01O1O1O100O002N001O001O2N001O1O1O1O1O1O0000000000000000000000000000cMQHR1o7eN_HV1a7eNgHX1X7fNPIU1Q7hNXIQ1h6nNbIi0]6VOiIf0W6YOmId0S6\\OnIc0R6\\OQJb0o5^ORJ`0o5@SJ>n5@TJ2fMOU80WJKdML14OLU89WJ and ?", + "answer": " is driving on .", + "image": "images/caption_simple_33.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000424349.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": 
"XlW48l<>`CBX3L^4a1YKdNb4e1UK_Ng4g1RK^Nl4f1oJ]NP5g1jJ\\NU5h1fJZNX5j1dJXNZ5l1bJVN^5l1_JUN`5n1WIdMLa0m6V3nHlLR7X3bHPM]7_4O1O100O100O10000O100O100O100O10000O10000O100O100O10000001O00000000O10000O1_NoIiKQ6V4[J^Kf5_4cJYK_5`4mJYKU5e4oJSKgNK[6P5j1N2N2O1O1O1N2N2O1O1O1O1O100O100O100O100000000O1000000O10000001O0000000000001O001O001O001O000eIcKi3^4nKlKP4V4fK\\KXNc0R6Q4dKTL\\4m3aKVL^4k3`KVL`4k3^KWLa4j3]KWLc4k3[KVLd4k3ZKWLe4j3XKYLg4j3TKXLf3kNZLP5IYLk3nNWLl4G[LP4mNVLn6i3TIVLl6i3VIVLj6i3[ISLf6k3\\ITLd6k3_ISLa6l3bIRL^6n3dIPL\\6o3iIlKX6T4mIgKR6Y4`100000000O10001O0O101O001N101N1O1N3J5H9EZFiLm9g2c0L5L3N201N101O0000001N2O1O1N101O1O1O00001O001O1O1O1O001O00000001O0001N110O00000001O0001O000000001N10000O101N1O2O001O000O2O001O1O001O1N2N2O0O2O1O2N1O2M2O2N1O1N2O0O2O1N2N1N5L6GcbP1" + }, + { + "size": [ + 426, + 640 + ], + "counts": "^j7=g<6I8L4`ETO[8Q1bGRO[8R1cGoNY8f1UG[Ng8l1RGXNg8Q2UGQNf8U2XGlMf8V2YGkMd8Z2XGhMc8^2[GcMc8`2ZGbMd8a2ZG`Mc8d2[G]Mb8g2\\GZMa8k2\\GVMb8j3O1O2L3QM^KUKJW1j4[3^LcLc3[3`LbLb3[3aLdL`3Y3cLcLa3\\3`LbLb3Z3bLeL_3Z3bLdL`3[3aLdL`3Z3cLdL^3[3fLbLZ3]3lL]LU3c3lL[LU3e3kLZLV3d3PMWLQ3h3SMTLn2l3TMRLl2n3TMQLm2n3UMQLk2o3UMPLl2P4VMmKk2R4XMkKi2U4WMiKk2W4UMhKl2X4UMeKm2[4Q300O1O100O1000000001O00O10000O100000000O1000000000000000000000000000000O11O000000000000O1000000000000001O001O001O0eJeKg1[4WNhKh1Y4SNlKl1T4QNoKo1R4nMQLQ2o3nMRLR2o3lMSLS2n3jMTLV2m3hMULW2l3fMVLZ2l3dMUL[2l3cMUL]2l3bMUL]2m3aMTL^2m3`MUL_2m3_MVL^2P4\\MTL`2R4YMRLd2T4VMQLe2R4WMPLh2R4UMPLj2R4TMoKk2Z4eKTJP1e1Z3_4_LmKY3\\4XLlKf3n6N001O0000001O1O2N3M4L3M2M2O1O2PO[FVNi9f1_FQNd9l1aFPNb9m1o0N3L5K4M3M3L4L4K5M3L4K8I_ko5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_34.png", + "model_output": " is beside .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000173302.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": 
"en\\43U=10000O101O00000001NVDMfo19k[N4O100000001QOBQE>l:HQEd0a:@]ET1n9nNQFR1n9QOPFo0P:ROoENE7]:LmELS;5lDKS;7lDIT;8kDHU;9jDGV;9jDGV;:jDEV;gDBY;>gDBY;>gDBY;>gDBY;>hDAX;?hD@Y;`0gD@Y;`0gD@Y;?iD@J3g:=_E7`:I[EmEClNOW;>mEBmN1U;=mECnN0U;=mECnN0U;>lEBoN0T;?mEAPOOS;`0mEAPOOS;`0mE@QO0m96jF:8@QO0k9:jF6:@RO0i9;jF5;@RO0h9>iF3;AROOc03X8nDAR;?nDAR;?nDAR;?nDAR;?nDAR;?nDA6Ln9c0lE`0S:@mE`0S:@mE?T:AlEOHROh:o0aE_OR;`0oD_OR;a0oD^OQ;b0oD^OQ;a0oDG9D\\jd3" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_35.png", + "model_output": " is beside .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000352760.jpg", + "mask_rles": [ + { + "size": [ + 640, + 544 + ], + "counts": "PX1`bW16YQiN6J3N3L2O3M3M1O2N2N2O1N2N2N1O2O1O1N200O001O1001O0O2O1O0O100O11OO100000000O10O10O01000O100O010O100O1O02OO10000O010O100O00100O01000O010O00100O010O010O01O010hMQO\\Ao0a>TO`Ak0_>VOaAk0\\>XOa_OJm1m0a>@_Aa0^>AbA?]>BdA=Z>DgA=W>DjAEnA;Q>GnA:o=HRB7l=KTB5k=LVB3g=O[B1a=1`BO^=2cBN]j7`1`EPNg2?i7\\1hERNa2b0d7Z1QFPN]2g0`7S1[FTNW2h0\\7d0SG]Nd1o0X7`0WKAf4>^KAa4>aKB]4>eKBZ4=hKDU4`1o_OYNg12Y>]1\\2N2N1O2N101O001O0010O2O0O2O2[OlNZ^OV1_a0TO[^On0^a0o0\\EaMa3c2]L^Ma3e2\\L]Ma3g2]L[Ma3g2]LZMa3j2\\LWMb3l2\\GPM\\36V5n2oF\\Ne2iNY6Q3dFQO`2PNk6\\6jHeIU7d6\\FSIe1;m7e7eFWHL8]9b9N2O1O2M2O1O2N002N1O2N1O010O00O100O2M2O2K4N3M3C`FVFd9c9`0K5J6L4fM[FQJm9h5`FmIe9m5eFeIh9R6_2B7]O`0E;J6J6M2N3N2N2O1O10001O000001O1O00100O2N1O100O2N1O2O1N2N3M2O2M3N1NZLVAY2g>fM_AX2_>gMdAX2Z>iMiAU2U>lMmAS2R>lMQBS2n=mMSBS2k=mMYBQ2f=PNZBo1f=QN[Bn1d=RN^Bm1c=RN^Bn1`=SNaBm1^=SNcBm1]=RNdBn1[=RNfBo1X=PNjBQ2T=oMmBR2R=mMoBT2o and ?", + "answer": " is below .", + "image": "images/caption_simple_36.png", + "model_output": " is over .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000344614.jpg", + "mask_rles": [ + { + "size": [ + 640, + 478 + ], + "counts": 
"Qag32kc04N2M2O100N201M2O1O2K6K4W@VOUXOa0lJj@00Y4Ta0[N8H3K5J6J6K5K5oNk]O6Vb0In]O3Sb0Lo]O0Tb0LU^OGSb06m0M3M3NRXj02ngUO001O000000000000001OO10000001O00001O001O1O001O001O001O0000001O0000001O0000000000001O0000001O000000O100001O1O00001O001O000000000000001O00001O1O000000001O000000001O0000001O00000000001OO1001O1O00001O0000001O00000000001O00000000001O001O00000000001O001O1O2N00OV]OnNhb0Z1K7I7I1O1O1O1O2N000000O100O1N2001O1OO1O1O1M30000000000000000003M1O1O001O00000000001O0R^ORN60J1010N10k`0n1h_OTN``0l1a_OTN@Ne`0o1^_OUNG4ONl`0m2o@oLUN61JO11NS>S3nBlLPOW2Y=l0lCoLiNS2Y=0iBeNU1XOiN;1c1m<6TCcNT1IjNc1jLGc8K_GX4`8iK^GX4b8iK\\GX4d8c3kGfEb6\\:WIgFk5[9lIdDOZ40gK2n01RONO150;OF0Lm0lc0" + }, + { + "size": [ + 640, + 478 + ], + "counts": "W_W6e0Vc0:F9I6K4M4K4L4M3M3M3M2N3M2N3M2N3M2O1N3N0O2N2O1O1N2O1N2O1O1O001O1N101O1O1O1O001O001O001O001O001O001O0001O0001O01O010O001O00001O001O001O001O001O001O001O1N2O1O001N2O1O0O2O1N2O1N3M2O1N2N3M2O1N3M2N3M2N3M3L3N4K4L4M3K7I6J8H=]On_n0" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is on .", + "image": "images/caption_simple_37.png", + "model_output": " is over .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000272148.jpg", + "mask_rles": [ + { + "size": [ + 378, + 640 + ], + "counts": 
"V4c7W40000000000O100000000000000001O00O1000000000000001O00O1000O1O11O00001O2N001O0000JbHPL^7P4bHoK_7Q4aHoK_7V4001O0K`HPL_7Q4aHoK_7U41O100001O0001O00O1000000000000001O00O100000000000000001OO1000000000000000000000000000000000010OO0101O000000000000000000001O0kJfKj2Z4UMhKj2X4VMiKi2W4VMjKj2V4VMjKj2V4TMlKl2T4UMkKk2U4UMkKk2U4WMhKj2X4\\2000lIfKh4Z4VKhKj4X4VKgKk4Y4[100000RJgK[4Y4eKgK[4Z4b10000000000001O2N00000000000001O]IeKe00RO0l3[4_LcKb08mNKQ4[4nMkKPNJR4[4oMjKoMKR4[4PNhKoMMQ4[4QNgKnMNQ4[4RNfKlM0R4Z4RNfKlM0R4Z4RNeKlM2S4X4SNcKkM4S4X4]NhKd1Y4YNiKg1W4XNjKg1W4XNjKh1V4WNkKi1U4WNkKi1U4WNjKj1V4oMbKjM7W4W4oMbKlM4V4Z4nMbKa2]4_McKa2]4]21000000000000001O00O10000000000000O1000000001O000000000000000[LbKN^41gKKY45gKKY43iKMW40lK0T4NnK2R4NnK2R4NoK1Q4OoK1Q40nK0R42kKOU42jKNV42jKNW42gKOY44cKM]4h30000000000000000001O00O1001O0000000000001O000oI`Kh4`4XK_Ki4a4X1000000000000000000000000001O000000O11O0dNeKYK[4f4gKYKY4g4gKYKX4g4jKXKV4h4jKXKW4f4jKZKV4f4jKZKV4f4jKZKV4f4jKZKV4f4jKZKV4g4jKXKV4h4jKXKW4f4jKZKV4f4jKZKV4g4iKYKW4g4iKYKW4g4iKYKW4h4hKXKX4h4hKYKX4i2]KRN:TOZ4h2_KSN7UOZ4h2_KSN7UOZ4g2`KUN5TO[4g2aKYL0d14\\O[4g2hKmMM\\O[4g2hKmMN[OZ4h2hKmMN[OZ46]KZ1;TON^OY44^KY1;VON]OY44^KX1ZOL[OZ4b2kKSNK[OZ4a2kKUNLYOY4b2kKUNLZOX4a2kKVNMYOX4b2jKTNOZOV4e2hKRN2YOW4h2aKPN8YOW4d4jK\\KV4c4kK]KV4b4jK]KW4b4iK_KW4`4jKaKV4]4kKcKU4]4kKbKV4]4kKcKU4]4kKcKU4]4kKcKU4^4iKcKT4nN^Ka5 and ?", + "answer": " is over .", + "image": "images/caption_simple_38.png", + "model_output": " is over .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000222317.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": 
"kc^1g0T>>E7J2N2O0O1fCQOnN_O10[;`1fEFlNkNn:_1WF2`9N`F9Y9GgFl0f8SO[GX1Z8hNgG]1S8cNmGb1n7^NRHf1j7ZNVHj1e7WN[Hk1c7UN^Hl1`7TN`Hm1_7RNbHo1]7QNcHQ2[7oMeHR2Z7nMfHS2Y7mMgHT2X7lMhHT2X7kMiHV2V7jMjHW2U7iMkHX2T7hMlHY2S7gMmHZ2R7fMnH[2P7fMPI[2o6dMRI\\2n6dMRI]2m6cMSI^2l6bMTI_2k6aMUI`2j6`MVIa2i6_MVIc2i6]MWId2g6]MYId2f6\\MZIe2e6[M[If2d6YM]Ih2b6XM^Ih2b6XM^Ij2`6VM`Ik2_6TMbIm2]6SMcIm2]6SMcIn2\\6RMdIYOkNK2P2]7mNfISOTOG0Y2T7mNhIkN@GJ`2m6nNiIiN:Z2l5mNjIgN=\\2h5mNlIdN?_2d5mNoKR1P4nNQLR1n3nNSLR1l3nNTLS1k3mNVLS1i3mNZLQ1d3PO]LP1b3PO_LP1`3PO`LQ1_3oNdLo0[3QOfLo0Y3QOhLo0W3QOjLo0U3QOlLn0T3ROmLn0Q3SOPMn0n2ROSMm0m2SOTMm0k2SOUMn0j2ROWMn0h2ROYMo0e2QO\\Mo0c2QO]Mo0c2QO]MP1b2PO^MQ1a2oN_MR1_2nNcMR1\\2nNdMS1[2mNeMS1[2mNeMT1Z2kNgMV1X2iNiMX1V2hNjMY1U2fNlM[1R2eNoM\\1P2cNQN^1n1aNSN_1m1aNSN`1l1_NUNa1k1]NWNc1h1^NXNb1h1^NXNb1h1]NYNc1g1\\NYNe1g1ZNZNf1f1YN[Ng1e1XN\\Nh1d1XN\\Nh1d1WN]Ni1b1XN^Nh1b1XN^Nh1b1XN^Nh1b1XN]Ni1c1WN]Ni1c1WN]Ni1c1WN]Ni1b1YN]Ng1c1YN\\Nh1d1XN\\Nh1d1XN\\Nh1d1YN[Ng1e1YNZNh1f1YNYNg1g1YNYNg1g1ZNXNf1h1ZNWNg1i1YNWNg1h1ZNXNf1h1[NVNf1j1YNWNg1i1WNYNi1g1VNZNj1f1VNZNj1f1UN[Nk1e1TN\\Nl1d1SN^Nl1a1TNaNk1_1TNcNk1]1UNdNj1\\1UNeNk1[1TNgNk1Y1TNhNl1X1TNiNk1W1TNjNl1V1SNmNk1S1UNoNi1Q1VNQOi1o0WNQOi1o0WNSOg1l0YNXOd1h0[N]Oa1c0_N@^1`0aNC]1=cND\\1P2@SN>n1^OVNb0j1\\OXNd0h1[OYNe0g1YO\\Nf0d1YO]Ng0c1WO_Ni0a1TObNl0^1SOcNl0^1ROeNm0[1ROgNm0Y1ROiNm0W1QOkNo0U1POmNo0S1oNPOP1P1oNQOQ1o0nNTOP1l0nNWOP1j0oNXOP1h0oNZOP1f0POZOP1f0nN^OP1b0oN@P1`0POAo0?POBP1>POCo0=PODo0=QOCo0=QOCo0=QOCn0>ROBn0>ROBn0>QOCo0=QOCo0=QOCo0=QOCo0=ROBn0>ROBn0>ROBm0?SOAm0?SOAm0?SOAm0?SOAm0?SOBk0?UOAk0?UOBj0>WOAi0?WOAh0`0XO@h0`0XOAg0?YOAg0?YOBf0>ZOAg0?ZO@f0`0ZO@f0`0ZO_Og0a0YO_Og0a0ZO]Of0d0ZO[Og0e0YO[Of0f0ZOZOf0f0[OYOe0g0[OXOf0h0ZOXOf0h0[OVOf0j0ZOVOf0j0ZOVOUN^LV1\\4e0VOSNaLW1Y4g0TOoMhLY1T4h0TOPNgLX1U4h0TOSNdLU1X4h0TORNgLS1V4l0ROoMlLS1R4n0ROnMnLS1P4o0SOiMRMV1l3Q1ROhMSMW1k3R1QOfMVMW1i3S1QOcMYMZ1f3S1QOaM\\MZ1d3U1oN_MaM[1`3V1nN`MbMZ1`3W1lN_MfMY1^3X1kN_MhMY1]3X1lNhLfLc0R1\\1\\3Y1POYMfM]1Z3[1POVMgM^1Z3\\1POTMiM^1W3^1QORMjM^1V3`1POQMlM^1T3a1QOoLmM_1R3c1QOkLPNa1o2d1ROjLRN_1l
2g1SOhLSN_1k2i1o0UNQOl1Q1QNoNo1S1nMnNR2U1kMkNV2X1fMhNZ2[1cMeN]2_1^MbNc2h1iLfIK0Ob4]3m6001O000000001O00001O0000001O000000001O0000001O0000001O0000001O0000001O00000O2N1L4K5F:J7H7M3L4L5M2N2O2M2N2O1O3M2M2O2N1O100O1O101N1O1O2O0O10000O1000001N10001N1000000O2O00001O000O101O00001N100N2J7L3N2O2N1O1N200O2N101N1O100O101O000O101O0O101O000O101O0O10001N1000000[JgHc3Z7ZLkHc3U7\\LVIY3k6gL_Io2a6PMaIo2`6PMbIn2^6QMaJQ2_5oM`KeNiL0O07h1b7CfMcNa2\\1c5O0000001O000O2O00000000001O00001N1000001N1000001O00001O00000000001N1000001O0000VK" + }, + { + "size": [ + 480, + 640 + ], + "counts": "`UZ33k>2O2N2N1O2N1O2O0O2N1O101N00100O1O010O01000O010O101N10000O2O00001O0O101O001O0O2O001O001N101O001O001O1O2N0000001O000000001O0000001O0010O01mBfN^ and ?", + "answer": " is lying on .", + "image": "images/caption_simple_39.png", + "model_output": " is lying on .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000231088.jpg", + "mask_rles": [ + { + "size": [ + 640, + 425 + ], + "counts": "cna02jc05L5L3M3M2M4L4L4L4L4K5J6J6H8N1O2N2O2N1O1O1N3N1O1O1N2O3L2O1O2N2M3N2M2O1O1N2O001N10000O10000O10000O1000000O101N10000O100O101N10000O10000O2O0O101N100O101O0O101Z@XLQ?i3PAVLP?j3QATLo>m3b01O0000O1O1B\\@`Ld?[3a@fL^?Q3g@nLZO5m?e2g@gMY?T2f@SNY?d1j@cNV?X1e@ROZ?e0]@SNO`1d?:_@1a?@V@PO4DKo1l?[OY@X1f?hNZ@X1f?a101O01O0000000O1O101N100O102M4M5J4M5J6K2M[OPMj_OOc0m2d?ZM[@b2f?aM[@[2e?kMY@P2i?RNX@j1h?YNZ@b1g?dNU@X1l?lNT@P1m?UOR@f0n?]OS@>n?FR@6o?LR@0o?4o_OHR`0 and ?", + "answer": " is attached to .", + "image": "images/caption_simple_40.png", + "model_output": " is attached to .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000421923.jpg", + "mask_rles": [ + { + "size": [ + 640, + 426 + ], + "counts": "_`Q7f0Yc0=D`0@`0_O>C4L3L2O1M3M3O1N2O10O01O0000000O2N100M32N2N00O1O001O1O1N2N2I6L5O100O10O10O010000N20O0100000O2N1001O000000000000O1O2N100004VMm_O_1e0" + }, + { + "size": [ + 640, + 426 + ], + "counts": 
"ogo42jc06L2O1L5N1O1d^OFR?:m@HR?8m@KP?7n@LXNA``0e0SAO[N^O``0d0TA7k>JTA6k>KUA5k>JUA7k>ITA8l>Hl@d0P?\\Oo@e0Q?[On@g0R?YOm@f0T?]Oh@d0W?^Oh@a0X?@i@>X?Cg@ and ?", + "answer": " is inside .", + "image": "images/caption_simple_41.png", + "model_output": " is inside .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000057149.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "_hW4 and ?", + "answer": " is in front of .", + "image": "images/caption_simple_42.png", + "model_output": " is in front of .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000577932.jpg", + "mask_rles": [ + { + "size": [ + 543, + 640 + ], + "counts": "VTR5 and ?", + "answer": " is touching .", + "image": "images/caption_simple_43.png", + "model_output": " is attached to .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000311002.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "ZWb02W=3O0N20001N10001N10000O2O0000000O10000O100000000000O01000O1K6Mmh[1MWWdN5K3M2O2N1O1O1O1O001O0O101O001O1O1O2N1O1O1N2O1O1O1O0010O103L10O01O3M6I5K8B[b97e]F2N3L6J3M4K5L4K6K4L4L2O3M3]D_NV;c2XO0O2O0000000000001O0000000000000000000000000000001O0000001O00000000000000000000001O0001O00001ZMkET2U:jMoET2R:hMRFX2`:00000010N100000O10O01N101K5F:N2O10O10000O10000000000O11N10000000000000O10000000000000000001O000001O00000000000000000000000000000O2O000O100000000O101O000O1000000O2O000000000O2O00000O101nNYE]Oh:c0YE\\Og:c0ZE]Of:c0ZE]Of:c0[E[Og:d0[EZOe:f0[EZOe:f0\\EYOd:g0]EWOd:i0\\EWOd:g0^EYOc:f0^EYOb:f0_EZOa:f0_EZOa:f0_EYOb:g0^EYOb:g0_EXOb:g0^EYOb:g0_EWOb:i0^EWOb:i0_EVOa:j0_EUOc:j0^EUOb:k0_ETOa:l0`ESO`:m0`ESO`:m0aEQO`:o0aEoN`:Q1m001O000000000000000000000000000000001O000000000O100000000O100000001O0O100000000O100O100O100O1O1O2J5K5N2O100O10000000000000000000001O000O100O5IT[k1" + }, + { + "size": [ + 427, + 640 + ], + 
"counts": "\\gc3:n<4M3L4L3L4N2L4L5M2M3M3N2N3H7O2N1O1]OSNfEa1BkNe:JbE[1MjN_:0_EV15kNY:d1iE`NR:_1QFbNm9\\1UFgNh9Y1YFjNb9Y1\\FhNc9`1UFaNj9`2jFjLW8V3cGQM\\8P3aGRM_8W3VGkLj8l3O000000O100O2N1_OnFfLU9W3b0L3jNV1J4M2EmD^NV;d18000O11O1O10O10O100nN^D:d;D^D:c;D`D9c;DaD8b;F`D8b;DeD6_;Dcol3" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is standing on .", + "image": "images/caption_simple_44.png", + "model_output": " is standing on .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000525600.jpg", + "mask_rles": [ + { + "size": [ + 326, + 640 + ], + "counts": "TeV33o99H7J5L5I6L4K5L4G:I:_HnM^6W2YIRNb6l2L2O100001\\O^IbMc6W2iIbMZ6X2m0K5K5L5K4N3M2M4N16J5L2M3MSOnGIo70ZH1e7GhH5W7@VI>\\8M1M2M6K^Yh2" + }, + { + "size": [ + 326, + 640 + ], + "counts": "oTe2:i9:ZFBT9P1H3M2O2M2N2N3M2OO0M3M4L3O2M2N2O2O0O100O10O2O001N2O4J9G?_O8Hk0VOYN_I1U6dNeIZ1a0Ge5g0hJjN\\5V1i1O3O1XOPHYOQ8a0l0H9N3F`oX3" + } + ], + "question": "What is the relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_45.png", + "model_output": " is beside .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000378139.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": 
"VRa17R=4M2M5L2O1O0O2O1N2O0O100O2O0O10000000000O11O000000O100O1O1N200O1O1O1O1O1O1O2N1O2N1O2N1O1O1O2N1O2N2N2N1O101N100O100O1000000O10000O0100000O100000O100000000O1000O1000O10000000000O10000000001O00O10O2O00O100000O10000000000O1000O1000000000O100000000O100O11O0000O100000000000000O100000O10O10000O10000000000O1000O10000000O1000000O10000000O01000000000000O10000000O10O100000000O1000O10000000O10000000O10O100O10001O00000000O010001O000O01000000000O100000000O100000000O10000000O10O10000000000O10O100000O10000O1O100000O10O10000000000000000O100000O1000O100000000O1000O100000O10000000000O10O100000O10000000000O10000O100000O01000000000000O1000001O000O100000000O10000O100000000O100000000O10000O2O000O101N2N101N2N1N3N1O2N100O101N100O1O2O0O101N100O2N2ORjP2" + }, + { + "size": [ + 427, + 640 + ], + "counts": "_]l23W=2O1O007I00000O1O00100O1O1O010000O106H4J]fU5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is inside .", + "image": "images/caption_simple_46.png", + "model_output": " is inside .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000189806.jpg", + "mask_rles": [ + { + "size": [ + 400, + 500 + ], + "counts": "d\\W32[<6I7^OK_D:[;b0K4M3N2N2M3N2N2O1N2N1O1O1O1O100O1M2000N2O010N200O1001O1O001OO100O100100O1ONSFWNX9h1hF[NV9e1kF\\NT9b1mF_NR9_1QGaNn8_1RGaNn8_1SG`Nm8`1SG_Nm8c1SG\\Nm8e1SGYNo8g1QGXNo8i1QGWNo8i1QGWNn8k1PGUNQ9k1nFVNQ9l1lFUNU9m1dFWN\\9m1]FWNc9[200O100000O100000000O100000000O1O1M3N2N2N2N2N2O10PFlMd9T2ZFoMe9]210000O10000O100O10O0100O2O0O100O1CUFTNl9l1Y:N^E5\\:R1M4M3M2O2N1O1O2N2N1O1O1O2N1000O01O100O10O01000O010O001N1DUFSNm9j1WFSNk9m1 and ?", + "answer": " is looking at .", + "image": "images/caption_simple_47.png", + "model_output": " is looking at .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000515445.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "fm_7:[>d0A doing with ?", + "answer": 
" is leaning on .", + "image": "images/caption_simple_48.png", + "model_output": " is holding .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000203580.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "Pc]43V=3N2N100O1O1O1O1O100000000O100000000000000000000000000000O100000000000000000000000000000000000000000000000000000000O100000000000000000000000000000000000000000000000000000000000000000000000000000000O10001O1O2O0ON101O100O10001O000000O100000000000000000000000000000O100000000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000000000000000000000O1000000000O100000000000000000000001N2O_TV1" + }, + { + "size": [ + 427, + 640 + ], + "counts": "dYT47S=2N3L4N1N2O1N2N2O0O100O1O2N100O1O101N1O101N2N101N1O2O0O1O10O01O1O100O100O1O100O100O1O100O01000O1O100O010O01G]DhNe;W171O2O1N101N2O1N2O1M3N2N101O00000O100000000O100000O01000000O10000O010O1000000O10O10O1000O010O1N1K6N1101O2M3N0O2O1O0O100O10000O100000000001OTElMa:S2]GlMg6T2[HlMPO0K00 and ?", + "answer": " is over .", + "image": "images/caption_simple_49.png", + "model_output": " is over .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000499622.jpg", + "mask_rles": [ + { + "size": [ + 456, + 412 + ], + "counts": "Qh>1W>0O2O0O100O1000001N10000000000O1000000000000O10000O100O1O1O1O1O1O1O1O10000N3O000O1000000O1O100O10000O1000000O10000O100O100O100O100O101N100000000O100O10000O1000000O100000000O100000000O2O00000O10000000000O1000001O000000000O100000000O10000000001O0O100000000000000O1000000000000000000000001O000000000000O010N2L4N101N2N2N2O1O00100O001O010O10O010O01000O10O011OO10000O100O1O100O10001N10000O2O0O2O001O001O1O1O1O1O1O2N3M2N2N1O3M3M1O1O00001O0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000003fNjC[O3g0UTEBl:=UECk:=UECl: located relative to ?", + "answer": " is on .", + "image": 
"images/caption_simple_50.png", + "model_output": " is on .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000135872.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "ZYl01Z=0iR20hfb01^V[O001O01NmW10Pkb08kl[O5J5L7I3K2O20O1OjCSON050_;l0\\D\\O3Jb;i0[D[O1Oi;b0UD[O0N16Q<;nC_O1;U<2PDMR<1nCOSCa0_O;E=D7H5L4K2N1O100O1O100[N_M_Hb2a7_M]Hb2b7_M]Hb2c7_M\\Ha2d7_M[Hb2e7_MYHb2g7_MXHa2h7aMUH`2j7eMQH\\2o7^NVGc1j8_100000000000000O100000000O100000000O100000000O100O100O10000O1O1O100O100O1O1O100O100O1O1O1O100O100O1O1O100O1O1O100O100O1O100O1O1O1O100O1O1O1@eJnH]5o6a0O1O1O100O1O100O1O1O10000O1O10aJQIb4n6^KVI_4j6`KWI`4i6_KYIj1OKh6lMUIIi6bMWIo1O`0a7@\\Hc0d7\\O[Hf0e7ZOYHh0S6ZMRKm1kNi0R6[MbIO]1l1POj0Q6[MaI0^1k1POj0Q6[MaI0^1k1POj0Q6[MaI0^1k1oNk0Q6\\MaIN`1i1oNm0P6_MQK:bNf0=a1P6_MRK4gNk06c1P6XMdI3`14iNm03d1n5ZMfI0a14jNm01e1n5XMhI2U2o0VNg1V6XMcKQ1WNg1V6XMcKP1XNh1`8fMPGE52;c2R6ZMiK7kMG238e2R6[MiK4WNKNf2R6[MjK2XNLLg2f5YMRJ3U20WNLMh2e5YMRJ3U20jNd2o4ZMQJ2W2OiNe2o4YMSJ2U20iNe2]5YMlK0gNg2]5YMmKOfNh2]5YMmK0eNg2^5YMmK1dNf2`5XMkK3dNf2a5WMkK4cNe2T8[MlGe2T8\\MkGd2U8\\MkGd2V8\\MiGd2W8\\MiGd2W8\\MiGd2k4XM]M5gMc2l4XM^M4eMe2l4XM_M4eMc2l4YM_M5dMb2l4ZM_M5eMo0XO3d5iN_M6dMc0ZOUO3V1`5kN`M7cM91e0l4kN_M8eMO9m0d4kN^M:WOj0\\3kN\\MX7BXG1_1=Y7DVG0a1;Y7FVGOa15^7NoFNc12_71mFOb11a7OnF0a10a71mF0b1Nb73jF1c1Lc73jF1c1Lc73iF2c1Lc73jF2b1Je74hF3c1Ie74gF4c1If73fF59^Of0:[83eF67Ah06\\83dF85Bj03]83dF84Dj00`83aF;2Dl0Oa82`F=0Do0Ma82_F?MFR1Ib82_FT1c0bN@8^93]FV1:_NF336`92\\FY16jNMKa93[FX17kNLJb9m1aFYNLKc9l1aFZNKJd9m1aFXNKKd9m1bFWNJLd9n1aFVNJMe9m1aFVNJMf9l1`FgN`9Y1aFfN_9Z1aFTNI1g9k1`FSNJ2g9j1`FSNJ2f9l1_FRNK2f9l1`FRNH3h9k1aFbN_9^1aFbN_9_1aF`N_9`1aF`N`9_1`FaN`9_1aF`N_9`1aF`N_9_1cF`No0\\Ol6T2UH`Nc0^ORO2T8o1YH`N93_7[1YHcNM>h7n0]H9b7G^H9b7F_H:a7F^H and ?", + "answer": " is on .", + "image": "images/caption_simple_51.png", + "model_output": " is on .", + "eval_result": "True" + }, + { + "image_path": 
"/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000439994.jpg", + "mask_rles": [ + { + "size": [ + 640, + 428 + ], + "counts": "T?12b0OE2I114O100075d`0j2K2O0001O00001O000O10001O00001O0000001O0O101O0000001O0O11O010O0001O001O00001O00001O001O001O0000001O010O00001N10010O00001O0000001O00001O00001O01O0001N100001O00O20O001O01O00001O000001O0001O00001O00001O001O1O1O1O01OO1001N100000000000000001O1O1O0000O1N200O1000001O00000000000000000000000001O00000001OO100000010O2M2O2O1N1O1O3M>B001O001O0001O000000000VOfNn^OZ1Qa0lNk^OS1Ua0oNi^OR1Va0oNi^OQ1Va0POj^OP1Va0QOi^Oo0Wa0ROh^On0Xa0ROh^On0Xa0SOg^Om0Ya0SOg^Om0Ya0TOe^Om0[a0TOd^Ol0]a0TOa^Om0_a0TO`^Ol0`a0XO[^Oi0ea0i00000000000000000000000000000001O000001O0000000000000000000000000001O0000000001O0000000000000001O0000000001O0000O100000001O000001OO100000000000000001OO2O00000000000000000000000000001O01O001OO1000000000000001O0001O000000000000000000001O0000000000000000010O000O1000000000000010OO100001O00000000]OU^OQOka0k0\\^OROea0l0]^OSOca0l0_^OSOaa0l0a^ORO`a0l0e^OQO[a0m0k^OnNVa0m0o^OTOPa0h0T_OXOl`0e0W_O[Oj`0:`_OEa`09a_OGa`04c_OKYb000000O2O00000O1D<00K6L2M4N2M3O1J6I7O0101O`0^OZk;UOlaC7KJk05Ra0j1O1N10001O0000001O000000001O000000001O000000000000001O00000001O0O1000000N]3" + }, + { + "size": [ + 640, + 428 + ], + "counts": "oUc62kc06VMN_A3\\>6`AK[><`AG]><_AG^>>]AG`>9VAMh>5T@oN=P1^?2U@nN6X1c?KU@PO4X1g?HT@QOL`1o?]OU@U1j?jNS@Z1n?gNo_OV1MgMT`0b35O2M3M2N3M3N1N1M1O003N3N3M3N110;E7I>Ba0k_OkK^?]4N1O2OO01O01O1O1O1O1O1O1N2nL_@^1g?_N^@NJ@>^O^?f0i@MRa0NT_ONo`0OT_OOn`00X_OJh`06Z_OHg`07Z_OHg`06\\_OGf`08b1N2N2M4MmQ3NVnL1ag<" + } + ], + "question": "What is doing on ?", + "answer": " is walking on .", + "image": "images/caption_simple_52.png", + "model_output": " is walking on .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000468501.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": 
"`h_11d;3HOfD3Z;5O101O00001O000000O100O10000O100000000000000000000000O10000000000O10000000O10O10000YGJVOLk4:mKMVOKl48lK2UOGm47mK7SOCn47nK;oN_OS56lK`0nNZOU57lKb0lNXOW57kKe0lNTOY57iKj0jNQO[56jKl0iNnN]55jKP1gNkN]57jKR1fNhN_57jKT1eNeNa57iKW1dNbNb58iKY1bN_Ne59hK[1aN\\Ng59fK`1_NXNi5:gKb1[NVNm59fKY2Y4hMfKY2Y4hMfKY2Y4iMdKX2[4jMdKW2[4nM_KT2c3VM_Ko0e06oN0l4nN]K]17EELf4TO[Kg1OYO4I_4[O\\Ka1IkM7_1c2f5kMPJZOa0j2`5jMSKU2n4jMSKU2o4fMVKY2j4`M_IIi1g2i4`M[K`2f4_M[Ka2f4]MZKd2f4XM^Kh2h6100fGVMe7Q2mGbN=^Og7j1THfN4Aj7d1VHjNOBS8Z1QHSOKDX8S1PHYOGD]8n0nG^OCEb8h0mGD@Df8b0nGJ[ODQ96gG5XOEW9JhGa0oNFb:;\\EEe:f01O1N2O2N2N1O1O1O2L3N3L4Kl[h2" + }, + { + "size": [ + 375, + 500 + ], + "counts": "ccR42d;3N2N1O1O1O2O01OO1O10000N2O1O1O1O0L4M3O01N2010O3M2N2N1N2N1O2N1O01O0N2001O1N200O1O1O1O1O001N100gNhNTHY1j7jNTHV1k7mNSHS1n7mNQHT1n7mNPHT1P8mNoGT1Q8lNoGS1S8lNlGU1T8lNkGS1W8lNiGS1X8oNfGQ1[8oNdGP1^8oNbGQ1_8nNaGQ1`8PO_Go0c8SOZGm0g8QOZGn0h8QOWGo0j8ROUGn0k8ROUGm0l8SOTGm0l8SOTGl0n8TOQGl0n8UORGj0o8VOQGi0P9WOPGi0o8XOQGg0o8ZOQGf0^1nN]5YIc0_1nN[5?WIb0m8^OSGb0l8^OUGa0k8_OVG`0i8AXG>h8BYG=g8CZG and ?", + "answer": " is looking at .", + "image": "images/caption_simple_53.png", + "model_output": " is looking at .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000171190.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "mW_8d03Ej=m0J3O1O2O0O101N10001N100O2O01O100O001O0iNcBl0]=QOcBQ1^=mNaBU1_=610O00001O001O001O10O000N2J6K5G:J5L4K5K5N3M2OnB0m<7M2N2O100O0O2001O1N101O_NF\\D9d;J[D5e;L[D3e;NZD2k1" + }, + { + "size": [ + 480, + 640 + ], + "counts": "[k_71m>2O2N2N100O1O2N1O101N1O1O1O2N100O1O2N1O1O101N1O1O1O1N2N2O2N1N2XOTOYCo0dk0V;@[DE?k0W;AZDB>m0Y;2fDN[;1eDO[;2dDM^;3aDM`;3_DMb;3[DOg;0TD4m;_1002N1O2N1O1O1O1O1O1O1O`NfMZFZ2d9lMYFS2f9QNWFo1h9SNVFn1j9SNTFn1k9SNTFn1l9RNTFn1Q5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is sitting on .", + "image": "images/caption_simple_54.png", + "model_output": " is beside 
.", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000565391.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "e6173a2M\\M0R>3nBM13QO14O[1NbN3NNM0e=n2\\BiNOOOZN218Z4f7f7K1N2O001O1O001O001O001O001O1N101O00001O001O001O1O1O00001O001O001O1O001O001O00001O1N2O1O001\\LPC6P=]3001O1O1O1O001O00002N001O1O001O1O1O1O1O001O002N1O001O1O2N2N1O2N1O1O1O3M2N2N4L2N1O001O2RKaAW4b>eK`AHNL0W4f>n04L4L3M5K6J;`KZ@P4m?N1O1O3M1O1O00003M1O1O00O1002N01O03M0O100001O00O1001O0000O100001O0000000000000000000000000000000000O11O00O10000000000000000000000000000000000000000000000001O00000000000000000jE`Ll3`3UL_Lk3a3QL_LS4a3V6000dE_LY4a3S6000bE_L]4a3cK_L]4a3cK_L]4a3Q600000000000000000000000000000000000000000000000000000000000000000000001O00000000000000000000000000000000000000000000000000000000000000000000000000001O0000000000000000001O000000000000000000001O0000001O001O0000001O0000001O001O00001O1O001O001O001O1O001O1O1O1O1O2X@lK`?\\4N2N2N2N2N2N3M2N2N2N3M3M2N2[EdJb6_5n36J3M2N2N3M3M3M2N1O3M5K3M2N2N3M3L4M3N1N3M3M3M1O2N2N3M3M1O2N2N4L2N2N2eEjGh8X8UGjGj8Z8nFbGPO4R:]8kF_GSO4R:_8mFbGR9a8jF`GV9b8eFbGZ9b8_FaGa9b8\\F^Gd9c8ZF^Gf9d8QFoF09NIP:d9PF\\FP:c9QF^Fn9b9RF^Fn9j901O000000001O00000N31N00LoEYFR:f9oEYFQ:g940lEXFP:h9oEYFQ:g940000000001O00000O11O000000000001O000O11O0aM_FYHNo1c9h5_FYHOn1d9g5]F[HOn1f9]5XFfH5NOm1d9\\5]FgH:1Db1b9i5TGdH[Oc1a9i5^GVJa8g5ZFgHV1c1_8]N\\FS7X1bJ\\8[N\\FS7W1cJ]8`5dG`J]8`5bG`J_8_5aGaJa8]5_GdJ`8\\5`GdJ_8]5aGcJ^8^5bGbJ`8\\5_GfJ`8Y5_GiJa8W5YGoJd8T5\\GlJd8T5\\GlJe8S5[GmJg8Q5YG^IQOm0f9e5YGZIUOQ1`9h5YGXIWOP1`9h5YGXIWOQ1`9f5YGUI[OU1]9e5XGUI]OU1W9i5]GQI]OV1V9i5]GQI]OV1T9k5_GoH]OV1T9k5_GPI[OV1V9j5_GoH\\OW1V9j5VGgHA27\\1R9k5UGiHAN9_1Q9j5VGhH2^1h8j5VGgH3`1f8i5VGiH3^1c8m5YGfH5\\1_8R6lF_HO<3D:_1g8S6lFRIL^O2O0O9a1k8R6nFUIM^O:[1k8S6mFUIM^O:Z1k8V6kF\\I4nNOa1P9c6mFaH3]OMb1Q9Z9XKWE]LKV12GP;^NoD0Ob0o5_O^J5c0Iac0" + }, + { + "size": [ + 640, + 480 + ], + "counts": 
"n??240_OO30N11OO171H4<21KEi?o2Z@_MLBj?o3001O001O001O001O00001O001O001O1O001O001O001O001O1O001O001O001O001O1O001O1O001O001O1O001O1O001O1O001O001O001O1O1O1O001O1O1O001O1O1O2N1O001O1O1O1O1O1O2N2N1O1O2N2N1O1O2N2N1O2N2N3M2N2N3M3M4L5K6YOW]OK`c0O3M2N1O1O1O2NQhY7" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_55.png", + "model_output": " is beside .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000322829.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "1b8i4001O5K0000001O0000M3N200000000000000001O000000O100000000001O00000000O1001O000000O1000000000000001O001OO10000000000000000000000000O2OO100001O00O10000000001N10O2O01N1000OZKeK>[4^OPL:R4DRL9n3FVL7j3HZL6e3I^L5b3K_L4a3L`L3`3MbL1^3NdL1\\3OfLiMkNa0^4g1jLbMoNb0Y4k1iLaMQOb0V4m1mL[MSOd0P4Q2RMUMPOj0m3Q2WMoLROl0g3U2YMlLSOm0d3W2\\MgLVOm0_3\\2UO]Ml0b2ZOYMf0g2ARM?m22cLN\\3Q5O1O1O002O0O1O010O2N1O1O1N102N1M3O001N2O2M2N2O1O1O1N20nJ_MYO`2d0fM[OY2e0jMZOT2g0lMYOR2i0nMWOQ2j0oMUOR2j0oMVOP2k0PNVOo1k0oMVOQ2j0nMWOQ2k0mMUOT2k0jMWOV2P6N1000O10O101O00O0100000000000000000`JjMKV2J]NNc1NdNO\\11dNO\\11dNO\\10fNOZ11fNOZ11fNOZ11fNOZ11fNOZ11fNOZ11eN0[10eN0Z10gN0Y10gN0X11hNOX11hNOW12iNNW12hNOW12iNNW12iNNX10iN0W10jNOW10jNOV10kN0V1OkN0W1NjN1W1NjN1V1OiN2W1MjN3V1MjN3V1MkN2U1NkN2U1NjN3V1MjN3U1NjN3V1MjN3V1MeNTKTOP5W2LdNUKUOo4W2KcN<]1DdN;[1FgN8Y1HhN7W1JjN5V1KkN4T1MmN2S1NmN2S1NmN2S1MmN4S1LlN5T1KkN6U1JkN6U1JjN7V1IkN6U1JkN6U1IlN7T1IkN8U1HkN8U1HkN8U1HkN8U1HkN8U1GlN9T1GkN:U1ElN;T1ElN;T1ElN;T1ElN;T1DmNU1BjN?V1AjN?V1@kN`0U1@kN`0U1@kN`0U1@kN`0V1_OjNa0V1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1]OlNc0S1^OmNb0S1^OmNb0R1^OoNb0P1_OPOa0o0@RO?m0BSO>m0BSO>m0BRO?m0AUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>l0@UO`0k0@UO`0l0_OTOa0m0^OSOb0m0^OROc0n0]OROc0n0]OROc0n0\\OSOd0m0\\OSOd0m0\\OSOd0m0\\OSOd0m0\\OSOd0l0]OTOc0l0]OTOc0l0]OSOd0l0]OTOc0l0]OSOd0m0\\OSOd0m0\\OSOd0l0]OTOc0l0]OTOc0k0^
OUOb0k0^OUOb0j0_OVOa0h0BWO>h0CXO=g0DYOl0ATO?n0_OROa0o0^OQOb0o0^OQOb0P1]OPOc0P1]OPOc0P1]OPOc0Q1\\OoNd0Q1\\OnNe0R1\\OmNd0S1\\OmNd0T1[OlNe0T1[OlNe0T1[OlNe0U1ZOkNf0U1ZOjNg0V1YOkNf0V1YOjNg0V1YOjNg0W1YOiNf0W1ZOiNf0W1[OhNe0Y1ZOmM[Kc0[5`1ZOmMZKd0\\5_1YOiNf0V1ZOkNf0U1_OfNa0Y1@fNa0Z1_OfNa0Y1@gN`0Y1_OhNa0X1_OhNa0X1_OhNa0W1@iN`0X1_OgNb0Y1^OgNb0Y1^OgNb0Y1_OfNa0Z1_OfNa0[1^OdNc0\\1]OdNc0]1\\OcNd0]1\\OcNd0^1[ObNe0_1]O^Nc0b1]O^Nc0b1]O^Nc0b1]O^Nc0b1]O^Nc0b1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O\\Nc0d1]O\\Nc0d1]O\\Nc0d1^O[Nb0e1^O[Nb0d1_OZNc0f1]OYNd0g1\\OZNc0f1]OnMSK6^5l1_OmMUK3_5P2\\OmMVK1_5R2[OlMQ1T2^OiM6W2IjM7V2_5000QJmMf0S2ZOmMf0S2ZOmMf0S2ZOmMUKMV5V2EmMQKLN1\\5W2DlMRKML1^5V2DkMSKNK1^5V2DjMTKOJ1^5V22iMNW2HkMoJNY5V2ImMmJMZ5V2ImMmJNY5U2IoMlJM[5T2FYN:g1FYN:g1FYN:g1FYN:g1FTNmJG^5V2DXN;h1EXN;i1DXN;h1EXN;h1EXN;i1HhMQK2W5V22hMOX2b51O00O100O100O10000O1000000000000000000001O1O001O001O00O100000000000000000000000000001O000000000000001O001O00001O00001O001O002N2N1O001O2N1O1O2N2N3M2N2N3M1O3M1O1O2N2N1O1O001O1O001O1O1O1PJYLZ2g3]L^LfNNl4e3ZLaLeNNQ5a3XLeLdNKT5`3TLjLdNJW5\\3RLnLdNHZ5Z3nKTMcNE_5W3kKVNU4j1iKYNV4g1hK[NX4e1fK]NZ4c1dK_N\\4a1aKbN_4^1`KcN`4]1^KfNa4[1\\KgNd4Y1ZKiNf4W1XKkNh4V1UKmNj4S1TKoNl4R1QKoNP5S1hJTOW5Q1^JUOb5Z40001O001O000000001O0000001O1O00001O001O1O001O1O1O1O1O00001O00000000001OO1000000000000000000000000000000000000000000O1000000O100001O00000000000000001O000000000000000000000000000000000000O1000000000000O1000000" + }, + { + "size": [ + 427, + 640 + ], + "counts": 
"k\\T1221P=`0E2M5M5J2O1O1O2N2N3L2O1O1O5K3M6J1O3L`0_O3`E_MS:c2hEdMU:Z2nEdMQ:Q2kEPNd:Q2\\EoMd:]2N2O0O1O1O1O2lEUMe9j2>00001N3N1N2N2O1N2O1O1N2N2O1O1N101O1O100N101O10O1O1M20100O1O1O010O10O01O1O1000000O1000O100000000000000001O00000000O1O1000000000000001O00002N001O00000000000000000001O000000000O100O1O0010000000000000000000001O000000000000000000000000001O000001O00000000001O000000000000O011N1O11O00O1O001000000000000000000001O1O000000000001N100O10O11N101O00000O2O0O2O0O1N2O1O1O1O1O1O100O10000O101O0001O000000001O00001O001O1O1O010N4M1N2O01O01O00001O0O101N110O000O2O000O110O001O000O1O1O11N1O101O1O0O11O0000000000001O1O1O001O0000000000000000O100000000000000O10000000000000000001O0O100001OO1001O00000O10001O01O00001O0O10001O00001O0000O100O10000O100000000O10000001O001O0O2O000O2O00000000OgMQFX1o9hNQFX1o9S1O001O1O00001O002OO01O1O1O3M00010O002N3NO010O0000001O10O01O1N4M1O1O2N1O2O0O1OfHmL\\M2f6o2PLmLZM40Hd6U3TLlLZM6i6k2TLQMR4n2W3N2N2N3N0O101M2N101000OO2O2N0JZEgMg:U2:L3O2K5I6O1L3DXDTOl;k0SDQOR and ?", + "answer": " is in front of .", + "image": "images/caption_simple_56.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000535523.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "Zeh52Y=2iB0P=600O2O001N101O1O1N2O3M1O001O1O3M0O10001O01O010O01O1O10001OO1000O01O010O101OO1cCTOY^:CbE<^:DdE:]:FbE;]:FbE;]:FcE:\\:GcE9\\:IcE9[:GeE:Z:GdEX:CcEa0Y:CeE?V:GiE:Q:LnE4k94SFMj97UFJj96VFJ\\9gNeF`1MJV9oNlFX1LJU9QOoFj2P9WMoFi2Q9WMPGh2Q9XMmFi2T9WMkFh2W9f0001O001O1O1O001O001O1O001O1O001O00001O1O001O001O00001O001O001O001O1O001O00001O1O001O001O00001O001O001O001O001O001O001O001O00001O1O00001O001O001O001O001O00001O001O001O001O001O00001O001O00001O0000001O000000000000000000TE" + }, + { + "size": [ + 428, + 640 + ], + "counts": 
"eiR73X=1O2N2N1O2N100O2@GjC:h;3YDM`;:_DH_;8aDI`;5`DLa;2_D0`;O`D2`;M`D5_;JaD7_;HaD9`;DaD=_;B`Da0_;^OaDc0_;\\OaDe0`;XO`Dj0`;UO`Dl0`;ROaDo0_;oNbDR1j;2\\OmNmDU1k:hNmD47U1k:hNoD07Y1i:hN^EZ1b:fN\\E\\1d:dN[E]1d:cN[E`1W;1M3M3O1M3N1N3N2M102O0010O2O0O10000O2O0O101N1O2N1O1O2M2O2L4M2M4L4L4K5I7K5J8GeW:" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is over .", + "image": "images/caption_simple_57.png", + "model_output": " is on .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000276018.jpg", + "mask_rles": [ + { + "size": [ + 640, + 416 + ], + "counts": "Una05kc01N102N1O2N1O0O2O0O2M2M3M3K6K4J6L4N2O1O101O000d@cNU<]1cCmN[WOeAo0[>QO]AW1c>iNWA]1R>VMVB\\1Dc1U>RMXBY1_Oi1X>oLZBX1[Ol1[>kLZBR2\\O9[>dMZBQ2_O9V>fM\\BP2^OdM`BT1CROM\\2j0jM[;D[DQ1oLAQ3`0oL_OQ3b0mL^OS3c0hHUL32\\2T3j4h0^IZLc1n2P5i0YI_Ld1d2X5m0nHhL5AMj2S7m0lHhL4\\OOf2\\7U1bHiL\\1b1e6e1oGiLY1[1R7k1fGkLS1]1W7i1fGjLo0`1\\7g1dGiLn0b1^7e1cGkLe0h1i7^1aGjL?o1P8W1bGiLk7TM_H^2C`0n7RM`H_2_O`0R8oLaH_7`7_HbH`7_7^HdH`7]7_HcHa7^7]HdHb7]7\\HdHd7^7YHeHe7]7XHeHg7\\7WHeHi7]7SHeHm7^7YGjGJk0m8e9O00001bNTEfIl:Y6]E_Id:^6bE^I_:`6dE^I]:_6iE]IW:^6PF`IQ:\\6TFbIm9\\6YF_Ig9a6^FZIc9e6n1M101O1O1O1N2O1O1O1N2N3N1M4A?]M[B_Nn=YNcAk2U1UNg=h1]2L1N2N2O2M3N2M2O2N2M4K6I4L5K6J6K5J5M5I7J7EejR4" + }, + { + "size": [ + 640, + 416 + ], + "counts": "9Y33PNd0hN]O0c01]O4O0f0KZO7OK0O1o0NXO0K`0R1@a05\\42^7LbH3a7J`H5f=00O1O1000000O100O100Oig19fWNId\\O`0Sc0?QOYOT^OQ1ca0k0C=L5L4K9H9G6JR5@=C:F3L10000O1O1M3L4L4J6L4J6M3K5K5O1O1O1O1O1O1lEcHb7`7]HcH`7^7_HdH^7^7aHeHZ7^7dHeHZ7\\7dHhHW7[7hHhHT7Z7kHhHS7Y7lHjHP7X7oHlHk6W7UIkHe6Y7ZIlH`6V7_IPIYNmN_7W8WJhIUOYOa3T7YMQJ\\NBl3b6gMmLd1X3[NQMT1X3kNPMg0W3XOXMXN\\IT12ROj9a1\\M`N\\3_1c700O1O1O1O1O1O1O1O1O1O100O1O1N2O100O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1N2O1O1O1O1O1O1O1O1O1N2O1N2O1NRTg5" + } + ], + "question": "What is the relationship between and ?", + "answer": " is guiding .", + "image": "images/caption_simple_58.png", + "model_output": " is 
beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000404249.jpg", + "mask_rles": [ + { + "size": [ + 640, + 427 + ], + "counts": "Y[i17hc03M3M1O2O0O2O1n\\OD^b0<`]OH]b09b]OI]b07b]OK]b06b]OJ^b07a]OI^b08a]OI_b08`]OH`b01_]OC1<`b00a]ODNd:n0cHg0gMmNf9<_HP1eMeNk9>[HR1gMaNm9i0nGk0PN_NQ:R1\\EgMj0f0CU4U:gMoEYN^O]4c:\\MkEc3T:cLcEa3]:Q300O100O001dNiFnGW9n7cFQG;l0R9o7WGQHh8m7\\GRHd8k7`GSH`8m7bGQH_8n7cGQH\\8n7gGPHZ8o7hGoGY8o7mGlGS8T8SH_GU8`8e101OO2H701O010lMYGnHg8P7bGhH_8Y7[HkGf7T8_HeGe7Z8Q2N2ZOSEmGJ0Z;l7f0N2H9M2M3L5O_FQIQ6o6ZIiHiMj0d8h6cH_JX7i5RHlJNZMm6U8fHgJ7ZMR7Z;kHgDV7Y;hHhDX7X;eHlDZ7T;bHPE_7h;0000001O1XE_Ho8a7PGaHo8`7mFcHS9_7hFeHW9\\7bFjH^9Y7^FiHa9X7ZFlHf9W7UFkHk9V7RFmHm9[7iEgHW:b8O001N101O0O1O1O2N1N2O2L4M3eLoE_LW:`NVFn2HUN]:jNQFY2AaL9X2V:nNTFR1_1H_8TOZF`0a2]OX73bFLb2Ko68jKBY4>lKXOX4h0a7O10X]OjNeb0Y100Z]OhNcb0W1^]OhNbb0X14O12ON10O01O1002ON001O1000\\]OkN\\b0T193M1O1O0000O1O0106KM2N1100004K1O2O0002N10N11O0011NO21M13MO101N2O11OO000O21ON011O100O21M10N101O01O02M1O2N2M1O2N2N2N3M2O3J]bf2" + } + ], + "question": "What is doing on ?", + "answer": " is riding .", + "image": "images/caption_simple_59.png", + "model_output": " is playing on .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000098287.jpg", + "mask_rles": [ + { + "size": [ + 640, + 415 + ], + "counts": 
"m;4o2O^MNa22cMNk;0hC02:KHa0Nb03RO9K07FI1140KO:7FO0d04WO02KO001OO106OK0_;7`Dm7];;000000000000O1000000001O000000O1iNaGPFOc0a8V9_GQF`0g0Q8X9`GPFa0f0o7Z9SHgFm7Y9PHjFP8V9jGPGV8P9hGRGX8n8gGSGY8l8hGTGX8[:O2N1O1O1O2N1O001O001O1O1O00000000000000000000O10000O1J6000000gNgGjFZ8T9iGjFX8U9jGiFW8V9jGiFW8W9iGiFW8X9iGfFX8Z9hGfFX8Z9hGfFX8Z9hGfFX8Z9hGeFY8[9hGdFX8\\9hGdFX8\\9hGcFY8\\9hGdFX8\\9iGbFX8]9iGbFX8^9iG`FX8_9iG`FX8_9iG`FX8^9kG_FW8a9iG]FY8b9jGYFY8f9Q10000001OO1O1001O00000000000000O100000000O100000000O1O1000000000000001O0000000000000000O100000000000000O100001O00000000O10000O100000000000000000000O1000000000000000[GYFQ7g9cHYFlN;^8\\9fHZFjN;`8Z9gHiFY7W9gHiFY7W9cHmF]7T9\\HRGd7n8WHYFZOk0_8l8SH[Gm7e8QH]Go7f8gGaGY8R:100O1K_GjDb8S;7N2M3N2O1L4I7M3O1M31O1O002N1O2N4L1O3M1O5K5K1O2N1O]N^GmG]8R8iGjGV8V8mGhGR8W8RHfGn7Z8VHbGj7^8YH_Gg7a8`HXG`7g8fHSG[7m8iHoFW7Q9kHmFU7S9nHjFR7W9oHfFR7Z9RIaFo6_9RI_Fo6a9h1O100000000000000000000O100000000000000000000O1000000000000001O00O100O1000000000000000000000000000000000000000000O10000000000O1000000000000001OO100000000O1O10000001O000000000000000000O100000000001O000000O100001O0000O10000O1000000000000000000000000000000000000000000O11O0000O1000000000000000000O10000000000000000000000002N001O000000000000000000O100O100N2M3O10000001O00000000O100000000001O001`FlFlN100j3NXL0T4Wc0dKk\\O1?" 
+ }, + { + "size": [ + 640, + 415 + ], + "counts": "UeX31T22a?4U@4h?NV@4h?NV@4i?NR@6l?LP@8o?In_O:P`0Hm_O:S`0Je_O:\\`0LX_O3n`00e^O8^a0R1201O1N2LJPNd^Ok1[a0=2M3O100O21O002N1O2N2O1N2O1O3McMe^OK2g04 and ?", + "answer": " is standing on .", + "image": "images/caption_simple_60.png", + "model_output": " is standing on .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000278973.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "Sfn13W=1N2O0000001O1O1O1N102N1O1O000O2O001O1O0O101N1000000O10000000001O1O00001O001O00001O00001O00000O10000O100000000000O01001O00O10000O10000O1000O100000000000000O10000000000O2O0000001O0O2O001O001O0O2O001O001O000O10000O10001O000O2O00001N101O001O001O0O2O001O00001O0010O010O010O10O010O010O1O2MiQ`4" + }, + { + "size": [ + 426, + 640 + ], + "counts": "fjT12V=?A5M1N100O0001O010O010001O0O2O0O10001CW9k1O2O0O01_OkFkLU9S3nFlLS9Q3PGnLP9o2TGPMm8m2VGRMj8m2XGRMi8h2]GWMd8f2_GYMb8d2aG[M`8`2dGaM\\8\\2gGcMZ8[2hGdMY8Z2iGeMY8W2`1L5J;E7J6J7H8@>Ghdg4" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is on top of .", + "image": "images/caption_simple_61.png", + "model_output": " is on top of .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000104198.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": 
"dS2b0P;6J6N3M3N2N1O2N2M101IfNnE\\1P:6O20O2ON1O101O001XG\\N[7e1WHjNg7c1bGgN_8X2O1000000O1001O00000000O1O11O1O000jNaGSO_8S200O2N20O1O000O02O1N10O100000O10000lNkLfIP3TOjLT7=fIe2]6^McIT2VO^M_7a0ZIe1S7`NkHZ1W7kNhHU1V7nNhHR1X7ROdHo0\\7ROcHn0^7SO_Hn0b7SO\\Hm0d7UOYHl0h7TOSHP1n7\\11O00O1O1O100O1O1O10000001O001OO1O1O10000O1000000O100O100O10O1001N100O1O1O1POTLUJl3h5XLVJi3f5^LVJc3h5`LVJa3j5`LUJ`3j5cLTJ]3k5fLQJ\\3o5gLkI\\3U6S11O2N0000O100O1N2N2O2N1O1O10O10001O2N0010O0O100O1N2N2N2M3M3M3M3L4N2K5J6L4N20000O100O1GQIjKo6U4TIhKm6e3VIbLNHl6Z3SIfL88HHm6Z3SIeL89EG01P7Y3TIfL7b0e6g2UIeL8e0b6W3RIkLn6o301O00001O3M4L9G4L5K5K3M3M3M2NO100O100O1001O001O1O1O001O2N1O1O1N1001N10O10O2O00O200_KoIZ3R6dLQJZ3o5dLTJ[3l5cLWJ\\3j5bLWJ^3j5`LXJ_3j5^LXJa3j5\\LXJc3n601O1O1O1O001O001O001O1O001O1O1O1O00000000OgMSHj0l7VOXHg0g7YOZHg0f7WO\\Hi0d7UO^Hk0b7TO`Hk0a7TO_Hl0b7POaHP1`7mNbHS1`7kN`HU1b7hN_HY16XNU6=gIb2X6YMlIh2Z6lLkIT3]7O001O001O001O1O1O00001O001O001O000000000000001J5ZO\\GPNn8k1b0D;N2O01O1N1O1100OO10O2N2O1N2O1N3M3N3K?BZTW1\\O[lH" + }, + { + "size": [ + 375, + 500 + ], + "counts": 
"2g15N1aNd7a1[HN0bNe7a1ZHM1bNe7a1ZHM1bNe7a1[HL1bNd7b1[HL0cNe7b1ZHK1cNe7b1[HJ0dNe7b1[HJ0dNe7b1[HJ0dNe7c1[HH0eNe7c1[HH0eNe7c1[HH0eNe7c1[HH0eNe7d1ZHG1eNe7d1[HF0fNe7d1[HE1gNd7d1[HE1fNe7f1YHD2gNd7e1[HC1gNe7f1ZHC1gNe7g1YHB1hNf7f1YHB1hNf7f1YHB1hNf7f1ZHA0iNf7f1ZHA0iNf7g1YH@1iNf7g1ZH_O0jNf7g1ZH_OOkNg7f1ZH_OOkNg7g1YH]O1lNf7g1YH]O1lNf7g1ZH\\O0mNf7g1ZH\\O0mNf7g1ZH\\O0mNf7h1YH[O1mNf7h1YH[O1mNf7h1ZHZO0nNf7h1ZHZO0nNf7i1YHYO1mNg7j1XHYO1mNg7j1YHXO0nNg7j1YHXO0nNg7j1YHXO0nNg7k1XHWO0oNh7j1YHVOOPOh7j1YHUO0QOg7j1YHUO0QOg7k1YHSO0ROg7k1YHSO0ROg7k1YHSO0ROg7l1XHRO1ROg7l1XHRO1ROg7l1YHQO0SOg7l1YHQO0SOg7l1YHQO0SOg7m1XHPO1SOg7m1XHPO1SOg7m1YHoN0TOg7m1YHoN0SOh7n1XHoN0SOh7o1XHmNOUOi7n1XHmNOUOi7n1XHmN0TOh7o1XHmNOTOj7o1WHlN0UOi7P2VHkN2SOi7R2VHjN1SOj7S2UHjN1SOj7S2UHjN1QOl7V2RHiNV8W1jGiNV8W1jGiNV8W1kGcNZ8]1fG_N^8b1aG]N`8c1`G\\Na8d1_GZNc8f1^GXNc8h1]GWNd8j1[GVNj0He6R2aHUNa09h6b1hHTN6ELm0n6Z1PISN6HJP1l6U1TISN6HJU1g6Q1XIQN8II\\1`6j0_ImMbJkMYOl1Q69dJmM[Oo1l55eJoM_OQ2g50ZJlMM92P2b5K_J_NOh1`5IaJ_NOi1_5IaJ^N0i1_5IaJ^N0j1^5HbJ^N0j1^5HcJ]NOk1^5HcJ\\N0l1]5IbJ[N1l1]5IbJ[N1l1]5IcJZN0m1]5IcJZNOn1^5HcJZNOn1^5HcJZNOo1]5HcJYN0o1]5HcJYN0o1]5HdJXNOP2]5HdJXNOo1^5IcJXNOo1^5IcJWN0P2]5IcJWN0P2]5JbJVN1o1^5KbJUN0R12oN]5j1aJUN0o07oNY5m1`JUN0m0:POV5n1`JUN0h0a0SOo4Q2_JTN1f0f0QOk4U2_JSN0g0g0POj4V2_JSN0f0j0mNi4Z2]JRN0g0m0iNh4_2ZJQN2f0i6Y1VIPN1g0i6Y1VIPN0i0i6W1WIPN0j0h6V1XIPN0j0h6W1WIoM1k0g6U1YIPN0j0h6W1WIoM1j0h6W1XInM0j0i6X1WInM0h0k6Z1UInM0g0l6\\1SImM1e0n6^1QIlM2e0n6_1PIlM2e0n6_1PIlM1e0P7_1PIkM0e0Q7a1nHjM1e0Q7a1nHjM1e0Q7a1nHjM1f0P7`1oHjM1f0P7`1oHjM1g0o6_1PIjM1h0n6^1QIjM0j0n6\\1SIiMOj0o6]1RIiMOj0o6^1QIgM1j0o6_1PIgM1i0P7`1oHgM1g0R7b1mHgM1d0U7e1jHgM1a0X7h1gHgM1>[7k1eHfM0;_7o1aHfM09a7R2^HeM07e7T2[HeM04h7W2XHeM1Ok7\\2THeM1FT8e2kGeM\\8[2dGeM\\8[2dGdM]8\\2cGdM]8\\2dGbM]8^2cG`M_8_2=N2F:E;G9C=N2O100002N1lEiNf9e2oN2VG]M\\8c2cG^M]8b2bG_M^8a2bG_M1MS8d2lG_M1OQ8c2nG]M13n7`2QH]M1[N]O1c5i2bJmN;[N_O0c5h2cJoN7[NCNc5h2cJRO2ZNHMb5g2dJFJdMa5f2eJFIbMe5i2aJEJbMe5i2aJDKcMd5i2`JEL`MNNf5m2`JEL`MNOe5l2aJEKaMONe5l2aJDLbMNNe5l2aJDLeMc5g2aJDLbMNNe5l2aJDKbM0Nd5l
2aJDKcMOMe5l2aJDKbM0Nd5l2aJXOMoM1Nf5k2[JXO0lM11d5j2\\JYO8mM\\5j2\\JYO8lM]5l2ZJXOW6h0iIXOW6g0jIYOV6g0jIYOV6g0jIYOV6g0jIYOV6g0iIZOW6f0iIZOW6g0hIYOX6g0hIYOX6f0iIZOW6f0iIZOW6f0iIZOW6f0iI[OV6e0jI\\OU6d0kI^OS6b0mI@Q6`0oIAP6?oIBQ6>oIBQ6>PJAP6?oIBQ6>oIAR6?nIAR6?nIAR6?nIAR6?nIAR6?nIAR6?nIAR6>nIBS6>mIBS6>mIBS6>mIAT6?lIAT6?lIAT6?lIAT6?lIAT6?lIAT6>lICT6>kIBU6>kIAV6>kIBU6>kIBU6>kIBU6>kIBU6=lICT6=lICT6=lICT6=kICV6=jICV6=jICV6=jIBW6>iIBW6=jIBW6>iIAW6`0iI_OX6a0hI^OZ6a0eI_O\\6`0eI_O[6b0eI]O\\6c0dI\\O^6c0bI\\O^6e0bIZO_6f0aIYO`6g0`IXOa6g0`IYO`6g0_IYOb6g0^IWOd6i0\\IVOe6j0[IVOe6j0[IUOf6k0ZISOh6l0YISOh6m0XIROi6n0WIQOj6o0VIPOk6P1UIoNl6P1TIPOm6P1TIeNJjNT7b2SI[M\\8R2n0I7M3J6I7J6L4I7J6K5K5LPic0NTW\\OS1cDhNY:\\1mEiNd9W1[FkNd9S1\\FoNd9P1[FQOf9l0\\FUOd9j0[FXOe9f0[F\\Oe9b0\\F_Od9?\\FCd9;\\FHd95\\FMd91\\F?U9@kFb08kNFMZ8d0hGf0LfN55JO[8>jGj0KeN63KO[8=jGm0JdN63KO0MU8?oGP1LaN54JO2MT8Ih7m0hG\\O?Gj7i0gGD?Cj7g0eGJa0@h7d0jGL=Ch7>iG3?@B^Oo7l0RH71kN6m0GVOf7i0^H:OjN6U1[7F`HRIPN0c1m6=SIPN0d1l6 located relative to ?", + "answer": " is over .", + "image": "images/caption_simple_62.png", + "model_output": " is over .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000224051.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": 
"WeS3451j4o0j3R4QLbJ1<6n0k3Q4mKhJ199l0k3Q4jKkJ37f0j3V4aKoJ93=g0k3f4gKcJ>L_O6_4Y5eKdJ?EE8CDf4j5cKeJ`0AK6X4c5^KeJ]6Y5cIgJ^6Y5bIfJ`6X5aIgJa6W5_IiJb6W5^IgJc6Y5]IgJd6X5\\IhJ=H]OOe5a5`JhJ7=U5j4dJiJ6?U5h4eJiJ5`0W5f4dJjJ5?Y5f4bJmJ3=\\5f4_JPK38a5g4\\JRK25e5h4ZJSK04g5i4YJTKN4i5h4YJRLi5m3WJQLk5P4TJoKn5P4RJoKo5Q4QJnKP6S4oIkKS6U4mIiKV6V4jIiKW6V4kIhKV6X4jIhKV6X4kIdKX6\\4hI\\K`6d4aImJm6S5SIlJn6U5RIjJn6V5RIjJn6W5RIgJo6Z5QIdJP7`5mH]JU7d591QI^J\\O?o5S5aJ_KY5b4eJdKU5]4iJgKU5Y4jJiKU5V4kJlKT5T4kJnKS5S4lJnKT5S4kJmKU5S4jJnKU5S4kJmKT5T4kJnKT5S4kJmKT5S4nJlKR5R4PKoKn4Q4SKZLb4e3`K^L[4b3fK_LY4a3gK_LX4b3hK^LW4c3iK]LW4c3hK^LW4c3iK^LU4c3jK]LW4c3iKULCoM11b4l5jK]LT4d3lK]LR4d3mK]LR4e3nK[LP4f3PLZLo3h3PLXLo3l3oKTLo3Q4nKnKQ4S4oKnKP4S4QLlKl3V4VLhKi3Z4VLgKg3[4YLgKd3Z4\\LfKb3\\4^LeK`3\\4`LeK^3\\4bLeK\\3\\4dLdKZ3_4eLbKY3_4gLcKT3a4lL_KQ3c4oL_Km2d4SM\\Ki2g4WM[Kf2e4\\M\\K_2h4`MYK^2i4`M[K[2g4dM[KX2i4hMWKT2l4lMYKl1j4TNYKe1k4\\NYK[1k4eNXKT1m4lNYKk0i4UOg4001O001O0O100O00O010OO200001N2N200O3LhTi1" + }, + { + "size": [ + 428, + 640 + ], + "counts": "T:X3T:000O100OoMRFmNMc1n9ROSFD5CM657H30EQ:MPF50K1N08NJ7NL584J4KHV:NoEO15LI60>2[O069JJQ:KPF33O15LJ5OL1OO:2E158KJk99VFG42JL77JH3;0MMKi9 and ?", + "answer": " is parked on .", + "image": "images/caption_simple_63.png", + "model_output": " is parked on .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000530099.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": 
"T\\Y11d;3N1O100O1O1O1O1O1O100O1O1O100O1O1O1O1O1O1O100O1O1O1O100O1O1O1O1O1O100O1O1O1O1O1O100O1O1O1O100O1O1O1O1O100O1O1O1O1O010O1O1O2N1O100O1O1O1O100O1O1O1O100N200O1O1O100O1O1O1O100O1O1O1O100O1O1O1O1O100O1O1O1O1O1N2O1O100O100O1O1O100O100O1O100O1O10000O10000O100O100O10000O10000O10000O10000O1000000O100O10000O100O10000O100O100O10000000000O100O10000O10000O1000000O100O10000O10000O1000000000000O1OVM[Hd1e7ZN\\Hg1d7VN_Hj1a7UN_Hl1a7SN_Hn1a7QN_HP2`7PN`HQ2`7nM`HS2`7mM_HT2`7kMaHV2_7iMaHX2^7hMaHZ2_7eMaH\\2_7bMbH_2]7aMcH`2]7_McHb2]7]MdHc2\\7\\McHf2\\7[McHf2]7XMdHi2[7VMfHk2Z7TMfHm2Z7RMfHo2Y7QMgHP3Y7oLhHQ3X7nLgHT3Y7kLgHV3X7jLhHW3X7hLhHY3X7fLhH[3X7dLhH]3X7bLiH^3W7aLiH`3V7`LjHa3W7\\LiHf3b72O1O1O1N2O1O1O1O1O1O1O1O100O100000000000000O10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000GhKmH11X4R7gKmH21V4R7hKmH21V4R7kKPIU4P7kKPIU4Q7jKoHV4R7gKPIY4W700000001O000000cH" + }, + { + "size": [ + 375, + 500 + ], + "counts": "Zel11f;1N2O3L2O0O2O0O2O1N2N3N3L3M102M1O3L2N3O1N2O1N3M2N2M2O1O1O1O1O1N2O1O001O1O10O00010O01O100O001O100O100O100O1N3O1N2N1O2OO10O0100O010O010O10O01O10000O010O1000000O10O1000O1000O010000O10000O1O01000O1O1000O01O1000O0100000O01O1000O10O1000O01000000O01000O1000O1000O0100000O1000000O0100000O1000000000000O10000O1000000O100O1000000O101O0O10001N101N101O001O000O2O001O001N10001N1O1O1O2N1O1N3N1OaEROX:l0gEVOY:i0gEYOY:e0gE[OZ:d0gE\\OZ:a0fEA[:=eED[: and ?", + "answer": " is sitting on .", + "image": "images/caption_simple_64.png", + "model_output": " is on top of .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000202339.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "Vk_18cc06I6J7J5J7I6J6J7J6J5M3M4L3M3M3M3O1000000000000000000000O100000000O100000000O10000000000O011O001Og0XOc0^O9G:F4K6Kohi6" + }, + { + "size": [ + 
640, + 480 + ], + "counts": "koo18gc03J6L3L3L5L3L4K4O2M3O1H8L2O0J3QN`NP@IY1m1k>j1O10M3O1O1010o_O_Lg0LSOe0n=h3[BRMEcNR34U7^4kEYMf2]N\\7d6[H^Id7j6SHXIk7P7mGQIR8[7aGhH^8^7\\GbHc8c7YG]Hg8g7UGYHP7H[IS8AVHn63[Ii7DUHP79UIf7HPHR7a0QIb7JmGS7f0PI^7KmGT7h0nH_7JjGW7l0kH[7LjGX7m0kH[7JiGZ7o0jH[7HfG_7P1hHZ8W7iGfHWNIj9`7SHbHUN4a9[7[H]HXN3a9`7XH[HW8e7R20O2N1001OO10O0100O1O2O0RFWHQN6JJ2No8l7SIWHoMj0i4[OVLNU1f7LYHmMk0f4l0\\MP61oHmM_O2F`4P2aKVN_1h7`0cHnMDa0KR4]2lLa5b0kHZ3e1nKa5d0mHe3e1\\K`5m0kH[NTOX5b2[K`5Q1jHP4e1kJd5P1kHX4a1cJg5P1mH^4^1UJn5Y1hHf4m9TKXFn4h9nJZFV5c9dJoDTO_1[6a9UJPE@i1]6V9QJoFQ6Q9jISGW6Z;002O0O2N010O1O2OO01O10O02N001O100O1O2O1N2N4L4L3N1N3M3N1NTJiCP18iMI\\1Ud600000O10O01O1O1000O01O1[OVOR^Ok0la0\\OP^Oc0Pb0@m]Oa0Pb0Cn]O>Qb0Dn]O;Sb0Fk]O;Tb0Gj]O9Wb0h0000O101N1O101N1O2O2M2^O_]OBcb08h]O_O\\b0=f0K6KggS4" + } + ], + "question": "What is the relationship between and ?", + "answer": " is carrying .", + "image": "images/caption_simple_65.png", + "model_output": " is being held by .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000172396.jpg", + "mask_rles": [ + { + "size": [ + 351, + 640 + ], + "counts": 
"3_2`82M5L3M3M6J3M2N5K5K4L2N4L3M3M6J4L2N5K6J3M2N3M5K2N8H4L6J1O3M3N1M5L3M3N7H3L5L2N4L3M3M3M4L4L5K2N3M4L4L4M1M5L3M4L5K1O2N6J3M4M3K3N4L4L4M1M2O5K3M5K2N2N4L2N2N2N00001O000000000001OO10000001OO100001O00O100001O00O100001O00O100000000000000001OO10000001OO10000001OO10000000000000000000000000O1001O00000000000000O1001O000000O1001O00O1001O000000O10000000000001O00O1000000000000001O00O1000000000000001OO100000000000000001O00O1000000000000001O00O10000000000001O000000O1001OO10000001O00O1000000000000001O00O1000000000000001OO1000000000000001O0000O1000000000000001OO10000000001O0000O1001O00O100000000000000000cILo14^40000000000000000000000000O1001O000001N11N1001N10O2O0001O0000O1001N10O2O0001O0000O1001O00O10000001OO10000001OO10000001OO100001O00O100001O00O100001O00O100001O0000O1001O00O10000001OO1000000000000001O0000O10000000000001O00O1000000000001OO100000000000000001O00O1000000000000001O00O2OO101OO100001O000000O10000000000001O00O10000000000001O000000O11O0000O1001O000000000000O10000001O0000O1001OO100QJMR13n4O00000000000000O1001O0000000000000000O11O0000000000000000O11O000000000000000000000000000000000000O11O00O100N2001OO1L4N20000O1O1N2H@mEd0P:7I7K5N2O1O1L4L4O1N2D]NXGg1f8:F:M3O1N2N2N3L2N[MSH^2a7`M]H65Y2]7h0G9K5N2M3M3N3N`LXIj2h6WMXIi2V6bLlINUO:01f4h2gKjLB6l0MVO20M93[4m2lKnL_O5k0NVO12M20K4e4i2PLRM[O2ONm02VOO<2@33K^4j2XLWMXO00Nl03BNB6O02J_4f2\\L[MSO5l0KC:BJ1K_4b2`L_MoN5l0JD;@K2J_4_2cLbMlN4m0LB:AJ3K^4W2kKiMF4b0M\\O20Lm02VON7d0I]O^4V2SLjM21ZO49K78H and ?", + "answer": " is inside .", + "image": "images/caption_simple_66.png", + "model_output": " is inside .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000404839.jpg", + "mask_rles": [ + { + "size": [ + 640, + 427 + ], + "counts": 
"_l02g15\\NK?0E0b`07j@S1S?POj@S1U?lNi@FdN^1b`0kNa@IXOY1X`0mN`@LXOV1W`0oNX@HWO74HM[1``0oNT@7]OP1``0hNQ@:^Oo0b`0eNP@T2Q`0kMo_OT2R`0mMj_OV2V`0jMi_OW2X`0hMh_OX2Y`0fMi_OX2Y`0fMh_O4@j1i`0SNf_O3Ai1l`0RNb_O6Bh1l`0TN`_O4Aj1Pa0VN[_ONGl1n`0XNX_OMJk1n`0YNW_OQ2j`0nMT_OT2l`0lMR_OV2Ua0cMk^O]2Wa0bMg^O7On1Xa0`0M32N2N0O01000OgN\\MXAc2h>^M]@0;b2W?_M]@14OD`2j?aM^@3FL42M^2]`0_Mg_OO03M_2k5iMQ5U2hJYNS5g1hJbNT5^1hJlNR5T1kJYOk4f0RKKa44XK:b4ETJfMZLO40?k2k8^OmITOhLb1e7oLgIX2b1P3V6nLdI_3W6]LeIW4o5gKnI`4P6`KiIg4W6YKeIk4Z6VKbIn4]6SK_IQ5a6oJ[IU5e6kJYIW5h6iJUIY5k6gJPI^5i6`HfGR2^1b5l6\\HfGR2U1oMgNi7^8VHfGT2l0P6d7RJWHQ6h7\\JdGl5[8m20000000000LdDdG\\;h7dDlH\\;g6hDiHN?Z;m70002NLgDaGY;^8hDbGX;]86ZLRE]KIo0G4^;\\3mE^KcNh03=^;[3WFULYNa0_;X3[FVLVNb03hNV;]4fFbM`9]2eF_M[9`2kFgKgM^1d1aNZN3a9U4TIUMQOfNn7T4RIUMPOgNo7S4TITMmNiNo7Q4ZIRMhNlNn7P4\\IPLQMm0\\1ROa8P4SIUMXNnNf8m3QIVMWNoNh8j3SIVMTNoNk8j3RIWMRNnNn8j3QIYMjMROV9d3QIZMjMmNY9h3nH[MlMiNW9l3nHYMhMkN^9k3kHYMfMnN_9i3kHYMdMPOa9g3kHYMdMPOa9g3kHYMcMPOc9f3lHYM_MQOg9e3kHZM]MQO]84kG`3]2YM[MSOU8e0bG\\OAc3]3ZM[MQOP8o0`GQ3V3oL[MPOm7a5iJ^KT6c4nI[KQ6g4oIXKUMQOR8i5iJWKTMPOR8j5jJVKTMPOR8j5jJVKTMPOS8h5jJXKP6f4PJYKW6a4iI_K[6\\4fIeK]6W4cIhKa6T4`IlKb6Q4`InKc6o3\\ISLg6i3YIXLoLoNQ9h4PJYLj6d3VI\\LkLQO]9`4iI_LfLTOd9Z4fIcLgLQOe9Y4eIfLgLQOd9X4eIiLaLlNR:Z4]IRMXLfN\\:V4\\IXMh6d2XI[Mk6c2UI]Mm6a2SIVMZ7f2fHWM`7f2`HYMd7e2[H[Mi7a2WH`Mh7`2XH`Mh7`2XH`Mh7a2WH_Mi7a2WH_Mi7a2VHaMi7_2WHaMi7`2VH`Mj7`2VHaMi7`2VH`Mj7`2UHaM2kNQ4e3lK`M4jNP4f3mK_Mk7b2TH^Ml7c2SH^Ml7c2RH^M5hNT4k3gK\\Mn7e2RHZMn7g2QHYMo7i2oGXMP8i2oGWMb0iNa3R4kKUMS8l2lGTMa0kNg3S4gKQMb0lNg3T4fKQMb0kNh3T4fKQMb0kNh3U4dKQMf0hNf3X4TKhKkMX1k:R3WGjKjMU1`3iNa3Y4SKlKjMS1`3hNd3Z4oJoKkMo0b3hNd3[4lJSLkMj0U;U3kFdMT9]2iFeMW9]2fFdMZ9^2_FgMa9\\2WFiMi9b2bEhM^:m501O1iHYGS3g8S40hN\\GTGe8f8cGWG^8`8kG_GU8]8PHbGQ8Z8SHeGm7X8WHgGi7l7fHRHZ7j7kHUHV7o5SGcJ;iNa1a0Q7R6UG^Jc2[OX6V6YG\\I1a0j2Gl5k3SGZNh0hMf;m3bCZN^=e1dBYN]=U41N100O1hM_BQNb=]1bBoN_=o0gBiLEh1e=^1iBgLGg1`=a1lBdLGi1^=a1ZC^MPO3g=]2eDSM^;j2jDnLX;n2o2L4M3M3Ll^O[MUa0R2R_OVNSa0h1Q_OTNPa0k1c000O2N101N101O1O0O3[Ol]OZO
bc0JZmm3" + }, + { + "size": [ + 640, + 427 + ], + "counts": "nY:1nc02N1a]70^bH3N1O2M8H4M2M3N2M5K6K1O1N2O1O0OI`]OoNab0Q1`]OnN`b0S1_]OmN^b0V1a]OkN^b0\\100O010O2OO1000OO2M3M2O200003M00N2O10000N\\]OiNcb0X1201O3ROo]OHQb05Q^OKoa05Q^OKoa05Q^OKTb0OgQ?6nn@6N000O020O00000004LBHU]O7kb0IX]O4hb0MY]OOib02V]ONib03b01O1O001O1O1O1O00001O4L2N1O2N2N3M1O1O001O1O0000001O0O1000O1100O1O0000001O001O00001O000000001O000000000O2O0000010O00001O0O101O01O01N2N2M201O000000001O00000000001O0000000010O000001O0000001O0j@_Nm;a1QDbNo;]1QDeNm;[1SDgNk;Z1UDfNj;Z1VDhNh;X1YDiNe;X1]DfNa;[1`DfN^;[1aDfN^;Z1bDhN\\;X1eDiNY;X1gDhNX;X1iDhNV;X1jDiNU;X1jDiNU;W1kDiNU;V1lDkNS;U1mDkNS;U1lDmNS;T1kDnNT;R1kDoNU;Q1iDROV;n0iDSOW;m0hDTOX;l0YD\\NgMi0P>k0YDaNaMe0V>j0YDCg;=YDCh;=WDBj;>VDBj;>WDAj;>VD^On;c0QDQO`MZO1Na>g1nCQO`MZOf>f1iCPOaMZOg>f1gCTOZeDBZ;>fDBY;?iD_OW;a0kD\\OV;d0kD[OU;e0kD^OR;b0oD^OP;b0PEBl:>UEBk:>TECf9lNWDa1S2C`9WOXDV1Y2D_8XOeD6?n0^2DW8n0lD]On2ER8n2nGRMn7R3RHoLj7T3VHmLf7X3ZHfLc7^3]HbLa7_3`HaL]7`3dH`L[7b3\\FaKF31k0i9d3[FmK[O21RO2\\1U:d3[FXM_OTOT:g3XFmNf9T1XFoNf9S1VFQOh9e5N1O1O101_KSFlNQ:n0RFQOU:g0mEYOT:c0oE]OR:a0oE^OY:8jEHX:OoE0T:KoE4U:FnE:U:BlE>X:mKoDT3m0n0g:^NdEb1a:UNcEk1`:\\MQDmNd1g3]:fLhFZ3^9ZLhFf3i and ?", + "answer": " is beside .", + "image": "images/caption_simple_67.png", + "model_output": " is beside .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000069138.jpg", + "mask_rles": [ + { + "size": [ + 640, + 371 + ], + "counts": 
"o96b0NDNM72KQb04[^OLD;056@Hj00SO48LJNOV?9XAKB=35NYO1]7i9nHVFL1d00`7i9YIWFg6i9ZIVFf6j9ZIVFf6j9ZIVFf6j9ZIVFf6j9[IUFe6j9i10000001O000UJUFa1k9Z4000VJUF_1k9`NWF_1i9aNWF_1i9bNUF_1k9[4001O00000XJTF\\1m9cJSFf31e1m9cJTFg30e1W:bJgEb32l1X:[NhEd1Y:[NgEe1`:TN`El1d:PN\\EP2e:oM[EQ2e:PN[Eo1f:_KXE`11Q3j:\\KUEc11Q3n:XKQEh10P3T;SKlD`7T;f0000000001O0000OaJlDT2T;lMmDS2S;]300O1O1OlHoD[5P;fJPEZ5m:iJSEI2e3j:[NVEd1g:_NYEa1g:_NZE`1f:aNYE_1g:aNZE^1f:bNZE^1f:bNZE^1g:aNYE_1g:bNXE^1i:aNWE_1i:bNVE^1k:aNUE_1l:aNSE_1m:aNSE_1n:`NSE_1n:i30iJQE]1P;bNPE^1P;i31O0jJoD]1Q;bNPE^1P;bNPE^1P;cNoD]1Q;i3O10lJPEX1o:iNREV1m:kNTET1k:mNUES1k:mNVER1i:oNWEQ1h:QOWEo0i:QOXEn0i:QOWEo0i:QOWEo0i:QOXEn0h:SOWEm0i:SOWEm0i:SOXE_LV1b2b9WNUEo03PNm1i2k8VNZE\\O2_OL7S3h2e7iM\\JV2g;00000000000001O001O00O11O00000000001YNQ_OGea0VOZ^O1olo0NoRPOV1Ya0]1N2O100000000O1000000O1N200000000000000O10000000000001O0000O100000000001O00000000O10000001O000000000000O10000001O00000000O1O1000000O1000000000000YMRMkA0Z2o2Qh?S1dB`NcM1i?_1hB`NX=`1iB_NW=`1iBaNW=_1iBaNW=_1iBaNW=_1iBaNW=_1eB`NdM1g?_1eBaNcM0h?_1dBbNdMOh?`1cBaNeMOh?`1cBaNeMOh?`1bBbNfMNh?`1bBgN]=Y1cBgN]=Y1dBfN\\=Z1dBfN\\=Y1dBhN\\=Y1cBgN]=Y1cBgN]=Y1cBcNeMNh?_1bBhN^=X1bBhN^=X1aBiN_=W1aBiN_=W1aBiN_=W1aBiN_=W1bBhN^=X1bBhN^=X1aBiN_=W1aBiN_=W1aBiN_=X1_BiNa=W1_BiNa=W1_BiNa=V1`BjN`=V1_BlN_=U1aBkNV<^2jCbMQ;c3oD]Lf:n3YESLf:R4ZC`Kb1`0T;e4^D\\Kb;n4SDSKm;`5\\CdJd<\\600O100O1O1O10000000000O101O0000000000000000000000000000000001O00O1001O0000000000000000000000000001O0000000O10000O100000000000001N10000O2[Od0oMnG_F0a0j8R3\\Gj0m:oN^E[OBZM5Gc;Y2YDhMi1T1TNiNR>[1n[Z2" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is over .", + "image": "images/caption_simple_68.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000342367.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": 
"QnQ62n>1m>NYRO1e>0ZA4a>M`A3`>L_A6`>I`A9c>I\\A2l0MVO2V=MiB2N4c0=e<]OkB9Nm0f;jNgE1cNb1d;UO[DB<^1X;BbD>];EaD;\\;IcD8Z;h1M3M3M4L4L3N3N11O000000000010O0001O0000001O000O101O00001O001O00001O0000010O0001O01O00001O010O001O010O0000001O0jLQE\\2o:]MYEFI`2o:fM[EII_2m:aMaEOC_2h;`MZD_2e;aM\\D^2d;bM^D\\2b;dM_D[2U5K4N2O2N1O1O1O100O2N1N2O2O0O100O2N1QNZOdEf0V:EdE and ?", + "answer": " is in front of .", + "image": "images/caption_simple_69.png", + "model_output": " is in front of .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000263796.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "[i;a0Tc0>L2N3N3M1N4K3O2N3f]O`Nma0l1M8I1\\OhM]_O[2``0hM]_O]2^`0fM__O^2^`0e0L5L5K3L2O0O2O000O10000O101O001O002N0002N2N3M1O3M2N6J4L1O1O2N1O001O000000TOoLi@Q3V?SMf@n2Z?UMc@k2]?XM]@k2b?XM[@i2e?[MU@g2k?g0O10000O1001O00O1O1O100O1000000000000002N2N1O2N6J4L1O1O0000000000VO^LVAb3h>dLTA\\3k>gLRAZ3n>hLo@Y3P?lLi@W3W?mLc@U3\\?h000O10000O100O100O1000000000000000000O10000000000O10000O10000O10000O1000000O100O11O1O00O1O10000000000N2O100N200000000O10000001O1O5K3M8H2N6J3M2N3M3M2N2N1O1O1O2N1O1O1O001O1O1O1O1O001O1O2N1VM__OS2a`0gMk_OS2W`0gMQ@U2P`0hMT@V2l`0O1O2N2N1O2N1O1O1O1O001O001O1O001O001O001O001O0000001O000000001O0000001O00000000000000000000000000000000000000000000O10000O100O10000O1O1O1O100O1N2O1O1O1O1O1O1001O002N3MO1O1C=N2TOhMY_O;7m1[`0^Nd_Ob1[`0`Nd_O`1[`0bNc_O_1]`0R1BhLT@X3k?iLU@W3k?jLT@V3l?jLT@V3l?jLT@V3l?jLT@V3l?jLT@V3l?jLT@V3m?iLS@W3l?jLS@W3m?kLQ@U3o?>O1N2N2K5A?O1O100O100001O001O0000XMZKWFe4h9^KaCMc2e4l9bKRF^4n9bKRF^4n9cKRF\\4n9cKSF]4m9bKUF]4l9^KaC0c2b4l9]KbC1b2b4l9]KbC1b2b4g<0O1O100O100O100O10000000000001O00000000000000O10000000000000000O1000000000000001O001O00001O001O1O001O1O3M2N1O1O2N1O1O001O00001O1O002N3M2N2N1O2N3M1O2fMi_Og0W`0UOQ@g0Q`0TOT@j0o?_N`_O2i0\\1i?`N__OLQ1b1b?`N^_OLS1b1a?^Nj@`1n`0M4L1O002N2N3M1O001O1O2N2N2N1O2N3M1O2N1O2N3M1O2N1O2N5KQl`0" + }, + { + "size": [ + 640, + 480 + ], + "counts": 
"dad3a0\\c0:G8G8H7J5K6K3L7J4M1N4L2O2M4M1N2O2N2^_OWMh?k2T@ZMj?g2S@\\Ml?f2Q@\\Mm?f2Q@\\Mn?e2Q@[Mn?g2P@ZMP`0\\3N1O100O1O1O1O2Z@nK\\?\\4N001O100O00100O10O01O010O10O0100h@\\KS?`4RA`Kk20o8`4VD`Kk20o8`4\\31000000O10001O000O10000000000000001O000000000000001O0000000001O000000000000001O00000000001O000001h@\\KS?c4l@`KR?g41O0100O01O00010O10O01O0O2J6L4N101O1O1O1O001O2N010N2O1O1ZOT@VMm?h2U@WMk?g2X@WMj?h2W@WMj?g2W@XMk?g2X@UMi?j2[@nLk?n2f0N1M4N2N1O2L3M4L3M4N2K6K4I8J6M4\\Ombf2" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is on .", + "image": "images/caption_simple_70.png", + "model_output": " is on .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000119828.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "`oY1a0P;:C;I8I5M4K4N3L3M3N1O2N1O2M3N1N3N2M3N2N1O2M2O2N1O2N1O2N1O2N2N1O2M2O2N1N3N2N1O1O2N1O1O1O2N100O101N1O101N1O1O2O0O101N10000O100O101N1000000O10000000000O1000000O10000000O100000O10000000O100O10000O10000O100O100O1O1O100O10000000000O1000000000000000001O0000000010O00000000001O0001O01O01O0010O00010O010O2O1N2O1N2N4M3M3L4L2O0O1fJoJl3T5lKlJnN2W5T5fKjJUO3T5U5eKiJTO4W5V5aKnJ`4V5ZKkJf4W5YKhJg4Y5UK^JD9X5S6001O00010O001O001O00001O001O001O0000001O00000000001O0000000000010O00001O00001N100010OO2O001O1O0O4MOLUJ^Jj40QLc5WOZJg4:lKU5[OfJm49dKP5CdJj4]6VKbIk4_6SKcI1Ne4_6[KbI0Of4_6ZK`I20e4_6YKaI20f4^6XKbIo4_65O1OjJcIS5`61O1O1O1NgJeIY5V67M2N3K401M2N2O2H7B>^Ob0K5N2N2N2O1O2N002N100O1O011N10O0101N10O0101N100O100O100O1O100O001O1O100O1O1O1O2N1O1O1O1O001O0O20OO2N2N1N3M3J5@a0L4N100M3O2N1M3O2L4^OlE[OY:b0?M4N2N1N201N2N100O101N2N3K[Vf0" + }, + { + "size": [ + 375, + 500 + ], + "counts": "_Sg31h;0N0bh01[WO4M5K3N3MCPE4n:LUE4i:K[E3d:M]E3a:NaE1^:OeEOZ:0hEOX:1g00PE2V:0gE3X:g001M2O1N101N2O001O001O00001N100O100O100O10000000000000000000000000000000000001O0000001O0000001O001O0O2O0O2O0J7K5J5L6I8H\\eR1" + } + ], + "question": "Where is located relative to ?", + "answer": " is attached to .", + 
"image": "images/caption_simple_71.png", + "model_output": " is on .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000001993.jpg", + "mask_rles": [ + { + "size": [ + 419, + 640 + ], + "counts": "]8a0a<1O2O1O0O10001N10000O2O0O10000O2O000O10001O0O100000000O2O00000O100001POTDg0Vo:CoD>P;]3fDiKc8]4^OcK\\H]4d7dKZH]4e7fKWH\\4i7gKRHf2c9Gf0N1010O10O100O010O100O10O10O100O0100000O10O100000O100001N2O3M3M2N2NGnEcMP:]2RFdMk9\\2WFdMg9\\2[FdMb9]2`FdM]9]2dFcMZ9_2gF`MW9b2iF^MU9c2lF]MR9d2PG[Mn8g2SGXMk8[2fFkM>Jk8W2nFnM7Ki8U2TGPN4Ig8V2ZGoM0Hf8X2]GoMV9P2mFPNQ9d1XF]Ni00n8]1aF^Ne05h8[1jGdNU8[1nGeNP8X1THhNk7T1[HlNc7P1cHPO\\7k0jHUOT7g0RIYOm6b0XI_Of6=`IB`69fIGY65kILT6OSJOm5LYJ4[9010O1000O1N2O1MbVX5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_72.png", + "model_output": " is beside .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000221502.jpg", + "mask_rles": [ + { + "size": [ + 320, + 640 + ], + "counts": "nR63g99K3O1N1O2O1N2O1O0jFZOR9k0N5YGTOQ8n0mGUOk0Nk5n0YITOj00l5m0ZITOg01YOGY6W1gIROLL>5CJ[6S1`IjN4f0;OP6b0aIkN2g0NWO7g0X6h1dIZN[6h1cIYN]6e20DcIWM[6i2gIUMY6m2eIRM]6n2dImL_6n2n1g5QNkI0?n1f5TNjIMa0o1d5VNjIKb0o1d5UNjILc0o1d5TNiIMc0n1d5WNhIKd0n1d5XNhIHe0P2c5YNhIFe0Q2d5WNiIEe0U2c5TNdJl1]5SNcJn1]5WN^Ji1b5VN_Ji1a5WN_Jj1`5VNaJi1_5WNaJi1_5WNaJi1`5VN`Jk1_5UNaJk1`5TN`Jl1b5QN_JP2c5mM]JT2c5jM_JU2a5kM_JV2a5iM_JW2e602N2N2M5K8I;E5K8G;F9Febm4" + }, + { + "size": [ + 320, + 640 + ], + "counts": 
"Qgj0131f91XF1g95N2N1O101N1O100O1000000O1O10000O10000O100O100O2O0mN_OgHa0i5I`Ie0b0Bn5T1RJmNl5U1RJlNm5U1SJkNm5U1SJkNm5U1SJkNm5U1SJkNm5U1SJkNm5V1RJkNm5U1RJlNn5S1SJmNm5T1RJlNn5T1RJlNn5T1RJmNm5S1TJnNj5R1VJQOg5o0YJSOe5m0[J]O[5c0eJC^O_Nc5n1oJOd0UNi2l1cL8:nMS3i1cLa00hM]3h1bLg0DZMK:o3i1\\LZ1d3a201O00O1001O000000[MXLROh3n0`LiNa3V1eLeN[3[1mL]NS3c1WMRNj2n1YMnMh2R2aMcMa2]2cM^M^2X1XLmN]1F\\2\\1[LkN]1CZ2b1YLkN`1]OZ2h1VLkN`1\\O[2i1ULkNa1ZO[2k1TLkNS6U1mIkNS6U1mIkNS6U1mIkNS6U1mIkNS6U1mIkNS6U1mIkNR6V1nIjNS6U1mIkNS6U1mIkNS6U1mIkNS6U1mIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1lIjNT6V1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIjNV6V1jIjNV6V1jIjNV6V1jIjNV6V1jIjNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6T1jIlNV6T1jIlNV6T1jIlNV6T1jIlNV6T1iImNW6S1iImNW6S1iImNW6S1iIlNX6T1hImNW6T1hIlNX6S1iImNW6S1iIlNX6T1hIlNX6T1hImNW6S1iImNW6S1iImNW6S1iIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6T1hIlNX6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1
gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1hIjNX6V1gIkNY6U1gIkNY6U1gIkNY6U1hIiNY6W1gIiNY6W1gIiNY6W1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1eIkN[6U1eIkN[6U1eIkN[6U1eIkN[6U1eIkN[6U1fIjNZ6V1fIiN[6W1eIiN[6W1dIjN\\6V1eIiN[6W1eIiN[6W1eIiN[6W1dIjN\\6V1dIjN\\6V1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIhN\\6X1dIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIhN\\6X1dIhN\\6X1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1dIhN\\6X1dIhN\\6W1fIhNZ6Y1eIgN[6Y1eIgN[6Z1bIhN^6]200000000000000000001O00:Fg0`JnKd3Y4TLkKi3X5NlLaKi0]4jNRLU1m3jNULW1i3hNXLY1g3gNXL[1g3eNXL\\1h3dNXL]1g3cNZL]1e3cN[L]1e3cN[L]1e3cN[L]1e3cN[L^1d3bN\\L^1d3bN\\L^1d3bN\\LjN6@YO]1U49]LiNg0d0l2c0\\LjNR19b2l0]LkN]1MW2W1\\LkNc1IQ2[1^LkNe1Fn1X1cLRO`1Da2f0PLF`1CT32]K;`1BZ4>iK^OX4b0c2O1000001O00001O00000000002N001O00000O100000001N5Hgm?N]R@21Oci:" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_73.png", + "model_output": " is beside .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000312586.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "`eo63X=2N1N3L3N3HD]C>b<61N101O000O100O2O00O1O101N2O103`CXOT and ?", + "answer": " is in front of .", + "image": "images/caption_simple_74.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": 
"/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000187236.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Y6c0n0i1U:XNkEg1T:^NjE`1U:cNjE\\1V:fNjEX1V:iNlES1U:oNjEP1U:SOjEl0V:UOiEk0W:WOhEh0W:\\OfEd0Z:]OeEb0\\:@aEa0_:A_E?`:C_E=a:E\\E;e:GYE9g:IVE8j:KSE5m:o100O\\LTEV3m:gLVEX3j:fLYEY3e:hL]EW3c:iL]EW3d:gL^EX3d:eL]E[3[7dLnK[3l7000O1000000000YM_Do1a;oMbDo1`;oMcDo1_;nMbDR2a;iMbDV2_;gMcDY2_;cMcD]2P<0O100000O010O1000O01000O10O010O1O01WObMPE^2o:cMPE]2P;cMQE]2o:cMPE]2]:bMXF]2i9cMVF]2k9cM[E16\\2_:eMZE06Z2a:gMWEO8[2a:gMVEN8[2b:jMREM;Z2c:kMoDMoBDP=;QCFn<:QCGo<8RCIm<7RCJn<6RCJn<5RCMl<4TCLl<4SCMm<2SCOm<1SCOm<0SC1m and ?", + "answer": " is lying on .", + "image": "images/caption_simple_75.png", + "model_output": " is on .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000255749.jpg", + "mask_rles": [ + { + "size": [ + 424, + 640 + ], + "counts": "W_U1e0Z2H]7Q1bG]OZ8j0[G]Oc8g0WG]Oh8f0QG_Om8f0eFEZ9U2O2N100O100O1N2M3NAXGbLd8`3`G^L`8_3cGaL]8\\3gGcLY8Z3jGeLW8X3mGgLS8W3oGhLR8U3R1I7_Ob0WN]E`0n:]OZEi]Y2" + }, + { + "size": [ + 424, + 640 + ], + "counts": "m`T28i<;I4M3N1O1O2N1O1O1O1N200O1O1O1O2N1O100O1L4M4L3N2O1O10000O10O100nNlDGKD[;b0nDGIEZ;b0PEGY;9hDD[;:gDC\\;;j0M3N2N4JhSc5" + } + ], + "question": "What are and doing in relation to ?", + "answer": " and are entering .", + "image": "images/caption_simple_76.png", + "model_output": " and are looking at .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000575243.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "ZiV34Q=6M3L4L4N2M2N2O1O2O0N3O001N10001O000O2O010O001O00001O10O0001O001O010O001M3O1O010000O10O01O100O1O00100O1O1O010O10O01OO20O0100O01O100O100O00100O100O010O1O1O100O010O100O101N1O100O1000001N1O100O1N2O2O0O1O2N100O1O2O0O2O001N100O1O101N101O001Ni`b3" + }, + { + "size": [ + 427, + 640 + ], + "counts": 
"hcW31Z=0ea91odJ5QiK7RCN^L5O0100O0102M2N4M2M:E7HTl0OZbk3" + } + ], + "question": "What is doing with ?", + "answer": " is holding .", + "image": "images/caption_simple_77.png", + "model_output": " is holding .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000473118.jpg", + "mask_rles": [ + { + "size": [ + 500, + 346 + ], + "counts": "Uod01c?00000\\im00cVRO9_@Kh>f0G9D<\\ObNgBi1W=;K5M3M3O1000000001OAaMhC`2Th80bIBfM>h80aIDfM]2KYN:m8]O]F`0U2O`N4n8<]HCeN2n8;ZHFfNOR9;UHJgNJV9=lG0lNCY9=bFkNc0Y1@@\\9<_FROXORB7X>GmAMchm1" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is jumping from .", + "image": "images/caption_simple_78.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000527215.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "i8h02n2h8RMXGn2h8RMXGn2h8RMXGn2h8j0O100000000001O00000000000000000000000000000000O11O0000000000000000000000000000000000O1000000O100O100O1O10000O1000000O10000O1O10bKaGQ4_8oKaGQ4^8PLcGo3]8PLdGP4`7fK`H0a0:@P4_7gK^H1`09Do3^7gK]H2>;Gl3^7oKgH6Kk3]7RLeH4Nj3]7ULaH22i3]7kLcHU3^7jLcHU3]7jLdHV3\\7gLgHY3Y7gLfHZ3Z7fLfHZ3Z7eLgH[3Y7eLgH[3Y7eLgH[3Y7eLgH[3Y7lK]Ha0:c3Y7eLgH[3Z7dLeH]3[7kK\\H`09e3[7jK]Hb07d3]7hK]Hd06d3]7gK_Hd04e3^7eK`He02f3]7hK_Hb05e3[7kK^Ha06e3\\7kK\\Hb07c3]7eLcH[3]7fLbHZ3_7eLaH[3_7eL`H\\3`7dL`H\\3`7eL_H[3a7fL^HZ3b7gL]HY3c7W100001O000000001O000000000000000000001O000000000000001O00000000000000000000001O00000000000000000000000000001O001O001O1O001O00001O00001O001O001O00000000000000O100O100O100O100000000000000O1O100O1000000O1000000000000001O0000000000000000001O0000000000000000000000000000001O0000000000000000001O0000O1000000001O00000000000000000000001N11O00000000000001O00000hKUHQ3k7iL^HT3b7hLhHR3X7lLkHS3U7mLkHS3U7mLkHS3U7nLjHR3V7oLiHQ3W7QMgHo2Y7RMfHn2Z7SMeHm2[7YM_Hg2a7aMWH_2i7bMVH^2j7eMSH[2n7fMoG[2Q8_10000000000000000000000000000
00000000000000000000000000000000000000000000000lKnGP3S8oLmGQ3R8PMnGP3R8PMnGP3R8PMnGP3R8oLoGQ3Q8oLoGQ3Q8oLoGQ3Q8T1000000000000000000000kKoGQ3Q8oLoGQ3Q8nLPHR3P8nLPHR3P8nLQHQ3o7oLQHQ3o7oLQHQ3P8nLPHR3P8nLPHR3o7oLQHQ3o7oLQHQ3o7nLRHR3n7nLSHQ3m7oLSHQ3n7nLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLRHR3n7nLRHR3n7nLRHR3n7nLRHR3n7oLQHQ3o7oLQHQ3o7oLQHP3P8QMoGo2Q8QMoGo2Q8QMoGo2Q8QMoGo2Q8QMoGo2Q8RMmGo2S8QMmGo2S8QMlGP3T8S1000000000000001OO100001O000001O0O01001O00000000000001O0O11O00000000000000O100001OO100000000000001O000iKmGX3R8cLZHV3f7gLaHU3_7kLbHT3^7kLcHU3]7lLbHT3^7lLbHT3^7oL_HP3b7RMYHQ3g7QMSHT3l7nLoGU3Q8mLlGT3T8Q100001O0_KXH`3h7[L^Hd3c7VLcHd0YOV2V9gMXGn1X:J4L1OO1000nNSEGn:7SEIm:7UEGk:8WEGi:8XEHh:8XEHi:7WEIk:4VELn:0SEOS;JnD6U;GkD9Y;BhD>\\;]OeDc0a;UOaDk0n;1000000O1000000000000O100O1\\O]OaDc0Y;j0@`0L4O1O1L4WOi0^OgLmF_3b8UMYGm2^8\\LaGe4]85L4N2O10000000kKlGT3T8kLnGT3R8lLnGT3R8lLnGT3R8kLoGU3R8jLnGV3R8jLnGV3R8jLnGV3R8jLnGV3R8jLnGV3R8kLmGU3S8kLmGU3S8kLmGU3S8lLlGT3T8mLiGU3W8o01O00O10000000000000000000000000000001O0000000RLmGe2S8ZMnGf2R8XMPHh2P8XMPHh2P8XMPHh2P8XMoGi2Q8XMjGl2V8T1000000000000000nKiGQ3W8oLkGo2V8oLnGn2R8QMTHj2l7UMUHk2k7TMVHl2j7TMVHl2j7UMTHl2l7UMlGR3T8R1001O000000000000000000001O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000eG" + }, + { + "size": [ + 426, + 640 + ], + "counts": "l]f33U=4COZC;^<:O02N2N1N6J:F3O01OOi\\^4" + } + ], + "question": "What is doing on the ?", + "answer": " is walking on the .", + "image": "images/caption_simple_79.png", + "model_output": " is walking on the .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000509131.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "mVb55T=2N2M2N4L3M4M2M4L5L3M1O2O1O2N3M3ZDfNU;k1N2L2O00100O0010O1O1O010O2N1O0O10001N1O2O1N2N2O1N2N2N2N2N3M2N4L5K4K4K7HS[Q2" + }, + { + "size": [ + 425, + 640 + ], + "counts": 
"^_^58P=e1\\N7I5K6J5Ld0\\O0001O001N2O1O2M2O2N3L4L101N2N2M3N4QNPE]1V;WNTEe1];J5K5J6J6K6H8Hc[]2" + } + ], + "question": "What is the relationship between and ?", + "answer": " is attached to .", + "image": "images/caption_simple_80.png", + "model_output": " is attached to .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000167902.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "[9Y1^:0O1000O10000000O100000O10000000O100000000000O1000000O1000000000O100000O10O100000000000O1000O1000000000O1000O1000000000000O01000000O1000O1000000000O1000O1000000000O10000000O1000O100000000000O1000O10000000O100000O10000000000O10O100000000000O100000O1000000000O100000O100000O10000000O100000O10000000000000O0100000000000000O10O1001O00O1000000000O10O10000000000000O10O100000000000O10000000O100000O100000000000O10000000O100000O10O10000000000000000O10O100000000000000O0100000000000O10000000O0100000O2O00001O001O001O1N2O2N2N3M2N6J8H4Kk]10UbN3N3L5VEFo9>lECDL]:f0dEE^:j00000000O01000000000O1000000000O1000O100000000O1000O10O1000000000000O1000001O00001O1O2M2O2N3M2N1O001N4M001O[ODSF8l9JTF5l9NQF2o91mE0S:4gEMZ:h0O1000000000O0100000000000O010000000O10000000000000O010000000000000O010000000000O10O10000000O100000000O10O100000O1001OO1000000000O01000000000000O0100000000000000O010000000000000O100000O1000000000000000O100000O100000000000000O1000O100000000O10000000O1000000000000000O010000000000000000O10O1000000000O1000000000O010000000000000O1000000\\H" + }, + { + "size": [ + 375, + 500 + ], + "counts": "anV15Z;MhD;o: and ?", + "answer": " is over .", + "image": "images/caption_simple_81.png", + "model_output": " is on .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000097924.jpg", + "mask_rles": [ + { + "size": [ + 400, + 600 + ], + "counts": 
"n7a4o70000000000@VHZLj7e3XHYLh7g3ZHYLe7c0WHS24ZMe7b0eHg1FgMe7b0gHe1DiMf7a0jHb1@mMf7`0lHc1\\OnMh7?lHd1[OmMi7?mHd1YOmMj7?mHc1]OjMg7b0mHb1_OkMd7c0mHb1_OkMd7c0mHb1_OkMd7c0mHb1_OkMd7b0nHb1_OlMc7b0nHb1_OlMd7a0mHd1^OkMg7?kHg1\\OkMj7=jHh1\\OkMk7k1[9UNfFj1[9TNgFk1o901O000000O1SOUN^F2OO:j1Y9UN]F95g1c9TNcFk1]9QNhFn1Y9SNeFm1[9SNfFl1Z9TNfFl1P:O1O0000001O00000000001O1N1001O000O10O10O100O100001O00000001O000O100O100O1001O1O001O0000O1YOWNUFN?k1\\9ZNaFi1^9XNaFi1b9SN`Fl1S:O1O1O001OXNZNYHNTOh1a8]NgHc1Y7^NgHa1W7eNeH[1Z7gNfHX1Y7kNfHT1Z7mNfHR1Y7ROeHm0[7UOdHj0]7UOcHk0_7SOaHm0g7gN^HX1]9000000000000000O100N2L400@`0000000000oMlEH0k1d:N00O10000TNXNPIh1P7YNZH\\2f7hMUHY2k7mMmGL_Oj1c8bNeGDNf1]8[OfGd0Z8[OhGd0W8\\OkGc0U8\\OmGc0S8\\OnGd0Q8]OPHb0P8^OPHb0P8]OQHc0o7]OQHc0n7^ORHb0n7^ORHb0n7^ORHb0o7^OPHb0Q8]OoGc0R8]OmGc0U8[OjGf0W8YOiGg0Z8VOfGj0\\8TOcGm0^8RO]GS1f8iNTG^1m8aN^GT1d8iN^GV1e8fN]GY1e8dN]G[1e8bN\\G^1f8`NZG`1i8\\NYGc1j8[NUGe1l8\\NbF10c1_9\\N_F21c1m9]NSFc1m9]NRFd1m9_NPFb1m9b0fNaMcH_2\\7eMaH[2_7fM`HZ2`7iM^HV2b7kM]HU2b7oM[HQ2e7QNZHn1f7SNYHm1g7TNXHl1h7VNVHj1j7ZNRHf1m7\\NRHd1n7^NPHb1P8`NnG`1R8bNkG_1T8eNiG[1W8gNfGZ1Z8hNcGY1]8]100O1000000000000O100000000000000000000O1O1001O0000000000000000001OO10000001O000000000000000000000000000000000000000000000000001O0000000000000000000000000000001OO100000000000000000000000000000000O1001O00000000`G" + }, + { + "size": [ + 400, + 600 + ], + "counts": "R]^16T<;H9G4oKXOULk0f3CoK?n3HkK;R4KiK8T4MfK6Y4NbK4\\4O`K4_41ZK2d45TKNf4hJDX5 and ?", + "answer": " is standing on .", + "image": "images/caption_simple_82.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000509656.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": 
"0b6^800001O00O1001O000000000000001O000000000000001O00000000000000000000000000000000000000000000O1001O00O1001O00001O00N2bNmGPKJ=O@^8S5jGkJO0l8c5oFbJn8Y6_O4L2N0000000000000000001O00000000000000000000O1000000000;\\IbG?In11a0Y[JVOf5R1_JaNa5d1dJRN\\5V2gJ\\M\\5l2X36J?A9G:F:F:F:F:F>B7I5K5K1O0000000000000000000000O1lJeGc3[8[LhGc3Y8\\LiGc3W8\\LoG]3S8XLhGRO8_4U8_LcGQO9Z4Z8eL]GQO9X4\\8eLPHZ3P8eLQH`3j7`LWHc3e7]L[He3c7[L]Hh3`7WLaHj3^7SLeHn3Z7QLhHo3W7QLiHo3W7PLjHP4V7PLjHQ4U7oKlHP4T7mKgG]OV1e4S7QLkHQ4U7oKkHQ4U7lKgG]OU1g4U7]KfG0e1]4d6`KkG1b1_4c6aKjG0c1_4c6aKkGO_1N^Nb4X8aKjG0`1b4f6]KjG2_1b4h6[KhG4`1a4h6[KhG4`1a4h6[KhG2b1b4n6]KQIc4o6]KPIe4P7ZKoHg4e8O101O0O100000000O10000O1010O11N1O1O100O1O002N3N1N1N2O00001O000000O10O1O1O1N2O1N3M3M45L0OOO01O2O0O1O001O1O0O2O00001O001O00001O001dLmDi2T;TMoDi2i;K3L3N2N1O100O1O2O0O1O1O1O2O0O100O101N100O10O0101O0O2N1N2O2L4M3L3N3N2K4L5N3J6L4I6D^A12OPUP5" + }, + { + "size": [ + 480, + 640 + ], + "counts": "aah1c0U>?C`0B;F:E>D5K7H6oDlLm9Z4I4L5K5K9H3M2N0O01O1O1O2O0O10000O1O2O0O1O101K41ON3M2NiE]KW:b431O1OH9O0O2N1M4M201N2O001O1O1O001000bFWLe7i3YHZLf7h3UH\\Lj7f3QH^Ln7d3mG`LQ8c3iGaLW8`3fGbLZ8a3aGbL^8c3UGiLh8j4L4M2N2N000000001OO100O1O1L4N2M3M4L3O1O1N2N2N2O2M2O1N2O1O1M3N2N2N2L4L4L5L3000000O110O001O0000001O000000010O00001O00000001O0000000000000O1000000O103cEbKR:m4I3M9F5L2N1O:F2N1N2N100O2O000000001N101O000O10OFjIeG02V6^8kI`GZ5`0lJQ8J_GP5`9721M21OO200_Ob0M2N2N3O0O2J501O0O2L4N3O0O2M5L3N3M>\\HaHk6o7J6I5K and ?", + "answer": " is in front of .", + "image": "images/caption_simple_83.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000140658.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": 
"fb076LN2M;[b02g]O=Qb0m0O1G9K5J6H`Ml^Od2Sa05B>DZLWAN_Oj3Z?ZLRAl3m>d000O100O1O1N2N200O1O100O10000O1G9C=K5N2L4I7lNjITD`6k;bIRD`6m;cInC`6Rn2o10000O^ARMTk2eATM\\>k2eAVMZ>j2eAWMD0e=h2_BVMTO3h0Oe=h2hBXMZ>h2fAXMZ>h2fAWM[>h2_10U@XMFOf=i2fCXMZg2iAZMV>e2kAZMV>f2b1O1000000OTA\\M^l2PBTMP>n2nARMR>P3^11O1O002N3Z@hL^>Y3_@iLa`0[300ZDcLi6^3l4001O1O2N3RD[L[7g3eDZLX?g3g@YLY?g3g@YLY?g3`000001O001O3Q@TLd?P4W@RLh?S400000O1O100O10000001OO100000000000000000000O10000000kCnK\\8R4cGoK]8Q4cGoK]8R4cGmK]8T4g3OlCmK]8S4bGnK^8R4h31O00001O0jClK`8U4_GkKa8U4_GlK^L5i;o3kGlK]LOk;U4iGkK]LOj;V4iGkK]LOj;W4k3000000001O00000000001O00001O00001O0000001O0000001O0000000000001O00001O001O00000000001O00001O00000000001O001O00001O000000001O0000001O0000000000001O001O001O0000000000001O000000001O00000000001O00001O000000001O00001O00000000000000001O001O00000000001O001O00000000001O000000001O0000000000001O000000001O0000001O00001O00000000000000001O1O00002N3M2N001O0\\H^JbIN00O1Y41bKj0l0Sc0" + }, + { + "size": [ + 640, + 480 + ], + "counts": "dhj26fc07L2M3M2O2N2N1O1O1O1O1O1O1O100O010O010O0100O010O1O100O1O1O1O1O1O1O2M2O2M3M4LPlh5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is on .", + "image": "images/caption_simple_84.png", + "model_output": " is over .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000427160.jpg", + "mask_rles": [ + { + "size": [ + 512, + 640 + ], + "counts": 
"o=P2P>0000000000000000UOXB^Og=a0^B\\Ob=c0bBZO^=f0cBYO]=f0dBZO\\=f0eBYO[=f0gBYOY=g0gBXOZ=g0hBYOW=f0kBYOU=f0mBXOT=h0nBVOR=i0S10000O10OO2O10000O10000000000O100M3O1O1M3000000O10lAEc<:^CGa<9_CGa<6SCLUOOh=3RC>n<_OSCd0l<\\OTCd0l<[OUCe0k<[OTCf0l0O100M3O100O100O100O1O1O1O1O10000000000M300O1O1O010O100O2O0O100O1O1N11001N100O1O1O1O10O0100000000O101O0N101O1O100O1O100O10000O10000000001O0000000000000000000000mNXBNh=M`B0`=OcBO]=NgB0Z=NjB0V=MWB[Od0h0U=MmB3S=LoB4P=LPC4P=LQC3oN1N2O00102M000000001O0O11O01O0000oNHZB8d=LZB4a=5cADd07g==WBBg=c0XB\\Oh=d0XB\\Oh=e0WB[Oh=f0XBZOg=i0VBXOh=l0VBUOh=l0XBTOh=m0WBSOh=o0VBROi=Q1TBoNl=c100O1O100O2O0O1M3O0010000000000O10000O1O100000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000O11O00000001O000000O100000000001O000000O100001O000O10000000O10000001O0000000000O100001O000000000000O100001O00000O100000O10000001O0000000000O100001O00000000000000O1001O0000000000000000000000000000000000000000000000000000O10000N2N2N2M3O1O1M300O1O1O100O101O0^NmAW1_>N2O00000O2@^A@c>6hAGZ>7hAHY>3WAHb02[>NnA1Qoi18h_VN3M1O1O1O1O2N2M4M2N1N1N2M3K5K5M3O1O100O1000000O11O1O001O001O1O001O00001O00000000O100O1O1M3O1N2O1O1N2N2O1O1O1M3N2N2O1O1L4N2O1O1000000000000000000000000000000000000000000000000000O11O0000000O10000000O11O00000000000000O1001O000000000bB" + }, + { + "size": [ + 512, + 640 + ], + "counts": 
"Sjn73k?3N2N1O2N2N2N102M2N2N1N3O1N2N2M3N2O0O2M3N2M3N2N2O1N2N2O001N2O2FUN[Bn1_=XN\\Bl1`==L3[E^M[7S3_HVMY7n2aHWM\\7m2aHVM\\7n2aHSM^7o2`HRM^7Q3`HQM^7>`F]1Q2VN^7P3bHQM\\7P3dHPM[7P3fHQMY7o2gHRMW7o2hHRMW7o2iHRMV7n2jHRMU7o2kHQMU7n2lHSMS7m2lHTMS7T2oFcMT1]OoN26j0h8R2RGdMP1U1m7W1TGcMP1U1m7W1TGdMP1S1m7W1UGfMn0Q1o7W1TGhMo0n0n7Y1UGiMn0l0n7Y1VGjMn0j0n7Z1VGlMm0h0n7[1UGmMo0f0m7[1VGoMm0d0o7[1VGPNm0b0o7\\1UGSNm0?o7\\1VGTNm0>n7]1UGUNo0;n7^1UGVNo09n7_1UGXNn06o7a1SGYNQ12n7c1SG[NQ1Nn7e1RG]NR1Kn7g1QG^NS1EP8l1mF_N];`1dD_N];`1cDaN^;]1bDcN_;]1`DdN`;[1aDdN`;[1`DeNb;Y1]DiNc;W1\\DiNe;V1ZDlNg;S1XDmNi;R1WDnNk;Q1SDQOm;h0XCgNl0`0n;f0YD[Oh;c0YD]Oh;c0WD]Oj;c0UD\\Om;c0SD]Oo;b0oC_OT<>lCBY<:fCE_<7bCH`<6`CIa and ?", + "answer": " is running on .", + "image": "images/caption_simple_85.png", + "model_output": " is running on .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000106048.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "^9l3`90000[MaF^1^9X10000000ZMbF^1^9aNcF_1]9aNcF_1]9aNcF_1]9aNcF_1]9aNcF_1]9aNcF_1]9aNcF_1]9aNcF_1\\9Y1000000000000000YMdF^1\\9bNeF]1[9cNeF]1[9cNeF]1[9cNeF]1[9cNeF]1[9cNeF]1[9cNeF]1g8]MiGV1@]1g8]MiGV1@]1f8^MiGV1A\\1f8^MiGV1A\\1f8^MiGV1A\\1f8_MhGU1B\\1f8_MhGU1B\\1f8_MhGV1A[1g8_MhGV1A[1g8_MhGV1A[1g8`MgGU1B[1g8`MgGU1B[1f8aMhGT1B[1f8aMhGT1B[1f8aMhGT1B[1f8aMhGT1A\\1g8`MhGT1A\\1g8aMgGS1B\\1g8aMgGS1B\\1g8aMgGS1B\\1g8aMgGS1B\\1g8aMgGS1C[1f8bMgGS1C[1f8bMfGT1DZ1f8bMfGT1DZ1f8bMfGT1C[1g8bMeGS1D[1f8cMfGh3Z8XLfGh3Z8XLfGR1D[1f8cMfGR1EZ1e8dMfGR1EZ1e8dMfGh3Z8XLfGh3Z8XLfGR1D[1f8dMeGQ1E[1f8dMeGg3[8YLdGh3\\8XLdGh3\\8XLdGh3\\8XLeGQ1E[1e8fMdGf3\\8ZLdGf3\\8ZLdGf3\\8ZLdGf3\\8ZLdGf3\\8[LcGe3]8[LcGe3]8\\LbGd3^8[LcGe3]8[LcGe3]8\\LbGd3^8\\LbGP1IV1e8jMbGP1IV1e8jMbGP1IV1e8jMbGd3^8\\LbGd3^8\\LbGd3^8]LaGc3_8]LaGc3_8]LaGc3_8]LaGo0JV1e8kMaGo0JV1e8kMaGo0JV1e8kMaGo0JV1e8kMaGo0JV1d8lMbGn0JV1d8lMbGn0JV1d8mMaGm0KV1d8mMaGa3_8_LaGa3_8_LaGa3_8_LaGa3_8`L`G`3`8`L_Ga3a8_L_Ga3a8_L_Ga3a8_L_Ga3a8_L_Ga3a8_L_Ga3a8_L_Ga3a8_L_Ga3`8aL_G_3a8aL_G_3
a8bL^G^3c8aL]G_3c8aL]G_3c8aL]G_3c8aL]G_3c8e000O100001O000000O1000000000000000000000000O100000000O10000001O0000001O1O4L1O1O1O1O1O001O0000001O0O20O01O1O000000000000000000000000000000O10000O1O1O1000000O100000000001O000000001O0000001O00001O0000001O000000001O000000001O00001O001O000000001O00000000001O00000000001O001O00001O00001O000000001O0000001O00001O00001O1O1O6J2N1O00001O00001O1O2N1O1O1O1O1O000000000000000000000000O10000000000O1M3L4O100001O00000000000000001O001OO1001O00000000000000000000O1001O1O1O00O1O1O1001O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000001O0000000000000000000000000000000000001O1O00001O001O001O001O0000O1001O0000O11O000000O1000000O1O1O100O100O100000000O100000000000000000000000000000000O100000000000000000000000000O10000000000000000GgL^FZ3Z9`0N2O1001O0000000000000000001O00O1001O00000000000000000000000000001O00000000000000000000000000001O0000000000000000000000001O000000000000001O00O11O00000000000000000000000TLcFi3]9WLcFi3\\9XLdFh3\\9XLdFh3\\9XLdFh3]9WLdFh3\\9WLeFi3[9WLeFi3[9WLeFi3[9WLeFi3[9WLeFi3[9WLeFi3[9WLeFi3[9WLeFi3[9WLeFi3_900000000000000000000000O1000000000000000000000000O100aF" + }, + { + "size": [ + 428, + 640 + ], + "counts": 
"[jl16XJ500O1O10000O2O00003L3N2N1N2O1O1O1N1000000O101O001N2O0O2OO01O0100000O10O1M3O11N100O100O100O101O000O1000001N101N100O101O000O10000O2O0000000O2O000O2O0O1O101O000O101O00000O2O0O100O101O0O10001O0O101O000O2O000O100O2O0O1000001N10001O0O101O0O100O2N1000001N10000000001N3N1O002L4M2O01OO3N3L2O1O1O1O1N10001N1O10O0100000O010O1O1O10OO2O1N13M1000000O100000001O0O100000001O000000001O000000000000000000000O1000000000000000000000000N20000O10000000000000000000000000000000000000000000000000000001O1OO1O1001O1O001O0000000000000000000000000000001O00000000000O100001O00O1001O0001N100001O000000000001O0000000000001O00000000001O00000000001O0000000000001O000000001O0000000O2O00001O1O001O001O2N1O001O1O3L3N2N1O6J:F4L5K4L9G7I6J4L8H5K2cK]Gl3e8oKaGU2H^Og8]NaGo1OC`8^NbG`0L]O1=a0V1k7hNeG3j1o0Y6WOnGCQ2R1j5DUHVOU2R1i5HRHUOW2j0n52jGUO[2?R6 and ?", + "answer": " is parked on .", + "image": "images/caption_simple_86.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000052565.jpg", + "mask_rles": [ + { + "size": [ + 458, + 640 + ], + "counts": "e;d2g;O00001O00001O000000001O00000000000000000000001O00001O0000000000001O001O00O10000001O000000000000000000000000000000000000001O00000000001O000000000000001OO100000000000000001O000000O100001O00000000000nMQDi1P4N1O0O2O1N2O0O2O1O1N100010O000000100N100O100000O1O101O1N4L7H7J5J;F8]CVNWUO_B0_f\\2" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is standing on .", + "image": "images/caption_simple_87.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000165039.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": 
"[8m1^;001O1N2O0001O1O01OnNfD3Z;LhD3X;MhD3X;MhD3X;MhD30ROS;k0lD32ROR;j0mD41SOQ;i0nD40UOQ;g0nD51UOP;f0oD51VOn:f0QE40XOn:d0RE40XOm:d0TE4OYO^:MgEf0L30[O\\:NbEh02O0[O]:o0cEF0\\O\\:n0eEEO^O[:m0gEDN_O[:m0hECM@[:m0iEBKC[:k0jEBKC[:j0kECJDY:j0nEAIFX:i0oE@JGW:i0oE@JHU:h0RFAHHU:g0SFAGIU:g0TF6k9JUF6k9JUF7j9IVF7j9IVF7j9IUF9j9FWF:j9EVFk9fNlE>8l0m9eNkE`07l0n9cNkEa07m0n9`NlEc06n0m9_NmEc06n0n9^NlEc07P1m9\\NlEd07P1m9\\NlEd06R1n9YNlEe06R1n9YNlEe06S1n9WNlEf05T1o9VNlEe06o0T:\\NfEe06i0`:WO`Ef0d:YO\\Ed0S;POmDn0i;N2N;E;FgV`0D\\i_O0XC9_aHB_7>`HC`7=`HC`7=`HC`7=`HC`7B8J3N213OL00O000001O000000000000O100O0@cNRE_1m:dNPE\\1Q;?0O1N2O1O01000000O0100000O10000O10000O100001N100000O10000000000O1000O1000000000O10000000000000O10O100000000000000000001O0O200O1O1O1O000000001O000001N11O000O01000000O10000O1O2N1N200O1O2N11O00O10001O0000001O000011N0010O10O00000001O00001N100O2O001O0O2N1000001N100O1O1O100O1000000000001O001N2O00001N101O3L6K2M3K5K5L3MP`o0" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is driving on .", + "image": "images/caption_simple_88.png", + "model_output": " is driving on .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000370270.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": 
"bc01oc01N=D0000000000000O10000O1O1O1O100O1O100O1O100O1O1O1O1O1O100O1O1O1O1O100O1O1O1O1O100O100O1O1O1O100O1O1O1O1O1L_Nh]Ob1ha0c0N2N2O1J6J6O1000000000000001O2NO100001O0000O1lN_Mj@`2V?eMh@X2W?jMi@U2W?lMh@T2X?lMh@T2X?lMh@T2X?mMg@S2Y?mMg@S2Y?PNd@P2\\?PNd@P2\\?TN_@m1a?TN\\@n1e?RNY@o1g?QNY@o1g?QNY@o1g?QNY@o1h?PNX@P2h?PNX@P2h?PNW@Q2i?oMV@R2j?bMQ@O0`2S`0YMR@V3]`0O0000001O0000O100000000000000O100O100O1O1O1N2O1O1O1N2N2M3O1N200O11O0000000000000000000000000[Lk_O_3U`0aLl_O^3T`0aLm_O_3S`0bLl_O^3T`0bLk_O_3V`0]Lm_Oc3W`012N1O1O5K2N1O001O000000001OO1000000O1000000O100O100O100O1O1O100O100O10000O100O100I_Lm_Oc3S`0500O1000N3N1O1O002O000O100001O002N4L0000000000000000000000000000000000000000000001OO1000000000000000000000000000000000000O1I7N2000000000000001OO10000000000000000000000001O0000000000000DU@cLk?S3f0O1O1O1N2O2M2O10000O10000O1O1HS_OYMQa0c28N2M3N2N2M3N2O2M2O1M3O1O2N1N2E;E;N`0QOok83^TG1N2N2O100O1N2L4N2O100O1L4M3O1O1O1M3O1O1M3N2E;N2N2M3N2I7N2L4O1L4L4J6N2L4L4H8L4M3O1OUMc@U1\\?eNcA@`?>T2DmSl1" + }, + { + "size": [ + 640, + 480 + ], + "counts": "Zdf151Mdc0e0B:G7I`0A1O1N2N2mN]Ng_Od1U`0aNi_O`1R`0gNj_O[1T`0gNk_OZ1R`0jNk_OX1R`0lNk_OU1R6PN`3m0]FT1P6RNa3k0_FS1n5TNb3k0^FR1o5UNa3n0ZFo0`1WN?GoN8]ON[9Y3cG^M3l0iN^N`9Y3dFZL?`18X1e8n0`FoLLV19XNOg2\\9kN\\FZN:S8Z9bIoFY6P9hIWGP6j8PJVGP6j8PJUGQ6k8oIVGP6j8oIXGP6h8PJYGo5g8QJYGn5h8SJWGm5i8SJWGl5j8SJWGl5j8TJVGk5k8UJUGi5n8VJRGh5P9XJoFg5S9ZJjFg5X9XJgFh5Z9XJdFh5_9WJ]Fh5i9XJUFc5R:`JhEi3j0gJk9c1UEe3\\Bk3g>b05K5N2K5J5O2O13M4L4L3L5L3M3M6J4L4M4L4K4M5J4M3M4M2`DcIV9`6fF_I[9d6cFZI`9d6aF[Ia9c6P2N1N2N3N2M3N1RK]B\\3f=bL\\BZ3i=cL[BX3h=fLZBCFj2S>aMYBAKi2o=bMYBBLh2m=eMYBAMDAn2]>iMXBB4_2h=lMVBB8\\2d=PNVBA;l0QO7a>YOTD@YNb0g=Igkh5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is standing on .", + "image": "images/caption_simple_89.png", + "model_output": " is on .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000481413.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 
+ ], + "counts": "loi3?g<:F9I6K4L4K4M4M2N2M3O1M3N2N2N2O0O2N101N1O100O2O0O100000000001O1N2O3M0O10000O1O010O001O100O2N2M3N3M2M3M3N3L4L4L4K6I9HWYh3" + }, + { + "size": [ + 427, + 640 + ], + "counts": "mkd29P=3L4M2N2K5L4L4N2N1M3O1O1ON2NdEQOb8g0cG_OZ8b0bGC]8<`GJ]87`GM_82^G3`8N]G6b8I\\G doing with the purple frisbee ?", + "answer": " is holding .", + "image": "images/caption_simple_90.png", + "model_output": " is holding .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000092839.jpg", + "mask_rles": [ + { + "size": [ + 517, + 640 + ], + "counts": "a:a5`:4J6N3O0001O001O001O2N1O001O1O001O2N1O2N3M1O001O2N1O1O1O3M2N2N1O1O2N1O2N2N2N5K2N1O1O1O1O001O00001O2N2N001O1O1O2N001O2N2N1O1O002N3M1O2N2N2N1O2N1O2N1O2N2N2N2N2N2N2N4L1O2N2N1O1O002M5L2N101N4L5J5L1O2N2N2N1N200O2N1N3N0010O00001O000000001O000O1000O11O000000000O1000000001O0000000000000000000000000000O1000000000000000000000000000000001O0000000000000000000000001O0000000000000000000000001O0000000000001O0000000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000000000000000000000000000001O00000000001O000000001O000000001O000000001O0000000000001O00000000000000000000001O00000000000000000000000000000000000000000000000000001O0000O1000000000000000000000O010000O2O000O01000O100O10000O10000O100O10000O1O10000O100O10000O100O10000O10000O10000O100O100O100O10000O100O100O1000000O100O10000O100O100O10000O100O10000O100O10000O10000O2O0O01000O10000O2O0O1O1000O01000000O2O000O1000O01000000O100O1000000O100O10000O1O100O100O100O10000O10000O10000O10000O10000O10000O10000O100O100O100O10000O10000O10000O10000O1000000O100O2O000O01000O2O000O100O1000O10O2O0000000O100000O100001O0002NN2N20O1gLZC1N6O]2i<\\McC1G4NU2h located relative to ?", + "answer": " is sitting on .", + "image": 
"images/caption_simple_91.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000336209.jpg", + "mask_rles": [ + { + "size": [ + 432, + 640 + ], + "counts": "e6]2S;00000000000000001N1000000000000000000000000O1000000000001O000O1000000000000000000O10000000000000000000O10000000O10001O1O000000000000001O0000000O100000000000000000001O00O1001O00000O1000001O002N2N006I9H1O0000000000000000001O0000000000000O10000000000000O100001O00001O000O100000001O0000000000000O1000000000000000000000O100O1O10O10O1N2O100O100O1O1O010O1000000000000001O00001O001O0O2O001O2N2N2N2M2O000000000000000000000000000000000000000000000O101O0000000O10000000000O100000000000001O001O0000000000001O00000000000000001O0O100000000000O1000000000000000000000000000O10000000N2O1O10000O1O2N3M3N6I5KUFAk9`0TF_Om9a0SF_Om9a0TF^Ol9c0UF[Ok9f0UFXOl9h0TFXOl9k0RFTOn9m0QFSOo9n0QFPOP:S1nElNR:U1oEiNQ:W1oEiNQ:[1lEdNT:]1mEaNS:`1oE]NQ:d1oEZNR:h1nEVNR:n1kEQNU:Q2jElMX:U2RFiMe9Y2XFiMg9X2XFiMg9X2XFhMg9Z2WFgMi9Y2WFgMi9Y2WFgMi9Y2WFgMi9Z2WFeMi9\\2VFdMj9]2VFcMh9`2VFfMd9\\2ZFfMd9\\2ZFeMe9\\2YFfMf9Z2ZFfMf9Z2ZFfMf9Z2YFgMg9X2WFkMi9U2VFlMj9S2WFmM:^OA6R9`2RGmM;Fc8\\2SGoM8Hc8Y2UGoM8Hc8Y2RG[MNd0_8^3oGeLQ8[3oGeLQ8[3oGeLQ8[3oGdLR8\\3nGdLR8\\3nGeLQ8[3oGeLQ8[3oGeLQ8[3oGeLQ8[3oGeLQ8[3PHdLP8\\3RHbLm7_3QHcLo7]3PHdLP8\\3PHdLP8\\3PHdLP8\\3PHdLP8\\3oGeLP8\\3PHdLP8\\3PHeLo7[3QHeLo7[3QHeLo7[3QHeLo7\\3PHdLo7]3PHdLP8\\3PHdLP8\\3oGeLQ8[3oGeLQ8[3PHdLP8\\3PHdLP8\\3oGeLP8\\3oGeLQ8[3nGfLR8Z3mGhLR8X3mGiLS8W3mGiLS8W3mGiLS8X3lGhLT8\\3hGdLX8]3gGcLY8[3iGeLV8[3kGeLU8]3iGcLW8\\4O00O1O1001O000O2O1O1O1O00003M001N10000000001O001N2O1O00001O0O10000000001O00000O10000000000000000O10000000001O000000O10gJ" + }, + { + "size": [ + 432, + 640 + ], + "counts": 
"2[5U800000000O10000000000000000000000O10UHiJ]7W5>0000O1000000000000O10000000000000000O1000000000000000000O100000000000000000000O10000000000O10000000000O10000000000000000O1000000000000000000O100000000000000O100001O0000O1000000000000O1000000000000O10000000000000000O10000000000000000O10000000000O10000000000O1000000000000000000O100000000O1G]GdKd8[4900O1O1O1O1N2N2O1O1O1O1O1O1001O1dFTLR9_4F1O000000000000O1000000000000O1000000000000O10000000000000000O1000000000000000000O100000000000000O10000000000O100000000000000O1000000000000O10000000000O100000000000000000000O1000000O]GhKP8X4PHhKP8X4d00000O10_GkKg7U4UHiK^O8Y8o3XHVLf7j3\\HULc7k3^HULa7k3`HUL_7k3cHTL\\7l3fHSLY7m3U100O100O100000000000000O1O1N2N2N2O1N2N200O1O100000000000000000000001O000XG]LUOOU8e3bHlLZ7T3eHRMV7o2iHRMV7n2iHTMV7m2iHTMV7m2dHZMZ7h2aH\\M^7e2`H]M_7[4O1O001O00000000B[HPKf7l4_HSKa7j4cHTK^7k4dHRK^7k4e0N2N20000O10000O100O100O1000000O10000000000001O001O0000001OO100O100O1000000O1000000000000O10000O1O100000000001O001O001O:F1O1O3M6J1O000000O11O1O1O001O1O1O001O0000000000000000000000000000000000000000000000O1000000000000000000O1O1O1000000000000000000000000000000000000000000000000001O000000001O0000001O00000000000000000000000000000000000000O10000000000000000000000000000000000O100000000000000000000000000O1000000000000000000000000O10000000000O10000000000000000O100000000000000000000O1000000000000O1000000000000000000O10000000000000000O1000000000000O100O1L4RNn1M3O100001_FSMSON01Q9Q5YO1O1O0000000000000000000N" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is over .", + "image": "images/caption_simple_92.png", + "model_output": " is over .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000458325.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "]ca33Q17k:MkD;S;K^D?\\;j01L3O2M4N2O4L2M2N202N1O1XOlC6g doing on the ?", + "answer": " is crossing the .", + "image": "images/caption_simple_93.png", + "model_output": 
" is walking on the .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000350122.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "lPT5g0U>7H8J5K4L5L2O2M2N2O1O1O1O1O1O100N12N3M1O1O0O2O2N2N5J3M3M5K:D3M101O001O0O2O001O1N1O2O00O010000O01O01O01000O000O2O1N2WO_OTCa0lfDJV;Q2O0OO2H8L4N2N2N2N2O1000YD]NR:c1iEeNMgNi9k4O01VK[FW4g9eK\\FZ4f9bK]F\\4V:N1N2M3M3^N`E[Ng:`1`1J5L5\\OSCoNR=m0UCmNo doing with the bicycle ?", + "answer": " is pushing .", + "image": "images/caption_simple_94.png", + "model_output": " is pushing the bicycle .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000295809.jpg", + "mask_rles": [ + { + "size": [ + 512, + 640 + ], + "counts": "d547k9_5UFaJk9_5UFaJk9_5UFaJk9^5VFaJk9_5UFaJk9\\5XFcJi9]5WFcJi9]5WFcJi9]5WFcJi9]5WFbJj9]5WFbJj9^5WFcJg9]5YFcJg9]5XFdJh9\\5XFcJi9]5WFbJj9^5VFbJj9^5WF`Jj9`5VFaJi9_5WFaJi9_5WFaJi9_5WFaJi9_5WFaJi9_5WFaJi9_5WF`Jj9`5;O10000O10000000000000000O1000000O100000000O1000000000000000000000000000000000000O10000000000000000000000000000000000000000000000000000O1N200002NN2000000000000O11O00O10000O1000000000000O1000000000000000000O100000000000000O10000000000000000000000O1000000000000000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1001O000000O11O0000O100001O00O100001OO10000000000000000000000O11O000000O11O00O100LPEXKP;l401OO1000000000000LPEXKP;l4000000000000000000000000000000000000000001O0000001O000000O11O000000ISEZKn:l40IQE^Kn:i410000001O00HRE]Ko:b4SE]Km:d4RE]Km:c4SE^Kl:b4TE]Km:c4RE^Kn:i411OMRETKn:i4UEWKk:i4TEWKm:h4TEXKl:h4TEWKm:i4SEWKm:i4SEWKm:i45000000O10000000000000000O11O003MM300001O00O1O11O2N1cMZK^If4n8MmDZKR;f4nDZKR;j4000001O00O100001OO100001O0000O10\\NTKTHl4`91OO10000001O0000000000000000000000000bLTKhKl4f70000000000000000000000000000001O000000000000O1001O000000000000000000000dNSKeGm4[8SKeGm4g900000000000
0000000000UNRKdHn4W90000000000000000000000000000000000000000000000000000000000000000000000000000O11O0000LQEVKP;j4PEVKP;n4O0000000000000000000000000000000000LPEWKQ;i4PEVKP;n4O0000LQEWKo:i4QEWKo:i4QEWKo:m4000LPEWKQ;i4PEVKP;n4O0000MPEVKP;i4QEVKP;k42001O00001O1O00LQEWKo:i4QEWKo:l4100001O1O001O00001O0000001O0000001O1O1O3M001O1O0000001O1O1O00001O1O00001O1O3M1O001O001O0000001O1O1O00001O1O001O001O001O000000001O00001O1O1O2N1O1O1O1O001O001O5K00001O6J1O001O1O1O001O1O001O1O000000RI" + }, + { + "size": [ + 512, + 640 + ], + "counts": "PT[86h?3O0O2N10000O1000001N10001O00000O101O00000000000O10000000O1O010O0100O100O100O100O1O1O100O1000000O100000000000000000000000000000000000000000N2D=G]`86e_G5K4L000001O00000000000O100000[L" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is driving on .", + "image": "images/caption_simple_95.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000511760.jpg", + "mask_rles": [ + { + "size": [ + 640, + 296 + ], + "counts": "i?9V3HZ=d0a_OD00000:0F0J5Q1MQOe?S400000000000000000000O1001O00O10000000000000000000000000000000000000000000000000000000000000000001O00O1001O000000O10000000000000000001O000000000000O11O0000000000000000000000000000000000O100000000000000000000000000000000O100001O0QLb@\\3^?bLe@4Ih2b?oL]Ao2k?O1O>B:gM^^O`1Tb0M1O1O000000000000000000000000001O0000000000000000ZM^NnBb1R=`NlB`1S=eNiB[1W=iNeBW1[=lNbBT1^=mNPAB2M7Om0_?n0`@RO_?P1_@QOa?Q1\\@POe?e20ZMe@g0[?QOn@n0R?QOPAXO[OR1e?ERAmN]OK1b1`?BkA=U>[OSBe0m=YOUBg0k=XOVBh0j=WOWBi0h=XOXBh0h=XOXBh0i=WOWBi0i=WOWBi0i=WOWBi0i=WOWBi0i=WOWBi0i=WOWBi0i=WOWBi0i=XOVBh0j=XOcAAnNW1_?ZOf@TO and ?", + "answer": " is standing on .", + "image": "images/caption_simple_96.png", + "model_output": " is standing on .", + "eval_result": "False" + } +] \ No newline at end of file diff --git a/evaluation/GAR-Bench/model_outputs/gar_1b_vqa.json 
b/evaluation/GAR-Bench/model_outputs/gar_1b_vqa.json new file mode 100644 index 0000000000000000000000000000000000000000..a0a64d493511f1a7eca49b51de72ae83c68e4bc8 --- /dev/null +++ b/evaluation/GAR-Bench/model_outputs/gar_1b_vqa.json @@ -0,0 +1,12166 @@ +[ + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1582.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "`fS2:bo0=C9J3M3M1O1O001N1010O2N2N3N8G4L2O1N3N3M6I4Mgn3NYQL8J4K5L2N2M3N2M2O2M3N2M:G000O1000000O100O10000O1000001O0O10000O100O101N1O100O2O0O2N101N100O101O0O2O0O2O0O2O001N2N101O0O1000O100O100O010O1O100O010O10O10O10O010000O10000O1O100O00100O100O10000O100O100O100O100O100O100O100O1O100O10000O100O1O100O2O000O10000O10001N100O100O100O1O1O2N1N2N4KY[cP1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "igR2`02:`n0j0iQOhNWm0^1cROmNTm0Q2N2N2N1O1O1O10O01O1O100O00100O10000O1O10O001O0O2O11N2O1O1O1N3N1O1N101O2N1O1N101OO10000O1O0KdSOfL^l0V38M3N201O01O0000O1O1N2O2N100O101N101O0O1O2N1O1O2N1N3N1O2O001O1O0010O1000O100O1O1O2N2O03ON2M5K3M4L3M4K6K6J8G3N1N2N2N4KjVaR1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "`i[d0`0i0BLl0bj0WOVVOY3bi0nLZUO2KIMa3hj0a1M4cUOnJfi0T5VVOQKl1Bhe0S6[XOnI[1;Xf0m7N000O2O00000000000000000000000O10000000000000000000O1000000000000000000000000000001O1fJTYO^1nf0]N`YOX1Rg0PLmWOa1\\1^2Wg0mLYYOS3lf0SL_WOLk1P4ei001O001O00010O000O100000001O00000000001O0000000001O000O100000O1000001O00000000001O000000001O00000000001O000000000000000000000000000000000000000000IXLTTOj3hj0PMZUOX3Pj0g1K5J6N2O1O1_OhIQWOa6mh0:N2H8G9I7EZHVXOl7ig06O100000000000000000000000000001O00000000000000000000001O000000001O000001N1000000000O101O001N100O2N1RKTXORNMX2Yh0QNhWO[OX1@]Ok1_O[Ngh05mWOOk1`0\\NVOdi0L[ZO1Pl_=" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": 
"Sla<6Xo00jPOQ3TMo_OjC\\O03R1j`0_9WBlEmLBRa0[9[5nLbYORJ6J211Ijf0c4S4M3O101O00001N101O001O0O2O2N2dM^TO7kk0lNnTOn0Xm0J4K3N1O1O00001OO10000001O00000000000000000000000O100001O00000O1001N10000000000000000000000000000000O10000001O000000000000000000000000WHmNe@S1W?TOf@l0X?WOf@j0X?ZOf@f0Y?\\Of@d0X?^Oh@b0W?@h@`0V?Bj@>S?Fm@9P?JPA6n>LRA4m>NRA2l>0TA0j>2VANi>3WAMh>4XALh>4XALg>5YAKg>5YAKg>5YAKf>6ZAJf>6ZAJf>5[AKd>6[AKe>5[AKe>4\\ALd>4\\ALd>3^AKc>4^ALb>4^ALc>2^ANb>2^ANb>2^AOb>O_A1a>O_A1a>O_A0c>O]A1c>0\\A0d>0\\A0e>O[A1f>NZA2h>LXA4i>JWA7j>HVA8k>FVA:k>EUA;m>CSA=n>CRAo5a3gGRLZ2l_OBT`0?j_OBV`0?i_OAW`0`0h_O@Y`0?g_OAY`0?g_OAZ`0?f_O@Z`0`0f_O@[`0?e_OA[`0?e_OA\\`0>d_OB]`0=c_OC^`0l`0_OV_O`0k`0]OW_Oc0l`0YOU_Og0Qa0QOQ_Oo0og010000001O0000000000O10000000O10001O00O2O00000001O0O100000O1000000000000000000000000000000001O0000O1000000000000000000000000000000001O0000O10000000000001O00O1000000O1000000O1YNoN[SO4OJKV1ck0lNbTO^3ij0W1ZOf0gNY1WNeIaXOg7ef0W1[Oe0\\Od0jN`EP\\O\\;Rc0fDi\\O[b3TNl1UNU]OXETd0`9j[OaF`e0V8j1\\Od0XOl0mIVWO\\OolS5" + } + ], + "question": "Which one among , , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. " + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_0.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2925.jpg", + "mask_rles": [ + { + "size": [ + 460, + 620 + ], + "counts": "^hb19Q>>D7I7I7I7I6J6J6J2O1N2O1N2M3N2M3M3M3M3N2M3M3L3L5L4K5G9F:H7O200001O1N3N2N2M4L4L4L5J8H:^OT]R1" + } + ], + "question": "Among , , and , which mask is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
None of the above are in the mirror" + ], + "answer": "D", + "type": "mirror", + "image": "images/vqa_1.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/49.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "Y_Rb05;6jn0KTQO6kn0MQQO5on0>000001O00000000O101O000O4YOXa\\e0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "hbS:46o0kl0]ORSOS2`j0k2[Oe0cNQJPXOO2Q6kg0UJQXOJ1U6lg0SJQXOI2U6lg0TJQXOG3V6kg0ZJjWO@;W6kg0aJUXO`5jg0`JVXO`5jg0aJTXO`5lg0aJSXO`5lg0aJoWOc5Qh0W1O001O0XXO]HRg0f7hXObHTg0a7hXOhHPg0T80001O0001O000001O00000001O000O100O1WNlXOeJUg0Z5mXOdJSg0]5mXOcJRg0^5nXOaJSg0_5mXO`JTg0a5jXO`JUg0a5kXO_JUg0a5kXO^JVg0a5kXO^JUg0b5lXO^JTg0a5mXO^JTg0a5mXO_JSg0a5mXO_JSg0a5mXO_JSg0a5mXO_JSg0a5mXO_JSg0a5mXO_JSg0a5mXO_JSg0a5mXO_JSg0`5mXOaJSg0_5mXObJRg0^5nXObJRg0^5nXObJRg0^5nXObJRg0^5nXObJRg0^5mXObJTg0^5lXObJTg0^5lXObJTg0]5mXObJTg0^5lXObJTg0]5nXObJRg0]5oXOcJQg0]5oXObJRg0^5nXObJRg0^5nXObJRg0]5oXOcJQg0^5hXOgJYg0Y5eXOiJ[g0m6100O1000000O10000O10O100000O10001N10000O1O002WNdXOkI^h0Z5`WOeJ^i0U5j0kNkUOmKfj0^2\\2]M`ecj0" + } + ], + "question": "Could you confirm whether and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. 
Neither nor is in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_2.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2905.jpg", + "mask_rles": [ + { + "size": [ + 460, + 620 + ], + "counts": "T[l342NP>f0C9G0O2O03L2O000000000000O2OO10O100000000O10O1000O100000000O10O100000O10000000000O010000000O100000O1000O1000000O100000O10O10O1O001O1O1O1O1O001O1O1O001O1O1O1O1O001O10O01O1O1O1O001O1O1O001O1O1O1O1O001O1O1N2O002Nn^Z3" + }, + { + "size": [ + 460, + 620 + ], + "counts": "cQ_5b0i=2OO010O01O001O01O01O010O0010O01O010O10O01O10O002Mfik2" + } + ], + "question": "Can you tell me if and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_3.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1496.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": 
"0Q5oj0000000000000000000000000000000000000000000O10000000000000000000000000000000000O100000000000000000000000000000000O100000000000000000000000000O10000000000000000O100000000000000000000O1000000000000000000000000000000000000O10000000000000000000000000000000000O100000000000000000000O10000000000000000000000O1000000000000000000000000000000000000O1000000000000000000000000000000000000O100000000000000000000000000O10000000000000000000000000000O10000000000000000000000O100000000000000000000O1000000000000000000000000000000000000O100000000000000000000O100000000000000O1000000O10000000000000000000000O1000000000000O100N2]Oc0O1O10000O1O100000000000000001O00002eSObLPl0j3M3M2N2N1O1O00001O00000000O10000000000000000000000000000000000O100000000000000000000000000000000000000000000O1000000000000000000000000000000O100000000000000000000000000000000O1000000000000000000000000000000000000000000O1000000000000000000000000000000O1000000000000000000000000O1000000000000000000000000000000000000O10000000000000000000000000000000000O1000000000000O1000004L2M:GP1aLgROU2dn0nNYmlg0" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. 
No" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_4.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/515.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "YnmT1c1bm0m0L201O000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000dRO" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "][gS1:7HUo0j0H2M101O00001O0000000000000000000000O100000000001O0000000000O100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001O01N10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000dD" + } + ], + "question": "Are and located within the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. 
Neither nor is in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_5.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1132.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "^Tfo0;ao06M101O00O10000000000000000000O1000000000000000000000000000000000000000O10001N4JgkP7" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "XjZU1Z1lm0k0L4M2O10O01O00010O000001O01O0010O000010O01O010O0010OO2M3YOj0mNTXf1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "dQV:6ho05K4M1O2N1O101N1000000O10000O100000000O1000000O100O010O100O10000O100O1O1O100O100O100O1O001O100O1O1O10O01O100O1O1O100O1O1O1O001O1O1O1O1O001O1N2O1M30000000000000000000O100000000000000000000000000000000O100000000000000000000000000000000O100000000000000000000000000O100000000000000000000000000000000000000O100000000000000000000000000000000000000000000O100000000000000000000000000000000O100000000000000O10000000000000000000000000000O100000000000000000000000000O1000000000000000000000000O10000000000000000000000000000O1000000000000000000O1000000000000O10000000000000000000000O1000000000000O1000000000000000000O100hNdQOl0`o0XO3M1N2O0000fo[10[ocN=[QOMan04\\QOOcn0E]QO117on0IQQO6Qo0IoPO5So0NjPO2jn0JZQOe0cn0]O[QOe0cn0\\O[QOe0en0\\OZQOe0en0\\OYQOe0gn0:0O1]O\\QOJdn04_QOKan06^QOJbn07]QOIcn0;YQOFfn0j0000000000000000O10000000000001O0000000000000000000000000000O10000000000000000000000000000000000O1000000000000000000000000000000O100000000000000000000000000O1000000000000000000000000O1000000000000000000O100000000000000000000000000O1000000000000000000000000O1000000000000000000000000O1000000000000000000000000O1000000000000000000O10000000000000000000000O100000000000000000000000000O1000000000000000000000000000000O1000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000O1000000000000000000000000000000000000O1000000000000000000000000O100000000
000000000000000000O100000000000000000000000000000000000000O10000000000000000000000000000O10000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000O100000000000000000000000000O1000000000000O100000000000000000000000000000000O1000000000000O1000000000000O1000000O1O1LTPh3" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "TQa73a00l0P=jNYC6Ga40WL0e0Nl4mk0]IVTO239O0Od04oNLd0Yn0A\\RO;fm0B]ROCjC4QN12ML6d0e;W9UD_Gmc0n6S\\O`GRg0Z6XYOiI2Lmg0j1eWOd13eLSj0^1TVOc1_j0[NdUOb1^j0\\NdUOc1]j0\\NdUOb1^j0]NdUOa1]j0^NdUOa1]j0^NdUOa1]j0^NeUOa1[j0^NfUOa1[j0_NeUOa1[j0^NfUOb1Zj0^NfUOb1Zj0^NfUOb1Zj0^NfUOb1Zj0^NfUOb1Zj0^NfUOb1Zj0]NgUOc1Yj0]NgUOb1Zj0]NhUOb1Xj0^NhUOb1Xj0^NhUOb1Xj0^NhUOb1Xj0]NiUOc1Wj0]NiUOb1Xj0^NhUOb1Xj0^NgUOc1Yj0\\NhUOd1Xj0\\NhUOd1Xj0\\NhUOd1Xj0[NiUOe1Wj0[NiUOe1Wj0[NiUOe1Wj0[NhUOf1Xj0YNiUOg1Wj0YNiUOg1Wj0XNjUOh1Vj0VNlUOj1Tj0UNmUOk1Sj0TNnUOl1Rj0TNoUOk1Qj0TNoUOm1Qj0SNoUOl1Rj0TNnUOl1Rj0SNoUOm1Qj0SNoUOm1Qj0SNoUOm1Qj0RNPVOn1Pj0QNQVOo1oi0PNQVOQ2oi0oMQVOQ2oi0nMRVOR2ni0mMSVOS2mi0mMSVOS2mi0lMTVOT2li0lMTVOT2li0kMUVOU2ki0jMVVOV2ji0iMWVOW2ii0hMXVOX2hi0gMYVOX2hi0gMYVOY2gi0fMZVOZ2fi0eM[VO[2ei0dM\\VO\\2di0cM]VO\\2di0cM\\VO^2di0`M^VO`2bi0_M_VOb2`i0]MaVOc2_i0[McVOe2]i0YMdVOg2[k000000000000000000000O1000000000O10O1000000000000000O10000000000000000000O1000O1000000000000000000000000000000O010000O1001O0O100001O000O100001O0O100000O100O100000O2O00000000O100000000000000000000O1000O10000000000001O00O10000000000000000O01000000000000001O00000000O01000000000000000O1000000001O0O10000000O1000000000000000000O100000O2O000000000000O10000000000000000O100000O101O000000O1000000000000000O10000000000000000O100000000000O1000O10000000000O1000000000000000000000000000000O1000000O1001O000O100000O1000000000000O10O11O00000000O10000000000000000000O10O1000000001O0000O10000O1000000000O1000O10000000000000000000000O100000000000O101O000000001O000000O10000000000000000000O1000000000O100000000O100000000O1000O1000O10000000O2O000000O10000000000000O10000000O10O10001O00000000O10000000000001N010000000
000000000000000000O1000000000000000O1000O100000001OO1000000O1001O0000000000O100000O10001O0000O100000000001O000O1000O100000000000000000000O0100000001O00O10000000O100000000000000000O10O101O0000O10000000O1000000000000000000000000O100000000000000000000000000000000000O1000O10000000000000000000000000O100000000000000000000O10000000O100001O000000000000O100000O100000000000000000000O100000O1000001OO1000000000000000000000000O01000001O000000000000O1000000000O10000000000000000000000000O100000O101O0000O1000000000000000000000O100U^OmLaE4MOHNd03ZON238M3NG0jP3jFlL_Lg0dNEH1b0JB015MHm0KS>T3YHQMXINHNY10gN3a`0P3i;0000001N100000O100O1000000001N10O101O00iNmLaUOS3_j0nL_UOS3bj0kL_UOU3fk0000000000000O10000000000000000000WYOnLVJN02O1OOYj0_O^UO1e0OO1]OOO11ON31Z1ik0eNWTON0=L3Ne1ok0PNQTO01X3lk0c0_Oa0iNW1hMYJaXO4H7OP6me0cI^ZO3M4JQ8Ve0jGoZO6L3Mb8Te0[GnZOl9`d0QFa[Oc:hc0V1YOg0ZOf0[OkBT^O`=Ya0^Bh^OV>\\?hARAN10Nd>[>_AZA1M31K238OH0Lb>W>[AWBS1A^O0h>c=i@bBOOga0l, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. " + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_6.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2897.jpg", + "mask_rles": [ + { + "size": [ + 460, + 620 + ], + "counts": "ZVg37Q>8I5M3M2NDaBM]=OnBMP=1TCOj<1XCOg<0[COd<2]CMc<1_C0`<0aCO^<1cCO]<1cCO\\<3bCNY<:cCGU<\\1N5M5K1O11N2N2N10O000000000000000001O000O100000000O100O2L5eN\\CNH411ZdZ4" + }, + { + "size": [ + 460, + 620 + ], + "counts": "hh\\45U>4M4M2TBBf=c0O001N1O1O1000000001G\\BGe=8]BFe=`00001O3M1O0002N10O0000000001O1N2O000N22N1N2N2O0O10000000000000000001O000_OaB4a=Ib0OTQa3" + }, + { + "size": [ + 460, + 620 + ], + "counts": "i`[7:i=:L300O01O1O1O101N1O1O1O1L6JaXT1" + } + ], + "question": "Which one among , , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
None of the above are in the mirror" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_7.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1116.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "dbnh07ho02N2O2cPOKjn0f0000O10001O000000O11O0000001O0O2O00001O0O1CPQOHRo06>N1O3NX]S>" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "egX`04jo02O2K4M4M2N2O1B>O1O10000001TOVQO`0jn0_O\\QO and are in the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_8.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/16.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "dk0Y3gl000O1O1O100O1O100O1O100O1O100O2N100O100O100O1O100O100O1O1O1H8N2O1@`0VOQRO_OSn0`0g0O2N1O100O1O100O1O100O1O1O2N1O2L6F\\TZV1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "PP]63mo01O1O1O1O1O1O001O1O1O1O1O1O1O001O1O1O1O1O001O1O1O1O1O001O1O1O1O1O1O001O1O1O001O1O1O1O1O1O1O1O001O1O1O1O1O1O1O1O001O11O000O0100000O1000001O00000O100000O1000O1000000000000O1000000O1000000000000O10O02N1O1O1O1N2O0O3N1N2O1N1O2O2M2O1N101N2O1N2N2N2O1N3M2N2Nnmdm0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "nla:d0]o0Annc0k0dP\\OZ2gM1O1O00001O0000000001O0000000000000000000001O000000000000000000000000000000001O0000000000010O0000O10000M3M3N2O1O1O1O1O1O1O101N100O100O100O10000O10000O100O1N2M3N2N2N2O1O1O1O1N2mNS1O1O10000O1000000000001O00000000O1000O100O100O10000O100N2G9N3N1O3M3M[Tlh0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": 
"0Yd0Y2d@X2\\?hMd@X2_?eMa@[2b?aM_@_2d?]M^@b2i?VMX@j2l?QMU@o2P`0iLT@V3R`0`LR@`3]`0gKm_OY4me0000001O000000000000000000000000000000001O00000000000bMdTO8\\k0QO`UOj0`j0ROfUOl0Zj0SOhUOl0Xj0ROkUOm0Uj0QOnUOn0Rj0QOPVOn0Qj0POQVOo0oi0QOQVOo0oi0POSVOo0mi0POTVOP1li0POTVOP1mi0nNUVOQ1li0mNTVOT1mi0iNUVOW1ki0gNWVOY1ki0bNXVO^1ki0\\NXVOd1Sl0O0000000000000000001O0000000000000000001O00000000000000000000O1\\I\\NbXO1M0i0MYO4_4b1hb0i0o\\OWOPc0k0n\\OVOQc0n0iXO\\Ml3g1[c0]2Y[OeMgd0b3mXOcMSg0e500O1000000O100O1nJ[GR^ON01;OU3g8^>\\Go]O10O2N021M01l2f8Q?]Go]O1001N101N101Na2i8[?\\Go]O2OO2N101N101Na2i8[?\\Go]O2OO2N101N101Na2i8[?\\Go]O2OO2N101O0O2Na2i8[?]Gn]O1O03M101O0O2N`2j8\\?\\Gm]O;3D0O101N10`2h8U?WGZ^Oh0M]O001N101Na2h8U?WGY^Oi0N]ON03N0O10a2f8U?YGY^Oh0NE1F001Na2g8V?XGY^Oh0NE2EO11Na2g8W?VGY^Oi0NE3DO11N`2m8Z?WGQ^O:4E0M^3T9X>nFY^O0fim0CfSO4iW\\2JafdM0iodQ1" + } + ], + "question": "Which one among , , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. " + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_9.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2307.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "m]U61m>3^OORB2n=OQB1o=0oA1P>1nA1Q>0nA0R>1lA0T>0kA17I`=7XB05LT=KRCf0GBV=NnB`0JCY=0jB=KE[=0gBi0Z==101N101O01O5K4L2L1L4L4O11O2N2N2N3N1O101OO1N3M2N1N2O00O002I602O6K5J01O001O0OCdBYOZ=g0iBWOW=i0kBTOU=3hB;g=D[B0K0i=O^B0K0g=LbB3I0V>0jA0V>OlA0T>OmA0T>OnA0R>OQBOo=0RBOo=0ZeW2" + }, + { + "size": [ + 480, + 640 + ], + "counts": "Qo_84d>9I6H7L5H7J7L4L4M3M2K6F]N[C_1_10000001O001O1O1O1O1O1O1O1O001O0O2O1O3LaSR4" + } + ], + "question": "Among , , and , which mask is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
None of the above are in the mirror" + ], + "answer": "D", + "type": "mirror", + "image": "images/vqa_10.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1468.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "_`Uo0a0]o02N2N3N1O1O1O1M3G0\\QOVO27mm0Q1nQOSOPn0_101N100000000000000001O01O000000000000000000000000000000000000000000000000000000000000000000001O00000000000000000O100000000000000000000000000000000000000000000000000000001O00000000000000000000000O10000001O00000000000000001O000000000000001O0O3[NSROH`_f4" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "RoU3k0So03N2N1O1O100O1O100O1O10000O100O100O10001N100O100O1O100O100O100O10000O100O100O10000O100O100O10000O100O100O100O1O10000O1000000O100000000O100000000O100O100O1O1O1O1O100O100O100O100O100O1O100O100O100O1O100O100O1O1O010O1O1YOjMbSOY2Vl0l0M3N2N2M3N2O1O1O1N2O1N2O1O1O1O00100O100O100O100O10000000000O101OO2O000000001O001O001O001O001O1O2N2N2N2N1O3M4L2N1O001O00001O000000VOiLkTOW3oj0QMoTOo2Pk0SMoTOm2Pk0UMoTOk2Pk0VMPUOj2mj0ZMRUOf2kj0]MUUOc2jj0^MVUOb2ij0_MWUOa2hj0`MXUO`2fj0bMZUO^2ej0dMZUO\\2ej0eMZUO\\2ej0fMZUOZ2ej0hMYUOY2fj0a1O1O100O100000000000WL]UOU2cj0kM]UOU2cj0d1000000001O00000]K]UO4Oh2ej0PM_UO6Nj2cj0oL`UO5Ol2bj0nL_UO5On2cj0lL^UO50o2cj0kL]UO60o2ej0iL[UO70Q3hj0eLUUO=3n2jj0cLQUOa05l2Sk0VMlTOj2Tk0VMkTOk2Vk0o000001O0bLfTOZO4V2Vk0`NhTOUO, , and , which mask is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. " + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_11.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1555.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "]hl=7ao09N2N2N2O1M3M2L5N2N2O1O2N1O1O2N3^QOmNQn0d1M2N1O1O00000000000000000001O01O000000000000000000000000000000000000O1O2UOPY]h0" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. 
No" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_12.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1503.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "gdf05go08J4L4L4M2N2N2N2N2O1N101N101N10000O2O000O1000000000000000O1000000O2O000O100O1O10O1O10O01O1N2O1O100O101O0000001OO100000001O0O2O1N10000001O00000O01000001O000O10001N101N101O0O2N2N2N1O2N3M3L4K8Gd[YT1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "SYc01ho0=G7J4M3M2N2O0O2O0O2O1N101N2O001O000O2O00001O00000000000001O001O00000001O1O001O02N3M00O2N1O100000000001O00001O0001O000O100N2M3L4N2O1O1N2O1O1O1O100O100O01000O10O1O10O0100O1O2O0N2M3J6N2000001N10001O0001O01O00000010O00101N1O1O00000O100000000000000000O200O001O001O1O2N1O2N2N100O0O3N1O1O001N2O1N101O1N2N101N1N3N1O2M3N3LRg^R1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "Zjd1Z1\\n0iNiQO^1Qn0cNoQO^1Pn0bNPRO_1om0aNQRO`1nm0`NRRO`1nm0`NRRO`1nm0`NQROb1nm0_NQROa1om0_NPROb1Pn0710O00001O00001O001O00001O00001O00001O00001O00001N10001O0O2O000O2O00001O0001O8Hl0SOS1mNmSgT1" + } + ], + "question": "Which of the following mask :, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. None of the above are in the mirror" + ], + "answer": "D", + "type": "mirror", + "image": "images/vqa_13.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/136.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "bPk1=ao05K5L3L4L5K4L3M4K4O1O1O001O00000000O10000O100O100O1O100O1O10000O10000O100O1O1N2N2N2N2N2O1N2N2N2O1N2O1N2O1N2N2N2O1N2N2N2O1OQPWT1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "c\\VV1R3kl0c2[SO]J03:Nh07Xf0]8H3L2O2N1O1O100O100000000O1N2E" + } + ], + "question": "Among , , and , which mask is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
" + ], + "answer": "D", + "type": "mirror", + "image": "images/vqa_14.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/130.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "nml8l0Po0;F9H=B;E5L4L2O2N1O1O1N2[NcMfUO^2Xj0hMaUO[2ai0WMVVOc03Y2ei0\\MPVOb02W2li0kNlUOX1Tj0Z2O10000000000001O1O1Oe0[O3M001O00001O1N101O1O6J`0@4L5K4L8H4L7I6J2N2N2N3M2N4L4L4L3M5K5K4L4L4L3M2N2N2N2N2O1N2N2N2N1N3N2N1O2N2N1N3NVPbl0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "Q[k8c0Pm0^2N101O0000000000000000000000000000000001O0000000000000000000001O0010O0001O00000000O10001N100O1O100000000000000001O0001O000001O000000000000000000000001O001O>AX2cMhdcl0" + } + ], + "question": "Can you tell me if and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_15.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1900.jpg", + "mask_rles": [ + { + "size": [ + 530, + 730 + ], + "counts": "SSR24[`04L4L3M4M2N3M3N1O100O2N101N100O2O001N101O001O0O2O00001O00001N10001O001N101O00001N101O001O001N101O001N1000001O001O001O0O101O001O1N1000001N101O0O2O001N2O000O101O000O2O0O1O2O0O2O0O101O00001O00000O2O001O1O1O1O00001O000O2O001O000O2O001O1O0O2O00001O0O2O001O001O0O2O00001O00001N10001O001N2O001N10001N10001O001N101O1O000O2O00001O000O2O001O1N10001O000O2O00001N101O001O0O2O001O0000001N10001O001N101O1O0O2O00001N101O001O1O0010O01O012M2N1O01O000O2O0O1O2O0N201N100O2O0O1O1O2N1O1O2N100O1O2N100O2O0O1O1O2N1O1O100O2O0O1O1O010OO2N1001O1O100O1O101N100O1O2N1O100O101N1O1O100O2N1O101N1O101N1O1O101N1O1O101N1O100O2O0O1O101N1O1O2N1O101N100O2N1O100O2N1O100O2N1O100O2O0O1O1O2N100O2O0O101N1O1O1O2N100O1O101N1O100O1O1O1O0001O001O000O2O01O100O1O2N1N3M2N3K5L4K5K9EVih3" + }, + { + "size": [ + 530, 
+ 730 + ], + "counts": "nZW55U`09H8H8H8K4I7H8H8K5K5K5N2O2N100O1000001O00000000010O00000000000000001O0000000000001O0001O00000001O00000000001O000001O000001O000000000000001O000000000001O01O00000000001O0000000000010O0000000000001O0000000000001O01O000000000000000O1M3K5J6K5J6J7J5J6K5J6J6L4K5H9KjX_4" + }, + { + "size": [ + 530, + 730 + ], + "counts": "P1l3f<010O000001O0fM]ClN5S1^, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. " + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_16.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1108.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "joc12mo02N2O0O1O1O100O2O0O1000000O10000O10000O10000O10000O1000000O10000O10000O100O1000000O1000000O10000O100O10000O10000O1000000O1000000O10000O100O1000000O100000000O10000O100O10000O10000O1000000O10000O100O100O10000O100000000O10000O100O10000O10000O1000000O10O11O0O100O10000O10000000000O100000000O10000O100O100O1000000O10000O100O100O10000O01001O0O100000000O10000O1O10000O1000O01000000O2O0O10000O10000O1000000O100000000O10O02N1O10000O100000000O100O00100O1000000O10001O0O10000O1000O0101O0O10000O1000000O10001N100O10000O1000000O1000000O10000O100O100O100O100000000O100O100O100O1000000O1000000O10000O100O10000O10000O100000000O100O100O100O1000000O10000O100O100O10000O10000O10000O1000000O100O100O100O100000000O1000000O100O10000O10000O10000O1000000O1O100O10000O1000000000000O100001O1OS2mM1O1O001O00001O00001O1O1O1O0000001O000000001O001O00001O0000001O0000001O0000001O001O1O1O001O00001O0000001O001O00001O00001O001O001O001O001O001O00001O0000001O000000001O00001O001O0000001O001O001O1O00001O00001N101O000O2N1O2N2K_PT1JjokN2O0001N20O010O1O000001O0O1OXPZe0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "mhVg02; and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. 
Neither nor is in the mirror" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_17.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1080.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "noV41no0100O2O0O100O10000O100O10000O10000O10000O10000O100O10000O10000O1000000O10000O1000000O100O10000O1000000O1000000O1000000O1000000000000O100001O000000001O00001O0000001O00001O0000001O001O001O00001O00001O00001O001O00001N1000001N1000001N101N2NXPRP1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "ajd7Y3`k0[1F7L3O1O10O0100001N10001O0O10001N10000O2O000O10001N10000O101O0O101O0O10001N10001O0O10001N100O2O000O2O000O10001N1000000O2O000O101N10000O2O00001N10000O2O000O101O0O101O0O100O2O000O101O000O2O000O101O0O10001N10000O2O000O101N10000O2O000O2O000O101O0O10001N100O10001O0O10001O0O100O2O000O101O0O10001N10001N10001N100O10001N10000O2O1N2Ng0YOSUVk0" + } + ], + "question": "Could you confirm whether and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_18.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/640.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "Xl0b0^o00000O10O10O1000O10O10O10000000O10O1000O0100000O0100000O10O10O1000O100000O10O100O10O1000O10O10000O1000O10O1000O100000O01000O10O10O10000O01000O100O2NXTdU1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "Wm0[2em0000000O01000O10O1000O10O10O10000O0100000O0100000O0100000O01000O10O10O1000O01000O010000O10O10O100O01000O10O10O100O10O10O100O100O10O02aNZROa0gm0[O^ROc0an0N2N2N2_Oa0N2O1N2N2O2MPR`U1" + } + ], + "question": "Can you tell me if and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. 
Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_19.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1004.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "fTZj0X1[m0`1fRO^Mjl0S3I6J4N2N100O1O1O1O1O10O01OM3L5L3L4O1100O10O100000O0001N110O01]YOiLUJ4he0T3n_OeMh?\\2V@iMg?W2X@lMe?U2Y@nMf?Q2Z@RNd?n1Z@UNe?k1Z@WNe?i1Z@YNe?g1Y@\\Nf?d1Y@]Ng?c1V@aN^KUN\\c0Z3TAdN]KUN^c0W3TAfN\\KTN`c0V3SAgN[KUNbc0U3Q\\OfLN4X4n1TLUNec0S3l[O_MW1VO_1V2fMTNhc0[4h\\O^Ke1V2iMSNjc0_4a\\OZKh1X2kMPNkc0a4_\\OYKh1X2mMoMlc0a4]\\OZKg1Y2nMmMnc0a4[\\OZKf1[2PNkMoc0`4Z\\O\\Kc0JCd2@fMPd0a4Y\\O\\Kb0M_Od2DcMRd0a4W\\O_K;a3[O_LSd0a4V\\OT1jc0mNS\\OU1mc0mNn[OV1Rd0\\5000000O1001O00000000000O100000001O0]Im[OY1Sd0fNP\\OX1Pd0fNS\\OY1mc0SMj[OSN;i4kc0SMR\\OkM6Q5hc0SM^\\O^MMI2^4cc0[Nl]OSMiN]4[c0_Nn]ORMjN]4Yc0`Nm]ORMlN9QO\\3Vd0ZOm]OoLoN3YO^3lc0@n]OlLPO1^O^3ec0D\\@:d?F_@7a?Ib@4_?Kd@2\\?Nf@0Z?1g@MZ?2h@LX?4i@UOlJVN\\d0d2i@ROoJYNYd0d2j@POoJZNXd0g2i@nNPKZNXd0g2i@mNQK[NVd0h2j@kNQK\\NWd0h2j@iNRK^NSd0i2l@gNSK_NQd0k2l@dNVK_Noc0k2m@dNVK`Nmc0k2o@cNVK`Nmc0_1_[O@c5>SKbNlc0c0YCh0nHbNnc0OiC[1Qf0M3L4J6M4K4K6JVdP:" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "_VhQ1k0So0g0[Ob0]O=D8H3M1N2O00000O10O1000O101O0000000O10O10000O01000O1000O1000O1000O01000000000O10O100000O010000000O10000000O0100000000O10O100000O1000000O010000000O1000000O10000000O0100000000O100000000O1000000O1000000000O10O100000000O1000000O1000000000O010000000000O01000O1000000O100000O0100000000O0100000O1000000O10O1000O100000O10O100000O10O1000000O010000000O01000000000O010000000O0100000O10O100000O01000000000lJ" + } + ], + "question": "Are and located within the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. 
Neither nor is in the mirror" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_20.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1004.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "\\^]Q17go0:H1N2O00000000000000000001O00000000O100000001O000001N10000O101N2M3N2M4Lba]5" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "mVjl04lo00amT2h0RSjMZOEO1OT3d0^N^O^OMae0]3[YOgL3OOm1d0[OBYOYf0d5iYO^LVf0g6N2O1O001O0000000010O001O2N2O2M4L7Hk1VNT1jNQ1dNSUOeK^k0a1P3ROk0B=CUfR8" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "]\\nm06go04N10001O00100O010O10O0100O010O10O0100O010O10O10O10O0100O0100O1O010O1N6GUcl8" + } + ], + "question": "Which of the following mask :, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. None of the above are in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_21.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1621.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "iom24ko02O0O1N2O1O1O1O100O1O1O100O1O1O1O1O1O2OO0100O1O1O1O1N2O1O1O100O1O1O1O100O1O1O1O1O100O1O1O1O1O1O2N100O1O1O1O1O1O1O1O010O1O1O1O2N1O001O1O100O1O1O1O1O1O1O1O1O1O1O1O100O1O1O1O1O1O010O1O1O1O100O1O0000001O000000000000000000O2N100O1001O00O10000001O00O20O000000O2M2O1O100001O1O1O1O1O100O2M2O1O1O1O1O100O1O100O1O1O1O1O1O1O1O100O1O1O1O1O1N200O1O1O100O1O1O1O1O1O1O100O1O1O100O1O1O1O1N200O1O1O1O2O0O1O100O1O1O1O1O1O1O1O1O1O1O1O1O1O2N1O100O1O1O1O1O1O1O1O1O1O1O1O1O100O1O1O1O1N200O1O1O1O1O100O1O1N2O1O1O1O100O1O100O1O1O1O0101M3I6B?[Od0B`0VOj0\\Of0[O`Wjl0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "[bfb085MXo0d0I4L3N2M10010O000000000O02O000000000000000O2N2Gn]`d0" + } + ], + "question": "Can you tell me if and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. 
Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_22.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2584.jpg", + "mask_rles": [ + { + "size": [ + 460, + 620 + ], + "counts": "VP5;P>7I4M9G5K2N2N001O5K1O001O010O000010O01O10O01O010O001O010O0101Nc0]Od0[OVVR8" + }, + { + "size": [ + 460, + 620 + ], + "counts": "]:`1X=G33L4J7I3O3N2M2N5L13H4J6J6J and are in the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_23.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2130.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "]PQ2:F9G4L2O0O1O01O01O010O01O00010O0001O01O001UOYB=f=B`B9`=FfB6Z=IlB1T=OQCLo<5m001O00010O002O1MZTj6" + }, + { + "size": [ + 480, + 640 + ], + "counts": "ScY2b0[>:I1O1O001O000000000000001O00000000000000001O01O1O01O010O00010O01O01O01O000010O0001O010O02N010O00010O01O01O01O010O000010O01O000010O01O00010O001O000010O01O01O01O1O002O4KQaj5" + }, + { + "size": [ + 480, + 640 + ], + "counts": "nRg84g>6I6M3K5M4K4K5L4K5K6I6I7I7J6I7G9G9J6G9oNgLSFa3k9l0O0O100000000000000000000000O1O1O1H8G9I7D, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
None of the above are in the mirror" + ], + "answer": "D", + "type": "mirror", + "image": "images/vqa_24.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/297.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "aRgg0;_o0>C7H7H9I7I6K5K5L3L5L4M2L5L3N3K4L4N3N100O2O000O101O0000000O1000000001O00010O000O10O100001O00000000000000000000001O00O10O100000000O10O100000O10000O10000O100O100O10000O100000001O0O10000000000000010O01O001O2N3M5K1O10O01O000000001O01N100000000O2O0000000O1000000O10000O10001O000001O000000000000000000O2O001N2O1N2O2M3M3N2M3M3M3L4M2M4L4M3L5K4L4K7J5K5J6J5K6J9CQng:" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. No" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_25.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/552.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "\\kSS12ko05M2N1010O00010O00010O01O010O1O01O01O0010O0001O001O010O001O00001O00001O0000000000O2N3ITP=6goB5M3N100O010O1O1O100O1O10O0100O1001O0O2O0000001O000O2O00001O0O101O00001O0O101O000O101O000O10001O00001N1000001N100000001N10001NbTS1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "djYj03mo00O2ANnPO2Qo02kPOOTo03kPOMUo04iPOMWo04hPOLWo0?000001N3N2N1O2N3M1N2O1O000O101O000000000O10000000000O10000000000O10000000O100O1N2O1O1N2M3N2N2N2N2M3L5HZVj;" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "]Zm, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
" + ], + "answer": "D", + "type": "mirror", + "image": "images/vqa_26.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/46.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "djY9k0So05L3M2O0O100O10000O10001N1000000O10000000001O000O101O00001O000001O01O00001O101N5K:F6K0O1O10O010O2O10O4M0O02@YROaNkm0k0gROSO\\m0`0PSO]OXm0J`Vll0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "lS`o06go05L3I7H8N2N2N2O1O1O001O1N2O10O01O1O1O1O1O001O100O1O001O1O1O1O001O1O1O0010O010N`0^NSRO1VjW7" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "dPib03ko03N1O2N1O2M2O2N101N101N101O001N2O001N101O0O2O001N101O00001O010O010000O01000O100O010O100O010O100O001O1N1B?L4N2O1N3MWo\\c0" + } + ], + "question": "Among , , and , which mask is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. None of the above are in the mirror" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_27.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1258.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "bfgR13?4NO^j00dUOL3001N1O4Lc1ma0SOQE_OmHI9OO102L7L_1na0SOSE\\OoHI9OO2O9E11_1Pb0ROSE[OQIH81N2Nm0Gk0Qb0SOX^OnN^6<^II63N4Kk2ka0kMUESOnHL4373L5Kk2ka0kMUEROZIN03N1N9F12`1ma0PO_FQORH1N2b08TOP1ka0CoHUO_E1O2J0ha0h0QIROUF0ZO4_a0j0mJVOR5j0nJVOR5j0nJVOR5j0oJUOQ5k0oJUOQ5k0oJUOR5j0nJVOR5j0mJXOR5h0nJXOR5h0mJZOR5f0jHROaE2^1;T`0a0jHWOaEOZ1a0Y`09PCTOl09_JMN6a0DAi0ca04oBWOi0k2VRLR_OQOP2Q2^NQN\\OT6Ta0jJk^O\\OKLX1a2jN[4Wa0Y7O1O1O1O010O00001O001O001O000010O01O0000000000O1N2[Of0kNT1fNZ1cMf[OZG4K^e0V5ZZOnJ9M3M^h0b2\\4dN\\1F:N2O1O100O2O00000001O01N10000O101N1K5O2O1N3MTee2" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": 
"jj0h2n0fNni0Z1RVOfNmi0\\1QVOeNni0_1oUOaNoi0d1mUO]NRj0f1kUO[NTj0g1jUOZNVj0g1hUOZNWj0i1fUOXNYj0m1bUOTN^j0g3O1O1O1O1O100O100O1O100O100O1O100O100O1O1O100O100O010O100O101N100O100O100O100O100O100O100O101O000O1000000O1000000O1000000000000000000000000001N101O1O001O001O1O1O001O1O1O1O001O1O001O1O001O1O1O1O001O1O1O0O2O1O1O010O1O1O1O1O100O1O1O1N2O1O1O1O1O2N1O1O2N1O1O1O1O1O2O0O1O1O2N1O2N1O1O2N1O1O2N1O2N1O2N2N1O1O2N1O2N2N1O2N2N3M2N1O1O2N1O1O00O10oLYSOi2gl0WM\\SOf2dl0ZM^SOd2bl0\\M`SOb2al0]MbSO`2^l0`MeSO]2[l0dMhSOX2Xl0iMiSOU2Wl0lMiSOS2Wl0nMiSOQ2Wl0oMkSOo1Vl0QNjSOn1Vl0RNjSOn1Wl0RNhSOn1Yl0SNcSOo1^l0QN`SOP2`l0SNYSOR2gl0b02O1O1UNPSOd0Qm0UOVSOj0ll0QOXSOn0kl0lNYSOS1ll0eNXSOZ1hm0N3N1O1O1N1F_QOUObn0h0bQOUOan0f0`0I8IZQ\\Q1" + } + ], + "question": "Are and located within the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_28.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1843.jpg", + "mask_rles": [ + { + "size": [ + 530, + 730 + ], + "counts": "UnP88U`06M3M3M2O100O1M3N2N2M3N2M3O2N10000O1O1N2O21NfASOQ=l0bBA^=i11O1O4L1O00010N4M1O1O0O2O0O1O1N2L4M201O1O1O1N101^OXAYOi>8kAFV>3RBKo=0WBMn=MVB0]\\m2" + }, + { + "size": [ + 530, + 730 + ], + "counts": "Zgo:4^`05K2N0100O00O2J5J6I8I60001O00000010O00010O01O001O1O001O3M2M3N2N1O001O00001O00O10000O1000000O20O00000YG" + } + ], + "question": "Can you tell me if and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. 
Neither nor is in the mirror" + ], + "answer": "D", + "type": "mirror", + "image": "images/vqa_29.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1012.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "0i7Wh00000O1O1O1O1O1O1O100O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1N2O1O1O1O1O1O1O100O1O1O1O1O1O1O1O1O1O1O1N2O1O1O1O1O1O100O1O1O1O1O1O1O1O1O1O1O1N2O1O1O1O1O1O1O100O1O1O1O1O1O1O1O1O1O1N2O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1N2ZOcTOfL`k0W3f0M3O1N2O1O1O1O100O1O1O1N2O1N2O1O100O1O1O1N2O1O1O1O1O100O1O1O1O1N2O1O1O1O1O100O1N2N2O1O1O1O100O1O1O1O1N2O1O1O1O1O1O100N2O1N2O1O1O1O100O1O1N2O1O1O1N2O1O1O100O1O1N2O1O1O100O1O1O1O1O1N2O1O1O1O1O1O100O1O1O1N2O1O1O1N2O1O1NRPWQ1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "`Ph61cPi:2lnWE2iPOOVo02jPOOUo00lPO1To0MlPO4To0LlPO5So0JnPO7Qo0InPO9Ro0FlPO=Zo01O2N2O1O00001O0O2M3N2BWOcQOl0Yn0VOfQOk0Yn0UOgQOl0Xn0SOhQOo0Xn0POhQOQ1bn02O1N3M2N2K5N102gQOaNmm0l1M5USOmM^O1M8IH^j0X2QVOi0^OYM2F0K50T44kK9LN_1FR?\\9]_ORHn0f2TORLU?Ra0\\O`2`MQ1oNj0VO3@[ZO^Fie0Y1WZOeNOO2;O00O4B94EMb21^M021LT1Pf0SOSZOG030Kj7d0^g0M1N2O0O2M2N20TQOIom07m0O10000O1001O00000000O100M30001O03M1O00000O10L4000000000000000000000000002M3N000000O1M3000001O0000000000000000000002N1O0000O1N20000000ZXO1[?1W@k0Y?UOd@Q1Y?oNe@U1W4TO[DCW:4VMV1T4G]NSO_MW1Q4J^NoN`MX1Q4J_NoN_MX1P4K`NnN]MY1R4JaNnNVLKhJb1_:GbNlNaK6aJ`2Z;_NdNkN_Kf3^5aMQOkN^KR5mJPKNO5J4O1LZ12hNO`3_1j5jNZKa;mJdD1GOL7:J14NLX1f9hNZK[`0d4f_O\\K]`0TKe_OJ30O2M3Fc6:XNVd0SKa[O\\5:AZg0QKeXO6E^1>[3]j0O00010O2O1O001OO100O1O1O1000000O01O1N2O010O1O100O0100N110000O1000000O100O\\UOVKTj0j4lUOWKSj0h4nUOXKRj0g4d00000O10000O1000000O1000000O1O1000000O100O1O100O100O10000O1000fUOcKQi0\\4PWOdKPi0\\4oVOfKPi0Z4PWOfKPi0Z4PWOfKPi0Z4PWOfKPi0Y4QWOgKoh0Y4QWOgKoh0Y4QWOgKoh0X4SWOgKmh0Y4]1O100O10000O1000iUOjKdh0V4[WOlKdh0T4\\WOlKdh0S4]WOnKbh0R4^WOnKbh0R4^WOnKbh0Q4_WOoKah0Q4`WOnK`h0Q4bWOmK_h0R4h10000O10000O10000O1000PVOPLPh0o3QXOQLog0o3Q2O100O1000000O100O1O100O100O10000000000
00O10000O10000O10000O1000000O100O100O10000O1000000O10000O10000O1O100O1000000O100O100O100O100O100O10000O1000000O1000000O100O1O10000O10000O100000000O100O100O100000oEVMlFi2T9fM^FZ2b9gM]FY2b9iM]FW2d9hM\\FW2e9iM[FW2f9gM[FY2j9bMVF]2m9aMSF_2n9^MTFb2ec0O100O100001O004Le1PSOkKkj0P;WUOjD:Ld035Nh?e=Y_O]B92HKW`0]`0__OR@3c01ROW=ib0eB`]O7Em;gd0lCg[O2GS:Yh0nEcWOQ7kk0QI\\TOX3[o0kL3M1O001O000000000000001O0000000000000000O1000000000000000000000000000000000000000000000000001O000000000000000000000000000000000000000000000000000000000000000000000000O10000001O00000000000000000000000000O100000000001O0000000000000000O100000000000000000000001O0000000000O10000000000000000000000001O0000000000]J1lUO83JU4Mge0[1UZOeNje0^1TZObNle0`1RZO`Nne0a1QZO_Noe0b1PZO^NPf0b1PZO^NPf0c1oYO]NQf0c1oYO]NQf0c1oYO]NQf0c1oYO]NQf0c1oYO]NQf0c1oYO]NQf0c1oYO]NRf0b1nYO^NRf0b1nYO^NRf0b1nYO^NSf0a1mYO_NSf0a1mYO_NSf0a1mYO_NSf0a1mYO_NSf0b1lYO^NTf0b1lYO^NUf0a1kYO_NUf0a1kYO_NUf0a1kYO_NUf0a1kYO_NUf0a1kYO_NVf0`1jYO`NVf0a1iYO_NWf0a1iYO_NWf0b1hYO^NYf0b1fYO^NZf0b1fYO^NZf0b1fYO^NZf0b1fYO^NZf0b1fYO^N[f0a1eYO_N[f0b1dYO^N\\f0b1dYO^N\\f0b1dYO^N\\f0c1cYO]N]f0c1cYO]N]f0c1cYO]N^f0b1bYO^N^f0b1bYO^N^f0b1aYO_N_f0b1`YO^N`f0b1`YO^N`f0c1_YO]Nbf0b1^YO^Nbf0c1]YO]Ncf0c1]YO]Ndf0c1[YO]Nef0c1[YO]Nef0c1[YO]Nef0c1[YO]Nff0b1ZYO^Nff0b1ZYO^Nff0a1[YO_Nef0k0cUOFg3@ff0h0fUOFe3Bff00bUO>:Ee3Mhf0N_UO0;O`33ik00000000001O000000000000001O0000O100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "]hg==_o06H8K5K3N3M3M2O2N3L4L4M1O2N1O1O2M3N7I4L7I7I7J1N1O2O1N101N1O1O101N100O101O000O101N10001N1000001O00001O00001O001O001O001O001O00001O010O001O000010O01O001O0010O010O010O1O01O01O01O00010O0010O0001O01O01O0010O01O010O001O010O0010O10O01O10O010O000010O0001O010O0010O01O1O01O01O010O0010O010O010O010O010O01O01O01O0010O01O00010O02Mj1VLbRO^1lSXe0" + }, + { + "size": [ 
+ 1024, + 1280 + ], + "counts": "VT_3;5Oen0Q1cQOROF3011a0Xk0e3K4L4M3L3N2M3N1O1O2O0O1O2N1O1O1O2N1O1O2N1O2M2O2N1O001O1O001O1O1O1O2N1O1O1O1O1O1O1O2\\WO^Igg0c6XXO^Igg0c6YXO^Ieg0c6[XO]Idg0d6\\XO\\Icg0e6]XOZIcg0g6^XOXI`g0j6j01O1O1O1O1O1O101N1N2O1O1O2N1O1O1O1O2N1O1N2O2N1O1OoNkWOYJUh0g5lWOXJSh0i5mWOWJRh0i5PXOVJog0k5QXOUJng0l5SXOSJkg0o5UXOQJjg0o5WXOQJhg0P6YXOPJeg0Q6[XOoIdg0R6\\XOnIdg0R6\\XOnIcg0T6\\XOlIcg0U6]XOkIag0W6_XOiI`g0Y6^XOhIag0Y6_XOgI`g0[6^XOgI`g0]7O1O100O1O1O1N2O1N2O2O0O1O1O1O1O1O1O1O10001N100O100N2000000000001O000000000000000000001O000000000000001O000000001O00000000001O0000000000000000001O000000001O00000000000000001O000000001O00001O00001O001O00001O00O10002N0000000001O0001VOcYOnG^f0Q8fYOlG[f0R8nYOeGTf0Z8PZOaGdi06mUO6N\\3gn0^L_QO8Xg9DaXF860M2NO4M6Flk0]6kSOoI4D\\h0^8iN5N1XYOcGke0]8TZOfGje0Z8UZOgGke0Y8UZOhGje0X8VZOhGje0X8UZOjGje0V8UZOkGke0U8UZOkGke0U8TZOlGle0T8TZOlGle0U8SZOlGle0T8TZOlGle0T8TZOlGle0T8TZOlGle0T8TZOlGle0T8TZOlGle0U8SZOkGme0U8SZOkGme0U8RZOlGne0T8RZOlGne0T8RZOmGme0T8RZOlGne0T8RZOlGne0T8RZOlGne0T8RZOlGne0T8RZOlGne0U8QZOkGoe0U8QZOkGoe0U8QZOlGne0U8PZOlGPf0T8PZOlGPf0T8oYOmGQf0S8oYOmGQf0S8oYOmGQf0S8nYOnGRf0S8mYOnGRf0j3PZO@KgLUf0h3RZOhe0CWZO=ge0EXZOc6\\c0WIW\\O5, , and , which mask is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_30.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1390.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "fPho09do08XOC_QOc0_n0?000000000000000000000000000000000000000001O000000000000000000O1000000001O000000000000000000000000000000001O000000000000000000000000001O000000000000O1O1O1N2M3M3K5L4K5L4M3JVP_5" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "YSQR13io05J6K4M3N2L4K5M3M3M3M3K5L4K5M3M2M4J6L3N3M2M4L3L4N1N3N1O12N2N2O1N2O1N2N2N2N2N2M3N2N2N2N2N2M3M3K5M3M3N2L4M3N2K5M3M3N2N2N2M3N2M3N2N2O1NRPl3" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "PPW43mo05K3M3M3M3M2N3M3M2N3M2N4L6J4L3M4L4L3M3M4L4L4L4L3M5K3M4L3M4L4L3M3M3M4L1O1O00000000001O000000000000000000_NPTORORl0j0RTOTOQl0h0TTOTOok0h0UTOUOok0f0TTOXOPl0c0STO[OQl0`0TTO[OQl0`0RTO^ORl0=PTOBTl09oSOETl07QTOESl07PTOEVl05mSOIWl02nSOGYl04m1K[ndQ1" + } + ], + "question": "Which one among , , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. None of the above are in the mirror" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_31.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/602.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "]c]Q17bo08WMDfSO4EONm1]l0^1eUOWM^g0k2[XO^Mbg0b2[XObMdg0^2YXOeMgg0[2TXOkMkg0V2PWOiL7T1ih0S2nVOnL1T1Qi0n1nVORMGV1[i0h1mVOQOSi0o0mVOQOSi0o0lVOROTi0n0lVOROTi0o0jVOROVi0n0iVOSOWi0m0iVOSOWi0o0fVOROZi0j3000000000O1000000000O1000O1M3oNR1gNX1eM[TO`N6Hml0V1iROmNcn0:^QOHk^\\5" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. 
No" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_32.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/130.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "i`TQ1;Zo0>H`0@>C`0_O;E:H?A6I]MoSOa1jk0aNYTO`1ck0\\NdTOe1Wk0[NmTOf1oj0XNVUOh1hj0XNZUOi1Pj0gNUVOZ1hi0cN]VO^1ai0aNaVO`1]i0aNcVOa1[i0_NeVOb1Zi0_NfVOa1Xi0`NhVOa1Wi0`NhVO`1Xi0aNgVO_1Xi0dNfVOZOUOn0Uj0KbVOUO]Oo0Qj0N`VOoNDR1li01^VOkNIR1ii08ZVOdN1Q1fi0?UVO]N9R1ci0e0PVOWN`0R1ai0m0jUOoMg0S1_i0P1hUOlMk0R1^i0V1fUOcMn0V1^i0\\1hVOdNYi0OcUOLT13[i0NiUOIl09\\i0LmUOFh0=]i0KPVOCe0a0]i0HTVOB`0e0^i0GYVO[O, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. " + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_33.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1944.jpg", + "mask_rles": [ + { + "size": [ + 530, + 730 + ], + "counts": "j9`0[3\\2`6dMaI[2_6fMaIY2_6gMbIX2^6hMcIW2]6iMdIU2]6jMdHFZN`2R9jMbHLYNY2U9kMaH1VNT2Y9kMaH3UNQ2[9jMaH:oMl1`9jMbHRO`Bh0i>L3M2N1O0000000000000000000000000000O1M3M3M3O100N2O1O1N2O1O1000000O100000000001O000000000000O1000000001O0000000000000000000000O1001O0000O1000000001O000000000000000000O100000000001OO1000000000000001O00000000O10000000000000000000000000000000000000000000000000000000000000000000000000O11O00000000O11O0000O100001O000000000000000O01001O000000000000000000O11O000000000000000000000000000000000000000000O11O000000000000000000000000000000000000000000O11O0000000000000000000000000000000000000000000000000000000000000000O11O00000000000000000000O11O000000000000000000O11O0000000000000000O100001O0000000000000000O1001O0000000000000000O100001O0000000000000000O1001O000000000000000000O11O00000000000000000000000000000000000000000001O01O0O10O1000001O00000001O000000_Oo@AQ?9ZABg>9^AFb>7bAH_>2gAM[>LkA3[?O1O10O01NUaZ2" + }, + { + "size": [ + 530, + 730 + ], + "counts": 
"PmY31Y`01j_O3P`0;M3@a0L4J7K6J6K2O1O2M3M3N2N1O1O1N7J4L1O1O103L111M0101N12O2M001OO0AaBlM`=d2OjNdBUO\\=Q2O0101N1O100O1O1O100O1O1O001O1O1O2O0O1O001O1O1O1O1O1O2O0O1O1O1O1O1O1O001O010OaNnM^DR2b;nM^DR2b;nM_DQ2a;oM_DQ2Q=00000001O000000001O000000000000000000001O0000000000cNRNVDn1j;RNVDn1j;RNVDn1j;SNUDm1k;UNSDl1l;UNTDj1l;WNSDi1m;WNSDi1m;XNRDh1n;YNQDg1Pc0eAB[>=_AIa>T11O0000001O0001O000001N1001O000001O000000000000000000000000001O000000000000000O2O000O101M2N2N3K6C and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "D", + "type": "mirror", + "image": "images/vqa_34.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2347.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "k`[33k>2F:001O1O1O1O1O2N2N0O2O00Wmi5" + }, + { + "size": [ + 480, + 640 + ], + "counts": "jVj15f>6L4J5K6I6L5G9^Ob0ROn0M2O2O0O1000000O100000000O100000000000000000000000000000000000000000000000000000000000000cEZM`8f2]GeM[8[2bGkM[8U2bGQN[8o1aG[NY8e1fGcNS8\\1mGlNl7T1THSOe7m0ZH\\O^7d0aHBZ7>kGoLd0j2[77QHPMa0n2[72SHQMa0R3_OTM\\7Q3RI;m6m3O1O001O000010O00000000000000000\\KiH_1W7aNiHSM7m3P7POjHQM and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. 
Neither nor is in the mirror" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_35.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/118.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "PPf64lo04L1O2N1O1O1O1O1O1O2N3M5K1O1O1O1O001O0000000000000000001O00000000001O00001O00001O00001O00000000001O00001O0000001O000000001O0000001O000000001O00001O0000001O00000000001O0000001O00001O001O1O1O2N1O1O000000000000000000O100000000001O000000000000001O0000000000001O001O00001O00001O0000001O000000001O00000000000000000000O10000000000O10000000002M3N1O2N1O2N1O1O2N2N1O1O1O1O1O1O1O2N1O1O1O1O001O01O01O0000000000O2N1O100O1O1O1N3O0O10000O2O000O100000000000000001O1O1O1N1O2N1N2N3M2O2M3M3N3K6KPoji0" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. No" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_36.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2158.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "[jX5R2n3900O1N20WA0a>1521O1O00000000000000O11O00SO0jB0U=7eBI[=b0XB@h=l000000001O9G=CB`0@8H1O0000000000000000000O100000000000000000000O100000000000O2O1O1O7I`0A5I5La0jMXC5_Of0V>M4L5K4LWgR7" + }, + { + "size": [ + 480, + 640 + ], + "counts": "hbj44i>:VCI\\:;^EK^::\\EIb::[EHc:, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. None of the above are in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_37.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/105.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "dlQS14_o0S1hNS1ROg0_O=E and located within the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. 
Neither nor is in the mirror" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_38.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/869.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "`V`9=4H2Min0f2XN7I2N1O100O10000O100O10O10O100000O0100O100000O0100000000O10O10000000O10O1000O10000O0100000O1000O10O1000000O01000000000O10O1000O100000O10O100000O1000O100000O10O1000000O010000000O10000O01000000000O100000O01000000O1000O01000000O10O100000O100000O01000000O10O10O1000O10O100000O1000O1000O01000000O01000O10000O100000O10O1000000O1000O100000O1000O010000O1000O1000O10000O1000O1000O1000000O01000O1000O010000O10O01000000O0100000000O10000000000O10001O00000O1000000O2O0000000O101O000000001N100000001N10001O00000O2O0000001N1000001O000O101O001O0O101O00001O001O0O2O001O001N101O001O001N101O001O0O2O001O001O0O2O001O001O001N101O1O001O1O1O2Mg0YOo0nNfhed0" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. No" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_39.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2369.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "o7o6Q81O00000000O2O3N2L5L4L2O3L7I4L2M3N2O0N10kN[JPId5o6cJkH]5S7hJjHX5U7kJiHU5W7oJeHQ5[7SKaHm4_7VK^Hj4a7[KZHf4f7Z100O100001O2N2N3M4L1O001O0000O1O1D7G6L3N2N3N2N00000002N2N3L2M3M2N3O0O3M3LTT`5" + } + ], + "question": "Are and located within the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. 
Neither nor is in the mirror" + ], + "answer": "D", + "type": "mirror", + "image": "images/vqa_40.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/48.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "`Zi09ao08I5J7M2O1000001OO1000000001O000000000001O0000000000000000000010O0000001O000000000000000000000001O000000000001O00000000000001O0000000000000000001O00000000000000000001O000000000001O000O3LneXT1" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. No" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_41.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1081.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "go[b06`n0\\1SOk0XOh0H8K6L3VN[MWVOg2bi0jMQVOY2ji0QNoUOQ2mi0S2N2M3L4N2O1OdJbVO\\Oe0f3hh0lLPXOP3og0gL\\XOX3cg0gL`XOX3_g0hLcXOW3]g0iLcXOW3\\g0jLeXOU3[g0jLgXOU3Yg0kLhXOT3Wg0mLiXOS3Wg0nLiXOQ3Wg0PMiXOo2Wg0SMhXOl2Xg0XMfXOf2Zg0]McXOc2]g0`MaXO^2`g0fM]XOY2cg0kMYXOU2gg0WNnWOh1Qh0]NlWOb1Th0cNfWO^1Zh0kN[WOW1eh0Q31KdVOfI[i0f5fVOfJbi0Z5^VOeJ5CUi0f0QWOa4EkJYi0b0[WOc4eh0\\KeWO\\4Yh0eKPXOR4og0oK\\XO3oNV2ch0jMmYOS2ke0VNXZOe1fe0_N`ZOZ1^e0hNmZOl0Se0VOS\\OmN]KNfh0U1d52O00O2TO\\QOOPo0JmWjc0" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. 
No" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_42.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/778.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "ZPa=l0jn0=M1O2N1O1O1O1O001O1O001O1O001O001O00001O001O00001O0000001O00001O000000001O00000000001O000000000000000000000000000000O10000000000O100000000O1000000O100O100O100O100O1O1O1O1N2N2A?D\\Peg0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "XQV98Ro0Q1D3M2M3O1N2O1N2O0O2O1O0O2O001O001N10001O001O00001N10001O0000001O0O100000001O0O1000000000000O2O000000000O10000000000O100000O1000000000O10O100000N20O10000O100O100O100O1O1O3M2L7hNmPQl0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "Vl`21jo09J4L4L3N2N2N2aSOWOci0l0YVOZOci0g0YVO_Odi0c0YVO@ei0a0ZVOAkh0@`UOP1d1Bfh0J]UOg0l1@ch00\\UOa0P2Abh01[UO?R2B`h04ZUO;U2B`h0_1ZWOgNdh0Z1[WOgNdh0Z1ZWOhNeh0Y1ZWOhNeh0Y1ZWOhNeh0Y1ZWOhNeh0Y1YWOiNfh0X1XWOjNgh0V1XWOlNgh0U1WWOlNih0U1VWOlNih0T1WWOmNhh0T1WWOmNhh0S1VWOQOhh0P1SWOUOlh0k0QWOYOoh0g0oVO[OPi0e0oVO^OPi0b0nVO@Qi0`0nVOCPi0=nVOFRi0:lVOHTi09gVOKXi06fVOLZi04dVON\\i06_VOL`i06]VOLai06]VOKci07ZVOKei08WVOIhi0Y3N100O1O1O1O10001N1000000O1000000O010O100O01O001M3K4O2O10O10O10O10000000001O0000000001O01O01O010O010OO3N2M3N1N1N3N1O2N1O2N101N1O1O1O101O0O1000000000O1000000kLVWOTOjh0i0]WOSOch0m0_WORO`h0m0dWOPO\\h0iNTWOoN1R1a0T1Zh0jNiWO00T1Vh0lNlWOOOT1Uh0mNmWONOT1Uh0lNmWOO0T1Sh0mNmWOO0T1Sh0mNnWON0T1Rh0mNoWOO0S1Qh0nNoWOO1R1Ph0nNQXON0T1Ph0mNPXOO1S1og0mNQXO01Q1og0nNQXO11P1og0nNPXO22o0ng0nNQXO32n0mg0nNSXO30o0mg0mNTXO40n0lg0mNUXO51l0jg0oNVXO33l0gg0POWXO44j0fg0POWXO56i0cg0QOYXO57g0ag0QOZXO7, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
None of the above are in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_43.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2237.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "neU17h>3N1O00001O00000000000001O0001O000000000010O000000000000001NQig7" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. No" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_44.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1341.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "^`Rc0 and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_45.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1004.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "bm0^2bm00001O1O1O1O1O1O1O1O1O1O1O1O2N1O1O1N2O2N1O1O1O1O1O1O2N101N1O1O3M7I6I4LkPkV1" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. 
No" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_46.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1555.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "VlYb02ko06J4L3M4L4L3N3M2L4N3L3N2N2M2O1O1N1N200O0001O1O10N100100O010O0001O10O1O01N1100O000010O1O0^FkN]DT1b;8TCGna=TOQ\\OL[6o0f=nNY\\O1o5Q1j=lN_\\ONc5V1P>lN`\\ON\\5T1W>nN_\\OMX5U1Z>nN`\\OMR5U1`>nN`\\OMk4W1f>lN`\\ONf4V1m>kN_\\OOa4U1R?mN]\\ON^4U1W?mN]\\ONY4T1\\?oN[\\ONV4R1a?QOY\\ONS4P1e?UOU\\OOQ4l0k?YOP\\OOP4h0h?T2Q@mMP`0X2j_OhMW`0[2d_OeM_`0[2^_OeMe`0[2Y_OeMi`0[2U_OdMn`0\\2P_OcMSa0]2k^ObMXa0_2d^OaM_a0a2[^OaMga0`2T^OaMoa0`2m]OaMUb0_2i]OaMYb0_2_]OfMdb0Z2V]OiMmb0W2P]OhMTc0X2h\\OiM[c0W2b\\OjM`c0V2^\\OiMec0W2Y\\OiMic0W2U\\OgMoc0Y2n[OhMTd0X2j[OhMXd0X2g[OgM[d0Y2c[OgM_d0Y2`[OfMbd0Z2gYOXL9\\1Sf0\\2cYOZL8Y1Wf0]2`YO]L5W1\\f0[2`YO`L2R1af0^2]YObLOo0ff0_2[YOcLNl0if0a2YYOeLLi0lf0b2XYOgLIf0Qg0d2VYOfLId0Rg0f2UYOgLHa0Tg0j2bXOoKNg0;=Xg0m2YXOWL1b0<7Y7POP8R4R@[L9b050`7UOn7a4\\@^L2Lc7YOm7]4^@_L0K[5AoKLW>Y4^@aLNI\\5K`KOf>l3`@lMU5`0X:e1b@nMn3h1Z;:h@TN[1f4W=UM]Ab7^>^HbAd7\\>\\HcAf7\\>[HbAg7]>n400O100000M3TOkAc_O_>V`0i0N2O1TLRAYDR1Km=Z:WEaEk:i8lFTGV9l7o7jLUXORNPh0l1TXOoMog0P2TXOlMng0T2SXOiMog0V2SXO^LMLog0f3WXOZLL1kg0e3hXO]KRO6>j0eg0c3kYO_LSf0a3nYOaLoe0_3QZOcLme0\\3TZOfLje0Z3VZOhLhe0W3ZZOiLee0h2YXOVLS2S1ce0f2_XOQLP2\\1^e0b2P[OaMmd0_2T[OcMid0]2X[OdMfd0[2[[OhMbd0X2_[OjM^d0U2d[OmMYd0S2h[OPNTd0P2n[OQNoc0o1S\\ORNjc0n1W\\OTNfc0l1\\\\OTNbc0l1b\\OSN[c0n1i\\OPNTc0P2R]OmMkb0S2X]OmMeb0S2_]OmM]b0S2k]OgMSb0Y2P^OgMma0Y2V^OgMga0Y2\\^OgMaa0Y2a^OhM\\a0X2g^OhMWa0X2m^OfMPa0Z2S_OfMj`0Z2W_OgMg`0Y2\\_OgMa`0Y2b_OhMZ`0X2h_OiMU`0W2n_OiMo?W2U@hMh?W2]@hM`?X2c@gM[?Y2i@fMT?Z2n@gMo>Y2SAgMk>Y2WAhMf>X2]AiM_>W2eAhMX>X2kAgMS>Z2oAfMn=Z2UBfMh=Z2\\BfM`=Z2dBeMY=[2jBdMT=\\2nBeMo<[2TCdMj<\\2YCdMd<\\2_CeM]<[2gCdMV<\\2mCcMQ<]2QDdMl;\\2WDeMe;[2_DcM_;]2dDcMY;]2iDdMT;\\2nDeMo:[2UEdMh:\\2[EdMb:\\2`EeM^:Z2eEeMZ:Y2jEgM[::Q^OVOh7a0U=lNPCS1Pf00O0010O10010N10011NO1O1O02O01N2
OO2N04N2NON10O3L3M2O22ON11OO10O1N2000O00O2N1O1N2O0002N1N1O1jNjQO`0Vn0_OPRO, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. None of the above are in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_47.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1090.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "^m0a0_o000000O10000000000000000000O10O1000000O10001O00000001O001O001O1O000001O000000000O01000000000ZROBjk0>STOFlk0:RTOImk06STOLlk04RTONnk02QTOOok01PTO1ok0OPTO2Pl0NoSO3Ql0MnSO4Rl0LmSO5Sl0KlSO6Tl0IkSO9Ul0GiSO;Xl0DeSO?[l0AcSOa0]l0_OaSOc0_l0]O^SOf0bl0[OZSOh0fl0XOVSOl0jl0TOSSOo0ml0ROnROR1Rm0QObROX1^m0e00000000000O10000001O0000001O3M;E5K4L5K3M3M5K3M3L2O2N2N1O1N2N1O2N101N1O2N1O2O1N2M5KdP`T1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "hVdQ12ko04M3M3M3I7L4L4M3M2L5K5M3L4N1O2N1N3N2N7I8H:F9G9XYO]Mc?f6XZOhK[e0b7L2O0`]OZEe?g:i2001O1O1O001O010O1O1O1O1O010O00O1O1YIV[O_NMgM>FF9N0j0JZd0o3^[OPM@oNYi0Q4Y21O01O0001O001O01O01O010O001O010O001O001O01O01O001O01O01O00010O001O0010O01O00010O001O00001O010O001O010O00001O010O001O00010O00001O010O001O00001O010O00001O10O01O00001O001O010O1O00010O00001O001O0010O0001O00100N101O00010O00010O00010O001O0010O01O00100O00001O0010O000001O0010O00010O01O001O00010O001O10O010O000010O010XO" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "fRj, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_48.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1090.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "fnSf0T1Wn0h0J4eKWN[ZOl1ae0gNgYOWOmLS2[i0nNiYOQ1Vf0SOiYOk0Wf0WOjYOf0Vf0\\OjYOb0Vf0CiYO9Wf0LfYO2Zf02eYOK[f08fYODZf0?eYO_O[f0e0eYOWO[f0n0dYOnN\\f0V1dYOfN\\f0^1eYO]N\\f0e1eYOWN[f0n1cYOoM]f0V2aYOgM_f0]2`YO`Maf0c2^YOYMcf0j2]YORMdf0Q3T35L4K4L4L4M4K4L3M3M5L4K4L3N2M4M3L4M2N3N13MN3M2M4L3M3N1N0010O0000001O00001O001O00001O1O001O1O001O1O001O1O1O1O1O1O2N1O001O001O1O1O1O1O1O001O1O1O2N5jKPUOR3Tl0K3M2N1O1O1O1O1O001O1O1O1O001O1O001O001O001O1O001O001O00001O001O001O1O001O1O00001O000000000000000000000000000O10001N1M3I7WOi0D>_Ob0_OPSj<" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "^j>6go05L3N2O1O00001O0O1O100O1O1N2O1N2O1O1O1O100O1O0001N1O2O001O1O10O01O100O1O00100O1O1O10000O100000O100000000000000000001O010O001O010O00100O0010001N100O1O1O100O0001O001N2O0O2O1N2O1N2O0O2O1N2N2N2NjUaT1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "mckT12Zo07oPOOkn04TQO6an0K^QO=[n0CeQOl0km0UOUROQ1em0oN[ROR1dm0oN[ROR1dm0nN\\ROR1cm0oN]ROQ1cm0oN]ROR1bm0nN^ROR1am0oN_ROQ1am0PO^ROQ1am0oN_ROQ1`m0PO`ROQ1_m0oNaROR1]m0oNcROS1[m0mNeROT1Zm0lNeROW1Ym0iNgROX1Xm0hNhROZ1Um0hNjROY1Um0gNkROY1Um0gNkROY1Um0gNkROY1Um0gNkROX1Vm0gNkROY1Um0gNkROX1Vm0hNjROW1Vm0jNjROU1Wm0kNjROR1Xm0nNhROR1Xm0mNiROR1Ym0mNgROR1\\m0kNeROU1dm0aN^RO]1Sn0O3Nd0[O6I?AoZj1" + } + ], + "question": "Among , , and , which mask is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
None of the above are in the mirror" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_49.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1072.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "nbk;5?M25om0^1K2M2O0000001O0000001O000010O00000000000O1000000000000000000000000001O0000001O001O000001O000000000001O0000000000000000O1O2K4Aa0oNo]Sj0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "kf0V7jh0000000001O0000000000000000100O000000O101N10000001O00001O01O0001ObMZWO[KYf0iMhZOo1lN8\\f0iMdZOV2mN1_f0jM_ZO[2oNLbf0lMZZO\\2ROHdf0mMVZO`2SODgf0mMSZOb2UOAhf0oMmYOf2XO\\Okf0PNfYOk2\\OVOnf0PNaYOR3\\OoNSg0QN[YOW3^OiNVg0VNRYOX3FbNXg0o2dXORM\\g0Q3`XOPM`g0S3]XOmLcg0T3[XOmLeg0g50\\M[XOmLeg0S3[XOmLeg0R3\\XOnLdg0R3\\XOnLdg0Q3^XOnLbg0Q3_XOoLbg0n2`XORM`g0l2cXOSM]g0Z2dXOVK7_2Vg0e0mZOYOSe0e0T[OVOld0h0W[OWOid0h0Y[OWOhd0g0Y[OYOgd0f0Z[OZOfd0f0[[OYOed0f0\\[OZOdd0f0][OYOcd0f0^[OZObd0f0^[OZObd0e0_[O\\O`d0d0`[O\\O`d0d0`[O\\O`d0d0`[O\\Oad0c0_[O]Oad0b0`[O^O`d0b0`[O^O`d0b0`[O^O`d0b0_[O^Obd0b0^[O^Obd0b0^[O^Obd0b0^[O^Obd0b0^[O^Obd0b0^[O^Obd0b0^[O^Ocd0a0\\[O@dd0`0\\[O@dd0?][OAcd0?\\[OBdd0?[[OAed0?[[OAed0`0Z[O@fd0`0Y[OAgd0`0X[OAgd0?W[OChd0?U[OCkd0>Q[OEod0UOPoAV1\\O;N2O1O001O2N010O01O01O0000001O00000O100000001N11N101M4[NmQOP1ZZo9" + } + ], + "question": "Which one among , , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_50.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/67.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "lhaf06ho04M3M3M3M3M3N2M3N3L2O2N2N3M2N2N102M2O1O1000O1N2N2N2N2M3N2M4M2N3L3N2M3N3M2M4L3MQVV`0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "YmTR11lo05M3N1O1N2O1N2O9G5K7H7J1O1O1O001O001O001O001M2I8L3O1mNQNUTOP2ik0RNUTOo1kk0SNRTOo1lk0SNQTOo1ok0SNmSOo1Sl0TNjSOm1Ul0XNeSOi1[l0ZNbSOf1^l0[NaSOf1^l0]N^SOd1cl0l00001O01O010O00100XM`SOn1al0nMcSOQ2^l0jMfSOW2Rm0O00100O2N2O0O1O100O4M7H1O01O00010O0001O01O01O00001O000O2M3K4G:E;F:H8LbRW3" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "TPTV1`12oNim0a3UN_1aNk1UN=Ch0PXOhGhf0j9nNn0ROa1_NX2d\\OgA9GILd?Qb0^@o]O[=cd0bB^[On:dj0dF^UObN:\\2Zn0bNe0[O;E2N1O00001O00000000001O000000O10000000000000000O100`M2gRO<0A[12gk0g1eSOYNYl0X2nRORNQm0_2O1O1O1000000O10000001O00000000" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": 
"0j;Vd0000000000000001O000000000000000000000000000000001O000000000000000000001O0000000000000000000000001O000000000000000000000000000000001O00000000000000000000001O0000000000000000000000000000001O00000000000000000000000000001O0000000000000000000000000000001O000000000000000000000000000000001O00000000000000jIX\\O4hc0I^\\O4bc0K`\\O4`c0Kb\\O4^c0Lc\\O3]c0Le\\O3[c0Mf\\O3Yc0Mh\\O2Xc0Mj\\O2Vc0Nj\\O2Vc0Nk\\O1Uc0Ol\\O0Tc0On\\O0Rc00n\\O0Rc00o\\OOQc01o\\OOQc01o\\OOQc01P]ONPc02P]ONPc02Q]OMob03Q]ONnb02S]OMmb03S]OMmb03S]OMmb03T]OLlb04T]OLlb04T]OLlb04T]OLlb05T]OJlb06T]OJlb06U]OIkb07U]OIkb07V]OIib07W]OIib08V]OHjb08W]OGib09W]OGib0:V]OFjb0:W]OEib0V]OBlb0R]OBnb0>R]OBob0>P]OBPc0>P]OBPc0?o\\OAQc0?o\\OARc0>n\\OBRc0?m\\OBRc0>n\\OBSc0=m\\OCSc0=m\\OCSc0>l\\OBUc0=k\\OCUc0>j\\OBVc0?i\\OAWc0?i\\OAXc0?g\\OAYc0`0f\\O@Zc0`0f\\O@Zc0a0e\\O_O\\c0a0c\\O_O]c0a0d\\O_O[c0b0d\\O^O\\c0b0e\\O]O[c0d0e\\O[O[c0e0e\\O[O\\c0d0e\\O[O[c0f0e\\OYO[c0h0d\\OXO\\c0h0e\\OWO\\c0i0c\\OWO]c0i0d\\OVO\\c0k0c\\OUO]c0l0c\\OSO]c0m0d\\ORO]c0n0b\\ORO^c0o0b\\OPO^c0Q1b\\OoN^c0Q1b\\OnN^c0R1c\\OmN]c0T1d\\OjN]c0U1d\\OjN\\c0W1c\\OiN]c0W1d\\OhN\\c0Y1d\\OfN]c0Y1d\\OfN\\c0[1c\\OeN]c0\\1c\\OcN^c0]1b\\ObN^c0_1b\\O`N^c0`1c\\O_N]c0b1d\\O\\N]c0c1d\\O\\N\\c0e1d\\OZN\\c0f1e\\OYN[c0h1f\\OVN[c0j1f\\OTNZc0l1g\\OSNYc0n1h\\OPNYc0P2g\\OoMYc0Q2h\\OnMXc0R2i\\OmMXc0Q2j\\OnMVc0P2m\\OnMUc0gNk[OTMS4T4cg0M2I7D;GdSOjL`l0R3:G9E]Ob0cN`QO3O2^R:JdmE1Qo00VQO8in0IVQO7ln0GTQO5\\n0MVRO3cm0`0QROAfm0HoQOk09\\Ogm0n0WROoNjm0c1POYNYSO5Gh1Pm0SNXSOZ2Um01O01O1[O^ROlNdm0L_ROd00^Oam0O_RO;L[O5:am0n0\\ROgN3;bm0m0\\ROfN3, , and , which mask is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_51.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1383.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "k4d3\\l00O1N2N101O1N1N3N101N101M2O2N101GnKbTOT4Zk0;N1N3K401O010O10O10000O1O101N1O1O2N1O2N2N101N100O2O00001N10001O0O101O001N101O001O0O2O001O1O1O001O1O0O2O1O1O0O2O1O001N2O1O1N101O0O2O0O2O1O1N2O2M2O1N2O1O0O2O1N2O1N2O2M2O2M3N2N2M3N2M3N3L4M2M4L4M3L3M5L;D5K3M3M4L3M^iTT1" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. No" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_52.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1258.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "]d]k0=^o08I8fQOVOSm0P1fRO]Ool0f0fROlNHh0d0WOok0d2mSO^Mok0m2hSOTMUl0^3M4M2N4M7HAc0^O3N>A8Hn0UAnBbKLmNA40:1J[c0V:d\\OcE4`1Uc0W9h\\OkFWc0P;O2i\\O]Ca1Le11mL1a=oS8Pa0mHU^Oc1LlMk0f7Ta0jHY^OVOHMO`2j0e5Ua0gH[^OWO0\\2?h5Va0eHX^O\\OLc10bNd0m7Wa0cHX^O[OMHMb15^NK31I;f8aa0\\HW^OROM020M`18]NK3O2MX9^a0dGg^OROM011O]18^NKR:Sa0UGm^OWOMGO^19^NIR:Ra0VGP_OVOL_17]8k`0oFR_OX12k7j`0nFR_OTOMc15]8l`0lFR_OSONe1O`8Pa0iFS_OSOMZl8cAYGX>\\=L2bCU^Ob:ma0\\EU^ObNNg;oa0fES^OcNNe;Rb0iEm]O_:Vb0c13M3L6K7HnGRC]MhjNgAlIS1L[ON03D90R7Z>kNeAjIT1OYON04B:0o6^>WOaA]I87003I3W8S>bNeAaIN?6_7[>aN_AeJ3f6e2lGn8j6XDS3_2QFgM6a;e6YDU22fG\\2NZ9V6XDl1>hGn1>]9n5WDi1a0hGk1c0^9XOUDY64o1c0iGd1j0n9^5fCm1`3eHl8g=XGWBl8c=TGZBT9a=eFiB^9];cCcDo20`9P;oClDb24a9`:]DZER26c9c8RCiG^1\\Oo15d9V8aClGW1Bk18b9HhBU7V1UHkNj0m1Ji19b9IiBR7^1mHa0Ne17h9MbBQ7b1bH`0ERNc0a3:j9b7ZDbGb00gMb0`3T:j4gHfJPM^ObLQ1i=a4mHlJkLk0Y:n3VIVKdLh0X:d3_IgK[L=X:T2[BjMc7ElK:Q:T2hKbMXJ8n9R2kKjM[J1h9m1ULSNYHYODf0n;]1[BPNR:e0oIMd9W1YBUN_:f0`IJUL@d=_1]B`Nb:g0UILV:k0oLWOlHLY:gHOY:O`M0[H0[f0AbUO=W42Uf04lYOLRf05oYOJoe07XYO_ObLM8=ji09\\YOHiLNhi0:cYOHeLMii08dYOMbLJki07eYOO`LJii05kYO1if00YYO3df
0LTYOB\\Lf0Uj0OZYOB`L`0Sj0NZYO<`LUOTj0>nXOYO\\MQ1FVOQj0>oXO[OZMP1HUOPj09UYOBSMP1JQOPj0:UYOGQMl0Vj0\\OjXOGPMn0Wj0YOkXOHnLo0Wj0WO]YOj0cf0QO_YOS1bj0G_QO\\O_n0d0dQOZO[n0g0fQOXOXn0i0;5N20O010M2O1O11\\QOPOUn0R1;1oQOoNTm0f0URO@f0LTm0b0\\RO]Oa01Rm0a0]SO_Obl0`0`SO@_l0`0dSO]O\\l0b0a1O000K50000010O1O1O100CESQO>mn0:1O1K6KeRO@^k0>dTOB\\k0=fTOA[k0>S20100O1OIHkPO4Uo0NlPONUo03;1O0O100lMJgROM_19jk0LfROL<3KM07Um0LdROM<4IM16Xm0JZSO0_OO05\\n0JVRO2`n0O<4M1gNJVRO2F7Rn0NQRO2G1Xn03hQO1MM]m0HZSO^n0AgQO:[n0^OPROok0@STO1aN1^m0MRTO1`N2^m0NPTO1bN1^m02kSO;Vl0DjSOPo0_OTQOa0kn0^OUQOe0in0[OVQOh0hn0XOWQOi0in0WOVQOk0in0TOXQOl0hn0TOXQOl0on0O00000001N1JSOYQOn0en0TOZQOl0fn0TOZQOm0en0SO[QOm0fn0QO[QOP1jn01O1O1O1O2M2N3K7WROcNgl0`1SSOeNll0[1QSOhNol0X1oROjNPm0W1mROkNSm0W1jROkNSm0S2O2M5L2M3N1O2eSOPMA0ik0T3_TOZM^k0j2ZTO[Mek0`30001N10000000001O00000O1O1N2O1001O001O00O1O1N2O1O1N21O000000O100O1JZKQUOg4oj0ZKPUOf4Pk0XKRUOh4Tk0O1O000000O100IZKSUOg4mj0ZKRUOf4nj0[KQUOe4Pk0YKQUOg4oj0XKRUOh4Sk00O1M3O100O100000000000000000000000000000000O1O1O1O1N21O1O001O00O1O1N2N2O10000001O00O1O1N2N2O11O2N1O001O00O1000000000000000000O1O1N2O10000000000O10000000000000000O100O100O10000000O100O1000O100O101O000000001M2O1O1O1000001O000000O01000O010000000001N100O10000000000000O01O1O1O100001O000000O100O10000100O001OO001N2O1O10000001O0000O1N2L40000001O2N1OO1O1O1O1000000000000O1O1O1000000010O0000000O1000O10000001O00001O1O1O4L4L3M2N1O1O2N2N2N2N2N9G4L:\\KSUOa3jk0LgMUL`XOh3_g0ZLbXOd3^g0\\LdXOb3\\g0`LSWONa0a3[h0cLeVOKH9W1W3]h0eL`VOh0k0b2gh0cL_VOS1c0Y2]i0iMbVOV2^i0lMaVOS2`i0mM_VOS2ai0nM^VOR2bi0nM_VOQ2ai0mMaVOS2_i0lMcVOR2_i0lMcVOS2]i0nMcVOQ2lh0kL_VOU1f0o1ih0PM_VOT1f0k1\\h0nLlVO7OS1h0g1\\h0[OcWOe0]h0\\OcWOc0Uh0lLSWOb2h0c0Sh0mLTWOa2i0a0Rh0PMTWOa2h0?Sh00hWO0Xh03eWOM[h05dWOJ\\h08bWOH^h09bWOF^h0<`WOCah0>_WOAah0?_WOAah0`0_WO_Oah0b0_WO]Oah0d0_WO[Oah0f0^WO[Oah0e0`WOZO`h0g0`WOXO`h0i0`WOVO`h0hMUWOZ2;N`h0gMVWO\\2:L_h0m0bWORO^h0o0aWOQO_h0^MZWOS36^O_h0_M]WOS34]O_h0`M]WOT34[O^h0bM^WOS36YO\\h0eM\\WOT38VO\\h0hMYWOT3;SO\\h0_1dWO`N\\h0a1dWO^N\\h0b1eWO\\N]h0c1dWO\\N]h0c1
fWOZN]h0c1eWO[N]h0b1fWO\\N[h0b1hWO[NYh0fNPWOl1n0ZOSh0\\NRWOE9l1V1O`g0WNoWOe1h0ZNUN^1Vi0`NTXO5hNQ1T2lNnM1OON`0Pk0AoTO=1FY2IlM100Ma0Vk0NiTOF`2LcM1M?Yk0LhTOG\\20eM1K=^k0H[WONZMc0]k0\\OZWOR1`g0hNiVO3i1U1]g0jNiVOOl13RMm0Wj0TOjVOJo13QMo0gi0UO]VO7NE`30_Ln0di0XO_VO?\\3\\O`Ln0gi0TO^VOc0Y3c0[f0cN`VOj0T3d0hf0]OgXOUO_M`1ji0]O^XOX1bg0hN\\XOZ1dg0eN_XOY1ag0gNaXOV1ag0WNPVONa2j1_g0UNTVOO^2l1^g0RNZYOl1`j0M`0@`0@103K6KVPf1JaoYN`0G8ROo0H7J6J6M3M3K5J6L4M3M3N2M3L4M3N2N2O1O1O100O1O1O1O1O1O1O1O1O1O1N2O1O1kN^KkVOc4ah0hKcVOGk0a4ah0iKdVOFi0c4ch0gKeVODh0f4ch0fKeWO[4[h0eKdWO\\4]h0ZK`VO2R1e4eh0[KZWOf4fh0ZKYWOg4ah0UK_VO5n0h4bh0_K\\WOb4dh0^K\\WOb4eh0]KZWOd4fh0\\KXWOf4ih0YKUWOi4kh0VKTWOl4lh0TKRWOn4nh0RKPWOP5Pi0oJPWOR5Pi0mJPWOT5oh0lJQWOU5oh0jJRWOV5mh0kJRWOV5nh0iJRWOX5nh0gJQWO[5jh0bJ\\WO`5ch0bJ[WO_5fh0_J[WOa5bi00O100000000O1N2N2N2N2N2O1O1O1O1N2O1O1O1O1O100O100N2O1N2N2N2O1O1O100O100O100O100000000001O001O1O001O0000000000000000O100001O000000001O0000001O001O001O001O001O2N3M1O1O0000O1K500O1000000000000001O00001O000000001O00000000000000001O00000000010O000O2O001O00000000000O110O000000001O000000000000001O00001O0000000000001O00001O0000000000001O00000000001O0000001O001O0000001O0000000000001O0000001O001O2N001O00001O1O001O0000O100O100000000O10000001O0000001O00001O000000O11O0000000000001O0000001O00000000001O001O000000001O0000001OgJbJP@^5o?dJP@\\5o?eJQ@[5n?fJR@Z5Y?cJS\\O4d4X5X?fJR\\O3f4W5X?gJQ\\O2g4W5W?iJR\\ONh4Y5T?kJU\\OKg4Z5R?nJW\\OGh4Z5P?PKY\\ODh4\\5o>PKY\\ODh4\\5o>PKY\\ODh4\\5o>PKX\\OEi4[5o>oJY\\OGh4Y5o>PKX\\OHi4X5o>PKX\\OHi4X5o>PKX\\OHi4X5n>QKZ\\OEi4Z5m>QK[\\ODi4Z5k>SKiAm4W>SKiAm4W>nJnAR5R>mJoAS5Q>lJPBT5P>kJQBU5o=iJTBV5l=iJUBW5m=hJRBX5n=hJRBX5o=gJQBY5o=gJQBY5n=hJRBX5n=iJQBW5o=iJQBW5o=iJQBW5n=jJRBV5n=iJTBV5l=jJTBV5k=hJXBX5h=hJYBW5f=jJ[BU5d=lJX\\OMg5W5o=oJY\\OJh5W5n=PKY\\OJj5U5l=RKZ\\OIj5U5l=RKZ\\OIj5U5k=SK[\\OHk5T5k=RK[\\OKi5S5l=PK]\\OMg5S5l=lJa\\O1c5S5l=lJa\\O1d5R5j=nJb\\O0d5R5g=RKd\\OLe5R5f=TKd\\OJf5R5f=UKb\\OJi5P5e=VKb\\OJi5P5e=UKb\\OLi5o4e=TKc\\OMh5o4e=SKc\\OOh5n4h=oJ`\\O4h5m4Y>SKgAm4Y>SKhAl4W>UKiAk4W>UKjA
j4V>VKjAj4V>VKjAj4V>VKjAj4W>UKiAk4_=oJS]O1`5P5[=TK^Cl4a, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. " + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_53.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/45.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "dbc6460O2N2N`30[Lhi0V6jCPJhB0O11O1NW15aN33NO110N06KL40000OHM3N:2V6S9bIoFQf0Y8W1M4L300000001O0000000000000000000000000000000000000000O1001O0000000000O1000000001O000000O1000000000000000000000000000000000000O1O1gFiGbJ^8]5gGVB5n6`8e4XGPEMQOd3JnL0[=^;c60000001O001O00001O000000000000001O000000001O000000001O0000001O0000000000001O0000001O0000001O001O00000000000000001O000000000000001O00000000O100O1O1J6L4H`0iET[O^1Mk16QL67Fh0_o0RO3N1O2O001N1OWe_l0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "V]oT15fo05M3N2N2N2O1O10`0@1O0000000000O2O001O3ROSQOc0\\o0I4L6J\\bY2" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": 
"T_Qi0a03GO7^n0o1gQOeMPm0P4PSOYL4M3Ldi0[8RWObGVe0a:RO3N1N2O0O2O0O101O000010O000000000000001O01O000O101O00001O00001O000000000000000001O00000000O11O000001O000000000000000000000000000000001O0000000000000O1000001O000000000000000000000000001O0000000000001O00000000000000000O2O0000001O001O001O00001O0000001O01O0001O00000000000000000O1000000000000000000000000000000000000000000000000000001O000O100000000000000000001O000000000000001O00000000000O100000000000000000000000001O000O100000001O0000001O0O10000000002N3M3M2N1O1O00001O001O00001O00000000001O00001O2N1O1O00000000O1O100000O10000000001O001O001O00001O001O0010O01OO100O100O10000000000O10O01O100001O00000000000O1000001O01O00000000001O010O010O000000000001O01O001O00000001O01O0001O001O00000000001O000000000000000001O000001O00000000000000000001O000000O1000001O00000000001O000000000000000000000000000N2N2O1O10000000001O0001O00000000000000O10000O10001O00000001O0000000000000000000001O0O1000000001O0000000000000000000000000000O10000000000001O000000000001O000000000YA" + } + ], + "question": "Which of the following mask :, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
None of the above are in the mirror" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_54.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/3005.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "PPU>2no000001O00001O001O0000001O00001O00001O001O0000001O00001O0000001O0000001O001O0000001O0000001O00001O0000001O00001O0000001O0000001O0000001O0000001O000000001O000000001O000000001O00001O00001O000000001O000000001O000000001O000000001O0000001O0000001O00001O000000000000O10000O1O1O100O1O1O1001O00001O001O001O0000000000001O001O1O1O0000O1001O001O001O1O00O1000000001O0000000000000000001O00001O000000000000001O001O0000001O0000001O0000000000000000O1NRPQc0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "Tea1?_o05M1N101O00001O00000000000000000000000000000000O1000000000000000000O100000000000O100O2N1O2M4I6J7J6M4L4M3L2O000O101O0000O2O001N2N4K5K7Gb0UOX[YT1" + } + ], + "question": "Are and located within the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. 
Neither nor is in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_55.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1626.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "k`gk0:`o07K4L5K4K5M3M3K5K5L4K5L4L4J6M3N2L4L4L4M3K5K5M3J7L3M3M3L4J6L4L4L4L4L4M3M3L4K5L4L4L4K5M3K5L4M3M3K5L4M3L4K5K5L4M3M3L4L4L4L4L5K4L4L4L4M3L4M3L4M3M3L4N2N2O1O1O1O100O1000010O0000000000000000000001O000000000000000001O00000000000000000000000000000000000001O000000000000000000000000000000000000000001O00000000000001O0000000000000000000000000000000000000000000000001O0000000000O2O01O0000000000000000000000000O1001O001O00000000000000000000000000000000000000000001O0000000000000000000000001O0000000001O0000000000000000000000000000O10000001O000000O2O0000000001O00000000O100001O0000000000000001O0001O000000000000O10000001O0000000001O00000000O1001O000000000000000000000000001O00O10000000000000000001O000001O0000000000000000000000000001O0000000000000000000000000000000000001O000000O2O000001O0000000000oG" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. No" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_56.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2291.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "`_n52k>6L2O2nAHX=<]BKc=j0^OmNUCS1hP1h9]OmEA=7_OGW:`0PF_O;8a:7WE@97a:8cFF^99cFF^9:cFB`9=b20000001O000000010O0013Dd^V4" + }, + { + "size": [ + 480, + 640 + ], + "counts": "edk84k>2N2gALP=MRC9LLo, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
None of the above are in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_57.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1584.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "\\Z[93mo02M101N10001O00000O101O00000000001O0000000000000000000000000000000000000000000000000001O000000000O101O000O101N101N1Mfeil0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "RbVU11f0a0om0KaQO<^n0d0O1N2O2N8H1O000000001O00000001O01O001O0000O1N2N2O1N2N2N2N2O1N2N2M3N2O1N2N2MoQOVORm0i0mROYOSm0e0lRO^OTm0`0kROCUm0:mROGSm07mROKSm02oROOQm0OPSO2Pm0LoRO6Rm0ImRO9Sm0EmRO=Sm0BmRO?Sm0_OmROd0Tn03N2N2N2O1M3N2N3N1N2N2N2N2O1N2N2N2N2O001O1001O001O3M4K4[OoQOQO_n05Y^8" + } + ], + "question": "Can you tell me if and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_58.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/647.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "d_d5`04h0Wi0h5A6M2OO010O002N1N3M3M3M2O101N1O101O00001O00000000000001O0000000001O00000000000000O101O001O010O4L3M2N4L10O0O1O100O101N1M4]Mi2kMhf\\P1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": 
"bidl01e0;D1mm0_1N2O0O1O100O1O1O001O100O1O1O100O1O1O1O100O1O1O100O100O10O01000000000000O0100000O10000000000O100O0100000000000O1000O1000O100000000000000O00100O100O100O10O10000000O100O100000O10O100000000000000O010O100O100O100O100000O10O1O100O1O100O001O100O1DSM`SOQ3]l0;N2O1O1O010O101N10O010000O1lNXLoUOh3Tk001000O100O10000O1000O010000O1000000[KTLT]Ol3lb0TLU]Ol3`g00O10O0100O01O1O100O00001O001O001O1O001O1O1O1O1O1O001O1O1O1O001O001O00001O1O100O1O1O001O1N2O001O1O001O001O1O001O001O100O1O00001N101O1O001O1O00001O001O1O00100O001O1O1O1O1O1O001O1O1N2O00001O001O1O10O01O1O001O1N2O001O001O1O1O001O1O001O001O1O1O001O001O001O1O001O001O001O001O0010O01O00001O3M2N001O1O1O1N10001O00001O1O1O1O10O01O000O101O001O0000001O00000^OgPO=`o0N1O001O00000O010O1O1O2Nho:L]PE0O2O01N1000eP2" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "bii11`03hn0n0E?Bi0WOP1`ROoLQl0f3J7I>C?@1O010O00010O01O00001O00010O0010O000001O00010O001O010O00010O01O0100O010O01O01O00001O00100O1O010O000010O00001O1O010O0001O001O010O01O01O01O0001O001O1O10O01O001O00001O001O010O00001O01O01O001000O10O00010O0000010O01O0010O00010O000010O010O0010O001O010O0100O1O10O01O0001O01O010O0010O1O001O0010O01O1O1O1O1O0000000000O10001O01O001O00001O00001O0000001O00001O1O001O1O001O0000001O000O10001O001O001O001O001O001O001O100N101O00001O00001O00000000001O00001O00001O002N5J2O1O00000001O0McLdSO[3]l0eLcSO[3]l0eLcSO[3]l0eLdSOZ3\\l0dLfSO\\3`l0O00001O001O1O1O1N11O000000000000000000000001O00000000O1001O0000001O00001O001O1O1O1O1O00001O000000000000000000000000001O1O1O001O0000000ZMkROa2Vm0\\MmROc2Xm0000001OO10000O1000000001O00001O0000001O00001O00001O00001O01O01O001O00001O0000001O0000001O1O1O0000001O001O00001O1O1O001O001O001O0000O10000001O00001O001O001O0000001O00001O001O1O00001O00001O00001O00001O001O1O00001O00000000001O1O1O1O00000000001O001O00001O000000001O00001O00001O0000001O1O1O001O00001O00000O2O00001O1O0000001O00001O001O00001O0O2O00001O00001O000000001O001O001O0000001O00001O00010O1O001O00001O0000001O00001O00001O00001O00001O0000001O001O1N101O00000000001O001N100O
2O00VPld0" + } + ], + "question": "Which of the following mask :, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. None of the above are in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_59.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/256.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "o_e4;bo05L4L4L3N1O2N2N101N100O1O2O0O100O100000O0100000O100O10000O100O100O1O1O2N100O2N2N1O2N2M3M4L5J\\`kQ1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "Zoi86io01O1O100O010O100O100O100N20O01O1000O01O10O0100O100O10O0100O1O100O100O10000O010O100O1O100O10000O100O1O100O010O10000O100O100O100O100O10000O2O0O100O100O100O10000O10000O100000000O10000000001N10000OROmQO1Sn0LTROOkm0O[ROOdm0OaROO^m01eROL\\m03gROI[m07W1O2O0NVRm7NmmRH1O2N1N200O1N2O1O1O1O1O1O100O1O1O1O1N2N2N2K5M3L4N1O2N2O2N100O1001O00000O101O00001O0O10000O2O000O100O2O0O100O1000000O2O000000O010000O1000000O10000O0100000O10O100000000O010O100O100O00100O1O1O0O2L4M3O1N2N110O100000000001O0000001O00001O000O2O001O001O0010O01O001O00001O001O00001O0010O01O001O010O00100O001O001O010O001O001O010O001O1O1O0010O01O00100O001O010O1O00100O001O1O010O00100O001O1O01000O100O1O100O001O1O001O2N1O1O1O1O1O1OWP[=" + } + ], + "question": "Could you confirm whether and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_60.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00912042.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "RdY35hg05`YOMKa0Wc0Fh\\ONO>Vc0He\\OO2?", + "choices": [ + "A. Red and white.", + "B. Blue and white.", + "C. Black and orange.", + "D. White and black." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_61.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00912042.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "RdY35hg05`YOMKa0Wc0Fh\\ONO>Vc0He\\OO2?", + "choices": [ + "A. It is a mix of red, black, and white.", + "B. It is a mix of orange, black, and white.", + "C. It is a mix of blue, black, and white.", + "D. It is a mix of green, black, and white." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_62.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00912042.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "RdY35hg05`YOMKa0Wc0Fh\\ONO>Vc0He\\OO2 primarily composed of?", + "choices": [ + "A. Entirely made of high-strength plastic.", + "B. Primarily composed of aluminum and rubber.", + "C. A mix of carbon fiber and fiberglass.", + "D. A combination of metal and plastic." + ], + "answer": "D", + "type": "material", + "image": "images/vqa_63.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00912042.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "RdY35hg05`YOMKa0Wc0Fh\\ONO>Vc0He\\OO2?", + "choices": [ + "A. It features a solid orange color with black lettering.", + "B. It has a wavy blue and black design.", + "C. It is decorated with a pattern of geometric shapes.", + "D. It has a colorful floral and vine-like pattern." 
+ ], + "answer": "C", + "type": "texture/pattern", + "image": "images/vqa_64.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00912850.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "iWm>2\\g01RYO5hf0MnXO?MM`f0EbYOR1OkNle0P2[ZOWNLMhd0l2O1G711O100OI_[OlL2O^d0W31kL`[OV3`d0jL`[OW3_d050000O2O00O01O001O01O01O1O1000O01O01000O1O1O100O1001O1O0O2O001O001N10000O1000O1000O1L31O01O010001O1O001O0000000L4O001O1O1O1001O0O1O10000O21OO1O000O100000N2O100000000O2O1O00001O00000001O001O100O01OO1001O5L0O00100O01O0O2OO2N101O1O2N1O00012N0000000O02N001O5K1O3M2N1N3M2M5F8N1XMW[OW2ld0gMW[OV2kd0gMW[OV2md0hMV[OS2^e0M7aNPZOa0Tf0]OoYO>Uf0@mYOYf0^OiYO?Xf0BkYO9Wf0GiYO8Xf0GiYO8Xf0GjYOM]O0Qg02[aU5" + } + ], + "question": "What is the primary material of the filling in ?", + "choices": [ + "A. Crab meat.", + "B. Spicy tuna.", + "C. Avocado and carrot.", + "D. Tofu." + ], + "answer": "A", + "type": "material", + "image": "images/vqa_65.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00915597.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": 
"[bP55jo02N2O1N1O1O2N101N1O100O1O100O100O1O1YRODjk0Vk0AjTOb0Sk0_OlTOd0Qk0\\OoTOg0oj0YOPUOj0mj0VOSUOn0jj0ROUUOQ1hj0oNXUOU1dj0kN\\UOY1aj0fN_UO]1^j0cNbUOa1Zj0_NfUOd1Xj0[NhUOi1Tj0XNkUOl1Qj0TNoUOo1ni0QNRVOS2ji0mMVVOV2gi0lMWVOX2ei0iMYVO[2di0fMZVO_2bi0bM[VOd2ai0]M]VOg2`i0ZM]VOl2_i0VM^VOP3]i0QMaVOU3Zi0lLcVOX3[i0iLcVOZ3[i0gLcVO\\3[i0eLbVO^3]i0dL`VO^3`i0bL]VOa3bi0`L\\VOb3ci0_L[VOb3ei0\\101O000010ZJ]VOn4ci0RK_VOl4bi0SK`VOl4`i0SKbVOk4_i0TKdVOi4]i0VKeVOh4\\i0WKfVOh4Zi0WKiVOf4Xi0YKhVOg4Yi0XKhVOg4Yi0XKgVOh4[i0VKeVOk4\\i0SKdVOm4^i0QKbVOo4Wj0nK[UOj2ej0WMeUO^2\\j0bMhUOZ2Wj0VMVUONc0j2Wj0XMWUONc0h2Vj0ZMXUONc0f2Uj0\\MYUOOb0c2Uj0^MZUOOc0a2Sj0_M[UO0c0_2Rj0aM\\UO0c0]2Qj0cM]UO1b0Z2Qj0eM^UO1c0W2oi0hM_UO1d0U2li0jMaUO2d0Q2ki0mMcUO1d0o1ji0oMcUO2e0l1hi0RNdUO3e0h1gi0UNeUO3f0e1ei0XNfUOG@3W1i1ei0]NeUOG_O4V1f1gi0_NeUOG^O4T1f1ji0_NeUOG^O3R1f1mi0_NdUOH]O4P1d1Pj0`NdUOH\\O4n0d1Sj0`NdUOH\\O3l0d1Vj0aNbUOH\\O4j0b1Zj0aNaUOI[O4h06XOf0Wk0VO_UOKZO3f06\\Oc0Vk0YO`UOJXO4e06B=Sk0^O_UOKWO5c04K7mj0E_UOKWO4`071Okj0J^UOLWO4=87Hhj01]UOKWO5;9=_Ofj07\\UOLWO48Xl0A_TOMUO51?[l0^O_TOOVO3Oa0[l0^O_TOOWO3Mb0\\l0[OaTO0WO2Je0]l0ZOaTO0YOT1Vl0kNbTO2WOS1Vl0lNcTO1XOR1Ul0lNdTO2WOR1Tl0mNeTO1XOQ1Sl0mNfTO3VOP1Sl0nNfTO3XOm0Sl0oNfTO4WOm0Rl0POgTO4WOj0Sl0QOgTO5VOj0Rl0ROhTO4UOk0Sl0POiTO6SOj0Sl0QOiTO6SOi0Ul0POgTO9SOh0Ul0POfTO>POc0Zl0oNdTOm1\\k0SNcTOo1]k0QNaTOP2_k0PN`TOR2`k0nM^TOT2ak0lM^TOU2ck0kM[TOW2dk0jMZTOW2gk0hMXTOY2hk0hMWTOX2jk0gMUTOo0AOYl0SOTTOh0ASO3P1Yl0TOSTOj0BPO4O0g0Wl0ASTOh0EmNb0d0gk0GQTOi0d0TO`k03nSOh0^m0XObROg0_m0XOcROg0\\m0ZOeROe0[m0ZOgROd0Ym0]OiROa0Wm0_OjRO`0Um0@nRO>Rm0BoRO]ROBdm0a0YRO_Ohm0b0URO^Omm0c0QRO]Oom0e0nQO\\ORn0g0kQOXOVn0j0gQOWOYn0W110RROnNol0V2K3M2N1O1O2N1K6N1N2N2O0O1O100O00iLbSOm2_l0RMbSOn2]l0SMcSOl2]l0TMcSOl2^l0SMcSOl2]l0UMcSOj2]l0VMdSOj2\\l0UMdSOk2\\l0VMdSOi2\\l0WMeSOh2\\l0WMeSOi2[l0WMeSOh2\\l0XMdSOg2]l0ZMcSOd2^l0\\McSOb2\\l0aMdSO^2Zl0dMhSOY2Xl0iMhSOU2Yl0kMiSOR2Xl0nMiSOOF`1bl0aNiSOKIc1]l0cNlSOFIf1\\l0dN[TOZ1fk0fN[TOX1fk0hNZTOW1fk0jN[TO4PO:el0B\\TO3oN:fl0C[TO2PO:fl0D[TO1PO9fl0F[TOOPO:gl0FYTO0PO:gl0FZTOOPO9gl0G[TONoN:gl
0G[TOOnN9hl0H[TOMoN:gl0G\\TONmN:hl0G\\TONmN:hl0H\\TOMmN9il0H\\TOMlN:il0H]TOLkN?", + "choices": [ + "A. White.", + "B. Yellow.", + "C. Green.", + "D. Brown." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_66.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00915597.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "[bP55jo02N2O1N1O1O2N101N1O100O1O100O100O1O1YRODjk0Vk0AjTOb0Sk0_OlTOd0Qk0\\OoTOg0oj0YOPUOj0mj0VOSUOn0jj0ROUUOQ1hj0oNXUOU1dj0kN\\UOY1aj0fN_UO]1^j0cNbUOa1Zj0_NfUOd1Xj0[NhUOi1Tj0XNkUOl1Qj0TNoUOo1ni0QNRVOS2ji0mMVVOV2gi0lMWVOX2ei0iMYVO[2di0fMZVO_2bi0bM[VOd2ai0]M]VOg2`i0ZM]VOl2_i0VM^VOP3]i0QMaVOU3Zi0lLcVOX3[i0iLcVOZ3[i0gLcVO\\3[i0eLbVO^3]i0dL`VO^3`i0bL]VOa3bi0`L\\VOb3ci0_L[VOb3ei0\\101O000010ZJ]VOn4ci0RK_VOl4bi0SK`VOl4`i0SKbVOk4_i0TKdVOi4]i0VKeVOh4\\i0WKfVOh4Zi0WKiVOf4Xi0YKhVOg4Yi0XKhVOg4Yi0XKgVOh4[i0VKeVOk4\\i0SKdVOm4^i0QKbVOo4Wj0nK[UOj2ej0WMeUO^2\\j0bMhUOZ2Wj0VMVUONc0j2Wj0XMWUONc0h2Vj0ZMXUONc0f2Uj0\\MYUOOb0c2Uj0^MZUOOc0a2Sj0_M[UO0c0_2Rj0aM\\UO0c0]2Qj0cM]UO1b0Z2Qj0eM^UO1c0W2oi0hM_UO1d0U2li0jMaUO2d0Q2ki0mMcUO1d0o1ji0oMcUO2e0l1hi0RNdUO3e0h1gi0UNeUO3f0e1ei0XNfUOG@3W1i1ei0]NeUOG_O4V1f1gi0_NeUOG^O4T1f1ji0_NeUOG^O3R1f1mi0_NdUOH]O4P1d1Pj0`NdUOH\\O4n0d1Sj0`NdUOH\\O3l0d1Vj0aNbUOH\\O4j0b1Zj0aNaUOI[O4h06XOf0Wk0VO_UOKZO3f06\\Oc0Vk0YO`UOJXO4e06B=Sk0^O_UOKWO5c04K7mj0E_UOKWO4`071Okj0J^UOLWO4=87Hhj01]UOKWO5;9=_Ofj07\\UOLWO48Xl0A_TOMUO51?[l0^O_TOOVO3Oa0[l0^O_TOOWO3Mb0\\l0[OaTO0WO2Je0]l0ZOaTO0YOT1Vl0kNbTO2WOS1Vl0lNcTO1XOR1Ul0lNdTO2WOR1Tl0mNeTO1XOQ1Sl0mNfTO3VOP1Sl0nNfTO3XOm0Sl0oNfTO4WOm0Rl0POgTO4WOj0Sl0QOgTO5VOj0Rl0ROhTO4UOk0Sl0POiTO6SOj0Sl0QOiTO6SOi0Ul0POgTO9SOh0Ul0POfTO>POc0Zl0oNdTOm1\\k0SNcTOo1]k0QNaTOP2_k0PN`TOR2`k0nM^TOT2ak0lM^TOU2ck0kM[TOW2dk0jMZTOW2gk0hMXTOY2hk0hMWTOX2jk0gMUTOo0AOYl0SOTTOh0ASO3P1Yl0TOSTOj0BPO4O0g0Wl0ASTOh0EmNb0d0gk0GQTOi0d0TO`k03nSOh0^m0XObROg0_m0XOcROg0\\m0ZOeROe0[m0ZOgROd0Ym0]OiROa0Wm0_OjRO`0Um0@nRO>Rm0BoRO]ROBdm0a0YRO_Ohm0b0URO^Omm0c0QRO]Oom
0e0nQO\\ORn0g0kQOXOVn0j0gQOWOYn0W110RROnNol0V2K3M2N1O1O2N1K6N1N2N2O0O1O100O00iLbSOm2_l0RMbSOn2]l0SMcSOl2]l0TMcSOl2^l0SMcSOl2]l0UMcSOj2]l0VMdSOj2\\l0UMdSOk2\\l0VMdSOi2\\l0WMeSOh2\\l0WMeSOi2[l0WMeSOh2\\l0XMdSOg2]l0ZMcSOd2^l0\\McSOb2\\l0aMdSO^2Zl0dMhSOY2Xl0iMhSOU2Yl0kMiSOR2Xl0nMiSOOF`1bl0aNiSOKIc1]l0cNlSOFIf1\\l0dN[TOZ1fk0fN[TOX1fk0hNZTOW1fk0jN[TO4PO:el0B\\TO3oN:fl0C[TO2PO:fl0D[TO1PO9fl0F[TOOPO:gl0FYTO0PO:gl0FZTOOPO9gl0G[TONoN:gl0G[TOOnN9hl0H[TOMoN:gl0G\\TONmN:hl0G\\TONmN:hl0H\\TOMmN9il0H\\TOMlN:il0H]TOLkN in the image?", + "choices": [ + "A. The masked object has a smooth, glossy surface.", + "B. The masked object has serrated or jagged edges.", + "C. The masked object has small dark spots on its surface.", + "D. The masked object is covered in tiny black specks." + ], + "answer": "C", + "type": "texture/pattern", + "image": "images/vqa_67.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00926777.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "oX[=4hg06L4N1N100O2O001O001N101O010O010O01000O0100O010O1N1N3N1N3M3N1N3Nonk9" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Black.", + "B. Red.", + "C. White.", + "D. Brown." + ], + "answer": "D", + "type": "color", + "image": "images/vqa_68.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00926777.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "oX[=4hg06L4N1N100O2O001O001N101O010O010O01000O0100O010O1N1N3N1N3M3N1N3Nonk9" + } + ], + "question": "Which of the following best describes the shape of ?", + "choices": [ + "A. A rectangular object with sharp corners.", + "B. A cylindrical object.", + "C. A rectangular object with rounded corners.", + "D. A flat, circular object." 
+ ], + "answer": "C", + "type": "shape", + "image": "images/vqa_69.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00926777.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "XXW78ag07I7J7O0K5N201O000O2O0000000000000O101O0000000000000000000000000000000000000000000010OO10000000000000000000000000000000000000000000000000000O10000000001O00000000000000000000000001O000000000000000000001O000000000000000000001O000000002N1O1O1O1O1O3M1O001O001OVPh=" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. Metallic.", + "B. Plastic.", + "C. Wooden.", + "D. Ceramic." + ], + "answer": "A", + "type": "material", + "image": "images/vqa_70.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00926777.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "XXW78ag07I7J7O0K5N201O000O2O0000000000000O101O0000000000000000000000000000000000000000000010OO10000000000000000000000000000000000000000000000000000O10000000001O00000000000000000000000001O000000000000000000001O000000000000000000001O000000002N1O1O1O1O1O3M1O001O001OVPh=" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Red.", + "B. Black.", + "C. Brown.", + "D. White." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_71.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00946186.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "gk_25ig02J7N?", + "choices": [ + "A. Circular.", + "B. Square.", + "C. Rounded rectangular.", + "D. Oval." 
+ ], + "answer": "C", + "type": "shape", + "image": "images/vqa_72.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00946186.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "gk_25ig02J7N shown in the image?", + "choices": [ + "A. Wood.", + "B. Glass.", + "C. Metal.", + "D. Plastic." + ], + "answer": "D", + "type": "material", + "image": "images/vqa_73.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00946186.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "gk_25ig02J7N?", + "choices": [ + "A. White.", + "B. Pink.", + "C. Brown.", + "D. Black." + ], + "answer": "D", + "type": "color", + "image": "images/vqa_74.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00951281.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "[_Y36eg06J6I8I6J5M3HiNeYOY1Zf0iNbYOZ1\\f061O00O100001O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1M3J6I7J6I7J6JRbjb0" + } + ], + "question": "What is the texture or pattern of ?", + "choices": [ + "A. A grille with concentric ridges and radial spokes.", + "B. A pattern of small, rectangular tiles.", + "C. A grid pattern of small, square panes.", + "D. A perforated lattice pattern." 
+ ], + "answer": "A", + "type": "texture/pattern", + "image": "images/vqa_75.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00951281.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "[_Y36eg06J6I8I6J5M3HiNeYOY1Zf0iNbYOZ1\\f061O00O100001O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1M3J6I7J6I7J6JRbjb0" + } + ], + "question": "What is the shape of ?", + "choices": [ + "A. The masked object is circular.", + "B. The masked object is square.", + "C. The masked object is rectangular.", + "D. The masked object is arched." + ], + "answer": "C", + "type": "shape", + "image": "images/vqa_76.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00951281.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "[_Y36eg06J6I8I6J5M3HiNeYOY1Zf0iNbYOZ1\\f061O00O100001O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1M3J6I7J6I7J6JRbjb0" + } + ], + "question": "What is the shape of the vent on ?", + "choices": [ + "A. Rectangular.", + "B. Circular.", + "C. Arched.", + "D. Square." + ], + "answer": "B", + "type": "shape", + "image": "images/vqa_77.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00951281.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "[_Y36eg06J6I8I6J5M3HiNeYOY1Zf0iNbYOZ1\\f061O00O100001O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1M3J6I7J6I7J6JRbjb0" + } + ], + "question": "What is the color of the fan inside ?", + "choices": [ + "A. White.", + "B. Red.", + "C. Orange.", + "D. Black." 
+ ], + "answer": "D", + "type": "color", + "image": "images/vqa_78.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00975971.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "j\\P6T1lf0;E1O1O000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004L`0@`0@Rbh`0" + } + ], + "question": "Which of the following best describes the texture/pattern of in the image?", + "choices": [ + "A. It is a price tag with printed text.", + "B. It is a price tag with handwritten text.", + "C. It is a blank piece of paper without any text.", + "D. It is a sticker with a barcode on it." + ], + "answer": "B", + "type": "texture/pattern", + "image": "images/vqa_79.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00975971.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "j\\P6T1lf0;E1O1O000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004L`0@`0@Rbh`0" + } + ], + "question": "Based on the image, what is the shape of ?", + "choices": [ + "A. It is a rectangular object.", + "B. It is a square object.", + "C. It is a circular object.", + "D. It is a triangular object." + ], + "answer": "A", + "type": "shape", + "image": "images/vqa_80.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00975971.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "j\\P6T1lf0;E1O1O000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004L`0@`0@Rbh`0" + } + ], + "question": "What is the color of in the image?", + "choices": [ + "A. Red.", + "B. White.", + "C. Black.", + "D. Brown." 
+ ], + "answer": "B", + "type": "color", + "image": "images/vqa_81.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00981094.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "R_]>4lg0000000000000O10O1000000000000000000000O10000000O1000000000000O1O01000000000000000000000O10O100000000001O000000000000O10O1000000000000000000O10O1000000000000000O1000000000O10000000O1000000000000000O10O1000000000O2O0000000000O10000000000000O100000000000O10000000000000000L5O22L6JSaY6" + } + ], + "question": "What is the primary color of 's shaft?", + "choices": [ + "A. Black.", + "B. Red.", + "C. Silver.", + "D. Blue." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_82.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00981094.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "R_]>4lg0000000000000O10O1000000000000000000000O10000000O1000000000000O1O01000000000000000000000O10O100000000001O000000000000O10O1000000000000000000O10O1000000000000000O1000000000O10000000O1000000000000000O10O1000000000O2O0000000000O10000000000000O100000000000O10000000000000000L5O22L6JSaY6" + } + ], + "question": "Which of the following descriptions accurately represents the texture or pattern of ?", + "choices": [ + "A. The handle is smooth and made of two different colors of plastic.", + "B. The handle features a series of parallel grooves running along its length.", + "C. The metallic grip area has a knurled, cross-hatched pattern for a better hold.", + "D. The entire surface of the object is smooth and polished metal." 
+ ], + "answer": "C", + "type": "texture/pattern", + "image": "images/vqa_83.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01002306.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 902 + ], + "counts": "nie;5go06J5K5K5K5K5L4K5M3M3O0O2O1N2O1O1O1O10O0100O1N2O1O1O1N2O001N2O1O1O1N2O1O1O0O2O1O1O1N2O1O1O1N101O1N101O000O1000000O1000000O10O10O1000000O1000000O1000000O0100000O10000O1000000O1000O10O1000000O10000O0100O1O001N2DK5K6J6K5J6JeWi1" + } + ], + "question": "What is the color of the main body of ?", + "choices": [ + "A. Blue.", + "B. White.", + "C. Brown.", + "D. Blue and white." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_84.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01002306.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 902 + ], + "counts": "nie;5go06J5K5K5K5K5L4K5M3M3O0O2O1N2O1O1O1O10O0100O1N2O1O1O1N2O001N2O1O1O1N2O1O1O0O2O1O1O1N2O1O1O1N101O1N101O000O1000000O1000000O10O10O1000000O1000000O1000000O0100000O10000O1000000O1000O10O1000000O10000O0100O1O001N2DK5K6J6K5J6JeWi1" + } + ], + "question": "Which of the following statements about the color of is correct?", + "choices": [ + "A. The hull of the object is painted dark blue.", + "B. The entire object is covered by a large blue tarp.", + "C. The nameplate on the side features white lettering.", + "D. There is a red life preserver attached to its side." 
+ ], + "answer": "D", + "type": "color", + "image": "images/vqa_85.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01002306.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 902 + ], + "counts": "nie;5go06J5K5K5K5K5L4K5M3M3O0O2O1N2O1O1O1O10O0100O1N2O1O1O1N2O001N2O1O1O1N2O1O1O0O2O1O1O1N2O1O1O1N101O1N101O000O1000000O1000000O10O10O1000000O1000000O1000000O0100000O10000O1000000O1000O10O1000000O10000O0100O1O001N2DK5K6J6K5J6JeWi1" + } + ], + "question": "What is the color of the component mounted at the rear of ?", + "choices": [ + "A. Blue.", + "B. White.", + "C. Black.", + "D. Brown." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_86.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01002306.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 902 + ], + "counts": "Vfg<1oo0101N1O10O100000000000_POOQo02nPONSo03kPOMVo0?05KO1O12N1kPO[Oln0e0QQO]Oon0j01M3N4K20001N100O1OO2O001N10001N02O0O2N10100O0100O010O10O00100N1O2D`PO5bo032N2MiYd=" + } + ], + "question": "What is the color of the head of ?", + "choices": [ + "A. Gray.", + "B. Blue.", + "C. White.", + "D. Black." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_87.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01002306.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 902 + ], + "counts": "Vfg<1oo0101N1O10O100000000000_POOQo02nPONSo03kPOMVo0?05KO1O12N1kPO[Oln0e0QQO]Oon0j01M3N4K20001N100O1OO2O001N10001N02O0O2N10100O0100O010O10O00100N1O2D`PO5bo032N2MiYd=" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Blue and white.", + "B. Pure white.", + "C. A mix of white, brown, and gray.", + "D. Black and brown." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_88.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01002306.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 902 + ], + "counts": "Vfg<1oo0101N1O10O100000000000_POOQo02nPONSo03kPOMVo0?05KO1O12N1kPO[Oln0e0QQO]Oon0j01M3N4K20001N100O1OO2O001N10001N02O0O2N10100O0100O010O10O00100N1O2D`PO5bo032N2MiYd=" + } + ], + "question": "What color is the beak of ?", + "choices": [ + "A. The beak is yellow.", + "B. The beak is black.", + "C. The beak is white.", + "D. The beak is grey." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_89.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01002306.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 902 + ], + "counts": "[f^>3lo01O1O001O0100001O1O001O1^Oa001M4UQOSO_n0Q1]QOoNbn0S1\\QOnNdn0W100O0100000001O003MO\\O\\QONcn01`QON_n01dQON[n02hQOJYn05jQOHWn07lQOEVn0;g0O1O100000001O1O1O1O1O1O1O2N1OhZU<" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Pure white.", + "B. A combination of grey and brown.", + "C. Black and white.", + "D. Blue and grey." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_90.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01002306.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 902 + ], + "counts": "[f^>3lo01O1O001O0100001O1O001O1^Oa001M4UQOSO_n0Q1]QOoNbn0S1\\QOnNdn0W100O0100000001O003MO\\O\\QONcn01`QON_n01dQON[n02hQOJYn05jQOHWn07lQOEVn0;g0O1O100000001O1O1O1O1O1O1O2N1OhZU<" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. It is primarily blue and white.", + "B. It has a combination of white, grey, and brown feathers.", + "C. It is completely black.", + "D. It is mostly white with some black markings." 
+ ], + "answer": "B", + "type": "color", + "image": "images/vqa_91.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01002306.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 902 + ], + "counts": "[f^>3lo01O1O001O0100001O1O001O1^Oa001M4UQOSO_n0Q1]QOoNbn0S1\\QOnNdn0W100O0100000001O003MO\\O\\QONcn01`QON_n01dQON[n02hQOJYn05jQOHWn07lQOEVn0;g0O1O100000001O1O1O1O1O1O1O2N1OhZU<" + } + ], + "question": "Which of the following statements correctly describes a shape characteristic of in the image?", + "choices": [ + "A. The wings of the object are fully folded against its body.", + "B. The tail of the object is spread out in a fan shape.", + "C. The beak of the object is noticeably curved upwards.", + "D. The entire body of the object forms a straight, horizontal line." + ], + "answer": "B", + "type": "shape", + "image": "images/vqa_92.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01002306.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 902 + ], + "counts": "[f^>3lo01O1O001O0100001O1O001O1^Oa001M4UQOSO_n0Q1]QOoNbn0S1\\QOnNdn0W100O0100000001O003MO\\O\\QONcn01`QON_n01dQON[n02hQOJYn05jQOHWn07lQOEVn0;g0O1O100000001O1O1O1O1O1O1O2N1OhZU<" + } + ], + "question": "What is shown in the image?", + "choices": [ + "A. A bird swimming in the water.", + "B. A bird perched on a boat cover.", + "C. A flying bird.", + "D. A bird sitting on the roof of a boat." + ], + "answer": "C", + "type": "shape", + "image": "images/vqa_93.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01010195.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "nWob0Q1of01O2N1O1O001O000000000000000001O00001N2O1O001N2O1O0O2O1O1N101O1N2O1O1N2O1O1N2O1O1N3N1M\\go3" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Gold.", + "B. Gray.", + "C. 
White.", + "D. Red." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_94.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01010195.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "nWob0Q1of01O2N1O1O001O000000000000000001O00001N2O1O001N2O1O0O2O1O1N101O1N2O1O1N2O1O1N2O1O1N3N1M\\go3" + } + ], + "question": "What is the texture of ?", + "choices": [ + "A. Glossy.", + "B. Matte.", + "C. Rough.", + "D. Ribbed." + ], + "answer": "B", + "type": "texture/pattern", + "image": "images/vqa_95.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01010195.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "nWob0Q1of01O2N1O1O001O000000000000000001O00001N2O1O001N2O1O0O2O1O1N101O1N2O1O1N2O1O1N2O1O1N3N1M\\go3" + } + ], + "question": "To which device does belong?", + "choices": [ + "A. It is a red alarm button on the console.", + "B. It is a spherical paperweight used to hold down papers.", + "C. It is the trackball of an ergonomic mouse.", + "D. It is a hold-indicator light for the telephone." 
+ ], + "answer": "C", + "type": "shape", + "image": "images/vqa_96.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01010195.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "URc=?`g0d0]Od0\\Oc0]Od0\\Od0[Od0]O:F1O00000O1000000000O100000O100000000000000O01000000000000000O100000O1000000000O10000000000000O10O10000000000000000O10O10000000000000O10000000O10000000O10000000000000O10O10000000000000000O10O10000000000000O10000000O10000000O1000000000000000O010000000000000000O1000O100000000000O1000000000O100000O1000000000000000O010000000000000000O1000O100000000000O100000000000O2O:Fe0[Od0\\Od0\\Oe0[Od0\\Oe0ZO\\Ri5" + } + ], + "question": "Which of the following best describes the shape of ?", + "choices": [ + "A. It is a square.", + "B. It is rectangular.", + "C. It is a trapezoid.", + "D. It is a parallelogram." + ], + "answer": "B", + "type": "shape", + "image": "images/vqa_97.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01010195.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "URc=?`g0d0]Od0\\Oc0]Od0\\Od0[Od0]O:F1O00000O1000000000O100000O100000000000000O01000000000000000O100000O1000000000O10000000000000O10O10000000000000000O10O10000000000000O10000000O10000000O10000000000000O10O10000000000000000O10O10000000000000O10000000O10000000O1000000000000000O010000000000000000O1000O100000000000O1000000000O100000O1000000000000000O010000000000000000O1000O100000000000O100000000000O2O:Fe0[Od0\\Od0\\Oe0[Od0\\Oe0ZO\\Ri5" + } + ], + "question": "What is the primary material of ?", + "choices": [ + "A. Plastic.", + "B. Wood.", + "C. Metal.", + "D. Glass." 
+ ], + "answer": "B", + "type": "material", + "image": "images/vqa_98.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01070155.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "id_89fo0100000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000Y[m<" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Orange.", + "B. Blue.", + "C. White.", + "D. Black." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_99.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01070155.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "id_89fo0100000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000Y[m<" + } + ], + "question": "What is the shape of ?", + "choices": [ + "A. Cuboid.", + "B. Cylindrical.", + "C. Conical.", + "D. Rectangular." + ], + "answer": "B", + "type": "shape", + "image": "images/vqa_100.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01070155.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "id_89fo0100000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000Y[m<" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. Paper.", + "B. Cloth.", + "C. Wood.", + "D. Plastic." 
+ ], + "answer": "D", + "type": "material", + "image": "images/vqa_101.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01071650.jpg", + "mask_rles": [ + { + "size": [ + 821, + 1024 + ], + "counts": "XR1:Vi0_1eN]4cKR3nL22N2N1N100O1O001N1O2N3_JTCSNi>1c6iNko<;dnBY1XXOkNfe0W3hNY1fNY1gNX1hNY1ROm001O00000000O100O11O2N2N001O1O0O10001N010O1O1O0O1N3N02N2M300O1O1O1000000000000005K2N1O2N1O1O001O00aNTAUHl>i4mAoK[OWOg>X4`C`KkM7e>k3\\ETLc:^3kEbLU:P3YFQMf9a2iF^MW9S2XGmMe8h1iGXNU8\\1ZHcNe7P1jHoNh6QOmAc1j7[Oj5BZBf0[8GP3QOYFQ1[NHk86`2UOhE]1kNUOQ99[2VOZEk1ZOcNU9;W2XOkDX2IUNU9;V2S2gD`MU9?", + "choices": [ + "A. The object is rectangular.", + "B. The object is circular.", + "C. The object has a checkerboard pattern of squares.", + "D. The object is oval." + ], + "answer": "A", + "type": "shape", + "image": "images/vqa_102.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01071650.jpg", + "mask_rles": [ + { + "size": [ + 821, + 1024 + ], + "counts": "XR1:Vi0_1eN]4cKR3nL22N2N1N100O1O001N1O2N3_JTCSNi>1c6iNko<;dnBY1XXOkNfe0W3hNY1fNY1gNX1hNY1ROm001O00000000O100O11O2N2N001O1O0O10001N010O1O1O0O1N3N02N2M300O1O1O1000000000000005K2N1O2N1O1O001O00aNTAUHl>i4mAoK[OWOg>X4`C`KkM7e>k3\\ETLc:^3kEbLU:P3YFQMf9a2iF^MW9S2XGmMe8h1iGXNU8\\1ZHcNe7P1jHoNh6QOmAc1j7[Oj5BZBf0[8GP3QOYFQ1[NHk86`2UOhE]1kNUOQ99[2VOZEk1ZOcNU9;W2XOkDX2IUNU9;V2S2gD`MU9?", + "choices": [ + "A. Red.", + "B. Yellow.", + "C. Blue.", + "D. Green." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_103.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01071650.jpg", + "mask_rles": [ + { + "size": [ + 821, + 1024 + ], + "counts": "XR1:Vi0_1eN]4cKR3nL22N2N1N100O1O001N1O2N3_JTCSNi>1c6iNko<;dnBY1XXOkNfe0W3hNY1fNY1gNX1hNY1ROm001O00000000O100O11O2N2N001O1O0O10001N010O1O1O0O1N3N02N2M300O1O1O1000000000000005K2N1O2N1O1O001O00aNTAUHl>i4mAoK[OWOg>X4`C`KkM7e>k3\\ETLc:^3kEbLU:P3YFQMf9a2iF^MW9S2XGmMe8h1iGXNU8\\1ZHcNe7P1jHoNh6QOmAc1j7[Oj5BZBf0[8GP3QOYFQ1[NHk86`2UOhE]1kNUOQ99[2VOZEk1ZOcNU9;W2XOkDX2IUNU9;V2S2gD`MU9?", + "choices": [ + "A. The object is predominantly yellow with a green checkered pattern.", + "B. The object is primarily white.", + "C. The object has a black body and an illuminated red light.", + "D. The object is dark gray." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_104.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01080826.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "k`[19eo04M2N101N1O100O10000O1O100O10O10O01M3N101O1O10O01000O010000O10O100000000000010O0001O0O102M3M4J^_Te0" + } + ], + "question": "What is in the image?", + "choices": [ + "A. It is a kitchen sponge.", + "B. It is a bar of soap.", + "C. It is a bottle of dish soap.", + "D. It is the handle of a kitchen utensil." + ], + "answer": "B", + "type": "shape", + "image": "images/vqa_105.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01080826.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "k`[19eo04M2N101N1O100O10000O1O100O10O10O01M3N101O1O10O01000O010000O10O100000000000010O0001O0O102M3M4J^_Te0" + } + ], + "question": "Which of the following options accurately describes located near the sink?", + "choices": [ + "A. 
A yellow and brown bar of soap.", + "B. A yellow cleaning sponge.", + "C. A part of a silver faucet.", + "D. A slice of a banana." + ], + "answer": "A", + "type": "color", + "image": "images/vqa_106.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01091580.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Vco23lo02O00100N101O1O001O001O1O001O1O001O1O001O001N2O001O1O001O1O001O010O1O001O1O001O1O001O001O1O001O1O001O000001O00001O0000010O01O0010O01O0010O0001O010O0010O01O0010O0000001N1O100O2N100O2N100O1O2O0O1O101N1O100O2O]QO@fm0?ZROCem0<\\ROEdm0:[ROGfm07ZROKem04\\ROMcm03\\ROOcm00^RO1am0N_RO3am0M^RO5bm0I_RO8`m0G`RO;_m0EaRO;_m0DaRO=_m0C`RO=bm0A_RO>an0O10001O000O2O00000010O00O2N10000010O0001O0000010O01O01O101N1O100O2O0O2N2Nj[T`0" + } + ], + "question": "What is the shape of the object indicated by ?", + "choices": [ + "A. The object is elongated and thin.", + "B. The object is round and smooth.", + "C. The object is bell-shaped with multiple lobes.", + "D. The object is bulbous and tapers at one end." + ], + "answer": "A", + "type": "shape", + "image": "images/vqa_107.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01091580.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Vco23lo02O00100N101O1O001O001O1O001O1O001O1O001O001N2O001O1O001O1O001O010O1O001O1O001O1O001O001O1O001O1O001O000001O00001O0000010O01O0010O01O0010O0001O010O0010O01O0010O0000001N1O100O2N100O2N100O1O2O0O1O101N1O100O2O]QO@fm0?ZROCem0<\\ROEdm0:[ROGfm07ZROKem04\\ROMcm03\\ROOcm00^RO1am0N_RO3am0M^RO5bm0I_RO8`m0G`RO;_m0EaRO;_m0DaRO=_m0C`RO=bm0A_RO>an0O10001O000O2O00000010O00O2N10000010O0001O0000010O01O01O101N1O100O2O0O2N2Nj[T`0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Orange.", + "B. Red.", + "C. Green.", + "D. Brown." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_108.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01091580.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Vco23lo02O00100N101O1O001O001O1O001O1O001O1O001O001N2O001O1O001O1O001O010O1O001O1O001O1O001O001O1O001O1O001O000001O00001O0000010O01O0010O01O0010O0001O010O0010O01O0010O0000001N1O100O2N100O2N100O1O2O0O1O101N1O100O2O]QO@fm0?ZROCem0<\\ROEdm0:[ROGfm07ZROKem04\\ROMcm03\\ROOcm00^RO1am0N_RO3am0M^RO5bm0I_RO8`m0G`RO;_m0EaRO;_m0DaRO=_m0C`RO=bm0A_RO>an0O10001O000O2O00000010O00O2N10000010O0001O0000010O01O01O101N1O100O2O0O2N2Nj[T`0" + } + ], + "question": "Based on the image, what is the shape of ?", + "choices": [ + "A. The object is spherical.", + "B. The object is curved.", + "C. The object is bell-shaped.", + "D. The object is cylindrical." + ], + "answer": "B", + "type": "shape", + "image": "images/vqa_109.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01095871.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "ob<`0_o02N2O000000O1000001O0001O000001O000001O000001O0001O000001O000001O000001O000001O0001O00000001O0001O000001O0001O00000001O0001O00000001O01O00000001O00012M2\\XO[Oi?f0d_ONZ`02S_Oa0m`0@_^OS1aa0kNn]Oh1Sb0VN\\]O\\2db0cMj\\Oo2Wc0QM\\\\OBWLa3]g0lLQ\\Oi3oc0WLe[OU4[d0kKY[Oa4gd0_KmZOm4Te0RKcXOCd1g5ie0fJbXOENN`0N0U6Pg0ZJbXOL;HF^6]g0nIbXO07\\6Wg0dIbXO14]6Zg0XJdXOj5]g0XJ_XOi5ag0[1000000000_OjXOWHXg0o6aXORI^h0e6c0C=lN_VOPKZj0e4f0YOXUORLmj0`3P1D?", + "choices": [ + "A. The masked object is a large, rectangular block.", + "B. The masked object is part of a long, horizontal structure.", + "C. The masked object is the main vertical support of the structure.", + "D. The masked object consists primarily of crisscrossing diagonal lines." 
+ ], + "answer": "B", + "type": "shape", + "image": "images/vqa_110.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01095871.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "ob<`0_o02N2O000000O1000001O0001O000001O000001O000001O0001O000001O000001O000001O000001O0001O00000001O0001O000001O0001O00000001O0001O00000001O01O00000001O00012M2\\XO[Oi?f0d_ONZ`02S_Oa0m`0@_^OS1aa0kNn]Oh1Sb0VN\\]O\\2db0cMj\\Oo2Wc0QM\\\\OBWLa3]g0lLQ\\Oi3oc0WLe[OU4[d0kKY[Oa4gd0_KmZOm4Te0RKcXOCd1g5ie0fJbXOENN`0N0U6Pg0ZJbXOL;HF^6]g0nIbXO07\\6Wg0dIbXO14]6Zg0XJdXOj5]g0XJ_XOi5ag0[1000000000_OjXOWHXg0o6aXORI^h0e6c0C=lN_VOPKZj0e4f0YOXUORLmj0`3P1D is correct?", + "choices": [ + "A. The main arm is a solid beam, not a lattice structure.", + "B. A hook is visible hanging from the object's arm.", + "C. It is the tallest structure in the image.", + "D. There is no operator's cab attached to the tower." + ], + "answer": "B", + "type": "shape", + "image": "images/vqa_111.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01103219.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "dSi>2lg03N1M3L3N3M3M1O1N101O2N3MIoXOEmf0IXiA0000000lXO6Rf0JnYO6Rf0>ZYOBff0`0XYO@hf0a0WYO_Ojf0a0UYO_Omf0i0?", + "choices": [ + "A. Triangular.", + "B. Cylindrical.", + "C. Irregular.", + "D. Rectangular." + ], + "answer": "D", + "type": "shape", + "image": "images/vqa_112.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01103219.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "dSi>2lg03N1M3L3N3M3M1O1N101O2N3MIoXOEmf0IXiA0000000lXO6Rf0JnYO6Rf0>ZYOBff0`0XYO@hf0a0WYO_Ojf0a0UYO_Omf0i0?", + "choices": [ + "A. The object is made of plastic.", + "B. The object is made of wood.", + "C. The object is made of metal.", + "D. The object is made of stone." 
+ ], + "answer": "B", + "type": "material", + "image": "images/vqa_113.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01103219.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "dSi>2lg03N1M3L3N3M3M1O1N101O2N3MIoXOEmf0IXiA0000000lXO6Rf0JnYO6Rf0>ZYOBff0`0XYO@hf0a0WYO_Ojf0a0UYO_Omf0i0 in the image?", + "choices": [ + "A. A structural support for the playground.", + "B. A curved wooden bench.", + "C. A flat-topped wooden stool.", + "D. A wooden toolbox." + ], + "answer": "C", + "type": "shape", + "image": "images/vqa_114.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01103275.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "gb_f03lg09[OESYO>Rg05I^OnXOe0Qg06O1O1O0001O1O001N2N2L4O001N003M2O000O2N1000000000001N100O1O1O110O001N\\]b0" + } + ], + "question": "What is the shape of ?", + "choices": [ + "A. It is cylindrical.", + "B. It is rectangular.", + "C. It is conical.", + "D. It is spherical." + ], + "answer": "C", + "type": "shape", + "image": "images/vqa_115.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01103275.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "gb_f03lg09[OESYO>Rg05I^OnXOe0Qg06O1O1O0001O1O001N2N2L4O001N003M2O000O2N1000000000001N100O1O1O110O001N\\]b0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. White.", + "B. Gray.", + "C. Brown.", + "D. Black." 
+ ], + "answer": "B", + "type": "color", + "image": "images/vqa_116.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01103275.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cTR4a0]g03M4N4K3N0OATYOMkf03VYOMhf04XYOLhf03ZYOMef03\\YOMcf04\\YOLcf06]YOJbf06^YOJbf06^YOJaf08^YOHbf07^YOIcf07]YOIcf07]YOIbf09]YOGcf08]YOIcf04YYO_O5=bf03aYOM_f02bYON^f0OeYO1[f0NfYO2Yf0NhYOGcf08c0N3N2O2N1N10c[Tc0" + } + ], + "question": "What is the overall shape of ?", + "choices": [ + "A. Crescent-shaped.", + "B. Rectangular.", + "C. Circular.", + "D. Triangular." + ], + "answer": "C", + "type": "shape", + "image": "images/vqa_117.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01103275.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cTR4a0]g03M4N4K3N0OATYOMkf03VYOMhf04XYOLhf03ZYOMef03\\YOMcf04\\YOLcf06]YOJbf06^YOJbf06^YOJaf08^YOHbf07^YOIcf07]YOIcf07]YOIbf09]YOGcf08]YOIcf04YYO_O5=bf03aYOM_f02bYON^f0OeYO1[f0NfYO2Yf0NhYOGcf08c0N3N2O2N1N10c[Tc0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Blue.", + "B. White.", + "C. Red.", + "D. Green." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_118.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01103275.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cTR4a0]g03M4N4K3N0OATYOMkf03VYOMhf04XYOLhf03ZYOMef03\\YOMcf04\\YOLcf06]YOJbf06^YOJbf06^YOJaf08^YOHbf07^YOIcf07]YOIcf07]YOIbf09]YOGcf08]YOIcf04YYO_O5=bf03aYOM_f02bYON^f0OeYO1[f0NfYO2Yf0NhYOGcf08c0N3N2O2N1N10c[Tc0" + } + ], + "question": "What is in the image?", + "choices": [ + "A. A drum rim made of plastic.", + "B. A buckle made of silver.", + "C. A drum rim made of metal.", + "D. A drumhead made of hide." 
+ ], + "answer": "C", + "type": "material", + "image": "images/vqa_119.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01108895.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "WUk23lg04K5K6K4gXO\\Oif0U1K4L5U]OnN^>X1PAVOm>Q1k@SOS?S1i@mNU?Y1f@hNW?_1U^OcMZ2P1^?e1a@[N\\?i1b@XN]?j1b@VN]?l1a@UN^?m1a@SN^?o1`@RN_?P2`@PN_?Q2a@oM^?S2`@nM_?T2`@lM_?V2_@kM`?W2_@iM`?Y2_@gM`?[2^@gM`?[2_@eM`?]2_@cM`?_2_@aM`?a2^@`Ma?b2]@_Mb?c2]@]Mb?d2]@]Mb?e2\\@\\Mc?f2\\@ZMc?h2\\@XMc?j2[@WMd?k2Z@VMe?l2Y@UMf?m2Y@SMg?m2X@UMf?m2Y@SMg?n2X@RMg?o2Y@QMf?Q3X@PMh?P3X@PMg?R3X@nLh?S3W@mLh?T3W@mLi?T3V@lLi?U3W@kLh?W3V@jLj?W3U@gLl?Z3T@fLl?[3S@eLl?]3S@cLm?]3R@dLm?^3R@bLm?_3R@bLn?_3Q@aLn?a3Q@_Lo?a3Q@_Ln?c3R@\\Ln?e3R@ZLm?g3T@XLk?j3U@ULj?l3W@SLi?n3V@RLi?o3X@PLi?P4V@PLj?P4V@PLk?o3T@RLl?n3S@SLm?m3S@SLm?m3R@TLn?l3R@TLn?l3R@TLn?m3Q@SLo?m3Q@SLo?m3Q@SLo?m3Q@SLo?m3Q@SLo?m3Q@SLo?m3Q@SLo?n3P@RLP`0n3P@RLo?o3Q@QLo?o3Q@QLo?o3Q@QLo?o3Q@QLo?o3R@PLn?Q4Q@oKo?Q4Q@oKo?Q4Q@oKo?Q4e_O^J9a1R`0Q4Q@oKo?Q4Q@oKo?Q4Q@oKo?^4d_ObK[`0R60000000O01O1O1ON3M4UNo_OfKP`0Z4o_OhKP`0X4P@jKn?V4R@kKm?U4S@lKl?T4U@kKl?T4T@lKl?T4T@mKk?S4U@mKk?S4U@mKk?S4U@mKl?R4T@nKl?R4U@mKk?S4U@mKk?R4V@nKj?R4V@nKk?P4V@PLj?P4V@hKR`0X4n_OiKQ`0V4P@kKo?U4Q@kKP`0S4Q@nKn?R4R@oKm?Q4S@PLl?o3U@RLj?n3V@SLi?m3W@SLj?k3W@ULi?k3W@SLk?l3W@QLk?o3U@nKn?R4R@oKn?o3S@QLm?o3S@RLl?m3U@SLk?m3U@TLj?l3V@ULj?i3W@WLi?i3W@WLj?h3V@XLj?g3W@WLl?h3T@VLo?h3R@YLn?f3R@[Lm?e3S@\\Ll?c3U@^Ll?fNW_Og4n0cLk?fNW_Oe4o0eLj?fNW_Od4P1fLj?fNV_Oc4P1gLk?eNU_Oc4Q1hLk?eNT_Ob4Q1iLk?eNT_Oa4R1jLk?eNS_O_4S1lLl?T3T@lLm?R3T@nLl?R3T@mLn?Q3T@nLl?Q3U@oLl?P3T@mLo?R3R@aL\\`0^3d_OcL\\`0[3e_OeL\\`0Y3e_OhLZ`0X3f_OhL[`0V3f_OkLZ`0S3g_OnLY`0P3h_OPMY`0m2j_OSMW`0i2k_OXMV`0d2l_O\\MU`0a2m_O`MT`0\\2n_OdMT`0X2n_OiMR`0T2P@lMR`0Q2P@nMR`0n1P@RNR`0j1P@UNR`0i1o_OWNS`0f1n_OYNU`0b1n_O^NS`0^1P@aNS`0[1P@dNR`0W1Q@hNR`0S1Q@jNS`0S1`3K5K4M4K5K3N2M4N1O2N2N0M5L3M4MQcV`0" + } + ], + "question": "What are the primary colors of ?", + "choices": [ + "A. 
Green and yellow.", + "B. Silver and blue.", + "C. Light blue and white.", + "D. Solid silver." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_120.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01108895.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "WUk23lg04K5K6K4gXO\\Oif0U1K4L5U]OnN^>X1PAVOm>Q1k@SOS?S1i@mNU?Y1f@hNW?_1U^OcMZ2P1^?e1a@[N\\?i1b@XN]?j1b@VN]?l1a@UN^?m1a@SN^?o1`@RN_?P2`@PN_?Q2a@oM^?S2`@nM_?T2`@lM_?V2_@kM`?W2_@iM`?Y2_@gM`?[2^@gM`?[2_@eM`?]2_@cM`?_2_@aM`?a2^@`Ma?b2]@_Mb?c2]@]Mb?d2]@]Mb?e2\\@\\Mc?f2\\@ZMc?h2\\@XMc?j2[@WMd?k2Z@VMe?l2Y@UMf?m2Y@SMg?m2X@UMf?m2Y@SMg?n2X@RMg?o2Y@QMf?Q3X@PMh?P3X@PMg?R3X@nLh?S3W@mLh?T3W@mLi?T3V@lLi?U3W@kLh?W3V@jLj?W3U@gLl?Z3T@fLl?[3S@eLl?]3S@cLm?]3R@dLm?^3R@bLm?_3R@bLn?_3Q@aLn?a3Q@_Lo?a3Q@_Ln?c3R@\\Ln?e3R@ZLm?g3T@XLk?j3U@ULj?l3W@SLi?n3V@RLi?o3X@PLi?P4V@PLj?P4V@PLk?o3T@RLl?n3S@SLm?m3S@SLm?m3R@TLn?l3R@TLn?l3R@TLn?m3Q@SLo?m3Q@SLo?m3Q@SLo?m3Q@SLo?m3Q@SLo?m3Q@SLo?m3Q@SLo?n3P@RLP`0n3P@RLo?o3Q@QLo?o3Q@QLo?o3Q@QLo?o3Q@QLo?o3R@PLn?Q4Q@oKo?Q4Q@oKo?Q4Q@oKo?Q4e_O^J9a1R`0Q4Q@oKo?Q4Q@oKo?Q4Q@oKo?^4d_ObK[`0R60000000O01O1O1ON3M4UNo_OfKP`0Z4o_OhKP`0X4P@jKn?V4R@kKm?U4S@lKl?T4U@kKl?T4T@lKl?T4T@mKk?S4U@mKk?S4U@mKk?S4U@mKl?R4T@nKl?R4U@mKk?S4U@mKk?R4V@nKj?R4V@nKk?P4V@PLj?P4V@hKR`0X4n_OiKQ`0V4P@kKo?U4Q@kKP`0S4Q@nKn?R4R@oKm?Q4S@PLl?o3U@RLj?n3V@SLi?m3W@SLj?k3W@ULi?k3W@SLk?l3W@QLk?o3U@nKn?R4R@oKn?o3S@QLm?o3S@RLl?m3U@SLk?m3U@TLj?l3V@ULj?i3W@WLi?i3W@WLj?h3V@XLj?g3W@WLl?h3T@VLo?h3R@YLn?f3R@[Lm?e3S@\\Ll?c3U@^Ll?fNW_Og4n0cLk?fNW_Oe4o0eLj?fNW_Od4P1fLj?fNV_Oc4P1gLk?eNU_Oc4Q1hLk?eNT_Ob4Q1iLk?eNT_Oa4R1jLk?eNS_O_4S1lLl?T3T@lLm?R3T@nLl?R3T@mLn?Q3T@nLl?Q3U@oLl?P3T@mLo?R3R@aL\\`0^3d_OcL\\`0[3e_OeL\\`0Y3e_OhLZ`0X3f_OhL[`0V3f_OkLZ`0S3g_OnLY`0P3h_OPMY`0m2j_OSMW`0i2k_OXMV`0d2l_O\\MU`0a2m_O`MT`0\\2n_OdMT`0X2n_OiMR`0T2P@lMR`0Q2P@nMR`0n1P@RNR`0j1P@UNR`0i1o_OWNS`0f1n_OYNU`0b1n_O^NS`0^1P@aNS`0[1P@dNR`0W1Q@hNR`0S1Q@jNS`0S1`3K5K4M4K5K3N2M4N1O2N2N0M5L3M4MQcV`0" 
+ } + ], + "question": "What is the material of ?", + "choices": [ + "A. The object is made of ceramic tiles and grout.", + "B. The object is made of metal and plastic.", + "C. The object is made of painted wood.", + "D. The object is made of woven fabric and straw." + ], + "answer": "B", + "type": "material", + "image": "images/vqa_121.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01121205.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "^QU1m0nf09J2M3N2N2M2O2N:G=C>C=B=B7H6L4L4M2N2M3N1N3N1N3N2M2O2L4N1N3N1N2O0O2O0O2O001N101N101N101N2O0O2O001O0O2O001O001N101O001O0O2O1O0000000O101O00000O100000000O100000000O100000000000000000000O1000000000000000000000000000000000000000O10000000O10000000000000000000O100000000000000000000000000O10000000O100000O1000000O100000000O100000000O100000000O100000000O1000000O100000000O100000000O10001O000O100000000O100000001N101O0O1mJg]O_4Zb0`Kh]O^4Xb0bKi]O\\4Yb0dKg]O[4Yb0eKh]OZ4Xb0fKi]OX4Yb0gKh]OX4Xb0hKj]OV4Wb0jKi]OT4Xb0lKi]OS4Xb0lKi]OS4Wb0nKi]OP4Xb0PLi]Oo3Xb0QLh]Om3Yb0SLh]Ol3Yb0TLg]Ok3Yb0ULg]Oj3[b0VLc]Ok3_b0SL`]On3ab0RL\\]Oo3fb0e02L3L5L4bLg\\On1`c0lMh\\Ok1]c0mMP]Oi1Xc0mM_]O^1gb0YNijZa0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Red and white.", + "B. White and black.", + "C. Silver and black.", + "D. Orange and silver." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_122.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01121205.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "^QU1m0nf09J2M3N2N2M2O2N:G=C>C=B=B7H6L4L4M2N2M3N1N3N1N3N2M2O2L4N1N3N1N2O0O2O0O2O001N101N101N101N2O0O2O001O0O2O001O001N101O001O0O2O1O0000000O101O00000O100000000O100000000O100000000000000000000O1000000000000000000000000000000000000000O10000000O10000000000000000000O100000000000000000000000000O10000000O100000O1000000O100000000O100000000O100000000O100000000O1000000O100000000O100000000O10001O000O100000000O100000001N101O0O1mJg]O_4Zb0`Kh]O^4Xb0bKi]O\\4Yb0dKg]O[4Yb0eKh]OZ4Xb0fKi]OX4Yb0gKh]OX4Xb0hKj]OV4Wb0jKi]OT4Xb0lKi]OS4Xb0lKi]OS4Wb0nKi]OP4Xb0PLi]Oo3Xb0QLh]Om3Yb0SLh]Ol3Yb0TLg]Ok3Yb0ULg]Oj3[b0VLc]Ok3_b0SL`]On3ab0RL\\]Oo3fb0e02L3L5L4bLg\\On1`c0lMh\\Ok1]c0mMP]Oi1Xc0mM_]O^1gb0YNijZa0" + } + ], + "question": "Which of the following accurately describes a feature of ?", + "choices": [ + "A. The object has a curved handle on top.", + "B. The object has a large, gray, overarching handle.", + "C. The object is primarily cylindrical in shape.", + "D. The object has a square base." 
+ ], + "answer": "A", + "type": "shape", + "image": "images/vqa_123.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01121205.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "^QU1m0nf09J2M3N2N2M2O2N:G=C>C=B=B7H6L4L4M2N2M3N1N3N1N3N2M2O2L4N1N3N1N2O0O2O0O2O001N101N101N101N2O0O2O001O0O2O001O001N101O001O0O2O1O0000000O101O00000O100000000O100000000O100000000000000000000O1000000000000000000000000000000000000000O10000000O10000000000000000000O100000000000000000000000000O10000000O100000O1000000O100000000O100000000O100000000O100000000O1000000O100000000O100000000O10001O000O100000000O100000001N101O0O1mJg]O_4Zb0`Kh]O^4Xb0bKi]O\\4Yb0dKg]O[4Yb0eKh]OZ4Xb0fKi]OX4Yb0gKh]OX4Xb0hKj]OV4Wb0jKi]OT4Xb0lKi]OS4Xb0lKi]OS4Wb0nKi]OP4Xb0PLi]Oo3Xb0QLh]Om3Yb0SLh]Ol3Yb0TLg]Ok3Yb0ULg]Oj3[b0VLc]Ok3_b0SL`]On3ab0RL\\]Oo3fb0e02L3L5L4bLg\\On1`c0lMh\\Ok1]c0mMP]Oi1Xc0mM_]O^1gb0YNijZa0" + } + ], + "question": "Which statement correctly describes a shape-related feature of ?", + "choices": [ + "A. The display screen on its front is rectangular.", + "B. The top of the object is completely flat.", + "C. It has a large, curved handle on top for carrying.", + "D. The main body of the object is a perfect cube." 
+ ], + "answer": "A", + "type": "shape", + "image": "images/vqa_124.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01121205.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "^QU1m0nf09J2M3N2N2M2O2N:G=C>C=B=B7H6L4L4M2N2M3N1N3N1N3N2M2O2L4N1N3N1N2O0O2O0O2O001N101N101N101N2O0O2O001O0O2O001O001N101O001O0O2O1O0000000O101O00000O100000000O100000000O100000000000000000000O1000000000000000000000000000000000000000O10000000O10000000000000000000O100000000000000000000000000O10000000O100000O1000000O100000000O100000000O100000000O100000000O1000000O100000000O100000000O10001O000O100000000O100000001N101O0O1mJg]O_4Zb0`Kh]O^4Xb0bKi]O\\4Yb0dKg]O[4Yb0eKh]OZ4Xb0fKi]OX4Yb0gKh]OX4Xb0hKj]OV4Wb0jKi]OT4Xb0lKi]OS4Xb0lKi]OS4Wb0nKi]OP4Xb0PLi]Oo3Xb0QLh]Om3Yb0SLh]Ol3Yb0TLg]Ok3Yb0ULg]Oj3[b0VLc]Ok3_b0SL`]On3ab0RL\\]Oo3fb0e02L3L5L4bLg\\On1`c0lMh\\Ok1]c0mMP]Oi1Xc0mM_]O^1gb0YNijZa0" + } + ], + "question": "What is the primary color of the body of ?", + "choices": [ + "A. White.", + "B. Red.", + "C. Silver.", + "D. Orange." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_125.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01121205.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "^QU1m0nf09J2M3N2N2M2O2N:G=C>C=B=B7H6L4L4M2N2M3N1N3N1N3N2M2O2L4N1N3N1N2O0O2O0O2O001N101N101N101N2O0O2O001O0O2O001O001N101O001O0O2O1O0000000O101O00000O100000000O100000000O100000000000000000000O1000000000000000000000000000000000000000O10000000O10000000000000000000O100000000000000000000000000O10000000O100000O1000000O100000000O100000000O100000000O100000000O1000000O100000000O100000000O10001O000O100000000O100000001N101O0O1mJg]O_4Zb0`Kh]O^4Xb0bKi]O\\4Yb0dKg]O[4Yb0eKh]OZ4Xb0fKi]OX4Yb0gKh]OX4Xb0hKj]OV4Wb0jKi]OT4Xb0lKi]OS4Xb0lKi]OS4Wb0nKi]OP4Xb0PLi]Oo3Xb0QLh]Om3Yb0SLh]Ol3Yb0TLg]Ok3Yb0ULg]Oj3[b0VLc]Ok3_b0SL`]On3ab0RL\\]Oo3fb0e02L3L5L4bLg\\On1`c0lMh\\Ok1]c0mMP]Oi1Xc0mM_]O^1gb0YNijZa0" + } + ], + "question": "What material is primarily made of?", + "choices": [ + "A. Metal.", + "B. Wood.", + "C. Plastic.", + "D. Concrete." + ], + "answer": "C", + "type": "material", + "image": "images/vqa_126.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01142493.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "VPc51ng03N1N2N2O2M2N2N2N3O000O10000O1N2O1O1N2O1N2O2N1N200O2O000O101O0O100 made of?", + "choices": [ + "A. Woven.", + "B. Plastic.", + "C. Cardboard.", + "D. Leather." + ], + "answer": "A", + "type": "material", + "image": "images/vqa_127.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01142493.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "VPc51ng03N1N2N2O2M2N2N2N3O000O10000O1N2O1O1N2O1N2O2N1N200O2O000O101O0O100?", + "choices": [ + "A. Woven.", + "B. Plaid.", + "C. Polka-dotted.", + "D. Striped." 
+ ], + "answer": "A", + "type": "texture/pattern", + "image": "images/vqa_128.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01142493.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "VPc51ng03N1N2N2O2M2N2N2N3O000O10000O1N2O1O1N2O1N2O2N1N200O2O000O101O0O100?", + "choices": [ + "A. White.", + "B. Red.", + "C. Black.", + "D. Brown." + ], + "answer": "D", + "type": "color", + "image": "images/vqa_129.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01142493.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "PP`:1og02N000000001O00000000001O0000000000001O00000P\\Ok1U>UNkAm1S>SNmAn1R>SNmAm1fMoMk>4_Cm1dMVOh=mNdDh1iM`0^TNYOR2h0_NaNh0^2ZOZMe0m2[OlLf0Z3[O_Ld0h3[ORLf0T4ZOfKe0a4YO[Kh0j4UOTKk0Q5ROmJm0Y5oNfJQ1_5lN_JT1f5iNXJV1n5fNQJZ1T6cNiI]1]6`N`Ia1e6]NXIb1n6^NlHc1Y7]NbHb1d7^NVHc1o7]NkGc1[8]N^Ge1g8[NSGe1S9[NgFe1_9[N[Ff1j9ZNPFf1V:ZNeEe1a:\\NYEd1l:\\NnDd1X;\\NcDc1c;]NXDc1m;]NmCc1Y<]NbCb1d<^NWCb1n<^NlBc1Y=^NaBb1d=^NWBb1n=^NlAc1Y>]NbAc1c>]NXAc1m>]Nn@c1W?]Nd@b1b?^NY@b1l?_Nk_Od1Z`0\\Nd_Oa1a`0_N__O\\1f`0cNZ_OY1k`0gNV_OR1Pa0nNP_Ol0Va0TOj^Oe0]a0\\Oc^O=ca0C]^O7ia0IW^O1oa00Q^OMQb03o]OKSb05n]OHTb09i200001O00001O00001O001O0000001O00001O00001O00001O0000010N10001O001O00001O00001O00001O001O00001O00001O001O0[YOUOUf0k0eYO]OYf0V1O0000O1000000000000000000O100000000000000O10000000000000000O10000000000000000O10000000000000000O10000000000000000O10000000000O100000000O10000000000O2OO100000O10000000000O100000000O10000000000O10000O10000O100O10000O10000O10000O10000000000O10000000000O1N2F:J6JQa70m^H:[OEXYOd0hf0;O00O11O00001O00001O00001O0000001O00001O00001O00001O00001O0000001O00001O00001O00001O00001O0000001O0000001O0000001O000000001O0000001O0000001O000000001O0000001O0000001O000000001O0000001O0000001O00" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. It is made of plastic.", + "B. 
It is made of metal.", + "C. It is made of wood.", + "D. It is made of fabric." + ], + "answer": "D", + "type": "material", + "image": "images/vqa_130.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01142493.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "PP`:1og02N000000001O00000000001O0000000000001O00000P\\Ok1U>UNkAm1S>SNmAn1R>SNmAm1fMoMk>4_Cm1dMVOh=mNdDh1iM`0^TNYOR2h0_NaNh0^2ZOZMe0m2[OlLf0Z3[O_Ld0h3[ORLf0T4ZOfKe0a4YO[Kh0j4UOTKk0Q5ROmJm0Y5oNfJQ1_5lN_JT1f5iNXJV1n5fNQJZ1T6cNiI]1]6`N`Ia1e6]NXIb1n6^NlHc1Y7]NbHb1d7^NVHc1o7]NkGc1[8]N^Ge1g8[NSGe1S9[NgFe1_9[N[Ff1j9ZNPFf1V:ZNeEe1a:\\NYEd1l:\\NnDd1X;\\NcDc1c;]NXDc1m;]NmCc1Y<]NbCb1d<^NWCb1n<^NlBc1Y=^NaBb1d=^NWBb1n=^NlAc1Y>]NbAc1c>]NXAc1m>]Nn@c1W?]Nd@b1b?^NY@b1l?_Nk_Od1Z`0\\Nd_Oa1a`0_N__O\\1f`0cNZ_OY1k`0gNV_OR1Pa0nNP_Ol0Va0TOj^Oe0]a0\\Oc^O=ca0C]^O7ia0IW^O1oa00Q^OMQb03o]OKSb05n]OHTb09i200001O00001O00001O001O0000001O00001O00001O00001O0000010N10001O001O00001O00001O00001O001O00001O00001O001O0[YOUOUf0k0eYO]OYf0V1O0000O1000000000000000000O100000000000000O10000000000000000O10000000000000000O10000000000000000O10000000000000000O10000000000O100000000O10000000000O2OO100000O10000000000O100000000O10000000000O10000O10000O100O10000O10000O10000O10000000000O10000000000O1N2F:J6JQa70m^H:[OEXYOd0hf0;O00O11O00001O00001O00001O0000001O00001O00001O00001O00001O0000001O00001O00001O00001O00001O0000001O0000001O0000001O000000001O0000001O0000001O000000001O0000001O0000001O000000001O0000001O0000001O00" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Blue.", + "B. Red.", + "C. Yellow.", + "D. Grey." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_131.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01142493.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "hebe0 is correct?", + "choices": [ + "A. 
The object is a combination of blue and white.", + "B. The object is a combination of black and green.", + "C. The object is entirely brown.", + "D. The object is a combination of red and yellow." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_132.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01142493.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "hebe0?", + "choices": [ + "A. Rubber.", + "B. Leather.", + "C. Canvas.", + "D. Plastic." + ], + "answer": "A", + "type": "material", + "image": "images/vqa_133.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01155009.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "WRn23Sg0ObYO7Wf0l0K5N2O1M3O1O000100O000O10000O10000000000000000000000000000000000000000000000000000000001O0O2O001O2J7nNiYO0Pmjc0" + } + ], + "question": "What is the background color of ?", + "choices": [ + "A. Red.", + "B. Yellow.", + "C. Black.", + "D. White." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_134.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01156032.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 980 + ], + "counts": "dZ\\76ho05L2N2N1O1O2O0O1O1O1L4M3L4H8N2N2O1O1O10kNgQOc0Yn0[OkQOc0Tn0]OnQOb0Rn0]OSRO?lm0AVRO=km0CVRO?", + "choices": [ + "A. White.", + "B. Silver.", + "C. Gold.", + "D. Brown." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_135.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01156032.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 980 + ], + "counts": "dZ\\76ho05L2N2N1O1O2O0O1O1O1L4M3L4H8N2N2O1O1O10kNgQOc0Yn0[OkQOc0Tn0]OnQOb0Rn0]OSRO?lm0AVRO=km0CVRO?", + "choices": [ + "A. It is a hook-shaped object.", + "B. 
It is a circular object.", + "C. It is a rectangular object.", + "D. It is an oval object." + ], + "answer": "B", + "type": "shape", + "image": "images/vqa_136.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01156032.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 980 + ], + "counts": "dZ\\76ho05L2N2N1O1O2O0O1O1O1L4M3L4H8N2N2O1O1O10kNgQOc0Yn0[OkQOc0Tn0]OnQOb0Rn0]OSRO?lm0AVRO=km0CVRO?", + "choices": [ + "A. The masked object is silver.", + "B. The masked object is beige.", + "C. The masked object is white.", + "D. The masked object is brown." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_137.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01156032.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 980 + ], + "counts": "ga0<8Lkn05UQOKjn06VQOJin08VQOHin09WQOGin09WQOGhn0:XQOFgn0;YQOEfn0ZQOBen0?[QOAdn0`0\\QO@cn0a0]QO_Ocn0a0^QO^Oan0d0^QO\\Oan0e0_QO[O`n0f0`QOYOan0g0_QOYO`n0h0`QOWO`n0j0`QOUO`n0m0_QOSOan0m0_QOROan0o071O1O101N1O1O100O1O0001O00000010O000000010O000001O0001O01O000001O01O000000010O1O1O1O010O1O1O001O100O1O2N1O100O1O1O2N1O100O2N2N2N2N3M2M3M3Mb_fk0" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. Metal.", + "B. Plastic.", + "C. Glass.", + "D. Ceramic." 
+ ], + "answer": "B", + "type": "material", + "image": "images/vqa_138.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01156032.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 980 + ], + "counts": "ga0<8Lkn05UQOKjn06VQOJin08VQOHin09WQOGin09WQOGhn0:XQOFgn0;YQOEfn0ZQOBen0?[QOAdn0`0\\QO@cn0a0]QO_Ocn0a0^QO^Oan0d0^QO\\Oan0e0_QO[O`n0f0`QOYOan0g0_QOYO`n0h0`QOWO`n0j0`QOUO`n0m0_QOSOan0m0_QOROan0o071O1O101N1O1O100O1O0001O00000010O000000010O000001O0001O01O000001O01O000000010O1O1O1O010O1O1O001O100O1O2N1O100O1O1O2N1O100O2N2N2N2N3M2M3M3Mb_fk0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Blue.", + "B. Green.", + "C. White.", + "D. Silver." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_139.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01156032.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 980 + ], + "counts": "ga0<8Lkn05UQOKjn06VQOJin08VQOHin09WQOGin09WQOGhn0:XQOFgn0;YQOEfn0ZQOBen0?[QOAdn0`0\\QO@cn0a0]QO_Ocn0a0^QO^Oan0d0^QO\\Oan0e0_QO[O`n0f0`QOYOan0g0_QOYO`n0h0`QOWO`n0j0`QOUO`n0m0_QOSOan0m0_QOROan0o071O1O101N1O1O100O1O0001O00000010O000000010O000001O0001O01O000001O01O000000010O1O1O1O010O1O1O001O100O1O2N1O100O1O1O2N1O100O2N2N2N2N3M2M3M3Mb_fk0" + } + ], + "question": "What is the shape of the top of ?", + "choices": [ + "A. It has a pump-action dispenser.", + "B. It has a rounded, dome-like shape.", + "C. It is flat and tapered.", + "D. It is a simple, cylindrical screw-top." 
+ ], + "answer": "C", + "type": "shape", + "image": "images/vqa_140.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01156833.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Zc]a09go08H9G8H9G8H:F9G:E:GW1iN0000O1O1O1O=D9Fe0[O6J7I6Kn0QOQZj5" + } + ], + "question": "What is the shape of in the image?", + "choices": [ + "A. Dome-shaped.", + "B. Cylindrical.", + "C. Irregular.", + "D. Rectangular." + ], + "answer": "D", + "type": "shape", + "image": "images/vqa_141.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01156833.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Zc]a09go08H9G8H9G8H:F9G:E:GW1iN0000O1O1O1O=D9Fe0[O6J7I6Kn0QOQZj5" + } + ], + "question": "Which of the following best describes the shape of in the image?", + "choices": [ + "A. A long, thin cylinder.", + "B. A dome shape.", + "C. A short, wide cylinder.", + "D. A long, narrow rectangle." + ], + "answer": "D", + "type": "shape", + "image": "images/vqa_142.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01156833.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Zc]a09go08H9G8H9G8H:F9G:E:GW1iN0000O1O1O1O=D9Fe0[O6J7I6Kn0QOQZj5" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. Wood.", + "B. Plastic.", + "C. Metal.", + "D. Cardboard." 
+ ], + "answer": "C", + "type": "material", + "image": "images/vqa_143.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01189415.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "j^T=385S10_l0NYRO69HNf0]m0N`ROe0\\m0R1O2M3M2O2M2O2N1O1O101M101N100O2O0O100O2O0O100O2O0O010O010O01O001O00000000O100O101O000000000000O10O10O10000001O1O2N2N1O2N1O001O1O1O1O1O1O1O001O1O1O1O1O1O001O001O0000000O2O0O10000O101N100O10001N100O100O2O000O2O0O2O1N2O1O1N2O1N2O0O2O1O1N2O1N2O1N2N1O2O1N2N2N2N2O1N2N2N2N3N1N2N2N2N2O1N3M2N1O2O0O2N1O10O01O1O00100O001O10O01O1O010O1O001O10O01O1O010O1O001O010O00O1O2F9N200O2O000O1O2M2O2N10001O001O001O000O2O001O00001O001O0N3L300100O2O0O100O1O101N100O1O101N1000000O2O000O101O000O101O000O101O0O10O1000O0100O10O0100O010O100O010O1000O01000O01000000O10000O1000000O10000O1000000O10000O10000O100BWVOeJii0[5WVOdJji0[5XVOcJii0]5ZVO]Jii0b5;O1O100O1O2O0O1O2O0O2N100O2N1O2O0O1O2O0O2N100O2N101N1O101N1O2O0O1O2O0O2N100O2N2O1N1O2O1N2N2O1N1O2O1NVC" + } + ], + "question": "Which of the following statements accurately describes the shape of the ears of ?", + "choices": [ + "A. The ears are rounded at the tips.", + "B. The ears are pointed and triangular.", + "C. The ears are floppy and folded downwards.", + "D. The ears are not visible in the image." 
+ ], + "answer": "B", + "type": "shape", + "image": "images/vqa_144.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01189415.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "V\\_:2U10`m04\\QONj04fm06VRONhm05SRO0jm03RRO1km01QRO3mm00oQO2Qn00jQO3Vn0l0000O1N101O1N2O0O2O1N2O0O2O1N2TOZQO;gn0E]QO5fn0Ii0d0\\O3M3M2N1O0O2O00001O2N1O1N3M3N1N2N2O0N2O001O1O1O1O1O100O02O001O2N5K3M2ZNiQO^1]n00000000004L1O001N010O0000100O1O1O00001OO1000O2O001O001N1O3D;M4M4KRP55koJ2N2O1HCjPO>Uo0c0B6K2N2N2M2O1O2N3L3N2N3M2M4M2N2N2N2O0O1O1O100O1O100O1O00100O1O100O1O[OlRO`NSm0a1mRO_NSm0a1nRO]NRm0d1oROZNRm0f1RSOUNnl0l1c01N10000_ROQNQm0o1nROSNPm0o1nROSNQm0m1oROTNPm0l1oROUNPm0l1PSOTNPm0^200O10000O10000O10000O1000O010000O100O10000O10000O100O01000O10000O100O1000000001O0O100000001O00000000001N1000001O001O0O2O001O1O001O1O0O2O001O1O001O1N2O1O1O1O1O1O1O1N2O1O1O1O1O1O1N2O1O1O1O2N2N2N2M3N2N1O1O0O2O0O2O0O2O0O2O0O2O0O2O001N100O2O0O2N1O2N1O1N2O1O1O1N200O1O1O1O100O1O1000O100001N2O001O00O1O1O10O01O1O1O00100O001O1O1O0O2O001N101N1O2N1O2O000100O1O010O1O1O1H8K5M3J7L`dm2" + } + ], + "question": "What is the shape of the ear of ?", + "choices": [ + "A. Rounded.", + "B. Floppy.", + "C. Triangular.", + "D. Pointed." 
+ ], + "answer": "C", + "type": "shape", + "image": "images/vqa_145.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01189415.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "V\\_:2U10`m04\\QONj04fm06VRONhm05SRO0jm03RRO1km01QRO3mm00oQO2Qn00jQO3Vn0l0000O1N101O1N2O0O2O1N2O0O2O1N2TOZQO;gn0E]QO5fn0Ii0d0\\O3M3M2N1O0O2O00001O2N1O1N3M3N1N2N2O0N2O001O1O1O1O1O100O02O001O2N5K3M2ZNiQO^1]n00000000004L1O001N010O0000100O1O1O00001OO1000O2O001O001N1O3D;M4M4KRP55koJ2N2O1HCjPO>Uo0c0B6K2N2N2M2O1O2N3L3N2N3M2M4M2N2N2N2O0O1O1O100O1O100O1O00100O1O100O1O[OlRO`NSm0a1mRO_NSm0a1nRO]NRm0d1oROZNRm0f1RSOUNnl0l1c01N10000_ROQNQm0o1nROSNPm0o1nROSNQm0m1oROTNPm0l1oROUNPm0l1PSOTNPm0^200O10000O10000O10000O1000O010000O100O10000O10000O100O01000O10000O100O1000000001O0O100000001O00000000001N1000001O001O0O2O001O1O001O1O0O2O001O1O001O1N2O1O1O1O1O1O1O1N2O1O1O1O1O1O1N2O1O1O1O2N2N2N2M3N2N1O1O0O2O0O2O0O2O0O2O0O2O0O2O001N100O2O0O2N1O2N1O1N2O1O1O1N200O1O1O1O100O1O1000O100001N2O001O00O1O1O10O01O1O1O00100O001O1O1O0O2O001N101N1O2N1O2O000100O1O010O1O1O1H8K5M3J7L`dm2" + } + ], + "question": "What is the shape of in the image?", + "choices": [ + "A. Its back is arched.", + "B. Its back is completely straight.", + "C. Its tail is curled up.", + "D. Its head is tilted downwards." 
+ ], + "answer": "A", + "type": "shape", + "image": "images/vqa_146.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01198997.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 795 + ], + "counts": "RPTc0`2_m02O00000O2OO100000000000000000000000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O10000O1000000O10000O1000000O101OO010001N?B8H3M4K5L5K0O2NinS1" + } + ], + "question": "Which of the following best describes the texture of ?", + "choices": [ + "A. It has a smooth texture.", + "B. It has a woven texture.", + "C. It has a crinkled texture.", + "D. It has a rough texture." + ], + "answer": "A", + "type": "texture/pattern", + "image": "images/vqa_147.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01198997.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 795 + ], + "counts": "RPTc0`2_m02O00000O2OO100000000000000000000000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O10000O1000000O10000O1000000O101OO010001N?B8H3M4K5L5K0O2NinS1" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Orange.", + "B. Green.", + "C. Black.", + "D. White." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_148.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01198997.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 795 + ], + "counts": "RPTc0`2_m02O00000O2OO100000000000000000000000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O10000O1000000O10000O1000000O101OO010001N?B8H3M4K5L5K0O2NinS1" + } + ], + "question": "Which of the following statements accurately describes ?", + "choices": [ + "A. It is a canvas handbag.", + "B. It is a leather handbag.", + "C. It is a nylon satchel.", + "D. It is part of a leather jacket." + ], + "answer": "B", + "type": "material", + "image": "images/vqa_149.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01198997.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 795 + ], + "counts": "RPTc0`2_m02O00000O2OO100000000000000000000000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O10000O1000000O10000O1000000O101OO010001N?B8H3M4K5L5K0O2NinS1" + } + ], + "question": "What is a characteristic of ?", + "choices": [ + "A. It has a visible seam.", + "B. It has a metal zipper.", + "C. It is made of woven fabric.", + "D. It has a leather strap." 
+ ], + "answer": "A", + "type": "texture/pattern", + "image": "images/vqa_150.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01246937.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "RnV2R1bf0a0I7J7mYOPNie0V2N2MO[ZOjMde0Y2O1O100000000O1000O100L4O1000000O10000002N2N1N2O000000000000JaZOkM_e0U26O100O11OO10O10000000001O00000000000000000000001O01O000000000O12O1N1O0001O0000001O001O1N1K6M3O001O1O11100N3L3M1OO1O10gNhYOj0Xf0VOkYOg0Vf0XOnYOb0Uf0\\OPZO8Yf0FR1Mki[c0" + } + ], + "question": "Which statement accurately describes a color-related feature of ?", + "choices": [ + "A. The object is orange.", + "B. The object has white text on its rear.", + "C. The object is entirely black.", + "D. The object is red." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_151.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01246937.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "RnV2R1bf0a0I7J7mYOPNie0V2N2MO[ZOjMde0Y2O1O100000000O1000O100L4O1000000O10000002N2N1N2O000000000000JaZOkM_e0U26O100O11OO10O10000000001O00000000000000000000001O01O000000000O12O1N1O0001O0000001O001O1N1K6M3O001O1O11100N3L3M1OO1O10gNhYOj0Xf0VOkYOg0Vf0XOnYOb0Uf0\\OPZO8Yf0FR1Mki[c0" + } + ], + "question": "What is the material of the taillight on ?", + "choices": [ + "A. Glass.", + "B. Metal.", + "C. Plastic.", + "D. Rubber." 
+ ], + "answer": "C", + "type": "material", + "image": "images/vqa_152.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01246937.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "RnV2R1bf0a0I7J7mYOPNie0V2N2MO[ZOjMde0Y2O1O100000000O1000O100L4O1000000O10000002N2N1N2O000000000000JaZOkM_e0U26O100O11OO10O10000000001O00000000000000000000001O01O000000000O12O1N1O0001O0000001O001O1N1K6M3O001O1O11100N3L3M1OO1O10gNhYOj0Xf0VOkYOg0Vf0XOnYOb0Uf0\\OPZO8Yf0FR1Mki[c0" + } + ], + "question": "What text, which indicates its purpose, is displayed on ?", + "choices": [ + "A. TAXI.", + "B. Norwich.", + "C. 21 22.", + "D. city & rail station." + ], + "answer": "A", + "type": "texture/pattern", + "image": "images/vqa_153.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01252367.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cig7n0kf08G9H8L4N2N2N1O1OSOYZOCfe07aZOI^e00kZOOUe0JR[O6md0FY[O9fd0H[[O7ed0H\\[O8dd0G^[O8bd0G_[O9ad0Fa[O9`d0Eb[O:_d0Db[O?", + "choices": [ + "A. Gray.", + "B. Black.", + "C. White.", + "D. Silver." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_154.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01252367.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cig7n0kf08G9H8L4N2N2N1O1OSOYZOCfe07aZOI^e00kZOOUe0JR[O6md0FY[O9fd0H[[O7ed0H\\[O8dd0G^[O8bd0G_[O9ad0Fa[O9`d0Eb[O:_d0Db[O?", + "choices": [ + "A. Hard, smooth plastic.", + "B. Polished metal.", + "C. A soft, cushioned material.", + "D. Flexible rubber." 
+ ], + "answer": "C", + "type": "material", + "image": "images/vqa_155.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01252367.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cig7n0kf08G9H8L4N2N2N1O1OSOYZOCfe07aZOI^e00kZOOUe0JR[O6md0FY[O9fd0H[[O7ed0H\\[O8dd0G^[O8bd0G_[O9ad0Fa[O9`d0Eb[O:_d0Db[O in the image?", + "choices": [ + "A. A curved handle of a pair of scissors.", + "B. A curved telephone receiver.", + "C. A curved headset.", + "D. A curved arm of a desk lamp." + ], + "answer": "C", + "type": "shape", + "image": "images/vqa_156.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01252367.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "[jT54kg06I4M000000O2O00lYOIad05a[O1Xd0Ok[O5nc0KT\\O9gc0F[\\O?^c0@e\\Oe0Tc0[Oo\\Oi0jb0WOX]Om0bb0SO`]OR1Yb0nNi]OW1Qb0iNP^O[1ja0eNY^O[1da0fN]^OZ1aa0fNa^OY1^a0gNd^OX1[a0iNf^OV1Za0iNi^OV1Ua0kNl^OT1Sa0lNo^OS1Pa0nNQ_OQ1n`0oNT_OQ1j`0POX_On0h`0QOZ_On0e`0RO]_Om0b`0TO__Ok0``0UOb_Ok0\\`0VOe_Oi0Z`0WOh_Oh0X`0XOj_Of0U`0ZOm_Of0Q`0[OP@d0o?\\OR@d0m?\\OU@c0j?^OV@c0h?]OZ@b0f?^O[@a0d?@]@?b?A`@?^?Bc@=\\?De@;Z?Eg@;Y?Eh@;W?Ei@;W?Dj@T?Bl@>T?Bm@>R?Ao@?Q?Ao@?Q?Ao@?Q?@QA`0o>_OQAa0o>_OQAa0o>^ORAb0n>^OSAb0l>^OTAb0l>^OTAb0l>]OUAc0l>\\OTAe0k>[OUAe0k>ZOVAf0j>ZOWAe0i>[OWAe0i>ZOXAd0j>\\OVAc0l>\\OUAb0l>^OUALmKa0ob0BYADYA8j>HWAMS?3]401O0000000000001O0000000O10001O00gn``0" + } + ], + "question": "What is the shape of ?", + "choices": [ + "A. The object has a cylindrical shape.", + "B. The object has a square shape.", + "C. The object has a rectangular shape.", + "D. The object has an irregular shape." 
+ ], + "answer": "C", + "type": "shape", + "image": "images/vqa_157.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01252367.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "[jT54kg06I4M000000O2O00lYOIad05a[O1Xd0Ok[O5nc0KT\\O9gc0F[\\O?^c0@e\\Oe0Tc0[Oo\\Oi0jb0WOX]Om0bb0SO`]OR1Yb0nNi]OW1Qb0iNP^O[1ja0eNY^O[1da0fN]^OZ1aa0fNa^OY1^a0gNd^OX1[a0iNf^OV1Za0iNi^OV1Ua0kNl^OT1Sa0lNo^OS1Pa0nNQ_OQ1n`0oNT_OQ1j`0POX_On0h`0QOZ_On0e`0RO]_Om0b`0TO__Ok0``0UOb_Ok0\\`0VOe_Oi0Z`0WOh_Oh0X`0XOj_Of0U`0ZOm_Of0Q`0[OP@d0o?\\OR@d0m?\\OU@c0j?^OV@c0h?]OZ@b0f?^O[@a0d?@]@?b?A`@?^?Bc@=\\?De@;Z?Eg@;Y?Eh@;W?Ei@;W?Dj@T?Bl@>T?Bm@>R?Ao@?Q?Ao@?Q?Ao@?Q?@QA`0o>_OQAa0o>_OQAa0o>^ORAb0n>^OSAb0l>^OTAb0l>^OTAb0l>]OUAc0l>\\OTAe0k>[OUAe0k>ZOVAf0j>ZOWAe0i>[OWAe0i>ZOXAd0j>\\OVAc0l>\\OUAb0l>^OUALmKa0ob0BYADYA8j>HWAMS?3]401O0000000000001O0000000O10001O00gn``0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Blue.", + "B. White.", + "C. Gray.", + "D. Black." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_158.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01252367.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "[jT54kg06I4M000000O2O00lYOIad05a[O1Xd0Ok[O5nc0KT\\O9gc0F[\\O?^c0@e\\Oe0Tc0[Oo\\Oi0jb0WOX]Om0bb0SO`]OR1Yb0nNi]OW1Qb0iNP^O[1ja0eNY^O[1da0fN]^OZ1aa0fNa^OY1^a0gNd^OX1[a0iNf^OV1Za0iNi^OV1Ua0kNl^OT1Sa0lNo^OS1Pa0nNQ_OQ1n`0oNT_OQ1j`0POX_On0h`0QOZ_On0e`0RO]_Om0b`0TO__Ok0``0UOb_Ok0\\`0VOe_Oi0Z`0WOh_Oh0X`0XOj_Of0U`0ZOm_Of0Q`0[OP@d0o?\\OR@d0m?\\OU@c0j?^OV@c0h?]OZ@b0f?^O[@a0d?@]@?b?A`@?^?Bc@=\\?De@;Z?Eg@;Y?Eh@;W?Ei@;W?Dj@T?Bl@>T?Bm@>R?Ao@?Q?Ao@?Q?Ao@?Q?@QA`0o>_OQAa0o>_OQAa0o>^ORAb0n>^OSAb0l>^OTAb0l>^OTAb0l>]OUAc0l>\\OTAe0k>[OUAe0k>ZOVAf0j>ZOWAe0i>[OWAe0i>ZOXAd0j>\\OVAc0l>\\OUAb0l>^OUALmKa0ob0BYADYA8j>HWAMS?3]401O0000000000001O0000000O10001O00gn``0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Blue.", + "B. Gray.", + "C. Black.", + "D. Beige." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_159.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01252367.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cl_d01mg04KLYXO7dg04O2N1O1O2N1O1O2O0O1O2O0O1O2N1O1O2O0O1O2N1O1O2O0O1O2NA[YODdf0l01O00000001O00000000000010O00000000000001O000000000000001O0000000000001O00000001O000001O0000000000000000000000000000001O000001O001O1O1O1O1O1O2N1O1O1O1O1O101N1O1O1O1O1O1O2N00100O1O1O1O1O1O1O001O1O1O100O1O1N2I[XO0iY<" + } + ], + "question": "What is the binding style of ?", + "choices": [ + "A. It is bound with a spiral wire.", + "B. It is held together by staples in the center.", + "C. Its pages are glued together at the spine.", + "D. It uses a three-ring binder mechanism." 
+ ], + "answer": "A", + "type": "texture/pattern", + "image": "images/vqa_160.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01252367.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cl_d01mg04KLYXO7dg04O2N1O1O2N1O1O2O0O1O2O0O1O2N1O1O2O0O1O2N1O1O2O0O1O2NA[YODdf0l01O00000001O00000000000010O00000000000001O000000000000001O0000000000001O00000001O000001O0000000000000000000000000000001O000001O001O1O1O1O1O1O2N1O1O1O1O1O101N1O1O1O1O1O1O2N00100O1O1O1O1O1O1O001O1O1O100O1O1N2I[XO0iY<" + } + ], + "question": "Which of the following descriptions accurately portrays the pattern on ?", + "choices": [ + "A. The object has a purple \"Y!\" logo on its cover.", + "B. The object has the text \"Microsoft\" printed on its cover.", + "C. The object has the text \"YAHOO\" printed on its cover.", + "D. The object is plain black with no text or logos." + ], + "answer": "C", + "type": "texture/pattern", + "image": "images/vqa_161.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01252367.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cl_d01mg04KLYXO7dg04O2N1O1O2N1O1O2O0O1O2O0O1O2N1O1O2O0O1O2N1O1O2O0O1O2NA[YODdf0l01O00000001O00000000000010O00000000000001O000000000000001O0000000000001O00000001O000001O0000000000000000000000000000001O000001O001O1O1O1O1O1O2N1O1O1O1O1O101N1O1O1O1O1O1O2N00100O1O1O1O1O1O1O001O1O1O100O1O1N2I[XO0iY<" + } + ], + "question": "What is the pattern on the surface of ?", + "choices": [ + "A. It has a purple logo on the cover.", + "B. It has white text on the cover.", + "C. It is plain black with no markings.", + "D. It has black text on a white cover." 
+ ], + "answer": "B", + "type": "texture/pattern", + "image": "images/vqa_162.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01261883.jpg", + "mask_rles": [ + { + "size": [ + 1536, + 2304 + ], + "counts": "flf[28b_1:G8H9G8K6J4M2M3N3L3N2M4M2M3N3M2N2N3N1N2O2M2O1N3M2O1N3N0O2O0O2N101N101N101N1O2O0O2M2O2N1O2N101N101N101O00001O001N1O100000000000O100000000O1000000O2O0000000O10001O0O1000001O0O2O001O1N2O001O1O0O2O1O1O0O2O1N1O2N2N2N2M4M2M3M3M3M3N2M3N2N2N2N3L3N2M3M3M3M3M3L4K6SOYaN9m^1CXaN8[_1L5K\\SSj0" + } + ], + "question": "What is the texture of ?", + "choices": [ + "A. The object has a rough and bumpy surface with a cross-hatch pattern.", + "B. The object has a smooth surface.", + "C. The object has a leathery and wrinkled texture.", + "D. The object is covered in a layer of fine fuzz." + ], + "answer": "B", + "type": "texture/pattern", + "image": "images/vqa_163.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01261883.jpg", + "mask_rles": [ + { + "size": [ + 1536, + 2304 + ], + "counts": "flf[28b_1:G8H9G8K6J4M2M3N3L3N2M4M2M3N3M2N2N3N1N2O2M2O1N3M2O1N3N0O2O0O2N101N101N101N1O2O0O2M2O2N1O2N101N101N101O00001O001N1O100000000000O100000000O1000000O2O0000000O10001O0O1000001O0O2O001O1N2O001O1O0O2O1O1O0O2O1N1O2N2N2N2M4M2M3M3M3M3N2M3N2N2N2N3L3N2M3M3M3M3M3L4K6SOYaN9m^1CXaN8[_1L5K\\SSj0" + } + ], + "question": "What is the texture of ?", + "choices": [ + "A. It has a rough and scaly texture.", + "B. It has a dimpled texture.", + "C. It has a matte and dull texture.", + "D. It has a glossy texture." 
+ ], + "answer": "D", + "type": "texture/pattern", + "image": "images/vqa_164.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01261883.jpg", + "mask_rles": [ + { + "size": [ + 1536, + 2304 + ], + "counts": "Q`kc27h_13N7I4L2N1O2N3M6J6J7I7I2N2N1O2N2N001O00001O0000001O00001O0000000000000000001O2N2N2N001O1O001O1O00000000O1000000O1000000O1000000O1000000O100O100O100O100O100O100O1O100O100O100O100O100O1O1O1O1O1O1O1O1O100O1N2H8G9K5L4K5K6NP`ob0" + } + ], + "question": "Which of the following best describes ?", + "choices": [ + "A. A pomegranate.", + "B. A large cherry.", + "C. A small red apple.", + "D. A red plum." + ], + "answer": "C", + "type": "shape", + "image": "images/vqa_165.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01276645.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "maPe0=ao06J6L4K5M2N3L4M2N2N1N3N2N101N100O100O1O100O10O01OO2O0O2O9F7J1N101N1O2O0O2O0O2N100O1O001O1O1N2O0O2O2L4LR\\[1" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Silver.", + "B. Tan.", + "C. White.", + "D. Black." + ], + "answer": "D", + "type": "color", + "image": "images/vqa_166.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01312527.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "`ja>3lg02O1N1000O010000O1N101N2O1O100O1O1O1O10000O10O100O10O10O14L2M101O000O101N101O001O4L1O100O0001O000010O00O2H7N4M101N10000O1O100011O0O10O000O10003M2N1N2O1N1O2O0NcUb7" + } + ], + "question": "What is the primary color of the body of ?", + "choices": [ + "A. Black and white.", + "B. Grey and brown.", + "C. Solid grey.", + "D. White and grey." 
+ ], + "answer": "B", + "type": "color", + "image": "images/vqa_167.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01312527.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "`ja>3lg02O1N1000O010000O1N101N2O1O100O1O1O1O10000O10O100O10O10O14L2M101O000O101N101O001O4L1O100O0001O000010O00O2H7N4M101N10000O1O100011O0O10O000O10003M2N1N2O1N1O2O0NcUb7" + } + ], + "question": "Based on its shape, what is in the image?", + "choices": [ + "A. The folded wings of a bird.", + "B. A frog sitting on the grassy bank.", + "C. A piece of a decaying tree stump.", + "D. A fish jumping out of the water." + ], + "answer": "A", + "type": "shape", + "image": "images/vqa_168.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01350089.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "fnQf0?]g06J7I6L5L3LKaYOoN[f0R1gYOnNVf0T1jYOmNSf0U1mYOlNQf0U1oYOmNme0U1QZOnNne0R1QZOPOme0Q1SZOPOke0Q1UZOPOie0Q1WZOPOge0P1ZZOgNN0fe0Z1\\ZOfNO0ce0[1^ZOeNO0be0\\1_ZOdNO1`e0\\1bZOaNO3^e0]1mZOdNQe0]1nZOeNQe0[1nZOfNQe0[1nZOgNQe0Y1nZOhNQe0Y1nZOiNQe0W1nZOjNQe0W1nZOkNQe0T1oZOmNQe0S1nZOnNQe0S1nZOnNRe0R1lZOQORe0P1mZOQOSe0o0mZOQOSe0o0mZOQOSe0o0mZOQORe0P1oZO\\N=Jdd0j1oZOZNfe0f1:00O1L4MlZO^NPd0X1oZOiNd0?]d0g0g[O]OYd0c0d[O@\\d0`0d[O@\\d0`0d[O@\\d0?[[OlNFe0od0?X[OoNIb0od0?T[OSOM>od0?Q[OVO0;od0?Q[OVO0;od0?Q[OVO0;od0?Q[OVO1:nd0`0Q[OVO29md0a0Q[OVO38ld0b0Q[OVO47kd0c0Q[OUO67id0d0Q[OUO85gd0f0Q[OUO94fd0g0Q[OUO:3ed0h0Q[OUO;2dd0i0Q[OUO<1cd0j0Q[OUO0F4:kd0k0Q[OUO0G47ld0m0P[OUO0H45md0m0oZOVO0J41nd0n0nZOWO0K4Ond0o0nZOWO0L4LPe0P1lZOXO0M4JQe0P1kZOYO0N3ISe0o0jZOZO0O2HTe0o0jZOZO001FWe0P1fZO[O200EYe0a1gZOkNMF\\e0_1gZORO[e0m0eZOSO[e0m0eZOSO[e0n0cZORO^e0g110\\J" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. The masked object is black.", + "B. The masked object is white.", + "C. The masked object is brown.", + "D. 
The masked object is silver." + ], + "answer": "A", + "type": "color", + "image": "images/vqa_169.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01356234.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "Wcde0P1nf08I4L2N2H8M3H8M2O2N1O2O0O2O2N1N2N2O1N2N2O1O00001O001O1O1O1O1O001O00000000O100O10000O100O100O100O1O10001O0O1O2O1N2O1O1N101N2N1O2L4L4M3L3M4M3N2N3M3L3I7L4M5E:GaXOK^l`0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Black.", + "B. Brown.", + "C. Silver.", + "D. Dark green." + ], + "answer": "D", + "type": "color", + "image": "images/vqa_170.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01356234.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "Wcde0P1nf08I4L2N2H8M3H8M2O2N1O2O0O2O2N1N2N2O1N2N2O1O00001O001O1O1O1O1O001O00000000O100O10000O100O100O100O1O10001O0O1O2O1N2O1O1N101N2N1O2L4L4M3L3M4M3N2N3M3L3I7L4M5E:GaXOK^l`0" + } + ], + "question": "What is the texture of ?", + "choices": [ + "A. Metallic.", + "B. Pinstriped.", + "C. Grooved.", + "D. Smooth." + ], + "answer": "D", + "type": "texture/pattern", + "image": "images/vqa_171.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01356234.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "Wcde0P1nf08I4L2N2H8M3H8M2O2N1O2O0O2O2N1N2N2O1N2N2O1O00001O001O1O1O1O1O001O00000000O100O10000O100O100O100O1O10001O0O1O2O1N2O1O1N101N2N1O2L4L4M3L3M4M3N2N3M3L3I7L4M5E:GaXOK^l`0" + } + ], + "question": "What is the shape of ?", + "choices": [ + "A. Oval-shaped.", + "B. Circular.", + "C. Square.", + "D. Rectangular." 
+ ], + "answer": "A", + "type": "shape", + "image": "images/vqa_172.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01364554.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 820 + ], + "counts": "PPa7;eo0;E2N1O1O1O2N1O1O1O1O1O1O2N1O1`QONPm0k1F7I7I3M3M3M3M3M2N1O001O1O001O001O1O001O001O001O1O001O001O1O001O001O001O1O001O001O1O001O001O001O001O001O00001O000000001O0000001O000000001O0000001O00000oTOUKmj0Q5N2N1OO100O1N2O1N20000000TUO[K]j0T501ON2N2000000O12N2N3M2N2N001OO100N2N2N2N2N2N2N2N2N21O00001O001OO100O100O1O100O1O100O1O100O100O1O100O1O100O1O100O100O10SUO`KZj0`4eUObKZj0]4gUOcKYj0]4gUOcKYj0\\4gUOeKYj0[4gUOeKYj0Z4gUOgKYj0Y4gUOfKZj0Z4eUOgK[j0X4eUOiK[j0W4eUOiK[j0V4eUOjK\\j0V4dUOjK\\j0U4dUOlK\\j0T4dUOlK\\j0T4cUOlK^j0S4bUOnK^j0R4bUOnK^j0Q4bUOPL^j0P4bUOoK_j0P4aUOQL_j0o3aUOQL_j0o3`UORL`j0m3aUORL`j0n3_UOSLaj0l3`UOTL`j0l3_UOULaj0j3`UOULaj0k3_UOULaj0k3^UOVLbj0i3_UOWL5Jbi0o3YVOVLO3gi0f3ZVOXLG:oi0^3ZVOXLE?", + "choices": [ + "A. The lettuce inside is shredded.", + "B. The tomato is diced into small cubes.", + "C. It contains whole, intact lettuce leaves.", + "D. The tortilla is rolled into a closed cylinder." 
+ ], + "answer": "A", + "type": "shape", + "image": "images/vqa_173.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01364554.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 820 + ], + "counts": "PPa7;eo0;E2N1O1O1O2N1O1O1O1O1O1O2N1O1`QONPm0k1F7I7I3M3M3M3M3M2N1O001O1O001O001O1O001O001O001O1O001O001O1O001O001O001O1O001O001O1O001O001O001O001O001O00001O000000001O0000001O000000001O0000001O00000oTOUKmj0Q5N2N1OO100O1N2O1N20000000TUO[K]j0T501ON2N2000000O12N2N3M2N2N001OO100N2N2N2N2N2N2N2N2N21O00001O001OO100O100O1O100O1O100O1O100O100O1O100O1O100O1O100O100O10SUO`KZj0`4eUObKZj0]4gUOcKYj0]4gUOcKYj0\\4gUOeKYj0[4gUOeKYj0Z4gUOgKYj0Y4gUOfKZj0Z4eUOgK[j0X4eUOiK[j0W4eUOiK[j0V4eUOjK\\j0V4dUOjK\\j0U4dUOlK\\j0T4dUOlK\\j0T4cUOlK^j0S4bUOnK^j0R4bUOnK^j0Q4bUOPL^j0P4bUOoK_j0P4aUOQL_j0o3aUOQL_j0o3`UORL`j0m3aUORL`j0n3_UOSLaj0l3`UOTL`j0l3_UOULaj0j3`UOULaj0k3_UOULaj0k3^UOVLbj0i3_UOWL5Jbi0o3YVOVLO3gi0f3ZVOXLG:oi0^3ZVOXLE?", + "choices": [ + "A. It is a full circular slice.", + "B. It is a sliced piece.", + "C. It is whole and round.", + "D. It has a jagged, leafy shape." + ], + "answer": "B", + "type": "shape", + "image": "images/vqa_174.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01364931.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "^dca01ng02N000QYO5ke0KTZO7je0IWZO8ie0HUZO9ke0GVZO8ke0HTZO7ne0HRZO8ne0IPZO8Qf0HnYO8Rf0HmYO9Tf0GkYO9Vf0FkYO8Vf0IiYO6Yf0IgYO6Zf0KfYO1]f0OcYO2]f0MdYO2\\f0OcYO1^f0NbYO3]f0NcYO1]f0OcYO2]f0MdYO2\\f0OcYO1^f0NbYO3]f0NcYO1]f0OcYO2oe0LhYO1:2me0OhYO190oe01gYOO91oe01hYON90Pf0M]YO3:17OUf00eYO051Wf0NdYO150Xf00bYO14O[f00bYO021\\f0ObYO020]f00aYO10Oaf00_YO0O0cf01]YOOO1ef00UYO0M08Ohf01PYO30O4Mnf09nXOK2MQg0a0nXO^OQg0d0nXO\\OQg0k0N02O1N5L=YO_XO5fg0O101N2N^bQ5" + } + ], + "question": "What is the shape of in the image?", + "choices": [ + "A. It has a high, gooseneck-style spout.", + "B. 
It has a straight, right-angled spout.", + "C. It is composed of two separate cross-shaped handles and a central spout.", + "D. It has a curved or arc-shaped spout." + ], + "answer": "D", + "type": "shape", + "image": "images/vqa_175.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01364931.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "f`g?^1Tf0>N3oYORNme0Q200O2O0O100O10000O2O0O10000O10001O000O100O1O1O2N1000000000010O0001O1hMY\\O5ic0GY\\O9he0O100O1O1O000001O000000000001O0000000000000000001O000O100000000000001O000000000000000O101O00000000O10000001O001O002Neeo5" + } + ], + "question": "What is the shape of in the image?", + "choices": [ + "A. A straight, rectangular bar.", + "B. A circular knob.", + "C. A curved handle.", + "D. A T-shaped pull handle." + ], + "answer": "C", + "type": "shape", + "image": "images/vqa_176.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01364931.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "f`g?^1Tf0>N3oYORNme0Q200O2O0O100O10000O2O0O10000O10001O000O100O1O1O2N1000000000010O0001O1hMY\\O5ic0GY\\O9he0O100O1O1O000001O000000000001O0000000000000000001O000O100000000000001O000000000000000O101O00000000O10000001O001O002Neeo5" + } + ], + "question": "Which of the following descriptions about the texture of is correct?", + "choices": [ + "A. The masked object has a smooth surface.", + "B. The masked object has a grooved texture from the wooden planks.", + "C. The masked object features a distinct wood grain pattern.", + "D. The masked object has a slatted texture." 
+ ], + "answer": "A", + "type": "texture/pattern", + "image": "images/vqa_177.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01364931.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "f`g?^1Tf0>N3oYORNme0Q200O2O0O100O10000O2O0O10000O10001O000O100O1O1O2N1000000000010O0001O1hMY\\O5ic0GY\\O9he0O100O1O1O000001O000000000001O0000000000000000001O000O100000000000001O000000000000000O101O00000000O10000001O001O002Neeo5" + } + ], + "question": "What is a defining characteristic of in the image?", + "choices": [ + "A. It is a vertically oriented mount.", + "B. It is a horizontally positioned dispenser.", + "C. It has a cylindrical shape.", + "D. It is an L-shaped bracket." + ], + "answer": "A", + "type": "shape", + "image": "images/vqa_178.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01364931.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "f`g?^1Tf0>N3oYORNme0Q200O2O0O100O10000O2O0O10000O10001O000O100O1O1O2N1000000000010O0001O1hMY\\O5ic0GY\\O9he0O100O1O1O000001O000000000001O0000000000000000001O000O100000000000001O000000000000000O101O00000000O10000001O001O002Neeo5" + } + ], + "question": "Which of the following statements accurately describes in the image?", + "choices": [ + "A. The control panel is located on the left side of its door.", + "B. It has ventilation slots located on its top surface.", + "C. The object is the same color as the wooden cabinets above it.", + "D. It has a large, vertical handle for opening the door." 
+ ], + "answer": "B", + "type": "shape", + "image": "images/vqa_179.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01364931.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "bdf>5ig03N101N2O1O001O1O001O1O1O001O1O0000O1N2O100O1000000O1000000O100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1nN1SZOOje0;oYOEPf0V1N2N2N200O10000000000000000000001O001O000000000000000001O000000000000000000000000000O1O0F;O101N2No\\P6" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. White.", + "B. Silver.", + "C. Brown.", + "D. Black." + ], + "answer": "D", + "type": "color", + "image": "images/vqa_180.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01364931.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "bdf>5ig03N101N2O1O001O1O001O1O1O001O1O0000O1N2O100O1000000O1000000O100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1nN1SZOOje0;oYOEPf0V1N2N2N200O10000000000000000000001O001O000000000000000001O000000000000000000000000000O1O0F;O101N2No\\P6" + } + ], + "question": "Which of the following best describes the shape of ?", + "choices": [ + "A. It is a high-arc gooseneck faucet with a pull-down sprayer.", + "B. It has two separate handles for hot and cold water.", + "C. It has a single handle and a curved spout.", + "D. It is a wall-mounted faucet positioned above the sink." 
+ ], + "answer": "C", + "type": "shape", + "image": "images/vqa_181.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01396529.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Pkd`09fo02O1M4M2O1O1N2N2N2O1N2N2N2N101N100O2O0O100O2O1N1O010N2N2NObQOcN^n0]13011OO1O2N1O0O2O001L310O1O1N1O2O0O2M20001O0O1O1O1O1O0011O0O100ZO[OPROf0Pn0[OnQOf0Qn0[OmQOf0Tn0[OjQOf0Vn0ZOiQOf0Xn0VO]QO2:h0\\n0XOcQOi0^n0<001N6K1N6J2N2N2O2N1N2O1O5J2MXUg4" + } + ], + "question": "What is the primary material of ?", + "choices": [ + "A. Plastic.", + "B. Leather.", + "C. Rubber.", + "D. Canvas." + ], + "answer": "C", + "type": "material", + "image": "images/vqa_182.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01403825.jpg", + "mask_rles": [ + { + "size": [ + 1152, + 2048 + ], + "counts": "fX[\\1b0]S12M3L3O2N101N101N101N101N101N101N101N101N101N1O2O0O2O0O2O1N101N100O01O001O001O00001O010O010O010O0010O010O0mN^NjoNb1TP1aNjoN_1UP1dNjoN[1UP1hNioNY1TP1kNjoNU1UP1mNkoNS1SP1POkoNP1SP1TOloNl0SP1VOkoNj0UP1XOjoNh0VP1YOhoNg0XP1[OgoNd0YP1^OeoNc0[P1^O`oNe0`P1\\O_oNe0`P1]O]oNd0dP1]OYoNe0fP1Z10010O001O010O00010O010O0100O1O010O100O10O0100O1O010O100O010O100000O1000000O0100000O10000000O0100000000O100000001O0O2O00001O001N110O00001O001O001O01O01O001O001O001O0000O1O1O1O10O0100O100O10000O010O100O100O10O010O10O10O1000O010000O02O0000000O1000000O101O00000O100000000O10001O0O100000000O1000001O0O1O100O1O100O1O101mNSoNPOnP1o0boNaN_P1]1S1O1N2N2O1N2O2M2O1N200000000O10001O000O101O00001O0O101O00001N10001O000O2O00001O0O10001N10001N10001N10001N10001N10001N10001N10001O0O2NXfVa0" + } + ], + "question": "What is the material of the cover of ?", + "choices": [ + "A. Wood.", + "B. Rubber.", + "C. Plastic.", + "D. Cardboard." 
+ ], + "answer": "C", + "type": "material", + "image": "images/vqa_183.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01403825.jpg", + "mask_rles": [ + { + "size": [ + 1152, + 2048 + ], + "counts": "fX[\\1b0]S12M3L3O2N101N101N101N101N101N101N101N101N101N1O2O0O2O0O2O1N101N100O01O001O001O00001O010O010O010O0010O010O0mN^NjoNb1TP1aNjoN_1UP1dNjoN[1UP1hNioNY1TP1kNjoNU1UP1mNkoNS1SP1POkoNP1SP1TOloNl0SP1VOkoNj0UP1XOjoNh0VP1YOhoNg0XP1[OgoNd0YP1^OeoNc0[P1^O`oNe0`P1\\O_oNe0`P1]O]oNd0dP1]OYoNe0fP1Z10010O001O010O00010O010O0100O1O010O100O10O0100O1O010O100O010O100000O1000000O0100000O10000000O0100000000O100000001O0O2O00001O001N110O00001O001O001O01O01O001O001O001O0000O1O1O1O10O0100O100O10000O010O100O100O10O010O10O10O1000O010000O02O0000000O1000000O101O00000O100000000O10001O0O100000000O1000001O0O1O100O1O100O1O101mNSoNPOnP1o0boNaN_P1]1S1O1N2N2O1N2O2M2O1N200000000O10001O000O101O00001O0O101O00001N10001O000O2O00001O0O10001N10001N10001N10001N10001N10001N10001N10001O0O2NXfVa0" + } + ], + "question": "Which of the following best describes a shape characteristic of ?", + "choices": [ + "A. The object is rectangular in shape.", + "B. The handle of the object is curved.", + "C. The object has a pointed tip.", + "D. The head of the object is rounded." + ], + "answer": "D", + "type": "shape", + "image": "images/vqa_184.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01413369.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "Y`g43lg06J6J6J6K4K6K5K5K5eYO]Nne0h1nYOXNRf0k100O10000000001O00001O0000O100O_ZOVNnd0i1R[O\\Njd0d1U[O`Nid0^1W[OeNjd0X1U[OiNnd0S1R[OnNPe0P1P[OoNTe0n0lZOROWe0l0gZOTOOAPe0Y1Q[OVOMHPe0o0R[OZOMOmd0d0V[O\\OM4ld0>W[O^OM4od0Ve03^[OMfd00Z[O0hd0OV[O2md0KR[O5Re0IR[O2Pe0LT[O0od0MV[OMnd01V[OJld04c1N2MoePb0" + } + ], + "question": "What is the shape of ?", + "choices": [ + "A. It is rectangular.", + "B. 
It is conical.", + "C. It is cylindrical.", + "D. It is spherical." + ], + "answer": "C", + "type": "shape", + "image": "images/vqa_185.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01413369.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "Y`g43lg06J6J6J6K4K6K5K5K5eYO]Nne0h1nYOXNRf0k100O10000000001O00001O0000O100O_ZOVNnd0i1R[O\\Njd0d1U[O`Nid0^1W[OeNjd0X1U[OiNnd0S1R[OnNPe0P1P[OoNTe0n0lZOROWe0l0gZOTOOAPe0Y1Q[OVOMHPe0o0R[OZOMOmd0d0V[O\\OM4ld0>W[O^OM4od0Ve03^[OMfd00Z[O0hd0OV[O2md0KR[O5Re0IR[O2Pe0LT[O0od0MV[OMnd01V[OJld04c1N2MoePb0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Blue.", + "B. Red.", + "C. Green.", + "D. Yellow." + ], + "answer": "D", + "type": "color", + "image": "images/vqa_186.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01413369.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "Y`g43lg06J6J6J6K4K6K5K5K5eYO]Nne0h1nYOXNRf0k100O10000000001O00001O0000O100O_ZOVNnd0i1R[O\\Njd0d1U[O`Nid0^1W[OeNjd0X1U[OiNnd0S1R[OnNPe0P1P[OoNTe0n0lZOROWe0l0gZOTOOAPe0Y1Q[OVOMHPe0o0R[OZOMOmd0d0V[O\\OM4ld0>W[O^OM4od0Ve03^[OMfd00Z[O0hd0OV[O2md0KR[O5Re0IR[O2Pe0LT[O0od0MV[OMnd01V[OJld04c1N2MoePb0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. It is blue.", + "B. It has white text.", + "C. It has red measurement markings.", + "D. It is solid white." 
+ ], + "answer": "B", + "type": "color", + "image": "images/vqa_187.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01420513.jpg", + "mask_rles": [ + { + "size": [ + 803, + 1024 + ], + "counts": "kkY63b12ee03UZO6de0LXZOde0YO]ZO:NM4ge0^O\\ZO?M3ge0BWZO=11ge0j0YZOUOhe0k0XZOUOhe0j0YZOUOge0l0YZOTOge0k0ZZOTOge0k0ZZOUO6ROod0i1lZOTO3WOnd0f1oZOSO2ZOmd0b1S[ORO0^Old0`1U[OQONAld0]1W[OROMBkd0[1Y[OSOKDjd0Y1][OQOJHgd0W1_[OQOIJgd0U1a[OPOHLfd0T1c[OnNHOdd0S1e[OnNE2cd0Q1j[OkNC5bd0P1n[OhN@a0e0QObb0e1\\]OfN]Of0c0QOcb0c1_]OdNZOj0b0QOcb0b1c]O`NYOn0?ROdb0`1f]O]NWOR1?ROcb0_1j]OXNVOX1;TO^a0Do^Oh1?@TOVOYa0GR_Oc1b0_OSOXOUa0JT_O`1d0]OUOYOPa0MV_O]1f0\\OTO[Ol`00Y_OY1h0ZOUO\\Oi`03Y_OW1j0XOVO^Oe`05Z_OT1m0WOVO_Oa`09Z_OR1Q1SOVOA^`0nc0CS]OOoN>mc0DU]ONnN?", + "choices": [ + "A. Solid color.", + "B. Checkered pattern.", + "C. Striped pattern.", + "D. Polka dot pattern." + ], + "answer": "B", + "type": "texture/pattern", + "image": "images/vqa_188.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01420513.jpg", + "mask_rles": [ + { + "size": [ + 803, + 1024 + ], + "counts": "kkY63b12ee03UZO6de0LXZOde0YO]ZO:NM4ge0^O\\ZO?M3ge0BWZO=11ge0j0YZOUOhe0k0XZOUOhe0j0YZOUOge0l0YZOTOge0k0ZZOTOge0k0ZZOUO6ROod0i1lZOTO3WOnd0f1oZOSO2ZOmd0b1S[ORO0^Old0`1U[OQONAld0]1W[OROMBkd0[1Y[OSOKDjd0Y1][OQOJHgd0W1_[OQOIJgd0U1a[OPOHLfd0T1c[OnNHOdd0S1e[OnNE2cd0Q1j[OkNC5bd0P1n[OhN@a0e0QObb0e1\\]OfN]Of0c0QOcb0c1_]OdNZOj0b0QOcb0b1c]O`NYOn0?ROdb0`1f]O]NWOR1?ROcb0_1j]OXNVOX1;TO^a0Do^Oh1?@TOVOYa0GR_Oc1b0_OSOXOUa0JT_O`1d0]OUOYOPa0MV_O]1f0\\OTO[Ol`00Y_OY1h0ZOUO\\Oi`03Y_OW1j0XOVO^Oe`05Z_OT1m0WOVO_Oa`09Z_OR1Q1SOVOA^`0nc0CS]OOoN>mc0DU]ONnN in the image?", + "choices": [ + "A. White.", + "B. Green.", + "C. Red.", + "D. Black." 
+ ], + "answer": "D", + "type": "color", + "image": "images/vqa_189.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01420513.jpg", + "mask_rles": [ + { + "size": [ + 803, + 1024 + ], + "counts": "gX]=:fh04L4L4L3M4L3M4L3M4L3M3M3M2N3M2N3M2N3M3M3K5N2O1O1O1O1O100O1O1O1O100O1O1O1O100O1O1O1O100O1O1O1O100O100O100O1O100O1000000000001O000001O00000000000000000000001O000000000000000000001O00000000000000000000001O000000000000000000001O000O100O100O100O1O1O1N3J5M3N2M3M3N2M3M4M2M4L4M3L5K7J6I7I7I5LmdU8" + } + ], + "question": "What is the shape of the platform on ?", + "choices": [ + "A. Trapezoidal.", + "B. Rectangular.", + "C. Square.", + "D. Circular." + ], + "answer": "B", + "type": "shape", + "image": "images/vqa_190.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01420513.jpg", + "mask_rles": [ + { + "size": [ + 803, + 1024 + ], + "counts": "gX]=:fh04L4L4L3M4L3M4L3M4L3M3M3M2N3M2N3M2N3M3M3K5N2O1O1O1O1O100O1O1O1O100O1O1O1O100O1O1O1O100O1O1O1O100O100O100O1O100O1000000000001O000001O00000000000000000000001O000000000000000000001O00000000000000000000001O000000000000000000001O000O100O100O100O1O1O1N3J5M3N2M3M3N2M3M4M2M4L4M3L5K7J6I7I7I5LmdU8" + } + ], + "question": "What is the shape of ?", + "choices": [ + "A. The masked object is circular.", + "B. The masked object is triangular.", + "C. The masked object is rectangular.", + "D. The masked object is oval." 
+ ], + "answer": "C", + "type": "shape", + "image": "images/vqa_191.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01420513.jpg", + "mask_rles": [ + { + "size": [ + 803, + 1024 + ], + "counts": "gX]=:fh04L4L4L3M4L3M4L3M4L3M3M3M2N3M2N3M2N3M3M3K5N2O1O1O1O1O100O1O1O1O100O1O1O1O100O1O1O1O100O1O1O1O100O100O100O1O100O1000000000001O000001O00000000000000000000001O000000000000000000001O00000000000000000000001O000000000000000000001O000O100O100O100O1O1O1N3J5M3N2M3M3N2M3M4M2M4L4M3L5K7J6I7I7I5LmdU8" + } + ], + "question": "Which of the following statements accurately describes a color feature of ?", + "choices": [ + "A. The top surface of the object is gray.", + "B. The object has a black display screen.", + "C. The main body of the object is light green.", + "D. The entire object is a uniform color." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_192.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01420513.jpg", + "mask_rles": [ + { + "size": [ + 803, + 1024 + ], + "counts": "`hla02Pi02N2O1N1OKWWO3hh0NXWO2bh02`WON_h02bWON]h02cWON]h03cWOL]h04cWOL]h05bWOK^h05bWOK^h06aWOJ_h0O`WO21O_h0OaWO2OO`h0OaWO2OO`h0ObWO7^h0IbWO7^h0IcWO6^h0JaWO6_h0JbWO5_h0JaWO6_h0K`WO5ah0L]WO4ch061O1O1O00001O2N2O0O2OO01O01O01O010O0010O0001Ndio5" + } + ], + "question": "What is the material of the handles of ?", + "choices": [ + "A. Metal.", + "B. Wood.", + "C. Rubber.", + "D. Plastic." 
+ ], + "answer": "D", + "type": "material", + "image": "images/vqa_193.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01453850.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "R[X<;dg02N2N1O1O1O3N2N2N101N2N1O2N2N3M4L3L5L3N2M2O0O00O101O0O10000O100O101O0O10000O100O101O0O1000O01E_YOVObf0i0;N2O1N2O001O100O1000O010000O100000O010000O1000O10O1000000O10O10O1000O10O10O10O10O1000O01000O010000O01000O01000O100O1O1O1O2N1O1O1O1O1O2N1O2N3M3M3M2N3M3M3M3N6I6K20N110O01O010O520OJ6J7H7I8G8H:G;D;EWSZ8" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Silver.", + "B. Blue.", + "C. Yellow.", + "D. White." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_194.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01453850.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "R[X<;dg02N2N1O1O1O3N2N2N101N2N1O2N2N3M4L3L5L3N2M2O0O00O101O0O10000O100O101O0O10000O100O101O0O1000O01E_YOVObf0i0;N2O1N2O001O100O1000O010000O100000O010000O1000O10O1000000O10O10O1000O10O10O10O10O1000O01000O010000O01000O01000O100O1O1O1O2N1O1O1O1O1O2N1O2N3M3M3M2N3M3M3M3N6I6K20N110O01O010O520OJ6J7H7I8G8H:G;D;EWSZ8" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. It is made of metal.", + "B. It is made of plastic.", + "C. It is made of wood.", + "D. It is made of stone." 
+ ], + "answer": "B", + "type": "material", + "image": "images/vqa_195.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01453850.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "R[X<;dg02N2N1O1O1O3N2N2N101N2N1O2N2N3M4L3L5L3N2M2O0O00O101O0O10000O100O101O0O10000O100O101O0O1000O01E_YOVObf0i0;N2O1N2O001O100O1000O010000O100000O010000O1000O10O1000000O10O10O1000O10O10O10O10O1000O01000O010000O01000O01000O100O1O1O1O2N1O1O1O1O1O2N1O2N3M3M3M2N3M3M3M3N6I6K20N110O01O010O520OJ6J7H7I8G8H:G;D;EWSZ8" + } + ], + "question": "Which of the following describes the shape of ?", + "choices": [ + "A. A flat-topped object with four legs.", + "B. A cylindrical object with a rounded top.", + "C. A series of connected, curled tentacles.", + "D. A rectangular frame with a grid pattern." + ], + "answer": "A", + "type": "shape", + "image": "images/vqa_196.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01455911.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "P_k23Tf0Nb[O5[d0Nb[O4]d0Lb[O6]d0JeZO315Ze0H]ZOa02I_e0GYZOk02@4E^d06\\[OJKV11UO4Nad00\\[Of1NaNbd0k2K4M3L3M2O1N2O1O1O1O1O1O1O1O1O1O1000000O1000000O10000000O01000000O0100000O101O\\Lc\\Oe2]c0YMd\\Oh2[c0YMe\\Og2[c0XMf\\Oi2Yc0VMh\\Oj2Xc0VMh\\Oj2Xc0UMh\\Ol2Xc0SMi\\Om2Xc0RMh\\On2[c0nLf\\OR3^c0iLd\\OV3\\c0jLe\\OU3[c0jLf\\OV3Zc0fLk\\OY3Tc0hLm\\OW3Sc0hLo\\OW3Pc0jLP]O>[OR2ec0_MR]O2E_2Xc0`Mh]O`2Xb0_Mi]Oa2Vb0`Mk]O_2Ub0`Ml]O`2Sb0aMm]O_2Sb0`Mn]O`2Rb0`Mn]O`2Sb0^Mn]Ob2hc000000O100O100O100O100O1000000O1000000O100000000O1000000O1000000O100001O1O1O1O1O1O1O1O1bMgZOS2Ze0kMgZOU2Ze0gMjZOX2ae0N1O1O2N1O1O2N1O2N1O1O2N1O:F>B3M0000O10O10000O100O00100O1O100O1O1O101N2N2O1N2N2Ml`\\a0" + } + ], + "question": "Which of the following descriptions about a part of is correct?", + "choices": [ + "A. It has a rectangular license plate.", + "B. 
The rearview mirrors are circular.", + "C. The seat is triangular.", + "D. The headlight is square." + ], + "answer": "A", + "type": "shape", + "image": "images/vqa_197.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01511060.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "Rb0e0Zg01O100000001O00000000000000000000001O00000000000000000000001O000000000000000000001O0001O00001O00001O000010O0001O00001O00000O2O0O101N100O2O0O101N101N101N2O1NcUTf0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Light beige.", + "B. Light gray.", + "C. Dark gray.", + "D. Off-white." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_198.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01525619.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1024 + ], + "counts": "ZQ\\l0g0P5[OVe0h0_ZOC^e0?YZOJfe07PZO3ne0OiYO:Wf0F_YOe0_f0]OWYOn0gf0ROoXOY1Pg0gNgXOc1dg0RNQXOY2ng0gMiWOc2Xh0[M^WOQ3ah0nLVWO\\3lh0aLjVOi3Ui0XLbVOQ4Wi0WL^VOS4Zi0VL\\VOR4ci0V1O2O1N20O01000gIoVOb5hi001O2M10O1001O000O2O000O1O0010O01O1O10O01O1O00100O001O10O01O1O010O1O1O010O1O00100O001O10O01O1O1N101O001O0O2N101O1O0010O10M2N3M2N3M2M4M2N3M2N3N002K5K5L4K4L5K4L5K3M4M3L4L4L4L4L4L4M3L5KoK" + } + ], + "question": "Which of the following descriptions about the shape of is correct?", + "choices": [ + "A. The masked object has a curved handle.", + "B. The main body of is a perfect cylinder.", + "C. The masked object is heart-shaped.", + "D. The top rim of has a scalloped edge." 
+ ], + "answer": "A", + "type": "shape", + "image": "images/vqa_199.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01534987.jpg", + "mask_rles": [ + { + "size": [ + 786, + 1178 + ], + "counts": "Z`d27Yh04L4L4L4L4L5K4M2N2N2N2N2N2N1O2N2N2N2N1O2O1N2N101N101N100O2N100O100O2O0fNkMV\\OV2ic0mMT\\OT2lc0oMQ\\OR2nc0QNn[OP2Rd0SNk[Om1Ud0UNh[Ol1Xd0WNd[Oj1\\d0YNa[Oh1^d0ZN_[Og1ad0\\N\\[Od1dd0_NX[Ob1hd0U10O3M3M3M2O2M3M3M2N3L4M2N000000000O10000000000O10000000000O2O0000000O10000O101N100O100O10000O2O0O100O100O100O2O000O2O0O1O2N1O2N2N1O2N1N3L3N4J5J6J7I6J7I7J`adf0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Transparent.", + "B. White.", + "C. Brown.", + "D. Black." + ], + "answer": "A", + "type": "color", + "image": "images/vqa_200.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01575962.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Xh0>bo010000000O0100O10000O1O101N100O2O0O1010O1O1N2N2LoQP2E^noM1O100N101O1N2O1O1O10O10O100000000O0100000O100O1O001N2O1O0001O1N1L500O1000O010000O10O01O1MdThc0" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. The masked object is made of metal.", + "B. The masked object is made of ceramic.", + "C. The masked object is made of wood.", + "D. The masked object is made of plastic." + ], + "answer": "C", + "type": "material", + "image": "images/vqa_201.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01575962.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Xh0>bo010000000O0100O10000O1O101N100O2O0O1010O1O1N2N2LoQP2E^noM1O100N101O1N2O1O1O10O10O100000000O0100000O100O1O001N2O1O0001O1N1L500O1000O010000O10O01O1MdThc0" + } + ], + "question": "Which of the following best describes the shape of ?", + "choices": [ + "A. 
The masked object is long and cylindrical.", + "B. The masked object is a set of two long, flat metal strips.", + "C. The masked object is a thin, flat slice.", + "D. The masked object is a folded piece of cloth." + ], + "answer": "A", + "type": "shape", + "image": "images/vqa_202.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01616394.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1024 + ], + "counts": "^bh31io09G7J7PRO]OZl0i0cSO^OSl0i0jSO]Olk0j0PTO]Ofk0i0XTO\\O_k0k0^TO[OXk0l0eTOZOQk0l0lTO[Oij0m0TUOYObj0n0[UOXO[j0n0cUOXOSj0o0iUOXOmi0o0PVOVOgi0P1WVOTOai0S1\\VOoN^i0m3J6I7J6J6J5N3M3N2O1N2_N[HeZOf7Pe0eHoZO\\7md0hHR[OY7ld0lHP[OU7od0lHoZOV7od0lHP[Om2^OOae0UMP[O^21:nd0YMP[O\\277id0]MP[O[2<5cd0aMP[OZ2a0YNcN4ke0ZOoZOY2i0QNcN:ee0\\OnZOX2o0jMeN`0^e0^OmZOW2U1cMgNf0We0_OmZOW2X1^MkNj0Ze0WObZO`2[1ZMPOm0Re0ZObZO^2^1VMUOP1jd0]ObZO\\2b1QMYOT1bd0_ObZO\\2^3SNoa0BbZOZ2c3RNja0EbZOX2h3QNea0HbZOV2m3oMaa0LaZOT2Q4oM]a0MbZOS2T4oMYa0ObZOQ2W4PNWa0O`ZOQ2\\4oMSa01`ZOo1`4oMo`03`ZOm1d4nMl`05`ZOl1g4mMi`08_ZOj1k4lMf`0;^ZOi1m4kMe`0=\\ZOh1R5iMb`0?\\ZOg1U5hM_`0b0[ZOe1Y5fM]`0f0YZOc1]5dM[`0j0WZOa1`5cMZ`0n0TZOi06ZNa5KV`0U1PZOc0gl0Y12O1O1O1O1O100O1OU]OVMb8i2^GZM`8e2`G\\M`8d2`G]M_8b2aG`M\\8a2eGaMW8`2jGaMS8`2oGaMn7`2RHaMk7`2WH`Mf7a2[HaMa7`2aH`M]7`2eHaMW7e0a^O:Y:ROS7d0k^O5S:WOo6d0T_O0n9^Ok6b0\\_OKk9Cf6b0g_ODe9Ka6`0P@@`91_6=X@]OZ97\\6;`@XOU9>Z69f@SOS9d0V68mKIQ46PLKo35RLLm32TLOl3OVL1k3LWL4i3JXL6j3GXL9h3FXL:j3CXL`0e3^O]Le0^3\\ObLi0X3WOiLm0R3TOnLQ1k2POVMR1g2nNZMS1d2nN\\MS1b2mN_MT1_2lNbMU1\\2lNdMU1Y2lNhMU1V2kNkMV1S2jNnMW1P2jNPNW1n1iNSNX1j1iNVNY1h1hNXNY1f1gN[NZ1c1fN^N[1`1fN`N[1^1eNcN\\1\\1cNeN^1Y1bNiM^OkCQ2[>_NgMGlCk1\\>[NfM1lCf1]>UNeMQNdMe0mC[1_>lMbMP1mCU1b>fM_M\\1mCo0Qk0YOlTOi0nj0^OPUOc0jj0DTUO>hj0GUUO:ij0`2N2O00000001N2N2M3K5K5K5K5dM`TO2ek0EcTO7bk0@gTO;^k0BfTO0hk0M\\TOL2WO\\j0k0gUOGO@Zj0g0kUODMFXj0d0oUOAKLVj0a0TVO]OG4Uj0>WVOYOF9Tj0<[VOTODa0Qj09iWOHWh06jWOJWh04iWONXh0OiWO2Wh0LjWO4Wh0JjWO7Vh0HiWO:Wh0DjWO?", + "choices": [ + "A. 
Golden brown.", + "B. A mix of green and white.", + "C. A mix of purple and orange.", + "D. Creamy white." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_203.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01616394.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1024 + ], + "counts": "^bh31io09G7J7PRO]OZl0i0cSO^OSl0i0jSO]Olk0j0PTO]Ofk0i0XTO\\O_k0k0^TO[OXk0l0eTOZOQk0l0lTO[Oij0m0TUOYObj0n0[UOXO[j0n0cUOXOSj0o0iUOXOmi0o0PVOVOgi0P1WVOTOai0S1\\VOoN^i0m3J6I7J6J6J5N3M3N2O1N2_N[HeZOf7Pe0eHoZO\\7md0hHR[OY7ld0lHP[OU7od0lHoZOV7od0lHP[Om2^OOae0UMP[O^21:nd0YMP[O\\277id0]MP[O[2<5cd0aMP[OZ2a0YNcN4ke0ZOoZOY2i0QNcN:ee0\\OnZOX2o0jMeN`0^e0^OmZOW2U1cMgNf0We0_OmZOW2X1^MkNj0Ze0WObZO`2[1ZMPOm0Re0ZObZO^2^1VMUOP1jd0]ObZO\\2b1QMYOT1bd0_ObZO\\2^3SNoa0BbZOZ2c3RNja0EbZOX2h3QNea0HbZOV2m3oMaa0LaZOT2Q4oM]a0MbZOS2T4oMYa0ObZOQ2W4PNWa0O`ZOQ2\\4oMSa01`ZOo1`4oMo`03`ZOm1d4nMl`05`ZOl1g4mMi`08_ZOj1k4lMf`0;^ZOi1m4kMe`0=\\ZOh1R5iMb`0?\\ZOg1U5hM_`0b0[ZOe1Y5fM]`0f0YZOc1]5dM[`0j0WZOa1`5cMZ`0n0TZOi06ZNa5KV`0U1PZOc0gl0Y12O1O1O1O1O100O1OU]OVMb8i2^GZM`8e2`G\\M`8d2`G]M_8b2aG`M\\8a2eGaMW8`2jGaMS8`2oGaMn7`2RHaMk7`2WH`Mf7a2[HaMa7`2aH`M]7`2eHaMW7e0a^O:Y:ROS7d0k^O5S:WOo6d0T_O0n9^Ok6b0\\_OKk9Cf6b0g_ODe9Ka6`0P@@`91_6=X@]OZ97\\6;`@XOU9>Z69f@SOS9d0V68mKIQ46PLKo35RLLm32TLOl3OVL1k3LWL4i3JXL6j3GXL9h3FXL:j3CXL`0e3^O]Le0^3\\ObLi0X3WOiLm0R3TOnLQ1k2POVMR1g2nNZMS1d2nN\\MS1b2mN_MT1_2lNbMU1\\2lNdMU1Y2lNhMU1V2kNkMV1S2jNnMW1P2jNPNW1n1iNSNX1j1iNVNY1h1hNXNY1f1gN[NZ1c1fN^N[1`1fN`N[1^1eNcN\\1\\1cNeN^1Y1bNiM^OkCQ2[>_NgMGlCk1\\>[NfM1lCf1]>UNeMQNdMe0mC[1_>lMbMP1mCU1b>fM_M\\1mCo0Qk0YOlTOi0nj0^OPUOc0jj0DTUO>hj0GUUO:ij0`2N2O00000001N2N2M3K5K5K5K5dM`TO2ek0EcTO7bk0@gTO;^k0BfTO0hk0M\\TOL2WO\\j0k0gUOGO@Zj0g0kUODMFXj0d0oUOAKLVj0a0TVO]OG4Uj0>WVOYOF9Tj0<[VOTODa0Qj09iWOHWh06jWOJWh04iWONXh0OiWO2Wh0LjWO4Wh0JjWO7Vh0HiWO:Wh0DjWO in the image?", + "choices": [ + "A. A smooth, curved arc.", + "B. A ruffled or scalloped edge.", + "C. 
A collection of separate, sharp fragments.", + "D. A complete and perfect circle." + ], + "answer": "B", + "type": "shape", + "image": "images/vqa_204.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01621320.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "icS?5jg02N101O00000O1O10000O1000000O101O0O5J3N2O1M2O100O1O1N2O1cKSOgAm0W>VOgAk0X>WOeAk0[>VOcAk0]>VOaAk0^>XO_Ai0a>ZO\\Af0e>\\OWAe0i>]OUAc0k>^OTAb0m>^OQAc0P?^Oj@\\OfLV1`b0^Oh@h0X?YOf@g0[?ZOd@f0\\?[Oc@e0]?\\O`@e0`?^O[@d0f?]OW@e0j?[OT@e0m?\\OQ@e0o?[Om_Oi0S`0ZOi_Of0X`0\\Oe_Od0\\`0]Ob_Oc0_`0^O__Oa0c`0BT_Oc0m`0_O_^OR1ca0POY^OP1ha0TOS^Ol0na0VOP^Oh0Rb01U]ONlb04R]OIQc0b22N2M3PNk\\OPOYc0o0g\\OPO[c0P1d\\OoN`c0n0`\\OQOdc0l0\\\\OSOgc0k0Z\\OSOjc0j0V\\OUOmc0i0S\\OUOPd0j0P\\OUORd0j0n[OUOUd0i0k[OWOXd0f0h[OYOZd0f0f[OYO\\d0f0d[OXO`d0f0a[OXObd0f0^[OYOfd0d0Z[OZOld0b0T[O]ORe0>nZOAYe09gZOE_e07aZOGde06\\ZOIje01WZOMle02TZOMne01TZOMZf0EhYO9\\f0BhYO:QUh6" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Blue.", + "B. Brown.", + "C. White.", + "D. Black." 
+ ], + "answer": "D", + "type": "color", + "image": "images/vqa_205.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01621320.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "icS?5jg02N101O00000O1O10000O1000000O101O0O5J3N2O1M2O100O1O1N2O1cKSOgAm0W>VOgAk0X>WOeAk0[>VOcAk0]>VOaAk0^>XO_Ai0a>ZO\\Af0e>\\OWAe0i>]OUAc0k>^OTAb0m>^OQAc0P?^Oj@\\OfLV1`b0^Oh@h0X?YOf@g0[?ZOd@f0\\?[Oc@e0]?\\O`@e0`?^O[@d0f?]OW@e0j?[OT@e0m?\\OQ@e0o?[Om_Oi0S`0ZOi_Of0X`0\\Oe_Od0\\`0]Ob_Oc0_`0^O__Oa0c`0BT_Oc0m`0_O_^OR1ca0POY^OP1ha0TOS^Ol0na0VOP^Oh0Rb01U]ONlb04R]OIQc0b22N2M3PNk\\OPOYc0o0g\\OPO[c0P1d\\OoN`c0n0`\\OQOdc0l0\\\\OSOgc0k0Z\\OSOjc0j0V\\OUOmc0i0S\\OUOPd0j0P\\OUORd0j0n[OUOUd0i0k[OWOXd0f0h[OYOZd0f0f[OYO\\d0f0d[OXO`d0f0a[OXObd0f0^[OYOfd0d0Z[OZOld0b0T[O]ORe0>nZOAYe09gZOE_e07aZOGde06\\ZOIje01WZOMle02TZOMne01TZOMZf0EhYO9\\f0BhYO:QUh6" + } + ], + "question": "What material is at the person's waist made of?", + "choices": [ + "A. Elastic.", + "B. Leather.", + "C. Cotton.", + "D. Nylon." 
+ ], + "answer": "B", + "type": "material", + "image": "images/vqa_206.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01621320.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "icS?5jg02N101O00000O1O10000O1000000O101O0O5J3N2O1M2O100O1O1N2O1cKSOgAm0W>VOgAk0X>WOeAk0[>VOcAk0]>VOaAk0^>XO_Ai0a>ZO\\Af0e>\\OWAe0i>]OUAc0k>^OTAb0m>^OQAc0P?^Oj@\\OfLV1`b0^Oh@h0X?YOf@g0[?ZOd@f0\\?[Oc@e0]?\\O`@e0`?^O[@d0f?]OW@e0j?[OT@e0m?\\OQ@e0o?[Om_Oi0S`0ZOi_Of0X`0\\Oe_Od0\\`0]Ob_Oc0_`0^O__Oa0c`0BT_Oc0m`0_O_^OR1ca0POY^OP1ha0TOS^Ol0na0VOP^Oh0Rb01U]ONlb04R]OIQc0b22N2M3PNk\\OPOYc0o0g\\OPO[c0P1d\\OoN`c0n0`\\OQOdc0l0\\\\OSOgc0k0Z\\OSOjc0j0V\\OUOmc0i0S\\OUOPd0j0P\\OUORd0j0n[OUOUd0i0k[OWOXd0f0h[OYOZd0f0f[OYO\\d0f0d[OXO`d0f0a[OXObd0f0^[OYOfd0d0Z[OZOld0b0T[O]ORe0>nZOAYe09gZOE_e07aZOGde06\\ZOIje01WZOMle02TZOMne01TZOMZf0EhYO9\\f0BhYO:QUh6" + } + ], + "question": "Which of the following best describes the texture of ?", + "choices": [ + "A. It has a ribbed texture.", + "B. It has a smooth surface.", + "C. It has a denim-like texture.", + "D. It has a braided texture." + ], + "answer": "B", + "type": "texture/pattern", + "image": "images/vqa_207.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01621320.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "nm=5jg02N2O00000001N10000001O00000001O00000000O1L4`XOISg0e0L3O2O0O2N1O1O1O2N1O1O101N1O1O1O1OD`YO[O^f0d0dYO\\O[f0e0fYOZOZf0f0fYOZOYf0h0gYOXOXf0h0hYOXOWf0j0iYOUOVf0l0jYOTOVf0l0kYOSOTf0o0lYOQORf0P1nYOPORf0Q1nYOnNQf0S1oYOmNQf0T1oYOlNoe0U1QZOkNne0V1SZOiNme0X1RZOhNme0Y1TZOfNle0[1SZOeNle0\\1UZObNke0`1:0O2O0O101N101O001N10000O10O1N2O001O1O1O1O1O1O2N1O1O1O102M10001O0O2O001N101O001N101O0O2O001O0O2O1O1N2O001N2O1O2M101O1N4M3M4K`jed0" + } + ], + "question": "Which statement accurately describes a feature of ?", + "choices": [ + "A. 
The masked object is long and curved.", + "B. The masked object is perfectly spherical.", + "C. The masked object has a short stem.", + "D. The masked object has a long, prominent stem." + ], + "answer": "C", + "type": "shape", + "image": "images/vqa_208.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01629547.jpg", + "mask_rles": [ + { + "size": [ + 1600, + 1200 + ], + "counts": "mP1`8P1jHAUOdU1S8ijNjIUU1X6jjNiITU1X6ljNiISU1X6ljNjIRU1V6njNkIQU1T6PkNmIoT1S6QkNnInT1R6RkNoImT1P6TkNPJmT1o5SkNPJoT1o5QkNQJoT1o5QkNQJoT1o5RkNPJnT1P6SkNoImT1Q6UkNlIlT1T6_kNaIaT1_6^kNjGoNQ1cU1U7^kNjGB8VU1n7XkNjGZV1V8fiNjGZV1V8fiNjGZV1V8fiNjGZV1V8fiNjGZV1V8fiNjG[V1U8eiNjG\\V1V8diNjG\\V1V8diNjG\\V1V8diNjG\\V1V8diNjG\\V1V8diNjG\\V1V8diNjG\\V1V8diNjG\\V1U8eiNkG[V1U8eiNkG[V1U8eiNkG[V1U8eiNkG[V1U8eiNkG[V1U8eiNkG[V1U8eiNkG[V1U8eiNkG[V1U8diNlG\\V1T8diNlG\\V1T8diNlG\\V1T8ciNmG]V1S8ciNmG]V1S8ciNmG]V1S8biNnG^V1R8biNnG^V1R8biNoG]V1Q8biNQH]V1o7ciNQH]V1o7ciNRH\\V1n7ciNSH]V1m7ciNTH\\V1l7ahNdGi0b0fV1h7_hNmGg0;jV1h7]hNSHe06mV1]8nhNdGRW1`8jhNaGUW1c8ghN]GYW1g8bhNZG^W1d900000000000a0_O6J6J6J6J6J6J6J6J6J6J6J3M3M4L4L9G5L2M1O1O1O2N2N001O1O001O001O1O001O0000oE^lNd5bS1ZJalNe5_S1ZJdlNd5\\S1ZJglNd5ZS1ZJjlNd5VS1ZJmlNe5SS1YJQmNe5oR1YJTmNe5mR1YJVmNf5jR1XJZmNf5fR1XJ^mNf5bR1XJemNb5\\R1]JjmN^5WR1_JomN]5QR1aJVnNY5kQ1eJ[nNW5fQ1fJanNU5`Q1hJgnNR5[Q1kJknNo4\\Q1jJknNn4YQ1oJmnNj4WQ1VKmnNc4VQ1\\KonN\\4UQ1cKonNV4UQ1jKonNn3UQ1QLonNi3TQ1WLPoNa3TQ1^LQoNZ3SQ1fLQoNR3SQ1mLToNj2nP1WMYoN^2kP1aM\\oNU2hP1\\L]jN7R5R3eP1eL[jN8^5[2bP1[MRjN:^6:_P1ZOUiN;V_1Dk`NS_1@o`N?R_1_OPaN`0R_1]OPaNa0R_1^Oo`N`0T_1]On`Na0T_1]On`Na0U_1\\Oo`N`0S_1^OQaN>R_1_ORaN=Q_1_OTaN=[`1K4M2N3KWblb1" + } + ], + "question": "What is a primary material of ?", + "choices": [ + "A. Bamboo.", + "B. Metal.", + "C. Wood.", + "D. Rubber." 
+ ], + "answer": "D", + "type": "material", + "image": "images/vqa_209.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01629547.jpg", + "mask_rles": [ + { + "size": [ + 1600, + 1200 + ], + "counts": "_Sji02la14L3N2N2N2N2N2N2N2N2N2N2N2N2N2N2N2DnNg_NT1V`1=N1O1O1O1O1O1O100O1O1O1O1O010O1O1O1O1O100O1O001O3M2O2M2N3M2N3M2WdNZMVX1h2fgN\\MWX1f2ggN[MXX1h2egNZMYX1h2fgNXMEk1eS1P1dlNUMBd2US19XmNRM_OT3oR1L`mNPM\\O\\3PS1GamNmL[Oc3RS1AamNmLXOj3SS1\\OcmNiLVOS4TS1UOdmNhLVOX4TS1ROdmNfLVO]4SS1nNemNeLWOb4QS1kNfmNcLVOg4RS1gNfmNbLWOl4PS1dNfmNaLXOP5oR1`NhmN_LWOW5nR1\\NhmN^LXOZ5nR1YNimN\\LXO`5lR1VNimNZLYOf5kR1QNkmNYLXOj5kR1oMjmNWLZOo5iR1kMkmNWLYOT6iR1fMlmNVLZOX6hR1cMmmNULXO^6hR1]MomNULXOc6fR1YMPnNTLYOg6eR1UMQnNTLYOj6eR1SMQnNSLXOm6fR1PMQnNSLXOQ7eR1mLQnNRLZOS7dR1kLQnNRL[OV7bR1iLRnNQL[OX7bR1gLRnNQL\\OZ7aR1hLomNnK@]7_R1hLmmNkKD_7^R1gLmmNjKDb7]R1eLmmNiKFd7\\R1dLlmNhKHf7[R1bLlmNhKIi7YR1`LmmNgKIk7YR1^LmmNgKIn7YR1\\LlmNfKKQ8WR1YLmmNgKKR8WR1WLnmNfKKV8UR1TLomNfKLX8TR1QLPnNgKMY8RR1PLPnNhKM[8QR1mKRnNgKN]8oQ1lKRnNgKO`8mQ1iKTnNfK0b8kQ1hKTnNgK0c8kQ1fKUnNfK1f8hQ1dKVnNfK2h8gQ1[IbmNId0SN4j8eQ1ZIdmNHc0TN3m8dQ1WIhmNG?VN5m8cQ1VIkmNE=WN5Q9aQ1SIomND9YN7Q9`Q1RIQnNC8ZN6S9`Q1PITnNB5ZN8U9^Q1oHWnNA1\\N9U9_Q1nHXnNAO[N;V9^Q1nHZnN@L\\N;X9^Q1lH]nN_OH^N=W9^Q1lH^nN^OG^N=Y9^Q1kH`nN]OC`N>Y9_Q1jHbnN\\O@`N`0[9]Q1iHdnN\\O^O`N`0\\9^Q1hHfnN[OZObNb0\\9]Q1gHinNZOWObNc0^9]Q1fHknNYOTOcNc0_9^Q1eHlnNDBl7aQ1`HonNA\\OT8eQ1[HQoN_OVOZ8iQ1WHRoN]OnNe8oQ1nGUoNm9kP1SFWoNk9iP1UFXoNk9gP1UF[oNi9eP1WF]oNg9cP1XF_oNh9`P1XFboNf9^P1YFeoNe9\\P1ZFaoNj9^P1YDVnNX1]1`:]P1WD_nNGER1`1P;]P1VDgnNh0l0S;]P1SDinNi0j0T;^P1PDknNk0g0U;bQ1jD^nNW;cQ1gD]nNY;dQ1fD\\nNZ;eQ1eD[nN\\;eQ1cD[nN];gQ1aDYnN`;gQ1_DYnNa;hQ1^DYnNa;hQ1^DXnNc;iQ1[DWnNe;jQ1ZDWnNe;jQ1ZDVnNf;kQ1YDUnNg;mQ1VDTnNj;mQ1UDTnNj;mQ1TDTnNl;mQ1RDTnNn;nQ1oCTnNPZ1L]Nl9fQ1kE]nNb0`1G^Nj9gQ1mEXnNd0d1D]Nk9gQ1oEVnNb0g1C\\Nk9hQ1RFRnN`0k1C\\Ni9hQ1UFomN`0n1@\\Nk9gQ1WFlmN?Q2_O^Nh9fQ1cGmoNdN^Ng9fQ1eGmoNbN_Ng9eQ1gGUQOW8ln0iGTQOU8mn0kGUQOR8ln0nGUQOQ8
kn0oGWQOn7jn0RHWQOl7jn0THXQOj7hn0VHYQOh7hn0WH[QOf7fn0YH\\QOe7en0ZH^QOb7dn0\\H_QOb7bn0UHiQOh7Xn0XHiQOf7Xn0YHkQOd7Vn0\\HkQOa7Wn0^HkQO`7Vn0`HlQO]7Un0bHmQO\\7Tn0cHoQOZ7Rn0fHoQOW7Sn0hHPROU7Rn0jHoQOT7Rn0kHPROS7Qn0mHPROQ7Rn0mHQROo6Rn0PIoQOn6Sn0PIoQOm6Un0PImQOm6fn0`H^QO\\7eT1M4L4L4L3M4L4L4L3M4L4L4L4L3M4L4L4@?^Oc0UNRcNQN_]1l1WcN_Mk\\1_2UcNaMm\\1]2ScNcMo\\1[2QcNeMR]1X2nbNhMT]1U2lbNkMW]1S2ibNmMZ]1P2fbNPN\\]1n1dbNRN^]1W33M2O2bL^aNY3d^11O001O000bHdLXPO\\3fo0gLYPOY3eo0jLZPOV3do0lL\\POU3bo0lL^POU3_o0nL`POR3^o0PMbPOQ3[o0QMePOo2Zo0RMfPOn2Xo0UMgPOk2Xo0VMhPOj2Xo0VMhPOY1cIeNgU12ePOY1dIeNgU12ePOZ1cIcNiU13dPOZ1cIaNkU16]PO^1gIXNPV1:XPO_1hIWNQV19WPO`1hIWNQV18XPOb1fIVNRV18XPOb1fIVNRV17YPOc1eIVNRV16ZPOe1cIUNTV14ZPOg1bIUNTV12\\POj1_ITNVV1O]POm1]ITNVV1N_POm1[IUNWV1K`POQ2XISNYV1KaPOQ2VITNQ^1k1naNVNS^1j1laNVNU^1i1kaNWNV^1i1iaNWNW^1l1faNTN[^1m1caNSN]^1P2`aNPN_^1S2_aNmMa^1V2\\aNjMc^1Y2[aNgMe^1[2YaNeMg^1j2000002N4L5K5K3M2N1O2N1O2iJTLVlNm3gS1VLXlNl3fS1ULXlNm3fS1ULYlNm3eS1TLZlNm3eS1TLZlNn3cS1TL\\lNm3cS1TL\\lNn3bS1SL]lNn3bS1SL]lNo3bS1QL]lNP4cS1PL\\lNR4dS1mK[lNT4iS1hKVlNZ4iS1fKVlN[4iS1eKWlN\\4hS1eKWlN\\4hS1dKXlN]4gS1cKYlN^4fS1bKZlN_4dS1bK\\lN_4cS1aK]lN`4bS1`K]lNb4bS1_K]lNb4bS1^K^lNb4bS1UKRhN0\\4l4aS1TKShN0\\4m4aS1RKShN1\\4n4`S1QKThN1\\4o4_S1ZK`lNg4`S1XK`lNi4_S1WKalNj4^S1VKblNj4_S1UKalNl4^S1TKblNm4]S1SKclNn4]S1RKblNo4]S1QKclNP5\\S1PKdlNQ5\\S1nJdlNS5[S1mJelNS5\\S1lJdlNU5[S1kJelNV5[S1jJclNX5\\S1hJdlNY5\\S1fJdlN[5[S1eJelN[5\\S1dJdlN]5[S1cJelN]5\\S1bJdlN_5[S1bJdlN^5]S1aJclN_5]S1aJclN_5^S1`JblNa5]S1_JclNa5^S1^JehNN\\3d5PT1\\JchN3\\3b5QT1YJdhN6Z3a5`T1aJ_kN_5aT1cJ]kN^5bT1cJ]kN]5cT1eJ[kN[5eT1gJYkNY5gT1iJWkNX5hT1iJVkNX5kT1iJSkNW5mT1kJQkNV5nT1lJPkNT5PU1mJojNS5RU1nJljNR5TU1PKjjNQ5UU1QKijNo4XU1RKfjNn4ZU1TKdjNm4[U1UKcjNk4^U1WK_jNi4aU1YK]jNh4bU1ZK\\jNf4eU1[KYjNe4gU1^KVjNb4jU1`KTjNa4lU1`KRjN`4nU1cKoiN]4QV1eKmiN\\4SV1eKkiN[4UV1gKhiNZ4XV1iKeiNW4\\V1jKbiNV4_V1kK_iNU4aV1mK]iNS4dV1oKYiNQ4hV1PLViNP4jV1RLTiNn3mV1VLnhNi3TW1[LghNe3ZW1^LbhNb3^W1_LahNa3`W1_L_hNa3bW1`L\\hN`3dW1aL[hN^3gW1bLXhN^3iW1bLVhN^3kW1bLThN^3RX1]LmgNc3TX1]Lk
gNc3VX1]LhgNc3YX1^LfgNb3[X1^LdgNb3\\X1_LcgNa3]X1`LbgN`3^X1aLagN_3^X1cLagN\\3`X1eL_gN[3aX1gL]gNY3dX1gL[gNY3fX1gLYgNY3hX1gLWgNY3iX1hLVgNW3lX1iLSgNW3nX1iLQgNW3PY1iLofNV3SY1jLlfNU3UY1k21N2O1N2N101N2N2O1N3N1N3M2O2M2N3M2N2N3M2N3M2N3M2M4M2N3M3TKodNd2T[1R24L3M3iLUeNTOoZ1h0SeNVOP[1h0PeNWOS[1g0mdNWOX[1e0jdNYOY[1e0gdNZO\\[1d0ddN[Ob[1`0_dN^Oh[1;YdNDo[15QdNIX\\10icNL]\\11ccNLc\\10^cNMh\\10YcNKn\\12RcNKT]11mbNKY]13hbNI^]14bbNId]14\\bNHk]14WbNHl]18TbNDP^1;RbNAR^1>oaN]OU^1b0maNZOV^1d0oaNVOT^1:d`NLo`13?000001O00001N10c_NGU_15k`N0S_1Nm`N4^`12N2O00000O20O000000010O000000mNDb`N=Z_1He`N7Z_1Ke`N5Z_1Me`N3[_1Nd`N3Z_1Ng`N1Y_10f`N0Z_10f`N0Z_10f`N0Z_10f`N1Y_10g`NOY_11g`NOZ_10f`N0Z_11e`NO[_11f`NOZ_10f`N0Z_11e`NO[_11e`NO[_11e`N0[_1Of`N0Z_11e`NO[_11e`NO[_11e`NO\\_11c`N0\\_10e`NO[_11e`NO\\_11c`NO]_11c`NO]_11d`NO\\_10d`N0\\_11c`NO]_11c`NO^_10b`N1]_1Od`N0]_1Nd`N2\\_1Nd`N2]_1Ld`N4]_1Jd`N7\\_1Fg`N9``10000010O00000010O000000010O0000010O00000O2O1N2N2NRi\\4" + } + ], + "question": "What is the color of the seat on ?", + "choices": [ + "A. Red.", + "B. Orange.", + "C. Black.", + "D. Green." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_210.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01634579.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 853 + ], + "counts": "Vlg>3io07K3M2O2L5M2N3M2N2N2N2O1N2O000XQOlNdn0[1M1O1O1O001O0000001O00000000000000010O0010O0010O01O1O3M1O3M2N1O001O1O1mNWQOl0Qo0N2M2O0O2N2O0O3N3K7IWSo9" + } + ], + "question": "What is the material of the item from which most likely originated?", + "choices": [ + "A. Plastic.", + "B. Waffle.", + "C. Stainless steel.", + "D. Ceramic." 
+ ], + "answer": "B", + "type": "material", + "image": "images/vqa_211.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01635395.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1024 + ], + "counts": "Rcfi03ho07I7I6J6K4K6J5_Nb1L4M2M4L3M4M3L3M4L3N]OQTOZMkk0`3M2N3N2N1O2O0N2O2N1O001000O100N2fNQUObMPk0Q2nUOZMVj0X2Q2B>A?B>ATSk00knSOEgl0]1hROfNel0d2^Ob0\\ObLSTOP4gk08J6J6J5K5L4K6J4L5K5D;01O1O1O2O000O100001O01O01O1O2N0001O00000000001O0O100000001O00000000000O2O00000000001O00000O10001O00000000001O0O100000001O000000001N2O1O1O001O1O1O1O1O001O1O1O1O1O1O001O001O00001O0000001O00001O0000001O00001N1O1O2N1000001O002N10O010O101NZB" + } + ], + "question": "What is the shape of ?", + "choices": [ + "A. The object is circular.", + "B. The object is rectangular.", + "C. The object is oval-shaped.", + "D. The object is square-shaped." + ], + "answer": "D", + "type": "shape", + "image": "images/vqa_212.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01635395.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1024 + ], + "counts": "Rcfi03ho07I7I6J6K4K6J5_Nb1L4M2M4L3M4M3L3M4L3N]OQTOZMkk0`3M2N3N2N1O2O0N2O2N1O001000O100N2fNQUObMPk0Q2nUOZMVj0X2Q2B>A?B>ATSk00knSOEgl0]1hROfNel0d2^Ob0\\ObLSTOP4gk08J6J6J5K5L4K6J4L5K5D;01O1O1O2O000O100001O01O01O1O2N0001O00000000001O0O100000001O00000000000O2O00000000001O00000O10001O00000000001O0O100000001O000000001N2O1O1O001O1O1O1O1O001O1O1O1O1O1O001O001O00001O0000001O00001O0000001O00001N1O1O2N1000001O002N10O010O101NZB" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. The masked object is made of glass.", + "B. The masked object is made of plastic.", + "C. The masked object is made of metal.", + "D. The masked object is made of rubber." 
+ ], + "answer": "B", + "type": "material", + "image": "images/vqa_213.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01689730.jpg", + "mask_rles": [ + { + "size": [ + 855, + 1024 + ], + "counts": "`dg>:\\j0a0@f1ZN?A01O00001O000000000000000000000000000000000000000000000000000000000000000O10000000000000000000O100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000000000000000000000000000000000000001O000O10000000000000000000000000000O100000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000O1O1O1O1O1O1O1O1O100O00100O1O1N2O1O1O1O100O1O1O100O1O1000000000000O1000000000000O1O11O2N1O1O1O1O1O1O2M2O1O2N1O2N1O2N2N1O2N2N2N2N2N2N1O2N2N2N2N2N1O2N2N2N2N2N1O4L4L4L5K4L4L3M1O1O1O4L1O1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O1O2N1O1O1O2MReS2" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. It is a painted wooden board.", + "B. It is made of red plastic.", + "C. It is made of fabric.", + "D. It is a metal sheet painted red." + ], + "answer": "C", + "type": "material", + "image": "images/vqa_214.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01696718.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Sbfd04lo02N3M2N010O101O0O0100O001000O10O1O001O1N101O1N202M0000000000000000000O11O010O010O01O01O000001O01O000001O01O0001O01O0001O01O0001O01O00010O00000010O000010O00010O00010O0010O01O01O01O01O000001O001O010O0001O001Nj\\1" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. Plastic.", + "B. Metal.", + "C. Wood.", + "D. Ceramic." 
+ ], + "answer": "B", + "type": "material", + "image": "images/vqa_215.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01696718.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Sbfd04lo02N3M2N010O101O0O0100O001000O10O1O001O1N101O1N202M0000000000000000000O11O010O010O01O01O000001O01O000001O01O0001O01O0001O01O0001O01O00010O00000010O000010O00010O00010O0010O01O01O01O01O000001O001O010O0001O001Nj\\1" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. Metallic.", + "B. Wooden.", + "C. Plastic.", + "D. Ceramic." + ], + "answer": "A", + "type": "material", + "image": "images/vqa_216.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01696718.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Sbfd04lo02N3M2N010O101O0O0100O001000O10O1O001O1N101O1N202M0000000000000000000O11O010O010O01O01O000001O01O000001O01O0001O01O0001O01O0001O01O00010O00000010O000010O00010O00010O0010O01O01O01O01O000001O001O010O0001O001Nj\\1" + } + ], + "question": "Based on its shape, what is ?", + "choices": [ + "A. A spoon with an oval head.", + "B. A pie server with a triangular blade.", + "C. A knife with a long, rectangular blade.", + "D. A fork with four tines." + ], + "answer": "A", + "type": "shape", + "image": "images/vqa_217.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01729425.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "dUZa05jg06J2N2N2N2O1N1O2O000O101N1O1O100O2O000O1O1O1O1O100O1O1N2O1O100O100000001O00000000O2N2OO11O2N2M4L3M3N3M2N2M2O1N2N2O1O001N10000000O100O10000O100O100104JYjl4" + } + ], + "question": "Which of the following statements correctly describes the shape of ?", + "choices": [ + "A. It has a pointed toe.", + "B. It has a square toe.", + "C. 
It is an open-toed object.", + "D. It features a round toe box." + ], + "answer": "D", + "type": "shape", + "image": "images/vqa_218.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01729425.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "fcc55jg03M2POKRZO8ke0JRZO:ke0HRZOke0EQZO?me0BoYOc0oe0]OPZOf0ne0ZORZOi0le0VOSZOl0le0TOTZOl0me0SOSZOm0me0SOSZOm0ne0SOQZOm0oe0SORZOl0ne0TORZOl0ne0TORZOl0ne0TOPZOn0oe0SOPZOn0Pf0a0O100O10000001O1O1O1O1UNmYOg1Vf001O00001N10001O001N101ROcYO7^f0GcYO9^f0EdYO9^f0EcYO8af0G_YO6df0I]YO3hf0H\\YO5Yg0O2N3N2MW\\Ra0" + } + ], + "question": "Based on the image, which of the following statements about the color of is correct?", + "choices": [ + "A. The clothes it is wearing are green.", + "B. Its main body is yellow.", + "C. The ears are pink.", + "D. It is holding a red basket." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_219.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01770249.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": 
"h:k6`0]Il1T1X:a5jC\\In1S1V:e5jCXIP2S1U:h5hCUIT2S1S:k5fCSIW2R1R:n5eCoHZ2S1P:P6dCnH\\2R1n9^6RFbIm9^6TFbIk9_6UFaIj9`6VF`Ii9`6XF`If9b6ZF^Ie9b6\\F]Id9e6[F[Id9g6[FYIc9k6[FUId9m6[FSId9P7ZFPIe9R7ZFnHe9T7ZFlHd9W7[FiHd9Y7[FgHd9[7[FeHd9^7ZFbHe9`7ZF`Hd9c7[F]Hd9e7[F[Hd9g7[FYHd9i7[FWHc9m7[FSHd9o7[FQHd9Q8[FoGe9R8ZFnGe9T8ZFlGe9V8YFkGf9W8YFiGf9Z8WFfGi9\\8VFdGi9^8UFcGj9_8UFaGj9a8UF_Gj9c8TF^Gk9\\9\\EdFd:a9VE`Fi:g9QEYFn:];O1O1O1O1O1O1O100O1O1O1O1O1O1O001O1O100O1O1O1O1O1O1O1O1O1O2O0O1O1O1O2N1O1O1O1O2O0O2N3M2N3M2O1N3M2N3N1N3M2O2M2N2O2M2N3N1N3N1N3N1N101N2O1N2N2O1N2O1N2O1N101N2O1N2N2O1N2O1N2O0O2O1N2N2O1N2O1N2O1N100O100O1O100O100O100O100O100O1O100O100O100O100O1O100O10000O100O10000O100O10000O100O100O10000O100O10000O100O100O10000O100O10000O100O100O10000O100O10000O100O100O10000O10000O1000000O10000O10000O1000000O10000O10000O1000000O10000O10000O1000000O10000O10000O1000000O1000000O100000000O100000000O100000000O100000000O100000000O1000000O100000000O100000000O100000000O100000000O100000000O100000000O1000000O100000000O10000000000O1000000000000O1000000000000O1000000000000O1000000000000O1000000000000O10000000000O1000000000000O1000000000000O1000000000000O1000000000000O1000000000000O1000000000000O1000000000000O100000000000000O100000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000001O00000000000000000000000000000000000000000000000000001O00000000000000000000000000000000001O000000000000000000001O00000000000000000000001O000000000000000000001O0000000000000000001O0000000000001O0000000000001O000000000000001O0000000000001O0000000000001O0000000000001O000000000000001O0000000000001O0000000000001O00000000001O0000001O0000001O0000001O0000001O00001O0000001O0000001O0000001O0000001O00001O00O100O100O100O100O100O100O100O10000O100O100O100O100O100O100O100O10000000\\]O[Kma0e4R^O\\Kna0d4R^O\\Kna0d4R^O\\Kna0d4Q^O^Kna0b4R^O^Kna0b4R^O^Kna0b4Q^O`Kna0`4R^O`Kna0`4R^O`Kna0`4Q^ObKna0^4R^ObKna0^4R^ObKna0^4Q^OdKna0\\4R^OdKna0\\4R^OdKna0\\4Q^OfKna0Z4R^OfKna0Z4R^OfKna0Z4R^OgKma0Y
4R^OhKna0X4R^OhKna0X4R^OhKna0X4R^OiKma0W4S^OiKma0W4S^OiKma0W4S^OjKla0V4T^OjKla0V4T^OjKla0V4T^OkKka0U4U^OkKka0U4U^OkKka0U4U^OlKja0T4W^OkKia0U4W^OkKia0U4W^OlKha0T4X^OlKha0T4X^OlKha0T4X^OmKga0S4Y^OmKga0S4Y^OmKga0S4Z^OlKfa0T4Z^OmKea0S4[^OmKea0S4[^OmKea0T4Z^OmKea0S4[^OmKea0S4[^OmKea0S4\\^OmKca0T4\\^OlKda0T4]^OkKca0U4]^OlKba0T4_^OkKaa0U4_^OkKaa0V4_^OjK`a0V4`^OjK`a0V4a^OjK^a0V4b^OjK^a0V4b^OkK]a0V4c^OiK]a0W4c^OiK]a0W4d^OiK[a0W4e^OiK[a0X4e^OhKZa0X4f^OhKZa0X4f^OiKYa0W4h^OhKXa0X4h^OhKXa0Y4h^OgKWa0Y4i^OgKWa0Y4i^OhKVa0X4k^OgKUa0Y4k^OhKTa0Y4l^OfKTa0Z4l^OfKTa0Z4l^OgKSa0Y4n^OfKRa0[4m^OfKRa0Z4n^OfKRa0Z4o^OeKQa0[4o^OfKPa0Z4Q_OeKo`0\\4P_OeKo`0[4Q_OeKo`0[4R_OeKm`0[4S_OeKm`0[4T_OdKl`0]4S_OdKl`0\\4T_OdKl`0\\4U_OdKj`0\\4V_OdKj`0]4U_OdKj`0\\4W_OcKi`0]4W_OcKi`0^4V_OcKi`0]4X_ObKh`0^4X_OcKg`0^4X_ObKh`0^4Y_OaKg`0_4Y_ObKf`0_4Y_OaKg`0_4Z_OaKe`0_4[_OaKe`0`4Z_OaKe`0_4\\_O`Kd`0`4\\_OaKc`0`4\\_O`Kd`0`4]_O`Kb`0`4^_O`Kb`0a4]_O`Kb`0`4^_O`Kb`0`4__O`K``0a4__O_Ka`0a4__O_Ka`0a4`_O_K_`0b4`_O^K``0b4`_O_K_`0a4b_O^K^`0c4a_O^K^`0b4b_O^K^`0b4c_O^K\\`0c4c_O]K]`0c4c_O^K\\`0b4e_O]K[`0d4d_O]K[`0c4e_O]K[`0c4f_O]KY`0d4f_O\\KZ`0d4f_O\\KZ`0d4g_O\\KX`0e4g_O[KY`0e4g_O\\KX`0d4i_O[KW`0f4h_O[KW`0e4j_OZKV`0f4j_O[KU`0f4j_OZKV`0f4k_OZKT`0f4l_OZKT`0g4k_OZKT`0f4m_OYKS`0h4l_OXKT`0h4l_OYKS`0g4n_OXKQ`0j4n_OWKP`0j4P@VKo?k4R@UKl?m4S@SKl?n4T@SKj?n4W@QKh?Q5W@PKg?Q5Y@oJf?R5[@nJc?T5\\@lJb?V5^@kJ`?V5a@iJ^?Y5a@hJ\\?Z5e@eJZ?\\5f@eJW?^5i@aJV?`5j@aJT?`5m@_JQ?d5n@]JP?d5QA\\Jm>e5SA[Jk>h5UAXJi>i5XAVJg>k5ZAUJc>n5^APJa>R6_AkIb>V6_AgIa>\\6_AcI`>_6eAZI[>g6]13N1N3N2N1N3N1O2M3N1O2L3M4L4K4M4L3O2N2O0O2N1O2_BXGh;h8VD[Gh;g8UD[Gj;h8SDZGk;h8RD[Gm;g8PDZGo;h8nC[GP?", + "choices": [ + "A. The object is entirely black, matching the stove it is on.", + "B. The object is uniformly silver in color.", + "C. The object contains a bright yellow utensil.", + "D. The object is filled with red-colored food." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_220.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01788343.jpg", + "mask_rles": [ + { + "size": [ + 1009, + 1024 + ], + "counts": "_oo78\\n0n0N2M2O100O2N100O100O2O0000001N11OO10001O000000001O00000000001O000000001O00000000001O00O100000000000000000001N10001O00001O00001O0O101O00001O00001O0O1O2I7Hh0XOUVSe0" + } + ], + "question": "What material is a component of ?", + "choices": [ + "A. Plastic.", + "B. Cardboard.", + "C. Wood.", + "D. Metal." + ], + "answer": "B", + "type": "material", + "image": "images/vqa_221.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01811034.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "hPYf06gg04K6J5K5K5K4L5K5K4M4K4L5K4L4L4L4L3M4L4L3M4L3M4K4M4L3M4L3M4K4M4L3M4TOmKg]OV4Yb0mKa]OW4^b0lK\\]OW4db0c000O010O0100O001O1N1N3K5L4L4L4L4L4L4L4L4L4L4L5K4L4L5K4L4L5K4L5K4L4L5K4L5K]D" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. Metal.", + "B. Glass.", + "C. Cardboard.", + "D. Plastic." + ], + "answer": "D", + "type": "material", + "image": "images/vqa_222.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01811034.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "hPYf06gg04K6J5K5K5K4L5K5K4M4K4L5K4L4L4L4L3M4L4L3M4L3M4K4M4L3M4L3M4K4M4L3M4TOmKg]OV4Yb0mKa]OW4^b0lK\\]OW4db0c000O010O0100O001O1N1N3K5L4L4L4L4L4L4L4L4L4L4L5K4L4L5K4L4L5K4L5K4L4L5K4L5K]D" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. A combination of orange and white.", + "B. A combination of blue and white.", + "C. A combination of green, yellow, and white.", + "D. Primarily red and black." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_223.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01835389.jpg", + "mask_rles": [ + { + "size": [ + 2472, + 2832 + ], + "counts": "c\\U_29l\\26K3N3L3M3M3M3M2O2M3N1N2O1N3M2L4M3L4M3M3N1O2J6L4M2N2O2M2B4WfMVMgY2j2;0100000000010O01M2O[Od0L6N200AdLRgM]3jX2mLPgMS3oX2c0O2M201O001O1O10O0100O01000O0100000000003N1N1O3N1N101N1000O10001N102`KbgMP4OlKgX2Z41O100O2N10001O0010O01N1N3N1N101O001O1O1O2N2ON1O1N2N1O2N2N2N1O2O1N2N2N2N2N2O1N1O2N101N1O1O2O0O1O101N100O101N100O101N100N10O100N2N2M3K6L3O1O2N2N1N3L3L4K5M3N2O2M2N3L3L5I7H7N3M3M3M3M3M3M4L3M3M4K4M4L3M4K5L4L4K5K6I9GX`Yh3" + } + ], + "question": "What is the texture/pattern of ?", + "choices": [ + "A. Ribbed.", + "B. Pleated.", + "C. Smooth.", + "D. Velvet." + ], + "answer": "C", + "type": "texture/pattern", + "image": "images/vqa_224.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01835389.jpg", + "mask_rles": [ + { + "size": [ + 2472, + 2832 + ], + "counts": "c\\U_29l\\26K3N3L3M3M3M3M2O2M3N1N2O1N3M2L4M3L4M3M3N1O2J6L4M2N2O2M2B4WfMVMgY2j2;0100000000010O01M2O[Od0L6N200AdLRgM]3jX2mLPgMS3oX2c0O2M201O001O1O10O0100O01000O0100000000003N1N1O3N1N101N1000O10001N102`KbgMP4OlKgX2Z41O100O2N10001O0010O01N1N3N1N101O001O1O1O2N2ON1O1N2N1O2N2N2N1O2O1N2N2N2N2N2O1N1O2N101N1O1O2O0O1O101N100O101N100O101N100N10O100N2N2M3K6L3O1O2N2N1N3L3L4K5M3N2O2M2N3L3L5I7H7N3M3M3M3M3M3M4L3M3M4K4M4L3M4K5L4L4K5K6I9GX`Yh3" + } + ], + "question": "What is in the image?", + "choices": [ + "A. A knotted tie.", + "B. A bow tie with a butterfly shape.", + "C. A decorative epaulet on the shoulder.", + "D. The leaf of the poppy pin." 
+ ], + "answer": "B", + "type": "shape", + "image": "images/vqa_225.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01835389.jpg", + "mask_rles": [ + { + "size": [ + 2472, + 2832 + ], + "counts": "c\\U_29l\\26K3N3L3M3M3M3M2O2M3N1N2O1N3M2L4M3L4M3M3N1O2J6L4M2N2O2M2B4WfMVMgY2j2;0100000000010O01M2O[Od0L6N200AdLRgM]3jX2mLPgMS3oX2c0O2M201O001O1O10O0100O01000O0100000000003N1N1O3N1N101N1000O10001N102`KbgMP4OlKgX2Z41O100O2N10001O0010O01N1N3N1N101O001O1O1O2N2ON1O1N2N1O2N2N2N1O2O1N2N2N2N2N2O1N1O2N101N1O1O2O0O1O101N100O101N100O101N100N10O100N2N2M3K6L3O1O2N2N1N3L3L4K5M3N2O2M2N3L3L5I7H7N3M3M3M3M3M3M4L3M3M4K4M4L3M4K5L4L4K5K6I9GX`Yh3" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. White.", + "B. Red.", + "C. Black.", + "D. Light blue." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_226.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01867731.jpg", + "mask_rles": [ + { + "size": [ + 1023, + 806 + ], + "counts": "^_d:8do0?B4eQO[OUm0j0]ROCam0?UROKim0V1000O001O0O2O1O001N2O1O0O2O1O1O1N101N12M3L4N2M3N2M3M4M3L4M3L4M3L3N2N1N2O1O2N1O1O2N1O1O1O2N1O1O000000O2O0O2O000O101O0O2O001O10000000O01O1O1O1O1O100O1O1O002N1O1O1O1O2N1O1O1O1O1O2N1O1O1O2N2N2N1O2O1N2N2O0O2N010O010O2N2O1N2N2O2M2O1cNfQOR1\\n0lNfQOR1\\n0mNdQOS1en0N2N4M2N3M4L3M3M2N1O00000000O1N21O1O2N2N2N2N1N1OTlo9" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Red.", + "B. Green.", + "C. White.", + "D. Blue." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_227.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01867731.jpg", + "mask_rles": [ + { + "size": [ + 1023, + 806 + ], + "counts": "^_d:8do0?B4eQO[OUm0j0]ROCam0?UROKim0V1000O001O0O2O1O001N2O1O0O2O1O1O1N101N12M3L4N2M3N2M3M4M3L4M3L4M3L3N2N1N2O1O2N1O1O2N1O1O1O2N1O1O000000O2O0O2O000O101O0O2O001O10000000O01O1O1O1O1O100O1O1O002N1O1O1O1O2N1O1O1O1O1O2N1O1O1O2N2N2N1O2O1N2N2O0O2N010O010O2N2O1N2N2O2M2O1cNfQOR1\\n0lNfQOR1\\n0mNdQOS1en0N2N4M2N3M4L3M3M2N1O00000000O1N21O1O2N2N2N2N1N1OTlo9" + } + ], + "question": "What is the texture of in the image?", + "choices": [ + "A. It has a smooth, glossy surface.", + "B. It has a papery skin.", + "C. It has a grainy, wooden texture.", + "D. It is embroidered with colorful threads." + ], + "answer": "B", + "type": "texture/pattern", + "image": "images/vqa_228.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01867731.jpg", + "mask_rles": [ + { + "size": [ + 1023, + 806 + ], + "counts": "^_d:8do0?B4eQO[OUm0j0]ROCam0?UROKim0V1000O001O0O2O1O001N2O1O0O2O1O1O1N101N12M3L4N2M3N2M3M4M3L4M3L4M3L3N2N1N2O1O2N1O1O2N1O1O1O2N1O1O000000O2O0O2O000O101O0O2O001O10000000O01O1O1O1O1O100O1O1O002N1O1O1O1O2N1O1O1O1O1O2N1O1O1O2N2N2N1O2O1N2N2O0O2N010O010O2N2O1N2N2O2M2O1cNfQOR1\\n0lNfQOR1\\n0mNdQOS1en0N2N4M2N3M4L3M3M2N1O00000000O1N21O1O2N2N2N2N1N1OTlo9" + } + ], + "question": "What is the color of the root of ?", + "choices": [ + "A. Red.", + "B. White.", + "C. Brown.", + "D. Green." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_229.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01915694.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "mP]83ig08H8H9G8G8I6J7I7J6UZOlM`e0T1bZO0Pc0mNQ_OR1VN0eb0Aa^O?POO^b0E[^O`0YOI\\b0GW^Oc0_OE^b0Dl]On0H[O`b0Db]OV11SO_b0F_]OX14oN`b0G[]O[17jNab0JX]O[1;dNbb00R]O]1?]Ndb04m\\O^1b0ZNdb07j\\O_1bd0`N^[O_1cc0[Nf\\O4Ga1cc0\\Nf\\O2Gb1cc0\\Ng\\O1Fb1cc0_Nh\\OMEd1cc0`N`]O_1ab0aN_]O_1ab0bN_]O]1ab0cN`]O[1ab0eN`]OY1ab0fNl]Om0Tb0TOY^O_YOCaf0>\\YODdf0<[YOEVd00i]Ok0jNiN`b0k0W^OZc0Bf\\O`0OYN]b0W1d]Oc0G]Ndb0o0e]Of0_ObNkb0h0f]Oh0XOfNQc0b0g]Oa1Yb0_Ng]Ob1Xb0^Nh]Ob1Xb0^Nh]Ob1Yb0]Ng]Oc1Yb0]Ng]Oc1Yb0]Ng]Oc1Yb0]Nf]Od1Zb0\\Nf]Od1Zb0\\Nf]Oe1Zb0ZNf]Of1Zb0ZNf]Og1Yb0YNg]Og1Zb0XNf]Oh1Zb0XNf]Oi1Zb0VNf]Oj1[b0UNe]Ok1[b0UNe]Ok1\\b0TNd]Ol1]b0SNc]Om1]b0SNc]Om1^b0RNb]On1_b0QNa]Oo1_b0QNa]Oo1`b0PNa]Oo1`b0PN`]OP2bb0nM^]OR2cb0mM]]OS2eb0kM[]OU2gb0iMY]OW2hb0hMX]OX2jb0fMX]OW2]d0L3M4M2M3M2N2N2N3M2N2N3M3M3M4L3M2M4M3K5H9D?", + "choices": [ + "A. The object has a smooth, polished surface.", + "B. It has a prominent, visible wood grain pattern.", + "C. The surface is painted a solid, matte color.", + "D. It is covered with a striped pattern." 
+ ], + "answer": "A", + "type": "texture/pattern", + "image": "images/vqa_230.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01915694.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "mP]83ig08H8H9G8G8I6J7I7J6UZOlM`e0T1bZO0Pc0mNQ_OR1VN0eb0Aa^O?POO^b0E[^O`0YOI\\b0GW^Oc0_OE^b0Dl]On0H[O`b0Db]OV11SO_b0F_]OX14oN`b0G[]O[17jNab0JX]O[1;dNbb00R]O]1?]Ndb04m\\O^1b0ZNdb07j\\O_1bd0`N^[O_1cc0[Nf\\O4Ga1cc0\\Nf\\O2Gb1cc0\\Ng\\O1Fb1cc0_Nh\\OMEd1cc0`N`]O_1ab0aN_]O_1ab0bN_]O]1ab0cN`]O[1ab0eN`]OY1ab0fNl]Om0Tb0TOY^O_YOCaf0>\\YODdf0<[YOEVd00i]Ok0jNiN`b0k0W^OZc0Bf\\O`0OYN]b0W1d]Oc0G]Ndb0o0e]Of0_ObNkb0h0f]Oh0XOfNQc0b0g]Oa1Yb0_Ng]Ob1Xb0^Nh]Ob1Xb0^Nh]Ob1Yb0]Ng]Oc1Yb0]Ng]Oc1Yb0]Ng]Oc1Yb0]Nf]Od1Zb0\\Nf]Od1Zb0\\Nf]Oe1Zb0ZNf]Of1Zb0ZNf]Og1Yb0YNg]Og1Zb0XNf]Oh1Zb0XNf]Oi1Zb0VNf]Oj1[b0UNe]Ok1[b0UNe]Ok1\\b0TNd]Ol1]b0SNc]Om1]b0SNc]Om1^b0RNb]On1_b0QNa]Oo1_b0QNa]Oo1`b0PNa]Oo1`b0PN`]OP2bb0nM^]OR2cb0mM]]OS2eb0kM[]OU2gb0iMY]OW2hb0hMX]OX2jb0fMX]OW2]d0L3M4M2M3M2N2N2N3M2N2N3M3M3M4L3M2M4M3K5H9D is correct?", + "choices": [ + "A. It has six strings.", + "B. It is a four-stringed instrument.", + "C. It is painted bright red.", + "D. It is standing on a pink stand on the floor." 
+ ], + "answer": "A", + "type": "shape", + "image": "images/vqa_231.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01915694.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "mP]83ig08H8H9G8G8I6J7I7J6UZOlM`e0T1bZO0Pc0mNQ_OR1VN0eb0Aa^O?POO^b0E[^O`0YOI\\b0GW^Oc0_OE^b0Dl]On0H[O`b0Db]OV11SO_b0F_]OX14oN`b0G[]O[17jNab0JX]O[1;dNbb00R]O]1?]Ndb04m\\O^1b0ZNdb07j\\O_1bd0`N^[O_1cc0[Nf\\O4Ga1cc0\\Nf\\O2Gb1cc0\\Ng\\O1Fb1cc0_Nh\\OMEd1cc0`N`]O_1ab0aN_]O_1ab0bN_]O]1ab0cN`]O[1ab0eN`]OY1ab0fNl]Om0Tb0TOY^O_YOCaf0>\\YODdf0<[YOEVd00i]Ok0jNiN`b0k0W^OZc0Bf\\O`0OYN]b0W1d]Oc0G]Ndb0o0e]Of0_ObNkb0h0f]Oh0XOfNQc0b0g]Oa1Yb0_Ng]Ob1Xb0^Nh]Ob1Xb0^Nh]Ob1Yb0]Ng]Oc1Yb0]Ng]Oc1Yb0]Ng]Oc1Yb0]Nf]Od1Zb0\\Nf]Od1Zb0\\Nf]Oe1Zb0ZNf]Of1Zb0ZNf]Og1Yb0YNg]Og1Zb0XNf]Oh1Zb0XNf]Oi1Zb0VNf]Oj1[b0UNe]Ok1[b0UNe]Ok1\\b0TNd]Ol1]b0SNc]Om1]b0SNc]Om1^b0RNb]On1_b0QNa]Oo1_b0QNa]Oo1`b0PNa]Oo1`b0PN`]OP2bb0nM^]OR2cb0mM]]OS2eb0kM[]OU2gb0iMY]OW2hb0hMX]OX2jb0fMX]OW2]d0L3M4M2M3M2N2N2N3M2N2N3M3M3M4L3M2M4M3K5H9D?", + "choices": [ + "A. f-shaped.", + "B. Oval.", + "C. Round.", + "D. Diamond-shaped." 
+ ], + "answer": "C", + "type": "shape", + "image": "images/vqa_232.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01915694.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "mP]83ig08H8H9G8G8I6J7I7J6UZOlM`e0T1bZO0Pc0mNQ_OR1VN0eb0Aa^O?POO^b0E[^O`0YOI\\b0GW^Oc0_OE^b0Dl]On0H[O`b0Db]OV11SO_b0F_]OX14oN`b0G[]O[17jNab0JX]O[1;dNbb00R]O]1?]Ndb04m\\O^1b0ZNdb07j\\O_1bd0`N^[O_1cc0[Nf\\O4Ga1cc0\\Nf\\O2Gb1cc0\\Ng\\O1Fb1cc0_Nh\\OMEd1cc0`N`]O_1ab0aN_]O_1ab0bN_]O]1ab0cN`]O[1ab0eN`]OY1ab0fNl]Om0Tb0TOY^O_YOCaf0>\\YODdf0<[YOEVd00i]Ok0jNiN`b0k0W^OZc0Bf\\O`0OYN]b0W1d]Oc0G]Ndb0o0e]Of0_ObNkb0h0f]Oh0XOfNQc0b0g]Oa1Yb0_Ng]Ob1Xb0^Nh]Ob1Xb0^Nh]Ob1Yb0]Ng]Oc1Yb0]Ng]Oc1Yb0]Ng]Oc1Yb0]Nf]Od1Zb0\\Nf]Od1Zb0\\Nf]Oe1Zb0ZNf]Of1Zb0ZNf]Og1Yb0YNg]Og1Zb0XNf]Oh1Zb0XNf]Oi1Zb0VNf]Oj1[b0UNe]Ok1[b0UNe]Ok1\\b0TNd]Ol1]b0SNc]Om1]b0SNc]Om1^b0RNb]On1_b0QNa]Oo1_b0QNa]Oo1`b0PNa]Oo1`b0PN`]OP2bb0nM^]OR2cb0mM]]OS2eb0kM[]OU2gb0iMY]OW2hb0hMX]OX2jb0fMX]OW2]d0L3M4M2M3M2N2N2N3M2N2N3M3M3M4L3M2M4M3K5H9D in the image?", + "choices": [ + "A. Blue.", + "B. Black.", + "C. Red.", + "D. Light brown." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_233.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01916008.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cdg63kg03M3L3M3M2O1N2N3M2N200O1O100O100010OO100001O1O1O001O000000000010O01O1O1O1O1O1O1O1O1O1O1O1O100O1O1O1O1O1O1N2000000O1O1O1O2Noa=6g]B5K5K5K5L4K5K5N2O1O1O1O1O1O1O0O2O1O1O1O1O1O1O1N2O1O1O1O1O001O1O11O00000000000000000O1000000000000000001O00000O101N1E;D in the image?", + "choices": [ + "A. Trapezoidal.", + "B. Triangular.", + "C. Rectangular.", + "D. Curved." 
+ ], + "answer": "C", + "type": "shape", + "image": "images/vqa_234.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01916008.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cdg63kg03M3L3M3M2O1N2N3M2N200O1O100O100010OO100001O1O1O001O000000000010O01O1O1O1O1O1O1O1O1O1O1O1O100O1O1O1O1O1O1N2000000O1O1O1O2Noa=6g]B5K5K5K5L4K5K5N2O1O1O1O1O1O1O0O2O1O1O1O1O1O1O1N2O1O1O1O1O001O1O11O00000000000000000O1000000000000000001O00000O101N1E;D?", + "choices": [ + "A. The masked object is a component of the white van.", + "B. The masked object is a component of the black car.", + "C. The masked object is a component of the blue building.", + "D. The masked object is a component of the gray road surface." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_235.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01916008.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cdg63kg03M3L3M3M2O1N2N3M2N200O1O100O100010OO100001O1O1O001O000000000010O01O1O1O1O1O1O1O1O1O1O1O1O100O1O1O1O1O1O1N2000000O1O1O1O2Noa=6g]B5K5K5K5L4K5K5N2O1O1O1O1O1O1O0O2O1O1O1O1O1O1O1N2O1O1O1O1O001O1O11O00000000000000000O1000000000000000001O00000O101N1E;D?", + "choices": [ + "A. White.", + "B. Black.", + "C. Gray.", + "D. Blue." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_236.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01936287.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "l\\me01ng02N3N000ROKQZO5ne04jYOKWf06hYOJYf05hYOJb01bd0MnZO06`0id0@Q[O0Lk0Re0UOR[O0DR1]d0SOo[OKOn0DSObd06j[OIDo0Ye0YOS[OI[Ol0le0\\OYZOn0Pf079DIPOVZOc0Xf0;7CTYOGlf03\\YOKef0McYO2_f0KdYO3P\\a1" + } + ], + "question": "What is the color of in the image?", + "choices": [ + "A. Black.", + "B. Yellow.", + "C. White.", + "D. Green." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_237.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01939853.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Sin91mo04M2O1N1000O10000000O1000O100000O1000O0100O010O010O01000N1O2N110O2O1N2O1N2O1N2O1O1N2O1N10001N10O1000O10000O1000000O100000O01000000O1000000O10O100GTQO]Oln0c0WQOZOjn0e0901O001O0000001O00001O0000001O00001O0000001O00001O0000001O00001O00001N100O2O2L4L]W]:" + } + ], + "question": "Which of the following statements accurately describes the shape of ?", + "choices": [ + "A. The masked object is a large animal lying on the ground.", + "B. The masked object is one of the small pigeons walking on the ground.", + "C. The masked object is a small animal standing on its feet.", + "D. The masked object is a decorative golden statue at the base of the temple." + ], + "answer": "A", + "type": "shape", + "image": "images/vqa_238.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01939853.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Sin91mo04M2O1N1000O10000000O1000O100000O1000O0100O010O010O01000N1O2N110O2O1N2O1N2O1N2O1O1N2O1N10001N10O1000O10000O1000000O100000O01000000O1000000O10O100GTQO]Oln0c0WQOZOjn0e0901O001O0000001O00001O0000001O00001O0000001O00001O0000001O00001O00001N100O2O2L4L]W]:" + } + ], + "question": "Which of the following descriptions about the texture of is correct?", + "choices": [ + "A. It has a shaggy coat.", + "B. It has a smooth coat.", + "C. It is covered in feathers.", + "D. It has a spotted pattern." 
+ ], + "answer": "B", + "type": "texture/pattern", + "image": "images/vqa_239.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01944558.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "\\]n5i0Ug05J4H8M3N1O1O100O10000000000001N1O2N2H8M4L;EiZ`a0" + } + ], + "question": "What is a characteristic texture of ?", + "choices": [ + "A. Creamy.", + "B. Hard and woody.", + "C. Woven fabric.", + "D. Smooth and metallic." + ], + "answer": "A", + "type": "texture/pattern", + "image": "images/vqa_240.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01948375.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "Plm=166[g0;L3N3kYOG]d0;S[O3md0`100O1000O100000O1000O1000O1000O100O002N1O2O1N0000001O1L4]Oc0N2M2N3M3N2M3N2O001O1O1O1O1N2O101\\OjXO;_g0OO1O001O00O102N2HZTe8" + } + ], + "question": "What is the color of in the image?", + "choices": [ + "A. Grayish-white.", + "B. Teal.", + "C. Brown.", + "D. Beige." + ], + "answer": "A", + "type": "color", + "image": "images/vqa_241.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01948375.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "Plm=166[g0;L3N3kYOG]d0;S[O3md0`100O1000O100000O1000O1000O1000O100O002N1O2O1N0000001O1L4]Oc0N2M2N3M3N2M3N2O001O1O1O1O1N2O101\\OjXO;_g0OO1O001O00O102N2HZTe8" + } + ], + "question": "Which of the following best describes the texture of ?", + "choices": [ + "A. Smooth.", + "B. Ribbed.", + "C. Plush.", + "D. Waxy." 
+ ], + "answer": "C", + "type": "texture/pattern", + "image": "images/vqa_242.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01959650.jpg", + "mask_rles": [ + { + "size": [ + 1200, + 1600 + ], + "counts": "]k0S:]k00O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O1000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000O10000001O2N2N1O1O1O1O001O001O00001O1O2N2N1O1O1O1O1O1O001O001O001O00001O00001O001O1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1PJgSOg2[l0TMkSOi2Vl0SMoSOk2Rl0QMSTOm2nk0PMUTOo2mk0lLWTOS3jk0jLYTOU3hk0hL[TOW3fk0eL]TO[3ek0aL`TO\\3ak0`LdTO^3]k0_LcTOc3^k0YLdTOh3^k0TLdTOl3]k0PLgTOo3Zk0nKhTOR4Yk0kKiTOU4Yk0fKjTOZ4Wk0cKkTO]4Vk0_KmTOa4Tk0\\KnTOd4Tk0WKnTOj4Tk0RKnTOn4Vk0kJmTOU5fm0100O100O1O1O1001O001O001O1O1O1O2N1O2N1O2N1O2N2N1O2N1O2N1O2N1O2N2N1O2N1O2N1O2N1O2N1O2N2N1O2N1O2N1O2N1O2N1O2M3N1N3M2N3M2M4L3L5F:F9F;F9G:F:F?", + "choices": [ + "A. Plastic.", + "B. Ceramic.", + "C. Fiberglass.", + "D. Porcelain." 
+ ], + "answer": "B", + "type": "material", + "image": "images/vqa_243.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01959650.jpg", + "mask_rles": [ + { + "size": [ + 1200, + 1600 + ], + "counts": "]k0S:]k00O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O1000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000O10000001O2N2N1O1O1O1O001O001O00001O1O2N2N1O1O1O1O1O1O001O001O001O00001O00001O001O1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1PJgSOg2[l0TMkSOi2Vl0SMoSOk2Rl0QMSTOm2nk0PMUTOo2mk0lLWTOS3jk0jLYTOU3hk0hL[TOW3fk0eL]TO[3ek0aL`TO\\3ak0`LdTO^3]k0_LcTOc3^k0YLdTOh3^k0TLdTOl3]k0PLgTOo3Zk0nKhTOR4Yk0kKiTOU4Yk0fKjTOZ4Wk0cKkTO]4Vk0_KmTOa4Tk0\\KnTOd4Tk0WKnTOj4Tk0RKnTOn4Vk0kJmTOU5fm0100O100O1O1O1001O001O001O1O1O1O2N1O2N1O2N1O2N2N1O2N1O2N1O2N1O2N2N1O2N1O2N1O2N1O2N1O2N2N1O2N1O2N1O2N1O2N1O2M3N1N3M2N3M2M4L3L5F:F9F;F9G:F:F?", + "choices": [ + "A. Patterned.", + "B. Matte.", + "C. Smooth.", + "D. Rough." 
+ ], + "answer": "C", + "type": "texture/pattern", + "image": "images/vqa_244.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01959650.jpg", + "mask_rles": [ + { + "size": [ + 1200, + 1600 + ], + "counts": "]k0S:]k00O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O1000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000O10000001O2N2N1O1O1O1O001O001O00001O1O2N2N1O1O1O1O1O1O001O001O001O00001O00001O001O1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1PJgSOg2[l0TMkSOi2Vl0SMoSOk2Rl0QMSTOm2nk0PMUTOo2mk0lLWTOS3jk0jLYTOU3hk0hL[TOW3fk0eL]TO[3ek0aL`TO\\3ak0`LdTO^3]k0_LcTOc3^k0YLdTOh3^k0TLdTOl3]k0PLgTOo3Zk0nKhTOR4Yk0kKiTOU4Yk0fKjTOZ4Wk0cKkTO]4Vk0_KmTOa4Tk0\\KnTOd4Tk0WKnTOj4Tk0RKnTOn4Vk0kJmTOU5fm0100O100O1O1O1001O001O001O1O1O1O2N1O2N1O2N1O2N2N1O2N1O2N1O2N1O2N2N1O2N1O2N1O2N1O2N1O2N2N1O2N1O2N1O2N1O2N1O2M3N1N3M2N3M2M4L3L5F:F9F;F9G:F:F in the image?", + "choices": [ + "A. The masked object is beige.", + "B. The masked object is white.", + "C. The masked object is light brown.", + "D. The masked object is blue." 
+ ], + "answer": "B", + "type": "color", + "image": "images/vqa_245.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01968981.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "al\\<:ag0g0[O6K4L5K4L3N4K3N2M4M2M4M3L4M3M3M2N2N3M2N2N2N2O1N2O1N2N2O1N2O0O2N2O1N101N2O1O1O0O2O1N101O1N101N2O001N2O1O001O1O1O1O1O1N2O1O1O1O1O0O101n\\O`Kbb0`4]]OdK`b0\\4]]OgKcb0Y4\\]OhKdb0X4\\]OhKdb0k4O001O01[O]]OkKcb0U4_]OhKbb0X4_]OeKcb0[4`]O`Kbb0_4`000O2O0O1N2M3N2O1O2N1O1O2N1O2N2N1O1O2N100O2N1O1O2N1O2N2N1O2N1O2N2N2N2N2N1O2N2M3M3N2M3N3L3N3L3M3M3M4M6I5K5J5J7J8Ec[\\8" + } + ], + "question": "What is the primary material of the envelope of ?", + "choices": [ + "A. Canvas.", + "B. Rubber.", + "C. Polyester.", + "D. Plastic sheeting." + ], + "answer": "C", + "type": "material", + "image": "images/vqa_246.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01968981.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "al\\<:ag0g0[O6K4L5K4L3N4K3N2M4M2M4M3L4M3M3M2N2N3M2N2N2N2O1N2O1N2N2O1N2O0O2N2O1N101N2O1O1O0O2O1N101O1N101N2O001N2O1O001O1O1O1O1O1N2O1O1O1O1O0O101n\\O`Kbb0`4]]OdK`b0\\4]]OgKcb0Y4\\]OhKdb0X4\\]OhKdb0k4O001O01[O]]OkKcb0U4_]OhKbb0X4_]OeKcb0[4`]O`Kbb0_4`000O2O0O1N2M3N2O1O2N1O1O2N1O2N2N1O1O2N100O2N1O1O2N1O2N2N1O2N1O2N2N2N2N2N1O2N2M3M3N2M3N3L3N3L3M3M3M4M6I5K5J5J7J8Ec[\\8" + } + ], + "question": "Which statement accurately describes a feature of in the image?", + "choices": [ + "A. It has a simple horizontal striped pattern.", + "B. It has a small basket hanging underneath.", + "C. It is primarily colored green and white.", + "D. There is no visible basket attached to it." 
+ ], + "answer": "B", + "type": "shape", + "image": "images/vqa_247.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01975150.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "obW95jo03M3M3N2N1N2O1N100O1000001O01O0000O100000O2N1N2L4M3000010O0001O001O010O001O0010O010O010O01O1O010O001O010O000001O0000001O0000001O01O00000000000000000000000mNF`RO:_m0H`RO8_m0J`RO6`m0K_RO5`m0L`RO4`m0M_RO3am0M_RO3am0N^RO2cm0N\\RO2dm0N\\RO3dm0M[RO3fm0LZRO4fm0MYRO3hm0LXRO4hm0MWRO3jm0MURO3km0MURO3lm0MSRO3nm0LRRO4nm0MQRO3Pn0LPRO4Pn0MoQO3Rn0MmQO3Sn0MmQO3Tn0MkQO3Vn0LjQO4Vn0MiQO3Xn0LhQO4Xn0MgQO3Zn0MeQO3[n0MeQO3\\n0McQO4]n0KcQO5]n0LbQO4_n0KaQO5_n0L`QO4an0L^QO4bn0L^QO4cn0L\\QO4en0K[QO5en0LZQO4gn0KYQO5gn0LXQO4in0LVQO4jn0LVQO4kn0LTQO4mn0KSQO5mn0LRQO4on0KQQO5on0LPQO4Qo0=001O10O01O001O001O1O001O001O001O1N1O2N1N3L4M3Mm\\P:" + } + ], + "question": "What is the shape of in the image?", + "choices": [ + "A. It has spoon-like, cupped ends.", + "B. It has scalloped gripping ends.", + "C. It is a single, straight utensil with a pointed tip.", + "D. It has flat, spatula-like ends." 
+ ], + "answer": "B", + "type": "shape", + "image": "images/vqa_248.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01975150.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "obW95jo03M3M3N2N1N2O1N100O1000001O01O0000O100000O2N1N2L4M3000010O0001O001O010O001O0010O010O010O01O1O010O001O010O000001O0000001O0000001O01O00000000000000000000000mNF`RO:_m0H`RO8_m0J`RO6`m0K_RO5`m0L`RO4`m0M_RO3am0M_RO3am0N^RO2cm0N\\RO2dm0N\\RO3dm0M[RO3fm0LZRO4fm0MYRO3hm0LXRO4hm0MWRO3jm0MURO3km0MURO3lm0MSRO3nm0LRRO4nm0MQRO3Pn0LPRO4Pn0MoQO3Rn0MmQO3Sn0MmQO3Tn0MkQO3Vn0LjQO4Vn0MiQO3Xn0LhQO4Xn0MgQO3Zn0MeQO3[n0MeQO3\\n0McQO4]n0KcQO5]n0LbQO4_n0KaQO5_n0L`QO4an0L^QO4bn0L^QO4cn0L\\QO4en0K[QO5en0LZQO4gn0KYQO5gn0LXQO4in0LVQO4jn0LVQO4kn0LTQO4mn0KSQO5mn0LRQO4on0KQQO5on0LPQO4Qo0=001O10O01O001O001O1O001O001O001O1N1O2N1N3L4M3Mm\\P:" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. It is made of plastic.", + "B. It is made of metal.", + "C. It is made of wood.", + "D. It is made of ceramic." + ], + "answer": "B", + "type": "material", + "image": "images/vqa_249.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01975150.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "lfoa03mo0006KO0001O010O1O010O0010O01O010O1O010O0010O01O10O01O010O0001O00O10000O10000001O000000001O0O1000001O000N20001O0000O01000000O0100000O10O1000O1000O10O100000O10000000000000000000000O100000000000000000O1000000000000000000000O1000000000000001O000000000001O000000000001O0001O00000000000001O01O00000001O00000000010O000000000010O000010O0000010O00010O00010O000010O00010O05K00^X8" + } + ], + "question": "Which of the following descriptions best fits the shape of ?", + "choices": [ + "A. The masked object is perfectly straight from end to end.", + "B. The masked object has a distinctly curved handle.", + "C. 
The tines of are blunt and rounded.", + "D. The handle of is cylindrical and thick." + ], + "answer": "B", + "type": "shape", + "image": "images/vqa_250.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01975150.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "lfoa03mo0006KO0001O010O1O010O0010O01O010O1O010O0010O01O10O01O010O0001O00O10000O10000001O000000001O0O1000001O000N20001O0000O01000000O0100000O10O1000O1000O10O100000O10000000000000000000000O100000000000000000O1000000000000000000000O1000000000000001O000000000001O000000000001O0001O00000000000001O01O00000001O00000000010O000000000010O000010O0000010O00010O00010O000010O00010O05K00^X8" + } + ], + "question": "What is the texture of ?", + "choices": [ + "A. Smooth.", + "B. Grooved.", + "C. Brushed.", + "D. Hammered." + ], + "answer": "A", + "type": "texture/pattern", + "image": "images/vqa_251.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01975150.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "lfoa03mo0006KO0001O010O1O010O0010O01O010O1O010O0010O01O10O01O010O0001O00O10000O10000001O000000001O0O1000001O000N20001O0000O01000000O0100000O10O1000O1000O10O100000O10000000000000000000000O100000000000000000O1000000000000000000000O1000000000000001O000000000001O000000000001O0001O00000000000001O01O00000001O00000000010O000000000010O000010O0000010O00010O00010O000010O00010O05K00^X8" + } + ], + "question": "Which of the following correctly describes a feature of ?", + "choices": [ + "A. It has two tines.", + "B. It has three tines.", + "C. It has five tines.", + "D. It has four tines." 
+ ], + "answer": "D", + "type": "shape", + "image": "images/vqa_252.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01975150.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "e\\X;2jo06J5L4N1N2O1N2O0gNZ1N2O1O100O11O001O2M4M4K5K3K5K4L5K5M3N1OO1O1O1O100O1O101N2`QOnNPn0R1mQOROQn0Q1kQOROSn0^101N1M2N3N10100O0100O10000O100000001O001O1O2N2N001O001O001O001O001O001O001O001N100O2M3B=O1O00O1O100O2O1N101O1O1O00GXQO[Oin0c0[QOYOfn0e0;00O10O0100O2O0O2O2M102M2O2H\\PO0QeV9" + } + ], + "question": "Which of the following statements accurately describes a feature of in the image?", + "choices": [ + "A. The masked object has a large, round nose.", + "B. The masked object has long, floppy ears.", + "C. The masked object has small, beady eyes.", + "D. The masked object has a slender body." + ], + "answer": "A", + "type": "shape", + "image": "images/vqa_253.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01981955.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "Zm\\8a0^g03M1O2N1O2N2N2N2N1O2N1O1O2N2N2N4M0O2N2N1O2O0O2O0LYNnYOi1Qf0WNoYOi1Tf01KVNRZOj1me0VNSZOl1oe0100O3N1O3M00001O00001MjMZZOV2fe0jMZZOV2he000000000000000O101N2O0O2K400N200O2N1M4M2N3L301N1O1O1O2N1O2M3N2N1O4L3M1O9F3M3Jjbe=" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Light blue.", + "B. White or beige.", + "C. Red and yellow.", + "D. Blue with white polka dots." 
+ ], + "answer": "B", + "type": "color", + "image": "images/vqa_254.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01983311.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Zk0f4Yk0100O10000O10000O10000O100O10000O10000O10000O100O10000O10000O100O1O11O1RK`UOZ4bj0bKiUOU4Tk0M2N3M2N3M2N3M2N2N2N2N3M2N2N2N2N3M2N1O1O1O1O1O2N1O1O1O1O1O1O1O2N1O1O00001O001O1iMXROT2jm001O1O000000001O001O001O1O1O1O001O1O001O1O1O001O1O1O1O1O1O001O00001O0000001O00001O00001O00001O001O001O00001O001O001O001O001O00001O001O001O001O00001O00001O001O00001O001O00001O001O00001O001O00001O001O00001O001O00001O00001O00001O0000001O00001O0000001O00001O0000001O0000001O00001O0000001O00001O001O00001O00001O00001O001O00001O00001O00000000000000QPV`0" + } + ], + "question": "What is the texture of ?", + "choices": [ + "A. The object has a wood grain texture.", + "B. The object has a smooth, metallic texture.", + "C. The object has a woven texture.", + "D. The object has a porous, baked texture." + ], + "answer": "C", + "type": "texture/pattern", + "image": "images/vqa_255.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01983311.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Zk0f4Yk0100O10000O10000O10000O100O10000O10000O10000O100O10000O10000O100O1O11O1RK`UOZ4bj0bKiUOU4Tk0M2N3M2N3M2N3M2N2N2N2N3M2N2N2N2N3M2N1O1O1O1O1O2N1O1O1O1O1O1O1O2N1O1O00001O001O1iMXROT2jm001O1O000000001O001O001O1O1O1O001O1O001O1O1O001O1O1O1O1O1O001O00001O0000001O00001O00001O00001O001O001O00001O001O001O001O001O00001O001O001O001O00001O00001O001O00001O001O00001O001O00001O001O00001O001O00001O001O00001O00001O00001O0000001O00001O0000001O00001O0000001O0000001O00001O0000001O00001O001O00001O00001O00001O001O00001O00001O00000000000000QPV`0" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. Wood.", + "B. Woven fabric.", + "C. 
Plastic.", + "D. Wicker." + ], + "answer": "D", + "type": "material", + "image": "images/vqa_256.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01983311.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Zk0f4Yk0100O10000O10000O10000O100O10000O10000O10000O100O10000O10000O100O1O11O1RK`UOZ4bj0bKiUOU4Tk0M2N3M2N3M2N3M2N2N2N2N3M2N2N2N2N3M2N1O1O1O1O1O2N1O1O1O1O1O1O1O2N1O1O00001O001O1iMXROT2jm001O1O000000001O001O001O1O1O1O001O1O001O1O1O001O1O1O1O1O1O001O00001O0000001O00001O00001O00001O001O001O00001O001O001O001O001O00001O001O001O001O00001O00001O001O00001O001O00001O001O00001O001O00001O001O00001O001O00001O00001O00001O0000001O00001O0000001O00001O0000001O0000001O00001O0000001O00001O001O00001O00001O00001O001O00001O00001O00000000000000QPV`0" + } + ], + "question": "What is the texture/pattern of in the image?", + "choices": [ + "A. Smooth with wood grain.", + "B. Woven.", + "C. Smooth and metallic.", + "D. Porous and baked." + ], + "answer": "B", + "type": "texture/pattern", + "image": "images/vqa_257.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01983311.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Zk0f4Yk0100O10000O10000O10000O100O10000O10000O10000O100O10000O10000O100O1O11O1RK`UOZ4bj0bKiUOU4Tk0M2N3M2N3M2N3M2N2N2N2N3M2N2N2N2N3M2N1O1O1O1O1O2N1O1O1O1O1O1O1O2N1O1O00001O001O1iMXROT2jm001O1O000000001O001O001O1O1O1O001O1O001O1O1O001O1O1O1O1O1O001O00001O0000001O00001O00001O00001O001O001O00001O001O001O001O001O00001O001O001O001O00001O00001O001O00001O001O00001O001O00001O001O00001O001O00001O001O00001O00001O00001O0000001O00001O0000001O00001O0000001O0000001O00001O0000001O00001O001O00001O00001O00001O001O00001O00001O00000000000000QPV`0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Silver.", + "B. Yellow.", + "C. Brown.", + "D. White." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_258.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/1886.jpg", + "mask_rles": [ + { + "size": [ + 384, + 683 + ], + "counts": "f^g3g0V;5K4K4L4N11B=01001O2N2N:E3N1O1O000001AhESOZ:m0<3N00003M_O\\ECi:2Xbn3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The fourth one counting from the left", + "B. The fifth one counting from the right", + "C. The sixth one counting from the left", + "D. The fifth one counting from the left" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_259.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/1888.jpg", + "mask_rles": [ + { + "size": [ + 384, + 512 + ], + "counts": "PbT4:c;5M3M2O0O10O0O2M2103M3L3N4L2N2N1O0000I7N200O100O1001O0oDYOj:Q1M00L4M3M4O1O2O4L9G5K^i\\1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The second one counting from the right", + "B. The third one counting from the left", + "C. The fourth one counting from the right", + "D. 
The third one counting from the right" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_260.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/2032.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "bQf2a0^;6J2N5L2N1N3N2N1O001O0O100000000O1000000O100000000O10001O000000O1000000000O1000000000001OO101O000000000000000O10000000000O1O1O1M3M300M3M3O1M3M3N2N2N2O100O1001O1O002N2N2N1O2N3M1O1N3N1O2N002N1O2N1O1N10001O000000000O100000000000000000000000000000000000000O100000000O1000O100000O100000000001O000000001O001O00001N101O0O2N1G:G8I8Jij5" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The fourth one counting from the top in the left column", + "B. The third one counting from the top in the right column", + "C. The fourth one counting from the top in the right column", + "D. The third one counting from the top in the left column" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_261.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/2051.jpg", + "mask_rles": [ + { + "size": [ + 384, + 461 + ], + "counts": "X]V13j;4K5L3N2K6M2O1O2O001O001O00001O001O00001O0000010O00000000O2O000O2O0O2O0O101O1N110O001O001002M3NO010O001O010O00010O01O001O010O001O00100O001O010O001O10O010O01O100O101N0001N2N1O2O0O2N2O1N101N1N3N2O1O001O001O1O1O2L6JUZn2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The third one from the top in the left column", + "B. The fourth one from the top in the right column", + "C. The third one from the top in the right column", + "D. 
The fourth one from the top in the left column" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_262.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/2179.jpg", + "mask_rles": [ + { + "size": [ + 384, + 526 + ], + "counts": "a_Y4`0^;5K7K4K3N3L2O2M3N2M2O2M2O2M2O1N2N2N2N2N2N2O0O2N1O2N1O2O0O2M2O1N3L3N2O2M2N2M3O100O1000000000000000000001O0000000O101O000O10001O0O101O0O100O2O0O2N1O2N1O2N1O2N2O0N3N2N2N2M2O3L3M3N2M4K5K5K5K5J:@Pig0" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The third one counting from the left in the first row from the top", + "B. The second one counting from the left in the last row from the top", + "C. The third one counting from the left in the last row from the top", + "D. The fourth one counting from the left in the last row from the top" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_263.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/2613.jpg", + "mask_rles": [ + { + "size": [ + 384, + 684 + ], + "counts": "SW:5i;3N1O2N0O2O1O0O1O1O2N2N3M5K2O1ON2100O1O1O101N2N2O1N1O1O100OJ]EROb:m0bEnN`:P1801O10O01O10O010000O10000O10000001O100O2N2N2O0O1O2N014K10O001O1O0OO_Ob0N1L5N3O1N3M4JTbj6" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one from the left", + "B. The second one from the left", + "C. The first one from the right", + "D. 
The second one from the right" + ], + "answer": "A", + "type": "ordering", + "image": "images/vqa_264.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/2614.jpg", + "mask_rles": [ + { + "size": [ + 384, + 604 + ], + "counts": "QeZ61k;6M2N22N2OO0000001N11O2OO01O00O01000O1N2N2N2oDGX:=aEGj9I]FV1b9mNZF1Fi0P:XOWFOIj0Q:XOSFNMj0R:BnE`0S:]OnEd0R:[OPFc0Q:]OPFb0o9_OQFa0l9CRF=c91YF1f9FZFc0g9[O]Fc0e9ZO]Fe0f9VO\\Fj0f9PO_Fo0Y:0ORO^Ea0`:^OcEa0\\:_OeEa0Y:@iE`0U:_OmEa0Q:@PF`0n9@TF`0V9ZOhF93>T92mFOT9NnF3R9JPG6R9GoF9T9BmF`0S9^OnFb0R9]OoFc0S9ZOnFg0T:1O101O1N`0A6I2\\J" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one counting from the left", + "B. The first one counting from the right", + "C. The second one counting from the right", + "D. The second one counting from the left" + ], + "answer": "B", + "type": "ordering", + "image": "images/vqa_265.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/286.jpg", + "mask_rles": [ + { + "size": [ + 384, + 576 + ], + "counts": "Q`n1=_;8I6J5M2L6K5L3M2N4L2N3L4N2L4M2N3M2N2N3M101N2N2N1O2O1N1O2O1N100O10001N1000001N100000001O000O100010O00001O00000000000O101O0000001N2O0O2O0O2O001N2N2O0O2N2N3M1O2N3K5M2L4M4K5M2M3E;K`0UOoDLUSh3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The 1st one from the left", + "B. The 2nd one from the left", + "C. The 3rd one from the left", + "D. 
The 2nd one from the right" + ], + "answer": "B", + "type": "ordering", + "image": "images/vqa_266.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/290.jpg", + "mask_rles": [ + { + "size": [ + 384, + 512 + ], + "counts": "[oZ49e;4K5L2M4J5I8M3N1N3H7O2M201N2N2O1O1O1O1N2O1O1O1O1O1N3OO01O000O1O100001O1O001OO1000O10001N10000O1O2O0O2N1O2O0O2N2O1N1O3M2N2N4L3M2N1O2M5L3L4L4J:FPQj0" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one counting from the left", + "B. The second one counting from the right", + "C. The first one counting from the right", + "D. The second one counting from the left" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_267.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/291.jpg", + "mask_rles": [ + { + "size": [ + 384, + 576 + ], + "counts": "mR[35i;5E8G9M3N2M3N2N2M3N2N2N2O1N2N3M2N2O1O1O001O1O1O1JRNYFo1g9RNWFn1i95000O10001O000O2O00001N100O01D;M3O2O00eMkFm1U9QNPGl1P9RNTGk1b9O1N2O0O2O0O10O001N100000000000O1O1O2N1UOfE0[:OgEO[:OgE0Z:MiE2W:LlE5S:JnE6R:IoE7Q:GQF9o9FRF:n9ETF9m9FTF:l9EUF in the full image?", + "choices": [ + "A. The second one counting from the left in the middle row", + "B. The third one counting from the left in the top row", + "C. The fourth one counting from the left in the middle row", + "D. 
The third one counting from the left in the middle row" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_268.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/2910.jpg", + "mask_rles": [ + { + "size": [ + 384, + 1210 + ], + "counts": "`X[:1Q86aHo3Y6Q1L3O0O2O0O1001O1O00000000000000000000000000000000000000001O00O100000000001O000000000001O00O10000000001O0000O1000000000000001O00O1000000000000000000000000000000000000001O0000O10000000000001O00000001N10000001N100000O101O0000000001O00O1000000000000O10000001O00O10000O2O00O2O00000O1000000000001N2N5KV1PK[K3XSR2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The second one from the left in the first row.", + "B. The first one from the right in the first row.", + "C. The second one from the right in the first row.", + "D. The second one from the right in the second row." + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_269.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/2922.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "eRY1122g;OXD3g;MYD000g;2ZD50>1]OO3V:d1M000O100000001OO1001O00000000O11O00000000O1001OO11O00O1000000001O01N11O000O1001O00O11O00O1000000000000001O000000O1001O00000000O1000O11O00000000O100001O0000O2O000000O11O00001O004LWe\\2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The 2nd one from the left in the 2nd row", + "B. The 3rd one from the left in the 2nd row", + "C. The 2nd one from the left in the 3rd row", + "D. 
The 3rd one from the left in the 3rd row" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_270.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/2938.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "h_6:]:Z1N2O00000000000000000000000000000000000000000000000000001O000001OO1001O00000000000000000000000000000000000000000000000000000000000O2XOWRi3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The last one from the left", + "B. The first one from the right", + "C. The first one from the left", + "D. The second one from the left" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_271.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/2941.jpg", + "mask_rles": [ + { + "size": [ + 384, + 484 + ], + "counts": "[Sg23j;Z2iM1O000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O100000000000000000000000000000001O0000000000000000000000000000O11O0000O11O0000000000O1001O00O11O0000000000000000000000000000000000000000001O3LdT_1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The second one counting from the left in the second row counting from the top.", + "B. The second one counting from the right in the first row counting from the top.", + "C. The first one counting from the right in the second row counting from the top.", + "D. The second one counting from the right in the second row counting from the top." 
+ ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_272.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/3.jpg", + "mask_rles": [ + { + "size": [ + 384, + 512 + ], + "counts": "gae37i;2M2O1O1O1ON2O2M3NYfV2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The third one counting from the left", + "B. The second one counting from the right", + "C. The fourth one counting from the right", + "D. The third one counting from the right" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_273.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/317.jpg", + "mask_rles": [ + { + "size": [ + 384, + 579 + ], + "counts": "jje12l;3M3N2O2M2N2N2O1O1O1O000100O1O001O1O2N3MT]k4" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The third one counting from the left.", + "B. The fourth one counting from the left.", + "C. The fifth one counting from the left.", + "D. The fourth one counting from the right." + ], + "answer": "B", + "type": "ordering", + "image": "images/vqa_274.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/3269.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "ZYP35h;4HIdD9Z;IeD7[;IeD8Z;IdD8\\;601O00000000000002N1O00001O1O2N1N101N2O1Mhb^1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one counting from the top in the second column counting from the right.", + "B. The second one counting from the top in the first column counting from the right.", + "C. The second one counting from the top in the second column counting from the right.", + "D. 
The second one counting from the top in the second column counting from the left." + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_275.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/3281.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "lU_35i;3M2O1N3M2N2O10000O101O000O100O1I]OPEd0P;600O2O000O100O101O0O10000O2N1N200O100O10000O101O0000000000O1000O100N1O2000O10000000001N10000O1O100O100O1O1O2O1N2N2N1N3N1O2O0O2N1O2O1N5K2N2Nkj8" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one from the top in the first column from the right.", + "B. The second one from the top in the second column from the right.", + "C. The second one from the top in the first column from the right.", + "D. The second one from the bottom in the first column from the right." + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_276.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/3284.jpg", + "mask_rles": [ + { + "size": [ + 384, + 512 + ], + "counts": "eWR29f;2N1N2O100O1O1O010O100O010O100O010O10O100O10O010O01000O0100O10O01O1000O010000O010O01O1O001O1O001WO^ORFc0i90iE1U:n0M3M201O1N2O1O1O1O1O001O1O001O100O2N101O1N1O2O1O0O2O0O2O000O101O0O2O0000001O0O2O000000000000O1O1M3O1O1O1O1O1O1O1O100O10001N10000O10001N10001N10001O0O2O00001N101O10OO2O0O2O1N2O1N2N2N2M3M3N3M2M4L5J6J9@ZVU2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The second one counting from the left in the third row counting from the left.", + "B. The third one counting from the right in the second row counting from the left.", + "C. The second one counting from the right in the third row counting from the left.", + "D. The third one counting from the right in the third row counting from the left." 
+ ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_277.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/3500.jpg", + "mask_rles": [ + { + "size": [ + 384, + 613 + ], + "counts": "Q^e2:c;6K5K4L4M3L4M3L4M2N2N3M2N3M2N3M2N3M3N2M2N101N102M2O1O1N101O1O001O001O010O000010O00001O001O1O001O001N2O1N101O1O1N2O1N2N3N1N2N3M2N2N3M2N3M2N3L4M3M3L5L3L6K6F_Uc3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one counting from the top", + "B. The first one counting from the bottom", + "C. The second one counting from the bottom", + "D. The first one counting from the left" + ], + "answer": "B", + "type": "ordering", + "image": "images/vqa_278.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/3644.jpg", + "mask_rles": [ + { + "size": [ + 384, + 577 + ], + "counts": "]UW25g;8H6K4L4M3M2N3M2N2O2M2O1O1N2O001O1O1O1O1O1O1O10O01O001O1O01000O010O000010O0100O0100O0010O01000O01O10O0100O00100O010O1O10O01000O0100000O010000000O10O1000O1000000000000O100000O10O100000000O100000000000000000000000O1000000000000O2O00000O101O001O0O10001O0O10001N1O2O1N1O2N2N2N1O2N2N3L3N3L4M6GPlj2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one counting from the top", + "B. The first one counting from the bottom", + "C. The second one counting from the bottom", + "D. 
The last one counting from the bottom" + ], + "answer": "B", + "type": "ordering", + "image": "images/vqa_279.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/3696.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "UeR1?];:F8J4J7J4M3L4M4M2M3N2O0O3M2N2N2M3O0O2O2M2O0O2N2O1N2N2O1N2N101N2N2O0O1O2O0O2O0O2O0O2O001N2O0O101O001O001N10000000001N1000001O000000001N10000000000000001O00000000000000000001O000000000000000000001O00000000001O000O101O001O00001N101O0O2O00000O2O000O2O000O2O000O2O000O2O0O2O1N2O0O2O1N101N101N101N1O2O0O2O1N102M1O2N2N2N2N1O2N2N1O2N2N2N2M4M2M4L3N3L3M6H in the full image?", + "choices": [ + "A. The second one counting from the right in the top row", + "B. The third one counting from the right in the middle row", + "C. The second one counting from the right in the middle row", + "D. The second one counting from the left in the middle row" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_280.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4.jpg", + "mask_rles": [ + { + "size": [ + 384, + 575 + ], + "counts": "afW36h;4M2O1O1N2O1O1OO2N1O1O2M3M5K_YZ3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The third one counting from the left", + "B. The second one counting from the right", + "C. The fourth one counting from the right", + "D. 
The third one counting from the right" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_281.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4063.jpg", + "mask_rles": [ + { + "size": [ + 384, + 384 + ], + "counts": "_ZW2=_;7K4L3N3L3N2N3M1O2N2N2O0O101N101N10001O000O10000000000000000001O0O10001N101N101N2N101N2N2N2N2M3N3L4L5J8Fdid1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The second one from the top", + "B. The first one from the bottom", + "C. The first one from the top", + "D. The last one from the top" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_282.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4066.jpg", + "mask_rles": [ + { + "size": [ + 384, + 531 + ], + "counts": "^R`1d0X;8J5K5J5M3L4L4M2N2N2N3L3O1N2N2M3O1N1O2N2O0O2O1N101N100O2O000O101O0O101O0O10001O0000000O10000000000000000001O00000000001N10000O101N10001N101O0O2O001N2N101N2O1N2N2N2N2N2N2N3M2N3M2N3L4L4M3L4L4J8Egeb3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one from the top", + "B. The third one from the top", + "C. The second one from the bottom", + "D. The second one from the top" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_283.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4405.jpg", + "mask_rles": [ + { + "size": [ + 384, + 580 + ], + "counts": "lPR45i;3N3M101N2O1O1O1O00O1O1O2N1N4L4LVSa2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one in the top row", + "B. The last one in the bottom row", + "C. The second one in the bottom row", + "D. 
The first one in the bottom row" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_284.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4406.jpg", + "mask_rles": [ + { + "size": [ + 384, + 604 + ], + "counts": "^c\\38e;7K3M3L4N1N2N3M2N2O1N2O1O1O1N2O1O2N5K1O00000000KfEcN[:[16O2N1N2O1O2N1O2N1O2N2N2M3M3M3N3LfTV3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The 4th one counting from the right", + "B. The 5th one counting from the left", + "C. The 6th one counting from the right", + "D. The 5th one counting from the right" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_285.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4412.jpg", + "mask_rles": [ + { + "size": [ + 384, + 576 + ], + "counts": "Q[a19e;5L2N3M2O1O2M2O1N2O1O2N1O2N3M3M00I7N2N2O2N1O2N1N2O2N2N1O3L3LRQk4" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The second one counting from the left", + "B. The fourth one counting from the left", + "C. The third one counting from the left", + "D. The third one counting from the right" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_286.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4493.jpg", + "mask_rles": [ + { + "size": [ + 384, + 577 + ], + "counts": "kiS35a;`1fNb1jEjLh3k1WOX7K4L2N2O1N2O00000001O1O0O2N3M2N4\\I5k0V7WN[_\\3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The 8th one counting from the left", + "B. The 9th one counting from the right", + "C. The 9th one counting from the left", + "D. 
The 10th one counting from the left" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_287.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4495.jpg", + "mask_rles": [ + { + "size": [ + 384, + 576 + ], + "counts": "V^c03l;101N101O001O000O10000O1O1O1O1O1O00100O0O100O20OO1001O01O00O01010OO10O1010O0O0010010NO2010kDAh:?XECf:=ZEDd:<^EDa:<_EE`:<`EE_::aEG_:7bEJ_:4aEM_:3`EN`:2^E0c:N\\E4d:I]E8d:H[E9e:F[E;e:EZE in the full image?", + "choices": [ + "A. The 2nd one from the left", + "B. The 1st one from the right", + "C. The 1st one from the left", + "D. The last one from the right" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_288.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4497.jpg", + "mask_rles": [ + { + "size": [ + 384, + 570 + ], + "counts": "dVk433N9g0^8_OUHJTOl1Q4UNmK;0E2Og00V1>bNj2S3b1_LaN^3R5O1N2O1O100O10000O01O1M3DBWE]Oo:3UY`1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The third one counting from the right", + "B. The fourth one counting from the left", + "C. The fifth one counting from the right", + "D. The fourth one counting from the right" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_289.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4895.jpg", + "mask_rles": [ + { + "size": [ + 384, + 532 + ], + "counts": "m]Y43h;9J6K2N4L3N2M3N2M3N2M2O2M3N1O2N1O2N1O1O2N3M1O1O2N1O1O1O100O1O10O01O10O01O0010O01O001O01O02O3L5K2O0O2OO010O0O2O1N1O2N1O2N2N2M2O2N2O010O1O010]OVFnNk9P1YFmNh9Q1ZFnNf9R1ZFnNg9Q1ZFnNg9P1ZFoNh9P1YFnNj9P1e0M3M2O0BPEKR;3oDMQ;1RENQ;KSE5\\;0O01000001O0000001O0O101O1O2Mj`h0" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. 
The third one counting from the right", + "B. The fourth one counting from the left", + "C. The fifth one counting from the right", + "D. The fourth one counting from the right" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_290.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4904.jpg", + "mask_rles": [ + { + "size": [ + 384, + 522 + ], + "counts": "ccR2;b;5M3L4L3O1M3N2N2N1O2N1O2N1O1O2N2N1O1O101N1O101N100O101O0O2O000O10000O101O0000000000000000000000000000O2O00001N101N2O1N2O1N2N3N1N3M2N3M2N3M3M4K5KbhU3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The 2nd one in the 2nd row", + "B. The 3rd one in the 2nd row", + "C. The 1st one in the 3rd row", + "D. The 2nd one in the 3rd row" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_291.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4922.jpg", + "mask_rles": [ + { + "size": [ + 384, + 513 + ], + "counts": "ZSV17d; in the full image?", + "choices": [ + "A. The first one from the left", + "B. The second one from the right", + "C. The third one from the left", + "D. The second one from the left" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_292.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4934.jpg", + "mask_rles": [ + { + "size": [ + 384, + 483 + ], + "counts": "hiW3131g01XO2i:NlE:T:EjE7]OK]fm0m9VOfEM=n0k9VOhEL=n0j9WOXFk0g9TOYFn0CnNm95`FT1^9lNaFV1]9kNbFV1^9jNaFW1^9jNaFW1_9hNaFY1^9hNbFX1^9gNbFZ1^9gN`FZ1`9gN^FZ1b9fN]F[1b9b000000000000000001O0000001O0000001O0O2O0GSF_Nn9`1RF_NP:_1RF`No9^1RFbNo9\\1RFcNQ:[1oEeNR:Y1oEfNT:X1kEhNW:U1=M2G9K5M5K5JWfY1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. 
The second item from the right in the bottom row", + "B. The third item from the left in the bottom row", + "C. The third item from the right in the bottom row", + "D. The third item from the right in the top row" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_293.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4939.jpg", + "mask_rles": [ + { + "size": [ + 384, + 480 + ], + "counts": "kfj4a0[;7K3L6K4M2N3M2N2N3N1N3M2O2M4M2M2O1N2O00001N10000000000000O0100000000000O00100O1O010O1O10O01N2O1O2O0O2M2O2N2M3O1M2O2N2M3N3K4M4K4M4J6K_5" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The bottom one in the rightmost column", + "B. The second one from the bottom in the rightmost column", + "C. The second one from the top in the rightmost column", + "D. The second one from the bottom in the middle column" + ], + "answer": "B", + "type": "ordering", + "image": "images/vqa_294.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4948.jpg", + "mask_rles": [ + { + "size": [ + 384, + 576 + ], + "counts": "chh2j0Q;9D in the full image?", + "choices": [ + "A. The third from the left in the second row", + "B. The second from the left in the third row", + "C. The third from the left in the third row", + "D. The fourth from the left in the third row" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_295.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5145.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "gUd3 in the full image?", + "choices": [ + "A. The first from the top down", + "B. The second from the bottom up", + "C. The first from the bottom up", + "D. 
The second from the top down" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_296.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5362.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "bnS34j;2O2O0O10000O1001O01O0001O1N2N`Q_1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The third item from the left", + "B. The second item from the right", + "C. The third item from the right", + "D. The fourth item from the right" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_297.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5375.jpg", + "mask_rles": [ + { + "size": [ + 384, + 580 + ], + "counts": "\\co11o;2N0nSg1OokXN9H7J4L4M4M3L3M2O2M2N2N2N2N2O1N2N1O2O1N2N101N2N101N101N2O0O1O2O001N1O1O1O100O100O10000000O0100000O10O1000000000O0100000000O100000O010000O100000000000000000000000000000O10001O0000000000001O00000001O000000000001O00000000010O00001O0000001O00001O0O101N10000O1O2O001N100O2N101N1O101N2N2O2M3M2O1N2M101O1O2M2O2M4L4J9GTUW1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The 3rd one in the 3rd row", + "B. The 4th one in the 3rd row", + "C. The 3rd one in the 4th row", + "D. The 4th one in the 4th row" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_298.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5461.jpg", + "mask_rles": [ + { + "size": [ + 384, + 384 + ], + "counts": "\\hS15h;6L4L2O2M2N2O1N2N2O1N1O2O0O2O0O1O10000O100000000000O10000O101O0O2O0O2N1O2O2M2M3N2N3L4Lhgk2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one in the second row", + "B. 
The second one in the first row", + "C. The second one in the second row", + "D. The first one in the first row" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_299.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5571.jpg", + "mask_rles": [ + { + "size": [ + 384, + 512 + ], + "counts": "cZd4<`;:H6J4L4L4M3L3N2N3M2N2N2O1N2O1N1O2O0O2O0O2O000O2O0O101O0O1000001O000000000000000000O10001O0O101O0O101O0O101N2O0O2O1N101N2N2N2N2N2N2N3M2M4M4K4L4L4K6J]a?" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The second row from the top, in the first column from the left.", + "B. The first row from the top, in the first column from the right.", + "C. The second row from the top, in the first column from the right.", + "D. The second row from the top, in the second column from the right." + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_300.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5572.jpg", + "mask_rles": [ + { + "size": [ + 384, + 614 + ], + "counts": "oYT4`0Y;:]Ob0I6K6L4K5M2M4L3N2N1O2N2N1N3N2N2N101N2N2N101N101N101N101O0O10001N100O101O000O1000001O000000000000000000O10001O000O10001O000O101O0O2N101N1O2O0O2O1N101N2N2N1O2O1N2N2O1N3M3L4M3L4K5L5J6J6^Od0AoZn1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The third row from the top, the second one from the right", + "B. The second row from the top, the first one from the right", + "C. The third row from the top, the first one from the right", + "D. 
The first row from the top, the third one from the right" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_301.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5575.jpg", + "mask_rles": [ + { + "size": [ + 384, + 384 + ], + "counts": "gP`36R;j0J4M3M5M1N3N2M2O1O2M2O1O1O1O2N100O3M100O2O1N101N100O2O0O101O1O0O101O000000001O000000000000000001O0O1000001O000O2O00001N101O0O2O0O2O1N2N2O1N1O2N2N2N101M4M3M2N3L5K3M5I7JU4" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first column from the right, the second item from the bottom upwards.", + "B. The second column from the right, the first item from the bottom upwards.", + "C. The first column from the left, the first item from the bottom upwards.", + "D. The first column from the right, the first item from the bottom upwards." + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_302.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5813.jpg", + "mask_rles": [ + { + "size": [ + 384, + 512 + ], + "counts": "g[X26g;5N01O1O1O1O0000I7K5M3L4L4M3N2N2N2L4N2N2O1N2O1O1O1O1O1O1O1O1O1O1O1N2O1O1O1O100O2N1O1O1O100O001O2N100O1O100O010O2O0O10000O1000O11O0O100000O100000001O000O101O00010O001O001O1O1O001O1O1O1O1O1O1O001O2N1O1O2N1O2N2N2N2N3M3M4L3M4L3M5J7Fk`\\2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The one in the upper part", + "B. The one on the left side", + "C. The one in the bottom part", + "D. 
The one in the center" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_303.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5819.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "V]V1154_;:L4M2N2N2O0O1O1N2M3M3M3N2N2M3N1O2M201N2N1O101N1O1O101N10000O2O0O100000001O000000000000000000000001O0000001O000000000000ORNQFm1o9SNQFm1o92O10O10O11SNQFg1U:O1N100O2N100O2O0O100O1O2N2O1N100O2O1O2M101N101O0O2N100O2O0O2O2M2N2M4K6JhV[2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The 1st one in the 1st row", + "B. The 2nd one in a row of two", + "C. The 1st one in the 2nd row", + "D. The 2nd one in the 1st row" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_304.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5918.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "Qj><`;7K5K4L3M4M2N2N1O2N101N100O1O100O10000O100O101O0000000O1000000000000000000000000000000001O0001O01O1O001O1O001O001O001O001O1O1O1O001N2M4L3M3N3I7K8FQ^^3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The 1st one in the 2nd row", + "B. The 2nd one in the 1st row", + "C. The 2nd one in the 2nd row", + "D. 
The 1st one in the 1st row" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_305.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5930.jpg", + "mask_rles": [ + { + "size": [ + 384, + 514 + ], + "counts": "hm]4:^;>H5J5K5L5L4K5L2M4M2N2M3N3M2N2O1N3N1N2N2O1N2O001N2O1O0O2O1O001O001O001O1O001O001O00001O000001O0000001O001N101N101N1O2N2N1O2N2N1O2N2N2M4M3L3M3M4L4L5J6J6H9H=ZO^Ve0" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one from the left to the right", + "B. The second one from the right to the left", + "C. The second one from the left to the right", + "D. The first one from the right to the left" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_306.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/6.jpg", + "mask_rles": [ + { + "size": [ + 384, + 575 + ], + "counts": "R[d38f;4M3N1N2N2O1O2N1N2O2N2N00M3O1O2N1N2O2N1O2M3N3LPYj2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The fourth one from the left to the right", + "B. The fifth one from the left to the right", + "C. The sixth one from the left to the right", + "D. 
The fifth one from the right to the left" + ], + "answer": "B", + "type": "ordering", + "image": "images/vqa_307.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/6646.jpg", + "mask_rles": [ + { + "size": [ + 384, + 384 + ], + "counts": "_b`06d;:K2O0000000O1000001O0001O00000000000000000000000000000000000000000000000000000000000000001OO100001O000001O000000000000000000000000000000000000000000000000000000000000000000001O0001O000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001O00O10000000001O00O10000001O0000O100001O0000O100001O0000O10000001O0000O1001O0001O00000O10002Mdig1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The fourth one from the top down", + "B. The sixth one from the top down", + "C. The fifth one from the top down", + "D. The fifth one from the bottom up" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_308.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/6650.jpg", + "mask_rles": [ + { + "size": [ + 384, + 480 + ], + "counts": "TRo0b0S;g0^O=F8H6K4L4M2N3M2N1O2N2N2N101O001O001O00000000000000000001O00000000000000000000O1000000000000000000000000000000O10000000000000000000000001O0000000O10001N100O1O2O0O1O2N2N2N1O2N2N2N2M5L3K6K5J9Dk0POkU`3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The fourth row from the top, the first one from the left", + "B. The third row from the top, the second one from the left", + "C. The fourth row from the top, the second one from the left", + "D. 
The second row from the top, the fourth one from the left" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_309.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/6651.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "jUj11132MY;W1eDlN_:f1C4M3M3N1N=@i0iFfL00\\8Q4_O100O100O2O000000001N101O001O1O1O3M4L3M1O2N01N100O101gMkHBY7\\OSJE_NROo7S1l2H9^Oa__2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The 1st one in the 1st row", + "B. The 2nd one in the 2nd row", + "C. The 2nd one in the 1st row", + "D. The 3rd one in the 1st row" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_310.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/6660.jpg", + "mask_rles": [ + { + "size": [ + 384, + 384 + ], + "counts": "gmm012013[;b0J3O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1N2O101N1O100000000000000000000000000000000000000000001O000000O1001O000000O100001O00O1000000000000000000001OO100000000000000000000000000000000000000000000001OO1001OO1001OO1000000001OO1000000000000000000001OO1000000001OO10000001O0000000001O00O100001O0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O100000000000O11O00O2O000000000000002M_R<" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The second from the top down", + "B. The third from the bottom up", + "C. The second from the bottom up", + "D. 
The bottom one" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_311.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/6743.jpg", + "mask_rles": [ + { + "size": [ + 384, + 384 + ], + "counts": "`Yn24g;;H4L4N3L3M2N2O1N2N101N1O10001O0O1000000000000000000001O001O0O2O001N2N101N2N3M2M4L4L5HeVQ1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first column from the right, the first row from the top", + "B. The second column from the right, the second row from the top", + "C. The first column from the right, the second row from the top", + "D. The first column from the left, the third row from the top" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_312.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/680.jpg", + "mask_rles": [ + { + "size": [ + 384, + 931 + ], + "counts": "ega71a;g0bDDO0178AN2]8c3@4K4N00000000000000000000000000000000000000000001OO10000000000000000000000001O00O10O1000000001O0000000001O00O10000000000000000000000001O00O1000000000001OO100000000O100001O0000001O0000000000000000O1000000000000QMXHELW1m7TOUHf1l7[NRHf1o7U11N2oNlGbM00]8W2gG^MN31ON0e8O^G0N12NNb0OBOO]bP2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one from the bottom and the fourth one from the left", + "B. The second one from the bottom and the fifth one from the left", + "C. The first one from the top and the fifth one from the left", + "D. 
The first one from the bottom and the fifth one from the left" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_313.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/6804.jpg", + "mask_rles": [ + { + "size": [ + 384, + 550 + ], + "counts": "Vf\\29e;4L4L4L5K3N1N2O1O1O1O1O1O1O2N100O1O2O0O101N2O0O2O001N2O001O1O1N2O1O1O1O00O2M2N2M4M2N2M4M2N3L3N3M3M2O2M3N2M3O1N2O1O2M201N2O2MomY3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The second line from the bottom, the second one from the left.", + "B. The third line from the bottom, the third one from the left.", + "C. The second line from the bottom, the third one from the left.", + "D. The third line from the bottom, the second one from the left." + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_314.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/6813.jpg", + "mask_rles": [ + { + "size": [ + 384, + 413 + ], + "counts": "^[_2<^;>E5L3M4L4M2N3M2N2N2N1O2N2N2N101N2N1O2O0O2O001O0O2O001O0000001O000O11N10000000000O1000001O0O2O0O2N101N2N1O2O0O101N101N101N2N2O1N3M4L4K5K5K8H5J```1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The 3rd one from the right in the 4th row.", + "B. The 2nd one from the left in the 4th row.", + "C. The 2nd one from the right in the 3rd row.", + "D. The 2nd one from the right in the 4th row." 
+ ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_315.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/682.jpg", + "mask_rles": [ + { + "size": [ + 384, + 485 + ], + "counts": "dbT1=];8I7J6L4L5K5K5K4L3O1N2M7bFkMb8o2I4L4N3N3N001O3M5LO0001O00001OO100000000001OO101O001O3M9G4gN`GjNa8P1jGjNY8o0PHlNR8o0UHlNo7o0d1K7J7H6J5J6J`]m3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The second one in the second row", + "B. The third one in the second row", + "C. The second one in the third row", + "D. The third one in the third row" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_316.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/6827.jpg", + "mask_rles": [ + { + "size": [ + 384, + 512 + ], + "counts": "iX\\1?`;4L2N5K:F9H4L4K5L4L5K6J6J5J6K4L5K8H4L4L1O1O1O001O001O0O2O001O0000001O0000000000000000001O00000000000000000O11N100000O1000O100000000O1000001N10000O10000O2O0O2N1O2N6aL^G81J;U2l9iMfE`1k:@d0UOk\\c3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first row, the first one from left to right.", + "B. The first row, the second one from left to right.", + "C. The second row, the second one from left to right.", + "D. The first row, the third one from left to right." 
+ ], + "answer": "B", + "type": "ordering", + "image": "images/vqa_317.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/6960.jpg", + "mask_rles": [ + { + "size": [ + 384, + 384 + ], + "counts": "_lj2:b;9I3M4L4L3N2N2N2M3N2O1N2N101N100O2O0O2O0O101O000O1000000O2O00O10000000000000000O2O00000O2O000O2O0O100O2O1N1O2NBeNmE0008Y1l9gNYFX1Y:N2M3N2N3LkNUOZGh0S:K5J_n>CQRA0X=0c91TYO0UT10lkN0nDOTk8" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The third row from the top and the second column from the right.", + "B. The second row from the top and the second column from the right.", + "C. The second row from the top and the third column from the right.", + "D. The second row from the bottom and the second column from the left." + ], + "answer": "B", + "type": "ordering", + "image": "images/vqa_318.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/7153.jpg", + "mask_rles": [ + { + "size": [ + 384, + 557 + ], + "counts": "bSl14h08o9KkE in the full image?", + "choices": [ + "A. The 1st one from the left to the right", + "B. The 2nd one from the left to the right in the 2nd row", + "C. The 3rd one from the left to the right", + "D. The 2nd one from the left to the right" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_319.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/7370.jpg", + "mask_rles": [ + { + "size": [ + 384, + 576 + ], + "counts": "od^2121c;a0C8J6K3M4L4M2M3N2N2N2N1O2O1N1O2O0O101N100000001O00000000O1000001N10001N2O0O2N2O0O2N2N3M2N2N2M4L4L6J5I8IY_e3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The 4th one from the left.", + "B. The 6th one from the left.", + "C. 
The 5th one from the left.", + "D. The 5th one from the right." + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_320.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/7384.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "nmU2Q2n94M1O0000000000001O000000000000001O0001O000000000001O000000001O0001O0000000001O0000000000001O000001O000001O0000000000001O000000000001O00000O1]Od0nNofg1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The second one from the left in the third row", + "B. The third one from the left in the second row", + "C. The fourth one from the left in the third row", + "D. The third one from the left in the third row" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_321.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/7488.jpg", + "mask_rles": [ + { + "size": [ + 384, + 683 + ], + "counts": "mee3l0R;4M3L3N2O2M2N101N2O0O2O0O2O0O2O1N2O0O2O001N10001N10001O00001O00000000001O000000000000000001O00000000001O00000O10001O00000O101O001N100O2O001N101O1N1O2O1N1O2N2N2N2M4L3M4L5@YVZ3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. From the bottom up, the first row; from the left to the right, the second element", + "B. From the top down, the first row; from the left to the right, the first element", + "C. From the bottom up, the second row; from the left to the right, the first element", + "D. 
From the bottom up, the first row; from the left to the right, the first element" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_322.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000242934.jpg", + "mask_rles": [ + { + "size": [ + 227, + 500 + ], + "counts": "VfP1b0`69H5kIQOl5[1G4M5K1O2N1O2N2N1O1O2N2N1O1O3L[N]Km0b4SO_K_OI[1g4VOcKk0\\4VOcKj0]4VOdKi0\\4WOdKi0\\4VOgKh0Y4WOiKh0W4XOjKg0V4YOjKf0W4YOkKf0T4[OnKb0S4^OmKb0S4^OmKb0S4^OoK`0Q4@QL=P4DPL;P4FPL:o3FSL7n3JQL6o3JQL6o3KPL5o3MPL3P4NoK2Q4NoK2Q4OnK1R40mKOT43jKMW42iKNW43hKMX45_K1b41[K0e41ZKNg44WKLh4X10000000O1000O10O1000001N10000O0100000O100O1O2N10O1001UNPKa1^5J3N1O0O10000O101O0O10001O00001N100O101O0O10001N10001O001N1000O1103L1O000010N2O000000O1O1N2DhIEd\\b1" + }, + { + "size": [ + 227, + 500 + ], + "counts": "Rn`1221g6;K2N2O1O1N2O1O1O0O2O1O1O3L2O001O2M3N2N3M1N2O1O1O1N2O1O1O1O1N2O1O1O1O1O1O00001O01O0kNoJ8R5GoJ8Q5GQK8P5GPK9P5FRK9o4DSKb4A\\Ka0d4@[K`0e4@ZKa0f4@YK`0h4_OfJL:f0P5_OeJK?b0m4DYK:g4D]K:d4D^K;b4E^K;b4D`K;a4D_K\\5E[Jb0f5=001O00001O0O2N1O1O2M2O2N1O1N3N1N2O2L3M5Hlo1N]PNNXe20iZM10O_Rk0" + }, + { + "size": [ + 227, + 500 + ], + "counts": "Qfi03n66L4K2O00001N9H4L2N0ODoIHQ6e0000DoIFQ6:PJEo5?", + "choices": [ + "A. ", + "B. ", + "C. None of the above", + "D. 
Both and " + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_323.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000376093.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "noc51n>1O1O100O100O101N00101N1O100O1O100O1O100O1O100O1O100O1O1N2O1O100000000000000000O1001O0000O02O000000O1001O00O10000001OO1000O1001O7I5M4K6K3M3N2M4M3L3N1O1O1N101O2N1N3N3L4M2N2M5L3L3N2N2M3N1O1N10000O2N1000001N1000001N100O1000000O10000O1000000O1000O0100O10O1000O10O10O10O01000O10000O100O100O00100O10000O00100O01O01O001O10O01O1O1O00001O1O1O1O0O3N1O1O1O1O3L3N1O3L3M4J8I4M5K6H6J6Jf>AlA4OO3LZg`6" + }, + { + "size": [ + 480, + 640 + ], + "counts": "QfX36i>3L4N1N3M2N101O2N0O2O001N101N2O2N1O1O1N3N1O1O1O1N2O00G9O100O100N2O10000N2N1100RFXOd5h0[JZOd5e0YJ_Og5`0\\IEYMMZ9=YIMZMG]9;UI6XMCa98UI8XMBb94bG^O2k1[8fNdG@HR2d8^NcGB_OY2m8UNeGBVOa2U9lMeGHQO^2Y9jMeGm2\\ObLX8`0]Hn2ZOlLP86eHo2YOoLP81hHP3WOPMQ80gHQ3XOoLS8MfHT3WOnLT8MeHV3WOkLW8NbHW3WOkLW8McHY3UOjLY8KbH\\3UOiLZ8IaH_3TOiL\\8G_Hb3TOgL^8E_Hd3SOgL^8E^Hf3SOeL`8C^Hi3QOdLm9]3RFdLm9^3RFbLm9a3nEbLR:V400O100O100O100O1N20O10O10000kFWKR8j4oGUKQ8j4QHVKn7j4RHVKn7j4SHTKn7l4S10000ZLRFZ2n9fMXFT2T9hLnFX13l1n8dNTGZ1m8eNUGY1k8gNWGW1i8iNXGW1Q8TMjGd15UN@P3^8CRH`M]Oo2Z8FXH\\M^Oo2W8FWH`MCk2Q8H\\H]MCl2n7IZHRMI:0j2k7LZHTMI^3k7@WHnL1g3i7[O[Hc0f7^OYHb0g7_OXHb0g7_OVHd0j7]OTHd0k7]OUHb0l7Y300O10M3L4I7N2M3]ObG_Jc8`5?TOPGgKQ9Y4SGbKn8T1nF`15ZMn8T1oFb14nLI2V9]1oFb1h9\\N\\Fb1e9]N\\Fb1d9]N^Fb1]1oLY6^1]Ha1Z1QMZ6\\1^Hb1X1RM[6Y1bHa1T1VM[6W1bHc1S1VM\\6V1bHc1R1WM\\6T1dHQ1UO\\Nl1_O\\6R1dHf1P1XM]6P1eHf1[9XNgFg1Z9VNjFh1W9TNmFk1U9PNoFn1U9hMSGW2m:L2O3M2K5J7E=D0O100O1O100O100O100O100O100O100O100O100O1O100O100O100O100O100O100O100004L5K4LWmj6" + }, + { + "size": [ + 480, + 640 + ], + "counts": "X]c44l>3L5L5K4L4L1N1001ClALS>4oAKQ>5SBGm=4nAI71k=5PBI52k=5PBI61j=6PBI]>77JYA1g>Noe_4" + }, + { + "size": [ + 480, + 640 + ], + "counts": "WbU64l>4K2OO1000000O1000O10000000000000001O000001Nkdk2" + } + ], + "question": 
"Which statement correctly describes the actions and position of ?", + "choices": [ + "A. is holding and standing on .", + "B. is standing on and holding .", + "C. is looking at while standing on .", + "D. is holding and looking at ." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_324.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000376093.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "^aQ1>[>9ZOZOgBP1T=b0K5K3N2N5K4K4K5K7K4L4K7J3L4M2Nf:]O^Ec0b:YOaEg0]5M4K6K3M3N2M4M3L3N1O1O1N101O2N1N3N3L4M2N2M5L3L3N2N2M3N1O1N10000O2N1000001N1000001N100O1000000O10000O1000000O1000O0100O10O1000O10O10O10O01000O10000O100O100O00100O10000O00100O01O01O001O10O01O1O1O00001O1O1O1O0O3N1O1O1O1O3L3N1O3L3M4J8I4M5K6H6J6Jf>AlA4OO3LZg`6" + }, + { + "size": [ + 480, + 640 + ], + "counts": "nn[31n>2OO001001O00N\\d`20a[_M;F6M2N1O2O1O0O2O0O101O0oCWOj9j0PF@l9`0SFCj9=WFEf9a2Q1[MaN4=b2R1YMcN49e2S1XMdN36h2V1UMeN23k2X1SMeN21m2Z1QMeN3Mo2^1nLfN2IS3`1lLgN1HT3a1kLhN1EU3c1jLhN2\\N]OHh3T3iLiN2WNBHe3X3hLiN1SNFZOR4j3WLjN1oMd4W3[KjN2jMg4]3WKiN2gMi4`3UKiN3bMl4e3RKhN3`Mm4i3PKgN4[Mo4n3nJfN5XMo4S4lJfNV6Z1kIfNT6[1lIeNS6[1nIdNR6]1nIcNQ6^1oIbNP6^1QJbNn5_1RJ`Nn5`1RJ_No5b1PJYNU6h1PJjMX6W2kI_M[6a2kISM[6n2iIgL]6Z3cI_Lc6b3]IZLf6g3ZITLj6m3WInKl6R4XIeKm6\\4k1001O1O1O001O1O001O1O001O001O00YOnEaLQ:Z3YFbLf9[3_FeL_9Z3eFaL]9]3lFXLX9g3Q1O1N2O1O100O1O100O1O100O100O100O100O100OnNgL_FY3Z9WM\\Fi2b9`MWFa2e9_1_O^K^Fh4^9?L4O0N3\\McJcK_5[4kJ_I^Of1k5j4UKiJn4V5YKcJi4\\5`KXJd4h5bKlId4S6U20O01N2M3M3O1N2N2O1N2N2O1O2L3O1O2M3N1O2gLWG8j8F[G4i8KZG1g8N]GNd81^GKf83]GJe83^GIf85^GEe8:`G@c8>bGXOUNTO]:a1dGmNj8n0g2N2L6K4J9F8ISgh0" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"QfX36i>3L4N1N3M2N101O2N0O2O001N101N2O2N1O1O1N3N1O1O1O1N2O00G9O100O100N2O10000N2N1100RFXOd5h0[JZOd5e0YJ_Og5`0\\IEYMMZ9=YIMZMG]9;UI6XMCa98UI8XMBb94bG^O2k1[8fNdG@HR2d8^NcGB_OY2m8UNeGBVOa2U9lMeGHQO^2Y9jMeGm2\\ObLX8`0]Hn2ZOlLP86eHo2YOoLP81hHP3WOPMQ80gHQ3XOoLS8MfHT3WOnLT8MeHV3WOkLW8NbHW3WOkLW8McHY3UOjLY8KbH\\3UOiLZ8IaH_3TOiL\\8G_Hb3TOgL^8E_Hd3SOgL^8E^Hf3SOeL`8C^Hi3QOdLm9]3RFdLm9^3RFbLm9a3nEbLR:V400O100O100O100O1N20O10O10000kFWKR8j4oGUKQ8j4QHVKn7j4RHVKn7j4SHTKn7l4S10000ZLRFZ2n9fMXFT2T9hLnFX13l1n8dNTGZ1m8eNUGY1k8gNWGW1i8iNXGW1Q8TMjGd15UN@P3^8CRH`M]Oo2Z8FXH\\M^Oo2W8FWH`MCk2Q8H\\H]MCl2n7IZHRMI:0j2k7LZHTMI^3k7@WHnL1g3i7[O[Hc0f7^OYHb0g7_OXHb0g7_OVHd0j7]OTHd0k7]OUHb0l7Y300O10M3L4I7N2M3]ObG_Jc8`5?TOPGgKQ9Y4SGbKn8T1nF`15ZMn8T1oFb14nLI2V9]1oFb1h9\\N\\Fb1e9]N\\Fb1d9]N^Fb1]1oLY6^1]Ha1Z1QMZ6\\1^Hb1X1RM[6Y1bHa1T1VM[6W1bHc1S1VM\\6V1bHc1R1WM\\6T1dHQ1UO\\Nl1_O\\6R1dHf1P1XM]6P1eHf1[9XNgFg1Z9VNjFh1W9TNmFk1U9PNoFn1U9hMSGW2m:L2O3M2K5J7E=D4K2OO1000000O1000O10000000000000001O000001Nkdk2" + }, + { + "size": [ + 480, + 640 + ], + "counts": "lS`2=^>7J6Jb0^O7K3K4M3M3L4N2M3N2M3N2L4K5O1K5L4M3N2M3N2N2N2N2N2O1N2O1O1O1O1O100O1O1O1O100O100O100O100O10000000000000000000000001O00O10000000000O1000000000000000000001O001O0000001O0000001O001O001O001O1O00001O1O1O001O001O1O1O1O2N1O2N2N1O1O2N3M2N2N3M3M5K3M4L4L3M4L5Km0SO;E6J2N2N2L4N2N3M2N2NTfj4" + } + ], + "question": "Which person is holding , and which person is looking at ?", + "choices": [ + "A. is holding , and is looking at .", + "B. is holding , and is looking at .", + "C. is holding , and is looking at .", + "D. is holding , and is looking at ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_325.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000222317.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "kc^1g0T>>E7J2N2O0O1fCQOnN_O10[;`1fEFlNkNn:_1WF2`9N`F9Y9GgFl0f8SO[GX1Z8hNgG]1S8cNmGb1n7^NRHf1j7ZNVHj1e7WN[Hk1c7UN^Hl1`7TN`Hm1_7RNbHo1]7QNcHQ2[7oMeHR2Z7nMfHS2Y7mMgHT2X7lMhHT2X7kMiHV2V7jMjHW2U7iMkHX2T7hMlHY2S7gMmHZ2R7fMnH[2P7fMPI[2o6dMRI\\2n6dMRI]2m6cMSI^2l6bMTI_2k6aMUI`2j6`MVIa2i6_MVIc2i6]MWId2g6]MYId2f6\\MZIe2e6[M[If2d6YM]Ih2b6XM^Ih2b6XM^Ij2`6VM`Ik2_6TMbIm2]6SMcIm2]6SMcIn2\\6RMdIYOkNK2P2]7mNfISOTOG0Y2T7mNhIkN@GJ`2m6nNiIiN:Z2l5mNjIgN=\\2h5mNlIdN?_2d5mNoKR1P4nNQLR1n3nNSLR1l3nNTLS1k3mNVLS1i3mNZLQ1d3PO]LP1b3PO_LP1`3PO`LQ1_3oNdLo0[3QOfLo0Y3QOhLo0W3QOjLo0U3QOlLn0T3ROmLn0Q3SOPMn0n2ROSMm0m2SOTMm0k2SOUMn0j2ROWMn0h2ROYMo0e2QO\\Mo0c2QO]Mo0c2QO]MP1b2PO^MQ1a2oN_MR1_2nNcMR1\\2nNdMS1[2mNeMS1[2mNeMT1Z2kNgMV1X2iNiMX1V2hNjMY1U2fNlM[1R2eNoM\\1P2cNQN^1n1aNSN_1m1aNSN`1l1_NUNa1k1]NWNc1h1^NXNb1h1^NXNb1h1]NYNc1g1\\NYNe1g1ZNZNf1f1YN[Ng1e1XN\\Nh1d1XN\\Nh1d1WN]Ni1b1XN^Nh1b1XN^Nh1b1XN^Nh1b1XN]Ni1c1WN]Ni1c1WN]Ni1c1WN]Ni1b1YN]Ng1c1YN\\Nh1d1XN\\Nh1d1XN\\Nh1d1YN[Ng1e1YNZNh1f1YNYNg1g1YNYNg1g1ZNXNf1h1ZNWNg1i1YNWNg1h1ZNXNf1h1[NVNf1j1YNWNg1i1WNYNi1g1VNZNj1f1VNZNj1f1UN[Nk1e1TN\\Nl1d1SN^Nl1a1TNaNk1_1TNcNk1]1UNdNj1\\1UNeNk1[1TNgNk1Y1TNhNl1X1TNiNk1W1TNjNl1V1SNmNk1S1UNoNi1Q1VNQOi1o0WNQOi1o0WNSOg1l0YNXOd1h0[N]Oa1c0_N@^1`0aNC]1=cND\\1P2@SN>n1^OVNb0j1\\OXNd0h1[OYNe0g1YO\\Nf0d1YO]Ng0c1WO_Ni0a1TObNl0^1SOcNl0^1ROeNm0[1ROgNm0Y1ROiNm0W1QOkNo0U1POmNo0S1oNPOP1P1oNQOQ1o0nNTOP1l0nNWOP1j0oNXOP1h0oNZOP1f0POZOP1f0nN^OP1b0oN@P1`0POAo0?POBP1>POCo0=PODo0=QOCo0=QOCo0=QOCn0>ROBn0>ROBn0>QOCo0=QOCo0=QOCo0=QOCo0=ROBn0>ROBn0>ROBm0?SOAm0?SOAm0?SOAm0?SOAm0?SOBk0?UOAk0?UOBj0>WOAi0?WOAh0`0XO@h0`0XOAg0?YOAg0?YOBf0>ZOAg0?ZO@f0`0ZO@f0`0ZO_Og0a0YO_Og0a0ZO]Of0d0ZO[Og0e0YO[Of0f0ZOZOf0f0[OYOe0g0[OXOf0h0ZOXOf0h0[OVOf0j0ZOVOf0j0ZOVOUN^LV1\\4e0VOSNaLW1Y4g0TOoMhLY1T4h0TOPNgLX1U4h0TOSN
dLU1X4h0TORNgLS1V4l0ROoMlLS1R4n0ROnMnLS1P4o0SOiMRMV1l3Q1ROhMSMW1k3R1QOfMVMW1i3S1QOcMYMZ1f3S1QOaM\\MZ1d3U1oN_MaM[1`3V1nN`MbMZ1`3W1lN_MfMY1^3X1kN_MhMY1]3X1lNhLfLc0R1\\1\\3Y1POYMfM]1Z3[1POVMgM^1Z3\\1POTMiM^1W3^1QORMjM^1V3`1POQMlM^1T3a1QOoLmM_1R3c1QOkLPNa1o2d1ROjLRN_1l2g1SOhLSN_1k2i1o0UNQOl1Q1QNoNo1S1nMnNR2U1kMkNV2X1fMhNZ2[1cMeN]2_1^MbNc2h1iLfIK0Ob4]3m6001O000000001O00001O0000001O000000001O0000001O0000001O0000001O0000001O00000O2N1L4K5F:J7H7M3L4L5M2N2O2M2N2O1O3M2M2O2N1O100O1O101N1O1O2O0O10000O1000001N10001N1000000O2O00001O000O101O00001N100N2J7L3N2O2N1O1N200O2N101N1O100O101O000O101O0O101O000O101O0O10001N1000000[JgHc3Z7ZLkHc3U7\\LVIY3k6gL_Io2a6PMaIo2`6PMbIn2^6QMaJQ2_5oM`KeNiL0O07h1b7CfMcNa2\\1c5O0000001O000O2O00000000001O00001N1000001N1000001O00001O00000000001N1000001O0000VK" + }, + { + "size": [ + 480, + 640 + ], + "counts": "oRk03k>4aAJP>7mAKS>6lAKS>6lAJU>6iAJX>8dAI:Lh=`0WBBi==WBEg=;YBEh=:XBGg=9YBHf=8ZBIf=6ZBKe=5[BLd=4\\BMd=2\\BOc=1]B0c=O]B3b=L^B5a=K^B7a=I_B8a=G^B;a=E^B=b=B]B`0b=AYBd0g=;O101N1O100O100O1O1O1O100O1O1O1O1O2N100O1O1O1O2O0O1O1O1O11O1O2N10O00O2O01N100001O3M00OXNkBe1X=05KM_NhB`1X=aNgB_1Y=bNgB]1_=O000000O11O000000001O1O1O1O002N1O1O1O002N1O2N001O1O1O1O1O1O2N1O1O2N1O00001O1O001O3M1O001O1O1O1O001O1N2K[Ye6" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"X3n6R8000000001O1N2O0000001O001O1O002M101O00001O00001O001O000O10000000O1O100O010O100000000000O01N2L4K5I7M3O1O100O0100000O10000O1000000O10O10O10000O10000O100O00100O100O100O100O1UOcGRK^8m4dGRK[8n4kGmJU8Q5oGlJR8S5PHkJQ8U5PHjJP8U5RHhJP8X5l0O1O1O0oNZFjLf9l2SGdLn8X3\\GaLe8]3`1O1O1O1000000O01000000000000000O1000O100000000000000000O100000O100000000000O10O1000000000000O10O1000000000000000O1000O1000000000000000O10O100000000000000O10O100000000000O100000O10000000O10000000O100000O10\\MiLYIGQN`3f8RMUIo2k6aMeH_2[7lMYHU2g7nMZHn1f7XN_H]1a7cNaH[1_7aNVG_N]1n2^7gNbHX1^7mN^HR1b7TOeG[NF`2e8_OXGUN3[2e86\\GHd8:[GEe8<[GCe8?[G_Oe8b0\\G\\Od8e0\\GYOe8h0ZGXOf8i0ZGVOf8l0YGSOg8m0cGfLFj1g8a1RHXNn7j1QHUNo7l1QHSNo7o1QHoMo7g2\\GWMe8k2ZGTMf8n2YGQMg8P3YGoLg8R3YGmLg8T3ZGjLf8W3ZGhLf8Z3YGeLg8\\3ZGbLf8_3ZG_Lg8c3XG\\Lh8e3XGZLh8g3XGXLh8i3ZGoKk8R4\\G`Kj8b4i01O3M5K6J00O1O1O104K2O1N2O3M;DZ1B:D=E?A;E;E:F9GB=Cg0YOe0cLmCc2Q=WOh0WOc0^O_dP5" + }, + { + "size": [ + 480, + 640 + ], + "counts": "`UZ33k>2O2N2N1O2N1O2O0O2N1O101N00100O1O010O01000O010O101N10000O2O00001O0O101O001O0O2O001O001N101O001O001O1O2N0000001O000000001O0000001O0010O01mBfN^a0Aa0_Oo0QOa0_Oc0]Of0ZO?", + "choices": [ + "A. Lying on , which is on .", + "B. Lying on , which is attached to .", + "C. Attached to .", + "D. Lying on ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_326.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000148719.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "0]3c;001O00000000001O001O1O1O00001O2N1O1O000000000000O1O100O100000000001O001O1O00001O001O0000O1O1O1O1K5N204L3N1M4M1O7I4L2N1O1N9H4MgNkMiEP2X:YNbEd1^:aN^E^1b:cNeEU1[:oNdEn0\\:TOeEi0[:[OaEe0_:^O_Ea0a:BbE8^:KcE1]:3`EL`:9\\EFd:<\\EBd:a0\\E\\Od:f0\\EXOd:i0\\EVOd:m0YESOg:n0ZEPOf:S1[EiNe:Y1^EbNb:a1]E]Nc:h1YEWNg:j1YEUNg:n1[ElMf:V2Q12N2N2N1O2N2N4L2N3M4L2N2N1O2N1O00000000000000000000000000000000000000000000000000000004WMYDQ2gA:G8Ha0_O6J8H3LQX2o0ofM;D:I8H`0@7I6J;E1O1O001O000000001O0000O1001O0000001O0000000000000000000000001O0000001O001O2N1O001O00000000001O000000000000000000O1001O0000001O1O5K4L3M4L3M3M1O1O1O0aEbK\\:]4dEdK\\:\\4eEcK[:]44L4]Oc0A?001Oj0VON2O1000000O1O1O1N2O1N2N2N2M3M3001ON200O11O00000000O100O1O1O100000000000000M300N20000N20000O1O100O100O10000000000O1001O01O1O00010TNPDf0R<^NlCLj0d1`4dNObC3l;GYCj0?Dm;AdCW1;\\Ol;l0SDWOj;k0UDVOg;o0UDSOi;R1oCTOo;P2N2N1O1O2M200O2O001O0O101O001JbLhD_3W;aLiD_3[;10001O0000000L_LiDa3W;_LiDa3W;_LhDb3X;^LhDc3Z;00001O00001N1O101O001O0000001O00002N2M3N101N2N1N200O1O2M101N200O0O101O00001O000000001O0O11O0O1GdEnK\\:[40O10000000000O0O2O10000001N11O01N1O010O1O101O001O00O10001OO100000001O0O100O01000O2O01O00O101O00001O000000001O0O10000O101N100000001N1O100O2L3M3N3M2O101M2N2O100N2O2M2N2O1N3N1M3O1N2N3N1O1K6M2B[I^Hi6a7WI]Hk6b7:0fJXH\\3g7dLZH\\3e7fLZHZ3f7dLZH_3e7n1O100000000O2N11O00O2O001O00000000000O2O000000001O00O1001O0O101O001O00001N1001OO2O0000001O00O1001O00000O11O0000O105K1O1O5K3M3M100O2M3N10O01N1O2O1O10O0001O00000000000000001O000O1000000000O100000001O000O01000O2O0O1O100O10000O1O1O2O0O1O10000O1O1N2O2M2O1N2L4N2O1N2N1K6N2N2M3K5C=E;O1O20O00000000000000000000000001N100O1O100O1O2O000O100O1N3N1000000O100O2O0O1O2N1000000O10001N1O1O1O101O000O2N102N0O2N3M6K1O3L7J5J5L1N7I;E2O1O3M1N6J5K2O1N10O01O100O10
0O100O10000O10000000O01001O1O2N3M1O1N101O00000O101N1000000000O1000000O1O10000O0O2N1N3O1O1F:F:L41O0000000000000M3OM400O100M[CkMe:fANU>1mAGZ>0cA35Kmi7" + } + ], + "question": "Which of the following statements accurately describes the arrangement involving , , and ?", + "choices": [ + "A. is beside and in front of .", + "B. is beside and in front of .", + "C. is beside and in front of .", + "D. is parked on and beside ." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_327.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000148719.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "U5[3e;00O1000000000O100000O100O10O102]DoLS;d3K4L3L4M1O2N1O1O2J`KhE`4X:`KhEa4[:1DeKQF\\4m94dNObC3l;GYCj0?Dm;AdCW1;\\Ol;l0SDWOj;k0UDVOg;o0UDSOi;R1oCTOo;P2N2N1O1O2M200O2O001O0O101O001JbLhD_3W;aLiD_3[;10001O0000000L_LiDa3W;_LiDa3W;_LhDb3X;^LhDc3Z;00001O00001N1O101O001O0000001O00002N2M3N101N2N1N200O1O2M101N200O0O101O00001O000000001O0O11O0O1GdEnK\\:[40O10000000000O0O2O10000001N11O01N1O010O1O101O001O00O10001OO100000001O0O100O01000O2O01O00O101O00001O000000001O0O10000O101N100000001N1O100O2L3M3N3M2O101M2N2O100N2O2M2N2O1N3N1M3O1N2N3N1O1K6M2B[I^Hi6a7WI]Hk6b7:0fJXH\\3g7dLZH\\3e7fLZHZ3f7dLZH_3e7n1O100000000O2N11O00O2O001O00000000000O2O000000001O00O1001O0O101O001O00001N1001OO2O0000001O00O1001O00000O11O0000O105K1O1O5K3M3M100O2M3N10O01N1O2O1O10O0001O00000000000000001O000O1000000000O100000001O000O01000O2O0O1O100O10000O1O1O2O0O1O10000O1O1N2O2M2O1N2L4N2O1N2N1K6N2N2M3K5C=E;O1O20O00000000000000000000000001N100O1O100O1O2O000O100O1N3N1000000O100O2O0O1O2N1000000O10001N1O1O1O101O000O2N102N0O2N3M6K1O3L7J5J5L1N7I;E2O1O3M1N6J5K2O1N10O01O100O100O100O10000O10000000O01001O1O2N3M1O1N101O00000O101N1000000000O1000000O1O10000O0O2N1N3O1O1F:F:L41O0000000000000M3OM400O100M[CkMe:fANU>1mAGZ>0cA35Kmi7" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"d45k>0N200O1O2N2O1N1O1O1O010O2O3L4M000O100O010OO2N1NI9O10O1010O0100O1O2M2O2N2FfAK^>OfANg>OjSi8" + }, + { + "size": [ + 480, + 640 + ], + "counts": "0]3c;001O00000000001O001O1O1O00001O2N1O1O000000000000O1O100O100000000001O001O1O00001O001O0000O1O1O1O1K5N204L3N1M4M1O7I4L2N1O1N9H4MgNkMiEP2X:YNbEd1^:aN^E^1b:cNeEU1[:oNdEn0\\:TOeEi0[:[OaEe0_:^O_Ea0a:BbE8^:KcE1]:3`EL`:9\\EFd:<\\EBd:a0\\E\\Od:f0\\EXOd:i0\\EVOd:m0YESOg:n0ZEPOf:S1[EiNe:Y1^EbNb:a1]E]Nc:h1YEWNg:j1YEUNg:n1[ElMf:V2Q12N2N2N1O2N2N4L2N3M4L2N2N1O2N1O00000000000000000000000000000000000000000000000000000004WMYDQ2gA:G8Ha0_O6J8H3LQX2o0ofM;D:I8H`0@7I6J;E1O1O001O000000001O0000O1001O0000001O0000000000000000000000001O0000001O001O2N1O001O00000000001O000000000000000000O1001O0000001O1O5K4L3M4L3M3M1O1O1O0aEbK\\:]4dEdK\\:\\4eEcK[:]44L4]Oc0A?001Oj0VON2O1000000O1O1O1N2O1N2N2N2M3M3001ON200O11O00000000O100O1O1O100000000000000M300N20000N20000O1O100O100O10000000000O1001O01O1O00010TNPDf0R<^NlCLj0d1`?", + "choices": [ + "A. and ", + "B. , , and ", + "C. and ", + "D. , , and " + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_328.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000098716.jpg", + "mask_rles": [ + { + "size": [ + 359, + 640 + ], + "counts": "nh59l::G1O1O1O1O1H8M3O100O100001O000YOVF1k9Mj0K`nc6" + }, + { + "size": [ + 359, + 640 + ], + "counts": "]kd0:j:6K2O1O1J]OgEe0V:700O1N200O100O@mENS:0QFMP:2SFLn93RFNm90VFOl9NUF1m9MTF3b:JnTT6" + }, + { + "size": [ + 359, + 640 + ], + "counts": "Qn]6l0Z:Y1hFZN^6_3I1O00000001O001O0SMhHTO3]2W7mMgIk1[6TNfIi1\\6VNeIj1[6VNeIj1[6WNdIj1[6UNfIk1Z6UNfIj1[6VNdIk1\\6VNaIk1`6iNaHQO0\\2`7`0g0N2O1O1O1M3N2O2O0lM_Gb1a8\\N_Hf0a7ZO`Hd0a7[O`He0`7\\O_Hd0a7\\O^He0b7[O^He0b7[O^He0b7\\O]Hd0c7\\O]Hd0c7\\O]Hd0c7]OdGE;n0Q8`1000000000000000000YK" + } + ], + "question": "What is the spatial relationship between and ?", + "choices": [ + "A. is in front of .", + "B. is in front of .", + "C. is sitting on with .", + "D. is over ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_329.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000203317.jpg", + "mask_rles": [ + { + "size": [ + 423, + 500 + ], + "counts": "YUi2b0_<:I7I6K4L4L4L4L3N3L3N2N2M3N2N2N2N2N2N2O0O2N2N101N1O2O0O2O0O101N100O101O0O1000001N1000000000000000O100000000O100000O0N3N2N1N3O0N3O0M4M2M4N1RNXL\\Jl0\\Nj1Y7]MVJb0kNo1P7dMnI=TOo1m6SN_INGm1j6[NWIJ0j1i6`NoHL8b1h6dNnHM:]1g6iNkH^O1A=V2f6nM[Ie0JVO5U2f6oM^I`0\\OTO=82T2h6nM`I=OAIR2j6gMfIb0LEEo1l7WNgHJ]Om1j7ZNlHIZOk1g7^NRIGWOh1e7cNWIEUOe1c7eN\\IGQO_1e7hN^IInN\\1d7iNbIKjNY1f7gNfI0eNU1g7jNfI1eNo0h7oNeI2dNk0j7ROdI2dNe0m7XO`I3gN;n7A]I4V8KkG5T8JnG7P8IQH7n7HTH8l7GUH9j7GXH8\\:1O0000O11O00000XCH`A?o8SO^G?B`0P9ROZGa0D?R9QOUGc0H`0Q9mNUGe0Ha0R9jNSGh0Ib0R9gNPGc0GWO6b1Q9eNnF40IKL5e1P9bNmF3:0G]1P9QO^GA_Ob1R9iNdGC[Of1o8dNiGDXOk1n8_NmGDVOo1k8ZNRHT2m7iMWHW2h7iMXHX2g7gMZHZ2e7fM[H[2d7eM[H]2d7cM[H_2d7aM[H`2e7`MZHb2d7_M[Hc2d7]M\\Hc2d7\\M\\Hf2c7ZM]Hg2b7YM^Hg2b7XM^Hj2a7UM`Hk2`7TMaHm2_7PMcHP3^7nLcHS3h800001N10001O0001O0001O00000000000000000000000000O1000000hLVFR3k9nLUFR3k9nLUFR3k9mLVFR3k9oLTFQ3l9oLTFQ3l94100O100O1O100O100O1O1O1O100\\MkEU2W:iMlET2V:iMmEU2U:gMoEW2a:N4M1O2M3N3L3M4L5`N_DQ1f;lN]DP1Qk;BUD>k;BUD>Z<0EBmC>So;ChC09=^<000001OO1001O00O10000O1N2N2O1O1O1N2O1N2O10O0101O00000000O1001O00000000O011O00000000000000000000000000000000000000000000001O0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001O0000000000000000000000000000000000000000000000000000000000000000000000001O001O1O1O2N1O1O1O1O1O1O1O2N001O001O1O1O00010N101O1O00001O01OO1000001O00000000001O000000O10000000000O1000000O100O10000O100O1000000O1O1O1O10000N2O100O100O1N2O1O1O1O1O100O100000000000000001O000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001OO100000000001O00000000O100001O001O001O1O1O1O001O1O001O001O1O1O0000001O001O00001O0000001O00000000000000000
00000O10000000000O10000O10000O10000O1O100O1O10000O1O1O100O1O1O1000000000000000000001O00O11O00000000000000000000000000000000O11O000000000000O11O0000000000_C" + }, + { + "size": [ + 423, + 500 + ], + "counts": "0m;Z1000000001O00O100000000000000000000000000000000001O000000000000001O00001O00000000000000000000000000001O00001O0000001O0000000000O100O100O1N2N2KgNZD[1d;5O1O1M3O1O11O0000001O00O1001O00O11O00000000001O000000000000000000001O00O11O000000001OO1ZK]NhMc1n6000RK\\NYNd1f1]NZNc1f1\\N[Nd1c1^N]Nb1c1^NVJN_3d1\\2]N\\Nc1e6O00PK_NZNa1S5_N`Ga1S:0dM_N^GOm0b1d7eNkFK:OV1a1h7]NnF240V1a1R:01O_M_NfG1i0a1a7]NgG1i0b1]7^NkG0i0b1Z7aNlGMj0b1Z7bNkGNh0a1]7]NoG1f0a1Q:OPK^N[Nb1d1_NdK1>`1X6^NiIb1W7^NiGb1W7_NQH0f0b1Y7_NPHOTO0`1b1]7]NgIb1o80PK^N[Nb1d1_NfKOB]Ob2o6lMfIA\\Ob2n6kMfHUOP1?\\O`2m6lMkIEWO^2l6kMRJGRO]2k6lMVJEoN_2j6kMZJFlN^2j6hM_JKiNY2h6cMiJ4`NW2g6eMjJ3aNV2e6gMjJ6_NR2h6gMjJ6aNi0_OhN0Q1X7UOlJ7_Nd0FNP7UOmJ9\\N`0^OSO9P1P7TOnJ:_N8G:l6SOPK:Db0]5POTK=_Ob0]5ROTK>\\O`0`5SOTK=[O`0a5ROVK=YOa0V8^OiGb0b5QOYK>TOa0c5ROYK=SOa0[8_OdGa0^8@nEC\\1m0f8HVFYOf0o0U9GUF[Oe0n0X9FTG9m8GRG9n8GRG9n8HoE[Oh0n0\\9EjE_Oi0l0_9ChEBh0k0a9BgEEf0i0d9EbEBk0h0d9L]F4d9L[F4f9LYF4h91RFOP:3lEMU:3jEMW:4gELY:5fEK[:5dEK]:4cEL]:4cEL^:3bEM^:4`EM`:4_ELa:4`EK`:6_EJa:6_EJb:6\\EKd:5\\EKf8XOhHn0bNJf8\\OdHj0fNJe8]OeHi0fNJe8]OeHi0fNJe8A`Hf0kNIf8@_Hg0kNIf8A\\Hg0oNHe8CWHh0TOEf8FmGj0]O@g8GgGl0B]Og8GcGP1FYOh8G^GS1KUOg8JYGT10ROh8o1WGRNi8n1WGRNi8n1WGRNk7WOeHf2@SNj7YOfH\\1ZO@7Ji7\\OdH`2ESNg7^OcH_2FSNg7^OcH^2GTNf7^OcH]2HUNe7^ObH_2HSNf7_O`H_2ISNg7@\\Ha2JPNj7_O[Hd2ImMl7_OZHh2HhMn7ASHn2MbMP8U400O10VLQH\\2o7XM_Hf2a7WMPHUO?d3a7WMbHi2^7SMPHYOc0d3]7oLkHP3U7oLlHQ3T7nLmHR3S7lLoHT3Q7lLoHT3Q7lLnHU3R7jLoHV3Q7jLnHW3R7iLnHW3Q7jLnHW3R7iLmHX3S7hLmHX3S7hLmHX3S7hLlHY3S7hLmHX3T7gLlHY3U7fLkHZ3U7dLmH\\3S7bLnH_3W7VLoHj3W800000001OO1001O00000000000000000000000000000000000000000000000000000000000000000000000000001O001O1eLaFl2_9QMeFn2]9nLeFR3\\9jLfFW3e903M3MM30lLSFn2U:hNhE]OY:`0jE_OW:=lECX:oNgEo032g:^ObEa0b;0O11O00000000000000000000O11O000000O10000000000000000000000" + } 
+ ], + "question": "Which statement accurately describes the position of ?", + "choices": [ + "A. is parked on and attached to .", + "B. is attached to and parked on .", + "C. is parked on and beside .", + "D. is parked on and attached to ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_330.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000542856.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "mWY66R=5L4N1N20O00001O0O2O1O5HdVl1" + }, + { + "size": [ + 427, + 640 + ], + "counts": "W6b5h7001O10O100000000O001O100O100O1O10000M3N2N10100O100000O1000000O100001O00O10000000O1000000O100001O00000000000000O02O000001O00O1001O0O0101OO100000000O1000000000000000000000000000000000000000000000O100000000000000000000000001O000000000000001O000000000000000000000000000000000000000000VNoGZNQ8`30000000000000000000000000000O100001O000000000000000000000000000000O1000000001OO10000000000000000000000000000000000000000O10000000000000000O100000000000000000000O10000000000000000000000000000000000O100N2N2McJ[HZ5c7hJ]HX5c780000OaJ^HW5b7hJ`HW5`7jJ^HW5b79000_J`HW5`7iJ`HW5`7jJ_HV5a7jJ^HW5b79O10000000000000000002N1_J\\HY5d77O10[J]Hb5c7]J_Hb5a7^J_H2N\\5b7dJ^Ha5b74004L1O01O00O100000O11O0`JYH\\5h7cJXH]5h7cJXH]5g740`JYH\\5h7bJYH^5g7bJYH^5f7dJYH\\5g7dJYH\\5g7dJYH\\5g7dJYH\\5g7dJYH\\5g740000000`JYH\\5g7400000000000000000`JYH\\5g7400000000000000000000000000000000000000O10000001OO100001O00O100H]JhHc5X7]JiHb5\\3^J60^Lb5U3`JgLNf30^Lb5R3dJiLJo3c5ZO_Jm0`5_3000G^JhHc5X3^JcL1g3O]Lb5Y7^JgHb5Y7]JhHc5a7N10O110O1O1N1000O1000000001O1O1O1GXJmHi5Z71O2N1O1O001O1N1N3N23M00O1O00001O00000O20O1O0000000000000000000000000000000O10000000000O1000O01O10000O1O100O1O1O1O100O1N2O1N2O1N2O1O10000O100000000000000000000O10O2O00010N101O0001OO10000001N0101O00O100000000000000000000000000000000000O11O0000O1O100000000001O0000O100000000000000000000O100000000001O00O10O1000000001O000000001O000000O100000000O101O00O03N1O0000001O8dJoG7:o3P8jKWH
S4j7mK\\Hm3d7RLaHj3c8M:E7J4jLlEm2Z:O5K9F7J2N1oMmDg1^;N8aNaDFIi0g;]ObDJH0Od0k;]O_D4F>m;]O_Di0T and the other objects?", + "choices": [ + "A. is driving on and contains .", + "B. contains and is being driven by .", + "C. is parked on away from .", + "D. is an advertisement seen on and ." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_331.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000333745.jpg", + "mask_rles": [ + { + "size": [ + 640, + 427 + ], + "counts": "RTR1=ac03K5K5N1M4M3L3M3dKVO`Em0Z:BZE`0b:NPE6n:6gDKX;?^DBa;e0YD[Of;h0UD[Oj;h0oC]OP interacting with and ?", + "choices": [ + "A. is holding and standing next to .", + "B. is holding and carrying .", + "C. is on top of , and is walking towards them.", + "D. is carrying and wearing ." + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_332.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000189698.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": 
"2Z4R90O2O000000O1001OO1O1O1001OO100O100000000O11O00O1O1O100O10000O11O0000O1001O0000O1000000000000001O001O0VGXLEF1NQ8U4VHeLg7[3YHfLf7d4LNVHdJh7]5WHdJh7a5O2N1O001N102N00100N010001M2001N10O2N1O1001O00001O0O2O10O01O2N1O1O2N5K2N1nHoIg6^6MO1001O2\\IZI^6j60N200002N00001O001O1O1O3M1O1O0000O11O1OM300000000O1N2O10000000000O1O100000O10010O1O1fIPIV6R71001O1O1N2O1O1O000000O11O1O001O1O1O1O001O001O1O1OdKYJT1f5iLmLmN_Mf3c5]MUMb2j2^MWMd2f2\\MZMe2e2[M[Mf2XMYLT5Q1dMf2WM[LT5o0eMh2TMZLX5m0dMk2UMULW5P1dMk2VMTL0OQ5S1iMU3U2kLkMW3UMhKi4Q1RNW3S2iLmMk2TMXLS5m0iMk2UMSLV5R1eMQ3Y2oLgMQ3Y2oLgMn2\\2RMdMk2_2UMaMj2`2VM`Mf2d2ZM\\Mc2g2]MYMa2i2_MWM`2j2`MVM_2k2aMUM_2k2aMUM_2k2aMUM_2k2aMUM_2k2aMUM`2j2`MVM`2j2`MVM`2j2`MVMa2i2_MWMe2e2[M[Mg2c2YM]Mh2b2XM^Mi2a2WM_Mj2`2VM`Mj2`2VM`Mj2`2VM`Mk2_2UMaMk2_2UMaMl2^2TMbMm2]2SMcMm2]2SMcMn2\\2RMdMn2\\2RMdMo2[2QMeMP3Z2PMfMQ3Y2oLgMR3X2oLgMT3V2kLkMW3S2iLmMX3R2hLnMY3Q2hLnMX3R2hLnMX3R2hLnMX3R2hLnMX3R2hLnMX3R2hLnMX3R2hLnMX3R2hLnMX3R2hLnMX3R2hLnMX3R2hLnMY3Q2gLoMY3Q2gLoMY3Q2gLoMY3Q2gLoMZ3P2fLPN[3o1eLQN[3o1eLQN[3o1eLQN[3o1eLQN[3o1eLQN[3o1eLQN[3o1eLQN[3o1eLQN[3o1eLQN\\3n1dLRN\\3n1dLRN\\3n1dLRN\\3n1dLRN]3m1cLSN]3m1cLSN]3m1cLSN]3m1cLSN]3m1cLSN]3m1cLSN]3m1cLSN]3n1bLRN^3n1bLRN^3m1cLSN]3m1cLSN]3m1cLSN]3m1cLSN]3m1dLRN\\3n1dLRN\\3n1dLRN\\3n1dLRN]3n1bLRN^3m1cLSN^3l1bLTN`3j1`LVNa3i1_LWNa3i1_LWNb3h1^LXNb3h1^LXNa3i1_LWNa3i1_LWNa3i1_LWNa3i1_LWNa3i1_LWNb3WMYK_4T1[Nb3UM\\K^4S1\\Nb3h1^LXNb3h1^LXNc3g1]LYNe3e1[L[Ne3e1[L[Nf3d1ZL\\Nf3d1ZL\\Ng3c1YL]Nh3b1XL^Nh3b1XL^Ni3a1WL_Ni3a1WL_Ni3a1WL_Ni3a1WL_Ni3a1WL_Ni3a1WL_Ni3a1WL_Nj3`1VL`Nj3`1VL`Nj3`1VL`Nj3`1VL`Nj3`1VL`Nj3`1VL`Nk3_1ULbNj3^1VLaNk3_1ULbMkNROP5\\3ULbMkNROQ5[3TLbNl3^1TLbNm3]1SLcNm3]1SLcNm3]1SLcNm3]1SLcNl3^1TLbNl3^1TLbNl3_1SLaNm3^1TLbNl3^1TLbNl3^1TLcNk3]1ULbNl3^1TLbNk3_1ULaNk3_1ULbNj3^1VLbNi3_1WLaNh3`1XL_Nh3b1XL^Ng3c1YL^Ne3c1\\L\\Nc3e1\\L[Ne3e1[L[Ne3e1[L[Nd3f1]LZNa3g1_LYNa3g1_LYNa3g1_LXNb3h1^LXNb3h1^LYNa3g1_LYNa3g1_LXNb3h1^LXNb3h1^LXNb3h1^LXNc3g1]LYNc3g1\\LZNd3f1\\LZNe3e1\\LZNd3f1\\LZNe3e1[L[Ne3e1[L[Ne3e1[L[Nf3d1ZL\\NQ1WM<]4cN]NoNHn1j1SO^NlNLQ2e1S
O_NkNMR2d1ROZ1o0eNRO[1m0eNSO[1n0dNRO\\1n0dNRO\\1o0cNQO]1o0cNPO^1P1bNPO`1n0`NQOn7M3O10000O1001O00O10000000000001O002N3mFlNWM6D:2En0d0ROXO?3P4Q1mLkNK;mNU13Z1d3[NbMkNIaNU12]1f3VNoNXOYMV10^1g3TNPOYOWMV11^1h3SNPOY4o0gKQOZ4n0fKROZ4n0fKROZ4n0fKRO[4m0eKRO\\4n0dKRO\\4n0dKRO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO^4l0bKTO^4l0bKTO^4l0bKTO^4l0bKTO^4l0bKTO^4l0bKTO^4l0bKTO^4l0bKTO]4m0cKSO]4m0dKRO\\4n0dKRO\\4n0dKPNjNYOb5g2dKPNjNYOb5g2dKPNjNYOc5f2cKQNjNYOc5f2cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKQMiNB0^1d5o1cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO^4l0bKTO^4l0bKTO^4l0bKTO^4l0bKTO_4k0aKUO_4k0aKUO_4k0aKUO`4j0`KVO`4j0`KVO`4j0`KVO`4j0_KWOa4i0`KVO_4k0aKUO^4l0bKTO^4l0bKTO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO\\4n0dKRO\\4n0cKQMjN?0Bc5n2cKSO\\4n0eKQOZ4P1fKPOY4Q1gKoNY4Q1gKoNY4Q1gKoNY4Q1fKPOY4Q1gKoNY4Q1gKoNY4Q1gKoNY4Q1gKoNY4Q1gKoNY4Q1gKoNY4Q1gKoNY4Q1gKoNY4Q1gKoNY4Q1gKoNZ4P1fKPO\\4n0dKRO]4m0cKSO^4l0bKTOa4i0`KVOk4?UKAn4TKBk4?UKAj4`0VK@i4a0WK_Ob2dMoMn2_O^O]2jMSNh2@^O\\2lMSNf2A^O[2mMTNe2A^OX2SNSN`2E]OS2YNWNZ2E^OR2[NXNW2F^OQ2^NVNU2I]OQ2_NUNT2J]Oo1bNVNQ2K]Om1gNSNn10[Ol1jNQNl13ZOk1lNPNk15YOj1mNQNj15YOi1oNQNh16YOh1ROoMf19XOf1TOPNe1:WOe1UOQNd1:WOd1VORNc1:WOb1XOSNb1;VO_1\\OTN_1=UO^1]OUN^1=UO^1]OTN_1>TO]1^OUN^1>TO[1@WN\\1>TOZ1AWN\\1?SOY1BWN\\1`0ROY1BWN\\1`0ROX1CWN\\1a0QOV1EXN[1b0POU1FXN[1c0oNU1GWNZ1d0oNU1GVN[1e0nNU1GVN[1e0nNT1HVN[1f0mNS1IUN\\1h0kNR1JUN\\1i0jNQ1KVN[1i0oMkNjNV2l1VN[1i0jNQ1MTNY1k0jNP1OSNX1m0iNP10RNW1n0iNo01SNV1n0iNo01SNV1n0iNo01RNW1o0hNn02RNW1P1gNm03RNW1Q1fNm03QNX1R1eNm03QNX1R1eNl05PNW1T1dNk06oMX1V1bNk0_4UOaKk0_4UOaKk0_4UOaKj0`4VO`Kj0`4VO`Kj0`4VO`Kj0`4VO`Kj0`4VO`Kj0`4VO`Kj0`4VO`Kj0`4VO`Kj0`4VO`Ki0a4WO_Ki0a4WO_Ki0a4WO_Kh0b4XO^Kh0b4XO^Kh0b4XO^Kh0b4XO^Kg0c4YO]Kg0c4YO]Kg0c4YO]Kg0c4YO]Kg0c4YO]Kg0c4YO]Kg0c4YO]Kf0d4YO]Kg0c4ZO\\Kg0c4YO]Kg0c4YO]Kg0c4YO]Kg0c4YO]Kg0c4YOZKj0f4VOXKm0g4TOTKP1l4h30001O000000001O1O0000000000000000001O00000000001O00001O0000001O000000001O00000000
000000000000001O00000000" + }, + { + "size": [ + 428, + 640 + ], + "counts": "Uj[12m in relation to ?", + "choices": [ + "A. It is driving on .", + "B. It is located under .", + "C. It is parked beside .", + "D. It is positioned behind ." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_333.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000362434.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "PjU3`1Q:60000000000000000000000000000000001O000000000000000000O100O1O10000O100001O00001O1O00001O1O001O001O00001O001O1O1O001O2N1O1O2N2N1O3M3M2N3M2aG]Mf7f2hGoMR8m2N1O3M2N2N1O1O3M1O1O1O0000001O0000001O00001O001O0000000000001O00000000000000000000000000001O000000O10000000000001O00000000000000000000000000000000000000001O000000000000000000000000000000000000001O000000000000000000000000000000000000000000000000001O000000000000000000000000000000000000000000000000000000" + }, + { + "size": [ + 375, + 500 + ], + "counts": 
"U6a5V6000000000000000O1000000000000000PMoI:Q6WMPJV21b0o5WMRJU22b0l5XMVJR20e0W6ZOkId0V6ZOlIe0V6^M]If1>k0i6^MTIi05g1[7`MYH;>T2d7hM_HV2d7gM]HX2b8O2N1O2N4L2N1O1O2N001O3M001O3M1O3M4L2N5K2N001O00001O00001O00001O00001O1O00000000001O00001O001O003M001O2N1O00001O2N001O001O1O001O1O1O1O1O1O001O000000O1000000000000000000O100000000001OO100000000000000000000000000O10000000000000000000000000000O100000000000000000000000000000000000000O1000000000000000000000000000000O1000O11O00000000000O10000000000000000O10000O1000000000000000000000000000000000000000000O100001O00O1000000000O1O11O1O0O11O0000000000000000000000000000000000001O00000000000000001O00001O0000001O001O00000000000000000000000000000000001O000000000000000000001O0000000000DGZE9e:H\\E7c:J]E6c:J]E6c:J]E6c:K\\E5c:L]E4c:L]E4c:L^E3a:N_E2a:N_E2a:N_E2`:0ZEHO8g:0ZEH07f:1ZEH07f:1ZEH07f:1ZEH16e:2ZEH16e:2ZEH16e:2ZEH16e:2aEN^:3bEM^:3bEM^:3cEL\\:6cEJ\\:7dEI\\:6eEJ[:7dEI\\:7dEI\\:7dEI[:8eEHZ:9fEGi92RF96De9n0[FROb9Q1^FoN`9S1`FmN^9U1bFkN[9X1eFhNY9Z1gFfNW9\\1iFdNU9^1kFbNR9a1nF_NQ9b1oF^NP9c1PG]No8d1QG\\NW82jGe1OYNT85kGd11WNR87kGd13UNn7;nGa14TNj7?QH^15SNh7a0RH\\17SNh7`0PH_17QNi7?PHa1OlM24P8>oGb17PNj7>nGc18oMk7, , and ?", + "choices": [ + "A. is on and attached to .", + "B. is on and beside .", + "C. is inside and on .", + "D. is beside and on ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_334.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000052565.jpg", + "mask_rles": [ + { + "size": [ + 458, + 640 + ], + "counts": "_W\\23U>4N1O0O2O1N2O0O2O1O1N100010O000000100N100O100000O1O101O1N4L7H7J5J;F8]CVNWUO_B0_f\\2" + }, + { + "size": [ + 458, + 640 + ], + "counts": "e;d2g;O00001O00001O000000001O00000000000000000000001O00001O0000000000001O001O00O10000001O000000000000000000000000000000000000001O00000000001O000000000000001OO100000000000000001O000000O100001O00000000000nMQDi1P positioned relative to and ?", + "choices": [ + "A. is standing on , with behind it.", + "B. 
is standing between and .", + "C. is standing on and is positioned in front of .", + "D. is partially submerged in , next to ." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_335.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000304396.jpg", + "mask_rles": [ + { + "size": [ + 640, + 425 + ], + "counts": "kTW4;bc07I6K5L3M2N3M2O2M2N2O1N2O1O1O2N001N2O1O010O1O001O0010O01O010O00010O01O001O001O10O01O1O1N2O1O1O1N2O1O2M2N2N3M3M2N4K6I:Bdnl2" + }, + { + "size": [ + 640, + 425 + ], + "counts": "\\no1c0Vc0;H7H7K5K4L4M2N2N3M2N1O2N2O001N100000000O11N100O2O0O2N1O2O1N2N2M4M2M4K5K6I7I8K;\\OQS`5" + }, + { + "size": [ + 640, + 425 + ], + "counts": "R>V1l0VOVOG_13`N6a0HQ>R1VBB;YOSOe0K[O89b=L^BU1IR3OhKR=2nBf10g2c<_K]CO1o6`T3^A^MT>c2jAmMg=S2XBVN`=k1^BbNV=^1iBQOij;BSDc07\\Le:R3REc08cL_:k2WEc09mLV:k6gEbIn9_6mEoIi9\\8H5K5K00000000000000O10000O100O100O1O1O1N2O100O100O1000000O1000000O1000000001O1O00001O0000001O002N1O1O00001O1O1O001O001O00001O001O1O1O001O1O1O1O001O00001O00001O1O001O1O001O001O1O1O1O0bKiDD1_LW;k3iDE2_LR and , and the tower, ?", + "choices": [ + "A. Both and are on , and is beside .", + "B. is on , and is beside .", + "C. Only is on , with located beside it.", + "D. is on , which is on ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_336.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000304396.jpg", + "mask_rles": [ + { + "size": [ + 640, + 425 + ], + "counts": "\\no1c0Vc0;H7H7K5K4L4M2N2N3M2N1O2N2O001N100000000O11N100O2O0O2N1O2O1N2N2M4M2M4K5K6I7I8K;\\OQS`5" + }, + { + "size": [ + 640, + 425 + ], + "counts": "R>V1l0VOVOG_13`N6a0HQ>R1VBB;YOSOe0K[O89b=L^BU1IR3OhKR=2nBf10g2c<_K]CO1o6`T3^A^MT>c2jAmMg=S2XBVN`=k1^BbNV=^1iBQOij;BSDc07\\Le:R3REc08cL_:k2WEc09mLV:k6gEbIn9_6mEoIi9\\8H5K5K00000000000000O10000O100O100O1O1O1N2O100O100O1000000O1000000O1000000001O1O00001O0000001O002N1O1O00001O1O1O001O001O00001O001O1O1O001O1O1O1O001O00001O00001O1O001O1O001O001O1O1O1O0bKiDD1_LW;k3iDE2_LRZ]OCeb0=Z]ODfb0;Z]OGeb09[]OGeb09[]OGeb08\\]OHdb08[]OHfb08Z]OHfb07[]OIeb07b0N2N21O001O0e\\OLjb04R]O2lb0OQ]O4nb0`0O1O00001O00001O002N1O3M3M1O1O002N4L1O1O001O00001O001O00001O00001O1O00001O00001O00001O00001O001O00001O00001O001O00001O00001O001O001O00001O00001O00001O00001O001O00001O00001O1O1O0000O100O100O10000001O000000000000001O2N001g^OhMh`0m2K1O001O001O000Z@gLe>Z3XAUM[>k2dAhMj=X2UBTN`=m1^BbNT=^1kBkNm?", + "choices": [ + "A. , , and .", + "B. Only and .", + "C. Only and .", + "D. and ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_337.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000184324.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "Ula52U=3N2N4L4M2M10001O00000000N2O1N2J6O1000000012M4L010000O11OO1O1O1N`QZ2" + }, + { + "size": [ + 425, + 640 + ], + "counts": "fka44T=1iNMXE3h:0UE1k:MWE2j:LWE4o;00N21oBNi<3TC2iV4`5eLZJNi0\\3Q5XLoICZ2U4f601O1ON2O2OO0100N200N2M3K5M3M3O100O10000O1O1O1K5O1N21O000000O10000001O1O0010O01O1O1O001O000hKQHV3o7jLRHU3o7jLQHV3o7iLSHV3o7bLWH^3j7^LYHb3i7XL[Hh3g7TL[Hl3d8M1O1O000000O100001OO1ROfLhG_3S8n0O1O1N2N2N2O100O1O1O1O100001O1O1O1O1O2N1O000O10O100N2O1O100O1N2O1O100001O1O2N1iKZHTOCa3U8YM\\Hg2f7VM\\Hi2f7UMZHk2o7aLfGM7_90UFEc0NkNOb:=Y2O0O0O2N100N2O2L\\Ya6" + }, + { + "size": [ + 425, + 640 + ], + "counts": "i:S2V;1O2N2O0O2N1O0100O5gMQEo1Z;UNcD^1`;_N`Da1e;O2fNiD>[ and ?", + "choices": [ + "A. is carrying .", + "B. is on .", + "C. is attached to .", + "D. is inside ." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_338.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000184324.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "fka44T=1iNMXE3h:0UE1k:MWE2j:LWE4o;00N21oBNi<3TC2iV4`5eLZJNi0\\3Q5XLoICZ2U4f601O1ON2O2OO0100N200N2M3K5M3M3O100O10000O1O1O1K5O1N21O000000O10000001O1O0010O01O1O1O001O000hKQHV3o7jLRHU3o7jLQHV3o7iLSHV3o7bLWH^3j7^LYHb3i7XL[Hh3g7TL[Hl3d8M1O1O000000O100001OO1ROfLhG_3S8n0O1O1N2N2N2O100O1O1O1O100001O1O1O1O1O2N1O000O10O100N2O1O100O1N2O1O100001O1O2N1iKZHTOCa3U8YM\\Hg2f7VM\\Hi2f7UMZHk2o7aLfGM[ and positioned relative to ?", + "choices": [ + "A. Both and are over .", + "B. is attached to and is over .", + "C. is on and is inside .", + "D. is over and is attached to ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_339.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000169996.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Qjl5j0S>7J4L3N2M3N2N2O0O2O0O10000001O00000O2O001N1O2N3M1N5K2MLTOWBm0k=03L5NL601O2N2N1O00TOaB7`=BfB=P>N2M2K401O2O0O18H5\\NB\\Db0_;_OaDb0^;^ObDd0[;^OcDe0[;[OeDf0Z;^OcCJM34M20010000O100001O1N3N8H9G9G10O5K1O0O101N101O0O101O1O00001N103M1O001O3K:A\\Z2n0mdM4K2O000O100000001O00000000000000000000000000001O000000000000001O000000001O001O1O001O1O0010O01O2N1O1O001O001O001O1O001O1O001O1O001O1O001O001O1O001O1O001O1O2N1O1O1O1O1O001O100O010O01O1O1O001O1O1O1O1O1O1O2N1O3M1O1O1O1O2O0O1N3M2M3M4J6J6M3L3K5N1N2N4K4L`fX3" + }, + { + "size": [ + 480, + 640 + ], + "counts": "UQ[83j>5J5M3N2O0O2O001N3N1O1N10001O0O1000O10O1000O1N1010O100O1O1O100N200O1O1O100O100O1O100O101N2O1O1O01Nia9" + }, + { + "size": [ + 480, + 640 + ], + "counts": "Y=:9h0R=XOnBh0Q=ZOmBg0R=_OhBc0V=d0O1O100O1000000000000000000000000O10000000000O10O2O010O001O001O1O1O1N101O000000000000001OO1000000000001O0000000000000000000000000000000000000O100O1O1O100O1N2O1O1O1O10000001O000000000000000000O1000000O1000000000000000000000000000000000000O11O000000O1000000000000001O00001O1O00001O00000000001O0O10010O1O4K2O102M3M3L2O1O010O1N2O1O01OO2O000001OO2O00000000O100000000O100O100O1O1O1O100N2N2N200O100O10000O100O100O100O1O1O10000O1O1O100001O6J1O1O001O1O001O001O2N1N110O00001O0000000000000000000000O10000O100O1N2O100O100O100O020O01O00000000000O100010N1000000000000000000O1O1O100O1O1M3M3N2O10000001O001O1O1O00001O0000000000000000000000O1O1O1O1O1O1O1O100000000O10000000000000000000000000O11O000000O11O0000O1001O000O10O11O0000O1001O0000O1001O0000O1001O01O2N2M200O1O1O0O11O01O000O11O00000O10000000000O100O1O1001O002N1O1O001O1O1O00000000000000000000O100O1O1O100O1N2N2M3M3N200O1O100dNhMPFX2P:hMPFX2\\;00000O100M3N2O11O3M4L3MdMmMmGQ2S8PNmGo1U8PNlGn1d:O1O1O1O1O0000hMXNYGg1g8XNZGh1f8XNZGh1n:100gMVN\\Gj1d
8WN[Gi1d8[NYGe1g8\\NYEKi1i1o8cNnF^1R9bNmF_1S9aNlF`1T9`NkFa1U9_NjFb1U9_NhFd1X9\\NfFf1Y9[NeFg1Y9[NeFg1[9YNUFCTO^2f:PNSFY2j9jMVFV2j9jMVFV2j9jMVFV2j9jMVFV2j9jMUFW2k9iMUFW2k9iMUFW2g9TMbEf0g0V2o9eMQF[2^:UMcEk2Y;00000000000O10000000000000000iNVMXFj2h9VMXFj2i9TMXFl2f9XM\\EMb0l2j9iMUFW2h9lMXFT2h9lMXFT2h9lMXFT2h9lMXFT2h9lMXFT2h9lMWFU2i9kMWFU2i9kMWFU2i9kMWFU2i9kMWFU2i9kMWFU2i9kMWFU2i9kMWFU2i9kMWFU2i9kMWFU2i9kMVFV2j9jMVFV2j9jMVFV2j9jMVFV2o9QM`E5NK00d0o2Y;00000000000000000O1aN_MfEC`0n2j9hMVFX2k9gMUFY2k9gMUFY2j9hMVFX2j9iMUFW2k9iMUFW2k9iMUFW2k9iMUFW2k9iMUFW2k9iMUFW2k9iMUFW2k9iMTFX2l9hMTFX2l9hMUFW2k9iMUFW2k9iMTFX2l9hMTFX2l9hMTFX2k9iMUFW2k9jMTFV2l9iMUFW2k9iMUFW2k9iMUFW2k9iMUFW2k9iMUFW2k9iMUFW2k9iMUFW2m9gMSFY2n9fMQF[2P:mL`Ed0`0_2P:nL_Ec0a0_2P:dMPF\\2S:aMmE_2S:aMmE_2S:aMmE_2T:`MlE`2T:`MkEa2V:^MjEb2V:^M\\E^O:T3Z:^M\\E^O:T3Z:^M\\E^O:T3Z:^M\\E^O9U3[:^M[E]O:U3[:^MZE^O;T3[:^MZE_O:S3\\:^MZE_O:S3\\:^MZE_O:S3\\:cMbE^2^:fM^EZ2b:fM]E[2c:fM\\EZ2c:hM\\EX2d:R10000O10000O1O10RNfEVO[:d0nEXOR:g0oEYOQ:g0oEYOQ:f0PFZOo9g0QFYOo9f0RFZOo9e0QF[OP:_O\\E3f0=o9UOiEEB7i0o0l9TO^F@J\\1h9TO`Gl0`8SOaGm0`8oNcGQ1]8mNeGS1]8bNlG^1k:0000000000000000000000000000000000000000000bB" + }, + { + "size": [ + 480, + 640 + ], + "counts": "hUn54h>7H6L4M3M3L4M4K4M4K=D2M3O2M2O2N2N1O1O1OI\\CQNf>1O1O1O1O1O1O2N2N1N`a\\2" + }, + { + "size": [ + 480, + 640 + ], + "counts": "R:S3m;O001O2N1O1O1M3N2O100000O2O00001O000000O100001OO10000O1O11O011N3M1O2N1O1O1O1O1O001O00001O00000000000000000000O100001O00000000000000O10000000000000000O1O1O100O1O1N2M3N2M3H8L40001O0O100001O001O00001O00000000000000O100000000000000000000O2OO2O00000000001O1O00001O001O0000001O3M100O1O1O001O1O001O000000000000O1M3J6I7O1N2O1N2GUC[Nm2O1N2O1O1O1N2O002N001O1O1O1O1O1O001O000gBDn;?", + "choices": [ + "A. , , and ", + "B. , , and ", + "C. , , , and ", + "D. 
, , and " + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_340.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000245026.jpg", + "mask_rles": [ + { + "size": [ + 424, + 640 + ], + "counts": "Y[Z2>d<7J7J5J5N3J5K6L4J6J6I7J6fEbMe9c2WF`Mf9d2VF_Mf9S3N3N2M3N3M5J4L3NO11OO1000010OO2ON3N101O001O10O0100O000O2000O100000000O10000O10000O1000001N010000000000OWNVLUJk3_5bL`J^3\\5lL^JT3_5RM^Jn2_5WM_Ji2^5]M_Jc2_5cM]J]2b5hMYJY2e5kMYJT2f5PNXJP2f5TNXJl1g5WNVJj1g5[NWJf1e5_NYJa1d5eNXJ\\1g5gNVJZ1i5iNUJW1j5T300O1O100000O100000O10000O100O10O01000000O1000001O0O2O0000000000000001N1000000010OO101O1O00001O00001O00100O1O1aKPJd1Q6WNTJh1m5TNWJk1j5RNXJn1j5oMXJP2i5mMYJS2i5jMYJT2k5hMVJX2l5dMVJ\\2l5aMVJ^2l5`MTJ`2m5^MTJb2m5\\MTJd2m5ZMUJe2l5XMVJh2j5WMWJh2k5VMWJi2j5oKYIN;a0e0b3h5mKjI>?e3`6WLaIi3a6RLcIm3l7001O01O0000001O001O00000O100000001O0001O01O1O001N101O00001O0001O0001O01O01O1O001O001O1O001N3N001O1O3N0O2N1O2M4M2N4M0O3M5K2N4M0O3N3O0N2O1N1N4M2O0O2N2N1N3M2N3M3M5ROWD3n;F[D3i;G]D4T\\j2" + }, + { + "size": [ + 424, + 640 + ], + "counts": "bY]54bHHc9^O[Fl09MIJ]9h0hF_OKI]9i0gF^OLJ[9j0hF\\OLK\\9j0fF\\ONJ\\9l0bF\\O2H\\9o1eFQN[9o1dFRN\\9n1dFRN\\9n1dFRN\\9m1dFSN^9l1bFUN]9k1cFUN]9k1cFUN]9k1cFUN]9j1dFWN[9i1eFWN[9?_FO6B[9>eFK0G[9=jFHJK]9=kFFHM]9[EE0K^:n0]EWO6Ik9e1lEcN9Gi9[2XFbMj9^2VFaMk9_2?0000001O00000000000000000000000000hN^EJb:5bEjNIj0e:;XFTOh9h0]FWOd97mFIY;1O0bCHl;7TDKS;7iDO3KS;9gDM6Jf:3nD95J7J^:o0[EWO7JQ:^1eEiN9Je9\\2ZFdMf9\\2YFfMf9\\2WFTN[9l1cFUN\\9m1cFSN]9m1cFSN]9m1cFSN^9m1aFSN_9m1bFRN^9n1bFSN]9m1cFSN]9m1dFRN[9P2dFPN\\9P2dFPN]9o1cFQN]9o1cFQN]9P2bFPN]9Q2dFnM\\9R2dFnMf6\\OYI1U2e2lMnMf6\\OYI1U2e2mMnMe6[OZI0V2f2lMmM[9S2fFlMa9V1[FgN`0A_O0Ta1" + } + ], + "question": "What are the relationships of and with respect to ?", + "choices": [ + "A. is in front of , and is attached to .", + "B. Both and are in front of .", + "C. Both and are attached to .", + "D. is in front of , and is attached to ." 
+ ], + "answer": "D", + "type": "relation", + "image": "images/vqa_341.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000115885.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "RWo35_;7K2N2L5M4L3L3N2N3M2M3M4L3M3K5K5L4L4L3J8I6M2N3M4M2N2N1O2N2N1O1O2N1O1O1O2N101O0O1O1O10000jM[LYLf3l5OnM^LPLc3P4aLkK`3U4bLhK^3Z4bLeK^3[4dLbK\\3`4fL\\K[3e4dLZK]3g4cLWK^3i4gLPK[3P5mLfJU3[5kLdJU3\\5mLaJT3`5PMZJQ3f5PMXJQ3i5nLWJR3j5nLSJT3m5[1001O001O1O001O01O01O001O001O10O000001O1O010O1O00010O001O010O10O100O01O10O01O010O0010O01O000O2O00001N2N1O2N101L3O1O2N2O1N1O2L4M3N1O2O01O01N2N3K300O2O001O0O1N3NaIcL_NOo5]3dKcL]N0P6[3eKcL\\N2o5[3fKbL\\N4l5Y3iKcLZN8j5U3mKbLYN9j5T3kKeL\\N8i5Q3kKiL[N6k5o2lKiL[N7i5o2nKhLZN1N1j5V3VLhLV2" + }, + { + "size": [ + 375, + 500 + ], + "counts": "]bP33b;3O1N1N3N2N1N3N1N3N1N2N3L3N3M2M4L3M4M2N2N3M2N3L3N3L3N3N1N3N1O2L3M4M2N3M2M4N1N2O2N1O2N2N1O101N101O0O2O0O2O00001O001O0010O010O001000O0100O1O100O100O10O001O01O01O0010O01O010O010O0010O00010O01O0010O01O0100O010O01O1O1dH`L`6b3[IaLd6Y40010O100O1O10000O0100O02O0O010O01O00101N001O1O00001O00001O00001O001N3N1O1O1N2O1O2M2O2M2N2N2N3M2O2M2O2N2N1N2N2M3M4M1O3N1N2O2M2N3N1N2O1N3M2N2N3N0N4M2N2N2N3L3N3L4L4M3M2N3N2L4M3L4K6Ifo=" + }, + { + "size": [ + 375, + 500 + ], + "counts": "]1Y:^100000O100000000000000000000O100000000000000000000000000000000000000O1001O00O100000000000000000000000000000000O100000000000000000000000000000O11O0O1000000000O11O00O1001O00O1001O00O1001N10O11O0000O11O0000O11O0000O11O0000O11O0O10000000O1001O00O0101O00O100000000001O00O1001O00O11O0O10O1001O00O11O0000O1001OO1001O00O1001O00O1001O0000O11O000000000O10000000000000000000000O02O00000O1000O2O1O01O0O1000010O001O2N7H8I7J:Ea8h1^GkM0>c8`20\\O]GTNd8f1^GhM0001N8f8o1_GhMM0O:d8P2aGTN`8k1`GUN`8i1[GiM51J9f8P2_GTNb8j1]GXNc8h1_GVNb8g1aGPNG1i8o1_GXNa8i1^GWNb8b1XGTN7:b8a1WGTN7;c8a1VGVN5:d8`1VGWN5:f8^1VGUN7;e8_1UGTN7=e8^1TGVN6 and ?", + "choices": [ + "A. is on .", + "B. is looking at .", + "C. 
is beside .", + "D. is looking at ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_342.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000289659.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "i_e2?e<>E8J5K4J6K5L3M3M2N3L5K4M3M4K6Ka0_O5L2M3M7I;EY1gN7I100O1OUNZISMg6g2eIQM\\6P3jIeL[6\\3ZHYLZ9R4J4K9H5K7I6J6J3M00iNYHjLh7Q3Z1J7M3L3M4N2K5M2N201O001000O1O1O1O2N1O2N3M7I:G4K7I6I7K7cGSK71X7e5L3N2NO2O01O0O20O00001POUI`Kk6\\4`IXKUO2\\7a4X1I6N3N2O010O11O1N4YGcKKo0]7i4100O1O100O100O100O1O0C_HPKd7j4a0L4N2N2M3M2O2M3N2J6J5RNlFBZ9=iF\\O\\9b0gFYO]9f0fFSO^9m0gFiN_9V1gF\\Nb9c1R10100O100O010O1O10O0100O010O010O100O010O00100O10O01O10O01O0010O01O10O01O010O01O01O10O01O1O00100OO20O010O10O01O10O01O01O0100O0010000O10O010O0100O010O10O01O01O1O10O1000O001000O010O0010000O001O1O10O1O01O1O10O100000O001O100O010000O10000000100O1O00O20O1O1O1O002M2N2N1O1E;O2N1O101O1O100O000100O1O101O2M2O002M2N3M2O0O1O1O0101N0010O1O2O1N2O1O1N2O1O0O2O1O010O000O100000000O1O2N1O1O1O2N2M2O0O3M2O1O001O2N2M4M2M5Kl]^1" + }, + { + "size": [ + 426, + 640 + ], + "counts": "kRZ37R=3N2[CKQ<6mC0m;2PD2n;f0N2M201O00O11O0000001O00001O001O1O000000O1000000001O001O1O1O1O2N1O1O4L5K2N2N;E001O1O3M2N4L2N2N6J;E2NgNYFoNg9l0jFmM@h0f9Z1RGbNn8]1VG`Nj8^1^G[Nc8d1b1N2N2O1N200O1O100O1O1O100O100O100O100O100O100000000O1000000O101L3O1O2N1N201N2N6I3NX8o2kGcLM=Y8P3jGdLL?", + "choices": [ + "A. is standing on and eating it.", + "B. is in front of and standing on .", + "C. is standing on and eating .", + "D. is standing on and eating ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_343.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000210032.jpg", + "mask_rles": [ + { + "size": [ + 401, + 640 + ], + "counts": "Yja12\\<3L4M3M3J6M3L5L3N2L4N2O1O1N2N20O01O2OO0100000000O1000000000000jNaNYG`1c8gNZGY1e8kNhFJL\\1[9_OdFa0\\9_OcFb0]9_ObFa0^9_ObFa0^9@`Fa0`9X10000000000000000000O10000000000000000000000000000000000000000O1001O00000VHWMl5i2UJVMk5j2n102N1aM_Fl1V:UNdE[1^:aNdE_1g:0001O00001O000O1001O00O2O00000O2O0O1N3K5N11O002O0O11O0O101M2N2M4M2N2O2M2O1O2N2N2N1O1O1O1O1O1O1001N3N000O01O001O1O00001O1O001O1O1O1O1O2MRY]4" + }, + { + "size": [ + 401, + 640 + ], + "counts": "jZY17P<=K5K4L3M3N2M200N3N2N2N100O2N100O1M4M2O1O10000N200O1O101N1O1N3L3O1N2O2N1O100O2O0O100O10001O0O10000000000O1000001OO1001N100O101N1O1O10000O1000000000000O1000001O01O0000000000000001O001O001O001O000O11O00000O101O0000000001O0000000001O00000O1000000000001O000000001O00000000001O0000001O00000O10000000000000000O100000001N1000000001O1O0O1000000O0100O1000000O10000000000000000000000000000000001N10000000001N1000001O001O00001O00001O00001O001O00000000001O0O101N10001O001O1N101O0O2N2O0O2N2N2O0O2N2N2N2N:F3M2N3M3J6L4K5L4K5LbQ]3" + } + ], + "question": "What is the relationship between and ?", + "choices": [ + "A. is eating .", + "B. is beside .", + "C. is looking at .", + "D. is holding ." 
+ ], + "answer": "C", + "type": "relation", + "image": "images/vqa_344.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000144114.jpg", + "mask_rles": [ + { + "size": [ + 400, + 600 + ], + "counts": "iU_31Y<7M3N1O1O010O1O1O1IDYD=e;800O1N2O100O1000000001N2O2N3M3]OWD9QgDC1003\\;h04M3K4H9N2O01000O010O1000O010O010O10O1000000O001O010000O010O1000O0010000O0100000O01O0100O01000O10O0100O0100000O0100O10O1O0100O01000O010000O010O0100O01O1000O10O0100N1100O1000O1KHQD8n;50100O10O1000O10O10O010O10000O1000000O010000000000000002M4M1O0O3K_i8" + } + ], + "question": "Based on the provided information, what is the relationship between and ?", + "choices": [ + "A. is in .", + "B. is over .", + "C. is driving .", + "D. is under ." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_345.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000144114.jpg", + "mask_rles": [ + { + "size": [ + 400, + 600 + ], + "counts": "iU_31Y<7M3N1O1O010O1O1O1IDYD=e;800O1N2O100O1000000001N2O2N3M3]OWD9QgDC1003\\;h04M3K4H9N2O01000O010O1000O010O010O10O1000000O001O010000O010O1000O0010000O0100000O01O0100O01000O10O0100O0100000O0100O10O1O0100O01000O010000O010O0100O01O1000O10O0100N1100O1000O1KHQD8n;50100O10O1000O10O10O010O10000O1000000O010000000000000002M4M1O0O3K_i8" + }, + { + "size": [ + 400, + 600 + ], + "counts": 
"2\\<400kMOkG1U80jG0W8OiG1[:0mMMiG3W8NhG2W80hG0X80hG0X81gGOY81gGOY81gGOY81gGOY81gGOZ80fG0Y81gGOY81gGOY81gGOZ80fG0Y81gGOY81gG0W81iGNW82jGNR86nGJP89oGGP8:PHFP8;oGEP8PHCn7>RHAo7?QHAn7a0QH_Oo7a0QH_On7b0RH^On7b0RH^On7b0RH^On7a0SH_Ol7b0TH_Ok7a0UH_Ok7a0UH^Ol7b0TH^Ol7b0TH^Ok7c0UH^Oj7b0VH^Oj7b0VH]Ok7c0UH]Ok7c0UH^Oj7b0VH^Oj7b0VH^Oj7a0WH^Ok7`0VH@j7?WHBj7:XHFk74XHKh76XHJh76XHKg75YHKg75YHKg74ZHKf76ZHJf76ZHKe75[HKX6LnH9j0KW6NnH7k0JW60nH5l0LU6OnH6m0KT60oH5m0KS61PI4m0KS62oH3n0JT63mH4o0JS61oH5n0JS61oH5n0JS61oH4o0JR63oH3o0JR63oH3o0KQ62PI3o0KP63QI2o0KP63QI2o0JQ63QI2o0KP63PI3P1Ko52QI3P1Ko52QI3P1Ko52QI3P1JP63PI3P1Ko51RI4o0Kn52RI4P1Jn52RI3Q1Ke32RL0XO3Q1Je38nKJ]O4P1Kc3>jKCC4P1Kc3b0fK_OF5Q1Jb3g0bK[OK4Q1Jb3Q1XKQO53R1K`3U1VKmN74S1J`3Y1QKjN<3S1J_3`1lJbNc04R1J^3g1gJ[Ni05Q1H`3Q2\\JSNS14Q1I^3_2aKhMQ1I^3_2aKhMQ1I^3_2aKhMQ1I^3`2`KgMR1I_3b2[KfMV1H`3b2YKeMX1I`3c2VKdMZ1Ib3c2RKdM\\1Ic3c2PKcM^1If3a2kJgM_1If3`2jJgM`1Ig3_2iJgMa1Jh3]2gJiMa1Ij3^2dJiMb1Jj3\\2dJjMb1Jk3\\2aJkMd1Im3Z2_JmMd1Io3Y2\\JnMe1HS4W2XJQNe1IS4U2WJSNf1HT4U2UJSNg1HU4U2SJSNh1GX4U2oISNj1HY4S2mIUNj1IY4Q2lIWNk1HZ4P2kIXNk1H\\4n1iIYNl1H_4l1eI\\Nl1H`4k1dI]Nl1I`4i1dI^Nl1Ib4g1bI`Nl1Hd4n1YIYNT2Id4n1WIYNU2If4m1SI[NW2If4k1SI\\NW2Ii4h1PI_NW2Hl4f1nHbNV2Hn4c1mHeNU2Ho4a1mHgNT2Io4^1nHhNT2IP5^1kHjNU2HQ5\\1kHlNT2HR5Y1lHoNR2IR5T1oHSOo1IS5R1oHUOn1HU5R1mHUOo1IT5S1lHTOP2IT5S1kHUOQ2IT5Q1kHVOQ2IT5Q1kHVOQ2HU5S1hHVOS2GU5S1hHVOS2HT5R1iHVOS2HU5Q1hHVOT2IT5Q1hHVOT2HU5R1gHVOT2HU5R1gHVOT2HU5R1fHWOU2HT5Q1gHWOU2GT5S1gHVOU2GT5T1fHTOW2IR5S1gHTOW2IR5S1gHTOW2IR5S1gHTOW2HS5T1eHUOX2GS5T1eHUOX2GR5U1fHTOX2HQ5T1gHTOX2HQ5T1gHSOY2HQ5U1fHSOY2Io4U1gHSO?Hl00n5U1gHTO50V1Gn5U1gH3Z1hNo5U1gH3Z1gNP6V1fH3Z1gNP6V1eH3\\1hNn5V1eHFJROi12g5W1fHYOU2_OV5X1eHQO]2Gn4X1eHQO]2Hm4W1eHRO^2Gl4X1fHPO_2Hk4X1fHPO_2Gk4Z1fHoN_2Gk4Z1fHoN_2Hj4Y1gHoN_2Hj4Y1gHoN_2Hi4Z1hHnN_2Gj4[1fHoN`2Gi4Z1gHoN`2Gh4[1hHnN`2Gh4[1hHmNa2Hg4[1hHmNa2Gh4\\1fHnNb2Gg4[1gHnNb2Gg4[1gHmNc2Hf4[1gHmNc2Gf4]1gHlNc2Gf4]1fHmNd2Ge4\\1gHmNd2Ge4\\1gHmNd2Gd4]1hHlNd2Fe4_1fHkNe2Gc4_1gHjNg2Gb4_1gHjNg2Gb4_1fHkNh2Fb4_1gHjNg2Fc4`1fHjNg2Ga4`1hHiNg2Ga4_1hHkNg2F
a4`1gHjNh2Fa4`1gHiNi2G`4`1gHiNi2G_4a1hHhNi2G_4a1gHiNj2F_4a1gHiNj2E`4f0gH0OEj2F_4e0iHONFj2F^4j0iHGOIj2F^4j0iH6j2PO\\4R2dKmM]4S2cKnM\\4R2dKnM\\4R2dKnM[4S2eKlM\\4T2dKlM[4U2eKlMZ4S2gKmMY4S2gKmMY4S2gKlMZ4T2fKlMZ4S2gKnMW4S2iKmMW4S2iKlMX4T2hKlMX4T2hKlMW4U2iKlMV4o0nHOl2ROV4T2jKlMU4U2kKjMV4V2jKkMU4T2lKlMT4T2lKkMT4V2lKjMT4V2lKjMT4V2lKkMS4U2mKkMR4V2nKjMR4U2oKjMR4V2nKjMR4V2nKkMQ4U2oKkMQ4U2oKjMQ4W2oKiMQ4W2oKiMQ4W2oKjMP4V2PLjMo3W2QLhMP4W2QLiMn3X2RLiMm3W2SLiMm3W2SLhMn3X2RLhMn3W2SLiMl3X2TLiMj3X2VLhMj3X2VLgMk3Y2ULgMk3Y2ULgMk3X2VLiMh3X2XLgMi3Y2WLgMi3Y2WLgMi3Y2WLhMh3X2XLhMh3X2XLhMg3Y2YLfMg3Z2ZLfMf3Z2ZLgMe3Y2[LgMe3Y2[LfMf3Z2ZLfMf3Z2ZLgMe3X2\\LhMd3X2\\LhMc3Y2]LfMd3Z2\\LfMd3Z2\\LfMd3Z2\\LgMc3Y2]LfMd3Z2\\LfMd3Z2\\LfMd3Z2\\LgMc3Y2]LgMc3Y2]LgMc3Y2]LfMd3Z2\\LfMd3Z2\\LfMc3[2]LeMc3[2]LeMd3Z2\\LfMd3Z2\\LgMc3Y2]LgMc3Y2]LgMc3Y2]LfMd3Y2]LgMc3Y2]LgMc3Y2]LgMc3Y2]LgMc3Y2]LgMc3Y2]LgMc3Y2]LhMb3X2^LhMb3X2^LgMd3W2]LiMc3W2]LiMc3V2^LjMb3V2^LjMb3U2_LkMa3U2_LkMb3T2^LmMa3S2_LmMa3S2_LmMa3R2`LmMb3R2^LnMc3Q2]LPNb3o1_LPNb3P2^LPNb3P2^LPNc3o1]LRNb3n1^LRNb3m1_LSNb3l1^LSNc3m1]LSNd3l1\\LTNd3l1\\LUNd3j1\\LUNf3j1ZLVNg3i1YLWNg3h1ZLYNe3g1[LYNf3f1ZLYNg3f1ZLZNg3e1YL[Ng3e1YL[Ng3d1ZL\\Ng3c1YL]Ng3c1YL]Ng3c1YL]Ng3c1YL^Nf3b1ZL]Nf3c1[L]Nd3d1\\L\\Nd3d1\\L\\Ne3b1\\L^Nd3b1\\L^Nd3b1\\L^Ne3a1[L_Ne3a1[L_Ne3a1[L_Ne3`1\\L`Ne3_1[LaNe3_1[LaNe3_1[LaNe3_1[LaNe3_1[LaNd3`1\\L`Nd3`1\\L`Nd3`1\\L`Nd3`1\\L`Nd3_1]LaNc3^1^LbNb3^1^LbNa3_1_LaNa3_1_LaNa3^1`LbNa3]1_LcNa3]1_LcNa3]1_LcNb3[1_LeNa3[1_LeNa3Z1`LfN`3Z1`LfN`3Z1`LfN`3Y1aLgN`3W1aLiN_3V1bLjN^3U1cLkN]3T1dLlN\\3S1eLmN[3R1fLnNZ3R1fLnNZ3Q1gLoNY3Q1gLoNY3P1hLPOX3P1hLQOW3n0jLROW3l0jLSOW3l0jLTOV3k0kLUOV3j0jLVOV3i0kLWOU3i0kLWOV3g0kLYOU3g0kLYOU3f0lL[OS3e0mL[OT3d0lL[OU3d0lL\\OT3d0lL\\OT3d0lL\\OU3c0kL]OU3b0lL^OT3a0mL_OU3?kLBU3JB5>LB4>LB3?MA3?MA3>NB1?OA1?OA1>0B0>0BO?1AO?1AN`02@N?3AM?3AL`04@L`04@L`04@Ka05_OKa05_OKa05_OKa05_OK`06@J`06@J`06@J`06@J`06@J`06@J`06@K>6BK=5CN:2F071I015OL040LO51KO51LN42LN42KN62IO71IN82HM93GM93GM93R6000cIN<2CO=1CO=1CO=1CN>2BN>2P60`INb02^ONb00" + } + ], + "question": "What is the spatial 
relationship between and the other objects?", + "choices": [ + "A. is over .", + "B. is over .", + "C. is over .", + "D. is in ." + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_346.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000369370.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "dje28g>3M2M4N1O2M3M4M2M2N3N3Ma0^O4M3L3N2N1N2N2N2N3N2M2N2O1N2N1O100O2O0O100O1O10000O2O0O1O100O1O1O11O1O000000O10000O10000O10000000O010000O1000O10O1000O01000O01000O010_OWDjMj;T2ZDiMf;W2\\DgMe;X2\\DgMe;Y2]DcMd;]2`000O010O1O100O10O10O10O0100O010O1O10000O10001O00000O10O10O001O1O1N101N2O1O0N3N2N3L3N3L5mNfB8W>CUXi4" + }, + { + "size": [ + 480, + 640 + ], + "counts": "b01o>0nYX50PfgJ4N7WAF2ON2S>i000O00000O101O000000000000000000001O00000000000000001O0000001O001O00010O0000001O0000001O001O000000001O0001O000001N1000000000001O00000000000000001O0000000000001O00001O001O001O001O2N1N2O001O1O1O1O1O1O1O1O1O001O1O001O1O000O2O001O001N10000O2O001N101O0O2O0O2N1O2N1O2N101M3N2N100O1N3N1O1O1O2N1O1O1N2O1O1O1O1N2O1O1O1O1O1O1O1O1O1O100O100000000O1000000000000001O00000000000000000000000000000001O0O11O000000000000000000000000000000000001O000000000000000000000000000000001N100001O00000000O100000001O01O000000O1000000001OO100000001O00000000000000000000000000000000" + }, + { + "size": [ + 480, + 640 + ], + "counts": "h1i4W:0001OO1KXKQFi4j9WKVF2Ng4m9WKUFm4k9TKSFm4m93O1LnJZFR5j9000O001N21O000O0100000000O010000000O10O100000N20O010000O10000000O0100O001O1O10000000O100O010000N1100000O1O1O1000O101N101O0000O1000O1000O10O0100O1O01000000O10O100000O1000000O10O10O100000O10O1000000O10000O100000000O10000O100000000O10O1000O10000000O010000O100000000O10000O10000O10O010000O1000O0100O10O10O101N10001N10000O101O0O101O0O2XLhEf2Y:WMlEe2V:YMmEe2T:XMoEf2S:XMPFf2S:iLaE3b0Q3T;M2O3L4M1N3N2N2M2O2aMjCo1f, , and ?", + "choices": [ + "A. is on and beside .", + "B. is on and beside .", + "C. is on and beside .", + "D. is on and beside ." 
+ ], + "answer": "D", + "type": "relation", + "image": "images/vqa_347.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000369370.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "lg_32m>2O1N101N2O1M4M2O1N2O0N3N101N1O2N1SOUOdCm0Z:G8J3O3I5M4N1N2M3M4L3N1O2M3M3N2N1O2M3N2M3O1N2N2N101O0O2O0O2O1N101N101O1N101O1N101N2O001N2N101O001N2O1O0O2O001N2O001N2O1N2O1O001N101O001N101N10001O0O101O1O0O101O000O101O00000O101O0O10001N10000O1000000O10000000O010O10000O01000O100000O010000000000O100000O10O100000000O0100000000000O1000000000O10O10000000000O10000O100O1000000O1000000O1000000000000000000O10000O1000000O10010O0O10000000000O11O00000000O11O00O100001O00O10000O100O2N11O1O0000O2N1000000O20O000001O0000010O0O11O010OO10001O00000O1100O0000O1O10001N1O1001O00O2O0O100O1O10001O000O1O10001N1L4N21O1O00O2L3001O01N100003NO000O10010O000000010O000O11O01O00001O0000001O0O100000000010O01N101O001N101O00001O00000001O01O00001O00010O00000O101O000000001O00001O00001O001N10001O0000001O0000001O0000001O00001O00001O01O0001O00001O001O001O001O1O001O100O2N2N2M3N2M3N3L3N3L3N1O1N2O1O3L2N2O2M4M3M2M3N4K5Ib]f2" + }, + { + "size": [ + 480, + 640 + ], + "counts": "b01o>0nYX50PfgJ4N7WAF2ON2S>i000O00000O101O000000000000000000001O00000000000000001O0000001O001O00010O0000001O0000001O001O000000001O0001O000001N1000000000001O00000000000000001O0000000000001O00001O001O001O001O2N1N2O001O1O1O1O1O1O1O1O1O001O1O001O1O000O2O001O001N10000O2O001N101O0O2O0O2N1O2N1O2N101M3N2N100O1N3N1O1O1O2N1O1O1N2O1O1O1O1N2O1O1O1O1O1O1O1O1O1O100O100000000O1000000000000001O00000000000000000000000000000001O0O11O000000000000000000000000000000000001O000000000000000000000000000000001N100001O00000000O100000001O01O000000O1000000001OO100000001O00000000000000000000000000000000" + } + ], + "question": "What is the relationship between the sandwich and the slice of bread ?", + "choices": [ + "A. is on .", + "B. is leaning on .", + "C. is leaning on .", + "D. 
They are beside each other on ." + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_348.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000527215.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "i8h02n2h8RMXGn2h8RMXGn2h8RMXGn2h8j0O100000000001O00000000000000000000000000000000O11O0000000000000000000000000000000000O1000000O100O100O1O10000O1000000O10000O1O10bKaGQ4_8oKaGQ4^8PLcGo3]8PLdGP4`7fK`H0a0:@P4_7gK^H1`09Do3^7gK]H2>;Gl3^7oKgH6Kk3]7RLeH4Nj3]7ULaH22i3]7kLcHU3^7jLcHU3]7jLdHV3\\7gLgHY3Y7gLfHZ3Z7fLfHZ3Z7eLgH[3Y7eLgH[3Y7eLgH[3Y7eLgH[3Y7lK]Ha0:c3Y7eLgH[3Z7dLeH]3[7kK\\H`09e3[7jK]Hb07d3]7hK]Hd06d3]7gK_Hd04e3^7eK`He02f3]7hK_Hb05e3[7kK^Ha06e3\\7kK\\Hb07c3]7eLcH[3]7fLbHZ3_7eLaH[3_7eL`H\\3`7dL`H\\3`7eL_H[3a7fL^HZ3b7gL]HY3c7W100001O000000001O000000000000000000001O000000000000001O00000000000000000000001O00000000000000000000000000001O001O001O1O001O00001O00001O001O001O00000000000000O100O100O100O100000000000000O1O100O1000000O1000000000000001O0000000000000000001O0000000000000000000000000000001O0000000000000000001O0000O1000000001O00000000000000000000001N11O00000000000001O00000hKUHQ3k7iL^HT3b7hLhHR3X7lLkHS3U7mLkHS3U7mLkHS3U7nLjHR3V7oLiHQ3W7QMgHo2Y7RMfHn2Z7SMeHm2[7YM_Hg2a7aMWH_2i7bMVH^2j7eMSH[2n7fMoG[2Q8_1000000000000000000000000000000000000000000000000000000000000000000000000000lKnGP3S8oLmGQ3R8PMnGP3R8PMnGP3R8PMnGP3R8oLoGQ3Q8oLoGQ3Q8oLoGQ3Q8T1000000000000000000000kKoGQ3Q8oLoGQ3Q8nLPHR3P8nLPHR3P8nLQHQ3o7oLQHQ3o7oLQHQ3P8nLPHR3P8nLPHR3o7oLQHQ3o7oLQHQ3o7nLRHR3n7nLSHQ3m7oLSHQ3n7nLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLRHR3n7nLRHR3n7nLRHR3n7nLRHR3n7oLQHQ3o7oLQHQ3o7oLQHP3P8QMoGo2Q8QMoGo2Q8QMoGo2Q8QMoGo2Q8QMoGo2Q8RMmGo2S8QMmGo2S8QMlGP3T8S1000000000000001OO100001O000001O0O01001O00000000000001O0O11O00000000000000O100001OO100000000000001O000iKmGX3R8cLZHV3f7gLaHU3_7kLbHT3^7kLcHU3]7lLbHT3^7lLbHT3^7oL_HP3b7RMYHQ3g7QMSHT3l7nLoGU3Q8mLlGT3T8Q100001O0_KXH`3h7[L^Hd3c7VLcHd0YOV
2V9gMXGn1X:J4L1OO1000nNSEGn:7SEIm:7UEGk:8WEGi:8XEHh:8XEHi:7WEIk:4VELn:0SEOS;JnD6U;GkD9Y;BhD>\\;]OeDc0a;UOaDk0n;1000000O1000000000000O100O1\\O]OaDc0Y;j0@`0L4O1O1L4WOi0^OgLmF_3b8UMYGm2^8\\LaGe4]85L4N2O10000000kKlGT3T8kLnGT3R8lLnGT3R8lLnGT3R8kLoGU3R8jLnGV3R8jLnGV3R8jLnGV3R8jLnGV3R8jLnGV3R8kLmGU3S8kLmGU3S8kLmGU3S8lLlGT3T8mLiGU3W8o01O00O10000000000000000000000000000001O0000000RLmGe2S8ZMnGf2R8XMPHh2P8XMPHh2P8XMPHh2P8XMoGi2Q8XMjGl2V8T1000000000000000nKiGQ3W8oLkGo2V8oLnGn2R8QMTHj2l7UMUHk2k7TMVHl2j7TMVHl2j7UMTHl2l7UMlGR3T8R1001O000000000000000000001O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000eG" + }, + { + "size": [ + 426, + 640 + ], + "counts": "l]f33U=4COZC;^<:O02N2N1N6J:F3O01OOi\\^4" + }, + { + "size": [ + 426, + 640 + ], + "counts": "ojk57R=4K3N2N2O0O2M3N4L3J4QEVO\\98_E1n0Mb90dE2c06h9FgE59<8@`8MRG7Oc0H8n8lN]G:D^2k8WMcG]3^8aLcG_3_8]LcGc3S9O0000000000F^LlFc3m8b0L6L4K6oG^Ko6h4kHYKU7j4gHWKY7m4bHUK]7o4^HRKa7\\5000000001O001N2O0O2SKYHe36nKb7;ZH_3^8_LcGn2MlLg83_Gn2R9nLPGo2g9O1N2jNkE`N4FN2Y:c1jE[NQ;P1eEmN]:h0Y1L4N2M4N2L6IVPe1" + }, + { + "size": [ + 426, + 640 + ], + "counts": "ldT13V=100O3M1O2N2N2O2O1O2N1O2LBgCOW and ?", + "choices": [ + "A. is looking at and playing with .", + "B. is flying over .", + "C. and are both playing with .", + "D. is walking on towards ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_349.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000527215.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "i8h02n2h8RMXGn2h8RMXGn2h8RMXGn2h8j0O100000000001O00000000000000000000000000000000O11O0000000000000000000000000000000000O1000000O100O100O1O10000O1000000O10000O1O10bKaGQ4_8oKaGQ4^8PLcGo3]8PLdGP4`7fK`H0a0:@P4_7gK^H1`09Do3^7gK]H2>;Gl3^7oKgH6Kk3]7RLeH4Nj3]7ULaH22i3]7kLcHU3^7jLcHU3]7jLdHV3\\7gLgHY3Y7gLfHZ3Z7fLfHZ3Z7eLgH[3Y7eLgH[3Y7eLgH[3Y7eLgH[3Y7lK]Ha0:c3Y7eLgH[3Z7dLeH]3[7kK\\H`09e3[7jK]Hb07d3]7hK]Hd06d3]7gK_Hd04e3^7eK`He02f3]7hK_Hb05e3[7kK^Ha06e3\\7kK\\Hb07c3]7eLcH[3]7fLbHZ3_7eLaH[3_7eL`H\\3`7dL`H\\3`7eL_H[3a7fL^HZ3b7gL]HY3c7W100001O000000001O000000000000000000001O000000000000001O00000000000000000000001O00000000000000000000000000001O001O001O1O001O00001O00001O001O001O00000000000000O100O100O100O100000000000000O1O100O1000000O1000000000000001O0000000000000000001O0000000000000000000000000000001O0000000000000000001O0000O1000000001O00000000000000000000001N11O00000000000001O00000hKUHQ3k7iL^HT3b7hLhHR3X7lLkHS3U7mLkHS3U7mLkHS3U7nLjHR3V7oLiHQ3W7QMgHo2Y7RMfHn2Z7SMeHm2[7YM_Hg2a7aMWH_2i7bMVH^2j7eMSH[2n7fMoG[2Q8_1000000000000000000000000000000000000000000000000000000000000000000000000000lKnGP3S8oLmGQ3R8PMnGP3R8PMnGP3R8PMnGP3R8oLoGQ3Q8oLoGQ3Q8oLoGQ3Q8T1000000000000000000000kKoGQ3Q8oLoGQ3Q8nLPHR3P8nLPHR3P8nLQHQ3o7oLQHQ3o7oLQHQ3P8nLPHR3P8nLPHR3o7oLQHQ3o7oLQHQ3o7nLRHR3n7nLSHQ3m7oLSHQ3n7nLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLRHR3n7nLRHR3n7nLRHR3n7nLRHR3n7oLQHQ3o7oLQHQ3o7oLQHP3P8QMoGo2Q8QMoGo2Q8QMoGo2Q8QMoGo2Q8QMoGo2Q8RMmGo2S8QMmGo2S8QMlGP3T8S1000000000000001OO100001O000001O0O01001O00000000000001O0O11O00000000000000O100001OO100000000000001O000iKmGX3R8cLZHV3f7gLaHU3_7kLbHT3^7kLcHU3]7lLbHT3^7lLbHT3^7oL_HP3b7RMYHQ3g7QMSHT3l7nLoGU3Q8mLlGT3T8Q100001O0_KXH`3h7[L^Hd3c7VLcHd0YOV2V9gMXGn1X:J4L1OO1000nNSEGn:7SEIm
:7UEGk:8WEGi:8XEHh:8XEHi:7WEIk:4VELn:0SEOS;JnD6U;GkD9Y;BhD>\\;]OeDc0a;UOaDk0n;1000000O1000000000000O100O1\\O]OaDc0Y;j0@`0L4O1O1L4WOi0^OgLmF_3b8UMYGm2^8\\LaGe4]85L4N2O10000000kKlGT3T8kLnGT3R8lLnGT3R8lLnGT3R8kLoGU3R8jLnGV3R8jLnGV3R8jLnGV3R8jLnGV3R8jLnGV3R8kLmGU3S8kLmGU3S8kLmGU3S8lLlGT3T8mLiGU3W8o01O00O10000000000000000000000000000001O0000000RLmGe2S8ZMnGf2R8XMPHh2P8XMPHh2P8XMPHh2P8XMoGi2Q8XMjGl2V8T1000000000000000nKiGQ3W8oLkGo2V8oLnGn2R8QMTHj2l7UMUHk2k7TMVHl2j7TMVHl2j7UMTHl2l7UMlGR3T8R1001O000000000000000000001O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000eG" + }, + { + "size": [ + 426, + 640 + ], + "counts": "l]f33U=4COZC;^<:O02N2N1N6J:F3O01OOi\\^4" + }, + { + "size": [ + 426, + 640 + ], + "counts": "ojk57R=4K3N2N2O0O2M3N4L3J4QEVO\\98_E1n0Mb90dE2c06h9FgE59<8@`8MRG7Oc0H8n8lN]G:D^2k8WMcG]3^8aLcG_3_8]LcGc3S9O0000000000F^LlFc3m8b0L6L4K6oG^Ko6h4kHYKU7j4gHWKY7m4bHUK]7o4^HRKa7\\5000000001O001N2O0O2SKYHe36nKb7;ZH_3^8_LcGn2MlLg83_Gn2R9nLPGo2g9O1N2jNkE`N4FN2Y:c1jE[NQ;P1eEmN]:h0Y1L4N2M4N2L6IVPe1" + }, + { + "size": [ + 426, + 640 + ], + "counts": "ldT13V=100O3M1O2N2N2O2O1O2N1O2LBgCOW and ?", + "choices": [ + "A. is playing with , while is walking on .", + "B. is looking at , who is walking on .", + "C. Both and are flying over .", + "D. is walking on , while is playing with ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_350.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000404128.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "aX?2W=5M1N2O001O00001O0000001O00000000001O00001O000000001O0000001O000000001O0000000000001O01O000001O000000001O0000001O00000000000010O0000000001O000000001O00000000000010O0000000001O00000000001O0000000eC]On;d0oC^OQW:[10O01OFeEeM[:W2>1O0O201N100O1O3UNoMXHR2f7UNSHm1k7YNPHi1n7]NlGf1R8aNdGe1X8m1LO5K4M2013L06F9H7H2_OlFeLW9X3?N2O1O1O101C relative to and ?", + "choices": [ + "A. is enclosing .", + "B. 
is parked on .", + "C. is under and over .", + "D. is over both and ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_351.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000493905.jpg", + "mask_rles": [ + { + "size": [ + 640, + 571 + ], + "counts": "TYf43R?0eE2Y:OgE2U:1kEOU:2jENV:4hEMW:6fEJX:9gEGY::fEEZ:=dED\\:=cED[:`0aEA_:`0]ECc:>[ECf:RDBo;?PD@Rg0T@[Ob1O[>f0o_O_Oe1J^>]1m_OcNS11P?P2j@PNW?T2d@mM]?T2`@lMa?W2[@iMg?X2V@iMj?X2U@gMl?Z2R@fMn?[2Q@fMo?Z2P@fMQ`0Z2n_OfMS`0Z2m_OeMU`0Z2i_OhMW`0X2h_OhMY`0o22OO01OV@gLn>X3m@TM]ODZ?X3UAaMBYOm>V3^APN]>P2cAaNl=`1RBdNk=]1TBfNh=\\1UBiNh=j3`DQJk8Q6RGQJm8Q6PGPJP9Q6nFPJR9S6jFnIV9T6hFmIX9V6bFlI^9W6^FiIc9Z6YFgIh9\\6RFfIn9]6lEfIT:\\6hEfIZ:[6ZETI^Ob0X;\\6VEUI_Oa0\\;[6SElIo:W6lDjIU;`71O1N2O3nLZDdMR^41O1N2\\LmAX1U>PNdBm1_=nMgBn1\\=mMjBP2Y=jMlBS2Z=^Mo@No1a2^?M4K5Mk0TO7I4L4K4M2N1N3N1O1O2N1O2N2N2N1O2M3L_bb3" + }, + { + "size": [ + 640, + 571 + ], + "counts": "SRm34jc04M2O0O2O001O000000O2N2O1NoQo6" + }, + { + "size": [ + 640, + 571 + ], + "counts": 
"hP11Z`0[3k_OaM0]5m8TMRG_M0^5n8SMQGaM0\\5o8SMQGaM0\\5o8SMQGaM0\\5o8SMQGaM0\\5o8SMQGaM0\\5o8SMQGaM0\\5o8SMQGaM0\\5o8SMQGbMO[5P9SMQGaM0\\5o8SMQGaM0\\5o8SMQGaM0\\5o8SMQGbMO[5P9SMQGbMN\\5Q9RMQGbMO[5P9SMQGbMO[5P9SMQGaM0\\5o8SMQGaM0\\5o8SMQGaM0\\5o8SMQGaM0\\5o8SMQGaM0\\5o8SMQGaM0\\5o8SMQGbMN\\5Q9RMQGo3o8QLQGo3o8QLQGaMO]5P9RMQGaMO]5P9RMQGaMO]5P9RMQGaMO]5P9RMQGaMO]5P9QMRGaMO^5o8QMRGbMM^5Q9QMQGo3o8QLQGo3o8PLRGP4n8PLRGbMM^5Q9PMRGaMN_5P9PMRGaMN_5P9PMRGaMN_5P9PMRGaMN_5P9PMRGaMN_5P9PMRGaMN_5P9PMRGaMN_5P9PMQGbMN_5Q9oLQGbMO^5P9PMRGaMN_5o8QMSG`MN_5o8QMSG`MN_5o8QMSG`MN_5P9PMRGaMN_5P9PMRGaMN_5o8QMRGaMO^5o8QMRGaMO^5o8QMRGaMN_5P9PMRGaMO^5o8QMRGaMO^5o8QMRGaMO^5o8QMRGaMO^5o8QMRGaMO^5o8QMRGaMO^5o8PMSGbMN^5o8PMSGbMN^5o8PMSGbMM_5P9oLSGbMM_5P9oLSGbMN^5o8PMSGbMN^5o8PMSGbMN^5o8PMSGbMN^5o8PMSGbMN^5o8PMSGbMN^5o8PMSGbMN^5o8PMSGbMN^5o8PMSGbMN^5o8PMSGbMM_5P9oLSGbMN^5o8PMSGaMO_5n8PMSGaMO_5n8PMSGaMO_5n8PMSGaMO_5n8PMSGaMO_5n8PMSGaMO_5n8PMSGaMO_5n8PMSGaMO_5n8PMSGaMN`5o8oLSGaMO_5n8PMSGaMO_5n8PMSGaMO_5n8PMSGaMN`5o8oLSGaMN`5o8oLSGaMN`5o8oLSGaMN`5o8oLSGaMN`5o8oLSGbMM_5P9oLSGbMM_5P9oLSGbMM_5P9oLRGbMO_5o8oLRGcMN^5P9oLRGcMN^5P9oLRGR4n8nKRGcMN^5P9oLRGcMN^5P9oLRGcMN^5P9oLRGcMN^5P9oLRGcMN^5P9oLRGcMN^5P9oLRGcMN^5P9oLRGcMN^5P9oLRGcMN^5P9oLRGcMN^5P9nLSGdMM^5P9nLSGdMM^5o8oLTGcMM^5o8oLTGR4l8oKSGQ4m8nKTGR4l8nKTGR4l8nKTGR4l8nKTGR4l8nKTGR4l8nKTGR4l8nKTGR4l8nKTGR4l8nKTGR4l8nKTGR4l8nKTGaMMb5o8mLTGaMMb5o8mLTGaMMb5o8mLTGaMMb5o8mLTGR4l8nKTGaMMb5o8mLTGaMMb5o8mLTGaMMb5o8mLTGaMMb5o8mLSGbMNa5o8mLSGbMNa5o8mLSGbMNa5o8mLSGbMNa5o8mLSGbMNa5o8mLSGbMNa5o8mLSGbMNa5n8nLTGaMNa5n8nLTGaMNa5n8nLTGaMNa5n8nLTGaMNa5n8nLTGaMNa5n8nLTGaMNa5n8nLTGaMNa5n8nLTGaMNa5n8nLTGaMNa5n8mLUGbMMa5n8mLUGbMMa5n8mLUGbMMa5n8nLTGaMNa5n8nLTGaMNa5n8nLTGaMNa5n8nLTG`MOb5m8nLTGaMNa5n8nLTGaMNa5n8nLTGaMNa5n8mLUGbMMa5n8mLVGaMLb5n8mLVGaMLb5n8mLVGaMLb5n8nLVG_MLc5n8mLWG`MKc5n8mLXG^MKe5m8mLXG^MKe5m8nLXG]MJe5n8nLXG]MJe5n8nLYG\\MIf5n8nLZGZMIh5m8nLZG[MHg5n8nL[Gk3e8UL\\Gj3d8WL[GXMGi5n8oL\\GVMGk5m8oL]GVMEk5n8PM\\Gf3d8ZL]Ge3c8\\L\\Gd3c8]L^Gb3b8_L]Ga3c8_L^G`3b8aL]G_3c8bL]G]3c8cL^G\\3b
8eL^GZ3b8fL^GZ3b8gL^GX3b8hL^GX3b8iL^GV3b8jL_GU3a8lL^GT3b8mL^GR3b8nL^GR3b8oL^GP3b8QM]Go2c8QM^Gn2b8RM^Gn2b8SM^Gl2b8TM^Gl2b8UM^Gj2b8WM^Gh2b8YM^Gf2b8ZM`Gd2`8]M`Gb2_8_MbG`2^8aMaG_2_8bMaG]2_8cMaG]2_8cMbG\\2^8eMaG[2_8fM`GZ2`8gM_GY2a8gM_GY2a8hM_GW2a8iM_GW2a8jM_GU2a8kM_GU2a8lM_GS2a8mM`GR2`8oM_GQ2a8PN_Go1a8RN^Gn1b8RN_Gm1a8SN`Gl1`8UN_Gk1a8UN`Gj1`8WN_Gi1a8XN_Gg1a8ZN_Ge1a8[N_Ge1a8\\N_Gc1a8]N_Gc1b8oIVGV48j1b8nIYGW45k1b8nI[GV43k1b8PJ\\GS42m1b8oI]GU41k1P9UNPGj1P9WNoFi1Q9WNPGh1P9YNoFg1Q9YNPGf1P9[NPGd1P9]NPGb1P9^NPGb1Q9]NPGb1P9_NoFa1Q9_NPG`1P9aNPG^1P9bNPG^1P9cNPG\\1P9eNoF[1Q9eNPGZ1P9gNPGX1P9hNPGX1P9iNPGV1P9kNoFU1Q9kNPGT1P9lNPGT1Q9lNoFS1Q9nNnFR1S9mNnFR1R9oNnFP1R9QOmFo0T9POlFP1T9QOlFn0b8`JcGb4Kn0b8aJcGa4Km0b8bJcGa4Km0a8dJcG`4Lk0a8eJcG`4Mj0`8fJdG`4Kj0a8gJcG`4Lh0a8hJcGa4Lf0a8jJcG_4Lg0`8kJeG]4Lg0_8lJeG^4Kf0`8mJeG]4Ke0`8nJeG]4Ld0_8oJeG^4Lb0_8PKfG^4Jb0`8PKfG^4Ka0_8RKfG]4Ja0`8RKfG]4K`0_8TKeG\\4L`0_8TKfG\\4J`0`8TKfG\\4J`0`8TKgG\\4I?`8UKgG\\4J>`8UKgG]4H>a8UKgG^4HfK]AY4c>hK\\AX4c>iK^AV4b>kK^AT4b>lK^AT4a>nK_AQ4a>PL^AP4a>RL_Am3`>TLaAk3_>VL`Aj3b;oKgF8gMi3a;SLdF6jMg3c;TLaF5lMg3d;WL[F4QNd3h;ULVF8QNc3j;YLoE6WN`3l;lLXEFkN^3SkLeAU3Z>mLfAR3Z>oLeAQ3Z>PMgAo2Y>QMgAo2X>SMhAl2_8ULkLo0gJk2]8ZLiLl0jJi2]8]LUIKRN1V3P1VMf2]8`L[Hl0l1O]Md2\\8cLWHm0n1N_Ma2\\8fLRHQ1o1JcM^2[8jLoGR1P2HfM[2[8mLiGU1T2DiMY2Z8UM]GV1\\2^OlMW2[8@eIYNPNW2[8D`IVNUNV2[84oHhMeNT2\\8>dH`MoNR2]8c0]H^MVOn1]8i0WH\\MZOk1_8k0TH\\M]Oh1_8m0QH^M_Oe1`8n0oG^MBc1_8P1lG_MF`1^8Q1lG`ME_1_8R1jG`MH]1^8S1iGbMIZ1^8U1gGbMKY1^8V1eGbMNW1]8W1cGeMOT1^8X1aGfM1Q1^8Z1_GgM3n0^8]1[GhM6k0_8S4bGlK^8U4aGkK_8U4aGkK_8U4bGjK^8V4bGjK^8W4bGhK^8Y4aGgK_8Z4`GfK`8[4`GdK`8\\4aGcK_8]4bGbK^8_4bG`K^8`4bG`K^8`4bG`K^8a4bG^K^8b4bG^K^8d4`G\\K`8d4`G\\K`8e4`GZK`8g4_GYKa8g4_GYKa8h4aGUK_8k4bGTK^8l4bGTK]8n4cGQK]8P5cGoJ^8Q5bGnJ^8R5bGnJ^8S5bGlJ^8T5cGkJ]8V5cGiJ\\8X5eGgJ[8Y5eGgJ[8Y5fGfJZ8[5fGdJZ8]5eGcJ[8^5eGaJ[8_5fG`JZ8a5eG_J[8b5eG]J[8d5eG[J[8e5eG[J[8f5eGYJ[8g5eGYJ[8h5eGWJ[8i5fGVJZ8k5fGTJZ8l5fGTJZ8m5fGRJZ8o5eGQJ[8P6eGoI[8Q6eGoI[8Q6fGnIZ8S6eGmI[8S6fGlIZ8U6fGjIZ8W6eGiI[8X6eGgI[8Y6fGfIZ8[6fGdIZ8\\6hGbIX8_
6gGaIY8_6gGaIY8`6fG`IZ8`6gG_IY8b6gG]IY8d6fG\\IZ8e6fGZIZ8g6eGYI[8h6eGWI[8j6eGUI[8k6eGUI[8k6eGUI[8k6fGTIZ8m6fGRIZ8o6eGQI[8o6fGPIZ8Q7eGoH[8R7eGmH[8T7eGkH[8U7fGjHZ8W7eGiH[8W7eGiH[8X7eGgH[8Y7eGgH[8Z7eGeH[8[7eGeH[8\\7eGcH[8]7eGcH[8^7eGaH[8_7fG`HZ8a7eG_H[8a7fG^HZ8b7fG^HZ8c7fG\\HZ8e7eG[H[8e7fGZHY8h7fGXHZ8i7fGVHZ8k7eGUH[8l7eGSH[8m7eGSH[8m7fGRHZ8o7eGQH[8o7fGPHZ8Q8eGoG[8Q8fGnGZ8S8eGmG[8S8fGlGZ8U8fGjGZ8W8fGhGZ8X8fGhGZ8X8gGgGY8Z8gGeGY8[8gGeGY8\\8gGcGY8]8hGbGX8_8gGaGY8`8gG_GY8b8fG^GZ8b8gG]GY8d8fG\\GZ8d8gG[GY8e8hGZGX8g8gGYGY8g8hGXGX8i8gGWGY8j8gGUGY8l8fGTGZ8l8fGTGZ8m8fGRGZ8n8gGQGY8o8gGQGY8P9gGoFY8Q9hGnFX8S9hGlFX8U9gGkFY8V9gGiFY8W9gGiFY8X9gGgFY8Z9fGfFZ8Z9gGeFX8\\9hGdFX8\\9iGcFX8]9hGbFX8_9gGaFY8`9gG_FX8b9hG^FX8c9gG]FZ8c9fG\\FZ8d9gG[FY8f9gGYFY8g9gGYFY8g9hGXFX8i9gGWFY8j9gGUFY8l9fGTFY8m9hGRFX8o9gGQFY8o9hGPFX8Q:hGnEX8R:iGmEW8T:hGlEX8U:hGjEX8W:hGhEX8X:hGhEX8Y:hGfEX8Z:hGfEX8[:hGdEX8\\:hGdEX8]:gGcEY8^:gGaEY8`:fG`EZ8a:fG^EZ8b:fG^EZ8c:fG\\EZ8d:gG[EY8f:gGYEY8g:`010Od:YEg0" + } + ], + "question": "What is the relationship between , , and ?", + "choices": [ + "A. is playing with and jumping over .", + "B. is playing with and standing on .", + "C. is talking to and is beside .", + "D. is holding and standing on ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_352.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000272212.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "PVZ73l>2O1M2O2M1O2N200N2O101N1kA]Om=c0SB]Om=c0RB_On=a0QB_Oo=b0oA_OQ>h01O0\\OPB8P>EUB9Z>N101O010N2OjTe1" + }, + { + "size": [ + 480, + 640 + ], + "counts": "TVU22m>2O0O2O0000000O2L9\\ADX>c0N2N1000001O01O01O010O100O1O2M2O2M2O4K4M6I;F3M1O00YOg0O1O1O1O10000O11O001O2N2N4L2N2NCiBROV=n0kBSOS=m0oBUOmn0L4K5O13N6IWOiBFY=6iQn5" + }, + { + "size": [ + 480, + 640 + ], + "counts": "m7R7n7000000000001O00001O0000000000000000001O00000000001O000000000000000000000000001O0000000000001O0000000000000000000000001O0000000000000000001O000000001O000000000O1000000001O0000O1001O0000000000000000O10000001O00000000O100000000001O0000O2O0001O00000000000000O20O00000000000001O0000000O1UJfGQ5Y8oJhGP5X8PKiGo4W8QKiGo4W8QKjGn4W8PKjGP5V8PKkGo4U8QKkGo4U8PKlGP5T8PKmGo4T8PKlGP5T8mJoGS5Q8lJSHQ5m7oJ]Hg4c7YK^Hf4b7YK`Hf4`7ZKaHe4_7ZKbHf4^7ZKdHd4\\7\\KeHc4[7]KeHc4[7]KeHc4[7^KdHb4\\7^KcHc4]7]KcHc4^7\\KcHc4]7]KcHc4]7]KfH`4Z7`KgH_4X7bKjH\\4W7cKkH[4U7eKmHY4S7gKPIV4P7iKSIU4m6kK_Ii3a6VLfId3[6[LfId3Y6\\LiIc3W6^LhIb3X6^LhIb3X6]LhId3Y6[LXIT4h6lKQI[4o6eKoH]4Q7dKmH]4S7bKnH^4R7bKnH]4S7cKmH]4S7cKnH\\4R7eKnHZ4R7eKQIY4o6gKSIW4m6iKVIU4i6kKXIT4h6lKZIR4g6mK[IP4f6PL]Im3c6SL`Ij3`6WL`Ih3`6XLlHFWOR4l7WLmHLSOm3P8WLlH3nNf3V8WLlH5lNd3X8WLlH6jNd3Z8VLlH`4T7`KlH`4T7`KkH`4W7_KiHa4W7^KkHa4U7_KkHb4T7^KmHa4S7_KmHa4S7_KnH;eN^3]8WLnH7iNb3Y8VLRI1iNi3U8VLfIj3Z6VLfIi3[6WLfIh3Z6XLfIh3[6WLeIj3Z6VLfIj3Z6WLeIi3[6WLeIi3[6WLeIi3[6WLeIi3[6XLoH]4Q7cKiHc4W7^KiHa4X7`KgH_4Y7bKgH]4Y7dKeH]4[7eKjGT5V8nJcGW5]8f000000000000O11O0000000000O10000001O01O00000O10001O000001O00000000O2O00000000001O000000000000000000000000000000000000000000000000000000000001O00O1000000000000000000000001O0000000000000000000000000001O00O100000000000000000000000001O00000000000000001O0000001O00001O00000000001O000000000000001O00000000
00001O00000000001O0000000000000000001O00O1001O00000000000001O00O100000001O000000000000000000001O0000000000000000000000001O00000000000000001O00000000000000001O0000000000000000000000000000000000000000000000001O0000000000000000000aJjFT5W9iJkFW5V9hJjFX5V9hJjFX5W9fJjFZ5V9fJjFZ5V9fJjFZ5V9eJjF\\5V9cJkF]5\\900000001O00001O0000001O0000001O001O001O0000000000001O00000001O00O10000000000000000000001O0000001O00000000001O00000000O1000000001O00O10000000000000000000000O10000O100_OiJYGW5g8jJWGW5i8lJSGU5m8lJQGU5o8mJlFV5T9;000000000000000000000000001O0000000000001O00000000000000001O000000000000000001O0001O0000gF" + }, + { + "size": [ + 480, + 640 + ], + "counts": "PeZ74j>3N1O1N2N2N2O1O101N10000jA^On=b0QB@n=a0QB_Oo=b0oA@Q>g00000000O01O1O012N3M00000010O001O006K0O00000000001O0000000000000000000000000001O01O00010O00001O001O000000001O000000001O001O1O1O1O1N2O00001O0O101O0O2O1N3Lege0" + } + ], + "question": "Which statement accurately describes the state of the cows on ?", + "choices": [ + "A. All three cows, , , and , are walking.", + "B. All three cows, , , and , are lying down.", + "C. is walking while and are lying down.", + "D. is walking while and are lying down." 
+ ], + "answer": "D", + "type": "relation", + "image": "images/vqa_353.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000574520.jpg", + "mask_rles": [ + { + "size": [ + 399, + 640 + ], + "counts": "g`_32Y<4N3N1PDGj;9TDIl;=00000O1O101O1N[EIU96iFOV9OhF5W9JgF9Y9GeF;Z9EeF=Z9CeF`0Y9@gFa0Y9^OfFd0Y9]OfFd0Y9\\OfFf0Y9ZOgFf0Y9ZOgFg0X9YOhFg0Y9YOfFh0Y9XOgFh0Y9XOhFg0X9ZOgFg0X9YOiFe0X9[OiFd0W9]OiFa0X9_OiF`0W9AhF>Y9BiFEd0[OoN2Ei4c0TMGPO`0Db0l4WOaLFnNc0Ca0n4VOaLGmNb0C`0Q5WO`LFlNd0A?T5WO_LFlNd0AOB@c57^LFlNe0_ONFTOl5c0SLFlNf0^OKj5IlKFlNf0^OIl5KjKFkNh0^OGm5KjKFkNh0^OGGWOS6d0mKGkNg0]OJj5IoKEjNg0^OKi5IoKFjNe0^OLi5IoKFjNe0^OLj5HnKHjNb0_ONi5HnKHkNa0^OLl5KkKIkN?^OLm5LjKIlN?\\OLn5LjKImNe0TOFU6LjKJmN`1Y5fNkKIlN`1Z5gNjKJdNd1d5bNhKKUNFKP2Y6_NhKc2X4]MhKb2Y4^MhKa2X4_MhKa2X4_MiKa2V4_MjKa2V4_MjKa2V4_MjKa2V4_MkK`2U4`MlK_2T4aMlK`2S4`MnKa2P4_MTL]2l3cMYLX2g3hM]LT2c3lM`LR2_3nMdLo1\\3QNhLl1W3TNoLg1P3YNSMd1m2\\NTMd1k2\\NVMd1i2\\NYMc1f2]N[Mc1d2]N]Mc1b2]N^Mc1b2]N^Md1a2\\N`Md1_2\\NbMc1^2]NcMb1]2^NdMa1\\2_NeM`1dLlM]5d0PN_1bLoM]5b0TN\\1]LWN\\5=XN[1\\LYN[5iNAX1`0iN]OX1c0Z500000000O10000001O00O10000000000001O00000000000000000000000000000000000000000000000000000001O0000O10000000000001O000000O100000000001O000000O1000000001O00000000O1000000001O0000O1000000000000001O0000000000000000000O11O00000000000000O100001O00O10O11O0000000000000000000000000000000000O1001O00000000000000O1010O0O100000O2O00000001O00000O11O0000O100O1001O001O001O0000O100O100000000000000000000000000000000O100001O00001O000000000000O10000001O0000_O" + } + ], + "question": "Which statement accurately describes the relationship between , , and ?", + "choices": [ + "A. is in , while is over .", + "B. and are both in but are not touching.", + "C. is standing on , and both are beside .", + "D. is standing on , and is on ." 
+ ], + "answer": "D", + "type": "relation", + "image": "images/vqa_354.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000289417.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "gXn43a;4N2N00M3001N110SE4n9MoE7P:h0G:N1OI8M3N2O12O6J10O001N2\\OdEA1N\\:=SF@o9" + }, + { + "size": [ + 375, + 500 + ], + "counts": "SRf21f;000]j<0bUC1bG5P5KmJ, , and ?", + "choices": [ + "A. and are both beside .", + "B. is in front of , who is in front of .", + "C. is in front of , who is in front of .", + "D. is beside , and is behind ." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_355.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000439854.jpg", + "mask_rles": [ + { + "size": [ + 333, + 500 + ], + "counts": "hmT19o0JR8=gGDX8`0SGG3Jj8o0SGROl8X100ZOjNUHV1i7nNUHQ1k7QOTHo0k7SOTHm0k7UOTHk0l7VORHL26l70QHJ54j73PHI73i75oGH;0f78oGH=Me7 and ?", + "choices": [ + "A. is on and is riding .", + "B. is riding and is on .", + "C. is beside and is on .", + "D. is on and is riding ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_356.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000474164.jpg", + "mask_rles": [ + { + "size": [ + 640, + 633 + ], + "counts": "h`b03kc04M2N2N1O1O2N1O100O1O011O2N3M3L2O1O1O000000000O2OO1000000000O0100N1O1O110O01O1O010O100O1O100O100O01J7]O^d:l0W[E101O0000000000000000O1O1N2J6J6K6L3M3O1N3NTXP:" + }, + { + "size": [ + 640, + 633 + ], + "counts": "^i1h13jN^a0R2J1O1L4O1000000O1000000000000TNUMcBk2Y?01O00000000iMVMYCi2g00000O1001O^MXMlCh2T000^MWMlCj2f>O0000TMYM_Dg2a;YM^Dh2a;YM`Df2a;XM`Dh2]>OO1oLWMkDi2U;WMkDi2S;YMmDg2R;YMoDg2P;[MoDe2P;]MnDd2R;XMSEg2S>00000O11O0000`LXMhEh2X:XMhEh2i=O3M0000M3003M00M30000000YMj^Od2Va0300000YMj^Od2Ya0000M[Mk^Oe2Ua0300000000000000I7L4ZOmLZ@X3c?QNP@VOk0AUO1`00_O0k?a1W@^NQ21mM0R?m4_O>I7J6K5H8L4J6M3L4K5J6N2H8L4K5N2N2O1O1O1O1N2O1O100O1N2J6M300O100O100N20000O1000000O1000000000000O100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O100000000000000001O00000000000000O10000000000000000000000001O00000000000000000000000000000000000000O11O000000000000001O00000000001O0000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000001OO10000001O0000000000000000000000000000001O00000000000000O1000000000000000000000000000000000000000000001O0000000000000000000000000000001OO10000001OO1001O00000000000000000000000000000000000000001O000000000000000000001O000000000000000000001O000000000000000000000000000000001O0000000000000000001O0000000000000000001O000000000000001O000000001O000000000000001O000000001O000000001O001O000000001O00001O001O00002N001O001O2N1O001O002N1O001O1O3M1O1O001O2N1O1O002N2N1O001O1O2N1O001O2N2N1O00003M2N1O001O2N1O1O1O1O1O2N1O001O1O2N2N1O1O1O2N1O001O1O3M1O002N1O1O1O1O1O1O2N2N1O001O2N1O001O1O3M1O1O001O2N1O001O1O2N1O1O001O3M1O00001O3M1O001O1O2N1O001O1O2`MW@\\NO2N?3_ON303Ra06n^OGW1Nad0" + }, + { + "size": [ + 640, + 633 + ], + "counts": 
"WUm:1WP50Q\\O1fWL2N1O1O1O2N1O1O100O1O2N100O1O1O1O2N1O1O1O1O2N100O1O1O1O101N1O1O1O1O001O100O1O2N1O1O100O1O1O1O100O1O1O1O1O1O1O1O1O1O10NQB" + }, + { + "size": [ + 640, + 633 + ], + "counts": "RZn44jc05g@I`K1n>;\\EKb:8YEMd:4\\ELc:6[ELc:9YEId:?TECk:>PEHm:;PEGn:;XDWOaMb0V>:ZCiNoNf1g=CXBiNO8Bi1V>YOUBkNLd2n=cNQBnNHf2V>_NnAV2Q>m1000000000000001O000O100O10000000000000000000000000000000000000000000O1O1O2O000001O001O1O1O1O1O1O1O1O001O00000dKiAo2W>kLUBP3k=eLdBV3\\=iLiBS3W=jLoBS3R=kLQCS3oL9G4L3M2hNP_O\\O[a06l^OFXa0DX^O0o02eb0Kl`e5" + } + ], + "question": "What is the location of ?", + "choices": [ + "A. Inside .", + "B. Sitting on .", + "C. Sitting on .", + "D. Sitting on ." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_357.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000565391.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "e6173a2M\\M0R>3nBM13QO14O[1NbN3NNM0e=n2\\BiNOOOZN218Z4f7f7K1N2O001O1O001O001O001O001O1N101O00001O001O001O1O1O00001O001O001O1O001O001O00001O1N2O1O001\\LPC6P=]3001O1O1O1O001O00002N001O1O001O1O1O1O1O001O002N1O001O1O2N2N1O2N1O1O1O3M2N2N4L2N1O001O2RKaAW4b>eK`AHNL0W4f>n04L4L3M5K6J;`KZ@P4m?N1O1O3M1O1O00003M1O1O00O1002N01O03M0O100001O00O1001O0000O100001O0000000000000000000000000000000000O11O00O10000000000000000000000000000000000000000000000001O00000000000000000jE`Ll3`3UL_Lk3a3QL_LS4a3V6000dE_LY4a3S6000bE_L]4a3cK_L]4a3cK_L]4a3Q600000000000000000000000000000000000000000000000000000000000000000000001O00000000000000000000000000000000000000000000000000000000000000000000000000001O0000000000000000001O000000000000000000001O0000001O001O0000001O0000001O001O00001O1O001O001O001O1O001O1O1O1O1O2X@lK`?\\4N2N2N2N2N2N3M2N2N2N3M3M2N2[EdJb6_5n36J3M2N2N3M3M3M2N1O3M5K3M2N2N3M3L4M3N1N3M3M3M1O2N2N3M3M1O2N2N4L2N2N2eEjGh8X8UGjGj8Z8nFbGPO4R:]8kF_GSO4R:_8mFbGR9a8jF`GV9b8eFbGZ9b8_FaGa9b8\\F^Gd9c8ZF^Gf9d8QFoF09NIP:d9PF\\FP:c9QF^Fn9b9RF^Fn9j901O000000001O00000N31N00LoEYFR:f9oEYFQ:g94
0lEXFP:h9oEYFQ:g940000000001O00000O11O000000000001O000O11O0aM_FYHNo1c9h5_FYHOn1d9g5]F[HOn1f9]5XFfH5NOm1d9\\5]FgH:1Db1b9i5TGdH[Oc1a9i5^GVJa8g5ZFgHV1c1_8]N\\FS7X1bJ\\8[N\\FS7W1cJ]8`5dG`J]8`5bG`J_8_5aGaJa8]5_GdJ`8\\5`GdJ_8]5aGcJ^8^5bGbJ`8\\5_GfJ`8Y5_GiJa8W5YGoJd8T5\\GlJd8T5\\GlJe8S5[GmJg8Q5YG^IQOm0f9e5YGZIUOQ1`9h5YGXIWOP1`9h5YGXIWOQ1`9f5YGUI[OU1]9e5XGUI]OU1W9i5]GQI]OV1V9i5]GQI]OV1T9k5_GoH]OV1T9k5_GPI[OV1V9j5_GoH\\OW1V9j5VGgHA27\\1R9k5UGiHAN9_1Q9j5VGhH2^1h8j5VGgH3`1f8i5VGiH3^1c8m5YGfH5\\1_8R6lF_HO<3D:_1g8S6lFRIL^O2O0O9a1k8R6nFUIM^O:[1k8S6mFUIM^O:Z1k8V6kF\\I4nNOa1P9c6mFaH3]OMb1Q9Z9XKWE]LKV12GP;^NoD0Ob0o5_O^J5c0Iac0" + }, + { + "size": [ + 640, + 480 + ], + "counts": "aX`74lc04K3N2N2N1O:F3M00000000O1O10000000O01000000O100OX]OAQb0?k]OEUb0:k]OGUb08k]OIUb07k]OIUb06k]OKUb05k]OKUb04k]ONTb01l]O0Tb0Om]O1Sb0Ol]O2Tb0Mm]O3Sb0Ml]O4Tb0Km]O5Sb0Km]O6Rb0In]O8Rb0Go]O9Rb0Fm]O;Vb0Aj]O`0Yb0\\Og]Oe0[b0XOf]Oh0[b0VOe]Ok0hb00O1O1O1O2O0O10000O2M200O1000001O000O11O0001O0000000000001O000000000000001O000000001O000000000O1001ZO]]OKh9" + }, + { + "size": [ + 640, + 480 + ], + "counts": "i]f34jc02O100O2N1O1O1@GY]O9fb0JX]O6hb0LV]O4jb0MT]O4lb0LT]O4lb0MS]O3nb0Nm\\O5Sc0<0O1000O100O100O1O100O10O1001O0000000000000O100O10O010000O10000002N1O0000000000000000001O00O1O101N1O101O0O1000000O100000000000000001O00001O00000O11O000010O000001O00000001O0000000000000000010O00000000000O101O000000000000000O10000000001O0O1000001O001O00001O001O0O3N002N1O2N1O1O2M2O1O2M5K\\jb2" + }, + { + "size": [ + 640, + 480 + ], + "counts": "n??240_OO30N11OO171H4<21KEi?o2Z@_MLBj?o3001O001O001O001O00001O001O001O1O001O001O001O001O1O001O001O001O001O1O001O1O001O001O1O001O1O001O1O001O001O001O1O1O1O001O1O1O001O1O1O2N1O001O1O1O1O1O1O2N2N1O1O2N2N1O1O2N2N1O2N2N3M2N2N3M3M4L5K6YOW]OK`c0O3M2N1O1O1O2NQhY7" + }, + { + "size": [ + 640, + 480 + ], + "counts": 
"T^h0191N11Od`0\\3F0000000000001O000000000000000000000000000000000000000000000000000000O100001O000000O1000000000000000000000000000000000000000000001O0000000000000000000000000000000000000000000000000000000000O10000001O00000000000000000000000000000000000000000000000000000000000000000000000000000000001O000000000000O11O00000000000000000000000000000000O10000001O0000000000O10000000000000000000000001O00000000000000000000000000000000O1000000001O00000000000000000000000000000000001O000000000000001O0000000000001O0000000000001O00000000000000001O00001O00001O00001O001O00001O001O001O1O001O00001O1O1O1O001O1O1O3M7I3M3M2N2N2N1O4L2N3M2N3M4L2N1O1O2N8H3M2N2N2N4L2N2N3M2N5K2N2N2N2N6J2N1O001O2N5K5K1O3M3M3M1O2N2N2N3M1O1O00000000001O001O0000000000000000000000000000000000000000000000000000000000000000O1O1F:GSDYHS and ?", + "choices": [ + "A. ", + "B. No object is between them.", + "C. ", + "D. " + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_358.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000319607.jpg", + "mask_rles": [ + { + "size": [ + 640, + 640 + ], + "counts": "cim2g1:gNVa0f3ZNe0Z@ZKg0OV=b6ZOQ1hB]HO2k;U8I3M1O1M4N10001O0O1O100O10001N100000000000000001O00001O010O1O001O010O2N1O1O1O1O2N1O1O0O1000O10000001N1000000O2O00001O0O2O001N105JW1jNR1hJiAb2V?TLi@N=h3o?^Lk_Od2n`0I7[Nd^O_O24aa0OS2Bh]o0MXiQ6" + }, + { + "size": [ + 640, + 640 + ], + "counts": 
"WlQ1<_c07L2N2M3M3L4M4M3M2M4L5K4N2O1N3M3M4M1O1N2O3L2O002N3L3N1O2N1N2N3N2N1O1O2M4M1O1O1O2N1N2O1O003M4L1N2O2N1N3M3N1O1O0O4M1O1O2N1N3N1O0O3N2N3L3N1O1O2M3M2O2N001N5L1O1O2M3M2O1O2N1N3N3M1O2M2O1O003NjI]Bl5f>RJdAODa4h?Bm1YLe^OF:=QY`0@YZ@0Pd05k[O3L20O00O100]\\OF`c0:_\\OJ]c07c\\OJ\\c03e\\OI14Wc0c0M2O1N1O100O1O2N1O1N4M2N4L001N3N1M3O001N4L5K3M101O1O1O3M1N102N1jGcNgM^1W2EgFnMl5U1SJ0Z9T1`LkNXJ0W9Y1nFdMV5R1eJ1V9j1PL^Oo3e0PLZOP4f0QLVNhJHX9S2oKTNkJHU9V2SLPOm3R1TLiNeJQNW9V3WLeNm3\\1SLaNo3_1RL^No3c1SL[Nm3f1SLYNl3h1ULVNl3j1ULUNk3k1ULTNk3n1XLnMh3R2]LiMc3X2]LhMb3X2aLeM_3[2bLdM^3\\2fL`MY3a2gL_MX3b2iL^MV3b2kL]MU3c2lL[MU3d2PMXMo2i2XM`LcI:T9V3bMiL]2W3cMiL]2V3dMkL[2U3gMiLX2Y3iMfLU2[3mMcLS2\\3QN`LP2`3W7101N1000O11N101O001N2O0fDQMc5P3[JUMa5l2]JXM_5j2`JXM^5i2aJYM\\5j2_JYMa5i2\\JYMb5j2\\JVMd5m2YJSMg5P3UJQMk5S3PJWMf5l2VJZMf5h2XJYMg5i2WJXMh5j2VJWMh5n2TJVMg5n2VJSMi5n2VJRMj5o2UJSMi5P3UJUMd5P3[JoLkLjM]8Y5gJYMW5i2gJXMW5m2eJTMZ5n2gJQMV5R3hJPMV5S3gJnLX5S3hJlLX5U3gJkLX5Y3eJgL[5[3dJdL\\5`3`J`L`5a3`J^L`5l3VJSLj5Q4TJnKk5T4UJkKk5W4SJiKm5X4RJhKn5Z4PJfKo5^4nIbKR6a4jI`KV6c4fI^KY6d4eI\\K\\6e4cI[K]6g4`IYK`6j4^IUKc6l4[ITKe6m4[ISKe6m4\\ISKc6n4\\ISKc6o4nI_JP6d5oI[JQ6f5nImISNcNo7b7mIiIYN]NKOn7m7mIgIa6Z6fHRHE`1f7`6bHQHI_1c7b6bI]I]6d6cI]I[6e6dI[I[6h6dIVI\\6k6fIRIY6o6hIPIX6P7iIoHW6Q7jInHU6T7lIjHT6W7lIhHS6Y7mIgHS6X7oIfHQ6Y7TJbHn5]7TJaHm5_7TJ`Hk5`7VJ_Hk5`7VJ`Hj5_7XJ_Hh5b7WJ^Hj5a7XJ]Hi5c7YJ[Hf5f7ZJYHg5g7YJZHe5g7WJ]Hh5d7TJcGfM62>o7i7ZJkGkM6m7P8WJiGmM4o7S8SJkGmM2JER8c8SJiGnM2n7V8SJkGkM2Q8S8UJiGlM2o7U8VJiGlMNNEP8e8VJVHkMUOn7g8WJSHkMXOk7h8YJoGmME]7[8hJnGPNCX7^8iJnGSN]OEN_7g8jJnGQNAU7a8mJlGmMDU7`8PKlGjMDU7_8VKiGfMFU7`8WKhGhMEQ7d8^K^GcMMo6f8WL_Ga3c8]L^Gb3c8`LZG`3g8aLVG`3l8_LRGb3Q9e31O00004L5K4L1O004L1O2N1O1O1O3M2QFWFc9T:O004L4L001O00002N2N1O2N1O1O1O00FQGbEP9_:PG`EP9a:oF_EQ9a:oF_EQ9`:PG`EP9`:PG_EQ9Q:nFYF2DQ9R:PG[FS9e9nFZFR9f9PGXFP9h9SGSFo8l9c0O1O1N2N2O1O100O100N2O1N200N2O1O10000O100M3O100000000O100O1N2N2O1O1O1000000O10000O100O1O1O1O10000O100N2O10000O10000O1O1O1O1O100O10000O1O100O10000O10000001O0000002N1O00001O1O3M001O00001OO10000
00O1O1O1O1O100O100O100O100O1O10000O1O1O1O100O100O1O1O1O10000O100O1O100O1N20000O100O100O1O100O100O1O100O1O100O1O1N200O100O100O100O100O1O1O100O1O100O100O3M2O1N3M2N3M3N2M2O1N2N2NBeI[CY6cC\\BY2i=hMXBT2j=mMVB>D_OW>3VBo1k=TNSB=PB;GVOZ>" + } + ], + "question": "What is the spatial relationship between and ?", + "choices": [ + "A. is in front of .", + "B. is in front of .", + "C. is on top of .", + "D. is behind ." + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_359.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000427160.jpg", + "mask_rles": [ + { + "size": [ + 512, + 640 + ], + "counts": "Sjn73k?3N2N1O2N2N2N102M2N2N1N3O1N2N2M3N2O0O2M3N2M3N2N2O1N2N2O001N2O2FUN[Bn1_=XN\\Bl1`==L3[E^M[7S3_HVMY7n2aHWM\\7m2aHVM\\7n2aHSM^7o2`HRM^7Q3`HQM^7>`F]1Q2VN^7P3bHQM\\7P3dHPM[7P3fHQMY7o2gHRMW7o2hHRMW7o2iHRMV7n2jHRMU7o2kHQMU7n2lHSMS7m2lHTMS7T2oFcMT1]OoN26j0h8R2RGdMP1U1m7W1TGcMP1U1m7W1TGdMP1S1m7W1UGfMn0Q1o7W1TGhMo0n0n7Y1UGiMn0l0n7Y1VGjMn0j0n7Z1VGlMm0h0n7[1UGmMo0f0m7[1VGoMm0d0o7[1VGPNm0b0o7\\1UGSNm0?o7\\1VGTNm0>n7]1UGUNo0;n7^1UGVNo09n7_1UGXNn06o7a1SGYNQ12n7c1SG[NQ1Nn7e1RG]NR1Kn7g1QG^NS1EP8l1mF_N];`1dD_N];`1cDaN^;]1bDcN_;]1`DdN`;[1aDdN`;[1`DeNb;Y1]DiNc;W1\\DiNe;V1ZDlNg;S1XDmNi;R1WDnNk;Q1SDQOm;h0XCgNl0`0n;f0YD[Oh;c0YD]Oh;c0WD]Oj;c0UD\\Om;c0SD]Oo;b0oC_OT<>lCBY<:fCE_<7bCH`<6`CIaIaAoN^>P1:O000J60010J41KZOQAf0T?0O1010O01N20Mn@XOQ?h03O02M3O000000001O10O000000100O0100O0010O01N110O010O010O00O1001N1O2O000000001O1O0nLPOTGR1g8UOUGm0j8VOoFn0n8WOPGi0P9YOlFi0S9ZOiFi0T9\\OhFe0U9AgFa0R9GkF:U9GiF;T9HjF9T9JiF9U9IjF7S9LjF7U9KhF7X9JcF;[9F_F`0a9BWFd0h9kN_ETOf0S2k9eNeEVO?U2l9fNhEQO;[2m9bNhFb1W9\\NlFc1U9[NmFc1T9[NmFf1S9XNPGg1P9ZNoFg1Q9XNoFg1R9XNoFg1R9VNQGj1P9UNPGk1P9TNoFm1S9RNnFm1R9SNnFn1R9RNoFl1Q9VN^FYOUO^2`:YNWF_OWOW2c:ZNTFAUOX2h:VNQFFSOV2m:UNmE]2T1kLn6h0jG`2T1oLo6c0iG`2R1fL[N=g8>iG`2X1TMo6=fG_2^1SMl6`0bG^2b1SMl6a0^G\\2g1TMk6T4TInKl6Q4UIPLj6o3WIRLh6m3XITLh6i3ZIYLe6f3[I]Lc6b3]I_Lc6`3]I`Ld6_3\\IcLc6\\3\\IgLc6Y3]IgLc6Y3[IiLe6X3ZIhLf
6X3YIiLg6X3WIjLh6W3UIlLj6T3UIoLi6R3TIPMl6P3SIQMm6P3PISMo6m2PITMP7m2nHUMQ7k2nHWMQ7j2mHWMS7j2jHZMT7f2iH^MV7b2iH_MW7a2hH\\LTO0T8e3fHZLYO0Q8f3fHYLCHg7o3eHYLHEd7c2lGSNf0GJCd7a2oGUNa0HMAc7`2SHUN;H6@[7d2THUN7H>]OW7e2VHVN2Jb0[OV7d2WHWNOJf0ZOT7e2WHWNOIh0ZOR7e2YHWNLJk0YOP7d2[HZNGJo0WOX6NPIf24[NBLS1TOS63TI]29aN\\OKT1TOQ65VIT2a0hNROLW1ROo5?XABh>>WADg>=XADh>=SAFn>f0O00ESADm>=TACm>?QA@P?g01O100O1O1O2O1N2N3L4I8KPdn3MU\\QL5U@Ka?46N2O100O11OOg?1Y@1OOgo30ZPL00ObbZ1" + }, + { + "size": [ + 512, + 640 + ], + "counts": "o=P2P>0000000000000000UOXB^Og=a0^B\\Ob=c0bBZO^=f0cBYO]=f0dBZO\\=f0eBYO[=f0gBYOY=g0gBXOZ=g0hBYOW=f0kBYOU=f0mBXOT=h0nBVOR=i0S10000O10OO2O10000O10000000000O100M3O1O1M3000000O10lAEc<:^CGa<9_CGa<6SCLUOOh=3RC>n<_OSCd0l<\\OTCd0l<[OUCe0k<[OTCf0l0O100M3O100O100O100O1O1O1O1O10000000000M300O1O1O010O100O2O0O100O1O1N11001N100O1O1O1O10O0100000000O101O0N101O1O100O1O100O10000O10000000001O0000000000000000000000mNXBNh=M`B0`=OcBO]=NgB0Z=NjB0V=MWB[Od0h0U=MmB3S=LoB4P=LPC4P=LQC3oN1N2O00102M000000001O0O11O01O0000oNHZB8d=LZB4a=5cADd07g==WBBg=c0XB\\Oh=d0XB\\Oh=e0WB[Oh=f0XBZOg=i0VBXOh=l0VBUOh=l0XBTOh=m0WBSOh=o0VBROi=Q1TBoNl=c100O1O100O2O0O1M3O0010000000000O10000O1O100000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000O11O00000001O000000O100000000001O000000O100001O000O10000000O10000001O0000000000O100001O000000000000O100001O00000O100000O10000001O0000000000O100001O00000000000000O1001O0000000000000000000000000000000000000000000000000000O10000N2N2N2M3O1O1M300O1O1O100O101O0^NmAW1_>N2O00000O2@^A@c>6hAGZ>7hAHY>3WAHb02[>NnA1Qoi18h_VN3M1O1O1O1O2N2M4M2N1N1N2M3K5K5M3O1O100O1000000O11O1O001O001O1O001O00001O00000000O100O1O1M3O1N2O1O1N2N2O1O1O1M3N2N2O1O1L4N2O1O1000000000000000000000000000000000000000000000000000O11O0000000O10000000O11O00000000000000O1001O000000000bB" + }, + { + "size": [ + 512, + 640 + ], + "counts": 
"Vlo54j?3M2N3N1N2N3M2O1N3N2M2O2M6K2M4M2N2N3L4M3L3N3M6J5dCkMd:X2UEoMh:R2REUNl:k1PE[No:d1WDeM;R1\\;a0WDTO2YO6X1_;;[DRO0]O0[1d;7[Dk0d;VO[Dl0d;h1O1O1O10O0100N2O0O2iDSLZ:o3_EXL_:e4O100O1O100O100O100O10000O10000O10000O10000O100O100O10000O100O100O100O1O1iNTJ[G2e0j5i7iJRHX5i7oJUHQ5h7TKVHl4g7XKWHi4h7YKWHg4g7[KYHe4e7_KXHb4g7`KXH`4g7m1O100O1O1000000O100O100O10000O100000000000000001O000000001O001O001SJVHb3j7]LYHa3g7\\L\\Hd3d7XLaHg3`7SLfHl3Z7QLiHo3X7mKlHR4U7mKkHS4V7kKlHT4U7kKlHd2TO\\MQ8OlHc2UO^MP8LmHW4T7hKmHW4U7eKnHZ4U7bKmH]4V7`KkH[3nNgKMk0a8JgHn2hNQM, , and interacting with each other and the environment?", + "choices": [ + "A. is looking at , while is running on .", + "B. and are running on , while is looking at them.", + "C. is looking at , while runs on .", + "D. is looking at , and both and are running on ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_360.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000427160.jpg", + "mask_rles": [ + { + "size": [ + 512, + 640 + ], + "counts": 
"To59f?2N2N2O1N2O1O1N1O3M3N1O1O1O1O1O0O2O0N2O2O000O100O1O10000O1JhNbAY1b>IaAoN^>P1:O000J60010J41KZOQAf0T?0O1010O01N20Mn@XOQ?h03O02M3O000000001O10O000000100O0100O0010O01N110O010O010O00O1001N1O2O000000001O1O0nLPOTGR1g8UOUGm0j8VOoFn0n8WOPGi0P9YOlFi0S9ZOiFi0T9\\OhFe0U9AgFa0R9GkF:U9GiF;T9HjF9T9JiF9U9IjF7S9LjF7U9KhF7X9JcF;[9F_F`0a9BWFd0h9kN_ETOf0S2k9eNeEVO?U2l9fNhEQO;[2m9bNhFb1W9\\NlFc1U9[NmFc1T9[NmFf1S9XNPGg1P9ZNoFg1Q9XNoFg1R9XNoFg1R9VNQGj1P9UNPGk1P9TNoFm1S9RNnFm1R9SNnFn1R9RNoFl1Q9VN^FYOUO^2`:YNWF_OWOW2c:ZNTFAUOX2h:VNQFFSOV2m:UNmE]2T1kLn6h0jG`2T1oLo6c0iG`2R1fL[N=g8>iG`2X1TMo6=fG_2^1SMl6`0bG^2b1SMl6a0^G\\2g1TMk6T4TInKl6Q4UIPLj6o3WIRLh6m3XITLh6i3ZIYLe6f3[I]Lc6b3]I_Lc6`3]I`Ld6_3\\IcLc6\\3\\IgLc6Y3]IgLc6Y3[IiLe6X3ZIhLf6X3YIiLg6X3WIjLh6W3UIlLj6T3UIoLi6R3TIPMl6P3SIQMm6P3PISMo6m2PITMP7m2nHUMQ7k2nHWMQ7j2mHWMS7j2jHZMT7f2iH^MV7b2iH_MW7a2hH\\LTO0T8e3fHZLYO0Q8f3fHYLCHg7o3eHYLHEd7c2lGSNf0GJCd7a2oGUNa0HMAc7`2SHUN;H6@[7d2THUN7H>]OW7e2VHVN2Jb0[OV7d2WHWNOJf0ZOT7e2WHWNOIh0ZOR7e2YHWNLJk0YOP7d2[HZNGJo0WOX6NPIf24[NBLS1TOS63TI]29aN\\OKT1TOQ65VIT2a0hNROLW1ROo5?XABh>>WADg>=XADh>=SAFn>f0O00ESADm>=TACm>?QA@P?g01O100O1O1O2O1N2N3L4I8KPdn3MU\\QL5U@Ka?46N2O100O11OOg?1Y@1OOgo30ZPL00ObbZ1" + }, + { + "size": [ + 512, + 640 + ], + "counts": 
"o=P2P>0000000000000000UOXB^Og=a0^B\\Ob=c0bBZO^=f0cBYO]=f0dBZO\\=f0eBYO[=f0gBYOY=g0gBXOZ=g0hBYOW=f0kBYOU=f0mBXOT=h0nBVOR=i0S10000O10OO2O10000O10000000000O100M3O1O1M3000000O10lAEc<:^CGa<9_CGa<6SCLUOOh=3RC>n<_OSCd0l<\\OTCd0l<[OUCe0k<[OTCf0l0O100M3O100O100O100O1O1O1O1O10000000000M300O1O1O010O100O2O0O100O1O1N11001N100O1O1O1O10O0100000000O101O0N101O1O100O1O100O10000O10000000001O0000000000000000000000mNXBNh=M`B0`=OcBO]=NgB0Z=NjB0V=MWB[Od0h0U=MmB3S=LoB4P=LPC4P=LQC3oN1N2O00102M000000001O0O11O01O0000oNHZB8d=LZB4a=5cADd07g==WBBg=c0XB\\Oh=d0XB\\Oh=e0WB[Oh=f0XBZOg=i0VBXOh=l0VBUOh=l0XBTOh=m0WBSOh=o0VBROi=Q1TBoNl=c100O1O100O2O0O1M3O0010000000000O10000O1O100000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000O11O00000001O000000O100000000001O000000O100001O000O10000000O10000001O0000000000O100001O000000000000O100001O00000O100000O10000001O0000000000O100001O00000000000000O1001O0000000000000000000000000000000000000000000000000000O10000N2N2N2M3O1O1M300O1O1O100O101O0^NmAW1_>N2O00000O2@^A@c>6hAGZ>7hAHY>3WAHb02[>NnA1Qoi18h_VN3M1O1O1O1O2N2M4M2N1N1N2M3K5K5M3O1O100O1000000O11O1O001O001O1O001O00001O00000000O100O1O1M3O1N2O1O1N2N2O1O1O1M3N2N2O1O1L4N2O1O1000000000000000000000000000000000000000000000000000O11O0000000O10000000O11O00000000000000O1001O000000000bB" + }, + { + "size": [ + 512, + 640 + ], + "counts": 
"Vlo54j?3M2N3N1N2N3M2O1N3N2M2O2M6K2M4M2N2N3L4M3L3N3M6J5dCkMd:X2UEoMh:R2REUNl:k1PE[No:d1WDeM;R1\\;a0WDTO2YO6X1_;;[DRO0]O0[1d;7[Dk0d;VO[Dl0d;h1O1O1O10O0100N2O0O2iDSLZ:o3_EXL_:e4O100O1O100O100O100O10000O10000O10000O10000O100O100O10000O100O100O100O1O1iNTJ[G2e0j5i7iJRHX5i7oJUHQ5h7TKVHl4g7XKWHi4h7YKWHg4g7[KYHe4e7_KXHb4g7`KXH`4g7m1O100O1O1000000O100O100O10000O100000000000000001O000000001O001O001SJVHb3j7]LYHa3g7\\L\\Hd3d7XLaHg3`7SLfHl3Z7QLiHo3X7mKlHR4U7mKkHS4V7kKlHT4U7kKlHd2TO\\MQ8OlHc2UO^MP8LmHW4T7hKmHW4U7eKnHZ4U7bKmH]4V7`KkH[3nNgKMk0a8JgHn2hNQMS1Z9X3jFbL]OVOi9Y4iFaL]9^3dFbL^OUOh9Y4mFfLR9Y3SGgLi8V3[FlKS1d0c8[3aFPLR1=^8b3bFPLX;o3=O1N2N2O100N200O1O1O100000000000000O1000000000000001OO1O100000000O1000000000000000000O1001O0000O1001O00O100001O0000001O2N1O00000000O11O001O1O4L00N22N1O1O00000000000000O100000000O100YMnC_1SgNmAW1S>iNPBT1a>M2N2N2M3N3M1N2O2N2M6K4K_\\b3" + }, + { + "size": [ + 512, + 640 + ], + "counts": "\\jj33i?5O0O1j@K`>6^AM_>6^AM_>5`AL_>5`AIGIh>`0_AHKHb>b0bAFd>:[AGd>:[AFf>:XAIf>8XAKg>d02O2O0O2O1O1O00100O101N2N2M3N2N2M4K5Loed5" + } + ], + "question": "Which statement accurately describes the actions of ?", + "choices": [ + "A. is running on alongside .", + "B. is wearing and running on .", + "C. is running on while wearing .", + "D. is wearing and is looking at ." 
+ ], + "answer": "C", + "type": "relation", + "image": "images/vqa_361.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000500477.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "0001O1O1O1O2N1O1O1O1O2N1O001O2N1O1O001O2N2N001O2N1O1O1O1O2N1O1O100O1N2O1O00101N1O1O001O1O1O1O001O1O1O1O1O001O1O1O001O1O001O1O1O1O001O001O1O1O1O1O1O001O1O1O1O2N1O002N001O1O1O1O1O1O1O1O1O1O1O1O1O1N2O1O1O1O1O1O1O1O100O1O010000000002N00O11O00001O01O00010_EmLR9T3fFTMY9m2bFXM]9i2ZF_Mg9m302O0O2N10N101O0O1O1O2O0O2O0O1O2O0O1O1O100O1O1N2O1O1O2N01gEhKi9X4TFkKl9e41OO100010O001O0O100O1O2N1O1O1O1O3M001N2O2N100O1O1O2O1N2O02N02N1OI6M4M2N3MTLfEV3Y:j0M3N3L3OSLQFj2l9W1M3MUL[F`2b9`MaF_2^9aMfF]2W9dMkF[2R9gMPGX2n8iMTGV2j8lMYGQ2f8nM]GQ2b8oM^GS2`8lMaGU2^8jM\\G_2b8aM^G`2a8`M[GUODm2P9oMVGk2h8i1N1O1N101O0O01L4E:N3N101L4bMfFnNb9Q1_FnNa9Q1aFoN_9o0bFRO]9l0dFWO[9d0iFaMBd1c9j0gFFW99jFIU97gFMY93eF0Y9Q1cFTN\\9i3O1N10001N1000dNkFnLS9S3RGiLm8W3WGfLh8[3YGcLf8_3[G`Ld8`3_G^L_8c3dGZL\\8f3gGXLX8i3iGVLV8j3lGULR8l3PHRLP8n3QHRLm7n3VHPLi7Q4XHoKg7P4[HoKd7Q4^HoK`7Q4bHoK\\7Q4fHnKZ7Q4hHoKV7Q4kHoKT7Q4nHnKQ7S4PIlKo6T4SIkKl6V4TIiKm6W4TIhKk6Y4VIeKi6]4YI`Kg6b4YI\\Kg6e4j10M3O1O1O1N2O1O1N1O20OO2O0O11O0O1O001O002M2O2M3M3L3O2N2O1O1O1O101M2O1M2O2M3N2N2N2N3M2N2M3M2L5M3I7L4K5J7L3M3N2N101N2O1O2M2O1O1O1O1O1O001O1O1O2N1O1O1O1N2O1O1O0O3N1O1O1O1N2O1N101N2O1N3L3N2N2O1N2N1N2O2O2M3K4N2Oo\\i3" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"i3132O0O]6R8aIPH2Nc6Q88O11O4ZIkGZ6T8aIRH0JZ6T8iInG]6P8:M301N10000010O0PISH70]6l7=0001O1O001O0021ON1O002O2MfH_HX7_7gHaH[7]7eHcH^7Z7cHdHa7Y7_HgHd7V7[HlHh7P7XHPIj7n6VHRIk7Q71JUHTIn7j6SHUIn7j66N00011NiGZIU8i6OO1010O010O01O00001O0001O101lGVIo7n61M2O101N2N1O10010002NN1O011N4M1N1O2O02OO`GbI]8`600010O010O001O1O1O1O00001O2N000O110O1O1O0001O1O1O001O101N100O00002M2O001O1N5L2N1O0O3O3L1O1O002N6J3M0O100O2N1O001O2O0O2OO1001O1O1O00001O1oHYHg6h7VI[Hi6e7WI\\Hh6d7XI]Hg6o7O010O1O001O1O1O001O2N13M0O1O00100O1O1O0O4M1O1O1O1O1O100O0010O2O0O100O02O00O10O0100O100000O0101N10O00100O1O0010O00001O002M2O2N20OO001O1O00001O1N1010O100000O001O001O00001O00001O0000O1000000O10000000O1001N100000O100000O100O1O2N1000001N1O100001O001O0000O10000000000000000000000O10001N010O11O00O100O10O10001O1N2O1O0100O0001O1O001O001O000000000000N2O10000O100O1000000O1O10000O100O1000000O1000000000000001O0000001O00000YJkGc4U8[KnGd4R8[KPHd4P8[KRHd4o7[KRHd4n7[KTHd4l7\\KUHc4k7\\KVHd4j7ZKYHe4g7WK]Hi4d7TK^Hl4c7QK`Hn4a7QK_Ho4d7mJ]HS5f7gJ]HY5e7dJ]H[5f8N1O001O001O00000000000000000000001O1O1O0000001O00001O000000000000001O00000000001O001O00001O001O001O00000000001O000000001O0000001O00000000000iKoE_3R:_LQFBHj3W:bLXF\\3h9cL[F[3f9cL\\F\\3f9`L]F_3d9_L^F`3c9\\LaFc3_:O1O1O001O1O1O2N1O1O2M2O1O1O2N1N2O8G3[MkC[12[O] and ?", + "choices": [ + "A. is attached to .", + "B. is cleaning .", + "C. is lying on .", + "D. is cleaning ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_362.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000485844.jpg", + "mask_rles": [ + { + "size": [ + 396, + 576 + ], + "counts": "Pmi21[<0RU12kjN4L8I2M4M2M3N0O2O0O2O0O101O000O10000O1O1L4O0SFmNa8U1]GoN_8R1_GSO]Oh0\\76VIUOVOn0_7OXIn0e6SOYIR1c6oNZIV1c6kN[IY1b6hN\\I\\1a6eN[I_1d6bNZIa1c6aN\\Ib1_6aN_Ib1]6`NcIf1V6\\NhIU2d5QNZJS2`5PN^Ja2Q5aMnJd2l4_MRKe2f4e2K4M\\L_KA`4W4M2N200O1O1K5O1O1N2O1O1O1N2O1M3O1O001N2O1OM3O1L4O2O1O1N3M4K4L4M3M2N3M2N2M3N3L3WOQKdIO6U5R6[KdIk4W6h0M3L4L4K6J6K7I5L4K6K6K4K5J4M5J:F5K7TNRG0j:K4fNSE>VXj2" + }, + { + "size": [ + 396, + 576 + ], + "counts": "n^Z26T<3M2N3M2O2M2O1O1O1N3N1O1N2O1O001O1O1O101OO10O10O1N2K4N3N2N2O1N1O20000000O1000000O100001O1O2N6J3M3M1N]bP4" + } + ], + "question": "What is the relationship between and ?", + "choices": [ + "A. is holding .", + "B. is holding .", + "C. is on .", + "D. is beside ." + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_363.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000485844.jpg", + "mask_rles": [ + { + "size": [ + 396, + 576 + ], + "counts": "QWd21Y<4N00001O000000O100000000001O000000001O0000001O0000000O100001O0000001O0O3Lhjk3" + }, + { + "size": [ + 396, + 576 + ], + "counts": "Vhk07S<4N1O1O1N2O1N1O2N1O2O0O101N101N10001N101O0O1000001N1000000O2O00000000001N1000000010O00O11O01O00000000000000000000000000000O1000000000O100000000O2O0000000000001N100000000O2O1O001N1000001N101O0O2O0O2O0O2O000O2N1O2N2O0O1N2M2O20001O01O0000010O00010O1O010O00100O00010O0001O001O001N`f2MdYM00003M000[OLRE4n:LRE4n:LRE4n:LRE4c;000000000000001O0000001O00000000000000MlCNU<1kCOU<40000001N3MRWl3" + }, + { + "size": [ + 396, + 576 + ], + "counts": 
"Pmi21[<0RU12kjN4L8I2M4M2M3N0O2O0O2O0O101O000O10000O1O1L4O0SFmNa8U1]GoN_8R1_GSO]Oh0\\76VIUOVOn0_7OXIn0e6SOYIR1c6oNZIV1c6kN[IY1b6hN\\I\\1a6eN[I_1d6bNZIa1c6aN\\Ib1_6aN_Ib1]6`NcIf1V6\\NhIU2d5QNZJS2`5PN^Ja2Q5aMnJd2l4_MRKe2f4e2K4M\\L_KA`4W4M2N200O1O1K5O1O1N2O1O1O1N2O1M3O1O001N2O1OM3O1L4O2O1O1N3M4K4L4M3M2N3M2N2M3N3L3WOQKdIO6U5R6[KdIk4W6h0M3L4L4K6J6K7I5L4K6K6K4K5J4M5J:F5K7TNRG0j:K4fNSE>VXj2" + }, + { + "size": [ + 396, + 576 + ], + "counts": "n^Z26T<3M2N3M2O2M2O1O1O1N3N1O1N2O1O001O1O1O101OO10O10O1N2K4N3N2N2O1N1O20000000O1000000O100001O1O2N6J3M3M1N]bP4" + }, + { + "size": [ + 396, + 576 + ], + "counts": "QT6b1f9aN`F[2Z9?K4M3N2O1N2O1N2O1N2K5M3O1O1N_MbGX1\\8[NTHd1l7YNWHg1h7YNZHf1g7UN]Hk1d7PN`HP2`7mMcHS2]7kMeHU2\\7iMeHW2]7cMgH]2d8000000000000000001O00000000000000000O100001O000000000001O1O5K1O1O1O1O1O0O2O01lNUNfGj1W8]NfGb1Y8`NfG`1Z8`NfG`1Z8`NgG_1X8bNhG^1X8aNjG^1V8aNkG_1U8`NlG`1S8aNnG^1R8bNnG^1R8bNnG^1R8bNnG^1R8bNoG]1Q8cNoG]1P8cNQH]1o7cNQH]1o7cNQH\\1P8dNQH\\1n7dNRH\\1n7cNSH]1m7cNSH]1m7cNSH]1n7bNSH]1m7bNTH^1m7[NYHe1W900000001O0O10001O0000000001O0000000000O1000000000000000000000000000O100000000000000O1gN[NkGe1T8\\NlGd1T8\\NlGd1S8^NlGb1S8_NmGa1R8`NmGa1R8aNmG_1S8aNmG_1Q8cNoG]1P8eNoG[1P8fNoG[1Q8fNnGZ1Q8gNoGY1P8iNnGX1Q8jNnGV1Q8kNoGU1P8lNPHT1m7oNRHR1m7PORHP1n7POQHQ1o7oNQHQ1o7POPHP1P8POoGQ1Q8POnGP1R8QOlGP1T8POlGP1T8QOjGP1V8POiGQ1W8POgGQ1Y8oNgGQ1Y8oNfGR1Z8nNdGT1]8kNaGW1_8iN_GY1a8fN`GZ1`8fN_G[1a8\\N]G\\O2X2a8ZNjGf1V8YNkGg1U8XNmGg1S8XNnGh1R8WNoGi1Q8VNQHi1o7WNQHi1o7VNSHi1m7WNSHi1m7WNTHh1l7XNTHh1l7WNVHh1j7YNVHf1j7ZNVHf1j7YNWHg1i7YNXHf1h7ZNXHg1g7YNZHf1f7ZNZHe1g7[NYHe1g7[NYHe1g7[NXHf1h7ZNXHf1h7ZNXHf1h7ZNVHh1j7XNTHj1l7VNQHm1o7SNQHm1o7SNRHl1n7TNRHl1n7UNQHk1o7UNQHk1o7UNQHk1o7TNSHk1m7VNRHj1n7VNRHj1n7VNRHj1n7VNRHj1n7VNRHj1n7VNRHj1n7VNRHj1n7VNRHj1n7VNRHj1n7WNQHi1o7XNPHi1o7WNQHh1P8XNPHh1P8YNPHf1o7[NQHe1o7\\NPHd1P8]NoGc1Q8_NmGa1S8`NmG_1T8aNkG_1U8aNkG_1U8aNkG_1W8bNfG^1\\8aNcG_1_8_NaGa1b8\\N_Gc1e8YN[Gg1g8XNXGh1j8WNTGj1n8UNQGk1Q9XNjFh1X9WNgFi1\\9TNdFl1^9RNaFo1d9lM[FU2k931O002N2N4L3M2N2N3M3M2N2N3M6J9G7I6I
;Da]d3" + } + ], + "question": "Based on the scene, where is located?", + "choices": [ + "A. It is being held by .", + "B. It is on .", + "C. It is inside .", + "D. It is beside ." + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_364.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000151480.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Z\\`26d>=E4M3K6L2M3O1O1O3]BmNV=g1G7I6J6K3L;E5L5_DlLl:Y3lDkLS;a300O1BWLdEj3Y:_L`Eb3_:KgEbLY:Y3nEfLR:_3hEbLX:R4100000001N102oK]EQ3OQMU;h2TESMn:a2_E\\Mc:U2]1G7N3M20100O2N004L2M4H;GWVT6" + }, + { + "size": [ + 480, + 640 + ], + "counts": "i^_23N11O0000000000002N8FQmf6" + }, + { + "size": [ + 480, + 640 + ], + "counts": "YTd26f>6L2L4K5K5AWOfBj0Y=YOdBg0]=ZObBf0^=]O^Bc0c=<2O3M3M2NO10000000001O2O0O10O00O2N1NQO\\Bf0c=ZO^Bf0a=ZOaBe0^=[OdBc0]=\\O[CLg<2T1O3NePW6" + } + ], + "question": "What is the relationship between , , and ?", + "choices": [ + "A. is wearing both and .", + "B. is wearing and carrying .", + "C. is carrying both and .", + "D. is carrying and wearing ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_365.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000026204.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "\\ga75R=8J4L3DY7DeH<[7EdH;\\7EdH;]7EbH:_7H_H9a7H]H8d7G\\H9d7H[H8f7GZH9i7FUH:P8BoG>W8]OhGc0[8[OdGe0]8ZOcGf0^8\\O_Gd0b8\\O]Gd0d8\\O[Gd0e8\\O[Gd0e8]OZGc0f8_OXGa0h8@WG`0i8@WG`0h8BWG>i8CVG=j8ETG;j8HTG9k8JSG6m8KRG5m8MRG3o8NoF2R9OkF2W9NgF2[9MdF3]9MaF4a9K^F5d9KZF5g9JXF7h9IXF7h9JVF7k9HSF:n9FoEd?", + "choices": [ + "A. and ", + "B. and ", + "C. and ", + "D. 
and " + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_366.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000530099.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "T\\Y11d;3N1O100O1O1O1O1O1O100O1O1O100O1O1O1O1O1O1O100O1O1O1O100O1O1O1O1O1O100O1O1O1O1O1O100O1O1O1O100O1O1O1O1O100O1O1O1O1O010O1O1O2N1O100O1O1O1O100O1O1O1O100N200O1O1O100O1O1O1O100O1O1O1O100O1O1O1O1O100O1O1O1O1O1N2O1O100O100O1O1O100O100O1O100O1O10000O10000O100O100O10000O10000O10000O10000O1000000O100O10000O100O10000O100O100O10000000000O100O10000O10000O1000000O100O10000O10000O1000000000000O1OVM[Hd1e7ZN\\Hg1d7VN_Hj1a7UN_Hl1a7SN_Hn1a7QN_HP2`7PN`HQ2`7nM`HS2`7mM_HT2`7kMaHV2_7iMaHX2^7hMaHZ2_7eMaH\\2_7bMbH_2]7aMcH`2]7_McHb2]7]MdHc2\\7\\McHf2\\7[McHf2]7XMdHi2[7VMfHk2Z7TMfHm2Z7RMfHo2Y7QMgHP3Y7oLhHQ3X7nLgHT3Y7kLgHV3X7jLhHW3X7hLhHY3X7fLhH[3X7dLhH]3X7bLiH^3W7aLiH`3V7`LjHa3W7\\LiHf3b72O1O1O1N2O1O1O1O1O1O1O1O100O100000000000000O10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000GhKmH11X4R7gKmH21V4R7hKmH21V4R7kKPIU4P7kKPIU4Q7jKoHV4R7gKPIY4W700000001O000000cH" + }, + { + "size": [ + 375, + 500 + ], + "counts": "Zel11f;1N2O3L2O0O2O0O2O1N2N3N3L3M102M1O3L2N3O1N2O1N3M2N2M2O1O1O1O1O1N2O1O001O1O10O00010O01O100O001O100O100O100O1N3O1N2N1O2OO10O0100O010O010O10O01O10000O010O1000000O10O1000O1000O010000O10000O1O01000O1O1000O01O1000O0100000O01O1000O10O1000O01000000O01000O1000O1000O0100000O1000000O0100000O1000000000000O10000O1000000O100O1000000O101O0O10001N101N101O001O000O2O001O001N10001N1O1O1O2N1O1N3N1OaEROX:l0gEVOY:i0gEYOY:e0gE[OZ:d0gE\\OZ:a0fEA[:=eED[: and ?", + "choices": [ + "A. is sitting on .", + "B. is inside .", + "C. is under .", + "D. is sitting on ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_367.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000237864.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "mce59b0Le;9PD4k;NQD7i;^O[D3Jc0h;XOcDR1m;L01M4M^OZDMZ;2jDKV;5lDIT;8SE@m:`0n0:F2O1QDPOa;R1[DROc;P1[DPOe;Y1HdNdD\\1];fN`Dm0O\\Oa;c0iD[OW;c0lD\\OS;d0QEXOP;g0SEVOl:j0j0000000002N3M3N[OQD0m;JnCI=8eWIAh6?YI@g6`0]I\\Oc6d0aIXO^6i0cIVO]6k0eIRO[6n0fIQOZ6o0hIoNX6Q1kIlNU6T1lIkNT6U1nIiNR6X1PJeNP6\\1RJaNn5_1UJ^Nk5b1VJ]Nj5d1WJZNi5f1WJZNj5e1WJZNi5f1XJYNi5g1WJWNk5h1WJVNi5j1XJUNi5j1XJTNi5l1k2O010O1O002N001O1O2N1O101N1O2EZDoNh;o0YDoNj;o0XDoNi;P1XDoNj;o0VDPOl;P17011OO0O3N4L11N01O1O10O100O00012NO01001N1N2O1N3O1N3N001O0O2M2N5JbPP4" + } + ], + "question": "What is the spatial relationship between and ?", + "choices": [ + "A. is over .", + "B. is in front of .", + "C. is behind .", + "D. is beside ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_368.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000237864.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": 
"1f2c:00001O0000001O00001O001O001O000000001O00000000O10000O1000000O1010O0O1000000000O10001O01N10000000001O0001OO100000000000O10001N10O1000000000000001O01N10000000000010O01O001O0000000O100001O01N1000001O000001O01O000O110O00000000O100000000O100000000001O001O1O001O00000000001O000000N2N1O201O0O1001N110O1O2N1O1OO100O100O10000O001O2O0O1000O11O0001O001O1O001O000000000000O10000001O00000000000000O10000000000000000O1000000000000000000000000000000000000000000001O00001O001O000000001O00000000000000000000O1N2O1O1N2O100O1000000001O00001O0000000000O100O1O100O100O100O1000000001O001O001O1O001O2N1O1O1O1O1O1O1OO1000000O10000O11O00O10000000000000000O10000O10000000000O10000O10000000000000000000000000000000000000000000000001O000000001O00000000001O0O110O0000000000O1000O11O0000000001O00000O11O000O01001O000000O10O2O0000000000000O10000O10O10O2O0O10O1000O2OO10O101O0000O10O101O0000000O1000000O100O100000000000000001O000000001O0000O011O000O01001O000000O1001O00O1001O0001N1000010OO010O2O000000O1001OO10001O10O01N2O1O1O0000001O010N10001O0000001O1O10OO2O1O0000001O00001O00000000001O000000000000001O00[OPFQNP:o1PFQNP:d201OO100000000000000YOoEVNQ:j1oEVNQ:a200000000000VOoE\\NQ:^2000000000O100O100000000O10000O10000UOkEaNV:_1jE`NW:`1iE`NW:Y2O02O000O100O1UOgEfNY:Z1gEfNY:Z1gEeNZ:U2O02N10000O10000000O1001O00000000000000O100010mNcEXO]:k1ON2OdEXMY:l20kNfE[OZ:k1N1000010OhNgE@Y:`0gE@Y:?hE@Y:`0gE@X:a0gE@Y:?iE@W:`0iE@W:`0iEAV:?iEBW:>hEEV::kEAZ:?gE_OZ:a0gEdNM41L\\:\\1fE[O\\:e0dEeNN9_:Q1dEfNM4OF`:" + }, + { + "size": [ + 425, + 640 + ], + "counts": 
"mce59b0Le;9PD4k;NQD7i;^O[D3Jc0h;XOcDR1m;L01M4M^OZDMZ;2jDKV;5lDIT;8SE@m:`0n0:F2O1QDPOa;R1[DROc;P1[DPOe;Y1HdNdD\\1];fN`Dm0O\\Oa;c0iD[OW;c0lD\\OS;d0QEXOP;g0SEVOl:j0j0000000002N3M3N[OQD0m;JnCI=8eWIAh6?YI@g6`0]I\\Oc6d0aIXO^6i0cIVO]6k0eIRO[6n0fIQOZ6o0hIoNX6Q1kIlNU6T1lIkNT6U1nIiNR6X1PJeNP6\\1RJaNn5_1UJ^Nk5b1VJ]Nj5d1WJZNi5f1WJZNj5e1WJZNi5f1XJYNi5g1WJWNk5h1WJVNi5j1XJUNi5j1XJTNi5l1k2O010O1O002N001O1O2N1O101N1O2EZDoNh;o0YDoNj;o0XDoNi;P1XDoNj;o0VDPOl;P17011OO0O3N4L11N01O1O10O100O00012NO01001N1N2O1N3O1N3N001O0O2M2N5JbPP4" + }, + { + "size": [ + 425, + 640 + ], + "counts": "i2T5U801O0000000001O01O1O10O01O001O00000001O00001O1O0O2O1O000O10O100000000000000000000000O11O000000001O0000001O0000O100000000O10000O100O10jLRHQ1n7iN]HR1c7jNeHR1\\7iNjHU1W7fNTIS1l6jNXIU1i6iNXIW1h6gNZIY1f6eN]IY1d6fN]IX1e6fNeHoNVO2a0W2e7eNaH_OHj1i7gN[HDMa1k7iNUHNOU1m7lNSH21n0n7oNRH4Ol0Q8nNPH8Oh0R8POoG8Og0S8POmG<0c0S8QOlG>1`0S8QOlG`01>T8ROkG`02=S8ROlGa04:P8UOkGb0=0i7]OjGc0a0Lf7@jGc0f0H`7EiGd0k0C]7HhGd0Q1_OW7MhGc0T1^OU7NfGd0\\1WOo65eGc0d1nNj6?bGb0f:_OWEa0j:DQE;P;FnD:S;GkD:U;l01O1O2N100O1O1O1O100O100O1O1O10O02O0O10001N2O1N100O100O100O101O0O2N10000O2O001N2O1OO0100000O001O10000O1O1N11000O100O10O010000001N101OVGmNa5P1_3M00001O1O00001O001O00001O00010O00000010O0O101N100O2O00000O2O0000001O0O10010N1O1N200O1O2O0O11O001O1O10O0001O0000000000000O1000N2O01000O10O1000000O100O2O00O0O2M2YFZOj6i0TIYOl6f0SI]Ok6d0SI^Om6b0RI_On6`0RIAm6`0SI@n6>SIBm6>SIAo6>QIBo6=RICn6=SICl6=TIBm6>SIBn6=RICn6?", + "choices": [ + "A. is in front of and under .", + "B. is in front of and under .", + "C. is beside and behind .", + "D. is behind and over ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_369.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000237864.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "1f2c:00001O0000001O00001O001O001O000000001O00000000O10000O1000000O1010O0O1000000000O10001O01N10000000001O0001OO100000000000O10001N10O1000000000000001O01N10000000000010O01O001O0000000O100001O01N1000001O000001O01O000O110O00000000O100000000O100000000001O001O1O001O00000000001O000000N2N1O201O0O1001N110O1O2N1O1OO100O100O10000O001O2O0O1000O11O0001O001O1O001O000000000000O10000001O00000000000000O10000000000000000O1000000000000000000000000000000000000000000001O00001O001O000000001O00000000000000000000O1N2O1O1N2O100O1000000001O00001O0000000000O100O1O100O100O100O1000000001O001O001O1O001O2N1O1O1O1O1O1O1OO1000000O10000O11O00O10000000000000000O10000O10000000000O10000O10000000000000000000000000000000000000000000000001O000000001O00000000001O0O110O0000000000O1000O11O0000000001O00000O11O000O01001O000000O10O2O0000000000000O10000O10O10O2O0O10O1000O2OO10O101O0000O10O101O0000000O1000000O100O100000000000000001O000000001O0000O011O000O01001O000000O1001O00O1001O0001N1000010OO010O2O000000O1001OO10001O10O01N2O1O1O0000001O010N10001O0000001O1O10OO2O1O0000001O00001O00000000001O000000000000001O00[OPFQNP:o1PFQNP:d201OO100000000000000YOoEVNQ:j1oEVNQ:a200000000000VOoE\\NQ:^2000000000O100O100000000O10000O10000UOkEaNV:_1jE`NW:`1iE`NW:Y2O02O000O100O1UOgEfNY:Z1gEfNY:Z1gEeNZ:U2O02N10000O10000000O1001O00000000000000O100010mNcEXO]:k1ON2OdEXMY:l20kNfE[OZ:k1N1000010OhNgE@Y:`0gE@Y:?hE@Y:`0gE@X:a0gE@Y:?iE@W:`0iE@W:`0iEAV:?iEBW:>hEEV::kEAZ:?gE_OZ:a0gEdNM41L\\:\\1fE[O\\:e0dEeNN9_:Q1dEfNM4OF`:" + }, + { + "size": [ + 425, + 640 + ], + "counts": 
"mce59b0Le;9PD4k;NQD7i;^O[D3Jc0h;XOcDR1m;L01M4M^OZDMZ;2jDKV;5lDIT;8SE@m:`0n0:F2O1QDPOa;R1[DROc;P1[DPOe;Y1HdNdD\\1];fN`Dm0O\\Oa;c0iD[OW;c0lD\\OS;d0QEXOP;g0SEVOl:j0j0000000002N3M3N[OQD0m;JnCI=8eWIAh6?YI@g6`0]I\\Oc6d0aIXO^6i0cIVO]6k0eIRO[6n0fIQOZ6o0hIoNX6Q1kIlNU6T1lIkNT6U1nIiNR6X1PJeNP6\\1RJaNn5_1UJ^Nk5b1VJ]Nj5d1WJZNi5f1WJZNj5e1WJZNi5f1XJYNi5g1WJWNk5h1WJVNi5j1XJUNi5j1XJTNi5l1k2O010O1O002N001O1O2N1O101N1O2EZDoNh;o0YDoNj;o0XDoNi;P1XDoNj;o0VDPOl;P17011OO0O3N4L11N01O1O10O100O00012NO01001N1N2O1N3O1N3N001O0O2M2N5JbPP4" + }, + { + "size": [ + 425, + 640 + ], + "counts": "i2T5U801O0000000001O01O1O10O01O001O00000001O00001O1O0O2O1O000O10O100000000000000000000000O11O000000001O0000001O0000O100000000O10000O100O10jLRHQ1n7iN]HR1c7jNeHR1\\7iNjHU1W7fNTIS1l6jNXIU1i6iNXIW1h6gNZIY1f6eN]IY1d6fN]IX1e6fNeHoNVO2a0W2e7eNaH_OHj1i7gN[HDMa1k7iNUHNOU1m7lNSH21n0n7oNRH4Ol0Q8nNPH8Oh0R8POoG8Og0S8POmG<0c0S8QOlG>1`0S8QOlG`01>T8ROkG`02=S8ROlGa04:P8UOkGb0=0i7]OjGc0a0Lf7@jGc0f0H`7EiGd0k0C]7HhGd0Q1_OW7MhGc0T1^OU7NfGd0\\1WOo65eGc0d1nNj6?bGb0f:_OWEa0j:DQE;P;FnD:S;GkD:U;l01O1O2N100O1O1O1O100O100O1O1O10O02O0O10001N2O1N100O100O100O101O0O2N10000O2O001N2O1OO0100000O001O10000O1O1N11000O100O10O010000001N101OVGmNa5P1_3M00001O1O00001O001O00001O00010O00000010O0O101N100O2O00000O2O0000001O0O10010N1O1N200O1O2O0O11O001O1O10O0001O0000000000000O1000N2O01000O10O1000000O100O2O00O0O2M2YFZOj6i0TIYOl6f0SI]Ok6d0SI^Om6b0RI_On6`0RIAm6`0SI@n6>SIBm6>SIAo6>QIBo6=RICn6=SICl6=TIBm6>SIBn6=RICn6?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. No object is in the background." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_370.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000099810.jpg", + "mask_rles": [ + { + "size": [ + 332, + 500 + ], + "counts": "jmi04V:3M2M3M3L4I7L4J6K5H8D]L@d3a0\\L\\Of3e0ZLXOh3h0YLVOh3j0ZLROh3h0`LUOa3i0bLTO`3i0cLVO^3GXK@_1g0Z3E`K]OY1k0Y3GbKWOY1P1W3HgKoNU1X1U3FjM8X2GiM8X2FjM8X2GiM8X2FjM8X2FjM8X2FjM9W2FkM7W2HlM4V2KmM1U2NnMOS20PNLR23QNIQ26RNEQ2:Q400O100000000O100000000000000O10000000000000000000000000000001O00000O11O000000000O10SNFhI9V800O1O1000000O01[NJTI6i6MXI2e62ZIMe65[IKc67\\IJc67]IId66\\IJc67]IIc67]IJb66^IJb67]IIc67]IIc67YICeN6R87XIDfN5R87XIDeN6S86XIDeN7R85YIDfN6Q87WIDhN5Q87VIEiN4Q87VIEiN5P86WIEiN6o76VIEkN5o76VI3i6MVI4k6LTI5k6KTI6l6JTI7k6JTI6m6IZH@=h0Y7IXHA>g0X7JWHA`0g0Y72[HUONj0g71ZHWOLk0i7OXH>f7U100O10013NO140LN2OO00O2N2O1M3N2M3N1O2M1oJkLk2U3RMoLm2P3RMSMm2k2RMXMm2h2RMZMn2c2RMaMl2^2nLjMR3U2`LSMgNU1h4h1VLgNh3Z1QLmNo3S1lKSOS4m0hKYOV4i0eK[O[4\\3N2N2O001M3N2M4L3O3L5L4L9G8H4L4L4L6I8I5K2N4L1O3M2N3M1N2O1O000O10000O1000O0100O1O0100O010O0000010O001O10OO2N101M2M30000000000001O2N2N2N2M4L4M2O1N1O1O10O010N2K5M3N1O2L400O10000000O101O001N2O1O2M2O1O1N3M2N2N3M8F6KfXZ1" + }, + { + "size": [ + 332, + 500 + ], + "counts": "bTP21X:3N2N2O1O1O100O2O0000000000001O0000000010O00010O000001O001O1[FBZ9?cFD\\9f001O01N2N3M4L5Llod2" + } + ], + "question": "What is the relationship between and ?", + "choices": [ + "A. is standing in front of .", + "B. is holding .", + "C. is beside .", + "D. is holding ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_371.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000191013.jpg", + "mask_rles": [ + { + "size": [ + 640, + 474 + ], + "counts": "Tia22kc03N3O0N2O1N2O2O0O000O2O100O1O1O1O10O00001001O0O10000000000000000000001O002N3M00000000001O00000000001O000000001O001O001O001O1O001O1O1O1O2N1O1O1O2N1O2N2N1O002N00O10000O1001O001O1O7I1O2N3M2N5K2N1ON2N2004L0000000000000001N100O1N2O1N3N1O101O00001N2O010N3N3M=C8fNY]Oj0dc0YOiUU4" + }, + { + "size": [ + 640, + 474 + ], + "counts": "^bS68fc03K5M3N2O1N101O0O101O001N101O1O00000O1000000O2O0O10001M2O100O1O001N2O1O01O00O11O0010O2M2O100O10O01000000O100O1000000O100000000O100000000000000000000O100001O000000000000000000O10000000001O0000001O1O1O100O0000000001O01O0000O2O1O2M2N2N5H:H7J4M2M:G3KdUg0" + }, + { + "size": [ + 640, + 474 + ], + "counts": "Xc0;dc01J7M2H8L4L4M2N3O100000000000O10001O00000000001O01O00O100O2O0001O00O100O10001O0OEPOh]Oo0Yb0QOg]Om0\\b0ROd]Om0hb0N3I6NG]OY]O=mb0DR]O9]c0K`a[30X^dL?", + "choices": [ + "A. is driving on , while and are parked.", + "B. is parked on , while and are driving on it.", + "C. All vehicles, , , and , are driving on .", + "D. is driving on , while and are parked." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_372.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000191013.jpg", + "mask_rles": [ + { + "size": [ + 640, + 474 + ], + "counts": "Q_b1:fc01N3N1O010O1O0O2O00001O0000001O000000000K6J5K5L4H8J6L5J5I8E;H8I6J6I7J5K6L5I6I:C7OfJcAS5Z>PKfAo4Z>RKfAn4Y><1000O010O00000100O1VKeAn3\\>QLeAP4Z>PLgAP4X>oKiAQ4W>PLhAP4W>QLiAo3T>XKkAj0On3W>XKjAd5Y>0N10100000O0100O001O001O010O00100N20^JmAV5S><0OO2O001O10O100O10O1000O1N1gJmAg4S>c000O1O1OfJRBd4n=]KPBd4P>^KPB_4Q>aKoA_4Q>aKoA_4Q>`KPB`4o=aKRB^4n=aKSB_4m=k000O000O2O001N10010O1N101O00001O1O0001dJZB]4e=cKZB^4e=P1OUK\\B[3c=eL]B[3c=a101O1O00010O001OkJaBi3_=WL`Bk3_=[101O000010O01O001O0010O010O2O0O1O2N2O0O8H204J4L2XJjAb5^>MO2N010N101O0O2O1O0GYJXBi5g=801O0O2O0O2O0O2N101O0N3O001N101N1O2O00001UN^ISEO8i6k6VITJ0Oc8j5]H_HQO514O10Fc00g9c5QF]JP1OPO2N0S5" + }, + { + "size": [ + 640, + 474 + ], + "counts": "mei2`0Xc0a0B9H7I6L5J5L5K4L4M2M4L3N3L3L4M2N5L2N2M2M5M2N2N1O2N1O3M1O2N2N1N3N2N101M3N1O1O2N1O2N101N1O2N1O2N1O2N100O1O2N101N1O10001N1O1O2N10000O1O101N100O100O100O10000O100O10000O1000000O100000000O1000000O100O01000000O01000O10000O10000O10000O100O1O100O10000O10001N100O100O101N1O101N100O2N1O101N101N101N1O2N2N1N3N1O101N2N2O1M2O2N3M1N3N1O2O1M2O2N3M2M5K3N1O2M6K3L4M2M4M3L5J5L3L5L5J4L7I7H8G>@YSi2" + } + ], + "question": "What is the relationship between and ?", + "choices": [ + "A. is driving on .", + "B. is hanging from .", + "C. is hanging from .", + "D. is parked on ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_373.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000057027.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "moe05R=6I6L3M3N2N1N3J5M4L4M2O1L5N2M2O1N3L3O1N3N1O2M2N2O1O2M2M3O1N2N2N2IZMfEj2Q9lMoF[O5S3V8mLiGY1Oj1h6WMZIGLX21j0S6g0kIYOe5n0ZJQL0V3a5m0aJlKNW3\\5Q1gJSOW5l0lJTOo4f4L4N2MWJVKaM3_5f4nLdKP3Y4mLYKeM?`5V4iLnKZ3P4fLPL\\3n3iLbKd3\\4f2M3N2N2NROTLbHk3[7\\LbHa3_7cL_H\\3_7hL`HV3`7nL]HR3`7UM\\Hk2b7_10ON300N200O0N3M30000O002N1O1O2N01001N1O2O0O1000[LVIl0h6TO]Ih0e6TO_Ij0c6ROaIm0_6jNkIT1X6aNQJ_1Q6ZNUJd1o5nM\\JQ2`8N100O100O100O2N100O100O10000O1000000O100O1O100O10000O100O1O1OlNjMVGV2h8PNTGP2i8TNVGk1i8WNWGi1f8ZNZGf1c8]N]Gc1a8_N_G`1^8dNbG\\1[8gNeGX1_7RNSHl0>R1V7MjH3S7NVHiMf0Z2Q7OSI1l64iH2V71_H9`7J\\H7d7h2N2O1O1O0O2O1O1O0O2O0O200N1O2O1O1O10O010000O100O10000000O10000000O01000000O1000000O100000000O10O100000O1O100O1000000000000O100000000000000000000000000000000000000000000O10000000000000000000000O101O00000O101O00000O100000000O100000000000000000O11O00000000000000000000000000O101O000000000010O000000000lMlHSNS7j1QIUNo6i1TIVNl6i1VIVNj6i1WIWNi6g1YIYNh6f1XIZNh6e1YI\\Nf6f1XIZNh6h1VIXNk6h1nHaLHg1[7R4001O001O0001O01bJTIb0J]2T7kL\\I<4[2b9eM_E]1f:]N_E_1c:VNhEi1l:0001O00000000000001O0000001O000000000oNZN^Ff1_9_N_Fa1`9bN^F^1`9eN_F[1`9gN_FY1_9jN`FW1]9lNbFT1]9mNcFS1[9POdFP1Z9SOeFm0Y9VOfFj0W9ZOgFg0V9^OhFb0V9AiF?U9FgF;V9n1N2N2M3I7K5L5L3N2N2N2M3M3N2N2N2N2O101O01O2N2N1O1O2O2M3M2O0O4L4M0O3M002N5K4L1O2O2M3M1O1O1O5K3M2iMZFf0i9TO^Fh0c9VO_Fi0d9QO`Fo0b9lNaFS1f9cN^F\\1m9UNWFk1e:O0000000000000000001O0000000000TJ\\N2d1HeN5[1GmN5S1FVO6j0H]O3c0LB0>OFN:2KI561EO;8^OGc0`0VO@i0e0SO[Ol0j0POVOP1l0nNTOR1Q1TKXM`3g1Z1]1aNcN]1h1ZNWNe1l1ZNTNe1m1[NSNd1n1[K[Lm2g1h1T2iKcL]1Y1j2Q4YLoKg3T4VLlKk3T4TLlKl3U4SLjKn3X4PLhKP4Z4nKfKR4\\4lKdKU4[4kKeKU4\\4jKcKX4]4gKbKZ4_4eKaK\\4_4cKaK]4`4bK`K^4b4`K]Ka4e4\\K\\Kd4i4UKYKk4c600001N2O1O00001O000000000O2O0000000000000000010O000000010O000100
1O0O2N10O01oLjJmMV5R2mJkMU5o1eJYK9f2S5P2dJZK9g2S5o1PKPNT5l1lJUNU5h1_JaK:h2Y5d1]JdK3O1j2`5j40iL]JhNb5W1`JhNb5U1aJiKMj2b5eM[JT3`0TOW5`McJY37UO[5YMdJh1D[NNZ1=]1U7[MfI;^O[2e93M1O2N2M3N1O2M2O6I:F?A2O1N3M3K5L4M2M4H;XNf1F:G8H:Hb0YOc0@;E:F=A>UOk0A=H:Aknb0" + }, + { + "size": [ + 426, + 640 + ], + "counts": "m\\Z2:o<4K2K5M0_C]O_Q9BRG`0g8EYG>c8C]G?`8C]Ga0a8]O`Gf0]8\\ObGf0\\8\\OWGQ1g8ROmFY1Q9jNhF\\1W9\\1O1N3N1O1O1N2O1O_OjFPMV9m2PGoLP9n2g0N2N2N3M2O100O101OSF_MS9b2cFkMX9Y3K6J1N2O0O2O000O10000000000O010000QORGfL1h0m8\\2cG\\M^8_2jG^MV8_2nG_MS8_2PH`MP8^2SHaMm7]2VHaMk7\\2YHcMg7[2\\HcMe7\\2_H`Mb7`2d1000000001N1000O100000000000000O01000000000O2O00000O10000O100O10PGgMo6Y2nHlMP7T2mHPNR7T2hHnMX7V2aHmM`7U2ZHnMf7X2jGSNU8a3000000000000001O0000000CWGYLj8a3^G\\Lb8b3bG[L`8c3cGZL^8d3e0N3M3N1012cF_Lk8R4N01O1O00001O001nMfF2Z9MkFOU90oFMR92VGGj89^G^Ob8c0aGYO_8g0gGSOZ8m0lGlNT8U1VH`Nk7_1WH_Ni7c1VH\\Nj7e1VHYNl7g1THXNl7i1UHUNl7k1UHRNl7o1g1100O100O011N10000O100O4M2O1N1O3M3N0]E^MN0Y:Q3MN3M3oMlEe0Z:PO\\F?l9UO`Fb0Z;GZYc3" + } + ], + "question": "Which statement best synthesizes the relationships involving and ?", + "choices": [ + "A. is guiding from a position above it.", + "B. is positioned over and is guiding it.", + "C. is walking in front of .", + "D. is guiding while walking on it." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_374.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000498463.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "YQQ24j>o0ROd0]Oh0XOl0TO>B00000000000000YMTEZ1l:eNVEZ1j:eN_ESO_Oh1R;TOcF1]9OeFO[90gFOY90hF0X9OiF1W9NjF2V9MkF3U9LlF4T9LlF4T9KnF4R9KoF5P9LPG4P9KQG5o8JRG6n8ISG7m8FVG:j8DXGe8B\\G>d8A]G?e8_O[Ga0f8]O[Gc0e8]O[Gc0e8\\O[Ge0f8YO[Gg0e8YO[Gg0_;0000000001O7J3M3N2N1O2N2N2O1O0O101N100O10000O1000000000000000000001O00001O00001N10YI" + }, + { + "size": [ + 480, + 640 + ], + "counts": "ZUW28f>3N2NY1hN4K2N2N1O1O1O100O100O1O1O1O1O1N2M3O2N10O01O1O10O10O2O03Me0[Oi0VOd0ZO`\\d6" + }, + { + "size": [ + 480, + 640 + ], + "counts": "l_T7i0V>2O000000000O1O1bNVOdDl0Y;YOaCKd0V1k;YO_CY1`001O000000OUE^OZ7b0dHBZ7>cHG[7:bHI]77bHK]75bHL^74bHL^74bHL^73cHN\\72dHN\\72dHO[71eHO[71eHO[71eHO[70fH0Z70fH0Z70fH0Z70fH0Z70fH0Z7OgH1Y7OgH1Y7OgH1Y7OgH1Y7OgH1Y7OgH1Y7OfH2Z7OeH1?[OV3e0[L0;2g2MoL18h0U2WOcM17]1b1bNWN16R2o0mMkN25\\2e0bMWOd0A^2d0mLL[1iNY2k0\\L\\Ng1U2^Nd2e0dJeNj0b0Hg1V2[Nf2\\2dJPM0O4Y2c2[Nf2Z5ZMeJg2^5`KRJ`0?Q4Q7`J^Ho0?b4^7^JTH`0NB`0`5h7^JjG50M=`5l8^JgF]5W:WOh0XOg0YOb0^Of0ZOd0TN]BR1^>nNeA2Q^Y7" + }, + { + "size": [ + 480, + 640 + ], + "counts": "j`T7a0]>201O000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001O0000000000000000000000000000000O100000000000000000000000000000000000000000001O001O000000001O000000000000000000000000000000000000000000TH" + } + ], + "question": "Which statement accurately describes the arrangement around the microwave, ?", + "choices": [ + "A. is on and is beside .", + "B. is attached to .", + "C. is on and is beside .", + "D. is on and is beside ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_375.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000581062.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "X]`11d21Z63_I2a6NZI8d6JjG1[OJl0d0o7AdGc000[8^O_GR2a8oMVGY2j8:OO2N1E and ?", + "choices": [ + "A. is on top of .", + "B. is holding .", + "C. is standing on .", + "D. is sitting on ." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_376.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000029640.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "h\\R51T=9H6M3L3N2I[OfCf0X<8M2O1N3N1O2N2O010O0000010O00010O00010O01N1O1M3O2N1000000O1O1O1O2O010O01O000001O0000001O001O0O1O2M2L5M2N3M3Mf_^2" + }, + { + "size": [ + 426, + 640 + ], + "counts": "Pim43V=3N2N1N3N1O2M3N1N2O1O000O_OeC4\\ and ?", + "choices": [ + "A. is positioned next to but not touching.", + "B. , the carrot with a slight curvature, is attached to .", + "C. , the carrot with a green stem end, is attached to .", + "D. is larger than ." 
+ ], + "answer": "C", + "type": "relation", + "image": "images/vqa_377.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000370486.jpg", + "mask_rles": [ + { + "size": [ + 640, + 421 + ], + "counts": "h4Z31W4Q9]3N2O00fN^GkFDHP9X9nGbF^8g8h1J7L4K5L4K5L4H8GhCdH^L2N1O0O2O002N1O1N3M``k6" + }, + { + "size": [ + 640, + 421 + ], + "counts": "[f97fc07J3M3N2N2M3J6M22O0000O010O001O000010O00000000001O00001N10000O2O0O2N4L5K9G_eU7" + }, + { + "size": [ + 640, + 421 + ], + "counts": "eob22nc00a_:OVdFg0^kN9G8l@lNT;X1^DVO^;m0ZD]Ob;e0XDAe;a0WDGb;=YDMc0^Nf7g1dG2a0XNi7h1cG5a0SN[72QFk1^1VN`NU2a2hMU7g2aG;U1nLW7l2`G=k0dLTN3_9o2oFR1W1SLTNMb9S3hF[1Y1_K_N<_9n2`Fc1W1mKT8j7]GfHX8j9F8YGdD]8e;H:G:E7I5J7J5L5XIkBd07g4`=lJeBS5]=gJhBX5P>001O2NBdAXK\\>f4hAXKX>f4b0ObAUKi=k4WBZKd=e4]B\\Kc=b4^B`K`=_4dB^K]=`4fBbKW=]4jBdKT=\\4mBbKU=]4oB^KR=a4W10QB^KX9i0hGi3oNaKW1MQ5X1dJZ3SObKd8d1\\F]NV1\\4JcK_8]2eGo1i9XNTFh18dKd7l2QHa1:eK^7R3VHY1[^OCea0>W^OHea0[1N5L1O1N5L3L4M3j@^Mh?AFOK1d00j?`3b_ORMc?f300O1K5O1ZOaKjA`4k>01O0000000000000000000000001OO10000000000AaK\\A`4]>oKYAS4k>?2N9G2N2N9G2N1O001O00000000000000000000000000000000000000O10000000000O100O1K5O100O100WOdK]AL=`4k=TLTBl3j=WLTBj3l=XLmAm3n<\\KlCf5o;PK_CQ5a01O00000000000000000000000000000000O10000000000hN]LSBc3cgKfAX4S?N1O001O0000001O1O6J:F;E6J7I5K3M3M2\\Nk^O9Ya0@n^OS1QAmNm>V1RAjNl>Y1SAgNh>^1XAbNg>`1XA`Nh>a1WA_Ni>a1WA_Ni>b1VA^Nj>b1VA^Nj>b1VA^Nj>c1UA]Nk>c1UA]Nk>d1SA]Nm>c1SA]Nm>d1QA]No>d1PA\\No>f1n@\\NR?e1l@\\NT?e1j@\\NV?W3O1O1WO\\KVBd4j=]KUBc4j=_KTBb4l=^KTBb4l=_KSBa4l=`KTB`4l=aKSB_4m=aKSB_4m=aKSB_4m=aKSB_4m=aKSB_4m=bKRB^4n=bKRB^4n=cKQB]4o=cKQB]4o=cKQB]4o=dKPB\\4P>eKnA\\4R>eKmA[4S>fKlAZ4T>gKjAZ4V>hKfAZ4Z>f000000000O10000000000000000001O00O1001O0000000gJdAn4]>lJjAR5c>O1O00001O0000000000000000000000000000001O00000000001O;E2N2N1O1O1O000000001O00000000000000000000000000000000000000000000001O001O00000000000000000000001O00O100000000000000000000001O3kKh@Mm2" + }, + { + "size": [ + 640, + 421 + ], + "counts": 
"Ybf18cc06UOj0J7K4L3O2M3M3N2M3N1O2O1N2N2O001O1O100O1O00100O1O100O1O1O001O100O1O1O1O1O1O1O1O1O1O1O1O1N2O1O1O1O2M2O1O001N101N101O000010O001O10O0100O0010O010O01Ec_ORM^`0l2g_OPMY`0o2=I7K4M4N101N2N1O2M201O0N3K4M4L4O0El]OiNWb0W18100O1OO2O01O0010O00001O10O010O010O1O00100O100O0010000O101O01O0000000000010O000001O0000001O001O001O0O2O00001N3N2N3Q^OPNha0\\2H7I1N101O00O010O1O1O1O1O1O1O100O1O1O1O1O1O1O2N100O1O1O101N1O1N200O1O1O2N1O1O100O1O1O2O0O1O1O2O0O1O100O2@?L4J6L5L3M3N2NSV\\2" + } + ], + "question": "Which object are both and standing on?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. " + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_378.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000087875.jpg", + "mask_rles": [ + { + "size": [ + 487, + 640 + ], + "counts": "X6n8Y60O1000O100000001O000000000000000000000000000000000000000001O00O10000001OO1001O000000O11O00000O1000000000RNiIlJW6R700000000000QNjImJV6S5jImJV6R700001O000000O10000000O2O0001OO010001O0000O1001O00000000000000O20O0O100000000000000000000000000000000000O100000000001O0000O1001O0000O11O000000O1001O0000O1000000000000000000000000fJlIa1T6]NRJ_1n5_NUJ`1k5`NUJ`1k5_NWJ`1i5_NXJa1h5_NXJa1h5_NXJa1h5^NYJb1g5\\N\\Jc1d5\\N^Jd1a5ZNbJd1_5ZNeJd1[5[NkJ`1U5_NoJ^1Q5aNSK\\1m4dNVKY1j4fNYKX1g4hN[KW1d4hN_KV1a4jNaKT1_4_MlIEg1j2]4]MSJFb1k2[4]MVJGa1j2Y4_MWJFb1i2W4`MXJGc1Z1_N]Nf5b0XJGd1Y1bNZNb5e0YJHj0ROZOT23[N`5h0YJGi0TO[OP25]N]5g0[JHh0VO[Ol16^N\\5h0\\JGg0XO\\Oh17`NZ5i0\\JGg0ZO[Oe19`NY5i0]JHe0]O[Oa1:aNY5i0_JFd0_OZO`1;aNX5j0aJCc0BZO^1;bNX5j0cJB`0E[O[1;cNW5k0dJ@`0G[OZ1:dNW5k0gJZOa0LXO[18dNX5j0mLQOkM]11hNW5h0\\N<_LkNU5>c1XOaN>Y8K5N2N2O1O1O1O1O1Nbjf00`UYO1N3N1O001O1O1O1O000aAHo=8PB6c=J]B7b=I^B8a=H_B8a=H_B9`=G`B9`0Dl2K]L3]84kG:;Gh2S1fMcNlN3\\66oI;8Hg2W1eMmNV5HWJ<7Gg2Y1dMmNW5GXJ;5Hi2Y1cMmNW5FYJ<4Gj2Z1bMmNW5FYJ<4Gl2Y1_MnNX5EZJ>3Dm2[1^MoNW5D\\J=2El1K_N^10QOW5B^J?0Dj13^NW11ROY5@`J`0NBj18]NS12SOS61cI@LL_1a0kNo02TOV6ObI@KO_1a0lNm00UOX6ObI\\OM4[1?nNm0OVOZ61mI_Ok0=oNm0NW
O[60mI@j0;QOo0KWO^6NlIBi08SO`2X6fMlICh01ZOf2R6fMlIDh0L]Oj2Q6dMjIHQ2d2V4cMhIKP2c2Y4aMgIMn10XMo1d8RNPJ2\\Ml1d8TNlI2`Mj1d8UNiI2eMh1b8XNfI1hMg1b8ZN`I2PNIIV1g8POYI7WNBJW1f8QORI<_NZOJY1e8QOjG[O=U1[OTOJ[1d8ROhG^O2[1IgNK]1c8SOfGHDW19`NJ^1c8SOeGg1MgML_1b8TOdGf1NfMM`1`8VOcGe11dMLa1`8VObGf13aMMb1^8XOaGe14aMM=I`0e8N_Ge17_MK?J?f8M^Gg16]MM?J`0e8L_Gh17[MK`0Ka0d8K`Gi1a0jM\\Ob0c8K_Gk1a0gM^Oc0h6oNkJj0_Nm1`0fM^Oe0h6nNkJ8WN88W2`0eM_Of0c8SOZG85Z2?eM_Of0d8QO[G84[2`0dM^Og0]6PO`K0RN93Z2c0eM\\Oh0\\6POlK6jMY2f0hMXOf0U9WOnFV2k0kMSOg0V9VOmFV2m0kMQOh0h9\\1YGkMnNj0h9Z1\\GkMmNk0g9Y1]GkMlNn0f9W1lGiNT8W1lGhNT8Z1jGhNU8Z1iGhNU8Z1iGfNW8b1`G`N_8g1ZGZNe87TF6R1Cj86VF6P1Eh85YF6n0Fi83ZF;i0Cl81\\F=h0Ak82^F>f0@l81_F`0d0_Om81_Fb0a0cNXOe9MZFI6\\1b0\\N[Oc0c9K[FI6]1b0ZN[Oe0b9J\\FJ5]1n0oNa8I^FI4_1m0oNa8I^FI4_1m0nNb8J\\FI5`1l0nNc8I\\FI5`1l0mNd8J[FH6_1l0nNe8JZFI5^1m0mNf8KYFI5]1m0oNf8JYFI5]1l0oNh8KWFI4\\1o0nNh8MUFH5Z1Q1POf8MkFR1?QOh8LiFQ1a0ROg8LiFo0c0TOe8LiFn0d0VOd8KhFm0e0ZOb8HjFl0f0_O]8DoF3lN;i10[8@RG2nN:g17V8]OVGMQO>d1:V8WOdG<7a0l9]OVFd0i9[OXFe0h9ZOYFf0h9XOYFh0g9UO\\Fk0f9QO]Fn0n;0001O00001O000000001O0GlA^OU>a0mA^OT>`0:O2N100O1O2M4M3LU[h3" + }, + { + "size": [ + 487, + 640 + ], + "counts": "[RY24Q?5K3N100O101N1O1N3N1M4N2N2N2O1N3N2N0O100dNkNfDV1T;ROiDn0U;UOjDk0V;UOjDk0T2SO\\53`Hj0S2\\OU5JhHj0S2]OT5JhHi0S2_OT5HjHh0R2AS5GkHh0Q2CR5FmHf0R2DQ5FmHf0R2DQ5GlHe0R2FP5GlHd0T2EP5JiHb0W2CP5LhHa0X2Dn4NgH?[2Cn40bHa0_2_Oo4T2QKlMn4Y3mGYM?A[7]6^OS1[O5M3N1O1O100O1O100O1]O_G_Jb8`5aG\\Ja8d5a000O10000000000O100O1K5N2O10000000000000000001O1O7I00001O0000000000001O0kF]Jf8d5VGaJh8P6O1O1O1O0]GgIX8Z6gGfIY8[6fGfIY8[6fGeIZ8d600gH[IP6f6oI[IP6f6nI\\IQ6e6nI[IR6i6iIYIV6X7YIhHg6Z7WIfHi6[7VIeHi6\\7WIeHh6[7XIeHN7S6T7oIfHJ9V6R7PJ\\IT2QOdM12d1V1`6POjHl1c1gNe5\\OhHi1g1kNa5[OiHg1i1nN_5ZOhHh1i1nN_5ZOiHg1h1nN`5[OhHg1h1nNa5ZOgHg1j1nN_5[OhHf1i1oN_5[OhHf1j1nN^5\\OhHe1l1mN^5]OgHe1k1nNh6P1ZInNg6Q1ZImNi6R1YIbNnLKo9a1c3N2N2N2N2N2N3M2L5M3M3J5L4L4N2N2N2M3NlkQ5" + }, + { + "size": [ + 487, + 640 + ], + "counts": 
"\\gm21V?1O4K2O00001O0O1000000000000001O00iNK\\C5clCZO98k;b0hCVO=8k;R1UDnNk;R1UDnNd1@n7c1\\FnNd1AP8c1ZFlNe1AR8f1VFiNh1AR8g1UFhN^1_O_N2o9g1TFhN]1@_N2P:f1TFhN]1B\\N2R:e1TFgN^1La8[1QFiN^1Lb8\\1nEhN`1La8]1oEgN`1La8^1mEgNa1Lc8V2]GiMe8V2ZGkMf8U2ZGkMf8U2YGlMg8T2YGlMf8V2YGjMf8W2ZGhMe8Z2[GfMe8Z2ZGgMf8Z2YGfMg8[2XGeMh8]2VGcM7Ak6l2cGRMV1a0E_OKQ6Q3RJ_M=F_OLQ6P3RJ]M?H]OKR6Q3PJ]Ma0G]OKR6T3mIZMd0G]OKANk5W3bJYMd0H^OJAMl5Y3`JXMe0H^OJ_OOn5Z3\\JVMg0J_OH^OOR6Y3XJVMh0K_OH_ONR6Y3WJWMh0K@H\\OOU6Y3UJUMj0K@H\\OOU6Z3jIaLMc0W1MAE[O1Y6W3eIeLNa0X1M@F[O0a6KUI=9P2LjML;R1;JZO[OO`7[3VIbMNTO[OOb7[3XI_MKWO[OOb7\\3WI_MKXOZOMe7[3WI_MJZOWONi7X3VIaMIFR7g2VIcMHFR7h2TIcMJEQ7i2UIcMICU7h2RIeMHDX7e2PIjMROTO4=k7d2nH^NSOoNn7d2PIYOo6h0QIXOn6i0RIWOk6l0UITOk6l0UITOj6m0VISOj6P1SIPOm6m0VISOj6m0VISOb3\\MbN`3mMSOS3_MTL0k2_3mMROU3^MSL1k2_3lMSOV3bMlKNQ3a3jMPOX3jMmNX3jMkLUO^O1O1f1R4jNkNZ3lMhLBR1g3lNkN[3kMgLBS1h3kNjN]3kMeLXO^O6f1m3kNiN_3]MULI>:Q1f3mNjNd3XMiL>a0?`Nl1c0Nf3VMgLc0>QN_O]1Q6aNfLU1]Mk0>mM_OO0c1P6aNfLU1]Mi0a0mM^Of1o5_NfLS1\\Ml0g0AY5]NeLV1\\Mi0h0C;`N\\6T1aHh0i0D1^NTO4a7?\\HK6\\1l0G4eN]6=^HJ5[1n0F2lN\\68`HK4Z1o0FLSOa62`HJ5X1P1JnNjN6=X7KaHJ4Y1P1JmNmN4]7GcHJ2S1V14fNjNO`0b7DaHJ3S1V15fNiNKc0f7AbHI3Q1W19cNhNL<2]Oc7m7H[HEGN:U1Z1;bNeNJc0n7C\\HG5Q1V1=aNfNHc0Q8_O\\HK5l0X1a0_N@i7VO\\HL6O_Of0c0]O8]1@ZOe7YO]HM:f0J@9Z1AZOe7YO^HL:e0GEg0@@P:J`Ff0@@P:HbFh0^OAo9FdFi0]OAo9EfFi0[OBo9DgFk0YOAS:AeF5lNO=;S:_OdF5POO9=Y:XO_F;RON6?[:UO^F>QON6>j;CQDO5>k;AQD24fY0000000000000000000001O000000000000000000000000000000000000000000000000000000000000000000000000001O0000000000O10000001O0000000000000000O100000000000000000000000000000000000000000O1001O00000000O100000000000000000001O0O2M3K=Am^j7" + } + ], + "question": "Which object is positioned between and ?", + "choices": [ + "A. No object is between them", + "B. ", + "C. Both and ", + "D. 
" + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_379.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000542089.jpg", + "mask_rles": [ + { + "size": [ + 500, + 375 + ], + "counts": "i`P17\\?3M3N3L4L3N1N2O1O1N2O1N3N1O1O1N2O001O5K2N1O1N101O00001O00001O0O1001OO2O000000000000000000001OO100000001O000000000O100000000O101O0O100O100O2N1M3M3O2N1O1O2N1O1OdNROTDm0m;SOTDk0m;UOSDj0\\=ObNXOSDf0n;ZORDe0`=M3M2O2M3M3M4L]UY3" + }, + { + "size": [ + 500, + 375 + ], + "counts": "WgQ14_?9G2O00000O1O1N2M3K5A?N2N2O1O1001O2N5K6J1O2N2N4L4L6K1N00000000000000O1L4K6L3I7K5M3M3001O2N6J4L1O2L4K7L6IU`k3" + }, + { + "size": [ + 500, + 375 + ], + "counts": "RW73^?b0d@]Oe>0[Ak1a=UN^Bn1`=4OnMaBo1_=QNaBo1_=QNaBo1`=PN`BP2_=QNaBo1_=QNaBo1`=PN`BP2`=2O010000000O010O10000O1O1O1O10O10000000000O01000000O01000000000O1000O10000000000000O01000000000000O100000O100000O10000000O01000000000O10O101O1O2N1O1O1N2O00001O000O1000O100O0O2O1M3M210O10000000O1000000000O1000O10000000O1000O1000000000O010000O2OO10000000O1000O10O10000000O1000O1000000000O01000000000000O1000O1000O1000000N2^Ob0XOXoj2" + } + ], + "question": "Based on the arrangement of the objects, which statement accurately describes the vertical positions of , , and ?", + "choices": [ + "A. is over , which is on .", + "B. is on , which is over .", + "C. is on , which is over .", + "D. is over , which is on ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_380.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000229858.jpg", + "mask_rles": [ + { + "size": [ + 555, + 640 + ], + "counts": "gPT5PNhAL;\\2n=eMWB[2\\>2EcMbAa2Q>_MlAR3Q>nLoAV3S>7H4J4SOZL_C05i3P and ?", + "choices": [ + "A. is in front of .", + "B. is looking at .", + "C. is standing behind .", + "D. is eating ." 
+ ], + "answer": "D", + "type": "relation", + "image": "images/vqa_381.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000229858.jpg", + "mask_rles": [ + { + "size": [ + 555, + 640 + ], + "counts": "gPT5d1h0I7L5K5L:E3M100O010O001O1O001O1N2N2L4M4HbSU5" + }, + { + "size": [ + 555, + 640 + ], + "counts": "0Q:Z7000000O10000O10000O1O10000O10000000000O1O100O10000000000000000O10000O10000M300000000001O1O1O1O00001O00O100001O000000002N1O001O002N1O1O001O5K2N001O001O1O010O0O2O00001O001O0000001O1O001O00001O00001O0000001O0000O1000001N100O10000001O0000010O000000001O1N101O000010O00O10000001N1000000O10000O1O1O10000O1O100O100O100O11O0000000000001O1O1OO1001O1O1O00001O2N1O0000000O1001O000000000O11O0000000000O100O10000O1O1O1O1O1O1N2O1O1O100O1000000O10000O1000000O100O1SLmHaMT7\\2XIYMj6e2[IWMf6g2^IVMc6i2`ISMb6j2iIiL\\6i2YJnLi5n2\\JPMe5o2]JoLd5P3^JmLd5R3_JiLd5V3_JfLc5Y3`JdLa5Z3dJcL\\5\\3gJ\\L_5c3cJSLf5m3]JjKi5U4YJjKg5V4YJjKg5U4[JiKf5V4\\JhKe5W4^JeKd5[4\\JaKh5^4ZJ_Kh5`4b300UOfCeLZPNhAL;\\2n=eMWB[2\\>2EcMbAa2Q>_MlAR3Q>nLoAV3S>7H4J4SOZL_C05i3P and ?", + "choices": [ + "A. is in front of .", + "B. They are both eating from .", + "C. They are looking at each other.", + "D. is eating ." 
+ ], + "answer": "C", + "type": "relation", + "image": "images/vqa_382.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000313182.jpg", + "mask_rles": [ + { + "size": [ + 424, + 640 + ], + "counts": "i^b04R=4M2N1WCHa\\;BeDa0X;^OhDg0S;XOnDj0P;VOoDk0Q;UOoDl0Q;SOoDm0R;SOmDm0T;ROlDn0U;POlDP1U;oNkDQ1W;mNjDR1W;mNiDS1X;lNhDT1Z;jNfDV1[;iNeDW1\\;hNdDX1];gNcDY1_;eNaD[1`;cNaD]1e;00000001O00O1001O0000001O0000000000000000000000001O0000000000O10000000000O10000O100O1O1O100O100O100O100001O00000000000000001O00000000001O0000001O00001O00000000001O000000001O0000001O00000000001O00001O0000001O000000001O0000000000001O00000000000000001O00000000000000O100000000000000O100O10000O1O1O1O1O1000000000000GeNeD[1Y;gNcDM0\\1Z;nNeDS1Z;>N2N`NlDP1S;POnDP1P;ROQEm0m:VOREj0l:XOTEh0k:YOUEg0i:ZOXEf0f:]OXEd0f:^OZEb0c:A\\E`0a8dNaG50IU1Q1jN<^8iNaG8R1f0PO9\\8]OZHc0[OO[8_OhGS1L_O\\8^OfGU1N]O\\8^OfGU1O\\O[8@eGS11]OZ8@eGT1O]O\\8_OeGT1O]O\\8_OeGU1N\\O^8^OdGU1O]O]8^OdGU1O]O]8^OdGU1O]O^8^ObGU10]O^8^ObGU10]O^8^ObGT11^O]8^ObGT11^O]8^OaGV11\\O^8_O`GU12\\O^8e1aG[N_8d1bG\\N^8d1bG\\N_8b1bG^N^8b1bG^N^8b1bG^N^8b1bG^N^8b1bG^N^8b1bG^N^8c1aG]N_8c1`G^N`8b1aG]N`8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8b1`G^N`8b1`G^N`8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8b1`G^Na8a1^G`Nb8_1`G`N`8`1`G`N`8`1`G`N`8`1`G`N`8`1`G`N`8`1`G`Na8_1_GaNa8_1_GaNa8`1]GaNc8_1]GaNc8_1^G`Nb8_1_GaNb8^1^GbNb8^1^GbNb8^1^GbNb8^1^GbNb8^1^GbNb8^1^GbNb8^1]GcNd8]1[GcNe8\\1\\GdNd8\\1\\GdNd8\\1\\GdNM" + }, + { + "size": [ + 424, + 640 + ], + "counts": "[96o20n60RI0n60RI0n61QIOn62RINn62QIOo61PI0P70PI0P70oH1Q7OoH0R7OnH2R7NmH3S7MdG1@2k8NcG5^ONo8LbG:\\OJR9M^Ga0\\OBV9NZGg0[O\\OZ9NYGl0XOWO^9OfFU2Y9mMaFY2^9hM`FZ2_9gM_F[2a8_MnG7_O\\2a8bMlGS3T8oLjGR3U8PMiGQ3W8PMfGQ3Z8QM_Gh12jN_8m1gGnMY8R2YH]Mg7a2`HYMa7g2`HXM`7l2\\HSMe7o2XHQMi7Q3SHQMm7Q3kGUMU8o30000001O002M2O2M5CQGXLS9[2TGUNb9d1jFnM^9k1U1I7M2M3K5M=B7Ia0^OQSa7" + }, + { + "size": [ + 424, + 640 + ], + "counts": 
"f[75n<6N1O2O1N1N3L3M3M4L3L4N29H6I1O1O1O1O0000000O01O1O0O1fNHbE<`:OQE1Y;FiDKGHPde7" + } + ], + "question": "Which of the following statements correctly describes the actions of and ?", + "choices": [ + "A. is driving and is on .", + "B. is carrying and is carrying .", + "C. is carrying and is carrying .", + "D. Both and are carrying ." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_383.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000313182.jpg", + "mask_rles": [ + { + "size": [ + 424, + 640 + ], + "counts": "\\T_11Y\\a08\\P_O;]CBS\\;BeDa0X;^OhDg0S;XOnDj0P;VOoDk0Q;UOoDl0Q;SOoDm0R;SOmDm0T;ROlDn0U;POlDP1U;oNkDQ1W;mNjDR1W;mNiDS1X;lNhDT1Z;jNfDV1[;iNeDW1\\;hNdDX1];gNcDY1_;eNaD[1`;cNaD]1e;00000001O00O1001O0000001O0000000000000000000000001O0000000000O10000000000O10000O100O1O1O100O100O100O100001O00000000000000001O00000000001O0000001O00001O00000000001O000000001O0000001O00000000001O00001O0000001O000000001O0000000000001O00000000000000001O00000000000000O100000000000000O100O10000O1O1O1O1O1000000000000GeNeD[1Y;gNcDM0\\1Z;nNeDS1Z;>N2N`NlDP1S;POnDP1P;ROQEm0m:VOREj0l:XOTEh0k:YOUEg0i:ZOXEf0f:]OXEd0f:^OZEb0c:A\\E`0a8dNaG50IU1Q1jN<^8iNaG8R1f0PO9\\8]OZHc0[OO[8_OhGS1L_O\\8^OfGU1N]O\\8^OfGU1O\\O[8@eGS11]OZ8@eGT1O]O\\8_OeGT1O]O\\8_OeGU1N\\O^8^OdGU1O]O]8^OdGU1O]O]8^OdGU1O]O^8^ObGU10]O^8^ObGU10]O^8^ObGT11^O]8^ObGT11^O]8^OaGV11\\O^8_O`GU12\\O^8e1aG[N_8d1bG\\N^8d1bG\\N_8b1bG^N^8b1bG^N^8b1bG^N^8b1bG^N^8b1bG^N^8b1bG^N^8c1aG]N_8c1`G^N`8b1aG]N`8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8b1`G^N`8b1`G^N`8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8b1`G^Na8a1^G`Nb8_1`G`N`8`1`G`N`8`1`G`N`8`1`G`N`8`1`G`N`8`1`G`Na8_1_GaNa8_1_GaNa8`1]GaNc8_1]GaNc8_1^G`Nb8_1_GaNb8^1^GbNb8^1^GbNb8^1^GbNb8^1^GbNb8^1^GbNb8^1^GbNb8^1]GcNd8]1[GcNe8\\1\\GdNd8\\1\\GdNd8\\1\\GdNM" + }, + { + "size": [ + 424, + 640 + ], + "counts": 
"eig02Q=7M7H5L1000001O001O000kC[OLNc;i0_DYONNc;o0<4mCmN52e9c0aFj1U9X1M6J2N0000000000000000000000000000000000000000O1000000000000000000000000000000000000001O0000O2aNhFkN[;iN_EMf[2l0_dMUO8OV80jFR1IB4I;M@4S9BUGi3U90O11O000O10000O100001O000000000000O1N2000000003M0000O1O1O101O0000001O001bN_FSOb9k0eFnN]9P1lFfNW9Y1iFgNW9Y1eFRNAd0m9W1cFmN]9R1cFoN^9n0cFRO^9n0`FTOa9k0ZF[Oe9e0ZF\\Og9c0YF\\Oh9d0WF\\Oj9c0WF\\Oj9b0aF]NX:^1l0L4N2O1O1O1N3M2DlCAX<:lCBW<9`0Lemh5" + }, + { + "size": [ + 424, + 640 + ], + "counts": "Z96R=00000000O1O11O0001NW3N]Q12\\kN100001NV`0Oh]j02RRUO1X^l02]bZOK]oH001N1PC1i:OfF0]Nh0^:\\OiEMIZ2Q:l0K2N2O3L10001M200O1O100O6K4L001N100N2O100000O10ON301N10000001O0O1000000O1000001O000O2O0O101N1O100O10001O0O1000000O1O2O000O1M5L2N3O0O1000001N100000001O000O100O2O1O000O1000000O2O0000000O2O0O10000O100O2O000O2O001O00000O2O0010OO100O11O0001M11001O0O10000000000000001O00001O0001O01N11O0010O00000001O00O1010O0001O00000010O01O00001O00000001OO100O2O001O001O00001O0O101O000000000O1000001O0O10000O100000O10O01001O1O2N2M2O2N1O1O2N1O1O1N2O1N2O1O1O1O1N101O001O1N2O0000000O1000000000001N10000O1000000O10O10O0100O100O100O1O1O1O1O1O010O1O1O03N1N2O1O000O1000001O0O1000001O000O100000001O001O0O100O2O00000O101O000O2O000000000O2O000000000O2O1O000O101O0000001N100001O0000001O01O01O001O00010OO1000000O10000O1O10000000ZHiJ[7W5cHlJ\\7T5dHmJ[7S5dHoJZ7R5fHoJY7Q5fHQKY7o4fHSKY7m4gHSKY7m4gHTKW7m4iHTKV7l4iHVKU7k4jHWKU7i4kHXKS7_5M4N1L6N1N2O1N4M3L2M5K6J5L;E5J;E7Ic0\\O`0B:E4M6G5RMUF[2g:iMmDc1k;ZNVDn0Y, , and ?", + "choices": [ + "A. is beside , and is in front of .", + "B. is driving on , and is beside .", + "C. is driving on , and is beside .", + "D. is on , which is beside ." 
+ ], + "answer": "C", + "type": "relation", + "image": "images/vqa_384.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000463174.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "fmU11X=2000000001O00O12N[da11][^N8TJG[I3_4l0`3jMnMc0PNk0a0i0^3UOhLVOXOn0b0g0\\3\\OdLQO]Ol0d0f0Z3B`Ln08_OV3G_Lk0;^OT3OWLg0d0[OT3]3lLcLS3^3nLnIYO`2i3b3RM]Lm2d3TM[Lk2f3UMZLj2g3VMYLi2h3WMXLi2h3WMXLi2h3WMXLh2i3XMWLh2i3WMWLi2j3XMULh2k3XMTLh2m3XMQLi2P4WMmKk2T4UMiKn2W4RMgKo2Z4QMbKS3^4mL`KT3a4kL]KX3d4gLXK]3h4cLUK`3j4bLRK`3o4`LnJa1oNUOV6XOiJc1WOQOQ6\\OeJc1GfNe5GbJa1NgN`5I^Ja16cN]5K\\Ja1`Im1d1dMm4?\\Io1k1\\Mm4d0WIP2U9PNfFU2Z9m010O10O101OO0101O0001N100000O11N1000O1000000001O00000000001O0O2O2N001OmLmFn1S9RNnFm1R9SNoFm1Q9QNQGo1n8oMUGS2h8mMYGT2e8lM]GT2`8nM`GX2S1PM_5g0`IZ2i0UMf5`0cI[2HjLa0_67eIi2GTMd63fIj2_OYMk6LgIn3Y6QLiIn3W6RLjIm3V6TLjIk3U4^KnLg0nNj3R4eKlLa0SOi3o3kKkL;XOi3j3QLlL6[Oh3h3ULjL4_Of3g3XLhL3Ae3d3\\LjLMDf3a3`LiLFgN[OP1^4_3dLiLBhN\\OP1^4^3fLhLELc3\\3jLfLDNb3V3PMkL^O0b3l2bKRLc1S1XO0c3c2`M\\MmN2b3a2cM\\MkN5a3]2gM\\MiN7_3[2kM]MeN:`3e1gK`MV2`0cN<`3a1QLWMQ2k0]N>b3`1bNPNmMa0`3^1dNPNmMb0_3_1cNnMmMf0`3\\1bNmMnMh0`3Z1cNmMlMk0b3W1bNnMlMk0h3Q1]NRNkMn0S4e0RN\\NjMQ1X4`0mM^NgM]O]Of1P5>mM]NfM@\\Of1S5;kM^NjMX1^46jM`NhM\\1_43hM`NiM^1a41eMaNhM`1d4NeM`NgMc1d4LfM`NfMe1e4JgM^NdMi1f4HgM\\NcMo1f4CbN`0_1_O`Nb0a1\\O`Ne0a1XOaNg0a1VO_Nk0c1RO[NQ1f1mNYNU1g1jNZNW1e1gN]NY1c1fN\\N]1c1aN]Na1b1_N^Nb1a1]NaNb1`1]NaNb1_1]NbNc1]1^NdNa1\\1^NeNb1[1^NgN`1Y1`NiN^1W1bNjN]1V1bNmN\\1S1dNoNZ1Q1eNSOX1m0hNUOV1k0iNXOU1h0jN[OT1e0lN]OR1c0nN@o0?RODk0GB;, , and ?", + "choices": [ + "A. has already hit with .", + "B. is holding while looking away from .", + "C. is using to point at .", + "D. is swinging and is about to hit ." 
+ ], + "answer": "D", + "type": "relation", + "image": "images/vqa_385.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000192904.jpg", + "mask_rles": [ + { + "size": [ + 436, + 640 + ], + "counts": "oTo37[=5L3M2N2N2O1N101N100ODYCNf<1[COe<1\\CNd<2\\CNd<2\\COc<1]COc<1]COd<0\\C0d<0\\C0d<0\\C0d<1\\CNe<1[COe<2[CMe<4ZCLg<3ZCL^S5DnJYIAi6;ZIDg69\\IFf64_IKb63cII_65bIJ_65aIK_64bIL_63aIM`62aIM_62bIN^62bIN_60bI0^6OcI1]6NeI1\\6MhIOY60iIOX6OiI1W6NjI2W6LkI3U6LnI2S6LgIHoL=Y9JiIInL=Z9IPJ6P6IQJ7o5HSJ7n5GSJ9m5GSJ:l5DVJgJ\\O_L6h8`0iJZO_L6o75WH?Z3VO`L5n7]1bK]NaL5n7^1fKaN[4_1]K_NeL2o7^1[KaNeL1Q8_1YK^NhL2P8`1XK]NiL2P8c1TK^NjLOR8d1SKdNn4]1QKbNP5`1nJ_NS5b1lJ^NT5c1kJ]NU5d1jJ\\NV5e1iJ[NX5d1hJ[NY5f1gJYNZ5g1eJYN[5f1`JUNVM5[8f1_JVNUM4]8f1]JUNWM5\\8g1\\JTNXM5\\8f1aJ[N`5b1bJ]N`5b1eGXNc26h5i1YJWNh5h1jGTNLNg17d6h1RJZNk1J^Om1dNZNn1K^Oj1cN\\No1I_Ol1`N\\NQ2H@l1]N]NS2G@m1\\N[NT2J^Om1\\NZNV2I^On1ZNZNW2I^Oo1ZNWNY2J]OP2XNWNZ2J]OQ2UNXN]2H]OR2TNWN_2H\\OS2RNVNb2G[OV2oMTNg2FYOY2mMRNj2EXO[2lMQNk2EXO]2iMoMP3DWO_2fMnMS3DUOa2dMmMW3BUOe2_MiM]3BSOi2ZMhMa3ATOj2UMiMf3_OSOl2QMhMl3\\OSOQ3dLjMZ4UOQOc4o0^KoNc4Q1]KoNb4R1_KlNb4S1_KmNa4R1`KnN_4Q1cKnN^4Q1dKnN\\4Q1eKoNZ4R1gKlNZ4T1fKkN[4T1fKlNZ4T1gKjNY4W1hKhNX4X1hKhNW4Y1iKfNX4Z1iKeNW4Z1jKeNV4Z1mKeNS4[1nKcNS4]1nKbNR4^1nKaNR4`1oK^NR4b1oK\\NR4d1nK\\NR4d1nK[NR4f1oKYNQ4f1PLYNQ4g1PLWNQ4j1oKTNQ4m1oKSNQ4n1oKPNR4P2oKnMQ4T2nKkMS4V2mKhMT4X2lKgMT4[2mKbMT4^2mK`MT4a2mK\\MS4e2nKYMS4h2mKVMT4k2lKQMV4Q3jKmLW4S3jKkLV4W3lKdLV4]3jK`LX4`3lKYLV4j3iKRLZ4o3hKkKZ4V4jKbKZ4^4iKYK]4h4V22N2O001N2O001N100O2O0O2O1O0O2O000O2O1O00100O001O001O001N102N1O1O1O1O1O1O1O1O1O101N1O1O1O100O2N1O2N2N3M4L2N4L3L5L3L4K5DH9B=D;G9C=H9G8M3L6J4M5J6J5L5I7H?WOhmk0" + }, + { + "size": [ + 436, + 640 + ], + "counts": 
"]hn35^=2O0O2O000000001O00000000001O0001O000000000000001O0000000000001O0000000000001O0000000000001O00000000001O000O10000000001N1001O000001O000001O01O0000O1000000000100O2N1N1O2O1N2O1N101N101N100O100O100O100O1000O1000000O10000O10000O100O1O100O101N1O101N1O2N2NkRl2" + } + ], + "question": "What is the spatial relationship between and ?", + "choices": [ + "A. is on .", + "B. is inside .", + "C. is on .", + "D. is beside ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_386.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000015335.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "fYc2=`>5K5L3L4N1O3K3O1O1M300O1O1N2O2N1O1M2001O01M3O001O1O1N2O001N110O1O0O101O1O0O2O001O1O1O1O001fDiMi9X2VFmMfNKl:X2^FSN_9n1bFSN[9o1dFSNZ9n1eFUNX9l1gFWNW9h1iF[NS9g1kF\\NS9e1iFaNT9_1jFfNT9Y1hFoNT9S1`FZO]9h0[FAb9R3M2M3N1N4M1O2M4M2M4M2N2M3N2N1O2N2M5L1O3M3M1N101O0gN\\IVJd6f5dIVJ]6g5hIVJX6i5jIUJX6i5lITJT6S5TIZJO;0Ek0b0R6Q5kI[J5c0o5l4SKSKm4i4YKTKh4h4]KWKc4f4aKYK_4f4cKYK]4f4eKYK[4f4gKYKZ4c4jK[KW4d4kK[KU4e4lKYKU4f4mKYKS4d4V3O1000000O1O1O1O100O100O10000O1O100O1000000O1000000000000000000000000001O000000001O0000001O001O0000001O0O110O1O1O1O1O001O0O3N1O10O01cMTKdJm4k0UKW1OmMl4j0YKb3i4ZLZK^1GiMQ5d0[Kd1ChMS5a0\\Kj1C`MR5c0]Km1A`MS5?_KR2]O_MU5;aKV2ZO_MW55dK\\2UO^MX5NjKd2oN\\MZ5JkKd1iNjM4e0Y5GPLT3dNSM^5ZO\\LV2UNTN1:e5oN]La2oMUNO:`8`1bGVNN7c8`1cGXNKiNOU1g8h1_GYN4K`8k1]GYN4Ci8e1mF`M`0X1KAi8d1dGiNH[Oi8h1cGkNKTOe8m1eGlNKoNf8P2dGRNiNe0h:U1cFTNgN1N2O9j:]1\\2K5M2N3M3N2M3M3K5K6K5F:IWVh3" + }, + { + "size": [ + 480, + 640 + ], + "counts": "g4l1T=0O101OO01N101000O1J6N20000O2O1N101OOL5M3O10000O101L3N2O4Jocj1WOh\\UN6N2M2N4M2M3N2N1O2L3O2N2N1N3L4N1O2O001O1O1O00001O1O00001O10O01O001O001O1O1O001O1O1O2N1O1O2N3M2N3M4L4L4L5K5K7I6J4L4L3M4M3L1O0000O1M3O100N2O1O1N20O01O1O1O100O2N1O1O101O0O10001N100O100O10000O100000000O100O1000000iN[DSOe;i0eDPO\\;l0jDROV;`0YDlNh0b0o:6hEHX:2oELS:NTFOm9NWF1i9L[F3e9HaF6W43000000O10000000001N1000jiU1" 
+ } + ], + "question": "Which statement accurately describes the relationship between and ?", + "choices": [ + "A. is pushing .", + "B. is positioned on top of .", + "C. is sitting on .", + "D. is standing in front of ." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_387.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000060932.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "Ydg0=h<:I5L4J5M3N3N1gDfN_:\\1REWOi:k1Ja0@T1kN9H3L7J6I3M3@mJcHT5T7XKhHh4V7]KcHh4Z7f000O100000nNQIgKn6m3fIlKZ6R4kIkKV6R4nIlKR6S4QJkKo5T4SJjKn5V4VJeKk5Z4d1010O20ZLVGj2g8TM\\Gn2a8oLbG_OEb3h8kLlGT3g9AOTMfEMa0o1P;JNWNdD`1f;1N1M3N01N3N4J7J6J6J6I3N2M5LVSk6" + }, + { + "size": [ + 428, + 640 + ], + "counts": "m7o0]<0000001O1O1O0000O1O1O1O1O1O100001O001O000000O1O1O1O10000000O2O01O0000O01O1O100O10000001O001O00O1O100O010O11O1O1O0010O0O101M2F;HmR`7" + }, + { + "size": [ + 428, + 640 + ], + "counts": "QQZ1?gh4Y7eK`Ha4Z7aKcHc4[7j0L3M2O1N1000001O0000000001oNmHWK00T7f4QISK23n6f4V1oKcGo2_8QMcGm2^8QMhGk2X8RMlGn2U8lLoGU3P8hLTHX3T8[L\\H1PO1JU3Y:K8H5ZM[EY2g:dM\\E[2k:0O2M1O1102gMTEm1j;lMXD[1R, , and ?", + "choices": [ + "A. is in front of , who is beside .", + "B. is looking at , who is beside .", + "C. and are both in front of .", + "D. is beside , and is also beside ." 
+ ], + "answer": "D", + "type": "relation", + "image": "images/vqa_388.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000076417.jpg", + "mask_rles": [ + { + "size": [ + 478, + 640 + ], + "counts": "oXg29`>5M3N3K4M3N2N101M4M2N2O1O1N2O1O1O1O1O1N200O100O100O10000O1O100O100000000O10000000000000000000000000000O100000000000000O11O0000O100001O00O1001O000000000000000000000000001O0000000000O1001O000000000000000000001N10000O100001O1O0000VOlBJU=3nBLR=P100001O001O001O00O1N2N2M3M3N2M3M3M3N2N2M3M3M3M3N2M3N2M3N2N2M3N2L4N2M3M3M3N1N3N2M3N2M3M3N2N2N2M3N2N3M2N2N2N2N2N2O1M3N2O1N2N2M3N2M3N2O1M3M3O1N2N2M3N2N2O1N2M3N101N3N1M3N2O1N2N2M2O2O1N2N2M3N2N2N2N2N2N2N2O1N2N2N2O1N2O1N2M3O1N2O2M2N101N2O2M2O001O1O2M101O1O1O2N001O1O100O1O1O100O2OO0100O1O2O0O00101N1000O100001O0000O02O0000O01000000000001O00O10O100O2OO10O10001OO10000001O00O1001N1000O100001O000O011O0000O100001O0000O02O0000O10000000O10001O000000000000000000000000000O100000000000000000000O100000000000000000000000000000000000000O10000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000WK" + }, + { + "size": [ + 478, + 640 + ], + "counts": "R\\_6Q1Z and in relation to ?", + "choices": [ + "A. and are both over .", + "B. is beside , and is over .", + "C. is over , and is beside .", + "D. is over , and is beside ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_389.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000326627.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "VYY45g>2N3M:F2N1O0000001O00001OO2N1O2O1N4M2N4LQmQ4" + }, + { + "size": [ + 480, + 640 + ], + "counts": "Q>n0R>000O10000000000O100O10000001O0000O100O1000000000000000000O10000000000000000O100000000000000O10000000000000000000000000000O100000000O10000000000000000000000O100000000000000O10000O10000000000O100000000O10000000000000000000000000O100000000O1000000O10000000O010000000000O100000O0100000O1000O100000O10O01O100O1N2O1O10O10000000000O1000O0100O101O0O10001O00000O2O001O0O10001N10000O100O2O000O01000O10O100O10O10000O010O0100000000O0100000000O01000O1000000O10000O10000000000O10O1000O100000O01000000O010000000O1000000000000O1000000O100O100000000O1000000O010000000O1000000O1000000O100O10001NndS5" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"R>m0S>0000000000000O1000000000000O1000000O10000000000O10000000000O1000000000000000000000000O10000O10000000000000000000000000000O1000000O10000000000O10000000000000000O10000000000000000O100000000O100000000O10000O10000000000001O0000O1000000000000000000000000000000000000O1000000000000O100000000O100000000O100O1N2N2O1O100000000001O0000O1000000000000000000001O001O001O0000001O0000001O0000000000000000O1000000O1000000O1O1O100O100000000000000O10000000000001O000000001OO100O1000000O100000000O100000000O100000000000000000000O10000000000000000000000000000000000000000000000O10000000000000000001TNPC0Ob1b=H1O1O00000000000000O1O1000000000000000000000000O100000000000000001O001O000000000000O1O10000000000000000O1N2O1O100000000001O00O100O100O100O100000000O10000O1O1J6M3M3O11O3M1O1O0000001OO10000000000000000000000000000000000O10000O100O1000000O100001O1O00000000O1001O00O100001O0000000000O1CSNcCQ2\\<:O1O1O10000001O0000000000001O00000000000000000000O100000000001O001O0000000000000000O10000002N10O0001N1001O0000O100000O1O10001O01O1O1O1O0O10001O00000000Q1oNU1iNQ`69d_I?E;mAoNa=m1`BQNe and , and the road, ?", + "choices": [ + "A. is parked on , but is parked on .", + "B. Both and are parked on .", + "C. Only is parked on .", + "D. Both and are enclosing ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_390.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000077460.jpg", + "mask_rles": [ + { + "size": [ + 640, + 428 + ], + "counts": "Sj[55ic07G5N2O3M4N1ON2Ci\\OMXc0Om\\OOUc0Nn\\OOn\\i2" + }, + { + "size": [ + 640, + 428 + ], + "counts": "gQo17gc07\\\\ODYc0n0^OQOZ]OZ1[b07N2N11Bf]OSO[b0k0f0NK7H10oN_]Oa0cb0\\O`]Ob0db0YOh]O;]dS6" + }, + { + "size": [ + 640, + 428 + ], + "counts": "[a093Jh0028^`09g^O@0M13N\\1Ya0V10N2N2O10e^O`MVa0a2g^OaMYa0c20000000000000000000000001O001O0[Ni^Oa0Wa0^Oj^Ob0Va0\\Om^Oc0Sa0]Om^Oc0Sa0]Om^Oc0Sa0]Om^Oc0Sa0]Om^Oc0Sa0]Om^Oc0Sa0\\On^Od0Sa0[Om^Oe0Sa0[Om^Oe0Sa0\\Ol^Od0Ta0\\Ol^Od0Ta0\\Ol^Od0Ta0]Ok^Oc0Ua0^Oj^Ob0Va0_Oi^Oa0Xa0^Oi^Oa0Wa0_Oi^Oa0Wa0_Oi^Oa0Wa0_Oi^Oa0Wa0@h^O`0Xa0@h^O`0Xa0_Oi^Oa0Wa0_Oh^Ob0Xa0^Oi^Oa0Xa0^Oh^Ob0Xa0^Oh^Ob0Xa0^Oh^Ob0Xa0_Og^Oa0Ya0^Oh^Ob0Xa0^Oh^Ob0Xa0^Og^Oc0Ya0]Og^Oc0Ya0^Of^Ob0Za0^Oe^Oc0\\a0P1000000001O000gMg^Ok1Za0RNh^On1Ya0PNi^Oo1da0UNR^Od1Yb0L4`Ne]OS1db001O0000O1M31O1O1O001OO100[OPOP^OZ1ka0`0M3O1L4O100001O0000002N:F5TNk]Oc1_b0J=C2N1OO10000O11O1O0000O1ROQOk^Oo0Sa0ZOf^Of0Ra05Z^OLfa0W100000000001O00000000000000001O0000000000000000000000O100001O000000000eN[^O;ea0SOm^Om0Sa0POQ_Oo0o`0POR_OP1n`0oNR_OR1Qa0kNo^OU1ma01O1BlNl]OX1Rb0<00O1O100O100000000001O0000O1JWNS^Ok1ka06O1000000001O00000000000000^OoMV_OP2^a0O1O1O1O00O100O100O100LPNW^OQ2ia0oMW^OQ2ma0O000000002N1O0000000000001O000000001O0000O100000000O1O10000000000001O0000O100000000001O000000000000O100000000001O00000000000000O100JRNZ^On1ma0O7I4L2N1O3M3M2N1O1O0000O100ElNi]OU1na0jNQ^O7KP1Sb0mNm]O_1Sb070000000000000000000000000000000000000000O1001O001O1O1O0000O1O1O10000000000001O00O1001O0000O100000000000000001O000000O11O001O2N1O001O0000000cNk]Oo0Ub0POm]Oo0Sb0QOQ^Ok0oa0UOR^Oj0na0VOS^Oi0ma0VOT^Oj0la0VOS^Ok0la0VOS^Ok0ma0UOS^Ok0ma0VOQ^Ok0oa0TOS^Ok0na0TOR^Ol0bb0O1O1O00001OO10000UO[OZ^Of0ea0@V^O`0ja0_OV^Ob0ja0^OV^Ob0ja0]OW^Oc0ia0]OW^Oc0ia0]OX^Ob0ha0_OX^O`0ha0@X^O`0ha0@X
^O`0ha0@W^Oa0ja0^OS^Oe0ma0\\OQ^Oe0oa0\\Oo]Oe0Pb0]Oo]Oc0Qb0f0000000000000000000000000000000000000000000000000000000000000001^Nm]OH[1" + } + ], + "question": "What are and doing on ?", + "choices": [ + "A. Both and are running on .", + "B. is running on and is standing on .", + "C. Both and are standing on .", + "D. is standing on and is running on ." + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_391.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000077460.jpg", + "mask_rles": [ + { + "size": [ + 640, + 428 + ], + "counts": "gQo17gc07\\\\ODYc0n0^OQOZ]OZ1[b07N2N11Bf]OSO[b0k0f0NK7H10oN_]Oa0cb0\\O`]Ob0db0YOh]O;]dS6" + }, + { + "size": [ + 640, + 428 + ], + "counts": "[a093Jh0028^`09g^O@0M13N\\1Ya0V10N2N2O10e^O`MVa0a2g^OaMYa0c20000000000000000000000001O001O0[Ni^Oa0Wa0^Oj^Ob0Va0\\Om^Oc0Sa0]Om^Oc0Sa0]Om^Oc0Sa0]Om^Oc0Sa0]Om^Oc0Sa0]Om^Oc0Sa0\\On^Od0Sa0[Om^Oe0Sa0[Om^Oe0Sa0\\Ol^Od0Ta0\\Ol^Od0Ta0\\Ol^Od0Ta0]Ok^Oc0Ua0^Oj^Ob0Va0_Oi^Oa0Xa0^Oi^Oa0Wa0_Oi^Oa0Wa0_Oi^Oa0Wa0_Oi^Oa0Wa0@h^O`0Xa0@h^O`0Xa0_Oi^Oa0Wa0_Oh^Ob0Xa0^Oi^Oa0Xa0^Oh^Ob0Xa0^Oh^Ob0Xa0^Oh^Ob0Xa0_Og^Oa0Ya0^Oh^Ob0Xa0^Oh^Ob0Xa0^Og^Oc0Ya0]Og^Oc0Ya0^Of^Ob0Za0^Oe^Oc0\\a0P1000000001O000gMg^Ok1Za0RNh^On1Ya0PNi^Oo1da0UNR^Od1Yb0L4`Ne]OS1db001O0000O1M31O1O1O001OO100[OPOP^OZ1ka0`0M3O1L4O100001O0000002N:F5TNk]Oc1_b0J=C2N1OO10000O11O1O0000O1ROQOk^Oo0Sa0ZOf^Of0Ra05Z^OLfa0W100000000001O00000000000000001O0000000000000000000000O100001O000000000eN[^O;ea0SOm^Om0Sa0POQ_Oo0o`0POR_OP1n`0oNR_OR1Qa0kNo^OU1ma01O1BlNl]OX1Rb0<00O1O100O100000000001O0000O1JWNS^Ok1ka06O1000000001O00000000000000^OoMV_OP2^a0O1O1O1O00O100O100O100LPNW^OQ2ia0oMW^OQ2ma0O000000002N1O0000000000001O000000001O0000O100000000O1O10000000000001O0000O100000000001O000000000000O100000000001O00000000000000O100JRNZ^On1ma0O7I4L2N1O3M3M2N1O1O0000O100ElNi]OU1na0jNQ^O7KP1Sb0mNm]O_1Sb070000000000000000000000000000000000000000O1001O001O1O1O0000O1O1O10000000000001O00O1001O0000O100000000000000001O000000O11O0
01O2N1O001O0000000cNk]Oo0Ub0POm]Oo0Sb0QOQ^Ok0oa0UOR^Oj0na0VOS^Oi0ma0VOT^Oj0la0VOS^Ok0la0VOS^Ok0ma0UOS^Ok0ma0VOQ^Ok0oa0TOS^Ok0na0TOR^Ol0bb0O1O1O00001OO10000UO[OZ^Of0ea0@V^O`0ja0_OV^Ob0ja0^OV^Ob0ja0]OW^Oc0ia0]OW^Oc0ia0]OX^Ob0ha0_OX^O`0ha0@X^O`0ha0@X^O`0ha0@W^Oa0ja0^OS^Oe0ma0\\OQ^Oe0oa0\\Oo]Oe0Pb0]Oo]Oc0Qb0f0000000000000000000000000000000000000000000000000000000000000001^Nm]OH[1" + }, + { + "size": [ + 640, + 428 + ], + "counts": "RRR62mc03N0O4M1N3N1OIb\\OM]c02f\\OMXc04h\\OLXc03j\\ONTc02m\\ONSc01n\\O0Pc00Q]O0ac0M_QP2" + }, + { + "size": [ + 640, + 428 + ], + "counts": "Sj[55ic07G5N2O3M4N1ON2Ci\\OMXc0Om\\OOUc0Nn\\OOn\\i2" + }, + { + "size": [ + 640, + 428 + ], + "counts": "0^21]N2[O1001N020OO101O0O1000e0M]O3Oc00\\O91GOO2O000Mh0OB5CZ1O;RMiAm2X>SMhAl2Y>TMgAk2Z>TMjAh2V>XMjAh2W>VMjAj2f?000000000000000000000O1001O00001O00000000000000mKYMmFg2T9XMkFi2U9XMjFh2V9XMjFh2V9YMiFg2W9ZMhFf2Y9XMhFh2Y=O0000000000O1O1001O1O1O001O001O1O00O1O100O1001O2N1O1O1OO100O10000O10000]O_Mg_Oa2Y`0`M[_OK8e2]`0dMb_O\\2_`0cMa_O]2d`0]M]_Oc2c`0]M]_Oc2c`0]M]_Oc2b`0^M__Oa2b`0^M^_Ob2b`0^Mb_O^2Ra0N001O1O1O0000001O001OO100O1H8O1O1iM\\MRCd2m<]MSCc2l<]MVCb2j<^MWCa2V?N3MN2N2O1O1O10000C[M^_Of2b`0ZM__Oe2a`0\\M^_Od2o`00L40000000ZMm^O_2[a0O00O1O100001O3M3M2N1gM[^OU2]`0jMi@0kNV2d>kMWDY2h;hMWDY2j;fMVDZ2f>2`MdM[C]2d1O1TMnMiCS2P?200O1000000001O001O1O00000000O1O100O100000000000000M3O100O1O1000000O11O00^HfM^MZ2b2gM]MY2b2hM]MY2c2gM]MY2b2hM^MX2b2hMWFKP7]2h2hMYFKo6]2h2hMYFMm6[2j2hM[FKl6\\2i2iMbMV2_2iM]FLg6[2l2iM]FLg6[2l2iM]FLg6[2P3eM]M[2T:0000000O1000000lJgM`HZ2_7gMaHY2_7gMaHY2`7fM`HZ2d0O100aMeMYC[2fS1@UN1OOR3`0nLOW3BmL3M20Li0NZO3N103L12LNM3:Y;" + } + ], + "question": "Based on the provided relationships, where is located?", + "choices": [ + "A. Being held by .", + "B. Next to .", + "C. On .", + "D. In ." 
+ ], + "answer": "D", + "type": "relation", + "image": "images/vqa_392.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000287545.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "[jm05n=1gB2]OM4LV=;RC8FAT=:RC7JCo<9RC7O@oVCBj<>UCCl<=SCBn<>QCCo<=QCCo<=PCDP=0O0001O1O2N1N3N^b\\3" + }, + { + "size": [ + 480, + 640 + ], + "counts": "\\]^34l>2N1jA1Y=0dB2\\=NWBJ5:e=2YBOh=2SB1n=?2O0O1OO0O2O2O02N2O0O1O[CYOS;g0iD^OV;b0gDBX;n0UDUOk;n0dCcN5c0W interacting with ?", + "choices": [ + "A. is looking at .", + "B. is standing on .", + "C. is beside and looking at it.", + "D. They are standing on different objects and looking away from each other." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_393.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000545219.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "\\Zm12j>8J3N2M3M5K5L2M3N3L5K4M3M2M4L8I2M2O2N1M3O2N1O1N2N2O1OiNgCIX<7kCGS<:PDDo;XD@g;a0ZD^Oe;c0\\D\\Oc;f0]DYOc;h0^DVOa;l0_DSOa;P1]DnNc;T1]DkNc;[1WDeNh;e1PDXNQoG\\OQ8e0RHUOa7NoEl0S=VOlBj0S=YOjBh0U=]OfBc0Z=b0O010O0010O100O100O1O101N1O2N2N2N3M3N1N1O2N2O1N3M3M3M4L2NkQo4" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"aag1>_>;eBC`;X2C7F:I6I7J7K4M2O2N1O2M3O0O100O2O0O101N3M4M3L2O1O1N1O2N100O1O101N101N2N2N1000O1O0M301O01O010O10000O10O010O01O10O010000O01O001O0010O001000O10O10O1000O01O10O010OdNgFQM[9j2_1F;1O1O0O101O1N100O1O1O1000O10O0100000O0001000O1O2O00000O2O1N1O101N1O100000000O1O2O000XFWLX8j3gGVLY8l3fGQL\\8P4dGPL\\8Q4cGRLZ8o3dGTLY8P4dGRLZ8V4TGULj8Y4hFjKV9W4iFiKW9W4iFiKV9X4jFhKU9T4lFVK1e0R9V4nFTK0f0R9Z4nFgKQ9Z4mFgKS9X4oFfKQ9W4nFSK=6i8f4k00O101N3M101O000O2O1O1N2O0O2O0O100O1O1O2O0O1O1O1O1000O10OL4M40O100O01000O100O00011O0O2N101O000O7J001O0O2O001N100000000O10000O101O00000O3N1O1O1N102M4L3N1O1N2N2N2cLVIbNk6i41N100O2O00002N4L1ZHnH:Na6[7b0011`HdHZ7a700jH^Hn6b7RI^Hm6d7TI^Hi6V8F1\\IPHV6_8N3L5L9H3L1O000010O000000001O001O0000O2O00000O100O2M2O100000000001O0000001O000000000000000000010O0000000000001OO1001O000O10001O00000O100O101O00000010O0000002OO01O1O1O1O001O0001O2N1O0O10000O1O1O101M2O100O2O1O002M2O000O101OO1001O0O1000001O0O10001O001aN]J_Ic5`6_J^Ic5`6`J^Ia5]6fJQIUO6V6e6XKSIj4j6i1M001O1O2M2N3O0O2O1kNmGoJB1c8j4\\HTKe7j4\\HUKf7h4]HVKe7h4[1N2N3M2N3N2M2O2N1N3N2L4N1O2N1O2M3N2N1O2N2N2N2N2N2N3M2N2N2N1O2O1N3M3M2O2M3N1O1N105L1N1O2M2O3O00SCoMO2OO`<[24M2O4K4L3M3L5K7J5J5L5J;CXlR1" + } + ], + "question": "What is the relationship between and ?", + "choices": [ + "A. is looking at .", + "B. is wearing .", + "C. is inside .", + "D. is moving towards ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_394.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000096001.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "ejb06Q=7I7I5K4N3M2M3M4M1O2N2N2N1O2N1O2N1O2N1O2N101N1O100O1O2O0O1O2O0O1O101N1O10001N100O2O0O2O000O100O2O0O100O101O0O100O1000000O10000O10000O10000000000O100000O100000000O10O1000O10000O10000O100O100O100O100O100O2N100O1O1O2O0O1O1O1O1O2O0O1N3N1O1O2N1N3M2O2M3M2N3N2M2O2M3N3M2M3N3L4L4K6HZQn5" + }, + { + "size": [ + 426, + 640 + ], + "counts": "Pfm37R=3M2N3M2N2N2O2M3M2N3N1N2N3M2N2O2N1N3M5L1N3M2N3N1N2N2N2N3M2O2N1M3N3M2O1N3M2N2O1N2N2O2M2N2O2M2N2N2N3M2O1N3M3M2O1N2O1N3M3N1N3M2N2N2O2M2O1O1O1N2N200O2N1O1O2N1N2O1O1O2N1O2O0O1O1O001O001O1O0000000000000000O1O100^OVHcKk7P4eHlK\\7R4hHkKY7Q4QIiKo6U4T1XOTGoLo8Q3SGiLP9X3a0O11O000000000000001O00001O00001O00010N1001O010O010O100O2N100O100O10001N100O10000O10001O0O1O100O10TFoLM0W9P3i0O11O2mEWMa9Y30O1O1O2N2M2O2N1N3N3M2M5L4L7I000O01O001O10O0100O1O01O1O001O001O1O0O2O1OgHbMg3^2c3O001N101O1O001N101O001O0O101O1O0O2O1O1O1O1O0O2O1O1O1O0O101O1O001N101O1O000O2O001O1O0O200O1N101O0O2O1O1O00001O0000O0101O001O0O2O1O2N1O1N3N101M3N1N3N1O1O2N001O2N1N4LRkf0" + }, + { + "size": [ + 426, + 640 + ], + "counts": "oc51Y=000Pm<0PSC0\\R<0Wd10[YB3M2O1N2O1O2M2N2O1N2O1N2O2M2O1O1N2N3N1O1N3N1N2O2N1000001O0001O0001O000001O01O00000001O01O00000010O000001O000001O01O00000001O01O0001O0001O00010O00001O0001O01O0000010O0000001O0001O01O00000010O00000010O0000001O000001O0001O00010O00001O01O01O000000O2M2N2N2N2M4M2N2O1N2N3M2N2N2O1M3N3M2O1N2N2Niao4" + }, + { + "size": [ + 426, + 640 + ], + "counts": 
"d1Z29Bb0f0b4IkJBc0e0a4JjJDd0b0a4JjJFe0`0a4Q2_KPN_4Q2bKnM^4R2bKnM\\4U2dKjM\\4V2dKjM[4W2eKiMY4Y2hKgMV4DoJ`0k0LU4\\2jKdMU4]2kKdMS4]2mKcMS4]2nKbMQ4_2oKaMP4`2PL`Mo3b2PL^Mo3c2RL]Mk3e2UL[Mj3g2VLXMi3i2XLVMg3k2YLVMf3j2ZLVMe3k2[LUMd3m2\\LSMb3n2^LRMa3o2_LQM`3P3`LPM^3R3cLmL\\3U3cLlL\\3T3dLlL[3U3eLlLY3U3gLkLY3U3hLjLW3W3iLiLW3X3hLiLU3Y3lLfLS3[3mLeLR3\\3oLcLo2_3QMaLn2`3RMaLm2_3TM`Lk2i0[L[Oi0Lk2f0dLZOa00j2e0kLWO;5h2b0RMXO56h2`0XMXO08g2?]MWOM9d2a0aMUOL9b2a0eMUOI;a2?hMUOH;_2`0kMTOF=^2>nMUOD<^2=QNVOA=^2;TNWO^O>^2;TNWO^O>^2:VNXO\\O=^2:XNXOZO>^29YNYOYO?]27\\NYOXO?\\27]N[OVO>]26_N[OTO?]26_N[OTO`0\\24aN\\OSO`0\\23cN]OQO`0[22eN^OPO`0[21fN_OPO?Z22gN^OoN`0[20gN@nNa0[2nMdLc1S2OnN?o2ZOTN7mN`0o2XOUN7lNa0Q3UOTN:lN`0R3SOSN>jN?T3QOTN?hN`0U3POSN`0hNa0V3lNSNc0gNa0X3jNRNd0gNa0\\3eNnMk0eN`0a3`NkMP1eN`0a3gM^Ka0\\2X1eN`0b3fM_K?\\2Z1cNa0l3RNaM]1dN`0n3oM_Mb1bN`0P4kM`Md1`Na0P4kM`Md1aN`0P4kM_Me1aN`0Q4jM^Mf1aN`0R4hM_Mh1_N`0R4eMaMk1]N`0j6@VI`0j6@VIa0i6@VI`0k6_OUIa0k6_OVI`0j6@VIa0i6_OWIa0i6@WI?j6@VI`0j6@VI`0j6@VIa0i6@VI`0j6@WI?i6AWI`0h6@YI?g6AYI?g6BXI>h6AZI?e6A[I?e6A[I?j2fMIj1^Ma0h2iMFd1eMb0e2kMEb1gMc0d2lMD_1jMe0b2nMB\\1mMf0a2PN_OZ1RNf0^2RN^OV1VNh0\\2TNWLEk2^1dNh0Z2_NROg0fNj0X2_NROf0hNj0[2[NmNi0jNm0Y2YNlNj0lNm0Z2WNjNj0oNn0X2WNiNi0QOQ1V2UNiNi0ROR1d5lN^JT1a5lNaJS1_5kNcJU1]5jNdJV1\\5hNgJX1W5hNjJX1V5gNkJZ1T5dNoJ[1P5eNRKZ1n4eNSK[1l4eNVKZ1j4fNVKZ1i4gNWKZ1h4gNWKY1i4gNXKX1g4iNYKW1f4jNZKV1f4jNZKW1d4jN]KU1b4lN^KU1a4lN^KT1a4mN_KS1a4mNaKQ1^4PObKQ1\\4POeKo0[4QOeKo0Z4SOeKn0Y4SOgKm0X4TOhKl0W4UOjKj0U4WOkKj0l0nM]OY1Gi0j0QN]OW1Jg0i0SN[OX1Ke0j0SNZOY1Le0i0SNQMGd1b1b0d0h0UNoLHe1`1d0c0h0VNmLIe1_1g0a0g0XNjLJg1_1g0?h0YNhLKf1_1j0>g0\\NcLIj1_1l00n3d2dMXOPNTN>Oo3e2cMYOoMSN`0MP4f2aMZOoMSN`0LQ4g2aMXOoMVN?IR4j2_MWOPNVN`0FS4m2]MWOQNUNY5d2gLVOPNVN=IV4k2\\MVOQNVN60]4d2\\MVOQNWN32_4a2]MWOnMXN53^4^2_MWOlMZN65\\4Z2cMGPNQN\\4X2dMHnMSNo2@QNd2S1g0e0PMSNZ2W1g0b0UMTNT2Z1i0=YMVNn1]1j0;[MVN:]OOS2\\27_MXN3@3P2\\26aMXNMF5l1^23cMZNHI7j1^21gMZNCL8j1e0iMGU2K[N^O0;g1d0kMGR2N[N[O2=e1c0lMHQ2M]NXO4?b1d0lMIo1O]NSO8b0`1b0mMIm11^NPO:d0^1b0nMIk12^NmN>e0[1c0oMHi15^
NiNa0g0Y1c0PNHf16aNeNb0k0V1b0RNGd19aNaNe0m0T1b0TNEa1=aN]Nh0o0S1a0UNE^1?aNZNl0P1P1b0VND\\1a0bNXNm0Q1o0b0VND\\1b0aNUNP1S1m0b0WNCZ1e0bNoMS1X1i0a0YNBY1f0bNmMV1Y1g0a0XNCY1g04UO;a0XNCX1h06SO:b0YNBW1i06TO9a0ZNBW1i06TO9a0ZNCU1i08TO9?ZNDU1i09SO8`0ZNDU1j08RO9`0ZNDU1j08RO:?YNDV1k08RO8?ZNDU1l09QO8?ZNDU1l09QO8?ZNDU1l09QO9>YNEU1l0:QO7>ZNEU1l0:QO7>ZNEU1l0:QO7>YNFV1k0:QO8=XNGV1k0;PO7>WNHW1j0;PO7>VNHY1j0:QO7Y2a7VNQHA>Z2`7VNVIj1j6VNVIj1j6WNUIi1k6WNUIi1k6XNTIh1l6XNTIh1l6XNTIh1l6XNTIi1k6XNTIh1l6XNTIh1l6XNSH^O9[2OfMc6a0UI^O9[2NgMd6`0UI^O:Z2MhMd6a0UI\\O:[2MiMc6`0VI\\O:\\2LjMc6>VI^O:Y2MmMa6gM]O:W2MQOX2oNJ_OA]NSO?S2IiNl1c0\\O_O`NSO>R2JiNj1h0ZO[OeNQO=S2JiNi1m0VOWOjNPO=T2IgNj1Q1TOUOkNoN>T2IgNi1S1ROUOnNmN?S2HgNh1W1oNUOROjN?S2HgNg1c1dNjN_OhN>T2HfNg1l1ZNfNIcN>U2HeNg1S2QNdN2`N=T2IeNf1Z4SNmJ>T2IdNf1\\4TNkJ=U2IdNd1^4VNiJ>T2HeNb1a4XNeJ>U2HeNa1b4YNdJ>V2HbNa1f4XNbJ?U2IcN_1g4YNaJ?U2IcN^1h4[N_J>V2IcN]1j4\\N\\J>W2IcN[1l4_OaLVOcN[1l4@`LUOcN[1n4@_LUOcNY1Q5A\\LWObNW1S5B[LWObNU1V5CXLXObNT1W5EVLWOaNU1Z5DULWOaNS1]5ERLXO`NS1_5EQLXO`NR1`5FPLXO_NR1c5EnKYO_NQ1d5GlKYO^No0i5GiKZO^Nn0j5IgKYO^No0k5HhKXO]No0l5IgKXO\\Nn0o5JeKXO[Nn0R6IcKYO[Nl0U6K_KYO\\Nk0V6L]KZO]Ni0W6M\\KZO\\Ni0Y6M[K[O[Ng0[6NZK[O[Nf0]6OXKZO[Ne0_61VKZO[Nd0`62UKZO[Nc0a64RKZO\\Nb0d63PK[OZNb0h64nJYOZNb0j64lJ[OXNa0m64jJ\\OYN?n66hJ[OYN?Q75fJ\\OYN=S77dJ\\OYN_IDZNMX8`0]IC[NLY8a0\\IC[NJ[8c0ZIDZNH]8c0ZIEZNF^8d0XIFZND`8f0VIGZNBa8f0UIH[N_Ob8i0SIG]N^Ob8j0QIH]N\\Oe8k0nHJ[N\\Og8j0nHJZN\\Oj8i0lHK[NYOl8k0jHKZNYOn8k0hHL[NWOf0M[7P1cIL[OVOR7n0cILZOXOR7l0dILXO[OT7i0cILXO^OS7f0eILWO@S7e0eIKWOBS7c0fILVOBS7b0gILUODT7?gIMTOGS7=hILTOHT7) and the rectangular tag ()?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
" + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_395.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000322829.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "k\\T1221P=`0E2M5M5J2O1O1O2N2N3L2O1O1O5K3M6J1O3L`0_O3`E_MS:c2hEdMU:Z2nEdMQ:Q2kEPNd:Q2\\EoMd:]2N2O0O1O1O1O2lEUMe9j2>00001N3N1N2N2O1N2O1O1N2N2O1O1N101O1O100N101O10O1O1M20100O1O1O010O10O01O1O1000000O1000O100000000000000001O00000000O1O1000000000000001O00002N001O00000000000000000001O000000000O100O1O0010000000000000000000001O000000000000000000000000001O000001O00000000001O000000000000O011N1O11O00O1O001000000000000000000001O1O000000000001N100O10O11N101O00000O2O0O2O0O1N2O1O1O1O1O1O100O10000O101O0001O000000001O00001O001O1O1O010N4M1N2O01O01O00001O0O101N110O000O2O000O110O001O000O1O1O11N1O101O1O0O11O0000000000001O1O1O001O0000000000000000O100000000000000O10000000000000000001O0O100001OO1001O00000O10001O01O00001O0O10001O00001O0000O100O10000O100000000O10000001O001O0O2O000O2O00000000OgMQFX1o9hNQFX1o9S1O001O1O00001O002OO01O1O1O3M00010O002N3NO010O0000001O10O01O1N4M1O1O2N1O2O0O1OfHmL\\M2f6o2PLmLZM40Hd6U3TLlLZM6i6k2TLQMR4n2W3N2N2N3N0O101M2N101000OO2O2N0JZEgMg:U2:L3O2K5I6O1L3DXDTOl;k0SDQOR[4^OPL:R4DRL9n3FVL7j3HZL6e3I^L5b3K_L4a3L`L3`3MbL1^3NdL1\\3OfLiMkNa0^4g1jLbMoNb0Y4k1iLaMQOb0V4m1mL[MSOd0P4Q2RMUMPOj0m3Q2WMoLROl0g3U2YMlLSOm0d3W2\\MgLVOm0_3\\2UO]Ml0b2ZOYMf0g2ARM?m22cLN\\3Q5O1O1O002O0O1O010O2N1O1O1N102N1M3O001N2O2M2N2O1O1O1N20nJ_MYO`2d0fM[OY2e0jMZOT2g0lMYOR2i0nMWOQ2j0oMUOR2j0oMVOP2k0PNVOo1k0oMVOQ2j0nMWOQ2k0mMUOT2k0jMWOV2P6N1000O10O101O00O0100000000000000000`JjMKV2J]NNc1NdNO\\11dNO\\11dNO\\10fNOZ11fNOZ11fNOZ11fNOZ11fNOZ11fNOZ11eN0[10eN0Z10gN0Y10gN0X11hNOX11hNOW12iNNW12hNOW12iNNW12iNNX10iN0W10jNOW10jNOV10kN0V1OkN0W1NjN1W1NjN1V1OiN2W1MjN3V1MjN3V1MkN2U1NkN2U1NjN3V1MjN3U1NjN3V1MjN3V1MeNTKTOP5W2LdNUKUOo4W2KcN<]1DdN;[1FgN8Y1HhN7W1JjN5V1KkN4T1MmN2S1NmN2S1NmN2S1MmN4S1LlN5T1KkN6U1JkN6U1JjN7V1IkN6U1JkN6U1IlN7T1IkN8U1HkN8U1HkN8U1HkN8U1HkN8
U1GlN9T1GkN:U1ElN;T1ElN;T1ElN;T1ElN;T1DmNU1BjN?V1AjN?V1@kN`0U1@kN`0U1@kN`0U1@kN`0V1_OjNa0V1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1]OlNc0S1^OmNb0S1^OmNb0R1^OoNb0P1_OPOa0o0@RO?m0BSO>m0BSO>m0BRO?m0AUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>l0@UO`0k0@UO`0l0_OTOa0m0^OSOb0m0^OROc0n0]OROc0n0]OROc0n0\\OSOd0m0\\OSOd0m0\\OSOd0m0\\OSOd0m0\\OSOd0l0]OTOc0l0]OTOc0l0]OSOd0l0]OTOc0l0]OSOd0m0\\OSOd0m0\\OSOd0l0]OTOc0l0]OTOc0k0^OUOb0k0^OUOb0j0_OVOa0h0BWO>h0CXO=g0DYOl0ATO?n0_OROa0o0^OQOb0o0^OQOb0P1]OPOc0P1]OPOc0P1]OPOc0Q1\\OoNd0Q1\\OnNe0R1\\OmNd0S1\\OmNd0T1[OlNe0T1[OlNe0T1[OlNe0U1ZOkNf0U1ZOjNg0V1YOkNf0V1YOjNg0V1YOjNg0W1YOiNf0W1ZOiNf0W1[OhNe0Y1ZOmM[Kc0[5`1ZOmMZKd0\\5_1YOiNf0V1ZOkNf0U1_OfNa0Y1@fNa0Z1_OfNa0Y1@gN`0Y1_OhNa0X1_OhNa0X1_OhNa0W1@iN`0X1_OgNb0Y1^OgNb0Y1^OgNb0Y1_OfNa0Z1_OfNa0[1^OdNc0\\1]OdNc0]1\\OcNd0]1\\OcNd0^1[ObNe0_1]O^Nc0b1]O^Nc0b1]O^Nc0b1]O^Nc0b1]O^Nc0b1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O\\Nc0d1]O\\Nc0d1]O\\Nc0d1^O[Nb0e1^O[Nb0d1_OZNc0f1]OYNd0g1\\OZNc0f1]OnMSK6^5l1_OmMUK3_5P2\\OmMVK1_5R2[OlMQ1T2^OiM6W2IjM7V2_5000QJmMf0S2ZOmMf0S2ZOmMf0S2ZOmMUKMV5V2EmMQKLN1\\5W2DlMRKML1^5V2DkMSKNK1^5V2DjMTKOJ1^5V22iMNW2HkMoJNY5V2ImMmJMZ5V2ImMmJNY5U2IoMlJM[5T2FYN:g1FYN:g1FYN:g1FYN:g1FTNmJG^5V2DXN;h1EXN;i1DXN;h1EXN;h1EXN;i1HhMQK2W5V22hMOX2b51O00O100O100O10000O1000000000000000000001O1O001O001O00O100000000000000000000000000001O000000000000001O001O00001O00001O001O002N2N1O001O2N1O1O2N2N3M2N2N3M1O3M1O1O2N2N1O1O001O1O001O1O1O1PJYLZ2g3]L^LfNNl4e3ZLaLeNNQ5a3XLeLdNKT5`3TLjLdNJW5\\3RLnLdNHZ5Z3nKTMcNE_5W3kKVNU4j1iKYNV4g1hK[NX4e1fK]NZ4c1dK_N\\4a1aKbN_4^1`KcN`4]1^KfNa4[1\\KgNd4Y1ZKiNf4W1XKkNh4V1UKmNj4S1TKoNl4R1QKoNP5S1hJTOW5Q1^JUOb5Z40001O001O000000001O0000001O1O00001O001O1O001O1O1O1O1O00001O00000000001OO1000000000000000000000000000000000000000000O1000000O100001O00000000000000001O000000000000000000000000000000000000O1000000000000O1000000" + }, + { + "size": [ + 427, + 640 + ], + "counts": 
"`:j2a:0000000O10000000000000000001O00001O0000001O00001O0000000000001O000000000000000000000000000000O10000000000000000001O000000000000000000000000000000001O000000000000001O00000000001O000000000000000000000000000000001O000000001O001O1O001O1O0gMSET2m:lMSET2l:mMUER2k:nMUER2k:nMUER2k:mMWER2i:oMWEP2h:QNXEo1h:QNYEn1g:RNYEn1g:QN[En1e:RN\\Em1c:TN^Ek1b:TN`Ek1a:SNgEf1P;O001O1O00000000001O000000000000WO\\NkEd1T:]NlEc1S:^NmEb1S:^NmEb1R:_NnEa1S:]NnEc1R:\\NoEd1l:0001O000000000000001O000000O10000000000000000O100000000000000O100000000O10000O1]OYNbEg1]:[NbEe1^:[NbEe1^:\\N`Ee1_:]N`Ec1`:^N_Eb1a:^N^Ec1b:]N\\Ee1d:\\NZEe1f:[NYEf1g:[NWEf1i:[NVEe1j:\\NTEe1k:>00000000000000000iMWEl1i:TNWEl1i:;00000iMZEi1f:UN\\Ek1e:QN^Eo1o:000000000000000000000O10000O100O10000O1000000O1O1O1O100O100OWNPEY1n:c0O1O100O1000000O100001O00O1000000000000001O00000000000000000000O1000000000000001O000000000000000000000000000000000000000000000000000000001O000000000000000000000000000000000000000000000000O10000000SNXEW1h:gNZEY1f:fN[EZ1e:dN^E[1b:bNaE^1_:`NdE_1\\:aNdE_1\\:`NeE`1\\:_NdEa1[:`NeE`1[:_NfEa1Z:_NgE`1Z:_NfEa1Z:_NfEa1Z:_NfEa1Z:_NfEa1Z:^NgEb1Y:^NhEa1Q;00000000000000000000000000000000000000000000000O1000000O10000O1O10000000000O1000000O1000000001O_OYN]Eh1c:XN]Eh1c:YN\\Eg1d:ZNYEh1g:ZNREk1n:9000000000000000000000000000000O1000000000000001O000000000000O100000000000000001O000000000000000000000000000000000000000000000000000000O100000000000000000000000000000000O100000000000000O1O1O1N200O1O100000000001O000000000000001O00000000000000001O0000000000000000001O001O00001O000000000000000000O1000000001O00001O0000000000001O0000000000000000000000000000000000001O0000000000000000000000000000001O00O1001O0000O1000000000000000000QE" + } + ], + "question": "Which statement accurately describes the relationship between and the other objects?", + "choices": [ + "A. is in front of and behind .", + "B. is in front of both and .", + "C. is behind and in front of .", + "D. is behind both and ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_396.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000261116.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "PoT51`01`:4\\EOa:LUE68Oc:KTE86Ng:JSEf0l:30O100O010O00100O010O1O100O100O1O101N1O2OO0100O100O1O1O2N1O10000O010000000O0100000000O100jJ" + }, + { + "size": [ + 375, + 500 + ], + "counts": "bUP58];6K3N3M2M2O1N101N100O100O10O10O100000O10O100O1000000O100OO1010000000O0100O101O001O002N3L4M1N2N3L6Kjc4" + } + ], + "question": "Based on the provided information, what is the relationship between and ?", + "choices": [ + "A. is stuck inside .", + "B. is resting on top of .", + "C. is on top of .", + "D. is beside ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_397.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000404534.jpg", + "mask_rles": [ + { + "size": [ + 500, + 386 + ], + "counts": "o841300040G1R14oNj<[4TN4M10000O1000000000000000000000O100000000000000000000000000O10000000000000000000000000000O2O00000000000000O1000000000O100000000000001OO10000000O100000000000000000000000000O100000000000000000000000000O1000000000001O000000000O1000000000000000000O1000000000000000001O1O2M3O3L1N10000000000000000O10O11OO1000000N1L5M30O00010O3N00O0011M30O1O01O1O0PL^Dn3c;01O001O2O001M2ZLWDO00000U3j;lLeDQ3Z;PMfDP3Z;QMeDn2\\;RMYDL21JQ3k;RMdDm2];SMcDl2^;[M\\Dc2e;]M`D]2b;\\MRDM>f2`;\\MTDM=e2`;eM_DZ2b;fM_DX2b;hM^DX2b;hM^DW2b;jM^DU2c;lMlCD60Oa2o;kMTDDM`2P1lk[2" + }, + { + "size": [ + 500, + 386 + ], + "counts": 
"g?a4P;4O000000O1001O00O11O00001O00001O00001O0000M3N2N20000O10000001O00001O001O1O001O00002N:F1O1O1O00000000O1000000O10000O10000O10000O100000000O1000000O1000000O100000000O10000000000O10000000000O1000000O100O1FUEjKl:]42ESEoKm:o3SEQLo:m3RESLo:k3QEXLn:g3REZLn:e3QE^Ln:a3REaLm:\\3SEhLl:W3UEiLk:U3UEnLj:Q3UERMj:l2VEVMj:h2WEYMi:f2VE\\Mj:c2WE]Mi:a2UEcMk:[2VEfMj:X2VEjMj:U2VElMj:R2VEPNj:o1VERNj:l1VEVNj:i1UEYNk:e1VE\\Nj:c1VE^Nj:_1VEdNj:[1VEfNj:W1XEjNh:U1WEmNi:R1VEPOj:P1TEROl:k0VEVOj:h0WEYOi:e0WE]Oi:b0VE@j:>WECi:;VEHj:7WEIi:6WEKi:3VE0j:OUE4j:JWE7i:GWE;i:DWE=i:BUEa0k:T2000UMWEk0i:SOWEo0i:POWEQ1i:m10QMUEU1k:fNgEm0Y:nNmEQ1S:hNTFX1l9gNUFY1k9gNUFZ1j9eNWF[1i9eNWFZ1j9fNVFY1k9gNUFV1QOeMi:U1VFV1POgMi:S1WFQ1PORNh:m0XFP1POUNg:j0ZFl0ROjNV::gFl0ROkNX:9fFi0ROPOY:7eFh0QOQO\\:6dFf0oNWO8POk9S1nFf0nNWO7SOm9P1nF`0QO_O4ROm9o0nF`0PO^O6TOl9n0nF;ROE4SOl9m0nF:ROE7nNn9S1iFEPOG23M_O0OP;S1TFoNQ351U>m0jAL\\=8aBI_=V1000000000000O10000000000000000000000000000O10000000000000000O1000000000000000000000000000000000000O1000000000000000000O1000000000000000000000000000000O10000000000O1000000000000000000000000O1000000000000000000000000O1000000000000000000000000000000O100000000000000000000O1000000000000000000O1000000000000O10000000000000000O10000O100O10000O100O1O1O100O1O10000O1O100O1O10000O1O100O1O1O1O1O100O100O100O100O100O1O1O100O100O1O100O100O1O1O100O1O1O100O100O100O1O1O100HXLcDi3];ZL`Df3`;71O0000004L1OJVLbDj3];XLbDh3\\;ZLcDg3X;^LhDb3W;_LiDa3V;?O100O100O100O1O100O100O10000O1O10000O10000O10000000000000000000000000000000000000000000000000000000000001O000000000000000000000000000000000000O1000000000000000000000000001O00001O0000O100O1O100]Oc00000O1000000O10000000000000000001O1O1O000000000000000000O100000000003M4L1O1O1O3PK^E0N051O002NQ3NYMY and ?", + "choices": [ + "A. is mounted on .", + "B. is over .", + "C. is over .", + "D. is attached to ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_398.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000404534.jpg", + "mask_rles": [ + { + "size": [ + 500, + 386 + ], + "counts": "o841300040G1R14oNj<[4TN4M10000O1000000000000000000000O100000000000000000000000000O10000000000000000000000000000O2O00000000000000O1000000000O100000000000001OO10000000O100000000000000000000000000O100000000000000000000000000O1000000000001O000000000O1000000000000000000O1000000000000000001O1O2M3O3L1N10000000000000000O10O11OO1000000N1L5M30O00010O3N00O0011M30O1O01O1O0PL^Dn3c;01O001O2O001M2ZLWDO00000U3j;lLeDQ3Z;PMfDP3Z;QMeDn2\\;RMYDL21JQ3k;RMdDm2];SMcDl2^;[M\\Dc2e;]M`D]2b;\\MRDM>f2`;\\MTDM=e2`;eM_DZ2b;fM_DX2b;hM^DX2b;hM^DW2b;jM^DU2c;lMlCD60Oa2o;kMTDDM`2P1lk[2" + }, + { + "size": [ + 500, + 386 + ], + "counts": "g?a4P;4O000000O1001O00O11O00001O00001O00001O0000M3N2N20000O10000001O00001O001O1O001O00002N:F1O1O1O00000000O1000000O10000O10000O10000O100000000O1000000O1000000O100000000O10000000000O10000000000O1000000O100O1FUEjKl:]42ESEoKm:o3SEQLo:m3RESLo:k3QEXLn:g3REZLn:e3QE^Ln:a3REaLm:\\3SEhLl:W3UEiLk:U3UEnLj:Q3UERMj:l2VEVMj:h2WEYMi:f2VE\\Mj:c2WE]Mi:a2UEcMk:[2VEfMj:X2VEjMj:U2VElMj:R2VEPNj:o1VERNj:l1VEVNj:i1UEYNk:e1VE\\Nj:c1VE^Nj:_1VEdNj:[1VEfNj:W1XEjNh:U1WEmNi:R1VEPOj:P1TEROl:k0VEVOj:h0WEYOi:e0WE]Oi:b0VE@j:>WECi:;VEHj:7WEIi:6WEKi:3VE0j:OUE4j:JWE7i:GWE;i:DWE=i:BUEa0k:T2000UMWEk0i:SOWEo0i:POWEQ1i:m10QMUEU1k:fNgEm0Y:nNmEQ1S:hNTFX1l9gNUFY1k9gNUFZ1j9eNWF[1i9eNWFZ1j9fNVFY1k9gNUFV1QOeMi:U1VFV1POgMi:S1WFQ1PORNh:m0XFP1POUNg:j0ZFl0ROjNV::gFl0ROkNX:9fFi0ROPOY:7eFh0QOQO\\:6dFf0oNWO8POk9S1nFf0nNWO7SOm9P1nF`0QO_O4ROm9o0nF`0PO^O6TOl9n0nF;ROE4SOl9m0nF:ROE7nNn9S1iFEPOG23M_O0OP;S1TFoNQ351U>m0jAL\\=8aBI_=V1000000000000O10000000000000000000000000000O10000000000000000O1000000000000000000000000000000000000O1000000000000000000O1000000000000000000000000000000O10000000000O1000000000000000000000000O1000000000000000000000000O1000000000000000000000
000000000O100000000000000000000O1000000000000000000O1000000000000O10000000000000000O10000O100O10000O100O1O1O100O1O10000O1O100O1O10000O1O100O1O1O1O1O100O100O100O100O100O1O1O100O100O1O100O100O1O1O100O1O1O100O100O100O1O1O100HXLcDi3];ZL`Df3`;71O0000004L1OJVLbDj3];XLbDh3\\;ZLcDg3X;^LhDb3W;_LiDa3V;?O100O100O100O1O100O100O10000O1O10000O10000O10000000000000000000000000000000000000000000000000000000000001O000000000000000000000000000000000000O1000000000000000000000000001O00001O0000O100O1O100]Oc00000O1000000O10000000000000000001O1O1O000000000000000000O100000000003M4L1O1O1O3PK^E0N051O002NQ3NYMY is attached to?", + "choices": [ + "A. ", + "B. The gate secured with a latch mechanism.", + "C. ", + "D. " + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_399.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000548780.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "W[R33W=1O2N1N201N1O100O1O100O100000000O100N2O1O1M3N2N1O2bLMbI5o5j0]IZO_6U1RIoNk6[1jHiNU7a1_HbNa7d3O1O1O1O010O1000000000000000N1TOlGTLd8i3`0000001O001O0]NnFmNS9l0WGPOj7kNbHb1;]OR7TObH\\1c0\\Oi6ZOdHX1i0YOb6AeHV1k0gNbNLQ88cHT1e9oNZFP1d9SO]Fl0`9WOaFh0\\9[OeFd0Y9^OfFb0W9BiF>U9DkFfGeJg7b5N1O1O00000001O1O001O1N104L1O1O1O2N2`LPHiN7k1l7POTIAeNUOd0d0j7:RIKD^Oo9?`1N1O1N2M7ITcm5" + }, + { + "size": [ + 427, + 640 + ], + "counts": "_kX38R=2L5M1L5N2M200O100O0100O010O010O1O0010O01000O10O1000000000000001O0000001O1O001O001O0010O01O001O000000000000001OO1000001OO10000001N2O00000O101O0O2O2N2N2M2O0O3N3L3J_jo3" + }, + { + "size": [ + 427, + 640 + ], + "counts": 
"n_T51X=2O1O2O0O3M5L7H5L1O1O0O10000000O010001O000O1O2N1O2N2O1N3L5M3YDdNX;i1M2O1FQNWEo1i:QNWEP2Q;1O1O1BnMbER2]:PNbEQ2\\:QNcEP2[:RNeEm1Z:TNfEm1W:VNhEj1X:VNhEk1U:XNjEh1U:YNkEh1T:XNmEg1S:YNmEh1R:XNnEh1R:XNoEh1Q:XNnEh1S:WNmEj1S:UNmEl1R:TNoEl1Q:SNPFm1P:SNoEn1S:oMmEQ2V:mMiET2X:jMiEV2X:hMhEY2Z:dMfE]2c:010O]OXEUN29f:^1eE`N[:^1hE`NX:^1lEaNS:^1oEaNR:^1oEaNQ:^1QFbNo9]1RFbNn9]1TFcNk9\\1WFcNi9\\1ZFcNf9\\1T101O1O1O10O02O0O10001N1O2N101O1O1O1N102N1O1O1N2O2M3M4L4M4KaVj1" + } + ], + "question": "Which object is being carried by ?", + "choices": [ + "A. , the dark-colored, structured handbag.", + "B. , the tan and teal handbag.", + "C. , the handbag with a multicolored abstract print.", + "D. , the woman with long blonde hair." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_400.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000008211.jpg", + "mask_rles": [ + { + "size": [ + 459, + 640 + ], + "counts": "j[i53V>2I8L4H7M3M7fBTOi<[1L8mCYNT;m1YD[O1gNP:0cFb3Y9bLdF`3Z9dLbF]3]9dLbF^3D`L_92lFj3S9VLlFk3S9VLmFj3S9VLlFk3S9ULnFk3R9XLjFi3V9[LdFg3[98eFfK^9[48O1I8UOUFeLM4T:S3h001JeFmLj7S3SHRMj7o2oGYMP8h2fFmLn0b0\\8f2\\GaMd8`2VGeMj8]2nFjMQ9i30O001O1O001O1bKdFk3_9PLeFn3^9mKdFR4k901H8N2@eEhLa:o2b0H9H7H9H7N3K6K5E`CeNdGX9HXFm02C=IZ9DZFQ1MC9M`9_O[FQ1LEM4b:6`Em0b:RO^Em0d:QO\\Eo0f:ROXEm0j:UOREl0o:UOoDj0S;V11O1AjDhM^;o1cDPNa;l1`DSNb;k1_DTNf;HYDi12^NP<]1QDbNR<[1nCeNS to the other elements?", + "choices": [ + "A. It is beside .", + "B. It is on but behind .", + "C. It is on .", + "D. It is on and beside ." 
+ ], + "answer": "D", + "type": "relation", + "image": "images/vqa_401.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000156643.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "j`l2<`>5N2M5L3M3M2N2N3M2N2O2M2O3L4M3M1O7I3M4L1O1O3M2N2N2N3M2N4L2N1O1O100O100O1O0010^NTDNl;1VDNk;1VDNj;2VDNk;0WDOm;MTD2k;NVD2j;NWD0j;OXDOj;0VDOk;0WDNj;2WDMi;3XDKi;5XDIi;6YDHi;7\\DBf;=]DAc;?_D_Oa;a0`D]Oa;c0`D\\O`;d0cDXO_;g0cDVO^;j0fDPO];o0[101N00010O001O01O1O100PCQOm;P1QDROn;P1QDoNo;R1o010O01O1N3N1O001O000010OQDiNU:X1hEQO`NE\\;Z1SF\\Oh9e0VFAe9?ZFI`97_FN\\93bFN^92_F1a9O\\F4d9MXF6h9LTF6l9KRF6n9KnE8R:IkE9V:GgE\\:CaE`0^:BZEd0g:k1O1O100O000000000000XO^ERMb:k2eEQM\\:l2gESMY:k2lERMT:l2PFQMQ:n2QFQMP:n2QFPMP:o2QFoLR:o2P1O101O1O1O1O10O01O0000100O1O1O2N1iN^DnNe;d0iDYO_=08GR]10mbN7I3M4N2N0O2N2M4M2M3M4L3N2N1O2N2M2O1M4N2N1N3N2N4L2N2M5L2N2N2N1O00001O1O1O1O1O1O1O1O1O1O2M3L5L3L4L3N3M3O0O11O01O2O5J2O0O01O000000000000000000001O001O01O01O001O1O001O1O1O2N001O1O2N1O1O2N1O1O1O1O001O2N2O0O2N2N101N2N1O2O1N4L2N1O2O2M2N4L4L2N2N2N101N1O2N4L2O0O2N2M3N2M4L6J6I9EdVZ2" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"kV43h>5N2N2M4M2N2L4O1L4L4K5M3M2M5M2L4M3N2K5N2J6N1O2K6K4K5mDbNoNfN]9l2ZGYOW8i0fGD]O]Mg02e6S3VIm0[6UOcIY1_OdKU6Y3ZJf1T5bNkJj1AbJh1?;@YNP43b26d0JEBWI2X7;V700N2O10000O1O10000O100O100O0100001O00000000000O100000000000000000000000000000000000000000000000000000000000000O100001O000000000000000000000000000000000O1001O01O000000001O001O001O001O0000001O001O001O00001O1O001N2O1O0010O01N100010OO3N1O010O1O1O2N1O2N8aBhND0T30`6\\3YF5o7h3F2N3RJdHR4k7oJTIb4U9@g0YO7I7I3M3M4L2N4L1O2N3M2N4L2N2M3N3M3M2N3M4L4J8_Ob0_O:J7J5N1N2M4K5N2M2N3N2K5_Oa0A>O2L3nMXMkGi2T8ZMiGg2U8[MkGe2T8]MjGd2U8^MgGe2X8\\MgGe2Y8[MfGf2X8\\MhGd2W8]MgGd2Y8]MhGb2W8`MiG^2W8cMiG\\2W8fMhGY2W8lMfGT2Y8oMeGP2X8WNfGe1Y8cNcG[1\\8lN`GS1_8QO_Gl0d8XOXGf0h8^OVG4X9OeFL`96^FJb98\\FGe9;ZF@j9a0UFZOP:g0PFWOQ:j0nETOT:n0iEQOY:P1fEoN[:S1bEmN_:U1_EjNb:W1\\EjNd:W1[EhNf:Y1YEgNg:h21000000O100000000O100000000O10000000000000000000000000000000000001O001O0000001O0000001O001O2N001O001fGgKk5[4QJhKn5Y4oIjKP6W4nIjKR6Z4iIhKV6Z4dIkK[6X4aIiK_6Y4\\IkKc6X4WIkKi6Z4`HnJHP1h7n5000000000001O0000001O00001O1O1O1O001O1O1O1O1O1O1O2N1O1O2N1O1O001O1OWNjHjKT7R4`I_K_6d1oHKh0\\NX6h1RIKj0ZNS6k1TIJn0XNl5m1XIKV1kMd5Y2XIKZ1bMb5b2WIKe84\\GLd83^GMa82`GN`81bGN^80fGNZ80iGOW80kGOU80mGOS8OSHLn72XHJh75ZHJf74]HJd74`HIa76cHG]76hHGY78jHFV78mHGS77SIDn6:WIBj6;[IBf6=[IAg6<]IBd6<_IAd6:cI_Ob6>aI_Oa6=eI@\\6;lIAU6;RJ]OS6a0QJXOT6d0V401O1O1O001O1O1O0O2N10001N2N2N3LdaR2" + }, + { + "size": [ + 480, + 640 + ], + "counts": "Z`l31l>;G5J6K4K4M4M2M3N8H2N001ODdBTO\\=l0gBPOY=Q1hBnNX=R1jBkNW=U1:0;F34KM4L8GUWT5" + } + ], + "question": "Which statement accurately describes the spatial arrangement of the objects relative to ?", + "choices": [ + "A. is holding in front of .", + "B. is in front of , which is holding .", + "C. is in front of , which is holding .", + "D. is in front of , which is in front of ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_402.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000252332.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "abe1h1V=Z1kBQMh;c3@l0TOf0ZO?A4L2N2N2N2N2N2M201N2N2N2N2N1O2N2N2N2N2N2N1O2N2N2N2N1O2N2N2N2O1N2M2O2N3M1O2N[MPI]Mn6b2VI\\Mj6d2VI]Mh6d2WI_Mf6b2XI`Mg6a2YI`Me6a2[I`Mc6a2\\IaMa6a5OnL`IeM^6[2dIdM[6]2eIQKNS2[6m2gIcMX6^2hIcMV6^2jIcMT6^2lIcMR6]2oIdMo5^2PJcMn5i5N2N2N2N]MXJPLg5P4ZJQLd5P4\\JPLc5R4\\JPLa5i6N2N2N1OYMgJjKW5W4iJjKU5W4kJjKS5W4mJjKQ5S7OUMPKiKn4X4SKgKl4Z4SKhKk4Y4UKiKh4Y7NPMZKhKe4j3[KdI0c2c4Y4]KgKb4Z4^KhK`4]7NlLbKhK]4Y4cKhK[4Y4dKhK[4`7N2N2N2N2N2N1O2N2NfIWL`1g3bNYL^1e3cN\\L]1b3Q5O2O1MbIbL_1]3aNcL_MOa1\\3Q1eL^MOa1[3c3eL^LX3g7N3N1N1OcJoLmNP3c6NkJUMWNj2j1XMUIN6M]4l2Y2TMYI0j4i2V7N3N2N3N1N1O2N1O2N2O0O1O2N1000O1000000001OO1000001OO10000001O0O100000000000000000001N1000000000000000000000001O000O1000000000000000000O1000001O00000000000000001N100000000000000000000O2O0000000000000000000O1000001O00000000000O10000000001O00000O10000000000O100000001O00000000000O11O00O10001O000000000000000O2O000000000000000O101N1O1O1O1O1O1O1O100N2O1O2N1O1N2O100O2N1O2N2N2N2N1N3N2N3N3L2N1O3M2N2N2M3N4L2O1N2M3N3M2N2N2N2M4N0O2N3M3M2N2N2M3N2N2O1N2N3M3L3N3M1O3M2N2N2O3L2M3N2N3M1O2N3M3M2O1N2M4L3N3M1O3M2O1N2N2N3M2M4L3N2O1N2N2O1M4N2L3M3N3M1O2N2K5H:]Oa0H9^Oa0D2010O1001O00000001O0000O10000000000000001OO1000000000000000000000000000000000000000000000001OO100001O00O10000001OO10000001OO10000001OO10000001OO1000000WET1_5kN]JQ2j4PNTKk2S4UMkKY3i3gLULS4YLlKe61QMm4[2SKdM\\5n1dJQNS6Y1mIfN]6Q1cImNe6m0[IROi6l0VISOm6j0TIUOn6j0RIUOQ7i0oHUOT7j0lHUOV7k0iHUOX7i0iHVOY7i0gHVOZ7j0fHUO]7i0cHVO_7j0`HUOb7i0_HVOc7i0]HVOd7j0\\HUOf7k0ZHROi7m0VHTOk7j0VHUOl7j0THUOm7k0SHTOo7l0PHSOQ8m0oGROS8l0nGSOT8l0lGSOV8l0jGROY8n0fGQO\\8n0dGRO]8l0dGSO^8l0bGTO_8k0aGSOa8m0_GROc8m0]GROe8m0[GSOf8l0ZGSOg8m0YGQOj8n0VGQOl8n0TGQOn8n0QGSOP9l0QGROQ9m0oFQOT9n0lFQOU9P1jFoNX9o0iFQOX9n0hFQOZ9n0fFPO\\9P1dFoN^9P1bFPO^
9P1bFoN`9o0aFPOa9o0_FPOc9P1\\FoNf9P1ZFoNg9Q1YFnNi9Q1WFnNk9R1TFmNn9Q1SFnNo9Q1QFnNQ:Q1oEnNR:R1mEoNT:P1lEoNV:Q1iEnNY:Q1gEnN[:Q1eEnN]:Q1cEnN_:Q1aEnNa:Q1_EmNd:R1\\EmNf:R1ZEmNg:S1YEmNh:R1XEmNi:S1XEkNj:T1UElNm:S1SElNo:S1QElNQ;S1oDlNS;S1mDlNT;T1lDkNV;T1jDkNX;T1hDkNZ;T1fDkN\\;T1dDkN];U1cDjN_;U1aDjNa;U1_DjNc;U1]DjNe;U1[DjNf;V1ZDiNh;V1XDjNi;U1WDjNj;V1VDiNl;V1TDiNn;V1RDiNo;W1QDhNQM3O10001N1000000000000O1000000000000000000O10000000000000000O100000000O100001OO1000000000000000000O1000000O1000000O1000000O10000O100O1O1O100O1O1O1N2O1O1O100O1O1O1O1O1N2O1O1O1O1O100O1O1N2ZCkM` relative to the other objects?", + "choices": [ + "A. is located behind .", + "B. is located between and .", + "C. is positioned in front of both and .", + "D. is on the back of ." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_403.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000530061.jpg", + "mask_rles": [ + { + "size": [ + 455, + 640 + ], + "counts": "lSU13S>2L7I4N1O4L1N3O1N2N3N0O2N2O000O1O0LZOTC;b]NEb1;]NHa1;kLaKo0Y4U27iLbKMNd0a4e21hL`KIO110Nc0c4k2OgL`KKN02g0b4f2NhL`KO0`0f4h2LgL^KMN02OO`0k4j2KgLZK0001OO`0n4h2GiL[K0001OO2O50JT5R3BoLZKO101OO;1DV5Q3^OWMZKJ2OO;\\5e2^OPMWKO1001N1b5n2XOPMgK0T5o2a5O1O2O1N3M2N2N3N2M5K3M1O3N2M3N0O100O2N1O10O0001O3N0O002N101N3N0O1O3N3L2N010O3M2N3NO01O1O3N0O0000010O1O010O100O1O01O01O2N10K\\OeBd0Z=^OgB`0Z=^OjB?`=00OO2OA[B5I7B>H8M3M2O2N3M2N1O1O2N001O1O010O1O1O1O1O1O2N2O0O1O1O2N2N2O1N2N2N1O3M1O01O0O1O100000001O0001O01O00000001O010O0000001O00010bEeMn8[2iFPNU9P2iFSNV9n1\\FaNb9_1[FfNc9[1TFbMMW1n9n2N1O1O_NVFgNh9Y1XFiNg9V1YFkNf9U1ZFjNg9d27M4K4iNjE[NX:a29K5L4G:E:O1O10001O01O00001eF_Mn6a2PIcMn6]2hHnMX7R2bHUN\\7k1_H[N`7e1[HaNd7`1UHZMoNY1k8]1oGPOP8P1jGVOU8k0`GA^8`0YGXM1\\2e8a0YGAg8Q3N2N2O1N9G001O1O10O0001O00010O00000001N1O1K6I6N2K5MWH" + } + ], + "question": "What is the primary action of in relation to and ?", + "choices": [ + "A. is holding and sitting on .", + "B. is sitting on and eating from .", + "C. 
is looking at and holding .", + "D. is holding and sitting on ." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_404.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000530061.jpg", + "mask_rles": [ + { + "size": [ + 455, + 640 + ], + "counts": "lSU13S>2L7I4N1O4L1N3O1N2N3N0O2N2O000O1O0LZOTC;b]NEb1;]NHa1;kLaKo0Y4U27iLbKMNd0a4e21hL`KIO110Nc0c4k2OgL`KKN02g0b4f2NhL`KO0`0f4h2LgL^KMN02OO`0k4j2KgLZK0001OO`0n4h2GiL[K0001OO2O50JT5R3BoLZKO101OO;1DV5Q3^OWMZKJ2OO;\\5e2^OPMWKO1001N1b5n2XOPMgK0T5o2a5O1O2O1N3M2N2N3N2M5K3M1O3N2M3N0O100O2N1O10O0001O3N0O002N101N3N0O1O3N3L2N010O3M2N3NO01O1O3N0O0000010O1O010O100O1O01O01O2N10K\\OeBd0Z=^OgB`0Z=^OjB?`=00OO2OA[B_OcX[4" + } + ], + "question": "Which statement correctly describes the interaction between , , and ?", + "choices": [ + "A. is using to eat from .", + "B. is looking at while holding .", + "C. is holding who is looking at .", + "D. is eating from inside ." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_405.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000530061.jpg", + "mask_rles": [ + { + "size": [ + 455, + 640 + ], + "counts": "glZ29n=3N1O02O1OO101O00010O6J2N4L00N2N3M3MJ7M2N3J6L4M3L4L3M4K5L3M5J6I6J8F9K5FiDZM];`28L4K4N3M2O4K3L3O1N4L4N1N1O2M4K5L3N3K4M4J6G]BHg=2goV1" + }, + { + "size": [ + 455, + 640 + ], + "counts": "]XX18n=8G:G8H7I4L5K7I4M3L3M3M3N1O2M3M3N1O2M3N1O1N2O2M2O1O1N2O1O1O1O1O2N1O1O1O1O2N1O2N10O01O1O100O10O01O001O100O1O001O001O1O1N2O001O2N1O1O1O1O1O1O1O001O001O0010O2OO000L5O001J5O1O1000gKbFi3^9`0O2O00000O1O1O101O0O1O100O10000O101O0000000O11O0000O1000000000000000000000O10000000000000000O100000000OjNPKWIP5i6PKWIP5i6PKWIP5i6PKWIP5P80001N1000000O100000000O100O10000O2O00000O100O100O2O0O100O100O101N100O1O1O100O1N2O2N1N2N2O1O1O1O1O2N1O100O2N1O1O100O2N1N200O1O2N100O2N1O1O2N1O1N3N2M2O2M2N2N3N1N3N1L5L4K4N3L4L4K5I7D>_OcX[4" + }, + { + "size": [ + 455, + 640 + ], 
+ "counts": "lSU13S>2L7I4N1O4L1N3O1N2N3N0O2N2O000O1O0LZOTC;b]NEb1;]NHa1;kLaKo0Y4U27iLbKMNd0a4e21hL`KIO110Nc0c4k2OgL`KKN02g0b4f2NhL`KO0`0f4h2LgL^KMN02OO`0k4j2KgLZK0001OO`0n4h2GiL[K0001OO2O50JT5R3BoLZKO101OO;1DV5Q3^OWMZKJ2OO;\\5e2^OPMWKO1001N1b5n2XOPMgK0T5o2a5O1O2O1N3M2N2N3N2M5K3M1O3N2M3N0O100O2N1O10O0001O3N0O002N101N3N0O1O3N3L2N010O3M2N3NO01O1O3N0O0000010O1O010O100O1O01O01O2N10K\\OeBd0Z=^OgB`0Z=^OjB?`=00OO2OA[B5I7B>H8M3M2O2N3M2N1O1O2N001O1O010O1O1O1O1O1O2N2O0O1O1O2N2N2O1N2N2N1O3M1O01O0O1O100000001O0001O01O00000001O010O0000001O00010bEeMn8[2iFPNU9P2iFSNV9n1\\FaNb9_1[FfNc9[1TFbMMW1n9n2N1O1O_NVFgNh9Y1XFiNg9V1YFkNf9U1ZFjNg9d27M4K4iNjE[NX:a29K5L4G:E:O1O10001O01O00001eF_Mn6a2PIcMn6]2hHnMX7R2bHUN\\7k1_H[N`7e1[HaNd7`1UHZMoNY1k8]1oGPOP8P1jGVOU8k0`GA^8`0YGXM1\\2e8a0YGAg8Q3N2N2O1N9G001O1O10O0001O00010O00000001N1O1K6I6N2K5MWH" + } + ], + "question": "Which object is sitting on, and which object is attached to ?", + "choices": [ + "A. is sitting on and is attached to .", + "B. is sitting on and is attached to .", + "C. is sitting on and is attached to .", + "D. is sitting on and is attached to ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_406.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000481390.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Voj43l>2N1O011N3N2N1O1N10001bABY>c0O0001N101N2N4L3LhUW4" + }, + { + "size": [ + 480, + 640 + ], + "counts": "dRh55i>3O001O01O001O0000100O00000001O0mBMY;3gDNk97`EKd0Ok99_EHAN`02^:;_EF@1a0N`:<^EE@4?Ma:<_ECA6=Kc:=^EBA;:Ff:?^E@A?8Bi:?^E@@b07_Oj:a0^E_O_Od06]Om:`0^E_O^Of06[On:`0^E_O]Oh04[OQ;>^E_O\\Ok00[OW;:\\EA]O]1W;SOYEB@[1X;SOTEFBX1[;SOoD9IOY;IjD90LW;W1mDeNU;Y1oDcNY;U1mDeNT;[1nDbNR;h0WD^Oh0GR;k0VD_Oi0DQ;h0[DDe0CP;h0YDIh0\\OP;j0YDJk0WOn:n0VDMka02N1O1O1O1N2O1O1N2O1N2O1N3M3KSej1" + }, + { + "size": [ + 480, + 640 + ], + "counts": "Y3e;[3000001O000000001O0000001O0000001O00000000001O0000000000001O0cKbLXM^3f2iLUMW3i2mLUMS3i2QMUMP3i2SMUMm2j2ZMPMf2[2eLYJk0W3`2_2hLWJl0W3]2`2jLWJk0W3]2_2kLVJk0Z3]2[2ZNdMl1V2UNiMl1V2TNjMQ2P2PNPNW2i1iMWNX2h1hMXNZ2f1gMYN[2e1eM[N\\2d1dM\\Nd2\\1]MbNk2W1VMhNk2X1UMhNj2X1XMfNi2Z1\\M`Nh2]1cMWN`2g1cMUN_2k1cMQN^2P2fMjM]2V2hMbMk2m1XMnMj2Q2VMnMk2R2TMnMl2R2UMmMl2R2UMmMk2S2WMkMi2U2XMjMh2V2]MeMc2[2bM`M^2`2eM]M[2c2gM[MY2e2iMYMW2g2jMXMV2h2kMWMT2j2mMUMS2k2nMTMQ2m2QNQMn1P3SNoLm1R3TNlLn1R3SNmLo1R3RN`LmKLQ6d3WNWLoK3j5f3dNZL\\1f3eNYL^OOaMi3P3XLZO5fMc3V7^LjHc3U7^LjHc3U7]LkHd3S7gLbHZ3^7QMWHP3^2VLa1Q1kKi2d2YL^1P1lKh2e2ZL]1o0lKj2f2nKbL6j4S1WKgN`0T4R3PLV1\\1eKg2T8]MgGf2W8\\MfGh2X8XMhGi2R4bKgKOm3b6aMmIh0Di1a6ZM`J;oNZ2d6XMbJ9kN_2f6UMbJ7jNe2g6PMdJ5gNj2i6mLdJ4dNo2m6hLbJ6aNS3n6dLcJ7`NU3n6cLeJ4^NY3n6QLhH9Q2N`3[4SKXLm0A>Ld3Z4RKYLi0C`0Ke3X4SK[Ld0Ed0He3X4SK]L`0Ff0Fg3W4TK]L>Fg0Fg3W4TK]L=Gh0Eg3W4TK^L;Hh0Cj3X4RK^L:Ii0Ak3X4RK`L5Km0\\Om3Y4QKbL0LR1XOn3Z4PKcLNMS1VOo3[4oJcLMNT1SOQ4\\4nJSNP1bMR4[4oJRNo0cMR4[4oJRNn0cMT4[4nJRNn0cMT4\\4lJSNo0`MV4]4kJSNn0aMW4\\4kJSNn0aMW4\\4kJTNc0SM^O=T5\\4kJTN?oMe4n3kJTN>oMg4m3kJTN=QNg4k3lJUN;RNh4j3lJTN;VNf4f3oJTN;WNe4e3PKTN;XNd4d3QKUN:XNg2fNgNn4WNTN:ZNe2gNiNk4XNUN9[Nc2gNkNi4YNWN7YNd2hNlNh4ZNWN5ZNc2iN
mNg4[NVN4\\NQ1bN36=f4[NXN2[Nn0gN60?f4\\NXN0\\Nl0iN7Na0f4[N0l0_K6Lb0e4]N0h0bK7Jd0c4bNO?fK:Ie0b4cNO;iK;Gg0a4dNO7lK;Fi0`4fNN4mK^1BaNc0[1]OcNg0[1YOdNm0W1SOhNR1S1POkNU1Q1kNoNV1Q1iNoNX1YOVId1b5SO\\1l0dNTO\\N\\OfLN\\O110l1[1f3YOXNAdLIAa0n1_OWNP1^5F[N^OaLh0ESOg1n0h3J[N\\OaLR1[1Fi3L[N\\O`LU1\\1Ai3OZN[OaLX1Z1]Ok31ZNYOaL[1X1[On31XNYObL^1U1XOQ42WNXOcL`1S1VOS43VNWOdLa1R1UOT45aJSO[32nLa12]N=h0g47^JSO]31nL`1GkNg0:i4?lMiNoLa1IlNa0;k4`0]N8YLlNID`0h0Q5c0ZN5[LWO52W5b0YNnN\\L?03OO\\5b0ZNiNbL>I:LN_5b0YNgNeL>G_1[5]OXNfNgL=G_1Z5_OWNdNlL;Cb1Z5_OXN`NPM<_Oe1Y5@XOiN_Kg1Y5@YOgN_Ki1X5AZOaNaKn1V5@N`02CK=6FF::GE9;HD8dLR5kN`Jb4=cLS5kN`Jd4;aLU5lN_Jd4<_LV5oN[Je4=ZLY5ROYJe4>XLY5SOYJf4=WLZ5TOXJf4U7bMdIV2RO9[7bM]I_5c6bJ[I_5e6bJVIb5k6`JkHg5U7Q11O100O1O101O000O10010O0O100000010JZHoHg7Q7ZHnHf7R74O100O2O000O10XHQI_7n6:O[HSIZ7l6eHUI?" + }, + { + "size": [ + 480, + 640 + ], + "counts": "PTT42m>2O1N100O1O100O1O1O1O1O1O100O1O1N2O1N2L4N2M3M2OF81200OO00M41N2N3O0N20O2O0O1O100M400O001O1O1O1000]NCbD>Z;HdD7[;McD3];NaD3_;O_D1d;MZD3i;LVD3l;NQD3o;OoC1R<1kCOU<2iC1Z4M2M4N110O2N0011O2M1O0O10000000O0010O1O1O000010O000000000001O00O100O10000RCFU;;gDKW;5hDMW;5fDLZ;;_DF_;`0ZDCe;a0UDC];o0\\DSOa;Z1QDjNm;W2M4L6J9HM3O12N2N1O3M2O1N10O2N2O9F3M2N010O1POZDdNf;Y1dDbN[;\\1hDdNW;[1kDdNU;[1lDeNS;Z1oDgNP;W1SEgNn:X1TEgNl:X1TEiNm:U1TEjNm:U1UEiNm:T1VEjNl:T1TElNm:d0ZDSOk08l:b0eE\\O^:`0eE]O]:b0R2N2N3M2N2N2N3M3M3LPko2" + }, + { + "size": [ + 480, + 640 + ], + "counts": "^g<2k>8J2M2O1M4L3N2N2N2N200O1O101O3RBoNg=\\1H:YOaN[Cg1[ and in what state of motion?", + "choices": [ + "A. is running on while holding .", + "B. is jumping from while holding .", + "C. is standing on while holding .", + "D. is running on while holding ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_407.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000481390.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "YZn76i>2N10000O0100O0001O0O101_NMXC074=FQ;;VD070a0Jm:;WDg0j0ROl:c1TE_Nj:a1UEbNj:^1UEdNi:\\1VEfNj:Y1VEjNh:U1WEnNh:R1WEPOh:P1WEROh:n0TEWOk:j0nD]OQ;d0lD_OT;a0iDBV;U200001cLgDP3[;lLgDT3b;2O0O2N2O1N2_EdL[9]3aFeL_9^3]FcLd9`3WFaLi9b3SF_Lm9\\2iE]N2\\OU:U2nEROR:l0QFROQ:=aFA`9fF_O\\9?gF^OZ9UOcEg0Y10T9YOgE`0[13P9\\OiEj8BmEM]1?h8CfE2m11^8j0dGSO^8k0dGSO^8l0cGSO]8l0dGSO^8k0dGSO^8j0eGUO\\8d0kGYOY8b0U3M3Jgh>" + }, + { + "size": [ + 480, + 640 + ], + "counts": "bgP32l>3N101O00001O01O010O10O010O010O001O0010O000001O000XOJeB7W=OfB2X=1dB2[=1bB0^=2_BO`=h0100O1N2O1N3N1O2O101O100O10O0\\CZOl:g0QE]Om:d0PE_Oo:b0mDAT;?hDEW;Q<@`DK@b0T3O001O01O001O0000100O00000001O0mBMY;3gDNk97`EKd0Ok99_EHAN`02^:;_EF@1a0N`:<^EE@4?Ma:<_ECA6=Kc:=^EBA;:Ff:?^E@A?8Bi:?^E@@b07_Oj:a0^E_O_Od06]Om:`0^E_O^Of06[On:`0^E_O]Oh04[OQ;>^E_O\\Ok00[OW;:\\EA]O]1W;SOYEB@[1X;SOTEFBX1[;SOoD9IOY;IjD90LW;W1mDeNU;Y1oDcNY;U1mDeNT;[1nDbNR;h0WD^Oh0GR;k0VD_Oi0DQ;h0[DDe0CP;h0YDIh0\\OP;j0YDJk0WOn:n0VDMka02N1O1O1O1N2O1O1N2O1N2O1N3M3KSej1" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"`i?5h>6L4L4M5J5WOXOQCm0iYEI8d1b:oNlEk0V:RO^F:g9B_F7f9F]F6i9E\\F6h9F\\F6g9G[F6k9DXF9j9EYF9i9DYF:XN`3[4SKXLm0A>Ld3Z4RKYLi0C`0Ke3X4SK[Ld0Ed0He3X4SK]L`0Ff0Fg3W4TK]L>Fg0Fg3W4TK]L=Gh0Eg3W4TK^L;Hh0Cj3X4RK^L:Ii0Ak3X4RK`L5Km0\\Om3Y4QKbL0LR1XOn3Z4PKcLNMS1VOo3[4oJcLMNT1SOQ4\\4nJSNP1bMR4[4oJRNo0cMR4[4oJRNn0cMT4[4nJRNn0cMT4\\4lJSNo0`MV4]4kJSNn0aMW4\\4kJSNn0aMW4\\4kJTNc0SM^O=T5\\4kJTN?oMe4n3kJTN>oMg4m3kJTN=QNg4k3lJUN;RNh4j3lJTN;VNf4f3oJTN;WNe4e3PKTN;XNd4d3QKUN:XNg2fNgNn4WNTN:ZNe2gNiNk4XNUN9[Nc2gNkNi4YNWN7YNd2hNlNh4ZNWN5ZNc2iNmNg4[NVN4\\NQ1bN36=f4[NXN2[Nn0gN60?f4\\NXN0\\Nl0iN7Na0f4[N0l0_K6Lb0e4]N0h0bK7Jd0c4bNO?fK:Ie0b4cNO;iK;Gg0a4dNO7lK;Fi0`4fNN4mK^1BaNc0[1]OcNg0[1YOdNm0W1SOhNR1S1POkNU1Q1kNoNV1Q1iNoNX1YOVId1b5SO\\1l0dNTO\\N\\OfLN\\O110l1[1f3YOXNAdLIAa0n1_OWNP1^5F[N^OaLh0ESOg1n0h3J[N\\OaLR1[1Fi3L[N\\O`LU1\\1Ai3OZN[OaLX1Z1]Ok31ZNYOaL[1X1[On31XNYObL^1U1XOQ42WNXOcL`1S1VOS43VNWOdLa1R1UOT45aJSO[32nLa12]N=h0g47^JSO]31nL`1GkNg0:i4?lMiNoLa1IlNa0;k4`0]N8YLlNID`0h0Q5c0ZN5[LWO52W5b0YNnN\\L?03OO\\5b0ZNiNbL>I:LN_5b0YNgNeL>G_1[5]OXNfNgL=G_1Z5_OWNdNlL;Cb1Z5_OXN`NPM<_Oe1Y5@XOiN_Kg1Y5@YOgN_Ki1X5AZOaNaKn1V5@N`02CK=6FF::GE9;HD8dLR5kN`Jb4=cLS5kN`Jd4;aLU5lN_Jd4<_LV5oN[Je4=ZLY5ROYJe4>XLY5SOYJf4=WLZ5TOXJf4U7bMdIV2RO9[7bM]I_5c6bJ[I_5e6bJVIb5k6`JkHg5U7Q11O100O1O101O000O10010O0O100000010JZHoHg7Q7ZHnHf7R74O100O2O000O10XHQI_7n6:O[HSIZ7l6eHUI?" 
+ }, + { + "size": [ + 480, + 640 + ], + "counts": "PTT42m>2O1N100O1O100O1O1O1O1O1O100O1O1N2O1N2L4N2M3M2OF81200OO00M41N2N3O0N20O2O0O1O100M400O001O1O1O1000]NCbD>Z;HdD7[;McD3];NaD3_;O_D1d;MZD3i;LVD3l;NQD3o;OoC1R<1kCOU<2iC1Z4M2M4N110O2N0011O2M1O0O10000000O0010O1O1O000010O000000000001O00O100O10000RCFU;;gDKW;5hDMW;5fDLZ;;_DF_;`0ZDCe;a0UDC];o0\\DSOa;Z1QDjNm;W2M4L6J9HM3O12N2N1O3M2O1N10O2N2O9F3M2N010O1POZDdNf;Y1dDbN[;\\1hDdNW;[1kDdNU;[1lDeNS;Z1oDgNP;W1SEgNn:X1TEgNl:X1TEiNm:U1TEjNm:U1UEiNm:T1VEjNl:T1TElNm:d0ZDSOk08l:b0eE\\O^:`0eE]O]:b0R2N2N3M2N2N2N3M3M3LPko2" + }, + { + "size": [ + 480, + 640 + ], + "counts": "^g<2k>8J2M2O1M4L3N2N2N2N200O1O101O3RBoNg=\\1H:YOaN[Cg1[OmA3l=e0AVO]BO4l0[=^OcBc0Y=e0N3M4L4M2lCUNo:n1kD[NP;f1mD]NQ;f1lD\\NS;e1jD]NU;f1gD^NV;h2fDaLQ;f32N=C2N1O0O2N1SO[D_Nf;X1fDcN[;\\1hD`NZ;a1fD]N[;d1fDYN[;h1eDVN];k1cDRN_;o1i01O0010O0eNYC=g<^O^Cb0c?", + "choices": [ + "A. and are standing, while is running.", + "B. and are running, while is standing.", + "C. and are standing, while is running.", + "D. and are running, while is standing." + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_408.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000042296.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "jg\\11X=0WgP19WXoN;J7J4L4M3M3M4L3M3M3N3L3N3L3N3M3M3M2N2N2N1O2N1O2O0O3M3N1N1O100O2N100O100O101O0O10000O1000000O1000000000000O01000000000000O1000001O00000O10001O000000001O0O101O001O000O2O001O1O0O2O1N101O1N2O1N2O1O1N101O1N3N1O1O2M2O1O2M1O3M2N2N3M3K5L5K5I9D`0^On`Y4" + }, + { + "size": [ + 425, + 640 + ], + "counts": "QUo172LhA5L1O2N1O1O1O1O1O1O2O0O1O1O010O100O1O10000O100O10O010000O1000O0100000O010000000000000000O2O000000000001O0001O0000001O0001O0000001O01O01O001O00001O0010O01O001O1O001O1O0O2O1O0O2O1N2O1O1O1O2M2M4L3M3N3M2N3N2M3L5L2N4KVbb5" + } + ], + "question": "What is the relationship between and the other objects?", + "choices": [ + "A. is looking at and about to hit .", + "B. 
is about to hit .", + "C. is looking at .", + "D. is attached to ." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_409.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000278353.jpg", + "mask_rles": [ + { + "size": [ + 640, + 596 + ], + "counts": "[me1k0gb0d0C;CO1O1O1O10O01O001O0000jMhJdEX5[:nJ`ER5_:SK]Em4c:UK[Ek4f:WKWEi4h:\\KTEd4l:^KREb4n:`KPE`4P;bKmD_4S;cKkD]4d8bKgG5AZ4W8YLPHBCV4o7kLZHROBT4P8RMZHlNCS4Q8VMYHjN@S4U8XMXHh4e7^KXHb4g7`KYH`4e7cKYH]4g7dKXH\\4i7fKTHZ4n7gKnG[4R8eKmG[4U8eKhG\\4Y8jK^GX4c8c3O0100O002N1O2N000010O01O1O001O2NnHQET5o:lJoDV5P;m110O101OO01000O0100O1000_HREo5m:QJTEo5l:PJTEQ6k:oITER6m:mISET6l:lITEU6l:`1O00ZHUEX6k:gIUEY6k:gIUE[6j:dIVE\\6j:dIVE]6j:bIVE^6j:bIVE_6j:`IVE`6j:\\10SHVEb6j:]IWEd6h:\\IXEd6h:\\IXEe6h:YIYEh6f:YIXED1W6g:TJXEB6Y6b:UJXEB6Z6a:TJYEE3W6d:TJYEE3VOOn6e:VJYEC0[O100l6f:VJYEC0@1g6f:VJYEL1n5g:UJXEI1TO1o6e:RK^En4b:RKYEPN2n6f:TKZEm4e:PKZEPN1P7e:oJ]ES5d:lJ[EU5e:S201O01O0010O0001cGWEa7i:^HXEb7h:_HWEa7j:j001O01O000010O\\GWEQ8i:oGWEQ8i:d0OWGWE[8j:cGWE]8k:aGUE_8l:`GTEa8l:9005K0001N1M4N2O1N2N2O2M3N6J2M5L5J6J5L1O1N2O2N1N3N2N2M2O2M3N1O2N1O1O1O1O1N2O1O1O1O1N2O2N3M3M2N4L3M4L5fKlC>X00000O1010O1O001O10O0000000jIoL]KQ3^4YM]Kg2b4]M[Kc2e4cMUK]2j4lMnJT2R5RNhJn1Y5UNdJj1]5[NnGWNUN1l1]3Q8_N`G@GP2j8dNVGGIe1Q9gNnFLN]1T9nN^F39o0Y9U2dFjM]9X2_FiMa9Z2[FgMe9\\2XFdMh9_2SFcMm9X600001O0000000O20O00O100O1O100N2N2O1O100N2M3N2O1O1O1N2O1N2N3N1N\\FZGS8d8mG]GS8c8lG_GS8`8mGaGS8_8kGdGU8Z8jGhGW8W8hGkGX8T8gGmG_8m7_GUHj8b7TGaHR9Y7kFiH[9Q7bFSIV:U6iEkIb:j5^EVJg:g5PEaJS;\\5gDjJ\\;U5^DnJe;f63M4gIVDS4Y and ?", + "choices": [ + "A. is in front of .", + "B. is in front of .", + "C. is behind .", + "D. is standing on ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_410.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000278353.jpg", + "mask_rles": [ + { + "size": [ + 640, + 596 + ], + "counts": "[me1k0gb0d0C;CO1O1O1O10O01O001O0000jMhJdEX5[:nJ`ER5_:SK]Em4c:UK[Ek4f:WKWEi4h:\\KTEd4l:^KREb4n:`KPE`4P;bKmD_4S;cKkD]4d8bKgG5AZ4W8YLPHBCV4o7kLZHROBT4P8RMZHlNCS4Q8VMYHjN@S4U8XMXHh4e7^KXHb4g7`KYH`4e7cKYH]4g7dKXH\\4i7fKTHZ4n7gKnG[4R8eKmG[4U8eKhG\\4Y8jK^GX4c8c3O0100O002N1O2N000010O01O1O001O2NnHQET5o:lJoDV5P;m110O101OO01000O0100O1000_HREo5m:QJTEo5l:PJTEQ6k:oITER6m:mISET6l:lITEU6l:`1O00ZHUEX6k:gIUEY6k:gIUE[6j:dIVE\\6j:dIVE]6j:bIVE^6j:bIVE_6j:`IVE`6j:\\10SHVEb6j:]IWEd6h:\\IXEd6h:\\IXEe6h:YIYEh6f:YIXED1W6g:TJXEB6Y6b:UJXEB6Z6a:TJYEE3W6d:TJYEE3VOOn6e:VJYEC0[O100l6f:VJYEC0@1g6f:VJYEL1n5g:UJXEI1TO1o6e:RK^En4b:RKYEPN2n6f:TKZEm4e:PKZEPN1P7e:oJ]ES5d:lJ[EU5e:S201O01O0010O0001cGWEa7i:^HXEb7h:_HWEa7j:j001O01O000010O\\GWEQ8i:oGWEQ8i:d0OWGWE[8j:cGWE]8k:aGUE_8l:`GTEa8l:9005K0001N1M4N2O1N2N2O2M3N6J2M5L5J6J5L1O1N2O2N1N3N2N2M2O2M3N1O2N1O1O1O1O1N2O1O1O1O1N2O2N3M3M2N4L3M4L5fKlC>X00000O1010O1O001O10O0000000jIoL]KQ3^4YM]Kg2b4]M[Kc2e4cMUK]2j4lMnJT2R5RNhJn1Y5UNdJj1]5[NnGWNUN1l1]3Q8_N`G@GP2j8dNVGGIe1Q9gNnFLN]1T9nN^F39o0Y9U2dFjM]9X2_FiMa9Z2[FgMe9\\2XFdMh9_2SFcMm9X600001O0000000O20O00O100O1O100N2N2O1O100N2M3N2O1O1O1N2O1N2N3N1N\\FZGS8d8mG]GS8c8lG_GS8`8mGaGS8_8kGdGU8Z8jGhGW8W8hGkGX8T8gGmG_8m7_GUHj8b7TGaHR9Y7kFiH[9Q7bFSIV:U6iEkIb:j5^EVJg:g5PEaJS;\\5gDjJ\\;U5^DnJe;f63M4gIVDS4Y[1CdN41b0c3ZO\\LN6OP2U1hN]Ob1@aMO0O60d00\\O000:1@>1Gc00[O]JRBb5\\>J>B?UKe@S4i?K002N1O1O2N001O1O1O1O1O001O001O000000001OO1jM`LlC`3Te1oAQNU>h1QBUNR>d1SB[No=`1UB_NS>V1QBiNS>n0TBPOo=j0UBUOn=`0ZB@h=5aBKf=_OiBa0[`001OO1001O001O0000001O000000000000O1cITO]C8m5d0f6KoH5P7NoH1Q74jHLV7S1aNQ6]2fHSOY1aNn5d2cHkN_1bNh5n2cH`Ne1bNe5Y3]HVNn1bN_5e3ZHjMW2bN[5j3[HdM[2bNW5m3]HaM\\2dNT5P4\\H\\M`2dNT5Q4[H\\M`2cNT5T4ZHZMb2aNT5W4WHYMe2aNP5]4WHSMh2`NP5`4VHPMj2aNn4b4VHmLl2cNi4e4YHhLn2dNf4g4ZHfLQ3cNa4
l4[HbLT3cN_4n4ZH`LW3bN^4R5WH]L[3cNZ4T5XHYL_3dNV4[7iKfHU4\\7jKeHR4^7nKfHm3[7SLeHl3\\7ULdHg3[7fHoGc3f0f3\\7gHnGc3f0a3a7lHiGc3i0[3e7eL[HZ3f7gLYHY3g7gLZHX3f7hLZHW3g7iLZHV3g7jLYHS3i7mLYHo2i7QMXHl2j7TMVHk2k7VMUHg2n7XMTHe2m7[MSHe2n7ZMTHc2m7]MTH]LmNo5R9dMUH[2k7eMWHX2k7gMWHV2j7kMVHS2k7mMUHQ2m7oMSHP2o7PNTHk1m7UNUHf1n7ZNUHb1l7^NZH[1g7eN]HV1d7jN]HS1e7nN[Hn0h7RO\\Hh0f7YO]Hc0c7]O_H\\LhMP4k9DaH9a7GbH\\LbMe3m9OPICS7=PITORMZMR:b3ZIdNP7]1VIZNl6f1[IRNf6o1^IkMc6U2cITMkKVO06f:_3lI[LPL5T:`3bJ`L^5`3^JZLlJ7f:_3bJ`L^5a3fJZLZ5f3gJYLY5g3gJZLX5f3iJZLV5f3b5ZLi_Oc3W`0]Li_Oc3W`0\\Lj_Od3Y`01O01O0001O00000010O1OO2O000001OO2O1OYLj_Od3V`0\\Lj_Od3V`0[Lk_Oe3U`0[Lk_Oe3U`0[Lk_Oe3X`00O10010O01OdM^LYBNh1e3e;[L_B`0f0B8c3c]LiAc3W>]LiAc3W>[LkAe3X?0000001QOXLdAh3\\>XLdAh3Z?001QOWLeAi3[>XLVAOK0Ij3V?WLl@O42O0Ji3V?WLk@O64LMMi3V?jLl@[3S?j0YAXKC00O0k0O\\O0b11XN5Z1MSOl2A`M]2AfM0On05:_c0PO" + } + ], + "question": "Which statement accurately describes the positions of the objects relative to ?", + "choices": [ + "A. is standing on , which is on .", + "B. is standing on , and is also on .", + "C. Neither nor is on .", + "D. Only is on ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_411.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000217400.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "^6T2l<0000001N100000001O000000000000000000O100O010001O0O2O001O0O1000001N101O00000O2O1O1O2N001N1000000000O11N10000O1000001O000O101O0000O10O100000000000000O2O00001N1000001O001O000O101O000000000O101O0000001N1000001O000O10000000000O10001O000O1000000O1000000O100000000000000000000O1000000O2O0000001N101OO0101O1O1O00001O1O0O2O00001O1O000O2O000O2O0000001N1000001N100000001O0000000O101O0000001O00000O10000O10001O001O000O101O000O10001O00000000001O00000O2O001O00000O1000001N1000000O101O00000O10000000000O2O000000000000000O10000O10001O4K4M2N2N1N2O3M0000000000000O1000O1O100O1O0100000O1000000001O00001N10000O101N100000001O001N1000001O001O1O00000O2O00000O2N100000000O100000001O1O2N1O1O1N3N2N00001O001N2O001O0000000O10O01O1000O100O1000000O10000O1O1M3O010000000000000000000000000001O0O101O1N2N100O2O0O10001O0000000O10001O00000000000000001O00001O001N1O1000O010000000000O0100000000O10000000000O100000O10000000O100O100O11O1O000000O10000001O00000000000O100001O000O1O100000000O10001O000O2O0O2N3L2O1O1N2O2O1N1000000001O0001O1O1O00001O10O0001O001O001O001O0O2O00O11O9F3N001N2N2L3G\\FPKj9Q5210I7M4M1N2N4J9hKYEd00ZOS11GOUOd0[;G\\V`1" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"m5>b>0000000O100000O0101O00O010000O1000O1000O100000O01000000O0100000O1000O01000O10O1000O10O10000000O010000O100000O01000000O01000O10000000O0100000000O100O10O1000O010000000O10O1000O100O00100000O10000000O100O10O10O10O1000O10000000O1000O100000000O1000O01000O100O1000O2N1000000O010000O10O100O1000000O10O1000O10N2O2_OkAJ40R>g0M2O1N1O0_Ob0M3O1O1000O010000O1000O010000O1O10000O10O10O01000O100O10000O010O10000O10O10O10000O10O010000O1000000O01000O100O10O010000O10000O01000O1000O0100O1000O010000O10000O10O1000O10O10O100O01000O10000O10O10O10O010000O10O10O100O10O1000O100O100000O01000N1O2O100O010O10O100OMWCPNj and ?", + "choices": [ + "A. is positioned over .", + "B. is a component attached to the side of .", + "C. is traveling on top of .", + "D. and are parallel and side-by-side." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_412.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000017182.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "]bc57R=4N1O1O1O1000O10O10000O1O10O2O1O000000001N2O1O2N2MmQ]2" + }, + { + "size": [ + 428, + 640 + ], + "counts": "[b]25T=7K5K6I6K4L4L7I4M0O2N10O01O100O100O1000O10O1000000O10000000O10O1000000000O010000000000000O10000000O100000O101Oc0]O[SV5" + }, + { + "size": [ + 428, + 640 + ], + "counts": 
"o^g12Y=3N00001O0O1001OO1O2O1N5SCB0132K=b;S3TN:F1O1O000000000000000O100000000O1000000000000000000O10000000000O10000000000000000O10000000000000000000000O100000000000000O1000000000000000000O100000000O1000000O100000000000000000000000YLoFS3Q9mLoFS3Q9d03nKmFJ2e3b9O000000O1O100N2O100O1O100000000000000000000000000001O000000001O0000000000O100oMULaJk3`700000O10000000000000000O100000000O1000000000000O100000000000000O100000000O1000000000000O1000000O10000000000O10000000000000000O10000000000O1000000A?N2O100O100000000O100000000000000O1000000O1000000000000000000000000001O0000000000001O00000000001O0000000000001O000000000000000000001O0000001O002N3M00000000000000000000O1000000000000O100000000000000O100O100000000000000O100000000000000000000O10000000000000000000000O1000000000000O100O10000O10000O1000000000000000000000000000000O10O2YMcGkM43O1OP1MQOh8g0e3M302N?", + "choices": [ + "A. and are on .", + "B. is on , and both are on .", + "C. and are on .", + "D. Only is on ." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_413.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000017182.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "[b]25T=7K5K6I6K4L4L7I4M0O2N10O01O100O100O1000O10O1000000O10000000O10O1000000000O010000000000000O10000000O100000O101Oc0]O[SV5" + }, + { + "size": [ + 428, + 640 + ], + "counts": 
"o^g12Y=3N00001O0O1001OO1O2O1N5SCB0132K=b;S3TN:F1O1O000000000000000O100000000O1000000000000000000O10000000000O10000000000000000O10000000000000000000000O100000000000000O1000000000000000000O100000000O1000000O100000000000000000000000YLoFS3Q9mLoFS3Q9d03nKmFJ2e3b9O000000O1O100N2O100O1O100000000000000000000000000001O000000001O0000000000O100oMULaJk3`700000O10000000000000000O100000000O1000000000000O100000000000000O100000000O1000000000000O1000000O10000000000O10000000000000000O10000000000O1000000A?N2O100O100000000O100000000000000O1000000O1000000000000000000000000001O0000000000001O00000000001O0000000000001O000000000000000000001O0000001O002N3M00000000000000000000O1000000000000O100000000000000O100O100000000000000O100000000000000000000O10000000000000000000000O1000000000000O100O10000O10000O1000000000000000000000000000000O10O2YMcGkM43O1OP1MQOh8g0e3M302N and ?", + "choices": [ + "A. is in front of .", + "B. is on .", + "C. is in front of .", + "D. is on ." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_414.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000559543.jpg", + "mask_rles": [ + { + "size": [ + 333, + 500 + ], + "counts": "]lV24Q:8oK4RJAX3`0c2i0cL\\O\\3i0]LZOb3k0XLWOf3o0TLSOl3o0oKTOP4n0hKYOX4j0XKCU4Q1fKSOS4ZNTKf2c0VOW4P1_KZO^4k0[KZO`4\\3N2N2I7L4O1N2O001N3N10OO201O0O0100001OO1001O00O1001OO010001OO100001O00O20O0000O10100O1O001O2M3N1O103L2N002N3M5K1O5K6J2eMPKUOR5i0RKSOQ5]NjJk18FS5YNgJINo1=M]5SNVJ71^1=7^5\\NUJ\\1`06h5F]J8e5SOPJVOENj0g1c5nNUKP1l4gN]KX1e4bNaIKm1_1c4fN`IJ10k1]1h4fN\\IM10l1[1i4gN`KW1a4hN`KV1m6O1O2N2M4L3M3N3L3N3L4L3M4KPb0OS^O0gE0ef<6jaP1" + }, + { + "size": [ + 333, + 500 + ], + "counts": 
"led0:R::F7J4L2N1N2O0O2O000O1O100O1O101N1000001N1BdNkG[1S8lNiGT1V8QOfGo0Z8LdG[O\\8c0iGYOX8f0kGYOS8h0nGVOS8i0oGVOQ8j0oGWOP8i0PHXOo7b0gGQO<=m7a0XH_Oh7a0XH_Oh7`0YH@g7`0YH@f7a0ZH_Of7a0ZH_Of7a0YH@g7?ZHAf7?ZHCd7<]H0W70iH8o6HQI`0f6AYIb0e6^O[Ic0d6]O[Ie0d6\\O[Ie0d6[O\\Ie0d6[O[Ig0c6ZO]Ig0b6YO^Ih0a6XO_Ii0`6WO`Ij0_6VOaIk0^6VO`Il0OYNk5k0VJl0N[Nj5k0WJk0I_NP6g0VJk0EbNU6c0VJ_1j5cNTJ\\1m5fNQJX1Q6iNnIR1V6oNjIn0Y6SOfIk0\\6TOeIj0]6VOcIi0^6VOcIi0]6XOcIg0^6YOcIe0^6[OcId0]6\\OcIc0]6]OdIc0\\6\\OeIc0\\6]OeIb0[6^OeIa0\\6^OeIb0Z6_OfIa0Z6^OhI`0Y6@gI`0Y6@gI?Y6BgI>Y6BgI>Y6AhI>Y6BhI=W6DiI8k0i7TOVHm0j7SOUHn0k7SORHn0P8QOoGP1Q8QOmGP1T8oNjGR1W8b0001N1000O010000O10000O1000O01000O010000O100O=D7Hf0ZO6K4J5Mnji2" + }, + { + "size": [ + 333, + 500 + ], + "counts": "mVl25U:3O1O2O0O2O001O2O2M00N3M3M4LW`Q2" + } + ], + "question": "What is the relationship between and ?", + "choices": [ + "A. is on the floor next to .", + "B. is standing next to .", + "C. is placing on .", + "D. is holding ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_415.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000578545.jpg", + "mask_rles": [ + { + "size": [ + 474, + 640 + ], + "counts": "[me2:Y>9K4M2M4J5J6M3N3L3N2N2N2N2N2N3M2O1O1N3M2N3M3N1O2L4N1O2N2M3M3N2N5K6J4`LbL]Kb3_4`L`Kb3]4`LaKf3Y4aLbHDk2o3_4eL\\K_3`4gLZK\\3d4fLZK\\3d4eLZK^3c4fLYK^3d4dLYK_3d4fLXK\\3e4iLVKY3h4iLWKY3f4lLUKX3g4nLTKT3i4RMQKQ3l4QMSKP3k4QMTKQ3k4PMTKQ3k0kJj1U2YMR3l4PMQKR3n4oLPKR3P5oLnJR3R5oLlJR3T5PMiJQ3X5PMeJQ3\\5QM`JP3a5QM\\JP3d5RMYJo2Z1`Jb2d2oKn2_1^Jc2a8k100000O10000oJ^Gj3b8VL^Gj3b8UL_Gk3`8X100N2O1N200N2O1O10kIiG2N2O1N2O1O1N2N2O1N3N1N2O1N2N2N3UCQOg;Q1XDPOg;Q1UDSOj;n0VDSOg;P1WDQOg;Q1XDQOe;R1ZDnNd;T1\\DlNc;V1[DkNd;X1ZDiNc;[1YDgNe;]1WDfNf;\\1XDfNg;\\1WDeNh;W2N2O1O1O100001O00001O00001O00001O0010O0001O00010O0001O01O00001N1O2N1O1O2N1O1O2N1O1O2N1O2N1O2N1O2N1O1O2M2O1O2N2N1O2N1O2N1O2N1O2N2N1O2N1O2N1O2N1O1O2N1O2N1O2M3^OPB7Z>O1O1O1N3Moj12PUN1O10O_>2`AO1Nnj7NVUH0\\Zm1" + } + ], + 
"question": "What is the relationship between and ?", + "choices": [ + "A. is holding .", + "B. is attached to .", + "C. is reading .", + "D. is sitting on ." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_416.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000437898.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "gXR3\\1f;:G9A?kNV1mMaM`HV3f6T2O2O1O1N2O001O001O1O1O1O1O1O1O1O1O001O001O1O1O1O1O1O1O1O001O1O1O1O010O1O1O001O1O1O1O1O001O001O000000001O001O1O1O0000000O10O100000000O101O0eK`Jk0`5UObJJ>TMR5P3bJlMJ\\On1Gf3Q3kJiLn25W2R3^NlLc1T3^NkLb1T3`NjLa1V3aNhL_1X3Z40000000000000000O100000000000000000000000000000000O2O00000000000000001O00000ZOmESNT:l1nESNS:0YFi0FVOT;f0REVOo:?", + "choices": [ + "A. is located between and .", + "B. is over .", + "C. is below both and .", + "D. is above ." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_417.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000225532.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "gdb27_;4K3N1O1O1O011N100O2OO100000000O11O000000O1001O0000O101O3M10O0010O00001L4Ibaf2" + }, + { + "size": [ + 375, + 500 + ], + "counts": "h_d21f;1O1O2N1O1N2O1O1O1O001O001O001O0oDBi:?VEBi:?VECh:=XEDg:=ZEBe:>\\ECb:>]EDa:=_ED_:=aEC^:=bED]:=cED[:=eECZ:=gEDW:=hEEV:SHAl7?UHAj7a0UH^Ok7c0UH\\Ok7e0TH[Ol7f0THYOl7g0VHWOj7j0VHUOj7l0VHSOj7n0VHQOj7P1VHoNj7R1VHmNj7S1WHlNi7U1WHjNi7V1XHiNh7Y1WHfNi7[1WHdNi7\\1XHcNh7_1WH`Ni7a1WH^Ni7c1WH\\Ni7d1XH[Nh7f1YHXNf7j1ZHUNf7l1ZHSNf7n1ZHQNg7o1YHPNg7Q2YHnMg7S2YHlMg7U2XHkMh7V2XHiMh7X2XHgMh7Z2XHeMh7[2YHdMg7]2YHbMf7`2ZH_Mf7b2ZH]Mf7d2ZH[Mf7f2ZHYMf7h2ZHWMf7j2ZHUMf7l2ZHSMg7m2ZHPMg7P3`01O1O1O100O002N001O001O2N001O1O1O1O1O1O0000000000000000000000000000cMQHR1o7eN_HV1a7eNgHX1X7fNPIU1Q7hNXIQ1h6nNbIi0]6VOiIf0W6YOmId0S6\\OnIc0R6\\OQJb0o5^ORJ`0o5@SJ>n5@TJ2fMOU80WJKdML14OLU89WJ and ?", + "choices": [ + "A. 
is driving on .", + "B. is driving on .", + "C. is driving on .", + "D. is parked next to ." + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_418.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000564336.jpg", + "mask_rles": [ + { + "size": [ + 360, + 640 + ], + "counts": "gTV12T;3N2N2I6L4M3M3G9M3M300N1010N101M2IPFVOR:g08O1M4M210000O100O2O3dEFd93ZF6e9J]F6a9JiFLX93nFGR9:n000001O2_E_OW:n0J;F4K1O2AbNoFa1P9j0100O10O1O00001O1Ng]h2" + }, + { + "size": [ + 360, + 640 + ], + "counts": "0[5m500000000000000000000000000000000O100000000000000000000000000O10000K5O1O1000000000000001O0000000000000000O11O1O1O00002N3M3M2N3M2N2N1O001O000000M5aK^JlN0000O12NN3O]10hN14OF2OO11O0Oc81YV7OWlH11OP\\n5" + }, + { + "size": [ + 360, + 640 + ], + "counts": "R_P24kg0NWC:E6K4L4L4L4L4N2O0100000000O1L4L400O11O00000001O01O00000O100010O0000000000000000000000000N2L4L4L4M4L3LQa]4" + } + ], + "question": "What is the relationship between , , and ?", + "choices": [ + "A. is looking at and is positioned in front of .", + "B. is looking at while being beside .", + "C. is looking at and is positioned beside .", + "D. is looking at and is positioned in front of ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_419.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000319935.jpg", + "mask_rles": [ + { + "size": [ + 398, + 640 + ], + "counts": "i9d2j90O1000000O1000000O100O1O1N2N2O100000000O10000O10000O100000000O1000000O10SMkFa2U9]MRG^2n8aMVG\\2j8cMZGZ2e8gM\\GX2d8hM]GW2c8hM^GX2b8hM^GX2a8iM_GW2a8iM_GW2a8iM^GX2b8hM^GX2b8hM]GY2b8hM^GX2b8hM^GX2b8gM_GY2a8gM^GZ2a8fM`GZ2_8gMaGY2_8fMbGZ2^8fMaG[2_8eMaG[2^8fMbGZ2^8fMbGZ2]8fMcG[2]8eMcG[2]8dMdG\\2\\8dMcG]2\\8dMdG\\2\\8dMdG\\2\\8dMdG\\2\\8dMdG\\2\\8cMdG^2\\8bMdG^2\\8aMeG_2T91000000O100000000O10000O1000000O100VO\\MjGd2V8]MiGc2W8^MgGc2Y8]MgGc2Y8]MgGc2Y8]MfGd2Z8\\MfGd2Y8]MgGc2Y8]MgGc2Y8]MfGd2Y8]MgGc2Y8]MgGc2Y8\\MhGd2X8\\MgGe2Y8ZMhGf2P9000O1001O3M6J7I7I7I8H6J4L4L2N1O1O7I1O1O1O1O0000000000000000O1O1O1O1N2O1O1O1O1O1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000000000VLPOVLP1j3TORLl0n3VOPLj0o3XOPLh0o3]OmKc0R4_OmKa0R4@nK`0Q4AoK?Q4APL>P4BRLo5ARJ>n5BSJ=m5CSJ=n5ASJ?n5@RJ`0Q6\\OoIe0S6YOlIh0U6WObIR1^6mNkHk1V7TNhHn1X7QNhHP2X7PNhHP2X7oMjHP2V7PNjHP2V7oMkHQ2U7oMlHP2U7oMlHP2T7PNlHP2T7oMmHQ2S7oMmHQ2S7oMnHP2S7nMnHR2S7mMmHS2S7lMnHT2R7lMoHS2Q7lMPIT2Q7jMQIU2o6kMQIU2o6kMRIT2n6lMSIS2n6kMSIU2m6kMSIU2m6jMUIU2k6kMUIU2k6kMUIU2l6jMUIU2k6jMVIV2j6iMXIV2i6iMWIW2i6iMXIV2h6iMYIW2h6hMYIW2h6hMXIX2h6gMYIY2h6fMYIY2g6gMYIY2g6fMZIZ2f6eM\\IZ2d6fM\\IZ2e6eM\\IZ2\\801O1O00001O00001O001O00001O001O1O001O6Jd0\\Od0\\O7I1O1O00001O0000O1O1O1oKDlJ0VN=m6EZJP2g5oMWJS2Q81O1O100O1bMiMdJX2Y5lMeJU2[5jMfJV2h71O1O100O1O1O1O1eM`MhJ`2Y5_MaH2o1`2c5aM]J_2h71O1O1]N[M\\If2d6[M[Ie2d6\\M[Ie2\\6ZM\\H1X1e2T6cMcHHX1e2^6[MZH0W1e2i6\\MUIh1fN\\Ni8L`H]1U8dNjGU1]8kNbGo0e8QOZGo0g8ROWGn0j8ROVGm0k8SOTGm0m8SORGn0n8RORGm0o8SOPGn0Q9QOnFo0o8mNUF1l0R1o8nNXGR1h8nNWGR1j8oNUGQ1l8mNTGS1S:010O1O1O1000000mNlNlFT1g:lN]D=P6CX;:bDIc;=2L4K5M300001O=D4K2N]OnDKQ;3f00000003MO2Liel2" + }, + { + "size": [ + 398, + 640 + 
], + "counts": "Q^k433c0P;g0I5J6O1O1O1O1O100O1O100O100O100O10000000000000000001O00000000O1O1O1O1O1O1O1O1O1O1O1O100N2O100O1O1O100O1O100O1O1O1O1O1O1O1O100O1O1O1O1O1O1O100O1O1O1O1O1O1O100O1O100O100O1[Oe0O100O10000O1000000000000000000000000001_LmGe2S8XMRHf2n7YMSHg2m7XMUHg2k7WMXHh2h7UM[HK^Oh2W8[M^HK^Oh2U8[M_HL\\Oi2U8ZMiHe2W7ZMjHf2W7XMkHg2U7XMmHg2T7XMlHh2T7WMnHh2]8O2N1O1O1O1O1O2N1O001O2N2N1O1O2N2N2N1O2N1O1O1O1O1O001O1O1O001O001O0000000000O100O1N2O100O1O1O10000000000001O1OK5M3O1O1O1O1N200O100M3O1O100O1M3N2N2O100O1N2O1O1O1N2N2O100bNQM\\IP3b6QM]IQ3b6PM\\IR3b6PM]IQ3a6QM^IP3b6PM]IQ3b6QM\\IP3d6PMbHJ;X3R7SM]HH?V3T7RM\\HI?V3T7\\MlHd2T7\\MkHe2U7[MjHf2V7ZMgHi2Y7WMgHi2Y7XMfHh2Z7YMdHh2\\7YMbHh2_7XM^Hj2b7Q11O00000000001O00001O00001O00001O1OVH" + }, + { + "size": [ + 398, + 640 + ], + "counts": "nQY1>n;:G8H5L6I9H7I7I7I8G8H8H8H4L5L1N2O1O3M3M1N3N0O100O1O11O00O01O1O001O1N10001000O100O10O1000O100O10000O1O1O1000000000000000000001O1O1O1O1O2N2N3M2N2N3M4L3M3M1O1O0O11N0100000000000000000000000O10000000000O10000O10000O100000000000000000000000001O6J2N1O1O001O000001N1O3L6L4J9_O:N2N2M3M3L4L4L3N3M3K6K4M3L5Ab0BZdS2HW\\lM11O\\b`2" + } + ], + "question": "What spatial relationship do , , and all share with ?", + "choices": [ + "A. They are all next to .", + "B. They are all under .", + "C. They are all on .", + "D. They are all inside ." 
+ ], + "answer": "C", + "type": "relation", + "image": "images/vqa_420.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000116439.jpg", + "mask_rles": [ + { + "size": [ + 640, + 429 + ], + "counts": "fcg49fc0?B2N2N3M3M7I1O001O00O11O000000O10000001O000O1000001O1O1O1O1O1O2N1O1O1O00hb0A\\]O;fb0C^]O6jb0EZ]O7`hT3" + }, + { + "size": [ + 640, + 429 + ], + "counts": "aWd5h0Wc07K0O10O02O00O1O01O01O01O00000000001N10000O1000O10O100O1O1O1O1N2N200O100001N101N2N1O2O1M4M5J5JbKcA1Lj4a>UKcA2Jj4c>SKdA2Kj4j>VKVAj4a>UKcA2Jj4c>TKcAQ5]>PKaAQ5_>oJ]A60e4c>UK]A60e4h>ZKXAf4h>93M1O001O4L2N002N001O001O2N1O1O3M2N001O1O3M3M2N5K0000001O00000000O10000001O00000000000000O11O000000000000000000000000000000000000O11O0000000000000000000000000000000000000000000000000000001O000000O11O00000000000000000000000000000000000000000000000000000000000000001O00000000000000O1000000000000000000000000O100000000000000000000O1000000O100000000000000000000O100000000000000000000O10000O10000O100O10000O10000O1000000O1000000O100000000000000O1000000O100000000O10000O1000000O1O1000000O100O100O1O1O100O100O1O1TO]KYBe4f=`KTBb4k=aKoAc4P>aKkAa4T>f0O1O1N2O100O10000001O0000001O00001O00001O000000001O000000001O00001O000000001O0000A[JdBd5\\=]JcBc5\\=^JdBb5\\=_JcBa5\\=aJcB_5]=aJcB_5]=aJdB^5\\=cJcB]5\\=dJdB\\5\\=eJcB[5]=eJcB[5]=eJcB[5]=eJdBZ5\\=gJcBY5\\=hJdBX5\\=hJeBW5[=iJeBW5[=iJeBW5[=iJeBW5[=iJfBV5Z=kJeBU5[=kJfBT5Z=lJfBT5Z=lJfBT5Y=mJgBS5Y=mJhBR5X=nJhBR5X=oJgBQ5Y=oJgBQ5X=QKgBo4Y=QKgBo4X=RKiBm4W=SKiBm4W=TKhBl4W=UKiBk4W=VKhBj4Y=UKhBj4X=VKhBj4X=VKhBj4Y=UKhBj4X=VKhBj4X=WKgBi4X=XKiBg4W=YKiBg4W=ZKhBf4X=ZKhBf4X=[KgBe4Y=[KhBd4X=\\KhBd4X=\\KhBd4W=^KhBb4Y=]KgBc4Y=]KhBb4X=^KhBb4Y=^KfBb4Z=^KfBb4[=]KfBb4[=]KeBc4[=]KeBc4[=]KfBb4Z=^KfBb4Z=_KeBa4Z=`KgB_4X=bKhB^4W=cKjB\\4R=hKnBX4Q=iKoBW4Q=iKoBW4Q=jKnBV4Q=kKoBU4Q=lKoBS4V=hKjBX4W=fKjBZ4V=gKiBY4W=gKjBX4V=hKjBX4W=hKhBX4X=hKhBX4X=hKiBW4W=jKhBV4X=jKhBV4X=jKiBU4W=kKiBU4V=mKiBS4V=nKkBQ4S=QLmBo3R=SLmBm3hj?BV@>i?DU@>i?CW@=i?CW@=i?DV@ZOSAe0o>XORAh0P?V
OPAj0R?TOm@n0V?nNk@ZNLd2Z?QOn@n0S?POo@o0R?POn@P1R?POn@P1S?oNm@Q1S?oNm@P1T?oNl@`NG]2_?ROj@bNF\\2`?QOk@S1V?lNj@dNE[2c?POi@cNF[2c?QOk@o0V?nNl@Q1V?nNj@R1V?mNk@T1U?kNk@U1V?iNk@nN\\OV2h?kNm@oN[OV2i?iNm@PO\\OU2j?hNl@ROYOW2l?eNo@\\1Q?bNPA^1Q?aNo@_1S?]No@b1S?\\NPAc1P?ZNVAGiNJ1c1R`0fNmAIoMV1T`0oN^Bf0_`0O2N4L3Bg\\OOfc0N100001O0000001O0O1000001O1O1O001O001O01O00004D\\\\O3jc0O1O0Ob^P3" + }, + { + "size": [ + 640, + 429 + ], + "counts": "\\_c51dc0?", + "choices": [ + "A. , , ", + "B. , , ", + "C. , , ", + "D. , , " + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_421.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000116439.jpg", + "mask_rles": [ + { + "size": [ + 640, + 429 + ], + "counts": "cd69fc0hb0A\\]O;fb0C^]O6jb0EZ]O7`hT3" + }, + { + "size": [ + 640, + 429 + ], + "counts": "[`02332OO2ib0W1X]OhN4Li`01T_O^4^>bKcA1Lj4a>UKcA2Jj4c>SKdA2Kj4j>VKVAj4a>UKcA2Jj4c>TKcAQ5]>PKaAQ5_>oJ]A60e4c>UK]A60e4h>ZKXAf4h>93M1O001O4L2N002N001O001O2N1O1O3M2N001O1O3M3M2N5K0000001O00000000O10000001O00000000000000O11O000000000000000000000000000000000000O11O0000000000000000000000000000000000000000000000000000001O000000O11O00000000000000000000000000000000000000000000000000000000000000001O00000000000000O1000000000000000000000000O100000000000000000000O1000000O100000000000000000000O100000000000000000000O10000O10000O100O10000O10000O1000000O1000000O100000000000000O1000000O100000000O10000O1000000O1O1000000O100O100O1O1O100O100O1O1TO]KYBe4f=`KTBb4k=aKoAc4P>aKkAa4T>f0O1O1N2O100O10000001O0000001O00001O00001O000000001O000000001O00001O000000001O0000A[JdBd5\\=]JcBc5\\=^JdBb5\\=_JcBa5\\=aJcB_5]=aJcB_5]=aJdB^5\\=cJcB]5\\=dJdB\\5\\=eJcB[5]=eJcB[5]=eJcB[5]=eJdBZ5\\=gJcBY5\\=hJdBX5\\=hJeBW5[=iJeBW5[=iJeBW5[=iJeBW5[=iJfBV5Z=kJeBU5[=kJfBT5Z=lJfBT5Z=lJfBT5Y=mJgBS5Y=mJhBR5X=nJhBR5X=oJgBQ5Y=oJgBQ5X=QKgBo4Y=QKgBo4X=RKiBm4W=SKiBm4W=TKhBl4W=UKiBk4W=VKhBj4Y=UKhBj4X=VKhBj4X=VKhBj4Y=UKhBj4X=VKhBj4X=WKgBi4X=XKiBg4W=YKiBg4W=ZKhBf4X=ZKhBf4X=[KgBe4Y=[Kh
Bd4X=\\KhBd4X=\\KhBd4W=^KhBb4Y=]KgBc4Y=]KhBb4X=^KhBb4Y=^KfBb4Z=^KfBb4[=]KfBb4[=]KeBc4[=]KeBc4[=]KfBb4Z=^KfBb4Z=_KeBa4Z=`KgB_4X=bKhB^4W=cKjB\\4R=hKnBX4Q=iKoBW4Q=iKoBW4Q=jKnBV4Q=kKoBU4Q=lKoBS4V=hKjBX4W=fKjBZ4V=gKiBY4W=gKjBX4V=hKjBX4W=hKhBX4X=hKhBX4X=hKiBW4W=jKhBV4X=jKhBV4X=jKiBU4W=kKiBU4V=mKiBS4V=nKkBQ4S=QLmBo3R=SLmBm3h, , and ?", + "choices": [ + "A. is on , but is not.", + "B. is on , which is on .", + "C. Both and are on .", + "D. is on both and ." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_422.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000581615.jpg", + "mask_rles": [ + { + "size": [ + 640, + 478 + ], + "counts": "QXi223m1ORNTa0j3eNQ1\\MYKkBM2Z6ob3M3O1N2O0O2N1O2O2N1N101N101N1O2O1N2N2N101O1N2N2O0O2O0O2O0O2N2N2N2O1N100O3N1N1O2N101N2O1N2O0O2N2N1O2O1O1N2O1N1O1O2O1N2N2N1O2N3M2N2M3N3L4I8H:D?BPGYE]9V:c0^Oa0_Oc0]Oe0]Oc0[Of0TOe0D7N2O1O1N101N101N2N101N2N2M4M3N2L4L8Hf0jNX1`NWQh0mMl]WO5MMO520OOWa0Li^O^30eLe?_4[@`K4OR=d6L4L4N2N2N2M3K5M3O1N200O1O1O1O100O1O10000O100O100O10000000000O100000003VDPHK?3Y2n7U9M101O0000000000000000000000000000000000000000000000000nJjESMYNS3m;HSF5m9JUF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF5k9KUF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF5k9KUF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF4l9LUF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF3k9MUF3k9MUF3k9MUF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LUF5k9KUF5k9KUF5k9KUF5k9LSF6l9JQF9o9IaEe0_:^40000000000000000000000000000000001O000000000O101O00001N2O0M3]Nc1iMX2M201O1O001O1O1O1O1O1O2N2N2N1O4LB:A=ZDfGn:R9A>@>F8E and ?", + "choices": [ + "A. is mounted on .", + "B. is leaning against .", + "C. is placed on the floor in front of .", + "D. is behind ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_423.png", + "model_output": "A" + } +] \ No newline at end of file diff --git a/evaluation/GAR-Bench/model_outputs/gar_8b_detailed.json b/evaluation/GAR-Bench/model_outputs/gar_8b_detailed.json new file mode 100644 index 0000000000000000000000000000000000000000..f5d046bc33a2cf384baacd7bed18a924803bd793 --- /dev/null +++ b/evaluation/GAR-Bench/model_outputs/gar_8b_detailed.json @@ -0,0 +1,2463 @@ +[ + { + "image": "images/caption_detailed_0.png", + "subject_name": "person", + "object_name": "skateboard", + "predicate_name": "beside", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "hT\\63W=1N3M2O1O2N1O1O2O0O1O001O00O10O10O001000O011N1O1O10000O1O1000000\\MCZH>e7GTFGk1b0Q81mGOS83kGMU85hGKY86fGJZ88cGJ\\88`GIb88[GIe8:WGGj8:PGIQ99lFHU99dFK]96\\F0d92WF0k90SF0o90PF0P:1nENU:b12O1O1O1M3N2M3L4N2O2N2O1O001N1OZFoMT8P2kGUNS8k1jGYNU8c1aFRNY1>V8^1aFTNX1a0d0@j5b1ZH]NW1d0a0@n5^1ZH^NV1g0>@Q6[1\\H]NT1k0=]OT6[1`I\\O9ZOV6\\1^I]O:WOX6]1\\I_O:UOZ6\\1ZIB:SO[6[1ZIE8QO^6Z1YIH7nN`6[1VIK8jNb6]1SIL8iNd6\\1RIN0ZN_O<`7[1PI2OWN@=a7Z1oH6LUNB=Ic0^O7J4K2N2O0O2O0O10O010TOhBOY=KoBAGNg=a0d0O0101XOAlB`0Q=ElB=Q=FmB=P=FmBbNXO]:8QG`0bNXO_:6oFb0j9\\OVFb0m9]OSFa0R:\\OnE?Y:^OiE?\\C1N100O100O10O0000010O0100O000000000000000000000O10000O100VNiCj0WRSN`0\\9RO`H?TN?[9RO^Hc0XN9\\9SOZHf0\\N3\\9WOVHg0aNOZ9ZOTHh0cNLZ9[OSHj0dNIZ9[OSHm0fNCY9]ORHQ1hN^OX9_ORHS1iNXOX9EoGS1jNUOX9HoGR1lNROW9KnGS1QOeNX95jGV1Y9hNhFX1X9gNiFX1X9hNhFX1X9gNhFZ1X9eNhF\\1X9dNfF]1[9cNcF_1]9aNbF`1]9aN`Fb1a9]N_Fc1a9]N_Fc1a9]N_Fc1a9]N^Fd1b9\\N^Fd1b9\\N]Fd1d9\\N[Fe1e9[NZFf1f9ZNYFg1g9YNXFh1h9XNXFh1S3XNj10RKi1R3\\Ni1KUKh1Q3`Ni1HVKh1Q3aNg1HXKf1o2fNh1DZKe1i2mNl1^O\\Kc1g2SOk1ZO_Kb1e2VOk1XO`Ka1f2XOi1WOaK`1f2[Oh1UO_K`1l2[Oe1UO]K_1Q3\\Oc1TO[K`1R3^Oa1TO\\K]1T3_O`1UO[K[1V3_O`1XOWKY1Z3_O_1ZOUKU1]3B^1ZOTKS1_3C]1\\ORKQ1a3C]1]OQKo0c3E[1]OQKn0c3G[1_OnJj0g3H[1_OlJi0i3HZ1AlJg0j3IY1BkJe0k3JZ1DhJb0n3JZ1GeJ?Q4JZ1IcJ>R4I[1KaJM2\\2^OgMb0MO^2\\OfMe0LO^2\\OeMg0LM`2ZOdMjN^NP1_1k0c2VO`MfNjNX1S1k0g2RO]M
jNkNY1P1k0k2nNZMlNoNZ1m0k0m2kNXMnNQO\\1j0j0m2lNWMnNTO\\1h0i0n2_M`KY1f1SOWO[1d0j0n2`MbKV1f1TOXO[1c0j0l2aMgKR1b1XOZO[1`0j0W3aNoLZO\\OZ1?j0X3_NnL]O[O[1`0g0aNbMa4n0RM_O]OZ1>g0Z3^NkLA]O\\1>d0\\3\\NjLD\\O\\1>d0]3ZNiLF]O]1c3QNdL2]O_1==f3lMaL9[O`1=:kNjM]6<[Ja1=8kNoMX69`Ja1=6lNoMV6:dJ_1:8nNmMT6ZMdNl1i5i1SKeM0Z1`0F[4\\1PKSLYOc1k0W1`0I\\4[1nJjM6P1`0M[4Y1kJoM9j0a00Z4W1jJRN;f0`03Z4U1iJUN=?b09W4S1cJSLKU2i09c0>U4R1bJTLKU2j07c0cMcNi2b5i3TKnK1^MMP3m4e3SKnK2^MNP3l4g3fJTL?XMNo2l4i6SK[F1m2k4h6RK^F1l2l4f6RK_F2l2k4e6QKaF4l2i4c6SKaF4m2h4b6SKcF4l2h4a6RKXFF7a0R3d4_6UKfF7k2c4`6TKfF9l2a4_6TKfFk2^4Z7bKgH]4Y7bKiH]4W7cKiH]4W7_KnH`4R7`KnH`4Q7aKQI]4o6aKXEEh3i4P7aKZEFf3h4Q7aKUI_4k6aKUI_4l6`KUI_4k6`KVI`4k6^KWIa4j6^KWIa4k6\\KWIc4T;O001O001O001O00001OL`Ki@_4]?O1O002N2N1O2N2N2N2N1O2N2N1O2N2N2N2N1K6M5K6J0O10000O2YOo^O^NQa0X20O100O1[On^O]NRa0V220000N20000N3O1N2^Od^O]N`a0b1`^O]Naa0b1a^OSNia0j18N2N2O1O1N2N2N2O1N2N2N2O1O1O1M3O1N2O1N1O2O0000MXOP]Of0Rc0ZOn\\Od0Xc0N2OIAo\\O=Sc0Cl\\O=Uc0Ck\\O;Wc0Eh\\O;Yc0Eg\\O:Zc0Ff\\O8[c0Gf\\O9[c0Ge\\O7^c0Gb\\O8`c0H`\\O6ec001N[KM`E3W?N2OPPi1" + } + ], + "model_output": "The sky is a soft, pale blue with a smooth gradient, transitioning from a slightly darker blue at the top to a lighter blue towards the bottom. The sky is clear with no visible clouds, and a person is over it." 
+ }, + { + "image": "images/caption_detailed_4.png", + "subject_name": "person", + "object_name": "pavement", + "predicate_name": "standing on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "al_6=^>8I7AWO[BS1a=:M1N04MmNfN_DZ1];kNaDU1[;POdDQ1X;ROhDn0S;WOmDi0Q;YOPEf0m:]OSEc0k:_OUEb0h:@ZE>d:FZE:e:HhDoNIM8[1V;KhDmNIN9Z1U;MgDPO4T1l0Ne73WGgNOM9^1`0f0_7YOkGbNO08b16j0f7RORHbN3e1ES1U8gNaHQ3_7nLbHS3\\7nLdHR3[7oLeHQ3Z7PMeH`NYO^4Q8TMeH]N[O_4^7aKQIc1I]N20F_4Z7eKUI_19l2_6iKVI\\1;k2\\6mKXIX1=j2Y6PLZIV1=j2X6PL[IW1=i2V6RL]IU1>i2R6ULVI@1b1g0i2P6WLUI\\1l0\\2n5\\NRJd1m5]NSJc1l5^NTJb1l5]NUJc1j5^NVJb1j5]NWJc1i5]NWJc1h5^NWJc1i5]NWJc1i5\\NWJe1i5[NVJf1j5ZNVJf1j5ZNRJj1n5UNPJn1P6RNnIP2R6PNiIU2W6kMcI[2^6eM\\I`14^L`6]6eIaI[6[6mIaIS6e0[I\\4i0lJm5f0]IW4l0RKg5e0`IS4o0UKe5d0_IR4Q1VKe5d0]Im3V1^K_5c0]Ii3Y1dK[5a0^Ia3`1nKY56]Ic3b1ULU53\\Ia3^:K4L3K6G\\DRMg;m28M3N2N6I6K5Ic0J5M2L5L3N3N2M4M2M3N3M2N2M2O2N1O2N1O1O1O2O1N1O001O00WO]CaN1?c2O0L4N00100N2O1O0]OFaB<^=J\\B7e=KVB7j=>`BSOP=l0QCVOnRCCo<=PCCR==kBCX=>cBCb=k03M2N1N2O1N2NTCXOd;g0\\D\\Ob;d0]D^Oa;i0XDXOh;m0RDTOn;S1hCPOX6K4L3N2M3N2N1O2N100O2N100O100O1000O01000O1O010O1O00001O1N2O0O1O2M3C=DeA4Y>LjA4T>LmA7o=ISB5m=KTB3m=MUB0l=1a000O2Oln00SQO2O0N4L4L3N1M4F9M3O1O2O00O0100000O100O101N1O2N2M2O3L3M6GTl]6" + } + ], + "model_output": "A dark-framed bicycle, likely black or dark gray, with a straight handlebar and a visible front wheel equipped with a black tire and spokes, is being ridden by a person. The bicycle features a visible front brake mechanism and a partial view of a rear wheel with similar features. The pedals are not visible." 
+ }, + { + "image": "images/caption_detailed_8.png", + "subject_name": "surfboard", + "object_name": "person", + "predicate_name": "attached to", + "mask_rles": [ + { + "size": [ + 408, + 640 + ], + "counts": "Ved18VO11O000000001O1N2N2N2N2N2N2N2N2O12N1O2NjN]E9b:EgE6W:IkE9R:DSF;m9DUF;_OQO[:>[Fc0e9[O_Fa0o:N2M3M1O4M:F000O1O0O2O010O1O1O1OhV^3" + } + ], + "model_output": "A person with shoulder-length hair, wearing a black wetsuit with a red stripe down the side, is captured in a dynamic pose with one arm extended forward and the other bent at the elbow, legs spread apart in a running or leaping motion, on a surfboard." + }, + { + "image": "images/caption_detailed_9.png", + "subject_name": "person", + "object_name": "grass", + "predicate_name": "standing on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "d45k>0N200O1O2N2O1N1O1O1O010O2O3L4M000O100O010OO2N1NI9O10O1010O0100O1O2M2O2N2FfAK^>OfANg>OjSi8" + }, + { + "size": [ + 480, + 640 + ], + "counts": "i8U6k80O10001O0000000001O1O2N2M6K6K1N2M2O3M2O0O1O0O2O001O1O1O001O10O00O10000000000000000000001O0000001O000000001O000000000000001O000000000000000000O1000000O2O00O02N1O1O001O1O1O1O100O1O1O1N2O1O1N2O1O1N2N2N2010N2O001O001O00001O00001O001O001O1O000000001O0000000000001O00001O00001O0000001O000000001O00001O1O3M1O1O2N2N2N001O2N1O100O0O2O1O001O001O0000001O0001O000001O00O010000000000000O100000000O10000O100000000000000001O000000000000O1O10000001O0000000000000000001O00000000001O00001O000000001O0000001O000000001O0000010O000O10001O0000001O00001O000000001O0000001O00001O0000001O00001O000000001O000000001O00000000001O00000000001O0000001O000000001O00001O000000001O00000000000000001O0000010N2O3M2N4L2N3M3M2N1O2N1O1O001O1O1O1O0000001O0000000000000000001O000000000000O10000000000O10000O10000O10000O100O100O1O100O100O1O100O100O1O1N2O1N2N2N2O1M3O1M3N2M3L4M3K5K5M3G9L4L4N2000000000000001OO10000O1O100O1O100O1O1O100O100O100O1O100O1O10000O100O100O100O100O100O100O100O100O1O10000O100O100O100O1O10000O100O1O100O1O10000O100O1O10000O10000O1000000
1O00001O2N001O001O001O001O0000001O000000000000000000O10000O1O100O100O1O1N2O1O2K3K6H8I7O1O100000001O00O1000000000000000O1FfHmH[7Q7iHlHX7S7;O10000O1O100L4M3ZOQHkI31R8S6a00000O100001OO10000000000O100001OO10000000000000000000000000000000000001O0000000000000000000000000000001O0000000000mI" + } + ], + "model_output": "A person with short dark hair, wearing a light-colored, long-sleeved shirt with a collar, and a dark-colored jacket with a zipper, is standing on grass." + }, + { + "image": "images/caption_detailed_10.png", + "subject_name": "umbrella", + "object_name": "pavement", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 640, + 428 + ], + "counts": "b>;j1JWN3[a05f^OE106OO6Jm0Xa0d1M2O1O0O2O00001O0O2O1O00001N2O00001N101O001N101N10001O000O2O001O1N101O00001N101N101O0O3N001O0O2O001O1O0O2O1N101O001N2N101O1O0O2O001O000O2O0O2O000O2O000O100O2O00000O101N10000O100O100O10000O01000O100O010O1000O1000000000O0101O00O10O10000O100000000O100000000O1000000O3N7I7dK[@o3U`0F9G5KcNmL]Bc2b=fMZBV2e=UN[BXOaNS2T?QOXBWOeNS1S?0WBbNiN]1P?[1i@^Mg0X1_>f1XAZNh>i1UAWNj>l1RAkLIY1U?`3000000000001O0O1VHUK?k4AUK?k4[71O0000000000000O11O0000O0100O1O100O1O100O10O010000O1O1O1O10000O1O010O1O100O10000O100O00101OO0100O100000O010000O10O1001O1N100O2O0000001N100000001N10001N1O100O101O0O2O0O100O1O2N10000O100O1O1O2O0O1O100O2N1O1O1O100L5O0O100O101M2N2O1O1O1O1N2O1O1Oo_O]Mh>a2ZA_Me>^2`1O100O1N2N2O1O1O1O1N2O1O1O1N1O2O1O1O2Nf0YO7J5L3L7H3M3N1O1NUiV2" + }, + { + "size": [ + 640, + 428 + ], + "counts": 
"cj04413L20gb0j7Q@gJa3Z5[LiJd3X5]LgJd3W5^LhJb3X5_LgJ`3Z5`LfJ`3Z5aLeJ_3Z5bLfJ^3Z5cLeJ]3[5cLeJ]3[5cLeJ]3[5cLeJ]3[5cLeJ]3[5dLdJ\\3[5fLdJZ3\\5fLdJZ3[5gLeJY3[5hLdJX3\\5hLdJX3\\5hLdJX3\\5iLcJW3\\5jLdJV3\\5jLdJV3\\5kLcJU3]5kLcJU3\\5mLcJS3]5nLbJR3^5nLbJR3^5oLaJQ3^5QMaJo2_5QMaJo2_5QMaJo2^5RMbJn2^5SMaJm2_5SMaJm2^5UMaJk2_5VM`Jj2`5VM`Ji2a5WM_Ji2`5XM`Jh2`5YM_Jg2a5YM_Jg2`5[M_Je2a5\\M^Jd2b5]M]Jc2c5]M]Jc2c5^M\\Jb2c5_M]Ja2c5`M\\J`2d5aM[J_2e5aM[J_2d5cM[J]2e5cM[J]2e5dMZJ\\2e5fMZJZ2f5fMZJZ2e5hMZJX2f5iMYJW2g5iMYJW2g5jMXJV2g5kMYJU2g5lMXJT2h5lMXJT2h5mMWJS2h5nMXJR2h5oMWJQ2h5QNWJo1i5QNWJn1j5SNUJm1j5TNVJl1j5TNVJl1i5UNWJk1i5VNVJj1i5WNWJi1i5WNWJi1i5XNVJh1j5XNVJh1j5XNVJh1i5YNWJg1i5YNWJg1i5YNWJg1h5[NWJe1i5[NWJe1h5\\NXJd1h5\\NXJd1h5\\NXJe1f5\\NZJd1f5\\NZJc1f5^NZJb1f5^NZJb1e5_N[Ja1e5_N[Ja1e5_N[Ja1d5`N\\J`1d5`N\\Ja1cNcIj6m4cJ`1cNhIe6h4hJl1U5TNlJP2P5PNPKU2i4mMWKS2g4oMYKQ2f4PNZKP2f4oM[KQ2iNUIX5j4oKQ2fN[IY5c4QLR2eN^IY5^4SLU2bN`IZ5Z4TLR3oNVHR4h4oLT3fN]HY4_4QMU3aN`H]4\\4RMl3n2TLRMm3l2TLTMl3h2WLYMo3i1WGlLo4[1Q4`1dL`Nd3U1_LkNe3m0_LSOj3OjL2h3RObLn0P4_NPLb1W;00000O100000000O1000000000000000000000000O11O0000iJ]NQHc1m7cNoG]1`7]NhC=g4V1U7AkH?o6GQI9n6HRI8n6IQI7n6JRI6l6KUI5j6LVI4i6MWI3e61[IO`66`IJZ64L2O1N3N2M2O2M2O1O1N3N2N2M3N1O1N3N1O1N2O1N3N2M3N1N102M2O2M2N2O1O2M2N3M2O1N2N2O2M2N3M2N2N2O1N3M2N1O3M1O3M2N1O2N2O1O1N2N2N2N2O1N2M3OiL^EQ2`:oMcEP2[:PNiEn1V:QNnEn1o9SNUFj1i9WNYFi1e9VN_Fh1_9YNeFe1X9[NlFd1Q9\\NSGb1k8]NZGa1e8_N^G_1`8aNcG^1[8cNhG[1V8eNmGZ1Q8fNSHX1k7hNXHW1g7iN[HV1c7jN`HU1^7kNeHT1Z7kNiHS1V7nNkHR1S7nNPIP1o6QOSIn0l6QOVIn0i6ROZIm0d6TO]Ik0b6UOaIj0]6WOdIh0[6YOgIf0X6YOjIg0T6ZOmIe0R6[OPJe0n5\\OTJb0k5^OWJb0h5^OZJa0d5_O^J`0b5_OaJ`0\\5BfJiLAW3`0hL_OX3b0hL^OW3c0iL\\OW3e0hL\\OW3e0iLZOW3g0iLXOX3h0hLWOX3j0hLUOY3k0gLUOZ3k0dLUO]3l0bLSO`3n0^LQOd3o0[LPOg3Q1VLoNl3R1RLmNP4S1oKlNT4T1iKlNY4U1eKjN]4V1aKjNa4V1^KiNd4W1[KgNi4X1VKfNl4[1RKeNQ5Z1nJdNU5\\1iJdNY5\\1eJdN]5[1bJfN_5Y1aJgNa5X1^JhNc5W1\\JkNd5U1[JkNf5U1XJlNi5T1VGTNg2i0S6R1SGZNg2e0W6^1eIcN\\6^1bIaNa6^1]IcNc6^1\\IaNf6_1XIbNj6^1TIaNn6^1QIbNR7\\1nHdNR7]1lHcNW7\\1gHeNZ7Z1eHfN]7X1cHhN_
7W1`HiNb7V1]HkNd7T1\\HkNg7U1_FaNi09j8W1YFdNd0\\OWOi0n9X1RFhNf07Y9Y2[FhMh9Y2SFhMo9d31N2N3N1N2N2N3M2N2N1O3M2O1N2O1N2O1N2O1N2N2O1N2N2N2N3M2O0O2N2O1N2N2N2N2N2N2N2O1N2N3M1O2N3M1O2O1N2N3M1O3M2N102M1O2O1M3O1N2O1N3M1O2O1N2N2O1N2O1N2N2N2O0O2O1N2N2N2O1N2N2N3N1Mocg0" + }, + { + "size": [ + 480, + 640 + ], + "counts": "2l>400001OO1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000oJb0`J^O\\5k0_JUO]5S1_JmN]5[1_JeN^5`1`J`N[5i1aJWN]5m1aJSN[5S2cJmMZ5Y2cJgMZ5]2eJcMW5c2gJ]MV5h2hJXMV5k2iJUMT5P3jJPMR5U3mJkLP5Z3nJfLn4_3QKaLm4c3QK]Lk4h3TKXLi4l3VKTLf4R4XKnKg4T4XKlKa4\\4^KdKc31XJ]4U2bKb36SJ[4Z2_Kc38nI^4]2ZKd3=hI]4c2VKe3a5[L_Je3b5ZL^Jf3b5ZL^Je3d5ZL\\Jf3e5YL[Jg3f5XLZJg3h5XLXJh3h5XLXJe3k5[LUJb3o5]LQJ`3R6`LnI]3V6bLjI\\3Y6cLgIX3^6hLbIV3a6iL_IS3e6mL[IP3i6oLWIn2l6RMTIk2o6UMQIf2U7YMkHe2W7[MiHa2[7_MeH^2_7aMaH\\2b7dM^HY2f7fMZHW2i7iMWHS2m7mMSHQ2P8nMPHm1U8SNkGk1X8TNhGi1[8WNeGf1^8ZNbGc1a8]N_G`1d8`N\\G]1g8cNYG[1j8dNVGY1m8gNSGX1o8gNQGW1Q9iNoFU1S9kNmFT1U9kNkFS1W2gLd4V2UIR1X9nNhFP1Z9POfFo0\\9POdFo0^9PObFn0`9RO`Fm0b9RO^Fm0c9SO]Fl0d9TO\\Fk0f9TOZFk0g9UOYFk0g9UOYFj0h9VOXFi0i9WOWFh0k9WOUFh0l9XOTFg0m9YOSFf0n9ZORFf0o9YOQFf0P:ZOPFf0P:ZOPFe0R:ZOnEe0S:[OmEd0T:\\OlEd0T:\\OlEc0V:\\OjEd0V:\\OjEc0W:]OiEc0W:]OiEb0Y:]OgEc0Y:]OgEb0[:]OeEc0[:]OeEb0\\:^OdEa0]:_OcEa0]:_OcEa0^:^ObEa0_:_OaEa0`:^O`Eb0`:^O`Ea0a:_O_Ea0a:_O_Ea0b:^O^Ea0c:_O]Ea0c:_O]Ea0c:_O]Ea0c:_O]E`0d:@\\E`0e:_O[Ea0e:_O[Ea0e:_O[Ea0f:^OZEb0f:^OZEa0g:_OYEa0h:^OXEb0h:^OXEb0h:^OXEb0i:]OWEc0i:]OWEc0i:]OWEb0j:]OWEc0i:]OWEc0i:]OWEc0j:\\OVEd0j:\\OVEd0k:[OUEe0k:\\OTEd0m:[OSEe0m:[OSEf0l:ZOTEf0l:ZOTEf0m:YOSEg0m:YOSEg0n:XOREh0n:XOREh0n:XOREi0m:WOSEi0m:WOSEi0n:VOREj0n:UOSEk0m:UOSEk0m:UOSEk0m:UOSEk0n:TOREl0n:TOREl0n:TOREm0m:SOSEm0n:SOQEn0n:ROREn0n:QOSEo0m:QOSEo0m:QOSEo0n:POREQ1m:oNSEQ1m:oNSER1l:oNSEQ1m:oNSEQ1m:oNSEQ1m:oNSER1l:mNUES1k:mNUES1k:mNUES1k:mNUES1k:mNUET1k:kNUEU1k:kNUEU1k:kNUEV1j:kNUEU1j:lNVET1j:lNVET1j:lNVET1j:lNVEU1i:kNWEU1i:kNWEV1h:jNXEV1g:kNYEU1g:kNYEV1f:jNZEV1f:jNZEV1f:jNZEV1e:kN[EU1e:kN[EV
1d:jN\\EV1c:kN]EU1c:kN]EU1c:kN]EU1c:kN]EV1a:kN_EU1a:kN_EV1`:jN`EV1_:kNaEU1_:kNaEU1^:lNbET1^:lNbEU1]:kNcEU1\\:lNdET1[:mNeES1[:mNeET1Y:mNgES1Y:mNgES1Y:mNgES1X:nNhES1V:nNjER1U:oNkEQ1U:oNkER1S:oNmEQ1S:oNmEQ1S:oNmEQ1R:POnEQ1P:POPFP1P:POPFP1o9QOQFP1m9QOSFo0k9SOUFm0k9SOUFn0i9SOWFm0h9TOXFl0g9UOYFk0g9UOYFk0f9VOZFk0d9VO\\Fj0c9WO]Fi0b9XO^Fh0`9ZO`Fg0^9ZObFf0]9[OcFe0\\9\\OdFd0[9]OeFd0X9^OhFb0W9_OiFb0T9@lF`0R9BnF>P9DPGRHCl7>THBl7>THBk7?UHBj7>VHBj7>VHBj7>VHCi7=WHDh7UKAk4?VKAi4?WKAi4?XK@h4`0YK_Of4b0ZK_Oe4a0\\K^Od4b0\\K^Od4b0]K]Oc4c0^K]Oa4c0`K\\O`4d0`K\\O_4e0bKZO^4f0bK[O]4e0dKZO\\4f0eKZOZ4f0fKZOZ4f0gKYOX4h0iKXOV4h0kKWOU4i0kKXOS4i0nKVOR4j0nKVOR4j0nKWOP4j0QLUOn3l0SLSOm3m0SLTOl3l0ULTOi3m0XLROh3n0YLROf3n0ZLROf3n0[LROd3n0\\LROd3n0]LQOb3P1_LPO`3P1aLoN]3S1cLnN\\3R1eLnNY3S1gLnNX3R1iLmNW3S1jLmNU3S1kLmNT3T1lLmNR3T1PMkNm2V1TMkNk2U1VMkNh2U1YMlNe2U1[MkNd2U1^MkN`2U1cMiN\\2V1gMiNY2U1iMmNT2S1mMmNR2S1PNmNl1U1UNlNj1R1YNnNe1R1]NmNa1U1_NlN_1T1cNmNZ1P1kNPOT1m0POROP1m0QOTOm0m0SOUOk0k0VOUOh0i0\\OWO`0j0CUO=i0EXO9h0IYO4g0MZO1b05^OJa08_OF?=C@=a0D]O=e0BZO=h0DRO=Q1ElN;U1FhN;Z1FbN;_1F`N7d1JZN4i1LVN1n11nMOT22eMWMdLg2h54bMO`22]MMe24XMMi27QMJQ36lLKV36gLH]3:_LCf3?WL@j3b0SL_On3b0PL^OP4g0eIaLQ2f2^4S1YKlNk4Z1\\IQL^1e2W5_1gJ`N[5`1cJ`N_5`1`J_Nb5b1[J\\Nh5e1VJZNl5g1SJYNn5g1oI[NQ6f1nIXNU6h1iIVN[6j1dIUN^6k1`IVN`6k1^IVNc6i1\\IUNg6l1WIPNo6P2PIoMR7R2kHoMU7R2jHnMW7R2gHmM\\7S2bHjMb7V2]HhMg7X2WHiMi7X2VHhMk7X2SHhMo7X2PHeMT8[2jGdMX8a42N1O2L4N2N101M3K5N10000O2M3M200O3L3L4M2O2N1N3M3M201O0O2M3N2N1O2M2M4N2M2N3M3M3M2L4N3N1O2N101N10_J" + } + ], + "model_output": "A piece of white tissue paper with a soft texture and a slightly crumpled appearance, featuring a gradient of brown discoloration on one side, indicative of a stain or shadow, is on the dining table." 
+ }, + { + "image": "images/caption_detailed_13.png", + "subject_name": "person", + "object_name": "pavement", + "predicate_name": "walking on", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "hcW31Z=0ea91odJ5QiK7RCN^L5O0100O0102M2N4M2M:E7HTl0OZbk3" + }, + { + "size": [ + 427, + 640 + ], + "counts": "\\n_13W=1N2N3M2N2N2N2N2N2N2N2O1M3N2N2N2O1N2N2N2N2O1N2N2N2N2O1N2N2N2O1N2N2N2N2N2O1N2N2N2M3[NbM\\H_2c7cMZH_2d7cMZH_2e7bMXHa2h7_MWHb2h7^MWHd2i7]MUHd2j7]MTHe2l7[MRHg2n7YMPHi2P8WMnGk2R8UMmGl2T8SMjGo2U8RMiGP3W8QMgGP3Y8PMeGR3[8nLbGU3^8lL_GV3a8j00000001OO100000000O10000O100000000000000000000000000000000O10000O100O100O1N2N2N2N2O1N200O1O100O1O1O1O1N2jNlJSJV5k5mJSJT5l5nJXIO:Z5]6UK[IP5e6k0N2O1O1N2ZO\\IcJf6\\5_I^Jc6b5_I[Jb6b5e0K5OaI[JZ5c5hJ]JX5b5hJ_JX5_5hJdJW5Y5gJmJX5R5dJSK\\5k4bJgJRO?\\6i4bJ[K^5d4aJ^K_5b4\\JcKd5\\4ZJgKf5Y4XJhKi5X4VJiKj5W4UJjKk5V4SJkKn5U4oInKP6S4oImKS6R4lIoKT6Q4kIoKV6P4iIQLX6o3fIRL[6n3cITL]6k3cIUL^6j3bIWL^6h3cIWL^6i3aIWL`6i3_IWLb6i3]IXLc6W5100O10000O100O1O10000002N5K;E3M4L4VLlHU1X7eNRIS1R7eNSIZ1o6bNTI]1n6_NYI\\1j6^N[I_1i6[NZIf1g6TN]Il1e6lMaIT2a6dMeI\\2`6XMgIh2^80001O000000O100DTMWFn2d9]MTFe2l9=0000O10O1001N0lNfLYHZ3`7nL^HS3Y7YM]Hn2a7WMZHk2d7XMXHk2f7XMXHi2f7^MSHd2k7_1O2A>ASJ^Io5`6TJ_Il5]6ZJcId5X6bJiI\\5W6fJhIY5W6jJiIT5W6mJiIR5V6PKjIo4V6SKiIl4V6UKkIj4U6UKlIk4S6dJcI7;T5S6cJdI89U5T6aJeI97V5_6jJ`IW5`6jJ_IV5b6jJ\\IW5d6kJYIV5g6g000000000000000000000000000000000cNgIcJLQ1P3nN:P5hLTKa0h0\\2WO9m4jLTKj0?T2^O9o4iLTKl01SO3o2GTM0Q3Q5mLTKl01TO1o2JRM1P3o4oLSKm02XOKk22nL2Q3l4QMSKm03\\29@a4XMRKm03[2;_O`4YMRKo02Y24fLMg2k4[MRKo03W2>^O]4\\MUK2Ma04a2?^O[4^M_Kc0H_2a0^OZ4]M^Kg0G]2a0_Om4UNaJ\\2a0@o4RNaJ^2h0XOW4fMkJN?=Hg2f0XOX4fMeK3^Oo2e0XOj5g0VJYOk5f0UJZOk5f0VJYOk5f0UJZOk5f0UJZOl5e0TJ[Ol5e0TJ[Om5d0SJ\\On5c0RJ]Oo5b0QJ^OP6b0oI^OQ6b0oI^OR6a0nI_OS6`0mI@U6>kIBV6=jICV6=jICW6mGZO@O123O17_8QEBo:N1000000O100O2O1N2NQW_7" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"f;Y3g;000000000000000001O0000000000000000O100000000000000O100O1O1000000000000000000001O001O1O001O001O1O3M1O1O000O100000000000O10001O0000000O10000O1000000O1000000M3O1O1N2O1L4J6N2O100J6O1OISE]Lm:k3000000000000O1O100O100N2O10000000000000000001O0000000000VOjKjFV4P:01O00000000001O0000001O00001O0000001O00000000001O000000000^ORERMo:_3000000001O00000000001O000000001O00000000001O0000001O000000001O0000000000001O00000000001O0000000000001O0000001O00000000001O0000001O000000001O000000001O000000000000001O000000001O00000000001O0000001O00000000001N11O000001O000000001O00000000001O0000001O000000001O00001O001O0000001O00001O0000001O0000001O0000001O0000001O00001O000000001O0000001O0000001O0000000000001O0000001O00001O00001O001O00001O00001O000000001O00001O0000001O00001O00001O0000001O00001O00001O0000001O00001O0000001O0000001O00001O0000001O00001O0000001O00001O00001O0000001O00001O00001O000000001O000000001O00001O0000000000001O001O0XNlBd1U=[NkBe1U=ZNlBf1W=000O11O000000000000O1000000000000000000O10000000000000000000000O10000000000000000000000O100000000000000O10000000000000000O10000000000000000000000000000O1000000000000000000000000000000000000O10000000000000000000000000000000000000000O100000000000000000000000000000000000000O1000000000000000000000000000000O100000000000000000000O10000000000000000000000000000000000O1000000000000000000UC" + } + ], + "model_output": "A white pickup truck with a visible rear section featuring a tailgate, a license plate mounted in the center, a rear bumper, and a partial view of a taillight on the right side, is driving on the road." 
+ }, + { + "image": "images/caption_detailed_18.png", + "subject_name": "person", + "object_name": "baseball glove", + "predicate_name": "wearing", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "YRa01Z=000Uh?4eW@8I3L5L2N3M3N1\\DTOf:n0XEVOc:m0YEWOe:k0XEWO2Jl9Q1oEWO5Hk9S1mEXO7EEOk9U1VFYO9ECOm9U1TFYO4E^9N`F:3I\\9MeF50NZ9NgF3OOY9OjF0IUOJk0b91mFNE7]9LnFME7[9MjF3HnNNR1_9NhF5J1]9KdF9NL^9d20OO2O010O0001O001O00000O2N2N10100O001O0N3O010O000M3001O001000000O1O1O1O1O2N0G^LiFd3R9=N2N2O1N2O010O100O01000O010O10000^GhKQ8X4nGjKQ8V4nGlKQ8T4jGQLV8P4gGSLX8`400010O01O01O000010O000010O0dMiGEW85oGJQ81UHNk70VH1j7NWH1k7MUH4k7KVH5k7JUH7j7JUH6l7JSH6m7KRH5o7LoG4R8LmG4V8JiG6X8JgG6Z8lM`G22U12n0T9SOjFm0V9TOiFl0X9UOfFk0[9b11O1O2N5J4XOYFfMh9X2YFhMh9V2ZFiMg9`0TFc06mNg9>TFd07mNf9=TFe07mNf9>SFd08nNf9oEd0m:\\OTEa0n:_ORE`0P;_OPE`0R;_OnD`0T;@kD>X;BgD;];EbD:_;FbD8`;H`D5b;M]D1e;N^DMe;3^DFf;:\\DZOn;e0<10O100O100O10lC]O_O0k;d0dDC[;=cDE\\;;cDG^;8[D1c;n0N00O110O002M2N2N100O1O100O2N1O0O1O2N3M3L3N3K4L4L5M3KjSi4" + }, + { + "size": [ + 427, + 640 + ], + "counts": "V\\o23Z=N^n25[QM3N2N2N2ZOEXD>e;EXD?e;CWDb0f;AVDb0i;a0O1N110O01O1O2N1O010O1O101M2O1O0O1O2N2N3K6K3L4L4N3L4L^fh4" + } + ], + "model_output": "A baseball catcher in full gear, including a helmet with a face mask, chest protector, shin guards, and a mitt on the left hand, is crouched in a ready position. The catcher is wearing a dark blue jersey with red and white accents, gray pants, and black cleats with red detailing. The catcher's right hand is not visible, and the left hand is holding a baseball glove." 
+ }, + { + "image": "images/caption_detailed_19.png", + "subject_name": "person", + "object_name": "wall", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 640, + 425 + ], + "counts": "j8o01UO03m0J\\16iM0N12JX>b1gA]N=;BM00000:0A1N11O50:OB1N10l=P6XBSJN?af4=00000O100001O00001O1O001O001QLi@S3X?lLh@T3X?lLi@S3X?kLi@U3X?jLh@W3X?hLh@X3X?gLi@Z3W?dLk@e3l>XLVAl3h>gKbA\\4P?102M1O100O1O1O1O1O100O001O010O1O001O10O01O1O00100O1O1O10O01O1O1O00001O010O1O1O1O0000010O00001O0000O1O2SNYAZNl>d1XAVNl>h1WASNm>m1TAnMQ?P2QAkMS?U2n@fMW?X2k@dMX?\\2i@_M[?`2h@ZM\\?f2g@SM^?l2k00001N101O1O0O2N101N2N101N1O2O1O1O1O1O1O1O1O2N1O1N2O2O1N1O1O1O1O1O2N1O01O001N3N2N1N3M2N2O1N1O2O1O1N00O20O103K2O1O1O1O003M1O10N11n^OXMl`0n2O1OK5N2100O1O1O101O000O100000002NO1O1001O7I0O2O0O100O2N101OO1000O2O00O1000O010000O10O1000O10O100000O10000O1000000O10001O00001O0O1000001O000O2O001O001O1O0O2O001O001O1O0O2O00001N2O001O1N3N3L1000001N2O0O3N2N2N3M1N3N1N3M3M2O2M4L5J6K5J6J6Hh\\b0" + }, + { + "size": [ + 640, + 425 + ], + "counts": 
"Yi0;6HWb09f]OH6V3a?m0O00000000O10O1000O100000000O10000000O1000O1000000O100000O100000O1000000O100O10000000000O10000O10O1000O10000O100000000O10O100000O100000001O000O0100000O1000000O100000O100000O100000000O0100000000000O10000000O10000000O10000001O00000000000O10000000000000000O100000001O0010O0001O0101O1N2O1O2N2M2O2N1O3MROfLWAW3h>jL[AT3d>nL]AX3Z>jLgAU3W>lLnAP3P>RMQBl2n=UMXBd2h=^MYB`2f=aM]B]2a=eMdBV2Y=mMkBo1S=RNRCi1mVC^Oj01OO1WHRIS7n6mHRIS7n6g0O003M00_HVI`6i6`IWI`6i6_IXIa6h6_IXIa6h6TIWI_O020[7i6oHWID00020\\7h6_IXIa6h6^IYIa6h6_IXIa6h6^IYIc6f6nHXIN2T7f6\\I[Ic6f6[I\\Ie6d6]IZIc6f6oHWID051Z7g6mHXID0j7h6RIYIb6g6^IYIb6g6]IZId6e6[IZIh6e6XI[Ih6e6VI[Il6e6RI]Io6b6QI]IP7c6oH^IQ7b6oH]IR7c6nH]IS7b6mH]IT7c6lH]IT7c6lH]IU7b6lH]IT7c6k0000001O0000001O1O0000001OeGbIg7^6YHbIh7]6XHbIi7^6c00001O00000000aGcIo7\\6PHeIP8[6a00000000]GdIV8[6iGfIW8Z6iGfIW8Z6jGeIV8[6>000001O01O00000000000000000000000000000000000000000O2O00000000000\\GcIX8]6hGcIX8]6<0000000000000000000000000000001O000001O0000000000001OO101O1O00001O1Oj3_9fLSGX3m8jLQGV3o8mLnFS3R9nLlFS3T9\\M\\FT22iLb9[1SFc1f0QMW9]4n0O2K7ULnDA=f2X;^MRELXO4>5Pjm1" + }, + { + "size": [ + 493, + 640 + ], + "counts": 
"V;P11T2W;lMiDT2W;lMjDS2V;mMjDS2V;mMjDS2V;mMjDS2V;mMjDS2V;mMjDS2V;mMjDS2V;mMjDS2V;lMkDT2U;kMlDU2T;jMmDV2S;jMmDV2S;jMmDV2S;jMmDV2S;jMmDV2S;jMmDV2R;kMnDU2R;kMnDU2S;jMmDV2R;kMnDU2R;kMnDU2R;kMnDU2R;kMnDU2R;kMnDU2R;lMmDT2S;mMlDS2T;mMlDS2T;mMlDS2T;mMlDS2T;lMmDT2S;mMlDS2T;mMlDS2T;mMkDT2U;kMlDU2T;kMlDU2T;kMlDU2T;lMjDU2V;Q100000000000000O10000000000O10000000000000000O100000000000000000000O1000000000000000000000000000000000000000000000000001O00000000000000000000000000001O0000001O0000001O0000000000000000001O000000000000001O0000001O0000001O000000001O0000001O0000001O00001O00001O001O001O001O1gL^De2b;ZM`De2`;ZMbDe2_;YMcDf2^;WMeDh2[;WMfDi2[;UMgDj2Y;UMhDk2Y;SMiDl2X;RMiDn2W;RMjDm2l;O1O001O00001O001O0000001O001O001O001O000000001O1O001O00000000001O00001O1O00001O001O00001O000000001O0000001O001O1O1O00000000001O001O001O001O001O00001O001O001O001O001O00000000001O0000001O00001O00001O0001O01O0O101O001O001O001O001O00001O0000001O001O00001O001O000000001O001O002N00001O00000000001O0000001O001O00001O001O0000001O00001O001O1O1O00000000001O00001O00010O0O101O001O000010O0001O001O001O1O00000000001O0000001O001O00001O00001O00001O001O001O001O001O0000001O0000001O00001O001O0000001O001O001O1O001O001N2O00hhP3" + } + ], + "model_output": "A red and black diesel locomotive numbered 1803, featuring a yellow front bumper, two sets of headlights, and a pair of windshields with wipers, is driving on the railroad. The locomotive has a black chassis with yellow railings and steps, and is coupled to a maroon freight car with visible ribbing and a white roof." 
+ }, + { + "image": "images/caption_detailed_21.png", + "subject_name": "sandwich", + "object_name": "sandwich", + "predicate_name": "leaning on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "lg_32m>2O1N101N2O1M4M2O1N2O0N3N101N1O2N1SOUOdCm0Z:G8J3O3I5M4N1N2M3M4L3N1O2M3M3N2N1O2M3N2M3O1N2N2N101O0O2O0O2O1N101N101O1N101O1N101N2O001N2N101O001N2O1O0O2O001N2O001N2O1N2O1O001N101O001N101N10001O0O101O1O0O101O000O101O00000O101O0O10001N10000O1000000O10000000O010O10000O01000O100000O010000000000O100000O10O100000000O0100000000000O1000000000O10O10000000000O10000O100O1000000O1000000O1000000000000000000O10000O1000000O10010O0O10000000000O11O00000000O11O00O100001O00O10000O100O2N11O1O0000O2N1000000O20O000001O0000010O0O11O010OO10001O00000O1100O0000O1O10001N1O1001O00O2O0O100O1O10001O000O1O10001N1L4N21O1O00O2L3001O01N100003NO000O10010O000000010O000O11O01O00001O0000001O0O100000000010O01N101O001N101O00001O00000001O01O00001O00010O00000O101O000000001O00001O00001O001N10001O0000001O0000001O0000001O00001O00001O01O0001O00001O001O001O001O1O001O100O2N2N2M3N2M3N3L3N3L3N1O1N2O1O3L2N2O2M4M3M2M3N4K5Ib]f2" + } + ], + "model_output": "A sandwich with white bread, containing a layer of green lettuce and a slice of tomato, with a visible spread that appears to be mayonnaise, is attached to a sandwich." 
+ }, + { + "image": "images/caption_detailed_22.png", + "subject_name": "cat", + "object_name": "sand", + "predicate_name": "sitting on", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "g^S67T=3N3M101N3]CBS5b0@W30oKf5ImJ=1a0]OY30PL\\62aIHj0j0XO]32nK[6MfJe0nN_32oKZ6LiJd0jNb32oKh7=VHe32nKi7;UHh32mKk79SHj32mKl77SHl31mKm76QHn32lKo72PHS40lKR8NoGV4OlKT8JoGY4MnK^8R4bGnK^8S4_GoKa8^4001O00001O1O001OOQM^GV1b8hNaGW1^8hNdGX1\\8hNeGW1[8jNdGV1\\8lNaGU1^8nN`GR1`8m1VMXKPMh4n2\\KZJNb2f4S3fKhLZ4W3jKfLV4Z3mKcLS4]3RL]Lo3c3ULXLl3h3TLVLn3j3RLTLP4l3PLRLR4n3nKnKV4S4iKjKZ4V4eKjK\\4V4dK^Kh4b4VK\\Kn4e4jJ[J@h0P6m4`J\\J^Oe0U6P5\\JPKh5P5WJmJm5S5SJkJo5V5PJiJQ6W5oIhJR6X5nIcJW6]5hI]JEKl00h4h5gJ[Jg0Oa4f5fJ\\Jj0N`4f5fJZJf6e5[I\\Jd6b5_I]Ja6c5_I\\Jb6d5c000000000000QJeJkNI\\4b5dK\\J`0<\\OI_4_5eK]JO`KTIb2l6\\MXIb2h6]MYIc2h6[MZId2f6[M[Ie2e6[M[Ie2e6ZM\\If2d6ZM\\If2d6ZM\\If2d6ZM\\If2d6ZM\\If2d6ZM\\If2d6ZM\\If2d6ZM\\If2e6YM[Ig2e6ZMZIf2f6[MXIf2h6o10000000mIRIH3j5k6a000000gITIR6l6mIUI5Me5n6VJUIS6k6lIVIT6Q700RJkHe5U7UJmH00j5S7]JlHb5Q7bJnH^5R7bJnH^5R7aJoH_5P7aJQI_5o6bJPI^5P7bJPI^5Q7bJnH^5S7aJmH_5S7aJmH_5S7aJmH_5S7=0000O10000000000001O0RKmHc3S7\\LQIa3o6`KPIa01o3o6`KPIa01o3o6_KQIb00o3P7^KQIb0OP4P7^KQIa01P4o6^KRI`0OR4o6_KQIa0MQ4R7_KnH`5R7=000000000000001O001O00000000000000000YJiHY5X7=0000O100000000000000001O00O11O0000000000000000001O00000000001YKjHV3V7iLoHS3Q7lLQIS3o6mLRIR3n6nLTIP3l6oL[Ik2f6TMhI^2X6bMiI]2W6dMlIX2T6hM[Ji1f5WNdJ^1\\5bNeJ]1[5dNeJ[1[5gNdJX1\\5iNdJV1\\5kNcJU1]5kNdJT1\\5mNcJS1]5mNcJS1]5mNcJWOSNT1[7DaJUOYNV1W7D`JUO\\NU1T7G_JSO_NU1R7I]JROcNT1P7J[JSOfNS1P7H[JSOgNU1n6H[JROiNU1m6HZJSOiNU1m6HZJROjNV1m6F]JoNhN[1k6F\\K:e4D\\KcJC]5>bJB^5a0_J_Ob5a0\\J@d5a0[J_Oe5c0YJ]Og5g0UJYOk5k0oIWOR6S1mH]MMa1V7]1\\H]M8W1[7T2_HmMa7o3001O001O000000001O00O100000000O100000aJ`HR5_7nJcHQ5]7oJdHP5]7oJdHP5\\7oJeHQ5[7PKdHP5\\7PKdHP5]7oJcHQ5]7oJcHQ5]7PKbHP5^7SK^Hn4b7>1O3M1O001O0000000000001aLTH\\1m7aNVH^1k7VMSHg02S2l7TMSHh03S2j7UMSHh03S2j7UMTHh02R2k7TMTH:0C2n2j7UMTH;OB4m2i7VMTH;0A3n2i7VMTH;0A3n2j7UMSH;1B3m2j7UMRHm0Qc04N000
O100000000000000000000000000000000000000000000000000000000000000000000000O100000O10000000000000000000000000000000000000000000000000000000000000000000000000000O10000001N4M7Ib0\\OjSQ10WlnN3X\\OLSRS6" + } + ], + "model_output": "An individual with a contemplative expression, wearing a short-sleeved shirt and pants, is sitting on a bench with one leg crossed over the other. The person's arms are crossed over the crossed leg, and they appear to be wearing shoes." + }, + { + "image": "images/caption_detailed_24.png", + "subject_name": "airplane", + "object_name": "sea", + "predicate_name": "flying over", + "mask_rles": [ + { + "size": [ + 488, + 640 + ], + "counts": "kjg73T?2O00000O1K5VAH]>NfA:MI0OT>1mA8OHO1T>OnA?NBT>OnA?NBT>OnAf0R>8O7E=D6M100000O1O1L4I7J>Cc0]O8L10000O10000000000000000000O100N5K7G6K4N10O1M4Ha0E2N2O001N_nn0" + }, + { + "size": [ + 488, + 640 + ], + "counts": "Y7k0Q1R4^7nKbHR4_7mKbHR4^7nKbHS4]7mKcHS4]7mKcHS4\\7mKfHR4Z7jKeG[OQ1k4Z7fKnH[4Q7cKQI]4o6bKSI]4m6bKTI^4l6bKTI_4k6aKUI_4k6`KVIa4i6_KWIa4i6^KXIb4h6^KXIc4g6\\KZId4f6\\KYIf4f6ZKZIf4f6ZKYIg4g6ZKWIh4h6YKhHGWOU5P8UKfHY5Y7iJcHY5]7hJaHZ5^7gJaHZ5_7fJ`H[5^7eJbH\\5^7dJbH\\5^7dJbH\\5^7dJ_H`5`7`J^Hc5a7^J^Hb5b7bJYH_5g7eJUH[5k7hJPH[5o7o0000001O001O0000000O110O00001O00gIYHd2ObNh7fN`Hc2KfNe7dNgH_OA0OL7h1N9d7bNlH[OC0=f1C0001OO10000O100O2N1O1O1PLc0kEUO^19b8k0dE[Ob1Kg8V2mFkMW9>^ESONj1i0hNW;^1XDhNi;[22N2N002N1O1O1O001O1O001O001O001O00001O001O1OmNaM]E_2Z:mMdE7[Ol0j:UOkEKAm0\\:ASF]OFQ1n9L_FgNL\\1Q9b0PH^Oo7d0oG]OP8d0oG]Oa5gNXJ2iNl1]1[Ob5iNUJ5eNl1a1WOe5kNRJb28cNf5oNnI_2:cNh5POlI_2:aNj5TOjI[2:aNl5VOkIW28cNm5XOkI\\OZOS2k0ZOP6XOeJZ1ZO^OQ6ZOfJV1XO@R6]OeJR1WOBS6^OeJP1VODU6^OcJo0VODX6]OaJQ1ROE]6[O`JR1nNFb6]O[JX1dN_MIl1X7@WJa3i5jLkIW3U6kLhIV3X6mLeIS3Z6SM_Io2a6VMWIm2i6YMlHl2U7c2000000000000000000O100001O0000000000000000000000000000000000O10000001O00O1000000000000000000000000001O0000000000000000000000000O10000O1SOSI^In6[6S1001O1O00001O00001OmIeGY5[8cJkG\\5T8cJnG\\5R8dJnG\\5R8cJoG^5P8cJoG]5Q8cJnG^5R8dJlG\\5T8hJfG[5Y8j0000bJiGeN0n4W8TL\\Hj
3d7PLbHQ4]7nKeHQ4[7mKgHT4X7kKiHU4W7jKjHW4V7gKkHZ4T7fKlHZ4T7`KRIa4m6[KXIe4g6[KYIe4g6ZKZIh4d6XK\\Ii4c6WK]Ij4a6WK_Ii4a6WK_Ii4a6WK_Ij4`6VK`Ik4_6TKbIm4]6SKcIm4^6PKcIQ5]6nJdIS5[6mJdIT5\\6lJaIW5_6iJRIf5n6ZJQIh5n6ZJPIf5P7\\JmHe5R7^JlHb5T7_JjHb5V7_JiHa5W7^JiHc5X7\\JgHe5Y7[JeHg5Z7ZJfHf5Z7ZJeHg5[7ZJdHg5[7ZJdHf5\\7aJ\\H`5d7bJZH^5f7cJYH]5g7R101O00aIZH[5g7dJ`HV5`7iJbH=Kh0M9e7bNdH9Oi0J;c7aNPIUOB6=e1A>`7_N_Jo0UNa0\\7]NcJP1RNc0[7XNjJR1lMf0Z7VNnJP1kMi0X7PNWK7]Ni1b;000O1O10000O1O2O0O100N2_ObMUDa2b;g02YMQDQ2Z<_MRDZ2a00001O0O1000000O1N2O100N2]LDXH>[7nMdAT2\\>PN^AR2a>PNVAX2i>nMY@k2b=oLUCe4j<_KfBALT5\\=W1O1O1M3O100O1N2O1O100O1M3O100O1O1O100O100O10000000000000000000000000000001O00aNUIiEk6U:XI`EJXOo6W;a1N2I7N2O1O100O1O1O100O1O100O1O100O10000O1O1O1O100O1O100TOfFQG[9l8iFSGW9j8oFdFD9]9Q9XGmFi8R9S1O100O100oM^EdJb:[5`EcJa:\\5aEbJ`:^5aE_Ja:`5aE\\Jb:c5_E\\Jb:c5bEWJa:i5aESJa:m5`ESIAa0R;[6_ESI_OI2129P;j6\\EXIAE5ON0Q;T7ZE_IE^OX;R7SElIj:T6VEmIi:S6WEmIi:R6XEmIi:S6WEmIi:R6XEmIi:S6XEkIi:T6XElIh:T6YEjIh:V6YEgIi:X6ZEbIj:^6WE`Ij:`6YE]Ig:c6ZE_Ic:a6fEXIX:h6fEXI\\:i6`EZI`:W6aDiIU1J[:\\6eDcIk<\\6=000000000000O10hBgIiYL^A?GoN2k2j>eMjA1IT2`>eMlA3HV2_>bMnA4GW2h`0N3M1O2J6XOZ^OQOia0h0b^ORO_a0j0X1D;E8Hm]T2N^njM=Ji00i7ZN]Ag1c>]NWAe1i>bNo@_1Q?h1O10000000000O10000O10000O100O1O100O100O1O100O100O1O100O1O1O1O1O1O1O1O1N2M3lKeJ`I^5_6dJ^I^5`6eJZI`5e6aJoHk5P7VJfHT6Z7mIYGM`NKh0`6^9iIXGc0gNg5P:gIVGb7i8_HVGb7i8^HVGd7i8]HUGe7k8[HTGf7k8[HUGe7j8\\HVGd7i8]HWGc7i8^HVGb7i8_HVGb7j8^HVGb7i8`HUGa7k8bHRG^7o8cHnF^7R9gHiFY7X9jHbFX7_9lHZFV7f9mHSFW7n9]1001O1O1O1O2TGlEh7V:QHSFk7U;L2N2N2N2`LXDdNi;X1^DdNc;X1bDfN_;W1dDhN];V1dDjN];S1fDlN[;P1hDPOY;m0iDSOX;k0jDTOW;h0lDXOU;c0PE\\OY;ZMeC2?[1j0Y1oYLmAg3Q>\\LnAd3o=_LQBa3n=bLoA_3o=dLPB\\3n=gLQBY3n=jLoAW3n=mLPBT3n=oLPBR3o=PMmAS3l=d1J6J6E;D3M101N2O2M3N1O1N0100O010O01O0001000O010O001M201O100O1O1O1O1N2N3N1N2O101O000O2O0O101O0O2O1O1O1N2N2O1N3N1O2M3N2N1N4M2LLXCWNeoDA]12d94_]A4YAIc>;N2O1O10000O01O0102NO0020aA@[>c000O00100O10O1O001O001O1O001O001O001N2O1M3O1O10OO3M2O00Y`g5" + } + ], + "model_output": "A small 
bird with a stout build, featuring a mix of brown and white plumage, stands in front of a wall. The bird has a distinctive brown streak above the eye, a short, conical beak, and a white underbelly. Its wings display a pattern of brown and black markings, and the tail is long and narrow with brown and white bands. The legs are thin and pinkish, ending in sharp, clawed toes." + }, + { + "image": "images/caption_detailed_28.png", + "subject_name": "bird", + "object_name": "floor", + "predicate_name": "walking on", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "\\WR3=k<5L4K5M3L3N3M3L8H>C3M2N2M3N3MAVEdNd:\\1^EdN`:\\1bEdN]:\\1dEdNZ:]1gEbNY:]1jEaNV:_1kE`NT:a1nE]NR:c1oE\\Nj9JgEk1`0ZNg9NhEi1a0XNf90iEh1b0WNBLj96RFg1c0VN@Oi96SFe1e0VN^OOi98SFc1g0UN]O1f99VFa1g0UN]O1e99XFa1g0SN]O3`9=\\F^1l0VNd8`0`FZ1k0WN]8f0iFS1j0WN[8h0kFQ1j0WNZ8h0mFQ1h0XNZ8k2fGUMZ8k2eGVM[8j2eGVMZ8k2eGVM[8j2dGWM\\8i2cGWM^8i2aGWM`8h2_GZMa8f2\\G]Md8c2TGeMl8[2oFkMP9j2TG^Ll8]3YGbLg8[3]GdLd8Z3^GeLd8X3j0dMPFW1U:]NTFa1Q:YNTFb1FYNi:c1[EaN`:^1cEaN]:]1fEbNZ:\\1hEcNX:\\1kEbNU:[1oEdNR:Z1PFeNP:Z1QFfNP:X1RFgNo9V1SFjNP:o0UFPOm9k0VFUOk9i0VFWOl9e0VF[Om9a0TF_Oo9=RFCR:7QFHi;000QZY4" + }, + { + "size": [ + 427, + 640 + ], + "counts": "\\7m5^7000000000000000000000000000000000000000000000000000001O00O10000001O000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001O000000000000000000000000000000000000000000000000000000000000000000000000000000001O00O1001O00000000000000000000000000000000000000000000000000001O1O4L3M2N2N1O2N1O2N2N1O3M1O2NcJYOYOV1g0jNQO^1R5b0\\Od0A?@`0YOg0TOl0L4O1M3N2N2N2N2N2N2M3N2M3O1N2O1N2O1N2N2N2N2N2O1N2O1O1O21M3N3M2N2N3M3M2N1O3M102MGRJjJl5f5K3M3M2N3M2N3M2N3M3M1O2N2N4L5L7H9G9F9H7I7IgLoLUNh2k1XM[Nc2e1\\MdN[2\\1eMlNS2T1mMUOi1k0XN\\Ob1c0^NDZ1=fNJS16mN3j0MVOc03DMS61O1^DN];2cDN\\;3dDM];2cDM^;2cDN\\;4bDN];1eDM];0_V20giM1O0O11O0O10O11000SlV2" + }, + { + "size": [ + 375, + 500 + ], + "counts": 
"nnW35a;2N2ZEJd9=XFFf9=UFFj9=nEJQ:7lEKT:l000000000001O001O001O000O1001O0O100O10000O10000O10000000O1000001N10001O0O10O1O100000O0100000000O100000010O0100O010O01O00000000000O100000000000000000000dNeN\\H[1c7gN\\HY1a7kN^HU1b7kN^HU1a7lN_HT1a7lN^HU1a7lN_HT1`7mN`HS1_7nNaHR1]7PO[HDVO]1]8ROZHEWOY1^8UOWHV1i7lNSHV1l7lNoGX1Q8iNlGY1T8iNhGY1X8iNeGX1[8R10000oMoL]KP3b4SM\\Km2c4VM[Kj2e4XMYKh2f4ZMYKf2g4[MWKf2i4]MnI^Oj0U3X5]MkICk0P3Z5_MhICl0o2]5jMbJW2^5iM`JY2`5gM]J\\2d5cMYJ`2g5`MXJa2i5_MUJb2k5^MTJc2n5[MRJe2Q6XMnIi2U6TMjIm2W6SMhIm2Z6QMfIo2]6nLcIR3^6nLbIQ3_6nLaIR3`6nL_IW1DNn6kN^IQ3c6oL^Io2d6PM[IP3f6PMZIo2g6QMYIn2i6QMWIn2k6QMUIn2l6QMTIo2m6QMRIo2n6QMSIn2m6TMPIm2Q7TMeH@1_3Z7i01O00001O0000O10000O1000000O1XOhKQJX4o5hKPJY4o5iKoIX4Q6hKmIZ4S6gKiI\\4X6eKaI`4`6;1O00000000001O2N1O1O000000O1N2O100O1000000O100000000001O2N6J3M2N4L1O1OO100O1O100O1O10000001O1O001O1O1O1O1O2N1OdH" + } + ], + "model_output": "A tree with numerous slender branches covered in clusters of small, vibrant green leaves stands beside a stop sign. The branches extend outward in various directions, creating a dense canopy. The leaves are oval-shaped with smooth edges, and the branches are thin and slightly curved." 
+ }, + { + "image": "images/caption_detailed_30.png", + "subject_name": "grass", + "object_name": "stop sign", + "predicate_name": "on back of", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "\\:c4^:O000000001O1O001O1O001O1O1O2N1O001O00001O000000O10000O100O1O100O1O1O1O100O100O10000001O001O001O00O100000000001O1O1OO1O1000000O1O1000000000000001O001O1O1O001O0000001O0000001O1O0000001O1O001O1O1O1O1O001O1O1O1O001O001O1O1O1O1O1O1O1O1O1O001O1O002N001O1O001O1O1O001O1N2O1O001O1O002N010O1O1O1O001O1O1O1O001O1O1O1O1O001O1O1O1O001O002N1O001O001O1O1O1O001O001O1O001O1O1O001O1O1O1O1O001O1O1O1O1O1O001O1O1O1O1O001O1O1O1O1O001O1O1O1O001O1O1O001O1O001O1O001O1O001O001O00001O00001O00000000000000000000000000000000O1000000000000000000000000O1000000000000000000000000000O2DoAEogX5" + }, + { + "size": [ + 480, + 640 + ], + "counts": "abe1h1V=Z1kBQMh;c3@l0TOf0ZO?A4L2N2N2N2N2N2M201N2N2N2N2N1O2N2N2N2N2N2N1O2N2N2N2N1O2N2N2N2O1N2M2O2N3M1O2N[MPI]Mn6b2VI\\Mj6d2VI]Mh6d2WI_Mf6b2XI`Mg6a2YI`Me6a2[I`Mc6a2\\IaMa6a5OnL`IeM^6[2dIdM[6]2eIQKNS2[6m2gIcMX6^2hIcMV6^2jIcMT6^2lIcMR6]2oIdMo5^2PJcMn5i5N2N2N2N]MXJPLg5P4ZJQLd5P4\\JPLc5R4\\JPLa5i6N2N2N1OYMgJjKW5W4iJjKU5W4kJjKS5W4mJjKQ5S7OUMPKiKn4X4SKgKl4Z4SKhKk4Y4UKiKh4Y7NPMZKhKe4j3[KdI0c2c4Y4]KgKb4Z4^KhK`4]7NlLbKhK]4Y4cKhK[4Y4dKhK[4`7N2N2N2N2N2N1O2N2NfIWL`1g3bNYL^1e3cN\\L]1b3Q5O2O1MbIbL_1]3aNcL_MOa1\\3Q1eL^MOa1[3c3eL^LX3g7N3N1N1OcJoLmNP3c6NkJUMWNj2j1XMUIN6M]4l2Y2TMYI0j4i2V7N3N2N3N1N1O2N1O2N2O0O1O2N1000O1000000001OO1000001OO10000001O0O100000000000000000001N1000000000000000000000001O000O1000000000000000000O1000001O00000000000000001N100000000000000000000O2O0000000000000000000O1000001O00000000000O10000000001O00000O10000000000O100000001O00000000000O11O00O10001O000000000000000O2O000000000000000O101N1O1O1O1O1O1O1O100N2O1O2N1O1N2O100O2N1O2N2N2N2N1N3N2N3N3L2N1O3M2N2N2M3N4L2O1N2M3N3M2N2N2N2M4N0O2N3M3M2N2N2M3N2N2O1N2N3M3L3N3M1O3M2N2N2O3L2M3N2N3M1O2N3M3M2O1N2M4L3N3M1O3M2O1N2N2N3M2M4L3N2O1N2N2O1M4N2L3M3N3M1O2N2K5H:]Oa0H9^Oa0Df2`1000001O1MgN" + } + ], + 
"model_output": "A lush bouquet of deep red roses with velvety petals, tightly clustered to form a dome shape, with hints of green foliage peeking through the blooms, sits beside a TV." + }, + { + "image": "images/caption_detailed_32.png", + "subject_name": "truck", + "object_name": "road", + "predicate_name": "parked on", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "Q7V1U<1O1O1O00001O000O0100O1000001O000O101N10001O1O1O1O00000001O1O2N1O1O000O2OOO2O1000000000O100O10000O10000001O1O1O000000000000000001M2O1O1000000O1O1O100000000000O10O100L4O1O2N2WObC4a[V7" + }, + { + "size": [ + 427, + 640 + ], + "counts": "\\8m08E\\;;dDE\\;;dDF[;:eDF[;:eDF\\;9dDG\\;9dDH[;8eDH[;8eDH[;8eDH[;8eDHZ;9fDHY;8gDHX;9hDHW;8iDHW;8iDHW;8iDIV;8iDHV;9jDGV;9jDGV;9kDGT;9lDGT;9lDHS;8mDHS;8mDHS;9lDGU;8kDIT;7lDIU;6kDKU;4lDKU;4kDLU;4kDLU;4kDLU;4kDLV;4iDLW;4iDMV;3jDMV;3jDMV;3jDMV;3jDMV;3kDMT;3lDMT;3lDMT;4kDLU;4kDMT;3mDLR;5nDKR;5nDLQ;4oDLQ;4oDLQ;4oDLQ;5oDKP;5PEKP;5PEKQ;4oDMP;3PEMP;3PENo:2QENo:2QENo:2QEOn:1REOn:1REOm:2SENm:3RENm:2SENm:2TEMl:3TENj:3VEMi:4WELi:4WEMh:3XEMh:3XEMg:4YELg:4YELh:4WEMi:2XEMm:NSE3m:LSE4m:LTE3m:LSE4m:LRE5o:JQE6o:JQE7n:JRE5n:KRE5n:KRE6m:JSE6m:JSE6m:JSE7l:IUE7k:HUE8k:HVE7j:IVE7j:IVE7j:IVE8i:HWE8i:HWE8i:IVE7j:IVE8i:HWE8i:HWE8j:GVE9j:GVE:i:FWE:i:GWE8i:HWE9h:GXE9h:GXE9h:HXE8g:HYE8h:GXE9h:GXE9h:GYE9g:FYE@O6k:8VE_O37h:9VE[O7;c::lEES:=mECR:=oEBQ:>oEBR:=oEBR:>nEBR:=nECR:>mEBS:>mECR:>]EXON9e:`0ZEZO07e:a0WE[O34f:c0SE[O72g:R1YEnNg:S1XEmNh:S1XEmNh:S1XEnNg:R1YEnNg:R1YEnNg:R1YEnNf:T1YEmNf:S1ZEmNf:S1ZEnNf:P1[EPOe:P1\\EoNd:Q1\\EoNe:P1\\EoNd:1oDa0=_Oc:P1]EPOc:P1]EQOb:n0`EQO`:S1\\EmNd:T1ZEmNf:S1[EmNd:S1\\EmNd:T1[ElNe:T1[ElNe:T1[EmNc:T1^EkNb:U1^EkNb:U1^EkNb:V1]EkNb:U1_EkN_:V1aEkN^:U1bEkN]:V1cEjN]:V1cEkN\\:U1dElN[:T1eElN[:U1dEkN\\:U1dEkN\\:U1dElN[:K\\Ei09\\OY:V1gEjNU:Z1kEfNU:e0bE[O90U:Z1kEfNU:`0bEF8JU:\\1kEdNU:\\1kEdNU:\\1kEdNU:\\1lEcNT:]1lEdNS:\\1mEdNS:\\1mEeNR:[1nEeNS:Z1mEfNS:Z1mEgNR:Y1nEgNR:Y1nEgNQ:Z1oEgNP:Y1PFgNP:Y1QFgNm9Z1SFfNm9Z1SFfNm9Z1SFfNm9Z1SFfNm9Z1SFhNk9X1UFhNk9X1UFhNk9X1UFiNj
9W1VFiNj9W1VFiNj9W1VFiNj9W1VFiNj9W1VFjNi9V1WFjNi9V1WFkNh9U1XFkNg9V1YFiNh92QF47Kg9V1YFjNh94oEO9Ng93PFO9Ng92QF08Ng9OTFXOKf0:3g99oED:3g9:UF\\O4;f9S1ZFmNf9S1ZFmNf9S1ZFmNf9S1ZFnNe9R1[FnNk9l0UFTOn9i0RFWOQ:f0nE[OT:c0lE]OV:a0jE_O]::cEG_:6aEJ`:5`EK`:5`EKa:4_EM_:4aEL_:4aEL_:4aEM^:3bEN]:2cEN]:2cEN^:nNYEP193\\:mN]Eo065o9POnE0Kk094m9UOlEMOh087h9XOQFIOh078d9]OTFe08Nc9h0]FXOc9h0]FYOb9g0^FYOb9g0^FYOb9g0^FYOb9g0^FYOb9f0_FZOa9f0_F[O`9e0`F[O`9e0`F[O`9e0`F[Oa9d0_F]O`9c0`F]O`9c0`F]O`9c0`F]O`9c0_F_Oa9`0_F@a9`0_F@a9`0_F@a9@XF27>a9@XF]O285k0b9JZFlNL;8P1a9J]FVO2P1b9JWFZO7l0b9<_FDa9]OXF27a0a9^OVF29a0`9;`FEa9:_FFb99^FHc96\\FKf93ZFNf91ZFOh9OXF1i9NWF2j9MVF3j9MVF3j9MVF3j9MVF4i9LWF4i9KXF5h9KXF6g9JXF8g9HYF8g9HZF6g9JYF6g9YOQFC7U1h9gNRF3O17V1g9fNSFd06g0f9eNTFd06h0e9dNTFe07g0e9F[F:e9VOXF]O3]1e9UO[F\\O0`1d9TO\\F\\O0`1d9TO\\F[O1a1c9VOUF^O8\\1c9D]Fc9A^F?b9A^F?b9YNVFR18f0c9VNXFR15h0j9_NmE=9T1m9TOSFl0m9TOSFl0m9TOSFm0k9TOUFm0j9SOVFn0j9TNnEh09T1n9SNjEd08X1S:]NiEJ4i1S:]NjEI3j1S:]NeEG127j1S:]NeEH018k1f9hNRF\\OO29k1e9hNUF^O5l1e9fNWF\\O5o1c9eNXF\\O5o1d9cNYF]O3P2e9bNXF^O3P2n9YNoEG3P2j9]NRFD3Q2f9nMQF`02C7o1f9iNZFX1d9iN\\FW1d9`NSFC9m1e9_NRFD9m1i9[NnEH9n1g9[NPFG9n1e9]NRFD:o1c9^NTF^O1O8U2d9]NSF^O207U2e9\\NbFe1_9^NRF\\O7V2g9bNYF^1h9bNWF^1i9bNWF_1i9`NWFa1k9\\NUFd1l9[NTFd1m9[NTFe1h9jMPFa08e1g9`NYF`1[9iMgFf0Na1[9iMjFc0Kd1[9jMiFb0Lg1X9gMjFd0Mf1Y9fMhFf0Od1Y9fMiFe0Ne1Y9fMoF>Il1X9fMPG=Hl1Y9gMnF>Ik1Y9gMgFe00d1Y9gMgFe0Of1Y9eMhFe0Og1X9dMiFe0Oh1W9cMjFe0Oh1W9cMjFe0Oi1V9bMkFe0Oi1V9bMkFd00j1U9bMkFd0Om1T9`MlFc00m1T9`MlFc00m1T9`MlFc00m1T9`MlFc00m1T9_MmFd0Om1T9_MmFd0No1`9QN`Fo1`9QN`Fo1`9PNaFP2_9PNaFo1a9PN_FQ2`9oM_FR2a9nM_FR2V9[MlFc0OQ2T9^MlFa00P2V9^MjFb0OQ2W9]MjFa00R2a9nM_FR2a9nM_FR2a9nM_FR2a9nM_FR2a9nM_FR2a9nM_FR2a9nM_FR2a9nM_FR2a9nM^FS2b9lM_FT2a9lM_FT2a9lM_FT2a9lM^FV2a9jM_FV2a9iM`FW2a9hM_FX2a9hM_FX2a9hM_FX2a9hM_FW2b9iM^FW2b9iM^FX2a9hM_FX2a9gM_FZ2a9fM_FZ2a9fM_FZ2a9fM_FZ2a9fM_FZ2a9fM_FZ2a9fM_FZ2a9eM`F[2`9eM_F\\2a9dM_F\\2a9dM_F\\2a9dM_F\\2a9cM`F]2`9cM`F]2a9bM_F^2a9bM_F^2a9bM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b
9aM^F_2b9aM^F_2b9`M_F`2a9`M^Fa2b9_M^Fa2b9_M^Fa2b9_M^Fa2b9_M]Fb2c9^M]Fc2c9\\M]Fc2d9\\M]Fd2c9\\M]Fd2c9\\M\\Fe2d9[M\\Fe2d9[M\\Fe2d9[M\\Fe2e9YM\\Fg2c9ZM]Ff2c9ZM]Ff2c9ZM]Ff2d9YM[Fh2e9XM[Fh2e9XM[Fh2e9XM[Fh2e9WM\\Fi2d9WM\\Fi2d9WM\\Fi2d9WM\\Fi2d9WM[Fk2d9TM]Fk2d9UM\\Fk2d9UM\\Fk2d9UM\\Fk2d9UM\\Fk2d9UM\\Fk2e9SM\\Fm2d9SM\\Fm2d9SM[Fn2e9RM[Fo2d9PM\\FP3e9QMZFo2f9PM[FP3e9oL\\FQ3n91000000O101OO010000O100O100O1FiL_FX3`9jL^FW3b9900O100000001N100O1O0100000O10000O2M200O0100000000000O101OO1000001O1O1O000000O1O1N200O100000001N11O3L5L1O01O00O11N1O1K5O10O11O00000O1000000000000001O0000000000O100000000O10000O1000000000000O100000000O100001OO1000000O11O1O00000TOjFcMW9Y2SG_MQ9]2Q1N2N3M2M5L4L00000000O100C=O1000O100000000001O0000000000000000O11O000dF" + } + ], + "model_output": "A red fire engine, featuring reflective striping, a side compartment with handles, a visible wheel with a silver rim, and a section of a yellow and black striped pattern on the rear, is parked on the road." + }, + { + "image": "images/caption_detailed_33.png", + "subject_name": "paper", + "object_name": "pavement", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 500, + 333 + ], + "counts": "R`?3`?2N3N1O2M2O1N103M1N3N1O1N2O1O0O2O0O0100O2VOZAd0k>00O2OO010000O01000O100O100O2O00000O2N1N2N3HS?0TA0^Ul3" + }, + { + "size": [ + 500, + 333 + ], + "counts": 
"Q>e0U=\\OmBd2OaM100LS<_3M100O100O100O100O1O100O1O10000O11O00OBYDQMf;P3]DmLd;Q3^DnLb;R3_DmLa;S3`DlL_;T3cDkL[;W3gDgLY;3ZDn2>oLY;V3iDiLX;V3e0L3M4N1N2N2N3M3L3N2N3L3M2L5J6K4O1O1O1000O010O10O10O010O0100O001O001O001O10O10O10N101O001O1G^N`Bb1^=dN^B\\1b=;2M2O1O1O2N100O2N1000010N10000000O1O2O0O00001O1O1O2N6J2ZCoMe;Q2WDVNf;k1WDZNf;f1YD^Nd;e1VD^Nj;f1mCeMMg0W?VA@a>o0F9F:H8I7F:H9F9H8CDH8EE;K6B=K5N3N100010O01PLkI_NV6c0gJ]OZ58PKHQ57oJIS55lJLU53kJMU53kJMV53hJNX52gJOY52fJN[51eJO[51dJ0]50bJ1]5OcJ1^5NbJ2_5N_J3a5M_J3b5L^J4b5L]J5c5K]J5d5K[J5e5KZJ6g5JXJ6h5JWJ7j5HVJ8j5IUJ7l5HTJ8l5oMPHn0T2S1l5oMQHm0R2U1n5mMRHm0o1W1n5lMTHl0m1Y1P6jMTHm0k1Y1Q6kMTHl0i1Z1T6iMSHm0i1Z1T6jMSHk0i1[1U6iMSHk0g1]1V6hMSHk0g1]1V6hMSHl0f1\\1X6hMRHk0e1^1Y6gMRHk0e1^1Z6gMPHk0f1_1Y6fMRHj0d1a1Z6eMRHk0c1`1\\6eMPHk0d1`1\\6eMQHj0b1b1^6cMPHk0b1b1^6dMoGk0a1c1_6bMQHk0_1c1a6aMPHl0_1c1a6bMoGk0`1c1b6aMoGk0^1e1c6`MoGl0]1d1d6`MoGl0]1e1d6^MPHl0[1g1e6^MoGk0\\1g1f6^MnGk0Z1h1h6]MnGk0Z1h1h6]MoGj0Y1i1i6]MmGj0Y1j1R4nLmL>hMk0X1i1e3]MZMOiMk0X1i1X3jMhMBhMj0W1l1k2UNVNUOhMk0V1k1\\2eNfNeNhMk0U1l1m1TOWOTNhMk0T1m1_1BEfMhMk0T1m1S1N1[MgMk0S1m1d0>c0iLgMk0R1n1NT1Y1SLgMk0R1n1F\\1a1lKfMj0R1P2ZOf1o1_KfMj0Q1k7Y1\\GeMj0Q1j7[1[GdMj0R1k7Z1[GdMk0P1k7\\1ZGdMk0P1k7\\1[GdMi0P1l7]1ZGcMk0o0k7^1ZGcMk0n0l7_1ZGcMi0n0m7`1ZGaMi0o0m7`1ZGaMj0m0m7b1ZG`Mi0n0m7b1[G_Mh0o0m7c1]GZMg0S1l7c1oH\\NR7d1nH\\NR7e1nHYNS7g1mHZNR7f1nHZNR7g1mHYNS7g1mHYNS7g1mHYNS7g1mHXNT7h1mHWNS7j1lHVNT7j1lHVNT7j1lHVNT7j1mHUNS7l1lHTNT7l1lHTNT7l1lHTNT7m1kHSNT7n1lHRNd6^2\\IbM]6e2cI[M\\6g2RIYLiNQ1V8e2iHfLkNg0\\8c2dHXN\\7h1aH[N_7e1^H[Nd7f1[HZNg7e1XH]Ng7c1XH_Ng7`1YHbNe7_1ZHdNd7\\1[HeNe7\\1YHfNf7Z1YHhNf7X1ZHhNe7Y1ZHhNf7Y1YHgNf7Z1WHiN`7`1`HaNV7i1iHWNj6V2UIkMa6`2^I`MZ6h2eIYMP6R3PJnLe5^3YJcLP5U4nJlKd4b4\\KXKE`Mf4c7cKlJ[4[8\\KXER4Z;F;I6N3N1O101O000000001O00001O0000001O0000001O0000000O2O0000001O0000001O0000001O0000001O00000000001O1O0O1000001O0000001O0000001O000000001O000O11O01O000O2O000000001O1O0000000000001O000000001O0O11O01O000101N4Lb0^Oc0^OV1\\DoJW:U6RO`0@W1jN;D000001O00001O000000001O000000001O0000001fHnHP6R7mI[Ii5f6
RJnI\\OfNn5\\7eJ]JQ5c5nJhJh4^=" + } + ], + "model_output": "A glossy chestnut horse with a white blaze on its forehead and white socks on its front legs stands on a chair. The horse's mane is a lighter shade, possibly flaxen, and it has a muscular build with a prominent withers and a well-defined neck. Its ears are perked up, and it has a calm, attentive expression." + }, + { + "image": "images/caption_detailed_35.png", + "subject_name": "person", + "object_name": "giraffe", + "predicate_name": "feeding", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "mgV58^;4L1O100O100O01000O100O1000000O1000001O1O001O1O2N1O1O2O1N2O00N1N3M3M3M4L3MaT1GhI" + }, + { + "size": [ + 375, + 500 + ], + "counts": "bil0=U;:H6J5K3N2O0010O001O100O001O010O00100O001O010O10O0100O010O10O10O100O10O10O10O010O10O10O1000000O10O01000000O10O10O10000O1000O10O101OO10O@aEJ_:5fEF[::hE@FOb:a0a00O10000O10000000001N100O10001N10000O10001O00001N100O2O1O00001O0010O010O0101N003N2N1N2O1N101N1O101O0O2O0000000001OO2O0O101N8H1O1O1N2L3O1O2O000000O1RNjF>2IV9HlF5;Jl8NnF1W:MkE0mRT3" + } + ], + "model_output": "A person with short, light brown hair, wearing a white shirt with a collar, and a dark-colored jacket, is feeding a giraffe." 
+ }, + { + "image": "images/caption_detailed_36.png", + "subject_name": "person", + "object_name": "pavement", + "predicate_name": "walking on", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "Wme21kc06N000[O6Q]OMkb0d01O14T]OXOYb0[1L3N2N3L5K1O1DSNd^Om1[a0=O0100000001O1kM_^Oh1da0PNb^OP2ha0N1G9F:K6O0O2O1O2N3Mf0SO[\\O1gVP6" + }, + { + "size": [ + 640, + 480 + ], + "counts": "hU18d?LVDZ2i9nMPFR2o9lMTFT2l9jMVFV2i9jMXFV2h9jMXFV2h9iMYFW2g9dM^F\\2c=0TJXN`Ih1[6bNaDQOl4]2^6QO[Io0d6UOYIk0f6XOXIh0h6XOXIh0h6WOYIi0g6WOYIi0Y7_NmHa1Z;QNm@o1o>YNm@g1P?aNk@_1S?eNk@[1n>oNo@Q1n>UOo@k0P?XOn@h0P?[On@f0o>_Oo@a0n>El@>Q?Z2M3N2N2O1M3N2N2N2O1O1O1O100001O1O1O1O1O1O1O1O1O1O1O1O1O001O1O1O00001O001O0000POXKhBh4U=^KYBD7n4_=iK_BW4`=kK_BU4_=nK`BR4_=PL_BQ4`=RL]Bo3c=SLYBo3f=V100M3O1N200O1001gIbBJ5k5S>I2N1O1O00002N;E:F7iLl@T1Y?cNm@[1[?YNi@g1b?lMa@S2a?hMc@W2_?eMc@[2^?`Mg@_2]`00001O00001O00000000000000001O000ZO`^OlN`a0S1a^OmN^:EWL_1ZIlN]:LTLn1l3RNULm1j3WNSLi1m3XNRLh1m3]NoKc1P4`NfETOU6\\2T4eNkK[1U4fNjKZ1k3iMSFj0OROa23k0X2a6lNSGhNQO\\1Y1`7lNTG]O1e0S1R1h7lNTG]O2j0l0m0n7iNXG_O0a136e8iNYG@4c1G4l8hN[G_O4d1E5l8_NVFVOZ1a00e1C5n8YNjGLFf1A5o8UNPHMBi1^O5Q9oMUH2]Oj1\\O5S9nMUH1^Ol1YO5T9mMVH1^Om1XO5T9mMVH1_Om1VO5V9lMVH1^OQ2SO2[9iMUH4]OR2RO1j;mMTER2RO1k;lMSES2RO1k;lMREU2ROOm;kMPEW2SONm;jMoD[2SOKo;iMmD^2SOIP`0\\2n0]:eNiBN=>^2o0X:lNgBM`06d2P1o9DYCZOj2R1k9GZCUOn2S1h9HZCTOP3S1e9J[CQOR3U1c9KZCnNU3W1a9KZCmNV3X1`9K[CkNW3Y1^9L[CjNX3Z1]9L[CiNh1M\\O^1a;L[ChNa1OVNk0d0b0ZB1O00O1J6L4[OSNU_Oo1o?gM]@S3`?TMR@J1S3m?g0001O1O00001O1O2TMk_OTO0`2W`0PNc@e1Va0F5K1O1O0000IZNS^Og1Sb0000002YNh]O[1cb0L3NO2001O2N2M3DM4O010N2O1N101O001O01O00001N100O2O1O1N10100O_OnI_KR6`4PJ_KQ6Q4^JoKb5k3dJUL\\5k3oIhK?;e5k3bJUL^5k3bJUL_5j3aJVL`5i3`JWL`5i3`JVLa5j3_JVLb5h3^JYLb5g3^JYLc5g3\\JXLe5h3[JXLf5n3gIhK4:U6`3dIVL3O2129W6^3UJYLD9W6\\3WJ[LB9W6[3XJ\\LA9W6Z3ZJ\\L_O:X6W3fJiLZ5V3gJiLZ5W3fJiLZ5V3gJjLY5U3hJkLX5T3iJlLW5T3\\JcL]O9W6T3\\JcL]O9W6S3jJmLV5S3jJmLV5S3iJnLW5R3hJoLX5Q3^JeLUO<]6n2^J[Mc5d2\\J]Mc5d2[J^Me5b2ZJ_Mf5`2ZJaMe5`2YJbMg5^2XJcMh5\\2XJeMg5\\2XJe
Mh5Z2XJhMg5X2XJiMg5X2XJiMh5V2WJlMh5T2WJoMh5P2XJQNg5P2XJQNh5o1WJSNg5m1YJTNg5l1WJVNh5j1XJXNf5h1ZJYNf5g1XJ[Nh5d1XJ^Nf5c1YJ^Ne5f1nIiL1c1o5k3N3L3K6I6K5N2M3L4LdMTKUNh4o1YKPNe4Q2\\KPN`4S2`KmM_4T2aKmM[4V2eKkMY4V2gKRMNoNY4P4jKoLNROW4o3lKlL0VOP4P4QLiLOYOm3o3TLgL1]Of3m3YLeL3AQ3QOPMk4LbL4Hd1lNoN]5XOnK5NT1GdNd43gK53l0`5nN]J67g0^5RO[J7h<01M20100000O1nNEXES;BmD>S;BmD=T;ClD=T;DjD=V;EfD=Z;DeD<[;DeD<[;DeD;];EbD<^;DaD=_;C`D>T;]OhD44`0R;^OkD12a0R;_OmDO2b0o:@oDO1`0Q;AnDO1`0Q;AoDOO`0S;@nD1Ma0U;^OnD0Mb0U;@lDONa0V;3gDMZ;P10XNjD^1c;N1O1O1O000O_DgNT;Y1hDoNT;P1kDTOS;l0lDWO7DZ:V1\\EYO8CZ:l1dEUNP:LoEQ20SNo91nEm12SNn9\\2PFeMQ:h2001O0\\MmET2T:jMPFR2Q:mMPFS2Q:lMQFR2R:jMQFS2Q:jMQFV2`:0O01DUE\\Nk:c1WE\\Ni:c1XE\\Nh:c1ZE]Ne:c1\\E]N_:f1dEWN^:f1a0XOgDGV;9mDFR;;oDCQ;=RE@n:a0UEZOl:g0k0N2O2ROjCg0[<000O2N2N2N1O2N2O1N2OO0O110N2O00000L400100O20N100N2O0010O01O5^OeC0a1O10000001O3M3M1O001O00000000000000000000000000O100^NSMWB1A2=j2^>_MbAa2^>_MbAa2]>aMbA_2l=SMSB?1^2l=SMSB?0_2^>`McA`2]>`McA`2]>`McA`2]>`McA`2]>`McA`2]>`McA`2]>`McA`2]>`MbAa2^>_MaAb2^>_MbAa2^>^McAb2]>^McAb2]>^McAb2^>]MbAc2^>\\McAd2\\>\\MdAe2\\>XMgAh2Y>VMiAj2V>VMkAj2U>UMlAk2R>WMnAi2Q>XMoAh2o=ZMRBe2l=\\MUBd2i=^MWBb2i=^MWBb2i=^MWBb2h=_MYB`2g=`MZB_2f=aM[B^2e=bM[B^2e=bM\\B]2d=cM\\B]2d=cM\\B]2d=cM\\B]2e=bM\\B]2d=cM\\B]2d=cM\\B]2d=cM\\B]2d=cM\\B]2d=cM\\B]2d=bM]B^2c=bM]B^2c=bM\\B_2d=`M]B`2c=_M^Ba2c=]M^Bc2b=\\M_Bd2a=\\M_Bd2a=\\M_Bd2a=\\M_Bd2a=\\M_Bd2a=\\M_Bd2a=\\M_Bd2a=\\M_Bd2a=]M^Bc2b=^M]Bb2c=`M[B`2f=_MZBa2f=`MYB`2g=`MZB_2f=aMZB_2f=aMZB_2f=aM[B^2e=bM[B^2e=cMZB]2f=cMZB]2f=cMnAEFh2\\>dMlAFGf2]>eMjAGGe2_>dMjAHFd2`>dMjAi2V>WMiAj2W>VMfAm2Z>TMdAm2\\>SMdAm2]>RMbAo2^>RM`Ao2`>QM_AP3a>f00000000000001O00000000000000000000001O001O4L2N1O1O1O001O1O1O1O2N1O2N1O1O1O2N2N3M1O1O1O2N1O1O1O1O1O1O1O001O1O1O1O001O1O001O1O001O1O001O1O1O1O1O001O1O1O001O0000001O00001O000000001O000000001O0000000000001O000000000000000000000000000000000000000000000000000000000000000000000000000000000000O10000000000000000O100000000O10000000000O1000000O1000000O10000O100O1O100O1O100O1N2O1O100O1O1O1000000000
01O00001O0000001O0000000000\\OaMj@_2U?cMi@^2V?eMg@\\2Y?eMe@\\2[?c0O10000000000O1000000O10000O100O100O10000O1000000O100O10000O1O10000O10000000000000000000000000000000000000000000000000000000000O100000000O10000O100O100O100O10000O1O100O100O100000000001OP1PO5K3M2N:F8H=C;E3M2N1O1O001O1O001O001O001O0000001O000000000000000000001O0000000000000000000000000000O100000000000000O100000000O10000O1O1L4K5hLmN]BN\\O0R1j1T=ZNUBb3k=^LUBb3k=]LWBb3i=^LWBb3i=^LXBa3h=\\L]Bb3c=]LaB`3b=\\LbBa3j>aLe@NF3NX2]a0SO?A6J4L3M001O1O001O001O001O0000001O000000001O0000000000001O0001OO100001O00000000000000O1000O2O000000000000O100000O2O000000000O0100000N2O1nK1TE1l:3lD1S;1lCHnMd0V>EkCn0UdDA\\;`0dD_OUM" + } + ], + "model_output": "A white ceramic bowl with a flared rim and a smooth, glossy finish is on the dining table." + }, + { + "image": "images/caption_detailed_41.png", + "subject_name": "sports ball", + "object_name": "person", + "predicate_name": "touched", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "kiR13W=2EOYC2d<3YCNe<<00000O10000001O00O20O0001O1ObCDn;=PDHm;8RDKl;3UDOi;2VDOj;0VD1j;0TD1k;0TD2k;f010000000000O1000VDiN`;W1_DjNa;V1_DjNa;V1_DjNa;U1_DlNa;T1_DlNb;S1^DmNb;S1]DnNc;Q1^DoNc;P1]DoNd;Q1\\DoNe;P1ZDQOf;o0ZDQOf;n0[DROf;m0YDSOi;m0TDUOm;T101O10O1O2N1N2O1N2M5J5M4L5K4L3K4NST\\6" + }, + { + "size": [ + 427, + 640 + ], + "counts": 
"bQT12Y=1J0oB01Nm<0`U:8QjEd0E:XDPOl:n1G7G9I6J8J5L5J5G:K4L4L5K6G7L5K5L4K3VLeKgKUOe2Y5b1mKjKjN\\11\\OY5]3nKiKjN[13[OW5_3RLUMnNTOR5e3SLTMROiN^O2a5P4RLRMi4l2[KQMf4n2[KoLh4P3YKWLWN`0a6Y3ZKTLWNa0`6Z3bKeL^4[3cKbL_4]3bKcL^4\\3cKdL]4\\3fK`L[4_3lKZLU4e3mKYLT4g3lKYLT4g3mKXLa27RL`3d6eLZIZ3d6kLZIT3d6PM[IP3d6RM[Im2e6VMYIi2i6XMUIh2k6\\MQId2o6]MPIb2Q7_MnHa2R7`MmH`2S7cMjHc0K[NAn0k79dH;>VOo6`0bH7d0VOk6h0[H2n0ROh6R1SHKZ1POc6h2dIRM]6m2gIoL[6P3iIkLY6S3P2O2N1O10000O2O0O10002N5K4K4M4L2N4L2N2N4L2N3N2M5K9H0O0010O00001O2N1O001dEfNe8[1UGkNj8W1RGlNm8U1PGnNo8S1mFPOS9S1eFRO[9o0`FVO_9k0oE_N1n0o9U2O00lNTFjNk9[201VNSMTIn2i6VMUIj2j6YMTIh2k6ZMSIf2l6`MoHa2X3fLkNLRM^1f0Q2Z3kLaNe1RNa1[3kL\\Nl1XNZ1Z3lLSMCNd2_Oo0X3XNQMX1[Ob0a3_NaLW5]3]2O1O1O1O1O1O2N001N2O1O1O1O1O1N2O1O1M4L3O1M4M2M8G6J5UNcD]1o;aNUDi0c3N2N2O1O0O101O00000000000000000000000000000000000000000000000O1000O10000000000000000000000000QJ" + }, + { + "size": [ + 480, + 640 + ], + "counts": "0[63nJV7R5jHnJV7R5jHnJV7R5jHnJV7R5iHWJO6W7c5jHWJO6W7c5jHUJ19T7b5kHTJ2:S7b5kHSJ3V1l0VOVOG_13`N6a0HQ>R1VBB;YOSOe0K[O89b=L^BU1IR3OhKR=2nBf10g2c<_K]CO1o6`T3^A^MT>c2jAmMg=S2XBVN`=k1^BbNV=^1iBQOij;BSDc07\\Le:R3REc08cL_:k2WEc09mLV:k6gEbIn9_6mEoIi9\\8H5K5K00000000000000O10000O100O100O1O1O1N2O100O100O1000000O1000000O1000000001O1O00001O0000001O002N1O1O00001O1O1O001O001O00001O001O1O1O001O1O1O1O001O00001O00001O1O001O1O001O001O1O1O1O0bKiDD1_LW;k3iDE2_LR9J6K4L5K4L3N1O2M2O2M101N2O1N3M2M3M3M2O2N2O1O1N1O2O001O0O1OO21O0O0XORN`Do1_;WNZDl1f;UNXDk1i;f01N2N101N1O2N2O0O2O1N2N2N1O2N100O2N1O1O100O1O1O100O2O0O1O10000O2O000000010O01O00001O0010O00010O01O1O100O1O010O1O010O1O10O01O100O1O1O10000O1O1O100O100O0010000O2N2N2O2N1N2N1O2O2M2N1O100O2O1N10000O100O1O1O100O1O0010O0101N1O3N3L5K3M3M3RHnKi4U4RKlKn4V4oJkKR5W4gJmKY5X4aJiK_5[4[JfKf5d4nI^KS6g4fI[KY6k4^IXKb6l4YIUKg6n4TISKm6S5jHPKV7]60O1000000GUH\\Il7b6YHZIh7d6;O0O2O1iNeGhK\\8T4RHbKn7[4\\H]Ke7d4[HZKf7h4YHWKg7n4SHRKn7T5gGSKW8l5OO1O2N11O1OnLiGUO]8j0XH_Nk7a1bHfLkNR1d8X2PIaMQ7_2UI[Mk6e2[ISMg6m2[IPMf6P3^IjLe6U3c21O0000001O001O010O00001O00001O00001O0001
0O001O00001O001O000000001O00000O10001O0000000000001O0000000000000O1000000000000000O01000000000O1000000000O01000000O10000O10O010000O100O10O10O1O1O1O100O100O100O1O\\GjL`5U3`JmL_5R3^JSMa5l2\\JXMd5h2[JYMe5g2YJZMh5f2UJ\\Ml5e2PJ^Mo5e2lI]MU6e2hI\\MX6e2eI]M[6f2aIZM`6h2\\IWMg6k2VIPMP7R3lHnLV7T3gHmLY7T3eHkL]7W3`HjL`7X3\\HiLd7Y3ZHgLg7[3VHdLl7]3QHcLQ8`3kG_LW8c3eG]L]8e3`GZLb8i3YGXLh8l3RGTLP9l42O1N2O1O1N3N1O1O1N2O1NmGnJ\\6Q5dIPK]6m4bIWK]6h4cIZK\\6d4dI_K[6`4dIbK]6\\4cIeK]6Y4eIgK\\6W4dIjK\\6U4bInK_6Q4_IQLa6R4YIQLg6Q4SIRLo6P4gHVLZ7m3\\HWLh7]53N2O2N1O2O0O2N1O1M4XM[GPOh8n0^GbNo8[1VGWNU9g1PGRNT9l1PGlMW9Q2mFgMZ9V2jFbM]9Z2iF^M]9_2d1M2M4L3N3M3M3L3M3N3N2M2M4M3L4PO_B9g=F_BOh=0d00001O00001O001O1O0O3NTef1" + }, + { + "size": [ + 480, + 640 + ], + "counts": "V6e17W5R6hJnIX5R6hJnIX5R6hJnIX5R6hJnIX5R6hJnIX5R6hJnIX5R6k1000000000000000000000000000gHiIi5W6WJiIi5W6WJjIh5V6XJjIh5V6XJjIh5V6XJjIh5W6WJhIj5X6UJiIk5W6VJhIj5W6`100001O5K0kKcIG^6RMaIo12o0]67cII]67cII^67`IJb6Y4000000001gH^IR6b6mI`Ii1N[1b6lL_Ii11Z1`6mL_Ii11Z1_6oL^Ih12Z1`6nL^Ih11[1a6U400000000000000O_H_Ie6`6\\IbIb6]6_IdIj1Mm1_6YLcIa6]6_IcIa6]6_IcIa6]6_IbIb6]6Q1000UKcIlLOd3^62dIN\\6dLbIb23j0Z6eLcIa23j0Z62eIO[60fI0Z60fI0Z6dLcI`23l0Y6eLdI_23l0Z60eI1[60dI0\\6OeI1Z6kKfIo306Z6jKhIo3N7Y6kKiIn3N7Y6jKjIo3M7X6kKkIn3M7X6lKjIX8V6hGjIT4LKZ6SLgIR40KY6TLfIP43KV6>jIBV6?iIAV6`0jI@U6b0iI_OV6f4O1O1OTNoIkJQ6S5RJlJm5S5UJmJk5S5VJlJj5S5WJmJi5S5WJmJi5R5XJnJh5R5XJnJh5S5VJnJj5R5VJnJj5S5SJoJm5Q5SJoJm5Q5SJoJm5T5nInJR6P700000000000000O100000000001_GRJn7n5mG^Jl7b5RHbJl7\\6M2N2N1O001O1O000000001O0000000000000000O1000000O10000O100N2O1N2N2O1O1O100O100O100O100N200O1O1O100O100O1N200O1O1O1O1O1O100000000O10000O11O00O1000000001O00001O0000001O1O1O2N001O1O001O1O1O001O001O1O2N001O2N1O1O1O001O1O1O1O001O2N1O1O3M2N2N2N1O2N3M2N3M001O002N2N1O1O2N1O1O1O1O1O1O1O00001O2N2N3M3M4L2TMVG^On8O1O11O0000000000000000O100000000O1kLRMXJn2h5_MkGYOW13`NX3^8eM`G_OS1V3]7UNZHl1f7VN`GWNa0d3o7\\NoGe1Q8]NkGe1V8b200001O001O001O0000001O001O00001O001O001O0000001O0000001O00001O00000000001O00001O0000000000000000000
00000000000000000O1000000000000O100000000O100000000O10000O10000O10000O1O10000O100O1O100O1O\\L_Gk0a8SOkGc0T8[OPHd0P8YOTHf0k7YOWHg0i7XOXHh0g7WO\\Hh0c7WO_Hi0b7QObHP1^7oNcHQ1^7kNeHT1]7iNeHW1\\7dN_GVNU1W3^7aNeH`1\\7^NcHc1`7YN`Hh1b7UN^Hl1c7QN^HP2d7mM[HU2f7iMZHX2g7fMQHnNQO_3Q9^MnGj2S8TMnGl2S8RMmGo2W8lLjGT3X8hLhGZ3[8aLeGa3\\8]LcGe3k91N2M3O1N2O1O1N2NPMcEa1\\:\\NiEc1W:[NlEd1S:[NoEe1P:ZNSFe1i9^NXFb1g9^NZFb1e9^N\\Fb1c9^N^Fb1`9_NbF`1]9`NdF`1[9`NfF`1Z9^NhFb1W9^NjFb1U9[NoFe1P9WNUGi1l8TNVGl1l8jM[GW2g8]McGc2\\:100O1O1O1H8O1O1O100O1N2N2O1M3F:bM]LTIn3i6ZLlHl3S7WLgHm3V7YL`Hn3^7UL\\HP4a7WLTHP4i7VLPHn3P8a1N2N2L4M3N2L4O1M3N2O1M3O1K5N2H8@`0O1O1O1000000O100000000N2O10000O1N2O100000000000000000\\HWJQ6i5mIYJS6g5lIZJT6g5kIYJU6g5kIZJT6e5mI[JS6e5mI[JS6f5lIZJT6f5lIZJT6f5lIZJT6f5lIZJT6f5lIZJT6V4cHPMY1jNT6V4cHPMY1jNT6V4cHPMY1jNT6V4cHQMX1bNhN3^7k5jIQJjN3\\7[4bHQM\\1dNR6[4cHPM^1bNo5^4dHoLm8Q3TGnLl8R3UGmLk8S3TGnLl8R3SGeKNU1o8V3SGPMl8R3RGnLn8Q3TGfK1n0k8\\3TGfK0o0l8Z3UGgKNP1m8Y3UGoLk8Q3VG\\LMEm8o3VGcKLe02Jk8n3ZGYLJIl8n3ZGlLf8T3ZGlLf8T3YGnLf8R3YGoLg8Q3XGQMg8o2YGQMg8o2WGSMi8R4VGiJk8\\4WG`K00OJN6l8`4WGaKOO0IO7k8`4VGbK0OOH08j8T4VGkK1NNL20a8T4oFmK`0Oa8T4oFmK?0^ONj8U4YGnK=0@Mj8O" + } + ], + "model_output": "An adult zebra stands with its body profile facing left, showcasing its distinctive black and white striped pattern. The stripes are unique, with varying widths and patterns across the body, legs, and mane. The mane is erect and striped, and the ears are pointed upwards. The zebra's eyes are alert, and the nose is dark. The tail is partially visible with a tuft of hair at the end, standing on dirt." 
+ }, + { + "image": "images/caption_detailed_47.png", + "subject_name": "cake", + "object_name": "person", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "PYR41h01[=2cBO[=4dBL[=6cBK\\=7bBJblKBT4=mKCT4fFGU9;dF@cN7f:=aFN]92dE^O=d0n9OcE@b5n600000O10000000000000000O100001OO10000fH" + } + ], + "model_output": "An individual wearing a bright yellow ski jacket with a hood, red ski pants, and black gloves, equipped with a black helmet and goggles, is jumping from the snow, with their left arm extended outward and their right arm bent at the elbow." + }, + { + "image": "images/caption_detailed_49.png", + "subject_name": "donut", + "object_name": "person", + "predicate_name": "held", + "mask_rles": [ + { + "size": [ + 612, + 612 + ], + "counts": "_Ql1l0oa0c0A8H7J6I:D;K5J5M6H7H6L2N2M4K7I5L3M3N4I6L2N4L4K0020001O1N2N110O1N2N1O2O1O1O2M2O2N1O1N2O001O100N2N3OO10O1N2O011N1O1O1N201O0O1O1O1O1O101N2O0O1O1O1O10000O2N1O101N1O100O100O1O1O101O0O100O10000O1O1O10000O101N100O100O2N1000000O10000O101O0O10000O1000lLhD\\NW;a1PE\\NP;a1VE]Ni:b1ZE\\Ne:e1\\EZNd:f1]EYNc:f1^EZNa:g1aEWN^:i1dEVN[:k1eEUN[:j1fEVNZ:i1hEUNY:j1hEVNW:k1jETNV:l1jETNV:l1jEUNU:k1kEUNU:j1mEUNR:l1nETNR:l1oESNQ:m1oESNQ:m1oESNP:n1QFRNn9n1RFRNn9n1RFRNn9n1RFRNn9n1RFRNm9o1TFPNl9P2TFPNl9P2TFPNl9P2TFQNk9o1UFRNj9n1VFRNj9o1UFRNj9n1VFRNj9n1VFRNj9n1WFRNh9n1XFRNh9n1XFRNh9n1XFRNg9o1YFQNg9P2XFQNg9o1YFQNg9P2XFPNh9P2XFQNg9P2YFPNf9Q2YFoMg9Q2YFPNf9Q2YFoMg9Q2YFPNe9S2ZFlMf9U2YFlMf9U2YFlMf9T2[FkMe9V2ZFjMf9W2XFjMh9X2VFhMj9Y2TFiMk9X2TFhMl9Z2PFhMP:Z2mEhMR:]2cEjM\\:_50001O001O001O1O1O01O0001O1O001O00100O00002N10O01O001O100O010O1O100O1O010O100O100O2OO01O011O0O01O010O10O01N101N2O0O2N2O0O101N1O2O1N2N1O2O1N2N1O2N1O2N2O0O2N2N2N2N1N3M3N2N1N3N2N2N2N1O2N2O1N2N2N1O3M2N2N2N2L5J5L4N3M2M3M5K3N2M3N2M3M3N2M3N3L3N2N2M3N3K5M2M4K6J5J5L7I5K8F9I6K9F6JZb0`0F7G8H8G:B=F:B>N2O1000000000000O10001O00000000000000O02O00O100000000O100O1O1O1O1O1O1O100O1O001O1O100O1O1O1O1O10O010000O1ROf_OROZ`0h0o_OUOQ`0g0T@XOk?g0W@YOi?e0Z@ZOe?f0]@YOc?e
0`@ZO`?e0b@ZO^?d0d@\\O\\?c0f@\\OZ?c0h@[OY?d0h@[OY?d0i@YOY?g0h@VOZ?i0j@POZ?P1]100O1000001O000010O3M10O00010O000O101N1O1O2N1O2N1O1Ok_O_Oi=`0TBEk=:TBIl=4TBNl=1oA5Q>JkA;V>DeAa0[>^OeAc0\\>ZOcAi0]>VO[AS1f>lNTAZ1m>dNPA`1Q?]Nn@f1U`03H8N2K5L4N3K3M4K5L5K4I7L3L6L3J6K4J7M3M4L2M4H8G9N2L4N2N200O100O1O100O100O1000000O10000000000000000000000001O000000010O0O2O00010O1O001O001O1N2O1O001O2L3@`0H8J7J5I7K5K5H:F9I7J5K6J8UNY_Om0ea0H6L4K6J6K4L4M2MdgZ6" + } + ], + "model_output": "A doughnut with a glossy red icing, generously sprinkled with white nonpareils, is held by a person." + }, + { + "image": "images/caption_detailed_50.png", + "subject_name": "laptop", + "object_name": "table", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "0R8n6000000O10000O100O10000O10YLSH;l7]3000`LTHLk75VHJj73[HKd7i3000000O100O100O1000000O10000O10002M102N2M5L3L5L3M3L5L5K3L4M1N2O00001N101N2O0O2O00000O2O001N2O001N2O001N2O1O0O2O1O1N2O0O2O1O0O2O1N102N001N2N101O1O1N101O1N2N2O1O001N2O0O2O1N2O1O1N101N2O1O0O2O1O001N3N0O2O1O0O2O1N2O001N2O1N2O1O0O2O1O001N2N2O1O001O1N101N2O1O1O0O2O1N2O0O2O1O001O2M101N2O001N3N001N2O0O2O1N2O1O1O0O101O1O1N2O1O0O2O1N2O1O1O0O2O0O2O1O0O2O1O0O2O1N3N1N101O001N2O1O001N2O1O1N2O1O0O2O001O1N2O1O0O2O2N001N2O1N2Nf]Q6" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"U8i6W8000O100O10000O10000O10000O100O1000000O100O100O1000000O100O10000O100O10000O10000O100^HlHX7T7gHnHW7S7iHQIS7o6lHVIP7j6PIWIn6j6RIYIk6g6UI_Ie6a6[IaIb6`6^IaIa6_6_IaIa6_6`I`I_6a6aI_I_6a6`I`I`6`6`I`I_6a6aI_I_6a6bI^I]6c6bI^I^6b6bI^I^6b6bI^I]6c6cI]I]6c6dI\\I\\6d6dI\\I[6e6dI\\I\\6d6dI\\I\\6d6eI[I8VOe5_7SJ[I8VOd5`7TJZI7XOd5^7TJ[I8ZOa5[7WJ[I7\\O`5Z7ZJYI6]O`5Z7ZJYI6^O_5Y7[JXI6@_5X7[JXI6@_5X7[JXI6A^5W7\\JXI5C]5V7^JWI5D\\5U7_JWI5EZ5U7aJVI5FY5T7bJVI5GX5S7cJVI5GW5T7dJUI5HV5S7eJUI5IT5S7gJTI5JS5R7hJTI5KR5Q7iJTI4LR5Q7jJSI4LR5Q7jJRI5NP5P7kJQI60m4P7mJPI53l4m6oJPI54j4m6QKoH54j4m6QKoH55h4m6SKmH66g4m6SKmH58g4k6TKmH59e4k6VKlH5:c4k6XKkH5:c4k6XKkH4`4i6[KiH5>`4i6\\KhH4`0^4i6]KgH6a0\\4h6^KgH6b0Z4h6`KfH6c0Y4g6aKfH6c0Y4g6aKTHL>:g0X4h6bKSHL>9j0W4e6dKdH6h0U4d6eKcH7j0R4d6gKaH8l0P4c6hKRHL:=Q1n3d6iKQHL:=R1m3c6jK`H7n0o3b6jK`H7o0m3b6lKPHL;;T1l3a6mKPHL;:V1k3`6oKoGL;:W1j3_6PL^H6T1h3_6RL]H6T1h3_6RL\\H7V1f3^6SL\\H6X1f3\\6TL\\H6Y1d3\\6VL[H6Z1c3[6WL[H5[1c3[6XLZH5\\1b3Z6YLkGM;8`1b3Z6YLlGL:9a1`3Z6[LYH4`1_3W6]LYH4a1^3V6^LYH4a1]3W6_LXH4b1\\3V6`LWH5c1Z3W6aLVH5e1X3U6cLVH5f1W3T6dLVH5f1W3T6dLhGM99l1T3T6fLhGL89m1U3S6fLSH6k1R3S6hLRH6k1R3S6hLQH7n1o2Q6jLQH6o1o2Q6kLPH6P2n2P6lLoG7R2l2o5mLoG7S2j2o5oLnG7S2j2o5PMcGL7:X2i2n5QMmG6V2g2n5RMlG7X2f2l5SMlG7Y2e2k5TMhGNJ9c2d2l5UMgG;^2_2k5VMgG:`2^2j5XMfG:a2]2i5YMgG9a2]2h5ZMfG:c2[2g5\\M`GL2=g2Z2h5]MdG9e2Y2g5]MeG:e2X2f5^MeG:f2V2f5`MdG9h2V2d5bMbG9j2U2d5aMcG:j2S2d5cMcG9i2T2d5cMcG9j2S2c5eM]GL2j4BWK=i4BYK=g4BZK>f4BfHEf0h0d6CbHLh0?g6E_H1h08j6F\\H7i02k6GYHk0Jm6HWH`0k0Go6ITHc0n0Bn6JTHe0o0_On6LRHg0P1\\On6MQHh0Q1ZOo6NoGj0R1WOo6OnGk0S1UOP70mGl0T1ROo62lGm0V1oNo64kGn0W1kNo67jGn0Y1gNo6:iGo0i:QOWEP1h:POXEQ1g:oNYEQ1g:oNYER1f:nNZER1f:nNZES1e:lN\\EU1c:kN]EU1c:kN]EV1b:iN_EW1a:iN_EX1`:hN`EY1_:gNaEZ1^:fNcEY1]:fNdE[1[:eNeE[1[:eNeE\\1Z:dNeE^1Z:bNfE_1Y:`NhE`1X:`NhEa1W:_NiEb1V:^NjEb1V:^NjEc1U:\\NlEd1T:[NmEf1R:ZNmEh1R:WNoEi1Q:WNoEj1P:VNPFk1o9UNQFk1o9UNQFl1n9TNRFm1m9SNRFo1m9QNSFo1m9PNUFP2j9PNVFQ2i9oMVFR2j9nMVFS2i9mMWFT2h9lMXFU2g9kMXFV2h9jMXFW2g9iMXFX2h9gMYFZ2f9fMZF[2e9eM[F[2e9eM[F\\2d9dM\\F]2c9bM^F^2b9bM^F_2
a9aM_F`2`9`M_Fb2`9^M`Fb2`9^M`Fc2_9]MaFd2^9\\MbFe2]9[McFe2]9[McFf2\\9ZMdFf2\\9ZMdFg2[9YMdFi2[9WMeFj2Z9VMfFj2Z9VMfFk2Y9UMgFl2X9TMgFm2Y9SMgFn2X9RMhFo2W9QMiFP3V9PMjFQ3U9oLkFQ3U9oLkFR3T9nLkFT3T9lLlFU3S9jLnFW3Q9iLoFW3Q9iLoFX3P9iLnFX3R9gLoFZ3P9fLPG[3o8eLQG\\3n8dLRG\\3n8dLRG]3m8cLSG^3l8bLSG`3l8`LTGa3k8_LUGa3k8_LUGb3j8^LVGc3i8]LWGd3h8\\LXGe3g8[LXGf3h8ZLXGg3g8YLYGh3f8XLZGh3f8XLZGi3e8WL[Gj3d8VL[Gk3e8UL[Gl3d8TL\\Gm3c8SL]Gm3c8SL]Gn3b8RL]GP4b8PL^GP4b8PL^GQ4a8oK^GR4b8nK^GS4a8mK_GT4`8lK`GT4`8lK`GU4_8kK`GW4_8jK`GW4_8iKaGW4_8iKaGX4^8hKbGY4]8gKcGY4]8gKbG[4]8fKbG[4]8eKcG\\4\\8cKeG]4[8cKeG^4Z8cKeG^4Z8bKeG_4[8`KfGa4Y8_KgGb4X8^KhGc4W8\\KjGd4V8\\KiGf4V8ZKjGg4U8YKjGi4U8WKkGj4T8VKlGj4T8VKlGk4S8UKmGl4R8TKnGl4R8TKmGn4R8RKnGo4Q8QKoGo4Q8QKoGP5P8PKPHQ5o7nJRHS5m7mJSHS5m7nJRHS5m7mJSHS5m7lJSHV5l7jJTHW5k7jJTHV5l7jJTHW5k7iJUHX5i7iJVHY5i7gJWHY5i7gJWHZ5h7fJXH[5g7eJXH]5g7cJYH]5g7cJXH_5g7aJYH`5f7`JZH`5f7`JZHa5e7_J[Hb5d7]J\\Hd5d7]J[Hd5c7]J]Hd5b7\\J^Hd5b7\\J^He5a7[J^Hg5`7ZJ`Hg5_7YJaHg5_7YJaHh5]7YJcHh5\\7XJdHi5[7WJeHi5[7WJeHj5Y7WJfHk5Y7UJgHl5X7TJiHk5W7UJiHl5T7VJlHk5R7VJoHi5Q7WJPIi5o6WJQIj5n6WJQIj5m6WJTIh5k6ZJVIe5i6[JWIe5i6\\JVIe5i6[JWIf5g6[JYIf5f6ZJ[If5d6[J[Ie5e6[J[If5d6[J[If5c6\\J\\Id5c6]J^Ic5`6^J`Ic5_6^J`Ib5`6^JaIb5^6^JbIb5]6`JcI`5\\6`JeI`5Z6`JgI_5Y6bJfI_5X6bJhI_5V6bJjI_5U6bJjI_5T6bJmI^5Q6dJnI]5P6dJPJ]5o5dJQJ[5n5gJRJY5m5gJTJX5k5iJUJX5j5iJUJX5i5iJWJW5i5iJXJW5f5kJYJV5f5kJYJV5e5kJ[JU5d5lJ\\JU5b5mJ^JS5a5mJ`JR5_5oJaJR5^5nJbJS5\\5oJcJQ5\\5PKeJP5Y5QKgJP5X5PKiJP5U5QKkJP5T5QKlJn4S5SKmJn4Q5TKoJXNYOa6f5XKQKk4n4WKQKj4m4WKUKg4j4ZKWKf4g4\\KXKd4g4]KYKd4e4]K[Kd4c4^K\\Kb4d4^K]Kb4b4^K^Kb4b4_K]Kb4a4_K_Kb4_4_KaKa4_4`KaKa4\\4aKcK_4]4bKbK_4\\4bKdK_4Z4bKgK^4W4cKjK\\4V4eKjK[4U4eKlK[4R4fKnKZ4Q4hKnKY4P4hKPLY4o3hKQLX4m3iKSLW4m3iKSLX4k3jKULU4k3lKVLS4h3nKXLR4h3oKWLR4g3oKYLQ4f3PLZLP4e3QL\\Ln3d3SL[Ln3c3SL]Ll3c3VL\\Lj3c3WL]Li3c3XL]Lg3c3YL]Lg3b3[L]Le3b3\\L^Lc3c3]L]Lc3b3^L_La3a3`L_L^3b3bL^L^3a3cL`L[3`3fLaLY3_3gLcLV3]3lLcLS3\\3nLdLQ3]3oLcLQ3\\3PMeLn2\\3RMdLn2Z3TMfLl2Z3TMfLk2Z3VMfLi2Z3XMgLg2Y3YMgLg2X3ZMhLe2Y3^MdLb2[3_MeL`2[3aMeL_2[3aMeL^2[3a
MhL^2W3dMhL[2X3fMmKXLg0R6\\3iMeLV2[3jMgLU2X3lMhLS2X3oMgLQ2Y3PNeLP2[3QNfLn1Z3QNhLm1X3TNiLk1W3WNgLi1X3YNgLf1Y3SNQLTLh0h5W3TNPMk1P3VNPMj1o2YNoLg1Q3\\NlLc1T3]NmLb1T3]NnLb1Q3_NPM_1P3bNPM^1o2dNPM\\1o2eNQMZ1P3eNRMY1n2hNSMW1m2jNRMU1n2lNRMT1m2mNSMR1n2oNQMQ1n2POSMn0m2TORMl0m2VORMi0o2VORMj0m2WOSMh0m2YOSMg0m2ZORMe0n2\\ORMd0m2^ORMa0o2_OQMa0n2@RM?o2BPM>P3BPM=P3EoL;P3FQM8P3HQM7n2KRM3n2NPMPKUOQ5k3OSM0m22RMNm23SMLm25SMKm26RMIn28SMGl2;TMCl2>UMAk2`0TM_Ol2c0SM]Om2c0SM\\Om2e0TMZOk2g0VMWOj2k0UMUOk2l0TMSOl2n0TMROl2n0TMQOk2R1TMnNk2S1UMlNl2T1TMlNk2V1PMYJZO`4f3X1PMXJZO`4f3Y1oLWJ[O_4g3Z1nLWJ[O_4f3[1oLVJ[O^4f3^1oLSJ\\O]4f3`1SMSJVOY4f3f1VMZNi2h1VMWNk2j1gLoIH0KV4f3l1gLnIIOKV4e3n1fLmI0S4Y3Q2gLkI2R4W3U2fLjIE09Q4[3V2gLiIE19n3[3Z2fLfIH08o3[3[2eLfI5o3U3\\2fLeIL2I10k3f3^2dLeILU4a3W2aLeIMU4a3W2bLeIHM2V4d3Y2cLfIHQ4d3Y2eLgIFP4d3Z2fLfIFo3e3[2eLfIGm3d3^2eLaILNKo3d3c2RM[Mm2h2SMWMm2i2cL]I0NKl3b3k2bL\\I0MLl3a3l2cLZI00Li3a3m2lL[IFd3^3R3kL[IHb3]3T3jL[IH`3_3W3fL[IJ]3`3X3gL\\IH\\3`3Y3iLZIG\\3a3Z3hLZIG[3`3]3iLYIFZ3a3^3hLXIGZ3`3_3hLYIGW3b3a3gLWIGX3a3b3hLVIGW3a3e3gLgLX3Z3iLdLW3]3iLcLW3o0" + } + ], + "model_output": "A black laptop with a glossy finish, featuring a keyboard with white lettering on the keys and a trackpad with two buttons below it, sits on a table. The laptop has a visible logo on the lower right corner of the palm rest area." 
+ }, + { + "image": "images/caption_detailed_51.png", + "subject_name": "person", + "object_name": "person", + "predicate_name": "beside", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "bS\\1SEGg:=VEFg:;XEGf:;XEHd:4N2M2O2N1O101L3O1M3O1O0\\OWORCj0jI7I7L4M2O2M3N2M3N2N2M3O1M3N2L4L5WIiIc4[6SKTJb4R6]KQJ\\4T6cKoIV4V6jKlIn3[6QLfI^3k6_LWI`3l6]LVIb3R7ULPIi3T7bKoGOo0_4R9O2N1O2M2O2N1O2O1M2O2M2O1N3N1N3M3N1O2L3N3L4M3M2N3L4L5L5K4K6J6I8J6I7KU1iN_Rj0" + }, + { + "size": [ + 480, + 640 + ], + "counts": "nj_66l03V@`Rj0" + } + ], + "model_output": "A smiling man with short hair and glasses, wearing a blue and gray fleece jacket with a logo on the left chest, blue jeans, and brown shoes, stands with his hands in his pockets and a slight lean to his right side, attached to a bicycle." + }, + { + "image": "images/caption_detailed_53.png", + "subject_name": "handbag", + "object_name": "person", + "predicate_name": "held", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Qha0a0f=F^BY1j2N10000O0100O0001O0O101_NMXC074=FQ;;VD070a0Jm:;WDg0j0ROl:c1TE_Nj:a1UEbNj:^1UEdNi:\\1VEfNj:Y1VEjNh:U1WEnNh:R1WEPOh:P1WEROh:n0TEWOk:j0nD]OQ;d0lD_OT;a0iDBV;U200001cLgDP3[;lLgDT3b;2O0O2N2O1N2_EdL[9]3aFeL_9^3]FcLd9`3WFaLi9b3SF_Lm9\\2iE]N2\\OU:U2nEROR:l0QFROQ:=aFA`9fF_O\\9?gF^OZ9UOcEg0Y10T9YOgE`0[13P9\\OiEj8BmEM]1?h8CfE2m11^8j0dGSO^8k0dGSO^8l0cGSO]8l0dGSO^8k0dGSO^8j0eGUO\\8d0kGYOY8b0U3M3Jgh>" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"Y3e;[3000001O000000001O0000001O0000001O00000000001O0000000000001O0cKbLXM^3f2iLUMW3i2mLUMS3i2QMUMP3i2SMUMm2j2ZMPMf2[2eLYJk0W3`2_2hLWJl0W3]2`2jLWJk0W3]2_2kLVJk0Z3]2[2ZNdMl1V2UNiMl1V2TNjMQ2P2PNPNW2i1iMWNX2h1hMXNZ2f1gMYN[2e1eM[N\\2d1dM\\Nd2\\1]MbNk2W1VMhNk2X1UMhNj2X1XMfNi2Z1\\M`Nh2]1cMWN`2g1cMUN_2k1cMQN^2P2fMjM]2V2hMbMk2m1XMnMj2Q2VMnMk2R2TMnMl2R2UMmMl2R2UMmMk2S2WMkMi2U2XMjMh2V2]MeMc2[2bM`M^2`2eM]M[2c2gM[MY2e2iMYMW2g2jMXMV2h2kMWMT2j2mMUMS2k2nMTMQ2m2QNQMn1P3SNoLm1R3TNlLn1R3SNmLo1R3RN`LmKLQ6d3WNWLoK3j5f3dNZL\\1f3eNYL^OOaMi3P3XLZO5fMc3V7^LjHc3U7^LjHc3U7]LkHd3S7gLbHZ3^7QMWHP3^2VLa1Q1kKi2d2YL^1P1lKh2e2ZL]1o0lKj2f2nKbL6j4S1WKgN`0T4R3PLV1\\1eKg2T8]MgGf2W8\\MfGh2X8XMhGi2R4bKgKOm3b6aMmIh0Di1a6ZM`J;oNZ2d6XMbJ9kN_2f6UMbJ7jNe2g6PMdJ5gNj2i6mLdJ4dNo2m6hLbJ6aNS3n6dLcJ7`NU3n6cLeJ4^NY3n6QLhH9Q2N`3[4SKXLm0A>Ld3Z4RKYLi0C`0Ke3X4SK[Ld0Ed0He3X4SK]L`0Ff0Fg3W4TK]L>Fg0Fg3W4TK]L=Gh0Eg3W4TK^L;Hh0Cj3X4RK^L:Ii0Ak3X4RK`L5Km0\\Om3Y4QKbL0LR1XOn3Z4PKcLNMS1VOo3[4oJcLMNT1SOQ4\\4nJSNP1bMR4[4oJRNo0cMR4[4oJRNn0cMT4[4nJRNn0cMT4\\4lJSNo0`MV4]4kJSNn0aMW4\\4kJSNn0aMW4\\4kJTNc0SM^O=T5\\4kJTN?oMe4n3kJTN>oMg4m3kJTN=QNg4k3lJUN;RNh4j3lJTN;VNf4f3oJTN;WNe4e3PKTN;XNd4d3QKUN:XNg2fNgNn4WNTN:ZNe2gNiNk4XNUN9[Nc2gNkNi4YNWN7YNd2hNlNh4ZNWN5ZNc2iNmNg4[NVN4\\NQ1bN36=f4[NXN2[Nn0gN60?f4\\NXN0\\Nl0iN7Na0f4[N0l0_K6Lb0e4]N0h0bK7Jd0c4bNO?fK:Ie0b4cNO;iK;Gg0a4dNO7lK;Fi0`4fNN4mK^1BaNc0[1]OcNg0[1YOdNm0W1SOhNR1S1POkNU1Q1kNoNV1Q1iNoNX1YOVId1b5SO\\1l0dNTO\\N\\OfLN\\O110l1[1f3YOXNAdLIAa0n1_OWNP1^5F[N^OaLh0ESOg1n0h3J[N\\OaLR1[1Fi3L[N\\O`LU1\\1Ai3OZN[OaLX1Z1]Ok31ZNYOaL[1X1[On31XNYObL^1U1XOQ42WNXOcL`1S1VOS43VNWOdLa1R1UOT45aJSO[32nLa12]N=h0g47^JSO]31nL`1GkNg0:i4?lMiNoLa1IlNa0;k4`0]N8YLlNID`0h0Q5c0ZN5[LWO52W5b0YNnN\\L?03OO\\5b0ZNiNbL>I:LN_5b0YNgNeL>G_1[5]OXNfNgL=G_1Z5_OWNdNlL;Cb1Z5_OXN`NPM<_Oe1Y5@XOiN_Kg1Y5@YOgN_Ki1X5AZOaNaKn1V5@N`02CK=6FF::GE9;HD8dLR5kN`Jb4=cLS5kN`Jd4;aLU5lN_Jd4<_LV5oN[Je4=ZLY5ROYJe4>XLY5SOYJf4=WLZ5TOXJf4U7bMdIV2RO9[7bM]I_5c6bJ[I_5e6bJVIb5k6`JkHg5U7Q11O100O1O101O000O10010O0O100000010JZHoHg7Q7ZHnHf7R74O100O2O000O10XHQI_7n6:O[HSIZ
7l6eHUI?" + } + ], + "model_output": "A person wearing a white basketball uniform with the number 6 on the back, white shorts, and white sneakers with black accents is captured in mid-stride, suggesting movement, running on the playingfield. The individual has short dark hair and is looking to the side with a focused expression." + }, + { + "image": "images/caption_detailed_55.png", + "subject_name": "bowl", + "object_name": "table", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "ejb06Q=7I7I5K4N3M2M3M4M1O2N2N2N1O2N1O2N1O2N1O2N101N1O100O1O2O0O1O2O0O1O101N1O10001N100O2O0O2O000O100O2O0O100O101O0O100O1000000O10000O10000O10000000000O100000O100000000O10O1000O10000O10000O100O100O100O100O100O2N100O1O1O2O0O1O1O1O1O2O0O1N3N1O1O2N1N3M2O2M3M2N3N2M2O2M3N3M2M3N3L4L4K6HZQn5" + }, + { + "size": [ + 426, + 640 + ], + "counts": "d1Z29Bb0f0b4IkJBc0e0a4JjJDd0b0a4JjJFe0`0a4Q2_KPN_4Q2bKnM^4R2bKnM\\4U2dKjM\\4V2dKjM[4W2eKiMY4Y2hKgMV4DoJ`0k0LU4\\2jKdMU4]2kKdMS4]2mKcMS4]2nKbMQ4_2oKaMP4`2PL`Mo3b2PL^Mo3c2RL]Mk3e2UL[Mj3g2VLXMi3i2XLVMg3k2YLVMf3j2ZLVMe3k2[LUMd3m2\\LSMb3n2^LRMa3o2_LQM`3P3`LPM^3R3cLmL\\3U3cLlL\\3T3dLlL[3U3eLlLY3U3gLkLY3U3hLjLW3W3iLiLW3X3hLiLU3Y3lLfLS3[3mLeLR3\\3oLcLo2_3QMaLn2`3RMaLm2_3TM`Lk2i0[L[Oi0Lk2f0dLZOa00j2e0kLWO;5h2b0RMXO56h2`0XMXO08g2?]MWOM9d2a0aMUOL9b2a0eMUOI;a2?hMUOH;_2`0kMTOF=^2>nMUOD<^2=QNVOA=^2;TNWO^O>^2;TNWO^O>^2:VNXO\\O=^2:XNXOZO>^29YNYOYO?]27\\NYOXO?\\27]N[OVO>]26_N[OTO?]26_N[OTO`0\\24aN\\OSO`0\\23cN]OQO`0[22eN^OPO`0[21fN_OPO?Z22gN^OoN`0[20gN@nNa0[2nMdLc1S2OnN?o2ZOTN7mN`0o2XOUN7lNa0Q3UOTN:lN`0R3SOSN>jN?T3QOTN?hN`0U3POSN`0hNa0V3lNSNc0gNa0X3jNRNd0gNa0\\3eNnMk0eN`0a3`NkMP1eN`0a3gM^Ka0\\2X1eN`0b3fM_K?\\2Z1cNa0l3RNaM]1dN`0n3oM_Mb1bN`0P4kM`Md1`Na0P4kM`Md1aN`0P4kM_Me1aN`0Q4jM^Mf1aN`0R4hM_Mh1_N`0R4eMaMk1]N`0j6@VI`0j6@VIa0i6@VI`0k6_OUIa0k6_OVI`0j6@VIa0i6_OWIa0i6@WI?j6@VI`0j6@VI`0j6@VIa0i6@VI`0j6@WI?i6AWI`0h6@YI?g6AYI?g6BXI>h6AZI?e6A[I?e6A[I?j2fMIj1^Ma0h2iMFd1eMb0e2kMEb1gMc0d2lMD_1jMe0b2nMB\\1mMf0a2PN_OZ1RNf0^2RN^OV1VNh0\\2TNWLEk2^1d
Nh0Z2_NROg0fNj0X2_NROf0hNj0[2[NmNi0jNm0Y2YNlNj0lNm0Z2WNjNj0oNn0X2WNiNi0QOQ1V2UNiNi0ROR1d5lN^JT1a5lNaJS1_5kNcJU1]5jNdJV1\\5hNgJX1W5hNjJX1V5gNkJZ1T5dNoJ[1P5eNRKZ1n4eNSK[1l4eNVKZ1j4fNVKZ1i4gNWKZ1h4gNWKY1i4gNXKX1g4iNYKW1f4jNZKV1f4jNZKW1d4jN]KU1b4lN^KU1a4lN^KT1a4mN_KS1a4mNaKQ1^4PObKQ1\\4POeKo0[4QOeKo0Z4SOeKn0Y4SOgKm0X4TOhKl0W4UOjKj0U4WOkKj0l0nM]OY1Gi0j0QN]OW1Jg0i0SN[OX1Ke0j0SNZOY1Le0i0SNQMGd1b1b0d0h0UNoLHe1`1d0c0h0VNmLIe1_1g0a0g0XNjLJg1_1g0?h0YNhLKf1_1j0>g0\\NcLIj1_1l00n3d2dMXOPNTN>Oo3e2cMYOoMSN`0MP4f2aMZOoMSN`0LQ4g2aMXOoMVN?IR4j2_MWOPNVN`0FS4m2]MWOQNUNY5d2gLVOPNVN=IV4k2\\MVOQNVN60]4d2\\MVOQNWN32_4a2]MWOnMXN53^4^2_MWOlMZN65\\4Z2cMGPNQN\\4X2dMHnMSNo2@QNd2S1g0e0PMSNZ2W1g0b0UMTNT2Z1i0=YMVNn1]1j0;[MVN:]OOS2\\27_MXN3@3P2\\26aMXNMF5l1^23cMZNHI7j1^21gMZNCL8j1e0iMGU2K[N^O0;g1d0kMGR2N[N[O2=e1c0lMHQ2M]NXO4?b1d0lMIo1O]NSO8b0`1b0mMIm11^NPO:d0^1b0nMIk12^NmN>e0[1c0oMHi15^NiNa0g0Y1c0PNHf16aNeNb0k0V1b0RNGd19aNaNe0m0T1b0TNEa1=aN]Nh0o0S1a0UNE^1?aNZNl0P1P1b0VND\\1a0bNXNm0Q1o0b0VND\\1b0aNUNP1S1m0b0WNCZ1e0bNoMS1X1i0a0YNBY1f0bNmMV1Y1g0a0XNCY1g04UO;a0XNCX1h06SO:b0YNBW1i06TO9a0ZNBW1i06TO9a0ZNCU1i08TO9?ZNDU1i09SO8`0ZNDU1j08RO9`0ZNDU1j08RO:?YNDV1k08RO8?ZNDU1l09QO8?ZNDU1l09QO8?ZNDU1l09QO9>YNEU1l0:QO7>ZNEU1l0:QO7>ZNEU1l0:QO7>YNFV1k0:QO8=XNGV1k0;PO7>WNHW1j0;PO7>VNHY1j0:QO7Y2a7VNQHA>Z2`7VNVIj1j6VNVIj1j6WNUIi1k6WNUIi1k6XNTIh1l6XNTIh1l6XNTIh1l6XNTIi1k6XNTIh1l6XNTIh1l6XNSH^O9[2OfMc6a0UI^O9[2NgMd6`0UI^O:Z2MhMd6a0UI\\O:[2MiMc6`0VI\\O:\\2LjMc6>VI^O:Y2MmMa6gM]O:W2MQOX2oNJ_OA]NSO?S2IiNl1c0\\O_O`NSO>R2JiNj1h0ZO[OeNQO=S2JiNi1m0VOWOjNPO=T2IgNj1Q1TOUOkNoN>T2IgNi1S1ROUOnNmN?S2HgNh1W1oNUOROjN?S2HgNg1c1dNjN_OhN>T2HfNg1l1ZNfNIcN>U2HeNg1S2QNdN2`N=T2IeNf1Z4SNmJ>T2IdNf1\\4TNkJ=U2IdNd1^4VNiJ>T2HeNb1a4XNeJ>U2HeNa1b4YNdJ>V2HbNa1f4XNbJ?U2IcN_1g4YNaJ?U2IcN^1h4[N_J>V2IcN]1j4\\N\\J>W2IcN[1l4_OaLVOcN[1l4@`LUOcN[1n4@_LUOcNY1Q5A\\LWObNW1S5B[LWObNU1V5CXLXObNT1W5EVLWOaNU1Z5DULWOaNS1]5ERLXO`NS1_5EQLXO`NR1`5FPLXO_NR1c5EnKYO_NQ1d5GlKYO^No0i5GiKZO^Nn0j5IgKYO^No0k5HhKXO]No0l5IgKXO\\Nn0o5JeKXO[Nn0R6IcKYO[Nl0U6K_KYO\\Nk0V6L]KZO]Ni0W6M\\K
ZO\\Ni0Y6M[K[O[Ng0[6NZK[O[Nf0]6OXKZO[Ne0_61VKZO[Nd0`62UKZO[Nc0a64RKZO\\Nb0d63PK[OZNb0h64nJYOZNb0j64lJ[OXNa0m64jJ\\OYN?n66hJ[OYN?Q75fJ\\OYN=S77dJ\\OYN_IDZNMX8`0]IC[NLY8a0\\IC[NJ[8c0ZIDZNH]8c0ZIEZNF^8d0XIFZND`8f0VIGZNBa8f0UIH[N_Ob8i0SIG]N^Ob8j0QIH]N\\Oe8k0nHJ[N\\Og8j0nHJZN\\Oj8i0lHK[NYOl8k0jHKZNYOn8k0hHL[NWOf0M[7P1cIL[OVOR7n0cILZOXOR7l0dILXO[OT7i0cILXO^OS7f0eILWO@S7e0eIKWOBS7c0fILVOBS7b0gILUODT7?gIMTOGS7=hILTOHT7W2SAjMk>V2UAkMj>V2UAmMh>S2WAPNh>Q2PAWNn>_3N2O1N2N2N2O0O2N1O2N2N2O0O2O0O2N2O0O2N101N1O101N1O1O100O2O1N1O2O0O101N1O100O2N100O1O100O100O101N100O1O100O2O0O100O1O100O1O100O1O10O01O100O010O1O010O00100O010O0010O01O1O00100O001O01O10O010O010O01O010O0010O00100O0010O0010O01O010O01O010O01O01O010O0010O001O010O01O010O01O0010O1O010O01O010O001O010O010O001O010O10O01O01O01O10O1O01O0010O01O010O010O010O001O10O010O01O010O1O0010O10O1O001O010O1O10O01O001O010O1UKbBS3_=f11O001O100O1O00100O100O00100O1O1O010O1O1O1O1O101N1O1O1O101N1O1O2N1O2O0O2N1O2O0O2N2N2N1O2N1O2N2O1N1O2N2N1O2N3M2N4L4M4K7I2N2N2N2N1O2O1N2N2N2O1N2N2N2N2O2M4L3N2M6J:F3M1O3N1N2N3M3Mf6" + } + ], + "model_output": "A golden-brown hot dog bun with a soft, fluffy interior and a slightly crisp exterior, featuring a split top design to cradle its contents, is attached to a hot dog." 
+ }, + { + "image": "images/caption_detailed_57.png", + "subject_name": "laptop", + "object_name": "table", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "0V5j90WFlJf9W501O1O4L4L1O2N2N5K2N3M1O3M2N3M2N3M2N3M6J1O002N4L3M1O12O0100NO2N2O3M1O01O02M7J2M1O2O2L3N3L2O1N5J:E4N2O004N2NO2N0101O1N8H0O00003N1N2N2N2M3N1O0000O1O1O2O0O1M3O1O0100000O001O01M3O1N2JkE_KW:_46N2O00100O10001O0001eEdKP:\\4oEfKP:[4PFdKP:]491iEfKf9]4QFiKo9d40000000000000000000000001O00001O1O001O000iKRF\\3o9`LUF_3k9^LYFa3g9]L[Fc3e9\\L\\Fd3d9[L]Fe3d9XL_Fg3a9XL`Fh3`9VLbFj3Z:0O10000000000M3O1000000O1HmK`E0NT4e:mK[ES4e:nKZEQ4f:5L4000000LeKbE\\4\\:5N2000XMkEe0T:XOWFa0h9\\ObF>Z9lMnEc1R1:T9CQG;o8\\OjElN[1f1k8ZOaGe0^8ZOdGf0\\8WOhGh0U8YNmEn0o1i0T8XOnGh0R8WOPHh0o7UOUHk0k7QOYHo0g7POZHP1f7nN]HQ1c7mN_HS1`7jNcHW1U7ZN[F=c2Y1Y7gNgHY1Y7fNhHZ1Y7dNhH\\1X7bNiH_1W7`NfHIZMg1P:]NiHLWMg1P:[NoHe1U:1M3JVNUCk1P=00000K5LoMYCQ2j<100M3O100000000O100000000O1O1001O00000O2O1O1O2N2N001O1O1O1O001O001O1O0000001O1O00000000MYNmBg1S=YNmBg1S=YNmBg1S=YNmBg1S=YNmBg1V=000001O000000000000001O00000000001O00001O1O00000O2O00001O001O001O1OYMfNhGZ1W8gNjGX1V8iNiGW1W8iNjGV1V8kNjGT1V8lNkGS1U8nNkGQ1U8oNkGQ1T8QOkGo0U8QOlGn0S8TOmGk0S8UOnGj0R8VOnGj0R8VOoGi0P8YOPHf0o7[OQHe0o7[OQHe0o7[ORHd0n7\\ORHd0m7^OSHa0l7@TH`0l7@UH?j7CUH=k7CUH=k7DUH;i7GWH8i7JWH5h7LXH4h7LXH4g7MZH2d70\\H0c72\\HNc73^HLb74^HLa75_HKa75`HJ^78bHH]7:bHF^7:cHE\\7=cHC[7?eHBZ7?eHA[7?eHA[7?eHA[7?eHAZ7a0eH_O[7a0eH_O[7a0eH_O\\7`0eH_O[7a0eH_O\\7a0bH@^7`0bH@_7?bH@^7`0bH@^7a0aH_O_7a0aH_O`7`0`H@`7`0`H@a7`0^H@b7`0^H_Oc7a0\\H@e7`0YHAg7?YHAh7>WHCi7>VHBj7>VHBj7>VHAl7>THBl7>THBm7>RHBo7=QHZOeMB[:T1PHUO^8k0bGSO_8m0aGSO_8n0aGPOa8o0`GPO`8Q1_GoNb8P1^GoNc8Q1^GlNd8U1[GjNf8V1ZGjNg8U1ZGjNf8V1c2O10O01O01O0010O01O01O1O010O001O001O01O000000010O00100O010O0001O3M2O0O2N2N2O2M1O2N3N0O3N1N3M2N3N1NTgZ3" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"[5e2=`1KdM`5l0eJ`1KdMa5k0cJb1LcMc5i0`Je1MbMe5h0]Jf1NbMg5f0[Jh1NbMk5b0WJl1NbMm5`0UJm1OcMn5>SJo1OcMP6R1V8]O]G@>R1U8]O^G@`0Q1S8]O^GBa0o0Q8^O`GAb0o0n7@`GAd0m0l7AaGBd0l0k7BbG@d0n0j7AcGAd0m0h7CdG_Of0m0f7CfG^Of0n0d7DfG^Og0m0d7DeG_Oh0l0P2eNj1o0_K_Oh0m0b7DfG^Oj0m0`7DgG_Oj0l0_7EgG_Oj0l0^7EiG^Ok0l0\\7EiG@k0k0\\7EiG_Ol0l0[7DjG@k0l0[7DjG@l0k0l1mN]1g0kKAm0k0k1POYLLh4g0XLBm0j0j1ROY1b0PLAn0k0j1QOX1b0QLBm0k0j1SOW1>SLCm0l0i1YOQ18XLDn0k0j1XOP18YLEm0k0j1YOo06[LEm0l0j1XOn06\\LFl0l0j1_Og0ObLFn0l0i1Bd0KfLEP1m0g1C7ZOPLa0R1DR1n0e1G3WORLa0T1CR1n0e1H2VORLb0T1AU1o0d1JNNYMYOU1o0d1KMLZMZOV1o0d1JLMZMYOW1P1c1JKM[MYOY1o0b1KIM\\MYOY1o0c1JHM\\MZOZ1o0b1KFM\\M[O\\1m0b1LEL\\M[O^1m0a1LEK]M\\O]1m0b1KDL]M\\O\\1n0c1KCJ^M]O]1n0c1JBK]M^O^1m0c1K@K^M]O`1m0c1J_OL^M]O`1m0c1J@J^M^O`1n0c1I_OK^M^O`1n0c1J^OJ_M]Oa1o0b1J^OI`M^O`1o0c1I]OJ`M\\Ob1Q1a1J\\OHbM]Oa1Q1a1J\\OHbM\\Ob1R1a1JYOIdMZOc1S1`1KXOGc0>5KXOGb0?6JXOFc0`05JXOFc0`05KWOEd0`05KWODe0a04KWODe0a04LVOBg0b04KUOBh0c03KUOBh0c03LTOAi0c03LTO@j0d02MSO_Ok0d03LRO_Ol0e01NRO]Om0e01OQO\\On0e02NPO\\Oo0f01NPO\\Oo0f01OoN[OP1f01OoNZOQ1g00OoNZOQ1g00OPOYOP1h000POVOP1k00OPOVOP1k000POTOP1l000QOSOo0m001POQOQ1n0O1POQOQ1n0O1QOPOP1o0O2POoNQ1o0O2QOmNQ1Q1N2ROkNP1T1O0ROkNo0U1N1TOhNo0W1M2d3N\\L2d3N\\L2d3N\\L2d3N\\L3c3M]L3c3M]L4b3L^L4b3L^L4b3L^L4b3L^L5a3K^L6b3J_L5a3K_L6`3J`L6`3J_L7a3I_L8`3H`L8b3F_L9a3G^L:c3E]L;e3C[L>e3A[L?e3A[L`0e3_O[La0e3_O[Lb0d3^O\\Lb0d3^O\\Lb0e3]O[Lc0e3]O\\Lb0d3^O\\Lc0b3^O^Lb0b3^O_La0a3_O_La0a3_O`L`0`3@bL?]3AeL=[3CfLhM@X2`0kM]OU2c0lM\\OT2d0nMZOQ2g0PNYOo1g0SNWOm1i0UNUOj1l0WNSOi1m0YNQOg1o0_NjNa1W1T60000000O100000000O1000000O10000O1000000000000O100O100000000000000O100000000O100000000O1000000O1O100O1O100O100O1O10000O1000000O1000000000000000000000000001O00001O000000001O00001O000000001O000000000000001O00000000001O00eB" + } + ], + "model_output": "A black laptop with a visible keyboard featuring white lettering on the keys, a touchpad, and a series of ports on the left side including USB ports, an HDMI port, and possibly a card reader slot, is placed on the table. 
The laptop has a visible hinge mechanism and a partially visible screen with a thick bezel." + }, + { + "image": "images/caption_detailed_58.png", + "subject_name": "person", + "object_name": "surfboard", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 432, + 640 + ], + "counts": "_nd22\\=3N2N100O11O000000QFJe65ZINd62ZI0f60XI2i6MTI6m6JnH9S7GkH;U7EhHj7BTH?m7BoG?S8AkG?X8@fGa0[8@bG`0`8@_G?c8A\\G>g8BWG=k8CTG;o8EPG9T9GkF7W9JgF6[9IdF7]9JaF6a9J]F7c9I]F6d9J[F7f9IXF7i9JUF7k9IUF6m9JQF7o9JoE7Q:KlE5MBP9lFO0ET9=kF2JD[9>fFR1Y9QOeFo0[9SObFn0^9SO`Fm0a9UO]Fk0c9WOZFj0g9XOUF?U:Y16WO`EZNh:c1d0O2M101O0O101O001N2O1O1O001N2O1O1N2O2M4L=C3L5K6Hejf4" + }, + { + "size": [ + 432, + 640 + ], + "counts": "c\\k24\\=001O00000000001O0O100000001O000000000000000O1000001O0000000000000000000000000O1000000000000000O10000000O1000O100000O01000000O10O0100O10O0100O1O100O2Meab4" + } + ], + "model_output": "A person in a full-body black wetsuit with long sleeves and legs, featuring a front zipper and a logo on the left chest, is playing with a surfboard." 
+ }, + { + "image": "images/caption_detailed_59.png", + "subject_name": "horse", + "object_name": "sand", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "VSR21[=0ZS_16]l`N5K4M3M4L5QDUOT;R1bDVOZ;`1M3L4M3N2N3M\\OPE@\\:=jEBU:8QFHn98QFJo97oEJQ:h10]OkEPNT:n1SFmMn9U2c02N00]OWEcNj:\\1[EYNI1m:d1d0N2N30\\EaNZ9b1bF`N]9b1`F_Na9`1^FaNc9^1[FdNe9[1YFhNg9X1UFlNl9S1SFlNP:S1oEkNU:V1eElN]:o12N2M4L5M3L2B`0H7L5M3N2M3N001N101O0@eC1[d3^8bLaG^3_8aLUGM6b3e8aLUGN6`3e8oLTGS3l8f000O1000000000000000000000O11O000000000000000000000000000000bLTG]2m8QM[GDI10Z3l8mLcGX3]8dLhG[3X8cLjG]3R9000000000004L0000000000001OO1M3N2000000000000001O000000001O1O001O00000000001O00O1WOdLcG\\3T92002N1O000000000000O1O100O1O2N00101N1O1O1L4001O1O2N3M1O1O0000000000000000001O2N1O0000001O00mNYMaGh2\\8_M`Ga2\\8dMcG\\2\\8fMcGZ2]8Z11OO101N1000000000000O10000000000000000000000000000000000001aK`GQ4Q9L2N3M2N2N2N8SLaF5KR3R:00O2O000001O0000O01AQMbFo2]9TM`Fm2`9TM_Fl2a9UM^Fk2a9UM`Fk2_9RMfFm2Y9oL`FM8T3h8mLPGO682l2g8cMZG]2f8`M]G`2c8^M_Gb2`8ZMeGf2[8YMfGg2Y8ZMgGf2^8PMnFKf0U3\\91M3M3O1L4N2M3N2N2N2J6E;M3O10000O1001O3M>B>oK^F]3l9K2O1O00001O0000lNmLRG>:e2_8jM]GV2`8PN\\GQ2^8kL`GW10n1_8a110000000000O1000000O10000O10000O10000000000O1000000000000000000O100000001O0000000O100000000000000O1000001O0O10000000000000000O2O0000000000001O0O1000001O00000000000O1000000000000000O100000000000O1000O1000000000000000000000000000000000O11N100000O11O000000O11O00000000000000000000000000000000000000000000000000000000000O100000001O0000000000000000000000000000000000000000000000000O2O01O00O1000000000000000000000000000001O000000000000000000000000000000000000000000001O00000000000000000000000000000000000000000000000000000001O0001O0000000000000000000000000000000000001O00_OWG`Li8`3XG_Lh8a3YG^Lg8b3YG^Lg8a3ZG_Lf8a3[G^Le8b3[G^Le8b3[G^Le8a3]G^Lb8c3^G]Lc8b3]G^Lc8a3^G_Lb8a3_G^LW1" + } + ], + "model_output": "A dark bay horse with a sleek coat and a black mane and tail stands on the sand. 
The horse features a white blaze on its forehead and white socks on its front legs, with a muscular build, prominent withers, and a well-defined neck." + }, + { + "image": "images/caption_detailed_60.png", + "subject_name": "banana", + "object_name": "bowl", + "predicate_name": "beside", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "c`]5c0\\>`0@3N1N2O1N2O0O2O1N101N2O0O2O1N101O0O2O0O2O001N10001O000O2O00001N100000001O0000001O0000001O0000000O11O000001O00000000001O0000001O00000001O0001O00000001O0001O00000000010O00000001O01O00000001O01O000000010O0000010O000001O01O0000010O0001O01O00010O0001O01O01O01O00010O000010O00010O00010O00010O0100O0001O0001O10O001O00010O010O02O0O1O010O101N010O1O10O02O0O1O100O100O100O2O0O101N101N1eGZM[4f2aK`MbLK[7g2PLaMaL1Y7_2SL[Ng3g1TL_Nj3a1SLdNj3^1QLeNP4]1kKeNV4\\1dKhN\\4\\1\\KgNf4[1SKgNo4\\1gJgN^5\\1YJgNj5_1gIfN\\6c1RIdNQ7c42O2N1N3M3N1N3N2N1O2N2M3N1O2N2N1O2O1N1O2N2N2N2N2O1N1O2O1N2O1N2O1O1N2O1N2O1N2O1N2O1N2O1N3M2N2O1N2N3M2N3M2N3M2N2N3M2N3M3M2N3L4M3M3L4L3OXJ" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"hZ^55j>3N1M3eI9YMJe2k0iGmNe4:`3_1UL`Ni3c1VL\\Ni3e1YLZNe3g1[LZNc3g1^LYN`3h1`LXN_3i1aLWN^3j1cLVN[3k1eLUNZ3l1hLRNW32aHV1Z4gNS31hHV1W4gNP33nHR1T4iNm26RIn0Q4mNk25XIk0n3oNi27ZIi0m3POh27]Ih0k3QOg28_If0j3ROf29aI7VOAc4Oe29dI5WOA`41d2:fI2WOC_41b2lIIV4Jl1=QJFT4Mj1>SJDS4Ni1?TJCS4Nh1`0VJAR4Og1a0WJ@R4Ng1c0XJ]OQ41f1c0ZJ[OP42e1c0\\JZOP43c1d0^JWOP45a1e0O[O0f00ZOOf02ZOMg03YOLh04XOLh04XOKh05YOKg05YOJh06XOIh07YOIg07YOHh08XOHh07YOIf08ZOGg09YOGg08YOIg07YOIg07YOIf07[OHf08ZOHf07[OIe06\\OJd06\\OJc06^OJb05_OKa05_OKa05_OK`06@J`06@J`07_OI`09_OGa09_OGa09_OGa0:^OFa0<^ODb0<^ODb0=]OCc0=]OCc0=]OCc0>\\OBc0?]OAc0?]OAc0?]OAc0?]OAc0?]OAc0?]OAb0a0]O_Oc0a0]O_Oc0a0]O_Oc0a0]O_Ob0b0^O^Ob0b0^O^Ob0a0^O@a0a0_O_Oa0a0_O_Oa0a0_O_Oa0`0@@?a0A_O?a0A_O?a0A_O?a0A_O?a0A_O?`0B@>`0B@>`0B@=a0C_O=a0C_O=a0C_OX15nJ_Oh3=Z14nJ_Og3>[13mJ@g3=]13kJAh3<]12kJCg3;_12iJDg3;`11hJEg3;a10gJFg3:c10eJGg3:d1OeJFg3;e1NdJHf3;f1McJIg39g1NaJJf39j1M_JKf38l1M^JKe38n1M\\JLe38o1L[JMd38R2JYJ0d36T2JWJ0e36U2JUJ1e35W2JRJ3f33Y2JPJ4f32[2JnI5NZOg2f0_3KjI7OZOg2c0a3MfI81YOf2c0d3LbI:3YOf2a0f3M^I;5XOf2`0h3NYI=7WOg2>j33nH=?TOg2=n3a1ZISNf2=Q4`1WIUNg2:T4a1SIWNg29X4_1oH[Ng26[4_1lH]Ng24`4^1fHaNg23d4\\1bHdNi20g4[1]HiNi2Mk4_2RKbMQ5^2lJcMU5]2gJfM[5Z2bJfMa5[2ZJgMh5Z2RJiMP6Y2jIiMX6Y2_IlMd6V2RIoMP7X2_HoMe7d42M3N4L3M4L9GA;E?B=B;D9GmeT1" + } + ], + "model_output": "A ripe banana with a bright yellow peel, exhibiting a gentle curve and a smooth surface with a few small brown speckles, is placed beside a bowl." 
+ }, + { + "image": "images/caption_detailed_61.png", + "subject_name": "person", + "object_name": "wall", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 482, + 640 + ], + "counts": "W]o31o>4M2N2[D1T81gG9R8IiG?R8CkGm0f7UOWHR1c7oNYHV1e7lNXHW1g7kNkGa1T8bN_Gj1^8\\NUGm1j8VNkFS2U9PNbFV2]9h1O10000O1O1O10000O1000000O1000000hMiJbH1ONk1X5Z5^KdJb4X5dKfJ\\4m4SLQKm3n4XLnJh3R5ZLkJg3T5aLeJ_3X5gLeJY3Y5mLcJS3Z5SMcJm2\\5XM_Ji2`5[M]Je2\\5eMaJ[2Y5nMdJR2W5UNfJl1W5ZNfJf1W5_NgJa1V5dNgJ]1V5hNgJY1S5^4O1N2O1M3M3N2N2O1O1L400O1O1O1O1O1O1O100N200OoGiKY5W4i2O1O10000O100O100O10000O1000000O1000001O0O0100001OO01000000001O00000000001O00001O1O1O0000001O0000010O0O2O1O1O001O000010OO2O1O1O1O1eIfKl1[4RNiKk1X4TNkKi1V4UNnKh1S4WNnKh1S4VNPLh1R4UNPLj1R4SNQLk1P4SNQLm1Q4PNQLo1Q4mMQLS2Q4iMQLW2Q4fMPLZ2S4aMPL^2S4]MoKd2U4UMnKj2Z4jLjKU3a4`JSJ11O0_1_1P4]5]KfJb4a5UKaJk4`5PKdJP5^5nJcJQ5^5mJcJS5^5kJdJU5\\5iJeJW5^5dJdJ\\5b7N100010OO100010O01O001N110O0O1000000jJWFS5i9mJWFS5i9lJXFT5h9lJXFT5h9mJWFS5i9mJWFS5i9mJWFS5i9lJYFS5k900001O01O1O001O1O0O1000O2O0O10000000O101O0001O01O0O101O00001O0000001O001O1O1O1O00001O001O001O1O00001O0000001O1O1O1O1O1O1O1O1O1O001O1O1O1O1O001O001O1O1O001O002N1O1O001O001O1O1O1O1O1O001O1O1O1O001O2M2O0O2O001O100O1N2O010O1O1O100N101O1O1O1O1O00100O1O1N2O00011N2M110O002N1O1N110O1O2N1O001O1O1O2N001O002N1O1O001O2N1O1O1O2N2N1O1O2N1O1O1O2N1O1N4L2N2O2L4L5K:DRo0" + }, + { + "size": [ + 482, + 640 + ], + "counts": 
"Zn071200001J0005O11J0O2O50K0:0K0J10001O0O5^2IiM7JKR7h4WJSMb5o2ZJUMe5l2WJWMi5l2SJUMm5m2PJTMP6o2iIUMW6n2bIVM^6f510000000000O100000000000000O1000000000O010000000000000000O10O11O1O0O2ROaIWI_6a6U1K5K5L4N102N1O1O00010O10000O1O1N200O100000000000000O10000O2O000O10O1TH^JX6b5fIfJT6Z5lIhJR6W5nIlJP6U5oIkJQ6U5oIkJR6U5lImJS6S5lInJT6R5lInJS6S5lImJU6S5jInJV6R5iIoJW6R5hInJX6R5gIoJY6Q5fIoJ[6e3RI`La0L]6a3XI`L:O_6_3[I`L61_6^3]I`L33`6\\3`I`LO3c6[3aIaLH7g6W3cIaLAVOES1X7U3cIbLB9n6R3aIfL_O9R7o2`IhLZOg0QO_OKKB2H7CJo11f2S1XKiN1c1h0_O]O^OLK0GW34^1k2PLUN^OYONMNFo0LW19_2i2QLWN^OWOONNF<`0f1Fb2f2PL\\N_OROO1NE;c0e1Dc2d2PL_N@POO1OD9f0e1Dc2e0]Ke0e0H_OnNO4OA9h0d1Ce2c0]Ke0e0L]OlN05N_O:j0d1Cd2`0`Kf0c0N^OiN07;:`1Cc2?^Kh0e0O^OeN298:b1Bc2e0^K;f08\\OaN3=5:d1Bd2b0^K9h0=YO_N4>49e1Cc2n1XLZOWO^N6>2;e1Bb28`Kg0j0=XOTN6d00:f1Bb28`Kc0l0c0WOPN5h0O9e1Cd25`K>Q1i0]OiNH9f1Ba2[OdKh01?n0m0]OgNH8g1Cc20dKd0i0m0_OeNJ8e1Dc2DeKI3V1e0m0AdNI9e1Ec2BgKH0Y1e0n0BbNH:f1Cc2@hK_1d0c0]OeMOX2c1cNd2^OhKa1c0b0_OdMOX2b1eNc2_1XL3CaM0Y2a1eNb2a1UL4G\\M2[2^1eNc2h1kK41C_1cNc2IoKl3NkM^1bNc2HQL_6[1kIc2V7[MlHd2U7ZMlHf2V7VMmHi2T7UMnHk2S7QMPIn2R7nLQIQ3Q7kLRIT3Q7eLSI[3Q7_LRI`3Y9O1O1O1O001O1O1O1O1O001O1O1O1O1O1O1O1O1O001O001O001O2N001O1O1O1O1O001O1O1O1O1O1O1O1O1O1O1O001O1O1O2N002N001O1OhMRN]Gm1`8YN^Gf1`8_N^G`1`8dN_G[1m2WNa0a0aLW1l2^N=>gLS1i2cN>=iLn0g2jN<:mLj0f2oN;9oLg0d2SO;8QMd0c2WO98TM?b2[O87XM=]2_O:4ZM<[2A:5[M:Y2C;4]M8V2F<3^M7T2H<2aM6R2H=3aM4Q2K=1dM2o1M=1eM1n1O<1gMMm13<0iMJl16:1kMKg15>0lMKe15`0OlMLb17`0OnMJ`19ROWNbM1h0h1UOVNc1_o0" + } + ], + "model_output": "A woman with short brown hair, wearing a black V-neck top and a gray cardigan, stands in front of a wall. She has a black helmet with a badge on the front, a neutral expression, and is adorned with a large circular necklace." 
+ }, + { + "image": "images/caption_detailed_62.png", + "subject_name": "person", + "object_name": "wall", + "predicate_name": "sitting on", + "mask_rles": [ + { + "size": [ + 444, + 640 + ], + "counts": "mm[43g=3N1O2O001O001N1010O01O0aJD]M1=Q1]OS2I_Me4a0bKoNa0n0@S3\\4lMnKROHQ3Y4QNiKPOOo2W4UNbKPO8k2U4NlK3S4LmK5R4KoK5P4LPL4o3MQL3o31mKOS43iK0U4l40000O10000000YHfKl5Y4l101O00000O10QOnKTHR4g7XLTHh3f7cLTH^3d7PMUHQ3b7i1^OmIVIX6g6a0M2O20O000001O0001O000001O00010O0000001[JTIX4l6`K`I\\4a6bKaI]4_6bKcI]4]6cKdI\\4\\6cKfI\\4[6nJQI9h0h4W6mJTI9f0j4W6kJVI4i0P5R6kJ]JU5V7O1O010O001O001O1O1O1O1O1O2N1O001O1O2N001O1O1O1O1O1O002O0O1N200O2N1O1O1O2[N[FoNf9c0YFSN0L4\\1d9l0XFhM3]1g9i0VFjM2^1j9f0TFlM2^1l9h0UFWOm9h0RFXOo9Z23M2M4M2N3L3N4L6H4M4L3N3L3L6J4M3M4J:D8I6M5GUCWOZ=6c__1" + }, + { + "size": [ + 444, + 640 + ], + "counts": "o_l5:P1Ib9?gFNjND?3n93L3O1O1O1N1O2O001N2O2N2M4M1O1N2N0O2O1O1O1O001O2O000O010O2O000000010O1O:F2N2N1O001OO100010OO1O011O002M5L1O1O1O2N1O2N1O1O1N2O1N2N2N8G6D`ALcml02WaSO2M2O2N1O2M2O2K6L3N1O2N1O3M2N2O0O2N4L2O0YHeNi1\\1UNiNg1X1UNmNh1V1VMeNQL?b6n0YMjNiKe0i6b0\\M:_2H]M`0^2B_Mc0\\2@[Mj0a2ZOYMm0c2TO[Mo0c2ROZMQ1e2SOUMR1g2SORMR1k2QOQMS1l2ROnLR1o2TOkLP1R3QOlLR1Q3POkLT1S3nNkLT1S3lNlLW1R3jNlLZ1R3eNnL^1o2aNPMd1n2\\NQMf1m2ZNSMh1l2VNUMl1j2RNWMo1h2PNWMT2g2kMTM\\2l2dMQM`2n2_MPMd2o2[MmLk2S3TMgLS3Y3lL`L\\3_3dL\\Lb3d3^LWLg3i3YLTLj3k3WLPLn3P4SLjKR4V4oKUK[M200P7h4S3000000O1O100O0010000O2N100O100O00100O100O100O1O2O0O001O100O100O1nMiEnMFLNO;b0Z:\\1\\ESN_1>U9Z1XGcNi8Y1_GcN`8[1eGaN]8V1nGfNT8Y1j2N2O1O1O2O000000O01000001M200O100O10O01O1O2O0O10000O10000O1000000O100000nFXOl3h0V501O0000O0101O0000000000000000000001O00001O00000YFXOV5i0`4001O001O001O1O001O001O001O1O1O1O001O001O1O1O1O1O2N2N1O1YEcNi7^1VHbNj7_1THcNk7^1gFiNFKc9^1VF`N@i03^OV:Z1oEgNEZ1\\:1iEjNIV1^:3aElN0S1^:g2O1O00001O001O1lMeKaI\\4Z6iKeIX4V6oKgIR4S6VLjIj3Q6\\LnId3o5bLPIQOCF:5M5JYbd2" + } + ], + "model_output": "A white football with a pattern of blue curved lines and a blue and white logo is beside a person." 
+ }, + { + "image": "images/caption_detailed_64.png", + "subject_name": "car", + "object_name": "road", + "predicate_name": "driving on", + "mask_rles": [ + { + "size": [ + 543, + 640 + ], + "counts": "kRZ83h`09H9a_OQ7l7jIdGWOkNgAT1^>bNhA]1n>O001O00000000O100000000O100O100O1O1SO]NXBi1g=h0M300O100O1M3O100M3O1O1N2O1O1O1N2N2N2M3O100L]LkBd3Y=O1O3M;cLWBn2n=O00cMQMkFn2U9RMSGf2j;ZMdA[2d>L1O5K4L6J2N1O1O1O1O2N0000000000O1O100N2H^Nn@g1m>8FPN_A10P2W>c000001O1O7`M^AU2h>N4L1O002N1O1O00000000000000O100O1lKTNSIn1h6YNTIi1l6aNiH`1V7YO_FeNYOOd0S2d9B^E\\NOU1l0m0h9T1TFmNl9X1nEiNQ:Y1nEgNR:Z1lEgNS:\\1jEeNV:_1eEbN[:b1]EbNc:m3O1O100O1O100O1O1O11O1O1O1O001O1O0000001O1O0000001O1O1OO1O13M2N1O001O1O001O0000001O2N001O001O001O002N1O001O00001O000000001O1O000000000000O11O001O0000000000O1001O001O000000000lJhDa4W;_KkD`4U;_KlDa4T;_KmD`4T;^KoD`4Q;`KPE_4P;aKQE^4o:bKQE^4o:aKSE^4m:aKVE]4j:cKWE\\4i:eKVE[4i:fKXEY4h:hKWEX4j:gKXEW4h:iKXEW4h:hKZEW4f:iK[EV4e:jK[EV4e:jK\\EU4d:jK]EV4c:kK\\EU4d:kK]ET4c:lK]ET4c:lK_ER4a:nK`EQ4`:nKbEQ4^:oKbEQ4^:nKdEQ4]:nKdEQ4\\:oKdEQ4\\:oKdEQ4\\:oKeEP4[:PLfEo3Z:QLgEn3Y:RLgEn3Y:QLhEo3X:QLiEn3e;O1O1O00lNRLWEn3i:RLXEm3h:SLYE9A5]OHNl1k;nM[E4c0g1R:UNaGd1o;N8H3fN[@T1j;lNRHS1n7mNSHR1l7oNUHP1k7POWHn0j7QOYHl0g7TO\\Hi0m;01O00000000001O1O0000001O000000O11O01O000000O100000000001O00000O1N110O1O1L5L3O1O1K50ON3O100O1100O001O001O001O000000001O2N5QOT@a0b`0B9FYoi01P`UO0``02]_O1_`09JDh_O=V`0Fh_O;V`09N10000000000000000m_O" + } + ], + "model_output": "A dark-colored minivan with a visible rear section featuring a red and white taillight on the right side, a partially visible rear window with a defroster grid, and a rear windshield wiper is driving on the road. The vehicle has a visible rear bumper and a partially visible license plate mounted on the back. The minivan's body shows a reflection, indicating a glossy finish, and there is a visible antenna on the roof towards the rear." 
+ }, + { + "image": "images/caption_detailed_65.png", + "subject_name": "cup", + "object_name": "bed", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 436, + 640 + ], + "counts": "oTo37[=5L3M2N2N2O1N101N100ODYCNf<1[COe<1\\CNd<2\\CNd<2\\COc<1]COc<1]COd<0\\C0d<0\\C0d<0\\C0d<1\\CNe<1[COe<2[CMe<4ZCLg<3ZCL^S5DnJYIAi6;ZIDg69\\IFf64_IKb63cII_65bIJ_65aIK_64bIL_63aIM`62aIM_62bIN^62bIN_60bI0^6OcI1]6NeI1\\6MhIOY60iIOX6OiI1W6NjI2W6LkI3U6LnI2S6LgIHoL=Y9JiIInL=Z9IPJ6P6IQJ7o5HSJ7n5GSJ9m5GSJ:l5DVJgJ\\O_L6h8`0iJZO_L6o75WH?Z3VO`L5n7]1bK]NaL5n7^1fKaN[4_1]K_NeL2o7^1[KaNeL1Q8_1YK^NhL2P8`1XK]NiL2P8c1TK^NjLOR8d1SKdNn4]1QKbNP5`1nJ_NS5b1lJ^NT5c1kJ]NU5d1jJ\\NV5e1iJ[NX5d1hJ[NY5f1gJYNZ5g1eJYN[5f1`JUNVM5[8f1_JVNUM4]8f1]JUNWM5\\8g1\\JTNXM5\\8f1aJ[N`5b1bJ]N`5b1eGXNc26h5i1YJWNh5h1jGTNLNg17d6h1RJZNk1J^Om1dNZNn1K^Oj1cN\\No1I_Ol1`N\\NQ2H@l1]N]NS2G@m1\\N[NT2J^Om1\\NZNV2I^On1ZNZNW2I^Oo1ZNWNY2J]OP2XNWNZ2J]OQ2UNXN]2H]OR2TNWN_2H\\OS2RNVNb2G[OV2oMTNg2FYOY2mMRNj2EXO[2lMQNk2EXO]2iMoMP3DWO_2fMnMS3DUOa2dMmMW3BUOe2_MiM]3BSOi2ZMhMa3ATOj2UMiMf3_OSOl2QMhMl3\\OSOQ3dLjMZ4UOQOc4o0^KoNc4Q1]KoNb4R1_KlNb4S1_KmNa4R1`KnN_4Q1cKnN^4Q1dKnN\\4Q1eKoNZ4R1gKlNZ4T1fKkN[4T1fKlNZ4T1gKjNY4W1hKhNX4X1hKhNW4Y1iKfNX4Z1iKeNW4Z1jKeNV4Z1mKeNS4[1nKcNS4]1nKbNR4^1nKaNR4`1oK^NR4b1oK\\NR4d1nK\\NR4d1nK[NR4f1oKYNQ4f1PLYNQ4g1PLWNQ4j1oKTNQ4m1oKSNQ4n1oKPNR4P2oKnMQ4T2nKkMS4V2mKhMT4X2lKgMT4[2mKbMT4^2mK`MT4a2mK\\MS4e2nKYMS4h2mKVMT4k2lKQMV4Q3jKmLW4S3jKkLV4W3lKdLV4]3jK`LX4`3lKYLV4j3iKRLZ4o3hKkKZ4V4jKbKZ4^4iKYK]4h4V22N2O001N2O001N100O2O0O2O1O0O2O000O2O1O00100O001O001O001N102N1O1O1O1O1O1O1O1O1O101N1O1O1O100O2N1O2N2N3M4L2N4L3L5L3L4K5DH9B=D;G9C=H9G8M3L6J4M5J6J5L5I7H?WOhmk0" + } + ], + "model_output": "A pink ceramic cup with a glossy finish, featuring a rounded handle on the left side, is filled with a frothy liquid, possibly a hot beverage, with a visible layer of foam on top, sitting on a bed." 
+ }, + { + "image": "images/caption_detailed_66.png", + "subject_name": "person", + "object_name": "pavement", + "predicate_name": "standing on", + "mask_rles": [ + { + "size": [ + 640, + 427 + ], + "counts": "n[k523VOa@l0R1N]>VO]@S1S1G`>l0\\ATOd>o0YAROf>R1VAnNk>U1QAkNo>V1PAkNn>X1PAhNP?Y1o@gNo>^1n@cNo>a1o@_N:\\OY=Y2ZB[N4MAQOa=j2gBYN2;D`N]=m2lBXNOc0Q=Z1kBVN2b0R=l4N4L2N3M1OCUC]I05k^571O2N1O2O1N]OjA_KT>b4lA_KR>Y5O001N2N101O001N1O101O000O1O2[KmIWGJJ241JO50i2]6f4PM^HlLg1V6i5l5O0001O7I:Fa0_Od0\\O:F=C;E<\\F\\ET9Z;^O;E4jGWDl7Sl0K4000H8134JN2BXAIP?K^`e4" + } + ], + "model_output": "A person wearing a dark blue jacket with a white stripe on the sleeve, dark pants, and dark shoes, with short dark hair, is standing with their left hand in their pocket and their right arm slightly extended, holding a black suitcase." + }, + { + "image": "images/caption_detailed_68.png", + "subject_name": "cow", + "object_name": "grass", + "predicate_name": "walking on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "TVU22m>2O0O2O0000000O2L9\\ADX>c0N2N1000001O01O01O010O100O1O2M2O2M2O4K4M6I;F3M1O00YOg0O1O1O1O10000O11O001O2N2N4L2N2NCiBROV=n0kBSOS=m0oBUOmn0L4K5O13N6IWOiBFY=6iQn5" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"m7R7n7000000000001O00001O0000000000000000001O00000000001O000000000000000000000000001O0000000000001O0000000000000000000000001O0000000000000000001O000000001O000000000O1000000001O0000O1001O0000000000000000O10000001O00000000O100000000001O0000O2O0001O00000000000000O20O00000000000001O0000000O1UJfGQ5Y8oJhGP5X8PKiGo4W8QKiGo4W8QKjGn4W8PKjGP5V8PKkGo4U8QKkGo4U8PKlGP5T8PKmGo4T8PKlGP5T8mJoGS5Q8lJSHQ5m7oJ]Hg4c7YK^Hf4b7YK`Hf4`7ZKaHe4_7ZKbHf4^7ZKdHd4\\7\\KeHc4[7]KeHc4[7]KeHc4[7^KdHb4\\7^KcHc4]7]KcHc4^7\\KcHc4]7]KcHc4]7]KfH`4Z7`KgH_4X7bKjH\\4W7cKkH[4U7eKmHY4S7gKPIV4P7iKSIU4m6kK_Ii3a6VLfId3[6[LfId3Y6\\LiIc3W6^LhIb3X6^LhIb3X6]LhId3Y6[LXIT4h6lKQI[4o6eKoH]4Q7dKmH]4S7bKnH^4R7bKnH]4S7cKmH]4S7cKnH\\4R7eKnHZ4R7eKQIY4o6gKSIW4m6iKVIU4i6kKXIT4h6lKZIR4g6mK[IP4f6PL]Im3c6SL`Ij3`6WL`Ih3`6XLlHFWOR4l7WLmHLSOm3P8WLlH3nNf3V8WLlH5lNd3X8WLlH6jNd3Z8VLlH`4T7`KlH`4T7`KkH`4W7_KiHa4W7^KkHa4U7_KkHb4T7^KmHa4S7_KmHa4S7_KnH;eN^3]8WLnH7iNb3Y8VLRI1iNi3U8VLfIj3Z6VLfIi3[6WLfIh3Z6XLfIh3[6WLeIj3Z6VLfIj3Z6WLeIi3[6WLeIi3[6WLeIi3[6WLeIi3[6XLoH]4Q7cKiHc4W7^KiHa4X7`KgH_4Y7bKgH]4Y7dKeH]4[7eKjGT5V8nJcGW5]8f000000000000O11O0000000000O10000001O01O00000O10001O000001O00000000O2O00000000001O000000000000000000000000000000000000000000000000000000000001O00O1000000000000000000000001O0000000000000000000000000001O00O100000000000000000000000001O00000000000000001O0000001O00001O00000000001O000000000000001O0000000000001O00000000001O0000000000000000001O00O1001O00000000000001O00O100000001O000000000000000000001O0000000000000000000000001O00000000000000001O00000000000000001O0000000000000000000000000000000000000000000000001O0000000000000000000aJjFT5W9iJkFW5V9hJjFX5V9hJjFX5W9fJjFZ5V9fJjFZ5V9fJjFZ5V9eJjF\\5V9cJkF]5\\900000001O00001O0000001O0000001O001O001O0000000000001O00000001O00O10000000000000000000001O0000001O00000000001O00000000O1000000001O00O10000000000000000000000O10000O100_OiJYGW5g8jJWGW5i8lJSGU5m8lJQGU5o8mJlFV5T9;000000000000000000000000001O0000000000001O00000000000000001O000000000000000001O0001O0000gF" + } + ], + "model_output": "A 
black and white cow with a predominantly white head, a large black patch over the left eye, and smaller black spots on the right side of the face stands on grass. The cow has a black body with white patches, a visible right ear, and is standing with its head turned slightly towards the viewer, showcasing its profile. The cow's tail is mostly white with a black tip, and it has a visible udder." + }, + { + "image": "images/caption_detailed_69.png", + "subject_name": "wine glass", + "object_name": "person", + "predicate_name": "held", + "mask_rles": [ + { + "size": [ + 640, + 427 + ], + "counts": "WUb4:ec02M3N2N2O1N2O1N101]N\\Oh_Oe0T`0_Ok_Oa0R`0Cl_O?Q`0Co_O>n?EQ@;l?IS@7k?KV@5g?N]_OWO=j0T`02]_OUO?i0R`05^_ORO>k0S`05^_OQO:o0U`03^_OPO=m0P`0e0o_O[OP`0g0o_OYOP`0h0P@XOP`0i0V@POi?R1W@mNi?S1W@lNj?U1U@kNk?V1U@iNk?W1U@hNl?X1U@fNl?Z1U@dNk?]1V@bNj?^1W@`Nj?`1W@^Nk?`1W@^Nj?b1X@[Ni?e1Y@VNj?j1V1O2O000O2O0O2O0O2N1O2M3M4L4iN]Rg2" + }, + { + "size": [ + 640, + 427 + ], + "counts": "_jT11VSc25a`]M9H5K6VIM\\I9]6M`I7\\6KbI:Y6JbI1RO=Y2Y5X2X5?VKj4TO`H9NGZ2e5c0RKd4\\O^H<2AT2d5m0nJm2@_J0h0\\1h4S1jJf2L^JH=k0\\1g4T1iJd2OZKa0n0g4V1fJ[2MZJ5k0a1`4V7nKWGBc1\\4W7VLUG^OAROo1Z5Z7WLWG\\O@SOo1Z5Y7XLXG[Oc1]4T7YLYGZOc1\\4Q7aLXGTODROo1Y5S7dLYGQOEROo1Y5S7`L]GUOAROo1Y5S7`L^GTO@SOo1X5S7bLmFVN`0k0AUOo1W5S7kL[GgNCWOo1W5S7PMWGaNGYOn1V5T7nLZGbNDYOo1W5S7lL\\GdNe1`4o6kL_GcNb1b4o6iLcGbN_1e4n6iLdGaN^1f4n6iLdG^NXOCV2W5m6hLfG\\Na1l4i6hLiGYN^1o4i6gLlGSNSONY2Y5g6fLmGSN`1W5c6fL`JZ3`5fL`JZ3a5eL_J[3a5dL`J]3_5cLaJ]3`5bL`J^3a5aL_J_3a5^LbJb3_5]LaJd3^5[LcJe3^5YLcJg3]5YLcJh3]5SLgJm3[5lKjJU4W5_KjFUOT4\\5[:1O00001O00001O1O1O001O001O2N1bDZJ`8f5_G[Ja8f5WGcJg8^5QGkJm8W5mFnJR9R5oFmJQ9T5PGkJo8V5mFnJR9U5jFmJU9Y5eFgJ[9Y5eFgJ[9Y5fFfJZ9\\5cFeJ]9j5nD]Il0n0U:l5hEVJX:k5bEYJ_:g5`EWIBg0n:T6[EYIEd0P;Y6PEZJP;[700000000000000000001O0000001O2N00000000001O00001O0000001O001O0000001O000000001O0000001O000PKlF`KNZ2V9l1[GdK@`2U9T1YHULcNg2T9n0gHoKYNS3Q9h0oHPLlMGOb3V9?eIPLTMa3X93oIaLhLX3Y92TJdLeLZ3X9MVJjLbLY3Y9GUJmL`Lf3[9YOYJk0h5ROYJoNbLJU9V1YJoNhLGP9Y1XJkNVM
Cb8b1XJeN_MFZ8d1UJfNdMEW8g1SJcNhMEV8h1PJcNnMBR8k1oIcNRN@P8m1jIfNYNZOn7P2gIbNiLaNe1l0k7R2fIaNbN]Oh7S2dIaNfNZOg7U2aI`NnNWOb7Y2^I`NTOTO_7\\2[I`NXOSO^7]2YI^N\\OTO[7_2WI]NBQOX7b2TI^NFnNW7e2mH`N0hNU7g2jHaN4eNS7i2hHcN6cNR7k2gHbN9aNQ7n2bHbN_:_1]EcNc:_1WEeNj:[1TEfNn:Y1QEfNQ;Y1mDhNW;V1eDmN[;U1cDkN^;U1`DkNb;T1]DmNd;T1XDmNk;S1QDmNRoNZAE2O4KNOL323ij0" + } + ], + "model_output": "A clear wine glass with a thin stem and a broad, stable base, partially filled with a transparent liquid, reflecting light on its curved surface, is held by a person." + }, + { + "image": "images/caption_detailed_70.png", + "subject_name": "person", + "object_name": "snow", + "predicate_name": "over", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "kTY55c0Nj;9oCJP<;kCFTU8R6THnIm7o5VHoIk7h4jEiKf2SOb7Q5oEZKCGf=n4iBVKb=i4l0O0SO\\A]Le>\\3gA\\L]>a3gAZL\\>a3T1N2OhAcLkM9f=P3_D_M[;^2bDjM];S2bDQNVNlNH1ne0f2T6g4oHVH8b6C]I=c6D\\IOn0d4j5[NjHoLe0Ng0g4k5\\NiHmLj2f4^4]NiHhLm2j4[4^NhHeLP3m4X4^NXLa1j3^NVLb1j3bNRL]1o3dNPL[1Q4eNoK[1Q4eNoKZ1R4fNnKZ1R4fNnKY1S4fNnKZ1R4fNnKY1T4fNlKZ1T4fNlKY1V4fNjKY1X4fNhKZ1[4cNeK\\1^4aNcK_1_4_NaKa1a4]N^Kc1m4RNTKn1Q5YLaG7_3`3T5SL`G;]3b3^5_K_Gn0S3c3`6ZLaId3d6WL]Ii3d6VL\\Ij3d6VL\\Ij3e6TL[Im3f6RLZIm3l6nKTIR4n6kKSIU4n6jKRIU4P7jKPIV4R7hKoHW4T7eKmH[4V7bKjH^4X7`KhH_4];0000OeEcKU5]4kJbKV5^4V50cEfKT5Z4kJmKn4T4RKmKn4R4RKmKo4S4QKmKn4T4SKjKmMoAR2R>oMmAP2e8TMgKm0dKo1`8\\MhKf0hKm1_8aMfKe0iKj1]8gMgKa0kKh1\\8jMgK?mKf1\\8mMeK?nKc1\\8RNcK=PL`1]8UNaK=QL^1Z8ZNcK:RL\\1W8_NeK7SLY1X8cNbK6ULW1W8fNcK3VLW1V8hNbK3WLU1W8iN_K5YLR1X8kN\\K5[Lo0Y8nNZK5\\Lm0Y8QOWK5^Lk0[8SORK5cLg0[8UOQK5cLf0\\8VOoJ6dLd0]8XOkJ8fL`0_8[OeJ9kL<_8_ObJ7nL:a8_O^J:PM7b8A[J:RM5b8EWJ9VM2c8IoI:]MMd8KlI:_MKe8NgI9dMIe8OdI:gMGe83]I9nMCf89SI8WN_Of8=lH7^N\\Of8a0cH8gNWOf8e0[H7POTOe8n0gG:CgNg8a5YG_Jg8a5YG_Jg8a5YG^Jh8b5XG^Jh8b5XG]Ji8c5WG]Ji8c5WG]Ji8c5WG]Ji8CVGd31iLh8A_GoMC^56QMi8@fG]3ASMh8^OnG[3ZOWMh8]OUHV3SO]Mh8ZO`HQ3hNeMh8WOeHR3cNfMh8SOmHU3[NhMh8ROPIT3XNjMh8POVIR3RNnMh8POVIR3RNnMg8mN\\IT3mMoMg8kN_IU3iMQNh8gNcIW3fMQNg8fNgIW3bMSNg8cNlIX3]MUNg8\\NW
J[3RMXNg8YN]J^3kLZNh8VN_J`3jLZNe8UNdJ`3gL[Ne8QNiJc3bL\\Ne8oMlJd3_L^Nd8jMRKg3YL`Ne8gMUKh3WL`Ne8cMYKl3RLaNo<_1QCaNQ=]1oBcNQ=]1oBcNR=\\1nBeNQ=[1oBeNQ=[1oBeNQ=[1oBeNQ=[1oBeNR=Z1nBfNR=Z1nBfNR=Z1nBfNR=Z1nBfNR=Z1nBfNR=Z1nBgNR=X1nBhNR=W1oBiNQ=W1oBjNP=W1oBiNQ=W1oBiNQ=V1PCkNP=T1PClNQ=S1oBnNo:hM\\FX3fNPOh:oMbFo2gNSOd:QNdFl2hNTOb:SNfFe2jNZO]:TNfFc2mNZOY:XNgF^2QOZOW:ZNgF[2SO\\OS:\\NhFY2UO\\OQ:]NiFV2WO^Om9_NkFQ2ZOAg9bNnFl1\\OCb9eNQGg1^OE]9gNUGd1^OE\\9iNUGa1@GY9iNVG_1CIS9lNXG\\1EIQ9lNZGZ1FKl8oN]GV1GKk8PO^GT1HMh8QO^GR1KOa8SOdGl0M2]8TOeGi0O4Z8UOfGg004Y8VOgGe016U8XOiGb027R8ZOjG>69n7ZOlG=6:l7[OmG:8a7BSH0=`0\\7DUHK`0b0X7FWHFc0e0T7GWHDf0f0n6KYHAi0e0h6]3XIeLa6a3`I`L\\6a3eIaLU6b3lI`Ln5c3SJ`Lg5b3ZJ`LZ2PL_Oc7WN_LW2RL[Ob7_N]LU2]LlNY7PO[Lo1cLiNU7YOZLk1gLgNo6_OZLg1kLhNl6BXLf1mLVMPOF0Q1m7=WLd1mLUM]O1CNR8Y1QL_1hKSMP;_1XI]1k6eNVIX1k6iNUIU1m6kNTIT1k6mNVIQ1j6oNXIn0h6TOYIj0h6WOXIg0h6ZOYIe0g6[O[Ib0e6_O\\I?d6B]I;d6F]I9b6H`I5`6L`I3a6M`I2_6ObIO^62cIL^64dII[69eIF[6;fIDZ6hIBW6?jI@U6a0mI\\OS6e0nIYOR6h0oIWOQ6i0PJVOP6j0QJTOo5m0SJoNn5R1WJ_NQ6c1YJlMn5T2n5100O1O1O100O1O1O1O1O1O1O100O100O1O1N20Dn^OjMQa0c2O1O1O1O10000N2O1O1O100O1O1O1O1O100O002N1O1O010O2O0O1N10101O0O1O0O201N100O1N2O2N1O2Lm5Z@^GN^O2YOO3N44oa1" + } + ], + "model_output": "A small, fluffy white dog with light tan markings on its ears and around its eyes, sporting a black nose and dark, expressive eyes, is sitting on a bench. The dog is wearing a silver chain collar with a visible tag and is attached to a black leash." 
+ }, + { + "image": "images/caption_detailed_72.png", + "subject_name": "fence", + "object_name": "person", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "bbe32T=9K2N3M3M2N1000000000000000000000000001OO10000001O000000000000000000000000000000000000000000000000000000000000001O00O1000000001O0000000000000000001O0000000000000000000000000000000000000000000000000000000000000000O10000000000001O0000000000000000000000000000000000000000000000000000001O00000000000000000000000000000000000000000000000000001O00000000O10000000000000000000000001O000000000000000000000000000000000000000000000000000000001O000000000000000000000000000000000000000000000000000000001O0000000000000000000000000000000000000000000000000000001O0000000000000000000000000000000000000000000000001O0000000000000000000000000000000000000000000000000000000000000000000000001O0000000000000000000000000000000000000000000000" + }, + { + "size": [ + 427, + 640 + ], + "counts": "e`l0i0[<=B`0D9J4PJWN6o1@ZNeMYOH8d2?dMYOH8d2?dMYOH8d2`0cMYOH7e2`0cMYOH6f2a0bMXOI7e2a0bMYOH6f2`0cMZOF7g2?cMZOF7g2?dMYOE8g2`0cMXOF8g2`0cMXOF7h2a0bMXOF7h2a0bMXOE8i2`0bMXOE8i2`0bMYOD7j2`0bMYOD7j2`0bMYOD7j2`0bMYOD7j2`0bMYOD7j2`0bMYOD6k2a0aMZOC5l2a0aMZOC5l2b0`MYOC6m2a0aMXOB7m2a0aMXOB7m2a0aMXOB7m2a0aMYOA5o2b0`MYOA5o2b0`MYOA5o2b0aMXO@6o2b0aMXO_O7P3a0`MYO@6P3a0`MYO@6P3a0aMXO_O7P3a0aMXO_O7P3a0aMXO^O8Q3`0aMYO]O7R3`0aMYO^N]O:i0h3a0aMXO\\NB8e0k3a0aMXO\\ND6c0m3a0aMXO[NG5`0o3a0aMXOZNI5>P4b0`MWO[NJ4=Q4b0`MWO[NJ3=S4b0_MWOZNK4XLBh3?WLAi3a0TL@l3b0RL_Om3b0RL^On3d0PL\\OP4e0oK[OQ4g0mKYOS4j0iKWOW4j0hKVOX4l0fKTOZ4m0eKSO[4o0cKQO]4P1bKPO^4Q1`KPO`4S1]KmNb4U1]KkNc4X1TKnNm4f30000000000000000000001O000000000000000000001O00000000001O0000001O0000000000001O000000001O000000001O000000001O00001O001O1O1O1O1O2N2N2N1O2N2N2N2N2N1O2N2N2N2N1O3M2N1O1O2N1O2N2N1O3M1O1O1O2N3M4RMeGg2`8N2N2N1O3M2N1O1O1O1O2N3M2N2N2N2N1O1O1O1O2N2N2N1O2N2N2N001O1O2N1O1O2M3N1N7[ObENg:N^EN[Y\\2" + } + ], + "model_output": "A baseball catcher in full gear, including a 
blue helmet with a face mask, chest protector, and leg guards, is crouched on the playing field, ready to receive a pitch. The catcher is wearing a white uniform with blue and black accents, and is holding a mitt in the left hand, which is extended forward." + }, + { + "image": "images/caption_detailed_74.png", + "subject_name": "person", + "object_name": "person", + "predicate_name": "looking at", + "mask_rles": [ + { + "size": [ + 640, + 296 + ], + "counts": "\\aT38cc06J5N2N4n\\OEZb0Q1G9O1OHn]OcNSb0[1Q^ObNPb0\\1;2N;E00010O`Nk]OU1Vb0iNl]Oa0<]Oia00m]Oa0<]OUb0=l0GQ[W2" + }, + { + "size": [ + 640, + 296 + ], + "counts": "QXZ28cc07d\\OLeb0i0N3N1O2[]OUOa0Ek`0`2M3M100O1O100O100N1O2OYO]_OQNc`0o1f0201d^OhMm`0S3C001O0001O1TMY_O]2i`0_M[_O_2Va0M3M1O3N4XNZ^Ol0oa0jNV^O:8CZc0O1O0O3NfYf2" + } + ], + "model_output": "A person with short hair, wearing a dark-colored t-shirt and blue jeans, stands with their back to the viewer, beside another person." + }, + { + "image": "images/caption_detailed_75.png", + "subject_name": "motorcycle", + "object_name": "grass", + "predicate_name": "parked on", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "gmT72Y=2N001O1O1O2N1O2N1O2N1O6J:F1O1O00000000000000000000O100O100O100O100O1O100N2O100N2O1O1O1N2N2N2M3Lb`a0" + }, + { + "size": [ + 427, + 640 + ], + "counts": 
"1o1a0\\5o2dJQM]5n2aJTMa5j2_JVMb5i2^JVMd5i2\\JWMf5g2ZJYMg5f2YJZMh5e2XJ[Mi5d2VJ\\Mm5b2SJ^Mo5`2QJ`MP6_2PJaMQ6^2oIaMT6]2kIdMV6[2jIeMW6Z2iIfMY6X2gIhM[6V2eIjM[6V2eIhM_6V2`IkMb6S2^IlMd6S2\\ImMe6R2[InMf6Q2YIPNi6n1WIQNk6n1UIRNl6m1TISNm6l1RITNP7k1PIUNR7j1lHVNV7i1jHWNW7h1hHYNY7e1hH[NY7d1gH[N\\7c1dH]N]7b1cH^N_7`1`HaNa7^1_HbNb7]1^HbNc7_1[HbNg7\\1YHdNh7[1WHfNk7W1VHiNk7V1UHiNn7U1QHlNP8S1PHmNQ8R1oGnNS8P1mGoNV8o0jGQOW8n0hGRO[8l0eGTO]8j0cGVO_8h0aGWOa8h0_GXOc8f0]GYOe8f0ZG[Oh8UOn0V9dNeG>[Om0l8dNiG>_OP1c8bNnG>AQ1^8aNQH=CT1Y8_NSH>IP1R8bNUH>KQ1m7aNXH=OP1h7cNXH>0o0h7cNXH>5k0b7gNYH>:g0\\7kNZH=?e0V7nNZH>`0e0U7mN[H=a0g0`NQOV8KiH=a0h0]NSOX8HiH=c0V1c6]NjH=c0O[Ng0X8mNjH=b00^Nd0V8oNjHD5HU1n7XNfH>B7IS1S1lNh4]OjJ=A9IQ1P1VOf4TOnJgNf4]NXK>C_2=kNe4XN[K>C_2;POd4SN^K=D`28TOd4oM`K=D_26[Oc4iMcK:]12S2V3RL^L>:]12T2U3QL_L=;_10T2U3PL_L>=^1NT2V3PL_L=>`1LT2V3oK`L=>a1JU2W3mKaL=>a1JU2W3mKaLi2X5_1iJcNW5\\1iJeNV5[1jJeNV5[1jJfNV5Y1jJhNV5W1iJjNW5V1iJkNV5U1jJkNW5T1hJoNV5Q1iJROV5m0jJTOU5l0kJTOU5l0jJVOV5i0jJXOU5h0jJZOV5e0jJ\\OV5c0jJ^OU5a0lJ_OT5a0lJAS5>lJDS5[J^Oj5a0VJ^Ok5b0TJ]On5b0SJ\\Oo5d0QJZOR6f0mIXOU6h0kIXOU6h0jIXOX6f0jIXOW6i0iIUOY6j0kIQOV6o0PJjNQ6V1RJfNP6Y1RJeNn5[1ZJ\\Nh5c1[JYNf5g1^JTNc5l1dJlM]5S2hJiMY5V2oJaMR5`2VIeLb1e0Y5f2UIhL`1a0\\5f2TIoL[1:a5g2TISMX14e5i2SIXMT1Ni5j2SI[MQ1Jm5k2RI^Mn0GQ6j2QIhMe0]O[6k2PInM>WOc6k2oHQN:UOg6j2oHVN5oNm6k2nHWN3oNP7h2nHUOR7k0nHUOS7j0mHUOT7l0kHTOU7l0kHTOV7k0jHTOW7k0jHUOV7k0jHUOW7k0hHlL7g1R7\\1gHlL9g1Q7[1gHmL9h1P7[1gHmL:g1P7[1fHmL;g1P7\\1eHlL2O1J6OM21LHcA6Vm0KbA1N21OZhb0Mnh\\O0k>1]AOc>000fm01_cN1d>N\\A0k>0bP17``NJ\\95`K;Y4L`K:[4MXK>d4HlJg0P5[OmJh0Q5\\OgJi0W5ZOdJk0Z5ZO^Jl0[2QNbMW1Lm0ISO_NZOc1h02n0GP13UN1n0JQ1EbLYNa1T2P1IR1GbLXN^1T2T1GQ1J[N:i0IP1JYN:j0Jo0KWN:l0Io0KWN9n0Hm0OVN6o0In0OUN6n0Jo0OTN5n0JQ1hN[L7f1W1n0JR1dN_L9a1`0dMH[3j0U2mNlLY1P1HV2nNkLY1o0IW2mNkLY1n0IY2mNiLZ1n0IZ2lNiLZ1n0H\\2kNhL\\1m0HU57lJGU59lJFT5:lJEU5;lJDU5;mJCT5V5BjJ>V5BiJ?W5AiJ?W5AiJ`0V5@iJa0W5_OiJa0V5@iJb0V5^OjJb0V5^OiJc0W5]OiJc0W5]OhJd0X5\\OfJg0Y5YOUH3`0e0[7XOSH6?d0^7VOQH9>d0`7SORH;6h0h7mNRH=2i0k7jNSH>0i0m7iNSH>Ok0m7gNTH?Mk
0P8eNSHb0Il0R8cNVHa0^OV1[8YNWHb0dNA5f1P9WNWHc0bNC4d1S9VNWHe0^NE4c1W9RNWH[1^Ne0[9PNWHd3g7]LYHc3g7]LYHc3g7]LYHc3g7]LYHb3h7^LXHa3j7^LVHa3k7_LTHb3l7^LTHa3m7_LSH_3o7aLQH]3P8dLPH[3i2gLfK0l06Y2H\\NZ3l2VMSL3a2]O`NX3o2fN^NRNbNX3P3hN]NPNcNW3f2iLiKU2l2kMeNV3f2lLgKT2e1lM]OMa0U3l2YOQMjM_OHd0U3l2[OmLkMBEe0T3j2BiLjMF@g0T3?QMMk2mNeMM[Oj0S3=_MPN3j0`2KdM2XOl0R3=WNjNn1HcM5VOl0R3;YNjN\\2XOWMg0ROl0R3:ZNiNd3NPKn0R3;\\NeNf31lJo0R39_N^N^NIV5a0kJo0R38aNcNd36iJn0S38bNbNc38hJn0S38bNaNd39gJn0R37fN_Nc3i4dNXHn0o2>i4dNXHn0n2?j4cNXHm0o2`0j4bNWHl0Q3a0i4cNVHk0Q3c0j4aNUHl0Q3c0j4aNUHl0Q3b0m4`NRHm0P3d0Q5]NoGm0Q3g0S5XNmGQ1o2h0Y5RNhGU1P3i0_5VObJj0^5UOaJl0a5RO_Jn0c5PO]JP1d5nN]JS1c5lN^JT1c5hN_JX1e5cN[J^1h5_NXJb1k5XNXJh1`90O1O010[GkM]4W2bKkM\\4V2_KoMa4Q2^KPNa4Q2\\KRNd45hGQ1c3kNd46hGo0d3kNd46hGo0d3kNd44jGQ1b3jNe43lGS1G`Nb3;j41oGY1V3eNl40PHZ1U3fNk40PHY1U3hNj40QHV1T3lNl4MQHV1o2^NeL?\\8MPHV1o2_NcL>_8MoGV1o2_NcL=`8OmGU1P3^NdL=`8OmGV1o2_NcL;c8MnGX1l2lNY5JlGZ1k2kNZ5KkGZ1k2kNZ5JlG[1j2kNZ5JlGZ1k2kN[5IlGU1m2SOY5GjGT1g2iNhL>g8EjGS1m2XO[5DiGS1k2YOW6f0jIZOW6d0hI^OY6a0fI@Z6?fIA\\6=eIC[6;fIE]64hIK[61gIOZ6NhI1[6KgI4^6DeI<^:11O:DaQl2" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"l;S3m;0O10000O10000O1O100O1000000O10000O100O100O10000O1000000O100O100O100O1000000O100O1000_OoDRMR;k2b0N3M2O2N1O2O001O1O10O01000O010O1O01OTO_MZE`2g:bMXE\\2i:eMWEY2k:hMTEV2n:jMQEV2o:lMPEQ2S;PNlDn1V;SNiDk1Y;UNfDj1[;WNeDg1];YNcDf1^;[N`Dd1a;]N^Dd1a;]N^Dc1c;^N\\Da1d;`N\\D_1e;bNZD\\1g;fNWDZ1j;gNSD[1m;fNQDY1QO100O100O1O2QOhB3Z=ETC4n=M2O2N2MdTh27njWM9B_OTBj0j=7O100000O03N7I9PM]NPHf1k7\\NTHe1k7\\NUHd1j7^NUHa1k7_NUHb1j7_NVH`1j7`NWH_1i7bNVH_1i7aNXH^1h7bNXH_1h7`NXH`1h7`NYH_1g7aNYH`1f7_N[Ha1e7]N]Hc1d7YN`Hf1]:0000000001O0001N1000001N100YDaNl9a1PFiNh9X1UFnNg9T1UFPOj9[1eEkNZ:n2O1O2N1N2O1O100O10000000000000000O10O02O0O100O1O1O1O1O1O100O1O1O1O100O100O100O100O1O100O1O1O100O1N2O1M3C=N2O1M3O1O1O1N2N2N2N2N2N2O1O100001O00000000001N1001O0001O000O10001O00000000001O00001O00001O00000000001O0000000000001O00000000001O000000001O00000000001O000000000000001O0000000000000010OO10000000001O00000000000000001O0000000000000000000000001O0000000000000000000000000000000000000000000000000000001O0000000000000000000000001O000000000000000000000000000000000000001O00000000000000000000001O00iF" + } + ], + "model_output": "A young boy with a focused expression, wearing a beige baseball cap and a pink and white striped t-shirt with a graphic design on the front, is sitting on a bed. He is holding a yellow cup with a straw in his right hand and appears to be in the middle of an action, possibly drinking or about to drink." 
+ }, + { + "image": "images/caption_detailed_77.png", + "subject_name": "person", + "object_name": "road", + "predicate_name": "crossing", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "`gm36Q=4kNIjDc0P;GhD>LWOm:?QE?J\\OR;6PEP1e0_N`9Y2]FhMb9[2[FgMd9\\2YFdMh9_2SFbMm9m22O1O5K4L1O1kNdEYO]:e0eEZO]:e0cEYO_:f0cEWO`:g0bEWO`:c0eE[O_:LXEI?9]:FYF9`;L1O1N3N2M_PQ4" + }, + { + "size": [ + 427, + 640 + ], + "counts": "c9g3c910000O100O01000O2N100O10000O1000000O100O1000000O100O100O10000O10000N2O10000O1O10000000000002N8H1O001O00000000001O000000O100000000000000FVLRGk3^8i0J6O1N2F:O100000000O1000000001O000000000000000000O100000000000000000000000000000000000000O1000000001O0000000000000000OYLWHn1i7mM]HR2c7iMcHV2]7iMdHW2]7gMeHX2[7fMgHZ2Y7dMiH\\2W7aMlH_2S7`MoH`2Q7^MQIb2o6\\MSId2n6WMVIi2k6PM[IP3f6oLZIQ3g6nLYIR3i6kLXIU3k6cLZI]3Z8000O100O100O100O100001O0000000000000000O10000001O1O00000000UOdLfG]3Y8eLfG[3W8jLgGV3X8kLgGV3X8lLgGT3X8mLgGT3Y8lLgGT3Z8kLeGV3\\8iLdGW3_8fLaGZ3h8\\LXGe3i8ZLVGg3W901O2N[N_LaI`3^6cL`I]3`6dL_I\\3a6fL]IZ3b6kLZIU3f6lLYIT3g6mLXIS3h6nLWIR3h6[MlHe2S7aMhH_2W7fMeHZ2[7iMbHW2^7lM_HT2a7mM^HS2d7kM\\HU2j7fMUHZ2S8^MlGc2W8ZMiGf2Y8ZMdGg2\\8T100000000000000000000000000000000000000O100O1LUKgGl4U87O1O100000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000001O000000002N4L00001O1O0000001O0000001O004L1O00000000000000000000000000O100O1O1O1O100O100M3N2M3N2O1N2O1N21O001O00001O1O1O00000000O1N200O100000000001O1O2N1O00000000O100O1001O001O00001O=lJ_Gd4Q9C2N001O0000000000000000O100001O0000O100000000000000O1000000000000O100M3FeKcG\\4X8eKaGf4^84O1001OO10000001O000000000000001O0000001O1O1O00000000000000001O0000000000000000000000000000000000O100O1001OO1O100000000O10000001O0000000000001O1O2N2N5K4L002N000000000000000000000000000000001O2N4L3M3M2N2N000O1001O0000000000000000000QMYLZLg3e6000O1L4O1LSLiFn3W93001O1O6J5K3M5K4L1O_HiLo4U3d21O00000000000000000000000000000O100O100N2N2O1O100N2O1O100O1001O000000000000O100001O00000000000000O1000000000000000000000000
0000000000000000000000000000000000000000000000000000000000ZF" + } + ], + "model_output": "A person wearing a dark jacket with a fur-lined hood, light-colored pants, and dark shoes, with short hair and a light-colored beanie, is walking on the road." + }, + { + "image": "images/caption_detailed_78.png", + "subject_name": "person", + "object_name": "bench", + "predicate_name": "sitting on", + "mask_rles": [ + { + "size": [ + 429, + 640 + ], + "counts": "WS_24X=2N1O2N1O100O1O100N2O1O1O1O1[OBYD`0c;FZD;c;KYD6f;MWD4g;OVD3i;h0O2O0O2M2O1O1O10001O001N1L4N2O2M2O1O2N100O1O1N2O100000000000010O01O001BTEZNm:^1TE[N06n:]1^E[Nf:d1b000E\\DmNd;S1\\DnNd;]1010[O]DoN37a;e0iD[OY;KXD`0b0D\\;:fDF\\;7eDH_;3bDN^;1cDN];2cDN_;ObD1_;MbD3`;JbD5[<01O0000001OO10O10Sli4" + }, + { + "size": [ + 429, + 640 + ], + "counts": "R\\Y1i0a<3O1O1O1O0OG7M221101O1O1O1L4J6J6OGPDWOPOmgd4" + } + ], + "model_output": "A person with long hair, wearing a patterned dress and high-heeled shoes, is captured in a dynamic pose with one leg extended forward and arms outstretched for balance, sitting on a bench." + }, + { + "image": "images/caption_detailed_79.png", + "subject_name": "person", + "object_name": "tie", + "predicate_name": "wearing", + "mask_rles": [ + { + "size": [ + 640, + 428 + ], + "counts": "TjW56fc05L4K4M4N2J7G8N2N3N0O3N3L2O1N2O1O1N1000O1O001O1O10O01O1O1O00100O10O0100O00_OfN^^OY1\\a0QO`^Oo0]a0WOa^Oh0]a0m002N20O0[Ob^OiN_a0l1O1O101N1000001O001O1O1O1O1O1O010O000010O1000O10O1O2N1QNk^OP1Xa0iNn^OV1na0O1O2N1O2N1O2O001N100O2O0O2N100010O0010O010O01O01O001O001O010O10O1O010000O4M5K2N4L2O1O2N0N3N1M4M1O3LYae0" + }, + { + "size": [ + 640, + 428 + ], + "counts": "l]V63_c0Ol\\O3Qc00m\\O2Rc0Ol\\O3Sc0Nj\\O5Uc0;0O1O1O1O2N3M4Ca\\O1oam1" + } + ], + "model_output": "An older man with a receding hairline and a full head of gray hair, wearing a dark blazer over a light blue dress shirt and a dark tie." 
+ }, + { + "image": "images/caption_detailed_80.png", + "subject_name": "dog", + "object_name": "elephant", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "mce59b0Le;9PD4k;NQD7i;^O[D3Jc0h;XOcDR1m;L01M4M^OZDMZ;2jDKV;5lDIT;8SE@m:`0n0:F2O1QDPOa;R1[DROc;P1[DPOe;Y1HdNdD\\1];fN`Dm0O\\Oa;c0iD[OW;c0lD\\OS;d0QEXOP;g0SEVOl:j0j0000000002N3M3N[OQD0m;JnCI=8eWIAh6?YI@g6`0]I\\Oc6d0aIXO^6i0cIVO]6k0eIRO[6n0fIQOZ6o0hIoNX6Q1kIlNU6T1lIkNT6U1nIiNR6X1PJeNP6\\1RJaNn5_1UJ^Nk5b1VJ]Nj5d1WJZNi5f1WJZNj5e1WJZNi5f1XJYNi5g1WJWNk5h1WJVNi5j1XJUNi5j1XJTNi5l1k2O010O1O002N001O1O2N1O101N1O2EZDoNh;o0YDoNj;o0XDoNi;P1XDoNj;o0VDPOl;P17011OO0O3N4L11N01O1O10O100O00012NO01001N1N2O1N3O1N3N001O0O2M2N5JbPP4" + } + ], + "model_output": "A medium-sized, tan dog with a black muzzle, erect ears, and a bushy tail is chasing an elephant. The dog has a muscular build, with a broad chest and strong, straight legs. Its coat is short and smooth, and it has a focused expression with its mouth slightly open, revealing its tongue and teeth." 
+ }, + { + "image": "images/caption_detailed_81.png", + "subject_name": "potted plant", + "object_name": "vase", + "predicate_name": "in", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Sa\\13k>`0A?B:F>B;E:FP3R4PMlMNbM7=k2U4PMlMNaM:9d2\\4oLlM0\\M?j2j2oL[LCj2l2mL\\LA;i02m04U3P4aKkK=On06T3o3aKmK>Mm06U3o3`KPL?Hm09T3n3`KRL`0El0;U3l3aKUL=Cn0a0c5S2VLlMj3V2VLhMj3[2VLbMj3_2XL^Mh3e2XLXMh3i2ZLSMg3o2T43dEoLc8S3[GoLc8R3\\GPMb8P3^GQMa8o2_GRM`8m2aGSM_8l2bGTM^8k2cGTM^8k2cGSM_8l2cGRM^8l2dGRM^8m2dI`Mn3_2mKgMS4X2nKhMR4W2nKkMjMZOf4i2bMnMfM_Oc4a2iMQNcMNT4n1\\NTN`MOS4l1^NVN^MNT4j1`NXN\\MNT4h1bN[NWMNX4f1aN]NUMOZ4c1bN_NRMO\\4a1cNaNoLO^4`1cNaNoLN_4`1aNdNmLOb4]1_NUNYLFe0i0c4[1_NVN\\LCc0k0c4\\1^NUN_LAa0n0b4\\1]NVNbM=R4]1WNZNhM9Q4\\1UN^NiM6S4\\1SN_NjM5S4\\1RN_NkM5T4\\1PN`NlM4T4\\1nMbNlM4V4[1lMaNnM5V4Z1lMaNmM6W4Y1lMaNmM4Y4[1jMaNmM4Y4\\1jM_NiM9]4X1jM_N]Me0i4l0kM^N[Mg0j4k0lM]NYMi0k4j0lM]NYMh0l4k0mMZNXMk0k4j0QNWNTMo0l4j0WNPNmLV1l4j0[NkMkLY1k4k0AUO?l0@TO`0l0@SOa0m0_OSOa0n0^OROb0o0]OQOc0P1]OnNd0S1\\OkNe0V1[OoMTKNa5S2^OgMXK3[5U2=fMDZ2a600000000O10000000000AaC]N_5CJk0CXO>f70O001O1001N1O2N8FhRj3" + }, + { + "size": [ + 480, + 640 + ], + "counts": "mki51]4o0T5ROTJk1e5ZNfIY2X6iM\\Hc3b7\\LkFJ]1P4f7WLmFI[1S4g7TLnFIZ1U4g7ULWHn3i7mK[HS4e7jK^HW4U91O001N2O0O1N3N1N2O2O0O1O2O0O10000O2O00000O10000001O0000001O00001O001O1O2N1O1O1N3NnNbKcG]4\\8dKfGZ4S8mKnGR4P8PLQHn3P8RLRHl3n7TLSHk3m7ULSHj3n7WLRHh3n7XLRHh3o7XLPHg3Q8ZLnGf3R8ZLnGe3T8ZLlGe3U8[LjGf3V8[LiGd3Y8\\LeGd3\\8]LbGc3_8_L^Ga3c8aL[G^3g8bLWG^3j8cLTG\\3n8eLoF\\3S9iLeFW3]9U11N3M3L4N2L6K3MXL`KVM[4j6M4K6J4L4K5K5J5L5K4M4I8K6H8H:F:E?]OeVS2" + } + ], + "model_output": "A brown vase with a glossy finish and a curved handle on the upper right side, featuring a smooth surface with subtle variations in color intensity, giving it a slightly mottled appearance, is in front of the window." 
+ }, + { + "image": "images/caption_detailed_82.png", + "subject_name": "person", + "object_name": "wall", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 478, + 640 + ], + "counts": "^[b35g>3N1N3N1N2N2O1N2N2N2N2N2O1N2O1N2O1N2O1O1N2N2O1O1O1O1O1O1O100N2O1O1N2N2N2O1O1O002M200O1[OSNXDn1e;YNVDh1f;`NTDb1j;j0M3N2M3L4M3O1N2N2O1N2O1M3O100O10000000000000O10010O0O10000gLQEd2n:[MZE^2f:aM[E_2e:_M^E`2c:YMcEg2]:XMdEh2]:WM[EQ3e:nLRE[3V;3J6O1O1O1O1N201N1O1O002N100O1O100O1O100O100O10000O1000000000O1001N10O100001O0O01000000000O10000000000000001OO100000010N100001O00O100010O00O100010O00O101O010O000O1010O0001N2O010O001N110O001N101O100O001N2O010O001N2O010O1O0O11O00000000000000000000000O10000O10000O10000O100000000O10000000000O10000000000001O0000000000000000001O0000000^ORETMn:k2WEPMj:P3]EgLf:W3a000O10001N1000000O2O0O101N1O1O101N1O1O2O0O2N1O2N1N3N2N2N2N3M2N3M2N2N2M3M3N2N2M3M3M3M3M3M4L4K6J8Eon[1" + }, + { + "size": [ + 478, + 640 + ], + "counts": "0\\6b8000000000000O101O0000O10000000O1000000000000000000O100000000000000O100000000000000000000O10001OO100000000000O1000000000000O2OO1000000000O100000000O10000000000O1000000O1000001N1000000000000000000000000000000001O0000O100000001O00O1000000O1000O100000000000001O0000000O10000000000000000O2O00O010000O1N2K5M3N2O1N2O1O10000O10OO3N1O1O1N200O1O00100O1O2N1O010O101O0O1O100O100O100O1O1O1N2O1O2O0O100O10000O01000O2O0O10O010001N010O100O2O00O0100O1O100O100000000000000000000O101O0O00100O1O100O10001O0O010O10000O10000O2O00O10000000000000000000O1001O00000000000000000000000O1001O0001O0O20O0001N2O10O0001O1N200O001N6K3M010O1N1010N1000O100001O0O010O2O0000O100001O0000O11O0001N1001O01O000O11O0000O2O01O0001O0O11O0000O2O0010O01N1010O01O1N10100O00001N11O00jE\\KR:k4L2O10O0000O2O1OO2O00O2O010O00O1000000O10O2O01O0000000O10000000O11O0O1000000000000000000000000000000000O0200OO10O10000000O1001O000000O11O000O10O11O00000000000O100000001O00O100001OO01000001O0000O11N10O10000001O000O011O00O10000001O000O10000000O100001O000O011O00O10000001O00O1000O10000000001O0000
000O1000O1001O0000000O100000O1001O00000O10000000O1001O000O1000000000O1001O000000000O10000000000O100000000000000000000O1000000000000000000O10000000000000000O1000000000000000000O10000000000000000O10000000000000000000000O10000000000O1000000000000O1000N" + } + ], + "model_output": "A close-up of a human finger with a visible fingernail, showing a natural skin tone with subtle pinkish hues and a slight sheen, indicating a healthy, well-maintained surface, in front of the wall." + }, + { + "image": "images/caption_detailed_83.png", + "subject_name": "sky", + "object_name": "road", + "predicate_name": "over", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "0d2h:0000000000M300M3L4N2O100O1UOlD^OT;=`ETO`:k0cEQO_:n0P1O1O1N2O1O1000000001O001O001O1O1O1O001O0000000000EjC@V_CCad1_O\\5d1RI]Ob1oN]5j2cJVM]5j2cJVM]5j2dJUM\\5l2bJUM^5k2bJUM^5k2cJTM]5l2bJUM^5l2aJTM`5l2_JTMb5k2^JUMc5j2]JVMi5e2WJZMm50eH\\2^1dMY6V2gIjM]6R2cIoM^6P2aIoMb6P2]IPNe6m1\\ISNe6m1ZISNi6j1WIVNj6i1VIWNl6T1eG\\O_1@n6e1RI[No6e1PI[NR7c1nH]NR7c1nH]NS7b1mH^NT7`1mH`NT7^1mHbNT7]1kHdNW7Y1jHgNV7X1kHhNV7V1lHiNT7W1lHiNU7U1lHkNT7U1lHkNT7T1mHlNU7R1kHnNT7S1lHmNT7S1lHmNU7R1kHnNU7R1kHnNU7R1kHnNU7Q1lHoNT7P1mHPOT7o0lHQOS7o0nHQOQ7o0PIQOP7o0nHSOR7m0mHTOS7j0PIUOP7k0PIUOQ7j0oHVOQ7i0PIWOn6k0TISOl6m0TISOl6m0SITOm6l0SITOm6l0SITOn6k0RIUOo6j0QIVOm6l0SITOm6m0RISOn6m0RISOn6l0TISOm6l0SITOm6m0QITOP7k0PIUOQ7j0PIUOQ7j0oHVOS75SG:i1BV7OTG?g1A\\7>eHB[7>eHB\\7>bHC`7:aHF_7:bHE^7;bHE\\7=cHD_7:bHE^7;bHE^7;aHF`79`HG`79`HG`79aHF_7:aHF`79_HHb77_HHa77`HI`78^HIb77^HIc76]HJc76^HIb77]HJc76]HJd74]HLc74]HLc75\\HKd75\\HKd74]HLc74]HLc74]HLd73\\HMe72\\HMe72[HNg70YH0f9100O1TMMTJ3h800000000000001OO1O11O1O00001O000000O100O11O00001O00O1000000000000000000000000O100001O1O000O01001OO100001OO10000000000001OO1kK0fIOPO1^20k41eI0QOO_21j41eI1POMb21k4OeI0QOO^22l4OfIOPO0^22k4NgI0g12h8OkLOeJ0[50eJ0Y50XH1i1Of61ZIOQ60eJ0[50eJ0a80000000jJ2dI3M0a4KT70000000000000000000000000000000000000000000000000O1mN;TFEk9l0dEUO[:T1000000000000000000000000000000O100000000000000000000000
0000000O11O0000O11O00O1000000000000000000000000000000000000O100000000000000000001O1O3M4L;Eb0^O>jFSMf8Y3F?A`0_Ok0VO?A5mIZJ35MM[5S7fJoHV4o7ZO1N4MRI" + } + ], + "model_output": "A white moving truck with an open rear door, featuring a metal frame and a partially visible license plate, has a dog sitting on it." + }, + { + "image": "images/caption_detailed_85.png", + "subject_name": "mirror", + "object_name": "wall", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "k5b4^:1O3L3N3M3M3L3N3M2N2N2N0O10O100O10O01000O010O010000O10O0100O100O010O100O1000O0100O10O0100O10O10O100O10O10O100O100O010O100O10O01000O010O10O10O0100O10O0100O10000O010O100O10O010O10O100O10O10O100O100O01000O101N3N5J4M5J5L5J6K2M6K3M5J5L3L6K4K5L3M3L6K3M4K5K5L5K4K5L4K4M4K4M5K4K5L4KfP[7" + }, + { + "size": [ + 480, + 640 + ], + "counts": "1_2]1\\O_3W1_L]O2\\O`3W1]L\\O3^Oa3W1ZL[O5^Oc3W1VL[O7^Oe3W1RL[O9]Oi3W1lK\\O;]Ok3W1hK\\O=]On3V1cK]O?]Oo3X1_K[Ob0]OR4W1ZK\\Oc0]OV4V1VK]Od0]OW4W1SK\\Of0]OV4Z1RKYOh0]OV4\\1PKVOk0]OV4_1mJTOl0^OV4a1lJQOn0^OV4c1jJoNP1^OV4e1hJmNR1]OW4g1fJlNS1]OV4k1dJhNV1]OU4m1dJfNW1]OT4n1eJeNW1]OT4o1dJdNX1\\OU4Q2bJcNX1]OU4Q2cJbNX1]OU4Q2cJbNX1]OT4R2dJaNX1\\OU4S2cJ`NY1]OT4S2cJ`NX1^OT4S2dJ_NX1^OT4T2cJ^NY1^OS4U2dJ]NY1]OT4V2cJ]NX1^OT4U2eJ]NW1^OT4U2eJ]NW1^OT4U2eJ]NW1]OT4V2fJ\\NW1^OS4V2fJ\\NW1^OS4V2fJ\\NW1^OR4V2hJ\\NV1^OQ4U2kJ]NT1]OR4R2nJaNo0^OR4Q2PKaNn0^OR4o1RKcNl0^OQ4o1TKcNk0]OR4o1TKdNj0]OQ4P2UKcNj0]OQ4o1VKdNh0^OR4n1VKcNi0^OQ4o1WKcNh0^OQ4n1XKdNg0^OP4o1YKcNg0^OP4n1ZKdNf0^OP4n1ZKdNf0^Oo3n1\\KdNe0]Oo3P2\\KcNe0]Oo3o1]KdNd0\\OP4P2\\KdNc0]OQ4n1]KdNc0^Oo3o1^KcNc0^Oo3n1_KdNb0^On3n1aKdNa0^On3n1aKdN`0^Oo3n1bKdN?^On3o1cKcN?^On3n1dKdN>^On3n1dKdN>]On3o1eKdN=]On3n1fKdN<_On3m1fKdN<_Om3m1hKdN;_Ol3m1jKdN:^Om3n1iKdN:^Om3m1jKeN9^Ol3n1kKdN9^Ol3m1lKeN8^Ok3m1nKeN7]Ol3n1mKeN6^Om3l1nKfN5^Ol3l1PLfN4]Om3m1oKeN5^Ok3m1QLeN4^Ok3m1QLeN4^Ok3l1RLfN2_Ok3k1TLfN1_Ok3j1ULgN0^Ok3l1ULfN0^Ok3k1VLgNO^Oj3l1WLfNO^Oj3k1XLfNO_Oh3l1YLeNO^Oi3l1YLfNN^Oh3l1[LfNM^Oh3l1[LfNL_Oh3k1]LfNK_Oh3j1^LgNJ^Oi3k1]LgNJ^Oi3k1]LgNJ^
Oh3k1_LgNI^Og3k1aLfNI_Of3k1aLfNI_Oe3k1cLfNG_Og3j1cLgNF_Og3j1cLgNF@e3i1fLgNEAd3h1gLgNECa3f1kLgNDG]3b1oLgNCLZ3\\1TMgNC2S3W1[MgNB6n2S1aMgNA:j2n0fMhN@=f2l0jMgN@`0c2i0mMgN@f0]2b0TNhN_Oi0Y2`0XNgN_On0T2:^NhN^OR1o16dNhN]OV1k12hNhN\\O[1h1LmNiN[O_1c1IROhN[Oc1^1EXOhNZOf1[1B[OhNYOj1Y1]O_OiNXOP2R1XOFhNXOS2o0TOJiNWOW2j0QOOhNWO\\2e0kN5hNWOa2`0fN:jNUOd2=bN>jNUOh28^Nd0kNROl25ZNi0nNnNl25UNn0TOhNj26SNR1[O_Ni29kMY1@ZNj27fM`1DTNl27_Mf1LlMg2<]Mh1m5TNSJm1Q6oMoIQ2`90O100O1O100O100O100O1O100O1O100O100O1O10000O1O100001O2N1O4L2N2N2N001O1O1O0000N2O1N2O1O1O1O1O1O100O1O2OO0100O2N1O010O100O1O1O100O1O10000O1O1O1O100O100O100O100O1O1O10000O1O100O1O100O1O100O1O1O1O100O100O1O1O100O100O100O1O1TH`LW4a3hKaLW4_3hKcLV4^3jKbLV4^3jKbLU4_3kKbLT4^3lKbLS4_3mKaLS4_3mKaLR4`3nK`LQ4a3oK_LQ4a3oK_LP4b3QL]Lo3c3QL]Lo3c3QL]L`M1W5b3ZM\\L^M`0k4T3gM\\L]Mm0`4g2SN\\L]MT1Y4`2ZN\\L]M]1P4W2cN\\L]Mh1f3k1mN^L[MS2]3_1XO^L[M`2Q3Q1D_L[Mg2k2i0J`LZMR3c2=3aLZM]3Y21>aLYMh3o1Gh0aLYMV4b1XOU1bLYM]4[1QO\\1cLWMg4T1eNe1dLWMP5m0ZNl1fLWMZ5d0oMU2gLVMg5:aM`2hLVMn53ZMg2hLVM]6^OZI0c3\\3gLTMX:l2hETMX:l2hETMX:l2hETMX:m2gERMZ:n2fERMZ:n2fERMZ:n2fERMZ:n2fERMZ:n2fERMZ:n2fEQM[:o2fEPMZ:P3fEPMZ:Q3eEoL[:Q3eEnL\\:R3dEnL\\:R3dEnL\\:R3dEnL\\:R3dEnL\\:S3cElL^:T3bElL^:T3cEkL]:U3cEjL^:V3bEjL^:V3bEjL^:V3bEjL^:V3bEjL^:V3bEiL_:X3`EhL`:X3`EhL`:W3aEiL^:X3cEfL^:m2[ESM70^:k2_ESM32^:j2dEPMN5_:j2RFVMn9i2TFUMm9j2TFVMl9j2TFVMl9j2TFVMl9j2UFTMl9l2TFSMm9m2SFRMo9n2Q11O1N2O1O001O01O04L000O4M1O1O001O00000000003M2N1O001O00000010O00O1001O001O0M4eLoDNGO3Mh04]O^Ob0K5N2N2N2O1O2N002N100O1O011N10O0101N10O0101N100O100O100O1O100O001O1O100O1O1O1O2N1O1O1O1O001O0O20OO2N2N1N3M3J5@a0L4N100M3O2N1M3O2L4^OlE[OY:b0?M4N2N1N201N2N100O101N2N3K[Vf0" + } + ], + "model_output": "A silver laptop with a white keyboard and a large touchpad sits on a table. The screen displays a blue background with a white 'X' icon on the left side and a vertical list of icons on the right side." 
+ }, + { + "image": "images/caption_detailed_87.png", + "subject_name": "teddy bear", + "object_name": "teddy bear", + "predicate_name": "beside", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "^ZV39l<8K6I5L3N1N2O1N1G:M2N2N2N3M2O1N2N2O1N2N2O1N2N2VOlM^FU2`9oM]FR2b9QN[FP2c9SNZFn1f9SNZFm1e9UNYFk1h9VNVFj1j9WNVFi1j9XNTFh1KfMn9d0VFd1o9\\NQFa1R:_NnE`1S:aNkE_1V:cNhE[1Z:k0O1O001O1N010O01O001O0010O101O001O00001O2N3M2N1O1O0O2O1O1O0010O011N1O10O0100O1O1^MUFd1k9[NWFe1h9ZNZFe1g9XN[Fh1e9WN\\Fi1d9WN]Fh1d9WN\\Fh1e9XN\\Ff1f9YN[Fe1f9[N[Fd1f9ZN\\Fe1e9YN^Fd1d9ZN_Fd1c9WNcFe1e:K1N10O[O`DD_;=cDA^;?cD_O];b0eD[O\\;d0fDYO\\;g0fDSO^;l0`0O2O0O2O1N2N2M4Hfee3" + }, + { + "size": [ + 425, + 640 + ], + "counts": "[UW31Y\\W10_\\lN1]dL6K3M5^ODoC=m;GQD;m;GRD:l;GTD9l;GTD:j;GVD9i;IVD7i;JWD6h;KXD5g;LXD5e:CPF9YO5e:EPFQ1o9SOnEm0Q:WOlEi0T:[OgEf0Y:]OdEc0\\:_OaEa0_:A`E?`:B]E?e:R1001O2M3N001OO1O100O10000O1000001N1000000001O001O1O000010O001O001M20[N\\Ee0c:ZO`Ef0_:YObEg0^:UOfELJ8_:HkE0I5]:HmE2G5]:FnE5G4Z:ERF7E2Z:_OZE2i0=D2Z;MgD2Y;NgD2Z;MfD3Z;MgD2Z;MgD2Y;NgD1[;NfD1[;NfD0\\;OeDO];0eDL^;2P1LWnf2" + } + ], + "model_output": "A plush teddy bear with a light brown, textured fur exterior, featuring a rounded head with a small, red, triangular nose and a subtle smile, sits with its arms outstretched and legs spread apart, its white snout contrasting with the light brown fur. The teddy bear is beside another teddy bear." 
+ }, + { + "image": "images/caption_detailed_88.png", + "subject_name": "snowboard", + "object_name": "car", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Pdj24k>3N2N1O2N2N1O1O1O1O001O1O001O001O0000000000000000000000000000000O1lA\\Ol=l0O1000O010L301O100O110O2N001O000O10000001O00000000000000000000000000O10000000O100000000O101O00002N1O00000000O20mAXOl=n00000000O1N1O2O101N101O00000000O10000000000000000000000000000001O00001O00001O1O001O1O1O1O3DcA1f>2M010O0000I700O\\k]4" + }, + { + "size": [ + 480, + 640 + ], + "counts": "aa`17f>8I5VOFgB=S=KdB;X=j0L3M3N2N1O1O2N2N2O001O0O2O00001O001O000O2O001O00001N101O001O1O3L6K3M2N2M3M3N2M2O1N2N2O1N1O2O0O2N100O1O1O2N1O1O100N200O10000O1O00100O01O0010O01O010M210O0O20OO2O1O03N001O0O100O2O00000POlKTGT4l8TLlFl3S9VLlFk3S9TLmFm3T9RLlFn3T9SLWFHa0U4W9XLgFi3Y9YLeFg3\\9WLeFi3[9WLeFi3[9XLdFh3[9[LcFe3]9[LbFf3^9ZLbFg3]9YLcFg3]9YLcFg3]9YLcFg3]9YLcFg3]9YLcFg3]9YLcFg3]9YLcFg3]9YLcFg3]9YLcFh3\\9YLbFh3^9XLbFh3^9XLaFi3_9WLaFi3_9WL`Fj3_9XL^Fk3a9j000O1000_K_Fd3_9[LdFd3\\9\\LdFd3\\9\\LcFe3^9ZLbFf3_9XLbFi3]9WLcFi3]9WLcFi3]9XLaFi3_9WL`Fj3a9TL`Fl3`9TL_Fm3a9SL_Fm3a9SL_Fn3`9SL^Fn3b9RL^Fn3b9RL_Fm3a9RL_Fo3a9QL_FP4`9PL`FP4`9PL`FP4`9PL`FP4`9PL`FP4a9oK_FQ4a9oK_FR4`9nK`FR4`9nK`FR4_9nKbFR4^9nKbFR4^9mKcFS4]9mKcFS4]9mKcFT4\\9lKeFS4[9mKeFS4Z9nKfFR4Z9nKfFR4Z9nKfFR4Z9oKeFQ4Z9PLfFQ4Y9oKgFQ4Y9oKgFQ4Y9PLfFP4Z9PLfFP4Z9QLfFn3Z9TLeFl3Z9ULeFk3[9m0000WKfFl3Z9SLgFm3Z9l00001O01OO10YKeFi3[9n0O1100O1OO10000000001O00000000001O00000000QKhFV4X9jKhFV4X9jKgFW4Y9h00QKgFX4Y9gKhFX4X9hKhFX4X9hKhFX4X9hKhFX4Y9gKgFZ4Y9eKgF[4Y9eKgF[4Y9eKgF[4Z9dKeF]4]9aKeF]4\\9bKgF\\4Y9[KfFK2j4d9WK[Fi4e9WK[Fi4n90001O01O01O000010O0000010O0010O01O01O00001O000000001O00001O0011O2M1O2O0O2O0O2N10002N0O00010O100O0010O100O00010O1O1O010O1O001O1O010O1O001O001O1O1O001O1O001O0O2O2N1N100O2O001N2N2O1N1O3N1N1O2N1N3N1O1O1O2N1O2O001O010O001O00001O1O0O10001O001O00001O0O101O001O1O00001O00001O000000001N101O1O1O000O10001O00000O101N101O000O2O1O0O101O001N101O1N1O2O000O2O000O2O1N2N3M2O1O2M3M4M3L3M2
M3N2N1N3M4J6I:IB1O001N2N2O2M2N2Nkol4" + }, + { + "size": [ + 427, + 640 + ], + "counts": "V2S;X200000000000000000000000001O2N2N1O001O2N00000000001OO1001O0000000000000000000000000000000000000000000000000000000000000000001O00000000000000000000000000000000O10000O100O100001O000000000000000000000000000000000jIaM`1_2`NbM_1^2aNcM^1]2bNdM]1]2bNdM]1[2dNfM[1Z2eNgMZ1Z2eNfM[1Z2eNgMZ1Y2fNgMZ1Y2eNjMY1[1VJSOa4CX1U2gNlMY1T2gNlMY1T2gNlMY1T2fNmMZ1S2fNmMZ1S2fNmMZ1S2fNnMY1R2fNoMZ1Q2eNPN[1P2eNPN[1P2eNPN[1P2dNQN\\1o1dNQN\\1n1dNSN\\1m1dNSN\\1m1dNSN\\1m1dNSN\\1m1dNSN\\1m1cNTN]1l1cNTN]1k1dNUN\\1i1eNXN[1h1eNXN[1g1fNYNZ1g1fNYNZ1g1fNYNZ1g1fNYNZ1g1fNYNZ1g1fNYNZ1g1fNYNZ1g1fNYNZ1g1gNWNZ1i1fNWNZ1i1fNWNZ1i1fNWNZ1i1fNWNZ1i1fNWNZ1i1gNVNY1j1gNVNY1j1gNVNY1j1gNUNZ1l1eNSN\\1m1eNRN[1n1eNRN[1n1eNRN[1n1fNQNZ1o1fNQNZ1o1gNPNY1P2gNPNY1P2hNoMX1Q2hNnMY1S2fNmMZ1S2fNmMZ1S2gNlMY1T2fNmMZ1S2eNnM[1R2ZNYNf1g1RNaNn1_1oMdNQ2]1lMeNT2[1kMeNV2[1iMfNW2Z1hMgNX2Y1gMhNY2W1gMjNY2V1gMmMfL5d5n1eMmMmL0^5S2eMmMoLL^5W2bMmMZ3S2fLmMZ3S2fLmMZ3S2fLmMZ3R2gLnMY3R2gLoMX3Q2iLnMW3Q2jLoMV3m1VIhMf3;S3h1VInMl3;m2e1YM\\Ne2d1]MZNc2f1bMUN^2l1eMnM]2R2^4O100001OO100001O00000000000000O100001O00O10lFRNk6o1TIRNk6n1^HRNTO2]8l1_HTNRO2]8j1aHUNPO2_8h1cHXNjN2b8f1dHYNiN1c8f1eHXNgN3d8e1dHYNhN2d8e1dHZNgN1e8e1dHZNgN1e8e1cH[NgN1f8c1dH\\NeN2V2GQ4k1TK\\NeN2T2KQ4g1WK[NeN2R2NQ4d1YK\\NdN2P20S4b1YK\\NdN2P20S4b1YK\\NcN3P20T4`1ZK^NaN2P21U4_1[K]N`N3=Em0?j4\\1\\KoNmNXOj0?l4Z1\\KPOnNYOg0>o4X1]KQOlN\\Of049f0\\5l0SKnNlNWO=98f0\\5l0SKnNA@0f0\\5l0RKoNB_O0f0\\5l0RKPOA^O2e0[5m0SKoN@_O3d0Z5n0SKPO_O^O>:P5X1VKUNfNc0f0Fb06l4\\1]KgNTOFe05k4^1^KRN`N1N9d01f04j4_1gK\\NhN2h01j4a1fK^OAPOi4c1eK]OCnNi4e1dK]OEkNh4h1bK^OHeNi4m1_K^OW6b0hI@W6`0iIbNXNA0O1`0n7^1iI_NkNL]7e1gI_NnNJ\\7g1gI^NPOGZ7k1fI^NY7b1gH_NX7a1hH_NX7a1hH^NY7c1gH]NX7e1gHZNYNEl8R2kH]NT7d1jHXN[NGj8Q2`HcNfN^Oh8P2^HnNh1ZO[2i1lKmNe1_O^2e1kKnNe1_O_2c1lKnNd1_OdM]O^4V2ZLQNlNc0g2JdM_O]4S2]LPNnNa0d2LbME]4n1_LPNQO2J1f27dML[4j1aLoMRO0J3d27bM0]4g1dLlMXO7U23cM5\\4e1eLjMXO9T23`M8_4b1ZMSNh0o0n1o0XMTNi0m0o1o0XMTNi0m0o1o0XMTNj0l0n1P1XMTNk0k0m1Q1XMTN
l0j0l1R1YMSNn0h0i1U1YMSNR1d0e1Y1YMSNX1>_1_1YMSNY1=^1`1ZMSNW1=_1`1ZMSNW1=_1`1ZMSNW1=_1`1[MRNU1>a1`1ZMRNU1>a1`1[MQNT1?a1`1\\MPNR1`0c1`1\\MoMQ1a0c1`1\\MoMQ1`0d1a1[MoMQ1`0d1a1[MoMQ1`0d1a1ZMPNR1?d1a1ZMPNR1>e1b1XMQNS1=e1b1XMQNS1=e1b1XMPNS1?e1a1XMPNQ1a0g1_1XMPNP1b0h1^1YMoMn0c0j1^1YMnMm0d0j1`1XMjMo0f0i1a1>_NBb1>]NBc1>\\NCd1=\\NCe13R6T2jJUNgND=3Q6U2kJTNgNC>4P6U2lJSNfND>4P6U2lJoMkNG95P6U2nJhMnNN44R6U2mJgMnN034R6U2nJfMmN134Q6V2PKdMlN234Q6V2PKdMlN234Q6V2PKdMmN125Q6V2PKdMlN314S6U2PKdMlN314S6U2PKdMlN304V6T2oJeMjN314V6T2oJeMjN313W6U2_J]MUOV15WOX6V2]JRO[OgNX6X2]JQO[OfNY6Z2[JQO[OeNZ6Z2ZJRO\\OdNZ6Z2ZJRO]OcNY6[2YJSOB]NV6`2XJ_NROYOb0CX6d2TJ[NAROV7c2XIgMWOH_8a2ZHfM[OC^8g2XHeMS8\\2kGeMV8[2gGhMY8Y2dGjM[8V2dGjM]8V2cGjM]8V2cGiM^8W2bGiM_8V2bGhM_8X2mGZMU8f2W11_IYMTLNU6i2eM`M[2`2dMaM\\2_2bMcM^2]2aMdMRMMV4_2gNeMQMOW4]2fNdMSM1V4[2gNcMSM3W4Y2eNcMUM8R4W2hNaMUM>n3R2lN`MUM`0n3P2mN`MTMb0n3n1oN_MSMf0k3k1ROcMmLf0AoNV4h2]OcMhLV1f3W1bMTM:b0VNX1m3R1bMVM:a2T29bMWM:_2T2:aMXM;^2T2:aMYMZ3`M`M]O_Of2D>]3_M^M_OAZ6Q3WJkLQOGO6c07V6Q3WJiL]OH6?U6Q3XJhL37d5R3ZJfL28c5S3]JbL1;c5R3gKnLY4Y2ZImM]2JY4Y2\\IkM[2LX4Z2]IjM[2LX4Z2^IhM[2OV4Y2_IhM[2OV4Y2`IfM[21U4Y2`IeM]21R4[2bIcM\\22R4[2cIaM\\24Q4[2cIaM\\24R4Z2cIaM[25R4Z2dI`MZ27P4[2fI\\M\\28n3\\2hIXM\\2k0`JWOR5N=l0`JWOS5M=l0_JXOT5L=l0^JYOU5K[NBe1fJZN_51Kc1gJ^N]5OLa1iJ`N[5OKb1iJ`N\\5NKb1hJbN\\5LKb1jJbN[5LKb1iJcN\\5KJb1kJcN[5KJa1jJgN[5HKa1jJgN[5HJb1jJgN\\5GJa1kJhN[5GJa1kJhN[5GJV25jMKV25jMJX25hMKX25hMJY26gMJZ25fMKZ25fMKY26gMJY26gMIh1hJeN_5CIh1hJcNa5EGh1iJbN`5FGh1jJ`N`5HEh1mJ^N_5JDh1nJ\\N_5LCh1oJYN`5OAh1QKUN`53_Oh1k0XNUOh1k0XNUOh1k0XNUOi1j0WNUOj1k0VNUOi1l0WNUOh1k0XNUOg1l0YNTOg1l0YNTOf1m0ZNSOd1o0\\NQOc1P1]NPO9iJc02VOU6NQO6jJf0^6TOjN2jJj0\\6TOUOk0l0UOUOj0k0VOUOj0k0VOUOj0k0VOUOj0k0VOUOj0k0VOUOj0k0VOUOi0l0WOdN2SKg0Y6WOcN7PKb0]6WOcN:lJ?b6WOcNW1^1iNcNU1^1kNcNS1^1mNfNl0]1TOfNl0W1TOnNi0P1WOROi0l0WOUOk0h0UOYOm0d0SO]Om0b0SO_Ol0a0TO_O7XJ7Y6B_O5]J6T6E@3bJ2o5K_O2Y1NR60000000000000000000000000000000000000000000O1000000001O0000000000000000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000N" + } + ], + "model_output": "The sky is filled with two distinct clouds: a larger, fluffy cloud on the right side, with a soft, rounded shape and a slightly irregular outline, and a smaller, more elongated cloud on the left side, appearing wispy and stretched out, with a smooth, flowing appearance. Over the sky, an airplane flies." + }, + { + "image": "images/caption_detailed_91.png", + "subject_name": "tree", + "object_name": "rock", + "predicate_name": "beside", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": 
"0]=c100000000000000000000000000000000000000000000000000000000O1000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000000O11O00O100000000000000000000000000000000000000000000000000000000000000hI[NVI2d5c16cNWJd0`5i06iNUJc0b5d082lJ_Nh3`1[16dJaNo3Y1[1;WJPN1e0[4P1\\1\\1bNdN]1]1cNcN]1^1bNbN]1b1`N^N_1d1`N\\N_1e1aN[N_1e1aN[N_1e1aN[N^1g1`NZN`1f1`NZN`1f1`NZN`1g1_NYNa1i1\\NXNd1i1[NWNe1i1[NWNf1h1YNYNg1g1YNYNg1h1XNXNh1h1XNXNh1h1XNXNh1h1XNXNh1h1WNYNi1g1WNYNh1i1WNXNh1g1YNXNg1i1XNXNh1h1XNXNg1h1ZNXNf1h1ZNXNe1i1ZNXNf1g1[NYNe1g1[NYNd1g1\\NZNd1f1\\NZNd1e1]N[Nb1e1_N[Na1e1_N[N`1f1_N[N`1f1`NZN_1g1aNYN^1h1aNYN_1h1`NXN_1i1aNWN^1i1cNWN\\1j1cNWN[1k1eNUNZ1k1gNUNY1k1gNUNZ1j1fNVNZ1i1gNWNY1i1fNXNY1h1hNXNW1h1jNXNV1g1kNYNT1g1lNZNT1f1lNZNT1f1kN[NU1d1lN\\NS1e1lN\\NT1c1lN^NS1c1iNaNW1_1gNcNY1\\1gNeNX1[1fNhNZ1X1eNiN[1W1dNjN[1V1eNkN[1U1dNlN\\1T1dNlN\\1T1cNmN\\1T1dNlN\\1T1dNlN[1T1eNmNY1U1gNkNX1V1gNkNX1U1iNkNV1V1jNjNU1W1jNjNV1V1jNjNU1W1kNiNU1X1jNhNV1X1jNhNU1Y1kNgNS1[1mNeNR1]1mNcNS1]1mNcNR1]1oNcNP1^1oNcNP1^1PObNo0`1PO`No0d1nN\\NQ1g1mNYNR1o1gNQNY1R2cNoM]1S2aNmM_1V2^NjMb1W2\\NjMc1d2oM]MQ2h2iMYMV2k2fMVMZ2l2aMWM^2k2_MWMa2k2YMYMg2i2PM^Mo2f2dLnIWOa3U4U70000O10000000000O100000000000000001O00000000000000001O000000001O1O001OaNgEWNY:o1]EUNc:U3]MfK^JZ4U5VLhJk3U5XLjJh3T5ZLlJf3S5[LmJe3S5[LmJe3R5\\LnJd3R5\\LnJd3R5\\LnJd3R5\\LnJd3R5]LmJc3R5^LnJb3S5]LmJc3S5]LmJc3T5\\LlJd3U5[LkJd3V5\\LjJd3W5[LiJe3X5ZLhJf3Y5YLgJg3Y5YLgJg3Z5XLfJh3Z5XLfJg3[5YLeJg3\\5XLdJh3\\5XLdJg3^5XLbJh3_5WLaJi3_5WLaJi3`5VL`Ji3b5VL^Jj3b5VL^Jj3b5VL^Jj3b5VL^Ji3d5VL\\Jj3d5VL\\Jj3e5UL[Jk3e5UL[Jj3g5ULYJk3g5ULYJk3g5ULYJk3h5TLXJl3h5ULWJk3j5TLVJl3j5TLVJk3k5ULUJk3k5TLVJl3j5TLVJl3j5TLVJl3k5SLUJm3l5RLTJn3l5RLTJn3l5RLTJm3m5SLSJm3m5SLSJm3n5RLRJo3m5QLSJo3m5QLSJo3n5oKSJQ4e4ULmII^1R4a4ZLQJC^1S4]4aLTJZO_1T4^4bL`L^3`3bL`L^3_3dL`L[3]3iLcLW3\\3jLdLV3\\3kLcLU3\\3lLdLT3Z3oLeLQ3R3YMmLg2P3]MoLc2P3^MPMb2m2aMSM_2k2cMUM]2d2jM\\MV2T2mKmJV2o2m1S2[NmMe1o1nKmJ^2T3d1k1cNUN]1k1aNWN_1i1`NXN`1g1`NZN`1f1_N[N`1f1^N\\Nb1e1[N]Ne1c1WNPKhM^3R4b1`MVO`2k0UM_Ok2c0bLn
JDd4j3V6O10000O1000000000000000000000000001O0000O1000000001O0000000000001O00001O000\\MnKZJS4c5oK]JR4^5RLbJn3Z5WLeJi3X5ZLhJf3V5\\LjJe3T5\\LlJd3P5`LPKa3m4aLSK_3k4cLUK]3i4eLWK\\3g4eLYK\\3d4fL\\K[3b4fL^KZ3a4gL_KY3`4hL`KX3^4jLbKV3]4kLcKV3[4jLfKV3Z4jLfKV3Y4kLgKU3V4nLjKS3R4PMnKP3P4RMPLn2o3SMQLn2m3SMSLm2l3TMTLm2j3TMVLl2i3UMWLk2h3VMXLj2g3WMYLj2c3YM]Lg2`3\\M`Ld2^3^MbLc2\\3^MdLb2[3_MeLa2Y3aMgL_2X3bMhL_2W3aMiL_2V3bMjL_2U3aMkL_2S3cMmL^2o2eMQM\\2k2gMUMY2k2gMUMY2j2hMVMX2g2kMYMU2d2nM\\MS2b2mM_MS2a2mM_MT2]2oMcMQ2]2oMcMR2]2mMcMT2]2kMcMV2]2iMcMW2^2hMbMX2_2gMaMZ2_2eMaM]2^2bMbM_2_2_MaMa2_2_MaMb2e2WM[Mj2g2SMYMm2g2RMZMo2f2PMZMQ3e2oL[MQ3f2nLZMS3e2mL[MT3d2lL\\MT3e2kL[MV3e2iL[MX3f2fLZM\\3g2aLYM`3g2^LZMb3f2^LZMc3e2]L[Md3e2[L[Mg3c2YL]Mh3e2UL[Ml3e2RL\\Mn3e2QL[MP4h2lKXMV4g2eK]M\\4b2cK_M]4c2`K^Ma4b2^K^Mb4c2\\K^Mf4`2YKaMh4_2WKaMi4a2UK_Ml4a2RK`Mn4a2QK_MP5a2nJ`MS5_2mJaMT5_2kJaMU5a2iJ_MX5`2gJaMZ5_2eJaM\\5^2dJbM]5^2bJbM_5^2`JbMa5_2]JaMe5^2ZJbMi5\\2SJgMm5Y2SJgMn5Y2QJgMP6Z2mIgMV6`51O1O2N3M7I7I5K3M5K3M3M2N3M3M5K3M1O2N9G7I2N3M2N6J5K3M1O4L4L2N3M2N2N3M5K4L002N5K3M1O0\\M]Ek0c:ROaEm0g:jNZEV1h:gNYEY1k:bNWE]1i:bNXE^1j:^NXEb1k:PNiDH>X2Q<000001O00000000000000001O0000000000000000000000000000000000O1001O000000O11O0000O100000000O10000000000000000O1000000000000000000O100O100000000O1000000000000000000O10000000000000000O100000000" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"`=_1a=0000000000000000000000000000000000000000000000000O1000000000000O10000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000000O1000000O1000000O10000000000O1000000O100000000O1000000O1000000O1000000O10000O100000000O1000000O10000O1000000O10000000000O1000000000000001O00000000001O00000000001O0000001O00000000000000000000000000000000000000000000000000000000000000O10000O100O1O1O1O100O100O1000000O1000000000000O100000000000000000000000000000000001O00000000000000000000000000000000000000000000000000000000000000000000000000000000O10000000000O1000000000000O1000000000000000000000000O10000000000000000000000000000O10000000000000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000O100000000000000000000000000000000000000000000O1000000000000000000O100000000000000000000000000000000000000000000001O0000000000000000001O00000000000000000000001O0000000000000000000000000000000000001O0000000000001O000000001O0000000000001O0000001O0000000000000000O100000000000000000000000000000000000000000000O100O100000000O1000000O10000000000000000O1000000000000000000O10000000000O100000000O1000000000000000000cC" + } + ], + "model_output": "A tree with dense, needle-like foliage in a conical shape, featuring a mix of light and dark green hues, stands beside a rock. The branches are layered, with the upper branches extending outward and slightly upward, creating a full and lush appearance." 
+ }, + { + "image": "images/caption_detailed_92.png", + "subject_name": "bear", + "object_name": "rock", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "\\fk0h0]<9J5C>XO^NoDV2f::G5M1N3M2N2N3N1N2O1N2O1O1O1N2O1O2N001O1O00100O1O1N2O10O01O1O1O1O0000000000001OO10000001O00001O1O0000001O001O2N001O1O002N1O001O1O001O1O1O1O001O1O1O1O100O1O00100O00100O1000O10O1000O10O100000O0100O1000O100O01000O1O1O0003NO1O10O011O2gCaMk;l2O0O10000O01000O2O0O1O101N10010N2O1O001O0O101O0O1000000000O1000000000O0100O100O00100O1O001O1O1O001O1N2O1O00001O1N2O001O1O1O1N101O1O1O001N2O001O1N2O001N2O1N101O1O1O001N2O1O1O001N2O001O1O001N2O002N002M101O001N2O1O1O001N2O1O0O2N2N101N2N2N1N3N2N1O2N2N2M3N2M3N2L6G_ab0" + }, + { + "size": [ + 480, + 640 + ], + "counts": "T6j8V60O1O2O0O010N2N2O1N2O1O2N1O1N101O1O1O1O100N2O1O1O1O100O1O1O101N1O1OWGYKZ7g4eH`KV7_4jHhKP7W4PInKl6Q4TITLh6k3XIYLe6f3\\I^L`6a3`IdL\\6[3eIiLW6V3jImLS6R3mIRMP6m2QJXMj5h2UJ^Mf5`2\\JbMb5]2_JfM^5Z2bJjMZ5U2gJmMW5R2iJSNS5l1nJVNP5i1QKZNl4e1UK_Ng4a1YKaNe4^1[KgNa4X1`KjN^4U1cKmN[4S1eKoNY4P1hKSOU4l0kKWOS4i0mKZOP4f0PL[Oo3d0RL^Ol3a0ULAi3>XLDf3P3AQM`0n2@RMb0l2]OUMe0i2ZOXMg0g2XOZMk0c2UO]Mm0a2RO`Mo0_2PObMR1\\2nNdMT1Z2kNfMW1Y2iNgMZ1V2eNkM]1S2bNnMa1o1^NRNd1l1[NUNh1h1XNXNj1f1UN[Nm1c1RN^NP2`1oM`NS2_1lMbNX2Z1gMgNZ2X1eMiN_2S1`MnNa2Q1^MPOe2m0ZMSOi2k0VMVOm2g0RMZOo2e0PM\\OR3b0mL_OV3>iLC[39dLH^36aLKb32]LNf30YL1i3MVL4k3KTL6m3ISL7n3HQL9o3GPL:Q4EnKV4BjK>W4AhK`0X4@hK`0Y4_OfKb0Z4^OfKb0Z4^OeKc0\\4\\OdKd0\\4\\OcKe0]4[OcKd0^4\\OaKe0_4[OaKe0_4[O`Kf0`4ZO_Kg0a4YO]Ki0c4WO\\Kj0d4VO[Kk0e4UOZKl0f4TOYKm0g4SOXKm0i4SOVKn0j4ROUKo0k4QOTKP1k4QOSKQ1m4oNRKR1n4nNRKR1m4oNRKQ1o4oNPKR1P5nNPKR1P5nNoJS1Q5mNnJT1R5lNmJU1R5lNmJU1S5kNmJU1S5kNlJV1T5jNkJW1U5iNjJW1V5jNjJV1V5jNiJW1W5iNhJX1W5iNiJW1W5iNhJX1X5hNgJY1Y5gNgJY1X5hNgJY1Y5gNgJX1Z5hNeJY1Z5hNeJY1[5gNeJY1[5gNdJZ1\\5fNdJZ1[5gNdJZ1\\5fNcJ[1]5eNcJ[1]5eNbJ\\1^5dNbJ[1_5eN`J\\1_5eN`J\\1`5dN`J\\1`5dN_J]1a5cN_J]1`5dN_J]1a5cN^J^1b5bN^J^1b5bN]J^1c5cN\\J^1d5bN\\J^1d5bN[J_1e5aN[J_1d5bN[J_1e5aNZJ`1f5`NZJ`1f5`NYJa1g5_N
YJa1f5`NYJ`1h5`NXJ`1h5`NWJa1i5_NWJa1h5`NWJa1i5_NWJa1i5_NVJb1j5^NWJa1h5`NXJ`1h5`NXJ`1g5aNZJ^1f5bNZJ]1g5cNYJ]1f5dNZJ\\1f5dN[J[1e5eN[J[1e5eN[J[1d5fN\\JZ1d5fN]JY1c5gN]JY1c5gN]JY1c5gN]JY1b5hN_JV1b5jN^JV1b5jN^JV1b5jN_JU1`5lN`JT1`5lN`JT1`5lNaJS1^5nNbJR1^5nNbJR1^5nNbJR1^5nNbJR1]5oNdJP1\\5POdJP1\\5POdJo0]5QOdJn0[5SOeJm0[5SOfJl0Z5TOfJl0Z5TOfJl0Y5UOgJk0Y5UOgJk0X5VOiJi0W5WOiJi0W5WOiJi0V5XOkJf0V5ZOjJf0V5ZOjJf0U5[OkJe0U5[OkJe0U5[OlJd0S5]OmJc0S5]OmJc0S5]OnJb0R5^OnJb0R5^OnJb0Q5_OoJa0Q5_OPK`0P5@PK`0o4ARK>n4BRK>n4BRK>n4BRK>m4CTKhKBX4>iKAW4?iKAW4?iKAV4`0kK^OV4b0jK^OU4c0kK]OU4c0kK]OT4d0mK[OS4e0mKZOT4f0lKZOT4f0lKZOS4g0nKXOR4h0nKXOQ4i0oKWOQ4i0oKWOQ4i0PLUOQ4k0oKUOQ4k0oKUOP4l0PLSOQ4m0PLROP4n0PLROP4n0PLQOP4P1QLoNo3Q1QLoNn3R1RLnNn3R1RLnNn3R1SLlNn3T1RLlNm3U1SLkNm3U1TLjNl3V1TLjNl3V1TLiNl3X1TLhNl3X1ULgNk3Y1ULgNj3Z1VLeNk3[1ULeNj3\\1WLcNi3]1WLcNi3]1WLcNh3^1YLaNg3_1YL`Nh3`1XL`Ng3a1ZL^Nf3b1ZL^Ne3c1[L]Ne3c1[L\\Nf3d1[L[Ne3e1[L[Ne3e1[L[Ne3e1[L[Nd3f1]LXNd3h1\\LXNc3i1]LWNc3i1]LWNc3i1^LbLfMh0k5g2_L^LlMh0e5j2_L\\LQNg0`5m2`LZLSNf0]5Q3`LXLVNe0Z5S3`LWLXNe0X5T3aLULYNf0V5U3aLTL[Nf0S5W3bLRL^Ne0P5Y3cLQL_Nc0o4\\3lMdLT2\\3mMcLR2^3PN`LP2`3QN_Lo1a3RN^Ln1b3SN]Ll1d3UNZLl1f3UNYLj1h3XNVLh1j3XNVLg1k3ZNTLf1l3[NSLe1m3\\NRLd1n3]NPLc1Q4_NmKa1S4`NkKa1U4e40O100O1000000O1000000O10000O100000000O100O1000000O1000000O10000O100000000O10000O100O1000000O100000000O10000000000O10000O100O100000000O1000000O10000000000O1000000O1000000O10000O100000000O100000000O100O100O1000000O10000O1000000O10000TJmJj1T5VNnJh1R5XNoJg1Q5YNPKe1Q5[NPKd1P5\\NQKc1o4^NPKa1Q5_NPK`1P5aNPK^1P5bNQK\\1P5dNQK[1o4eNRKZ1n4fNSKY1m4hNSKV1n4jNSKU1m4kNTKS1m4mNSKS1m4nNSKP1n4POSKo0m4QOTKn0l4SOSKl0n4TOSKk0m4UOTKi0m4WOTKh0l4XOUKg0k4ZOTKe0m4[OTKd0l4\\OUKc0k4^OUK`0l4@TK`0l4@UK>l4BUK=k4DUK:l4FTK:l4FUK8l4ISK6n4JSK4n4LSK2n4ORKKS55nJHT58lJEW5;jJCW5=jJBV5?jJ_OW5a0iJ]OY5d0gJZOZ5f0gJXOZ5h0fJWO[5j0eJSO]5m0dJPO^5P1cJlN`5T1aJiNa5X1_JYNTOdM^6S4^JXNP6i1PJUNQ6k1PJTNP6l1PJTNP6l1QJRNP6n1QJQNo5P2QJoMo5Q2QJnMP6R2QJmMo5T2QJkMo5U2RJjMn5V2RJjMn5W2RJgMo5Y2RJfMn5Z2RJfMn5Z2SJeMm5[2TJcMm5^2SJaMm5_2TJ`Ml5
`2TJ`Ml5`2UJ_Mk5b2UJ\\Ml5d2UJ[Mk5e2UJ[Mk5f2UJYMk5g2VJXMj5h2VJWMk5j2UJUMk5k2VJTMj5l2WJSMi5m2XJQMi5P3VJPMj5P3WJoLi5Q3XJmLi5S3XJlLh5U3WJjLj5V3WJiLi5W3XJgLi5Z3WJdLj5\\3WJbLj5^3WJ`Lj5`3VJ_Lk5b3UJ\\Ll5d3UJZLl5f3TJYLm5h3SJVLn5j3SJSLo5m3RJQLo5P4PJoKQ6Q4PJlKR6T4oIjKR6V4oIgKT6Y4mIcKU6]4mI]KV6d4S2001O0000001O001O000000001O000010O0000O2O00010O00001O00001O0O110O000000001O00kJ" + } + ], + "model_output": "A black cellular telephone with a physical keypad featuring white alphanumeric characters and a central navigation button surrounded by four function keys sits on a table. The screen is rectangular with a reflective surface, and the device has a curved design with a visible antenna on the top left corner." + }, + { + "image": "images/caption_detailed_94.png", + "subject_name": "pillow", + "object_name": "couch", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "kc05gc04M3O1O1O1O100O100O1O100O1O1O100O1O100O10000O1O100O100O100O100O1O100O1O100O100O100O100O1O100O1O1O1O10000O100O10000O10000O100O1N2O10000O10000001O1O1O1O001O1O1O1O1O1O1O2N1O1O1O1O1O1O1O1O1O1O1M3O1N2N2N2N2O2M2N2N2N2N3L`lW7" + }, + { + "size": [ + 640, + 480 + ], + "counts": 
"o`02d03AMeb0T1M5Z]OhN1NZb0f1L1OO1O1001N10001O10OO2O1O1O1N2O1O1O1O1N2O1O1O100N2002YN\\^Om0fa0PO]^Oo0ca0PO^^OQ1aa0lNb^OT1_a0jNb^OV1^a0hNd^OX1]a0eNe^O[1[a0dNf^O\\1oa0000000000O1000000000000000000O10000000000000000O10000000O100000000000O1000000000000O1000000000O1000O100000000000000O100000000000000000000O10O100000000000000000000000000000000000V_O^NdN2W`0a1SAiNm>W1k@]NlN>Y`0U1h@TOX?l0c@YO]?g0\\@@d?`0\\@@d?`0\\@@c?a0[@Ae??W@Ei?;m_OOS`01j_O2V`0Ni_O3W`0k100000000000000000000000000000000000000eM]LYDc3`;fL^DZ3_;iLaDW3Z;nLfDR3Q;\\LkBg0T2m2b:fM^EZ2a:gM_EY2a:gM_EY2a:gM^EZ2b:fM[E]2e:cMeDS3Z;nL]D[3c;dLPDj3PnLAQ3b0mL^OS3c0mL\\OR3f0mLZOR3h0nLWOR3i0nLWOQ3m0mLROR3Q1lLoNP3W1mIbMh2W1Z3Y1kIdMj2R1[3\\1gIgMk2m0^3h1aLXN_3j1_LVNa3l1]LTNc3n1[LRNe3P2YLPNg3R2WLnMi3S2VLmMj3U2TLkMl3W2RLiMn3Y2PLgMP4[2nKeMR4^2jKcMV4_2hKaMX4`2fKaMZ4b2bK_M^4Y1gIgNf12c4W1gIjNc1Of4g2WKZMa2dNUNV4TOXMg2bNUNY4POVMk2aNUN\\4lNTMW5P3bJSM^5Q3[JRMe5T3RJoLn5W500000000000000000000000000000000000000000eKgId1Y6\\NgId1Y6\\NgId1Y6g20eKgId1Y6\\NgId1Y6\\NgId1c5gKQKe2\\Od1c5gKQKe2\\Od1Y6\\NgId1Y6\\NgId1Y6\\NgId1X6]NhIc1X6]NhIc1X6]NhIc1X6]NhIc1X6]NhIc1W6^NiIb1W6^NiIb1X6]NhIc1X6]NhIc1X6]NhIc1X6]NhIc1X6h21O0000O1001O0ZIgIZ6Y6<0000000000000000000000000000000000000000O100001OO100O11O0000O1O1O1O1O1N2N2001OFkH^JU7a5PI[JP7d5QI]Jn6d5RI[Jo6d5<0eH\\Jo6c5QI^Jo6b5QI^Jo6c5PI]JP7c5PI]JP7c5PI]Jo6d5QI\\Jo6d5QI\\Jo6d5QI\\Jo6d5RI\\Jn6c5RI\\Jo6d5QI\\Jo6d5<0O10000000000000000000001O000000000000000kHZJe6f5[IZJe6f5[IZJe6g5ZIYJf6g5ZIYJf6g5?00000000000000001O0000000000001O00000000000000001O000000000000000000000000000000001O0001OO100000000000001O000000001O`LdHQ1\\7oNdHQ1\\7_20000TLSJ:m5WLTJ[3O>R6AnI?S6@mI`0T6_OlIa0U6^OkIb0U6^OkIb0U6Z300001O001O001O0000001O00000000000000000001O00000001O1O001O001O00000000O10000001O000000000000001OQLUI^1k6bNTI_1l6a2O00PLUI`1k6`NUI`1k6`200000000000000000000000001O1O00000000001O00000000001O000O1000000000000001O000000000001O00000O100001O00000]I" + } + ], + "model_output": "A black touring motorcycle with a large windscreen, dual 
rearview mirrors, and a top case is parked on gravel. The motorcycle features a visible license plate, a pair of rear luggage racks, and a black seat. It has a chrome exhaust system, a visible rear suspension spring, and a partially visible engine with cooling fins. The bike is equipped with a front fender and a visible front wheel with multi-spoke alloy rim." + }, + { + "image": "images/caption_detailed_97.png", + "subject_name": "bench", + "object_name": "tree", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "k\\T1221P=`0E2M5M5J2O1O1O2N2N3L2O1O1O5K3M6J1O3L`0_O3`E_MS:c2hEdMU:Z2nEdMQ:Q2kEPNd:Q2\\EoMd:]2N2O0O1O1O1O2lEUMe9j2>00001N3N1N2N2O1N2O1O1N2N2O1O1N101O1O100N101O10O1O1M20100O1O1O010O10O01O1O1000000O1000O100000000000000001O00000000O1O1000000000000001O00002N001O00000000000000000001O000000000O100O1O0010000000000000000000001O000000000000000000000000001O000001O00000000001O000000000000O011N1O11O00O1O001000000000000000000001O1O000000000001N100O10O11N101O00000O2O0O2O0O1N2O1O1O1O1O1O100O10000O101O0001O000000001O00001O001O1O1O010N4M1N2O01O01O00001O0O101N110O000O2O000O110O001O000O1O1O11N1O101O1O0O11O0000000000001O1O1O001O0000000000000000O100000000000000O10000000000000000001O0O100001OO1001O00000O10001O01O00001O0O10001O00001O0000O100O10000O100000000O10000001O001O0O2O000O2O00000000OgMQFX1o9hNQFX1o9S1O001O1O00001O002OO01O1O1O3M00010O002N3NO010O0000001O10O01O1N4M1O1O2N1O2O0O1OfHmL\\M2f6o2PLmLZM40Hd6U3TLlLZM6i6k2TLQMR4n2W3N2N2N3N0O101M2N101000OO2O2N0JZEgMg:U2:L3O2K5I6O1L3DXDTOl;k0SDQOR[4^OPL:R4DRL9n3FVL7j3HZL6e3I^L5b3K_L4a3L`L3`3MbL1^3NdL1\\3OfLiMkNa0^4g1jLbMoNb0Y4k1iLaMQOb0V4m1mL[MSOd0P4Q2RMUMPOj0m3Q2WMoLROl0g3U2YMlLSOm0d3W2\\MgLVOm0_3\\2UO]Ml0b2ZOYMf0g2ARM?m22cLN\\3Q5O1O1O002O0O1O010O2N1O1O1N102N1M3O001N2O2M2N2O1O1O1N20nJ_MYO`2d0fM[OY2e0jMZOT2g0lMYOR2i0nMWOQ2j0oMUOR2j0oMVOP2k0PNVOo1k0oMVOQ2j0nMWOQ2k0mMUOT2k0jMWOV2P6N1000O10O101O00O0100000000000000000`JjMKV2J]NNc1NdNO\\11dNO\\11dNO\\10fNOZ11fNOZ11fNOZ11fNOZ11fNOZ11fNOZ11eN0[10e
N0Z10gN0Y10gN0X11hNOX11hNOW12iNNW12hNOW12iNNW12iNNX10iN0W10jNOW10jNOV10kN0V1OkN0W1NjN1W1NjN1V1OiN2W1MjN3V1MjN3V1MkN2U1NkN2U1NjN3V1MjN3U1NjN3V1MjN3V1MeNTKTOP5W2LdNUKUOo4W2KcN<]1DdN;[1FgN8Y1HhN7W1JjN5V1KkN4T1MmN2S1NmN2S1NmN2S1MmN4S1LlN5T1KkN6U1JkN6U1JjN7V1IkN6U1JkN6U1IlN7T1IkN8U1HkN8U1HkN8U1HkN8U1HkN8U1GlN9T1GkN:U1ElN;T1ElN;T1ElN;T1ElN;T1DmNU1BjN?V1AjN?V1@kN`0U1@kN`0U1@kN`0U1@kN`0V1_OjNa0V1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1]OlNc0S1^OmNb0S1^OmNb0R1^OoNb0P1_OPOa0o0@RO?m0BSO>m0BSO>m0BRO?m0AUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>l0@UO`0k0@UO`0l0_OTOa0m0^OSOb0m0^OROc0n0]OROc0n0]OROc0n0\\OSOd0m0\\OSOd0m0\\OSOd0m0\\OSOd0m0\\OSOd0l0]OTOc0l0]OTOc0l0]OSOd0l0]OTOc0l0]OSOd0m0\\OSOd0m0\\OSOd0l0]OTOc0l0]OTOc0k0^OUOb0k0^OUOb0j0_OVOa0h0BWO>h0CXO=g0DYOl0ATO?n0_OROa0o0^OQOb0o0^OQOb0P1]OPOc0P1]OPOc0P1]OPOc0Q1\\OoNd0Q1\\OnNe0R1\\OmNd0S1\\OmNd0T1[OlNe0T1[OlNe0T1[OlNe0U1ZOkNf0U1ZOjNg0V1YOkNf0V1YOjNg0V1YOjNg0W1YOiNf0W1ZOiNf0W1[OhNe0Y1ZOmM[Kc0[5`1ZOmMZKd0\\5_1YOiNf0V1ZOkNf0U1_OfNa0Y1@fNa0Z1_OfNa0Y1@gN`0Y1_OhNa0X1_OhNa0X1_OhNa0W1@iN`0X1_OgNb0Y1^OgNb0Y1^OgNb0Y1_OfNa0Z1_OfNa0[1^OdNc0\\1]OdNc0]1\\OcNd0]1\\OcNd0^1[ObNe0_1]O^Nc0b1]O^Nc0b1]O^Nc0b1]O^Nc0b1]O^Nc0b1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O\\Nc0d1]O\\Nc0d1]O\\Nc0d1^O[Nb0e1^O[Nb0d1_OZNc0f1]OYNd0g1\\OZNc0f1]OnMSK6^5l1_OmMUK3_5P2\\OmMVK1_5R2[OlMQ1T2^OiM6W2IjM7V2_5000QJmMf0S2ZOmMf0S2ZOmMf0S2ZOmMUKMV5V2EmMQKLN1\\5W2DlMRKML1^5V2DkMSKNK1^5V2DjMTKOJ1^5V22iMNW2HkMoJNY5V2ImMmJMZ5V2ImMmJNY5U2IoMlJM[5T2FYN:g1FYN:g1FYN:g1FYN:g1FTNmJG^5V2DXN;h1EXN;i1DXN;h1EXN;h1EXN;i1HhMQK2W5V22hMOX2b51O00O100O100O10000O1000000000000000000001O1O001O001O00O100000000000000000000000000001O000000000000001O001O00001O00001O001O002N2N1O001O2N1O1O2N2N3M2N2N3M1O3M1O1O2N2N1O1O001O1O001O1O1O1PJYLZ2g3]L^LfNNl4e3ZLaLeNNQ5a3XLeLdNKT5`3TLjLdNJW5\\3RLnLdNHZ5Z3nKTMcNE_5W3kKVNU4j1iKYNV4g1hK[NX4e1fK]NZ4c1dK_N\\4a1aKbN_4^1`KcN`4]1^KfNa4[1\\KgNd4Y1ZKiNf4W1XKkNh4V1UKmNj4S1TKoNl4R1QKoNP5S1hJTOW5Q1^JUOb5Z40001O001O000000001O0000001O1O00001O001O1O001O1
O1O1O1O00001O00000000001OO1000000000000000000000000000000000000000000O1000000O100001O00000000000000001O000000000000000000000000000000000000O1000000000000O1000000" + } + ], + "model_output": "An ornate, white-painted metal bench with intricate scrollwork and floral patterns adorns the backrest, featuring a symmetrical design with a central floral motif flanked by symmetrical leaf-like elements. The bench has curved armrests and a flat seat, with the metalwork extending to form the legs, which are not fully visible. The bench is in front of a tree." + }, + { + "image": "images/caption_detailed_98.png", + "subject_name": "cell phone", + "object_name": "person", + "predicate_name": "held", + "mask_rles": [ + { + "size": [ + 640, + 443 + ], + "counts": "Rlm33mc0O01O1O010O3N1O000O101N1N20O100L4O10000001O0010O01O000001MVXT4" + }, + { + "size": [ + 640, + 443 + ], + "counts": "WcY19dc07J5J5N2M3M3M2N2N2O1N2O0O2N2N101O0O2O1O001O001O1N2O001O1O0KWNR^Oj1ga0^NX^Oc1T?eNiBi1m[OmAP1n=ZOhAj0T>c2M3M4M2M3O1N3M2O1N2O1O1O1O1O100O1O100O10000O100O1[OaIhC`6SjJ\\AW5f>d0]O1OUJmAj5P>UJRB2Ne5o=YJSBk5m=XJRBg5[>H6^JaAX5k>JN14M8I2O1N1M4K5L7HTNnKSDj3Z>G5L5K5K3M2MXLSM`Fk2^=L5XNe^Oa0da0TO`^Oj0ga0nN\\^OP1\\b0N5J2O3M4K2O2N1O2M2O1O4K2N2O3L2J_R`2" + } + ], + "model_output": "A woman with shoulder-length brown hair, wearing a teal tank top and dark pants, is seated with her legs crossed. She has a necklace with a pendant and is holding a smartphone in her hands, which are positioned in front of her. She is wearing white socks with a red and blue pattern on the side." 
+ }, + { + "image": "images/caption_detailed_99.png", + "subject_name": "person", + "object_name": "wall", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "c]n25g>5M2N101O001O001O001N100O2O1O1N2O3M4L3QBROg=[1H4M4K6K9F5L2M3N000O1ZOgMeDY2Y;nMaDR2^;SN_Dm1`;j0O1O1O_M_De1a;l001O100000O010ZO`DZM0=a;X2jD[ME2b;a2h0O0O100O101O1N2N2XNbCk0aN4J_Y\\5" + }, + { + "size": [ + 480, + 640 + ], + "counts": "0W4i:00000000O10000001O00000000001O001O001O1O1O001O002N1O1O1O3MW1iN2N1O1O2N8H1OO1L4H8O1O100O10000001O001O1O1O000000O100O100_OWGiJk8i4m0M3N2N2O1O1O1_MfE;[:BmEZNGc1]:1PFYNFc1[:3\\FKe94_FIa95bFJ^92hFLX91lFMU91mFNT90oFOQ91oFOQ90PG1o8OQG1o8NRG2n8NRG2n8NRG2n8NRG3m8MSG3m8NSG1m8OSG1m81PG1o80PG0P91oF0P91nF2P91mF0R91mF0R91mF0R93kFMU94kFNR92nFOQ91oFOQ91nF1Q9OoF2P9OnF5o8LoF8ROPN6OS9j1bGm0]8TOaGm0_8UOQGTNLj2R9ClF>T9h2O001O1O2N1O2N1O0000O1O1M3O1N200O100N2O100O100O1oNPGRLP9l3TGQLm8n3[GiKg8V4\\GgKe8X4^GeKc8[4Q1O100000000000000000000000000001O00001O1O1O1O1O3Md0\\O1O1O1O2N001O00001O00001O00001O001O00001O001O002N1O1O2N1O2N2N2N2N1O1O1O1O0000000000000000O100O1O1O1O1N2O1N2O1RKTGf3n8VL\\Gc3e8\\L]Gb3g8ZL[Gd3P9QL]Gb3W:I3M3N1NlMPMZHP3f7VMUHh2l7ZMQHf2P8[MhGl2X8UMXG[O[Oi3^9mLUGY3k8hLTGc2BQM[9=QGa2NiLR9o0gFU2Q:lMmET2T:nMjEQ2W:QNhEm1Y:TNfEl1Z:\\11O100O10000O1000000000000001O000000001O1O1O2N2XF^Kn8d4PG^Kn8d4lFbKR9`4kFbKT9V5O1O1O001O00000000O1O1N2N2N2O1O1O1O1O1O100O100O100F:L4O1N2N2N2M3N2O100O1O1O10000O1000000O100000000001O0000001O00001O1O001O2N1O1O1O3M6J4YFQKU9^5O1O003M1O0000O1K5N2O1O1O1N2N200O1O1O1K5O1O1L4O100O1O1O1O1O1O10000O100000000O1000000001O001O001O001O2N1O001O2N002N2Nc0QFdJT9e5OFmFgJS9U5>L4O1O1O1N2O1]Oc0N2O100O10000O100000000001O1O1oEjKV9W4eFfKA:d9Q4jFRLV9Q4cFSL]9o3]FTLd9d40000000O100nN[FfLh9Z3XFeLP:GoEh23lLO9PN201N2JhZ^3" + }, + { + "size": [ + 375, + 500 + ], + "counts": 
"lTe21e;101O001O001N2O001O1O1O1O1O2N1O2N1O2O0O1O1O1O10O01O000001O000010O01O00010O001O010O00100O21N5K4L1O01O0O10001N1O101N10000O2O001N10001O001O0O101O1N101O1O001O1O001O1O1O1O100O1O001NWYQ2" + } + ], + "model_output": "A person wearing a black jacket with a high collar, black pants, and black shoes, with short hair and hands in their pockets, is looking at a kite." + }, + { + "image": "images/caption_detailed_101.png", + "subject_name": "sand", + "object_name": "playingfield", + "predicate_name": "attached to", + "mask_rles": [ + { + "size": [ + 424, + 640 + ], + "counts": "b8d4d80000000000000000001O000000O10000001O00O10000000O100000001O0000000000O100001OO01000000000000000000O100000000000000000000O1001OO0100000000000000000O10000000000000000000000O11O00O10000000000000000000000000001O0O1000000000000000000000000000000000000O100000001OO100000000000000000O10000000000000001O0000O100001O0O01001O00O100000O100000000000000000001O00O1001O0O10O100001O00000000000000O11O000O1000000000O10000000000000000000000001N010000QLPHg2o7TMYHi2h7dLlHZ3c8N7I6J4L2N2N1O2N1O2N2N2N1O1O002N1O1O001O1O1O1O001O001O00001O001O00001O001_NUEc0k:SO`El0`:POdEP1]:nNeEQ1[:lNiES1X;N00001O2N1O2N1O001O1O001O00001O000000001O00000000000000000000000000001O0000001O000000O1000000001O000000O11O1O0000O1000000O100O1000000000000O1000000000000POZO]Eg0b;00000oN[O_Ee0_:]OaEc0]:@bE`0]:AbE`0]:AcE?]:AbE`0]:BaE?^:B^Eb0a:_O^Eb0a:@^E`0b:B[E?e:BZE>f:DXEC4L2M3N1O1O1O10OO2O1O2N1O1O0010O01O001O0001O00000000A?L4O1N1M3N3O01O1O01O01O1O1O1M4J6_IdJQ5a5eJhJW5h6UNnHWLLb0Y7R3oHZLN=W7V3nHXL?Oh6e3j1M5K4lMeF0d9IhFL^9KoF^Oe9OoFIYX9" + }, + { + "size": [ + 425, + 640 + ], + "counts": 
"`UW31X=0Vb23f]M2nL0iH1X7KiH6T:2M2N2O1O2O000O10000O1000000O1O2N100O1000000O1^FYOa6h0^I\\O_6d0\\ICb6=^ID`6=`IC`6=`IC_6>aIB_6>aIB_6>aIC[MBb33c1i0`MB]6>cIB:Bo2m0gLBZ6?fIDW6dLC]36iLJk4XMSIn0Z2j1S801O00000000000000000000000000000000O1000000000000000000O10000[NWMXG4Z1f2W7lMaHT2W7[NYHoN_Og2R8[NnGkN>U3\\7WNXHcN7OGY3d7ZNRIc1i6`N[I^1`6gN`IY1[6kNgIT1U6POkIP1Q6SOPJm0m5VOSJj0i5XOYJh0d5ZO]Jf0`5]O`Jc0^5^OcJb0]5]OdJc0]5YOfJg0b5oN`JQ1g5eN\\J[1k5YNZJg1P6jMUJV2_800000000000001O00000000000000000000000000000000nFYMa0E[5R3oIbM=F_5h2TJfM8Dc5f2TJoMO^Ok5d2RJXNIZOH]OHNQ6S3dJ?\\3jLUMg2ROn0[3mL]MU2XOn0o2^MdMd1]OnLGl3h2WNoMo0@U1T2UNZN\\5R1YKjNi4m0eKkN`41UHLo31l8N102LUel2" + } + ], + "model_output": "A person wearing dark blue denim shorts rolled up to reveal a lighter inner fabric, paired with black flip-flops, stands beside a black dog." + }, + { + "image": "images/caption_detailed_103.png", + "subject_name": "umbrella", + "object_name": "person", + "predicate_name": "attached to", + "mask_rles": [ + { + "size": [ + 640, + 640 + ], + "counts": "gok54hc05I7L3K5M3L4L4M3L4N2N2N3M2O1O0O2O1N3N1O1N2O1O2L3O0N3M4K5M1M3M3N4K5I6G8DVC@l<>YC]Oi200O1N3M2O2M4M4L2N2N2N2M2O0O2O000O1M4O000001OO2O00010O0O100010O000000010O0010O00001N1M5M3M3M3[OZNnCi1f;gNTD[1i;P1L3N4L4L5K4M1N2N1000000O1000O10000O101N100O2O0O2O1O1O1O1O2N1O3M`0@3M3M001O001O002N2N0O100LYCPNiRC]OR=a0g0I7M4M3Moob5" + }, + { + "size": [ + 478, + 640 + ], + "counts": 
"2i>500000000000000000000000000000000000000000000000000001O0000O100001O000000000000O1000000001O0000000000O100001O0000000000000000O100000000001O000000jHMA3e7000O100nHOWO1h03UOMk06ROJm09QOGn0O^H6c6Ko0NaH7^6KZ16dNJ_15`NJb16\\NJf15YNKi14VNLk15SNKm17QNIP28nMHT27kMIV28hMHZ27eMI^26`MJa26]MKd25\\MJf24ZMLh23WMMk22TMNn22PMNR30mL1V3OgL1[3MeL3S90[J0\\L0d31\\LNd35YLKg36XLJj34VLLm31SLO^90O11O00fJ0fK0Z4NhK2X4MiK3W4MiK3`91SKLPK4m90PKMUK3j4OUK1j4M[F0j43k4L\\F1i43m9000iJNbFOj43d47YKIg47YKIf49YKGf4mFGe3K]5`0eF\\O2?h3E`5a0fF\\O0?j3D_5a0hF[OOb0i3B^5b0mF0c3^O`5d0jF0e3\\O`5f0hF1g3YO`5g0iF1f3XO`5g1_JYN`5i1_JWN_5k0oFNa3WO_5k0RGN^3WO_5l0SGM^3WO^5l0VGL\\3XO]5m0XGKZ3XO\\5n0[GKX3WO\\5n0]GLV3VO\\5n0_GMT3UO\\5o0aGKS3VO[5o0cGLQ3UOZ5Q1fGIP3VOX5R1jGGn2WOW5R1lGHl2VOV5S1oGGk2VOS5U1THEh2VOS5U1WHDf2WOR5U1ZHCd2XOQ5U1\\HCc2XOP5U1_HBa2YOo4U1bHA_2ZOn4V1cH@_2ZOm4V1fH@\\2ZOm4W1hH^O[2[Ol4W1jH]O[2\\Oj4W1mH\\OY2]Oi4W1PI[OW2^Oh4W1SIYOV2@g4V1TIZOU2@f4W1VIXOT2Ae4W1YIVOS2Cc4W1\\IUOQ2Db4W1_ITOo1Ea4W1bIROn1G_4W1dIROm1G_4W1eIPOm1I]4X1fInNn1J\\4W1hInNl1K[4W1kIlNk1MX4Y1mIiNl1NQ4_1TJbNk1OP4a1UJ_Nk10m3e1XJYNl12T3MkIk1W1TNj14o2c2kN]MQ1h2nNXMQ1k2mNUMQ1R3jNnLU1T3jNlLV1Y3eNgLZ1^3bNbL]1`3bN`L]1b3bN^L]1f3`NZL`1f3`NZL`1g3_NYL`1h3`NXL`1h3`NXL`1i3_NWLa1i3_NWLa1i3_NWLa1i3_NWLa1i3_NWLa1h3`NXL`1h3`NXL`1h3`NXL`1h3`NXL`1h3`NXLa1h3jIULa43e1k3[NULe1j3\\NVLe1i3[NWLf1f3\\NZLe1d3\\N\\Ld1d3\\N\\Le1c3[N]Lf1a3[N_Lf1]3^NbLd1\\3\\NdLf1Z3ZNfLi1Y3UNgLS2o2oMQM\\2d2dM\\M]2b2dM^M]2a2cM_M]2a2cM_M^2`2bM`M_2_2`MbMa2]2_McMb2\\2^MdMc2[2]MeMd2Z2\\MfMd2Z2\\MfMd2Z2\\MfMe2X2\\MhMd2X2\\MhMd2W2]MiMc2V2^MjMc2T2^MlMb2U2]MkMd2T2\\MlMe2S2[MmMe2R2\\MnMe2P2\\MPNe2n1\\MRNe2j1^MVNc2g1_MYNa2d1bM\\N_2a1cM_N]2`1dM`N]2^1dMbN^2[1cMeN_2Y1aMgN`2W1aMiNa2U1_MkNb2T1^MlNc2R1^MnNc2n0nGoN_53c2n0`MROa2m0_MSOa2l0`MTOa2k0_MUOb2j0^MVOb2i0_MWOb2g0_MZO`2e0aMZO`2f0`MZOa2d0`M\\Oa2b0`M^Oa2a0_M_Ob2`0^M@c2>^MBd2<\\MDi24ZMLl2LVM4j8000000000000000000000000000000000000001O0000000000O10000000000000000000000000000000000000000000000000000000000000000000000000aM7iEIV::hEFW:=gECX:`0fE@Y:b0fE^OY:c0gE]O
Y:d0fE\\OY:e0gE[OY:e0gE[OY:e0gE[OY:e0gE[OY:d0hE\\OX:d0hE\\OX:c0iE]OW:b0jE^OW:?kEAU:?kEAV:=kECV:0nA2P>MQB3o=MQB4n=LQB6n=JRB6n=JRB7m=IRB8n=IQB7o=JmA:R>EoAc0i=\\OXBf0f=ZOZBg0e=YO[Bg0e=YO[Bg0e=YO[Bh0d=YOZBh0f=XOZBh0f=YOYBh0f=XOZBh0f=XOYBk0e=SO]Bn0b=RO^BP1`=PO_BQ1a=oN_BQ1a=oN_BR1`=nN`BR1`=nN`BR1`=nNaBR1^=nNbBR1^=nNbBR1^=nNbBS1]=mNdBR1\\=nNdBR1\\=nNdBS1[=mNfBR1Z=nNhBQ1W=POiBo0W=QOiBP1V=oNkBQ1U=oNlBP1T=POlBQ1S=oNnBP1R=POnBP1R=POnBQ1Q=oNPCP1P=POPCP1P=POQCo0o\\O8[[Ld0m;QOQEMVOc1i;`NQEMVOd1h;_NRENUOc1i;_NoD3UO_1l;^NoD=VOi0l;jNmD4VOO3S1i;jNnD4WON2T1i;jNnD4WOL4V1g;jNnD5AP1b;kNnD5_OP1c;kNnD6^OP1c;jNoD8[Oo0f;iNoD8[Oo0f;iNoD8ZOGLR1l;oNnD8YOGNa0NKm;EnD8WOI0?NKm;EnD9UOI2=OLl;EnD9UOI2>NJn;FnD8YOE14N`0j;_OnD8\\ODM4O`0k;@mD8\\ODM4Oc0h;]OPE8\\ODL6Ob0h;\\OQE8[OFM4Nb0i;\\OQE8ZOGN4Lb0k;[ORE7XOI0h0LVOj;2SE6WOI04L;Pl:F^F8b9H^F9a9G`F8`9HbF6^9JdF4\\9LfF3Y9MhF2X9NiF0X90hF0X90hF0X90hF0X90hF0X90hFOY91gFOY91hFNX92j200000000000000000000000000000000001O000000000000O100001O0000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001O00000000000000000N" + } + ], + "model_output": "An individual wearing a red ski jacket with a black collar, black gloves, and a black helmet with goggles is in the snow." 
+ } +] \ No newline at end of file diff --git a/evaluation/GAR-Bench/model_outputs/gar_8b_detailed_eval.json b/evaluation/GAR-Bench/model_outputs/gar_8b_detailed_eval.json new file mode 100644 index 0000000000000000000000000000000000000000..be724a176829c8f0987bab816bd962db996c89a5 --- /dev/null +++ b/evaluation/GAR-Bench/model_outputs/gar_8b_detailed_eval.json @@ -0,0 +1,2570 @@ +[ + { + "image": "images/caption_detailed_0.png", + "subject_name": "person", + "object_name": "skateboard", + "predicate_name": "beside", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "hT\\63W=1N3M2O1O2N1O1O2O0O1O001O00O10O10O001000O011N1O1O10000O1O1000000\\MCZH>e7GTFGk1b0Q81mGOS83kGMU85hGKY86fGJZ88cGJ\\88`GIb88[GIe8:WGGj8:PGIQ99lFHU99dFK]96\\F0d92WF0k90SF0o90PF0P:1nENU:b12O1O1O1M3N2M3L4N2O2N2O1O001N1OZFoMT8P2kGUNS8k1jGYNU8c1aFRNY1>V8^1aFTNX1a0d0@j5b1ZH]NW1d0a0@n5^1ZH^NV1g0>@Q6[1\\H]NT1k0=]OT6[1`I\\O9ZOV6\\1^I]O:WOX6]1\\I_O:UOZ6\\1ZIB:SO[6[1ZIE8QO^6Z1YIH7nN`6[1VIK8jNb6]1SIL8iNd6\\1RIN0ZN_O<`7[1PI2OWN@=a7Z1oH6LUNB=Ic0^O7J4K2N2O0O2O0O10O010TOhBOY=KoBAGNg=a0d0O0101XOAlB`0Q=ElB=Q=FmB=P=FmBbNXO]:8QG`0bNXO_:6oFb0j9\\OVFb0m9]OSFa0R:\\OnE?Y:^OiE?\\C1N100O100O10O0000010O0100O000000000000000000000O10000O100VNiCj0WRSN`0\\9RO`H?TN?[9RO^Hc0XN9\\9SOZHf0\\N3\\9WOVHg0aNOZ9ZOTHh0cNLZ9[OSHj0dNIZ9[OSHm0fNCY9]ORHQ1hN^OX9_ORHS1iNXOX9EoGS1jNUOX9HoGR1lNROW9KnGS1QOeNX95jGV1Y9hNhFX1X9gNiFX1X9hNhFX1X9gNhFZ1X9eNhF\\1X9dNfF]1[9cNcF_1]9aNbF`1]9aN`Fb1a9]N_Fc1a9]N_Fc1a9]N_Fc1a9]N^Fd1b9\\N^Fd1b9\\N]Fd1d9\\N[Fe1e9[NZFf1f9ZNYFg1g9YNXFh1h9XNXFh1S3XNj10RKi1R3\\Ni1KUKh1Q3`Ni1HVKh1Q3aNg1HXKf1o2fNh1DZKe1i2mNl1^O\\Kc1g2SOk1ZO_Kb1e2VOk1XO`Ka1f2XOi1WOaK`1f2[Oh1UO_K`1l2[Oe1UO]K_1Q3\\Oc1TO[K`1R3^Oa1TO\\K]1T3_O`1UO[K[1V3_O`1XOWKY1Z3_O_1ZOUKU1]3B^1ZOTKS1_3C]1\\ORKQ1a3C]1]OQKo0c3E[1]OQKn0c3G[1_OnJj0g3H[1_OlJi0i3HZ1AlJg0j3IY1BkJe0k3JZ1DhJb0n3JZ1GeJ?Q4JZ1IcJ>R4I[1KaJM2\\2^OgMb0MO^2\\OfMe0LO^2\\OeMg0LM`2ZOdMjN^NP1_1k0c2VO`MfNjNX1S1k0g2RO]MjNkNY1P1k0k2nNZMlNoNZ1m0k0m2kNXMnNQO\\1j0j0m2lNWMnNTO\\1h0i0n2_M`KY1f1SOWO[1d0j0n2`M
bKV1f1TOXO[1c0j0l2aMgKR1b1XOZO[1`0j0W3aNoLZO\\OZ1?j0X3_NnL]O[O[1`0g0aNbMa4n0RM_O]OZ1>g0Z3^NkLA]O\\1>d0\\3\\NjLD\\O\\1>d0]3ZNiLF]O]1c3QNdL2]O_1==f3lMaL9[O`1=:kNjM]6<[Ja1=8kNoMX69`Ja1=6lNoMV6:dJ_1:8nNmMT6ZMdNl1i5i1SKeM0Z1`0F[4\\1PKSLYOc1k0W1`0I\\4[1nJjM6P1`0M[4Y1kJoM9j0a00Z4W1jJRN;f0`03Z4U1iJUN=?b09W4S1cJSLKU2i09c0>U4R1bJTLKU2j07c0cMcNi2b5i3TKnK1^MMP3m4e3SKnK2^MNP3l4g3fJTL?XMNo2l4i6SK[F1m2k4h6RK^F1l2l4f6RK_F2l2k4e6QKaF4l2i4c6SKaF4m2h4b6SKcF4l2h4a6RKXFF7a0R3d4_6UKfF7k2c4`6TKfF9l2a4_6TKfFk2^4Z7bKgH]4Y7bKiH]4W7cKiH]4W7_KnH`4R7`KnH`4Q7aKQI]4o6aKXEEh3i4P7aKZEFf3h4Q7aKUI_4k6aKUI_4l6`KUI_4k6`KVI`4k6^KWIa4j6^KWIa4k6\\KWIc4T;O001O001O001O00001OL`Ki@_4]?O1O002N2N1O2N2N2N2N1O2N2N1O2N2N2N2N1K6M5K6J0O10000O2YOo^O^NQa0X20O100O1[On^O]NRa0V220000N20000N3O1N2^Od^O]N`a0b1`^O]Naa0b1a^OSNia0j18N2N2O1O1N2N2N2O1N2N2N2O1O1O1M3O1N2O1N1O2O0000MXOP]Of0Rc0ZOn\\Od0Xc0N2OIAo\\O=Sc0Cl\\O=Uc0Ck\\O;Wc0Eh\\O;Yc0Eg\\O:Zc0Ff\\O8[c0Gf\\O9[c0Ge\\O7^c0Gb\\O8`c0H`\\O6ec001N[KM`E3W?N2OPPi1" + } + ], + "model_output": "The sky is a soft, pale blue with a smooth gradient, transitioning from a slightly darker blue at the top to a lighter blue towards the bottom. 
The sky is clear with no visible clouds, and a person is over it.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_4.png", + "subject_name": "person", + "object_name": "pavement", + "predicate_name": "standing on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "al_6=^>8I7AWO[BS1a=:M1N04MmNfN_DZ1];kNaDU1[;POdDQ1X;ROhDn0S;WOmDi0Q;YOPEf0m:]OSEc0k:_OUEb0h:@ZE>d:FZE:e:HhDoNIM8[1V;KhDmNIN9Z1U;MgDPO4T1l0Ne73WGgNOM9^1`0f0_7YOkGbNO08b16j0f7RORHbN3e1ES1U8gNaHQ3_7nLbHS3\\7nLdHR3[7oLeHQ3Z7PMeH`NYO^4Q8TMeH]N[O_4^7aKQIc1I]N20F_4Z7eKUI_19l2_6iKVI\\1;k2\\6mKXIX1=j2Y6PLZIV1=j2X6PL[IW1=i2V6RL]IU1>i2R6ULVI@1b1g0i2P6WLUI\\1l0\\2n5\\NRJd1m5]NSJc1l5^NTJb1l5]NUJc1j5^NVJb1j5]NWJc1i5]NWJc1h5^NWJc1i5]NWJc1i5\\NWJe1i5[NVJf1j5ZNVJf1j5ZNRJj1n5UNPJn1P6RNnIP2R6PNiIU2W6kMcI[2^6eM\\I`14^L`6]6eIaI[6[6mIaIS6e0[I\\4i0lJm5f0]IW4l0RKg5e0`IS4o0UKe5d0_IR4Q1VKe5d0]Im3V1^K_5c0]Ii3Y1dK[5a0^Ia3`1nKY56]Ic3b1ULU53\\Ia3^:K4L3K6G\\DRMg;m28M3N2N6I6K5Ic0J5M2L5L3N3N2M4M2M3N3M2N2M2O2N1O2N1O1O1O2O1N1O001O00WO]CaN1?c2O0L4N00100N2O1O0]OFaB<^=J\\B7e=KVB7j=>`BSOP=l0QCVOnRCCo<=PCCR==kBCX=>cBCb=k03M2N1N2O1N2NTCXOd;g0\\D\\Ob;d0]D^Oa;i0XDXOh;m0RDTOn;S1hCPOX6K4L3N2M3N2N1O2N100O2N100O100O1000O01000O1O010O1O00001O1N2O0O1O2M3C=DeA4Y>LjA4T>LmA7o=ISB5m=KTB3m=MUB0l=1a000O2Oln00SQO2O0N4L4L3N1M4F9M3O1O2O00O0100000O100O101N1O2N2M2O3L3M6GTl]6" + } + ], + "model_output": "A dark-framed bicycle, likely black or dark gray, with a straight handlebar and a visible front wheel equipped with a black tire and spokes, is being ridden by a person. The bicycle features a visible front brake mechanism and a partial view of a rear wheel with similar features. 
The pedals are not visible.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_8.png", + "subject_name": "surfboard", + "object_name": "person", + "predicate_name": "attached to", + "mask_rles": [ + { + "size": [ + 408, + 640 + ], + "counts": "Ved18VO11O000000001O1N2N2N2N2N2N2N2N2O12N1O2NjN]E9b:EgE6W:IkE9R:DSF;m9DUF;_OQO[:>[Fc0e9[O_Fa0o:N2M3M1O4M:F000O1O0O2O010O1O1O1OhV^3" + } + ], + "model_output": "A person with shoulder-length hair, wearing a black wetsuit with a red stripe down the side, is captured in a dynamic pose with one arm extended forward and the other bent at the elbow, legs spread apart in a running or leaping motion, on a surfboard.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_9.png", + "subject_name": "person", + "object_name": "grass", + "predicate_name": "standing on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "d45k>0N200O1O2N2O1N1O1O1O010O2O3L4M000O100O010OO2N1NI9O10O1010O0100O1O2M2O2N2FfAK^>OfANg>OjSi8" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"i8U6k80O10001O0000000001O1O2N2M6K6K1N2M2O3M2O0O1O0O2O001O1O1O001O10O00O10000000000000000000001O0000001O000000001O000000000000001O000000000000000000O1000000O2O00O02N1O1O001O1O1O1O100O1O1O1N2O1O1N2O1O1N2N2N2010N2O001O001O00001O00001O001O001O1O000000001O0000000000001O00001O00001O0000001O000000001O00001O1O3M1O1O2N2N2N001O2N1O100O0O2O1O001O001O0000001O0001O000001O00O010000000000000O100000000O10000O100000000000000001O000000000000O1O10000001O0000000000000000001O00000000001O00001O000000001O0000001O000000001O0000010O000O10001O0000001O00001O000000001O0000001O00001O0000001O00001O000000001O000000001O00000000001O00000000001O0000001O000000001O00001O000000001O00000000000000001O0000010N2O3M2N4L2N3M3M2N1O2N1O1O001O1O1O1O0000001O0000000000000000001O000000000000O10000000000O10000O10000O10000O100O100O1O100O100O1O100O100O1O1N2O1N2N2N2O1M3O1M3N2M3L4M3K5K5M3G9L4L4N2000000000000001OO10000O1O100O1O100O1O1O100O100O100O1O100O1O10000O100O100O100O100O100O100O100O100O1O10000O100O100O100O1O10000O100O1O100O1O10000O100O1O10000O10000O10000001O00001O2N001O001O001O001O0000001O000000000000000000O10000O1O100O100O1O1N2O1O2K3K6H8I7O1O100000001O00O1000000000000000O1FfHmH[7Q7iHlHX7S7;O10000O1O100L4M3ZOQHkI31R8S6a00000O100001OO10000000000O100001OO10000000000000000000000000000000000001O0000000000000000000000000000001O0000000000mI" + } + ], + "model_output": "A person with short dark hair, wearing a light-colored, long-sleeved shirt with a collar, and a dark-colored jacket with a zipper, is standing on grass.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_10.png", + "subject_name": "umbrella", + "object_name": "pavement", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 640, + 428 + ], + "counts": 
"b>;j1JWN3[a05f^OE106OO6Jm0Xa0d1M2O1O0O2O00001O0O2O1O00001N2O00001N101O001N101N10001O000O2O001O1N101O00001N101N101O0O3N001O0O2O001O1O0O2O1N101O001N2N101O1O0O2O001O000O2O0O2O000O2O000O100O2O00000O101N10000O100O100O10000O01000O100O010O1000O1000000000O0101O00O10O10000O100000000O100000000O1000000O3N7I7dK[@o3U`0F9G5KcNmL]Bc2b=fMZBV2e=UN[BXOaNS2T?QOXBWOeNS1S?0WBbNiN]1P?[1i@^Mg0X1_>f1XAZNh>i1UAWNj>l1RAkLIY1U?`3000000000001O0O1VHUK?k4AUK?k4[71O0000000000000O11O0000O0100O1O100O1O100O10O010000O1O1O1O10000O1O010O1O100O10000O100O00101OO0100O100000O010000O10O1001O1N100O2O0000001N100000001N10001N1O100O101O0O2O0O100O1O2N10000O100O1O1O2O0O1O100O2N1O1O1O100L5O0O100O101M2N2O1O1O1O1N2O1O1Oo_O]Mh>a2ZA_Me>^2`1O100O1N2N2O1O1O1O1N2O1O1O1N1O2O1O1O2Nf0YO7J5L3L7H3M3N1O1NUiV2" + }, + { + "size": [ + 640, + 428 + ], + "counts": "cj04413L20gb0j7Q@gJa3Z5[LiJd3X5]LgJd3W5^LhJb3X5_LgJ`3Z5`LfJ`3Z5aLeJ_3Z5bLfJ^3Z5cLeJ]3[5cLeJ]3[5cLeJ]3[5cLeJ]3[5cLeJ]3[5dLdJ\\3[5fLdJZ3\\5fLdJZ3[5gLeJY3[5hLdJX3\\5hLdJX3\\5hLdJX3\\5iLcJW3\\5jLdJV3\\5jLdJV3\\5kLcJU3]5kLcJU3\\5mLcJS3]5nLbJR3^5nLbJR3^5oLaJQ3^5QMaJo2_5QMaJo2_5QMaJo2^5RMbJn2^5SMaJm2_5SMaJm2^5UMaJk2_5VM`Jj2`5VM`Ji2a5WM_Ji2`5XM`Jh2`5YM_Jg2a5YM_Jg2`5[M_Je2a5\\M^Jd2b5]M]Jc2c5]M]Jc2c5^M\\Jb2c5_M]Ja2c5`M\\J`2d5aM[J_2e5aM[J_2d5cM[J]2e5cM[J]2e5dMZJ\\2e5fMZJZ2f5fMZJZ2e5hMZJX2f5iMYJW2g5iMYJW2g5jMXJV2g5kMYJU2g5lMXJT2h5lMXJT2h5mMWJS2h5nMXJR2h5oMWJQ2h5QNWJo1i5QNWJn1j5SNUJm1j5TNVJl1j5TNVJl1i5UNWJk1i5VNVJj1i5WNWJi1i5WNWJi1i5XNVJh1j5XNVJh1j5XNVJh1i5YNWJg1i5YNWJg1i5YNWJg1h5[NWJe1i5[NWJe1h5\\NXJd1h5\\NXJd1h5\\NXJe1f5\\NZJd1f5\\NZJc1f5^NZJb1f5^NZJb1e5_N[Ja1e5_N[Ja1e5_N[Ja1d5`N\\J`1d5`N\\Ja1cNcIj6m4cJ`1cNhIe6h4hJl1U5TNlJP2P5PNPKU2i4mMWKS2g4oMYKQ2f4PNZKP2f4oM[KQ2iNUIX5j4oKQ2fN[IY5c4QLR2eN^IY5^4SLU2bN`IZ5Z4TLR3oNVHR4h4oLT3fN]HY4_4QMU3aN`H]4\\4RMl3n2TLRMm3l2TLTMl3h2WLYMo3i1WGlLo4[1Q4`1dL`Nd3U1_LkNe3m0_LSOj3OjL2h3RObLn0P4_NPLb1W;00000O100000000O1000000000000000000000000O11O0000iJ]NQHc1m7cNoG]1`7]NhC=g4V1U7AkH?o6GQI9n6HRI8n6IQI7n6JRI6l6KUI5j6LVI4i6MWI3e61[IO`66`IJZ64L2O1N3N2M2O2M2O1
O1N3N2N2M3N1O1N3N1O1N2O1N3N2M3N1N102M2O2M2N2O1O2M2N3M2O1N2N2O2M2N3M2N2N2O1N3M2N1O3M1O3M2N1O2N2O1O1N2N2N2N2O1N2M3OiL^EQ2`:oMcEP2[:PNiEn1V:QNnEn1o9SNUFj1i9WNYFi1e9VN_Fh1_9YNeFe1X9[NlFd1Q9\\NSGb1k8]NZGa1e8_N^G_1`8aNcG^1[8cNhG[1V8eNmGZ1Q8fNSHX1k7hNXHW1g7iN[HV1c7jN`HU1^7kNeHT1Z7kNiHS1V7nNkHR1S7nNPIP1o6QOSIn0l6QOVIn0i6ROZIm0d6TO]Ik0b6UOaIj0]6WOdIh0[6YOgIf0X6YOjIg0T6ZOmIe0R6[OPJe0n5\\OTJb0k5^OWJb0h5^OZJa0d5_O^J`0b5_OaJ`0\\5BfJiLAW3`0hL_OX3b0hL^OW3c0iL\\OW3e0hL\\OW3e0iLZOW3g0iLXOX3h0hLWOX3j0hLUOY3k0gLUOZ3k0dLUO]3l0bLSO`3n0^LQOd3o0[LPOg3Q1VLoNl3R1RLmNP4S1oKlNT4T1iKlNY4U1eKjN]4V1aKjNa4V1^KiNd4W1[KgNi4X1VKfNl4[1RKeNQ5Z1nJdNU5\\1iJdNY5\\1eJdN]5[1bJfN_5Y1aJgNa5X1^JhNc5W1\\JkNd5U1[JkNf5U1XJlNi5T1VGTNg2i0S6R1SGZNg2e0W6^1eIcN\\6^1bIaNa6^1]IcNc6^1\\IaNf6_1XIbNj6^1TIaNn6^1QIbNR7\\1nHdNR7]1lHcNW7\\1gHeNZ7Z1eHfN]7X1cHhN_7W1`HiNb7V1]HkNd7T1\\HkNg7U1_FaNi09j8W1YFdNd0\\OWOi0n9X1RFhNf07Y9Y2[FhMh9Y2SFhMo9d31N2N3N1N2N2N3M2N2N1O3M2O1N2O1N2O1N2O1N2N2O1N2N2N2N3M2O0O2N2O1N2N2N2N2N2N2N2O1N2N3M1O2N3M1O2O1N2N3M1O3M2N102M1O2O1M3O1N2O1N3M1O2O1N2N2O1N2O1N2N2N2O0O2O1N2N2N2O1N2N2N3N1Mocg0" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"2l>400001OO1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000oJb0`J^O\\5k0_JUO]5S1_JmN]5[1_JeN^5`1`J`N[5i1aJWN]5m1aJSN[5S2cJmMZ5Y2cJgMZ5]2eJcMW5c2gJ]MV5h2hJXMV5k2iJUMT5P3jJPMR5U3mJkLP5Z3nJfLn4_3QKaLm4c3QK]Lk4h3TKXLi4l3VKTLf4R4XKnKg4T4XKlKa4\\4^KdKc31XJ]4U2bKb36SJ[4Z2_Kc38nI^4]2ZKd3=hI]4c2VKe3a5[L_Je3b5ZL^Jf3b5ZL^Je3d5ZL\\Jf3e5YL[Jg3f5XLZJg3h5XLXJh3h5XLXJe3k5[LUJb3o5]LQJ`3R6`LnI]3V6bLjI\\3Y6cLgIX3^6hLbIV3a6iL_IS3e6mL[IP3i6oLWIn2l6RMTIk2o6UMQIf2U7YMkHe2W7[MiHa2[7_MeH^2_7aMaH\\2b7dM^HY2f7fMZHW2i7iMWHS2m7mMSHQ2P8nMPHm1U8SNkGk1X8TNhGi1[8WNeGf1^8ZNbGc1a8]N_G`1d8`N\\G]1g8cNYG[1j8dNVGY1m8gNSGX1o8gNQGW1Q9iNoFU1S9kNmFT1U9kNkFS1W2gLd4V2UIR1X9nNhFP1Z9POfFo0\\9POdFo0^9PObFn0`9RO`Fm0b9RO^Fm0c9SO]Fl0d9TO\\Fk0f9TOZFk0g9UOYFk0g9UOYFj0h9VOXFi0i9WOWFh0k9WOUFh0l9XOTFg0m9YOSFf0n9ZORFf0o9YOQFf0P:ZOPFf0P:ZOPFe0R:ZOnEe0S:[OmEd0T:\\OlEd0T:\\OlEc0V:\\OjEd0V:\\OjEc0W:]OiEc0W:]OiEb0Y:]OgEc0Y:]OgEb0[:]OeEc0[:]OeEb0\\:^OdEa0]:_OcEa0]:_OcEa0^:^ObEa0_:_OaEa0`:^O`Eb0`:^O`Ea0a:_O_Ea0a:_O_Ea0b:^O^Ea0c:_O]Ea0c:_O]Ea0c:_O]Ea0c:_O]E`0d:@\\E`0e:_O[Ea0e:_O[Ea0e:_O[Ea0f:^OZEb0f:^OZEa0g:_OYEa0h:^OXEb0h:^OXEb0h:^OXEb0i:]OWEc0i:]OWEc0i:]OWEb0j:]OWEc0i:]OWEc0i:]OWEc0j:\\OVEd0j:\\OVEd0k:[OUEe0k:\\OTEd0m:[OSEe0m:[OSEf0l:ZOTEf0l:ZOTEf0m:YOSEg0m:YOSEg0n:XOREh0n:XOREh0n:XOREi0m:WOSEi0m:WOSEi0n:VOREj0n:UOSEk0m:UOSEk0m:UOSEk0m:UOSEk0n:TOREl0n:TOREl0n:TOREm0m:SOSEm0n:SOQEn0n:ROREn0n:QOSEo0m:QOSEo0m:QOSEo0n:POREQ1m:oNSEQ1m:oNSER1l:oNSEQ1m:oNSEQ1m:oNSEQ1m:oNSER1l:mNUES1k:mNUES1k:mNUES1k:mNUES1k:mNUET1k:kNUEU1k:kNUEU1k:kNUEV1j:kNUEU1j:lNVET1j:lNVET1j:lNVET1j:lNVEU1i:kNWEU1i:kNWEV1h:jNXEV1g:kNYEU1g:kNYEV1f:jNZEV1f:jNZEV1f:jNZEV1e:kN[EU1e:kN[EV1d:jN\\EV1c:kN]EU1c:kN]EU1c:kN]EU1c:kN]EV1a:kN_EU1a:kN_EV1`:jN`EV1_:kNaEU1_:kNaEU1^:lNbET1^:lNbEU1]:kNcEU1\\:lNdET1[:mNeES1[:mNeET1Y:mNgES1Y:mNgES1Y:mNgES1X:nNhES1V:nNjER1U:oNkEQ1U:oNkER1S:oNmEQ1S:oNmEQ1S:oNmEQ1R:POnEQ1P:POPFP1P:POPFP1o9QOQFP1m9QOSFo0k9SOUFm0k9SOUFn0i9SOWFm0h9TOXFl0g9UOYFk0g9UOYFk0f9VOZFk0d
9VO\\Fj0c9WO]Fi0b9XO^Fh0`9ZO`Fg0^9ZObFf0]9[OcFe0\\9\\OdFd0[9]OeFd0X9^OhFb0W9_OiFb0T9@lF`0R9BnF>P9DPGRHCl7>THBl7>THBk7?UHBj7>VHBj7>VHBj7>VHCi7=WHDh7UKAk4?VKAi4?WKAi4?XK@h4`0YK_Of4b0ZK_Oe4a0\\K^Od4b0\\K^Od4b0]K]Oc4c0^K]Oa4c0`K\\O`4d0`K\\O_4e0bKZO^4f0bK[O]4e0dKZO\\4f0eKZOZ4f0fKZOZ4f0gKYOX4h0iKXOV4h0kKWOU4i0kKXOS4i0nKVOR4j0nKVOR4j0nKWOP4j0QLUOn3l0SLSOm3m0SLTOl3l0ULTOi3m0XLROh3n0YLROf3n0ZLROf3n0[LROd3n0\\LROd3n0]LQOb3P1_LPO`3P1aLoN]3S1cLnN\\3R1eLnNY3S1gLnNX3R1iLmNW3S1jLmNU3S1kLmNT3T1lLmNR3T1PMkNm2V1TMkNk2U1VMkNh2U1YMlNe2U1[MkNd2U1^MkN`2U1cMiN\\2V1gMiNY2U1iMmNT2S1mMmNR2S1PNmNl1U1UNlNj1R1YNnNe1R1]NmNa1U1_NlN_1T1cNmNZ1P1kNPOT1m0POROP1m0QOTOm0m0SOUOk0k0VOUOh0i0\\OWO`0j0CUO=i0EXO9h0IYO4g0MZO1b05^OJa08_OF?=C@=a0D]O=e0BZO=h0DRO=Q1ElN;U1FhN;Z1FbN;_1F`N7d1JZN4i1LVN1n11nMOT22eMWMdLg2h54bMO`22]MMe24XMMi27QMJQ36lLKV36gLH]3:_LCf3?WL@j3b0SL_On3b0PL^OP4g0eIaLQ2f2^4S1YKlNk4Z1\\IQL^1e2W5_1gJ`N[5`1cJ`N_5`1`J_Nb5b1[J\\Nh5e1VJZNl5g1SJYNn5g1oI[NQ6f1nIXNU6h1iIVN[6j1dIUN^6k1`IVN`6k1^IVNc6i1\\IUNg6l1WIPNo6P2PIoMR7R2kHoMU7R2jHnMW7R2gHmM\\7S2bHjMb7V2]HhMg7X2WHiMi7X2VHhMk7X2SHhMo7X2PHeMT8[2jGdMX8a42N1O2L4N2N101M3K5N10000O2M3M200O3L3L4M2O2N1N3M3M201O0O2M3N2N1O2M2M4N2M2N3M3M3M2L4N3N1O2N101N10_J" + } + ], + "model_output": "A piece of white tissue paper with a soft texture and a slightly crumpled appearance, featuring a gradient of brown discoloration on one side, indicative of a stain or shadow, is on the dining table.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_13.png", + "subject_name": "person", + "object_name": "pavement", + "predicate_name": "walking on", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "hcW31Z=0ea91odJ5QiK7RCN^L5O0100O0102M2N4M2M:E7HTl0OZbk3" + }, + { + "size": [ + 427, + 640 + ], + "counts": 
"\\n_13W=1N2N3M2N2N2N2N2N2N2N2O1M3N2N2N2O1N2N2N2N2O1N2N2N2N2O1N2N2N2O1N2N2N2N2N2O1N2N2N2M3[NbM\\H_2c7cMZH_2d7cMZH_2e7bMXHa2h7_MWHb2h7^MWHd2i7]MUHd2j7]MTHe2l7[MRHg2n7YMPHi2P8WMnGk2R8UMmGl2T8SMjGo2U8RMiGP3W8QMgGP3Y8PMeGR3[8nLbGU3^8lL_GV3a8j00000001OO100000000O10000O100000000000000000000000000000000O10000O100O100O1N2N2N2N2O1N200O1O100O1O1O1O1N2jNlJSJV5k5mJSJT5l5nJXIO:Z5]6UK[IP5e6k0N2O1O1N2ZO\\IcJf6\\5_I^Jc6b5_I[Jb6b5e0K5OaI[JZ5c5hJ]JX5b5hJ_JX5_5hJdJW5Y5gJmJX5R5dJSK\\5k4bJgJRO?\\6i4bJ[K^5d4aJ^K_5b4\\JcKd5\\4ZJgKf5Y4XJhKi5X4VJiKj5W4UJjKk5V4SJkKn5U4oInKP6S4oImKS6R4lIoKT6Q4kIoKV6P4iIQLX6o3fIRL[6n3cITL]6k3cIUL^6j3bIWL^6h3cIWL^6i3aIWL`6i3_IWLb6i3]IXLc6W5100O10000O100O1O10000002N5K;E3M4L4VLlHU1X7eNRIS1R7eNSIZ1o6bNTI]1n6_NYI\\1j6^N[I_1i6[NZIf1g6TN]Il1e6lMaIT2a6dMeI\\2`6XMgIh2^80001O000000O100DTMWFn2d9]MTFe2l9=0000O10O1001N0lNfLYHZ3`7nL^HS3Y7YM]Hn2a7WMZHk2d7XMXHk2f7XMXHi2f7^MSHd2k7_1O2A>ASJ^Io5`6TJ_Il5]6ZJcId5X6bJiI\\5W6fJhIY5W6jJiIT5W6mJiIR5V6PKjIo4V6SKiIl4V6UKkIj4U6UKlIk4S6dJcI7;T5S6cJdI89U5T6aJeI97V5_6jJ`IW5`6jJ_IV5b6jJ\\IW5d6kJYIV5g6g000000000000000000000000000000000cNgIcJLQ1P3nN:P5hLTKa0h0\\2WO9m4jLTKj0?T2^O9o4iLTKl01SO3o2GTM0Q3Q5mLTKl01TO1o2JRM1P3o4oLSKm02XOKk22nL2Q3l4QMSKm03\\29@a4XMRKm03[2;_O`4YMRKo02Y24fLMg2k4[MRKo03W2>^O]4\\MUK2Ma04a2?^O[4^M_Kc0H_2a0^OZ4]M^Kg0G]2a0_Om4UNaJ\\2a0@o4RNaJ^2h0XOW4fMkJN?=Hg2f0XOX4fMeK3^Oo2e0XOj5g0VJYOk5f0UJZOk5f0VJYOk5f0UJZOk5f0UJZOl5e0TJ[Ol5e0TJ[Om5d0SJ\\On5c0RJ]Oo5b0QJ^OP6b0oI^OQ6b0oI^OR6a0nI_OS6`0mI@U6>kIBV6=jICV6=jICW6mGZO@O123O17_8QEBo:N1000000O100O2O1N2NQW_7" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"f;Y3g;000000000000000001O0000000000000000O100000000000000O100O1O1000000000000000000001O001O1O001O001O1O3M1O1O000O100000000000O10001O0000000O10000O1000000O1000000M3O1O1N2O1L4J6N2O100J6O1OISE]Lm:k3000000000000O1O100O100N2O10000000000000000001O0000000000VOjKjFV4P:01O00000000001O0000001O00001O0000001O00000000001O000000000^ORERMo:_3000000001O00000000001O000000001O00000000001O0000001O000000001O0000000000001O00000000001O0000000000001O0000001O00000000001O0000001O000000001O000000001O000000000000001O000000001O00000000001O0000001O00000000001N11O000001O000000001O00000000001O0000001O000000001O00001O001O0000001O00001O0000001O0000001O0000001O0000001O00001O000000001O0000001O0000001O0000000000001O0000001O00001O00001O001O00001O00001O000000001O00001O0000001O00001O00001O0000001O00001O00001O0000001O00001O0000001O0000001O00001O0000001O00001O0000001O00001O00001O0000001O00001O00001O000000001O000000001O00001O0000000000001O001O0XNlBd1U=[NkBe1U=ZNlBf1W=000O11O000000000000O1000000000000000000O10000000000000000000000O10000000000000000000000O100000000000000O10000000000000000O10000000000000000000000000000O1000000000000000000000000000000000000O10000000000000000000000000000000000000000O100000000000000000000000000000000000000O1000000000000000000000000000000O100000000000000000000O10000000000000000000000000000000000O1000000000000000000UC" + } + ], + "model_output": "A white pickup truck with a visible rear section featuring a tailgate, a license plate mounted in the center, a rear bumper, and a partial view of a taillight on the right side, is driving on the road.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_18.png", + "subject_name": "person", + "object_name": "baseball glove", + "predicate_name": "wearing", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": 
"YRa01Z=000Uh?4eW@8I3L5L2N3M3N1\\DTOf:n0XEVOc:m0YEWOe:k0XEWO2Jl9Q1oEWO5Hk9S1mEXO7EEOk9U1VFYO9ECOm9U1TFYO4E^9N`F:3I\\9MeF50NZ9NgF3OOY9OjF0IUOJk0b91mFNE7]9LnFME7[9MjF3HnNNR1_9NhF5J1]9KdF9NL^9d20OO2O010O0001O001O00000O2N2N10100O001O0N3O010O000M3001O001000000O1O1O1O1O2N0G^LiFd3R9=N2N2O1N2O010O100O01000O010O10000^GhKQ8X4nGjKQ8V4nGlKQ8T4jGQLV8P4gGSLX8`400010O01O01O000010O000010O0dMiGEW85oGJQ81UHNk70VH1j7NWH1k7MUH4k7KVH5k7JUH7j7JUH6l7JSH6m7KRH5o7LoG4R8LmG4V8JiG6X8JgG6Z8lM`G22U12n0T9SOjFm0V9TOiFl0X9UOfFk0[9b11O1O2N5J4XOYFfMh9X2YFhMh9V2ZFiMg9`0TFc06mNg9>TFd07mNf9=TFe07mNf9>SFd08nNf9oEd0m:\\OTEa0n:_ORE`0P;_OPE`0R;_OnD`0T;@kD>X;BgD;];EbD:_;FbD8`;H`D5b;M]D1e;N^DMe;3^DFf;:\\DZOn;e0<10O100O100O10lC]O_O0k;d0dDC[;=cDE\\;;cDG^;8[D1c;n0N00O110O002M2N2N100O1O100O2N1O0O1O2N3M3L3N3K4L4L5M3KjSi4" + }, + { + "size": [ + 427, + 640 + ], + "counts": "V\\o23Z=N^n25[QM3N2N2N2ZOEXD>e;EXD?e;CWDb0f;AVDb0i;a0O1N110O01O1O2N1O010O1O101M2O1O0O1O2N2N3K6K3L4L4N3L4L^fh4" + } + ], + "model_output": "A baseball catcher in full gear, including a helmet with a face mask, chest protector, shin guards, and a mitt on the left hand, is crouched in a ready position. The catcher is wearing a dark blue jersey with red and white accents, gray pants, and black cleats with red detailing. 
The catcher's right hand is not visible, and the left hand is holding a baseball glove.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_19.png", + "subject_name": "person", + "object_name": "wall", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 640, + 425 + ], + "counts": "j8o01UO03m0J\\16iM0N12JX>b1gA]N=;BM00000:0A1N11O50:OB1N10l=P6XBSJN?af4=00000O100001O00001O1O001O001QLi@S3X?lLh@T3X?lLi@S3X?kLi@U3X?jLh@W3X?hLh@X3X?gLi@Z3W?dLk@e3l>XLVAl3h>gKbA\\4P?102M1O100O1O1O1O1O100O001O010O1O001O10O01O1O00100O1O1O10O01O1O1O00001O010O1O1O1O0000010O00001O0000O1O2SNYAZNl>d1XAVNl>h1WASNm>m1TAnMQ?P2QAkMS?U2n@fMW?X2k@dMX?\\2i@_M[?`2h@ZM\\?f2g@SM^?l2k00001N101O1O0O2N101N2N101N1O2O1O1O1O1O1O1O1O2N1O1N2O2O1N1O1O1O1O1O2N1O01O001N3N2N1N3M2N2O1N1O2O1O1N00O20O103K2O1O1O1O003M1O10N11n^OXMl`0n2O1OK5N2100O1O1O101O000O100000002NO1O1001O7I0O2O0O100O2N101OO1000O2O00O1000O010000O10O1000O10O100000O10000O1000000O10001O00001O0O1000001O000O2O001O001O1O0O2O001O001O1O0O2O00001N2O001O1N3N3L1000001N2O0O3N2N2N3M1N3N1N3M3M2O2M4L5J6K5J6J6Hh\\b0" + }, + { + "size": [ + 640, + 425 + ], + "counts": 
"Yi0;6HWb09f]OH6V3a?m0O00000000O10O1000O100000000O10000000O1000O1000000O100000O100000O1000000O100O10000000000O10000O10O1000O10000O100000000O10O100000O100000001O000O0100000O1000000O100000O100000O100000000O0100000000000O10000000O10000000O10000001O00000000000O10000000000000000O100000001O0010O0001O0101O1N2O1O2N2M2O2N1O3MROfLWAW3h>jL[AT3d>nL]AX3Z>jLgAU3W>lLnAP3P>RMQBl2n=UMXBd2h=^MYB`2f=aM]B]2a=eMdBV2Y=mMkBo1S=RNRCi1mVC^Oj01OO1WHRIS7n6mHRIS7n6g0O003M00_HVI`6i6`IWI`6i6_IXIa6h6_IXIa6h6TIWI_O020[7i6oHWID00020\\7h6_IXIa6h6^IYIa6h6_IXIa6h6^IYIc6f6nHXIN2T7f6\\I[Ic6f6[I\\Ie6d6]IZIc6f6oHWID051Z7g6mHXID0j7h6RIYIb6g6^IYIb6g6]IZId6e6[IZIh6e6XI[Ih6e6VI[Il6e6RI]Io6b6QI]IP7c6oH^IQ7b6oH]IR7c6nH]IS7b6mH]IT7c6lH]IT7c6lH]IU7b6lH]IT7c6k0000001O0000001O1O0000001OeGbIg7^6YHbIh7]6XHbIi7^6c00001O00000000aGcIo7\\6PHeIP8[6a00000000]GdIV8[6iGfIW8Z6iGfIW8Z6jGeIV8[6>000001O01O00000000000000000000000000000000000000000O2O00000000000\\GcIX8]6hGcIX8]6<0000000000000000000000000000001O000001O0000000000001OO101O1O00001O1Oj3_9fLSGX3m8jLQGV3o8mLnFS3R9nLlFS3T9\\M\\FT22iLb9[1SFc1f0QMW9]4n0O2K7ULnDA=f2X;^MRELXO4>5Pjm1" + }, + { + "size": [ + 493, + 640 + ], + "counts": 
"V;P11T2W;lMiDT2W;lMjDS2V;mMjDS2V;mMjDS2V;mMjDS2V;mMjDS2V;mMjDS2V;mMjDS2V;mMjDS2V;lMkDT2U;kMlDU2T;jMmDV2S;jMmDV2S;jMmDV2S;jMmDV2S;jMmDV2S;jMmDV2R;kMnDU2R;kMnDU2S;jMmDV2R;kMnDU2R;kMnDU2R;kMnDU2R;kMnDU2R;kMnDU2R;lMmDT2S;mMlDS2T;mMlDS2T;mMlDS2T;mMlDS2T;lMmDT2S;mMlDS2T;mMlDS2T;mMkDT2U;kMlDU2T;kMlDU2T;kMlDU2T;lMjDU2V;Q100000000000000O10000000000O10000000000000000O100000000000000000000O1000000000000000000000000000000000000000000000000001O00000000000000000000000000001O0000001O0000001O0000000000000000001O000000000000001O0000001O0000001O000000001O0000001O0000001O00001O00001O001O001O001O1gL^De2b;ZM`De2`;ZMbDe2_;YMcDf2^;WMeDh2[;WMfDi2[;UMgDj2Y;UMhDk2Y;SMiDl2X;RMiDn2W;RMjDm2l;O1O001O00001O001O0000001O001O001O001O000000001O1O001O00000000001O00001O1O00001O001O00001O000000001O0000001O001O1O1O00000000001O001O001O001O001O00001O001O001O001O001O00000000001O0000001O00001O00001O0001O01O0O101O001O001O001O001O00001O0000001O001O00001O001O000000001O001O002N00001O00000000001O0000001O001O00001O001O0000001O00001O001O1O1O00000000001O00001O00010O0O101O001O000010O0001O001O001O1O00000000001O0000001O001O00001O00001O00001O001O001O001O001O0000001O0000001O00001O001O0000001O001O001O1O001O001N2O00hhP3" + } + ], + "model_output": "A red and black diesel locomotive numbered 1803, featuring a yellow front bumper, two sets of headlights, and a pair of windshields with wipers, is driving on the railroad. 
The locomotive has a black chassis with yellow railings and steps, and is coupled to a maroon freight car with visible ribbing and a white roof.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_21.png", + "subject_name": "sandwich", + "object_name": "sandwich", + "predicate_name": "leaning on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "lg_32m>2O1N101N2O1M4M2O1N2O0N3N101N1O2N1SOUOdCm0Z:G8J3O3I5M4N1N2M3M4L3N1O2M3M3N2N1O2M3N2M3O1N2N2N101O0O2O0O2O1N101N101O1N101O1N101N2O001N2N101O001N2O1O0O2O001N2O001N2O1N2O1O001N101O001N101N10001O0O101O1O0O101O000O101O00000O101O0O10001N10000O1000000O10000000O010O10000O01000O100000O010000000000O100000O10O100000000O0100000000000O1000000000O10O10000000000O10000O100O1000000O1000000O1000000000000000000O10000O1000000O10010O0O10000000000O11O00000000O11O00O100001O00O10000O100O2N11O1O0000O2N1000000O20O000001O0000010O0O11O010OO10001O00000O1100O0000O1O10001N1O1001O00O2O0O100O1O10001O000O1O10001N1L4N21O1O00O2L3001O01N100003NO000O10010O000000010O000O11O01O00001O0000001O0O100000000010O01N101O001N101O00001O00000001O01O00001O00010O00000O101O000000001O00001O00001O001N10001O0000001O0000001O0000001O00001O00001O01O0001O00001O001O001O001O1O001O100O2N2N2M3N2M3N3L3N3L3N1O1N2O1O3L2N2O2M4M3M2M3N4K5Ib]f2" + } + ], + "model_output": "A sandwich with white bread, containing a layer of green lettuce and a slice of tomato, with a visible spread that appears to be mayonnaise, is attached to a sandwich.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_22.png", + "subject_name": "cat", + "object_name": "sand", + "predicate_name": "sitting on", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": 
"g^S67T=3N3M101N3]CBS5b0@W30oKf5ImJ=1a0]OY30PL\\62aIHj0j0XO]32nK[6MfJe0nN_32oKZ6LiJd0jNb32oKh7=VHe32nKi7;UHh32mKk79SHj32mKl77SHl31mKm76QHn32lKo72PHS40lKR8NoGV4OlKT8JoGY4MnK^8R4bGnK^8S4_GoKa8^4001O00001O1O001OOQM^GV1b8hNaGW1^8hNdGX1\\8hNeGW1[8jNdGV1\\8lNaGU1^8nN`GR1`8m1VMXKPMh4n2\\KZJNb2f4S3fKhLZ4W3jKfLV4Z3mKcLS4]3RL]Lo3c3ULXLl3h3TLVLn3j3RLTLP4l3PLRLR4n3nKnKV4S4iKjKZ4V4eKjK\\4V4dK^Kh4b4VK\\Kn4e4jJ[J@h0P6m4`J\\J^Oe0U6P5\\JPKh5P5WJmJm5S5SJkJo5V5PJiJQ6W5oIhJR6X5nIcJW6]5hI]JEKl00h4h5gJ[Jg0Oa4f5fJ\\Jj0N`4f5fJZJf6e5[I\\Jd6b5_I]Ja6c5_I\\Jb6d5c000000000000QJeJkNI\\4b5dK\\J`0<\\OI_4_5eK]JO`KTIb2l6\\MXIb2h6]MYIc2h6[MZId2f6[M[Ie2e6[M[Ie2e6ZM\\If2d6ZM\\If2d6ZM\\If2d6ZM\\If2d6ZM\\If2d6ZM\\If2d6ZM\\If2d6ZM\\If2e6YM[Ig2e6ZMZIf2f6[MXIf2h6o10000000mIRIH3j5k6a000000gITIR6l6mIUI5Me5n6VJUIS6k6lIVIT6Q700RJkHe5U7UJmH00j5S7]JlHb5Q7bJnH^5R7bJnH^5R7aJoH_5P7aJQI_5o6bJPI^5P7bJPI^5Q7bJnH^5S7aJmH_5S7aJmH_5S7aJmH_5S7=0000O10000000000001O0RKmHc3S7\\LQIa3o6`KPIa01o3o6`KPIa01o3o6_KQIb00o3P7^KQIb0OP4P7^KQIa01P4o6^KRI`0OR4o6_KQIa0MQ4R7_KnH`5R7=000000000000001O001O00000000000000000YJiHY5X7=0000O100000000000000001O00O11O0000000000000000001O00000000001YKjHV3V7iLoHS3Q7lLQIS3o6mLRIR3n6nLTIP3l6oL[Ik2f6TMhI^2X6bMiI]2W6dMlIX2T6hM[Ji1f5WNdJ^1\\5bNeJ]1[5dNeJ[1[5gNdJX1\\5iNdJV1\\5kNcJU1]5kNdJT1\\5mNcJS1]5mNcJS1]5mNcJWOSNT1[7DaJUOYNV1W7D`JUO\\NU1T7G_JSO_NU1R7I]JROcNT1P7J[JSOfNS1P7H[JSOgNU1n6H[JROiNU1m6HZJSOiNU1m6HZJROjNV1m6F]JoNhN[1k6F\\K:e4D\\KcJC]5>bJB^5a0_J_Ob5a0\\J@d5a0[J_Oe5c0YJ]Og5g0UJYOk5k0oIWOR6S1mH]MMa1V7]1\\H]M8W1[7T2_HmMa7o3001O001O000000001O00O100000000O100000aJ`HR5_7nJcHQ5]7oJdHP5]7oJdHP5\\7oJeHQ5[7PKdHP5\\7PKdHP5]7oJcHQ5]7oJcHQ5]7PKbHP5^7SK^Hn4b7>1O3M1O001O0000000000001aLTH\\1m7aNVH^1k7VMSHg02S2l7TMSHh03S2j7UMSHh03S2j7UMTHh02R2k7TMTH:0C2n2j7UMTH;OB4m2i7VMTH;0A3n2i7VMTH;0A3n2j7UMSH;1B3m2j7UMRHm0Qc04N000O100000000000000000000000000000000000000000000000000000000000000000000000O100000O10000000000000000000000000000000000000000000000000000000000000000000000000000O10000001N4M7Ib0\\OjSQ10WlnN3X\\OLSRS6" + 
} + ], + "model_output": "An individual with a contemplative expression, wearing a short-sleeved shirt and pants, is sitting on a bench with one leg crossed over the other. The person's arms are crossed over the crossed leg, and they appear to be wearing shoes.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_24.png", + "subject_name": "airplane", + "object_name": "sea", + "predicate_name": "flying over", + "mask_rles": [ + { + "size": [ + 488, + 640 + ], + "counts": "kjg73T?2O00000O1K5VAH]>NfA:MI0OT>1mA8OHO1T>OnA?NBT>OnA?NBT>OnAf0R>8O7E=D6M100000O1O1L4I7J>Cc0]O8L10000O10000000000000000000O100N5K7G6K4N10O1M4Ha0E2N2O001N_nn0" + }, + { + "size": [ + 488, + 640 + ], + "counts": "Y7k0Q1R4^7nKbHR4_7mKbHR4^7nKbHS4]7mKcHS4]7mKcHS4\\7mKfHR4Z7jKeG[OQ1k4Z7fKnH[4Q7cKQI]4o6bKSI]4m6bKTI^4l6bKTI_4k6aKUI_4k6`KVIa4i6_KWIa4i6^KXIb4h6^KXIc4g6\\KZId4f6\\KYIf4f6ZKZIf4f6ZKYIg4g6ZKWIh4h6YKhHGWOU5P8UKfHY5Y7iJcHY5]7hJaHZ5^7gJaHZ5_7fJ`H[5^7eJbH\\5^7dJbH\\5^7dJbH\\5^7dJ_H`5`7`J^Hc5a7^J^Hb5b7bJYH_5g7eJUH[5k7hJPH[5o7o0000001O001O0000000O110O00001O00gIYHd2ObNh7fN`Hc2KfNe7dNgH_OA0OL7h1N9d7bNlH[OC0=f1C0001OO10000O100O2N1O1O1PLc0kEUO^19b8k0dE[Ob1Kg8V2mFkMW9>^ESONj1i0hNW;^1XDhNi;[22N2N002N1O1O1O001O1O001O001O001O00001O001O1OmNaM]E_2Z:mMdE7[Ol0j:UOkEKAm0\\:ASF]OFQ1n9L_FgNL\\1Q9b0PH^Oo7d0oG]OP8d0oG]Oa5gNXJ2iNl1]1[Ob5iNUJ5eNl1a1WOe5kNRJb28cNf5oNnI_2:cNh5POlI_2:aNj5TOjI[2:aNl5VOkIW28cNm5XOkI\\OZOS2k0ZOP6XOeJZ1ZO^OQ6ZOfJV1XO@R6]OeJR1WOBS6^OeJP1VODU6^OcJo0VODX6]OaJQ1ROE]6[O`JR1nNFb6]O[JX1dN_MIl1X7@WJa3i5jLkIW3U6kLhIV3X6mLeIS3Z6SM_Io2a6VMWIm2i6YMlHl2U7c2000000000000000000O100001O0000000000000000000000000000000000O10000001O00O1000000000000000000000000001O0000000000000000000000000O10000O1SOSI^In6[6S1001O1O00001O00001OmIeGY5[8cJkG\\5T8cJnG\\5R8dJnG\\5R8cJoG^5P8cJoG]5Q8cJnG^5R8dJlG\\5T8hJfG[5Y8j0000bJiGeN0n4W8TL\\Hj3d7PLbHQ4]7nKeHQ4[7mKgHT4X7kKiHU4W7jKjHW4V7gKkHZ4T7fKlHZ4T7`KRIa4m6[KXIe4g6[KYIe4g6ZKZIh4d6XK\\Ii4c6WK]Ij4a6WK_Ii4a6WK_Ii4a6WK_Ij4`6VK`Ik4_6TKbIm4]6SKcIm4^6PKcIQ5]6nJdIS5[6mJd
IT5\\6lJaIW5_6iJRIf5n6ZJQIh5n6ZJPIf5P7\\JmHe5R7^JlHb5T7_JjHb5V7_JiHa5W7^JiHc5X7\\JgHe5Y7[JeHg5Z7ZJfHf5Z7ZJeHg5[7ZJdHg5[7ZJdHf5\\7aJ\\H`5d7bJZH^5f7cJYH]5g7R101O00aIZH[5g7dJ`HV5`7iJbH=Kh0M9e7bNdH9Oi0J;c7aNPIUOB6=e1A>`7_N_Jo0UNa0\\7]NcJP1RNc0[7XNjJR1lMf0Z7VNnJP1kMi0X7PNWK7]Ni1b;000O1O10000O1O2O0O100N2_ObMUDa2b;g02YMQDQ2Z<_MRDZ2a00001O0O1000000O1N2O100N2]LDXH>[7nMdAT2\\>PN^AR2a>PNVAX2i>nMY@k2b=oLUCe4j<_KfBALT5\\=W1O1O1M3O100O1N2O1O100O1M3O100O1O1O100O100O10000000000000000000000000000001O00aNUIiEk6U:XI`EJXOo6W;a1N2I7N2O1O100O1O1O100O1O100O1O100O10000O1O1O1O100O1O100TOfFQG[9l8iFSGW9j8oFdFD9]9Q9XGmFi8R9S1O100O100oM^EdJb:[5`EcJa:\\5aEbJ`:^5aE_Ja:`5aE\\Jb:c5_E\\Jb:c5bEWJa:i5aESJa:m5`ESIAa0R;[6_ESI_OI2129P;j6\\EXIAE5ON0Q;T7ZE_IE^OX;R7SElIj:T6VEmIi:S6WEmIi:R6XEmIi:S6WEmIi:R6XEmIi:S6XEkIi:T6XElIh:T6YEjIh:V6YEgIi:X6ZEbIj:^6WE`Ij:`6YE]Ig:c6ZE_Ic:a6fEXIX:h6fEXI\\:i6`EZI`:W6aDiIU1J[:\\6eDcIk<\\6=000000000000O10hBgIiYL^A?GoN2k2j>eMjA1IT2`>eMlA3HV2_>bMnA4GW2h`0N3M1O2J6XOZ^OQOia0h0b^ORO_a0j0X1D;E8Hm]T2N^njM=Ji00i7ZN]Ag1c>]NWAe1i>bNo@_1Q?h1O10000000000O10000O10000O100O1O100O100O1O100O100O1O100O1O1O1O1O1O1O1O1N2M3lKeJ`I^5_6dJ^I^5`6eJZI`5e6aJoHk5P7VJfHT6Z7mIYGM`NKh0`6^9iIXGc0gNg5P:gIVGb7i8_HVGb7i8^HVGd7i8]HUGe7k8[HTGf7k8[HUGe7j8\\HVGd7i8]HWGc7i8^HVGb7i8_HVGb7j8^HVGb7i8`HUGa7k8bHRG^7o8cHnF^7R9gHiFY7X9jHbFX7_9lHZFV7f9mHSFW7n9]1001O1O1O1O2TGlEh7V:QHSFk7U;L2N2N2N2`LXDdNi;X1^DdNc;X1bDfN_;W1dDhN];V1dDjN];S1fDlN[;P1hDPOY;m0iDSOX;k0jDTOW;h0lDXOU;c0PE\\OY;ZMeC2?[1j0Y1oYLmAg3Q>\\LnAd3o=_LQBa3n=bLoA_3o=dLPB\\3n=gLQBY3n=jLoAW3n=mLPBT3n=oLPBR3o=PMmAS3l=d1J6J6E;D3M101N2O2M3N1O1N0100O010O01O0001000O010O001M201O100O1O1O1O1N2N3N1N2O101O000O2O0O101O0O2O1O1O1N2N2O1N3N1O2M3N2N1N4M2LLXCWNeoDA]12d94_]A4YAIc>;N2O1O10000O01O0102NO0020aA@[>c000O00100O10O1O001O001O1O001O001O001N2O1M3O1O10OO3M2O00Y`g5" + } + ], + "model_output": "A small bird with a stout build, featuring a mix of brown and white plumage, stands in front of a wall. 
The bird has a distinctive brown streak above the eye, a short, conical beak, and a white underbelly. Its wings display a pattern of brown and black markings, and the tail is long and narrow with brown and white bands. The legs are thin and pinkish, ending in sharp, clawed toes.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_28.png", + "subject_name": "bird", + "object_name": "floor", + "predicate_name": "walking on", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "\\WR3=k<5L4K5M3L3N3M3L8H>C3M2N2M3N3MAVEdNd:\\1^EdN`:\\1bEdN]:\\1dEdNZ:]1gEbNY:]1jEaNV:_1kE`NT:a1nE]NR:c1oE\\Nj9JgEk1`0ZNg9NhEi1a0XNf90iEh1b0WNBLj96RFg1c0VN@Oi96SFe1e0VN^OOi98SFc1g0UN]O1f99VFa1g0UN]O1e99XFa1g0SN]O3`9=\\F^1l0VNd8`0`FZ1k0WN]8f0iFS1j0WN[8h0kFQ1j0WNZ8h0mFQ1h0XNZ8k2fGUMZ8k2eGVM[8j2eGVMZ8k2eGVM[8j2dGWM\\8i2cGWM^8i2aGWM`8h2_GZMa8f2\\G]Md8c2TGeMl8[2oFkMP9j2TG^Ll8]3YGbLg8[3]GdLd8Z3^GeLd8X3j0dMPFW1U:]NTFa1Q:YNTFb1FYNi:c1[EaN`:^1cEaN]:]1fEbNZ:\\1hEcNX:\\1kEbNU:[1oEdNR:Z1PFeNP:Z1QFfNP:X1RFgNo9V1SFjNP:o0UFPOm9k0VFUOk9i0VFWOl9e0VF[Om9a0TF_Oo9=RFCR:7QFHi;000QZY4" + }, + { + "size": [ + 427, + 640 + ], + "counts": "\\7m5^7000000000000000000000000000000000000000000000000000001O00O10000001O000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001O000000000000000000000000000000000000000000000000000000000000000000000000000000001O00O1001O00000000000000000000000000000000000000000000000000001O1O4L3M2N2N1O2N1O2N2N1O3M1O2NcJYOYOV1g0jNQO^1R5b0\\Od0A?@`0YOg0TOl0L4O1M3N2N2N2N2N2N2M3N2M3O1N2O1N2O1N2N2N2N2N2O1N2O1O1O21M3N3M2N2N3M3M2N1O3M102MGRJjJl5f5K3M3M2N3M2N3M2N3M3M1O2N2N4L5L7H9G9F9H7I7IgLoLUNh2k1XM[Nc2e1\\MdN[2\\1eMlNS2T1mMUOi1k0XN\\Ob1c0^NDZ1=fNJS16mN3j0MVOc03DMS61O1^DN];2cDN\\;3dDM];2cDM^;2cDN\\;4bDN];1eDM];0_V20giM1O0O11O0O10O11000SlV2" + }, + { + "size": [ + 375, + 500 + ], + "counts": 
"nnW35a;2N2ZEJd9=XFFf9=UFFj9=nEJQ:7lEKT:l000000000001O001O001O000O1001O0O100O10000O10000O10000000O1000001N10001O0O10O1O100000O0100000000O100000010O0100O010O01O00000000000O100000000000000000000dNeN\\H[1c7gN\\HY1a7kN^HU1b7kN^HU1a7lN_HT1a7lN^HU1a7lN_HT1`7mN`HS1_7nNaHR1]7PO[HDVO]1]8ROZHEWOY1^8UOWHV1i7lNSHV1l7lNoGX1Q8iNlGY1T8iNhGY1X8iNeGX1[8R10000oMoL]KP3b4SM\\Km2c4VM[Kj2e4XMYKh2f4ZMYKf2g4[MWKf2i4]MnI^Oj0U3X5]MkICk0P3Z5_MhICl0o2]5jMbJW2^5iM`JY2`5gM]J\\2d5cMYJ`2g5`MXJa2i5_MUJb2k5^MTJc2n5[MRJe2Q6XMnIi2U6TMjIm2W6SMhIm2Z6QMfIo2]6nLcIR3^6nLbIQ3_6nLaIR3`6nL_IW1DNn6kN^IQ3c6oL^Io2d6PM[IP3f6PMZIo2g6QMYIn2i6QMWIn2k6QMUIn2l6QMTIo2m6QMRIo2n6QMSIn2m6TMPIm2Q7TMeH@1_3Z7i01O00001O0000O10000O1000000O1XOhKQJX4o5hKPJY4o5iKoIX4Q6hKmIZ4S6gKiI\\4X6eKaI`4`6;1O00000000001O2N1O1O000000O1N2O100O1000000O100000000001O2N6J3M2N4L1O1OO100O1O100O1O10000001O1O001O1O1O1O1O2N1OdH" + } + ], + "model_output": "A tree with numerous slender branches covered in clusters of small, vibrant green leaves stands beside a stop sign. The branches extend outward in various directions, creating a dense canopy. 
The leaves are oval-shaped with smooth edges, and the branches are thin and slightly curved.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_30.png", + "subject_name": "grass", + "object_name": "stop sign", + "predicate_name": "on back of", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "\\:c4^:O000000001O1O001O1O001O1O1O2N1O001O00001O000000O10000O100O1O100O1O1O1O100O100O10000001O001O001O00O100000000001O1O1OO1O1000000O1O1000000000000001O001O1O1O001O0000001O0000001O1O0000001O1O001O1O1O1O1O001O1O1O1O001O001O1O1O1O1O1O1O1O1O1O001O1O002N001O1O001O1O1O001O1N2O1O001O1O002N010O1O1O1O001O1O1O1O001O1O1O1O1O001O1O1O1O001O002N1O001O001O1O1O1O001O001O1O001O1O1O001O1O1O1O1O001O1O1O1O1O1O001O1O1O1O1O001O1O1O1O1O001O1O1O1O001O1O1O001O1O001O1O001O1O001O001O00001O00001O00000000000000000000000000000000O1000000000000000000000000O1000000000000000000000000000O2DoAEogX5" + }, + { + "size": [ + 480, + 640 + ], + "counts": "abe1h1V=Z1kBQMh;c3@l0TOf0ZO?A4L2N2N2N2N2N2M201N2N2N2N2N1O2N2N2N2N2N2N1O2N2N2N2N1O2N2N2N2O1N2M2O2N3M1O2N[MPI]Mn6b2VI\\Mj6d2VI]Mh6d2WI_Mf6b2XI`Mg6a2YI`Me6a2[I`Mc6a2\\IaMa6a5OnL`IeM^6[2dIdM[6]2eIQKNS2[6m2gIcMX6^2hIcMV6^2jIcMT6^2lIcMR6]2oIdMo5^2PJcMn5i5N2N2N2N]MXJPLg5P4ZJQLd5P4\\JPLc5R4\\JPLa5i6N2N2N1OYMgJjKW5W4iJjKU5W4kJjKS5W4mJjKQ5S7OUMPKiKn4X4SKgKl4Z4SKhKk4Y4UKiKh4Y7NPMZKhKe4j3[KdI0c2c4Y4]KgKb4Z4^KhK`4]7NlLbKhK]4Y4cKhK[4Y4dKhK[4`7N2N2N2N2N2N1O2N2NfIWL`1g3bNYL^1e3cN\\L]1b3Q5O2O1MbIbL_1]3aNcL_MOa1\\3Q1eL^MOa1[3c3eL^LX3g7N3N1N1OcJoLmNP3c6NkJUMWNj2j1XMUIN6M]4l2Y2TMYI0j4i2V7N3N2N3N1N1O2N1O2N2O0O1O2N1000O1000000001OO1000001OO10000001O0O100000000000000000001N1000000000000000000000001O000O1000000000000000000O1000001O00000000000000001N100000000000000000000O2O0000000000000000000O1000001O00000000000O10000000001O00000O10000000000O100000001O00000000000O11O00O10001O000000000000000O2O000000000000000O101N1O1O1O1O1O1O1O100N2O1O2N1O1N2O100O2N1O2N2N2N2N1N3N2N3N3L2N1O3M2N2N2M3N4L2O1N2M3N3M2N2N2N2M4N0O2N3M3M2N2N2M3N2N2O1N2N3M3L3N3M1O3M2N2N2O3L2M3N2N3M1
O2N3M3M2O1N2M4L3N3M1O3M2O1N2N2N3M2M4L3N2O1N2N2O1M4N2L3M3N3M1O2N2K5H:]Oa0H9^Oa0Df2`1000001O1MgN" + } + ], + "model_output": "A lush bouquet of deep red roses with velvety petals, tightly clustered to form a dome shape, with hints of green foliage peeking through the blooms, sits beside a TV.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_32.png", + "subject_name": "truck", + "object_name": "road", + "predicate_name": "parked on", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "Q7V1U<1O1O1O00001O000O0100O1000001O000O101N10001O1O1O1O00000001O1O2N1O1O000O2OOO2O1000000000O100O10000O10000001O1O1O000000000000000001M2O1O1000000O1O1O100000000000O10O100L4O1O2N2WObC4a[V7" + }, + { + "size": [ + 427, + 640 + ], + "counts": "\\8m08E\\;;dDE\\;;dDF[;:eDF[;:eDF\\;9dDG\\;9dDH[;8eDH[;8eDH[;8eDH[;8eDHZ;9fDHY;8gDHX;9hDHW;8iDHW;8iDHW;8iDIV;8iDHV;9jDGV;9jDGV;9kDGT;9lDGT;9lDHS;8mDHS;8mDHS;9lDGU;8kDIT;7lDIU;6kDKU;4lDKU;4kDLU;4kDLU;4kDLU;4kDLV;4iDLW;4iDMV;3jDMV;3jDMV;3jDMV;3jDMV;3kDMT;3lDMT;3lDMT;4kDLU;4kDMT;3mDLR;5nDKR;5nDLQ;4oDLQ;4oDLQ;4oDLQ;5oDKP;5PEKP;5PEKQ;4oDMP;3PEMP;3PENo:2QENo:2QENo:2QEOn:1REOn:1REOm:2SENm:3RENm:2SENm:2TEMl:3TENj:3VEMi:4WELi:4WEMh:3XEMh:3XEMg:4YELg:4YELh:4WEMi:2XEMm:NSE3m:LSE4m:LTE3m:LSE4m:LRE5o:JQE6o:JQE7n:JRE5n:KRE5n:KRE6m:JSE6m:JSE6m:JSE7l:IUE7k:HUE8k:HVE7j:IVE7j:IVE7j:IVE8i:HWE8i:HWE8i:IVE7j:IVE8i:HWE8i:HWE8j:GVE9j:GVE:i:FWE:i:GWE8i:HWE9h:GXE9h:GXE9h:HXE8g:HYE8h:GXE9h:GXE9h:GYE9g:FYE@O6k:8VE_O37h:9VE[O7;c::lEES:=mECR:=oEBQ:>oEBR:=oEBR:>nEBR:=nECR:>mEBS:>mECR:>]EXON9e:`0ZEZO07e:a0WE[O34f:c0SE[O72g:R1YEnNg:S1XEmNh:S1XEmNh:S1XEnNg:R1YEnNg:R1YEnNg:R1YEnNf:T1YEmNf:S1ZEmNf:S1ZEnNf:P1[EPOe:P1\\EoNd:Q1\\EoNe:P1\\EoNd:1oDa0=_Oc:P1]EPOc:P1]EQOb:n0`EQO`:S1\\EmNd:T1ZEmNf:S1[EmNd:S1\\EmNd:T1[ElNe:T1[ElNe:T1[EmNc:T1^EkNb:U1^EkNb:U1^EkNb:V1]EkNb:U1_EkN_:V1aEkN^:U1bEkN]:V1cEjN]:V1cEkN\\:U1dElN[:T1eElN[:U1dEkN\\:U1dEkN\\:U1dElN[:K\\Ei09\\OY:V1gEjNU:Z1kEfNU:e0bE[O90U:Z1kEfNU:`0bEF8JU:\\1kEdNU:\\1kEdNU:\\1kEdNU:\\1lEcNT:]1lEdNS:\\1mEdNS:\\1m
EeNR:[1nEeNS:Z1mEfNS:Z1mEgNR:Y1nEgNR:Y1nEgNQ:Z1oEgNP:Y1PFgNP:Y1QFgNm9Z1SFfNm9Z1SFfNm9Z1SFfNm9Z1SFfNm9Z1SFhNk9X1UFhNk9X1UFhNk9X1UFiNj9W1VFiNj9W1VFiNj9W1VFiNj9W1VFiNj9W1VFjNi9V1WFjNi9V1WFkNh9U1XFkNg9V1YFiNh92QF47Kg9V1YFjNh94oEO9Ng93PFO9Ng92QF08Ng9OTFXOKf0:3g99oED:3g9:UF\\O4;f9S1ZFmNf9S1ZFmNf9S1ZFmNf9S1ZFnNe9R1[FnNk9l0UFTOn9i0RFWOQ:f0nE[OT:c0lE]OV:a0jE_O]::cEG_:6aEJ`:5`EK`:5`EKa:4_EM_:4aEL_:4aEL_:4aEM^:3bEN]:2cEN]:2cEN^:nNYEP193\\:mN]Eo065o9POnE0Kk094m9UOlEMOh087h9XOQFIOh078d9]OTFe08Nc9h0]FXOc9h0]FYOb9g0^FYOb9g0^FYOb9g0^FYOb9g0^FYOb9f0_FZOa9f0_F[O`9e0`F[O`9e0`F[O`9e0`F[Oa9d0_F]O`9c0`F]O`9c0`F]O`9c0`F]O`9c0_F_Oa9`0_F@a9`0_F@a9`0_F@a9@XF27>a9@XF]O285k0b9JZFlNL;8P1a9J]FVO2P1b9JWFZO7l0b9<_FDa9]OXF27a0a9^OVF29a0`9;`FEa9:_FFb99^FHc96\\FKf93ZFNf91ZFOh9OXF1i9NWF2j9MVF3j9MVF3j9MVF3j9MVF4i9LWF4i9KXF5h9KXF6g9JXF8g9HYF8g9HZF6g9JYF6g9YOQFC7U1h9gNRF3O17V1g9fNSFd06g0f9eNTFd06h0e9dNTFe07g0e9F[F:e9VOXF]O3]1e9UO[F\\O0`1d9TO\\F\\O0`1d9TO\\F[O1a1c9VOUF^O8\\1c9D]Fc9A^F?b9A^F?b9YNVFR18f0c9VNXFR15h0j9_NmE=9T1m9TOSFl0m9TOSFl0m9TOSFm0k9TOUFm0j9SOVFn0j9TNnEh09T1n9SNjEd08X1S:]NiEJ4i1S:]NjEI3j1S:]NeEG127j1S:]NeEH018k1f9hNRF\\OO29k1e9hNUF^O5l1e9fNWF\\O5o1c9eNXF\\O5o1d9cNYF]O3P2e9bNXF^O3P2n9YNoEG3P2j9]NRFD3Q2f9nMQF`02C7o1f9iNZFX1d9iN\\FW1d9`NSFC9m1e9_NRFD9m1i9[NnEH9n1g9[NPFG9n1e9]NRFD:o1c9^NTF^O1O8U2d9]NSF^O207U2e9\\NbFe1_9^NRF\\O7V2g9bNYF^1h9bNWF^1i9bNWF_1i9`NWFa1k9\\NUFd1l9[NTFd1m9[NTFe1h9jMPFa08e1g9`NYF`1[9iMgFf0Na1[9iMjFc0Kd1[9jMiFb0Lg1X9gMjFd0Mf1Y9fMhFf0Od1Y9fMiFe0Ne1Y9fMoF>Il1X9fMPG=Hl1Y9gMnF>Ik1Y9gMgFe00d1Y9gMgFe0Of1Y9eMhFe0Og1X9dMiFe0Oh1W9cMjFe0Oh1W9cMjFe0Oi1V9bMkFe0Oi1V9bMkFd00j1U9bMkFd0Om1T9`MlFc00m1T9`MlFc00m1T9`MlFc00m1T9`MlFc00m1T9_MmFd0Om1T9_MmFd0No1`9QN`Fo1`9QN`Fo1`9PNaFP2_9PNaFo1a9PN_FQ2`9oM_FR2a9nM_FR2V9[MlFc0OQ2T9^MlFa00P2V9^MjFb0OQ2W9]MjFa00R2a9nM_FR2a9nM_FR2a9nM_FR2a9nM_FR2a9nM_FR2a9nM_FR2a9nM_FR2a9nM_FR2a9nM^FS2b9lM_FT2a9lM_FT2a9lM_FT2a9lM^FV2a9jM_FV2a9iM`FW2a9hM_FX2a9hM_FX2a9hM_FX2a9hM_FW2b9iM^FW2b9iM^FX2a9hM_FX2a9gM_FZ2a9fM_FZ2a9fM_FZ2a9fM_FZ2a9fM_FZ2a9fM_FZ2a9fM_FZ2a9eM`F[2`
9eM_F\\2a9dM_F\\2a9dM_F\\2a9dM_F\\2a9cM`F]2`9cM`F]2a9bM_F^2a9bM_F^2a9bM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b9aM^F_2b9`M_F`2a9`M^Fa2b9_M^Fa2b9_M^Fa2b9_M^Fa2b9_M]Fb2c9^M]Fc2c9\\M]Fc2d9\\M]Fd2c9\\M]Fd2c9\\M\\Fe2d9[M\\Fe2d9[M\\Fe2d9[M\\Fe2e9YM\\Fg2c9ZM]Ff2c9ZM]Ff2c9ZM]Ff2d9YM[Fh2e9XM[Fh2e9XM[Fh2e9XM[Fh2e9WM\\Fi2d9WM\\Fi2d9WM\\Fi2d9WM\\Fi2d9WM[Fk2d9TM]Fk2d9UM\\Fk2d9UM\\Fk2d9UM\\Fk2d9UM\\Fk2d9UM\\Fk2e9SM\\Fm2d9SM\\Fm2d9SM[Fn2e9RM[Fo2d9PM\\FP3e9QMZFo2f9PM[FP3e9oL\\FQ3n91000000O101OO010000O100O100O1FiL_FX3`9jL^FW3b9900O100000001N100O1O0100000O10000O2M200O0100000000000O101OO1000001O1O1O000000O1O1N200O100000001N11O3L5L1O01O00O11N1O1K5O10O11O00000O1000000000000001O0000000000O100000000O10000O1000000000000O100000000O100001OO1000000O11O1O00000TOjFcMW9Y2SG_MQ9]2Q1N2N3M2M5L4L00000000O100C=O1000O100000000001O0000000000000000O11O000dF" + } + ], + "model_output": "A red fire engine, featuring reflective striping, a side compartment with handles, a visible wheel with a silver rim, and a section of a yellow and black striped pattern on the rear, is parked on the road.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_33.png", + "subject_name": "paper", + "object_name": "pavement", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 500, + 333 + ], + "counts": "R`?3`?2N3N1O2M2O1N103M1N3N1O1N2O1O0O2O0O0100O2VOZAd0k>00O2OO010000O01000O100O100O2O00000O2N1N2N3HS?0TA0^Ul3" + }, + { + "size": [ + 500, + 333 + ], + "counts": 
"Q>e0U=\\OmBd2OaM100LS<_3M100O100O100O100O1O100O1O10000O11O00OBYDQMf;P3]DmLd;Q3^DnLb;R3_DmLa;S3`DlL_;T3cDkL[;W3gDgLY;3ZDn2>oLY;V3iDiLX;V3e0L3M4N1N2N2N3M3L3N2N3L3M2L5J6K4O1O1O1000O010O10O10O010O0100O001O001O001O10O10O10N101O001O1G^N`Bb1^=dN^B\\1b=;2M2O1O1O2N100O2N1000010N10000000O1O2O0O00001O1O1O2N6J2ZCoMe;Q2WDVNf;k1WDZNf;f1YD^Nd;e1VD^Nj;f1mCeMMg0W?VA@a>o0F9F:H8I7F:H9F9H8CDH8EE;K6B=K5N3N100010O01PLkI_NV6c0gJ]OZ58PKHQ57oJIS55lJLU53kJMU53kJMV53hJNX52gJOY52fJN[51eJO[51dJ0]50bJ1]5OcJ1^5NbJ2_5N_J3a5M_J3b5L^J4b5L]J5c5K]J5d5K[J5e5KZJ6g5JXJ6h5JWJ7j5HVJ8j5IUJ7l5HTJ8l5oMPHn0T2S1l5oMQHm0R2U1n5mMRHm0o1W1n5lMTHl0m1Y1P6jMTHm0k1Y1Q6kMTHl0i1Z1T6iMSHm0i1Z1T6jMSHk0i1[1U6iMSHk0g1]1V6hMSHk0g1]1V6hMSHl0f1\\1X6hMRHk0e1^1Y6gMRHk0e1^1Z6gMPHk0f1_1Y6fMRHj0d1a1Z6eMRHk0c1`1\\6eMPHk0d1`1\\6eMQHj0b1b1^6cMPHk0b1b1^6dMoGk0a1c1_6bMQHk0_1c1a6aMPHl0_1c1a6bMoGk0`1c1b6aMoGk0^1e1c6`MoGl0]1d1d6`MoGl0]1e1d6^MPHl0[1g1e6^MoGk0\\1g1f6^MnGk0Z1h1h6]MnGk0Z1h1h6]MoGj0Y1i1i6]MmGj0Y1j1R4nLmL>hMk0X1i1e3]MZMOiMk0X1i1X3jMhMBhMj0W1l1k2UNVNUOhMk0V1k1\\2eNfNeNhMk0U1l1m1TOWOTNhMk0T1m1_1BEfMhMk0T1m1S1N1[MgMk0S1m1d0>c0iLgMk0R1n1NT1Y1SLgMk0R1n1F\\1a1lKfMj0R1P2ZOf1o1_KfMj0Q1k7Y1\\GeMj0Q1j7[1[GdMj0R1k7Z1[GdMk0P1k7\\1ZGdMk0P1k7\\1[GdMi0P1l7]1ZGcMk0o0k7^1ZGcMk0n0l7_1ZGcMi0n0m7`1ZGaMi0o0m7`1ZGaMj0m0m7b1ZG`Mi0n0m7b1[G_Mh0o0m7c1]GZMg0S1l7c1oH\\NR7d1nH\\NR7e1nHYNS7g1mHZNR7f1nHZNR7g1mHYNS7g1mHYNS7g1mHYNS7g1mHXNT7h1mHWNS7j1lHVNT7j1lHVNT7j1lHVNT7j1mHUNS7l1lHTNT7l1lHTNT7l1lHTNT7m1kHSNT7n1lHRNd6^2\\IbM]6e2cI[M\\6g2RIYLiNQ1V8e2iHfLkNg0\\8c2dHXN\\7h1aH[N_7e1^H[Nd7f1[HZNg7e1XH]Ng7c1XH_Ng7`1YHbNe7_1ZHdNd7\\1[HeNe7\\1YHfNf7Z1YHhNf7X1ZHhNe7Y1ZHhNf7Y1YHgNf7Z1WHiN`7`1`HaNV7i1iHWNj6V2UIkMa6`2^I`MZ6h2eIYMP6R3PJnLe5^3YJcLP5U4nJlKd4b4\\KXKE`Mf4c7cKlJ[4[8\\KXER4Z;F;I6N3N1O101O000000001O00001O0000001O0000001O0000000O2O0000001O0000001O0000001O0000001O00000000001O1O0O1000001O0000001O0000001O000000001O000O11O01O000O2O000000001O1O0000000000001O000000001O0O11O01O000101N4Lb0^Oc0^OV1\\DoJW:U6RO`0@W1jN;D000001O00001O000000001O000000001O0000001fHnHP6R7mI[Ii5f6
RJnI\\OfNn5\\7eJ]JQ5c5nJhJh4^=" + } + ], + "model_output": "A glossy chestnut horse with a white blaze on its forehead and white socks on its front legs stands on a chair. The horse's mane is a lighter shade, possibly flaxen, and it has a muscular build with a prominent withers and a well-defined neck. Its ears are perked up, and it has a calm, attentive expression.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_35.png", + "subject_name": "person", + "object_name": "giraffe", + "predicate_name": "feeding", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "mgV58^;4L1O100O100O01000O100O1000000O1000001O1O001O1O2N1O1O2O1N2O00N1N3M3M3M4L3MaT1GhI" + }, + { + "size": [ + 375, + 500 + ], + "counts": "bil0=U;:H6J5K3N2O0010O001O100O001O010O00100O001O010O10O0100O010O10O10O100O10O10O10O010O10O10O1000000O10O01000000O10O10O10000O1000O10O101OO10O@aEJ_:5fEF[::hE@FOb:a0a00O10000O10000000001N100O10001N10000O10001O00001N100O2O1O00001O0010O010O0101N003N2N1N2O1N101N1O101O0O2O0000000001OO2O0O101N8H1O1O1N2L3O1O2O000000O1RNjF>2IV9HlF5;Jl8NnF1W:MkE0mRT3" + } + ], + "model_output": "A person with short, light brown hair, wearing a white shirt with a collar, and a dark-colored jacket, is feeding a giraffe.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_36.png", + "subject_name": "person", + "object_name": "pavement", + "predicate_name": "walking on", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "Wme21kc06N000[O6Q]OMkb0d01O14T]OXOYb0[1L3N2N3L5K1O1DSNd^Om1[a0=O0100000001O1kM_^Oh1da0PNb^OP2ha0N1G9F:K6O0O2O1O2N3Mf0SO[\\O1gVP6" + }, + { + "size": [ + 640, + 480 + ], + "counts": 
"hU18d?LVDZ2i9nMPFR2o9lMTFT2l9jMVFV2i9jMXFV2h9jMXFV2h9iMYFW2g9dM^F\\2c=0TJXN`Ih1[6bNaDQOl4]2^6QO[Io0d6UOYIk0f6XOXIh0h6XOXIh0h6WOYIi0g6WOYIi0Y7_NmHa1Z;QNm@o1o>YNm@g1P?aNk@_1S?eNk@[1n>oNo@Q1n>UOo@k0P?XOn@h0P?[On@f0o>_Oo@a0n>El@>Q?Z2M3N2N2O1M3N2N2N2O1O1O1O100001O1O1O1O1O1O1O1O1O1O1O1O1O001O1O1O00001O001O0000POXKhBh4U=^KYBD7n4_=iK_BW4`=kK_BU4_=nK`BR4_=PL_BQ4`=RL]Bo3c=SLYBo3f=V100M3O1N200O1001gIbBJ5k5S>I2N1O1O00002N;E:F7iLl@T1Y?cNm@[1[?YNi@g1b?lMa@S2a?hMc@W2_?eMc@[2^?`Mg@_2]`00001O00001O00000000000000001O000ZO`^OlN`a0S1a^OmN^:EWL_1ZIlN]:LTLn1l3RNULm1j3WNSLi1m3XNRLh1m3]NoKc1P4`NfETOU6\\2T4eNkK[1U4fNjKZ1k3iMSFj0OROa23k0X2a6lNSGhNQO\\1Y1`7lNTG]O1e0S1R1h7lNTG]O2j0l0m0n7iNXG_O0a136e8iNYG@4c1G4l8hN[G_O4d1E5l8_NVFVOZ1a00e1C5n8YNjGLFf1A5o8UNPHMBi1^O5Q9oMUH2]Oj1\\O5S9nMUH1^Ol1YO5T9mMVH1^Om1XO5T9mMVH1_Om1VO5V9lMVH1^OQ2SO2[9iMUH4]OR2RO1j;mMTER2RO1k;lMSES2RO1k;lMREU2ROOm;kMPEW2SONm;jMoD[2SOKo;iMmD^2SOIP`0\\2n0]:eNiBN=>^2o0X:lNgBM`06d2P1o9DYCZOj2R1k9GZCUOn2S1h9HZCTOP3S1e9J[CQOR3U1c9KZCnNU3W1a9KZCmNV3X1`9K[CkNW3Y1^9L[CjNX3Z1]9L[CiNh1M\\O^1a;L[ChNa1OVNk0d0b0ZB1O00O1J6L4[OSNU_Oo1o?gM]@S3`?TMR@J1S3m?g0001O1O00001O1O2TMk_OTO0`2W`0PNc@e1Va0F5K1O1O0000IZNS^Og1Sb0000002YNh]O[1cb0L3NO2001O2N2M3DM4O010N2O1N101O001O01O00001N100O2O1O1N10100O_OnI_KR6`4PJ_KQ6Q4^JoKb5k3dJUL\\5k3oIhK?;e5k3bJUL^5k3bJUL_5j3aJVL`5i3`JWL`5i3`JVLa5j3_JVLb5h3^JYLb5g3^JYLc5g3\\JXLe5h3[JXLf5n3gIhK4:U6`3dIVL3O2129W6^3UJYLD9W6\\3WJ[LB9W6[3XJ\\LA9W6Z3ZJ\\L_O:X6W3fJiLZ5V3gJiLZ5W3fJiLZ5V3gJjLY5U3hJkLX5T3iJlLW5T3\\JcL]O9W6T3\\JcL]O9W6S3jJmLV5S3jJmLV5S3iJnLW5R3hJoLX5Q3^JeLUO<]6n2^J[Mc5d2\\J]Mc5d2[J^Me5b2ZJ_Mf5`2ZJaMe5`2YJbMg5^2XJcMh5\\2XJeMg5\\2XJeMh5Z2XJhMg5X2XJiMg5X2XJiMh5V2WJlMh5T2WJoMh5P2XJQNg5P2XJQNh5o1WJSNg5m1YJTNg5l1WJVNh5j1XJXNf5h1ZJYNf5g1XJ[Nh5d1XJ^Nf5c1YJ^Ne5f1nIiL1c1o5k3N3L3K6I6K5N2M3L4LdMTKUNh4o1YKPNe4Q2\\KPN`4S2`KmM_4T2aKmM[4V2eKkMY4V2gKRMNoNY4P4jKoLNROW4o3lKlL0VOP4P4QLiLOYOm3o3TLgL1]Of3m3YLeL3AQ3QOPMk4LbL4Hd1lNoN]5XOnK5NT1GdNd43gK53l0`5nN]J67g0^5RO[J7h<01M20100000O1nNEXES;BmD>S;BmD=T;ClD=T;DjD=V;EfD=Z;DeD<[;DeD<[;DeD
;];EbD<^;DaD=_;C`D>T;]OhD44`0R;^OkD12a0R;_OmDO2b0o:@oDO1`0Q;AnDO1`0Q;AoDOO`0S;@nD1Ma0U;^OnD0Mb0U;@lDONa0V;3gDMZ;P10XNjD^1c;N1O1O1O000O_DgNT;Y1hDoNT;P1kDTOS;l0lDWO7DZ:V1\\EYO8CZ:l1dEUNP:LoEQ20SNo91nEm12SNn9\\2PFeMQ:h2001O0\\MmET2T:jMPFR2Q:mMPFS2Q:lMQFR2R:jMQFS2Q:jMQFV2`:0O01DUE\\Nk:c1WE\\Ni:c1XE\\Nh:c1ZE]Ne:c1\\E]N_:f1dEWN^:f1a0XOgDGV;9mDFR;;oDCQ;=RE@n:a0UEZOl:g0k0N2O2ROjCg0[<000O2N2N2N1O2N2O1N2OO0O110N2O00000L400100O20N100N2O0010O01O5^OeC0a1O10000001O3M3M1O001O00000000000000000000000000O100^NSMWB1A2=j2^>_MbAa2^>_MbAa2]>aMbA_2l=SMSB?1^2l=SMSB?0_2^>`McA`2]>`McA`2]>`McA`2]>`McA`2]>`McA`2]>`McA`2]>`McA`2]>`MbAa2^>_MaAb2^>_MbAa2^>^McAb2]>^McAb2]>^McAb2^>]MbAc2^>\\McAd2\\>\\MdAe2\\>XMgAh2Y>VMiAj2V>VMkAj2U>UMlAk2R>WMnAi2Q>XMoAh2o=ZMRBe2l=\\MUBd2i=^MWBb2i=^MWBb2i=^MWBb2h=_MYB`2g=`MZB_2f=aM[B^2e=bM[B^2e=bM\\B]2d=cM\\B]2d=cM\\B]2d=cM\\B]2e=bM\\B]2d=cM\\B]2d=cM\\B]2d=cM\\B]2d=cM\\B]2d=cM\\B]2d=bM]B^2c=bM]B^2c=bM\\B_2d=`M]B`2c=_M^Ba2c=]M^Bc2b=\\M_Bd2a=\\M_Bd2a=\\M_Bd2a=\\M_Bd2a=\\M_Bd2a=\\M_Bd2a=\\M_Bd2a=\\M_Bd2a=]M^Bc2b=^M]Bb2c=`M[B`2f=_MZBa2f=`MYB`2g=`MZB_2f=aMZB_2f=aMZB_2f=aM[B^2e=bM[B^2e=cMZB]2f=cMZB]2f=cMnAEFh2\\>dMlAFGf2]>eMjAGGe2_>dMjAHFd2`>dMjAi2V>WMiAj2W>VMfAm2Z>TMdAm2\\>SMdAm2]>RMbAo2^>RM`Ao2`>QM_AP3a>f00000000000001O00000000000000000000001O001O4L2N1O1O1O001O1O1O1O2N1O2N1O1O1O2N2N3M1O1O1O2N1O1O1O1O1O1O1O001O1O1O1O001O1O001O1O001O1O001O1O1O1O1O001O1O1O001O0000001O00001O000000001O000000001O0000000000001O000000000000000000000000000000000000000000000000000000000000000000000000000000000000O10000000000000000O100000000O10000000000O1000000O1000000O10000O100O1O100O1O100O1N2O1O100O1O1O100000000001O00001O0000001O0000000000\\OaMj@_2U?cMi@^2V?eMg@\\2Y?eMe@\\2[?c0O10000000000O1000000O10000O100O100O10000O1000000O100O10000O1O10000O10000000000000000000000000000000000000000000000000000000000O100000000O10000O100O100O100O10000O1O100O100O100000000001OP1PO5K3M2N:F8H=C;E3M2N1O1O001O1O001O001O001O0000001O000000000000000000001O0000000000000000000000000000O100000000000000O100000000O10000O1O1L4
K5hLmN]BN\\O0R1j1T=ZNUBb3k=^LUBb3k=]LWBb3i=^LWBb3i=^LXBa3h=\\L]Bb3c=]LaB`3b=\\LbBa3j>aLe@NF3NX2]a0SO?A6J4L3M001O1O001O001O001O0000001O000000001O0000000000001O0001OO100001O00000000000000O1000O2O000000000000O100000O2O000000000O0100000N2O1nK1TE1l:3lD1S;1lCHnMd0V>EkCn0UdDA\\;`0dD_OUM" + } + ], + "model_output": "A white ceramic bowl with a flared rim and a smooth, glossy finish is on the dining table.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_41.png", + "subject_name": "sports ball", + "object_name": "person", + "predicate_name": "touched", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "kiR13W=2EOYC2d<3YCNe<<00000O10000001O00O20O0001O1ObCDn;=PDHm;8RDKl;3UDOi;2VDOj;0VD1j;0TD1k;0TD2k;f010000000000O1000VDiN`;W1_DjNa;V1_DjNa;V1_DjNa;U1_DlNa;T1_DlNb;S1^DmNb;S1]DnNc;Q1^DoNc;P1]DoNd;Q1\\DoNe;P1ZDQOf;o0ZDQOf;n0[DROf;m0YDSOi;m0TDUOm;T101O10O1O2N1N2O1N2M5J5M4L5K4L3K4NST\\6" + }, + { + "size": [ + 427, + 640 + ], + "counts": "bQT12Y=1J0oB01Nm<0`U:8QjEd0E:XDPOl:n1G7G9I6J8J5L5J5G:K4L4L5K6G7L5K5L4K3VLeKgKUOe2Y5b1mKjKjN\\11\\OY5]3nKiKjN[13[OW5_3RLUMnNTOR5e3SLTMROiN^O2a5P4RLRMi4l2[KQMf4n2[KoLh4P3YKWLWN`0a6Y3ZKTLWNa0`6Z3bKeL^4[3cKbL_4]3bKcL^4\\3cKdL]4\\3fK`L[4_3lKZLU4e3mKYLT4g3lKYLT4g3mKXLa27RL`3d6eLZIZ3d6kLZIT3d6PM[IP3d6RM[Im2e6VMYIi2i6XMUIh2k6\\MQId2o6]MPIb2Q7_MnHa2R7`MmH`2S7cMjHc0K[NAn0k79dH;>VOo6`0bH7d0VOk6h0[H2n0ROh6R1SHKZ1POc6h2dIRM]6m2gIoL[6P3iIkLY6S3P2O2N1O10000O2O0O10002N5K4K4M4L2N4L2N2N4L2N3N2M5K9H0O0010O00001O2N1O001dEfNe8[1UGkNj8W1RGlNm8U1PGnNo8S1mFPOS9S1eFRO[9o0`FVO_9k0oE_N1n0o9U2O00lNTFjNk9[201VNSMTIn2i6VMUIj2j6YMTIh2k6ZMSIf2l6`MoHa2X3fLkNLRM^1f0Q2Z3kLaNe1RNa1[3kL\\Nl1XNZ1Z3lLSMCNd2_Oo0X3XNQMX1[Ob0a3_NaLW5]3]2O1O1O1O1O1O2N001N2O1O1O1O1O1N2O1O1M4L3O1M4M2M8G6J5UNcD]1o;aNUDi0c3N2N2O1O0O101O00000000000000000000000000000000000000000000000O1000O10000000000000000000000000QJ" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"0[63nJV7R5jHnJV7R5jHnJV7R5jHnJV7R5iHWJO6W7c5jHWJO6W7c5jHUJ19T7b5kHTJ2:S7b5kHSJ3V1l0VOVOG_13`N6a0HQ>R1VBB;YOSOe0K[O89b=L^BU1IR3OhKR=2nBf10g2c<_K]CO1o6`T3^A^MT>c2jAmMg=S2XBVN`=k1^BbNV=^1iBQOij;BSDc07\\Le:R3REc08cL_:k2WEc09mLV:k6gEbIn9_6mEoIi9\\8H5K5K00000000000000O10000O100O100O1O1O1N2O100O100O1000000O1000000O1000000001O1O00001O0000001O002N1O1O00001O1O1O001O001O00001O001O1O1O001O1O1O1O001O00001O00001O1O001O1O001O001O1O1O1O0bKiDD1_LW;k3iDE2_LR9J6K4L5K4L3N1O2M2O2M101N2O1N3M2M3M3M2O2N2O1O1N1O2O001O0O1OO21O0O0XORN`Do1_;WNZDl1f;UNXDk1i;f01N2N101N1O2N2O0O2O1N2N2N1O2N100O2N1O1O100O1O1O100O2O0O1O10000O2O000000010O01O00001O0010O00010O01O1O100O1O010O1O010O1O10O01O100O1O1O10000O1O1O100O100O0010000O2N2N2O2N1N2N1O2O2M2N1O100O2O1N10000O100O1O1O100O1O0010O0101N1O3N3L5K3M3M3RHnKi4U4RKlKn4V4oJkKR5W4gJmKY5X4aJiK_5[4[JfKf5d4nI^KS6g4fI[KY6k4^IXKb6l4YIUKg6n4TISKm6S5jHPKV7]60O1000000GUH\\Il7b6YHZIh7d6;O0O2O1iNeGhK\\8T4RHbKn7[4\\H]Ke7d4[HZKf7h4YHWKg7n4SHRKn7T5gGSKW8l5OO1O2N11O1OnLiGUO]8j0XH_Nk7a1bHfLkNR1d8X2PIaMQ7_2UI[Mk6e2[ISMg6m2[IPMf6P3^IjLe6U3c21O0000001O001O010O00001O00001O00001O00010O001O00001O001O000000001O00000O10001O0000000000001O0000000000000O1000000000000000O01000000000O1000000000O01000000O10000O10O010000O100O10O10O1O1O1O100O100O100O1O\\GjL`5U3`JmL_5R3^JSMa5l2\\JXMd5h2[JYMe5g2YJZMh5f2UJ\\Ml5e2PJ^Mo5e2lI]MU6e2hI\\MX6e2eI]M[6f2aIZM`6h2\\IWMg6k2VIPMP7R3lHnLV7T3gHmLY7T3eHkL]7W3`HjL`7X3\\HiLd7Y3ZHgLg7[3VHdLl7]3QHcLQ8`3kG_LW8c3eG]L]8e3`GZLb8i3YGXLh8l3RGTLP9l42O1N2O1O1N3N1O1O1N2O1NmGnJ\\6Q5dIPK]6m4bIWK]6h4cIZK\\6d4dI_K[6`4dIbK]6\\4cIeK]6Y4eIgK\\6W4dIjK\\6U4bInK_6Q4_IQLa6R4YIQLg6Q4SIRLo6P4gHVLZ7m3\\HWLh7]53N2O2N1O2O0O2N1O1M4XM[GPOh8n0^GbNo8[1VGWNU9g1PGRNT9l1PGlMW9Q2mFgMZ9V2jFbM]9Z2iF^M]9_2d1M2M4L3N3M3M3L3M3N3N2M2M4M3L4PO_B9g=F_BOh=0d00001O00001O001O1O0O3NTef1" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"V6e17W5R6hJnIX5R6hJnIX5R6hJnIX5R6hJnIX5R6hJnIX5R6hJnIX5R6k1000000000000000000000000000gHiIi5W6WJiIi5W6WJjIh5V6XJjIh5V6XJjIh5V6XJjIh5W6WJhIj5X6UJiIk5W6VJhIj5W6`100001O5K0kKcIG^6RMaIo12o0]67cII]67cII^67`IJb6Y4000000001gH^IR6b6mI`Ii1N[1b6lL_Ii11Z1`6mL_Ii11Z1_6oL^Ih12Z1`6nL^Ih11[1a6U400000000000000O_H_Ie6`6\\IbIb6]6_IdIj1Mm1_6YLcIa6]6_IcIa6]6_IcIa6]6_IbIb6]6Q1000UKcIlLOd3^62dIN\\6dLbIb23j0Z6eLcIa23j0Z62eIO[60fI0Z60fI0Z6dLcI`23l0Y6eLdI_23l0Z60eI1[60dI0\\6OeI1Z6kKfIo306Z6jKhIo3N7Y6kKiIn3N7Y6jKjIo3M7X6kKkIn3M7X6lKjIX8V6hGjIT4LKZ6SLgIR40KY6TLfIP43KV6>jIBV6?iIAV6`0jI@U6b0iI_OV6f4O1O1OTNoIkJQ6S5RJlJm5S5UJmJk5S5VJlJj5S5WJmJi5S5WJmJi5R5XJnJh5R5XJnJh5S5VJnJj5R5VJnJj5S5SJoJm5Q5SJoJm5Q5SJoJm5T5nInJR6P700000000000000O100000000001_GRJn7n5mG^Jl7b5RHbJl7\\6M2N2N1O001O1O000000001O0000000000000000O1000000O10000O100N2O1N2N2O1O1O100O100O100O100N200O1O1O100O100O1N200O1O1O1O1O1O100000000O10000O11O00O1000000001O00001O0000001O1O1O2N001O1O001O1O1O001O001O1O2N001O2N1O1O1O001O1O1O1O001O2N1O1O3M2N2N2N1O2N3M2N3M001O002N2N1O1O2N1O1O1O1O1O1O1O00001O2N2N3M3M4L2TMVG^On8O1O11O0000000000000000O100000000O1kLRMXJn2h5_MkGYOW13`NX3^8eM`G_OS1V3]7UNZHl1f7VN`GWNa0d3o7\\NoGe1Q8]NkGe1V8b200001O001O001O0000001O001O00001O001O001O0000001O0000001O00001O00000000001O00001O000000000000000000000000000000000000O1000000000000O100000000O100000000O10000O10000O10000O1O10000O100O1O100O1O\\L_Gk0a8SOkGc0T8[OPHd0P8YOTHf0k7YOWHg0i7XOXHh0g7WO\\Hh0c7WO_Hi0b7QObHP1^7oNcHQ1^7kNeHT1]7iNeHW1\\7dN_GVNU1W3^7aNeH`1\\7^NcHc1`7YN`Hh1b7UN^Hl1c7QN^HP2d7mM[HU2f7iMZHX2g7fMQHnNQO_3Q9^MnGj2S8TMnGl2S8RMmGo2W8lLjGT3X8hLhGZ3[8aLeGa3\\8]LcGe3k91N2M3O1N2O1O1N2NPMcEa1\\:\\NiEc1W:[NlEd1S:[NoEe1P:ZNSFe1i9^NXFb1g9^NZFb1e9^N\\Fb1c9^N^Fb1`9_NbF`1]9`NdF`1[9`NfF`1Z9^NhFb1W9^NjFb1U9[NoFe1P9WNUGi1l8TNVGl1l8jM[GW2g8]McGc2\\:100O1O1O1H8O1O1O100O1N2N2O1M3F:bM]LTIn3i6ZLlHl3S7WLgHm3V7YL`Hn3^7UL\\HP4a7WLTHP4i7VLPHn3P8a1N2N2L4M3N2L4O1M3N2O1M3O1K5N2H8@`0O1O1O1000000O100000000N2O10000O1N2O100000000000000000\\HWJQ6i5mIYJS6g5lIZJT6g5kIYJU6g5kIZJT6e5mI[JS6e5mI[JS6f5lIZJT6
f5lIZJT6f5lIZJT6f5lIZJT6f5lIZJT6V4cHPMY1jNT6V4cHPMY1jNT6V4cHPMY1jNT6V4cHQMX1bNhN3^7k5jIQJjN3\\7[4bHQM\\1dNR6[4cHPM^1bNo5^4dHoLm8Q3TGnLl8R3UGmLk8S3TGnLl8R3SGeKNU1o8V3SGPMl8R3RGnLn8Q3TGfK1n0k8\\3TGfK0o0l8Z3UGgKNP1m8Y3UGoLk8Q3VG\\LMEm8o3VGcKLe02Jk8n3ZGYLJIl8n3ZGlLf8T3ZGlLf8T3YGnLf8R3YGoLg8Q3XGQMg8o2YGQMg8o2WGSMi8R4VGiJk8\\4WG`K00OJN6l8`4WGaKOO0IO7k8`4VGbK0OOH08j8T4VGkK1NNL20a8T4oFmK`0Oa8T4oFmK?0^ONj8U4YGnK=0@Mj8O" + } + ], + "model_output": "An adult zebra stands with its body profile facing left, showcasing its distinctive black and white striped pattern. The stripes are unique, with varying widths and patterns across the body, legs, and mane. The mane is erect and striped, and the ears are pointed upwards. The zebra's eyes are alert, and the nose is dark. The tail is partially visible with a tuft of hair at the end, standing on dirt.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_47.png", + "subject_name": "cake", + "object_name": "person", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "PYR41h01[=2cBO[=4dBL[=6cBK\\=7bBJblKBT4=mKCT4fFGU9;dF@cN7f:=aFN]92dE^O=d0n9OcE@b5n600000O10000000000000000O100001OO10000fH" + } + ], + "model_output": "An individual wearing a bright yellow ski jacket with a hood, red ski pants, and black gloves, equipped with a black helmet and goggles, is jumping from the snow, with their left arm extended outward and their right arm bent at the elbow.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_49.png", + "subject_name": "donut", + "object_name": "person", + "predicate_name": "held", + "mask_rles": [ + { + "size": [ + 612, + 612 + ], + "counts": 
"_Ql1l0oa0c0A8H7J6I:D;K5J5M6H7H6L2N2M4K7I5L3M3N4I6L2N4L4K0020001O1N2N110O1N2N1O2O1O1O2M2O2N1O1N2O001O100N2N3OO10O1N2O011N1O1O1N201O0O1O1O1O1O101N2O0O1O1O1O10000O2N1O101N1O100O100O1O1O101O0O100O10000O1O1O10000O101N100O100O2N1000000O10000O101O0O10000O1000lLhD\\NW;a1PE\\NP;a1VE]Ni:b1ZE\\Ne:e1\\EZNd:f1]EYNc:f1^EZNa:g1aEWN^:i1dEVN[:k1eEUN[:j1fEVNZ:i1hEUNY:j1hEVNW:k1jETNV:l1jETNV:l1jEUNU:k1kEUNU:j1mEUNR:l1nETNR:l1oESNQ:m1oESNQ:m1oESNP:n1QFRNn9n1RFRNn9n1RFRNn9n1RFRNn9n1RFRNm9o1TFPNl9P2TFPNl9P2TFPNl9P2TFQNk9o1UFRNj9n1VFRNj9o1UFRNj9n1VFRNj9n1VFRNj9n1WFRNh9n1XFRNh9n1XFRNh9n1XFRNg9o1YFQNg9P2XFQNg9o1YFQNg9P2XFPNh9P2XFQNg9P2YFPNf9Q2YFoMg9Q2YFPNf9Q2YFoMg9Q2YFPNe9S2ZFlMf9U2YFlMf9U2YFlMf9T2[FkMe9V2ZFjMf9W2XFjMh9X2VFhMj9Y2TFiMk9X2TFhMl9Z2PFhMP:Z2mEhMR:]2cEjM\\:_50001O001O001O1O1O01O0001O1O001O00100O00002N10O01O001O100O010O1O100O1O010O100O100O2OO01O011O0O01O010O10O01N101N2O0O2N2O0O101N1O2O1N2N1O2O1N2N1O2N1O2N2O0O2N2N2N2N1N3M3N2N1N3N2N2N2N1O2N2O1N2N2N1O3M2N2N2N2L5J5L4N3M2M3M5K3N2M3N2M3M3N2M3N3L3N2N2M3N3K5M2M4K6J5J5L7I5K8F9I6K9F6JZb0`0F7G8H8G:B=F:B>N2O1000000000000O10001O00000000000000O02O00O100000000O100O1O1O1O1O1O1O100O1O001O1O100O1O1O1O1O10O010000O1ROf_OROZ`0h0o_OUOQ`0g0T@XOk?g0W@YOi?e0Z@ZOe?f0]@YOc?e0`@ZO`?e0b@ZO^?d0d@\\O\\?c0f@\\OZ?c0h@[OY?d0h@[OY?d0i@YOY?g0h@VOZ?i0j@POZ?P1]100O1000001O000010O3M10O00010O000O101N1O1O2N1O2N1O1Ok_O_Oi=`0TBEk=:TBIl=4TBNl=1oA5Q>JkA;V>DeAa0[>^OeAc0\\>ZOcAi0]>VO[AS1f>lNTAZ1m>dNPA`1Q?]Nn@f1U`03H8N2K5L4N3K3M4K5L5K4I7L3L6L3J6K4J7M3M4L2M4H8G9N2L4N2N200O100O1O100O100O1000000O10000000000000000000000001O000000010O0O2O00010O1O001O001O1N2O1O001O2L3@`0H8J7J5I7K5K5H:F9I7J5K6J8UNY_Om0ea0H6L4K6J6K4L4M2MdgZ6" + } + ], + "model_output": "A doughnut with a glossy red icing, generously sprinkled with white nonpareils, is held by a person.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_50.png", + "subject_name": "laptop", + "object_name": "table", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": 
"0R8n6000000O10000O100O10000O10YLSH;l7]3000`LTHLk75VHJj73[HKd7i3000000O100O100O1000000O10000O10002M102N2M5L3L5L3M3L5L5K3L4M1N2O00001N101N2O0O2O00000O2O001N2O001N2O001N2O1O0O2O1O1N2O0O2O1O0O2O1N102N001N2N101O1O1N101O1N2N2O1O001N2O0O2O1N2O1O1N101N2O1O0O2O1O001N3N0O2O1O0O2O1N2O001N2O1N2O1O0O2O1O001N2N2O1O001O1N101N2O1O1O0O2O1N2O0O2O1O001O2M101N2O001N3N001N2O0O2O1N2O1O1O0O101O1O1N2O1O0O2O1N2O1O1O0O2O0O2O1O0O2O1O0O2O1N3N1N101O001N2O1O001N2O1O1N2O1O0O2O001O1N2O1O0O2O2N001N2O1N2Nf]Q6" + }, + { + "size": [ + 480, + 640 + ], + "counts": "U8i6W8000O100O10000O10000O10000O100O1000000O100O100O1000000O100O10000O100O10000O10000O100^HlHX7T7gHnHW7S7iHQIS7o6lHVIP7j6PIWIn6j6RIYIk6g6UI_Ie6a6[IaIb6`6^IaIa6_6_IaIa6_6`I`I_6a6aI_I_6a6`I`I`6`6`I`I_6a6aI_I_6a6bI^I]6c6bI^I^6b6bI^I^6b6bI^I]6c6cI]I]6c6dI\\I\\6d6dI\\I[6e6dI\\I\\6d6dI\\I\\6d6eI[I8VOe5_7SJ[I8VOd5`7TJZI7XOd5^7TJ[I8ZOa5[7WJ[I7\\O`5Z7ZJYI6]O`5Z7ZJYI6^O_5Y7[JXI6@_5X7[JXI6@_5X7[JXI6A^5W7\\JXI5C]5V7^JWI5D\\5U7_JWI5EZ5U7aJVI5FY5T7bJVI5GX5S7cJVI5GW5T7dJUI5HV5S7eJUI5IT5S7gJTI5JS5R7hJTI5KR5Q7iJTI4LR5Q7jJSI4LR5Q7jJRI5NP5P7kJQI60m4P7mJPI53l4m6oJPI54j4m6QKoH54j4m6QKoH55h4m6SKmH66g4m6SKmH58g4k6TKmH59e4k6VKlH5:c4k6XKkH5:c4k6XKkH4`4i6[KiH5>`4i6\\KhH4`0^4i6]KgH6a0\\4h6^KgH6b0Z4h6`KfH6c0Y4g6aKfH6c0Y4g6aKTHL>:g0X4h6bKSHL>9j0W4e6dKdH6h0U4d6eKcH7j0R4d6gKaH8l0P4c6hKRHL:=Q1n3d6iKQHL:=R1m3c6jK`H7n0o3b6jK`H7o0m3b6lKPHL;;T1l3a6mKPHL;:V1k3`6oKoGL;:W1j3_6PL^H6T1h3_6RL]H6T1h3_6RL\\H7V1f3^6SL\\H6X1f3\\6TL\\H6Y1d3\\6VL[H6Z1c3[6WL[H5[1c3[6XLZH5\\1b3Z6YLkGM;8`1b3Z6YLlGL:9a1`3Z6[LYH4`1_3W6]LYH4a1^3V6^LYH4a1]3W6_LXH4b1\\3V6`LWH5c1Z3W6aLVH5e1X3U6cLVH5f1W3T6dLVH5f1W3T6dLhGM99l1T3T6fLhGL89m1U3S6fLSH6k1R3S6hLRH6k1R3S6hLQH7n1o2Q6jLQH6o1o2Q6kLPH6P2n2P6lLoG7R2l2o5mLoG7S2j2o5oLnG7S2j2o5PMcGL7:X2i2n5QMmG6V2g2n5RMlG7X2f2l5SMlG7Y2e2k5TMhGNJ9c2d2l5UMgG;^2_2k5VMgG:`2^2j5XMfG:a2]2i5YMgG9a2]2h5ZMfG:c2[2g5\\M`GL2=g2Z2h5]MdG9e2Y2g5]MeG:e2X2f5^MeG:f2V2f5`MdG9h2V2d5bMbG9j2U2d5aMcG:j2S2d5cMcG9i2T2d5cMcG9j2S2c5eM]GL2j4BWK=i4BYK=g4BZK>f4BfHEf0h0d6CbHLh0?g6E_H1h08j6F\\H7i02k6GYHk0Jm6HWH`
0k0Go6ITHc0n0Bn6JTHe0o0_On6LRHg0P1\\On6MQHh0Q1ZOo6NoGj0R1WOo6OnGk0S1UOP70mGl0T1ROo62lGm0V1oNo64kGn0W1kNo67jGn0Y1gNo6:iGo0i:QOWEP1h:POXEQ1g:oNYEQ1g:oNYER1f:nNZER1f:nNZES1e:lN\\EU1c:kN]EU1c:kN]EV1b:iN_EW1a:iN_EX1`:hN`EY1_:gNaEZ1^:fNcEY1]:fNdE[1[:eNeE[1[:eNeE\\1Z:dNeE^1Z:bNfE_1Y:`NhE`1X:`NhEa1W:_NiEb1V:^NjEb1V:^NjEc1U:\\NlEd1T:[NmEf1R:ZNmEh1R:WNoEi1Q:WNoEj1P:VNPFk1o9UNQFk1o9UNQFl1n9TNRFm1m9SNRFo1m9QNSFo1m9PNUFP2j9PNVFQ2i9oMVFR2j9nMVFS2i9mMWFT2h9lMXFU2g9kMXFV2h9jMXFW2g9iMXFX2h9gMYFZ2f9fMZF[2e9eM[F[2e9eM[F\\2d9dM\\F]2c9bM^F^2b9bM^F_2a9aM_F`2`9`M_Fb2`9^M`Fb2`9^M`Fc2_9]MaFd2^9\\MbFe2]9[McFe2]9[McFf2\\9ZMdFf2\\9ZMdFg2[9YMdFi2[9WMeFj2Z9VMfFj2Z9VMfFk2Y9UMgFl2X9TMgFm2Y9SMgFn2X9RMhFo2W9QMiFP3V9PMjFQ3U9oLkFQ3U9oLkFR3T9nLkFT3T9lLlFU3S9jLnFW3Q9iLoFW3Q9iLoFX3P9iLnFX3R9gLoFZ3P9fLPG[3o8eLQG\\3n8dLRG\\3n8dLRG]3m8cLSG^3l8bLSG`3l8`LTGa3k8_LUGa3k8_LUGb3j8^LVGc3i8]LWGd3h8\\LXGe3g8[LXGf3h8ZLXGg3g8YLYGh3f8XLZGh3f8XLZGi3e8WL[Gj3d8VL[Gk3e8UL[Gl3d8TL\\Gm3c8SL]Gm3c8SL]Gn3b8RL]GP4b8PL^GP4b8PL^GQ4a8oK^GR4b8nK^GS4a8mK_GT4`8lK`GT4`8lK`GU4_8kK`GW4_8jK`GW4_8iKaGW4_8iKaGX4^8hKbGY4]8gKcGY4]8gKbG[4]8fKbG[4]8eKcG\\4\\8cKeG]4[8cKeG^4Z8cKeG^4Z8bKeG_4[8`KfGa4Y8_KgGb4X8^KhGc4W8\\KjGd4V8\\KiGf4V8ZKjGg4U8YKjGi4U8WKkGj4T8VKlGj4T8VKlGk4S8UKmGl4R8TKnGl4R8TKmGn4R8RKnGo4Q8QKoGo4Q8QKoGP5P8PKPHQ5o7nJRHS5m7mJSHS5m7nJRHS5m7mJSHS5m7lJSHV5l7jJTHW5k7jJTHV5l7jJTHW5k7iJUHX5i7iJVHY5i7gJWHY5i7gJWHZ5h7fJXH[5g7eJXH]5g7cJYH]5g7cJXH_5g7aJYH`5f7`JZH`5f7`JZHa5e7_J[Hb5d7]J\\Hd5d7]J[Hd5c7]J]Hd5b7\\J^Hd5b7\\J^He5a7[J^Hg5`7ZJ`Hg5_7YJaHg5_7YJaHh5]7YJcHh5\\7XJdHi5[7WJeHi5[7WJeHj5Y7WJfHk5Y7UJgHl5X7TJiHk5W7UJiHl5T7VJlHk5R7VJoHi5Q7WJPIi5o6WJQIj5n6WJQIj5m6WJTIh5k6ZJVIe5i6[JWIe5i6\\JVIe5i6[JWIf5g6[JYIf5f6ZJ[If5d6[J[Ie5e6[J[If5d6[J[If5c6\\J\\Id5c6]J^Ic5`6^J`Ic5_6^J`Ib5`6^JaIb5^6^JbIb5]6`JcI`5\\6`JeI`5Z6`JgI_5Y6bJfI_5X6bJhI_5V6bJjI_5U6bJjI_5T6bJmI^5Q6dJnI]5P6dJPJ]5o5dJQJ[5n5gJRJY5m5gJTJX5k5iJUJX5j5iJUJX5i5iJWJW5i5iJXJW5f5kJYJV5f5kJYJV5e5kJ[JU5d5lJ\\JU5b5mJ^JS5a5mJ`JR5_5oJaJR5^5nJbJS5\\5oJcJQ5\\5PKeJP5Y5QKgJP5X5PKiJP5U5QKkJP5T5QKlJn4S5SKmJn4Q5TK
oJXNYOa6f5XKQKk4n4WKQKj4m4WKUKg4j4ZKWKf4g4\\KXKd4g4]KYKd4e4]K[Kd4c4^K\\Kb4d4^K]Kb4b4^K^Kb4b4_K]Kb4a4_K_Kb4_4_KaKa4_4`KaKa4\\4aKcK_4]4bKbK_4\\4bKdK_4Z4bKgK^4W4cKjK\\4V4eKjK[4U4eKlK[4R4fKnKZ4Q4hKnKY4P4hKPLY4o3hKQLX4m3iKSLW4m3iKSLX4k3jKULU4k3lKVLS4h3nKXLR4h3oKWLR4g3oKYLQ4f3PLZLP4e3QL\\Ln3d3SL[Ln3c3SL]Ll3c3VL\\Lj3c3WL]Li3c3XL]Lg3c3YL]Lg3b3[L]Le3b3\\L^Lc3c3]L]Lc3b3^L_La3a3`L_L^3b3bL^L^3a3cL`L[3`3fLaLY3_3gLcLV3]3lLcLS3\\3nLdLQ3]3oLcLQ3\\3PMeLn2\\3RMdLn2Z3TMfLl2Z3TMfLk2Z3VMfLi2Z3XMgLg2Y3YMgLg2X3ZMhLe2Y3^MdLb2[3_MeL`2[3aMeL_2[3aMeL^2[3aMhL^2W3dMhL[2X3fMmKXLg0R6\\3iMeLV2[3jMgLU2X3lMhLS2X3oMgLQ2Y3PNeLP2[3QNfLn1Z3QNhLm1X3TNiLk1W3WNgLi1X3YNgLf1Y3SNQLTLh0h5W3TNPMk1P3VNPMj1o2YNoLg1Q3\\NlLc1T3]NmLb1T3]NnLb1Q3_NPM_1P3bNPM^1o2dNPM\\1o2eNQMZ1P3eNRMY1n2hNSMW1m2jNRMU1n2lNRMT1m2mNSMR1n2oNQMQ1n2POSMn0m2TORMl0m2VORMi0o2VORMj0m2WOSMh0m2YOSMg0m2ZORMe0n2\\ORMd0m2^ORMa0o2_OQMa0n2@RM?o2BPM>P3BPM=P3EoL;P3FQM8P3HQM7n2KRM3n2NPMPKUOQ5k3OSM0m22RMNm23SMLm25SMKm26RMIn28SMGl2;TMCl2>UMAk2`0TM_Ol2c0SM]Om2c0SM\\Om2e0TMZOk2g0VMWOj2k0UMUOk2l0TMSOl2n0TMROl2n0TMQOk2R1TMnNk2S1UMlNl2T1TMlNk2V1PMYJZO`4f3X1PMXJZO`4f3Y1oLWJ[O_4g3Z1nLWJ[O_4f3[1oLVJ[O^4f3^1oLSJ\\O]4f3`1SMSJVOY4f3f1VMZNi2h1VMWNk2j1gLoIH0KV4f3l1gLnIIOKV4e3n1fLmI0S4Y3Q2gLkI2R4W3U2fLjIE09Q4[3V2gLiIE19n3[3Z2fLfIH08o3[3[2eLfI5o3U3\\2fLeIL2I10k3f3^2dLeILU4a3W2aLeIMU4a3W2bLeIHM2V4d3Y2cLfIHQ4d3Y2eLgIFP4d3Z2fLfIFo3e3[2eLfIGm3d3^2eLaILNKo3d3c2RM[Mm2h2SMWMm2i2cL]I0NKl3b3k2bL\\I0MLl3a3l2cLZI00Li3a3m2lL[IFd3^3R3kL[IHb3]3T3jL[IH`3_3W3fL[IJ]3`3X3gL\\IH\\3`3Y3iLZIG\\3a3Z3hLZIG[3`3]3iLYIFZ3a3^3hLXIGZ3`3_3hLYIGW3b3a3gLWIGX3a3b3hLVIGW3a3e3gLgLX3Z3iLdLW3]3iLcLW3o0" + } + ], + "model_output": "A black laptop with a glossy finish, featuring a keyboard with white lettering on the keys and a trackpad with two buttons below it, sits on a table. 
The laptop has a visible logo on the lower right corner of the palm rest area.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_51.png", + "subject_name": "person", + "object_name": "person", + "predicate_name": "beside", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "bS\\1SEGg:=VEFg:;XEGf:;XEHd:4N2M2O2N1O101L3O1M3O1O0\\OWORCj0jI7I7L4M2O2M3N2M3N2N2M3O1M3N2L4L5WIiIc4[6SKTJb4R6]KQJ\\4T6cKoIV4V6jKlIn3[6QLfI^3k6_LWI`3l6]LVIb3R7ULPIi3T7bKoGOo0_4R9O2N1O2M2O2N1O2O1M2O2M2O1N3N1N3M3N1O2L3N3L4M3M2N3L4L5L5K4K6J6I8J6I7KU1iN_Rj0" + }, + { + "size": [ + 480, + 640 + ], + "counts": "nj_66l03V@`Rj0" + } + ], + "model_output": "A smiling man with short hair and glasses, wearing a blue and gray fleece jacket with a logo on the left chest, blue jeans, and brown shoes, stands with his hands in his pockets and a slight lean to his right side, attached to a bicycle.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_53.png", + "subject_name": "handbag", + "object_name": "person", + "predicate_name": "held", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Qha0a0f=F^BY1j2N10000O0100O0001O0O101_NMXC074=FQ;;VD070a0Jm:;WDg0j0ROl:c1TE_Nj:a1UEbNj:^1UEdNi:\\1VEfNj:Y1VEjNh:U1WEnNh:R1WEPOh:P1WEROh:n0TEWOk:j0nD]OQ;d0lD_OT;a0iDBV;U200001cLgDP3[;lLgDT3b;2O0O2N2O1N2_EdL[9]3aFeL_9^3]FcLd9`3WFaLi9b3SF_Lm9\\2iE]N2\\OU:U2nEROR:l0QFROQ:=aFA`9fF_O\\9?gF^OZ9UOcEg0Y10T9YOgE`0[13P9\\OiEj8BmEM]1?h8CfE2m11^8j0dGSO^8k0dGSO^8l0cGSO]8l0dGSO^8k0dGSO^8j0eGUO\\8d0kGYOY8b0U3M3Jgh>" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"Y3e;[3000001O000000001O0000001O0000001O00000000001O0000000000001O0cKbLXM^3f2iLUMW3i2mLUMS3i2QMUMP3i2SMUMm2j2ZMPMf2[2eLYJk0W3`2_2hLWJl0W3]2`2jLWJk0W3]2_2kLVJk0Z3]2[2ZNdMl1V2UNiMl1V2TNjMQ2P2PNPNW2i1iMWNX2h1hMXNZ2f1gMYN[2e1eM[N\\2d1dM\\Nd2\\1]MbNk2W1VMhNk2X1UMhNj2X1XMfNi2Z1\\M`Nh2]1cMWN`2g1cMUN_2k1cMQN^2P2fMjM]2V2hMbMk2m1XMnMj2Q2VMnMk2R2TMnMl2R2UMmMl2R2UMmMk2S2WMkMi2U2XMjMh2V2]MeMc2[2bM`M^2`2eM]M[2c2gM[MY2e2iMYMW2g2jMXMV2h2kMWMT2j2mMUMS2k2nMTMQ2m2QNQMn1P3SNoLm1R3TNlLn1R3SNmLo1R3RN`LmKLQ6d3WNWLoK3j5f3dNZL\\1f3eNYL^OOaMi3P3XLZO5fMc3V7^LjHc3U7^LjHc3U7]LkHd3S7gLbHZ3^7QMWHP3^2VLa1Q1kKi2d2YL^1P1lKh2e2ZL]1o0lKj2f2nKbL6j4S1WKgN`0T4R3PLV1\\1eKg2T8]MgGf2W8\\MfGh2X8XMhGi2R4bKgKOm3b6aMmIh0Di1a6ZM`J;oNZ2d6XMbJ9kN_2f6UMbJ7jNe2g6PMdJ5gNj2i6mLdJ4dNo2m6hLbJ6aNS3n6dLcJ7`NU3n6cLeJ4^NY3n6QLhH9Q2N`3[4SKXLm0A>Ld3Z4RKYLi0C`0Ke3X4SK[Ld0Ed0He3X4SK]L`0Ff0Fg3W4TK]L>Fg0Fg3W4TK]L=Gh0Eg3W4TK^L;Hh0Cj3X4RK^L:Ii0Ak3X4RK`L5Km0\\Om3Y4QKbL0LR1XOn3Z4PKcLNMS1VOo3[4oJcLMNT1SOQ4\\4nJSNP1bMR4[4oJRNo0cMR4[4oJRNn0cMT4[4nJRNn0cMT4\\4lJSNo0`MV4]4kJSNn0aMW4\\4kJSNn0aMW4\\4kJTNc0SM^O=T5\\4kJTN?oMe4n3kJTN>oMg4m3kJTN=QNg4k3lJUN;RNh4j3lJTN;VNf4f3oJTN;WNe4e3PKTN;XNd4d3QKUN:XNg2fNgNn4WNTN:ZNe2gNiNk4XNUN9[Nc2gNkNi4YNWN7YNd2hNlNh4ZNWN5ZNc2iNmNg4[NVN4\\NQ1bN36=f4[NXN2[Nn0gN60?f4\\NXN0\\Nl0iN7Na0f4[N0l0_K6Lb0e4]N0h0bK7Jd0c4bNO?fK:Ie0b4cNO;iK;Gg0a4dNO7lK;Fi0`4fNN4mK^1BaNc0[1]OcNg0[1YOdNm0W1SOhNR1S1POkNU1Q1kNoNV1Q1iNoNX1YOVId1b5SO\\1l0dNTO\\N\\OfLN\\O110l1[1f3YOXNAdLIAa0n1_OWNP1^5F[N^OaLh0ESOg1n0h3J[N\\OaLR1[1Fi3L[N\\O`LU1\\1Ai3OZN[OaLX1Z1]Ok31ZNYOaL[1X1[On31XNYObL^1U1XOQ42WNXOcL`1S1VOS43VNWOdLa1R1UOT45aJSO[32nLa12]N=h0g47^JSO]31nL`1GkNg0:i4?lMiNoLa1IlNa0;k4`0]N8YLlNID`0h0Q5c0ZN5[LWO52W5b0YNnN\\L?03OO\\5b0ZNiNbL>I:LN_5b0YNgNeL>G_1[5]OXNfNgL=G_1Z5_OWNdNlL;Cb1Z5_OXN`NPM<_Oe1Y5@XOiN_Kg1Y5@YOgN_Ki1X5AZOaNaKn1V5@N`02CK=6FF::GE9;HD8dLR5kN`Jb4=cLS5kN`Jd4;aLU5lN_Jd4<_LV5oN[Je4=ZLY5ROYJe4>XLY5SOYJf4=WLZ5TOXJf4U7bMdIV2RO9[7bM]I_5c6bJ[I_5e6bJVIb5k6`JkHg5U7Q11O100O1O101O000O10010O0O100000010JZHoHg7Q7ZHnHf7R74O100O2O000O10XHQI_7n6:O[HSIZ
7l6eHUI?" + } + ], + "model_output": "A person wearing a white basketball uniform with the number 6 on the back, white shorts, and white sneakers with black accents is captured in mid-stride, suggesting movement, running on the playingfield. The individual has short dark hair and is looking to the side with a focused expression.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_55.png", + "subject_name": "bowl", + "object_name": "table", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "ejb06Q=7I7I5K4N3M2M3M4M1O2N2N2N1O2N1O2N1O2N1O2N101N1O100O1O2O0O1O2O0O1O101N1O10001N100O2O0O2O000O100O2O0O100O101O0O100O1000000O10000O10000O10000000000O100000O100000000O10O1000O10000O10000O100O100O100O100O100O2N100O1O1O2O0O1O1O1O1O2O0O1N3N1O1O2N1N3M2O2M3M2N3N2M2O2M3N3M2M3N3L4L4K6HZQn5" + }, + { + "size": [ + 426, + 640 + ], + "counts": "d1Z29Bb0f0b4IkJBc0e0a4JjJDd0b0a4JjJFe0`0a4Q2_KPN_4Q2bKnM^4R2bKnM\\4U2dKjM\\4V2dKjM[4W2eKiMY4Y2hKgMV4DoJ`0k0LU4\\2jKdMU4]2kKdMS4]2mKcMS4]2nKbMQ4_2oKaMP4`2PL`Mo3b2PL^Mo3c2RL]Mk3e2UL[Mj3g2VLXMi3i2XLVMg3k2YLVMf3j2ZLVMe3k2[LUMd3m2\\LSMb3n2^LRMa3o2_LQM`3P3`LPM^3R3cLmL\\3U3cLlL\\3T3dLlL[3U3eLlLY3U3gLkLY3U3hLjLW3W3iLiLW3X3hLiLU3Y3lLfLS3[3mLeLR3\\3oLcLo2_3QMaLn2`3RMaLm2_3TM`Lk2i0[L[Oi0Lk2f0dLZOa00j2e0kLWO;5h2b0RMXO56h2`0XMXO08g2?]MWOM9d2a0aMUOL9b2a0eMUOI;a2?hMUOH;_2`0kMTOF=^2>nMUOD<^2=QNVOA=^2;TNWO^O>^2;TNWO^O>^2:VNXO\\O=^2:XNXOZO>^29YNYOYO?]27\\NYOXO?\\27]N[OVO>]26_N[OTO?]26_N[OTO`0\\24aN\\OSO`0\\23cN]OQO`0[22eN^OPO`0[21fN_OPO?Z22gN^OoN`0[20gN@nNa0[2nMdLc1S2OnN?o2ZOTN7mN`0o2XOUN7lNa0Q3UOTN:lN`0R3SOSN>jN?T3QOTN?hN`0U3POSN`0hNa0V3lNSNc0gNa0X3jNRNd0gNa0\\3eNnMk0eN`0a3`NkMP1eN`0a3gM^Ka0\\2X1eN`0b3fM_K?\\2Z1cNa0l3RNaM]1dN`0n3oM_Mb1bN`0P4kM`Md1`Na0P4kM`Md1aN`0P4kM_Me1aN`0Q4jM^Mf1aN`0R4hM_Mh1_N`0R4eMaMk1]N`0j6@VI`0j6@VIa0i6@VI`0k6_OUIa0k6_OVI`0j6@VIa0i6_OWIa0i6@WI?j6@VI`0j6@VI`0j6@VIa0i6@VI`0j6@WI?i6AWI`0h6@YI?g6AYI?g6BXI>h6AZI?e6A[I?e6A[I?j2fMIj1^Ma0h2iMFd1eMb0e2kMEb1gMc0d2lMD_1jMe0b2nMB\\1mMf0a2PN_OZ1RNf0
^2RN^OV1VNh0\\2TNWLEk2^1dNh0Z2_NROg0fNj0X2_NROf0hNj0[2[NmNi0jNm0Y2YNlNj0lNm0Z2WNjNj0oNn0X2WNiNi0QOQ1V2UNiNi0ROR1d5lN^JT1a5lNaJS1_5kNcJU1]5jNdJV1\\5hNgJX1W5hNjJX1V5gNkJZ1T5dNoJ[1P5eNRKZ1n4eNSK[1l4eNVKZ1j4fNVKZ1i4gNWKZ1h4gNWKY1i4gNXKX1g4iNYKW1f4jNZKV1f4jNZKW1d4jN]KU1b4lN^KU1a4lN^KT1a4mN_KS1a4mNaKQ1^4PObKQ1\\4POeKo0[4QOeKo0Z4SOeKn0Y4SOgKm0X4TOhKl0W4UOjKj0U4WOkKj0l0nM]OY1Gi0j0QN]OW1Jg0i0SN[OX1Ke0j0SNZOY1Le0i0SNQMGd1b1b0d0h0UNoLHe1`1d0c0h0VNmLIe1_1g0a0g0XNjLJg1_1g0?h0YNhLKf1_1j0>g0\\NcLIj1_1l00n3d2dMXOPNTN>Oo3e2cMYOoMSN`0MP4f2aMZOoMSN`0LQ4g2aMXOoMVN?IR4j2_MWOPNVN`0FS4m2]MWOQNUNY5d2gLVOPNVN=IV4k2\\MVOQNVN60]4d2\\MVOQNWN32_4a2]MWOnMXN53^4^2_MWOlMZN65\\4Z2cMGPNQN\\4X2dMHnMSNo2@QNd2S1g0e0PMSNZ2W1g0b0UMTNT2Z1i0=YMVNn1]1j0;[MVN:]OOS2\\27_MXN3@3P2\\26aMXNMF5l1^23cMZNHI7j1^21gMZNCL8j1e0iMGU2K[N^O0;g1d0kMGR2N[N[O2=e1c0lMHQ2M]NXO4?b1d0lMIo1O]NSO8b0`1b0mMIm11^NPO:d0^1b0nMIk12^NmN>e0[1c0oMHi15^NiNa0g0Y1c0PNHf16aNeNb0k0V1b0RNGd19aNaNe0m0T1b0TNEa1=aN]Nh0o0S1a0UNE^1?aNZNl0P1P1b0VND\\1a0bNXNm0Q1o0b0VND\\1b0aNUNP1S1m0b0WNCZ1e0bNoMS1X1i0a0YNBY1f0bNmMV1Y1g0a0XNCY1g04UO;a0XNCX1h06SO:b0YNBW1i06TO9a0ZNBW1i06TO9a0ZNCU1i08TO9?ZNDU1i09SO8`0ZNDU1j08RO9`0ZNDU1j08RO:?YNDV1k08RO8?ZNDU1l09QO8?ZNDU1l09QO8?ZNDU1l09QO9>YNEU1l0:QO7>ZNEU1l0:QO7>ZNEU1l0:QO7>YNFV1k0:QO8=XNGV1k0;PO7>WNHW1j0;PO7>VNHY1j0:QO7Y2a7VNQHA>Z2`7VNVIj1j6VNVIj1j6WNUIi1k6WNUIi1k6XNTIh1l6XNTIh1l6XNTIh1l6XNTIi1k6XNTIh1l6XNTIh1l6XNSH^O9[2OfMc6a0UI^O9[2NgMd6`0UI^O:Z2MhMd6a0UI\\O:[2MiMc6`0VI\\O:\\2LjMc6>VI^O:Y2MmMa6gM]O:W2MQOX2oNJ_OA]NSO?S2IiNl1c0\\O_O`NSO>R2JiNj1h0ZO[OeNQO=S2JiNi1m0VOWOjNPO=T2IgNj1Q1TOUOkNoN>T2IgNi1S1ROUOnNmN?S2HgNh1W1oNUOROjN?S2HgNg1c1dNjN_OhN>T2HfNg1l1ZNfNIcN>U2HeNg1S2QNdN2`N=T2IeNf1Z4SNmJ>T2IdNf1\\4TNkJ=U2IdNd1^4VNiJ>T2HeNb1a4XNeJ>U2HeNa1b4YNdJ>V2HbNa1f4XNbJ?U2IcN_1g4YNaJ?U2IcN^1h4[N_J>V2IcN]1j4\\N\\J>W2IcN[1l4_OaLVOcN[1l4@`LUOcN[1n4@_LUOcNY1Q5A\\LWObNW1S5B[LWObNU1V5CXLXObNT1W5EVLWOaNU1Z5DULWOaNS1]5ERLXO`NS1_5EQLXO`NR1`5FPLXO_NR1c5EnKYO_NQ1d5GlKYO^No0i5GiKZO^Nn0j5IgKYO^No0k5HhKXO]No0l5IgKXO\\Nn0o5JeKXO[Nn0R6IcKYO[Nl0U6K_
KYO\\Nk0V6L]KZO]Ni0W6M\\KZO\\Ni0Y6M[K[O[Ng0[6NZK[O[Nf0]6OXKZO[Ne0_61VKZO[Nd0`62UKZO[Nc0a64RKZO\\Nb0d63PK[OZNb0h64nJYOZNb0j64lJ[OXNa0m64jJ\\OYN?n66hJ[OYN?Q75fJ\\OYN=S77dJ\\OYN_IDZNMX8`0]IC[NLY8a0\\IC[NJ[8c0ZIDZNH]8c0ZIEZNF^8d0XIFZND`8f0VIGZNBa8f0UIH[N_Ob8i0SIG]N^Ob8j0QIH]N\\Oe8k0nHJ[N\\Og8j0nHJZN\\Oj8i0lHK[NYOl8k0jHKZNYOn8k0hHL[NWOf0M[7P1cIL[OVOR7n0cILZOXOR7l0dILXO[OT7i0cILXO^OS7f0eILWO@S7e0eIKWOBS7c0fILVOBS7b0gILUODT7?gIMTOGS7=hILTOHT7W2SAjMk>V2UAkMj>V2UAmMh>S2WAPNh>Q2PAWNn>_3N2O1N2N2N2O0O2N1O2N2N2O0O2O0O2N2O0O2N101N1O101N1O1O100O2O1N1O2O0O101N1O100O2N100O1O100O100O101N100O1O100O2O0O100O1O100O1O100O1O10O01O100O010O1O010O00100O010O0010O01O1O00100O001O01O10O010O010O01O010O0010O00100O0010O0010O01O010O01O010O01O01O010O0010O001O010O01O010O01O0010O1O010O01O010O001O010O010O001O010O10O01O01O01O10O1O01O0010O01O010O010O010O001O10O010O01O010O1O0010O10O1O001O010O1O10O01O001O010O1UKbBS3_=f11O001O100O1O00100O100O00100O1O1O010O1O1O1O1O101N1O1O1O101N1O1O2N1O2O0O2N1O2O0O2N2N2N1O2N1O2N2O1N1O2N2N1O2N3M2N4L4M4K7I2N2N2N2N1O2O1N2N2N2O1N2N2N2N2O2M4L3N2M6J:F3M1O3N1N2N3M3Mf6" + } + ], + "model_output": "A golden-brown hot dog bun with a soft, fluffy interior and a slightly crisp exterior, featuring a split top design to cradle its contents, is attached to a hot dog.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_57.png", + "subject_name": "laptop", + "object_name": "table", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": 
"0V5j90WFlJf9W501O1O4L4L1O2N2N5K2N3M1O3M2N3M2N3M2N3M6J1O002N4L3M1O12O0100NO2N2O3M1O01O02M7J2M1O2O2L3N3L2O1N5J:E4N2O004N2NO2N0101O1N8H0O00003N1N2N2N2M3N1O0000O1O1O2O0O1M3O1O0100000O001O01M3O1N2JkE_KW:_46N2O00100O10001O0001eEdKP:\\4oEfKP:[4PFdKP:]491iEfKf9]4QFiKo9d40000000000000000000000001O00001O1O001O000iKRF\\3o9`LUF_3k9^LYFa3g9]L[Fc3e9\\L\\Fd3d9[L]Fe3d9XL_Fg3a9XL`Fh3`9VLbFj3Z:0O10000000000M3O1000000O1HmK`E0NT4e:mK[ES4e:nKZEQ4f:5L4000000LeKbE\\4\\:5N2000XMkEe0T:XOWFa0h9\\ObF>Z9lMnEc1R1:T9CQG;o8\\OjElN[1f1k8ZOaGe0^8ZOdGf0\\8WOhGh0U8YNmEn0o1i0T8XOnGh0R8WOPHh0o7UOUHk0k7QOYHo0g7POZHP1f7nN]HQ1c7mN_HS1`7jNcHW1U7ZN[F=c2Y1Y7gNgHY1Y7fNhHZ1Y7dNhH\\1X7bNiH_1W7`NfHIZMg1P:]NiHLWMg1P:[NoHe1U:1M3JVNUCk1P=00000K5LoMYCQ2j<100M3O100000000O100000000O1O1001O00000O2O1O1O2N2N001O1O1O1O001O001O1O0000001O1O00000000MYNmBg1S=YNmBg1S=YNmBg1S=YNmBg1S=YNmBg1V=000001O000000000000001O00000000001O00001O1O00000O2O00001O001O001O1OYMfNhGZ1W8gNjGX1V8iNiGW1W8iNjGV1V8kNjGT1V8lNkGS1U8nNkGQ1U8oNkGQ1T8QOkGo0U8QOlGn0S8TOmGk0S8UOnGj0R8VOnGj0R8VOoGi0P8YOPHf0o7[OQHe0o7[OQHe0o7[ORHd0n7\\ORHd0m7^OSHa0l7@TH`0l7@UH?j7CUH=k7CUH=k7DUH;i7GWH8i7JWH5h7LXH4h7LXH4g7MZH2d70\\H0c72\\HNc73^HLb74^HLa75_HKa75`HJ^78bHH]7:bHF^7:cHE\\7=cHC[7?eHBZ7?eHA[7?eHA[7?eHA[7?eHAZ7a0eH_O[7a0eH_O[7a0eH_O\\7`0eH_O[7a0eH_O\\7a0bH@^7`0bH@_7?bH@^7`0bH@^7a0aH_O_7a0aH_O`7`0`H@`7`0`H@a7`0^H@b7`0^H_Oc7a0\\H@e7`0YHAg7?YHAh7>WHCi7>VHBj7>VHBj7>VHAl7>THBl7>THBm7>RHBo7=QHZOeMB[:T1PHUO^8k0bGSO_8m0aGSO_8n0aGPOa8o0`GPO`8Q1_GoNb8P1^GoNc8Q1^GlNd8U1[GjNf8V1ZGjNg8U1ZGjNf8V1c2O10O01O01O0010O01O01O1O010O001O001O01O000000010O00100O010O0001O3M2O0O2N2N2O2M1O2N3N0O3N1N3M2N3N1NTgZ3" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"[5e2=`1KdM`5l0eJ`1KdMa5k0cJb1LcMc5i0`Je1MbMe5h0]Jf1NbMg5f0[Jh1NbMk5b0WJl1NbMm5`0UJm1OcMn5>SJo1OcMP6R1V8]O]G@>R1U8]O^G@`0Q1S8]O^GBa0o0Q8^O`GAb0o0n7@`GAd0m0l7AaGBd0l0k7BbG@d0n0j7AcGAd0m0h7CdG_Of0m0f7CfG^Of0n0d7DfG^Og0m0d7DeG_Oh0l0P2eNj1o0_K_Oh0m0b7DfG^Oj0m0`7DgG_Oj0l0_7EgG_Oj0l0^7EiG^Ok0l0\\7EiG@k0k0\\7EiG_Ol0l0[7DjG@k0l0[7DjG@l0k0l1mN]1g0kKAm0k0k1POYLLh4g0XLBm0j0j1ROY1b0PLAn0k0j1QOX1b0QLBm0k0j1SOW1>SLCm0l0i1YOQ18XLDn0k0j1XOP18YLEm0k0j1YOo06[LEm0l0j1XOn06\\LFl0l0j1_Og0ObLFn0l0i1Bd0KfLEP1m0g1C7ZOPLa0R1DR1n0e1G3WORLa0T1CR1n0e1H2VORLb0T1AU1o0d1JNNYMYOU1o0d1KMLZMZOV1o0d1JLMZMYOW1P1c1JKM[MYOY1o0b1KIM\\MYOY1o0c1JHM\\MZOZ1o0b1KFM\\M[O\\1m0b1LEL\\M[O^1m0a1LEK]M\\O]1m0b1KDL]M\\O\\1n0c1KCJ^M]O]1n0c1JBK]M^O^1m0c1K@K^M]O`1m0c1J_OL^M]O`1m0c1J@J^M^O`1n0c1I_OK^M^O`1n0c1J^OJ_M]Oa1o0b1J^OI`M^O`1o0c1I]OJ`M\\Ob1Q1a1J\\OHbM]Oa1Q1a1J\\OHbM\\Ob1R1a1JYOIdMZOc1S1`1KXOGc0>5KXOGb0?6JXOFc0`05JXOFc0`05KWOEd0`05KWODe0a04KWODe0a04LVOBg0b04KUOBh0c03KUOBh0c03LTOAi0c03LTO@j0d02MSO_Ok0d03LRO_Ol0e01NRO]Om0e01OQO\\On0e02NPO\\Oo0f01NPO\\Oo0f01OoN[OP1f01OoNZOQ1g00OoNZOQ1g00OPOYOP1h000POVOP1k00OPOVOP1k000POTOP1l000QOSOo0m001POQOQ1n0O1POQOQ1n0O1QOPOP1o0O2POoNQ1o0O2QOmNQ1Q1N2ROkNP1T1O0ROkNo0U1N1TOhNo0W1M2d3N\\L2d3N\\L2d3N\\L2d3N\\L3c3M]L3c3M]L4b3L^L4b3L^L4b3L^L4b3L^L5a3K^L6b3J_L5a3K_L6`3J`L6`3J_L7a3I_L8`3H`L8b3F_L9a3G^L:c3E]L;e3C[L>e3A[L?e3A[L`0e3_O[La0e3_O[Lb0d3^O\\Lb0d3^O\\Lb0e3]O[Lc0e3]O\\Lb0d3^O\\Lc0b3^O^Lb0b3^O_La0a3_O_La0a3_O`L`0`3@bL?]3AeL=[3CfLhM@X2`0kM]OU2c0lM\\OT2d0nMZOQ2g0PNYOo1g0SNWOm1i0UNUOj1l0WNSOi1m0YNQOg1o0_NjNa1W1T60000000O100000000O1000000O10000O1000000000000O100O100000000000000O100000000O100000000O1000000O1O100O1O100O100O1O10000O1000000O1000000000000000000000000001O00001O000000001O00001O000000001O000000000000001O00000000001O00eB" + } + ], + "model_output": "A black laptop with a visible keyboard featuring white lettering on the keys, a touchpad, and a series of ports on the left side including USB ports, an HDMI port, and possibly a card reader slot, is placed on the table. 
The laptop has a visible hinge mechanism and a partially visible screen with a thick bezel.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_58.png", + "subject_name": "person", + "object_name": "surfboard", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 432, + 640 + ], + "counts": "_nd22\\=3N2N100O11O000000QFJe65ZINd62ZI0f60XI2i6MTI6m6JnH9S7GkH;U7EhHj7BTH?m7BoG?S8AkG?X8@fGa0[8@bG`0`8@_G?c8A\\G>g8BWG=k8CTG;o8EPG9T9GkF7W9JgF6[9IdF7]9JaF6a9J]F7c9I]F6d9J[F7f9IXF7i9JUF7k9IUF6m9JQF7o9JoE7Q:KlE5MBP9lFO0ET9=kF2JD[9>fFR1Y9QOeFo0[9SObFn0^9SO`Fm0a9UO]Fk0c9WOZFj0g9XOUF?U:Y16WO`EZNh:c1d0O2M101O0O101O001N2O1O1O001N2O1O1N2O2M4L=C3L5K6Hejf4" + }, + { + "size": [ + 432, + 640 + ], + "counts": "c\\k24\\=001O00000000001O0O100000001O000000000000000O1000001O0000000000000000000000000O1000000000000000O10000000O1000O100000O01000000O10O0100O10O0100O1O100O2Meab4" + } + ], + "model_output": "A person in a full-body black wetsuit with long sleeves and legs, featuring a front zipper and a logo on the left chest, is playing with a surfboard.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_59.png", + "subject_name": "horse", + "object_name": "sand", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": 
"VSR21[=0ZS_16]l`N5K4M3M4L5QDUOT;R1bDVOZ;`1M3L4M3N2N3M\\OPE@\\:=jEBU:8QFHn98QFJo97oEJQ:h10]OkEPNT:n1SFmMn9U2c02N00]OWEcNj:\\1[EYNI1m:d1d0N2N30\\EaNZ9b1bF`N]9b1`F_Na9`1^FaNc9^1[FdNe9[1YFhNg9X1UFlNl9S1SFlNP:S1oEkNU:V1eElN]:o12N2M4L5M3L2B`0H7L5M3N2M3N001N101O0@eC1[d3^8bLaG^3_8aLUGM6b3e8aLUGN6`3e8oLTGS3l8f000O1000000000000000000000O11O000000000000000000000000000000bLTG]2m8QM[GDI10Z3l8mLcGX3]8dLhG[3X8cLjG]3R9000000000004L0000000000001OO1M3N2000000000000001O000000001O1O001O00000000001O00O1WOdLcG\\3T92002N1O000000000000O1O100O1O2N00101N1O1O1L4001O1O2N3M1O1O0000000000000000001O2N1O0000001O00mNYMaGh2\\8_M`Ga2\\8dMcG\\2\\8fMcGZ2]8Z11OO101N1000000000000O10000000000000000000000000000000000001aK`GQ4Q9L2N3M2N2N2N8SLaF5KR3R:00O2O000001O0000O01AQMbFo2]9TM`Fm2`9TM_Fl2a9UM^Fk2a9UM`Fk2_9RMfFm2Y9oL`FM8T3h8mLPGO682l2g8cMZG]2f8`M]G`2c8^M_Gb2`8ZMeGf2[8YMfGg2Y8ZMgGf2^8PMnFKf0U3\\91M3M3O1L4N2M3N2N2N2J6E;M3O10000O1001O3M>B>oK^F]3l9K2O1O00001O0000lNmLRG>:e2_8jM]GV2`8PN\\GQ2^8kL`GW10n1_8a110000000000O1000000O10000O10000O10000000000O1000000000000000000O100000001O0000000O100000000000000O1000001O0O10000000000000000O2O0000000000001O0O1000001O00000000000O1000000000000000O100000000000O1000O1000000000000000000000000000000000O11N100000O11O000000O11O00000000000000000000000000000000000000000000000000000000000O100000001O0000000000000000000000000000000000000000000000000O2O01O00O1000000000000000000000000000001O000000000000000000000000000000000000000000001O00000000000000000000000000000000000000000000000000000001O0001O0000000000000000000000000000000000001O00_OWG`Li8`3XG_Lh8a3YG^Lg8b3YG^Lg8a3ZG_Lf8a3[G^Le8b3[G^Le8b3[G^Le8a3]G^Lb8c3^G]Lc8b3]G^Lc8a3^G_Lb8a3_G^LW1" + } + ], + "model_output": "A dark bay horse with a sleek coat and a black mane and tail stands on the sand. 
The horse features a white blaze on its forehead and white socks on its front legs, with a muscular build, prominent withers, and a well-defined neck.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_60.png", + "subject_name": "banana", + "object_name": "bowl", + "predicate_name": "beside", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "c`]5c0\\>`0@3N1N2O1N2O0O2O1N101N2O0O2O1N101O0O2O0O2O001N10001O000O2O00001N100000001O0000001O0000001O0000000O11O000001O00000000001O0000001O00000001O0001O00000001O0001O00000000010O00000001O01O00000001O01O000000010O0000010O000001O01O0000010O0001O01O00010O0001O01O01O01O00010O000010O00010O00010O00010O0100O0001O0001O10O001O00010O010O02O0O1O010O101N010O1O10O02O0O1O100O100O100O2O0O101N101N1eGZM[4f2aK`MbLK[7g2PLaMaL1Y7_2SL[Ng3g1TL_Nj3a1SLdNj3^1QLeNP4]1kKeNV4\\1dKhN\\4\\1\\KgNf4[1SKgNo4\\1gJgN^5\\1YJgNj5_1gIfN\\6c1RIdNQ7c42O2N1N3M3N1N3N2N1O2N2M3N1O2N2N1O2O1N1O2N2N2N2N2O1N1O2O1N2O1N2O1O1N2O1N2O1N2O1N2O1N2O1N3M2N2O1N2N3M2N3M2N3M2N2N3M2N3M3M2N3L4M3M3L4L3OXJ" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"hZ^55j>3N1M3eI9YMJe2k0iGmNe4:`3_1UL`Ni3c1VL\\Ni3e1YLZNe3g1[LZNc3g1^LYN`3h1`LXN_3i1aLWN^3j1cLVN[3k1eLUNZ3l1hLRNW32aHV1Z4gNS31hHV1W4gNP33nHR1T4iNm26RIn0Q4mNk25XIk0n3oNi27ZIi0m3POh27]Ih0k3QOg28_If0j3ROf29aI7VOAc4Oe29dI5WOA`41d2:fI2WOC_41b2lIIV4Jl1=QJFT4Mj1>SJDS4Ni1?TJCS4Nh1`0VJAR4Og1a0WJ@R4Ng1c0XJ]OQ41f1c0ZJ[OP42e1c0\\JZOP43c1d0^JWOP45a1e0O[O0f00ZOOf02ZOMg03YOLh04XOLh04XOKh05YOKg05YOJh06XOIh07YOIg07YOHh08XOHh07YOIf08ZOGg09YOGg08YOIg07YOIg07YOIf07[OHf08ZOHf07[OIe06\\OJd06\\OJc06^OJb05_OKa05_OKa05_OK`06@J`06@J`07_OI`09_OGa09_OGa09_OGa0:^OFa0<^ODb0<^ODb0=]OCc0=]OCc0=]OCc0>\\OBc0?]OAc0?]OAc0?]OAc0?]OAc0?]OAc0?]OAb0a0]O_Oc0a0]O_Oc0a0]O_Oc0a0]O_Ob0b0^O^Ob0b0^O^Ob0a0^O@a0a0_O_Oa0a0_O_Oa0a0_O_Oa0`0@@?a0A_O?a0A_O?a0A_O?a0A_O?a0A_O?`0B@>`0B@>`0B@=a0C_O=a0C_O=a0C_OX15nJ_Oh3=Z14nJ_Og3>[13mJ@g3=]13kJAh3<]12kJCg3;_12iJDg3;`11hJEg3;a10gJFg3:c10eJGg3:d1OeJFg3;e1NdJHf3;f1McJIg39g1NaJJf39j1M_JKf38l1M^JKe38n1M\\JLe38o1L[JMd38R2JYJ0d36T2JWJ0e36U2JUJ1e35W2JRJ3f33Y2JPJ4f32[2JnI5NZOg2f0_3KjI7OZOg2c0a3MfI81YOf2c0d3LbI:3YOf2a0f3M^I;5XOf2`0h3NYI=7WOg2>j33nH=?TOg2=n3a1ZISNf2=Q4`1WIUNg2:T4a1SIWNg29X4_1oH[Ng26[4_1lH]Ng24`4^1fHaNg23d4\\1bHdNi20g4[1]HiNi2Mk4_2RKbMQ5^2lJcMU5]2gJfM[5Z2bJfMa5[2ZJgMh5Z2RJiMP6Y2jIiMX6Y2_IlMd6V2RIoMP7X2_HoMe7d42M3N4L3M4L9GA;E?B=B;D9GmeT1" + } + ], + "model_output": "A ripe banana with a bright yellow peel, exhibiting a gentle curve and a smooth surface with a few small brown speckles, is placed beside a bowl.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_61.png", + "subject_name": "person", + "object_name": "wall", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 482, + 640 + ], + "counts": 
"W]o31o>4M2N2[D1T81gG9R8IiG?R8CkGm0f7UOWHR1c7oNYHV1e7lNXHW1g7kNkGa1T8bN_Gj1^8\\NUGm1j8VNkFS2U9PNbFV2]9h1O10000O1O1O10000O1000000O1000000hMiJbH1ONk1X5Z5^KdJb4X5dKfJ\\4m4SLQKm3n4XLnJh3R5ZLkJg3T5aLeJ_3X5gLeJY3Y5mLcJS3Z5SMcJm2\\5XM_Ji2`5[M]Je2\\5eMaJ[2Y5nMdJR2W5UNfJl1W5ZNfJf1W5_NgJa1V5dNgJ]1V5hNgJY1S5^4O1N2O1M3M3N2N2O1O1L400O1O1O1O1O1O1O100N200OoGiKY5W4i2O1O10000O100O100O10000O1000000O1000001O0O0100001OO01000000001O00000000001O00001O1O1O0000001O0000010O0O2O1O1O001O000010OO2O1O1O1O1eIfKl1[4RNiKk1X4TNkKi1V4UNnKh1S4WNnKh1S4VNPLh1R4UNPLj1R4SNQLk1P4SNQLm1Q4PNQLo1Q4mMQLS2Q4iMQLW2Q4fMPLZ2S4aMPL^2S4]MoKd2U4UMnKj2Z4jLjKU3a4`JSJ11O0_1_1P4]5]KfJb4a5UKaJk4`5PKdJP5^5nJcJQ5^5mJcJS5^5kJdJU5\\5iJeJW5^5dJdJ\\5b7N100010OO100010O01O001N110O0O1000000jJWFS5i9mJWFS5i9lJXFT5h9lJXFT5h9mJWFS5i9mJWFS5i9mJWFS5i9lJYFS5k900001O01O1O001O1O0O1000O2O0O10000000O101O0001O01O0O101O00001O0000001O001O1O1O1O00001O001O001O1O00001O0000001O1O1O1O1O1O1O1O1O1O001O1O1O1O1O001O001O1O1O001O002N1O1O001O001O1O1O1O1O1O001O1O1O1O001O2M2O0O2O001O100O1N2O010O1O1O100N101O1O1O1O1O00100O1O1N2O00011N2M110O002N1O1N110O1O2N1O001O1O1O2N001O002N1O1O001O2N1O1O1O2N2N1O1O2N1O1O1O2N1O1N4L2N2O2L4L5K:DRo0" + }, + { + "size": [ + 482, + 640 + ], + "counts": 
"Zn071200001J0005O11J0O2O50K0:0K0J10001O0O5^2IiM7JKR7h4WJSMb5o2ZJUMe5l2WJWMi5l2SJUMm5m2PJTMP6o2iIUMW6n2bIVM^6f510000000000O100000000000000O1000000000O010000000000000000O10O11O1O0O2ROaIWI_6a6U1K5K5L4N102N1O1O00010O10000O1O1N200O100000000000000O10000O2O000O10O1TH^JX6b5fIfJT6Z5lIhJR6W5nIlJP6U5oIkJQ6U5oIkJR6U5lImJS6S5lInJT6R5lInJS6S5lImJU6S5jInJV6R5iIoJW6R5hInJX6R5gIoJY6Q5fIoJ[6e3RI`La0L]6a3XI`L:O_6_3[I`L61_6^3]I`L33`6\\3`I`LO3c6[3aIaLH7g6W3cIaLAVOES1X7U3cIbLB9n6R3aIfL_O9R7o2`IhLZOg0QO_OKKB2H7CJo11f2S1XKiN1c1h0_O]O^OLK0GW34^1k2PLUN^OYONMNFo0LW19_2i2QLWN^OWOONNF<`0f1Fb2f2PL\\N_OROO1NE;c0e1Dc2d2PL_N@POO1OD9f0e1Dc2e0]Ke0e0H_OnNO4OA9h0d1Ce2c0]Ke0e0L]OlN05N_O:j0d1Cd2`0`Kf0c0N^OiN07;:`1Cc2?^Kh0e0O^OeN298:b1Bc2e0^K;f08\\OaN3=5:d1Bd2b0^K9h0=YO_N4>49e1Cc2n1XLZOWO^N6>2;e1Bb28`Kg0j0=XOTN6d00:f1Bb28`Kc0l0c0WOPN5h0O9e1Cd25`K>Q1i0]OiNH9f1Ba2[OdKh01?n0m0]OgNH8g1Cc20dKd0i0m0_OeNJ8e1Dc2DeKI3V1e0m0AdNI9e1Ec2BgKH0Y1e0n0BbNH:f1Cc2@hK_1d0c0]OeMOX2c1cNd2^OhKa1c0b0_OdMOX2b1eNc2_1XL3CaM0Y2a1eNb2a1UL4G\\M2[2^1eNc2h1kK41C_1cNc2IoKl3NkM^1bNc2HQL_6[1kIc2V7[MlHd2U7ZMlHf2V7VMmHi2T7UMnHk2S7QMPIn2R7nLQIQ3Q7kLRIT3Q7eLSI[3Q7_LRI`3Y9O1O1O1O001O1O1O1O1O001O1O1O1O1O1O1O1O1O001O001O001O2N001O1O1O1O1O001O1O1O1O1O1O1O1O1O1O1O001O1O1O2N002N001O1OhMRN]Gm1`8YN^Gf1`8_N^G`1`8dN_G[1m2WNa0a0aLW1l2^N=>gLS1i2cN>=iLn0g2jN<:mLj0f2oN;9oLg0d2SO;8QMd0c2WO98TM?b2[O87XM=]2_O:4ZM<[2A:5[M:Y2C;4]M8V2F<3^M7T2H<2aM6R2H=3aM4Q2K=1dM2o1M=1eM1n1O<1gMMm13<0iMJl16:1kMKg15>0lMKe15`0OlMLb17`0OnMJ`19ROWNbM1h0h1UOVNc1_o0" + } + ], + "model_output": "A woman with short brown hair, wearing a black V-neck top and a gray cardigan, stands in front of a wall. 
She has a black helmet with a badge on the front, a neutral expression, and is adorned with a large circular necklace.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_62.png", + "subject_name": "person", + "object_name": "wall", + "predicate_name": "sitting on", + "mask_rles": [ + { + "size": [ + 444, + 640 + ], + "counts": "mm[43g=3N1O2O001O001N1010O01O0aJD]M1=Q1]OS2I_Me4a0bKoNa0n0@S3\\4lMnKROHQ3Y4QNiKPOOo2W4UNbKPO8k2U4NlK3S4LmK5R4KoK5P4LPL4o3MQL3o31mKOS43iK0U4l40000O10000000YHfKl5Y4l101O00000O10QOnKTHR4g7XLTHh3f7cLTH^3d7PMUHQ3b7i1^OmIVIX6g6a0M2O20O000001O0001O000001O00010O0000001[JTIX4l6`K`I\\4a6bKaI]4_6bKcI]4]6cKdI\\4\\6cKfI\\4[6nJQI9h0h4W6mJTI9f0j4W6kJVI4i0P5R6kJ]JU5V7O1O010O001O001O1O1O1O1O1O2N1O001O1O2N001O1O1O1O1O1O002O0O1N200O2N1O1O1O2[N[FoNf9c0YFSN0L4\\1d9l0XFhM3]1g9i0VFjM2^1j9f0TFlM2^1l9h0UFWOm9h0RFXOo9Z23M2M4M2N3L3N4L6H4M4L3N3L3L6J4M3M4J:D8I6M5GUCWOZ=6c__1" + }, + { + "size": [ + 444, + 640 + ], + "counts": "o_l5:P1Ib9?gFNjND?3n93L3O1O1O1N1O2O001N2O2N2M4M1O1N2N0O2O1O1O1O001O2O000O010O2O000000010O1O:F2N2N1O001OO100010OO1O011O002M5L1O1O1O2N1O2N1O1O1N2O1N2N2N8G6D`ALcml02WaSO2M2O2N1O2M2O2K6L3N1O2N1O3M2N2O0O2N4L2O0YHeNi1\\1UNiNg1X1UNmNh1V1VMeNQL?b6n0YMjNiKe0i6b0\\M:_2H]M`0^2B_Mc0\\2@[Mj0a2ZOYMm0c2TO[Mo0c2ROZMQ1e2SOUMR1g2SORMR1k2QOQMS1l2ROnLR1o2TOkLP1R3QOlLR1Q3POkLT1S3nNkLT1S3lNlLW1R3jNlLZ1R3eNnL^1o2aNPMd1n2\\NQMf1m2ZNSMh1l2VNUMl1j2RNWMo1h2PNWMT2g2kMTM\\2l2dMQM`2n2_MPMd2o2[MmLk2S3TMgLS3Y3lL`L\\3_3dL\\Lb3d3^LWLg3i3YLTLj3k3WLPLn3P4SLjKR4V4oKUK[M200P7h4S3000000O1O100O0010000O2N100O100O00100O100O100O1O2O0O001O100O100O1nMiEnMFLNO;b0Z:\\1\\ESN_1>U9Z1XGcNi8Y1_GcN`8[1eGaN]8V1nGfNT8Y1j2N2O1O1O2O000000O01000001M200O100O10O01O1O2O0O10000O10000O1000000O100000nFXOl3h0V501O0000O0101O0000000000000000000001O00001O00000YFXOV5i0`4001O001O001O1O001O001O001O1O1O1O001O001O1O1O1O1O2N2N1O1YEcNi7^1VHbNj7_1THcNk7^1gFiNFKc9^1VF`N@i03^OV:Z1oEgNEZ1\\:1iEjNIV1^:3aElN0S1^:g2O1O00001O001O1lMeKaI\\4Z6iKeIX4V6oKgIR4S6VLjIj3Q6\\LnId3o5bLPIQOCF:5M5JYbd2" + } + ], + 
"model_output": "A white football with a pattern of blue curved lines and a blue and white logo is beside a person.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_64.png", + "subject_name": "car", + "object_name": "road", + "predicate_name": "driving on", + "mask_rles": [ + { + "size": [ + 543, + 640 + ], + "counts": "kRZ83h`09H9a_OQ7l7jIdGWOkNgAT1^>bNhA]1n>O001O00000000O100000000O100O100O1O1SO]NXBi1g=h0M300O100O1M3O100M3O1O1N2O1O1O1N2N2N2M3O100L]LkBd3Y=O1O3M;cLWBn2n=O00cMQMkFn2U9RMSGf2j;ZMdA[2d>L1O5K4L6J2N1O1O1O1O2N0000000000O1O100N2H^Nn@g1m>8FPN_A10P2W>c000001O1O7`M^AU2h>N4L1O002N1O1O00000000000000O100O1lKTNSIn1h6YNTIi1l6aNiH`1V7YO_FeNYOOd0S2d9B^E\\NOU1l0m0h9T1TFmNl9X1nEiNQ:Y1nEgNR:Z1lEgNS:\\1jEeNV:_1eEbN[:b1]EbNc:m3O1O100O1O100O1O1O11O1O1O1O001O1O0000001O1O0000001O1O1OO1O13M2N1O001O1O001O0000001O2N001O001O001O002N1O001O00001O000000001O1O000000000000O11O001O0000000000O1001O001O000000000lJhDa4W;_KkD`4U;_KlDa4T;_KmD`4T;^KoD`4Q;`KPE_4P;aKQE^4o:bKQE^4o:aKSE^4m:aKVE]4j:cKWE\\4i:eKVE[4i:fKXEY4h:hKWEX4j:gKXEW4h:iKXEW4h:hKZEW4f:iK[EV4e:jK[EV4e:jK\\EU4d:jK]EV4c:kK\\EU4d:kK]ET4c:lK]ET4c:lK_ER4a:nK`EQ4`:nKbEQ4^:oKbEQ4^:nKdEQ4]:nKdEQ4\\:oKdEQ4\\:oKdEQ4\\:oKeEP4[:PLfEo3Z:QLgEn3Y:RLgEn3Y:QLhEo3X:QLiEn3e;O1O1O00lNRLWEn3i:RLXEm3h:SLYE9A5]OHNl1k;nM[E4c0g1R:UNaGd1o;N8H3fN[@T1j;lNRHS1n7mNSHR1l7oNUHP1k7POWHn0j7QOYHl0g7TO\\Hi0m;01O00000000001O1O0000001O000000O11O01O000000O100000000001O00000O1N110O1O1L5L3O1O1K50ON3O100O1100O001O001O001O000000001O2N5QOT@a0b`0B9FYoi01P`UO0``02]_O1_`09JDh_O=V`0Fh_O;V`09N10000000000000000m_O" + } + ], + "model_output": "A dark-colored minivan with a visible rear section featuring a red and white taillight on the right side, a partially visible rear window with a defroster grid, and a rear windshield wiper is driving on the road. The vehicle has a visible rear bumper and a partially visible license plate mounted on the back. 
The minivan's body shows a reflection, indicating a glossy finish, and there is a visible antenna on the roof towards the rear.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_65.png", + "subject_name": "cup", + "object_name": "bed", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 436, + 640 + ], + "counts": "oTo37[=5L3M2N2N2O1N101N100ODYCNf<1[COe<1\\CNd<2\\CNd<2\\COc<1]COc<1]COd<0\\C0d<0\\C0d<0\\C0d<1\\CNe<1[COe<2[CMe<4ZCLg<3ZCL^S5DnJYIAi6;ZIDg69\\IFf64_IKb63cII_65bIJ_65aIK_64bIL_63aIM`62aIM_62bIN^62bIN_60bI0^6OcI1]6NeI1\\6MhIOY60iIOX6OiI1W6NjI2W6LkI3U6LnI2S6LgIHoL=Y9JiIInL=Z9IPJ6P6IQJ7o5HSJ7n5GSJ9m5GSJ:l5DVJgJ\\O_L6h8`0iJZO_L6o75WH?Z3VO`L5n7]1bK]NaL5n7^1fKaN[4_1]K_NeL2o7^1[KaNeL1Q8_1YK^NhL2P8`1XK]NiL2P8c1TK^NjLOR8d1SKdNn4]1QKbNP5`1nJ_NS5b1lJ^NT5c1kJ]NU5d1jJ\\NV5e1iJ[NX5d1hJ[NY5f1gJYNZ5g1eJYN[5f1`JUNVM5[8f1_JVNUM4]8f1]JUNWM5\\8g1\\JTNXM5\\8f1aJ[N`5b1bJ]N`5b1eGXNc26h5i1YJWNh5h1jGTNLNg17d6h1RJZNk1J^Om1dNZNn1K^Oj1cN\\No1I_Ol1`N\\NQ2H@l1]N]NS2G@m1\\N[NT2J^Om1\\NZNV2I^On1ZNZNW2I^Oo1ZNWNY2J]OP2XNWNZ2J]OQ2UNXN]2H]OR2TNWN_2H\\OS2RNVNb2G[OV2oMTNg2FYOY2mMRNj2EXO[2lMQNk2EXO]2iMoMP3DWO_2fMnMS3DUOa2dMmMW3BUOe2_MiM]3BSOi2ZMhMa3ATOj2UMiMf3_OSOl2QMhMl3\\OSOQ3dLjMZ4UOQOc4o0^KoNc4Q1]KoNb4R1_KlNb4S1_KmNa4R1`KnN_4Q1cKnN^4Q1dKnN\\4Q1eKoNZ4R1gKlNZ4T1fKkN[4T1fKlNZ4T1gKjNY4W1hKhNX4X1hKhNW4Y1iKfNX4Z1iKeNW4Z1jKeNV4Z1mKeNS4[1nKcNS4]1nKbNR4^1nKaNR4`1oK^NR4b1oK\\NR4d1nK\\NR4d1nK[NR4f1oKYNQ4f1PLYNQ4g1PLWNQ4j1oKTNQ4m1oKSNQ4n1oKPNR4P2oKnMQ4T2nKkMS4V2mKhMT4X2lKgMT4[2mKbMT4^2mK`MT4a2mK\\MS4e2nKYMS4h2mKVMT4k2lKQMV4Q3jKmLW4S3jKkLV4W3lKdLV4]3jK`LX4`3lKYLV4j3iKRLZ4o3hKkKZ4V4jKbKZ4^4iKYK]4h4V22N2O001N2O001N100O2O0O2O1O0O2O000O2O1O00100O001O001O001N102N1O1O1O1O1O1O1O1O1O101N1O1O1O100O2N1O2N2N3M4L2N4L3L5L3L4K5DH9B=D;G9C=H9G8M3L6J4M5J6J5L5I7H?WOhmk0" + } + ], + "model_output": "A pink ceramic cup with a glossy finish, featuring a rounded handle on the left side, is filled with a frothy liquid, possibly a hot beverage, with a visible layer of foam on top, sitting on a 
bed.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_66.png", + "subject_name": "person", + "object_name": "pavement", + "predicate_name": "standing on", + "mask_rles": [ + { + "size": [ + 640, + 427 + ], + "counts": "n[k523VOa@l0R1N]>VO]@S1S1G`>l0\\ATOd>o0YAROf>R1VAnNk>U1QAkNo>V1PAkNn>X1PAhNP?Y1o@gNo>^1n@cNo>a1o@_N:\\OY=Y2ZB[N4MAQOa=j2gBYN2;D`N]=m2lBXNOc0Q=Z1kBVN2b0R=l4N4L2N3M1OCUC]I05k^571O2N1O2O1N]OjA_KT>b4lA_KR>Y5O001N2N101O001N1O101O000O1O2[KmIWGJJ241JO50i2]6f4PM^HlLg1V6i5l5O0001O7I:Fa0_Od0\\O:F=C;E<\\F\\ET9Z;^O;E4jGWDl7Sl0K4000H8134JN2BXAIP?K^`e4" + } + ], + "model_output": "A person wearing a dark blue jacket with a white stripe on the sleeve, dark pants, and dark shoes, with short dark hair, is standing with their left hand in their pocket and their right arm slightly extended, holding a black suitcase.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_68.png", + "subject_name": "cow", + "object_name": "grass", + "predicate_name": "walking on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "TVU22m>2O0O2O0000000O2L9\\ADX>c0N2N1000001O01O01O010O100O1O2M2O2M2O4K4M6I;F3M1O00YOg0O1O1O1O10000O11O001O2N2N4L2N2NCiBROV=n0kBSOS=m0oBUOmn0L4K5O13N6IWOiBFY=6iQn5" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"m7R7n7000000000001O00001O0000000000000000001O00000000001O000000000000000000000000001O0000000000001O0000000000000000000000001O0000000000000000001O000000001O000000000O1000000001O0000O1001O0000000000000000O10000001O00000000O100000000001O0000O2O0001O00000000000000O20O00000000000001O0000000O1UJfGQ5Y8oJhGP5X8PKiGo4W8QKiGo4W8QKjGn4W8PKjGP5V8PKkGo4U8QKkGo4U8PKlGP5T8PKmGo4T8PKlGP5T8mJoGS5Q8lJSHQ5m7oJ]Hg4c7YK^Hf4b7YK`Hf4`7ZKaHe4_7ZKbHf4^7ZKdHd4\\7\\KeHc4[7]KeHc4[7]KeHc4[7^KdHb4\\7^KcHc4]7]KcHc4^7\\KcHc4]7]KcHc4]7]KfH`4Z7`KgH_4X7bKjH\\4W7cKkH[4U7eKmHY4S7gKPIV4P7iKSIU4m6kK_Ii3a6VLfId3[6[LfId3Y6\\LiIc3W6^LhIb3X6^LhIb3X6]LhId3Y6[LXIT4h6lKQI[4o6eKoH]4Q7dKmH]4S7bKnH^4R7bKnH]4S7cKmH]4S7cKnH\\4R7eKnHZ4R7eKQIY4o6gKSIW4m6iKVIU4i6kKXIT4h6lKZIR4g6mK[IP4f6PL]Im3c6SL`Ij3`6WL`Ih3`6XLlHFWOR4l7WLmHLSOm3P8WLlH3nNf3V8WLlH5lNd3X8WLlH6jNd3Z8VLlH`4T7`KlH`4T7`KkH`4W7_KiHa4W7^KkHa4U7_KkHb4T7^KmHa4S7_KmHa4S7_KnH;eN^3]8WLnH7iNb3Y8VLRI1iNi3U8VLfIj3Z6VLfIi3[6WLfIh3Z6XLfIh3[6WLeIj3Z6VLfIj3Z6WLeIi3[6WLeIi3[6WLeIi3[6WLeIi3[6XLoH]4Q7cKiHc4W7^KiHa4X7`KgH_4Y7bKgH]4Y7dKeH]4[7eKjGT5V8nJcGW5]8f000000000000O11O0000000000O10000001O01O00000O10001O000001O00000000O2O00000000001O000000000000000000000000000000000000000000000000000000000001O00O1000000000000000000000001O0000000000000000000000000001O00O100000000000000000000000001O00000000000000001O0000001O00001O00000000001O000000000000001O0000000000001O00000000001O0000000000000000001O00O1001O00000000000001O00O100000001O000000000000000000001O0000000000000000000000001O00000000000000001O00000000000000001O0000000000000000000000000000000000000000000000001O0000000000000000000aJjFT5W9iJkFW5V9hJjFX5V9hJjFX5W9fJjFZ5V9fJjFZ5V9fJjFZ5V9eJjF\\5V9cJkF]5\\900000001O00001O0000001O0000001O001O001O0000000000001O00000001O00O10000000000000000000001O0000001O00000000001O00000000O1000000001O00O10000000000000000000000O10000O100_OiJYGW5g8jJWGW5i8lJSGU5m8lJQGU5o8mJlFV5T9;000000000000000000000000001O0000000000001O00000000000000001O000000000000000001O0001O0000gF" + } + ], + "model_output": "A 
black and white cow with a predominantly white head, a large black patch over the left eye, and smaller black spots on the right side of the face stands on grass. The cow has a black body with white patches, a visible right ear, and is standing with its head turned slightly towards the viewer, showcasing its profile. The cow's tail is mostly white with a black tip, and it has a visible udder.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_69.png", + "subject_name": "wine glass", + "object_name": "person", + "predicate_name": "held", + "mask_rles": [ + { + "size": [ + 640, + 427 + ], + "counts": "WUb4:ec02M3N2N2O1N2O1N101]N\\Oh_Oe0T`0_Ok_Oa0R`0Cl_O?Q`0Co_O>n?EQ@;l?IS@7k?KV@5g?N]_OWO=j0T`02]_OUO?i0R`05^_ORO>k0S`05^_OQO:o0U`03^_OPO=m0P`0e0o_O[OP`0g0o_OYOP`0h0P@XOP`0i0V@POi?R1W@mNi?S1W@lNj?U1U@kNk?V1U@iNk?W1U@hNl?X1U@fNl?Z1U@dNk?]1V@bNj?^1W@`Nj?`1W@^Nk?`1W@^Nj?b1X@[Ni?e1Y@VNj?j1V1O2O000O2O0O2O0O2N1O2M3M4L4iN]Rg2" + }, + { + "size": [ + 640, + 427 + ], + "counts": "_jT11VSc25a`]M9H5K6VIM\\I9]6M`I7\\6KbI:Y6JbI1RO=Y2Y5X2X5?VKj4TO`H9NGZ2e5c0RKd4\\O^H<2AT2d5m0nJm2@_J0h0\\1h4S1jJf2L^JH=k0\\1g4T1iJd2OZKa0n0g4V1fJ[2MZJ5k0a1`4V7nKWGBc1\\4W7VLUG^OAROo1Z5Z7WLWG\\O@SOo1Z5Y7XLXG[Oc1]4T7YLYGZOc1\\4Q7aLXGTODROo1Y5S7dLYGQOEROo1Y5S7`L]GUOAROo1Y5S7`L^GTO@SOo1X5S7bLmFVN`0k0AUOo1W5S7kL[GgNCWOo1W5S7PMWGaNGYOn1V5T7nLZGbNDYOo1W5S7lL\\GdNe1`4o6kL_GcNb1b4o6iLcGbN_1e4n6iLdGaN^1f4n6iLdG^NXOCV2W5m6hLfG\\Na1l4i6hLiGYN^1o4i6gLlGSNSONY2Y5g6fLmGSN`1W5c6fL`JZ3`5fL`JZ3a5eL_J[3a5dL`J]3_5cLaJ]3`5bL`J^3a5aL_J_3a5^LbJb3_5]LaJd3^5[LcJe3^5YLcJg3]5YLcJh3]5SLgJm3[5lKjJU4W5_KjFUOT4\\5[:1O00001O00001O1O1O001O001O2N1bDZJ`8f5_G[Ja8f5WGcJg8^5QGkJm8W5mFnJR9R5oFmJQ9T5PGkJo8V5mFnJR9U5jFmJU9Y5eFgJ[9Y5eFgJ[9Y5fFfJZ9\\5cFeJ]9j5nD]Il0n0U:l5hEVJX:k5bEYJ_:g5`EWIBg0n:T6[EYIEd0P;Y6PEZJP;[700000000000000000001O0000001O2N00000000001O00001O0000001O001O0000001O000000001O0000001O000PKlF`KNZ2V9l1[GdK@`2U9T1YHULcNg2T9n0gHoKYNS3Q9h0oHPLlMGOb3V9?eIPLTMa3X93oIaLhLX3Y92TJdLeLZ3X9MVJjLbLY3Y9GUJmL`Lf3[9YOYJk0h5ROYJ
oNbLJU9V1YJoNhLGP9Y1XJkNVMCb8b1XJeN_MFZ8d1UJfNdMEW8g1SJcNhMEV8h1PJcNnMBR8k1oIcNRN@P8m1jIfNYNZOn7P2gIbNiLaNe1l0k7R2fIaNbN]Oh7S2dIaNfNZOg7U2aI`NnNWOb7Y2^I`NTOTO_7\\2[I`NXOSO^7]2YI^N\\OTO[7_2WI]NBQOX7b2TI^NFnNW7e2mH`N0hNU7g2jHaN4eNS7i2hHcN6cNR7k2gHbN9aNQ7n2bHbN_:_1]EcNc:_1WEeNj:[1TEfNn:Y1QEfNQ;Y1mDhNW;V1eDmN[;U1cDkN^;U1`DkNb;T1]DmNd;T1XDmNk;S1QDmNRoNZAE2O4KNOL323ij0" + } + ], + "model_output": "A clear wine glass with a thin stem and a broad, stable base, partially filled with a transparent liquid, reflecting light on its curved surface, is held by a person.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_70.png", + "subject_name": "person", + "object_name": "snow", + "predicate_name": "over", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "kTY55c0Nj;9oCJP<;kCFTU8R6THnIm7o5VHoIk7h4jEiKf2SOb7Q5oEZKCGf=n4iBVKb=i4l0O0SO\\A]Le>\\3gA\\L]>a3gAZL\\>a3T1N2OhAcLkM9f=P3_D_M[;^2bDjM];S2bDQNVNlNH1ne0f2T6g4oHVH8b6C]I=c6D\\IOn0d4j5[NjHoLe0Ng0g4k5\\NiHmLj2f4^4]NiHhLm2j4[4^NhHeLP3m4X4^NXLa1j3^NVLb1j3bNRL]1o3dNPL[1Q4eNoK[1Q4eNoKZ1R4fNnKZ1R4fNnKY1S4fNnKZ1R4fNnKY1T4fNlKZ1T4fNlKY1V4fNjKY1X4fNhKZ1[4cNeK\\1^4aNcK_1_4_NaKa1a4]N^Kc1m4RNTKn1Q5YLaG7_3`3T5SL`G;]3b3^5_K_Gn0S3c3`6ZLaId3d6WL]Ii3d6VL\\Ij3d6VL\\Ij3e6TL[Im3f6RLZIm3l6nKTIR4n6kKSIU4n6jKRIU4P7jKPIV4R7hKoHW4T7eKmH[4V7bKjH^4X7`KhH_4];0000OeEcKU5]4kJbKV5^4V50cEfKT5Z4kJmKn4T4RKmKn4R4RKmKo4S4QKmKn4T4SKjKmMoAR2R>oMmAP2e8TMgKm0dKo1`8\\MhKf0hKm1_8aMfKe0iKj1]8gMgKa0kKh1\\8jMgK?mKf1\\8mMeK?nKc1\\8RNcK=PL`1]8UNaK=QL^1Z8ZNcK:RL\\1W8_NeK7SLY1X8cNbK6ULW1W8fNcK3VLW1V8hNbK3WLU1W8iN_K5YLR1X8kN\\K5[Lo0Y8nNZK5\\Lm0Y8QOWK5^Lk0[8SORK5cLg0[8UOQK5cLf0\\8VOoJ6dLd0]8XOkJ8fL`0_8[OeJ9kL<_8_ObJ7nL:a8_O^J:PM7b8A[J:RM5b8EWJ9VM2c8IoI:]MMd8KlI:_MKe8NgI9dMIe8OdI:gMGe83]I9nMCf89SI8WN_Of8=lH7^N\\Of8a0cH8gNWOf8e0[H7POTOe8n0gG:CgNg8a5YG_Jg8a5YG_Jg8a5YG^Jh8b5XG^Jh8b5XG]Ji8c5WG]Ji8c5WG]Ji8c5WG]Ji8CVGd31iLh8A_GoMC^56QMi8@fG]3ASMh8^OnG[3ZOWMh8]OUHV3SO]Mh8ZO`HQ3hNeMh8WOeHR3cNfMh8SOmHU3[NhMh8ROPIT3XNjMh8POVIR3RNnMh8POVIR3RNnMg8mN\\IT3mMoMg8k
N_IU3iMQNh8gNcIW3fMQNg8fNgIW3bMSNg8cNlIX3]MUNg8\\NWJ[3RMXNg8YN]J^3kLZNh8VN_J`3jLZNe8UNdJ`3gL[Ne8QNiJc3bL\\Ne8oMlJd3_L^Nd8jMRKg3YL`Ne8gMUKh3WL`Ne8cMYKl3RLaNo<_1QCaNQ=]1oBcNQ=]1oBcNR=\\1nBeNQ=[1oBeNQ=[1oBeNQ=[1oBeNQ=[1oBeNR=Z1nBfNR=Z1nBfNR=Z1nBfNR=Z1nBfNR=Z1nBfNR=Z1nBgNR=X1nBhNR=W1oBiNQ=W1oBjNP=W1oBiNQ=W1oBiNQ=V1PCkNP=T1PClNQ=S1oBnNo:hM\\FX3fNPOh:oMbFo2gNSOd:QNdFl2hNTOb:SNfFe2jNZO]:TNfFc2mNZOY:XNgF^2QOZOW:ZNgF[2SO\\OS:\\NhFY2UO\\OQ:]NiFV2WO^Om9_NkFQ2ZOAg9bNnFl1\\OCb9eNQGg1^OE]9gNUGd1^OE\\9iNUGa1@GY9iNVG_1CIS9lNXG\\1EIQ9lNZGZ1FKl8oN]GV1GKk8PO^GT1HMh8QO^GR1KOa8SOdGl0M2]8TOeGi0O4Z8UOfGg004Y8VOgGe016U8XOiGb027R8ZOjG>69n7ZOlG=6:l7[OmG:8a7BSH0=`0\\7DUHK`0b0X7FWHFc0e0T7GWHDf0f0n6KYHAi0e0h6]3XIeLa6a3`I`L\\6a3eIaLU6b3lI`Ln5c3SJ`Lg5b3ZJ`LZ2PL_Oc7WN_LW2RL[Ob7_N]LU2]LlNY7PO[Lo1cLiNU7YOZLk1gLgNo6_OZLg1kLhNl6BXLf1mLVMPOF0Q1m7=WLd1mLUM]O1CNR8Y1QL_1hKSMP;_1XI]1k6eNVIX1k6iNUIU1m6kNTIT1k6mNVIQ1j6oNXIn0h6TOYIj0h6WOXIg0h6ZOYIe0g6[O[Ib0e6_O\\I?d6B]I;d6F]I9b6H`I5`6L`I3a6M`I2_6ObIO^62cIL^64dII[69eIF[6;fIDZ6hIBW6?jI@U6a0mI\\OS6e0nIYOR6h0oIWOQ6i0PJVOP6j0QJTOo5m0SJoNn5R1WJ_NQ6c1YJlMn5T2n5100O1O1O100O1O1O1O1O1O1O100O100O1O1N20Dn^OjMQa0c2O1O1O1O10000N2O1O1O100O1O1O1O1O100O002N1O1O010O2O0O1N10101O0O1O0O201N100O1N2O2N1O2Lm5Z@^GN^O2YOO3N44oa1" + } + ], + "model_output": "A small, fluffy white dog with light tan markings on its ears and around its eyes, sporting a black nose and dark, expressive eyes, is sitting on a bench. 
The dog is wearing a silver chain collar with a visible tag and is attached to a black leash.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_72.png", + "subject_name": "fence", + "object_name": "person", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "bbe32T=9K2N3M3M2N1000000000000000000000000001OO10000001O000000000000000000000000000000000000000000000000000000000000001O00O1000000001O0000000000000000001O0000000000000000000000000000000000000000000000000000000000000000O10000000000001O0000000000000000000000000000000000000000000000000000001O00000000000000000000000000000000000000000000000000001O00000000O10000000000000000000000001O000000000000000000000000000000000000000000000000000000001O000000000000000000000000000000000000000000000000000000001O0000000000000000000000000000000000000000000000000000001O0000000000000000000000000000000000000000000000001O0000000000000000000000000000000000000000000000000000000000000000000000001O0000000000000000000000000000000000000000000000" + }, + { + "size": [ + 427, + 640 + ], + "counts": 
"e`l0i0[<=B`0D9J4PJWN6o1@ZNeMYOH8d2?dMYOH8d2?dMYOH8d2`0cMYOH7e2`0cMYOH6f2a0bMXOI7e2a0bMYOH6f2`0cMZOF7g2?cMZOF7g2?dMYOE8g2`0cMXOF8g2`0cMXOF7h2a0bMXOF7h2a0bMXOE8i2`0bMXOE8i2`0bMYOD7j2`0bMYOD7j2`0bMYOD7j2`0bMYOD7j2`0bMYOD7j2`0bMYOD6k2a0aMZOC5l2a0aMZOC5l2b0`MYOC6m2a0aMXOB7m2a0aMXOB7m2a0aMXOB7m2a0aMYOA5o2b0`MYOA5o2b0`MYOA5o2b0aMXO@6o2b0aMXO_O7P3a0`MYO@6P3a0`MYO@6P3a0aMXO_O7P3a0aMXO_O7P3a0aMXO^O8Q3`0aMYO]O7R3`0aMYO^N]O:i0h3a0aMXO\\NB8e0k3a0aMXO\\ND6c0m3a0aMXO[NG5`0o3a0aMXOZNI5>P4b0`MWO[NJ4=Q4b0`MWO[NJ3=S4b0_MWOZNK4XLBh3?WLAi3a0TL@l3b0RL_Om3b0RL^On3d0PL\\OP4e0oK[OQ4g0mKYOS4j0iKWOW4j0hKVOX4l0fKTOZ4m0eKSO[4o0cKQO]4P1bKPO^4Q1`KPO`4S1]KmNb4U1]KkNc4X1TKnNm4f30000000000000000000001O000000000000000000001O00000000001O0000001O0000000000001O000000001O000000001O000000001O00001O001O1O1O1O1O2N2N2N1O2N2N2N2N2N1O2N2N2N2N1O3M2N1O1O2N1O2N2N1O3M1O1O1O2N3M4RMeGg2`8N2N2N1O3M2N1O1O1O1O2N3M2N2N2N2N1O1O1O1O2N2N2N1O2N2N2N001O1O2N1O1O2M3N1N7[ObENg:N^EN[Y\\2" + } + ], + "model_output": "A baseball catcher in full gear, including a blue helmet with a face mask, chest protector, and leg guards, is crouched on the playing field, ready to receive a pitch. 
The catcher is wearing a white uniform with blue and black accents, and is holding a mitt in the left hand, which is extended forward.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_74.png", + "subject_name": "person", + "object_name": "person", + "predicate_name": "looking at", + "mask_rles": [ + { + "size": [ + 640, + 296 + ], + "counts": "\\aT38cc06J5N2N4n\\OEZb0Q1G9O1OHn]OcNSb0[1Q^ObNPb0\\1;2N;E00010O`Nk]OU1Vb0iNl]Oa0<]Oia00m]Oa0<]OUb0=l0GQ[W2" + }, + { + "size": [ + 640, + 296 + ], + "counts": "QXZ28cc07d\\OLeb0i0N3N1O2[]OUOa0Ek`0`2M3M100O1O100O100N1O2OYO]_OQNc`0o1f0201d^OhMm`0S3C001O0001O1TMY_O]2i`0_M[_O_2Va0M3M1O3N4XNZ^Ol0oa0jNV^O:8CZc0O1O0O3NfYf2" + } + ], + "model_output": "A person with short hair, wearing a dark-colored t-shirt and blue jeans, stands with their back to the viewer, beside another person.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_75.png", + "subject_name": "motorcycle", + "object_name": "grass", + "predicate_name": "parked on", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "gmT72Y=2N001O1O1O2N1O2N1O2N1O6J:F1O1O00000000000000000000O100O100O100O100O1O100N2O100N2O1O1O1N2N2N2M3Lb`a0" + }, + { + "size": [ + 427, + 640 + ], + "counts": 
"1o1a0\\5o2dJQM]5n2aJTMa5j2_JVMb5i2^JVMd5i2\\JWMf5g2ZJYMg5f2YJZMh5e2XJ[Mi5d2VJ\\Mm5b2SJ^Mo5`2QJ`MP6_2PJaMQ6^2oIaMT6]2kIdMV6[2jIeMW6Z2iIfMY6X2gIhM[6V2eIjM[6V2eIhM_6V2`IkMb6S2^IlMd6S2\\ImMe6R2[InMf6Q2YIPNi6n1WIQNk6n1UIRNl6m1TISNm6l1RITNP7k1PIUNR7j1lHVNV7i1jHWNW7h1hHYNY7e1hH[NY7d1gH[N\\7c1dH]N]7b1cH^N_7`1`HaNa7^1_HbNb7]1^HbNc7_1[HbNg7\\1YHdNh7[1WHfNk7W1VHiNk7V1UHiNn7U1QHlNP8S1PHmNQ8R1oGnNS8P1mGoNV8o0jGQOW8n0hGRO[8l0eGTO]8j0cGVO_8h0aGWOa8h0_GXOc8f0]GYOe8f0ZG[Oh8UOn0V9dNeG>[Om0l8dNiG>_OP1c8bNnG>AQ1^8aNQH=CT1Y8_NSH>IP1R8bNUH>KQ1m7aNXH=OP1h7cNXH>0o0h7cNXH>5k0b7gNYH>:g0\\7kNZH=?e0V7nNZH>`0e0U7mN[H=a0g0`NQOV8KiH=a0h0]NSOX8HiH=c0V1c6]NjH=c0O[Ng0X8mNjH=b00^Nd0V8oNjHD5HU1n7XNfH>B7IS1S1lNh4]OjJ=A9IQ1P1VOf4TOnJgNf4]NXK>C_2=kNe4XN[K>C_2;POd4SN^K=D`28TOd4oM`K=D_26[Oc4iMcK:]12S2V3RL^L>:]12T2U3QL_L=;_10T2U3PL_L>=^1NT2V3PL_L=>`1LT2V3oK`L=>a1JU2W3mKaL=>a1JU2W3mKaLi2X5_1iJcNW5\\1iJeNV5[1jJeNV5[1jJfNV5Y1jJhNV5W1iJjNW5V1iJkNV5U1jJkNW5T1hJoNV5Q1iJROV5m0jJTOU5l0kJTOU5l0jJVOV5i0jJXOU5h0jJZOV5e0jJ\\OV5c0jJ^OU5a0lJ_OT5a0lJAS5>lJDS5[J^Oj5a0VJ^Ok5b0TJ]On5b0SJ\\Oo5d0QJZOR6f0mIXOU6h0kIXOU6h0jIXOX6f0jIXOW6i0iIUOY6j0kIQOV6o0PJjNQ6V1RJfNP6Y1RJeNn5[1ZJ\\Nh5c1[JYNf5g1^JTNc5l1dJlM]5S2hJiMY5V2oJaMR5`2VIeLb1e0Y5f2UIhL`1a0\\5f2TIoL[1:a5g2TISMX14e5i2SIXMT1Ni5j2SI[MQ1Jm5k2RI^Mn0GQ6j2QIhMe0]O[6k2PInM>WOc6k2oHQN:UOg6j2oHVN5oNm6k2nHWN3oNP7h2nHUOR7k0nHUOS7j0mHUOT7l0kHTOU7l0kHTOV7k0jHTOW7k0jHUOV7k0jHUOW7k0hHlL7g1R7\\1gHlL9g1Q7[1gHmL9h1P7[1gHmL:g1P7[1fHmL;g1P7\\1eHlL2O1J6OM21LHcA6Vm0KbA1N21OZhb0Mnh\\O0k>1]AOc>000fm01_cN1d>N\\A0k>0bP17``NJ\\95`K;Y4L`K:[4MXK>d4HlJg0P5[OmJh0Q5\\OgJi0W5ZOdJk0Z5ZO^Jl0[2QNbMW1Lm0ISO_NZOc1h02n0GP13UN1n0JQ1EbLYNa1T2P1IR1GbLXN^1T2T1GQ1J[N:i0IP1JYN:j0Jo0KWN:l0Io0KWN9n0Hm0OVN6o0In0OUN6n0Jo0OTN5n0JQ1hN[L7f1W1n0JR1dN_L9a1`0dMH[3j0U2mNlLY1P1HV2nNkLY1o0IW2mNkLY1n0IY2mNiLZ1n0IZ2lNiLZ1n0H\\2kNhL\\1m0HU57lJGU59lJFT5:lJEU5;lJDU5;mJCT5V5BjJ>V5BiJ?W5AiJ?W5AiJ`0V5@iJa0W5_OiJa0V5@iJb0V5^OjJb0V5^OiJc0W5]OiJc0W5]OhJd0X5\\OfJg0Y5YOUH3`0e0[7XOSH6?d0^7VOQH9>d0`7SORH;6h0h7mNRH=2i0k7jNSH>0i0m7iNSH>Ok0m7gNTH?Mk
0P8eNSHb0Il0R8cNVHa0^OV1[8YNWHb0dNA5f1P9WNWHc0bNC4d1S9VNWHe0^NE4c1W9RNWH[1^Ne0[9PNWHd3g7]LYHc3g7]LYHc3g7]LYHc3g7]LYHb3h7^LXHa3j7^LVHa3k7_LTHb3l7^LTHa3m7_LSH_3o7aLQH]3P8dLPH[3i2gLfK0l06Y2H\\NZ3l2VMSL3a2]O`NX3o2fN^NRNbNX3P3hN]NPNcNW3f2iLiKU2l2kMeNV3f2lLgKT2e1lM]OMa0U3l2YOQMjM_OHd0U3l2[OmLkMBEe0T3j2BiLjMF@g0T3?QMMk2mNeMM[Oj0S3=_MPN3j0`2KdM2XOl0R3=WNjNn1HcM5VOl0R3;YNjN\\2XOWMg0ROl0R3:ZNiNd3NPKn0R3;\\NeNf31lJo0R39_N^N^NIV5a0kJo0R38aNcNd36iJn0S38bNbNc38hJn0S38bNaNd39gJn0R37fN_Nc3i4dNXHn0o2>i4dNXHn0n2?j4cNXHm0o2`0j4bNWHl0Q3a0i4cNVHk0Q3c0j4aNUHl0Q3c0j4aNUHl0Q3b0m4`NRHm0P3d0Q5]NoGm0Q3g0S5XNmGQ1o2h0Y5RNhGU1P3i0_5VObJj0^5UOaJl0a5RO_Jn0c5PO]JP1d5nN]JS1c5lN^JT1c5hN_JX1e5cN[J^1h5_NXJb1k5XNXJh1`90O1O010[GkM]4W2bKkM\\4V2_KoMa4Q2^KPNa4Q2\\KRNd45hGQ1c3kNd46hGo0d3kNd46hGo0d3kNd44jGQ1b3jNe43lGS1G`Nb3;j41oGY1V3eNl40PHZ1U3fNk40PHY1U3hNj40QHV1T3lNl4MQHV1o2^NeL?\\8MPHV1o2_NcL>_8MoGV1o2_NcL=`8OmGU1P3^NdL=`8OmGV1o2_NcL;c8MnGX1l2lNY5JlGZ1k2kNZ5KkGZ1k2kNZ5JlG[1j2kNZ5JlGZ1k2kN[5IlGU1m2SOY5GjGT1g2iNhL>g8EjGS1m2XO[5DiGS1k2YOW6f0jIZOW6d0hI^OY6a0fI@Z6?fIA\\6=eIC[6;fIE]64hIK[61gIOZ6NhI1[6KgI4^6DeI<^:11O:DaQl2" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"l;S3m;0O10000O10000O1O100O1000000O10000O100O100O10000O1000000O100O100O100O1000000O100O1000_OoDRMR;k2b0N3M2O2N1O2O001O1O10O01000O010O1O01OTO_MZE`2g:bMXE\\2i:eMWEY2k:hMTEV2n:jMQEV2o:lMPEQ2S;PNlDn1V;SNiDk1Y;UNfDj1[;WNeDg1];YNcDf1^;[N`Dd1a;]N^Dd1a;]N^Dc1c;^N\\Da1d;`N\\D_1e;bNZD\\1g;fNWDZ1j;gNSD[1m;fNQDY1QO100O100O1O2QOhB3Z=ETC4n=M2O2N2MdTh27njWM9B_OTBj0j=7O100000O03N7I9PM]NPHf1k7\\NTHe1k7\\NUHd1j7^NUHa1k7_NUHb1j7_NVH`1j7`NWH_1i7bNVH_1i7aNXH^1h7bNXH_1h7`NXH`1h7`NYH_1g7aNYH`1f7_N[Ha1e7]N]Hc1d7YN`Hf1]:0000000001O0001N1000001N100YDaNl9a1PFiNh9X1UFnNg9T1UFPOj9[1eEkNZ:n2O1O2N1N2O1O100O10000000000000000O10O02O0O100O1O1O1O1O1O100O1O1O1O100O100O100O100O1O100O1O1O100O1N2O1M3C=N2O1M3O1O1O1N2N2N2N2N2N2O1O100001O00000000001N1001O0001O000O10001O00000000001O00001O00001O00000000001O0000000000001O00000000001O000000001O00000000001O000000000000001O0000000000000010OO10000000001O00000000000000001O0000000000000000000000001O0000000000000000000000000000000000000000000000000000001O0000000000000000000000001O000000000000000000000000000000000000001O00000000000000000000001O00iF" + } + ], + "model_output": "A young boy with a focused expression, wearing a beige baseball cap and a pink and white striped t-shirt with a graphic design on the front, is sitting on a bed. 
He is holding a yellow cup with a straw in his right hand and appears to be in the middle of an action, possibly drinking or about to drink.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_77.png", + "subject_name": "person", + "object_name": "road", + "predicate_name": "crossing", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "`gm36Q=4kNIjDc0P;GhD>LWOm:?QE?J\\OR;6PEP1e0_N`9Y2]FhMb9[2[FgMd9\\2YFdMh9_2SFbMm9m22O1O5K4L1O1kNdEYO]:e0eEZO]:e0cEYO_:f0cEWO`:g0bEWO`:c0eE[O_:LXEI?9]:FYF9`;L1O1N3N2M_PQ4" + }, + { + "size": [ + 427, + 640 + ], + "counts": "c9g3c910000O100O01000O2N100O10000O1000000O100O1000000O100O100O10000O10000N2O10000O1O10000000000002N8H1O001O00000000001O000000O100000000000000FVLRGk3^8i0J6O1N2F:O100000000O1000000001O000000000000000000O100000000000000000000000000000000000000O1000000001O0000000000000000OYLWHn1i7mM]HR2c7iMcHV2]7iMdHW2]7gMeHX2[7fMgHZ2Y7dMiH\\2W7aMlH_2S7`MoH`2Q7^MQIb2o6\\MSId2n6WMVIi2k6PM[IP3f6oLZIQ3g6nLYIR3i6kLXIU3k6cLZI]3Z8000O100O100O100O100001O0000000000000000O10000001O1O00000000UOdLfG]3Y8eLfG[3W8jLgGV3X8kLgGV3X8lLgGT3X8mLgGT3Y8lLgGT3Z8kLeGV3\\8iLdGW3_8fLaGZ3h8\\LXGe3i8ZLVGg3W901O2N[N_LaI`3^6cL`I]3`6dL_I\\3a6fL]IZ3b6kLZIU3f6lLYIT3g6mLXIS3h6nLWIR3h6[MlHe2S7aMhH_2W7fMeHZ2[7iMbHW2^7lM_HT2a7mM^HS2d7kM\\HU2j7fMUHZ2S8^MlGc2W8ZMiGf2Y8ZMdGg2\\8T100000000000000000000000000000000000000O100O1LUKgGl4U87O1O100000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000001O000000002N4L00001O1O0000001O0000001O004L1O00000000000000000000000000O100O1O1O1O100O100M3N2M3N2O1N2O1N21O001O00001O1O1O00000000O1N200O100000000001O1O2N1O00000000O100O1001O001O00001O=lJ_Gd4Q9C2N001O0000000000000000O100001O0000O100000000000000O1000000000000O100M3FeKcG\\4X8eKaGf4^84O1001OO10000001O000000000000001O0000001O1O1O00000000000000001O0000000000000000000000000000000000O100O1001OO1O100000000O10000001O0000000000001O1O2N2N5K4L002N000000000000000000000000000000001O2N4L3M3M2N2N000O1001O0000000000000000000QMYLZLg3e6000
O1L4O1LSLiFn3W93001O1O6J5K3M5K4L1O_HiLo4U3d21O00000000000000000000000000000O100O100N2N2O1O100N2O1O100O1001O000000000000O100001O00000000000000O10000000000000000000000000000000000000000000000000000000000000000000000000000000000ZF" + } + ], + "model_output": "A person wearing a dark jacket with a fur-lined hood, light-colored pants, and dark shoes, with short hair and a light-colored beanie, is walking on the road.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_78.png", + "subject_name": "person", + "object_name": "bench", + "predicate_name": "sitting on", + "mask_rles": [ + { + "size": [ + 429, + 640 + ], + "counts": "WS_24X=2N1O2N1O100O1O100N2O1O1O1O1[OBYD`0c;FZD;c;KYD6f;MWD4g;OVD3i;h0O2O0O2M2O1O1O10001O001N1L4N2O2M2O1O2N100O1O1N2O100000000000010O01O001BTEZNm:^1TE[N06n:]1^E[Nf:d1b000E\\DmNd;S1\\DnNd;]1010[O]DoN37a;e0iD[OY;KXD`0b0D\\;:fDF\\;7eDH_;3bDN^;1cDN];2cDN_;ObD1_;MbD3`;JbD5[<01O0000001OO10O10Sli4" + }, + { + "size": [ + 429, + 640 + ], + "counts": "R\\Y1i0a<3O1O1O1O0OG7M221101O1O1O1L4J6J6OGPDWOPOmgd4" + } + ], + "model_output": "A person with long hair, wearing a patterned dress and high-heeled shoes, is captured in a dynamic pose with one leg extended forward and arms outstretched for balance, sitting on a bench.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_79.png", + "subject_name": "person", + "object_name": "tie", + "predicate_name": "wearing", + "mask_rles": [ + { + "size": [ + 640, + 428 + ], + "counts": "TjW56fc05L4K4M4N2J7G8N2N3N0O3N3L2O1N2O1O1N1000O1O001O1O10O01O1O1O00100O10O0100O00_OfN^^OY1\\a0QO`^Oo0]a0WOa^Oh0]a0m002N20O0[Ob^OiN_a0l1O1O101N1000001O001O1O1O1O1O1O010O000010O1000O10O1O2N1QNk^OP1Xa0iNn^OV1na0O1O2N1O2N1O2O001N100O2O0O2N100010O0010O010O01O01O001O001O010O10O1O010000O4M5K2N4L2O1O2N0N3N1M4M1O3LYae0" + }, + { + "size": [ + 640, + 428 + ], + "counts": "l]V63_c0Ol\\O3Qc00m\\O2Rc0Ol\\O3Sc0Nj\\O5Uc0;0O1O1O1O2N3M4Ca\\O1oam1" + } + ], + "model_output": "An older man with a receding hairline and a 
full head of gray hair, wearing a dark blazer over a light blue dress shirt and a dark tie.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_80.png", + "subject_name": "dog", + "object_name": "elephant", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "mce59b0Le;9PD4k;NQD7i;^O[D3Jc0h;XOcDR1m;L01M4M^OZDMZ;2jDKV;5lDIT;8SE@m:`0n0:F2O1QDPOa;R1[DROc;P1[DPOe;Y1HdNdD\\1];fN`Dm0O\\Oa;c0iD[OW;c0lD\\OS;d0QEXOP;g0SEVOl:j0j0000000002N3M3N[OQD0m;JnCI=8eWIAh6?YI@g6`0]I\\Oc6d0aIXO^6i0cIVO]6k0eIRO[6n0fIQOZ6o0hIoNX6Q1kIlNU6T1lIkNT6U1nIiNR6X1PJeNP6\\1RJaNn5_1UJ^Nk5b1VJ]Nj5d1WJZNi5f1WJZNj5e1WJZNi5f1XJYNi5g1WJWNk5h1WJVNi5j1XJUNi5j1XJTNi5l1k2O010O1O002N001O1O2N1O101N1O2EZDoNh;o0YDoNj;o0XDoNi;P1XDoNj;o0VDPOl;P17011OO0O3N4L11N01O1O10O100O00012NO01001N1N2O1N3O1N3N001O0O2M2N5JbPP4" + } + ], + "model_output": "A medium-sized, tan dog with a black muzzle, erect ears, and a bushy tail is chasing an elephant. The dog has a muscular build, with a broad chest and strong, straight legs. 
Its coat is short and smooth, and it has a focused expression with its mouth slightly open, revealing its tongue and teeth.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_81.png", + "subject_name": "potted plant", + "object_name": "vase", + "predicate_name": "in", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Sa\\13k>`0A?B:F>B;E:FP3R4PMlMNbM7=k2U4PMlMNaM:9d2\\4oLlM0\\M?j2j2oL[LCj2l2mL\\LA;i02m04U3P4aKkK=On06T3o3aKmK>Mm06U3o3`KPL?Hm09T3n3`KRL`0El0;U3l3aKUL=Cn0a0c5S2VLlMj3V2VLhMj3[2VLbMj3_2XL^Mh3e2XLXMh3i2ZLSMg3o2T43dEoLc8S3[GoLc8R3\\GPMb8P3^GQMa8o2_GRM`8m2aGSM_8l2bGTM^8k2cGTM^8k2cGSM_8l2cGRM^8l2dGRM^8m2dI`Mn3_2mKgMS4X2nKhMR4W2nKkMjMZOf4i2bMnMfM_Oc4a2iMQNcMNT4n1\\NTN`MOS4l1^NVN^MNT4j1`NXN\\MNT4h1bN[NWMNX4f1aN]NUMOZ4c1bN_NRMO\\4a1cNaNoLO^4`1cNaNoLN_4`1aNdNmLOb4]1_NUNYLFe0i0c4[1_NVN\\LCc0k0c4\\1^NUN_LAa0n0b4\\1]NVNbM=R4]1WNZNhM9Q4\\1UN^NiM6S4\\1SN_NjM5S4\\1RN_NkM5T4\\1PN`NlM4T4\\1nMbNlM4V4[1lMaNnM5V4Z1lMaNmM6W4Y1lMaNmM4Y4[1jMaNmM4Y4\\1jM_NiM9]4X1jM_N]Me0i4l0kM^N[Mg0j4k0lM]NYMi0k4j0lM]NYMh0l4k0mMZNXMk0k4j0QNWNTMo0l4j0WNPNmLV1l4j0[NkMkLY1k4k0AUO?l0@TO`0l0@SOa0m0_OSOa0n0^OROb0o0]OQOc0P1]OnNd0S1\\OkNe0V1[OoMTKNa5S2^OgMXK3[5U2=fMDZ2a600000000O10000000000AaC]N_5CJk0CXO>f70O001O1001N1O2N8FhRj3" + }, + { + "size": [ + 480, + 640 + ], + "counts": "mki51]4o0T5ROTJk1e5ZNfIY2X6iM\\Hc3b7\\LkFJ]1P4f7WLmFI[1S4g7TLnFIZ1U4g7ULWHn3i7mK[HS4e7jK^HW4U91O001N2O0O1N3N1N2O2O0O1O2O0O10000O2O00000O10000001O0000001O00001O001O1O2N1O1O1N3NnNbKcG]4\\8dKfGZ4S8mKnGR4P8PLQHn3P8RLRHl3n7TLSHk3m7ULSHj3n7WLRHh3n7XLRHh3o7XLPHg3Q8ZLnGf3R8ZLnGe3T8ZLlGe3U8[LjGf3V8[LiGd3Y8\\LeGd3\\8]LbGc3_8_L^Ga3c8aL[G^3g8bLWG^3j8cLTG\\3n8eLoF\\3S9iLeFW3]9U11N3M3L4N2L6K3MXL`KVM[4j6M4K6J4L4K5K5J5L5K4M4I8K6H8H:F:E?]OeVS2" + } + ], + "model_output": "A brown vase with a glossy finish and a curved handle on the upper right side, featuring a smooth surface with subtle variations in color intensity, giving it a slightly mottled appearance, is in front of the window.", + "eval_result": 
"False" + }, + { + "image": "images/caption_detailed_82.png", + "subject_name": "person", + "object_name": "wall", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 478, + 640 + ], + "counts": "^[b35g>3N1N3N1N2N2O1N2N2N2N2N2O1N2O1N2O1N2O1O1N2N2O1O1O1O1O1O1O100N2O1O1N2N2N2O1O1O002M200O1[OSNXDn1e;YNVDh1f;`NTDb1j;j0M3N2M3L4M3O1N2N2O1N2O1M3O100O10000000000000O10010O0O10000gLQEd2n:[MZE^2f:aM[E_2e:_M^E`2c:YMcEg2]:XMdEh2]:WM[EQ3e:nLRE[3V;3J6O1O1O1O1N201N1O1O002N100O1O100O1O100O100O10000O1000000000O1001N10O100001O0O01000000000O10000000000000001OO100000010N100001O00O100010O00O100010O00O101O010O000O1010O0001N2O010O001N110O001N101O100O001N2O010O001N2O010O1O0O11O00000000000000000000000O10000O10000O10000O100000000O10000000000O10000000000001O0000000000000000001O0000000^ORETMn:k2WEPMj:P3]EgLf:W3a000O10001N1000000O2O0O101N1O1O101N1O1O2O0O2N1O2N1N3N2N2N2N3M2N3M2N2N2M3M3N2N2M3M3M3M3M3M4L4K6J8Eon[1" + }, + { + "size": [ + 478, + 640 + ], + "counts": "0\\6b8000000000000O101O0000O10000000O1000000000000000000O100000000000000O100000000000000000000O10001OO100000000000O1000000000000O2OO1000000000O100000000O10000000000O1000000O1000001N1000000000000000000000000000000001O0000O100000001O00O1000000O1000O100000000000001O0000000O10000000000000000O2O00O010000O1N2K5M3N2O1N2O1O10000O10OO3N1O1O1N200O1O00100O1O2N1O010O101O0O1O100O100O100O1O1O1N2O1O2O0O100O10000O01000O2O0O10O010001N010O100O2O00O0100O1O100O100000000000000000000O101O0O00100O1O100O10001O0O010O10000O10000O2O00O10000000000000000000O1001O00000000000000000000000O1001O0001O0O20O0001N2O10O0001O1N200O001N6K3M010O1N1010N1000O100001O0O010O2O0000O100001O0000O11O0001N1001O01O000O11O0000O2O01O0001O0O11O0000O2O0010O01N1010O01O1N10100O00001N11O00jE\\KR:k4L2O10O0000O2O1OO2O00O2O010O00O1000000O10O2O01O0000000O10000000O11O0O1000000000000000000000000000000000O0200OO10O10000000O1001O000000O11O000O10O11O00000000000O100000001O00O100001OO01000001O0000O11N10O10000001O000O011O00O10000001O000O10000000O100001O000O011O00O10000001O00O1000O10000000
001O0000000O1000O1001O0000000O100000O1001O00000O10000000O1001O000O1000000000O1001O000000000O10000000000O100000000000000000000O1000000000000000000O10000000000000000O1000000000000000000O10000000000000000O10000000000000000000000O10000000000O1000000000000O1000N" + } + ], + "model_output": "A close-up of a human finger with a visible fingernail, showing a natural skin tone with subtle pinkish hues and a slight sheen, indicating a healthy, well-maintained surface, in front of the wall.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_83.png", + "subject_name": "sky", + "object_name": "road", + "predicate_name": "over", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "0d2h:0000000000M300M3L4N2O100O1UOlD^OT;=`ETO`:k0cEQO_:n0P1O1O1N2O1O1000000001O001O001O1O1O1O001O0000000000EjC@V_CCad1_O\\5d1RI]Ob1oN]5j2cJVM]5j2cJVM]5j2dJUM\\5l2bJUM^5k2bJUM^5k2cJTM]5l2bJUM^5l2aJTM`5l2_JTMb5k2^JUMc5j2]JVMi5e2WJZMm50eH\\2^1dMY6V2gIjM]6R2cIoM^6P2aIoMb6P2]IPNe6m1\\ISNe6m1ZISNi6j1WIVNj6i1VIWNl6T1eG\\O_1@n6e1RI[No6e1PI[NR7c1nH]NR7c1nH]NS7b1mH^NT7`1mH`NT7^1mHbNT7]1kHdNW7Y1jHgNV7X1kHhNV7V1lHiNT7W1lHiNU7U1lHkNT7U1lHkNT7T1mHlNU7R1kHnNT7S1lHmNT7S1lHmNU7R1kHnNU7R1kHnNU7R1kHnNU7Q1lHoNT7P1mHPOT7o0lHQOS7o0nHQOQ7o0PIQOP7o0nHSOR7m0mHTOS7j0PIUOP7k0PIUOQ7j0oHVOQ7i0PIWOn6k0TISOl6m0TISOl6m0SITOm6l0SITOm6l0SITOn6k0RIUOo6j0QIVOm6l0SITOm6m0RISOn6m0RISOn6l0TISOm6l0SITOm6m0QITOP7k0PIUOQ7j0PIUOQ7j0oHVOS75SG:i1BV7OTG?g1A\\7>eHB[7>eHB\\7>bHC`7:aHF_7:bHE^7;bHE\\7=cHD_7:bHE^7;bHE^7;aHF`79`HG`79`HG`79aHF_7:aHF`79_HHb77_HHa77`HI`78^HIb77^HIc76]HJc76^HIb77]HJc76]HJd74]HLc74]HLc75\\HKd75\\HKd74]HLc74]HLc74]HLd73\\HMe72\\HMe72[HNg70YH0f9100O1TMMTJ3h800000000000001OO1O11O1O00001O000000O100O11O00001O00O1000000000000000000000000O100001O1O000O01001OO100001OO10000000000001OO1kK0fIOPO1^20k41eI0QOO_21j41eI1POMb21k4OeI0QOO^22l4OfIOPO0^22k4NgI0g12h8OkLOeJ0[50eJ0Y50XH1i1Of61ZIOQ60eJ0[50eJ0a80000000jJ2dI3M0a4KT70000000000000000000000000000000000000000000000000O1mN;TFEk9l0dEUO[:T10000000000000000000000
00000000O1000000000000000000000000000000O11O0000O11O00O1000000000000000000000000000000000000O100000000000000000001O1O3M4L;Eb0^O>jFSMf8Y3F?A`0_Ok0VO?A5mIZJ35MM[5S7fJoHV4o7ZO1N4MRI" + } + ], + "model_output": "A white moving truck with an open rear door, featuring a metal frame and a partially visible license plate, has a dog sitting on it.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_85.png", + "subject_name": "mirror", + "object_name": "wall", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "k5b4^:1O3L3N3M3M3L3N3M2N2N2N0O10O100O10O01000O010O010000O10O0100O100O010O100O1000O0100O10O0100O10O10O100O10O10O100O100O010O100O10O01000O010O10O10O0100O10O0100O10000O010O100O10O010O10O100O10O10O100O100O01000O101N3N5J4M5J5L5J6K2M6K3M5J5L3L6K4K5L3M3L6K3M4K5K5L5K4K5L4K4M4K4M5K4K5L4KfP[7" + }, + { + "size": [ + 480, + 640 + ], + "counts": "1_2]1\\O_3W1_L]O2\\O`3W1]L\\O3^Oa3W1ZL[O5^Oc3W1VL[O7^Oe3W1RL[O9]Oi3W1lK\\O;]Ok3W1hK\\O=]On3V1cK]O?]Oo3X1_K[Ob0]OR4W1ZK\\Oc0]OV4V1VK]Od0]OW4W1SK\\Of0]OV4Z1RKYOh0]OV4\\1PKVOk0]OV4_1mJTOl0^OV4a1lJQOn0^OV4c1jJoNP1^OV4e1hJmNR1]OW4g1fJlNS1]OV4k1dJhNV1]OU4m1dJfNW1]OT4n1eJeNW1]OT4o1dJdNX1\\OU4Q2bJcNX1]OU4Q2cJbNX1]OU4Q2cJbNX1]OT4R2dJaNX1\\OU4S2cJ`NY1]OT4S2cJ`NX1^OT4S2dJ_NX1^OT4T2cJ^NY1^OS4U2dJ]NY1]OT4V2cJ]NX1^OT4U2eJ]NW1^OT4U2eJ]NW1^OT4U2eJ]NW1]OT4V2fJ\\NW1^OS4V2fJ\\NW1^OS4V2fJ\\NW1^OR4V2hJ\\NV1^OQ4U2kJ]NT1]OR4R2nJaNo0^OR4Q2PKaNn0^OR4o1RKcNl0^OQ4o1TKcNk0]OR4o1TKdNj0]OQ4P2UKcNj0]OQ4o1VKdNh0^OR4n1VKcNi0^OQ4o1WKcNh0^OQ4n1XKdNg0^OP4o1YKcNg0^OP4n1ZKdNf0^OP4n1ZKdNf0^Oo3n1\\KdNe0]Oo3P2\\KcNe0]Oo3o1]KdNd0\\OP4P2\\KdNc0]OQ4n1]KdNc0^Oo3o1^KcNc0^Oo3n1_KdNb0^On3n1aKdNa0^On3n1aKdN`0^Oo3n1bKdN?^On3o1cKcN?^On3n1dKdN>^On3n1dKdN>]On3o1eKdN=]On3n1fKdN<_On3m1fKdN<_Om3m1hKdN;_Ol3m1jKdN:^Om3n1iKdN:^Om3m1jKeN9^Ol3n1kKdN9^Ol3m1lKeN8^Ok3m1nKeN7]Ol3n1mKeN6^Om3l1nKfN5^Ol3l1PLfN4]Om3m1oKeN5^Ok3m1QLeN4^Ok3m1QLeN4^Ok3l1RLfN2_Ok3k1TLfN1_Ok3j1ULgN0^Ok3l1ULfN0^Ok3k1VLgNO^Oj3l1WLfNO^Oj3k1XLfNO_Oh3l1YLeNO^Oi3l1YLfNN^Oh3l1[Lf
NM^Oh3l1[LfNL_Oh3k1]LfNK_Oh3j1^LgNJ^Oi3k1]LgNJ^Oi3k1]LgNJ^Oh3k1_LgNI^Og3k1aLfNI_Of3k1aLfNI_Oe3k1cLfNG_Og3j1cLgNF_Og3j1cLgNF@e3i1fLgNEAd3h1gLgNECa3f1kLgNDG]3b1oLgNCLZ3\\1TMgNC2S3W1[MgNB6n2S1aMgNA:j2n0fMhN@=f2l0jMgN@`0c2i0mMgN@f0]2b0TNhN_Oi0Y2`0XNgN_On0T2:^NhN^OR1o16dNhN]OV1k12hNhN\\O[1h1LmNiN[O_1c1IROhN[Oc1^1EXOhNZOf1[1B[OhNYOj1Y1]O_OiNXOP2R1XOFhNXOS2o0TOJiNWOW2j0QOOhNWO\\2e0kN5hNWOa2`0fN:jNUOd2=bN>jNUOh28^Nd0kNROl25ZNi0nNnNl25UNn0TOhNj26SNR1[O_Ni29kMY1@ZNj27fM`1DTNl27_Mf1LlMg2<]Mh1m5TNSJm1Q6oMoIQ2`90O100O1O100O100O100O1O100O1O100O100O1O10000O1O100001O2N1O4L2N2N2N001O1O1O0000N2O1N2O1O1O1O1O1O100O1O2OO0100O2N1O010O100O1O1O100O1O10000O1O1O1O100O100O100O100O1O1O10000O1O100O1O100O1O100O1O1O1O100O100O1O1O100O100O100O1O1TH`LW4a3hKaLW4_3hKcLV4^3jKbLV4^3jKbLU4_3kKbLT4^3lKbLS4_3mKaLS4_3mKaLR4`3nK`LQ4a3oK_LQ4a3oK_LP4b3QL]Lo3c3QL]Lo3c3QL]L`M1W5b3ZM\\L^M`0k4T3gM\\L]Mm0`4g2SN\\L]MT1Y4`2ZN\\L]M]1P4W2cN\\L]Mh1f3k1mN^L[MS2]3_1XO^L[M`2Q3Q1D_L[Mg2k2i0J`LZMR3c2=3aLZM]3Y21>aLYMh3o1Gh0aLYMV4b1XOU1bLYM]4[1QO\\1cLWMg4T1eNe1dLWMP5m0ZNl1fLWMZ5d0oMU2gLVMg5:aM`2hLVMn53ZMg2hLVM]6^OZI0c3\\3gLTMX:l2hETMX:l2hETMX:l2hETMX:m2gERMZ:n2fERMZ:n2fERMZ:n2fERMZ:n2fERMZ:n2fERMZ:n2fEQM[:o2fEPMZ:P3fEPMZ:Q3eEoL[:Q3eEnL\\:R3dEnL\\:R3dEnL\\:R3dEnL\\:R3dEnL\\:S3cElL^:T3bElL^:T3cEkL]:U3cEjL^:V3bEjL^:V3bEjL^:V3bEjL^:V3bEjL^:V3bEiL_:X3`EhL`:X3`EhL`:W3aEiL^:X3cEfL^:m2[ESM70^:k2_ESM32^:j2dEPMN5_:j2RFVMn9i2TFUMm9j2TFVMl9j2TFVMl9j2TFVMl9j2UFTMl9l2TFSMm9m2SFRMo9n2Q11O1N2O1O001O01O04L000O4M1O1O001O00000000003M2N1O001O00000010O00O1001O001O0M4eLoDNGO3Mh04]O^Ob0K5N2N2N2O1O2N002N100O1O011N10O0101N10O0101N100O100O100O1O100O001O1O100O1O1O1O2N1O1O1O1O001O0O20OO2N2N1N3M3J5@a0L4N100M3O2N1M3O2L4^OlE[OY:b0?M4N2N1N201N2N100O101N2N3K[Vf0" + } + ], + "model_output": "A silver laptop with a white keyboard and a large touchpad sits on a table. 
The screen displays a blue background with a white 'X' icon on the left side and a vertical list of icons on the right side.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_87.png", + "subject_name": "teddy bear", + "object_name": "teddy bear", + "predicate_name": "beside", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "^ZV39l<8K6I5L3N1N2O1N1G:M2N2N2N3M2O1N2N2O1N2N2O1N2N2VOlM^FU2`9oM]FR2b9QN[FP2c9SNZFn1f9SNZFm1e9UNYFk1h9VNVFj1j9WNVFi1j9XNTFh1KfMn9d0VFd1o9\\NQFa1R:_NnE`1S:aNkE_1V:cNhE[1Z:k0O1O001O1N010O01O001O0010O101O001O00001O2N3M2N1O1O0O2O1O1O0010O011N1O10O0100O1O1^MUFd1k9[NWFe1h9ZNZFe1g9XN[Fh1e9WN\\Fi1d9WN]Fh1d9WN\\Fh1e9XN\\Ff1f9YN[Fe1f9[N[Fd1f9ZN\\Fe1e9YN^Fd1d9ZN_Fd1c9WNcFe1e:K1N10O[O`DD_;=cDA^;?cD_O];b0eD[O\\;d0fDYO\\;g0fDSO^;l0`0O2O0O2O1N2N2M4Hfee3" + }, + { + "size": [ + 425, + 640 + ], + "counts": "[UW31Y\\W10_\\lN1]dL6K3M5^ODoC=m;GQD;m;GRD:l;GTD9l;GTD:j;GVD9i;IVD7i;JWD6h;KXD5g;LXD5e:CPF9YO5e:EPFQ1o9SOnEm0Q:WOlEi0T:[OgEf0Y:]OdEc0\\:_OaEa0_:A`E?`:B]E?e:R1001O2M3N001OO1O100O10000O1000001N1000000001O001O1O000010O001O001M20[N\\Ee0c:ZO`Ef0_:YObEg0^:UOfELJ8_:HkE0I5]:HmE2G5]:FnE5G4Z:ERF7E2Z:_OZE2i0=D2Z;MgD2Y;NgD2Z;MfD3Z;MgD2Z;MgD2Y;NgD1[;NfD1[;NfD0\\;OeDO];0eDL^;2P1LWnf2" + } + ], + "model_output": "A plush teddy bear with a light brown, textured fur exterior, featuring a rounded head with a small, red, triangular nose and a subtle smile, sits with its arms outstretched and legs spread apart, its white snout contrasting with the light brown fur. 
The teddy bear is beside another teddy bear.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_88.png", + "subject_name": "snowboard", + "object_name": "car", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Pdj24k>3N2N1O2N2N1O1O1O1O001O1O001O001O0000000000000000000000000000000O1lA\\Ol=l0O1000O010L301O100O110O2N001O000O10000001O00000000000000000000000000O10000000O100000000O101O00002N1O00000000O20mAXOl=n00000000O1N1O2O101N101O00000000O10000000000000000000000000000001O00001O00001O1O001O1O1O1O3DcA1f>2M010O0000I700O\\k]4" + }, + { + "size": [ + 480, + 640 + ], + "counts": "aa`17f>8I5VOFgB=S=KdB;X=j0L3M3N2N1O1O2N2N2O001O0O2O00001O001O000O2O001O00001N101O001O1O3L6K3M2N2M3M3N2M2O1N2N2O1N1O2O0O2N100O1O1O2N1O1O100N200O10000O1O00100O01O0010O01O010M210O0O20OO2O1O03N001O0O100O2O00000POlKTGT4l8TLlFl3S9VLlFk3S9TLmFm3T9RLlFn3T9SLWFHa0U4W9XLgFi3Y9YLeFg3\\9WLeFi3[9WLeFi3[9XLdFh3[9[LcFe3]9[LbFf3^9ZLbFg3]9YLcFg3]9YLcFg3]9YLcFg3]9YLcFg3]9YLcFg3]9YLcFg3]9YLcFg3]9YLcFg3]9YLcFh3\\9YLbFh3^9XLbFh3^9XLaFi3_9WLaFi3_9WL`Fj3_9XL^Fk3a9j000O1000_K_Fd3_9[LdFd3\\9\\LdFd3\\9\\LcFe3^9ZLbFf3_9XLbFi3]9WLcFi3]9WLcFi3]9XLaFi3_9WL`Fj3a9TL`Fl3`9TL_Fm3a9SL_Fm3a9SL_Fn3`9SL^Fn3b9RL^Fn3b9RL_Fm3a9RL_Fo3a9QL_FP4`9PL`FP4`9PL`FP4`9PL`FP4`9PL`FP4a9oK_FQ4a9oK_FR4`9nK`FR4`9nK`FR4_9nKbFR4^9nKbFR4^9mKcFS4]9mKcFS4]9mKcFT4\\9lKeFS4[9mKeFS4Z9nKfFR4Z9nKfFR4Z9nKfFR4Z9oKeFQ4Z9PLfFQ4Y9oKgFQ4Y9oKgFQ4Y9PLfFP4Z9PLfFP4Z9QLfFn3Z9TLeFl3Z9ULeFk3[9m0000WKfFl3Z9SLgFm3Z9l00001O01OO10YKeFi3[9n0O1100O1OO10000000001O00000000001O00000000QKhFV4X9jKhFV4X9jKgFW4Y9h00QKgFX4Y9gKhFX4X9hKhFX4X9hKhFX4X9hKhFX4Y9gKgFZ4Y9eKgF[4Y9eKgF[4Y9eKgF[4Z9dKeF]4]9aKeF]4\\9bKgF\\4Y9[KfFK2j4d9WK[Fi4e9WK[Fi4n90001O01O01O000010O0000010O0010O01O01O00001O000000001O00001O0011O2M1O2O0O2O0O2N10002N0O00010O100O0010O100O00010O1O1O010O1O001O1O010O1O001O001O1O1O001O1O001O0O2O2N1N100O2O001N2N2O1N1O3N1N1O2N1N3N1O1O1O2N1O2O001O010O001O00001O1O0O10001O001O00001O0O101O001O1O00001O00001O000000001N101O1O1O000O10001O0000
0O101N101O000O2O1O0O101O001N101O1N1O2O000O2O000O2O1N2N3M2O1O2M3M4M3L3M2M3N2N1N3M4J6I:IB1O001N2N2O2M2N2Nkol4" + }, + { + "size": [ + 427, + 640 + ], + "counts": "V2S;X200000000000000000000000001O2N2N1O001O2N00000000001OO1001O0000000000000000000000000000000000000000000000000000000000000000001O00000000000000000000000000000000O10000O100O100001O000000000000000000000000000000000jIaM`1_2`NbM_1^2aNcM^1]2bNdM]1]2bNdM]1[2dNfM[1Z2eNgMZ1Z2eNfM[1Z2eNgMZ1Y2fNgMZ1Y2eNjMY1[1VJSOa4CX1U2gNlMY1T2gNlMY1T2gNlMY1T2fNmMZ1S2fNmMZ1S2fNmMZ1S2fNnMY1R2fNoMZ1Q2eNPN[1P2eNPN[1P2eNPN[1P2dNQN\\1o1dNQN\\1n1dNSN\\1m1dNSN\\1m1dNSN\\1m1dNSN\\1m1dNSN\\1m1cNTN]1l1cNTN]1k1dNUN\\1i1eNXN[1h1eNXN[1g1fNYNZ1g1fNYNZ1g1fNYNZ1g1fNYNZ1g1fNYNZ1g1fNYNZ1g1fNYNZ1g1fNYNZ1g1gNWNZ1i1fNWNZ1i1fNWNZ1i1fNWNZ1i1fNWNZ1i1fNWNZ1i1gNVNY1j1gNVNY1j1gNVNY1j1gNUNZ1l1eNSN\\1m1eNRN[1n1eNRN[1n1eNRN[1n1fNQNZ1o1fNQNZ1o1gNPNY1P2gNPNY1P2hNoMX1Q2hNnMY1S2fNmMZ1S2fNmMZ1S2gNlMY1T2fNmMZ1S2eNnM[1R2ZNYNf1g1RNaNn1_1oMdNQ2]1lMeNT2[1kMeNV2[1iMfNW2Z1hMgNX2Y1gMhNY2W1gMjNY2V1gMmMfL5d5n1eMmMmL0^5S2eMmMoLL^5W2bMmMZ3S2fLmMZ3S2fLmMZ3S2fLmMZ3R2gLnMY3R2gLoMX3Q2iLnMW3Q2jLoMV3m1VIhMf3;S3h1VInMl3;m2e1YM\\Ne2d1]MZNc2f1bMUN^2l1eMnM]2R2^4O100001OO100001O00000000000000O100001O00O10lFRNk6o1TIRNk6n1^HRNTO2]8l1_HTNRO2]8j1aHUNPO2_8h1cHXNjN2b8f1dHYNiN1c8f1eHXNgN3d8e1dHYNhN2d8e1dHZNgN1e8e1dHZNgN1e8e1cH[NgN1f8c1dH\\NeN2V2GQ4k1TK\\NeN2T2KQ4g1WK[NeN2R2NQ4d1YK\\NdN2P20S4b1YK\\NdN2P20S4b1YK\\NcN3P20T4`1ZK^NaN2P21U4_1[K]N`N3=Em0?j4\\1\\KoNmNXOj0?l4Z1\\KPOnNYOg0>o4X1]KQOlN\\Of049f0\\5l0SKnNlNWO=98f0\\5l0SKnNA@0f0\\5l0RKoNB_O0f0\\5l0RKPOA^O2e0[5m0SKoN@_O3d0Z5n0SKPO_O^O>:P5X1VKUNfNc0f0Fb06l4\\1]KgNTOFe05k4^1^KRN`N1N9d01f04j4_1gK\\NhN2h01j4a1fK^OAPOi4c1eK]OCnNi4e1dK]OEkNh4h1bK^OHeNi4m1_K^OW6b0hI@W6`0iIbNXNA0O1`0n7^1iI_NkNL]7e1gI_NnNJ\\7g1gI^NPOGZ7k1fI^NY7b1gH_NX7a1hH_NX7a1hH^NY7c1gH]NX7e1gHZNYNEl8R2kH]NT7d1jHXN[NGj8Q2`HcNfN^Oh8P2^HnNh1ZO[2i1lKmNe1_O^2e1kKnNe1_O_2c1lKnNd1_OdM]O^4V2ZLQNlNc0g2JdM_O]4S2]LPNnNa0d2LbME]4n1_LPNQO2J1f27dML[4j1aLoMRO0J3d27bM0]4g1dLlMXO7U23cM5\\4e1eLjMXO9T23
`M8_4b1ZMSNh0o0n1o0XMTNi0m0o1o0XMTNi0m0o1o0XMTNj0l0n1P1XMTNk0k0m1Q1XMTNl0j0l1R1YMSNn0h0i1U1YMSNR1d0e1Y1YMSNX1>_1_1YMSNY1=^1`1ZMSNW1=_1`1ZMSNW1=_1`1ZMSNW1=_1`1[MRNU1>a1`1ZMRNU1>a1`1[MQNT1?a1`1\\MPNR1`0c1`1\\MoMQ1a0c1`1\\MoMQ1`0d1a1[MoMQ1`0d1a1[MoMQ1`0d1a1ZMPNR1?d1a1ZMPNR1>e1b1XMQNS1=e1b1XMQNS1=e1b1XMPNS1?e1a1XMPNQ1a0g1_1XMPNP1b0h1^1YMoMn0c0j1^1YMnMm0d0j1`1XMjMo0f0i1a1>_NBb1>]NBc1>\\NCd1=\\NCe13R6T2jJUNgND=3Q6U2kJTNgNC>4P6U2lJSNfND>4P6U2lJoMkNG95P6U2nJhMnNN44R6U2mJgMnN034R6U2nJfMmN134Q6V2PKdMlN234Q6V2PKdMlN234Q6V2PKdMmN125Q6V2PKdMlN314S6U2PKdMlN314S6U2PKdMlN304V6T2oJeMjN314V6T2oJeMjN313W6U2_J]MUOV15WOX6V2]JRO[OgNX6X2]JQO[OfNY6Z2[JQO[OeNZ6Z2ZJRO\\OdNZ6Z2ZJRO]OcNY6[2YJSOB]NV6`2XJ_NROYOb0CX6d2TJ[NAROV7c2XIgMWOH_8a2ZHfM[OC^8g2XHeMS8\\2kGeMV8[2gGhMY8Y2dGjM[8V2dGjM]8V2cGjM]8V2cGiM^8W2bGiM_8V2bGhM_8X2mGZMU8f2W11_IYMTLNU6i2eM`M[2`2dMaM\\2_2bMcM^2]2aMdMRMMV4_2gNeMQMOW4]2fNdMSM1V4[2gNcMSM3W4Y2eNcMUM8R4W2hNaMUM>n3R2lN`MUM`0n3P2mN`MTMb0n3n1oN_MSMf0k3k1ROcMmLf0AoNV4h2]OcMhLV1f3W1bMTM:b0VNX1m3R1bMVM:a2T29bMWM:_2T2:aMXM;^2T2:aMYMZ3`M`M]O_Of2D>]3_M^M_OAZ6Q3WJkLQOGO6c07V6Q3WJiL]OH6?U6Q3XJhL37d5R3ZJfL28c5S3]JbL1;c5R3gKnLY4Y2ZImM]2JY4Y2\\IkM[2LX4Z2]IjM[2LX4Z2^IhM[2OV4Y2_IhM[2OV4Y2`IfM[21U4Y2`IeM]21R4[2bIcM\\22R4[2cIaM\\24Q4[2cIaM\\24R4Z2cIaM[25R4Z2dI`MZ27P4[2fI\\M\\28n3\\2hIXM\\2k0`JWOR5N=l0`JWOS5M=l0_JXOT5L=l0^JYOU5K[NBe1fJZN_51Kc1gJ^N]5OLa1iJ`N[5OKb1iJ`N\\5NKb1hJbN\\5LKb1jJbN[5LKb1iJcN\\5KJb1kJcN[5KJa1jJgN[5HKa1jJgN[5HJb1jJgN\\5GJa1kJhN[5GJa1kJhN[5GJV25jMKV25jMJX25hMKX25hMJY26gMJZ25fMKZ25fMKY26gMJY26gMIh1hJeN_5CIh1hJcNa5EGh1iJbN`5FGh1jJ`N`5HEh1mJ^N_5JDh1nJ\\N_5LCh1oJYN`5OAh1QKUN`53_Oh1k0XNUOh1k0XNUOh1k0XNUOi1j0WNUOj1k0VNUOi1l0WNUOh1k0XNUOg1l0YNTOg1l0YNTOf1m0ZNSOd1o0\\NQOc1P1]NPO9iJc02VOU6NQO6jJf0^6TOjN2jJj0\\6TOUOk0l0UOUOj0k0VOUOj0k0VOUOj0k0VOUOj0k0VOUOj0k0VOUOj0k0VOUOi0l0WOdN2SKg0Y6WOcN7PKb0]6WOcN:lJ?b6WOcNW1^1iNcNU1^1kNcNS1^1mNfNl0]1TOfNl0W1TOnNi0P1WOROi0l0WOUOk0h0UOYOm0d0SO]Om0b0SO_Ol0a0TO_O7XJ7Y6B_O5]J6T6E@3bJ2o5K_O2Y1NR60000000000000000000000000000000000000000000O1000000001O00000
000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000N" + } + ], + "model_output": "The sky is filled with two distinct clouds: a larger, fluffy cloud on the right side, with a soft, rounded shape and a slightly irregular outline, and a smaller, more elongated cloud on the left side, appearing wispy and stretched out, with a smooth, flowing appearance. Over the sky, an airplane flies.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_91.png", + "subject_name": "tree", + "object_name": "rock", + "predicate_name": "beside", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": 
"0]=c100000000000000000000000000000000000000000000000000000000O1000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000000O11O00O100000000000000000000000000000000000000000000000000000000000000hI[NVI2d5c16cNWJd0`5i06iNUJc0b5d082lJ_Nh3`1[16dJaNo3Y1[1;WJPN1e0[4P1\\1\\1bNdN]1]1cNcN]1^1bNbN]1b1`N^N_1d1`N\\N_1e1aN[N_1e1aN[N_1e1aN[N^1g1`NZN`1f1`NZN`1f1`NZN`1g1_NYNa1i1\\NXNd1i1[NWNe1i1[NWNf1h1YNYNg1g1YNYNg1h1XNXNh1h1XNXNh1h1XNXNh1h1XNXNh1h1WNYNi1g1WNYNh1i1WNXNh1g1YNXNg1i1XNXNh1h1XNXNg1h1ZNXNf1h1ZNXNe1i1ZNXNf1g1[NYNe1g1[NYNd1g1\\NZNd1f1\\NZNd1e1]N[Nb1e1_N[Na1e1_N[N`1f1_N[N`1f1`NZN_1g1aNYN^1h1aNYN_1h1`NXN_1i1aNWN^1i1cNWN\\1j1cNWN[1k1eNUNZ1k1gNUNY1k1gNUNZ1j1fNVNZ1i1gNWNY1i1fNXNY1h1hNXNW1h1jNXNV1g1kNYNT1g1lNZNT1f1lNZNT1f1kN[NU1d1lN\\NS1e1lN\\NT1c1lN^NS1c1iNaNW1_1gNcNY1\\1gNeNX1[1fNhNZ1X1eNiN[1W1dNjN[1V1eNkN[1U1dNlN\\1T1dNlN\\1T1cNmN\\1T1dNlN\\1T1dNlN[1T1eNmNY1U1gNkNX1V1gNkNX1U1iNkNV1V1jNjNU1W1jNjNV1V1jNjNU1W1kNiNU1X1jNhNV1X1jNhNU1Y1kNgNS1[1mNeNR1]1mNcNS1]1mNcNR1]1oNcNP1^1oNcNP1^1PObNo0`1PO`No0d1nN\\NQ1g1mNYNR1o1gNQNY1R2cNoM]1S2aNmM_1V2^NjMb1W2\\NjMc1d2oM]MQ2h2iMYMV2k2fMVMZ2l2aMWM^2k2_MWMa2k2YMYMg2i2PM^Mo2f2dLnIWOa3U4U70000O10000000000O100000000000000001O00000000000000001O000000001O1O001OaNgEWNY:o1]EUNc:U3]MfK^JZ4U5VLhJk3U5XLjJh3T5ZLlJf3S5[LmJe3S5[LmJe3R5\\LnJd3R5\\LnJd3R5\\LnJd3R5\\LnJd3R5]LmJc3R5^LnJb3S5]LmJc3S5]LmJc3T5\\LlJd3U5[LkJd3V5\\LjJd3W5[LiJe3X5ZLhJf3Y5YLgJg3Y5YLgJg3Z5XLfJh3Z5XLfJg3[5YLeJg3\\5XLdJh3\\5XLdJg3^5XLbJh3_5WLaJi3_5WLaJi3`5VL`Ji3b5VL^Jj3b5VL^Jj3b5VL^Jj3b5VL^Ji3d5VL\\Jj3d5VL\\Jj3e5UL[Jk3e5UL[Jj3g5ULYJk3g5ULYJk3g5ULYJk3h5TLXJl3h5ULWJk3j5TLVJl3j5TLVJk3k5ULUJk3k5TLVJl3j5TLVJl3j5TLVJl3k5SLUJm3l5RLTJn3l5RLTJn3l5RLTJm3m5SLSJm3m5SLSJm3n5RLRJo3m5QLSJo3m5QLSJo3n5oKSJQ4e4ULmII^1R4a4ZLQJC^1S4]4aLTJZO_1T4^4bL`L^3`3bL`L^3_3dL`L[3]3iLcLW3\\3jLdLV3\\3kLcLU3\\3lLdLT3Z3oLeLQ3R3YMmLg2P3]MoLc2P3^MPMb2m2aMSM_2k2cMUM]2d2jM\\MV2T2mKmJV2o2m1S2[NmMe1o1nKmJ^2T3d1k1cNUN]1k1aNWN_1i1`NXN`1g1`NZN`1f1_N[N`1f1^N\\Nb1e1[N]Ne1c1WNPKhM^3R4b1`MVO`2k0UM_Ok2c0bLn
JDd4j3V6O10000O1000000000000000000000000001O0000O1000000001O0000000000001O00001O000\\MnKZJS4c5oK]JR4^5RLbJn3Z5WLeJi3X5ZLhJf3V5\\LjJe3T5\\LlJd3P5`LPKa3m4aLSK_3k4cLUK]3i4eLWK\\3g4eLYK\\3d4fL\\K[3b4fL^KZ3a4gL_KY3`4hL`KX3^4jLbKV3]4kLcKV3[4jLfKV3Z4jLfKV3Y4kLgKU3V4nLjKS3R4PMnKP3P4RMPLn2o3SMQLn2m3SMSLm2l3TMTLm2j3TMVLl2i3UMWLk2h3VMXLj2g3WMYLj2c3YM]Lg2`3\\M`Ld2^3^MbLc2\\3^MdLb2[3_MeLa2Y3aMgL_2X3bMhL_2W3aMiL_2V3bMjL_2U3aMkL_2S3cMmL^2o2eMQM\\2k2gMUMY2k2gMUMY2j2hMVMX2g2kMYMU2d2nM\\MS2b2mM_MS2a2mM_MT2]2oMcMQ2]2oMcMR2]2mMcMT2]2kMcMV2]2iMcMW2^2hMbMX2_2gMaMZ2_2eMaM]2^2bMbM_2_2_MaMa2_2_MaMb2e2WM[Mj2g2SMYMm2g2RMZMo2f2PMZMQ3e2oL[MQ3f2nLZMS3e2mL[MT3d2lL\\MT3e2kL[MV3e2iL[MX3f2fLZM\\3g2aLYM`3g2^LZMb3f2^LZMc3e2]L[Md3e2[L[Mg3c2YL]Mh3e2UL[Ml3e2RL\\Mn3e2QL[MP4h2lKXMV4g2eK]M\\4b2cK_M]4c2`K^Ma4b2^K^Mb4c2\\K^Mf4`2YKaMh4_2WKaMi4a2UK_Ml4a2RK`Mn4a2QK_MP5a2nJ`MS5_2mJaMT5_2kJaMU5a2iJ_MX5`2gJaMZ5_2eJaM\\5^2dJbM]5^2bJbM_5^2`JbMa5_2]JaMe5^2ZJbMi5\\2SJgMm5Y2SJgMn5Y2QJgMP6Z2mIgMV6`51O1O2N3M7I7I5K3M5K3M3M2N3M3M5K3M1O2N9G7I2N3M2N6J5K3M1O4L4L2N3M2N2N3M5K4L002N5K3M1O0\\M]Ek0c:ROaEm0g:jNZEV1h:gNYEY1k:bNWE]1i:bNXE^1j:^NXEb1k:PNiDH>X2Q<000001O00000000000000001O0000000000000000000000000000000000O1001O000000O11O0000O100000000O10000000000000000O1000000000000000000O100O100000000O1000000000000000000O10000000000000000O100000000" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"`=_1a=0000000000000000000000000000000000000000000000000O1000000000000O10000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000000O1000000O1000000O10000000000O1000000O100000000O1000000O1000000O1000000O10000O100000000O1000000O10000O1000000O10000000000O1000000000000001O00000000001O00000000001O0000001O00000000000000000000000000000000000000000000000000000000000000O10000O100O1O1O1O100O100O1000000O1000000000000O100000000000000000000000000000000001O00000000000000000000000000000000000000000000000000000000000000000000000000000000O10000000000O1000000000000O1000000000000000000000000O10000000000000000000000000000O10000000000000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000O100000000000000000000000000000000000000000000O1000000000000000000O100000000000000000000000000000000000000000000001O0000000000000000001O00000000000000000000001O0000000000000000000000000000000000001O0000000000001O000000001O0000000000001O0000001O0000000000000000O100000000000000000000000000000000000000000000O100O100000000O1000000O10000000000000000O1000000000000000000O10000000000O100000000O1000000000000000000cC" + } + ], + "model_output": "A tree with dense, needle-like foliage in a conical shape, featuring a mix of light and dark green hues, stands beside a rock. 
The branches are layered, with the upper branches extending outward and slightly upward, creating a full and lush appearance.", + "eval_result": "False" + }, + { + "image": "images/caption_detailed_92.png", + "subject_name": "bear", + "object_name": "rock", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "\\fk0h0]<9J5C>XO^NoDV2f::G5M1N3M2N2N3N1N2O1N2O1O1O1N2O1O2N001O1O00100O1O1N2O10O01O1O1O1O0000000000001OO10000001O00001O1O0000001O001O2N001O1O002N1O001O1O001O1O1O1O001O1O1O1O100O1O00100O00100O1000O10O1000O10O100000O0100O1000O100O01000O1O1O0003NO1O10O011O2gCaMk;l2O0O10000O01000O2O0O1O101N10010N2O1O001O0O101O0O1000000000O1000000000O0100O100O00100O1O001O1O1O001O1N2O1O00001O1N2O001O1O1O1N101O1O1O001N2O001O1N2O001N2O1N101O1O1O001N2O1O1O001N2O001O1O001N2O002N002M101O001N2O1O1O001N2O1O0O2N2N101N2N2N1N3N2N1O2N2N2M3N2M3N2L6G_ab0" + }, + { + "size": [ + 480, + 640 + ], + "counts": "T6j8V60O1O2O0O010N2N2O1N2O1O2N1O1N101O1O1O1O100N2O1O1O1O100O1O1O101N1O1OWGYKZ7g4eH`KV7_4jHhKP7W4PInKl6Q4TITLh6k3XIYLe6f3\\I^L`6a3`IdL\\6[3eIiLW6V3jImLS6R3mIRMP6m2QJXMj5h2UJ^Mf5`2\\JbMb5]2_JfM^5Z2bJjMZ5U2gJmMW5R2iJSNS5l1nJVNP5i1QKZNl4e1UK_Ng4a1YKaNe4^1[KgNa4X1`KjN^4U1cKmN[4S1eKoNY4P1hKSOU4l0kKWOS4i0mKZOP4f0PL[Oo3d0RL^Ol3a0ULAi3>XLDf3P3AQM`0n2@RMb0l2]OUMe0i2ZOXMg0g2XOZMk0c2UO]Mm0a2RO`Mo0_2PObMR1\\2nNdMT1Z2kNfMW1Y2iNgMZ1V2eNkM]1S2bNnMa1o1^NRNd1l1[NUNh1h1XNXNj1f1UN[Nm1c1RN^NP2`1oM`NS2_1lMbNX2Z1gMgNZ2X1eMiN_2S1`MnNa2Q1^MPOe2m0ZMSOi2k0VMVOm2g0RMZOo2e0PM\\OR3b0mL_OV3>iLC[39dLH^36aLKb32]LNf30YL1i3MVL4k3KTL6m3ISL7n3HQL9o3GPL:Q4EnKV4BjK>W4AhK`0X4@hK`0Y4_OfKb0Z4^OfKb0Z4^OeKc0\\4\\OdKd0\\4\\OcKe0]4[OcKd0^4\\OaKe0_4[OaKe0_4[O`Kf0`4ZO_Kg0a4YO]Ki0c4WO\\Kj0d4VO[Kk0e4UOZKl0f4TOYKm0g4SOXKm0i4SOVKn0j4ROUKo0k4QOTKP1k4QOSKQ1m4oNRKR1n4nNRKR1m4oNRKQ1o4oNPKR1P5nNPKR1P5nNoJS1Q5mNnJT1R5lNmJU1R5lNmJU1S5kNmJU1S5kNlJV1T5jNkJW1U5iNjJW1V5jNjJV1V5jNiJW1W5iNhJX1W5iNiJW1W5iNhJX1X5hNgJY1Y5gNgJY1X5hNgJY1Y5gNgJX1Z5hNeJY1Z5hNeJY1[5gNeJY1[5gNdJZ1\\5fNdJZ1[5gNdJZ1\\5fNcJ[1]5eNcJ[1]5eNbJ\\1
^5dNbJ[1_5eN`J\\1_5eN`J\\1`5dN`J\\1`5dN_J]1a5cN_J]1`5dN_J]1a5cN^J^1b5bN^J^1b5bN]J^1c5cN\\J^1d5bN\\J^1d5bN[J_1e5aN[J_1d5bN[J_1e5aNZJ`1f5`NZJ`1f5`NYJa1g5_NYJa1f5`NYJ`1h5`NXJ`1h5`NWJa1i5_NWJa1h5`NWJa1i5_NWJa1i5_NVJb1j5^NWJa1h5`NXJ`1h5`NXJ`1g5aNZJ^1f5bNZJ]1g5cNYJ]1f5dNZJ\\1f5dN[J[1e5eN[J[1e5eN[J[1d5fN\\JZ1d5fN]JY1c5gN]JY1c5gN]JY1c5gN]JY1b5hN_JV1b5jN^JV1b5jN^JV1b5jN_JU1`5lN`JT1`5lN`JT1`5lNaJS1^5nNbJR1^5nNbJR1^5nNbJR1^5nNbJR1]5oNdJP1\\5POdJP1\\5POdJo0]5QOdJn0[5SOeJm0[5SOfJl0Z5TOfJl0Z5TOfJl0Y5UOgJk0Y5UOgJk0X5VOiJi0W5WOiJi0W5WOiJi0V5XOkJf0V5ZOjJf0V5ZOjJf0U5[OkJe0U5[OkJe0U5[OlJd0S5]OmJc0S5]OmJc0S5]OnJb0R5^OnJb0R5^OnJb0Q5_OoJa0Q5_OPK`0P5@PK`0o4ARK>n4BRK>n4BRK>n4BRK>m4CTKhKBX4>iKAW4?iKAW4?iKAV4`0kK^OV4b0jK^OU4c0kK]OU4c0kK]OT4d0mK[OS4e0mKZOT4f0lKZOT4f0lKZOS4g0nKXOR4h0nKXOQ4i0oKWOQ4i0oKWOQ4i0PLUOQ4k0oKUOQ4k0oKUOP4l0PLSOQ4m0PLROP4n0PLROP4n0PLQOP4P1QLoNo3Q1QLoNn3R1RLnNn3R1RLnNn3R1SLlNn3T1RLlNm3U1SLkNm3U1TLjNl3V1TLjNl3V1TLiNl3X1TLhNl3X1ULgNk3Y1ULgNj3Z1VLeNk3[1ULeNj3\\1WLcNi3]1WLcNi3]1WLcNh3^1YLaNg3_1YL`Nh3`1XL`Ng3a1ZL^Nf3b1ZL^Ne3c1[L]Ne3c1[L\\Nf3d1[L[Ne3e1[L[Ne3e1[L[Ne3e1[L[Nd3f1]LXNd3h1\\LXNc3i1]LWNc3i1]LWNc3i1^LbLfMh0k5g2_L^LlMh0e5j2_L\\LQNg0`5m2`LZLSNf0]5Q3`LXLVNe0Z5S3`LWLXNe0X5T3aLULYNf0V5U3aLTL[Nf0S5W3bLRL^Ne0P5Y3cLQL_Nc0o4\\3lMdLT2\\3mMcLR2^3PN`LP2`3QN_Lo1a3RN^Ln1b3SN]Ll1d3UNZLl1f3UNYLj1h3XNVLh1j3XNVLg1k3ZNTLf1l3[NSLe1m3\\NRLd1n3]NPLc1Q4_NmKa1S4`NkKa1U4e40O100O1000000O1000000O10000O100000000O100O1000000O1000000O10000O100000000O10000O100O1000000O100000000O10000000000O10000O100O100000000O1000000O10000000000O1000000O1000000O10000O100000000O100000000O100O100O1000000O10000O1000000O10000TJmJj1T5VNnJh1R5XNoJg1Q5YNPKe1Q5[NPKd1P5\\NQKc1o4^NPKa1Q5_NPK`1P5aNPK^1P5bNQK\\1P5dNQK[1o4eNRKZ1n4fNSKY1m4hNSKV1n4jNSKU1m4kNTKS1m4mNSKS1m4nNSKP1n4POSKo0m4QOTKn0l4SOSKl0n4TOSKk0m4UOTKi0m4WOTKh0l4XOUKg0k4ZOTKe0m4[OTKd0l4\\OUKc0k4^OUK`0l4@TK`0l4@UK>l4BUK=k4DUK:l4FTK:l4FUK8l4ISK6n4JSK4n4LSK2n4ORKKS55nJHT58lJEW5;jJCW5=jJBV5?jJ_OW5a0iJ]OY5d0gJZOZ5f0gJXOZ5h0fJWO[5j0eJSO]5m0dJPO^5P1cJlN`5T1aJiNa5X1_JYNTOdM^
6S4^JXNP6i1PJUNQ6k1PJTNP6l1PJTNP6l1QJRNP6n1QJQNo5P2QJoMo5Q2QJnMP6R2QJmMo5T2QJkMo5U2RJjMn5V2RJjMn5W2RJgMo5Y2RJfMn5Z2RJfMn5Z2SJeMm5[2TJcMm5^2SJaMm5_2TJ`Ml5`2TJ`Ml5`2UJ_Mk5b2UJ\\Ml5d2UJ[Mk5e2UJ[Mk5f2UJYMk5g2VJXMj5h2VJWMk5j2UJUMk5k2VJTMj5l2WJSMi5m2XJQMi5P3VJPMj5P3WJoLi5Q3XJmLi5S3XJlLh5U3WJjLj5V3WJiLi5W3XJgLi5Z3WJdLj5\\3WJbLj5^3WJ`Lj5`3VJ_Lk5b3UJ\\Ll5d3UJZLl5f3TJYLm5h3SJVLn5j3SJSLo5m3RJQLo5P4PJoKQ6Q4PJlKR6T4oIjKR6V4oIgKT6Y4mIcKU6]4mI]KV6d4S2001O0000001O001O000000001O000010O0000O2O00010O00001O00001O0O110O000000001O00kJ" + } + ], + "model_output": "A black cellular telephone with a physical keypad featuring white alphanumeric characters and a central navigation button surrounded by four function keys sits on a table. The screen is rectangular with a reflective surface, and the device has a curved design with a visible antenna on the top left corner.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_94.png", + "subject_name": "pillow", + "object_name": "couch", + "predicate_name": "on", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "kc05gc04M3O1O1O1O100O100O1O100O1O1O100O1O100O10000O1O100O100O100O100O1O100O1O100O100O100O100O1O100O1O1O1O10000O100O10000O10000O100O1N2O10000O10000001O1O1O1O001O1O1O1O1O1O1O2N1O1O1O1O1O1O1O1O1O1O1M3O1N2N2N2N2O2M2N2N2N2N3L`lW7" + }, + { + "size": [ + 640, + 480 + ], + "counts": 
"o`02d03AMeb0T1M5Z]OhN1NZb0f1L1OO1O1001N10001O10OO2O1O1O1N2O1O1O1O1N2O1O1O100N2002YN\\^Om0fa0PO]^Oo0ca0PO^^OQ1aa0lNb^OT1_a0jNb^OV1^a0hNd^OX1]a0eNe^O[1[a0dNf^O\\1oa0000000000O1000000000000000000O10000000000000000O10000000O100000000000O1000000000000O1000000000O1000O100000000000000O100000000000000000000O10O100000000000000000000000000000000000V_O^NdN2W`0a1SAiNm>W1k@]NlN>Y`0U1h@TOX?l0c@YO]?g0\\@@d?`0\\@@d?`0\\@@c?a0[@Ae??W@Ei?;m_OOS`01j_O2V`0Ni_O3W`0k100000000000000000000000000000000000000eM]LYDc3`;fL^DZ3_;iLaDW3Z;nLfDR3Q;\\LkBg0T2m2b:fM^EZ2a:gM_EY2a:gM_EY2a:gM^EZ2b:fM[E]2e:cMeDS3Z;nL]D[3c;dLPDj3PnLAQ3b0mL^OS3c0mL\\OR3f0mLZOR3h0nLWOR3i0nLWOQ3m0mLROR3Q1lLoNP3W1mIbMh2W1Z3Y1kIdMj2R1[3\\1gIgMk2m0^3h1aLXN_3j1_LVNa3l1]LTNc3n1[LRNe3P2YLPNg3R2WLnMi3S2VLmMj3U2TLkMl3W2RLiMn3Y2PLgMP4[2nKeMR4^2jKcMV4_2hKaMX4`2fKaMZ4b2bK_M^4Y1gIgNf12c4W1gIjNc1Of4g2WKZMa2dNUNV4TOXMg2bNUNY4POVMk2aNUN\\4lNTMW5P3bJSM^5Q3[JRMe5T3RJoLn5W500000000000000000000000000000000000000000eKgId1Y6\\NgId1Y6\\NgId1Y6g20eKgId1Y6\\NgId1Y6\\NgId1c5gKQKe2\\Od1c5gKQKe2\\Od1Y6\\NgId1Y6\\NgId1Y6\\NgId1X6]NhIc1X6]NhIc1X6]NhIc1X6]NhIc1X6]NhIc1W6^NiIb1W6^NiIb1X6]NhIc1X6]NhIc1X6]NhIc1X6]NhIc1X6h21O0000O1001O0ZIgIZ6Y6<0000000000000000000000000000000000000000O100001OO100O11O0000O1O1O1O1O1N2N2001OFkH^JU7a5PI[JP7d5QI]Jn6d5RI[Jo6d5<0eH\\Jo6c5QI^Jo6b5QI^Jo6c5PI]JP7c5PI]JP7c5PI]Jo6d5QI\\Jo6d5QI\\Jo6d5QI\\Jo6d5RI\\Jn6c5RI\\Jo6d5QI\\Jo6d5<0O10000000000000000000001O000000000000000kHZJe6f5[IZJe6f5[IZJe6g5ZIYJf6g5ZIYJf6g5?00000000000000001O0000000000001O00000000000000001O000000000000000000000000000000001O0001OO100000000000001O000000001O`LdHQ1\\7oNdHQ1\\7_20000TLSJ:m5WLTJ[3O>R6AnI?S6@mI`0T6_OlIa0U6^OkIb0U6^OkIb0U6Z300001O001O001O0000001O00000000000000000001O00000001O1O001O001O00000000O10000001O000000000000001OQLUI^1k6bNTI_1l6a2O00PLUI`1k6`NUI`1k6`200000000000000000000000001O1O00000000001O00000000001O000O1000000000000001O000000000001O00000O100001O00000]I" + } + ], + "model_output": "A black touring motorcycle with a large windscreen, dual 
rearview mirrors, and a top case is parked on gravel. The motorcycle features a visible license plate, a pair of rear luggage racks, and a black seat. It has a chrome exhaust system, a visible rear suspension spring, and a partially visible engine with cooling fins. The bike is equipped with a front fender and a visible front wheel with multi-spoke alloy rim.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_97.png", + "subject_name": "bench", + "object_name": "tree", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "k\\T1221P=`0E2M5M5J2O1O1O2N2N3L2O1O1O5K3M6J1O3L`0_O3`E_MS:c2hEdMU:Z2nEdMQ:Q2kEPNd:Q2\\EoMd:]2N2O0O1O1O1O2lEUMe9j2>00001N3N1N2N2O1N2O1O1N2N2O1O1N101O1O100N101O10O1O1M20100O1O1O010O10O01O1O1000000O1000O100000000000000001O00000000O1O1000000000000001O00002N001O00000000000000000001O000000000O100O1O0010000000000000000000001O000000000000000000000000001O000001O00000000001O000000000000O011N1O11O00O1O001000000000000000000001O1O000000000001N100O10O11N101O00000O2O0O2O0O1N2O1O1O1O1O1O100O10000O101O0001O000000001O00001O001O1O1O010N4M1N2O01O01O00001O0O101N110O000O2O000O110O001O000O1O1O11N1O101O1O0O11O0000000000001O1O1O001O0000000000000000O100000000000000O10000000000000000001O0O100001OO1001O00000O10001O01O00001O0O10001O00001O0000O100O10000O100000000O10000001O001O0O2O000O2O00000000OgMQFX1o9hNQFX1o9S1O001O1O00001O002OO01O1O1O3M00010O002N3NO010O0000001O10O01O1N4M1O1O2N1O2O0O1OfHmL\\M2f6o2PLmLZM40Hd6U3TLlLZM6i6k2TLQMR4n2W3N2N2N3N0O101M2N101000OO2O2N0JZEgMg:U2:L3O2K5I6O1L3DXDTOl;k0SDQOR[4^OPL:R4DRL9n3FVL7j3HZL6e3I^L5b3K_L4a3L`L3`3MbL1^3NdL1\\3OfLiMkNa0^4g1jLbMoNb0Y4k1iLaMQOb0V4m1mL[MSOd0P4Q2RMUMPOj0m3Q2WMoLROl0g3U2YMlLSOm0d3W2\\MgLVOm0_3\\2UO]Ml0b2ZOYMf0g2ARM?m22cLN\\3Q5O1O1O002O0O1O010O2N1O1O1N102N1M3O001N2O2M2N2O1O1O1N20nJ_MYO`2d0fM[OY2e0jMZOT2g0lMYOR2i0nMWOQ2j0oMUOR2j0oMVOP2k0PNVOo1k0oMVOQ2j0nMWOQ2k0mMUOT2k0jMWOV2P6N1000O10O101O00O0100000000000000000`JjMKV2J]NNc1NdNO\\11dNO\\11dNO\\10fNOZ11fNOZ11fNOZ11
fNOZ11fNOZ11fNOZ11eN0[10eN0Z10gN0Y10gN0X11hNOX11hNOW12iNNW12hNOW12iNNW12iNNX10iN0W10jNOW10jNOV10kN0V1OkN0W1NjN1W1NjN1V1OiN2W1MjN3V1MjN3V1MkN2U1NkN2U1NjN3V1MjN3U1NjN3V1MjN3V1MeNTKTOP5W2LdNUKUOo4W2KcN<]1DdN;[1FgN8Y1HhN7W1JjN5V1KkN4T1MmN2S1NmN2S1NmN2S1MmN4S1LlN5T1KkN6U1JkN6U1JjN7V1IkN6U1JkN6U1IlN7T1IkN8U1HkN8U1HkN8U1HkN8U1HkN8U1GlN9T1GkN:U1ElN;T1ElN;T1ElN;T1ElN;T1DmNU1BjN?V1AjN?V1@kN`0U1@kN`0U1@kN`0U1@kN`0V1_OjNa0V1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1]OlNc0S1^OmNb0S1^OmNb0R1^OoNb0P1_OPOa0o0@RO?m0BSO>m0BSO>m0BRO?m0AUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>l0@UO`0k0@UO`0l0_OTOa0m0^OSOb0m0^OROc0n0]OROc0n0]OROc0n0\\OSOd0m0\\OSOd0m0\\OSOd0m0\\OSOd0m0\\OSOd0l0]OTOc0l0]OTOc0l0]OSOd0l0]OTOc0l0]OSOd0m0\\OSOd0m0\\OSOd0l0]OTOc0l0]OTOc0k0^OUOb0k0^OUOb0j0_OVOa0h0BWO>h0CXO=g0DYOl0ATO?n0_OROa0o0^OQOb0o0^OQOb0P1]OPOc0P1]OPOc0P1]OPOc0Q1\\OoNd0Q1\\OnNe0R1\\OmNd0S1\\OmNd0T1[OlNe0T1[OlNe0T1[OlNe0U1ZOkNf0U1ZOjNg0V1YOkNf0V1YOjNg0V1YOjNg0W1YOiNf0W1ZOiNf0W1[OhNe0Y1ZOmM[Kc0[5`1ZOmMZKd0\\5_1YOiNf0V1ZOkNf0U1_OfNa0Y1@fNa0Z1_OfNa0Y1@gN`0Y1_OhNa0X1_OhNa0X1_OhNa0W1@iN`0X1_OgNb0Y1^OgNb0Y1^OgNb0Y1_OfNa0Z1_OfNa0[1^OdNc0\\1]OdNc0]1\\OcNd0]1\\OcNd0^1[ObNe0_1]O^Nc0b1]O^Nc0b1]O^Nc0b1]O^Nc0b1]O^Nc0b1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O\\Nc0d1]O\\Nc0d1]O\\Nc0d1^O[Nb0e1^O[Nb0d1_OZNc0f1]OYNd0g1\\OZNc0f1]OnMSK6^5l1_OmMUK3_5P2\\OmMVK1_5R2[OlMQ1T2^OiM6W2IjM7V2_5000QJmMf0S2ZOmMf0S2ZOmMf0S2ZOmMUKMV5V2EmMQKLN1\\5W2DlMRKML1^5V2DkMSKNK1^5V2DjMTKOJ1^5V22iMNW2HkMoJNY5V2ImMmJMZ5V2ImMmJNY5U2IoMlJM[5T2FYN:g1FYN:g1FYN:g1FYN:g1FTNmJG^5V2DXN;h1EXN;i1DXN;h1EXN;h1EXN;i1HhMQK2W5V22hMOX2b51O00O100O100O10000O1000000000000000000001O1O001O001O00O100000000000000000000000000001O000000000000001O001O00001O00001O001O002N2N1O001O2N1O1O2N2N3M2N2N3M1O3M1O1O2N2N1O1O001O1O001O1O1O1PJYLZ2g3]L^LfNNl4e3ZLaLeNNQ5a3XLeLdNKT5`3TLjLdNJW5\\3RLnLdNHZ5Z3nKTMcNE_5W3kKVNU4j1iKYNV4g1hK[NX4e1fK]NZ4c1dK_N\\4a1aKbN_4^1`KcN`4]1^KfNa4[1\\KgNd4Y1ZKiNf4W1XKkNh4V1UKmNj4S1TKoNl4R1QKoNP5S1hJTOW5Q1^JUOb5Z40001O001O000000001O00
00001O1O00001O001O1O001O1O1O1O1O00001O00000000001OO1000000000000000000000000000000000000000000O1000000O100001O00000000000000001O000000000000000000000000000000000000O1000000000000O1000000" + } + ], + "model_output": "An ornate, white-painted metal bench with intricate scrollwork and floral patterns adorns the backrest, featuring a symmetrical design with a central floral motif flanked by symmetrical leaf-like elements. The bench has curved armrests and a flat seat, with the metalwork extending to form the legs, which are not fully visible. The bench is in front of a tree.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_98.png", + "subject_name": "cell phone", + "object_name": "person", + "predicate_name": "held", + "mask_rles": [ + { + "size": [ + 640, + 443 + ], + "counts": "Rlm33mc0O01O1O010O3N1O000O101N1N20O100L4O10000001O0010O01O000001MVXT4" + }, + { + "size": [ + 640, + 443 + ], + "counts": "WcY19dc07J5J5N2M3M3M2N2N2O1N2O0O2N2N101O0O2O1O001O001O1N2O001O1O0KWNR^Oj1ga0^NX^Oc1T?eNiBi1m[OmAP1n=ZOhAj0T>c2M3M4M2M3O1N3M2O1N2O1O1O1O1O100O1O100O10000O100O1[OaIhC`6SjJ\\AW5f>d0]O1OUJmAj5P>UJRB2Ne5o=YJSBk5m=XJRBg5[>H6^JaAX5k>JN14M8I2O1N1M4K5L7HTNnKSDj3Z>G5L5K5K3M2MXLSM`Fk2^=L5XNe^Oa0da0TO`^Oj0ga0nN\\^OP1\\b0N5J2O3M4K2O2N1O2M2O1O4K2N2O3L2J_R`2" + } + ], + "model_output": "A woman with shoulder-length brown hair, wearing a teal tank top and dark pants, is seated with her legs crossed. She has a necklace with a pendant and is holding a smartphone in her hands, which are positioned in front of her. 
She is wearing white socks with a red and blue pattern on the side.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_99.png", + "subject_name": "person", + "object_name": "wall", + "predicate_name": "in front of", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "c]n25g>5M2N101O001O001O001N100O2O1O1N2O3M4L3QBROg=[1H4M4K6K9F5L2M3N000O1ZOgMeDY2Y;nMaDR2^;SN_Dm1`;j0O1O1O_M_De1a;l001O100000O010ZO`DZM0=a;X2jD[ME2b;a2h0O0O100O101O1N2N2XNbCk0aN4J_Y\\5" + }, + { + "size": [ + 480, + 640 + ], + "counts": "0W4i:00000000O10000001O00000000001O001O001O1O1O001O002N1O1O1O3MW1iN2N1O1O2N8H1OO1L4H8O1O100O10000001O001O1O1O000000O100O100_OWGiJk8i4m0M3N2N2O1O1O1_MfE;[:BmEZNGc1]:1PFYNFc1[:3\\FKe94_FIa95bFJ^92hFLX91lFMU91mFNT90oFOQ91oFOQ90PG1o8OQG1o8NRG2n8NRG2n8NRG2n8NRG3m8MSG3m8NSG1m8OSG1m81PG1o80PG0P91oF0P91nF2P91mF0R91mF0R91mF0R93kFMU94kFNR92nFOQ91oFOQ91nF1Q9OoF2P9OnF5o8LoF8ROPN6OS9j1bGm0]8TOaGm0_8UOQGTNLj2R9ClF>T9h2O001O1O2N1O2N1O0000O1O1M3O1N200O100N2O100O100O1oNPGRLP9l3TGQLm8n3[GiKg8V4\\GgKe8X4^GeKc8[4Q1O100000000000000000000000000001O00001O1O1O1O1O3Md0\\O1O1O1O2N001O00001O00001O00001O001O00001O001O002N1O1O2N1O2N2N2N2N1O1O1O1O0000000000000000O100O1O1O1O1N2O1N2O1RKTGf3n8VL\\Gc3e8\\L]Gb3g8ZL[Gd3P9QL]Gb3W:I3M3N1NlMPMZHP3f7VMUHh2l7ZMQHf2P8[MhGl2X8UMXG[O[Oi3^9mLUGY3k8hLTGc2BQM[9=QGa2NiLR9o0gFU2Q:lMmET2T:nMjEQ2W:QNhEm1Y:TNfEl1Z:\\11O100O10000O1000000000000001O000000001O1O1O2N2XF^Kn8d4PG^Kn8d4lFbKR9`4kFbKT9V5O1O1O001O00000000O1O1N2N2N2O1O1O1O1O1O100O100O100F:L4O1N2N2N2M3N2O100O1O1O10000O1000000O100000000001O0000001O00001O1O001O2N1O1O1O3M6J4YFQKU9^5O1O003M1O0000O1K5N2O1O1O1N2N200O1O1O1K5O1O1L4O100O1O1O1O1O1O10000O100000000O1000000001O001O001O001O2N1O001O2N002N2Nc0QFdJT9e5OFmFgJS9U5>L4O1O1O1N2O1]Oc0N2O100O10000O100000000001O1O1oEjKV9W4eFfKA:d9Q4jFRLV9Q4cFSL]9o3]FTLd9d40000000O100nN[FfLh9Z3XFeLP:GoEh23lLO9PN201N2JhZ^3" + }, + { + "size": [ + 375, + 500 + ], + "counts": 
"lTe21e;101O001O001N2O001O1O1O1O1O2N1O2N1O2O0O1O1O1O10O01O000001O000010O01O00010O001O010O00100O21N5K4L1O01O0O10001N1O101N10000O2O001N10001O001O0O101O1N101O1O001O1O001O1O1O1O100O1O001NWYQ2" + } + ], + "model_output": "A person wearing a black jacket with a high collar, black pants, and black shoes, with short hair and hands in their pockets, is looking at a kite.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_101.png", + "subject_name": "sand", + "object_name": "playingfield", + "predicate_name": "attached to", + "mask_rles": [ + { + "size": [ + 424, + 640 + ], + "counts": "b8d4d80000000000000000001O000000O10000001O00O10000000O100000001O0000000000O100001OO01000000000000000000O100000000000000000000O1001OO0100000000000000000O10000000000000000000000O11O00O10000000000000000000000000001O0O1000000000000000000000000000000000000O100000001OO100000000000000000O10000000000000001O0000O100001O0O01001O00O100000O100000000000000000001O00O1001O0O10O100001O00000000000000O11O000O1000000000O10000000000000000000000001N010000QLPHg2o7TMYHi2h7dLlHZ3c8N7I6J4L2N2N1O2N1O2N2N2N1O1O002N1O1O001O1O1O1O001O001O00001O001O00001O001_NUEc0k:SO`El0`:POdEP1]:nNeEQ1[:lNiES1X;N00001O2N1O2N1O001O1O001O00001O000000001O00000000000000000000000000001O0000001O000000O1000000001O000000O11O1O0000O1000000O100O1000000000000O1000000000000POZO]Eg0b;00000oN[O_Ee0_:]OaEc0]:@bE`0]:AbE`0]:AcE?]:AbE`0]:BaE?^:B^Eb0a:_O^Eb0a:@^E`0b:B[E?e:BZE>f:DXEC4L2M3N1O1O1O10OO2O1O2N1O1O0010O01O001O0001O00000000A?L4O1N1M3N3O01O1O01O01O1O1O1M4J6_IdJQ5a5eJhJW5h6UNnHWLLb0Y7R3oHZLN=W7V3nHXL?Oh6e3j1M5K4lMeF0d9IhFL^9KoF^Oe9OoFIYX9" + }, + { + "size": [ + 425, + 640 + ], + "counts": 
"`UW31X=0Vb23f]M2nL0iH1X7KiH6T:2M2N2O1O2O000O10000O1000000O1O2N100O1000000O1^FYOa6h0^I\\O_6d0\\ICb6=^ID`6=`IC`6=`IC_6>aIB_6>aIB_6>aIC[MBb33c1i0`MB]6>cIB:Bo2m0gLBZ6?fIDW6dLC]36iLJk4XMSIn0Z2j1S801O00000000000000000000000000000000O1000000000000000000O10000[NWMXG4Z1f2W7lMaHT2W7[NYHoN_Og2R8[NnGkN>U3\\7WNXHcN7OGY3d7ZNRIc1i6`N[I^1`6gN`IY1[6kNgIT1U6POkIP1Q6SOPJm0m5VOSJj0i5XOYJh0d5ZO]Jf0`5]O`Jc0^5^OcJb0]5]OdJc0]5YOfJg0b5oN`JQ1g5eN\\J[1k5YNZJg1P6jMUJV2_800000000000001O00000000000000000000000000000000nFYMa0E[5R3oIbM=F_5h2TJfM8Dc5f2TJoMO^Ok5d2RJXNIZOH]OHNQ6S3dJ?\\3jLUMg2ROn0[3mL]MU2XOn0o2^MdMd1]OnLGl3h2WNoMo0@U1T2UNZN\\5R1YKjNi4m0eKkN`41UHLo31l8N102LUel2" + } + ], + "model_output": "A person wearing dark blue denim shorts rolled up to reveal a lighter inner fabric, paired with black flip-flops, stands beside a black dog.", + "eval_result": "True" + }, + { + "image": "images/caption_detailed_103.png", + "subject_name": "umbrella", + "object_name": "person", + "predicate_name": "attached to", + "mask_rles": [ + { + "size": [ + 640, + 640 + ], + "counts": "gok54hc05I7L3K5M3L4L4M3L4N2N2N3M2O1O0O2O1N3N1O1N2O1O2L3O0N3M4K5M1M3M3N4K5I6G8DVC@l<>YC]Oi200O1N3M2O2M4M4L2N2N2N2M2O0O2O000O1M4O000001OO2O00010O0O100010O000000010O0010O00001N1M5M3M3M3[OZNnCi1f;gNTD[1i;P1L3N4L4L5K4M1N2N1000000O1000O10000O101N100O2O0O2O1O1O1O1O2N1O3M`0@3M3M001O001O002N2N0O100LYCPNiRC]OR=a0g0I7M4M3Moob5" + }, + { + "size": [ + 478, + 640 + ], + "counts": 
"2i>500000000000000000000000000000000000000000000000000001O0000O100001O000000000000O1000000001O0000000000O100001O0000000000000000O100000000001O000000jHMA3e7000O100nHOWO1h03UOMk06ROJm09QOGn0O^H6c6Ko0NaH7^6KZ16dNJ_15`NJb16\\NJf15YNKi14VNLk15SNKm17QNIP28nMHT27kMIV28hMHZ27eMI^26`MJa26]MKd25\\MJf24ZMLh23WMMk22TMNn22PMNR30mL1V3OgL1[3MeL3S90[J0\\L0d31\\LNd35YLKg36XLJj34VLLm31SLO^90O11O00fJ0fK0Z4NhK2X4MiK3W4MiK3`91SKLPK4m90PKMUK3j4OUK1j4M[F0j43k4L\\F1i43m9000iJNbFOj43d47YKIg47YKIf49YKGf4mFGe3K]5`0eF\\O2?h3E`5a0fF\\O0?j3D_5a0hF[OOb0i3B^5b0mF0c3^O`5d0jF0e3\\O`5f0hF1g3YO`5g0iF1f3XO`5g1_JYN`5i1_JWN_5k0oFNa3WO_5k0RGN^3WO_5l0SGM^3WO^5l0VGL\\3XO]5m0XGKZ3XO\\5n0[GKX3WO\\5n0]GLV3VO\\5n0_GMT3UO\\5o0aGKS3VO[5o0cGLQ3UOZ5Q1fGIP3VOX5R1jGGn2WOW5R1lGHl2VOV5S1oGGk2VOS5U1THEh2VOS5U1WHDf2WOR5U1ZHCd2XOQ5U1\\HCc2XOP5U1_HBa2YOo4U1bHA_2ZOn4V1cH@_2ZOm4V1fH@\\2ZOm4W1hH^O[2[Ol4W1jH]O[2\\Oj4W1mH\\OY2]Oi4W1PI[OW2^Oh4W1SIYOV2@g4V1TIZOU2@f4W1VIXOT2Ae4W1YIVOS2Cc4W1\\IUOQ2Db4W1_ITOo1Ea4W1bIROn1G_4W1dIROm1G_4W1eIPOm1I]4X1fInNn1J\\4W1hInNl1K[4W1kIlNk1MX4Y1mIiNl1NQ4_1TJbNk1OP4a1UJ_Nk10m3e1XJYNl12T3MkIk1W1TNj14o2c2kN]MQ1h2nNXMQ1k2mNUMQ1R3jNnLU1T3jNlLV1Y3eNgLZ1^3bNbL]1`3bN`L]1b3bN^L]1f3`NZL`1f3`NZL`1g3_NYL`1h3`NXL`1h3`NXL`1i3_NWLa1i3_NWLa1i3_NWLa1i3_NWLa1i3_NWLa1h3`NXL`1h3`NXL`1h3`NXL`1h3`NXL`1h3`NXLa1h3jIULa43e1k3[NULe1j3\\NVLe1i3[NWLf1f3\\NZLe1d3\\N\\Ld1d3\\N\\Le1c3[N]Lf1a3[N_Lf1]3^NbLd1\\3\\NdLf1Z3ZNfLi1Y3UNgLS2o2oMQM\\2d2dM\\M]2b2dM^M]2a2cM_M]2a2cM_M^2`2bM`M_2_2`MbMa2]2_McMb2\\2^MdMc2[2]MeMd2Z2\\MfMd2Z2\\MfMd2Z2\\MfMe2X2\\MhMd2X2\\MhMd2W2]MiMc2V2^MjMc2T2^MlMb2U2]MkMd2T2\\MlMe2S2[MmMe2R2\\MnMe2P2\\MPNe2n1\\MRNe2j1^MVNc2g1_MYNa2d1bM\\N_2a1cM_N]2`1dM`N]2^1dMbN^2[1cMeN_2Y1aMgN`2W1aMiNa2U1_MkNb2T1^MlNc2R1^MnNc2n0nGoN_53c2n0`MROa2m0_MSOa2l0`MTOa2k0_MUOb2j0^MVOb2i0_MWOb2g0_MZO`2e0aMZO`2f0`MZOa2d0`M\\Oa2b0`M^Oa2a0_M_Ob2`0^M@c2>^MBd2<\\MDi24ZMLl2LVM4j8000000000000000000000000000000000000001O0000000000O10000000000000000000000000000000000000000000000000000000000000000000000000aM7iEIV::hEFW:=gECX:`0fE@Y:b0fE^OY:c0gE]O
Y:d0fE\\OY:e0gE[OY:e0gE[OY:e0gE[OY:e0gE[OY:d0hE\\OX:d0hE\\OX:c0iE]OW:b0jE^OW:?kEAU:?kEAV:=kECV:0nA2P>MQB3o=MQB4n=LQB6n=JRB6n=JRB7m=IRB8n=IQB7o=JmA:R>EoAc0i=\\OXBf0f=ZOZBg0e=YO[Bg0e=YO[Bg0e=YO[Bh0d=YOZBh0f=XOZBh0f=YOYBh0f=XOZBh0f=XOYBk0e=SO]Bn0b=RO^BP1`=PO_BQ1a=oN_BQ1a=oN_BR1`=nN`BR1`=nN`BR1`=nNaBR1^=nNbBR1^=nNbBR1^=nNbBS1]=mNdBR1\\=nNdBR1\\=nNdBS1[=mNfBR1Z=nNhBQ1W=POiBo0W=QOiBP1V=oNkBQ1U=oNlBP1T=POlBQ1S=oNnBP1R=POnBP1R=POnBQ1Q=oNPCP1P=POPCP1P=POQCo0o\\O8[[Ld0m;QOQEMVOc1i;`NQEMVOd1h;_NRENUOc1i;_NoD3UO_1l;^NoD=VOi0l;jNmD4VOO3S1i;jNnD4WON2T1i;jNnD4WOL4V1g;jNnD5AP1b;kNnD5_OP1c;kNnD6^OP1c;jNoD8[Oo0f;iNoD8[Oo0f;iNoD8ZOGLR1l;oNnD8YOGNa0NKm;EnD8WOI0?NKm;EnD9UOI2=OLl;EnD9UOI2>NJn;FnD8YOE14N`0j;_OnD8\\ODM4O`0k;@mD8\\ODM4Oc0h;]OPE8\\ODL6Ob0h;\\OQE8[OFM4Nb0i;\\OQE8ZOGN4Lb0k;[ORE7XOI0h0LVOj;2SE6WOI04L;Pl:F^F8b9H^F9a9G`F8`9HbF6^9JdF4\\9LfF3Y9MhF2X9NiF0X90hF0X90hF0X90hF0X90hF0X90hFOY91gFOY91hFNX92j200000000000000000000000000000000001O000000000000O100001O0000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001O00000000000000000N" + } + ], + "model_output": "An individual wearing a red ski jacket with a black collar, black gloves, and a black helmet with goggles is in the snow.", + "eval_result": "True" + } +] \ No newline at end of file diff --git a/evaluation/GAR-Bench/model_outputs/gar_8b_simple.json b/evaluation/GAR-Bench/model_outputs/gar_8b_simple.json new file mode 100644 index 0000000000000000000000000000000000000000..fb08349325632437b9efdddf2b1860c26914e432 --- /dev/null +++ b/evaluation/GAR-Bench/model_outputs/gar_8b_simple.json @@ -0,0 +1,2240 @@ +[ + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000227491.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": 
"[f`46i>3N001fA3`0DQ<9^CU1_4M1O2O000O22M0000O010O00001O0QDJ7FT6a0bI6N\\O_6>ZIf0OmNg6>VIn0IgNR7;RIU1GaNV7;QIY1D_NY7:oHl1_N^M>?U8:iHS3Y7mLcHU3]7mL`HT3a7lL]HU3d7kLZHW3f7iLXHX3i7hLVHX3k7hLSHY3n7gLPHZ3P8gLnG[3R8eLmG[3S8eLlG\\3T8eLkG[3U8gLhGZ3X8gLgGY3X8jLfGV3Y8lLfGT3X8oLfGQ3T8WMkGj2m7^MRHb2i7cMWH]2f7gMXHZ2e7jMZHV2b7nM^HR2_7RN_Hn1`7TN`Hl1^7VNbHi1]7ZNbHe1^7\\NbHd1\\7^NdHa1\\7aNcH^1\\7dNdHnNKaNc0ENc0R7[2aHgNo1bNc5g2]HgNV2[N^5o2[HgNQ:T3O1O1O1O102N2M2O3K4aHhJc42^I[5i1gJo33XJl5c1UJd3P7YLQIe3U7ULnHj3U7PLnHo3W7kKkHT4^7ZKlHe4\\7nJjHQ5c8O100O1O100O100O100000000000000000000000000000000001O000dMWKaJi4[5ULjIm3S6XLUInNJk4P7UMjHo2T7_20001O001N2XI[HS6g7hI_HU6b7mI]HQ6c7PJ^Hn5d7RJ[Hm5f7SJ[Hk5f7TJ[Hk5f7SJ^Hj5c7UJ_Hh5c7WJ_Hg5^8O001N2N2O1O1O2L3M5L4L2N4L2M3O1O001N101O001N101O0O2O1O0O2N101N2O0N3L4M3M3M3L4B>M3M3M3DUD^MP<_2:N2O2N2M2O2N2L5J6WOlBUOc=;YWf4" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_0.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000029397.jpg", + "mask_rles": [ + { + "size": [ + 449, + 640 + ], + "counts": "Zl95l=1O1O001O01O0000000010O012MU6CaIg0^6ZOVIQ1j6S301O001O0001O00000000000000001O002N3M:QIiH`6^7N100O1O00001O000000001O00001O00001O0001O0000000000000000000000000O100O100O10000O1O1N2O1N2N2N2O1O1O1O100O101O0O1000000O10000O100O100O1O1O1O10001O0O10000000000000O11O00000000001O000000O2O00000000000O1O1O1N2J6J6K5J6L4J6J6L4J6K5I7J6K5K5K5I7I7K5K5M3J6K6L3J6O10001O1O2N2N2N2N3M2N2N3M2N2N2N3M3M2N2N2N3M4L1N102M5I6H7J8G9Edm<" + }, + { + "size": [ + 449, + 640 + ], + "counts": "Qa_4?_=6L2L5L3M3N2N1O3M1O2O0O100L5ZOe0L5J5M3N3M2O1N2O1O2L4N4lN`M^Fd2k8f1E;@`0H7L301O1O1O001O1O00001O000000001O00000000000000000000000000000000O1000000000000O100N2O1M3CXH\\Jm7]4`HeKA5Z8S4W1M3]Oc0M3L4L4M3K5L4I7F:I7H8J6E;H8EXCVOTR;5a[E6H7L4RLBSJa0h5LnI7o56dI7n5g0UI_Of6e0UI^Oi6h0PIK^68oH9P7LcH=\\7T30001O001O0000000000001O00000000001O000000000000000000O1000000O10000O\\NlI]KS6Y6O1O1O1N2N2N2O1N2N2O1O10000O10000O10000000000N2gNgGjLY8U3iGiLX8U3kGjLU8T3PHgLR8V3SHgLn7V3WHfLk7X3XHcLl7X3[H`Lk7[3`1L4\\MQG0U9M\\G_Oj8?]2O1O1O1O1N2O1O1O1N2O1O1O1N2NSaZ1" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is standing on .", + "image": "images/caption_simple_1.png", + "model_output": " is on ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000163117.jpg", + "mask_rles": [ + { + "size": [ + 500, + 376 + ], + "counts": "[o0Q82QH\\7V8N000O2HhGRIX8U7001O01O000000000O1O1O1N2N2dL`HcNa7Z1cHdN^7Z1eHeN[7[1gHcNY7]1kH_NU7a1oH[NQ7f1nH[NQ7e1oH]No6d1oH`Nn6a1PIaNo6`1PI`NP7`1PI`NP7a1PI_No6d1nH]NQ7d1jHVKOV3W7e1hHWK0T3X7S5000O1O100000000PNgHfKX7Y4jHfKV7Z4kHeKU7Y4PIeKo6Z4UIoIE^1U7b4^I^Kb6b4^I_Ka6a4_I^Kb6b4]I_Kc6a4]I^Kd6b4ZI_Kg6b4XI]Ki6d4XIYKi6h4WITKl6m4SIQKo6P5QInJP7S5gHhIMU1\\7S5aHhI120S1^7T5_HjI022o0_7l601O2N4L1O1O`HhHc6X7]IjHa6n0iHT5c0RJb6T7[IUIA@h6[7eIYI@]Ok6Z7eI]I[6c6dI]I]6c6cI]I]6b6dI\\I^6d6cI[I^6d6bI[I`6d6`I\\Ib6b6`I]Ib6`6_IaI`6_6`IZI]OES7?nHl5c0VJ_6NmHk5f0VJ]6_6dIQI[O3LMU7P7cIoH@0J0T7P7[1O02M2L4WKhHQ1\\7nNhHn0[7POfHP1Z7POdHeLMX4_7TOhHl0X7TOhHl0X7VOfHj0Z7YObHi0]7i3M2O01O01N11O0000O100000001O01OO3N3M001O1O2N00TMVIoLi6P3ZInLf6R3[ImLe6R3\\InLd6Q3]InLd6R3\\InLd6R3\\ImLe6T3ZIlLf6U3ZIiLg6X3WIiLa5cMoJN@g5InLi5\\MgJNA47\\8[6cGeIX8b6fG]IY8P7O2N1O01ZOlGmIR8k6O1O01001N0100001OO01UMgH[MY7d2gH\\MZ7d2fH\\MZ7d2gH[MY7e2gH[MY7f2fHZM[7f2bH\\M]7^501O0000O1UKaH`1a7^N_Ha1c7^N^H`1c7\\N\\HQM3a4b7]NcHb1_7\\NcH`1`7`NbH\\1_7dNcHY1_7gN]H\\1e7dNUH^1d0fK^6n2iH]1j0fK\\6X7gIhHZ6U7U1M2O00101aNgG^JIo0_8d4kG]KU8a4mG_KS8^3fGZL89R8\\3gGZL7 and ?", + "answer": " is flying over .", + "image": "images/caption_simple_2.png", + "model_output": " is over ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000170613.jpg", + "mask_rles": [ + { + "size": [ + 640, + 439 + ], + "counts": "eiT57gc03M2N2O2K4N2K5L4O1O1N2jLL^B6^=n1UAjL0`1k>`3N1OkLeAj0X>UOmAj0R>TORBj0n=SO\\Bg0c=WOeBc0[=YOmBd0S=YORCe0mVS<@oC`0P<@RD>o;@SD`0m;@TD>l;AWD>i;AYD=i;AYD;j;DXD4o;MQDKV<5kCD\\<l8BTG8S9HkFGg99YF]OQ:c0nEYOX:f0hEUO^:j0bEPOd:P1]EhNk:2\\BMk2HQ;8YBM_a03a^OM_a02c^OM]a02d^ON]a00d^O1[a0Ng^O1Za0Lh^O4Xa0Jj^O6Va0Il^O6Ta0Hn^O8Sa0Eo^O;Qa0DQ_O;o`0Cj^OCDj0ca0_Oj^OJBg0ea0ZOl^O1^Of0bb0ZO^]Of0\\b0WOa]O34e0Zb0ZO`]O26d0Zb0Cf]O[MgNIg2e;UNVG`0ZMeNJf2e;VNTGc0[M[1a;RNRGg0[M^NNe2d;WNQGl0bMh0\\;]NoFn0jM`0W;aNmFR1lM=W;aNkFU1mM:W;bNhF\\1oMOZ;fNoDH5j1\\O^M1Y2^;8iD:FVM4V2^;c3]DWJ4V2`;e3_DVJMR2d;j3_DRJOQ2d;o3aDnK`;S4aDgKc;Z4\\DcKg;W63L5gNjCbJ\\<^5eC[Jb^6d;]JYD`5h;dJVDX5m;jJQDQ5S`0VBUOP>k0UBlNo=S1YB^No=a1ZBoMm=P2S2O1N1O2N3L3K5K6J7H9G8H;EVhV4" + } + ], + "question": "What is the relationship between and ?", + "answer": " is talking to .", + "image": "images/caption_simple_3.png", + "model_output": " is looking at ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000465822.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "dh^31e;1ig72RXH4J7I8J4M3N3L4]OROVFR1h9RORFR1k9?M3N3L2O100O2N1O1O1O1O1O2N1O100O2N101N1O1O1O2O0O101N1O1O101O0O2O0O100_HQMZ6P3dIQM[6P3bISM^6m2`IUM`6k2_IWM`6i2]IZMc6f2QIgMn6Z2oHhMR7W2kHmMT7[3010O0mLfHi1Z7VNiHh1X7WNlHf1S7YNQId1o6\\NTIa1m6]NWIa1i6]NZIa1g6]N\\Ia1e6]N]Ic1c6XNcIf1_6UNfIi1Z6TNjIk1W6RNlIbNh0`7dNRJc0`Nh0W9WOkFh0T9XOnFg0R9XOPGg0P9VOTGj0j8VOXGi0h8VO[Gh0f8WO\\Gh0c8WOaGg0^8YOcGg0ZOnNi8:oGS1o7kNSHU1n7hNSHX1o7dNTH\\1S9001O010O10O00010O01O010O001O010O00010O10O010O01O010O0100O010O0010O0101N1O2M2I8YOl0BcPb0" + }, + { + "size": [ + 375, + 500 + ], + "counts": "f[b41c;5M3M3N3L2N3N3M2N2N10O01O000000O2N100O1O2N100O101N1O1O2O0O1O1O1O1O101N1O1O101N1O2N`[e0" + } + ], + "question": "What is doing with the ?", + "answer": " is looking at the .", + "image": "images/caption_simple_4.png", + "model_output": " is holding the ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000278353.jpg", + "mask_rles": [ + { + "size": [ + 640, + 596 + ], + "counts": 
"32kc0k0XO2N001O000000001O000000001O000000001O0000001O00000000001O0000001O00000000001O0000001O0000001O0000000000001O00000000001O00000000001O000000001O000000001O000000001O0000001O0000001O0000001O0000001O000000001O0000001O000000001O0000000000O10000O100O100O1O1O100O1O100O100O100O1O1000000O100000000000000000000001O00001O000000001O0000001O00001O0000000000001O0000000000000000000000000000000000001O0000000000001O0000001O001O0000001O00000000000000001O0000001O1O001O001O1O001O1O001O001O001O1O001O00001O001O001O00001O001O00001O00000000001O00001O00001O00001O0000001O0000001O0000001O001O0000001O000000001O000000001O001O0000001O00O1000000O100O100O10000O100O100O10000O1000000O100000000000000O10000O10000000000000000001O0000000000001O000000001O000000000000001O000000000001O001O000000000000000000001O00000O1000001O0O100O1N2O100O1N3M2O1O2N1N2N3N1O1O1N3M2O1N3N100O100O2O0000000000d^OROg?m0V@WOi?i0V@YOi?h0V@XOj?h0V@YOj?g0T@ZOl?g0S@ZOl?g0R@[Om?f0R@[Om?f0`_OoNG=i`0a1P_OcNo`0X200O1O1O1O1O1O010O001O1O1O2N1O1O1O0001O01O0000001O01O0000010O0000000001O0000001O00000001O01O00000000001O0001O01O000000001O01O000001O01O00010O00000010O0001O0001O000001O0000010O000001O00000010O00000001O000000010O00000001O01O00001O01O0000010O000000000010O00001O0000000010O0001O0000000010O00000001O00000001O00001O;bLX_O46KO67HL001Je0[?" 
+ }, + { + "size": [ + 640, + 596 + ], + "counts": "[me1k0gb0d0C;CO1O1O1O10O01O001O0000jMhJdEX5[:nJ`ER5_:SK]Em4c:UK[Ek4f:WKWEi4h:\\KTEd4l:^KREb4n:`KPE`4P;bKmD_4S;cKkD]4d8bKgG5AZ4W8YLPHBCV4o7kLZHROBT4P8RMZHlNCS4Q8VMYHjN@S4U8XMXHh4e7^KXHb4g7`KYH`4e7cKYH]4g7dKXH\\4i7fKTHZ4n7gKnG[4R8eKmG[4U8eKhG\\4Y8jK^GX4c8c3O0100O002N1O2N000010O01O1O001O2NnHQET5o:lJoDV5P;m110O101OO01000O0100O1000_HREo5m:QJTEo5l:PJTEQ6k:oITER6m:mISET6l:lITEU6l:`1O00ZHUEX6k:gIUEY6k:gIUE[6j:dIVE\\6j:dIVE]6j:bIVE^6j:bIVE_6j:`IVE`6j:\\10SHVEb6j:]IWEd6h:\\IXEd6h:\\IXEe6h:YIYEh6f:YIXED1W6g:TJXEB6Y6b:UJXEB6Z6a:TJYEE3W6d:TJYEE3VOOn6e:VJYEC0[O100l6f:VJYEC0@1g6f:VJYEL1n5g:UJXEI1TO1o6e:RK^En4b:RKYEPN2n6f:TKZEm4e:PKZEPN1P7e:oJ]ES5d:lJ[EU5e:S201O01O0010O0001cGWEa7i:^HXEb7h:_HWEa7j:j001O01O000010O\\GWEQ8i:oGWEQ8i:d0OWGWE[8j:cGWE]8k:aGUE_8l:`GTEa8l:9005K0001N1M4N2O1N2N2O2M3N6J2M5L5J6J5L1O1N2O2N1N3N2N2M2O2M3N1O2N1O1O1O1O1N2O1O1O1O1N2O2N3M3M2N4L3M4L5fKlC>X00000O1010O1O001O10O0000000jIoL]KQ3^4YM]Kg2b4]M[Kc2e4cMUK]2j4lMnJT2R5RNhJn1Y5UNdJj1]5[NnGWNUN1l1]3Q8_N`G@GP2j8dNVGGIe1Q9gNnFLN]1T9nN^F39o0Y9U2dFjM]9X2_FiMa9Z2[FgMe9\\2XFdMh9_2SFcMm9X600001O0000000O20O00O100O1O100N2N2O1O100N2M3N2O1O1O1N2O1N2N3N1N\\FZGS8d8mG]GS8c8lG_GS8`8mGaGS8_8kGdGU8Z8jGhGW8W8hGkGX8T8gGmG_8m7_GUHj8b7TGaHR9Y7kFiH[9Q7bFSIV:U6iEkIb:j5^EVJg:g5PEaJS;\\5gDjJ\\;U5^DnJe;f63M4gIVDS4Y and ?", + "answer": " is in front of .", + "image": "images/caption_simple_5.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000128051.jpg", + "mask_rles": [ + { + "size": [ + 360, + 640 + ], + "counts": "U4V4R7000000000000000001O00000000000000000000000000000001O000000001O00001O2N1O001O00O10000O1O100O1010O00000O1001O01O1O0000000000O1L4O2N10000000000000001O0O2O00O11O000001O010O001O00001O3M4M2M1O1O2N1O001O01O00001O0010O01N10001O1O001O1N3M2N3M2M4K5J6E;K5K4O3L3N3M2N2M5L2N4K3L5J8SO^FFi9DXF8 located relative to ?", + "answer": " is in front of .", + "image": "images/caption_simple_6.png", + "model_output": " is in front of ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000560266.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "ocP1335iZLAf3>\\LAd3>^L@c3`0_L^Ob3`0aL]O`3b0cL\\O^3b0dL\\O_3a0cL^O_3?dL^Oe39]LFe36`LFd35hPY4" + } + ], + "question": "What are and doing to each other?", + "answer": " is looking at and biting .", + "image": "images/caption_simple_7.png", + "model_output": " is biting ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000024919.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "iah21bYQ14bSoN0O2O0O1O1O000UN8nE1k9 and doing to each other?", + "answer": " and are pushing each other.", + "image": "images/caption_simple_8.png", + "model_output": " is pushing ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000231169.jpg", + "mask_rles": [ + { + "size": [ + 454, + 640 + ], + "counts": "WaS2=f=g0SOb0E9H4L3M2N4K6K3N1O1N3M4M3L2O2M4M3L4M1O1O2M4M2N1N2O2M4M2M2O1O2N1O2N1N2O2N1N2O1O2N1O2M3N001OSLmFi2P9WMSGi2k8VMWGj2g8WMZGj2d8UM]Gl2b8TM_Gl2`8TMaGl2^8SMdGn2Y8RMiGm2W8SMjGn2T8QMmGP3R8oLPHQ3o7oLRHR3l7nLUHQ3k7nLVHS3i7mLXHT3f7kL\\HU3b7kL`HV3^7jLbHW3]7hLeHY3Y7fLiHZ3U7fLlH[3S7cLPI]3o6bLRI_3m6aLTI`3j6_LWIb3g6^L[Ic3c6[L`Ie3_6ZLcIf3\\6YLeIh3Z6TLkIk3U6SLnIo3o5nKUJT4h5jKZJW4e5gK]JY4i7000O10000O10000O100O2O001N1O10000O2O0O01001N1O10O010001N10O11O1N1O1O10001N0100O02N2N1O1O1O1O2N1O1O1O100O2N1O001O2O2L10100O3M10O00010O10O10O0100O10O001000O01O0001O010O0001O1O010O000010O0000O2O001N2O001O001O000O2O1O1O001O0010O0000O3OO01O1O001O001O1O001O00000010O2N1O2NO2M2000001O00001N101N1001000O0000010O001O002N001NZGTKT8l4c000N3N2O001N2O1N2O001O1O2M1O2N102L3L4M3O1O1O2M2N2UObEkMb:P2bEmMi;AVDLW1>^NnA1O0000Oea1" + }, + { + "size": [ + 454, + 640 + ], + "counts": "^_^51U>00000000001O2N2N010N2O1O1O1O1O001O2cFCm4>RKBo4>\\41O2N1O1O1O1O3eB0OXOg04XO064W:b3L0O3jEZLg9S4O0100O1O3Ng0XO2N1O10O6J100cE]LU:d37O02N1O2O0O010O01O0000001O001O00000000001O000000000000000000000001O00000000O11O000000O1000000000000001O00001O00001O001O000000000000hMeEe0[:WOlEe0U:[O^FeNVO0f00ZO:\\:R1Z2O10000000000000000000000000000000000000hFdNf5\\1ZJdNf5\\1ZJdNf5\\1b30000000000000hFeNe5[1[JeNe5[1[JeNe5[1[JeNe5[1c300000000000000000000001O001O001^FaN[6_1dIbN\\6^1aIeN_6\\1[IoN_6R1^IQOa6o0^ITO`6m0UG`NK1?N>d0S8Z3RGfLm8^3nFeLQ9X40000000000000000000000000000000000000001O0000000000O11O\\OnFhK22P9i3fGUL[8j3gGULY8k3hGTLX8l3iGRLX8n3P100000000000000000000001O00001O1O:F1OO010000000O10000000000O10000000000000000O1000nF" + } + ], + "question": "Where is located relative to ?", + "answer": " is beside .", + "image": "images/caption_simple_9.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000551822.jpg", + "mask_rles": [ + { + "size": [ + 453, + 640 + ], + "counts": "Y_P3e0Z=]OhBn0o and ?", + "answer": " is attached to .", + "image": "images/caption_simple_10.png", + "model_output": " is attached to ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000498463.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "j`T7a0]>201O000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001O0000000000000000000000000000000O100000000000000000000000000000000000000000001O001O000000001O000000000000000000000000000000000000000000TH" + }, + { + "size": [ + 480, + 640 + ], + "counts": "l_T7i0V>2O000000000O1O1bNVOdDl0Y;YOaCKd0V1k;YO_CY1` and ?", + "answer": " is on top of .", + "image": "images/caption_simple_11.png", + "model_output": " is on top of ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000275198.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "^[P34j>5J5M1M4M2N2N2M3N2N2M3N2N2O1N1O2N2O1N2O1O1O1O1O1O2O1N1O010O10000VO\\NWDf1c;jNRDU1l;POUDm0j;UOVDj0i;XOTDYOKX1P[O1K[NoM9Oo00W1j2WO\\O2KbNPNn00V1h2YO]O5HTNRN21N0X12iNMX2g2@^O5G[NUNQ1OhN0X2b1eM`N6e0e1^17EX2lNnLNd0_1:EQNXNY10bN1X2nM;ZMWO_3P1nMQO[3;QN;i0[O_Lg0]1XOeNN`4b0^LQOW64R1l0gHPOY17k:h0mCSOU17n:f0kCUOV15R;k0mDVOmNGO3i;o0[E^OkNCj;n0Q2F[BXOf=h09O00100O1O10O10O10O0010O1O1O10O01O001O01N11O101N00100O010O1OO2N1100O00O2O1O100O0O2N2001OO100O10000O10ON32N0WOQB`0X>O1N2O1O1L6IoRe6" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is looking at .", + "image": "images/caption_simple_12.png", + "model_output": " is looking at ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000257896.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "`fV46dc0`0D:F9H4M1N1O2O2M101O0001N2O0O1O2N2N1O2N2N2M3N1O2N2N1O2N2N2N1O2N2N1O2M2N3N1O2N2N1O\\Q[4" + }, + { + "size": [ + 640, + 480 + ], + "counts": "ZT`21mc04M4K4M2O1N2O1N100O101M5M1N101N2O2M2O000O2N102N1N101N101[CnNZ6S1cIVOV6j0hIYOW6h0eI_OV6d0eIDV6 and ?", + "answer": " is wearing .", + "image": "images/caption_simple_13.png", + "model_output": " is over ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000034417.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "hc[4?n:=I:G4N1O101N10000J6O10000O2O00000000lNQF`0o9^OTFa0m9^OTFb0k9\\OWFc0k9VO\\Fh0[:A>O2O3N1O100OO02O0O100O010O0010O01O0010O01O000O101NVUi0" + }, + { + "size": [ + 375, + 500 + ], + "counts": "kZm35\\;7I6K6L3M3N3N2M4iEkNg9d1N2O0O1O1F:O1O01O0O2C=N2O1ROmE12\\OT:?SFBI4\\:9SFFn97TFIn94SFLo91RFNQ:MRF3i:O100O2NTf]1" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_14.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000047585.jpg", + "mask_rles": [ + { + "size": [ + 640, + 424 + ], + "counts": "V91Z2>_1C`L0f00000O100O10000000000O10000000000000000O1O10000000000001O001O00001O000000001eNd]OP1\\b0lNi]OS1Wb0kNk]OU1`b001O000000000000000000O100O1O1O1O1O1O1KeNd]O\\1\\b0dNc]O]1]b0dNb]O\\1ab00001OO10000000000000000000000001O0000000000LdNd]O20T1\\b0jNk]OU1ab0O001O00000000000000O10000000000000000j]OlN`a0T1f0000O10000O100O1O1N2O1O1O100000000000000000000000000000000bGiNSNW1m1oNmMQ1S2XOXFJ`6n0X35YF_No5\\1g37WFbNo5W1j3k0SLUOm3n0oKSOQ4o0mKQOS4S1iKmNW4T1hKlNX4V1eKkN[4V1dKjN\\4X1bKhN^4[1_KeNa4\\1^KdNb4^1\\KbNc4c1YK]Ng4g1TKZNl4l1nJTNn4`NUF_3j4QNQ5`NUFc3e4nMV5_NUFh3`4iM`5kNnER3\\4TNP6^NkE\\3R4VNQ7X1lHhNd7l0XHTOZ8=cGC_8?]GAe8a0XG^Oi8e0SG[On8i0mFWOU9n0dFRO]9b1hD]MWOQ1QlFIQ9:jFJT99fFLW97dFNY96aFO\\93aF1\\91]FhK[OX4X:1]FhKYOX4Y:0aFeKXOZ4U:MmF2S9OmF1R90nF0R90oFOP92PGNo83RGLm85RGKo86oFKP95QGKo84SGKl85WGIi87TGKl85TGLk85UGKk85UGJk87UGIk87UGIj88VGHj88WGGV7nKoH]4JEU7SLnHX4MET7ZLhHS43DS7[LiHR42DT7[LjHQ42DQ7aLiHm31FT7dLeHg34IT7dLfHd34IU7hLbH`36Km6^KbHY1:`36Jm6PMkH[36Cn6TMjH[37Am6WMjHZ37Al6YMiHY39_Om6j2mHZMQ7m2hHTMW7k7O2kH\\Cl6m<0O0O3N1O010N1O101O000000000O1O1O1N200O100001O0002N1O001O1O0000001SEfHR9[7kFiHS9W7jFlHV9U7gFoHW9R7eFRIZ9o6dFSI[9n6dFSI[9n6eFRIZ9P7eFoH[9T7cFlH\\9V7mESH and ?", + "answer": " is standing on .", + "image": "images/caption_simple_15.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000234757.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Y\\Z52k>6J5K4L5K5K4M3N2O1N2O100O1000000000001O00000O100O0O1O2N10O0jN`Bn0`=ROaBm0_=ROcBn0\\=ROeBP1X=POhBP1X=POiBP1V=QOiBo0W=ROhBn0Y=SOeBm0[=<1O1\\OoBWOQ=e0VCXOk1iA0V>OjA3U>MkA3U>MkA3U>MkA4T>MkA3U>NjA2W>NiA1W>;2O001N2N2N4JbRT3" + }, + { + "size": [ + 480, + 640 + ], + "counts": "oXT4:e>2O1N3N1O0aE>QLUOZ9=UJg1V5ZN]J[2[5eM^Jl2W5VMbJX3V5hLaJg3X5ZL]JV4^5jKZJd4`5\\KYJT5^5lJ^J_5[5bJ`Jf5\\5[J^Jk5a5VJYJo5g5QJUJS6k5mISJU6l5lIoIZ6P6fIlI^6T6bIhIb6X6]IdIh6[6YIcIi6]6m000O10000000O00100O1000O1000O1000O100000000O101O001O1N1000000O100000SNXKaIi4m5^2B>iNW1kNU1iNV1_Nb1O1O2M201N2N2N2M4L[bT4" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is in front of .", + "image": "images/caption_simple_16.png", + "model_output": " is beside ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000008899.jpg", + "mask_rles": [ + { + "size": [ + 539, + 640 + ], + "counts": 
"Z>e1V?0000000000000O1000000O10000O1O1O100O100O100000000O100000000001O000000000O101O00JZNXAe1h>[NXAe1g>[NZAe1e>]NYAd1e>^N[Ab1c>`N]A`1d>^N]Ab1c>\\N_Ad1a>\\N^Ae1b>YN_Ah1i>10000L4N2000000KoM`AQ2`>oM`AQ2`>oM`AQ2d>1O100O1O100O11O000000O1O1O100O01000O10000O1000XNbAS1^>kNeAT1[>lNeAT1[>lNeAT1[>lNeAT1[>kNfAU1Z>jNgAV1Y>iNhAW1X>hNiAX1W>gNjAY1V>gNjAY1V>fNkAZ1U>eNlA[1T>dNmA\\1S>cNnA]1R>bNoA^1Q>aNPB_1P>aNPB_1P>`NQB`1o=`NQB`1o=_NRBa1n=]NTBc1m=[NTBe1a>0000000000000000000000O10000000000001O00O100000000000000000001O2N:FY2PBfMP>[2PBdMQ>\\2oAeMP>[2QBeMn=[2RBeMn=[2RBeMn=[2PBfMQ>Z2PBeMP>[28000iAeMn=[2QBgMn=Y2SBeMo=Y2:000000000000000O10000000000000000jAeMl=[2SBfMm=Z2:000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000000001O0000O100O100000000O1000000000000000000000000000000kAbMm=^2SBbMm=^2800000000000000000000000000000O100000000001O0000O1000000000000000000000000000000000000000O11O0000000000000000O11O0000O1000000000000000000000000001OO0101O0001O0O10000000000000000000001O000000000000O1000000000000000000000O1001O00O1000000000000000000000000001OO10000001OO100001O00nNfACZ>=hA@Y>`0hA_OX>`0kA_OT>a0mA^OS>b0mA]OT>c0mA\\OS>d0nA[OR>e0oA[OP>e0RBYOn=g0SBWOn=i0SBVOm=j0TBUOk=l0WBROi=n0WBSOh=m0YBQOh=o0YBPOg=P1ZBoNf=Q1[BnNe=R1\\BmNd=S1\\BmNd=S1]BlNc=T1^BkNb=U1`BiN`=W1aBhN_=X1aBhN_=X1aBhN_=X1aBhN_=X1bBgN^=Y1bBgN^=Y1bBgN^=Y1aBhN`=W1aBhN_=X1aBhN_=X1aBgN`=Y1`BgN`=Y1`BgN`=Y1`BgN_=Z1bBeN^=[1cBdN]=\\1Q10001O00O100000000001O0000000000`AeN`=[1o0100001O00000000000000O11O00000000000000O1001O00000000000000000000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000000001O0O1M4ZO]@Aih5h0PgJ=H2O000001O000000001O00001O00001O00001O00^B" + }, + { + "size": [ + 539, + 640 + ], + "counts": "VTZ22e`08J4M2N2O0O1_OAf@`0X?Cf@=Y?Ee@i0N1O1J6O1N3L3N200O100000O102N1O2N2N2N6I2O1O3mNbAC`>_OfA6?Fm=3dA6e0Ai=7dA6^?He@4^?If@2^ea7" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is on .", + "image": 
"images/caption_simple_17.png", + "model_output": " is enclosing ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000260261.jpg", + "mask_rles": [ + { + "size": [ + 640, + 426 + ], + "counts": "Xi`33lc02M3N2M2O2N2N2N101O1O10O1000000O10000O10000O10O1000O1O001O0O2M3O0O2O001N2O1M2O]^P4" + }, + { + "size": [ + 640, + 426 + ], + "counts": "dbZ32mc03N2N6J2O0O001O01O01O1O001O01O01O00000001O0000000O10O1O100O1O1O1N2O01000O1gK@QEa0n:DmD=Q;GlD:S;IjD8U;IjD8U;JjD6U;KjD5W;KhD6W;KhD6W;KgD7W;KfD7[;HaD=^;BaDa0_;_O^Dd0a;]OZDg0g;YOVDj0h;WOUCCeN^1U>POQCP2nNZE5_;I]4Objb1" + } + ], + "question": "What is doing with ?", + "answer": " is holding .", + "image": "images/caption_simple_18.png", + "model_output": " is holding ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000301563.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "^2m:_20000000000000000O2O000001O01O0000001O0O1000010O000000000O10000001O01O00000O101O00001O00001O001O000]KXMbNi2]1XMbNh2^1WMdNi2o50000001O000O2O2cE_Mk9Q300010O0001O000O2O001O00001O1O00001O001]KZL^Og3`0ZL@f3`0ZL@g3?YLAg3R50000001O000^KWLAj3>VLBj3>VLBk3=ULCk3o4@cFQM]9n2dFRMi4EDY3cKRMi4EDS3kKWM`4FER3mKWM^4FFS3mKWM\\4FGS3mKWM\\4FGS3mKXMn8h2RGXMn8h2TGVMl8j2UGUMk8k2UGUMk8l2TGTMl8l2UGSMk8m2VGSMi8m2XGRM^OJW9T3[GRM^OJW9T3\\GQM\\OLX9S3\\GSMe8n2ZGRMf8n2ZGRMg8m2YGTMe8m2[GSMe8n2ZGQMg8o2YGQMg8o2ZGPMf8Q3YGoLg8Q3YGoLg8Q3YGoLg8Q3YGnLh8R3YGmLg8T3YGjLh8V3g00000001O000000001O0000001O001O00000000001O0000000000001O000000000000001O00000000001O000000001O0000001O0010OO101O000000000000001O0000000000002N1O001O0000000000001O0000000bHgLi4Y3eJ^MV5b2aJnMX5S2]J\\NCkMX5i3SK`N_OSNV5]3WK_Og4a0XK@h4`0WKBh4?UKDj44d4`6lK`IT4Z6SLfIl3T6[LkIe3T6]LlIb3S6`LmI_3S6bLmI]3S6cLnI\\3R6eLnIZ3S6fLlIZ3U6fLkIY3X6eLhIZ3Z6dLgI[3Z6eLfIZ3[6fLeIY3]6fLcIY3_6fLaIY3b6eL]I[3e6dL[I[3f6dL[I[3f6eLZIoNOg3i6YMWIoN2g3h6ZMWIkN5i3e6\\MUIjN8i3c6^MUIgN:j3b6_MiIa2Y6^MgIa2Z6^MgIa2Z6_MeIa2_6\\M`Id2b6[M^Id2e6ZMZIf2g6ZMXIf2i6ZMUIg2l6XMTIh2m
6XMRIh2Q7VMmHk2V7SMiHm2Y7QMgHo2[7PMdHP3]7oLeHo2\\7PMeHo2]7PMbHP3`7nL`HR3a7mL_HS3c7lL\\HT3f7jLZHV3g7iLYHW3h7hLYHW3h7iLWHW3j7hLVHX3k7gLUHY3l7gLSHY3n7fLRHZ3o7eLQH[3P8dLPH\\3Q8cLoG]3Q8dLnG\\3S8cLmG]3S8cLmG]3T8cLkG]3V8bLjG^3W8aLiG_3X8`LhG`3Y8_LgGa3Z8^LfGb3[8^LdGb3]8]LdGb3]8]LcGc3]8]LcGc3^8\\LbGd3^8]LaGc3`8\\L`Gd3`8\\L_Ge3b8ZL^Gf3c8XL^Gh3S901O000000O1000000O100000000hK\\LgNe3X1^LeNc3Z1aLcN_3\\1dLbN\\3e0XLUL`0T3X3f0[LSLa0T3T3i0_LmKa0Y3o2j0hMVOV2j0kMVOT2j0mMVOR2i0PNWOo1j0QNVOn1j0SNVOl1j0VNSLWN`2c3]1WNRLWN`2b3^1XNQLWNCOk2a3a1YNQLZN^2]3a1ZNQLYN^2\\3a1\\NPLXN_2\\3b1\\NoKYN]2[3d1]NnKYN]2Z3e1^NnKXN\\2Z3g1^NmKXN[2Z3h1_NmKWNZ2Z3i1`NmKVNY2Z3j1bNlKUNX2Y3m1bNkKTNW2[3n1bNkKSNV2[3o1cNkKRNT2\\3Q2cNkKQNS2\\3R2dNkKoMR2^3T2bNkKPNP2^3U2cNkKoMo1^3V2eNjKmMo1^3X2eNiKlMn1`3Y2eNiKjMm1b3Z2eNiKiMl1b3[2fNiKiMj1a3]2gNiKhMi1a3^2hNiKgMh1a3_2iNiKfMg1a3a2iNhKfMe1b3c2iNhKfMc1a3e2jNhKfMa1`3g2kNhKfM^1`3j2kNhKeM]1`3k2lNhKdM\\1`3l2mNhKbM\\1a3l2nNhKaM[1a3m2oNhK`MZ1a3n2POhK_MX1b3Q3oNgK_MW1b3R3QOfK]MW1b3S3ROfK[MW1c3S3SOfKZMV1c3T3TOfKXMV1d3T3UOfKWMU1d3U3VOfKVMT1d3W3VOeKVMS1d3X3WOfKSMR1f3X3XOfKQMQ1h3Y3XOfKPMQ1g3Y3ZOfKnLQ1h3Y3[OSMe0m2]OQMc0o2^OoLc0Q3^OnLb0R3^OnLb0R3_OgKjLe0h3d3_OkLa0U3@jL`0W3@gLa0Y3AdL`0\\3AcL?]3AhKbL;n3m3AgKfL7i3S4@fKiL5g3U4AeKiL4g3W4@eKjL2g3Y4@eKjL0f3\\4@cKkLOf3^4AbKjLMf3a4C_KQ1b4PO\\KP1d4l300001O00000000000000001O00000000000000001O00000000000000001O0000000000001O00000000001O000000000000001O00000000000000001O0000001O0000000000001O000000001O000000001O000000001O0000000000001O00000000000000000000001O00000000001O0000000000000000001O00000000001O0000000000001O0000001O001O000000000000001O0000000000000000000000001O0001O0000000000O101O00000001O0000O10001O00000000000000001O000000000000000000000000000000000000000000000000001O01O0001O0000]J" + }, + { + "size": [ + 428, + 640 + ], + "counts": 
"ff[21[=00000SX<3egC8oBJSL4L4H8E<0O102N3M4L1O3M3M3M1O2N1O101N3N2M1100O001N2O4L1O000001N5L1N1O1O2O0O2N3M2O2L3N1O1O2N101N2N1O1O100O1O100O1000O010_LfGo1Z8PNfGQ2Y8nMhGR2Y8nMhGQ2W8oMfGdN2\\3X8nMmGR2S8mMmGT2S8mMlGS2T8PNiGP2W8PNiGo1X8mMmGR2R8nMnGS2R8mMgGgN3\\3V8QNkGm1U8SNmGl1S8SNmGm1T8QNnGo1Q8RNnGo1R8QNnGo1Q8RNPHm1P8RNQHo1n7PNRHQ2n7PNQHo1P8RNPHm1o7WNoGg1R8WNoGi1S8UNnGi1T8mMcGUO9m2U8kMeGZO3j2Z8kMeGZO2i2Z8kMgGYO1k2Y8jMXHV2j7gMXHY2i7dMYH\\2h7aMZH_2g7]M\\Hb2i7TMSGNW1m2V9O2N2N2M4L3M2O3L3N4L1O1O1O1O1O1O1O1O1O2N1O010O0O101N2N101N2O1O1O2N1O1N2N6K1O0O2O1O1O1O1N101O1O1O0O2N1O201L2O2M4MZ>InA0N4M0O2L41O01ON11O10O101N3ImB0Pko2" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is on .", + "image": "images/caption_simple_19.png", + "model_output": " is enclosing ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000016598.jpg", + "mask_rles": [ + { + "size": [ + 640, + 478 + ], + "counts": "Sme635l0Ni0OYNe`0Z3K3N2O1O010000O100O001M3N2O1O1O1O100O10000O10000O10000YOZ_OTNf`0j1^_OSNc`0l1`_ORN``0m1b_ORN^`0n1c_OQN]`0n1d_OQN]`0o1d_OPN\\`0o1f_OPNZ`0P2f_OPNZ`0P2g_OoMY`0Q2g_OoMY`0P2i_OoMW`0Q2i_OoMW`0Q2i_OoMW`0Q2j_OnMV`0R2j_OnMV`0Q2k_OoMU`0Q2k_OoMU`0Q2k_OoMU`0Q2k_OoMU`0Q2k_OoMU`0Q2k_OnMV`0R2k_OmMU`0S2k_OmMU`0S2l00000000000O10000010O01O001O1O1N2L5iNj]O_OXc06g\\OLb^X1" + }, + { + "size": [ + 640, + 478 + ], + "counts": "Vj133Nac0a0^\\OAPc0l0Fg5mAYJV>Z5jAlJ]>S4fA^Lg>^3[AnKM9l>g3WA_Lm>_3SA`LQ?]3QAaLR?\\3UA[Lo>b3RA]LR?]3`@]L?6S?Z3PAfLR?W3P1YO\\_OeMi\\1" + } + ], + "question": "What is doing with ?", + "answer": " is holding ", + "image": "images/caption_simple_20.png", + "model_output": " is holding ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000439854.jpg", + "mask_rles": [ + { + "size": [ + 333, + 500 + ], + "counts": "bhP11[:2O0O101N1O100O1O100O1O010O2O0O3N0OLTFIl9650010O0010O010O010O0010O0010O010O000010O0100O100O2OMRFHl98UFHk97WFHh996O1O100O2MaY`3" + }, + { + "size": [ + 333, + 500 + ], + "counts": "hmT19o0JR8=gGDX8`0SGG3Jj8o0SGROl8X100ZOjNUHV1i7nNUHQ1k7QOTHo0k7SOTHm0k7UOTHk0l7VORHL26l70QHJ54j73PHI73i75oGH;0f78oGH=Me7 and ?", + "answer": " is on .", + "image": "images/caption_simple_21.png", + "model_output": " is on ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000012062.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "l`k7>k<4L4[C\\O^ and ?", + "answer": " is in front of .", + "image": "images/caption_simple_22.png", + "model_output": " is in front of ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000121586.jpg", + "mask_rles": [ + { + "size": [ + 478, + 640 + ], + "counts": "VTZ191IY>g0KO=YOaA2OO4Oi=0]B0H002OOk=2SB20:1Bk=k0VBTOT<;VEHdN:3EV5]A0R>a0M2N2N101N2O1N1O000O1O100O100O100O10000O100010O01O4L1O0001O00001O1O001N2N2M3N2O2M2N3M4L4L2M6Inok5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is inside .", + "image": "images/caption_simple_23.png", + "model_output": " is in ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000476704.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "V8T5X8001O000000O10000000000010O02N1O00003L3N0000010O5K2N0O2O00000001O00000000O01O1L4M30000O10001OO1000000001O0000000000O1000000000000000000000O1000001O000000O100000000001O00000000O2O00101N3M3M1O0000000O11O00000000RL\\GT3d8jL_GU3a8gLcGDJ[3d8nLTHl2l7RMWHm2U9O00000010O0001N100000001O0000000000000000001O001O000000001O0000O10000001O0000000000000000001O00000000000000001O0000000000000001O0001O0O100000000001O0001O0O100000000001O00O100000001O00000000000000001O00000000000000001O0000000001O0000O100000001O0000000000001O00000000001O1O001O000000000000000000000000000001O0000000000000001O000000O100001O000000000O100000000000001O01O00000000000000000001O00000000000000000000000000001O0000000000000000001O000O10000000000000000001O0001N010000000000000001O00001O00000000O101O000000000000000000001O00000000000000000000000000000000000000001O000000O10000O10000O10000O10000000000001O4L1O001O001O0000001O000000000000O10000O1O1O1O1N2N2N2N2N2M3N2L4O1O1O1O10000O10000O100O10000O100O10000O10000000000O100O100O100O100O1O1O1O1O1O100O10000000eL_FP1K`0i9[N`Fb0IF1[1U:nNmEA2a1X:]N`Fb1e:0000000001O001O00002N3M1O002N1O2N001O1O1O1O00O10O10O2N100O100L4O1O1O1N2O100YLoNSKQ1_4^O`Kb0_4_O`Kb0`4^O`Kb0`4^O`Kb0`4^O`Kb0`4]ORISOf1`1X5]OQIVOe1]1Y5^OQIXOd1Z1[5^OPI\\Ob1V1^5^OPI^O`1T1`5^OPI_O`1R1_5POcHI?4_1S1_5oNfHG?2_1X1\\5oNULQ1k3oNULQ1k3nNVLR1j3mNWLS1j3jNXLV1h3gNTKMYN]1c6eNUK2TNY1g6eNUK6PNU1k6dNVK=hMP1R7bNWKb0bMm0X7^NXKg0]Ml0[7ZN[Kl0WMk0^:Q100001O00000000000000001O000000000000000000O100000000O1O1O1N2N2O1SOPMgFg3^91FYLoFg3P9\\LmFe3Q9 and ?", + "answer": " is parked on .", + "image": "images/caption_simple_24.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000211042.jpg", + "mask_rles": [ + { + "size": [ + 640, + 458 + ], + "counts": "YdX34ic05L3N2N2N2N101N1O2N1O2N1O2O1N101N2O0O2N100O1O0001O0O1O2O001O001N10O20O0001O01O01O1O01O0O2O0O1O100O10001O01O01O001O1O001M3N2K7Ghbf0L_]YO5`N3n]OK;7ea0b0n]OFQb0U1N2N2N2I7K5K5N201O00000000000001O00001O000000001O0000001O00001O00001O00001O00001O00001O001O0000001N10001O00001O001O00001O0O2O001O001O001N101O001O1N101O001O0O2O1O0O2O1O1N101N2O1N101N2O1O1N2N2O2M1O2O1N3L3N3L5Jbcc1" + }, + { + "size": [ + 640, + 458 + ], + "counts": "]dW3;bc04M2L4L4N2oK^OaDe0^:X1RB_Nd1?U<_2WChMc^3m0N3M2O1N2O1O1O1O1N2O1O1O1N1O2N1_Ng_OYO]`0f0h_OnN``0P1X10001M2M4J5E;K6L5JnX=0`SB3WhQ3" + } + ], + "question": "What is the relationship between and ?", + "answer": " is attached to and leaning on .", + "image": "images/caption_simple_25.png", + "model_output": " is attached to ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000055299.jpg", + "mask_rles": [ + { + "size": [ + 429, + 640 + ], + "counts": "e and ?", + "answer": " is sitting on .", + "image": "images/caption_simple_26.png", + "model_output": " is on ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000435206.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "b531`7c5`H]J`7c54000\\H]J`7c5_H^Ja7b5_H^Ja7b5_H^Ja7b5_H^Ja7b5_H^Ja7b5400000000000000O11O0000O10000001O00O11O0000O1000000000000001O00O1000000000000001O00O1000000000000001O00O10000001OO1001O000000O11O0000000000000000000000000000001O0000O1000000000000001ZH]Jb7b54000000000000000000000000000[H^Ja7b5_H^Ja7b5_H^Ja7b5_H^Ja7b541O000[H\\Jc7f501O000000O100O11O0000N]H\\Jc7c530000000000000000000000O1001O00O1000000O100001O0000000000O1000000000000000000000000O10000001O00O1O1O1000000001O00O100O100O10000000000O100N200001O1O001O000000000000000000000000000000000000000000000000000000001O00001O001O001O001O1O0000001O00001O001O000mH^J]6c5`I_J`6a5`I`JCLg6e5dIeJ\\6\\5bIfJ]6[5_IhJa6X5^IiJb6X5]IhJc6X5]IjJa6W5]IkJb6U5^IlJa6T5^ImJb6o5O000000001O00001O001UJ^Ik4b6TK`Ik4`6UK`Ik4`6TKbIk4_6SKbIm4^6RKcIn4]6RKcIn4^6PKcIP5^6oJbIQ5^6mJdIS5\\6kJfIU5[6iJfIW5[6gJfIY5[6eJfI[5[6dJfI[5U701O1O1O1O1O4L000000O11O00001O0000001O0000O10000000000000000000[LjGU2W8hMkGX2U8hMkGX2U8hMkGX2U8gMlGY2U8fMkGZ2U8fMkGZ2U8fMkGZ2V8eMjG[2V8dMkG\\2V8cMjG]2W8bMjG]2X8gLfG<2m2Y8fLeG=1n2[8cLfG?Nn2\\8bLhG?Lo2^8_LgGa0LP3i8oLYGP3h8nLYGR3g8mL\\GQ3e8mL\\GS3f8jL]GT3e8iL]GV3e8gL\\GY3\\9O00001O2N001O00001O00001O001O001O1O1O001O1O1O001O001O1O2N001O001O1O001O1O1O1O000WN[El0f:mNaER1`:kNcET1^:kNbEU1^:jNdEU1\\:jNeEV1\\:gNgEX1Z:dNjE[1X:`NkE`1Q;eNXDQ1h;nN]Dn0P<01O00001O100O2N0000001O00003MM3O100O1O100O10000002N6J00001O1O3M2N1O2N2M5L3M1O1O0Oml`2" + }, + { + "size": [ + 427, + 640 + ], + "counts": "Vbn3:e< doing in relation to the ?", + "answer": " is looking at the .", + "image": "images/caption_simple_27.png", + "model_output": " is looking at ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000137950.jpg", + "mask_rles": [ + { + "size": [ + 415, + 640 + ], + "counts": "WW1231N11O0O11N10?0<4 and ?", + "answer": " is flying over .", + "image": "images/caption_simple_28.png", + "model_output": " is over ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000435208.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "o`02d03AMeb0T1M5Z]OhN1NZb0f1L1OO1O1001N10001O10OO2O1O1O1N2O1O1O1O1N2O1O1O100N2002YN\\^Om0fa0PO]^Oo0ca0PO^^OQ1aa0lNb^OT1_a0jNb^OV1^a0hNd^OX1]a0eNe^O[1[a0dNf^O\\1oa0000000000O1000000000000000000O10000000000000000O10000000O100000000000O1000000000000O1000000000O1000O100000000000000O100000000000000000000O10O100000000000000000000000000000000000V_O^NdN2W`0a1SAiNm>W1k@]NlN>Y`0U1h@TOX?l0c@YO]?g0\\@@d?`0\\@@d?`0\\@@c?a0[@Ae??W@Ei?;m_OOS`01j_O2V`0Ni_O3W`0k100000000000000000000000000000000000000eM]LYDc3`;fL^DZ3_;iLaDW3Z;nLfDR3Q;\\LkBg0T2m2b:fM^EZ2a:gM_EY2a:gM_EY2a:gM^EZ2b:fM[E]2e:cMeDS3Z;nL]D[3c;dLPDj3PjNXAb1c>`NWAg1g>ZNVAk1g>VNTAR2g>PNVAW2e>jMXA^2b>`M_Ae2\\>[MdAg2[>SMhAR3V>jLlA[3Q>dLoA^3o=dLnA`3P>`LoAb3P>_LnAb3Q>_LnAc3P>_LmAd3P>^LnAd3o=`LPB`3k=jLnAY3i=PMTBS3g=RMWBo2h=SMVBn2i=VMRBk2n=d1O1O1O10O010O0100000O01000O10O010000000001O0000001O000000001O001O000O2eKPBd2Q>YMRBf2n=ZMTBd2m=ZMUBe2k=[MWBc2j=[MXBd2j=ZMXBd2l=XMXBd2i=ZMXBf2m=YL_Ag0e0P3U>iL`Bd2a=PMQCi2V?L6K2N2N1O1O0O100000001O0O102N1N01N1101O1N01000O01N110O1O101N101O0O2N2N100O100O1000O10O1O10O1@i^OWNVa0f1P_OWNQa0a1X_O]Ni`0a1l0O1N2N2N2N2N2M4M2L4L5I7J6N6CWgZ6" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is sitting on .", + "image": "images/caption_simple_29.png", + "model_output": " is sitting on ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000137576.jpg", + "mask_rles": [ + { + "size": [ + 563, + 640 + ], + "counts": "0g15UOf>k0ZAUOf>k0ZAUOf>k0ZAUOf>k0ZAUOf>k0ZAUOf>k0ZAUOf>j0[AVOe>j0[AVOe>j0[AVOe>j0[AVOe>j0[AVOe>j0[AVOe>j0[AUOf>k0ZAUOf>l0YATOg>l0YATOg>l0YATOg>k0ZATOg>l0YATOg>l0YATOg>l0YATOg>l0YASOh>m0XASOh>m0XASOh>m0XASOh>m0XASOh>m0XAROi>n0WAROi>n0WAROi>n0WAROi>n0WAQOj>o0UAROk>n0UAROk>n0UAQOl>P1TAoNl>Q1TAoNl>Q1TAnNm>Q1SAPOm>P1TAoNl>Q1TAoNl>Q1TAoNl>Q1TAnNm>R1SAnNm>R1SAnNm>R1SAnNm>S1RAmNn>R1SAmNn>S1RAmNn>S1RAmNn>S1RAmNn>S1RAlNo>T1QAlNo>T1QAkNP?U1PAkNP?U1PAkNP?V1o@iNR?V1o@jNQ?V1o@jNQ?V1o@jNS?T1n@jNS?V1m@jNS?V1m@jNS?V1m@iNT?W1l@iNU?V1k@jNU?W1j@iNV?W1j@iNV?W1j@hNX?W1h@iNY?V1g@iN[?V1e@jN[?V1e@jN\\?U1d@kN]?T1c@kN^?U1b@kN_?T1a@lN_?T1b@kN]?V1c@jN]?V1c@iN^?W1b@iN^?W1b@iN^?W1b@hN^?Y1b@gN^?Y1b@gN^?Y1b@gN^?Y1b@gN^?Y1b@fN^?[1b@eN^?[1c@dN]?\\1c@cN]?^1c@bN]?^1c@bN]?^1c@bN]?^1c@aN^?_1;000000001O000001O1O000010O00000001O0001O0001O000000001O001O00001O001N10001N10001O0O2O001O001N101O001O1N10001O0O101O000O101O1N101O1O1O0O2O2N1Ao^O9Wa0NS_S8" + }, + { + "size": [ + 563, + 640 + ], + "counts": "adj0X1P`0?Q@`N]?e1e@ZNZ?g1f@YNX?i1h@TNY?m1h@QNY?m16M4K5J7F:F:F;CY\\34ncL04HWc62lfM2dTKM26Kn01PO4N000NV>[3I100O1O0000000001O001N2O001N2O1O1O1O1O1N2O1N2O2N1O1O001N1O101O000000K5M4N10000001O001O1M4I6N2M5fMk@P2X?oMh@Q2Y?32OO02O000O1O3N0O10001N100O101O0O1000001O000O2O00001O00001O1O001O001O001O001O0O2O001O001O0O2O0000001O000O2O000010O0001O00001O0O101O001N100O1O2N1O3L2N3N2N3N2M9H1O001O00000000001N1O10gon6" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is over .", + "image": "images/caption_simple_30.png", + "model_output": " is over ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000126137.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "bQ`1`0]<`0L2J7OO2mNmCl0SQE6c;I_D6b;H`D6b;H`D7a;HaD5a;IcD4a;FbD5aQZ5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_31.png", + "model_output": " is beside ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000573943.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Ya`84l>5Kb0^O6J1O1O000O01N2N2M3N200O1000001O000000001O000000001O0000000000001O002Ldd9" + }, + { + "size": [ + 480, + 640 + ], + "counts": "doj7g0U>5K5N10000000000000O010O10000001O1O1O1O001O0000000000O2O0O1001O01O000O1000010O0000000O1O2N1O6I_`j0" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_32.png", + "model_output": " is beside ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000225532.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "h_d21f;1O1O2N1O1N2O1O1O1O001O001O001O0oDBi:?VEBi:?VECh:=XEDg:=ZEBe:>\\ECb:>]EDa:=_ED_:=aEC^:=bED]:=cED[:=eECZ:=gEDW:=hEEV:SHAl7?UHAj7a0UH^Ok7c0UH\\Ok7e0TH[Ol7f0THYOl7g0VHWOj7j0VHUOj7l0VHSOj7n0VHQOj7P1VHoNj7R1VHmNj7S1WHlNi7U1WHjNi7V1XHiNh7Y1WHfNi7[1WHdNi7\\1XHcNh7_1WH`Ni7a1WH^Ni7c1WH\\Ni7d1XH[Nh7f1YHXNf7j1ZHUNf7l1ZHSNf7n1ZHQNg7o1YHPNg7Q2YHnMg7S2YHlMg7U2XHkMh7V2XHiMh7X2XHgMh7Z2XHeMh7[2YHdMg7]2YHbMf7`2ZH_Mf7b2ZH]Mf7d2ZH[Mf7f2ZHYMf7h2ZHWMf7j2ZHUMf7l2ZHSMg7m2ZHPMg7P3`01O1O1O100O002N001O001O2N001O1O1O1O1O1O0000000000000000000000000000cMQHR1o7eN_HV1a7eNgHX1X7fNPIU1Q7hNXIQ1h6nNbIi0]6VOiIf0W6YOmId0S6\\OnIc0R6\\OQJb0o5^ORJ`0o5@SJ>n5@TJ2fMOU80WJKdML14OLU89WJ and ?", + "answer": " is driving on .", + "image": "images/caption_simple_33.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000424349.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "XlW48l<>`CBX3L^4a1YKdNb4e1UK_Ng4g1RK^Nl4f1oJ]NP5g1jJ\\NU5h1fJZNX5j1dJXNZ5l1bJVN^5l1_JUN`5n1WIdMLa0m6V3nHlLR7X3bHPM]7_4O1O100O100O10000O100O100O100O10000O10000O100O100O10000001O00000000O10000O1_NoIiKQ6V4[J^Kf5_4cJYK_5`4mJYKU5e4oJSKgNK[6P5j1N2N2O1O1O1N2N2O1O1O1O1O100O100O100O100000000O1000000O10000001O0000000000001O001O001O001O000eIcKi3^4nKlKP4V4fK\\KXNc0R6Q4dKTL\\4m3aKVL^4k3`KVL`4k3^KWLa4j3]KWLc4k3[KVLd4k3ZKWLe4j3XKYLg4j3TKXLf3kNZLP5IYLk3nNWLl4G[LP4mNVLn6i3TIVLl6i3VIVLj6i3[ISLf6k3\\ITLd6k3_ISLa6l3bIRL^6n3dIPL\\6o3iIlKX6T4mIgKR6Y4`100000000O10001O0O101O001N101N1O1N3J5H9EZFiLm9g2c0L5L3N201N101O0000001N2O1O1N101O1O1O00001O001O1O1O1O001O00000001O0001N110O00000001O0001O000000001N10000O101N1O2O001O000O2O001O1O001O1N2N2O0O2O1O2N1O2M2O2N1O1N2O0O2O1N2N1N5L6GcbP1" + }, + { + "size": [ + 426, + 640 + ], + "counts": "^j7=g<6I8L4`ETO[8Q1bGRO[8R1cGoNY8f1UG[Ng8l1RGXNg8Q2UGQNf8U2XGlMf8V2YGkMd8Z2XGhMc8^2[GcMc8`2ZGbMd8a2ZG`Mc8d2[G]Mb8g2\\GZMa8k2\\GVMb8j3O1O2L3QM^KUKJW1j4[3^LcLc3[3`LbLb3[3aLdL`3Y3cLcLa3\\3`LbLb3Z3bLeL_3Z3bLdL`3[3aLdL`3Z3cLdL^3[3fLbLZ3]3lL]LU3c3lL[LU3e3kLZLV3d3PMWLQ3h3SMTLn2l3TMRLl2n3TMQLm2n3UMQLk2o3UMPLl2P4VMmKk2R4XMkKi2U4WMiKk2W4UMhKl2X4UMeKm2[4Q300O1O100O1000000001O00O10000O100000000O1000000000000000000000000000000O11O000000000000O1000000000000001O001O001O0eJeKg1[4WNhKh1Y4SNlKl1T4QNoKo1R4nMQLQ2o3nMRLR2o3lMSLS2n3jMTLV2m3hMULW2l3fMVLZ2l3dMUL[2l3cMUL]2l3bMUL]2m3aMTL^2m3`MUL_2m3_MVL^2P4\\MTL`2R4YMRLd2T4VMQLe2R4WMPLh2R4UMPLj2R4TMoKk2Z4eKTJP1e1Z3_4_LmKY3\\4XLlKf3n6N001O0000001O1O2N3M4L3M2M2O1O2PO[FVNi9f1_FQNd9l1aFPNb9m1o0N3L5K4M3M3L4L4K5M3L4K8I_ko5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_34.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000173302.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "en\\43U=10000O101O00000001NVDMfo19k[N4O100000001QOBQE>l:HQEd0a:@]ET1n9nNQFR1n9QOPFo0P:ROoENE7]:LmELS;5lDKS;7lDIT;8kDHU;9jDGV;9jDGV;:jDEV;gDBY;>gDBY;>gDBY;>gDBY;>hDAX;?hD@Y;`0gD@Y;`0gD@Y;?iD@J3g:=_E7`:I[EmEClNOW;>mEBmN1U;=mECnN0U;=mECnN0U;>lEBoN0T;?mEAPOOS;`0mEAPOOS;`0mE@QO0m96jF:8@QO0k9:jF6:@RO0i9;jF5;@RO0h9>iF3;AROOc03X8nDAR;?nDAR;?nDAR;?nDAR;?nDAR;?nDA6Ln9c0lE`0S:@mE`0S:@mE?T:AlEOHROh:o0aE_OR;`0oD_OR;a0oD^OQ;b0oD^OQ;a0oDG9D\\jd3" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_35.png", + "model_output": " is beside ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000352760.jpg", + "mask_rles": [ + { + "size": [ + 640, + 544 + ], + "counts": "PX1`bW16YQiN6J3N3L2O3M3M1O2N2N2O1N2N2N1O2O1O1N200O001O1001O0O2O1O0O100O11OO100000000O10O10O01000O100O010O100O1O02OO10000O010O100O00100O01000O010O00100O010O010O01O010hMQO\\Ao0a>TO`Ak0_>VOaAk0\\>XOa_OJm1m0a>@_Aa0^>AbA?]>BdA=Z>DgA=W>DjAEnA;Q>GnA:o=HRB7l=KTB5k=LVB3g=O[B1a=1`BO^=2cBN]j7`1`EPNg2?i7\\1hERNa2b0d7Z1QFPN]2g0`7S1[FTNW2h0\\7d0SG]Nd1o0X7`0WKAf4>^KAa4>aKB]4>eKBZ4=hKDU4`1o_OYNg12Y>]1\\2N2N1O2N101O001O0010O2O0O2O2[OlNZ^OV1_a0TO[^On0^a0o0\\EaMa3c2]L^Ma3e2\\L]Ma3g2]L[Ma3g2]LZMa3j2\\LWMb3l2\\GPM\\36V5n2oF\\Ne2iNY6Q3dFQO`2PNk6\\6jHeIU7d6\\FSIe1;m7e7eFWHL8]9b9N2O1O2M2O1O2N002N1O2N1O010O00O100O2M2O2K4N3M3C`FVFd9c9`0K5J6L4fM[FQJm9h5`FmIe9m5eFeIh9R6_2B7]O`0E;J6J6M2N3N2N2O1O10001O000001O1O00100O2N1O100O2N1O2O1N2N3M2O2M3N1NZLVAY2g>fM_AX2_>gMdAX2Z>iMiAU2U>lMmAS2R>lMQBS2n=mMSBS2k=mMYBQ2f=PNZBo1f=QN[Bn1d=RN^Bm1c=RN^Bn1`=SNaBm1^=SNcBm1]=RNdBn1[=RNfBo1X=PNjBQ2T=oMmBR2R=mMoBT2o and ?", + "answer": " is below .", + "image": "images/caption_simple_36.png", + "model_output": " is over ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000344614.jpg", + "mask_rles": [ + { + "size": [ + 640, + 478 + ], + "counts": "Qag32kc04N2M2O100N201M2O1O2K6K4W@VOUXOa0lJj@00Y4Ta0[N8H3K5J6J6K5K5oNk]O6Vb0In]O3Sb0Lo]O0Tb0LU^OGSb06m0M3M3NRXj02ngUO001O000000000000001OO10000001O00001O001O1O001O001O001O0000001O0000001O0000000000001O0000001O000000O100001O1O00001O001O000000000000001O00001O1O000000001O000000001O0000001O00000000001OO1001O1O00001O0000001O00000000001O00000000001O001O00000000001O001O1O2N00OV]OnNhb0Z1K7I7I1O1O1O1O2N000000O100O1N2001O1OO1O1O1M30000000000000000003M1O1O001O00000000001O0R^ORN60J1010N10k`0n1h_OTN``0l1a_OTN@Ne`0o1^_OUNG4ONl`0m2o@oLUN61JO11NS>S3nBlLPOW2Y=l0lCoLiNS2Y=0iBeNU1XOiN;1c1m<6TCcNT1IjNc1jLGc8K_GX4`8iK^GX4b8iK\\GX4d8c3kGfEb6\\:WIgFk5[9lIdDOZ40gK2n01RONO150;OF0Lm0lc0" + }, + { + "size": [ + 640, + 478 + ], + "counts": "W_W6e0Vc0:F9I6K4M4K4L4M3M3M3M2N3M2N3M2N3M2O1N3N0O2N2O1O1N2O1N2O1O1O001O1N101O1O1O1O001O001O001O001O001O001O0001O0001O01O010O001O00001O001O001O001O001O001O001O1N2O1O001N2O1O0O2O1N2O1N3M2O1N2N3M2O1N3M2N3M2N3M3L3N4K4L4M3K7I6J8H=]On_n0" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is on .", + "image": "images/caption_simple_37.png", + "model_output": " is over ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000272148.jpg", + "mask_rles": [ + { + "size": [ + 378, + 640 + ], + "counts": "V4c7W40000000000O100000000000000001O00O1000000000000001O00O1000O1O11O00001O2N001O0000JbHPL^7P4bHoK_7Q4aHoK_7V4001O0K`HPL_7Q4aHoK_7U41O100001O0001O00O1000000000000001O00O100000000000000001OO1000000000000000000000000000000000010OO0101O000000000000000000001O0kJfKj2Z4UMhKj2X4VMiKi2W4VMjKj2V4VMjKj2V4TMlKl2T4UMkKk2U4UMkKk2U4WMhKj2X4\\2000lIfKh4Z4VKhKj4X4VKgKk4Y4[100000RJgK[4Y4eKgK[4Z4b10000000000001O2N00000000000001O]IeKe00RO0l3[4_LcKb08mNKQ4[4nMkKPNJR4[4oMjKoMKR4[4PNhKoMMQ4[4QNgKnMNQ4[4RNfKlM0R4Z4RNfKlM0R4Z4RNeKlM2S4X4SNcKkM4S4X4]NhKd1Y4YNiKg1W4XNjKg1W4XNjKh1V4WNkKi1U4WNkKi1U4WNjKj1V4oMbKjM7W4W4oMbKlM4V4Z4nMbKa2]4_McKa2]4]21000000000000001O00O10000000000000O1000000001O000000000000000[LbKN^41gKKY45gKKY43iKMW40lK0T4NnK2R4NnK2R4NoK1Q4OoK1Q40nK0R42kKOU42jKNV42jKNW42gKOY44cKM]4h30000000000000000001O00O1001O0000000000001O000oI`Kh4`4XK_Ki4a4X1000000000000000000000000001O000000O11O0dNeKYK[4f4gKYKY4g4gKYKX4g4jKXKV4h4jKXKW4f4jKZKV4f4jKZKV4f4jKZKV4f4jKZKV4f4jKZKV4g4jKXKV4h4jKXKW4f4jKZKV4f4jKZKV4g4iKYKW4g4iKYKW4g4iKYKW4h4hKXKX4h4hKYKX4i2]KRN:TOZ4h2_KSN7UOZ4h2_KSN7UOZ4g2`KUN5TO[4g2aKYL0d14\\O[4g2hKmMM\\O[4g2hKmMN[OZ4h2hKmMN[OZ46]KZ1;TON^OY44^KY1;VON]OY44^KX1ZOL[OZ4b2kKSNK[OZ4a2kKUNLYOY4b2kKUNLZOX4a2kKVNMYOX4b2jKTNOZOV4e2hKRN2YOW4h2aKPN8YOW4d4jK\\KV4c4kK]KV4b4jK]KW4b4iK_KW4`4jKaKV4]4kKcKU4]4kKbKV4]4kKcKU4]4kKcKU4]4kKcKU4^4iKcKT4nN^Ka5 and ?", + "answer": " is over .", + "image": "images/caption_simple_38.png", + "model_output": " is over ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000222317.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "kc^1g0T>>E7J2N2O0O1fCQOnN_O10[;`1fEFlNkNn:_1WF2`9N`F9Y9GgFl0f8SO[GX1Z8hNgG]1S8cNmGb1n7^NRHf1j7ZNVHj1e7WN[Hk1c7UN^Hl1`7TN`Hm1_7RNbHo1]7QNcHQ2[7oMeHR2Z7nMfHS2Y7mMgHT2X7lMhHT2X7kMiHV2V7jMjHW2U7iMkHX2T7hMlHY2S7gMmHZ2R7fMnH[2P7fMPI[2o6dMRI\\2n6dMRI]2m6cMSI^2l6bMTI_2k6aMUI`2j6`MVIa2i6_MVIc2i6]MWId2g6]MYId2f6\\MZIe2e6[M[If2d6YM]Ih2b6XM^Ih2b6XM^Ij2`6VM`Ik2_6TMbIm2]6SMcIm2]6SMcIn2\\6RMdIYOkNK2P2]7mNfISOTOG0Y2T7mNhIkN@GJ`2m6nNiIiN:Z2l5mNjIgN=\\2h5mNlIdN?_2d5mNoKR1P4nNQLR1n3nNSLR1l3nNTLS1k3mNVLS1i3mNZLQ1d3PO]LP1b3PO_LP1`3PO`LQ1_3oNdLo0[3QOfLo0Y3QOhLo0W3QOjLo0U3QOlLn0T3ROmLn0Q3SOPMn0n2ROSMm0m2SOTMm0k2SOUMn0j2ROWMn0h2ROYMo0e2QO\\Mo0c2QO]Mo0c2QO]MP1b2PO^MQ1a2oN_MR1_2nNcMR1\\2nNdMS1[2mNeMS1[2mNeMT1Z2kNgMV1X2iNiMX1V2hNjMY1U2fNlM[1R2eNoM\\1P2cNQN^1n1aNSN_1m1aNSN`1l1_NUNa1k1]NWNc1h1^NXNb1h1^NXNb1h1]NYNc1g1\\NYNe1g1ZNZNf1f1YN[Ng1e1XN\\Nh1d1XN\\Nh1d1WN]Ni1b1XN^Nh1b1XN^Nh1b1XN^Nh1b1XN]Ni1c1WN]Ni1c1WN]Ni1c1WN]Ni1b1YN]Ng1c1YN\\Nh1d1XN\\Nh1d1XN\\Nh1d1YN[Ng1e1YNZNh1f1YNYNg1g1YNYNg1g1ZNXNf1h1ZNWNg1i1YNWNg1h1ZNXNf1h1[NVNf1j1YNWNg1i1WNYNi1g1VNZNj1f1VNZNj1f1UN[Nk1e1TN\\Nl1d1SN^Nl1a1TNaNk1_1TNcNk1]1UNdNj1\\1UNeNk1[1TNgNk1Y1TNhNl1X1TNiNk1W1TNjNl1V1SNmNk1S1UNoNi1Q1VNQOi1o0WNQOi1o0WNSOg1l0YNXOd1h0[N]Oa1c0_N@^1`0aNC]1=cND\\1P2@SN>n1^OVNb0j1\\OXNd0h1[OYNe0g1YO\\Nf0d1YO]Ng0c1WO_Ni0a1TObNl0^1SOcNl0^1ROeNm0[1ROgNm0Y1ROiNm0W1QOkNo0U1POmNo0S1oNPOP1P1oNQOQ1o0nNTOP1l0nNWOP1j0oNXOP1h0oNZOP1f0POZOP1f0nN^OP1b0oN@P1`0POAo0?POBP1>POCo0=PODo0=QOCo0=QOCo0=QOCn0>ROBn0>ROBn0>QOCo0=QOCo0=QOCo0=QOCo0=ROBn0>ROBn0>ROBm0?SOAm0?SOAm0?SOAm0?SOAm0?SOBk0?UOAk0?UOBj0>WOAi0?WOAh0`0XO@h0`0XOAg0?YOAg0?YOBf0>ZOAg0?ZO@f0`0ZO@f0`0ZO_Og0a0YO_Og0a0ZO]Of0d0ZO[Og0e0YO[Of0f0ZOZOf0f0[OYOe0g0[OXOf0h0ZOXOf0h0[OVOf0j0ZOVOf0j0ZOVOUN^LV1\\4e0VOSNaLW1Y4g0TOoMhLY1T4h0TOPNgLX1U4h0TOSNdLU1X4h0TORNgLS1V4l0ROoMlLS1R4n0ROnMnLS1P4o0SOiMRMV1l3Q1ROhMSMW1k3R1QOfMVMW1i3S1QOcMYMZ1f3S1QOaM\\M
Z1d3U1oN_MaM[1`3V1nN`MbMZ1`3W1lN_MfMY1^3X1kN_MhMY1]3X1lNhLfLc0R1\\1\\3Y1POYMfM]1Z3[1POVMgM^1Z3\\1POTMiM^1W3^1QORMjM^1V3`1POQMlM^1T3a1QOoLmM_1R3c1QOkLPNa1o2d1ROjLRN_1l2g1SOhLSN_1k2i1o0UNQOl1Q1QNoNo1S1nMnNR2U1kMkNV2X1fMhNZ2[1cMeN]2_1^MbNc2h1iLfIK0Ob4]3m6001O000000001O00001O0000001O000000001O0000001O0000001O0000001O0000001O00000O2N1L4K5F:J7H7M3L4L5M2N2O2M2N2O1O3M2M2O2N1O100O1O101N1O1O2O0O10000O1000001N10001N1000000O2O00001O000O101O00001N100N2J7L3N2O2N1O1N200O2N101N1O100O101O000O101O0O101O000O101O0O10001N1000000[JgHc3Z7ZLkHc3U7\\LVIY3k6gL_Io2a6PMaIo2`6PMbIn2^6QMaJQ2_5oM`KeNiL0O07h1b7CfMcNa2\\1c5O0000001O000O2O00000000001O00001N1000001N1000001O00001O00000000001N1000001O0000VK" + }, + { + "size": [ + 480, + 640 + ], + "counts": "`UZ33k>2O2N2N1O2N1O2O0O2N1O101N00100O1O010O01000O010O101N10000O2O00001O0O101O001O0O2O001O001N101O001O001O1O2N0000001O000000001O0000001O0010O01mBfN^ and ?", + "answer": " is lying on .", + "image": "images/caption_simple_39.png", + "model_output": " is lying on ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000231088.jpg", + "mask_rles": [ + { + "size": [ + 640, + 425 + ], + "counts": "cna02jc05L5L3M3M2M4L4L4L4L4K5J6J6H8N1O2N2O2N1O1O1N3N1O1O1N2O3L2O1O2N2M3N2M2O1O1N2O001N10000O10000O10000O1000000O101N10000O100O101N10000O10000O2O0O101N100O101O0O101Z@XLQ?i3PAVLP?j3QATLo>m3b01O0000O1O1B\\@`Ld?[3a@fL^?Q3g@nLZO5m?e2g@gMY?T2f@SNY?d1j@cNV?X1e@ROZ?e0]@SNO`1d?:_@1a?@V@PO4DKo1l?[OY@X1f?hNZ@X1f?a101O01O0000000O1O101N100O102M4M5J4M5J6K2M[OPMj_OOc0m2d?ZM[@b2f?aM[@[2e?kMY@P2i?RNX@j1h?YNZ@b1g?dNU@X1l?lNT@P1m?UOR@f0n?]OS@>n?FR@6o?LR@0o?4o_OHR`0 and ?", + "answer": " is attached to .", + "image": "images/caption_simple_40.png", + "model_output": " is attached to ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000421923.jpg", + "mask_rles": [ + { + "size": [ + 640, + 426 + ], + "counts": "_`Q7f0Yc0=D`0@`0_O>C4L3L2O1M3M3O1N2O10O01O0000000O2N100M32N2N00O1O001O1O1N2N2I6L5O100O10O10O010000N20O0100000O2N1001O000000000000O1O2N100004VMm_O_1e0" + }, + { + "size": [ + 640, + 426 + ], + "counts": "ogo42jc06L2O1L5N1O1d^OFR?:m@HR?8m@KP?7n@LXNA``0e0SAO[N^O``0d0TA7k>JTA6k>KUA5k>JUA7k>ITA8l>Hl@d0P?\\Oo@e0Q?[On@g0R?YOm@f0T?]Oh@d0W?^Oh@a0X?@i@>X?Cg@ and ?", + "answer": " is inside .", + "image": "images/caption_simple_41.png", + "model_output": " is inside ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000057149.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "_hW4 and ?", + "answer": " is in front of .", + "image": "images/caption_simple_42.png", + "model_output": " is in front of ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000577932.jpg", + "mask_rles": [ + { + "size": [ + 543, + 640 + ], + "counts": "VTR5 and ?", + "answer": " is touching .", + "image": "images/caption_simple_43.png", + "model_output": " is attached to ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000311002.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "ZWb02W=3O0N20001N10001N10000O2O0000000O10000O100000000000O01000O1K6Mmh[1MWWdN5K3M2O2N1O1O1O1O001O0O101O001O1O1O2N1O1O1N2O1O1O1O0010O103L10O01O3M6I5K8B[b97e]F2N3L6J3M4K5L4K6K4L4L2O3M3]D_NV;c2XO0O2O0000000000001O0000000000000000000000000000001O0000001O00000000000000000000001O0001O00001ZMkET2U:jMoET2R:hMRFX2`:00000010N100000O10O01N101K5F:N2O10O10000O10000000000O11N10000000000000O10000000000000000001O000001O00000000000000000000000000000O2O000O100000000O101O000O1000000O2O000000000O2O00000O101nNYE]Oh:c0YE\\Og:c0ZE]Of:c0ZE]Of:c0[E[Og:d0[EZOe:f0[EZOe:f0\\EYOd:g0]EWOd:i0\\EWOd:g0^EYOc:f0^EYOb:f0_EZOa:f0_EZOa:f0_EYOb:g0^EYOb:g0_EXOb:g0^EYOb:g0_EWOb:i0^EWOb:i0_EVOa:j0_EUOc:j0^EUOb:k0_ETOa:l0`ESO`:m0`ESO`:m0aEQO`:o0aEoN`:Q1m001O000000000000000000000000000000001O000000000O100000000O100000001O0O100000000O100O100O100O1O1O2J5K5N2O100O10000000000000000000001O000O100O5IT[k1" + }, + { + "size": [ + 427, + 640 + ], + "counts": "\\gc3:n<4M3L4L3L4N2L4L5M2M3M3N2N3H7O2N1O1]OSNfEa1BkNe:JbE[1MjN_:0_EV15kNY:d1iE`NR:_1QFbNm9\\1UFgNh9Y1YFjNb9Y1\\FhNc9`1UFaNj9`2jFjLW8V3cGQM\\8P3aGRM_8W3VGkLj8l3O000000O100O2N1_OnFfLU9W3b0L3jNV1J4M2EmD^NV;d18000O11O1O10O10O100nN^D:d;D^D:c;D`D9c;DaD8b;F`D8b;DeD6_;Dcol3" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is standing on .", + "image": "images/caption_simple_44.png", + "model_output": " is in front of ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000525600.jpg", + "mask_rles": [ + { + "size": [ + 326, + 640 + ], + "counts": "TeV33o99H7J5L5I6L4K5L4G:I:_HnM^6W2YIRNb6l2L2O100001\\O^IbMc6W2iIbMZ6X2m0K5K5L5K4N3M2M4N16J5L2M3MSOnGIo70ZH1e7GhH5W7@VI>\\8M1M2M6K^Yh2" + }, + { + "size": [ + 326, + 640 + ], + "counts": "oTe2:i9:ZFBT9P1H3M2O2M2N2N3M2OO0M3M4L3O2M2N2O2O0O100O10O2O001N2O4J9G?_O8Hk0VOYN_I1U6dNeIZ1a0Ge5g0hJjN\\5V1i1O3O1XOPHYOQ8a0l0H9N3F`oX3" + } + ], + "question": "What is the relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_45.png", + "model_output": " is beside ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000378139.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "VRa17R=4M2M5L2O1O0O2O1N2O0O100O2O0O10000000000O11O000000O100O1O1N200O1O1O1O1O1O1O2N1O2N1O2N1O1O1O2N1O2N2N2N1O101N100O100O1000000O10000O0100000O100000O100000000O1000O1000O10000000000O10000000001O00O10O2O00O100000O10000000000O1000O1000000000O100000000O100O11O0000O100000000000000O100000O10O10000O10000000000O1000O10000000O1000000O10000000O01000000000000O10000000O10O100000000O1000O10000000O10000000O10O100O10001O00000000O010001O000O01000000000O100000000O100000000O10000000O10O10000000000O10O100000O10000O1O100000O10O10000000000000000O100000O1000O100000000O1000O100000O10000000000O10O100000O10000000000O10000O100000O01000000000000O1000001O000O100000000O10000O100000000O100000000O10000O2O000O101N2N101N2N1N3N1O2N100O101N100O1O2O0O101N100O2N2ORjP2" + }, + { + "size": [ + 427, + 640 + ], + "counts": "_]l23W=2O1O007I00000O1O00100O1O1O010000O106H4J]fU5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is inside .", + "image": "images/caption_simple_46.png", + "model_output": " is in ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000189806.jpg", + "mask_rles": [ + { + "size": [ + 400, + 500 + ], + "counts": "d\\W32[<6I7^OK_D:[;b0K4M3N2N2M3N2N2O1N2N1O1O1O1O100O1M2000N2O010N200O1001O1O001OO100O100100O1ONSFWNX9h1hF[NV9e1kF\\NT9b1mF_NR9_1QGaNn8_1RGaNn8_1SG`Nm8`1SG_Nm8c1SG\\Nm8e1SGYNo8g1QGXNo8i1QGWNo8i1QGWNn8k1PGUNQ9k1nFVNQ9l1lFUNU9m1dFWN\\9m1]FWNc9[200O100000O100000000O100000000O1O1M3N2N2N2N2N2O10PFlMd9T2ZFoMe9]210000O10000O100O10O0100O2O0O100O1CUFTNl9l1Y:N^E5\\:R1M4M3M2O2N1O1O2N2N1O1O1O2N1000O01O100O10O01000O010O001N1DUFSNm9j1WFSNk9m1 and ?", + "answer": " is looking at .", + "image": "images/caption_simple_47.png", + "model_output": " is looking at ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000515445.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "fm_7:[>d0A doing with ?", + "answer": " is leaning on .", + "image": "images/caption_simple_48.png", + "model_output": " is holding ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000203580.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "Pc]43V=3N2N100O1O1O1O1O100000000O100000000000000000000000000000O100000000000000000000000000000000000000000000000000000000O100000000000000000000000000000000000000000000000000000000000000000000000000000000O10001O1O2O0ON101O100O10001O000000O100000000000000000000000000000O100000000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000000000000000000000O1000000000O100000000000000000000001N2O_TV1" + }, + { + "size": [ + 427, + 640 + ], + "counts": "dYT47S=2N3L4N1N2O1N2N2O0O100O1O2N100O1O101N1O101N2N101N1O2O0O1O10O01O1O100O100O1O100O100O1O100O01000O1O100O010O01G]DhNe;W171O2O1N101N2O1N2O1M3N2N101O00000O100000000O100000O01000000O10000O010O1000000O10O10O1000O010O1N1K6N1101O2M3N0O2O1O0O100O10000O100000000001OTElMa:S2]GlMg6T2[HlMPO0K00 and ?", + "answer": " is over .", + "image": "images/caption_simple_49.png", + "model_output": " is over ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000499622.jpg", + "mask_rles": [ + { + "size": [ + 456, + 412 + ], + "counts": "Qh>1W>0O2O0O100O1000001N10000000000O1000000000000O10000O100O1O1O1O1O1O1O1O10000N3O000O1000000O1O100O10000O1000000O10000O100O100O100O100O101N100000000O100O10000O1000000O100000000O100000000O2O00000O10000000000O1000001O000000000O100000000O10000000001O0O100000000000000O1000000000000000000000001O000000000000O010N2L4N101N2N2N2O1O00100O001O010O10O010O01000O10O011OO10000O100O1O100O10001N10000O2O0O2O001O001O1O1O1O1O1O2N3M2N2N1O3M3M1O1O00001O0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000003fNjC[O3g0UTEBl:=UECk:=UECl: located relative to ?", + "answer": " is on .", + "image": "images/caption_simple_50.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000135872.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "ZYl01Z=0iR20hfb01^V[O001O01NmW10Pkb08kl[O5J5L7I3K2O20O1OjCSON050_;l0\\D\\O3Jb;i0[D[O1Oi;b0UD[O0N16Q<;nC_O1;U<2PDMR<1nCOSCa0_O;E=D7H5L4K2N1O100O1O100[N_M_Hb2a7_M]Hb2b7_M]Hb2c7_M\\Ha2d7_M[Hb2e7_MYHb2g7_MXHa2h7aMUH`2j7eMQH\\2o7^NVGc1j8_100000000000000O100000000O100000000O100000000O100O100O10000O1O1O100O100O1O1O100O100O1O1O1O100O100O1O1O100O1O1O100O100O1O100O1O1O1O100O1O1O1@eJnH]5o6a0O1O1O100O1O100O1O1O10000O1O10aJQIb4n6^KVI_4j6`KWI`4i6_KYIj1OKh6lMUIIi6bMWIo1O`0a7@\\Hc0d7\\O[Hf0e7ZOYHh0S6ZMRKm1kNi0R6[MbIO]1l1POj0Q6[MaI0^1k1POj0Q6[MaI0^1k1POj0Q6[MaI0^1k1oNk0Q6\\MaIN`1i1oNm0P6_MQK:bNf0=a1P6_MRK4gNk06c1P6XMdI3`14iNm03d1n5ZMfI0a14jNm01e1n5XMhI2U2o0VNg1V6XMcKQ1WNg1V6XMcKP1XNh1`8fMPGE52;c2R6ZMiK7kMG238e2R6[MiK4WNKNf2R6[MjK2XNLLg2f5YMRJ3U20WNLMh2e5YMRJ3U20jNd2o4ZMQJ2W2OiNe2o4YMSJ2U20iNe2]5YMlK0gNg2]5YMmKOfNh2]5YMmK0eNg2^5YMmK1dNf2`5XMkK3dNf2a5WMkK4cNe2T8[MlGe2T8\\MkGd2U8\\MkGd2V8\\MiGd2W8\\MiGd2W8\\MiGd2k4XM]M5gMc2l4XM^M4eMe2l4XM_M4eMc2l4YM_M5dMb2l4ZM_M5eMo0XO3d5iN_M6dMc0ZOUO3V1`5kN`M7cM91e0l4kN_M8eMO9m0d4kN^M:WOj0\\3kN\\MX7BXG1_1=Y7DVG0a1;Y7FVGOa15^7NoFNc12_71mFOb11a7OnF0a10a71mF0b1Nb73jF1c1Lc73jF1c1Lc73iF2c1Lc73jF2b1Je74hF3c1Ie74gF4c1If73fF59^Of0:[83eF67Ah06\\83dF85Bj03]83dF84Dj00`83aF;2Dl0Oa82`F=0Do0Ma82_F?MFR1Ib82_FT1c0bN@8^93]FV1:_NF336`92\\FY16jNMKa93[FX17kNLJb9m1aFYNLKc9l1aFZNKJd9m1aFXNKKd9m1bFWNJLd9n1aFVNJMe9m1aFVNJMf9l1`FgN`9Y1aFfN_9Z1aFTNI1g9k1`FSNJ2g9j1`FSNJ2f9l1_FRNK2f9l1`FRNH3h9k1aFbN_9^1aFbN_9_1aF`N_9`1aF`N`9_1`FaN`9_1aF`N_9`1aF`N_9_1cF`No0\\Ol6T2UH`Nc0^ORO2T8o1YH`N93_7[1YHcNM>h7n0]H9b7G^H9b7F_H:a7F^H and ?", + "answer": " is on .", + "image": "images/caption_simple_51.png", + "model_output": " is on ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000439994.jpg", + "mask_rles": [ + { + "size": [ + 640, + 428 + ], + "counts": "T?12b0OE2I114O100075d`0j2K2O0001O00001O000O10001O00001O0000001O0O101O0000001O0O11O010O0001O001O00001O00001O001O001O0000001O010O00001N10010O00001O0000001O00001O00001O01O0001N100001O00O20O001O01O00001O000001O0001O00001O00001O001O1O1O1O01OO1001N100000000000000001O1O1O0000O1N200O1000001O00000000000000000000000001O00000001OO100000010O2M2O2O1N1O1O3M>B001O001O0001O000000000VOfNn^OZ1Qa0lNk^OS1Ua0oNi^OR1Va0oNi^OQ1Va0POj^OP1Va0QOi^Oo0Wa0ROh^On0Xa0ROh^On0Xa0SOg^Om0Ya0SOg^Om0Ya0TOe^Om0[a0TOd^Ol0]a0TOa^Om0_a0TO`^Ol0`a0XO[^Oi0ea0i00000000000000000000000000000001O000001O0000000000000000000000000001O0000000001O0000000000000001O0000000001O0000O100000001O000001OO100000000000000001OO2O00000000000000000000000000001O01O001OO1000000000000001O0001O000000000000000000001O0000000000000000010O000O1000000000000010OO100001O00000000]OU^OQOka0k0\\^OROea0l0]^OSOca0l0_^OSOaa0l0a^ORO`a0l0e^OQO[a0m0k^OnNVa0m0o^OTOPa0h0T_OXOl`0e0W_O[Oj`0:`_OEa`09a_OGa`04c_OKYb000000O2O00000O1D<00K6L2M4N2M3O1J6I7O0101O`0^OZk;UOlaC7KJk05Ra0j1O1N10001O0000001O000000001O000000001O000000000000001O00000001O0O1000000N]3" + }, + { + "size": [ + 640, + 428 + ], + "counts": "oUc62kc06VMN_A3\\>6`AK[><`AG]><_AG^>>]AG`>9VAMh>5T@oN=P1^?2U@nN6X1c?KU@PO4X1g?HT@QOL`1o?]OU@U1j?jNS@Z1n?gNo_OV1MgMT`0b35O2M3M2N3M3N1N1M1O003N3N3M3N110;E7I>Ba0k_OkK^?]4N1O2OO01O01O1O1O1O1O1O1N2nL_@^1g?_N^@NJ@>^O^?f0i@MRa0NT_ONo`0OT_OOn`00X_OJh`06Z_OHg`07Z_OHg`06\\_OGf`08b1N2N2M4MmQ3NVnL1ag<" + } + ], + "question": "What is doing on ?", + "answer": " is walking on .", + "image": "images/caption_simple_52.png", + "model_output": " is walking on ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000468501.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "`h_11d;3HOfD3Z;5O101O00001O000000O100O10000O100000000000000000000000O10000000000O10000000O10O10000YGJVOLk4:mKMVOKl48lK2UOGm47mK7SOCn47nK;oN_OS56lK`0nNZOU57lKb0lNXOW57kKe0lNTOY57iKj0jNQO[56jKl0iNnN]55jKP1gNkN]57jKR1fNhN_57jKT1eNeNa57iKW1dNbNb58iKY1bN_Ne59hK[1aN\\Ng59fK`1_NXNi5:gKb1[NVNm59fKY2Y4hMfKY2Y4hMfKY2Y4iMdKX2[4jMdKW2[4nM_KT2c3VM_Ko0e06oN0l4nN]K]17EELf4TO[Kg1OYO4I_4[O\\Ka1IkM7_1c2f5kMPJZOa0j2`5jMSKU2n4jMSKU2o4fMVKY2j4`M_IIi1g2i4`M[K`2f4_M[Ka2f4]MZKd2f4XM^Kh2h6100fGVMe7Q2mGbN=^Og7j1THfN4Aj7d1VHjNOBS8Z1QHSOKDX8S1PHYOGD]8n0nG^OCEb8h0mGD@Df8b0nGJ[ODQ96gG5XOEW9JhGa0oNFb:;\\EEe:f01O1N2O2N2N1O1O1O2L3N3L4Kl[h2" + }, + { + "size": [ + 375, + 500 + ], + "counts": "ccR42d;3N2N1O1O1O2O01OO1O10000N2O1O1O1O0L4M3O01N2010O3M2N2N1N2N1O2N1O01O0N2001O1N200O1O1O1O1O001N100gNhNTHY1j7jNTHV1k7mNSHS1n7mNQHT1n7mNPHT1P8mNoGT1Q8lNoGS1S8lNlGU1T8lNkGS1W8lNiGS1X8oNfGQ1[8oNdGP1^8oNbGQ1_8nNaGQ1`8PO_Go0c8SOZGm0g8QOZGn0h8QOWGo0j8ROUGn0k8ROUGm0l8SOTGm0l8SOTGl0n8TOQGl0n8UORGj0o8VOQGi0P9WOPGi0o8XOQGg0o8ZOQGf0^1nN]5YIc0_1nN[5?WIb0m8^OSGb0l8^OUGa0k8_OVG`0i8AXG>h8BYG=g8CZG and ?", + "answer": " is looking at .", + "image": "images/caption_simple_53.png", + "model_output": " is looking at ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000171190.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "mW_8d03Ej=m0J3O1O2O0O101N10001N100O2O01O100O001O0iNcBl0]=QOcBQ1^=mNaBU1_=610O00001O001O001O10O000N2J6K5G:J5L4K5K5N3M2OnB0m<7M2N2O100O0O2001O1N101O_NF\\D9d;J[D5e;L[D3e;NZD2k1" + }, + { + "size": [ + 480, + 640 + ], + "counts": "[k_71m>2O2N2N100O1O2N1O101N1O1O1O2N100O1O2N1O1O101N1O1O1O1N2N2O2N1N2XOTOYCo0dk0V;@[DE?k0W;AZDB>m0Y;2fDN[;1eDO[;2dDM^;3aDM`;3_DMb;3[DOg;0TD4m;_1002N1O2N1O1O1O1O1O1O1O`NfMZFZ2d9lMYFS2f9QNWFo1h9SNVFn1j9SNTFn1k9SNTFn1l9RNTFn1Q5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is sitting on .", + "image": "images/caption_simple_54.png", + "model_output": " is beside ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000565391.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "e6173a2M\\M0R>3nBM13QO14O[1NbN3NNM0e=n2\\BiNOOOZN218Z4f7f7K1N2O001O1O001O001O001O001O1N101O00001O001O001O1O1O00001O001O001O1O001O001O00001O1N2O1O001\\LPC6P=]3001O1O1O1O001O00002N001O1O001O1O1O1O1O001O002N1O001O1O2N2N1O2N1O1O1O3M2N2N4L2N1O001O2RKaAW4b>eK`AHNL0W4f>n04L4L3M5K6J;`KZ@P4m?N1O1O3M1O1O00003M1O1O00O1002N01O03M0O100001O00O1001O0000O100001O0000000000000000000000000000000000O11O00O10000000000000000000000000000000000000000000000001O00000000000000000jE`Ll3`3UL_Lk3a3QL_LS4a3V6000dE_LY4a3S6000bE_L]4a3cK_L]4a3cK_L]4a3Q600000000000000000000000000000000000000000000000000000000000000000000001O00000000000000000000000000000000000000000000000000000000000000000000000000001O0000000000000000001O000000000000000000001O0000001O001O0000001O0000001O001O00001O1O001O001O001O1O001O1O1O1O1O2X@lK`?\\4N2N2N2N2N2N3M2N2N2N3M3M2N2[EdJb6_5n36J3M2N2N3M3M3M2N1O3M5K3M2N2N3M3L4M3N1N3M3M3M1O2N2N3M3M1O2N2N4L2N2N2eEjGh8X8UGjGj8Z8nFbGPO4R:]8kF_GSO4R:_8mFbGR9a8jF`GV9b8eFbGZ9b8_FaGa9b8\\F^Gd9c8ZF^Gf9d8QFoF09NIP:d9PF\\FP:c9QF^Fn9b9RF^Fn9j901O000000001O
00000N31N00LoEYFR:f9oEYFQ:g940lEXFP:h9oEYFQ:g940000000001O00000O11O000000000001O000O11O0aM_FYHNo1c9h5_FYHOn1d9g5]F[HOn1f9]5XFfH5NOm1d9\\5]FgH:1Db1b9i5TGdH[Oc1a9i5^GVJa8g5ZFgHV1c1_8]N\\FS7X1bJ\\8[N\\FS7W1cJ]8`5dG`J]8`5bG`J_8_5aGaJa8]5_GdJ`8\\5`GdJ_8]5aGcJ^8^5bGbJ`8\\5_GfJ`8Y5_GiJa8W5YGoJd8T5\\GlJd8T5\\GlJe8S5[GmJg8Q5YG^IQOm0f9e5YGZIUOQ1`9h5YGXIWOP1`9h5YGXIWOQ1`9f5YGUI[OU1]9e5XGUI]OU1W9i5]GQI]OV1V9i5]GQI]OV1T9k5_GoH]OV1T9k5_GPI[OV1V9j5_GoH\\OW1V9j5VGgHA27\\1R9k5UGiHAN9_1Q9j5VGhH2^1h8j5VGgH3`1f8i5VGiH3^1c8m5YGfH5\\1_8R6lF_HO<3D:_1g8S6lFRIL^O2O0O9a1k8R6nFUIM^O:[1k8S6mFUIM^O:Z1k8V6kF\\I4nNOa1P9c6mFaH3]OMb1Q9Z9XKWE]LKV12GP;^NoD0Ob0o5_O^J5c0Iac0" + }, + { + "size": [ + 640, + 480 + ], + "counts": "n??240_OO30N11OO171H4<21KEi?o2Z@_MLBj?o3001O001O001O001O00001O001O001O1O001O001O001O001O1O001O001O001O001O1O001O1O001O001O1O001O1O001O1O001O001O001O1O1O1O001O1O1O001O1O1O2N1O001O1O1O1O1O1O2N2N1O1O2N2N1O1O2N2N1O2N2N3M2N2N3M3M4L5K6YOW]OK`c0O3M2N1O1O1O2NQhY7" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_55.png", + "model_output": " is lying on ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000322829.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "1b8i4001O5K0000001O0000M3N200000000000000001O000000O100000000001O00000000O1001O000000O1000000000000001O001OO10000000000000000000000000O2OO100001O00O10000000001N10O2O01N1000OZKeK>[4^OPL:R4DRL9n3FVL7j3HZL6e3I^L5b3K_L4a3L`L3`3MbL1^3NdL1\\3OfLiMkNa0^4g1jLbMoNb0Y4k1iLaMQOb0V4m1mL[MSOd0P4Q2RMUMPOj0m3Q2WMoLROl0g3U2YMlLSOm0d3W2\\MgLVOm0_3\\2UO]Ml0b2ZOYMf0g2ARM?m22cLN\\3Q5O1O1O002O0O1O010O2N1O1O1N102N1M3O001N2O2M2N2O1O1O1N20nJ_MYO`2d0fM[OY2e0jMZOT2g0lMYOR2i0nMWOQ2j0oMUOR2j0oMVOP2k0PNVOo1k0oMVOQ2j0nMWOQ2k0mMUOT2k0jMWOV2P6N1000O10O101O00O0100000000000000000`JjMKV2J]NNc1NdNO\\11dNO\\11dNO\\10fNOZ11fNOZ11fNOZ11fNOZ11fNOZ11fNOZ11eN0[10eN0Z10gN0Y10gN0X11hNOX11hNOW12iNNW12hNOW12iNNW12iNNX10iN0W10jNOW10jNOV10kN0V1OkN0W1NjN1W1NjN1V1OiN2W1MjN3V1MjN3V1MkN2U1NkN2U1NjN3V1MjN3U1NjN3V1MjN3V1MeNTKTOP5W2LdNUKUOo4W2KcN<]1DdN;[1FgN8Y1HhN7W1JjN5V1KkN4T1MmN2S1NmN2S1NmN2S1MmN4S1LlN5T1KkN6U1JkN6U1JjN7V1IkN6U1JkN6U1IlN7T1IkN8U1HkN8U1HkN8U1HkN8U1HkN8U1GlN9T1GkN:U1ElN;T1ElN;T1ElN;T1ElN;T1DmNU1BjN?V1AjN?V1@kN`0U1@kN`0U1@kN`0U1@kN`0V1_OjNa0V1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1]OlNc0S1^OmNb0S1^OmNb0R1^OoNb0P1_OPOa0o0@RO?m0BSO>m0BSO>m0BRO?m0AUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>l0@UO`0k0@UO`0l0_OTOa0m0^OSOb0m0^OROc0n0]OROc0n0]OROc0n0\\OSOd0m0\\OSOd0m0\\OSOd0m0\\OSOd0m0\\OSOd0l0]OTOc0l0]OTOc0l0]OSOd0l0]OTOc0l0]OSOd0m0\\OSOd0m0\\OSOd0l0]OTOc0l0]OTOc0k0^OUOb0k0^OUOb0j0_OVOa0h0BWO>h0CXO=g0DYOl0ATO?n0_OROa0o0^OQOb0o0^OQOb0P1]OPOc0P1]OPOc0P1]OPOc0Q1\\OoNd0Q1\\OnNe0R1\\OmNd0S1\\OmNd0T1[OlNe0T1[OlNe0T1[OlNe0U1ZOkNf0U1ZOjNg0V1YOkNf0V1YOjNg0V1YOjNg0W1YOiNf0W1ZOiNf0W1[OhNe0Y1ZOmM[Kc0[5`1ZOmMZKd0\\5_1YOiNf0V1ZOkNf0U1_OfNa0Y1@fNa0Z1_OfNa0Y1@gN`0Y1_OhNa0X1_OhNa0X1_OhNa0W1@iN`0X1_OgNb0Y1^OgNb0Y1^OgNb0Y1_OfNa0Z1_OfNa0[1^OdNc0\\1]OdNc0]1\\OcNd0]1\\OcNd0^1[ObNe0_1]O^Nc0b1]O^Nc0b1]O^Nc0b1]O^Nc0b1]O^Nc0b1^O]Nb0c1^O]Nb0c1^
O]Nb0c1^O]Nb0c1^O]Nb0c1^O\\Nc0d1]O\\Nc0d1]O\\Nc0d1^O[Nb0e1^O[Nb0d1_OZNc0f1]OYNd0g1\\OZNc0f1]OnMSK6^5l1_OmMUK3_5P2\\OmMVK1_5R2[OlMQ1T2^OiM6W2IjM7V2_5000QJmMf0S2ZOmMf0S2ZOmMf0S2ZOmMUKMV5V2EmMQKLN1\\5W2DlMRKML1^5V2DkMSKNK1^5V2DjMTKOJ1^5V22iMNW2HkMoJNY5V2ImMmJMZ5V2ImMmJNY5U2IoMlJM[5T2FYN:g1FYN:g1FYN:g1FYN:g1FTNmJG^5V2DXN;h1EXN;i1DXN;h1EXN;h1EXN;i1HhMQK2W5V22hMOX2b51O00O100O100O10000O1000000000000000000001O1O001O001O00O100000000000000000000000000001O000000000000001O001O00001O00001O001O002N2N1O001O2N1O1O2N2N3M2N2N3M1O3M1O1O2N2N1O1O001O1O001O1O1O1PJYLZ2g3]L^LfNNl4e3ZLaLeNNQ5a3XLeLdNKT5`3TLjLdNJW5\\3RLnLdNHZ5Z3nKTMcNE_5W3kKVNU4j1iKYNV4g1hK[NX4e1fK]NZ4c1dK_N\\4a1aKbN_4^1`KcN`4]1^KfNa4[1\\KgNd4Y1ZKiNf4W1XKkNh4V1UKmNj4S1TKoNl4R1QKoNP5S1hJTOW5Q1^JUOb5Z40001O001O000000001O0000001O1O00001O001O1O001O1O1O1O1O00001O00000000001OO1000000000000000000000000000000000000000000O1000000O100001O00000000000000001O000000000000000000000000000000000000O1000000000000O1000000" + }, + { + "size": [ + 427, + 640 + ], + "counts": "k\\T1221P=`0E2M5M5J2O1O1O2N2N3L2O1O1O5K3M6J1O3L`0_O3`E_MS:c2hEdMU:Z2nEdMQ:Q2kEPNd:Q2\\EoMd:]2N2O0O1O1O1O2lEUMe9j2>00001N3N1N2N2O1N2O1O1N2N2O1O1N101O1O100N101O10O1O1M20100O1O1O010O10O01O1O1000000O1000O100000000000000001O00000000O1O1000000000000001O00002N001O00000000000000000001O000000000O100O1O0010000000000000000000001O000000000000000000000000001O000001O00000000001O000000000000O011N1O11O00O1O001000000000000000000001O1O000000000001N100O10O11N101O00000O2O0O2O0O1N2O1O1O1O1O1O100O10000O101O0001O000000001O00001O001O1O1O010N4M1N2O01O01O00001O0O101N110O000O2O000O110O001O000O1O1O11N1O101O1O0O11O0000000000001O1O1O001O0000000000000000O100000000000000O10000000000000000001O0O100001OO1001O00000O10001O01O00001O0O10001O00001O0000O100O10000O100000000O10000001O001O0O2O000O2O00000000OgMQFX1o9hNQFX1o9S1O001O1O00001O002OO01O1O1O3M00010O002N3NO010O0000001O10O01O1N4M1O1O2N1O2O0O1OfHmL\\M2f6o2PLmLZM40Hd6U3TLlLZM6i6k2TLQMR4n2W3N2N2N3N0O101M2N101000OO2O2N0JZEgMg:U2:L3O2K5I6O1L3DXDTOl;k0SDQOR and 
?", + "answer": " is in front of .", + "image": "images/caption_simple_56.png", + "model_output": " is in front of ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000535523.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "Zeh52Y=2iB0P=600O2O001N101O1O1N2O3M1O001O1O3M0O10001O01O010O01O1O10001OO1000O01O010O101OO1cCTOY^:CbE<^:DdE:]:FbE;]:FbE;]:FcE:\\:GcE9\\:IcE9[:GeE:Z:GdEX:CcEa0Y:CeE?V:GiE:Q:LnE4k94SFMj97UFJj96VFJ\\9gNeF`1MJV9oNlFX1LJU9QOoFj2P9WMoFi2Q9WMPGh2Q9XMmFi2T9WMkFh2W9f0001O001O1O1O001O001O1O001O1O001O00001O1O001O001O00001O001O001O001O1O001O00001O1O001O001O00001O001O001O001O001O001O001O001O00001O1O00001O001O001O001O001O00001O001O001O001O001O00001O001O00001O0000001O000000000000000000TE" + }, + { + "size": [ + 428, + 640 + ], + "counts": "eiR73X=1O2N2N1O2N100O2@GjC:h;3YDM`;:_DH_;8aDI`;5`DLa;2_D0`;O`D2`;M`D5_;JaD7_;HaD9`;DaD=_;B`Da0_;^OaDc0_;\\OaDe0`;XO`Dj0`;UO`Dl0`;ROaDo0_;oNbDR1j;2\\OmNmDU1k:hNmD47U1k:hNoD07Y1i:hN^EZ1b:fN\\E\\1d:dN[E]1d:cN[E`1W;1M3M3O1M3N1N3N2M102O0010O2O0O10000O2O0O101N1O2N1O1O2M2O2L4M2M4L4L4K5I7K5J8GeW:" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is over .", + "image": "images/caption_simple_57.png", + "model_output": " is on ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000276018.jpg", + "mask_rles": [ + { + "size": [ + 640, + 416 + ], + "counts": "Una05kc01N102N1O2N1O0O2O0O2M2M3M3K6K4J6L4N2O1O101O000d@cNU<]1cCmN[WOeAo0[>QO]AW1c>iNWA]1R>VMVB\\1Dc1U>RMXBY1_Oi1X>oLZBX1[Ol1[>kLZBR2\\O9[>dMZBQ2_O9V>fM\\BP2^OdM`BT1CROM\\2j0jM[;D[DQ1oLAQ3`0oL_OQ3b0mL^OS3c0hHUL32\\2T3j4h0^IZLc1n2P5i0YI_Ld1d2X5m0nHhL5AMj2S7m0lHhL4\\OOf2\\7U1bHiL\\1b1e6e1oGiLY1[1R7k1fGkLS1]1W7i1fGjLo0`1\\7g1dGiLn0b1^7e1cGkLe0h1i7^1aGjL?o1P8W1bGiLk7TM_H^2C`0n7RM`H_2_O`0R8oLaH_7`7_HbH`7_7^HdH`7]7_HcHa7^7]HdHb7]7\\HdHd7^7YHeHe7]7XHeHg7\\7WHeHi7]7SHeHm7^7YGjGJk0m8e9O00001bNTEfIl:Y6]E_Id:^6bE^I_:`6dE^I]:_6iE]IW:^6PF`IQ:\\6TFbIm9\\6YF_Ig9a6^FZIc9e6n1M101O1O1O1N2O1O1O1N2N3N1M4A?]M[B_Nn=YNcAk2U1UNg=h1]2L1N2N2O2M3N2M2O2N2M4K6I4L5K6J6K5J5M5I7J7EejR4" + }, + { + "size": [ + 640, + 416 + ], + "counts": "9Y33PNd0hN]O0c01]O4O0f0KZO7OK0O1o0NXO0K`0R1@a05\\42^7LbH3a7J`H5f=00O1O1000000O100O100Oig19fWNId\\O`0Sc0?QOYOT^OQ1ca0k0C=L5L4K9H9G6JR5@=C:F3L10000O1O1M3L4L4J6L4J6M3K5K5O1O1O1O1O1O1lEcHb7`7]HcH`7^7_HdH^7^7aHeHZ7^7dHeHZ7\\7dHhHW7[7hHhHT7Z7kHhHS7Y7lHjHP7X7oHlHk6W7UIkHe6Y7ZIlH`6V7_IPIYNmN_7W8WJhIUOYOa3T7YMQJ\\NBl3b6gMmLd1X3[NQMT1X3kNPMg0W3XOXMXN\\IT12ROj9a1\\M`N\\3_1c700O1O1O1O1O1O1O1O1O1O100O1O1N2O100O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1N2O1O1O1O1O1O1O1O1O1N2O1N2O1NRTg5" + } + ], + "question": "What is the relationship between and ?", + "answer": " is guiding .", + "image": "images/caption_simple_58.png", + "model_output": " is in front of ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000404249.jpg", + "mask_rles": [ + { + "size": [ + 640, + 427 + ], + "counts": "Y[i17hc03M3M1O2O0O2O1n\\OD^b0<`]OH]b09b]OI]b07b]OK]b06b]OJ^b07a]OI^b08a]OI_b08`]OH`b01_]OC1<`b00a]ODNd:n0cHg0gMmNf9<_HP1eMeNk9>[HR1gMaNm9i0nGk0PN_NQ:R1\\EgMj0f0CU4U:gMoEYN^O]4c:\\MkEc3T:cLcEa3]:Q300O100O001dNiFnGW9n7cFQG;l0R9o7WGQHh8m7\\GRHd8k7`GSH`8m7bGQH_8n7cGQH\\8n7gGPHZ8o7hGoGY8o7mGlGS8T8SH_GU8`8e101OO2H701O010lMYGnHg8P7bGhH_8Y7[HkGf7T8_HeGe7Z8Q2N2ZOSEmGJ0Z;l7f0N2H9M2M3L5O_FQIQ6o6ZIiHiMj0d8h6cH_JX7i5RHlJNZMm6U8fHgJ7ZMR7Z;kHgDV7Y;hHhDX7X;eHlDZ7T;bHPE_7h;0000001O1XE_Ho8a7PGaHo8`7mFcHS9_7hFeHW9\\7bFjH^9Y7^FiHa9X7ZFlHf9W7UFkHk9V7RFmHm9[7iEgHW:b8O001N101O0O1O1O2N1N2O2L4M3eLoE_LW:`NVFn2HUN]:jNQFY2AaL9X2V:nNTFR1_1H_8TOZF`0a2]OX73bFLb2Ko68jKBY4>lKXOX4h0a7O10X]OjNeb0Y100Z]OhNcb0W1^]OhNbb0X14O12ON10O01O1002ON001O1000\\]OkN\\b0T193M1O1O0000O1O0106KM2N1100004K1O2O0002N10N11O0011NO21M13MO101N2O11OO000O21ON011O100O21M10N101O01O02M1O2N2M1O2N2N2N3M2O3J]bf2" + } + ], + "question": "What is doing on ?", + "answer": " is riding .", + "image": "images/caption_simple_59.png", + "model_output": " is on ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000098287.jpg", + "mask_rles": [ + { + "size": [ + 640, + 415 + ], + "counts": "m;4o2O^MNa22cMNk;0hC02:KHa0Nb03RO9K07FI1140KO:7FO0d04WO02KO001OO106OK0_;7`Dm7];;000000000000O1000000001O000000O1iNaGPFOc0a8V9_GQF`0g0Q8X9`GPFa0f0o7Z9SHgFm7Y9PHjFP8V9jGPGV8P9hGRGX8n8gGSGY8l8hGTGX8[:O2N1O1O1O2N1O001O001O1O1O00000000000000000000O10000O1J6000000gNgGjFZ8T9iGjFX8U9jGiFW8V9jGiFW8W9iGiFW8X9iGfFX8Z9hGfFX8Z9hGfFX8Z9hGfFX8Z9hGeFY8[9hGdFX8\\9hGdFX8\\9hGcFY8\\9hGdFX8\\9iGbFX8]9iGbFX8^9iG`FX8_9iG`FX8_9iG`FX8^9kG_FW8a9iG]FY8b9jGYFY8f9Q10000001OO1O1001O00000000000000O100000000O100000000O1O1000000000000001O0000000000000000O100000000000000O100001O00000000O10000O100000000000000000000O1000000000000000[GYFQ7g9cHYFlN;^8\\9fHZFjN;`8Z9gHiFY7W9gHiFY7W9cHmF]7T9\\HRGd7n8WHYFZOk0_8l8SH[Gm7e8QH]Go7f8gGaGY8R:100O1K_GjDb8S;7N2M3N2O1L4I7M3O1M31O1O002N1O2N4L1O3M1O5K5K1O2N1O]N^GmG]8R8iGjGV8V8mGhGR8W8RHfGn7Z8VHbGj7^8YH_Gg7a8`HXG`7g8fHSG[7m8iHoFW7Q9kHmFU7S9nHjFR7W9oHfFR7Z9RIaFo6_9RI_Fo6a9h1O100000000000000000000O100000000000000000000O1000000000000001O00O100O1000000000000000000000000000000000000000000O10000000000O1000000000000001OO100000000O1O10000001O000000000000000000O100000000001O000000O100001O0000O10000O1000000000000000000000000000000000000000000O11O0000O1000000000000000000O10000000000000000000000002N001O000000000000000000O100O100N2M3O10000001O00000000O100000000001O001`FlFlN100j3NXL0T4Wc0dKk\\O1?" + }, + { + "size": [ + 640, + 415 + ], + "counts": "UeX31T22a?4U@4h?NV@4h?NV@4i?NR@6l?LP@8o?In_O:P`0Hm_O:S`0Je_O:\\`0LX_O3n`00e^O8^a0R1201O1N2LJPNd^Ok1[a0=2M3O100O21O002N1O2N2O1N2O1O3McMe^OK2g04 and ?", + "answer": " is standing on .", + "image": "images/caption_simple_60.png", + "model_output": " is enclosing ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000278973.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "Sfn13W=1N2O0000001O1O1O1N102N1O1O000O2O001O1O0O101N1000000O10000000001O1O00001O001O00001O00001O00000O10000O100000000000O01001O00O10000O10000O1000O100000000000000O10000000000O2O0000001O0O2O001O001O0O2O001O001O000O10000O10001O000O2O00001N101O001O001O0O2O001O00001O0010O010O010O10O010O010O1O2MiQ`4" + }, + { + "size": [ + 426, + 640 + ], + "counts": "fjT12V=?A5M1N100O0001O010O010001O0O2O0O10001CW9k1O2O0O01_OkFkLU9S3nFlLS9Q3PGnLP9o2TGPMm8m2VGRMj8m2XGRMi8h2]GWMd8f2_GYMb8d2aG[M`8`2dGaM\\8\\2gGcMZ8[2hGdMY8Z2iGeMY8W2`1L5J;E7J6J7H8@>Ghdg4" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is on top of .", + "image": "images/caption_simple_61.png", + "model_output": " is on ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000104198.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": 
"dS2b0P;6J6N3M3N2N1O2N2M101IfNnE\\1P:6O20O2ON1O101O001XG\\N[7e1WHjNg7c1bGgN_8X2O1000000O1001O00000000O1O11O1O000jNaGSO_8S200O2N20O1O000O02O1N10O100000O10000lNkLfIP3TOjLT7=fIe2]6^McIT2VO^M_7a0ZIe1S7`NkHZ1W7kNhHU1V7nNhHR1X7ROdHo0\\7ROcHn0^7SO_Hn0b7SO\\Hm0d7UOYHl0h7TOSHP1n7\\11O00O1O1O100O1O1O10000001O001OO1O1O10000O1000000O100O100O10O1001N100O1O1O1POTLUJl3h5XLVJi3f5^LVJc3h5`LVJa3j5`LUJ`3j5cLTJ]3k5fLQJ\\3o5gLkI\\3U6S11O2N0000O100O1N2N2O2N1O1O10O10001O2N0010O0O100O1N2N2N2M3M3M3M3L4N2K5J6L4N20000O100O1GQIjKo6U4TIhKm6e3VIbLNHl6Z3SIfL88HHm6Z3SIeL89EG01P7Y3TIfL7b0e6g2UIeL8e0b6W3RIkLn6o301O00001O3M4L9G4L5K5K3M3M3M2NO100O100O1001O001O1O1O001O2N1O1O1N1001N10O10O2O00O200_KoIZ3R6dLQJZ3o5dLTJ[3l5cLWJ\\3j5bLWJ^3j5`LXJ_3j5^LXJa3j5\\LXJc3n601O1O1O1O001O001O001O1O001O1O1O1O00000000OgMSHj0l7VOXHg0g7YOZHg0f7WO\\Hi0d7UO^Hk0b7TO`Hk0a7TO_Hl0b7POaHP1`7mNbHS1`7kN`HU1b7hN_HY16XNU6=gIb2X6YMlIh2Z6lLkIT3]7O001O001O001O1O1O00001O001O001O000000000000001J5ZO\\GPNn8k1b0D;N2O01O1N1O1100OO10O2N2O1N2O1N3M3N3K?BZTW1\\O[lH" + }, + { + "size": [ + 375, + 500 + ], + "counts": 
"2g15N1aNd7a1[HN0bNe7a1ZHM1bNe7a1ZHM1bNe7a1[HL1bNd7b1[HL0cNe7b1ZHK1cNe7b1[HJ0dNe7b1[HJ0dNe7b1[HJ0dNe7c1[HH0eNe7c1[HH0eNe7c1[HH0eNe7c1[HH0eNe7d1ZHG1eNe7d1[HF0fNe7d1[HE1gNd7d1[HE1fNe7f1YHD2gNd7e1[HC1gNe7f1ZHC1gNe7g1YHB1hNf7f1YHB1hNf7f1YHB1hNf7f1ZHA0iNf7f1ZHA0iNf7g1YH@1iNf7g1ZH_O0jNf7g1ZH_OOkNg7f1ZH_OOkNg7g1YH]O1lNf7g1YH]O1lNf7g1ZH\\O0mNf7g1ZH\\O0mNf7g1ZH\\O0mNf7h1YH[O1mNf7h1YH[O1mNf7h1ZHZO0nNf7h1ZHZO0nNf7i1YHYO1mNg7j1XHYO1mNg7j1YHXO0nNg7j1YHXO0nNg7j1YHXO0nNg7k1XHWO0oNh7j1YHVOOPOh7j1YHUO0QOg7j1YHUO0QOg7k1YHSO0ROg7k1YHSO0ROg7k1YHSO0ROg7l1XHRO1ROg7l1XHRO1ROg7l1YHQO0SOg7l1YHQO0SOg7l1YHQO0SOg7m1XHPO1SOg7m1XHPO1SOg7m1YHoN0TOg7m1YHoN0SOh7n1XHoN0SOh7o1XHmNOUOi7n1XHmNOUOi7n1XHmN0TOh7o1XHmNOTOj7o1WHlN0UOi7P2VHkN2SOi7R2VHjN1SOj7S2UHjN1SOj7S2UHjN1QOl7V2RHiNV8W1jGiNV8W1jGiNV8W1kGcNZ8]1fG_N^8b1aG]N`8c1`G\\Na8d1_GZNc8f1^GXNc8h1]GWNd8j1[GVNj0He6R2aHUNa09h6b1hHTN6ELm0n6Z1PISN6HJP1l6U1TISN6HJU1g6Q1XIQN8II\\1`6j0_ImMbJkMYOl1Q69dJmM[Oo1l55eJoM_OQ2g50ZJlMM92P2b5K_J_NOh1`5IaJ_NOi1_5IaJ^N0i1_5IaJ^N0j1^5HbJ^N0j1^5HcJ]NOk1^5HcJ\\N0l1]5IbJ[N1l1]5IbJ[N1l1]5IcJZN0m1]5IcJZNOn1^5HcJZNOn1^5HcJZNOo1]5HcJYN0o1]5HcJYN0o1]5HdJXNOP2]5HdJXNOo1^5IcJXNOo1^5IcJWN0P2]5IcJWN0P2]5JbJVN1o1^5KbJUN0R12oN]5j1aJUN0o07oNY5m1`JUN0m0:POV5n1`JUN0h0a0SOo4Q2_JTN1f0f0QOk4U2_JSN0g0g0POj4V2_JSN0f0j0mNi4Z2]JRN0g0m0iNh4_2ZJQN2f0i6Y1VIPN1g0i6Y1VIPN0i0i6W1WIPN0j0h6V1XIPN0j0h6W1WIoM1k0g6U1YIPN0j0h6W1WIoM1j0h6W1XInM0j0i6X1WInM0h0k6Z1UInM0g0l6\\1SImM1e0n6^1QIlM2e0n6_1PIlM2e0n6_1PIlM1e0P7_1PIkM0e0Q7a1nHjM1e0Q7a1nHjM1e0Q7a1nHjM1f0P7`1oHjM1f0P7`1oHjM1g0o6_1PIjM1h0n6^1QIjM0j0n6\\1SIiMOj0o6]1RIiMOj0o6^1QIgM1j0o6_1PIgM1i0P7`1oHgM1g0R7b1mHgM1d0U7e1jHgM1a0X7h1gHgM1>[7k1eHfM0;_7o1aHfM09a7R2^HeM07e7T2[HeM04h7W2XHeM1Ok7\\2THeM1FT8e2kGeM\\8[2dGeM\\8[2dGdM]8\\2cGdM]8\\2dGbM]8^2cG`M_8_2=N2F:E;G9C=N2O100002N1lEiNf9e2oN2VG]M\\8c2cG^M]8b2bG_M^8a2bG_M1MS8d2lG_M1OQ8c2nG]M13n7`2QH]M1[N]O1c5i2bJmN;[N_O0c5h2cJoN7[NCNc5h2cJRO2ZNHMb5g2dJFJdMa5f2eJFIbMe5i2aJEJbMe5i2aJDKcMd5i2`JEL`MNNf5m2`JEL`MNOe5l2aJEKaMONe5l2aJDLbMNNe5l2aJDLeMc5g2aJDLbMNNe5l2aJDKbM0Nd5l
2aJDKcMOMe5l2aJDKbM0Nd5l2aJXOMoM1Nf5k2[JXO0lM11d5j2\\JYO8mM\\5j2\\JYO8lM]5l2ZJXOW6h0iIXOW6g0jIYOV6g0jIYOV6g0jIYOV6g0jIYOV6g0iIZOW6f0iIZOW6g0hIYOX6g0hIYOX6f0iIZOW6f0iIZOW6f0iIZOW6f0iI[OV6e0jI\\OU6d0kI^OS6b0mI@Q6`0oIAP6?oIBQ6>oIBQ6>PJAP6?oIBQ6>oIAR6?nIAR6?nIAR6?nIAR6?nIAR6?nIAR6?nIAR6>nIBS6>mIBS6>mIBS6>mIAT6?lIAT6?lIAT6?lIAT6?lIAT6?lIAT6>lICT6>kIBU6>kIAV6>kIBU6>kIBU6>kIBU6>kIBU6=lICT6=lICT6=lICT6=kICV6=jICV6=jICV6=jIBW6>iIBW6=jIBW6>iIAW6`0iI_OX6a0hI^OZ6a0eI_O\\6`0eI_O[6b0eI]O\\6c0dI\\O^6c0bI\\O^6e0bIZO_6f0aIYO`6g0`IXOa6g0`IYO`6g0_IYOb6g0^IWOd6i0\\IVOe6j0[IVOe6j0[IUOf6k0ZISOh6l0YISOh6m0XIROi6n0WIQOj6o0VIPOk6P1UIoNl6P1TIPOm6P1TIeNJjNT7b2SI[M\\8R2n0I7M3J6I7J6L4I7J6K5K5LPic0NTW\\OS1cDhNY:\\1mEiNd9W1[FkNd9S1\\FoNd9P1[FQOf9l0\\FUOd9j0[FXOe9f0[F\\Oe9b0\\F_Od9?\\FCd9;\\FHd95\\FMd91\\F?U9@kFb08kNFMZ8d0hGf0LfN55JO[8>jGj0KeN63KO[8=jGm0JdN63KO0MU8?oGP1LaN54JO2MT8Ih7m0hG\\O?Gj7i0gGD?Cj7g0eGJa0@h7d0jGL=Ch7>iG3?@B^Oo7l0RH71kN6m0GVOf7i0^H:OjN6U1[7F`HRIPN0c1m6=SIPN0d1l6 located relative to ?", + "answer": " is over .", + "image": "images/caption_simple_62.png", + "model_output": " is over ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000224051.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "WeS3451j4o0j3R4QLbJ1<6n0k3Q4mKhJ199l0k3Q4jKkJ37f0j3V4aKoJ93=g0k3f4gKcJ>L_O6_4Y5eKdJ?EE8CDf4j5cKeJ`0AK6X4c5^KeJ]6Y5cIgJ^6Y5bIfJ`6X5aIgJa6W5_IiJb6W5^IgJc6Y5]IgJd6X5\\IhJ=H]OOe5a5`JhJ7=U5j4dJiJ6?U5h4eJiJ5`0W5f4dJjJ5?Y5f4bJmJ3=\\5f4_JPK38a5g4\\JRK25e5h4ZJSK04g5i4YJTKN4i5h4YJRLi5m3WJQLk5P4TJoKn5P4RJoKo5Q4QJnKP6S4oIkKS6U4mIiKV6V4jIiKW6V4kIhKV6X4jIhKV6X4kIdKX6\\4hI\\K`6d4aImJm6S5SIlJn6U5RIjJn6V5RIjJn6W5RIgJo6Z5QIdJP7`5mH]JU7d591QI^J\\O?o5S5aJ_KY5b4eJdKU5]4iJgKU5Y4jJiKU5V4kJlKT5T4kJnKS5S4lJnKT5S4kJmKU5S4jJnKU5S4kJmKT5T4kJnKT5S4kJmKT5S4nJlKR5R4PKoKn4Q4SKZLb4e3`K^L[4b3fK_LY4a3gK_LX4b3hK^LW4c3iK]LW4c3hK^LW4c3iK^LU4c3jK]LW4c3iKULCoM11b4l5jK]LT4d3lK]LR4d3mK]LR4e3nK[LP4f3PLZLo3h3PLXLo3l3oKTLo3Q4nKnKQ4S4oKnKP4S4QLlKl3V4VLhKi3Z4VLgKg3[4YLgKd3Z4\\LfKb3\\4^LeK`3\\4`LeK^3\\4bLeK\\3\\4dLdKZ3_4eLbKY3_4gLcKT3a4lL_KQ3c4oL_Km2d4SM\\Ki2g4WM[Kf2e4\\M\\K_2h4`MYK^2i4`M[K[2g4dM[KX2i4hMWKT2l4lMYKl1j4TNYKe1k4\\NYK[1k4eNXKT1m4lNYKk0i4UOg4001O001O0O100O00O010OO200001N2N200O3LhTi1" + }, + { + "size": [ + 428, + 640 + ], + "counts": "T:X3T:000O100OoMRFmNMc1n9ROSFD5CM657H30EQ:MPF50K1N08NJ7NL584J4KHV:NoEO15LI60>2[O069JJQ:KPF33O15LJ5OL1OO:2E158KJk99VFG42JL77JH3;0MMKi9 and ?", + "answer": " is parked on .", + "image": "images/caption_simple_63.png", + "model_output": " is parked on ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000530099.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "T\\Y11d;3N1O100O1O1O1O1O1O100O1O1O100O1O1O1O1O1O1O100O1O1O1O100O1O1O1O1O1O100O1O1O1O1O1O100O1O1O1O100O1O1O1O1O100O1O1O1O1O010O1O1O2N1O100O1O1O1O100O1O1O1O100N200O1O1O100O1O1O1O100O1O1O1O100O1O1O1O1O100O1O1O1O1O1N2O1O100O100O1O1O100O100O1O100O1O10000O10000O100O100O10000O10000O10000O10000O1000000O100O10000O100O10000O100O100O10000000000O100O10000O10000O1000000O100O10000O10000O1000000000000O1OVM[Hd1e7ZN\\Hg1d7VN_Hj1a7UN_Hl1a7SN_Hn1a7QN_HP2`7PN`HQ2`7nM`HS2`7mM_HT2`7kMaHV2_7iMaHX2^7hMaHZ2_7eMaH\\2_7bMbH_2]7aMcH`2]7_McHb2]7]MdHc2\\7\\McHf2\\7[McHf2]7XMdHi2[7VMfHk2Z7TMfHm2Z7RMfHo2Y7QMgHP3Y7oLhHQ3X7nLgHT3Y7kLgHV3X7jLhHW3X7hLhHY3X7fLhH[3X7dLhH]3X7bLiH^3W7aLiH`3V7`LjHa3W7\\LiHf3b72O1O1O1N2O1O1O1O1O1O1O1O100O100000000000000O10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000GhKmH11X4R7gKmH21V4R7hKmH21V4R7kKPIU4P7kKPIU4Q7jKoHV4R7gKPIY4W700000001O000000cH" + }, + { + "size": [ + 375, + 500 + ], + "counts": "Zel11f;1N2O3L2O0O2O0O2O1N2N3N3L3M102M1O3L2N3O1N2O1N3M2N2M2O1O1O1O1O1N2O1O001O1O10O00010O01O100O001O100O100O100O1N3O1N2N1O2OO10O0100O010O010O10O01O10000O010O1000000O10O1000O1000O010000O10000O1O01000O1O1000O01O1000O0100000O01O1000O10O1000O01000000O01000O1000O1000O0100000O1000000O0100000O1000000000000O10000O1000000O100O1000000O101O0O10001N101N101O001O000O2O001O001N10001N1O1O1O2N1O1N3N1OaEROX:l0gEVOY:i0gEYOY:e0gE[OZ:d0gE\\OZ:a0fEA[:=eED[: and ?", + "answer": " is sitting on .", + "image": "images/caption_simple_64.png", + "model_output": " is on ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000202339.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "Vk_18cc06I6J7J5J7I6J6J7J6J5M3M4L3M3M3M3O1000000000000000000000O100000000O100000000O10000000000O011O001Og0XOc0^O9G:F4K6Kohi6" + }, + { + "size": [ + 640, + 480 + ], + "counts": "koo18gc03J6L3L3L5L3L4K4O2M3O1H8L2O0J3QN`NP@IY1m1k>j1O10M3O1O1010o_O_Lg0LSOe0n=h3[BRMEcNR34U7^4kEYMf2]N\\7d6[H^Id7j6SHXIk7P7mGQIR8[7aGhH^8^7\\GbHc8c7YG]Hg8g7UGYHP7H[IS8AVHn63[Ii7DUHP79UIf7HPHR7a0QIb7JmGS7f0PI^7KmGT7h0nH_7JjGW7l0kH[7LjGX7m0kH[7JiGZ7o0jH[7HfG_7P1hHZ8W7iGfHWNIj9`7SHbHUN4a9[7[H]HXN3a9`7XH[HW8e7R20O2N1001OO10O0100O1O2O0RFWHQN6JJ2No8l7SIWHoMj0i4[OVLNU1f7LYHmMk0f4l0\\MP61oHmM_O2F`4P2aKVN_1h7`0cHnMDa0KR4]2lLa5b0kHZ3e1nKa5d0mHe3e1\\K`5m0kH[NTOX5b2[K`5Q1jHP4e1kJd5P1kHX4a1cJg5P1mH^4^1UJn5Y1hHf4m9TKXFn4h9nJZFV5c9dJoDTO_1[6a9UJPE@i1]6V9QJoFQ6Q9jISGW6Z;002O0O2N010O1O2OO01O10O02N001O100O1O2O1N2N4L4L3N1N3M3N1NTJiCP18iMI\\1Ud600000O10O01O1O1000O01O1[OVOR^Ok0la0\\OP^Oc0Pb0@m]Oa0Pb0Cn]O>Qb0Dn]O;Sb0Fk]O;Tb0Gj]O9Wb0h0000O101N1O101N1O2O2M2^O_]OBcb08h]O_O\\b0=f0K6KggS4" + } + ], + "question": "What is the relationship between and ?", + "answer": " is carrying .", + "image": "images/caption_simple_65.png", + "model_output": " is being held by ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000172396.jpg", + "mask_rles": [ + { + "size": [ + 351, + 640 + ], + "counts": "3_2`82M5L3M3M6J3M2N5K5K4L2N4L3M3M6J4L2N5K6J3M2N3M5K2N8H4L6J1O3M3N1M5L3M3N7H3L5L2N4L3M3M3M4L4L5K2N3M4L4L4M1M5L3M4L5K1O2N6J3M4M3K3N4L4L4M1M2O5K3M5K2N2N4L2N2N2N00001O000000000001OO10000001OO100001O00O100001O00O100001O00O100000000000000001OO10000001OO10000001OO10000000000000000000000000O1001O00000000000000O1001O000000O1001O00O1001O000000O10000000000001O00O1000000000000001O00O1000000000000001OO100000000000000001O00O1000000000000001O00O10000000000001O000000O1001OO10000001O00O1000000000000001O00O1000000000000001OO1000000000000001O0000O1000000000000001OO10000000001O0000O1001O00O100000000000000000cILo14^40000000000000000000000000O1001O000001N11N1001N10O2O0001O0000O1001N10O2O0001O0000O1001O00O10000001OO10000001OO10000001OO100001O00O100001O00O100001O00O100001O0000O1001O00O10000001OO1000000000000001O0000O10000000000001O00O1000000000001OO100000000000000001O00O1000000000000001O00O2OO101OO100001O000000O10000000000001O00O10000000000001O000000O11O0000O1001O000000000000O10000001O0000O1001OO100QJMR13n4O00000000000000O1001O0000000000000000O11O0000000000000000O11O000000000000000000000000000000000000O11O00O100N2001OO1L4N20000O1O1N2H@mEd0P:7I7K5N2O1O1L4L4O1N2D]NXGg1f8:F:M3O1N2N2N3L2N[MSH^2a7`M]H65Y2]7h0G9K5N2M3M3N3N`LXIj2h6WMXIi2V6bLlINUO:01f4h2gKjLB6l0MVO20M93[4m2lKnL_O5k0NVO12M20K4e4i2PLRM[O2ONm02VOO<2@33K^4j2XLWMXO00Nl03BNB6O02J_4f2\\L[MSO5l0KC:BJ1K_4b2`L_MoN5l0JD;@K2J_4_2cLbMlN4m0LB:AJ3K^4W2kKiMF4b0M\\O20Lm02VON7d0I]O^4V2SLjM21ZO49K78H and ?", + "answer": " is inside .", + "image": "images/caption_simple_66.png", + "model_output": " is inside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000404839.jpg", + "mask_rles": [ + { + "size": [ + 640, + 427 + ], + "counts": "_l02g15\\NK?0E0b`07j@S1S?POj@S1U?lNi@FdN^1b`0kNa@IXOY1X`0mN`@LXOV1W`0oNX@HWO74HM[1``0oNT@7]OP1``0hNQ@:^Oo0b`0eNP@T2Q`0kMo_OT2R`0mMj_OV2V`0jMi_OW2X`0hMh_OX2Y`0fMi_OX2Y`0fMh_O4@j1i`0SNf_O3Ai1l`0RNb_O6Bh1l`0TN`_O4Aj1Pa0VN[_ONGl1n`0XNX_OMJk1n`0YNW_OQ2j`0nMT_OT2l`0lMR_OV2Ua0cMk^O]2Wa0bMg^O7On1Xa0`0M32N2N0O01000OgN\\MXAc2h>^M]@0;b2W?_M]@14OD`2j?aM^@3FL42M^2]`0_Mg_OO03M_2k5iMQ5U2hJYNS5g1hJbNT5^1hJlNR5T1kJYOk4f0RKKa44XK:b4ETJfMZLO40?k2k8^OmITOhLb1e7oLgIX2b1P3V6nLdI_3W6]LeIW4o5gKnI`4P6`KiIg4W6YKeIk4Z6VKbIn4]6SK_IQ5a6oJ[IU5e6kJYIW5h6iJUIY5k6gJPI^5i6`HfGR2^1b5l6\\HfGR2U1oMgNi7^8VHfGT2l0P6d7RJWHQ6h7\\JdGl5[8m20000000000LdDdG\\;h7dDlH\\;g6hDiHN?Z;m70002NLgDaGY;^8hDbGX;]86ZLRE]KIo0G4^;\\3mE^KcNh03=^;[3WFULYNa0_;X3[FVLVNb03hNV;]4fFbM`9]2eF_M[9`2kFgKgM^1d1aNZN3a9U4TIUMQOfNn7T4RIUMPOgNo7S4TITMmNiNo7Q4ZIRMhNlNn7P4\\IPLQMm0\\1ROa8P4SIUMXNnNf8m3QIVMWNoNh8j3SIVMTNoNk8j3RIWMRNnNn8j3QIYMjMROV9d3QIZMjMmNY9h3nH[MlMiNW9l3nHYMhMkN^9k3kHYMfMnN_9i3kHYMdMPOa9g3kHYMdMPOa9g3kHYMcMPOc9f3lHYM_MQOg9e3kHZM]MQO]84kG`3]2YM[MSOU8e0bG\\OAc3]3ZM[MQOP8o0`GQ3V3oL[MPOm7a5iJ^KT6c4nI[KQ6g4oIXKUMQOR8i5iJWKTMPOR8j5jJVKTMPOR8j5jJVKTMPOS8h5jJXKP6f4PJYKW6a4iI_K[6\\4fIeK]6W4cIhKa6T4`IlKb6Q4`InKc6o3\\ISLg6i3YIXLoLoNQ9h4PJYLj6d3VI\\LkLQO]9`4iI_LfLTOd9Z4fIcLgLQOe9Y4eIfLgLQOd9X4eIiLaLlNR:Z4]IRMXLfN\\:V4\\IXMh6d2XI[Mk6c2UI]Mm6a2SIVMZ7f2fHWM`7f2`HYMd7e2[H[Mi7a2WH`Mh7`2XH`Mh7`2XH`Mh7a2WH_Mi7a2WH_Mi7a2VHaMi7_2WHaMi7`2VH`Mj7`2VHaMi7`2VH`Mj7`2UHaM2kNQ4e3lK`M4jNP4f3mK_Mk7b2TH^Ml7c2SH^Ml7c2RH^M5hNT4k3gK\\Mn7e2RHZMn7g2QHYMo7i2oGXMP8i2oGWMb0iNa3R4kKUMS8l2lGTMa0kNg3S4gKQMb0lNg3T4fKQMb0kNh3T4fKQMb0kNh3U4dKQMf0hNf3X4TKhKkMX1k:R3WGjKjMU1`3iNa3Y4SKlKjMS1`3hNd3Z4oJoKkMo0b3hNd3[4lJSLkMj0U;U3kFdMT9]2iFeMW9]2fFdMZ9^2_FgMa9\\2WFiMi9b2bEhM^:m501O1iHYGS3g8S40hN\\GTGe8f8cGWG^8`8kG_GU8]8PHbGQ8Z8SHeGm7X8WHgGi7l7fHRHZ7j7kHUHV7o5SGcJ;iNa1a0Q7R6UG^Jc2[OX6V6YG\\I1a0j2Gl5k3SGZNh0hMf;m3b
CZN^=e1dBYN]=U41N100O1hM_BQNb=]1bBoN_=o0gBiLEh1e=^1iBgLGg1`=a1lBdLGi1^=a1ZC^MPO3g=]2eDSM^;j2jDnLX;n2o2L4M3M3Ll^O[MUa0R2R_OVNSa0h1Q_OTNPa0k1c000O2N101N101O1O0O3[Ol]OZObc0JZmm3" + }, + { + "size": [ + 640, + 427 + ], + "counts": "nY:1nc02N1a]70^bH3N1O2M8H4M2M3N2M5K6K1O1N2O1O0OI`]OoNab0Q1`]OnN`b0S1_]OmN^b0V1a]OkN^b0\\100O010O2OO1000OO2M3M2O200003M00N2O10000N\\]OiNcb0X1201O3ROo]OHQb05Q^OKoa05Q^OKoa05Q^OKTb0OgQ?6nn@6N000O020O00000004LBHU]O7kb0IX]O4hb0MY]OOib02V]ONib03b01O1O001O1O1O1O00001O4L2N1O2N2N3M1O1O001O1O0000001O0O1000O1100O1O0000001O001O00001O000000001O000000000O2O0000010O00001O0O101O01O01N2N2M201O000000001O00000000001O0000000010O000001O0000001O0j@_Nm;a1QDbNo;]1QDeNm;[1SDgNk;Z1UDfNj;Z1VDhNh;X1YDiNe;X1]DfNa;[1`DfN^;[1aDfN^;Z1bDhN\\;X1eDiNY;X1gDhNX;X1iDhNV;X1jDiNU;X1jDiNU;W1kDiNU;V1lDkNS;U1mDkNS;U1lDmNS;T1kDnNT;R1kDoNU;Q1iDROV;n0iDSOW;m0hDTOX;l0YD\\NgMi0P>k0YDaNaMe0V>j0YDCg;=YDCh;=WDBj;>VDBj;>WDAj;>VD^On;c0QDQO`MZO1Na>g1nCQO`MZOf>f1iCPOaMZOg>f1gCTOZeDBZ;>fDBY;?iD_OW;a0kD\\OV;d0kD[OU;e0kD^OR;b0oD^OP;b0PEBl:>UEBk:>TECf9lNWDa1S2C`9WOXDV1Y2D_8XOeD6?n0^2DW8n0lD]On2ER8n2nGRMn7R3RHoLj7T3VHmLf7X3ZHfLc7^3]HbLa7_3`HaL]7`3dH`L[7b3\\FaKF31k0i9d3[FmK[O21RO2\\1U:d3[FXM_OTOT:g3XFmNf9T1XFoNf9S1VFQOh9e5N1O1O101_KSFlNQ:n0RFQOU:g0mEYOT:c0oE]OR:a0oE^OY:8jEHX:OoE0T:KoE4U:FnE:U:BlE>X:mKoDT3m0n0g:^NdEb1a:UNcEk1`:\\MQDmNd1g3]:fLhFZ3^9ZLhFf3i and ?", + "answer": " is beside .", + "image": "images/caption_simple_67.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000069138.jpg", + "mask_rles": [ + { + "size": [ + 640, + 371 + ], + "counts": "o96b0NDNM72KQb04[^OLD;056@Hj00SO48LJNOV?9XAKB=35NYO1]7i9nHVFL1d00`7i9YIWFg6i9ZIVFf6j9ZIVFf6j9ZIVFf6j9ZIVFf6j9[IUFe6j9i10000001O000UJUFa1k9Z4000VJUF_1k9`NWF_1i9aNWF_1i9bNUF_1k9[4001O00000XJTF\\1m9cJSFf31e1m9cJTFg30e1W:bJgEb32l1X:[NhEd1Y:[NgEe1`:TN`El1d:PN\\EP2e:oM[EQ2e:PN[Eo1f:_KXE`11Q3j:\\KUEc11Q3n:XKQEh10P3T;SKlD`7T;f0000000001O0000OaJlDT2T;lMmDS2S;]300O1O1OlHoD[5P;fJPEZ5m:iJSEI2e3j:[NVEd1g:_NYEa1g:_NZE`1f:aNYE_1g:aNZE^1f:bNZE^1f:bNZE^1g:aNYE_1g:bNXE^1i:aNWE_1i:bNVE^1k:aNUE_1l:aNSE_1m:aNSE_1n:`NSE_1n:i30iJQE]1P;bNPE^1P;i31O0jJoD]1Q;bNPE^1P;bNPE^1P;cNoD]1Q;i3O10lJPEX1o:iNREV1m:kNTET1k:mNUES1k:mNVER1i:oNWEQ1h:QOWEo0i:QOXEn0i:QOWEo0i:QOWEo0i:QOXEn0h:SOWEm0i:SOWEm0i:SOXE_LV1b2b9WNUEo03PNm1i2k8VNZE\\O2_OL7S3h2e7iM\\JV2g;00000000000001O001O00O11O00000000001YNQ_OGea0VOZ^O1olo0NoRPOV1Ya0]1N2O100000000O1000000O1N200000000000000O10000000000001O0000O100000000001O00000000O10000001O000000000000O10000001O00000000O1O1000000O1000000000000YMRMkA0Z2o2Qh?S1dB`NcM1i?_1hB`NX=`1iB_NW=`1iBaNW=_1iBaNW=_1iBaNW=_1iBaNW=_1eB`NdM1g?_1eBaNcM0h?_1dBbNdMOh?`1cBaNeMOh?`1cBaNeMOh?`1bBbNfMNh?`1bBgN]=Y1cBgN]=Y1dBfN\\=Z1dBfN\\=Y1dBhN\\=Y1cBgN]=Y1cBgN]=Y1cBcNeMNh?_1bBhN^=X1bBhN^=X1aBiN_=W1aBiN_=W1aBiN_=W1aBiN_=W1bBhN^=X1bBhN^=X1aBiN_=W1aBiN_=W1aBiN_=X1_BiNa=W1_BiNa=W1_BiNa=V1`BjN`=V1_BlN_=U1aBkNV<^2jCbMQ;c3oD]Lf:n3YESLf:R4ZC`Kb1`0T;e4^D\\Kb;n4SDSKm;`5\\CdJd<\\600O100O1O1O10000000000O101O0000000000000000000000000000000001O00O1001O0000000000000000000000000001O0000000O10000O100000000000001N10000O2[Od0oMnG_F0a0j8R3\\Gj0m:oN^E[OBZM5Gc;Y2YDhMi1T1TNiNR>[1n[Z2" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is over .", + "image": "images/caption_simple_68.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000342367.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "QnQ62n>1m>NYRO1e>0ZA4a>M`A3`>L_A6`>I`A9c>I\\A2l0MVO2V=MiB2N4c0=e<]OkB9Nm0f;jNgE1cNb1d;UO[DB<^1X;BbD>];EaD;\\;IcD8Z;h1M3M3M4L4L3N3N11O000000000010O0001O0000001O000O101O00001O001O00001O0000010O0001O01O00001O010O001O010O0000001O0jLQE\\2o:]MYEFI`2o:fM[EII_2m:aMaEOC_2h;`MZD_2e;aM\\D^2d;bM^D\\2b;dM_D[2U5K4N2O2N1O1O1O100O2N1N2O2O0O100O2N1QNZOdEf0V:EdE and ?", + "answer": " is in front of .", + "image": "images/caption_simple_69.png", + "model_output": " is in front of ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000263796.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "[i;a0Tc0>L2N3N3M1N4K3O2N3f]O`Nma0l1M8I1\\OhM]_O[2``0hM]_O]2^`0fM__O^2^`0e0L5L5K3L2O0O2O000O10000O101O001O002N0002N2N3M1O3M2N6J4L1O1O2N1O001O000000TOoLi@Q3V?SMf@n2Z?UMc@k2]?XM]@k2b?XM[@i2e?[MU@g2k?g0O10000O1001O00O1O1O100O1000000000000002N2N1O2N6J4L1O1O0000000000VO^LVAb3h>dLTA\\3k>gLRAZ3n>hLo@Y3P?lLi@W3W?mLc@U3\\?h000O10000O100O100O1000000000000000000O10000000000O10000O10000O10000O1000000O100O11O1O00O1O10000000000N2O100N200000000O10000001O1O5K3M8H2N6J3M2N3M3M2N2N1O1O1O2N1O1O1O001O1O1O1O1O001O1O2N1VM__OS2a`0gMk_OS2W`0gMQ@U2P`0hMT@V2l`0O1O2N2N1O2N1O1O1O1O001O001O1O001O001O001O001O0000001O000000001O0000001O00000000000000000000000000000000000000000000O10000O100O10000O1O1O1O100O1N2O1O1O1O1O1O1001O002N3MO1O1C=N2TOhMY_O;7m1[`0^Nd_Ob1[`0`Nd_O`1[`0bNc_O_1]`0R1BhLT@X3k?iLU@W3k?jLT@V3l?jLT@V3l?jLT@V3l?jLT@V3l?jLT@V3l?jLT@V3m?iLS@W3l?jLS@W3m?kLQ@U3o?>O1N2N2K5A?O1O100O100001O001O0000XMZKWFe4h9^KaCMc2e4l9bKRF^4n9bKRF^4n9cKRF\\4n9cKSF]4m9bKUF]4l9^KaC0c2b4l9]KbC1b2b4l9]KbC1b2b4g<0O1O100O100O100O10000000000001O00000000000000O10000000000000000O1000000000000001O001O00001O001O1O001O1O3M2N1O1O2N1O1O001O00001O1O002N3M2N2N1O2N3M1O2fMi_Og0W`0UOQ@g0Q`0TOT@j0o?_N`_O2i0\\1i?`N__OLQ1b1b?`N^_OLS1b1a?^Nj@`1n`
0M4L1O002N2N3M1O001O1O2N2N2N1O2N3M1O2N1O2N3M1O2N1O2N5KQl`0" + }, + { + "size": [ + 640, + 480 + ], + "counts": "dad3a0\\c0:G8G8H7J5K6K3L7J4M1N4L2O2M4M1N2O2N2^_OWMh?k2T@ZMj?g2S@\\Ml?f2Q@\\Mm?f2Q@\\Mn?e2Q@[Mn?g2P@ZMP`0\\3N1O100O1O1O1O2Z@nK\\?\\4N001O100O00100O10O01O010O10O0100h@\\KS?`4RA`Kk20o8`4VD`Kk20o8`4\\31000000O10001O000O10000000000000001O000000000000001O0000000001O000000000000001O00000000001O000001h@\\KS?c4l@`KR?g41O0100O01O00010O10O01O0O2J6L4N101O1O1O1O001O2N010N2O1O1ZOT@VMm?h2U@WMk?g2X@WMj?h2W@WMj?g2W@XMk?g2X@UMi?j2[@nLk?n2f0N1M4N2N1O2L3M4L3M4N2K6K4I8J6M4\\Ombf2" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is on .", + "image": "images/caption_simple_70.png", + "model_output": " is on ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000119828.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "`oY1a0P;:C;I8I5M4K4N3L3M3N1O2N1O2M3N1N3N2M3N2N1O2M2O2N1O2N1O2N1O2N2N1O2M2O2N1N3N2N1O1O2N1O1O1O2N100O101N1O101N1O1O2O0O101N10000O100O101N1000000O10000000000O1000000O10000000O100000O10000000O100O10000O10000O100O100O1O1O100O10000000000O1000000000000000001O0000000010O00000000001O0001O01O01O0010O00010O010O2O1N2O1N2N4M3M3L4L2O0O1fJoJl3T5lKlJnN2W5T5fKjJUO3T5U5eKiJTO4W5V5aKnJ`4V5ZKkJf4W5YKhJg4Y5UK^JD9X5S6001O00010O001O001O00001O001O001O0000001O00000000001O0000000000010O00001O00001N100010OO2O001O1O0O4MOLUJ^Jj40QLc5WOZJg4:lKU5[OfJm49dKP5CdJj4]6VKbIk4_6SKcI1Ne4_6[KbI0Of4_6ZK`I20e4_6YKaI20f4^6XKbIo4_65O1OjJcIS5`61O1O1O1NgJeIY5V67M2N3K401M2N2O2H7B>^Ob0K5N2N2N2O1O2N002N100O1O011N10O0101N10O0101N100O100O100O1O100O001O1O100O1O1O1O2N1O1O1O1O001O0O20OO2N2N1N3M3J5@a0L4N100M3O2N1M3O2L4^OlE[OY:b0?M4N2N1N201N2N100O101N2N3K[Vf0" + }, + { + "size": [ + 375, + 500 + ], + "counts": "_Sg31h;0N0bh01[WO4M5K3N3MCPE4n:LUE4i:K[E3d:M]E3a:NaE1^:OeEOZ:0hEOX:1g00PE2V:0gE3X:g001M2O1N101N2O001O001O00001N100O100O100O10000000000000000000000000000000000001O0000001O0000001O001O0O2O0O2O0J7K5J5L6I8H\\eR1" + } + 
], + "question": "Where is located relative to ?", + "answer": " is attached to .", + "image": "images/caption_simple_71.png", + "model_output": " is beside ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000001993.jpg", + "mask_rles": [ + { + "size": [ + 419, + 640 + ], + "counts": "]8a0a<1O2O1O0O10001N10000O2O0O10000O2O000O10001O0O100000000O2O00000O100001POTDg0Vo:CoD>P;]3fDiKc8]4^OcK\\H]4d7dKZH]4e7fKWH\\4i7gKRHf2c9Gf0N1010O10O100O010O100O10O10O100O0100000O10O100000O100001N2O3M3M2N2NGnEcMP:]2RFdMk9\\2WFdMg9\\2[FdMb9]2`FdM]9]2dFcMZ9_2gF`MW9b2iF^MU9c2lF]MR9d2PG[Mn8g2SGXMk8[2fFkM>Jk8W2nFnM7Ki8U2TGPN4Ig8V2ZGoM0Hf8X2]GoMV9P2mFPNQ9d1XF]Ni00n8]1aF^Ne05h8[1jGdNU8[1nGeNP8X1THhNk7T1[HlNc7P1cHPO\\7k0jHUOT7g0RIYOm6b0XI_Of6=`IB`69fIGY65kILT6OSJOm5LYJ4[9010O1000O1N2O1MbVX5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_72.png", + "model_output": " is beside ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000221502.jpg", + "mask_rles": [ + { + "size": [ + 320, + 640 + ], + "counts": "nR63g99K3O1N1O2O1N2O1O0jFZOR9k0N5YGTOQ8n0mGUOk0Nk5n0YITOj00l5m0ZITOg01YOGY6W1gIROLL>5CJ[6S1`IjN4f0;OP6b0aIkN2g0NWO7g0X6h1dIZN[6h1cIYN]6e20DcIWM[6i2gIUMY6m2eIRM]6n2dImL_6n2n1g5QNkI0?n1f5TNjIMa0o1d5VNjIKb0o1d5UNjILc0o1d5TNiIMc0n1d5WNhIKd0n1d5XNhIHe0P2c5YNhIFe0Q2d5WNiIEe0U2c5TNdJl1]5SNcJn1]5WN^Ji1b5VN_Ji1a5WN_Jj1`5VNaJi1_5WNaJi1_5WNaJi1`5VN`Jk1_5UNaJk1`5TN`Jl1b5QN_JP2c5mM]JT2c5jM_JU2a5kM_JV2a5iM_JW2e602N2N2M5K8I;E5K8G;F9Febm4" + }, + { + "size": [ + 320, + 640 + ], + "counts": 
"Qgj0131f91XF1g95N2N1O101N1O100O1000000O1O10000O10000O100O100O2O0mN_OgHa0i5I`Ie0b0Bn5T1RJmNl5U1RJlNm5U1SJkNm5U1SJkNm5U1SJkNm5U1SJkNm5U1SJkNm5V1RJkNm5U1RJlNn5S1SJmNm5T1RJlNn5T1RJlNn5T1RJmNm5S1TJnNj5R1VJQOg5o0YJSOe5m0[J]O[5c0eJC^O_Nc5n1oJOd0UNi2l1cL8:nMS3i1cLa00hM]3h1bLg0DZMK:o3i1\\LZ1d3a201O00O1001O000000[MXLROh3n0`LiNa3V1eLeN[3[1mL]NS3c1WMRNj2n1YMnMh2R2aMcMa2]2cM^M^2X1XLmN]1F\\2\\1[LkN]1CZ2b1YLkN`1]OZ2h1VLkN`1\\O[2i1ULkNa1ZO[2k1TLkNS6U1mIkNS6U1mIkNS6U1mIkNS6U1mIkNS6U1mIkNS6U1mIkNR6V1nIjNS6U1mIkNS6U1mIkNS6U1mIkNS6U1mIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1lIjNT6V1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIjNV6V1jIjNV6V1jIjNV6V1jIjNV6V1jIjNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6T1jIlNV6T1jIlNV6T1jIlNV6T1jIlNV6T1iImNW6S1iImNW6S1iImNW6S1iIlNX6T1hImNW6T1hIlNX6S1iImNW6S1iIlNX6T1hIlNX6T1hImNW6S1iImNW6S1iImNW6S1iIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6T1hIlNX6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1
gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1hIjNX6V1gIkNY6U1gIkNY6U1gIkNY6U1hIiNY6W1gIiNY6W1gIiNY6W1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1eIkN[6U1eIkN[6U1eIkN[6U1eIkN[6U1eIkN[6U1fIjNZ6V1fIiN[6W1eIiN[6W1dIjN\\6V1eIiN[6W1eIiN[6W1eIiN[6W1dIjN\\6V1dIjN\\6V1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIhN\\6X1dIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIhN\\6X1dIhN\\6X1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1dIhN\\6X1dIhN\\6W1fIhNZ6Y1eIgN[6Y1eIgN[6Z1bIhN^6]200000000000000000001O00:Fg0`JnKd3Y4TLkKi3X5NlLaKi0]4jNRLU1m3jNULW1i3hNXLY1g3gNXL[1g3eNXL\\1h3dNXL]1g3cNZL]1e3cN[L]1e3cN[L]1e3cN[L]1e3cN[L^1d3bN\\L^1d3bN\\L^1d3bN\\LjN6@YO]1U49]LiNg0d0l2c0\\LjNR19b2l0]LkN]1MW2W1\\LkNc1IQ2[1^LkNe1Fn1X1cLRO`1Da2f0PLF`1CT32]K;`1BZ4>iK^OX4b0c2O1000001O00001O00000000002N001O00000O100000001N5Hgm?N]R@21Oci:" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_73.png", + "model_output": " is beside ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000312586.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "`eo63X=2N1N3L3N3HD]C>b<61N101O000O100O2O00O1O101N2O103`CXOT and ?", + "answer": " is in front of .", + "image": "images/caption_simple_74.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000187236.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Y6c0n0i1U:XNkEg1T:^NjE`1U:cNjE\\1V:fNjEX1V:iNlES1U:oNjEP1U:SOjEl0V:UOiEk0W:WOhEh0W:\\OfEd0Z:]OeEb0\\:@aEa0_:A_E?`:C_E=a:E\\E;e:GYE9g:IVE8j:KSE5m:o100O\\LTEV3m:gLVEX3j:fLYEY3e:hL]EW3c:iL]EW3d:gL^EX3d:eL]E[3[7dLnK[3l7000O1000000000YM_Do1a;oMbDo1`;oMcDo1_;nMbDR2a;iMbDV2_;gMcDY2_;cMcD]2P<0O100000O010O1000O01000O10O010O1O01WObMPE^2o:cMPE]2P;cMQE]2o:cMPE]2]:bMXF]2i9cMVF]2k9cM[E16\\2_:eMZE06Z2a:gMWEO8[2a:gMVEN8[2b:jMREM;Z2c:kMoDMoBDP=;QCFn<:QCGo<8RCIm<7RCJn<6RCJn<5RCMl<4TCLl<4SCMm<2SCOm<1SCOm<0SC1m and ?", + "answer": " is lying on .", + "image": "images/caption_simple_75.png", + "model_output": " is on ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000255749.jpg", + "mask_rles": [ + { + "size": [ + 424, + 640 + ], + "counts": "W_U1e0Z2H]7Q1bG]OZ8j0[G]Oc8g0WG]Oh8f0QG_Om8f0eFEZ9U2O2N100O100O1N2M3NAXGbLd8`3`G^L`8_3cGaL]8\\3gGcLY8Z3jGeLW8X3mGgLS8W3oGhLR8U3R1I7_Ob0WN]E`0n:]OZEi]Y2" + }, + { + "size": [ + 424, + 640 + ], + "counts": "m`T28i<;I4M3N1O1O2N1O1O1O1N200O1O1O1O2N1O100O1L4M4L3N2O1O10000O10O100nNlDGKD[;b0nDGIEZ;b0PEGY;9hDD[;:gDC\\;;j0M3N2N4JhSc5" + } + ], + "question": "What are and doing in relation to ?", + "answer": " and are entering .", + "image": "images/caption_simple_76.png", + "model_output": " and are entering ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000575243.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "ZiV34Q=6M3L4L4N2M2N2O1O2O0N3O001N10001O000O2O010O001O00001O10O0001O001O010O001M3O1O010000O10O01O100O1O00100O1O1O010O10O01OO20O0100O01O100O100O00100O100O010O1O1O100O010O100O101N1O100O1000001N1O100O1N2O2O0O1O2N100O1O2O0O2O001N100O1O101N101O001Ni`b3" + }, + { + "size": [ + 427, + 640 + ], + "counts": "hcW31Z=0ea91odJ5QiK7RCN^L5O0100O0102M2N4M2M:E7HTl0OZbk3" + } + ], + "question": "What is doing with ?", + "answer": " is holding .", + "image": "images/caption_simple_77.png", + "model_output": " is holding ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000473118.jpg", + "mask_rles": [ + { + "size": [ + 500, + 346 + ], + "counts": "Uod01c?00000\\im00cVRO9_@Kh>f0G9D<\\ObNgBi1W=;K5M3M3O1000000001OAaMhC`2Th80bIBfM>h80aIDfM]2KYN:m8]O]F`0U2O`N4n8<]HCeN2n8;ZHFfNOR9;UHJgNJV9=lG0lNCY9=bFkNc0Y1@@\\9<_FROXORB7X>GmAMchm1" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is jumping from .", + "image": "images/caption_simple_78.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000527215.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "i8h02n2h8RMXGn2h8RMXGn2h8RMXGn2h8j0O100000000001O00000000000000000000000000000000O11O0000000000000000000000000000000000O1000000O100O100O1O10000O1000000O10000O1O10bKaGQ4_8oKaGQ4^8PLcGo3]8PLdGP4`7fK`H0a0:@P4_7gK^H1`09Do3^7gK]H2>;Gl3^7oKgH6Kk3]7RLeH4Nj3]7ULaH22i3]7kLcHU3^7jLcHU3]7jLdHV3\\7gLgHY3Y7gLfHZ3Z7fLfHZ3Z7eLgH[3Y7eLgH[3Y7eLgH[3Y7eLgH[3Y7lK]Ha0:c3Y7eLgH[3Z7dLeH]3[7kK\\H`09e3[7jK]Hb07d3]7hK]Hd06d3]7gK_Hd04e3^7eK`He02f3]7hK_Hb05e3[7kK^Ha06e3\\7kK\\Hb07c3]7eLcH[3]7fLbHZ3_7eLaH[3_7eL`H\\3`7dL`H\\3`7eL_H[3a7fL^HZ3b7gL]HY3c7W100001O000000001O000000000000000000001O000000000000001O00000000000000000000001O00000000000000000000000000001O001O001O1O001O00001O00001O001O001O00000000000000O100O100O100O100000000000000O1O100O1000000O1000000000000001O0000000000000000001O0000000000000000000000000000001O0000000000000000001O0000O1000000001O00000000000000000000001N11O00000000000001O00000hKUHQ3k7iL^HT3b7hLhHR3X7lLkHS3U7mLkHS3U7mLkHS3U7nLjHR3V7oLiHQ3W7QMgHo2Y7RMfHn2Z7SMeHm2[7YM_Hg2a7aMWH_2i7bMVH^2j7eMSH[2n7fMoG[2Q8_1000000000000000000000000000000000000000000000000000000000000000000000000000lKnGP3S8oLmGQ3R8PMnGP3R8PMnGP3R8PMnGP3R8oLoGQ3Q8oLoGQ3Q8oLoGQ3Q8T1000000000000000000000kKoGQ3Q8oLoGQ3Q8nLPHR3P8nLPHR3P8nLQHQ3o7oLQHQ3o7oLQHQ3P8nLPHR3P8nLPHR3o7oLQHQ3o7oLQHQ3o7nLRHR3n7nLSHQ3m7oLSHQ3n7nLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLRHR3n7nLRHR3n7nLRHR3n7nLRHR3n7oLQHQ3o7oLQHQ3o7oLQHP3P8QMoGo2Q8QMoGo2Q8QMoGo2Q8QMoGo2Q8QMoGo2Q8RMmGo2S8QMmGo2S8QMlGP3T8S1000000000000001OO100001O000001O0O01001O00000000000001O0O11O00000000000000O100001OO100000000000001O000iKmGX3R8cLZHV3f7gLaHU3_7kLbHT3^7kLcHU3]7lLbHT3^7lLbHT3^7oL_HP3b7RMYHQ3g7QMSHT3l7nLoGU3Q8mLlGT3T8Q100001O0_KXH`3h7[L^Hd3c7VLcHd0YOV2V9gMXGn1X:J4L1OO1000nNSEGn:7SEIm:7UEGk:8WEGi:8XEHh:8XEHi:7WEIk:4VELn:0SEOS;JnD6U;GkD9Y;BhD>\\;]OeDc0a;UOaDk0n;1000000O1000000000000
O100O1\\O]OaDc0Y;j0@`0L4O1O1L4WOi0^OgLmF_3b8UMYGm2^8\\LaGe4]85L4N2O10000000kKlGT3T8kLnGT3R8lLnGT3R8lLnGT3R8kLoGU3R8jLnGV3R8jLnGV3R8jLnGV3R8jLnGV3R8jLnGV3R8kLmGU3S8kLmGU3S8kLmGU3S8lLlGT3T8mLiGU3W8o01O00O10000000000000000000000000000001O0000000RLmGe2S8ZMnGf2R8XMPHh2P8XMPHh2P8XMPHh2P8XMoGi2Q8XMjGl2V8T1000000000000000nKiGQ3W8oLkGo2V8oLnGn2R8QMTHj2l7UMUHk2k7TMVHl2j7TMVHl2j7UMTHl2l7UMlGR3T8R1001O000000000000000000001O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000eG" + }, + { + "size": [ + 426, + 640 + ], + "counts": "l]f33U=4COZC;^<:O02N2N1N6J:F3O01OOi\\^4" + } + ], + "question": "What is doing on the ?", + "answer": " is walking on the .", + "image": "images/caption_simple_79.png", + "model_output": " is walking on the ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000509131.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "mVb55T=2N2M2N4L3M4M2M4L5L3M1O2O1O2N3M3ZDfNU;k1N2L2O00100O0010O1O1O010O2N1O0O10001N1O2O1N2N2O1N2N2N2N2N3M2N4L5K4K4K7HS[Q2" + }, + { + "size": [ + 425, + 640 + ], + "counts": "^_^58P=e1\\N7I5K6J5Ld0\\O0001O001N2O1O2M2O2N3L4L101N2N2M3N4QNPE]1V;WNTEe1];J5K5J6J6K6H8Hc[]2" + } + ], + "question": "What is the relationship between and ?", + "answer": " is attached to .", + "image": "images/caption_simple_80.png", + "model_output": " is attached to ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000167902.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "[9Y1^:0O1000O10000000O100000O10000000O100000000000O1000000O1000000000O100000O10O100000000000O1000O1000000000O1000O1000000000000O01000000O1000O1000000000O1000O1000000000O10000000O1000O100000000000O1000O10000000O100000O10000000000O10O100000000000O100000O1000000000O100000O100000O10000000O100000O10000000000000O0100000000000000O10O1001O00O1000000000O10O10000000000000O10O100000000000O10000000O100000O100000000000O10000000O100000O10O10000000000000000O10O100000000000000O0100000000000O10000000O0100000O2O00001O001O001O1N2O2N2N3M2N6J8H4Kk]10UbN3N3L5VEFo9>lECDL]:f0dEE^:j00000000O01000000000O1000000000O1000O100000000O1000O10O1000000000000O1000001O00001O1O2M2O2N3M2N1O001N4M001O[ODSF8l9JTF5l9NQF2o91mE0S:4gEMZ:h0O1000000000O0100000000000O010000000O10000000000000O010000000000000O010000000000O10O10000000O100000000O10O100000O1001OO1000000000O01000000000000O0100000000000000O010000000000000O100000O1000000000000000O100000O100000000000000O1000O100000000O10000000O1000000000000000O010000000000000000O10O1000000000O1000000000O010000000000000O1000000\\H" + }, + { + "size": [ + 375, + 500 + ], + "counts": "anV15Z;MhD;o: and ?", + "answer": " is over .", + "image": "images/caption_simple_81.png", + "model_output": " is standing on ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000097924.jpg", + "mask_rles": [ + { + "size": [ + 400, + 600 + ], + "counts": "n7a4o70000000000@VHZLj7e3XHYLh7g3ZHYLe7c0WHS24ZMe7b0eHg1FgMe7b0gHe1DiMf7a0jHb1@mMf7`0lHc1\\OnMh7?lHd1[OmMi7?mHd1YOmMj7?mHc1]OjMg7b0mHb1_OkMd7c0mHb1_OkMd7c0mHb1_OkMd7c0mHb1_OkMd7b0nHb1_OlMc7b0nHb1_OlMd7a0mHd1^OkMg7?kHg1\\OkMj7=jHh1\\OkMk7k1[9UNfFj1[9TNgFk1o901O000000O1SOUN^F2OO:j1Y9UN]F95g1c9TNcFk1]9QNhFn1Y9SNeFm1[9SNfFl1Z9TNfFl1P:O1O0000001O00000000001O1N1001O000O10O10O100O100001O00000001O000O100O100O1001O1O001O0000O1YOWNUFN?k1\\9ZNaFi1^9XNaFi1b9SN`Fl1S:O1O1O001OXNZNYHNTOh1a8]NgHc1Y7^NgHa1W7eNeH[1Z7gNfHX1Y7kNfHT1Z7mNfHR1Y7ROeHm0[7UOdHj0]7UOcHk0_7SOaHm0g7gN^HX1]9000000000000000O100N2L400@`0000000000oMlEH0k1d:N00O10000TNXNPIh1P7YNZH\\2f7hMUHY2k7mMmGL_Oj1c8bNeGDNf1]8[OfGd0Z8[OhGd0W8\\OkGc0U8\\OmGc0S8\\OnGd0Q8]OPHb0P8^OPHb0P8]OQHc0o7]OQHc0n7^ORHb0n7^ORHb0n7^ORHb0o7^OPHb0Q8]OoGc0R8]OmGc0U8[OjGf0W8YOiGg0Z8VOfGj0\\8TOcGm0^8RO]GS1f8iNTG^1m8aN^GT1d8iN^GV1e8fN]GY1e8dN]G[1e8bN\\G^1f8`NZG`1i8\\NYGc1j8[NUGe1l8\\NbF10c1_9\\N_F21c1m9]NSFc1m9]NRFd1m9_NPFb1m9b0fNaMcH_2\\7eMaH[2_7fM`HZ2`7iM^HV2b7kM]HU2b7oM[HQ2e7QNZHn1f7SNYHm1g7TNXHl1h7VNVHj1j7ZNRHf1m7\\NRHd1n7^NPHb1P8`NnG`1R8bNkG_1T8eNiG[1W8gNfGZ1Z8hNcGY1]8]100O1000000000000O100000000000000000000O1O1001O0000000000000000001OO10000001O000000000000000000000000000000000000000000000000001O0000000000000000000000000000001OO100000000000000000000000000000000O1001O00000000`G" + }, + { + "size": [ + 400, + 600 + ], + "counts": "R]^16T<;H9G4oKXOULk0f3CoK?n3HkK;R4KiK8T4MfK6Y4NbK4\\4O`K4_41ZK2d45TKNf4hJDX5 and ?", + "answer": " is standing on .", + "image": "images/caption_simple_82.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000509656.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "0b6^800001O00O1001O000000000000001O000000000000001O00000000000000000000000000000000000000000000O1001O00O1001O00001O00N2bNmGPKJ=O@^8S5jGkJO0l8c5oFbJn8Y6_O4L2N0000000000000000001O00000000000000000000O1000000000;\\IbG?In11a0Y[JVOf5R1_JaNa5d1dJRN\\5V2gJ\\M\\5l2X36J?A9G:F:F:F:F:F>B7I5K5K1O0000000000000000000000O1lJeGc3[8[LhGc3Y8\\LiGc3W8\\LoG]3S8XLhGRO8_4U8_LcGQO9Z4Z8eL]GQO9X4\\8eLPHZ3P8eLQH`3j7`LWHc3e7]L[He3c7[L]Hh3`7WLaHj3^7SLeHn3Z7QLhHo3W7QLiHo3W7PLjHP4V7PLjHQ4U7oKlHP4T7mKgG]OV1e4S7QLkHQ4U7oKkHQ4U7lKgG]OU1g4U7]KfG0e1]4d6`KkG1b1_4c6aKjG0c1_4c6aKkGO_1N^Nb4X8aKjG0`1b4f6]KjG2_1b4h6[KhG4`1a4h6[KhG4`1a4h6[KhG2b1b4n6]KQIc4o6]KPIe4P7ZKoHg4e8O101O0O100000000O10000O1010O11N1O1O100O1O002N3N1N1N2O00001O000000O10O1O1O1N2O1N3M3M45L0OOO01O2O0O1O001O1O0O2O00001O001O00001O001dLmDi2T;TMoDi2i;K3L3N2N1O100O1O2O0O1O1O1O2O0O100O101N100O10O0101O0O2N1N2O2L4M3L3N3N2K4L5N3J6L4I6D^A12OPUP5" + }, + { + "size": [ + 480, + 640 + ], + "counts": "aah1c0U>?C`0B;F:E>D5K7H6oDlLm9Z4I4L5K5K9H3M2N0O01O1O1O2O0O10000O1O2O0O1O101K41ON3M2NiE]KW:b431O1OH9O0O2N1M4M201N2O001O1O1O001000bFWLe7i3YHZLf7h3UH\\Lj7f3QH^Ln7d3mG`LQ8c3iGaLW8`3fGbLZ8a3aGbL^8c3UGiLh8j4L4M2N2N000000001OO100O1O1L4N2M3M4L3O1O1N2N2N2O2M2O1N2O1O1M3N2N2N2L4L4L5L3000000O110O001O0000001O000000010O00001O00000001O0000000000000O1000000O103cEbKR:m4I3M9F5L2N1O:F2N1N2N100O2O000000001N101O000O10OFjIeG02V6^8kI`GZ5`0lJQ8J_GP5`9721M21OO200_Ob0M2N2N3O0O2J501O0O2L4N3O0O2M5L3N3M>\\HaHk6o7J6I5K and ?", + "answer": " is in front of .", + "image": "images/caption_simple_83.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000140658.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "fb076LN2M;[b02g]O=Qb0m0O1G9K5J6H`Ml^Od2Sa05B>DZLWAN_Oj3Z?ZLRAl3m>d000O100O1O1N2N200O1O100O10000O1G9C=K5N2L4I7lNjITD`6k;bIRD`6m;cInC`6Rn2o10000O^ARMTk2eATM\\>k2eAVMZ>j2eAWMD0e=h2_BVMTO3h0Oe=h2hBXMZ>h2fAXMZ>h2fAWM[>h2_10U@XMFOf=i2fCXMZg2iAZMV>e2kAZMV>f2b1O1000000OTA\\M^l2PBTMP>n2nARMR>P3^11O1O002N3Z@hL^>Y3_@iLa`0[300ZDcLi6^3l4001O1O2N3RD[L[7g3eDZLX?g3g@YLY?g3g@YLY?g3`000001O001O3Q@TLd?P4W@RLh?S400000O1O100O10000001OO100000000000000000000O10000000kCnK\\8R4cGoK]8Q4cGoK]8R4cGmK]8T4g3OlCmK]8S4bGnK^8R4h31O00001O0jClK`8U4_GkKa8U4_GlK^L5i;o3kGlK]LOk;U4iGkK]LOj;V4iGkK]LOj;W4k3000000001O00000000001O00001O00001O0000001O0000001O0000000000001O00001O001O00000000001O00001O00000000001O001O00001O000000001O0000001O0000000000001O001O001O0000000000001O000000001O00000000001O00001O000000001O00001O00000000000000001O001O00000000001O001O00000000001O000000001O0000000000001O000000001O0000001O00001O00000000000000001O1O00002N3M2N001O0\\H^JbIN00O1Y41bKj0l0Sc0" + }, + { + "size": [ + 640, + 480 + ], + "counts": "dhj26fc07L2M3M2O2N2N1O1O1O1O1O1O1O100O010O010O0100O010O1O100O1O1O1O1O1O1O2M2O2M3M4LPlh5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is on .", + "image": "images/caption_simple_84.png", + "model_output": " is over ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000427160.jpg", + "mask_rles": [ + { + "size": [ + 512, + 640 + ], + "counts": "o=P2P>0000000000000000UOXB^Og=a0^B\\Ob=c0bBZO^=f0cBYO]=f0dBZO\\=f0eBYO[=f0gBYOY=g0gBXOZ=g0hBYOW=f0kBYOU=f0mBXOT=h0nBVOR=i0S10000O10OO2O10000O10000000000O100M3O1O1M3000000O10lAEc<:^CGa<9_CGa<6SCLUOOh=3RC>n<_OSCd0l<\\OTCd0l<[OUCe0k<[OTCf0l0O100M3O100O100O100O1O1O1O1O10000000000M300O1O1O010O100O2O0O100O1O1N11001N100O1O1O1O10O0100000000O101O0N101O1O100O1O100O10000O10000000001O0000000000000000000000mNXBNh=M`B0`=OcBO]=NgB0Z=NjB0V=MWB[Od0h0U=MmB3S=LoB4P=LPC4P=LQC3oN1N2O00102M000000001O0O11O01O0000oNHZB8d=LZB4a=5cADd07g==WBBg=c0XB\\Oh=d0XB\\Oh=e0WB[Oh=f0XBZOg=i0VBXOh=l0VBUOh=l0XBTOh=m0WBSOh=o0VBROi=Q1TBoNl=c100O1O100O2O0O1M3O0010000000000O10000O1O100000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000O11O00000001O000000O100000000001O000000O100001O000O10000000O10000001O0000000000O100001O000000000000O100001O00000O100000O10000001O0000000000O100001O00000000000000O1001O0000000000000000000000000000000000000000000000000000O10000N2N2N2M3O1O1M300O1O1O100O101O0^NmAW1_>N2O00000O2@^A@c>6hAGZ>7hAHY>3WAHb02[>NnA1Qoi18h_VN3M1O1O1O1O2N2M4M2N1N1N2M3K5K5M3O1O100O1000000O11O1O001O001O1O001O00001O00000000O100O1O1M3O1N2O1O1N2N2O1O1O1M3N2N2O1O1L4N2O1O1000000000000000000000000000000000000000000000000000O11O0000000O10000000O11O00000000000000O1001O000000000bB" + }, + { + "size": [ + 512, + 640 + ], + "counts": 
"Sjn73k?3N2N1O2N2N2N102M2N2N1N3O1N2N2M3N2O0O2M3N2M3N2N2O1N2N2O001N2O2FUN[Bn1_=XN\\Bl1`==L3[E^M[7S3_HVMY7n2aHWM\\7m2aHVM\\7n2aHSM^7o2`HRM^7Q3`HQM^7>`F]1Q2VN^7P3bHQM\\7P3dHPM[7P3fHQMY7o2gHRMW7o2hHRMW7o2iHRMV7n2jHRMU7o2kHQMU7n2lHSMS7m2lHTMS7T2oFcMT1]OoN26j0h8R2RGdMP1U1m7W1TGcMP1U1m7W1TGdMP1S1m7W1UGfMn0Q1o7W1TGhMo0n0n7Y1UGiMn0l0n7Y1VGjMn0j0n7Z1VGlMm0h0n7[1UGmMo0f0m7[1VGoMm0d0o7[1VGPNm0b0o7\\1UGSNm0?o7\\1VGTNm0>n7]1UGUNo0;n7^1UGVNo09n7_1UGXNn06o7a1SGYNQ12n7c1SG[NQ1Nn7e1RG]NR1Kn7g1QG^NS1EP8l1mF_N];`1dD_N];`1cDaN^;]1bDcN_;]1`DdN`;[1aDdN`;[1`DeNb;Y1]DiNc;W1\\DiNe;V1ZDlNg;S1XDmNi;R1WDnNk;Q1SDQOm;h0XCgNl0`0n;f0YD[Oh;c0YD]Oh;c0WD]Oj;c0UD\\Om;c0SD]Oo;b0oC_OT<>lCBY<:fCE_<7bCH`<6`CIa and ?", + "answer": " is running on .", + "image": "images/caption_simple_85.png", + "model_output": " is attached to ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000106048.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "^9l3`90000[MaF^1^9X10000000ZMbF^1^9aNcF_1]9aNcF_1]9aNcF_1]9aNcF_1]9aNcF_1]9aNcF_1]9aNcF_1]9aNcF_1\\9Y1000000000000000YMdF^1\\9bNeF]1[9cNeF]1[9cNeF]1[9cNeF]1[9cNeF]1[9cNeF]1[9cNeF]1g8]MiGV1@]1g8]MiGV1@]1f8^MiGV1A\\1f8^MiGV1A\\1f8^MiGV1A\\1f8_MhGU1B\\1f8_MhGU1B\\1f8_MhGV1A[1g8_MhGV1A[1g8_MhGV1A[1g8`MgGU1B[1g8`MgGU1B[1f8aMhGT1B[1f8aMhGT1B[1f8aMhGT1B[1f8aMhGT1A\\1g8`MhGT1A\\1g8aMgGS1B\\1g8aMgGS1B\\1g8aMgGS1B\\1g8aMgGS1B\\1g8aMgGS1C[1f8bMgGS1C[1f8bMfGT1DZ1f8bMfGT1DZ1f8bMfGT1C[1g8bMeGS1D[1f8cMfGh3Z8XLfGh3Z8XLfGR1D[1f8cMfGR1EZ1e8dMfGR1EZ1e8dMfGh3Z8XLfGh3Z8XLfGR1D[1f8dMeGQ1E[1f8dMeGg3[8YLdGh3\\8XLdGh3\\8XLdGh3\\8XLeGQ1E[1e8fMdGf3\\8ZLdGf3\\8ZLdGf3\\8ZLdGf3\\8ZLdGf3\\8[LcGe3]8[LcGe3]8\\LbGd3^8[LcGe3]8[LcGe3]8\\LbGd3^8\\LbGP1IV1e8jMbGP1IV1e8jMbGP1IV1e8jMbGd3^8\\LbGd3^8\\LbGd3^8]LaGc3_8]LaGc3_8]LaGc3_8]LaGo0JV1e8kMaGo0JV1e8kMaGo0JV1e8kMaGo0JV1e8kMaGo0JV1d8lMbGn0JV1d8lMbGn0JV1d8mMaGm0KV1d8mMaGa3_8_LaGa3_8_LaGa3_8_LaGa3_8`L`G`3`8`L_Ga3a8_L_Ga3a8_L_Ga3a8_L_Ga3a8_L_Ga3a8_L_Ga3a8_L_Ga3a8_L_Ga3`8aL_G_3a8aL_G_3a8bL^G^3c8aL]G_3c8aL]G_3c
8aL]G_3c8aL]G_3c8e000O100001O000000O1000000000000000000000000O100000000O10000001O0000001O1O4L1O1O1O1O1O001O0000001O0O20O01O1O000000000000000000000000000000O10000O1O1O1000000O100000000001O000000001O0000001O00001O0000001O000000001O000000001O00001O001O000000001O00000000001O00000000001O001O00001O00001O000000001O0000001O00001O00001O1O1O6J2N1O00001O00001O1O2N1O1O1O1O1O000000000000000000000000O10000000000O1M3L4O100001O00000000000000001O001OO1001O00000000000000000000O1001O1O1O00O1O1O1001O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000001O0000000000000000000000000000000000001O1O00001O001O001O001O0000O1001O0000O11O000000O1000000O1O1O100O100O100000000O100000000000000000000000000000000O100000000000000000000000000O10000000000000000GgL^FZ3Z9`0N2O1001O0000000000000000001O00O1001O00000000000000000000000000001O00000000000000000000000000001O0000000000000000000000001O000000000000001O00O11O00000000000000000000000TLcFi3]9WLcFi3\\9XLdFh3\\9XLdFh3\\9XLdFh3]9WLdFh3\\9WLeFi3[9WLeFi3[9WLeFi3[9WLeFi3[9WLeFi3[9WLeFi3[9WLeFi3[9WLeFi3[9WLeFi3_900000000000000000000000O1000000000000000000000000O100aF" + }, + { + "size": [ + 428, + 640 + ], + "counts": 
"[jl16XJ500O1O10000O2O00003L3N2N1N2O1O1O1N1000000O101O001N2O0O2OO01O0100000O10O1M3O11N100O100O100O101O000O1000001N101N100O101O000O10000O2O0000000O2O000O2O0O1O101O000O101O00000O2O0O100O101O0O10001O0O101O000O2O000O100O2O0O1000001N10001O0O101O0O100O2N1000001N10000000001N3N1O002L4M2O01OO3N3L2O1O1O1O1N10001N1O10O0100000O010O1O1O10OO2O1N13M1000000O100000001O0O100000001O000000001O000000000000000000000O1000000000000000000000000N20000O10000000000000000000000000000000000000000000000000000001O1OO1O1001O1O001O0000000000000000000000000000001O00000000000O100001O00O1001O0001N100001O000000000001O0000000000001O00000000001O00000000001O0000000000001O000000001O0000000O2O00001O1O001O001O2N1O001O1O3L3N2N1O6J:F4L5K4L9G7I6J4L8H5K2cK]Gl3e8oKaGU2H^Og8]NaGo1OC`8^NbG`0L]O1=a0V1k7hNeG3j1o0Y6WOnGCQ2R1j5DUHVOU2R1i5HRHUOW2j0n52jGUO[2?R6 and ?", + "answer": " is parked on .", + "image": "images/caption_simple_86.png", + "model_output": " is beside ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000052565.jpg", + "mask_rles": [ + { + "size": [ + 458, + 640 + ], + "counts": "e;d2g;O00001O00001O000000001O00000000000000000000001O00001O0000000000001O001O00O10000001O000000000000000000000000000000000000001O00000000001O000000000000001OO100000000000000001O000000O100001O00000000000nMQDi1P4N1O0O2O1N2O0O2O1O1N100010O000000100N100O100000O1O101O1N4L7H7J5J;F8]CVNWUO_B0_f\\2" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is standing on .", + "image": "images/caption_simple_87.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000165039.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "[8m1^;001O1N2O0001O1O01OnNfD3Z;LhD3X;MhD3X;MhD3X;MhD30ROS;k0lD32ROR;j0mD41SOQ;i0nD40UOQ;g0nD51UOP;f0oD51VOn:f0QE40XOn:d0RE40XOm:d0TE4OYO^:MgEf0L30[O\\:NbEh02O0[O]:o0cEF0\\O\\:n0eEEO^O[:m0gEDN_O[:m0hECM@[:m0iEBKC[:k0jEBKC[:j0kECJDY:j0nEAIFX:i0oE@JGW:i0oE@JHU:h0RFAHHU:g0SFAGIU:g0TF6k9JUF6k9JUF7j9IVF7j9IVF7j9IUF9j9FWF:j9EVFk9fNlE>8l0m9eNkE`07l0n9cNkEa07m0n9`NlEc06n0m9_NmEc06n0n9^NlEc07P1m9\\NlEd07P1m9\\NlEd06R1n9YNlEe06R1n9YNlEe06S1n9WNlEf05T1o9VNlEe06o0T:\\NfEe06i0`:WO`Ef0d:YO\\Ed0S;POmDn0i;N2N;E;FgV`0D\\i_O0XC9_aHB_7>`HC`7=`HC`7=`HC`7=`HC`7B8J3N213OL00O000001O000000000000O100O0@cNRE_1m:dNPE\\1Q;?0O1N2O1O01000000O0100000O10000O10000O100001N100000O10000000000O1000O1000000000O10000000000000O10O100000000000000000001O0O200O1O1O1O000000001O000001N11O000O01000000O10000O1O2N1N200O1O2N11O00O10001O0000001O000011N0010O10O00000001O00001N100O2O001O0O2N1000001N100O1O1O100O1000000000001O001N2O00001N101O3L6K2M3K5K5L3MP`o0" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is driving on .", + "image": "images/caption_simple_88.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000370270.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "bc01oc01N=D0000000000000O10000O1O1O1O100O1O100O1O100O1O1O1O1O1O100O1O1O1O1O100O1O1O1O1O100O100O1O1O1O100O1O1O1O1O1L_Nh]Ob1ha0c0N2N2O1J6J6O1000000000000001O2NO100001O0000O1lN_Mj@`2V?eMh@X2W?jMi@U2W?lMh@T2X?lMh@T2X?lMh@T2X?mMg@S2Y?mMg@S2Y?PNd@P2\\?PNd@P2\\?TN_@m1a?TN\\@n1e?RNY@o1g?QNY@o1g?QNY@o1g?QNY@o1h?PNX@P2h?PNX@P2h?PNW@Q2i?oMV@R2j?bMQ@O0`2S`0YMR@V3]`0O0000001O0000O100000000000000O100O100O1O1O1N2O1O1O1N2N2M3O1N200O11O0000000000000000000000000[Lk_O_3U`0aLl_O^3T`0aLm_O_3S`0bLl_O^3T`0bLk_O_3V`0]Lm_Oc3W`012N1O1O5K2N1O001O000000001OO1000000O1000000O100O100O100O1O1O100O100O10000O100O100I_Lm_Oc3S`0500O1000N3N1O1O002O000O100001O002N4L0000000000000000000000000000000000000000000001OO1000000000000000000000000000000000000O1I7N2000000000000001OO10000000000000000000000001O0000000000000DU@cLk?S3f0O1O1O1N2O2M2O10000O10000O1O1HS_OYMQa0c28N2M3N2N2M3N2O2M2O1M3O1O2N1N2E;E;N`0QOok83^TG1N2N2O100O1N2L4N2O100O1L4M3O1O1O1M3O1O1M3N2E;N2N2M3N2I7N2L4O1L4L4J6N2L4L4H8L4M3O1OUMc@U1\\?eNcA@`?>T2DmSl1" + }, + { + "size": [ + 640, + 480 + ], + "counts": "Zdf151Mdc0e0B:G7I`0A1O1N2N2mN]Ng_Od1U`0aNi_O`1R`0gNj_O[1T`0gNk_OZ1R`0jNk_OX1R`0lNk_OU1R6PN`3m0]FT1P6RNa3k0_FS1n5TNb3k0^FR1o5UNa3n0ZFo0`1WN?GoN8]ON[9Y3cG^M3l0iN^N`9Y3dFZL?`18X1e8n0`FoLLV19XNOg2\\9kN\\FZN:S8Z9bIoFY6P9hIWGP6j8PJVGP6j8PJUGQ6k8oIVGP6j8oIXGP6h8PJYGo5g8QJYGn5h8SJWGm5i8SJWGl5j8SJWGl5j8TJVGk5k8UJUGi5n8VJRGh5P9XJoFg5S9ZJjFg5X9XJgFh5Z9XJdFh5_9WJ]Fh5i9XJUFc5R:`JhEi3j0gJk9c1UEe3\\Bk3g>b05K5N2K5J5O2O13M4L4L3L5L3M3M6J4L4M4L4K4M5J4M3M4M2`DcIV9`6fF_I[9d6cFZI`9d6aF[Ia9c6P2N1N2N3N2M3N1RK]B\\3f=bL\\BZ3i=cL[BX3h=fLZBCFj2S>aMYBAKi2o=bMYBBLh2m=eMYBAMDAn2]>iMXBB4_2h=lMVBB8\\2d=PNVBA;l0QO7a>YOTD@YNb0g=Igkh5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is standing on .", + "image": "images/caption_simple_89.png", + "model_output": " is beside ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000481413.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "loi3?g<:F9I6K4L4K4M4M2N2M3O1M3N2N2N2O0O2N101N1O100O2O0O100000000001O1N2O3M0O10000O1O010O001O100O2N2M3N3M2M3M3N3L4L4L4K6I9HWYh3" + }, + { + "size": [ + 427, + 640 + ], + "counts": "mkd29P=3L4M2N2K5L4L4N2N1M3O1O1ON2NdEQOb8g0cG_OZ8b0bGC]8<`GJ]87`GM_82^G3`8N]G6b8I\\G doing with the purple frisbee ?", + "answer": " is holding .", + "image": "images/caption_simple_90.png", + "model_output": " is holding ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000092839.jpg", + "mask_rles": [ + { + "size": [ + 517, + 640 + ], + "counts": "a:a5`:4J6N3O0001O001O001O2N1O001O1O001O2N1O2N3M1O001O2N1O1O1O3M2N2N1O1O2N1O2N2N2N5K2N1O1O1O1O001O00001O2N2N001O1O1O2N001O2N2N1O1O002N3M1O2N2N2N1O2N1O2N1O2N2N2N2N2N2N2N4L1O2N2N1O1O002M5L2N101N4L5J5L1O2N2N2N1N200O2N1N3N0010O00001O000000001O000O1000O11O000000000O1000000001O0000000000000000000000000000O1000000000000000000000000000000001O0000000000000000000000001O0000000000000000000000001O0000000000001O0000000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000000000000000000000000000001O00000000001O000000001O000000001O000000001O0000000000001O00000000000000000000001O00000000000000000000000000000000000000000000000000001O0000O1000000000000000000000O010000O2O000O01000O100O10000O10000O100O10000O1O10000O100O10000O100O10000O10000O10000O100O100O100O10000O100O100O1000000O100O10000O100O100O10000O100O10000O100O10000O10000O2O0O01000O10000O2O0O1O1000O01000000O2O000O1000O01000000O100O1000000O100O10000O1O100O100O100O10000O10000O10000O10000O10000O10000O10000O100O100O100O10000O10000O10000O10000O1000000O100O2O000O01000O2O000O100O1000O10O2O0000000O100000O100001O0002NN2N20
O1gLZC1N6O]2i<\\McC1G4NU2h located relative to ?", + "answer": " is sitting on .", + "image": "images/caption_simple_91.png", + "model_output": " is beside ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000336209.jpg", + "mask_rles": [ + { + "size": [ + 432, + 640 + ], + "counts": "e6]2S;00000000000000001N1000000000000000000000000O1000000000001O000O1000000000000000000O10000000000000000000O10000000O10001O1O000000000000001O0000000O100000000000000000001O00O1001O00000O1000001O002N2N006I9H1O0000000000000000001O0000000000000O10000000000000O100001O00001O000O100000001O0000000000000O1000000000000000000000O100O1O10O10O1N2O100O100O1O1O010O1000000000000001O00001O001O0O2O001O2N2N2N2M2O000000000000000000000000000000000000000000000O101O0000000O10000000000O100000000000001O001O0000000000001O00000000000000001O0O100000000000O1000000000000000000000000000O10000000N2O1O10000O1O2N3M3N6I5KUFAk9`0TF_Om9a0SF_Om9a0TF^Ol9c0UF[Ok9f0UFXOl9h0TFXOl9k0RFTOn9m0QFSOo9n0QFPOP:S1nElNR:U1oEiNQ:W1oEiNQ:[1lEdNT:]1mEaNS:`1oE]NQ:d1oEZNR:h1nEVNR:n1kEQNU:Q2jElMX:U2RFiMe9Y2XFiMg9X2XFiMg9X2XFhMg9Z2WFgMi9Y2WFgMi9Y2WFgMi9Y2WFgMi9Z2WFeMi9\\2VFdMj9]2VFcMh9`2VFfMd9\\2ZFfMd9\\2ZFeMe9\\2YFfMf9Z2ZFfMf9Z2ZFfMf9Z2YFgMg9X2WFkMi9U2VFlMj9S2WFmM:^OA6R9`2RGmM;Fc8\\2SGoM8Hc8Y2UGoM8Hc8Y2RG[MNd0_8^3oGeLQ8[3oGeLQ8[3oGeLQ8[3oGdLR8\\3nGdLR8\\3nGeLQ8[3oGeLQ8[3oGeLQ8[3oGeLQ8[3oGeLQ8[3PHdLP8\\3RHbLm7_3QHcLo7]3PHdLP8\\3PHdLP8\\3PHdLP8\\3PHdLP8\\3oGeLP8\\3PHdLP8\\3PHeLo7[3QHeLo7[3QHeLo7[3QHeLo7\\3PHdLo7]3PHdLP8\\3PHdLP8\\3oGeLQ8[3oGeLQ8[3PHdLP8\\3PHdLP8\\3oGeLP8\\3oGeLQ8[3nGfLR8Z3mGhLR8X3mGiLS8W3mGiLS8W3mGiLS8X3lGhLT8\\3hGdLX8]3gGcLY8[3iGeLV8[3kGeLU8]3iGcLW8\\4O00O1O1001O000O2O1O1O1O00003M001N10000000001O001N2O1O00001O0O10000000001O00000O10000000000000000O10000000001O000000O10gJ" + }, + { + "size": [ + 432, + 640 + ], + "counts": 
"2[5U800000000O10000000000000000000000O10UHiJ]7W5>0000O1000000000000O10000000000000000O1000000000000000000O100000000000000000000O10000000000O10000000000O10000000000000000O1000000000000000000O100000000000000O100001O0000O1000000000000O1000000000000O10000000000000000O10000000000000000O10000000000O10000000000O1000000000000000000O100000000O1G]GdKd8[4900O1O1O1O1N2N2O1O1O1O1O1O1001O1dFTLR9_4F1O000000000000O1000000000000O1000000000000O10000000000000000O1000000000000000000O100000000000000O10000000000O100000000000000O1000000000000O10000000000O100000000000000000000O1000000O]GhKP8X4PHhKP8X4d00000O10_GkKg7U4UHiK^O8Y8o3XHVLf7j3\\HULc7k3^HULa7k3`HUL_7k3cHTL\\7l3fHSLY7m3U100O100O100000000000000O1O1N2N2N2O1N2N200O1O100000000000000000000001O000XG]LUOOU8e3bHlLZ7T3eHRMV7o2iHRMV7n2iHTMV7m2iHTMV7m2dHZMZ7h2aH\\M^7e2`H]M_7[4O1O001O00000000B[HPKf7l4_HSKa7j4cHTK^7k4dHRK^7k4e0N2N20000O10000O100O100O1000000O10000000000001O001O0000001OO100O100O1000000O1000000000000O10000O1O100000000001O001O001O:F1O1O3M6J1O000000O11O1O1O001O1O1O001O0000000000000000000000000000000000000000000000O1000000000000000000O1O1O1000000000000000000000000000000000000000000000000001O000000001O0000001O00000000000000000000000000000000000000O10000000000000000000000000000000000O100000000000000000000000000O1000000000000000000000000O10000000000O10000000000000000O100000000000000000000O1000000000000O1000000000000000000O10000000000000000O1000000000000O100O1L4RNn1M3O100001_FSMSON01Q9Q5YO1O1O0000000000000000000N" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is over .", + "image": "images/caption_simple_92.png", + "model_output": " is over ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000458325.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "]ca33Q17k:MkD;S;K^D?\\;j01L3O2M4N2O4L2M2N202N1O1XOlC6g doing on the ?", + "answer": " is crossing the .", + "image": "images/caption_simple_93.png", + "model_output": " is walking on the ." 
+ }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000350122.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "lPT5g0U>7H8J5K4L5L2O2M2N2O1O1O1O1O1O100N12N3M1O1O0O2O2N2N5J3M3M5K:D3M101O001O0O2O001O1N1O2O00O010000O01O01O01000O000O2O1N2WO_OTCa0lfDJV;Q2O0OO2H8L4N2N2N2N2O1000YD]NR:c1iEeNMgNi9k4O01VK[FW4g9eK\\FZ4f9bK]F\\4V:N1N2M3M3^N`E[Ng:`1`1J5L5\\OSCoNR=m0UCmNo doing with the bicycle ?", + "answer": " is pushing .", + "image": "images/caption_simple_94.png", + "model_output": " is pushing the bicycle ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000295809.jpg", + "mask_rles": [ + { + "size": [ + 512, + 640 + ], + "counts": "d547k9_5UFaJk9_5UFaJk9_5UFaJk9^5VFaJk9_5UFaJk9\\5XFcJi9]5WFcJi9]5WFcJi9]5WFcJi9]5WFbJj9]5WFbJj9^5WFcJg9]5YFcJg9]5XFdJh9\\5XFcJi9]5WFbJj9^5VFbJj9^5WF`Jj9`5VFaJi9_5WFaJi9_5WFaJi9_5WFaJi9_5WFaJi9_5WFaJi9_5WF`Jj9`5;O10000O10000000000000000O1000000O100000000O1000000000000000000000000000000000000O10000000000000000000000000000000000000000000000000000O1N200002NN2000000000000O11O00O10000O1000000000000O1000000000000000000O100000000000000O10000000000000000000000O1000000000000000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1001O000000O11O0000O100001O00O100001OO10000000000000000000000O11O000000O11O00O100LPEXKP;l401OO1000000000000LPEXKP;l4000000000000000000000000000000000000000001O0000001O000000O11O000000ISEZKn:l40IQE^Kn:i410000001O00HRE]Ko:b4SE]Km:d4RE]Km:c4SE^Kl:b4TE]Km:c4RE^Kn:i411OMRETKn:i4UEWKk:i4TEWKm:h4TEXKl:h4TEWKm:i4SEWKm:i4SEWKm:i45000000O10000000000000000O11O003MM300001O00O1O11O2N1cMZK^If4n8MmDZKR;f4nDZKR;j4000001O00O100001OO100001O0000O10\\NTKTHl4`91OO10000001O0000000000000000000000000bLTKhKl4f70000000000000000000000000000001O000000000000O1001O000000000000000000000dNSKeGm4[8SKeGm4g9000000000000000000000UNRKdHn4W900000000000000000000000000000000000000000000000000000
00000000000000000000000O11O0000LQEVKP;j4PEVKP;n4O0000000000000000000000000000000000LPEWKQ;i4PEVKP;n4O0000LQEWKo:i4QEWKo:i4QEWKo:m4000LPEWKQ;i4PEVKP;n4O0000MPEVKP;i4QEVKP;k42001O00001O1O00LQEWKo:i4QEWKo:l4100001O1O001O00001O0000001O0000001O1O1O3M001O1O0000001O1O1O00001O1O00001O1O3M1O001O001O0000001O1O1O00001O1O001O001O001O000000001O00001O1O1O2N1O1O1O1O001O001O5K00001O6J1O001O1O1O001O1O001O1O000000RI" + }, + { + "size": [ + 512, + 640 + ], + "counts": "PT[86h?3O0O2N10000O1000001N10001O00000O101O00000000000O10000000O1O010O0100O100O100O100O1O1O100O1000000O100000000000000000000000000000000000000000N2D=G]`86e_G5K4L000001O00000000000O100000[L" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is driving on .", + "image": "images/caption_simple_95.png", + "model_output": " is beside ." + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000511760.jpg", + "mask_rles": [ + { + "size": [ + 640, + 296 + ], + "counts": "i?9V3HZ=d0a_OD00000:0F0J5Q1MQOe?S400000000000000000000O1001O00O10000000000000000000000000000000000000000000000000000000000000000001O00O1001O000000O10000000000000000001O000000000000O11O0000000000000000000000000000000000O100000000000000000000000000000000O100001O0QLb@\\3^?bLe@4Ih2b?oL]Ao2k?O1O>B:gM^^O`1Tb0M1O1O000000000000000000000000001O0000000000000000ZM^NnBb1R=`NlB`1S=eNiB[1W=iNeBW1[=lNbBT1^=mNPAB2M7Om0_?n0`@RO_?P1_@QOa?Q1\\@POe?e20ZMe@g0[?QOn@n0R?QOPAXO[OR1e?ERAmN]OK1b1`?BkA=U>[OSBe0m=YOUBg0k=XOVBh0j=WOWBi0h=XOXBh0h=XOXBh0i=WOWBi0i=WOWBi0i=WOWBi0i=WOWBi0i=WOWBi0i=WOWBi0i=WOWBi0i=XOVBh0j=XOcAAnNW1_?ZOf@TO and ?", + "answer": " is standing on .", + "image": "images/caption_simple_96.png", + "model_output": " is beside ." 
+ } +] \ No newline at end of file diff --git a/evaluation/GAR-Bench/model_outputs/gar_8b_simple_eval.json b/evaluation/GAR-Bench/model_outputs/gar_8b_simple_eval.json new file mode 100644 index 0000000000000000000000000000000000000000..e62b03b7c3bc832e5002db11371b6b31458cd352 --- /dev/null +++ b/evaluation/GAR-Bench/model_outputs/gar_8b_simple_eval.json @@ -0,0 +1,2337 @@ +[ + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000227491.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "[f`46i>3N001fA3`0DQ<9^CU1_4M1O2O000O22M0000O010O00001O0QDJ7FT6a0bI6N\\O_6>ZIf0OmNg6>VIn0IgNR7;RIU1GaNV7;QIY1D_NY7:oHl1_N^M>?U8:iHS3Y7mLcHU3]7mL`HT3a7lL]HU3d7kLZHW3f7iLXHX3i7hLVHX3k7hLSHY3n7gLPHZ3P8gLnG[3R8eLmG[3S8eLlG\\3T8eLkG[3U8gLhGZ3X8gLgGY3X8jLfGV3Y8lLfGT3X8oLfGQ3T8WMkGj2m7^MRHb2i7cMWH]2f7gMXHZ2e7jMZHV2b7nM^HR2_7RN_Hn1`7TN`Hl1^7VNbHi1]7ZNbHe1^7\\NbHd1\\7^NdHa1\\7aNcH^1\\7dNdHnNKaNc0ENc0R7[2aHgNo1bNc5g2]HgNV2[N^5o2[HgNQ:T3O1O1O1O102N2M2O3K4aHhJc42^I[5i1gJo33XJl5c1UJd3P7YLQIe3U7ULnHj3U7PLnHo3W7kKkHT4^7ZKlHe4\\7nJjHQ5c8O100O1O100O100O100000000000000000000000000000000001O000dMWKaJi4[5ULjIm3S6XLUInNJk4P7UMjHo2T7_20001O001N2XI[HS6g7hI_HU6b7mI]HQ6c7PJ^Hn5d7RJ[Hm5f7SJ[Hk5f7TJ[Hk5f7SJ^Hj5c7UJ_Hh5c7WJ_Hg5^8O001N2N2O1O1O2L3M5L4L2N4L2M3O1O001N101O001N101O0O2O1O0O2N101N2O0N3L4M3M3M3L4B>M3M3M3DUD^MP<_2:N2O2N2M2O2N2L5J6WOlBUOc=;YWf4" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_0.png", + "model_output": " is beside .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000029397.jpg", + "mask_rles": [ + { + "size": [ + 449, + 640 + ], + "counts": 
"Zl95l=1O1O001O01O0000000010O012MU6CaIg0^6ZOVIQ1j6S301O001O0001O00000000000000001O002N3M:QIiH`6^7N100O1O00001O000000001O00001O00001O0001O0000000000000000000000000O100O100O10000O1O1N2O1N2N2N2O1O1O1O100O101O0O1000000O10000O100O100O1O1O1O10001O0O10000000000000O11O00000000001O000000O2O00000000000O1O1O1N2J6J6K5J6L4J6J6L4J6K5I7J6K5K5K5I7I7K5K5M3J6K6L3J6O10001O1O2N2N2N2N3M2N2N3M2N2N2N3M3M2N2N2N3M4L1N102M5I6H7J8G9Edm<" + }, + { + "size": [ + 449, + 640 + ], + "counts": "Qa_4?_=6L2L5L3M3N2N1O3M1O2O0O100L5ZOe0L5J5M3N3M2O1N2O1O2L4N4lN`M^Fd2k8f1E;@`0H7L301O1O1O001O1O00001O000000001O00000000000000000000000000000000O1000000000000O100N2O1M3CXH\\Jm7]4`HeKA5Z8S4W1M3]Oc0M3L4L4M3K5L4I7F:I7H8J6E;H8EXCVOTR;5a[E6H7L4RLBSJa0h5LnI7o56dI7n5g0UI_Of6e0UI^Oi6h0PIK^68oH9P7LcH=\\7T30001O001O0000000000001O00000000001O000000000000000000O1000000O10000O\\NlI]KS6Y6O1O1O1N2N2N2O1N2N2O1O10000O10000O10000000000N2gNgGjLY8U3iGiLX8U3kGjLU8T3PHgLR8V3SHgLn7V3WHfLk7X3XHcLl7X3[H`Lk7[3`1L4\\MQG0U9M\\G_Oj8?]2O1O1O1O1N2O1O1O1N2O1O1O1N2NSaZ1" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is standing on .", + "image": "images/caption_simple_1.png", + "model_output": " is on .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000163117.jpg", + "mask_rles": [ + { + "size": [ + 500, + 376 + ], + "counts": 
"[o0Q82QH\\7V8N000O2HhGRIX8U7001O01O000000000O1O1O1N2N2dL`HcNa7Z1cHdN^7Z1eHeN[7[1gHcNY7]1kH_NU7a1oH[NQ7f1nH[NQ7e1oH]No6d1oH`Nn6a1PIaNo6`1PI`NP7`1PI`NP7a1PI_No6d1nH]NQ7d1jHVKOV3W7e1hHWK0T3X7S5000O1O100000000PNgHfKX7Y4jHfKV7Z4kHeKU7Y4PIeKo6Z4UIoIE^1U7b4^I^Kb6b4^I_Ka6a4_I^Kb6b4]I_Kc6a4]I^Kd6b4ZI_Kg6b4XI]Ki6d4XIYKi6h4WITKl6m4SIQKo6P5QInJP7S5gHhIMU1\\7S5aHhI120S1^7T5_HjI022o0_7l601O2N4L1O1O`HhHc6X7]IjHa6n0iHT5c0RJb6T7[IUIA@h6[7eIYI@]Ok6Z7eI]I[6c6dI]I]6c6cI]I]6b6dI\\I^6d6cI[I^6d6bI[I`6d6`I\\Ib6b6`I]Ib6`6_IaI`6_6`IZI]OES7?nHl5c0VJ_6NmHk5f0VJ]6_6dIQI[O3LMU7P7cIoH@0J0T7P7[1O02M2L4WKhHQ1\\7nNhHn0[7POfHP1Z7POdHeLMX4_7TOhHl0X7TOhHl0X7VOfHj0Z7YObHi0]7i3M2O01O01N11O0000O100000001O01OO3N3M001O1O2N00TMVIoLi6P3ZInLf6R3[ImLe6R3\\InLd6Q3]InLd6R3\\InLd6R3\\ImLe6T3ZIlLf6U3ZIiLg6X3WIiLa5cMoJN@g5InLi5\\MgJNA47\\8[6cGeIX8b6fG]IY8P7O2N1O01ZOlGmIR8k6O1O01001N0100001OO01UMgH[MY7d2gH\\MZ7d2fH\\MZ7d2gH[MY7e2gH[MY7f2fHZM[7f2bH\\M]7^501O0000O1UKaH`1a7^N_Ha1c7^N^H`1c7\\N\\HQM3a4b7]NcHb1_7\\NcH`1`7`NbH\\1_7dNcHY1_7gN]H\\1e7dNUH^1d0fK^6n2iH]1j0fK\\6X7gIhHZ6U7U1M2O00101aNgG^JIo0_8d4kG]KU8a4mG_KS8^3fGZL89R8\\3gGZL7 and ?", + "answer": " is flying over .", + "image": "images/caption_simple_2.png", + "model_output": " is over .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000170613.jpg", + "mask_rles": [ + { + "size": [ + 640, + 439 + ], + "counts": 
"eiT57gc03M2N2O2K4N2K5L4O1O1N2jLL^B6^=n1UAjL0`1k>`3N1OkLeAj0X>UOmAj0R>TORBj0n=SO\\Bg0c=WOeBc0[=YOmBd0S=YORCe0mVS<@oC`0P<@RD>o;@SD`0m;@TD>l;AWD>i;AYD=i;AYD;j;DXD4o;MQDKV<5kCD\\<l8BTG8S9HkFGg99YF]OQ:c0nEYOX:f0hEUO^:j0bEPOd:P1]EhNk:2\\BMk2HQ;8YBM_a03a^OM_a02c^OM]a02d^ON]a00d^O1[a0Ng^O1Za0Lh^O4Xa0Jj^O6Va0Il^O6Ta0Hn^O8Sa0Eo^O;Qa0DQ_O;o`0Cj^OCDj0ca0_Oj^OJBg0ea0ZOl^O1^Of0bb0ZO^]Of0\\b0WOa]O34e0Zb0ZO`]O26d0Zb0Cf]O[MgNIg2e;UNVG`0ZMeNJf2e;VNTGc0[M[1a;RNRGg0[M^NNe2d;WNQGl0bMh0\\;]NoFn0jM`0W;aNmFR1lM=W;aNkFU1mM:W;bNhF\\1oMOZ;fNoDH5j1\\O^M1Y2^;8iD:FVM4V2^;c3]DWJ4V2`;e3_DVJMR2d;j3_DRJOQ2d;o3aDnK`;S4aDgKc;Z4\\DcKg;W63L5gNjCbJ\\<^5eC[Jb^6d;]JYD`5h;dJVDX5m;jJQDQ5S`0VBUOP>k0UBlNo=S1YB^No=a1ZBoMm=P2S2O1N1O2N3L3K5K6J7H9G8H;EVhV4" + } + ], + "question": "What is the relationship between and ?", + "answer": " is talking to .", + "image": "images/caption_simple_3.png", + "model_output": " is looking at .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000465822.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "dh^31e;1ig72RXH4J7I8J4M3N3L4]OROVFR1h9RORFR1k9?M3N3L2O100O2N1O1O1O1O1O2N1O100O2N101N1O1O1O2O0O101N1O1O101O0O2O0O100_HQMZ6P3dIQM[6P3bISM^6m2`IUM`6k2_IWM`6i2]IZMc6f2QIgMn6Z2oHhMR7W2kHmMT7[3010O0mLfHi1Z7VNiHh1X7WNlHf1S7YNQId1o6\\NTIa1m6]NWIa1i6]NZIa1g6]N\\Ia1e6]N]Ic1c6XNcIf1_6UNfIi1Z6TNjIk1W6RNlIbNh0`7dNRJc0`Nh0W9WOkFh0T9XOnFg0R9XOPGg0P9VOTGj0j8VOXGi0h8VO[Gh0f8WO\\Gh0c8WOaGg0^8YOcGg0ZOnNi8:oGS1o7kNSHU1n7hNSHX1o7dNTH\\1S9001O010O10O00010O01O010O001O010O00010O10O010O01O010O0100O010O0010O0101N1O2M2I8YOl0BcPb0" + }, + { + "size": [ + 375, + 500 + ], + "counts": "f[b41c;5M3M3N3L2N3N3M2N2N10O01O000000O2N100O1O2N100O101N1O1O2O0O1O1O1O1O101N1O1O101N1O2N`[e0" + } + ], + "question": "What is doing with the ?", + "answer": " is looking at the .", + "image": "images/caption_simple_4.png", + "model_output": " is holding the .", + "eval_result": "False" + }, + { + "image_path": 
"/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000278353.jpg", + "mask_rles": [ + { + "size": [ + 640, + 596 + ], + "counts": "32kc0k0XO2N001O000000001O000000001O000000001O0000001O00000000001O0000001O00000000001O0000001O0000001O0000000000001O00000000001O00000000001O000000001O000000001O000000001O0000001O0000001O0000001O0000001O000000001O0000001O000000001O0000000000O10000O100O100O1O1O100O1O100O100O100O1O1000000O100000000000000000000001O00001O000000001O0000001O00001O0000000000001O0000000000000000000000000000000000001O0000000000001O0000001O001O0000001O00000000000000001O0000001O1O001O001O1O001O1O001O001O001O1O001O00001O001O001O00001O001O00001O00000000001O00001O00001O00001O0000001O0000001O0000001O001O0000001O000000001O000000001O001O0000001O00O1000000O100O100O10000O100O100O10000O1000000O100000000000000O10000O10000000000000000001O0000000000001O000000001O000000000000001O000000000001O001O000000000000000000001O00000O1000001O0O100O1N2O100O1N3M2O1O2N1N2N3N1O1O1N3M2O1N3N100O100O2O0000000000d^OROg?m0V@WOi?i0V@YOi?h0V@XOj?h0V@YOj?g0T@ZOl?g0S@ZOl?g0R@[Om?f0R@[Om?f0`_OoNG=i`0a1P_OcNo`0X200O1O1O1O1O1O010O001O1O1O2N1O1O1O0001O01O0000001O01O0000010O0000000001O0000001O00000001O01O00000000001O0001O01O000000001O01O000001O01O00010O00000010O0001O0001O000001O0000010O000001O00000010O00000001O000000010O00000001O01O00001O01O0000010O000000000010O00001O0000000010O0001O0000000010O00000001O00000001O00001O;bLX_O46KO67HL001Je0[?" 
+ }, + { + "size": [ + 640, + 596 + ], + "counts": "[me1k0gb0d0C;CO1O1O1O10O01O001O0000jMhJdEX5[:nJ`ER5_:SK]Em4c:UK[Ek4f:WKWEi4h:\\KTEd4l:^KREb4n:`KPE`4P;bKmD_4S;cKkD]4d8bKgG5AZ4W8YLPHBCV4o7kLZHROBT4P8RMZHlNCS4Q8VMYHjN@S4U8XMXHh4e7^KXHb4g7`KYH`4e7cKYH]4g7dKXH\\4i7fKTHZ4n7gKnG[4R8eKmG[4U8eKhG\\4Y8jK^GX4c8c3O0100O002N1O2N000010O01O1O001O2NnHQET5o:lJoDV5P;m110O101OO01000O0100O1000_HREo5m:QJTEo5l:PJTEQ6k:oITER6m:mISET6l:lITEU6l:`1O00ZHUEX6k:gIUEY6k:gIUE[6j:dIVE\\6j:dIVE]6j:bIVE^6j:bIVE_6j:`IVE`6j:\\10SHVEb6j:]IWEd6h:\\IXEd6h:\\IXEe6h:YIYEh6f:YIXED1W6g:TJXEB6Y6b:UJXEB6Z6a:TJYEE3W6d:TJYEE3VOOn6e:VJYEC0[O100l6f:VJYEC0@1g6f:VJYEL1n5g:UJXEI1TO1o6e:RK^En4b:RKYEPN2n6f:TKZEm4e:PKZEPN1P7e:oJ]ES5d:lJ[EU5e:S201O01O0010O0001cGWEa7i:^HXEb7h:_HWEa7j:j001O01O000010O\\GWEQ8i:oGWEQ8i:d0OWGWE[8j:cGWE]8k:aGUE_8l:`GTEa8l:9005K0001N1M4N2O1N2N2O2M3N6J2M5L5J6J5L1O1N2O2N1N3N2N2M2O2M3N1O2N1O1O1O1O1N2O1O1O1O1N2O2N3M3M2N4L3M4L5fKlC>X00000O1010O1O001O10O0000000jIoL]KQ3^4YM]Kg2b4]M[Kc2e4cMUK]2j4lMnJT2R5RNhJn1Y5UNdJj1]5[NnGWNUN1l1]3Q8_N`G@GP2j8dNVGGIe1Q9gNnFLN]1T9nN^F39o0Y9U2dFjM]9X2_FiMa9Z2[FgMe9\\2XFdMh9_2SFcMm9X600001O0000000O20O00O100O1O100N2N2O1O100N2M3N2O1O1O1N2O1N2N3N1N\\FZGS8d8mG]GS8c8lG_GS8`8mGaGS8_8kGdGU8Z8jGhGW8W8hGkGX8T8gGmG_8m7_GUHj8b7TGaHR9Y7kFiH[9Q7bFSIV:U6iEkIb:j5^EVJg:g5PEaJS;\\5gDjJ\\;U5^DnJe;f63M4gIVDS4Y and ?", + "answer": " is in front of .", + "image": "images/caption_simple_5.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000128051.jpg", + "mask_rles": [ + { + "size": [ + 360, + 640 + ], + "counts": "U4V4R7000000000000000001O00000000000000000000000000000001O000000001O00001O2N1O001O00O10000O1O100O1010O00000O1001O01O1O0000000000O1L4O2N10000000000000001O0O2O00O11O000001O010O001O00001O3M4M2M1O1O2N1O001O01O00001O0010O01N10001O1O001O1N3M2N3M2M4K5J6E;K5K4O3L3N3M2N2M5L2N4K3L5J8SO^FFi9DXF8 located relative to ?", + "answer": " is in front of .", + "image": 
"images/caption_simple_6.png", + "model_output": " is in front of .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000560266.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "ocP1335iZLAf3>\\LAd3>^L@c3`0_L^Ob3`0aL]O`3b0cL\\O^3b0dL\\O_3a0cL^O_3?dL^Oe39]LFe36`LFd35hPY4" + } + ], + "question": "What are and doing to each other?", + "answer": " is looking at and biting .", + "image": "images/caption_simple_7.png", + "model_output": " is biting .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000024919.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "iah21bYQ14bSoN0O2O0O1O1O000UN8nE1k9 and doing to each other?", + "answer": " and are pushing each other.", + "image": "images/caption_simple_8.png", + "model_output": " is pushing .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000231169.jpg", + "mask_rles": [ + { + "size": [ + 454, + 640 + ], + "counts": "WaS2=f=g0SOb0E9H4L3M2N4K6K3N1O1N3M4M3L2O2M4M3L4M1O1O2M4M2N1N2O2M4M2M2O1O2N1O2N1N2O2N1N2O1O2N1O2M3N001OSLmFi2P9WMSGi2k8VMWGj2g8WMZGj2d8UM]Gl2b8TM_Gl2`8TMaGl2^8SMdGn2Y8RMiGm2W8SMjGn2T8QMmGP3R8oLPHQ3o7oLRHR3l7nLUHQ3k7nLVHS3i7mLXHT3f7kL\\HU3b7kL`HV3^7jLbHW3]7hLeHY3Y7fLiHZ3U7fLlH[3S7cLPI]3o6bLRI_3m6aLTI`3j6_LWIb3g6^L[Ic3c6[L`Ie3_6ZLcIf3\\6YLeIh3Z6TLkIk3U6SLnIo3o5nKUJT4h5jKZJW4e5gK]JY4i7000O10000O10000O100O2O001N1O10000O2O0O01001N1O10O010001N10O11O1N1O1O10001N0100O02N2N1O1O1O1O2N1O1O1O100O2N1O001O2O2L10100O3M10O00010O10O10O0100O10O001000O01O0001O010O0001O1O010O000010O0000O2O001N2O001O001O000O2O1O1O001O0010O0000O3OO01O1O001O001O1O001O00000010O2N1O2NO2M2000001O00001N101N1001000O0000010O001O002N001NZGTKT8l4c000N3N2O001N2O1N2O001O1O2M1O2N102L3L4M3O1O1O2M2N2UObEkMb:P2bEmMi;AVDLW1>^NnA1O0000Oea1" + }, + { + "size": [ + 454, + 640 + ], + "counts": 
"^_^51U>00000000001O2N2N010N2O1O1O1O1O001O2cFCm4>RKBo4>\\41O2N1O1O1O1O3eB0OXOg04XO064W:b3L0O3jEZLg9S4O0100O1O3Ng0XO2N1O10O6J100cE]LU:d37O02N1O2O0O010O01O0000001O001O00000000001O000000000000000000000001O00000000O11O000000O1000000000000001O00001O00001O001O000000000000hMeEe0[:WOlEe0U:[O^FeNVO0f00ZO:\\:R1Z2O10000000000000000000000000000000000000hFdNf5\\1ZJdNf5\\1ZJdNf5\\1b30000000000000hFeNe5[1[JeNe5[1[JeNe5[1[JeNe5[1c300000000000000000000001O001O001^FaN[6_1dIbN\\6^1aIeN_6\\1[IoN_6R1^IQOa6o0^ITO`6m0UG`NK1?N>d0S8Z3RGfLm8^3nFeLQ9X40000000000000000000000000000000000000001O0000000000O11O\\OnFhK22P9i3fGUL[8j3gGULY8k3hGTLX8l3iGRLX8n3P100000000000000000000001O00001O1O:F1OO010000000O10000000000O10000000000000000O1000nF" + } + ], + "question": "Where is located relative to ?", + "answer": " is beside .", + "image": "images/caption_simple_9.png", + "model_output": " is beside .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000551822.jpg", + "mask_rles": [ + { + "size": [ + 453, + 640 + ], + "counts": "Y_P3e0Z=]OhBn0o and ?", + "answer": " is attached to .", + "image": "images/caption_simple_10.png", + "model_output": " is attached to .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000498463.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "j`T7a0]>201O000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001O0000000000000000000000000000000O100000000000000000000000000000000000000000001O001O000000001O000000000000000000000000000000000000000000TH" + }, + { + "size": [ + 480, + 640 + ], + "counts": "l_T7i0V>2O000000000O1O1bNVOdDl0Y;YOaCKd0V1k;YO_CY1` and ?", + "answer": " is on top of .", + "image": "images/caption_simple_11.png", + "model_output": " is on top of .", + "eval_result": "True" + }, + { + "image_path": 
"/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000275198.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "^[P34j>5J5M1M4M2N2N2M3N2N2M3N2N2O1N1O2N2O1N2O1O1O1O1O1O2O1N1O010O10000VO\\NWDf1c;jNRDU1l;POUDm0j;UOVDj0i;XOTDYOKX1P[O1K[NoM9Oo00W1j2WO\\O2KbNPNn00V1h2YO]O5HTNRN21N0X12iNMX2g2@^O5G[NUNQ1OhN0X2b1eM`N6e0e1^17EX2lNnLNd0_1:EQNXNY10bN1X2nM;ZMWO_3P1nMQO[3;QN;i0[O_Lg0]1XOeNN`4b0^LQOW64R1l0gHPOY17k:h0mCSOU17n:f0kCUOV15R;k0mDVOmNGO3i;o0[E^OkNCj;n0Q2F[BXOf=h09O00100O1O10O10O10O0010O1O1O10O01O001O01N11O101N00100O010O1OO2N1100O00O2O1O100O0O2N2001OO100O10000O10ON32N0WOQB`0X>O1N2O1O1L6IoRe6" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is looking at .", + "image": "images/caption_simple_12.png", + "model_output": " is looking at .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000257896.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "`fV46dc0`0D:F9H4M1N1O2O2M101O0001N2O0O1O2N2N1O2N2N2M3N1O2N2N1O2N2N2N1O2N2N1O2M2N3N1O2N2N1O\\Q[4" + }, + { + "size": [ + 640, + 480 + ], + "counts": "ZT`21mc04M4K4M2O1N2O1N100O101M5M1N101N2O2M2O000O2N102N1N101N101[CnNZ6S1cIVOV6j0hIYOW6h0eI_OV6d0eIDV6 and ?", + "answer": " is wearing .", + "image": "images/caption_simple_13.png", + "model_output": " is over .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000034417.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "hc[4?n:=I:G4N1O101N10000J6O10000O2O00000000lNQF`0o9^OTFa0m9^OTFb0k9\\OWFc0k9VO\\Fh0[:A>O2O3N1O100OO02O0O100O010O0010O01O0010O01O000O101NVUi0" + }, + { + "size": [ + 375, + 500 + ], + "counts": "kZm35\\;7I6K6L3M3N3N2M4iEkNg9d1N2O0O1O1F:O1O01O0O2C=N2O1ROmE12\\OT:?SFBI4\\:9SFFn97TFIn94SFLo91RFNQ:MRF3i:O100O2NTf]1" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": 
"images/caption_simple_14.png", + "model_output": " is beside .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000047585.jpg", + "mask_rles": [ + { + "size": [ + 640, + 424 + ], + "counts": "V91Z2>_1C`L0f00000O100O10000000000O10000000000000000O1O10000000000001O001O00001O000000001eNd]OP1\\b0lNi]OS1Wb0kNk]OU1`b001O000000000000000000O100O1O1O1O1O1O1KeNd]O\\1\\b0dNc]O]1]b0dNb]O\\1ab00001OO10000000000000000000000001O0000000000LdNd]O20T1\\b0jNk]OU1ab0O001O00000000000000O10000000000000000j]OlN`a0T1f0000O10000O100O1O1N2O1O1O100000000000000000000000000000000bGiNSNW1m1oNmMQ1S2XOXFJ`6n0X35YF_No5\\1g37WFbNo5W1j3k0SLUOm3n0oKSOQ4o0mKQOS4S1iKmNW4T1hKlNX4V1eKkN[4V1dKjN\\4X1bKhN^4[1_KeNa4\\1^KdNb4^1\\KbNc4c1YK]Ng4g1TKZNl4l1nJTNn4`NUF_3j4QNQ5`NUFc3e4nMV5_NUFh3`4iM`5kNnER3\\4TNP6^NkE\\3R4VNQ7X1lHhNd7l0XHTOZ8=cGC_8?]GAe8a0XG^Oi8e0SG[On8i0mFWOU9n0dFRO]9b1hD]MWOQ1QlFIQ9:jFJT99fFLW97dFNY96aFO\\93aF1\\91]FhK[OX4X:1]FhKYOX4Y:0aFeKXOZ4U:MmF2S9OmF1R90nF0R90oFOP92PGNo83RGLm85RGKo86oFKP95QGKo84SGKl85WGIi87TGKl85TGLk85UGKk85UGJk87UGIk87UGIj88VGHj88WGGV7nKoH]4JEU7SLnHX4MET7ZLhHS43DS7[LiHR42DT7[LjHQ42DQ7aLiHm31FT7dLeHg34IT7dLfHd34IU7hLbH`36Km6^KbHY1:`36Jm6PMkH[36Cn6TMjH[37Am6WMjHZ37Al6YMiHY39_Om6j2mHZMQ7m2hHTMW7k7O2kH\\Cl6m<0O0O3N1O010N1O101O000000000O1O1O1N200O100001O0002N1O001O1O0000001SEfHR9[7kFiHS9W7jFlHV9U7gFoHW9R7eFRIZ9o6dFSI[9n6dFSI[9n6eFRIZ9P7eFoH[9T7cFlH\\9V7mESH and ?", + "answer": " is standing on .", + "image": "images/caption_simple_15.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000234757.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Y\\Z52k>6J5K4L5K5K4M3N2O1N2O100O1000000000001O00000O100O0O1O2N10O0jN`Bn0`=ROaBm0_=ROcBn0\\=ROeBP1X=POhBP1X=POiBP1V=QOiBo0W=ROhBn0Y=SOeBm0[=<1O1\\OoBWOQ=e0VCXOk1iA0V>OjA3U>MkA3U>MkA3U>MkA4T>MkA3U>NjA2W>NiA1W>;2O001N2N2N4JbRT3" + }, + { + "size": [ + 480, + 640 
+ ], + "counts": "oXT4:e>2O1N3N1O0aE>QLUOZ9=UJg1V5ZN]J[2[5eM^Jl2W5VMbJX3V5hLaJg3X5ZL]JV4^5jKZJd4`5\\KYJT5^5lJ^J_5[5bJ`Jf5\\5[J^Jk5a5VJYJo5g5QJUJS6k5mISJU6l5lIoIZ6P6fIlI^6T6bIhIb6X6]IdIh6[6YIcIi6]6m000O10000000O00100O1000O1000O1000O100000000O101O001O1N1000000O100000SNXKaIi4m5^2B>iNW1kNU1iNV1_Nb1O1O2M201N2N2N2M4L[bT4" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is in front of .", + "image": "images/caption_simple_16.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000008899.jpg", + "mask_rles": [ + { + "size": [ + 539, + 640 + ], + "counts": "Z>e1V?0000000000000O1000000O10000O1O1O100O100O100000000O100000000001O000000000O101O00JZNXAe1h>[NXAe1g>[NZAe1e>]NYAd1e>^N[Ab1c>`N]A`1d>^N]Ab1c>\\N_Ad1a>\\N^Ae1b>YN_Ah1i>10000L4N2000000KoM`AQ2`>oM`AQ2`>oM`AQ2d>1O100O1O100O11O000000O1O1O100O01000O10000O1000XNbAS1^>kNeAT1[>lNeAT1[>lNeAT1[>lNeAT1[>kNfAU1Z>jNgAV1Y>iNhAW1X>hNiAX1W>gNjAY1V>gNjAY1V>fNkAZ1U>eNlA[1T>dNmA\\1S>cNnA]1R>bNoA^1Q>aNPB_1P>aNPB_1P>`NQB`1o=`NQB`1o=_NRBa1n=]NTBc1m=[NTBe1a>0000000000000000000000O10000000000001O00O100000000000000000001O2N:FY2PBfMP>[2PBdMQ>\\2oAeMP>[2QBeMn=[2RBeMn=[2RBeMn=[2PBfMQ>Z2PBeMP>[28000iAeMn=[2QBgMn=Y2SBeMo=Y2:000000000000000O10000000000000000jAeMl=[2SBfMm=Z2:000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000000001O0000O100O100000000O1000000000000000000000000000000kAbMm=^2SBbMm=^2800000000000000000000000000000O100000000001O0000O1000000000000000000000000000000000000000O11O0000000000000000O11O0000O1000000000000000000000000001OO0101O0001O0O10000000000000000000001O000000000000O1000000000000000000000O1001O00O1000000000000000000000000001OO10000001OO100001O00nNfACZ>=hA@Y>`0hA_OX>`0kA_OT>a0mA^OS>b0mA]OT>c0mA\\OS>d0nA[OR>e0oA[OP>e0RBYOn=g0SBWOn=i0SBVOm=j0TBUOk=l0WBROi=n0WBSOh=m0YBQOh=o0YBPOg=P1ZBoNf=Q1[BnNe=R1\\BmNd=S1\\BmNd=S1]BlNc=T1^BkNb=U1`Bi
N`=W1aBhN_=X1aBhN_=X1aBhN_=X1aBhN_=X1bBgN^=Y1bBgN^=Y1bBgN^=Y1aBhN`=W1aBhN_=X1aBhN_=X1aBgN`=Y1`BgN`=Y1`BgN`=Y1`BgN_=Z1bBeN^=[1cBdN]=\\1Q10001O00O100000000001O0000000000`AeN`=[1o0100001O00000000000000O11O00000000000000O1001O00000000000000000000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000000001O0O1M4ZO]@Aih5h0PgJ=H2O000001O000000001O00001O00001O00001O00^B" + }, + { + "size": [ + 539, + 640 + ], + "counts": "VTZ22e`08J4M2N2O0O1_OAf@`0X?Cf@=Y?Ee@i0N1O1J6O1N3L3N200O100000O102N1O2N2N2N6I2O1O3mNbAC`>_OfA6?Fm=3dA6e0Ai=7dA6^?He@4^?If@2^ea7" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is on .", + "image": "images/caption_simple_17.png", + "model_output": " is enclosing .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000260261.jpg", + "mask_rles": [ + { + "size": [ + 640, + 426 + ], + "counts": "Xi`33lc02M3N2M2O2N2N2N101O1O10O1000000O10000O10000O10O1000O1O001O0O2M3O0O2O001N2O1M2O]^P4" + }, + { + "size": [ + 640, + 426 + ], + "counts": "dbZ32mc03N2N6J2O0O001O01O01O1O001O01O01O00000001O0000000O10O1O100O1O1O1N2O01000O1gK@QEa0n:DmD=Q;GlD:S;IjD8U;IjD8U;JjD6U;KjD5W;KhD6W;KhD6W;KgD7W;KfD7[;HaD=^;BaDa0_;_O^Dd0a;]OZDg0g;YOVDj0h;WOUCCeN^1U>POQCP2nNZE5_;I]4Objb1" + } + ], + "question": "What is doing with ?", + "answer": " is holding .", + "image": "images/caption_simple_18.png", + "model_output": " is holding .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000301563.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": 
"^2m:_20000000000000000O2O000001O01O0000001O0O1000010O000000000O10000001O01O00000O101O00001O00001O001O000]KXMbNi2]1XMbNh2^1WMdNi2o50000001O000O2O2cE_Mk9Q300010O0001O000O2O001O00001O1O00001O001]KZL^Og3`0ZL@f3`0ZL@g3?YLAg3R50000001O000^KWLAj3>VLBj3>VLBk3=ULCk3o4@cFQM]9n2dFRMi4EDY3cKRMi4EDS3kKWM`4FER3mKWM^4FFS3mKWM\\4FGS3mKWM\\4FGS3mKXMn8h2RGXMn8h2TGVMl8j2UGUMk8k2UGUMk8l2TGTMl8l2UGSMk8m2VGSMi8m2XGRM^OJW9T3[GRM^OJW9T3\\GQM\\OLX9S3\\GSMe8n2ZGRMf8n2ZGRMg8m2YGTMe8m2[GSMe8n2ZGQMg8o2YGQMg8o2ZGPMf8Q3YGoLg8Q3YGoLg8Q3YGoLg8Q3YGnLh8R3YGmLg8T3YGjLh8V3g00000001O000000001O0000001O001O00000000001O0000000000001O000000000000001O00000000001O000000001O0000001O0010OO101O000000000000001O0000000000002N1O001O0000000000001O0000000bHgLi4Y3eJ^MV5b2aJnMX5S2]J\\NCkMX5i3SK`N_OSNV5]3WK_Og4a0XK@h4`0WKBh4?UKDj44d4`6lK`IT4Z6SLfIl3T6[LkIe3T6]LlIb3S6`LmI_3S6bLmI]3S6cLnI\\3R6eLnIZ3S6fLlIZ3U6fLkIY3X6eLhIZ3Z6dLgI[3Z6eLfIZ3[6fLeIY3]6fLcIY3_6fLaIY3b6eL]I[3e6dL[I[3f6dL[I[3f6eLZIoNOg3i6YMWIoN2g3h6ZMWIkN5i3e6\\MUIjN8i3c6^MUIgN:j3b6_MiIa2Y6^MgIa2Z6^MgIa2Z6_MeIa2_6\\M`Id2b6[M^Id2e6ZMZIf2g6ZMXIf2i6ZMUIg2l6XMTIh2m6XMRIh2Q7VMmHk2V7SMiHm2Y7QMgHo2[7PMdHP3]7oLeHo2\\7PMeHo2]7PMbHP3`7nL`HR3a7mL_HS3c7lL\\HT3f7jLZHV3g7iLYHW3h7hLYHW3h7iLWHW3j7hLVHX3k7gLUHY3l7gLSHY3n7fLRHZ3o7eLQH[3P8dLPH\\3Q8cLoG]3Q8dLnG\\3S8cLmG]3S8cLmG]3T8cLkG]3V8bLjG^3W8aLiG_3X8`LhG`3Y8_LgGa3Z8^LfGb3[8^LdGb3]8]LdGb3]8]LcGc3]8]LcGc3^8\\LbGd3^8]LaGc3`8\\L`Gd3`8\\L_Ge3b8ZL^Gf3c8XL^Gh3S901O000000O1000000O100000000hK\\LgNe3X1^LeNc3Z1aLcN_3\\1dLbN\\3e0XLUL`0T3X3f0[LSLa0T3T3i0_LmKa0Y3o2j0hMVOV2j0kMVOT2j0mMVOR2i0PNWOo1j0QNVOn1j0SNVOl1j0VNSLWN`2c3]1WNRLWN`2b3^1XNQLWNCOk2a3a1YNQLZN^2]3a1ZNQLYN^2\\3a1\\NPLXN_2\\3b1\\NoKYN]2[3d1]NnKYN]2Z3e1^NnKXN\\2Z3g1^NmKXN[2Z3h1_NmKWNZ2Z3i1`NmKVNY2Z3j1bNlKUNX2Y3m1bNkKTNW2[3n1bNkKSNV2[3o1cNkKRNT2\\3Q2cNkKQNS2\\3R2dNkKoMR2^3T2bNkKPNP2^3U2cNkKoMo1^3V2eNjKmMo1^3X2eNiKlMn1`3Y2eNiKjMm1b3Z2eNiKiMl1b3[2fNiKiMj1a3]2gNiKhMi1a3^2hNiKgMh1a3_2iNiKfMg1a3a2iNhKfMe1b3c2iNhKfMc1a3e2jNhKfMa1`3g2kNhKfM^1`3j2kNhKeM]1`3k2lNhKdM\\1`3l2mNhKbM\\1a3l2nNh
KaM[1a3m2oNhK`MZ1a3n2POhK_MX1b3Q3oNgK_MW1b3R3QOfK]MW1b3S3ROfK[MW1c3S3SOfKZMV1c3T3TOfKXMV1d3T3UOfKWMU1d3U3VOfKVMT1d3W3VOeKVMS1d3X3WOfKSMR1f3X3XOfKQMQ1h3Y3XOfKPMQ1g3Y3ZOfKnLQ1h3Y3[OSMe0m2]OQMc0o2^OoLc0Q3^OnLb0R3^OnLb0R3_OgKjLe0h3d3_OkLa0U3@jL`0W3@gLa0Y3AdL`0\\3AcL?]3AhKbL;n3m3AgKfL7i3S4@fKiL5g3U4AeKiL4g3W4@eKjL2g3Y4@eKjL0f3\\4@cKkLOf3^4AbKjLMf3a4C_KQ1b4PO\\KP1d4l300001O00000000000000001O00000000000000001O00000000000000001O0000000000001O00000000001O000000000000001O00000000000000001O0000001O0000000000001O000000001O000000001O000000001O0000000000001O00000000000000000000001O00000000001O0000000000000000001O00000000001O0000000000001O0000001O001O000000000000001O0000000000000000000000001O0001O0000000000O101O00000001O0000O10001O00000000000000001O000000000000000000000000000000000000000000000000001O01O0001O0000]J" + }, + { + "size": [ + 428, + 640 + ], + "counts": "ff[21[=00000SX<3egC8oBJSL4L4H8E<0O102N3M4L1O3M3M3M1O2N1O101N3N2M1100O001N2O4L1O000001N5L1N1O1O2O0O2N3M2O2L3N1O1O2N101N2N1O1O100O1O100O1000O010_LfGo1Z8PNfGQ2Y8nMhGR2Y8nMhGQ2W8oMfGdN2\\3X8nMmGR2S8mMmGT2S8mMlGS2T8PNiGP2W8PNiGo1X8mMmGR2R8nMnGS2R8mMgGgN3\\3V8QNkGm1U8SNmGl1S8SNmGm1T8QNnGo1Q8RNnGo1R8QNnGo1Q8RNPHm1P8RNQHo1n7PNRHQ2n7PNQHo1P8RNPHm1o7WNoGg1R8WNoGi1S8UNnGi1T8mMcGUO9m2U8kMeGZO3j2Z8kMeGZO2i2Z8kMgGYO1k2Y8jMXHV2j7gMXHY2i7dMYH\\2h7aMZH_2g7]M\\Hb2i7TMSGNW1m2V9O2N2N2M4L3M2O3L3N4L1O1O1O1O1O1O1O1O1O2N1O010O0O101N2N101N2O1O1O2N1O1N2N6K1O0O2O1O1O1O1N101O1O1O0O2N1O201L2O2M4MZ>InA0N4M0O2L41O01ON11O10O101N3ImB0Pko2" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is on .", + "image": "images/caption_simple_19.png", + "model_output": " is enclosing .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000016598.jpg", + "mask_rles": [ + { + "size": [ + 640, + 478 + ], + "counts": 
"Sme635l0Ni0OYNe`0Z3K3N2O1O010000O100O001M3N2O1O1O1O100O10000O10000O10000YOZ_OTNf`0j1^_OSNc`0l1`_ORN``0m1b_ORN^`0n1c_OQN]`0n1d_OQN]`0o1d_OPN\\`0o1f_OPNZ`0P2f_OPNZ`0P2g_OoMY`0Q2g_OoMY`0P2i_OoMW`0Q2i_OoMW`0Q2i_OoMW`0Q2j_OnMV`0R2j_OnMV`0Q2k_OoMU`0Q2k_OoMU`0Q2k_OoMU`0Q2k_OoMU`0Q2k_OoMU`0Q2k_OnMV`0R2k_OmMU`0S2k_OmMU`0S2l00000000000O10000010O01O001O1O1N2L5iNj]O_OXc06g\\OLb^X1" + }, + { + "size": [ + 640, + 478 + ], + "counts": "Vj133Nac0a0^\\OAPc0l0Fg5mAYJV>Z5jAlJ]>S4fA^Lg>^3[AnKM9l>g3WA_Lm>_3SA`LQ?]3QAaLR?\\3UA[Lo>b3RA]LR?]3`@]L?6S?Z3PAfLR?W3P1YO\\_OeMi\\1" + } + ], + "question": "What is doing with ?", + "answer": " is holding ", + "image": "images/caption_simple_20.png", + "model_output": " is holding .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000439854.jpg", + "mask_rles": [ + { + "size": [ + 333, + 500 + ], + "counts": "bhP11[:2O0O101N1O100O1O100O1O010O2O0O3N0OLTFIl9650010O0010O010O010O0010O0010O010O000010O0100O100O2OMRFHl98UFHk97WFHh996O1O100O2MaY`3" + }, + { + "size": [ + 333, + 500 + ], + "counts": "hmT19o0JR8=gGDX8`0SGG3Jj8o0SGROl8X100ZOjNUHV1i7nNUHQ1k7QOTHo0k7SOTHm0k7UOTHk0l7VORHL26l70QHJ54j73PHI73i75oGH;0f78oGH=Me7 and ?", + "answer": " is on .", + "image": "images/caption_simple_21.png", + "model_output": " is on .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000012062.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "l`k7>k<4L4[C\\O^ and ?", + "answer": " is in front of .", + "image": "images/caption_simple_22.png", + "model_output": " is in front of .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000121586.jpg", + "mask_rles": [ + { + "size": [ + 478, + 640 + ], + "counts": 
"VTZ191IY>g0KO=YOaA2OO4Oi=0]B0H002OOk=2SB20:1Bk=k0VBTOT<;VEHdN:3EV5]A0R>a0M2N2N101N2O1N1O000O1O100O100O100O10000O100010O01O4L1O0001O00001O1O001N2N2M3N2O2M2N3M4L4L2M6Inok5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is inside .", + "image": "images/caption_simple_23.png", + "model_output": " is in .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000476704.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "V8T5X8001O000000O10000000000010O02N1O00003L3N0000010O5K2N0O2O00000001O00000000O01O1L4M30000O10001OO1000000001O0000000000O1000000000000000000000O1000001O000000O100000000001O00000000O2O00101N3M3M1O0000000O11O00000000RL\\GT3d8jL_GU3a8gLcGDJ[3d8nLTHl2l7RMWHm2U9O00000010O0001N100000001O0000000000000000001O001O000000001O0000O10000001O0000000000000000001O00000000000000001O0000000000000001O0001O0O100000000001O0001O0O100000000001O00O100000001O00000000000000001O00000000000000001O0000000001O0000O100000001O0000000000001O00000000001O1O001O000000000000000000000000000001O0000000000000001O000000O100001O000000000O100000000000001O01O00000000000000000001O00000000000000000000000000001O0000000000000000001O000O10000000000000000001O0001N010000000000000001O00001O00000000O101O000000000000000000001O00000000000000000000000000000000000000001O000000O10000O10000O10000O10000000000001O4L1O001O001O0000001O000000000000O10000O1O1O1O1N2N2N2N2N2M3N2L4O1O1O1O10000O10000O100O10000O100O10000O10000000000O100O100O100O100O1O1O1O1O1O100O10000000eL_FP1K`0i9[N`Fb0IF1[1U:nNmEA2a1X:]N`Fb1e:0000000001O001O00002N3M1O002N1O2N001O1O1O1O00O10O10O2N100O100L4O1O1O1N2O100YLoNSKQ1_4^O`Kb0_4_O`Kb0`4^O`Kb0`4^O`Kb0`4^O`Kb0`4]ORISOf1`1X5]OQIVOe1]1Y5^OQIXOd1Z1[5^OPI\\Ob1V1^5^OPI^O`1T1`5^OPI_O`1R1_5POcHI?4_1S1_5oNfHG?2_1X1\\5oNULQ1k3oNULQ1k3nNVLR1j3mNWLS1j3jNXLV1h3gNTKMYN]1c6eNUK2TNY1g6eNUK6PNU1k6dNVK=hMP1R7bNWKb0bMm0X7^NXKg0]Ml0[7ZN[Kl0WMk0^:Q100001O00000000000000001O000000000000000000O100000000O1O1O
1N2N2O1SOPMgFg3^91FYLoFg3P9\\LmFe3Q9 and ?", + "answer": " is parked on .", + "image": "images/caption_simple_24.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000211042.jpg", + "mask_rles": [ + { + "size": [ + 640, + 458 + ], + "counts": "YdX34ic05L3N2N2N2N101N1O2N1O2N1O2O1N101N2O0O2N100O1O0001O0O1O2O001O001N10O20O0001O01O01O1O01O0O2O0O1O100O10001O01O01O001O1O001M3N2K7Ghbf0L_]YO5`N3n]OK;7ea0b0n]OFQb0U1N2N2N2I7K5K5N201O00000000000001O00001O000000001O0000001O00001O00001O00001O00001O00001O001O0000001N10001O00001O001O00001O0O2O001O001O001N101O001O1N101O001O0O2O1O0O2O1O1N101N2O1N101N2O1O1N2N2O2M1O2O1N3L3N3L5Jbcc1" + }, + { + "size": [ + 640, + 458 + ], + "counts": "]dW3;bc04M2L4L4N2oK^OaDe0^:X1RB_Nd1?U<_2WChMc^3m0N3M2O1N2O1O1O1O1N2O1O1O1N1O2N1_Ng_OYO]`0f0h_OnN``0P1X10001M2M4J5E;K6L5JnX=0`SB3WhQ3" + } + ], + "question": "What is the relationship between and ?", + "answer": " is attached to and leaning on .", + "image": "images/caption_simple_25.png", + "model_output": " is attached to .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000055299.jpg", + "mask_rles": [ + { + "size": [ + 429, + 640 + ], + "counts": "e and ?", + "answer": " is sitting on .", + "image": "images/caption_simple_26.png", + "model_output": " is on .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000435206.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": 
"b531`7c5`H]J`7c54000\\H]J`7c5_H^Ja7b5_H^Ja7b5_H^Ja7b5_H^Ja7b5_H^Ja7b5400000000000000O11O0000O10000001O00O11O0000O1000000000000001O00O1000000000000001O00O1000000000000001O00O10000001OO1001O000000O11O0000000000000000000000000000001O0000O1000000000000001ZH]Jb7b54000000000000000000000000000[H^Ja7b5_H^Ja7b5_H^Ja7b5_H^Ja7b541O000[H\\Jc7f501O000000O100O11O0000N]H\\Jc7c530000000000000000000000O1001O00O1000000O100001O0000000000O1000000000000000000000000O10000001O00O1O1O1000000001O00O100O100O10000000000O100N200001O1O001O000000000000000000000000000000000000000000000000000000001O00001O001O001O001O1O0000001O00001O001O000mH^J]6c5`I_J`6a5`I`JCLg6e5dIeJ\\6\\5bIfJ]6[5_IhJa6X5^IiJb6X5]IhJc6X5]IjJa6W5]IkJb6U5^IlJa6T5^ImJb6o5O000000001O00001O001UJ^Ik4b6TK`Ik4`6UK`Ik4`6TKbIk4_6SKbIm4^6RKcIn4]6RKcIn4^6PKcIP5^6oJbIQ5^6mJdIS5\\6kJfIU5[6iJfIW5[6gJfIY5[6eJfI[5[6dJfI[5U701O1O1O1O1O4L000000O11O00001O0000001O0000O10000000000000000000[LjGU2W8hMkGX2U8hMkGX2U8hMkGX2U8gMlGY2U8fMkGZ2U8fMkGZ2U8fMkGZ2V8eMjG[2V8dMkG\\2V8cMjG]2W8bMjG]2X8gLfG<2m2Y8fLeG=1n2[8cLfG?Nn2\\8bLhG?Lo2^8_LgGa0LP3i8oLYGP3h8nLYGR3g8mL\\GQ3e8mL\\GS3f8jL]GT3e8iL]GV3e8gL\\GY3\\9O00001O2N001O00001O00001O001O001O1O1O001O1O1O001O001O1O2N001O001O1O001O1O1O1O000WN[El0f:mNaER1`:kNcET1^:kNbEU1^:jNdEU1\\:jNeEV1\\:gNgEX1Z:dNjE[1X:`NkE`1Q;eNXDQ1h;nN]Dn0P<01O00001O100O2N0000001O00003MM3O100O1O100O10000002N6J00001O1O3M2N1O2N2M5L3M1O1O0Oml`2" + }, + { + "size": [ + 427, + 640 + ], + "counts": "Vbn3:e< doing in relation to the ?", + "answer": " is looking at the .", + "image": "images/caption_simple_27.png", + "model_output": " is looking at .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000137950.jpg", + "mask_rles": [ + { + "size": [ + 415, + 640 + ], + "counts": "WW1231N11O0O11N10?0<4 and ?", + "answer": " is flying over .", + "image": "images/caption_simple_28.png", + "model_output": " is over .", + "eval_result": "True" + }, + { + "image_path": 
"/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000435208.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "o`02d03AMeb0T1M5Z]OhN1NZb0f1L1OO1O1001N10001O10OO2O1O1O1N2O1O1O1O1N2O1O1O100N2002YN\\^Om0fa0PO]^Oo0ca0PO^^OQ1aa0lNb^OT1_a0jNb^OV1^a0hNd^OX1]a0eNe^O[1[a0dNf^O\\1oa0000000000O1000000000000000000O10000000000000000O10000000O100000000000O1000000000000O1000000000O1000O100000000000000O100000000000000000000O10O100000000000000000000000000000000000V_O^NdN2W`0a1SAiNm>W1k@]NlN>Y`0U1h@TOX?l0c@YO]?g0\\@@d?`0\\@@d?`0\\@@c?a0[@Ae??W@Ei?;m_OOS`01j_O2V`0Ni_O3W`0k100000000000000000000000000000000000000eM]LYDc3`;fL^DZ3_;iLaDW3Z;nLfDR3Q;\\LkBg0T2m2b:fM^EZ2a:gM_EY2a:gM_EY2a:gM^EZ2b:fM[E]2e:cMeDS3Z;nL]D[3c;dLPDj3PjNXAb1c>`NWAg1g>ZNVAk1g>VNTAR2g>PNVAW2e>jMXA^2b>`M_Ae2\\>[MdAg2[>SMhAR3V>jLlA[3Q>dLoA^3o=dLnA`3P>`LoAb3P>_LnAb3Q>_LnAc3P>_LmAd3P>^LnAd3o=`LPB`3k=jLnAY3i=PMTBS3g=RMWBo2h=SMVBn2i=VMRBk2n=d1O1O1O10O010O0100000O01000O10O010000000001O0000001O000000001O001O000O2eKPBd2Q>YMRBf2n=ZMTBd2m=ZMUBe2k=[MWBc2j=[MXBd2j=ZMXBd2l=XMXBd2i=ZMXBf2m=YL_Ag0e0P3U>iL`Bd2a=PMQCi2V?L6K2N2N1O1O0O100000001O0O102N1N01N1101O1N01000O01N110O1O101N101O0O2N2N100O100O1000O10O1O10O1@i^OWNVa0f1P_OWNQa0a1X_O]Ni`0a1l0O1N2N2N2N2N2M4M2L4L5I7J6N6CWgZ6" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is sitting on .", + "image": "images/caption_simple_29.png", + "model_output": " is sitting on .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000137576.jpg", + "mask_rles": [ + { + "size": [ + 563, + 640 + ], + "counts": 
"0g15UOf>k0ZAUOf>k0ZAUOf>k0ZAUOf>k0ZAUOf>k0ZAUOf>k0ZAUOf>j0[AVOe>j0[AVOe>j0[AVOe>j0[AVOe>j0[AVOe>j0[AVOe>j0[AUOf>k0ZAUOf>l0YATOg>l0YATOg>l0YATOg>k0ZATOg>l0YATOg>l0YATOg>l0YATOg>l0YASOh>m0XASOh>m0XASOh>m0XASOh>m0XASOh>m0XAROi>n0WAROi>n0WAROi>n0WAROi>n0WAQOj>o0UAROk>n0UAROk>n0UAQOl>P1TAoNl>Q1TAoNl>Q1TAnNm>Q1SAPOm>P1TAoNl>Q1TAoNl>Q1TAoNl>Q1TAnNm>R1SAnNm>R1SAnNm>R1SAnNm>S1RAmNn>R1SAmNn>S1RAmNn>S1RAmNn>S1RAmNn>S1RAlNo>T1QAlNo>T1QAkNP?U1PAkNP?U1PAkNP?V1o@iNR?V1o@jNQ?V1o@jNQ?V1o@jNS?T1n@jNS?V1m@jNS?V1m@jNS?V1m@iNT?W1l@iNU?V1k@jNU?W1j@iNV?W1j@iNV?W1j@hNX?W1h@iNY?V1g@iN[?V1e@jN[?V1e@jN\\?U1d@kN]?T1c@kN^?U1b@kN_?T1a@lN_?T1b@kN]?V1c@jN]?V1c@iN^?W1b@iN^?W1b@iN^?W1b@hN^?Y1b@gN^?Y1b@gN^?Y1b@gN^?Y1b@gN^?Y1b@fN^?[1b@eN^?[1c@dN]?\\1c@cN]?^1c@bN]?^1c@bN]?^1c@bN]?^1c@aN^?_1;000000001O000001O1O000010O00000001O0001O0001O000000001O001O00001O001N10001N10001O0O2O001O001N101O001O1N10001O0O101O000O101O1N101O1O1O0O2O2N1Ao^O9Wa0NS_S8" + }, + { + "size": [ + 563, + 640 + ], + "counts": "adj0X1P`0?Q@`N]?e1e@ZNZ?g1f@YNX?i1h@TNY?m1h@QNY?m16M4K5J7F:F:F;CY\\34ncL04HWc62lfM2dTKM26Kn01PO4N000NV>[3I100O1O0000000001O001N2O001N2O1O1O1O1O1N2O1N2O2N1O1O001N1O101O000000K5M4N10000001O001O1M4I6N2M5fMk@P2X?oMh@Q2Y?32OO02O000O1O3N0O10001N100O101O0O1000001O000O2O00001O00001O1O001O001O001O001O0O2O001O001O0O2O0000001O000O2O000010O0001O00001O0O101O001N100O1O2N1O3L2N3N2N3N2M9H1O001O00000000001N1O10gon6" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is over .", + "image": "images/caption_simple_30.png", + "model_output": " is over .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000126137.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "bQ`1`0]<`0L2J7OO2mNmCl0SQE6c;I_D6b;H`D6b;H`D7a;HaD5a;IcD4a;FbD5aQZ5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_31.png", + "model_output": " is beside .", + 
"eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000573943.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Ya`84l>5Kb0^O6J1O1O000O01N2N2M3N200O1000001O000000001O000000001O0000000000001O002Ldd9" + }, + { + "size": [ + 480, + 640 + ], + "counts": "doj7g0U>5K5N10000000000000O010O10000001O1O1O1O001O0000000000O2O0O1001O01O000O1000010O0000000O1O2N1O6I_`j0" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_32.png", + "model_output": " is beside .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000225532.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "h_d21f;1O1O2N1O1N2O1O1O1O001O001O001O0oDBi:?VEBi:?VECh:=XEDg:=ZEBe:>\\ECb:>]EDa:=_ED_:=aEC^:=bED]:=cED[:=eECZ:=gEDW:=hEEV:SHAl7?UHAj7a0UH^Ok7c0UH\\Ok7e0TH[Ol7f0THYOl7g0VHWOj7j0VHUOj7l0VHSOj7n0VHQOj7P1VHoNj7R1VHmNj7S1WHlNi7U1WHjNi7V1XHiNh7Y1WHfNi7[1WHdNi7\\1XHcNh7_1WH`Ni7a1WH^Ni7c1WH\\Ni7d1XH[Nh7f1YHXNf7j1ZHUNf7l1ZHSNf7n1ZHQNg7o1YHPNg7Q2YHnMg7S2YHlMg7U2XHkMh7V2XHiMh7X2XHgMh7Z2XHeMh7[2YHdMg7]2YHbMf7`2ZH_Mf7b2ZH]Mf7d2ZH[Mf7f2ZHYMf7h2ZHWMf7j2ZHUMf7l2ZHSMg7m2ZHPMg7P3`01O1O1O100O002N001O001O2N001O1O1O1O1O1O0000000000000000000000000000cMQHR1o7eN_HV1a7eNgHX1X7fNPIU1Q7hNXIQ1h6nNbIi0]6VOiIf0W6YOmId0S6\\OnIc0R6\\OQJb0o5^ORJ`0o5@SJ>n5@TJ2fMOU80WJKdML14OLU89WJ and ?", + "answer": " is driving on .", + "image": "images/caption_simple_33.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000424349.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": 
"XlW48l<>`CBX3L^4a1YKdNb4e1UK_Ng4g1RK^Nl4f1oJ]NP5g1jJ\\NU5h1fJZNX5j1dJXNZ5l1bJVN^5l1_JUN`5n1WIdMLa0m6V3nHlLR7X3bHPM]7_4O1O100O100O10000O100O100O100O10000O10000O100O100O10000001O00000000O10000O1_NoIiKQ6V4[J^Kf5_4cJYK_5`4mJYKU5e4oJSKgNK[6P5j1N2N2O1O1O1N2N2O1O1O1O1O100O100O100O100000000O1000000O10000001O0000000000001O001O001O001O000eIcKi3^4nKlKP4V4fK\\KXNc0R6Q4dKTL\\4m3aKVL^4k3`KVL`4k3^KWLa4j3]KWLc4k3[KVLd4k3ZKWLe4j3XKYLg4j3TKXLf3kNZLP5IYLk3nNWLl4G[LP4mNVLn6i3TIVLl6i3VIVLj6i3[ISLf6k3\\ITLd6k3_ISLa6l3bIRL^6n3dIPL\\6o3iIlKX6T4mIgKR6Y4`100000000O10001O0O101O001N101N1O1N3J5H9EZFiLm9g2c0L5L3N201N101O0000001N2O1O1N101O1O1O00001O001O1O1O1O001O00000001O0001N110O00000001O0001O000000001N10000O101N1O2O001O000O2O001O1O001O1N2N2O0O2O1O2N1O2M2O2N1O1N2O0O2O1N2N1N5L6GcbP1" + }, + { + "size": [ + 426, + 640 + ], + "counts": "^j7=g<6I8L4`ETO[8Q1bGRO[8R1cGoNY8f1UG[Ng8l1RGXNg8Q2UGQNf8U2XGlMf8V2YGkMd8Z2XGhMc8^2[GcMc8`2ZGbMd8a2ZG`Mc8d2[G]Mb8g2\\GZMa8k2\\GVMb8j3O1O2L3QM^KUKJW1j4[3^LcLc3[3`LbLb3[3aLdL`3Y3cLcLa3\\3`LbLb3Z3bLeL_3Z3bLdL`3[3aLdL`3Z3cLdL^3[3fLbLZ3]3lL]LU3c3lL[LU3e3kLZLV3d3PMWLQ3h3SMTLn2l3TMRLl2n3TMQLm2n3UMQLk2o3UMPLl2P4VMmKk2R4XMkKi2U4WMiKk2W4UMhKl2X4UMeKm2[4Q300O1O100O1000000001O00O10000O100000000O1000000000000000000000000000000O11O000000000000O1000000000000001O001O001O0eJeKg1[4WNhKh1Y4SNlKl1T4QNoKo1R4nMQLQ2o3nMRLR2o3lMSLS2n3jMTLV2m3hMULW2l3fMVLZ2l3dMUL[2l3cMUL]2l3bMUL]2m3aMTL^2m3`MUL_2m3_MVL^2P4\\MTL`2R4YMRLd2T4VMQLe2R4WMPLh2R4UMPLj2R4TMoKk2Z4eKTJP1e1Z3_4_LmKY3\\4XLlKf3n6N001O0000001O1O2N3M4L3M2M2O1O2PO[FVNi9f1_FQNd9l1aFPNb9m1o0N3L5K4M3M3L4L4K5M3L4K8I_ko5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_34.png", + "model_output": " is beside .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000173302.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": 
"en\\43U=10000O101O00000001NVDMfo19k[N4O100000001QOBQE>l:HQEd0a:@]ET1n9nNQFR1n9QOPFo0P:ROoENE7]:LmELS;5lDKS;7lDIT;8kDHU;9jDGV;9jDGV;:jDEV;gDBY;>gDBY;>gDBY;>gDBY;>hDAX;?hD@Y;`0gD@Y;`0gD@Y;?iD@J3g:=_E7`:I[EmEClNOW;>mEBmN1U;=mECnN0U;=mECnN0U;>lEBoN0T;?mEAPOOS;`0mEAPOOS;`0mE@QO0m96jF:8@QO0k9:jF6:@RO0i9;jF5;@RO0h9>iF3;AROOc03X8nDAR;?nDAR;?nDAR;?nDAR;?nDAR;?nDA6Ln9c0lE`0S:@mE`0S:@mE?T:AlEOHROh:o0aE_OR;`0oD_OR;a0oD^OQ;b0oD^OQ;a0oDG9D\\jd3" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_35.png", + "model_output": " is beside .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000352760.jpg", + "mask_rles": [ + { + "size": [ + 640, + 544 + ], + "counts": "PX1`bW16YQiN6J3N3L2O3M3M1O2N2N2O1N2N2N1O2O1O1N200O001O1001O0O2O1O0O100O11OO100000000O10O10O01000O100O010O100O1O02OO10000O010O100O00100O01000O010O00100O010O010O01O010hMQO\\Ao0a>TO`Ak0_>VOaAk0\\>XOa_OJm1m0a>@_Aa0^>AbA?]>BdA=Z>DgA=W>DjAEnA;Q>GnA:o=HRB7l=KTB5k=LVB3g=O[B1a=1`BO^=2cBN]j7`1`EPNg2?i7\\1hERNa2b0d7Z1QFPN]2g0`7S1[FTNW2h0\\7d0SG]Nd1o0X7`0WKAf4>^KAa4>aKB]4>eKBZ4=hKDU4`1o_OYNg12Y>]1\\2N2N1O2N101O001O0010O2O0O2O2[OlNZ^OV1_a0TO[^On0^a0o0\\EaMa3c2]L^Ma3e2\\L]Ma3g2]L[Ma3g2]LZMa3j2\\LWMb3l2\\GPM\\36V5n2oF\\Ne2iNY6Q3dFQO`2PNk6\\6jHeIU7d6\\FSIe1;m7e7eFWHL8]9b9N2O1O2M2O1O2N002N1O2N1O010O00O100O2M2O2K4N3M3C`FVFd9c9`0K5J6L4fM[FQJm9h5`FmIe9m5eFeIh9R6_2B7]O`0E;J6J6M2N3N2N2O1O10001O000001O1O00100O2N1O100O2N1O2O1N2N3M2O2M3N1NZLVAY2g>fM_AX2_>gMdAX2Z>iMiAU2U>lMmAS2R>lMQBS2n=mMSBS2k=mMYBQ2f=PNZBo1f=QN[Bn1d=RN^Bm1c=RN^Bn1`=SNaBm1^=SNcBm1]=RNdBn1[=RNfBo1X=PNjBQ2T=oMmBR2R=mMoBT2o and ?", + "answer": " is below .", + "image": "images/caption_simple_36.png", + "model_output": " is over .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000344614.jpg", + "mask_rles": [ + { + "size": [ + 640, + 478 + ], + "counts": 
"Qag32kc04N2M2O100N201M2O1O2K6K4W@VOUXOa0lJj@00Y4Ta0[N8H3K5J6J6K5K5oNk]O6Vb0In]O3Sb0Lo]O0Tb0LU^OGSb06m0M3M3NRXj02ngUO001O000000000000001OO10000001O00001O001O1O001O001O001O0000001O0000001O0000000000001O0000001O000000O100001O1O00001O001O000000000000001O00001O1O000000001O000000001O0000001O00000000001OO1001O1O00001O0000001O00000000001O00000000001O001O00000000001O001O1O2N00OV]OnNhb0Z1K7I7I1O1O1O1O2N000000O100O1N2001O1OO1O1O1M30000000000000000003M1O1O001O00000000001O0R^ORN60J1010N10k`0n1h_OTN``0l1a_OTN@Ne`0o1^_OUNG4ONl`0m2o@oLUN61JO11NS>S3nBlLPOW2Y=l0lCoLiNS2Y=0iBeNU1XOiN;1c1m<6TCcNT1IjNc1jLGc8K_GX4`8iK^GX4b8iK\\GX4d8c3kGfEb6\\:WIgFk5[9lIdDOZ40gK2n01RONO150;OF0Lm0lc0" + }, + { + "size": [ + 640, + 478 + ], + "counts": "W_W6e0Vc0:F9I6K4M4K4L4M3M3M3M2N3M2N3M2N3M2O1N3N0O2N2O1O1N2O1N2O1O1O001O1N101O1O1O1O001O001O001O001O001O001O0001O0001O01O010O001O00001O001O001O001O001O001O001O1N2O1O001N2O1O0O2O1N2O1N3M2O1N2N3M2O1N3M2N3M2N3M3L3N4K4L4M3K7I6J8H=]On_n0" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is on .", + "image": "images/caption_simple_37.png", + "model_output": " is over .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000272148.jpg", + "mask_rles": [ + { + "size": [ + 378, + 640 + ], + "counts": 
"V4c7W40000000000O100000000000000001O00O1000000000000001O00O1000O1O11O00001O2N001O0000JbHPL^7P4bHoK_7Q4aHoK_7V4001O0K`HPL_7Q4aHoK_7U41O100001O0001O00O1000000000000001O00O100000000000000001OO1000000000000000000000000000000000010OO0101O000000000000000000001O0kJfKj2Z4UMhKj2X4VMiKi2W4VMjKj2V4VMjKj2V4TMlKl2T4UMkKk2U4UMkKk2U4WMhKj2X4\\2000lIfKh4Z4VKhKj4X4VKgKk4Y4[100000RJgK[4Y4eKgK[4Z4b10000000000001O2N00000000000001O]IeKe00RO0l3[4_LcKb08mNKQ4[4nMkKPNJR4[4oMjKoMKR4[4PNhKoMMQ4[4QNgKnMNQ4[4RNfKlM0R4Z4RNfKlM0R4Z4RNeKlM2S4X4SNcKkM4S4X4]NhKd1Y4YNiKg1W4XNjKg1W4XNjKh1V4WNkKi1U4WNkKi1U4WNjKj1V4oMbKjM7W4W4oMbKlM4V4Z4nMbKa2]4_McKa2]4]21000000000000001O00O10000000000000O1000000001O000000000000000[LbKN^41gKKY45gKKY43iKMW40lK0T4NnK2R4NnK2R4NoK1Q4OoK1Q40nK0R42kKOU42jKNV42jKNW42gKOY44cKM]4h30000000000000000001O00O1001O0000000000001O000oI`Kh4`4XK_Ki4a4X1000000000000000000000000001O000000O11O0dNeKYK[4f4gKYKY4g4gKYKX4g4jKXKV4h4jKXKW4f4jKZKV4f4jKZKV4f4jKZKV4f4jKZKV4f4jKZKV4g4jKXKV4h4jKXKW4f4jKZKV4f4jKZKV4g4iKYKW4g4iKYKW4g4iKYKW4h4hKXKX4h4hKYKX4i2]KRN:TOZ4h2_KSN7UOZ4h2_KSN7UOZ4g2`KUN5TO[4g2aKYL0d14\\O[4g2hKmMM\\O[4g2hKmMN[OZ4h2hKmMN[OZ46]KZ1;TON^OY44^KY1;VON]OY44^KX1ZOL[OZ4b2kKSNK[OZ4a2kKUNLYOY4b2kKUNLZOX4a2kKVNMYOX4b2jKTNOZOV4e2hKRN2YOW4h2aKPN8YOW4d4jK\\KV4c4kK]KV4b4jK]KW4b4iK_KW4`4jKaKV4]4kKcKU4]4kKbKV4]4kKcKU4]4kKcKU4]4kKcKU4^4iKcKT4nN^Ka5 and ?", + "answer": " is over .", + "image": "images/caption_simple_38.png", + "model_output": " is over .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000222317.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": 
"kc^1g0T>>E7J2N2O0O1fCQOnN_O10[;`1fEFlNkNn:_1WF2`9N`F9Y9GgFl0f8SO[GX1Z8hNgG]1S8cNmGb1n7^NRHf1j7ZNVHj1e7WN[Hk1c7UN^Hl1`7TN`Hm1_7RNbHo1]7QNcHQ2[7oMeHR2Z7nMfHS2Y7mMgHT2X7lMhHT2X7kMiHV2V7jMjHW2U7iMkHX2T7hMlHY2S7gMmHZ2R7fMnH[2P7fMPI[2o6dMRI\\2n6dMRI]2m6cMSI^2l6bMTI_2k6aMUI`2j6`MVIa2i6_MVIc2i6]MWId2g6]MYId2f6\\MZIe2e6[M[If2d6YM]Ih2b6XM^Ih2b6XM^Ij2`6VM`Ik2_6TMbIm2]6SMcIm2]6SMcIn2\\6RMdIYOkNK2P2]7mNfISOTOG0Y2T7mNhIkN@GJ`2m6nNiIiN:Z2l5mNjIgN=\\2h5mNlIdN?_2d5mNoKR1P4nNQLR1n3nNSLR1l3nNTLS1k3mNVLS1i3mNZLQ1d3PO]LP1b3PO_LP1`3PO`LQ1_3oNdLo0[3QOfLo0Y3QOhLo0W3QOjLo0U3QOlLn0T3ROmLn0Q3SOPMn0n2ROSMm0m2SOTMm0k2SOUMn0j2ROWMn0h2ROYMo0e2QO\\Mo0c2QO]Mo0c2QO]MP1b2PO^MQ1a2oN_MR1_2nNcMR1\\2nNdMS1[2mNeMS1[2mNeMT1Z2kNgMV1X2iNiMX1V2hNjMY1U2fNlM[1R2eNoM\\1P2cNQN^1n1aNSN_1m1aNSN`1l1_NUNa1k1]NWNc1h1^NXNb1h1^NXNb1h1]NYNc1g1\\NYNe1g1ZNZNf1f1YN[Ng1e1XN\\Nh1d1XN\\Nh1d1WN]Ni1b1XN^Nh1b1XN^Nh1b1XN^Nh1b1XN]Ni1c1WN]Ni1c1WN]Ni1c1WN]Ni1b1YN]Ng1c1YN\\Nh1d1XN\\Nh1d1XN\\Nh1d1YN[Ng1e1YNZNh1f1YNYNg1g1YNYNg1g1ZNXNf1h1ZNWNg1i1YNWNg1h1ZNXNf1h1[NVNf1j1YNWNg1i1WNYNi1g1VNZNj1f1VNZNj1f1UN[Nk1e1TN\\Nl1d1SN^Nl1a1TNaNk1_1TNcNk1]1UNdNj1\\1UNeNk1[1TNgNk1Y1TNhNl1X1TNiNk1W1TNjNl1V1SNmNk1S1UNoNi1Q1VNQOi1o0WNQOi1o0WNSOg1l0YNXOd1h0[N]Oa1c0_N@^1`0aNC]1=cND\\1P2@SN>n1^OVNb0j1\\OXNd0h1[OYNe0g1YO\\Nf0d1YO]Ng0c1WO_Ni0a1TObNl0^1SOcNl0^1ROeNm0[1ROgNm0Y1ROiNm0W1QOkNo0U1POmNo0S1oNPOP1P1oNQOQ1o0nNTOP1l0nNWOP1j0oNXOP1h0oNZOP1f0POZOP1f0nN^OP1b0oN@P1`0POAo0?POBP1>POCo0=PODo0=QOCo0=QOCo0=QOCn0>ROBn0>ROBn0>QOCo0=QOCo0=QOCo0=QOCo0=ROBn0>ROBn0>ROBm0?SOAm0?SOAm0?SOAm0?SOAm0?SOBk0?UOAk0?UOBj0>WOAi0?WOAh0`0XO@h0`0XOAg0?YOAg0?YOBf0>ZOAg0?ZO@f0`0ZO@f0`0ZO_Og0a0YO_Og0a0ZO]Of0d0ZO[Og0e0YO[Of0f0ZOZOf0f0[OYOe0g0[OXOf0h0ZOXOf0h0[OVOf0j0ZOVOf0j0ZOVOUN^LV1\\4e0VOSNaLW1Y4g0TOoMhLY1T4h0TOPNgLX1U4h0TOSNdLU1X4h0TORNgLS1V4l0ROoMlLS1R4n0ROnMnLS1P4o0SOiMRMV1l3Q1ROhMSMW1k3R1QOfMVMW1i3S1QOcMYMZ1f3S1QOaM\\MZ1d3U1oN_MaM[1`3V1nN`MbMZ1`3W1lN_MfMY1^3X1kN_MhMY1]3X1lNhLfLc0R1\\1\\3Y1POYMfM]1Z3[1POVMgM^1Z3\\1POTMiM^1W3^1QORMjM^1V3`1POQMlM^1T3a1QOoLmM_1R3c1QOkLPNa1o2d1ROjLRN_1l
2g1SOhLSN_1k2i1o0UNQOl1Q1QNoNo1S1nMnNR2U1kMkNV2X1fMhNZ2[1cMeN]2_1^MbNc2h1iLfIK0Ob4]3m6001O000000001O00001O0000001O000000001O0000001O0000001O0000001O0000001O00000O2N1L4K5F:J7H7M3L4L5M2N2O2M2N2O1O3M2M2O2N1O100O1O101N1O1O2O0O10000O1000001N10001N1000000O2O00001O000O101O00001N100N2J7L3N2O2N1O1N200O2N101N1O100O101O000O101O0O101O000O101O0O10001N1000000[JgHc3Z7ZLkHc3U7\\LVIY3k6gL_Io2a6PMaIo2`6PMbIn2^6QMaJQ2_5oM`KeNiL0O07h1b7CfMcNa2\\1c5O0000001O000O2O00000000001O00001N1000001N1000001O00001O00000000001N1000001O0000VK" + }, + { + "size": [ + 480, + 640 + ], + "counts": "`UZ33k>2O2N2N1O2N1O2O0O2N1O101N00100O1O010O01000O010O101N10000O2O00001O0O101O001O0O2O001O001N101O001O001O1O2N0000001O000000001O0000001O0010O01mBfN^ and ?", + "answer": " is lying on .", + "image": "images/caption_simple_39.png", + "model_output": " is lying on .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000231088.jpg", + "mask_rles": [ + { + "size": [ + 640, + 425 + ], + "counts": "cna02jc05L5L3M3M2M4L4L4L4L4K5J6J6H8N1O2N2O2N1O1O1N3N1O1O1N2O3L2O1O2N2M3N2M2O1O1N2O001N10000O10000O10000O1000000O101N10000O100O101N10000O10000O2O0O101N100O101O0O101Z@XLQ?i3PAVLP?j3QATLo>m3b01O0000O1O1B\\@`Ld?[3a@fL^?Q3g@nLZO5m?e2g@gMY?T2f@SNY?d1j@cNV?X1e@ROZ?e0]@SNO`1d?:_@1a?@V@PO4DKo1l?[OY@X1f?hNZ@X1f?a101O01O0000000O1O101N100O102M4M5J4M5J6K2M[OPMj_OOc0m2d?ZM[@b2f?aM[@[2e?kMY@P2i?RNX@j1h?YNZ@b1g?dNU@X1l?lNT@P1m?UOR@f0n?]OS@>n?FR@6o?LR@0o?4o_OHR`0 and ?", + "answer": " is attached to .", + "image": "images/caption_simple_40.png", + "model_output": " is attached to .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000421923.jpg", + "mask_rles": [ + { + "size": [ + 640, + 426 + ], + "counts": "_`Q7f0Yc0=D`0@`0_O>C4L3L2O1M3M3O1N2O10O01O0000000O2N100M32N2N00O1O001O1O1N2N2I6L5O100O10O10O010000N20O0100000O2N1001O000000000000O1O2N100004VMm_O_1e0" + }, + { + "size": [ + 640, + 426 + ], + "counts": 
"ogo42jc06L2O1L5N1O1d^OFR?:m@HR?8m@KP?7n@LXNA``0e0SAO[N^O``0d0TA7k>JTA6k>KUA5k>JUA7k>ITA8l>Hl@d0P?\\Oo@e0Q?[On@g0R?YOm@f0T?]Oh@d0W?^Oh@a0X?@i@>X?Cg@ and ?", + "answer": " is inside .", + "image": "images/caption_simple_41.png", + "model_output": " is inside .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000057149.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "_hW4 and ?", + "answer": " is in front of .", + "image": "images/caption_simple_42.png", + "model_output": " is in front of .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000577932.jpg", + "mask_rles": [ + { + "size": [ + 543, + 640 + ], + "counts": "VTR5 and ?", + "answer": " is touching .", + "image": "images/caption_simple_43.png", + "model_output": " is attached to .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000311002.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "ZWb02W=3O0N20001N10001N10000O2O0000000O10000O100000000000O01000O1K6Mmh[1MWWdN5K3M2O2N1O1O1O1O001O0O101O001O1O1O2N1O1O1N2O1O1O1O0010O103L10O01O3M6I5K8B[b97e]F2N3L6J3M4K5L4K6K4L4L2O3M3]D_NV;c2XO0O2O0000000000001O0000000000000000000000000000001O0000001O00000000000000000000001O0001O00001ZMkET2U:jMoET2R:hMRFX2`:00000010N100000O10O01N101K5F:N2O10O10000O10000000000O11N10000000000000O10000000000000000001O000001O00000000000000000000000000000O2O000O100000000O101O000O1000000O2O000000000O2O00000O101nNYE]Oh:c0YE\\Og:c0ZE]Of:c0ZE]Of:c0[E[Og:d0[EZOe:f0[EZOe:f0\\EYOd:g0]EWOd:i0\\EWOd:g0^EYOc:f0^EYOb:f0_EZOa:f0_EZOa:f0_EYOb:g0^EYOb:g0_EXOb:g0^EYOb:g0_EWOb:i0^EWOb:i0_EVOa:j0_EUOc:j0^EUOb:k0_ETOa:l0`ESO`:m0`ESO`:m0aEQO`:o0aEoN`:Q1m001O000000000000000000000000000000001O000000000O100000000O100000001O0O100000000O100O100O100O1O1O2J5K5N2O100O10000000000000000000001O000O100O5IT[k1" + }, + { + "size": [ + 427, + 640 + ], + 
"counts": "\\gc3:n<4M3L4L3L4N2L4L5M2M3M3N2N3H7O2N1O1]OSNfEa1BkNe:JbE[1MjN_:0_EV15kNY:d1iE`NR:_1QFbNm9\\1UFgNh9Y1YFjNb9Y1\\FhNc9`1UFaNj9`2jFjLW8V3cGQM\\8P3aGRM_8W3VGkLj8l3O000000O100O2N1_OnFfLU9W3b0L3jNV1J4M2EmD^NV;d18000O11O1O10O10O100nN^D:d;D^D:c;D`D9c;DaD8b;F`D8b;DeD6_;Dcol3" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is standing on .", + "image": "images/caption_simple_44.png", + "model_output": " is in front of .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000525600.jpg", + "mask_rles": [ + { + "size": [ + 326, + 640 + ], + "counts": "TeV33o99H7J5L5I6L4K5L4G:I:_HnM^6W2YIRNb6l2L2O100001\\O^IbMc6W2iIbMZ6X2m0K5K5L5K4N3M2M4N16J5L2M3MSOnGIo70ZH1e7GhH5W7@VI>\\8M1M2M6K^Yh2" + }, + { + "size": [ + 326, + 640 + ], + "counts": "oTe2:i9:ZFBT9P1H3M2O2M2N2N3M2OO0M3M4L3O2M2N2O2O0O100O10O2O001N2O4J9G?_O8Hk0VOYN_I1U6dNeIZ1a0Ge5g0hJjN\\5V1i1O3O1XOPHYOQ8a0l0H9N3F`oX3" + } + ], + "question": "What is the relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_45.png", + "model_output": " is beside .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000378139.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": 
"VRa17R=4M2M5L2O1O0O2O1N2O0O100O2O0O10000000000O11O000000O100O1O1N200O1O1O1O1O1O1O2N1O2N1O2N1O1O1O2N1O2N2N2N1O101N100O100O1000000O10000O0100000O100000O100000000O1000O1000O10000000000O10000000001O00O10O2O00O100000O10000000000O1000O1000000000O100000000O100O11O0000O100000000000000O100000O10O10000O10000000000O1000O10000000O1000000O10000000O01000000000000O10000000O10O100000000O1000O10000000O10000000O10O100O10001O00000000O010001O000O01000000000O100000000O100000000O10000000O10O10000000000O10O100000O10000O1O100000O10O10000000000000000O100000O1000O100000000O1000O100000O10000000000O10O100000O10000000000O10000O100000O01000000000000O1000001O000O100000000O10000O100000000O100000000O10000O2O000O101N2N101N2N1N3N1O2N100O101N100O1O2O0O101N100O2N2ORjP2" + }, + { + "size": [ + 427, + 640 + ], + "counts": "_]l23W=2O1O007I00000O1O00100O1O1O010000O106H4J]fU5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is inside .", + "image": "images/caption_simple_46.png", + "model_output": " is in .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000189806.jpg", + "mask_rles": [ + { + "size": [ + 400, + 500 + ], + "counts": "d\\W32[<6I7^OK_D:[;b0K4M3N2N2M3N2N2O1N2N1O1O1O1O100O1M2000N2O010N200O1001O1O001OO100O100100O1ONSFWNX9h1hF[NV9e1kF\\NT9b1mF_NR9_1QGaNn8_1RGaNn8_1SG`Nm8`1SG_Nm8c1SG\\Nm8e1SGYNo8g1QGXNo8i1QGWNo8i1QGWNn8k1PGUNQ9k1nFVNQ9l1lFUNU9m1dFWN\\9m1]FWNc9[200O100000O100000000O100000000O1O1M3N2N2N2N2N2O10PFlMd9T2ZFoMe9]210000O10000O100O10O0100O2O0O100O1CUFTNl9l1Y:N^E5\\:R1M4M3M2O2N1O1O2N2N1O1O1O2N1000O01O100O10O01000O010O001N1DUFSNm9j1WFSNk9m1 and ?", + "answer": " is looking at .", + "image": "images/caption_simple_47.png", + "model_output": " is looking at .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000515445.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "fm_7:[>d0A doing with ?", + "answer": " 
is leaning on .", + "image": "images/caption_simple_48.png", + "model_output": " is holding .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000203580.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "Pc]43V=3N2N100O1O1O1O1O100000000O100000000000000000000000000000O100000000000000000000000000000000000000000000000000000000O100000000000000000000000000000000000000000000000000000000000000000000000000000000O10001O1O2O0ON101O100O10001O000000O100000000000000000000000000000O100000000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000000000000000000000O1000000000O100000000000000000000001N2O_TV1" + }, + { + "size": [ + 427, + 640 + ], + "counts": "dYT47S=2N3L4N1N2O1N2N2O0O100O1O2N100O1O101N1O101N2N101N1O2O0O1O10O01O1O100O100O1O100O100O1O100O01000O1O100O010O01G]DhNe;W171O2O1N101N2O1N2O1M3N2N101O00000O100000000O100000O01000000O10000O010O1000000O10O10O1000O010O1N1K6N1101O2M3N0O2O1O0O100O10000O100000000001OTElMa:S2]GlMg6T2[HlMPO0K00 and ?", + "answer": " is over .", + "image": "images/caption_simple_49.png", + "model_output": " is over .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000499622.jpg", + "mask_rles": [ + { + "size": [ + 456, + 412 + ], + "counts": "Qh>1W>0O2O0O100O1000001N10000000000O1000000000000O10000O100O1O1O1O1O1O1O1O10000N3O000O1000000O1O100O10000O1000000O10000O100O100O100O100O101N100000000O100O10000O1000000O100000000O100000000O2O00000O10000000000O1000001O000000000O100000000O10000000001O0O100000000000000O1000000000000000000000001O000000000000O010N2L4N101N2N2N2O1O00100O001O010O10O010O01000O10O011OO10000O100O1O100O10001N10000O2O0O2O001O001O1O1O1O1O1O2N3M2N2N1O3M3M1O1O00001O0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000003fNjC[O3g0UTEBl:=UECk:=UECl: located relative to ?", + "answer": " is on .", + "image": 
"images/caption_simple_50.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000135872.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "ZYl01Z=0iR20hfb01^V[O001O01NmW10Pkb08kl[O5J5L7I3K2O20O1OjCSON050_;l0\\D\\O3Jb;i0[D[O1Oi;b0UD[O0N16Q<;nC_O1;U<2PDMR<1nCOSCa0_O;E=D7H5L4K2N1O100O1O100[N_M_Hb2a7_M]Hb2b7_M]Hb2c7_M\\Ha2d7_M[Hb2e7_MYHb2g7_MXHa2h7aMUH`2j7eMQH\\2o7^NVGc1j8_100000000000000O100000000O100000000O100000000O100O100O10000O1O1O100O100O1O1O100O100O1O1O1O100O100O1O1O100O1O1O100O100O1O100O1O1O1O100O1O1O1@eJnH]5o6a0O1O1O100O1O100O1O1O10000O1O10aJQIb4n6^KVI_4j6`KWI`4i6_KYIj1OKh6lMUIIi6bMWIo1O`0a7@\\Hc0d7\\O[Hf0e7ZOYHh0S6ZMRKm1kNi0R6[MbIO]1l1POj0Q6[MaI0^1k1POj0Q6[MaI0^1k1POj0Q6[MaI0^1k1oNk0Q6\\MaIN`1i1oNm0P6_MQK:bNf0=a1P6_MRK4gNk06c1P6XMdI3`14iNm03d1n5ZMfI0a14jNm01e1n5XMhI2U2o0VNg1V6XMcKQ1WNg1V6XMcKP1XNh1`8fMPGE52;c2R6ZMiK7kMG238e2R6[MiK4WNKNf2R6[MjK2XNLLg2f5YMRJ3U20WNLMh2e5YMRJ3U20jNd2o4ZMQJ2W2OiNe2o4YMSJ2U20iNe2]5YMlK0gNg2]5YMmKOfNh2]5YMmK0eNg2^5YMmK1dNf2`5XMkK3dNf2a5WMkK4cNe2T8[MlGe2T8\\MkGd2U8\\MkGd2V8\\MiGd2W8\\MiGd2W8\\MiGd2k4XM]M5gMc2l4XM^M4eMe2l4XM_M4eMc2l4YM_M5dMb2l4ZM_M5eMo0XO3d5iN_M6dMc0ZOUO3V1`5kN`M7cM91e0l4kN_M8eMO9m0d4kN^M:WOj0\\3kN\\MX7BXG1_1=Y7DVG0a1;Y7FVGOa15^7NoFNc12_71mFOb11a7OnF0a10a71mF0b1Nb73jF1c1Lc73jF1c1Lc73iF2c1Lc73jF2b1Je74hF3c1Ie74gF4c1If73fF59^Of0:[83eF67Ah06\\83dF85Bj03]83dF84Dj00`83aF;2Dl0Oa82`F=0Do0Ma82_F?MFR1Ib82_FT1c0bN@8^93]FV1:_NF336`92\\FY16jNMKa93[FX17kNLJb9m1aFYNLKc9l1aFZNKJd9m1aFXNKKd9m1bFWNJLd9n1aFVNJMe9m1aFVNJMf9l1`FgN`9Y1aFfN_9Z1aFTNI1g9k1`FSNJ2g9j1`FSNJ2f9l1_FRNK2f9l1`FRNH3h9k1aFbN_9^1aFbN_9_1aF`N_9`1aF`N`9_1`FaN`9_1aF`N_9`1aF`N_9_1cF`No0\\Ol6T2UH`Nc0^ORO2T8o1YH`N93_7[1YHcNM>h7n0]H9b7G^H9b7F_H:a7F^H and ?", + "answer": " is on .", + "image": "images/caption_simple_51.png", + "model_output": " is on .", + "eval_result": "True" + }, + { + "image_path": 
"/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000439994.jpg", + "mask_rles": [ + { + "size": [ + 640, + 428 + ], + "counts": "T?12b0OE2I114O100075d`0j2K2O0001O00001O000O10001O00001O0000001O0O101O0000001O0O11O010O0001O001O00001O00001O001O001O0000001O010O00001N10010O00001O0000001O00001O00001O01O0001N100001O00O20O001O01O00001O000001O0001O00001O00001O001O1O1O1O01OO1001N100000000000000001O1O1O0000O1N200O1000001O00000000000000000000000001O00000001OO100000010O2M2O2O1N1O1O3M>B001O001O0001O000000000VOfNn^OZ1Qa0lNk^OS1Ua0oNi^OR1Va0oNi^OQ1Va0POj^OP1Va0QOi^Oo0Wa0ROh^On0Xa0ROh^On0Xa0SOg^Om0Ya0SOg^Om0Ya0TOe^Om0[a0TOd^Ol0]a0TOa^Om0_a0TO`^Ol0`a0XO[^Oi0ea0i00000000000000000000000000000001O000001O0000000000000000000000000001O0000000001O0000000000000001O0000000001O0000O100000001O000001OO100000000000000001OO2O00000000000000000000000000001O01O001OO1000000000000001O0001O000000000000000000001O0000000000000000010O000O1000000000000010OO100001O00000000]OU^OQOka0k0\\^OROea0l0]^OSOca0l0_^OSOaa0l0a^ORO`a0l0e^OQO[a0m0k^OnNVa0m0o^OTOPa0h0T_OXOl`0e0W_O[Oj`0:`_OEa`09a_OGa`04c_OKYb000000O2O00000O1D<00K6L2M4N2M3O1J6I7O0101O`0^OZk;UOlaC7KJk05Ra0j1O1N10001O0000001O000000001O000000001O000000000000001O00000001O0O1000000N]3" + }, + { + "size": [ + 640, + 428 + ], + "counts": "oUc62kc06VMN_A3\\>6`AK[><`AG]><_AG^>>]AG`>9VAMh>5T@oN=P1^?2U@nN6X1c?KU@PO4X1g?HT@QOL`1o?]OU@U1j?jNS@Z1n?gNo_OV1MgMT`0b35O2M3M2N3M3N1N1M1O003N3N3M3N110;E7I>Ba0k_OkK^?]4N1O2OO01O01O1O1O1O1O1O1N2nL_@^1g?_N^@NJ@>^O^?f0i@MRa0NT_ONo`0OT_OOn`00X_OJh`06Z_OHg`07Z_OHg`06\\_OGf`08b1N2N2M4MmQ3NVnL1ag<" + } + ], + "question": "What is doing on ?", + "answer": " is walking on .", + "image": "images/caption_simple_52.png", + "model_output": " is walking on .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000468501.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": 
"`h_11d;3HOfD3Z;5O101O00001O000000O100O10000O100000000000000000000000O10000000000O10000000O10O10000YGJVOLk4:mKMVOKl48lK2UOGm47mK7SOCn47nK;oN_OS56lK`0nNZOU57lKb0lNXOW57kKe0lNTOY57iKj0jNQO[56jKl0iNnN]55jKP1gNkN]57jKR1fNhN_57jKT1eNeNa57iKW1dNbNb58iKY1bN_Ne59hK[1aN\\Ng59fK`1_NXNi5:gKb1[NVNm59fKY2Y4hMfKY2Y4hMfKY2Y4iMdKX2[4jMdKW2[4nM_KT2c3VM_Ko0e06oN0l4nN]K]17EELf4TO[Kg1OYO4I_4[O\\Ka1IkM7_1c2f5kMPJZOa0j2`5jMSKU2n4jMSKU2o4fMVKY2j4`M_IIi1g2i4`M[K`2f4_M[Ka2f4]MZKd2f4XM^Kh2h6100fGVMe7Q2mGbN=^Og7j1THfN4Aj7d1VHjNOBS8Z1QHSOKDX8S1PHYOGD]8n0nG^OCEb8h0mGD@Df8b0nGJ[ODQ96gG5XOEW9JhGa0oNFb:;\\EEe:f01O1N2O2N2N1O1O1O2L3N3L4Kl[h2" + }, + { + "size": [ + 375, + 500 + ], + "counts": "ccR42d;3N2N1O1O1O2O01OO1O10000N2O1O1O1O0L4M3O01N2010O3M2N2N1N2N1O2N1O01O0N2001O1N200O1O1O1O1O001N100gNhNTHY1j7jNTHV1k7mNSHS1n7mNQHT1n7mNPHT1P8mNoGT1Q8lNoGS1S8lNlGU1T8lNkGS1W8lNiGS1X8oNfGQ1[8oNdGP1^8oNbGQ1_8nNaGQ1`8PO_Go0c8SOZGm0g8QOZGn0h8QOWGo0j8ROUGn0k8ROUGm0l8SOTGm0l8SOTGl0n8TOQGl0n8UORGj0o8VOQGi0P9WOPGi0o8XOQGg0o8ZOQGf0^1nN]5YIc0_1nN[5?WIb0m8^OSGb0l8^OUGa0k8_OVG`0i8AXG>h8BYG=g8CZG and ?", + "answer": " is looking at .", + "image": "images/caption_simple_53.png", + "model_output": " is looking at .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000171190.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "mW_8d03Ej=m0J3O1O2O0O101N10001N100O2O01O100O001O0iNcBl0]=QOcBQ1^=mNaBU1_=610O00001O001O001O10O000N2J6K5G:J5L4K5K5N3M2OnB0m<7M2N2O100O0O2001O1N101O_NF\\D9d;J[D5e;L[D3e;NZD2k1" + }, + { + "size": [ + 480, + 640 + ], + "counts": "[k_71m>2O2N2N100O1O2N1O101N1O1O1O2N100O1O2N1O1O101N1O1O1O1N2N2O2N1N2XOTOYCo0dk0V;@[DE?k0W;AZDB>m0Y;2fDN[;1eDO[;2dDM^;3aDM`;3_DMb;3[DOg;0TD4m;_1002N1O2N1O1O1O1O1O1O1O`NfMZFZ2d9lMYFS2f9QNWFo1h9SNVFn1j9SNTFn1k9SNTFn1l9RNTFn1Q5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is sitting on .", + "image": "images/caption_simple_54.png", + "model_output": " is beside 
.", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000565391.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "e6173a2M\\M0R>3nBM13QO14O[1NbN3NNM0e=n2\\BiNOOOZN218Z4f7f7K1N2O001O1O001O001O001O001O1N101O00001O001O001O1O1O00001O001O001O1O001O001O00001O1N2O1O001\\LPC6P=]3001O1O1O1O001O00002N001O1O001O1O1O1O1O001O002N1O001O1O2N2N1O2N1O1O1O3M2N2N4L2N1O001O2RKaAW4b>eK`AHNL0W4f>n04L4L3M5K6J;`KZ@P4m?N1O1O3M1O1O00003M1O1O00O1002N01O03M0O100001O00O1001O0000O100001O0000000000000000000000000000000000O11O00O10000000000000000000000000000000000000000000000001O00000000000000000jE`Ll3`3UL_Lk3a3QL_LS4a3V6000dE_LY4a3S6000bE_L]4a3cK_L]4a3cK_L]4a3Q600000000000000000000000000000000000000000000000000000000000000000000001O00000000000000000000000000000000000000000000000000000000000000000000000000001O0000000000000000001O000000000000000000001O0000001O001O0000001O0000001O001O00001O1O001O001O001O1O001O1O1O1O1O2X@lK`?\\4N2N2N2N2N2N3M2N2N2N3M3M2N2[EdJb6_5n36J3M2N2N3M3M3M2N1O3M5K3M2N2N3M3L4M3N1N3M3M3M1O2N2N3M3M1O2N2N4L2N2N2eEjGh8X8UGjGj8Z8nFbGPO4R:]8kF_GSO4R:_8mFbGR9a8jF`GV9b8eFbGZ9b8_FaGa9b8\\F^Gd9c8ZF^Gf9d8QFoF09NIP:d9PF\\FP:c9QF^Fn9b9RF^Fn9j901O000000001O00000N31N00LoEYFR:f9oEYFQ:g940lEXFP:h9oEYFQ:g940000000001O00000O11O000000000001O000O11O0aM_FYHNo1c9h5_FYHOn1d9g5]F[HOn1f9]5XFfH5NOm1d9\\5]FgH:1Db1b9i5TGdH[Oc1a9i5^GVJa8g5ZFgHV1c1_8]N\\FS7X1bJ\\8[N\\FS7W1cJ]8`5dG`J]8`5bG`J_8_5aGaJa8]5_GdJ`8\\5`GdJ_8]5aGcJ^8^5bGbJ`8\\5_GfJ`8Y5_GiJa8W5YGoJd8T5\\GlJd8T5\\GlJe8S5[GmJg8Q5YG^IQOm0f9e5YGZIUOQ1`9h5YGXIWOP1`9h5YGXIWOQ1`9f5YGUI[OU1]9e5XGUI]OU1W9i5]GQI]OV1V9i5]GQI]OV1T9k5_GoH]OV1T9k5_GPI[OV1V9j5_GoH\\OW1V9j5VGgHA27\\1R9k5UGiHAN9_1Q9j5VGhH2^1h8j5VGgH3`1f8i5VGiH3^1c8m5YGfH5\\1_8R6lF_HO<3D:_1g8S6lFRIL^O2O0O9a1k8R6nFUIM^O:[1k8S6mFUIM^O:Z1k8V6kF\\I4nNOa1P9c6mFaH3]OMb1Q9Z9XKWE]LKV12GP;^NoD0Ob0o5_O^J5c0Iac0" + }, + { + "size": [ + 640, + 480 + ], + "counts": 
"n??240_OO30N11OO171H4<21KEi?o2Z@_MLBj?o3001O001O001O001O00001O001O001O1O001O001O001O001O1O001O001O001O001O1O001O1O001O001O1O001O1O001O1O001O001O001O1O1O1O001O1O1O001O1O1O2N1O001O1O1O1O1O1O2N2N1O1O2N2N1O1O2N2N1O2N2N3M2N2N3M3M4L5K6YOW]OK`c0O3M2N1O1O1O2NQhY7" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_55.png", + "model_output": " is lying on .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000322829.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "1b8i4001O5K0000001O0000M3N200000000000000001O000000O100000000001O00000000O1001O000000O1000000000000001O001OO10000000000000000000000000O2OO100001O00O10000000001N10O2O01N1000OZKeK>[4^OPL:R4DRL9n3FVL7j3HZL6e3I^L5b3K_L4a3L`L3`3MbL1^3NdL1\\3OfLiMkNa0^4g1jLbMoNb0Y4k1iLaMQOb0V4m1mL[MSOd0P4Q2RMUMPOj0m3Q2WMoLROl0g3U2YMlLSOm0d3W2\\MgLVOm0_3\\2UO]Ml0b2ZOYMf0g2ARM?m22cLN\\3Q5O1O1O002O0O1O010O2N1O1O1N102N1M3O001N2O2M2N2O1O1O1N20nJ_MYO`2d0fM[OY2e0jMZOT2g0lMYOR2i0nMWOQ2j0oMUOR2j0oMVOP2k0PNVOo1k0oMVOQ2j0nMWOQ2k0mMUOT2k0jMWOV2P6N1000O10O101O00O0100000000000000000`JjMKV2J]NNc1NdNO\\11dNO\\11dNO\\10fNOZ11fNOZ11fNOZ11fNOZ11fNOZ11fNOZ11eN0[10eN0Z10gN0Y10gN0X11hNOX11hNOW12iNNW12hNOW12iNNW12iNNX10iN0W10jNOW10jNOV10kN0V1OkN0W1NjN1W1NjN1V1OiN2W1MjN3V1MjN3V1MkN2U1NkN2U1NjN3V1MjN3U1NjN3V1MjN3V1MeNTKTOP5W2LdNUKUOo4W2KcN<]1DdN;[1FgN8Y1HhN7W1JjN5V1KkN4T1MmN2S1NmN2S1NmN2S1MmN4S1LlN5T1KkN6U1JkN6U1JjN7V1IkN6U1JkN6U1IlN7T1IkN8U1HkN8U1HkN8U1HkN8U1HkN8U1GlN9T1GkN:U1ElN;T1ElN;T1ElN;T1ElN;T1DmNU1BjN?V1AjN?V1@kN`0U1@kN`0U1@kN`0U1@kN`0V1_OjNa0V1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1]OlNc0S1^OmNb0S1^OmNb0R1^OoNb0P1_OPOa0o0@RO?m0BSO>m0BSO>m0BRO?m0AUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>l0@UO`0k0@UO`0l0_OTOa0m0^OSOb0m0^OROc0n0]OROc0n0]OROc0n0\\OSOd0m0\\OSOd0m0\\OSOd0m0\\OSOd0m0\\OSOd0l0]OTOc0l0]OTOc0l0]OSOd0l0]OTOc0l0]OSOd0m0\\OSOd0m0\\OSOd0l0]OTOc0l0]OTOc0
k0^OUOb0k0^OUOb0j0_OVOa0h0BWO>h0CXO=g0DYOl0ATO?n0_OROa0o0^OQOb0o0^OQOb0P1]OPOc0P1]OPOc0P1]OPOc0Q1\\OoNd0Q1\\OnNe0R1\\OmNd0S1\\OmNd0T1[OlNe0T1[OlNe0T1[OlNe0U1ZOkNf0U1ZOjNg0V1YOkNf0V1YOjNg0V1YOjNg0W1YOiNf0W1ZOiNf0W1[OhNe0Y1ZOmM[Kc0[5`1ZOmMZKd0\\5_1YOiNf0V1ZOkNf0U1_OfNa0Y1@fNa0Z1_OfNa0Y1@gN`0Y1_OhNa0X1_OhNa0X1_OhNa0W1@iN`0X1_OgNb0Y1^OgNb0Y1^OgNb0Y1_OfNa0Z1_OfNa0[1^OdNc0\\1]OdNc0]1\\OcNd0]1\\OcNd0^1[ObNe0_1]O^Nc0b1]O^Nc0b1]O^Nc0b1]O^Nc0b1]O^Nc0b1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O\\Nc0d1]O\\Nc0d1]O\\Nc0d1^O[Nb0e1^O[Nb0d1_OZNc0f1]OYNd0g1\\OZNc0f1]OnMSK6^5l1_OmMUK3_5P2\\OmMVK1_5R2[OlMQ1T2^OiM6W2IjM7V2_5000QJmMf0S2ZOmMf0S2ZOmMf0S2ZOmMUKMV5V2EmMQKLN1\\5W2DlMRKML1^5V2DkMSKNK1^5V2DjMTKOJ1^5V22iMNW2HkMoJNY5V2ImMmJMZ5V2ImMmJNY5U2IoMlJM[5T2FYN:g1FYN:g1FYN:g1FYN:g1FTNmJG^5V2DXN;h1EXN;i1DXN;h1EXN;h1EXN;i1HhMQK2W5V22hMOX2b51O00O100O100O10000O1000000000000000000001O1O001O001O00O100000000000000000000000000001O000000000000001O001O00001O00001O001O002N2N1O001O2N1O1O2N2N3M2N2N3M1O3M1O1O2N2N1O1O001O1O001O1O1O1PJYLZ2g3]L^LfNNl4e3ZLaLeNNQ5a3XLeLdNKT5`3TLjLdNJW5\\3RLnLdNHZ5Z3nKTMcNE_5W3kKVNU4j1iKYNV4g1hK[NX4e1fK]NZ4c1dK_N\\4a1aKbN_4^1`KcN`4]1^KfNa4[1\\KgNd4Y1ZKiNf4W1XKkNh4V1UKmNj4S1TKoNl4R1QKoNP5S1hJTOW5Q1^JUOb5Z40001O001O000000001O0000001O1O00001O001O1O001O1O1O1O1O00001O00000000001OO1000000000000000000000000000000000000000000O1000000O100001O00000000000000001O000000000000000000000000000000000000O1000000000000O1000000" + }, + { + "size": [ + 427, + 640 + ], + "counts": 
"k\\T1221P=`0E2M5M5J2O1O1O2N2N3L2O1O1O5K3M6J1O3L`0_O3`E_MS:c2hEdMU:Z2nEdMQ:Q2kEPNd:Q2\\EoMd:]2N2O0O1O1O1O2lEUMe9j2>00001N3N1N2N2O1N2O1O1N2N2O1O1N101O1O100N101O10O1O1M20100O1O1O010O10O01O1O1000000O1000O100000000000000001O00000000O1O1000000000000001O00002N001O00000000000000000001O000000000O100O1O0010000000000000000000001O000000000000000000000000001O000001O00000000001O000000000000O011N1O11O00O1O001000000000000000000001O1O000000000001N100O10O11N101O00000O2O0O2O0O1N2O1O1O1O1O1O100O10000O101O0001O000000001O00001O001O1O1O010N4M1N2O01O01O00001O0O101N110O000O2O000O110O001O000O1O1O11N1O101O1O0O11O0000000000001O1O1O001O0000000000000000O100000000000000O10000000000000000001O0O100001OO1001O00000O10001O01O00001O0O10001O00001O0000O100O10000O100000000O10000001O001O0O2O000O2O00000000OgMQFX1o9hNQFX1o9S1O001O1O00001O002OO01O1O1O3M00010O002N3NO010O0000001O10O01O1N4M1O1O2N1O2O0O1OfHmL\\M2f6o2PLmLZM40Hd6U3TLlLZM6i6k2TLQMR4n2W3N2N2N3N0O101M2N101000OO2O2N0JZEgMg:U2:L3O2K5I6O1L3DXDTOl;k0SDQOR and ?", + "answer": " is in front of .", + "image": "images/caption_simple_56.png", + "model_output": " is in front of .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000535523.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "Zeh52Y=2iB0P=600O2O001N101O1O1N2O3M1O001O1O3M0O10001O01O010O01O1O10001OO1000O01O010O101OO1cCTOY^:CbE<^:DdE:]:FbE;]:FbE;]:FcE:\\:GcE9\\:IcE9[:GeE:Z:GdEX:CcEa0Y:CeE?V:GiE:Q:LnE4k94SFMj97UFJj96VFJ\\9gNeF`1MJV9oNlFX1LJU9QOoFj2P9WMoFi2Q9WMPGh2Q9XMmFi2T9WMkFh2W9f0001O001O1O1O001O001O1O001O1O001O00001O1O001O001O00001O001O001O001O1O001O00001O1O001O001O00001O001O001O001O001O001O001O001O00001O1O00001O001O001O001O001O00001O001O001O001O001O00001O001O00001O0000001O000000000000000000TE" + }, + { + "size": [ + 428, + 640 + ], + "counts": 
"eiR73X=1O2N2N1O2N100O2@GjC:h;3YDM`;:_DH_;8aDI`;5`DLa;2_D0`;O`D2`;M`D5_;JaD7_;HaD9`;DaD=_;B`Da0_;^OaDc0_;\\OaDe0`;XO`Dj0`;UO`Dl0`;ROaDo0_;oNbDR1j;2\\OmNmDU1k:hNmD47U1k:hNoD07Y1i:hN^EZ1b:fN\\E\\1d:dN[E]1d:cN[E`1W;1M3M3O1M3N1N3N2M102O0010O2O0O10000O2O0O101N1O2N1O1O2M2O2L4M2M4L4L4K5I7K5J8GeW:" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is over .", + "image": "images/caption_simple_57.png", + "model_output": " is on .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000276018.jpg", + "mask_rles": [ + { + "size": [ + 640, + 416 + ], + "counts": "Una05kc01N102N1O2N1O0O2O0O2M2M3M3K6K4J6L4N2O1O101O000d@cNU<]1cCmN[WOeAo0[>QO]AW1c>iNWA]1R>VMVB\\1Dc1U>RMXBY1_Oi1X>oLZBX1[Ol1[>kLZBR2\\O9[>dMZBQ2_O9V>fM\\BP2^OdM`BT1CROM\\2j0jM[;D[DQ1oLAQ3`0oL_OQ3b0mL^OS3c0hHUL32\\2T3j4h0^IZLc1n2P5i0YI_Ld1d2X5m0nHhL5AMj2S7m0lHhL4\\OOf2\\7U1bHiL\\1b1e6e1oGiLY1[1R7k1fGkLS1]1W7i1fGjLo0`1\\7g1dGiLn0b1^7e1cGkLe0h1i7^1aGjL?o1P8W1bGiLk7TM_H^2C`0n7RM`H_2_O`0R8oLaH_7`7_HbH`7_7^HdH`7]7_HcHa7^7]HdHb7]7\\HdHd7^7YHeHe7]7XHeHg7\\7WHeHi7]7SHeHm7^7YGjGJk0m8e9O00001bNTEfIl:Y6]E_Id:^6bE^I_:`6dE^I]:_6iE]IW:^6PF`IQ:\\6TFbIm9\\6YF_Ig9a6^FZIc9e6n1M101O1O1O1N2O1O1O1N2N3N1M4A?]M[B_Nn=YNcAk2U1UNg=h1]2L1N2N2O2M3N2M2O2N2M4K6I4L5K6J6K5J5M5I7J7EejR4" + }, + { + "size": [ + 640, + 416 + ], + "counts": "9Y33PNd0hN]O0c01]O4O0f0KZO7OK0O1o0NXO0K`0R1@a05\\42^7LbH3a7J`H5f=00O1O1000000O100O100Oig19fWNId\\O`0Sc0?QOYOT^OQ1ca0k0C=L5L4K9H9G6JR5@=C:F3L10000O1O1M3L4L4J6L4J6M3K5K5O1O1O1O1O1O1lEcHb7`7]HcH`7^7_HdH^7^7aHeHZ7^7dHeHZ7\\7dHhHW7[7hHhHT7Z7kHhHS7Y7lHjHP7X7oHlHk6W7UIkHe6Y7ZIlH`6V7_IPIYNmN_7W8WJhIUOYOa3T7YMQJ\\NBl3b6gMmLd1X3[NQMT1X3kNPMg0W3XOXMXN\\IT12ROj9a1\\M`N\\3_1c700O1O1O1O1O1O1O1O1O1O100O1O1N2O100O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1N2O1O1O1O1O1O1O1O1O1N2O1N2O1NRTg5" + } + ], + "question": "What is the relationship between and ?", + "answer": " is guiding .", + "image": "images/caption_simple_58.png", + "model_output": " is 
in front of .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000404249.jpg", + "mask_rles": [ + { + "size": [ + 640, + 427 + ], + "counts": "Y[i17hc03M3M1O2O0O2O1n\\OD^b0<`]OH]b09b]OI]b07b]OK]b06b]OJ^b07a]OI^b08a]OI_b08`]OH`b01_]OC1<`b00a]ODNd:n0cHg0gMmNf9<_HP1eMeNk9>[HR1gMaNm9i0nGk0PN_NQ:R1\\EgMj0f0CU4U:gMoEYN^O]4c:\\MkEc3T:cLcEa3]:Q300O100O001dNiFnGW9n7cFQG;l0R9o7WGQHh8m7\\GRHd8k7`GSH`8m7bGQH_8n7cGQH\\8n7gGPHZ8o7hGoGY8o7mGlGS8T8SH_GU8`8e101OO2H701O010lMYGnHg8P7bGhH_8Y7[HkGf7T8_HeGe7Z8Q2N2ZOSEmGJ0Z;l7f0N2H9M2M3L5O_FQIQ6o6ZIiHiMj0d8h6cH_JX7i5RHlJNZMm6U8fHgJ7ZMR7Z;kHgDV7Y;hHhDX7X;eHlDZ7T;bHPE_7h;0000001O1XE_Ho8a7PGaHo8`7mFcHS9_7hFeHW9\\7bFjH^9Y7^FiHa9X7ZFlHf9W7UFkHk9V7RFmHm9[7iEgHW:b8O001N101O0O1O1O2N1N2O2L4M3eLoE_LW:`NVFn2HUN]:jNQFY2AaL9X2V:nNTFR1_1H_8TOZF`0a2]OX73bFLb2Ko68jKBY4>lKXOX4h0a7O10X]OjNeb0Y100Z]OhNcb0W1^]OhNbb0X14O12ON10O01O1002ON001O1000\\]OkN\\b0T193M1O1O0000O1O0106KM2N1100004K1O2O0002N10N11O0011NO21M13MO101N2O11OO000O21ON011O100O21M10N101O01O02M1O2N2M1O2N2N2N3M2O3J]bf2" + } + ], + "question": "What is doing on ?", + "answer": " is riding .", + "image": "images/caption_simple_59.png", + "model_output": " is on .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000098287.jpg", + "mask_rles": [ + { + "size": [ + 640, + 415 + ], + "counts": 
"m;4o2O^MNa22cMNk;0hC02:KHa0Nb03RO9K07FI1140KO:7FO0d04WO02KO001OO106OK0_;7`Dm7];;000000000000O1000000001O000000O1iNaGPFOc0a8V9_GQF`0g0Q8X9`GPFa0f0o7Z9SHgFm7Y9PHjFP8V9jGPGV8P9hGRGX8n8gGSGY8l8hGTGX8[:O2N1O1O1O2N1O001O001O1O1O00000000000000000000O10000O1J6000000gNgGjFZ8T9iGjFX8U9jGiFW8V9jGiFW8W9iGiFW8X9iGfFX8Z9hGfFX8Z9hGfFX8Z9hGfFX8Z9hGeFY8[9hGdFX8\\9hGdFX8\\9hGcFY8\\9hGdFX8\\9iGbFX8]9iGbFX8^9iG`FX8_9iG`FX8_9iG`FX8^9kG_FW8a9iG]FY8b9jGYFY8f9Q10000001OO1O1001O00000000000000O100000000O100000000O1O1000000000000001O0000000000000000O100000000000000O100001O00000000O10000O100000000000000000000O1000000000000000[GYFQ7g9cHYFlN;^8\\9fHZFjN;`8Z9gHiFY7W9gHiFY7W9cHmF]7T9\\HRGd7n8WHYFZOk0_8l8SH[Gm7e8QH]Go7f8gGaGY8R:100O1K_GjDb8S;7N2M3N2O1L4I7M3O1M31O1O002N1O2N4L1O3M1O5K5K1O2N1O]N^GmG]8R8iGjGV8V8mGhGR8W8RHfGn7Z8VHbGj7^8YH_Gg7a8`HXG`7g8fHSG[7m8iHoFW7Q9kHmFU7S9nHjFR7W9oHfFR7Z9RIaFo6_9RI_Fo6a9h1O100000000000000000000O100000000000000000000O1000000000000001O00O100O1000000000000000000000000000000000000000000O10000000000O1000000000000001OO100000000O1O10000001O000000000000000000O100000000001O000000O100001O0000O10000O1000000000000000000000000000000000000000000O11O0000O1000000000000000000O10000000000000000000000002N001O000000000000000000O100O100N2M3O10000001O00000000O100000000001O001`FlFlN100j3NXL0T4Wc0dKk\\O1?" 
+ }, + { + "size": [ + 640, + 415 + ], + "counts": "UeX31T22a?4U@4h?NV@4h?NV@4i?NR@6l?LP@8o?In_O:P`0Hm_O:S`0Je_O:\\`0LX_O3n`00e^O8^a0R1201O1N2LJPNd^Ok1[a0=2M3O100O21O002N1O2N2O1N2O1O3McMe^OK2g04 and ?", + "answer": " is standing on .", + "image": "images/caption_simple_60.png", + "model_output": " is enclosing .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000278973.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "Sfn13W=1N2O0000001O1O1O1N102N1O1O000O2O001O1O0O101N1000000O10000000001O1O00001O001O00001O00001O00000O10000O100000000000O01001O00O10000O10000O1000O100000000000000O10000000000O2O0000001O0O2O001O001O0O2O001O001O000O10000O10001O000O2O00001N101O001O001O0O2O001O00001O0010O010O010O10O010O010O1O2MiQ`4" + }, + { + "size": [ + 426, + 640 + ], + "counts": "fjT12V=?A5M1N100O0001O010O010001O0O2O0O10001CW9k1O2O0O01_OkFkLU9S3nFlLS9Q3PGnLP9o2TGPMm8m2VGRMj8m2XGRMi8h2]GWMd8f2_GYMb8d2aG[M`8`2dGaM\\8\\2gGcMZ8[2hGdMY8Z2iGeMY8W2`1L5J;E7J6J7H8@>Ghdg4" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is on top of .", + "image": "images/caption_simple_61.png", + "model_output": " is on .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000104198.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": 
"dS2b0P;6J6N3M3N2N1O2N2M101IfNnE\\1P:6O20O2ON1O101O001XG\\N[7e1WHjNg7c1bGgN_8X2O1000000O1001O00000000O1O11O1O000jNaGSO_8S200O2N20O1O000O02O1N10O100000O10000lNkLfIP3TOjLT7=fIe2]6^McIT2VO^M_7a0ZIe1S7`NkHZ1W7kNhHU1V7nNhHR1X7ROdHo0\\7ROcHn0^7SO_Hn0b7SO\\Hm0d7UOYHl0h7TOSHP1n7\\11O00O1O1O100O1O1O10000001O001OO1O1O10000O1000000O100O100O10O1001N100O1O1O1POTLUJl3h5XLVJi3f5^LVJc3h5`LVJa3j5`LUJ`3j5cLTJ]3k5fLQJ\\3o5gLkI\\3U6S11O2N0000O100O1N2N2O2N1O1O10O10001O2N0010O0O100O1N2N2N2M3M3M3M3L4N2K5J6L4N20000O100O1GQIjKo6U4TIhKm6e3VIbLNHl6Z3SIfL88HHm6Z3SIeL89EG01P7Y3TIfL7b0e6g2UIeL8e0b6W3RIkLn6o301O00001O3M4L9G4L5K5K3M3M3M2NO100O100O1001O001O1O1O001O2N1O1O1N1001N10O10O2O00O200_KoIZ3R6dLQJZ3o5dLTJ[3l5cLWJ\\3j5bLWJ^3j5`LXJ_3j5^LXJa3j5\\LXJc3n601O1O1O1O001O001O001O1O001O1O1O1O00000000OgMSHj0l7VOXHg0g7YOZHg0f7WO\\Hi0d7UO^Hk0b7TO`Hk0a7TO_Hl0b7POaHP1`7mNbHS1`7kN`HU1b7hN_HY16XNU6=gIb2X6YMlIh2Z6lLkIT3]7O001O001O001O1O1O00001O001O001O000000000000001J5ZO\\GPNn8k1b0D;N2O01O1N1O1100OO10O2N2O1N2O1N3M3N3K?BZTW1\\O[lH" + }, + { + "size": [ + 375, + 500 + ], + "counts": 
"2g15N1aNd7a1[HN0bNe7a1ZHM1bNe7a1ZHM1bNe7a1[HL1bNd7b1[HL0cNe7b1ZHK1cNe7b1[HJ0dNe7b1[HJ0dNe7b1[HJ0dNe7c1[HH0eNe7c1[HH0eNe7c1[HH0eNe7c1[HH0eNe7d1ZHG1eNe7d1[HF0fNe7d1[HE1gNd7d1[HE1fNe7f1YHD2gNd7e1[HC1gNe7f1ZHC1gNe7g1YHB1hNf7f1YHB1hNf7f1YHB1hNf7f1ZHA0iNf7f1ZHA0iNf7g1YH@1iNf7g1ZH_O0jNf7g1ZH_OOkNg7f1ZH_OOkNg7g1YH]O1lNf7g1YH]O1lNf7g1ZH\\O0mNf7g1ZH\\O0mNf7g1ZH\\O0mNf7h1YH[O1mNf7h1YH[O1mNf7h1ZHZO0nNf7h1ZHZO0nNf7i1YHYO1mNg7j1XHYO1mNg7j1YHXO0nNg7j1YHXO0nNg7j1YHXO0nNg7k1XHWO0oNh7j1YHVOOPOh7j1YHUO0QOg7j1YHUO0QOg7k1YHSO0ROg7k1YHSO0ROg7k1YHSO0ROg7l1XHRO1ROg7l1XHRO1ROg7l1YHQO0SOg7l1YHQO0SOg7l1YHQO0SOg7m1XHPO1SOg7m1XHPO1SOg7m1YHoN0TOg7m1YHoN0SOh7n1XHoN0SOh7o1XHmNOUOi7n1XHmNOUOi7n1XHmN0TOh7o1XHmNOTOj7o1WHlN0UOi7P2VHkN2SOi7R2VHjN1SOj7S2UHjN1SOj7S2UHjN1QOl7V2RHiNV8W1jGiNV8W1jGiNV8W1kGcNZ8]1fG_N^8b1aG]N`8c1`G\\Na8d1_GZNc8f1^GXNc8h1]GWNd8j1[GVNj0He6R2aHUNa09h6b1hHTN6ELm0n6Z1PISN6HJP1l6U1TISN6HJU1g6Q1XIQN8II\\1`6j0_ImMbJkMYOl1Q69dJmM[Oo1l55eJoM_OQ2g50ZJlMM92P2b5K_J_NOh1`5IaJ_NOi1_5IaJ^N0i1_5IaJ^N0j1^5HbJ^N0j1^5HcJ]NOk1^5HcJ\\N0l1]5IbJ[N1l1]5IbJ[N1l1]5IcJZN0m1]5IcJZNOn1^5HcJZNOn1^5HcJZNOo1]5HcJYN0o1]5HcJYN0o1]5HdJXNOP2]5HdJXNOo1^5IcJXNOo1^5IcJWN0P2]5IcJWN0P2]5JbJVN1o1^5KbJUN0R12oN]5j1aJUN0o07oNY5m1`JUN0m0:POV5n1`JUN0h0a0SOo4Q2_JTN1f0f0QOk4U2_JSN0g0g0POj4V2_JSN0f0j0mNi4Z2]JRN0g0m0iNh4_2ZJQN2f0i6Y1VIPN1g0i6Y1VIPN0i0i6W1WIPN0j0h6V1XIPN0j0h6W1WIoM1k0g6U1YIPN0j0h6W1WIoM1j0h6W1XInM0j0i6X1WInM0h0k6Z1UInM0g0l6\\1SImM1e0n6^1QIlM2e0n6_1PIlM2e0n6_1PIlM1e0P7_1PIkM0e0Q7a1nHjM1e0Q7a1nHjM1e0Q7a1nHjM1f0P7`1oHjM1f0P7`1oHjM1g0o6_1PIjM1h0n6^1QIjM0j0n6\\1SIiMOj0o6]1RIiMOj0o6^1QIgM1j0o6_1PIgM1i0P7`1oHgM1g0R7b1mHgM1d0U7e1jHgM1a0X7h1gHgM1>[7k1eHfM0;_7o1aHfM09a7R2^HeM07e7T2[HeM04h7W2XHeM1Ok7\\2THeM1FT8e2kGeM\\8[2dGeM\\8[2dGdM]8\\2cGdM]8\\2dGbM]8^2cG`M_8_2=N2F:E;G9C=N2O100002N1lEiNf9e2oN2VG]M\\8c2cG^M]8b2bG_M^8a2bG_M1MS8d2lG_M1OQ8c2nG]M13n7`2QH]M1[N]O1c5i2bJmN;[N_O0c5h2cJoN7[NCNc5h2cJRO2ZNHMb5g2dJFJdMa5f2eJFIbMe5i2aJEJbMe5i2aJDKcMd5i2`JEL`MNNf5m2`JEL`MNOe5l2aJEKaMONe5l2aJDLbMNNe5l2aJDLeMc5g2aJDLbMNNe5l2aJDKbM0Nd5l
2aJDKcMOMe5l2aJDKbM0Nd5l2aJXOMoM1Nf5k2[JXO0lM11d5j2\\JYO8mM\\5j2\\JYO8lM]5l2ZJXOW6h0iIXOW6g0jIYOV6g0jIYOV6g0jIYOV6g0jIYOV6g0iIZOW6f0iIZOW6g0hIYOX6g0hIYOX6f0iIZOW6f0iIZOW6f0iIZOW6f0iI[OV6e0jI\\OU6d0kI^OS6b0mI@Q6`0oIAP6?oIBQ6>oIBQ6>PJAP6?oIBQ6>oIAR6?nIAR6?nIAR6?nIAR6?nIAR6?nIAR6?nIAR6>nIBS6>mIBS6>mIBS6>mIAT6?lIAT6?lIAT6?lIAT6?lIAT6?lIAT6>lICT6>kIBU6>kIAV6>kIBU6>kIBU6>kIBU6>kIBU6=lICT6=lICT6=lICT6=kICV6=jICV6=jICV6=jIBW6>iIBW6=jIBW6>iIAW6`0iI_OX6a0hI^OZ6a0eI_O\\6`0eI_O[6b0eI]O\\6c0dI\\O^6c0bI\\O^6e0bIZO_6f0aIYO`6g0`IXOa6g0`IYO`6g0_IYOb6g0^IWOd6i0\\IVOe6j0[IVOe6j0[IUOf6k0ZISOh6l0YISOh6m0XIROi6n0WIQOj6o0VIPOk6P1UIoNl6P1TIPOm6P1TIeNJjNT7b2SI[M\\8R2n0I7M3J6I7J6L4I7J6K5K5LPic0NTW\\OS1cDhNY:\\1mEiNd9W1[FkNd9S1\\FoNd9P1[FQOf9l0\\FUOd9j0[FXOe9f0[F\\Oe9b0\\F_Od9?\\FCd9;\\FHd95\\FMd91\\F?U9@kFb08kNFMZ8d0hGf0LfN55JO[8>jGj0KeN63KO[8=jGm0JdN63KO0MU8?oGP1LaN54JO2MT8Ih7m0hG\\O?Gj7i0gGD?Cj7g0eGJa0@h7d0jGL=Ch7>iG3?@B^Oo7l0RH71kN6m0GVOf7i0^H:OjN6U1[7F`HRIPN0c1m6=SIPN0d1l6 located relative to ?", + "answer": " is over .", + "image": "images/caption_simple_62.png", + "model_output": " is over .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000224051.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": 
"WeS3451j4o0j3R4QLbJ1<6n0k3Q4mKhJ199l0k3Q4jKkJ37f0j3V4aKoJ93=g0k3f4gKcJ>L_O6_4Y5eKdJ?EE8CDf4j5cKeJ`0AK6X4c5^KeJ]6Y5cIgJ^6Y5bIfJ`6X5aIgJa6W5_IiJb6W5^IgJc6Y5]IgJd6X5\\IhJ=H]OOe5a5`JhJ7=U5j4dJiJ6?U5h4eJiJ5`0W5f4dJjJ5?Y5f4bJmJ3=\\5f4_JPK38a5g4\\JRK25e5h4ZJSK04g5i4YJTKN4i5h4YJRLi5m3WJQLk5P4TJoKn5P4RJoKo5Q4QJnKP6S4oIkKS6U4mIiKV6V4jIiKW6V4kIhKV6X4jIhKV6X4kIdKX6\\4hI\\K`6d4aImJm6S5SIlJn6U5RIjJn6V5RIjJn6W5RIgJo6Z5QIdJP7`5mH]JU7d591QI^J\\O?o5S5aJ_KY5b4eJdKU5]4iJgKU5Y4jJiKU5V4kJlKT5T4kJnKS5S4lJnKT5S4kJmKU5S4jJnKU5S4kJmKT5T4kJnKT5S4kJmKT5S4nJlKR5R4PKoKn4Q4SKZLb4e3`K^L[4b3fK_LY4a3gK_LX4b3hK^LW4c3iK]LW4c3hK^LW4c3iK^LU4c3jK]LW4c3iKULCoM11b4l5jK]LT4d3lK]LR4d3mK]LR4e3nK[LP4f3PLZLo3h3PLXLo3l3oKTLo3Q4nKnKQ4S4oKnKP4S4QLlKl3V4VLhKi3Z4VLgKg3[4YLgKd3Z4\\LfKb3\\4^LeK`3\\4`LeK^3\\4bLeK\\3\\4dLdKZ3_4eLbKY3_4gLcKT3a4lL_KQ3c4oL_Km2d4SM\\Ki2g4WM[Kf2e4\\M\\K_2h4`MYK^2i4`M[K[2g4dM[KX2i4hMWKT2l4lMYKl1j4TNYKe1k4\\NYK[1k4eNXKT1m4lNYKk0i4UOg4001O001O0O100O00O010OO200001N2N200O3LhTi1" + }, + { + "size": [ + 428, + 640 + ], + "counts": "T:X3T:000O100OoMRFmNMc1n9ROSFD5CM657H30EQ:MPF50K1N08NJ7NL584J4KHV:NoEO15LI60>2[O069JJQ:KPF33O15LJ5OL1OO:2E158KJk99VFG42JL77JH3;0MMKi9 and ?", + "answer": " is parked on .", + "image": "images/caption_simple_63.png", + "model_output": " is parked on .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000530099.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": 
"T\\Y11d;3N1O100O1O1O1O1O1O100O1O1O100O1O1O1O1O1O1O100O1O1O1O100O1O1O1O1O1O100O1O1O1O1O1O100O1O1O1O100O1O1O1O1O100O1O1O1O1O010O1O1O2N1O100O1O1O1O100O1O1O1O100N200O1O1O100O1O1O1O100O1O1O1O100O1O1O1O1O100O1O1O1O1O1N2O1O100O100O1O1O100O100O1O100O1O10000O10000O100O100O10000O10000O10000O10000O1000000O100O10000O100O10000O100O100O10000000000O100O10000O10000O1000000O100O10000O10000O1000000000000O1OVM[Hd1e7ZN\\Hg1d7VN_Hj1a7UN_Hl1a7SN_Hn1a7QN_HP2`7PN`HQ2`7nM`HS2`7mM_HT2`7kMaHV2_7iMaHX2^7hMaHZ2_7eMaH\\2_7bMbH_2]7aMcH`2]7_McHb2]7]MdHc2\\7\\McHf2\\7[McHf2]7XMdHi2[7VMfHk2Z7TMfHm2Z7RMfHo2Y7QMgHP3Y7oLhHQ3X7nLgHT3Y7kLgHV3X7jLhHW3X7hLhHY3X7fLhH[3X7dLhH]3X7bLiH^3W7aLiH`3V7`LjHa3W7\\LiHf3b72O1O1O1N2O1O1O1O1O1O1O1O100O100000000000000O10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000GhKmH11X4R7gKmH21V4R7hKmH21V4R7kKPIU4P7kKPIU4Q7jKoHV4R7gKPIY4W700000001O000000cH" + }, + { + "size": [ + 375, + 500 + ], + "counts": "Zel11f;1N2O3L2O0O2O0O2O1N2N3N3L3M102M1O3L2N3O1N2O1N3M2N2M2O1O1O1O1O1N2O1O001O1O10O00010O01O100O001O100O100O100O1N3O1N2N1O2OO10O0100O010O010O10O01O10000O010O1000000O10O1000O1000O010000O10000O1O01000O1O1000O01O1000O0100000O01O1000O10O1000O01000000O01000O1000O1000O0100000O1000000O0100000O1000000000000O10000O1000000O100O1000000O101O0O10001N101N101O001O000O2O001O001N10001N1O1O1O2N1O1N3N1OaEROX:l0gEVOY:i0gEYOY:e0gE[OZ:d0gE\\OZ:a0fEA[:=eED[: and ?", + "answer": " is sitting on .", + "image": "images/caption_simple_64.png", + "model_output": " is on .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000202339.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "Vk_18cc06I6J7J5J7I6J6J7J6J5M3M4L3M3M3M3O1000000000000000000000O100000000O100000000O10000000000O011O001Og0XOc0^O9G:F4K6Kohi6" + }, + { + "size": [ + 640, + 
480 + ], + "counts": "koo18gc03J6L3L3L5L3L4K4O2M3O1H8L2O0J3QN`NP@IY1m1k>j1O10M3O1O1010o_O_Lg0LSOe0n=h3[BRMEcNR34U7^4kEYMf2]N\\7d6[H^Id7j6SHXIk7P7mGQIR8[7aGhH^8^7\\GbHc8c7YG]Hg8g7UGYHP7H[IS8AVHn63[Ii7DUHP79UIf7HPHR7a0QIb7JmGS7f0PI^7KmGT7h0nH_7JjGW7l0kH[7LjGX7m0kH[7JiGZ7o0jH[7HfG_7P1hHZ8W7iGfHWNIj9`7SHbHUN4a9[7[H]HXN3a9`7XH[HW8e7R20O2N1001OO10O0100O1O2O0RFWHQN6JJ2No8l7SIWHoMj0i4[OVLNU1f7LYHmMk0f4l0\\MP61oHmM_O2F`4P2aKVN_1h7`0cHnMDa0KR4]2lLa5b0kHZ3e1nKa5d0mHe3e1\\K`5m0kH[NTOX5b2[K`5Q1jHP4e1kJd5P1kHX4a1cJg5P1mH^4^1UJn5Y1hHf4m9TKXFn4h9nJZFV5c9dJoDTO_1[6a9UJPE@i1]6V9QJoFQ6Q9jISGW6Z;002O0O2N010O1O2OO01O10O02N001O100O1O2O1N2N4L4L3N1N3M3N1NTJiCP18iMI\\1Ud600000O10O01O1O1000O01O1[OVOR^Ok0la0\\OP^Oc0Pb0@m]Oa0Pb0Cn]O>Qb0Dn]O;Sb0Fk]O;Tb0Gj]O9Wb0h0000O101N1O101N1O2O2M2^O_]OBcb08h]O_O\\b0=f0K6KggS4" + } + ], + "question": "What is the relationship between and ?", + "answer": " is carrying .", + "image": "images/caption_simple_65.png", + "model_output": " is being held by .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000172396.jpg", + "mask_rles": [ + { + "size": [ + 351, + 640 + ], + "counts": 
"3_2`82M5L3M3M6J3M2N5K5K4L2N4L3M3M6J4L2N5K6J3M2N3M5K2N8H4L6J1O3M3N1M5L3M3N7H3L5L2N4L3M3M3M4L4L5K2N3M4L4L4M1M5L3M4L5K1O2N6J3M4M3K3N4L4L4M1M2O5K3M5K2N2N4L2N2N2N00001O000000000001OO10000001OO100001O00O100001O00O100001O00O100000000000000001OO10000001OO10000001OO10000000000000000000000000O1001O00000000000000O1001O000000O1001O00O1001O000000O10000000000001O00O1000000000000001O00O1000000000000001OO100000000000000001O00O1000000000000001O00O10000000000001O000000O1001OO10000001O00O1000000000000001O00O1000000000000001OO1000000000000001O0000O1000000000000001OO10000000001O0000O1001O00O100000000000000000cILo14^40000000000000000000000000O1001O000001N11N1001N10O2O0001O0000O1001N10O2O0001O0000O1001O00O10000001OO10000001OO10000001OO100001O00O100001O00O100001O00O100001O0000O1001O00O10000001OO1000000000000001O0000O10000000000001O00O1000000000001OO100000000000000001O00O1000000000000001O00O2OO101OO100001O000000O10000000000001O00O10000000000001O000000O11O0000O1001O000000000000O10000001O0000O1001OO100QJMR13n4O00000000000000O1001O0000000000000000O11O0000000000000000O11O000000000000000000000000000000000000O11O00O100N2001OO1L4N20000O1O1N2H@mEd0P:7I7K5N2O1O1L4L4O1N2D]NXGg1f8:F:M3O1N2N2N3L2N[MSH^2a7`M]H65Y2]7h0G9K5N2M3M3N3N`LXIj2h6WMXIi2V6bLlINUO:01f4h2gKjLB6l0MVO20M93[4m2lKnL_O5k0NVO12M20K4e4i2PLRM[O2ONm02VOO<2@33K^4j2XLWMXO00Nl03BNB6O02J_4f2\\L[MSO5l0KC:BJ1K_4b2`L_MoN5l0JD;@K2J_4_2cLbMlN4m0LB:AJ3K^4W2kKiMF4b0M\\O20Lm02VON7d0I]O^4V2SLjM21ZO49K78H and ?", + "answer": " is inside .", + "image": "images/caption_simple_66.png", + "model_output": " is inside .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000404839.jpg", + "mask_rles": [ + { + "size": [ + 640, + 427 + ], + "counts": 
"_l02g15\\NK?0E0b`07j@S1S?POj@S1U?lNi@FdN^1b`0kNa@IXOY1X`0mN`@LXOV1W`0oNX@HWO74HM[1``0oNT@7]OP1``0hNQ@:^Oo0b`0eNP@T2Q`0kMo_OT2R`0mMj_OV2V`0jMi_OW2X`0hMh_OX2Y`0fMi_OX2Y`0fMh_O4@j1i`0SNf_O3Ai1l`0RNb_O6Bh1l`0TN`_O4Aj1Pa0VN[_ONGl1n`0XNX_OMJk1n`0YNW_OQ2j`0nMT_OT2l`0lMR_OV2Ua0cMk^O]2Wa0bMg^O7On1Xa0`0M32N2N0O01000OgN\\MXAc2h>^M]@0;b2W?_M]@14OD`2j?aM^@3FL42M^2]`0_Mg_OO03M_2k5iMQ5U2hJYNS5g1hJbNT5^1hJlNR5T1kJYOk4f0RKKa44XK:b4ETJfMZLO40?k2k8^OmITOhLb1e7oLgIX2b1P3V6nLdI_3W6]LeIW4o5gKnI`4P6`KiIg4W6YKeIk4Z6VKbIn4]6SK_IQ5a6oJ[IU5e6kJYIW5h6iJUIY5k6gJPI^5i6`HfGR2^1b5l6\\HfGR2U1oMgNi7^8VHfGT2l0P6d7RJWHQ6h7\\JdGl5[8m20000000000LdDdG\\;h7dDlH\\;g6hDiHN?Z;m70002NLgDaGY;^8hDbGX;]86ZLRE]KIo0G4^;\\3mE^KcNh03=^;[3WFULYNa0_;X3[FVLVNb03hNV;]4fFbM`9]2eF_M[9`2kFgKgM^1d1aNZN3a9U4TIUMQOfNn7T4RIUMPOgNo7S4TITMmNiNo7Q4ZIRMhNlNn7P4\\IPLQMm0\\1ROa8P4SIUMXNnNf8m3QIVMWNoNh8j3SIVMTNoNk8j3RIWMRNnNn8j3QIYMjMROV9d3QIZMjMmNY9h3nH[MlMiNW9l3nHYMhMkN^9k3kHYMfMnN_9i3kHYMdMPOa9g3kHYMdMPOa9g3kHYMcMPOc9f3lHYM_MQOg9e3kHZM]MQO]84kG`3]2YM[MSOU8e0bG\\OAc3]3ZM[MQOP8o0`GQ3V3oL[MPOm7a5iJ^KT6c4nI[KQ6g4oIXKUMQOR8i5iJWKTMPOR8j5jJVKTMPOR8j5jJVKTMPOS8h5jJXKP6f4PJYKW6a4iI_K[6\\4fIeK]6W4cIhKa6T4`IlKb6Q4`InKc6o3\\ISLg6i3YIXLoLoNQ9h4PJYLj6d3VI\\LkLQO]9`4iI_LfLTOd9Z4fIcLgLQOe9Y4eIfLgLQOd9X4eIiLaLlNR:Z4]IRMXLfN\\:V4\\IXMh6d2XI[Mk6c2UI]Mm6a2SIVMZ7f2fHWM`7f2`HYMd7e2[H[Mi7a2WH`Mh7`2XH`Mh7`2XH`Mh7a2WH_Mi7a2WH_Mi7a2VHaMi7_2WHaMi7`2VH`Mj7`2VHaMi7`2VH`Mj7`2UHaM2kNQ4e3lK`M4jNP4f3mK_Mk7b2TH^Ml7c2SH^Ml7c2RH^M5hNT4k3gK\\Mn7e2RHZMn7g2QHYMo7i2oGXMP8i2oGWMb0iNa3R4kKUMS8l2lGTMa0kNg3S4gKQMb0lNg3T4fKQMb0kNh3T4fKQMb0kNh3U4dKQMf0hNf3X4TKhKkMX1k:R3WGjKjMU1`3iNa3Y4SKlKjMS1`3hNd3Z4oJoKkMo0b3hNd3[4lJSLkMj0U;U3kFdMT9]2iFeMW9]2fFdMZ9^2_FgMa9\\2WFiMi9b2bEhM^:m501O1iHYGS3g8S40hN\\GTGe8f8cGWG^8`8kG_GU8]8PHbGQ8Z8SHeGm7X8WHgGi7l7fHRHZ7j7kHUHV7o5SGcJ;iNa1a0Q7R6UG^Jc2[OX6V6YG\\I1a0j2Gl5k3SGZNh0hMf;m3bCZN^=e1dBYN]=U41N100O1hM_BQNb=]1bBoN_=o0gBiLEh1e=^1iBgLGg1`=a1lBdLGi1^=a1ZC^MPO3g=]2eDSM^;j2jDnLX;n2o2L4M3M3Ll^O[MUa0R2R_OVNSa0h1Q_OTNPa0k1c000O2N101N101O1O0O3[Ol]OZO
bc0JZmm3" + }, + { + "size": [ + 640, + 427 + ], + "counts": "nY:1nc02N1a]70^bH3N1O2M8H4M2M3N2M5K6K1O1N2O1O0OI`]OoNab0Q1`]OnN`b0S1_]OmN^b0V1a]OkN^b0\\100O010O2OO1000OO2M3M2O200003M00N2O10000N\\]OiNcb0X1201O3ROo]OHQb05Q^OKoa05Q^OKoa05Q^OKTb0OgQ?6nn@6N000O020O00000004LBHU]O7kb0IX]O4hb0MY]OOib02V]ONib03b01O1O001O1O1O1O00001O4L2N1O2N2N3M1O1O001O1O0000001O0O1000O1100O1O0000001O001O00001O000000001O000000000O2O0000010O00001O0O101O01O01N2N2M201O000000001O00000000001O0000000010O000001O0000001O0j@_Nm;a1QDbNo;]1QDeNm;[1SDgNk;Z1UDfNj;Z1VDhNh;X1YDiNe;X1]DfNa;[1`DfN^;[1aDfN^;Z1bDhN\\;X1eDiNY;X1gDhNX;X1iDhNV;X1jDiNU;X1jDiNU;W1kDiNU;V1lDkNS;U1mDkNS;U1lDmNS;T1kDnNT;R1kDoNU;Q1iDROV;n0iDSOW;m0hDTOX;l0YD\\NgMi0P>k0YDaNaMe0V>j0YDCg;=YDCh;=WDBj;>VDBj;>WDAj;>VD^On;c0QDQO`MZO1Na>g1nCQO`MZOf>f1iCPOaMZOg>f1gCTOZeDBZ;>fDBY;?iD_OW;a0kD\\OV;d0kD[OU;e0kD^OR;b0oD^OP;b0PEBl:>UEBk:>TECf9lNWDa1S2C`9WOXDV1Y2D_8XOeD6?n0^2DW8n0lD]On2ER8n2nGRMn7R3RHoLj7T3VHmLf7X3ZHfLc7^3]HbLa7_3`HaL]7`3dH`L[7b3\\FaKF31k0i9d3[FmK[O21RO2\\1U:d3[FXM_OTOT:g3XFmNf9T1XFoNf9S1VFQOh9e5N1O1O101_KSFlNQ:n0RFQOU:g0mEYOT:c0oE]OR:a0oE^OY:8jEHX:OoE0T:KoE4U:FnE:U:BlE>X:mKoDT3m0n0g:^NdEb1a:UNcEk1`:\\MQDmNd1g3]:fLhFZ3^9ZLhFf3i and ?", + "answer": " is beside .", + "image": "images/caption_simple_67.png", + "model_output": " is beside .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000069138.jpg", + "mask_rles": [ + { + "size": [ + 640, + 371 + ], + "counts": 
"o96b0NDNM72KQb04[^OLD;056@Hj00SO48LJNOV?9XAKB=35NYO1]7i9nHVFL1d00`7i9YIWFg6i9ZIVFf6j9ZIVFf6j9ZIVFf6j9ZIVFf6j9[IUFe6j9i10000001O000UJUFa1k9Z4000VJUF_1k9`NWF_1i9aNWF_1i9bNUF_1k9[4001O00000XJTF\\1m9cJSFf31e1m9cJTFg30e1W:bJgEb32l1X:[NhEd1Y:[NgEe1`:TN`El1d:PN\\EP2e:oM[EQ2e:PN[Eo1f:_KXE`11Q3j:\\KUEc11Q3n:XKQEh10P3T;SKlD`7T;f0000000001O0000OaJlDT2T;lMmDS2S;]300O1O1OlHoD[5P;fJPEZ5m:iJSEI2e3j:[NVEd1g:_NYEa1g:_NZE`1f:aNYE_1g:aNZE^1f:bNZE^1f:bNZE^1g:aNYE_1g:bNXE^1i:aNWE_1i:bNVE^1k:aNUE_1l:aNSE_1m:aNSE_1n:`NSE_1n:i30iJQE]1P;bNPE^1P;i31O0jJoD]1Q;bNPE^1P;bNPE^1P;cNoD]1Q;i3O10lJPEX1o:iNREV1m:kNTET1k:mNUES1k:mNVER1i:oNWEQ1h:QOWEo0i:QOXEn0i:QOWEo0i:QOWEo0i:QOXEn0h:SOWEm0i:SOWEm0i:SOXE_LV1b2b9WNUEo03PNm1i2k8VNZE\\O2_OL7S3h2e7iM\\JV2g;00000000000001O001O00O11O00000000001YNQ_OGea0VOZ^O1olo0NoRPOV1Ya0]1N2O100000000O1000000O1N200000000000000O10000000000001O0000O100000000001O00000000O10000001O000000000000O10000001O00000000O1O1000000O1000000000000YMRMkA0Z2o2Qh?S1dB`NcM1i?_1hB`NX=`1iB_NW=`1iBaNW=_1iBaNW=_1iBaNW=_1iBaNW=_1eB`NdM1g?_1eBaNcM0h?_1dBbNdMOh?`1cBaNeMOh?`1cBaNeMOh?`1bBbNfMNh?`1bBgN]=Y1cBgN]=Y1dBfN\\=Z1dBfN\\=Y1dBhN\\=Y1cBgN]=Y1cBgN]=Y1cBcNeMNh?_1bBhN^=X1bBhN^=X1aBiN_=W1aBiN_=W1aBiN_=W1aBiN_=W1bBhN^=X1bBhN^=X1aBiN_=W1aBiN_=W1aBiN_=X1_BiNa=W1_BiNa=W1_BiNa=V1`BjN`=V1_BlN_=U1aBkNV<^2jCbMQ;c3oD]Lf:n3YESLf:R4ZC`Kb1`0T;e4^D\\Kb;n4SDSKm;`5\\CdJd<\\600O100O1O1O10000000000O101O0000000000000000000000000000000001O00O1001O0000000000000000000000000001O0000000O10000O100000000000001N10000O2[Od0oMnG_F0a0j8R3\\Gj0m:oN^E[OBZM5Gc;Y2YDhMi1T1TNiNR>[1n[Z2" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is over .", + "image": "images/caption_simple_68.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000342367.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": 
"QnQ62n>1m>NYRO1e>0ZA4a>M`A3`>L_A6`>I`A9c>I\\A2l0MVO2V=MiB2N4c0=e<]OkB9Nm0f;jNgE1cNb1d;UO[DB<^1X;BbD>];EaD;\\;IcD8Z;h1M3M3M4L4L3N3N11O000000000010O0001O0000001O000O101O00001O001O00001O0000010O0001O01O00001O010O001O010O0000001O0jLQE\\2o:]MYEFI`2o:fM[EII_2m:aMaEOC_2h;`MZD_2e;aM\\D^2d;bM^D\\2b;dM_D[2U5K4N2O2N1O1O1O100O2N1N2O2O0O100O2N1QNZOdEf0V:EdE and ?", + "answer": " is in front of .", + "image": "images/caption_simple_69.png", + "model_output": " is in front of .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000263796.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "[i;a0Tc0>L2N3N3M1N4K3O2N3f]O`Nma0l1M8I1\\OhM]_O[2``0hM]_O]2^`0fM__O^2^`0e0L5L5K3L2O0O2O000O10000O101O001O002N0002N2N3M1O3M2N6J4L1O1O2N1O001O000000TOoLi@Q3V?SMf@n2Z?UMc@k2]?XM]@k2b?XM[@i2e?[MU@g2k?g0O10000O1001O00O1O1O100O1000000000000002N2N1O2N6J4L1O1O0000000000VO^LVAb3h>dLTA\\3k>gLRAZ3n>hLo@Y3P?lLi@W3W?mLc@U3\\?h000O10000O100O100O1000000000000000000O10000000000O10000O10000O10000O1000000O100O11O1O00O1O10000000000N2O100N200000000O10000001O1O5K3M8H2N6J3M2N3M3M2N2N1O1O1O2N1O1O1O001O1O1O1O1O001O1O2N1VM__OS2a`0gMk_OS2W`0gMQ@U2P`0hMT@V2l`0O1O2N2N1O2N1O1O1O1O001O001O1O001O001O001O001O0000001O000000001O0000001O00000000000000000000000000000000000000000000O10000O100O10000O1O1O1O100O1N2O1O1O1O1O1O1001O002N3MO1O1C=N2TOhMY_O;7m1[`0^Nd_Ob1[`0`Nd_O`1[`0bNc_O_1]`0R1BhLT@X3k?iLU@W3k?jLT@V3l?jLT@V3l?jLT@V3l?jLT@V3l?jLT@V3l?jLT@V3m?iLS@W3l?jLS@W3m?kLQ@U3o?>O1N2N2K5A?O1O100O100001O001O0000XMZKWFe4h9^KaCMc2e4l9bKRF^4n9bKRF^4n9cKRF\\4n9cKSF]4m9bKUF]4l9^KaC0c2b4l9]KbC1b2b4l9]KbC1b2b4g<0O1O100O100O100O10000000000001O00000000000000O10000000000000000O1000000000000001O001O00001O001O1O001O1O3M2N1O1O2N1O1O001O00001O1O002N3M2N2N1O2N3M1O2fMi_Og0W`0UOQ@g0Q`0TOT@j0o?_N`_O2i0\\1i?`N__OLQ1b1b?`N^_OLS1b1a?^Nj@`1n`0M4L1O002N2N3M1O001O1O2N2N2N1O2N3M1O2N1O2N3M1O2N1O2N5KQl`0" + }, + { + "size": [ + 640, + 480 + ], + "counts": 
"dad3a0\\c0:G8G8H7J5K6K3L7J4M1N4L2O2M4M1N2O2N2^_OWMh?k2T@ZMj?g2S@\\Ml?f2Q@\\Mm?f2Q@\\Mn?e2Q@[Mn?g2P@ZMP`0\\3N1O100O1O1O1O2Z@nK\\?\\4N001O100O00100O10O01O010O10O0100h@\\KS?`4RA`Kk20o8`4VD`Kk20o8`4\\31000000O10001O000O10000000000000001O000000000000001O0000000001O000000000000001O00000000001O000001h@\\KS?c4l@`KR?g41O0100O01O00010O10O01O0O2J6L4N101O1O1O1O001O2N010N2O1O1ZOT@VMm?h2U@WMk?g2X@WMj?h2W@WMj?g2W@XMk?g2X@UMi?j2[@nLk?n2f0N1M4N2N1O2L3M4L3M4N2K6K4I8J6M4\\Ombf2" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is on .", + "image": "images/caption_simple_70.png", + "model_output": " is on .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000119828.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "`oY1a0P;:C;I8I5M4K4N3L3M3N1O2N1O2M3N1N3N2M3N2N1O2M2O2N1O2N1O2N1O2N2N1O2M2O2N1N3N2N1O1O2N1O1O1O2N100O101N1O101N1O1O2O0O101N10000O100O101N1000000O10000000000O1000000O10000000O100000O10000000O100O10000O10000O100O100O1O1O100O10000000000O1000000000000000001O0000000010O00000000001O0001O01O01O0010O00010O010O2O1N2O1N2N4M3M3L4L2O0O1fJoJl3T5lKlJnN2W5T5fKjJUO3T5U5eKiJTO4W5V5aKnJ`4V5ZKkJf4W5YKhJg4Y5UK^JD9X5S6001O00010O001O001O00001O001O001O0000001O00000000001O0000000000010O00001O00001N100010OO2O001O1O0O4MOLUJ^Jj40QLc5WOZJg4:lKU5[OfJm49dKP5CdJj4]6VKbIk4_6SKcI1Ne4_6[KbI0Of4_6ZK`I20e4_6YKaI20f4^6XKbIo4_65O1OjJcIS5`61O1O1O1NgJeIY5V67M2N3K401M2N2O2H7B>^Ob0K5N2N2N2O1O2N002N100O1O011N10O0101N10O0101N100O100O100O1O100O001O1O100O1O1O1O2N1O1O1O1O001O0O20OO2N2N1N3M3J5@a0L4N100M3O2N1M3O2L4^OlE[OY:b0?M4N2N1N201N2N100O101N2N3K[Vf0" + }, + { + "size": [ + 375, + 500 + ], + "counts": "_Sg31h;0N0bh01[WO4M5K3N3MCPE4n:LUE4i:K[E3d:M]E3a:NaE1^:OeEOZ:0hEOX:1g00PE2V:0gE3X:g001M2O1N101N2O001O001O00001N100O100O100O10000000000000000000000000000000000001O0000001O0000001O001O0O2O0O2O0J7K5J5L6I8H\\eR1" + } + ], + "question": "Where is located relative to ?", + "answer": " is attached to .", + 
"image": "images/caption_simple_71.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000001993.jpg", + "mask_rles": [ + { + "size": [ + 419, + 640 + ], + "counts": "]8a0a<1O2O1O0O10001N10000O2O0O10000O2O000O10001O0O100000000O2O00000O100001POTDg0Vo:CoD>P;]3fDiKc8]4^OcK\\H]4d7dKZH]4e7fKWH\\4i7gKRHf2c9Gf0N1010O10O100O010O100O10O10O100O0100000O10O100000O100001N2O3M3M2N2NGnEcMP:]2RFdMk9\\2WFdMg9\\2[FdMb9]2`FdM]9]2dFcMZ9_2gF`MW9b2iF^MU9c2lF]MR9d2PG[Mn8g2SGXMk8[2fFkM>Jk8W2nFnM7Ki8U2TGPN4Ig8V2ZGoM0Hf8X2]GoMV9P2mFPNQ9d1XF]Ni00n8]1aF^Ne05h8[1jGdNU8[1nGeNP8X1THhNk7T1[HlNc7P1cHPO\\7k0jHUOT7g0RIYOm6b0XI_Of6=`IB`69fIGY65kILT6OSJOm5LYJ4[9010O1000O1N2O1MbVX5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_72.png", + "model_output": " is beside .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000221502.jpg", + "mask_rles": [ + { + "size": [ + 320, + 640 + ], + "counts": "nR63g99K3O1N1O2O1N2O1O0jFZOR9k0N5YGTOQ8n0mGUOk0Nk5n0YITOj00l5m0ZITOg01YOGY6W1gIROLL>5CJ[6S1`IjN4f0;OP6b0aIkN2g0NWO7g0X6h1dIZN[6h1cIYN]6e20DcIWM[6i2gIUMY6m2eIRM]6n2dImL_6n2n1g5QNkI0?n1f5TNjIMa0o1d5VNjIKb0o1d5UNjILc0o1d5TNiIMc0n1d5WNhIKd0n1d5XNhIHe0P2c5YNhIFe0Q2d5WNiIEe0U2c5TNdJl1]5SNcJn1]5WN^Ji1b5VN_Ji1a5WN_Jj1`5VNaJi1_5WNaJi1_5WNaJi1`5VN`Jk1_5UNaJk1`5TN`Jl1b5QN_JP2c5mM]JT2c5jM_JU2a5kM_JV2a5iM_JW2e602N2N2M5K8I;E5K8G;F9Febm4" + }, + { + "size": [ + 320, + 640 + ], + "counts": 
"Qgj0131f91XF1g95N2N1O101N1O100O1000000O1O10000O10000O100O100O2O0mN_OgHa0i5I`Ie0b0Bn5T1RJmNl5U1RJlNm5U1SJkNm5U1SJkNm5U1SJkNm5U1SJkNm5U1SJkNm5V1RJkNm5U1RJlNn5S1SJmNm5T1RJlNn5T1RJlNn5T1RJmNm5S1TJnNj5R1VJQOg5o0YJSOe5m0[J]O[5c0eJC^O_Nc5n1oJOd0UNi2l1cL8:nMS3i1cLa00hM]3h1bLg0DZMK:o3i1\\LZ1d3a201O00O1001O000000[MXLROh3n0`LiNa3V1eLeN[3[1mL]NS3c1WMRNj2n1YMnMh2R2aMcMa2]2cM^M^2X1XLmN]1F\\2\\1[LkN]1CZ2b1YLkN`1]OZ2h1VLkN`1\\O[2i1ULkNa1ZO[2k1TLkNS6U1mIkNS6U1mIkNS6U1mIkNS6U1mIkNS6U1mIkNS6U1mIkNR6V1nIjNS6U1mIkNS6U1mIkNS6U1mIkNS6U1mIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1lIjNT6V1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1lIjNT6V1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIkNU6U1kIjNV6V1jIjNV6V1jIjNV6V1jIjNV6V1jIjNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6U1iIkNW6T1jIlNV6T1jIlNV6T1jIlNV6T1jIlNV6T1iImNW6S1iImNW6S1iImNW6S1iIlNX6T1hImNW6T1hIlNX6S1iImNW6S1iIlNX6T1hIlNX6T1hImNW6S1iImNW6S1iImNW6S1iIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6T1hIlNX6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6T1hIlNX6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1
gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1gIkNY6U1hIjNX6V1gIkNY6U1gIkNY6U1gIkNY6U1hIiNY6W1gIiNY6W1gIiNY6W1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1fIjNZ6V1eIkN[6U1eIkN[6U1eIkN[6U1eIkN[6U1eIkN[6U1fIjNZ6V1fIiN[6W1eIiN[6W1dIjN\\6V1eIiN[6W1eIiN[6W1eIiN[6W1dIjN\\6V1dIjN\\6V1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIhN\\6X1dIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIiN[6W1eIhN\\6X1dIhN\\6X1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1cIiN]6W1dIhN\\6X1dIhN\\6W1fIhNZ6Y1eIgN[6Y1eIgN[6Z1bIhN^6]200000000000000000001O00:Fg0`JnKd3Y4TLkKi3X5NlLaKi0]4jNRLU1m3jNULW1i3hNXLY1g3gNXL[1g3eNXL\\1h3dNXL]1g3cNZL]1e3cN[L]1e3cN[L]1e3cN[L]1e3cN[L^1d3bN\\L^1d3bN\\L^1d3bN\\LjN6@YO]1U49]LiNg0d0l2c0\\LjNR19b2l0]LkN]1MW2W1\\LkNc1IQ2[1^LkNe1Fn1X1cLRO`1Da2f0PLF`1CT32]K;`1BZ4>iK^OX4b0c2O1000001O00001O00000000002N001O00000O100000001N5Hgm?N]R@21Oci:" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is beside .", + "image": "images/caption_simple_73.png", + "model_output": " is beside .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000312586.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "`eo63X=2N1N3L3N3HD]C>b<61N101O000O100O2O00O1O101N2O103`CXOT and ?", + "answer": " is in front of .", + "image": "images/caption_simple_74.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": 
"/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000187236.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Y6c0n0i1U:XNkEg1T:^NjE`1U:cNjE\\1V:fNjEX1V:iNlES1U:oNjEP1U:SOjEl0V:UOiEk0W:WOhEh0W:\\OfEd0Z:]OeEb0\\:@aEa0_:A_E?`:C_E=a:E\\E;e:GYE9g:IVE8j:KSE5m:o100O\\LTEV3m:gLVEX3j:fLYEY3e:hL]EW3c:iL]EW3d:gL^EX3d:eL]E[3[7dLnK[3l7000O1000000000YM_Do1a;oMbDo1`;oMcDo1_;nMbDR2a;iMbDV2_;gMcDY2_;cMcD]2P<0O100000O010O1000O01000O10O010O1O01WObMPE^2o:cMPE]2P;cMQE]2o:cMPE]2]:bMXF]2i9cMVF]2k9cM[E16\\2_:eMZE06Z2a:gMWEO8[2a:gMVEN8[2b:jMREM;Z2c:kMoDMoBDP=;QCFn<:QCGo<8RCIm<7RCJn<6RCJn<5RCMl<4TCLl<4SCMm<2SCOm<1SCOm<0SC1m and ?", + "answer": " is lying on .", + "image": "images/caption_simple_75.png", + "model_output": " is on .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000255749.jpg", + "mask_rles": [ + { + "size": [ + 424, + 640 + ], + "counts": "W_U1e0Z2H]7Q1bG]OZ8j0[G]Oc8g0WG]Oh8f0QG_Om8f0eFEZ9U2O2N100O100O1N2M3NAXGbLd8`3`G^L`8_3cGaL]8\\3gGcLY8Z3jGeLW8X3mGgLS8W3oGhLR8U3R1I7_Ob0WN]E`0n:]OZEi]Y2" + }, + { + "size": [ + 424, + 640 + ], + "counts": "m`T28i<;I4M3N1O1O2N1O1O1O1N200O1O1O1O2N1O100O1L4M4L3N2O1O10000O10O100nNlDGKD[;b0nDGIEZ;b0PEGY;9hDD[;:gDC\\;;j0M3N2N4JhSc5" + } + ], + "question": "What are and doing in relation to ?", + "answer": " and are entering .", + "image": "images/caption_simple_76.png", + "model_output": " and are entering .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000575243.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "ZiV34Q=6M3L4L4N2M2N2O1O2O0N3O001N10001O000O2O010O001O00001O10O0001O001O010O001M3O1O010000O10O01O100O1O00100O1O1O010O10O01OO20O0100O01O100O100O00100O100O010O1O1O100O010O100O101N1O100O1000001N1O100O1N2O2O0O1O2N100O1O2O0O2O001N100O1O101N101O001Ni`b3" + }, + { + "size": [ + 427, + 640 + ], + "counts": "hcW31Z=0ea91odJ5QiK7RCN^L5O0100O0102M2N4M2M:E7HTl0OZbk3" + 
} + ], + "question": "What is doing with ?", + "answer": " is holding .", + "image": "images/caption_simple_77.png", + "model_output": " is holding .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000473118.jpg", + "mask_rles": [ + { + "size": [ + 500, + 346 + ], + "counts": "Uod01c?00000\\im00cVRO9_@Kh>f0G9D<\\ObNgBi1W=;K5M3M3O1000000001OAaMhC`2Th80bIBfM>h80aIDfM]2KYN:m8]O]F`0U2O`N4n8<]HCeN2n8;ZHFfNOR9;UHJgNJV9=lG0lNCY9=bFkNc0Y1@@\\9<_FROXORB7X>GmAMchm1" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is jumping from .", + "image": "images/caption_simple_78.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000527215.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "i8h02n2h8RMXGn2h8RMXGn2h8RMXGn2h8j0O100000000001O00000000000000000000000000000000O11O0000000000000000000000000000000000O1000000O100O100O1O10000O1000000O10000O1O10bKaGQ4_8oKaGQ4^8PLcGo3]8PLdGP4`7fK`H0a0:@P4_7gK^H1`09Do3^7gK]H2>;Gl3^7oKgH6Kk3]7RLeH4Nj3]7ULaH22i3]7kLcHU3^7jLcHU3]7jLdHV3\\7gLgHY3Y7gLfHZ3Z7fLfHZ3Z7eLgH[3Y7eLgH[3Y7eLgH[3Y7eLgH[3Y7lK]Ha0:c3Y7eLgH[3Z7dLeH]3[7kK\\H`09e3[7jK]Hb07d3]7hK]Hd06d3]7gK_Hd04e3^7eK`He02f3]7hK_Hb05e3[7kK^Ha06e3\\7kK\\Hb07c3]7eLcH[3]7fLbHZ3_7eLaH[3_7eL`H\\3`7dL`H\\3`7eL_H[3a7fL^HZ3b7gL]HY3c7W100001O000000001O000000000000000000001O000000000000001O00000000000000000000001O00000000000000000000000000001O001O001O1O001O00001O00001O001O001O00000000000000O100O100O100O100000000000000O1O100O1000000O1000000000000001O0000000000000000001O0000000000000000000000000000001O0000000000000000001O0000O1000000001O00000000000000000000001N11O00000000000001O00000hKUHQ3k7iL^HT3b7hLhHR3X7lLkHS3U7mLkHS3U7mLkHS3U7nLjHR3V7oLiHQ3W7QMgHo2Y7RMfHn2Z7SMeHm2[7YM_Hg2a7aMWH_2i7bMVH^2j7eMSH[2n7fMoG[2Q8_1000000000000000000000000000000000000000000000000000000000000000000000000000lKnGP3S8oLmGQ
3R8PMnGP3R8PMnGP3R8PMnGP3R8oLoGQ3Q8oLoGQ3Q8oLoGQ3Q8T1000000000000000000000kKoGQ3Q8oLoGQ3Q8nLPHR3P8nLPHR3P8nLQHQ3o7oLQHQ3o7oLQHQ3P8nLPHR3P8nLPHR3o7oLQHQ3o7oLQHQ3o7nLRHR3n7nLSHQ3m7oLSHQ3n7nLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLRHR3n7nLRHR3n7nLRHR3n7nLRHR3n7oLQHQ3o7oLQHQ3o7oLQHP3P8QMoGo2Q8QMoGo2Q8QMoGo2Q8QMoGo2Q8QMoGo2Q8RMmGo2S8QMmGo2S8QMlGP3T8S1000000000000001OO100001O000001O0O01001O00000000000001O0O11O00000000000000O100001OO100000000000001O000iKmGX3R8cLZHV3f7gLaHU3_7kLbHT3^7kLcHU3]7lLbHT3^7lLbHT3^7oL_HP3b7RMYHQ3g7QMSHT3l7nLoGU3Q8mLlGT3T8Q100001O0_KXH`3h7[L^Hd3c7VLcHd0YOV2V9gMXGn1X:J4L1OO1000nNSEGn:7SEIm:7UEGk:8WEGi:8XEHh:8XEHi:7WEIk:4VELn:0SEOS;JnD6U;GkD9Y;BhD>\\;]OeDc0a;UOaDk0n;1000000O1000000000000O100O1\\O]OaDc0Y;j0@`0L4O1O1L4WOi0^OgLmF_3b8UMYGm2^8\\LaGe4]85L4N2O10000000kKlGT3T8kLnGT3R8lLnGT3R8lLnGT3R8kLoGU3R8jLnGV3R8jLnGV3R8jLnGV3R8jLnGV3R8jLnGV3R8kLmGU3S8kLmGU3S8kLmGU3S8lLlGT3T8mLiGU3W8o01O00O10000000000000000000000000000001O0000000RLmGe2S8ZMnGf2R8XMPHh2P8XMPHh2P8XMPHh2P8XMoGi2Q8XMjGl2V8T1000000000000000nKiGQ3W8oLkGo2V8oLnGn2R8QMTHj2l7UMUHk2k7TMVHl2j7TMVHl2j7UMTHl2l7UMlGR3T8R1001O000000000000000000001O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000eG" + }, + { + "size": [ + 426, + 640 + ], + "counts": "l]f33U=4COZC;^<:O02N2N1N6J:F3O01OOi\\^4" + } + ], + "question": "What is doing on the ?", + "answer": " is walking on the .", + "image": "images/caption_simple_79.png", + "model_output": " is walking on the .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000509131.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "mVb55T=2N2M2N4L3M4M2M4L5L3M1O2O1O2N3M3ZDfNU;k1N2L2O00100O0010O1O1O010O2N1O0O10001N1O2O1N2N2O1N2N2N2N2N3M2N4L5K4K4K7HS[Q2" + }, + { + "size": [ + 425, + 640 + ], + "counts": "^_^58P=e1\\N7I5K6J5Ld0\\O0001O001N2O1O2M2O2N3L4L101N2N2M3N4QNPE]1V;WNTEe1];J5K5J6J6K6H8Hc[]2" + } + ], + "question": 
"What is the relationship between and ?", + "answer": " is attached to .", + "image": "images/caption_simple_80.png", + "model_output": " is attached to .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000167902.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "[9Y1^:0O1000O10000000O100000O10000000O100000000000O1000000O1000000000O100000O10O100000000000O1000O1000000000O1000O1000000000000O01000000O1000O1000000000O1000O1000000000O10000000O1000O100000000000O1000O10000000O100000O10000000000O10O100000000000O100000O1000000000O100000O100000O10000000O100000O10000000000000O0100000000000000O10O1001O00O1000000000O10O10000000000000O10O100000000000O10000000O100000O100000000000O10000000O100000O10O10000000000000000O10O100000000000000O0100000000000O10000000O0100000O2O00001O001O001O1N2O2N2N3M2N6J8H4Kk]10UbN3N3L5VEFo9>lECDL]:f0dEE^:j00000000O01000000000O1000000000O1000O100000000O1000O10O1000000000000O1000001O00001O1O2M2O2N3M2N1O001N4M001O[ODSF8l9JTF5l9NQF2o91mE0S:4gEMZ:h0O1000000000O0100000000000O010000000O10000000000000O010000000000000O010000000000O10O10000000O100000000O10O100000O1001OO1000000000O01000000000000O0100000000000000O010000000000000O100000O1000000000000000O100000O100000000000000O1000O100000000O10000000O1000000000000000O010000000000000000O10O1000000000O1000000000O010000000000000O1000000\\H" + }, + { + "size": [ + 375, + 500 + ], + "counts": "anV15Z;MhD;o: and ?", + "answer": " is over .", + "image": "images/caption_simple_81.png", + "model_output": " is standing on .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000097924.jpg", + "mask_rles": [ + { + "size": [ + 400, + 600 + ], + "counts": 
"n7a4o70000000000@VHZLj7e3XHYLh7g3ZHYLe7c0WHS24ZMe7b0eHg1FgMe7b0gHe1DiMf7a0jHb1@mMf7`0lHc1\\OnMh7?lHd1[OmMi7?mHd1YOmMj7?mHc1]OjMg7b0mHb1_OkMd7c0mHb1_OkMd7c0mHb1_OkMd7c0mHb1_OkMd7b0nHb1_OlMc7b0nHb1_OlMd7a0mHd1^OkMg7?kHg1\\OkMj7=jHh1\\OkMk7k1[9UNfFj1[9TNgFk1o901O000000O1SOUN^F2OO:j1Y9UN]F95g1c9TNcFk1]9QNhFn1Y9SNeFm1[9SNfFl1Z9TNfFl1P:O1O0000001O00000000001O1N1001O000O10O10O100O100001O00000001O000O100O100O1001O1O001O0000O1YOWNUFN?k1\\9ZNaFi1^9XNaFi1b9SN`Fl1S:O1O1O001OXNZNYHNTOh1a8]NgHc1Y7^NgHa1W7eNeH[1Z7gNfHX1Y7kNfHT1Z7mNfHR1Y7ROeHm0[7UOdHj0]7UOcHk0_7SOaHm0g7gN^HX1]9000000000000000O100N2L400@`0000000000oMlEH0k1d:N00O10000TNXNPIh1P7YNZH\\2f7hMUHY2k7mMmGL_Oj1c8bNeGDNf1]8[OfGd0Z8[OhGd0W8\\OkGc0U8\\OmGc0S8\\OnGd0Q8]OPHb0P8^OPHb0P8]OQHc0o7]OQHc0n7^ORHb0n7^ORHb0n7^ORHb0o7^OPHb0Q8]OoGc0R8]OmGc0U8[OjGf0W8YOiGg0Z8VOfGj0\\8TOcGm0^8RO]GS1f8iNTG^1m8aN^GT1d8iN^GV1e8fN]GY1e8dN]G[1e8bN\\G^1f8`NZG`1i8\\NYGc1j8[NUGe1l8\\NbF10c1_9\\N_F21c1m9]NSFc1m9]NRFd1m9_NPFb1m9b0fNaMcH_2\\7eMaH[2_7fM`HZ2`7iM^HV2b7kM]HU2b7oM[HQ2e7QNZHn1f7SNYHm1g7TNXHl1h7VNVHj1j7ZNRHf1m7\\NRHd1n7^NPHb1P8`NnG`1R8bNkG_1T8eNiG[1W8gNfGZ1Z8hNcGY1]8]100O1000000000000O100000000000000000000O1O1001O0000000000000000001OO10000001O000000000000000000000000000000000000000000000000001O0000000000000000000000000000001OO100000000000000000000000000000000O1001O00000000`G" + }, + { + "size": [ + 400, + 600 + ], + "counts": "R]^16T<;H9G4oKXOULk0f3CoK?n3HkK;R4KiK8T4MfK6Y4NbK4\\4O`K4_41ZK2d45TKNf4hJDX5 and ?", + "answer": " is standing on .", + "image": "images/caption_simple_82.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000509656.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": 
"0b6^800001O00O1001O000000000000001O000000000000001O00000000000000000000000000000000000000000000O1001O00O1001O00001O00N2bNmGPKJ=O@^8S5jGkJO0l8c5oFbJn8Y6_O4L2N0000000000000000001O00000000000000000000O1000000000;\\IbG?In11a0Y[JVOf5R1_JaNa5d1dJRN\\5V2gJ\\M\\5l2X36J?A9G:F:F:F:F:F>B7I5K5K1O0000000000000000000000O1lJeGc3[8[LhGc3Y8\\LiGc3W8\\LoG]3S8XLhGRO8_4U8_LcGQO9Z4Z8eL]GQO9X4\\8eLPHZ3P8eLQH`3j7`LWHc3e7]L[He3c7[L]Hh3`7WLaHj3^7SLeHn3Z7QLhHo3W7QLiHo3W7PLjHP4V7PLjHQ4U7oKlHP4T7mKgG]OV1e4S7QLkHQ4U7oKkHQ4U7lKgG]OU1g4U7]KfG0e1]4d6`KkG1b1_4c6aKjG0c1_4c6aKkGO_1N^Nb4X8aKjG0`1b4f6]KjG2_1b4h6[KhG4`1a4h6[KhG4`1a4h6[KhG2b1b4n6]KQIc4o6]KPIe4P7ZKoHg4e8O101O0O100000000O10000O1010O11N1O1O100O1O002N3N1N1N2O00001O000000O10O1O1O1N2O1N3M3M45L0OOO01O2O0O1O001O1O0O2O00001O001O00001O001dLmDi2T;TMoDi2i;K3L3N2N1O100O1O2O0O1O1O1O2O0O100O101N100O10O0101O0O2N1N2O2L4M3L3N3N2K4L5N3J6L4I6D^A12OPUP5" + }, + { + "size": [ + 480, + 640 + ], + "counts": "aah1c0U>?C`0B;F:E>D5K7H6oDlLm9Z4I4L5K5K9H3M2N0O01O1O1O2O0O10000O1O2O0O1O101K41ON3M2NiE]KW:b431O1OH9O0O2N1M4M201N2O001O1O1O001000bFWLe7i3YHZLf7h3UH\\Lj7f3QH^Ln7d3mG`LQ8c3iGaLW8`3fGbLZ8a3aGbL^8c3UGiLh8j4L4M2N2N000000001OO100O1O1L4N2M3M4L3O1O1N2N2N2O2M2O1N2O1O1M3N2N2N2L4L4L5L3000000O110O001O0000001O000000010O00001O00000001O0000000000000O1000000O103cEbKR:m4I3M9F5L2N1O:F2N1N2N100O2O000000001N101O000O10OFjIeG02V6^8kI`GZ5`0lJQ8J_GP5`9721M21OO200_Ob0M2N2N3O0O2J501O0O2L4N3O0O2M5L3N3M>\\HaHk6o7J6I5K and ?", + "answer": " is in front of .", + "image": "images/caption_simple_83.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000140658.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": 
"fb076LN2M;[b02g]O=Qb0m0O1G9K5J6H`Ml^Od2Sa05B>DZLWAN_Oj3Z?ZLRAl3m>d000O100O1O1N2N200O1O100O10000O1G9C=K5N2L4I7lNjITD`6k;bIRD`6m;cInC`6Rn2o10000O^ARMTk2eATM\\>k2eAVMZ>j2eAWMD0e=h2_BVMTO3h0Oe=h2hBXMZ>h2fAXMZ>h2fAWM[>h2_10U@XMFOf=i2fCXMZg2iAZMV>e2kAZMV>f2b1O1000000OTA\\M^l2PBTMP>n2nARMR>P3^11O1O002N3Z@hL^>Y3_@iLa`0[300ZDcLi6^3l4001O1O2N3RD[L[7g3eDZLX?g3g@YLY?g3g@YLY?g3`000001O001O3Q@TLd?P4W@RLh?S400000O1O100O10000001OO100000000000000000000O10000000kCnK\\8R4cGoK]8Q4cGoK]8R4cGmK]8T4g3OlCmK]8S4bGnK^8R4h31O00001O0jClK`8U4_GkKa8U4_GlK^L5i;o3kGlK]LOk;U4iGkK]LOj;V4iGkK]LOj;W4k3000000001O00000000001O00001O00001O0000001O0000001O0000000000001O00001O001O00000000001O00001O00000000001O001O00001O000000001O0000001O0000000000001O001O001O0000000000001O000000001O00000000001O00001O000000001O00001O00000000000000001O001O00000000001O001O00000000001O000000001O0000000000001O000000001O0000001O00001O00000000000000001O1O00002N3M2N001O0\\H^JbIN00O1Y41bKj0l0Sc0" + }, + { + "size": [ + 640, + 480 + ], + "counts": "dhj26fc07L2M3M2O2N2N1O1O1O1O1O1O1O100O010O010O0100O010O1O100O1O1O1O1O1O1O2M2O2M3M4LPlh5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is on .", + "image": "images/caption_simple_84.png", + "model_output": " is over .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000427160.jpg", + "mask_rles": [ + { + "size": [ + 512, + 640 + ], + "counts": 
"o=P2P>0000000000000000UOXB^Og=a0^B\\Ob=c0bBZO^=f0cBYO]=f0dBZO\\=f0eBYO[=f0gBYOY=g0gBXOZ=g0hBYOW=f0kBYOU=f0mBXOT=h0nBVOR=i0S10000O10OO2O10000O10000000000O100M3O1O1M3000000O10lAEc<:^CGa<9_CGa<6SCLUOOh=3RC>n<_OSCd0l<\\OTCd0l<[OUCe0k<[OTCf0l0O100M3O100O100O100O1O1O1O1O10000000000M300O1O1O010O100O2O0O100O1O1N11001N100O1O1O1O10O0100000000O101O0N101O1O100O1O100O10000O10000000001O0000000000000000000000mNXBNh=M`B0`=OcBO]=NgB0Z=NjB0V=MWB[Od0h0U=MmB3S=LoB4P=LPC4P=LQC3oN1N2O00102M000000001O0O11O01O0000oNHZB8d=LZB4a=5cADd07g==WBBg=c0XB\\Oh=d0XB\\Oh=e0WB[Oh=f0XBZOg=i0VBXOh=l0VBUOh=l0XBTOh=m0WBSOh=o0VBROi=Q1TBoNl=c100O1O100O2O0O1M3O0010000000000O10000O1O100000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000O11O00000001O000000O100000000001O000000O100001O000O10000000O10000001O0000000000O100001O000000000000O100001O00000O100000O10000001O0000000000O100001O00000000000000O1001O0000000000000000000000000000000000000000000000000000O10000N2N2N2M3O1O1M300O1O1O100O101O0^NmAW1_>N2O00000O2@^A@c>6hAGZ>7hAHY>3WAHb02[>NnA1Qoi18h_VN3M1O1O1O1O2N2M4M2N1N1N2M3K5K5M3O1O100O1000000O11O1O001O001O1O001O00001O00000000O100O1O1M3O1N2O1O1N2N2O1O1O1M3N2N2O1O1L4N2O1O1000000000000000000000000000000000000000000000000000O11O0000000O10000000O11O00000000000000O1001O000000000bB" + }, + { + "size": [ + 512, + 640 + ], + "counts": 
"Sjn73k?3N2N1O2N2N2N102M2N2N1N3O1N2N2M3N2O0O2M3N2M3N2N2O1N2N2O001N2O2FUN[Bn1_=XN\\Bl1`==L3[E^M[7S3_HVMY7n2aHWM\\7m2aHVM\\7n2aHSM^7o2`HRM^7Q3`HQM^7>`F]1Q2VN^7P3bHQM\\7P3dHPM[7P3fHQMY7o2gHRMW7o2hHRMW7o2iHRMV7n2jHRMU7o2kHQMU7n2lHSMS7m2lHTMS7T2oFcMT1]OoN26j0h8R2RGdMP1U1m7W1TGcMP1U1m7W1TGdMP1S1m7W1UGfMn0Q1o7W1TGhMo0n0n7Y1UGiMn0l0n7Y1VGjMn0j0n7Z1VGlMm0h0n7[1UGmMo0f0m7[1VGoMm0d0o7[1VGPNm0b0o7\\1UGSNm0?o7\\1VGTNm0>n7]1UGUNo0;n7^1UGVNo09n7_1UGXNn06o7a1SGYNQ12n7c1SG[NQ1Nn7e1RG]NR1Kn7g1QG^NS1EP8l1mF_N];`1dD_N];`1cDaN^;]1bDcN_;]1`DdN`;[1aDdN`;[1`DeNb;Y1]DiNc;W1\\DiNe;V1ZDlNg;S1XDmNi;R1WDnNk;Q1SDQOm;h0XCgNl0`0n;f0YD[Oh;c0YD]Oh;c0WD]Oj;c0UD\\Om;c0SD]Oo;b0oC_OT<>lCBY<:fCE_<7bCH`<6`CIa and ?", + "answer": " is running on .", + "image": "images/caption_simple_85.png", + "model_output": " is attached to .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000106048.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "^9l3`90000[MaF^1^9X10000000ZMbF^1^9aNcF_1]9aNcF_1]9aNcF_1]9aNcF_1]9aNcF_1]9aNcF_1]9aNcF_1]9aNcF_1\\9Y1000000000000000YMdF^1\\9bNeF]1[9cNeF]1[9cNeF]1[9cNeF]1[9cNeF]1[9cNeF]1[9cNeF]1g8]MiGV1@]1g8]MiGV1@]1f8^MiGV1A\\1f8^MiGV1A\\1f8^MiGV1A\\1f8_MhGU1B\\1f8_MhGU1B\\1f8_MhGV1A[1g8_MhGV1A[1g8_MhGV1A[1g8`MgGU1B[1g8`MgGU1B[1f8aMhGT1B[1f8aMhGT1B[1f8aMhGT1B[1f8aMhGT1A\\1g8`MhGT1A\\1g8aMgGS1B\\1g8aMgGS1B\\1g8aMgGS1B\\1g8aMgGS1B\\1g8aMgGS1C[1f8bMgGS1C[1f8bMfGT1DZ1f8bMfGT1DZ1f8bMfGT1C[1g8bMeGS1D[1f8cMfGh3Z8XLfGh3Z8XLfGR1D[1f8cMfGR1EZ1e8dMfGR1EZ1e8dMfGh3Z8XLfGh3Z8XLfGR1D[1f8dMeGQ1E[1f8dMeGg3[8YLdGh3\\8XLdGh3\\8XLdGh3\\8XLeGQ1E[1e8fMdGf3\\8ZLdGf3\\8ZLdGf3\\8ZLdGf3\\8ZLdGf3\\8[LcGe3]8[LcGe3]8\\LbGd3^8[LcGe3]8[LcGe3]8\\LbGd3^8\\LbGP1IV1e8jMbGP1IV1e8jMbGP1IV1e8jMbGd3^8\\LbGd3^8\\LbGd3^8]LaGc3_8]LaGc3_8]LaGc3_8]LaGo0JV1e8kMaGo0JV1e8kMaGo0JV1e8kMaGo0JV1e8kMaGo0JV1d8lMbGn0JV1d8lMbGn0JV1d8mMaGm0KV1d8mMaGa3_8_LaGa3_8_LaGa3_8_LaGa3_8`L`G`3`8`L_Ga3a8_L_Ga3a8_L_Ga3a8_L_Ga3a8_L_Ga3a8_L_Ga3a8_L_Ga3a8_L_Ga3`8aL_G_3a8aL_G_
3a8bL^G^3c8aL]G_3c8aL]G_3c8aL]G_3c8aL]G_3c8e000O100001O000000O1000000000000000000000000O100000000O10000001O0000001O1O4L1O1O1O1O1O001O0000001O0O20O01O1O000000000000000000000000000000O10000O1O1O1000000O100000000001O000000001O0000001O00001O0000001O000000001O000000001O00001O001O000000001O00000000001O00000000001O001O00001O00001O000000001O0000001O00001O00001O1O1O6J2N1O00001O00001O1O2N1O1O1O1O1O000000000000000000000000O10000000000O1M3L4O100001O00000000000000001O001OO1001O00000000000000000000O1001O1O1O00O1O1O1001O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000001O0000000000000000000000000000000000001O1O00001O001O001O001O0000O1001O0000O11O000000O1000000O1O1O100O100O100000000O100000000000000000000000000000000O100000000000000000000000000O10000000000000000GgL^FZ3Z9`0N2O1001O0000000000000000001O00O1001O00000000000000000000000000001O00000000000000000000000000001O0000000000000000000000001O000000000000001O00O11O00000000000000000000000TLcFi3]9WLcFi3\\9XLdFh3\\9XLdFh3\\9XLdFh3]9WLdFh3\\9WLeFi3[9WLeFi3[9WLeFi3[9WLeFi3[9WLeFi3[9WLeFi3[9WLeFi3[9WLeFi3[9WLeFi3_900000000000000000000000O1000000000000000000000000O100aF" + }, + { + "size": [ + 428, + 640 + ], + "counts": 
"[jl16XJ500O1O10000O2O00003L3N2N1N2O1O1O1N1000000O101O001N2O0O2OO01O0100000O10O1M3O11N100O100O100O101O000O1000001N101N100O101O000O10000O2O0000000O2O000O2O0O1O101O000O101O00000O2O0O100O101O0O10001O0O101O000O2O000O100O2O0O1000001N10001O0O101O0O100O2N1000001N10000000001N3N1O002L4M2O01OO3N3L2O1O1O1O1N10001N1O10O0100000O010O1O1O10OO2O1N13M1000000O100000001O0O100000001O000000001O000000000000000000000O1000000000000000000000000N20000O10000000000000000000000000000000000000000000000000000001O1OO1O1001O1O001O0000000000000000000000000000001O00000000000O100001O00O1001O0001N100001O000000000001O0000000000001O00000000001O00000000001O0000000000001O000000001O0000000O2O00001O1O001O001O2N1O001O1O3L3N2N1O6J:F4L5K4L9G7I6J4L8H5K2cK]Gl3e8oKaGU2H^Og8]NaGo1OC`8^NbG`0L]O1=a0V1k7hNeG3j1o0Y6WOnGCQ2R1j5DUHVOU2R1i5HRHUOW2j0n52jGUO[2?R6 and ?", + "answer": " is parked on .", + "image": "images/caption_simple_86.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000052565.jpg", + "mask_rles": [ + { + "size": [ + 458, + 640 + ], + "counts": "e;d2g;O00001O00001O000000001O00000000000000000000001O00001O0000000000001O001O00O10000001O000000000000000000000000000000000000001O00000000001O000000000000001OO100000000000000001O000000O100001O00000000000nMQDi1P4N1O0O2O1N2O0O2O1O1N100010O000000100N100O100000O1O101O1N4L7H7J5J;F8]CVNWUO_B0_f\\2" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is standing on .", + "image": "images/caption_simple_87.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000165039.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": 
"[8m1^;001O1N2O0001O1O01OnNfD3Z;LhD3X;MhD3X;MhD3X;MhD30ROS;k0lD32ROR;j0mD41SOQ;i0nD40UOQ;g0nD51UOP;f0oD51VOn:f0QE40XOn:d0RE40XOm:d0TE4OYO^:MgEf0L30[O\\:NbEh02O0[O]:o0cEF0\\O\\:n0eEEO^O[:m0gEDN_O[:m0hECM@[:m0iEBKC[:k0jEBKC[:j0kECJDY:j0nEAIFX:i0oE@JGW:i0oE@JHU:h0RFAHHU:g0SFAGIU:g0TF6k9JUF6k9JUF7j9IVF7j9IVF7j9IUF9j9FWF:j9EVFk9fNlE>8l0m9eNkE`07l0n9cNkEa07m0n9`NlEc06n0m9_NmEc06n0n9^NlEc07P1m9\\NlEd07P1m9\\NlEd06R1n9YNlEe06R1n9YNlEe06S1n9WNlEf05T1o9VNlEe06o0T:\\NfEe06i0`:WO`Ef0d:YO\\Ed0S;POmDn0i;N2N;E;FgV`0D\\i_O0XC9_aHB_7>`HC`7=`HC`7=`HC`7=`HC`7B8J3N213OL00O000001O000000000000O100O0@cNRE_1m:dNPE\\1Q;?0O1N2O1O01000000O0100000O10000O10000O100001N100000O10000000000O1000O1000000000O10000000000000O10O100000000000000000001O0O200O1O1O1O000000001O000001N11O000O01000000O10000O1O2N1N200O1O2N11O00O10001O0000001O000011N0010O10O00000001O00001N100O2O001O0O2N1000001N100O1O1O100O1000000000001O001N2O00001N101O3L6K2M3K5K5L3MP`o0" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is driving on .", + "image": "images/caption_simple_88.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000370270.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": 
"bc01oc01N=D0000000000000O10000O1O1O1O100O1O100O1O100O1O1O1O1O1O100O1O1O1O1O100O1O1O1O1O100O100O1O1O1O100O1O1O1O1O1L_Nh]Ob1ha0c0N2N2O1J6J6O1000000000000001O2NO100001O0000O1lN_Mj@`2V?eMh@X2W?jMi@U2W?lMh@T2X?lMh@T2X?lMh@T2X?mMg@S2Y?mMg@S2Y?PNd@P2\\?PNd@P2\\?TN_@m1a?TN\\@n1e?RNY@o1g?QNY@o1g?QNY@o1g?QNY@o1h?PNX@P2h?PNX@P2h?PNW@Q2i?oMV@R2j?bMQ@O0`2S`0YMR@V3]`0O0000001O0000O100000000000000O100O100O1O1O1N2O1O1O1N2N2M3O1N200O11O0000000000000000000000000[Lk_O_3U`0aLl_O^3T`0aLm_O_3S`0bLl_O^3T`0bLk_O_3V`0]Lm_Oc3W`012N1O1O5K2N1O001O000000001OO1000000O1000000O100O100O100O1O1O100O100O10000O100O100I_Lm_Oc3S`0500O1000N3N1O1O002O000O100001O002N4L0000000000000000000000000000000000000000000001OO1000000000000000000000000000000000000O1I7N2000000000000001OO10000000000000000000000001O0000000000000DU@cLk?S3f0O1O1O1N2O2M2O10000O10000O1O1HS_OYMQa0c28N2M3N2N2M3N2O2M2O1M3O1O2N1N2E;E;N`0QOok83^TG1N2N2O100O1N2L4N2O100O1L4M3O1O1O1M3O1O1M3N2E;N2N2M3N2I7N2L4O1L4L4J6N2L4L4H8L4M3O1OUMc@U1\\?eNcA@`?>T2DmSl1" + }, + { + "size": [ + 640, + 480 + ], + "counts": "Zdf151Mdc0e0B:G7I`0A1O1N2N2mN]Ng_Od1U`0aNi_O`1R`0gNj_O[1T`0gNk_OZ1R`0jNk_OX1R`0lNk_OU1R6PN`3m0]FT1P6RNa3k0_FS1n5TNb3k0^FR1o5UNa3n0ZFo0`1WN?GoN8]ON[9Y3cG^M3l0iN^N`9Y3dFZL?`18X1e8n0`FoLLV19XNOg2\\9kN\\FZN:S8Z9bIoFY6P9hIWGP6j8PJVGP6j8PJUGQ6k8oIVGP6j8oIXGP6h8PJYGo5g8QJYGn5h8SJWGm5i8SJWGl5j8SJWGl5j8TJVGk5k8UJUGi5n8VJRGh5P9XJoFg5S9ZJjFg5X9XJgFh5Z9XJdFh5_9WJ]Fh5i9XJUFc5R:`JhEi3j0gJk9c1UEe3\\Bk3g>b05K5N2K5J5O2O13M4L4L3L5L3M3M6J4L4M4L4K4M5J4M3M4M2`DcIV9`6fF_I[9d6cFZI`9d6aF[Ia9c6P2N1N2N3N2M3N1RK]B\\3f=bL\\BZ3i=cL[BX3h=fLZBCFj2S>aMYBAKi2o=bMYBBLh2m=eMYBAMDAn2]>iMXBB4_2h=lMVBB8\\2d=PNVBA;l0QO7a>YOTD@YNb0g=Igkh5" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is standing on .", + "image": "images/caption_simple_89.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000481413.jpg", + "mask_rles": [ + { + "size": [ + 427, 
+ 640 + ], + "counts": "loi3?g<:F9I6K4L4K4M4M2N2M3O1M3N2N2N2O0O2N101N1O100O2O0O100000000001O1N2O3M0O10000O1O010O001O100O2N2M3N3M2M3M3N3L4L4L4K6I9HWYh3" + }, + { + "size": [ + 427, + 640 + ], + "counts": "mkd29P=3L4M2N2K5L4L4N2N1M3O1O1ON2NdEQOb8g0cG_OZ8b0bGC]8<`GJ]87`GM_82^G3`8N]G6b8I\\G doing with the purple frisbee ?", + "answer": " is holding .", + "image": "images/caption_simple_90.png", + "model_output": " is holding .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000092839.jpg", + "mask_rles": [ + { + "size": [ + 517, + 640 + ], + "counts": "a:a5`:4J6N3O0001O001O001O2N1O001O1O001O2N1O2N3M1O001O2N1O1O1O3M2N2N1O1O2N1O2N2N2N5K2N1O1O1O1O001O00001O2N2N001O1O1O2N001O2N2N1O1O002N3M1O2N2N2N1O2N1O2N1O2N2N2N2N2N2N2N4L1O2N2N1O1O002M5L2N101N4L5J5L1O2N2N2N1N200O2N1N3N0010O00001O000000001O000O1000O11O000000000O1000000001O0000000000000000000000000000O1000000000000000000000000000000001O0000000000000000000000001O0000000000000000000000001O0000000000001O0000000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000000000000000000000000000001O00000000001O000000001O000000001O000000001O0000000000001O00000000000000000000001O00000000000000000000000000000000000000000000000000001O0000O1000000000000000000000O010000O2O000O01000O100O10000O10000O100O10000O1O10000O100O10000O100O10000O10000O10000O100O100O100O10000O100O100O1000000O100O10000O100O100O10000O100O10000O100O10000O10000O2O0O01000O10000O2O0O1O1000O01000000O2O000O1000O01000000O100O1000000O100O10000O1O100O100O100O10000O10000O10000O10000O10000O10000O10000O100O100O100O10000O10000O10000O10000O1000000O100O2O000O01000O2O000O100O1000O10O2O0000000O100000O100001O0002NN2N20O1gLZC1N6O]2i<\\McC1G4NU2h located relative to ?", + "answer": " is sitting on .", + "image": 
"images/caption_simple_91.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000336209.jpg", + "mask_rles": [ + { + "size": [ + 432, + 640 + ], + "counts": "e6]2S;00000000000000001N1000000000000000000000000O1000000000001O000O1000000000000000000O10000000000000000000O10000000O10001O1O000000000000001O0000000O100000000000000000001O00O1001O00000O1000001O002N2N006I9H1O0000000000000000001O0000000000000O10000000000000O100001O00001O000O100000001O0000000000000O1000000000000000000000O100O1O10O10O1N2O100O100O1O1O010O1000000000000001O00001O001O0O2O001O2N2N2N2M2O000000000000000000000000000000000000000000000O101O0000000O10000000000O100000000000001O001O0000000000001O00000000000000001O0O100000000000O1000000000000000000000000000O10000000N2O1O10000O1O2N3M3N6I5KUFAk9`0TF_Om9a0SF_Om9a0TF^Ol9c0UF[Ok9f0UFXOl9h0TFXOl9k0RFTOn9m0QFSOo9n0QFPOP:S1nElNR:U1oEiNQ:W1oEiNQ:[1lEdNT:]1mEaNS:`1oE]NQ:d1oEZNR:h1nEVNR:n1kEQNU:Q2jElMX:U2RFiMe9Y2XFiMg9X2XFiMg9X2XFhMg9Z2WFgMi9Y2WFgMi9Y2WFgMi9Y2WFgMi9Z2WFeMi9\\2VFdMj9]2VFcMh9`2VFfMd9\\2ZFfMd9\\2ZFeMe9\\2YFfMf9Z2ZFfMf9Z2ZFfMf9Z2YFgMg9X2WFkMi9U2VFlMj9S2WFmM:^OA6R9`2RGmM;Fc8\\2SGoM8Hc8Y2UGoM8Hc8Y2RG[MNd0_8^3oGeLQ8[3oGeLQ8[3oGeLQ8[3oGdLR8\\3nGdLR8\\3nGeLQ8[3oGeLQ8[3oGeLQ8[3oGeLQ8[3oGeLQ8[3PHdLP8\\3RHbLm7_3QHcLo7]3PHdLP8\\3PHdLP8\\3PHdLP8\\3PHdLP8\\3oGeLP8\\3PHdLP8\\3PHeLo7[3QHeLo7[3QHeLo7[3QHeLo7\\3PHdLo7]3PHdLP8\\3PHdLP8\\3oGeLQ8[3oGeLQ8[3PHdLP8\\3PHdLP8\\3oGeLP8\\3oGeLQ8[3nGfLR8Z3mGhLR8X3mGiLS8W3mGiLS8W3mGiLS8X3lGhLT8\\3hGdLX8]3gGcLY8[3iGeLV8[3kGeLU8]3iGcLW8\\4O00O1O1001O000O2O1O1O1O00003M001N10000000001O001N2O1O00001O0O10000000001O00000O10000000000000000O10000000001O000000O10gJ" + }, + { + "size": [ + 432, + 640 + ], + "counts": 
"2[5U800000000O10000000000000000000000O10UHiJ]7W5>0000O1000000000000O10000000000000000O1000000000000000000O100000000000000000000O10000000000O10000000000O10000000000000000O1000000000000000000O100000000000000O100001O0000O1000000000000O1000000000000O10000000000000000O10000000000000000O10000000000O10000000000O1000000000000000000O100000000O1G]GdKd8[4900O1O1O1O1N2N2O1O1O1O1O1O1001O1dFTLR9_4F1O000000000000O1000000000000O1000000000000O10000000000000000O1000000000000000000O100000000000000O10000000000O100000000000000O1000000000000O10000000000O100000000000000000000O1000000O]GhKP8X4PHhKP8X4d00000O10_GkKg7U4UHiK^O8Y8o3XHVLf7j3\\HULc7k3^HULa7k3`HUL_7k3cHTL\\7l3fHSLY7m3U100O100O100000000000000O1O1N2N2N2O1N2N200O1O100000000000000000000001O000XG]LUOOU8e3bHlLZ7T3eHRMV7o2iHRMV7n2iHTMV7m2iHTMV7m2dHZMZ7h2aH\\M^7e2`H]M_7[4O1O001O00000000B[HPKf7l4_HSKa7j4cHTK^7k4dHRK^7k4e0N2N20000O10000O100O100O1000000O10000000000001O001O0000001OO100O100O1000000O1000000000000O10000O1O100000000001O001O001O:F1O1O3M6J1O000000O11O1O1O001O1O1O001O0000000000000000000000000000000000000000000000O1000000000000000000O1O1O1000000000000000000000000000000000000000000000000001O000000001O0000001O00000000000000000000000000000000000000O10000000000000000000000000000000000O100000000000000000000000000O1000000000000000000000000O10000000000O10000000000000000O100000000000000000000O1000000000000O1000000000000000000O10000000000000000O1000000000000O100O1L4RNn1M3O100001_FSMSON01Q9Q5YO1O1O0000000000000000000N" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is over .", + "image": "images/caption_simple_92.png", + "model_output": " is over .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000458325.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "]ca33Q17k:MkD;S;K^D?\\;j01L3O2M4N2O4L2M2N202N1O1XOlC6g doing on the ?", + "answer": " is crossing the .", + "image": "images/caption_simple_93.png", + "model_output": 
" is walking on the .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000350122.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "lPT5g0U>7H8J5K4L5L2O2M2N2O1O1O1O1O1O100N12N3M1O1O0O2O2N2N5J3M3M5K:D3M101O001O0O2O001O1N1O2O00O010000O01O01O01000O000O2O1N2WO_OTCa0lfDJV;Q2O0OO2H8L4N2N2N2N2O1000YD]NR:c1iEeNMgNi9k4O01VK[FW4g9eK\\FZ4f9bK]F\\4V:N1N2M3M3^N`E[Ng:`1`1J5L5\\OSCoNR=m0UCmNo doing with the bicycle ?", + "answer": " is pushing .", + "image": "images/caption_simple_94.png", + "model_output": " is pushing the bicycle .", + "eval_result": "True" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000295809.jpg", + "mask_rles": [ + { + "size": [ + 512, + 640 + ], + "counts": "d547k9_5UFaJk9_5UFaJk9_5UFaJk9^5VFaJk9_5UFaJk9\\5XFcJi9]5WFcJi9]5WFcJi9]5WFcJi9]5WFbJj9]5WFbJj9^5WFcJg9]5YFcJg9]5XFdJh9\\5XFcJi9]5WFbJj9^5VFbJj9^5WF`Jj9`5VFaJi9_5WFaJi9_5WFaJi9_5WFaJi9_5WFaJi9_5WFaJi9_5WF`Jj9`5;O10000O10000000000000000O1000000O100000000O1000000000000000000000000000000000000O10000000000000000000000000000000000000000000000000000O1N200002NN2000000000000O11O00O10000O1000000000000O1000000000000000000O100000000000000O10000000000000000000000O1000000000000000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1001O000000O11O0000O100001O00O100001OO10000000000000000000000O11O000000O11O00O100LPEXKP;l401OO1000000000000LPEXKP;l4000000000000000000000000000000000000000001O0000001O000000O11O000000ISEZKn:l40IQE^Kn:i410000001O00HRE]Ko:b4SE]Km:d4RE]Km:c4SE^Kl:b4TE]Km:c4RE^Kn:i411OMRETKn:i4UEWKk:i4TEWKm:h4TEXKl:h4TEWKm:i4SEWKm:i4SEWKm:i45000000O10000000000000000O11O003MM300001O00O1O11O2N1cMZK^If4n8MmDZKR;f4nDZKR;j4000001O00O100001OO100001O0000O10\\NTKTHl4`91OO10000001O0000000000000000000000000bLTKhKl4f70000000000000000000000000000001O000000000000O1001O000000000000000000000dNSKeGm4[8SKeGm4g90000000000
00000000000UNRKdHn4W90000000000000000000000000000000000000000000000000000000000000000000000000000O11O0000LQEVKP;j4PEVKP;n4O0000000000000000000000000000000000LPEWKQ;i4PEVKP;n4O0000LQEWKo:i4QEWKo:i4QEWKo:m4000LPEWKQ;i4PEVKP;n4O0000MPEVKP;i4QEVKP;k42001O00001O1O00LQEWKo:i4QEWKo:l4100001O1O001O00001O0000001O0000001O1O1O3M001O1O0000001O1O1O00001O1O00001O1O3M1O001O001O0000001O1O1O00001O1O001O001O001O000000001O00001O1O1O2N1O1O1O1O001O001O5K00001O6J1O001O1O1O001O1O001O1O000000RI" + }, + { + "size": [ + 512, + 640 + ], + "counts": "PT[86h?3O0O2N10000O1000001N10001O00000O101O00000000000O10000000O1O010O0100O100O100O100O1O1O100O1000000O100000000000000000000000000000000000000000N2D=G]`86e_G5K4L000001O00000000000O100000[L" + } + ], + "question": "What is the spatial relationship between and ?", + "answer": " is driving on .", + "image": "images/caption_simple_95.png", + "model_output": " is beside .", + "eval_result": "False" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000511760.jpg", + "mask_rles": [ + { + "size": [ + 640, + 296 + ], + "counts": "i?9V3HZ=d0a_OD00000:0F0J5Q1MQOe?S400000000000000000000O1001O00O10000000000000000000000000000000000000000000000000000000000000000001O00O1001O000000O10000000000000000001O000000000000O11O0000000000000000000000000000000000O100000000000000000000000000000000O100001O0QLb@\\3^?bLe@4Ih2b?oL]Ao2k?O1O>B:gM^^O`1Tb0M1O1O000000000000000000000000001O0000000000000000ZM^NnBb1R=`NlB`1S=eNiB[1W=iNeBW1[=lNbBT1^=mNPAB2M7Om0_?n0`@RO_?P1_@QOa?Q1\\@POe?e20ZMe@g0[?QOn@n0R?QOPAXO[OR1e?ERAmN]OK1b1`?BkA=U>[OSBe0m=YOUBg0k=XOVBh0j=WOWBi0h=XOXBh0h=XOXBh0i=WOWBi0i=WOWBi0i=WOWBi0i=WOWBi0i=WOWBi0i=WOWBi0i=WOWBi0i=XOVBh0j=XOcAAnNW1_?ZOf@TO and ?", + "answer": " is standing on .", + "image": "images/caption_simple_96.png", + "model_output": " is beside .", + "eval_result": "False" + } +] \ No newline at end of file diff --git a/evaluation/GAR-Bench/model_outputs/gar_8b_vqa.json 
b/evaluation/GAR-Bench/model_outputs/gar_8b_vqa.json new file mode 100644 index 0000000000000000000000000000000000000000..f9f143bb320fc015fa0a516a0f262b5bacf19baf --- /dev/null +++ b/evaluation/GAR-Bench/model_outputs/gar_8b_vqa.json @@ -0,0 +1,12166 @@ +[ + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1582.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "`fS2:bo0=C9J3M3M1O1O001N1010O2N2N3N8G4L2O1N3N3M6I4Mgn3NYQL8J4K5L2N2M3N2M2O2M3N2M:G000O1000000O100O10000O1000001O0O10000O100O101N1O100O2O0O2N101N100O101O0O2O0O2O0O2O001N2N101O0O1000O100O100O010O1O100O010O10O10O10O010000O10000O1O100O00100O100O10000O100O100O100O100O100O100O100O1O100O10000O100O1O100O2O000O10000O10001N100O100O100O1O1O2N1N2N4KY[cP1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "igR2`02:`n0j0iQOhNWm0^1cROmNTm0Q2N2N2N1O1O1O10O01O1O100O00100O10000O1O10O001O0O2O11N2O1O1O1N3N1O1N101O2N1O1N101OO10000O1O0KdSOfL^l0V38M3N201O01O0000O1O1N2O2N100O101N101O0O1O2N1O1O2N1N3N1O2O001O1O0010O1000O100O1O1O2N2O03ON2M5K3M4L3M4K6K6J8G3N1N2N2N4KjVaR1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "`i[d0`0i0BLl0bj0WOVVOY3bi0nLZUO2KIMa3hj0a1M4cUOnJfi0T5VVOQKl1Bhe0S6[XOnI[1;Xf0m7N000O2O00000000000000000000000O10000000000000000000O1000000000000000000000000000001O1fJTYO^1nf0]N`YOX1Rg0PLmWOa1\\1^2Wg0mLYYOS3lf0SL_WOLk1P4ei001O001O00010O000O100000001O00000000001O0000000001O000O100000O1000001O00000000001O000000001O00000000001O000000000000000000000000000000000000000000IXLTTOj3hj0PMZUOX3Pj0g1K5J6N2O1O1_OhIQWOa6mh0:N2H8G9I7EZHVXOl7ig06O100000000000000000000000000001O00000000000000000000001O000000001O000001N1000000000O101O001N100O2N1RKTXORNMX2Yh0QNhWO[OX1@]Ok1_O[Ngh05mWOOk1`0\\NVOdi0L[ZO1Pl_=" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": 
"Sla<6Xo00jPOQ3TMo_OjC\\O03R1j`0_9WBlEmLBRa0[9[5nLbYORJ6J211Ijf0c4S4M3O101O00001N101O001O0O2O2N2dM^TO7kk0lNnTOn0Xm0J4K3N1O1O00001OO10000001O00000000000000000000000O100001O00000O1001N10000000000000000000000000000000O10000001O000000000000000000000000WHmNe@S1W?TOf@l0X?WOf@j0X?ZOf@f0Y?\\Of@d0X?^Oh@b0W?@h@`0V?Bj@>S?Fm@9P?JPA6n>LRA4m>NRA2l>0TA0j>2VANi>3WAMh>4XALh>4XALg>5YAKg>5YAKg>5YAKf>6ZAJf>6ZAJf>5[AKd>6[AKe>5[AKe>4\\ALd>4\\ALd>3^AKc>4^ALb>4^ALc>2^ANb>2^ANb>2^AOb>O_A1a>O_A1a>O_A0c>O]A1c>0\\A0d>0\\A0e>O[A1f>NZA2h>LXA4i>JWA7j>HVA8k>FVA:k>EUA;m>CSA=n>CRAo5a3gGRLZ2l_OBT`0?j_OBV`0?i_OAW`0`0h_O@Y`0?g_OAY`0?g_OAZ`0?f_O@Z`0`0f_O@[`0?e_OA[`0?e_OA\\`0>d_OB]`0=c_OC^`0l`0_OV_O`0k`0]OW_Oc0l`0YOU_Og0Qa0QOQ_Oo0og010000001O0000000000O10000000O10001O00O2O00000001O0O100000O1000000000000000000000000000000001O0000O1000000000000000000000000000000001O0000O10000000000001O00O1000000O1000000O1YNoN[SO4OJKV1ck0lNbTO^3ij0W1ZOf0gNY1WNeIaXOg7ef0W1[Oe0\\Od0jN`EP\\O\\;Rc0fDi\\O[b3TNl1UNU]OXETd0`9j[OaF`e0V8j1\\Od0XOl0mIVWO\\OolS5" + } + ], + "question": "Which one among , , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. " + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_0.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2925.jpg", + "mask_rles": [ + { + "size": [ + 460, + 620 + ], + "counts": "^hb19Q>>D7I7I7I7I6J6J6J2O1N2O1N2M3N2M3M3M3M3N2M3M3L3L5L4K5G9F:H7O200001O1N3N2N2M4L4L4L5J8H:^OT]R1" + } + ], + "question": "Among , , and , which mask is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
None of the above are in the mirror" + ], + "answer": "D", + "type": "mirror", + "image": "images/vqa_1.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/49.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "Y_Rb05;6jn0KTQO6kn0MQQO5on0>000001O00000000O101O000O4YOXa\\e0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "hbS:46o0kl0]ORSOS2`j0k2[Oe0cNQJPXOO2Q6kg0UJQXOJ1U6lg0SJQXOI2U6lg0TJQXOG3V6kg0ZJjWO@;W6kg0aJUXO`5jg0`JVXO`5jg0aJTXO`5lg0aJSXO`5lg0aJoWOc5Qh0W1O001O0XXO]HRg0f7hXObHTg0a7hXOhHPg0T80001O0001O000001O00000001O000O100O1WNlXOeJUg0Z5mXOdJSg0]5mXOcJRg0^5nXOaJSg0_5mXO`JTg0a5jXO`JUg0a5kXO_JUg0a5kXO^JVg0a5kXO^JUg0b5lXO^JTg0a5mXO^JTg0a5mXO_JSg0a5mXO_JSg0a5mXO_JSg0a5mXO_JSg0a5mXO_JSg0a5mXO_JSg0a5mXO_JSg0a5mXO_JSg0`5mXOaJSg0_5mXObJRg0^5nXObJRg0^5nXObJRg0^5nXObJRg0^5nXObJRg0^5mXObJTg0^5lXObJTg0^5lXObJTg0]5mXObJTg0^5lXObJTg0]5nXObJRg0]5oXOcJQg0]5oXObJRg0^5nXObJRg0^5nXObJRg0]5oXOcJQg0^5hXOgJYg0Y5eXOiJ[g0m6100O1000000O10000O10O100000O10001N10000O1O002WNdXOkI^h0Z5`WOeJ^i0U5j0kNkUOmKfj0^2\\2]M`ecj0" + } + ], + "question": "Could you confirm whether and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. 
Neither nor is in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_2.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2905.jpg", + "mask_rles": [ + { + "size": [ + 460, + 620 + ], + "counts": "T[l342NP>f0C9G0O2O03L2O000000000000O2OO10O100000000O10O1000O100000000O10O100000O10000000000O010000000O100000O1000O1000000O100000O10O10O1O001O1O1O1O1O001O1O1O001O1O1O1O1O001O10O01O1O1O1O001O1O1O001O1O1O1O1O001O1O1N2O002Nn^Z3" + }, + { + "size": [ + 460, + 620 + ], + "counts": "cQ_5b0i=2OO010O01O001O01O01O010O0010O01O010O10O01O10O002Mfik2" + } + ], + "question": "Can you tell me if and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_3.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1496.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": 
"0Q5oj0000000000000000000000000000000000000000000O10000000000000000000000000000000000O100000000000000000000000000000000O100000000000000000000000000O10000000000000000O100000000000000000000O1000000000000000000000000000000000000O10000000000000000000000000000000000O100000000000000000000O10000000000000000000000O1000000000000000000000000000000000000O1000000000000000000000000000000000000O100000000000000000000000000O10000000000000000000000000000O10000000000000000000000O100000000000000000000O1000000000000000000000000000000000000O100000000000000000000O100000000000000O1000000O10000000000000000000000O1000000000000O100N2]Oc0O1O10000O1O100000000000000001O00002eSObLPl0j3M3M2N2N1O1O00001O00000000O10000000000000000000000000000000000O100000000000000000000000000000000000000000000O1000000000000000000000000000000O100000000000000000000000000000000O1000000000000000000000000000000000000000000O1000000000000000000000000000000O1000000000000000000000000O1000000000000000000000000000000000000O10000000000000000000000000000000000O1000000000000O1000004L2M:GP1aLgROU2dn0nNYmlg0" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. 
No" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_4.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/515.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "YnmT1c1bm0m0L201O000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000dRO" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "][gS1:7HUo0j0H2M101O00001O0000000000000000000000O100000000001O0000000000O100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001O01N10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000dD" + } + ], + "question": "Are and located within the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. 
Neither nor is in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_5.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1132.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "^Tfo0;ao06M101O00O10000000000000000000O1000000000000000000000000000000000000000O10001N4JgkP7" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "XjZU1Z1lm0k0L4M2O10O01O00010O000001O01O0010O000010O01O010O0010OO2M3YOj0mNTXf1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "dQV:6ho05K4M1O2N1O101N1000000O10000O100000000O1000000O100O010O100O10000O100O1O1O100O100O100O1O001O100O1O1O10O01O100O1O1O100O1O1O1O001O1O1O1O1O001O1N2O1M30000000000000000000O100000000000000000000000000000000O100000000000000000000000000000000O100000000000000000000000000O100000000000000000000000000000000000000O100000000000000000000000000000000000000000000O100000000000000000000000000000000O100000000000000O10000000000000000000000000000O100000000000000000000000000O1000000000000000000000000O10000000000000000000000000000O1000000000000000000O1000000000000O10000000000000000000000O1000000000000O1000000000000000000O100hNdQOl0`o0XO3M1N2O0000fo[10[ocN=[QOMan04\\QOOcn0E]QO117on0IQQO6Qo0IoPO5So0NjPO2jn0JZQOe0cn0]O[QOe0cn0\\O[QOe0en0\\OZQOe0en0\\OYQOe0gn0:0O1]O\\QOJdn04_QOKan06^QOJbn07]QOIcn0;YQOFfn0j0000000000000000O10000000000001O0000000000000000000000000000O10000000000000000000000000000000000O1000000000000000000000000000000O100000000000000000000000000O1000000000000000000000000O1000000000000000000O100000000000000000000000000O1000000000000000000000000O1000000000000000000000000O1000000000000000000000000O1000000000000000000O10000000000000000000000O100000000000000000000000000O1000000000000000000000000000000O1000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000O1000000000000000000000000000000000000O1000000000000000000000000O100000000
000000000000000000O100000000000000000000000000000000000000O10000000000000000000000000000O10000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000O100000000000000000000000000O1000000000000O100000000000000000000000000000000O1000000000000O1000000000000O1000000O1O1LTPh3" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "TQa73a00l0P=jNYC6Ga40WL0e0Nl4mk0]IVTO239O0Od04oNLd0Yn0A\\RO;fm0B]ROCjC4QN12ML6d0e;W9UD_Gmc0n6S\\O`GRg0Z6XYOiI2Lmg0j1eWOd13eLSj0^1TVOc1_j0[NdUOb1^j0\\NdUOc1]j0\\NdUOb1^j0]NdUOa1]j0^NdUOa1]j0^NdUOa1]j0^NeUOa1[j0^NfUOa1[j0_NeUOa1[j0^NfUOb1Zj0^NfUOb1Zj0^NfUOb1Zj0^NfUOb1Zj0^NfUOb1Zj0^NfUOb1Zj0]NgUOc1Yj0]NgUOb1Zj0]NhUOb1Xj0^NhUOb1Xj0^NhUOb1Xj0^NhUOb1Xj0]NiUOc1Wj0]NiUOb1Xj0^NhUOb1Xj0^NgUOc1Yj0\\NhUOd1Xj0\\NhUOd1Xj0\\NhUOd1Xj0[NiUOe1Wj0[NiUOe1Wj0[NiUOe1Wj0[NhUOf1Xj0YNiUOg1Wj0YNiUOg1Wj0XNjUOh1Vj0VNlUOj1Tj0UNmUOk1Sj0TNnUOl1Rj0TNoUOk1Qj0TNoUOm1Qj0SNoUOl1Rj0TNnUOl1Rj0SNoUOm1Qj0SNoUOm1Qj0SNoUOm1Qj0RNPVOn1Pj0QNQVOo1oi0PNQVOQ2oi0oMQVOQ2oi0nMRVOR2ni0mMSVOS2mi0mMSVOS2mi0lMTVOT2li0lMTVOT2li0kMUVOU2ki0jMVVOV2ji0iMWVOW2ii0hMXVOX2hi0gMYVOX2hi0gMYVOY2gi0fMZVOZ2fi0eM[VO[2ei0dM\\VO\\2di0cM]VO\\2di0cM\\VO^2di0`M^VO`2bi0_M_VOb2`i0]MaVOc2_i0[McVOe2]i0YMdVOg2[k000000000000000000000O1000000000O10O1000000000000000O10000000000000000000O1000O1000000000000000000000000000000O010000O1001O0O100001O000O100001O0O100000O100O100000O2O00000000O100000000000000000000O1000O10000000000001O00O10000000000000000O01000000000000001O00000000O01000000000000000O1000000001O0O10000000O1000000000000000000O100000O2O000000000000O10000000000000000O100000O101O000000O1000000000000000O10000000000000000O100000000000O1000O10000000000O1000000000000000000000000000000O1000000O1001O000O100000O1000000000000O10O11O00000000O10000000000000000000O10O1000000001O0000O10000O1000000000O1000O10000000000000000000000O100000000000O101O000000001O000000O10000000000000000000O1000000000O100000000O100000000O1000O1000O10000000O2O000000O10000000000000O10000000O10O10001O00000000O10000000000001N010000000
000000000000000000O1000000000000000O1000O100000001OO1000000O1001O0000000000O100000O10001O0000O100000000001O000O1000O100000000000000000000O0100000001O00O10000000O100000000000000000O10O101O0000O10000000O1000000000000000000000000O100000000000000000000000000000000000O1000O10000000000000000000000000O100000000000000000000O10000000O100001O000000000000O100000O100000000000000000000O100000O1000001OO1000000000000000000000000O01000001O000000000000O1000000000O10000000000000000000000000O100000O101O0000O1000000000000000000000O100U^OmLaE4MOHNd03ZON238M3NG0jP3jFlL_Lg0dNEH1b0JB015MHm0KS>T3YHQMXINHNY10gN3a`0P3i;0000001N100000O100O1000000001N10O101O00iNmLaUOS3_j0nL_UOS3bj0kL_UOU3fk0000000000000O10000000000000000000WYOnLVJN02O1OOYj0_O^UO1e0OO1]OOO11ON31Z1ik0eNWTON0=L3Ne1ok0PNQTO01X3lk0c0_Oa0iNW1hMYJaXO4H7OP6me0cI^ZO3M4JQ8Ve0jGoZO6L3Mb8Te0[GnZOl9`d0QFa[Oc:hc0V1YOg0ZOf0[OkBT^O`=Ya0^Bh^OV>\\?hARAN10Nd>[>_AZA1M31K238OH0Lb>W>[AWBS1A^O0h>c=i@bBOOga0l, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. " + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_6.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2897.jpg", + "mask_rles": [ + { + "size": [ + 460, + 620 + ], + "counts": "ZVg37Q>8I5M3M2NDaBM]=OnBMP=1TCOj<1XCOg<0[COd<2]CMc<1_C0`<0aCO^<1cCO]<1cCO\\<3bCNY<:cCGU<\\1N5M5K1O11N2N2N10O000000000000000001O000O100000000O100O2L5eN\\CNH411ZdZ4" + }, + { + "size": [ + 460, + 620 + ], + "counts": "hh\\45U>4M4M2TBBf=c0O001N1O1O1000000001G\\BGe=8]BFe=`00001O3M1O0002N10O0000000001O1N2O000N22N1N2N2O0O10000000000000000001O000_OaB4a=Ib0OTQa3" + }, + { + "size": [ + 460, + 620 + ], + "counts": "i`[7:i=:L300O01O1O1O101N1O1O1O1L6JaXT1" + } + ], + "question": "Which one among , , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
None of the above are in the mirror" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_7.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1116.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "dbnh07ho02N2O2cPOKjn0f0000O10001O000000O11O0000001O0O2O00001O0O1CPQOHRo06>N1O3NX]S>" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "egX`04jo02O2K4M4M2N2O1B>O1O10000001TOVQO`0jn0_O\\QO and are in the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_8.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/16.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "dk0Y3gl000O1O1O100O1O100O1O100O1O100O2N100O100O100O1O100O100O1O1O1H8N2O1@`0VOQRO_OSn0`0g0O2N1O100O1O100O1O100O1O1O2N1O2L6F\\TZV1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "PP]63mo01O1O1O1O1O1O001O1O1O1O1O1O1O001O1O1O1O1O001O1O1O1O1O001O1O1O1O1O1O001O1O1O001O1O1O1O1O1O1O1O001O1O1O1O1O1O1O1O001O11O000O0100000O1000001O00000O100000O1000O1000000000000O1000000O1000000000000O10O02N1O1O1O1N2O0O3N1N2O1N1O2O2M2O1N101N2O1N2N2N2O1N3M2N2Nnmdm0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "nla:d0]o0Annc0k0dP\\OZ2gM1O1O00001O0000000001O0000000000000000000001O000000000000000000000000000000001O0000000000010O0000O10000M3M3N2O1O1O1O1O1O1O101N100O100O100O10000O10000O100O1N2M3N2N2N2O1O1O1O1N2mNS1O1O10000O1000000000001O00000000O1000O100O100O10000O100N2G9N3N1O3M3M[Tlh0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": 
"0Yd0Y2d@X2\\?hMd@X2_?eMa@[2b?aM_@_2d?]M^@b2i?VMX@j2l?QMU@o2P`0iLT@V3R`0`LR@`3]`0gKm_OY4me0000001O000000000000000000000000000000001O00000000000bMdTO8\\k0QO`UOj0`j0ROfUOl0Zj0SOhUOl0Xj0ROkUOm0Uj0QOnUOn0Rj0QOPVOn0Qj0POQVOo0oi0QOQVOo0oi0POSVOo0mi0POTVOP1li0POTVOP1mi0nNUVOQ1li0mNTVOT1mi0iNUVOW1ki0gNWVOY1ki0bNXVO^1ki0\\NXVOd1Sl0O0000000000000000001O0000000000000000001O00000000000000000000O1\\I\\NbXO1M0i0MYO4_4b1hb0i0o\\OWOPc0k0n\\OVOQc0n0iXO\\Ml3g1[c0]2Y[OeMgd0b3mXOcMSg0e500O1000000O100O1nJ[GR^ON01;OU3g8^>\\Go]O10O2N021M01l2f8Q?]Go]O1001N101N101Na2i8[?\\Go]O2OO2N101N101Na2i8[?\\Go]O2OO2N101N101Na2i8[?\\Go]O2OO2N101O0O2Na2i8[?]Gn]O1O03M101O0O2N`2j8\\?\\Gm]O;3D0O101N10`2h8U?WGZ^Oh0M]O001N101Na2h8U?WGY^Oi0N]ON03N0O10a2f8U?YGY^Oh0NE1F001Na2g8V?XGY^Oh0NE2EO11Na2g8W?VGY^Oi0NE3DO11N`2m8Z?WGQ^O:4E0M^3T9X>nFY^O0fim0CfSO4iW\\2JafdM0iodQ1" + } + ], + "question": "Which one among , , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. " + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_9.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2307.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "m]U61m>3^OORB2n=OQB1o=0oA1P>1nA1Q>0nA0R>1lA0T>0kA17I`=7XB05LT=KRCf0GBV=NnB`0JCY=0jB=KE[=0gBi0Z==101N101O01O5K4L2L1L4L4O11O2N2N2N3N1O101OO1N3M2N1N2O00O002I602O6K5J01O001O0OCdBYOZ=g0iBWOW=i0kBTOU=3hB;g=D[B0K0i=O^B0K0g=LbB3I0V>0jA0V>OlA0T>OmA0T>OnA0R>OQBOo=0RBOo=0ZeW2" + }, + { + "size": [ + 480, + 640 + ], + "counts": "Qo_84d>9I6H7L5H7J7L4L4M3M2K6F]N[C_1_10000001O001O1O1O1O1O1O1O1O001O0O2O1O3LaSR4" + } + ], + "question": "Among , , and , which mask is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
None of the above are in the mirror" + ], + "answer": "D", + "type": "mirror", + "image": "images/vqa_10.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1468.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "_`Uo0a0]o02N2N3N1O1O1O1M3G0\\QOVO27mm0Q1nQOSOPn0_101N100000000000000001O01O000000000000000000000000000000000000000000000000000000000000000000001O00000000000000000O100000000000000000000000000000000000000000000000000000001O00000000000000000000000O10000001O00000000000000001O000000000000001O0O3[NSROH`_f4" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "RoU3k0So03N2N1O1O100O1O100O1O10000O100O100O10001N100O100O1O100O100O100O10000O100O100O10000O100O100O10000O100O100O100O1O10000O1000000O100000000O100000000O100O100O1O1O1O1O100O100O100O100O100O1O100O100O100O1O100O100O1O1O010O1O1YOjMbSOY2Vl0l0M3N2N2M3N2O1O1O1N2O1N2O1O1O1O00100O100O100O100O10000000000O101OO2O000000001O001O001O001O001O1O2N2N2N2N1O3M4L2N1O001O00001O000000VOiLkTOW3oj0QMoTOo2Pk0SMoTOm2Pk0UMoTOk2Pk0VMPUOj2mj0ZMRUOf2kj0]MUUOc2jj0^MVUOb2ij0_MWUOa2hj0`MXUO`2fj0bMZUO^2ej0dMZUO\\2ej0eMZUO\\2ej0fMZUOZ2ej0hMYUOY2fj0a1O1O100O100000000000WL]UOU2cj0kM]UOU2cj0d1000000001O00000]K]UO4Oh2ej0PM_UO6Nj2cj0oL`UO5Ol2bj0nL_UO5On2cj0lL^UO50o2cj0kL]UO60o2ej0iL[UO70Q3hj0eLUUO=3n2jj0cLQUOa05l2Sk0VMlTOj2Tk0VMkTOk2Vk0o000001O0bLfTOZO4V2Vk0`NhTOUO, , and , which mask is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. " + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_11.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1555.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "]hl=7ao09N2N2N2O1M3M2L5N2N2O1O2N1O1O2N3^QOmNQn0d1M2N1O1O00000000000000000001O01O000000000000000000000000000000000000O1O2UOPY]h0" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. 
No" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_12.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1503.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "gdf05go08J4L4L4M2N2N2N2N2O1N101N101N10000O2O000O1000000000000000O1000000O2O000O100O1O10O1O10O01O1N2O1O100O101O0000001OO100000001O0O2O1N10000001O00000O01000001O000O10001N101N101O0O2N2N2N1O2N3M3L4K8Gd[YT1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "SYc01ho0=G7J4M3M2N2O0O2O0O2O1N101N2O001O000O2O00001O00000000000001O001O00000001O1O001O02N3M00O2N1O100000000001O00001O0001O000O100N2M3L4N2O1O1N2O1O1O1O100O100O01000O10O1O10O0100O1O2O0N2M3J6N2000001N10001O0001O01O00000010O00101N1O1O00000O100000000000000000O200O001O001O1O2N1O2N2N100O0O3N1O1O001N2O1N101O1N2N101N1N3N1O2M3N3LRg^R1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "Zjd1Z1\\n0iNiQO^1Qn0cNoQO^1Pn0bNPRO_1om0aNQRO`1nm0`NRRO`1nm0`NRRO`1nm0`NQROb1nm0_NQROa1om0_NPROb1Pn0710O00001O00001O001O00001O00001O00001O00001O00001N10001O0O2O000O2O00001O0001O8Hl0SOS1mNmSgT1" + } + ], + "question": "Which of the following mask :, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. None of the above are in the mirror" + ], + "answer": "D", + "type": "mirror", + "image": "images/vqa_13.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/136.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "bPk1=ao05K5L3L4L5K4L3M4K4O1O1O001O00000000O10000O100O100O1O100O1O10000O10000O100O1O1N2N2N2N2N2O1N2N2N2O1N2O1N2O1N2N2N2O1N2N2N2O1OQPWT1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "c\\VV1R3kl0c2[SO]J03:Nh07Xf0]8H3L2O2N1O1O100O100000000O1N2E" + } + ], + "question": "Among , , and , which mask is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
" + ], + "answer": "D", + "type": "mirror", + "image": "images/vqa_14.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/130.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "nml8l0Po0;F9H=B;E5L4L2O2N1O1O1N2[NcMfUO^2Xj0hMaUO[2ai0WMVVOc03Y2ei0\\MPVOb02W2li0kNlUOX1Tj0Z2O10000000000001O1O1Oe0[O3M001O00001O1N101O1O6J`0@4L5K4L8H4L7I6J2N2N2N3M2N4L4L4L3M5K5K4L4L4L3M2N2N2N2N2O1N2N2N2N1N3N2N1O2N2N1N3NVPbl0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "Q[k8c0Pm0^2N101O0000000000000000000000000000000001O0000000000000000000001O0010O0001O00000000O10001N100O1O100000000000000001O0001O000001O000000000000000000000001O001O>AX2cMhdcl0" + } + ], + "question": "Can you tell me if and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_15.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1900.jpg", + "mask_rles": [ + { + "size": [ + 530, + 730 + ], + "counts": "SSR24[`04L4L3M4M2N3M3N1O100O2N101N100O2O001N101O001O0O2O00001O00001N10001O001N101O00001N101O001O001N101O001N1000001O001O001O0O101O001O1N1000001N101O0O2O001N2O000O101O000O2O0O1O2O0O2O0O101O00001O00000O2O001O1O1O1O00001O000O2O001O000O2O001O1O0O2O00001O0O2O001O001O0O2O00001O00001N10001O001N2O001N10001N10001O001N101O1O000O2O00001O000O2O001O1N10001O000O2O00001N101O001O0O2O001O0000001N10001O001N101O1O0O2O00001N101O001O1O0010O01O012M2N1O01O000O2O0O1O2O0N201N100O2O0O1O1O2N1O1O2N100O1O2N100O2O0O1O1O2N1O1O100O2O0O1O1O010OO2N1001O1O100O1O101N100O1O2N1O100O101N1O1O100O2N1O101N1O101N1O1O101N1O1O101N1O100O2O0O1O101N1O1O2N1O101N100O2N1O100O2N1O100O2N1O100O2O0O1O1O2N100O2O0O101N1O1O1O2N100O1O101N1O100O1O1O1O0001O001O000O2O01O100O1O2N1N3M2N3K5L4K5K9EVih3" + }, + { + "size": [ + 530, 
+ 730 + ], + "counts": "nZW55U`09H8H8H8K4I7H8H8K5K5K5N2O2N100O1000001O00000000010O00000000000000001O0000000000001O0001O00000001O00000000001O000001O000001O000000000000001O000000000001O01O00000000001O0000000000010O0000000000001O0000000000001O01O000000000000000O1M3K5J6K5J6J7J5J6K5J6J6L4K5H9KjX_4" + }, + { + "size": [ + 530, + 730 + ], + "counts": "P1l3f<010O000001O0fM]ClN5S1^, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. " + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_16.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1108.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "joc12mo02N2O0O1O1O100O2O0O1000000O10000O10000O10000O10000O1000000O10000O10000O100O1000000O1000000O10000O100O10000O10000O1000000O1000000O10000O100O1000000O100000000O10000O100O10000O10000O1000000O10000O100O100O10000O100000000O10000O100O10000O10000O1000000O10O11O0O100O10000O10000000000O100000000O10000O100O100O1000000O10000O100O100O10000O01001O0O100000000O10000O1O10000O1000O01000000O2O0O10000O10000O1000000O100000000O10O02N1O10000O100000000O100O00100O1000000O10001O0O10000O1000O0101O0O10000O1000000O10001N100O10000O1000000O1000000O10000O100O100O100O100000000O100O100O100O1000000O1000000O10000O100O10000O10000O100000000O100O100O100O1000000O10000O100O100O10000O10000O10000O1000000O100O100O100O100000000O1000000O100O10000O10000O10000O1000000O1O100O10000O1000000000000O100001O1OS2mM1O1O001O00001O00001O1O1O1O0000001O000000001O001O00001O0000001O0000001O0000001O001O1O1O001O00001O0000001O001O00001O00001O001O001O001O001O001O00001O0000001O000000001O00001O001O0000001O001O001O1O00001O00001N101O000O2N1O2N2K_PT1JjokN2O0001N20O010O1O000001O0O1OXPZe0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "mhVg02; and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. 
Neither nor is in the mirror" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_17.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1080.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "noV41no0100O2O0O100O10000O100O10000O10000O10000O10000O100O10000O10000O1000000O10000O1000000O100O10000O1000000O1000000O1000000O1000000000000O100001O000000001O00001O0000001O00001O0000001O001O001O00001O00001O00001O001O00001N1000001N1000001N101N2NXPRP1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "ajd7Y3`k0[1F7L3O1O10O0100001N10001O0O10001N10000O2O000O10001N10000O101O0O101O0O10001N10001O0O10001N100O2O000O2O000O10001N1000000O2O000O101N10000O2O00001N10000O2O000O101O0O101O0O100O2O000O101O000O2O000O101O0O10001N10000O2O000O101N10000O2O000O2O000O101O0O10001N100O10001O0O10001O0O100O2O000O101O0O10001N10001N10001N100O10001N10000O2O1N2Ng0YOSUVk0" + } + ], + "question": "Could you confirm whether and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_18.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/640.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "Xl0b0^o00000O10O10O1000O10O10O10000000O10O1000O0100000O0100000O10O10O1000O100000O10O100O10O1000O10O10000O1000O10O1000O100000O01000O10O10O10000O01000O100O2NXTdU1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "Wm0[2em0000000O01000O10O1000O10O10O10000O0100000O0100000O0100000O01000O10O10O1000O01000O010000O10O10O100O01000O10O10O100O10O10O100O100O10O02aNZROa0gm0[O^ROc0an0N2N2N2_Oa0N2O1N2N2O2MPR`U1" + } + ], + "question": "Can you tell me if and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. 
Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_19.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1004.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "fTZj0X1[m0`1fRO^Mjl0S3I6J4N2N100O1O1O1O1O10O01OM3L5L3L4O1100O10O100000O0001N110O01]YOiLUJ4he0T3n_OeMh?\\2V@iMg?W2X@lMe?U2Y@nMf?Q2Z@RNd?n1Z@UNe?k1Z@WNe?i1Z@YNe?g1Y@\\Nf?d1Y@]Ng?c1V@aN^KUN\\c0Z3TAdN]KUN^c0W3TAfN\\KTN`c0V3SAgN[KUNbc0U3Q\\OfLN4X4n1TLUNec0S3l[O_MW1VO_1V2fMTNhc0[4h\\O^Ke1V2iMSNjc0_4a\\OZKh1X2kMPNkc0a4_\\OYKh1X2mMoMlc0a4]\\OZKg1Y2nMmMnc0a4[\\OZKf1[2PNkMoc0`4Z\\O\\Kc0JCd2@fMPd0a4Y\\O\\Kb0M_Od2DcMRd0a4W\\O_K;a3[O_LSd0a4V\\OT1jc0mNS\\OU1mc0mNn[OV1Rd0\\5000000O1001O00000000000O100000001O0]Im[OY1Sd0fNP\\OX1Pd0fNS\\OY1mc0SMj[OSN;i4kc0SMR\\OkM6Q5hc0SM^\\O^MMI2^4cc0[Nl]OSMiN]4[c0_Nn]ORMjN]4Yc0`Nm]ORMlN9QO\\3Vd0ZOm]OoLoN3YO^3lc0@n]OlLPO1^O^3ec0D\\@:d?F_@7a?Ib@4_?Kd@2\\?Nf@0Z?1g@MZ?2h@LX?4i@UOlJVN\\d0d2i@ROoJYNYd0d2j@POoJZNXd0g2i@nNPKZNXd0g2i@mNQK[NVd0h2j@kNQK\\NWd0h2j@iNRK^NSd0i2l@gNSK_NQd0k2l@dNVK_Noc0k2m@dNVK`Nmc0k2o@cNVK`Nmc0_1_[O@c5>SKbNlc0c0YCh0nHbNnc0OiC[1Qf0M3L4J6M4K4K6JVdP:" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "_VhQ1k0So0g0[Ob0]O=D8H3M1N2O00000O10O1000O101O0000000O10O10000O01000O1000O1000O1000O01000000000O10O100000O010000000O10000000O0100000000O10O100000O1000000O010000000O1000000O10000000O0100000000O100000000O1000000O1000000000O10O100000000O1000000O1000000000O010000000000O01000O1000000O100000O0100000000O0100000O1000000O10O1000O100000O10O100000O10O1000000O010000000O01000000000O010000000O0100000O10O100000O01000000000lJ" + } + ], + "question": "Are and located within the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. 
Neither nor is in the mirror" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_20.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1004.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "\\^]Q17go0:H1N2O00000000000000000001O00000000O100000001O000001N10000O101N2M3N2M4Lba]5" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "mVjl04lo00amT2h0RSjMZOEO1OT3d0^N^O^OMae0]3[YOgL3OOm1d0[OBYOYf0d5iYO^LVf0g6N2O1O001O0000000010O001O2N2O2M4L7Hk1VNT1jNQ1dNSUOeK^k0a1P3ROk0B=CUfR8" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "]\\nm06go04N10001O00100O010O10O0100O010O10O0100O010O10O10O10O0100O0100O1O010O1N6GUcl8" + } + ], + "question": "Which of the following mask :, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. None of the above are in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_21.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1621.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "iom24ko02O0O1N2O1O1O1O100O1O1O100O1O1O1O1O1O2OO0100O1O1O1O1N2O1O1O100O1O1O1O100O1O1O1O1O100O1O1O1O1O1O2N100O1O1O1O1O1O1O1O010O1O1O1O2N1O001O1O100O1O1O1O1O1O1O1O1O1O1O1O100O1O1O1O1O1O010O1O1O1O100O1O0000001O000000000000000000O2N100O1001O00O10000001O00O20O000000O2M2O1O100001O1O1O1O1O100O2M2O1O1O1O1O100O1O100O1O1O1O1O1O1O1O100O1O1O1O1O1N200O1O1O100O1O1O1O1O1O1O100O1O1O100O1O1O1O1N200O1O1O1O2O0O1O100O1O1O1O1O1O1O1O1O1O1O1O1O1O2N1O100O1O1O1O1O1O1O1O1O1O1O1O1O100O1O1O1O1N200O1O1O1O1O100O1O1N2O1O1O1O100O1O100O1O1O1O0101M3I6B?[Od0B`0VOj0\\Of0[O`Wjl0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "[bfb085MXo0d0I4L3N2M10010O000000000O02O000000000000000O2N2Gn]`d0" + } + ], + "question": "Can you tell me if and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. 
Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_22.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2584.jpg", + "mask_rles": [ + { + "size": [ + 460, + 620 + ], + "counts": "VP5;P>7I4M9G5K2N2N001O5K1O001O010O000010O01O10O01O010O001O010O0101Nc0]Od0[OVVR8" + }, + { + "size": [ + 460, + 620 + ], + "counts": "]:`1X=G33L4J7I3O3N2M2N5L13H4J6J6J and are in the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_23.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2130.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "]PQ2:F9G4L2O0O1O01O01O010O01O00010O0001O01O001UOYB=f=B`B9`=FfB6Z=IlB1T=OQCLo<5m001O00010O002O1MZTj6" + }, + { + "size": [ + 480, + 640 + ], + "counts": "ScY2b0[>:I1O1O001O000000000000001O00000000000000001O01O1O01O010O00010O01O01O01O000010O0001O010O02N010O00010O01O01O01O010O000010O01O000010O01O00010O001O000010O01O01O01O1O002O4KQaj5" + }, + { + "size": [ + 480, + 640 + ], + "counts": "nRg84g>6I6M3K5M4K4K5L4K5K6I6I7I7J6I7G9G9J6G9oNgLSFa3k9l0O0O100000000000000000000000O1O1O1H8G9I7D, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
None of the above are in the mirror" + ], + "answer": "D", + "type": "mirror", + "image": "images/vqa_24.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/297.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "aRgg0;_o0>C7H7H9I7I6K5K5L3L5L4M2L5L3N3K4L4N3N100O2O000O101O0000000O1000000001O00010O000O10O100001O00000000000000000000001O00O10O100000000O10O100000O10000O10000O100O100O10000O100000001O0O10000000000000010O01O001O2N3M5K1O10O01O000000001O01N100000000O2O0000000O1000000O10000O10001O000001O000000000000000000O2O001N2O1N2O2M3M3N2M3M3M3L4M2M4L4M3L5K4L4K7J5K5J6J5K6J9CQng:" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. No" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_25.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/552.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "\\kSS12ko05M2N1010O00010O00010O01O010O1O01O01O0010O0001O001O010O001O00001O00001O0000000000O2N3ITP=6goB5M3N100O010O1O1O100O1O10O0100O1001O0O2O0000001O000O2O00001O0O101O00001O0O101O000O101O000O10001O00001N1000001N100000001N10001NbTS1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "djYj03mo00O2ANnPO2Qo02kPOOTo03kPOMUo04iPOMWo04hPOLWo0?000001N3N2N1O2N3M1N2O1O000O101O000000000O10000000000O10000000000O10000000O100O1N2O1O1N2M3N2N2N2N2M3L5HZVj;" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "]Zm, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
" + ], + "answer": "D", + "type": "mirror", + "image": "images/vqa_26.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/46.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "djY9k0So05L3M2O0O100O10000O10001N1000000O10000000001O000O101O00001O000001O01O00001O101N5K:F6K0O1O10O010O2O10O4M0O02@YROaNkm0k0gROSO\\m0`0PSO]OXm0J`Vll0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "lS`o06go05L3I7H8N2N2N2O1O1O001O1N2O10O01O1O1O1O1O001O100O1O001O1O1O1O001O1O1O0010O010N`0^NSRO1VjW7" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "dPib03ko03N1O2N1O2M2O2N101N101N101O001N2O001N101O0O2O001N101O00001O010O010000O01000O100O010O100O010O100O001O1N1B?L4N2O1N3MWo\\c0" + } + ], + "question": "Among , , and , which mask is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. None of the above are in the mirror" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_27.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1258.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "bfgR13?4NO^j00dUOL3001N1O4Lc1ma0SOQE_OmHI9OO102L7L_1na0SOSE\\OoHI9OO2O9E11_1Pb0ROSE[OQIH81N2Nm0Gk0Qb0SOX^OnN^6<^II63N4Kk2ka0kMUESOnHL4373L5Kk2ka0kMUEROZIN03N1N9F12`1ma0PO_FQORH1N2b08TOP1ka0CoHUO_E1O2J0ha0h0QIROUF0ZO4_a0j0mJVOR5j0nJVOR5j0nJVOR5j0oJUOQ5k0oJUOQ5k0oJUOR5j0nJVOR5j0mJXOR5h0nJXOR5h0mJZOR5f0jHROaE2^1;T`0a0jHWOaEOZ1a0Y`09PCTOl09_JMN6a0DAi0ca04oBWOi0k2VRLR_OQOP2Q2^NQN\\OT6Ta0jJk^O\\OKLX1a2jN[4Wa0Y7O1O1O1O010O00001O001O001O000010O01O0000000000O1N2[Of0kNT1fNZ1cMf[OZG4K^e0V5ZZOnJ9M3M^h0b2\\4dN\\1F:N2O1O100O2O00000001O01N10000O101N1K5O2O1N3MTee2" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": 
"jj0h2n0fNni0Z1RVOfNmi0\\1QVOeNni0_1oUOaNoi0d1mUO]NRj0f1kUO[NTj0g1jUOZNVj0g1hUOZNWj0i1fUOXNYj0m1bUOTN^j0g3O1O1O1O1O100O100O1O100O100O1O100O100O1O1O100O100O010O100O101N100O100O100O100O100O100O100O101O000O1000000O1000000O1000000000000000000000000001N101O1O001O001O1O1O001O1O1O1O001O1O001O1O001O1O1O1O001O1O1O0O2O1O1O010O1O1O1O1O100O1O1O1N2O1O1O1O1O2N1O1O2N1O1O1O1O1O2O0O1O1O2N1O2N1O1O2N1O1O2N1O2N1O2N2N1O1O2N1O2N2N1O2N2N3M2N1O1O2N1O1O00O10oLYSOi2gl0WM\\SOf2dl0ZM^SOd2bl0\\M`SOb2al0]MbSO`2^l0`MeSO]2[l0dMhSOX2Xl0iMiSOU2Wl0lMiSOS2Wl0nMiSOQ2Wl0oMkSOo1Vl0QNjSOn1Vl0RNjSOn1Wl0RNhSOn1Yl0SNcSOo1^l0QN`SOP2`l0SNYSOR2gl0b02O1O1UNPSOd0Qm0UOVSOj0ll0QOXSOn0kl0lNYSOS1ll0eNXSOZ1hm0N3N1O1O1N1F_QOUObn0h0bQOUOan0f0`0I8IZQ\\Q1" + } + ], + "question": "Are and located within the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_28.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1843.jpg", + "mask_rles": [ + { + "size": [ + 530, + 730 + ], + "counts": "UnP88U`06M3M3M2O100O1M3N2N2M3N2M3O2N10000O1O1N2O21NfASOQ=l0bBA^=i11O1O4L1O00010N4M1O1O0O2O0O1O1N2L4M201O1O1O1N101^OXAYOi>8kAFV>3RBKo=0WBMn=MVB0]\\m2" + }, + { + "size": [ + 530, + 730 + ], + "counts": "Zgo:4^`05K2N0100O00O2J5J6I8I60001O00000010O00010O01O001O1O001O3M2M3N2N1O001O00001O00O10000O1000000O20O00000YG" + } + ], + "question": "Can you tell me if and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. 
Neither nor is in the mirror" + ], + "answer": "D", + "type": "mirror", + "image": "images/vqa_29.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1012.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "0i7Wh00000O1O1O1O1O1O1O100O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1N2O1O1O1O1O1O1O100O1O1O1O1O1O1O1O1O1O1O1N2O1O1O1O1O1O100O1O1O1O1O1O1O1O1O1O1O1N2O1O1O1O1O1O1O100O1O1O1O1O1O1O1O1O1O1N2O1O1O1O1O1O1O1O1O1O1O1O1O1O1O1N2ZOcTOfL`k0W3f0M3O1N2O1O1O1O100O1O1O1N2O1N2O1O100O1O1O1N2O1O1O1O1O100O1O1O1O1N2O1O1O1O1O100O1N2N2O1O1O1O100O1O1O1O1N2O1O1O1O1O1O100N2O1N2O1O1O1O100O1O1N2O1O1O1N2O1O1O100O1O1N2O1O1O100O1O1O1O1O1N2O1O1O1O1O1O100O1O1O1N2O1O1O1N2O1O1NRPWQ1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "`Ph61cPi:2lnWE2iPOOVo02jPOOUo00lPO1To0MlPO4To0LlPO5So0JnPO7Qo0InPO9Ro0FlPO=Zo01O2N2O1O00001O0O2M3N2BWOcQOl0Yn0VOfQOk0Yn0UOgQOl0Xn0SOhQOo0Xn0POhQOQ1bn02O1N3M2N2K5N102gQOaNmm0l1M5USOmM^O1M8IH^j0X2QVOi0^OYM2F0K50T44kK9LN_1FR?\\9]_ORHn0f2TORLU?Ra0\\O`2`MQ1oNj0VO3@[ZO^Fie0Y1WZOeNOO2;O00O4B94EMb21^M021LT1Pf0SOSZOG030Kj7d0^g0M1N2O0O2M2N20TQOIom07m0O10000O1001O00000000O100M30001O03M1O00000O10L4000000000000000000000000002M3N000000O1M3000001O0000000000000000000002N1O0000O1N20000000ZXO1[?1W@k0Y?UOd@Q1Y?oNe@U1W4TO[DCW:4VMV1T4G]NSO_MW1Q4J^NoN`MX1Q4J_NoN_MX1P4K`NnN]MY1R4JaNnNVLKhJb1_:GbNlNaK6aJ`2Z;_NdNkN_Kf3^5aMQOkN^KR5mJPKNO5J4O1LZ12hNO`3_1j5jNZKa;mJdD1GOL7:J14NLX1f9hNZK[`0d4f_O\\K]`0TKe_OJ30O2M3Fc6:XNVd0SKa[O\\5:AZg0QKeXO6E^1>[3]j0O00010O2O1O001OO100O1O1O1000000O01O1N2O010O1O100O0100N110000O1000000O100O\\UOVKTj0j4lUOWKSj0h4nUOXKRj0g4d00000O10000O1000000O1000000O1O1000000O100O1O100O100O10000O1000fUOcKQi0\\4PWOdKPi0\\4oVOfKPi0Z4PWOfKPi0Z4PWOfKPi0Z4PWOfKPi0Y4QWOgKoh0Y4QWOgKoh0Y4QWOgKoh0X4SWOgKmh0Y4]1O100O10000O1000iUOjKdh0V4[WOlKdh0T4\\WOlKdh0S4]WOnKbh0R4^WOnKbh0R4^WOnKbh0Q4_WOoKah0Q4`WOnK`h0Q4bWOmK_h0R4h10000O10000O10000O1000PVOPLPh0o3QXOQLog0o3Q2O100O1000000O100O1O100O100O10000000000
00O10000O10000O10000O1000000O100O100O10000O1000000O10000O10000O1O100O1000000O100O100O100O100O100O10000O1000000O1000000O100O1O10000O10000O100000000O100O100O100000oEVMlFi2T9fM^FZ2b9gM]FY2b9iM]FW2d9hM\\FW2e9iM[FW2f9gM[FY2j9bMVF]2m9aMSF_2n9^MTFb2ec0O100O100001O004Le1PSOkKkj0P;WUOjD:Ld035Nh?e=Y_O]B92HKW`0]`0__OR@3c01ROW=ib0eB`]O7Em;gd0lCg[O2GS:Yh0nEcWOQ7kk0QI\\TOX3[o0kL3M1O001O000000000000001O0000000000000000O1000000000000000000000000000000000000000000000000001O000000000000000000000000000000000000000000000000000000000000000000000000O10000001O00000000000000000000000000O100000000001O0000000000000000O100000000000000000000001O0000000000O10000000000000000000000001O0000000000]J1lUO83JU4Mge0[1UZOeNje0^1TZObNle0`1RZO`Nne0a1QZO_Noe0b1PZO^NPf0b1PZO^NPf0c1oYO]NQf0c1oYO]NQf0c1oYO]NQf0c1oYO]NQf0c1oYO]NQf0c1oYO]NQf0c1oYO]NRf0b1nYO^NRf0b1nYO^NRf0b1nYO^NSf0a1mYO_NSf0a1mYO_NSf0a1mYO_NSf0a1mYO_NSf0b1lYO^NTf0b1lYO^NUf0a1kYO_NUf0a1kYO_NUf0a1kYO_NUf0a1kYO_NUf0a1kYO_NVf0`1jYO`NVf0a1iYO_NWf0a1iYO_NWf0b1hYO^NYf0b1fYO^NZf0b1fYO^NZf0b1fYO^NZf0b1fYO^NZf0b1fYO^N[f0a1eYO_N[f0b1dYO^N\\f0b1dYO^N\\f0b1dYO^N\\f0c1cYO]N]f0c1cYO]N]f0c1cYO]N^f0b1bYO^N^f0b1bYO^N^f0b1aYO_N_f0b1`YO^N`f0b1`YO^N`f0c1_YO]Nbf0b1^YO^Nbf0c1]YO]Ncf0c1]YO]Ndf0c1[YO]Nef0c1[YO]Nef0c1[YO]Nef0c1[YO]Nff0b1ZYO^Nff0b1ZYO^Nff0a1[YO_Nef0k0cUOFg3@ff0h0fUOFe3Bff00bUO>:Ee3Mhf0N_UO0;O`33ik00000000001O000000000000001O0000O100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "]hg==_o06H8K5K3N3M3M2O2N3L4L4M1O2N1O1O2M3N7I4L7I7I7J1N1O2O1N101N1O1O101N100O101O000O101N10001N1000001O00001O00001O001O001O001O001O00001O010O001O000010O01O001O0010O010O010O1O01O01O01O00010O0010O0001O01O01O0010O01O010O001O010O0010O10O01O10O010O000010O0001O010O0010O01O1O01O01O010O0010O010O010O010O010O01O01O01O0010O01O00010O02Mj1VLbRO^1lSXe0" + }, + { + "size": [ 
+ 1024, + 1280 + ], + "counts": "VT_3;5Oen0Q1cQOROF3011a0Xk0e3K4L4M3L3N2M3N1O1O2O0O1O2N1O1O1O2N1O1O2N1O2M2O2N1O001O1O001O1O1O1O2N1O1O1O1O1O1O1O2\\WO^Igg0c6XXO^Igg0c6YXO^Ieg0c6[XO]Idg0d6\\XO\\Icg0e6]XOZIcg0g6^XOXI`g0j6j01O1O1O1O1O1O101N1N2O1O1O2N1O1O1O1O2N1O1N2O2N1O1OoNkWOYJUh0g5lWOXJSh0i5mWOWJRh0i5PXOVJog0k5QXOUJng0l5SXOSJkg0o5UXOQJjg0o5WXOQJhg0P6YXOPJeg0Q6[XOoIdg0R6\\XOnIdg0R6\\XOnIcg0T6\\XOlIcg0U6]XOkIag0W6_XOiI`g0Y6^XOhIag0Y6_XOgI`g0[6^XOgI`g0]7O1O100O1O1O1N2O1N2O2O0O1O1O1O1O1O1O1O10001N100O100N2000000000001O000000000000000000001O000000000000001O000000001O00000000001O0000000000000000001O000000001O00000000000000001O000000001O00001O00001O001O00001O00O10002N0000000001O0001VOcYOnG^f0Q8fYOlG[f0R8nYOeGTf0Z8PZOaGdi06mUO6N\\3gn0^L_QO8Xg9DaXF860M2NO4M6Flk0]6kSOoI4D\\h0^8iN5N1XYOcGke0]8TZOfGje0Z8UZOgGke0Y8UZOhGje0X8VZOhGje0X8UZOjGje0V8UZOkGke0U8UZOkGke0U8TZOlGle0T8TZOlGle0U8SZOlGle0T8TZOlGle0T8TZOlGle0T8TZOlGle0T8TZOlGle0T8TZOlGle0U8SZOkGme0U8SZOkGme0U8RZOlGne0T8RZOlGne0T8RZOmGme0T8RZOlGne0T8RZOlGne0T8RZOlGne0T8RZOlGne0T8RZOlGne0U8QZOkGoe0U8QZOkGoe0U8QZOlGne0U8PZOlGPf0T8PZOlGPf0T8oYOmGQf0S8oYOmGQf0S8oYOmGQf0S8nYOnGRf0S8mYOnGRf0j3PZO@KgLUf0h3RZOhe0CWZO=ge0EXZOc6\\c0WIW\\O5, , and , which mask is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_30.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1390.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "fPho09do08XOC_QOc0_n0?000000000000000000000000000000000000000001O000000000000000000O1000000001O000000000000000000000000000000001O000000000000000000000000001O000000000000O1O1O1N2M3M3K5L4K5L4M3JVP_5" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "YSQR13io05J6K4M3N2L4K5M3M3M3M3K5L4K5M3M2M4J6L3N3M2M4L3L4N1N3N1O12N2N2O1N2O1N2N2N2N2N2M3N2N2N2N2N2M3M3K5M3M3N2L4M3N2K5M3M3N2N2N2M3N2M3N2N2O1NRPl3" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "PPW43mo05K3M3M3M3M2N3M3M2N3M2N4L6J4L3M4L4L3M3M4L4L4L4L3M5K3M4L3M4L4L3M3M3M4L1O1O00000000001O000000000000000000_NPTORORl0j0RTOTOQl0h0TTOTOok0h0UTOUOok0f0TTOXOPl0c0STO[OQl0`0TTO[OQl0`0RTO^ORl0=PTOBTl09oSOETl07QTOESl07PTOEVl05mSOIWl02nSOGYl04m1K[ndQ1" + } + ], + "question": "Which one among , , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. None of the above are in the mirror" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_31.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/602.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "]c]Q17bo08WMDfSO4EONm1]l0^1eUOWM^g0k2[XO^Mbg0b2[XObMdg0^2YXOeMgg0[2TXOkMkg0V2PWOiL7T1ih0S2nVOnL1T1Qi0n1nVORMGV1[i0h1mVOQOSi0o0mVOQOSi0o0lVOROTi0n0lVOROTi0o0jVOROVi0n0iVOSOWi0m0iVOSOWi0o0fVOROZi0j3000000000O1000000000O1000O1M3oNR1gNX1eM[TO`N6Hml0V1iROmNcn0:^QOHk^\\5" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. 
No" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_32.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/130.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "i`TQ1;Zo0>H`0@>C`0_O;E:H?A6I]MoSOa1jk0aNYTO`1ck0\\NdTOe1Wk0[NmTOf1oj0XNVUOh1hj0XNZUOi1Pj0gNUVOZ1hi0cN]VO^1ai0aNaVO`1]i0aNcVOa1[i0_NeVOb1Zi0_NfVOa1Xi0`NhVOa1Wi0`NhVO`1Xi0aNgVO_1Xi0dNfVOZOUOn0Uj0KbVOUO]Oo0Qj0N`VOoNDR1li01^VOkNIR1ii08ZVOdN1Q1fi0?UVO]N9R1ci0e0PVOWN`0R1ai0m0jUOoMg0S1_i0P1hUOlMk0R1^i0V1fUOcMn0V1^i0\\1hVOdNYi0OcUOLT13[i0NiUOIl09\\i0LmUOFh0=]i0KPVOCe0a0]i0HTVOB`0e0^i0GYVO[O, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. " + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_33.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1944.jpg", + "mask_rles": [ + { + "size": [ + 530, + 730 + ], + "counts": "j9`0[3\\2`6dMaI[2_6fMaIY2_6gMbIX2^6hMcIW2]6iMdIU2]6jMdHFZN`2R9jMbHLYNY2U9kMaH1VNT2Y9kMaH3UNQ2[9jMaH:oMl1`9jMbHRO`Bh0i>L3M2N1O0000000000000000000000000000O1M3M3M3O100N2O1O1N2O1O1000000O100000000001O000000000000O1000000001O0000000000000000000000O1001O0000O1000000001O000000000000000000O100000000001OO1000000000000001O00000000O10000000000000000000000000000000000000000000000000000000000000000000000000O11O00000000O11O0000O100001O000000000000000O01001O000000000000000000O11O000000000000000000000000000000000000000000O11O000000000000000000000000000000000000000000O11O0000000000000000000000000000000000000000000000000000000000000000O11O00000000000000000000O11O000000000000000000O11O0000000000000000O100001O0000000000000000O1001O0000000000000000O100001O0000000000000000O1001O000000000000000000O11O00000000000000000000000000000000000000000001O01O0O10O1000001O00000001O000000_Oo@AQ?9ZABg>9^AFb>7bAH_>2gAM[>LkA3[?O1O10O01NUaZ2" + }, + { + "size": [ + 530, + 730 + ], + "counts": 
"PmY31Y`01j_O3P`0;M3@a0L4J7K6J6K2O1O2M3M3N2N1O1O1N7J4L1O1O103L111M0101N12O2M001OO0AaBlM`=d2OjNdBUO\\=Q2O0101N1O100O1O1O100O1O1O001O1O1O2O0O1O001O1O1O1O1O1O2O0O1O1O1O1O1O1O001O010OaNnM^DR2b;nM^DR2b;nM_DQ2a;oM_DQ2Q=00000001O000000001O000000000000000000001O0000000000cNRNVDn1j;RNVDn1j;RNVDn1j;SNUDm1k;UNSDl1l;UNTDj1l;WNSDi1m;WNSDi1m;XNRDh1n;YNQDg1Pc0eAB[>=_AIa>T11O0000001O0001O000001N1001O000001O000000000000000000000000001O000000000000000O2O000O101M2N2N3K6C and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "D", + "type": "mirror", + "image": "images/vqa_34.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2347.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "k`[33k>2F:001O1O1O1O1O2N2N0O2O00Wmi5" + }, + { + "size": [ + 480, + 640 + ], + "counts": "jVj15f>6L4J5K6I6L5G9^Ob0ROn0M2O2O0O1000000O100000000O100000000000000000000000000000000000000000000000000000000000000cEZM`8f2]GeM[8[2bGkM[8U2bGQN[8o1aG[NY8e1fGcNS8\\1mGlNl7T1THSOe7m0ZH\\O^7d0aHBZ7>kGoLd0j2[77QHPMa0n2[72SHQMa0R3_OTM\\7Q3RI;m6m3O1O001O000010O00000000000000000\\KiH_1W7aNiHSM7m3P7POjHQM and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. 
Neither nor is in the mirror" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_35.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/118.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "PPf64lo04L1O2N1O1O1O1O1O1O2N3M5K1O1O1O1O001O0000000000000000001O00000000001O00001O00001O00001O00000000001O00001O0000001O000000001O0000001O000000001O00001O0000001O00000000001O0000001O00001O001O1O1O2N1O1O000000000000000000O100000000001O000000000000001O0000000000001O001O00001O00001O0000001O000000001O00000000000000000000O10000000000O10000000002M3N1O2N1O2N1O1O2N2N1O1O1O1O1O1O1O2N1O1O1O1O001O01O01O0000000000O2N1O100O1O1O1N3O0O10000O2O000O100000000000000001O1O1O1N1O2N1N2N3M2O2M3M3N3K6KPoji0" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. No" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_36.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2158.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "[jX5R2n3900O1N20WA0a>1521O1O00000000000000O11O00SO0jB0U=7eBI[=b0XB@h=l000000001O9G=CB`0@8H1O0000000000000000000O100000000000000000000O100000000000O2O1O1O7I`0A5I5La0jMXC5_Of0V>M4L5K4LWgR7" + }, + { + "size": [ + 480, + 640 + ], + "counts": "hbj44i>:VCI\\:;^EK^::\\EIb::[EHc:, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. None of the above are in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_37.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/105.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "dlQS14_o0S1hNS1ROg0_O=E and located within the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. 
Neither nor is in the mirror" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_38.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/869.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "`V`9=4H2Min0f2XN7I2N1O100O10000O100O10O10O100000O0100O100000O0100000000O10O10000000O10O1000O10000O0100000O1000O10O1000000O01000000000O10O1000O100000O10O100000O1000O100000O10O1000000O010000000O10000O01000000000O100000O01000000O1000O01000000O10O100000O100000O01000000O10O10O1000O10O100000O1000O1000O01000000O01000O10000O100000O10O1000000O1000O100000O1000O010000O1000O1000O10000O1000O1000O1000000O01000O1000O010000O10O01000000O0100000000O10000000000O10001O00000O1000000O2O0000000O101O000000001N100000001N10001O00000O2O0000001N1000001O000O101O001O0O101O00001O001O0O2O001O001N101O001O001N101O001O0O2O001O001O0O2O001O001O001N101O1O001O1O1O2Mg0YOo0nNfhed0" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. No" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_39.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2369.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "o7o6Q81O00000000O2O3N2L5L4L2O3L7I4L2M3N2O0N10kN[JPId5o6cJkH]5S7hJjHX5U7kJiHU5W7oJeHQ5[7SKaHm4_7VK^Hj4a7[KZHf4f7Z100O100001O2N2N3M4L1O001O0000O1O1D7G6L3N2N3N2N00000002N2N3L2M3M2N3O0O3M3LTT`5" + } + ], + "question": "Are and located within the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. 
Neither nor is in the mirror" + ], + "answer": "D", + "type": "mirror", + "image": "images/vqa_40.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/48.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "`Zi09ao08I5J7M2O1000001OO1000000001O000000000001O0000000000000000000010O0000001O000000000000000000000001O000000000001O00000000000001O0000000000000000001O00000000000000000001O000000000001O000O3LneXT1" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. No" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_41.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1081.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "go[b06`n0\\1SOk0XOh0H8K6L3VN[MWVOg2bi0jMQVOY2ji0QNoUOQ2mi0S2N2M3L4N2O1OdJbVO\\Oe0f3hh0lLPXOP3og0gL\\XOX3cg0gL`XOX3_g0hLcXOW3]g0iLcXOW3\\g0jLeXOU3[g0jLgXOU3Yg0kLhXOT3Wg0mLiXOS3Wg0nLiXOQ3Wg0PMiXOo2Wg0SMhXOl2Xg0XMfXOf2Zg0]McXOc2]g0`MaXO^2`g0fM]XOY2cg0kMYXOU2gg0WNnWOh1Qh0]NlWOb1Th0cNfWO^1Zh0kN[WOW1eh0Q31KdVOfI[i0f5fVOfJbi0Z5^VOeJ5CUi0f0QWOa4EkJYi0b0[WOc4eh0\\KeWO\\4Yh0eKPXOR4og0oK\\XO3oNV2ch0jMmYOS2ke0VNXZOe1fe0_N`ZOZ1^e0hNmZOl0Se0VOS\\OmN]KNfh0U1d52O00O2TO\\QOOPo0JmWjc0" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. 
No" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_42.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/778.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "ZPa=l0jn0=M1O2N1O1O1O1O001O1O001O1O001O001O00001O001O00001O0000001O00001O000000001O00000000001O000000000000000000000000000000O10000000000O100000000O1000000O100O100O100O100O1O1O1O1N2N2A?D\\Peg0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "XQV98Ro0Q1D3M2M3O1N2O1N2O0O2O1O0O2O001O001N10001O001O00001N10001O0000001O0O100000001O0O1000000000000O2O000000000O10000000000O100000O1000000000O10O100000N20O10000O100O100O100O1O1O3M2L7hNmPQl0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "Vl`21jo09J4L4L3N2N2N2aSOWOci0l0YVOZOci0g0YVO_Odi0c0YVO@ei0a0ZVOAkh0@`UOP1d1Bfh0J]UOg0l1@ch00\\UOa0P2Abh01[UO?R2B`h04ZUO;U2B`h0_1ZWOgNdh0Z1[WOgNdh0Z1ZWOhNeh0Y1ZWOhNeh0Y1ZWOhNeh0Y1ZWOhNeh0Y1YWOiNfh0X1XWOjNgh0V1XWOlNgh0U1WWOlNih0U1VWOlNih0T1WWOmNhh0T1WWOmNhh0S1VWOQOhh0P1SWOUOlh0k0QWOYOoh0g0oVO[OPi0e0oVO^OPi0b0nVO@Qi0`0nVOCPi0=nVOFRi0:lVOHTi09gVOKXi06fVOLZi04dVON\\i06_VOL`i06]VOLai06]VOKci07ZVOKei08WVOIhi0Y3N100O1O1O1O10001N1000000O1000000O010O100O01O001M3K4O2O10O10O10O10000000001O0000000001O01O01O010O010OO3N2M3N1N1N3N1O2N1O2N101N1O1O1O101O0O1000000000O1000000kLVWOTOjh0i0]WOSOch0m0_WORO`h0m0dWOPO\\h0iNTWOoN1R1a0T1Zh0jNiWO00T1Vh0lNlWOOOT1Uh0mNmWONOT1Uh0lNmWOO0T1Sh0mNmWOO0T1Sh0mNnWON0T1Rh0mNoWOO0S1Qh0nNoWOO1R1Ph0nNQXON0T1Ph0mNPXOO1S1og0mNQXO01Q1og0nNQXO11P1og0nNPXO22o0ng0nNQXO32n0mg0nNSXO30o0mg0mNTXO40n0lg0mNUXO51l0jg0oNVXO33l0gg0POWXO44j0fg0POWXO56i0cg0QOYXO57g0ag0QOZXO7, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
None of the above are in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_43.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2237.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "neU17h>3N1O00001O00000000000001O0001O000000000010O000000000000001NQig7" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. No" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_44.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1341.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "^`Rc0 and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_45.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1004.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "bm0^2bm00001O1O1O1O1O1O1O1O1O1O1O1O2N1O1O1N2O2N1O1O1O1O1O1O2N101N1O1O3M7I6I4LkPkV1" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. 
No" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_46.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1555.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "VlYb02ko06J4L3M4L4L3N3M2L4N3L3N2N2M2O1O1N1N200O0001O1O10N100100O010O0001O10O1O01N1100O000010O1O0^FkN]DT1b;8TCGna=TOQ\\OL[6o0f=nNY\\O1o5Q1j=lN_\\ONc5V1P>lN`\\ON\\5T1W>nN_\\OMX5U1Z>nN`\\OMR5U1`>nN`\\OMk4W1f>lN`\\ONf4V1m>kN_\\OOa4U1R?mN]\\ON^4U1W?mN]\\ONY4T1\\?oN[\\ONV4R1a?QOY\\ONS4P1e?UOU\\OOQ4l0k?YOP\\OOP4h0h?T2Q@mMP`0X2j_OhMW`0[2d_OeM_`0[2^_OeMe`0[2Y_OeMi`0[2U_OdMn`0\\2P_OcMSa0]2k^ObMXa0_2d^OaM_a0a2[^OaMga0`2T^OaMoa0`2m]OaMUb0_2i]OaMYb0_2_]OfMdb0Z2V]OiMmb0W2P]OhMTc0X2h\\OiM[c0W2b\\OjM`c0V2^\\OiMec0W2Y\\OiMic0W2U\\OgMoc0Y2n[OhMTd0X2j[OhMXd0X2g[OgM[d0Y2c[OgM_d0Y2`[OfMbd0Z2gYOXL9\\1Sf0\\2cYOZL8Y1Wf0]2`YO]L5W1\\f0[2`YO`L2R1af0^2]YObLOo0ff0_2[YOcLNl0if0a2YYOeLLi0lf0b2XYOgLIf0Qg0d2VYOfLId0Rg0f2UYOgLHa0Tg0j2bXOoKNg0;=Xg0m2YXOWL1b0<7Y7POP8R4R@[L9b050`7UOn7a4\\@^L2Lc7YOm7]4^@_L0K[5AoKLW>Y4^@aLNI\\5K`KOf>l3`@lMU5`0X:e1b@nMn3h1Z;:h@TN[1f4W=UM]Ab7^>^HbAd7\\>\\HcAf7\\>[HbAg7]>n400O100000M3TOkAc_O_>V`0i0N2O1TLRAYDR1Km=Z:WEaEk:i8lFTGV9l7o7jLUXORNPh0l1TXOoMog0P2TXOlMng0T2SXOiMog0V2SXO^LMLog0f3WXOZLL1kg0e3hXO]KRO6>j0eg0c3kYO_LSf0a3nYOaLoe0_3QZOcLme0\\3TZOfLje0Z3VZOhLhe0W3ZZOiLee0h2YXOVLS2S1ce0f2_XOQLP2\\1^e0b2P[OaMmd0_2T[OcMid0]2X[OdMfd0[2[[OhMbd0X2_[OjM^d0U2d[OmMYd0S2h[OPNTd0P2n[OQNoc0o1S\\ORNjc0n1W\\OTNfc0l1\\\\OTNbc0l1b\\OSN[c0n1i\\OPNTc0P2R]OmMkb0S2X]OmMeb0S2_]OmM]b0S2k]OgMSb0Y2P^OgMma0Y2V^OgMga0Y2\\^OgMaa0Y2a^OhM\\a0X2g^OhMWa0X2m^OfMPa0Z2S_OfMj`0Z2W_OgMg`0Y2\\_OgMa`0Y2b_OhMZ`0X2h_OiMU`0W2n_OiMo?W2U@hMh?W2]@hM`?X2c@gM[?Y2i@fMT?Z2n@gMo>Y2SAgMk>Y2WAhMf>X2]AiM_>W2eAhMX>X2kAgMS>Z2oAfMn=Z2UBfMh=Z2\\BfM`=Z2dBeMY=[2jBdMT=\\2nBeMo<[2TCdMj<\\2YCdMd<\\2_CeM]<[2gCdMV<\\2mCcMQ<]2QDdMl;\\2WDeMe;[2_DcM_;]2dDcMY;]2iDdMT;\\2nDeMo:[2UEdMh:\\2[EdMb:\\2`EeM^:Z2eEeMZ:Y2jEgM[::Q^OVOh7a0U=lNPCS1Pf00O0010O10010N10011NO1O1O02O01N2
OO2N04N2NON10O3L3M2O22ON11OO10O1N2000O00O2N1O1N2O0002N1N1O1jNjQO`0Vn0_OPRO, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. None of the above are in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_47.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1090.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "^m0a0_o000000O10000000000000000000O10O1000000O10001O00000001O001O001O1O000001O000000000O01000000000ZROBjk0>STOFlk0:RTOImk06STOLlk04RTONnk02QTOOok01PTO1ok0OPTO2Pl0NoSO3Ql0MnSO4Rl0LmSO5Sl0KlSO6Tl0IkSO9Ul0GiSO;Xl0DeSO?[l0AcSOa0]l0_OaSOc0_l0]O^SOf0bl0[OZSOh0fl0XOVSOl0jl0TOSSOo0ml0ROnROR1Rm0QObROX1^m0e00000000000O10000001O0000001O3M;E5K4L5K3M3M5K3M3L2O2N2N1O1N2N1O2N101N1O2N1O2O1N2M5KdP`T1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "hVdQ12ko04M3M3M3I7L4L4M3M2L5K5M3L4N1O2N1N3N2N7I8H:F9G9XYO]Mc?f6XZOhK[e0b7L2O0`]OZEe?g:i2001O1O1O001O010O1O1O1O1O010O00O1O1YIV[O_NMgM>FF9N0j0JZd0o3^[OPM@oNYi0Q4Y21O01O0001O001O01O01O010O001O010O001O001O01O01O001O01O01O00010O001O0010O01O00010O001O00001O010O001O010O00001O010O001O00010O00001O010O001O00001O010O00001O10O01O00001O001O010O1O00010O00001O001O0010O0001O00100N101O00010O00010O00010O001O0010O01O00100O00001O0010O000001O0010O00010O01O001O00010O001O10O010O000010O010XO" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "fRj, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_48.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1090.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "fnSf0T1Wn0h0J4eKWN[ZOl1ae0gNgYOWOmLS2[i0nNiYOQ1Vf0SOiYOk0Wf0WOjYOf0Vf0\\OjYOb0Vf0CiYO9Wf0LfYO2Zf02eYOK[f08fYODZf0?eYO_O[f0e0eYOWO[f0n0dYOnN\\f0V1dYOfN\\f0^1eYO]N\\f0e1eYOWN[f0n1cYOoM]f0V2aYOgM_f0]2`YO`Maf0c2^YOYMcf0j2]YORMdf0Q3T35L4K4L4L4M4K4L3M3M5L4K4L3N2M4M3L4M2N3N13MN3M2M4L3M3N1N0010O0000001O00001O001O00001O1O001O1O001O1O001O1O1O1O1O1O2N1O001O001O1O1O1O1O1O001O1O1O2N5jKPUOR3Tl0K3M2N1O1O1O1O1O001O1O1O1O001O1O001O001O001O1O001O001O00001O001O001O1O001O1O00001O000000000000000000000000000O10001N1M3I7WOi0D>_Ob0_OPSj<" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "^j>6go05L3N2O1O00001O0O1O100O1O1N2O1N2O1O1O1O100O1O0001N1O2O001O1O10O01O100O1O00100O1O1O10000O100000O100000000000000000001O010O001O010O00100O0010001N100O1O1O100O0001O001N2O0O2O1N2O1N2O0O2O1N2N2N2NjUaT1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "mckT12Zo07oPOOkn04TQO6an0K^QO=[n0CeQOl0km0UOUROQ1em0oN[ROR1dm0oN[ROR1dm0nN\\ROR1cm0oN]ROQ1cm0oN]ROR1bm0nN^ROR1am0oN_ROQ1am0PO^ROQ1am0oN_ROQ1`m0PO`ROQ1_m0oNaROR1]m0oNcROS1[m0mNeROT1Zm0lNeROW1Ym0iNgROX1Xm0hNhROZ1Um0hNjROY1Um0gNkROY1Um0gNkROY1Um0gNkROY1Um0gNkROX1Vm0gNkROY1Um0gNkROX1Vm0hNjROW1Vm0jNjROU1Wm0kNjROR1Xm0nNhROR1Xm0mNiROR1Ym0mNgROR1\\m0kNeROU1dm0aN^RO]1Sn0O3Nd0[O6I?AoZj1" + } + ], + "question": "Among , , and , which mask is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
None of the above are in the mirror" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_49.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1072.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "nbk;5?M25om0^1K2M2O0000001O0000001O000010O00000000000O1000000000000000000000000001O0000001O001O000001O000000000001O0000000000000000O1O2K4Aa0oNo]Sj0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "kf0V7jh0000000001O0000000000000000100O000000O101N10000001O00001O01O0001ObMZWO[KYf0iMhZOo1lN8\\f0iMdZOV2mN1_f0jM_ZO[2oNLbf0lMZZO\\2ROHdf0mMVZO`2SODgf0mMSZOb2UOAhf0oMmYOf2XO\\Okf0PNfYOk2\\OVOnf0PNaYOR3\\OoNSg0QN[YOW3^OiNVg0VNRYOX3FbNXg0o2dXORM\\g0Q3`XOPM`g0S3]XOmLcg0T3[XOmLeg0g50\\M[XOmLeg0S3[XOmLeg0R3\\XOnLdg0R3\\XOnLdg0Q3^XOnLbg0Q3_XOoLbg0n2`XORM`g0l2cXOSM]g0Z2dXOVK7_2Vg0e0mZOYOSe0e0T[OVOld0h0W[OWOid0h0Y[OWOhd0g0Y[OYOgd0f0Z[OZOfd0f0[[OYOed0f0\\[OZOdd0f0][OYOcd0f0^[OZObd0f0^[OZObd0e0_[O\\O`d0d0`[O\\O`d0d0`[O\\O`d0d0`[O\\Oad0c0_[O]Oad0b0`[O^O`d0b0`[O^O`d0b0`[O^O`d0b0_[O^Obd0b0^[O^Obd0b0^[O^Obd0b0^[O^Obd0b0^[O^Obd0b0^[O^Obd0b0^[O^Ocd0a0\\[O@dd0`0\\[O@dd0?][OAcd0?\\[OBdd0?[[OAed0?[[OAed0`0Z[O@fd0`0Y[OAgd0`0X[OAgd0?W[OChd0?U[OCkd0>Q[OEod0UOPoAV1\\O;N2O1O001O2N010O01O01O0000001O00000O100000001N11N101M4[NmQOP1ZZo9" + } + ], + "question": "Which one among , , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
" + ], + "answer": "C", + "type": "mirror", + "image": "images/vqa_50.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/67.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "lhaf06ho04M3M3M3M3M3N2M3N3L2O2N2N3M2N2N102M2O1O1000O1N2N2N2N2M3N2M4M2N3L3N2M3N3M2M4L3MQVV`0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "YmTR11lo05M3N1O1N2O1N2O9G5K7H7J1O1O1O001O001O001O001M2I8L3O1mNQNUTOP2ik0RNUTOo1kk0SNRTOo1lk0SNQTOo1ok0SNmSOo1Sl0TNjSOm1Ul0XNeSOi1[l0ZNbSOf1^l0[NaSOf1^l0]N^SOd1cl0l00001O01O010O00100XM`SOn1al0nMcSOQ2^l0jMfSOW2Rm0O00100O2N2O0O1O100O4M7H1O01O00010O0001O01O01O00001O000O2M3K4G:E;F:H8LbRW3" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "TPTV1`12oNim0a3UN_1aNk1UN=Ch0PXOhGhf0j9nNn0ROa1_NX2d\\OgA9GILd?Qb0^@o]O[=cd0bB^[On:dj0dF^UObN:\\2Zn0bNe0[O;E2N1O00001O00000000001O000000O10000000000000000O100`M2gRO<0A[12gk0g1eSOYNYl0X2nRORNQm0_2O1O1O1000000O10000001O00000000" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": 
"0j;Vd0000000000000001O000000000000000000000000000000001O000000000000000000001O0000000000000000000000001O000000000000000000000000000000001O00000000000000000000001O0000000000000000000000000000001O00000000000000000000000000001O0000000000000000000000000000001O000000000000000000000000000000001O00000000000000jIX\\O4hc0I^\\O4bc0K`\\O4`c0Kb\\O4^c0Lc\\O3]c0Le\\O3[c0Mf\\O3Yc0Mh\\O2Xc0Mj\\O2Vc0Nj\\O2Vc0Nk\\O1Uc0Ol\\O0Tc0On\\O0Rc00n\\O0Rc00o\\OOQc01o\\OOQc01o\\OOQc01P]ONPc02P]ONPc02Q]OMob03Q]ONnb02S]OMmb03S]OMmb03S]OMmb03T]OLlb04T]OLlb04T]OLlb04T]OLlb05T]OJlb06T]OJlb06U]OIkb07U]OIkb07V]OIib07W]OIib08V]OHjb08W]OGib09W]OGib0:V]OFjb0:W]OEib0V]OBlb0R]OBnb0>R]OBob0>P]OBPc0>P]OBPc0?o\\OAQc0?o\\OARc0>n\\OBRc0?m\\OBRc0>n\\OBSc0=m\\OCSc0=m\\OCSc0>l\\OBUc0=k\\OCUc0>j\\OBVc0?i\\OAWc0?i\\OAXc0?g\\OAYc0`0f\\O@Zc0`0f\\O@Zc0a0e\\O_O\\c0a0c\\O_O]c0a0d\\O_O[c0b0d\\O^O\\c0b0e\\O]O[c0d0e\\O[O[c0e0e\\O[O\\c0d0e\\O[O[c0f0e\\OYO[c0h0d\\OXO\\c0h0e\\OWO\\c0i0c\\OWO]c0i0d\\OVO\\c0k0c\\OUO]c0l0c\\OSO]c0m0d\\ORO]c0n0b\\ORO^c0o0b\\OPO^c0Q1b\\OoN^c0Q1b\\OnN^c0R1c\\OmN]c0T1d\\OjN]c0U1d\\OjN\\c0W1c\\OiN]c0W1d\\OhN\\c0Y1d\\OfN]c0Y1d\\OfN\\c0[1c\\OeN]c0\\1c\\OcN^c0]1b\\ObN^c0_1b\\O`N^c0`1c\\O_N]c0b1d\\O\\N]c0c1d\\O\\N\\c0e1d\\OZN\\c0f1e\\OYN[c0h1f\\OVN[c0j1f\\OTNZc0l1g\\OSNYc0n1h\\OPNYc0P2g\\OoMYc0Q2h\\OnMXc0R2i\\OmMXc0Q2j\\OnMVc0P2m\\OnMUc0gNk[OTMS4T4cg0M2I7D;GdSOjL`l0R3:G9E]Ob0cN`QO3O2^R:JdmE1Qo00VQO8in0IVQO7ln0GTQO5\\n0MVRO3cm0`0QROAfm0HoQOk09\\Ogm0n0WROoNjm0c1POYNYSO5Gh1Pm0SNXSOZ2Um01O01O1[O^ROlNdm0L_ROd00^Oam0O_RO;L[O5:am0n0\\ROgN3;bm0m0\\ROfN3, , and , which mask is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_51.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1383.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "k4d3\\l00O1N2N101O1N1N3N101N101M2O2N101GnKbTOT4Zk0;N1N3K401O010O10O10000O1O101N1O1O2N1O2N2N101N100O2O00001N10001O0O101O001N101O001O0O2O001O1O1O001O1O0O2O1O1O0O2O1O001N2O1O1N101O0O2O0O2O1O1N2O2M2O1N2O1O0O2O1N2O1N2O2M2O2M3N2N2M3N2M3N3L4M2M4L4M3L3M5L;D5K3M3M4L3M^iTT1" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. No" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_52.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1258.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "]d]k0=^o08I8fQOVOSm0P1fRO]Ool0f0fROlNHh0d0WOok0d2mSO^Mok0m2hSOTMUl0^3M4M2N4M7HAc0^O3N>A8Hn0UAnBbKLmNA40:1J[c0V:d\\OcE4`1Uc0W9h\\OkFWc0P;O2i\\O]Ca1Le11mL1a=oS8Pa0mHU^Oc1LlMk0f7Ta0jHY^OVOHMO`2j0e5Ua0gH[^OWO0\\2?h5Va0eHX^O\\OLc10bNd0m7Wa0cHX^O[OMHMb15^NK31I;f8aa0\\HW^OROM020M`18]NK3O2MX9^a0dGg^OROM011O]18^NKR:Sa0UGm^OWOMGO^19^NIR:Ra0VGP_OVOL_17]8k`0oFR_OX12k7j`0nFR_OTOMc15]8l`0lFR_OSONe1O`8Pa0iFS_OSOMZl8cAYGX>\\=L2bCU^Ob:ma0\\EU^ObNNg;oa0fES^OcNNe;Rb0iEm]O_:Vb0c13M3L6K7HnGRC]MhjNgAlIS1L[ON03D90R7Z>kNeAjIT1OYON04B:0o6^>WOaA]I87003I3W8S>bNeAaIN?6_7[>aN_AeJ3f6e2lGn8j6XDS3_2QFgM6a;e6YDU22fG\\2NZ9V6XDl1>hGn1>]9n5WDi1a0hGk1c0^9XOUDY64o1c0iGd1j0n9^5fCm1`3eHl8g=XGWBl8c=TGZBT9a=eFiB^9];cCcDo20`9P;oClDb24a9`:]DZER26c9c8RCiG^1\\Oo15d9V8aClGW1Bk18b9HhBU7V1UHkNj0m1Ji19b9IiBR7^1mHa0Ne17h9MbBQ7b1bH`0ERNc0a3:j9b7ZDbGb00gMb0`3T:j4gHfJPM^ObLQ1i=a4mHlJkLk0Y:n3VIVKdLh0X:d3_IgK[L=X:T2[BjMc7ElK:Q:T2hKbMXJ8n9R2kKjM[J1h9m1ULSNYHYODf0n;]1[BPNR:e0oIMd9W1YBUN_:f0`IJUL@d=_1]B`Nb:g0UILV:k0oLWOlHLY:gHOY:O`M0[H0[f0AbUO=W42Uf04lYOLRf05oYOJoe07XYO_ObLM8=ji09\\YOHiLNhi0:cYOHeLMii08dYOMbLJki07eYOO`LJii05kYO1if00YYO3df
0LTYOB\\Lf0Uj0OZYOB`L`0Sj0NZYO<`LUOTj0>nXOYO\\MQ1FVOQj0>oXO[OZMP1HUOPj09UYOBSMP1JQOPj0:UYOGQMl0Vj0\\OjXOGPMn0Wj0YOkXOHnLo0Wj0WO]YOj0cf0QO_YOS1bj0G_QO\\O_n0d0dQOZO[n0g0fQOXOXn0i0;5N20O010M2O1O11\\QOPOUn0R1;1oQOoNTm0f0URO@f0LTm0b0\\RO]Oa01Rm0a0]SO_Obl0`0`SO@_l0`0dSO]O\\l0b0a1O000K50000010O1O1O100CESQO>mn0:1O1K6KeRO@^k0>dTOB\\k0=fTOA[k0>S20100O1OIHkPO4Uo0NlPONUo03;1O0O100lMJgROM_19jk0LfROL<3KM07Um0LdROM<4IM16Xm0JZSO0_OO05\\n0JVRO2`n0O<4M1gNJVRO2F7Rn0NQRO2G1Xn03hQO1MM]m0HZSO^n0AgQO:[n0^OPROok0@STO1aN1^m0MRTO1`N2^m0NPTO1bN1^m02kSO;Vl0DjSOPo0_OTQOa0kn0^OUQOe0in0[OVQOh0hn0XOWQOi0in0WOVQOk0in0TOXQOl0hn0TOXQOl0on0O00000001N1JSOYQOn0en0TOZQOl0fn0TOZQOm0en0SO[QOm0fn0QO[QOP1jn01O1O1O1O2M2N3K7WROcNgl0`1SSOeNll0[1QSOhNol0X1oROjNPm0W1mROkNSm0W1jROkNSm0S2O2M5L2M3N1O2eSOPMA0ik0T3_TOZM^k0j2ZTO[Mek0`30001N10000000001O00000O1O1N2O1001O001O00O1O1N2O1O1N21O000000O100O1JZKQUOg4oj0ZKPUOf4Pk0XKRUOh4Tk0O1O000000O100IZKSUOg4mj0ZKRUOf4nj0[KQUOe4Pk0YKQUOg4oj0XKRUOh4Sk00O1M3O100O100000000000000000000000000000000O1O1O1O1N21O1O001O00O1O1N2N2O10000001O00O1O1N2N2O11O2N1O001O00O1000000000000000000O1O1N2O10000000000O10000000000000000O100O100O10000000O100O1000O100O101O000000001M2O1O1O1000001O000000O01000O010000000001N100O10000000000000O01O1O1O100001O000000O100O10000100O001OO001N2O1O10000001O0000O1N2L40000001O2N1OO1O1O1O1000000000000O1O1O1000000010O0000000O1000O10000001O00001O1O1O4L4L3M2N1O1O2N2N2N2N2N9G4L:\\KSUOa3jk0LgMUL`XOh3_g0ZLbXOd3^g0\\LdXOb3\\g0`LSWONa0a3[h0cLeVOKH9W1W3]h0eL`VOh0k0b2gh0cL_VOS1c0Y2]i0iMbVOV2^i0lMaVOS2`i0mM_VOS2ai0nM^VOR2bi0nM_VOQ2ai0mMaVOS2_i0lMcVOR2_i0lMcVOS2]i0nMcVOQ2lh0kL_VOU1f0o1ih0PM_VOT1f0k1\\h0nLlVO7OS1h0g1\\h0[OcWOe0]h0\\OcWOc0Uh0lLSWOb2h0c0Sh0mLTWOa2i0a0Rh0PMTWOa2h0?Sh00hWO0Xh03eWOM[h05dWOJ\\h08bWOH^h09bWOF^h0<`WOCah0>_WOAah0?_WOAah0`0_WO_Oah0b0_WO]Oah0d0_WO[Oah0f0^WO[Oah0e0`WOZO`h0g0`WOXO`h0i0`WOVO`h0hMUWOZ2;N`h0gMVWO\\2:L_h0m0bWORO^h0o0aWOQO_h0^MZWOS36^O_h0_M]WOS34]O_h0`M]WOT34[O^h0bM^WOS36YO\\h0eM\\WOT38VO\\h0hMYWOT3;SO\\h0_1dWO`N\\h0a1dWO^N\\h0b1eWO\\N]h0c1dWO\\N]h0c1
fWOZN]h0c1eWO[N]h0b1fWO\\N[h0b1hWO[NYh0fNPWOl1n0ZOSh0\\NRWOE9l1V1O`g0WNoWOe1h0ZNUN^1Vi0`NTXO5hNQ1T2lNnM1OON`0Pk0AoTO=1FY2IlM100Ma0Vk0NiTOF`2LcM1M?Yk0LhTOG\\20eM1K=^k0H[WONZMc0]k0\\OZWOR1`g0hNiVO3i1U1]g0jNiVOOl13RMm0Wj0TOjVOJo13QMo0gi0UO]VO7NE`30_Ln0di0XO_VO?\\3\\O`Ln0gi0TO^VOc0Y3c0[f0cN`VOj0T3d0hf0]OgXOUO_M`1ji0]O^XOX1bg0hN\\XOZ1dg0eN_XOY1ag0gNaXOV1ag0WNPVONa2j1_g0UNTVOO^2l1^g0RNZYOl1`j0M`0@`0@103K6KVPf1JaoYN`0G8ROo0H7J6J6M3M3K5J6L4M3M3N2M3L4M3N2N2O1O1O100O1O1O1O1O1O1O1O1O1O1N2O1O1kN^KkVOc4ah0hKcVOGk0a4ah0iKdVOFi0c4ch0gKeVODh0f4ch0fKeWO[4[h0eKdWO\\4]h0ZK`VO2R1e4eh0[KZWOf4fh0ZKYWOg4ah0UK_VO5n0h4bh0_K\\WOb4dh0^K\\WOb4eh0]KZWOd4fh0\\KXWOf4ih0YKUWOi4kh0VKTWOl4lh0TKRWOn4nh0RKPWOP5Pi0oJPWOR5Pi0mJPWOT5oh0lJQWOU5oh0jJRWOV5mh0kJRWOV5nh0iJRWOX5nh0gJQWO[5jh0bJ\\WO`5ch0bJ[WO_5fh0_J[WOa5bi00O100000000O1N2N2N2N2N2O1O1O1O1N2O1O1O1O1O100O100N2O1N2N2N2O1O1O100O100O100O100000000001O001O1O001O0000000000000000O100001O000000001O0000001O001O001O001O001O2N3M1O1O0000O1K500O1000000000000001O00001O000000001O00000000000000001O00000000010O000O2O001O00000000000O110O000000001O000000000000001O00001O0000000000001O00001O0000000000001O00000000001O0000001O001O0000001O0000000000001O0000001O001O2N001O00001O1O001O0000O100O100000000O10000001O0000001O00001O000000O11O0000000000001O0000001O00000000001O001O000000001O0000001OgJbJP@^5o?dJP@\\5o?eJQ@[5n?fJR@Z5Y?cJS\\O4d4X5X?fJR\\O3f4W5X?gJQ\\O2g4W5W?iJR\\ONh4Y5T?kJU\\OKg4Z5R?nJW\\OGh4Z5P?PKY\\ODh4\\5o>PKY\\ODh4\\5o>PKY\\ODh4\\5o>PKX\\OEi4[5o>oJY\\OGh4Y5o>PKX\\OHi4X5o>PKX\\OHi4X5o>PKX\\OHi4X5n>QKZ\\OEi4Z5m>QK[\\ODi4Z5k>SKiAm4W>SKiAm4W>nJnAR5R>mJoAS5Q>lJPBT5P>kJQBU5o=iJTBV5l=iJUBW5m=hJRBX5n=hJRBX5o=gJQBY5o=gJQBY5n=hJRBX5n=iJQBW5o=iJQBW5o=iJQBW5n=jJRBV5n=iJTBV5l=jJTBV5k=hJXBX5h=hJYBW5f=jJ[BU5d=lJX\\OMg5W5o=oJY\\OJh5W5n=PKY\\OJj5U5l=RKZ\\OIj5U5l=RKZ\\OIj5U5k=SK[\\OHk5T5k=RK[\\OKi5S5l=PK]\\OMg5S5l=lJa\\O1c5S5l=lJa\\O1d5R5j=nJb\\O0d5R5g=RKd\\OLe5R5f=TKd\\OJf5R5f=UKb\\OJi5P5e=VKb\\OJi5P5e=UKb\\OLi5o4e=TKc\\OMh5o4e=SKc\\OOh5n4h=oJ`\\O4h5m4Y>SKgAm4Y>SKhAl4W>UKiAk4W>UKjA
j4V>VKjAj4V>VKjAj4V>VKjAj4W>UKiAk4_=oJS]O1`5P5[=TK^Cl4a, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. " + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_53.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/45.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "dbc6460O2N2N`30[Lhi0V6jCPJhB0O11O1NW15aN33NO110N06KL40000OHM3N:2V6S9bIoFQf0Y8W1M4L300000001O0000000000000000000000000000000000000000O1001O0000000000O1000000001O000000O1000000000000000000000000000000000000O1O1gFiGbJ^8]5gGVB5n6`8e4XGPEMQOd3JnL0[=^;c60000001O001O00001O000000000000001O000000001O000000001O0000001O0000000000001O0000001O0000001O001O00000000000000001O000000000000001O00000000O100O1O1J6L4H`0iET[O^1Mk16QL67Fh0_o0RO3N1O2O001N1OWe_l0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "V]oT15fo05M3N2N2N2O1O10`0@1O0000000000O2O001O3ROSQOc0\\o0I4L6J\\bY2" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": 
"T_Qi0a03GO7^n0o1gQOeMPm0P4PSOYL4M3Ldi0[8RWObGVe0a:RO3N1N2O0O2O0O101O000010O000000000000001O01O000O101O00001O00001O000000000000000001O00000000O11O000001O000000000000000000000000000000001O0000000000000O1000001O000000000000000000000000001O0000000000001O00000000000000000O2O0000001O001O001O00001O0000001O01O0001O00000000000000000O1000000000000000000000000000000000000000000000000000001O000O100000000000000000001O000000000000001O00000000000O100000000000000000000000001O000O100000001O0000001O0O10000000002N3M3M2N1O1O00001O001O00001O00000000001O00001O2N1O1O00000000O1O100000O10000000001O001O001O00001O001O0010O01OO100O100O10000000000O10O01O100001O00000000000O1000001O01O00000000001O010O010O000000000001O01O001O00000001O01O0001O001O00000000001O000000000000000001O000001O00000000000000000001O000000O1000001O00000000001O000000000000000000000000000N2N2O1O10000000001O0001O00000000000000O10000O10001O00000001O0000000000000000000001O0O1000000001O0000000000000000000000000000O10000000000001O000000000001O000000000YA" + } + ], + "question": "Which of the following mask :, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
None of the above are in the mirror" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_54.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/3005.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "PPU>2no000001O00001O001O0000001O00001O00001O001O0000001O00001O0000001O0000001O001O0000001O0000001O00001O0000001O00001O0000001O0000001O0000001O0000001O000000001O000000001O000000001O00001O00001O000000001O000000001O000000001O000000001O0000001O0000001O00001O000000000000O10000O1O1O100O1O1O1001O00001O001O001O0000000000001O001O1O1O0000O1001O001O001O1O00O1000000001O0000000000000000001O00001O000000000000001O001O0000001O0000001O0000000000000000O1NRPQc0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "Tea1?_o05M1N101O00001O00000000000000000000000000000000O1000000000000000000O100000000000O100O2N1O2M4I6J7J6M4L4M3L2O000O101O0000O2O001N2N4K5K7Gb0UOX[YT1" + } + ], + "question": "Are and located within the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. 
Neither nor is in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_55.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1626.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "k`gk0:`o07K4L5K4K5M3M3K5K5L4K5L4L4J6M3N2L4L4L4M3K5K5M3J7L3M3M3L4J6L4L4L4L4L4M3M3L4K5L4L4L4K5M3K5L4M3M3K5L4M3L4K5K5L4M3M3L4L4L4L4L5K4L4L4L4M3L4M3L4M3M3L4N2N2O1O1O1O100O1000010O0000000000000000000001O000000000000000001O00000000000000000000000000000000000001O000000000000000000000000000000000000000001O00000000000001O0000000000000000000000000000000000000000000000001O0000000000O2O01O0000000000000000000000000O1001O001O00000000000000000000000000000000000000000001O0000000000000000000000001O0000000001O0000000000000000000000000000O10000001O000000O2O0000000001O00000000O100001O0000000000000001O0001O000000000000O10000001O0000000001O00000000O1001O000000000000000000000000001O00O10000000000000000001O000001O0000000000000000000000000001O0000000000000000000000000000000000001O000000O2O000001O0000000000oG" + } + ], + "question": "Is visible in the mirror?", + "choices": [ + "A. Yes", + "B. No" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_56.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/2291.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "`_n52k>6L2O2nAHX=<]BKc=j0^OmNUCS1hP1h9]OmEA=7_OGW:`0PF_O;8a:7WE@97a:8cFF^99cFF^9:cFB`9=b20000001O000000010O0013Dd^V4" + }, + { + "size": [ + 480, + 640 + ], + "counts": "edk84k>2N2gALP=MRC9LLo, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
None of the above are in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_57.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/1584.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "\\Z[93mo02M101N10001O00000O101O00000000001O0000000000000000000000000000000000000000000000000001O000000000O101O000O101N101N1Mfeil0" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "RbVU11f0a0om0KaQO<^n0d0O1N2O2N8H1O000000001O00000001O01O001O0000O1N2N2O1N2N2N2N2O1N2N2M3N2O1N2N2MoQOVORm0i0mROYOSm0e0lRO^OTm0`0kROCUm0:mROGSm07mROKSm02oROOQm0OPSO2Pm0LoRO6Rm0ImRO9Sm0EmRO=Sm0BmRO?Sm0_OmROd0Tn03N2N2N2O1M3N2N3N1N2N2N2N2O1N2N2N2N2O001O1001O001O3M4K4[OoQOQO_n05Y^8" + } + ], + "question": "Can you tell me if and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_58.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/647.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "d_d5`04h0Wi0h5A6M2OO010O002N1N3M3M3M2O101N1O101O00001O00000000000001O0000000001O00000000000000O101O001O010O4L3M2N4L10O0O1O100O101N1M4]Mi2kMhf\\P1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": 
"bidl01e0;D1mm0_1N2O0O1O100O1O1O001O100O1O1O100O1O1O1O100O1O1O100O100O10O01000000000000O0100000O10000000000O100O0100000000000O1000O1000O100000000000000O00100O100O100O10O10000000O100O100000O10O100000000000000O010O100O100O100O100000O10O1O100O1O100O001O100O1DSM`SOQ3]l0;N2O1O1O010O101N10O010000O1lNXLoUOh3Tk001000O100O10000O1000O010000O1000000[KTLT]Ol3lb0TLU]Ol3`g00O10O0100O01O1O100O00001O001O001O1O001O1O1O1O1O1O001O1O1O1O001O001O00001O1O100O1O1O001O1N2O001O1O001O001O1O001O001O100O1O00001N101O1O001O1O00001O001O1O00100O001O1O1O1O1O1O001O1O1N2O00001O001O1O10O01O1O001O1N2O001O001O1O1O001O1O001O001O1O1O001O001O001O1O001O001O001O001O0010O01O00001O3M2N001O1O1O1N10001O00001O1O1O1O10O01O000O101O001O0000001O00000^OgPO=`o0N1O001O00000O010O1O1O2Nho:L]PE0O2O01N1000eP2" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "bii11`03hn0n0E?Bi0WOP1`ROoLQl0f3J7I>C?@1O010O00010O01O00001O00010O0010O000001O00010O001O010O00010O01O0100O010O01O01O00001O00100O1O010O000010O00001O1O010O0001O001O010O01O01O01O0001O001O1O10O01O001O00001O001O010O00001O01O01O001000O10O00010O0000010O01O0010O00010O000010O010O0010O001O010O0100O1O10O01O0001O01O010O0010O1O001O0010O01O1O1O1O1O0000000000O10001O01O001O00001O00001O0000001O00001O1O001O1O001O0000001O000O10001O001O001O001O001O001O001O100N101O00001O00001O00000000001O00001O00001O002N5J2O1O00000001O0McLdSO[3]l0eLcSO[3]l0eLcSO[3]l0eLdSOZ3\\l0dLfSO\\3`l0O00001O001O1O1O1N11O000000000000000000000001O00000000O1001O0000001O00001O001O1O1O1O1O00001O000000000000000000000000001O1O1O001O0000000ZMkROa2Vm0\\MmROc2Xm0000001OO10000O1000000001O00001O0000001O00001O00001O00001O01O01O001O00001O0000001O0000001O1O1O0000001O001O00001O1O1O001O001O001O0000O10000001O00001O001O001O0000001O00001O001O1O00001O00001O00001O00001O001O1O00001O00000000001O1O1O1O00000000001O001O00001O000000001O00001O00001O0000001O1O1O001O00001O00000O2O00001O1O0000001O00001O001O00001O0O2O00001O00001O000000001O001O001O0000001O00001O00010O1O001O00001O0000001O00001O00001O00001O00001O0000001O001O1N101O00000000001O001N100O
2O00VPld0" + } + ], + "question": "Which of the following mask :, , or is in the mirror?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. None of the above are in the mirror" + ], + "answer": "A", + "type": "mirror", + "image": "images/vqa_59.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/RGBD-Mirror/test/image/256.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1280 + ], + "counts": "o_e4;bo05L4L4L3N1O2N2N101N100O1O2O0O100O100000O0100000O100O10000O100O100O1O1O2N100O2N2N1O2N2M3M4L5J\\`kQ1" + }, + { + "size": [ + 1024, + 1280 + ], + "counts": "Zoi86io01O1O100O010O100O100O100N20O01O1000O01O10O0100O100O10O0100O1O100O100O10000O010O100O1O100O10000O100O1O100O010O10000O100O100O100O100O10000O2O0O100O100O100O10000O10000O100000000O10000000001N10000OROmQO1Sn0LTROOkm0O[ROOdm0OaROO^m01eROL\\m03gROI[m07W1O2O0NVRm7NmmRH1O2N1N200O1N2O1O1O1O1O1O100O1O1O1O1N2N2N2K5M3L4N1O2N2O2N100O1001O00000O101O00001O0O10000O2O000O100O2O0O100O1000000O2O000000O010000O1000000O10000O0100000O10O100000000O010O100O100O00100O1O1O0O2L4M3O1N2N110O100000000001O0000001O00001O000O2O001O001O0010O01O001O00001O001O00001O0010O01O001O010O00100O001O001O010O001O001O010O001O1O1O0010O01O00100O001O010O1O00100O001O1O010O00100O001O1O01000O100O1O100O001O1O001O2N1O1O1O1O1O1OWP[=" + } + ], + "question": "Could you confirm whether and are inside the mirror?", + "choices": [ + "A. and are both in the mirror", + "B. Only is in the mirror", + "C. Only is in the mirror", + "D. Neither nor is in the mirror" + ], + "answer": "B", + "type": "mirror", + "image": "images/vqa_60.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00912042.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "RdY35hg05`YOMKa0Wc0Fh\\ONO>Vc0He\\OO2?", + "choices": [ + "A. Red and white.", + "B. Blue and white.", + "C. Black and orange.", + "D. White and black." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_61.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00912042.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "RdY35hg05`YOMKa0Wc0Fh\\ONO>Vc0He\\OO2?", + "choices": [ + "A. It is a mix of red, black, and white.", + "B. It is a mix of orange, black, and white.", + "C. It is a mix of blue, black, and white.", + "D. It is a mix of green, black, and white." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_62.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00912042.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "RdY35hg05`YOMKa0Wc0Fh\\ONO>Vc0He\\OO2 primarily composed of?", + "choices": [ + "A. Entirely made of high-strength plastic.", + "B. Primarily composed of aluminum and rubber.", + "C. A mix of carbon fiber and fiberglass.", + "D. A combination of metal and plastic." + ], + "answer": "D", + "type": "material", + "image": "images/vqa_63.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00912042.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "RdY35hg05`YOMKa0Wc0Fh\\ONO>Vc0He\\OO2?", + "choices": [ + "A. It features a solid orange color with black lettering.", + "B. It has a wavy blue and black design.", + "C. It is decorated with a pattern of geometric shapes.", + "D. It has a colorful floral and vine-like pattern." 
+ ], + "answer": "C", + "type": "texture/pattern", + "image": "images/vqa_64.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00912850.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "iWm>2\\g01RYO5hf0MnXO?MM`f0EbYOR1OkNle0P2[ZOWNLMhd0l2O1G711O100OI_[OlL2O^d0W31kL`[OV3`d0jL`[OW3_d050000O2O00O01O001O01O01O1O1000O01O01000O1O1O100O1001O1O0O2O001O001N10000O1000O1000O1L31O01O010001O1O001O0000000L4O001O1O1O1001O0O1O10000O21OO1O000O100000N2O100000000O2O1O00001O00000001O001O100O01OO1001O5L0O00100O01O0O2OO2N101O1O2N1O00012N0000000O02N001O5K1O3M2N1N3M2M5F8N1XMW[OW2ld0gMW[OV2kd0gMW[OV2md0hMV[OS2^e0M7aNPZOa0Tf0]OoYO>Uf0@mYOYf0^OiYO?Xf0BkYO9Wf0GiYO8Xf0GiYO8Xf0GjYOM]O0Qg02[aU5" + } + ], + "question": "What is the primary material of the filling in ?", + "choices": [ + "A. Crab meat.", + "B. Spicy tuna.", + "C. Avocado and carrot.", + "D. Tofu." + ], + "answer": "A", + "type": "material", + "image": "images/vqa_65.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00915597.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": 
"[bP55jo02N2O1N1O1O2N101N1O100O1O100O100O1O1YRODjk0Vk0AjTOb0Sk0_OlTOd0Qk0\\OoTOg0oj0YOPUOj0mj0VOSUOn0jj0ROUUOQ1hj0oNXUOU1dj0kN\\UOY1aj0fN_UO]1^j0cNbUOa1Zj0_NfUOd1Xj0[NhUOi1Tj0XNkUOl1Qj0TNoUOo1ni0QNRVOS2ji0mMVVOV2gi0lMWVOX2ei0iMYVO[2di0fMZVO_2bi0bM[VOd2ai0]M]VOg2`i0ZM]VOl2_i0VM^VOP3]i0QMaVOU3Zi0lLcVOX3[i0iLcVOZ3[i0gLcVO\\3[i0eLbVO^3]i0dL`VO^3`i0bL]VOa3bi0`L\\VOb3ci0_L[VOb3ei0\\101O000010ZJ]VOn4ci0RK_VOl4bi0SK`VOl4`i0SKbVOk4_i0TKdVOi4]i0VKeVOh4\\i0WKfVOh4Zi0WKiVOf4Xi0YKhVOg4Yi0XKhVOg4Yi0XKgVOh4[i0VKeVOk4\\i0SKdVOm4^i0QKbVOo4Wj0nK[UOj2ej0WMeUO^2\\j0bMhUOZ2Wj0VMVUONc0j2Wj0XMWUONc0h2Vj0ZMXUONc0f2Uj0\\MYUOOb0c2Uj0^MZUOOc0a2Sj0_M[UO0c0_2Rj0aM\\UO0c0]2Qj0cM]UO1b0Z2Qj0eM^UO1c0W2oi0hM_UO1d0U2li0jMaUO2d0Q2ki0mMcUO1d0o1ji0oMcUO2e0l1hi0RNdUO3e0h1gi0UNeUO3f0e1ei0XNfUOG@3W1i1ei0]NeUOG_O4V1f1gi0_NeUOG^O4T1f1ji0_NeUOG^O3R1f1mi0_NdUOH]O4P1d1Pj0`NdUOH\\O4n0d1Sj0`NdUOH\\O3l0d1Vj0aNbUOH\\O4j0b1Zj0aNaUOI[O4h06XOf0Wk0VO_UOKZO3f06\\Oc0Vk0YO`UOJXO4e06B=Sk0^O_UOKWO5c04K7mj0E_UOKWO4`071Okj0J^UOLWO4=87Hhj01]UOKWO5;9=_Ofj07\\UOLWO48Xl0A_TOMUO51?[l0^O_TOOVO3Oa0[l0^O_TOOWO3Mb0\\l0[OaTO0WO2Je0]l0ZOaTO0YOT1Vl0kNbTO2WOS1Vl0lNcTO1XOR1Ul0lNdTO2WOR1Tl0mNeTO1XOQ1Sl0mNfTO3VOP1Sl0nNfTO3XOm0Sl0oNfTO4WOm0Rl0POgTO4WOj0Sl0QOgTO5VOj0Rl0ROhTO4UOk0Sl0POiTO6SOj0Sl0QOiTO6SOi0Ul0POgTO9SOh0Ul0POfTO>POc0Zl0oNdTOm1\\k0SNcTOo1]k0QNaTOP2_k0PN`TOR2`k0nM^TOT2ak0lM^TOU2ck0kM[TOW2dk0jMZTOW2gk0hMXTOY2hk0hMWTOX2jk0gMUTOo0AOYl0SOTTOh0ASO3P1Yl0TOSTOj0BPO4O0g0Wl0ASTOh0EmNb0d0gk0GQTOi0d0TO`k03nSOh0^m0XObROg0_m0XOcROg0\\m0ZOeROe0[m0ZOgROd0Ym0]OiROa0Wm0_OjRO`0Um0@nRO>Rm0BoRO]ROBdm0a0YRO_Ohm0b0URO^Omm0c0QRO]Oom0e0nQO\\ORn0g0kQOXOVn0j0gQOWOYn0W110RROnNol0V2K3M2N1O1O2N1K6N1N2N2O0O1O100O00iLbSOm2_l0RMbSOn2]l0SMcSOl2]l0TMcSOl2^l0SMcSOl2]l0UMcSOj2]l0VMdSOj2\\l0UMdSOk2\\l0VMdSOi2\\l0WMeSOh2\\l0WMeSOi2[l0WMeSOh2\\l0XMdSOg2]l0ZMcSOd2^l0\\McSOb2\\l0aMdSO^2Zl0dMhSOY2Xl0iMhSOU2Yl0kMiSOR2Xl0nMiSOOF`1bl0aNiSOKIc1]l0cNlSOFIf1\\l0dN[TOZ1fk0fN[TOX1fk0hNZTOW1fk0jN[TO4PO:el0B\\TO3oN:fl0C[TO2PO:fl0D[TO1PO9fl0F[TOOPO:gl0FYTO0PO:gl0FZTOOPO9gl0G[TONoN:gl
0G[TOOnN9hl0H[TOMoN:gl0G\\TONmN:hl0G\\TONmN:hl0H\\TOMmN9il0H\\TOMlN:il0H]TOLkN?", + "choices": [ + "A. White.", + "B. Yellow.", + "C. Green.", + "D. Brown." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_66.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00915597.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "[bP55jo02N2O1N1O1O2N101N1O100O1O100O100O1O1YRODjk0Vk0AjTOb0Sk0_OlTOd0Qk0\\OoTOg0oj0YOPUOj0mj0VOSUOn0jj0ROUUOQ1hj0oNXUOU1dj0kN\\UOY1aj0fN_UO]1^j0cNbUOa1Zj0_NfUOd1Xj0[NhUOi1Tj0XNkUOl1Qj0TNoUOo1ni0QNRVOS2ji0mMVVOV2gi0lMWVOX2ei0iMYVO[2di0fMZVO_2bi0bM[VOd2ai0]M]VOg2`i0ZM]VOl2_i0VM^VOP3]i0QMaVOU3Zi0lLcVOX3[i0iLcVOZ3[i0gLcVO\\3[i0eLbVO^3]i0dL`VO^3`i0bL]VOa3bi0`L\\VOb3ci0_L[VOb3ei0\\101O000010ZJ]VOn4ci0RK_VOl4bi0SK`VOl4`i0SKbVOk4_i0TKdVOi4]i0VKeVOh4\\i0WKfVOh4Zi0WKiVOf4Xi0YKhVOg4Yi0XKhVOg4Yi0XKgVOh4[i0VKeVOk4\\i0SKdVOm4^i0QKbVOo4Wj0nK[UOj2ej0WMeUO^2\\j0bMhUOZ2Wj0VMVUONc0j2Wj0XMWUONc0h2Vj0ZMXUONc0f2Uj0\\MYUOOb0c2Uj0^MZUOOc0a2Sj0_M[UO0c0_2Rj0aM\\UO0c0]2Qj0cM]UO1b0Z2Qj0eM^UO1c0W2oi0hM_UO1d0U2li0jMaUO2d0Q2ki0mMcUO1d0o1ji0oMcUO2e0l1hi0RNdUO3e0h1gi0UNeUO3f0e1ei0XNfUOG@3W1i1ei0]NeUOG_O4V1f1gi0_NeUOG^O4T1f1ji0_NeUOG^O3R1f1mi0_NdUOH]O4P1d1Pj0`NdUOH\\O4n0d1Sj0`NdUOH\\O3l0d1Vj0aNbUOH\\O4j0b1Zj0aNaUOI[O4h06XOf0Wk0VO_UOKZO3f06\\Oc0Vk0YO`UOJXO4e06B=Sk0^O_UOKWO5c04K7mj0E_UOKWO4`071Okj0J^UOLWO4=87Hhj01]UOKWO5;9=_Ofj07\\UOLWO48Xl0A_TOMUO51?[l0^O_TOOVO3Oa0[l0^O_TOOWO3Mb0\\l0[OaTO0WO2Je0]l0ZOaTO0YOT1Vl0kNbTO2WOS1Vl0lNcTO1XOR1Ul0lNdTO2WOR1Tl0mNeTO1XOQ1Sl0mNfTO3VOP1Sl0nNfTO3XOm0Sl0oNfTO4WOm0Rl0POgTO4WOj0Sl0QOgTO5VOj0Rl0ROhTO4UOk0Sl0POiTO6SOj0Sl0QOiTO6SOi0Ul0POgTO9SOh0Ul0POfTO>POc0Zl0oNdTOm1\\k0SNcTOo1]k0QNaTOP2_k0PN`TOR2`k0nM^TOT2ak0lM^TOU2ck0kM[TOW2dk0jMZTOW2gk0hMXTOY2hk0hMWTOX2jk0gMUTOo0AOYl0SOTTOh0ASO3P1Yl0TOSTOj0BPO4O0g0Wl0ASTOh0EmNb0d0gk0GQTOi0d0TO`k03nSOh0^m0XObROg0_m0XOcROg0\\m0ZOeROe0[m0ZOgROd0Ym0]OiROa0Wm0_OjRO`0Um0@nRO>Rm0BoRO]ROBdm0a0YRO_Ohm0b0URO^Omm0c0QRO]Oom
0e0nQO\\ORn0g0kQOXOVn0j0gQOWOYn0W110RROnNol0V2K3M2N1O1O2N1K6N1N2N2O0O1O100O00iLbSOm2_l0RMbSOn2]l0SMcSOl2]l0TMcSOl2^l0SMcSOl2]l0UMcSOj2]l0VMdSOj2\\l0UMdSOk2\\l0VMdSOi2\\l0WMeSOh2\\l0WMeSOi2[l0WMeSOh2\\l0XMdSOg2]l0ZMcSOd2^l0\\McSOb2\\l0aMdSO^2Zl0dMhSOY2Xl0iMhSOU2Yl0kMiSOR2Xl0nMiSOOF`1bl0aNiSOKIc1]l0cNlSOFIf1\\l0dN[TOZ1fk0fN[TOX1fk0hNZTOW1fk0jN[TO4PO:el0B\\TO3oN:fl0C[TO2PO:fl0D[TO1PO9fl0F[TOOPO:gl0FYTO0PO:gl0FZTOOPO9gl0G[TONoN:gl0G[TOOnN9hl0H[TOMoN:gl0G\\TONmN:hl0G\\TONmN:hl0H\\TOMmN9il0H\\TOMlN:il0H]TOLkN in the image?", + "choices": [ + "A. The masked object has a smooth, glossy surface.", + "B. The masked object has serrated or jagged edges.", + "C. The masked object has small dark spots on its surface.", + "D. The masked object is covered in tiny black specks." + ], + "answer": "C", + "type": "texture/pattern", + "image": "images/vqa_67.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00926777.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "oX[=4hg06L4N1N100O2O001O001N101O010O010O01000O0100O010O1N1N3N1N3M3N1N3Nonk9" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Black.", + "B. Red.", + "C. White.", + "D. Brown." + ], + "answer": "D", + "type": "color", + "image": "images/vqa_68.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00926777.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "oX[=4hg06L4N1N100O2O001O001N101O010O010O01000O0100O010O1N1N3N1N3M3N1N3Nonk9" + } + ], + "question": "Which of the following best describes the shape of ?", + "choices": [ + "A. A rectangular object with sharp corners.", + "B. A cylindrical object.", + "C. A rectangular object with rounded corners.", + "D. A flat, circular object." 
+ ], + "answer": "C", + "type": "shape", + "image": "images/vqa_69.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00926777.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "XXW78ag07I7J7O0K5N201O000O2O0000000000000O101O0000000000000000000000000000000000000000000010OO10000000000000000000000000000000000000000000000000000O10000000001O00000000000000000000000001O000000000000000000001O000000000000000000001O000000002N1O1O1O1O1O3M1O001O001OVPh=" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. Metallic.", + "B. Plastic.", + "C. Wooden.", + "D. Ceramic." + ], + "answer": "A", + "type": "material", + "image": "images/vqa_70.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00926777.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "XXW78ag07I7J7O0K5N201O000O2O0000000000000O101O0000000000000000000000000000000000000000000010OO10000000000000000000000000000000000000000000000000000O10000000001O00000000000000000000000001O000000000000000000001O000000000000000000001O000000002N1O1O1O1O1O3M1O001O001OVPh=" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Red.", + "B. Black.", + "C. Brown.", + "D. White." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_71.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00946186.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "gk_25ig02J7N?", + "choices": [ + "A. Circular.", + "B. Square.", + "C. Rounded rectangular.", + "D. Oval." 
+ ], + "answer": "C", + "type": "shape", + "image": "images/vqa_72.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00946186.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "gk_25ig02J7N shown in the image?", + "choices": [ + "A. Wood.", + "B. Glass.", + "C. Metal.", + "D. Plastic." + ], + "answer": "D", + "type": "material", + "image": "images/vqa_73.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00946186.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "gk_25ig02J7N?", + "choices": [ + "A. White.", + "B. Pink.", + "C. Brown.", + "D. Black." + ], + "answer": "D", + "type": "color", + "image": "images/vqa_74.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00951281.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "[_Y36eg06J6I8I6J5M3HiNeYOY1Zf0iNbYOZ1\\f061O00O100001O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1M3J6I7J6I7J6JRbjb0" + } + ], + "question": "What is the texture or pattern of ?", + "choices": [ + "A. A grille with concentric ridges and radial spokes.", + "B. A pattern of small, rectangular tiles.", + "C. A grid pattern of small, square panes.", + "D. A perforated lattice pattern." 
+ ], + "answer": "A", + "type": "texture/pattern", + "image": "images/vqa_75.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00951281.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "[_Y36eg06J6I8I6J5M3HiNeYOY1Zf0iNbYOZ1\\f061O00O100001O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1M3J6I7J6I7J6JRbjb0" + } + ], + "question": "What is the shape of ?", + "choices": [ + "A. The masked object is circular.", + "B. The masked object is square.", + "C. The masked object is rectangular.", + "D. The masked object is arched." + ], + "answer": "C", + "type": "shape", + "image": "images/vqa_76.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00951281.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "[_Y36eg06J6I8I6J5M3HiNeYOY1Zf0iNbYOZ1\\f061O00O100001O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1M3J6I7J6I7J6JRbjb0" + } + ], + "question": "What is the shape of the vent on ?", + "choices": [ + "A. Rectangular.", + "B. Circular.", + "C. Arched.", + "D. Square." + ], + "answer": "B", + "type": "shape", + "image": "images/vqa_77.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00951281.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "[_Y36eg06J6I8I6J5M3HiNeYOY1Zf0iNbYOZ1\\f061O00O100001O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1M3J6I7J6I7J6JRbjb0" + } + ], + "question": "What is the color of the fan inside ?", + "choices": [ + "A. White.", + "B. Red.", + "C. Orange.", + "D. Black." 
+ ], + "answer": "D", + "type": "color", + "image": "images/vqa_78.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00975971.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "j\\P6T1lf0;E1O1O000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004L`0@`0@Rbh`0" + } + ], + "question": "Which of the following best describes the texture/pattern of in the image?", + "choices": [ + "A. It is a price tag with printed text.", + "B. It is a price tag with handwritten text.", + "C. It is a blank piece of paper without any text.", + "D. It is a sticker with a barcode on it." + ], + "answer": "B", + "type": "texture/pattern", + "image": "images/vqa_79.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00975971.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "j\\P6T1lf0;E1O1O000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004L`0@`0@Rbh`0" + } + ], + "question": "Based on the image, what is the shape of ?", + "choices": [ + "A. It is a rectangular object.", + "B. It is a square object.", + "C. It is a circular object.", + "D. It is a triangular object." + ], + "answer": "A", + "type": "shape", + "image": "images/vqa_80.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00975971.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "j\\P6T1lf0;E1O1O000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004L`0@`0@Rbh`0" + } + ], + "question": "What is the color of in the image?", + "choices": [ + "A. Red.", + "B. White.", + "C. Black.", + "D. Brown." 
+ ], + "answer": "B", + "type": "color", + "image": "images/vqa_81.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00981094.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "R_]>4lg0000000000000O10O1000000000000000000000O10000000O1000000000000O1O01000000000000000000000O10O100000000001O000000000000O10O1000000000000000000O10O1000000000000000O1000000000O10000000O1000000000000000O10O1000000000O2O0000000000O10000000000000O100000000000O10000000000000000L5O22L6JSaY6" + } + ], + "question": "What is the primary color of 's shaft?", + "choices": [ + "A. Black.", + "B. Red.", + "C. Silver.", + "D. Blue." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_82.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_00981094.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "R_]>4lg0000000000000O10O1000000000000000000000O10000000O1000000000000O1O01000000000000000000000O10O100000000001O000000000000O10O1000000000000000000O10O1000000000000000O1000000000O10000000O1000000000000000O10O1000000000O2O0000000000O10000000000000O100000000000O10000000000000000L5O22L6JSaY6" + } + ], + "question": "Which of the following descriptions accurately represents the texture or pattern of ?", + "choices": [ + "A. The handle is smooth and made of two different colors of plastic.", + "B. The handle features a series of parallel grooves running along its length.", + "C. The metallic grip area has a knurled, cross-hatched pattern for a better hold.", + "D. The entire surface of the object is smooth and polished metal." 
+ ], + "answer": "C", + "type": "texture/pattern", + "image": "images/vqa_83.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01002306.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 902 + ], + "counts": "nie;5go06J5K5K5K5K5L4K5M3M3O0O2O1N2O1O1O1O10O0100O1N2O1O1O1N2O001N2O1O1O1N2O1O1O0O2O1O1O1N2O1O1O1N101O1N101O000O1000000O1000000O10O10O1000000O1000000O1000000O0100000O10000O1000000O1000O10O1000000O10000O0100O1O001N2DK5K6J6K5J6JeWi1" + } + ], + "question": "What is the color of the main body of ?", + "choices": [ + "A. Blue.", + "B. White.", + "C. Brown.", + "D. Blue and white." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_84.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01002306.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 902 + ], + "counts": "nie;5go06J5K5K5K5K5L4K5M3M3O0O2O1N2O1O1O1O10O0100O1N2O1O1O1N2O001N2O1O1O1N2O1O1O0O2O1O1O1N2O1O1O1N101O1N101O000O1000000O1000000O10O10O1000000O1000000O1000000O0100000O10000O1000000O1000O10O1000000O10000O0100O1O001N2DK5K6J6K5J6JeWi1" + } + ], + "question": "Which of the following statements about the color of is correct?", + "choices": [ + "A. The hull of the object is painted dark blue.", + "B. The entire object is covered by a large blue tarp.", + "C. The nameplate on the side features white lettering.", + "D. There is a red life preserver attached to its side." 
+ ], + "answer": "D", + "type": "color", + "image": "images/vqa_85.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01002306.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 902 + ], + "counts": "nie;5go06J5K5K5K5K5L4K5M3M3O0O2O1N2O1O1O1O10O0100O1N2O1O1O1N2O001N2O1O1O1N2O1O1O0O2O1O1O1N2O1O1O1N101O1N101O000O1000000O1000000O10O10O1000000O1000000O1000000O0100000O10000O1000000O1000O10O1000000O10000O0100O1O001N2DK5K6J6K5J6JeWi1" + } + ], + "question": "What is the color of the component mounted at the rear of ?", + "choices": [ + "A. Blue.", + "B. White.", + "C. Black.", + "D. Brown." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_86.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01002306.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 902 + ], + "counts": "Vfg<1oo0101N1O10O100000000000_POOQo02nPONSo03kPOMVo0?05KO1O12N1kPO[Oln0e0QQO]Oon0j01M3N4K20001N100O1OO2O001N10001N02O0O2N10100O0100O010O10O00100N1O2D`PO5bo032N2MiYd=" + } + ], + "question": "What is the color of the head of ?", + "choices": [ + "A. Gray.", + "B. Blue.", + "C. White.", + "D. Black." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_87.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01002306.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 902 + ], + "counts": "Vfg<1oo0101N1O10O100000000000_POOQo02nPONSo03kPOMVo0?05KO1O12N1kPO[Oln0e0QQO]Oon0j01M3N4K20001N100O1OO2O001N10001N02O0O2N10100O0100O010O10O00100N1O2D`PO5bo032N2MiYd=" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Blue and white.", + "B. Pure white.", + "C. A mix of white, brown, and gray.", + "D. Black and brown." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_88.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01002306.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 902 + ], + "counts": "Vfg<1oo0101N1O10O100000000000_POOQo02nPONSo03kPOMVo0?05KO1O12N1kPO[Oln0e0QQO]Oon0j01M3N4K20001N100O1OO2O001N10001N02O0O2N10100O0100O010O10O00100N1O2D`PO5bo032N2MiYd=" + } + ], + "question": "What color is the beak of ?", + "choices": [ + "A. The beak is yellow.", + "B. The beak is black.", + "C. The beak is white.", + "D. The beak is grey." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_89.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01002306.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 902 + ], + "counts": "[f^>3lo01O1O001O0100001O1O001O1^Oa001M4UQOSO_n0Q1]QOoNbn0S1\\QOnNdn0W100O0100000001O003MO\\O\\QONcn01`QON_n01dQON[n02hQOJYn05jQOHWn07lQOEVn0;g0O1O100000001O1O1O1O1O1O1O2N1OhZU<" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Pure white.", + "B. A combination of grey and brown.", + "C. Black and white.", + "D. Blue and grey." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_90.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01002306.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 902 + ], + "counts": "[f^>3lo01O1O001O0100001O1O001O1^Oa001M4UQOSO_n0Q1]QOoNbn0S1\\QOnNdn0W100O0100000001O003MO\\O\\QONcn01`QON_n01dQON[n02hQOJYn05jQOHWn07lQOEVn0;g0O1O100000001O1O1O1O1O1O1O2N1OhZU<" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. It is primarily blue and white.", + "B. It has a combination of white, grey, and brown feathers.", + "C. It is completely black.", + "D. It is mostly white with some black markings." 
+ ], + "answer": "B", + "type": "color", + "image": "images/vqa_91.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01002306.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 902 + ], + "counts": "[f^>3lo01O1O001O0100001O1O001O1^Oa001M4UQOSO_n0Q1]QOoNbn0S1\\QOnNdn0W100O0100000001O003MO\\O\\QONcn01`QON_n01dQON[n02hQOJYn05jQOHWn07lQOEVn0;g0O1O100000001O1O1O1O1O1O1O2N1OhZU<" + } + ], + "question": "Which of the following statements correctly describes a shape characteristic of in the image?", + "choices": [ + "A. The wings of the object are fully folded against its body.", + "B. The tail of the object is spread out in a fan shape.", + "C. The beak of the object is noticeably curved upwards.", + "D. The entire body of the object forms a straight, horizontal line." + ], + "answer": "B", + "type": "shape", + "image": "images/vqa_92.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01002306.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 902 + ], + "counts": "[f^>3lo01O1O001O0100001O1O001O1^Oa001M4UQOSO_n0Q1]QOoNbn0S1\\QOnNdn0W100O0100000001O003MO\\O\\QONcn01`QON_n01dQON[n02hQOJYn05jQOHWn07lQOEVn0;g0O1O100000001O1O1O1O1O1O1O2N1OhZU<" + } + ], + "question": "What is shown in the image?", + "choices": [ + "A. A bird swimming in the water.", + "B. A bird perched on a boat cover.", + "C. A flying bird.", + "D. A bird sitting on the roof of a boat." + ], + "answer": "C", + "type": "shape", + "image": "images/vqa_93.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01010195.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "nWob0Q1of01O2N1O1O001O000000000000000001O00001N2O1O001N2O1O0O2O1O1N101O1N2O1O1N2O1O1N2O1O1N3N1M\\go3" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Gold.", + "B. Gray.", + "C. 
White.", + "D. Red." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_94.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01010195.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "nWob0Q1of01O2N1O1O001O000000000000000001O00001N2O1O001N2O1O0O2O1O1N101O1N2O1O1N2O1O1N2O1O1N3N1M\\go3" + } + ], + "question": "What is the texture of ?", + "choices": [ + "A. Glossy.", + "B. Matte.", + "C. Rough.", + "D. Ribbed." + ], + "answer": "B", + "type": "texture/pattern", + "image": "images/vqa_95.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01010195.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "nWob0Q1of01O2N1O1O001O000000000000000001O00001N2O1O001N2O1O0O2O1O1N101O1N2O1O1N2O1O1N2O1O1N3N1M\\go3" + } + ], + "question": "To which device does belong?", + "choices": [ + "A. It is a red alarm button on the console.", + "B. It is a spherical paperweight used to hold down papers.", + "C. It is the trackball of an ergonomic mouse.", + "D. It is a hold-indicator light for the telephone." 
+ ], + "answer": "C", + "type": "shape", + "image": "images/vqa_96.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01010195.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "URc=?`g0d0]Od0\\Oc0]Od0\\Od0[Od0]O:F1O00000O1000000000O100000O100000000000000O01000000000000000O100000O1000000000O10000000000000O10O10000000000000000O10O10000000000000O10000000O10000000O10000000000000O10O10000000000000000O10O10000000000000O10000000O10000000O1000000000000000O010000000000000000O1000O100000000000O1000000000O100000O1000000000000000O010000000000000000O1000O100000000000O100000000000O2O:Fe0[Od0\\Od0\\Oe0[Od0\\Oe0ZO\\Ri5" + } + ], + "question": "Which of the following best describes the shape of ?", + "choices": [ + "A. It is a square.", + "B. It is rectangular.", + "C. It is a trapezoid.", + "D. It is a parallelogram." + ], + "answer": "B", + "type": "shape", + "image": "images/vqa_97.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01010195.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "URc=?`g0d0]Od0\\Oc0]Od0\\Od0[Od0]O:F1O00000O1000000000O100000O100000000000000O01000000000000000O100000O1000000000O10000000000000O10O10000000000000000O10O10000000000000O10000000O10000000O10000000000000O10O10000000000000000O10O10000000000000O10000000O10000000O1000000000000000O010000000000000000O1000O100000000000O1000000000O100000O1000000000000000O010000000000000000O1000O100000000000O100000000000O2O:Fe0[Od0\\Od0\\Oe0[Od0\\Oe0ZO\\Ri5" + } + ], + "question": "What is the primary material of ?", + "choices": [ + "A. Plastic.", + "B. Wood.", + "C. Metal.", + "D. Glass." 
+ ], + "answer": "B", + "type": "material", + "image": "images/vqa_98.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01070155.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "id_89fo0100000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000Y[m<" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Orange.", + "B. Blue.", + "C. White.", + "D. Black." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_99.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01070155.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "id_89fo0100000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000Y[m<" + } + ], + "question": "What is the shape of ?", + "choices": [ + "A. Cuboid.", + "B. Cylindrical.", + "C. Conical.", + "D. Rectangular." + ], + "answer": "B", + "type": "shape", + "image": "images/vqa_100.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01070155.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "id_89fo0100000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000Y[m<" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. Paper.", + "B. Cloth.", + "C. Wood.", + "D. Plastic." 
+ ], + "answer": "D", + "type": "material", + "image": "images/vqa_101.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01071650.jpg", + "mask_rles": [ + { + "size": [ + 821, + 1024 + ], + "counts": "XR1:Vi0_1eN]4cKR3nL22N2N1N100O1O001N1O2N3_JTCSNi>1c6iNko<;dnBY1XXOkNfe0W3hNY1fNY1gNX1hNY1ROm001O00000000O100O11O2N2N001O1O0O10001N010O1O1O0O1N3N02N2M300O1O1O1000000000000005K2N1O2N1O1O001O00aNTAUHl>i4mAoK[OWOg>X4`C`KkM7e>k3\\ETLc:^3kEbLU:P3YFQMf9a2iF^MW9S2XGmMe8h1iGXNU8\\1ZHcNe7P1jHoNh6QOmAc1j7[Oj5BZBf0[8GP3QOYFQ1[NHk86`2UOhE]1kNUOQ99[2VOZEk1ZOcNU9;W2XOkDX2IUNU9;V2S2gD`MU9?", + "choices": [ + "A. The object is rectangular.", + "B. The object is circular.", + "C. The object has a checkerboard pattern of squares.", + "D. The object is oval." + ], + "answer": "A", + "type": "shape", + "image": "images/vqa_102.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01071650.jpg", + "mask_rles": [ + { + "size": [ + 821, + 1024 + ], + "counts": "XR1:Vi0_1eN]4cKR3nL22N2N1N100O1O001N1O2N3_JTCSNi>1c6iNko<;dnBY1XXOkNfe0W3hNY1fNY1gNX1hNY1ROm001O00000000O100O11O2N2N001O1O0O10001N010O1O1O0O1N3N02N2M300O1O1O1000000000000005K2N1O2N1O1O001O00aNTAUHl>i4mAoK[OWOg>X4`C`KkM7e>k3\\ETLc:^3kEbLU:P3YFQMf9a2iF^MW9S2XGmMe8h1iGXNU8\\1ZHcNe7P1jHoNh6QOmAc1j7[Oj5BZBf0[8GP3QOYFQ1[NHk86`2UOhE]1kNUOQ99[2VOZEk1ZOcNU9;W2XOkDX2IUNU9;V2S2gD`MU9?", + "choices": [ + "A. Red.", + "B. Yellow.", + "C. Blue.", + "D. Green." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_103.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01071650.jpg", + "mask_rles": [ + { + "size": [ + 821, + 1024 + ], + "counts": "XR1:Vi0_1eN]4cKR3nL22N2N1N100O1O001N1O2N3_JTCSNi>1c6iNko<;dnBY1XXOkNfe0W3hNY1fNY1gNX1hNY1ROm001O00000000O100O11O2N2N001O1O0O10001N010O1O1O0O1N3N02N2M300O1O1O1000000000000005K2N1O2N1O1O001O00aNTAUHl>i4mAoK[OWOg>X4`C`KkM7e>k3\\ETLc:^3kEbLU:P3YFQMf9a2iF^MW9S2XGmMe8h1iGXNU8\\1ZHcNe7P1jHoNh6QOmAc1j7[Oj5BZBf0[8GP3QOYFQ1[NHk86`2UOhE]1kNUOQ99[2VOZEk1ZOcNU9;W2XOkDX2IUNU9;V2S2gD`MU9?", + "choices": [ + "A. The object is predominantly yellow with a green checkered pattern.", + "B. The object is primarily white.", + "C. The object has a black body and an illuminated red light.", + "D. The object is dark gray." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_104.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01080826.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "k`[19eo04M2N101N1O100O10000O1O100O10O10O01M3N101O1O10O01000O010000O10O100000000000010O0001O0O102M3M4J^_Te0" + } + ], + "question": "What is in the image?", + "choices": [ + "A. It is a kitchen sponge.", + "B. It is a bar of soap.", + "C. It is a bottle of dish soap.", + "D. It is the handle of a kitchen utensil." + ], + "answer": "B", + "type": "shape", + "image": "images/vqa_105.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01080826.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "k`[19eo04M2N101N1O100O10000O1O100O10O10O01M3N101O1O10O01000O010000O10O100000000000010O0001O0O102M3M4J^_Te0" + } + ], + "question": "Which of the following options accurately describes located near the sink?", + "choices": [ + "A. 
A yellow and brown bar of soap.", + "B. A yellow cleaning sponge.", + "C. A part of a silver faucet.", + "D. A slice of a banana." + ], + "answer": "A", + "type": "color", + "image": "images/vqa_106.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01091580.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Vco23lo02O00100N101O1O001O001O1O001O1O001O1O001O001N2O001O1O001O1O001O010O1O001O1O001O1O001O001O1O001O1O001O000001O00001O0000010O01O0010O01O0010O0001O010O0010O01O0010O0000001N1O100O2N100O2N100O1O2O0O1O101N1O100O2O]QO@fm0?ZROCem0<\\ROEdm0:[ROGfm07ZROKem04\\ROMcm03\\ROOcm00^RO1am0N_RO3am0M^RO5bm0I_RO8`m0G`RO;_m0EaRO;_m0DaRO=_m0C`RO=bm0A_RO>an0O10001O000O2O00000010O00O2N10000010O0001O0000010O01O01O101N1O100O2O0O2N2Nj[T`0" + } + ], + "question": "What is the shape of the object indicated by ?", + "choices": [ + "A. The object is elongated and thin.", + "B. The object is round and smooth.", + "C. The object is bell-shaped with multiple lobes.", + "D. The object is bulbous and tapers at one end." + ], + "answer": "A", + "type": "shape", + "image": "images/vqa_107.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01091580.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Vco23lo02O00100N101O1O001O001O1O001O1O001O1O001O001N2O001O1O001O1O001O010O1O001O1O001O1O001O001O1O001O1O001O000001O00001O0000010O01O0010O01O0010O0001O010O0010O01O0010O0000001N1O100O2N100O2N100O1O2O0O1O101N1O100O2O]QO@fm0?ZROCem0<\\ROEdm0:[ROGfm07ZROKem04\\ROMcm03\\ROOcm00^RO1am0N_RO3am0M^RO5bm0I_RO8`m0G`RO;_m0EaRO;_m0DaRO=_m0C`RO=bm0A_RO>an0O10001O000O2O00000010O00O2N10000010O0001O0000010O01O01O101N1O100O2O0O2N2Nj[T`0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Orange.", + "B. Red.", + "C. Green.", + "D. Brown." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_108.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01091580.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Vco23lo02O00100N101O1O001O001O1O001O1O001O1O001O001N2O001O1O001O1O001O010O1O001O1O001O1O001O001O1O001O1O001O000001O00001O0000010O01O0010O01O0010O0001O010O0010O01O0010O0000001N1O100O2N100O2N100O1O2O0O1O101N1O100O2O]QO@fm0?ZROCem0<\\ROEdm0:[ROGfm07ZROKem04\\ROMcm03\\ROOcm00^RO1am0N_RO3am0M^RO5bm0I_RO8`m0G`RO;_m0EaRO;_m0DaRO=_m0C`RO=bm0A_RO>an0O10001O000O2O00000010O00O2N10000010O0001O0000010O01O01O101N1O100O2O0O2N2Nj[T`0" + } + ], + "question": "Based on the image, what is the shape of ?", + "choices": [ + "A. The object is spherical.", + "B. The object is curved.", + "C. The object is bell-shaped.", + "D. The object is cylindrical." + ], + "answer": "B", + "type": "shape", + "image": "images/vqa_109.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01095871.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "ob<`0_o02N2O000000O1000001O0001O000001O000001O000001O0001O000001O000001O000001O000001O0001O00000001O0001O000001O0001O00000001O0001O00000001O01O00000001O00012M2\\XO[Oi?f0d_ONZ`02S_Oa0m`0@_^OS1aa0kNn]Oh1Sb0VN\\]O\\2db0cMj\\Oo2Wc0QM\\\\OBWLa3]g0lLQ\\Oi3oc0WLe[OU4[d0kKY[Oa4gd0_KmZOm4Te0RKcXOCd1g5ie0fJbXOENN`0N0U6Pg0ZJbXOL;HF^6]g0nIbXO07\\6Wg0dIbXO14]6Zg0XJdXOj5]g0XJ_XOi5ag0[1000000000_OjXOWHXg0o6aXORI^h0e6c0C=lN_VOPKZj0e4f0YOXUORLmj0`3P1D?", + "choices": [ + "A. The masked object is a large, rectangular block.", + "B. The masked object is part of a long, horizontal structure.", + "C. The masked object is the main vertical support of the structure.", + "D. The masked object consists primarily of crisscrossing diagonal lines." 
+ ], + "answer": "B", + "type": "shape", + "image": "images/vqa_110.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01095871.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "ob<`0_o02N2O000000O1000001O0001O000001O000001O000001O0001O000001O000001O000001O000001O0001O00000001O0001O000001O0001O00000001O0001O00000001O01O00000001O00012M2\\XO[Oi?f0d_ONZ`02S_Oa0m`0@_^OS1aa0kNn]Oh1Sb0VN\\]O\\2db0cMj\\Oo2Wc0QM\\\\OBWLa3]g0lLQ\\Oi3oc0WLe[OU4[d0kKY[Oa4gd0_KmZOm4Te0RKcXOCd1g5ie0fJbXOENN`0N0U6Pg0ZJbXOL;HF^6]g0nIbXO07\\6Wg0dIbXO14]6Zg0XJdXOj5]g0XJ_XOi5ag0[1000000000_OjXOWHXg0o6aXORI^h0e6c0C=lN_VOPKZj0e4f0YOXUORLmj0`3P1D is correct?", + "choices": [ + "A. The main arm is a solid beam, not a lattice structure.", + "B. A hook is visible hanging from the object's arm.", + "C. It is the tallest structure in the image.", + "D. There is no operator's cab attached to the tower." + ], + "answer": "B", + "type": "shape", + "image": "images/vqa_111.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01103219.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "dSi>2lg03N1M3L3N3M3M1O1N101O2N3MIoXOEmf0IXiA0000000lXO6Rf0JnYO6Rf0>ZYOBff0`0XYO@hf0a0WYO_Ojf0a0UYO_Omf0i0?", + "choices": [ + "A. Triangular.", + "B. Cylindrical.", + "C. Irregular.", + "D. Rectangular." + ], + "answer": "D", + "type": "shape", + "image": "images/vqa_112.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01103219.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "dSi>2lg03N1M3L3N3M3M1O1N101O2N3MIoXOEmf0IXiA0000000lXO6Rf0JnYO6Rf0>ZYOBff0`0XYO@hf0a0WYO_Ojf0a0UYO_Omf0i0?", + "choices": [ + "A. The object is made of plastic.", + "B. The object is made of wood.", + "C. The object is made of metal.", + "D. The object is made of stone." 
+ ], + "answer": "B", + "type": "material", + "image": "images/vqa_113.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01103219.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "dSi>2lg03N1M3L3N3M3M1O1N101O2N3MIoXOEmf0IXiA0000000lXO6Rf0JnYO6Rf0>ZYOBff0`0XYO@hf0a0WYO_Ojf0a0UYO_Omf0i0 in the image?", + "choices": [ + "A. A structural support for the playground.", + "B. A curved wooden bench.", + "C. A flat-topped wooden stool.", + "D. A wooden toolbox." + ], + "answer": "C", + "type": "shape", + "image": "images/vqa_114.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01103275.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "gb_f03lg09[OESYO>Rg05I^OnXOe0Qg06O1O1O0001O1O001N2N2L4O001N003M2O000O2N1000000000001N100O1O1O110O001N\\]b0" + } + ], + "question": "What is the shape of ?", + "choices": [ + "A. It is cylindrical.", + "B. It is rectangular.", + "C. It is conical.", + "D. It is spherical." + ], + "answer": "C", + "type": "shape", + "image": "images/vqa_115.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01103275.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "gb_f03lg09[OESYO>Rg05I^OnXOe0Qg06O1O1O0001O1O001N2N2L4O001N003M2O000O2N1000000000001N100O1O1O110O001N\\]b0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. White.", + "B. Gray.", + "C. Brown.", + "D. Black." 
+ ], + "answer": "B", + "type": "color", + "image": "images/vqa_116.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01103275.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cTR4a0]g03M4N4K3N0OATYOMkf03VYOMhf04XYOLhf03ZYOMef03\\YOMcf04\\YOLcf06]YOJbf06^YOJbf06^YOJaf08^YOHbf07^YOIcf07]YOIcf07]YOIbf09]YOGcf08]YOIcf04YYO_O5=bf03aYOM_f02bYON^f0OeYO1[f0NfYO2Yf0NhYOGcf08c0N3N2O2N1N10c[Tc0" + } + ], + "question": "What is the overall shape of ?", + "choices": [ + "A. Crescent-shaped.", + "B. Rectangular.", + "C. Circular.", + "D. Triangular." + ], + "answer": "C", + "type": "shape", + "image": "images/vqa_117.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01103275.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cTR4a0]g03M4N4K3N0OATYOMkf03VYOMhf04XYOLhf03ZYOMef03\\YOMcf04\\YOLcf06]YOJbf06^YOJbf06^YOJaf08^YOHbf07^YOIcf07]YOIcf07]YOIbf09]YOGcf08]YOIcf04YYO_O5=bf03aYOM_f02bYON^f0OeYO1[f0NfYO2Yf0NhYOGcf08c0N3N2O2N1N10c[Tc0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Blue.", + "B. White.", + "C. Red.", + "D. Green." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_118.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01103275.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cTR4a0]g03M4N4K3N0OATYOMkf03VYOMhf04XYOLhf03ZYOMef03\\YOMcf04\\YOLcf06]YOJbf06^YOJbf06^YOJaf08^YOHbf07^YOIcf07]YOIcf07]YOIbf09]YOGcf08]YOIcf04YYO_O5=bf03aYOM_f02bYON^f0OeYO1[f0NfYO2Yf0NhYOGcf08c0N3N2O2N1N10c[Tc0" + } + ], + "question": "What is in the image?", + "choices": [ + "A. A drum rim made of plastic.", + "B. A buckle made of silver.", + "C. A drum rim made of metal.", + "D. A drumhead made of hide." 
+ ], + "answer": "C", + "type": "material", + "image": "images/vqa_119.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01108895.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "WUk23lg04K5K6K4gXO\\Oif0U1K4L5U]OnN^>X1PAVOm>Q1k@SOS?S1i@mNU?Y1f@hNW?_1U^OcMZ2P1^?e1a@[N\\?i1b@XN]?j1b@VN]?l1a@UN^?m1a@SN^?o1`@RN_?P2`@PN_?Q2a@oM^?S2`@nM_?T2`@lM_?V2_@kM`?W2_@iM`?Y2_@gM`?[2^@gM`?[2_@eM`?]2_@cM`?_2_@aM`?a2^@`Ma?b2]@_Mb?c2]@]Mb?d2]@]Mb?e2\\@\\Mc?f2\\@ZMc?h2\\@XMc?j2[@WMd?k2Z@VMe?l2Y@UMf?m2Y@SMg?m2X@UMf?m2Y@SMg?n2X@RMg?o2Y@QMf?Q3X@PMh?P3X@PMg?R3X@nLh?S3W@mLh?T3W@mLi?T3V@lLi?U3W@kLh?W3V@jLj?W3U@gLl?Z3T@fLl?[3S@eLl?]3S@cLm?]3R@dLm?^3R@bLm?_3R@bLn?_3Q@aLn?a3Q@_Lo?a3Q@_Ln?c3R@\\Ln?e3R@ZLm?g3T@XLk?j3U@ULj?l3W@SLi?n3V@RLi?o3X@PLi?P4V@PLj?P4V@PLk?o3T@RLl?n3S@SLm?m3S@SLm?m3R@TLn?l3R@TLn?l3R@TLn?m3Q@SLo?m3Q@SLo?m3Q@SLo?m3Q@SLo?m3Q@SLo?m3Q@SLo?m3Q@SLo?n3P@RLP`0n3P@RLo?o3Q@QLo?o3Q@QLo?o3Q@QLo?o3Q@QLo?o3R@PLn?Q4Q@oKo?Q4Q@oKo?Q4Q@oKo?Q4e_O^J9a1R`0Q4Q@oKo?Q4Q@oKo?Q4Q@oKo?^4d_ObK[`0R60000000O01O1O1ON3M4UNo_OfKP`0Z4o_OhKP`0X4P@jKn?V4R@kKm?U4S@lKl?T4U@kKl?T4T@lKl?T4T@mKk?S4U@mKk?S4U@mKk?S4U@mKl?R4T@nKl?R4U@mKk?S4U@mKk?R4V@nKj?R4V@nKk?P4V@PLj?P4V@hKR`0X4n_OiKQ`0V4P@kKo?U4Q@kKP`0S4Q@nKn?R4R@oKm?Q4S@PLl?o3U@RLj?n3V@SLi?m3W@SLj?k3W@ULi?k3W@SLk?l3W@QLk?o3U@nKn?R4R@oKn?o3S@QLm?o3S@RLl?m3U@SLk?m3U@TLj?l3V@ULj?i3W@WLi?i3W@WLj?h3V@XLj?g3W@WLl?h3T@VLo?h3R@YLn?f3R@[Lm?e3S@\\Ll?c3U@^Ll?fNW_Og4n0cLk?fNW_Oe4o0eLj?fNW_Od4P1fLj?fNV_Oc4P1gLk?eNU_Oc4Q1hLk?eNT_Ob4Q1iLk?eNT_Oa4R1jLk?eNS_O_4S1lLl?T3T@lLm?R3T@nLl?R3T@mLn?Q3T@nLl?Q3U@oLl?P3T@mLo?R3R@aL\\`0^3d_OcL\\`0[3e_OeL\\`0Y3e_OhLZ`0X3f_OhL[`0V3f_OkLZ`0S3g_OnLY`0P3h_OPMY`0m2j_OSMW`0i2k_OXMV`0d2l_O\\MU`0a2m_O`MT`0\\2n_OdMT`0X2n_OiMR`0T2P@lMR`0Q2P@nMR`0n1P@RNR`0j1P@UNR`0i1o_OWNS`0f1n_OYNU`0b1n_O^NS`0^1P@aNS`0[1P@dNR`0W1Q@hNR`0S1Q@jNS`0S1`3K5K4M4K5K3N2M4N1O2N2N0M5L3M4MQcV`0" + } + ], + "question": "What are the primary colors of ?", + "choices": [ + "A. 
Green and yellow.", + "B. Silver and blue.", + "C. Light blue and white.", + "D. Solid silver." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_120.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01108895.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "WUk23lg04K5K6K4gXO\\Oif0U1K4L5U]OnN^>X1PAVOm>Q1k@SOS?S1i@mNU?Y1f@hNW?_1U^OcMZ2P1^?e1a@[N\\?i1b@XN]?j1b@VN]?l1a@UN^?m1a@SN^?o1`@RN_?P2`@PN_?Q2a@oM^?S2`@nM_?T2`@lM_?V2_@kM`?W2_@iM`?Y2_@gM`?[2^@gM`?[2_@eM`?]2_@cM`?_2_@aM`?a2^@`Ma?b2]@_Mb?c2]@]Mb?d2]@]Mb?e2\\@\\Mc?f2\\@ZMc?h2\\@XMc?j2[@WMd?k2Z@VMe?l2Y@UMf?m2Y@SMg?m2X@UMf?m2Y@SMg?n2X@RMg?o2Y@QMf?Q3X@PMh?P3X@PMg?R3X@nLh?S3W@mLh?T3W@mLi?T3V@lLi?U3W@kLh?W3V@jLj?W3U@gLl?Z3T@fLl?[3S@eLl?]3S@cLm?]3R@dLm?^3R@bLm?_3R@bLn?_3Q@aLn?a3Q@_Lo?a3Q@_Ln?c3R@\\Ln?e3R@ZLm?g3T@XLk?j3U@ULj?l3W@SLi?n3V@RLi?o3X@PLi?P4V@PLj?P4V@PLk?o3T@RLl?n3S@SLm?m3S@SLm?m3R@TLn?l3R@TLn?l3R@TLn?m3Q@SLo?m3Q@SLo?m3Q@SLo?m3Q@SLo?m3Q@SLo?m3Q@SLo?m3Q@SLo?n3P@RLP`0n3P@RLo?o3Q@QLo?o3Q@QLo?o3Q@QLo?o3Q@QLo?o3R@PLn?Q4Q@oKo?Q4Q@oKo?Q4Q@oKo?Q4e_O^J9a1R`0Q4Q@oKo?Q4Q@oKo?Q4Q@oKo?^4d_ObK[`0R60000000O01O1O1ON3M4UNo_OfKP`0Z4o_OhKP`0X4P@jKn?V4R@kKm?U4S@lKl?T4U@kKl?T4T@lKl?T4T@mKk?S4U@mKk?S4U@mKk?S4U@mKl?R4T@nKl?R4U@mKk?S4U@mKk?R4V@nKj?R4V@nKk?P4V@PLj?P4V@hKR`0X4n_OiKQ`0V4P@kKo?U4Q@kKP`0S4Q@nKn?R4R@oKm?Q4S@PLl?o3U@RLj?n3V@SLi?m3W@SLj?k3W@ULi?k3W@SLk?l3W@QLk?o3U@nKn?R4R@oKn?o3S@QLm?o3S@RLl?m3U@SLk?m3U@TLj?l3V@ULj?i3W@WLi?i3W@WLj?h3V@XLj?g3W@WLl?h3T@VLo?h3R@YLn?f3R@[Lm?e3S@\\Ll?c3U@^Ll?fNW_Og4n0cLk?fNW_Oe4o0eLj?fNW_Od4P1fLj?fNV_Oc4P1gLk?eNU_Oc4Q1hLk?eNT_Ob4Q1iLk?eNT_Oa4R1jLk?eNS_O_4S1lLl?T3T@lLm?R3T@nLl?R3T@mLn?Q3T@nLl?Q3U@oLl?P3T@mLo?R3R@aL\\`0^3d_OcL\\`0[3e_OeL\\`0Y3e_OhLZ`0X3f_OhL[`0V3f_OkLZ`0S3g_OnLY`0P3h_OPMY`0m2j_OSMW`0i2k_OXMV`0d2l_O\\MU`0a2m_O`MT`0\\2n_OdMT`0X2n_OiMR`0T2P@lMR`0Q2P@nMR`0n1P@RNR`0j1P@UNR`0i1o_OWNS`0f1n_OYNU`0b1n_O^NS`0^1P@aNS`0[1P@dNR`0W1Q@hNR`0S1Q@jNS`0S1`3K5K4M4K5K3N2M4N1O2N2N0M5L3M4MQcV`0" 
+ } + ], + "question": "What is the material of ?", + "choices": [ + "A. The object is made of ceramic tiles and grout.", + "B. The object is made of metal and plastic.", + "C. The object is made of painted wood.", + "D. The object is made of woven fabric and straw." + ], + "answer": "B", + "type": "material", + "image": "images/vqa_121.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01121205.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "^QU1m0nf09J2M3N2N2M2O2N:G=C>C=B=B7H6L4L4M2N2M3N1N3N1N3N2M2O2L4N1N3N1N2O0O2O0O2O001N101N101N101N2O0O2O001O0O2O001O001N101O001O0O2O1O0000000O101O00000O100000000O100000000O100000000000000000000O1000000000000000000000000000000000000000O10000000O10000000000000000000O100000000000000000000000000O10000000O100000O1000000O100000000O100000000O100000000O100000000O1000000O100000000O100000000O10001O000O100000000O100000001N101O0O1mJg]O_4Zb0`Kh]O^4Xb0bKi]O\\4Yb0dKg]O[4Yb0eKh]OZ4Xb0fKi]OX4Yb0gKh]OX4Xb0hKj]OV4Wb0jKi]OT4Xb0lKi]OS4Xb0lKi]OS4Wb0nKi]OP4Xb0PLi]Oo3Xb0QLh]Om3Yb0SLh]Ol3Yb0TLg]Ok3Yb0ULg]Oj3[b0VLc]Ok3_b0SL`]On3ab0RL\\]Oo3fb0e02L3L5L4bLg\\On1`c0lMh\\Ok1]c0mMP]Oi1Xc0mM_]O^1gb0YNijZa0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Red and white.", + "B. White and black.", + "C. Silver and black.", + "D. Orange and silver." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_122.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01121205.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "^QU1m0nf09J2M3N2N2M2O2N:G=C>C=B=B7H6L4L4M2N2M3N1N3N1N3N2M2O2L4N1N3N1N2O0O2O0O2O001N101N101N101N2O0O2O001O0O2O001O001N101O001O0O2O1O0000000O101O00000O100000000O100000000O100000000000000000000O1000000000000000000000000000000000000000O10000000O10000000000000000000O100000000000000000000000000O10000000O100000O1000000O100000000O100000000O100000000O100000000O1000000O100000000O100000000O10001O000O100000000O100000001N101O0O1mJg]O_4Zb0`Kh]O^4Xb0bKi]O\\4Yb0dKg]O[4Yb0eKh]OZ4Xb0fKi]OX4Yb0gKh]OX4Xb0hKj]OV4Wb0jKi]OT4Xb0lKi]OS4Xb0lKi]OS4Wb0nKi]OP4Xb0PLi]Oo3Xb0QLh]Om3Yb0SLh]Ol3Yb0TLg]Ok3Yb0ULg]Oj3[b0VLc]Ok3_b0SL`]On3ab0RL\\]Oo3fb0e02L3L5L4bLg\\On1`c0lMh\\Ok1]c0mMP]Oi1Xc0mM_]O^1gb0YNijZa0" + } + ], + "question": "Which of the following accurately describes a feature of ?", + "choices": [ + "A. The object has a curved handle on top.", + "B. The object has a large, gray, overarching handle.", + "C. The object is primarily cylindrical in shape.", + "D. The object has a square base." 
+ ], + "answer": "A", + "type": "shape", + "image": "images/vqa_123.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01121205.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "^QU1m0nf09J2M3N2N2M2O2N:G=C>C=B=B7H6L4L4M2N2M3N1N3N1N3N2M2O2L4N1N3N1N2O0O2O0O2O001N101N101N101N2O0O2O001O0O2O001O001N101O001O0O2O1O0000000O101O00000O100000000O100000000O100000000000000000000O1000000000000000000000000000000000000000O10000000O10000000000000000000O100000000000000000000000000O10000000O100000O1000000O100000000O100000000O100000000O100000000O1000000O100000000O100000000O10001O000O100000000O100000001N101O0O1mJg]O_4Zb0`Kh]O^4Xb0bKi]O\\4Yb0dKg]O[4Yb0eKh]OZ4Xb0fKi]OX4Yb0gKh]OX4Xb0hKj]OV4Wb0jKi]OT4Xb0lKi]OS4Xb0lKi]OS4Wb0nKi]OP4Xb0PLi]Oo3Xb0QLh]Om3Yb0SLh]Ol3Yb0TLg]Ok3Yb0ULg]Oj3[b0VLc]Ok3_b0SL`]On3ab0RL\\]Oo3fb0e02L3L5L4bLg\\On1`c0lMh\\Ok1]c0mMP]Oi1Xc0mM_]O^1gb0YNijZa0" + } + ], + "question": "Which statement correctly describes a shape-related feature of ?", + "choices": [ + "A. The display screen on its front is rectangular.", + "B. The top of the object is completely flat.", + "C. It has a large, curved handle on top for carrying.", + "D. The main body of the object is a perfect cube." 
+ ], + "answer": "A", + "type": "shape", + "image": "images/vqa_124.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01121205.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "^QU1m0nf09J2M3N2N2M2O2N:G=C>C=B=B7H6L4L4M2N2M3N1N3N1N3N2M2O2L4N1N3N1N2O0O2O0O2O001N101N101N101N2O0O2O001O0O2O001O001N101O001O0O2O1O0000000O101O00000O100000000O100000000O100000000000000000000O1000000000000000000000000000000000000000O10000000O10000000000000000000O100000000000000000000000000O10000000O100000O1000000O100000000O100000000O100000000O100000000O1000000O100000000O100000000O10001O000O100000000O100000001N101O0O1mJg]O_4Zb0`Kh]O^4Xb0bKi]O\\4Yb0dKg]O[4Yb0eKh]OZ4Xb0fKi]OX4Yb0gKh]OX4Xb0hKj]OV4Wb0jKi]OT4Xb0lKi]OS4Xb0lKi]OS4Wb0nKi]OP4Xb0PLi]Oo3Xb0QLh]Om3Yb0SLh]Ol3Yb0TLg]Ok3Yb0ULg]Oj3[b0VLc]Ok3_b0SL`]On3ab0RL\\]Oo3fb0e02L3L5L4bLg\\On1`c0lMh\\Ok1]c0mMP]Oi1Xc0mM_]O^1gb0YNijZa0" + } + ], + "question": "What is the primary color of the body of ?", + "choices": [ + "A. White.", + "B. Red.", + "C. Silver.", + "D. Orange." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_125.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01121205.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "^QU1m0nf09J2M3N2N2M2O2N:G=C>C=B=B7H6L4L4M2N2M3N1N3N1N3N2M2O2L4N1N3N1N2O0O2O0O2O001N101N101N101N2O0O2O001O0O2O001O001N101O001O0O2O1O0000000O101O00000O100000000O100000000O100000000000000000000O1000000000000000000000000000000000000000O10000000O10000000000000000000O100000000000000000000000000O10000000O100000O1000000O100000000O100000000O100000000O100000000O1000000O100000000O100000000O10001O000O100000000O100000001N101O0O1mJg]O_4Zb0`Kh]O^4Xb0bKi]O\\4Yb0dKg]O[4Yb0eKh]OZ4Xb0fKi]OX4Yb0gKh]OX4Xb0hKj]OV4Wb0jKi]OT4Xb0lKi]OS4Xb0lKi]OS4Wb0nKi]OP4Xb0PLi]Oo3Xb0QLh]Om3Yb0SLh]Ol3Yb0TLg]Ok3Yb0ULg]Oj3[b0VLc]Ok3_b0SL`]On3ab0RL\\]Oo3fb0e02L3L5L4bLg\\On1`c0lMh\\Ok1]c0mMP]Oi1Xc0mM_]O^1gb0YNijZa0" + } + ], + "question": "What material is primarily made of?", + "choices": [ + "A. Metal.", + "B. Wood.", + "C. Plastic.", + "D. Concrete." + ], + "answer": "C", + "type": "material", + "image": "images/vqa_126.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01142493.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "VPc51ng03N1N2N2O2M2N2N2N3O000O10000O1N2O1O1N2O1N2O2N1N200O2O000O101O0O100 made of?", + "choices": [ + "A. Woven.", + "B. Plastic.", + "C. Cardboard.", + "D. Leather." + ], + "answer": "A", + "type": "material", + "image": "images/vqa_127.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01142493.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "VPc51ng03N1N2N2O2M2N2N2N3O000O10000O1N2O1O1N2O1N2O2N1N200O2O000O101O0O100?", + "choices": [ + "A. Woven.", + "B. Plaid.", + "C. Polka-dotted.", + "D. Striped." 
+ ], + "answer": "A", + "type": "texture/pattern", + "image": "images/vqa_128.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01142493.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "VPc51ng03N1N2N2O2M2N2N2N3O000O10000O1N2O1O1N2O1N2O2N1N200O2O000O101O0O100?", + "choices": [ + "A. White.", + "B. Red.", + "C. Black.", + "D. Brown." + ], + "answer": "D", + "type": "color", + "image": "images/vqa_129.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01142493.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "PP`:1og02N000000001O00000000001O0000000000001O00000P\\Ok1U>UNkAm1S>SNmAn1R>SNmAm1fMoMk>4_Cm1dMVOh=mNdDh1iM`0^TNYOR2h0_NaNh0^2ZOZMe0m2[OlLf0Z3[O_Ld0h3[ORLf0T4ZOfKe0a4YO[Kh0j4UOTKk0Q5ROmJm0Y5oNfJQ1_5lN_JT1f5iNXJV1n5fNQJZ1T6cNiI]1]6`N`Ia1e6]NXIb1n6^NlHc1Y7]NbHb1d7^NVHc1o7]NkGc1[8]N^Ge1g8[NSGe1S9[NgFe1_9[N[Ff1j9ZNPFf1V:ZNeEe1a:\\NYEd1l:\\NnDd1X;\\NcDc1c;]NXDc1m;]NmCc1Y<]NbCb1d<^NWCb1n<^NlBc1Y=^NaBb1d=^NWBb1n=^NlAc1Y>]NbAc1c>]NXAc1m>]Nn@c1W?]Nd@b1b?^NY@b1l?_Nk_Od1Z`0\\Nd_Oa1a`0_N__O\\1f`0cNZ_OY1k`0gNV_OR1Pa0nNP_Ol0Va0TOj^Oe0]a0\\Oc^O=ca0C]^O7ia0IW^O1oa00Q^OMQb03o]OKSb05n]OHTb09i200001O00001O00001O001O0000001O00001O00001O00001O0000010N10001O001O00001O00001O00001O001O00001O00001O001O0[YOUOUf0k0eYO]OYf0V1O0000O1000000000000000000O100000000000000O10000000000000000O10000000000000000O10000000000000000O10000000000000000O10000000000O100000000O10000000000O2OO100000O10000000000O100000000O10000000000O10000O10000O100O10000O10000O10000O10000000000O10000000000O1N2F:J6JQa70m^H:[OEXYOd0hf0;O00O11O00001O00001O00001O0000001O00001O00001O00001O00001O0000001O00001O00001O00001O00001O0000001O0000001O0000001O000000001O0000001O0000001O000000001O0000001O0000001O000000001O0000001O0000001O00" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. It is made of plastic.", + "B. 
It is made of metal.", + "C. It is made of wood.", + "D. It is made of fabric." + ], + "answer": "D", + "type": "material", + "image": "images/vqa_130.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01142493.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "PP`:1og02N000000001O00000000001O0000000000001O00000P\\Ok1U>UNkAm1S>SNmAn1R>SNmAm1fMoMk>4_Cm1dMVOh=mNdDh1iM`0^TNYOR2h0_NaNh0^2ZOZMe0m2[OlLf0Z3[O_Ld0h3[ORLf0T4ZOfKe0a4YO[Kh0j4UOTKk0Q5ROmJm0Y5oNfJQ1_5lN_JT1f5iNXJV1n5fNQJZ1T6cNiI]1]6`N`Ia1e6]NXIb1n6^NlHc1Y7]NbHb1d7^NVHc1o7]NkGc1[8]N^Ge1g8[NSGe1S9[NgFe1_9[N[Ff1j9ZNPFf1V:ZNeEe1a:\\NYEd1l:\\NnDd1X;\\NcDc1c;]NXDc1m;]NmCc1Y<]NbCb1d<^NWCb1n<^NlBc1Y=^NaBb1d=^NWBb1n=^NlAc1Y>]NbAc1c>]NXAc1m>]Nn@c1W?]Nd@b1b?^NY@b1l?_Nk_Od1Z`0\\Nd_Oa1a`0_N__O\\1f`0cNZ_OY1k`0gNV_OR1Pa0nNP_Ol0Va0TOj^Oe0]a0\\Oc^O=ca0C]^O7ia0IW^O1oa00Q^OMQb03o]OKSb05n]OHTb09i200001O00001O00001O001O0000001O00001O00001O00001O0000010N10001O001O00001O00001O00001O001O00001O00001O001O0[YOUOUf0k0eYO]OYf0V1O0000O1000000000000000000O100000000000000O10000000000000000O10000000000000000O10000000000000000O10000000000000000O10000000000O100000000O10000000000O2OO100000O10000000000O100000000O10000000000O10000O10000O100O10000O10000O10000O10000000000O10000000000O1N2F:J6JQa70m^H:[OEXYOd0hf0;O00O11O00001O00001O00001O0000001O00001O00001O00001O00001O0000001O00001O00001O00001O00001O0000001O0000001O0000001O000000001O0000001O0000001O000000001O0000001O0000001O000000001O0000001O0000001O00" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Blue.", + "B. Red.", + "C. Yellow.", + "D. Grey." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_131.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01142493.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "hebe0 is correct?", + "choices": [ + "A. 
The object is a combination of blue and white.", + "B. The object is a combination of black and green.", + "C. The object is entirely brown.", + "D. The object is a combination of red and yellow." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_132.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01142493.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "hebe0?", + "choices": [ + "A. Rubber.", + "B. Leather.", + "C. Canvas.", + "D. Plastic." + ], + "answer": "A", + "type": "material", + "image": "images/vqa_133.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01155009.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "WRn23Sg0ObYO7Wf0l0K5N2O1M3O1O000100O000O10000O10000000000000000000000000000000000000000000000000000000001O0O2O001O2J7nNiYO0Pmjc0" + } + ], + "question": "What is the background color of ?", + "choices": [ + "A. Red.", + "B. Yellow.", + "C. Black.", + "D. White." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_134.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01156032.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 980 + ], + "counts": "dZ\\76ho05L2N2N1O1O2O0O1O1O1L4M3L4H8N2N2O1O1O10kNgQOc0Yn0[OkQOc0Tn0]OnQOb0Rn0]OSRO?lm0AVRO=km0CVRO?", + "choices": [ + "A. White.", + "B. Silver.", + "C. Gold.", + "D. Brown." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_135.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01156032.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 980 + ], + "counts": "dZ\\76ho05L2N2N1O1O2O0O1O1O1L4M3L4H8N2N2O1O1O10kNgQOc0Yn0[OkQOc0Tn0]OnQOb0Rn0]OSRO?lm0AVRO=km0CVRO?", + "choices": [ + "A. It is a hook-shaped object.", + "B. 
It is a circular object.", + "C. It is a rectangular object.", + "D. It is an oval object." + ], + "answer": "B", + "type": "shape", + "image": "images/vqa_136.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01156032.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 980 + ], + "counts": "dZ\\76ho05L2N2N1O1O2O0O1O1O1L4M3L4H8N2N2O1O1O10kNgQOc0Yn0[OkQOc0Tn0]OnQOb0Rn0]OSRO?lm0AVRO=km0CVRO?", + "choices": [ + "A. The masked object is silver.", + "B. The masked object is beige.", + "C. The masked object is white.", + "D. The masked object is brown." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_137.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01156032.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 980 + ], + "counts": "ga0<8Lkn05UQOKjn06VQOJin08VQOHin09WQOGin09WQOGhn0:XQOFgn0;YQOEfn0ZQOBen0?[QOAdn0`0\\QO@cn0a0]QO_Ocn0a0^QO^Oan0d0^QO\\Oan0e0_QO[O`n0f0`QOYOan0g0_QOYO`n0h0`QOWO`n0j0`QOUO`n0m0_QOSOan0m0_QOROan0o071O1O101N1O1O100O1O0001O00000010O000000010O000001O0001O01O000001O01O000000010O1O1O1O010O1O1O001O100O1O2N1O100O1O1O2N1O100O2N2N2N2N3M2M3M3Mb_fk0" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. Metal.", + "B. Plastic.", + "C. Glass.", + "D. Ceramic." 
+ ], + "answer": "B", + "type": "material", + "image": "images/vqa_138.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01156032.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 980 + ], + "counts": "ga0<8Lkn05UQOKjn06VQOJin08VQOHin09WQOGin09WQOGhn0:XQOFgn0;YQOEfn0ZQOBen0?[QOAdn0`0\\QO@cn0a0]QO_Ocn0a0^QO^Oan0d0^QO\\Oan0e0_QO[O`n0f0`QOYOan0g0_QOYO`n0h0`QOWO`n0j0`QOUO`n0m0_QOSOan0m0_QOROan0o071O1O101N1O1O100O1O0001O00000010O000000010O000001O0001O01O000001O01O000000010O1O1O1O010O1O1O001O100O1O2N1O100O1O1O2N1O100O2N2N2N2N3M2M3M3Mb_fk0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Blue.", + "B. Green.", + "C. White.", + "D. Silver." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_139.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01156032.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 980 + ], + "counts": "ga0<8Lkn05UQOKjn06VQOJin08VQOHin09WQOGin09WQOGhn0:XQOFgn0;YQOEfn0ZQOBen0?[QOAdn0`0\\QO@cn0a0]QO_Ocn0a0^QO^Oan0d0^QO\\Oan0e0_QO[O`n0f0`QOYOan0g0_QOYO`n0h0`QOWO`n0j0`QOUO`n0m0_QOSOan0m0_QOROan0o071O1O101N1O1O100O1O0001O00000010O000000010O000001O0001O01O000001O01O000000010O1O1O1O010O1O1O001O100O1O2N1O100O1O1O2N1O100O2N2N2N2N3M2M3M3Mb_fk0" + } + ], + "question": "What is the shape of the top of ?", + "choices": [ + "A. It has a pump-action dispenser.", + "B. It has a rounded, dome-like shape.", + "C. It is flat and tapered.", + "D. It is a simple, cylindrical screw-top." 
+ ], + "answer": "C", + "type": "shape", + "image": "images/vqa_140.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01156833.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Zc]a09go08H9G8H9G8H:F9G:E:GW1iN0000O1O1O1O=D9Fe0[O6J7I6Kn0QOQZj5" + } + ], + "question": "What is the shape of in the image?", + "choices": [ + "A. Dome-shaped.", + "B. Cylindrical.", + "C. Irregular.", + "D. Rectangular." + ], + "answer": "D", + "type": "shape", + "image": "images/vqa_141.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01156833.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Zc]a09go08H9G8H9G8H:F9G:E:GW1iN0000O1O1O1O=D9Fe0[O6J7I6Kn0QOQZj5" + } + ], + "question": "Which of the following best describes the shape of in the image?", + "choices": [ + "A. A long, thin cylinder.", + "B. A dome shape.", + "C. A short, wide cylinder.", + "D. A long, narrow rectangle." + ], + "answer": "D", + "type": "shape", + "image": "images/vqa_142.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01156833.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Zc]a09go08H9G8H9G8H:F9G:E:GW1iN0000O1O1O1O=D9Fe0[O6J7I6Kn0QOQZj5" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. Wood.", + "B. Plastic.", + "C. Metal.", + "D. Cardboard." 
+ ], + "answer": "C", + "type": "material", + "image": "images/vqa_143.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01189415.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "j^T=385S10_l0NYRO69HNf0]m0N`ROe0\\m0R1O2M3M2O2M2O2N1O1O101M101N100O2O0O100O2O0O100O2O0O010O010O01O001O00000000O100O101O000000000000O10O10O10000001O1O2N2N1O2N1O001O1O1O1O1O1O1O001O1O1O1O1O1O001O001O0000000O2O0O10000O101N100O10001N100O100O2O000O2O0O2O1N2O1O1N2O1N2O0O2O1O1N2O1N2O1N2N1O2O1N2N2N2N2O1N2N2N2N3N1N2N2N2N2O1N3M2N1O2O0O2N1O10O01O1O00100O001O10O01O1O010O1O001O10O01O1O010O1O001O010O00O1O2F9N200O2O000O1O2M2O2N10001O001O001O000O2O001O00001O001O0N3L300100O2O0O100O1O101N100O1O101N1000000O2O000O101O000O101O000O101O0O10O1000O0100O10O0100O010O100O010O1000O01000O01000000O10000O1000000O10000O1000000O10000O10000O100BWVOeJii0[5WVOdJji0[5XVOcJii0]5ZVO]Jii0b5;O1O100O1O2O0O1O2O0O2N100O2N1O2O0O1O2O0O2N100O2N101N1O101N1O2O0O1O2O0O2N100O2N2O1N1O2O1N2N2O1N1O2O1NVC" + } + ], + "question": "Which of the following statements accurately describes the shape of the ears of ?", + "choices": [ + "A. The ears are rounded at the tips.", + "B. The ears are pointed and triangular.", + "C. The ears are floppy and folded downwards.", + "D. The ears are not visible in the image." 
+ ], + "answer": "B", + "type": "shape", + "image": "images/vqa_144.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01189415.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "V\\_:2U10`m04\\QONj04fm06VRONhm05SRO0jm03RRO1km01QRO3mm00oQO2Qn00jQO3Vn0l0000O1N101O1N2O0O2O1N2O0O2O1N2TOZQO;gn0E]QO5fn0Ii0d0\\O3M3M2N1O0O2O00001O2N1O1N3M3N1N2N2O0N2O001O1O1O1O1O100O02O001O2N5K3M2ZNiQO^1]n00000000004L1O001N010O0000100O1O1O00001OO1000O2O001O001N1O3D;M4M4KRP55koJ2N2O1HCjPO>Uo0c0B6K2N2N2M2O1O2N3L3N2N3M2M4M2N2N2N2O0O1O1O100O1O100O1O00100O1O100O1O[OlRO`NSm0a1mRO_NSm0a1nRO]NRm0d1oROZNRm0f1RSOUNnl0l1c01N10000_ROQNQm0o1nROSNPm0o1nROSNQm0m1oROTNPm0l1oROUNPm0l1PSOTNPm0^200O10000O10000O10000O1000O010000O100O10000O10000O100O01000O10000O100O1000000001O0O100000001O00000000001N1000001O001O0O2O001O1O001O1O0O2O001O1O001O1N2O1O1O1O1O1O1O1N2O1O1O1O1O1O1N2O1O1O1O2N2N2N2M3N2N1O1O0O2O0O2O0O2O0O2O0O2O0O2O001N100O2O0O2N1O2N1O1N2O1O1O1N200O1O1O1O100O1O1000O100001N2O001O00O1O1O10O01O1O1O00100O001O1O1O0O2O001N101N1O2N1O2O000100O1O010O1O1O1H8K5M3J7L`dm2" + } + ], + "question": "What is the shape of the ear of ?", + "choices": [ + "A. Rounded.", + "B. Floppy.", + "C. Triangular.", + "D. Pointed." 
+ ], + "answer": "C", + "type": "shape", + "image": "images/vqa_145.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01189415.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "V\\_:2U10`m04\\QONj04fm06VRONhm05SRO0jm03RRO1km01QRO3mm00oQO2Qn00jQO3Vn0l0000O1N101O1N2O0O2O1N2O0O2O1N2TOZQO;gn0E]QO5fn0Ii0d0\\O3M3M2N1O0O2O00001O2N1O1N3M3N1N2N2O0N2O001O1O1O1O1O100O02O001O2N5K3M2ZNiQO^1]n00000000004L1O001N010O0000100O1O1O00001OO1000O2O001O001N1O3D;M4M4KRP55koJ2N2O1HCjPO>Uo0c0B6K2N2N2M2O1O2N3L3N2N3M2M4M2N2N2N2O0O1O1O100O1O100O1O00100O1O100O1O[OlRO`NSm0a1mRO_NSm0a1nRO]NRm0d1oROZNRm0f1RSOUNnl0l1c01N10000_ROQNQm0o1nROSNPm0o1nROSNQm0m1oROTNPm0l1oROUNPm0l1PSOTNPm0^200O10000O10000O10000O1000O010000O100O10000O10000O100O01000O10000O100O1000000001O0O100000001O00000000001N1000001O001O0O2O001O1O001O1O0O2O001O1O001O1N2O1O1O1O1O1O1O1N2O1O1O1O1O1O1N2O1O1O1O2N2N2N2M3N2N1O1O0O2O0O2O0O2O0O2O0O2O0O2O001N100O2O0O2N1O2N1O1N2O1O1O1N200O1O1O1O100O1O1000O100001N2O001O00O1O1O10O01O1O1O00100O001O1O1O0O2O001N101N1O2N1O2O000100O1O010O1O1O1H8K5M3J7L`dm2" + } + ], + "question": "What is the shape of in the image?", + "choices": [ + "A. Its back is arched.", + "B. Its back is completely straight.", + "C. Its tail is curled up.", + "D. Its head is tilted downwards." 
+ ], + "answer": "A", + "type": "shape", + "image": "images/vqa_146.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01198997.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 795 + ], + "counts": "RPTc0`2_m02O00000O2OO100000000000000000000000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O10000O1000000O10000O1000000O101OO010001N?B8H3M4K5L5K0O2NinS1" + } + ], + "question": "Which of the following best describes the texture of ?", + "choices": [ + "A. It has a smooth texture.", + "B. It has a woven texture.", + "C. It has a crinkled texture.", + "D. It has a rough texture." + ], + "answer": "A", + "type": "texture/pattern", + "image": "images/vqa_147.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01198997.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 795 + ], + "counts": "RPTc0`2_m02O00000O2OO100000000000000000000000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O10000O1000000O10000O1000000O101OO010001N?B8H3M4K5L5K0O2NinS1" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Orange.", + "B. Green.", + "C. Black.", + "D. White." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_148.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01198997.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 795 + ], + "counts": "RPTc0`2_m02O00000O2OO100000000000000000000000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O10000O1000000O10000O1000000O101OO010001N?B8H3M4K5L5K0O2NinS1" + } + ], + "question": "Which of the following statements accurately describes ?", + "choices": [ + "A. It is a canvas handbag.", + "B. It is a leather handbag.", + "C. It is a nylon satchel.", + "D. It is part of a leather jacket." + ], + "answer": "B", + "type": "material", + "image": "images/vqa_149.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01198997.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 795 + ], + "counts": "RPTc0`2_m02O00000O2OO100000000000000000000000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O1000000O100000000O1000000O1000000O10000O1000000O10000O1000000O101OO010001N?B8H3M4K5L5K0O2NinS1" + } + ], + "question": "What is a characteristic of ?", + "choices": [ + "A. It has a visible seam.", + "B. It has a metal zipper.", + "C. It is made of woven fabric.", + "D. It has a leather strap." 
+ ], + "answer": "A", + "type": "texture/pattern", + "image": "images/vqa_150.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01246937.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "RnV2R1bf0a0I7J7mYOPNie0V2N2MO[ZOjMde0Y2O1O100000000O1000O100L4O1000000O10000002N2N1N2O000000000000JaZOkM_e0U26O100O11OO10O10000000001O00000000000000000000001O01O000000000O12O1N1O0001O0000001O001O1N1K6M3O001O1O11100N3L3M1OO1O10gNhYOj0Xf0VOkYOg0Vf0XOnYOb0Uf0\\OPZO8Yf0FR1Mki[c0" + } + ], + "question": "Which statement accurately describes a color-related feature of ?", + "choices": [ + "A. The object is orange.", + "B. The object has white text on its rear.", + "C. The object is entirely black.", + "D. The object is red." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_151.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01246937.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "RnV2R1bf0a0I7J7mYOPNie0V2N2MO[ZOjMde0Y2O1O100000000O1000O100L4O1000000O10000002N2N1N2O000000000000JaZOkM_e0U26O100O11OO10O10000000001O00000000000000000000001O01O000000000O12O1N1O0001O0000001O001O1N1K6M3O001O1O11100N3L3M1OO1O10gNhYOj0Xf0VOkYOg0Vf0XOnYOb0Uf0\\OPZO8Yf0FR1Mki[c0" + } + ], + "question": "What is the material of the taillight on ?", + "choices": [ + "A. Glass.", + "B. Metal.", + "C. Plastic.", + "D. Rubber." 
+ ], + "answer": "C", + "type": "material", + "image": "images/vqa_152.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01246937.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "RnV2R1bf0a0I7J7mYOPNie0V2N2MO[ZOjMde0Y2O1O100000000O1000O100L4O1000000O10000002N2N1N2O000000000000JaZOkM_e0U26O100O11OO10O10000000001O00000000000000000000001O01O000000000O12O1N1O0001O0000001O001O1N1K6M3O001O1O11100N3L3M1OO1O10gNhYOj0Xf0VOkYOg0Vf0XOnYOb0Uf0\\OPZO8Yf0FR1Mki[c0" + } + ], + "question": "What text, which indicates its purpose, is displayed on ?", + "choices": [ + "A. TAXI.", + "B. Norwich.", + "C. 21 22.", + "D. city & rail station." + ], + "answer": "A", + "type": "texture/pattern", + "image": "images/vqa_153.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01252367.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cig7n0kf08G9H8L4N2N2N1O1OSOYZOCfe07aZOI^e00kZOOUe0JR[O6md0FY[O9fd0H[[O7ed0H\\[O8dd0G^[O8bd0G_[O9ad0Fa[O9`d0Eb[O:_d0Db[O?", + "choices": [ + "A. Gray.", + "B. Black.", + "C. White.", + "D. Silver." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_154.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01252367.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cig7n0kf08G9H8L4N2N2N1O1OSOYZOCfe07aZOI^e00kZOOUe0JR[O6md0FY[O9fd0H[[O7ed0H\\[O8dd0G^[O8bd0G_[O9ad0Fa[O9`d0Eb[O:_d0Db[O?", + "choices": [ + "A. Hard, smooth plastic.", + "B. Polished metal.", + "C. A soft, cushioned material.", + "D. Flexible rubber." 
+ ], + "answer": "C", + "type": "material", + "image": "images/vqa_155.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01252367.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cig7n0kf08G9H8L4N2N2N1O1OSOYZOCfe07aZOI^e00kZOOUe0JR[O6md0FY[O9fd0H[[O7ed0H\\[O8dd0G^[O8bd0G_[O9ad0Fa[O9`d0Eb[O:_d0Db[O in the image?", + "choices": [ + "A. A curved handle of a pair of scissors.", + "B. A curved telephone receiver.", + "C. A curved headset.", + "D. A curved arm of a desk lamp." + ], + "answer": "C", + "type": "shape", + "image": "images/vqa_156.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01252367.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "[jT54kg06I4M000000O2O00lYOIad05a[O1Xd0Ok[O5nc0KT\\O9gc0F[\\O?^c0@e\\Oe0Tc0[Oo\\Oi0jb0WOX]Om0bb0SO`]OR1Yb0nNi]OW1Qb0iNP^O[1ja0eNY^O[1da0fN]^OZ1aa0fNa^OY1^a0gNd^OX1[a0iNf^OV1Za0iNi^OV1Ua0kNl^OT1Sa0lNo^OS1Pa0nNQ_OQ1n`0oNT_OQ1j`0POX_On0h`0QOZ_On0e`0RO]_Om0b`0TO__Ok0``0UOb_Ok0\\`0VOe_Oi0Z`0WOh_Oh0X`0XOj_Of0U`0ZOm_Of0Q`0[OP@d0o?\\OR@d0m?\\OU@c0j?^OV@c0h?]OZ@b0f?^O[@a0d?@]@?b?A`@?^?Bc@=\\?De@;Z?Eg@;Y?Eh@;W?Ei@;W?Dj@T?Bl@>T?Bm@>R?Ao@?Q?Ao@?Q?Ao@?Q?@QA`0o>_OQAa0o>_OQAa0o>^ORAb0n>^OSAb0l>^OTAb0l>^OTAb0l>]OUAc0l>\\OTAe0k>[OUAe0k>ZOVAf0j>ZOWAe0i>[OWAe0i>ZOXAd0j>\\OVAc0l>\\OUAb0l>^OUALmKa0ob0BYADYA8j>HWAMS?3]401O0000000000001O0000000O10001O00gn``0" + } + ], + "question": "What is the shape of ?", + "choices": [ + "A. The object has a cylindrical shape.", + "B. The object has a square shape.", + "C. The object has a rectangular shape.", + "D. The object has an irregular shape." 
+ ], + "answer": "C", + "type": "shape", + "image": "images/vqa_157.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01252367.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "[jT54kg06I4M000000O2O00lYOIad05a[O1Xd0Ok[O5nc0KT\\O9gc0F[\\O?^c0@e\\Oe0Tc0[Oo\\Oi0jb0WOX]Om0bb0SO`]OR1Yb0nNi]OW1Qb0iNP^O[1ja0eNY^O[1da0fN]^OZ1aa0fNa^OY1^a0gNd^OX1[a0iNf^OV1Za0iNi^OV1Ua0kNl^OT1Sa0lNo^OS1Pa0nNQ_OQ1n`0oNT_OQ1j`0POX_On0h`0QOZ_On0e`0RO]_Om0b`0TO__Ok0``0UOb_Ok0\\`0VOe_Oi0Z`0WOh_Oh0X`0XOj_Of0U`0ZOm_Of0Q`0[OP@d0o?\\OR@d0m?\\OU@c0j?^OV@c0h?]OZ@b0f?^O[@a0d?@]@?b?A`@?^?Bc@=\\?De@;Z?Eg@;Y?Eh@;W?Ei@;W?Dj@T?Bl@>T?Bm@>R?Ao@?Q?Ao@?Q?Ao@?Q?@QA`0o>_OQAa0o>_OQAa0o>^ORAb0n>^OSAb0l>^OTAb0l>^OTAb0l>]OUAc0l>\\OTAe0k>[OUAe0k>ZOVAf0j>ZOWAe0i>[OWAe0i>ZOXAd0j>\\OVAc0l>\\OUAb0l>^OUALmKa0ob0BYADYA8j>HWAMS?3]401O0000000000001O0000000O10001O00gn``0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Blue.", + "B. White.", + "C. Gray.", + "D. Black." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_158.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01252367.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "[jT54kg06I4M000000O2O00lYOIad05a[O1Xd0Ok[O5nc0KT\\O9gc0F[\\O?^c0@e\\Oe0Tc0[Oo\\Oi0jb0WOX]Om0bb0SO`]OR1Yb0nNi]OW1Qb0iNP^O[1ja0eNY^O[1da0fN]^OZ1aa0fNa^OY1^a0gNd^OX1[a0iNf^OV1Za0iNi^OV1Ua0kNl^OT1Sa0lNo^OS1Pa0nNQ_OQ1n`0oNT_OQ1j`0POX_On0h`0QOZ_On0e`0RO]_Om0b`0TO__Ok0``0UOb_Ok0\\`0VOe_Oi0Z`0WOh_Oh0X`0XOj_Of0U`0ZOm_Of0Q`0[OP@d0o?\\OR@d0m?\\OU@c0j?^OV@c0h?]OZ@b0f?^O[@a0d?@]@?b?A`@?^?Bc@=\\?De@;Z?Eg@;Y?Eh@;W?Ei@;W?Dj@T?Bl@>T?Bm@>R?Ao@?Q?Ao@?Q?Ao@?Q?@QA`0o>_OQAa0o>_OQAa0o>^ORAb0n>^OSAb0l>^OTAb0l>^OTAb0l>]OUAc0l>\\OTAe0k>[OUAe0k>ZOVAf0j>ZOWAe0i>[OWAe0i>ZOXAd0j>\\OVAc0l>\\OUAb0l>^OUALmKa0ob0BYADYA8j>HWAMS?3]401O0000000000001O0000000O10001O00gn``0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Blue.", + "B. Gray.", + "C. Black.", + "D. Beige." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_159.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01252367.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cl_d01mg04KLYXO7dg04O2N1O1O2N1O1O2O0O1O2O0O1O2N1O1O2O0O1O2N1O1O2O0O1O2NA[YODdf0l01O00000001O00000000000010O00000000000001O000000000000001O0000000000001O00000001O000001O0000000000000000000000000000001O000001O001O1O1O1O1O1O2N1O1O1O1O1O101N1O1O1O1O1O1O2N00100O1O1O1O1O1O1O001O1O1O100O1O1N2I[XO0iY<" + } + ], + "question": "What is the binding style of ?", + "choices": [ + "A. It is bound with a spiral wire.", + "B. It is held together by staples in the center.", + "C. Its pages are glued together at the spine.", + "D. It uses a three-ring binder mechanism." 
+ ], + "answer": "A", + "type": "texture/pattern", + "image": "images/vqa_160.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01252367.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cl_d01mg04KLYXO7dg04O2N1O1O2N1O1O2O0O1O2O0O1O2N1O1O2O0O1O2N1O1O2O0O1O2NA[YODdf0l01O00000001O00000000000010O00000000000001O000000000000001O0000000000001O00000001O000001O0000000000000000000000000000001O000001O001O1O1O1O1O1O2N1O1O1O1O1O101N1O1O1O1O1O1O2N00100O1O1O1O1O1O1O001O1O1O100O1O1N2I[XO0iY<" + } + ], + "question": "Which of the following descriptions accurately portrays the pattern on ?", + "choices": [ + "A. The object has a purple \"Y!\" logo on its cover.", + "B. The object has the text \"Microsoft\" printed on its cover.", + "C. The object has the text \"YAHOO\" printed on its cover.", + "D. The object is plain black with no text or logos." + ], + "answer": "C", + "type": "texture/pattern", + "image": "images/vqa_161.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01252367.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cl_d01mg04KLYXO7dg04O2N1O1O2N1O1O2O0O1O2O0O1O2N1O1O2O0O1O2N1O1O2O0O1O2NA[YODdf0l01O00000001O00000000000010O00000000000001O000000000000001O0000000000001O00000001O000001O0000000000000000000000000000001O000001O001O1O1O1O1O1O2N1O1O1O1O1O101N1O1O1O1O1O1O2N00100O1O1O1O1O1O1O001O1O1O100O1O1N2I[XO0iY<" + } + ], + "question": "What is the pattern on the surface of ?", + "choices": [ + "A. It has a purple logo on the cover.", + "B. It has white text on the cover.", + "C. It is plain black with no markings.", + "D. It has black text on a white cover." 
+ ], + "answer": "B", + "type": "texture/pattern", + "image": "images/vqa_162.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01261883.jpg", + "mask_rles": [ + { + "size": [ + 1536, + 2304 + ], + "counts": "flf[28b_1:G8H9G8K6J4M2M3N3L3N2M4M2M3N3M2N2N3N1N2O2M2O1N3M2O1N3N0O2O0O2N101N101N101N1O2O0O2M2O2N1O2N101N101N101O00001O001N1O100000000000O100000000O1000000O2O0000000O10001O0O1000001O0O2O001O1N2O001O1O0O2O1O1O0O2O1N1O2N2N2N2M4M2M3M3M3M3N2M3N2N2N2N3L3N2M3M3M3M3M3L4K6SOYaN9m^1CXaN8[_1L5K\\SSj0" + } + ], + "question": "What is the texture of ?", + "choices": [ + "A. The object has a rough and bumpy surface with a cross-hatch pattern.", + "B. The object has a smooth surface.", + "C. The object has a leathery and wrinkled texture.", + "D. The object is covered in a layer of fine fuzz." + ], + "answer": "B", + "type": "texture/pattern", + "image": "images/vqa_163.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01261883.jpg", + "mask_rles": [ + { + "size": [ + 1536, + 2304 + ], + "counts": "flf[28b_1:G8H9G8K6J4M2M3N3L3N2M4M2M3N3M2N2N3N1N2O2M2O1N3M2O1N3N0O2O0O2N101N101N101N1O2O0O2M2O2N1O2N101N101N101O00001O001N1O100000000000O100000000O1000000O2O0000000O10001O0O1000001O0O2O001O1N2O001O1O0O2O1O1O0O2O1N1O2N2N2N2M4M2M3M3M3M3N2M3N2N2N2N3L3N2M3M3M3M3M3L4K6SOYaN9m^1CXaN8[_1L5K\\SSj0" + } + ], + "question": "What is the texture of ?", + "choices": [ + "A. It has a rough and scaly texture.", + "B. It has a dimpled texture.", + "C. It has a matte and dull texture.", + "D. It has a glossy texture." 
+ ], + "answer": "D", + "type": "texture/pattern", + "image": "images/vqa_164.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01261883.jpg", + "mask_rles": [ + { + "size": [ + 1536, + 2304 + ], + "counts": "Q`kc27h_13N7I4L2N1O2N3M6J6J7I7I2N2N1O2N2N001O00001O0000001O00001O0000000000000000001O2N2N2N001O1O001O1O00000000O1000000O1000000O1000000O1000000O100O100O100O100O100O100O1O100O100O100O100O100O1O1O1O1O1O1O1O1O100O1N2H8G9K5L4K5K6NP`ob0" + } + ], + "question": "Which of the following best describes ?", + "choices": [ + "A. A pomegranate.", + "B. A large cherry.", + "C. A small red apple.", + "D. A red plum." + ], + "answer": "C", + "type": "shape", + "image": "images/vqa_165.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01276645.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "maPe0=ao06J6L4K5M2N3L4M2N2N1N3N2N101N100O100O1O100O10O01OO2O0O2O9F7J1N101N1O2O0O2O0O2N100O1O001O1O1N2O0O2O2L4LR\\[1" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Silver.", + "B. Tan.", + "C. White.", + "D. Black." + ], + "answer": "D", + "type": "color", + "image": "images/vqa_166.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01312527.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "`ja>3lg02O1N1000O010000O1N101N2O1O100O1O1O1O10000O10O100O10O10O14L2M101O000O101N101O001O4L1O100O0001O000010O00O2H7N4M101N10000O1O100011O0O10O000O10003M2N1N2O1N1O2O0NcUb7" + } + ], + "question": "What is the primary color of the body of ?", + "choices": [ + "A. Black and white.", + "B. Grey and brown.", + "C. Solid grey.", + "D. White and grey." 
+ ], + "answer": "B", + "type": "color", + "image": "images/vqa_167.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01312527.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "`ja>3lg02O1N1000O010000O1N101N2O1O100O1O1O1O10000O10O100O10O10O14L2M101O000O101N101O001O4L1O100O0001O000010O00O2H7N4M101N10000O1O100011O0O10O000O10003M2N1N2O1N1O2O0NcUb7" + } + ], + "question": "Based on its shape, what is in the image?", + "choices": [ + "A. The folded wings of a bird.", + "B. A frog sitting on the grassy bank.", + "C. A piece of a decaying tree stump.", + "D. A fish jumping out of the water." + ], + "answer": "A", + "type": "shape", + "image": "images/vqa_168.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01350089.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "fnQf0?]g06J7I6L5L3LKaYOoN[f0R1gYOnNVf0T1jYOmNSf0U1mYOlNQf0U1oYOmNme0U1QZOnNne0R1QZOPOme0Q1SZOPOke0Q1UZOPOie0Q1WZOPOge0P1ZZOgNN0fe0Z1\\ZOfNO0ce0[1^ZOeNO0be0\\1_ZOdNO1`e0\\1bZOaNO3^e0]1mZOdNQe0]1nZOeNQe0[1nZOfNQe0[1nZOgNQe0Y1nZOhNQe0Y1nZOiNQe0W1nZOjNQe0W1nZOkNQe0T1oZOmNQe0S1nZOnNQe0S1nZOnNRe0R1lZOQORe0P1mZOQOSe0o0mZOQOSe0o0mZOQOSe0o0mZOQORe0P1oZO\\N=Jdd0j1oZOZNfe0f1:00O1L4MlZO^NPd0X1oZOiNd0?]d0g0g[O]OYd0c0d[O@\\d0`0d[O@\\d0`0d[O@\\d0?[[OlNFe0od0?X[OoNIb0od0?T[OSOM>od0?Q[OVO0;od0?Q[OVO0;od0?Q[OVO0;od0?Q[OVO1:nd0`0Q[OVO29md0a0Q[OVO38ld0b0Q[OVO47kd0c0Q[OUO67id0d0Q[OUO85gd0f0Q[OUO94fd0g0Q[OUO:3ed0h0Q[OUO;2dd0i0Q[OUO<1cd0j0Q[OUO0F4:kd0k0Q[OUO0G47ld0m0P[OUO0H45md0m0oZOVO0J41nd0n0nZOWO0K4Ond0o0nZOWO0L4LPe0P1lZOXO0M4JQe0P1kZOYO0N3ISe0o0jZOZO0O2HTe0o0jZOZO001FWe0P1fZO[O200EYe0a1gZOkNMF\\e0_1gZORO[e0m0eZOSO[e0m0eZOSO[e0n0cZORO^e0g110\\J" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. The masked object is black.", + "B. The masked object is white.", + "C. The masked object is brown.", + "D. 
The masked object is silver." + ], + "answer": "A", + "type": "color", + "image": "images/vqa_169.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01356234.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "Wcde0P1nf08I4L2N2H8M3H8M2O2N1O2O0O2O2N1N2N2O1N2N2O1O00001O001O1O1O1O1O001O00000000O100O10000O100O100O100O1O10001O0O1O2O1N2O1O1N101N2N1O2L4L4M3L3M4M3N2N3M3L3I7L4M5E:GaXOK^l`0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Black.", + "B. Brown.", + "C. Silver.", + "D. Dark green." + ], + "answer": "D", + "type": "color", + "image": "images/vqa_170.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01356234.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "Wcde0P1nf08I4L2N2H8M3H8M2O2N1O2O0O2O2N1N2N2O1N2N2O1O00001O001O1O1O1O1O001O00000000O100O10000O100O100O100O1O10001O0O1O2O1N2O1O1N101N2N1O2L4L4M3L3M4M3N2N3M3L3I7L4M5E:GaXOK^l`0" + } + ], + "question": "What is the texture of ?", + "choices": [ + "A. Metallic.", + "B. Pinstriped.", + "C. Grooved.", + "D. Smooth." + ], + "answer": "D", + "type": "texture/pattern", + "image": "images/vqa_171.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01356234.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "Wcde0P1nf08I4L2N2H8M3H8M2O2N1O2O0O2O2N1N2N2O1N2N2O1O00001O001O1O1O1O1O001O00000000O100O10000O100O100O100O1O10001O0O1O2O1N2O1O1N101N2N1O2L4L4M3L3M4M3N2N3M3L3I7L4M5E:GaXOK^l`0" + } + ], + "question": "What is the shape of ?", + "choices": [ + "A. Oval-shaped.", + "B. Circular.", + "C. Square.", + "D. Rectangular." 
+ ], + "answer": "A", + "type": "shape", + "image": "images/vqa_172.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01364554.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 820 + ], + "counts": "PPa7;eo0;E2N1O1O1O2N1O1O1O1O1O1O2N1O1`QONPm0k1F7I7I3M3M3M3M3M2N1O001O1O001O001O1O001O001O001O1O001O001O1O001O001O001O1O001O001O1O001O001O001O001O001O00001O000000001O0000001O000000001O0000001O00000oTOUKmj0Q5N2N1OO100O1N2O1N20000000TUO[K]j0T501ON2N2000000O12N2N3M2N2N001OO100N2N2N2N2N2N2N2N2N21O00001O001OO100O100O1O100O1O100O1O100O100O1O100O1O100O1O100O100O10SUO`KZj0`4eUObKZj0]4gUOcKYj0]4gUOcKYj0\\4gUOeKYj0[4gUOeKYj0Z4gUOgKYj0Y4gUOfKZj0Z4eUOgK[j0X4eUOiK[j0W4eUOiK[j0V4eUOjK\\j0V4dUOjK\\j0U4dUOlK\\j0T4dUOlK\\j0T4cUOlK^j0S4bUOnK^j0R4bUOnK^j0Q4bUOPL^j0P4bUOoK_j0P4aUOQL_j0o3aUOQL_j0o3`UORL`j0m3aUORL`j0n3_UOSLaj0l3`UOTL`j0l3_UOULaj0j3`UOULaj0k3_UOULaj0k3^UOVLbj0i3_UOWL5Jbi0o3YVOVLO3gi0f3ZVOXLG:oi0^3ZVOXLE?", + "choices": [ + "A. The lettuce inside is shredded.", + "B. The tomato is diced into small cubes.", + "C. It contains whole, intact lettuce leaves.", + "D. The tortilla is rolled into a closed cylinder." 
+ ], + "answer": "A", + "type": "shape", + "image": "images/vqa_173.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01364554.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 820 + ], + "counts": "PPa7;eo0;E2N1O1O1O2N1O1O1O1O1O1O2N1O1`QONPm0k1F7I7I3M3M3M3M3M2N1O001O1O001O001O1O001O001O001O1O001O001O1O001O001O001O1O001O001O1O001O001O001O001O001O00001O000000001O0000001O000000001O0000001O00000oTOUKmj0Q5N2N1OO100O1N2O1N20000000TUO[K]j0T501ON2N2000000O12N2N3M2N2N001OO100N2N2N2N2N2N2N2N2N21O00001O001OO100O100O1O100O1O100O1O100O100O1O100O1O100O1O100O100O10SUO`KZj0`4eUObKZj0]4gUOcKYj0]4gUOcKYj0\\4gUOeKYj0[4gUOeKYj0Z4gUOgKYj0Y4gUOfKZj0Z4eUOgK[j0X4eUOiK[j0W4eUOiK[j0V4eUOjK\\j0V4dUOjK\\j0U4dUOlK\\j0T4dUOlK\\j0T4cUOlK^j0S4bUOnK^j0R4bUOnK^j0Q4bUOPL^j0P4bUOoK_j0P4aUOQL_j0o3aUOQL_j0o3`UORL`j0m3aUORL`j0n3_UOSLaj0l3`UOTL`j0l3_UOULaj0j3`UOULaj0k3_UOULaj0k3^UOVLbj0i3_UOWL5Jbi0o3YVOVLO3gi0f3ZVOXLG:oi0^3ZVOXLE?", + "choices": [ + "A. It is a full circular slice.", + "B. It is a sliced piece.", + "C. It is whole and round.", + "D. It has a jagged, leafy shape." + ], + "answer": "B", + "type": "shape", + "image": "images/vqa_174.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01364931.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "^dca01ng02N000QYO5ke0KTZO7je0IWZO8ie0HUZO9ke0GVZO8ke0HTZO7ne0HRZO8ne0IPZO8Qf0HnYO8Rf0HmYO9Tf0GkYO9Vf0FkYO8Vf0IiYO6Yf0IgYO6Zf0KfYO1]f0OcYO2]f0MdYO2\\f0OcYO1^f0NbYO3]f0NcYO1]f0OcYO2]f0MdYO2\\f0OcYO1^f0NbYO3]f0NcYO1]f0OcYO2oe0LhYO1:2me0OhYO190oe01gYOO91oe01hYON90Pf0M]YO3:17OUf00eYO051Wf0NdYO150Xf00bYO14O[f00bYO021\\f0ObYO020]f00aYO10Oaf00_YO0O0cf01]YOOO1ef00UYO0M08Ohf01PYO30O4Mnf09nXOK2MQg0a0nXO^OQg0d0nXO\\OQg0k0N02O1N5L=YO_XO5fg0O101N2N^bQ5" + } + ], + "question": "What is the shape of in the image?", + "choices": [ + "A. It has a high, gooseneck-style spout.", + "B. 
It has a straight, right-angled spout.", + "C. It is composed of two separate cross-shaped handles and a central spout.", + "D. It has a curved or arc-shaped spout." + ], + "answer": "D", + "type": "shape", + "image": "images/vqa_175.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01364931.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "f`g?^1Tf0>N3oYORNme0Q200O2O0O100O10000O2O0O10000O10001O000O100O1O1O2N1000000000010O0001O1hMY\\O5ic0GY\\O9he0O100O1O1O000001O000000000001O0000000000000000001O000O100000000000001O000000000000000O101O00000000O10000001O001O002Neeo5" + } + ], + "question": "What is the shape of in the image?", + "choices": [ + "A. A straight, rectangular bar.", + "B. A circular knob.", + "C. A curved handle.", + "D. A T-shaped pull handle." + ], + "answer": "C", + "type": "shape", + "image": "images/vqa_176.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01364931.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "f`g?^1Tf0>N3oYORNme0Q200O2O0O100O10000O2O0O10000O10001O000O100O1O1O2N1000000000010O0001O1hMY\\O5ic0GY\\O9he0O100O1O1O000001O000000000001O0000000000000000001O000O100000000000001O000000000000000O101O00000000O10000001O001O002Neeo5" + } + ], + "question": "Which of the following descriptions about the texture of is correct?", + "choices": [ + "A. The masked object has a smooth surface.", + "B. The masked object has a grooved texture from the wooden planks.", + "C. The masked object features a distinct wood grain pattern.", + "D. The masked object has a slatted texture." 
+ ], + "answer": "A", + "type": "texture/pattern", + "image": "images/vqa_177.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01364931.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "f`g?^1Tf0>N3oYORNme0Q200O2O0O100O10000O2O0O10000O10001O000O100O1O1O2N1000000000010O0001O1hMY\\O5ic0GY\\O9he0O100O1O1O000001O000000000001O0000000000000000001O000O100000000000001O000000000000000O101O00000000O10000001O001O002Neeo5" + } + ], + "question": "What is a defining characteristic of in the image?", + "choices": [ + "A. It is a vertically oriented mount.", + "B. It is a horizontally positioned dispenser.", + "C. It has a cylindrical shape.", + "D. It is an L-shaped bracket." + ], + "answer": "A", + "type": "shape", + "image": "images/vqa_178.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01364931.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "f`g?^1Tf0>N3oYORNme0Q200O2O0O100O10000O2O0O10000O10001O000O100O1O1O2N1000000000010O0001O1hMY\\O5ic0GY\\O9he0O100O1O1O000001O000000000001O0000000000000000001O000O100000000000001O000000000000000O101O00000000O10000001O001O002Neeo5" + } + ], + "question": "Which of the following statements accurately describes in the image?", + "choices": [ + "A. The control panel is located on the left side of its door.", + "B. It has ventilation slots located on its top surface.", + "C. The object is the same color as the wooden cabinets above it.", + "D. It has a large, vertical handle for opening the door." 
+ ], + "answer": "B", + "type": "shape", + "image": "images/vqa_179.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01364931.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "bdf>5ig03N101N2O1O001O1O001O1O1O001O1O0000O1N2O100O1000000O1000000O100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1nN1SZOOje0;oYOEPf0V1N2N2N200O10000000000000000000001O001O000000000000000001O000000000000000000000000000O1O0F;O101N2No\\P6" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. White.", + "B. Silver.", + "C. Brown.", + "D. Black." + ], + "answer": "D", + "type": "color", + "image": "images/vqa_180.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01364931.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "bdf>5ig03N101N2O1O001O1O001O1O1O001O1O0000O1N2O100O1000000O1000000O100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1nN1SZOOje0;oYOEPf0V1N2N2N200O10000000000000000000001O001O000000000000000001O000000000000000000000000000O1O0F;O101N2No\\P6" + } + ], + "question": "Which of the following best describes the shape of ?", + "choices": [ + "A. It is a high-arc gooseneck faucet with a pull-down sprayer.", + "B. It has two separate handles for hot and cold water.", + "C. It has a single handle and a curved spout.", + "D. It is a wall-mounted faucet positioned above the sink." 
+ ], + "answer": "C", + "type": "shape", + "image": "images/vqa_181.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01396529.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Pkd`09fo02O1M4M2O1O1N2N2N2O1N2N2N2N101N100O2O0O100O2O1N1O010N2N2NObQOcN^n0]13011OO1O2N1O0O2O001L310O1O1N1O2O0O2M20001O0O1O1O1O1O0011O0O100ZO[OPROf0Pn0[OnQOf0Qn0[OmQOf0Tn0[OjQOf0Vn0ZOiQOf0Xn0VO]QO2:h0\\n0XOcQOi0^n0<001N6K1N6J2N2N2O2N1N2O1O5J2MXUg4" + } + ], + "question": "What is the primary material of ?", + "choices": [ + "A. Plastic.", + "B. Leather.", + "C. Rubber.", + "D. Canvas." + ], + "answer": "C", + "type": "material", + "image": "images/vqa_182.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01403825.jpg", + "mask_rles": [ + { + "size": [ + 1152, + 2048 + ], + "counts": "fX[\\1b0]S12M3L3O2N101N101N101N101N101N101N101N101N101N1O2O0O2O0O2O1N101N100O01O001O001O00001O010O010O010O0010O010O0mN^NjoNb1TP1aNjoN_1UP1dNjoN[1UP1hNioNY1TP1kNjoNU1UP1mNkoNS1SP1POkoNP1SP1TOloNl0SP1VOkoNj0UP1XOjoNh0VP1YOhoNg0XP1[OgoNd0YP1^OeoNc0[P1^O`oNe0`P1\\O_oNe0`P1]O]oNd0dP1]OYoNe0fP1Z10010O001O010O00010O010O0100O1O010O100O10O0100O1O010O100O010O100000O1000000O0100000O10000000O0100000000O100000001O0O2O00001O001N110O00001O001O001O01O01O001O001O001O0000O1O1O1O10O0100O100O10000O010O100O100O10O010O10O10O1000O010000O02O0000000O1000000O101O00000O100000000O10001O0O100000000O1000001O0O1O100O1O100O1O101mNSoNPOnP1o0boNaN_P1]1S1O1N2N2O1N2O2M2O1N200000000O10001O000O101O00001O0O101O00001N10001O000O2O00001O0O10001N10001N10001N10001N10001N10001N10001N10001O0O2NXfVa0" + } + ], + "question": "What is the material of the cover of ?", + "choices": [ + "A. Wood.", + "B. Rubber.", + "C. Plastic.", + "D. Cardboard." 
+ ], + "answer": "C", + "type": "material", + "image": "images/vqa_183.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01403825.jpg", + "mask_rles": [ + { + "size": [ + 1152, + 2048 + ], + "counts": "fX[\\1b0]S12M3L3O2N101N101N101N101N101N101N101N101N101N1O2O0O2O0O2O1N101N100O01O001O001O00001O010O010O010O0010O010O0mN^NjoNb1TP1aNjoN_1UP1dNjoN[1UP1hNioNY1TP1kNjoNU1UP1mNkoNS1SP1POkoNP1SP1TOloNl0SP1VOkoNj0UP1XOjoNh0VP1YOhoNg0XP1[OgoNd0YP1^OeoNc0[P1^O`oNe0`P1\\O_oNe0`P1]O]oNd0dP1]OYoNe0fP1Z10010O001O010O00010O010O0100O1O010O100O10O0100O1O010O100O010O100000O1000000O0100000O10000000O0100000000O100000001O0O2O00001O001N110O00001O001O001O01O01O001O001O001O0000O1O1O1O10O0100O100O10000O010O100O100O10O010O10O10O1000O010000O02O0000000O1000000O101O00000O100000000O10001O0O100000000O1000001O0O1O100O1O100O1O101mNSoNPOnP1o0boNaN_P1]1S1O1N2N2O1N2O2M2O1N200000000O10001O000O101O00001O0O101O00001N10001O000O2O00001O0O10001N10001N10001N10001N10001N10001N10001N10001O0O2NXfVa0" + } + ], + "question": "Which of the following best describes a shape characteristic of ?", + "choices": [ + "A. The object is rectangular in shape.", + "B. The handle of the object is curved.", + "C. The object has a pointed tip.", + "D. The head of the object is rounded." + ], + "answer": "D", + "type": "shape", + "image": "images/vqa_184.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01413369.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "Y`g43lg06J6J6J6K4K6K5K5K5eYO]Nne0h1nYOXNRf0k100O10000000001O00001O0000O100O_ZOVNnd0i1R[O\\Njd0d1U[O`Nid0^1W[OeNjd0X1U[OiNnd0S1R[OnNPe0P1P[OoNTe0n0lZOROWe0l0gZOTOOAPe0Y1Q[OVOMHPe0o0R[OZOMOmd0d0V[O\\OM4ld0>W[O^OM4od0Ve03^[OMfd00Z[O0hd0OV[O2md0KR[O5Re0IR[O2Pe0LT[O0od0MV[OMnd01V[OJld04c1N2MoePb0" + } + ], + "question": "What is the shape of ?", + "choices": [ + "A. It is rectangular.", + "B. 
It is conical.", + "C. It is cylindrical.", + "D. It is spherical." + ], + "answer": "C", + "type": "shape", + "image": "images/vqa_185.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01413369.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "Y`g43lg06J6J6J6K4K6K5K5K5eYO]Nne0h1nYOXNRf0k100O10000000001O00001O0000O100O_ZOVNnd0i1R[O\\Njd0d1U[O`Nid0^1W[OeNjd0X1U[OiNnd0S1R[OnNPe0P1P[OoNTe0n0lZOROWe0l0gZOTOOAPe0Y1Q[OVOMHPe0o0R[OZOMOmd0d0V[O\\OM4ld0>W[O^OM4od0Ve03^[OMfd00Z[O0hd0OV[O2md0KR[O5Re0IR[O2Pe0LT[O0od0MV[OMnd01V[OJld04c1N2MoePb0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Blue.", + "B. Red.", + "C. Green.", + "D. Yellow." + ], + "answer": "D", + "type": "color", + "image": "images/vqa_186.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01413369.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "Y`g43lg06J6J6J6K4K6K5K5K5eYO]Nne0h1nYOXNRf0k100O10000000001O00001O0000O100O_ZOVNnd0i1R[O\\Njd0d1U[O`Nid0^1W[OeNjd0X1U[OiNnd0S1R[OnNPe0P1P[OoNTe0n0lZOROWe0l0gZOTOOAPe0Y1Q[OVOMHPe0o0R[OZOMOmd0d0V[O\\OM4ld0>W[O^OM4od0Ve03^[OMfd00Z[O0hd0OV[O2md0KR[O5Re0IR[O2Pe0LT[O0od0MV[OMnd01V[OJld04c1N2MoePb0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. It is blue.", + "B. It has white text.", + "C. It has red measurement markings.", + "D. It is solid white." 
+ ], + "answer": "B", + "type": "color", + "image": "images/vqa_187.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01420513.jpg", + "mask_rles": [ + { + "size": [ + 803, + 1024 + ], + "counts": "kkY63b12ee03UZO6de0LXZOde0YO]ZO:NM4ge0^O\\ZO?M3ge0BWZO=11ge0j0YZOUOhe0k0XZOUOhe0j0YZOUOge0l0YZOTOge0k0ZZOTOge0k0ZZOUO6ROod0i1lZOTO3WOnd0f1oZOSO2ZOmd0b1S[ORO0^Old0`1U[OQONAld0]1W[OROMBkd0[1Y[OSOKDjd0Y1][OQOJHgd0W1_[OQOIJgd0U1a[OPOHLfd0T1c[OnNHOdd0S1e[OnNE2cd0Q1j[OkNC5bd0P1n[OhN@a0e0QObb0e1\\]OfN]Of0c0QOcb0c1_]OdNZOj0b0QOcb0b1c]O`NYOn0?ROdb0`1f]O]NWOR1?ROcb0_1j]OXNVOX1;TO^a0Do^Oh1?@TOVOYa0GR_Oc1b0_OSOXOUa0JT_O`1d0]OUOYOPa0MV_O]1f0\\OTO[Ol`00Y_OY1h0ZOUO\\Oi`03Y_OW1j0XOVO^Oe`05Z_OT1m0WOVO_Oa`09Z_OR1Q1SOVOA^`0nc0CS]OOoN>mc0DU]ONnN?", + "choices": [ + "A. Solid color.", + "B. Checkered pattern.", + "C. Striped pattern.", + "D. Polka dot pattern." + ], + "answer": "B", + "type": "texture/pattern", + "image": "images/vqa_188.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01420513.jpg", + "mask_rles": [ + { + "size": [ + 803, + 1024 + ], + "counts": "kkY63b12ee03UZO6de0LXZOde0YO]ZO:NM4ge0^O\\ZO?M3ge0BWZO=11ge0j0YZOUOhe0k0XZOUOhe0j0YZOUOge0l0YZOTOge0k0ZZOTOge0k0ZZOUO6ROod0i1lZOTO3WOnd0f1oZOSO2ZOmd0b1S[ORO0^Old0`1U[OQONAld0]1W[OROMBkd0[1Y[OSOKDjd0Y1][OQOJHgd0W1_[OQOIJgd0U1a[OPOHLfd0T1c[OnNHOdd0S1e[OnNE2cd0Q1j[OkNC5bd0P1n[OhN@a0e0QObb0e1\\]OfN]Of0c0QOcb0c1_]OdNZOj0b0QOcb0b1c]O`NYOn0?ROdb0`1f]O]NWOR1?ROcb0_1j]OXNVOX1;TO^a0Do^Oh1?@TOVOYa0GR_Oc1b0_OSOXOUa0JT_O`1d0]OUOYOPa0MV_O]1f0\\OTO[Ol`00Y_OY1h0ZOUO\\Oi`03Y_OW1j0XOVO^Oe`05Z_OT1m0WOVO_Oa`09Z_OR1Q1SOVOA^`0nc0CS]OOoN>mc0DU]ONnN in the image?", + "choices": [ + "A. White.", + "B. Green.", + "C. Red.", + "D. Black." 
+ ], + "answer": "D", + "type": "color", + "image": "images/vqa_189.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01420513.jpg", + "mask_rles": [ + { + "size": [ + 803, + 1024 + ], + "counts": "gX]=:fh04L4L4L3M4L3M4L3M4L3M3M3M2N3M2N3M2N3M3M3K5N2O1O1O1O1O100O1O1O1O100O1O1O1O100O1O1O1O100O1O1O1O100O100O100O1O100O1000000000001O000001O00000000000000000000001O000000000000000000001O00000000000000000000001O000000000000000000001O000O100O100O100O1O1O1N3J5M3N2M3M3N2M3M4M2M4L4M3L5K7J6I7I7I5LmdU8" + } + ], + "question": "What is the shape of the platform on ?", + "choices": [ + "A. Trapezoidal.", + "B. Rectangular.", + "C. Square.", + "D. Circular." + ], + "answer": "B", + "type": "shape", + "image": "images/vqa_190.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01420513.jpg", + "mask_rles": [ + { + "size": [ + 803, + 1024 + ], + "counts": "gX]=:fh04L4L4L3M4L3M4L3M4L3M3M3M2N3M2N3M2N3M3M3K5N2O1O1O1O1O100O1O1O1O100O1O1O1O100O1O1O1O100O1O1O1O100O100O100O1O100O1000000000001O000001O00000000000000000000001O000000000000000000001O00000000000000000000001O000000000000000000001O000O100O100O100O1O1O1N3J5M3N2M3M3N2M3M4M2M4L4M3L5K7J6I7I7I5LmdU8" + } + ], + "question": "What is the shape of ?", + "choices": [ + "A. The masked object is circular.", + "B. The masked object is triangular.", + "C. The masked object is rectangular.", + "D. The masked object is oval." 
+ ], + "answer": "C", + "type": "shape", + "image": "images/vqa_191.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01420513.jpg", + "mask_rles": [ + { + "size": [ + 803, + 1024 + ], + "counts": "gX]=:fh04L4L4L3M4L3M4L3M4L3M3M3M2N3M2N3M2N3M3M3K5N2O1O1O1O1O100O1O1O1O100O1O1O1O100O1O1O1O100O1O1O1O100O100O100O1O100O1000000000001O000001O00000000000000000000001O000000000000000000001O00000000000000000000001O000000000000000000001O000O100O100O100O1O1O1N3J5M3N2M3M3N2M3M4M2M4L4M3L5K7J6I7I7I5LmdU8" + } + ], + "question": "Which of the following statements accurately describes a color feature of ?", + "choices": [ + "A. The top surface of the object is gray.", + "B. The object has a black display screen.", + "C. The main body of the object is light green.", + "D. The entire object is a uniform color." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_192.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01420513.jpg", + "mask_rles": [ + { + "size": [ + 803, + 1024 + ], + "counts": "`hla02Pi02N2O1N1OKWWO3hh0NXWO2bh02`WON_h02bWON]h02cWON]h03cWOL]h04cWOL]h05bWOK^h05bWOK^h06aWOJ_h0O`WO21O_h0OaWO2OO`h0OaWO2OO`h0ObWO7^h0IbWO7^h0IcWO6^h0JaWO6_h0JbWO5_h0JaWO6_h0K`WO5ah0L]WO4ch061O1O1O00001O2N2O0O2OO01O01O01O010O0010O0001Ndio5" + } + ], + "question": "What is the material of the handles of ?", + "choices": [ + "A. Metal.", + "B. Wood.", + "C. Rubber.", + "D. Plastic." 
+ ], + "answer": "D", + "type": "material", + "image": "images/vqa_193.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01453850.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "R[X<;dg02N2N1O1O1O3N2N2N101N2N1O2N2N3M4L3L5L3N2M2O0O00O101O0O10000O100O101O0O10000O100O101O0O1000O01E_YOVObf0i0;N2O1N2O001O100O1000O010000O100000O010000O1000O10O1000000O10O10O1000O10O10O10O10O1000O01000O010000O01000O01000O100O1O1O1O2N1O1O1O1O1O2N1O2N3M3M3M2N3M3M3M3N6I6K20N110O01O010O520OJ6J7H7I8G8H:G;D;EWSZ8" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Silver.", + "B. Blue.", + "C. Yellow.", + "D. White." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_194.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01453850.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "R[X<;dg02N2N1O1O1O3N2N2N101N2N1O2N2N3M4L3L5L3N2M2O0O00O101O0O10000O100O101O0O10000O100O101O0O1000O01E_YOVObf0i0;N2O1N2O001O100O1000O010000O100000O010000O1000O10O1000000O10O10O1000O10O10O10O10O1000O01000O010000O01000O01000O100O1O1O1O2N1O1O1O1O1O2N1O2N3M3M3M2N3M3M3M3N6I6K20N110O01O010O520OJ6J7H7I8G8H:G;D;EWSZ8" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. It is made of metal.", + "B. It is made of plastic.", + "C. It is made of wood.", + "D. It is made of stone." 
+ ], + "answer": "B", + "type": "material", + "image": "images/vqa_195.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01453850.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "R[X<;dg02N2N1O1O1O3N2N2N101N2N1O2N2N3M4L3L5L3N2M2O0O00O101O0O10000O100O101O0O10000O100O101O0O1000O01E_YOVObf0i0;N2O1N2O001O100O1000O010000O100000O010000O1000O10O1000000O10O10O1000O10O10O10O10O1000O01000O010000O01000O01000O100O1O1O1O2N1O1O1O1O1O2N1O2N3M3M3M2N3M3M3M3N6I6K20N110O01O010O520OJ6J7H7I8G8H:G;D;EWSZ8" + } + ], + "question": "Which of the following describes the shape of ?", + "choices": [ + "A. A flat-topped object with four legs.", + "B. A cylindrical object with a rounded top.", + "C. A series of connected, curled tentacles.", + "D. A rectangular frame with a grid pattern." + ], + "answer": "A", + "type": "shape", + "image": "images/vqa_196.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01455911.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "P_k23Tf0Nb[O5[d0Nb[O4]d0Lb[O6]d0JeZO315Ze0H]ZOa02I_e0GYZOk02@4E^d06\\[OJKV11UO4Nad00\\[Of1NaNbd0k2K4M3L3M2O1N2O1O1O1O1O1O1O1O1O1O1000000O1000000O10000000O01000000O0100000O101O\\Lc\\Oe2]c0YMd\\Oh2[c0YMe\\Og2[c0XMf\\Oi2Yc0VMh\\Oj2Xc0VMh\\Oj2Xc0UMh\\Ol2Xc0SMi\\Om2Xc0RMh\\On2[c0nLf\\OR3^c0iLd\\OV3\\c0jLe\\OU3[c0jLf\\OV3Zc0fLk\\OY3Tc0hLm\\OW3Sc0hLo\\OW3Pc0jLP]O>[OR2ec0_MR]O2E_2Xc0`Mh]O`2Xb0_Mi]Oa2Vb0`Mk]O_2Ub0`Ml]O`2Sb0aMm]O_2Sb0`Mn]O`2Rb0`Mn]O`2Sb0^Mn]Ob2hc000000O100O100O100O100O1000000O1000000O100000000O1000000O1000000O100001O1O1O1O1O1O1O1O1bMgZOS2Ze0kMgZOU2Ze0gMjZOX2ae0N1O1O2N1O1O2N1O2N1O1O2N1O:F>B3M0000O10O10000O100O00100O1O100O1O1O101N2N2O1N2N2Ml`\\a0" + } + ], + "question": "Which of the following descriptions about a part of is correct?", + "choices": [ + "A. It has a rectangular license plate.", + "B. 
The rearview mirrors are circular.", + "C. The seat is triangular.", + "D. The headlight is square." + ], + "answer": "A", + "type": "shape", + "image": "images/vqa_197.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01511060.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "Rb0e0Zg01O100000001O00000000000000000000001O00000000000000000000001O000000000000000000001O0001O00001O00001O000010O0001O00001O00000O2O0O101N100O2O0O101N101N101N2O1NcUTf0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Light beige.", + "B. Light gray.", + "C. Dark gray.", + "D. Off-white." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_198.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01525619.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1024 + ], + "counts": "ZQ\\l0g0P5[OVe0h0_ZOC^e0?YZOJfe07PZO3ne0OiYO:Wf0F_YOe0_f0]OWYOn0gf0ROoXOY1Pg0gNgXOc1dg0RNQXOY2ng0gMiWOc2Xh0[M^WOQ3ah0nLVWO\\3lh0aLjVOi3Ui0XLbVOQ4Wi0WL^VOS4Zi0VL\\VOR4ci0V1O2O1N20O01000gIoVOb5hi001O2M10O1001O000O2O000O1O0010O01O1O10O01O1O00100O001O10O01O1O010O1O1O010O1O00100O001O10O01O1O1N101O001O0O2N101O1O0010O10M2N3M2N3M2M4M2N3M2N3N002K5K5L4K4L5K4L5K3M4M3L4L4L4L4L4L4M3L5KoK" + } + ], + "question": "Which of the following descriptions about the shape of is correct?", + "choices": [ + "A. The masked object has a curved handle.", + "B. The main body of is a perfect cylinder.", + "C. The masked object is heart-shaped.", + "D. The top rim of has a scalloped edge." 
+ ], + "answer": "A", + "type": "shape", + "image": "images/vqa_199.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01534987.jpg", + "mask_rles": [ + { + "size": [ + 786, + 1178 + ], + "counts": "Z`d27Yh04L4L4L4L4L5K4M2N2N2N2N2N2N1O2N2N2N2N1O2O1N2N101N101N100O2N100O100O2O0fNkMV\\OV2ic0mMT\\OT2lc0oMQ\\OR2nc0QNn[OP2Rd0SNk[Om1Ud0UNh[Ol1Xd0WNd[Oj1\\d0YNa[Oh1^d0ZN_[Og1ad0\\N\\[Od1dd0_NX[Ob1hd0U10O3M3M3M2O2M3M3M2N3L4M2N000000000O10000000000O10000000000O2O0000000O10000O101N100O100O10000O2O0O100O100O100O2O000O2O0O1O2N1O2N2N1O2N1N3L3N4J5J6J7I6J7I7J`adf0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Transparent.", + "B. White.", + "C. Brown.", + "D. Black." + ], + "answer": "A", + "type": "color", + "image": "images/vqa_200.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01575962.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Xh0>bo010000000O0100O10000O1O101N100O2O0O1010O1O1N2N2LoQP2E^noM1O100N101O1N2O1O1O10O10O100000000O0100000O100O1O001N2O1O0001O1N1L500O1000O010000O10O01O1MdThc0" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. The masked object is made of metal.", + "B. The masked object is made of ceramic.", + "C. The masked object is made of wood.", + "D. The masked object is made of plastic." + ], + "answer": "C", + "type": "material", + "image": "images/vqa_201.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01575962.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Xh0>bo010000000O0100O10000O1O101N100O2O0O1010O1O1N2N2LoQP2E^noM1O100N101O1N2O1O1O10O10O100000000O0100000O100O1O001N2O1O0001O1N1L500O1000O010000O10O01O1MdThc0" + } + ], + "question": "Which of the following best describes the shape of ?", + "choices": [ + "A. 
The masked object is long and cylindrical.", + "B. The masked object is a set of two long, flat metal strips.", + "C. The masked object is a thin, flat slice.", + "D. The masked object is a folded piece of cloth." + ], + "answer": "A", + "type": "shape", + "image": "images/vqa_202.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01616394.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1024 + ], + "counts": "^bh31io09G7J7PRO]OZl0i0cSO^OSl0i0jSO]Olk0j0PTO]Ofk0i0XTO\\O_k0k0^TO[OXk0l0eTOZOQk0l0lTO[Oij0m0TUOYObj0n0[UOXO[j0n0cUOXOSj0o0iUOXOmi0o0PVOVOgi0P1WVOTOai0S1\\VOoN^i0m3J6I7J6J6J5N3M3N2O1N2_N[HeZOf7Pe0eHoZO\\7md0hHR[OY7ld0lHP[OU7od0lHoZOV7od0lHP[Om2^OOae0UMP[O^21:nd0YMP[O\\277id0]MP[O[2<5cd0aMP[OZ2a0YNcN4ke0ZOoZOY2i0QNcN:ee0\\OnZOX2o0jMeN`0^e0^OmZOW2U1cMgNf0We0_OmZOW2X1^MkNj0Ze0WObZO`2[1ZMPOm0Re0ZObZO^2^1VMUOP1jd0]ObZO\\2b1QMYOT1bd0_ObZO\\2^3SNoa0BbZOZ2c3RNja0EbZOX2h3QNea0HbZOV2m3oMaa0LaZOT2Q4oM]a0MbZOS2T4oMYa0ObZOQ2W4PNWa0O`ZOQ2\\4oMSa01`ZOo1`4oMo`03`ZOm1d4nMl`05`ZOl1g4mMi`08_ZOj1k4lMf`0;^ZOi1m4kMe`0=\\ZOh1R5iMb`0?\\ZOg1U5hM_`0b0[ZOe1Y5fM]`0f0YZOc1]5dM[`0j0WZOa1`5cMZ`0n0TZOi06ZNa5KV`0U1PZOc0gl0Y12O1O1O1O1O100O1OU]OVMb8i2^GZM`8e2`G\\M`8d2`G]M_8b2aG`M\\8a2eGaMW8`2jGaMS8`2oGaMn7`2RHaMk7`2WH`Mf7a2[HaMa7`2aH`M]7`2eHaMW7e0a^O:Y:ROS7d0k^O5S:WOo6d0T_O0n9^Ok6b0\\_OKk9Cf6b0g_ODe9Ka6`0P@@`91_6=X@]OZ97\\6;`@XOU9>Z69f@SOS9d0V68mKIQ46PLKo35RLLm32TLOl3OVL1k3LWL4i3JXL6j3GXL9h3FXL:j3CXL`0e3^O]Le0^3\\ObLi0X3WOiLm0R3TOnLQ1k2POVMR1g2nNZMS1d2nN\\MS1b2mN_MT1_2lNbMU1\\2lNdMU1Y2lNhMU1V2kNkMV1S2jNnMW1P2jNPNW1n1iNSNX1j1iNVNY1h1hNXNY1f1gN[NZ1c1fN^N[1`1fN`N[1^1eNcN\\1\\1cNeN^1Y1bNiM^OkCQ2[>_NgMGlCk1\\>[NfM1lCf1]>UNeMQNdMe0mC[1_>lMbMP1mCU1b>fM_M\\1mCo0Qk0YOlTOi0nj0^OPUOc0jj0DTUO>hj0GUUO:ij0`2N2O00000001N2N2M3K5K5K5K5dM`TO2ek0EcTO7bk0@gTO;^k0BfTO0hk0M\\TOL2WO\\j0k0gUOGO@Zj0g0kUODMFXj0d0oUOAKLVj0a0TVO]OG4Uj0>WVOYOF9Tj0<[VOTODa0Qj09iWOHWh06jWOJWh04iWONXh0OiWO2Wh0LjWO4Wh0JjWO7Vh0HiWO:Wh0DjWO?", + "choices": [ + "A. 
Golden brown.", + "B. A mix of green and white.", + "C. A mix of purple and orange.", + "D. Creamy white." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_203.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01616394.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1024 + ], + "counts": "^bh31io09G7J7PRO]OZl0i0cSO^OSl0i0jSO]Olk0j0PTO]Ofk0i0XTO\\O_k0k0^TO[OXk0l0eTOZOQk0l0lTO[Oij0m0TUOYObj0n0[UOXO[j0n0cUOXOSj0o0iUOXOmi0o0PVOVOgi0P1WVOTOai0S1\\VOoN^i0m3J6I7J6J6J5N3M3N2O1N2_N[HeZOf7Pe0eHoZO\\7md0hHR[OY7ld0lHP[OU7od0lHoZOV7od0lHP[Om2^OOae0UMP[O^21:nd0YMP[O\\277id0]MP[O[2<5cd0aMP[OZ2a0YNcN4ke0ZOoZOY2i0QNcN:ee0\\OnZOX2o0jMeN`0^e0^OmZOW2U1cMgNf0We0_OmZOW2X1^MkNj0Ze0WObZO`2[1ZMPOm0Re0ZObZO^2^1VMUOP1jd0]ObZO\\2b1QMYOT1bd0_ObZO\\2^3SNoa0BbZOZ2c3RNja0EbZOX2h3QNea0HbZOV2m3oMaa0LaZOT2Q4oM]a0MbZOS2T4oMYa0ObZOQ2W4PNWa0O`ZOQ2\\4oMSa01`ZOo1`4oMo`03`ZOm1d4nMl`05`ZOl1g4mMi`08_ZOj1k4lMf`0;^ZOi1m4kMe`0=\\ZOh1R5iMb`0?\\ZOg1U5hM_`0b0[ZOe1Y5fM]`0f0YZOc1]5dM[`0j0WZOa1`5cMZ`0n0TZOi06ZNa5KV`0U1PZOc0gl0Y12O1O1O1O1O100O1OU]OVMb8i2^GZM`8e2`G\\M`8d2`G]M_8b2aG`M\\8a2eGaMW8`2jGaMS8`2oGaMn7`2RHaMk7`2WH`Mf7a2[HaMa7`2aH`M]7`2eHaMW7e0a^O:Y:ROS7d0k^O5S:WOo6d0T_O0n9^Ok6b0\\_OKk9Cf6b0g_ODe9Ka6`0P@@`91_6=X@]OZ97\\6;`@XOU9>Z69f@SOS9d0V68mKIQ46PLKo35RLLm32TLOl3OVL1k3LWL4i3JXL6j3GXL9h3FXL:j3CXL`0e3^O]Le0^3\\ObLi0X3WOiLm0R3TOnLQ1k2POVMR1g2nNZMS1d2nN\\MS1b2mN_MT1_2lNbMU1\\2lNdMU1Y2lNhMU1V2kNkMV1S2jNnMW1P2jNPNW1n1iNSNX1j1iNVNY1h1hNXNY1f1gN[NZ1c1fN^N[1`1fN`N[1^1eNcN\\1\\1cNeN^1Y1bNiM^OkCQ2[>_NgMGlCk1\\>[NfM1lCf1]>UNeMQNdMe0mC[1_>lMbMP1mCU1b>fM_M\\1mCo0Qk0YOlTOi0nj0^OPUOc0jj0DTUO>hj0GUUO:ij0`2N2O00000001N2N2M3K5K5K5K5dM`TO2ek0EcTO7bk0@gTO;^k0BfTO0hk0M\\TOL2WO\\j0k0gUOGO@Zj0g0kUODMFXj0d0oUOAKLVj0a0TVO]OG4Uj0>WVOYOF9Tj0<[VOTODa0Qj09iWOHWh06jWOJWh04iWONXh0OiWO2Wh0LjWO4Wh0JjWO7Vh0HiWO:Wh0DjWO in the image?", + "choices": [ + "A. A smooth, curved arc.", + "B. A ruffled or scalloped edge.", + "C. 
A collection of separate, sharp fragments.", + "D. A complete and perfect circle." + ], + "answer": "B", + "type": "shape", + "image": "images/vqa_204.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01621320.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "icS?5jg02N101O00000O1O10000O1000000O101O0O5J3N2O1M2O100O1O1N2O1cKSOgAm0W>VOgAk0X>WOeAk0[>VOcAk0]>VOaAk0^>XO_Ai0a>ZO\\Af0e>\\OWAe0i>]OUAc0k>^OTAb0m>^OQAc0P?^Oj@\\OfLV1`b0^Oh@h0X?YOf@g0[?ZOd@f0\\?[Oc@e0]?\\O`@e0`?^O[@d0f?]OW@e0j?[OT@e0m?\\OQ@e0o?[Om_Oi0S`0ZOi_Of0X`0\\Oe_Od0\\`0]Ob_Oc0_`0^O__Oa0c`0BT_Oc0m`0_O_^OR1ca0POY^OP1ha0TOS^Ol0na0VOP^Oh0Rb01U]ONlb04R]OIQc0b22N2M3PNk\\OPOYc0o0g\\OPO[c0P1d\\OoN`c0n0`\\OQOdc0l0\\\\OSOgc0k0Z\\OSOjc0j0V\\OUOmc0i0S\\OUOPd0j0P\\OUORd0j0n[OUOUd0i0k[OWOXd0f0h[OYOZd0f0f[OYO\\d0f0d[OXO`d0f0a[OXObd0f0^[OYOfd0d0Z[OZOld0b0T[O]ORe0>nZOAYe09gZOE_e07aZOGde06\\ZOIje01WZOMle02TZOMne01TZOMZf0EhYO9\\f0BhYO:QUh6" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Blue.", + "B. Brown.", + "C. White.", + "D. Black." 
+ ], + "answer": "D", + "type": "color", + "image": "images/vqa_205.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01621320.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "icS?5jg02N101O00000O1O10000O1000000O101O0O5J3N2O1M2O100O1O1N2O1cKSOgAm0W>VOgAk0X>WOeAk0[>VOcAk0]>VOaAk0^>XO_Ai0a>ZO\\Af0e>\\OWAe0i>]OUAc0k>^OTAb0m>^OQAc0P?^Oj@\\OfLV1`b0^Oh@h0X?YOf@g0[?ZOd@f0\\?[Oc@e0]?\\O`@e0`?^O[@d0f?]OW@e0j?[OT@e0m?\\OQ@e0o?[Om_Oi0S`0ZOi_Of0X`0\\Oe_Od0\\`0]Ob_Oc0_`0^O__Oa0c`0BT_Oc0m`0_O_^OR1ca0POY^OP1ha0TOS^Ol0na0VOP^Oh0Rb01U]ONlb04R]OIQc0b22N2M3PNk\\OPOYc0o0g\\OPO[c0P1d\\OoN`c0n0`\\OQOdc0l0\\\\OSOgc0k0Z\\OSOjc0j0V\\OUOmc0i0S\\OUOPd0j0P\\OUORd0j0n[OUOUd0i0k[OWOXd0f0h[OYOZd0f0f[OYO\\d0f0d[OXO`d0f0a[OXObd0f0^[OYOfd0d0Z[OZOld0b0T[O]ORe0>nZOAYe09gZOE_e07aZOGde06\\ZOIje01WZOMle02TZOMne01TZOMZf0EhYO9\\f0BhYO:QUh6" + } + ], + "question": "What material is at the person's waist made of?", + "choices": [ + "A. Elastic.", + "B. Leather.", + "C. Cotton.", + "D. Nylon." 
+ ], + "answer": "B", + "type": "material", + "image": "images/vqa_206.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01621320.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "icS?5jg02N101O00000O1O10000O1000000O101O0O5J3N2O1M2O100O1O1N2O1cKSOgAm0W>VOgAk0X>WOeAk0[>VOcAk0]>VOaAk0^>XO_Ai0a>ZO\\Af0e>\\OWAe0i>]OUAc0k>^OTAb0m>^OQAc0P?^Oj@\\OfLV1`b0^Oh@h0X?YOf@g0[?ZOd@f0\\?[Oc@e0]?\\O`@e0`?^O[@d0f?]OW@e0j?[OT@e0m?\\OQ@e0o?[Om_Oi0S`0ZOi_Of0X`0\\Oe_Od0\\`0]Ob_Oc0_`0^O__Oa0c`0BT_Oc0m`0_O_^OR1ca0POY^OP1ha0TOS^Ol0na0VOP^Oh0Rb01U]ONlb04R]OIQc0b22N2M3PNk\\OPOYc0o0g\\OPO[c0P1d\\OoN`c0n0`\\OQOdc0l0\\\\OSOgc0k0Z\\OSOjc0j0V\\OUOmc0i0S\\OUOPd0j0P\\OUORd0j0n[OUOUd0i0k[OWOXd0f0h[OYOZd0f0f[OYO\\d0f0d[OXO`d0f0a[OXObd0f0^[OYOfd0d0Z[OZOld0b0T[O]ORe0>nZOAYe09gZOE_e07aZOGde06\\ZOIje01WZOMle02TZOMne01TZOMZf0EhYO9\\f0BhYO:QUh6" + } + ], + "question": "Which of the following best describes the texture of ?", + "choices": [ + "A. It has a ribbed texture.", + "B. It has a smooth surface.", + "C. It has a denim-like texture.", + "D. It has a braided texture." + ], + "answer": "B", + "type": "texture/pattern", + "image": "images/vqa_207.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01621320.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "nm=5jg02N2O00000001N10000001O00000001O00000000O1L4`XOISg0e0L3O2O0O2N1O1O1O2N1O1O101N1O1O1O1OD`YO[O^f0d0dYO\\O[f0e0fYOZOZf0f0fYOZOYf0h0gYOXOXf0h0hYOXOWf0j0iYOUOVf0l0jYOTOVf0l0kYOSOTf0o0lYOQORf0P1nYOPORf0Q1nYOnNQf0S1oYOmNQf0T1oYOlNoe0U1QZOkNne0V1SZOiNme0X1RZOhNme0Y1TZOfNle0[1SZOeNle0\\1UZObNke0`1:0O2O0O101N101O001N10000O10O1N2O001O1O1O1O1O1O2N1O1O1O102M10001O0O2O001N101O001N101O0O2O001O0O2O1O1N2O001N2O1O2M101O1N4M3M4K`jed0" + } + ], + "question": "Which statement accurately describes a feature of ?", + "choices": [ + "A. 
The masked object is long and curved.", + "B. The masked object is perfectly spherical.", + "C. The masked object has a short stem.", + "D. The masked object has a long, prominent stem." + ], + "answer": "C", + "type": "shape", + "image": "images/vqa_208.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01629547.jpg", + "mask_rles": [ + { + "size": [ + 1600, + 1200 + ], + "counts": "mP1`8P1jHAUOdU1S8ijNjIUU1X6jjNiITU1X6ljNiISU1X6ljNjIRU1V6njNkIQU1T6PkNmIoT1S6QkNnInT1R6RkNoImT1P6TkNPJmT1o5SkNPJoT1o5QkNQJoT1o5QkNQJoT1o5RkNPJnT1P6SkNoImT1Q6UkNlIlT1T6_kNaIaT1_6^kNjGoNQ1cU1U7^kNjGB8VU1n7XkNjGZV1V8fiNjGZV1V8fiNjGZV1V8fiNjGZV1V8fiNjGZV1V8fiNjG[V1U8eiNjG\\V1V8diNjG\\V1V8diNjG\\V1V8diNjG\\V1V8diNjG\\V1V8diNjG\\V1V8diNjG\\V1V8diNjG\\V1U8eiNkG[V1U8eiNkG[V1U8eiNkG[V1U8eiNkG[V1U8eiNkG[V1U8eiNkG[V1U8eiNkG[V1U8eiNkG[V1U8diNlG\\V1T8diNlG\\V1T8diNlG\\V1T8ciNmG]V1S8ciNmG]V1S8ciNmG]V1S8biNnG^V1R8biNnG^V1R8biNoG]V1Q8biNQH]V1o7ciNQH]V1o7ciNRH\\V1n7ciNSH]V1m7ciNTH\\V1l7ahNdGi0b0fV1h7_hNmGg0;jV1h7]hNSHe06mV1]8nhNdGRW1`8jhNaGUW1c8ghN]GYW1g8bhNZG^W1d900000000000a0_O6J6J6J6J6J6J6J6J6J6J6J3M3M4L4L9G5L2M1O1O1O2N2N001O1O001O001O1O001O0000oE^lNd5bS1ZJalNe5_S1ZJdlNd5\\S1ZJglNd5ZS1ZJjlNd5VS1ZJmlNe5SS1YJQmNe5oR1YJTmNe5mR1YJVmNf5jR1XJZmNf5fR1XJ^mNf5bR1XJemNb5\\R1]JjmN^5WR1_JomN]5QR1aJVnNY5kQ1eJ[nNW5fQ1fJanNU5`Q1hJgnNR5[Q1kJknNo4\\Q1jJknNn4YQ1oJmnNj4WQ1VKmnNc4VQ1\\KonN\\4UQ1cKonNV4UQ1jKonNn3UQ1QLonNi3TQ1WLPoNa3TQ1^LQoNZ3SQ1fLQoNR3SQ1mLToNj2nP1WMYoN^2kP1aM\\oNU2hP1\\L]jN7R5R3eP1eL[jN8^5[2bP1[MRjN:^6:_P1ZOUiN;V_1Dk`NS_1@o`N?R_1_OPaN`0R_1]OPaNa0R_1^Oo`N`0T_1]On`Na0T_1]On`Na0U_1\\Oo`N`0S_1^OQaN>R_1_ORaN=Q_1_OTaN=[`1K4M2N3KWblb1" + } + ], + "question": "What is a primary material of ?", + "choices": [ + "A. Bamboo.", + "B. Metal.", + "C. Wood.", + "D. Rubber." 
+ ], + "answer": "D", + "type": "material", + "image": "images/vqa_209.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01629547.jpg", + "mask_rles": [ + { + "size": [ + 1600, + 1200 + ], + "counts": "_Sji02la14L3N2N2N2N2N2N2N2N2N2N2N2N2N2N2N2DnNg_NT1V`1=N1O1O1O1O1O1O100O1O1O1O1O010O1O1O1O1O100O1O001O3M2O2M2N3M2N3M2WdNZMVX1h2fgN\\MWX1f2ggN[MXX1h2egNZMYX1h2fgNXMEk1eS1P1dlNUMBd2US19XmNRM_OT3oR1L`mNPM\\O\\3PS1GamNmL[Oc3RS1AamNmLXOj3SS1\\OcmNiLVOS4TS1UOdmNhLVOX4TS1ROdmNfLVO]4SS1nNemNeLWOb4QS1kNfmNcLVOg4RS1gNfmNbLWOl4PS1dNfmNaLXOP5oR1`NhmN_LWOW5nR1\\NhmN^LXOZ5nR1YNimN\\LXO`5lR1VNimNZLYOf5kR1QNkmNYLXOj5kR1oMjmNWLZOo5iR1kMkmNWLYOT6iR1fMlmNVLZOX6hR1cMmmNULXO^6hR1]MomNULXOc6fR1YMPnNTLYOg6eR1UMQnNTLYOj6eR1SMQnNSLXOm6fR1PMQnNSLXOQ7eR1mLQnNRLZOS7dR1kLQnNRL[OV7bR1iLRnNQL[OX7bR1gLRnNQL\\OZ7aR1hLomNnK@]7_R1hLmmNkKD_7^R1gLmmNjKDb7]R1eLmmNiKFd7\\R1dLlmNhKHf7[R1bLlmNhKIi7YR1`LmmNgKIk7YR1^LmmNgKIn7YR1\\LlmNfKKQ8WR1YLmmNgKKR8WR1WLnmNfKKV8UR1TLomNfKLX8TR1QLPnNgKMY8RR1PLPnNhKM[8QR1mKRnNgKN]8oQ1lKRnNgKO`8mQ1iKTnNfK0b8kQ1hKTnNgK0c8kQ1fKUnNfK1f8hQ1dKVnNfK2h8gQ1[IbmNId0SN4j8eQ1ZIdmNHc0TN3m8dQ1WIhmNG?VN5m8cQ1VIkmNE=WN5Q9aQ1SIomND9YN7Q9`Q1RIQnNC8ZN6S9`Q1PITnNB5ZN8U9^Q1oHWnNA1\\N9U9_Q1nHXnNAO[N;V9^Q1nHZnN@L\\N;X9^Q1lH]nN_OH^N=W9^Q1lH^nN^OG^N=Y9^Q1kH`nN]OC`N>Y9_Q1jHbnN\\O@`N`0[9]Q1iHdnN\\O^O`N`0\\9^Q1hHfnN[OZObNb0\\9]Q1gHinNZOWObNc0^9]Q1fHknNYOTOcNc0_9^Q1eHlnNDBl7aQ1`HonNA\\OT8eQ1[HQoN_OVOZ8iQ1WHRoN]OnNe8oQ1nGUoNm9kP1SFWoNk9iP1UFXoNk9gP1UF[oNi9eP1WF]oNg9cP1XF_oNh9`P1XFboNf9^P1YFeoNe9\\P1ZFaoNj9^P1YDVnNX1]1`:]P1WD_nNGER1`1P;]P1VDgnNh0l0S;]P1SDinNi0j0T;^P1PDknNk0g0U;bQ1jD^nNW;cQ1gD]nNY;dQ1fD\\nNZ;eQ1eD[nN\\;eQ1cD[nN];gQ1aDYnN`;gQ1_DYnNa;hQ1^DYnNa;hQ1^DXnNc;iQ1[DWnNe;jQ1ZDWnNe;jQ1ZDVnNf;kQ1YDUnNg;mQ1VDTnNj;mQ1UDTnNj;mQ1TDTnNl;mQ1RDTnNn;nQ1oCTnNPZ1L]Nl9fQ1kE]nNb0`1G^Nj9gQ1mEXnNd0d1D]Nk9gQ1oEVnNb0g1C\\Nk9hQ1RFRnN`0k1C\\Ni9hQ1UFomN`0n1@\\Nk9gQ1WFlmN?Q2_O^Nh9fQ1cGmoNdN^Ng9fQ1eGmoNbN_Ng9eQ1gGUQOW8ln0iGTQOU8mn0kGUQOR8ln0nGUQOQ8
kn0oGWQOn7jn0RHWQOl7jn0THXQOj7hn0VHYQOh7hn0WH[QOf7fn0YH\\QOe7en0ZH^QOb7dn0\\H_QOb7bn0UHiQOh7Xn0XHiQOf7Xn0YHkQOd7Vn0\\HkQOa7Wn0^HkQO`7Vn0`HlQO]7Un0bHmQO\\7Tn0cHoQOZ7Rn0fHoQOW7Sn0hHPROU7Rn0jHoQOT7Rn0kHPROS7Qn0mHPROQ7Rn0mHQROo6Rn0PIoQOn6Sn0PIoQOm6Un0PImQOm6fn0`H^QO\\7eT1M4L4L4L3M4L4L4L3M4L4L4L4L3M4L4L4@?^Oc0UNRcNQN_]1l1WcN_Mk\\1_2UcNaMm\\1]2ScNcMo\\1[2QcNeMR]1X2nbNhMT]1U2lbNkMW]1S2ibNmMZ]1P2fbNPN\\]1n1dbNRN^]1W33M2O2bL^aNY3d^11O001O000bHdLXPO\\3fo0gLYPOY3eo0jLZPOV3do0lL\\POU3bo0lL^POU3_o0nL`POR3^o0PMbPOQ3[o0QMePOo2Zo0RMfPOn2Xo0UMgPOk2Xo0VMhPOj2Xo0VMhPOY1cIeNgU12ePOY1dIeNgU12ePOZ1cIcNiU13dPOZ1cIaNkU16]PO^1gIXNPV1:XPO_1hIWNQV19WPO`1hIWNQV18XPOb1fIVNRV18XPOb1fIVNRV17YPOc1eIVNRV16ZPOe1cIUNTV14ZPOg1bIUNTV12\\POj1_ITNVV1O]POm1]ITNVV1N_POm1[IUNWV1K`POQ2XISNYV1KaPOQ2VITNQ^1k1naNVNS^1j1laNVNU^1i1kaNWNV^1i1iaNWNW^1l1faNTN[^1m1caNSN]^1P2`aNPN_^1S2_aNmMa^1V2\\aNjMc^1Y2[aNgMe^1[2YaNeMg^1j2000002N4L5K5K3M2N1O2N1O2iJTLVlNm3gS1VLXlNl3fS1ULXlNm3fS1ULYlNm3eS1TLZlNm3eS1TLZlNn3cS1TL\\lNm3cS1TL\\lNn3bS1SL]lNn3bS1SL]lNo3bS1QL]lNP4cS1PL\\lNR4dS1mK[lNT4iS1hKVlNZ4iS1fKVlN[4iS1eKWlN\\4hS1eKWlN\\4hS1dKXlN]4gS1cKYlN^4fS1bKZlN_4dS1bK\\lN_4cS1aK]lN`4bS1`K]lNb4bS1_K]lNb4bS1^K^lNb4bS1UKRhN0\\4l4aS1TKShN0\\4m4aS1RKShN1\\4n4`S1QKThN1\\4o4_S1ZK`lNg4`S1XK`lNi4_S1WKalNj4^S1VKblNj4_S1UKalNl4^S1TKblNm4]S1SKclNn4]S1RKblNo4]S1QKclNP5\\S1PKdlNQ5\\S1nJdlNS5[S1mJelNS5\\S1lJdlNU5[S1kJelNV5[S1jJclNX5\\S1hJdlNY5\\S1fJdlN[5[S1eJelN[5\\S1dJdlN]5[S1cJelN]5\\S1bJdlN_5[S1bJdlN^5]S1aJclN_5]S1aJclN_5^S1`JblNa5]S1_JclNa5^S1^JehNN\\3d5PT1\\JchN3\\3b5QT1YJdhN6Z3a5`T1aJ_kN_5aT1cJ]kN^5bT1cJ]kN]5cT1eJ[kN[5eT1gJYkNY5gT1iJWkNX5hT1iJVkNX5kT1iJSkNW5mT1kJQkNV5nT1lJPkNT5PU1mJojNS5RU1nJljNR5TU1PKjjNQ5UU1QKijNo4XU1RKfjNn4ZU1TKdjNm4[U1UKcjNk4^U1WK_jNi4aU1YK]jNh4bU1ZK\\jNf4eU1[KYjNe4gU1^KVjNb4jU1`KTjNa4lU1`KRjN`4nU1cKoiN]4QV1eKmiN\\4SV1eKkiN[4UV1gKhiNZ4XV1iKeiNW4\\V1jKbiNV4_V1kK_iNU4aV1mK]iNS4dV1oKYiNQ4hV1PLViNP4jV1RLTiNn3mV1VLnhNi3TW1[LghNe3ZW1^LbhNb3^W1_LahNa3`W1_L_hNa3bW1`L\\hN`3dW1aL[hN^3gW1bLXhN^3iW1bLVhN^3kW1bLThN^3RX1]LmgNc3TX1]Lk
gNc3VX1]LhgNc3YX1^LfgNb3[X1^LdgNb3\\X1_LcgNa3]X1`LbgN`3^X1aLagN_3^X1cLagN\\3`X1eL_gN[3aX1gL]gNY3dX1gL[gNY3fX1gLYgNY3hX1gLWgNY3iX1hLVgNW3lX1iLSgNW3nX1iLQgNW3PY1iLofNV3SY1jLlfNU3UY1k21N2O1N2N101N2N2O1N3N1N3M2O2M2N3M2N2N3M2N3M2N3M2M4M2N3M3TKodNd2T[1R24L3M3iLUeNTOoZ1h0SeNVOP[1h0PeNWOS[1g0mdNWOX[1e0jdNYOY[1e0gdNZO\\[1d0ddN[Ob[1`0_dN^Oh[1;YdNDo[15QdNIX\\10icNL]\\11ccNLc\\10^cNMh\\10YcNKn\\12RcNKT]11mbNKY]13hbNI^]14bbNId]14\\bNHk]14WbNHl]18TbNDP^1;RbNAR^1>oaN]OU^1b0maNZOV^1d0oaNVOT^1:d`NLo`13?000001O00001N10c_NGU_15k`N0S_1Nm`N4^`12N2O00000O20O000000010O000000mNDb`N=Z_1He`N7Z_1Ke`N5Z_1Me`N3[_1Nd`N3Z_1Ng`N1Y_10f`N0Z_10f`N0Z_10f`N0Z_10f`N1Y_10g`NOY_11g`NOZ_10f`N0Z_11e`NO[_11f`NOZ_10f`N0Z_11e`NO[_11e`NO[_11e`N0[_1Of`N0Z_11e`NO[_11e`NO[_11e`NO\\_11c`N0\\_10e`NO[_11e`NO\\_11c`NO]_11c`NO]_11d`NO\\_10d`N0\\_11c`NO]_11c`NO^_10b`N1]_1Od`N0]_1Nd`N2\\_1Nd`N2]_1Ld`N4]_1Jd`N7\\_1Fg`N9``10000010O00000010O000000010O0000010O00000O2O1N2N2NRi\\4" + } + ], + "question": "What is the color of the seat on ?", + "choices": [ + "A. Red.", + "B. Orange.", + "C. Black.", + "D. Green." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_210.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01634579.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 853 + ], + "counts": "Vlg>3io07K3M2O2L5M2N3M2N2N2N2O1N2O000XQOlNdn0[1M1O1O1O001O0000001O00000000000000010O0010O0010O01O1O3M1O3M2N1O001O1O1mNWQOl0Qo0N2M2O0O2N2O0O3N3K7IWSo9" + } + ], + "question": "What is the material of the item from which most likely originated?", + "choices": [ + "A. Plastic.", + "B. Waffle.", + "C. Stainless steel.", + "D. Ceramic." 
+ ], + "answer": "B", + "type": "material", + "image": "images/vqa_211.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01635395.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1024 + ], + "counts": "Rcfi03ho07I7I6J6K4K6J5_Nb1L4M2M4L3M4M3L3M4L3N]OQTOZMkk0`3M2N3N2N1O2O0N2O2N1O001000O100N2fNQUObMPk0Q2nUOZMVj0X2Q2B>A?B>ATSk00knSOEgl0]1hROfNel0d2^Ob0\\ObLSTOP4gk08J6J6J5K5L4K6J4L5K5D;01O1O1O2O000O100001O01O01O1O2N0001O00000000001O0O100000001O00000000000O2O00000000001O00000O10001O00000000001O0O100000001O000000001N2O1O1O001O1O1O1O1O001O1O1O1O1O1O001O001O00001O0000001O00001O0000001O00001N1O1O2N1000001O002N10O010O101NZB" + } + ], + "question": "What is the shape of ?", + "choices": [ + "A. The object is circular.", + "B. The object is rectangular.", + "C. The object is oval-shaped.", + "D. The object is square-shaped." + ], + "answer": "D", + "type": "shape", + "image": "images/vqa_212.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01635395.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 1024 + ], + "counts": "Rcfi03ho07I7I6J6K4K6J5_Nb1L4M2M4L3M4M3L3M4L3N]OQTOZMkk0`3M2N3N2N1O2O0N2O2N1O001000O100N2fNQUObMPk0Q2nUOZMVj0X2Q2B>A?B>ATSk00knSOEgl0]1hROfNel0d2^Ob0\\ObLSTOP4gk08J6J6J5K5L4K6J4L5K5D;01O1O1O2O000O100001O01O01O1O2N0001O00000000001O0O100000001O00000000000O2O00000000001O00000O10001O00000000001O0O100000001O000000001N2O1O1O001O1O1O1O1O001O1O1O1O1O1O001O001O00001O0000001O00001O0000001O00001N1O1O2N1000001O002N10O010O101NZB" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. The masked object is made of glass.", + "B. The masked object is made of plastic.", + "C. The masked object is made of metal.", + "D. The masked object is made of rubber." 
+ ], + "answer": "B", + "type": "material", + "image": "images/vqa_213.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01689730.jpg", + "mask_rles": [ + { + "size": [ + 855, + 1024 + ], + "counts": "`dg>:\\j0a0@f1ZN?A01O00001O000000000000000000000000000000000000000000000000000000000000000O10000000000000000000O100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000000000000000000000000000000000000001O000O10000000000000000000000000000O100000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000O1O1O1O1O1O1O1O1O100O00100O1O1N2O1O1O1O100O1O1O100O1O1000000000000O1000000000000O1O11O2N1O1O1O1O1O1O2M2O1O2N1O2N1O2N2N1O2N2N2N2N2N2N1O2N2N2N2N2N1O2N2N2N2N2N1O4L4L4L5K4L4L3M1O1O1O4L1O1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O1O2N1O1O1O2MReS2" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. It is a painted wooden board.", + "B. It is made of red plastic.", + "C. It is made of fabric.", + "D. It is a metal sheet painted red." + ], + "answer": "C", + "type": "material", + "image": "images/vqa_214.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01696718.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Sbfd04lo02N3M2N010O101O0O0100O001000O10O1O001O1N101O1N202M0000000000000000000O11O010O010O01O01O000001O01O000001O01O0001O01O0001O01O0001O01O00010O00000010O000010O00010O00010O0010O01O01O01O01O000001O001O010O0001O001Nj\\1" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. Plastic.", + "B. Metal.", + "C. Wood.", + "D. Ceramic." 
+ ], + "answer": "B", + "type": "material", + "image": "images/vqa_215.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01696718.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Sbfd04lo02N3M2N010O101O0O0100O001000O10O1O001O1N101O1N202M0000000000000000000O11O010O010O01O01O000001O01O000001O01O0001O01O0001O01O0001O01O00010O00000010O000010O00010O00010O0010O01O01O01O01O000001O001O010O0001O001Nj\\1" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. Metallic.", + "B. Wooden.", + "C. Plastic.", + "D. Ceramic." + ], + "answer": "A", + "type": "material", + "image": "images/vqa_216.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01696718.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Sbfd04lo02N3M2N010O101O0O0100O001000O10O1O001O1N101O1N202M0000000000000000000O11O010O010O01O01O000001O01O000001O01O0001O01O0001O01O0001O01O00010O00000010O000010O00010O00010O0010O01O01O01O01O000001O001O010O0001O001Nj\\1" + } + ], + "question": "Based on its shape, what is ?", + "choices": [ + "A. A spoon with an oval head.", + "B. A pie server with a triangular blade.", + "C. A knife with a long, rectangular blade.", + "D. A fork with four tines." + ], + "answer": "A", + "type": "shape", + "image": "images/vqa_217.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01729425.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "dUZa05jg06J2N2N2N2O1N1O2O000O101N1O1O100O2O000O1O1O1O1O100O1O1N2O1O100O100000001O00000000O2N2OO11O2N2M4L3M3N3M2N2M2O1N2N2O1O001N10000000O100O10000O100O100104JYjl4" + } + ], + "question": "Which of the following statements correctly describes the shape of ?", + "choices": [ + "A. It has a pointed toe.", + "B. It has a square toe.", + "C. 
It is an open-toed object.", + "D. It features a round toe box." + ], + "answer": "D", + "type": "shape", + "image": "images/vqa_218.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01729425.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "fcc55jg03M2POKRZO8ke0JRZO:ke0HRZOke0EQZO?me0BoYOc0oe0]OPZOf0ne0ZORZOi0le0VOSZOl0le0TOTZOl0me0SOSZOm0me0SOSZOm0ne0SOQZOm0oe0SORZOl0ne0TORZOl0ne0TORZOl0ne0TOPZOn0oe0SOPZOn0Pf0a0O100O10000001O1O1O1O1UNmYOg1Vf001O00001N10001O001N101ROcYO7^f0GcYO9^f0EdYO9^f0EcYO8af0G_YO6df0I]YO3hf0H\\YO5Yg0O2N3N2MW\\Ra0" + } + ], + "question": "Based on the image, which of the following statements about the color of is correct?", + "choices": [ + "A. The clothes it is wearing are green.", + "B. Its main body is yellow.", + "C. The ears are pink.", + "D. It is holding a red basket." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_219.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01770249.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": 
"h:k6`0]Il1T1X:a5jC\\In1S1V:e5jCXIP2S1U:h5hCUIT2S1S:k5fCSIW2R1R:n5eCoHZ2S1P:P6dCnH\\2R1n9^6RFbIm9^6TFbIk9_6UFaIj9`6VF`Ii9`6XF`If9b6ZF^Ie9b6\\F]Id9e6[F[Id9g6[FYIc9k6[FUId9m6[FSId9P7ZFPIe9R7ZFnHe9T7ZFlHd9W7[FiHd9Y7[FgHd9[7[FeHd9^7ZFbHe9`7ZF`Hd9c7[F]Hd9e7[F[Hd9g7[FYHd9i7[FWHc9m7[FSHd9o7[FQHd9Q8[FoGe9R8ZFnGe9T8ZFlGe9V8YFkGf9W8YFiGf9Z8WFfGi9\\8VFdGi9^8UFcGj9_8UFaGj9a8UF_Gj9c8TF^Gk9\\9\\EdFd:a9VE`Fi:g9QEYFn:];O1O1O1O1O1O1O100O1O1O1O1O1O1O001O1O100O1O1O1O1O1O1O1O1O1O2O0O1O1O1O2N1O1O1O1O2O0O2N3M2N3M2O1N3M2N3N1N3M2O2M2N2O2M2N3N1N3N1N3N1N101N2O1N2N2O1N2O1N2O1N101N2O1N2N2O1N2O1N2O0O2O1N2N2O1N2O1N2O1N100O100O1O100O100O100O100O100O1O100O100O100O100O1O100O10000O100O10000O100O10000O100O100O10000O100O10000O100O100O10000O100O10000O100O100O10000O100O10000O100O100O10000O10000O1000000O10000O10000O1000000O10000O10000O1000000O10000O10000O1000000O10000O10000O1000000O1000000O100000000O100000000O100000000O100000000O100000000O1000000O100000000O100000000O100000000O100000000O100000000O100000000O1000000O100000000O10000000000O1000000000000O1000000000000O1000000000000O1000000000000O1000000000000O10000000000O1000000000000O1000000000000O1000000000000O1000000000000O1000000000000O1000000000000O1000000000000O100000000000000O100000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000001O00000000000000000000000000000000000000000000000000001O00000000000000000000000000000000001O000000000000000000001O00000000000000000000001O000000000000000000001O0000000000000000001O0000000000001O0000000000001O000000000000001O0000000000001O0000000000001O0000000000001O000000000000001O0000000000001O0000000000001O00000000001O0000001O0000001O0000001O0000001O00001O0000001O0000001O0000001O0000001O00001O00O100O100O100O100O100O100O100O10000O100O100O100O100O100O100O100O10000000\\]O[Kma0e4R^O\\Kna0d4R^O\\Kna0d4R^O\\Kna0d4Q^O^Kna0b4R^O^Kna0b4R^O^Kna0b4Q^O`Kna0`4R^O`Kna0`4R^O`Kna0`4Q^ObKna0^4R^ObKna0^4R^ObKna0^4Q^OdKna0\\4R^OdKna0\\4R^OdKna0\\4Q^OfKna0Z4R^OfKna0Z4R^OfKna0Z4R^OgKma0Y
4R^OhKna0X4R^OhKna0X4R^OhKna0X4R^OiKma0W4S^OiKma0W4S^OiKma0W4S^OjKla0V4T^OjKla0V4T^OjKla0V4T^OkKka0U4U^OkKka0U4U^OkKka0U4U^OlKja0T4W^OkKia0U4W^OkKia0U4W^OlKha0T4X^OlKha0T4X^OlKha0T4X^OmKga0S4Y^OmKga0S4Y^OmKga0S4Z^OlKfa0T4Z^OmKea0S4[^OmKea0S4[^OmKea0T4Z^OmKea0S4[^OmKea0S4[^OmKea0S4\\^OmKca0T4\\^OlKda0T4]^OkKca0U4]^OlKba0T4_^OkKaa0U4_^OkKaa0V4_^OjK`a0V4`^OjK`a0V4a^OjK^a0V4b^OjK^a0V4b^OkK]a0V4c^OiK]a0W4c^OiK]a0W4d^OiK[a0W4e^OiK[a0X4e^OhKZa0X4f^OhKZa0X4f^OiKYa0W4h^OhKXa0X4h^OhKXa0Y4h^OgKWa0Y4i^OgKWa0Y4i^OhKVa0X4k^OgKUa0Y4k^OhKTa0Y4l^OfKTa0Z4l^OfKTa0Z4l^OgKSa0Y4n^OfKRa0[4m^OfKRa0Z4n^OfKRa0Z4o^OeKQa0[4o^OfKPa0Z4Q_OeKo`0\\4P_OeKo`0[4Q_OeKo`0[4R_OeKm`0[4S_OeKm`0[4T_OdKl`0]4S_OdKl`0\\4T_OdKl`0\\4U_OdKj`0\\4V_OdKj`0]4U_OdKj`0\\4W_OcKi`0]4W_OcKi`0^4V_OcKi`0]4X_ObKh`0^4X_OcKg`0^4X_ObKh`0^4Y_OaKg`0_4Y_ObKf`0_4Y_OaKg`0_4Z_OaKe`0_4[_OaKe`0`4Z_OaKe`0_4\\_O`Kd`0`4\\_OaKc`0`4\\_O`Kd`0`4]_O`Kb`0`4^_O`Kb`0a4]_O`Kb`0`4^_O`Kb`0`4__O`K``0a4__O_Ka`0a4__O_Ka`0a4`_O_K_`0b4`_O^K``0b4`_O_K_`0a4b_O^K^`0c4a_O^K^`0b4b_O^K^`0b4c_O^K\\`0c4c_O]K]`0c4c_O^K\\`0b4e_O]K[`0d4d_O]K[`0c4e_O]K[`0c4f_O]KY`0d4f_O\\KZ`0d4f_O\\KZ`0d4g_O\\KX`0e4g_O[KY`0e4g_O\\KX`0d4i_O[KW`0f4h_O[KW`0e4j_OZKV`0f4j_O[KU`0f4j_OZKV`0f4k_OZKT`0f4l_OZKT`0g4k_OZKT`0f4m_OYKS`0h4l_OXKT`0h4l_OYKS`0g4n_OXKQ`0j4n_OWKP`0j4P@VKo?k4R@UKl?m4S@SKl?n4T@SKj?n4W@QKh?Q5W@PKg?Q5Y@oJf?R5[@nJc?T5\\@lJb?V5^@kJ`?V5a@iJ^?Y5a@hJ\\?Z5e@eJZ?\\5f@eJW?^5i@aJV?`5j@aJT?`5m@_JQ?d5n@]JP?d5QA\\Jm>e5SA[Jk>h5UAXJi>i5XAVJg>k5ZAUJc>n5^APJa>R6_AkIb>V6_AgIa>\\6_AcI`>_6eAZI[>g6]13N1N3N2N1N3N1O2M3N1O2L3M4L4K4M4L3O2N2O0O2N1O2_BXGh;h8VD[Gh;g8UD[Gj;h8SDZGk;h8RD[Gm;g8PDZGo;h8nC[GP?", + "choices": [ + "A. The object is entirely black, matching the stove it is on.", + "B. The object is uniformly silver in color.", + "C. The object contains a bright yellow utensil.", + "D. The object is filled with red-colored food." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_220.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01788343.jpg", + "mask_rles": [ + { + "size": [ + 1009, + 1024 + ], + "counts": "_oo78\\n0n0N2M2O100O2N100O100O2O0000001N11OO10001O000000001O00000000001O000000001O00000000001O00O100000000000000000001N10001O00001O00001O0O101O00001O00001O0O1O2I7Hh0XOUVSe0" + } + ], + "question": "What material is a component of ?", + "choices": [ + "A. Plastic.", + "B. Cardboard.", + "C. Wood.", + "D. Metal." + ], + "answer": "B", + "type": "material", + "image": "images/vqa_221.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01811034.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "hPYf06gg04K6J5K5K5K4L5K5K4M4K4L5K4L4L4L4L3M4L4L3M4L3M4K4M4L3M4L3M4K4M4L3M4TOmKg]OV4Yb0mKa]OW4^b0lK\\]OW4db0c000O010O0100O001O1N1N3K5L4L4L4L4L4L4L4L4L4L4L5K4L4L5K4L4L5K4L5K4L4L5K4L5K]D" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. Metal.", + "B. Glass.", + "C. Cardboard.", + "D. Plastic." + ], + "answer": "D", + "type": "material", + "image": "images/vqa_222.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01811034.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "hPYf06gg04K6J5K5K5K4L5K5K4M4K4L5K4L4L4L4L3M4L4L3M4L3M4K4M4L3M4L3M4K4M4L3M4TOmKg]OV4Yb0mKa]OW4^b0lK\\]OW4db0c000O010O0100O001O1N1N3K5L4L4L4L4L4L4L4L4L4L4L5K4L4L5K4L4L5K4L5K4L4L5K4L5K]D" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. A combination of orange and white.", + "B. A combination of blue and white.", + "C. A combination of green, yellow, and white.", + "D. Primarily red and black." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_223.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01835389.jpg", + "mask_rles": [ + { + "size": [ + 2472, + 2832 + ], + "counts": "c\\U_29l\\26K3N3L3M3M3M3M2O2M3N1N2O1N3M2L4M3L4M3M3N1O2J6L4M2N2O2M2B4WfMVMgY2j2;0100000000010O01M2O[Od0L6N200AdLRgM]3jX2mLPgMS3oX2c0O2M201O001O1O10O0100O01000O0100000000003N1N1O3N1N101N1000O10001N102`KbgMP4OlKgX2Z41O100O2N10001O0010O01N1N3N1N101O001O1O1O2N2ON1O1N2N1O2N2N2N1O2O1N2N2N2N2N2O1N1O2N101N1O1O2O0O1O101N100O101N100O101N100N10O100N2N2M3K6L3O1O2N2N1N3L3L4K5M3N2O2M2N3L3L5I7H7N3M3M3M3M3M3M4L3M3M4K4M4L3M4K5L4L4K5K6I9GX`Yh3" + } + ], + "question": "What is the texture/pattern of ?", + "choices": [ + "A. Ribbed.", + "B. Pleated.", + "C. Smooth.", + "D. Velvet." + ], + "answer": "C", + "type": "texture/pattern", + "image": "images/vqa_224.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01835389.jpg", + "mask_rles": [ + { + "size": [ + 2472, + 2832 + ], + "counts": "c\\U_29l\\26K3N3L3M3M3M3M2O2M3N1N2O1N3M2L4M3L4M3M3N1O2J6L4M2N2O2M2B4WfMVMgY2j2;0100000000010O01M2O[Od0L6N200AdLRgM]3jX2mLPgMS3oX2c0O2M201O001O1O10O0100O01000O0100000000003N1N1O3N1N101N1000O10001N102`KbgMP4OlKgX2Z41O100O2N10001O0010O01N1N3N1N101O001O1O1O2N2ON1O1N2N1O2N2N2N1O2O1N2N2N2N2N2O1N1O2N101N1O1O2O0O1O101N100O101N100O101N100N10O100N2N2M3K6L3O1O2N2N1N3L3L4K5M3N2O2M2N3L3L5I7H7N3M3M3M3M3M3M4L3M3M4K4M4L3M4K5L4L4K5K6I9GX`Yh3" + } + ], + "question": "What is in the image?", + "choices": [ + "A. A knotted tie.", + "B. A bow tie with a butterfly shape.", + "C. A decorative epaulet on the shoulder.", + "D. The leaf of the poppy pin." 
+ ], + "answer": "B", + "type": "shape", + "image": "images/vqa_225.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01835389.jpg", + "mask_rles": [ + { + "size": [ + 2472, + 2832 + ], + "counts": "c\\U_29l\\26K3N3L3M3M3M3M2O2M3N1N2O1N3M2L4M3L4M3M3N1O2J6L4M2N2O2M2B4WfMVMgY2j2;0100000000010O01M2O[Od0L6N200AdLRgM]3jX2mLPgMS3oX2c0O2M201O001O1O10O0100O01000O0100000000003N1N1O3N1N101N1000O10001N102`KbgMP4OlKgX2Z41O100O2N10001O0010O01N1N3N1N101O001O1O1O2N2ON1O1N2N1O2N2N2N1O2O1N2N2N2N2N2O1N1O2N101N1O1O2O0O1O101N100O101N100O101N100N10O100N2N2M3K6L3O1O2N2N1N3L3L4K5M3N2O2M2N3L3L5I7H7N3M3M3M3M3M3M4L3M3M4K4M4L3M4K5L4L4K5K6I9GX`Yh3" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. White.", + "B. Red.", + "C. Black.", + "D. Light blue." + ], + "answer": "C", + "type": "color", + "image": "images/vqa_226.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01867731.jpg", + "mask_rles": [ + { + "size": [ + 1023, + 806 + ], + "counts": "^_d:8do0?B4eQO[OUm0j0]ROCam0?UROKim0V1000O001O0O2O1O001N2O1O0O2O1O1O1N101N12M3L4N2M3N2M3M4M3L4M3L4M3L3N2N1N2O1O2N1O1O2N1O1O1O2N1O1O000000O2O0O2O000O101O0O2O001O10000000O01O1O1O1O1O100O1O1O002N1O1O1O1O2N1O1O1O1O1O2N1O1O1O2N2N2N1O2O1N2N2O0O2N010O010O2N2O1N2N2O2M2O1cNfQOR1\\n0lNfQOR1\\n0mNdQOS1en0N2N4M2N3M4L3M3M2N1O00000000O1N21O1O2N2N2N2N1N1OTlo9" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Red.", + "B. Green.", + "C. White.", + "D. Blue." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_227.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01867731.jpg", + "mask_rles": [ + { + "size": [ + 1023, + 806 + ], + "counts": "^_d:8do0?B4eQO[OUm0j0]ROCam0?UROKim0V1000O001O0O2O1O001N2O1O0O2O1O1O1N101N12M3L4N2M3N2M3M4M3L4M3L4M3L3N2N1N2O1O2N1O1O2N1O1O1O2N1O1O000000O2O0O2O000O101O0O2O001O10000000O01O1O1O1O1O100O1O1O002N1O1O1O1O2N1O1O1O1O1O2N1O1O1O2N2N2N1O2O1N2N2O0O2N010O010O2N2O1N2N2O2M2O1cNfQOR1\\n0lNfQOR1\\n0mNdQOS1en0N2N4M2N3M4L3M3M2N1O00000000O1N21O1O2N2N2N2N1N1OTlo9" + } + ], + "question": "What is the texture of in the image?", + "choices": [ + "A. It has a smooth, glossy surface.", + "B. It has a papery skin.", + "C. It has a grainy, wooden texture.", + "D. It is embroidered with colorful threads." + ], + "answer": "B", + "type": "texture/pattern", + "image": "images/vqa_228.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01867731.jpg", + "mask_rles": [ + { + "size": [ + 1023, + 806 + ], + "counts": "^_d:8do0?B4eQO[OUm0j0]ROCam0?UROKim0V1000O001O0O2O1O001N2O1O0O2O1O1O1N101N12M3L4N2M3N2M3M4M3L4M3L4M3L3N2N1N2O1O2N1O1O2N1O1O1O2N1O1O000000O2O0O2O000O101O0O2O001O10000000O01O1O1O1O1O100O1O1O002N1O1O1O1O2N1O1O1O1O1O2N1O1O1O2N2N2N1O2O1N2N2O0O2N010O010O2N2O1N2N2O2M2O1cNfQOR1\\n0lNfQOR1\\n0mNdQOS1en0N2N4M2N3M4L3M3M2N1O00000000O1N21O1O2N2N2N2N1N1OTlo9" + } + ], + "question": "What is the color of the root of ?", + "choices": [ + "A. Red.", + "B. White.", + "C. Brown.", + "D. Green." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_229.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01915694.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "mP]83ig08H8H9G8G8I6J7I7J6UZOlM`e0T1bZO0Pc0mNQ_OR1VN0eb0Aa^O?POO^b0E[^O`0YOI\\b0GW^Oc0_OE^b0Dl]On0H[O`b0Db]OV11SO_b0F_]OX14oN`b0G[]O[17jNab0JX]O[1;dNbb00R]O]1?]Ndb04m\\O^1b0ZNdb07j\\O_1bd0`N^[O_1cc0[Nf\\O4Ga1cc0\\Nf\\O2Gb1cc0\\Ng\\O1Fb1cc0_Nh\\OMEd1cc0`N`]O_1ab0aN_]O_1ab0bN_]O]1ab0cN`]O[1ab0eN`]OY1ab0fNl]Om0Tb0TOY^O_YOCaf0>\\YODdf0<[YOEVd00i]Ok0jNiN`b0k0W^OZc0Bf\\O`0OYN]b0W1d]Oc0G]Ndb0o0e]Of0_ObNkb0h0f]Oh0XOfNQc0b0g]Oa1Yb0_Ng]Ob1Xb0^Nh]Ob1Xb0^Nh]Ob1Yb0]Ng]Oc1Yb0]Ng]Oc1Yb0]Ng]Oc1Yb0]Nf]Od1Zb0\\Nf]Od1Zb0\\Nf]Oe1Zb0ZNf]Of1Zb0ZNf]Og1Yb0YNg]Og1Zb0XNf]Oh1Zb0XNf]Oi1Zb0VNf]Oj1[b0UNe]Ok1[b0UNe]Ok1\\b0TNd]Ol1]b0SNc]Om1]b0SNc]Om1^b0RNb]On1_b0QNa]Oo1_b0QNa]Oo1`b0PNa]Oo1`b0PN`]OP2bb0nM^]OR2cb0mM]]OS2eb0kM[]OU2gb0iMY]OW2hb0hMX]OX2jb0fMX]OW2]d0L3M4M2M3M2N2N2N3M2N2N3M3M3M4L3M2M4M3K5H9D?", + "choices": [ + "A. The object has a smooth, polished surface.", + "B. It has a prominent, visible wood grain pattern.", + "C. The surface is painted a solid, matte color.", + "D. It is covered with a striped pattern." 
+ ], + "answer": "A", + "type": "texture/pattern", + "image": "images/vqa_230.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01915694.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "mP]83ig08H8H9G8G8I6J7I7J6UZOlM`e0T1bZO0Pc0mNQ_OR1VN0eb0Aa^O?POO^b0E[^O`0YOI\\b0GW^Oc0_OE^b0Dl]On0H[O`b0Db]OV11SO_b0F_]OX14oN`b0G[]O[17jNab0JX]O[1;dNbb00R]O]1?]Ndb04m\\O^1b0ZNdb07j\\O_1bd0`N^[O_1cc0[Nf\\O4Ga1cc0\\Nf\\O2Gb1cc0\\Ng\\O1Fb1cc0_Nh\\OMEd1cc0`N`]O_1ab0aN_]O_1ab0bN_]O]1ab0cN`]O[1ab0eN`]OY1ab0fNl]Om0Tb0TOY^O_YOCaf0>\\YODdf0<[YOEVd00i]Ok0jNiN`b0k0W^OZc0Bf\\O`0OYN]b0W1d]Oc0G]Ndb0o0e]Of0_ObNkb0h0f]Oh0XOfNQc0b0g]Oa1Yb0_Ng]Ob1Xb0^Nh]Ob1Xb0^Nh]Ob1Yb0]Ng]Oc1Yb0]Ng]Oc1Yb0]Ng]Oc1Yb0]Nf]Od1Zb0\\Nf]Od1Zb0\\Nf]Oe1Zb0ZNf]Of1Zb0ZNf]Og1Yb0YNg]Og1Zb0XNf]Oh1Zb0XNf]Oi1Zb0VNf]Oj1[b0UNe]Ok1[b0UNe]Ok1\\b0TNd]Ol1]b0SNc]Om1]b0SNc]Om1^b0RNb]On1_b0QNa]Oo1_b0QNa]Oo1`b0PNa]Oo1`b0PN`]OP2bb0nM^]OR2cb0mM]]OS2eb0kM[]OU2gb0iMY]OW2hb0hMX]OX2jb0fMX]OW2]d0L3M4M2M3M2N2N2N3M2N2N3M3M3M4L3M2M4M3K5H9D is correct?", + "choices": [ + "A. It has six strings.", + "B. It is a four-stringed instrument.", + "C. It is painted bright red.", + "D. It is standing on a pink stand on the floor." 
+ ], + "answer": "A", + "type": "shape", + "image": "images/vqa_231.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01915694.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "mP]83ig08H8H9G8G8I6J7I7J6UZOlM`e0T1bZO0Pc0mNQ_OR1VN0eb0Aa^O?POO^b0E[^O`0YOI\\b0GW^Oc0_OE^b0Dl]On0H[O`b0Db]OV11SO_b0F_]OX14oN`b0G[]O[17jNab0JX]O[1;dNbb00R]O]1?]Ndb04m\\O^1b0ZNdb07j\\O_1bd0`N^[O_1cc0[Nf\\O4Ga1cc0\\Nf\\O2Gb1cc0\\Ng\\O1Fb1cc0_Nh\\OMEd1cc0`N`]O_1ab0aN_]O_1ab0bN_]O]1ab0cN`]O[1ab0eN`]OY1ab0fNl]Om0Tb0TOY^O_YOCaf0>\\YODdf0<[YOEVd00i]Ok0jNiN`b0k0W^OZc0Bf\\O`0OYN]b0W1d]Oc0G]Ndb0o0e]Of0_ObNkb0h0f]Oh0XOfNQc0b0g]Oa1Yb0_Ng]Ob1Xb0^Nh]Ob1Xb0^Nh]Ob1Yb0]Ng]Oc1Yb0]Ng]Oc1Yb0]Ng]Oc1Yb0]Nf]Od1Zb0\\Nf]Od1Zb0\\Nf]Oe1Zb0ZNf]Of1Zb0ZNf]Og1Yb0YNg]Og1Zb0XNf]Oh1Zb0XNf]Oi1Zb0VNf]Oj1[b0UNe]Ok1[b0UNe]Ok1\\b0TNd]Ol1]b0SNc]Om1]b0SNc]Om1^b0RNb]On1_b0QNa]Oo1_b0QNa]Oo1`b0PNa]Oo1`b0PN`]OP2bb0nM^]OR2cb0mM]]OS2eb0kM[]OU2gb0iMY]OW2hb0hMX]OX2jb0fMX]OW2]d0L3M4M2M3M2N2N2N3M2N2N3M3M3M4L3M2M4M3K5H9D?", + "choices": [ + "A. f-shaped.", + "B. Oval.", + "C. Round.", + "D. Diamond-shaped." 
+ ], + "answer": "C", + "type": "shape", + "image": "images/vqa_232.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01915694.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "mP]83ig08H8H9G8G8I6J7I7J6UZOlM`e0T1bZO0Pc0mNQ_OR1VN0eb0Aa^O?POO^b0E[^O`0YOI\\b0GW^Oc0_OE^b0Dl]On0H[O`b0Db]OV11SO_b0F_]OX14oN`b0G[]O[17jNab0JX]O[1;dNbb00R]O]1?]Ndb04m\\O^1b0ZNdb07j\\O_1bd0`N^[O_1cc0[Nf\\O4Ga1cc0\\Nf\\O2Gb1cc0\\Ng\\O1Fb1cc0_Nh\\OMEd1cc0`N`]O_1ab0aN_]O_1ab0bN_]O]1ab0cN`]O[1ab0eN`]OY1ab0fNl]Om0Tb0TOY^O_YOCaf0>\\YODdf0<[YOEVd00i]Ok0jNiN`b0k0W^OZc0Bf\\O`0OYN]b0W1d]Oc0G]Ndb0o0e]Of0_ObNkb0h0f]Oh0XOfNQc0b0g]Oa1Yb0_Ng]Ob1Xb0^Nh]Ob1Xb0^Nh]Ob1Yb0]Ng]Oc1Yb0]Ng]Oc1Yb0]Ng]Oc1Yb0]Nf]Od1Zb0\\Nf]Od1Zb0\\Nf]Oe1Zb0ZNf]Of1Zb0ZNf]Og1Yb0YNg]Og1Zb0XNf]Oh1Zb0XNf]Oi1Zb0VNf]Oj1[b0UNe]Ok1[b0UNe]Ok1\\b0TNd]Ol1]b0SNc]Om1]b0SNc]Om1^b0RNb]On1_b0QNa]Oo1_b0QNa]Oo1`b0PNa]Oo1`b0PN`]OP2bb0nM^]OR2cb0mM]]OS2eb0kM[]OU2gb0iMY]OW2hb0hMX]OX2jb0fMX]OW2]d0L3M4M2M3M2N2N2N3M2N2N3M3M3M4L3M2M4M3K5H9D in the image?", + "choices": [ + "A. Blue.", + "B. Black.", + "C. Red.", + "D. Light brown." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_233.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01916008.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cdg63kg03M3L3M3M2O1N2N3M2N200O1O100O100010OO100001O1O1O001O000000000010O01O1O1O1O1O1O1O1O1O1O1O1O100O1O1O1O1O1O1N2000000O1O1O1O2Noa=6g]B5K5K5K5L4K5K5N2O1O1O1O1O1O1O0O2O1O1O1O1O1O1O1N2O1O1O1O1O001O1O11O00000000000000000O1000000000000000001O00000O101N1E;D in the image?", + "choices": [ + "A. Trapezoidal.", + "B. Triangular.", + "C. Rectangular.", + "D. Curved." 
+ ], + "answer": "C", + "type": "shape", + "image": "images/vqa_234.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01916008.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cdg63kg03M3L3M3M2O1N2N3M2N200O1O100O100010OO100001O1O1O001O000000000010O01O1O1O1O1O1O1O1O1O1O1O1O100O1O1O1O1O1O1N2000000O1O1O1O2Noa=6g]B5K5K5K5L4K5K5N2O1O1O1O1O1O1O0O2O1O1O1O1O1O1O1N2O1O1O1O1O001O1O11O00000000000000000O1000000000000000001O00000O101N1E;D?", + "choices": [ + "A. The masked object is a component of the white van.", + "B. The masked object is a component of the black car.", + "C. The masked object is a component of the blue building.", + "D. The masked object is a component of the gray road surface." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_235.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01916008.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "cdg63kg03M3L3M3M2O1N2N3M2N200O1O100O100010OO100001O1O1O001O000000000010O01O1O1O1O1O1O1O1O1O1O1O1O100O1O1O1O1O1O1N2000000O1O1O1O2Noa=6g]B5K5K5K5L4K5K5N2O1O1O1O1O1O1O0O2O1O1O1O1O1O1O1N2O1O1O1O1O001O1O11O00000000000000000O1000000000000000001O00000O101N1E;D?", + "choices": [ + "A. White.", + "B. Black.", + "C. Gray.", + "D. Blue." + ], + "answer": "B", + "type": "color", + "image": "images/vqa_236.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01936287.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "l\\me01ng02N3N000ROKQZO5ne04jYOKWf06hYOJYf05hYOJb01bd0MnZO06`0id0@Q[O0Lk0Re0UOR[O0DR1]d0SOo[OKOn0DSObd06j[OIDo0Ye0YOS[OI[Ol0le0\\OYZOn0Pf079DIPOVZOc0Xf0;7CTYOGlf03\\YOKef0McYO2_f0KdYO3P\\a1" + } + ], + "question": "What is the color of in the image?", + "choices": [ + "A. Black.", + "B. Yellow.", + "C. White.", + "D. Green." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_237.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01939853.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Sin91mo04M2O1N1000O10000000O1000O100000O1000O0100O010O010O01000N1O2N110O2O1N2O1N2O1N2O1O1N2O1N10001N10O1000O10000O1000000O100000O01000000O1000000O10O100GTQO]Oln0c0WQOZOjn0e0901O001O0000001O00001O0000001O00001O0000001O00001O0000001O00001O00001N100O2O2L4L]W]:" + } + ], + "question": "Which of the following statements accurately describes the shape of ?", + "choices": [ + "A. The masked object is a large animal lying on the ground.", + "B. The masked object is one of the small pigeons walking on the ground.", + "C. The masked object is a small animal standing on its feet.", + "D. The masked object is a decorative golden statue at the base of the temple." + ], + "answer": "A", + "type": "shape", + "image": "images/vqa_238.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01939853.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Sin91mo04M2O1N1000O10000000O1000O100000O1000O0100O010O010O01000N1O2N110O2O1N2O1N2O1N2O1O1N2O1N10001N10O1000O10000O1000000O100000O01000000O1000000O10O100GTQO]Oln0c0WQOZOjn0e0901O001O0000001O00001O0000001O00001O0000001O00001O0000001O00001O00001N100O2O2L4L]W]:" + } + ], + "question": "Which of the following descriptions about the texture of is correct?", + "choices": [ + "A. It has a shaggy coat.", + "B. It has a smooth coat.", + "C. It is covered in feathers.", + "D. It has a spotted pattern." 
+ ], + "answer": "B", + "type": "texture/pattern", + "image": "images/vqa_239.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01944558.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "\\]n5i0Ug05J4H8M3N1O1O100O10000000000001N1O2N2H8M4L;EiZ`a0" + } + ], + "question": "What is a characteristic texture of ?", + "choices": [ + "A. Creamy.", + "B. Hard and woody.", + "C. Woven fabric.", + "D. Smooth and metallic." + ], + "answer": "A", + "type": "texture/pattern", + "image": "images/vqa_240.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01948375.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "Plm=166[g0;L3N3kYOG]d0;S[O3md0`100O1000O100000O1000O1000O1000O100O002N1O2O1N0000001O1L4]Oc0N2M2N3M3N2M3N2O001O1O1O1O1N2O101\\OjXO;_g0OO1O001O00O102N2HZTe8" + } + ], + "question": "What is the color of in the image?", + "choices": [ + "A. Grayish-white.", + "B. Teal.", + "C. Brown.", + "D. Beige." + ], + "answer": "A", + "type": "color", + "image": "images/vqa_241.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01948375.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "Plm=166[g0;L3N3kYOG]d0;S[O3md0`100O1000O100000O1000O1000O1000O100O002N1O2O1N0000001O1L4]Oc0N2M2N3M3N2M3N2O001O1O1O1O1N2O101\\OjXO;_g0OO1O001O00O102N2HZTe8" + } + ], + "question": "Which of the following best describes the texture of ?", + "choices": [ + "A. Smooth.", + "B. Ribbed.", + "C. Plush.", + "D. Waxy." 
+ ], + "answer": "C", + "type": "texture/pattern", + "image": "images/vqa_242.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01959650.jpg", + "mask_rles": [ + { + "size": [ + 1200, + 1600 + ], + "counts": "]k0S:]k00O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O1000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000O10000001O2N2N1O1O1O1O001O001O00001O1O2N2N1O1O1O1O1O1O001O001O001O00001O00001O001O1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1PJgSOg2[l0TMkSOi2Vl0SMoSOk2Rl0QMSTOm2nk0PMUTOo2mk0lLWTOS3jk0jLYTOU3hk0hL[TOW3fk0eL]TO[3ek0aL`TO\\3ak0`LdTO^3]k0_LcTOc3^k0YLdTOh3^k0TLdTOl3]k0PLgTOo3Zk0nKhTOR4Yk0kKiTOU4Yk0fKjTOZ4Wk0cKkTO]4Vk0_KmTOa4Tk0\\KnTOd4Tk0WKnTOj4Tk0RKnTOn4Vk0kJmTOU5fm0100O100O1O1O1001O001O001O1O1O1O2N1O2N1O2N1O2N2N1O2N1O2N1O2N1O2N2N1O2N1O2N1O2N1O2N1O2N2N1O2N1O2N1O2N1O2N1O2M3N1N3M2N3M2M4L3L5F:F9F;F9G:F:F?", + "choices": [ + "A. Plastic.", + "B. Ceramic.", + "C. Fiberglass.", + "D. Porcelain." 
+ ], + "answer": "B", + "type": "material", + "image": "images/vqa_243.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01959650.jpg", + "mask_rles": [ + { + "size": [ + 1200, + 1600 + ], + "counts": "]k0S:]k00O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O1000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000O10000001O2N2N1O1O1O1O001O001O00001O1O2N2N1O1O1O1O1O1O001O001O001O00001O00001O001O1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1PJgSOg2[l0TMkSOi2Vl0SMoSOk2Rl0QMSTOm2nk0PMUTOo2mk0lLWTOS3jk0jLYTOU3hk0hL[TOW3fk0eL]TO[3ek0aL`TO\\3ak0`LdTO^3]k0_LcTOc3^k0YLdTOh3^k0TLdTOl3]k0PLgTOo3Zk0nKhTOR4Yk0kKiTOU4Yk0fKjTOZ4Wk0cKkTO]4Vk0_KmTOa4Tk0\\KnTOd4Tk0WKnTOj4Tk0RKnTOn4Vk0kJmTOU5fm0100O100O1O1O1001O001O001O1O1O1O2N1O2N1O2N1O2N2N1O2N1O2N1O2N1O2N2N1O2N1O2N1O2N1O2N1O2N2N1O2N1O2N1O2N1O2N1O2M3N1N3M2N3M2M4L3L5F:F9F;F9G:F:F?", + "choices": [ + "A. Patterned.", + "B. Matte.", + "C. Smooth.", + "D. Rough." 
+ ], + "answer": "C", + "type": "texture/pattern", + "image": "images/vqa_244.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01959650.jpg", + "mask_rles": [ + { + "size": [ + 1200, + 1600 + ], + "counts": "]k0S:]k00O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O100000000O100000000O10000000000O100000000O100000000O100000000O100000000O1000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000O10000001O2N2N1O1O1O1O001O001O00001O1O2N2N1O1O1O1O1O1O001O001O001O00001O00001O001O1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1O2N1O1O1PJgSOg2[l0TMkSOi2Vl0SMoSOk2Rl0QMSTOm2nk0PMUTOo2mk0lLWTOS3jk0jLYTOU3hk0hL[TOW3fk0eL]TO[3ek0aL`TO\\3ak0`LdTO^3]k0_LcTOc3^k0YLdTOh3^k0TLdTOl3]k0PLgTOo3Zk0nKhTOR4Yk0kKiTOU4Yk0fKjTOZ4Wk0cKkTO]4Vk0_KmTOa4Tk0\\KnTOd4Tk0WKnTOj4Tk0RKnTOn4Vk0kJmTOU5fm0100O100O1O1O1001O001O001O1O1O1O2N1O2N1O2N1O2N2N1O2N1O2N1O2N1O2N2N1O2N1O2N1O2N1O2N1O2N2N1O2N1O2N1O2N1O2N1O2M3N1N3M2N3M2M4L3L5F:F9F;F9G:F:F in the image?", + "choices": [ + "A. The masked object is beige.", + "B. The masked object is white.", + "C. The masked object is light brown.", + "D. The masked object is blue." 
+ ], + "answer": "B", + "type": "color", + "image": "images/vqa_245.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01968981.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "al\\<:ag0g0[O6K4L5K4L3N4K3N2M4M2M4M3L4M3M3M2N2N3M2N2N2N2O1N2O1N2N2O1N2O0O2N2O1N101N2O1O1O0O2O1N101O1N101N2O001N2O1O001O1O1O1O1O1N2O1O1O1O1O0O101n\\O`Kbb0`4]]OdK`b0\\4]]OgKcb0Y4\\]OhKdb0X4\\]OhKdb0k4O001O01[O]]OkKcb0U4_]OhKbb0X4_]OeKcb0[4`]O`Kbb0_4`000O2O0O1N2M3N2O1O2N1O1O2N1O2N2N1O1O2N100O2N1O1O2N1O2N2N1O2N1O2N2N2N2N2N1O2N2M3M3N2M3N3L3N3L3M3M3M4M6I5K5J5J7J8Ec[\\8" + } + ], + "question": "What is the primary material of the envelope of ?", + "choices": [ + "A. Canvas.", + "B. Rubber.", + "C. Polyester.", + "D. Plastic sheeting." + ], + "answer": "C", + "type": "material", + "image": "images/vqa_246.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01968981.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "al\\<:ag0g0[O6K4L5K4L3N4K3N2M4M2M4M3L4M3M3M2N2N3M2N2N2N2O1N2O1N2N2O1N2O0O2N2O1N101N2O1O1O0O2O1N101O1N101N2O001N2O1O001O1O1O1O1O1N2O1O1O1O1O0O101n\\O`Kbb0`4]]OdK`b0\\4]]OgKcb0Y4\\]OhKdb0X4\\]OhKdb0k4O001O01[O]]OkKcb0U4_]OhKbb0X4_]OeKcb0[4`]O`Kbb0_4`000O2O0O1N2M3N2O1O2N1O1O2N1O2N2N1O1O2N100O2N1O1O2N1O2N2N1O2N1O2N2N2N2N2N1O2N2M3M3N2M3N3L3N3L3M3M3M4M6I5K5J5J7J8Ec[\\8" + } + ], + "question": "Which statement accurately describes a feature of in the image?", + "choices": [ + "A. It has a simple horizontal striped pattern.", + "B. It has a small basket hanging underneath.", + "C. It is primarily colored green and white.", + "D. There is no visible basket attached to it." 
+ ], + "answer": "B", + "type": "shape", + "image": "images/vqa_247.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01975150.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "obW95jo03M3M3N2N1N2O1N100O1000001O01O0000O100000O2N1N2L4M3000010O0001O001O010O001O0010O010O010O01O1O010O001O010O000001O0000001O0000001O01O00000000000000000000000mNF`RO:_m0H`RO8_m0J`RO6`m0K_RO5`m0L`RO4`m0M_RO3am0M_RO3am0N^RO2cm0N\\RO2dm0N\\RO3dm0M[RO3fm0LZRO4fm0MYRO3hm0LXRO4hm0MWRO3jm0MURO3km0MURO3lm0MSRO3nm0LRRO4nm0MQRO3Pn0LPRO4Pn0MoQO3Rn0MmQO3Sn0MmQO3Tn0MkQO3Vn0LjQO4Vn0MiQO3Xn0LhQO4Xn0MgQO3Zn0MeQO3[n0MeQO3\\n0McQO4]n0KcQO5]n0LbQO4_n0KaQO5_n0L`QO4an0L^QO4bn0L^QO4cn0L\\QO4en0K[QO5en0LZQO4gn0KYQO5gn0LXQO4in0LVQO4jn0LVQO4kn0LTQO4mn0KSQO5mn0LRQO4on0KQQO5on0LPQO4Qo0=001O10O01O001O001O1O001O001O001O1N1O2N1N3L4M3Mm\\P:" + } + ], + "question": "What is the shape of in the image?", + "choices": [ + "A. It has spoon-like, cupped ends.", + "B. It has scalloped gripping ends.", + "C. It is a single, straight utensil with a pointed tip.", + "D. It has flat, spatula-like ends." 
+ ], + "answer": "B", + "type": "shape", + "image": "images/vqa_248.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01975150.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "obW95jo03M3M3N2N1N2O1N100O1000001O01O0000O100000O2N1N2L4M3000010O0001O001O010O001O0010O010O010O01O1O010O001O010O000001O0000001O0000001O01O00000000000000000000000mNF`RO:_m0H`RO8_m0J`RO6`m0K_RO5`m0L`RO4`m0M_RO3am0M_RO3am0N^RO2cm0N\\RO2dm0N\\RO3dm0M[RO3fm0LZRO4fm0MYRO3hm0LXRO4hm0MWRO3jm0MURO3km0MURO3lm0MSRO3nm0LRRO4nm0MQRO3Pn0LPRO4Pn0MoQO3Rn0MmQO3Sn0MmQO3Tn0MkQO3Vn0LjQO4Vn0MiQO3Xn0LhQO4Xn0MgQO3Zn0MeQO3[n0MeQO3\\n0McQO4]n0KcQO5]n0LbQO4_n0KaQO5_n0L`QO4an0L^QO4bn0L^QO4cn0L\\QO4en0K[QO5en0LZQO4gn0KYQO5gn0LXQO4in0LVQO4jn0LVQO4kn0LTQO4mn0KSQO5mn0LRQO4on0KQQO5on0LPQO4Qo0=001O10O01O001O001O1O001O001O001O1N1O2N1N3L4M3Mm\\P:" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. It is made of plastic.", + "B. It is made of metal.", + "C. It is made of wood.", + "D. It is made of ceramic." + ], + "answer": "B", + "type": "material", + "image": "images/vqa_249.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01975150.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "lfoa03mo0006KO0001O010O1O010O0010O01O010O1O010O0010O01O10O01O010O0001O00O10000O10000001O000000001O0O1000001O000N20001O0000O01000000O0100000O10O1000O1000O10O100000O10000000000000000000000O100000000000000000O1000000000000000000000O1000000000000001O000000000001O000000000001O0001O00000000000001O01O00000001O00000000010O000000000010O000010O0000010O00010O00010O000010O00010O05K00^X8" + } + ], + "question": "Which of the following descriptions best fits the shape of ?", + "choices": [ + "A. The masked object is perfectly straight from end to end.", + "B. The masked object has a distinctly curved handle.", + "C. 
The tines of are blunt and rounded.", + "D. The handle of is cylindrical and thick." + ], + "answer": "B", + "type": "shape", + "image": "images/vqa_250.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01975150.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "lfoa03mo0006KO0001O010O1O010O0010O01O010O1O010O0010O01O10O01O010O0001O00O10000O10000001O000000001O0O1000001O000N20001O0000O01000000O0100000O10O1000O1000O10O100000O10000000000000000000000O100000000000000000O1000000000000000000000O1000000000000001O000000000001O000000000001O0001O00000000000001O01O00000001O00000000010O000000000010O000010O0000010O00010O00010O000010O00010O05K00^X8" + } + ], + "question": "What is the texture of ?", + "choices": [ + "A. Smooth.", + "B. Grooved.", + "C. Brushed.", + "D. Hammered." + ], + "answer": "A", + "type": "texture/pattern", + "image": "images/vqa_251.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01975150.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "lfoa03mo0006KO0001O010O1O010O0010O01O010O1O010O0010O01O10O01O010O0001O00O10000O10000001O000000001O0O1000001O000N20001O0000O01000000O0100000O10O1000O1000O10O100000O10000000000000000000000O100000000000000000O1000000000000000000000O1000000000000001O000000000001O000000000001O0001O00000000000001O01O00000001O00000000010O000000000010O000010O0000010O00010O00010O000010O00010O05K00^X8" + } + ], + "question": "Which of the following correctly describes a feature of ?", + "choices": [ + "A. It has two tines.", + "B. It has three tines.", + "C. It has five tines.", + "D. It has four tines." 
+ ], + "answer": "D", + "type": "shape", + "image": "images/vqa_252.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01975150.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "e\\X;2jo06J5L4N1N2O1N2O0gNZ1N2O1O100O11O001O2M4M4K5K3K5K4L5K5M3N1OO1O1O1O100O1O101N2`QOnNPn0R1mQOROQn0Q1kQOROSn0^101N1M2N3N10100O0100O10000O100000001O001O1O2N2N001O001O001O001O001O001O001O001N100O2M3B=O1O00O1O100O2O1N101O1O1O00GXQO[Oin0c0[QOYOfn0e0;00O10O0100O2O0O2O2M102M2O2H\\PO0QeV9" + } + ], + "question": "Which of the following statements accurately describes a feature of in the image?", + "choices": [ + "A. The masked object has a large, round nose.", + "B. The masked object has long, floppy ears.", + "C. The masked object has small, beady eyes.", + "D. The masked object has a slender body." + ], + "answer": "A", + "type": "shape", + "image": "images/vqa_253.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01981955.jpg", + "mask_rles": [ + { + "size": [ + 768, + 1024 + ], + "counts": "Zm\\8a0^g03M1O2N1O2N2N2N2N1O2N1O1O2N2N2N4M0O2N2N1O2O0O2O0LYNnYOi1Qf0WNoYOi1Tf01KVNRZOj1me0VNSZOl1oe0100O3N1O3M00001O00001MjMZZOV2fe0jMZZOV2he000000000000000O101N2O0O2K400N200O2N1M4M2N3L301N1O1O1O2N1O2M3N2N1O4L3M1O9F3M3Jjbe=" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Light blue.", + "B. White or beige.", + "C. Red and yellow.", + "D. Blue with white polka dots." 
+ ], + "answer": "B", + "type": "color", + "image": "images/vqa_254.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01983311.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Zk0f4Yk0100O10000O10000O10000O100O10000O10000O10000O100O10000O10000O100O1O11O1RK`UOZ4bj0bKiUOU4Tk0M2N3M2N3M2N3M2N2N2N2N3M2N2N2N2N3M2N1O1O1O1O1O2N1O1O1O1O1O1O1O2N1O1O00001O001O1iMXROT2jm001O1O000000001O001O001O1O1O1O001O1O001O1O1O001O1O1O1O1O1O001O00001O0000001O00001O00001O00001O001O001O00001O001O001O001O001O00001O001O001O001O00001O00001O001O00001O001O00001O001O00001O001O00001O001O00001O001O00001O00001O00001O0000001O00001O0000001O00001O0000001O0000001O00001O0000001O00001O001O00001O00001O00001O001O00001O00001O00000000000000QPV`0" + } + ], + "question": "What is the texture of ?", + "choices": [ + "A. The object has a wood grain texture.", + "B. The object has a smooth, metallic texture.", + "C. The object has a woven texture.", + "D. The object has a porous, baked texture." + ], + "answer": "C", + "type": "texture/pattern", + "image": "images/vqa_255.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01983311.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Zk0f4Yk0100O10000O10000O10000O100O10000O10000O10000O100O10000O10000O100O1O11O1RK`UOZ4bj0bKiUOU4Tk0M2N3M2N3M2N3M2N2N2N2N3M2N2N2N2N3M2N1O1O1O1O1O2N1O1O1O1O1O1O1O2N1O1O00001O001O1iMXROT2jm001O1O000000001O001O001O1O1O1O001O1O001O1O1O001O1O1O1O1O1O001O00001O0000001O00001O00001O00001O001O001O00001O001O001O001O001O00001O001O001O001O00001O00001O001O00001O001O00001O001O00001O001O00001O001O00001O001O00001O00001O00001O0000001O00001O0000001O00001O0000001O0000001O00001O0000001O00001O001O00001O00001O00001O001O00001O00001O00000000000000QPV`0" + } + ], + "question": "What is the material of ?", + "choices": [ + "A. Wood.", + "B. Woven fabric.", + "C. 
Plastic.", + "D. Wicker." + ], + "answer": "D", + "type": "material", + "image": "images/vqa_256.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01983311.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Zk0f4Yk0100O10000O10000O10000O100O10000O10000O10000O100O10000O10000O100O1O11O1RK`UOZ4bj0bKiUOU4Tk0M2N3M2N3M2N3M2N2N2N2N3M2N2N2N2N3M2N1O1O1O1O1O2N1O1O1O1O1O1O1O2N1O1O00001O001O1iMXROT2jm001O1O000000001O001O001O1O1O1O001O1O001O1O1O001O1O1O1O1O1O001O00001O0000001O00001O00001O00001O001O001O00001O001O001O001O001O00001O001O001O001O00001O00001O001O00001O001O00001O001O00001O001O00001O001O00001O001O00001O00001O00001O0000001O00001O0000001O00001O0000001O0000001O00001O0000001O00001O001O00001O00001O00001O001O00001O00001O00000000000000QPV`0" + } + ], + "question": "What is the texture/pattern of in the image?", + "choices": [ + "A. Smooth with wood grain.", + "B. Woven.", + "C. Smooth and metallic.", + "D. Porous and baked." + ], + "answer": "B", + "type": "texture/pattern", + "image": "images/vqa_257.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/DLC-Bench/images/objects365_v2_01983311.jpg", + "mask_rles": [ + { + "size": [ + 1024, + 768 + ], + "counts": "Zk0f4Yk0100O10000O10000O10000O100O10000O10000O10000O100O10000O10000O100O1O11O1RK`UOZ4bj0bKiUOU4Tk0M2N3M2N3M2N3M2N2N2N2N3M2N2N2N2N3M2N1O1O1O1O1O2N1O1O1O1O1O1O1O2N1O1O00001O001O1iMXROT2jm001O1O000000001O001O001O1O1O1O001O1O001O1O1O001O1O1O1O1O1O001O00001O0000001O00001O00001O00001O001O001O00001O001O001O001O001O00001O001O001O001O00001O00001O001O00001O001O00001O001O00001O001O00001O001O00001O001O00001O00001O00001O0000001O00001O0000001O00001O0000001O0000001O00001O0000001O00001O001O00001O00001O00001O001O00001O00001O00000000000000QPV`0" + } + ], + "question": "What is the color of ?", + "choices": [ + "A. Silver.", + "B. Yellow.", + "C. Brown.", + "D. White." 
+ ], + "answer": "C", + "type": "color", + "image": "images/vqa_258.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/1886.jpg", + "mask_rles": [ + { + "size": [ + 384, + 683 + ], + "counts": "f^g3g0V;5K4K4L4N11B=01001O2N2N:E3N1O1O000001AhESOZ:m0<3N00003M_O\\ECi:2Xbn3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The fourth one counting from the left", + "B. The fifth one counting from the right", + "C. The sixth one counting from the left", + "D. The fifth one counting from the left" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_259.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/1888.jpg", + "mask_rles": [ + { + "size": [ + 384, + 512 + ], + "counts": "PbT4:c;5M3M2O0O10O0O2M2103M3L3N4L2N2N1O0000I7N200O100O1001O0oDYOj:Q1M00L4M3M4O1O2O4L9G5K^i\\1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The second one counting from the right", + "B. The third one counting from the left", + "C. The fourth one counting from the right", + "D. 
The third one counting from the right" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_260.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/2032.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "bQf2a0^;6J2N5L2N1N3N2N1O001O0O100000000O1000000O100000000O10001O000000O1000000000O1000000000001OO101O000000000000000O10000000000O1O1O1M3M300M3M3O1M3M3N2N2N2O100O1001O1O002N2N2N1O2N3M1O1N3N1O2N002N1O2N1O1N10001O000000000O100000000000000000000000000000000000000O100000000O1000O100000O100000000001O000000001O001O00001N101O0O2N1G:G8I8Jij5" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The fourth one counting from the top in the left column", + "B. The third one counting from the top in the right column", + "C. The fourth one counting from the top in the right column", + "D. The third one counting from the top in the left column" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_261.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/2051.jpg", + "mask_rles": [ + { + "size": [ + 384, + 461 + ], + "counts": "X]V13j;4K5L3N2K6M2O1O2O001O001O00001O001O00001O0000010O00000000O2O000O2O0O2O0O101O1N110O001O001002M3NO010O001O010O00010O01O001O010O001O00100O001O010O001O10O010O01O100O101N0001N2N1O2O0O2N2O1N101N1N3N2O1O001O001O1O1O2L6JUZn2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The third one from the top in the left column", + "B. The fourth one from the top in the right column", + "C. The third one from the top in the right column", + "D. 
The fourth one from the top in the left column" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_262.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/2179.jpg", + "mask_rles": [ + { + "size": [ + 384, + 526 + ], + "counts": "a_Y4`0^;5K7K4K3N3L2O2M3N2M2O2M2O2M2O1N2N2N2N2N2N2O0O2N1O2N1O2O0O2M2O1N3L3N2O2M2N2M3O100O1000000000000000000001O0000000O101O000O10001O0O101O0O100O2O0O2N1O2N1O2N1O2N2O0N3N2N2N2M2O3L3M3N2M4K5K5K5K5J:@Pig0" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The third one counting from the left in the first row from the top", + "B. The second one counting from the left in the last row from the top", + "C. The third one counting from the left in the last row from the top", + "D. The fourth one counting from the left in the last row from the top" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_263.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/2613.jpg", + "mask_rles": [ + { + "size": [ + 384, + 684 + ], + "counts": "SW:5i;3N1O2N0O2O1O0O1O1O2N2N3M5K2O1ON2100O1O1O101N2N2O1N1O1O100OJ]EROb:m0bEnN`:P1801O10O01O10O010000O10000O10000001O100O2N2N2O0O1O2N014K10O001O1O0OO_Ob0N1L5N3O1N3M4JTbj6" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one from the left", + "B. The second one from the left", + "C. The first one from the right", + "D. 
The second one from the right" + ], + "answer": "A", + "type": "ordering", + "image": "images/vqa_264.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/2614.jpg", + "mask_rles": [ + { + "size": [ + 384, + 604 + ], + "counts": "QeZ61k;6M2N22N2OO0000001N11O2OO01O00O01000O1N2N2N2oDGX:=aEGj9I]FV1b9mNZF1Fi0P:XOWFOIj0Q:XOSFNMj0R:BnE`0S:]OnEd0R:[OPFc0Q:]OPFb0o9_OQFa0l9CRF=c91YF1f9FZFc0g9[O]Fc0e9ZO]Fe0f9VO\\Fj0f9PO_Fo0Y:0ORO^Ea0`:^OcEa0\\:_OeEa0Y:@iE`0U:_OmEa0Q:@PF`0n9@TF`0V9ZOhF93>T92mFOT9NnF3R9JPG6R9GoF9T9BmF`0S9^OnFb0R9]OoFc0S9ZOnFg0T:1O101O1N`0A6I2\\J" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one counting from the left", + "B. The first one counting from the right", + "C. The second one counting from the right", + "D. The second one counting from the left" + ], + "answer": "B", + "type": "ordering", + "image": "images/vqa_265.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/286.jpg", + "mask_rles": [ + { + "size": [ + 384, + 576 + ], + "counts": "Q`n1=_;8I6J5M2L6K5L3M2N4L2N3L4N2L4M2N3M2N2N3M101N2N2N1O2O1N1O2O1N100O10001N1000001N100000001O000O100010O00001O00000000000O101O0000001N2O0O2O0O2O001N2N2O0O2N2N3M1O2N3K5M2L4M4K5M2M3E;K`0UOoDLUSh3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The 1st one from the left", + "B. The 2nd one from the left", + "C. The 3rd one from the left", + "D. 
The 2nd one from the right" + ], + "answer": "B", + "type": "ordering", + "image": "images/vqa_266.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/290.jpg", + "mask_rles": [ + { + "size": [ + 384, + 512 + ], + "counts": "[oZ49e;4K5L2M4J5I8M3N1N3H7O2M201N2N2O1O1O1O1N2O1O1O1O1O1N3OO01O000O1O100001O1O001OO1000O10001N10000O1O2O0O2N1O2O0O2N2O1N1O3M2N2N4L3M2N1O2M5L3L4L4J:FPQj0" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one counting from the left", + "B. The second one counting from the right", + "C. The first one counting from the right", + "D. The second one counting from the left" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_267.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/291.jpg", + "mask_rles": [ + { + "size": [ + 384, + 576 + ], + "counts": "mR[35i;5E8G9M3N2M3N2N2M3N2N2N2O1N2N3M2N2O1O1O001O1O1O1JRNYFo1g9RNWFn1i95000O10001O000O2O00001N100O01D;M3O2O00eMkFm1U9QNPGl1P9RNTGk1b9O1N2O0O2O0O10O001N100000000000O1O1O2N1UOfE0[:OgEO[:OgE0Z:MiE2W:LlE5S:JnE6R:IoE7Q:GQF9o9FRF:n9ETF9m9FTF:l9EUF in the full image?", + "choices": [ + "A. The second one counting from the left in the middle row", + "B. The third one counting from the left in the top row", + "C. The fourth one counting from the left in the middle row", + "D. 
The third one counting from the left in the middle row" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_268.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/2910.jpg", + "mask_rles": [ + { + "size": [ + 384, + 1210 + ], + "counts": "`X[:1Q86aHo3Y6Q1L3O0O2O0O1001O1O00000000000000000000000000000000000000001O00O100000000001O000000000001O00O10000000001O0000O1000000000000001O00O1000000000000000000000000000000000000001O0000O10000000000001O00000001N10000001N100000O101O0000000001O00O1000000000000O10000001O00O10000O2O00O2O00000O1000000000001N2N5KV1PK[K3XSR2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The second one from the left in the first row.", + "B. The first one from the right in the first row.", + "C. The second one from the right in the first row.", + "D. The second one from the right in the second row." + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_269.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/2922.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "eRY1122g;OXD3g;MYD000g;2ZD50>1]OO3V:d1M000O100000001OO1001O00000000O11O00000000O1001OO11O00O1000000001O01N11O000O1001O00O11O00O1000000000000001O000000O1001O00000000O1000O11O00000000O100001O0000O2O000000O11O00001O004LWe\\2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The 2nd one from the left in the 2nd row", + "B. The 3rd one from the left in the 2nd row", + "C. The 2nd one from the left in the 3rd row", + "D. 
The 3rd one from the left in the 3rd row" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_270.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/2938.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "h_6:]:Z1N2O00000000000000000000000000000000000000000000000000001O000001OO1001O00000000000000000000000000000000000000000000000000000000000O2XOWRi3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The last one from the left", + "B. The first one from the right", + "C. The first one from the left", + "D. The second one from the left" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_271.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/2941.jpg", + "mask_rles": [ + { + "size": [ + 384, + 484 + ], + "counts": "[Sg23j;Z2iM1O000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O100000000000000000000000000000001O0000000000000000000000000000O11O0000O11O0000000000O1001O00O11O0000000000000000000000000000000000000000001O3LdT_1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The second one counting from the left in the second row counting from the top.", + "B. The second one counting from the right in the first row counting from the top.", + "C. The first one counting from the right in the second row counting from the top.", + "D. The second one counting from the right in the second row counting from the top." 
+ ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_272.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/3.jpg", + "mask_rles": [ + { + "size": [ + 384, + 512 + ], + "counts": "gae37i;2M2O1O1O1ON2O2M3NYfV2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The third one counting from the left", + "B. The second one counting from the right", + "C. The fourth one counting from the right", + "D. The third one counting from the right" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_273.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/317.jpg", + "mask_rles": [ + { + "size": [ + 384, + 579 + ], + "counts": "jje12l;3M3N2O2M2N2N2O1O1O1O000100O1O001O1O2N3MT]k4" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The third one counting from the left.", + "B. The fourth one counting from the left.", + "C. The fifth one counting from the left.", + "D. The fourth one counting from the right." + ], + "answer": "B", + "type": "ordering", + "image": "images/vqa_274.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/3269.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "ZYP35h;4HIdD9Z;IeD7[;IeD8Z;IdD8\\;601O00000000000002N1O00001O1O2N1N101N2O1Mhb^1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one counting from the top in the second column counting from the right.", + "B. The second one counting from the top in the first column counting from the right.", + "C. The second one counting from the top in the second column counting from the right.", + "D. 
The second one counting from the top in the second column counting from the left." + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_275.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/3281.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "lU_35i;3M2O1N3M2N2O10000O101O000O100O1I]OPEd0P;600O2O000O100O101O0O10000O2N1N200O100O10000O101O0000000000O1000O100N1O2000O10000000001N10000O1O100O100O1O1O2O1N2N2N1N3N1O2O0O2N1O2O1N5K2N2Nkj8" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one from the top in the first column from the right.", + "B. The second one from the top in the second column from the right.", + "C. The second one from the top in the first column from the right.", + "D. The second one from the bottom in the first column from the right." + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_276.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/3284.jpg", + "mask_rles": [ + { + "size": [ + 384, + 512 + ], + "counts": "eWR29f;2N1N2O100O1O1O010O100O010O100O010O10O100O10O010O01000O0100O10O01O1000O010000O010O01O1O001O1O001WO^ORFc0i90iE1U:n0M3M201O1N2O1O1O1O1O001O1O001O100O2N101O1N1O2O1O0O2O0O2O000O101O0O2O0000001O0O2O000000000000O1O1M3O1O1O1O1O1O1O1O100O10001N10000O10001N10001N10001O0O2O00001N101O10OO2O0O2O1N2O1N2N2N2M3M3N3M2M4L5J6J9@ZVU2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The second one counting from the left in the third row counting from the left.", + "B. The third one counting from the right in the second row counting from the left.", + "C. The second one counting from the right in the third row counting from the left.", + "D. The third one counting from the right in the third row counting from the left." 
+ ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_277.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/3500.jpg", + "mask_rles": [ + { + "size": [ + 384, + 613 + ], + "counts": "Q^e2:c;6K5K4L4M3L4M3L4M2N2N3M2N3M2N3M2N3M3N2M2N101N102M2O1O1N101O1O001O001O010O000010O00001O001O1O001O001N2O1N101O1O1N2O1N2N3N1N2N3M2N2N3M2N3M2N3L4M3M3L5L3L6K6F_Uc3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one counting from the top", + "B. The first one counting from the bottom", + "C. The second one counting from the bottom", + "D. The first one counting from the left" + ], + "answer": "B", + "type": "ordering", + "image": "images/vqa_278.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/3644.jpg", + "mask_rles": [ + { + "size": [ + 384, + 577 + ], + "counts": "]UW25g;8H6K4L4M3M2N3M2N2O2M2O1O1N2O001O1O1O1O1O1O1O10O01O001O1O01000O010O000010O0100O0100O0010O01000O01O10O0100O00100O010O1O10O01000O0100000O010000000O10O1000O1000000000000O100000O10O100000000O100000000000000000000000O1000000000000O2O00000O101O001O0O10001O0O10001N1O2O1N1O2N2N2N1O2N2N3L3N3L4M6GPlj2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one counting from the top", + "B. The first one counting from the bottom", + "C. The second one counting from the bottom", + "D. 
The last one counting from the bottom" + ], + "answer": "B", + "type": "ordering", + "image": "images/vqa_279.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/3696.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "UeR1?];:F8J4J7J4M3L4M4M2M3N2O0O3M2N2N2M3O0O2O2M2O0O2N2O1N2N2O1N2N101N2N2O0O1O2O0O2O0O2O0O2O001N2O0O101O001O001N10000000001N1000001O000000001N10000000000000001O00000000000000000001O000000000000000000001O00000000001O000O101O001O00001N101O0O2O00000O2O000O2O000O2O000O2O000O2O0O2O1N2O0O2O1N101N101N101N1O2O0O2O1N102M1O2N2N2N2N1O2N2N1O2N2N2N2M4M2M4L3N3L3M6H in the full image?", + "choices": [ + "A. The second one counting from the right in the top row", + "B. The third one counting from the right in the middle row", + "C. The second one counting from the right in the middle row", + "D. The second one counting from the left in the middle row" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_280.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4.jpg", + "mask_rles": [ + { + "size": [ + 384, + 575 + ], + "counts": "afW36h;4M2O1O1N2O1O1OO2N1O1O2M3M5K_YZ3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The third one counting from the left", + "B. The second one counting from the right", + "C. The fourth one counting from the right", + "D. 
The third one counting from the right" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_281.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4063.jpg", + "mask_rles": [ + { + "size": [ + 384, + 384 + ], + "counts": "_ZW2=_;7K4L3N3L3N2N3M1O2N2N2O0O101N101N10001O000O10000000000000000001O0O10001N101N101N2N101N2N2N2N2M3N3L4L5J8Fdid1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The second one from the top", + "B. The first one from the bottom", + "C. The first one from the top", + "D. The last one from the top" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_282.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4066.jpg", + "mask_rles": [ + { + "size": [ + 384, + 531 + ], + "counts": "^R`1d0X;8J5K5J5M3L4L4M2N2N2N3L3O1N2N2M3O1N1O2N2O0O2O1N101N100O2O000O101O0O101O0O10001O0000000O10000000000000000001O00000000001N10000O101N10001N101O0O2O001N2N101N2O1N2N2N2N2N2N2N3M2N3M2N3L4L4M3L4L4J8Egeb3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one from the top", + "B. The third one from the top", + "C. The second one from the bottom", + "D. The second one from the top" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_283.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4405.jpg", + "mask_rles": [ + { + "size": [ + 384, + 580 + ], + "counts": "lPR45i;3N3M101N2O1O1O1O00O1O1O2N1N4L4LVSa2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one in the top row", + "B. The last one in the bottom row", + "C. The second one in the bottom row", + "D. 
The first one in the bottom row" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_284.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4406.jpg", + "mask_rles": [ + { + "size": [ + 384, + 604 + ], + "counts": "^c\\38e;7K3M3L4N1N2N3M2N2O1N2O1O1O1N2O1O2N5K1O00000000KfEcN[:[16O2N1N2O1O2N1O2N1O2N2N2M3M3M3N3LfTV3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The 4th one counting from the right", + "B. The 5th one counting from the left", + "C. The 6th one counting from the right", + "D. The 5th one counting from the right" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_285.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4412.jpg", + "mask_rles": [ + { + "size": [ + 384, + 576 + ], + "counts": "Q[a19e;5L2N3M2O1O2M2O1N2O1O2N1O2N3M3M00I7N2N2O2N1O2N1N2O2N2N1O3L3LRQk4" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The second one counting from the left", + "B. The fourth one counting from the left", + "C. The third one counting from the left", + "D. The third one counting from the right" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_286.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4493.jpg", + "mask_rles": [ + { + "size": [ + 384, + 577 + ], + "counts": "kiS35a;`1fNb1jEjLh3k1WOX7K4L2N2O1N2O00000001O1O0O2N3M2N4\\I5k0V7WN[_\\3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The 8th one counting from the left", + "B. The 9th one counting from the right", + "C. The 9th one counting from the left", + "D. 
The 10th one counting from the left" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_287.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4495.jpg", + "mask_rles": [ + { + "size": [ + 384, + 576 + ], + "counts": "V^c03l;101N101O001O000O10000O1O1O1O1O1O00100O0O100O20OO1001O01O00O01010OO10O1010O0O0010010NO2010kDAh:?XECf:=ZEDd:<^EDa:<_EE`:<`EE_::aEG_:7bEJ_:4aEM_:3`EN`:2^E0c:N\\E4d:I]E8d:H[E9e:F[E;e:EZE in the full image?", + "choices": [ + "A. The 2nd one from the left", + "B. The 1st one from the right", + "C. The 1st one from the left", + "D. The last one from the right" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_288.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4497.jpg", + "mask_rles": [ + { + "size": [ + 384, + 570 + ], + "counts": "dVk433N9g0^8_OUHJTOl1Q4UNmK;0E2Og00V1>bNj2S3b1_LaN^3R5O1N2O1O100O10000O01O1M3DBWE]Oo:3UY`1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The third one counting from the right", + "B. The fourth one counting from the left", + "C. The fifth one counting from the right", + "D. The fourth one counting from the right" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_289.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4895.jpg", + "mask_rles": [ + { + "size": [ + 384, + 532 + ], + "counts": "m]Y43h;9J6K2N4L3N2M3N2M3N2M2O2M3N1O2N1O2N1O1O2N3M1O1O2N1O1O1O100O1O10O01O10O01O0010O01O001O01O02O3L5K2O0O2OO010O0O2O1N1O2N1O2N2N2M2O2N2O010O1O010]OVFnNk9P1YFmNh9Q1ZFnNf9R1ZFnNg9Q1ZFnNg9P1ZFoNh9P1YFnNj9P1e0M3M2O0BPEKR;3oDMQ;1RENQ;KSE5\\;0O01000001O0000001O0O101O1O2Mj`h0" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. 
The third one counting from the right", + "B. The fourth one counting from the left", + "C. The fifth one counting from the right", + "D. The fourth one counting from the right" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_290.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4904.jpg", + "mask_rles": [ + { + "size": [ + 384, + 522 + ], + "counts": "ccR2;b;5M3L4L3O1M3N2N2N1O2N1O2N1O1O2N2N1O1O101N1O101N100O101O0O2O000O10000O101O0000000000000000000000000000O2O00001N101N2O1N2O1N2N3N1N3M2N3M2N3M3M4K5KbhU3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The 2nd one in the 2nd row", + "B. The 3rd one in the 2nd row", + "C. The 1st one in the 3rd row", + "D. The 2nd one in the 3rd row" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_291.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4922.jpg", + "mask_rles": [ + { + "size": [ + 384, + 513 + ], + "counts": "ZSV17d; in the full image?", + "choices": [ + "A. The first one from the left", + "B. The second one from the right", + "C. The third one from the left", + "D. The second one from the left" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_292.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4934.jpg", + "mask_rles": [ + { + "size": [ + 384, + 483 + ], + "counts": "hiW3131g01XO2i:NlE:T:EjE7]OK]fm0m9VOfEM=n0k9VOhEL=n0j9WOXFk0g9TOYFn0CnNm95`FT1^9lNaFV1]9kNbFV1^9jNaFW1^9jNaFW1_9hNaFY1^9hNbFX1^9gNbFZ1^9gN`FZ1`9gN^FZ1b9fN]F[1b9b000000000000000001O0000001O0000001O0O2O0GSF_Nn9`1RF_NP:_1RF`No9^1RFbNo9\\1RFcNQ:[1oEeNR:Y1oEfNT:X1kEhNW:U1=M2G9K5M5K5JWfY1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. 
The second item from the right in the bottom row", + "B. The third item from the left in the bottom row", + "C. The third item from the right in the bottom row", + "D. The third item from the right in the top row" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_293.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4939.jpg", + "mask_rles": [ + { + "size": [ + 384, + 480 + ], + "counts": "kfj4a0[;7K3L6K4M2N3M2N2N3N1N3M2O2M4M2M2O1N2O00001N10000000000000O0100000000000O00100O1O010O1O10O01N2O1O2O0O2M2O2N2M3O1M2O2N2M3N3K4M4K4M4J6K_5" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The bottom one in the rightmost column", + "B. The second one from the bottom in the rightmost column", + "C. The second one from the top in the rightmost column", + "D. The second one from the bottom in the middle column" + ], + "answer": "B", + "type": "ordering", + "image": "images/vqa_294.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/4948.jpg", + "mask_rles": [ + { + "size": [ + 384, + 576 + ], + "counts": "chh2j0Q;9D in the full image?", + "choices": [ + "A. The third from the left in the second row", + "B. The second from the left in the third row", + "C. The third from the left in the third row", + "D. The fourth from the left in the third row" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_295.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5145.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "gUd3 in the full image?", + "choices": [ + "A. The first from the top down", + "B. The second from the bottom up", + "C. The first from the bottom up", + "D. 
The second from the top down" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_296.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5362.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "bnS34j;2O2O0O10000O1001O01O0001O1N2N`Q_1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The third item from the left", + "B. The second item from the right", + "C. The third item from the right", + "D. The fourth item from the right" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_297.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5375.jpg", + "mask_rles": [ + { + "size": [ + 384, + 580 + ], + "counts": "\\co11o;2N0nSg1OokXN9H7J4L4M4M3L3M2O2M2N2N2N2N2O1N2N1O2O1N2N101N2N101N101N2O0O1O2O001N1O1O1O100O100O10000000O0100000O10O1000000000O0100000000O100000O010000O100000000000000000000000000000O10001O0000000000001O00000001O000000000001O00000000010O00001O0000001O00001O0O101N10000O1O2O001N100O2N101N1O101N2N2O2M3M2O1N2M101O1O2M2O2M4L4J9GTUW1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The 3rd one in the 3rd row", + "B. The 4th one in the 3rd row", + "C. The 3rd one in the 4th row", + "D. The 4th one in the 4th row" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_298.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5461.jpg", + "mask_rles": [ + { + "size": [ + 384, + 384 + ], + "counts": "\\hS15h;6L4L2O2M2N2O1N2N2O1N1O2O0O2O0O1O10000O100000000000O10000O101O0O2O0O2N1O2O2M2M3N2N3L4Lhgk2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one in the second row", + "B. 
The second one in the first row", + "C. The second one in the second row", + "D. The first one in the first row" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_299.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5571.jpg", + "mask_rles": [ + { + "size": [ + 384, + 512 + ], + "counts": "cZd4<`;:H6J4L4L4M3L3N2N3M2N2N2O1N2O1N1O2O0O2O0O2O000O2O0O101O0O1000001O000000000000000000O10001O0O101O0O101O0O101N2O0O2O1N101N2N2N2N2N2N2N3M2M4M4K4L4L4K6J]a?" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The second row from the top, in the first column from the left.", + "B. The first row from the top, in the first column from the right.", + "C. The second row from the top, in the first column from the right.", + "D. The second row from the top, in the second column from the right." + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_300.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5572.jpg", + "mask_rles": [ + { + "size": [ + 384, + 614 + ], + "counts": "oYT4`0Y;:]Ob0I6K6L4K5M2M4L3N2N1O2N2N1N3N2N2N101N2N2N101N101N101N101O0O10001N100O101O000O1000001O000000000000000000O10001O000O10001O000O101O0O2N101N1O2O0O2O1N101N2N2N1O2O1N2N2O1N3M3L4M3L4K5L5J6J6^Od0AoZn1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The third row from the top, the second one from the right", + "B. The second row from the top, the first one from the right", + "C. The third row from the top, the first one from the right", + "D. 
The first row from the top, the third one from the right" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_301.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5575.jpg", + "mask_rles": [ + { + "size": [ + 384, + 384 + ], + "counts": "gP`36R;j0J4M3M5M1N3N2M2O1O2M2O1O1O1O2N100O3M100O2O1N101N100O2O0O101O1O0O101O000000001O000000000000000001O0O1000001O000O2O00001N101O0O2O0O2O1N2N2O1N1O2N2N2N101M4M3M2N3L5K3M5I7JU4" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first column from the right, the second item from the bottom upwards.", + "B. The second column from the right, the first item from the bottom upwards.", + "C. The first column from the left, the first item from the bottom upwards.", + "D. The first column from the right, the first item from the bottom upwards." + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_302.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5813.jpg", + "mask_rles": [ + { + "size": [ + 384, + 512 + ], + "counts": "g[X26g;5N01O1O1O1O0000I7K5M3L4L4M3N2N2N2L4N2N2O1N2O1O1O1O1O1O1O1O1O1O1O1N2O1O1O1O100O2N1O1O1O100O001O2N100O1O100O010O2O0O10000O1000O11O0O100000O100000001O000O101O00010O001O001O1O1O001O1O1O1O1O1O1O001O2N1O1O2N1O2N2N2N2N3M3M4L3M4L3M5J7Fk`\\2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The one in the upper part", + "B. The one on the left side", + "C. The one in the bottom part", + "D. 
The one in the center" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_303.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5819.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "V]V1154_;:L4M2N2N2O0O1O1N2M3M3M3N2N2M3N1O2M201N2N1O101N1O1O101N10000O2O0O100000001O000000000000000000000001O0000001O000000000000ORNQFm1o9SNQFm1o92O10O10O11SNQFg1U:O1N100O2N100O2O0O100O1O2N2O1N100O2O1O2M101N101O0O2N100O2O0O2O2M2N2M4K6JhV[2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The 1st one in the 1st row", + "B. The 2nd one in a row of two", + "C. The 1st one in the 2nd row", + "D. The 2nd one in the 1st row" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_304.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5918.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "Qj><`;7K5K4L3M4M2N2N1O2N101N100O1O100O10000O100O101O0000000O1000000000000000000000000000000001O0001O01O1O001O1O001O001O001O001O1O1O1O001N2M4L3M3N3I7K8FQ^^3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The 1st one in the 2nd row", + "B. The 2nd one in the 1st row", + "C. The 2nd one in the 2nd row", + "D. 
The 1st one in the 1st row" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_305.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/5930.jpg", + "mask_rles": [ + { + "size": [ + 384, + 514 + ], + "counts": "hm]4:^;>H5J5K5L5L4K5L2M4M2N2M3N3M2N2O1N3N1N2N2O1N2O001N2O1O0O2O1O001O001O001O1O001O001O00001O000001O0000001O001N101N101N1O2N2N1O2N2N1O2N2N2M4M3L3M3M4L4L5J6J6H9H=ZO^Ve0" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one from the left to the right", + "B. The second one from the right to the left", + "C. The second one from the left to the right", + "D. The first one from the right to the left" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_306.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/6.jpg", + "mask_rles": [ + { + "size": [ + 384, + 575 + ], + "counts": "R[d38f;4M3N1N2N2O1O2N1N2O2N2N00M3O1O2N1N2O2N1O2M3N3LPYj2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The fourth one from the left to the right", + "B. The fifth one from the left to the right", + "C. The sixth one from the left to the right", + "D. 
The fifth one from the right to the left" + ], + "answer": "B", + "type": "ordering", + "image": "images/vqa_307.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/6646.jpg", + "mask_rles": [ + { + "size": [ + 384, + 384 + ], + "counts": "_b`06d;:K2O0000000O1000001O0001O00000000000000000000000000000000000000000000000000000000000000001OO100001O000001O000000000000000000000000000000000000000000000000000000000000000000001O0001O000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001O00O10000000001O00O10000001O0000O100001O0000O100001O0000O10000001O0000O1001O0001O00000O10002Mdig1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The fourth one from the top down", + "B. The sixth one from the top down", + "C. The fifth one from the top down", + "D. The fifth one from the bottom up" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_308.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/6650.jpg", + "mask_rles": [ + { + "size": [ + 384, + 480 + ], + "counts": "TRo0b0S;g0^O=F8H6K4L4M2N3M2N1O2N2N2N101O001O001O00000000000000000001O00000000000000000000O1000000000000000000000000000000O10000000000000000000000001O0000000O10001N100O1O2O0O1O2N2N2N1O2N2N2N2M5L3K6K5J9Dk0POkU`3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The fourth row from the top, the first one from the left", + "B. The third row from the top, the second one from the left", + "C. The fourth row from the top, the second one from the left", + "D. 
The second row from the top, the fourth one from the left" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_309.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/6651.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "jUj11132MY;W1eDlN_:f1C4M3M3N1N=@i0iFfL00\\8Q4_O100O100O2O000000001N101O001O1O1O3M4L3M1O2N01N100O101gMkHBY7\\OSJE_NROo7S1l2H9^Oa__2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The 1st one in the 1st row", + "B. The 2nd one in the 2nd row", + "C. The 2nd one in the 1st row", + "D. The 3rd one in the 1st row" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_310.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/6660.jpg", + "mask_rles": [ + { + "size": [ + 384, + 384 + ], + "counts": "gmm012013[;b0J3O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1N2O101N1O100000000000000000000000000000000000000000001O000000O1001O000000O100001O00O1000000000000000000001OO100000000000000000000000000000000000000000000001OO1001OO1001OO1000000001OO1000000000000000000001OO1000000001OO10000001O0000000001O00O100001O0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O100000000000O11O00O2O000000000000002M_R<" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The second from the top down", + "B. The third from the bottom up", + "C. The second from the bottom up", + "D. 
The bottom one" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_311.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/6743.jpg", + "mask_rles": [ + { + "size": [ + 384, + 384 + ], + "counts": "`Yn24g;;H4L4N3L3M2N2O1N2N101N1O10001O0O1000000000000000000001O001O0O2O001N2N101N2N3M2M4L4L5HeVQ1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first column from the right, the first row from the top", + "B. The second column from the right, the second row from the top", + "C. The first column from the right, the second row from the top", + "D. The first column from the left, the third row from the top" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_312.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/680.jpg", + "mask_rles": [ + { + "size": [ + 384, + 931 + ], + "counts": "ega71a;g0bDDO0178AN2]8c3@4K4N00000000000000000000000000000000000000000001OO10000000000000000000000001O00O10O1000000001O0000000001O00O10000000000000000000000001O00O1000000000001OO100000000O100001O0000001O0000000000000000O1000000000000QMXHELW1m7TOUHf1l7[NRHf1o7U11N2oNlGbM00]8W2gG^MN31ON0e8O^G0N12NNb0OBOO]bP2" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first one from the bottom and the fourth one from the left", + "B. The second one from the bottom and the fifth one from the left", + "C. The first one from the top and the fifth one from the left", + "D. 
The first one from the bottom and the fifth one from the left" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_313.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/6804.jpg", + "mask_rles": [ + { + "size": [ + 384, + 550 + ], + "counts": "Vf\\29e;4L4L4L5K3N1N2O1O1O1O1O1O1O2N100O1O2O0O101N2O0O2O001N2O001O1O1N2O1O1O1O00O2M2N2M4M2N2M4M2N3L3N3M3M2O2M3N2M3O1N2O1O2M201N2O2MomY3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The second line from the bottom, the second one from the left.", + "B. The third line from the bottom, the third one from the left.", + "C. The second line from the bottom, the third one from the left.", + "D. The third line from the bottom, the second one from the left." + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_314.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/6813.jpg", + "mask_rles": [ + { + "size": [ + 384, + 413 + ], + "counts": "^[_2<^;>E5L3M4L4M2N3M2N2N2N1O2N2N2N101N2N1O2O0O2O001O0O2O001O0000001O000O11N10000000000O1000001O0O2O0O2N101N2N1O2O0O101N101N101N2N2O1N3M4L4K5K5K8H5J```1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The 3rd one from the right in the 4th row.", + "B. The 2nd one from the left in the 4th row.", + "C. The 2nd one from the right in the 3rd row.", + "D. The 2nd one from the right in the 4th row." 
+ ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_315.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/682.jpg", + "mask_rles": [ + { + "size": [ + 384, + 485 + ], + "counts": "dbT1=];8I7J6L4L5K5K5K4L3O1N2M7bFkMb8o2I4L4N3N3N001O3M5LO0001O00001OO100000000001OO101O001O3M9G4gN`GjNa8P1jGjNY8o0PHlNR8o0UHlNo7o0d1K7J7H6J5J6J`]m3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The second one in the second row", + "B. The third one in the second row", + "C. The second one in the third row", + "D. The third one in the third row" + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_316.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/6827.jpg", + "mask_rles": [ + { + "size": [ + 384, + 512 + ], + "counts": "iX\\1?`;4L2N5K:F9H4L4K5L4L5K6J6J5J6K4L5K8H4L4L1O1O1O001O001O0O2O001O0000001O0000000000000000001O00000000000000000O11N100000O1000O100000000O1000001N10000O10000O2O0O2N1O2N6aL^G81J;U2l9iMfE`1k:@d0UOk\\c3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The first row, the first one from left to right.", + "B. The first row, the second one from left to right.", + "C. The second row, the second one from left to right.", + "D. The first row, the third one from left to right." 
+ ], + "answer": "B", + "type": "ordering", + "image": "images/vqa_317.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/6960.jpg", + "mask_rles": [ + { + "size": [ + 384, + 384 + ], + "counts": "_lj2:b;9I3M4L4L3N2N2N2M3N2O1N2N101N100O2O0O2O0O101O000O1000000O2O00O10000000000000000O2O00000O2O000O2O0O100O2O1N1O2NBeNmE0008Y1l9gNYFX1Y:N2M3N2N3LkNUOZGh0S:K5J_n>CQRA0X=0c91TYO0UT10lkN0nDOTk8" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The third row from the top and the second column from the right.", + "B. The second row from the top and the second column from the right.", + "C. The second row from the top and the third column from the right.", + "D. The second row from the bottom and the second column from the left." + ], + "answer": "B", + "type": "ordering", + "image": "images/vqa_318.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/7153.jpg", + "mask_rles": [ + { + "size": [ + 384, + 557 + ], + "counts": "bSl14h08o9KkE in the full image?", + "choices": [ + "A. The 1st one from the left to the right", + "B. The 2nd one from the left to the right in the 2nd row", + "C. The 3rd one from the left to the right", + "D. The 2nd one from the left to the right" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_319.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/7370.jpg", + "mask_rles": [ + { + "size": [ + 384, + 576 + ], + "counts": "od^2121c;a0C8J6K3M4L4M2M3N2N2N2N1O2O1N1O2O0O101N100000001O00000000O1000001N10001N2O0O2N2O0O2N2N3M2N2N2M4L4L6J5I8IY_e3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The 4th one from the left.", + "B. The 6th one from the left.", + "C. 
The 5th one from the left.", + "D. The 5th one from the right." + ], + "answer": "C", + "type": "ordering", + "image": "images/vqa_320.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/7384.jpg", + "mask_rles": [ + { + "size": [ + 384, + 408 + ], + "counts": "nmU2Q2n94M1O0000000000001O000000000000001O0001O000000000001O000000001O0001O0000000001O0000000000001O000001O000001O0000000000001O000000000001O00000O1]Od0nNofg1" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. The second one from the left in the third row", + "B. The third one from the left in the second row", + "C. The fourth one from the left in the third row", + "D. The third one from the left in the third row" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_321.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wanghaochen/caption/datasets/FSCD-147/FSC147/images_384_VarV2/7488.jpg", + "mask_rles": [ + { + "size": [ + 384, + 683 + ], + "counts": "mee3l0R;4M3L3N2O2M2N101N2O0O2O0O2O0O2O1N2O0O2O001N10001N10001O00001O00000000001O000000000000000001O00000000001O00000O10001O00000O101O001N100O2O001N101O1N1O2O1N1O2N2N2N2M4L3M4L5@YVZ3" + } + ], + "question": "Which row and which position is in the full image?", + "choices": [ + "A. From the bottom up, the first row; from the left to the right, the second element", + "B. From the top down, the first row; from the left to the right, the first element", + "C. From the bottom up, the second row; from the left to the right, the first element", + "D. 
From the bottom up, the first row; from the left to the right, the first element" + ], + "answer": "D", + "type": "ordering", + "image": "images/vqa_322.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000242934.jpg", + "mask_rles": [ + { + "size": [ + 227, + 500 + ], + "counts": "VfP1b0`69H5kIQOl5[1G4M5K1O2N1O2N2N1O1O2N2N1O1O3L[N]Km0b4SO_K_OI[1g4VOcKk0\\4VOcKj0]4VOdKi0\\4WOdKi0\\4VOgKh0Y4WOiKh0W4XOjKg0V4YOjKf0W4YOkKf0T4[OnKb0S4^OmKb0S4^OmKb0S4^OoK`0Q4@QL=P4DPL;P4FPL:o3FSL7n3JQL6o3JQL6o3KPL5o3MPL3P4NoK2Q4NoK2Q4OnK1R40mKOT43jKMW42iKNW43hKMX45_K1b41[K0e41ZKNg44WKLh4X10000000O1000O10O1000001N10000O0100000O100O1O2N10O1001UNPKa1^5J3N1O0O10000O101O0O10001O00001N100O101O0O10001N10001O001N1000O1103L1O000010N2O000000O1O1N2DhIEd\\b1" + }, + { + "size": [ + 227, + 500 + ], + "counts": "Rn`1221g6;K2N2O1O1N2O1O1O0O2O1O1O3L2O001O2M3N2N3M1N2O1O1O1N2O1O1O1O1N2O1O1O1O1O1O00001O01O0kNoJ8R5GoJ8Q5GQK8P5GPK9P5FRK9o4DSKb4A\\Ka0d4@[K`0e4@ZKa0f4@YK`0h4_OfJL:f0P5_OeJK?b0m4DYK:g4D]K:d4D^K;b4E^K;b4D`K;a4D_K\\5E[Jb0f5=001O00001O0O2N1O1O2M2O2N1O1N3N1N2O2L3M5Hlo1N]PNNXe20iZM10O_Rk0" + }, + { + "size": [ + 227, + 500 + ], + "counts": "Qfi03n66L4K2O00001N9H4L2N0ODoIHQ6e0000DoIFQ6:PJEo5?", + "choices": [ + "A. ", + "B. ", + "C. None of the above", + "D. 
Both and " + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_323.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000376093.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "noc51n>1O1O100O100O101N00101N1O100O1O100O1O100O1O100O1O100O1O1N2O1O100000000000000000O1001O0000O02O000000O1001O00O10000001OO1000O1001O7I5M4K6K3M3N2M4M3L3N1O1O1N101O2N1N3N3L4M2N2M5L3L3N2N2M3N1O1N10000O2N1000001N1000001N100O1000000O10000O1000000O1000O0100O10O1000O10O10O10O01000O10000O100O100O00100O10000O00100O01O01O001O10O01O1O1O00001O1O1O1O0O3N1O1O1O1O3L3N1O3L3M4J8I4M5K6H6J6Jf>AlA4OO3LZg`6" + }, + { + "size": [ + 480, + 640 + ], + "counts": "QfX36i>3L4N1N3M2N101O2N0O2O001N101N2O2N1O1O1N3N1O1O1O1N2O00G9O100O100N2O10000N2N1100RFXOd5h0[JZOd5e0YJ_Og5`0\\IEYMMZ9=YIMZMG]9;UI6XMCa98UI8XMBb94bG^O2k1[8fNdG@HR2d8^NcGB_OY2m8UNeGBVOa2U9lMeGHQO^2Y9jMeGm2\\ObLX8`0]Hn2ZOlLP86eHo2YOoLP81hHP3WOPMQ80gHQ3XOoLS8MfHT3WOnLT8MeHV3WOkLW8NbHW3WOkLW8McHY3UOjLY8KbH\\3UOiLZ8IaH_3TOiL\\8G_Hb3TOgL^8E_Hd3SOgL^8E^Hf3SOeL`8C^Hi3QOdLm9]3RFdLm9^3RFbLm9a3nEbLR:V400O100O100O100O1N20O10O10000kFWKR8j4oGUKQ8j4QHVKn7j4RHVKn7j4SHTKn7l4S10000ZLRFZ2n9fMXFT2T9hLnFX13l1n8dNTGZ1m8eNUGY1k8gNWGW1i8iNXGW1Q8TMjGd15UN@P3^8CRH`M]Oo2Z8FXH\\M^Oo2W8FWH`MCk2Q8H\\H]MCl2n7IZHRMI:0j2k7LZHTMI^3k7@WHnL1g3i7[O[Hc0f7^OYHb0g7_OXHb0g7_OVHd0j7]OTHd0k7]OUHb0l7Y300O10M3L4I7N2M3]ObG_Jc8`5?TOPGgKQ9Y4SGbKn8T1nF`15ZMn8T1oFb14nLI2V9]1oFb1h9\\N\\Fb1e9]N\\Fb1d9]N^Fb1]1oLY6^1]Ha1Z1QMZ6\\1^Hb1X1RM[6Y1bHa1T1VM[6W1bHc1S1VM\\6V1bHc1R1WM\\6T1dHQ1UO\\Nl1_O\\6R1dHf1P1XM]6P1eHf1[9XNgFg1Z9VNjFh1W9TNmFk1U9PNoFn1U9hMSGW2m:L2O3M2K5J7E=D0O100O1O100O100O100O100O100O100O100O100O1O100O100O100O100O100O100O100004L5K4LWmj6" + }, + { + "size": [ + 480, + 640 + ], + "counts": "X]c44l>3L5L5K4L4L1N1001ClALS>4oAKQ>5SBGm=4nAI71k=5PBI52k=5PBI61j=6PBI]>77JYA1g>Noe_4" + }, + { + "size": [ + 480, + 640 + ], + "counts": "WbU64l>4K2OO1000000O1000O10000000000000001O000001Nkdk2" + } + ], + "question": 
"Which statement correctly describes the actions and position of ?", + "choices": [ + "A. is holding and standing on .", + "B. is standing on and holding .", + "C. is looking at while standing on .", + "D. is holding and looking at ." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_324.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000376093.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "^aQ1>[>9ZOZOgBP1T=b0K5K3N2N5K4K4K5K7K4L4K7J3L4M2Nf:]O^Ec0b:YOaEg0]5M4K6K3M3N2M4M3L3N1O1O1N101O2N1N3N3L4M2N2M5L3L3N2N2M3N1O1N10000O2N1000001N1000001N100O1000000O10000O1000000O1000O0100O10O1000O10O10O10O01000O10000O100O100O00100O10000O00100O01O01O001O10O01O1O1O00001O1O1O1O0O3N1O1O1O1O3L3N1O3L3M4J8I4M5K6H6J6Jf>AlA4OO3LZg`6" + }, + { + "size": [ + 480, + 640 + ], + "counts": "nn[31n>2OO001001O00N\\d`20a[_M;F6M2N1O2O1O0O2O0O101O0oCWOj9j0PF@l9`0SFCj9=WFEf9a2Q1[MaN4=b2R1YMcN49e2S1XMdN36h2V1UMeN23k2X1SMeN21m2Z1QMeN3Mo2^1nLfN2IS3`1lLgN1HT3a1kLhN1EU3c1jLhN2\\N]OHh3T3iLiN2WNBHe3X3hLiN1SNFZOR4j3WLjN1oMd4W3[KjN2jMg4]3WKiN2gMi4`3UKiN3bMl4e3RKhN3`Mm4i3PKgN4[Mo4n3nJfN5XMo4S4lJfNV6Z1kIfNT6[1lIeNS6[1nIdNR6]1nIcNQ6^1oIbNP6^1QJbNn5_1RJ`Nn5`1RJ_No5b1PJYNU6h1PJjMX6W2kI_M[6a2kISM[6n2iIgL]6Z3cI_Lc6b3]IZLf6g3ZITLj6m3WInKl6R4XIeKm6\\4k1001O1O1O001O1O001O1O001O001O00YOnEaLQ:Z3YFbLf9[3_FeL_9Z3eFaL]9]3lFXLX9g3Q1O1N2O1O100O1O100O1O100O100O100O100O100OnNgL_FY3Z9WM\\Fi2b9`MWFa2e9_1_O^K^Fh4^9?L4O0N3\\McJcK_5[4kJ_I^Of1k5j4UKiJn4V5YKcJi4\\5`KXJd4h5bKlId4S6U20O01N2M3M3O1N2N2O1N2N2O1O2L3O1O2M3N1O2gLWG8j8F[G4i8KZG1g8N]GNd81^GKf83]GJe83^GIf85^GEe8:`G@c8>bGXOUNTO]:a1dGmNj8n0g2N2L6K4J9F8ISgh0" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"QfX36i>3L4N1N3M2N101O2N0O2O001N101N2O2N1O1O1N3N1O1O1O1N2O00G9O100O100N2O10000N2N1100RFXOd5h0[JZOd5e0YJ_Og5`0\\IEYMMZ9=YIMZMG]9;UI6XMCa98UI8XMBb94bG^O2k1[8fNdG@HR2d8^NcGB_OY2m8UNeGBVOa2U9lMeGHQO^2Y9jMeGm2\\ObLX8`0]Hn2ZOlLP86eHo2YOoLP81hHP3WOPMQ80gHQ3XOoLS8MfHT3WOnLT8MeHV3WOkLW8NbHW3WOkLW8McHY3UOjLY8KbH\\3UOiLZ8IaH_3TOiL\\8G_Hb3TOgL^8E_Hd3SOgL^8E^Hf3SOeL`8C^Hi3QOdLm9]3RFdLm9^3RFbLm9a3nEbLR:V400O100O100O100O1N20O10O10000kFWKR8j4oGUKQ8j4QHVKn7j4RHVKn7j4SHTKn7l4S10000ZLRFZ2n9fMXFT2T9hLnFX13l1n8dNTGZ1m8eNUGY1k8gNWGW1i8iNXGW1Q8TMjGd15UN@P3^8CRH`M]Oo2Z8FXH\\M^Oo2W8FWH`MCk2Q8H\\H]MCl2n7IZHRMI:0j2k7LZHTMI^3k7@WHnL1g3i7[O[Hc0f7^OYHb0g7_OXHb0g7_OVHd0j7]OTHd0k7]OUHb0l7Y300O10M3L4I7N2M3]ObG_Jc8`5?TOPGgKQ9Y4SGbKn8T1nF`15ZMn8T1oFb14nLI2V9]1oFb1h9\\N\\Fb1e9]N\\Fb1d9]N^Fb1]1oLY6^1]Ha1Z1QMZ6\\1^Hb1X1RM[6Y1bHa1T1VM[6W1bHc1S1VM\\6V1bHc1R1WM\\6T1dHQ1UO\\Nl1_O\\6R1dHf1P1XM]6P1eHf1[9XNgFg1Z9VNjFh1W9TNmFk1U9PNoFn1U9hMSGW2m:L2O3M2K5J7E=D4K2OO1000000O1000O10000000000000001O000001Nkdk2" + }, + { + "size": [ + 480, + 640 + ], + "counts": "lS`2=^>7J6Jb0^O7K3K4M3M3L4N2M3N2M3N2L4K5O1K5L4M3N2M3N2N2N2N2N2O1N2O1O1O1O1O100O1O1O1O100O100O100O100O10000000000000000000000001O00O10000000000O1000000000000000000001O001O0000001O0000001O001O001O001O1O00001O1O1O001O001O1O1O1O2N1O2N2N1O1O2N3M2N2N3M3M5K3M4L4L3M4L5Km0SO;E6J2N2N2L4N2N3M2N2NTfj4" + } + ], + "question": "Which person is holding , and which person is looking at ?", + "choices": [ + "A. is holding , and is looking at .", + "B. is holding , and is looking at .", + "C. is holding , and is looking at .", + "D. is holding , and is looking at ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_325.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000222317.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "kc^1g0T>>E7J2N2O0O1fCQOnN_O10[;`1fEFlNkNn:_1WF2`9N`F9Y9GgFl0f8SO[GX1Z8hNgG]1S8cNmGb1n7^NRHf1j7ZNVHj1e7WN[Hk1c7UN^Hl1`7TN`Hm1_7RNbHo1]7QNcHQ2[7oMeHR2Z7nMfHS2Y7mMgHT2X7lMhHT2X7kMiHV2V7jMjHW2U7iMkHX2T7hMlHY2S7gMmHZ2R7fMnH[2P7fMPI[2o6dMRI\\2n6dMRI]2m6cMSI^2l6bMTI_2k6aMUI`2j6`MVIa2i6_MVIc2i6]MWId2g6]MYId2f6\\MZIe2e6[M[If2d6YM]Ih2b6XM^Ih2b6XM^Ij2`6VM`Ik2_6TMbIm2]6SMcIm2]6SMcIn2\\6RMdIYOkNK2P2]7mNfISOTOG0Y2T7mNhIkN@GJ`2m6nNiIiN:Z2l5mNjIgN=\\2h5mNlIdN?_2d5mNoKR1P4nNQLR1n3nNSLR1l3nNTLS1k3mNVLS1i3mNZLQ1d3PO]LP1b3PO_LP1`3PO`LQ1_3oNdLo0[3QOfLo0Y3QOhLo0W3QOjLo0U3QOlLn0T3ROmLn0Q3SOPMn0n2ROSMm0m2SOTMm0k2SOUMn0j2ROWMn0h2ROYMo0e2QO\\Mo0c2QO]Mo0c2QO]MP1b2PO^MQ1a2oN_MR1_2nNcMR1\\2nNdMS1[2mNeMS1[2mNeMT1Z2kNgMV1X2iNiMX1V2hNjMY1U2fNlM[1R2eNoM\\1P2cNQN^1n1aNSN_1m1aNSN`1l1_NUNa1k1]NWNc1h1^NXNb1h1^NXNb1h1]NYNc1g1\\NYNe1g1ZNZNf1f1YN[Ng1e1XN\\Nh1d1XN\\Nh1d1WN]Ni1b1XN^Nh1b1XN^Nh1b1XN^Nh1b1XN]Ni1c1WN]Ni1c1WN]Ni1c1WN]Ni1b1YN]Ng1c1YN\\Nh1d1XN\\Nh1d1XN\\Nh1d1YN[Ng1e1YNZNh1f1YNYNg1g1YNYNg1g1ZNXNf1h1ZNWNg1i1YNWNg1h1ZNXNf1h1[NVNf1j1YNWNg1i1WNYNi1g1VNZNj1f1VNZNj1f1UN[Nk1e1TN\\Nl1d1SN^Nl1a1TNaNk1_1TNcNk1]1UNdNj1\\1UNeNk1[1TNgNk1Y1TNhNl1X1TNiNk1W1TNjNl1V1SNmNk1S1UNoNi1Q1VNQOi1o0WNQOi1o0WNSOg1l0YNXOd1h0[N]Oa1c0_N@^1`0aNC]1=cND\\1P2@SN>n1^OVNb0j1\\OXNd0h1[OYNe0g1YO\\Nf0d1YO]Ng0c1WO_Ni0a1TObNl0^1SOcNl0^1ROeNm0[1ROgNm0Y1ROiNm0W1QOkNo0U1POmNo0S1oNPOP1P1oNQOQ1o0nNTOP1l0nNWOP1j0oNXOP1h0oNZOP1f0POZOP1f0nN^OP1b0oN@P1`0POAo0?POBP1>POCo0=PODo0=QOCo0=QOCo0=QOCn0>ROBn0>ROBn0>QOCo0=QOCo0=QOCo0=QOCo0=ROBn0>ROBn0>ROBm0?SOAm0?SOAm0?SOAm0?SOAm0?SOBk0?UOAk0?UOBj0>WOAi0?WOAh0`0XO@h0`0XOAg0?YOAg0?YOBf0>ZOAg0?ZO@f0`0ZO@f0`0ZO_Og0a0YO_Og0a0ZO]Of0d0ZO[Og0e0YO[Of0f0ZOZOf0f0[OYOe0g0[OXOf0h0ZOXOf0h0[OVOf0j0ZOVOf0j0ZOVOUN^LV1\\4e0VOSNaLW1Y4g0TOoMhLY1T4h0TOPNgLX1U4h0TOSN
dLU1X4h0TORNgLS1V4l0ROoMlLS1R4n0ROnMnLS1P4o0SOiMRMV1l3Q1ROhMSMW1k3R1QOfMVMW1i3S1QOcMYMZ1f3S1QOaM\\MZ1d3U1oN_MaM[1`3V1nN`MbMZ1`3W1lN_MfMY1^3X1kN_MhMY1]3X1lNhLfLc0R1\\1\\3Y1POYMfM]1Z3[1POVMgM^1Z3\\1POTMiM^1W3^1QORMjM^1V3`1POQMlM^1T3a1QOoLmM_1R3c1QOkLPNa1o2d1ROjLRN_1l2g1SOhLSN_1k2i1o0UNQOl1Q1QNoNo1S1nMnNR2U1kMkNV2X1fMhNZ2[1cMeN]2_1^MbNc2h1iLfIK0Ob4]3m6001O000000001O00001O0000001O000000001O0000001O0000001O0000001O0000001O00000O2N1L4K5F:J7H7M3L4L5M2N2O2M2N2O1O3M2M2O2N1O100O1O101N1O1O2O0O10000O1000001N10001N1000000O2O00001O000O101O00001N100N2J7L3N2O2N1O1N200O2N101N1O100O101O000O101O0O101O000O101O0O10001N1000000[JgHc3Z7ZLkHc3U7\\LVIY3k6gL_Io2a6PMaIo2`6PMbIn2^6QMaJQ2_5oM`KeNiL0O07h1b7CfMcNa2\\1c5O0000001O000O2O00000000001O00001N1000001N1000001O00001O00000000001N1000001O0000VK" + }, + { + "size": [ + 480, + 640 + ], + "counts": "oRk03k>4aAJP>7mAKS>6lAKS>6lAJU>6iAJX>8dAI:Lh=`0WBBi==WBEg=;YBEh=:XBGg=9YBHf=8ZBIf=6ZBKe=5[BLd=4\\BMd=2\\BOc=1]B0c=O]B3b=L^B5a=K^B7a=I_B8a=G^B;a=E^B=b=B]B`0b=AYBd0g=;O101N1O100O100O1O1O1O100O1O1O1O1O2N100O1O1O1O2O0O1O1O1O11O1O2N10O00O2O01N100001O3M00OXNkBe1X=05KM_NhB`1X=aNgB_1Y=bNgB]1_=O000000O11O000000001O1O1O1O002N1O1O1O002N1O2N001O1O1O1O1O1O2N1O1O2N1O00001O1O001O3M1O001O1O1O1O001O1N2K[Ye6" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"X3n6R8000000001O1N2O0000001O001O1O002M101O00001O00001O001O000O10000000O1O100O010O100000000000O01N2L4K5I7M3O1O100O0100000O10000O1000000O10O10O10000O10000O100O00100O100O100O100O1UOcGRK^8m4dGRK[8n4kGmJU8Q5oGlJR8S5PHkJQ8U5PHjJP8U5RHhJP8X5l0O1O1O0oNZFjLf9l2SGdLn8X3\\GaLe8]3`1O1O1O1000000O01000000000000000O1000O100000000000000000O100000O100000000000O10O1000000000000O10O1000000000000000O1000O1000000000000000O10O100000000000000O10O100000000000O100000O10000000O10000000O100000O10\\MiLYIGQN`3f8RMUIo2k6aMeH_2[7lMYHU2g7nMZHn1f7XN_H]1a7cNaH[1_7aNVG_N]1n2^7gNbHX1^7mN^HR1b7TOeG[NF`2e8_OXGUN3[2e86\\GHd8:[GEe8<[GCe8?[G_Oe8b0\\G\\Od8e0\\GYOe8h0ZGXOf8i0ZGVOf8l0YGSOg8m0cGfLFj1g8a1RHXNn7j1QHUNo7l1QHSNo7o1QHoMo7g2\\GWMe8k2ZGTMf8n2YGQMg8P3YGoLg8R3YGmLg8T3ZGjLf8W3ZGhLf8Z3YGeLg8\\3ZGbLf8_3ZG_Lg8c3XG\\Lh8e3XGZLh8g3XGXLh8i3ZGoKk8R4\\G`Kj8b4i01O3M5K6J00O1O1O104K2O1N2O3M;DZ1B:D=E?A;E;E:F9GB=Cg0YOe0cLmCc2Q=WOh0WOc0^O_dP5" + }, + { + "size": [ + 480, + 640 + ], + "counts": "`UZ33k>2O2N2N1O2N1O2O0O2N1O101N00100O1O010O01000O010O101N10000O2O00001O0O101O001O0O2O001O001N101O001O001O1O2N0000001O000000001O0000001O0010O01mBfN^a0Aa0_Oo0QOa0_Oc0]Of0ZO?", + "choices": [ + "A. Lying on , which is on .", + "B. Lying on , which is attached to .", + "C. Attached to .", + "D. Lying on ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_326.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000148719.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "0]3c;001O00000000001O001O1O1O00001O2N1O1O000000000000O1O100O100000000001O001O1O00001O001O0000O1O1O1O1K5N204L3N1M4M1O7I4L2N1O1N9H4MgNkMiEP2X:YNbEd1^:aN^E^1b:cNeEU1[:oNdEn0\\:TOeEi0[:[OaEe0_:^O_Ea0a:BbE8^:KcE1]:3`EL`:9\\EFd:<\\EBd:a0\\E\\Od:f0\\EXOd:i0\\EVOd:m0YESOg:n0ZEPOf:S1[EiNe:Y1^EbNb:a1]E]Nc:h1YEWNg:j1YEUNg:n1[ElMf:V2Q12N2N2N1O2N2N4L2N3M4L2N2N1O2N1O00000000000000000000000000000000000000000000000000000004WMYDQ2gA:G8Ha0_O6J8H3LQX2o0ofM;D:I8H`0@7I6J;E1O1O001O000000001O0000O1001O0000001O0000000000000000000000001O0000001O001O2N1O001O00000000001O000000000000000000O1001O0000001O1O5K4L3M4L3M3M1O1O1O0aEbK\\:]4dEdK\\:\\4eEcK[:]44L4]Oc0A?001Oj0VON2O1000000O1O1O1N2O1N2N2N2M3M3001ON200O11O00000000O100O1O1O100000000000000M300N20000N20000O1O100O100O10000000000O1001O01O1O00010TNPDf0R<^NlCLj0d1`4dNObC3l;GYCj0?Dm;AdCW1;\\Ol;l0SDWOj;k0UDVOg;o0UDSOi;R1oCTOo;P2N2N1O1O2M200O2O001O0O101O001JbLhD_3W;aLiD_3[;10001O0000000L_LiDa3W;_LiDa3W;_LhDb3X;^LhDc3Z;00001O00001N1O101O001O0000001O00002N2M3N101N2N1N200O1O2M101N200O0O101O00001O000000001O0O11O0O1GdEnK\\:[40O10000000000O0O2O10000001N11O01N1O010O1O101O001O00O10001OO100000001O0O100O01000O2O01O00O101O00001O000000001O0O10000O101N100000001N1O100O2L3M3N3M2O101M2N2O100N2O2M2N2O1N3N1M3O1N2N3N1O1K6M2B[I^Hi6a7WI]Hk6b7:0fJXH\\3g7dLZH\\3e7fLZHZ3f7dLZH_3e7n1O100000000O2N11O00O2O001O00000000000O2O000000001O00O1001O0O101O001O00001N1001OO2O0000001O00O1001O00000O11O0000O105K1O1O5K3M3M100O2M3N10O01N1O2O1O10O0001O00000000000000001O000O1000000000O100000001O000O01000O2O0O1O100O10000O1O1O2O0O1O10000O1O1N2O2M2O1N2L4N2O1N2N1K6N2N2M3K5C=E;O1O20O00000000000000000000000001N100O1O100O1O2O000O100O1N3N1000000O100O2O0O1O2N1000000O10001N1O1O1O101O000O2N102N0O2N3M6K1O3L7J5J5L1N7I;E2O1O3M1N6J5K2O1N10O01O100O10
0O100O10000O10000000O01001O1O2N3M1O1N101O00000O101N1000000000O1000000O1O10000O0O2N1N3O1O1F:F:L41O0000000000000M3OM400O100M[CkMe:fANU>1mAGZ>0cA35Kmi7" + } + ], + "question": "Which of the following statements accurately describes the arrangement involving , , and ?", + "choices": [ + "A. is beside and in front of .", + "B. is beside and in front of .", + "C. is beside and in front of .", + "D. is parked on and beside ." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_327.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000148719.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "U5[3e;00O1000000000O100000O100O10O102]DoLS;d3K4L3L4M1O2N1O1O2J`KhE`4X:`KhEa4[:1DeKQF\\4m94dNObC3l;GYCj0?Dm;AdCW1;\\Ol;l0SDWOj;k0UDVOg;o0UDSOi;R1oCTOo;P2N2N1O1O2M200O2O001O0O101O001JbLhD_3W;aLiD_3[;10001O0000000L_LiDa3W;_LiDa3W;_LhDb3X;^LhDc3Z;00001O00001N1O101O001O0000001O00002N2M3N101N2N1N200O1O2M101N200O0O101O00001O000000001O0O11O0O1GdEnK\\:[40O10000000000O0O2O10000001N11O01N1O010O1O101O001O00O10001OO100000001O0O100O01000O2O01O00O101O00001O000000001O0O10000O101N100000001N1O100O2L3M3N3M2O101M2N2O100N2O2M2N2O1N3N1M3O1N2N3N1O1K6M2B[I^Hi6a7WI]Hk6b7:0fJXH\\3g7dLZH\\3e7fLZHZ3f7dLZH_3e7n1O100000000O2N11O00O2O001O00000000000O2O000000001O00O1001O0O101O001O00001N1001OO2O0000001O00O1001O00000O11O0000O105K1O1O5K3M3M100O2M3N10O01N1O2O1O10O0001O00000000000000001O000O1000000000O100000001O000O01000O2O0O1O100O10000O1O1O2O0O1O10000O1O1N2O2M2O1N2L4N2O1N2N1K6N2N2M3K5C=E;O1O20O00000000000000000000000001N100O1O100O1O2O000O100O1N3N1000000O100O2O0O1O2N1000000O10001N1O1O1O101O000O2N102N0O2N3M6K1O3L7J5J5L1N7I;E2O1O3M1N6J5K2O1N10O01O100O100O100O10000O10000000O01001O1O2N3M1O1N101O00000O101N1000000000O1000000O1O10000O0O2N1N3O1O1F:F:L41O0000000000000M3OM400O100M[CkMe:fANU>1mAGZ>0cA35Kmi7" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"d45k>0N200O1O2N2O1N1O1O1O010O2O3L4M000O100O010OO2N1NI9O10O1010O0100O1O2M2O2N2FfAK^>OfANg>OjSi8" + }, + { + "size": [ + 480, + 640 + ], + "counts": "0]3c;001O00000000001O001O1O1O00001O2N1O1O000000000000O1O100O100000000001O001O1O00001O001O0000O1O1O1O1K5N204L3N1M4M1O7I4L2N1O1N9H4MgNkMiEP2X:YNbEd1^:aN^E^1b:cNeEU1[:oNdEn0\\:TOeEi0[:[OaEe0_:^O_Ea0a:BbE8^:KcE1]:3`EL`:9\\EFd:<\\EBd:a0\\E\\Od:f0\\EXOd:i0\\EVOd:m0YESOg:n0ZEPOf:S1[EiNe:Y1^EbNb:a1]E]Nc:h1YEWNg:j1YEUNg:n1[ElMf:V2Q12N2N2N1O2N2N4L2N3M4L2N2N1O2N1O00000000000000000000000000000000000000000000000000000004WMYDQ2gA:G8Ha0_O6J8H3LQX2o0ofM;D:I8H`0@7I6J;E1O1O001O000000001O0000O1001O0000001O0000000000000000000000001O0000001O001O2N1O001O00000000001O000000000000000000O1001O0000001O1O5K4L3M4L3M3M1O1O1O0aEbK\\:]4dEdK\\:\\4eEcK[:]44L4]Oc0A?001Oj0VON2O1000000O1O1O1N2O1N2N2N2M3M3001ON200O11O00000000O100O1O1O100000000000000M300N20000N20000O1O100O100O10000000000O1001O01O1O00010TNPDf0R<^NlCLj0d1`?", + "choices": [ + "A. and ", + "B. , , and ", + "C. and ", + "D. , , and " + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_328.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000098716.jpg", + "mask_rles": [ + { + "size": [ + 359, + 640 + ], + "counts": "nh59l::G1O1O1O1O1H8M3O100O100001O000YOVF1k9Mj0K`nc6" + }, + { + "size": [ + 359, + 640 + ], + "counts": "]kd0:j:6K2O1O1J]OgEe0V:700O1N200O100O@mENS:0QFMP:2SFLn93RFNm90VFOl9NUF1m9MTF3b:JnTT6" + }, + { + "size": [ + 359, + 640 + ], + "counts": "Qn]6l0Z:Y1hFZN^6_3I1O00000001O001O0SMhHTO3]2W7mMgIk1[6TNfIi1\\6VNeIj1[6VNeIj1[6WNdIj1[6UNfIk1Z6UNfIj1[6VNdIk1\\6VNaIk1`6iNaHQO0\\2`7`0g0N2O1O1O1M3N2O2O0lM_Gb1a8\\N_Hf0a7ZO`Hd0a7[O`He0`7\\O_Hd0a7\\O^He0b7[O^He0b7[O^He0b7\\O]Hd0c7\\O]Hd0c7\\O]Hd0c7]OdGE;n0Q8`1000000000000000000YK" + } + ], + "question": "What is the spatial relationship between and ?", + "choices": [ + "A. is in front of .", + "B. is in front of .", + "C. is sitting on with .", + "D. is over ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_329.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000203317.jpg", + "mask_rles": [ + { + "size": [ + 423, + 500 + ], + "counts": "YUi2b0_<:I7I6K4L4L4L4L3N3L3N2N2M3N2N2N2N2N2N2O0O2N2N101N1O2O0O2O0O101N100O101O0O1000001N1000000000000000O100000000O100000O0N3N2N1N3O0N3O0M4M2M4N1RNXL\\Jl0\\Nj1Y7]MVJb0kNo1P7dMnI=TOo1m6SN_INGm1j6[NWIJ0j1i6`NoHL8b1h6dNnHM:]1g6iNkH^O1A=V2f6nM[Ie0JVO5U2f6oM^I`0\\OTO=82T2h6nM`I=OAIR2j6gMfIb0LEEo1l7WNgHJ]Om1j7ZNlHIZOk1g7^NRIGWOh1e7cNWIEUOe1c7eN\\IGQO_1e7hN^IInN\\1d7iNbIKjNY1f7gNfI0eNU1g7jNfI1eNo0h7oNeI2dNk0j7ROdI2dNe0m7XO`I3gN;n7A]I4V8KkG5T8JnG7P8IQH7n7HTH8l7GUH9j7GXH8\\:1O0000O11O00000XCH`A?o8SO^G?B`0P9ROZGa0D?R9QOUGc0H`0Q9mNUGe0Ha0R9jNSGh0Ib0R9gNPGc0GWO6b1Q9eNnF40IKL5e1P9bNmF3:0G]1P9QO^GA_Ob1R9iNdGC[Of1o8dNiGDXOk1n8_NmGDVOo1k8ZNRHT2m7iMWHW2h7iMXHX2g7gMZHZ2e7fM[H[2d7eM[H]2d7cM[H_2d7aM[H`2e7`MZHb2d7_M[Hc2d7]M\\Hc2d7\\M\\Hf2c7ZM]Hg2b7YM^Hg2b7XM^Hj2a7UM`Hk2`7TMaHm2_7PMcHP3^7nLcHS3h800001N10001O0001O0001O00000000000000000000000000O1000000hLVFR3k9nLUFR3k9nLUFR3k9mLVFR3k9oLTFQ3l9oLTFQ3l94100O100O1O100O100O1O1O1O100\\MkEU2W:iMlET2V:iMmEU2U:gMoEW2a:N4M1O2M3N3L3M4L5`N_DQ1f;lN]DP1Qk;BUD>k;BUD>Z<0EBmC>So;ChC09=^<000001OO1001O00O10000O1N2N2O1O1O1N2O1N2O10O0101O00000000O1001O00000000O011O00000000000000000000000000000000000000000000001O0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001O0000000000000000000000000000000000000000000000000000000000000000000000001O001O1O1O2N1O1O1O1O1O1O1O2N001O001O1O1O00010N101O1O00001O01OO1000001O00000000001O000000O10000000000O1000000O100O10000O100O1000000O1O1O1O10000N2O100O100O1N2O1O1O1O1O100O100000000000000001O000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001OO100000000001O00000000O100001O001O001O1O1O1O001O1O001O001O1O1O0000001O001O00001O0000001O00000000000000000
00000O10000000000O10000O10000O10000O1O100O1O10000O1O1O100O1O1O1000000000000000000001O00O11O00000000000000000000000000000000O11O000000000000O11O0000000000_C" + }, + { + "size": [ + 423, + 500 + ], + "counts": "0m;Z1000000001O00O100000000000000000000000000000000001O000000000000001O00001O00000000000000000000000000001O00001O0000001O0000000000O100O100O1N2N2KgNZD[1d;5O1O1M3O1O11O0000001O00O1001O00O11O00000000001O000000000000000000001O00O11O000000001OO1ZK]NhMc1n6000RK\\NYNd1f1]NZNc1f1\\N[Nd1c1^N]Nb1c1^NVJN_3d1\\2]N\\Nc1e6O00PK_NZNa1S5_N`Ga1S:0dM_N^GOm0b1d7eNkFK:OV1a1h7]NnF240V1a1R:01O_M_NfG1i0a1a7]NgG1i0b1]7^NkG0i0b1Z7aNlGMj0b1Z7bNkGNh0a1]7]NoG1f0a1Q:OPK^N[Nb1d1_NdK1>`1X6^NiIb1W7^NiGb1W7_NQH0f0b1Y7_NPHOTO0`1b1]7]NgIb1o80PK^N[Nb1d1_NfKOB]Ob2o6lMfIA\\Ob2n6kMfHUOP1?\\O`2m6lMkIEWO^2l6kMRJGRO]2k6lMVJEoN_2j6kMZJFlN^2j6hM_JKiNY2h6cMiJ4`NW2g6eMjJ3aNV2e6gMjJ6_NR2h6gMjJ6aNi0_OhN0Q1X7UOlJ7_Nd0FNP7UOmJ9\\N`0^OSO9P1P7TOnJ:_N8G:l6SOPK:Db0]5POTK=_Ob0]5ROTK>\\O`0`5SOTK=[O`0a5ROVK=YOa0V8^OiGb0b5QOYK>TOa0c5ROYK=SOa0[8_OdGa0^8@nEC\\1m0f8HVFYOf0o0U9GUF[Oe0n0X9FTG9m8GRG9n8GRG9n8HoE[Oh0n0\\9EjE_Oi0l0_9ChEBh0k0a9BgEEf0i0d9EbEBk0h0d9L]F4d9L[F4f9LYF4h91RFOP:3lEMU:3jEMW:4gELY:5fEK[:5dEK]:4cEL]:4cEL^:3bEM^:4`EM`:4_ELa:4`EK`:6_EJa:6_EJb:6\\EKd:5\\EKf8XOhHn0bNJf8\\OdHj0fNJe8]OeHi0fNJe8]OeHi0fNJe8A`Hf0kNIf8@_Hg0kNIf8A\\Hg0oNHe8CWHh0TOEf8FmGj0]O@g8GgGl0B]Og8GcGP1FYOh8G^GS1KUOg8JYGT10ROh8o1WGRNi8n1WGRNi8n1WGRNk7WOeHf2@SNj7YOfH\\1ZO@7Ji7\\OdH`2ESNg7^OcH_2FSNg7^OcH^2GTNf7^OcH]2HUNe7^ObH_2HSNf7_O`H_2ISNg7@\\Ha2JPNj7_O[Hd2ImMl7_OZHh2HhMn7ASHn2MbMP8U400O10VLQH\\2o7XM_Hf2a7WMPHUO?d3a7WMbHi2^7SMPHYOc0d3]7oLkHP3U7oLlHQ3T7nLmHR3S7lLoHT3Q7lLoHT3Q7lLnHU3R7jLoHV3Q7jLnHW3R7iLnHW3Q7jLnHW3R7iLmHX3S7hLmHX3S7hLmHX3S7hLlHY3S7hLmHX3T7gLlHY3U7fLkHZ3U7dLmH\\3S7bLnH_3W7VLoHj3W800000001OO1001O00000000000000000000000000000000000000000000000000000000000000000000000000001O001O1eLaFl2_9QMeFn2]9nLeFR3\\9jLfFW3e903M3MM30lLSFn2U:hNhE]OY:`0jE_OW:=lECX:oNgEo032g:^ObEa0b;0O11O00000000000000000000O11O000000O10000000000000000000000" + } 
+ ], + "question": "Which statement accurately describes the position of ?", + "choices": [ + "A. is parked on and attached to .", + "B. is attached to and parked on .", + "C. is parked on and beside .", + "D. is parked on and attached to ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_330.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000542856.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "mWY66R=5L4N1N20O00001O0O2O1O5HdVl1" + }, + { + "size": [ + 427, + 640 + ], + "counts": "W6b5h7001O10O100000000O001O100O100O1O10000M3N2N10100O100000O1000000O100001O00O10000000O1000000O100001O00000000000000O02O000001O00O1001O0O0101OO100000000O1000000000000000000000000000000000000000000000O100000000000000000000000001O000000000000001O000000000000000000000000000000000000000000VNoGZNQ8`30000000000000000000000000000O100001O000000000000000000000000000000O1000000001OO10000000000000000000000000000000000000000O10000000000000000O100000000000000000000O10000000000000000000000000000000000O100N2N2McJ[HZ5c7hJ]HX5c780000OaJ^HW5b7hJ`HW5`7jJ^HW5b79000_J`HW5`7iJ`HW5`7jJ_HV5a7jJ^HW5b79O10000000000000000002N1_J\\HY5d77O10[J]Hb5c7]J_Hb5a7^J_H2N\\5b7dJ^Ha5b74004L1O01O00O100000O11O0`JYH\\5h7cJXH]5h7cJXH]5g740`JYH\\5h7bJYH^5g7bJYH^5f7dJYH\\5g7dJYH\\5g7dJYH\\5g7dJYH\\5g7dJYH\\5g740000000`JYH\\5g7400000000000000000`JYH\\5g7400000000000000000000000000000000000000O10000001OO100001O00O100H]JhHc5X7]JiHb5\\3^J60^Lb5U3`JgLNf30^Lb5R3dJiLJo3c5ZO_Jm0`5_3000G^JhHc5X3^JcL1g3O]Lb5Y7^JgHb5Y7]JhHc5a7N10O110O1O1N1000O1000000001O1O1O1GXJmHi5Z71O2N1O1O001O1N1N3N23M00O1O00001O00000O20O1O0000000000000000000000000000000O10000000000O1000O01O10000O1O100O1O1O1O100O1N2O1N2O1N2O1O10000O100000000000000000000O10O2O00010N101O0001OO10000001N0101O00O100000000000000000000000000000000000O11O0000O1O100000000001O0000O100000000000000000000O100000000001O00O10O1000000001O000000001O000000O100000000O101O00O03N1O0000001O8dJoG7:o3P8jKWH
S4j7mK\\Hm3d7RLaHj3c8M:E7J4jLlEm2Z:O5K9F7J2N1oMmDg1^;N8aNaDFIi0g;]ObDJH0Od0k;]O_D4F>m;]O_Di0T and the other objects?", + "choices": [ + "A. is driving on and contains .", + "B. contains and is being driven by .", + "C. is parked on away from .", + "D. is an advertisement seen on and ." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_331.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000333745.jpg", + "mask_rles": [ + { + "size": [ + 640, + 427 + ], + "counts": "RTR1=ac03K5K5N1M4M3L3M3dKVO`Em0Z:BZE`0b:NPE6n:6gDKX;?^DBa;e0YD[Of;h0UD[Oj;h0oC]OP interacting with and ?", + "choices": [ + "A. is holding and standing next to .", + "B. is holding and carrying .", + "C. is on top of , and is walking towards them.", + "D. is carrying and wearing ." + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_332.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000189698.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": 
"2Z4R90O2O000000O1001OO1O1O1001OO100O100000000O11O00O1O1O100O10000O11O0000O1001O0000O1000000000000001O001O0VGXLEF1NQ8U4VHeLg7[3YHfLf7d4LNVHdJh7]5WHdJh7a5O2N1O001N102N00100N010001M2001N10O2N1O1001O00001O0O2O10O01O2N1O1O2N5K2N1nHoIg6^6MO1001O2\\IZI^6j60N200002N00001O001O1O1O3M1O1O0000O11O1OM300000000O1N2O10000000000O1O100000O10010O1O1fIPIV6R71001O1O1N2O1O1O000000O11O1O001O1O1O1O001O001O1O1OdKYJT1f5iLmLmN_Mf3c5]MUMb2j2^MWMd2f2\\MZMe2e2[M[Mf2XMYLT5Q1dMf2WM[LT5o0eMh2TMZLX5m0dMk2UMULW5P1dMk2VMTL0OQ5S1iMU3U2kLkMW3UMhKi4Q1RNW3S2iLmMk2TMXLS5m0iMk2UMSLV5R1eMQ3Y2oLgMQ3Y2oLgMn2\\2RMdMk2_2UMaMj2`2VM`Mf2d2ZM\\Mc2g2]MYMa2i2_MWM`2j2`MVM_2k2aMUM_2k2aMUM_2k2aMUM_2k2aMUM_2k2aMUM`2j2`MVM`2j2`MVM`2j2`MVMa2i2_MWMe2e2[M[Mg2c2YM]Mh2b2XM^Mi2a2WM_Mj2`2VM`Mj2`2VM`Mj2`2VM`Mk2_2UMaMk2_2UMaMl2^2TMbMm2]2SMcMm2]2SMcMn2\\2RMdMn2\\2RMdMo2[2QMeMP3Z2PMfMQ3Y2oLgMR3X2oLgMT3V2kLkMW3S2iLmMX3R2hLnMY3Q2hLnMX3R2hLnMX3R2hLnMX3R2hLnMX3R2hLnMX3R2hLnMX3R2hLnMX3R2hLnMX3R2hLnMX3R2hLnMX3R2hLnMY3Q2gLoMY3Q2gLoMY3Q2gLoMY3Q2gLoMZ3P2fLPN[3o1eLQN[3o1eLQN[3o1eLQN[3o1eLQN[3o1eLQN[3o1eLQN[3o1eLQN[3o1eLQN[3o1eLQN\\3n1dLRN\\3n1dLRN\\3n1dLRN\\3n1dLRN]3m1cLSN]3m1cLSN]3m1cLSN]3m1cLSN]3m1cLSN]3m1cLSN]3m1cLSN]3n1bLRN^3n1bLRN^3m1cLSN]3m1cLSN]3m1cLSN]3m1cLSN]3m1dLRN\\3n1dLRN\\3n1dLRN\\3n1dLRN]3n1bLRN^3m1cLSN^3l1bLTN`3j1`LVNa3i1_LWNa3i1_LWNb3h1^LXNb3h1^LXNa3i1_LWNa3i1_LWNa3i1_LWNa3i1_LWNa3i1_LWNb3WMYK_4T1[Nb3UM\\K^4S1\\Nb3h1^LXNb3h1^LXNc3g1]LYNe3e1[L[Ne3e1[L[Nf3d1ZL\\Nf3d1ZL\\Ng3c1YL]Nh3b1XL^Nh3b1XL^Ni3a1WL_Ni3a1WL_Ni3a1WL_Ni3a1WL_Ni3a1WL_Ni3a1WL_Ni3a1WL_Nj3`1VL`Nj3`1VL`Nj3`1VL`Nj3`1VL`Nj3`1VL`Nj3`1VL`Nk3_1ULbNj3^1VLaNk3_1ULbMkNROP5\\3ULbMkNROQ5[3TLbNl3^1TLbNm3]1SLcNm3]1SLcNm3]1SLcNm3]1SLcNl3^1TLbNl3^1TLbNl3_1SLaNm3^1TLbNl3^1TLbNl3^1TLcNk3]1ULbNl3^1TLbNk3_1ULaNk3_1ULbNj3^1VLbNi3_1WLaNh3`1XL_Nh3b1XL^Ng3c1YL^Ne3c1\\L\\Nc3e1\\L[Ne3e1[L[Ne3e1[L[Nd3f1]LZNa3g1_LYNa3g1_LYNa3g1_LXNb3h1^LXNb3h1^LYNa3g1_LYNa3g1_LXNb3h1^LXNb3h1^LXNb3h1^LXNc3g1]LYNc3g1\\LZNd3f1\\LZNe3e1\\LZNd3f1\\LZNe3e1[L[Ne3e1[L[Ne3e1[L[Nf3d1ZL\\NQ1WM<]4cN]NoNHn1j1SO^NlNLQ2e1S
O_NkNMR2d1ROZ1o0eNRO[1m0eNSO[1n0dNRO\\1n0dNRO\\1o0cNQO]1o0cNPO^1P1bNPO`1n0`NQOn7M3O10000O1001O00O10000000000001O002N3mFlNWM6D:2En0d0ROXO?3P4Q1mLkNK;mNU13Z1d3[NbMkNIaNU12]1f3VNoNXOYMV10^1g3TNPOYOWMV11^1h3SNPOY4o0gKQOZ4n0fKROZ4n0fKROZ4n0fKRO[4m0eKRO\\4n0dKRO\\4n0dKRO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO^4l0bKTO^4l0bKTO^4l0bKTO^4l0bKTO^4l0bKTO^4l0bKTO^4l0bKTO^4l0bKTO]4m0cKSO]4m0dKRO\\4n0dKRO\\4n0dKPNjNYOb5g2dKPNjNYOb5g2dKPNjNYOc5f2cKQNjNYOc5f2cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKQMiNB0^1d5o1cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO^4l0bKTO^4l0bKTO^4l0bKTO^4l0bKTO_4k0aKUO_4k0aKUO_4k0aKUO`4j0`KVO`4j0`KVO`4j0`KVO`4j0_KWOa4i0`KVO_4k0aKUO^4l0bKTO^4l0bKTO]4m0cKSO]4m0cKSO]4m0cKSO]4m0cKSO\\4n0dKRO\\4n0cKQMjN?0Bc5n2cKSO\\4n0eKQOZ4P1fKPOY4Q1gKoNY4Q1gKoNY4Q1gKoNY4Q1fKPOY4Q1gKoNY4Q1gKoNY4Q1gKoNY4Q1gKoNY4Q1gKoNY4Q1gKoNY4Q1gKoNY4Q1gKoNY4Q1gKoNY4Q1gKoNZ4P1fKPO\\4n0dKRO]4m0cKSO^4l0bKTOa4i0`KVOk4?UKAn4TKBk4?UKAj4`0VK@i4a0WK_Ob2dMoMn2_O^O]2jMSNh2@^O\\2lMSNf2A^O[2mMTNe2A^OX2SNSN`2E]OS2YNWNZ2E^OR2[NXNW2F^OQ2^NVNU2I]OQ2_NUNT2J]Oo1bNVNQ2K]Om1gNSNn10[Ol1jNQNl13ZOk1lNPNk15YOj1mNQNj15YOi1oNQNh16YOh1ROoMf19XOf1TOPNe1:WOe1UOQNd1:WOd1VORNc1:WOb1XOSNb1;VO_1\\OTN_1=UO^1]OUN^1=UO^1]OTN_1>TO]1^OUN^1>TO[1@WN\\1>TOZ1AWN\\1?SOY1BWN\\1`0ROY1BWN\\1`0ROX1CWN\\1a0QOV1EXN[1b0POU1FXN[1c0oNU1GWNZ1d0oNU1GVN[1e0nNU1GVN[1e0nNT1HVN[1f0mNS1IUN\\1h0kNR1JUN\\1i0jNQ1KVN[1i0oMkNjNV2l1VN[1i0jNQ1MTNY1k0jNP1OSNX1m0iNP10RNW1n0iNo01SNV1n0iNo01SNV1n0iNo01RNW1o0hNn02RNW1P1gNm03RNW1Q1fNm03QNX1R1eNm03QNX1R1eNl05PNW1T1dNk06oMX1V1bNk0_4UOaKk0_4UOaKk0_4UOaKj0`4VO`Kj0`4VO`Kj0`4VO`Kj0`4VO`Kj0`4VO`Kj0`4VO`Kj0`4VO`Kj0`4VO`Kj0`4VO`Ki0a4WO_Ki0a4WO_Ki0a4WO_Kh0b4XO^Kh0b4XO^Kh0b4XO^Kh0b4XO^Kg0c4YO]Kg0c4YO]Kg0c4YO]Kg0c4YO]Kg0c4YO]Kg0c4YO]Kg0c4YO]Kf0d4YO]Kg0c4ZO\\Kg0c4YO]Kg0c4YO]Kg0c4YO]Kg0c4YO]Kg0c4YOZKj0f4VOXKm0g4TOTKP1l4h30001O000000001O1O0000000000000000001O00000000001O00001O0000001O000000001O00000000
000000000000001O00000000" + }, + { + "size": [ + 428, + 640 + ], + "counts": "Uj[12m in relation to ?", + "choices": [ + "A. It is driving on .", + "B. It is located under .", + "C. It is parked beside .", + "D. It is positioned behind ." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_333.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000362434.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "PjU3`1Q:60000000000000000000000000000000001O000000000000000000O100O1O10000O100001O00001O1O00001O1O001O001O00001O001O1O1O001O2N1O1O2N2N1O3M3M2N3M2aG]Mf7f2hGoMR8m2N1O3M2N2N1O1O3M1O1O1O0000001O0000001O00001O001O0000000000001O00000000000000000000000000001O000000O10000000000001O00000000000000000000000000000000000000001O000000000000000000000000000000000000001O000000000000000000000000000000000000000000000000001O000000000000000000000000000000000000000000000000000000" + }, + { + "size": [ + 375, + 500 + ], + "counts": 
"U6a5V6000000000000000O1000000000000000PMoI:Q6WMPJV21b0o5WMRJU22b0l5XMVJR20e0W6ZOkId0V6ZOlIe0V6^M]If1>k0i6^MTIi05g1[7`MYH;>T2d7hM_HV2d7gM]HX2b8O2N1O2N4L2N1O1O2N001O3M001O3M1O3M4L2N5K2N001O00001O00001O00001O00001O1O00000000001O00001O001O003M001O2N1O00001O2N001O001O1O001O1O1O1O1O1O001O000000O1000000000000000000O100000000001OO100000000000000000000000000O10000000000000000000000000000O100000000000000000000000000000000000000O1000000000000000000000000000000O1000O11O00000000000O10000000000000000O10000O1000000000000000000000000000000000000000000O100001O00O1000000000O1O11O1O0O11O0000000000000000000000000000000000001O00000000000000001O00001O0000001O001O00000000000000000000000000000000001O000000000000000000001O0000000000DGZE9e:H\\E7c:J]E6c:J]E6c:J]E6c:K\\E5c:L]E4c:L]E4c:L^E3a:N_E2a:N_E2a:N_E2`:0ZEHO8g:0ZEH07f:1ZEH07f:1ZEH07f:1ZEH16e:2ZEH16e:2ZEH16e:2ZEH16e:2aEN^:3bEM^:3bEM^:3cEL\\:6cEJ\\:7dEI\\:6eEJ[:7dEI\\:7dEI\\:7dEI[:8eEHZ:9fEGi92RF96De9n0[FROb9Q1^FoN`9S1`FmN^9U1bFkN[9X1eFhNY9Z1gFfNW9\\1iFdNU9^1kFbNR9a1nF_NQ9b1oF^NP9c1PG]No8d1QG\\NW82jGe1OYNT85kGd11WNR87kGd13UNn7;nGa14TNj7?QH^15SNh7a0RH\\17SNh7`0PH_17QNi7?PHa1OlM24P8>oGb17PNj7>nGc18oMk7, , and ?", + "choices": [ + "A. is on and attached to .", + "B. is on and beside .", + "C. is inside and on .", + "D. is beside and on ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_334.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000052565.jpg", + "mask_rles": [ + { + "size": [ + 458, + 640 + ], + "counts": "_W\\23U>4N1O0O2O1N2O0O2O1O1N100010O000000100N100O100000O1O101O1N4L7H7J5J;F8]CVNWUO_B0_f\\2" + }, + { + "size": [ + 458, + 640 + ], + "counts": "e;d2g;O00001O00001O000000001O00000000000000000000001O00001O0000000000001O001O00O10000001O000000000000000000000000000000000000001O00000000001O000000000000001OO100000000000000001O000000O100001O00000000000nMQDi1P positioned relative to and ?", + "choices": [ + "A. is standing on , with behind it.", + "B. 
is standing between and .", + "C. is standing on and is positioned in front of .", + "D. is partially submerged in , next to ." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_335.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000304396.jpg", + "mask_rles": [ + { + "size": [ + 640, + 425 + ], + "counts": "kTW4;bc07I6K5L3M2N3M2O2M2N2O1N2O1O1O2N001N2O1O010O1O001O0010O01O010O00010O01O001O001O10O01O1O1N2O1O1O1N2O1O2M2N2N3M3M2N4K6I:Bdnl2" + }, + { + "size": [ + 640, + 425 + ], + "counts": "\\no1c0Vc0;H7H7K5K4L4M2N2N3M2N1O2N2O001N100000000O11N100O2O0O2N1O2O1N2N2M4M2M4K5K6I7I8K;\\OQS`5" + }, + { + "size": [ + 640, + 425 + ], + "counts": "R>V1l0VOVOG_13`N6a0HQ>R1VBB;YOSOe0K[O89b=L^BU1IR3OhKR=2nBf10g2c<_K]CO1o6`T3^A^MT>c2jAmMg=S2XBVN`=k1^BbNV=^1iBQOij;BSDc07\\Le:R3REc08cL_:k2WEc09mLV:k6gEbIn9_6mEoIi9\\8H5K5K00000000000000O10000O100O100O1O1O1N2O100O100O1000000O1000000O1000000001O1O00001O0000001O002N1O1O00001O1O1O001O001O00001O001O1O1O001O1O1O1O001O00001O00001O1O001O1O001O001O1O1O1O0bKiDD1_LW;k3iDE2_LR and , and the tower, ?", + "choices": [ + "A. Both and are on , and is beside .", + "B. is on , and is beside .", + "C. Only is on , with located beside it.", + "D. is on , which is on ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_336.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000304396.jpg", + "mask_rles": [ + { + "size": [ + 640, + 425 + ], + "counts": "\\no1c0Vc0;H7H7K5K4L4M2N2N3M2N1O2N2O001N100000000O11N100O2O0O2N1O2O1N2N2M4M2M4K5K6I7I8K;\\OQS`5" + }, + { + "size": [ + 640, + 425 + ], + "counts": "R>V1l0VOVOG_13`N6a0HQ>R1VBB;YOSOe0K[O89b=L^BU1IR3OhKR=2nBf10g2c<_K]CO1o6`T3^A^MT>c2jAmMg=S2XBVN`=k1^BbNV=^1iBQOij;BSDc07\\Le:R3REc08cL_:k2WEc09mLV:k6gEbIn9_6mEoIi9\\8H5K5K00000000000000O10000O100O100O1O1O1N2O100O100O1000000O1000000O1000000001O1O00001O0000001O002N1O1O00001O1O1O001O001O00001O001O1O1O001O1O1O1O001O00001O00001O1O001O1O001O001O1O1O1O0bKiDD1_LW;k3iDE2_LRZ]OCeb0=Z]ODfb0;Z]OGeb09[]OGeb09[]OGeb08\\]OHdb08[]OHfb08Z]OHfb07[]OIeb07b0N2N21O001O0e\\OLjb04R]O2lb0OQ]O4nb0`0O1O00001O00001O002N1O3M3M1O1O002N4L1O1O001O00001O001O00001O00001O1O00001O00001O00001O00001O001O00001O00001O001O00001O00001O001O001O00001O00001O00001O00001O001O00001O00001O1O1O0000O100O100O10000001O000000000000001O2N001g^OhMh`0m2K1O001O001O000Z@gLe>Z3XAUM[>k2dAhMj=X2UBTN`=m1^BbNT=^1kBkNm?", + "choices": [ + "A. , , and .", + "B. Only and .", + "C. Only and .", + "D. and ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_337.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000184324.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "Ula52U=3N2N4L4M2M10001O00000000N2O1N2J6O1000000012M4L010000O11OO1O1O1N`QZ2" + }, + { + "size": [ + 425, + 640 + ], + "counts": "fka44T=1iNMXE3h:0UE1k:MWE2j:LWE4o;00N21oBNi<3TC2iV4`5eLZJNi0\\3Q5XLoICZ2U4f601O1ON2O2OO0100N200N2M3K5M3M3O100O10000O1O1O1K5O1N21O000000O10000001O1O0010O01O1O1O001O000hKQHV3o7jLRHU3o7jLQHV3o7iLSHV3o7bLWH^3j7^LYHb3i7XL[Hh3g7TL[Hl3d8M1O1O000000O100001OO1ROfLhG_3S8n0O1O1N2N2N2O100O1O1O1O100001O1O1O1O1O2N1O000O10O100N2O1O100O1N2O1O100001O1O2N1iKZHTOCa3U8YM\\Hg2f7VM\\Hi2f7UMZHk2o7aLfGM7_90UFEc0NkNOb:=Y2O0O0O2N100N2O2L\\Ya6" + }, + { + "size": [ + 425, + 640 + ], + "counts": "i:S2V;1O2N2O0O2N1O0100O5gMQEo1Z;UNcD^1`;_N`Da1e;O2fNiD>[ and ?", + "choices": [ + "A. is carrying .", + "B. is on .", + "C. is attached to .", + "D. is inside ." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_338.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000184324.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "fka44T=1iNMXE3h:0UE1k:MWE2j:LWE4o;00N21oBNi<3TC2iV4`5eLZJNi0\\3Q5XLoICZ2U4f601O1ON2O2OO0100N200N2M3K5M3M3O100O10000O1O1O1K5O1N21O000000O10000001O1O0010O01O1O1O001O000hKQHV3o7jLRHU3o7jLQHV3o7iLSHV3o7bLWH^3j7^LYHb3i7XL[Hh3g7TL[Hl3d8M1O1O000000O100001OO1ROfLhG_3S8n0O1O1N2N2N2O100O1O1O1O100001O1O1O1O1O2N1O000O10O100N2O1O100O1N2O1O100001O1O2N1iKZHTOCa3U8YM\\Hg2f7VM\\Hi2f7UMZHk2o7aLfGM[ and positioned relative to ?", + "choices": [ + "A. Both and are over .", + "B. is attached to and is over .", + "C. is on and is inside .", + "D. is over and is attached to ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_339.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000169996.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Qjl5j0S>7J4L3N2M3N2N2O0O2O0O10000001O00000O2O001N1O2N3M1N5K2MLTOWBm0k=03L5NL601O2N2N1O00TOaB7`=BfB=P>N2M2K401O2O0O18H5\\NB\\Db0_;_OaDb0^;^ObDd0[;^OcDe0[;[OeDf0Z;^OcCJM34M20010000O100001O1N3N8H9G9G10O5K1O0O101N101O0O101O1O00001N103M1O001O3K:A\\Z2n0mdM4K2O000O100000001O00000000000000000000000000001O000000000000001O000000001O001O1O001O1O0010O01O2N1O1O001O001O001O1O001O1O001O1O001O1O001O001O1O001O1O001O1O2N1O1O1O1O1O001O100O010O01O1O1O001O1O1O1O1O1O1O2N1O3M1O1O1O1O2O0O1N3M2M3M4J6J6M3L3K5N1N2N4K4L`fX3" + }, + { + "size": [ + 480, + 640 + ], + "counts": "UQ[83j>5J5M3N2O0O2O001N3N1O1N10001O0O1000O10O1000O1N1010O100O1O1O100N200O1O1O100O100O1O100O101N2O1O1O01Nia9" + }, + { + "size": [ + 480, + 640 + ], + "counts": "Y=:9h0R=XOnBh0Q=ZOmBg0R=_OhBc0V=d0O1O100O1000000000000000000000000O10000000000O10O2O010O001O001O1O1O1N101O000000000000001OO1000000000001O0000000000000000000000000000000000000O100O1O1O100O1N2O1O1O1O10000001O000000000000000000O1000000O1000000000000000000000000000000000000O11O000000O1000000000000001O00001O1O00001O00000000001O0O10010O1O4K2O102M3M3L2O1O010O1N2O1O01OO2O000001OO2O00000000O100000000O100O100O1O1O1O100N2N2N200O100O10000O100O100O100O1O1O10000O1O1O100001O6J1O1O001O1O001O001O2N1N110O00001O0000000000000000000000O10000O100O1N2O100O100O100O020O01O00000000000O100010N1000000000000000000O1O1O100O1O1M3M3N2O10000001O001O1O1O00001O0000000000000000000000O1O1O1O1O1O1O1O100000000O10000000000000000000000000O11O000000O11O0000O1001O000O10O11O0000O1001O0000O1001O0000O1001O01O2N2M200O1O1O0O11O01O000O11O00000O10000000000O100O1O1001O002N1O1O001O1O1O00000000000000000000O100O1O1O100O1N2N2M3M3N200O1O100dNhMPFX2P:hMPFX2\\;00000O100M3N2O11O3M4L3MdMmMmGQ2S8PNmGo1U8PNlGn1d:O1O1O1O1O0000hMXNYGg1g8XNZGh1f8XNZGh1n:100gMVN\\Gj1d
8WN[Gi1d8[NYGe1g8\\NYEKi1i1o8cNnF^1R9bNmF_1S9aNlF`1T9`NkFa1U9_NjFb1U9_NhFd1X9\\NfFf1Y9[NeFg1Y9[NeFg1[9YNUFCTO^2f:PNSFY2j9jMVFV2j9jMVFV2j9jMVFV2j9jMVFV2j9jMUFW2k9iMUFW2k9iMUFW2g9TMbEf0g0V2o9eMQF[2^:UMcEk2Y;00000000000O10000000000000000iNVMXFj2h9VMXFj2i9TMXFl2f9XM\\EMb0l2j9iMUFW2h9lMXFT2h9lMXFT2h9lMXFT2h9lMXFT2h9lMXFT2h9lMWFU2i9kMWFU2i9kMWFU2i9kMWFU2i9kMWFU2i9kMWFU2i9kMWFU2i9kMWFU2i9kMWFU2i9kMWFU2i9kMVFV2j9jMVFV2j9jMVFV2j9jMVFV2o9QM`E5NK00d0o2Y;00000000000000000O1aN_MfEC`0n2j9hMVFX2k9gMUFY2k9gMUFY2j9hMVFX2j9iMUFW2k9iMUFW2k9iMUFW2k9iMUFW2k9iMUFW2k9iMUFW2k9iMUFW2k9iMTFX2l9hMTFX2l9hMUFW2k9iMUFW2k9iMTFX2l9hMTFX2l9hMTFX2k9iMUFW2k9jMTFV2l9iMUFW2k9iMUFW2k9iMUFW2k9iMUFW2k9iMUFW2k9iMUFW2k9iMUFW2m9gMSFY2n9fMQF[2P:mL`Ed0`0_2P:nL_Ec0a0_2P:dMPF\\2S:aMmE_2S:aMmE_2S:aMmE_2T:`MlE`2T:`MkEa2V:^MjEb2V:^M\\E^O:T3Z:^M\\E^O:T3Z:^M\\E^O:T3Z:^M\\E^O9U3[:^M[E]O:U3[:^MZE^O;T3[:^MZE_O:S3\\:^MZE_O:S3\\:^MZE_O:S3\\:cMbE^2^:fM^EZ2b:fM]E[2c:fM\\EZ2c:hM\\EX2d:R10000O10000O1O10RNfEVO[:d0nEXOR:g0oEYOQ:g0oEYOQ:f0PFZOo9g0QFYOo9f0RFZOo9e0QF[OP:_O\\E3f0=o9UOiEEB7i0o0l9TO^F@J\\1h9TO`Gl0`8SOaGm0`8oNcGQ1]8mNeGS1]8bNlG^1k:0000000000000000000000000000000000000000000bB" + }, + { + "size": [ + 480, + 640 + ], + "counts": "hUn54h>7H6L4M3M3L4M4K4M4K=D2M3O2M2O2N2N1O1O1OI\\CQNf>1O1O1O1O1O1O2N2N1N`a\\2" + }, + { + "size": [ + 480, + 640 + ], + "counts": "R:S3m;O001O2N1O1O1M3N2O100000O2O00001O000000O100001OO10000O1O11O011N3M1O2N1O1O1O1O1O001O00001O00000000000000000000O100001O00000000000000O10000000000000000O1O1O100O1O1N2M3N2M3H8L40001O0O100001O001O00001O00000000000000O100000000000000000000O2OO2O00000000001O1O00001O001O0000001O3M100O1O1O001O1O001O000000000000O1M3J6I7O1N2O1N2GUC[Nm2O1N2O1O1O1N2O002N001O1O1O1O1O1O001O000gBDn;?", + "choices": [ + "A. , , and ", + "B. , , and ", + "C. , , , and ", + "D. 
, , and " + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_340.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000245026.jpg", + "mask_rles": [ + { + "size": [ + 424, + 640 + ], + "counts": "Y[Z2>d<7J7J5J5N3J5K6L4J6J6I7J6fEbMe9c2WF`Mf9d2VF_Mf9S3N3N2M3N3M5J4L3NO11OO1000010OO2ON3N101O001O10O0100O000O2000O100000000O10000O10000O1000001N010000000000OWNVLUJk3_5bL`J^3\\5lL^JT3_5RM^Jn2_5WM_Ji2^5]M_Jc2_5cM]J]2b5hMYJY2e5kMYJT2f5PNXJP2f5TNXJl1g5WNVJj1g5[NWJf1e5_NYJa1d5eNXJ\\1g5gNVJZ1i5iNUJW1j5T300O1O100000O100000O10000O100O10O01000000O1000001O0O2O0000000000000001N1000000010OO101O1O00001O00001O00100O1O1aKPJd1Q6WNTJh1m5TNWJk1j5RNXJn1j5oMXJP2i5mMYJS2i5jMYJT2k5hMVJX2l5dMVJ\\2l5aMVJ^2l5`MTJ`2m5^MTJb2m5\\MTJd2m5ZMUJe2l5XMVJh2j5WMWJh2k5VMWJi2j5oKYIN;a0e0b3h5mKjI>?e3`6WLaIi3a6RLcIm3l7001O01O0000001O001O00000O100000001O0001O01O1O001N101O00001O0001O0001O01O01O1O001O001O1O001N3N001O1O3N0O2N1O2M4M2N4M0O3M5K2N4M0O3N3O0N2O1N1N4M2O0O2N2N1N3M2N3M3M5ROWD3n;F[D3i;G]D4T\\j2" + }, + { + "size": [ + 424, + 640 + ], + "counts": "bY]54bHHc9^O[Fl09MIJ]9h0hF_OKI]9i0gF^OLJ[9j0hF\\OLK\\9j0fF\\ONJ\\9l0bF\\O2H\\9o1eFQN[9o1dFRN\\9n1dFRN\\9n1dFRN\\9m1dFSN^9l1bFUN]9k1cFUN]9k1cFUN]9k1cFUN]9j1dFWN[9i1eFWN[9?_FO6B[9>eFK0G[9=jFHJK]9=kFFHM]9[EE0K^:n0]EWO6Ik9e1lEcN9Gi9[2XFbMj9^2VFaMk9_2?0000001O00000000000000000000000000hN^EJb:5bEjNIj0e:;XFTOh9h0]FWOd97mFIY;1O0bCHl;7TDKS;7iDO3KS;9gDM6Jf:3nD95J7J^:o0[EWO7JQ:^1eEiN9Je9\\2ZFdMf9\\2YFfMf9\\2WFTN[9l1cFUN\\9m1cFSN]9m1cFSN]9m1cFSN^9m1aFSN_9m1bFRN^9n1bFSN]9m1cFSN]9m1dFRN[9P2dFPN\\9P2dFPN]9o1cFQN]9o1cFQN]9P2bFPN]9Q2dFnM\\9R2dFnMf6\\OYI1U2e2lMnMf6\\OYI1U2e2mMnMe6[OZI0V2f2lMmM[9S2fFlMa9V1[FgN`0A_O0Ta1" + } + ], + "question": "What are the relationships of and with respect to ?", + "choices": [ + "A. is in front of , and is attached to .", + "B. Both and are in front of .", + "C. Both and are attached to .", + "D. is in front of , and is attached to ." 
+ ], + "answer": "D", + "type": "relation", + "image": "images/vqa_341.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000115885.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "RWo35_;7K2N2L5M4L3L3N2N3M2M3M4L3M3K5K5L4L4L3J8I6M2N3M4M2N2N1O2N2N1O1O2N1O1O1O2N101O0O1O1O10000jM[LYLf3l5OnM^LPLc3P4aLkK`3U4bLhK^3Z4bLeK^3[4dLbK\\3`4fL\\K[3e4dLZK]3g4cLWK^3i4gLPK[3P5mLfJU3[5kLdJU3\\5mLaJT3`5PMZJQ3f5PMXJQ3i5nLWJR3j5nLSJT3m5[1001O001O1O001O01O01O001O001O10O000001O1O010O1O00010O001O010O10O100O01O10O01O010O0010O01O000O2O00001N2N1O2N101L3O1O2N2O1N1O2L4M3N1O2O01O01N2N3K300O2O001O0O1N3NaIcL_NOo5]3dKcL]N0P6[3eKcL\\N2o5[3fKbL\\N4l5Y3iKcLZN8j5U3mKbLYN9j5T3kKeL\\N8i5Q3kKiL[N6k5o2lKiL[N7i5o2nKhLZN1N1j5V3VLhLV2" + }, + { + "size": [ + 375, + 500 + ], + "counts": "]bP33b;3O1N1N3N2N1N3N1N3N1N2N3L3N3M2M4L3M4M2N2N3M2N3L3N3L3N3N1N3N1O2L3M4M2N3M2M4N1N2O2N1O2N2N1O101N101O0O2O0O2O00001O001O0010O010O001000O0100O1O100O100O10O001O01O01O0010O01O010O010O0010O00010O01O0010O01O0100O010O01O1O1dH`L`6b3[IaLd6Y40010O100O1O10000O0100O02O0O010O01O00101N001O1O00001O00001O00001O001N3N1O1O1N2O1O2M2O2M2N2N2N3M2O2M2O2N2N1N2N2M3M4M1O3N1N2O2M2N3N1N2O1N3M2N2N3N0N4M2N2N2N3L3N3L4L4M3M2N3N2L4M3L4K6Ifo=" + }, + { + "size": [ + 375, + 500 + ], + "counts": "]1Y:^100000O100000000000000000000O100000000000000000000000000000000000000O1001O00O100000000000000000000000000000000O100000000000000000000000000000O11O0O1000000000O11O00O1001O00O1001O00O1001N10O11O0000O11O0000O11O0000O11O0000O11O0O10000000O1001O00O0101O00O100000000001O00O1001O00O11O0O10O1001O00O11O0000O1001OO1001O00O1001O00O1001O0000O11O000000000O10000000000000000000000O02O00000O1000O2O1O01O0O1000010O001O2N7H8I7J:Ea8h1^GkM0>c8`20\\O]GTNd8f1^GhM0001N8f8o1_GhMM0O:d8P2aGTN`8k1`GUN`8i1[GiM51J9f8P2_GTNb8j1]GXNc8h1_GVNb8g1aGPNG1i8o1_GXNa8i1^GWNb8b1XGTN7:b8a1WGTN7;c8a1VGVN5:d8`1VGWN5:f8^1VGUN7;e8_1UGTN7=e8^1TGVN6 and ?", + "choices": [ + "A. is on .", + "B. is looking at .", + "C. 
is beside .", + "D. is looking at ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_342.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000289659.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "i_e2?e<>E8J5K4J6K5L3M3M2N3L5K4M3M4K6Ka0_O5L2M3M7I;EY1gN7I100O1OUNZISMg6g2eIQM\\6P3jIeL[6\\3ZHYLZ9R4J4K9H5K7I6J6J3M00iNYHjLh7Q3Z1J7M3L3M4N2K5M2N201O001000O1O1O1O2N1O2N3M7I:G4K7I6I7K7cGSK71X7e5L3N2NO2O01O0O20O00001POUI`Kk6\\4`IXKUO2\\7a4X1I6N3N2O010O11O1N4YGcKKo0]7i4100O1O100O100O100O1O0C_HPKd7j4a0L4N2N2M3M2O2M3N2J6J5RNlFBZ9=iF\\O\\9b0gFYO]9f0fFSO^9m0gFiN_9V1gF\\Nb9c1R10100O100O010O1O10O0100O010O010O100O010O00100O10O01O10O01O0010O01O10O01O010O01O01O10O01O1O00100OO20O010O10O01O10O01O01O0100O0010000O10O010O0100O010O10O01O01O1O10O1000O001000O010O0010000O001O1O10O1O01O1O10O100000O001O100O010000O10000000100O1O00O20O1O1O1O002M2N2N1O1E;O2N1O101O1O100O000100O1O101O2M2O002M2N3M2O0O1O1O0101N0010O1O2O1N2O1O1N2O1O0O2O1O010O000O100000000O1O2N1O1O1O2N2M2O0O3M2O1O001O2N2M4M2M5Kl]^1" + }, + { + "size": [ + 426, + 640 + ], + "counts": "kRZ37R=3N2[CKQ<6mC0m;2PD2n;f0N2M201O00O11O0000001O00001O001O1O000000O1000000001O001O1O1O1O2N1O1O4L5K2N2N;E001O1O3M2N4L2N2N6J;E2NgNYFoNg9l0jFmM@h0f9Z1RGbNn8]1VG`Nj8^1^G[Nc8d1b1N2N2O1N200O1O100O1O1O100O100O100O100O100O100000000O1000000O101L3O1O2N1N201N2N6I3NX8o2kGcLM=Y8P3jGdLL?", + "choices": [ + "A. is standing on and eating it.", + "B. is in front of and standing on .", + "C. is standing on and eating .", + "D. is standing on and eating ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_343.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000210032.jpg", + "mask_rles": [ + { + "size": [ + 401, + 640 + ], + "counts": "Yja12\\<3L4M3M3J6M3L5L3N2L4N2O1O1N2N20O01O2OO0100000000O1000000000000jNaNYG`1c8gNZGY1e8kNhFJL\\1[9_OdFa0\\9_OcFb0]9_ObFa0^9_ObFa0^9@`Fa0`9X10000000000000000000O10000000000000000000000000000000000000000O1001O00000VHWMl5i2UJVMk5j2n102N1aM_Fl1V:UNdE[1^:aNdE_1g:0001O00001O000O1001O00O2O00000O2O0O1N3K5N11O002O0O11O0O101M2N2M4M2N2O2M2O1O2N2N2N1O1O1O1O1O1O1001N3N000O01O001O1O00001O1O001O1O1O1O1O2MRY]4" + }, + { + "size": [ + 401, + 640 + ], + "counts": "jZY17P<=K5K4L3M3N2M200N3N2N2N100O2N100O1M4M2O1O10000N200O1O101N1O1N3L3O1N2O2N1O100O2O0O100O10001O0O10000000000O1000001OO1001N100O101N1O1O10000O1000000000000O1000001O01O0000000000000001O001O001O001O000O11O00000O101O0000000001O0000000001O00000O1000000000001O000000001O00000000001O0000001O00000O10000000000000000O100000001N1000000001O1O0O1000000O0100O1000000O10000000000000000000000000000000001N10000000001N1000001O001O00001O00001O00001O001O00000000001O0O101N10001O001O1N101O0O2N2O0O2N2N2O0O2N2N2N2N:F3M2N3M3J6L4K5L4K5LbQ]3" + } + ], + "question": "What is the relationship between and ?", + "choices": [ + "A. is eating .", + "B. is beside .", + "C. is looking at .", + "D. is holding ." 
+ ], + "answer": "C", + "type": "relation", + "image": "images/vqa_344.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000144114.jpg", + "mask_rles": [ + { + "size": [ + 400, + 600 + ], + "counts": "iU_31Y<7M3N1O1O010O1O1O1IDYD=e;800O1N2O100O1000000001N2O2N3M3]OWD9QgDC1003\\;h04M3K4H9N2O01000O010O1000O010O010O10O1000000O001O010000O010O1000O0010000O0100000O01O0100O01000O10O0100O0100000O0100O10O1O0100O01000O010000O010O0100O01O1000O10O0100N1100O1000O1KHQD8n;50100O10O1000O10O10O010O10000O1000000O010000000000000002M4M1O0O3K_i8" + } + ], + "question": "Based on the provided information, what is the relationship between and ?", + "choices": [ + "A. is in .", + "B. is over .", + "C. is driving .", + "D. is under ." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_345.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000144114.jpg", + "mask_rles": [ + { + "size": [ + 400, + 600 + ], + "counts": "iU_31Y<7M3N1O1O010O1O1O1IDYD=e;800O1N2O100O1000000001N2O2N3M3]OWD9QgDC1003\\;h04M3K4H9N2O01000O010O1000O010O010O10O1000000O001O010000O010O1000O0010000O0100000O01O0100O01000O10O0100O0100000O0100O10O1O0100O01000O010000O010O0100O01O1000O10O0100N1100O1000O1KHQD8n;50100O10O1000O10O10O010O10000O1000000O010000000000000002M4M1O0O3K_i8" + }, + { + "size": [ + 400, + 600 + ], + "counts": 
"2\\<400kMOkG1U80jG0W8OiG1[:0mMMiG3W8NhG2W80hG0X80hG0X81gGOY81gGOY81gGOY81gGOY81gGOZ80fG0Y81gGOY81gGOY81gGOZ80fG0Y81gGOY81gG0W81iGNW82jGNR86nGJP89oGGP8:PHFP8;oGEP8PHCn7>RHAo7?QHAn7a0QH_Oo7a0QH_On7b0RH^On7b0RH^On7b0RH^On7a0SH_Ol7b0TH_Ok7a0UH_Ok7a0UH^Ol7b0TH^Ol7b0TH^Ok7c0UH^Oj7b0VH^Oj7b0VH]Ok7c0UH]Ok7c0UH^Oj7b0VH^Oj7b0VH^Oj7a0WH^Ok7`0VH@j7?WHBj7:XHFk74XHKh76XHJh76XHKg75YHKg75YHKg74ZHKf76ZHJf76ZHKe75[HKX6LnH9j0KW6NnH7k0JW60nH5l0LU6OnH6m0KT60oH5m0KS61PI4m0KS62oH3n0JT63mH4o0JS61oH5n0JS61oH5n0JS61oH4o0JR63oH3o0JR63oH3o0KQ62PI3o0KP63QI2o0KP63QI2o0JQ63QI2o0KP63PI3P1Ko52QI3P1Ko52QI3P1Ko52QI3P1JP63PI3P1Ko51RI4o0Kn52RI4P1Jn52RI3Q1Ke32RL0XO3Q1Je38nKJ]O4P1Kc3>jKCC4P1Kc3b0fK_OF5Q1Jb3g0bK[OK4Q1Jb3Q1XKQO53R1K`3U1VKmN74S1J`3Y1QKjN<3S1J_3`1lJbNc04R1J^3g1gJ[Ni05Q1H`3Q2\\JSNS14Q1I^3_2aKhMQ1I^3_2aKhMQ1I^3_2aKhMQ1I^3`2`KgMR1I_3b2[KfMV1H`3b2YKeMX1I`3c2VKdMZ1Ib3c2RKdM\\1Ic3c2PKcM^1If3a2kJgM_1If3`2jJgM`1Ig3_2iJgMa1Jh3]2gJiMa1Ij3^2dJiMb1Jj3\\2dJjMb1Jk3\\2aJkMd1Im3Z2_JmMd1Io3Y2\\JnMe1HS4W2XJQNe1IS4U2WJSNf1HT4U2UJSNg1HU4U2SJSNh1GX4U2oISNj1HY4S2mIUNj1IY4Q2lIWNk1HZ4P2kIXNk1H\\4n1iIYNl1H_4l1eI\\Nl1H`4k1dI]Nl1I`4i1dI^Nl1Ib4g1bI`Nl1Hd4n1YIYNT2Id4n1WIYNU2If4m1SI[NW2If4k1SI\\NW2Ii4h1PI_NW2Hl4f1nHbNV2Hn4c1mHeNU2Ho4a1mHgNT2Io4^1nHhNT2IP5^1kHjNU2HQ5\\1kHlNT2HR5Y1lHoNR2IR5T1oHSOo1IS5R1oHUOn1HU5R1mHUOo1IT5S1lHTOP2IT5S1kHUOQ2IT5Q1kHVOQ2IT5Q1kHVOQ2HU5S1hHVOS2GU5S1hHVOS2HT5R1iHVOS2HU5Q1hHVOT2IT5Q1hHVOT2HU5R1gHVOT2HU5R1gHVOT2HU5R1fHWOU2HT5Q1gHWOU2GT5S1gHVOU2GT5T1fHTOW2IR5S1gHTOW2IR5S1gHTOW2IR5S1gHTOW2HS5T1eHUOX2GS5T1eHUOX2GR5U1fHTOX2HQ5T1gHTOX2HQ5T1gHSOY2HQ5U1fHSOY2Io4U1gHSO?Hl00n5U1gHTO50V1Gn5U1gH3Z1hNo5U1gH3Z1gNP6V1fH3Z1gNP6V1eH3\\1hNn5V1eHFJROi12g5W1fHYOU2_OV5X1eHQO]2Gn4X1eHQO]2Hm4W1eHRO^2Gl4X1fHPO_2Hk4X1fHPO_2Gk4Z1fHoN_2Gk4Z1fHoN_2Hj4Y1gHoN_2Hj4Y1gHoN_2Hi4Z1hHnN_2Gj4[1fHoN`2Gi4Z1gHoN`2Gh4[1hHnN`2Gh4[1hHmNa2Hg4[1hHmNa2Gh4\\1fHnNb2Gg4[1gHnNb2Gg4[1gHmNc2Hf4[1gHmNc2Gf4]1gHlNc2Gf4]1fHmNd2Ge4\\1gHmNd2Ge4\\1gHmNd2Gd4]1hHlNd2Fe4_1fHkNe2Gc4_1gHjNg2Gb4_1gHjNg2Gb4_1fHkNh2Fb4_1gHjNg2Fc4`1fHjNg2Ga4`1hHiNg2Ga4_1hHkNg2F
a4`1gHjNh2Fa4`1gHiNi2G`4`1gHiNi2G_4a1hHhNi2G_4a1gHiNj2F_4a1gHiNj2E`4f0gH0OEj2F_4e0iHONFj2F^4j0iHGOIj2F^4j0iH6j2PO\\4R2dKmM]4S2cKnM\\4R2dKnM\\4R2dKnM[4S2eKlM\\4T2dKlM[4U2eKlMZ4S2gKmMY4S2gKmMY4S2gKlMZ4T2fKlMZ4S2gKnMW4S2iKmMW4S2iKlMX4T2hKlMX4T2hKlMW4U2iKlMV4o0nHOl2ROV4T2jKlMU4U2kKjMV4V2jKkMU4T2lKlMT4T2lKkMT4V2lKjMT4V2lKjMT4V2lKkMS4U2mKkMR4V2nKjMR4U2oKjMR4V2nKjMR4V2nKkMQ4U2oKkMQ4U2oKjMQ4W2oKiMQ4W2oKiMQ4W2oKjMP4V2PLjMo3W2QLhMP4W2QLiMn3X2RLiMm3W2SLiMm3W2SLhMn3X2RLhMn3W2SLiMl3X2TLiMj3X2VLhMj3X2VLgMk3Y2ULgMk3Y2ULgMk3X2VLiMh3X2XLgMi3Y2WLgMi3Y2WLgMi3Y2WLhMh3X2XLhMh3X2XLhMg3Y2YLfMg3Z2ZLfMf3Z2ZLgMe3Y2[LgMe3Y2[LfMf3Z2ZLfMf3Z2ZLgMe3X2\\LhMd3X2\\LhMc3Y2]LfMd3Z2\\LfMd3Z2\\LfMd3Z2\\LgMc3Y2]LfMd3Z2\\LfMd3Z2\\LfMd3Z2\\LgMc3Y2]LgMc3Y2]LgMc3Y2]LfMd3Z2\\LfMd3Z2\\LfMc3[2]LeMc3[2]LeMd3Z2\\LfMd3Z2\\LgMc3Y2]LgMc3Y2]LgMc3Y2]LfMd3Y2]LgMc3Y2]LgMc3Y2]LgMc3Y2]LgMc3Y2]LgMc3Y2]LgMc3Y2]LhMb3X2^LhMb3X2^LgMd3W2]LiMc3W2]LiMc3V2^LjMb3V2^LjMb3U2_LkMa3U2_LkMb3T2^LmMa3S2_LmMa3S2_LmMa3R2`LmMb3R2^LnMc3Q2]LPNb3o1_LPNb3P2^LPNb3P2^LPNc3o1]LRNb3n1^LRNb3m1_LSNb3l1^LSNc3m1]LSNd3l1\\LTNd3l1\\LUNd3j1\\LUNf3j1ZLVNg3i1YLWNg3h1ZLYNe3g1[LYNf3f1ZLYNg3f1ZLZNg3e1YL[Ng3e1YL[Ng3d1ZL\\Ng3c1YL]Ng3c1YL]Ng3c1YL]Ng3c1YL^Nf3b1ZL]Nf3c1[L]Nd3d1\\L\\Nd3d1\\L\\Ne3b1\\L^Nd3b1\\L^Nd3b1\\L^Ne3a1[L_Ne3a1[L_Ne3a1[L_Ne3`1\\L`Ne3_1[LaNe3_1[LaNe3_1[LaNe3_1[LaNe3_1[LaNd3`1\\L`Nd3`1\\L`Nd3`1\\L`Nd3`1\\L`Nd3_1]LaNc3^1^LbNb3^1^LbNa3_1_LaNa3_1_LaNa3^1`LbNa3]1_LcNa3]1_LcNa3]1_LcNb3[1_LeNa3[1_LeNa3Z1`LfN`3Z1`LfN`3Z1`LfN`3Y1aLgN`3W1aLiN_3V1bLjN^3U1cLkN]3T1dLlN\\3S1eLmN[3R1fLnNZ3R1fLnNZ3Q1gLoNY3Q1gLoNY3P1hLPOX3P1hLQOW3n0jLROW3l0jLSOW3l0jLTOV3k0kLUOV3j0jLVOV3i0kLWOU3i0kLWOV3g0kLYOU3g0kLYOU3f0lL[OS3e0mL[OT3d0lL[OU3d0lL\\OT3d0lL\\OT3d0lL\\OU3c0kL]OU3b0lL^OT3a0mL_OU3?kLBU3JB5>LB4>LB3?MA3?MA3>NB1?OA1?OA1>0B0>0BO?1AO?1AN`02@N?3AM?3AL`04@L`04@L`04@Ka05_OKa05_OKa05_OKa05_OK`06@J`06@J`06@J`06@J`06@J`06@J`06@K>6BK=5CN:2F071I015OL040LO51KO51LN42LN42KN62IO71IN82HM93GM93GM93R6000cIN<2CO=1CO=1CO=1CN>2BN>2P60`INb02^ONb00" + } + ], + "question": "What is the spatial 
relationship between and the other objects?", + "choices": [ + "A. is over .", + "B. is over .", + "C. is over .", + "D. is in ." + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_346.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000369370.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "dje28g>3M2M4N1O2M3M4M2M2N3N3Ma0^O4M3L3N2N1N2N2N2N3N2M2N2O1N2N1O100O2O0O100O1O10000O2O0O1O100O1O1O11O1O000000O10000O10000O10000000O010000O1000O10O1000O01000O01000O010_OWDjMj;T2ZDiMf;W2\\DgMe;X2\\DgMe;Y2]DcMd;]2`000O010O1O100O10O10O10O0100O010O1O10000O10001O00000O10O10O001O1O1N101N2O1O0N3N2N3L3N3L5mNfB8W>CUXi4" + }, + { + "size": [ + 480, + 640 + ], + "counts": "b01o>0nYX50PfgJ4N7WAF2ON2S>i000O00000O101O000000000000000000001O00000000000000001O0000001O001O00010O0000001O0000001O001O000000001O0001O000001N1000000000001O00000000000000001O0000000000001O00001O001O001O001O2N1N2O001O1O1O1O1O1O1O1O1O001O1O001O1O000O2O001O001N10000O2O001N101O0O2O0O2N1O2N1O2N101M3N2N100O1N3N1O1O1O2N1O1O1N2O1O1O1O1N2O1O1O1O1O1O1O1O1O1O100O100000000O1000000000000001O00000000000000000000000000000001O0O11O000000000000000000000000000000000001O000000000000000000000000000000001N100001O00000000O100000001O01O000000O1000000001OO100000001O00000000000000000000000000000000" + }, + { + "size": [ + 480, + 640 + ], + "counts": "h1i4W:0001OO1KXKQFi4j9WKVF2Ng4m9WKUFm4k9TKSFm4m93O1LnJZFR5j9000O001N21O000O0100000000O010000000O10O100000N20O010000O10000000O0100O001O1O10000000O100O010000N1100000O1O1O1000O101N101O0000O1000O1000O10O0100O1O01000000O10O100000O1000000O10O10O100000O10O1000000O10000O100000000O10000O100000000O10O1000O10000000O010000O100000000O10000O10000O10O010000O1000O0100O10O10O101N10001N10000O101O0O101O0O2XLhEf2Y:WMlEe2V:YMmEe2T:XMoEf2S:XMPFf2S:iLaE3b0Q3T;M2O3L4M1N3N2N2M2O2aMjCo1f, , and ?", + "choices": [ + "A. is on and beside .", + "B. is on and beside .", + "C. is on and beside .", + "D. is on and beside ." 
+ ], + "answer": "D", + "type": "relation", + "image": "images/vqa_347.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000369370.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "lg_32m>2O1N101N2O1M4M2O1N2O0N3N101N1O2N1SOUOdCm0Z:G8J3O3I5M4N1N2M3M4L3N1O2M3M3N2N1O2M3N2M3O1N2N2N101O0O2O0O2O1N101N101O1N101O1N101N2O001N2N101O001N2O1O0O2O001N2O001N2O1N2O1O001N101O001N101N10001O0O101O1O0O101O000O101O00000O101O0O10001N10000O1000000O10000000O010O10000O01000O100000O010000000000O100000O10O100000000O0100000000000O1000000000O10O10000000000O10000O100O1000000O1000000O1000000000000000000O10000O1000000O10010O0O10000000000O11O00000000O11O00O100001O00O10000O100O2N11O1O0000O2N1000000O20O000001O0000010O0O11O010OO10001O00000O1100O0000O1O10001N1O1001O00O2O0O100O1O10001O000O1O10001N1L4N21O1O00O2L3001O01N100003NO000O10010O000000010O000O11O01O00001O0000001O0O100000000010O01N101O001N101O00001O00000001O01O00001O00010O00000O101O000000001O00001O00001O001N10001O0000001O0000001O0000001O00001O00001O01O0001O00001O001O001O001O1O001O100O2N2N2M3N2M3N3L3N3L3N1O1N2O1O3L2N2O2M4M3M2M3N4K5Ib]f2" + }, + { + "size": [ + 480, + 640 + ], + "counts": "b01o>0nYX50PfgJ4N7WAF2ON2S>i000O00000O101O000000000000000000001O00000000000000001O0000001O001O00010O0000001O0000001O001O000000001O0001O000001N1000000000001O00000000000000001O0000000000001O00001O001O001O001O2N1N2O001O1O1O1O1O1O1O1O1O001O1O001O1O000O2O001O001N10000O2O001N101O0O2O0O2N1O2N1O2N101M3N2N100O1N3N1O1O1O2N1O1O1N2O1O1O1O1N2O1O1O1O1O1O1O1O1O1O100O100000000O1000000000000001O00000000000000000000000000000001O0O11O000000000000000000000000000000000001O000000000000000000000000000000001N100001O00000000O100000001O01O000000O1000000001OO100000001O00000000000000000000000000000000" + } + ], + "question": "What is the relationship between the sandwich and the slice of bread ?", + "choices": [ + "A. is on .", + "B. is leaning on .", + "C. is leaning on .", + "D. 
They are beside each other on ." + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_348.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000527215.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "i8h02n2h8RMXGn2h8RMXGn2h8RMXGn2h8j0O100000000001O00000000000000000000000000000000O11O0000000000000000000000000000000000O1000000O100O100O1O10000O1000000O10000O1O10bKaGQ4_8oKaGQ4^8PLcGo3]8PLdGP4`7fK`H0a0:@P4_7gK^H1`09Do3^7gK]H2>;Gl3^7oKgH6Kk3]7RLeH4Nj3]7ULaH22i3]7kLcHU3^7jLcHU3]7jLdHV3\\7gLgHY3Y7gLfHZ3Z7fLfHZ3Z7eLgH[3Y7eLgH[3Y7eLgH[3Y7eLgH[3Y7lK]Ha0:c3Y7eLgH[3Z7dLeH]3[7kK\\H`09e3[7jK]Hb07d3]7hK]Hd06d3]7gK_Hd04e3^7eK`He02f3]7hK_Hb05e3[7kK^Ha06e3\\7kK\\Hb07c3]7eLcH[3]7fLbHZ3_7eLaH[3_7eL`H\\3`7dL`H\\3`7eL_H[3a7fL^HZ3b7gL]HY3c7W100001O000000001O000000000000000000001O000000000000001O00000000000000000000001O00000000000000000000000000001O001O001O1O001O00001O00001O001O001O00000000000000O100O100O100O100000000000000O1O100O1000000O1000000000000001O0000000000000000001O0000000000000000000000000000001O0000000000000000001O0000O1000000001O00000000000000000000001N11O00000000000001O00000hKUHQ3k7iL^HT3b7hLhHR3X7lLkHS3U7mLkHS3U7mLkHS3U7nLjHR3V7oLiHQ3W7QMgHo2Y7RMfHn2Z7SMeHm2[7YM_Hg2a7aMWH_2i7bMVH^2j7eMSH[2n7fMoG[2Q8_1000000000000000000000000000000000000000000000000000000000000000000000000000lKnGP3S8oLmGQ3R8PMnGP3R8PMnGP3R8PMnGP3R8oLoGQ3Q8oLoGQ3Q8oLoGQ3Q8T1000000000000000000000kKoGQ3Q8oLoGQ3Q8nLPHR3P8nLPHR3P8nLQHQ3o7oLQHQ3o7oLQHQ3P8nLPHR3P8nLPHR3o7oLQHQ3o7oLQHQ3o7nLRHR3n7nLSHQ3m7oLSHQ3n7nLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLRHR3n7nLRHR3n7nLRHR3n7nLRHR3n7oLQHQ3o7oLQHQ3o7oLQHP3P8QMoGo2Q8QMoGo2Q8QMoGo2Q8QMoGo2Q8QMoGo2Q8RMmGo2S8QMmGo2S8QMlGP3T8S1000000000000001OO100001O000001O0O01001O00000000000001O0O11O00000000000000O100001OO100000000000001O000iKmGX3R8cLZHV3f7gLaHU3_7kLbHT3^7kLcHU3]7lLbHT3^7lLbHT3^7oL_HP3b7RMYHQ3g7QMSHT3l7nLoGU3Q8mLlGT3T8Q100001O0_KXH`3h7[L^Hd3c7VLcHd0YOV
2V9gMXGn1X:J4L1OO1000nNSEGn:7SEIm:7UEGk:8WEGi:8XEHh:8XEHi:7WEIk:4VELn:0SEOS;JnD6U;GkD9Y;BhD>\\;]OeDc0a;UOaDk0n;1000000O1000000000000O100O1\\O]OaDc0Y;j0@`0L4O1O1L4WOi0^OgLmF_3b8UMYGm2^8\\LaGe4]85L4N2O10000000kKlGT3T8kLnGT3R8lLnGT3R8lLnGT3R8kLoGU3R8jLnGV3R8jLnGV3R8jLnGV3R8jLnGV3R8jLnGV3R8kLmGU3S8kLmGU3S8kLmGU3S8lLlGT3T8mLiGU3W8o01O00O10000000000000000000000000000001O0000000RLmGe2S8ZMnGf2R8XMPHh2P8XMPHh2P8XMPHh2P8XMoGi2Q8XMjGl2V8T1000000000000000nKiGQ3W8oLkGo2V8oLnGn2R8QMTHj2l7UMUHk2k7TMVHl2j7TMVHl2j7UMTHl2l7UMlGR3T8R1001O000000000000000000001O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000eG" + }, + { + "size": [ + 426, + 640 + ], + "counts": "l]f33U=4COZC;^<:O02N2N1N6J:F3O01OOi\\^4" + }, + { + "size": [ + 426, + 640 + ], + "counts": "ojk57R=4K3N2N2O0O2M3N4L3J4QEVO\\98_E1n0Mb90dE2c06h9FgE59<8@`8MRG7Oc0H8n8lN]G:D^2k8WMcG]3^8aLcG_3_8]LcGc3S9O0000000000F^LlFc3m8b0L6L4K6oG^Ko6h4kHYKU7j4gHWKY7m4bHUK]7o4^HRKa7\\5000000001O001N2O0O2SKYHe36nKb7;ZH_3^8_LcGn2MlLg83_Gn2R9nLPGo2g9O1N2jNkE`N4FN2Y:c1jE[NQ;P1eEmN]:h0Y1L4N2M4N2L6IVPe1" + }, + { + "size": [ + 426, + 640 + ], + "counts": "ldT13V=100O3M1O2N2N2O2O1O2N1O2LBgCOW and ?", + "choices": [ + "A. is looking at and playing with .", + "B. is flying over .", + "C. and are both playing with .", + "D. is walking on towards ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_349.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000527215.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "i8h02n2h8RMXGn2h8RMXGn2h8RMXGn2h8j0O100000000001O00000000000000000000000000000000O11O0000000000000000000000000000000000O1000000O100O100O1O10000O1000000O10000O1O10bKaGQ4_8oKaGQ4^8PLcGo3]8PLdGP4`7fK`H0a0:@P4_7gK^H1`09Do3^7gK]H2>;Gl3^7oKgH6Kk3]7RLeH4Nj3]7ULaH22i3]7kLcHU3^7jLcHU3]7jLdHV3\\7gLgHY3Y7gLfHZ3Z7fLfHZ3Z7eLgH[3Y7eLgH[3Y7eLgH[3Y7eLgH[3Y7lK]Ha0:c3Y7eLgH[3Z7dLeH]3[7kK\\H`09e3[7jK]Hb07d3]7hK]Hd06d3]7gK_Hd04e3^7eK`He02f3]7hK_Hb05e3[7kK^Ha06e3\\7kK\\Hb07c3]7eLcH[3]7fLbHZ3_7eLaH[3_7eL`H\\3`7dL`H\\3`7eL_H[3a7fL^HZ3b7gL]HY3c7W100001O000000001O000000000000000000001O000000000000001O00000000000000000000001O00000000000000000000000000001O001O001O1O001O00001O00001O001O001O00000000000000O100O100O100O100000000000000O1O100O1000000O1000000000000001O0000000000000000001O0000000000000000000000000000001O0000000000000000001O0000O1000000001O00000000000000000000001N11O00000000000001O00000hKUHQ3k7iL^HT3b7hLhHR3X7lLkHS3U7mLkHS3U7mLkHS3U7nLjHR3V7oLiHQ3W7QMgHo2Y7RMfHn2Z7SMeHm2[7YM_Hg2a7aMWH_2i7bMVH^2j7eMSH[2n7fMoG[2Q8_1000000000000000000000000000000000000000000000000000000000000000000000000000lKnGP3S8oLmGQ3R8PMnGP3R8PMnGP3R8PMnGP3R8oLoGQ3Q8oLoGQ3Q8oLoGQ3Q8T1000000000000000000000kKoGQ3Q8oLoGQ3Q8nLPHR3P8nLPHR3P8nLQHQ3o7oLQHQ3o7oLQHQ3P8nLPHR3P8nLPHR3o7oLQHQ3o7oLQHQ3o7nLRHR3n7nLSHQ3m7oLSHQ3n7nLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLQHS3o7mLRHR3n7nLRHR3n7nLRHR3n7nLRHR3n7oLQHQ3o7oLQHQ3o7oLQHP3P8QMoGo2Q8QMoGo2Q8QMoGo2Q8QMoGo2Q8QMoGo2Q8RMmGo2S8QMmGo2S8QMlGP3T8S1000000000000001OO100001O000001O0O01001O00000000000001O0O11O00000000000000O100001OO100000000000001O000iKmGX3R8cLZHV3f7gLaHU3_7kLbHT3^7kLcHU3]7lLbHT3^7lLbHT3^7oL_HP3b7RMYHQ3g7QMSHT3l7nLoGU3Q8mLlGT3T8Q100001O0_KXH`3h7[L^Hd3c7VLcHd0YOV2V9gMXGn1X:J4L1OO1000nNSEGn:7SEIm
:7UEGk:8WEGi:8XEHh:8XEHi:7WEIk:4VELn:0SEOS;JnD6U;GkD9Y;BhD>\\;]OeDc0a;UOaDk0n;1000000O1000000000000O100O1\\O]OaDc0Y;j0@`0L4O1O1L4WOi0^OgLmF_3b8UMYGm2^8\\LaGe4]85L4N2O10000000kKlGT3T8kLnGT3R8lLnGT3R8lLnGT3R8kLoGU3R8jLnGV3R8jLnGV3R8jLnGV3R8jLnGV3R8jLnGV3R8kLmGU3S8kLmGU3S8kLmGU3S8lLlGT3T8mLiGU3W8o01O00O10000000000000000000000000000001O0000000RLmGe2S8ZMnGf2R8XMPHh2P8XMPHh2P8XMPHh2P8XMoGi2Q8XMjGl2V8T1000000000000000nKiGQ3W8oLkGo2V8oLnGn2R8QMTHj2l7UMUHk2k7TMVHl2j7TMVHl2j7UMTHl2l7UMlGR3T8R1001O000000000000000000001O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000eG" + }, + { + "size": [ + 426, + 640 + ], + "counts": "l]f33U=4COZC;^<:O02N2N1N6J:F3O01OOi\\^4" + }, + { + "size": [ + 426, + 640 + ], + "counts": "ojk57R=4K3N2N2O0O2M3N4L3J4QEVO\\98_E1n0Mb90dE2c06h9FgE59<8@`8MRG7Oc0H8n8lN]G:D^2k8WMcG]3^8aLcG_3_8]LcGc3S9O0000000000F^LlFc3m8b0L6L4K6oG^Ko6h4kHYKU7j4gHWKY7m4bHUK]7o4^HRKa7\\5000000001O001N2O0O2SKYHe36nKb7;ZH_3^8_LcGn2MlLg83_Gn2R9nLPGo2g9O1N2jNkE`N4FN2Y:c1jE[NQ;P1eEmN]:h0Y1L4N2M4N2L6IVPe1" + }, + { + "size": [ + 426, + 640 + ], + "counts": "ldT13V=100O3M1O2N2N2O2O1O2N1O2LBgCOW and ?", + "choices": [ + "A. is playing with , while is walking on .", + "B. is looking at , who is walking on .", + "C. Both and are flying over .", + "D. is walking on , while is playing with ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_350.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000404128.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "aX?2W=5M1N2O001O00001O0000001O00000000001O00001O000000001O0000001O000000001O0000000000001O01O000001O000000001O0000001O00000000000010O0000000001O000000001O00000000000010O0000000001O00000000001O0000000eC]On;d0oC^OQW:[10O01OFeEeM[:W2>1O0O201N100O1O3UNoMXHR2f7UNSHm1k7YNPHi1n7]NlGf1R8aNdGe1X8m1LO5K4M2013L06F9H7H2_OlFeLW9X3?N2O1O1O101C relative to and ?", + "choices": [ + "A. is enclosing .", + "B. 
is parked on .", + "C. is under and over .", + "D. is over both and ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_351.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000493905.jpg", + "mask_rles": [ + { + "size": [ + 640, + 571 + ], + "counts": "TYf43R?0eE2Y:OgE2U:1kEOU:2jENV:4hEMW:6fEJX:9gEGY::fEEZ:=dED\\:=cED[:`0aEA_:`0]ECc:>[ECf:RDBo;?PD@Rg0T@[Ob1O[>f0o_O_Oe1J^>]1m_OcNS11P?P2j@PNW?T2d@mM]?T2`@lMa?W2[@iMg?X2V@iMj?X2U@gMl?Z2R@fMn?[2Q@fMo?Z2P@fMQ`0Z2n_OfMS`0Z2m_OeMU`0Z2i_OhMW`0X2h_OhMY`0o22OO01OV@gLn>X3m@TM]ODZ?X3UAaMBYOm>V3^APN]>P2cAaNl=`1RBdNk=]1TBfNh=\\1UBiNh=j3`DQJk8Q6RGQJm8Q6PGPJP9Q6nFPJR9S6jFnIV9T6hFmIX9V6bFlI^9W6^FiIc9Z6YFgIh9\\6RFfIn9]6lEfIT:\\6hEfIZ:[6ZETI^Ob0X;\\6VEUI_Oa0\\;[6SElIo:W6lDjIU;`71O1N2O3nLZDdMR^41O1N2\\LmAX1U>PNdBm1_=nMgBn1\\=mMjBP2Y=jMlBS2Z=^Mo@No1a2^?M4K5Mk0TO7I4L4K4M2N1N3N1O1O2N1O2N2N2N1O2M3L_bb3" + }, + { + "size": [ + 640, + 571 + ], + "counts": "SRm34jc04M2O0O2O001O000000O2N2O1NoQo6" + }, + { + "size": [ + 640, + 571 + ], + "counts": 
"hP11Z`0[3k_OaM0]5m8TMRG_M0^5n8SMQGaM0\\5o8SMQGaM0\\5o8SMQGaM0\\5o8SMQGaM0\\5o8SMQGaM0\\5o8SMQGaM0\\5o8SMQGaM0\\5o8SMQGbMO[5P9SMQGaM0\\5o8SMQGaM0\\5o8SMQGaM0\\5o8SMQGbMO[5P9SMQGbMN\\5Q9RMQGbMO[5P9SMQGbMO[5P9SMQGaM0\\5o8SMQGaM0\\5o8SMQGaM0\\5o8SMQGaM0\\5o8SMQGaM0\\5o8SMQGaM0\\5o8SMQGbMN\\5Q9RMQGo3o8QLQGo3o8QLQGaMO]5P9RMQGaMO]5P9RMQGaMO]5P9RMQGaMO]5P9RMQGaMO]5P9QMRGaMO^5o8QMRGbMM^5Q9QMQGo3o8QLQGo3o8PLRGP4n8PLRGbMM^5Q9PMRGaMN_5P9PMRGaMN_5P9PMRGaMN_5P9PMRGaMN_5P9PMRGaMN_5P9PMRGaMN_5P9PMRGaMN_5P9PMQGbMN_5Q9oLQGbMO^5P9PMRGaMN_5o8QMSG`MN_5o8QMSG`MN_5o8QMSG`MN_5P9PMRGaMN_5P9PMRGaMN_5o8QMRGaMO^5o8QMRGaMO^5o8QMRGaMN_5P9PMRGaMO^5o8QMRGaMO^5o8QMRGaMO^5o8QMRGaMO^5o8QMRGaMO^5o8QMRGaMO^5o8PMSGbMN^5o8PMSGbMN^5o8PMSGbMM_5P9oLSGbMM_5P9oLSGbMN^5o8PMSGbMN^5o8PMSGbMN^5o8PMSGbMN^5o8PMSGbMN^5o8PMSGbMN^5o8PMSGbMN^5o8PMSGbMN^5o8PMSGbMN^5o8PMSGbMM_5P9oLSGbMN^5o8PMSGaMO_5n8PMSGaMO_5n8PMSGaMO_5n8PMSGaMO_5n8PMSGaMO_5n8PMSGaMO_5n8PMSGaMO_5n8PMSGaMO_5n8PMSGaMN`5o8oLSGaMO_5n8PMSGaMO_5n8PMSGaMO_5n8PMSGaMN`5o8oLSGaMN`5o8oLSGaMN`5o8oLSGaMN`5o8oLSGaMN`5o8oLSGbMM_5P9oLSGbMM_5P9oLSGbMM_5P9oLRGbMO_5o8oLRGcMN^5P9oLRGcMN^5P9oLRGR4n8nKRGcMN^5P9oLRGcMN^5P9oLRGcMN^5P9oLRGcMN^5P9oLRGcMN^5P9oLRGcMN^5P9oLRGcMN^5P9oLRGcMN^5P9oLRGcMN^5P9oLRGcMN^5P9nLSGdMM^5P9nLSGdMM^5o8oLTGcMM^5o8oLTGR4l8oKSGQ4m8nKTGR4l8nKTGR4l8nKTGR4l8nKTGR4l8nKTGR4l8nKTGR4l8nKTGR4l8nKTGR4l8nKTGR4l8nKTGR4l8nKTGaMMb5o8mLTGaMMb5o8mLTGaMMb5o8mLTGaMMb5o8mLTGR4l8nKTGaMMb5o8mLTGaMMb5o8mLTGaMMb5o8mLTGaMMb5o8mLSGbMNa5o8mLSGbMNa5o8mLSGbMNa5o8mLSGbMNa5o8mLSGbMNa5o8mLSGbMNa5o8mLSGbMNa5n8nLTGaMNa5n8nLTGaMNa5n8nLTGaMNa5n8nLTGaMNa5n8nLTGaMNa5n8nLTGaMNa5n8nLTGaMNa5n8nLTGaMNa5n8nLTGaMNa5n8mLUGbMMa5n8mLUGbMMa5n8mLUGbMMa5n8nLTGaMNa5n8nLTGaMNa5n8nLTGaMNa5n8nLTG`MOb5m8nLTGaMNa5n8nLTGaMNa5n8nLTGaMNa5n8mLUGbMMa5n8mLVGaMLb5n8mLVGaMLb5n8mLVGaMLb5n8nLVG_MLc5n8mLWG`MKc5n8mLXG^MKe5m8mLXG^MKe5m8nLXG]MJe5n8nLXG]MJe5n8nLYG\\MIf5n8nLZGZMIh5m8nLZG[MHg5n8nL[Gk3e8UL\\Gj3d8WL[GXMGi5n8oL\\GVMGk5m8oL]GVMEk5n8PM\\Gf3d8ZL]Ge3c8\\L\\Gd3c8]L^Gb3b8_L]Ga3c8_L^G`3b8aL]G_3c8bL]G]3c8cL^G\\3b
8eL^GZ3b8fL^GZ3b8gL^GX3b8hL^GX3b8iL^GV3b8jL_GU3a8lL^GT3b8mL^GR3b8nL^GR3b8oL^GP3b8QM]Go2c8QM^Gn2b8RM^Gn2b8SM^Gl2b8TM^Gl2b8UM^Gj2b8WM^Gh2b8YM^Gf2b8ZM`Gd2`8]M`Gb2_8_MbG`2^8aMaG_2_8bMaG]2_8cMaG]2_8cMbG\\2^8eMaG[2_8fM`GZ2`8gM_GY2a8gM_GY2a8hM_GW2a8iM_GW2a8jM_GU2a8kM_GU2a8lM_GS2a8mM`GR2`8oM_GQ2a8PN_Go1a8RN^Gn1b8RN_Gm1a8SN`Gl1`8UN_Gk1a8UN`Gj1`8WN_Gi1a8XN_Gg1a8ZN_Ge1a8[N_Ge1a8\\N_Gc1a8]N_Gc1b8oIVGV48j1b8nIYGW45k1b8nI[GV43k1b8PJ\\GS42m1b8oI]GU41k1P9UNPGj1P9WNoFi1Q9WNPGh1P9YNoFg1Q9YNPGf1P9[NPGd1P9]NPGb1P9^NPGb1Q9]NPGb1P9_NoFa1Q9_NPG`1P9aNPG^1P9bNPG^1P9cNPG\\1P9eNoF[1Q9eNPGZ1P9gNPGX1P9hNPGX1P9iNPGV1P9kNoFU1Q9kNPGT1P9lNPGT1Q9lNoFS1Q9nNnFR1S9mNnFR1R9oNnFP1R9QOmFo0T9POlFP1T9QOlFn0b8`JcGb4Kn0b8aJcGa4Km0b8bJcGa4Km0a8dJcG`4Lk0a8eJcG`4Mj0`8fJdG`4Kj0a8gJcG`4Lh0a8hJcGa4Lf0a8jJcG_4Lg0`8kJeG]4Lg0_8lJeG^4Kf0`8mJeG]4Ke0`8nJeG]4Ld0_8oJeG^4Lb0_8PKfG^4Jb0`8PKfG^4Ka0_8RKfG]4Ja0`8RKfG]4K`0_8TKeG\\4L`0_8TKfG\\4J`0`8TKfG\\4J`0`8TKgG\\4I?`8UKgG\\4J>`8UKgG]4H>a8UKgG^4HfK]AY4c>hK\\AX4c>iK^AV4b>kK^AT4b>lK^AT4a>nK_AQ4a>PL^AP4a>RL_Am3`>TLaAk3_>VL`Aj3b;oKgF8gMi3a;SLdF6jMg3c;TLaF5lMg3d;WL[F4QNd3h;ULVF8QNc3j;YLoE6WN`3l;lLXEFkN^3SkLeAU3Z>mLfAR3Z>oLeAQ3Z>PMgAo2Y>QMgAo2X>SMhAl2_8ULkLo0gJk2]8ZLiLl0jJi2]8]LUIKRN1V3P1VMf2]8`L[Hl0l1O]Md2\\8cLWHm0n1N_Ma2\\8fLRHQ1o1JcM^2[8jLoGR1P2HfM[2[8mLiGU1T2DiMY2Z8UM]GV1\\2^OlMW2[8@eIYNPNW2[8D`IVNUNV2[84oHhMeNT2\\8>dH`MoNR2]8c0]H^MVOn1]8i0WH\\MZOk1_8k0TH\\M]Oh1_8m0QH^M_Oe1`8n0oG^MBc1_8P1lG_MF`1^8Q1lG`ME_1_8R1jG`MH]1^8S1iGbMIZ1^8U1gGbMKY1^8V1eGbMNW1]8W1cGeMOT1^8X1aGfM1Q1^8Z1_GgM3n0^8]1[GhM6k0_8S4bGlK^8U4aGkK_8U4aGkK_8U4bGjK^8V4bGjK^8W4bGhK^8Y4aGgK_8Z4`GfK`8[4`GdK`8\\4aGcK_8]4bGbK^8_4bG`K^8`4bG`K^8`4bG`K^8a4bG^K^8b4bG^K^8d4`G\\K`8d4`G\\K`8e4`GZK`8g4_GYKa8g4_GYKa8h4aGUK_8k4bGTK^8l4bGTK]8n4cGQK]8P5cGoJ^8Q5bGnJ^8R5bGnJ^8S5bGlJ^8T5cGkJ]8V5cGiJ\\8X5eGgJ[8Y5eGgJ[8Y5fGfJZ8[5fGdJZ8]5eGcJ[8^5eGaJ[8_5fG`JZ8a5eG_J[8b5eG]J[8d5eG[J[8e5eG[J[8f5eGYJ[8g5eGYJ[8h5eGWJ[8i5fGVJZ8k5fGTJZ8l5fGTJZ8m5fGRJZ8o5eGQJ[8P6eGoI[8Q6eGoI[8Q6fGnIZ8S6eGmI[8S6fGlIZ8U6fGjIZ8W6eGiI[8X6eGgI[8Y6fGfIZ8[6fGdIZ8\\6hGbIX8_
6gGaIY8_6gGaIY8`6fG`IZ8`6gG_IY8b6gG]IY8d6fG\\IZ8e6fGZIZ8g6eGYI[8h6eGWI[8j6eGUI[8k6eGUI[8k6eGUI[8k6fGTIZ8m6fGRIZ8o6eGQI[8o6fGPIZ8Q7eGoH[8R7eGmH[8T7eGkH[8U7fGjHZ8W7eGiH[8W7eGiH[8X7eGgH[8Y7eGgH[8Z7eGeH[8[7eGeH[8\\7eGcH[8]7eGcH[8^7eGaH[8_7fG`HZ8a7eG_H[8a7fG^HZ8b7fG^HZ8c7fG\\HZ8e7eG[H[8e7fGZHY8h7fGXHZ8i7fGVHZ8k7eGUH[8l7eGSH[8m7eGSH[8m7fGRHZ8o7eGQH[8o7fGPHZ8Q8eGoG[8Q8fGnGZ8S8eGmG[8S8fGlGZ8U8fGjGZ8W8fGhGZ8X8fGhGZ8X8gGgGY8Z8gGeGY8[8gGeGY8\\8gGcGY8]8hGbGX8_8gGaGY8`8gG_GY8b8fG^GZ8b8gG]GY8d8fG\\GZ8d8gG[GY8e8hGZGX8g8gGYGY8g8hGXGX8i8gGWGY8j8gGUGY8l8fGTGZ8l8fGTGZ8m8fGRGZ8n8gGQGY8o8gGQGY8P9gGoFY8Q9hGnFX8S9hGlFX8U9gGkFY8V9gGiFY8W9gGiFY8X9gGgFY8Z9fGfFZ8Z9gGeFX8\\9hGdFX8\\9iGcFX8]9hGbFX8_9gGaFY8`9gG_FX8b9hG^FX8c9gG]FZ8c9fG\\FZ8d9gG[FY8f9gGYFY8g9gGYFY8g9hGXFX8i9gGWFY8j9gGUFY8l9fGTFY8m9hGRFX8o9gGQFY8o9hGPFX8Q:hGnEX8R:iGmEW8T:hGlEX8U:hGjEX8W:hGhEX8X:hGhEX8Y:hGfEX8Z:hGfEX8[:hGdEX8\\:hGdEX8]:gGcEY8^:gGaEY8`:fG`EZ8a:fG^EZ8b:fG^EZ8c:fG\\EZ8d:gG[EY8f:gGYEY8g:`010Od:YEg0" + } + ], + "question": "What is the relationship between , , and ?", + "choices": [ + "A. is playing with and jumping over .", + "B. is playing with and standing on .", + "C. is talking to and is beside .", + "D. is holding and standing on ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_352.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000272212.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "PVZ73l>2O1M2O2M1O2N200N2O101N1kA]Om=c0SB]Om=c0RB_On=a0QB_Oo=b0oA_OQ>h01O0\\OPB8P>EUB9Z>N101O010N2OjTe1" + }, + { + "size": [ + 480, + 640 + ], + "counts": "TVU22m>2O0O2O0000000O2L9\\ADX>c0N2N1000001O01O01O010O100O1O2M2O2M2O4K4M6I;F3M1O00YOg0O1O1O1O10000O11O001O2N2N4L2N2NCiBROV=n0kBSOS=m0oBUOmn0L4K5O13N6IWOiBFY=6iQn5" + }, + { + "size": [ + 480, + 640 + ], + "counts": "m7R7n7000000000001O00001O0000000000000000001O00000000001O000000000000000000000000001O0000000000001O0000000000000000000000001O0000000000000000001O000000001O000000000O1000000001O0000O1001O0000000000000000O10000001O00000000O100000000001O0000O2O0001O00000000000000O20O00000000000001O0000000O1UJfGQ5Y8oJhGP5X8PKiGo4W8QKiGo4W8QKjGn4W8PKjGP5V8PKkGo4U8QKkGo4U8PKlGP5T8PKmGo4T8PKlGP5T8mJoGS5Q8lJSHQ5m7oJ]Hg4c7YK^Hf4b7YK`Hf4`7ZKaHe4_7ZKbHf4^7ZKdHd4\\7\\KeHc4[7]KeHc4[7]KeHc4[7^KdHb4\\7^KcHc4]7]KcHc4^7\\KcHc4]7]KcHc4]7]KfH`4Z7`KgH_4X7bKjH\\4W7cKkH[4U7eKmHY4S7gKPIV4P7iKSIU4m6kK_Ii3a6VLfId3[6[LfId3Y6\\LiIc3W6^LhIb3X6^LhIb3X6]LhId3Y6[LXIT4h6lKQI[4o6eKoH]4Q7dKmH]4S7bKnH^4R7bKnH]4S7cKmH]4S7cKnH\\4R7eKnHZ4R7eKQIY4o6gKSIW4m6iKVIU4i6kKXIT4h6lKZIR4g6mK[IP4f6PL]Im3c6SL`Ij3`6WL`Ih3`6XLlHFWOR4l7WLmHLSOm3P8WLlH3nNf3V8WLlH5lNd3X8WLlH6jNd3Z8VLlH`4T7`KlH`4T7`KkH`4W7_KiHa4W7^KkHa4U7_KkHb4T7^KmHa4S7_KmHa4S7_KnH;eN^3]8WLnH7iNb3Y8VLRI1iNi3U8VLfIj3Z6VLfIi3[6WLfIh3Z6XLfIh3[6WLeIj3Z6VLfIj3Z6WLeIi3[6WLeIi3[6WLeIi3[6WLeIi3[6XLoH]4Q7cKiHc4W7^KiHa4X7`KgH_4Y7bKgH]4Y7dKeH]4[7eKjGT5V8nJcGW5]8f000000000000O11O0000000000O10000001O01O00000O10001O000001O00000000O2O00000000001O000000000000000000000000000000000000000000000000000000000001O00O1000000000000000000000001O0000000000000000000000000001O00O100000000000000000000000001O00000000000000001O0000001O00001O00000000001O000000000000001O00000000
00001O00000000001O0000000000000000001O00O1001O00000000000001O00O100000001O000000000000000000001O0000000000000000000000001O00000000000000001O00000000000000001O0000000000000000000000000000000000000000000000001O0000000000000000000aJjFT5W9iJkFW5V9hJjFX5V9hJjFX5W9fJjFZ5V9fJjFZ5V9fJjFZ5V9eJjF\\5V9cJkF]5\\900000001O00001O0000001O0000001O001O001O0000000000001O00000001O00O10000000000000000000001O0000001O00000000001O00000000O1000000001O00O10000000000000000000000O10000O100_OiJYGW5g8jJWGW5i8lJSGU5m8lJQGU5o8mJlFV5T9;000000000000000000000000001O0000000000001O00000000000000001O000000000000000001O0001O0000gF" + }, + { + "size": [ + 480, + 640 + ], + "counts": "PeZ74j>3N1O1N2N2N2O1O101N10000jA^On=b0QB@n=a0QB_Oo=b0oA@Q>g00000000O01O1O012N3M00000010O001O006K0O00000000001O0000000000000000000000000001O01O00010O00001O001O000000001O000000001O001O1O1O1O1N2O00001O0O101O0O2O1N3Lege0" + } + ], + "question": "Which statement accurately describes the state of the cows on ?", + "choices": [ + "A. All three cows, , , and , are walking.", + "B. All three cows, , , and , are lying down.", + "C. is walking while and are lying down.", + "D. is walking while and are lying down." 
+ ], + "answer": "D", + "type": "relation", + "image": "images/vqa_353.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000574520.jpg", + "mask_rles": [ + { + "size": [ + 399, + 640 + ], + "counts": "g`_32Y<4N3N1PDGj;9TDIl;=00000O1O101O1N[EIU96iFOV9OhF5W9JgF9Y9GeF;Z9EeF=Z9CeF`0Y9@gFa0Y9^OfFd0Y9]OfFd0Y9\\OfFf0Y9ZOgFf0Y9ZOgFg0X9YOhFg0Y9YOfFh0Y9XOgFh0Y9XOhFg0X9ZOgFg0X9YOiFe0X9[OiFd0W9]OiFa0X9_OiF`0W9AhF>Y9BiFEd0[OoN2Ei4c0TMGPO`0Db0l4WOaLFnNc0Ca0n4VOaLGmNb0C`0Q5WO`LFlNd0A?T5WO_LFlNd0AOB@c57^LFlNe0_ONFTOl5c0SLFlNf0^OKj5IlKFlNf0^OIl5KjKFkNh0^OGm5KjKFkNh0^OGGWOS6d0mKGkNg0]OJj5IoKEjNg0^OKi5IoKFjNe0^OLi5IoKFjNe0^OLj5HnKHjNb0_ONi5HnKHkNa0^OLl5KkKIkN?^OLm5LjKIlN?\\OLn5LjKImNe0TOFU6LjKJmN`1Y5fNkKIlN`1Z5gNjKJdNd1d5bNhKKUNFKP2Y6_NhKc2X4]MhKb2Y4^MhKa2X4_MhKa2X4_MiKa2V4_MjKa2V4_MjKa2V4_MjKa2V4_MkK`2U4`MlK_2T4aMlK`2S4`MnKa2P4_MTL]2l3cMYLX2g3hM]LT2c3lM`LR2_3nMdLo1\\3QNhLl1W3TNoLg1P3YNSMd1m2\\NTMd1k2\\NVMd1i2\\NYMc1f2]N[Mc1d2]N]Mc1b2]N^Mc1b2]N^Md1a2\\N`Md1_2\\NbMc1^2]NcMb1]2^NdMa1\\2_NeM`1dLlM]5d0PN_1bLoM]5b0TN\\1]LWN\\5=XN[1\\LYN[5iNAX1`0iN]OX1c0Z500000000O10000001O00O10000000000001O00000000000000000000000000000000000000000000000000000001O0000O10000000000001O000000O100000000001O000000O1000000001O00000000O1000000001O0000O1000000000000001O0000000000000000000O11O00000000000000O100001O00O10O11O0000000000000000000000000000000000O1001O00000000000000O1010O0O100000O2O00000001O00000O11O0000O100O1001O001O001O0000O100O100000000000000000000000000000000O100001O00001O000000000000O10000001O0000_O" + } + ], + "question": "Which statement accurately describes the relationship between , , and ?", + "choices": [ + "A. is in , while is over .", + "B. and are both in but are not touching.", + "C. is standing on , and both are beside .", + "D. is standing on , and is on ." 
+ ], + "answer": "D", + "type": "relation", + "image": "images/vqa_354.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000289417.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "gXn43a;4N2N00M3001N110SE4n9MoE7P:h0G:N1OI8M3N2O12O6J10O001N2\\OdEA1N\\:=SF@o9" + }, + { + "size": [ + 375, + 500 + ], + "counts": "SRf21f;000]j<0bUC1bG5P5KmJ, , and ?", + "choices": [ + "A. and are both beside .", + "B. is in front of , who is in front of .", + "C. is in front of , who is in front of .", + "D. is beside , and is behind ." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_355.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000439854.jpg", + "mask_rles": [ + { + "size": [ + 333, + 500 + ], + "counts": "hmT19o0JR8=gGDX8`0SGG3Jj8o0SGROl8X100ZOjNUHV1i7nNUHQ1k7QOTHo0k7SOTHm0k7UOTHk0l7VORHL26l70QHJ54j73PHI73i75oGH;0f78oGH=Me7 and ?", + "choices": [ + "A. is on and is riding .", + "B. is riding and is on .", + "C. is beside and is on .", + "D. is on and is riding ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_356.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000474164.jpg", + "mask_rles": [ + { + "size": [ + 640, + 633 + ], + "counts": "h`b03kc04M2N2N1O1O2N1O100O1O011O2N3M3L2O1O1O000000000O2OO1000000000O0100N1O1O110O01O1O010O100O1O100O100O01J7]O^d:l0W[E101O0000000000000000O1O1N2J6J6K6L3M3O1N3NTXP:" + }, + { + "size": [ + 640, + 633 + ], + "counts": "^i1h13jN^a0R2J1O1L4O1000000O1000000000000TNUMcBk2Y?01O00000000iMVMYCi2g00000O1001O^MXMlCh2T000^MWMlCj2f>O0000TMYM_Dg2a;YM^Dh2a;YM`Df2a;XM`Dh2]>OO1oLWMkDi2U;WMkDi2S;YMmDg2R;YMoDg2P;[MoDe2P;]MnDd2R;XMSEg2S>00000O11O0000`LXMhEh2X:XMhEh2i=O3M0000M3003M00M30000000YMj^Od2Va0300000YMj^Od2Ya0000M[Mk^Oe2Ua0300000000000000I7L4ZOmLZ@X3c?QNP@VOk0AUO1`00_O0k?a1W@^NQ21mM0R?m4_O>I7J6K5H8L4J6M3L4K5J6N2H8L4K5N2N2O1O1O1O1N2O1O100O1N2J6M300O100O100N20000O1000000O1000000000000O100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O100000000000000001O00000000000000O10000000000000000000000001O00000000000000000000000000000000000000O11O000000000000001O00000000001O0000000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000001OO10000001O0000000000000000000000000000001O00000000000000O1000000000000000000000000000000000000000000001O0000000000000000000000000000001OO10000001OO1001O00000000000000000000000000000000000000001O000000000000000000001O000000000000000000001O000000000000000000000000000000001O0000000000000000001O0000000000000000001O000000000000001O000000001O000000000000001O000000001O000000001O001O000000001O00001O001O00002N001O001O2N1O001O002N1O001O1O3M1O1O001O2N1O1O002N2N1O001O1O2N1O001O2N2N1O00003M2N1O001O2N1O1O1O1O1O2N1O001O1O2N2N1O1O1O2N1O001O1O3M1O002N1O1O1O1O1O1O2N2N1O001O2N1O001O1O3M1O1O001O2N1O001O1O2N1O1O001O3M1O00001O3M1O001O1O2N1O001O1O2`MW@\\NO2N?3_ON303Ra06n^OGW1Nad0" + }, + { + "size": [ + 640, + 633 + ], + "counts": 
"WUm:1WP50Q\\O1fWL2N1O1O1O2N1O1O100O1O2N100O1O1O1O2N1O1O1O1O2N100O1O1O1O101N1O1O1O1O001O100O1O2N1O1O100O1O1O1O100O1O1O1O1O1O1O1O1O1O10NQB" + }, + { + "size": [ + 640, + 633 + ], + "counts": "RZn44jc05g@I`K1n>;\\EKb:8YEMd:4\\ELc:6[ELc:9YEId:?TECk:>PEHm:;PEGn:;XDWOaMb0V>:ZCiNoNf1g=CXBiNO8Bi1V>YOUBkNLd2n=cNQBnNHf2V>_NnAV2Q>m1000000000000001O000O100O10000000000000000000000000000000000000000000O1O1O2O000001O001O1O1O1O1O1O1O1O001O00000dKiAo2W>kLUBP3k=eLdBV3\\=iLiBS3W=jLoBS3R=kLQCS3oL9G4L3M2hNP_O\\O[a06l^OFXa0DX^O0o02eb0Kl`e5" + } + ], + "question": "What is the location of ?", + "choices": [ + "A. Inside .", + "B. Sitting on .", + "C. Sitting on .", + "D. Sitting on ." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_357.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000565391.jpg", + "mask_rles": [ + { + "size": [ + 640, + 480 + ], + "counts": "e6173a2M\\M0R>3nBM13QO14O[1NbN3NNM0e=n2\\BiNOOOZN218Z4f7f7K1N2O001O1O001O001O001O001O1N101O00001O001O001O1O1O00001O001O001O1O001O001O00001O1N2O1O001\\LPC6P=]3001O1O1O1O001O00002N001O1O001O1O1O1O1O001O002N1O001O1O2N2N1O2N1O1O1O3M2N2N4L2N1O001O2RKaAW4b>eK`AHNL0W4f>n04L4L3M5K6J;`KZ@P4m?N1O1O3M1O1O00003M1O1O00O1002N01O03M0O100001O00O1001O0000O100001O0000000000000000000000000000000000O11O00O10000000000000000000000000000000000000000000000001O00000000000000000jE`Ll3`3UL_Lk3a3QL_LS4a3V6000dE_LY4a3S6000bE_L]4a3cK_L]4a3cK_L]4a3Q600000000000000000000000000000000000000000000000000000000000000000000001O00000000000000000000000000000000000000000000000000000000000000000000000000001O0000000000000000001O000000000000000000001O0000001O001O0000001O0000001O001O00001O1O001O001O001O1O001O1O1O1O1O2X@lK`?\\4N2N2N2N2N2N3M2N2N2N3M3M2N2[EdJb6_5n36J3M2N2N3M3M3M2N1O3M5K3M2N2N3M3L4M3N1N3M3M3M1O2N2N3M3M1O2N2N4L2N2N2eEjGh8X8UGjGj8Z8nFbGPO4R:]8kF_GSO4R:_8mFbGR9a8jF`GV9b8eFbGZ9b8_FaGa9b8\\F^Gd9c8ZF^Gf9d8QFoF09NIP:d9PF\\FP:c9QF^Fn9b9RF^Fn9j901O000000001O00000N31N00LoEYFR:f9oEYFQ:g94
0lEXFP:h9oEYFQ:g940000000001O00000O11O000000000001O000O11O0aM_FYHNo1c9h5_FYHOn1d9g5]F[HOn1f9]5XFfH5NOm1d9\\5]FgH:1Db1b9i5TGdH[Oc1a9i5^GVJa8g5ZFgHV1c1_8]N\\FS7X1bJ\\8[N\\FS7W1cJ]8`5dG`J]8`5bG`J_8_5aGaJa8]5_GdJ`8\\5`GdJ_8]5aGcJ^8^5bGbJ`8\\5_GfJ`8Y5_GiJa8W5YGoJd8T5\\GlJd8T5\\GlJe8S5[GmJg8Q5YG^IQOm0f9e5YGZIUOQ1`9h5YGXIWOP1`9h5YGXIWOQ1`9f5YGUI[OU1]9e5XGUI]OU1W9i5]GQI]OV1V9i5]GQI]OV1T9k5_GoH]OV1T9k5_GPI[OV1V9j5_GoH\\OW1V9j5VGgHA27\\1R9k5UGiHAN9_1Q9j5VGhH2^1h8j5VGgH3`1f8i5VGiH3^1c8m5YGfH5\\1_8R6lF_HO<3D:_1g8S6lFRIL^O2O0O9a1k8R6nFUIM^O:[1k8S6mFUIM^O:Z1k8V6kF\\I4nNOa1P9c6mFaH3]OMb1Q9Z9XKWE]LKV12GP;^NoD0Ob0o5_O^J5c0Iac0" + }, + { + "size": [ + 640, + 480 + ], + "counts": "aX`74lc04K3N2N2N1O:F3M00000000O1O10000000O01000000O100OX]OAQb0?k]OEUb0:k]OGUb08k]OIUb07k]OIUb06k]OKUb05k]OKUb04k]ONTb01l]O0Tb0Om]O1Sb0Ol]O2Tb0Mm]O3Sb0Ml]O4Tb0Km]O5Sb0Km]O6Rb0In]O8Rb0Go]O9Rb0Fm]O;Vb0Aj]O`0Yb0\\Og]Oe0[b0XOf]Oh0[b0VOe]Ok0hb00O1O1O1O2O0O10000O2M200O1000001O000O11O0001O0000000000001O000000000000001O000000001O000000000O1001ZO]]OKh9" + }, + { + "size": [ + 640, + 480 + ], + "counts": "i]f34jc02O100O2N1O1O1@GY]O9fb0JX]O6hb0LV]O4jb0MT]O4lb0LT]O4lb0MS]O3nb0Nm\\O5Sc0<0O1000O100O100O1O100O10O1001O0000000000000O100O10O010000O10000002N1O0000000000000000001O00O1O101N1O101O0O1000000O100000000000000001O00001O00000O11O000010O000001O00000001O0000000000000000010O00000000000O101O000000000000000O10000000001O0O1000001O001O00001O001O0O3N002N1O2N1O1O2M2O1O2M5K\\jb2" + }, + { + "size": [ + 640, + 480 + ], + "counts": "n??240_OO30N11OO171H4<21KEi?o2Z@_MLBj?o3001O001O001O001O00001O001O001O1O001O001O001O001O1O001O001O001O001O1O001O1O001O001O1O001O1O001O1O001O001O001O1O1O1O001O1O1O001O1O1O2N1O001O1O1O1O1O1O2N2N1O1O2N2N1O1O2N2N1O2N2N3M2N2N3M3M4L5K6YOW]OK`c0O3M2N1O1O1O2NQhY7" + }, + { + "size": [ + 640, + 480 + ], + "counts": 
"T^h0191N11Od`0\\3F0000000000001O000000000000000000000000000000000000000000000000000000O100001O000000O1000000000000000000000000000000000000000000001O0000000000000000000000000000000000000000000000000000000000O10000001O00000000000000000000000000000000000000000000000000000000000000000000000000000000001O000000000000O11O00000000000000000000000000000000O10000001O0000000000O10000000000000000000000001O00000000000000000000000000000000O1000000001O00000000000000000000000000000000001O000000000000001O0000000000001O0000000000001O00000000000000001O00001O00001O00001O001O00001O001O001O1O001O00001O1O1O1O001O1O1O3M7I3M3M2N2N2N1O4L2N3M2N3M4L2N1O1O2N8H3M2N2N2N4L2N2N3M2N5K2N2N2N2N6J2N1O001O2N5K5K1O3M3M3M1O2N2N2N3M1O1O00000000001O001O0000000000000000000000000000000000000000000000000000000000000000O1O1F:GSDYHS and ?", + "choices": [ + "A. ", + "B. No object is between them.", + "C. ", + "D. " + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_358.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000319607.jpg", + "mask_rles": [ + { + "size": [ + 640, + 640 + ], + "counts": "cim2g1:gNVa0f3ZNe0Z@ZKg0OV=b6ZOQ1hB]HO2k;U8I3M1O1M4N10001O0O1O100O10001N100000000000000001O00001O010O1O001O010O2N1O1O1O1O2N1O1O0O1000O10000001N1000000O2O00001O0O2O001N105JW1jNR1hJiAb2V?TLi@N=h3o?^Lk_Od2n`0I7[Nd^O_O24aa0OS2Bh]o0MXiQ6" + }, + { + "size": [ + 640, + 640 + ], + "counts": 
"WlQ1<_c07L2N2M3M3L4M4M3M2M4L5K4N2O1N3M3M4M1O1N2O3L2O002N3L3N1O2N1N2N3N2N1O1O2M4M1O1O1O2N1N2O1O003M4L1N2O2N1N3M3N1O1O0O4M1O1O2N1N3N1O0O3N2N3L3N1O1O2M3M2O2N001N5L1O1O2M3M2O1O2N1N3N3M1O2M2O1O003NjI]Bl5f>RJdAODa4h?Bm1YLe^OF:=QY`0@YZ@0Pd05k[O3L20O00O100]\\OF`c0:_\\OJ]c07c\\OJ\\c03e\\OI14Wc0c0M2O1N1O100O1O2N1O1N4M2N4L001N3N1M3O001N4L5K3M101O1O1O3M1N102N1jGcNgM^1W2EgFnMl5U1SJ0Z9T1`LkNXJ0W9Y1nFdMV5R1eJ1V9j1PL^Oo3e0PLZOP4f0QLVNhJHX9S2oKTNkJHU9V2SLPOm3R1TLiNeJQNW9V3WLeNm3\\1SLaNo3_1RL^No3c1SL[Nm3f1SLYNl3h1ULVNl3j1ULUNk3k1ULTNk3n1XLnMh3R2]LiMc3X2]LhMb3X2aLeM_3[2bLdM^3\\2fL`MY3a2gL_MX3b2iL^MV3b2kL]MU3c2lL[MU3d2PMXMo2i2XM`LcI:T9V3bMiL]2W3cMiL]2V3dMkL[2U3gMiLX2Y3iMfLU2[3mMcLS2\\3QN`LP2`3W7101N1000O11N101O001N2O0fDQMc5P3[JUMa5l2]JXM_5j2`JXM^5i2aJYM\\5j2_JYMa5i2\\JYMb5j2\\JVMd5m2YJSMg5P3UJQMk5S3PJWMf5l2VJZMf5h2XJYMg5i2WJXMh5j2VJWMh5n2TJVMg5n2VJSMi5n2VJRMj5o2UJSMi5P3UJUMd5P3[JoLkLjM]8Y5gJYMW5i2gJXMW5m2eJTMZ5n2gJQMV5R3hJPMV5S3gJnLX5S3hJlLX5U3gJkLX5Y3eJgL[5[3dJdL\\5`3`J`L`5a3`J^L`5l3VJSLj5Q4TJnKk5T4UJkKk5W4SJiKm5X4RJhKn5Z4PJfKo5^4nIbKR6a4jI`KV6c4fI^KY6d4eI\\K\\6e4cI[K]6g4`IYK`6j4^IUKc6l4[ITKe6m4[ISKe6m4\\ISKc6n4\\ISKc6o4nI_JP6d5oI[JQ6f5nImISNcNo7b7mIiIYN]NKOn7m7mIgIa6Z6fHRHE`1f7`6bHQHI_1c7b6bI]I]6d6cI]I[6e6dI[I[6h6dIVI\\6k6fIRIY6o6hIPIX6P7iIoHW6Q7jInHU6T7lIjHT6W7lIhHS6Y7mIgHS6X7oIfHQ6Y7TJbHn5]7TJaHm5_7TJ`Hk5`7VJ_Hk5`7VJ`Hj5_7XJ_Hh5b7WJ^Hj5a7XJ]Hi5c7YJ[Hf5f7ZJYHg5g7YJZHe5g7WJ]Hh5d7TJcGfM62>o7i7ZJkGkM6m7P8WJiGmM4o7S8SJkGmM2JER8c8SJiGnM2n7V8SJkGkM2Q8S8UJiGlM2o7U8VJiGlMNNEP8e8VJVHkMUOn7g8WJSHkMXOk7h8YJoGmME]7[8hJnGPNCX7^8iJnGSN]OEN_7g8jJnGQNAU7a8mJlGmMDU7`8PKlGjMDU7_8VKiGfMFU7`8WKhGhMEQ7d8^K^GcMMo6f8WL_Ga3c8]L^Gb3c8`LZG`3g8aLVG`3l8_LRGb3Q9e31O00004L5K4L1O004L1O2N1O1O1O3M2QFWFc9T:O004L4L001O00002N2N1O2N1O1O1O00FQGbEP9_:PG`EP9a:oF_EQ9a:oF_EQ9`:PG`EP9`:PG_EQ9Q:nFYF2DQ9R:PG[FS9e9nFZFR9f9PGXFP9h9SGSFo8l9c0O1O1N2N2O1O100O100N2O1N200N2O1O10000O100M3O100000000O100O1N2N2O1O1O1000000O10000O100O1O1O1O10000O100N2O10000O10000O1O1O1O1O100O10000O1O100O10000O10000001O0000002N1O00001O1O3M001O00001OO10000
00O1O1O1O1O100O100O100O100O1O10000O1O1O1O100O100O1O1O1O10000O100O1O100O1N20000O100O100O1O100O100O1O100O1O100O1O1N200O100O100O100O100O1O1O100O1O100O100O3M2O1N3M2N3M3N2M2O1N2N2NBeI[CY6cC\\BY2i=hMXBT2j=mMVB>D_OW>3VBo1k=TNSB=PB;GVOZ>" + } + ], + "question": "What is the spatial relationship between and ?", + "choices": [ + "A. is in front of .", + "B. is in front of .", + "C. is on top of .", + "D. is behind ." + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_359.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000427160.jpg", + "mask_rles": [ + { + "size": [ + 512, + 640 + ], + "counts": "Sjn73k?3N2N1O2N2N2N102M2N2N1N3O1N2N2M3N2O0O2M3N2M3N2N2O1N2N2O001N2O2FUN[Bn1_=XN\\Bl1`==L3[E^M[7S3_HVMY7n2aHWM\\7m2aHVM\\7n2aHSM^7o2`HRM^7Q3`HQM^7>`F]1Q2VN^7P3bHQM\\7P3dHPM[7P3fHQMY7o2gHRMW7o2hHRMW7o2iHRMV7n2jHRMU7o2kHQMU7n2lHSMS7m2lHTMS7T2oFcMT1]OoN26j0h8R2RGdMP1U1m7W1TGcMP1U1m7W1TGdMP1S1m7W1UGfMn0Q1o7W1TGhMo0n0n7Y1UGiMn0l0n7Y1VGjMn0j0n7Z1VGlMm0h0n7[1UGmMo0f0m7[1VGoMm0d0o7[1VGPNm0b0o7\\1UGSNm0?o7\\1VGTNm0>n7]1UGUNo0;n7^1UGVNo09n7_1UGXNn06o7a1SGYNQ12n7c1SG[NQ1Nn7e1RG]NR1Kn7g1QG^NS1EP8l1mF_N];`1dD_N];`1cDaN^;]1bDcN_;]1`DdN`;[1aDdN`;[1`DeNb;Y1]DiNc;W1\\DiNe;V1ZDlNg;S1XDmNi;R1WDnNk;Q1SDQOm;h0XCgNl0`0n;f0YD[Oh;c0YD]Oh;c0WD]Oj;c0UD\\Om;c0SD]Oo;b0oC_OT<>lCBY<:fCE_<7bCH`<6`CIaIaAoN^>P1:O000J60010J41KZOQAf0T?0O1010O01N20Mn@XOQ?h03O02M3O000000001O10O000000100O0100O0010O01N110O010O010O00O1001N1O2O000000001O1O0nLPOTGR1g8UOUGm0j8VOoFn0n8WOPGi0P9YOlFi0S9ZOiFi0T9\\OhFe0U9AgFa0R9GkF:U9GiF;T9HjF9T9JiF9U9IjF7S9LjF7U9KhF7X9JcF;[9F_F`0a9BWFd0h9kN_ETOf0S2k9eNeEVO?U2l9fNhEQO;[2m9bNhFb1W9\\NlFc1U9[NmFc1T9[NmFf1S9XNPGg1P9ZNoFg1Q9XNoFg1R9XNoFg1R9VNQGj1P9UNPGk1P9TNoFm1S9RNnFm1R9SNnFn1R9RNoFl1Q9VN^FYOUO^2`:YNWF_OWOW2c:ZNTFAUOX2h:VNQFFSOV2m:UNmE]2T1kLn6h0jG`2T1oLo6c0iG`2R1fL[N=g8>iG`2X1TMo6=fG_2^1SMl6`0bG^2b1SMl6a0^G\\2g1TMk6T4TInKl6Q4UIPLj6o3WIRLh6m3XITLh6i3ZIYLe6f3[I]Lc6b3]I_Lc6`3]I`Ld6_3\\IcLc6\\3\\IgLc6Y3]IgLc6Y3[IiLe6X3ZIhLf
6X3YIiLg6X3WIjLh6W3UIlLj6T3UIoLi6R3TIPMl6P3SIQMm6P3PISMo6m2PITMP7m2nHUMQ7k2nHWMQ7j2mHWMS7j2jHZMT7f2iH^MV7b2iH_MW7a2hH\\LTO0T8e3fHZLYO0Q8f3fHYLCHg7o3eHYLHEd7c2lGSNf0GJCd7a2oGUNa0HMAc7`2SHUN;H6@[7d2THUN7H>]OW7e2VHVN2Jb0[OV7d2WHWNOJf0ZOT7e2WHWNOIh0ZOR7e2YHWNLJk0YOP7d2[HZNGJo0WOX6NPIf24[NBLS1TOS63TI]29aN\\OKT1TOQ65VIT2a0hNROLW1ROo5?XABh>>WADg>=XADh>=SAFn>f0O00ESADm>=TACm>?QA@P?g01O100O1O1O2O1N2N3L4I8KPdn3MU\\QL5U@Ka?46N2O100O11OOg?1Y@1OOgo30ZPL00ObbZ1" + }, + { + "size": [ + 512, + 640 + ], + "counts": "o=P2P>0000000000000000UOXB^Og=a0^B\\Ob=c0bBZO^=f0cBYO]=f0dBZO\\=f0eBYO[=f0gBYOY=g0gBXOZ=g0hBYOW=f0kBYOU=f0mBXOT=h0nBVOR=i0S10000O10OO2O10000O10000000000O100M3O1O1M3000000O10lAEc<:^CGa<9_CGa<6SCLUOOh=3RC>n<_OSCd0l<\\OTCd0l<[OUCe0k<[OTCf0l0O100M3O100O100O100O1O1O1O1O10000000000M300O1O1O010O100O2O0O100O1O1N11001N100O1O1O1O10O0100000000O101O0N101O1O100O1O100O10000O10000000001O0000000000000000000000mNXBNh=M`B0`=OcBO]=NgB0Z=NjB0V=MWB[Od0h0U=MmB3S=LoB4P=LPC4P=LQC3oN1N2O00102M000000001O0O11O01O0000oNHZB8d=LZB4a=5cADd07g==WBBg=c0XB\\Oh=d0XB\\Oh=e0WB[Oh=f0XBZOg=i0VBXOh=l0VBUOh=l0XBTOh=m0WBSOh=o0VBROi=Q1TBoNl=c100O1O100O2O0O1M3O0010000000000O10000O1O100000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000O11O00000001O000000O100000000001O000000O100001O000O10000000O10000001O0000000000O100001O000000000000O100001O00000O100000O10000001O0000000000O100001O00000000000000O1001O0000000000000000000000000000000000000000000000000000O10000N2N2N2M3O1O1M300O1O1O100O101O0^NmAW1_>N2O00000O2@^A@c>6hAGZ>7hAHY>3WAHb02[>NnA1Qoi18h_VN3M1O1O1O1O2N2M4M2N1N1N2M3K5K5M3O1O100O1000000O11O1O001O001O1O001O00001O00000000O100O1O1M3O1N2O1O1N2N2O1O1O1M3N2N2O1O1L4N2O1O1000000000000000000000000000000000000000000000000000O11O0000000O10000000O11O00000000000000O1001O000000000bB" + }, + { + "size": [ + 512, + 640 + ], + "counts": 
"Vlo54j?3M2N3N1N2N3M2O1N3N2M2O2M6K2M4M2N2N3L4M3L3N3M6J5dCkMd:X2UEoMh:R2REUNl:k1PE[No:d1WDeM;R1\\;a0WDTO2YO6X1_;;[DRO0]O0[1d;7[Dk0d;VO[Dl0d;h1O1O1O10O0100N2O0O2iDSLZ:o3_EXL_:e4O100O1O100O100O100O10000O10000O10000O10000O100O100O10000O100O100O100O1O1iNTJ[G2e0j5i7iJRHX5i7oJUHQ5h7TKVHl4g7XKWHi4h7YKWHg4g7[KYHe4e7_KXHb4g7`KXH`4g7m1O100O1O1000000O100O100O10000O100000000000000001O000000001O001O001SJVHb3j7]LYHa3g7\\L\\Hd3d7XLaHg3`7SLfHl3Z7QLiHo3X7mKlHR4U7mKkHS4V7kKlHT4U7kKlHd2TO\\MQ8OlHc2UO^MP8LmHW4T7hKmHW4U7eKnHZ4U7bKmH]4V7`KkH[3nNgKMk0a8JgHn2hNQM, , and interacting with each other and the environment?", + "choices": [ + "A. is looking at , while is running on .", + "B. and are running on , while is looking at them.", + "C. is looking at , while runs on .", + "D. is looking at , and both and are running on ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_360.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000427160.jpg", + "mask_rles": [ + { + "size": [ + 512, + 640 + ], + "counts": 
"To59f?2N2N2O1N2O1O1N1O3M3N1O1O1O1O1O0O2O0N2O2O000O100O1O10000O1JhNbAY1b>IaAoN^>P1:O000J60010J41KZOQAf0T?0O1010O01N20Mn@XOQ?h03O02M3O000000001O10O000000100O0100O0010O01N110O010O010O00O1001N1O2O000000001O1O0nLPOTGR1g8UOUGm0j8VOoFn0n8WOPGi0P9YOlFi0S9ZOiFi0T9\\OhFe0U9AgFa0R9GkF:U9GiF;T9HjF9T9JiF9U9IjF7S9LjF7U9KhF7X9JcF;[9F_F`0a9BWFd0h9kN_ETOf0S2k9eNeEVO?U2l9fNhEQO;[2m9bNhFb1W9\\NlFc1U9[NmFc1T9[NmFf1S9XNPGg1P9ZNoFg1Q9XNoFg1R9XNoFg1R9VNQGj1P9UNPGk1P9TNoFm1S9RNnFm1R9SNnFn1R9RNoFl1Q9VN^FYOUO^2`:YNWF_OWOW2c:ZNTFAUOX2h:VNQFFSOV2m:UNmE]2T1kLn6h0jG`2T1oLo6c0iG`2R1fL[N=g8>iG`2X1TMo6=fG_2^1SMl6`0bG^2b1SMl6a0^G\\2g1TMk6T4TInKl6Q4UIPLj6o3WIRLh6m3XITLh6i3ZIYLe6f3[I]Lc6b3]I_Lc6`3]I`Ld6_3\\IcLc6\\3\\IgLc6Y3]IgLc6Y3[IiLe6X3ZIhLf6X3YIiLg6X3WIjLh6W3UIlLj6T3UIoLi6R3TIPMl6P3SIQMm6P3PISMo6m2PITMP7m2nHUMQ7k2nHWMQ7j2mHWMS7j2jHZMT7f2iH^MV7b2iH_MW7a2hH\\LTO0T8e3fHZLYO0Q8f3fHYLCHg7o3eHYLHEd7c2lGSNf0GJCd7a2oGUNa0HMAc7`2SHUN;H6@[7d2THUN7H>]OW7e2VHVN2Jb0[OV7d2WHWNOJf0ZOT7e2WHWNOIh0ZOR7e2YHWNLJk0YOP7d2[HZNGJo0WOX6NPIf24[NBLS1TOS63TI]29aN\\OKT1TOQ65VIT2a0hNROLW1ROo5?XABh>>WADg>=XADh>=SAFn>f0O00ESADm>=TACm>?QA@P?g01O100O1O1O2O1N2N3L4I8KPdn3MU\\QL5U@Ka?46N2O100O11OOg?1Y@1OOgo30ZPL00ObbZ1" + }, + { + "size": [ + 512, + 640 + ], + "counts": 
"o=P2P>0000000000000000UOXB^Og=a0^B\\Ob=c0bBZO^=f0cBYO]=f0dBZO\\=f0eBYO[=f0gBYOY=g0gBXOZ=g0hBYOW=f0kBYOU=f0mBXOT=h0nBVOR=i0S10000O10OO2O10000O10000000000O100M3O1O1M3000000O10lAEc<:^CGa<9_CGa<6SCLUOOh=3RC>n<_OSCd0l<\\OTCd0l<[OUCe0k<[OTCf0l0O100M3O100O100O100O1O1O1O1O10000000000M300O1O1O010O100O2O0O100O1O1N11001N100O1O1O1O10O0100000000O101O0N101O1O100O1O100O10000O10000000001O0000000000000000000000mNXBNh=M`B0`=OcBO]=NgB0Z=NjB0V=MWB[Od0h0U=MmB3S=LoB4P=LPC4P=LQC3oN1N2O00102M000000001O0O11O01O0000oNHZB8d=LZB4a=5cADd07g==WBBg=c0XB\\Oh=d0XB\\Oh=e0WB[Oh=f0XBZOg=i0VBXOh=l0VBUOh=l0XBTOh=m0WBSOh=o0VBROi=Q1TBoNl=c100O1O100O2O0O1M3O0010000000000O10000O1O100000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000O11O00000001O000000O100000000001O000000O100001O000O10000000O10000001O0000000000O100001O000000000000O100001O00000O100000O10000001O0000000000O100001O00000000000000O1001O0000000000000000000000000000000000000000000000000000O10000N2N2N2M3O1O1M300O1O1O100O101O0^NmAW1_>N2O00000O2@^A@c>6hAGZ>7hAHY>3WAHb02[>NnA1Qoi18h_VN3M1O1O1O1O2N2M4M2N1N1N2M3K5K5M3O1O100O1000000O11O1O001O001O1O001O00001O00000000O100O1O1M3O1N2O1O1N2N2O1O1O1M3N2N2O1O1L4N2O1O1000000000000000000000000000000000000000000000000000O11O0000000O10000000O11O00000000000000O1001O000000000bB" + }, + { + "size": [ + 512, + 640 + ], + "counts": 
"Vlo54j?3M2N3N1N2N3M2O1N3N2M2O2M6K2M4M2N2N3L4M3L3N3M6J5dCkMd:X2UEoMh:R2REUNl:k1PE[No:d1WDeM;R1\\;a0WDTO2YO6X1_;;[DRO0]O0[1d;7[Dk0d;VO[Dl0d;h1O1O1O10O0100N2O0O2iDSLZ:o3_EXL_:e4O100O1O100O100O100O10000O10000O10000O10000O100O100O10000O100O100O100O1O1iNTJ[G2e0j5i7iJRHX5i7oJUHQ5h7TKVHl4g7XKWHi4h7YKWHg4g7[KYHe4e7_KXHb4g7`KXH`4g7m1O100O1O1000000O100O100O10000O100000000000000001O000000001O001O001SJVHb3j7]LYHa3g7\\L\\Hd3d7XLaHg3`7SLfHl3Z7QLiHo3X7mKlHR4U7mKkHS4V7kKlHT4U7kKlHd2TO\\MQ8OlHc2UO^MP8LmHW4T7hKmHW4U7eKnHZ4U7bKmH]4V7`KkH[3nNgKMk0a8JgHn2hNQMS1Z9X3jFbL]OVOi9Y4iFaL]9^3dFbL^OUOh9Y4mFfLR9Y3SGgLi8V3[FlKS1d0c8[3aFPLR1=^8b3bFPLX;o3=O1N2N2O100N200O1O1O100000000000000O1000000000000001OO1O100000000O1000000000000000000O1001O0000O1001O00O100001O0000001O2N1O00000000O11O001O1O4L00N22N1O1O00000000000000O100000000O100YMnC_1SgNmAW1S>iNPBT1a>M2N2N2M3N3M1N2O2N2M6K4K_\\b3" + }, + { + "size": [ + 512, + 640 + ], + "counts": "\\jj33i?5O0O1j@K`>6^AM_>6^AM_>5`AL_>5`AIGIh>`0_AHKHb>b0bAFd>:[AGd>:[AFf>:XAIf>8XAKg>d02O2O0O2O1O1O00100O101N2N2M3N2N2M4K5Loed5" + } + ], + "question": "Which statement accurately describes the actions of ?", + "choices": [ + "A. is running on alongside .", + "B. is wearing and running on .", + "C. is running on while wearing .", + "D. is wearing and is looking at ." 
+ ], + "answer": "C", + "type": "relation", + "image": "images/vqa_361.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000500477.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "0001O1O1O1O2N1O1O1O1O2N1O001O2N1O1O001O2N2N001O2N1O1O1O1O2N1O1O100O1N2O1O00101N1O1O001O1O1O1O001O1O1O1O1O001O1O1O001O1O001O1O1O1O001O001O1O1O1O1O1O001O1O1O1O2N1O002N001O1O1O1O1O1O1O1O1O1O1O1O1O1N2O1O1O1O1O1O1O1O100O1O010000000002N00O11O00001O01O00010_EmLR9T3fFTMY9m2bFXM]9i2ZF_Mg9m302O0O2N10N101O0O1O1O2O0O2O0O1O2O0O1O1O100O1O1N2O1O1O2N01gEhKi9X4TFkKl9e41OO100010O001O0O100O1O2N1O1O1O1O3M001N2O2N100O1O1O2O1N2O02N02N1OI6M4M2N3MTLfEV3Y:j0M3N3L3OSLQFj2l9W1M3MUL[F`2b9`MaF_2^9aMfF]2W9dMkF[2R9gMPGX2n8iMTGV2j8lMYGQ2f8nM]GQ2b8oM^GS2`8lMaGU2^8jM\\G_2b8aM^G`2a8`M[GUODm2P9oMVGk2h8i1N1O1N101O0O01L4E:N3N101L4bMfFnNb9Q1_FnNa9Q1aFoN_9o0bFRO]9l0dFWO[9d0iFaMBd1c9j0gFFW99jFIU97gFMY93eF0Y9Q1cFTN\\9i3O1N10001N1000dNkFnLS9S3RGiLm8W3WGfLh8[3YGcLf8_3[G`Ld8`3_G^L_8c3dGZL\\8f3gGXLX8i3iGVLV8j3lGULR8l3PHRLP8n3QHRLm7n3VHPLi7Q4XHoKg7P4[HoKd7Q4^HoK`7Q4bHoK\\7Q4fHnKZ7Q4hHoKV7Q4kHoKT7Q4nHnKQ7S4PIlKo6T4SIkKl6V4TIiKm6W4TIhKk6Y4VIeKi6]4YI`Kg6b4YI\\Kg6e4j10M3O1O1O1N2O1O1N1O20OO2O0O11O0O1O001O002M2O2M3M3L3O2N2O1O1O1O101M2O1M2O2M3N2N2N2N3M2N2M3M2L5M3I7L4K5J7L3M3N2N101N2O1O2M2O1O1O1O1O1O001O1O1O2N1O1O1O1N2O1O1O0O3N1O1O1O1N2O1N101N2O1N3L3N2N2O1N2N1N2O2O2M3K4N2Oo\\i3" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"i3132O0O]6R8aIPH2Nc6Q88O11O4ZIkGZ6T8aIRH0JZ6T8iInG]6P8:M301N10000010O0PISH70]6l7=0001O1O001O0021ON1O002O2MfH_HX7_7gHaH[7]7eHcH^7Z7cHdHa7Y7_HgHd7V7[HlHh7P7XHPIj7n6VHRIk7Q71JUHTIn7j6SHUIn7j66N00011NiGZIU8i6OO1010O010O01O00001O0001O101lGVIo7n61M2O101N2N1O10010002NN1O011N4M1N1O2O02OO`GbI]8`600010O010O001O1O1O1O00001O2N000O110O1O1O0001O1O1O001O101N100O00002M2O001O1N5L2N1O0O3O3L1O1O002N6J3M0O100O2N1O001O2O0O2OO1001O1O1O00001O1oHYHg6h7VI[Hi6e7WI\\Hh6d7XI]Hg6o7O010O1O001O1O1O001O2N13M0O1O00100O1O1O0O4M1O1O1O1O1O100O0010O2O0O100O02O00O10O0100O100000O0101N10O00100O1O0010O00001O002M2O2N20OO001O1O00001O1N1010O100000O001O001O00001O00001O0000O1000000O10000000O1001N100000O100000O100O1O2N1000001N1O100001O001O0000O10000000000000000000000O10001N010O11O00O100O10O10001O1N2O1O0100O0001O1O001O001O000000000000N2O10000O100O1000000O1O10000O100O1000000O1000000000000001O0000001O00000YJkGc4U8[KnGd4R8[KPHd4P8[KRHd4o7[KRHd4n7[KTHd4l7\\KUHc4k7\\KVHd4j7ZKYHe4g7WK]Hi4d7TK^Hl4c7QK`Hn4a7QK_Ho4d7mJ]HS5f7gJ]HY5e7dJ]H[5f8N1O001O001O00000000000000000000001O1O1O0000001O00001O000000000000001O00000000001O001O00001O001O001O00000000001O000000001O0000001O00000000000iKoE_3R:_LQFBHj3W:bLXF\\3h9cL[F[3f9cL\\F\\3f9`L]F_3d9_L^F`3c9\\LaFc3_:O1O1O001O1O1O2N1O1O2M2O1O1O2N1N2O8G3[MkC[12[O] and ?", + "choices": [ + "A. is attached to .", + "B. is cleaning .", + "C. is lying on .", + "D. is cleaning ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_362.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000485844.jpg", + "mask_rles": [ + { + "size": [ + 396, + 576 + ], + "counts": "Pmi21[<0RU12kjN4L8I2M4M2M3N0O2O0O2O0O101O000O10000O1O1L4O0SFmNa8U1]GoN_8R1_GSO]Oh0\\76VIUOVOn0_7OXIn0e6SOYIR1c6oNZIV1c6kN[IY1b6hN\\I\\1a6eN[I_1d6bNZIa1c6aN\\Ib1_6aN_Ib1]6`NcIf1V6\\NhIU2d5QNZJS2`5PN^Ja2Q5aMnJd2l4_MRKe2f4e2K4M\\L_KA`4W4M2N200O1O1K5O1O1N2O1O1O1N2O1M3O1O001N2O1OM3O1L4O2O1O1N3M4K4L4M3M2N3M2N2M3N3L3WOQKdIO6U5R6[KdIk4W6h0M3L4L4K6J6K7I5L4K6K6K4K5J4M5J:F5K7TNRG0j:K4fNSE>VXj2" + }, + { + "size": [ + 396, + 576 + ], + "counts": "n^Z26T<3M2N3M2O2M2O1O1O1N3N1O1N2O1O001O1O1O101OO10O10O1N2K4N3N2N2O1N1O20000000O1000000O100001O1O2N6J3M3M1N]bP4" + } + ], + "question": "What is the relationship between and ?", + "choices": [ + "A. is holding .", + "B. is holding .", + "C. is on .", + "D. is beside ." + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_363.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000485844.jpg", + "mask_rles": [ + { + "size": [ + 396, + 576 + ], + "counts": "QWd21Y<4N00001O000000O100000000001O000000001O0000001O0000000O100001O0000001O0O3Lhjk3" + }, + { + "size": [ + 396, + 576 + ], + "counts": "Vhk07S<4N1O1O1N2O1N1O2N1O2O0O101N101N10001N101O0O1000001N1000000O2O00000000001N1000000010O00O11O01O00000000000000000000000000000O1000000000O100000000O2O0000000000001N100000000O2O1O001N1000001N101O0O2O0O2O0O2O000O2N1O2N2O0O1N2M2O20001O01O0000010O00010O1O010O00100O00010O0001O001O001N`f2MdYM00003M000[OLRE4n:LRE4n:LRE4n:LRE4c;000000000000001O0000001O00000000000000MlCNU<1kCOU<40000001N3MRWl3" + }, + { + "size": [ + 396, + 576 + ], + "counts": 
"Pmi21[<0RU12kjN4L8I2M4M2M3N0O2O0O2O0O101O000O10000O1O1L4O0SFmNa8U1]GoN_8R1_GSO]Oh0\\76VIUOVOn0_7OXIn0e6SOYIR1c6oNZIV1c6kN[IY1b6hN\\I\\1a6eN[I_1d6bNZIa1c6aN\\Ib1_6aN_Ib1]6`NcIf1V6\\NhIU2d5QNZJS2`5PN^Ja2Q5aMnJd2l4_MRKe2f4e2K4M\\L_KA`4W4M2N200O1O1K5O1O1N2O1O1O1N2O1M3O1O001N2O1OM3O1L4O2O1O1N3M4K4L4M3M2N3M2N2M3N3L3WOQKdIO6U5R6[KdIk4W6h0M3L4L4K6J6K7I5L4K6K6K4K5J4M5J:F5K7TNRG0j:K4fNSE>VXj2" + }, + { + "size": [ + 396, + 576 + ], + "counts": "n^Z26T<3M2N3M2O2M2O1O1O1N3N1O1N2O1O001O1O1O101OO10O10O1N2K4N3N2N2O1N1O20000000O1000000O100001O1O2N6J3M3M1N]bP4" + }, + { + "size": [ + 396, + 576 + ], + "counts": "QT6b1f9aN`F[2Z9?K4M3N2O1N2O1N2O1N2K5M3O1O1N_MbGX1\\8[NTHd1l7YNWHg1h7YNZHf1g7UN]Hk1d7PN`HP2`7mMcHS2]7kMeHU2\\7iMeHW2]7cMgH]2d8000000000000000001O00000000000000000O100001O000000000001O1O5K1O1O1O1O1O0O2O01lNUNfGj1W8]NfGb1Y8`NfG`1Z8`NfG`1Z8`NgG_1X8bNhG^1X8aNjG^1V8aNkG_1U8`NlG`1S8aNnG^1R8bNnG^1R8bNnG^1R8bNnG^1R8bNoG]1Q8cNoG]1P8cNQH]1o7cNQH]1o7cNQH\\1P8dNQH\\1n7dNRH\\1n7cNSH]1m7cNSH]1m7cNSH]1n7bNSH]1m7bNTH^1m7[NYHe1W900000001O0O10001O0000000001O0000000000O1000000000000000000000000000O100000000000000O1gN[NkGe1T8\\NlGd1T8\\NlGd1S8^NlGb1S8_NmGa1R8`NmGa1R8aNmG_1S8aNmG_1Q8cNoG]1P8eNoG[1P8fNoG[1Q8fNnGZ1Q8gNoGY1P8iNnGX1Q8jNnGV1Q8kNoGU1P8lNPHT1m7oNRHR1m7PORHP1n7POQHQ1o7oNQHQ1o7POPHP1P8POoGQ1Q8POnGP1R8QOlGP1T8POlGP1T8QOjGP1V8POiGQ1W8POgGQ1Y8oNgGQ1Y8oNfGR1Z8nNdGT1]8kNaGW1_8iN_GY1a8fN`GZ1`8fN_G[1a8\\N]G\\O2X2a8ZNjGf1V8YNkGg1U8XNmGg1S8XNnGh1R8WNoGi1Q8VNQHi1o7WNQHi1o7VNSHi1m7WNSHi1m7WNTHh1l7XNTHh1l7WNVHh1j7YNVHf1j7ZNVHf1j7YNWHg1i7YNXHf1h7ZNXHg1g7YNZHf1f7ZNZHe1g7[NYHe1g7[NYHe1g7[NXHf1h7ZNXHf1h7ZNXHf1h7ZNVHh1j7XNTHj1l7VNQHm1o7SNQHm1o7SNRHl1n7TNRHl1n7UNQHk1o7UNQHk1o7UNQHk1o7TNSHk1m7VNRHj1n7VNRHj1n7VNRHj1n7VNRHj1n7VNRHj1n7VNRHj1n7VNRHj1n7VNRHj1n7VNRHj1n7WNQHi1o7XNPHi1o7WNQHh1P8XNPHh1P8YNPHf1o7[NQHe1o7\\NPHd1P8]NoGc1Q8_NmGa1S8`NmG_1T8aNkG_1U8aNkG_1U8aNkG_1W8bNfG^1\\8aNcG_1_8_NaGa1b8\\N_Gc1e8YN[Gg1g8XNXGh1j8WNTGj1n8UNQGk1Q9XNjFh1X9WNgFi1\\9TNdFl1^9RNaFo1d9lM[FU2k931O002N2N4L3M2N2N3M3M2N2N3M6J9G7I6I
;Da]d3" + } + ], + "question": "Based on the scene, where is located?", + "choices": [ + "A. It is being held by .", + "B. It is on .", + "C. It is inside .", + "D. It is beside ." + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_364.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000151480.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Z\\`26d>=E4M3K6L2M3O1O1O3]BmNV=g1G7I6J6K3L;E5L5_DlLl:Y3lDkLS;a300O1BWLdEj3Y:_L`Eb3_:KgEbLY:Y3nEfLR:_3hEbLX:R4100000001N102oK]EQ3OQMU;h2TESMn:a2_E\\Mc:U2]1G7N3M20100O2N004L2M4H;GWVT6" + }, + { + "size": [ + 480, + 640 + ], + "counts": "i^_23N11O0000000000002N8FQmf6" + }, + { + "size": [ + 480, + 640 + ], + "counts": "YTd26f>6L2L4K5K5AWOfBj0Y=YOdBg0]=ZObBf0^=]O^Bc0c=<2O3M3M2NO10000000001O2O0O10O00O2N1NQO\\Bf0c=ZO^Bf0a=ZOaBe0^=[OdBc0]=\\O[CLg<2T1O3NePW6" + } + ], + "question": "What is the relationship between , , and ?", + "choices": [ + "A. is wearing both and .", + "B. is wearing and carrying .", + "C. is carrying both and .", + "D. is carrying and wearing ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_365.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000026204.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "\\ga75R=8J4L3DY7DeH<[7EdH;\\7EdH;]7EbH:_7H_H9a7H]H8d7G\\H9d7H[H8f7GZH9i7FUH:P8BoG>W8]OhGc0[8[OdGe0]8ZOcGf0^8\\O_Gd0b8\\O]Gd0d8\\O[Gd0e8\\O[Gd0e8]OZGc0f8_OXGa0h8@WG`0i8@WG`0h8BWG>i8CVG=j8ETG;j8HTG9k8JSG6m8KRG5m8MRG3o8NoF2R9OkF2W9NgF2[9MdF3]9MaF4a9K^F5d9KZF5g9JXF7h9IXF7h9JVF7k9HSF:n9FoEd?", + "choices": [ + "A. and ", + "B. and ", + "C. and ", + "D. 
and " + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_366.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000530099.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "T\\Y11d;3N1O100O1O1O1O1O1O100O1O1O100O1O1O1O1O1O1O100O1O1O1O100O1O1O1O1O1O100O1O1O1O1O1O100O1O1O1O100O1O1O1O1O100O1O1O1O1O010O1O1O2N1O100O1O1O1O100O1O1O1O100N200O1O1O100O1O1O1O100O1O1O1O100O1O1O1O1O100O1O1O1O1O1N2O1O100O100O1O1O100O100O1O100O1O10000O10000O100O100O10000O10000O10000O10000O1000000O100O10000O100O10000O100O100O10000000000O100O10000O10000O1000000O100O10000O10000O1000000000000O1OVM[Hd1e7ZN\\Hg1d7VN_Hj1a7UN_Hl1a7SN_Hn1a7QN_HP2`7PN`HQ2`7nM`HS2`7mM_HT2`7kMaHV2_7iMaHX2^7hMaHZ2_7eMaH\\2_7bMbH_2]7aMcH`2]7_McHb2]7]MdHc2\\7\\McHf2\\7[McHf2]7XMdHi2[7VMfHk2Z7TMfHm2Z7RMfHo2Y7QMgHP3Y7oLhHQ3X7nLgHT3Y7kLgHV3X7jLhHW3X7hLhHY3X7fLhH[3X7dLhH]3X7bLiH^3W7aLiH`3V7`LjHa3W7\\LiHf3b72O1O1O1N2O1O1O1O1O1O1O1O100O100000000000000O10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000GhKmH11X4R7gKmH21V4R7hKmH21V4R7kKPIU4P7kKPIU4Q7jKoHV4R7gKPIY4W700000001O000000cH" + }, + { + "size": [ + 375, + 500 + ], + "counts": "Zel11f;1N2O3L2O0O2O0O2O1N2N3N3L3M102M1O3L2N3O1N2O1N3M2N2M2O1O1O1O1O1N2O1O001O1O10O00010O01O100O001O100O100O100O1N3O1N2N1O2OO10O0100O010O010O10O01O10000O010O1000000O10O1000O1000O010000O10000O1O01000O1O1000O01O1000O0100000O01O1000O10O1000O01000000O01000O1000O1000O0100000O1000000O0100000O1000000000000O10000O1000000O100O1000000O101O0O10001N101N101O001O000O2O001O001N10001N1O1O1O2N1O1N3N1OaEROX:l0gEVOY:i0gEYOY:e0gE[OZ:d0gE\\OZ:a0fEA[:=eED[: and ?", + "choices": [ + "A. is sitting on .", + "B. is inside .", + "C. is under .", + "D. is sitting on ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_367.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000237864.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "mce59b0Le;9PD4k;NQD7i;^O[D3Jc0h;XOcDR1m;L01M4M^OZDMZ;2jDKV;5lDIT;8SE@m:`0n0:F2O1QDPOa;R1[DROc;P1[DPOe;Y1HdNdD\\1];fN`Dm0O\\Oa;c0iD[OW;c0lD\\OS;d0QEXOP;g0SEVOl:j0j0000000002N3M3N[OQD0m;JnCI=8eWIAh6?YI@g6`0]I\\Oc6d0aIXO^6i0cIVO]6k0eIRO[6n0fIQOZ6o0hIoNX6Q1kIlNU6T1lIkNT6U1nIiNR6X1PJeNP6\\1RJaNn5_1UJ^Nk5b1VJ]Nj5d1WJZNi5f1WJZNj5e1WJZNi5f1XJYNi5g1WJWNk5h1WJVNi5j1XJUNi5j1XJTNi5l1k2O010O1O002N001O1O2N1O101N1O2EZDoNh;o0YDoNj;o0XDoNi;P1XDoNj;o0VDPOl;P17011OO0O3N4L11N01O1O10O100O00012NO01001N1N2O1N3O1N3N001O0O2M2N5JbPP4" + } + ], + "question": "What is the spatial relationship between and ?", + "choices": [ + "A. is over .", + "B. is in front of .", + "C. is behind .", + "D. is beside ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_368.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000237864.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": 
"1f2c:00001O0000001O00001O001O001O000000001O00000000O10000O1000000O1010O0O1000000000O10001O01N10000000001O0001OO100000000000O10001N10O1000000000000001O01N10000000000010O01O001O0000000O100001O01N1000001O000001O01O000O110O00000000O100000000O100000000001O001O1O001O00000000001O000000N2N1O201O0O1001N110O1O2N1O1OO100O100O10000O001O2O0O1000O11O0001O001O1O001O000000000000O10000001O00000000000000O10000000000000000O1000000000000000000000000000000000000000000001O00001O001O000000001O00000000000000000000O1N2O1O1N2O100O1000000001O00001O0000000000O100O1O100O100O100O1000000001O001O001O1O001O2N1O1O1O1O1O1O1OO1000000O10000O11O00O10000000000000000O10000O10000000000O10000O10000000000000000000000000000000000000000000000001O000000001O00000000001O0O110O0000000000O1000O11O0000000001O00000O11O000O01001O000000O10O2O0000000000000O10000O10O10O2O0O10O1000O2OO10O101O0000O10O101O0000000O1000000O100O100000000000000001O000000001O0000O011O000O01001O000000O1001O00O1001O0001N1000010OO010O2O000000O1001OO10001O10O01N2O1O1O0000001O010N10001O0000001O1O10OO2O1O0000001O00001O00000000001O000000000000001O00[OPFQNP:o1PFQNP:d201OO100000000000000YOoEVNQ:j1oEVNQ:a200000000000VOoE\\NQ:^2000000000O100O100000000O10000O10000UOkEaNV:_1jE`NW:`1iE`NW:Y2O02O000O100O1UOgEfNY:Z1gEfNY:Z1gEeNZ:U2O02N10000O10000000O1001O00000000000000O100010mNcEXO]:k1ON2OdEXMY:l20kNfE[OZ:k1N1000010OhNgE@Y:`0gE@Y:?hE@Y:`0gE@X:a0gE@Y:?iE@W:`0iE@W:`0iEAV:?iEBW:>hEEV::kEAZ:?gE_OZ:a0gEdNM41L\\:\\1fE[O\\:e0dEeNN9_:Q1dEfNM4OF`:" + }, + { + "size": [ + 425, + 640 + ], + "counts": 
"mce59b0Le;9PD4k;NQD7i;^O[D3Jc0h;XOcDR1m;L01M4M^OZDMZ;2jDKV;5lDIT;8SE@m:`0n0:F2O1QDPOa;R1[DROc;P1[DPOe;Y1HdNdD\\1];fN`Dm0O\\Oa;c0iD[OW;c0lD\\OS;d0QEXOP;g0SEVOl:j0j0000000002N3M3N[OQD0m;JnCI=8eWIAh6?YI@g6`0]I\\Oc6d0aIXO^6i0cIVO]6k0eIRO[6n0fIQOZ6o0hIoNX6Q1kIlNU6T1lIkNT6U1nIiNR6X1PJeNP6\\1RJaNn5_1UJ^Nk5b1VJ]Nj5d1WJZNi5f1WJZNj5e1WJZNi5f1XJYNi5g1WJWNk5h1WJVNi5j1XJUNi5j1XJTNi5l1k2O010O1O002N001O1O2N1O101N1O2EZDoNh;o0YDoNj;o0XDoNi;P1XDoNj;o0VDPOl;P17011OO0O3N4L11N01O1O10O100O00012NO01001N1N2O1N3O1N3N001O0O2M2N5JbPP4" + }, + { + "size": [ + 425, + 640 + ], + "counts": "i2T5U801O0000000001O01O1O10O01O001O00000001O00001O1O0O2O1O000O10O100000000000000000000000O11O000000001O0000001O0000O100000000O10000O100O10jLRHQ1n7iN]HR1c7jNeHR1\\7iNjHU1W7fNTIS1l6jNXIU1i6iNXIW1h6gNZIY1f6eN]IY1d6fN]IX1e6fNeHoNVO2a0W2e7eNaH_OHj1i7gN[HDMa1k7iNUHNOU1m7lNSH21n0n7oNRH4Ol0Q8nNPH8Oh0R8POoG8Og0S8POmG<0c0S8QOlG>1`0S8QOlG`01>T8ROkG`02=S8ROlGa04:P8UOkGb0=0i7]OjGc0a0Lf7@jGc0f0H`7EiGd0k0C]7HhGd0Q1_OW7MhGc0T1^OU7NfGd0\\1WOo65eGc0d1nNj6?bGb0f:_OWEa0j:DQE;P;FnD:S;GkD:U;l01O1O2N100O1O1O1O100O100O1O1O10O02O0O10001N2O1N100O100O100O101O0O2N10000O2O001N2O1OO0100000O001O10000O1O1N11000O100O10O010000001N101OVGmNa5P1_3M00001O1O00001O001O00001O00010O00000010O0O101N100O2O00000O2O0000001O0O10010N1O1N200O1O2O0O11O001O1O10O0001O0000000000000O1000N2O01000O10O1000000O100O2O00O0O2M2YFZOj6i0TIYOl6f0SI]Ok6d0SI^Om6b0RI_On6`0RIAm6`0SI@n6>SIBm6>SIAo6>QIBo6=RICn6=SICl6=TIBm6>SIBn6=RICn6?", + "choices": [ + "A. is in front of and under .", + "B. is in front of and under .", + "C. is beside and behind .", + "D. is behind and over ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_369.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000237864.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "1f2c:00001O0000001O00001O001O001O000000001O00000000O10000O1000000O1010O0O1000000000O10001O01N10000000001O0001OO100000000000O10001N10O1000000000000001O01N10000000000010O01O001O0000000O100001O01N1000001O000001O01O000O110O00000000O100000000O100000000001O001O1O001O00000000001O000000N2N1O201O0O1001N110O1O2N1O1OO100O100O10000O001O2O0O1000O11O0001O001O1O001O000000000000O10000001O00000000000000O10000000000000000O1000000000000000000000000000000000000000000001O00001O001O000000001O00000000000000000000O1N2O1O1N2O100O1000000001O00001O0000000000O100O1O100O100O100O1000000001O001O001O1O001O2N1O1O1O1O1O1O1OO1000000O10000O11O00O10000000000000000O10000O10000000000O10000O10000000000000000000000000000000000000000000000001O000000001O00000000001O0O110O0000000000O1000O11O0000000001O00000O11O000O01001O000000O10O2O0000000000000O10000O10O10O2O0O10O1000O2OO10O101O0000O10O101O0000000O1000000O100O100000000000000001O000000001O0000O011O000O01001O000000O1001O00O1001O0001N1000010OO010O2O000000O1001OO10001O10O01N2O1O1O0000001O010N10001O0000001O1O10OO2O1O0000001O00001O00000000001O000000000000001O00[OPFQNP:o1PFQNP:d201OO100000000000000YOoEVNQ:j1oEVNQ:a200000000000VOoE\\NQ:^2000000000O100O100000000O10000O10000UOkEaNV:_1jE`NW:`1iE`NW:Y2O02O000O100O1UOgEfNY:Z1gEfNY:Z1gEeNZ:U2O02N10000O10000000O1001O00000000000000O100010mNcEXO]:k1ON2OdEXMY:l20kNfE[OZ:k1N1000010OhNgE@Y:`0gE@Y:?hE@Y:`0gE@X:a0gE@Y:?iE@W:`0iE@W:`0iEAV:?iEBW:>hEEV::kEAZ:?gE_OZ:a0gEdNM41L\\:\\1fE[O\\:e0dEeNN9_:Q1dEfNM4OF`:" + }, + { + "size": [ + 425, + 640 + ], + "counts": 
"mce59b0Le;9PD4k;NQD7i;^O[D3Jc0h;XOcDR1m;L01M4M^OZDMZ;2jDKV;5lDIT;8SE@m:`0n0:F2O1QDPOa;R1[DROc;P1[DPOe;Y1HdNdD\\1];fN`Dm0O\\Oa;c0iD[OW;c0lD\\OS;d0QEXOP;g0SEVOl:j0j0000000002N3M3N[OQD0m;JnCI=8eWIAh6?YI@g6`0]I\\Oc6d0aIXO^6i0cIVO]6k0eIRO[6n0fIQOZ6o0hIoNX6Q1kIlNU6T1lIkNT6U1nIiNR6X1PJeNP6\\1RJaNn5_1UJ^Nk5b1VJ]Nj5d1WJZNi5f1WJZNj5e1WJZNi5f1XJYNi5g1WJWNk5h1WJVNi5j1XJUNi5j1XJTNi5l1k2O010O1O002N001O1O2N1O101N1O2EZDoNh;o0YDoNj;o0XDoNi;P1XDoNj;o0VDPOl;P17011OO0O3N4L11N01O1O10O100O00012NO01001N1N2O1N3O1N3N001O0O2M2N5JbPP4" + }, + { + "size": [ + 425, + 640 + ], + "counts": "i2T5U801O0000000001O01O1O10O01O001O00000001O00001O1O0O2O1O000O10O100000000000000000000000O11O000000001O0000001O0000O100000000O10000O100O10jLRHQ1n7iN]HR1c7jNeHR1\\7iNjHU1W7fNTIS1l6jNXIU1i6iNXIW1h6gNZIY1f6eN]IY1d6fN]IX1e6fNeHoNVO2a0W2e7eNaH_OHj1i7gN[HDMa1k7iNUHNOU1m7lNSH21n0n7oNRH4Ol0Q8nNPH8Oh0R8POoG8Og0S8POmG<0c0S8QOlG>1`0S8QOlG`01>T8ROkG`02=S8ROlGa04:P8UOkGb0=0i7]OjGc0a0Lf7@jGc0f0H`7EiGd0k0C]7HhGd0Q1_OW7MhGc0T1^OU7NfGd0\\1WOo65eGc0d1nNj6?bGb0f:_OWEa0j:DQE;P;FnD:S;GkD:U;l01O1O2N100O1O1O1O100O100O1O1O10O02O0O10001N2O1N100O100O100O101O0O2N10000O2O001N2O1OO0100000O001O10000O1O1N11000O100O10O010000001N101OVGmNa5P1_3M00001O1O00001O001O00001O00010O00000010O0O101N100O2O00000O2O0000001O0O10010N1O1N200O1O2O0O11O001O1O10O0001O0000000000000O1000N2O01000O10O1000000O100O2O00O0O2M2YFZOj6i0TIYOl6f0SI]Ok6d0SI^Om6b0RI_On6`0RIAm6`0SI@n6>SIBm6>SIAo6>QIBo6=RICn6=SICl6=TIBm6>SIBn6=RICn6?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. No object is in the background." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_370.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000099810.jpg", + "mask_rles": [ + { + "size": [ + 332, + 500 + ], + "counts": "jmi04V:3M2M3M3L4I7L4J6K5H8D]L@d3a0\\L\\Of3e0ZLXOh3h0YLVOh3j0ZLROh3h0`LUOa3i0bLTO`3i0cLVO^3GXK@_1g0Z3E`K]OY1k0Y3GbKWOY1P1W3HgKoNU1X1U3FjM8X2GiM8X2FjM8X2GiM8X2FjM8X2FjM8X2FjM9W2FkM7W2HlM4V2KmM1U2NnMOS20PNLR23QNIQ26RNEQ2:Q400O100000000O100000000000000O10000000000000000000000000000001O00000O11O000000000O10SNFhI9V800O1O1000000O01[NJTI6i6MXI2e62ZIMe65[IKc67\\IJc67]IId66\\IJc67]IIc67]IJb66^IJb67]IIc67]IIc67YICeN6R87XIDfN5R87XIDeN6S86XIDeN7R85YIDfN6Q87WIDhN5Q87VIEiN4Q87VIEiN5P86WIEiN6o76VIEkN5o76VI3i6MVI4k6LTI5k6KTI6l6JTI7k6JTI6m6IZH@=h0Y7IXHA>g0X7JWHA`0g0Y72[HUONj0g71ZHWOLk0i7OXH>f7U100O10013NO140LN2OO00O2N2O1M3N2M3N1O2M1oJkLk2U3RMoLm2P3RMSMm2k2RMXMm2h2RMZMn2c2RMaMl2^2nLjMR3U2`LSMgNU1h4h1VLgNh3Z1QLmNo3S1lKSOS4m0hKYOV4i0eK[O[4\\3N2N2O001M3N2M4L3O3L5L4L9G8H4L4L4L6I8I5K2N4L1O3M2N3M1N2O1O000O10000O1000O0100O1O0100O010O0000010O001O10OO2N101M2M30000000000001O2N2N2N2M4L4M2O1N1O1O10O010N2K5M3N1O2L400O10000000O101O001N2O1O2M2O1O1N3M2N2N3M8F6KfXZ1" + }, + { + "size": [ + 332, + 500 + ], + "counts": "bTP21X:3N2N2O1O1O100O2O0000000000001O0000000010O00010O000001O001O1[FBZ9?cFD\\9f001O01N2N3M4L5Llod2" + } + ], + "question": "What is the relationship between and ?", + "choices": [ + "A. is standing in front of .", + "B. is holding .", + "C. is beside .", + "D. is holding ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_371.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000191013.jpg", + "mask_rles": [ + { + "size": [ + 640, + 474 + ], + "counts": "Tia22kc03N3O0N2O1N2O2O0O000O2O100O1O1O1O10O00001001O0O10000000000000000000001O002N3M00000000001O00000000001O000000001O001O001O001O1O001O1O1O1O2N1O1O1O2N1O2N2N1O002N00O10000O1001O001O1O7I1O2N3M2N5K2N1ON2N2004L0000000000000001N100O1N2O1N3N1O101O00001N2O010N3N3M=C8fNY]Oj0dc0YOiUU4" + }, + { + "size": [ + 640, + 474 + ], + "counts": "^bS68fc03K5M3N2O1N101O0O101O001N101O1O00000O1000000O2O0O10001M2O100O1O001N2O1O01O00O11O0010O2M2O100O10O01000000O100O1000000O100000000O100000000000000000000O100001O000000000000000000O10000000001O0000001O1O1O100O0000000001O01O0000O2O1O2M2N2N5H:H7J4M2M:G3KdUg0" + }, + { + "size": [ + 640, + 474 + ], + "counts": "Xc0;dc01J7M2H8L4L4M2N3O100000000000O10001O00000000001O01O00O100O2O0001O00O100O10001O0OEPOh]Oo0Yb0QOg]Om0\\b0ROd]Om0hb0N3I6NG]OY]O=mb0DR]O9]c0K`a[30X^dL?", + "choices": [ + "A. is driving on , while and are parked.", + "B. is parked on , while and are driving on it.", + "C. All vehicles, , , and , are driving on .", + "D. is driving on , while and are parked." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_372.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000191013.jpg", + "mask_rles": [ + { + "size": [ + 640, + 474 + ], + "counts": "Q_b1:fc01N3N1O010O1O0O2O00001O0000001O000000000K6J5K5L4H8J6L5J5I8E;H8I6J6I7J5K6L5I6I:C7OfJcAS5Z>PKfAo4Z>RKfAn4Y><1000O010O00000100O1VKeAn3\\>QLeAP4Z>PLgAP4X>oKiAQ4W>PLhAP4W>QLiAo3T>XKkAj0On3W>XKjAd5Y>0N10100000O0100O001O001O010O00100N20^JmAV5S><0OO2O001O10O100O10O1000O1N1gJmAg4S>c000O1O1OfJRBd4n=]KPBd4P>^KPB_4Q>aKoA_4Q>aKoA_4Q>`KPB`4o=aKRB^4n=aKSB_4m=k000O000O2O001N10010O1N101O00001O1O0001dJZB]4e=cKZB^4e=P1OUK\\B[3c=eL]B[3c=a101O1O00010O001OkJaBi3_=WL`Bk3_=[101O000010O01O001O0010O010O2O0O1O2N2O0O8H204J4L2XJjAb5^>MO2N010N101O0O2O1O0GYJXBi5g=801O0O2O0O2O0O2N101O0N3O001N101N1O2O00001UN^ISEO8i6k6VITJ0Oc8j5]H_HQO514O10Fc00g9c5QF]JP1OPO2N0S5" + }, + { + "size": [ + 640, + 474 + ], + "counts": "mei2`0Xc0a0B9H7I6L5J5L5K4L4M2M4L3N3L3L4M2N5L2N2M2M5M2N2N1O2N1O3M1O2N2N1N3N2N101M3N1O1O2N1O2N101N1O2N1O2N1O2N100O1O2N101N1O10001N1O1O2N10000O1O101N100O100O100O10000O100O10000O1000000O100000000O1000000O100O01000000O01000O10000O10000O10000O100O1O100O10000O10001N100O100O101N1O101N100O2N1O101N101N101N1O2N2N1N3N1O101N2N2O1M2O2N3M1N3N1O2O1M2O2N3M2M5K3N1O2M6K3L4M2M4M3L5J5L3L5L5J4L7I7H8G>@YSi2" + } + ], + "question": "What is the relationship between and ?", + "choices": [ + "A. is driving on .", + "B. is hanging from .", + "C. is hanging from .", + "D. is parked on ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_373.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000057027.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "moe05R=6I6L3M3N2N1N3J5M4L4M2O1L5N2M2O1N3L3O1N3N1O2M2N2O1O2M2M3O1N2N2N2IZMfEj2Q9lMoF[O5S3V8mLiGY1Oj1h6WMZIGLX21j0S6g0kIYOe5n0ZJQL0V3a5m0aJlKNW3\\5Q1gJSOW5l0lJTOo4f4L4N2MWJVKaM3_5f4nLdKP3Y4mLYKeM?`5V4iLnKZ3P4fLPL\\3n3iLbKd3\\4f2M3N2N2NROTLbHk3[7\\LbHa3_7cL_H\\3_7hL`HV3`7nL]HR3`7UM\\Hk2b7_10ON300N200O0N3M30000O002N1O1O2N01001N1O2O0O1000[LVIl0h6TO]Ih0e6TO_Ij0c6ROaIm0_6jNkIT1X6aNQJ_1Q6ZNUJd1o5nM\\JQ2`8N100O100O100O2N100O100O10000O1000000O100O1O100O10000O100O1O1OlNjMVGV2h8PNTGP2i8TNVGk1i8WNWGi1f8ZNZGf1c8]N]Gc1a8_N_G`1^8dNbG\\1[8gNeGX1_7RNSHl0>R1V7MjH3S7NVHiMf0Z2Q7OSI1l64iH2V71_H9`7J\\H7d7h2N2O1O1O0O2O1O1O0O2O0O200N1O2O1O1O10O010000O100O10000000O10000000O01000000O1000000O100000000O10O100000O1O100O1000000000000O100000000000000000000000000000000000000000000O10000000000000000000000O101O00000O101O00000O100000000O100000000000000000O11O00000000000000000000000000O101O000000000010O000000000lMlHSNS7j1QIUNo6i1TIVNl6i1VIVNj6i1WIWNi6g1YIYNh6f1XIZNh6e1YI\\Nf6f1XIZNh6h1VIXNk6h1nHaLHg1[7R4001O001O0001O01bJTIb0J]2T7kL\\I<4[2b9eM_E]1f:]N_E_1c:VNhEi1l:0001O00000000000001O0000001O000000000oNZN^Ff1_9_N_Fa1`9bN^F^1`9eN_F[1`9gN_FY1_9jN`FW1]9lNbFT1]9mNcFS1[9POdFP1Z9SOeFm0Y9VOfFj0W9ZOgFg0V9^OhFb0V9AiF?U9FgF;V9n1N2N2M3I7K5L5L3N2N2N2M3M3N2N2N2N2O101O01O2N2N1O1O2O2M3M2O0O4L4M0O3M002N5K4L1O2O2M3M1O1O1O5K3M2iMZFf0i9TO^Fh0c9VO_Fi0d9QO`Fo0b9lNaFS1f9cN^F\\1m9UNWFk1e:O0000000000000000001O0000000000TJ\\N2d1HeN5[1GmN5S1FVO6j0H]O3c0LB0>OFN:2KI561EO;8^OGc0`0VO@i0e0SO[Ol0j0POVOP1l0nNTOR1Q1TKXM`3g1Z1]1aNcN]1h1ZNWNe1l1ZNTNe1m1[NSNd1n1[K[Lm2g1h1T2iKcL]1Y1j2Q4YLoKg3T4VLlKk3T4TLlKl3U4SLjKn3X4PLhKP4Z4nKfKR4\\4lKdKU4[4kKeKU4\\4jKcKX4]4gKbKZ4_4eKaK\\4_4cKaK]4`4bK`K^4b4`K]Ka4e4\\K\\Kd4i4UKYKk4c600001N2O1O00001O000000000O2O0000000000000000010O000000010O000100
1O0O2N10O01oLjJmMV5R2mJkMU5o1eJYK9f2S5P2dJZK9g2S5o1PKPNT5l1lJUNU5h1_JaK:h2Y5d1]JdK3O1j2`5j40iL]JhNb5W1`JhNb5U1aJiKMj2b5eM[JT3`0TOW5`McJY37UO[5YMdJh1D[NNZ1=]1U7[MfI;^O[2e93M1O2N2M3N1O2M2O6I:F?A2O1N3M3K5L4M2M4H;XNf1F:G8H:Hb0YOc0@;E:F=A>UOk0A=H:Aknb0" + }, + { + "size": [ + 426, + 640 + ], + "counts": "m\\Z2:o<4K2K5M0_C]O_Q9BRG`0g8EYG>c8C]G?`8C]Ga0a8]O`Gf0]8\\ObGf0\\8\\OWGQ1g8ROmFY1Q9jNhF\\1W9\\1O1N3N1O1O1N2O1O_OjFPMV9m2PGoLP9n2g0N2N2N3M2O100O101OSF_MS9b2cFkMX9Y3K6J1N2O0O2O000O10000000000O010000QORGfL1h0m8\\2cG\\M^8_2jG^MV8_2nG_MS8_2PH`MP8^2SHaMm7]2VHaMk7\\2YHcMg7[2\\HcMe7\\2_H`Mb7`2d1000000001N1000O100000000000000O01000000000O2O00000O10000O100O10PGgMo6Y2nHlMP7T2mHPNR7T2hHnMX7V2aHmM`7U2ZHnMf7X2jGSNU8a3000000000000001O0000000CWGYLj8a3^G\\Lb8b3bG[L`8c3cGZL^8d3e0N3M3N1012cF_Lk8R4N01O1O00001O001nMfF2Z9MkFOU90oFMR92VGGj89^G^Ob8c0aGYO_8g0gGSOZ8m0lGlNT8U1VH`Nk7_1WH_Ni7c1VH\\Nj7e1VHYNl7g1THXNl7i1UHUNl7k1UHRNl7o1g1100O100O011N10000O100O4M2O1N1O3M3N0]E^MN0Y:Q3MN3M3oMlEe0Z:PO\\F?l9UO`Fb0Z;GZYc3" + } + ], + "question": "Which statement best synthesizes the relationships involving and ?", + "choices": [ + "A. is guiding from a position above it.", + "B. is positioned over and is guiding it.", + "C. is walking in front of .", + "D. is guiding while walking on it." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_374.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000498463.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "YQQ24j>o0ROd0]Oh0XOl0TO>B00000000000000YMTEZ1l:eNVEZ1j:eN_ESO_Oh1R;TOcF1]9OeFO[90gFOY90hF0X9OiF1W9NjF2V9MkF3U9LlF4T9LlF4T9KnF4R9KoF5P9LPG4P9KQG5o8JRG6n8ISG7m8FVG:j8DXGe8B\\G>d8A]G?e8_O[Ga0f8]O[Gc0e8]O[Gc0e8\\O[Ge0f8YO[Gg0e8YO[Gg0_;0000000001O7J3M3N2N1O2N2N2O1O0O101N100O10000O1000000000000000000001O00001O00001N10YI" + }, + { + "size": [ + 480, + 640 + ], + "counts": "ZUW28f>3N2NY1hN4K2N2N1O1O1O100O100O1O1O1O1O1N2M3O2N10O01O1O10O10O2O03Me0[Oi0VOd0ZO`\\d6" + }, + { + "size": [ + 480, + 640 + ], + "counts": "l_T7i0V>2O000000000O1O1bNVOdDl0Y;YOaCKd0V1k;YO_CY1`001O000000OUE^OZ7b0dHBZ7>cHG[7:bHI]77bHK]75bHL^74bHL^74bHL^73cHN\\72dHN\\72dHO[71eHO[71eHO[71eHO[70fH0Z70fH0Z70fH0Z70fH0Z70fH0Z7OgH1Y7OgH1Y7OgH1Y7OgH1Y7OgH1Y7OgH1Y7OfH2Z7OeH1?[OV3e0[L0;2g2MoL18h0U2WOcM17]1b1bNWN16R2o0mMkN25\\2e0bMWOd0A^2d0mLL[1iNY2k0\\L\\Ng1U2^Nd2e0dJeNj0b0Hg1V2[Nf2\\2dJPM0O4Y2c2[Nf2Z5ZMeJg2^5`KRJ`0?Q4Q7`J^Ho0?b4^7^JTH`0NB`0`5h7^JjG50M=`5l8^JgF]5W:WOh0XOg0YOb0^Of0ZOd0TN]BR1^>nNeA2Q^Y7" + }, + { + "size": [ + 480, + 640 + ], + "counts": "j`T7a0]>201O000000000000000000000000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001O0000000000000000000000000000000O100000000000000000000000000000000000000000001O001O000000001O000000000000000000000000000000000000000000TH" + } + ], + "question": "Which statement accurately describes the arrangement around the microwave, ?", + "choices": [ + "A. is on and is beside .", + "B. is attached to .", + "C. is on and is beside .", + "D. is on and is beside ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_375.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000581062.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "X]`11d21Z63_I2a6NZI8d6JjG1[OJl0d0o7AdGc000[8^O_GR2a8oMVGY2j8:OO2N1E and ?", + "choices": [ + "A. is on top of .", + "B. is holding .", + "C. is standing on .", + "D. is sitting on ." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_376.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000029640.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "h\\R51T=9H6M3L3N2I[OfCf0X<8M2O1N3N1O2N2O010O0000010O00010O00010O01N1O1M3O2N1000000O1O1O1O2O010O01O000001O0000001O001O0O1O2M2L5M2N3M3Mf_^2" + }, + { + "size": [ + 426, + 640 + ], + "counts": "Pim43V=3N2N1N3N1O2M3N1N2O1O000O_OeC4\\ and ?", + "choices": [ + "A. is positioned next to but not touching.", + "B. , the carrot with a slight curvature, is attached to .", + "C. , the carrot with a green stem end, is attached to .", + "D. is larger than ." 
+ ], + "answer": "C", + "type": "relation", + "image": "images/vqa_377.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000370486.jpg", + "mask_rles": [ + { + "size": [ + 640, + 421 + ], + "counts": "h4Z31W4Q9]3N2O00fN^GkFDHP9X9nGbF^8g8h1J7L4K5L4K5L4H8GhCdH^L2N1O0O2O002N1O1N3M``k6" + }, + { + "size": [ + 640, + 421 + ], + "counts": "[f97fc07J3M3N2N2M3J6M22O0000O010O001O000010O00000000001O00001N10000O2O0O2N4L5K9G_eU7" + }, + { + "size": [ + 640, + 421 + ], + "counts": "eob22nc00a_:OVdFg0^kN9G8l@lNT;X1^DVO^;m0ZD]Ob;e0XDAe;a0WDGb;=YDMc0^Nf7g1dG2a0XNi7h1cG5a0SN[72QFk1^1VN`NU2a2hMU7g2aG;U1nLW7l2`G=k0dLTN3_9o2oFR1W1SLTNMb9S3hF[1Y1_K_N<_9n2`Fc1W1mKT8j7]GfHX8j9F8YGdD]8e;H:G:E7I5J7J5L5XIkBd07g4`=lJeBS5]=gJhBX5P>001O2NBdAXK\\>f4hAXKX>f4b0ObAUKi=k4WBZKd=e4]B\\Kc=b4^B`K`=_4dB^K]=`4fBbKW=]4jBdKT=\\4mBbKU=]4oB^KR=a4W10QB^KX9i0hGi3oNaKW1MQ5X1dJZ3SObKd8d1\\F]NV1\\4JcK_8]2eGo1i9XNTFh18dKd7l2QHa1:eK^7R3VHY1[^OCea0>W^OHea0[1N5L1O1N5L3L4M3j@^Mh?AFOK1d00j?`3b_ORMc?f300O1K5O1ZOaKjA`4k>01O0000000000000000000000001OO10000000000AaK\\A`4]>oKYAS4k>?2N9G2N2N9G2N1O001O00000000000000000000000000000000000000O10000000000O100O1K5O100O100WOdK]AL=`4k=TLTBl3j=WLTBj3l=XLmAm3n<\\KlCf5o;PK_CQ5a01O00000000000000000000000000000000O10000000000hN]LSBc3cgKfAX4S?N1O001O0000001O1O6J:F;E6J7I5K3M3M2\\Nk^O9Ya0@n^OS1QAmNm>V1RAjNl>Y1SAgNh>^1XAbNg>`1XA`Nh>a1WA_Ni>a1WA_Ni>b1VA^Nj>b1VA^Nj>b1VA^Nj>c1UA]Nk>c1UA]Nk>d1SA]Nm>c1SA]Nm>d1QA]No>d1PA\\No>f1n@\\NR?e1l@\\NT?e1j@\\NV?W3O1O1WO\\KVBd4j=]KUBc4j=_KTBb4l=^KTBb4l=_KSBa4l=`KTB`4l=aKSB_4m=aKSB_4m=aKSB_4m=aKSB_4m=aKSB_4m=bKRB^4n=bKRB^4n=cKQB]4o=cKQB]4o=cKQB]4o=dKPB\\4P>eKnA\\4R>eKmA[4S>fKlAZ4T>gKjAZ4V>hKfAZ4Z>f000000000O10000000000000000001O00O1001O0000000gJdAn4]>lJjAR5c>O1O00001O0000000000000000000000000000001O00000000001O;E2N2N1O1O1O000000001O00000000000000000000000000000000000000000000001O001O00000000000000000000001O00O100000000000000000000001O3kKh@Mm2" + }, + { + "size": [ + 640, + 421 + ], + "counts": 
"Ybf18cc06UOj0J7K4L3O2M3M3N2M3N1O2O1N2N2O001O1O100O1O00100O1O100O1O1O001O100O1O1O1O1O1O1O1O1O1O1O1O1N2O1O1O1O2M2O1O001N101N101O000010O001O10O0100O0010O010O01Ec_ORM^`0l2g_OPMY`0o2=I7K4M4N101N2N1O2M201O0N3K4M4L4O0El]OiNWb0W18100O1OO2O01O0010O00001O10O010O010O1O00100O100O0010000O101O01O0000000000010O000001O0000001O001O001O0O2O00001N3N2N3Q^OPNha0\\2H7I1N101O00O010O1O1O1O1O1O1O100O1O1O1O1O1O1O2N100O1O1O101N1O1N200O1O1O2N1O1O100O1O1O2O0O1O1O2O0O1O100O2@?L4J6L5L3M3N2NSV\\2" + } + ], + "question": "Which object are both and standing on?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. " + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_378.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000087875.jpg", + "mask_rles": [ + { + "size": [ + 487, + 640 + ], + "counts": "X6n8Y60O1000O100000001O000000000000000000000000000000000000000001O00O10000001OO1001O000000O11O00000O1000000000RNiIlJW6R700000000000QNjImJV6S5jImJV6R700001O000000O10000000O2O0001OO010001O0000O1001O00000000000000O20O0O100000000000000000000000000000000000O100000000001O0000O1001O0000O11O000000O1001O0000O1000000000000000000000000fJlIa1T6]NRJ_1n5_NUJ`1k5`NUJ`1k5_NWJ`1i5_NXJa1h5_NXJa1h5_NXJa1h5^NYJb1g5\\N\\Jc1d5\\N^Jd1a5ZNbJd1_5ZNeJd1[5[NkJ`1U5_NoJ^1Q5aNSK\\1m4dNVKY1j4fNYKX1g4hN[KW1d4hN_KV1a4jNaKT1_4_MlIEg1j2]4]MSJFb1k2[4]MVJGa1j2Y4_MWJFb1i2W4`MXJGc1Z1_N]Nf5b0XJGd1Y1bNZNb5e0YJHj0ROZOT23[N`5h0YJGi0TO[OP25]N]5g0[JHh0VO[Ol16^N\\5h0\\JGg0XO\\Oh17`NZ5i0\\JGg0ZO[Oe19`NY5i0]JHe0]O[Oa1:aNY5i0_JFd0_OZO`1;aNX5j0aJCc0BZO^1;bNX5j0cJB`0E[O[1;cNW5k0dJ@`0G[OZ1:dNW5k0gJZOa0LXO[18dNX5j0mLQOkM]11hNW5h0\\N<_LkNU5>c1XOaN>Y8K5N2N2O1O1O1O1O1Nbjf00`UYO1N3N1O001O1O1O1O000aAHo=8PB6c=J]B7b=I^B8a=H_B8a=H_B9`=G`B9`0Dl2K]L3]84kG:;Gh2S1fMcNlN3\\66oI;8Hg2W1eMmNV5HWJ<7Gg2Y1dMmNW5GXJ;5Hi2Y1cMmNW5FYJ<4Gj2Z1bMmNW5FYJ<4Gl2Y1_MnNX5EZJ>3Dm2[1^MoNW5D\\J=2El1K_N^10QOW5B^J?0Dj13^NW11ROY5@`J`0NBj18]NS12SOS61cI@LL_1a0kNo02TOV6ObI@KO_1a0lNm00UOX6ObI\\OM4[1?nNm0OVOZ61mI_Ok0=oNm0NW
O[60mI@j0;QOo0KWO^6NlIBi08SO`2X6fMlICh01ZOf2R6fMlIDh0L]Oj2Q6dMjIHQ2d2V4cMhIKP2c2Y4aMgIMn10XMo1d8RNPJ2\\Ml1d8TNlI2`Mj1d8UNiI2eMh1b8XNfI1hMg1b8ZN`I2PNIIV1g8POYI7WNBJW1f8QORI<_NZOJY1e8QOjG[O=U1[OTOJ[1d8ROhG^O2[1IgNK]1c8SOfGHDW19`NJ^1c8SOeGg1MgML_1b8TOdGf1NfMM`1`8VOcGe11dMLa1`8VObGf13aMMb1^8XOaGe14aMM=I`0e8N_Ge17_MK?J?f8M^Gg16]MM?J`0e8L_Gh17[MK`0Ka0d8K`Gi1a0jM\\Ob0c8K_Gk1a0gM^Oc0h6oNkJj0_Nm1`0fM^Oe0h6nNkJ8WN88W2`0eM_Of0c8SOZG85Z2?eM_Of0d8QO[G84[2`0dM^Og0]6PO`K0RN93Z2c0eM\\Oh0\\6POlK6jMY2f0hMXOf0U9WOnFV2k0kMSOg0V9VOmFV2m0kMQOh0h9\\1YGkMnNj0h9Z1\\GkMmNk0g9Y1]GkMlNn0f9W1lGiNT8W1lGhNT8Z1jGhNU8Z1iGhNU8Z1iGfNW8b1`G`N_8g1ZGZNe87TF6R1Cj86VF6P1Eh85YF6n0Fi83ZF;i0Cl81\\F=h0Ak82^F>f0@l81_F`0d0_Om81_Fb0a0cNXOe9MZFI6\\1b0\\N[Oc0c9K[FI6]1b0ZN[Oe0b9J\\FJ5]1n0oNa8I^FI4_1m0oNa8I^FI4_1m0nNb8J\\FI5`1l0nNc8I\\FI5`1l0mNd8J[FH6_1l0nNe8JZFI5^1m0mNf8KYFI5]1m0oNf8JYFI5]1l0oNh8KWFI4\\1o0nNh8MUFH5Z1Q1POf8MkFR1?QOh8LiFQ1a0ROg8LiFo0c0TOe8LiFn0d0VOd8KhFm0e0ZOb8HjFl0f0_O]8DoF3lN;i10[8@RG2nN:g17V8]OVGMQO>d1:V8WOdG<7a0l9]OVFd0i9[OXFe0h9ZOYFf0h9XOYFh0g9UO\\Fk0f9QO]Fn0n;0001O00001O000000001O0GlA^OU>a0mA^OT>`0:O2N100O1O2M4M3LU[h3" + }, + { + "size": [ + 487, + 640 + ], + "counts": "[RY24Q?5K3N100O101N1O1N3N1M4N2N2N2O1N3N2N0O100dNkNfDV1T;ROiDn0U;UOjDk0V;UOjDk0T2SO\\53`Hj0S2\\OU5JhHj0S2]OT5JhHi0S2_OT5HjHh0R2AS5GkHh0Q2CR5FmHf0R2DQ5FmHf0R2DQ5GlHe0R2FP5GlHd0T2EP5JiHb0W2CP5LhHa0X2Dn4NgH?[2Cn40bHa0_2_Oo4T2QKlMn4Y3mGYM?A[7]6^OS1[O5M3N1O1O100O1O100O1]O_G_Jb8`5aG\\Ja8d5a000O10000000000O100O1K5N2O10000000000000000001O1O7I00001O0000000000001O0kF]Jf8d5VGaJh8P6O1O1O1O0]GgIX8Z6gGfIY8[6fGfIY8[6fGeIZ8d600gH[IP6f6oI[IP6f6nI\\IQ6e6nI[IR6i6iIYIV6X7YIhHg6Z7WIfHi6[7VIeHi6\\7WIeHh6[7XIeHN7S6T7oIfHJ9V6R7PJ\\IT2QOdM12d1V1`6POjHl1c1gNe5\\OhHi1g1kNa5[OiHg1i1nN_5ZOhHh1i1nN_5ZOiHg1h1nN`5[OhHg1h1nNa5ZOgHg1j1nN_5[OhHf1i1oN_5[OhHf1j1nN^5\\OhHe1l1mN^5]OgHe1k1nNh6P1ZInNg6Q1ZImNi6R1YIbNnLKo9a1c3N2N2N2N2N2N3M2L5M3M3J5L4L4N2N2N2M3NlkQ5" + }, + { + "size": [ + 487, + 640 + ], + "counts": 
"\\gm21V?1O4K2O00001O0O1000000000000001O00iNK\\C5clCZO98k;b0hCVO=8k;R1UDnNk;R1UDnNd1@n7c1\\FnNd1AP8c1ZFlNe1AR8f1VFiNh1AR8g1UFhN^1_O_N2o9g1TFhN]1@_N2P:f1TFhN]1B\\N2R:e1TFgN^1La8[1QFiN^1Lb8\\1nEhN`1La8]1oEgN`1La8^1mEgNa1Lc8V2]GiMe8V2ZGkMf8U2ZGkMf8U2YGlMg8T2YGlMf8V2YGjMf8W2ZGhMe8Z2[GfMe8Z2ZGgMf8Z2YGfMg8[2XGeMh8]2VGcM7Ak6l2cGRMV1a0E_OKQ6Q3RJ_M=F_OLQ6P3RJ]M?H]OKR6Q3PJ]Ma0G]OKR6T3mIZMd0G]OKANk5W3bJYMd0H^OJAMl5Y3`JXMe0H^OJ_OOn5Z3\\JVMg0J_OH^OOR6Y3XJVMh0K_OH_ONR6Y3WJWMh0K@H\\OOU6Y3UJUMj0K@H\\OOU6Z3jIaLMc0W1MAE[O1Y6W3eIeLNa0X1M@F[O0a6KUI=9P2LjML;R1;JZO[OO`7[3VIbMNTO[OOb7[3XI_MKWO[OOb7\\3WI_MKXOZOMe7[3WI_MJZOWONi7X3VIaMIFR7g2VIcMHFR7h2TIcMJEQ7i2UIcMICU7h2RIeMHDX7e2PIjMROTO4=k7d2nH^NSOoNn7d2PIYOo6h0QIXOn6i0RIWOk6l0UITOk6l0UITOj6m0VISOj6P1SIPOm6m0VISOj6m0VISOb3\\MbN`3mMSOS3_MTL0k2_3mMROU3^MSL1k2_3lMSOV3bMlKNQ3a3jMPOX3jMmNX3jMkLUO^O1O1f1R4jNkNZ3lMhLBR1g3lNkN[3kMgLBS1h3kNjN]3kMeLXO^O6f1m3kNiN_3]MULI>:Q1f3mNjNd3XMiL>a0?`Nl1c0Nf3VMgLc0>QN_O]1Q6aNfLU1]Mk0>mM_OO0c1P6aNfLU1]Mi0a0mM^Of1o5_NfLS1\\Ml0g0AY5]NeLV1\\Mi0h0C;`N\\6T1aHh0i0D1^NTO4a7?\\HK6\\1l0G4eN]6=^HJ5[1n0F2lN\\68`HK4Z1o0FLSOa62`HJ5X1P1JnNjN6=X7KaHJ4Y1P1JmNmN4]7GcHJ2S1V14fNjNO`0b7DaHJ3S1V15fNiNKc0f7AbHI3Q1W19cNhNL<2]Oc7m7H[HEGN:U1Z1;bNeNJc0n7C\\HG5Q1V1=aNfNHc0Q8_O\\HK5l0X1a0_N@i7VO\\HL6O_Of0c0]O8]1@ZOe7YO]HM:f0J@9Z1AZOe7YO^HL:e0GEg0@@P:J`Ff0@@P:HbFh0^OAo9FdFi0]OAo9EfFi0[OBo9DgFk0YOAS:AeF5lNO=;S:_OdF5POO9=Y:XO_F;RON6?[:UO^F>QON6>j;CQDO5>k;AQD24fY0000000000000000000001O000000000000000000000000000000000000000000000000000000000000000000000000001O0000000000O10000001O0000000000000000O100000000000000000000000000000000000000000O1001O00000000O100000000000000000001O0O2M3K=Am^j7" + } + ], + "question": "Which object is positioned between and ?", + "choices": [ + "A. No object is between them", + "B. ", + "C. Both and ", + "D. 
" + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_379.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000542089.jpg", + "mask_rles": [ + { + "size": [ + 500, + 375 + ], + "counts": "i`P17\\?3M3N3L4L3N1N2O1O1N2O1N3N1O1O1N2O001O5K2N1O1N101O00001O00001O0O1001OO2O000000000000000000001OO100000001O000000000O100000000O101O0O100O100O2N1M3M3O2N1O1O2N1O1OdNROTDm0m;SOTDk0m;UOSDj0\\=ObNXOSDf0n;ZORDe0`=M3M2O2M3M3M4L]UY3" + }, + { + "size": [ + 500, + 375 + ], + "counts": "WgQ14_?9G2O00000O1O1N2M3K5A?N2N2O1O1001O2N5K6J1O2N2N4L4L6K1N00000000000000O1L4K6L3I7K5M3M3001O2N6J4L1O2L4K7L6IU`k3" + }, + { + "size": [ + 500, + 375 + ], + "counts": "RW73^?b0d@]Oe>0[Ak1a=UN^Bn1`=4OnMaBo1_=QNaBo1_=QNaBo1`=PN`BP2_=QNaBo1_=QNaBo1`=PN`BP2`=2O010000000O010O10000O1O1O1O10O10000000000O01000000O01000000000O1000O10000000000000O01000000000000O100000O100000O10000000O01000000000O10O101O1O2N1O1O1N2O00001O000O1000O100O0O2O1M3M210O10000000O1000000000O1000O10000000O1000O1000000000O010000O2OO10000000O1000O10O10000000O1000O1000000000O01000000000000O1000O1000O1000000N2^Ob0XOXoj2" + } + ], + "question": "Based on the arrangement of the objects, which statement accurately describes the vertical positions of , , and ?", + "choices": [ + "A. is over , which is on .", + "B. is on , which is over .", + "C. is on , which is over .", + "D. is over , which is on ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_380.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000229858.jpg", + "mask_rles": [ + { + "size": [ + 555, + 640 + ], + "counts": "gPT5PNhAL;\\2n=eMWB[2\\>2EcMbAa2Q>_MlAR3Q>nLoAV3S>7H4J4SOZL_C05i3P and ?", + "choices": [ + "A. is in front of .", + "B. is looking at .", + "C. is standing behind .", + "D. is eating ." 
+ ], + "answer": "D", + "type": "relation", + "image": "images/vqa_381.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000229858.jpg", + "mask_rles": [ + { + "size": [ + 555, + 640 + ], + "counts": "gPT5d1h0I7L5K5L:E3M100O010O001O1O001O1N2N2L4M4HbSU5" + }, + { + "size": [ + 555, + 640 + ], + "counts": "0Q:Z7000000O10000O10000O1O10000O10000000000O1O100O10000000000000000O10000O10000M300000000001O1O1O1O00001O00O100001O000000002N1O001O002N1O1O001O5K2N001O001O1O010O0O2O00001O001O0000001O1O001O00001O00001O0000001O0000O1000001N100O10000001O0000010O000000001O1N101O000010O00O10000001N1000000O10000O1O1O10000O1O100O100O100O11O0000000000001O1O1OO1001O1O1O00001O2N1O0000000O1001O000000000O11O0000000000O100O10000O1O1O1O1O1O1N2O1O1O100O1000000O10000O1000000O100O1SLmHaMT7\\2XIYMj6e2[IWMf6g2^IVMc6i2`ISMb6j2iIiL\\6i2YJnLi5n2\\JPMe5o2]JoLd5P3^JmLd5R3_JiLd5V3_JfLc5Y3`JdLa5Z3dJcL\\5\\3gJ\\L_5c3cJSLf5m3]JjKi5U4YJjKg5V4YJjKg5U4[JiKf5V4\\JhKe5W4^JeKd5[4\\JaKh5^4ZJ_Kh5`4b300UOfCeLZPNhAL;\\2n=eMWB[2\\>2EcMbAa2Q>_MlAR3Q>nLoAV3S>7H4J4SOZL_C05i3P and ?", + "choices": [ + "A. is in front of .", + "B. They are both eating from .", + "C. They are looking at each other.", + "D. is eating ." 
+ ], + "answer": "C", + "type": "relation", + "image": "images/vqa_382.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000313182.jpg", + "mask_rles": [ + { + "size": [ + 424, + 640 + ], + "counts": "i^b04R=4M2N1WCHa\\;BeDa0X;^OhDg0S;XOnDj0P;VOoDk0Q;UOoDl0Q;SOoDm0R;SOmDm0T;ROlDn0U;POlDP1U;oNkDQ1W;mNjDR1W;mNiDS1X;lNhDT1Z;jNfDV1[;iNeDW1\\;hNdDX1];gNcDY1_;eNaD[1`;cNaD]1e;00000001O00O1001O0000001O0000000000000000000000001O0000000000O10000000000O10000O100O1O1O100O100O100O100001O00000000000000001O00000000001O0000001O00001O00000000001O000000001O0000001O00000000001O00001O0000001O000000001O0000000000001O00000000000000001O00000000000000O100000000000000O100O10000O1O1O1O1O1000000000000GeNeD[1Y;gNcDM0\\1Z;nNeDS1Z;>N2N`NlDP1S;POnDP1P;ROQEm0m:VOREj0l:XOTEh0k:YOUEg0i:ZOXEf0f:]OXEd0f:^OZEb0c:A\\E`0a8dNaG50IU1Q1jN<^8iNaG8R1f0PO9\\8]OZHc0[OO[8_OhGS1L_O\\8^OfGU1N]O\\8^OfGU1O\\O[8@eGS11]OZ8@eGT1O]O\\8_OeGT1O]O\\8_OeGU1N\\O^8^OdGU1O]O]8^OdGU1O]O]8^OdGU1O]O^8^ObGU10]O^8^ObGU10]O^8^ObGT11^O]8^ObGT11^O]8^OaGV11\\O^8_O`GU12\\O^8e1aG[N_8d1bG\\N^8d1bG\\N_8b1bG^N^8b1bG^N^8b1bG^N^8b1bG^N^8b1bG^N^8b1bG^N^8c1aG]N_8c1`G^N`8b1aG]N`8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8b1`G^N`8b1`G^N`8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8b1`G^Na8a1^G`Nb8_1`G`N`8`1`G`N`8`1`G`N`8`1`G`N`8`1`G`N`8`1`G`Na8_1_GaNa8_1_GaNa8`1]GaNc8_1]GaNc8_1^G`Nb8_1_GaNb8^1^GbNb8^1^GbNb8^1^GbNb8^1^GbNb8^1^GbNb8^1^GbNb8^1]GcNd8]1[GcNe8\\1\\GdNd8\\1\\GdNd8\\1\\GdNM" + }, + { + "size": [ + 424, + 640 + ], + "counts": "[96o20n60RI0n60RI0n61QIOn62RINn62QIOo61PI0P70PI0P70oH1Q7OoH0R7OnH2R7NmH3S7MdG1@2k8NcG5^ONo8LbG:\\OJR9M^Ga0\\OBV9NZGg0[O\\OZ9NYGl0XOWO^9OfFU2Y9mMaFY2^9hM`FZ2_9gM_F[2a8_MnG7_O\\2a8bMlGS3T8oLjGR3U8PMiGQ3W8PMfGQ3Z8QM_Gh12jN_8m1gGnMY8R2YH]Mg7a2`HYMa7g2`HXM`7l2\\HSMe7o2XHQMi7Q3SHQMm7Q3kGUMU8o30000001O002M2O2M5CQGXLS9[2TGUNb9d1jFnM^9k1U1I7M2M3K5M=B7Ia0^OQSa7" + }, + { + "size": [ + 424, + 640 + ], + "counts": 
"f[75n<6N1O2O1N1N3L3M3M4L3L4N29H6I1O1O1O1O0000000O01O1O0O1fNHbE<`:OQE1Y;FiDKGHPde7" + } + ], + "question": "Which of the following statements correctly describes the actions of and ?", + "choices": [ + "A. is driving and is on .", + "B. is carrying and is carrying .", + "C. is carrying and is carrying .", + "D. Both and are carrying ." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_383.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000313182.jpg", + "mask_rles": [ + { + "size": [ + 424, + 640 + ], + "counts": "\\T_11Y\\a08\\P_O;]CBS\\;BeDa0X;^OhDg0S;XOnDj0P;VOoDk0Q;UOoDl0Q;SOoDm0R;SOmDm0T;ROlDn0U;POlDP1U;oNkDQ1W;mNjDR1W;mNiDS1X;lNhDT1Z;jNfDV1[;iNeDW1\\;hNdDX1];gNcDY1_;eNaD[1`;cNaD]1e;00000001O00O1001O0000001O0000000000000000000000001O0000000000O10000000000O10000O100O1O1O100O100O100O100001O00000000000000001O00000000001O0000001O00001O00000000001O000000001O0000001O00000000001O00001O0000001O000000001O0000000000001O00000000000000001O00000000000000O100000000000000O100O10000O1O1O1O1O1000000000000GeNeD[1Y;gNcDM0\\1Z;nNeDS1Z;>N2N`NlDP1S;POnDP1P;ROQEm0m:VOREj0l:XOTEh0k:YOUEg0i:ZOXEf0f:]OXEd0f:^OZEb0c:A\\E`0a8dNaG50IU1Q1jN<^8iNaG8R1f0PO9\\8]OZHc0[OO[8_OhGS1L_O\\8^OfGU1N]O\\8^OfGU1O\\O[8@eGS11]OZ8@eGT1O]O\\8_OeGT1O]O\\8_OeGU1N\\O^8^OdGU1O]O]8^OdGU1O]O]8^OdGU1O]O^8^ObGU10]O^8^ObGU10]O^8^ObGT11^O]8^ObGT11^O]8^OaGV11\\O^8_O`GU12\\O^8e1aG[N_8d1bG\\N^8d1bG\\N_8b1bG^N^8b1bG^N^8b1bG^N^8b1bG^N^8b1bG^N^8b1bG^N^8c1aG]N_8c1`G^N`8b1aG]N`8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8b1`G^N`8b1`G^N`8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8a1aG_N_8b1`G^Na8a1^G`Nb8_1`G`N`8`1`G`N`8`1`G`N`8`1`G`N`8`1`G`N`8`1`G`Na8_1_GaNa8_1_GaNa8`1]GaNc8_1]GaNc8_1^G`Nb8_1_GaNb8^1^GbNb8^1^GbNb8^1^GbNb8^1^GbNb8^1^GbNb8^1^GbNb8^1]GcNd8]1[GcNe8\\1\\GdNd8\\1\\GdNd8\\1\\GdNM" + }, + { + "size": [ + 424, + 640 + ], + "counts": 
"eig02Q=7M7H5L1000001O001O000kC[OLNc;i0_DYONNc;o0<4mCmN52e9c0aFj1U9X1M6J2N0000000000000000000000000000000000000000O1000000000000000000000000000000000000001O0000O2aNhFkN[;iN_EMf[2l0_dMUO8OV80jFR1IB4I;M@4S9BUGi3U90O11O000O10000O100001O000000000000O1N2000000003M0000O1O1O101O0000001O001bN_FSOb9k0eFnN]9P1lFfNW9Y1iFgNW9Y1eFRNAd0m9W1cFmN]9R1cFoN^9n0cFRO^9n0`FTOa9k0ZF[Oe9e0ZF\\Og9c0YF\\Oh9d0WF\\Oj9c0WF\\Oj9b0aF]NX:^1l0L4N2O1O1O1N3M2DlCAX<:lCBW<9`0Lemh5" + }, + { + "size": [ + 424, + 640 + ], + "counts": "Z96R=00000000O1O11O0001NW3N]Q12\\kN100001NV`0Oh]j02RRUO1X^l02]bZOK]oH001N1PC1i:OfF0]Nh0^:\\OiEMIZ2Q:l0K2N2O3L10001M200O1O100O6K4L001N100N2O100000O10ON301N10000001O0O1000000O1000001O000O2O0O101N1O100O10001O0O1000000O1O2O000O1M5L2N3O0O1000001N100000001O000O100O2O1O000O1000000O2O0000000O2O0O10000O100O2O000O2O001O00000O2O0010OO100O11O0001M11001O0O10000000000000001O00001O0001O01N11O0010O00000001O00O1010O0001O00000010O01O00001O00000001OO100O2O001O001O00001O0O101O000000000O1000001O0O10000O100000O10O01001O1O2N2M2O2N1O1O2N1O1O1N2O1N2O1O1O1O1N101O001O1N2O0000000O1000000000001N10000O1000000O10O10O0100O100O100O1O1O1O1O1O010O1O1O03N1N2O1O000O1000001O0O1000001O000O100000001O001O0O100O2O00000O101O000O2O000000000O2O000000000O2O1O000O101O0000001N100001O0000001O01O01O001O00010OO1000000O10000O1O10000000ZHiJ[7W5cHlJ\\7T5dHmJ[7S5dHoJZ7R5fHoJY7Q5fHQKY7o4fHSKY7m4gHSKY7m4gHTKW7m4iHTKV7l4iHVKU7k4jHWKU7i4kHXKS7_5M4N1L6N1N2O1N4M3L2M5K6J5L;E5J;E7Ic0\\O`0B:E4M6G5RMUF[2g:iMmDc1k;ZNVDn0Y, , and ?", + "choices": [ + "A. is beside , and is in front of .", + "B. is driving on , and is beside .", + "C. is driving on , and is beside .", + "D. is on , which is beside ." 
+ ], + "answer": "C", + "type": "relation", + "image": "images/vqa_384.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000463174.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "fmU11X=2000000001O00O12N[da11][^N8TJG[I3_4l0`3jMnMc0PNk0a0i0^3UOhLVOXOn0b0g0\\3\\OdLQO]Ol0d0f0Z3B`Ln08_OV3G_Lk0;^OT3OWLg0d0[OT3]3lLcLS3^3nLnIYO`2i3b3RM]Lm2d3TM[Lk2f3UMZLj2g3VMYLi2h3WMXLi2h3WMXLi2h3WMXLh2i3XMWLh2i3WMWLi2j3XMULh2k3XMTLh2m3XMQLi2P4WMmKk2T4UMiKn2W4RMgKo2Z4QMbKS3^4mL`KT3a4kL]KX3d4gLXK]3h4cLUK`3j4bLRK`3o4`LnJa1oNUOV6XOiJc1WOQOQ6\\OeJc1GfNe5GbJa1NgN`5I^Ja16cN]5K\\Ja1`Im1d1dMm4?\\Io1k1\\Mm4d0WIP2U9PNfFU2Z9m010O10O101OO0101O0001N100000O11N1000O1000000001O00000000001O0O2O2N001OmLmFn1S9RNnFm1R9SNoFm1Q9QNQGo1n8oMUGS2h8mMYGT2e8lM]GT2`8nM`GX2S1PM_5g0`IZ2i0UMf5`0cI[2HjLa0_67eIi2GTMd63fIj2_OYMk6LgIn3Y6QLiIn3W6RLjIm3V6TLjIk3U4^KnLg0nNj3R4eKlLa0SOi3o3kKkL;XOi3j3QLlL6[Oh3h3ULjL4_Of3g3XLhL3Ae3d3\\LjLMDf3a3`LiLFgN[OP1^4_3dLiLBhN\\OP1^4^3fLhLELc3\\3jLfLDNb3V3PMkL^O0b3l2bKRLc1S1XO0c3c2`M\\MmN2b3a2cM\\MkN5a3]2gM\\MiN7_3[2kM]MeN:`3e1gK`MV2`0cN<`3a1QLWMQ2k0]N>b3`1bNPNmMa0`3^1dNPNmMb0_3_1cNnMmMf0`3\\1bNmMnMh0`3Z1cNmMlMk0b3W1bNnMlMk0h3Q1]NRNkMn0S4e0RN\\NjMQ1X4`0mM^NgM]O]Of1P5>mM]NfM@\\Of1S5;kM^NjMX1^46jM`NhM\\1_43hM`NiM^1a41eMaNhM`1d4NeM`NgMc1d4LfM`NfMe1e4JgM^NdMi1f4HgM\\NcMo1f4CbN`0_1_O`Nb0a1\\O`Ne0a1XOaNg0a1VO_Nk0c1RO[NQ1f1mNYNU1g1jNZNW1e1gN]NY1c1fN\\N]1c1aN]Na1b1_N^Nb1a1]NaNb1`1]NaNb1_1]NbNc1]1^NdNa1\\1^NeNb1[1^NgN`1Y1`NiN^1W1bNjN]1V1bNmN\\1S1dNoNZ1Q1eNSOX1m0hNUOV1k0iNXOU1h0jN[OT1e0lN]OR1c0nN@o0?RODk0GB;, , and ?", + "choices": [ + "A. has already hit with .", + "B. is holding while looking away from .", + "C. is using to point at .", + "D. is swinging and is about to hit ." 
+ ], + "answer": "D", + "type": "relation", + "image": "images/vqa_385.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000192904.jpg", + "mask_rles": [ + { + "size": [ + 436, + 640 + ], + "counts": "oTo37[=5L3M2N2N2O1N101N100ODYCNf<1[COe<1\\CNd<2\\CNd<2\\COc<1]COc<1]COd<0\\C0d<0\\C0d<0\\C0d<1\\CNe<1[COe<2[CMe<4ZCLg<3ZCL^S5DnJYIAi6;ZIDg69\\IFf64_IKb63cII_65bIJ_65aIK_64bIL_63aIM`62aIM_62bIN^62bIN_60bI0^6OcI1]6NeI1\\6MhIOY60iIOX6OiI1W6NjI2W6LkI3U6LnI2S6LgIHoL=Y9JiIInL=Z9IPJ6P6IQJ7o5HSJ7n5GSJ9m5GSJ:l5DVJgJ\\O_L6h8`0iJZO_L6o75WH?Z3VO`L5n7]1bK]NaL5n7^1fKaN[4_1]K_NeL2o7^1[KaNeL1Q8_1YK^NhL2P8`1XK]NiL2P8c1TK^NjLOR8d1SKdNn4]1QKbNP5`1nJ_NS5b1lJ^NT5c1kJ]NU5d1jJ\\NV5e1iJ[NX5d1hJ[NY5f1gJYNZ5g1eJYN[5f1`JUNVM5[8f1_JVNUM4]8f1]JUNWM5\\8g1\\JTNXM5\\8f1aJ[N`5b1bJ]N`5b1eGXNc26h5i1YJWNh5h1jGTNLNg17d6h1RJZNk1J^Om1dNZNn1K^Oj1cN\\No1I_Ol1`N\\NQ2H@l1]N]NS2G@m1\\N[NT2J^Om1\\NZNV2I^On1ZNZNW2I^Oo1ZNWNY2J]OP2XNWNZ2J]OQ2UNXN]2H]OR2TNWN_2H\\OS2RNVNb2G[OV2oMTNg2FYOY2mMRNj2EXO[2lMQNk2EXO]2iMoMP3DWO_2fMnMS3DUOa2dMmMW3BUOe2_MiM]3BSOi2ZMhMa3ATOj2UMiMf3_OSOl2QMhMl3\\OSOQ3dLjMZ4UOQOc4o0^KoNc4Q1]KoNb4R1_KlNb4S1_KmNa4R1`KnN_4Q1cKnN^4Q1dKnN\\4Q1eKoNZ4R1gKlNZ4T1fKkN[4T1fKlNZ4T1gKjNY4W1hKhNX4X1hKhNW4Y1iKfNX4Z1iKeNW4Z1jKeNV4Z1mKeNS4[1nKcNS4]1nKbNR4^1nKaNR4`1oK^NR4b1oK\\NR4d1nK\\NR4d1nK[NR4f1oKYNQ4f1PLYNQ4g1PLWNQ4j1oKTNQ4m1oKSNQ4n1oKPNR4P2oKnMQ4T2nKkMS4V2mKhMT4X2lKgMT4[2mKbMT4^2mK`MT4a2mK\\MS4e2nKYMS4h2mKVMT4k2lKQMV4Q3jKmLW4S3jKkLV4W3lKdLV4]3jK`LX4`3lKYLV4j3iKRLZ4o3hKkKZ4V4jKbKZ4^4iKYK]4h4V22N2O001N2O001N100O2O0O2O1O0O2O000O2O1O00100O001O001O001N102N1O1O1O1O1O1O1O1O1O101N1O1O1O100O2N1O2N2N3M4L2N4L3L5L3L4K5DH9B=D;G9C=H9G8M3L6J4M5J6J5L5I7H?WOhmk0" + }, + { + "size": [ + 436, + 640 + ], + "counts": 
"]hn35^=2O0O2O000000001O00000000001O0001O000000000000001O0000000000001O0000000000001O0000000000001O00000000001O000O10000000001N1001O000001O000001O01O0000O1000000000100O2N1N1O2O1N2O1N101N101N100O100O100O100O1000O1000000O10000O10000O100O1O100O101N1O101N1O2N2NkRl2" + } + ], + "question": "What is the spatial relationship between and ?", + "choices": [ + "A. is on .", + "B. is inside .", + "C. is on .", + "D. is beside ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_386.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000015335.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "fYc2=`>5K5L3L4N1O3K3O1O1M300O1O1N2O2N1O1M2001O01M3O001O1O1N2O001N110O1O0O101O1O0O2O001O1O1O1O001fDiMi9X2VFmMfNKl:X2^FSN_9n1bFSN[9o1dFSNZ9n1eFUNX9l1gFWNW9h1iF[NS9g1kF\\NS9e1iFaNT9_1jFfNT9Y1hFoNT9S1`FZO]9h0[FAb9R3M2M3N1N4M1O2M4M2M4M2N2M3N2N1O2N2M5L1O3M3M1N101O0gN\\IVJd6f5dIVJ]6g5hIVJX6i5jIUJX6i5lITJT6S5TIZJO;0Ek0b0R6Q5kI[J5c0o5l4SKSKm4i4YKTKh4h4]KWKc4f4aKYK_4f4cKYK]4f4eKYK[4f4gKYKZ4c4jK[KW4d4kK[KU4e4lKYKU4f4mKYKS4d4V3O1000000O1O1O1O100O100O10000O1O100O1000000O1000000000000000000000000001O000000001O0000001O001O0000001O0O110O1O1O1O1O001O0O3N1O10O01cMTKdJm4k0UKW1OmMl4j0YKb3i4ZLZK^1GiMQ5d0[Kd1ChMS5a0\\Kj1C`MR5c0]Km1A`MS5?_KR2]O_MU5;aKV2ZO_MW55dK\\2UO^MX5NjKd2oN\\MZ5JkKd1iNjM4e0Y5GPLT3dNSM^5ZO\\LV2UNTN1:e5oN]La2oMUNO:`8`1bGVNN7c8`1cGXNKiNOU1g8h1_GYN4K`8k1]GYN4Ci8e1mF`M`0X1KAi8d1dGiNH[Oi8h1cGkNKTOe8m1eGlNKoNf8P2dGRNiNe0h:U1cFTNgN1N2O9j:]1\\2K5M2N3M3N2M3M3K5K6K5F:IWVh3" + }, + { + "size": [ + 480, + 640 + ], + "counts": "g4l1T=0O101OO01N101000O1J6N20000O2O1N101OOL5M3O10000O101L3N2O4Jocj1WOh\\UN6N2M2N4M2M3N2N1O2L3O2N2N1N3L4N1O2O001O1O1O00001O1O00001O10O01O001O001O1O1O001O1O1O2N1O1O2N3M2N3M4L4L4L5K5K7I6J4L4L3M4M3L1O0000O1M3O100N2O1O1N20O01O1O1O100O2N1O1O101O0O10001N100O100O10000O100000000O100O1000000iN[DSOe;i0eDPO\\;l0jDROV;`0YDlNh0b0o:6hEHX:2oELS:NTFOm9NWF1i9L[F3e9HaF6W43000000O10000000001N1000jiU1" 
+ } + ], + "question": "Which statement accurately describes the relationship between and ?", + "choices": [ + "A. is pushing .", + "B. is positioned on top of .", + "C. is sitting on .", + "D. is standing in front of ." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_387.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000060932.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "Ydg0=h<:I5L4J5M3N3N1gDfN_:\\1REWOi:k1Ja0@T1kN9H3L7J6I3M3@mJcHT5T7XKhHh4V7]KcHh4Z7f000O100000nNQIgKn6m3fIlKZ6R4kIkKV6R4nIlKR6S4QJkKo5T4SJjKn5V4VJeKk5Z4d1010O20ZLVGj2g8TM\\Gn2a8oLbG_OEb3h8kLlGT3g9AOTMfEMa0o1P;JNWNdD`1f;1N1M3N01N3N4J7J6J6J6I3N2M5LVSk6" + }, + { + "size": [ + 428, + 640 + ], + "counts": "m7o0]<0000001O1O1O0000O1O1O1O1O1O100001O001O000000O1O1O1O10000000O2O01O0000O01O1O100O10000001O001O00O1O100O010O11O1O1O0010O0O101M2F;HmR`7" + }, + { + "size": [ + 428, + 640 + ], + "counts": "QQZ1?gh4Y7eK`Ha4Z7aKcHc4[7j0L3M2O1N1000001O0000000001oNmHWK00T7f4QISK23n6f4V1oKcGo2_8QMcGm2^8QMhGk2X8RMlGn2U8lLoGU3P8hLTHX3T8[L\\H1PO1JU3Y:K8H5ZM[EY2g:dM\\E[2k:0O2M1O1102gMTEm1j;lMXD[1R, , and ?", + "choices": [ + "A. is in front of , who is beside .", + "B. is looking at , who is beside .", + "C. and are both in front of .", + "D. is beside , and is also beside ." 
+ ], + "answer": "D", + "type": "relation", + "image": "images/vqa_388.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000076417.jpg", + "mask_rles": [ + { + "size": [ + 478, + 640 + ], + "counts": "oXg29`>5M3N3K4M3N2N101M4M2N2O1O1N2O1O1O1O1O1N200O100O100O10000O1O100O100000000O10000000000000000000000000000O100000000000000O11O0000O100001O00O1001O000000000000000000000000001O0000000000O1001O000000000000000000001N10000O100001O1O0000VOlBJU=3nBLR=P100001O001O001O00O1N2N2M3M3N2M3M3M3N2N2M3M3M3M3N2M3N2M3N2N2M3N2L4N2M3M3M3N1N3N2M3N2M3M3N2N2N2M3N2N3M2N2N2N2N2N2O1M3N2O1N2N2M3N2M3N2O1M3M3O1N2N2M3N2N2O1N2M3N101N3N1M3N2O1N2N2M2O2O1N2N2M3N2N2N2N2N2N2N2O1N2N2N2O1N2O1N2M3O1N2O2M2N101N2O2M2O001O1O2M101O1O1O2N001O1O100O1O1O100O2OO0100O1O2O0O00101N1000O100001O0000O02O0000O01000000000001O00O10O100O2OO10O10001OO10000001O00O1001N1000O100001O000O011O0000O100001O0000O02O0000O10000000O10001O000000000000000000000000000O100000000000000000000O100000000000000000000000000000000000000O10000000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000000000WK" + }, + { + "size": [ + 478, + 640 + ], + "counts": "R\\_6Q1Z and in relation to ?", + "choices": [ + "A. and are both over .", + "B. is beside , and is over .", + "C. is over , and is beside .", + "D. is over , and is beside ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_389.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000326627.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "VYY45g>2N3M:F2N1O0000001O00001OO2N1O2O1N4M2N4LQmQ4" + }, + { + "size": [ + 480, + 640 + ], + "counts": "Q>n0R>000O10000000000O100O10000001O0000O100O1000000000000000000O10000000000000000O100000000000000O10000000000000000000000000000O100000000O10000000000000000000000O100000000000000O10000O10000000000O100000000O10000000000000000000000000O100000000O1000000O10000000O010000000000O100000O0100000O1000O100000O10O01O100O1N2O1O10O10000000000O1000O0100O101O0O10001O00000O2O001O0O10001N10000O100O2O000O01000O10O100O10O10000O010O0100000000O0100000000O01000O1000000O10000O10000000000O10O1000O100000O01000000O010000000O1000000000000O1000000O100O100000000O1000000O010000000O1000000O1000000O100O10001NndS5" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"R>m0S>0000000000000O1000000000000O1000000O10000000000O10000000000O1000000000000000000000000O10000O10000000000000000000000000000O1000000O10000000000O10000000000000000O10000000000000000O100000000O100000000O10000O10000000000001O0000O1000000000000000000000000000000000000O1000000000000O100000000O100000000O100O1N2N2O1O100000000001O0000O1000000000000000000001O001O001O0000001O0000001O0000000000000000O1000000O1000000O1O1O100O100000000000000O10000000000001O000000001OO100O1000000O100000000O100000000O100000000000000000000O10000000000000000000000000000000000000000000000O10000000000000000001TNPC0Ob1b=H1O1O00000000000000O1O1000000000000000000000000O100000000000000001O001O000000000000O1O10000000000000000O1N2O1O100000000001O00O100O100O100O100000000O10000O1O1J6M3M3O11O3M1O1O0000001OO10000000000000000000000000000000000O10000O100O1000000O100001O1O00000000O1001O00O100001O0000000000O1CSNcCQ2\\<:O1O1O10000001O0000000000001O00000000000000000000O100000000001O001O0000000000000000O10000002N10O0001N1001O0000O100000O1O10001O01O1O1O1O0O10001O00000000Q1oNU1iNQ`69d_I?E;mAoNa=m1`BQNe and , and the road, ?", + "choices": [ + "A. is parked on , but is parked on .", + "B. Both and are parked on .", + "C. Only is parked on .", + "D. Both and are enclosing ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_390.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000077460.jpg", + "mask_rles": [ + { + "size": [ + 640, + 428 + ], + "counts": "Sj[55ic07G5N2O3M4N1ON2Ci\\OMXc0Om\\OOUc0Nn\\OOn\\i2" + }, + { + "size": [ + 640, + 428 + ], + "counts": "gQo17gc07\\\\ODYc0n0^OQOZ]OZ1[b07N2N11Bf]OSO[b0k0f0NK7H10oN_]Oa0cb0\\O`]Ob0db0YOh]O;]dS6" + }, + { + "size": [ + 640, + 428 + ], + "counts": "[a093Jh0028^`09g^O@0M13N\\1Ya0V10N2N2O10e^O`MVa0a2g^OaMYa0c20000000000000000000000001O001O0[Ni^Oa0Wa0^Oj^Ob0Va0\\Om^Oc0Sa0]Om^Oc0Sa0]Om^Oc0Sa0]Om^Oc0Sa0]Om^Oc0Sa0]Om^Oc0Sa0\\On^Od0Sa0[Om^Oe0Sa0[Om^Oe0Sa0\\Ol^Od0Ta0\\Ol^Od0Ta0\\Ol^Od0Ta0]Ok^Oc0Ua0^Oj^Ob0Va0_Oi^Oa0Xa0^Oi^Oa0Wa0_Oi^Oa0Wa0_Oi^Oa0Wa0_Oi^Oa0Wa0@h^O`0Xa0@h^O`0Xa0_Oi^Oa0Wa0_Oh^Ob0Xa0^Oi^Oa0Xa0^Oh^Ob0Xa0^Oh^Ob0Xa0^Oh^Ob0Xa0_Og^Oa0Ya0^Oh^Ob0Xa0^Oh^Ob0Xa0^Og^Oc0Ya0]Og^Oc0Ya0^Of^Ob0Za0^Oe^Oc0\\a0P1000000001O000gMg^Ok1Za0RNh^On1Ya0PNi^Oo1da0UNR^Od1Yb0L4`Ne]OS1db001O0000O1M31O1O1O001OO100[OPOP^OZ1ka0`0M3O1L4O100001O0000002N:F5TNk]Oc1_b0J=C2N1OO10000O11O1O0000O1ROQOk^Oo0Sa0ZOf^Of0Ra05Z^OLfa0W100000000001O00000000000000001O0000000000000000000000O100001O000000000eN[^O;ea0SOm^Om0Sa0POQ_Oo0o`0POR_OP1n`0oNR_OR1Qa0kNo^OU1ma01O1BlNl]OX1Rb0<00O1O100O100000000001O0000O1JWNS^Ok1ka06O1000000001O00000000000000^OoMV_OP2^a0O1O1O1O00O100O100O100LPNW^OQ2ia0oMW^OQ2ma0O000000002N1O0000000000001O000000001O0000O100000000O1O10000000000001O0000O100000000001O000000000000O100000000001O00000000000000O100JRNZ^On1ma0O7I4L2N1O3M3M2N1O1O0000O100ElNi]OU1na0jNQ^O7KP1Sb0mNm]O_1Sb070000000000000000000000000000000000000000O1001O001O1O1O0000O1O1O10000000000001O00O1001O0000O100000000000000001O000000O11O001O2N1O001O0000000cNk]Oo0Ub0POm]Oo0Sb0QOQ^Ok0oa0UOR^Oj0na0VOS^Oi0ma0VOT^Oj0la0VOS^Ok0la0VOS^Ok0ma0UOS^Ok0ma0VOQ^Ok0oa0TOS^Ok0na0TOR^Ol0bb0O1O1O00001OO10000UO[OZ^Of0ea0@V^O`0ja0_OV^Ob0ja0^OV^Ob0ja0]OW^Oc0ia0]OW^Oc0ia0]OX^Ob0ha0_OX^O`0ha0@X^O`0ha0@X
^O`0ha0@W^Oa0ja0^OS^Oe0ma0\\OQ^Oe0oa0\\Oo]Oe0Pb0]Oo]Oc0Qb0f0000000000000000000000000000000000000000000000000000000000000001^Nm]OH[1" + } + ], + "question": "What are and doing on ?", + "choices": [ + "A. Both and are running on .", + "B. is running on and is standing on .", + "C. Both and are standing on .", + "D. is standing on and is running on ." + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_391.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000077460.jpg", + "mask_rles": [ + { + "size": [ + 640, + 428 + ], + "counts": "gQo17gc07\\\\ODYc0n0^OQOZ]OZ1[b07N2N11Bf]OSO[b0k0f0NK7H10oN_]Oa0cb0\\O`]Ob0db0YOh]O;]dS6" + }, + { + "size": [ + 640, + 428 + ], + "counts": "[a093Jh0028^`09g^O@0M13N\\1Ya0V10N2N2O10e^O`MVa0a2g^OaMYa0c20000000000000000000000001O001O0[Ni^Oa0Wa0^Oj^Ob0Va0\\Om^Oc0Sa0]Om^Oc0Sa0]Om^Oc0Sa0]Om^Oc0Sa0]Om^Oc0Sa0]Om^Oc0Sa0\\On^Od0Sa0[Om^Oe0Sa0[Om^Oe0Sa0\\Ol^Od0Ta0\\Ol^Od0Ta0\\Ol^Od0Ta0]Ok^Oc0Ua0^Oj^Ob0Va0_Oi^Oa0Xa0^Oi^Oa0Wa0_Oi^Oa0Wa0_Oi^Oa0Wa0_Oi^Oa0Wa0@h^O`0Xa0@h^O`0Xa0_Oi^Oa0Wa0_Oh^Ob0Xa0^Oi^Oa0Xa0^Oh^Ob0Xa0^Oh^Ob0Xa0^Oh^Ob0Xa0_Og^Oa0Ya0^Oh^Ob0Xa0^Oh^Ob0Xa0^Og^Oc0Ya0]Og^Oc0Ya0^Of^Ob0Za0^Oe^Oc0\\a0P1000000001O000gMg^Ok1Za0RNh^On1Ya0PNi^Oo1da0UNR^Od1Yb0L4`Ne]OS1db001O0000O1M31O1O1O001OO100[OPOP^OZ1ka0`0M3O1L4O100001O0000002N:F5TNk]Oc1_b0J=C2N1OO10000O11O1O0000O1ROQOk^Oo0Sa0ZOf^Of0Ra05Z^OLfa0W100000000001O00000000000000001O0000000000000000000000O100001O000000000eN[^O;ea0SOm^Om0Sa0POQ_Oo0o`0POR_OP1n`0oNR_OR1Qa0kNo^OU1ma01O1BlNl]OX1Rb0<00O1O100O100000000001O0000O1JWNS^Ok1ka06O1000000001O00000000000000^OoMV_OP2^a0O1O1O1O00O100O100O100LPNW^OQ2ia0oMW^OQ2ma0O000000002N1O0000000000001O000000001O0000O100000000O1O10000000000001O0000O100000000001O000000000000O100000000001O00000000000000O100JRNZ^On1ma0O7I4L2N1O3M3M2N1O1O0000O100ElNi]OU1na0jNQ^O7KP1Sb0mNm]O_1Sb070000000000000000000000000000000000000000O1001O001O1O1O0000O1O1O10000000000001O00O1001O0000O100000000000000001O000000O11O0
01O2N1O001O0000000cNk]Oo0Ub0POm]Oo0Sb0QOQ^Ok0oa0UOR^Oj0na0VOS^Oi0ma0VOT^Oj0la0VOS^Ok0la0VOS^Ok0ma0UOS^Ok0ma0VOQ^Ok0oa0TOS^Ok0na0TOR^Ol0bb0O1O1O00001OO10000UO[OZ^Of0ea0@V^O`0ja0_OV^Ob0ja0^OV^Ob0ja0]OW^Oc0ia0]OW^Oc0ia0]OX^Ob0ha0_OX^O`0ha0@X^O`0ha0@X^O`0ha0@W^Oa0ja0^OS^Oe0ma0\\OQ^Oe0oa0\\Oo]Oe0Pb0]Oo]Oc0Qb0f0000000000000000000000000000000000000000000000000000000000000001^Nm]OH[1" + }, + { + "size": [ + 640, + 428 + ], + "counts": "RRR62mc03N0O4M1N3N1OIb\\OM]c02f\\OMXc04h\\OLXc03j\\ONTc02m\\ONSc01n\\O0Pc00Q]O0ac0M_QP2" + }, + { + "size": [ + 640, + 428 + ], + "counts": "Sj[55ic07G5N2O3M4N1ON2Ci\\OMXc0Om\\OOUc0Nn\\OOn\\i2" + }, + { + "size": [ + 640, + 428 + ], + "counts": "0^21]N2[O1001N020OO101O0O1000e0M]O3Oc00\\O91GOO2O000Mh0OB5CZ1O;RMiAm2X>SMhAl2Y>TMgAk2Z>TMjAh2V>XMjAh2W>VMjAj2f?000000000000000000000O1001O00001O00000000000000mKYMmFg2T9XMkFi2U9XMjFh2V9XMjFh2V9YMiFg2W9ZMhFf2Y9XMhFh2Y=O0000000000O1O1001O1O1O001O001O1O00O1O100O1001O2N1O1O1OO100O10000O10000]O_Mg_Oa2Y`0`M[_OK8e2]`0dMb_O\\2_`0cMa_O]2d`0]M]_Oc2c`0]M]_Oc2c`0]M]_Oc2b`0^M__Oa2b`0^M^_Ob2b`0^Mb_O^2Ra0N001O1O1O0000001O001OO100O1H8O1O1iM\\MRCd2m<]MSCc2l<]MVCb2j<^MWCa2V?N3MN2N2O1O1O10000C[M^_Of2b`0ZM__Oe2a`0\\M^_Od2o`00L40000000ZMm^O_2[a0O00O1O100001O3M3M2N1gM[^OU2]`0jMi@0kNV2d>kMWDY2h;hMWDY2j;fMVDZ2f>2`MdM[C]2d1O1TMnMiCS2P?200O1000000001O001O1O00000000O1O100O100000000000000M3O100O1O1000000O11O00^HfM^MZ2b2gM]MY2b2hM]MY2c2gM]MY2b2hM^MX2b2hMWFKP7]2h2hMYFKo6]2h2hMYFMm6[2j2hM[FKl6\\2i2iMbMV2_2iM]FLg6[2l2iM]FLg6[2l2iM]FLg6[2P3eM]M[2T:0000000O1000000lJgM`HZ2_7gMaHY2_7gMaHY2`7fM`HZ2d0O100aMeMYC[2fS1@UN1OOR3`0nLOW3BmL3M20Li0NZO3N103L12LNM3:Y;" + } + ], + "question": "Based on the provided relationships, where is located?", + "choices": [ + "A. Being held by .", + "B. Next to .", + "C. On .", + "D. In ." 
+ ], + "answer": "D", + "type": "relation", + "image": "images/vqa_392.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000287545.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "[jm05n=1gB2]OM4LV=;RC8FAT=:RC7JCo<9RC7O@oVCBj<>UCCl<=SCBn<>QCCo<=QCCo<=PCDP=0O0001O1O2N1N3N^b\\3" + }, + { + "size": [ + 480, + 640 + ], + "counts": "\\]^34l>2N1jA1Y=0dB2\\=NWBJ5:e=2YBOh=2SB1n=?2O0O1OO0O2O2O02N2O0O1O[CYOS;g0iD^OV;b0gDBX;n0UDUOk;n0dCcN5c0W interacting with ?", + "choices": [ + "A. is looking at .", + "B. is standing on .", + "C. is beside and looking at it.", + "D. They are standing on different objects and looking away from each other." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_393.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000545219.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "\\Zm12j>8J3N2M3M5K5L2M3N3L5K4M3M2M4L8I2M2O2N1M3O2N1O1N2N2O1OiNgCIX<7kCGS<:PDDo;XD@g;a0ZD^Oe;c0\\D\\Oc;f0]DYOc;h0^DVOa;l0_DSOa;P1]DnNc;T1]DkNc;[1WDeNh;e1PDXNQoG\\OQ8e0RHUOa7NoEl0S=VOlBj0S=YOjBh0U=]OfBc0Z=b0O010O0010O100O100O1O101N1O2N2N2N3M3N1N1O2N2O1N3M3M3M4L2NkQo4" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"aag1>_>;eBC`;X2C7F:I6I7J7K4M2O2N1O2M3O0O100O2O0O101N3M4M3L2O1O1N1O2N100O1O101N101N2N2N1000O1O0M301O01O010O10000O10O010O01O10O010000O01O001O0010O001000O10O10O1000O01O10O010OdNgFQM[9j2_1F;1O1O0O101O1N100O1O1O1000O10O0100000O0001000O1O2O00000O2O1N1O101N1O100000000O1O2O000XFWLX8j3gGVLY8l3fGQL\\8P4dGPL\\8Q4cGRLZ8o3dGTLY8P4dGRLZ8V4TGULj8Y4hFjKV9W4iFiKW9W4iFiKV9X4jFhKU9T4lFVK1e0R9V4nFTK0f0R9Z4nFgKQ9Z4mFgKS9X4oFfKQ9W4nFSK=6i8f4k00O101N3M101O000O2O1O1N2O0O2O0O100O1O1O2O0O1O1O1O1000O10OL4M40O100O01000O100O00011O0O2N101O000O7J001O0O2O001N100000000O10000O101O00000O3N1O1O1N102M4L3N1O1N2N2N2cLVIbNk6i41N100O2O00002N4L1ZHnH:Na6[7b0011`HdHZ7a700jH^Hn6b7RI^Hm6d7TI^Hi6V8F1\\IPHV6_8N3L5L9H3L1O000010O000000001O001O0000O2O00000O100O2M2O100000000001O0000001O000000000000000000010O0000000000001OO1001O000O10001O00000O100O101O00000010O0000002OO01O1O1O1O001O0001O2N1O0O10000O1O1O101M2O100O2O1O002M2O000O101OO1001O0O1000001O0O10001O001aN]J_Ic5`6_J^Ic5`6`J^Ia5]6fJQIUO6V6e6XKSIj4j6i1M001O1O2M2N3O0O2O1kNmGoJB1c8j4\\HTKe7j4\\HUKf7h4]HVKe7h4[1N2N3M2N3N2M2O2N1N3N2L4N1O2N1O2M3N2N1O2N2N2N2N2N2N3M2N2N2N1O2O1N3M3M2O2M3N1O1N105L1N1O2M2O3O00SCoMO2OO`<[24M2O4K4L3M3L5K7J5J5L5J;CXlR1" + } + ], + "question": "What is the relationship between and ?", + "choices": [ + "A. is looking at .", + "B. is wearing .", + "C. is inside .", + "D. is moving towards ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_394.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000096001.jpg", + "mask_rles": [ + { + "size": [ + 426, + 640 + ], + "counts": "ejb06Q=7I7I5K4N3M2M3M4M1O2N2N2N1O2N1O2N1O2N1O2N101N1O100O1O2O0O1O2O0O1O101N1O10001N100O2O0O2O000O100O2O0O100O101O0O100O1000000O10000O10000O10000000000O100000O100000000O10O1000O10000O10000O100O100O100O100O100O2N100O1O1O2O0O1O1O1O1O2O0O1N3N1O1O2N1N3M2O2M3M2N3N2M2O2M3N3M2M3N3L4L4K6HZQn5" + }, + { + "size": [ + 426, + 640 + ], + "counts": "Pfm37R=3M2N3M2N2N2O2M3M2N3N1N2N3M2N2O2N1N3M5L1N3M2N3N1N2N2N2N3M2O2N1M3N3M2O1N3M2N2O1N2N2O2M2N2O2M2N2N2N3M2O1N3M3M2O1N2O1N3M3N1N3M2N2N2O2M2O1O1O1N2N200O2N1O1O2N1N2O1O1O2N1O2O0O1O1O001O001O1O0000000000000000O1O100^OVHcKk7P4eHlK\\7R4hHkKY7Q4QIiKo6U4T1XOTGoLo8Q3SGiLP9X3a0O11O000000000000001O00001O00001O00010N1001O010O010O100O2N100O100O10001N100O10000O10001O0O1O100O10TFoLM0W9P3i0O11O2mEWMa9Y30O1O1O2N2M2O2N1N3N3M2M5L4L7I000O01O001O10O0100O1O01O1O001O001O1O0O2O1OgHbMg3^2c3O001N101O1O001N101O001O0O101O1O0O2O1O1O1O1O0O2O1O1O1O0O101O1O001N101O1O000O2O001O1O0O200O1N101O0O2O1O1O00001O0000O0101O001O0O2O1O2N1O1N3N101M3N1N3N1O1O2N001O2N1N4LRkf0" + }, + { + "size": [ + 426, + 640 + ], + "counts": "oc51Y=000Pm<0PSC0\\R<0Wd10[YB3M2O1N2O1O2M2N2O1N2O1N2O2M2O1O1N2N3N1O1N3N1N2O2N1000001O0001O0001O000001O01O00000001O01O00000010O000001O000001O01O00000001O01O0001O0001O00010O00001O0001O01O0000010O0000001O0001O01O00000010O00000010O0000001O000001O0001O00010O00001O01O01O000000O2M2N2N2N2M4M2N2O1N2N3M2N2N2O1M3N3M2O1N2N2Niao4" + }, + { + "size": [ + 426, + 640 + ], + "counts": 
"d1Z29Bb0f0b4IkJBc0e0a4JjJDd0b0a4JjJFe0`0a4Q2_KPN_4Q2bKnM^4R2bKnM\\4U2dKjM\\4V2dKjM[4W2eKiMY4Y2hKgMV4DoJ`0k0LU4\\2jKdMU4]2kKdMS4]2mKcMS4]2nKbMQ4_2oKaMP4`2PL`Mo3b2PL^Mo3c2RL]Mk3e2UL[Mj3g2VLXMi3i2XLVMg3k2YLVMf3j2ZLVMe3k2[LUMd3m2\\LSMb3n2^LRMa3o2_LQM`3P3`LPM^3R3cLmL\\3U3cLlL\\3T3dLlL[3U3eLlLY3U3gLkLY3U3hLjLW3W3iLiLW3X3hLiLU3Y3lLfLS3[3mLeLR3\\3oLcLo2_3QMaLn2`3RMaLm2_3TM`Lk2i0[L[Oi0Lk2f0dLZOa00j2e0kLWO;5h2b0RMXO56h2`0XMXO08g2?]MWOM9d2a0aMUOL9b2a0eMUOI;a2?hMUOH;_2`0kMTOF=^2>nMUOD<^2=QNVOA=^2;TNWO^O>^2;TNWO^O>^2:VNXO\\O=^2:XNXOZO>^29YNYOYO?]27\\NYOXO?\\27]N[OVO>]26_N[OTO?]26_N[OTO`0\\24aN\\OSO`0\\23cN]OQO`0[22eN^OPO`0[21fN_OPO?Z22gN^OoN`0[20gN@nNa0[2nMdLc1S2OnN?o2ZOTN7mN`0o2XOUN7lNa0Q3UOTN:lN`0R3SOSN>jN?T3QOTN?hN`0U3POSN`0hNa0V3lNSNc0gNa0X3jNRNd0gNa0\\3eNnMk0eN`0a3`NkMP1eN`0a3gM^Ka0\\2X1eN`0b3fM_K?\\2Z1cNa0l3RNaM]1dN`0n3oM_Mb1bN`0P4kM`Md1`Na0P4kM`Md1aN`0P4kM_Me1aN`0Q4jM^Mf1aN`0R4hM_Mh1_N`0R4eMaMk1]N`0j6@VI`0j6@VIa0i6@VI`0k6_OUIa0k6_OVI`0j6@VIa0i6_OWIa0i6@WI?j6@VI`0j6@VI`0j6@VIa0i6@VI`0j6@WI?i6AWI`0h6@YI?g6AYI?g6BXI>h6AZI?e6A[I?e6A[I?j2fMIj1^Ma0h2iMFd1eMb0e2kMEb1gMc0d2lMD_1jMe0b2nMB\\1mMf0a2PN_OZ1RNf0^2RN^OV1VNh0\\2TNWLEk2^1dNh0Z2_NROg0fNj0X2_NROf0hNj0[2[NmNi0jNm0Y2YNlNj0lNm0Z2WNjNj0oNn0X2WNiNi0QOQ1V2UNiNi0ROR1d5lN^JT1a5lNaJS1_5kNcJU1]5jNdJV1\\5hNgJX1W5hNjJX1V5gNkJZ1T5dNoJ[1P5eNRKZ1n4eNSK[1l4eNVKZ1j4fNVKZ1i4gNWKZ1h4gNWKY1i4gNXKX1g4iNYKW1f4jNZKV1f4jNZKW1d4jN]KU1b4lN^KU1a4lN^KT1a4mN_KS1a4mNaKQ1^4PObKQ1\\4POeKo0[4QOeKo0Z4SOeKn0Y4SOgKm0X4TOhKl0W4UOjKj0U4WOkKj0l0nM]OY1Gi0j0QN]OW1Jg0i0SN[OX1Ke0j0SNZOY1Le0i0SNQMGd1b1b0d0h0UNoLHe1`1d0c0h0VNmLIe1_1g0a0g0XNjLJg1_1g0?h0YNhLKf1_1j0>g0\\NcLIj1_1l00n3d2dMXOPNTN>Oo3e2cMYOoMSN`0MP4f2aMZOoMSN`0LQ4g2aMXOoMVN?IR4j2_MWOPNVN`0FS4m2]MWOQNUNY5d2gLVOPNVN=IV4k2\\MVOQNVN60]4d2\\MVOQNWN32_4a2]MWOnMXN53^4^2_MWOlMZN65\\4Z2cMGPNQN\\4X2dMHnMSNo2@QNd2S1g0e0PMSNZ2W1g0b0UMTNT2Z1i0=YMVNn1]1j0;[MVN:]OOS2\\27_MXN3@3P2\\26aMXNMF5l1^23cMZNHI7j1^21gMZNCL8j1e0iMGU2K[N^O0;g1d0kMGR2N[N[O2=e1c0lMHQ2M]NXO4?b1d0lMIo1O]NSO8b0`1b0mMIm11^NPO:d0^1b0nMIk12^NmN>e0[1c0oMHi15^
NiNa0g0Y1c0PNHf16aNeNb0k0V1b0RNGd19aNaNe0m0T1b0TNEa1=aN]Nh0o0S1a0UNE^1?aNZNl0P1P1b0VND\\1a0bNXNm0Q1o0b0VND\\1b0aNUNP1S1m0b0WNCZ1e0bNoMS1X1i0a0YNBY1f0bNmMV1Y1g0a0XNCY1g04UO;a0XNCX1h06SO:b0YNBW1i06TO9a0ZNBW1i06TO9a0ZNCU1i08TO9?ZNDU1i09SO8`0ZNDU1j08RO9`0ZNDU1j08RO:?YNDV1k08RO8?ZNDU1l09QO8?ZNDU1l09QO8?ZNDU1l09QO9>YNEU1l0:QO7>ZNEU1l0:QO7>ZNEU1l0:QO7>YNFV1k0:QO8=XNGV1k0;PO7>WNHW1j0;PO7>VNHY1j0:QO7Y2a7VNQHA>Z2`7VNVIj1j6VNVIj1j6WNUIi1k6WNUIi1k6XNTIh1l6XNTIh1l6XNTIh1l6XNTIi1k6XNTIh1l6XNTIh1l6XNSH^O9[2OfMc6a0UI^O9[2NgMd6`0UI^O:Z2MhMd6a0UI\\O:[2MiMc6`0VI\\O:\\2LjMc6>VI^O:Y2MmMa6gM]O:W2MQOX2oNJ_OA]NSO?S2IiNl1c0\\O_O`NSO>R2JiNj1h0ZO[OeNQO=S2JiNi1m0VOWOjNPO=T2IgNj1Q1TOUOkNoN>T2IgNi1S1ROUOnNmN?S2HgNh1W1oNUOROjN?S2HgNg1c1dNjN_OhN>T2HfNg1l1ZNfNIcN>U2HeNg1S2QNdN2`N=T2IeNf1Z4SNmJ>T2IdNf1\\4TNkJ=U2IdNd1^4VNiJ>T2HeNb1a4XNeJ>U2HeNa1b4YNdJ>V2HbNa1f4XNbJ?U2IcN_1g4YNaJ?U2IcN^1h4[N_J>V2IcN]1j4\\N\\J>W2IcN[1l4_OaLVOcN[1l4@`LUOcN[1n4@_LUOcNY1Q5A\\LWObNW1S5B[LWObNU1V5CXLXObNT1W5EVLWOaNU1Z5DULWOaNS1]5ERLXO`NS1_5EQLXO`NR1`5FPLXO_NR1c5EnKYO_NQ1d5GlKYO^No0i5GiKZO^Nn0j5IgKYO^No0k5HhKXO]No0l5IgKXO\\Nn0o5JeKXO[Nn0R6IcKYO[Nl0U6K_KYO\\Nk0V6L]KZO]Ni0W6M\\KZO\\Ni0Y6M[K[O[Ng0[6NZK[O[Nf0]6OXKZO[Ne0_61VKZO[Nd0`62UKZO[Nc0a64RKZO\\Nb0d63PK[OZNb0h64nJYOZNb0j64lJ[OXNa0m64jJ\\OYN?n66hJ[OYN?Q75fJ\\OYN=S77dJ\\OYN_IDZNMX8`0]IC[NLY8a0\\IC[NJ[8c0ZIDZNH]8c0ZIEZNF^8d0XIFZND`8f0VIGZNBa8f0UIH[N_Ob8i0SIG]N^Ob8j0QIH]N\\Oe8k0nHJ[N\\Og8j0nHJZN\\Oj8i0lHK[NYOl8k0jHKZNYOn8k0hHL[NWOf0M[7P1cIL[OVOR7n0cILZOXOR7l0dILXO[OT7i0cILXO^OS7f0eILWO@S7e0eIKWOBS7c0fILVOBS7b0gILUODT7?gIMTOGS7=hILTOHT7) and the rectangular tag ()?", + "choices": [ + "A. ", + "B. ", + "C. ", + "D. 
" + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_395.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000322829.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "k\\T1221P=`0E2M5M5J2O1O1O2N2N3L2O1O1O5K3M6J1O3L`0_O3`E_MS:c2hEdMU:Z2nEdMQ:Q2kEPNd:Q2\\EoMd:]2N2O0O1O1O1O2lEUMe9j2>00001N3N1N2N2O1N2O1O1N2N2O1O1N101O1O100N101O10O1O1M20100O1O1O010O10O01O1O1000000O1000O100000000000000001O00000000O1O1000000000000001O00002N001O00000000000000000001O000000000O100O1O0010000000000000000000001O000000000000000000000000001O000001O00000000001O000000000000O011N1O11O00O1O001000000000000000000001O1O000000000001N100O10O11N101O00000O2O0O2O0O1N2O1O1O1O1O1O100O10000O101O0001O000000001O00001O001O1O1O010N4M1N2O01O01O00001O0O101N110O000O2O000O110O001O000O1O1O11N1O101O1O0O11O0000000000001O1O1O001O0000000000000000O100000000000000O10000000000000000001O0O100001OO1001O00000O10001O01O00001O0O10001O00001O0000O100O10000O100000000O10000001O001O0O2O000O2O00000000OgMQFX1o9hNQFX1o9S1O001O1O00001O002OO01O1O1O3M00010O002N3NO010O0000001O10O01O1N4M1O1O2N1O2O0O1OfHmL\\M2f6o2PLmLZM40Hd6U3TLlLZM6i6k2TLQMR4n2W3N2N2N3N0O101M2N101000OO2O2N0JZEgMg:U2:L3O2K5I6O1L3DXDTOl;k0SDQOR[4^OPL:R4DRL9n3FVL7j3HZL6e3I^L5b3K_L4a3L`L3`3MbL1^3NdL1\\3OfLiMkNa0^4g1jLbMoNb0Y4k1iLaMQOb0V4m1mL[MSOd0P4Q2RMUMPOj0m3Q2WMoLROl0g3U2YMlLSOm0d3W2\\MgLVOm0_3\\2UO]Ml0b2ZOYMf0g2ARM?m22cLN\\3Q5O1O1O002O0O1O010O2N1O1O1N102N1M3O001N2O2M2N2O1O1O1N20nJ_MYO`2d0fM[OY2e0jMZOT2g0lMYOR2i0nMWOQ2j0oMUOR2j0oMVOP2k0PNVOo1k0oMVOQ2j0nMWOQ2k0mMUOT2k0jMWOV2P6N1000O10O101O00O0100000000000000000`JjMKV2J]NNc1NdNO\\11dNO\\11dNO\\10fNOZ11fNOZ11fNOZ11fNOZ11fNOZ11fNOZ11eN0[10eN0Z10gN0Y10gN0X11hNOX11hNOW12iNNW12hNOW12iNNW12iNNX10iN0W10jNOW10jNOV10kN0V1OkN0W1NjN1W1NjN1V1OiN2W1MjN3V1MjN3V1MkN2U1NkN2U1NjN3V1MjN3U1NjN3V1MjN3V1MeNTKTOP5W2LdNUKUOo4W2KcN<]1DdN;[1FgN8Y1HhN7W1JjN5V1KkN4T1MmN2S1NmN2S1NmN2S1MmN4S1LlN5T1KkN6U1JkN6U1JjN7V1IkN6U1JkN6U1IlN7T1IkN8U1HkN8U1HkN8U1HkN8U1HkN8
U1GlN9T1GkN:U1ElN;T1ElN;T1ElN;T1ElN;T1DmNU1BjN?V1AjN?V1@kN`0U1@kN`0U1@kN`0U1@kN`0V1_OjNa0V1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1^OkNb0U1]OlNc0S1^OmNb0S1^OmNb0R1^OoNb0P1_OPOa0o0@RO?m0BSO>m0BSO>m0BRO?m0AUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>k0BUO>l0@UO`0k0@UO`0l0_OTOa0m0^OSOb0m0^OROc0n0]OROc0n0]OROc0n0\\OSOd0m0\\OSOd0m0\\OSOd0m0\\OSOd0m0\\OSOd0l0]OTOc0l0]OTOc0l0]OSOd0l0]OTOc0l0]OSOd0m0\\OSOd0m0\\OSOd0l0]OTOc0l0]OTOc0k0^OUOb0k0^OUOb0j0_OVOa0h0BWO>h0CXO=g0DYOl0ATO?n0_OROa0o0^OQOb0o0^OQOb0P1]OPOc0P1]OPOc0P1]OPOc0Q1\\OoNd0Q1\\OnNe0R1\\OmNd0S1\\OmNd0T1[OlNe0T1[OlNe0T1[OlNe0U1ZOkNf0U1ZOjNg0V1YOkNf0V1YOjNg0V1YOjNg0W1YOiNf0W1ZOiNf0W1[OhNe0Y1ZOmM[Kc0[5`1ZOmMZKd0\\5_1YOiNf0V1ZOkNf0U1_OfNa0Y1@fNa0Z1_OfNa0Y1@gN`0Y1_OhNa0X1_OhNa0X1_OhNa0W1@iN`0X1_OgNb0Y1^OgNb0Y1^OgNb0Y1_OfNa0Z1_OfNa0[1^OdNc0\\1]OdNc0]1\\OcNd0]1\\OcNd0^1[ObNe0_1]O^Nc0b1]O^Nc0b1]O^Nc0b1]O^Nc0b1]O^Nc0b1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O]Nb0c1^O\\Nc0d1]O\\Nc0d1]O\\Nc0d1^O[Nb0e1^O[Nb0d1_OZNc0f1]OYNd0g1\\OZNc0f1]OnMSK6^5l1_OmMUK3_5P2\\OmMVK1_5R2[OlMQ1T2^OiM6W2IjM7V2_5000QJmMf0S2ZOmMf0S2ZOmMf0S2ZOmMUKMV5V2EmMQKLN1\\5W2DlMRKML1^5V2DkMSKNK1^5V2DjMTKOJ1^5V22iMNW2HkMoJNY5V2ImMmJMZ5V2ImMmJNY5U2IoMlJM[5T2FYN:g1FYN:g1FYN:g1FYN:g1FTNmJG^5V2DXN;h1EXN;i1DXN;h1EXN;h1EXN;i1HhMQK2W5V22hMOX2b51O00O100O100O10000O1000000000000000000001O1O001O001O00O100000000000000000000000000001O000000000000001O001O00001O00001O001O002N2N1O001O2N1O1O2N2N3M2N2N3M1O3M1O1O2N2N1O1O001O1O001O1O1O1PJYLZ2g3]L^LfNNl4e3ZLaLeNNQ5a3XLeLdNKT5`3TLjLdNJW5\\3RLnLdNHZ5Z3nKTMcNE_5W3kKVNU4j1iKYNV4g1hK[NX4e1fK]NZ4c1dK_N\\4a1aKbN_4^1`KcN`4]1^KfNa4[1\\KgNd4Y1ZKiNf4W1XKkNh4V1UKmNj4S1TKoNl4R1QKoNP5S1hJTOW5Q1^JUOb5Z40001O001O000000001O0000001O1O00001O001O1O001O1O1O1O1O00001O00000000001OO1000000000000000000000000000000000000000000O1000000O100001O00000000000000001O000000000000000000000000000000000000O1000000000000O1000000" + }, + { + "size": [ + 427, + 640 + ], + "counts": 
"`:j2a:0000000O10000000000000000001O00001O0000001O00001O0000000000001O000000000000000000000000000000O10000000000000000001O000000000000000000000000000000001O000000000000001O00000000001O000000000000000000000000000000001O000000001O001O1O001O1O0gMSET2m:lMSET2l:mMUER2k:nMUER2k:nMUER2k:mMWER2i:oMWEP2h:QNXEo1h:QNYEn1g:RNYEn1g:QN[En1e:RN\\Em1c:TN^Ek1b:TN`Ek1a:SNgEf1P;O001O1O00000000001O000000000000WO\\NkEd1T:]NlEc1S:^NmEb1S:^NmEb1R:_NnEa1S:]NnEc1R:\\NoEd1l:0001O000000000000001O000000O10000000000000000O100000000000000O100000000O10000O1]OYNbEg1]:[NbEe1^:[NbEe1^:\\N`Ee1_:]N`Ec1`:^N_Eb1a:^N^Ec1b:]N\\Ee1d:\\NZEe1f:[NYEf1g:[NWEf1i:[NVEe1j:\\NTEe1k:>00000000000000000iMWEl1i:TNWEl1i:;00000iMZEi1f:UN\\Ek1e:QN^Eo1o:000000000000000000000O10000O100O10000O1000000O1O1O1O100O100OWNPEY1n:c0O1O100O1000000O100001O00O1000000000000001O00000000000000000000O1000000000000001O000000000000000000000000000000000000000000000000000000001O000000000000000000000000000000000000000000000000O10000000SNXEW1h:gNZEY1f:fN[EZ1e:dN^E[1b:bNaE^1_:`NdE_1\\:aNdE_1\\:`NeE`1\\:_NdEa1[:`NeE`1[:_NfEa1Z:_NgE`1Z:_NfEa1Z:_NfEa1Z:_NfEa1Z:_NfEa1Z:^NgEb1Y:^NhEa1Q;00000000000000000000000000000000000000000000000O1000000O10000O1O10000000000O1000000O1000000001O_OYN]Eh1c:XN]Eh1c:YN\\Eg1d:ZNYEh1g:ZNREk1n:9000000000000000000000000000000O1000000000000001O000000000000O100000000000000001O000000000000000000000000000000000000000000000000000000O100000000000000000000000000000000O100000000000000O1O1O1N200O1O100000000001O000000000000001O00000000000000001O0000000000000000001O001O00001O000000000000000000O1000000001O00001O0000000000001O0000000000000000000000000000000000001O0000000000000000000000000000001O00O1001O0000O1000000000000000000QE" + } + ], + "question": "Which statement accurately describes the relationship between and the other objects?", + "choices": [ + "A. is in front of and behind .", + "B. is in front of both and .", + "C. is behind and in front of .", + "D. is behind both and ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_396.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000261116.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "PoT51`01`:4\\EOa:LUE68Oc:KTE86Ng:JSEf0l:30O100O010O00100O010O1O100O100O1O101N1O2OO0100O100O1O1O2N1O10000O010000000O0100000000O100jJ" + }, + { + "size": [ + 375, + 500 + ], + "counts": "bUP58];6K3N3M2M2O1N101N100O100O10O10O100000O10O100O1000000O100OO1010000000O0100O101O001O002N3L4M1N2N3L6Kjc4" + } + ], + "question": "Based on the provided information, what is the relationship between and ?", + "choices": [ + "A. is stuck inside .", + "B. is resting on top of .", + "C. is on top of .", + "D. is beside ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_397.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000404534.jpg", + "mask_rles": [ + { + "size": [ + 500, + 386 + ], + "counts": "o841300040G1R14oNj<[4TN4M10000O1000000000000000000000O100000000000000000000000000O10000000000000000000000000000O2O00000000000000O1000000000O100000000000001OO10000000O100000000000000000000000000O100000000000000000000000000O1000000000001O000000000O1000000000000000000O1000000000000000001O1O2M3O3L1N10000000000000000O10O11OO1000000N1L5M30O00010O3N00O0011M30O1O01O1O0PL^Dn3c;01O001O2O001M2ZLWDO00000U3j;lLeDQ3Z;PMfDP3Z;QMeDn2\\;RMYDL21JQ3k;RMdDm2];SMcDl2^;[M\\Dc2e;]M`D]2b;\\MRDM>f2`;\\MTDM=e2`;eM_DZ2b;fM_DX2b;hM^DX2b;hM^DW2b;jM^DU2c;lMlCD60Oa2o;kMTDDM`2P1lk[2" + }, + { + "size": [ + 500, + 386 + ], + "counts": 
"g?a4P;4O000000O1001O00O11O00001O00001O00001O0000M3N2N20000O10000001O00001O001O1O001O00002N:F1O1O1O00000000O1000000O10000O10000O10000O100000000O1000000O1000000O100000000O10000000000O10000000000O1000000O100O1FUEjKl:]42ESEoKm:o3SEQLo:m3RESLo:k3QEXLn:g3REZLn:e3QE^Ln:a3REaLm:\\3SEhLl:W3UEiLk:U3UEnLj:Q3UERMj:l2VEVMj:h2WEYMi:f2VE\\Mj:c2WE]Mi:a2UEcMk:[2VEfMj:X2VEjMj:U2VElMj:R2VEPNj:o1VERNj:l1VEVNj:i1UEYNk:e1VE\\Nj:c1VE^Nj:_1VEdNj:[1VEfNj:W1XEjNh:U1WEmNi:R1VEPOj:P1TEROl:k0VEVOj:h0WEYOi:e0WE]Oi:b0VE@j:>WECi:;VEHj:7WEIi:6WEKi:3VE0j:OUE4j:JWE7i:GWE;i:DWE=i:BUEa0k:T2000UMWEk0i:SOWEo0i:POWEQ1i:m10QMUEU1k:fNgEm0Y:nNmEQ1S:hNTFX1l9gNUFY1k9gNUFZ1j9eNWF[1i9eNWFZ1j9fNVFY1k9gNUFV1QOeMi:U1VFV1POgMi:S1WFQ1PORNh:m0XFP1POUNg:j0ZFl0ROjNV::gFl0ROkNX:9fFi0ROPOY:7eFh0QOQO\\:6dFf0oNWO8POk9S1nFf0nNWO7SOm9P1nF`0QO_O4ROm9o0nF`0PO^O6TOl9n0nF;ROE4SOl9m0nF:ROE7nNn9S1iFEPOG23M_O0OP;S1TFoNQ351U>m0jAL\\=8aBI_=V1000000000000O10000000000000000000000000000O10000000000000000O1000000000000000000000000000000000000O1000000000000000000O1000000000000000000000000000000O10000000000O1000000000000000000000000O1000000000000000000000000O1000000000000000000000000000000O100000000000000000000O1000000000000000000O1000000000000O10000000000000000O10000O100O10000O100O1O1O100O1O10000O1O100O1O10000O1O100O1O1O1O1O100O100O100O100O100O1O1O100O100O1O100O100O1O1O100O1O1O100O100O100O1O1O100HXLcDi3];ZL`Df3`;71O0000004L1OJVLbDj3];XLbDh3\\;ZLcDg3X;^LhDb3W;_LiDa3V;?O100O100O100O1O100O100O10000O1O10000O10000O10000000000000000000000000000000000000000000000000000000000001O000000000000000000000000000000000000O1000000000000000000000000001O00001O0000O100O1O100]Oc00000O1000000O10000000000000000001O1O1O000000000000000000O100000000003M4L1O1O1O3PK^E0N051O002NQ3NYMY and ?", + "choices": [ + "A. is mounted on .", + "B. is over .", + "C. is over .", + "D. is attached to ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_398.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000404534.jpg", + "mask_rles": [ + { + "size": [ + 500, + 386 + ], + "counts": "o841300040G1R14oNj<[4TN4M10000O1000000000000000000000O100000000000000000000000000O10000000000000000000000000000O2O00000000000000O1000000000O100000000000001OO10000000O100000000000000000000000000O100000000000000000000000000O1000000000001O000000000O1000000000000000000O1000000000000000001O1O2M3O3L1N10000000000000000O10O11OO1000000N1L5M30O00010O3N00O0011M30O1O01O1O0PL^Dn3c;01O001O2O001M2ZLWDO00000U3j;lLeDQ3Z;PMfDP3Z;QMeDn2\\;RMYDL21JQ3k;RMdDm2];SMcDl2^;[M\\Dc2e;]M`D]2b;\\MRDM>f2`;\\MTDM=e2`;eM_DZ2b;fM_DX2b;hM^DX2b;hM^DW2b;jM^DU2c;lMlCD60Oa2o;kMTDDM`2P1lk[2" + }, + { + "size": [ + 500, + 386 + ], + "counts": "g?a4P;4O000000O1001O00O11O00001O00001O00001O0000M3N2N20000O10000001O00001O001O1O001O00002N:F1O1O1O00000000O1000000O10000O10000O10000O100000000O1000000O1000000O100000000O10000000000O10000000000O1000000O100O1FUEjKl:]42ESEoKm:o3SEQLo:m3RESLo:k3QEXLn:g3REZLn:e3QE^Ln:a3REaLm:\\3SEhLl:W3UEiLk:U3UEnLj:Q3UERMj:l2VEVMj:h2WEYMi:f2VE\\Mj:c2WE]Mi:a2UEcMk:[2VEfMj:X2VEjMj:U2VElMj:R2VEPNj:o1VERNj:l1VEVNj:i1UEYNk:e1VE\\Nj:c1VE^Nj:_1VEdNj:[1VEfNj:W1XEjNh:U1WEmNi:R1VEPOj:P1TEROl:k0VEVOj:h0WEYOi:e0WE]Oi:b0VE@j:>WECi:;VEHj:7WEIi:6WEKi:3VE0j:OUE4j:JWE7i:GWE;i:DWE=i:BUEa0k:T2000UMWEk0i:SOWEo0i:POWEQ1i:m10QMUEU1k:fNgEm0Y:nNmEQ1S:hNTFX1l9gNUFY1k9gNUFZ1j9eNWF[1i9eNWFZ1j9fNVFY1k9gNUFV1QOeMi:U1VFV1POgMi:S1WFQ1PORNh:m0XFP1POUNg:j0ZFl0ROjNV::gFl0ROkNX:9fFi0ROPOY:7eFh0QOQO\\:6dFf0oNWO8POk9S1nFf0nNWO7SOm9P1nF`0QO_O4ROm9o0nF`0PO^O6TOl9n0nF;ROE4SOl9m0nF:ROE7nNn9S1iFEPOG23M_O0OP;S1TFoNQ351U>m0jAL\\=8aBI_=V1000000000000O10000000000000000000000000000O10000000000000000O1000000000000000000000000000000000000O1000000000000000000O1000000000000000000000000000000O10000000000O1000000000000000000000000O1000000000000000000000000O1000000000000000000000
000000000O100000000000000000000O1000000000000000000O1000000000000O10000000000000000O10000O100O10000O100O1O1O100O1O10000O1O100O1O10000O1O100O1O1O1O1O100O100O100O100O100O1O1O100O100O1O100O100O1O1O100O1O1O100O100O100O1O1O100HXLcDi3];ZL`Df3`;71O0000004L1OJVLbDj3];XLbDh3\\;ZLcDg3X;^LhDb3W;_LiDa3V;?O100O100O100O1O100O100O10000O1O10000O10000O10000000000000000000000000000000000000000000000000000000000001O000000000000000000000000000000000000O1000000000000000000000000001O00001O0000O100O1O100]Oc00000O1000000O10000000000000000001O1O1O000000000000000000O100000000003M4L1O1O1O3PK^E0N051O002NQ3NYMY is attached to?", + "choices": [ + "A. ", + "B. The gate secured with a latch mechanism.", + "C. ", + "D. " + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_399.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000548780.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "W[R33W=1O2N1N201N1O100O1O100O100000000O100N2O1O1M3N2N1O2bLMbI5o5j0]IZO_6U1RIoNk6[1jHiNU7a1_HbNa7d3O1O1O1O010O1000000000000000N1TOlGTLd8i3`0000001O001O0]NnFmNS9l0WGPOj7kNbHb1;]OR7TObH\\1c0\\Oi6ZOdHX1i0YOb6AeHV1k0gNbNLQ88cHT1e9oNZFP1d9SO]Fl0`9WOaFh0\\9[OeFd0Y9^OfFb0W9BiF>U9DkFfGeJg7b5N1O1O00000001O1O001O1N104L1O1O1O2N2`LPHiN7k1l7POTIAeNUOd0d0j7:RIKD^Oo9?`1N1O1N2M7ITcm5" + }, + { + "size": [ + 427, + 640 + ], + "counts": "_kX38R=2L5M1L5N2M200O100O0100O010O010O1O0010O01000O10O1000000000000001O0000001O1O001O001O0010O01O001O000000000000001OO1000001OO10000001N2O00000O101O0O2O2N2N2M2O0O3N3L3J_jo3" + }, + { + "size": [ + 427, + 640 + ], + "counts": 
"n_T51X=2O1O2O0O3M5L7H5L1O1O0O10000000O010001O000O1O2N1O2N2O1N3L5M3YDdNX;i1M2O1FQNWEo1i:QNWEP2Q;1O1O1BnMbER2]:PNbEQ2\\:QNcEP2[:RNeEm1Z:TNfEm1W:VNhEj1X:VNhEk1U:XNjEh1U:YNkEh1T:XNmEg1S:YNmEh1R:XNnEh1R:XNoEh1Q:XNnEh1S:WNmEj1S:UNmEl1R:TNoEl1Q:SNPFm1P:SNoEn1S:oMmEQ2V:mMiET2X:jMiEV2X:hMhEY2Z:dMfE]2c:010O]OXEUN29f:^1eE`N[:^1hE`NX:^1lEaNS:^1oEaNR:^1oEaNQ:^1QFbNo9]1RFbNn9]1TFcNk9\\1WFcNi9\\1ZFcNf9\\1T101O1O1O10O02O0O10001N1O2N101O1O1O1N102N1O1O1N2O2M3M4L4M4KaVj1" + } + ], + "question": "Which object is being carried by ?", + "choices": [ + "A. , the dark-colored, structured handbag.", + "B. , the tan and teal handbag.", + "C. , the handbag with a multicolored abstract print.", + "D. , the woman with long blonde hair." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_400.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000008211.jpg", + "mask_rles": [ + { + "size": [ + 459, + 640 + ], + "counts": "j[i53V>2I8L4H7M3M7fBTOi<[1L8mCYNT;m1YD[O1gNP:0cFb3Y9bLdF`3Z9dLbF]3]9dLbF^3D`L_92lFj3S9VLlFk3S9VLmFj3S9VLlFk3S9ULnFk3R9XLjFi3V9[LdFg3[98eFfK^9[48O1I8UOUFeLM4T:S3h001JeFmLj7S3SHRMj7o2oGYMP8h2fFmLn0b0\\8f2\\GaMd8`2VGeMj8]2nFjMQ9i30O001O1O001O1bKdFk3_9PLeFn3^9mKdFR4k901H8N2@eEhLa:o2b0H9H7H9H7N3K6K5E`CeNdGX9HXFm02C=IZ9DZFQ1MC9M`9_O[FQ1LEM4b:6`Em0b:RO^Em0d:QO\\Eo0f:ROXEm0j:UOREl0o:UOoDj0S;V11O1AjDhM^;o1cDPNa;l1`DSNb;k1_DTNf;HYDi12^NP<]1QDbNR<[1nCeNS to the other elements?", + "choices": [ + "A. It is beside .", + "B. It is on but behind .", + "C. It is on .", + "D. It is on and beside ." 
+ ], + "answer": "D", + "type": "relation", + "image": "images/vqa_401.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000156643.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "j`l2<`>5N2M5L3M3M2N2N3M2N2O2M2O3L4M3M1O7I3M4L1O1O3M2N2N2N3M2N4L2N1O1O100O100O1O0010^NTDNl;1VDNk;1VDNj;2VDNk;0WDOm;MTD2k;NVD2j;NWD0j;OXDOj;0VDOk;0WDNj;2WDMi;3XDKi;5XDIi;6YDHi;7\\DBf;=]DAc;?_D_Oa;a0`D]Oa;c0`D\\O`;d0cDXO_;g0cDVO^;j0fDPO];o0[101N00010O001O01O1O100PCQOm;P1QDROn;P1QDoNo;R1o010O01O1N3N1O001O000010OQDiNU:X1hEQO`NE\\;Z1SF\\Oh9e0VFAe9?ZFI`97_FN\\93bFN^92_F1a9O\\F4d9MXF6h9LTF6l9KRF6n9KnE8R:IkE9V:GgE\\:CaE`0^:BZEd0g:k1O1O100O000000000000XO^ERMb:k2eEQM\\:l2gESMY:k2lERMT:l2PFQMQ:n2QFQMP:n2QFPMP:o2QFoLR:o2P1O101O1O1O1O10O01O0000100O1O1O2N1iN^DnNe;d0iDYO_=08GR]10mbN7I3M4N2N0O2N2M4M2M3M4L3N2N1O2N2M2O1M4N2N1N3N2N4L2N2M5L2N2N2N1O00001O1O1O1O1O1O1O1O1O1O2M3L5L3L4L3N3M3O0O11O01O2O5J2O0O01O000000000000000000001O001O01O01O001O1O001O1O1O2N001O1O2N1O1O2N1O1O1O1O001O2N2O0O2N2N101N2N1O2O1N4L2N1O2O2M2N4L4L2N2N2N101N1O2N4L2O0O2N2M3N2M4L6J6I9EdVZ2" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"kV43h>5N2N2M4M2N2L4O1L4L4K5M3M2M5M2L4M3N2K5N2J6N1O2K6K4K5mDbNoNfN]9l2ZGYOW8i0fGD]O]Mg02e6S3VIm0[6UOcIY1_OdKU6Y3ZJf1T5bNkJj1AbJh1?;@YNP43b26d0JEBWI2X7;V700N2O10000O1O10000O100O100O0100001O00000000000O100000000000000000000000000000000000000000000000000000000000000O100001O000000000000000000000000000000000O1001O01O000000001O001O001O001O0000001O001O001O00001O1O001N2O1O0010O01N100010OO3N1O010O1O1O2N1O2N8aBhND0T30`6\\3YF5o7h3F2N3RJdHR4k7oJTIb4U9@g0YO7I7I3M3M4L2N4L1O2N3M2N4L2N2M3N3M3M2N3M4L4J8_Ob0_O:J7J5N1N2M4K5N2M2N3N2K5_Oa0A>O2L3nMXMkGi2T8ZMiGg2U8[MkGe2T8]MjGd2U8^MgGe2X8\\MgGe2Y8[MfGf2X8\\MhGd2W8]MgGd2Y8]MhGb2W8`MiG^2W8cMiG\\2W8fMhGY2W8lMfGT2Y8oMeGP2X8WNfGe1Y8cNcG[1\\8lN`GS1_8QO_Gl0d8XOXGf0h8^OVG4X9OeFL`96^FJb98\\FGe9;ZF@j9a0UFZOP:g0PFWOQ:j0nETOT:n0iEQOY:P1fEoN[:S1bEmN_:U1_EjNb:W1\\EjNd:W1[EhNf:Y1YEgNg:h21000000O100000000O100000000O10000000000000000000000000000000000001O001O0000001O0000001O001O2N001O001fGgKk5[4QJhKn5Y4oIjKP6W4nIjKR6Z4iIhKV6Z4dIkK[6X4aIiK_6Y4\\IkKc6X4WIkKi6Z4`HnJHP1h7n5000000000001O0000001O00001O1O1O1O001O1O1O1O1O1O1O2N1O1O2N1O1O001O1OWNjHjKT7R4`I_K_6d1oHKh0\\NX6h1RIKj0ZNS6k1TIJn0XNl5m1XIKV1kMd5Y2XIKZ1bMb5b2WIKe84\\GLd83^GMa82`GN`81bGN^80fGNZ80iGOW80kGOU80mGOS8OSHLn72XHJh75ZHJf74]HJd74`HIa76cHG]76hHGY78jHFV78mHGS77SIDn6:WIBj6;[IBf6=[IAg6<]IBd6<_IAd6:cI_Ob6>aI_Oa6=eI@\\6;lIAU6;RJ]OS6a0QJXOT6d0V401O1O1O001O1O1O0O2N10001N2N2N3LdaR2" + }, + { + "size": [ + 480, + 640 + ], + "counts": "Z`l31l>;G5J6K4K4M4M2M3N8H2N001ODdBTO\\=l0gBPOY=Q1hBnNX=R1jBkNW=U1:0;F34KM4L8GUWT5" + } + ], + "question": "Which statement accurately describes the spatial arrangement of the objects relative to ?", + "choices": [ + "A. is holding in front of .", + "B. is in front of , which is holding .", + "C. is in front of , which is holding .", + "D. is in front of , which is in front of ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_402.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000252332.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "abe1h1V=Z1kBQMh;c3@l0TOf0ZO?A4L2N2N2N2N2N2M201N2N2N2N2N1O2N2N2N2N2N2N1O2N2N2N2N1O2N2N2N2O1N2M2O2N3M1O2N[MPI]Mn6b2VI\\Mj6d2VI]Mh6d2WI_Mf6b2XI`Mg6a2YI`Me6a2[I`Mc6a2\\IaMa6a5OnL`IeM^6[2dIdM[6]2eIQKNS2[6m2gIcMX6^2hIcMV6^2jIcMT6^2lIcMR6]2oIdMo5^2PJcMn5i5N2N2N2N]MXJPLg5P4ZJQLd5P4\\JPLc5R4\\JPLa5i6N2N2N1OYMgJjKW5W4iJjKU5W4kJjKS5W4mJjKQ5S7OUMPKiKn4X4SKgKl4Z4SKhKk4Y4UKiKh4Y7NPMZKhKe4j3[KdI0c2c4Y4]KgKb4Z4^KhK`4]7NlLbKhK]4Y4cKhK[4Y4dKhK[4`7N2N2N2N2N2N1O2N2NfIWL`1g3bNYL^1e3cN\\L]1b3Q5O2O1MbIbL_1]3aNcL_MOa1\\3Q1eL^MOa1[3c3eL^LX3g7N3N1N1OcJoLmNP3c6NkJUMWNj2j1XMUIN6M]4l2Y2TMYI0j4i2V7N3N2N3N1N1O2N1O2N2O0O1O2N1000O1000000001OO1000001OO10000001O0O100000000000000000001N1000000000000000000000001O000O1000000000000000000O1000001O00000000000000001N100000000000000000000O2O0000000000000000000O1000001O00000000000O10000000001O00000O10000000000O100000001O00000000000O11O00O10001O000000000000000O2O000000000000000O101N1O1O1O1O1O1O1O100N2O1O2N1O1N2O100O2N1O2N2N2N2N1N3N2N3N3L2N1O3M2N2N2M3N4L2O1N2M3N3M2N2N2N2M4N0O2N3M3M2N2N2M3N2N2O1N2N3M3L3N3M1O3M2N2N2O3L2M3N2N3M1O2N3M3M2O1N2M4L3N3M1O3M2O1N2N2N3M2M4L3N2O1N2N2O1M4N2L3M3N3M1O2N2K5H:]Oa0H9^Oa0D2010O1001O00000001O0000O10000000000000001OO1000000000000000000000000000000000000000000000001OO100001O00O10000001OO10000001OO10000001OO10000001OO1000000WET1_5kN]JQ2j4PNTKk2S4UMkKY3i3gLULS4YLlKe61QMm4[2SKdM\\5n1dJQNS6Y1mIfN]6Q1cImNe6m0[IROi6l0VISOm6j0TIUOn6j0RIUOQ7i0oHUOT7j0lHUOV7k0iHUOX7i0iHVOY7i0gHVOZ7j0fHUO]7i0cHVO_7j0`HUOb7i0_HVOc7i0]HVOd7j0\\HUOf7k0ZHROi7m0VHTOk7j0VHUOl7j0THUOm7k0SHTOo7l0PHSOQ8m0oGROS8l0nGSOT8l0lGSOV8l0jGROY8n0fGQO\\8n0dGRO]8l0dGSO^8l0bGTO_8k0aGSOa8m0_GROc8m0]GROe8m0[GSOf8l0ZGSOg8m0YGQOj8n0VGQOl8n0TGQOn8n0QGSOP9l0QGROQ9m0oFQOT9n0lFQOU9P1jFoNX9o0iFQOX9n0hFQOZ9n0fFPO\\9P1dFoN^9P1bFPO^
9P1bFoN`9o0aFPOa9o0_FPOc9P1\\FoNf9P1ZFoNg9Q1YFnNi9Q1WFnNk9R1TFmNn9Q1SFnNo9Q1QFnNQ:Q1oEnNR:R1mEoNT:P1lEoNV:Q1iEnNY:Q1gEnN[:Q1eEnN]:Q1cEnN_:Q1aEnNa:Q1_EmNd:R1\\EmNf:R1ZEmNg:S1YEmNh:R1XEmNi:S1XEkNj:T1UElNm:S1SElNo:S1QElNQ;S1oDlNS;S1mDlNT;T1lDkNV;T1jDkNX;T1hDkNZ;T1fDkN\\;T1dDkN];U1cDjN_;U1aDjNa;U1_DjNc;U1]DjNe;U1[DjNf;V1ZDiNh;V1XDjNi;U1WDjNj;V1VDiNl;V1TDiNn;V1RDiNo;W1QDhNQM3O10001N1000000000000O1000000000000000000O10000000000000000O100000000O100001OO1000000000000000000O1000000O1000000O1000000O10000O100O1O1O100O1O1O1N2O1O1O100O1O1O1O1O1N2O1O1O1O1O100O1O1N2ZCkM` relative to the other objects?", + "choices": [ + "A. is located behind .", + "B. is located between and .", + "C. is positioned in front of both and .", + "D. is on the back of ." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_403.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000530061.jpg", + "mask_rles": [ + { + "size": [ + 455, + 640 + ], + "counts": "lSU13S>2L7I4N1O4L1N3O1N2N3N0O2N2O000O1O0LZOTC;b]NEb1;]NHa1;kLaKo0Y4U27iLbKMNd0a4e21hL`KIO110Nc0c4k2OgL`KKN02g0b4f2NhL`KO0`0f4h2LgL^KMN02OO`0k4j2KgLZK0001OO`0n4h2GiL[K0001OO2O50JT5R3BoLZKO101OO;1DV5Q3^OWMZKJ2OO;\\5e2^OPMWKO1001N1b5n2XOPMgK0T5o2a5O1O2O1N3M2N2N3N2M5K3M1O3N2M3N0O100O2N1O10O0001O3N0O002N101N3N0O1O3N3L2N010O3M2N3NO01O1O3N0O0000010O1O010O100O1O01O01O2N10K\\OeBd0Z=^OgB`0Z=^OjB?`=00OO2OA[B5I7B>H8M3M2O2N3M2N1O1O2N001O1O010O1O1O1O1O1O2N2O0O1O1O2N2N2O1N2N2N1O3M1O01O0O1O100000001O0001O01O00000001O010O0000001O00010bEeMn8[2iFPNU9P2iFSNV9n1\\FaNb9_1[FfNc9[1TFbMMW1n9n2N1O1O_NVFgNh9Y1XFiNg9V1YFkNf9U1ZFjNg9d27M4K4iNjE[NX:a29K5L4G:E:O1O10001O01O00001eF_Mn6a2PIcMn6]2hHnMX7R2bHUN\\7k1_H[N`7e1[HaNd7`1UHZMoNY1k8]1oGPOP8P1jGVOU8k0`GA^8`0YGXM1\\2e8a0YGAg8Q3N2N2O1N9G001O1O10O0001O00010O00000001N1O1K6I6N2K5MWH" + } + ], + "question": "What is the primary action of in relation to and ?", + "choices": [ + "A. is holding and sitting on .", + "B. is sitting on and eating from .", + "C. 
is looking at and holding .", + "D. is holding and sitting on ." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_404.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000530061.jpg", + "mask_rles": [ + { + "size": [ + 455, + 640 + ], + "counts": "lSU13S>2L7I4N1O4L1N3O1N2N3N0O2N2O000O1O0LZOTC;b]NEb1;]NHa1;kLaKo0Y4U27iLbKMNd0a4e21hL`KIO110Nc0c4k2OgL`KKN02g0b4f2NhL`KO0`0f4h2LgL^KMN02OO`0k4j2KgLZK0001OO`0n4h2GiL[K0001OO2O50JT5R3BoLZKO101OO;1DV5Q3^OWMZKJ2OO;\\5e2^OPMWKO1001N1b5n2XOPMgK0T5o2a5O1O2O1N3M2N2N3N2M5K3M1O3N2M3N0O100O2N1O10O0001O3N0O002N101N3N0O1O3N3L2N010O3M2N3NO01O1O3N0O0000010O1O010O100O1O01O01O2N10K\\OeBd0Z=^OgB`0Z=^OjB?`=00OO2OA[B_OcX[4" + } + ], + "question": "Which statement correctly describes the interaction between , , and ?", + "choices": [ + "A. is using to eat from .", + "B. is looking at while holding .", + "C. is holding who is looking at .", + "D. is eating from inside ." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_405.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000530061.jpg", + "mask_rles": [ + { + "size": [ + 455, + 640 + ], + "counts": "glZ29n=3N1O02O1OO101O00010O6J2N4L00N2N3M3MJ7M2N3J6L4M3L4L3M4K5L3M5J6I6J8F9K5FiDZM];`28L4K4N3M2O4K3L3O1N4L4N1N1O2M4K5L3N3K4M4J6G]BHg=2goV1" + }, + { + "size": [ + 455, + 640 + ], + "counts": "]XX18n=8G:G8H7I4L5K7I4M3L3M3M3N1O2M3M3N1O2M3N1O1N2O2M2O1O1N2O1O1O1O1O2N1O1O1O1O2N1O2N10O01O1O100O10O01O001O100O1O001O001O1O1N2O001O2N1O1O1O1O1O1O1O001O001O0010O2OO000L5O001J5O1O1000gKbFi3^9`0O2O00000O1O1O101O0O1O100O10000O101O0000000O11O0000O1000000000000000000000O10000000000000000O100000000OjNPKWIP5i6PKWIP5i6PKWIP5i6PKWIP5P80001N1000000O100000000O100O10000O2O00000O100O100O2O0O100O100O101N100O1O1O100O1N2O2N1N2N2O1O1O1O1O2N1O100O2N1O1O100O2N1N200O1O2N100O2N1O1O2N1O1N3N2M2O2M2N2N3N1N3N1L5L4K4N3L4L4K5I7D>_OcX[4" + }, + { + "size": [ + 455, + 640 + ], 
+ "counts": "lSU13S>2L7I4N1O4L1N3O1N2N3N0O2N2O000O1O0LZOTC;b]NEb1;]NHa1;kLaKo0Y4U27iLbKMNd0a4e21hL`KIO110Nc0c4k2OgL`KKN02g0b4f2NhL`KO0`0f4h2LgL^KMN02OO`0k4j2KgLZK0001OO`0n4h2GiL[K0001OO2O50JT5R3BoLZKO101OO;1DV5Q3^OWMZKJ2OO;\\5e2^OPMWKO1001N1b5n2XOPMgK0T5o2a5O1O2O1N3M2N2N3N2M5K3M1O3N2M3N0O100O2N1O10O0001O3N0O002N101N3N0O1O3N3L2N010O3M2N3NO01O1O3N0O0000010O1O010O100O1O01O01O2N10K\\OeBd0Z=^OgB`0Z=^OjB?`=00OO2OA[B5I7B>H8M3M2O2N3M2N1O1O2N001O1O010O1O1O1O1O1O2N2O0O1O1O2N2N2O1N2N2N1O3M1O01O0O1O100000001O0001O01O00000001O010O0000001O00010bEeMn8[2iFPNU9P2iFSNV9n1\\FaNb9_1[FfNc9[1TFbMMW1n9n2N1O1O_NVFgNh9Y1XFiNg9V1YFkNf9U1ZFjNg9d27M4K4iNjE[NX:a29K5L4G:E:O1O10001O01O00001eF_Mn6a2PIcMn6]2hHnMX7R2bHUN\\7k1_H[N`7e1[HaNd7`1UHZMoNY1k8]1oGPOP8P1jGVOU8k0`GA^8`0YGXM1\\2e8a0YGAg8Q3N2N2O1N9G001O1O10O0001O00010O00000001N1O1K6I6N2K5MWH" + } + ], + "question": "Which object is sitting on, and which object is attached to ?", + "choices": [ + "A. is sitting on and is attached to .", + "B. is sitting on and is attached to .", + "C. is sitting on and is attached to .", + "D. is sitting on and is attached to ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_406.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000481390.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "Voj43l>2N1O011N3N2N1O1N10001bABY>c0O0001N101N2N4L3LhUW4" + }, + { + "size": [ + 480, + 640 + ], + "counts": "dRh55i>3O001O01O001O0000100O00000001O0mBMY;3gDNk97`EKd0Ok99_EHAN`02^:;_EF@1a0N`:<^EE@4?Ma:<_ECA6=Kc:=^EBA;:Ff:?^E@A?8Bi:?^E@@b07_Oj:a0^E_O_Od06]Om:`0^E_O^Of06[On:`0^E_O]Oh04[OQ;>^E_O\\Ok00[OW;:\\EA]O]1W;SOYEB@[1X;SOTEFBX1[;SOoD9IOY;IjD90LW;W1mDeNU;Y1oDcNY;U1mDeNT;[1nDbNR;h0WD^Oh0GR;k0VD_Oi0DQ;h0[DDe0CP;h0YDIh0\\OP;j0YDJk0WOn:n0VDMka02N1O1O1O1N2O1O1N2O1N2O1N3M3KSej1" + }, + { + "size": [ + 480, + 640 + ], + "counts": "Y3e;[3000001O000000001O0000001O0000001O00000000001O0000000000001O0cKbLXM^3f2iLUMW3i2mLUMS3i2QMUMP3i2SMUMm2j2ZMPMf2[2eLYJk0W3`2_2hLWJl0W3]2`2jLWJk0W3]2_2kLVJk0Z3]2[2ZNdMl1V2UNiMl1V2TNjMQ2P2PNPNW2i1iMWNX2h1hMXNZ2f1gMYN[2e1eM[N\\2d1dM\\Nd2\\1]MbNk2W1VMhNk2X1UMhNj2X1XMfNi2Z1\\M`Nh2]1cMWN`2g1cMUN_2k1cMQN^2P2fMjM]2V2hMbMk2m1XMnMj2Q2VMnMk2R2TMnMl2R2UMmMl2R2UMmMk2S2WMkMi2U2XMjMh2V2]MeMc2[2bM`M^2`2eM]M[2c2gM[MY2e2iMYMW2g2jMXMV2h2kMWMT2j2mMUMS2k2nMTMQ2m2QNQMn1P3SNoLm1R3TNlLn1R3SNmLo1R3RN`LmKLQ6d3WNWLoK3j5f3dNZL\\1f3eNYL^OOaMi3P3XLZO5fMc3V7^LjHc3U7^LjHc3U7]LkHd3S7gLbHZ3^7QMWHP3^2VLa1Q1kKi2d2YL^1P1lKh2e2ZL]1o0lKj2f2nKbL6j4S1WKgN`0T4R3PLV1\\1eKg2T8]MgGf2W8\\MfGh2X8XMhGi2R4bKgKOm3b6aMmIh0Di1a6ZM`J;oNZ2d6XMbJ9kN_2f6UMbJ7jNe2g6PMdJ5gNj2i6mLdJ4dNo2m6hLbJ6aNS3n6dLcJ7`NU3n6cLeJ4^NY3n6QLhH9Q2N`3[4SKXLm0A>Ld3Z4RKYLi0C`0Ke3X4SK[Ld0Ed0He3X4SK]L`0Ff0Fg3W4TK]L>Fg0Fg3W4TK]L=Gh0Eg3W4TK^L;Hh0Cj3X4RK^L:Ii0Ak3X4RK`L5Km0\\Om3Y4QKbL0LR1XOn3Z4PKcLNMS1VOo3[4oJcLMNT1SOQ4\\4nJSNP1bMR4[4oJRNo0cMR4[4oJRNn0cMT4[4nJRNn0cMT4\\4lJSNo0`MV4]4kJSNn0aMW4\\4kJSNn0aMW4\\4kJTNc0SM^O=T5\\4kJTN?oMe4n3kJTN>oMg4m3kJTN=QNg4k3lJUN;RNh4j3lJTN;VNf4f3oJTN;WNe4e3PKTN;XNd4d3QKUN:XNg2fNgNn4WNTN:ZNe2gNiNk4XNUN9[Nc2gNkNi4YNWN7YNd2hNlNh4ZNWN5ZNc2iN
mNg4[NVN4\\NQ1bN36=f4[NXN2[Nn0gN60?f4\\NXN0\\Nl0iN7Na0f4[N0l0_K6Lb0e4]N0h0bK7Jd0c4bNO?fK:Ie0b4cNO;iK;Gg0a4dNO7lK;Fi0`4fNN4mK^1BaNc0[1]OcNg0[1YOdNm0W1SOhNR1S1POkNU1Q1kNoNV1Q1iNoNX1YOVId1b5SO\\1l0dNTO\\N\\OfLN\\O110l1[1f3YOXNAdLIAa0n1_OWNP1^5F[N^OaLh0ESOg1n0h3J[N\\OaLR1[1Fi3L[N\\O`LU1\\1Ai3OZN[OaLX1Z1]Ok31ZNYOaL[1X1[On31XNYObL^1U1XOQ42WNXOcL`1S1VOS43VNWOdLa1R1UOT45aJSO[32nLa12]N=h0g47^JSO]31nL`1GkNg0:i4?lMiNoLa1IlNa0;k4`0]N8YLlNID`0h0Q5c0ZN5[LWO52W5b0YNnN\\L?03OO\\5b0ZNiNbL>I:LN_5b0YNgNeL>G_1[5]OXNfNgL=G_1Z5_OWNdNlL;Cb1Z5_OXN`NPM<_Oe1Y5@XOiN_Kg1Y5@YOgN_Ki1X5AZOaNaKn1V5@N`02CK=6FF::GE9;HD8dLR5kN`Jb4=cLS5kN`Jd4;aLU5lN_Jd4<_LV5oN[Je4=ZLY5ROYJe4>XLY5SOYJf4=WLZ5TOXJf4U7bMdIV2RO9[7bM]I_5c6bJ[I_5e6bJVIb5k6`JkHg5U7Q11O100O1O101O000O10010O0O100000010JZHoHg7Q7ZHnHf7R74O100O2O000O10XHQI_7n6:O[HSIZ7l6eHUI?" + }, + { + "size": [ + 480, + 640 + ], + "counts": "PTT42m>2O1N100O1O100O1O1O1O1O1O100O1O1N2O1N2L4N2M3M2OF81200OO00M41N2N3O0N20O2O0O1O100M400O001O1O1O1000]NCbD>Z;HdD7[;McD3];NaD3_;O_D1d;MZD3i;LVD3l;NQD3o;OoC1R<1kCOU<2iC1Z4M2M4N110O2N0011O2M1O0O10000000O0010O1O1O000010O000000000001O00O100O10000RCFU;;gDKW;5hDMW;5fDLZ;;_DF_;`0ZDCe;a0UDC];o0\\DSOa;Z1QDjNm;W2M4L6J9HM3O12N2N1O3M2O1N10O2N2O9F3M2N010O1POZDdNf;Y1dDbN[;\\1hDdNW;[1kDdNU;[1lDeNS;Z1oDgNP;W1SEgNn:X1TEgNl:X1TEiNm:U1TEjNm:U1UEiNm:T1VEjNl:T1TElNm:d0ZDSOk08l:b0eE\\O^:`0eE]O]:b0R2N2N3M2N2N2N3M3M3LPko2" + }, + { + "size": [ + 480, + 640 + ], + "counts": "^g<2k>8J2M2O1M4L3N2N2N2N200O1O101O3RBoNg=\\1H:YOaN[Cg1[ and in what state of motion?", + "choices": [ + "A. is running on while holding .", + "B. is jumping from while holding .", + "C. is standing on while holding .", + "D. is running on while holding ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_407.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000481390.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "YZn76i>2N10000O0100O0001O0O101_NMXC074=FQ;;VD070a0Jm:;WDg0j0ROl:c1TE_Nj:a1UEbNj:^1UEdNi:\\1VEfNj:Y1VEjNh:U1WEnNh:R1WEPOh:P1WEROh:n0TEWOk:j0nD]OQ;d0lD_OT;a0iDBV;U200001cLgDP3[;lLgDT3b;2O0O2N2O1N2_EdL[9]3aFeL_9^3]FcLd9`3WFaLi9b3SF_Lm9\\2iE]N2\\OU:U2nEROR:l0QFROQ:=aFA`9fF_O\\9?gF^OZ9UOcEg0Y10T9YOgE`0[13P9\\OiEj8BmEM]1?h8CfE2m11^8j0dGSO^8k0dGSO^8l0cGSO]8l0dGSO^8k0dGSO^8j0eGUO\\8d0kGYOY8b0U3M3Jgh>" + }, + { + "size": [ + 480, + 640 + ], + "counts": "bgP32l>3N101O00001O01O010O10O010O010O001O0010O000001O000XOJeB7W=OfB2X=1dB2[=1bB0^=2_BO`=h0100O1N2O1N3N1O2O101O100O10O0\\CZOl:g0QE]Om:d0PE_Oo:b0mDAT;?hDEW;Q<@`DK@b0T3O001O01O001O0000100O00000001O0mBMY;3gDNk97`EKd0Ok99_EHAN`02^:;_EF@1a0N`:<^EE@4?Ma:<_ECA6=Kc:=^EBA;:Ff:?^E@A?8Bi:?^E@@b07_Oj:a0^E_O_Od06]Om:`0^E_O^Of06[On:`0^E_O]Oh04[OQ;>^E_O\\Ok00[OW;:\\EA]O]1W;SOYEB@[1X;SOTEFBX1[;SOoD9IOY;IjD90LW;W1mDeNU;Y1oDcNY;U1mDeNT;[1nDbNR;h0WD^Oh0GR;k0VD_Oi0DQ;h0[DDe0CP;h0YDIh0\\OP;j0YDJk0WOn:n0VDMka02N1O1O1O1N2O1O1N2O1N2O1N3M3KSej1" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"`i?5h>6L4L4M5J5WOXOQCm0iYEI8d1b:oNlEk0V:RO^F:g9B_F7f9F]F6i9E\\F6h9F\\F6g9G[F6k9DXF9j9EYF9i9DYF:XN`3[4SKXLm0A>Ld3Z4RKYLi0C`0Ke3X4SK[Ld0Ed0He3X4SK]L`0Ff0Fg3W4TK]L>Fg0Fg3W4TK]L=Gh0Eg3W4TK^L;Hh0Cj3X4RK^L:Ii0Ak3X4RK`L5Km0\\Om3Y4QKbL0LR1XOn3Z4PKcLNMS1VOo3[4oJcLMNT1SOQ4\\4nJSNP1bMR4[4oJRNo0cMR4[4oJRNn0cMT4[4nJRNn0cMT4\\4lJSNo0`MV4]4kJSNn0aMW4\\4kJSNn0aMW4\\4kJTNc0SM^O=T5\\4kJTN?oMe4n3kJTN>oMg4m3kJTN=QNg4k3lJUN;RNh4j3lJTN;VNf4f3oJTN;WNe4e3PKTN;XNd4d3QKUN:XNg2fNgNn4WNTN:ZNe2gNiNk4XNUN9[Nc2gNkNi4YNWN7YNd2hNlNh4ZNWN5ZNc2iNmNg4[NVN4\\NQ1bN36=f4[NXN2[Nn0gN60?f4\\NXN0\\Nl0iN7Na0f4[N0l0_K6Lb0e4]N0h0bK7Jd0c4bNO?fK:Ie0b4cNO;iK;Gg0a4dNO7lK;Fi0`4fNN4mK^1BaNc0[1]OcNg0[1YOdNm0W1SOhNR1S1POkNU1Q1kNoNV1Q1iNoNX1YOVId1b5SO\\1l0dNTO\\N\\OfLN\\O110l1[1f3YOXNAdLIAa0n1_OWNP1^5F[N^OaLh0ESOg1n0h3J[N\\OaLR1[1Fi3L[N\\O`LU1\\1Ai3OZN[OaLX1Z1]Ok31ZNYOaL[1X1[On31XNYObL^1U1XOQ42WNXOcL`1S1VOS43VNWOdLa1R1UOT45aJSO[32nLa12]N=h0g47^JSO]31nL`1GkNg0:i4?lMiNoLa1IlNa0;k4`0]N8YLlNID`0h0Q5c0ZN5[LWO52W5b0YNnN\\L?03OO\\5b0ZNiNbL>I:LN_5b0YNgNeL>G_1[5]OXNfNgL=G_1Z5_OWNdNlL;Cb1Z5_OXN`NPM<_Oe1Y5@XOiN_Kg1Y5@YOgN_Ki1X5AZOaNaKn1V5@N`02CK=6FF::GE9;HD8dLR5kN`Jb4=cLS5kN`Jd4;aLU5lN_Jd4<_LV5oN[Je4=ZLY5ROYJe4>XLY5SOYJf4=WLZ5TOXJf4U7bMdIV2RO9[7bM]I_5c6bJ[I_5e6bJVIb5k6`JkHg5U7Q11O100O1O101O000O10010O0O100000010JZHoHg7Q7ZHnHf7R74O100O2O000O10XHQI_7n6:O[HSIZ7l6eHUI?" 
+ }, + { + "size": [ + 480, + 640 + ], + "counts": "PTT42m>2O1N100O1O100O1O1O1O1O1O100O1O1N2O1N2L4N2M3M2OF81200OO00M41N2N3O0N20O2O0O1O100M400O001O1O1O1000]NCbD>Z;HdD7[;McD3];NaD3_;O_D1d;MZD3i;LVD3l;NQD3o;OoC1R<1kCOU<2iC1Z4M2M4N110O2N0011O2M1O0O10000000O0010O1O1O000010O000000000001O00O100O10000RCFU;;gDKW;5hDMW;5fDLZ;;_DF_;`0ZDCe;a0UDC];o0\\DSOa;Z1QDjNm;W2M4L6J9HM3O12N2N1O3M2O1N10O2N2O9F3M2N010O1POZDdNf;Y1dDbN[;\\1hDdNW;[1kDdNU;[1lDeNS;Z1oDgNP;W1SEgNn:X1TEgNl:X1TEiNm:U1TEjNm:U1UEiNm:T1VEjNl:T1TElNm:d0ZDSOk08l:b0eE\\O^:`0eE]O]:b0R2N2N3M2N2N2N3M3M3LPko2" + }, + { + "size": [ + 480, + 640 + ], + "counts": "^g<2k>8J2M2O1M4L3N2N2N2N200O1O101O3RBoNg=\\1H:YOaN[Cg1[OmA3l=e0AVO]BO4l0[=^OcBc0Y=e0N3M4L4M2lCUNo:n1kD[NP;f1mD]NQ;f1lD\\NS;e1jD]NU;f1gD^NV;h2fDaLQ;f32N=C2N1O0O2N1SO[D_Nf;X1fDcN[;\\1hD`NZ;a1fD]N[;d1fDYN[;h1eDVN];k1cDRN_;o1i01O0010O0eNYC=g<^O^Cb0c?", + "choices": [ + "A. and are standing, while is running.", + "B. and are running, while is standing.", + "C. and are standing, while is running.", + "D. and are running, while is standing." + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_408.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000042296.jpg", + "mask_rles": [ + { + "size": [ + 425, + 640 + ], + "counts": "jg\\11X=0WgP19WXoN;J7J4L4M3M3M4L3M3M3N3L3N3L3N3M3M3M2N2N2N1O2N1O2O0O3M3N1N1O100O2N100O100O101O0O10000O1000000O1000000000000O01000000000000O1000001O00000O10001O000000001O0O101O001O000O2O001O1O0O2O1N101O1N2O1N2O1O1N101O1N3N1O1O2M2O1O2M1O3M2N2N3M3K5L5K5I9D`0^On`Y4" + }, + { + "size": [ + 425, + 640 + ], + "counts": "QUo172LhA5L1O2N1O1O1O1O1O1O2O0O1O1O010O100O1O10000O100O10O010000O1000O0100000O010000000000000000O2O000000000001O0001O0000001O0001O0000001O01O01O001O00001O0010O01O001O1O001O1O0O2O1O0O2O1N2O1O1O1O2M2M4L3M3N3M2N3N2M3L5L2N4KVbb5" + } + ], + "question": "What is the relationship between and the other objects?", + "choices": [ + "A. is looking at and about to hit .", + "B. 
is about to hit .", + "C. is looking at .", + "D. is attached to ." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_409.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000278353.jpg", + "mask_rles": [ + { + "size": [ + 640, + 596 + ], + "counts": "[me1k0gb0d0C;CO1O1O1O10O01O001O0000jMhJdEX5[:nJ`ER5_:SK]Em4c:UK[Ek4f:WKWEi4h:\\KTEd4l:^KREb4n:`KPE`4P;bKmD_4S;cKkD]4d8bKgG5AZ4W8YLPHBCV4o7kLZHROBT4P8RMZHlNCS4Q8VMYHjN@S4U8XMXHh4e7^KXHb4g7`KYH`4e7cKYH]4g7dKXH\\4i7fKTHZ4n7gKnG[4R8eKmG[4U8eKhG\\4Y8jK^GX4c8c3O0100O002N1O2N000010O01O1O001O2NnHQET5o:lJoDV5P;m110O101OO01000O0100O1000_HREo5m:QJTEo5l:PJTEQ6k:oITER6m:mISET6l:lITEU6l:`1O00ZHUEX6k:gIUEY6k:gIUE[6j:dIVE\\6j:dIVE]6j:bIVE^6j:bIVE_6j:`IVE`6j:\\10SHVEb6j:]IWEd6h:\\IXEd6h:\\IXEe6h:YIYEh6f:YIXED1W6g:TJXEB6Y6b:UJXEB6Z6a:TJYEE3W6d:TJYEE3VOOn6e:VJYEC0[O100l6f:VJYEC0@1g6f:VJYEL1n5g:UJXEI1TO1o6e:RK^En4b:RKYEPN2n6f:TKZEm4e:PKZEPN1P7e:oJ]ES5d:lJ[EU5e:S201O01O0010O0001cGWEa7i:^HXEb7h:_HWEa7j:j001O01O000010O\\GWEQ8i:oGWEQ8i:d0OWGWE[8j:cGWE]8k:aGUE_8l:`GTEa8l:9005K0001N1M4N2O1N2N2O2M3N6J2M5L5J6J5L1O1N2O2N1N3N2N2M2O2M3N1O2N1O1O1O1O1N2O1O1O1O1N2O2N3M3M2N4L3M4L5fKlC>X00000O1010O1O001O10O0000000jIoL]KQ3^4YM]Kg2b4]M[Kc2e4cMUK]2j4lMnJT2R5RNhJn1Y5UNdJj1]5[NnGWNUN1l1]3Q8_N`G@GP2j8dNVGGIe1Q9gNnFLN]1T9nN^F39o0Y9U2dFjM]9X2_FiMa9Z2[FgMe9\\2XFdMh9_2SFcMm9X600001O0000000O20O00O100O1O100N2N2O1O100N2M3N2O1O1O1N2O1N2N3N1N\\FZGS8d8mG]GS8c8lG_GS8`8mGaGS8_8kGdGU8Z8jGhGW8W8hGkGX8T8gGmG_8m7_GUHj8b7TGaHR9Y7kFiH[9Q7bFSIV:U6iEkIb:j5^EVJg:g5PEaJS;\\5gDjJ\\;U5^DnJe;f63M4gIVDS4Y and ?", + "choices": [ + "A. is in front of .", + "B. is in front of .", + "C. is behind .", + "D. is standing on ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_410.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000278353.jpg", + "mask_rles": [ + { + "size": [ + 640, + 596 + ], + "counts": "[me1k0gb0d0C;CO1O1O1O10O01O001O0000jMhJdEX5[:nJ`ER5_:SK]Em4c:UK[Ek4f:WKWEi4h:\\KTEd4l:^KREb4n:`KPE`4P;bKmD_4S;cKkD]4d8bKgG5AZ4W8YLPHBCV4o7kLZHROBT4P8RMZHlNCS4Q8VMYHjN@S4U8XMXHh4e7^KXHb4g7`KYH`4e7cKYH]4g7dKXH\\4i7fKTHZ4n7gKnG[4R8eKmG[4U8eKhG\\4Y8jK^GX4c8c3O0100O002N1O2N000010O01O1O001O2NnHQET5o:lJoDV5P;m110O101OO01000O0100O1000_HREo5m:QJTEo5l:PJTEQ6k:oITER6m:mISET6l:lITEU6l:`1O00ZHUEX6k:gIUEY6k:gIUE[6j:dIVE\\6j:dIVE]6j:bIVE^6j:bIVE_6j:`IVE`6j:\\10SHVEb6j:]IWEd6h:\\IXEd6h:\\IXEe6h:YIYEh6f:YIXED1W6g:TJXEB6Y6b:UJXEB6Z6a:TJYEE3W6d:TJYEE3VOOn6e:VJYEC0[O100l6f:VJYEC0@1g6f:VJYEL1n5g:UJXEI1TO1o6e:RK^En4b:RKYEPN2n6f:TKZEm4e:PKZEPN1P7e:oJ]ES5d:lJ[EU5e:S201O01O0010O0001cGWEa7i:^HXEb7h:_HWEa7j:j001O01O000010O\\GWEQ8i:oGWEQ8i:d0OWGWE[8j:cGWE]8k:aGUE_8l:`GTEa8l:9005K0001N1M4N2O1N2N2O2M3N6J2M5L5J6J5L1O1N2O2N1N3N2N2M2O2M3N1O2N1O1O1O1O1N2O1O1O1O1N2O2N3M3M2N4L3M4L5fKlC>X00000O1010O1O001O10O0000000jIoL]KQ3^4YM]Kg2b4]M[Kc2e4cMUK]2j4lMnJT2R5RNhJn1Y5UNdJj1]5[NnGWNUN1l1]3Q8_N`G@GP2j8dNVGGIe1Q9gNnFLN]1T9nN^F39o0Y9U2dFjM]9X2_FiMa9Z2[FgMe9\\2XFdMh9_2SFcMm9X600001O0000000O20O00O100O1O100N2N2O1O100N2M3N2O1O1O1N2O1N2N3N1N\\FZGS8d8mG]GS8c8lG_GS8`8mGaGS8_8kGdGU8Z8jGhGW8W8hGkGX8T8gGmG_8m7_GUHj8b7TGaHR9Y7kFiH[9Q7bFSIV:U6iEkIb:j5^EVJg:g5PEaJS;\\5gDjJ\\;U5^DnJe;f63M4gIVDS4Y[1CdN41b0c3ZO\\LN6OP2U1hN]Ob1@aMO0O60d00\\O000:1@>1Gc00[O]JRBb5\\>J>B?UKe@S4i?K002N1O1O2N001O1O1O1O1O001O001O000000001OO1jM`LlC`3Te1oAQNU>h1QBUNR>d1SB[No=`1UB_NS>V1QBiNS>n0TBPOo=j0UBUOn=`0ZB@h=5aBKf=_OiBa0[`001OO1001O001O0000001O000000000000O1cITO]C8m5d0f6KoH5P7NoH1Q74jHLV7S1aNQ6]2fHSOY1aNn5d2cHkN_1bNh5n2cH`Ne1bNe5Y3]HVNn1bN_5e3ZHjMW2bN[5j3[HdM[2bNW5m3]HaM\\2dNT5P4\\H\\M`2dNT5Q4[H\\M`2cNT5T4ZHZMb2aNT5W4WHYMe2aNP5]4WHSMh2`NP5`4VHPMj2aNn4b4VHmLl2cNi4e4YHhLn2dNf4g4ZHfLQ3cNa4
l4[HbLT3cN_4n4ZH`LW3bN^4R5WH]L[3cNZ4T5XHYL_3dNV4[7iKfHU4\\7jKeHR4^7nKfHm3[7SLeHl3\\7ULdHg3[7fHoGc3f0f3\\7gHnGc3f0a3a7lHiGc3i0[3e7eL[HZ3f7gLYHY3g7gLZHX3f7hLZHW3g7iLZHV3g7jLYHS3i7mLYHo2i7QMXHl2j7TMVHk2k7VMUHg2n7XMTHe2m7[MSHe2n7ZMTHc2m7]MTH]LmNo5R9dMUH[2k7eMWHX2k7gMWHV2j7kMVHS2k7mMUHQ2m7oMSHP2o7PNTHk1m7UNUHf1n7ZNUHb1l7^NZH[1g7eN]HV1d7jN]HS1e7nN[Hn0h7RO\\Hh0f7YO]Hc0c7]O_H\\LhMP4k9DaH9a7GbH\\LbMe3m9OPICS7=PITORMZMR:b3ZIdNP7]1VIZNl6f1[IRNf6o1^IkMc6U2cITMkKVO06f:_3lI[LPL5T:`3bJ`L^5`3^JZLlJ7f:_3bJ`L^5a3fJZLZ5f3gJYLY5g3gJZLX5f3iJZLV5f3b5ZLi_Oc3W`0]Li_Oc3W`0\\Lj_Od3Y`01O01O0001O00000010O1OO2O000001OO2O1OYLj_Od3V`0\\Lj_Od3V`0[Lk_Oe3U`0[Lk_Oe3U`0[Lk_Oe3X`00O10010O01OdM^LYBNh1e3e;[L_B`0f0B8c3c]LiAc3W>]LiAc3W>[LkAe3X?0000001QOXLdAh3\\>XLdAh3Z?001QOWLeAi3[>XLVAOK0Ij3V?WLl@O42O0Ji3V?WLk@O64LMMi3V?jLl@[3S?j0YAXKC00O0k0O\\O0b11XN5Z1MSOl2A`M]2AfM0On05:_c0PO" + } + ], + "question": "Which statement accurately describes the positions of the objects relative to ?", + "choices": [ + "A. is standing on , which is on .", + "B. is standing on , and is also on .", + "C. Neither nor is on .", + "D. Only is on ." 
+ ], + "answer": "B", + "type": "relation", + "image": "images/vqa_411.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000217400.jpg", + "mask_rles": [ + { + "size": [ + 480, + 640 + ], + "counts": "^6T2l<0000001N100000001O000000000000000000O100O010001O0O2O001O0O1000001N101O00000O2O1O1O2N001N1000000000O11N10000O1000001O000O101O0000O10O100000000000000O2O00001N1000001O001O000O101O000000000O101O0000001N1000001O000O10000000000O10001O000O1000000O1000000O100000000000000000000O1000000O2O0000001N101OO0101O1O1O00001O1O0O2O00001O1O000O2O000O2O0000001N1000001N100000001O0000000O101O0000001O00000O10000O10001O001O000O101O000O10001O00000000001O00000O2O001O00000O1000001N1000000O101O00000O10000000000O2O000000000000000O10000O10001O4K4M2N2N1N2O3M0000000000000O1000O1O100O1O0100000O1000000001O00001N10000O101N100000001O001N1000001O001O1O00000O2O00000O2N100000000O100000001O1O2N1O1O1N3N2N00001O001N2O001O0000000O10O01O1000O100O1000000O10000O1O1M3O010000000000000000000000000001O0O101O1N2N100O2O0O10001O0000000O10001O00000000000000001O00001O001N1O1000O010000000000O0100000000O10000000000O100000O10000000O100O100O11O1O000000O10000001O00000000000O100001O000O1O100000000O10001O000O2O0O2N3L2O1O1N2O2O1N1000000001O0001O1O1O00001O10O0001O001O001O001O0O2O00O11O9F3N001N2N2L3G\\FPKj9Q5210I7M4M1N2N4J9hKYEd00ZOS11GOUOd0[;G\\V`1" + }, + { + "size": [ + 480, + 640 + ], + "counts": 
"m5>b>0000000O100000O0101O00O010000O1000O1000O100000O01000000O0100000O1000O01000O10O1000O10O10000000O010000O100000O01000000O01000O10000000O0100000000O100O10O1000O010000000O10O1000O100O00100000O10000000O100O10O10O10O1000O10000000O1000O100000000O1000O01000O100O1000O2N1000000O010000O10O100O1000000O10O1000O10N2O2_OkAJ40R>g0M2O1N1O0_Ob0M3O1O1000O010000O1000O010000O1O10000O10O10O01000O100O10000O010O10000O10O10O10000O10O010000O1000000O01000O100O10O010000O10000O01000O1000O0100O1000O010000O10000O10O1000O10O10O100O01000O10000O10O10O10O010000O10O10O100O10O1000O100O100000O01000N1O2O100O010O10O100OMWCPNj and ?", + "choices": [ + "A. is positioned over .", + "B. is a component attached to the side of .", + "C. is traveling on top of .", + "D. and are parallel and side-by-side." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_412.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000017182.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "]bc57R=4N1O1O1O1000O10O10000O1O10O2O1O000000001N2O1O2N2MmQ]2" + }, + { + "size": [ + 428, + 640 + ], + "counts": "[b]25T=7K5K6I6K4L4L7I4M0O2N10O01O100O100O1000O10O1000000O10000000O10O1000000000O010000000000000O10000000O100000O101Oc0]O[SV5" + }, + { + "size": [ + 428, + 640 + ], + "counts": 
"o^g12Y=3N00001O0O1001OO1O2O1N5SCB0132K=b;S3TN:F1O1O000000000000000O100000000O1000000000000000000O10000000000O10000000000000000O10000000000000000000000O100000000000000O1000000000000000000O100000000O1000000O100000000000000000000000YLoFS3Q9mLoFS3Q9d03nKmFJ2e3b9O000000O1O100N2O100O1O100000000000000000000000000001O000000001O0000000000O100oMULaJk3`700000O10000000000000000O100000000O1000000000000O100000000000000O100000000O1000000000000O1000000O10000000000O10000000000000000O10000000000O1000000A?N2O100O100000000O100000000000000O1000000O1000000000000000000000000001O0000000000001O00000000001O0000000000001O000000000000000000001O0000001O002N3M00000000000000000000O1000000000000O100000000000000O100O100000000000000O100000000000000000000O10000000000000000000000O1000000000000O100O10000O10000O1000000000000000000000000000000O10O2YMcGkM43O1OP1MQOh8g0e3M302N?", + "choices": [ + "A. and are on .", + "B. is on , and both are on .", + "C. and are on .", + "D. Only is on ." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_413.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000017182.jpg", + "mask_rles": [ + { + "size": [ + 428, + 640 + ], + "counts": "[b]25T=7K5K6I6K4L4L7I4M0O2N10O01O100O100O1000O10O1000000O10000000O10O1000000000O010000000000000O10000000O100000O101Oc0]O[SV5" + }, + { + "size": [ + 428, + 640 + ], + "counts": 
"o^g12Y=3N00001O0O1001OO1O2O1N5SCB0132K=b;S3TN:F1O1O000000000000000O100000000O1000000000000000000O10000000000O10000000000000000O10000000000000000000000O100000000000000O1000000000000000000O100000000O1000000O100000000000000000000000YLoFS3Q9mLoFS3Q9d03nKmFJ2e3b9O000000O1O100N2O100O1O100000000000000000000000000001O000000001O0000000000O100oMULaJk3`700000O10000000000000000O100000000O1000000000000O100000000000000O100000000O1000000000000O1000000O10000000000O10000000000000000O10000000000O1000000A?N2O100O100000000O100000000000000O1000000O1000000000000000000000000001O0000000000001O00000000001O0000000000001O000000000000000000001O0000001O002N3M00000000000000000000O1000000000000O100000000000000O100O100000000000000O100000000000000000000O10000000000000000000000O1000000000000O100O10000O10000O1000000000000000000000000000000O10O2YMcGkM43O1OP1MQOh8g0e3M302N and ?", + "choices": [ + "A. is in front of .", + "B. is on .", + "C. is in front of .", + "D. is on ." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_414.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000559543.jpg", + "mask_rles": [ + { + "size": [ + 333, + 500 + ], + "counts": "]lV24Q:8oK4RJAX3`0c2i0cL\\O\\3i0]LZOb3k0XLWOf3o0TLSOl3o0oKTOP4n0hKYOX4j0XKCU4Q1fKSOS4ZNTKf2c0VOW4P1_KZO^4k0[KZO`4\\3N2N2I7L4O1N2O001N3N10OO201O0O0100001OO1001O00O1001OO010001OO100001O00O20O0000O10100O1O001O2M3N1O103L2N002N3M5K1O5K6J2eMPKUOR5i0RKSOQ5]NjJk18FS5YNgJINo1=M]5SNVJ71^1=7^5\\NUJ\\1`06h5F]J8e5SOPJVOENj0g1c5nNUKP1l4gN]KX1e4bNaIKm1_1c4fN`IJ10k1]1h4fN\\IM10l1[1i4gN`KW1a4hN`KV1m6O1O2N2M4L3M3N3L3N3L4L3M4KPb0OS^O0gE0ef<6jaP1" + }, + { + "size": [ + 333, + 500 + ], + "counts": 
"led0:R::F7J4L2N1N2O0O2O000O1O100O1O101N1000001N1BdNkG[1S8lNiGT1V8QOfGo0Z8LdG[O\\8c0iGYOX8f0kGYOS8h0nGVOS8i0oGVOQ8j0oGWOP8i0PHXOo7b0gGQO<=m7a0XH_Oh7a0XH_Oh7`0YH@g7`0YH@f7a0ZH_Of7a0ZH_Of7a0YH@g7?ZHAf7?ZHCd7<]H0W70iH8o6HQI`0f6AYIb0e6^O[Ic0d6]O[Ie0d6\\O[Ie0d6[O\\Ie0d6[O[Ig0c6ZO]Ig0b6YO^Ih0a6XO_Ii0`6WO`Ij0_6VOaIk0^6VO`Il0OYNk5k0VJl0N[Nj5k0WJk0I_NP6g0VJk0EbNU6c0VJ_1j5cNTJ\\1m5fNQJX1Q6iNnIR1V6oNjIn0Y6SOfIk0\\6TOeIj0]6VOcIi0^6VOcIi0]6XOcIg0^6YOcIe0^6[OcId0]6\\OcIc0]6]OdIc0\\6\\OeIc0\\6]OeIb0[6^OeIa0\\6^OeIb0Z6_OfIa0Z6^OhI`0Y6@gI`0Y6@gI?Y6BgI>Y6BgI>Y6AhI>Y6BhI=W6DiI8k0i7TOVHm0j7SOUHn0k7SORHn0P8QOoGP1Q8QOmGP1T8oNjGR1W8b0001N1000O010000O10000O1000O01000O010000O100O=D7Hf0ZO6K4J5Mnji2" + }, + { + "size": [ + 333, + 500 + ], + "counts": "mVl25U:3O1O2O0O2O001O2O2M00N3M3M4LW`Q2" + } + ], + "question": "What is the relationship between and ?", + "choices": [ + "A. is on the floor next to .", + "B. is standing next to .", + "C. is placing on .", + "D. is holding ." + ], + "answer": "D", + "type": "relation", + "image": "images/vqa_415.png", + "model_output": "D" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000578545.jpg", + "mask_rles": [ + { + "size": [ + 474, + 640 + ], + "counts": "[me2:Y>9K4M2M4J5J6M3N3L3N2N2N2N2N2N3M2O1O1N3M2N3M3N1O2L4N1O2N2M3M3N2N5K6J4`LbL]Kb3_4`L`Kb3]4`LaKf3Y4aLbHDk2o3_4eL\\K_3`4gLZK\\3d4fLZK\\3d4eLZK^3c4fLYK^3d4dLYK_3d4fLXK\\3e4iLVKY3h4iLWKY3f4lLUKX3g4nLTKT3i4RMQKQ3l4QMSKP3k4QMTKQ3k4PMTKQ3k0kJj1U2YMR3l4PMQKR3n4oLPKR3P5oLnJR3R5oLlJR3T5PMiJQ3X5PMeJQ3\\5QM`JP3a5QM\\JP3d5RMYJo2Z1`Jb2d2oKn2_1^Jc2a8k100000O10000oJ^Gj3b8VL^Gj3b8UL_Gk3`8X100N2O1N200N2O1O10kIiG2N2O1N2O1O1N2N2O1N3N1N2O1N2N2N3UCQOg;Q1XDPOg;Q1UDSOj;n0VDSOg;P1WDQOg;Q1XDQOe;R1ZDnNd;T1\\DlNc;V1[DkNd;X1ZDiNc;[1YDgNe;]1WDfNf;\\1XDfNg;\\1WDeNh;W2N2O1O1O100001O00001O00001O00001O0010O0001O00010O0001O01O00001N1O2N1O1O2N1O1O2N1O1O2N1O2N1O2N1O2N1O1O2M2O1O2N2N1O2N1O2N1O2N1O2N2N1O2N1O2N1O2N1O1O2N1O2N1O2M3^OPB7Z>O1O1O1N3Moj12PUN1O10O_>2`AO1Nnj7NVUH0\\Zm1" + } + ], + 
"question": "What is the relationship between and ?", + "choices": [ + "A. is holding .", + "B. is attached to .", + "C. is reading .", + "D. is sitting on ." + ], + "answer": "A", + "type": "relation", + "image": "images/vqa_416.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000437898.jpg", + "mask_rles": [ + { + "size": [ + 427, + 640 + ], + "counts": "gXR3\\1f;:G9A?kNV1mMaM`HV3f6T2O2O1O1N2O001O001O1O1O1O1O1O1O1O1O001O001O1O1O1O1O1O1O1O001O1O1O1O010O1O1O001O1O1O1O1O001O001O000000001O001O1O1O0000000O10O100000000O101O0eK`Jk0`5UObJJ>TMR5P3bJlMJ\\On1Gf3Q3kJiLn25W2R3^NlLc1T3^NkLb1T3`NjLa1V3aNhL_1X3Z40000000000000000O100000000000000000000000000000000O2O00000000000000001O00000ZOmESNT:l1nESNS:0YFi0FVOT;f0REVOo:?", + "choices": [ + "A. is located between and .", + "B. is over .", + "C. is below both and .", + "D. is above ." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_417.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000225532.jpg", + "mask_rles": [ + { + "size": [ + 375, + 500 + ], + "counts": "gdb27_;4K3N1O1O1O011N100O2OO100000000O11O000000O1001O0000O101O3M10O0010O00001L4Ibaf2" + }, + { + "size": [ + 375, + 500 + ], + "counts": "h_d21f;1O1O2N1O1N2O1O1O1O001O001O001O0oDBi:?VEBi:?VECh:=XEDg:=ZEBe:>\\ECb:>]EDa:=_ED_:=aEC^:=bED]:=cED[:=eECZ:=gEDW:=hEEV:SHAl7?UHAj7a0UH^Ok7c0UH\\Ok7e0TH[Ol7f0THYOl7g0VHWOj7j0VHUOj7l0VHSOj7n0VHQOj7P1VHoNj7R1VHmNj7S1WHlNi7U1WHjNi7V1XHiNh7Y1WHfNi7[1WHdNi7\\1XHcNh7_1WH`Ni7a1WH^Ni7c1WH\\Ni7d1XH[Nh7f1YHXNf7j1ZHUNf7l1ZHSNf7n1ZHQNg7o1YHPNg7Q2YHnMg7S2YHlMg7U2XHkMh7V2XHiMh7X2XHgMh7Z2XHeMh7[2YHdMg7]2YHbMf7`2ZH_Mf7b2ZH]Mf7d2ZH[Mf7f2ZHYMf7h2ZHWMf7j2ZHUMf7l2ZHSMg7m2ZHPMg7P3`01O1O1O100O002N001O001O2N001O1O1O1O1O1O0000000000000000000000000000cMQHR1o7eN_HV1a7eNgHX1X7fNPIU1Q7hNXIQ1h6nNbIi0]6VOiIf0W6YOmId0S6\\OnIc0R6\\OQJb0o5^ORJ`0o5@SJ>n5@TJ2fMOU80WJKdML14OLU89WJ and ?", + "choices": [ + "A. 
is driving on .", + "B. is driving on .", + "C. is driving on .", + "D. is parked next to ." + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_418.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000564336.jpg", + "mask_rles": [ + { + "size": [ + 360, + 640 + ], + "counts": "gTV12T;3N2N2I6L4M3M3G9M3M300N1010N101M2IPFVOR:g08O1M4M210000O100O2O3dEFd93ZF6e9J]F6a9JiFLX93nFGR9:n000001O2_E_OW:n0J;F4K1O2AbNoFa1P9j0100O10O1O00001O1Ng]h2" + }, + { + "size": [ + 360, + 640 + ], + "counts": "0[5m500000000000000000000000000000000O100000000000000000000000000O10000K5O1O1000000000000001O0000000000000000O11O1O1O00002N3M3M2N3M2N2N1O001O000000M5aK^JlN0000O12NN3O]10hN14OF2OO11O0Oc81YV7OWlH11OP\\n5" + }, + { + "size": [ + 360, + 640 + ], + "counts": "R_P24kg0NWC:E6K4L4L4L4L4N2O0100000000O1L4L400O11O00000001O01O00000O100010O0000000000000000000000000N2L4L4L4M4L3LQa]4" + } + ], + "question": "What is the relationship between , , and ?", + "choices": [ + "A. is looking at and is positioned in front of .", + "B. is looking at while being beside .", + "C. is looking at and is positioned beside .", + "D. is looking at and is positioned in front of ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_419.png", + "model_output": "A" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000319935.jpg", + "mask_rles": [ + { + "size": [ + 398, + 640 + ], + "counts": "i9d2j90O1000000O1000000O100O1O1N2N2O100000000O10000O10000O100000000O1000000O10SMkFa2U9]MRG^2n8aMVG\\2j8cMZGZ2e8gM\\GX2d8hM]GW2c8hM^GX2b8hM^GX2a8iM_GW2a8iM_GW2a8iM^GX2b8hM^GX2b8hM]GY2b8hM^GX2b8hM^GX2b8gM_GY2a8gM^GZ2a8fM`GZ2_8gMaGY2_8fMbGZ2^8fMaG[2_8eMaG[2^8fMbGZ2^8fMbGZ2]8fMcG[2]8eMcG[2]8dMdG\\2\\8dMcG]2\\8dMdG\\2\\8dMdG\\2\\8dMdG\\2\\8dMdG\\2\\8cMdG^2\\8bMdG^2\\8aMeG_2T91000000O100000000O10000O1000000O100VO\\MjGd2V8]MiGc2W8^MgGc2Y8]MgGc2Y8]MgGc2Y8]MfGd2Z8\\MfGd2Y8]MgGc2Y8]MgGc2Y8]MfGd2Y8]MgGc2Y8]MgGc2Y8\\MhGd2X8\\MgGe2Y8ZMhGf2P9000O1001O3M6J7I7I7I8H6J4L4L2N1O1O7I1O1O1O1O0000000000000000O1O1O1O1N2O1O1O1O1O1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O10000000000000000000000000000000000000000000000VLPOVLP1j3TORLl0n3VOPLj0o3XOPLh0o3]OmKc0R4_OmKa0R4@nK`0Q4AoK?Q4APL>P4BRLo5ARJ>n5BSJ=m5CSJ=n5ASJ?n5@RJ`0Q6\\OoIe0S6YOlIh0U6WObIR1^6mNkHk1V7TNhHn1X7QNhHP2X7PNhHP2X7oMjHP2V7PNjHP2V7oMkHQ2U7oMlHP2U7oMlHP2T7PNlHP2T7oMmHQ2S7oMmHQ2S7oMnHP2S7nMnHR2S7mMmHS2S7lMnHT2R7lMoHS2Q7lMPIT2Q7jMQIU2o6kMQIU2o6kMRIT2n6lMSIS2n6kMSIU2m6kMSIU2m6jMUIU2k6kMUIU2k6kMUIU2l6jMUIU2k6jMVIV2j6iMXIV2i6iMWIW2i6iMXIV2h6iMYIW2h6hMYIW2h6hMXIX2h6gMYIY2h6fMYIY2g6gMYIY2g6fMZIZ2f6eM\\IZ2d6fM\\IZ2e6eM\\IZ2\\801O1O00001O00001O001O00001O001O1O001O6Jd0\\Od0\\O7I1O1O00001O0000O1O1O1oKDlJ0VN=m6EZJP2g5oMWJS2Q81O1O100O1bMiMdJX2Y5lMeJU2[5jMfJV2h71O1O100O1O1O1O1eM`MhJ`2Y5_MaH2o1`2c5aM]J_2h71O1O1]N[M\\If2d6[M[Ie2d6\\M[Ie2\\6ZM\\H1X1e2T6cMcHHX1e2^6[MZH0W1e2i6\\MUIh1fN\\Ni8L`H]1U8dNjGU1]8kNbGo0e8QOZGo0g8ROWGn0j8ROVGm0k8SOTGm0m8SORGn0n8RORGm0o8SOPGn0Q9QOnFo0o8mNUF1l0R1o8nNXGR1h8nNWGR1j8oNUGQ1l8mNTGS1S:010O1O1O1000000mNlNlFT1g:lN]D=P6CX;:bDIc;=2L4K5M300001O=D4K2N]OnDKQ;3f00000003MO2Liel2" + }, + { + "size": [ + 398, + 640 + 
], + "counts": "Q^k433c0P;g0I5J6O1O1O1O1O100O1O100O100O100O10000000000000000001O00000000O1O1O1O1O1O1O1O1O1O1O1O100N2O100O1O1O100O1O100O1O1O1O1O1O1O1O100O1O1O1O1O1O1O100O1O1O1O1O1O1O100O1O100O100O1[Oe0O100O10000O1000000000000000000000000001_LmGe2S8XMRHf2n7YMSHg2m7XMUHg2k7WMXHh2h7UM[HK^Oh2W8[M^HK^Oh2U8[M_HL\\Oi2U8ZMiHe2W7ZMjHf2W7XMkHg2U7XMmHg2T7XMlHh2T7WMnHh2]8O2N1O1O1O1O1O2N1O001O2N2N1O1O2N2N2N1O2N1O1O1O1O1O001O1O1O001O001O0000000000O100O1N2O100O1O1O10000000000001O1OK5M3O1O1O1O1N200O100M3O1O100O1M3N2N2O100O1N2O1O1O1N2N2O100bNQM\\IP3b6QM]IQ3b6PM\\IR3b6PM]IQ3a6QM^IP3b6PM]IQ3b6QM\\IP3d6PMbHJ;X3R7SM]HH?V3T7RM\\HI?V3T7\\MlHd2T7\\MkHe2U7[MjHf2V7ZMgHi2Y7WMgHi2Y7XMfHh2Z7YMdHh2\\7YMbHh2_7XM^Hj2b7Q11O00000000001O00001O00001O00001O1OVH" + }, + { + "size": [ + 398, + 640 + ], + "counts": "nQY1>n;:G8H5L6I9H7I7I7I8G8H8H8H4L5L1N2O1O3M3M1N3N0O100O1O11O00O01O1O001O1N10001000O100O10O1000O100O10000O1O1O1000000000000000000001O1O1O1O1O2N2N3M2N2N3M4L3M3M1O1O0O11N0100000000000000000000000O10000000000O10000O10000O100000000000000000000000001O6J2N1O1O001O000001N1O3L6L4J9_O:N2N2M3M3L4L4L3N3M3K6K4M3L5Ab0BZdS2HW\\lM11O\\b`2" + } + ], + "question": "What spatial relationship do , , and all share with ?", + "choices": [ + "A. They are all next to .", + "B. They are all under .", + "C. They are all on .", + "D. They are all inside ." 
+ ], + "answer": "C", + "type": "relation", + "image": "images/vqa_420.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000116439.jpg", + "mask_rles": [ + { + "size": [ + 640, + 429 + ], + "counts": "fcg49fc0?B2N2N3M3M7I1O001O00O11O000000O10000001O000O1000001O1O1O1O1O1O2N1O1O1O00hb0A\\]O;fb0C^]O6jb0EZ]O7`hT3" + }, + { + "size": [ + 640, + 429 + ], + "counts": "aWd5h0Wc07K0O10O02O00O1O01O01O01O00000000001N10000O1000O10O100O1O1O1O1N2N200O100001N101N2N1O2O1M4M5J5JbKcA1Lj4a>UKcA2Jj4c>SKdA2Kj4j>VKVAj4a>UKcA2Jj4c>TKcAQ5]>PKaAQ5_>oJ]A60e4c>UK]A60e4h>ZKXAf4h>93M1O001O4L2N002N001O001O2N1O1O3M2N001O1O3M3M2N5K0000001O00000000O10000001O00000000000000O11O000000000000000000000000000000000000O11O0000000000000000000000000000000000000000000000000000001O000000O11O00000000000000000000000000000000000000000000000000000000000000001O00000000000000O1000000000000000000000000O100000000000000000000O1000000O100000000000000000000O100000000000000000000O10000O10000O100O10000O10000O1000000O1000000O100000000000000O1000000O100000000O10000O1000000O1O1000000O100O100O1O1O100O100O1O1TO]KYBe4f=`KTBb4k=aKoAc4P>aKkAa4T>f0O1O1N2O100O10000001O0000001O00001O00001O000000001O000000001O00001O000000001O0000A[JdBd5\\=]JcBc5\\=^JdBb5\\=_JcBa5\\=aJcB_5]=aJcB_5]=aJdB^5\\=cJcB]5\\=dJdB\\5\\=eJcB[5]=eJcB[5]=eJcB[5]=eJdBZ5\\=gJcBY5\\=hJdBX5\\=hJeBW5[=iJeBW5[=iJeBW5[=iJeBW5[=iJfBV5Z=kJeBU5[=kJfBT5Z=lJfBT5Z=lJfBT5Y=mJgBS5Y=mJhBR5X=nJhBR5X=oJgBQ5Y=oJgBQ5X=QKgBo4Y=QKgBo4X=RKiBm4W=SKiBm4W=TKhBl4W=UKiBk4W=VKhBj4Y=UKhBj4X=VKhBj4X=VKhBj4Y=UKhBj4X=VKhBj4X=WKgBi4X=XKiBg4W=YKiBg4W=ZKhBf4X=ZKhBf4X=[KgBe4Y=[KhBd4X=\\KhBd4X=\\KhBd4W=^KhBb4Y=]KgBc4Y=]KhBb4X=^KhBb4Y=^KfBb4Z=^KfBb4[=]KfBb4[=]KeBc4[=]KeBc4[=]KfBb4Z=^KfBb4Z=_KeBa4Z=`KgB_4X=bKhB^4W=cKjB\\4R=hKnBX4Q=iKoBW4Q=iKoBW4Q=jKnBV4Q=kKoBU4Q=lKoBS4V=hKjBX4W=fKjBZ4V=gKiBY4W=gKjBX4V=hKjBX4W=hKhBX4X=hKhBX4X=hKiBW4W=jKhBV4X=jKhBV4X=jKiBU4W=kKiBU4V=mKiBS4V=nKkBQ4S=QLmBo3R=SLmBm3hj?BV@>i?DU@>i?CW@=i?CW@=i?DV@ZOSAe0o>XORAh0P?V
OPAj0R?TOm@n0V?nNk@ZNLd2Z?QOn@n0S?POo@o0R?POn@P1R?POn@P1S?oNm@Q1S?oNm@P1T?oNl@`NG]2_?ROj@bNF\\2`?QOk@S1V?lNj@dNE[2c?POi@cNF[2c?QOk@o0V?nNl@Q1V?nNj@R1V?mNk@T1U?kNk@U1V?iNk@nN\\OV2h?kNm@oN[OV2i?iNm@PO\\OU2j?hNl@ROYOW2l?eNo@\\1Q?bNPA^1Q?aNo@_1S?]No@b1S?\\NPAc1P?ZNVAGiNJ1c1R`0fNmAIoMV1T`0oN^Bf0_`0O2N4L3Bg\\OOfc0N100001O0000001O0O1000001O1O1O001O001O01O00004D\\\\O3jc0O1O0Ob^P3" + }, + { + "size": [ + 640, + 429 + ], + "counts": "\\_c51dc0?", + "choices": [ + "A. , , ", + "B. , , ", + "C. , , ", + "D. , , " + ], + "answer": "B", + "type": "relation", + "image": "images/vqa_421.png", + "model_output": "B" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000116439.jpg", + "mask_rles": [ + { + "size": [ + 640, + 429 + ], + "counts": "cd69fc0hb0A\\]O;fb0C^]O6jb0EZ]O7`hT3" + }, + { + "size": [ + 640, + 429 + ], + "counts": "[`02332OO2ib0W1X]OhN4Li`01T_O^4^>bKcA1Lj4a>UKcA2Jj4c>SKdA2Kj4j>VKVAj4a>UKcA2Jj4c>TKcAQ5]>PKaAQ5_>oJ]A60e4c>UK]A60e4h>ZKXAf4h>93M1O001O4L2N002N001O001O2N1O1O3M2N001O1O3M3M2N5K0000001O00000000O10000001O00000000000000O11O000000000000000000000000000000000000O11O0000000000000000000000000000000000000000000000000000001O000000O11O00000000000000000000000000000000000000000000000000000000000000001O00000000000000O1000000000000000000000000O100000000000000000000O1000000O100000000000000000000O100000000000000000000O10000O10000O100O10000O10000O1000000O1000000O100000000000000O1000000O100000000O10000O1000000O1O1000000O100O100O1O1O100O100O1O1TO]KYBe4f=`KTBb4k=aKoAc4P>aKkAa4T>f0O1O1N2O100O10000001O0000001O00001O00001O000000001O000000001O00001O000000001O0000A[JdBd5\\=]JcBc5\\=^JdBb5\\=_JcBa5\\=aJcB_5]=aJcB_5]=aJdB^5\\=cJcB]5\\=dJdB\\5\\=eJcB[5]=eJcB[5]=eJcB[5]=eJdBZ5\\=gJcBY5\\=hJdBX5\\=hJeBW5[=iJeBW5[=iJeBW5[=iJeBW5[=iJfBV5Z=kJeBU5[=kJfBT5Z=lJfBT5Z=lJfBT5Y=mJgBS5Y=mJhBR5X=nJhBR5X=oJgBQ5Y=oJgBQ5X=QKgBo4Y=QKgBo4X=RKiBm4W=SKiBm4W=TKhBl4W=UKiBk4W=VKhBj4Y=UKhBj4X=VKhBj4X=VKhBj4Y=UKhBj4X=VKhBj4X=WKgBi4X=XKiBg4W=YKiBg4W=ZKhBf4X=ZKhBf4X=[KgBe4Y=[Kh
Bd4X=\\KhBd4X=\\KhBd4W=^KhBb4Y=]KgBc4Y=]KhBb4X=^KhBb4Y=^KfBb4Z=^KfBb4[=]KfBb4[=]KeBc4[=]KeBc4[=]KfBb4Z=^KfBb4Z=_KeBa4Z=`KgB_4X=bKhB^4W=cKjB\\4R=hKnBX4Q=iKoBW4Q=iKoBW4Q=jKnBV4Q=kKoBU4Q=lKoBS4V=hKjBX4W=fKjBZ4V=gKiBY4W=gKjBX4V=hKjBX4W=hKhBX4X=hKhBX4X=hKiBW4W=jKhBV4X=jKhBV4X=jKiBU4W=kKiBU4V=mKiBS4V=nKkBQ4S=QLmBo3R=SLmBm3h, , and ?", + "choices": [ + "A. is on , but is not.", + "B. is on , which is on .", + "C. Both and are on .", + "D. is on both and ." + ], + "answer": "C", + "type": "relation", + "image": "images/vqa_422.png", + "model_output": "C" + }, + { + "image_path": "/mnt/bn/zilongdata-us/wangyuhao/datasets/PSG/coco/val2017/000000581615.jpg", + "mask_rles": [ + { + "size": [ + 640, + 478 + ], + "counts": "QXi223m1ORNTa0j3eNQ1\\MYKkBM2Z6ob3M3O1N2O0O2N1O2O2N1N101N101N1O2O1N2N2N101O1N2N2O0O2O0O2O0O2N2N2N2O1N100O3N1N1O2N101N2O1N2O0O2N2N1O2O1O1N2O1N1O1O2O1N2N2N1O2N3M2N2M3N3L4I8H:D?BPGYE]9V:c0^Oa0_Oc0]Oe0]Oc0[Of0TOe0D7N2O1O1N101N101N2N101N2N2M4M3N2L4L8Hf0jNX1`NWQh0mMl]WO5MMO520OOWa0Li^O^30eLe?_4[@`K4OR=d6L4L4N2N2N2M3K5M3O1N200O1O1O1O100O1O10000O100O100O10000000000O100000003VDPHK?3Y2n7U9M101O0000000000000000000000000000000000000000000000000nJjESMYNS3m;HSF5m9JUF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF5k9KUF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF5k9KUF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF4l9LTF4l9LUF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF3k9MUF3k9MUF3k9MUF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LVF4j9LUF5k9KUF5k9KUF5k9KUF5k9LSF6l9JQF9o9IaEe0_:^40000000000000000000000000000000001O000000000O101O00001N2O0M3]Nc1iMX2M201O1O001O1O1O1O1O1O2N2N2N1O4LB:A=ZDfGn:R9A>@>F8E and ?", + "choices": [ + "A. is mounted on .", + "B. is leaning against .", + "C. is placed on the floor in front of .", + "D. is behind ." 
+ ], + "answer": "A", + "type": "relation", + "image": "images/vqa_423.png", + "model_output": "A" + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/annotations/mdvp_caption_mask.json b/evaluation/MDVP-Bench/annotations/mdvp_caption_mask.json new file mode 100644 index 0000000000000000000000000000000000000000..0a8f6166091476402b15fa7f39860e16f69184e8 --- /dev/null +++ b/evaluation/MDVP-Bench/annotations/mdvp_caption_mask.json @@ -0,0 +1,4734 @@ +[ + { + "image_path": "android_detailed_caption/images/web_shopping_251980.png", + "mask_rle": { + "size": [ + 1170, + 540 + ], + "counts": "Znn242>gS1;K100O1000000000O1N2O10000KZlN_OeS1g00001O1O10O001O100O1O1O00000000000000O1O1O1O1O100O100001O1N3N4L0000O10001N10000O2O0O1O1O1N2O10O10000000002N5K1O0000000000O2O00000000000001O0000000001O2N2N1O1O00000000O2O1O1N01O0010O0110O000001^O\\lN4eS1K\\lN4fS1KYlN5hS1JXlN5jS1JVlN5mS1HUlN5VT1Mlo7NWPH3N2BNWlN4eS11YlN0eS1?O100O1O2O0O10001O0000000001N10001O0O2O001O1O1O1O1O0001N100000001O00000000O2O000000010O03L5LO1000O1001O0O1M3O1O0100001O0O101N2O1N2O2M2L6K]T9KdkF7nkNO^S15]lNObS1b0N100O100000000000000000000001O1O002A_lNHdS13_lNMTT1O0O2M[f=6_YB5K4M3M2N101N10001N100000000000000O1O2N1O1000O1000004L3M001O1O0O2O1N2N4J6Klf5NWYJ2N3L6I5L3M3N2N100O1O2O0O100000000001O0O2O1O4K1O00O11G8M44K5L6J3L10000O10000001O001O1E:DVlNNlS10<001N4M4L2M100O010000000001O0010O2N3N0N8H2O0O1000000000N2K5N2O10000O10001O000N3IYgn2" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This is a status bar displaying the date \"Friday, Sep 2,\" along with the temperature \"61°F.\" It informs the user about the current day, date, and ambient temperature, likely provided by a weather service. This text is generally not interactive and serves as an informational display only." 
+ }, + { + "image_path": "android_detailed_caption/images/web_shopping_251980.png", + "mask_rle": { + "size": [ + 1170, + 540 + ], + "counts": "[]l1>RT14M3M2N2N2N2N2N2O\\OelN1ZS1OhlN1WS1NklN1US1NnlN0RS1NQmN2mR1OTmN0lR10klNHO8VS10jlNKN5XS10ilNNL3[S1OhlN" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The icon with a triangle pointing leftwards resembles a \"back\" navigation button, generally used to go back to the previous screen in an application or navigate backwards in a browser." + }, + { + "image_path": "android_detailed_caption/images/install_18183248185514867672_2.png", + "mask_rle": { + "size": [ + 1520, + 720 + ], + "counts": "X^e3d0k^1101O000000000000000001O001O1O1N2O00000000000000000000000001O000000O100000001O0000000000O1000001N10000000000N2O1O010O13^Ofhlj0" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This icon is indicative of the Microsoft Excel mobile application, recognizable by its green 'X' on a white background, which suggests a tool for creating and editing spreadsheets. It is likely an interactive element that, upon touch or click, launches the application allowing users to work with spreadsheets on their mobile device." 
+ }, + { + "image_path": "android_detailed_caption/images/install_18183248185514867672_2.png", + "mask_rle": { + "size": [ + 1520, + 720 + ], + "counts": "Zeka0:T_19H2O000000O100O1O100001O0010O100O0010O0000000001O000O100001O000O10000000000001N1000001O0000O101O000O10000000000O1O1O1O100O12N1O1O1O0000001O0O10000000000001O0O1001O000O101O0000O100000000000001N10000000000O1O1N200O010002N2N1O010O00002FSaNL`\\S:" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This is the Facebook mobile application icon, featuring a lowercase 'f' on a blue background. When interacted with, it typically opens the Facebook app where users can browse their news feed, connect with friends and family, post updates, and engage in social networking activities." + }, + { + "image_path": "android_detailed_caption/images/install_18183248185514867672_2.png", + "mask_rle": { + "size": [ + 1520, + 720 + ], + "counts": "jiP:?P_13M2N10000O2O01O00001N110O10O1O100O0000O1O100O2O0O1000001N100001N1000000000000000000000O10001OO1000001O0O100001O0O1000000000O101O000O1O1O100O10000001O000000001O001O001O0000001O0000O1000000000000001O0O1000O2O0000000000000O10O100001N100001O002D^kfa0" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "Represented here is an icon for the Speedtest application by Ookla, depicted by a speedometer graphic suggesting the app's function of measuring internet connection speed. Tapping on this icon will likely open the app and allow the user to test their current internet speed." 
+ }, + { + "image_path": "android_detailed_caption/images/install_18183248185514867672_2.png", + "mask_rle": { + "size": [ + 1520, + 720 + ], + "counts": "ZRbi0`0n^14N0O20O1O001O001O000000000000001O0000000000000000O10000001O00000O10O1O2O0O100001O001O001O00000000O1000000001O00000001O1O1O001O001N100000000000O2N100O1O10O010000000000000000000000000000000001O000000000000O100000000001O00000000000O1000001O001O1O4HacR2" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This icon, featuring a camera lens and a gradient background, is for the Instagram mobile application. Interacting with this icon will usually open the app, providing access to photo and video sharing, as well as viewing the content from others on the Instagram social network." + }, + { + "image_path": "android_detailed_caption/images/install_18183248185514867672_2.png", + "mask_rle": { + "size": [ + 1520, + 720 + ], + "counts": "jg\\356Nk^1a0K0001O1N3N1I60O2j`NJn^1=O1O2O000000000002N1O1O00000000000000001O00000000O1000000000000000000000000000000000000001O00001N1000O101O0O101O000001N101N3Ladcj0" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "Resembling a house with a triangular roof, this icon signifies a home automation or real estate application. Interaction with this icon would open the respective app, providing controls for smart home devices or real estate listings, depending on its specific function." 
+ }, + { + "image_path": "android_detailed_caption/images/install_18183248185514867672_2.png", + "mask_rle": { + "size": [ + 1520, + 720 + ], + "counts": "ZU\\k0`0o^14M000001O=C10O000000000C1WaN1h^1;1000O100000010O00000001O1N2O1O000000O101O000000O101O001N3KbXo3" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This icon has a feather, hinting at a lite version of an application that offers a minimalistic or resource-efficient option, typically for use in areas with limited connectivity or on devices with lower performance." + }, + { + "image_path": "android_detailed_caption/images/install_18183248185514867672_2.png", + "mask_rle": { + "size": [ + 1520, + 720 + ], + "counts": "ZSh9`0o^14M000001O001O1O1O001OO100N2O1O100O1001O002N2N000000000001O000000O101O000000O1000000000000000000000000000000000001O00000000O2O001N10O2O0O101O000000000001O0000O10O100000000000O10010O000001O0001O1N2O1O00000000000000O1O1O100O1000000000000000000000000O2O00O100000O1K5M4M3MSX\\a0" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "Featuring a speech bubble with a lightning bolt, this is the Facebook Messenger app icon. It signifies an app dedicated to messaging which, upon interaction, opens a platform where users can send messages, share media, and participate in video calls." 
+ }, + { + "image_path": "android_detailed_caption/images/install_18183248185514867672_2.png", + "mask_rle": { + "size": [ + 1520, + 720 + ], + "counts": "knba0a0m^13N101O01N101O001N02O0O1O2O00000001O002N1O1OO1000000000000000000000000000000O11O000000000000000000000000001O0000000000O100000000000000001O000O2O1O2M3N00O1N2M4L3O101N1000000000000001O0O101O001O1O1O0000O101O0000000001O001O1O1O1O2N2N2N001O00000000001O000000001Ngih9" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The icon partially reads \"Home De...\" against an orange square, suggesting a home improvement or retail company's app, possibly offering goods or services related to home refurbishment or decoration. The app's full functionality would be revealed upon opening it." + }, + { + "image_path": "android_detailed_caption/images/install_18183248185514867672_2.png", + "mask_rle": { + "size": [ + 1520, + 720 + ], + "counts": "hlY3?n^15L4M2N2N2N2O0O2O00000O2O000001O00001N10001N101N2N3M3M4M3MO000O101O0001O0O2O1N2O1N2O01O001O0O10001O000001N100O10000001O0O2O000000000000000000000000O101OO10O1O1001O010O000O2O0000000001N10000O01L5N1001O3M1N1000010O001O1Of]X4" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This area contains the term \"Search,\" suggesting it is likely related to a search function where a user can input queries to locate specific settings or information within this system or application." 
+ }, + { + "image_path": "android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "mask_rle": { + "size": [ + 732, + 412 + ], + "counts": "]el41Xk74iTH0_f08N2O000O2O000001N101O0O1000O101O0O101O0001O0O10000O0100O1O1O2O00010O01O00O1O10000001O001OO1O1O100001O1O1O001O0000000000000O101O01O0000001O2N1O0O2O00000000O2O0O1O1O1O1O10000001O0O100001O0O2O0O:Dmmk1" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The word \"settings\" indicates an option or heading that relates to configuration options. Interacting with it would typically bring up a menu to adjust system preferences or application parameters." + }, + { + "image_path": "android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "mask_rle": { + "size": [ + 732, + 412 + ], + "counts": "XPb4=^f03N00000000000000000000000000000000000001O000000000000O10001O0O100000O2O00000000O1L4O100002N2O0O000000000O1000O100N2O10000003MO1N2O1001O1O3M1O000000000001O00000001N101O2M7I3O03LNVUd2" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The term \"mobile,\" followed by a comma hints at a list or continuation of related topics, likely referring to mobile network settings or features in the context of this system or application." + }, + { + "image_path": "android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "mask_rle": { + "size": [ + 732, + 412 + ], + "counts": "k_i68cf03L3O1N10000O10000L4O1001O5J2N11O1O0O1000000000000001O0000O10O10N3N100002N1O001O00000000O101OO1000001O1O1O^]V1" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The word \"data\" in this context may refer to mobile data usage and settings. 
It suggests an option to view or adjust how the device handles cellular data." + }, + { + "image_path": "android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "mask_rle": { + "size": [ + 732, + 412 + ], + "counts": "fnj25gf05K3N4M11L1OO1M2O10O1O2002O0010O2MM3N2O0001O1M32O0O3L100001O2AhYO1^f0KeYO3cf00O1ML^YO4bf0M]YO3bf0N^YO2cf0310N2J:J10000000O1JkYODVf0M10000O101O0001O00001O001O00001O0000000000000001O2N001OO1O1O10000O101O0O1000O2O0000000000000001N100000O100000001O0000O100000O100O100001O1O1O3DllN2\\S1NI1ilN1VS1700N2N3N1O10000001O1O1O0000O100O1O1000000000000101N0000O11O000000000000000000000000000O101O000000O2O0001O000O2O002N6J1N2OeUT8" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "Similar to , 'Expect More.' is a slogan that implies a promise of greater value, service, or product quality from the associated entity. It is designed to communicate a marketing message or company ethos to customers." + }, + { + "image_path": "android_detailed_caption/images/web_shopping_98501.png", + "mask_rle": { + "size": [ + 1140, + 540 + ], + "counts": "Zon29ZS15L01O001O01O000000000000000000000000000000001O001O1O00000000000000O100O1O100000000000000000O100O10000001O2N1O00000000O1O10000000000000000000O100001O000000000000O10000001O0000000000000000000000O2O004KmgY<" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This text 'target.com' represents a simplified address, similar to , likely indicating that the user is visiting or has the option to visit Target's website. Interaction with this area would navigate to or indicate presence at Target's web page." 
+ }, + { + "image_path": "android_detailed_caption/images/install_5797941172247377583_7.png", + "mask_rle": { + "size": [ + 1520, + 720 + ], + "counts": "Zcl55Y_1:H1N100O2O0000000001OO010O2M2N2O1001O1O2N1O001O000001O1O2N1O010O00000000O1O1O001O100001O2N1O1O000000O1O1N200O100O2O0000001N101O0O11N1O101O000000000000O101O1M3M4Jh]fg0" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The text appears at the top of the screenshot and is likely the title of the application or page currently being viewed. It suggests that the content of the page is related to the Skype application, possibly for download or further information purposes." + }, + { + "image_path": "android_detailed_caption/images/install_5797941172247377583_7.png", + "mask_rle": { + "size": [ + 1520, + 720 + ], + "counts": "XQk78V_19H2N101O0O11O000000000000000000O11O001O1O1O0010O001O001O1O000001O01O00O10000O10O01001O001O000000O1O2N10000O1000001N1000O101O0000000000000001N1O3KgkSf0" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This text is also indicative of the Skype application. It is usually the main header on an app page and is a non-interactive element that provides the user with confirmation of the app's identity." 
+ }, + { + "image_path": "android_detailed_caption/images/install_5797941172247377583_7.png", + "mask_rle": { + "size": [ + 1520, + 720 + ], + "counts": "gid29U_16J=C9H4L3L3O1N2N3M2O1O1O001N2O1O001OHbbN^N^]1`1ebN_N[]1a1fbN^NZ]1U1cbNPO4KZ]1Q1ebNTOf]1m0YbNSOg]1n0WbNTOh]1Z10001O00SOVbN4j]1\\OVbN63e]1]O[bN41>d]1^O\\bN30?d]1^O\\bN30?d]1^O\\bN40=d]1^O]bN50;d]1@\\bN50;d]1@]bN40:e]1@\\bN6O:e]1@\\bN608e]1A\\bN7O8S^1GnaN7T^1GnaN7g^1MTUbl0" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The icon displayed represents the Skype application. It's typically used as a visual identifier of the app within digital stores or on a device's home screen. It serves as a non-interactive branding element in this context." + }, + { + "image_path": "android_detailed_caption/images/install_5797941172247377583_7.png", + "mask_rle": { + "size": [ + 1520, + 720 + ], + "counts": "Shbj0c0m^11O00O2O4K11O1O0000000000000000000000000001N11O0O10001O000000O100O1000O100O1O101O000001O1O001O000000O10000001O0000000000O1O1M3O100O1000000001O000O2_O^aN1PT\\3" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The word \"Install\" is generally an interactive button when found on an application download page. Tapping this button would initiate the download and installation of the app onto the user's device." 
+ }, + { + "image_path": "android_detailed_caption/images/install_5797941172247377583_7.png", + "mask_rle": { + "size": [ + 1520, + 720 + ], + "counts": "dRk77R_19M101O0O100000000000000000000000010O01O0010O0010O01O00000000000000O10000001O00000000O100O1O10000O101O00000000O1000001OO101O0O2N4Haf_f0" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This repeated mention of \"Skype\" may refer to the name of the application on its store page. It usually appears below the app icon and serves as a non-interactive title or label." + }, + { + "image_path": "android_detailed_caption/images/install_5797941172247377583_7.png", + "mask_rle": { + "size": [ + 1520, + 720 + ], + "counts": "V\\`;4[_12N3N9F2O0O100000000000000000000000000001O00000O2O1N3N1O1O1N100001N101N11N2N2N2N2McjZa0" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "\"1B+\" signifies the number of times the app has been downloaded, indicating that the Skype app has been downloaded over one billion times." + }, + { + "image_path": "android_detailed_caption/images/install_5797941172247377583_7.png", + "mask_rle": { + "size": [ + 1520, + 720 + ], + "counts": "VhSe03S_1=M1N10001O00000001O0000001O0000000000O1O10000O100000000001N1000001O00001O001O0010O000000000000000000O1000000000000000000000000000000001O000000O100Ncei8" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The phrase \"Editors' Choice\" likely denotes a special recognition or endorsement by the app store's editorial team, suggesting that the app comes highly recommended." 
+ }, + { + "image_path": "android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "mask_rle": { + "size": [ + 1140, + 540 + ], + "counts": "RZh3>US11O2O000000000001O001O1O001O00O2O00000001O0O100001O000O11O0000000000000000O100000O10001O01N10000000000O100N200000000002M2O1O001O1O3LSb52j]J2O2O000000000000000000000000000O2O0000O1000O100O101O00000000000000000000000O1001O1O1O0000000000001O0001O00001O1O0000000000000000O1O10000O100000000O101N4Laki9" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This region contains the phrase \"search settings,\" which indicates a function allowing the user to search within the settings menu. The presence of a magnifying glass icon suggests that this is an interactive search bar interface element. Typically, a user would tap this area and input text to locate specific settings." + }, + { + "image_path": "android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "mask_rle": { + "size": [ + 1140, + 540 + ], + "counts": "TbQ1b0QS13N0O10000001O00O10000000000000000000000000000001O006_OQmNOh`i`0" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This portion features the single character \"M\" which is typically representative of a user's initial or an application's logo. It is stylized with a certain thickness and distinct font that can be indicative of a branding design or user personalization within a software interface." 
+ }, + { + "image_path": "android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "mask_rle": { + "size": [ + 1140, + 540 + ], + "counts": "`\\o03^S14N0O2L4O1001O1O2O0O0000O10000O100O1O10000001O1O1O000000O1O1N2O11O6H[jb`0" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This region includes the word \"Add\" presented with clear, bold lettering on a button or interactive element. This is commonly used to initiate the process of adding new elements, possibly in this context to add a new email account, as suggested by the surrounding text." + }, + { + "image_path": "android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "mask_rle": { + "size": [ + 1140, + 540 + ], + "counts": "PQ]27ZS14O000O2O01O0000000O100000O11O0000000000000001N1000000000O100000O101O0O10000O01001O0001O1O001O000O110O00000000O1000000001O0O10000N2M4MWb54f]J2N2O0000001O0000O100000O10000000000000000001O000000000001O000O10000000O10O1O100O100000000001NS^U<" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The text \"another email\" is part of a larger phrase that suggests functionality for adding additional email addresses to the account or application in use. It is likely not interactive by itself but is part of instructional or descriptive text guiding the user's actions." 
+ }, + { + "image_path": "android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "mask_rle": { + "size": [ + 1140, + 540 + ], + "counts": "n`k69ZS12O00000000000001O0000O1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O100O101OO2Olf_9" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "Featuring the word \"account,\" this text complements the nearby phrase and is associated with the process of adding or managing email accounts within the application or device settings. It would generally not be interactive but adds context to the interface's options." + }, + { + "image_path": "android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "mask_rle": { + "size": [ + 1140, + 540 + ], + "counts": "Q]o0:XS13O000000000001O001O00000000000000000000000000001HklNNVS118OO3N6J00001O0000001O0000000000000000O101N1OmZ3LXeL5J4M1OO100O100O1000000000000000000000000000000000000000000000000O1N3Je^4OaaK8G3N00O10000000000000000000000000000000000000000000000000000000000000000000000000O1001O000000O1000000000000000000000000O10000O10O2OThX<" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The phrase \"Set up your personal\" suggests a prompt or direction for the user to configure personal settings, possibly related to an email or other account settings. This text is usually static and provides guidance or instructions within a user interface." 
+ }, + { + "image_path": "android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "mask_rle": { + "size": [ + 1140, + 540 + ], + "counts": "eoe64^S14M2O00001O00000000O100000000O1O1N2N2004L1O1O1O000000000O10000000000000000000000000000000000O100O1000000001O001O0O2O00000000000001O0000000000000000000000000O100000000000000000000000000000000O100O10000O10000001N;ElQ\\8" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "Paired with the previous text, \"or work email\" completes instructions for setting up email accounts of different types (personal or professional) within an application. This portion of text helps to further define the user's options for account configuration." + }, + { + "image_path": "android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "mask_rle": { + "size": [ + 1140, + 540 + ], + "counts": "_a^3:VO4M2N2B^O_ZOd0`e0\\O_ZOf0`e0ZO_ZOg0ke0ASZO0ke00WZOOie00YZO0fe00[ZOOee00\\ZO0de00UZOKO5le00SZOOO1ne00SZO0N0oe00SZO0N0oe01RZOOO0oe01RZO8ne0GSZO8oe0GQZO9oe0GQZO8Qf0FPZO:Yf0O1O1O1M5Jfef7" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This is a single character \"G,\" customarily associated with Google’s branding. It often represents access to Google's search services or apps affiliated with Google. Interacting with this symbol would possibly lead to a Google product or service." 
+ }, + { + "image_path": "android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "mask_rle": { + "size": [ + 732, + 412 + ], + "counts": "beW17cf04M2N3N2M2O3L4M2NO2K4N2O2CiYO1Zf0LiYO1Zf0M[]c7" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This icon, often indicative of location services or map functionality, is commonly used to represent a user's current location or to access location-based features. Interacting with this icon typically opens a mapping application that shows the user's real-time location on a map." + }, + { + "image_path": "android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "mask_rle": { + "size": [ + 732, + 412 + ], + "counts": "mYj02if03N1O3MN1N3M4L3L4MXXi;" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The logo represents the brand identity for Costco Wholesale, indicating that the user is currently on the official website of this retail company. As a logo, it may serve as a clickable element that typically redirects users to the homepage of the website." + }, + { + "image_path": "android_detailed_caption/images/single_1849.png", + "mask_rle": { + "size": [ + 1140, + 540 + ], + "counts": "gX_:2`S16L3L2O00000001O000000000000001O00000000000000000000000000000000O100000000001O00000000000000000000001O0000000000000000000000000000000000000000000000000000000000002Ml_]5" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "Labeled 'Warehouses,' this indicates a section of the website where users can find information about Costco's physical store locations. 
It likely functions as a link that, when clicked, will take the user to a page detailing warehouse locations and related information." + }, + { + "image_path": "android_detailed_caption/images/single_1849.png", + "mask_rle": { + "size": [ + 1140, + 540 + ], + "counts": "XnQ>4^S12M4N1O1000000000000000000001O00000000000000000000000000000000000000000000000000000000000000000000O10000001O2L^lh2" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "Marked 'Account,' this suggests a section pertaining to user account management. Clicking on this would likely allow the user to access their personal account details, sign in, or manage their membership and profile." + }, + { + "image_path": "android_detailed_caption/images/single_1849.png", + "mask_rle": { + "size": [ + 1140, + 540 + ], + "counts": "XbPa06]S12N100001O000O2O000O10000000010O0O1O1N20SWQ1" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This icon appears to represent a shopping cart, which is commonly used on e-commerce websites to signify where users can view items they intend to purchase. Clicking on it would probably take the user to view their current selections or to the checkout page." + }, + { + "image_path": "android_detailed_caption/images/single_1849.png", + "mask_rle": { + "size": [ + 1140, + 540 + ], + "counts": "`YX16ZS1:klNFcR1" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "Highlighting the word 'Shop,' this implies a navigational link designed to direct users to the online shopping section of the website, where they can browse and choose products for purchase." 
+ }, + { + "image_path": "android_detailed_caption/images/single_1849.png", + "mask_rle": { + "size": [ + 1140, + 540 + ], + "counts": "ncX61YS1=L1O2O000000000001O001N2O1O0001O0O11O0000000000001N10O2O00000O11O000000000000000000O100001O000O11O000O1000000001O00O1O1M300003M00001O001O001Oi\\P:" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This appears to be a search bar, where users can enter keywords or phrases to find specific items or information on the website. Such fields typically include an interactive function that generates search results upon entry confirmation." + }, + { + "image_path": "android_detailed_caption/images/single_1849.png", + "mask_rle": { + "size": [ + 1140, + 540 + ], + "counts": "[ih05[S17L2O000001O00O1000001O00001O0010O001O000O1M3N2O1O1O1O0003N4L1O1O000000000000000O1001O0000001O0000001O0000000000000000000000O100000000O101N01000001O1O1O00000000000000000000000000000000000001O00O1000000000000000001O0000000000000000000O3Mjlk=" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "'My Warehouse' likely refers to the user's preferred or designated Costco warehouse location. It may include functionality for the user to select or change their preferred store and may show additional details, such as operational hours." 
+ }, + { + "image_path": "android_detailed_caption/images/single_1849.png", + "mask_rle": { + "size": [ + 1140, + 540 + ], + "counts": "ZaY9=VS12O1O00000000O2O00000000000001O00000O1000000000000001O000000001N1000000000000001O0000000000000000000000001O1O10O0O100O1O1O1O2M4L03M5L101O000010O1O0000000000O100000000001O0O10000001N11O00000O1001N10000000000000000O1000000000000000O101O000000001O0000000001O0000000000O2O1NSWh4" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The phrase 'Delivery Location' accompanied with what appears to be a postal code implies a feature that allows users to specify or view the location to which online purchases will be delivered." + }, + { + "image_path": "android_detailed_caption/images/single_1849.png", + "mask_rle": { + "size": [ + 1140, + 540 + ], + "counts": "VfX29YS14M2O00000000000001O1N2O1OO2O00000000000000000000000000000000000O100O1O1000000000000000000000000O1000000001O2N1O0000000000001O1N4JPPW>" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "Containing the city name 'Seattle,' this suggests the chosen warehouse or delivery location for the user. It might be interactive to allow the user to change the location or view information on the selected warehouse." 
+ }, + { + "image_path": "android_detailed_caption/images/install_125967318814166469_6.png", + "mask_rle": { + "size": [ + 1140, + 540 + ], + "counts": "R[Y5f0kR14O00000O100000O2O0O10000O100001O02O0O010O00010O001O00000000000000000O10O100O1O1O2O000001O1O001O001O0000000000000001N10000000O2O0000000O11K4O1O100000001O02N3L101O00000001O01N2O0O2N3M101O1OO100O100O2H8M2O101O00000O10000000000001O00001O1O001O1O1O00001O0000000000000000000000000000000O1000000010O0000000O10000000000001O00001O00O100O10O1001O000000000000000000000000000000000001O0001N10000000O1000000O1000000000001O0001N1000000O10000O1N2O1O1000000001O2N1O1O00001O00000000000000001O000O2O0000O10000000EQmN0QS1MTmNNabi0Nk`WO0Vin3" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This text likely represents the name of an application or service known as Fetch Rewards, potentially hinting at a rewards system that users can utilize by engaging with the app." + }, + { + "image_path": "android_detailed_caption/images/install_125967318814166469_6.png", + "mask_rle": { + "size": [ + 1140, + 540 + ], + "counts": "RQ]>=mR1" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This text states \"MONEY,\" which indicates that the application or service likely involves opportunities for users to earn financial rewards or benefits." 
+ }, + { + "image_path": "android_detailed_caption/images/install_125967318814166469_6.png", + "mask_rle": { + "size": [ + 1140, + 540 + ], + "counts": "lVW56[S14N2O00000000001O00001O1O1N100000O10000000000001O000000O1O100O100000000000000000000000000000000000000000O100O1O100001O1O001O00000000000000000O10001O00O101O0000000000000000000000O2O0O100O101O0001O0O2O000O1000000000000O100000000001O:D]f_9" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The text \"appminer st\" is not immediately clear, but it could be a truncation or part of a larger phrase, possibly indicating a feature within the app, or related to app mining or statistics." + }, + { + "image_path": "android_detailed_caption/images/install_125967318814166469_6.png", + "mask_rle": { + "size": [ + 1140, + 540 + ], + "counts": "gWW52`S16J4N001O0000000001N101O000001O0000000001O00O100000000000000000000O10000001O0000000000000000000000O1O100001O1O00000000000000001O000000O10000001O004Jl]n:" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The word \"Contains\" usually suggests that what follows will describe the contents or features within the app, which in this case could be related to advertisements." + }, + { + "image_path": "android_detailed_caption/images/install_125967318814166469_6.png", + "mask_rle": { + "size": [ + 1140, + 540 + ], + "counts": "o`R88[S12O00000000000001N10O101N1O1O1001O0O3N0000O2O0001O0O3NP_g9" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This term generally refers to \"advertisements,\" suggesting that the application includes ads that users might see while utilizing the app." 
+ }, + { + "image_path": "android_detailed_caption/images/install_125967318814166469_6.png", + "mask_rle": { + "size": [ + 1140, + 540 + ], + "counts": "WUd?;WS13N100000000001O0O2N2N11N2N2O0O10000001O0000001N10O101N10000000O1000000FmlN4RS1KQmN3oR1LSmN3WS10O00O100101O1O0001N2N2NXW\\1" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The numerical figure \"50K+\" generally implies a quantity greater than 50,000, typically used in the context of downloads, users, or items within an app." + }, + { + "image_path": "android_detailed_caption/images/install_125967318814166469_6.png", + "mask_rle": { + "size": [ + 1140, + 540 + ], + "counts": "iaW23_S13N1N2N3N100O2O01O6J2N1O00001O001O00D0TmN0lR11PmN2PS1NolN3QS1NmlN3SS170N2N2O1O2N3MP[3MSeL2O6J000O1O1N22N2N00000K6N_Ym>" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The characters \"4.7*\" indicate a rating, likely on a 5-point scale, suggesting that users have rated the app positively, with 4.7 out of 5 stars." + }, + { + "image_path": "android_detailed_caption/images/single_2921.png", + "mask_rle": { + "size": [ + 600, + 270 + ], + "counts": "k_n03db05L1O000000000000001O0000000000000001O000000000O110O0O1000O2O01O0000O1000O1000000010O00O1000000000000001O0000000000000000000000000000000O10000001M1101O0O1001O00001O00000000000000000000000000001O1N[fU2" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This area contains the webpage title indicating the user is on a retail website known for its wide range of products, hinting at online shopping capabilities. The title is typically non-interactive and serves as an identifier of the site." 
+ }, + { + "image_path": "android_detailed_caption/images/single_2921.png", + "mask_rle": { + "size": [ + 600, + 270 + ], + "counts": "[`n01fb03M2O000000000000000O100001O000000000000000000000000000000001OO10000000000000000000000000O10001O^oR3" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This text seems to be an incorrect or truncated URL for the same retail website mentioned in Region 1. Possibly a typographical error within the text, it seems non-functional." + }, + { + "image_path": "android_detailed_caption/images/single_2921.png", + "mask_rle": { + "size": [ + 600, + 270 + ], + "counts": "Vf71gb0000a]O1dT10fkN1^a00U^ONN1O1O1oa0OR^Oa0ma0_OS^Oa0ma0_OU^O?la0@T^O`0Rb0000001Hj]OLWb0:HAS^O1O>na0AS^Oc0Rb0]Ol]O`0Wb0O000000M300K^OS^Oc0ma0]OS^Oc0ma0]OS^Oc0Rb0O1O2N00M30000O^Oo]O?Ub0000001O00N2O10000001Gl]OKUb05l]OJTb06k]OKTb06k]OKUb0<01O00Il]OIUb06m]OISb07m]OISb07n]OGTb08l]OHUb07k]OIUb07k]OIUb08i]OJUb07k]OITb08l]OHTb08l]OHTb08l]OHUb07l]OGVb08j]OHVb0852O2MMf]OIYb06h]OJXb047000000000000001N1N3M2O2OMj]ODnXU3" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The text here represents a search query within a search bar of the website, suggesting the user is looking for a Lenovo ThinkPad, which is a model of a laptop computer." + }, + { + "image_path": "android_detailed_caption/images/single_2921.png", + "mask_rle": { + "size": [ + 600, + 270 + ], + "counts": "\\XV42_b07O1000Jc]O1]b05002N000000001O0000O11O00000000000000000000O10000N2008H\\c4" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "Labeled 'Cancel,' this is likely an interactive button used to clear the current search query within the search bar. 
Once tapped or clicked, it should clear the input text." + }, + { + "image_path": "android_detailed_caption/images/single_2921.png", + "mask_rle": { + "size": [ + 600, + 270 + ], + "counts": "f_96ab02O00010O0000O100001O00O1000000000000000000000000000000000001O001N2OM301N101O0000O10000000O110O0000000000000O11O0000010O000O10000000000000000O10O100000OTcW3" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This text duplicates the query in Region 3 and is part of the search bar suggestions or search history, indicating a previous or common search made by the user." + }, + { + "image_path": "android_detailed_caption/images/single_2921.png", + "mask_rle": { + "size": [ + 600, + 270 + ], + "counts": "]S:8ab0O1O000000000000000000001O0000O10000001O000000000000000000001LR[Q4" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The single word 'Lenovo,' which is part of a search suggestion below the search bar, represents the brand that manufactures various electronic devices, including laptops." + }, + { + "image_path": "android_detailed_caption/images/single_2921.png", + "mask_rle": { + "size": [ + 600, + 270 + ], + "counts": "fcn07ab0000O1000010O000O100001O001OO10000O100002N0001O1OO1O10000000000000000000O1O10O1OTbW3" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The word 'ThinkPad' refers to a specific line of laptops and is part of a search suggestion. Standalone, it specifies the user's interest in the ThinkPad series by Lenovo." 
+ }, + { + "image_path": "android_detailed_caption/images/single_2921.png", + "mask_rle": { + "size": [ + 600, + 270 + ], + "counts": "WWg1121ab040O001O001OnkR3" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The text 'ir' appears to be an incomplete or mistyped search term or fragment within the search suggestions. Its context is unclear without additional information." + }, + { + "image_path": "android_detailed_caption/images/single_2921.png", + "mask_rle": { + "size": [ + 600, + 270 + ], + "counts": "gRm16bb00000000001O00000000000000000000000000000000000000000000O1001O0000001OO1001O_XZ2" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This text, likely a category label, indicates the section of the site the user is navigating, presumably the electronics category where items like laptops would be found." + }, + { + "image_path": "android_detailed_caption/images/single_2921.png", + "mask_rle": { + "size": [ + 600, + 270 + ], + "counts": "ga96ab02O0010O0000000O1001O0000000000O100000000000000000000000001O001N_U12`jN1O0O101O00000000O100001O00000000000000000001O01O000000O10000000000000000O10O100002M5LM3O2O0O100O1000001O01O0O100000000001O00001O000000000000O10000O101O00O11N1N3NPS`2" + }, + "dataset_name": "android_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "Representing a longer search suggestion, this phrase indicates a related accessory for the Lenovo ThinkPad, specifically a charger, suggesting the user might be looking to purchase this item." 
+ }, + { + "image_path": "multipanel_detailed_caption/images/6.png", + "mask_rle": { + "size": [ + 940, + 940 + ], + "counts": "`RY6?ll06J2N2O000O1jSOFZk0;bTOI^k06`TOM_k0P101O00000O100000O100000000O101O0001O00000O2O00000000000000000000000O1001N1000O100000010O001O000000000O1000O1000000000001O000000000000000000000000000001O000000O10O1000000010N10000O1001O0000O100VO_TOIak06fTODZk0iTOAWk0?gTOCYk0=aTOJ^k07_TOKak05]TOMck03\\TOObk02_TOMak03_TOLbk04_TOL`k0P11O010O000000000000O1000000001O001O000000000001NSO_TO1ak0NbTO0^k00bTO0]k02bTON^k02bTON^k02bTON_k01aTOO_k03^TONck0m0000000000QO`TO2ak0LaTO3`k0KaTO5_k0JbTO6^k0JbTO5_k0KaTO5_k0KaTO5^k0LbTO4^k0N_TO3ak0N\\TO4ck0k00000O100O101O000O1000O10000000000000000001O0O100000000001O00O10000001O0001N1000000001N10000000000000000000O1001O000000000000O100000000000O10010O0001OO1000000001O0O01000001O1O000000O10000000000000001O01O1N100001N11O0O2O1O000001O00O2O001O000000ORO]TO5ck0J`TO4_k0LcTO3]k0MdTO2\\k0NdTO2\\k0NeTO1[k0NfTO2Zk0OeTO\\OH`0dk04jTOJUk07PUOCQk0=QUO@Pk0`0PUO@Pk0`0PUO@Pk0`0PUO@Pk0`0PUO@Pk0`0PUO@Pk0`0nTOBRk0>cTOM]k03bTON^k02bTON^k02bTON^k02bTON_k01aTO0^k00bTO0^k0OcTO1]k0OcTO1]k0OcTO1]k0OdTO0\\k00dTO^OF>fk04dTO0\\k01cTOO]k01bTO0^k00bTO0^k00bTO0^k00bTO0^k00bTO0^k00bTO0^k0OcTO1]k0OcTO0^k00bTO0^k00bTOO_k01aTOO_k01aTOO_k01aTO0^k01aTOO_k01aTO0]k01cTOO]k01cTO0\\k00dTO0\\k00dTOO]k02bTON^k02bTON^k01cTOO]k01cTOO]k01cTOO]k01cTON^k02bTON^k02bTON^k01cTOO]k02bTON^k02bTOO]k01bTO0^k00bTO0^k00bTO1]k00bTO0^k00cTOO]k00dTO0\\k00dTO0]k0OcTO1]k0OcTO1]k0OcTO1]k0OcTO1\\k00dTO0\\k00dTO0\\k00cTO1]k00bTO0^k00bTO0^k00bTO0^k00bTO0^k00bTO0^k01aTOO_k00bTO0^k00aTO1_k0OaTO1_k0OaTO1_k0OaTO1`k0NaTO1_k0OaTO1^k00aTO1_k00`TO0_k01_TO2`k0m0000000000000000000000O11O000O100000000000000000O10000001O00000000000001O000O101O00O101O000O1000O2O00000000000000001RO\\TO2dk0N]TO1ck0O^TO0bk01]TOOck01^TONak03_TOMak0o00SO]TO1ck0O]TO1ck0l0000000000SO]TO1ck0O_TOOak01_TOOak01`TON`k02`TON`k02`TON`k01bTOM_k02bTON^k02cTOL^k04cTOJ^k06nTO[OUk0e0i001N2N4]OWSO7elY6" + }, + "dataset_name": 
"multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The image displays a title that reads \"NAVIGATING SPECIAL EDUCATION SOCIAL & EMOTIONAL LEARNING.\" It's styled in bold, white capital letters against a red background, and it appears to serve as a header for the entire visual presentation, indicating the overarching theme of the content below." + }, + { + "image_path": "multipanel_detailed_caption/images/6.png", + "mask_rle": { + "size": [ + 940, + 940 + ], + "counts": "h`f2`11eN;V2ah0V2M2N2O000000000000000000000000001O00000000000000000000000000000000000000000000000000000000000000000QO_KSYOa4af0mK]YOS4bf0nK^YOR4bf0oK]YOQ4cf0PL\\YOP4df0PL\\YOP4df0PL\\YOP4ce0]KeZOc0HP4be0_KeZOa0IP4be0aKcZO?KP4be0cKaZO=MP4be0dK`ZOMo3de0cK_ZO>Mo3de0aKaZO`0Ko3de0`KbZOa0Jo3de0`KbZOa0Jo3ee0^KbZOc0Io3ee0\\KdZO9SO0d0[4ee0[KfZO8RO2c0[4ee0[KfZOe0EP4ee0[KfZOf0Do3fe0aK`ZOa0In3fe0eK]ZO>Mm3fe0hKZZOLn3he0bK^ZO>KP4fe0cK_ZO=KP4fe0cK`ZOHQ4fe0aKbZO>HQ4fe0aKbZO>HQ4fe0`KcZO?GQ4fe0aKbZO>HQ4ee0cKbZO=HP4fe0dKaZO=Ho3ge0eK_ZO>Im3he0fK^ZO=Jm3he0eK_ZO>Im3he0eK_ZO=Jn3ge0eK_ZO=Jn3ge0eK_ZO=Jn3ge0dK`ZO=Jo3fe0cKaZO=JP4ee0bKbZO=JQ4de0bKaZO>KP4de0bKaZO>KP4de0bK`ZO?Lo3de0bK`ZOa0Jm3fe0cK^ZOb0Kk3ge0dK]ZOb0Kj3he0dK]ZOb0Kj3he0dK]ZOb0Kj3he0dK]ZOb0Kj3ie0bK]ZOd0Jj3ie0bK^ZOc0Ik3he0dK^ZOa0Jk3he0dK_ZO`0Il3he0eK_ZO=Jn3ge0dK`ZO>In3ge0cKaZO?Hn3ge0cKbZO=HP4ge0bKaZO>HP4ge0bKaZO?Go3ge0dKaZO=Ho3ge0eKiYOGc0f0Ln3he0jK[ZO8Mn3he0eKiYOGc0e0Mo3fe0eKbZOfd0[O]YO3n1e0hc0jNYZO>20l1k0gc0iNZZO=4Ni1n0gc0iN\\ZO;4Ng1P1cc0nNaZO45Nf1Q1dc0lNbZO55Ld1U1ec0jNcZO46Jb1Y1ec0hNdZO57Cd1a1ac0gNdZO5V1BFc1_d0gNeZO4S1HE^1cd0gNdZO3;G[O4Q1\\1ed0gNcZO2;KWO3T1Z1hd0fNaZO2;f0:c0kd0dN`ZO39h0;b0od0_N^ZO77j0;`0Se0\\N[ZO:6l0:?We0YNYZO<4n0<=Xe0WNYZO>2P1<;Uf0eN^YOR1;:Wf0dN]YOT1;8Xf0eN\\YOU1:6Zf0eN[YO\\140af0dN[YO^12Nae0TNgZO?Eb11Kce0TNgZO`0Dc10Iee0TNgZOa0Be1OGge0TNgZOb0Bf1MDje0TNgZOb0Bg1LCke0TNgZOb0Ai1KBne0RNfZOc0Ak1I@Pf0RNkYONf0e0Fl1G@Qf0XNaZOCW4de0\\KhZO=DW4de0aKcZO7JX4ce0fK]ZO21X4be0gK
\\ZO12X4be0gK\\ZO12X4be0eK^ZO30X4be0bKaZO6MX4ce0[KlYOKf0b0KX4ce0\\KfZO;HX4be0_KcZO:KW4be0_KcZO;KU4be0`KcZOHS4ae0]KiZO`0FS4ae0^KhZO>HT4`e0bKdZO9NT4^e0fKaZO61T4^e0gK`ZO44T4]e0gK_ZO54T4]e0fK`ZO63T4^e0dK`ZO83S4^e0dK_ZO93k1YOGTf0UNaZO92i1]OGPf0VNbZO:2f1_OIke0YNdZO82e1AIie0YNeZO91c1CKge0YNeZO91b1DLge0XNdZO:1`1FNee0VNfZOUf0cNeYOn06?Uf0cNfYOl07`0Tf0cNfYOk08`0Sf0eNhYOMUO5Q1X1Qf0gNaZON@Z1oe0hNbZOL@\\1ne0hNcZOJA]1le0iNiZOC]Oc1ie0kNlZO@\\Od1he0lNa\\OS1_c0mNc\\OQ1]c0oNd\\OP1\\c0POf\\On0[c0QOf\\On0Zc0ROY[OAhN\\1oe0TOW[OBlNW1me0WOX[O@oNU1je0]OiZOBHn0_e0AhZOA:8Re07fZO_OWh0`0jWO@Vh0`0kWO^OVh0a0lWO^OTh0b0lWO@Rh0`0nWOBkMLoi0b0VXOCiMMPj0`0WXOCiMNoi0?XXOBjMNoi0`0WXOBkMMni0`0XXOBjM0mi0>YXOChM1ni0[XO_Omg0a0SXO_Omg0a0SXO@kg0a0TXO@lg0`0TXO@kg0a0UXO_Okg0a0`2000001O1N2O2BWSO4_QS;" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "Depicted here is a person with a grim expression, and steam coming out of their ears, conveying the theme of \"Anger Management.\" This symbolizes the need to control tempers, with visual cues highlighting the struggle typically associated with anger." 
+ }, + { + "image_path": "multipanel_detailed_caption/images/6.png", + "mask_rle": { + "size": [ + 940, + 940 + ], + "counts": "Pd]b06fl0N`SOb0Ul0?K6oKSOX[OQ1dd0ROW[OU1ed0lNX[OY1fd0fNY[O^1ed0aNZ[Oc1id0WNU[Om1id0SNU[OR2fd0QNX[OS2ad0QN^[OR2_d0oM_[OU2^d0lMa[OW2]d0hMc[OZ2\\d0fMb[O]2]d0bMc[O`2]d0`Ma[Od2]d0\\Ma[Oh2]d0XMa[Ol2\\d0TMc[Oo2[d0QMd[OQ3[d0oLd[OR3\\d0nLc[OT3\\d0lLc[OW3[d0iLd[OZ3Zd0fLe[O^3Xd0bLh[Oa3Ud0^Lk[Od3Td0\\Lk[Oe3Ud0ZLl[Og3Sd0YLl[Oh3Td0YLj[Oh3Vd0YLi[Oh3Vd0XLi[Oi3Wd0WLi[Oi3Xd0ULh[Ol3Yd0RLh[On3Yd0PLg[OR4Wd0oKWZO4=n3[e0oKWZO6;l3]e0oKXZO5;l3]e0oKWZO6;m3]e0lKYZO89l3^e0lKYZO88n3^e0jKZZO88n3^e0jKZZO87P4]e0jK[ZO68P4\\e0lK[ZO49P4[e0mK\\ZO38Q4\\e0kK]ZO38R4\\e0jK]ZO37S4\\e0iK^ZO45T4_e0fK]ZO54U4ae0cK\\ZO82V4ce0aK[ZO92W4ce0_K[ZO:1X4ce0_K[ZO:2W4ce0_K[ZO:1X4ce0_K[ZO:2X4be0^K\\ZO:2X4ae0`K\\ZO83X4`e0bK]ZO52Z4ae0bK]ZO41Z4be0bK]ZO40\\4be0`K_ZO3O]4be0`K_ZO20^4be0_K^ZO4N^4de0^K_ZO3M_4de0^K_ZO3M_4de0^K_ZO3M_4ee0]K^ZO4M_4de0]K`ZO4L_4de0]K`ZO4L_4ce0^KaZO3K`4de0]KaZO3K`4de0]KaZO3K`4de0^K`ZO1Lb4de0]K`ZO0Ld4de0[KaZO1Je4ee0YKbZO2Ie4ee0XKcZO3He4ee0XKcZO3Gf4fe0WKcZO4Fe4ge0WKcZO4Fe4ge0XKbZO4Fd4he0XKbZO4Fd4he0WKcZO5Ed4he0WKdZO4De4he0WKdZO4De4he0WKcZO5Gb4fe0ZKbZO4Ia4ee0\\KaZO4I`4fe0\\KaZO4I`4fe0\\KaZO4I`4fe0[KbZO5I^4fe0]KaZO5I^4fe0]KaZO5I^4fe0]KaZO5J]4ee0^KaZO5J]4de0`KaZO3K]4de0_KbZO4J\\4ee0`KaZO4J\\4ee0_KbZO5J[4de0aKaZO4K[4de0cK_ZO2M[4de0dK^ZO2MZ4ee0eK]ZO1OY4ee0eK\\ZO2OY4ee0eK\\ZO2OY4ee0dK]ZO3NY4ee0dK]ZO3NY4ee0cK^ZO4NX4de0eK]ZO20Y4ce0eK]ZO20Y4ce0dK^ZO3OX4de0eK]ZO30W4ce0eK^ZO4OV4de0eK^ZO5NV4de0fK\\ZO60R4ee0hK[ZO60R4ee0iKZZO61o3fe0lKWZO63n3fe0kKXZO72n3fe0kKWZO84l3ee0kKWZO95l3de0kKWZO95l3de0kKVZO:7j3ce0lKVZO:7j3ce0mKUZO99i3be0nKUZO99i3be0mKWZO98h3be0oKWZO87h3ce0oKXZO86e3ee0TLTZO78a3ge0YLoYO89]3je0\\LlYO7;[3je0^LkYO8;W3le0aLiYO8;W3le0aLjYO7:W3me0bLiYO7;V3le0cLjYO6;U3me0dLhYO8;S3me0eLiYO8:o2Pf0iLfYO9:l2Qf0kLeYO:9i2Sf0mLeYO:9g2Sf0oLdYO::e2Sf0QMcYO:;b2Sf0VMaYO7>`2Sf0YM_YO6`0_2Rf0\\M]YO5b0\\2Sf0_M[YO4e0Z2Pf0cM[YO2g0X2oe0gMZYO1h0V2ne0jMZYONl0S2me0oMWYOKP1T2ie0SNVYOEW1T2ee0WNUYOAZ1T2de0[NRYO_O^1Q2ce0
`NX[O[1ld0dNV[OW1nd0gNV[OS1nd0kNV[On0md0SOW[Oe0md0[OV[O:Re0FS[OMVe02R4O2O000001O01O0O1O100001O3M00000001O010O00O2O0O1000000000000000O101N3A[SO0QQg2" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This panel represents \"Understanding Diversity.\" It features a circle of variously colored handprints reaching towards the center, signifying unity and inclusiveness among diverse individuals or groups. The image communicates the idea of embracing diversity." + }, + { + "image_path": "multipanel_detailed_caption/images/6.png", + "mask_rle": { + "size": [ + 940, + 940 + ], + "counts": "hd]a02ol0>L2N2N101O0000001O00001O01O000O2O000000000000000000010N1001N100000000000000000000XVO1Sf0OhYO;Sf0EhYOd0Tf0\\OjYOi0Sf0WOiYOQ1Rf0POjYOW1Sf0iNkYO[1Rf0fNlYO^1Rf0bNlYOb1Rf0^NlYOf1Rf0ZNmYOh1Rf0XNmYOk1Qf0UNnYOn1Pf0RNoYOX2he0hMVZO]2ge0cMXZO_2ge0aMXZO_2je0_MkYO^OhNT3]g0^MiYOBgNR3_g0\\MhYOR3Yf0mLeYOV3Zf0jLeYOZ3Xf0fLgYO\\3Xf0dLhYO\\3Xf0dLhYO]3Vf0dLiYO]3Wf0cLiYO]3Wf0dLhYO]3Wf0cLiYO^3Vf0bLjYO^3Uf0cLkYO^3Tf0bLlYO^3Tf0aLmYO`3Qf0aLoYO`3Pf0_LRZO`3ne0`LRZOa3me0_LSZOa3me0^LTZOc3le0\\LTZOe3ke0\\LTZOd3le0\\LUZOd3ke0[LUZOe3ke0[LUZOf3je0[LUZOe3ke0[LTZOf3le0YLUZOg3ke0YLUZOh3je0XLVZOh3je0XLUZOi3je0XLVZOh3je0XLVZOi3ie0XLUZOi3ke0WLUZOi3ke0WLUZOj3ke0ULTZOl3le0TLTZOl3le0ULRZOm3ne0RLRZOn3ne0SLQZOm3oe0SLPZOn3Pf0QLQZOo3oe0QLQZOP4oe0oKQZOQ4oe0nKQZOS4oe0mKQZOS4oe0mKQZOT4ne0lKRZOT4ne0lKRZOT4ne0lKQZOU4oe0kKQZOU4oe0kKQZOU4ne0mKQZOT4ne0lKRZOT4ne0lKRZOT4ne0lKRZOT4me0lKTZOT4le0lKTZOT4ke0lKVZOT4je0lKUZOU4ke0kKUZOU4ke0kKUZOU4ke0kKUZOU4ke0kKUZOT4le0lKTZOT4me0kKSZOU4ne0jKRZOV4oe0iKQZOW4oe0jKPZOV4Pf0jKPZOU4Rf0iKPZOV4Pf0jKPZOV4Pf0jKPZOV4Pf0jKPZOU4Qf0kKoYOU4Qf0kKoYOU4Qf0kKoYOU4Qf0kKoYOT4Rf0lKoYOS4Qf0mKoYOS4Qf0mKoYOS4Qf0nKnYOQ4Sf0oKmYOQ4Sf0PLmYOo3Sf0PLnYOo3Sf0QLmYOo3Sf0QLnYOn3Rf0QLoYOo3Qf0QLPZOm3Qf0TLnYOl3Rf0TLnYOk3Sf0VLlYOj3Tf0ULnYOj3Rf0VLnYOi3Sf0WLmYOi3Sf0WLmYOh3Tf0XLlYOh3Tf0XLmYOf3Tf0ZLlYOf3Tf0ZLlYOe3Uf0ZLlYOf3Tf0
ZLmYOd3Tf0\\LlYOd3Tf0\\LlYOc3Uf0^LiYOb3Xf0^LhYOb3Xf0^LhYOa3Yf0_LgYO`3Zf0`LfYO`3Zf0`LfYO_3[f0aLeYO_3[f0bLdYO]3]f0cLcYO]3]f0cLcYO\\3^f0dLbYO[3_f0eLbYOZ3^f0eLcYOZ3^f0fLbYOZ3^f0fLcYOX3^f0hLbYOW3_f0jLbYOT3^f0kLeYOS3[f0mLfYOQ3[f0oLfYOP3Zf0PMkYOg2Yf0ZMkYO^2Wf0cMkYOV2Zf0jMhYOQ2[f0oMgYOm1[f0SNfYOj1\\f0VNfYOe1]f0[NdYOa1_f0_NcYO]1_f0bNdYOZ1^f0fNdYOU1_f0kNeYOm0_f0SOdYOg0_f0YOdYO`0`f0_OfYO5`f0M_31O0O2O0O010000000O20O1O0O2O00010O0000000O11O01O0000000001O001O3M0000O100O100O10000O101N1OV[`2" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The image here is indicative of \"Social Inferencing.\" A figure stands perplexed before an open box with question marks floating above, suggesting the process of interpreting social cues and understanding social contexts or scenarios that are not overtly expressed." + }, + { + "image_path": "multipanel_detailed_caption/images/6.png", + "mask_rle": { + "size": [ + 940, + 940 + ], + "counts": "Uh]:?gk0HQUOf0gj0\\OSUOm0hj0UORUOT1ij0nNSUOX1ij0i0M2N3M2M3N2N2M3N2N1O2N1O1N3N1\\NjLPYOX3ef0TMXYOm2gf0TMWYOn2gf0TMXYOm2gf0SMYYOn2ff0RMYYOP3ff0PMYYOR3ff0nLYYOT3ff0lLZYOU3ef0kLZYOV3PO\\L]g0d4of0hKaXOE`0c4of0mKQYOS4of0mKPYOT4Pg0lKPYOT4Qg0kKoXOU4Qg0jKPYOV4of0kKQYOU4of0iKSYOW4mf0hKTYOX4lf0gKUYOX4lf0gKUYOY4kf0gKUYOY4kf0fKVYOZ4if0fKXYOZ4hf0fKXYOY4hf0gKYYOi1YO8]g0PNZYOh1ZO7\\g0TNXYOd1[O9]g0VNUYOQ4kf0QLSYOo3mf0QLSYOo3of0nKRYOR4nf0mKTYOR4mf0kKUYOU4kf0kKUYOU4kf0lKTYOT4lf0lKTYOT4lf0lKSYOU4mf0kKSYOV4lf0iKUYOW4lf0gKUYOY4kf0fKVYO[4if0eKVYO\\4jf0eKUYO[4kf0eKUYO[4lf0cKTYO^4lf0`KVYO`4kf0]KWYOc4jf0\\KVYOd4jf0eKmXO[4Sg0fKlXOY4Tg0iKjXOX4Vg0hKjXOX4Ug0iKkXOV4Vg0jKkXOm3\\g0TLdXOi3_g0WLaXOg3ag0XL`XOh3ag0VLaXOi3_g0ULcXOk3]g0ULdXOi3^g0nKoWONf0S4\\g0nKkXOQ4Ug0nKmXOP4\\h0L5L2O4KO0000107H2O1N01O1O100O2N2fNeVO_N\\i0\\1kVOaNVi0\\1nVObNUi0X1PWOfNSi0U1SWOgNnh0V1UWOiNoh0P1YWOjNkh0P1[WOmNfh0m0aWOQOch0f0dWOXO`h0b0cWO]O_h0`0cWO^Och0<_WOBhh07[2IdUo;" + }, + "dataset_name": 
"multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "An illustration of two children, one standing over the other with a raised fist, typifies \"Bullying.\" This image portrays an aggressive interaction between youth, emphasizing the dynamic of power and intimidation present in bullying behaviors." + }, + { + "image_path": "multipanel_detailed_caption/images/6.png", + "mask_rle": { + "size": [ + 940, + 940 + ], + "counts": "WQX2;il0801O000001O2M2ON2O2N1000000001O2M4M0000O2N1000000000000000000fVOFce09PZOc0ae0]O\\ZOh0be0XOYZOn0de0TOWZOS1de0POXZO[1_MaN^g05P[OR2nd0nMUZOEmN_2nf0lMnYOMROY2of0jMjYO4SOT2Sg0hMhYO7SOR2Wg0fMcYO;TOP2\\g0bM_YOa0ROn1bg0`MYYOd0SOm1cg0`MYYOg3ef0\\LYYOe3gf0[LXYOf3hf0ZLXYOf3hf0ZLWYOg3if0XLWYOi3if0VLXYOT3ZOjL_g01XYOP3BkLVg05WYOP3HfLRg0:UYOP3fg0PMZXOP3fg0oLZXOR3fg0nLZXOT3eg0kL[XOV3BdL^g04QYO\\3XOfLgg0MQYOQ4lf0QLSYOP4if0lK_XOKh0Y4gf0nKaXOIg0Z4gf0nKbXOHg0Z4gf0mKcXOIf0Z4gf0mKcXOIf0Z4if0jKbXOLe0Z4mf0fK^XO0f0Y4Ug0gKkXOY4Ug0gKlXOX4mf0hKYXO1j0W4kf0QLUYOo3kf0QLVYOn3if0SLWYOU3AgLXg04WYO\\2AXM>2kf0:WYOR1UO^O\\i0YOdVOMl0Y1YO\\OYi0]OcVONk0Y1[OZOWi0_OcVONl0W1]OZOTi0AcVONl0V1AXOPi0DcVONm0T1CXOmh0FcVONn0R1FXOih0GcVOOo0P1IWOfh00cWOf0JYOfh0N`WOh0LZOih0I[WOk0N[O:Inf03jXOf0O^O7Nnf0NmXOa01C32lf0JRYO;4IM4jf0IXYO53NK6^f0F]XO1Q33UN5]f0Oa[OLQN6]f0E_XOOT36PN6]f0Cl[O8gM4Rj0MmUO3Sj0MmUO3jf0CV[O:PN3`f0F]XO0R36QN4_f0H\\XO0S35QN3af0F]XO1Q36QN4jf0CX[O8nM5Uj0KkUO5Uj0JlUO7Sj0JkUO8Tj0HlUO:Rj0GmUO;Qj0FmUOWk0AiTO>Xk0CfTO=\\k0BbTO`0^k0A`TO`0ak0@\\TOb0dk0^O[TOc0nj0TO`UO:@c0kj0YOdUO4@d0dj0@mUOo0oi0VOPVOk0mi0YOPVOh0ni0\\OPVOd0ni0@oUOa0Pj0BnUO>Qj0DnUOWUO@kj0>WUOAkj0=VUOCjj0;XUOEWh0@eYOj0UNFUh0@fYOj0VNFSh0@gYOj0WNFQh0@hYOj0XNFog0_OjYOl0WND`j0=aUOC]j0>cUOB`g0AbZOo0mMA_g0@eZOn0mMBZj0>gUOCWj0>iUOCUj0=mUOC_g0]OWZOP1[ND\\g0]OYZOn0]NCYg0@ZZOl0`NCUg0A\\ZOj0bNCRg0C]ZOc0hNIlf0C]ZOa0kNJhf0E_ZOhd0BV[Oc0gd0]OW[Og0fd0ZOY[Oh0fd0XOZ[Oi0ed0WOZ[Ok0ed0UOZ[On0dd0QO\\[OR1UM`Njf0>Q\\OS1RMcNkf0:R\\OT1RMeNjf07T\\OU1PMfNkf05T\\OW1n
LhNlf01V\\Oo1ic0RNU\\OP2jc0PNV\\OQ2ic0oMW\\OR2hc0nMW\\OT2ic0jMX\\OV2hc0jMX\\OW2fc0jMZ\\OW2ec0iM[\\OW2cc0lM\\\\OU2bc0lM]\\OV2bc0jM^\\OV2ac0kM_\\OU2ac0kM_\\OV2`c0jM`\\OV2`c0jM`\\OV2`c0iM^[OFeNb2le0hM][OKdN]2oe0hM\\[ONcNZ2Qf0hM[[O0bNY2Sf0gMY[O3cNV2Tf0gMX[O5cNT2Uf0gMX[O6bNR2Wf0hMV[O8bNf1bf0SNjZO9cNb1ef0UNgZO;cN^1hf0WNdZO=cNZ1lf0XNaZO>cNX1nf0ZN^ZO?dNU1of0]N]ZO?cNT1Pg0\\N]ZOb0cNP1Pg0_N\\ZOb0dNn0Qg0`N[ZOc0cNm0Sg0_NYZOe0dNl0Vg0\\NVZOh0dNl0Xg0YNUZOl0bNk0Yg0YNTZOm0dNi0Xg0ZNTZOn0dNg0Xg0[NTZOn0dNg0Xg0\\NRZOn0gNe0Wg0]NRZOo0fNc0Yg0^NQZOo0gNb0Xg0_NQZOo0hNa0Vg0aNRZOn0iN?Vg0cNQZOn0jNlNNl0Xg0ZOoYOP1lNgN4g0Sg0BmYOP1nNdN6c0Tg0HhYOQ1JPOcf0OcYOQ1LeNjf09[YOR1LcNjf0:[YOS1Wh0nNhWOR1Wh0oNiWOR1Uh0POjWOP1Vh0POiWOQ1Wh0oNiWOQ1Xh0nNhWOS1Wh0mNjWOX1Qh0gNoWOY1Qh0gNoWOY1Rh0\\N_VO2`1b1Zh0[NgWOe1Pj0O100O100O1O2N10VNcNeWO\\1Xh0iNgWOW1Sh0oNmWOP1Sh0QOmWOo0Sh0QOnWOm0Sh0SOmWOl0Th0TOlWOl0Th0TOlWOl0Th0SOmWOl0Th0TOlWOl0Th0TOlWOl0Th0UOkWOl0Uh0SOkWOn0Th0SOkWOn0Uh0QOkWOo0Wh0nNiWOS1_h0eNaWO\\1^h0cNcWO]1[h0dNfWO]1Vh0eNkWO\\1Rh0eNnWO]1Ph0dNPXO\\1Ph0dNPXO]1og0dNPXO]1og0dNPXO\\1Ph0eNoWO\\1og0fNPXOZ1Ph0gNoWOZ1Ph0fNPXOZ1Qh0eNoWO[1Qh0dNPXOV1bN]N`i0Wg0`NQZOR1hN?Vg0_NRZOR1fNa0Xg0]NSZOQ1dNc0Yg0\\NSZOQ1cNd0Zg0[NSZOP1cNf0Zg0ZNSZOP1cNf0[g0XNSZOR1bNf0[g0XNSZOR1aNg0\\g0WNTZOP1aNj0Zg0VNUZOP1aNj0Zg0VNUZOo0bNk0Yg0VNUZOo0bNk0Xg0WNWZOl0bNn0Vg0WNWZOk0cNn0Vg0WNXZOj0aNQ1Vg0UNYZOi0bNT1Sg0SN\\ZOh0aNW1Qg0RN]ZOf0cNZ1nf0PN`ZOe0bNb1gf0iMgZOd0cNj1_f0aMP[Od0aNk1`f0`MP[Oc0aNm1_f0`MP[Ob0bNn1^f0_MR[Ob0`No1^f0_MS[O`0aNP2]f0_MS[O?aNR2Zf0aMV[O;aNS2Wf0fMX[O4cNV2Tf0gMZ[O1cNX2Sf0gM\\[ONbN[2Rf0gM_[OHbN`2Pf0iMa[O_OcNh2me0hM_\\OV2cc0jM\\\\OV2ec0iM[\\OV2fc0iM[\\OV2fc0jMZ\\OV2fc0jM[\\OT2fc0lMZ\\OS2gc0mMZ\\OQ2gc0oMY\\OP2hc0PNX\\OP2ic0oMX\\OZ1gLjNRg0LW\\OW1lLjNnf0OV\\OV1oLgNlf04V\\OR1RMeNkf09T\\OP1bd0PO^[Oo0cd0QO^[Om0dd0RO\\[Om0ed0SO\\[Oj0fd0UO\\[Oi0ed0WO][Oe0dd0\\O^[O?ed0A][O:ed0G_[O1ed0O\\400001O00001O0O1O00101O0O11O2M3N1O0000O2O000000000001O000O101N1001N101O0000000000O10000000001O1N2NnPWa0" + }, + "dataset_name": "multipanel_detailed_caption_box", + 
"question": "Please provide a detailed description of each marked region in the image.", + "caption": "Showing two profiles with opposing arrows and a lightning bolt in between, this panel discusses \"Conflict Resolution.\" The imagery suggests two individuals facing a conflict with a potential for resolution, emphasizing communication and problem-solving." + }, + { + "image_path": "multipanel_detailed_caption/images/6.png", + "mask_rle": { + "size": [ + 940, + 940 + ], + "counts": "^j`:2bl0k0YOe0oNP1YOg0M3O100O1000000000000000000O100000000000001O00001O001O2N2N0000O1O1N2O1O10000O100000000000O10000dNcLiXO]3Ug0fLjXOZ3Ug0gLjWOLg0]3_g0gLjWOLg0]3_g0gLkWOKf0^3_g0gLkXOY3Ug0gLkWOKf0^3_g0iLgWOLi0Z3ag0TM^XOl2bg0TM^XOl2bg0UM]XOk2cg0TM^XOl2bg0TM^XOl2bg0TM^XOl2bg0TM^XOl2bg0SM_XOm2bg0oL`XOR3bg0eL`WO0o0[3bg0cLbWONn0_3jh000000gNgL_XOY3`g0jL^XOV3bg0lL\\XOT3eg0jL\\XOV3eg0hL\\XOX3lh000000000000000000O1000000000000000000000000000000hN`LdXO`3\\g0bLbXO^3]g0dLbXO\\3_g0cLaXO]3_g0cLaXO]3_g0dL`XO\\3`g0jLYXOW3fg0mLWXOS3ig0nLVXOR3jg0nLVXOR3kg0nLTXOR3lg0nLTXOR3lg0nLTXOQ3ng0nLRXOR3Qh0kLoWOU3Xh0cLiWO]3Yh0`LhWO`3Ri0000000000ROeLkWO[3Sh0hLlWOX3Uh0gLkWOY3Si0100000000kNfLXXOZ3gg0gLYXOY3gg0eL[XO[3eg0bL^XO^3bg0aL_XO_3ag0bL^XO^3bg0cL]XO]3cg0eL[XO[3eg0gLYXOY3gg0gLYXOY3hg0eLYXO[3hg0bLZXO^3gg0`LZXO`3eg0bLYXO_3gg0bLXXO^3gg0dLXXO\\3gg0eLYXO\\3fg0bL\\XO^3dg0aL]XO_3cg0aL]XO_3cg0aL]XO`3bg0aL]XOd3^g0]LaXOf3\\g0ULjWOOj0l3ah000N2N2K5O100000000000000000000O10O100000000000000000000000000001O1N=gLnUO_2kl0^Mkok:" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This panel depicts \"Coping With Changes,\" represented by a signpost with arrows pointing in different directions, labeled \"CHANGES.\" It symbolizes the various paths one may take when encountering life's transitions and the importance of adaptability." 
+ }, + { + "image_path": "multipanel_detailed_caption/images/6.png", + "mask_rle": { + "size": [ + 940, + 940 + ], + "counts": "e]db0?kl03N2O00001O01O4L5K1ON2M3O100O0aXOCQb0=n]OEQb0;o]OEQb0;n]OFQb0;o]OEQb0;[\\O6WM_O^f0;W\\OXMYOaf09V\\O?YMXOaf09g[O@\\MP1[f0Bd[O0RN>Zf0Bc[O0TN>Yf0Bb[O0UN?Yf0Aa[O0WN?Xf0A`[O0YN?Wf0A_[O1ZN>Wf0A^[O2ZN>Xf0@][O3ZN>Yf0_O\\[O4[N=Yf0^OjZOCZNc0c0g0;Yf0]OfZONXN;h0:Zf0]OeZO1VN\\2Ug0cMdZOT3\\e0lLcZOU3]e0lLaZOU3_e0kLaZOU3_e0kL`ZOV3`e0jL`ZOV3`e0jL_ZOW3ae0jL^ZOV3ae0kL_ZOU3ae0kL^ZOV3be0jL^ZOV3be0iL`ZOU3ae0kL_ZOU3ae0kL_ZOU3_e0mLbZOR3]e0oLcZOQ3]e0oLcZOQ3\\e0PMeZOo2[e0QMfZOn2Ze0SMeZOm2[e0SMfZOl2Ze0TMhZOj2Ye0UMiZOi2We0WMkZOg2Ve0XMoZOc2Re0\\MS[O_2md0aMT[O3eMb1Wg0[NT[O2hMa1Ug0\\NP[O6kM^1Tg0]NfZO?XNS1Rg0^NbZOc0\\No0Rg0_N^ZOe0`Nl0Qg0`N_ZOd0aNk0of0bN`ZOb0bNl0nf0aNaZOc0bNk0lf0cNbZOa0dNk0jf0dNbZOa0eNj0if0fNbZO>gNk0gf0gNbZO=jNj0ef0hNbZOSGHj88TGLj84UGMk83TG0k8OTG3k8MUG4j8LUG5k8KTG7k8JSG8l8HSG:l8FTG:l8FSGn8BRG>n8BSGdGi0BjNj8=dGi0BjNj8=cGk0AiNl8gFCY9=gFCZ9fFBZ9>fFB[91SFHb07[91SFGc08Z90TFIa07[90SFJb06[90SFIc07Z90SFIc07Z90SFJb06[92D0o8e1UG[Ng9f1XFZNh9f1XFZNi9e1WF_Ne9]1UFaN44e9b1ZF^Nf9b1YF_N4GZ9k1bF\\NNI60Z9k1aF\\N0I23]9R2aFkM24\\9R2aFjM44[9Q2jFoMS9S2iFoMW9R2fFQNX9Y24O1\\OaFUN1OO8`9b1dFUN;1n8i1YGWNH0^8i1^GWNM0I11O16j8c1^GWNMOJ2O;l8]1^GWNMOJ?m8[1\\GWNM0I109n8_1fGZN]O7m8`1eGWNA8j8a1dGXNB0L2n8f1eGWNBOM3l8h1dGVNI0d8j1cGUNI2d8i1YGTN04JM03m8h1YGTN01IO11O3o8h1bGTND7k8e1eGTN\\O8OIk8k1jGTN[O=k8a1WHSNj7m1T12UFQNK1a9o1bFQNL2>Mb8P2QGoM11O4^9k1cFPN78P9i1hFoM98n8l1RGTNo8l1c00UFmM20_9S2_FmM21^9R2`FmM13^9P2aFWN0IV9P2jFWNOK0KS9S2nFoM0M10]9S2>M300000VFPNb9o1^FTNJ11OY9l1lF`NOAn8o1SGTNJ30LZ9l1oFRNI13OV9k1eFUNS:k111lETNR:l1nETNR:k1YFWN[9i1eFXNZ9h1\\FUN73]9g1b0M2O11O00LgE_NY:e110O1000O000O1bNgEI3m0LVOZ:5fEH7j0IYOZ:8mE?IXO^:3bEK7b0JAe:0ZEN0O2c00_Oe:0YEN2O1c00_OT;:jDFg;NjS1m0^`NYO0O5KS:b1hEcN1LO11OO06OZ9j1`F[N100K01_9X2`FlM`9c2G6iFRMQ9a2oFaM0O10fen2" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the 
image.", + "caption": "This part of the image addresses organizational efficiency, with a statement that 46% of companies are sometimes or frequently understaffed. The figure is accompanied by a graphic showing the 46% proportion." + }, + { + "image_path": "multipanel_detailed_caption/images/50.png", + "mask_rle": { + "size": [ + 755, + 1136 + ], + "counts": "4T3_d000^Ob[OoM^d0m0c[OC1^O]d0n0d[OTO5DL8\\d0n0f[OSO6DJ9[d0n0g[OSOa0Mhc0h0b[OPO99Ye0g0l0O10000001O1O4L3kYOnNce0e1L7Q[ORNjc0Q2j[OmMH;Xd0i1l[OcNQd0`1l[OaNRd0a1m[O`NSd0a1k[OaNTd0_201O1O001O00000000O10002N1O0000000000000000000000000000000000000000000000000O10000001O0000000001O00O10000000000000000000000000000001O00O100000000000000000000000000000000\\\\OhLmb0X3S]OgLnb0Y3f000000000000000000000000000000000000000000000O10000001O0000000000000000000000000000000000000000000000000000000000O100000000001O00000001O00000000O1000O100000O101O0N2002N001O0O100001O000000O100001O000000000000O100001O000000O100000000001O0001O000001OO10000000000000000000000000O10000000000000000000000000001O00000000O100000000000000000000000000000001O001O00O100000000000000000000000000O10000000000000O100000000000000001O01O000001O000000O1000000000000000000000000000O1000000000000000000000000000001O000000000000O10000000O1001O0000010OO010000000001O000000O100000000000000000000000000000000000000000000000000000000000000000000000000000000001OO10000000000000000000000O11O000000000000000000000000000000000000000001O0000O100000000000000000000000000000000000000000000000000000000000001O0000O1000000000000000000O100001O0O1000000001O000000O0100000001O0001O000000000000000O10O10000000000001O000000000000000000000000000000000000000000000001O0000O100000000001O000000O1000000001O0000O1000000001O00000000000000000000000000O10000000000000000000000001O0000O100000000000000000000000000001O0000O10000000000000000000000001O0000000000000000000000000000000000000000000000000000000000000000000000000000000000O10000000000000000000000001O0000000000000000000000000000000000000000000001OO10000000000
00000000000000001O00000000O10000000000000000000000001O0000000000O100000000000000000000001O00000000O100000000000000000000001O000000000000O1000000000000000000001O0000000000O100000000000000000000001O00000000O10000000000000000000000001O00000000O10000000000000000000000001O00000000O10000000000000000000000001O000000O10000000000000000000000001O00000000O1000000000000000000000000001O0000O100000000000000000000000000001O00O10000000000000O1001O0000000001O000000O10000000000000000000000001O000000000000O1000000001O0000000000000000000000000000000O1001O0000000000000000000000000000O10000001O00000000lLi[Ol2Wd0TMi[Ol2Wd0TMi[Ol2Wd0800000000000O10000000000mLh[Ok2Xd0UMi[Ok2Vd0UMj[Ok2Vd0UMk[Oj2Ud0VMk[Oj2Ud0VMj[Ok2Vd0UMj[Ok2Vd0UMi[Ol2Wd0TMi[Ol2Wd0TMi[Ol2Wd07100000000mLi[Oj2Wd0VMi[Oj2Wd0VMj[Oi2Vd0VMk[Oj2Ud0VMl[Oi2Td0WMl[Oi2Td0WMm[Oh2Sd0XMm[Oh2Sd0XMl[Oi2Td0WMl[Oi2Td0WMk[Oj2Ud0VMk[O8NR2Wd0fMk[O82n1Sd0jMk[O7;g1jc0RNl[O3d04Ah0Pd0QOl[O0k1`0Yb0@T^O>ma0BT^O8cMFXd02U^O7eMFVd03V^O5eMHUd03V^O4fMITd03V^O4fMITd03U^O5gMHTd03U^O4hMISd03T^O5hMITd02T^O4jMIRd03V^O1iMMPd02X^OOiMOoc02g^ONYa02g^ONYa02h^OMXa03h^OLYa04S30001N10d[OMXa03h^OMXa03h^OMXa03h^OMXa03h^ONWa02i^ONWa02i^ONWa02i^OMXa03T3000000000000000000000000O101O00000O10cV2" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This area presents the title and introductory text providing an overview of the image's intent. It introduces the concept of \"Travel Personas,\" indicates that these personas are used to identify individual travel styles, and how these styles are significant for personalized engagement in marketing. It references a report by the CMO Council from 2018." 
+ }, + { + "image_path": "multipanel_detailed_caption/images/50.png", + "mask_rle": { + "size": [ + 755, + 1136 + ], + "counts": "di[1351jf0j0A:G8I6K5K3M5K4L4L3N3L4L4L4M2M3M3N2N3M2N3ZMhLj^OKf1_3_?hLi^OJf1_3a?gLi^OJe1a3a?fLi^OId1d3b?cLj^OHd1f3b?bLj^OHd1g3a?aLk^OHc1i3a?_Lk^OI09R1`3R`0^Ll^OJN:R1`3S`0\\Ll^OKO:ODk0m3Y`0[Ln^OJNj1CJg`0oM\\_OLM`0=j1EJe`0PN\\_OLM`0=i1GId`0RN\\_OKKb0?g1GIc`0SN\\_OKKb0?f1IIa`0TN]_OJJc0?f1JHe0mMj>7_@KId0>e1LF``0VN]_Od07_1OfMDk1i`0\\N\\_Oe09]13AX`0]N\\_Of08\\14@Y`0^NZ_Og09[14@Y`0^N[_Of08[15AX`0^N[_Og08Y15BX`0^N]_Oe06[14BZ`0^N]_Od05\\14BZ`0^N]_Od06[1NH_`0ZNZ_Oe09Z1LHa`0ZNW_Oe0>e3Z`0hKV_Od0`0a2DgLL8k`0o0f_Oo13oLW`0R1f_On16nLU`0S1f_On16mLU`02S_Oe0c0[25nLU`01T_Of0b0[25mLV`02T_Oe0b0Z25oLU`01U_Of0a0Z25oLU`00W_Of0`0Z24oLV`01V_Of0a0X24PMV`02V_OJJf0f0^24oLW`03U_OIKg0f0\\24PMW`04Y_O`0<\\24PMW`04Y_O`0=[23PMX`06W_O??Y23RMW`06X_O>>Z23RMW`06Y_OX3X`0QL[_Of0X?Jb_Oh01FT2ZOSN1<=j?Ja_Oh03ET2[ORN1<=j?Ja_Og04Ec3Ii^N__Oh06BT2Z1X>[N^_Oi06CQ2[1\\>WN]_Ol06BQ2[1[>YN\\_Ok08AT2X1W>]N[_Om09^OY2S1T>cNX_Om0;^O_2k0n=2b_OSOh2b0f=;f05EPOQ`0FXNe`0W1g_O7Cb0OPNg`0W1h_Og2_ORLi`0V1j_OR3V`0_LT_OYO2Ld0\\4V`0^LV_OYO1L2O:[4_`0aLS_OZO2K2O:Z4``0lLT_OkN2O;X4``0nLS_OkN2O;X4``0QLT_O>M[O3O=U4``0SLR_O?OYO3O" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This illustrates \"The Relaxed Nomad\" persona. With 25% representation, the image shows two individuals in hiking attire with a backpack, indicating a laid-back and adventurous travel style." 
+ }, + { + "image_path": "multipanel_detailed_caption/images/50.png", + "mask_rle": { + "size": [ + 755, + 1136 + ], + "counts": "Vaa>d0if0]Lm@e3R?[LR_OGb1P4[?ZLQ_OGc1P4\\?[Lm^OGf1o3]?`Lc@`3]?aLa@`3_?`La@a3^?`L`@b3_?_L_@c3`?^LX_O\\Oj0W4m?]LQ_OEP1o3o?[LQ_OGo0o3o?ZLS_OHm0o3o?YLT_OHl0P4Q`0iLn_OW3R`0iLm_OX3S`0hLl_OY3T`0hLS_OjNc0_4Y`0gLS_OkNc0_4Z`0fLU_OhNb0b4Y`0fLj_O\\3U`0dLk_O\\3U`0dLk_O\\3U`0dLj_O^3U`0bLk_O^3U`0bLk_O_3T`0aLk_O`3U`0`Lk_O`3U`0`Lk_Oa3T`0_Ll_Oa3T`0_Lk_Ob3U`0^Lk_Ob3U`0_Lj_Ob3U`0^Lk_Ob3U`0_Li_Ob3W`0_Lh_Ob3W`0^Li_Ob3W`0aK]_O8=W4V`0`K__O70X4]`0ZKc_O>0X4]`0YKd_O>1X4[`0ZKd_O>1W4[`0\\Kc_O<4X4Y`0\\Kb_O<7W4V`0^Kc_O;7V4W`0_Kb_O;7V4X`0^Ka_O<8U4W`0_Ka_O=7T4X`0_Ka_O=7S4Y`0_Ka_O>7R4Y`0^Kb_O`04Q4Pa0oKQ_OP4o`0PLR_On3o`0SLP_Ol3Qa0TLP_Ok3Pa0ULP_Oj3Qa0VLP_Oi3Pa0WLP_Oh3Qa0XLP_Og3Pa0XLR_Of3o`0YLR_Of3o`0YLS_Oe3n`0[LS_Od3m`0\\LS_Oc3o`0VLR^OKP1m3o`0VLU^OKn0l3o`0VLU^ONl0k3cb0O1N2N2N2NcM]Ll@b3T?`Ll@^3U?bLl@\\3T?fLl@X3U?hLm@U3S?mLm@Q3T?QMk@l2W?UMj@h2W?XMj@f2W?ZMj@d2X?ZMk@b2X?]Mj@`2W?_Mk@^2X?aMi@]2Z?aMh@[2\\?cMf@Y2\\?hMe@T2`b0K6K5K5J6J7H9Db]]1" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "Presents \"The Nervous Stresser\" persona with 13% representation. The image depicts an anxious individual in an airplane seat, clutching the armrests, reflecting a traveler who experiences stress during trips." 
+ }, + { + "image_path": "multipanel_detailed_caption/images/50.png", + "mask_rle": { + "size": [ + 755, + 1136 + ], + "counts": "gYg34Zc00e@3Z?Mf@4Z?Kf@5Z?Md@3\\?Nc@2\\?O`@6_?Ia@8^?Hb@9]?Hc@8^?Fc@:^?Dc@<[c0O0O11O0O3L<@:I5RMgN[_O]1a`0fN\\_O_1b`0bNZ_Of1``0[N^_Oi1``0YN\\_Oj1c`0[NU_Ok1T`0kMZ^O;_1o1T`0gM[^O<^1Q2U`0bM^^O=[1T2X`0]M]^O?Z1V2i`0jMV_OY2h`0gMW_O[2h`0eMW_O]2h`0dMU_O_2j`0aMU_Ob2i`0^MU_Oe2j`0[MT_Oh2l`0WMS_Ok2l`0VMS_Ok2l`0VMR_Om2l`0TMQ_Oo2n`0PMR_OR3m`0mLT_OT3k`0lLS_OW3l`0hLT_OZ3k`0fLT_O\\3k`0dLU_O\\3k`0dLU_O]3j`0cLU_O^3k`0bLU_O_3j`0aLU_Oa3j`0`LT_Ob3k`0`LR_Ob3m`0`LP_Ob3n`0`LP_Ob3Pa0\\LQ_Od3o`0[LQ_Of3o`0ZLQ_Og3n`0XLR_Oi3n`0WLR_Oj3l`0WLS_Oj3m`0WLR_On1ZOVO\\a0WNh^Oe0a0o1[OVO[a0VNi^Oe0`0P2\\OUO\\a0UNh^Of0`0P2\\OUOba0kNR_On3m`0RLR_Oo3n`0QLR_OP4m`0nKU_OR4k`0lKV_OV4i`0iKX_OW4U`0_K__O81c@6BKd0j45ZKo>Kf@6BJe0k42]KP?Hg@6BJe0l40]KS?Eg@8BId0m4O^Kf?Lh_OHb0o4O]Kh?L^@g4I]Kj?Mc_OIe0e2^O\\O`0YNj?2W@Y2@[O?ZNi?4W@W2A[O?YNj?6U@V2B[O>ZNj?6V@V2AZO>[Ni?7X@[40^Kd?:\\@X41_K`?9`@X40_K_?:a@X4O^K_?;b@W4O^KV?GX@f0c0U4O^KU?d0m@[2]OSN`0oNU?d0n@Z2]OSN?POV?c0n@Z2]ORN`0QOU?c0n@Z2]ORN`0QOV?M[@3c0m2\\ORN?ROW?J^@4`0n2\\ORN?ROY?G^@5?o2\\OSN>ROZ?F]@7>a4KbKZ?E]@9>P2\\OeM1Q2?[NY?F\\@;>l1HG:WNT?KZ@>?e11F7XNo>OY@`01ZO9V2a0AM_No>2X@>1\\O9S2d0AL^Nn>5X@=2ZO9U2b0BOZNm>8V@>3YO9U2b0Bm?cNT_O=c0^1;Cn?[Oh_OQ1:Dn?ZOi_OS1\\OlMh0f1U`0eNR_O7e0g1HlMFa1k`0eNS_O5f0i1]O0j`0QNU_O5d0k1[OOm`0jMQ_O13;d0Y4Y`0YKR_O12c0Y4h`0hKX_OW4h`0jKW_OV4X`0ZK__Oa08T4k`0mKU_OR4k`0nKU_OQ4l`0oKT_OQ4l`0oKU_Oo3l`0QLT_Oo3l`0QLT_On3m`0RLT_Om3l`0TLT_Oj3Y`0cK`_Oc08i3X`0dK`_Oe06f3Pa0[Lo^Od3Qa0\\Lo^Oc3Ra0]Lo^Ob3Pa0`Lo^O`3Qa0`LP_O^3]`0jK\\_Oi08[3\\`0ZMd_Oc2^`0]Mb_Oi1XO`MVa0g0c_Og1XOaMVa0h0c_O_2^`0aMb_O^2``0aMa_O]2``0eLd^O=m0m2_`0eLf^O=l0l2_`0fLf^O>m0i2]`0jLf^O=n0h2\\`0jLh^O=m0g2\\`0iLj^O`0j0f2^`0gLj^Ob0j0e2]`0fLl^Oe0h0b2Qa0^MP_O`2Qa0`MP_O^2b`0nL_^Od0Q1[2^`0UM_^Oa0T1Y2\\`0YM^^O?W1U2\\`0]M\\^O>Z1R2[`0`M[^O7N@]1h2Z`0aM[^O7O^O_1h2W`0bM\\^O8O\\O`1i2T`0dM]^O6g1T2l?fM]^O6h1R2l?gM^^O7g1Q2j?dMc^O;f1n1g?eMe^O=g1k1d?gMf^O>i1Q1kMPOfa0@g^O`0i1m0kMPOha0Be^Oa0k1j0hMROja
0Ad^Od0n1c0eMWO]b06T@:aMA\\b04e@@nLN0`0ia0^Oe^Oc0o28[>VOe^Oa0P3TOi^O>P3?V>TOi^O=Q32_LIfa0Gl^OSOl^O3W3h0ib0N20VKWOjBj0T=XOlB=YKHka0KlB<[KIha0LlB;\\KIha0LmB;ZKJha0KPC:WKKia0KVC?jnDY`0c1\\_OMNkN;HZ`0`1\\_ONOkN8J\\`0\\1^_OOOkN6K\\`0Z1__O1NkN7LZ`0U1c_O6K_NBOe07[`0U1b_O8K\\NFNb0:Z`0S1c_OIWNIOe05W`0W1b_O>IWNJNc08W`0U1b_O`0IUNKN`0MYO;Qa0U1a_Oa0JTNLN>MZO;Qa0U1a_Oa0JTNL1:L]O:Qa0S1b_Oc0ITNN50JG6Pa0T1a_Oc0EZN5>EBo`0S1a_Oc0B_N99EDm`0Q1c_O`0DcN79DDm`0P1c_Oa0DcN88DDm`0P1c_Oa0DcN88DDm`0P1c_Ob0CbN8k0a`02c_Oc0AaN:JI>i`0d0b_OZ1LcMK>g`0e0a_O[1McMIKN7k`0P1a_O[1ObMEL27h`0P1b_O\\12iMDKi`0o0`_OU1DcMa0^1`@h1OlLKV1f`06`_Oj1MhL0W1c`07__Om16jM[`09`_OP21gM_`09`_OQ21eM_`0:`_OR20cMa`0:a_OR2NdMa`0:a_OR2NdMa`0:b_OS2O_M_`0>b_OZ2JUMe`0b0`_OY2KUMe`0@W_Ob0:h2JVMe`0b0a_O4G=3mNe`0c0`_O2LMbNi`0l0T_ObN3b13=NbNi`0R1W_OO2o0[_OJV2WO_>o0\\_OZODAa25_>Q1]_OWOE]ONM`2=a>R1^_OUOD\\OO0Y15TOLD=m`0T1^_OkN@E=On09PO?h`0k0\\_OfNDG:0n0;mN=l`0k0\\_OeNDF:1n0=kNiN:o`0o0[_ObNg15mM:Sa0n0Z_ObNg1T1o>:Z_OcN4Fl0[1g?_1P2nNWMQa0;c^O=^1Q2oNVMPa0;f^O<]1Q2mNXMQa08i^O^_Ol1eNbM33ka0a0[_Oj1TOcMba0e0X_Oh1_a0YN`^Og1`a0ZN_^Of1aa0[N^^Oe1ba0[N^^Oe1ba0[N^^Oe1ba0[N^^Oe1ba0[N^^Oe1ba0[N_^Od1ba0[N^^Oe1ba0ZN`^Oe1`a0[N`^Oe1aa0SNS]OD]1X2`a0SNU]OD\\1X2_a0TNU]OC]1X2_a0UNj^Oi1Va0WNk^Og1Wa0VNm^Og1Ya0QNl^Ok1gc0CbZc:" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "Portrays \"The Luxury Budget-Buster\" persona, constituting 1% of travelers according to this depiction. The image includes a character sipping a drink on a plane, implying a tendency towards indulgence and high expenditure." 
+ }, + { + "image_path": "multipanel_detailed_caption/images/50.png", + "mask_rle": { + "size": [ + 755, + 1136 + ], + "counts": "mS\\b02^g0T1ZOR1\\a0jMX_OV1[OQ1\\a0iMY_OX1XOP1_a0hMY_OT4g`0lKX_OU4h`0kKX_OU4h`0kKX_OV4g`0iKY_OX4g`0hKY_OX4g`0hKX_OZ4g`0fKY_OZ4f`0gKY_O[4f`0eKZ_O[4f`0eKZ_O[4f`0eKY_O\\4f`0fKY_OZ4g`0fKY_OZ4U`0UK`_OOIb0b0[4S`0WK__OOJ`0c0Z4T`0XK]_O0K?b0Z4V`0XK\\_ONN?`0[4V`0XK\\_OM0?>\\4V`0ZKX_OL5=>]4V`0^K]_O5<_4V`0QKZ_O649=`4Z`0WKY_O8>1^Ol3l`0mKT_O7b00@j3l`0aLd_OECg3j`0bLV_OWO5`0Nd3g`0fLU_OVO5b0Nb3h`0hLR_OUO8c0M`3h`0nL[_OCL_3j`0nLY_ODL^3j`0nL[_OEJ]3k`0nL[_OGH[3m`0nL[_OIEY3Pa0eLe_OX4[`0WKP_O5g0d4Z`0UKQ_O6e0e4[`0SKR_O7d0e42RK[?0P@9c0e4MXK`?HR@:OG9o44[Kj?O[@f4J\\Kj?N]@e4I^Ki?M`@d4F`Ki?Ma@c4FaKV?Ai@;;c4EbKW?@i@;;b4FcKi?Jb@b4EdKi?Jb@b4EcKk?J`@c4EcKk?J`@c4EcKk?Ja@a4EeKV?Ai@:<`4EeKV?Cg@8>`4EeKU?Ea@LK;k0]4EgKT?Eg@6`0^4EgKT?Dh@7?^4EgKT?Cj@6?^4DiKS?Cj@6?^4DiKT?Aj@:<\\4IfKQ?Eh@>;V40bKn>Je@a0N_@f0?m3U`0]K[_Og0a0k3T`0^K[_Oh0`0i3V`0`KY_Oh0a0g3V`0bKX_Oh0a0e3X`0eKU_Og0b0d3Y`0gKR_Of0f0a3X`0jLj_OU3V`0iLl_OV3U`0TLo^O3n0h3S`0ULP_O2n0g3S`0nKQ_OLO?m0g3S`0mKS_OLM`0n0e3S`0nKY_O>c0d3T`0lK[_O`0b0b3U`0mKY_Oa0c0`3U`0QLV_O?f0^3V`0TLl^OLOc0n0]3X`0ZLj^O9O]Ok0n3]`0\\Li^O9N_Ok0k3_`0]Lh^O9O^Ok0k3^`0^Lh^O90\\Ol0k3]`0_Li^O9P1V3X`0aLh^O9Q1T3X`0cLg^O9R1S3V`0fLg^O6U1S3S`0hLh^O5V1Q3S`0jLg^O5W1P3Q`0fLe^OK3?X1o2Q`0eLg^OJ1b0X1n2P`0eLo^O=R1l2P`0gLn^O>R1j2P`0gLP_O>Q1i2P`0iLn^O>U1g2m?lLm^O=W1f2l?mLm^O=Y1e2i?oLm^O;]1d2f?RMl^O:_1e2b?RMo^O9a1b2a?VMm^O8c1a2`?WMn^O7d1_2_?XMo^O9c1^2_?UMQ_O=c1[2\\?UMT_O`0b1h1oMSNPb05T@c1PNTNoa08S@b1QNRNma0SOoW1HegNS1>TOmW1IegNS1>UOlW1HegNU1>SOlW1JegNT1=SOnW1j3PhNWLoW1k3ngNVLQX1l3mgNULSX1l3lgNULSX1k3lgNVLTX1j3mgNULRX1l3ngNULQX1k3PhNTLoW1l3RhNTLnW1iMXhN_OAl49mMmW1gM]hNYOGm4OSNmW1eM_hNZOGn4LSNnW1dMahNZOFP5HSNQX1aMSiNh7lV1WHViNh7jV1WHXiNCeNk7SX1bHXiNBiNi7PX1dHXiNBhNk7oW1cHYiNGdNQ41UORX1TMXiNGdNo37ROPX1WMTiNGeNP4:oNgW1iLShNa0U1GgNP4;nNfW1jLShNb0k0ASO6Mo3ihN`NW6Q1RQ1>ghN`NW6S1RQ1=ioNCWP1=ThN_N^7T1^P1=ShNaN]7S1aP1;RhNbN]7S1aP1`oNB`P1=aoNB`P1>`oNBaP1;lnNaNaIT1cW1;lnNbNaIR1cW1;VjNbNX22o
KP1eW1;SjNeNX20PLP1eW1:SjNgNaNMi22nLP1eW1:SjNgNaNNh21PMo0eW19TjNiN^NMj22oLn0fW1:SjNjNT2NTLm0eW1:UjNmNo1LWLl0fW1:VjNoNl1JYLl0fW1:VjN[O_1_OeLl0fW19WjN\\O^1_OeLl0fW19WjN\\O^1^OgLk0fW19WjNQO`MOm3LfLk0fW19YjNlNcM1i3OfL8YO5]X1f0knNmN`I7XO6^X1e0jnNnNaI6WO7^X1d0knNoN`I5XO7^X1e0goNTOkG7_X1c0^kNZONLUL7_X1c0]kN^OMGWL7aX1c0ZkNBMCXL8aX1b0djNRO]Mc0BMT1@b0bjNUO]M3_33lK4gX1`0ajNVO^M1_35lK2gX1b0`jNWOf15SK2hX1a0fiNWOWN1X45SK2iX1`0eiNXOWN0Y46RK1jX1a0_oN^OgG1jX1a0`oN]OfG1lX1a0_oNGaP19aoNE`P1:coNB^P1>coN@_P1?aoN^ObP1b0i8O001O00`bN5XX1J[gNh0aX1WO^gNh0dX1YOZgNOYLCi\\1=lfN0dY10[fN0E@hL0ZY1?XiN@SN>f2FaLO^Y1=XiN@SN;i2J\\LOaY1c2VORU1d2VOSU1ShNb0d2POZU1?PhNc0d2nN]U1?ngNd0k\\1^ORcNc0n\\1AmbN`0S]1FfbN;Z]1FdbN;\\]1FbbN:hNgN`^1o0gbN;gNgNc^1n0dbN=hNeNe^1n0abN>hNfNh^1m0]bNc0U2^NRX1o0feNf0W2[NTX1R1_eNe0]2YNUX1^1idNa0R3QNVX1g3QgNYLUNOmZ1f3lfNfLcY1Y3[fNPM_Y1o2afNUM\\Y1k2bfNVM_Y1i2bfNUM`Y1j2`fNTMdY1k2\\fNQMhY1n2XfNoLlY1P3UfNmLoY1Q3RfNlLQZ1S3PfNiLTZ1V3leNgLXZ1Y3geNfL[Z1o2XcNXM]2IWN1^[1l2ncNZM\\2JVN4_[1g2ocN[M\\2JUN6`[1n2ZfNnLTN5d[1l2VfNQMSN5h[1k2RfNSMPN5Q\\1k2jeNfMXZ1Z2eeNgM]Z1i1ecNdMl1d0`Z1V2aeNiMaZ1U2`eNjMbZ1T2`eNQM\\N78GV[1m2nfNXMjMOW[1e2SgN\\MeM0Y[1a2SgN_MdM1Z[1^2SgN`MbM3][1\\2QgN`MbM4_[1[2nfN_MeM5a[1[2_gNdMdX1]2YgNbMkX1\\2kcN_MP35YY1Z2ecNcMP33^Y1`2afN`McY1]2\\fNdMhY1W2fcNiMj11cZ1S2`cNRNi1LkZ1n1[cNXNn0A^O;\\\\1i1WcN\\Nn0EZO7f\\1a1VcNbNh0HZO6l\\1Z1VcNfNd0IZO7R]1R1UcNmN=J\\O8[]1c0ScN\\O3I\\O:m]1HQcN:_OCC;d_13f`NCE:M@a_1`0m`NFE:LAg_1;g`NJF:LAl_16b`NOF;U`1Ll_NIO=W`1Gh_NM4=P`1Gk_NLNKLb0[`1Fk_NMMMLa0]`1Bk_N0LNIb0Ta1@R_Ni0l`1XOT_Ni0k`1WOU_Ni0k`1XOS_Ni0m`1WOS_Ni0n`1UOS_Nk0n`1SOT_Nk0Pa1POT_Nm0]a1L5K4L8BPPQk0" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "Depicted here is a male customer support agent with a headset. Behind him are symbols such as a magnifying glass and a wrench, suggesting a focus on service and problem-solving." 
+ }, + { + "image_path": "multipanel_detailed_caption/images/24.png", + "mask_rle": { + "size": [ + 1620, + 1500 + ], + "counts": "QRTc1g0ca1e0]O`0A=F8I7H6K9G5J6K6K5J5L5J5K6K4L4M2M4K4M4L4M3L4M2M4L3N3M3L3N2N2N3M3L3N3MYLVdNg0g[1VOieN@UZ1?oeN@oY1`0SfN@kY1`0WfNAfY1=`fNA^Y1;kfNBSY1igNBUX1>mgNBQX1>QhNBmW1?ThNAkW1?UhNBiW1>XhNCfW1>ZhNCdW1>[hNDcW1=XhNIfW17YhNKfW16XhNMfW14ZhNeMVMl1^Z1`0[hNeMXMk1[Z1`0^hNeMWMl1ZZ1?_hNdMXMn1WZ1?`hNdMYMn1UZ1?bhNcMYMo1SZ1`0chNaMZMP2QZ1`0fhN_MXMR2QZ1`0ghN^MXMS2PZ1`0ghN^MXMS2oY1`0jhNMTW14lhNLTW14SiNFkV1;ViNEiV1;WiNEhV1mhNDQW1?nfN^L30i0U3UX1b1cgNdN\\X1j501N2O0O2O0O2N101N101N2O0O2O0O2O0O101O0O2O0O2O0O101N10001N10001N101O0O101O0O101O000O101O000O101TiNeE]U1[:bjNiE[U1W:djNlEZU1T:fjNmEXU1U:gjNlEXU1T:ijNkEWU1U:ijNkEWU1U:ijNlEVU1U:jjNjEUU1W:ljNhETU1X:mjNfETU1Z:mjNeESU1\\:mjNbESU1_:ojN_EQU1a:PkN]EQU1c:ojN]EQU1c:njN^ERU1b:mjN`ERU1a:ljNaERU1`:mjNaESU1_:kjNdETU1\\:ljNdETU1W8giNaIT1YNUU1[:ljNcEUU1]:ljNaEUU1_:mjN^ETU1b:_10O1000001_iNZERU1f:ljN^ERU1b:mjNaEQU1_:ojNaEQU1_:PkNaEoT1_:SkN^EnT1b:VkNZEjT1f:XkNXEhT1h:b100PNVhNVIjW1j80000aiNYEQU1g:njN[EQU1e:ojN[EQU1e:njN\\ERU1d:njN\\ERU1d:mjN]ESU1c:mjN]ESU1^8eiN`IV1TNUU1Z8jiN^IP1YNWU1[8diN_IT1WNYU1]:gjNcEYU1]:jjN`EVU1`:_1000000000001O000O[iNbEVU1^:jjNcEUU1]:kjNbEVU1^:jjN`EYU1_:^100O10001O0O1000000O2O00000O101O000O101O000O101O0O101O0O2O000O2O0O[iNQFgT1o9YkNQFgT1o9YkNQFhT1m9[kNQFeT1o9Q2O0O2O001N101N101N101N101VJUgNh1lX1WNWgNUMM^3mX1[OYgNSM1]OH3MT3SY15]gNRMO]OI32Q3jX1@`0@khP6" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This image shows a customer support agent with a globe and a phone headset in the background. The presence of a star and headphones suggests excellence in global support." 
+ }, + { + "image_path": "multipanel_detailed_caption/images/24.png", + "mask_rle": { + "size": [ + 1620, + 1500 + ], + "counts": "e`Tc1?fa1j0]O=hbNcNiX1o1efN\\NQY1m1ffNZNUY1l1VfNaMhMl0l[1j1VfNeNfY1a1TfNcNhY1c1QfNbNkY1d1oeN_NnY1g1meN\\NoY1j1jeNZNTZ1j1heNXNVZ1l1eeNWNXZ1m1eeNVNXZ1n1ceNUNZZ1o1beNTN\\Z1o1_eNUN_Z1P2[eNSNcZ1P5N2O1N2N2N2N2O1N2N2O1N2O2N1O1O1N2O1O1O1O1O1N2O1O1N2O1O1O1O1O00100O1O1N2O100O1O1O1O2N100O1O1O100O1O1O1O100O1O1O1O100O100O1O010O101N100O100O1O100O100O1O100O10000O100O100O100O10000O10000O100O10000O10000O10000O10000O10000O100000000O100000000O10000000000O1000000000000O100000000000000O1000000000000000O1O100O100001O001O0001O0000000000000000000000000O10000000001O000O10001O00000O10000O10000010O0000000M4K4N2N2O1O10000000O100O2O2M2M3N2N2N2N2N2N2N2O1O2M2O2M2O3M2N2M2O1N101O0O101N101N101N1O101N1O2N2O1N2O0O101N1O1O2O0O1O1O100O1O1O2N3M1O2O1N3M3M2N3N1N2N2N2N3M4L3M2N1O2N2M3N2N2N3M2N2M3N2N2N2M3N2N1O001N1O102M2O2N2M2N3M2N3M3M3M3M5K8lJhbN^4k]1J6K6J6J6I5J7J7I6J8G9F:D?_Oe0]OVZP6" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "In this graphic, a male figure with a headset is surrounded by symbols: a question mark, gears, and a light bulb. This represents expertise in finding solutions." 
+ }, + { + "image_path": "multipanel_detailed_caption/images/24.png", + "mask_rle": { + "size": [ + 1620, + 1500 + ], + "counts": "\\eWm09na1>h1_OfN3N101N1X^NJg^11mbN4\\N0b^1NPcN2^N1e^1ImbN6_N1k`1OT_N2k`1NS_N4n`1Ln^N1Ya1Nh^N0V]1HafN7ZL0U]1L_fN1YZ12mbNJa1Od[1:ibNGo_1;n_NGX47UV13aeNGX4;UV1OaeNFX4?UV1MbeNBX4d0VV1K`eNAX4h0VV1I`eN_OY4k0VV1K\\eNYO\\4o0XV1H[jN;cU1F[jNclN8YJSO96kX1>elN8TJWO<3jX1?flN7RJWOa02fX1a0hlN6oIVOf01dX1c0hlN5lIYOi0McX1f0ilN^Y18TlNYOcJ9ZY1oaN@S^1=bfNAnT1=SkNFkT17VkNLiT12XkNOhT1MWgNL^17Z^1NPZ60QfI0]Y6;WfI20AnVXl0" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "Featuring tools and a gauge, the illustration conveys a commitment to quality in customer service, indicated by the 'Quality Service' text." + }, + { + "image_path": "multipanel_detailed_caption/images/24.png", + "mask_rle": { + "size": [ + 1620, + 1500 + ], + "counts": "ZPh6;Vb16J5L3L4F:M3K4K6nKVOUfNm0]Y1HZfN:]Y13]fNO^Y1:QdNPNS2h1eY1j0VfNXOgY1n0TfNTOiY1S1PfNPOmY1V1leNnNPZ1Y1ieNkNTZ1[1eeNiNXZ1\\1beNiN[Z1[1aeNgN\\Z1]1aeNeN]Z1_1^eNdN`Z1a1[eNaNcZ1i4N2N2M3N2O1N2NaLfeNhKLc2\\Z1b1neNiKM_2TZ1f1RfNjKJa2RZ1d1XfNiKEe2QZ1c1hfN^NWY1a1kfN_NTY1`1mfNaNRY1^1ofNcNPY1^1nfNdNQY1]1nfN[MQO^OnY1Y3QgNXMTO^OiY1[3ifNaKA[1:HZY1]3kfNbK@U1?[O^OIgY1U4lfNcK_OS1b0[O]OJfY1T4mfNeK]OQ1d0\\O\\OJeY1U4nfNdK]OQ1d0]O[OIeY1V4ofNcK]OQ1d0]O[OIdY1W4PgNbK]OR1c0\\O\\OIbY1Y4RgN`K]OR1b0]O]OHaY1Z4SgN_K]OS1a0]O]OGaY1[4TgN^K]OT1`0\\O^OG`Y1\\4VgN[K]OX1<\\O_OFaY1[4XgNZK^OZ17KQY1c3egNdLWOJSY1X3dfNfKT1Z1POJWY1V3gfNeKS1]1kNIZY1V3ifNbKS1n2TX1`1jfN`KR1Q3SX1`1VhN`NiW1`1WhNaNgW1^NdgNd0e0n0fW1[NkgNe0>Q1fW1YNPhNd0:S1fW1VNUhNe05U1eW1UNZhNd01W1dW1UN^hNb0MZ1dW1TNahNa0K[1cW1TNdhN`0I\\1bW1SNihN?E^1bW1RNkhN?D^1`W1UNlhN\\jNc0_NiNgN6Q1OQO1\\X1>\\jNc0^NkNfN9o0JUO1[X1>^jNc0[NoNdN\\jNe0_NjNdN>m0DXO1[X1?[jNf0cNdNeNa0j0FWO0\\X1?ZjNg0QOnNXOLPO0\\X1`0ThNXO[1_1TNaN`12K5fN1[X1a0ShNZO\\1\\1TNcNb1KN:aN1\\X1a0RhN\\O\\1Y1UNfNj2L[M6L2[X1b0ShN[O\\1X1UNhNX2JQN3L4K2[X
1c0ShN[O]1W1UNhNX2LnM2O3K2[X1d0RhNYO_1Y1SNgNX2NlM221K2ZX1=dgN\\O>4b1Y1PNfNZ21dM58ML1ZX1lgNYOX2NkMV1HYO:B03;0^ON2e04BH2XX1>mgNWOW10^O4ZOR1MXO7EK7`03A:4CG1XX1`0mgNUOT17nNJ44JQ1NXO5EK8a03@95CG1XX1`0YiN^OdNJ53JP10WO4FL7`04@95CG1WX1a0ngNSOT1:lNJ43Mo0MZO5AO8>7]O97BG1WX1a0ngNTOS19mNK32No0L[Oc0E1;[O97@I2UX1b0ogNSOS19mNK221NHh02Db1n0dNkNI2UX1a0PhNTOR18mNM03X1Q1bN^NS16TOI2SX1c0`iNXOaNN151HO_1_1_NaNT11WOK0RX1e0aiNTOaN114OKO^1j1@XNXOL1PX1f0niNTOWN4NKO_1LbNl1e0YNAM0PX1f0niNTOWN5MJ0_1LbNm1d0XNBM1nW1f0PjNSOWN5LL09Ka05TOl1c0WNCL2nW1f0SiNnN\\O3D325NK0Z1\\2SOgMGL2nW1f0QiNROXO3JO25NK0Y1n0bN;?kNJK2mW1g0QiNSOXO2JN35NKOZ1o0aN<>kNJI4nW1f0UhNnNc06_O2LM26NK0Y1n0cN=0dN373I4mW1g0VhNmNc06@1KO15OK0Y1n0bN>0fN245I4mW1g0VhNnNb06M0@4OLOX1o0bN>OhN137H4mW1g0UhNoNc05K3@13KL90=d2TOcMO2:H3mW1g0UhNoNc05J5@M7`0L0Z3@dL3lW1i0UhNmNe04Ia0K0MNU3DdL3lW1i0UhNmNe05H?N0LNS3FfL1kW1j0ThNnNf04H>00KNR3HeL0kW1k0UhNmNf04H>1OI0S3GdL0mW1k0ThNmNf03I?10D1W3EdL0mW1k0ShNnNh01I?2o0i2hNdL0mW1k0ShNnNh00J`02n0h2iNdL0lW1l0ThNnNg0OK`04l0B`Ng2;QM0lW1l0ThNnNh0MLa03l0_OdNh28RM0lW1l0ThNnNh0MN?0o0@cNh28RM0lW1l0ShNoNj0J;4B\\1@cN\\2LgM;G1lW1U1YjNaNbN^1@dNZ2NhM8G2lW1U1ZjN`N`N^1DcNW22gM6H2lW1U1UkN@hMPOh13gM6I1kW1V1UkN]Oa0VOgL5H1lW1W1TkN[Oc0XOeL5H1kW1X1UkNYOd0YOdL5H1kW1X1UkNZOb0YOfL4H2jW1X1UkNYObMTOZ27\\M4F1mW1W1UkNYOaMVOY26^Ma0bW1k0WkNWO^MZOY24aM?aW1b0UhNmNR3d0]M[OY24bM?`W1`0XhNmNP3d0VMVOM6b26aM>bW1=\\hNlNe03UONh1f0_NWOd21YM;3>cW1:_hNlNa07UOOg1c0`NXOd23XM90a0hW15]iNUOhN0c0OLJFh02XOd2j1PU1VO^iNXOgNm0;oNIc03ZOk10aNj1XW1SO_iN[OfNm07ROL?4ZOk12_Nh1^W1jN`iNBeNm02TON99@e13^Ng1hY1YNPfNm0OVO02=G^14^Ng1W[1UNddN301N2N2N2OdM^`Nh1b_1XN^`Nh1a_1f00000O1OVMf`N\\2W_1fMk`NY2R_1iMo`NW2P_1hMQaNX2Q_1dMQaN]2R_1^Mo`Nc2__12O1O11O001O1O1N101O00YO_M[aNa2d^1aM[aN_2b^1dM_aN[2^^1hMbaNX2^^1iMaaNW2_^1iMaaNW2`^1hM`aNX2c^1eMRaNK0`2P_1cMn`N10[2T_1cMj`N50Y2X_1`Mg`N81X2^_1iMa`NW2`_1hM_`NY2b_1fM^`NZ2d_1dM[`N\\2h_176J001O1N2O1SNo_NU1R`1dNW`NIHU1j1fNY\\1:XbNIGV1f1iN[\\17YbNJGU1e1jNZ\\19YbNIFU1i1gNW\\1=XbNIFS1R`1TOX`NV1g_1kNY`NU1g_1lNX`NS1i_1nNU`NR1l_1ROo_N6OJR`1n0Q`NPOo_1P1U`NlN
l_1Q1Y`NlNh_1S1Z`NkNg_1T1Z`NlNf_1T1[`NkNe_1V1Z`NkNe_1V1[`NjNd_1X1[`NgNe_1Z1\\`NeNc_1[1_`NcNa_1]1_`NdN__1V1o_NgNc02^_1V1R`NeNb04\\_1W1k`NjNPY1LSlNY1oJkNmX1NRlNW1QKlNkX1NTlNV1QKlNlX1LTlNY1oJjNnX1MSlN[1lJhNRY1LSlN_1hJdNWY1LQlNa1fJdN\\_1\\1d`NdN\\_1[1d`NeN]_1Z1a`NiN__1T1a`NnN`_1P1a`NQO^_1o0c`NQO]_1>T`N]O`05\\_1:Y`N_O;7]_18\\`N^O8:\\_18\\`N^O89]_19Z`N_O97^_19n_N@30a06__1:l_NC2Mb06b_16l_NI0Jb06c_14o_NJ]a15k0G0O01O0l[6OTdI1Zb17\\]N3RO3R_N0j`15R_NNl`1n0000O100O1O2N1O0000SKhN_hNX1aW1jN\\hNW1dW1kN[hNT1fW1mNWhNT1jW1lNUhNT1lW1lNShNT1oW1jNRhNU1PX1jNPhNU1h\\1O001O00001O1OnKSO_fNl0YY1BVcNHd2e0VZ1ERcNJf2a0YZ1FobNJh2?[Z1EobNJf2`0`Z1BmbNKc2c0mZ1UOebN3^2g0h^1O[LTObbN3Y2j0R[1OidN4`Z1e0[eN]OeZ1f0YeNYOhZ1i0VeNWOjZ1k0QcNSNk1Q1U[1m0mbNUNl1n0W[1X1hdNfNZ[1Z1fdNeN[[1\\1ddNbN^[1^1bdNaN_[1`1_dN^Nd[1b1\\dN\\Nf[1d1ZdNZNi[1e1UcNfM6c0g\\1g1PcNlM5\\]1a1gbNRN_OI5b0h]1b1dbNSN^OJ5a0j]1b1bbNTN_OI5?l]1d1_bNUN@H4>P^1d1[bNWNAH3;V^1c1TbN\\NDF08_^1c1laN_NG6d^1U1faNcNI3f^1Y1aaNaN__1]1c`NbN_OJi_1b1o`NeNS_1h0T`NZOk0MV_1c0Q`N^Ok0OV_1JR`N82Nf0Oe_1Hl_N8MI^a1:b^NF\\a1O00100O10O010O001O00O2O001O000O2M2]Oc0M3M300100O2O0O2M2O2N2N2N2N2N2N2N2O1N2O1N2M3M3N2N2M4M2M4M2N2N3L3N2N3L4L3N3M2N2M4L3M4M3L4L4M3L4K5L5K4L4L4L4K7H7J6K5J6J6I8H8H8F>B`0^OTloa1" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This panel portrays a customer support individual with a wrench, highlighting the concept of assistance with technical or practical issues." 
+ }, + { + "image_path": "multipanel_detailed_caption/images/24.png", + "mask_rle": { + "size": [ + 1620, + 1500 + ], + "counts": "WWZl08Sb1`0F5\\OXOV_Nk0g`1XOV_Nk0g`1XOW_Ni0f`1ZOY_Nh0e`1YO[_Nh0d`1XO]_Ng0e`1WO\\_Nh0f`1UO\\_Nk0d`1TOb_Nf0_`1XOd_Nf0\\`1ZOe_Ne0\\`1ZOd_Ng0T\\1=WfNkNeMh0l[1h0\\eN]NSO3Di0g[1P1\\eNYNXOMEj0a[1\\1ZeNoMAKAl0`[1b1YeNiMGK[Ok0b[1h1WeNcMLKZOj0_[1o1[eNXMMOYOj0[[1V2aeNjLM7WOi0W[1Z2gfNmLRNi0T[1_2hfNhLUNh0Q[1c2ifNeLVNh0oZ1e2kfNcLVNh0lZ1h2nfN`LVNh0iZ1j2QgN`LTNg0iZ1j2TgN_LRNh0hZ1h2YgN`LnMi0gZ1g2]gN`LkMj0fZ1e2agNcLgMK1c0eZ1P3cgNdLeMI6a0_Z1U3egNiLoM0ZZ1Y3fgNfLUNNSZ1^3hgNcLXNMoY1b3ggNbL[NKlY1Z3ThNkLPNKjY1Z3XhNkLnMKiY1[3YhNjLnMKgY1]3[hNhLnMKeY1_3]hNfLnMKdY1_3`hNfLjMLdY1`3ogNTLZN4E1=7dY1a3ogNTL[N2F2<7bY1b3QhNULZN?2JbY1b3RhNVLZN?0JbY1c3ShNVLYN>2IaY1d3ShNWLYN=2HaY1d3ThN[LVN:4GaY1e3UhN[LTN:6F`Y1f3VhN[LSN:6E_Y1h3XhNZLQN:8D^Y1h3ZhNYMZNnN[Y1j3[hNXMZNnNZY1k3]hNVMZNnNXY1m3^hNUMZNnNWY1n3`hNgKmMV1kLZT1g0YkN]2=lLZT1g0ZkN]2;lL[T1g0ZkNd24eLbT1g0ZkNf2PX1ZMPhNh2oW1WMQhNk2mW1UMShNn2]1]LgQ1e0mlNn2[1^LgQ1d0nlNP3X1]LkQ1b0mlNW3R1VLRR1c0llN]3m0oKWR1d0llN_3k0lKZR1e0klN`3XW1_LihNa3WW1_LihNc3VW1\\LjhNh3RW1XLnhNm3mV1SLSiNo3kV1QLUiNR49[KmR1b0jlNX44WKQS1a0klNc4HmJ]S1`0klNf4EkJ_S1?llNh4CiJbS1>llNj4@iJcS1=mlNl4^OgJeS1=mlN[5oNYJST1XGAg8c0WG\\Oh8g0WGXOi8j0VGVOi8m0UGSOj8o0VGPOi8R1VGnNi8T1VGlNj8T1VGlNi8V1UGkNj8V1WGiNi8X1VGhNi8Z1VGfNj8Z1VGfNi8\\1VGdNj8\\1VGdNj8\\1VGdNi8^1VGbNj8^1VGbNj8^1VGbNj8^1UGcNk8]1UGcNk8]1UGcNk8]1UGcNk8^1UGaNk8_1TGbNl8^1TGbNl8]1UGcNk8]1UGcNk8]1UGcNk8]1UGcNk8]1UGcNk8]1WGaNj8]1WGcNi8]1WGbNj8^1UGdNk8Z1UGgNk8Y1UGgNl8W1VGhNj8X1VGhNk8V1UGkNl8S1UGmNl8Q1UGoNk8P1VGPOk8n0WGQOj8m0WGSOk8j0VGVOk8h0VGXOk8e0WG[Ok8a0XG^Oj8>XGBk89VGHR9JTG6o90000000O1000000O1001OaVe2" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "A smiling face emoticon suggests that running can improve one's mood." 
+ }, + { + "image_path": "multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "mask_rle": { + "size": [ + 360, + 359 + ], + "counts": "QTf07Q;1O01IPE4P;20000WN7SHIj7=fG]OWO165k8a0bGCDIh8h0bG_ODKh8j0bG[OIHc8P1dGWOIIb8R1dGUOJIa8T1dGSOLH_8W1dGQOMG_8Z1dGnNMH^8\\1dGlNNH^8]1cGkNOH]8^1dGjNOH\\8`1dGhN0H\\8`1dGhN0H\\8a1cGgN1IZ8a1eGfN1IZ8b1dGeN2HZ8d1dGdNh8\\1XGdNg8^1XGbNh8^1XGbNh8^1XGbN3HY8f1dGbN3IX8e1eGbNLH21]8f1dGaNMH21]8f1dGaNMI01_8d1eGcNJ0a8]1eGbNK1`8]1eGbNKJ10_8d1eGbNLH21]8e1eGbNLH21^8d1dGcN2JZ8c1dGcNOM]8`1dGcNNN^8^1eGdN2IY8c1eGdN2IZ8b1dGeN2IZ8a1eGfNLN`8[1dGgNKOa8Y1eGhNJOb8X1dGiNJOb8W1eGjNIOc8U1eGlNHOd8T1dGmNGIN3h8U1dGoNF0g8o0dGQOE0h8m0dGSOD0i8k0dGUOC0j8i0eGUOA3k8e0RHZOP8c0QH]OQ8?QHBP89THFo74THLe90O00O2LXl`2" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "An illustration of a leg bone signifies that running increases bone density." + }, + { + "image_path": "multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "mask_rle": { + "size": [ + 360, + 359 + ], + "counts": "[_f11P;9M11O0000000000jNOVG0e89WGGg8>VGBh8b0VG^Oh8e0WG[Oh8h0VGXOi8j0VGVOi8l0VGTOi8n0VGROi8P1VGPOi8R1VGnNi8T1VGlNi8U1WGkNi8V1VGjNi8X1WGgNh8Z1XGfNh8[1WGeNi8[1WGeNh8\\1YGcNg8^1a00O10001O0mFZNd8f1UG[NOOk8f1VG[NOOk8f1VG[NN0l8e1WGZNJ4o8b1WGaNi8_1WGaNi8_1WGaNi8_1WGaNi8^1XGbNh8^1XGbNi8]1WGcNi8\\1XGdNh8\\1XGdNi8[1WGeNi8Z1XGfNi8Y1XGfNh8Y1YGgNh8X1WGiNj8U1VGlNj8S1WGmNj8Q1XGnNi8P1XGPOi8o0VGROk8k0WGUOj8i0WGWOj8g0WGYOk8c0WG]Ok8`0VG@m8:VGFm84VGEXO1l:NTE2l:5000000000000002MQg]1" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "An image of a brain is used to illustrate the benefit of improved brain function from running." 
+ }, + { + "image_path": "multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "mask_rle": { + "size": [ + 360, + 359 + ], + "counts": "^^f26P;4M100000001O01N1000hN5SGLi8:TGFi8?UGAi8c0UG]Oj8e0UGZOk8i0SGXOj8k0UGUOj8m0UGSOj8o0UGQOk8P1TGoNl8R1SGoNl8S1SGnNMFL0g8]1_GmNNFL0f8^1`GlNMGMOf8_1_GkNNLa8[1aGhNNN_8[1cGgNNN_8[1cGgNNN_8\\1bGfNOM_8^1bGeNOM_8^1bGeNNN`8]1bGeNNN`8^1aGdN0L`8`1`GdN0L`8`1`GdNNNb8^1`GdNNNb8^1aGcNMOa8_1aGcNNNa8_1bGbNMOb8^1aGcNMOb8^1aGcNMOb8^1aGcNMOb8]1bGdNLOb8]1bGdNMNa8^1bGdNNMa8^1aGeNMNb8\\1bGfNLNb8\\1bGfNMMb8[1bGhNLIL0f8_1bGhNLMc8Y1bGjNKMc8Y1bGjNKMd8W1bGlNJMe8V1aGmNJMe8U1bGnNIMf8S1aGQOl8m0UGSOl8k0UGUOl8i0UGWOm8e0UGZOm8d0TG]Om8?UGAm8;UGEm86VGIU:0000000001O00O11O001O0KWf<" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The image features a detailed representation of the cardiovascular system, emphasizing its strengthening through running." 
+ }, + { + "image_path": "multipanel_detailed_caption/images/11.png", + "mask_rle": { + "size": [ + 1403, + 4119 + ], + "counts": "Y]k`1=b122L12L7gNKkQ1b3omNQM0I0m2UQ1o6I7L2N2O1N2O1O000O2O0000000000000000000000001O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O100000000001O000O1000001O000O10001N100O2O0O2N2N1M;En0ZMTnNdIRS1VOllNo4YV1kNR1_O?_O?K6K4L3N2N1O2N100O10001N10000O100000000O10001O00000000000000001O000000000000000000000000O100000000000000000000000000000000000000001O0000000000000000O1000001O000000000000000O100001O0000000000000000000001O0000000O100000001O0O100000001O0O101N1O2N2M3M4K4J8gNl1\\iNXKJMTS1Q8jlNoG\\Q1i9\\Od0K2N2N2O0O2O001O00000O10001O000000000000001O000000000O100000000000000000000000000000000000000001O000000000000000000000000000000O100000000001O0O100000000000000O2O00000000000O101O000O2O001N1O2Of0WO=]OU2gG]lN<3FJDmV1YOQ\\W28hghM`0ROn0@:H7I7J7J6J5J6K5J7J4K6J5L4K6J6J5K6K4L4K6J5L4K6K4L4M3L3N2M4M2N2N2N3M2M3N2O1N2N2N2N3M2N2O1N2N2M4M2M4L3N2M3N2M4M2N2N2N2N2N2N2O0O2N2N101N2N101N2N101N2N101N1O2N2O0O2N1O2N2N2N2N2N2O1N2N101O0O2O0O2O0O101O0O101N10eJknN0TQ1BdoN6[P1^OWPOdY1Q1M2N2N100O2O000O101O0000000000001O00000000001O00000000000000000000000O10001O0000000000000000001O0000001O0000001O00001O00001O00001O00001N101O1O1O1O1O2N4QiNfM[O<@1oP1U2jnNd3_P1T4H1O100O001O00001O0000001O000000000000000000000000000O100000001O000000O100000001O00000000000000000000O101O00000000000000000000O1000000000001O0000000O1000000000000000000000O100000000000O100O1N2N2RIfPOoKY36\\l0P3^7H8M3N2N2O1N201N1O100O10000O1000000000O10000000000000O100000000000001O0000000000000000000000000000000000O100000O100000000000000O101O0O2OinNPNch0o1[WOUNdh0k1ZWOXNfh0f1XWO^Nhh0`1VWOdNjh0Y1UWOjNmh0VOaoN[1`7Cii00WVO5cR14M3M3M3O1N2N2N2N2O1N2N2N3M2N2N2N2N2M3M3K5L4L4M3M3N2M3N2M3N2M3N2N2O1N2N2M3M3L4L4M3M3N2N2N2N2N2M4M2N2N2N2N1N3M3L4M3M3M3L4M3K5L4L4M3M3N2N2N2N2N2M3N2N2N2N2O1N2N2N2M3L4L4M3L3N3L3M4M2O1O1O1N0100O001O001000O100O10O10000O10000O100001O1O2M2M4J5L5N2O1O001O100O010O100000000O100
00000000000000000000000000000000000O10001O0UMdmNWL\\R1[3TnNcLlQ1Y3YnNfLgQ1W3]nNhLcQ1U3anNjL_Q1S3enNlL[Q1Q3hnNoLXQ1l2mnNTMSQ1b2WoN^MiP1^2[oNbMeP1\\2]oNdMcP1[2^oNeMbP1Y2`oNgM_P1Y2boNgM^P1W2doNiM\\P1V2doNkM\\P1S2foNnMYP1Q2hoNoMXP1Q2hoNoMXP1P2ioNPNWP1P2ioNPNWP1o1ioNRNWP1P2goNPNYP1Q2foNoMZP1S2coNnM]P1V2_oNjMaP1[2YoNfMgP1m2enNTM[Q1Q3_nNPMaQ1S3YnNPMgQ1R3TnNQMlQ1P3QnNRMoQ1o2omNRMQR1o2mmNRMSR1o2kmNQMWR1P3fmNQMZR1X600000000000001O000O10000000000000000000000001O000000001O000001O01O00001O001O10O01O2O2M2N3N2M2O2M2O1O2O1N2O1O1O1O01O01O01O010O1O103L4L11M2N2N2M3M3N2M3M4L3M2O2M3M3M2N3M2N2N2N2N3M2N2N3M2O1N3M2N2N3M4L5K4L3M3M2N2N2N2N3M2N2N2N2O1N3M2N2N3M2N3M3M3M3L4M2N3M2N3M3M3M4L3M2N3M2N3M2N2N2N3M2N2N3M2N3M3M3M3M3M3M3M3M3M2N2N2N2M3N1O2N2N3M4L4L3L4M3M3M2N2M3N4K6K3L4KX`b18lU\\NHO;`N:RR1BXoN<`N09k064Fd0fP1PN\\oNU:_P1e0N2N1O2O001O000O101O0000000000001O000O1000000000000000000O10000000000001O0000000000000000000000000000000000000000000000000000000000000000000O10000000001O00000000O1000O100000000000000O010O1N2L4lMboNcHfP1d6e2L5L3N2N2O1O1\\KWlNV1iS1gNTmN>mR1@YmN;hR1B^mN;bR1CcmN9^R1EfmN9ZR1FgmN:YR1FhmN9XR1GimN7XR1HimN8WR1HimN8WR1IhmN7XR1IhmN7XR1IhmN7XR1IhmN7XR1IhmN7XR1IgmN8YR1HgmN8YR1HgmN8YR1HgmN8YR1HgmN8YR1IfmN7ZR1IfmN7ZR1IfmN7ZR1IfmN7ZR1IfmN7ZR1IfmN7ZR1IgmN6YR1JgmN6YR1JgmN6YR1JgmN6YR1JgmN6YR1KgmN4YR1LgmN4ZR1KfmN5ZR1KfmN5ZR1KfmN6YR1JgmN6YR1KfmN5ZR1KfmN5ZR1LemN4[R1LemN4[R1McmN4\\R1NbmN3^R1NamN2_R1O_mN2aR1O^mN1bR1O]mN2cR1O\\mN1dR10YmN2gR1OWmN2iR1NUmN4kR1MSmN4mR1MPmN5PS1MjlN6WS1KdlN9\\S1J^lN9bS1IZlN9fS1KRlN9nS1j30000001N1000000000000O2O000000000O1000001O000O100000001O0O10001N10001N10000O2O0O2O000O101O0O101O0O101O0O101O0O2O000O2O1N1O2O0O2N2N1O2N2N2O1N2N2N2N1O2N2O1N2N2N2O1N3M2N2N2N2O1N3M2M3N2N3M2M3M4M2M4L4L3N3L3N3L4L4L4K6J7I7I8F>A`0_O?B>B_dm1LRQQND9an0LoQOVODm0]n0M_RO]N^Od1Un0MiUO1Yj0MjUO0Xj0NjUO0\\j0JgUO2[k0mNlTOi0ek0gNbUO0Uh[Z1" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The image 
displays the word 'HOT' in large, bold, uppercase letters with varying colors for each letter. The 'H' is in red, the 'O' is in mustard yellow, and the 'T' is in a light blue color." + }, + { + "image_path": "multipanel_detailed_caption/images/11.png", + "mask_rle": { + "size": [ + 1403, + 4119 + ], + "counts": "VTg5e0aZ1T1YO?A>D=C=E8G:G8G:H6J6J7I6J5K6K5J6J5L5J5K6K5J6K5J5L4L4M2M4M2M4L3M4M2M4L3N3L4L3N3L4M2M4M2M4M3L3N3L4L3N3M2M4M2N3L3N3L3N2N2N2N2N2N2N2N2N2N2N2N2N2M3N2N2N2M3N2N2N2N2N2N2N2N2O1N2N2N2N2N2N2N2O1N2N1O2N2O1N2N101N2N2N101N2N2N2O0O2N2N2O0O2N2O1N1O2N2O0O2N101N2O0O2N101N101N1O2O0O2O1N101N2O0O2O0O2O0O2N2O0O2O0O101N101N100O2O0O2O001N101N101O0O101O0O2O000O2O0O101O0O2O0O101O0O101O0O2O000O2O000O2O000O101O0O10001O0O1000000O2O0000001N1000001O0O1000001O000O1000001O0O100000001O00000O1000001O0000000O10001O000000000000001N100000000000000000001O000000O1000VXOX_Oad0h`0][O[_Obd0e`0\\[O]_Odd0c`0[[O__Odd0a`0[[O`_Oed0``0Z[Ob_Oed0^`0Z[Oc_Ofd0]`0Y[Od_Ogd0\\`0Z[Oc_Ofd0]`0[[Ob_Oed0^`0\\[O`_Oed0``0][O^_Ocd0b`0a[OX_Oad0h`0Y301O00000O10000000001O0000000O10000000001O000O10cWOc_O\\e0]`0aZOi_O\\e0W`0bZOn_O[e0R`0eZOP@Ye0P`0gZOQ@Xe0o?hZOR@We0m?jZOT@Ve0k?lZOR@Ue0n?lZOP@Ue0P`0mZOm_OTe0S`0mZOj_OUe0U`0mZOh_OUe0X`0nZOb_OVe0]`0T30000O10001O000O101O000O101O00000O2O0000001N10001O0O10001N10001N10001N10001N10001N10001N101O0O2O0O2O001N100O2O0O2O0O2O0O2N_WOi@`d0W?^[Ok@bd0T?][Oo@cd0P?Y[OTAgd0l>U[OXAld0f>S[O]Ald0c>S[O^And0`>R[OaAnd0_>Q[ObAPe0\\>P[OfAod0Z>Q[OfAod0Y>R[OgAod0X>P[OiAPe0V>Q[OkAod0T>R[OkAnd0T>S[OlAnd0S>R[OmAnd0R>T[OmAmd0Q>T[OoAld0Q>T[OoAmd0o=T[ORBkd0n=V[OQBkd0m=V[OSBjd0m=V[OSBkd0k=V[OUBjd0j=X[OUBid0j=X[OUBhd0j=Z[OUBgd0j=Z[OTBgd0k=][ORBdd0l=c[OlA`d0S>d4O2O1N2N101N2N2O0O2N2N101N2N2N2O0O2N2N2N2O0O2N2N101N2N2O0O2N2N1O2N2N2O1N2N2N2N2N1O2N2N2N2O1N2N2N2N2N2N3M2N2N2N3L3N2N2N2N2N2N2N2M3N2N2N2N2N2M4M2N2M4M2N3L4M2N3L3N3L3N3L3N3L3M4M2M4M3L4L3M4L4M2M4L4L4L4L4L4L4L4L4L4L5K4K6J6J6J6J6J6J6J6I7J6I8I8G8H8H:DoYOPBQf0P>oYOPBQf0P>oYOPBPf0R>oYOnAQf0R>oYOnAQf0R>oYOnAQf0R>PZOmAoe0U>PZOkAPf0U>PZOkAPf0U>PZOkAPf0V>oYOjA
Pf0W>oYOjAQf0V>oYOjAQf0V>oYOjAQf0W>nYOhARf0Y>mYOhASf0X>mYOhASf0X>lYOiATf0W>lYOiATf0X>jYOiAUf0X>jYOiAVf0W>iYOjAWf0V>hYOkAXf0V>eYOlA[f0T>dYOmA[f0T>dYOmA\\f0S>dYOmA\\f0S>dYOmA\\f0T>cYOkA^f0U>bYOkA^f0U>dYOiA\\f0W>fYOgAYf0Z>jYObAWf0^>kYO`AUf0`>mYO^ASf0b>nYO\\ASf0e>nYOYARf0g>oYOWARf0i>oYOVAPf0k>QZOSAPf0m>QZORAoe0n>SZOo@ne0Q?SZOn@me0R?UZOl@ke0T?YZOh@ge0X?_ZOb@ae0^?_ZOb@ae0_?^ZOa@be0_?\\ZOd@ce0\\?SZOn@me0R?RZOo@me0R?RZOPAme0P?RZOQAne0o>QZORAoe0n>PZOTAoe0l>QZOTAoe0l>PZOUAPf0k>oYOWAPf0i>oYOXAQf0h>nYOYARf0g>mYOZASf0f>kYO\\AUf0d>jYO]AVf0c>hYO_AXf0a>eYObA[f0^>UYORBkf0n=SYOUBlf0k=SYOVBmf0j=RYOWBnf0i=RYOWBnf0i=QYOXBof0h=QYOXBof0h=PYOYBPg0g=PYOYBPg0g=oXOZBQg0f=oXOZBQg0f=oXOZBQg0f=oXOZBQg0f=oXOZBQg0f=oXOZBQg0f=PYOYBPg0g=PYOYBPg0g=PYOYBPg0g=PYOYBPg0g=PYOYBPg0g=QYOXBPg0g=PYOYBPg0g=PYOYBPg0g=PYOYBPg0g=QYOXBof0h=QYOXBof0h=QYOXBof0h=RYOWBnf0i=RYOVBof0j=RYOUBnf0k=RYOUBnf0k=SYOTBmf0l=SYOTBmf0l=TYOSBlf0l=UYOSBlf0m=UYORBkf0n=eYObA[f0^>iYO]AYf0b>jYO[AVf0e>lYOXAUf0h>mYOVASf0j>oYOSARf0m>oYORAQf0n>PZOQAPf0n>RZOQAne0o>RZOPAPf0o>oYOSAPf0m>PZOSAPf0m>oYOTAQf0l>nYOUARf0j>nYOXARf0g>mYOZASf0f>mYOZASf0f>lYO[ATf0e>lYO\\ASf0d>lYO]AUf0a>kYO`AUf0`>jYOaAVf0_>jYOaAVf0_>iYOcAVf0]>iYOdAXf0Z>hYOgAXf0Y>gYOhAYf0X>eYOjA[f0U>eYOmA[f0R>bYOQB^f0o=YYOZBgf0e=VYO_Bjf0a=UYO`Blf0_=TYOaBlf0_=SYObBmf0]=TYOdBkf0\\=UYOdBlf0[=SYOfBmf0Y=TYOgBlf0Y=TYOgBlf0Y=TYOgBmf0X=SYOhBmf0W=TYOiBmf0V=TYOiBlf0W=TYOiBlf0W=TYOiBmf0U=UYOjBkf0V=UYOjBkf0V=UYOjBlf0T=VYOkBjf0U=VYOjBkf0V=XYOgBif0X=[YOdBef0[=^YOcBbf0]=`YOaBaf0^=`YOaB`f0^=cYO`B]f0`=dYO_B]f0_=eYO`B[f0`=fYO`BYf0`=hYO_BYf0_=iYO`BWf0`=jYO_BVf0a=kYO^BVf0`=lYO_BTf0a=mYO^BTf0a=mYO^BSf0a=nYO_BRf0a=oYO^BRf0a=oYO^BQf0a=PZO_BPf0a=PZO_BQf0`=oYO`BQf0_=PZOaBQf0^=oYObBQf0]=oYOdBQf0\\=oYOdBRf0Z=nYOgBRf0Y=nYOgBSf0W=mYOjBSf0V=mYOjBTf0T=lYOmBTf0S=lYOmBTf0R=lYOoBUf0P=kYOPCUf0ohXOPBQg0Q>mXOSBof0n=QYOSBnf0n=PYOUBmf0l=SYOUBlf0l=SYOUBkf0m=SYOVBkf0j=UYOYBgf0i=XYO[Bdf0e=[YOQCoe0Q=PZOQCme0P=SZOQCle0P=SZOQCke0Q=SZOQCke0P=UZOPCke0Q=TZOPCje0Q=VZOoBie0S=UZOnBke0R=UZOnBje0T=UZOlBke0T=TZOmBke0U=TZOlBke0T=UZOlBje0V=TZOkB
le0U=TZOkBke0W=TZOiBle0W=SZOjBle0W=TZOjBke0W=SZOjBle0W=TZOiBle0X=SZOhBme0X=SZOhBle0Y=SZOhBme0Y=RZOfBoe0Z=QZOfBne0\\=QZOdBoe0\\=QZOcBPf0^=nYOcBQf0^=oYOaBRf0_=nYOaBQf0a=mYO`BSf0`=lYO`BUf0a=iYO`BVf0a=iYO`BWf0a=gYO`BYf0`=gYO`BXf0a=hYO_BXf0b=gYO^BXf0c=gYO^BYf0c=gYO\\BYf0d=gYO\\BXf0e=jYOYBVf0h=jYOWBUf0j=lYOVBSf0k=nYOSBRf0m=oYORBPf0o=PZOQBPf0P>PZOPBoe0P>QZOPBne0Q>RZOoAne0R>QZOnAne0S>RZOnAme0R>SZOnAme0S>RZOmAme0T>SZOlAme0T>RZOmAne0S>RZOmAne0T>PZOmAoe0T>PZOmAPf0S>oYOnAQf0R>nYOoAQf0R>mYOPBSf0Q>jYOQBVf0o=fYOUBZf0k=dYOWB[f0j=bYOYB^f0g=aYOZB_f0g=_YOZBaf0f=_YOZB`f0g=`YOYB`f0g=`YOYB`f0h=_YOXBaf0h=_YOXBaf0h=_YOXBaf0h=^YOYBaf0i=^YOWBbf0i=^YOWBbf0i=^YOWBbf0i=^YOWBbf0i=^YOl_OSOk1_g0Z>]YOi_OXOk1Zg0]>^YOg_OZOk1Xg0^>^YOf_O\\Ok1Vg0_>^YOf_O]Oj1Ug0`>^YOe_O^Ok1Tg0a>]YOd_O@j1Rg0c>^YOc_O@j1Rg0c>^YOb_OAk1Qg0c>_YOa_O@k1Rg0d>^YO`_OAl1Qg0d>^YO__OAn1Qg0d>]YO]_OCo1Pg0d>QZO\\Aoe0d>PZO\\AQf0d>nYO]AQf0d>oYO\\AQf0d>nYO]ARf0c>mYO^ASf0c>kYO^AUf0b>jYO^AWf0b>iYO_AVf0a>jYO_AVf0a>jYO_AUf0b>kYO^AUf0b>lYO]ATf0c>mYO\\ASf0d>nYO[ARf0e>oYO[APf0e>QZOZAoe0f>RZOYAne0g>RZOYAne0g>SZOXAme0h>SZOYAle0g>UZOXAje0i>VZOWAje0j>UZOVAke0j>VZOUAje0k>VZOUAje0k>VZOUAje0k>VZOUAje0k>VZOUAje0k>WZOUAhe0k>XZOUAhe0k>XZOUAhe0k>XZOUAhe0k>XZOUAhe0k>XZOUAhe0k>XZOUAhe0k>XZOUAhe0k>XZOUAhe0k>YZOTAge0m>XZOSAhe0m>WZOTAie0l>WZOTAie0l>WZOTAie0l>VZOUAje0k>VZOUAje0j>VZOWAje0i>VZOWAje0i>UZOXAke0h>UZOXAke0h>TZOYAle0g>TZOYAle0g>SZOYAne0g>RZOYAne0g>RZOYAne0g>_YOX_OCR2nf0f>_YOZ_OAP2Qg0e>^YO\\_O@o1Rg0e>^YO\\_O_OP2Sg0d>]YO^_O_On1Tg0d>]YO__O^Om1Ug0c>_YO`_OZOm1Xg0c>^YO`_OYOn1Yg0b>^YOb_OUOn1]g0`>^YOmAbf0S>^YOmAbf0S>bYOiA_f0V>bYOhA_f0X>aYOhA_f0X>aYOhA_f0W>bYOiA^f0W>cYOhA]f0X>cYOhA]f0X>cYOhA]f0X>cYOhA]f0X>dYOgA]f0X>cYOhA]f0X>cYOhA]f0W>eYOhA[f0X>gYOgAXf0Y>iYOfAWf0Z>kYOdAUf0\\>mYObASf0]>oYObARf0]>oYOcAPf0]>QZObAoe0^>QZObAoe0^>RZOaAne0_>RZOaAne0^>SZObAme0^>SZObAne0]>RZOdAme0\\>SZOdAme0\\>RZOeAne0[>QZOfAoe0Y>RZOgAoe0X>oYOjAQf0V>nYOkARf0U>lYOmATf0S>hYOQBYf0n=dYOUB\\f0j=cYOXB]f0h=bYOYB^f0g=aYOZB_f0f=`YO[Baf0d=_YO\\Baf0c=`YO]B`f0c=_YO^Baf0b=_YO^Bbf0a=^YO_Bbf0`=_YO`Baf0`
=^YOaBbf0_=^YOaBcf0^=]YObBcf0]=]YOdBcf0\\=]YOdBcf0\\=]YOdBdf0[=[YOfBef0Y=\\YOhBcf0X=]YOhBcf0X=]YOhBdf0V=]YOjBcf0V=\\YOkBdf0U=\\YOkBef0T=[YOmBdf0R=]YOnBcf0R=^YOmBcf0Q=^YOoBbf0Q=^YOoBcf0P=]YOPCcf0o<^YOQCbf0o<_YOQCaf0m<`YOSC`f0mB=F:F:F9H9H6I7J7H7J6J6K5J7I6J6K4L4L4L4K5L4L4L4K5L4L4L4K6K4L4M3L3N3L4M2M4M2M4M2M4L3N3L3N2N3^TO`Fcd0d9X[ObFdd0`9Y[OdFcd0`9Y[OcFed0_9X[OdFed0`9X[ObFfd0`9W[OcFfd0a9W[O`Fhd0b9U[O`Fjd0b9T[O_Fjd0d9T[O]Fkd0e9S[O\\Fld0f9Q[O\\Fnd0g9oZOZFPe0h9nZOYFPe0j9nZOVFRe0m9jZOUFUe0m9iZOTFUe0o9hZOSFWe0o9eZOSF[e0o9aZOTF^e0n9]ZOVFbe0l9[ZOVFde0l9YZOUFge0m9VZOUFie0m9UZOTFje0n9TZOSFke0o9TZOQFke0Q:SZOPFle0R:SZOnEle0T:SZOlEle0V:SZOjEle0Y:RZOhEle0Z:SZOfEle0\\:SZOdEke0_:TZObEje0`:UZO`Eje0b:UZO^Eje0d:UZO]Eje0d:UZO\\Eje0f:UZO[Eie0f:WZOZEhe0h:WZOXEhe0j:VZOXEhe0j:WZOVEie0j:WZOVEhe0l:WZOTEhe0n:VZOSEje0n:UZOREje0P;UZOPEje0R;TZOoDke0S;TZOmDle0T;SZOkDme0V;RZOkDme0W;RZOiDme0Y;RZOgDne0Z;QZOfDne0\\;QZOcDoe0_;PZOaDPf0`;oYO`DPf0a;PZO_Doe0c;PZO]Doe0e;PZO\\Dne0e;RZO[Dne0f;QZOZDne0h;QZOXDne0i;RZOWDme0k;SZOTDme0l;SZOTDle0n;SZOSDke0n;UZORDje0PnYOSBiMA[h0\\>lYOQBoM]OVh0c>kYOPBof0Q>PYOnAPg0S>PYOmAPg0T>oXOlAPg0U>PYOkAPg0V>oXOkARN]OSh0i>jYOlAPN_OTh0f>kYOlAmMBWh0b>lYOmAjMEWh0`>nYOmAfMG[h0\\>oYOiBoe0Y=oYOiBPf0W=PZOjBne0X=QZOiBne0W=RZOjBle0X=RZOjBme0V=SZOjBle0X=SZOhBme0X=SZOiBke0Y=TZOgBle0Y=TZOgBke0[=TZOeBle0[=TZOeBke0]=TZOdBke0\\=UZOdBje0^=UZObBke0^=VZOaBie0a=VZO_Bje0a=VZO`Bie0a=VZO_Bie0b=WZO^Bie0c=VZO]Bje0c=VZO]Bie0d=WZO]Bhe0d=WZO\\Bie0d=WZO\\Bhe0e=XZO[Bhe0e=WZO\\Bie0e=VZO[Bie0f=WZOZBie0f=VZO[Bje0e=VZO\\Bhe0f=WZOZBie0f=VZO[Bje0e=VZO[Bie0g=UZOZBke0f=UZOZBje0h=TZOYBle0g=TZOYBke0h=UZOXBke0i=TZOWBle0i=TZOWBke0j=UZOVBke0k=TZOUBle0k=UZOTBje0m=VZOSBje0n=UZORBke0n=UZORBje0o=VZOQBje0P>VZOoAje0Q>VZOoAje0Q>VZOoAie0S>VZOmAje0S>VZOmAje0S>VZOmAje0T>UZOlAje0U>VZOkAje0U>VZOjAke0V>UZOjAke0W>TZOiAle0W>TZOiAke0X>UZOhAke0X>UZOhAke0Y>TZOgAle0Y>TZOgAke0Z>UZOfAke0Z>UZOfAke0[>TZOdAme0\\>SZOdAme0\\>SZOdAle0]>TZOcAle0]>TZOcAle0]>TZOcAle0^>SZObAme0^>SZObAme0^>SZObAle0_>TZOaAle0_>TZOaAle0_>TZOaAle0_>TZOaAle0_>TZOaAle0_>TZOaAle
0`>SZO`Ale0a>TZO_Ale0a>TZO_Ale0a>TZO_Ale0a>TZO_Ale0a>TZO_Ale0a>TZO_Ale0a>TZO_Ale0a>TZO^Ale0d>SZO\\Ame0d>SZO\\Ame0d>SZO\\Ame0d>SZO[Ane0e>RZO[Ane0e>RZO[Ane0e>RZO[Ane0e>RZOZAoe0g>PZOYAPf0g>PZOXAQf0h>oYOXAQf0h>oYOXAQf0h>oYOXAQf0h>oYOXAQf0h>oYOXAPf0i>PZOWAPf0i>PZOWAPf0i>PZOWAPf0i>PZOWAPf0i>PZOXAoe0h>RZOWAne0i>RZOWAne0i>RZOWAne0j>QZOWAne0i>RZOWAne0i>RZOWAne0i>RZOXAme0h>SZOXAme0h>SZOXAme0h>SZOXAme0h>RZOYAne0g>RZOYAne0g>RZOYAne0g>RZOYAne0g>RZOYAne0f>RZO[Ane0e>RZO[Ane0e>RZO[Ane0e>QZO\\Aoe0d>QZO\\Aoe0d>QZO\\Aoe0d>PZO]APf0c>PZO]APf0c>oYO^AQf0b>oYO^AQf0b>oYO^AQf0b>oYO^AQf0b>oYO^AQf0b>PZO]AQf0b>oYO^AQf0b>oYO^AQf0b>PZO\\AQf0c>PZO]APf0c>PZO]APf0c>PZO]APf0c>PZO]APf0c>PZO]APf0c>PZO]APf0c>QZO\\Aoe0d>QZO\\Aoe0d>QZO\\Aoe0d>QZO\\Aoe0d>QZO\\Aoe0d>QZO\\APf0b>QZO^Aoe0b>QZO_Ane0a>RZO_Ane0a>QZO`Aoe0`>QZO`APf0_>PZOaAPf0_>PZObAoe0^>QZObAoe0^>QZObAoe0^>QZObAoe0]>RZOdAne0[>RZOeAne0[>RZOeAne0[>RZOeAne0[>RZOeAne0[>RZOfAme0Y>TZOgAme0X>SZOhAme0X>SZOgAne0Y>RZOgAne0X>SZOhAme0X>SZOhAme0X>SZOhAme0X>SZOhAne0V>SZOjAme0V>SZOjAme0V>SZOjAme0V>SZOjAme0U>TZOkAme0T>SZOlAme0T>SZOlAme0S>TZOlAme0T>SZOlAne0S>RZOmAne0S>RZOlAoe0S>RZOmAoe0R>QZOnAoe0R>RZOmAne0S>RZOmAoe0Q>RZOnAoe0R>QZOnAoe0R>RZOmAoe0Q>RZOoAne0Q>SZOnAne0Q>RZOoAne0Q>RZOnAoe0Q>RZOoAoe0P>QZOPBoe0P>QZOPBoe0o=RZOPBPf0o=PZOQBPf0o=PZOQBPf0n=PZORBRf0m=nYOSBRf0l=oYOTBQf0l=oYOSBSf0l=mYOTBSf0k=nYOUBRf0k=nYOUBRf0k=nYOUBSf0j=mYOUBTf0j=mYOWBRf0i=nYOWBSf0h=mYOXBSf0g=oYOXBRf0g=nYOYBRf0g=nYOZBQf0e=QZOZBPf0e=PZO[BPf0d=QZO]Boe0b=RZO]Bne0b=SZO_Bme0`=TZO_Ble0a=TZO_Ble0`=VZO`Bje0_=VZOaBje0^=WZOcBie0\\=WZOdBie0[=YZOdBhe0[=XZOeBhe0Z=YZOfBge0Z=YZOfBhe0X=YZOhBge0X=YZOhBhe0V=YZOjBge0V=YZOjBhe0T=YZOlBge0S=ZZOmBge0R=YZOmBhe0R=YZOnBge0R=YZOnBhe0P=YZOPCge0P=YZOPChe0nC;C?B?Ac0]OVgQR1" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "A portrayal of a person with a gender-neutral appearance, featuring brown hair, is encircled in blue. 
A sequence of dashes leads from the character to the bottom right, with the word 'PROXIMITY' written in block capitals." + }, + { + "image_path": "multipanel_detailed_caption/images/11.png", + "mask_rle": { + "size": [ + 1403, + 4119 + ], + "counts": "R^fc4W1YZ1g0[O?D=C;F:E:G9]QOTLkd0S4lZOWLmd0P4jZOWLQe0P4eZOYLUe0n3_ZO\\L\\e0j3\\ZO\\L_e0l3[ZOWLae0o3ZZOSLce0T4XZOnKde0W4YZOjKde0\\4YZOeKde0_4YZObKde0d4YZO\\Kee0i4XZOWKfe0m4XZOTKee0Q5XZOoJee0V5YZOjJee0Z5ZZOeJde0_5ZZOaJce0d5[ZO\\Jde0f5\\ZOYJbe0k5\\ZOUJbe0o5\\ZOQJbe0S6]ZOmI`e0W6_ZOhI`e0Z6_ZOfI_e0^6`ZOaI^e0b6aZO^I]e0f6aZOZI^e0i6aZOVI]e0n6`ZORI`e0P7_ZOPI_e0T7^ZOmHae0V7]ZOiHbe0Z7]ZOeHce0]7[ZOdHce0`7[ZO_Hde0d7[ZO[Hee0h7XZOWHhe0l7WZORHje0P8UZOoGje0U8SZOjGne0X8QZOhGme0\\8QZOdGne0_8PZO`Goe0c8PZO]Goe0f8oYO[Gne0h8RZOXGle0k8RZOVGke0m8UZOSGie0o8VZORGhe0Q9WZOoFge0S9YZOmFee0U9[ZOkFce0W9]ZOiF`e0Z9_ZOfF`e0\\9`ZOdF^e0^9aZOcF]e0_9cZO`F\\e0b9cZO^F\\e0d9cZO\\F\\e0g9bZOYF]e0i9cZOVF\\e0l9cZOTF\\e0n9cZOQF]e0Q:bZOoE]e0S:bZOlE^e0U:cZOjE\\e0X:cZOgE]e0[:bZOeE\\e0^:cZOaE]e0a:bZO_E]e0c:bZO\\E^e0f:aZOZE^e0h:bZOVE^e0l:aZOTE^e0n:aZORE^e0P;aZOoD_e0S;aZOlD^e0V;aZOjD^e0X;bZOgD^e0Z;aZOfD^e0\\;bZObD^e0`;aZO`D^e0b;bZO]D]e0e;cZOYD]e0i;cZOVD\\e0k;eZOSD[e0o;eZOPDZe0R[[OjAid0W>W[OeAkd0\\>V[O`Ald0b>U[OZAmd0f>S42O0O2N101N101N101O1N101N101SWOj@Se0V?lZOl@Re0V?kZOm@Te0T?iZOn@We0R?fZORAXe0P?dZOSA\\e0m>bZOUA^e0l>_ZOWA_e0j>`ZOWA`e0j>^ZOXA`e0i>`ZOWA`e0i>_ZOXAae0i>]ZOYAae0h>_ZOYA`e0g>_ZO[A`e0f>_ZOZA`e0g>_ZO[A`e0e>`ZO\\A^e0f>aZOZA_e0f>`ZO\\A_e0e>`ZO[A_e0f>`ZO\\A_e0d>aZO\\A^e0f>aZO[A^e0e>aZO\\A_e0d>aZO]A]e0d>cZO\\A]e0e>aZO]A^e0c>bZO]A]e0d>bZO^A]e0c>aZO^A^e0c>aZO^A_e0b>_ZOaA`e0`>^ZOaAae0`>^ZObAae0^>^ZOcAbe0^>\\ZOdAbe0]>]ZOdAce0\\>\\ZOeAce0]>[ZOdAee0\\>[ZOdAee0\\>[ZOeAce0]>[ZOdAee0\\>[ZOdAee0\\>[ZOdAee0]>YZOdAge0\\>YZOcAge0^>YZObAge0_>XZOaAhe0_>XZOaAhe0_>WZObAie0^>WZObAhe0`>WZO`Aie0`>WZO`Aie0`>VZOaAje0_>VZOaAje0`>UZO_Ake0b>TZO_Ale0a>TZO_Ale0a>TZO_Ale0a>TZO_Ake0c>TZO]Ale0c>TZO]Ale0c>TZO^Ake0b>UZO^Ake0b>UZO^Aje0c>UZO^Ake0c>TZO]Ale0c>TZO]Ale0c>TZO]Ale0c>SZO^Ale0c>TZO]Ale0c>VZO[Aj
e0e>XZOYAhe0h>YZOVAge0j>ZZOUAfe0k>[ZOTAee0l>[ZOUAde0k>\\ZOUAce0l>]ZOTAce0l>]ZOTAce0l>]ZOTAce0l>]ZOTAce0m>\\ZOSAde0m>\\ZOSAde0m>\\ZOSAde0m>\\ZOTAce0l>]ZOTAbe0m>^ZOSAbe0m>^ZOSAbe0m>^ZOSAbe0m>^ZOSAbe0n>^ZOPAce0P?]ZOPAce0P?]ZOPAce0P?]ZOPAce0P?]ZOPAce0P?]ZOPAce0P?^ZOo@ae0R?_ZOm@be0S?^ZOm@be0S?^ZOm@be0S?_ZOl@ae0T?_ZOl@ae0T?_ZOl@ae0T?_ZOk@be0U?^ZOk@be0U?^ZOk@be0U?^ZOj@ce0V?\\ZOk@de0U?\\ZOj@ee0V?ZZOk@fe0V?YZOi@he0W?WZOj@ie0V?WZOi@je0W?UZOj@ke0V?UZOj@ke0V?TZOj@me0V?RZOk@ne0U?RZOk@ne0U?QZOl@oe0T?PZOm@Pf0S?oYOn@Qf0R?mYOPASf0P?mYOPASf0P?lYORASf0n>lYOTASf0l>mYOUARf0k>mYOVASf0i>mYOYARf0g>nYOZAQf0f>nYO\\AQf0d>oYO]AQf0b>nYO_ARf0a>mYOaARf0_>nYOaARf0_>nYObAQf0^>nYOcARf0]>nYOcARf0\\>oYOdAQf0\\>oYOdAQf0\\>oYOdAQf0\\>oYOdAQf0\\>oYOdARf0[>oYOdAQf0\\>oYOdAQf0[>PZOeAPf0[>PZOeAPf0[>QZOdAoe0\\>QZOcAQf0\\>oYOdAQf0\\>PZOcAPf0]>PZOcAPf0]>PZOcAPf0]>PZObAQf0]>PZOcAPf0]>PZOcAQf0\\>oYOdAQf0\\>oYOdAQf0\\>oYOdAQf0\\>oYOdAQf0[>oYOfAQf0Z>oYOfARf0Y>nYOgARf0Y>nYOgARf0Y>nYOgARf0Y>nYOQ@aNS1ag0k>oYOQ@cNR1_g0l>nYOQ@eNR1]g0m>nYOQ@eNR1]g0m>nYOR@cNS1^g0k>oYOR@cNS1_g0j>nYOT@`NT1bg0g>oYOlAQf0T>oYOlAQf0T>PZOkAQf0T>oYOlAQf0T>oYOlAQf0S>QZOlAoe0T>QZOlAPf0S>PZOmAPf0S>QZOlAoe0S>RZOmAne0S>SZOlAne0S>RZOmAne0S>RZOmAne0R>TZOmAme0R>SZOnAme0R>SZOnAme0Q>UZOnAke0R>UZOnAle0Q>UZOnAke0Q>VZOoAje0Q>XZOmAie0R>YZOlAge0S>]ZOjAce0V>bZOeA^e0Z>eZOdA\\e0[>fZOcAZe0\\>hZOcAXe0]>iZObAWe0^>iZObAXe0\\>iZOdAWe0\\>iZOdAXe0[>hZOeAXe0Z>hZOgAXe0Y>hZOgAYe0X>fZOiAZe0V>gZOjAZe0U>eZOlA[e0T>dZOmA\\e0R>dZOoA]e0P>bZOQB^e0o=bZOQB_e0m=aZOTB_e0l=`ZOUBae0j=_ZOVBae0j=^ZOWBbe0h=_ZOXBbe0g=^ZOYBbe0g=^ZOYBce0e=_ZOZBae0f=_ZOZBae0e=aZO[B_e0d=aZO\\B_e0d=aZO\\B`e0b=bZO]B^e0c=bZO]B^e0b=dZO]B]e0b=cZO^B]e0a=eZO^B\\e0a=eZO^B[e0a=fZO_BZe0a=gZO^BZe0`=gZO`BYe0`=gZO`BZe0^=hZOaBXe0_=gZObBZe0\\=gZOdBYe0\\=gZOdBZe0Z=gZOfBYe0Y=hZOgBYe0X=gZOhBZe0V=gZOjBYe0U=hZOkBYe0T=gZOlBYe0S=hZOmBYe0R=gZOnBZe0P=gZOoBZe0Q=fZOoB[e0oc0m>8I7I6J6J5M3L4L3N3L3N3M2M3N2N3M2N2N2N2N1O2N102M=C100O1O100O100O100O10dMPDT1o;mNPDT1P:G8I6K4K6K4M3L3M4M3L3N2N2N2N3M2N2N2N1O2N4L3]OM8LI010o0n;TOWD=2^OM>MC0o0m;UOXDNK46
GI05:LG102o0j;WOUDHN3OM2:5GI04=O1`;\\OUDKN:a0C@h0;3a;]OVDJ07c0M@38`0_;_OVDKO5e0NZOKO:?=^;@TD79A4d0H\\O4g0c;ATD4;F1a0J]O3g0c;IVDB:027CJo:EXDGK7>MG3i0=o:EYD0IH<3L1g0?o:FXD44EKNN2l0a0P;FVD54DLNR1c0h:FUD0LKTDK1WOK`0QM3O1O1N1O3M2N2N2M3N3L3N3M2M4L4L4L5J6J7H9F\\o[4" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The image shows an individual coughing into their hand, representing a symptom identified by the word \"cough.\"" + }, + { + "image_path": "multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "mask_rle": { + "size": [ + 500, + 565 + ], + "counts": "VW_4e0k>9G7K5J5L4L5K3N3L4L3N3M2N2M3N2N2O1N2N1O2N5XCYMZ3E1i;5RFGRN0>3@6n;1VFETN1Kb0k;IfDEP1j0[:AdDE8O[O09k0`;BdDC66YOJ=k0`;CcDC55Me0\\;CaDD36JKCh0o;D`DC37Ji0d;^O]DB3;If0g;_O[DA2>Ic0j;@XD@2`0F]O3R1n;0VDHK8o;1TDIM5P<3oCJ2ROMl0RB3M1OQOPDhMI?We0`;FYDHG13O0??2^;FbDHB10>>3^;E_DEC43OO`0?3\\;E`DGB33NO`0b02Z;FaDEB510N91Bf0d0W;FfDK]O2NJ191Ff06XO6o;MgDK]O6N01Ff05ZOLN7n;2hDI^Od0h0_OTO6n;MXDH82Gd0f0^OVO7m;MiDJ^Od0f0]OXO7k;MjDK]O2K;k0DXO7k;L^DIJ1L124N9i0EXO7k;L]DKJOM034O:e0GYO5l;LfDL\\ON54N;f0GWO5n;KgDMJ=95V;AgDLIa092V;BgDLEQ;DmD:XO_Ok0b0P;EnDHVO<2Ck0?QODn;6cDGM8EHN3n03RO5m;NcDIK6HMk01ROK06m;3`DJM`0d0ASOLN7o;2^DIMd0d0^OSO7n;N^DHGO1e0i0_OSO7n;O]DEI39:ECe05ZO6m;1`DDB3>:BEf03[O6n;2]DCE2>`05D]O5n;3\\DBF3>?34];I_DD4<07^;H_DE2<08_;I]DCNd031c;JYDBMh03Ng;1oCZO6i03Li;8TDM1ROMb0n;?SDL4SOKb0o;b1XDlMIb0o;b1XDlMIb0PO2O1N2O1N2M4M2N2M3N3M2M4M3L3M5K4L5J6J8G:CZR;" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "Here, an individual is portrayed with their hands near their throat, their cheeks flushed, and an uneasy expression. The term \"dyspnea\" adjacent to the figure defines the displayed respiratory distress." 
+ }, + { + "image_path": "multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "mask_rle": { + "size": [ + 500, + 565 + ], + "counts": "]f_1b0m>:G8I6K4L4L4L4L4M2N3L3N2M4M2N2N2N2N2N2N2N2O0O2N2N2O1N101N1O2O0O2O0O100O2OO1L3TNgC>Z<^OgCkNOe13YNQ9G8I5L5K4L4L3M4M3L3N2M4M2N2O1N1O2MaMaEN]:3cEOZ:2fEOX:1iE0U:1kE0T:0lE1R:0nE0Q:1oE0P:0PF1n90cDH;8R;0bDJQOO>5O3n;0dDHE4J4l;1`DGO3^OLM:U<0]DF84YOML9U<1]DGAM>5^O215T<0^DF94SO125S<1^DE4N[O7NLON3k;1[DB30<7A6e;1[DBGN82NN5MEa0j;0`DEGb0NXO70J01`0i;1`DEJMH90F>OM?c;1aD_OB1?OB0O2a00@O3`0i;0aD_OB1]10lN`0d;0`D@DOi1b0R:OVF_ORN0Ob0i;OVF@PN01a0i;OUFAQNO1a0i;ObDAo07fN9i;O`DEf0KRO808i;0`DD60DLO22>e;0`DDLLH0O57K002`0d;0`DAN0FO00N1701O2`0d;0_DAO1ONG7NK224[;0]DA21JN626=\\;1\\D@?0D15>\\;1XEAM?l:OWECnNNf0`0U;O\\DC2NG2KN2040NO=W;1]DB]1=W:0\\FOe91ZF0g9OYF0h90XF0i9OWF0j90VF0k9OUF0l90cDH?7o:0SF0n90RF0o9OQF0Q:OoE1Q:OoE0S:OmE0U:OkE0W:OiE0Y:OgE1Y:0fEO]:1ZE4i:Z22N1O2N3M2N2N3L3N3L3N3L4L4L5J6J7H:DWcZ3" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The illustration shows a pair of human lungs with a highlighted area indicating inflammation. The word \"pneumonia\" is present to describe the condition being depicted." + }, + { + "image_path": "multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "mask_rle": { + "size": [ + 500, + 565 + ], + "counts": "oS`56U?c0C6I8J5J5L4M3L4L3N3L3N2N2M4M2N2N2N2N2O1N2N1O2N2O1N1O2N2O0O2O0O2O0O2O0O2O001N100O2O00001N100000001O0O10000000000000001O00O10000000001O000O10001O000O101O000O2O0O101O0O2O0O2O0O2N101N101N2N2N101N2N2N2N2N2N2N2N2N3M2M3N3L3N3L4L4L4L5J6I8I:CeTZ1" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "Featured is a graphic representation of a pair of kidneys with a highlighted area in red, indicating distress or damage. 
Alongside is the phrase \"renal failure,\" signifying the medical condition exhibited." + }, + { + "image_path": "multipanel_detailed_caption/images/23.png", + "mask_rle": { + "size": [ + 1848, + 1080 + ], + "counts": "0`0c0ICO010O0O5KR13Xc1;n[NICO010O0O5KR13Xc1;n[NICO010O0O5KR13Xc1]4a[NjK^d1i4O1O001O001O000000000000O10000000000000000000000001O00000000000000000000000000000000O100000000001O000000000000O1001O0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O11O0000000000000000O1000000001O00000000000000000000O10000000000001O000000O10000000000000000001O0000000000O100000000000000000000000000000000000000000000001O00000000O1000000000000000000000000000000000000001O0000000000000000000000O10000001O000000000000000000O11O000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O100000000001O0000000000000000O1000000000000001O00000000O1000000000000001O0000000000O100000000000000000000000000000000000000001O00000000000000000000O1000000000000000000000000000000000000001O000000O10000000000000000000000000000000000000000O100O1O1O:RKTn1" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This region displays the heading \"Interesting Facts\" at the top, set against a blue background with a three-line menu icon to the left and a heart symbol to the right." + }, + { + "image_path": "multipanel_detailed_caption/images/23.png", + "mask_rle": { + "size": [ + 1848, + 1080 + ], + "counts": "Vgi4P1`h1>E;E9H7J7H8J4K6J5L5K4L4L4K5L5K4M2M4M2N3L3M4M2M4M2N2M4M2N2M4M2N2N2N2N2N2N2N2N2N2N2N2N2N2N2N2N2O0O2N2N2N1O2O1N2N2O1N101N2N101N1O2O0O2N2O0O2N101O0O2O0O2O1N100O2O0O2O000O2O0O2O001N10001N10000O2O000O2O00000O2O000O2O0000000O2O00000000001N10000000000000001O0O1000000000000000O100000001O000000000000000000000000001N10000000000O2O00000O101O000O2O00001N1000001N10001O0O101Ni`NTHd\\1l7\\cNUHd\\1i7\\cNXHd\\1h7]cNWHc\\1h7^cNWHd\\1h7f201N101N10001N2O0O2O0O2N101N101N101N101N2O0O2N2N2O0O2N2N101N2N1O2O1N2N1O2N2N2N3M2N2N2N2N1O2N2N2N3M2N2N2N2M4M2N2M3N3M2M3N3M3L3M4M3L3M4L4M3L4L4K6K4L4L5J6K4J9H6J6I8G=A;PO^YjX1" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "Depicts a graphical icon of a panda bear's face on a green background with the label \"Animals\" beneath it." 
+ }, + { + "image_path": "multipanel_detailed_caption/images/23.png", + "mask_rle": { + "size": [ + 1848, + 1080 + ], + "counts": "h]if0;Qi1f0]O`0D;G8G8H7K4K6J6K4L5J5L4L5K3M4M3L3M4M2M4L4M2N3M2M3N3L3N2N3M2M3N3M2N2N2N2M3N2N2N2N2N2N2N2O1N1O2N2N2O1N2N1O2O1N1O2N2O1N1O2O1N101N1O2O0O2N2O0O2O1N101O0O2O0O101N101N101N101O0O2O0O101O0O101O0O10001N10000O2O000O2O0000001N1000000000001N1000000000001O0000000O1000000000000000000000000000000001O0000000000000O100000001O000O100000001N10001O000O101O000O2O000O101O000O2O0O101O0O101N101O0O2O0O2O001N101N1O2O0O101N101N101N101N2O1N1O2O1N1O2N2N101N2N2O0O2N2N2N2N2O1N2N1O2N2N2N2N2N2N3M2N2N2N2N2M4M2M3N2N2N2M4M2M4M2M4M2M4M3L4K5L4M3L4L4L4L5J6J6J6J7I7I6I9Ed_1l5PcNeI]M?b_1n5PcNcI^M?b_1o5obNbI_M?a_1P6PcNbI]M?b_1Q6PcNVJo\\1l5PcNTJo\\1n5PcNRJP]1o5nbNRJQ]1o5obNQJP]1Q6obNPJP]1Q6obNoIP]1S6obNmIP]1U6obNlIP]1T6PcNmIn\\1U6QcNmIl\\1U6ScNnIi\\1S6WcNnIh\\1S6XcNmIf\\1T6ZcNlIf\\1U6YcNkIf\\1V6ZcNjIe\\1X6ZcNiIe\\1W6[cNiId\\1Y6[cNgIe\\1Y6[cNfHjMk0j^1a6[cNcHmMj0i^1d6YcNaHPNj0f^1f6ZcN_HQNj0f^1h6`cNXI_\\1i6acNVI`\\1j6`cNoHf\\1S7YcNmHg\\1S7YcNmHg\\1T7XcNlHg\\1U7YcNkHg\\1V7XcNjHg\\1W7XcNjHh\\1W7WcNiHi\\1W7XcNhHg\\1Z7YcNdHh\\1\\7ZcNaHg\\1_7\\cN]Hd\\1e7`cNTHb\\1l7d22O000O101O0O10001O0O10001O0O10001O000O1000001O00000O10001O000000000000000000000000001O000000O100000000000000000000000000O2O000000000000001N1000000O2O00000O2O00000O2O000O101Oe`NoGo\\1P8PcNVHl\\1j7TcNXHk\\1g7UcNZHj\\1e7WcN[Hj\\1d7VcN\\Hj\\1c7WcN^Hh\\1b7YcN]Hh\\1b7XcN^Hh\\1a7ZcN]Hh\\1b7YcN]Hg\\1b7]cNZHd\\1f7h2O0O2O0OY`N\\H]]1c7bbN^H_]1`7abNbH^]1^7bbNbH_]1\\7abNeH_]1[7`bNgH`]1W7`bNjH`]1U7abNkHoMJU_1[7kbNmHmMKX_1V7lbNZIU]1e6kbN\\IT]1c6mbN]IT]1a6mbN_IS]1a6mbN_IT]1_6mbNaIS]1_6nbNaIR]1]6obNcIR]1[6PcNdIQ]1[6PcNdIQ]1Z6QcNeIo\\1Z6ScNeIn\\1Z6RcNfIo\\1X6RcNhIn\\1W6RcNjIo\\1T6QcNmIP]1R6obNnIR]1Q6nbNPJS]1n5mbNSJT]1k5lbNUJV]1i5kbNWJV]1g5kbNYJU]1g5lbNYJT]1e5mbN[JU]1b5mbN]JT]1a5mbNTJVMNn_1l5nbNTJVMOm_1k5mbNWJUMOo_1g5nbNfJR]1Y5mbNjJS]1T5nbNlJS]1R5nbNnJS]1P5nbNPKS]1n4nbNRKT]1k4mbNUKT]1i4mbNXKS]1f4nbNZKS]1d4nbN\\KS]1b4nbN_KS]1^4nbNbKS
]1[4PcNdKR]1Y4obNgKR]1V4QcNiKP]1U4QcNkKP]1S4QcNmKQ]1o3QcNQLP]1m3QcNSLQ]1i3QcNWLP]1g3RcNXLP]1d3ScN[Lo\\1b3ScN]Lo\\1_3TcN`Lm\\1]3UcNdLl\\1X3VcNhLm\\1S3VcNlLl\\1P3VcNPMl\\1k2WcNUMk\\1g2WcNYMk\\1c2XcN\\Mk\\1_2WcNaMl\\1Z2VcNeMn\\1T2VcNlMl\\1o1WcNQNm\\1g1XcNXNk\\1a1YcN_Nk\\1Z1XcNfNl\\1R1WcNPOQ]1b0TcN^O[]1JnbN6Zc101O1N3L8Hkc`X1" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This panel displays an icon of a syringe with a drop, which is on a green background, and is described with the words \"Drugs Addiction.\"" + }, + { + "image_path": "multipanel_detailed_caption/images/23.png", + "mask_rle": { + "size": [ + 1848, + 1080 + ], + "counts": "Waj4S1_h1B]hif0" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "Exhibits two stylized human figures, one male and one female, on a blue background, with the inscription \"Gender.\"" + }, + { + "image_path": "multipanel_detailed_caption/images/23.png", + "mask_rle": { + "size": [ + 1848, + 1080 + ], + "counts": 
"ZamX1n0ch1?B;F:H7H7J6J6J6K4K6K4L4L4L3M4L4M4J5L3N2M4M2N2M4M3M2M3N3M2M3N2N3M2N2N2N2M3N2N2N2N2N2N2O1N2N1O2N2N2O1N2N1O2O1N1O2N2O1N2N101N2N101N2O0O2O0O2O0O2O0O2N101N101N101N10001N10001N1g`NXHb\\1i7[cNZHc\\1g7\\cN\\Hb\\1d7^cN]Ha\\1d7^cN]H`\\1d7`cN\\H`\\1e7_cN\\H`\\1d7`cN\\H_\\1e7acN[H_\\1f7_cN[Ha\\1e7_cN[H`\\1f7`cNZH`\\1g7\\cN\\Hd\\1d7VcNbHi\\1_7UcNcHk\\1]7TcNdHl\\1]7TcNbHk\\1_7UcNaHk\\1_7VcN`Hj\\1`7WcN_Hi\\1a7XcN^Hh\\1c7WcN]Hi\\1c7XcN\\Hg\\1e7ZcNZHf\\1f7ZcNZHf\\1f7[cNYHe\\1h7[cNWHe\\1i7[cNWHe\\1i7\\cNVHd\\1j7\\cNVHc\\1k7]cNTHd\\1l7\\cNTHd\\1l7\\cNTHd\\1m7\\cNQHe\\1o7[cNQHe\\1o7\\cNPHd\\1P8\\cNoGe\\1Q8\\cNnGd\\1R8]cNmGc\\1S8^cNlGb\\1T8_cNkG`\\1V8`cNjG`\\1V8`cNjG`\\1V8acNiG_\\1W8acNiG_\\1X8`cNhG`\\1X8`cNhG`\\1X8_cNiGa\\1W8^cNjGb\\1V8]cNkGc\\1U8\\cNkGe\\1U8ZcNlGf\\1T8YcNnGf\\1R8ZcNoGe\\1P8[cNQHe\\1o7[cNRHd\\1n7[cNTHe\\1k7[cNUHe\\1k7[cNUHe\\1k7[cNUHe\\1k7[cNUHe\\1k7ZcNVHf\\1j7ZcNVHf\\1j7ZcNVHf\\1j7ZcNVHf\\1j7ZcNVHf\\1i7[cNWHe\\1i7[cNWHe\\1i7[cNWHf\\1h7ZcNXHf\\1h7ZcNXHf\\1h7ZcNXHf\\1h7[cNWHe\\1h7\\cNXHd\\1h7\\cNXHe\\1g7[cNYHe\\1g7\\cNXHd\\1h7\\cNXHd\\1g7]cNYHd\\1f7]cNYHc\\1g7]cNYHc\\1g7^cNWHc\\1i7]cNWHc\\1h7`cNVHa\\1i7`cNVH`\\1j7acNUH_\\1k7bcNTH^\\1k7dcNTH]\\1k7ccNUH]\\1k7ccNUH]\\1j7ccNWH^\\1h7acNYH_\\1g7`cNZHa\\1d7_cN]Ha\\1c7]cN`Hb\\1_7_cNaHb\\1^7^cNaHc\\1_7^cN`Hc\\1^7_cN_Hc\\1a7`cNZHc\\1d7i20001N101N101N101N101N101N100O2O0O2O0O2N2O1N2N101N2N1O2O1N1O2O1N2N2N2N101N2N2N2N2N2N2N2N2N1O2N2N3M2N2N2N3L3N2N2M3N3M2M3N3M2M4M2M4M3L4L4L4L3M4L4L4L5J6J5L5K5J7I6I9F;D=Cf0QO^^f4" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "Contains a depiction of the Earth on a green background, with the word \"Global\" beneath it." 
+ }, + { + "image_path": "multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "mask_rle": { + "size": [ + 500, + 500 + ], + "counts": "oQ74a06k=5SBLW;L_Fb0YNBU;4UFa0jNXOP;a0XE]O^OQ1;POn:c0aDXOa0e11]N^OM^;a0kDg1IjM<0CN];>oDd1IQN71S;:TEV1HbNM3U;5VE4[O@0j05UOO760T;8lD@OeD\\O9Mc12_N7n:a0fDYO90a1LfN9k:6eDK0G2O47`1LjN4l:1eD5OF4N16`1LmN2j:4hDK3MN003LLc14nN1i:5^EF\\O6^1LSO7a:3aEDZO:=HY18_94kDMNG2OO;9L\\13`93lD1026H^12`94kD0028H\\13a93kDBOo08ZO]12a93kDe04XOa1O`94kDB30Mh04Ca1O`92XE:KC\\12a91nDC3h03AZ14b90mDFMk09[O[14c9NmDHL637O@3:^13d9NmDF=>AA3`0OAS1:R:OREGIO0?NA41J95GU18o9OgDI1O30M30O5;KCV29S9MgDKO07150D5\\22o8MgDK7O;3h11hMLY;<`EFn0N]:=UDCE072KOL10O6O00f04Y;9XDCD36OMOL45L000O`05_;8YDCD45MO0K45L000Oa04^;:XDBF1b00l0O`N3S<>RD_O10R20hM3S<=hFBY94jAKW>5iAKX>3hAOW>2fA3V>NjA3T>OkA1U>OnAIW>7jAFY>9gAIW>7hAJX>5hANV>2jANDJ^>8nA2Q>OoA1P>0PB0P>0QBJCM\\>:PBHU>8lAHk=NlA99Mh=KnA89O\\Y1UOgN8^jMZ:LTE:4C32Il2;YMa:LWE6:j2LUMf:KfES3CSMg:JdEU3EQM_;n2cDQM];k1bDRNG90C;7\\;j1hEVNX:i1iEWNW:g1kEYNU:e1mE[NS:Z1YFeNh9W1[FgNg9T1YFSOf9f0`FoNgM8j;=X`T5" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The image depicts two individuals engaged in conversation. One appears to be a professional, possibly a therapist, sitting across from a person who seems to be seeking help. The scene is accompanied by the text \"Seek Professional Help,\" suggesting that the image represents the advice to consult a mental health professional when dealing with depression." 
+ }, + { + "image_path": "multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "mask_rle": { + "size": [ + 500, + 500 + ], + "counts": "dZU3130X?9O0003N0OLKh@5Z?21N10001OO22M00]O4`AJ^>8^AGL0e>9_AGLOf>:^AHJNh>;>0O100O100N2]Nc1O100mBnMd0eBlMV=T25000000O100001O0N2O010O100AnM[CS2Z\\:o2N1O0O100001O0000000000000000000000000000000000000000000000000000000000000hNYDcNg;e2000000000bNZDnNf;S1YDmNg;`2000fNYDgN>hNi:k3`0hNYDcNg;^1ZD`Nf;a1ZD^Nf;c1YD]Ng;d1YD[Ng;e1ZDZNf;f1]DWNc;k1[DUNe;k1[DUNe;l1\\DRNd;o1\\DPNd;Q2\\DnMd;R2\\DnMd;S2\\DlMd;T2\\DlMc;V2\\DjMd;V2\\DjMd;W2[DPM0f0e;]2[DcMe;V3001OO100000000001O000000000000000000000000O1001O000000O2O0001O000O100001N01010OO100000O100001O00000000000O2OZo_5" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "Here, an individual is seen looking out of a large window onto a sunny landscape with trees. The phrase \"Rethink Your Perspective\" suggests that the image is advising a change in one's outlook, possibly to a more positive or broader view, as a way to combat depression." 
+ }, + { + "image_path": "multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "mask_rle": { + "size": [ + 500, + 500 + ], + "counts": "ccR352NW?9N101O000000O10001OO1000010O00O2O1O0001RLI]H7_7O_H1]73cHMZ77eHIX79iHGR7>nHBo6`0RI@l6b0kEUOCOR3;^7a0lE[Oe25]7`0mE^Of22c65aF:5Ga2Ih6:`F56O^2Aj6g2PIYMo6R3gHmLZ7[3_HbLc7a3[H[Li7f3Q23M2]E[LS9h3iF_LR9e3iF^LV9Q5N10000000O1O2M5^OlE[KZ:_4`0_Oa0YOh0K7QMnCQ2mdGA\\8c0aG]O_8CTEd0Y2Jb8BYEc0S2Kd8@_Ed0j1Lf8AbEc0e1Mi8_OfEd0]1Nm8]OiEf0V1NP9]OlEe0P10T9[OnER10O100000000000000000000^N6ZCJc<:\\CFbZCBi<7[CIj5bAKL2`>4`AOO0^>1cAON1`>ObA0N0a>0aA0N0b>O`A000`>0aAOO2_>OfAKJ7`>O`AO4M`>1[A4Q?JPA6W?0F0PA0o>;0GPANP?2PAMQ?2PANP?2PAOo>1PA0P?0PA0e=N^C0PO0`=8]CDVONJ3b=>]C_O[O2U=b0mC]ORWE_OUO31N5K?4_O3`;>WE6TOZOd01_O1b;?VEPGBP9>PGBn1]Oe2P1\\KYOeMN\\4J`2o0^KYOgMM[4M_2m0_KZOfMMZ4Ma2l0_K[OeML\\4Ka2n0VIUOn1>V8_OlFn0oNWOo1;^9>cDWOo1nNZOo1NRNOm3Hi1d0\\Ki0k0XOo1^Oh1a0^Kj0j0mNSNMg9<\\G;oN\\Ol1MUNOd9=fF]O7l0_O^Ok1MUNNf9=dF^O7l0_O_Ok1KUN0`9c0^FWO52>h0ABg1KUN0b9f0cFVO>e0BEe1JVN0a9g0cFVO?c0DFb1JWN0a9g0cFVO`0`0FH_16h7HH\\18i7;cFVOc0400Q11VNNd9g0bFVOg12N3UNNd9g0bFVOg12NmJMT52lJNT54kJJW5BXF>a40Y5_O[F;]48^5FdJ:Y5]OTF8d4;\\5]OQF0f4c0W:00000000000N2\\L_OUHa0h7EUH;i7LRH4k71THDkLNQ;`0RHAnLOo:c0QH_OoLNn:h0PHYOSMOk:m0oGTOVMOj:o0iESOU1;P9e0fETO1K>1h9\\O_Ec15UON6OWO9>T:`3lE`LR:c3lE^LR:d3nE\\LP:f3PF[Lo9f3PFZLo9g3QFYLn9g3SFYLm9g3TFmKF0U:S4VFXLj9h3VFXLi9g3ZFXLf9f3\\FZLd9Q3QGoLo8o2SGQMm8o2SGQMl8o2UGQMl8n2TGRMl8n2TGRMl8Q3QGoLo8W3kF\\LTO2Q:c3iF\\LUO2R:d3gFZLUO4T:e3dFVL\\O2o9k3hFnKZO5o9n3nFRLR9o3mFQLS9R4`FkKA4n9Z4WFdKJ2o9Z4WFeKH2Q:Y4WFeKH2Q:Y4WFdKJ1P:[4XFbKH3P:[4YFaKG4Q:Y4aFgK_9Y4XFdKH2P:_3lESM;^OJOP:Z3VFSM0EINQ:X3[FQMLIGOS:S3_FRMHKF1S:l2hFSMA0D1T:c2eG]MWN0U:[2kGfMoM0U:X1lE^On1ZORNOV:W1lE@k1ZOTNNV:W1lE@j1[OTNNW:V1mE@f1]OUNNZ:R1nECa1]OXNL[:R1oED^1\\O\\NKX:T1PFD[1]OR9m0fEGU1]OV9;WECc0f0l0\\O\\95[F>8]O`9N^Fc02_O[;?dDA_;=aDCa;<_DCc;:n1N3L3N1M_o=" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of 
each marked region in the image.", + "caption": "An illustration depicts a cartoon boy holding an ice pack to his head. Text indicates the ice pack has multi-functionality and advises using the ice pack for \"the relief area for the doctor recommended time of 20 minutes.\"" + }, + { + "image_path": "multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "mask_rle": { + "size": [ + 500, + 508 + ], + "counts": "jXf086M^>l0]DTOXM0O0Y:n0^HVOVMN10Y:n0_HAVMCZ:m0^HBWMA[:m0^HBWMA[:m0^HBWMA[:m0^HBWMAZ:n0_HAWMAZ:n0_HAWMAZ:n0`H@UMC[:m0`HL`74aHK_75aHK_75aHK_75aH_OTMD[:m0aH_OTMD[:m0`H@TMD\\:l0`HLa73_HMa73_HM`74`HLa73_HMa73_HAUMC\\:l0_HAUMC\\:l0_HMa73_HMa73^HNa73_HMa74^HLb74^HLb74_HKa75^HLb74_HKa74aHK`74dHkNRMh0Z:=dHkNRMh0Y:>aHkNSM12f0Z:>bHkNSM00g0[:>bHkNSM00g0[:>fHkNoLg0[:>aHkNTM01g0Z:>aHQOSMa0\\:>aHQOSMa0\\:>`HROTM`0\\:?_HQOUM`0\\:>kFkNnNON4N1j02UOa0]:=bF3EoN\\Oa0\\:>aF6EkN^Oa0\\:>aF7DjN_Oa0\\:?_F8DiNA`0\\:?^F:DgNB`0\\:?^F:DgNB`0\\:?^F:DgNB`0\\:?^F:EfNAa0Z8[OgG2000O3L<1MV1=;D`NC0Of0X8]OiG0000O101MMO520N1W1c0PHBP8>PHBP8>PHBP8>PHBP8>PHBP8>PHBP8>PHBo7?QHAo7?QHAP8>QHAo7>SHAl7`0TH@l7a0SH_Om7`0TH@l7`0TH_Om7a0SH_Om7a0SH@l7`0TH@l7`0UH_Ok7a0TH@k7a0UH_Ol7`0TH@l7`0TH@l7`0TH@l7`0TH@k7a0UH_Ok7a0VH^Oj7b0VH^Oj7b0UH_Ol7`0TH@l7`0TH@m7?SHAm7?SHAn7>RHBn7>SHAm7?RHAo7?QHAP8>PHYOSMKR;h0kG\\OYMFm:n0iG\\Od8`0\\G@g8<[GBf8>ZGBf8=\\GBd8>\\GBd8<_GCa89eGD\\8<^3N3N100O1000001N4L3LZQS4" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "Detailed instructions on how to use the ice pack are given, with two methods highlighted: \"TO USE COLD\" involving refrigeration, and \"TO USE HOT\" instructing to microwave the pack for 10 seconds and check the temperature." 
+ }, + { + "image_path": "multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "mask_rle": { + "size": [ + 500, + 508 + ], + "counts": "TUX48Z?7K4K4M001O0000000000000000001O0000000000000000O10000001O0000000000001O00000000000001O00000000000O0100000000000O100000000000000001OO100000000000eMYOaC10e1;QOS0000000000000000001O00000000O10000000000000001O000000000000001O000000000000000O100G9O1O10O10000O0100000000000001OO100000000001O1N102JW[9" + }, + "dataset_name": "multipanel_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "Guidelines for storage are portrayed, advising to \"put the item in the storage bag, for longer shelf life, keep pack in freezer while not in use.\"" + }, + { + "image_path": "natural_detailed_caption/images/4010.jpg", + "mask_rle": { + "size": [ + 600, + 800 + ], + "counts": "fmf21^l23VfM2M2O1O1O2N3Mg0YO8H001O0000000000001O000000000000000000001O0000O10000001O00O1000000000000000000000000000000000000O10000O101O0O3M3Nb0^O4L2MPRc:" + }, + "dataset_name": "natural_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The object appears to be a small brown wooden shed, likely used for storage, situated on a patch of grass. It has a clearly visible slanted roof, possibly for rain runoff, and looks to be a single-door structure typically found in a backyard or garden setting." 
+ }, + { + "image_path": "natural_detailed_caption/images/4010.jpg", + "mask_rle": { + "size": [ + 600, + 800 + ], + "counts": "0i7o:0000O10000O1000000O100000000000000O1PNVEhJOe0k:`4\\EgJKh0i:`4_EaJNm0d:a4RF]Ko9c4RF\\Kn9e4QF[Ko9W4mDYKT1`0o9S4ZEUKf0i0o9R4[EUKg0g0o9S4\\ETKg0h0m9T4SETK0OQ1h0m9U4fFiK\\9W4cFhK^9Y4aFgK_9Y4aFfKa9Y4_FVK[N3[;h4YFTKT:l4mERKT:m4m1DXBfKh=Y4ZBfKf=Z4ZBfKR2LZ9^4eDeKP20Y9\\4jD`Km15X9[4SGeKn8Z4SGeKm8[4SGfKm8Y4SGfKP9X4PGhKQ9W4oFiKR9V4oFgKS9Y4aDiKj=U4VBlKk=i3oAZL6Nk=g3PB[L3OP>c3mA_L12Q>]3nAQMR>n2mATMT>g32TOjAjL1CX>a3iAjL_>T3eAdL`>]3aA`La>b3]A]Ld>d3ZA\\LO2_>c3aA[L04^>a3`A\\L22_>g3aAYL`>g3^A[La>P4N2N3MO2M2CbA`L^>\\3gAeLW>Z3jAgLV>X3kAgLU>X3lAgLc1Ef:`3PDdLP>Z3UDcLk9Z3k2OWAfLU>X3lAQMl=n2R1L3N2L5JYBZMmM1a=`2dDgM];U2eDkM];R2bDoMZNGaS1d@SOFLIUORAm0k>[Om@f0R?Dc@=\\?`10000O1000\\Og@iMY?X2f@hMY?^2a@cM_?k20ZOd@bMM9_?T2m@iMS?W2o@gMQ?Y2o@gMQ?Y2o@gMP?[2o@eMQ?Z2PAfMP?Z2PAfMP?Z2PAfMP?Y2QAgMo>Y2QAgMo>Y2QAgMo>Y2QAgMo>Y2QAgMn>Z2RAfMn>Z2RAfMn>Z2RAfMn>Z2RAfMn>Z2RAfMn>Z2RAfMn>Z2RAfMn>Z2RAfMn>Z2RAfMn>Z2RAfMn>Z2RAfMn>Z2RAfMn>[2QAeMo>[2QAeMo>Z2RAfMn>Z2RAfMn>Z2RAfMn>Y2SAgMn>Q2YAoMg>o1[AQNe>n1\\ARNd>m1]ASNc>l1^ATNb>k1_AUNa>k1_AUNb>i1_AWNa>i1_AWNa>i1`AVN`>i1aAWN_>i1aAWN_>i1aAWN_>h1bAXN_>g1aAYN_>g1aAYN_>g1aAYN_>g1aAYN`>f1`AZN`>e1bAZN^>f1bAZN^>f1bAZN_>e1aA[N_>e1bAZN^>f1bAZN^>f1cAYN]>g1cAYN^>e1cA[N]>d1dA\\N\\>a1hA]NZ>V1RBjNo=g0`BXOe=6hBJf[a2" + }, + "dataset_name": "natural_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The object is an outdoor chair characterized by its red backrest and tan seat. It appears sturdy and designed for outdoor settings, likely part of a café or restaurant patio." 
+ }, + { + "image_path": "natural_detailed_caption/images/402.jpg", + "mask_rle": { + "size": [ + 600, + 800 + ], + "counts": "UaX<5cb01O1O001O1O1O1O1O001O1O1O1O001O001O00001O0000000000000001O0001O00000000001O0000000001O00000000001O001O001N2O1O1O001O1O1O001O1O001O1O1O002NiUS1" + }, + "dataset_name": "natural_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The item in question is a piece of lavender paper that seems to be placed atop a metal table. The paper's edges are distinctly visible against the table's surface." + }, + { + "image_path": "natural_detailed_caption/images/402.jpg", + "mask_rle": { + "size": [ + 600, + 800 + ], + "counts": "bfc33db03M2O1N1O1M3J6N2O1O10001O000000O100000001O0O4^OZPc:" + }, + "dataset_name": "natural_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "Visible here is a yellow traffic light, suspended above the street. The light is not illuminated and it stands against a light sky, possibly signaling a traffic-stop scenario." + }, + { + "image_path": "natural_detailed_caption/images/402.jpg", + "mask_rle": { + "size": [ + 600, + 800 + ], + "counts": "_hh71eb03N1O1O101N1O100O100O10001N100000000000000000000000000O100000001O0001O000001O2O3LK5O1O2O00001O001O00001O001O001N101N2N2OiWh5" + }, + "dataset_name": "natural_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "A large red and white striped umbrella stands open, presumably providing shade or shelter in an outdoor setting. Its vibrant colors attract attention." 
+ }, + { + "image_path": "natural_detailed_caption/images/402.jpg", + "mask_rle": { + "size": [ + 600, + 800 + ], + "counts": "oTo38_b01O2N2N2O1O1N101O1O0O2N2O1O1O1O1O2N1O1O2fJXOVDL`3o0i7E_DDf3h0c7O\\D@Q4c0T7P1lHQOc6_1]IcNV6h1jIYNh5T2XJnM^5Z2bJjMk4e2UKaMZ4j2fK[Mo3k2QL\\Mn2^3RMgL]2e3cM`Lh1P4XN_M[O_3e0VMfMj3Z2ZLWMQ4i2oKVMR4j2oKTMR4l2nKTMR4l2oKSMQ4m2oKSMQ4m2PLRMP4n2PLRMP4n2QLQMo3o2QLQMo3o2RLPMn3P3RLPMn3P3SLoLm3Q3SLoLm3Q3SLoLm3Q3TLnLl3R3TLnLl3R3ULmLl3R3TLnLk3S3ULmLk3S3VLlLj3T3WLkLj3T3VLlLj3T3WLkLi3U3WLkLi3U3WLkLi3U3XLjLh3V3XLjLh3V3YLiLg3W3YLiLg3W3ZLhLg3W3YLiLf3X3ZLhLf3X3[LgLf3X3[LgLe3Y3[LgLe3Y3\\LfLd3Z3\\LfLd3Z3\\LfLd3Z3]LeLc3[3]LeLb3\\3_LcLX20RI_3f4aLe1Y5\\NfJ\\1b5dN^JU1i5kNWJJT76lHcN[8]1fGRL_OD4\\Oa9=^Fa3e0TL];2cD[3i9G4M2]MB_Fc0W2Km1NeK;X2Ol1NeK8X20n1MeK9W2On1NgK9kN\\OT2`0U3OhKd1l0aNW30hKe1m0]NW33gKg1m0XNX36gKh1l0UNZ36fKn1f0PNa35fKV2>hMi37dKW2=eMm3a0UKT2g0^MQ4\\5gKhJW4]5aKhJ]4o7N2N1O2N2O2M3N2N1010O2O1N3N0O1N2O0O101N1O1O1O001O1O001O001O1O001O001O1O1O1O001O1O1O1O1O1O1N2O1O1O1O1O2N1N10000O10iJlFQ4JgKN4]9OTGl3b9QLdFh3_:J6I6K7I5B>L4M3N1O2N2O0O101N1000000O101O0010O01O1O000001N1O1O1O2N3N1N2lDlMX9V2`FVNZ9m1_FZN^9h1^F^N^9d1]FbN`9b1XFgNb9_1SFkNi9Y3M3XFnJ;3Z8Z5_GlJ`8l5N2N1O2N1O1O1O2N100O2N1O1N2O1O1O2N2N1O100N3N2N1O2N2N1O2N2N2N2N2N2N1O2N2N1O000000O100O10000001O0000000000O10000000000001O00001O00001O00001O00001O001O1O1[InGZ6S8cIQH[6\\8N1O1O2N7I2N1O1O2N001O100O00001O0O01O1O100O100O1O100O100O100O10O1O001O01000O010000O01000O0100000000O100O10O10O1O100O100000000002N3M2O2M1O1O00001O0000000001N101O001N10000O2N1O2L301O1O0100O01O1O001O001O1O0O2N2N101N200O1O001O1N2O1O1N2O\\FmJ\\9R5dFoJ\\9P5dFPK]9n4cFSK^9l4bFTK`9i4aFVKa9i4_FXKa9g4_FYKb9f4]F[Ke9b4[F_Kg9^4ZFbKh9U4g0L4K5L5I7POT1cMcCa1Y=@?Fh^n1" + }, + "dataset_name": "natural_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "A close-up view of a horse's head, predominantly brown with a distinctive white patch on its forehead and visible mane." 
+ }, + { + "image_path": "natural_detailed_caption/images/000000518836.jpg", + "mask_rle": { + "size": [ + 480, + 640 + ], + "counts": "P9R5n900O2N2O3K6VOQFZLQ:_3m0K4G9I8H7K7E?Dc1TNUUT9" + }, + "dataset_name": "natural_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This is the body of a brown horse, most likely the same one as the head seen in the close-up. Its front body is visible." + }, + { + "image_path": "natural_detailed_caption/images/000000518836.jpg", + "mask_rle": { + "size": [ + 480, + 640 + ], + "counts": "oXo7h0U>i0YO1O1O0000011N9H1O0O1000000O01\\OUCSOkf4@[K`0f4_OZKa0f4^O\\Ka0e4^O[Kb0h8O00jKAaK>^4CcK<]4CdK=\\4CeK<[4CfK=e8O00001O00001O00001O001O001O000001O00O110O00000O101O00001O1O001O0000001N10000000\\[l0OedSO1O0O1000000O2O0000000O1O1000000O100000000O1000000000000O100O1O100O10000O1000000O100O1O100O100O1000O0100O1O100O1O10000O100N2O100O1O1N2O100O1O1N2O1O1O1N2O1O1O1O1N2N2O001O1N2O1O1O2N1N2O001O1O1O1O10000010O1O1O0O2O100O1O1O1O1O1O1O1O1O1O1O1O1O1O1O2N1O001O1O1O1O1O1O1O1O2N001O001O1O1O001O1O2N1O1O002N1O1O1O1O1O2N1O001O2N1O1O1N3KPgc1" + }, + "dataset_name": "natural_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "An electric stovetop features a radiant burner that is glowing, suggesting it is turned on and providing heat for cooking." + }, + { + "image_path": "natural_detailed_caption/images/000000205601.jpg", + "mask_rle": { + "size": [ + 427, + 640 + ], + "counts": "lj^71Y=3N1O2N100O1O1O1O1O1O001O0010O01O001O001O001O00010O00001O010O01O01O010O010O11O2M1HUCOT=0000000O01000O1000000000000O10000000000O1001OO101OO]D" + }, + "dataset_name": "natural_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "A kitchen knife with a green handle rests on a countertop; its blade appears sharp and suitable for food preparation." 
+ }, + { + "image_path": "natural_detailed_caption/images/000000205601.jpg", + "mask_rle": { + "size": [ + 427, + 640 + ], + "counts": "bP]21mf9BYF5Q:T11jNPFD3WOm9S2SFmMm9S2PFPNP:X20MnEhMR:\\2OO100TNSFR1P:mNRFDMf01XOX:nMWO1Ne8>\\Ic0f6^OZIc0d6^O\\I;nMWOf8`0\\IAnM8S98PI@PN0NMQ9d0RI_OoM3o8`0QI]OPN44I^8i0]IZOPN64GX8P1dISOoM8NK\\4N_On0XNQOnM`0[4DZOl0_NDY2CROk0fNBX2FmNh0nNmNjM\\4D_K=c4EYK;i4GRK;n4IlJ9U5JeJ8[5K`J7a5MXJ5i50oI2R64eIMZ6:^II^6YNaHT2h0Fi6VN`HY2:EY7QN^He4c7\\K[Hb4h702M5K4K5M2MO14L5MLWLhGa3\\8aLQHn2Q8RMUHf2Q9HmN_M`HZ2b7gMcHR2^7PNfHi1\\7XNeHe1]7ZNgH]1_7cNcHY1_7fNQG0Y1T1j7kNlF4Z1l0m7POiF5Z1g0P8TOeF9Z1?S8XOcF=Y14X8^O`F?Y1NZ8C]F`0[1C_8MVFc0d;2XD\\Oa;7]D>^;:O5L2N2N2i0kN[]48hVKBk;c0N1N2O1005K2M6HX`b1" + }, + "dataset_name": "natural_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The image depicts the head of a zebra, with distinctive black and white stripes covering its fur. The animal's ears are pointed upwards, indicating alertness. The eyes are visible, showcasing a gentle gaze, and the nose is close to the ground, suggesting the zebra is grazing or sniffing the terrain. The mane is partially visible as a series of short, erect black hair between the zebra's ears." 
+ }, + { + "image_path": "natural_detailed_caption/images/000000107939.jpg", + "mask_rle": { + "size": [ + 431, + 640 + ], + "counts": "ciY39R=5K4N3M2000000dDOZ91XFZOSOR1d:DTFd0k9]OTFd0k9\\OTFf0l9YOTFh0k9XOTFj0k9WOTFj0l9VORFm0l9SOTFm0l9SOSFP1k9POUFQ1k9nNTFT1k9lNUFU1j9kNUFW1j9jNUFW1k9hNTFY1l9hNSFZ1k9fNTF\\1l9cNTF^1k9bNUF^1k9bNTF`1k9`NUFa1j9`NTFb1l9]NTFd1k9\\NTFf1k9ZNUFg1k9XNTFj1k9VNUFk1j9VNTFl1k9TNUFm1k9QNVFP2i9PNVFR2j9mMUFU2j9kMVFV2i9jMWFW2h9iMXFX2g9iMWFY2h9gMXFZ2h9eMXF\\2g9dMXF^2g9e001O01O000010O01O000000001O01O0001O0000010O01O0001O00001O00001O0000001O000TMTFU2l9jMUFW2k9hMVFW2j9iMWFV2i9kMWFT2j9kMXFS2h9mMXFS2h9lMZFS2f9nMZFQ2g9oMYFP2g9PNZFo1f9QN[Fn1f9QN[Fo1d9QN]Fn1c9RN^Fm1b9SN^Fm1b9SN_Fl1b9SN_Fk1b9VN^Fi1b9WN_Fg1b9YN_Ff1b9YN^Ff1c9VNnECa0U2c9ZN_Fc1b9^N]F`1e9`N\\F^1f9aN[F]1f9cN[F[1h9bNZF\\1j9aNWF]1l9aNUF]1P;N2N4L4L4L5J5L4L5K4L5K5Jac]3" + }, + "dataset_name": "natural_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The object is a rectangular street sign with white letters on a green background, indicating the name of a street. It is affixed to a metal pole and is located above and slightly to the left of a stop sign. The sign reads 'NORTH AVE' suggesting it's likely an indication of the street or direction. It appears to be a standard street name sign used in many urban settings." 
+ }, + { + "image_path": "natural_detailed_caption/images/000000107939.jpg", + "mask_rle": { + "size": [ + 431, + 640 + ], + "counts": "`i\\3i0X<`0L3N2O1N2O1O1N2O1N2O1N2O1N2O1N2O1N2O1N2O1O1N2O1N2O1N2O1N2O1O1N2O001N2O1O1O100O1O1O01O0000010O001O00000010O000001O000001O01O0000010O0000000000001O00000010O01O1O1O1O1O001O1O100O1O1O1O1O001O1O1N2O1N2N1O2N2N2N2M3N2N2N2N2N2N4L4L4L5J5L5K4L5K4L5Jac]3" + }, + "dataset_name": "natural_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This object is a red hexagonal stop sign with white uppercase letters spelling 'STOP'. It is attached to the same metal pole as another sign, below and to the right of it. The sign is designed to alert drivers to stop and is a widely recognized traffic control device. The edges of the sign appear sharp and undamaged, suggesting it is in good condition." + }, + { + "image_path": "natural_detailed_caption/images/000000437374.jpg", + "mask_rle": { + "size": [ + 428, + 640 + ], + "counts": "l8c3i90O]N[FAe9>^F@b9T20100O01000O01000O010O010O10O010O10O10O01000O010O010O010O01O0100O010QN_F3`9NeFL\\93jFHU99UG\\Ol8d0XGWOi8h0[GUOd8l0_GPOb8o0aGnN_8S1dGjN\\8V1fGgN[8X1gGfNY8[1PHZNR8e1j110O01000O010O010O00100O100O01000O10001O0O?Bb0]OWY\\7" + }, + "dataset_name": "natural_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "A plush, padded object designed for comfort, potentially used on a sofa." 
+ }, + { + "image_path": "natural_detailed_caption/images/000000437374.jpg", + "mask_rle": { + "size": [ + 428, + 640 + ], + "counts": "[V`04W=4M4K7J7I5J3N2M3N2N1N2O1N3N;UDXN[;P2M4KROoDHl:7WEJe:6^EJ`:5bEL\\:4fEJY:7hEKU:4WFHa99cFD\\9" + }, + "dataset_name": "natural_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This is a child with an open mouth and animated facial expression, possibly speaking or expressing surprise." + }, + { + "image_path": "natural_detailed_caption/images/000000437374.jpg", + "mask_rle": { + "size": [ + 428, + 640 + ], + "counts": "[\\Q13X=2ZOMQD3n;0PD0o;4mCMHKU<;PDKHNW<8nC2RY:POUEc0a0>X:IgE7V:LjE4T:OkE1S:2kEOT:2lEMS:6lEJd9oNZFY12GY9T1fFkNW9Z1hFeNU9_1jFbNS9b1lF]NQ9g1oFYNl8l1TGTNi8P2UGQNg8S2YGmMd8V2\\GkM`8X2_GiM_8Z2`GeM^8^2aGbM^8a2`G[M;YO;0X6^4]IgK3O^6[4_IlKHOe6X4cIlKENe6X4fI`LT6f3kI[LR6g3oIYLm5k3kIjJ0\\1S6l3lI\\LR6f3mI[LR6`5O1N2O1O1O1O1O1O1O100OPNWJ_Li5b500O100O10000O1aLcHo0]7oNhHn0X7nNSIk0m6SO[If0e6ZOeI=[6BhI;Y6CkI;U6DnI:R6EQJ8P6GSJ7m5HZJ2f5M`JMa52eJI[55lJFT5:oJCQ5OYg@Kn96PFKn97RFIo96RFIo95YFKb92_FNP:000I0R[h1" + }, + "dataset_name": "natural_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "A ski student is captured from behind, suggesting they are moving away from the viewer. They are wearing a red jacket with black pants, indicative of typical ski wear fit for the cold environment." + }, + { + "image_path": "natural_detailed_caption/images/2407508.jpg", + "mask_rle": { + "size": [ + 333, + 500 + ], + "counts": "gSc21[:2K5K4N1N201WFBf9;ZFEh9:7NcW\\2" + }, + "dataset_name": "natural_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This student, visible from the side, is wearing a green and purple ski outfit with a matching helmet, possibly in the midst of practicing or following a ski maneuver." 
+ }, + { + "image_path": "natural_detailed_caption/images/2407508.jpg", + "mask_rle": { + "size": [ + 333, + 500 + ], + "counts": "fja2153h9NUFMG^l0OYUO;XNH_l0LYUORON0UOE`02Da0\\1ge0dNaYO=\\1>ROMOXOEZ185Pe0[Nf[Oo0]OSO:T1_OMNU3Te0jKQ[OL:S1CnNLg01P2Md1od0RLW[On9MfEbd0>_[O_:gc0VE\\\\Oc;`c0`D_\\O`;ac0aD]\\Oa;ac0`000O1000000000001O00000000000000001O00000000O100001O000001O000001N100000O100O10000000000000001O0000000001O01O01O000000000000001O000O100000O100000000001O000000001O00O10000O1000000O1000000O1000000000000010O001O000000000000O10000001O000O01000000000000001O000000000000O100000000000000O100000000001O0000000000000000001O001O001O00000000O10000O100000000001O00001O000000O100O10000000000000000001O000000O100000000000000000000001O0000O100O1000000001O00O100001O0000000000O1000000001O0000001O0000000000O100O100001O00O10000000000001O00O1000000001O000000000000001O00O100O1000000001O000000001O00000000O1O1000O1000000O0100000000001O000010O0000000001O00000O1000000000000000O11O0000000001O0000O100001O0O100000000000000000000000000O100O10001O00000O10000000O1000000000000O100O1O11O001O0O10000000001O00001O000000000000001O00O10000000000001O000000O100000000000000000000000000000000000001O000000O1000000000000000O10000000000000000000000001O00000001O0O11O000000000000000O1000000001O0001O0O10000000O1001O000000O2O000000000000000O100001O000O1001O0000000000O101O00000000001O001O002O2M1N3N2N3M3L7J5aDn[Oe:jd0\\EnZO`3fNlLcf0SO]YO6m0;=_3iNkL`f0UO]YO5o0:<`3iNkL_f0XO[YO5o09>_3jNkL\\f0[OXYO6R17?^3jNjL\\f0KYZONa0]3jNjL\\f0MUZOOe0\\3gNhL^f0OUZOMf0^3eNfL`f0OUZOMe0^3gNfLcd0VOo\\Oh0SONd0]3bNlK2k0fd0VOo\\Oh0SONd0\\3cNnK0j0gd0VOP]Of0SO0c0\\3iNhL^f0LVZO0d0[3hNiLmd0VOX\\Of0_O0e0Z3gNjLld0WOY\\Oe0_O0f0Y3fNkLld0WOY\\Oe0_O0f0Y3fNkLkd0XOZ\\Od0@Oe0Z3gNjLid0YO\\\\Od0_OOd0Z3iNjLgd0YO^\\Oc0_O0b0[3jNiLgd0ZO]\\Ob0@0b0\\O_Nk3;mLhd0ZO[\\Oc0@Of0X3gNlLhd0[OZ\\Ob0ANg0Y3fNlLhd0[OZ\\Ob0BMf0Y3hNlLgd0[OY\\Ob0CNf0W3hNmLid0XOX\\Of0ANf06eNi14`MB>if03XZONf05hNi10aMC>hf05XZOKh05hNj15iM\\f09UZOHf09gNk1Sm0TNeROb0_eh4" + }, + "dataset_name": 
"ocr_doc_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This region of the image displays a portion of a scientific or academic paper, specifically focusing on points that seem to outlay contents or headings within the document. The page appears to discuss topics in physics, with references to quark and meson masses, as well as lattice data. The content suggests that the document may be exploring the relationship between subatomic particles and their masses, experimental data, and theoretical models (likely within the field of particle physics or quantum chromodynamics). Each item listed is followed by ellipsis and a numerical value, denoting sections or page numbers where these topics are expanded upon within the document." + }, + { + "image_path": "ocr_doc_detailed_caption/images/22d3844dcf29a07bd10f557a33684e331846f81b938ca0f742afab09c542133f.png", + "mask_rle": { + "size": [ + 1025, + 1025 + ], + "counts": "l^f`05lo02N1O1N10000000000O1000000002Ne`k>" + }, + "dataset_name": "ocr_doc_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This region is at the bottom of the document, typically referred to as the page-footer. In academic or scientific papers, this section could include information such as the page number, publication date, author's name, or part of the document classification system. However, the specifics of what this footer contains are not visible, as the black rectangle with a white numeric identifier covers it entirely." 
+ }, + { + "image_path": "ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "mask_rle": { + "size": [ + 1025, + 1025 + ], + "counts": "nXg38c0Obm0K]RO>M00Dhl05WSO9Oc0nk0QOXTO9E90?Ql0POZTOZ1AISl0oNZTOg2ek0`MQTOe2nk0]MmSOg2Ql0ZMoSOe2Rl0[MnSOe2Rl0[MnSOe2Rl0[MnSOe2Rl0[MnSOe2Rl0[MnSOf2Ql0ZMoSOf2Ql0e0000O10000001O0O100000O10000001O00O1000000000000000000000000000000000000O11O00000000000000000001O0000001O00O10001OO10000000O2O0001N100O1000O1000000000O1000O1000010O00001O001O0000000000000000000001O0O2O01O00000001O0O10000001OO1000000O11O0000000O101O000000000000O1000001O0001N100000000000000000O1000000001O000O101O0000000000O1O1O100000000001O1O1O1O2TTOPLck0P4\\TORLck0o3\\TOQLdk0o3\\TOQLdk0o3\\TOQLdk0o3\\TOQLek0m3\\TOSLdk0m3]TORLck0n3\\TOSLdk0m3\\TOSLdk0n3[TORLek0n3[TOSLdk0m3\\TOSLdk0m3[TOTLdk0m3\\TOTLck0l3]TOTLck0l3]TOTLck0l3]TOTLck0l3]TOTLck0k3_TOTLbk0k3^TOTLck0l3]TOTLck0k3^TOULbk0k3_TOSLak0m3`TOSL`k0m3`TOSL`k0l3aTOTL_k0l3aTOTL_k0m3`TOSL`k0n3_TORLak0o3]TOSLbk0n3\\TOSLdk0m3\\TOSLdk0m3]TORLck0n3]TORLck0n3]TORLck0m3^TOSLbk0m3^TOSLbk0m3^TORLck0n3^TOQLak0P4^TOQLbk0o3^TORLak0n3_TORLak0n3_TORLak0o3^TOQLbk0P4\\TOQLdk0V401O000000O101G\\TOSLdk0l3^TORLck0n3]TORLck0n3]TORLck0n3]TORLck0o3\\TORLck0n3]TORLck0n3\\TOSLdk0U40001O00O10000I[TORLek0m3]TOQLdk0m3_TORLak0m3`TORLak0n3_TOSL`k0n3_TORLak0n3^TOSLbk0m3^TOSLbk0n3]TOSLbk0n3\\TOSLdk0V4N2O01O000O1000000000000000000001N10000000O2O00000G]TOTLck0l3]TOTLck0l3]TOTLck0l3]TOTLck0l3]TOTLck0l3]TOTLck0U400O100000H]TORLck0m3^TOSLck0l3]TOTLck0l3]TOTLck0k3_TOTLak0l3_TOSLbk0m3^TOSLbk0m3^TOSLbk0m3^TOTLak0l3_TOTLak0l3^TOULak0m3^TOSLbk0n3]TORLck0V400000G^TOSLck0l3]TOTLck0l3]TOTLck0k3_TOSLbk0m3^TOSLck0k3^TOULbk0k3^TOULbk0l3]TOTLdk0k3\\TOULdk0k3\\TOULdk0k3\\TOULdk0l3ZTOVLek0k3ZTOULfk0S400000HZTOULfk0j3[TOVLek0j3[TOVLdk0j3^TOTLck0k3^TOULbk0k3_TOTLak0l3^TOULbk0k3^TOULbk0k3^TOULbk0l3]TOTLck0l3]TOTLck0m3[TOULdk0T401O00000000O1G\\TOULdk0j3^TOTLck0k3^TOULbk0k3^TOULck0k3\\TOULdk0k3\\TOULdk0k3\\TOULck0m3\\TOSLdk0m3[TOU
Ldk0l3[TOTLdk0U400000H\\TOSLdk0l3]TOTLck0l3^TORLck0l3_TOTLak0j3bTOTL_k0k3cTOTL]k0l3bTOUL^k0k3bTOUL^k0j3cTOVL]k0k3bTOVL]k0k3bTOUL^k0l3`TOUL`k0m3^TOTLak0m3]TOTLck0l3]TOULbk0k3^TOULbk0k3^TOULbk0k3^TOTLck0l3]TOTLck0l3^TOSLbk0m3^TOSLck0l3]TOTLck0l3]TOSLdk0m3\\TOTLck0l3]TOTLck0l3]TOTLbk0m3^TOSLbk0m3]TOSLdk0m3\\TOSLdk0n3[TORLek0n3[TORLek0m3\\TOSLdk0m3\\TOSLdk0m3]TORLck0n3]TORLck0m3^TORLck0m3_TORLak0n3_TOQLak0P4_TOQL`k0o3`TOQL`k0o3`TOQL`k0o3`TOQL`k0P4^TOQLbk0P4]TOQLbk0W40000000H]TORLck0n3^TOPLck0P4]TOPLdk0o3\\TOQLdk0o3\\TOQLek0m3]TORLck0n3\\TOSLdk0m3\\TOSLdk0m3\\TOSLdk0n3[TORLek0n3ZTOSLfk0U4O001N1000O1000000O1JZTOQLfk0o3ZTOQLfk0n3[TORLek0n3[TORLdk0o3\\TOQLdk0P4[TOQLdk0V401O000000H]TORLck0m3^TOSLbk0m3^TOSLbk0l3`TOSLak0l3_TOSLbk0l3_TOTLak0l3_TOTLak0l3_TOTL`k0m3`TOSL`k0n3_TORLbk0m3^TOSLbk0m3^TOSLck0m3[TOTLek0l3[TOTLek0m3ZTOSLfk0l3[TOTLek0l3[TOTLek0l3[TOTLek0l3\\TOSLdk0l3]TOTLck0k3_TOSLbk0m3^TOSLbk0m3^TOSLbk0m3^TOSLbk0m3]TOTLck0l3]TOTLck0m3\\TOTLck0l3]TOTLbk0n3]TORLck0n3\\TOSLdk0m3\\TOSLdk0m3\\TOSLdk0m3\\TOSLdk0m3]TORLck0m3^TOSLbk0l3_TOSLbk0m3_TORLak0m3`TOSLak0l3_TOTLak0l3_TOTLak0m3^TOSLbk0m3^TOSLak0o3]TOSLbk0n3]TORLck0V400000O1I\\TOQLdk0n3]TOQLdk0n3^TOQLbk0n3_TORLak0n39O11O0VTORLak0n3_TORLak0o3^TORLak0o3]TORLck0o3\\TOQLdk0P4ZTORLek0V4O00H\\TOSLdk0l3]TOSLdk0m3\\TOSLdk0m3]TORLck0n3]TORLck0n3]TORLck0n3]TORLck0n3]TORLck0n3]TORLck0n3]TORLbk0o3^TOQLbk0o3^TOQLbk0o3]TORLck0n3]TOSLbk0m3^TOSLbk0m3^TOSLbk0m3^TOSLbk0m3^TOSLbk0l3`TOSL`k0m3`TOSLak0l3_TOSLbk0m3^TOSLck0l3]TOTLck0l3]TOTLck0l3]TOTLck0l3]TOTLck0l3]TOTLck0l3]TOTLbk0m3^TOTLak0l3_TOUL_k0l3aTOTL_k0l3aTOTL_k0k3bTOUL^k0j3cTOVL]k0j3cTOVL]k0j3cTOVL]k0j3cTOVL]k0j3cTOUL^k0k3bTOUL^k0k3bTOUL^k0k3bTOUL^k0k3bTOUL^k0k3bTOTL_k0m3`TOSL`k0m3`TOTL_k0m3`TOSL`k0n3_TORL`k0o3`TOQLak0n3_TORLak0m3`TOSL`k0m3`TOSL`k0m3`TOSL`k0l3bTORL^k0n3cTORL]k0n3cTOQL^k0o3bTOQL^k0o3bTORL]k0n3bTOSL^k0n3aTORL_k0n3aTORL`k0n3_TORLak0n3_TOSL`k0m3`TOSL`k0m3`TOSL`k0m3`TOSL`k0m3`TOSL`k0m3`TOSL`k0m3`TOSL`k0l3bTORL_k0m3bTOSL^k0l3cTOSL^k0m3bTOSL^k0m3bTOTL]k0k3dTOUL\\k0k3dTOUL\\k0k
3dTOUL\\k0l3cTOSL^k0m3cTOQL^k0o391000OZTOQL]k0m3dTOSL\\k0m3<000000001O1O00001O001O0000O10000O1000000000000000O100000000000O101O0000O100000000001O00O10O101O000000000O10000000O101O000000000001O000O1000O1000001O0000O10000O20O00000O100000000000000000000001O000000O10000000000000000000000000000001O000000000000000000000000000O10O2O0000000000000000000000000000000000000000000000000000O100O10000O1AoSOnLQl0Q3UTOiLlk0X3>1O2N1O00O1N200O1O100O1O10001O0O2N2M3ZM[SOm1il0mM\\SO>F41Dgm03\\RO34Fem01[RO28HYn02doi3" + }, + "dataset_name": "ocr_doc_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The content is a caption designated for a table, which generally serves to describe the table's subject matter. The caption reads, \"TABLE 1: The geometries and adsorption energies for the structures of thioglycolic acid on Au(111) at 0.25ML.\" It provides a clear indication that Table 1 will present quantitative data regarding the geometry and energy characteristics of thioglycolic acid adsorbed on a gold (Au) substrate at a specific coverage level." 
+ }, + { + "image_path": "ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "mask_rle": { + "size": [ + 1025, + 1025 + ], + "counts": "Scb97Wn0JZSO9bl0JRROOR1;f3HeMKgc05jYO0P1>P1E911Kic0f0`ZONX1H6M01oc0`0XZO4_1C51LLTd0m1i[OXN19JJVd0j1i[OUNaNWO_1T1IN\\d0d1i[OYNM>EIdd0a1h[OZNLc0_OGR1gNla0f2d]O]NJf0QOOOVN^14Sb0R3S]OcN^Oc2KhKZ16Yb0n2g\\Oh3n0[I[b0n2e\\Oh3IRIh0`0jb0l2_\\Oa3OWIc0>ob0k2^\\O`3OZI0J:b0Yc0j2^\\O`3OfI32`c0m9\\\\OoFdc0h:O00000000O100001O000000000000000000000000000000000000001O00000000000000000000000000001O0000000O100000000000000000000000O2O0000O100000001O000000000000000000000000001OO100000000000000000000000000000000000000000000O10001O0O10O2O000000000000000000000000O100001O0000O10000000000000000001O000000000000001O000CaCX]O^XMTj0NjUOc0BW2:SMG5cj0MkUOf3KXLG5bj0OjUOe33\\LSj01hUOc35\\LSj03fUOo2DYMc0ESj03eUOP3FWMa0GTj01gUOb35]LTj00hUOc34]LTj0NjUOe32]LSj0OkUOd32]LSj00jUOc35YLSj04gUOd3Qk0]LnTOc3Rk0]LnTOc3Rk0]LnTOb3Sk0]LnTOc3Sk0[LnTOd3Sk0\\LmTOd3Sk0^LkTOc38WLUj07bUOb39YLRj08cUO`39[LRj05eUO`37]LTj03eUO`34aLVj0NgUOa31cLXj0KhUOb30cLXj0KhUOb3OdLYj0JhUOc3ObLYj0LgUOc3OaLZj0LfUOd30`L[j0KeUOf30^L[j0KfUOh3N]L\\j0KfUOh3M^L]j0IgUOj3K]L^j0JfUOj3K\\L_j0KeUOi3L\\L_j0LeUOg3M\\L^j0NdUOf3O[L]j0OdUOg3NZL^j0OeUOf3L\\L^j0OfUOe3K]L_j0MgUOf3J]L`j0LfUOg3J]L`j0KgUOg3J_L^j0JhUOg3J_L^j0JhUOg3I`L_j0IhUOg3I`L_j0IhUOg3I`L_j0IhUOg3I`L_j0IhUOh3G`Laj0HgUOi3H_Laj0HgUOj3G^Lcj0HdUOk3I]Lcj0IcUOk3I\\Ldj0IcUOk3H]Lfj0GbUOl3I\\Lej0GcUOm3H\\LWk0c3jTO]LVk0c3jTO]LVk0b3lTO]LTk0b3mTO^LTk0`3mTO`LSk0`3mTO`LSk0`3nTO^LTk0a3lTO_LTk0a3lTO_L_j0GiUOj3H_L_j0JfUOg3J`L_j0LeUOd3LaL]j0MeUOd3L`L_j0LeUOd3L`L_j0LfUOd3J`L`j0LfUOd3J`L`j0LfUOd3J`L`j0KgUOf3H_Laj0KfUO[4Zj0dKfUO]4Zj0bKgUO^4Yj0bKfUO_4Zj0`KgUO`4Xj0aKhUO_4Xj0`KiUO`4Wj0_KjUOa4Vj0aKhUO`4Xj0`KhUO_4Xj0fKcUOZ4]j0fKdUOY4\\j0gKeUOX4[j0gKfUOY4Zj0dKiUO\\4Wj0cKjUO]4Vj0jK`UOY4`j0d00\\O^UOiKbj0V4`UOiK`j0W4`UOiK`j0W4`UOiK`j0X4_UOhKaj0X4_UOhKaj0X4_UOiK`j0W4`UOiK`j0V4aUOjK_j0W4`UOiK`j0W4`UOiK`j0W4_UOjKaj0k4000[O_UOjKaj0V4`UOiK`j0V4a
UOjK_j0U4bUOjK_j0V4aUOjK_j0V4aUOjK_j0V4aUOjK_j0V4aUOjK_j0V4aUOjK_j0U4bUOkK^j0T4dUOkK\\j0U4dUOkK\\j0U4dUOkK\\j0V4cUOjK]j01^UOb34^L^j0O_UOc33^L^j0N`UOd32]L^j0ObUOc30^L^j0NcUOd3O^L^j0LeUOf3M^L^j0LeUOf3M^L^j0KfUOg3L^L^j0JgUOh3K^L^j0KfUOg3L^L^j0LeUOf3M^L^j0MeUOd3M_L^j0NdUOc3N_L^j0OcUOc3M_L`j0MdUOe3K^Lbj0LbUOh3K\\Lcj0KcUOi3J\\Lcj0KcUOi3J\\Lbj0O`UOf3N[Lbj0l40000000000001O0000001O001RK]UO\\4cj0ZK\\UO15d4_j0cK_UO^4aj0a0000000000000O100O1000000000]O]UOhKcj0W4^UOiKbj0W4^UOiKbj0O^UOd31]Laj0N`UOc30`L_j0KcUOe3O_L^j0KdUOf3N_L^j0JeUOg3M_L^j0JeUOg3M^L_j0LcUOg3L^Laj0LbUOg3L]Lbj0MaUOg3L\\Lcj0NaUOX4_j0gKbUOY4^j0fKcUOZ4]j0eKdUO[4\\j0eKdUO[4\\j0fKbUO[4^j0gK_UOZ4aj0c00000000000000000000000000000000000000QK^UO_4bj0`K`UO_4`j0aKaUO^4_j0aKdUO]4]j0bKdUO]4\\j0cKdUO]4]j0bKdUO]4]j0aKcUO`4]j0`KcUO`4^j0]KcUOc4_j0\\KaUOd4]j0^KcUOb4\\j0`KdUO_4[j0cKdUO]4\\j0cKeUOi3H]Lbj0KfUOh3H]Lbj0KfUOh3H^Laj0JgUOh3H^Laj0JfUOi3I]Laj0KeUOh3J]Laj0KeUOi3H]Lcj0KdUO[4\\j0eKdUO[4\\j0eKeUOZ4\\j0eKdUO[4\\j0dKeUOi3G^Ldj0IeUOi3G^Ldj0JcUOi3I]Ldj0KaUOi3K]Lbj0K`UO_4`j0a0001PK]UO`4cj0`K`UO]4aj0aK`UO_4_j0bKbUO]4^j0bKcUO^4]j0aKeUO^4[j0`KiUO^4Wj0aKjUO_4Vj0`KkUO`4Uj0`KaUOL2c4^j0aK`UOL2c4^j0cK]UOL4b4]j0lK^UOW4aj0e0000000001OO100000UK_UOV4aj0iK`UOW4`j0iK`UOW4`j0iK`UOW4`j0iK`UOW4`j0iK`UOW4`j0e0O10\\OaUOfK_j0X4cUOhK]j02`UO`34^L\\j0JaUOJ0m33^L]j0K`UOI1m33_L\\j02aUO_33_L\\j01cUO`30_L]j01dUO_3O`L^j00cUO`30^L^j02bUOa3O]L_j02bUOa3O]L_j01cUOc3L]Laj01bUOb3M]Laj01bUOb3M]Laj0L]UOH4P4N]L`j0K^UOH4P4N]L`j0J`UOH1S4N[Laj0JaUOF2T4L\\Laj0IiUOk3F\\Laj0HjUOk3F]L`j0GlUOk3D^L`j0HkUOj3E^L`j0IjUOh3G`L^j0IkUOe3HbL]j0IkUOe3IaL\\j0IlUOf3HaL\\j0HmUOh3EaL^j0FmUOj3E`L^j0FmUOj3E`L^j0FmUOj3E`L^j0GlUOi3F`L^j0HkUOi3F_L_j0HkUOi3F`L^j0GdUOG2Q4MaL]j0FeUOH1Q4MaL]j0EfUOI0Q4MaL]j0EnUOj3EaL]j0FnUOh3EbL^j0FmUOf3FdL]j0GlUOe3GdL]j0GlUOf3FcL^j0GlUOg3DcL`j0EmUOi3BbLaj0DnUO_4Rj0`KoUO`4Qj0`KoUO`4Qj0`KoUOa4Pj0_KPVOa4Qj0^KoUOb4Qj0^KoUOb4Qj0]KPVOc4Pj0]KPVOc4Pj0\\KQVOc4Pj0^KoUOb4Qj0_KnUOa4Rj0`KmUO`4Sj0aKlUO_4Tj0`KmUO_4Sj0aKnUO_4Rj0`KoUO`4Qj0`KoUO`4Rj0^KoUOb4gj01000000001O000O2O1O00000000
0000000000O10O1000O1000000001O000000O10000O10010O0001O0]OdTOaL]k0\\3fTOcLZk0]3fTOcLZk0^3eTObL\\k0]3dTOcL\\k0]3dTOcL\\k0]3dTOcL[k0^3eTObL[k0^3eTObL[k0^3dTOcL\\k0\\3eTOdLZk0\\3gTOdLYk0\\3gTOdLYk0[3iTOdLWk0\\3iTOdLWk0\\3jTObLWk0^3iTObLWk0^3jTOaLVk0^3kTObLUk0^3jTOcLVk0]3jTOcLVk0]3jTOcLVk0]3jTOcLVk0^3iTObLWk0^3iTOaLWk0`3iTO`LWk0`3iTO`LWk0`3iTO`LWk0`3iTO`LWk0`3iTO`LWk0`3iTO`LWk0`3iTO`LXk0^3iTObLWk0^3iTObLWk0^3iTOcLUk0_3jTOaLVk0_3jTOaLVk0_3jTOaLVk0`3iTO`LWk0`3iTO`LWk0`3iTO`LXk0_3hTObLWk0^3iTObLWk0^3iTObLWk0^3iTObLWk0^3hTOcLXk0\\3jTOcLVk0]3jTOcLWk0\\3iTOcLXk0\\3iTOdLWk0\\3iTOdLWk0\\3iTOdLXk0[3hTOeLXk0[3hTOeLXk0\\3fTOeLYk0]3fTOcLZk0^3eTObL[k0_3dTOaL\\k0_3dTOaL\\k0_3dTOaL\\k0_3dTO`L]k0_3dTOaL\\k0_3dTOaL[k0`3eTO`L[k0`3eTOaLZk0_3fTOaLZk0_3gTO`LYk0`3gTO_LYk0c3fTO]LZk0c3fTO]LZk0c3fTO]LZk0c3fTO^LYk0b3gTO^LYk0b3fTO_LZk0a3fTO`LYk0`3gTO_LZk0`3gTO`LYk0`3gTO`LYk0`3gTO`LYk0_3hTOaLXk0^3jTObLUk0^3kTObLUk0^3kTObLUk0_3jTOaLVk0_3jTOaLVk0`3iTO`LWk0`3hTOaLXk0_3hTOaLXk0`3gTO`LYk0a3fTO`LYk0`3gTO`LYk0`3gTO`LYk0`3gTO`LYk0`3gTO`LYk0_3hTOaLXk0_3hTOaLXk0^3iTObLWk0]3kTObLVk0]3jTOcLVk0^3hTObLYk0^3gTObLYk0^3gTObLYk0_3fTOaLZk0_3fTObLYk0^3gTObLYk0^3fTOcLZk0^3eTObL[k0^3eTObL\\k0]3dTOcL]k01eTOd2N[M]k00gTOX3Yk0hLgTOX3Yk0hLhTOW3Yk0hLgTOX3Yk0hLgTOX3Zk0gLfTOY3Zk0hLeTOX3[k0jLbTOW3]k0g0O1O1O10000000001O000000000000000000000000[OfTOcLZk0]3gTOaLZk0_3fTOaLZk0^3gTOaLZk0_3fTOaLZk0^3gTObLYk0]3hTOcLXk0^3gTObLYk0_3fTOaLZk0_3fTOaLZk0`3eTO`L[k0`3eTO`L[k0_3fTOaLZk0_3eTOcLZk0]3fTOcLZk0]3fTOcLZk0]3gTObLYk0^3gTObLYk0^3gTObLYk0]3hTOcLXk0\\3iTOdLWk0[3jTOeLVk0[3jTOeLVk0[3jTOeLVk0[3jTOeLVk0\\3iTOdLWk0\\3iTOdLWk0^3gTObLYk0S400000000000000000000000000001O000000001O00000TLcTOT3_k0iLbTOW3_k0^L`TOL5f3]k0ZLkTOf3Uk0ZLkTOf3Uk0ZLlTOe3Tk0\\LkTOd3Uk0\\LkTOd3Tk0^LeTOINi3]k0iL_TOX3ak0d0O10000O100O1N20QLfTOW3Yk0iLiTOV3Wk0jLjTOU3Vk0kLiTOU3Xk0kLhTOU3Xk0kLhTOU3Yk0jLgTOV3Yk0jLfTOW3Zk0iLeTOd20YM[k03eTOc21ZM[k02dTOd21ZM[k01fTOd2O[M\\k0OgTOe2M\\M]k0NfTOf2M\\M]k0MhTOf2K]M]k0ImTOi2F^MUl0a2kSO`MVl0_2jSOaMVl0_2jSOaMVl0a2hSO_MXl0b2gSO_Mdk0_OmTOR3_O_M]k0^Og
TO:4^3Tk0jLjTOW3Vk0j0000000001O0000O100001O00001O1O0PLdTO[3\\k0dLfTOZ3\\k0dLfTO[3\\k0cLdTO]3Ql00O\\OiSO^MXl0`2iSO_MXl0a2hSO_MXl0`2jSO_MWl0_2jSOaMVl0_2jSOaMVl0`2iSO`MWl0a2hSO_MXl0a2hSO_MWl0b2iSO^Mkk0@[TOT3J\\M\\k0]3dTOdL[k0[3fTOeLZk0[3fTOeLZk0[3fTOeLZk0[3fTOeLZk0[3fTOdL[k0\\3eTOdLZk0]3fTOdLYk0\\3fTOeLZk0Z3gTOfLYk0[3fTOeLZk0\\3eTOdL[k0\\3eTOdL[k0]3dTObL]k0^3cTOcL]k0\\3cTOdLak0GdTOQ3KXMTl0Y31[OkSO\\MUl0d2kSO\\MUl0d2kSO\\Mek0^OiTOV3B\\MUl0d2kSO\\MUl0d2kSO\\MUl0d2kSO\\MUl0d2kSO\\MUl0d2kSO\\MUl0d2kSO\\Mck0AjTOg3Vk0YLjTOg3Vk0YLjTOg3Vk0YLjTOS3C\\MUl0c2lSO]MTl0c2mSO\\MSl0c2nSO]MRl0c2nSO]Mak0BhTOQ3G]M`k0CiTOP3G]M`k0CiTOP3G]M`k0CjTOn2G_M_k0CjTOn2G_M\\k0EnTOl2F_M[k0FPUOj2E`M\\k0FnTOj2F`M\\k0FnTOj2F`M]k0EmTOl2E_M^k0EmTOk2F`M]k0ElTOl2G_M\\k0EmTOl2H_M[k0EiTOP3L[M[k0EiTOP3L[M[k0FgTOP3NZM[k0FgTOP3NZM\\k0EfTOQ3NZM\\k0EfTOQ3NZM]k0CfTOS3MZM_k0@fTOU3K[Mlk0e2UTOZMkk0f2UTOYMlk0g2TTOYMlk0g2TTOYMlk0g2TTOYMXk0AgTO05W3LXMXk0KiTOm2OXMWk0\\3iTOdLWk0\\3iTOdLWk0\\3hTOfLWk0[3hTOeLXk0[3hTOeLXk0[3hTOeLXk0[3hTOeLXk0Z3iTOfLWk0Z3iTOfLWk0Y3kTOfLUk0Z3kTOfLUk0Y3lTOfLUk0Z3kTOfLUk0Z3kTOfLUk0Z3kTOfLUk0Z3kTOfLVk0Z3iTOfLWk02eTOb24\\MXk01dTOc24\\MYk00cTOd24\\MZk0ObTOe24]MZk0MbTOf24]MYk0NdTOd23^MXk0OeTOd21^MXk0OiTOc2N^MYk0NjTOe2L]MZk0NjTOf2K\\M[k0OiTOe2L[M\\k00hTOe2L[M\\k00hTOe2L[M\\k01fTOd2O\\MZk0LeTOm22WMYk0JgTOo20WMYk0IhTOo20XMXk0IhTOo20XMXk0IhTOo21WMWk0JhTOo21WMWk0JhTOo21WMWk0JiTOn20XMWk0IkTOm2OZMVk0IkTOm20YMUk0ImTOHHQ36]MVk0JPUOi2J]MVk0KoTOh2K]MVk0KkTOl2OYMVk0KlTOk2NZMWk0JoTOg2K_MXk0GPUO1DdQ1Q:XnN[FS2CPNO21M10=0DfQ1Q:UnN]F8^O=0C000O4MO21M10=0DgQ1f:\\nNUE>1CN0104K021M10<2CgQ1g:ZnNVE?ODN<5^O22S10lNgQ1h:XnNVE5MO2i0=SOk01kNgQ1R;\\nNlDL1:K3a0Bj02kNgQ1R;\\nNWE5C3o1EUNgQ1g8ZnNVI1]N6B3o1EUNgQ1e8]nNWIN^N5D3m1FUNgQ1e8]nNWIN_N1G5j1HUNfQ1f8\\nNVIN`N0H7h1GUNhQ1g8YnNUI1`NML6d1JVNhQ1f8XnNUI3TO2Z1bQ1f8\\nNRFOZ1dQ1d8]nNSFMZ1fQ1c8]nNlHdQ1T7\\nNkFOXONnNhQ1n:\\nNkFOYONmNfQ1o:^nN_F0ZN0Z1NlNeQ1i:enNeF5mN^O14CeQ1h:fnNeF5oN\\O05DdQ1h:enNfF6DAnNdQ1h:[nNPG`0\\O_OlNfQ1i:YnNQG7QNK[1MlNgQ1g:YnNRG72`Q1S;_nNnD`Q1R;_nNoDLjM`Q1X=cnN`DLbNfQ
1P=\\nNkB0V12lNdQ1n:^nNmF0aNJM8EaQ1W9enNiGKi01\\N2N1K]Q1V9inNiGGk02ZNk0LcP1P9PoNVG_O21b12YNl0LcP1o8QoNXG]O21c11YNl0KdP1o8QoNYG\\O12c11YNl0LcP1U9knNUG_O05b11XNm0MbP1V9inNXG]OL:a10ZN8HN4ZQ1V9hnNhGHm0OZN5K11ZQ1U9inNhGHn0NZN4L20ZQ1T9inNjGHm0M[N4L3N[Q1T9inNjGHn0L[N4K4N[Q1S9jnNlGEo0MZN3L5K]Q1R9knN_J]O^M;QO]Q1Q9knN_K^OaK9O^Q1P9knNnJ@XLNK80_Q1o8fnNQKGULLM6NaQ1P9cnNQKJULLM5MbQ1P9bnNSKJTLLN3LeQ1P9anNRKKeLNZOeQ1P9`nNRKMeLLZOgQ1P9^nNYGJS34jMcQ1l7]nNPK1RMcQ1o7ZnNPK3oLeQ1b<\\nN]CeQ1b<]nNQCJXOjQ1g=]nNnBLYOhQ1h=_nNjBN[OdQ1l=^nNgB0\\ObQ1m=_nNfBO\\OcQ1o=[nNhBO\\OeQ1n=YnNiBO[OgQ1m=WnNnBLWOmQ1Z?01O0O101O000000000000kIXnNbLhQ1]3YnNcLgQ1]3ZnNbLfQ1^3YnNcLgQ1^3WnNdLhQ1c9O003M4L1O0000000000000000N2K5O010O1000000000001O0000000000000000000000001O000O1000000000000000O1000001O00000000000000000000000000000000000000O1001O00000000000000000000000000000000000001O00000000\\nN]@[Q1c?enN]@\\Q1b?cnN_@^Q1`?bnNa@^Q1^?anNc@cQ1Y?]nNg@hQ1T?XnNl@PR1l>PnNTAQR1k>omNUAQR1k>PnNTAPR1l>PnNSAjQ1NVnNo>0SAjQ1NVnNo>1RAQR1m>omNSAQR1m>omNSARR1l>nmNTASR1k>mmNUATR1j>lmNVAUR1i>kmNWAVR1h>jmNXAUR1i>lmNVASR1k>900000O1O1fmNSAoQ1o>PnNSAgQ1U?XnNm@cQ1W?]nNj@aQ1W?^nNk@aQ1U?_nNl@_Q1U?anNl@^Q1T?bnNm@]Q1S?cnNn@\\Q1R?dnNo@[Q1Q?dnNPA\\Q1P?enNo@\\Q1P?dnNPA\\Q1P?dnNo@^Q1P?cnNm@_Q1S?anNk@bQ1T?`nNf@dQ1Z?9000O1TnNe@cQ1[?\\nNg@cQ1Y?]nNh@bQ1X?^nNi@aQ1W?_nNi@bQ1V?]nNk@dQ1T?\\nNl@eQ1S?[nNn@fQ1P?ZnNPAiQ1m>WnNSAjQ1l>UnNUAkQ1k>UnNUAlQ1j>TnNVAlQ1j>TnNVAlQ1j>TnNWAkQ1i>UnNWAiQ1k>WnNTAhQ1n>XnNRAgQ1o>YnNPAgQ1Q?YnNo@fQ1R?ZnNm@fQ1T?[nNk@eQ1U?[nNk@eQ1U?[nNk@dQ1V?\\nNj@dQ1V?\\nNk@cQ1U?]nNl@bQ1T?^nNm@`Q1T?`nNl@`Q1T?`nNk@aQ1U?_nNi@cQ1W?]nNg@eQ1Y?\\nNf@cQ1[?90000000O100O1O1O1O001O100000000000000O10000O100]nN]@YQ1c?fnN^@ZQ1c?enN]@[Q1c?enN^@ZQ1b?enN_@[Q1a?enN_@[Q1a?enN_@[Q1a?enN_@[Q1a?enN_@[Q1a?enN_@[Q1a?enN_@[Q1a?enN_@[Q1a?enN_@[Q1a?enN_@[Q1a?fnN^@ZQ1b?fnN^@ZQ1b?fnN]@[Q1c?enN]@[Q1c?enN]@[Q1c?enN]@[Q1c?fnN[@[Q1e?81[nN[@]Q1e?cnN[@]Q1e?bnN]@]Q1d?anN]@_Q1j?000000010O04L1OO1D0PQ1h=bnNTBb04lP1g=cnNTBb05XOJZQ1[=_nNUC=Ab05WOLZQ1Y=bnNSC;Cb06UOL\\Q1X=anNVC9Bd04VOL\\Q1n=f
nN_B=GBK[Q1P?inNVA`Q1]?01O1O001O00001O01O0O1O1O1O001EWnNn@jQ1Q?XnNn@hQ1S?VnNn@kQ1Q?UnNo@kQ1[?00000FUnNo@kQ1P?XnNm@hQ1S?ZnNk@gQ1S?. It provides an interpretation of the data, stating that there is a clear correlation between the performance of neural networks on different prediction types and the frequency of the specific type in the training dataset. It is noted that classification networks perform significantly better than segmentation networks." + }, + { + "image_path": "ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "mask_rle": { + "size": [ + 1584, + 1224 + ], + "counts": "Wa`:9Ua13O1N100000000000000001O00000000001O0000001N2O1O2M3M[bS15_]lN4L4M2O00001N10000000000001O000000001O0000000000000001O0000000O10000000000O10000000000001O000000001O000000000000000000000000000000001O00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O2O00000000000000001O000O101O001O00O100000000O10000O2O0O10000O10000O10000000001O00000001O000000001O000000000000000000000001O000000001O00000000000000000000O10000000000000000000000000000000000000O1001O0O100000000000O10000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000001O000000001O000000O10001O0000O100000001O000000000000000000000001O0000000000000000000000000000O10000000000000O10000000001O00000000000000000001N100000000000000000000000000000O1000000000000000000O10000000O100000001O1O001O1N2O1N5Jfbhk0" + }, + "dataset_name": "ocr_doc_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This is a subsection title within the document that reads \"Performance of segmentation network.\" It indicates that the following text will discuss the results and analysis related to the evaluation of the segmentation network's performance." 
+ }, + { + "image_path": "ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "mask_rle": { + "size": [ + 1584, + 1224 + ], + "counts": "o`]:`06BX_15h`N;0?k^1`1TaNhM[^1l2K1N2N10000O100000001O000000000000000000000001O0000000000001O000000O1000000000000000000000000000000000000001O00001O000000000000000000001O00000001N1000000001O00000O1001O000O1000000000000000000000000000000000000O2O0000000000O100001O000000000000000O10000000000000000000000000000000000001O000000O100000000000000000000000000000000000000001N10000000O1000000001O0000001O00000000000000000000000000000O10000000O100O10000O1000000000O10000O100O1O100O1O10O10O100O100O1N2N2N20000000000000000O2O0O100000000O10000001N2O1N2O0O101OO100000000O2O001N2O0000000O100000000000000000000000000000001O0000001O0000O100O1000000001O001O000001O0000000000O10000O10000000O100000000001O010O01O1O00O100000000000000000000O10000000000000000000000000O10000000O2O000000000000000000O1000001O0000001O000000000000O100O100O100000O1000000000001O001O1O00000O10000000O10000O10000O10O10000000001O0000000O2O000001O00000000001O000000000O1000000000000O100000000000000000001O0000000000000000001O0001O0000000000000000000000000000O10000000000O10000001O01O0000000001N100000O10000O10O1000000000000000001O00001N100000O1000000O1000000000001O0O10000001O00001O001O00000000000000O100000000O100000000010O00000000000000000000000000O100000000O10000000O10000000001N100001O00000000000O1000000000000000000000001O00000001O000000O10001O000000000000000000O11O000000001O000000O100000000000001O0000000000O1000000000000000000000O100000000000000000000000000000O1010O000010O01O000000000O1001N10000O10000000000000O10000O1000000000000000000000000000000000000001N11O00000000001O0000000O100001O0000000O101O00000000001O000000000O100000001O0000001O0O2O001N3M`0@e0lNf_NGWa1Ic`X:" + }, + "dataset_name": "ocr_doc_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This is a paragraph that explains how the performance of the 
segmentation network is measured by the F1 score for pixel predictions for different atom, bond, and charge types. The text discusses how performance correlates with the frequency of these types in the training data and references a correlation visible in Figure 4, assuming that Figure 4 corresponds to the scatter plots in ." + }, + { + "image_path": "ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "mask_rle": { + "size": [ + 1584, + 1224 + ], + "counts": "XU_:7Va15M2N10000000000000000000000000000000O10001O00000O101N4Komn0OSRQO4L3N3L2N100O2O000000000000000001O00001O000000000000000001O000000000000000O10000O10000000000001O0000001O000000001O01O0000000000000000O1000000000000001O0000000000O1000000000000000000000001O00000000000000O1000000001O0000O1000000000000001O0O10001O0000000000000000001O000000000000O10000O100O10000O10000O1000001O000O1000000001O0000001OO10000000000000000000000000001O000001O00O1000000001O0O100000000000O1000000000001O0000000000000000000O0100000000000000001O000001O000000001OO1000000001O000O100000000000O10001O0000000O1000000000000001O001N101O000000000000000000000000000000001O00000000000000000000000000000000O10000000001O0000000000O10000000O1000000000000000000001O000001O00000O100000O1000000000000000000000000000000000000000O100000000000O100000001O00001O00001O00001N10001O0O2O3Jihck0" + }, + "dataset_name": "ocr_doc_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This is another subsection title within the document that reads \"Performance of classification networks.\" It signals that the subsequent paragraph will describe the performance evaluation for classification networks." 
+ }, + { + "image_path": "ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "mask_rle": { + "size": [ + 1584, + 1224 + ], + "counts": "je]::3M6K139=OC41k[11hcNT1JP17WNKGe[1^5M3N2N100O2O00001O000000000001O01O0001O0000000000O010000001O0000001O000000001N1000000000000000000000O101O0000001O00000000000000000O1000000000000O1000000000O1000O010000000000001O00000000000000000000O10O100O1O1O1O1N2O1O1O1O100O10O0100O100O100O10000O1000000O101O000000000O10000000001O0000000O10000000000001N10000000000000000000000001O00000000000000000000000000001O00000000000000000O10001O0000000000O10000000001O000O10000000000000O11O00000000000000000000000000000000001O0000000000000000000000000O1000000000000001O00000000001O0000000O10O1000000000000000000001O00000000000000000O10000001O0000000000O1000O100001O01O000000000000000000000000000000000000000000000000000O2O00O1000000000000000000000001O0000000000000000O100001O0000000000000O1000000000000000000000000001O000000000000O10000000O2O00000000O1000000001N1000000000000000000000000000000000000000000000000O1000000000000000000001O00000000000000000000001O0001O01O0000000000000000001O00000O1001O00000000000000000000000000000000000000000000000000000000000000000000000000000O10000001O00000000000000000000000000000000000000000O1000000000001N1000O10000000001O00000000000000000000000000000000O100000000000000001O0000000000000000000000000000000000000000000000000000000000000000000000000O10000000O10000000O10000000000000000001O000000000001O0000000000O1000000001O000001O00000000000000000O10000000000000O1000000000000000001N0100000000000000000001O000000000000000000001O0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O10000000000O10000000000000O1000001O000000001O00001N101N2N2M." 
+ }, + { + "image_path": "ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "mask_rle": { + "size": [ + 1584, + 1224 + ], + "counts": "_]_:6Wa15M1O2N1000001N100000000001O000000000000000O10000000000O3MjYj0F`fUO3J4N3M2O2N1O2N101N101O0000000000001O00001O001N10000000000000000000O1000000000000000000000000001O0000000O1000000001O00000000000000000O100000001O00000000001O0000000001O000000000000000000000000001O00000O10000000O1000001O000000000000O1000000000000000000000O1000000000001O0000001O0O101O00000000000001O0000O100000000000O2O0000000000O1000000001O0001O0000O100000000000000001O00000000000000000000000001O0000O100000000000000O1000000000001O00000000000000000000000000000O101N1O2N1N3M3KVSmR1" + }, + "dataset_name": "ocr_doc_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This is a subsection title denoted \"Overall graph accuracy,\" which suggests that the following section of the document will focus on the combined accuracy measurements of the previously discussed networks." 
+ }, + { + "image_path": "ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "mask_rle": { + "size": [ + 1584, + 1224 + ], + "counts": "fm]:2Za1=T_N\\l0`1gSOPNNa0Zl0_1hSOoM1`0Wl0KeSOP15bN2a0Ul0LeSOQ14`N4c0Sl0LeSOP1d0SOgk0LgSOo0c0VOfk0IhSOP1c0WOek0HjSOo0b0YOdk0HjSO:KJg04dk0IiSO9MHg06ck0JgSO81Ff08bk0KfSO73Df0:ak0JhSO54De0=_k0GlSO3]14hj0GoSO0]18dj0HRTOK]15K1O01O00000O2O0_ORXOjH2Glg0^7TXOiHSh0U7nWOkHRh0U7nWOkHRh0U7nWOlHQh0T7oWOmHPh0S7PXOnHog0S7PXOmHPh0S7PXOlHPh0V7oWOhHSh0c7O2O2N1O0000O0N3O100001O1OO10OFoWOfHQh0Z7PXOeHPh0[7PXOeHPh0Z7QXOgHng0Y7RXOhHng0W7RXOjHmg0V7SXOiHng0V7TXOhHmg0X7SXOgHng0Y7RXOgHng0Y7RXOhHmg0Y7QXOiHng0W7RXOkHlg0V7RXOkHng0c71O1O10000001O00O1O1EoWOeHRh0[7nWOdHSh0\\7mWOdHSh0\\7mWOeHRh0[7nWOfHQh0[7nWOgHPh0c72O10O1000001O2NN2O2N1001N2O2N1OO1N2O1O1O100O100O2O000000O101O001O100O00001O1O1OO1O1O100001O1O2NO1O1DPXOfHPh0[7QXOdHog0\\7QXOdHPh0[7PXOeHPh0[7PXOeHPh0[7PXOfHog0Z7QXOdHQh0\\7PXObHQh0]7PXObHQh0^7oWOaHRh0_7nWO`HSh0`7mWO`HSh0`7mWO`HSh0`7mWO`HSh0`7mWObHPh0_7oWOdHog0]7oWOdHQh0f701O00000000O100GnWObHSh0g7O001O2N00N2O10000000000O1O1IlWO`HUh0`76004L001N3N2N1O001O103L0001J5DPXOfHRh0Y7nWOfHSh0Z7mWOfHRh0[7nWOfHQh0Z7oWOgHPh0Y7PXOlHkg0T7UXOgHPh0Y7PXOeHRh0[7oWOcHRh0]7oWOaHRh0_7nWO`HSh0`7lWObHSh0^7mWObHSh0^7mWOaHTh0_7lWO`HUh0_7700000000000000000000000000000000O10000001O00O100000000000000000000^ObWOaI^h0^6cWObI]h0Q70ZObWOiI]h0W6dWOiI\\h0X6cWOhI]h0n6000000000BbWOYI_h0d6dWOZI]h0e6dWOZI^h0c6eWO[I]h0d6dWOYI_h0e6cWOWI`h0i6=O1O0O11O00000O11O0O1000O100O2O00O10O1001O00000000000O1001N10000000000000000O100001O0000001O001O0O1O100O100O1000O100000000O2O000000O100001O010O000O10000000001O00000000O101O0O1000000000O2O00aIhVOU6Xi0kIhVON1n5Wi0c0001N1000000000000jIfVOe5Zi0SJgVOL2P6Xi0QJQWOm5bi0YOUVOXKki0f4YVOWKhi0V3VVOfM9mNbi0Z3ZVOgM4oNai0Z3\\VOTMEK>Gai0Y3]VOTMFKoQOCPn0=PROCPn0=oQODQn0mQOCRn0=mQODSn0oQOBQn0>oQOBQn0=PROCPn0=PROCPn0=PROCPn0=PROCPn0=PROCPn0=PROCPn0=PRODPn0;PRODQn0PROAPn0?j0N1L400O1000VQOHkm08UROHkm0:SROFlm0oQOBQn0>nQODQn02]OYm05eRO>2]OZm03eRO`0Rn0^OoQOb0hn000000000000000001
O000000000000O1000000000000000000000O1000000001O0000O10000000O100000000000000000000000000000000001O00000000O100O2O0000O10000000001O0000O101O0O101O0O100O1000000001O0O2O001O00000000000000000000000001O0O100000O10001O00001O0O100000000000000SO]O\\ROc0dm0]O\\ROc0cm0^O]ROa0dm0_O\\ROa0em0^O[ROb0em0^O[ROb0bn00000000000000000000000O1000000000001O00000000000000O10000000000ZO_OlQOa0jn000000000000ZO@kQO`0Tn0AlQO?Tn0AlQO?Un0_OlQO`0kn001OO1YOAlQO?Sn0ClQO=Tn0DkQOSn0BmQO>Sn0BmQO>Sn0AnQO>Rn0CoQO7OPm0OoRO0J95IRm0NoRO0I:6HRm0OnROi00XORm0OnROi00XORm0OnROi00XOSm0NnROOJ:6H2BH7al05fSOO05ON1OYl0NiSON05OOO0Zl0HmSO4K5OON2Zl0EbSOL7905OON2Yl0FcSOK7905OON2Yl0FbSOL7:030ON2Yl0EPTO7I30ON3Pm0JRSO5NO11ol0KRSO6LO4AH;Um0OSSOm05SOhl00RSOn06ROgl02kROS1>kNSl0GnSOn1O[NQl0JoSOk10[NPl0MbSOJ8n16[Nok03jSOb17[Nok05gSOa1:ZNok06fSO`1;ZNnk08fSO^1oMR?bQ1VAkoNFcN4LW=5PC`Q12koNBcN51S=5QC\\Q18\\nNTO`03I;D62P==lBTQ1o0]nNTO7`>;o@A3ZQ1ia0ToNQ^OE5UQ1Pb0\\oNP^OcP1Sb0[oNn]OcP1Tb0]oNl]OaP1Vb0^oNj]ObP1Vb0^oNj]OaP1Vb0`oNj]O_P1Wb0UoNe]OM4nP1Ub0VoNi]OL1mP1Vb0WoNk]OLNmP1Vb0WoNn]ONIkP1Yb0WoNo]ONGkP1Zb0VoNR^ONClP1[b0VoNT^OLAnP1\\b0UoNS^ON@mP1]b0UoNT^OM_OnP1]b0ToNU^OSQ1ka0mnNU^OSQ1ka0mnN\\^OlP1ea0SoN\\^OkP1ea0ToN]^OkP1ca0UoN]^OkP1ca0UoN^^OjP1ca0UoN]^OkP1ca0UoN]^OkP1ga0QoNZ^OnP1fa0RoNZ^OnP1fa0RoNZ^OnP1fa0SoNY^OmP1ga0SoNY^OmP1ga0SoNY^OmP1ga0SoNY^OmP1fa0ToNZ^OlP1ea0VoNZ^OjP1ea0WoNZ^OjP1da0XoN\\^OhP1Xa0QoNV^O8b0gP1Wa0SoNU^O7d0fP1Wa0ToNS^O8e0dP1Xa0SoNU^O8c0eP1da0ZoN\\^OfP1da0ZoN\\^OfP1ea0YoN[^OgP1ea0YoN[^OgP1ea0YoN[^OgP1ea0XoN\\^OhP1ca0YoN]^OgP1Wa0RoNV^O8b0gP1Wa0SoNU^O7c0gP1Xa0SoNT^O6c0hP1Ya0SoNR^O6c0iP1[a0QoNR^O6c0iP1[a0RoNP^O6e0hP1[a0aoNf^O^P1Za0aoNg^O_P1Za0`oNg^ONYOlo0Qb0TPOh^ONXOno0Qb0SPOg^OOXOno0Rb0QPOf^O3WOlo0Tb0QPO`^OhP1ca0VoNR^OTQ1Pb0knNn]OVQ1Sb0knNi]OWQ1Xb0jnNd]OXQ1]b0gnNc]OYQ1^b0anNh]O^Q1ib0N2N1O1O2N00001O00000000000000001O001O1O001O001O0000001O00001O000000001O00000000O1000000000000O100000000O10000000001O01O010O1O010O00O10000O101O00001N101O001O0U]OenN]b0`Q1Z]OhnN21Pb0ZR1Jf0ZO2X_OjlNn?PT1U@lkNR?TT1n@mkNQ?TT1n@kkNT?bS1Z@mlNa0AV?a
S1Z@nlN`0@W?aS1Z@olN>@Y?`S1W@SmN>_O[?TT1c@PlN6M5L_=WT1SBWlN2M5La=PT1UB[lN1M7Ka=nS1TB]lN0N9Hc=nS1QBWmN:mNc=mS1SBVmN9nNd=kS1VB\\lN0O7Ic=SS1SBVmN:G014K`=US1XBRmN>B120N[=[S1aBglNm?XS1g001lM[_ORoN0ODISa0PQ1X2N2O1O100O10000O10O10000O1001O000000000000000O100000000001O01O0VoN\\\\OdP1dc0\\oN\\\\OdP1dc061O000O10000001O000O1000000000O01000000O101O00000000000000O10000001O000O100000000000O1000000010O000000000O100000000000000O1000000000000000O01000001O00000000000001O00000001O000O100000000000000000000000000O101O0001O000001O001Oh0XO2`]OTnN>5AMm`0_S1Q_OelN0NT`0WT1n_OgkN3O0O>1`=hT1lAjkN3N0Bc=gV1]BViNX=TW1O1O2M4M2TCahN_<_W1`CchN_<]W1aCdhN^<\\W1dC`hN^<_W1QQ1TEUPOk:ko0UEVPOj:jo0VEVPOlL_Nc=[Q1aEWPOjL`Nd=YQ1bEWPOjL`Nd=YQ1bEWPOlL\\Nd=]Q1`EWPOi:io0WEWPOi:io0WEWPO`5[N]L^Q1SNWPO`5[N]L^Q1SNVPOa5\\N\\L^Q1SNVPOa5\\N\\L^Q1SNUPOk:ko0UEUPOk:ko0UEUPO\\5_NeL\\Q1oMUPO[5bNeLXQ1PNUPO\\5cNdLXQ1PNUPO\\5cNdLYQ1oMTPO^5cNbLYQ1PNRPOl3aN_J3c3ZQ1RNQPOm3bN_J2b3[Q1RNooNo3eN\\J2b3ZQ1SNcoNiLWOV7J\\J2b3ZQ1SNeoNgLUOX7J\\J2b3ZQ1SNooNo3eN\\J2b3ZQ1SNQPOm3bN^J4a3ZQ1TNRPOk3`N_J5b3YQ1SNTPOZ5jNcLQQ1TNVPOoL_Ne7b0WMiP1UNWPOlLbNe7?ZMhP1UNWPOkLcNe7?[MhP1TNVPOmLaNf7`0YMiP1TNVPOP5QOlLiP1TNVPOQ5POkLjP1TNVPO]N[Nm4f0aNiP1UNVPO]N\\Nm4d0aNjP1UNTPO_N^Nk4d0aNjP1UNTPO_N^Nk4d0aNjP1UNUPOWM]Ne0O^5f0aNiP1UNUPOVM_NX71\\HNe51]I6P5WQ1iMbnN\\Mo0JXO`0L?Md53]I4Q5WQ1iMbnN\\M]19fN`0Md54[I5R5TQ1kMcnN[M^1L`N65h0Lb5c0^NkP1kMbnN[Mg1O`N_7?[MhP1WNkPO[4POmLUP1hNlPOY4SOlLQP1jNnPOY4WOgLko0POnPOY4XOfLjo0QOmPOY4ZOfLio0POnPO[4VOfLmo0oNlPO\\4WOeLmo0oN_POkLZNP1H^5a1iMoo0mN\\POoL[Nm0I^5a1iMoo0mNQPOmLcN60j0L]5a1iMoo0nNaoNnLUO`0K`0OZ5b1jMno0oNVoNm5l0TKno0QOnnNQ6T1nJno0k8SPOUGmo0k8TPOTGlo0l8TPOTGlo0l8TPOTGlo0l8TPOTGmo0k8RPOVGno0j8RPOVGno0j8RPOVGno0k8QPOUGoo0k8QPOUGoo0bLmnNm:V1`Hno0aLonN^ODL0j:`1ZIoo0UL[nNNe0H0Z4[OQ2g1jM]P1PL\\oNNcN41Z3Mi2g1jM_P1lKgoN2YN^3Jj2h1iMmR1ZO\\kNT22gJ1Ig1P4WS1PLUkNc2Kg2:fJm1n3PT1TLkiN3Oj01W4j2gNnT1TL^hNj0KP3n2OfV1F`iN8kV1[OWiNe0kV1_LjeNK<`0OD11Q3`3QX1]LneNMT2e3Z]1N5L5J2N2M2_Mh^N7M9ca1^Oc^N346]a1Eb^N@M6:?Za1_O[^NN[1OeN5ia1K[^NMf12S`1OhX^=" + 
}, + "dataset_name": "ocr_doc_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This is a figure. It includes several images arranged in a grid layout depicting various stages of a document life cycle or processing steps. Each image shows a different state of documents, possibly related to digitalization or text recognition processes. These images likely serve as a visual representation of the document's evolution through a particular workflow, such as scanning or Optical Character Recognition (OCR)." + }, + { + "image_path": "ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "mask_rle": { + "size": [ + 1684, + 1191 + ], + "counts": "YQYh0>Td13O1N100000000000000000001O01O01O01O01O000001O0000000O10000O1000000000000000000000000O10000000000000000000O101N100000000000000O1000O10000000001O00000000000000000001O0O10000000000000000000000000000000000001O00001O00001O00000000000000O10O100000000000O100000000O1000001O000000000000O100000000001O000000000000000000000000000000000001O000000001O01O0010O01O000001O00000O10000O1000000O100000000000000000000000000000000O100000000000000000000000000000000000000000000000000000000000001O1O1O2N2M3M2MfeZh0" + }, + "dataset_name": "ocr_doc_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This is a caption associated with a figure. It reads \"Figure 5: The OCR process.\" This caption identifies and describes the figure that it is associated with. The figure it refers to likely illustrates the stages or aspects of the OCR process, which could involve converting scanned images of text into machine-encoded text." 
+ }, + { + "image_path": "ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "mask_rle": { + "size": [ + 1684, + 1191 + ], + "counts": "mga=4f\\18acN451MA;7Gd0X2]OcMS16jN4O41lW1`5_hNmJWOa4eW1j0nhN3nV12nhN1PW10PiN0PW11nhN0QW11mhN2RW1OjhN4UW1MjhN4VW1MhhN4XW1MghN3YW1NfhN2ZW1NehN3\\W1NbhN2^W1O_hN3aW1`6000000000001O000O01]I`hN2`W1MahN3_W1MbhN2]W1OchN1]W1NehN1[W1OdhN2\\W1NdhN2\\W1OchN1]W1OchN2\\W1NdhN2[W1OdhN2\\W1OchN1]W1b61000000000000000000O11O00010O0000001O000O10001O0001O0O10001O00000000000001O01N100000001O0000000000000000O1O100O1O100000000O101O00O2O000000000ZIdhN4\\W1LdhN4\\W1LehN3[W1MehN3[W1MehN3[W1MehN3[W1MehN3[W1c60000001N10000000000000001O001O1O2N7I1O1O00001O00000fIVhNJkW1O]hNOcW1NahN1`W1LchNVJAd5lW15mhNKSW14ohNKRW14nhNLRW14nhNLRW14nhNLRW14nhNMRW12nhNNRW12nhNNRW12nhNNRW12nhNNRW12ohNMQW14nhNLQW15ohNKQW15ohNKQW15PiNJPW16PiNJPW16PiNJPW16PiNJPW16PiNJPW16PiNJPW16PiNJPW16PiNJPW16ohNKQW16nhNJRW16nhNISW18lhNHTW18lhNHUW17khNIUW18jhNHVW18jhNHVW18jhNHVW18jhNHUW19jhNHVW18jhNHVW18jhNHVW18jhNHVW18jhNHWW16khNIUW17khNIUW17khNIUW16lhNJTW16mhNISW17mhNISW16nhNKQW15ohNKQW15ohNKQW15ohNKQW15ohNKQW15ohNKQW15ohNKQW15ohNKRW15lhNLTW14lhNKUW16jhNJVW17ihNIWW17ihNIWW17ihNIWW17ihNIWW17ihNIWW17ihNIWW17ihNIXW16hhNKWW15ihNKWW14jhNLVW14khNKUW15khNKUW15khNKUW15khNKTW15mhNKSW15mhNKSW14nhNLRW14nhNLQW14PiNLPW14PiNLPW14PiNLPW14PiNLPW14PiNLQW13ohNMQW13PiNLPW15ohNKQW15ohNKQW16nhNJRW16nhNJRW16mhNKSW15mhNKSW15mhNKSW16lhNJTW16lhNJTW15lhNLSW15mhNKSW15mhNKSW14nhNLRW14nhNLRW13ohNMQW13ohNMQW13ohNMQW12QiNMoV13QiNMoV13QiNMoV13QiNMoV13QiNMoV13RiNLnV14RiNLnV13SiNMmV13SiNMnV12RiNNnV12QiNOoV11QiNOoV11QiNOoV12PiNNPW12PiNNPW13ohNMQW13nhNNRW12nhNNRW13mhNMSW13mhNMSW13mhNMSW12nhNOQW11ohNOQW11ohNOQW11ohNOQW11PiNNPW12PiNNPW11QiNOoV11QiNOoV11QiNOoV10RiN0nV11QiNOoV11QiNOoV13ohNMQW14nhNLRW15mhNKTW14lhNLTW15khNKUW15khNKUW15khNKTW16lhNJTW17khNIUW17khNIUW17khNIUW17khNIUW17khNIUW16lhNJTW16lhNIUW17khNIUW17khNIUW16lhNJTW16lhNJTW16lhNJTW15mhNKSW15mhNKRW16nhNJRW16nhNJRW16nhNJRW16nhNJRW16nhNJRW17mhNITW17khNIUW17khNIUW18jhNHVW18jhNH
VW18jhNHVW18jhNHVW18jhNHVW18jhNHVW18jhNHVW18jhNHVW18jhNHWW17ihNIWW16jhNJVW16jhNJVW15khNJWW15ihNKWW15ihNKWW14jhNLVW14ihNMVW14jhNLVW13khNMUW13khNMUW13khNMTW13mhNMSW13mhNMSW13nhNLRW14nhNLQW14PiNKQW15ohNKQW15ohNKQW15ohNKQW15ohNKRW15mhNKSW15mhNKSW15mhNKSW15mhNKTW14lhNLTW14lhNLTW14lhNLTW13mhNMSW13mhNMTW11mhNOSW11mhNOSW11mhNOSW11lhN0TW10lhN0TW10lhN0TW10lhN0TW10lhN0TW10lhN0TW1OmhN1TW1NlhN2TW1NmhN1SW1NnhN1SW1OmhN1SW1OmhN1SW1OmhN1SW1OmhN1SW10lhN0SW11lhN0TW11khNOUW11khNOUW11khNOUW10lhNOUW1NnhN2RW1MohN2RW1MohN3QW1LPiN3QW1MPiN1QW1NPiN1QW1NPiN1QW1OohNOSW10nhNNTW11mhNMTW14lhNKUW14lhNLTW14lhNKUW15khNKUW14lhNLTW14lhNKUW15khNKUW14lhNKUW15khNKUW15khNJVW16jhNJVW16ihNJXW15ihNKWW15ihNKWW15ihNKWW15ihNKWW14jhNLVW14jhNLVW14khNKUW15khNLTW13mhNMSW13mhNMSW13mhNMSW13mhNMRW14nhNLRW14ohNJRW16nhNJRW16nhNJRW16nhNJRW16nhNJRW16nhNJRW16nhNJRW16nhNJRW16nhNJRW16nhNJRW16nhNJRW16nhNISW17mhNISW17lhNJTW16lhNJTW16lhNJTW16lhNJTW16lhNJTW16lhNJTW16lhNJTW15mhNJTW16lhNJTW16lhNJTW16lhNJTW16lhNJTW16lhNJTW16lhNJTW16lhNJTW16lhNJTW16lhNJTW16lhNJTW17khNIUW17khNIUW17khNIVW16jhNJVW16khNIUW17jhNJVW16jhNJVW16jhNJUW17khNIUW17khNIUW17khNIUW17khNIUW17khNIUW17khNIUW17khNIUW17khNIVW16jhNJVW16jhNJVW16jhNJVW16jhNJUW17khNIUW17khNIUW17khNIUW17khNIUW17khNIUW17khNIUW17khNIUW17khNIUW17khNIUW17khNHVW18jhNHVW18jhNHVW18khNGUW19khNGUW19khNGUW19khNGUW19khNGUW19khNGUW19khNGUW19khNGUW19khNGTW1:lhNFTW1:khNGUW19khNGUW18lhNHTW18lhNHTW18lhNHSW19mhNGSW19mhNGSW19mhNGSW19mhNGSW19mhNGTW18lhNHTW18lhNHTW18lhNITW15mhNKSW15mhNKTW14lhNLTW14lhNLTW14lhNLTW14lhNKUW15khNKUW15khNKUW15khNKUW15khNLTW14lhNLUW13khNMUW13jhNNVW12jhNNVW12jhNNWW11ihNOWW11ihNOWW11ihNOWW11ihNOWW11ihNOWW11ihNOWW11ihNOWW11ihNOWW11ihNOWW11ihNOWW11ihNOWW11ihNOWW11ihNOWW11ihNOWW11ihNOWW11ihNOVW12jhNNVW12jhNNVW12jhNNVW12jhNNVW12khNMUW12lhNNTW12lhNNTW12lhNNTW12lhNNTW12lhNNTW12lhNNTW12lhNNUW11khNOUW11khNOUW11khNOUW11khNOUW11khNOVW10jhN0VW10jhN0VW11ihNOWW11ihNOWW11ihNOVW12jhNNVW12jhNNVW12jhNOUW11khNOVW10jhNOWW11ihNOWW11ihNOWW11ihNOWW11ihNOWW11ihN0VW10jhN0VW10jhN0VW11ihNOWW11ihNOWW11ihNOWW11ihNO
WW11ihNOWW11ihNOWW11ihNOWW11ihNOWW10jhN0VW10jhN0VW10jhN0VW10jhN0UW11khNOUW11khNOUW11khNOUW11khNOUW11khNOUW11khNNVW12jhNNWW11ihNOWW11ihNOXW10ihNOWW11ihNOWW11ihNNXW12hhNOVW12jhNNVW12jhNNUW13khNMUW13khNMUW13khNMTW14lhNLTW14khNMUW13khNMUW13khNMUW13khNMTW14lhNKUW15khNKUW15khNKUW15khNKUW15lhNJUW15khNKUW15khNKUW15khNKUW15khNKUW15khNKUW14lhNLSW15mhNKSW15mhNKSW15lhNLTW14lhNLTW14lhNLTW15khNKUW15khNKUW15khNKUW15khNKUW15khNKUW15khNKUW15khNKUW15khNKTW16lhNJTW16lhNJTW16lhNJTW16lhNJTW16lhNIUW17khNIUW16lhNJTW16lhNJTW16lhNJTW16lhNIUW16lhNJTW16lhNJTW16lhNJTW15mhNKSW14nhNLRW12QiNMoV12RiNMoV11SiNOmV1OUiN0mV1MUiN2lV1KWiN3kV1KWiN3kV1LViN2lV1MUiN3kV1LViN3kV1MUiN3lV1KUiN5kV1KUiN5kV1JViN6jV1JViN5kV1KUiN4mV1JTiN3oV1MQiN2PW1MQiN2QW1MohN2RW1MohN2RW1MohN3QW1RNdfNYO[2d2SW1PNhfNWOV2h2SW1PNifNWOT2h2UW1oMjfNVOR2Z1_MhNfY1g0lfNTOP2Z1eMdMIf0lY1U1SgNjNd1e1TNeMjY11TfNi0=Ib1e1PZ1[NedNK^1:bM3`\\1BfdNNY1:iMN]\\1GfdNJX1?hML^\\1GogN8gKMna1OV^NKPb11[Yd=" + }, + "dataset_name": "ocr_doc_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This is a figure. It shows a piece of text with visual markings comparing two sections labeled \"Available OCR\" and \"Improved OCR.\" The annotations indicate corrections or enhancements made in the 'Improved OCR' section compared to the 'Available OCR' section. This figure serves to demonstrate the efficacy of certain OCR technologies or methodologies by providing a before-and-after comparison." 
+ }, + { + "image_path": "ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "mask_rle": { + "size": [ + 1684, + 1191 + ], + "counts": "RnZ7:?JXb1`0`]N2LG]b1h1N2N100O2O000000000000000000000000000000000000000001O00001O000000O1000001O0000O1000000000O10001O00000O10O100000000000000001O01O000O100000001O000000000000000000001O00000000000000O10000O1000000O100000000000000000000001O0001O0000000000000O10000O10000O10001O000000000001O00001OO100O100000000000O10000000000000000000O10000000000O1000000000000001O0000000O10000000O101O00O100000000000000000O1000000001O00000O100001O00000O1000O10001N1O010O1O1O1O1O1M3N20000001O000000O10000000000O1000000O1000001O00000000O100O100N2O1O100O10000O100001O1O3N0O1O0000000000000000000O10000O100O100000001O00000001O01O00ROSNU_Nm1ha110000O10000000001O00000000000000001O00O2O0000000O100000000000000001N100O2N100001O00000O101O0O10001N1O10000000O1000000000001O00000001O00000000010O01O01O01O000000000O100000O10O10O10O0100000000000000100O0010O00000000O100O100001O000O101O0000000O1000000000O100000000O100000000000000000O101O0000000O10000000000000000000000000000000000000000O1000000000000001O00010O001O01O0001N10000000000O100000000O100000000001O0000000O100000000000000000O10O1O1O1O1O100000000001O000001O00010O01O000O2O000O10O2N1N2N2O101N100000001O0000001O01O000000000000000O2O000000000O10O1001O0O1000000000O1000000000000000O10000001O0O2O001N10000001O000000000001O0000000000O100O1000000000000010O0000000O1000000000001N100000000000O100000000001O000001OO10O100000000000000000O1000000001O0000000000O100000000001O0O100001N100000000000000000000000000001O0001O00000000000000001N101O00000000O100O10000000010O0000000000000O100000000001O0000000000O10000O1001O0000000000000000O1000000001O00001N10001O01O0001O002O0O2N100O1O000000000000O100O1O001O1O1O10000O1000000O1000000O10000000000000000001O0O2O00001O00000000000000O1000000O1000000000000O10000000O100000000000000001O01O0000001O00000000000000000000000000000000000000O1001O00001O0000000000000000000O100O2O0N2N2M4L4L3M4J7L6Jf_V7" + }, + 
"dataset_name": "ocr_doc_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This is a caption associated with a figure. It reads \"Figure 6: Excerpt from the Hong Kong report with different versions of OCR output. The Internet Archive image containing this excerpt can be accessed here:\" followed by a URL. This caption provides context for the associated figure, indicating that it is an excerpt from a specific report and acknowledges the source of the image. It helps readers understand the purpose of the figure and where they can find additional information." + }, + { + "image_path": "ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "mask_rle": { + "size": [ + 1684, + 1191 + ], + "counts": "h]Y7d0kc17L2O2O1N10000000000000000001O01O0000000000000000000000000000000O100000000000000000000O1000000000000000000000000000000000000O1000000000000000000001O00000000000000000000000000000000000000000000O1000000000000000000000000000000000000000000001O00000001O0000000000000001O00000000000000000000000001O00000000000000000000O1000000000000000O100000000000000O10000000000000000000000000000000000000000000000000000000000000000001O000000000000000000000000O100000000000000000000000000001O00000O100000000000001O000O1000000000000000001O000000000000000O10000O1O1O1N1N3N2M3O100O100000000O100000001O000O10001O0000000O101O0O2N^mZV1" + }, + "dataset_name": "ocr_doc_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This is part of a footer. It contains the name of a journal, \"Journal of Data Mining and Digital Humanities,\" along with the ISSN number, which is a unique identifier used for serial publications. This area of the document provides information about the publication in which the article or research paper may be found." 
+ }, + { + "image_path": "ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "mask_rle": { + "size": [ + 1684, + 1191 + ], + "counts": "ico[19Zd13M101O000000000000000000001O00000000001O0000000000O1000000000000000O100000001O00000000000000000000O10000000000000001O0000000000000000000000000000O10000000000000000001O00001O00000000000000000001O0000000000000000O01000000000000001N100000000001O00O10O100000000001O0000000000000000001O0000000000000000000000000000000000000000000000000000000000000000000000000000000001O00000001O00000000001N4KdZV7" + }, + "dataset_name": "ocr_doc_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This is also part of a footer. It includes a URL, \"http://jdmdh.episciences.org\", which likely directs readers to the website of the journal or publication mentioned in . This URL provides a way for readers to access more information or related content online." + }, + { + "image_path": "ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "mask_rle": { + "size": [ + 1684, + 1191 + ], + "counts": "[UZn0=Vd13N001N100000O2N101N2M7GkRWn0" + }, + "dataset_name": "ocr_doc_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This is a page number. It is located in the footer area of the document and provides the numerical identifier \"9\" for the current page. This helps readers navigate the document and facilitates referencing specific sections." 
+ }, + { + "image_path": "ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "mask_rle": { + "size": [ + 1025, + 1025 + ], + "counts": "giYh07io02N101O000000000000000O1000000001OO100000000001O00000000000000000001O00000000000000000000000001O0000000000O10000000000000000001N1000O100000000000000000000000001O0000000000000000000000000001O0000000000O100000000000000000000000O100000000001O000000000000000000O10000000000000000000000000001N10001N1OfQS3" + }, + "dataset_name": "ocr_doc_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This region is identified as the page-header of the document. It contains the title of the document, which reads \"2012 Annual Report 2013.\"" + }, + { + "image_path": "ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "mask_rle": { + "size": [ + 1025, + 1025 + ], + "counts": "fWS34ko03O001OO10001N02O0O20N1OSQ2KSoM00000_o;1[PD8L2PQOFVn0;hQOHWn09gQOHYn08fQOIYn09fQOHYn09eQOH[n08eQOH[n08dQOI\\n07dQOI\\n07cQOK\\n05dQOK\\n05dQOK\\n05eQOI\\n07eQOH[n08fQOGZn09eQOH[n08dQOI\\n07cQOK\\n05dQOJ^n05cQOJ]n06hQOEYn0:gQOFYn0:hQOEXn0;gQOFYn0:gQOFYn0:bQOCE4in09aQOL_n04bQOCE4in09gQOFYn0:gQOFYn0:gQOFYn0:gQOFXn0fQOAZn0`0eQO@[n0`0fQO_OZn0c0cQO^O]n0c0bQO]O^n0d0ZQOZO04en0b0[QO[OO3fn0a0bQO_O^n0=fQOB[n0iQOBWn0=jQOCVn0>jQOAVn0?f000000000000SQOBWn0>iQOBWn0>ZQOB71_n0=YQOE6O`n0WQOCXo0=iPOB90_n0=gQOEYn0:gQOFYn0:fQOGZn09fQOGZn09YQOD63an09YQOD63an09YQOD63`n0:hQOEXn0;hQOEWn0;jQOEVn0iQOBWn0>jQOAVn0?jQOAVn0?jQOAVn0?jQOAVn0?ZQOA91]n0>ZQOB80^n0>ZQOA:0\\n0`0YQO@;0\\n0`0iQO@Wn0`0iQO@Wn0a0d0O1000001OSQO@Yn0?hQOBWn0>iQOBWn0>iQOBWn0>f00000001RQO@Yn0`0fQOBYn0>fQOCZn0=fQOCZn0=WQOB;0^n0>VQOD;N_n0>VQODiQOBWn0>iQOBWn0>YQOB:0]n0>YQOC9O^n0>YQOC9O^n0?XQOA;0]n0?hQOAXn0?hQOAXn0?hQO@Yn0`0gQO@Yn0a0fQO_OZn0e0bQO[O^n0e0bQO[O^n0`0gQO@Yn0`0d0O10000000SQOAXn0?XQOB:0]n0?hQOAXn0?hQO@Yn0`0gQO@Yn0`0gQO@Yn0`0d01O0000O100OTQOBWn0
>iQOBWn0>iQOBWn0>iQOBWn0>iQOAXn0?hQOAXn0?hQOAXn0?hQOAXn0`0gQO@Xn0a0hQO@Wn0`0iQO_OXn0a0hQO_OXn0`0iQO@Wn0`0iQO@Wn0?jQOAWn0>iQOCVn0iQOBVn0a0hQO_OXn0a0hQO_OXn0b0gQO^OYn0c0eQO^O[n0b0eQO^O[n0a0gQO^OYn0a0hQO_OXn0a0d00000000001O00000001O3M1oPO]Obn0c0]QO^Ocn0b0^QO]Obn0b0`QO]O`n0b0bQO]O^n0c0bQO]O^n0d0bQO[O^n0e0bQO[O^n0f0aQOZO_n0f0aQOZO_n0f0>O1O1O11O1O10O0O1O1N200002N1O0000L4O10000001O0000O10000000000000000O10O101O0001N100000000000000001N1000001O0000O100O10000000000000000000O101OO101O00000000000000000000000000001O0O100000000000001O001N101N103Ji[Z`0" + }, + "dataset_name": "ocr_doc_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This portion is a section-header labeled \"Non-Executive Directors' Remuneration.\" It indicates that the following section will discuss the payment and remuneration details for non-executive directors of the company." + }, + { + "image_path": "ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "mask_rle": { + "size": [ + 1025, + 1025 + ], + "counts": "cXR31no03N2N1gPONcn02]QOOan02`QOLan04f00O11O0000lPOI^n07aQOK^n05bQOK^n06aQOJ_n06aQOJ_n06aQOJ_n06aQOJ_n06aQOJ_n05bQOK^n05bQOKbn00`QOOYo00000O_o73]PH2jPOL^n06]QOOan02_QONan03_QOL`n05cQOG^n0:c010000oPOD]n0=aQOE^n0;bQOE^n0;bQOE_n0:aQOF_n0:bQOE^n0;bQOE^n0;bQOE^n0;bQOE^n0;bQOE^n0;bQOE^n0;bQOD_n0" + }, + "dataset_name": "ocr_doc_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This area is another section-header specifying \"Components of Non-Executive Director remuneration.\" This header suggests a breakdown of the various elements that constitute the remuneration for non-executive directors." 
+ }, + { + "image_path": "ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "mask_rle": { + "size": [ + 1025, + 1025 + ], + "counts": "Qjak07io02O00000O100000000000000000000000000000000000O10000000O100000000000000000000000O100O100N3LgTR3" + }, + "dataset_name": "ocr_doc_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "Found at the bottom of the page, this region is the page-footer. It's a small section that is typically used for providing footnotes, disclaimers, or publication information for the document." + }, + { + "image_path": "ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "mask_rle": { + "size": [ + 1025, + 1025 + ], + "counts": "gaS3Z5[OR:Lh]OM4?h2?W5[OS:2m]O5h2?T5AZHAXa0<`^O4j2>S5FVHBXa06e^O4j2?Q5GWHCUa04h^O4k2>Q5HVHCUa04g^O4l2?P5GWHCUa04h^O3l2?P5HVHCUa03h^O5l2=Q5ITHCWa02h^O5l2X9KaAJW5;X9JbALU5:Y9IbANU59Y9IbANU58[9J`ALV5:Z9J`ALV5GTHBYa0Oh^O8g2>U5FVH\\O^a08a^O7f2?U5EWH[O^a0;_^O7g2>U5EWHZO_a0=\\^O6i2?T5FWHXOOOZa0V1\\AMT5FWHXON0Za0V1]ALT5GVHWOO1Za0S1`ALQ51UHPOZa0S1aAKP5@VHKOGZa0;f^O4m2>n4@XHKMHZa0l4AYHJMHZa0Q1dALl4BWHJ0FXa0S1dALm4BVHI1FXa0T1cAKn43WHmNYa0U1bAKj4nNZHU12lNXa0V1bAKo4AUHIba0m0XAIQ5_OXHJ`a04`^O7c25NZa0d0mAH_4E\\HMZa0=SB1Y4ZOSH092EOca0>YB6l4ZOS9`0QB6kf0JUYO6l4ZO\\GO`a0a0XB6T4ZOSH19OH0`a0?XB7S4\\ORH1:MH1aa0=YB8S4[OQH2b0LSa0=n^OCc2e0S5\\O\\:2n]OOc2`0Ph0@]UO1b2>kh0BUWO=lh0CTWO=lh0CTWOXl001NiJGhZO9Xe0GiZO8We0HhZO9Xe0GhZO9Ye0EhZO;^j0101O1N3N1aXO\\O_?c0a@^O`?`0a@@_??b@B^HN`f0?SAC]HN`f0?SAB]?>b@CV:3lB9o2CV:5jB8i2JnHHf`06bC9j2JlHIh`02cC3dCS=aMkBk>3cCR=m[BoB4RNc=n>YBoB5RNc=n>YBPC4RNc=n>YBVBLjN0:8Ib=m>ZBTBOjNN;8J`=n>[BSB0iNN;7K`=n>[BTB6TONIb=o>ZBVB6dNI650T=JPCU?JWB:iNINT=NoBT?JWB;hN[=M`BT?JWB;hN\\=K`BW?GVB>hNj=R?hAUB?iNi=R?hAUB?hNj=S?gAWB=gNl=Q?hAYB:gNn=n>iA[BLbN86R>m>kA`B3cNQ>n>jA`B4dNQ>l>kA_B5eNP>l>jA_B7_NF0Y>R?jA_B7^NH0d=1gBR?G]B8^Nb=3_BQ?H\\BY>dNoAP?HYB\\>gNlAP?IWB\\>
iNkAP?HWB0fNU>3SBP?HWBLjNY>ORBQ?IUBLlNY>MSBR?HUBLlNZ>LRBS?HVBKkN\\>KPBU?I_BW>[NPBV?J^BV>\\NPBV?J^BV>]NoAV?J]BV>^NPBU?ISBMmNX>MRBS?IRBNmNX>NQBS?IRBOlNX>NPBT?IRBOlNZ>LmAV?KQBOmNY>KnAW?KgAJWO7Nd>T?[AgAKUO71b>S?[AQB0mNe>R?[ARBNmNf>R?\\AQBNmN6JK100i=W?XBQBNmN6JJ21Oj=V?WBRBNmNP?[AhAKUO62d>Q?[AhALSO64c>Q?ZAiA9UO]>S?ZA\\AJC2L;1`>T?YA\\AKB1M;0a>V?WAeAV?WAfAX?VAfA?ROX>X?YAgANRO90`>W?YAhAKTO9Oc>U?XAQB1lN=Ke=Y?mAQB0kN=MDOi=X?VBQB0kN=MDOj=W?UBQB1lNR?ZAgAMTO71b>T?ZAgA:UO]>T?XAhA:TO^>V?WAfA;SO_>W?VAdA=TO_>V?UAfA=SO^>W?VAfA^?Y>b@gAn>SOZAV?IfA]?Z>b@gA]?Z>b@jA8ROa>U?UAlA7PO9MBOk=Y?SBkA7PO9MB0j=X?UBiA7RO8Md=W?oAgA8TO_>U?[AcA;VOY>W?\\AUAINa?m>g@TAHO`?n>h@SAJMQ?GWAY?ORAJKQ?LUAW?LVANHP?KWAW?IfAn>SOYAW?IPB2gNZ>2lAW?GVBMdN]>OoAQa0P>o^OPBQa0o=P_OPBQa0o=P_OQBf`0E^_Oa0IU=3dBg`0F]_Oa0ID0Z=2lBQa06m^OX=OdBSa0`>O0XNVAYBj>h=TATBOcNn>f`0SA[_OP?\\`0RAc_O10l>n?QA]@2E2Ok>X?TAUA1NNFT?W?QAPAO;o>c>UAi@4:AOU?n>gA_AX>a>hA^AY>b>gA]AXO^Oj>V?mA\\AXO@j>T?XAn@9>E@k>R?WASA7;G_Om>R?UAYA18MZOo>U?SAZA0KNOS?No@_>OjA1J4Im>2UA]>InA2Hb?Hf@a>FoA3GQ`0\\>j_OmA6EQ`0^>h_OnA7AU`0a>c_OmA9AU`0a>c_OlAk`0T>U_OjAZ`0Ef_O2:_>EiAb`0Ej_Oo>d`01U_OZAZ`0f>d_O\\A\\`0c>c_O_A[`0b>f_OTAN3[`0h>i_ORAN6c?In@o>GXA[?Jm@n>IWAZ?Jm@Q?HUA[?Jm@Q?HUA[?Jm@R?_OPA24b?Jh@1CS?Oo@43a?Kh@]?Hd@c?0d@]?d?d@[@\\?e?e@Z@\\?a1a@V<3YB]?`1`@W<2ZB^?_1a@V<1\\BMG[?7f@c00_O2d=0aBX?Hh@3N10O2d=ObBX?Gk@HN4N72O0e=ObBX?GRAKE7300d=N^BOEQ?ORA8:0N2Ic=M_BOEP?1RA6?IM9Fc=M_BOEQ?0QA7S11nNe=N^BNFR?NRA9a0FJ9Ef=NcBW?H\\AGJ8Cg=0aB>AV>8dBLhNj=0\\B^?LlALgNl=0ZBd?HgAOhNm=N[B`a0c=`^O]Baa0a=`^O_B`a0a=`^O^Baa0b=_^O]Bb0NR`0e=[_O]Bd0NT>LnBi=ZN^Bc0MQ>0RCe=ZN`Ba0GK0g=6cCd=YN_Bb0F@?P>HdCe=ZN^Bb0F@?P>HdCe=ZN^Bb0F_O`0Q>GdCe=ZN^Bb0G^O?S>FcCf=YN^Bc0=a=_OdCe=XN^Bd0>b=]ObCg=XN_Bd0HZO`0Z>_OaCj=XN^Bi07]?Z=j_O`Bh06^?Z=k_O_Bh06^?Z=i_OaBh06_?Y=i_O`Bi07^?X=j_O`Bh09^?W=j_O_Bh0;^?W=i_O_Bj08\\?Z=j_O_Bj0JSO7Y`0`=j_O`Bi0ITO8X`0_=k_O`Bi0IUO7V`0a=l_O^Bc0G_O3L7U`0b=m_O]Bc0H^O2L8U`0b=m_O[Be0`0\\?W=o_OYBe0`0T?YOY@o=NYBe0H\\Oa0f?@\\@n=LYBl09k>A^@l=J\\Bl08j>A`@Y>e0VBR>CUAN074Q>f0WBP>;ZA^=f0WBn=OWAI4Q>h0VBl=2XAH3P>h0WBl=2
YAG2S>g0TBn=b0YA\\=h0QBo=X?[AW@;0Da0i=_OeBS`0Cm_O:2D?i=AeBQ`0Dm_O:f0^=\\OcBQ`06c@W=[OdBR`05c@X=ZOcBS`05c@X=ZOcB[`0L]@R>e?eAd_O0h0Y=VOeCba0Z<_^OfCaa0Y<`^OgC`a0Z<_^OfCaa0^=O001O0001O0\\O`^OgB`a0Y=`^OgB`a0Y=_^OhBaa0l=0lN_^OhCaa0X<_^OhC\\`0nNi_O1f0X=VOhC\\`0POf_O2h0U=WOhC1ROR`01m_O10Nb0V=^OgC1UO8M\\?1Z@31Lc0X=[OhC1UO8Lc?2o@a>\\O_Ad?OQAe>XO]Ag?KTAj=QORC4YOh`0d=V_ORC2ZOc?K\\AZ=nNPC5>O^O`?L]AW=SOkB5b0O]O\\?O\\AW=FTCZOH9M[?0\\AW=FTCZOH9L]?0[AW=E\\Cc`0d<\\_O]Cd`0c<\\_O\\Ce`0dc@gB1mN\\?[>e@_BIVO:MX?^>f@^BIUO;NV?_>f@]BKUOn?^>W@]BP`0c=o_O]BR`0d=m_OZBT`0g=l_OYBT`0g=k_OmAMJ4Ij?`>U@jA0MON4KY?`>d@gA31KO2K[?^>e@gA22H32I_?\\>d@fA1c0K\\O`?[>c@iBNlN_?[>c@iBNlNg0Jl=b>]AiB0kN`?\\>^@jB2jN`?n>b@VANL3N]?P?d@TAMK6OY?R?f@QAMK70W?T?f@n@S`0R?n_Ok@S`0W?;Oa_Ok@T`0T?l_On@S`0a>d_OjA9Hb>]@jA7MR?b@YA0D54X?c?a@]@31\\?l?c@T@]?l?b@U@f0Jl=Q`0^AU@f0In=S?[A]A1E02_?k>f@XAMF36Z?l>h@VA6MR?m>h@UA8MP?n>h@UA7MQ?P?g@RA9LR?R?e@QA:MQ?S?\\@o@21a0MQ?c?n@_@P?b?b@_@11\\?a?a@a@0O_?m?_@T@a?l?_@T@a?l?_@U@`?o0`@b=0_A`0GF1_>V1]Ab=N`Aa?l0b@o=_?PBb@Y=LVBd??a@Z=MVBc?>a@\\=MUBc?=a@]=8jAX?9Z@HON9V>6jAZ?8Z@HON8X>6hAZ?;W@H?U>0hAZ?c0e@f=1hAX?`0Z@]Oe_OeA\\`0[>e0000O101O5K1O0O2o^OZAe0OQ?h>X@ZAf0OR?g>X@[Ae0NS?g>X@ZAf0OS?f>X@YAf01R?Y=Q@iC0ROYa0T=h^OiC2POWa0V=h^OjC1POWa0W=h^OhC1ROVa0V=i^OhC2ROTa0V=j^OiCOSOWa0T=j^ORDVa0l;k^ObC0D2DN30N;Nb?W=S@aC4CL=4]Oe>NXAMa0W=^O`C5DJ>4^Od>MZALa0W=^O`C5EI>4^Oe?P=W@aC6CJ?3]Of?P=W@aC6CIa03[Og?Q=V@aC6a0MmNg?Q=V@aC5b0MmNh?k=[@ZCKkNk?j=Y@hCg?R<^@oCb?P<_@QD_?P<`@QD`?o;_@RDa?n;^@SDc?l;]@UDb?l;^@SDb?m;`@RD`>RNSBR>XOkCd?V<\\@iCd?X<[@iCd?Z>0O1PNY@fCg?U<^@kCb?T<_@lC[>SNbBo=UOmCZ>TNaBn=VOnC_?R0000000QN[@bCe?\\<]@dCd?W<`@iC`?V1OO1000000000O1000000000000001O0O100000000000O101O0iM[@RDe?n;[@RDe?m;\\@SDd?m;\\@SDd?n;[@RDe?U>000O10O2O000001N1cNY@QB2^Of?U>Z@gA6OL23Od?X>TAeA[O2b?Y>TAcA\\O2a?[>SAcA\\O1b?[>bAeA^>\\>aAdA_>]>`AcA`>]>R@fA9O?NV?e>Y@_A4LM0f?e>X@`A5KM0f?f>W@_Ab0KW?g>U@^A6KI021j?f>U@^A5LJ02Oj?g>U@^A5LJ02Oj?h>T@^A6IK120i?i>R@^A=Jj?S?U@n@j?R?c0EY_O[Ah`0d>Z_OXAi`0a>V_O^A21h`0h>W_OYAh`0h>U_O]A?Ig?U?V@n@k=NbCU?^NQALJl=0jC
U?]NSAKJm=NkCU?\\NTALIk?T?X@SAMIk?T?W@TAMIl?S?W@TAMIl?S?X@TAKIm?R?Y@UAKGm?T?Z@SAl?m>U@RAk?m>Y@o@h?Q?Y@n@g?g>j_O^A`0Kf?g>j_O_A?Jg?S?W@n@GLm?U?W@UAJHP>NjCU?[NVAJJo=KlCV?ZNUAJLl?P?Y@UALIl?R?X@UALIl?S?W@TALJm?R?W@\\Ai?c>X@]Ah?c>Y@[Ai?c>Y@[Ah?e>X@ZAi?f>W@VAIFo?T?^@PAg?e>k_O_A>Lg?e>k_O_A9GJ50Km?V?W@UAKGm?T?X@\\Ag?d>Y@]Af?d>Y@]Af?c>Y@^Ag?b>Y@^Ag?b>Y@^Ag?b>Y@^Ag?b>Y@]Ah?c>X@]Ah?b>Z@\\Ah?c>X@\\Ai?d>W@\\Ai?c>X@]Ah?X>P@`A8IJ4NMP`0Q?X@o@H40LP`0R?W@]Ah?d>W@]Aj=]OlCV?YN_Aj=[OmCV?YN_Ah?b>W@^Ai?b>W@^Ai?b>V@_Aj?a>V@_Ai?b>W@^Ai?b>X@]Ah?T=V@bC2YOi?T=W@bC0YOi?V=W@`C1ZOh?U=X@aC0YOi?W=V@`C1YOi?X=T@aC2XOj?e>V@[Aj?f>U@\\Al=[OmCY?UN^Am=YOnCY?UN^Aj?c>V@]Aj?c>V@^Ai?b>W@^Ai?b>W@^Ai?b>W@]Aj?c>U@]Al?\\?1N1XOS@[An?d>T@[Al?e>T@ZAm?f>S@[Al?e>S@]Al?d>S@]An=ZOmCS`0RS@ZAm?f>S@ZAm?f>S@ZAo=]OlCX=TNeD0hNn=]O^BOo0W=eN_El=[M`BOn0X=fN^El=ZMaBOn0S`0aO1YOQ@\\Ao?d>R@ZAo?f>Q@ZAP>^OkCo?S000O1SNP@jCP`0TO100005mMj_OdBOm0W`0_fNfCi=TNcB0n0U>iNXBLn0i=eNdB0n0V>hNWBMn0g?m00000000000gM_@RBLj1f?S<_@SBLT1MQO14g?e=^@RBMi1e?^>aNj_ORB0`0V`0oTJSCS1nNg2JR10P1U>TJSCT1mNi=n=UATCS1lNi=a=QAgB3ON?U1YOj=a=PAhB3ON?U1YOj=P>SAVCS1kNi=n=UAWCQ1lNj=m=TAWCR1nNg2HT7S>SEWC7gN=8T3HT7e=QEfB4m06hN9O^O:h3JU7a=TEgB1m05jN9IUO1m00jN09l0LTOS1X19hNdNT1NV3md0D=Hf1e0\\MXOc1l0LXOaN8LN2Hg0i3WgMe0HXOll0JRUO4em0H^VU3" + }, + "dataset_name": "ocr_doc_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This is a table detailing a \"Share purchase plan.\" It contains columns for the name of the individual, the amount of shares acquired, and the share price range at acquisition dates, alongside with the total sum. It lists information about shares acquired by specific individuals at specified price ranges during a specific time frame." 
+ }, + { + "image_path": "ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "mask_rle": { + "size": [ + 1025, + 1025 + ], + "counts": "`[R31>2_n0W1XOjNWRO\\1EeNim03ZROd1em0]NXROe1hm0:01O00000O1000001O001O00000000000000O1000000001O0000000000000000000001O0000O100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O100001N10000000000000000FeMnRO[2kl0dMSSO12[2kl0dMSSO12[2jl0fMSSOO3[2jl0fMSSOO3[2jl0fMSSOO3[2jl0fMSSOO2\\2kl0dM]SO\\2dl0cM\\SO]2dl0cM\\SO]2dl0cM\\SO]2dl0cM\\SO]2dl0cM\\SO]2dl0cM\\SO]2kl0dMmRO\\2Sm0eMlRO\\2Rm0eMnRO[2Rm0eMnRO[2Rm0eMnRO[2Sm0dMmRO\\2Sm0cMnRO]2Sm0bMmRO^2[m000001O000000O10001O0000000000O1@bMTSO11]2jl0cMTSO21[2kl0dMSSO12[2kl0eMQSO14Z2kl0dMSSO12[2ll0bMSSO31[2Sm0cMnRO]2Rm0cMnRO]2Rm0cMnRO]2kl0bMSSO31[2ll0cMRSO22[2kl0dMSSO13Z2jl0eMSSO13Z2jl0eMSSO13Z2jl0eMSSO13Z2jl0eMTSOO3\\2jl0cMTSO11]2[m00000000001O000O101O000000000001N10000000000000000000000O10000@bM]SO^2bl0cMVSOO1]2il0eMTSOO3]2hl0dMTSO04\\2hl0dMUSOO3]2il0bMUSO12]2Zm0O100000_OcMTSO12\\2il0eMTSOO3\\2il0eMTSOO3\\2il0eMTSOO3\\2il0eMTSOO3\\2il0eMTSOO3\\2jl0cM^SO]2Sm00000000000000001O00000000O100000000000000000000000000000000000001N10000@aM^SO_2bl0aM^SO_2bl0aM^SO_2bl0aM_SO]2Sm001O00^OdM_SO\\2al0dM_SO\\2al0dM_SO\\2`l0eM`SO[2`l0eM`SO\\2_l0dMVSO03\\2gl0eMUSOO4]2fl0dMUSO05\\2fl0dMUSO05\\2fl0cMWSO03]2fl0cMWSOO3_2fl0bMWSOO3_2nl0bMQSO^2ol0bMQSO^2nl0cMRSO]2nl0cMRSO]2nl0cMRSO]2nl0cMRSO]2ol0bMQSO^2ol0bMQSO^2ol0bMQSO^2Zm0O0000000000000000000O100000000000EdMQSO\\2ol0dMQSO\\2ol0dMQSO\\2Zm0000000000000O1000000001O000O1000000001O0000EcMRSO]2nl0cMRSO]2nl0cMRSO]2Ym000000000000000000000000000O1000000O10010O0000000000000O1000000000000000000000000000000000000000000O1000000000000000000000000000O10000000000000O10000O1O1M3N2O100N200O100000000O10000O10001O00000000000000000000000000000000000000000000O100000O1000000001O0000000000000000000000O1000010O0000000000000000O10000001O000000000000000000000001O0000O100
00000000000000000O100000000000000000000000O100000000001OO1000000001O000000000000000001O0000000000000000000000O10000000000000O1001O0000000000000000000O100001O00000000000000000000001O000000000O11O000O1000000000000000000000O10000001O00000000O110O000000000000000000000000000000000000000000000000000000000000000000000000000000000001O00O10000000O101O00O1000000000000000000000001O000000000000000000000000000000000000000000000000000000000000000001OO100000000000000000000000000000000000000000000000000000000000000000000000000001O00O100000000000000000O11O000001O00O10001O01O00000000iNPRO=Pn0AVRO;jm0DXRO;hm0D[RO9fm0F]RO8cm0EaRO:_m0EcRO:]m0FcRO:]m0FcRO:]m0FdRO9\\m0FeRO:\\m0EeRO:[m0EgRO:Ym0FiRO8Wm0HiRO8Wm0HiRO7Xm0IgRO8Ym0HfRO9Zm0GgRO8Ym0GiRO8Wm0HjRO7Wm0HjRO7Vm0IjRO7Vm0IiRO8Vm0IjRO7Vm0IiRO8Wm0HkRO6Um0JlRO5Um0JlRO5Tm0KmRO4Tm0KmRO3Tm0LmRO4Sm0LmRO4Sm0JoRO5an0M^jX3" + }, + "dataset_name": "ocr_doc_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This is a block of text providing detailed information on \"Current Board fees\" and \"Post-employment benefits.\" It specifies the annual fees for different board roles and outlines the post-retirement benefits provided to non-executive directors with terms of board service." 
+ }, + { + "image_path": "ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "mask_rle": { + "size": [ + 1025, + 1025 + ], + "counts": "VYR3638Wo0;M2PQOA[n0T1O2hQO`Nkm0`1:10^OfQO]OZn0c0gQO\\OYn0W1O1O1O00O1000000000kQO]Nlm0d1SRO\\Nmm0d1SRO\\Nlm0e1800000000000000O101[OeQOA\\n0?eQO@[n0S1O2O000000O_O\\NgROd1Xm0^NgROb1Ym0]NhROc1Ym0\\NgROd1Ym0\\NgROd1Ym0\\NgROd1Ym0\\NgROc1lm0000000001O001lQO[Njm0d1H\\NgROd1Ym0\\NgROd1Ym0\\NgROd1Ym0\\NgROd1km01O001O0000O100001O0000O1000O10001O01O000O100000lQO[Nlm0d1URO]Njm0c1VRO]Njm0c1VRO]Njm0c1VRO]Njm0c192O0000001lQOZNlm0e180000O10000O100000O0100000000000000O101O000000000001O0O11O0000000000O1000000001O000000000001O000mQO\\Nim0d1:00O10O101O000000O0100001O0001O0001O000001O0lQO[Nlm0e1TRO\\Nkm0d1900O11O00001O0000O10O1001O0000O010000000000000000O1000000001O000O1000000001O0000001O000001lQO[Njm0e1URO\\Nkm0e1TRO[Nlm0d1URO\\Nkm0d1URO\\Nkm0c1WRO\\Nim0d1WRO]Nhm0d1VRO]Njm0c1URO^Nkm0d1RRO]Nnm0k10000O1000FSRO`Nmm0_1TROaNlm0_1TRO`Nmm0`1TRO_Nlm0a1VRO]Njm0d19EiQOmNVn0R1kQOoNTn0Q1jQOQOVn0]10000000000mQOYNlm0g1TROYNlm0h1RROZNmm0g1QROZNom0l100000IRRO[Nnm0c1URO\\Nkm0d1URO\\Nkm0c1WRO\\Nim0d1:00001O1O1O00001O0lQOXNom0h1QROYNnm0g1RROXNom0g1SROXNmm0h16O1001O00000000000mQOXNmm0h1RROYNom0g1oQO[NPn0k1000JPRO[NPn0e1PRO[NPn0d1RRO[Nnm0e1RRO[Nnm0e1RRO[Nnm0f1PRO[NPn0k100000IQRO\\Nom0c1SRO\\Nmm0c1TRO]Nlm0c1TRO]Nlm0d1SRO\\Nmm0d1SRO[Nnm0e1RRO[Nnm0d1SRO\\Nmm0c1URO\\Nkm0d1VRO[Njm0e19000mQO[Njm0f1UROZNkm0f1TRO[Nlm0e1TRO[Nlm0d1URO\\Nkm0d1URO\\Nkm0d1URO\\Nkm0d1TRO^Nkm0c1SRO^Nmm0c1RRO]Nnm0d1QRO\\Nom0c1RRO]Nnm0b1TRO]Nlm0c1URO[Nlm0e1TRO[Nlm0f1RRO[Nnm0f1PRO\\Nom0k1000000000O11O00000000O1H8000000001O00000000001O000lQOXNom0h141000O101O00mQOYNlm0g1SROZNmm0f1SROZNmm0e18O1000lQO\\Nlm0c1TRO]Nlm0c1SRO^Nmm0b1SRO^Nmm0a1TRO_Nlm0a1URO^Njm0b1WRO^Nim0b1XRO]Nhm0c1WRO^Nim0b1VRO`Njm0_1TROcNlm0^1SRObNlm0_1SRObNmm0^1SROcNlm0]1TROdNkm0\\1UROdNkm0\\1UROeNjm0[1VROdNkm0\\1UROdNkm0\\1UROdNkm0\\1WROaNjm0`1WRO\\Nkm0d1900001O000000O1000000000000O101O00000
00001O00O10000001iQO`Nkm0`1TROaNlm0a1QROaNnm0^1TROaNlm0^1VROaNjm0_1XRO_Nhm0a1ZRO]Nfm0c1<001O00000000O11O00000000O1O1000000001O0O1000DfQOQOZn0o0fQOQO[n0n0eQORO[n0Z10O100000000001O001OO1O1O10000000000001O000O10000010O001O010O0000000O0O2FbQOPO_n0P1aQOPO_n0P1aQOPO_n0Y1000001O1O1O0000000000O1O10000O10000001O001O0000000001O0000000000000000O01O1000000000000[OaQOH_n07cQOH]n07dQOI\\n06fQOIZn07eQOJ[n05fQOLYn01jQOOVn01jQONWn02iQONWn02iQONWn02hQOOXn01hQOOXn00iQO0Wn0OjQO1Vn0NkQO3Tn0MlQO3Tn0LmQO4Sn0LnQO3Rn0LoQO4Qn0MnQO3Rn0MnQO3Rn0MnQO2Sn0NlQO3Tn0NkQO3Tn0NkQO3Tn0OjQO2Un0OjQO2Un0NkQO3Tn0MlQO3Tn0LmQO4Sn0LmQO4Sn0KnQO5Rn0JPRO4Qn0LoQO4Qn0KPRO5Qn0JoQO5Rn0KnQO5Rn0KnQO5Rn0KnQO5Rn0KnQO4Sn0MlQO3Tn0NkQO2Tn0OmQO0Tn0OlQO1Tn0NlQO3Tn0MlQO2Un0MlQO3Tn0LmQO4Sn0KoQO4Qn0LoQO4Qn0LoQO4Qn0LoQO4Qn0LoQO4Qn0LoQO4Qn0LoQO4Qn0LnQO4Sn0KnQO5Qn0LoQO4Qn0KPRO4Qn0LoQO4Qn0LoQO4Qn0LoQO4Qn0LoQO4Qn0MnQO3Rn0MnQO3Rn0MnQO3Rn0MnQO3Rn0MnQO3Rn0MnQO3Rn0MnQO3Rn0LoQO4Rn0KoQO4Qn0MnQO3Rn0MnQO3Sn0LnQO3Rn0MnQO3Rn0MmQO4Sn0LmQO4Sn0LmQO4Sn0LmQO4Sn0LmQO4Rn0MnQO4Qn0LoQO4Qn0LnQO5Sn0JmQO6Sn0JmQO5Tn0JmQO6Sn0JnQO6Qn0KnQO6Pn0KPRO6om0JQRO6om0JQRO7nm0IRRO7nm0IQRO8om0GRRO9nm0GRRO9nm0GRRO8om0HQRO8om0HQRO7Pn0IPRO7Pn0IPRO7Pn0IPRO7Pn0IPRO7Pn0IPRO6Qn0KnQO5Rn0KoQO4Qn0LoQO4Qn0LoQO4Qn0LnQO5Rn0KnQO5Sn0JmQO6Sn0JmQO6Sn0IoQO6Qn0JoQO7om0JQRO6om0JQRO6om0JQRO6om0JQRO6om0JQRO5Pn0KPRO5Pn0KPRO5Pn0KQRO4om0LRRO3nm0MSRO2mm0NQRO4om0LQRO4om0LPRO5Pn0KQRO4om0LQRO4om0LTRO1lm0OURO0km00TRO1lm0ORRO3nm0MQRO4om0LQRO4om0LQRO4om0LQRO4om0LQRO4om0KSRO3nm0MQRO4Pn0KPRO5Pn0KoQO6Qn0JoQO6Qn0JoQO7Pn0JoQO6Qn0JoQO6Qn0JoQO6Qn0JoQO6Qn0JoQO6Qn0JoQO5Rn0KnQO5Rn0JoQO6Qn0JoQO6Qn0JoQO6Qn0JoQO6Qn0JnQO7Rn0InQO7Rn0InQO7Rn0InQO7Rn0InQO7Rn0InQO7Rn0JmQO7Rn0IoQO6Qn0JoQO6Qn0JoQO5Rn0KmQO6Sn0JmQO6Sn0JmQO6Sn0KlQO5Sn0LnQO3Rn0NmQO3Rn0MnQO3Rn0MnQO3Rn0LnQO5Rn0KnQO5Sn0JnQO5Rn0KnQO5Rn0JoQO6Qn0JoQO5Rn0KnQO5Rn0KnQO5Rn0KnQO5Rn0KnQO5Rn0KnQO6Pn0LoQO4Qn0LPRO4nm0MSRO2mm0NSRO2mm0NRRO3nm0MQRO4om0LQRO5nm0KRRO5nm0KSRO4mm0KTRO5lm0KVRO3jm0MURO4km0LTRO5mm0JRRO7nm0IRRO8mm0HTRO7lm0IURO7jm0IZRO3fm0M[RO3dm0M\\RO3dm0M[RO4em0LZRO5
fm0KXRO7hm0IYRO6gm0KZRO3fm0MZRO2gm0NZRO1fm0OZRO1fm0OYRO2gm0NYRO3fm0MZRO3fm0MXRO4im0LURO6km0JURO5lm0JURO5lm0KSRO6mm0JSRO6mm0JRRO6om0JQRO6om0JQRO6om0JRRO6mm0JVRO4im0LXRO3hm0MXRO4gm0LYRO4gm0LXRO5hm0KVRO7jm0IURO7lm0ITRO7lm0ITRO6mm0JSRO6mm0JSRO6mm0JRRO7nm0IRRO7nm0IRRO7nm0IRRO7nm0ISRO6mm0JSRO6mm0JXRO1hm0OXRO1hm0OTROE^O=^n0NSRO6mm0JSRO5nm0KRRO5mm0LSRO4mm0MSRO2mm0NTRO2km0NURO1lm0ORRO3nm0LSROE@:]n01SROE@:]n01RROG@7_n02QRO4om0LQRO4Pn0KPRO5Pn0KPRO4Qn0LoQO4Qn0LoQO3Rn0LoQO4Qn0LoQO4Pn0MQRO3nm0MRRO3nm0MXRONgm02ZROMfm03YRONgm02YRONhm01XROOhm01XRONim02XROMhm03YROLgm05YROJgm06ZROIfm06[ROJem06ZROJgm06YROJgm06ZROHhm06R101N100O101N2Ncmh3" + }, + "dataset_name": "ocr_doc_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This region contains text related to the \"Deferred share purchase plan.\" It describes the nature of the share purchase plan, specifying the conditions under which shares were purchased, the performance criteria associated with the plan, and details regarding the share price and acquisition dates." 
+ }, + { + "image_path": "ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "mask_rle": { + "size": [ + 1025, + 1025 + ], + "counts": "h\\g34d1Nbg02]WO50M4NO5H34M5KG95K2Nbf08SYO34NO0M4KO7M0OM;NEM7>@Xe0MhZO\\1FTO2e1Lf17^LZe0KhZOT2IR43QJ[e0J\\ZOL7X2LU43oI\\e00bZOe8O[G_e01aZOd80\\G^e02_ZOc83[G]e0Z9cZOgF\\e0Y9dZOgF\\e0Y9dZOgF\\e0Y9dZOgF[e0[9dZOeF\\e0[9cZOfF]e0Z9cZOfF]e0Z9cZOfF\\e0[9eZOdF[e0[9fZOeFZe0[9fZOeF[e0Z9dZOgF\\e0Z9cZOfF]e0Z9cZOfF]e0Z9cZOfF]e0Y9dZOgF\\e0Y9eZOfFZe0[9fZOdF[e0[9fZOeFZe0\\9eZOdF[e0\\9eZOeFZe0[9fZOeFZe0[9fZOeFZe0[9fZOeFZe0[9fZOeF[e0Z9eZOfF[e0Z9eZOfFZe0[9fZOeFZe0\\9eZOdF[e0\\9dZOeF\\e0[9dZOeF\\e0[9dZOeF]e0Y9dZOhF[e0X9eZOhF[e0X9eZOhF[e0X9eZOhF[e0X9eZOhF[e0X9eZOhF[e0X9eZOhF[e0X9eZOgF\\e0X9fZOgFZe0Y9fZOfF[e0Z9eZOfF[e0Z9eZOfF[e0[9dZOfF[e0Z9eZOfF[e0[9dZOeF\\e0[9dZOeF\\e0[9dZOeF\\e0[9dZOeF\\e0[9dZOeF\\e0[9dZOfF[e0Z9eZOfF[e0Z9eZOfF[e0Z9eZOfF\\e0Y9dZOgF\\e0Y9cZOhF]e0W9dZOiF\\e0X9cZOhF]e0X9cZOhF]e0X9cZOhF]e0Y9bZOgF^e0Y9bZOgF^e0n90010N1000000O10O1000000000000001O001O1O00O1000000000000000000000001O000000000000O10000000000000O100000000001O00O1ZOcZOhF]e0W9eZOhF[e0W9fZOiFZe0X9dZOiF\\e0X9cZOhF]e0X9cZOgF_e0n9O0000001O0000YOcZOjF]e0V9dZOiF\\e0W9dZOhF\\e0Y9dZOgF\\e0Y9cZOhF]e0X9cZOhF]e0X9cZOhF]e0X9cZOhF]e0X9cZOhF]e0X9cZOhF\\e0Y9dZOgF\\e0o901O0000YOeZOhF[e0X9eZOhF[e0X9fZOgFZe0Y9fZOfF[e0Z9eZOfF[e0Z9eZOfF[e0Z9eZOfF[e0Z9eZOeF\\e0[9dZOeF\\e0[9dZOeF\\e0[9dZOeF\\e0[9cZOgF\\e0o9O101OYOfZOgFZe0Y9fZOfF[e0Y9gZOfFYe0Z9gZOfFYe0Z9gZOfFYe0Z9gZOfFZe0Y9fZOgFZe0Y9fZOgFZe0Z9eZOfF[e0Z9eZOfF[e0Z9eZOfF[e0Z9eZOfF[e0P:00000001O01O00O01ZOeZOfF[e0Z9eZOeF\\e0Z9eZOfF[e0Z9eZOeF\\e0[9dZOeF\\e0\\9cZOdF]e0\\9cZOcF^e0]9bZObF_e0_9`ZO_Fbe0o91J_ZOkEbe0T:_ZOlE`e0n9]ZOWF1Lbe0l9_ZOWFONae0k9`ZOVF0Nae0k9aZOUFfe0k98O101O0001O00O10O0100O101O0000000O1001O001O0000O100000000000001N1000000001O000000O1001O000000O1000O01010O001O1N2O001OO10000O1000000000000O100000000000000000000000000000000000000001O0000000O100000O1001O0000000000000000O100000000000001O0000000000001N
100000000000000000001O001O1O00000000O1O10O01000001N100O10000O11O0000000O100000001O00O2O00000O10001OO101O0O10000000000001O01O0000000O2O00000001O0001OO1000O10Q[O[Fbc0e9^\\O\\Fac0d9`\\OZFac0f9]1000O10000000000000000000000000000000000000000000000000000000000000000001O000000000000001O00000000O1000000000000000001N10000001O00O1O010O10000000000000000000001O00000O10000000001O00000000000000000000O10000000001O000001OO1000001O0001O0000000000000000000000000001N100000000000000000000000000O1000000O10000000000000O1001O00000000O100001O000000000000000000000000O100001O0O1001O0O2O00000001OO10001O0000001O0000000O1000O100000000O10000000O0110O00010O0O100O2O00000001O01O000000O100000001O000O1000O1000000O11O001N1001O0O100000000000001O0000O1000O100000001O00001O0000000000000000001O00000000O1000000001N02N100O2O01O0000000O101O00000000000001N10000001O00O10O10000000000001O00000000000000O2O0000001O0000O2O00000000000000000O10O1000001N1mFfYO^8\\f0`GhYO]8Xf0\\GcYOL89MZ1KkN7W4MiK]f0@TZO4GZ1JmN7k0CQ2?hLlg0Y1QXOlN93E2JGO?342Hki0DRVO2K72I]1OoN0<2N2C4Yj0MdYOJZg0OaRi3" + }, + "dataset_name": "ocr_doc_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This region is categorized as text. It discusses the mathematical concept of homotopy groups designated π_n(M), focusing on their ability to classify different dimensional hypersurfaces within a manifold M. The text further explains that the triviality of these homotopy groups is linked to the connectivity of the space they represent, with specific mentions of the concepts of domain walls in cosmology and topological defects arising from symmetry breaking during phase transitions in the universe." 
+ }, + { + "image_path": "ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "mask_rle": { + "size": [ + 1025, + 1025 + ], + "counts": "ich35f05NG`m0;[RO3KLgm08VROO1Khm09QRO05Iim0o0VROQOim0P1WROQORm0A^SO`1]OPOTm0A^SOT2al0mM]SOT2bl0mM]SOT2cl0lM]SOU2bl0kM^SOU2cl0iM^SOW2cl0hM]SOX2cl0hM]SOX2bl0jM]SOW2bl0kM\\SOU2cl0QNXSOo1hl0QNYSOo1fl0QNYSOP2hl0jMPSON8X2hl0iM^SOW2bl0iM^SOW2al0kM^SOU2bl0lM]SOS2dl0nM[SOQ2fl0oMZSOQ2fl0oMZSOS2HSNYl0IoSOi2Pl0WMPTOi2Pl0VMQTOj2Pl0UMPTOk2Pl0ZMlSOe2Ul0ZMkSOQ2JWN]l0GhSOR2KWN_l0DgSOj2[l0TMeSOl2gl02O000000000000000^OQMRTOo2mk0TMQTOl2ok0[MjSOd2Vl0^MjSOa2Vl0_MjSOa2Vl0_MjSOa2Vl0`MhSOb2Wl0^MiSOb2Wl0_MhSOa2Xl0^MhSOc2Xl0\\MiSOd2Vl0\\MkSOe2Tl0ZMmSOf2Sl0ZMmSOf2Tl0YMlSOg2Tl0YMlSOh2Sl0XMmSOg2Tl0ZMlSOe2Tl0\\MkSOc2Vl0^MiSOa2Yl0^MgSOb2Yl0^MgSOb2Yl0]MhSOd2Wl0SMhSOO2n2Vl0RMRTOo2ok0PMjSOV3al0OJmL_SOf2:WMXl03_SOd2Vm02jROXM21el0U30OO2M2HEcSOaM]l0_2fSO_MYl0b2mSOXMSl0h2mSOXMRl0i2mSOYMRl0h2iSO\\MWl0W3O10000000000000000000O1010O0000000O10O10000000000000000000000O2O00000000000000000000000HdSOkL\\l0]301N1000000O1000KgSObLYl0^3gSObLYl0^3gSObLZl0]3fSOcLZl0b30O10000001O000000O0K6O100001cSOaLVl0_3iSObLXl0\\37EcSOQM^l0m2<001O1O1TSOQM6MUl0Z3iSOfLWl0\\3fSOfLZl0`30010O0000000O1O10O100001O00000O1000000010O000O100O10000001O00O100O100000010OO01O1L4O1O11O0aSOeLXl0[3gSOfLYl0Z3gSOfLYl0[3fSOeLZl0a30O11OJfSOeLZl0[3fSOfLYl0Y3iSOfLWl0Z3iSOeLXl0[371O00O10bSOcLXl0]3gSOfLWl0Z3iSOgLVl0Z3hSOgLWl0Z3iSOfLWl0b30001O0000O100O100O1001O00001O00001O000000000000O100001O0000000O100000001O00O10000O100001O00O101O000001O0000O100O2O000001O0000O10000O100O1IhSOeLXl0b301O00000O0100O11N2O000000010O00O100000000000000JgSOdLXl0]3hSOcLXl0]3hSObLYl0^3hSOaLXl0_3hSOaLXl0_3hSObLWl0^3iSObLWl0^3iSObLWl0^3iSObLWl0^3iSOaLXl0_3500000cSOaLXl0_3hSOaLXl0_3hSObLWl0^3iSObLWl0^3hSOcLXl0c3O1000001O00O101N10000KfSOcLZl0]3gSOcLXl0]3hSOcLXl0]3gSOdLYl0b3000001O00000000O100O11O001O0000000001O0001O00O10OBhSOTMYl0k2iSOTMWl0l2iSOTMWl0\\3O000000O1JiSOaLXl0W3gSOlL_l0T3`SOmL`l0Y31O10000J`S
OlL_l0S3cSOnL[l0R3eSOoLZl0Q3fSOnL[l0S3cSOmL^l0[300cSObLWl0^3iSObLWl0_3gSObLYl0d3O000000000000O010GhSOhLYl0W3iSOhLWl0W3kSOgLWl0W3:O1000O2O5K0dSObLUl0^3kSObLVl0^3hSOcLXl0]3hSOcLXl0]3gSOdLYl0b30N3N1N2O1OcSOdLWl0\\3iSOeLVl0\\3iSOdLWl0\\3hSOeLXl0\\3fSOeLZl0a300000JfSOfLYl0Z3hSOeLXl0[3gSOeLZl0a3000AhSOTMYl0l2fSOVMYl0Y30JgSOdLYl0\\3gSOdLYl0\\3gSOdLXl0]3hSOcLXl0]3hSOcLXl0]3hSOcLXl0]3hSOcLXl0]3hSOcLXl0]3hSOdLWl0\\3iSOdLWl0\\3iSOcLYl0\\3fSOeLZl0a30BfSOTM[l0Y301OJfSOeLZl0[3fSOeLZl0[3fSOeLZl0[3fSOeLZl0[3gSOdLYl0\\3gSOdLYl0\\3gSOdLYl0\\3gSOdLYl0[3hSOeLXl0[3hSOeLXl0[3hSOeLXl0\\3fSOeLZl0`31O110IeSOgLZl0X3hSOgLXl0Y3hSOgLXl0Z3fSOhLXl0a3000000010O0000O0100O1001O00000O1000IiSOcLXl0]3iSOaLXl0_36N100001aSObLYl0d3O000000O10000LfSO`L[l0_340000000bSObLYl0^3gSObLYl0]3hSOcLYl0\\36N3M200001O00000`SOgLWl0Z3dSOlL[l0T3dSOmL]l0[30GdSOmL]l0R3dSOlL]l0T381O1O1O0OjL_SOm2bl0RM`SOm2`l0SM`SOl2al0TM_SOl2`l0VM^SOk2al0[OSj0h4nUOWKRj0i4oUOVKQj0j4oUOVKQj0j4nUOWKRj0i4mUOXKRj0i4mUOYKRj0g4mUOYKUj0f4kUOZKVj0d4lUO[KUj0d4lUO[K_i0NdVOg4ej00O10OVUOXK]j0h4bUOZK\\j0g4dUOYK\\j0h4cUOYKii0O\\VOh4K[Kfi0O^VOf4L[KZj0e4fUO[KZj0e4fUO[KZj0e4fUO]KXj0c4oUOVKQj0j4b01O2N1000001O01N100ODUKeUOj4hj01000000001O1O\\OXKoUOh4ej000000lTOWKPk0i4PUOXKoj0h45O[OZKQVOf4oi0\\KoUOd4Qj0ZKPVOg4Pj0XKQVOh4oi0WKQVOj4oi0VKQVOj4oi0WKPVOi4Pj0WKQVOh4oi0ZKoUOf4Qj0ZKoUOf4Qj0XKQVOh40XKZi0OgVOi4MZK\\i0MgVOi4L[KRj0e4nUO[KRj0e4oUOZKQj0f4QVOXK^i0N_VOk45UK\\i01^VOj46UKmi0j4RVOWKni0i4PVOYKPj0g4oUO[KPj0e4oUO\\KQj0d4PVO[KPj0e4PVO[KPj0e4QVOZKoi0f4RVOYKni0g4RVOXKoi0h4RVOWKni0i4e000000^OWKlUOh4Sj0ZKdUOM1i4Zj0XKhUO0Li4\\j0WKhUO0Li4[j0XKiUOOLi4[j0XKiUONMj4Zj0XKRVOg4oi0WKRVOi4dj00O0000000000000000000000000ZOWKSVOj4li0XKRVOi4ni0YKoUOh4Pj0YKoUOh4Rj0WKnUOi4Rj0VKoUOj44TKYi00eVOk4OXK\\i0MeVOk40WK[i0NeVOk4Pj0UKPVOk4Pj0XKmUOh4Sj0YKlUOg4Tj0YKlUOf4Uj0WKnUOi4Rj0VKoUOj4Rj0UKnUOk4dj00IVKWUOj4_j0WKcUOOMl4`j0TKiUOl4gj0O001N10_UOVKki0j4f0000L4N2N2O1FmJfUOS5Yj0oJfUOQ5Zj0PKeUO0Of4\\j0[KeUOM2f4Yj0]KQVOb4oi0]KRVOc4ni0\\KSVOd4mi0[KTVOe4li0[KSVOf4mi0ZKSVOf4mi0[KRVOe4ni0]KoUOd4Pj0]KPVOc4Pj0\\
KQVOd4oi0[KQVOf4oi0ZKPVOg4Pj0YKPVOg4Pj0YKoUOh4Qj0YKmUOi4Qj0YKmUOh4Sj0YKgUOl4Yj0SKiUOl4Xj0RKjUOm4Vj0RKkUOn4Uj0RKkUOm4Vj0SKkUOl4Uj0UKkUOi4Vj0YKdUOKNY41jKON^j04cUOL0T45jKM0\\j05dUOLNU4=jKQj04lUOQ44kKoi05mUOP4ClKPk04]UOP4BnKPk02^UOW4`j0jK^UOY4`j0hKYUOJN_4hj0gKZUOJM`42_Kaj06gUOZ4_j0fKaUOZ4_j0eKbUO[4b0`Kkh05cVO[4a0aKlh04cVO[4^j0fKbUOX4_j0jK_UOV4aj0jK_UOV4aj0jK_UOW4`j0hKaUOm3DULkj0MaUOo3DULm0Hlh03dVOP4DTLk0Jmh03cVOo3DTLm0Jlh03dVOX4a0eKkh03dVOX4b0dKjh04dVOW4d0dKhh04eVOX4_j0hKaUOW4`j0hKaUOX4_j0hKaUOP4BQLmj0OaUOP4BQLmj0OaUOo3CRLmj0N`UOP4DQLmj0MaUOP4CTLkj0LbUOP4CTL_k0l3aTOTL_k0m3`TOSL`k0m3_TOTLak0l3_TOTLak0l3_TOTL`k0n3_TORLak0W40G_TOQLak0o3aTOoK`k0Q4`TOnKak0Q4aTOnK_k0R4aTOnKnj0O]UOQ4FPLlj03\\UOk3IRLkj03]UOj3HTLjj01_UOk3FULkj0O`UOl3EVLjj0MbUOn3CULkj0MbUOn3CULkj0MaUOP4CSLlj0N_UOP4ERLlj0O^UOo3FSLkj0N^UOP4FSLlj0L_UOQ4FQLlj0M`UOQ4DQLmj0N_UOQ4DQLmj0M`UOR4CQLmj0N_UOQ4DQLmj0N_UOR4CPLnj0O_UOP4CQLnj0O_UOP4BRLoj0N_UOP4BRLnj0N`UOQ4BQLnj0N`UOQ4BQLnj0N`UOQ4BQLnj0N`UOQ4CPLmj0OaUOP4BPLnj01`UOn3gj0RLZUOm3fj0RL[UOn3ej0RL[UOn3ej0RL[UOn3ej0RL\\UOm3@QLnj02bUOl3ej0TL[UOl3ej0TL[UOk3fj0VLYUOj3hj0TLZUOk3fj0ULZUOk3ATLmj01cUOk3_OULmj00dUOk3_OULmj00cUOl3@TLlj01cUOk3fj0VLXUOk3hj0ULWUOl3ij0TLWUOl3ij0SLXUOm3hj0SLXUOm3hj0SLXUOm3hj0SLXUOm3hj0SLXUOm3hj0TLWUOk3jj0ULVUOk3jj0ULVUOk3jj0ULVUOk3jj0ULVUOj3kj0ULVUOk3jj0ULVUOl3ij0TLWUOl3ij0TLWUOl3ij0TLWUOm3hj0SLXUOm3hj0SLWUOo3hj0QLXUOo3hj0PLYUOP4gj0QLYUOn3hj0QLXUOo3hj0QLXUOn3ij0RLWUOn3ij0RLWUOo3hj0QLXUOo3hj0QLWUOP4ij0PLWUOP4ij0oKXUOQ4]k0O001O00O1000000000O1000000O1001O1O1O2NO100O1000000000001O00000000O2O0000O10000O100O1O10000001O01O100O000O1000O1O1000001N1000000010O01N01000000O101O0O10010O00000000O1000000000O100O011O0001O000O1000000O100001O0O100000000001O0000000000000000O10000O1001O000000000000000O2O0001N1000000000000001O2M3N00O1O10001O0001O01N1000O101FoKaTOR4Sk0mKRUO2JR4Sk0lKSUO2JR4`k0lKaTOS4hk0O0O1000000000O10000001O[OoKZUOQ4fj0PLYUOQ4fj0oKYUOQ4hj0oKXUOQ4hj0nKXUOS4hj0mKXUOS4[k001N^OoKWUOP4ij0QLVUOn3kj0RLUUOn3kj0QLVUOP4ij0PLWUOP4^k0O00O100000000001O000O100000O10000000
001N1000000O11O00001O00000001O0O100O2O000000O10001N1O1001O0O101O01O00O10O^OnKWUOR4hj0oKXUOP4ij0PLWUOo3kj0PLUUOP4kj0PLVUOo3_k0O00000001N2O1HnK_TOS4_k0oK`TOQ4Tk0mKRUO2JQ4Tk0mKRUO1KR4Sk0mKRUO1KR4Sk0mKZUOR4gj0nKQUO1KQ4Tk0nKQUO1LP4_k0QL`TOo3`k0RL_TOn3`k0TL_TOl3ak0TL\\TOo3Yk0lKPUO[4Pk0eKPUO\\4oj0dKQUO4HQ4Vk0mKRUO0JR4Tk0nKRUOOKS4Sk0nKZUOQ4fj0oKZUOQ4fj0oKYUOR4gj0nKXUOR4jj0mKVUOS4jj0mKVUOS4jj0mKWUOR4ij0nKWUOR4ij0nKWUOQ4jj0oKWUOP4ij0PLVUOR4ij0nKWUOR4ij0nKWUOR4ij0mKXUOS4Zk01KlK]TOU4ak0lK^TOU4gk001O00001O1O1N2O000000O1KPLYTOP4gk0PLYTOP4fk0RLYTOo3ek0RL[TOn3dk0SL\\TOm3dk07100O1O010kKZTOn3gk0QL\\TOm3dk0SL[TOn3ek0RLZTOo3gk0oKZTOQ4ik02O100000000001O1O2N2N001O00O1000000000001O001O000001N100O1001N10000000O1000000000001O001N100000000000010O000000O100O1000000000001O0O010O100000000001O001N1001O000001O1N1000001O00O1O1000O1001O0000000O01000000000001O000jLmSO]2Tl0cMlSOi11RNSl06jSOk11oMUl0Z31O0lLjSOk10PNXl00mSO6Li07oNRl0JRTO:KOM37GSl0GSTO3lo05L3M3N2N1O0O1000O10000001O0000000000000000000000000000001O1O00000000O10000O1000O10000000001YQO]Omm0d0RRO]Onm0d0RRO[Onm0f0f00O100000O100O1O010000000000O2O1O1O000000O1O2N100O1000001N10010O1O000O2O1O0O2N3M2N2N3N\\RZ1NfmeN00000_P`01`o_O0O10001O01N101O`Qa11]n^N22MXTX55^kgJ8M100O1000000O1O100O1000000O101O000O100000000000001O0000O100000001O00O100000001O000000000000O2N2N2N2N2M5Jkej3" + }, + "dataset_name": "ocr_doc_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This region is categorized as a formula and presents a mathematical equation related to the text's discussion about topological defects and homotopy groups. The equation seems to represent a relationship that is essential to the argument or analysis presented in the categorical text regions it is associated with." 
+ }, + { + "image_path": "ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "mask_rle": { + "size": [ + 1025, + 1025 + ], + "counts": "n]\\?7io05L1O000000000000000000000000000001N2N_aQ`0" + }, + "dataset_name": "ocr_doc_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The last region is identified as a page-footer. It likely contains publication and/or authorship information, a page number, or possibly an indication of the section of the document where the content can be found. Since it is a page-footer, its purpose is primarily to aid in the organization and navigation of the document." + }, + { + "image_path": "ocr_doc_detailed_caption/images/0301_0188_0040.jpg", + "mask_rle": { + "size": [ + 1548, + 1170 + ], + "counts": "Pc08k5JTJL_1kT1a0[kNP4MQJM_1gT1e0ZkNn32nIMa18]NaS1V7okN]J;_NMd14^N\\S1X7[lNUJ7bNNf1LfN6A^R1g7]mNhI5dNN_2OcMRR1m7nmN\\I2eN0_2NhMeQ1P8[nNTI2eN0`2LkM_Q1Q8bnNQI1cN3`2FnMbQ1l5^nNUJ1g0490bN5T3SQ1Z3dnNPJ0g04VOLk0:n1o7eL^`0n6YWOgI0g04VOMj09n13lLU7MSa0T6ZWO^J20Og03g07X11mLT7JXa0V6YWO\\J11Oh01g07Y12mLS7H[a0U6YWO]J0i34TO3kLT7F\\a0U6YWO]J0h34WO0lLV7C\\a0T6\\WO]JOi322S7dKaa0P6_WO]JNn3O5o6`Kea0n5bWO]JMo3M9l6]Kha0n5dWOZJLR4L=h6YKla0l5hWOYJK6Ib2LdM5_1L_N4R1L`Nk6Ola0d5TXO[JC9Hc2IeM6]10_N1Q1NdNf6Oma0b5nXO^JeNj2EeM7]12]N0Q1OfNd6Nma0b5`YOPKhM22CLk01^O9]12[N0P10kN\\o0^5dROPKgM23_OMm02_O5`12ZN1o0OmNZo0_5dROPKgM24]ONi07A1b10\\N0o0OmN[o0_5TROaJ]N]28gMMg1J^N3n0NkN]o0_5aQOZJYO`M0f2FVO[o0j3cPO^J=2Gl2n0GkNVO^o0Y4lPO^Mc0YLVOU5Uo0W1oPO\\Me0ZLVOT5Uo0X1mPO\\Mg0WLXOV5So0V1jPOcMj0PL[OV5Qo0V1kPOgM5[]OW1jNfNh07XOl33]2KdI`0b0Bo3R5YK]`0\\1aZOfNh06YOo3O]2MbId0a0\\OR4S5XK^`03]ZOQ15mNh05ZO]6IQJNP33bLf0`0YOS4U5WKh>0c]O4`NP15mNh04[O^6HPJ0Q30bLi0\\5\\4^Jg>1a]O5eNm03nNg04]O^6GPJ0R3NbLl0[5Z4^Jg>2h\\O3K3Dk03nNg04]O^6GPJ0\\3j0c1Y4_Jg>3g\\O4J1Gj03nNg04]O]6HPJ0^3i0mLWOa3R5cKg>5Y\\OJ4566Ci05mNg04]O]6HoI2`3g0hL[Oc3m4fKg>?Z\\OD;`1HZNg06[O`4JQM0iN3e3f0aL]Od3h4jKi>a0W
\\OBk0_\\On0P1XNPOV1Ij15\\N0kN31JY32aL4:9a3]4RLj>k0_\\On0P1XNoNV1Li13\\N1lN3OK5Mo28QM8b3]4QLk>l0^\\On0n0YNQOV1LY1MfM6U12jN4OK5MP38PM7c3[4SLl>k0_\\Ol0n0ZNROW1HX13fM1U15jN3NL6LP39PM5c3[4SLn>l0^\\Oj0NVNc05AX1Db01bN5IOV16hN5ML6KS37PM4c3]4QLn>n0_\\Oh0KYNd04BY3DRM7JLU1:fNi04RO5K40\\1<_N2c3]4RLo>m0_\\OLF[OM77Ec02E1Bn22\\M5KLU1l1eN]N73JHg19aN0c3^4QLP?n0_\\OKE\\ON66Fb03On2[O\\M6KJT1Q2bN]N8`0c1AaN0c3]4RLP?o0^\\O6DVO6Fb021n2YO]M6KJT1f2hNYNe1@cNNa3_4RLo>P1`\\O5AWO8Eb020o2[O\\M4LIV1?aNk02SO61LHg1;bNN^3_4TLn>R1a\\Oa0I^Nb02HNBQ31\\M4LHX1;eN8HO6J7FC3;Od1>bNL\\3_4ULo>X1\\\\O:O^Nb01^O1K2Om23]M4e1JXN=IN4IU3`0PMG]3`4VLn>[1\\\\O7O^Nb02_OT1KQ12WN4Y23bMLk3c0TLDb3^4ULo>g0Y\\OG5S1M`Nb02^OT1NYONR11nN4[2OeMKh3i0QLCb3_4ULo>e0]\\OG2T1LaNb02^OT1NXO0R1OoN5\\2JT2h0YKCb3]h0lLZWOG7Q1KbNb02^OT1NWO1S1NPO4Z2LW2f0VKDc3]h0nLUWOH=m0JcNb01_OU1LXO2R1MQO6X2LhMLj3i0lKCd3]h0aMeWOOHeNc00^OW1KWO3R1MRO5W2MgMMb1111mM>6Ld3]h0aMeWOOGfNc00^OW1KVO4T1JRO7V2MgMNa102OoM`05Kc3^h0aMfWONFgNc00^OW1KVO4U1IRO7T2OhMLk0NWO4b1LPNb0:C]3eh0aMgWOLFhNc00^OX1ISO9W1FRO7T2OhMLk0OVO4j1=SND\\3ch0dMgWOJGhNd0N^OZ1HPOUN4b3kg0RNgWO\\O2jNm0^1A0AW16aM0d1O[N42Ck1>SN3_3og0UNdWO\\O1kNl0a1@LCX19^MOf12VN17Bl1NF\\1FkN>Y1FCN:[1BeN_Od1f1fNRN7Y3Sh0]NbWOWONPOd0h0C_L[g0f0bXO21?a0O]OkNOT10_12\\M5^2F>b0e0DbL\\g0c0cXO31=6_NKa1OiNMV1O`13]M3\\2H?a0d0DcL]g0c0aXO43NLSO8JM`10hNLX1N^16^MO\\2L?=e0EZLgg0l0VXO46LKTO7KN_10iNK[45hKOe1JaN2e2=e0DZLjg0X2YXOcNNUO4LN^12hNJ\\45hKOd1LaN1e20V5af0nGZYOi38BGRO6POJZO6Z7cf0cGVYO02U48CGPO7oNK[O5Z7cf0bGWYO10W47BFQO;mNKZO5UOEd7of0RHWYO3Mj0OZ29WMEn22]O:]NFWO50M09KD;1d52\\JXg0_OgXOk0O[28UMHn21\\O<[NGYO20O09ID<1d51]JWg0AfXOj01[26UMJn20\\O=ZNEZO31OO;FD>Od51^JWg0CcXOi04\\23UMLn2N[O`0[NCZO300O9b0CQ2MfM0e17WNVg0e0iXOS21WMJl5?hJBZO50N06j3DlN8TMIMPg0OTYOR14R2Nk38iJE[O4Z62dHJOjf04UYOn09P2Kn36S4MmDL0ff0a1aYOGE85a53S42jDK2df0a1dYOFGHJWO7m6IY4=`DK6IN`f0a1nYOFHFLWO4bb0a0f]Oge0U1PZOGHDLXO4bb0a0f]OP7Bj6c1mAHGCNWO3cb0a0g]OI@k1N^14hL3n=_1nAIHAOXO2cb0a0g]OIAi1NR1c0SMGn=Z1QBKI_ONYO2^?J\\Cg0aMGCi1Kh0U1TMGl=h0^BKI_ONYO2U7Ij02^HNe1>fMJc07Ff1H`0i2V;[MZBT179
J]ONZO3\\?L\\C:QMJa08Ff1I4U3a;jL_BW12;K\\ONZO3\\?L\\C:QMJa09Fd1I1[3o;lMVB:L\\OM[O4Z?M]C9QMJa09Fd1IKc3TOTO4AO]3KcLMm45SLLn02G7o04SMT3>aL40Be0\\4f_MP=;hBi0NWO1\\O4W9LTGKO5`11]NNg07YOGi1i28VMcMS13nNj2KlB8bNHc1o3mMcL3_O4OMm01SO[4kHbA5WOLb1b0kM_35QL04ON011N3N<3^O05OCFLO50M11WME;2C=Og03VOYMD;1H:M2M12O3^ON862LG040K7L2N5Hm06QO23MJn07nN117OD;US<\\OmD4oNk00Vl0KTSO:J24J?V;VOQE1E4S14nNl02gl03]RO33J?U;VOSE0E4R1c1oNVl04[RO34J`0U;TOTE0E4Q1d1POVl02\\RO42Kb0R;SOWE0D3Q1f1POUl01\\RO53Ka0R;ROXE0D3P1g1QOTl01\\RO53Kb0Q;QOYE0D3o0i1POTl01]RO42La0S;POYEOF3k0l1ROil02QSOS;WNYENG4?W2\\O`n0U;WoNbE2LW2^O`n0T;WoNcE0KZ2^O_n0T;WoNdEOJZ2_O`n0T;UoNdE1HZ2Aan0PF@2H73ZS1d7UlNUH6Q1MiN>=ZS1d7UlNWH3Q1OgN?=ZS1d7UlNXH2R1LhNb0:[S1d7VlNXHOa1?cN]S1c7UlNZHN`1`0cN]S1c7UlNZHN_1a0dN\\S1c7TlN[HOX1g0jNVS1d7QlN\\H2S1i0nNTS1Y8SlNhHj0oNSS1Y8TlNTH4WOf0QS1Y8TlNRH7WOd0>RS1X8RlNTH7WOf0]OAcS1j8_lNhGOI5FA@cS1i8_lNfG1L1H:=C^OcS1l8[lNgG4K0G<a0B_OV2Onn0g:doNkEWOAbS1d:XmNjEVOBbS1c:YmNkEUOBbS1_8ZlN_Io0`NTOCcS1^8ZlN_Io0`NTODbS1]8[lN_Io0`NTODbS1l7ZlN^H1c1n0_NUODbS1k7[lN^H1d1m0_NUODbS1k7\\lN]H0e1m0^NVOEa2LWn0o7VoN\\HOf16nM=`0@E`2NWn0m7WoN\\HOf16oM=@F`2OUn0n7WoN\\H0105207^O;=@Fa2NTn0P8UoN\\H2O25003Ba0:^OFb2LTn0b8VoNkG24111Cb09^OF_S1_8]lNjG24111Da08_OF_S1_8]lNjG24110Eb07@E^S1P8[lNmH3A22OEb06AD_S1P8\\lNmH0C33KEe05CB]S1R8^lNlHNC3a0`0\\O\\S1d7SlNlHND2a0`0[O]S1d7SlNcIO]Ob0\\O\\S1d7SlNbJ1\\Ma0>ZS1e7SlNaJk0kMZOAPS12QmNQ8IZH1e1l0^NWOBn2Ndk0>PROF_Oa8NZIk0aNUOCn2Ock0a0lQOCE^81YIh0dNTOBo2Obk0f0hQO_OJ]82WIh0fNSOBo20`k0f0kQO]OI^82WIh0eNTODm2Oak0d0PROZOFT8HWH8_1h0eNTOCn20^k0=gROd7kNWH6OHT1P1QOUODn2O[k0;oROe7gNXH40HT1P1QOVOCm2O[k0:RSOg7eNWH31HT1P1POWODn2MXk0;USOh7cNWH30IU1o0oNXOE\\Q18noNh7dNUH32IQ1Q1SOUOE[Q18PPOh7cNUH31J;NMU1MROEXQ17WPOj7^NTH40L;ML=B75SO<4CEV33Uj03gROO4N:P8@dHL=5SO<4CEV33Uj03fRO07J9S8^OeHM<6QO<6BFU32Vj02gRO15L8T8[OUH2M0k0W1WOlNHW30Tj04hRO03ONMJd81aGNl0W1XOjNHY3OTj04hRO012MMKb84_GMl0X1YOhNJ[3JSj07hRO1O4MLL_86aGKj0[1ZOeNJPQ12VoN2N4MLL5Oh79QHKa0OZO]19cNKPQ11WoN3N3MLL42g76SHKa00YO\\1:cNKPQ10WoNO48FMM23h75SHKa0M^O]13hNLlP11ZoNM39FNL24g74
THKl0Y1VOiNLlP10QPO6SOMM33g73UHKl0Y1UOjNMkP1ORPO6SOMM43U7LkH6MIm0Z1SOlNLiP11TPO4ROLO64Q7KnH4MKm0X1TOlNLiP11TPO4SOH1:3o6JoH3NKl0Y1UOkNLiP11TPO4@2Ho6ImH5NJ>KJ]1JlNMgP12SPO5VO<4f6HhH7ML>MIY1NkNMeP13aoN0K6N:5g6IdHd0=]OHX10jNMdP14`oN6G1385h6KaHd0?\\OGY11hNNdP14aoN8C1566h6i0oHSOF:L;5^ONbP15`oNo0H@7g6j0PIROF9O:2@NaP16aoNo0F@9f6j0PIROF9092AKbP17aoNn1NP5EnH8Jc0`0\\OG8091CJaP17coNm1NQ5DmH7Nb0?\\OF9181CHbP18doNm1MQ5FkH510O5`0HF81:0b3Hdh07dSOn1LQ5GiH44O23>JC92:1a3Hch07gSOl1KR5GiH45M43>J_O;3;0`3Ich06hSOm1KQ5DoH13123J\\P17\\oNHg03Gf0G[OGd7OPIMJ2K0JdLW41Rg03[UOL2S1QOkN1j6IdLYQ1iLhoNN3Q1POlN2g7JfJYQ1hMhoNN3R1nNlN4d70dH3X1a1_N_m05gQOO4Z1QOW62dH3V1`1dN\\m03jQON4Z1QOX62bH3W1`1dN\\m03jQON4Z1QOX61eH1V1c1`N]m05iQOM5[1QOX6OeH1W1RQ1dNhoNM4\\1ROZ5IPJ4D1V1SQ1dNioNJ6^1POY5JgJ35TQ1cNUQOY1kMV5KhJ16Nkn00[QOL7g1SOP5HjJ248aN>Ojn0N]QOM6g1TOo4GkJ159`N>0jn0L^QOM5h1TOo4GlJ04;_N=2jn0I_QOO5g1SOP5HkJO5=]N<3hn0JUQOOk0h1iNn4FmJ31<]N;5en0NVQOMl0g1iNn4FlJ60Y1aNPn03UQOLk0h1kNm4EgI2a06a0Z1_NQn08oPONl0f1lN^5IXING9W1lP1iN[oNNk0g1nN[5KkH1N046;E:`2[Olk03SRO0j0f1POZ5KkH2L154;G:^2]Okk02SRO2j0d1QOZ5KkH1N224;H;^2\\Ojk02TRO3h0e1ROY5KnHKO703:HB_O4T20P5LWI4HKX18hNS10Um00oQO>A_O6T2OP5LWI4HJZ19eNS11Um00oQO>A_O6j1GXN8R7LWI4HJZ1:cNS13j2Jle06]VO=B@5i1IWN7T7LWI2IJY1RNNj6D\\I6`2NZMZm03oRONJM<3h05WOj2LPMNj6D\\I6`2M\\MZm01oROOJ0;Oi05XOj2LPMNj6D\\I6_2OeMol0HYSO0a0O<5XOl2IoL1i6D\\I6_2OeMml0I\\SONc0075ZOn2GmL3i6D[I7`2OeMjl0HaSOMd0116\\Oo2EnL4g6E[I7`2OeMZ6Dl?3`[O0YO5\\OQ3DmL6f6E[I7`2OeM[2D[10_b04SZOMl02K5\\OT3@kL:e6E[I8_2NfMZ6Ej?3TZOMl02K5\\OS3BkL8f6E[I8_2OeMY6Ek?3TZOMm01K6[OQ3EkL5h6E]I6^20eMgl0H[VO4YMQ3FjL4i6E]I76Ie17WNfl0I\\VO2YMS3FiL3j6E]I75Ke18TNdl0MYSOOe23fMR38c3VO^I75Ld19SNbl0OXSO1e21gMc0]Oc1l0`4TO]I92Ng15SNcl0OXSO1f20fMc0^Ob1m0b4RO\\I;0Oi1I^Njl0FZSO0_1K^O7^Ol2>i3PO\\Io0j1QO_Nll0E[SOOj0O]OOd04@<@i1n0`4PO\\IQ1[2ik0eM]SOM`09FGd02A<@i1n0i4=TK[k0PN_SOL>=EEe02B8Cj1n0i46bZOJ;f0B_Of03D7Cj1n0k49TK_5iMl>5eZOJ9h0C\\Oe05E5Cl1n0j49UK]5jMm>3][Oc0UOZOf06D4Dm1m0j49UK]5jMm>2^[O4\\[O=nNB8Ne07Dh2LQM1R7l0]IgNS1cl0aN^TOf0UOVOg08Bo2FlL7P7NSI99BP1R2
`NPh05nUOOJ012NZ3L^3P1^InNm0Tl0bNnSOe0L9K]O>1NZ3L]3Q1^InNn0Rl0dNmSOc0O9J\\O>2OY3L^3P1^InNm0Rl0dNoSOb0O:I]O=11X3K_3a0nH]O`00m0Rl0dNnSOb01:H]O<13W3K`3?PI]O?0l0Sl0dNlSO=:?A]O<13W3K`3=TI]O;2l0Rl0eNlSO<`0b2]OWM0O=o6;VI]O93k0Sl0eNkSO==>@^O3EK=b0`2_OVM00:Q7?_O^O3FJ`0@]O4CL>?c2]OWM1Na2nk0ZMiSO2LM`0k0KXO55MR3DgL9S7ERI?7_Og1^l0lMjSO2LN?i0NYO26MQ3FfL9T7CSI=9Ae1_l0kMiSO3LN?i0OXO17MQ3FfL9T7CRI<;Dc1\\l0mMjSO3KN?i0NYO150Q3DgL:S7CRI;h00WO251Q3AiL:R7EPI;>E853Wl0TOhSOP27RN362P3@jL;Q7EPI;=G665Tl0UOhSOQ25RN552[3J`3FPI:>HD4M3i0Sl0UOhSOR24QN643\\3I`3FPI:>HB7M1k0Rl0VOgSOQ25QN554[3I`3FPI:>HB7N1h0Sl0WOgSOQ25RN445R3]OjL;P7FPI9`0HA70Oe0Vl0WOgSOP26SN256o2@kL8Q7FPI9`0HB510c0Wl0WOgSOn19TNN68o2@iL8S7EPI8b0GB600a0S8WOg;1U\\Om1:UNM59o2@jL7S7EPI8Q1LSO2`0Q8ZOg;OW\\Ol19VNL5;o2@iL6T7EPI7R1MSO2?P8ZOh;OW\\Ol19VNL403c5H^JK^e0R2^ZOWN2M5Z3H\\40]H5d0MBa2ZO5`0XM3Xe0h1hZOUN141U3L[41^H1j00YO<^O_1M`NOj1_1Pc0o0iZOTN3aJhCU3]Ld<2o@9;h0ROXO1h1a1ZO`N\\;JTF1e0_2^LPN1Y?Oo@;;g0QOZO0f1c0RN4W1AW;EcE0^N2o2[2_LQN1V`08_@g0QO[OMf1d0WN2R1FU;DcE0_N2P3o1`LSNO64Y`05_@f0RO]OJe1d0eNJb02U;CdE1_N12Kj00NR2iNRNO54\\`03a@d0POb1>ZNI?4T;BeE1^N22Kk0ONR2jNPNO64_`00`@e0oN_1a0`NF<4U;BWDMg04UO22Li00OP2lNoMN73b`0N`@7eN18l1?XNZOJ`0j01X;BVDOg02UO41Kj00BL@S22VNGf`0Ma@3eN64i1?_N]OF2H1Q1;[;AVDO1NO4K52Jj00BL@S23UNDjb07X]O6Hn0c0mN]OK8;N`0;\\;AUD11MO3K62Jj00BLAR23UNCmb06V]O6Mk0;TO32Ba0<^;_OSD31M02K71Ij02BJAS25SNA[=OWE0N8dM6Nh0;XO22B>=H\\OH34c08[MN62Hj01]O4@m18ZO_O`:5^DOo48QK6`0EH3j0OPO=0hNT=3hBDO5]2NZMm18\\;HeB0U5S1dKhNF5g1>RNf0Ji::\\DD13\\2OYMn18^ODi82lFa0R44fJFn0CF4h1?QNe0Lj:7YDH31]2NYMn17^OGg83lF6kNHY5d0gJEn0BF5i1?oMe0Ll:LnC19340]2NYMm18^OFi81mF7lNHU5d0jJFb1Gi0?nMg0JALd9OaF0[O:4N]2NYMm17_OGh8O\\F0G9FHU5d0jJGa1Gi0>oMh0F]O7e9JQG8kN5b2G[Mk17@Fj8NYF4I6GGT5d0lJG^1Ij0=oMc10U8IQG7lN5c2F[Mj17BEa96YE3HGU5c0lJHZ1Km0;nMc12U8IPG6mN6IA7MP27nMi18BEa96ZE2IEU5e0lJHW1MMc13[4ObIGj2;`M90B4M]20ZMi17DGZ:2jI=iJI=MQ1K_N=Lg0GA;]5FnIh0c2CcMa02YO4N]22XMj17DHY:1mI^OGNMh1LnM;0f0MTO1`12g2c2hLlNg2_NWLb1NZN1c11c8KTG9d2LVM>^OGNMh1LoM:Oh0LSO2`12g2M^Kj2m0jN^OXNU36hLb1LZN3b11c8NnF9k2
HWM>[OIOIk1NmM:Oh0JVO1`11h2M^KU30nL4g14SNV39eLc1LZN4`11d81RF3K9l3B\\M;SOMh2GSM9Lj0JUO2f6^3QIaNe3UN]Lm10@1c82RF5H96\\O^30jM?nNMk2EUM8J_1IZ5c3kHcN^1iMh08PNn1O_O2c83PFQ1OkN^3LkMc0kNJn2DWM8Hj8g5PGTJk0OU16PNP2O^O2a85QFP1NmN_3IlMi0dNEU3CVM9Hj8h5oFSJk00V15PNQ2O\\O1c86oEQ1OlNT5f0gJBX3BWM9Ii8g5PGTJi01W13QNR2OZO1d86oER1NkNV5Y1oM_NWM;Hi8g5QGSJh02W13QNQ20[OOe87nER1NjNX5Y1aNkN\\Lh8f5RGTJg01X12SNQ2N\\OOe87nES1MiN\\5g0aJ]Ol31]Lg8f5RGUJf01W12UNo1O]ONe87nES1LiNa5c0_JAh32_Ll2H[2o5aJUJe00X12UNQ10[OOP1Ne7LTG1O9LT1LgNc5c0\\JCh32`Li2J]2m5aJ\\J>H`11VNR1O[OOP1Me7NRG4O4NW1k5WOQJCh32_Lj2J]2m5aJ]J=Ga11VNR1N\\O0o0Nd7MTG3N4NX1n5VOmIBm32[Ll2J]2m5aJ]J=Ga11VNP2N]ONd7MUG1O3N<;Fa5?mI@Q42XLm2H_2o5_J]J=Fb11VNP2N]ONd7MfG2^OBc32jLa0VOGX2E\\Nd2oNZ3c6cJXIc13VNT2KZO0_2Kh12jJ==Db30jLa0XOGV2H]NU2mNjM1m5T6QJSJa0Ec12WNU2JYO1]8KeF>=Ec3NiLb0YOGKHb14ROn1]Oj3h5UJoI>Hb13WNV2IXO2`7J\\G1JO=?Ha15XNW2GWO3_7L[G2406NI1M0Y41hL>WOHNJb11YNL8R2Ln3l5RJjIe2OcMU2HWO3_7LZG5047IKO0O22`31]M`0UOE2Gd12WNO7P2DPNJf5M^J]6MjId20dMT2HXO2]7NZG8L3;FMN00N5b3O^M`0SOD`2H_M08Q2_Oo3LRJ^6NhIe20cMU2IWO1^7OXG:K2=FLM14J1f30]M?TODa2F_M5:R8XOmG^6NhId21dMT2IWO0_7OXG;I4=ELL34G3h3M]Ma0SOBc2G^M:7k7\\OnG]6OhIc22eMS2IXOOl7`F3=Fc0NUO2i3M]Ma0SOBa2J^M<6d7_OQH\\6OhIX1IL;TOQ2IXONm7T1kFUOf0LTO1j3M\\Mb0SOBa2K]M<9`7]OTH\\60gIV1LL9UOQ2IXONm7T1jFVOg0KVOO[5a0mJ@a2M\\MNO3`08YOo1JWONg7Z1nFROj0IXOKY5c0WN@bL:`1Z6XNYI[62eI>NO3`08ZOn1IXONg7Y1nFSOk0JWOHZ5e0VN@cL8_1\\6YNYIZ62eI?L03`09XOo1IXONg7Y1lFTOn0Jl42_O2_4ASL`0^OAl33WLQ51YKCa1W74RI]Nk1G_OLm74lF3`02[O6a5O`J_On34VLP52ZKBa1X71RI`Nk1G^OLm74lF2a03ZO6b5N_J@n34VLP51[KC`1X71RI`Nk1G^OMl73lF34EKdHnMU2K^ONk6LeG`0NI724IO6L=h5]OZJLd4S5]KaMS7HeHnMT2K]ONl6MeG`0KI924KN4L>i5ZO\\JNb4R1RKi2;_NEkM_7U2fHPNR2K]ONl6MeGm04^O5LK4M>k5WO\\J0b43XK>LV39cNGhM^7U2fHSNP2J_ONj6NeGl05\\O51I2Ma0n3YOUMKnN070b40^KFGk33UMHCNm2OYN7_N5G]2Ki1F_O3f6NhGg06[O4?TOi36_MLZNQ71mHf1ZOU1X8lNlKZO^63\\E6S1c0M@_O5[4ChKKZ51cJ8@95O5^Od0=TOJMf1]8hNoK[OZ66[E5U1c0L]OBH]O;i4IjKJZ51cJ7A:4OU1KjNh1Y8\\NQL_OW6e0_F:LZOEJ[O;h4JkKIl30kL1VO8_O:5O5^Oc0VNJe1T8kMXLKo57\\E5T1NVO0<22M\\O3Y4
Ml0MjK8lNQ2a0VNIV1L[N[8Mm61f@6;H54K3644M]O36Ca3=Y1JfKJYOn2;iMFU11VN`8MP7:g@Jm04ZO172[O33Bc33UL4S50ZKKI0Mn0Lk0`0R1b8TMd68g@Mm00YO294[O3OCe30XL5P50[KLG1Nj00l0=R1c8VMb67g@Nn0OYO1KKL:N3U4Hf00[KMF0Oj00k0>gN@h1R9jMb67g@No0NXO1KNJ911T4If00[KMF0Oi02k0;hNCg1P9kMb67g@Nn0NYO2KMK803T4Fg02ZKMFO0j01k0;hNCg1P9lMXKNj67WEOo0NWO2LNK713T4Ch04XKNGNOk00l0;gNFf12kMa82bKO]63^E47Nn0OWO2LNM505^9EgFNOk0Fl09fNIf10lMb82aKO\\64_E28Nn0NVO5LMN414]9FgFNOj0Gm07fNKf1NmMc81`K0]64^E18OP1JVO8LMM424[9FiFMNl0Ff0=mNFe1NmMd80_K1]65\\E19Og11[NOM146Y9EjFL0l0Eb0?RODe1NkMe80^K3]65YE2:Nh11\\N1I085W9EkFL1:A22j0h0[OZOc1^=oMTM`0aEEg11]N2E2>2i3C[LMj34SLM17D3Oj0f0_OZO\\1e=TNoL>`EGg11]Ne04Ce3D\\LJl34SLM16G3Li0f0CYOX1h=UNoL>^EHf10_Nc09E]3G\\LIn33SLM15K1JLHh0k0CYOHMi1m=TNnL>]EH6Io06QOc0;GZ3F]LIn33SLM15K10b0=E[Oh1i=mMlL?^EF6Kn06QOc0fNM7Ll1m9lM\\I7i2FQN4gKOd02_O1R14PO34JJ86Mn24i1FZKME30X1=gNN20P2i9nMZI5k2GPN4fKOf01_O1S12QO41JL:6Ln21XMBk36PLND3OY1>gNMO3Q2GmMd92fI5j2HQN3fKOf01_O26I<5E7NIL<7KQ3KUMHj36PL;Bo0?gNMM5S2DnMe92eI4k2HQN3fKOf02]O28H=3E9OEN>4MR3GTMLi36PL:CP1>hNLL6R2EnMf92cI4l2IPN3fKOf03YO4LV1J_1_OI5cL5;g0LTOa0LPOU2h:oMaHNT3MmM3_Lh0>KW1L^1]OI6dL3;i0JXO?JPOT2l:nM_HNU3MmM3_Lh0>K@]OF27=T2CPNJo22dL7:4\\O0T2P:dM]H0U3MmM2aLi0S2S:cMZH2U3MmM2aLj0;I\\OEGO<:a1^OmM>l6F^I7>l1Y:iMTH2T3NnM1kK177C3V1GiN8b00HN`0:X1APNj1\\:jMRH2T3NnM1kK275D4U1CPO896JMe04Q1HPNkMfGMnM1jK472G57J?2E6JLJJd08W1JRN3[OF1?5KKM?7Y1HUN;l6GVJ3POm1W;hMnG4U3NmM0kK466B4=G=4[OG1;9LHO>7Z1DXN4[O;7ELO`03b1OXNEn4P1cKk1TOUMm:2PH2Z3NmM1aLj0@YO>3\\O?OF4K?3e1LTNHd4Z1nKZ1f:]M[G3Z3MoM1_Lk0_OYOa01]Oc0EG6c1HUNIm31RLZ10fNE10a2g;^MYG3[3LnM3^Lj0_OZO6IJ4O[10jNb07\\1HXNIl3;iKm0=o1[;jLWG2\\3LnM3_Lh0_O[O10`0X1]OlNc05e0AoN9NHl3>gKi0a0Q2Z;iLXG1[3MnM4^Lg0_O\\O11a0U1^OlNc06b0BQO8NHm34\\GMUO3]K28OVO4I32O_1J\\Nm03VOb5H[J;45J_2:RMf>2\\GMVO4[K38OWO3I32O_1K[N6N37Ii4I`K2@<23O]2:QMg>3[GMWO3ZK48OVO4J22O_1LYN2712Mh4J_K2@;34N]2a?UMkFLWO3ZK48OVO3K220_1LUN3QNf;3fF3d3JZN2SM7[OMTO7MN10e16\\56oHHJ3NZ1jLI6Lm1NkMLg0=K@i37PLi0I6K9Z=`NfE2\\91bD3;1nN?KF8No1HnM0b0>JAi38oKi0I6K8^`0bNoK1bD2<2mN?KD:0Z2GSN?JAi38oK
k0Ff1]`0XMnK0cD2<3lN`0HD=O[2GSN=JCh38oKl0Df1_`0WMRM1mC3jNn05UOZ31SMFd37PLj3KQLjbNRD7Y95bDI=48MJ`0Z1[O`06^MI`5=dJW15]Nh`05fJ7_DK>2:K[OM6:f1D?7YMK`4O^L`0TOo0ia0eNoI8_DK>2;JXO0;1a47ZOGQLODT2ga0oMoI8\\DL`01;JXO1Y1Li2;2IQL0BS2la0lMnI9\\DM?01=IXO1W12Q3KL4PL0AS2ma0lMRJ6XDO?1>HWO1X12R3LI5PLZ2_a0dMRJ6XDO?1>IWOOX13R3LI5PLZ2JaMd>NfC6^94XDO>2>IXONW13S3NG5PLY2IeMd>JhC6^95WDN?3=HP11c2OF4QLY2HhMc>KeC3e91WD0>2=Jo01d2MF7oKX2IgMd>McC2i9MVD4<1>Io02U75RGY2KcMg>N^AMe16W:KWD5;0>Jo02T77QGb1JeNV?Cn@1LLb15W;KlC5W1Of0NT59^H^12cNQ?Fl@=]1EY;MkC4U13h0KT53WHN5e1:`Nk>Im@=^1DX;OkC2T16g0KiMKm65hH03b1=bNi>Il@?^1BY;OkC2S18bNHa11]NMm63iH12b1?\\Nk>Oh@>_1BY;OkC1T19^NLc1M^NMm64gH14b1S`0XNP@`0^1BY;0kCOT1:]NMi0GG3CNm6?iHZ1S`0WNQ@`0^1BY;0kCOT1:]NMh0HH1DOl6?iH[1S`0UNQ@a0]1Cn:MgC2a0MT1<[NNh0HJO0OS6`0UI]12VNe?M]@a0\\1Dn:NgC0b2:^MM8GM2\\1NT5?VIh1n?ZNaADo:MgC0b2:^MM8Gk10b4?VI]13SNn?b0^ADo:MgC0c29\\M0f2DQ4>XIf0LTOf`0:j@DP;LfC1b0NT1;VN3h::Z6I]@Co:MfC2`0OV17UN7k:6X6K_@Am:OfC2`00V15TN:m:2W6M`@_Om:1eC2?16M1f0`0k0^Ok66]8<\\@\\Ol:3eC0?3]OM=2f0`0k0^Oah0a0iC_OlANa06ZOL>1g0a0j0^Oah0a0jC_OfB4POL>10M2e0_1\\Oj5N^H0Pb0d0SLAeB1QOL`00N02d0^1]Of56]HKTb0`0ULEaBN`0LTOc0^1]Of5b0b:OVLDbBIc00QOd0[1]Ok56i:`j0CfUO0aj0On:01N2O001O00001O000O100000N2O101TIK^mN6bR1J^mN6\\Y1O1000O1000000000000aJJ]gN0W16\\W1J]gN0W15]W1K\\gN0W14^W1LgjN2ZU1Nd40`aN0`^12^aNNa^13_aNMa^13_aNMb^13]aNMc^13]aNMc^14\\aNL^10Q10dV16jfNL`1NQ11eV16ifNK`10Q1OhV13hfNM`12j[1NUdN3l[1LTdN4l[1LTdN4R^11O0jMKVdN4X[1MPcN1h12X[1OnbNOj12W[11mbNNl10X[12lbNNm1OW[13mbNMl10X[12mbNMk11Y[10nbNNi12S^1O0Oe^47UaK0O100010O1N102N4LaQ61^nI4M00O3LUT?2lk@NlP30ToL0ST?2jk@2O01O1O``13]_N10O3KXR38^lLHPbN=o]1EnaN, functioning as part of the chapter heading that presents the focus of the chapter, potentially implying that the author will delve into the reasons or driving forces guiding the study or research presented." 
+ }, + { + "image_path": "ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "mask_rle": { + "size": [ + 1025, + 1025 + ], + "counts": "U]b55i02O10L40Sk04TTOJ<:6OOM4OUk09_TOO42NL5N]j0IgUO?B2O3OI15a0]OXi09^VO[1CVO1G24a0^OYi0?UVOl2OYMa0]OZi0S5cVOoJ[i0T5bVOmJah0KhWOZ5\\OfJ16kh0JhWOZ5ZOXKmh0^OiWOR6Wh0nIhWOS6Xh0mIgWOT6Xh0mIgWOT6Yh0mIfWOS6Yh0nIfWOS6Zh0mIeWOT6\\h0iIfWOW6\\h0gIdWOY6]h0fIcWOZ6]h0fIcWOZ6]h0fIdWOY6[h0hIfWOW6Yh0mIUWOJ;X6ah0SJ^WOm5bh0mIUWOK:X6bh0kIdWOV6[h0jIdWOW6\\h0iIdWOW6Zh0kIeWOV6[h0lIbWO]5BRKkh0E`WOW5HSKhh0F_WOW5KRKfh0G_WOW5KRKfh0DbWOY5HTKfh0CaWOZ5ITKeh0AYWOJ4a5NTKeh0BVWOL7_5LSKgh0H^WOU5ISKih0I^WOm5bh0SJ_WOl5ah0TJ_WOl5ah0TJ_WOm5`h0SJ`WOm5ah0RJ_WOn5ah0PJWWOE5\\6eh0lIXWOKN[6kh0iIWWO]6mh0`ISWO`6nh0_IQWOb6Qi0[IPWOf6Ti01N2H8O1O100000000000000O10000O1000000O10O1000010O01N100000001O1O1aWOgHTh0X7mWOhHSh0V7oWOiHRh0U7>00O1000cWOkHog0T7PXOoHng0Q7RXOoHmg0U7oWOmHPh0U7nWOkHRh0U7nWOkHRh0V7lWOlHSh0a7000000000O1O1N2001O00001O2N1kWO]Hjg0T7UXOSIOJlg0l7000000O101N100O10000001O1O1O00000001O00O10000000000000O100O10001N100001O0O101O00000000000001O00000000O02O000000000O10000001O000000000000000TO]XOYIdg0f6]XOZIcg0f6]XOZIcg0g6\\XOYIeg0g6YXOZIgg0i6VXOXIig0e6ZXOZIfg0f6[XOZIeg0f6\\XOYIdg0g6\\XOYIdg0g6[XOZIeg0a70001OO1VOWXO\\Iig0b6\\XO[Idg0d6]XO\\Icg0d6]XO\\Icg0d6\\XO]Idg0`70001O00UOYXO\\Igg0d6YXO\\Ifg0`70001O00O100TO\\XO[Ieg0c6\\XO]Idg0d6[XO\\Ieg0`700000O1VOZXOYIfg0f6[XOZIfg0e6[XOZIdg0g6\\XOYIdg0h6ZXOYIfg0i6XXOXIgg0h6YXOXIgg0f6\\XOXIeg0g6\\XOYIdg0f6]XOZISg0\\OUYOZ7HQIJLjg0R7\\XO[Idg0e6[XO\\Ieg0e6XXO]Ihg0^70000O100000000000000@jGUYOV8kf0kGTYOU8lf0kGTYOU8\\g0000O101O000O100000000000000000O10O100000UO^XOWIcg0g6_XOXIag0h6_XOYI`g0g6`XOYI_g0i6`XOWI`g0j6_XOUIbg0l6]XOTIcg0k6^XOUIUg0]ORYOY7MZIQg0]OSYOT7O`Imf0]OTYOR70aIlf0]OTYOS7O`Inf0\\OSYOU7N_Iof0\\OSYOV7M_Iof0[OTYOV7M_Igf0[OTYO18U7M_Igf0[OTYO17U70^Idf0^OTYO07k6:gI[f0^OSYO17j6YI]OMmf00QYON8l6=YI]OMnf0OPYOO8m6MTI34INof0NoXO09l6LUI44GNUg0OTYOm6HSI7>mf0ATYOn6HSI7>nf0@TYOn6GSI8?if0\\OPYO49j6JUI5
a0gf0^OQYO29\\6FiId0;\\f0^OQYO2:[6EjId0;[f0_ORYO1:[6EjId0;Zf0ARYOO;[6EkIc0:Zf0BSYON;[6ElIb09[f0BSYON:]6EkI4E7c0cf0BSYON:k6IRI7c0cf0BSYON9j6LSI5c0df0@SYO07k6MSI4b0ef0@SYO07k6MUI2`0hf0_ORYO16l6MVI2>jf0^OQYO26m6LUI3>nf0BSYOl6KTI4>nf0BSYOl6JTI6>mf0BSYOl6KSI5?if0^OPYO38j6LUI4`0hf0^OQYO27k6MRI5c0ef0_ORYO18i6aI[f0JWYOd6?bI[f0IVYOe6`0aIZf0JVYOe6?bI[f0IVYOm67ZIcf0IVYOo65XIef0IVYOo65XIef0IVYOo66WIdf0JVYOm68oHEMnf06WYOe6JoHa07DKkf0:WYOc6>WIBKhf04PYO18i6>WIALhf0XImf00dXOi6`0UIff01cXO16i6a0VIff00dXO04k6b0VIdf00fXOO4k6b0WIcf0OgXOO4k6b0XIbf0OfXOO5l6b0VIcf0OfXO04k6c0WIbf0NfXO15j6c0XIbf0LfXO25j6b0YIif0MeXOj6b0YIif0LfXOl6`0XIjf0LfXOl6`0XI\\OLUg00oXOk6a0YI[OLVg0OoXOk6`0ZI\\OKUg01oXOi6`0[I]OJTg02PYO_6FdIj01]OISg03PYO_6FdIj01bf0JiXO05j6`0[Icf0KhXO04k6a0ZIbf0LTYOk6:XIcf0MTYOl67VI\\g0j6cXOVI]g0k6bXOUI_g0j6bXOTI_g0m6`XOSI_g0n6aXORI_g0o6`XOPI`g0Q7`XOoHag0P7_XOPIbg0n6_XORIag0m6j0DoVOgIRi0Y6nVOgIi0G^g0b6iWOgIh0I^g0`6jWOgIh0J\\g0l6bXOVI]g0j6cXOVI]g0k6bXOUI^g0l6aXOTI^g0n6aXORI`g0n6^XOSIbg0m6^XOSIbg0m6oWOmH;6gg0m6lWOoH<4ig0k6\\XOVIdg0g6^XOYIbg0e6`XO\\I_g0c6bXO]I_g0a6bXO`I]g0`6cXO`I^g0`6aXO`I_g0`6aXO_I`g0a6`XO_I_g0c6`XO\\Iag0d6_XO\\I`g0e6`XOZIag0g6^XOYIbg0h6^XOVIcg0j6mWOSI92kg0l6kWORI;0kg0m6i0N1O1O10001BlVOlIUi0S6lVOmITi0U6jVOlITi0b6bWOYI`g0h6^XOZIag0g6^XOZIag0g6^XOZIag0f6_XOZIag0f6^XO\\Iag0d6_XO`I]g0`6cXO\\Iag0c6`XO\\Ibg0c6_XO\\Iag0c6`XO\\Iag0d6_XO\\Iag0d6_XO]I`g0c6_XO^Iag0a6`XO_I\\OIog0h6eXO_I]OHmg0j6gXO]I^g0b6cXO]I^g0c6bXO]I]g0d6cXO\\I]g0e6bXO[I^g0e6bXO\\I]g0e6bXO[I\\OJmg0j6hXO\\I[OJmg0h6jXO]I\\g0a6fXO_I[g0_6fXOaI\\OIeg0f6oXOaI\\OIeg0f6oXObI[OHfg0f6oXOjIQg0V6oXOjIPg0X6oXOhIPg0Z6oXO^IYOMgg0g6nXO[I]ONcg0h6oXO[I_OLh0Lke0n6mYOZI@Lh0Lke0n6nYOXIAM`g0k6oXOXIALbg0k6mXOXICL`g0l6mXOWI`g0h6bXOUIag0i6l0O1N2O2O1OO100cWO[I]g0g6bXOZI]g0h6`XOZI_g0]6hWOgIi0M^g0\\6iWOgIi0M^g0\\6iWOgIi0N]g0[6jWOfIj0O\\g0[6jWOfIj0O]g0Y6kWOfIi01\\g0X6nWOcIh05Zg0X6mXOhIUOKdg0\\6WYOiIUOKdg0\\6WYOiITg0W6lXOiITg0X6kXOhIUg0X6kWOdIj04[g0Y6iWOeIl00[g0d6cXO[I@O`g0h6nXOYIBO`g0h6nXOXIC0_g0^6UXOiIi0JA0ag0\\6VXOjIh0JA0ag0\\6WXOhIg0MAOag0[6XXO
hIh0LAO`g0]6XXOfIUi0Y6lVOfIUi0Z6901O000000O2O0O2bVOdIUi0\\6kVOeITi0[6lVOdIUi0\\6kVOdIi0N]g0]6kWOeIh0N]g0]6kWOdIi0O\\g0\\6mXOeIRg0[6nXOeIRg0Z6oXOgISOMag0\\6\\YOgIQg0Y6nXOfISg0Z6mXOfISg0Z6mXOfIRg0[6nXOeIRg0[6nXOdIRg0]6nWObIh01Zg0]6mWOeIh0M[g0^6mWOfIQi0Z6oVOeIg0N\\g0]6mWOeIg0N]g0\\6lWOeIh0O]g0[6kWOfIh0O^g0Y6lWOfIg01^g0X6kXOhIVg0W6kXOhIUg0W6lXOiITg0W6kXOjIUg0V6kXOjIUg0V6kXOjIUg0V6kXOjITg0X6Z11N2O1O000000O100O10000000000000000O100000001N2N2^JZVOm3MULli0_OSVOO>>HV1NRONK;^OC4kj08_UO52LN40J;GXj08`UO34I11007HXj06_UO48E14M1Ul02jUOLYhb5" + }, + "dataset_name": "ocr_doc_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This region contains a paragraph of text that discusses specific concepts related to a theoretical framework, possibly in the field of theoretical physics or string theory. It mentions a scenario where excitations in a certain 'near horizon throat region' appear redshifted to an observer at infinity. The text discusses the energy associated with these excitations and touches on limits pertaining to string theory, suggesting that in a particular limit, the full Type IIB string theory must be considered. The paragraph concludes with an implication that supergravity must be considered in the context of near-horizon geometry within the scope of string theory." 
+ }, + { + "image_path": "ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "mask_rle": { + "size": [ + 1025, + 1025 + ], + "counts": "Tec52f21`i0OaTO9K1Q1MSO02KYi08aVO3L23H16O01J5K\\h0:]WO4H54I13N31F8M[h08^WOS1ITO2Q1MgN=K[h0:YWOj48nJ;DYg0[6YXOSJ9Hde0M[[OZ6eNSJ;Hde0J][O[6cNTJGce0Ha[O\\6\\NVJ?Gce0I`[OR7lNVIde0I_[OQ7lNVIfe0J][OP7mNVIfe0J\\[OQ7mNVIge0H][OS7jNVIie0G][OS7jNWIie0D^[OU7hNXIje0C^[OU7hNXIie0E^[OS7jNWIhe0F^[OS7kNVIge0H\\[OS7oNSIee0J\\[OS7PORIde0J][OT7oNRIde0J][OT7nNSIee0I][OT7kNVIhe0F\\[OU7lNUIhe0F\\[OU7kNVIie0EgZOLHZ7GUIje0FdZO3DX7GPIRf0EaZO6Ek7je0nG`ZO9Dj7le0lG`ZO@h7oe0jGaZOn8_e0SG`ZOm8`e0SG`ZOm8`e0SG`ZOm8`e0RGaZOn8_e0RGaZOn8_e0RGaZOn8`e0PGaZOQ9^e0PGaZOP9_e0PGaZOQ9_e0mFbZOS9^e0mFbZOT9Tf010000XOkFeZOS9[e0PGcZOP9]e0QGbZOn8^e0SGbZOm8^e0RGcZOn8^e0QGbZOo8^e0QGbZOo8^e0PGcZOP9]e0PGcZOo8`e0PG_ZOP9be0oF^ZOP9DQGce0OiZOo8ESGae0MkZOP9DSGbe0LjZOQ9CUGXf0k8hYOWGVf0i8jYOWG]e0KP[On8CUG_e0NmZOm8DUG_e0NmZOn8CSG`e00mZOm8CTG`e0NmZOn8CTG`e0MnZOP9@VGMH_e02T[OP9@[G\\e0CU[OR9_O[G[e0EU[OP9@[G[e0EU[OP9@UGNJ\\e02V[Oo8ASG0IZe05U[Oo8ARG`e00oZOn8ARG`e0OP[OP9^OSG2HYe04X[OQ9]OYG[e0FX[OQ9]OYG\\e0EW[OR9]OTG[f0k8gYORG[f0n8eYORG[f0n8eYORG[f0n8eYORG[f0n8dYOTG[f0m8dYOTG[f0l8eYOTG[f0l8eYORG]f0n8fYOkF^f0U961010O0000000O10001O0001O0000107H1O0O101O00N2G`YOPG`f0n8;O1001YYOPG[f0Q9cYOQG\\f0o8dYORGZf0P9dYOQG\\f0Z9O101gYO]FQf0d97O02N00O1O1O20N2O2N1N1000O1O1H8000100O0O1O101O0000001O01O102M1O1N1000N21O000000001O0O2OO1N2L4N2001O3M1OL5N10000001O1O1O0O0^YOfF_f0Y9bYOhF]f0X9cYOhF]f0X9bYOjF\\f0^901OO1O1O1KcYOfF]f0Z9cYOfF\\f0a9O001O0O100O1000000O11O0O1L5M2O10O101O00121M0O1O000O1010N101N10eYOaFTf0`9jYObFUf0e90000O10JjYO`FVf0`9mYO^FQf0d9oYO\\FQf0d9oYO\\FQf0e9mYO\\FSf0i900001InYO]FRf0b970O101O1gYO\\FSf0i91nNjYOWHVf0i7jYOVHXf0h7jYOTHYf0l7gYOSHZf0m7fYOSHZf0n7dYOSH\\f0i80O2O0O1VOaYOQHaf0n7_YOQHbf0o7^YORHaf0n7`YOQH`f0o7`YOSH^f0l7cYOUH]f0j7cYOUH^f0j7cYOUH_f0i7cYOVH]f0i7eYOVH[f0k7cYOVH]f0k7aYOWH]f0k7bYOVH]f0j7cYOWH\\f0i7dYOVH]f0i7dYOWH[f0i7gYOUHZf0k7gYOTHYf0l7fYOUHZf0
l7dYOVH[f0k7dYOVH[f0j7eYOVH[f0j7eYOVH[f0i7fYOVH[f0i7gYOVHXf0j7iYOVH4YOfe0c8SZOUH0WO21ke0g9TZOZFke0f9UZOZFle0e9TZO[Fle0d9VZOZFke0_9?O1000000000000VOaYOQH_f0n7dYOeGLMdf0]8bYOdGLLdf0a8^YOdGNLcf0b8\\YOoG`f0j8O1_YOcF\\f0d8dYOPHaf0o7aYOoG`f0P8bYOmG`f0R8aYOmG`f0T8_YOmG`f0S8`YOnG_f0S8_YOnGaf0R8_YOoG`f0Q8`YOPH=[Oae0e8SZOQH9[Oce0e8TZObGN4:Ede0e8VZO_GN61GM1ne0P9RZOZG1GN0oe0^9RZObF1Ome0f9SZOZFme0f9SZOZFme0f9SZOZFme0f9SZOYFne0d8PZO[H2QOne0c8RZOdG0;O^Ooe0c8RZO[H0ROne0c8RZOXH3UOke0d8PZOWH6UOje0d8PZOUH8XOge0b8SZOTH6[Ohe0_8SZOUH6\\Oge0^8UZOUH3^Ohe0\\8VZOUH3_Oge0]8UZOUH3^Ohe0_8RZOUH4^Oie0^8QZOXH3ZO6I[e0e8\\ZOYH2YOne0]8PZOWH7YOje0`8oYOSH=ZOee0b8PZOPHcf0P8]YOnGff0Q8ZYOoGff0Q8ZYOoGff0Q8ZYOPHef0P8[YOPHdf0P8]YOQHbf0n7`YORH_f0m7eYOcGM4[f0Z8iYOaGM4Zf0[8iYOaGM5Yf0Z8gYOdG02Yf0[8fYOeGN27Hfe0a8TZOhGMO8Jfe0_8UZOhGMO8Jee0_8XZOdGM37Jde0_8YZOaGM76Hee0`8bZOgGJIde0`8XZObGJ89Fee0`8WZOSH4]Oee0`8WZOTHM[O00le0a8VZO\\HNSOle0a8WZOZHNUOke0`8YZO`GO=NDje0_8jZOdG_OMge0_8kZObG_OOfe0_8lZO`G_O1ee0`8Y[O`Ggd0b8UZO^G703Oae0c8TZO_G704Nae0c8TZO_G8O3Obe0b8TZO^Gk00Qe0a8X[O_Ghd0]8oYOdGY1Nid0]8QZOcGU12id0[8RZOcGd04DNfe0[8RZOcG3177JLie0X8TZOdG216:IIke0X8\\ZOeG09KIje0X8kZOlG]OLhe0X8kZOkG^OLhe0X8_ZOfGJ41Mfe0Y8^ZOgGI54Jfe0Z8ZZOjGJ4Zf0R8kYOlGJ5Xf0o7fYOfG1:NKNN]f0W8nYOjGL0\\f0V8iYOhGN0Zf0W8Q1N20O10000YYOhGee0Y8Q12M2O1O0000OlXOcG023N`e0]8\\ZOdG023Nae0\\8lZOdGD0`e0\\8_[OdGad0\\8b1O1000mYOeG`d0[8_[OfGad0Z8_[OfGad0Z8`[OeG`d0[8`[OeG`d0[8nZOgGbe0Y8]ZOhGce0X8]ZOhGce0X8]ZOhGde0W8[ZOiGfe0W8ZZOiGfe0W8ZZOhGge0X8ZZOgGee0Z8[ZOeGPd00f\\O[8[d0dGe[O\\8ZOeGfe0[8ZZOfGee0Z8ZZOhGee0X8[ZOhGee0X8[ZOgGge0X8ZZOfGge0[8o000O100O10iXOgGff0Y8[YOfGef0Z8a0000000L[XOlGeg0S8]XOlGcg0T8]XOmG;Ogf0U8lXOnGiQOD[l0IRUOb0bNF\\l0HRUOb0bNF\\l0HRUOc0aNF\\l0GSUOc0`NHYn08fQOIZn07fQOIZn07gQOHYn08hQOGZm0NnRO;HGYm01mRO8JGXm03lRO7LFXm03lRO7KGYm03kRO6KHYm07gRO20G]l0DSTOd0@00H\\l0FRTOd0AO1F\\l0HQTOc0BO1F\\l0HQTOd0AN2F\\l0HQTOd0AN1H\\l0ESTOe0@N1H\\l0ESTOe0_OO2G\\l0ESTOe0_OO2G]l0DRTOf0^O03F]l0DRTOf0^OO4F]l0GnSOe0AN4F]l0GnSOe0@O5E]l0FoSOg0^ON6E]l0EPTOh0\\OO6E^l0CQTOi0[OO6E]l0EQTOg
0\\O04E_l0DRTOd1@gN^l0GPTOb1BgN^l0IoSO`1CfN^l0JPTO_1CfN]l0KPTO_1BgN_l0IoSO`1BgN]m0Y1aROhN_m0X1aROhN_m0X1aROhN`m0V1bROiN_m0V1aROjN_m0V1aROjN_m0V1aROjN^m0W1aROjN_m0W1_ROjNam0V1^ROkNbm0U1_ROjNam0V1`ROiN`m0V1bROiN]m0X1cROhN_l0DUTOe1[OgN`l0DUTOe1ZOhNal0CUTOf1XOhNcl0BTTOg1YOgNcl0BTTOf1ZOhN_m0X1aROhN_m0W1dROgN\\m0Y1cROhN]m0Y1]ROlNcm0i10000000000SOeMTTO[2il00000001O000000000000000000000VOdMoSOi0XOh0cm0k0000000000000000000O11O0000001O00O101O01O00001O00000[N\\ROm0dm0SO\\ROm0dm0h0000000O10001O0O10O10O101O000001O0O10001O0000001O00000000000000000000000000000000000000000000O1000000O1000000000000O1000000000000000O10000001O000000000000O10001O00001O001O010[N[ROl0em0SO\\ROm0dm0SO\\ROm0dm0RO]ROn0cm0RO]ROn0cm0QO^ROo0cm0PO]ROP1cm0oN^ROQ1bm0PO]ROP1cm0RO[ROn0dm0h00000000000O100O2O00000000000000O1O10O10001O000000000000000000000O1001O000001O00000000000000000000O10000001O000000O10000000001O001N100010O00000O10000O1000000000000000000000000O1000000000000000000000000000000O101O00010O0001O0O2N2O1O0000000000O0100001O0O1000000O1000000000000O100000000000000000001O00000000001O000000000001O000000000000000000O100000000000O10000O10000O11O0000000001O01OO101O0000000001OO0100000000O10000000O10000001O0000000000000000000000O2O00O2O000001O000000000000001O00001O00001OO100O10000O10000000000000000000000000001O00000000000001O0000000000001O000000000000O1000O10O2O000O100000000000000000O1000000000000001N10000001O0000000000001O0001O00000O100000000001N1000000001N11O00000000000000000000000WNbROo0_m0POaROP1_m0oNcROP1^m0oNbROQ1^m0oNaROR1_m0h000O1001O0000000O10O1000000000001O001O0000O10O10O1000001O0001O00001O00000001UN^ROU1bm0jNaROS1`m0mNaROR1^m0nNcROQ1^m0oNbROQ1_m0nNaROR1_m0oN`ROQ1`m0PO_ROP1am0RO]ROn0cm0RO\\ROo0dm0PO^ROo0bm0oN`ROQ1`m0lNcROT1]m0lNcROT1^m0kNcROT1]m0lNcROT1]m0lNcROT1]m0lNcROS1^m0lNcROT1]m0lNdROS1\\m0lNeROT1[m0lNeROT1[m0mNdROS1\\m0mNcROT1]m0lNcROT1]m0lNcROT1]m0kNdROU1]m0iNdROW1\\m0iNdROW1\\m0iNcROX1]m0hNcROX1]m0hNcROX1]m0hN^ROGMa1fm0fNdROa0HIdm0EeRO`0KHbm0FdROa0MG`m0FeROa0NFTn07PROFSn06PROGUn04n0Md]a5" + }, 
+ "dataset_name": "ocr_doc_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This text section seems to conclude the discussion by mentioning that the analytical tools for two differing theoretical scenarios are completely incompatible. It references the Born-Infeld action and suggests that a mathematical comparison between different models yields coincident D-branes for an \\( \\text{SU}(N) \\) two-form field strength, relating to a broader discussion on theoretical physics and string theory." + }, + { + "image_path": "ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "mask_rle": { + "size": [ + 1025, + 1025 + ], + "counts": "RTU93ko05L3N1O2N1O100O2O0O1O100O10000000000001O00001O000000000000000000000000001O000000O1001O2N1O001O0O10000O10O010O1O2M200001O00000O0100000001O00000O101O000000000000000000000O10000000001O01O00000000000000000000O01000000000000000000000000000O100000000001O000000000000000000000O10000000000000O1000000000000000000000000000000000000O1001O00000000O11O000001O00O10000O10001O0O10000000000001O00O1000001O0O101O00000000001O01O000O2O0O11O000000000000000001O0000000000O2O1O1O0O10000001N2O0O2O1N10000O1000000O10000O100000000000001O0001O000000000000000000000001O000]NZOkSOf0Ul0ZOkSOf0hm000O1000]N[OjSOe0Vl0\\OiSOd0Wl0\\OiSOd0Wl0[OjSOd0km0O001O0O\\N@iSO`0Wl0@iSO?Wl0BhSO?Xl0AhSO?Yl0@gSO`0Yl0AfSO?Zl0@fSOa0mm0000000000001O00000O2O[NBgSO>Yl0CfSO=Zl0CfSO=Zl0CfSO=Zl0DeSO<[l0CfSO>Yl0BfSO?Zl0@gSO`0Yl0@gSO`0Yl0@fSOb0Yl0_OfSOa0Zl0_OfSOa0Zl0@eSO`0[l0@eSO`0[l0_OfSOa0Zl0_OfSOa0Zl0_OfSOa0Zl0_OfSOa0Zl0_OfSOa0Zl0@eSO`0[l0@eSO`0[l0@eSO`0[l0@eSO`0[l0_OfSOa0Zl0_OfSOa0[l0]OfSOc0lm00010O00000000000000]N]OhSOc0Wl0^OiSOb0Wl0^OiSOb0Wl0^OiSOb0Wl0^OiSOa0Xl0_OhSOa0Xl0_OhSOa0Xl0@gSO`0Yl0@gSO`0Yl0@gSO`0Yl0@gSO`0Yl0_OhSO`0Yl0@gSO`0Yl0@gSO`0Yl0@gSO`0Yl0AfSO?Zl0AfSO>[l0BeSO>[l0AfSO`0Yl0@gSO`0Yl0@gSO`0Yl0@gSO`0Yl0@gSO`0Yl0@gSO`0Zl0_OfSOa0Zl0_OfSOa0Zl0_OfSOa0Yl0@gSO`
0Yl0@gSO`0Zl0_OfSOa0Zl0_OfSOa0Zl0_OfSOb0Yl0^OgSOb0km010000000001O00001O0000000000000000O100000000000000O1000000O10000000000000000001OO100000000O101N1001O00000000000]N@eSO?\\l0AdSO?\\l0AdSO?\\l0AeSO>Zl0CfSO=Zl0DfSO:[l0FfSO9Zl0HfSO6[l0JfSO5Zl0JhSO5Xl0KiSO4Wl0KjSO5Vl0KlSO2Ul0On1001O00O1O100001O1O0000O10000O11O000000O10000001O001O0000O1000001NSP20moM101O0000000000000000002N2N1YPOKZ14bl00VROJW19cl0HgSO9Pn00O1N3N101N101\\N^OgSOb0Yl0^OgSOb0Yl0_OfSOa0Zl0AcSO`0^l0@aSOa0^l0@`SOa0`l0_OZROLP1f0el0^O[ROLP1f0el0^OaSOb0^l0^OcSOa0^l0]OdSOc0[l0_OdSOa0\\l0@cSO`0]l0AbSO`0]l0@cSO`0]l0@cSO`0]l0_OdSO`0]l0@cSO`0]l0_OdSOa0]l0]OdSOc0mm000001O0000O1000000001O00O1000001O00`N]ObSOc0^l0]ObSOc0nm000O101O0O2N2N3M2N4LjZb5" + }, + "dataset_name": "ocr_doc_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This region exhibits a mathematical formula that is relevant to the discussion within the document. The formula appears to link certain theoretical physics concepts, connecting string coupling constants \\( g_s \\) with D-brane charges and configurations. The formula is most likely important in the context of supporting the document's claims about supergravity or string theory." 
+ }, + { + "image_path": "ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "mask_rle": { + "size": [ + 1025, + 1025 + ], + "counts": "P[g35h01NNdl01XSO5O50Hik01WTO4J524ODlk03UTO3K6O9N\\ORl05TTO2K[1M_NRl06TTO2Kh1Yk0fMYUOa0C3Hh1Zk0eM[UOa0Aa2Tk0nLZUOb0B`2Tk0nLZUOc0A_2Uk0nLZUOc0A_2Uk0nLYUOe0A]2Vk0nLYUOe0@^2Wk0mLYUOe0@_2Vk0lLYUOf0_Oa2Wk0iLZUOf0^Ob2Xk0iLZUOn3gj0SLjTOM9P4mj0SLiTOO9n3nj0TLgTOO;m3nj0SLiTOO9n3nj0SLiTOO9o3HQLnj00RUO08P4EULnj0KVUOO7Q4EVLmj0IfUOQ4]OVLck0j3]TOVLbk0j3_TOWL2KUk0n3iTOWL1LVk0m3iTOWL1MUk0l3iTObL?\\OWj0h4hUOZKWj0g4hUOXKdi01`VOg4oi0\\KbUOK9j4Sj0bKkUO^4Tj0dKkUO\\4Uj0dKkUO\\4Uj0dKkUO\\4Uj0dKkUO]4Tj0cKlUO]4Tj0cKkUO]4Vj0eKgUO\\4Yj0fKdUO[4\\j0e01O000eJdUOQ5\\j0oJcUOR5\\j0:0O100000^VO_Jdh0a5nVO_JL3Ui0^5nVOaJJ3Xi0\\5nVOaJJ3Xi0\\5[WObJeh0^5]WO`Jch0>eVO]4Zj0bKhUO]4Xj0cKgUO^4Yj0bKfUO_4Zj0bKcUO`4SVO`4O0001nUO\\Jei0f5XVO\\Jgi0d5YVO\\Jgi0c5[VO[Jfi0c5\\VO\\Jei0c5=000oUO]Jdi0d5[VO]Jdi0d5ZVO]Jfi0c5ZVO]Jfi0c5ZVO]Jfi0c5[VO\\Jei0c5\\VO]J20Si0d5jVO[J42Qi0c5kVO\\J31Ri0c5jVO]J40Ri0d5iVO]J5MSi0f5hVO]J5MSi0e5jVO\\Jei0c5<10O1001O00000PVO[Jdi0d5]VO]Jbi0c5]VO^Jci0b5]VO_Jbi0a5^VO`Jai0a5^VO`Jai0`5_VOaJLJ]i0e5gVOaJLK[i0e5iVO`J_i0_5bVO`J_i0`5bVO^J_i0b5aVO]J`i0c5`VO^J`i0a5_VOaJ`i0`5_VOaJLJ]i0e5gVOaJLJ]i0e5gVO`Jai0_5`VO`Jai0`5`VO]Jbi0c5^VO\\Jci0d5=1O0\\VO]Jjh0c5UWO_Jjh0a5k0O100000PVO]Jbi0c5^VO_J`i0b5_VO_J`i0a5`VO_J`i0a5`VO`JLK[i0d5jVOaJJNZi0a5kVObJKNYi0`5lVObJKNYi0`5lVObJJO[i0^5kVOdJHO]i0^5jVOcJIN^i0_5iVOcJIN^i0_5iVObJKN[i0`5kVOaJLNYi0a5lVO`JNLVi0d5lVO`J_i0`5aVO`J_i0`5`VObJ_i0_5`VOaJ`i0_5`VOaJLJ]i0e5gVOaJLJ]i0e5gVOaJ`i0_5`VOaJ`i0_5`VOaJ`i0_5`VOaJ`i0`5_VO`JMK\\i0f5fVO_JML]i0e5fVO_JML]i0d5gVO_Jbi0`5_VO`Jai0V5WVOkJ:M`i0W5WVOlJ9M`i0W5WVOlJ9M`i0W5WVOlJ9M`i0X5VVOkJ:N_i0W5WVOkJ9O`i0V5WVOkJ9O`i0`5_VO`Jai0`5_VOaJ`i0`5_VO`Jai0a5^VO_Jai0b5_VO^JNK[i0h5eVO_JNJ]i0g5eVO_JNJ]i0f5gVO^JNJ\\i0g5gVO_J`i0`5aVO_J`i0a5`VO^Jai0b5_VO^Jai0b5_VO_J`i0b5_VO^Jai0b5_VO^Jai0b5_VO^Jai0b5_VO^Jai0b5_VO]Jbi0c5^VO]Jbi0c5^VO^Jai0b5_VO^Jai0c5^VO]Jbi0b5_VO^Jai0b5_VO^Jbi
0`5`VO]Jbi0b5_VO\\Jci0d5=0PVO\\Jci0d5]VO]Jbi0d5\\VO^Jci0b5]VO^Jci0b5]VO^Jdi0a5\\VO^Jei0b5[VO^J3OSi0c5jVO^J30Ri0c5kVO[J42Qi0c5kVO[J50Qi0e5jVO[Jfi0e5ZVO[Jfi0e5:0100O1001O000000O10PVO]Jbi0c5^VO^Jai0b5_VO^Jai0b5^VO_Jbi0b5]VO_Jbi0a5^VO^Jci0a5_VO^Jai0b5_VO]Jbi0c5>00000000000001O000001O00000O100PVO\\Jci0Z5TVOlJ9Kbi0Y5UVOlJ9Kbi0Y5UVOlJ9Jci0Y5UVOnJ7Idi0X5VVOoJ6Idi0X5VVOoJ6Idi0W5WVOPK4Kdi0U5XVOPK4Kdi0U5XVOPK4Kdi0W5VVOnJ6Lci0c5\\VO^Jci0c5\\VO]Jdi0c5\\VO]Jdi0b5]VO^Jci0V5WVOmJ7Lci0U5YVOmJ6Lbi0V5YVOnJ5Lci0V5WVOnJ6Lci0W5UVOoJ6Kei0W5TVOnJ7Ldi0V5UVOnJ7Ldi0V5UVOnJ7Lci0X5UVOlJ8Lci0a5^VO^Jci0b5^VO]Jbi0c5]VO^Jci0b5]VO_Jbi0a5^VO_Jbi0a5^VO_Jbi0a5^VO_Jbi0a5_VO]Jbi0b5_VO]Jci0Z5RVOkJ;Jdi0c5\\VO^Jci0b5]VO^Jci0c5\\VO^Jci0b5]VO_Jbi0`5_VO_Jbi0a5^VO^Jdi0`5?00001O0nUO_Jdi0a5\\VO_Jdi0a5>O010HdUOkJ\\j0U5cUOlJ]j0\\501nUO^Jci0b5]VO_Jbi0a5^VO^Jci0a5?000000001nUO_Jbi0a5^VO_Jbi0a5?O1HdUOkJ\\j0T5eUOlJ[j0U5dUOkJ\\j0]50001N10GfUOkJZj0S5hUOmJXj0R5iUOnJWj0R5iUOnJWj0R5jUOmJVj0S5jUOlJWj0T5iUOlJWj0U5gUOlJYj0U5fUOkJZj0V5eUOjJ[j0V5dUOkJ\\j0T5fUOkJZj0S5hUOmJXj0R5jUOlJ7Mai0W5XVOlJ7N`i0V5YVOlJ7Mai0X5VVOlJYj0_5O000O10000000000000000000000001N100000000000001O0IcUOjJ]j0W5bUOiJ^j0W5bUOiJ^j0]50000001N10000O100000O2O1O000000000000000O10000000000000JcUOhJ]j0W5dUOiJ\\j0W5dUOiJ\\j0X5cUOhJ]j0X5cUOhJ]j0^50001O001O00000000000000O11O000000000000GfUOjJ[j0V5eUOjJ[j0V5eUOkJZj0U5fUOkJZj0U5fUOkJZj0U5fUOkJZj0T5hUOjJYj0U5iUOiJXj0W5hUOiJXj0W5hUOiJWj0Y5hUOgJXj0Z5fUOhJYj0Y5eUOhJ[j0X5eUOhJ[j0_5001O0O10000000000000000000000000000000000000000JcUOhJ]j0W5eUOhJ[j0X5eUOhJ[j0X5eUOhJ[j0X5eUOgJ\\j0Y5eUOfJ[j0Z5eUOfJZj0[5fUOdJ[j0\\5eUOeJZj0[5eUOfJ[j0[5dUOeJ\\j0`50O100O20O1O00HbUOmJ^j0[500000000000000000000000001OJcUOhJ]j0W5eUOhJ[j0X5dUOiJ\\j0X5cUOhJ]j0^500000IdUOiJ\\j0U5gUOjJYj0U5hUOkJXj0U5hUOkJXj0W5fUOiJZj0X5eUOhJ[j0Y5dUOgJ\\j0X5eUOhJ[j0W5gUOgJZj0W5hUOiJXj0U5kUOjJUj0V5;00001O001^UOiJXj0W5gUOkJXj0U5gUOlJYj0U5fUOlJYj0U5gUOiJZj0W5fUOiJZj0W5fUOiJZj0W5fUOiJZj0W5fUOiJZj0W5gUOhJYj0X5gUOgJZj0Y5fUOgJZj0Y5fUOgJZj0Y5fUOgJZj0X581^UOgJZj0Y5eUOhJ[j0Y5cUOiJ\\j0X5cU
OhJ]j0^5000O2O000KaUOiJ^j0V5dUOiJ\\j0W5dUOiJ\\j0W5dUOiJ\\j0W5dUOiJ\\j0X5bUOiJ^j0]50JbUOhJ^j0X5dUOgJ\\j0Y5dUOgJ\\j0X5fUOgJZj0Y5fUOgJYj0[5fUOeJZj0[5fUOeJZj0[5fUOeJZj0[5fUOeJZj0[5fUOeJZj0Z5gUOfJYj0Z5gUOfJYj0Y5hUOgJYj0X5800O11O1^UOgJZj0Z5cUOjJ[j0^500000O1IdUOiJ\\j0W5dUOiJ\\j0W5dUOiJ\\j0V5eUOjJ[j0V5eUOkJZj0U5fUOkJZj0U5fUOkJZj0U5fUOjJ[j0V5eUOjJ\\j0U5dUOlJ[j0T5eUOlJ[j0T5eUOlJ[j0T5eUOlJ[j0U5dUOkJ\\j0U5dUOkJ\\j0V5cUOjJ]j0^5OHdUOjJ]j0U5dUOkJ\\j0T5fUOkJYj0V5gUOjJYj0U5hUOkJXj0U5iUOiJXj0W5hUOiJXj0W5iUOgJXj0Y5gUOiJXj0X5fUOjJYj0W5eUOjJ[j0_5O00O100000000000000HdUOkJ\\j0U5eUOiJ\\j0W5dUOiJ\\j0W5eUOhJ[j0W5fUOiJZj0W5fUOiJZj0W5fUOiJZj0W5fUOiJZj0X5eUOhJ[j0X5eUOhJ[j0Y5cUOhJ]j0W5eUOhJ[j0X5eUOhJ[j0W5fUOiJZj0V5gUOjJYj0V5gUOjJYj0V5hUOiJXj0X5gUOgJZj0Y5fUOgJYj0Z5gUOfJYj0Z5gUOfJYj0Y5hUOgJXj0Y5hUOgJXj0Y5hUOgJXj0Y5hUOfJYj0Z57O1C[UO`Kcj0_4_UOaKOE\\j0h4gUOcKLG\\j0c4lUOeKHH]j0_4nUOiKFG\\j0_4oUOjKEG\\j0]4QVOlKDE\\j0\\4SVOoKRj0Q4nUOoKRj0W4hUOiKGE\\j0h4gUObKNGZj0i4fUO`K0GZj0j4dUO`K2FZj0j4dUO`K2FZj0j4dUOaK1E[j0j4dUOaK1E[j0j4dUOaK1E[j0i4eUObKOF]j0g4eUObKNG]j0f4fUObKNI[j0d4iUObKLJ[j0c4iUOdKLH\\j0Y4aUOmK82KH\\j0X4bUOmK84JF\\j0Y4cUOlK75JF[j0Z4dUOlK63KH[j0X4eUOnK51KJZj0W4fUOnK60JKYj0X4gUOmK60JKYj0X4gUOmK52IJ[j0W4gUOnK33JH\\j0W4gUOoK28Wj0i3gUOjKOK4b0Vj0i3hUOhK87EJ[j0W4hUOgK=4AMZj0X4iUOeK>4@OYj0W4_VOjKXOOZj0U4jUOhK<4@OZj0U4jUOhK<4@OZj0T4kUOiK61D40N[j0U4iUOjK78EI[j0V4hUOjK68GH[j0W4fUOPL21MHZj0Y5eUOhJ\\j0W5dUOiJ\\j0W4eUOQL00OH]j0U4eUORL01NH^j01dUOi31ULN019OG]j01eUOi31ULOO1:MH]j01eUOi3a0^L]OH]j01dUOj3f0XLZOM[j03dUOh3h0WLYON[j02eUOh3h0XLXOM]j02dUOg3h0[LVOL^j02eUOe3h0]LUOL^j02eUOe3i0\\LTOM_j01dUOe3j0]LSOM_j01eUOd3j0\\LSO0]j0OgUOe3Q1\\Ldi0c3\\VO]Lei0`3]VO`Lji0W3YVOgLji0`1kTOXO3Hfl03WSO13O5Jfl00ZSOOO44Ijl0J]SO2O0fkh3" + }, + "dataset_name": "ocr_doc_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The paragraph in discusses the conventional assumption that an insider's private information is static, citing specific examples from the 
literature. It elaborates by stating that in certain works, insiders are assumed to know the final value of an asset both before and after the default of the company issuing the asset. The text suggests that the presence of insiders does not always lead to market arbitrage and may contribute positively to the market by leading to higher information efficiency in price processes." + }, + { + "image_path": "ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "mask_rle": { + "size": [ + 1025, + 1025 + ], + "counts": "`_h34f02Un07fQON_m0L_RO90N^m0M^RO73M^m00ZRO56M_m0k0[ROZOcm0d1M2O2N1fROcMnl0^2oROeMPm0]2lROfMRm0e201O001O1O001O0O101O0O10000O10000]OnLWTOR3ek0UMWTOl2gk0YMVTOg2ik0]MSTOe2kk0i00001O0dTOPLgj0Q4WUOQLhj0o3XUOPLij0b0gTOg2nk0XMTTOf2nk0YMRTOg2nk0YMRTOf2ok0ZMQTOg2nk0YMRTOg2nk0YMRTOg2nk0ZMPTOg2Pl0e0000O10001OBTLgTOl3Yk0TLgTOm3ek00100000O2O0XTOQL^k0o3bTOQL^k0P4aTOPL_k0o3bTOQL^k0o3:0000001O0001O0O1000O10001O00O1000gTOQL`j0n3aUORL_j0o3_UORLaj0n3QUOQL41kj0o3PUOQL31mj0n3oTORL40mj0o3nTOQL50mj0o3nTOQL50mj0o3nTOQL50mj0P4mTOPL7Olj0Q4mTOoK80kj0Q4mTOoK9Ojj0R4mTOoKbk0Q4^TOoKbk0P4`TOnK81ij0P4^UOQLcj0m3]UOTLcj0l3]UOULbj0k3_UOSLbj0m3PUORL50^j00_UOP4LQLTk0OPUOP4LQL`k0o3`TORL_k0n3aTORL_k0n3aTOSL5Lmj0Q4nTOTL3Loj0P4nTOSL5Lmj0R4mTORL^k0o3bTOPL_k0P4aTOPL`k0o3`TOQLak0m3`TOSLak0l3_TOTLak0k3`TOUL`k0l3_TOUL`k0k3`TOUL`k0l3^TOVLak0j3_TOVL`k0k3`TOUL_k0m3`TOSL7Jmj0U4jTORL8KWj0NUVO^4CeKnj0\\4QUOdKoj0\\4PUOeKej0M]UO`4LdKgj0L]UOU4ISL4Kej0N^UOS4KRL5Kaj01`UOP4LQLPk01SUOm3Xk0SLhTOm3Yk0RLgTOn3Zk0PLhTOP4;oKVj00`UOP4KPL61lj0o3nTOQL41oj0n3lTOoK82lj0P4e01O1O1O1@lKQUOU4mj0mKQUOU4nj0kKQUOP4IoKUk02RUOn3KnKTk04QUOm3Vk0SLiTOn3Wk0RLiTOn3MoKSk02PUOP4LPLRk00RUOP4LPLSk0OQUOQ4KRLTk0KRUOS4JRL^k0P4`TORL_k0X4O10001O0fTO^KUk0b46N101N1001N101O01OVOaKSVO^4li0QLgUOn3KdKTj0>QVOn3KdKTj0?PVOm3KeKUj0>oUOn3LdKUj0>oUOc3GUL4KVj0=nUOd3HTL4KVj0>nUOb3HTL7JSj0`0nUOa3ITL8IRj0c0lUO`3KRLej0>`UO_3LRLdj0`0`UO^3LRLdj0`0_UO_3MQLdj0`0_UO_3MQLdj0`0_UO_3MQLej0?^UO`3MRLej0=^UOa3NRL5KSj0b0jUOa
3NRL2OVj0=jUOc3LSL2NYj0;iUOe3JSL4MYj0;hUOQ4MeK\\j08hUO]4Yj0bKgUO_4Xj0aKhUOi3J^L]j0KhUOf3MPL05[j05iUOd3NPL06Yj06iUOc3OPL16Xj0IdUO84i3OQLO6`j0NcUOl3MPLO7bj0LbUOm3MQLM7fj0I`UOo3LRLN6gj0G_UOS4K^Lgj0^O^UOT4K^LQk0c3nTORL0OSk0Z4nTOeKhj0K\\UOk4bj0XK]UOS4K\\Lgj0B^UOR4K\\Lej0D`UOo3M\\Lbj0FaUOn3M\\L\\j0MfUOg3NRLOO\\j09gUOe3ORL1NYj0iUOP40bKXj0=hUOQ40bKXj0=hUOP42bKVj0>hUOP42bKVj0>hUOf3LPL8KTj0?iUOd3LRL7KTj0?iUOc3NRL5LUj0>iUOb3NTL4LUj0>iUOc3MTL4KVj0>hUOd3NTL3JXj0=gUOf3MTL1K[j0;gUOf3LVLOK]j0:hUOe3LUL2JXj0>kUOn3NdKVj0?lUOl30dKSj0a0nUOj3OfKRj0`0nUOk30dKSj0a0mUOk31cKRj0b0mUOk32bKQj0c0mUOj33cKPj0c0mUOa3JQL:Koi0c0mUOa3KnK;Nni0b0lUOa3=mKgi0b0lUOa3=mKgi0b0lUOa3ij0`LWUO`3ij0`LVUOa3OkKej0d0\\UOa3NlKgj0b0[UOc3LmKij0>\\UOe3JQLjj0OYUOL4_4LdKRk0\\4lTOeKhj0L_UO_4HfK`j05gUO^4Yj0bKgUO^4Xj0cKhUO]4Wj0dKiUO\\4Vj0eKjUOR4JeK[j0:kUOd3IVL3KYj0;kUOc3KVL2KXj0lUOV4Uj0]KdUO11L6f4Uj0]KdUO02M4g4Vj0\\KdUO10M6f4Vj0jKiUOo3N_KZj0b0hUOd3KQLdj0<`UOb3NQLbj0=`UOb3NQLbj0=`UOb3NQLbj0N]UO64k3LRLcj0L^UO73k3LSLcj0K]UO74k3LRLdj09aUOf3JQLej06eUOQ4\\j0nKeUOR4[j0mKfUOR4[j0fKYUO1^UOd3NnKdj0>^UOb30PLbj0?^UO`30RLaj0>^UOa31QL`j0>`UOa30RL_j00\\UO55j3NRLaj0O\\UO55j3NRLbj0;aUOc3LSLcj09aUOd3LSLdj08`UOf3JSLfj07`UOg3IRLhj06_UOh3ISLgj06_UOf3KTLgj05^UOg3KTL3H]j0=eUOg3KTL2I]j0=fUOf3LRLfj08_UOd3LTLdj09`UOb3MTLdj0:`UOa3MTLdj0:^UOc3NSLdj0:^UOd3LTLfj07]UOf3MTLej06^UOf3MTLdj07_UOf3LRL6ITj0a0kUOc3JTL7HSj0b0mUOk31dKQj0a0oUOj30eKQj0a0oUOj30eKQj0a0oUOj3]j0VLbUOk3^j0ULbUOj3_j0VLaUOb3IQLfj0>`UO`3LoKfj0a0^UO^3>PLei0b0mUO^3>PLei0b0mUO^3>PLei0a0nUO_3kj0RLnTO87f3NPLdj00]UO61j3NPLoj04SUOm3NoK^k0Q4bTOPL]k0P4cTORL3Llj0R4RUORL1Nlj0Q4RUOQL2Nlj0Q4RUOoK4OYj01kUOR4FoK6NYj02`UON4S4MnK8NVj05^UON7P4NPLgj0:[UOf3NPLdj0=_UOa3NSLaj0=aUO`3NSLaj0>`UO_3OSLaj0>`UO_3ORLbj00[UO74h3NQLcj00\\UONN25P4NPLcj01`UOOOP4NPLdj02\\UOL5R4KPLdj03ZUOK8S4InKfj04YUOK7T4KmKdj04gUOo3DnKej02hUOP4CoKdj0NlUOT4_OoKdj0LnUOU4]OPL3NWj0MYVOT4_OPL2NVj00WVOQ4BPLcj01\\UOO3o3OQLbj01\\UO02n30QLbj00]UO2On32PLbj0N_UO4Lo33PLnj0OoTOR42oKoj00nTOQ43oKhj07VUOi32PLbj0=]UOc30QLbj0<_UOb3NSLcj0;`UOb3LSLdj0;`UOb3LSL
dj0:aUOc3KSLdj0:aUOc3KSLdj0:aUOb3LTLcj0:aUOb3LSLdj0;`UOa3NSLbj0<`UOa3NSLcj0;_UOa3OSLcj0<^UOa3OTLbj0;_UOb3NSLdj0:^UOc3MULdj08_UOc3MULdj08_UOc3MULbj0:aUOb3LTLcj0:bUOa3KULbj0:dUOb3ITLcj0:dUOb3ISLdj0;cUOb3ISLdj0;cUOb3ISLdj0aUO_3MRLcj0?`UO_3NoKdj0c0]UO]3lj0cLTUO]3lj0bLUUO_3jj0aLVUO_3NoKdj0b0]UO`3OnKdj0b0]UO`3OnKdj0b0]UO`3kj0`LTUOa3lj0_LTUOa3lj0_LSUOb31nKbj0`0]UOb30oKcj0>^UOd3NnKdj0?^UOb3NPLcj0>_UOa3OQLbj0>_UOa3OQLcj0>]UO`31RL4LRj0b0iUO`31RL4LRj0b0iUO`31SL3KSj0b0iUO_32TL`j0<_UO`31TLfj06YUOf31TLgj0NSUO06o3OSLij0LUUOO4R4NTLYk0m3fTOSL3Jmj0T4oTORL3Knj0S4oTORL3K\\j02`UON4R4NSL2K[j04^UON7P4NSLgj05ZUOg30TLej06ZUOh30QLfj09YUOf31PLej0ZUOb32oKdj0`0ZUOa32PLcj0?ZUOb33oKdj0=ZUOd31PLej0KZUO<0i31QLdj0;\\UOd3OQLej0:^UOe3KSLfj08_UOP4aj0oK`UOQ4aj0nK`UOQ4aj0nK^UOS4dj0eKWUOM5]4fj0dKVUOO5\\4mj0dKSUO\\4mj0cKTUO\\4kj0eKVUO[4aj0cK\\UO24Q4FRLTk0MVUOQ4FSLSk0MVUOP4HSLRk0MUUOP4ITLQk0LVUOP4ITLoj0OWUOm3JTL4KWj03dUO36m3ISL6KUj0?kUOl31eKTj0?kUOl31eKTj0?jUOm32dKUj0>jUOm32dKUj0>iUOn32dKWj0hUOd3NoK8ORj0>hUOc3?oKii0>hUOb3`0PLhi0>hUOb3`0oKji0>fUOc31oKhj0>WUOd30oKij0;XUOf3NPLjj0;WUOe3OQLhj0;YUOd3OQLgj0ZUOd30oKdj0?YUOe32lKej0j4\\UOUKcj0m4\\UOTKcj0l4\\UOUKdj0k4\\UOUKej0j4[UOVKej0i4\\UOWKdj0i4\\UOVKej0j4[UOVKej0j4\\UOTKej0k4\\UOUKdj0k4\\UOUKdj0l4ZUOVKej0j4[UOWKdj0i4\\UOXKbj0i4^UOWKbj0j4\\UOVKdj0l4[UOTKdj0T5100]K]UOh3cj0XL]UOh3cj0WL^UOb3LnKfj0`0_UOa3LmKgj0a0]UOa3OkKej0d0]UO`3ij0`LXUO^3ij0bLXUO]3hj0cLWUO^3ij0aLXUO_3hj0aLXUO_3hj0bLWUO^3ij0bLWUO^3jj0aLUUO`3kj0aLTUO_3lj0aLTUO_3mj0_LTUOa3lj0oKnTO<6e3lj0nKPUO;5g3kj0oKnTO;7e3kj0`LUUO`3kj0`LVUO^3kj0bLUUO^3kj0[LnTOI7l3kj0[LoTOH6m3OPLcj0;YUOG4P4OnKcj0fLRj09ZUOQ3e0eLPj0 challenges the assumption of an insider's perfect foresight as unrealistic, reasoning that the fundamental value of a firm is tied to dynamically changing elements like cash flows and sales, among other factors. 
The paragraph presents the idea that the fundamental value is stochastic, implying that it is subject to random fluctuations, and that the insider has the advantage of perceiving these fluctuations more clearly than other market participants." + }, + { + "image_path": "ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "mask_rle": { + "size": [ + 1025, + 1025 + ], + "counts": "Qch36c03Zn01cQO1Zn02dQO0[n01cQO0\\n03aQOO]n03`QOO`n04\\QOMdn0h0O1000000000000O10000O100O101O00000BcNXRO]1gm0fNWROZ1em0kNZROU1dm0mN[ROT1dm0nN[ROR1em0oNXROS1gm0d0O100jROiMal0X2^SOkM`l0U2aSOkM^l0U2bSOlM]l0U2cSOjM]l0V2cSOjM]l0V2cSOjM]l0U2dSOjM]l0V2bSOjM`l0U2f01O001O00O1O1000000001O2NN200O1001O001O00O10WOiMhSOV2Yl0jMgSOV2Yl0jMgSOV2[OiMdm0\\2N1dROdMQm0\\2nROfMRm0S2iROkM81ol0T2b0N100010O0O100O1VOkMhSOU2Xl0kM]SO1KT2hl0kM]SO0LU2Vm0kMjROU2am00WROlMem0T2[ROlMem0T2310O2O0000000000O0100O10000BUROgNjm0X1XROfNim0Y1YRO^Nnm0b19O0010001N10O2O0O1O20O00001O001O0000000001O0001O00jRO_Nlk0a1STO`Nmk0`1TTO_Nlk0a1TTO_Nlk0a1TTO^Nmk0c1X101O000000000O100O010O10000001O1O0O10000000O100001O001O1O00O1000O100O10O110O0000001O01O0000000001N100O10000000dRO_NXl0a1hSO_NXl0a1j0_NSROa1lm0bNSRO^1lm0dNSRO\\1nm0cNQRO^1om0bNoQO`1Qn090OeROSNcl02hROd1Xn00O000PSOYNgk0f1WTO\\Nik0c1WTO_Nhk0a1XTO_Nhk0a1XTO_Nhk0a1XTO^Nik0b1WTO\\Nbk02[SOd1lm0^NSROb1mm0^NSROb1om0[NRROe1Un00PSOYNfk0g1ZTOYNfk0g1Z100OaROZN^O0nl0f1cSO\\N]OOPm0e1bSO]N^ONQm0d1aSO\\Nil0d1VSO\\Nkl0d1USO\\Nll0c1i01O001N100\\RO]Njl0b1VSOaNhl0_1XSObNgl0^1ZSOaNgl0^1YSO`Nil0_1ZSO_NZl0OVSOb1a0]N[l01TSOa1a0^Ngl0b1WSO`Nil0`1VSO`Nkl0`1USO`N`l0OQSOa1?^Nal02PSO`1hm0bNWRO^1im0bNWRO_1hm0aNXRO_1hm0aNXRO_1hm0aNXRO_1?_N_l01SSO`1>aN]l0NVSOa1=`Njl0a1WSO^Nil0a1ZSO]Nel0d1\\SO[NB1cl0d1XTO^Nhk0b1WTO^Nik0b1gSO\\NH1al0c1fSO\\NJ1`l0b1\\101O0O02O0O10000001O00001N10[RO^Nkl0a1USO`Nkl0_1l00FaNRRO_1lm0cNTRO]1lm0cNTRO]1mm0bNSRO^1nm0aNRRO_1nm0aNRRO_1b0aNjl0`1k02N2N1O0000O1001O0O2O000000O100O1O0O201O000000000000O10000000001O001O0001O000000O100O10000000O100000001N1000
1N010000O10000000010O1O100O0000O2O01N10000000O010O101O001N1001O0000000D`NWRO`1hm0aNXRO_1hm0aNXRO_1hm0aNXRO_1im0`NWRO`1im0]NZROc1Qn02O0O1O20O0O2O0000M3O1O1001O001O3M0O100O1000010O0O10F_NTROa1km0aNTRO_1lm0`NURO`1Vn00O100O1O100O1010O00001N1O101OO010O00011N2O1O0002N2M1O11O1O00000000O10010O01O0000000000O100000O100000001O000O1001O0000000O2O0000O10000O2O1N101O000001N100O100O0101N1010O0010D[N[ROd1em0\\N[ROd1Qn00001O0OE_NXROa1gm0aNXRO_1]m0]NeRO4N^1^m0^NcRO6N\\1im0cNXRO]1hm0cNXRO\\1im0dNWRO[1jm0dNWRO]1im0bNWRO_1hm0`NYROa1fm0^N[ROb1em0^N[ROb1Rn00000000000O1000000001N101O1N11O0O101N1000E_NVRO`1im0cNVRO]1jm0cNURO^1km0bNURO^1km0aNVRO_1jm0_NXROb1Sn00O2O1O0000O2N]RO\\Nkl0d1TSO]Nll0c1USO\\Nkl0c1j00O10O11\\RO\\Nil0d1VSO^Nil0b1WSO_Nhl0b1WSO]Njl0d1i010O0000001N100001O0000000O100001O0000000O10001O00000000O100000000000000001O0O1O1O101O000001O010O010OO100O1O10O1000000O1000001N2O1N2O1OO2O00O011O00000O100001O1N10001O00001O0O101O0O100O1O1O10O2N2N100000O1O1001O000000000O111N001O1O00O100000000000000000000000001O0000000LbQOdN`n0[131O01O00O10O10O101N1O1000O2N10000001O0000O10000002O0O1O01N1N2O2O0O2O000000000001N2O1N2N1000O10000001N100000N2O1001O001O00O1O100O11O0O1000O1000000000O11O00000O101O01O0000O1000001O0VO]QO6dn0H_QO6bn0H_QO7bn0GaQO7an0GaQO7an0GaQO8an0BfQO9fci3" + }, + "dataset_name": "ocr_doc_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "In , the document introduces the paper's goal, which is to relax the assumption of static information and examine the equilibrium in trading and price processes and market efficiency when insiders have dynamic private information. The paragraph sets the context for a more detailed exploration of how markets operate under these conditions." 
+ }, + { + "image_path": "ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "mask_rle": { + "size": [ + 1025, + 1025 + ], + "counts": "kel34ko03N1001O00000000O1001O1O001O0000001O0000VNMfSO3Zl0MfRO60MZm0MdRO91JRn08nQOGRn09nQOGQn0;nQOEQn0=mQOD^m0MaROa0NCam0M_ROd0LAdm0K`ROe0K@em0K`ROe0JAfm0J`ROg0GAhm0HaROk0B^Omm0GaROZ1^m0gNbROi0B@lm0GaROf0HBgm0HaROe0ICfm0HaROe0IBgm0I`ROe0HChm0H`ROf0FCjm0G`ROg0DDkm0EaROh0CClm0EbROf0DCkm0GaROe0FCim0IaROb0HDgm0JaROb0HDgm0JaROf0CAlm0IbROV1]m0iNdROW1\\m0iNdROW1\\m0iNdROW1\\m0iNdROV1]m0kNbROa0BJlm0DcROb0AJlm0DcROb0@Kmm0CcROV1]m0jNcROW1\\m0iNdROW1\\m0iNdROV1]m0jNcROV1\\m0lNcROb0DDim0JcROc0DBim0KcROT1]m0kNdROU1\\m0kNdROU1\\m0kNcROW1\\m0iNdROW1\\m0jNcROV1]m0jNcROV1]m0jNcROV1]m0jNcROV1^m0iNbROW1^m0iNbROW1^m0iNbROW1^m0iNbROf0ACnm0GaROd0I_Ofm0N`ROa0N^Obm02`RO`0O]Obm02_ROa0O]Obm01`ROc0M\\Ocm01`ROd0K\\Oem00`ROd0K\\Oem00`ROc0L]Odm00_ROc0N]Ocm00_ROc0N\\Oem00]ROd0N]OXn0c0fQO_OYn0b0fQO@Yn0a0dQOA\\n0`0bQOA^n0?bQOA_n0>bQOA^n0?cQO@]n0a0bQO_O^n0c0_QO^Oan0c0]QO^Ocn0P1000B[QO@en0<`QOC`n0;cQOD]n0^QOCbn0>]QOBcn0>^QOAbn0>`QOA`n0?bQO_O^n0b0aQO^O`n0b0_QO^Oan0d0\\QO]Odn0P11O000DZQO]Ofn0c0ZQO]Ofn0bQOD]n0=aQOD_n0=_QOD`n0=`QOC`n0_QOBan0cQOB]n0?bQOA^n0a0`QO_O`n0b0^QO_Obn0a0]QO@bn0c0\\QO]Odn0d0[QO\\Oen0d0ZQO]Ofn0d0YQO\\Ogn0o000001O00000DZQO]Ofn0c0[QO\\Oen0d0ZQO]Ofn0d0YQO\\Ogn0o000O1000EYQO\\Ogn0d0YQO\\Ogn0P1O000O10O100C]QO\\Ocn0c0^QO]Obn0d0]QO\\Ocn0e0\\QO[Oen0e0YQO\\Ogn0o00EYQO\\Ogn0c0ZQO]Ofn0c0[QO\\Oen0d0[QO\\Oen0P10000000A\\QOAen0;bQOA^n0>dQOA]n0=dQOC\\n0=dQOC\\n0>cQOB]n0>cQOA^n0`0bQO_O^n0a0bQO_O^n0a0bQO_O^n0`0cQO@]n0`0cQO@]n0a0cQO^O]n0c0bQO]O^n0c0bQO]O^n0c0aQO^O_n0c0`QO]O`n0d0^QO]Obn0d0\\QO]Odn0d0ZQO^Oen0b0[QO^Oen0b0ZQO_Ofn0b0YQO^Ogn0c0WQO^Ohn0o0O0000000000000000O1DZQO]Ofn0c0[QO\\Oen0d0ZQO]Ofn0d0XQO]Ohn0o0O0000DZQO]Ofn0b0[QO^Oen0b0\\QO]Ocn0d0^QOZOcn0e0_QOZOan0e0aQOZO_n0e0bQOZO_n0e0bQO[O_n0d0aQO\\O_n0b0cQO^O]n0a0dQO_O\\n0b0cQO^O]n0c0bQO]O^n0d0bQOZO_n0f0aQOZO_n0f0`QO\\O_n0b0bQO_O^n0?bQOC^n0=bQOD]n0bQO@^n0c0`QO]O`n0
d0_QO\\Oan0e0^QO[Obn0f0\\QO[Odn0e0\\QO\\Ocn0e0[QO\\Oen0e0ZQO[Ofn0P1000EYQO\\Ohn0b0ZQO]Ofn0c0[QO\\Oen0c0\\QO]Odn0c0[QO^Oen0c0ZQO]Ofn0c0YQO^Ogn0m01DZQO]Ofn0b0\\QO]Oen0>VQO^O63dn0?VQO^O63dn0c0\\QO]Odn0d0[QO\\Oen0e0YQO\\Ogn0d0XQO]Ohn0c0XQO]Ohn0c0XQO]Ohn0b0YQO^Ogn0[QO@en0<`QOC`n0=aQOB_n0c0[QO^Oen0c0YQO^Ogn0c0WQO^Oin0m00FVQO]Ojn0b0XQO]Ogn0c0ZQO]Ofn0=XQO_O33en0>XQO_O24fn0c0YQO^Ogn0n00BWQODin0]QO@cn0?`QO_O`n0b0_QO^Oan0d0]QO\\Ocn0d0\\QO]Odn0d0ZQO]Ofn0d0YQO\\Ogn0d0YQO\\Ogn0c0[QO\\Oen0>YQO]O35dn0=bQOC^n0=bQOC^n0>`QOC`n0>^QOCbn0=]QOCdn0<_QOBan0>aQO@_n0?cQO@]n0?eQO@[n0`0eQO@[n0a0dQO_O\\n0c0aQO^O_n0c0`QO]O_n0c0cQO\\O]n0c0eQO\\O[n0d0fQO[OZn0d0b00UQO\\OYn0d0fQO]OZn0c0fQO]OZn0b0gQO^OYn0b0gQO^OYn0b0gQO^OYn0b0hQO]OXn0c0hQO]OXn0c0gQO^OXn0c0gQO_OXn0a0gQO@Yn0a0eQO@[n0`0eQO@[n0`0eQO@[n0`0fQO_OZn0a0fQO^O[n0c0dQO]O\\n0c0dQO]O\\n0c0cQO^O]n0b0dQO]O\\n0b0gQO\\OYn0d0b0O2O00000O1001O0000000O10000RQO_O]n0?dQOA\\n0?gQO^OYn0b0c000O1000000001O1O0000000000O100O2O000O100001O1O1O10O0000000N2O1O1O1001O001O0oPOD]n0=bQOD\\n0>bQOC^n0=bQOC]n0>cQOB]n0>bQOC]n0=dQOD[n0;fQOEZn0;gQOCZn0>iQO^OXn0a0d00000O1O1O1001N2O0000O1O1001O1O100O00O1O10O1RQO^O]n0e0_QO]O`n0d0_QO]O`n0c0XQO[O34cn0b0XQO[O53cn0e0]QO[Odn0d0]QO\\Ocn0d0]QO\\Ocn0c0^QO]Ocn0b0]QO^Ocn0b0^QO]Ocn0b0]QO^Ocn0c0\\QO]Odn0d0[QO[Ofn0=UQOE5Nfn0^QOCbn0>]QOAdn0>]QOCbn0=`QOA`n0?aQO@_n0`0bQO_O^n0a0bQO@]n0`0bQOA^n0`0`QOA`n0a0]QO@cn0a0]QO^Ocn0d0\\QO\\Ocn0d0^QO[Obn0e0^QO[Obn0e0_QOZOan0e0aQOYO`n0e0?O1O1N2O100O1O1000fPO@To0?kPOBUo0?kPO_OVo0b0UQO@^n0`0`QOB_n0>aQOB_n0UQOD3Ohn0e0XQO[Ohn0?_QO@an0`0`QO_O`n0`0aQO@_n0`0aQO@_n0a0_QO@`n0a0`QO_O`n0a0_QO@bn0>`QOA`n0?aQO@_n0?bQOA^n0?cQOA\\n0>eQOB[n0>dQOC\\n0=cQOD]n0kPOCTo0=lPOCTo0=lPOCTo0>lPOCRo0=mPOERo0;nPOERo0;mPOFSo0:mPOF4Ihn0`0UQOGRo09oPOFQo0:oPOESo09oPOFQo0:oPOERo0;nPOERo0;nPOFQo0:nPOGRo09nPOF3Mfn0>VQOE50an0;[QOC52an0:gQOGXn09hQOFYn0:iQODWn0;h00000O1O1O1001O0O100O2O001N100000O101OTQOBUn0=kQOETn0;kQOGTn09lQOGTn09kQOHUn09iQOGXn09hQOGXn09hQOGWn0;hQOEXn0;iQOEVn0;kQODUn0jQOBVn0?iQOBWn0>iQOBWn0>iQOBWn0>kQO@Un0`0kQO@Un0`0lQO_OTn0a0lQO_
OUn0`0jQOAVn0?iQOBWn0>hQOCXn0=hQOCYn0;iQODXn0;iQODXn0:iQOFYn07iQOHXn06iQOJRal3" + }, + "dataset_name": "ocr_doc_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": " contains text which explains that the model considered in this paper is a broader version of the earlier static models. The paper's intention is to cover dynamic information scenarios and improve on previous models that covered a narrower range of trading strategies and pricing rules." + }, + { + "image_path": "ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "mask_rle": { + "size": [ + 1025, + 1025 + ], + "counts": "Yjg31jo09hPOL233J41LMXg03fXO8KN:GK?CO81NK4O1OVg00gXO5M09II?CL<2KJ6OL54G_f03_YO3N29KFP12ZOJL6MM62Gbf03\\YO2046a1GfNLJ1_ON?433Gdf03[YO0165b1E<0iNjf0TOYYO1244k5`f0iJ[YO^O3k5af0hJ[YO]O2m5le0^I_ZOY1CY6ne0^H^ZOZ1DX6ne0^H^ZO[1BX6Pf0]H^ZO[1BY6Pf0[H^ZO\\1BY6Pf0[H^ZO\\1BY6oe0\\H_ZO[1BY6ne0]H_ZO[1CX6ne0^H]ZO\\1EU6ne0_H]ZO\\1EU6oe0]H^ZO]1CV6Pf0\\H]ZO^1CV6cf0jI]YOW6bf0iI^YOW6bf0d100OUI_YOX4af0gK`YOY4`f0hK_YOX4af0e2O0O100\\OhF_ZOW9bd0dFn[O8]OU9ed0cFm[O:]OS9fd0cFm[O;\\OR9gd0cFm[O;[OT9ee0mFZZOS9fe0mFZZOS9fe0mF[ZOR9ee0nFaZOl8^e0UGcZOj8]e0VGcZOj8]e0VGcZOk8]e0TGcZOl8]e0TGcZOl8]e0TGcZOl8\\e0WGaZOj8_e0WG`ZOi8`e0\\G[ZOc8fe0_GXZOa8he0l00001O00000OcFYZOb8ge0\\G[ZOd8ee0\\G[ZOd8ee0\\G[ZOd8ee0[G\\ZOe8de0\\GZZOe8ge0[GWZOg8he0i0001O01O001O000iZOhEed0Y:Y[OhEfd0Y:Y[OhEgd0Y:Y[OfEgd0Z:`01O01N100O2O0O100O2O0000000ZFVZOU9`0]Fgd0:nZOX9:_Fhd0:mZOW9;_Fgd0>jZOU9=^Fid0T:W[OlEid0T:X[OkEgd0U:c0O01000001O001hZOhEed0X:Z[OiEed0Y:Y[OiEfd0X:X[OiEgd0Y:X[OgEhd0Y:W[OhEid0Y:V[OgEjd0Z:U[OfEkd0Z:U[OfEkd0Y:W[OfEid0Z:Y[OcEid0[:?00000hZOcEld0]:S[OeEld0\\:S[OdEmd0^:Q[ObEnd0`:R[O^Eod0c:71O000kZO]Ekd0d:T[O]Eld0c:S[O^Emd0b:Q[O`Eod0`:P[OaEQe0g:000HnZOaERe0^:P[OaEPe0_:Q[O`Eod0`:Q[O`End0a:Q[O`Eod0`:P[ObEod0]:R[OcEnd0^:Q[ObEod0h:00000000001@T[OiEld0W:U[OgEmd0X:S[OhEmd0Y:Q[OhEod0e:01O01O00WOVE[\\Oj:ec0UE\\\\Ok:]d00000000000Eo
ZOfEbd0Kg[O]:JgE_d0Lg[O]:JgE`d0Kf[O]:LgE^d0Le[O^:MfE^d0Le[O^:MfE^d0Le[O^:MfERd02g[OM9\\:MfESd07o[OT:NeERd08P\\OS:NeESd07o[OU:MdETd07o[OV:LcEUd07n[OX:LaEVd08m[OW:MaEUd09o[OU:KcEUd09P\\OT:KcEUd09Q\\OS:JdEUd09Q\\OS:JdEUd09Q\\OS:KcETd0:Q\\OS:KcEUd09P\\OT:LbETd0:Q\\OS:KcESd0;R\\OQ:LdERd0;R\\OQ:LdEQd0n[Oo93dEoc0;o[OQ:1eEPd0:o[OQ:1eEQd09n[OR:1eEQd0Nj[O25[:0eE[d0Nd[O^:1dEgd0\\:Z[OcEfd0]:Z[ObEgd0^:X[OcEhd0]:X[OcEhd0]:W[OdEid0\\:W[OdEid0\\:X[OcEhd0]:Y[ObEZd01d[O\\:5aEmc02k[O32Z:6aEnc00k[O51[:4bEXd02d[O\\:3cEZd00c[O^:2cEYd00e[O^:1bEXd02f[O]:1bEXd03f[OZ:1eERd0Li[O64Y:1eERd0Li[O63Z:1dEZd02e[O[:1bE[d02d[O]:0aE]d00d[O_:OaE^d0Oc[O`:OaE]d01c[O^:0aE]d01c[O]:1bEZd03e[O[:1bEYd05d[OZ:3aEZd04c[O\\:2`E\\d03b[O]:3_E]d01a[Oa:1^E^d01b[O`:0_E[d04e[O\\:1`ETd0:k[OV:0bESd09m[OS:2dEQd09n[OR:1eEQd09n[OR:1eEQd09n[OR:1eEQd09n[OS:OeETd08l[OS:0eESd09m[OQ:2eEQd0:m[OP:3fEPd0:m[OP:3eEQd0;l[OP:4dEPd0h[On97dEQd0R;o[OnDQd0R;o[OnDQd0S;n[OmDRd0T;l[OnDSd0S;l[OmDTd0Q;o[OmDRd0R;o[OnDQd0Q;P\\OoDPd0>i[Ol98eEoc0?i[Ol98eEoc0?i[Ol98eEoc0>j[Om97eEoc0>i[On98dEoc0R;Q\\OnDoc0R;Q\\OnDnc0S;R\\OmDnc0S;R\\OmDnc0S;R\\OmDnc0T;Q\\OlDoc0T;Q\\OlDoc0>k[Oo96cEoc0=m[Oo94dEoc0=m[Oo94dEoc0=m[Oo95bEPd0>k[OP:cd0PF][OP:cd0oE^[OQ:cd0nE][OR:cd0nE][OS:bd0mE^[OS:cd0lE][OT:cd0mE\\[OS:4aESd0O1O2N1_[OPEPd0R;n[OPEQd0;f[OS:9bEQd0:g[OT:8bERd09g[OT:7bETd08f[OV:7aEUd07d[OX:8`EXd04`[O[::`EWd05^[O[::aEYd03][O\\:9bEZd02][O[:;bERd0Ne[O5N[:;bEPd02d[O20[:=aEnc02e[Of:=XEmc0P;S\\OPElc0R;S\\OnDlc0T;S\\OlDlc0U;S\\OlDmc0U;R\\OjDoc0V;Q\\OkDnc0V;Q\\OjDoc0W;P\\OiDPd0W;P\\OiDPd0W;P\\OiDPd0V;R\\OhDoc0W;>O11O1d[OgDnc0Z;P\\OgDPd0Z;o[OfDQd0Z;o[OfDQd0Z;P\\OeDPd0a0h[OU:dd0jE_[OT:ad0lE_[OT:ad0mE^[OT:ad0lE_[OT:ad0lE_[OT:ad0lE`[OR:ad0nE_[OQ:bd0nE_[OQ:ad0PF_[Oo9cd0PF][OP:cd0PF][OP:cd0PF][OQ:bd0oE^[OQ:bd0oE^[OP:cd0PF][OP:cd0PF^[Oo9bd0QF^[On9cd0RF\\[OP:cd0oE^[OQ:bd0oE^[OR:ad0nE_[OS:_d0nEa[OR:_d0nEb[OR:]d0oEb[OQ:]d0PFc[OQ:\\d0oEd[OQ:\\d0oEd[OP:]d0PFc[OP:]d0PFc[OP:]d0PFb[OP:`d0PF_[On9cd0h000000012M1O00WE][OT:cd0kE`[OR:ad0nE_[OR:ad0nE_[OR:ad0nE_[OP:cd0PF\\[OP:ed0PFZ[OQ:fd0oE[[OP
:ed0PF\\[OP:cd0PF][OP:cd0PF^[Oo9bd0QF][OP:cd0PF][OP:cd0PF[[OR:ed0e0O10O1000000000000000000010O1O2N0000O1ZE[[Oo9fd0QF[[Om9fd0RF[[On9ed0RF[[On9ed0QF\\[Oo9ed0PF[[OP:ed0PF[[OP:ed0PFZ[OQ:ed0RFY[On9hd0e001O01XEX[OV::ZESd0=d[OY:9ZERd0>g[OV:dd0kE][OS:dd0lE][OT:bd0mE^[OR:cd0nE][OS:bd0mE^[OT:bd0lE][OT:cd0lE][OU:bd0kE^[OT:bd0mE^[OS:bd0mE^[OS:bd0lE_[OS:bd0mE^[OS:bd0mE^[OT:ad0lE^[OU:bd0kE^[OV:ad0jE`[OT:ad0lE`[OR:`d0oEa[On9ad0RF_[On9ad0RF_[Om9cd0QF^[Oo9bd0QF^[Oo9cd0eEV[O67U:cd0oE^[OR:`d0oE`[OT:]d0lEc[OT:]d0lEc[OU:\\d0kEd[OU:]d0jEc[OV:^d0cEZ[ON8_:`d0`EY[O17_:^d0jEa[OV:]d0lEc[OT:\\d0mEd[OR:]d0oEa[OR:_d0nEa[OR:_d0nEa[OS:_d0kEb[OU:^d0kEb[OU:^d0lE`[OU:4\\EPd0`0l[OS:4^Eoc0a0k[OR:4_EQd0`0h[OR:8^EPd0X;o[OhDQd0?i[OT:6]ERd0>h[OV:4]ETd0=i[OU:3^ETd0=i[OU:4]ETd0=h[OT:8\\EQd0a0g[OQ:fd0oE[[OP:ed0PF[[Oo9fd0PFZ[OQ:gd0nEY[OR:gd0mEZ[OS:fd0mEZ[OS:fd0mEZ[OS:fd0mE[[OR:dd0oE\\[OQ:ed0nE[[OR:ed0mE\\[OS:ed0lEZ[OU:fd0kEZ[OT:gd0mEY[OR:fd0oEZ[OQ:ed0PF[[OP:dd0QF\\[On9ed0RF[[On9ed0RF[[On9fd0QFZ[Oo9gd0PFY[OP:fd0QFZ[Oo9ed0RF[[On9dd0SF\\[Om9dd0SF][Om9bd0SF][Om9dd0SF\\[Om9ed0RF[[On9ed0RF[[On9ed0RF[[On9dd0SF][Ol9cd0TF][Ol9cd0TF][Ol9cd0TF][Ol9cd0TF][Ok9dd0UF\\[Ok9dd0UF\\[Ok9dd0UF\\[Ok9dd0UF[[Om9dd0TF[[On9dd0QF\\[Oo96bEPd0?j[Oo96cEoc0>k[OP:5bEPd0=l[OQ:4bEPd0=l[OR:4`EPd0>l[OQ:5aEoc0>m[OP:4bEoc0>m[OP:ad0PF_[Oo9cd0PF][OP:cd0PF][OQ:bd0nE_[OR:ad0nE_[OR:5`Enc0?k[OR:5aEPd0=k[OR:3cESd0;g[OU:5`ETd0S;k[OoDTd0Q;l[OoDTd0R;k[OnDUd09g[OX:3_EUd0:h[OX:2^EVd0:h[OW:3_EUd0:h[OU:5aESd0;g[OS:7aESd0Nh[Ol:6VERd0Ng[Om:7UESd0Q;m[OnDSd0R;m[OoDRd0S;l[OmDSd0U;l[OkDSd0X;j[OhDWd0?g[O\\:Yd0cEh[O]:Xd0bEi[O^:Wd0aEj[OV:O^EWd0_Ejc0a0i[OP:9bEnc0>i[OP:9cEmc0=j[OP:8dEnc0k[OS:2_ESd0>k[OS:2`ERd0=l[OS:2`ERd0n[OP:ad0QF^[Oo9bd0PF_[OP:bd0oE^[OR:gd0hEY[OZ:id0aEX[O_:0`E]d00c[O`:0_E^d01a[O^:fd0aEV[O33[:fd0fEU[OO5Z:gd0gET[OO5Z:gd0gET[OO6Y:fd0hET[OO6Y:fd0hET[OO6Y:5fEoc0:l[OQ:3fEoc0;m[OR:2cEQd0O2N100nE`ZOMO[9be0eFaZON0[9_e0jF]ZON3X9`e0VG_ZOj8ae0XG\\ZOj8be0k000O2O003L10K5\\F_ZOg8ae0ZG`ZOc8be0]G^ZOc8be0n000010O02N3M0iZOcEjd0^:U[OcEkd0\\:U[OdEkd0\\:U[OdEkd0\\:U[OdEkd0[
:V[OeEjd0W:[[OhEed0U:e00000000001O1TFVZOH3_9ie0fF_ZOW9ce0gFaZO?Gj7je0eGaZO?Gj7`f0THbYOP1HW5gf0gIbYO3O0L4K61c1N_N1a0if0mNdYON53G6J34g1J_N100Akf0HeZO0oN8J107M0IM63002Fnj0N[UO30JO50MO1gk0LW_f3" + }, + "dataset_name": "ocr_doc_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The paragraph in discusses the findings of the paper, which include the identification of a Markovian equilibrium that is inconspicuous, allows insiders to trade without being detected, and is solely dependent on the total order process. It underscores the unique nature of this equilibrium and how it enhances the market efficiency in certain conditions." + }, + { + "image_path": "ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "mask_rle": { + "size": [ + 1025, + 1025 + ], + "counts": "n]m?9go02O0000000001N2Oiak?" + }, + "dataset_name": "ocr_doc_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The text in suggests empirical outcomes where revealing information might be beneficial. Specifically, it contrasts different market equilibrium scenarios and suggests that in non-Markovian price processes, it’s often better for insiders to disclose their private information." 
+ }, + { + "image_path": "ocr_spotting_detailed_caption/images/0000288.jpg", + "mask_rle": { + "size": [ + 500, + 729 + ], + "counts": "eVY15^?2N2O1N2O001O0000000TOG]B9b=J\\B6c=L]B3b=O]B1c=0\\B0d=1\\BNd=3[BMe=4[BKe=5\\BJd=7\\BIc=8]BGd=8\\BHd=9\\BFe=9[BGe=9[BGf=9YBGh=8XBHj=6VBIm=5SBKo=4oAMQ>3oAMR>2nANR>2mAOS>1mAOT>0kA0V>0jA0V>1hA0X>0gA0Z>0fAO[>1dA0\\>0cA0^>0aA1_>OaA0`>0_A0b>0\\A1e>1VA2i><1O10O001O0O1O1O1O10O0L5N10100001O100O1O10O11N1000000001O2O2M3M3N1O10O2M100O0FA`A>_>E`A:_>K]A5c>>00000000001O002N2N2N2N2N001O01O0000000O100O10000000001O01O002N2N1O100O001O0010O01O1O1O010O0000O2O001O01O00001O010O001O010O10O00001O001O0000001N10001O0100O100O0010O001O1N1O10001N2O000010O10002M4M2M01O001O0000010O00100O01O1O100O0001O0000001O0001O01k@CQ?`0001O001O001O000000000000000000001O0000001O001OO10000O10000000000001O001O2N3Lleo5" + }, + "dataset_name": "ocr_spotting_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The Comprehensive Description includes \"Abbondanza\" displayed in a script font that conveys a sense of stylishness or elegance, which may suggest that it is the name of a business, possibly a restaurant or some sort of food-related establishment, given its association with abundance or plenty often related to food." + }, + { + "image_path": "ocr_spotting_detailed_caption/images/0000288.jpg", + "mask_rle": { + "size": [ + 500, + 729 + ], + "counts": "kZ^28Z?4M3M2N2N2O0O2O0O2O000000000O1000000001O00000O101O1O104K1OO101O0O2O001O4L1O1O1O1O001O000000O101O1N01O1O0O2O1N1001O01OO2O1M2N3O1001O010O1001N5K1O0001OO1000001N101N2MmR]7" + }, + "dataset_name": "ocr_spotting_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The Comprehensive Description for \"Cafe\" suggests that the text is identifying a type of establishment where coffee and light meals may be served. 
The font is straightforward and easily legible, which is typically used for clarity and immediate recognition for passersby." + }, + { + "image_path": "ocr_spotting_detailed_caption/images/0000288.jpg", + "mask_rle": { + "size": [ + 500, + 729 + ], + "counts": "abU:6\\?>C5K101N100O100000O10O100001N10O100O0100O01000O100O1000O1000O010O1000O02O0000O0100000000O010000O01O100000O0101N1O3L9ElR1" + }, + "dataset_name": "ocr_spotting_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The Comprehensive Description for \"USIS\" indicates a text that is likely an acronym or a name presented in a bold and blocky font, common for official or institutional entities. It is placed on the side of a van, suggesting it could be the branding of a company or a service, possibly linked to the van's purpose or ownership." + }, + { + "image_path": "ocr_spotting_detailed_caption/images/img_49.jpg", + "mask_rle": { + "size": [ + 480, + 640 + ], + "counts": "U_Q12m>1N3M2nNR1O1000000000000000000000000010O0000000O10000O1N2]OXBKk=N[BD12i=8WBF10j=2nA37K[>2d00GOJO]A1j>OZn01ibN3d>6^OG[B9b=0WB1g=3TB1i=d0O101N1O100O2O000O1000O1000001N1O101N1O2M2N2O1HPB^OS>`0oA_OT>3iA36HX>85H^A8a>5aACY>>eACV>e0iA\\OS>e0mA\\OQ>i00015QBPOd=X1M1000000000000001O000000000O100O1000000000010JYBQOh=T11N1O00000000O1000000000VOWB2N2M4M2O1O1N2M3@^O^Bf0a=]O\\Bd0c=]O[Bd0j=YORBj0o=3O1HSOZBP1f=50000000000000000010O1O2N1O0000001O4MN1O11O1O000O100JQB[Oo=e0RBZOn=f0RBYOo=g0QBYOo=l0000K50N2O2N2N2N2O0O2O0O2O0O101O000O2O000000O1000001N100O10001O000O100010O00001O000O2O0000001O0O11O0000O1001O0000O10000000000000000001O0O10000001O0000000O1000000001O000O101N101N2O0000O100000O1O1001O000O3M2O001O00001O00O2O00001O00O0100001O000O101O0000000010O00O100001OO1001N100M300000000O101O1O010O000001O001O0O100O2O00000O101O00000000O1000001N1WO[B6f=E`B:`=F`B9a=G`B7b=G`B7X>JRb`1" + }, + "dataset_name": "ocr_spotting_detailed_caption_box", + "question": "Please provide a detailed 
description of each marked region in the image.", + "caption": "The text \"AGENTS\" displayed similarly in capital letters and bold typeface complements the text in . The typeface is consistent, suggesting that both are part of the same sign. The dark text against the yellow background stands out, indicating the nature of the business below, which is likely involved in real estate agency work." + }, + { + "image_path": "ocr_spotting_detailed_caption/images/img_49.jpg", + "mask_rle": { + "size": [ + 480, + 640 + ], + "counts": "_Zg34[>2mA3o=2lA0R>`0O101N100O2O00000O100001O00000O10001O0O100001O10O01O000O1000000000O10000000O1000000010O00001O10O0000000000000000000O1000000O10000000000000000000000000000001O00O2O00O100000000000001O000O101N100000O2O1O000O101O00000000O1000001N10000O2O01O00000O101O0000000O2O00000000O101O000O101O0O100000O01O11O00001O1O000000O2O0O1O21N0000001O0O101O00000O2O000000O2O000O100O2O0O1]OlAJ1;T>JQB4Q>KSBNTnm2" + }, + "dataset_name": "ocr_spotting_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The word \"SAXONS\" is written in capital letters with a font style that is bold and prominent, but with a slightly more decorative style than and . This difference could be a stylized choice to make the brand name distinctive. Positioned on a façade above a window, it is part of the business's branding, likely the name of the company." 
+ }, + { + "image_path": "ocr_spotting_detailed_caption/images/img_49.jpg", + "mask_rle": { + "size": [ + 480, + 640 + ], + "counts": "P\\j2422`>9O1N101O0000000000000000001O0001O0000000000000O10000000000001O000000000000000000000000000000000000000000000000000000000000000000000000001O0000000000O100000000000000001O000001O0O102N2M9FgaU5" + }, + "dataset_name": "ocr_spotting_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This contains the same text as , \"SAXONS\", indicating that the text is repeated within the image. This repetition reinforces the importance of the name as part of the branding. The text style and location, again above a window, maintain the brand's visibility from multiple angles." + }, + { + "image_path": "ocr_spotting_detailed_caption/images/img_49.jpg", + "mask_rle": { + "size": [ + 480, + 640 + ], + "counts": "]XY42l>3D;O101O0000000000000000000000000000000000000000000000000000000000000000000000000000001O000000000000000001OO100000000001N010000000000010O0O100000000000000001O0000000000000001N106J5Jecg3" + }, + "dataset_name": "ocr_spotting_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "Displaying the word \"ESTATE\" in the same bold, capital letter style as observed in . This repetition at a lower part of the building indicates a consistent branding approach across the business' presence on the building, and its placement closer to eye level increases readability for pedestrians." 
+ }, + { + "image_path": "ocr_spotting_detailed_caption/images/img_49.jpg", + "mask_rle": { + "size": [ + 480, + 640 + ], + "counts": "_Th51m>3M3L3N2N2N2O100000000001O0000000000O0100000O2O0000000000000000000000000000000000000000000000000000000000001O01O000000O1000000000O1000000000000001O01O000000000O100001O000O100001O00000O9GhiW2" + }, + "dataset_name": "ocr_spotting_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The word \"AGENTS\" is identified, and like , it mirrors the style and size of the sign in , ensuring that the message of the business being an estate agency is clear. This consistent branding facilitates quick recognition and understanding of the services offered." + }, + { + "image_path": "ocr_spotting_detailed_caption/images/0000808.jpg", + "mask_rle": { + "size": [ + 1600, + 1363 + ], + "counts": "jdPm06ga17J4L4M6J6J4L3M2N2N100O100O1O100O001O010O00100O00100O010O00100O010O010ocNhNYW1Y1^hNSO_W1m0lgNIRX18ZgN=dX1DofNj0PY1WO]fN\\1aY1fNneNk1PZ1bNmdNU2R[1^2N2N2O1N1O2O0O2O0O1O100O010O1O10O0100O1O010O1O010O0001O00010O010O01O01O0O100O2N100O2O0O1POeeN[J]Z1a5R10001N101O00001O010kKWdNS2h[1mMYdNR2g[1nMZdNR2f[1nM[dNQ2d[1oM]dNQ2b[1oM^dNR2b[1mM_dNS2`[1lMadNU2^[1kMcdNU2\\[1lMddNT2[[1lMedNU2[[1jMfdNW2X[1jMhdNV2X[1iMidNW2W[1iMidNX2V[1gMjdNZ2V[1eMkdN[2T[1eMmdN\\2R[1cModN]2Q[1bMPeN_2oZ1`MQeNa2oZ1^MReNc2mZ1\\MTeNd2lZ1ZMVeNg2iZ1XMXeNh2hZ1VMZeNj2fZ1UM[eNk2fZ1RM\\eNn2eZ1oL]eNQ3fZ1iL]eNW3P[1SLYeNl3^\\101O001N101O000O10001O0O01000O10O10O010O001O01O0O1O1M3L301O0100O10ROVLgcNj3U\\1]LhcNb3W\\1bLgcN]3Y\\1gLdcNY3[\\1kLccNT3\\\\1oLacNR3^\\1RM_cNm2b\\1UM[cNk2e\\1V12O1O2O010O001O010001O2N2N3L5L3M3M2N1O1O1O2N2N1O1O1O0010O01O0001O1O2N2N1O1O1O1O0O10001O0O10000O100O100O100O100O1O100N2O1M4CP^1AQbN>Q^1@QbN>Q^1APbN" + }, + "dataset_name": "ocr_spotting_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The phrase \"SPOT\" is displayed in a 
sans-serif, uppercase font similar to the text in . It is placed within the same green ribbon background, mirroring the style and maintaining design consistency. This positioning completes the name or title represented on the signage." + }, + { + "image_path": "ocr_spotting_detailed_caption/images/0000808.jpg", + "mask_rle": { + "size": [ + 1600, + 1363 + ], + "counts": "TUja1815Ua1c0D. This script is mint green, presented on a yellow backdrop, and it features an apostrophe, signifying a possessive or a contraction. The stylized \"O\" has a red center dot, adding to the thematic color scheme." + }, + { + "image_path": "ocr_spotting_detailed_caption/images/0000808.jpg", + "mask_rle": { + "size": [ + 1600, + 1363 + ], + "counts": "akfQ11ma17K1O1N2O1O1CGP_N;k`1KS_N6i`1c0L4N1O2O1O1O1O010O1O01O001N1O2G8C>N1N2N3O0O2O1N2N5IPUTo0" + }, + "dataset_name": "ocr_spotting_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The word \"by\" is located on the lower left area of the central graphic and is likely to be a connector or a preposition relating to the larger text elements in the image. It's written in small, lowercase letters, contrasting in size to the other texts, suggesting a subordinate role in the information hierarchy." + }, + { + "image_path": "ocr_spotting_detailed_caption/images/0001122.jpg", + "mask_rle": { + "size": [ + 780, + 1040 + ], + "counts": "W`b1:Ph07J1O2N1O1O1O1O2N2O4K4M4K3N5K4K5L3L4M3M3L3N4L4R[ObMec0d2T\\OaMic0b2S\\OaMkc0c2P\\O`Mnc0c2n[O_MQd0d2k[O_MSd0d2g[O`MYd0Z30O100O0O2TMh[Ob1Yd0]Nj[O_1Xd0`Nk[O\\1Wd0dNk[OY1Wd0fNm[OT1Td0mNP\\Om0Rd0SOP\\Oi0Rd0WOP\\Oe0Rd0[OP\\Oa0Rd0_OP\\O=Rd0CP\\O9Rd0GP\\O6Qd0JQ\\O2Qd0NQ\\ONQd02o[OLSd05l[OIVd07h[OIZd07e[OH]d08b[OGad08^[OHcd08\\[OG\\c0^Oi\\Ok0JF[c0If\\Oa0ME]c0Me\\O>MD_c0Od\\O=MB_c02d\\O=N^O_c05d\\O=N[O^c09e\\OU1FRc0Lh[O=X1Gob0Mh[OOPc01c\\O0>NPc01c\\O1 suggests they form a single phrase, typically associated with a particular location or team. 
The font size and its commanding presence imply that the text is intended to be easily read and recognized from a distance, characteristic of team jerseys or sports merchandise." + }, + { + "image_path": "ocr_spotting_detailed_caption/images/img_162.jpg", + "mask_rle": { + "size": [ + 768, + 1024 + ], + "counts": "iYP9=bg03M2O1N2N1O2M2HWOXYOk0gf0UOXYOl0hf0TOWYOn0hf0ROXYOn0hf0ROXYOn0hf0ROXYOn0hf0ROXYOn0hf0SOVYOn0jf0SOUYOn0jf04000nNVYOn0jf0ROVYOm0kf0ROVYOn0jf0SOUYOm0kf040O0100000O100O10000O1001O00ROTYOh0lf0XOTYOi0kf06O101O000000000000O100000000O10O1O0100O100000000001O1O1O000000O10000O10000O10O10000000000000O11O0000000O1000000000O10O1[YOlN^f0T17O2O00O1000001N10000gYOkNge0U1YZOkNge0U1YZOkNge0U1YZOkNge0U1YZOkNge0T1ZZOlNfe0T1ZZOlNee0T1d00000001O001OO100O11O001ZYOlN^f0U1`YOlN`f0T1`YOlN`f0R1cYOmN]f0Q1:00O02O0]YOmNYf0T1eYOmN[f0T1dYOlN\\f0U1bYOlN^f0\\1dYOaNUf0_1kYOaNUf0_1kYOaNUf0_1kYOaNUf0_1kYOaNUf0_1kYOaNUf0_1kYOaNUf0_1kYOaNUf0_1kYOaNUf0U1hYOQO4JTf0U1iYOPO3KTf0U1iYOPO3KSf0V1iYOPO4KRf08iYOk05mNRf0^1nYObNRf0U1jYOPO3LSf0S1kYOQO2LSf0S1kYORO1KSf0T1lYOQO1KSf0T1kYORO2JSf0_1mYOaNSf0_1nYO`NRf0`1nYO_NSf0`1nYO`NRf0V1jYOQO4IRf0U1mYOmN4Noe0U1nYOkN40ne0V1XZOjNhe0V1nYOlN1NQf0W1mYOlN1MRf0W1lYOmN2LRf0V1mYOnN1LRf0V1lYOoN2KRf0U1mYOoN2LQf0V1lYOoN2KRf0V1kYOPO\\f0Z10000000DfYOROZf0m0hYOPO[f0o0eYOQO[f0o0fYOQOYf0n0iYOROWf0m0bYOTO0N^f0m0cYOUOON^f0l0eYOUOL0^f0j0lYOWOUf0g0mYOWOUf0i0jYOYOUf0i0gYOZOWf0Z1M2O01J5EQOhe0T1nYOlN11Qf0S1XZOlNhe01kYOl0?ROfe02kYOk0a0ROde02mYOj0`0TOce02mYOj0hf0WOWYOi0a0TOce03kYOj0b0SOce04jYOj06RO40le04jYOj06SO3Ome03kYOj0b0SOce03kYOj0jf0VOVYOj0jf0VOVYOj0jf0VOVYOk0if06O1\\YOmN[f0S1:O0POVYOi0kf0VOWYOi0if0WOWYOi0if0WOWYOi0if0XOWYOg0if0[OVYOc0kf0]OUYOc0kf090QOUYOh0lf0XOTYOh0lf0XOTYOh0`0UOie03gYOh0`0UOhe0R1XZOnNhe0R1XZOnNge0S1YZOmNhe04hYOg0lf0WOUYOi0Qg001O000000]YOVOfe00QZOj09VOfe00QZOj09VOfe00QZOj09VOfe01PZOi09WOge0OQZOi0:WOee00QZOh0of0M2N2IeXOG]g08fXOE[g0;5O1O1O2KRoh8" + }, + "dataset_name": "ocr_spotting_detailed_caption_box", + "question": "Please provide a detailed description of 
each marked region in the image.", + "caption": "The text \"Colchester\" is displayed in a straightforward, sans-serif font with a bold weight, which makes it highly legible and easy to read. The text color is white, which contrasts sharply with the red background, creating a standout effect that captures attention. This type of text presentation is typically used for clear communication and effective signage. \"Colchester\" is likely the name of a place, possibly a destination or location referenced on a signpost or directional marker. The choice of a bold and contrasting color scheme is intentional, aimed at ensuring that the text is discernible from a distance and under various lighting conditions. The text is centrally aligned within the marked area, suggesting the importance of the information it conveys. The presence of the symbol above the text, resembling a stylized pair of railway tracks, indicates that this sign is associated with a railway service or station. The purpose of the text in this context is to inform viewers of a railway station name or a destination reachable via train services." + }, + { + "image_path": "ocr_spotting_detailed_caption/images/img_940.jpg", + "mask_rle": { + "size": [ + 720, + 1280 + ], + "counts": "oXo9a0ke0=Ea0@8H5K3N1N10001N10000001N101O0O2O0010000O11O0O2O0O010O000000000001O001N2001N1O100O010O01O00001O00001O001O1001O0O100O00001O01O000O10001O10O0100001N1O010O01O1O01O01O001O1O010O1O10O00010O10O01O01O001O001O00100O5L6J5J7I5K6J7I4K`_d?" + }, + "dataset_name": "ocr_spotting_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The text \"AYAM\" is presented in large uppercase letters on a signage board. The font appears bold and designed to be eye-catching, serving the purpose of promoting or identifying a business or product associated with chicken, as \"ayam\" means chicken in Malay and Indonesian." 
+ }, + { + "image_path": "ocr_spotting_detailed_caption/images/img_940.jpg", + "mask_rle": { + "size": [ + 720, + 1280 + ], + "counts": "WS^>3[f0B7I2N10O010O001O10O01O10O0010O1O010O01O0001O010O00100O001O00010O00100O2O2M4L5K6J4M3L4Kkid<" + }, + "dataset_name": "ocr_spotting_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The text \"RIA\" appears next to \"AYAM\" in the same font and style, following the design pattern of the sign. It seems to be part of a larger phrase or brand name, although without additional context it is challenging to ascertain its full meaning or association." + }, + { + "image_path": "ocr_spotting_detailed_caption/images/img_940.jpg", + "mask_rle": { + "size": [ + 720, + 1280 + ], + "counts": "fVS9c0je08I5L4M3N1N2O00000jZOiNRe0[1O00010O0001O010O001O001O0001O010O001O01O1O0KlZOmNUe0S1lZOlNUe0S1jZOoNUe0V10001O01O0O2N1L5O02O2M4L1O0001O001O1IjZOROVe0n0jZOSOUe0m0kZOSOVe0l0jZOTOVe0l0jZOTOVe0l0jZOTOVe0m0iZOSOWe0S11O0O1O101O001O001O000000001O0010O00010O0001O01O01O01O0001O001O00010O000010O01O010O1O1O1N3N7Ga`e`0" + }, + "dataset_name": "ocr_spotting_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The word \"SMASHED\" is in uppercase letters and retains the same font consistency and styling as the previous words, indicating it's part of the same signboard. The use of the word \"smashed\" could be describing a method of food preparation, possibly relating to the menu items offered by the establishment." + }, + { + "image_path": "ocr_spotting_detailed_caption/images/img_940.jpg", + "mask_rle": { + "size": [ + 720, + 1280 + ], + "counts": "ib`;:Vf0:F7I1O1O01O01O010O0001O01O010O00010O00010O00010O000010O0000010O001O010O001O1O1O3L8HPUe?" 
+ }, + "dataset_name": "ocr_spotting_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The word \"FRIED\" appears in the same bold, attention-grabbing font as the other text elements in the signage. The usage of the term \"fried\" aligns well with food-oriented establishments and could denote a particular style of cooking advertised by the business." + }, + { + "image_path": "ocr_spotting_detailed_caption/images/img_940.jpg", + "mask_rle": { + "size": [ + 720, + 1280 + ], + "counts": "]a_<1\\f0>D6J4M1O10O01O01O00_ZOWO[e0j0cZOXO]e0g0bZOZO^e0m0O0001O0O10001O010O0001O01O001O00001O0O2O00001O0000001O01O010O1O0001O0O101O1O1O3M>_OYh]>" + }, + "dataset_name": "ocr_spotting_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The term \"CHICKEN\" completes what seems to be a descriptive phrase relating to the nature of the food provided at this location. Presented in the same visual style as the other text elements on the sign, it confirms the establishment’s focus on chicken dishes." + }, + { + "image_path": "ocr_spotting_detailed_caption/images/img_70.jpg", + "mask_rle": { + "size": [ + 480, + 640 + ], + "counts": "nPQ14g>6L3K6J5J6N101O010000000001O1O2N001O3M2N1O1O01O00010N1O10O1O1O10O2O0O2O000O20O0000001O000O2N10O10000000O01000001O0O110O00001O0000000001N10000O100000O100O100001O00O10001O001O00000000O100O10001N02O0001O0000O100000000VOZOZCf0fO001O1O000O0101O00000O10001O0001O0O1000001N10001O00O1O101O0000000O10000000000000000001O000O2O1O2Mdkf3" + }, + "dataset_name": "ocr_spotting_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The text \"Accommodation\" appears on a signboard, suggesting the label for a location where lodging facilities are provided. 
The text is bold and capitalized, providing clear visibility and significance, thus indicating direction to the accommodation facilities within the vicinity." + }, + { + "image_path": "ocr_spotting_detailed_caption/images/img_70.jpg", + "mask_rle": { + "size": [ + 480, + 640 + ], + "counts": "Ybm5;b>6K4M3M1O2N2N2O0O101O0O101O000000000000O10000O2O001N2N1O2O1N3M2M30OBeA:Y>FfA22NX>8kAEU>>gADU>f0N2L3O1O2OO10O01000001O010O0000O100O10O1001OO100001O00000000WOSB>o=_OTB?X>O001OO10001N2O0O1000001N1000O101O000O2O0O2N3M1O2L32N2M2N2N1O2O1N10000O1000000001N100O2N2N2N3K7GeSg1" + }, + "dataset_name": "ocr_spotting_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The text \"Office\" displayed similarly to , is also on the signboard, and its typography suggests it is an instructional marker guiding individuals towards offices located nearby. Its distinct appearance functions as a navigational aid for visitors seeking office spaces." + }, + { + "image_path": "ocr_spotting_detailed_caption/images/img_70.jpg", + "mask_rle": { + "size": [ + 480, + 640 + ], + "counts": "XlS14k>9H8H5K3M2OO001O2N1N110O00O0O1100000O0O201N20O01O01O0000O10000O1001O002N3TOmAe0X>O0001N101O1O2N1O001N101O00000001O000000000O1O10O000J61N110O2O1O3M1O001O000O101O001O1OL\\OlAd0T>]OkAc0U>40000O1O100O100000000O101OO10O1O2O000000O10001O00O100O100001O7I2N000O10000001O00000000O011O0000000000010O001N10000O10O2O00000O100000001N100001O0O10001O1N3M2M5I]fk5" + }, + "dataset_name": "ocr_spotting_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The term \"Nightline\" is prominently featured, possibly indicating a nighttime service or a helpline available after-hours. This text, like the others on the sign, caters to nighttime assistance or inquiries, potentially providing crucial information for individuals seeking support during late hours." 
+ }, + { + "image_path": "ocr_spotting_detailed_caption/images/0001250.jpg", + "mask_rle": { + "size": [ + 540, + 720 + ], + "counts": "Q_`2=]`08J1N004m_O]Og?o0L;E6K5J5L2N2M3N2NmN_AKa>J`A\\O0k0^>JaAa0_>o0O000O1000BdA[N]>d1eA[N[>e1eA[N[>e1eAVNNM_>l1cAVNON^>l1cAWN`>j1`AWN_>i1`AXN`>g1aAYN^>g1cAUNNN_>m1cAUNa>k1_AUN`>k1_AVNb>j1^AVNa>k1_AUNa>k1`ATN`>k1`AUNa>k1^AVNb>j1]AVNd>j1]AUNd>i1^AUNc>k170O1VAUNc>j1^AVNb>8YAV1P?iNRAU1n>mNRAM0j0n>i0QAYNh>o1O1OcNZA91XOe>>\\A71[Oc>1ZAR14mNb>0[AP15QO`>O\\Ag09\\O\\>M[Ah08[O\\>N[Am03UOc>NZAP1OSOg>NYA^1f>[N]Ae1c>[N]Ae1c>ZN^Af1b>YN_Ag1a>YN_Ag1a>YN_Af1a>[N_Ac1c>^N[Ac1d>^N[Ac1e>]N[AZ1OeNg>0[A[10bNe>4[AY12aNd>7YAX11cNe>6ZAW11cNe>6ZAV12dNd>6ZAV12eNc>5[AV12eNb>6\\AU11fNc>h1[AXNf>i1YAWNg>>^Ab0KPOg>3ZAOa0b0@[Oe>OZAN26e04ZOJk>OYA4b0?P>]OYAM2e0O^O4a0a>_OZAM2P10Cd>_O[AO0f1f>[NZAO0\\1OgNj>NWAO0[10gNi>4XAV1NgNh>3[A]1e>cNZA^1f>bNZAW1NfNh>1\\AX1MgNg>0\\AZ1MfNg>O]A[1LfNg>O]A[1LgNf>M_A[1KiNf>K`A\\1HjNi>J_A\\1HjNi>J_A[1IkNh>J_A[1IjNi>K^A[1IjNi>K^A[1JiNS?V1n@jNR?V1n@gNU?Y1k@gNU?Y1k@hNT?W1m@hNT?X1l@hNT?X1l@gNU?Y1k@gNh>1^AW1JgNi>2]AV1j>hNYAW1X?M2SO\\@?`?E`@;_?Eb@HN9Y?Dm@=IH42b?>l@[Om>f0PA]Oo>d0o@\\Og>NRAk0J@T?FQAY1Y?O1000O10000000000O10O10000Ad@^O\\?P110O0101O0000000O2N2O2TO\\@;j?]OZ@a0R`0M4L3HaV4MWYK1U`0=Y@Bm>?SABk>:_@He0Ok>?TABk>`0k@]OM3X?`0j@^ON1X?b0j@]O`?e0_@[Oa?g0]@YOc?d0`@]O_?c0`@^O`?b0_@IZ?4`@^O38LLc?a0^@C1Kb?l0^@TOd?j0\\@UOh?g0X@ZOj?c0W@\\Om?b0R@^Oo?a0Q@_OU`09m_OGV`04m_OJV`04;N__ONY`0:2N1O1O1O1NfgQ6" + }, + "dataset_name": "ocr_spotting_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The text \"BUBBA\" appears in bold, capital letters with a font that is playful and somewhat informal, possibly evoking a casual or friendly atmosphere. The position is prominently displayed at the top of a circular logo, which suggests its importance as a distinguishing element or a brand name." 
+ }, + { + "image_path": "ocr_spotting_detailed_caption/images/0001250.jpg", + "mask_rle": { + "size": [ + 540, + 720 + ], + "counts": "blh54\\j6:meI9I6K5K3N2M3N3L3N101N100O2O00jNk@e0U?[Oj@g0T?a0000000000ROl@4T?^Ol@11a0T?Jl@6T?Am@G0h0S?@n@HOh0S?Am@G0h0S?Bl@F0h0U?Jj@6V?Ek@AOj0V?Ek@?U?Ak@>V?Bj@>V?c02O9Fb@kN_?m0a@TO^?k0c@UOU?Ml@8J;5AT?0j@2380FR?2m@M92G1R?2lANT>4iAMW>h0l@QO37P?P1o@oNQ?S1m@lNT?V1k@iNU?X1k@fNV?Z1j@fNV?Z1k@dNV?]14000000000FcNUA]1k>cNUA]1k>dNTA\\1JdNn>1WA`1h>_NYA\\1JdNn>OYA]1JcNm>0YA]1IdNn>OYA]1IeNm>NZA]1HgNm>L[A]1HgNl>M\\A\\1HgNl>M[Ad1d>\\N]Ab1d>_N[AX1JlNk>L[AU1MoNi>KZAV1MoNi>J[A8Hd05[Og>I\\A6Ld01]Og>I\\A6M0K>1Dk>H\\A6MQ1f>jN]A6LP1g>jN]A[1IiNj>L\\A\\1KfNj>N[A\\1KfNj>N[A\\1KfNk>MZA]1KfNk>MZA]1KfNT?Z1m@dNT?\\1l@dNk>OYA]1LdNj>0ZA\\1KfNj>OZA[1LgNi>N[A[1LgNi>N[A[1LgNi>N[A[1LgNi>N[A[1MfNh>O[A[1MeNi>0ZA[1MeNi>0[AZ1LfNi>0[AZ1MfNg>0]AY1KhNh>O\\AZ1KhNi>N\\Ab1d>^N\\Ab1d>^N\\AZ1JhNk>N[AZ1KgNj>O[AZ1KgNj>OZA[1LgNh>0[AY1MhNh>M\\A[1MfNh>O[A[1MeNi>0ZA[1MeNi>0ZA[1MfNh>O[A[1LgNi>M\\A\\1KgNi>M\\A\\1KgNi>M\\A\\1KgNi>M\\A\\1KfNj>N[A\\1KfNj>OZA[1LfNk>NYA\\1LeNU?[1k@dNR?On@72M31I72EW?5j@10OO5OHW?`1O1OHk@iNU?W1l@hNT?X1l@hNT?X1l@hNT?X1l@hNT?X1l@hNT?X1l@hNT?X1l@hNT?X1l@hNT?X1l@hNT?X1l@hNT?X1l@hNT?X1l@hNT?X1l@hNT?X1l@hNT?X1l@hNT?f0RA^OIMU?d0TA^OGNU?d0^A\\Ob>d0]A\\Od>d0j@[O84n>a0j@]OO9W?:j@NV?2j@NV?2k@AOOW??j@OU?0l@1T?Nl@3k>XOWAe0M4l>WOWAd0N5k>WOXAc0M^ONb0l>_OYA^1g>bN[A=I01Hl>K\\A:H31Gl>J\\A:I5OGk?NU@2O1n>IYA04l0b>ESAb0n>f01N9TOi@XO20W?9h@I:e@Io?Nn_O42NT]T3" + }, + "dataset_name": "ocr_spotting_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The word \"GUMP\" is presented in a similar bold and playful font directly below . Both words form a cohesive phrase when read together, implying a connection or partnership, possibly in a business context." 
+ }, + { + "image_path": "ocr_spotting_detailed_caption/images/0001250.jpg", + "mask_rle": { + "size": [ + 540, + 720 + ], + "counts": "Y\\d34h`00000001O3M2M2O1N4M2M1O101O0O10000O10O1000000O10O0100000O01000O1O1000O10O1000O1000000O100000O100000O0100000O10000000O01000O100000O11O001N0100000O10O0010000O10O0100000O1000O010000O10000000000000O100O10O10000000O2O00000O0100000000000000000000O0100000000O10O10000OLm_O@T`0>7O00h_OCN0o? and , completing the phrase that seems to be the focal point of the circular logo. The font style remains consistent with the previous text, reinforcing the brand's identity and likely indicating the type of product or service offered." + }, + { + "image_path": "ocr_spotting_detailed_caption/images/0001250.jpg", + "mask_rle": { + "size": [ + 540, + 720 + ], + "counts": "][P31i`04M2O2M3N1O2N1N3N2N2N1O1N2O1O1O1O1N3M2O2M101O2N1N2O1O1N2O1O1O1O1O1O1001O1O1N100000000O2O00O0101O0O10000O10000O10O10O2OO10O100O010O100O010O10O0100O010O10O01O10O0010O0010O01O010O0010O0010O00010O0010O01O01O01O0010O000001O01O01O0000010O00000010O01O000000000001O01O00000000000001O01O000001O00O101O000O1O1N2N2O1O1@Q@1Q`0;2O1N2O1N2O1N2N2O001N2N3MRfR6" + }, + "dataset_name": "ocr_spotting_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "\"RESTAURANT\" is written in a smaller, yet bold font beneath the word \"SHRIMP\". This text specifies the nature of the business associated with the overarching brand identified by the preceding text." 
+ }, + { + "image_path": "ocr_spotting_detailed_caption/images/0001250.jpg", + "mask_rle": { + "size": [ + 540, + 720 + ], + "counts": "abQ764OX`0=M2N3M2N5L1W@VO\\?l0d@TO\\?m0b@SO^?U101O1O1O01O0Lb@jN_?Y102N1O0000001O001O1O0000001N10O10O2O0000001N100O2N11ON2O2N0O200O1N2O1O1O010001O001O1N2O1N2N2O1O1N2O1O1N3N1O1N2O1N2N2O1N2N2N2N2N2N2M3N2N2N3K4L6Jdh[3" + }, + "dataset_name": "ocr_spotting_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The word \"MARKET\" appears in a smaller font at the bottom of the circle, suggesting a secondary or additional aspect of the business, perhaps indicating a place where goods are sold as part of the company's offerings." + }, + { + "image_path": "ocr_spotting_detailed_caption/images/0001250.jpg", + "mask_rle": { + "size": [ + 540, + 720 + ], + "counts": "cjm61g`08J4M2N2O1N101O0000000O1001O00000O10001N100O3N3L2L13M3N2O1N2N101N10001O0000000000O10001O0O20O01O1N4HQoR4" + }, + "dataset_name": "ocr_spotting_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "\"CO\" could stand for \"Company,\" abbreviated and presented beside the main brand name, which is common practice for businesses to denote a corporate entity." + }, + { + "image_path": "ocr_spotting_detailed_caption/images/0001250.jpg", + "mask_rle": { + "size": [ + 540, + 720 + ], + "counts": "ZPm78c`02N2N2N2N2O1N2O000000000000O2O0O2N2N2N3M[cc3" + }, + "dataset_name": "ocr_spotting_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "\"TM\" indicates that the entire phrase formed by , , and is a trademark. This protects the brand's unique identity and legally secures its use exclusively for the business's purposes." 
+ }, + { + "image_path": "ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "mask_rle": { + "size": [ + 4032, + 3024 + ], + "counts": "g\\Xm2`1Ul3a0B. It follows directly below \"EVENING,\" suggesting the complete term \"EVENING PRIMROSE.\" The positioning and styling are consistent with , reinforcing the connection between the two words. The term \"Evening Primrose\" is usually associated with the name of a plant, often used in the context of essential oils or natural product ingredients." + }, + { + "image_path": "ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "mask_rle": { + "size": [ + 4032, + 3024 + ], + "counts": "YaeR4W1fk3Z1D6L4N1O2O0O2O0000000O101O0000000000000000000000000000001O00000000000000O100000000000000000000000001O0000001N10001O001O001O0O101O001O0010O0001O001O1O001O001O00000000000000O100000O10O10000O001O100O1O1O00100O1O100O100O10000000O1001O000001O000000001O000010O01O0010O01O10O01O1O1O00100O00001O00O100O100O100O10000O100O100O100O100O10000O100O1000000O10000000000O100000000000000O1000000000000000000000O1000001O000O1000000000000O1000001O00000000001N11N1000000O10001O0O10000000001N1000000000000000000000000O10000000O100000000000000000001O00000000000000000000O100000000000000000O1000001O0O1nNWULmNij3Q1[ULlNfj36VULF92bj35]UL@67_j36aVLCdi3:S2M2MXTh\\6" + }, + "dataset_name": "ocr_spotting_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The term \"BASE\" is part of the full term \"BASE OIL,\" which is shown in a smaller and possibly lighter weight sans-serif typeface compared to and . Positioned at the bottom of the product label, it likely indicates the type of product, suggesting that the contents of the bottle can be used as a carrier or base oil in aromatherapy or skincare." 
+ }, + { + "image_path": "ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "mask_rle": { + "size": [ + 4032, + 3024 + ], + "counts": "lTiZ5m0nl3:H6J6K4L3M4L4L3M3N3L3N2N1O2N2N101N101N100O2O0O100O100O2O00000O100000001O0000O1000000000000O101O00000O10001O000O101O00001O0O101O001N10000000000000O2O000O2O0O100O2O0O10000O1000000000000000000O100000000000O10000001N100000000000000000000000O1000001O00000000001O00001O001O1O2N8kMfTLT1]l3F7I3M2N2N1O1O1O00001O000000001N100000001O000000001O000O10001O0O101N2N2N2N4Koidc5" + }, + "dataset_name": "ocr_spotting_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "\"OIL\" completes the term \"BASE OIL,\" as mentioned in . The font and positioning maintain consistency with the description provided in , reinforcing its role as part of a descriptive phrase related to the product's use or contents. Together, \"BASE OIL\" likely designates the product's category within a larger set of similar goods." 
+ }, + { + "image_path": "ocr_spotting_detailed_caption/images/tr_img_01644.jpg", + "mask_rle": { + "size": [ + 451, + 600 + ], + "counts": "i^m14n=2N1O2N1O1O001O1O0O2O1O1N2O1O001O1N2O1O1N2O1O1N@oB2P=NQC2mXGK=GX8`0\\GI;IV8=bGJ8IR8?hGH6Io7a0lGF5Il7b0QHE3Ih7d0WHD0He7e0^HCMH`7h0eH@JI^7h0kH^OGJX7l0SIZOEJR7o0\\IWOBJP6NhH`1i1iN^OIP6m1dJZN\\OIP6j1gJ]NYOIP6g1jJ`NVOIP6c1nJdNQOJQ6_1RKfNmNKQ6\\1UKjNiNJR6W1ZKoNdNJR6T1]KRO`NKS6m0cKXOZNKS6h0hK]OUNKT6>QLFjMMW6L^L8ZMLa:4^EMb:3]ENc:2\\EOe:0[E0e:1XE1h:OWE2i:NTE5l:KRE7n:JkD;]8VObI`0cMg0i8g3O1O1O100O1O1O1O1O1O1O1O1O1O1O100O1O1O1O1O1OWKYJX1f5aNcJM^Mb0n7^OhJ0cM9e7DkJ2fM5^7GoJ4eM3[7HRK5eM1X7IUK6dM0V7IXK7cMOT7JZK7cMNR7J]K8aMNQ7J_K8aMMo6KaK8aMLn6KbK9`MLm6KdK9_MMl6IfK:_MLj6IiK;]MLj6HjK<\\MLk6EkK?[MKh:5XEKh:5XEKh:5XEKh:5XEKh:5XEKg:6ZEIf:7ZEHg:8YEHg:7ZEIf:7ZEIf:7ZEIe:8[EHe:8[EHe:7]EHb:9^EGb:9_EEb::aEB`:?b1O1O010O1N101O1N1O2O1M4MXm70XRHo0[O`0C:E;G7J7I6J5K5L5J5L4L4L4K4N3L3M4L4M2N2M3N3L3N2NPMPGP1n8hN\\GX1a8cNdGCEa0e8GkGA6;m71PHA=9a73UHB`09Y72ZHDb08R72]HEg07j61bHFj06c63dHFl07^61hHGn07X60lHIo05U60mHJQ16P6OPIJS17k5NSIKU15g5OUIKW16c5MWIMX16_5LZINZ14[5N\\IM[15W5N^IM]14U5M_IO^14Q5MbIN^15P5LbIO`14m4MdIO`14k4LeI0a14i4LgIOb15e4LiI0c13d4MiIOd14c4LiI1e13`4LlI0e14_4LlI0f13]4MmI0g13\\4LnI1f14[4KoI1g13Y4LQJ0g14X4LPJ1h13W4LRJ1g13W4LRJ1g13W4LRJ1g13W4KSJ2f13V4LTJ1g12U4MTJ1g12U4MTJ1g12U4LUJ2f12U4LUJ2f12U4LUJ2f12T4MVJ1f12T4MVJ1e13U4LVJ1e13U4LVJ1e13U4LVJ1e13U4LVJ1e13U4LVJ2d12V4LVJ2d12V4LVJ2c13W4KVJ3b12X4KVJ3b11Y4LUJ3b11Y4LUJ4`11[4KUJ4`11[4KUJ5^11]4KTJ4_10^4LTJ3]12_4KTJ4[12b4JRJ5[11c4JRJ6Y10f4JRJ5W12g4JQJ5V12i4IQJ6T11l4JPJ5S11m4JPJ6Q10Q5JmI7P10S5JmI6m02V5IlI6l01Y5JkI6h02]5IjI6g01a5IhI7d00e5LdI6b01j5LbI4`01P6M]I4?1T6M\\I3;2Z6MZI451c6KWI8M2l6HVI;A3[7BTIb2m6^MTI`2n6`MQI`2o6aMQI]2Q7bMPI\\2R7cMoH[2R7eMoHY2S7fMnHX2T7gMmHV2V7iMlHT2V7kMkHS2W7lMjHR2X7mMiHQ2Y7mMiHP2Z7nMiHo1Y7oMiHn1[7nMiHo1[7lMhHQ2h9L4L5K4L5I7J7H9F]QT2" + }, + "dataset_name": "ocr_spotting_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + 
"caption": "The text \"46\" appears in a serif font with distinct, prominent stylistic flares at the ends of the strokes, which is characteristic of serif fonts. These numerals are white, providing a strong contrast against a dark background plate, which appears to be made of slate or a similar material. The plate is mounted onto a brick wall, and there are two spherical, possibly metallic, fixtures attached to the plate on either side, which seem to be serving as decorative mounting posts. The purpose of this text likely indicates an address or number associated with the location, commonly used to identify specific residential or commercial units." + }, + { + "image_path": "web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "mask_rle": { + "size": [ + 1080, + 1920 + ], + "counts": "^bWY13aQ16M101O0O1000001O0000000001N3Mfa`e0" + }, + "dataset_name": "web_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This area seems to indicate the numeric value '31,' which, in a calendrical context, may reference the number of days in a month. It does not appear to have any interactive features based on the screenshot." + }, + { + "image_path": "web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "mask_rle": { + "size": [ + 1080, + 1920 + ], + "counts": "Sa_Z1>WQ15M3M10001O000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O100000000001O00000000000000000000O101O0O2Na[``0" + }, + "dataset_name": "web_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This seems to be a button or a link labeled 'Connecter' which, when translated from French, means 'Connect' or 'Log in'. 
It is likely an interactive element that upon being clicked, would prompt the user to access an account or initiate a connection process." + }, + { + "image_path": "web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "mask_rle": { + "size": [ + 1080, + 1920 + ], + "counts": "jUU`06aQ12N1010O0001O000000000000000001O000O10000000000000000000000000000000000000000000000000O10000001O0000000000000001O0000000001O0O10001NmTm\\1" + }, + "dataset_name": "web_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This section contains the French word 'Novembre', which is the month of November. It appears to be a part of a list of months, possibly for navigating a calendar or archives by month. It may be an interactive element that allows users to view content from November." + }, + { + "image_path": "web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "mask_rle": { + "size": [ + 1080, + 1920 + ], + "counts": "knf]15bQ12N2O000O11O000000000000000000001O0000000000000O10000000000001O00000000000000000O010000000000000O2O1N10001O00000000O10001O1NPc_?" + }, + "dataset_name": "web_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This is a button or link with the text 'Annoncez' followed by information icon (i). The French word 'Annoncez' translates to 'Advertise'. This suggests that it is a call-to-action for users to advertise, possibly by clicking this button or link. The information icon typically indicates additional details available upon interaction." 
+ }, + { + "image_path": "web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "mask_rle": { + "size": [ + 1080, + 1920 + ], + "counts": "aX\\i0:\\Q13O1O0O1000001O000000000001O0O100O2MZfXU1" + }, + "dataset_name": "web_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This area displays the number '07', which could signify a day of the month, especially since it is seen next to a date heading in the format 'Vendredi 7 Mai 2021', which translates to 'Friday, May 7, 2021'. It seems to be a static element without interactivity." + }, + { + "image_path": "web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "mask_rle": { + "size": [ + 1080, + 1920 + ], + "counts": "UVV`06aQ12O0O101O0000000000000001O00000000000000000000000000000000000000000000000000000000000O100000000000000000000000000000001O0000000000000000SQj\\1" + }, + "dataset_name": "web_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "Similar to , this is labeled 'Septembre', which is the French word for September. It is part of the same apparent navigational element for a calendar or archive sorted by months and is likely interactive as well." + }, + { + "image_path": "web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "mask_rle": { + "size": [ + 1080, + 1920 + ], + "counts": "^nfe05`Q14N101O0000000000001O000000000000000000000000000001O00000000O100000O10O2O00000000000000000000000000000001O0000000O10001O00001N2Nl_]W1" + }, + "dataset_name": "web_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This section reads 'Liens Web', which translates to 'Web Links' from French. This likely refers to a section of the web page intended to direct users to other related sites or resources. 
It is probably interactive, with each listed link being clickable." + }, + { + "image_path": "web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "mask_rle": { + "size": [ + 1080, + 1920 + ], + "counts": "TXVn06`Q13N101O00000000000000000001O000000O10000O10001O0000O101O0O3MfQRP1" + }, + "dataset_name": "web_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The number '15' is visible here, and when combined with the context of a calendar visible in the screenshot, it likely represents the 15th day of a month. This element does not seem to be interactive itself." + }, + { + "image_path": "web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "mask_rle": { + "size": [ + 1080, + 1920 + ], + "counts": "bo\\g08_Q12O1O0O1000001OO10001O000000001N101NVoWW1" + }, + "dataset_name": "web_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "Here we see the number '04' which, in the context of the surrounding calendar, might represent the 4th day of a month. It doesn't show any sign of interactivity." 
+ }, + { + "image_path": "web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "mask_rle": { + "size": [ + 1080, + 1920 + ], + "counts": "ffbk03bQ14M3M2O101N1000000000000001OO100O1N2K5O10O10O10001O0000000O1000000001O00O100O\\OWoN;hP1EZoN:fP1E\\oN:dP1F]oN9cP1G^oN8aP1I_oN7aP1I_oN7aP1J^oN6bP1J^oN6bP1K^oN4cP1L\\oN4dP11WoNOiP12VoNNjP13UoNMkP13UoNMkP1`0001O000000000000O100000000000O10O10O1O1L5O1O20O1O0O0000001O00000O10O10O1O1O10O01O01O1ASoN2nP1MSoN2nP1MToN1nP1MToN1nP1LVoN1Zh[P1" + }, + "dataset_name": "web_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This text, 'Plan du site', stands for 'Site Map' in French and usually refers to a detailed page listing where one can find an overview of all the sections and pages within the website. It is usually an interactive element that, when clicked, will take the user to the sitemap page." + }, + { + "image_path": "web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "mask_rle": { + "size": [ + 1080, + 1920 + ], + "counts": "dlm^1>YQ12O001O00000000000001O00000000000000001O000000000000000000O10000O100O11O000000000000001N101O4L7HVS2;_lM2N1000000000000001O0000000001O01O0000000000000000000000000000000O1000001O000O2N3In_91U`F3N1N2O1N101N101N100O1O1O1O1000O100O100L5FgZX<" + }, + "dataset_name": "web_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This region depicts an interactive button with the text \"BOOK NOW\" overlaid on it. Usually, buttons like this on websites are clickable and lead the user to a page where they can schedule an appointment or reserve a service. The button is stylistically designed to stand out and grab attention, potentially suggesting it is a call-to-action feature for users to quickly access the booking process for the service provided by the website." 
+ }, + { + "image_path": "web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "mask_rle": { + "size": [ + 1080, + 1920 + ], + "counts": "ReTd01=:_P1K^oN7\\P1NcoN3\\P1NdoN2[P1OeoN1ZP10foN0ZP10foN0ZP10foN0ZP10foN0ZP10foN0ZP10foN0ZP10goNOYP11goNOYP11goNOYP11hoNNXP12hoNNXP12hoNNYP11hoNNXP12hoNNXP12hoNNXP12hoNNXP12hoNMYP13goNMYP13goNMYP13hoNKYP15goNI[P17eoNH\\P18eoNE]P1;coND^P1, this \"Contact us\" link would allow users to access a contact section or page on the website, promoting user interaction with the service provider for queries or assistance." + }, + { + "image_path": "web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "mask_rle": { + "size": [ + 1080, + 1920 + ], + "counts": "X]fd0:]Q13M1000000000000001O00000000001O000001O000000000000O10000000000000O10000001O0001O00000000O101O001NWkmX1" + }, + "dataset_name": "web_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "Just like , this \"Home\" link is a navigational feature intended to bring the user back to the site's main page, presenting a starting point or central hub for exploring the website's contents." + }, + { + "image_path": "web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "mask_rle": { + "size": [ + 1080, + 1920 + ], + "counts": "YfTd08^Q14N00001O000000000001O00010O000001O00001O10O00001N2N2N3K^Z6NfeI3M4L2O1O1O1O00000000000001O00001O0000O100000000000001O00000001O000000000000000000000000000000000000001O00000000O1000000000000000O2O00002LnaeW1" + }, + "dataset_name": "web_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "Echoing , the \"Pay Balance\" text link is associated with the payment part of a transaction on the website. It is intended to facilitate users in clearing dues or completing transactions related to the services offered by the site." 
+ }, + { + "image_path": "web_detailed_caption/images/web_0558c1f4-c05b-49a8-8479-04b1575779d2.png", + "mask_rle": { + "size": [ + 1440, + 2560 + ], + "counts": "g^i>8f\\17I3N2N2O1N2O0O2O00000000001O00000000010O0001O01O0001O0000000O10000O10000O100000000000000000000000000000000000000001O0000000000O10O100000000000000000000O1000000O10000000000O100001O0000000000001O000000000000001O000001O0000O100000001O0000000000000000000000000000000001O0000000000000000000O0100000000000000000O10000000000O100000000000000000000000000001O00000000000000O1000000000000001O0000010O000000001O0001O00001O001O01O00000O10000O10000O10000O2O0O2N2N3M7I3L3ON2H:K4M3N101N101O00000000000O1000000001O0000001O0001O01O000000O1000000O10000O1000000000001O0000010O00010O000001O01O00000000O01000O0100O010000O101O00001O0O1000001O000000000000000000000000001O000000000000000000000000000000001O0O101O0O3M4LWU]b2" + }, + "dataset_name": "web_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This area of the webpage is part of a bullet point list under the subheading \"Local Chapters\". The subheading describes Local Chapters as country or region-level groups affiliated with the OpenStreetMap Foundation that represent their local mapping community in dealings with government, business, and media. The bullet point \"OpenStreetMap Belgium\" likely indicates that there is an established local chapter for the country of Belgium. The text appears in blue with an underline, suggesting that it is a hyperlink. Clicking on this hyperlink would presumably direct the user to more information about the OpenStreetMap community in Belgium or to their specific website." 
+ }, + { + "image_path": "web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "mask_rle": { + "size": [ + 1080, + 1920 + ], + "counts": "VT^[15\\Q1<[Oa0M3N101O00000000000000000000000000000000000000000001O00000001O000000000000000O100000000000000000000000000000000001O00000000000000000000000000000000O1000000000000000O1000000000001O000000000000000000O10000000000000000001O1N3LReR`0" + }, + "dataset_name": "web_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This area appears to be a contact detail, specifically a phone number. It typically serves as a direct line of communication for users to reach out to the company or organization featured on the website. Such contact information is usually clickable on mobile devices, enabling the user to initiate a phone call directly." + }, + { + "image_path": "web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "mask_rle": { + "size": [ + 1080, + 1920 + ], + "counts": "dgR<, this is a call to action to visit the company's Twitter page. Clicking on this interactive element would redirect a user to the company's Twitter profile to view tweets and engage with their content." 
+ }, + { + "image_path": "web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "mask_rle": { + "size": [ + 1080, + 1920 + ], + "counts": "eoSZ12fQ1000001O00WW4?VhK5M3O0[oNZOWP1h0goNZOXP1f0hoNZOXP1f0hoNZOXP1f0hoNZOXP1f0hoNZOWP1g0ioNYOWP1g0ioNYOWP1h0hoNXOXP1h0hoNWOXP1j0ioNUOWP1k0hoNVOXP1j0hoNVOXP1j0hoNWOWP1i0ioNWOWP1i0ioNWOWP1i0ioNWOWP1i0ioNWOWP1i0ioNWOWP1i0ioNXOVP1g0joNZOVP1f0joNZOVP1f0joNZOVP1f0joNZOVP1f0joNZOVP1f0joNZOVP1f0joNZOVP1f0joNZOVP1f0ioN\\OVP1e0ioN[OWP1e0hoN\\OXP1d0hoN\\OXP1d0goN]OYP1c0goN]OYP1c0goN]OYP1c0hoN\\OXP1d0hoN\\OXP1d0hoN\\OXP1c0ioN]OWP1c0ioN]OWP1c0joN\\OVP1d0joN\\OVP1d0joN\\OVP1d0joN\\OUP1e0koN[OUP1e0koN[OUP1e0koN[OUP1e0koN[OUP1e0koN[OUP1e0loNZOTP1f0loNZOTP1f0loNZOTP1f0loNZOUP1e0koN[OUP1e0koN[OUP1e0koN[OUP1e0joN\\OVP1d0joN\\OVP1e0hoN\\OXP1d0goN]OXP1e0goN[OYP1R1000000001O0000000000000CdoN^O[P1b0foN_OYP1`0ioN_OWP1=moNCSP1=moNCSP1?loN@TP1a0koN_OUP1b0koN]OUP1c0koN]OUP1c0loN[OUP1e0koN[OUP1e0koN[OUP1e0koN[OUP1e0koN[OUP1e0koN[OUP1e0koN[OVP1e0hoN\\OXP1d0hoN\\OXP1d0hoN\\OXP1d0hoN\\OXP1d0hoN\\OXP1c0ioN]OWP1c0ioN]OWP1c0ioN]OVP1d0koN[OUP1e0koN[OUP1d0loN\\OTP1d0loN\\OTP1c0noN\\ORP12loNN50oo01aPOO_o01bPOM_o02bPON^o02bPON^o02bPON^o02bPON^o03aPOM_o03`PON`o03^POOao03\\PONdo06WPOKio0>moNCSP1b0goN_OYP1a0hoN]OYP1?loN@UP15WPOHjo04P1Lneg`0" + }, + "dataset_name": "web_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This is a customer contact area, providing users with a way to get in touch with the company. Clicking on this is likely to take the user to a section of the site with various contact options like email, phone, or a contact form." 
+ }, + { + "image_path": "web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "mask_rle": { + "size": [ + 1080, + 1920 + ], + "counts": "jefS1S1dP12N2O00000O1000000001O00000000000000000001O000000000000000000000000000000000000000000000000000000000O10O1000010O000000000000000000000O1001O00000000000000000O01000000000000000010O00000000O1000000000000001O0000000000000O10000000000000000001O000000O100000000000000000000000000000O2O00000000O101O00000O3MoQcf0" + }, + "dataset_name": "web_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This is a call-to-action button that allows users to add a product to their shopping cart. This button is interactive, and upon clicking, the chosen product would be added to the user's cart, with the action possibly reflected in the shopping cart count in ." + }, + { + "image_path": "web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "mask_rle": { + "size": [ + 1080, + 1920 + ], + "counts": "`nif0=WQ16M10000001N100O11O00001N10000000000001O000000000000000000O1000000000000000000000000000000000000000000000000000000000O101OO1000000000001O0000000000000000000000000000000000O10001O1N4LaP^U1" + }, + "dataset_name": "web_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This area is likely dedicated to showcasing the company's range of products. Clicking here would probably lead users to a product catalog where they can browse and select items of interest." + }, + { + "image_path": "web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "mask_rle": { + "size": [ + 1080, + 1920 + ], + "counts": "Qlom0]\\18K4M2N2N2N3N1N1O2N1N3M2N2O1N2O1O1O1O1O1N20O1000O10000001O0O1`NjdNV1V[1gNPeNV1b[1K001M3K5N2M21N1[OXOReNh0nZ1XOReNi0lZ1YOReNh0nZ1YOQeNh0nZ1XOQeNi0oZ1WOQeNi0oZ1XOPeNh0oZ1YOQeNg0oZ1YOQeNg0oZ1YOQeNg0oZ1YOQeNg0oZ1YOQeNf0P[1ZOPeNNI. 
It likely serves the same function as a clickable link leading back to the home page of the website." + }, + { + "image_path": "web_detailed_caption/images/web_5a35d9c2-2c2d-4a49-ad0a-1408d9cac78e.png", + "mask_rle": { + "size": [ + 1440, + 2560 + ], + "counts": "ng[S1`1\\[17L4M0O101O0O10000000000000000000001O0000000000O100000000000000000000001O000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000O1000000000000000000000000000000001O00000000000000O10001O00000O2O001M5JVkdT2" + }, + "dataset_name": "web_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "The area appears to contain an interface element labeled \"Close issue\" situated within a software development environment, likely a part of an issue tracking or project management system. This interface element is a button, as suggested by its design and placement near the text input area for comments. When clicked, it would typically result in the associated issue being marked as resolved or closed in the system, thereby updating the status of the issue within the project's workflow. The button is designed for users to signify that the discussion, problem, or task represented by the issue no longer requires attention and can be archived or removed from active consideration." 
+ }, + { + "image_path": "web_detailed_caption/images/web_3fed2169-3c3d-43e7-baaa-3bf0e0c7134c.png", + "mask_rle": { + "size": [ + 1440, + 2560 + ], + "counts": "VUZb24f[10ZeN3dZ12fdNL14X[1d0bdN_O][1R100O10000O2O0000000000O10000O100O1000001O000O101O2N1N100O_dNmNnZ12mdNR14nNnZ1Z1ReNfNnZ1[1PeNgNoZ1Y1QeNgNoZ1Y1QeNgNoZ1Y1QeNhNnZ1X1ReNhNnZ1X1ReNhNnZ1X1ReNhNmZ1Y1TeNfNlZ1Z1UeNdNlZ1\\1VeN`NkZ1`1;0O10000000O100iNbdNk0`[1:1O3M2M2O000O2O0O2N3LQ_Zk0" + }, + "dataset_name": "web_detailed_caption_box", + "question": "Please provide a detailed description of each marked region in the image.", + "caption": "This is a close button found on a web-based issue tracking platform, specifically GitLab. In the context of the page, it is presumably used to close the issue which is currently being viewed. When a user clicks this button, the active issue (#1460 in this case) will likely be marked as closed within the system, changing its status from open to closed. This is a standard feature in issue tracking and project management software, allowing users to manage the lifecycle of issues and tasks." + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/annotations/rule.json b/evaluation/MDVP-Bench/annotations/rule.json new file mode 100644 index 0000000000000000000000000000000000000000..79e7387e7f6f6082496206e2561f7a7776cc3bac --- /dev/null +++ b/evaluation/MDVP-Bench/annotations/rule.json @@ -0,0 +1,4 @@ +{ + "box": {"role": "Assistant", "prompt": "We would like to request your feedback on the performance of two AI assistants in response to the user question displayed above. The user's question is based on an image that is provided directly. Regions indicating specific areas of interest are drawn on the image, guiding the user's question. Regions are associated with detailed coordinates in the form of bounding boxes, represented as (x1, y1, w, h) with floating numbers ranging from 0 to 1. 
These values correspond to the top left x, top left y, width, and height of bounding boxes. \nPlease rate the helpfulness, relevance, accuracy, level of details of their responses. Each assistant receives an overall score on a scale of 1 to 10, where a higher score indicates better overall performance.\nPlease first output a single line containing only two values indicating the scores for Assistant 1 and 2, respectively. The two scores are separated by a space.\nIn the subsequent line, please provide a comprehensive explanation of your evaluation, avoiding any potential bias and ensuring that the order in which the responses were presented does not affect your judgment."}, + "point": {"role": "Assistant", "prompt": "We would like to request your feedback on the performance of two AI assistants in response to the user question displayed above. The user's question is based on an image that is provided directly. Marks indicating specific areas of interest are drawn on the image, guiding the user's question. Marks are associated with detailed coordinates, represented as (x1, y1) with floating numbers ranging from 0 to 1. These values correspond to the x and y of points. \nPlease rate the helpfulness, relevance, accuracy, level of details of their responses. Each assistant receives an overall score on a scale of 1 to 10, where a higher score indicates better overall performance.\nPlease first output a single line containing only two values indicating the scores for Assistant 1 and 2, respectively. 
The two scores are separated by a space.\nIn the subsequent line, please provide a comprehensive explanation of your evaluation, avoiding any potential bias and ensuring that the order in which the responses were presented does not affect your judgment."} +} \ No newline at end of file diff --git a/evaluation/MDVP-Bench/data/android_QA/android_QA_box.json b/evaluation/MDVP-Bench/data/android_QA/android_QA_box.json new file mode 100644 index 0000000000000000000000000000000000000000..d3754c2e46bdeae5f3b8af6e6fc7cd67cff948b2 --- /dev/null +++ b/evaluation/MDVP-Bench/data/android_QA/android_QA_box.json @@ -0,0 +1,662 @@ +[ + { + "image_name": "general_11389433281009988680_2.png", + "question": "What flight destination is associated with the airport code in ?", + "question_id": 1, + "dataset_name": "android_QA_box", + "gt_answers": "The airport code SEA in is associated with Seattle as the flight destination.", + "bbox": [ + [ + 143, + 72, + 403, + 28 + ], + [ + 259, + 129, + 57, + 14 + ], + [ + 28, + 129, + 72, + 14 + ], + [ + 259, + 158, + 43, + 14 + ], + [ + 43, + 158, + 86, + 14 + ], + [ + 590, + 244, + 86, + 28 + ], + [ + 619, + 288, + 57, + 14 + ], + [ + 561, + 302, + 115, + 28 + ], + [ + 43, + 345, + 115, + 14 + ], + [ + 43, + 388, + 57, + 28 + ] + ] + }, + { + "image_name": "web_shopping_151626.png", + "question": "What store details are provided in and on the web page?", + "question_id": 2, + "dataset_name": "android_QA_box", + "gt_answers": "The store details provided are the user's chosen store preference, which is indicated to open at 7am in , and the specific location of the store, 'Cedar Rapids South', in .", + "bbox": [ + [ + 588, + 76, + 30, + 30 + ], + [ + 162, + 91, + 106, + 15 + ], + [ + 25, + 167, + 243, + 15 + ], + [ + 25, + 182, + 243, + 30 + ], + [ + 56, + 364, + 76, + 30 + ], + [ + 25, + 486, + 425, + 75 + ], + [ + 25, + 562, + 653, + 91 + ], + [ + 25, + 684, + 334, + 30 + ], + [ + 375, + 684, + 228, + 30 + ], + [ + 25, + 729, + 212, + 30 + ] + ] 
+ }, + { + "image_name": "general_7376630979146411669_4.png", + "question": "What do , , and have in common?", + "question_id": 3, + "dataset_name": "android_QA_box", + "gt_answers": "All three regions display the initial 'A' for different contact names. They are represented by a green circle, used either when there is no associated contact image or as a visual sorting aid.", + "bbox": [ + [ + 138, + 88, + 148, + 29 + ], + [ + 241, + 207, + 103, + 29 + ], + [ + 241, + 310, + 133, + 29 + ], + [ + 241, + 414, + 103, + 29 + ], + [ + 152, + 518, + 29, + 29 + ], + [ + 241, + 518, + 59, + 14 + ], + [ + 49, + 518, + 14, + 14 + ], + [ + 152, + 621, + 29, + 29 + ], + [ + 241, + 621, + 73, + 29 + ], + [ + 241, + 725, + 103, + 29 + ] + ] + }, + { + "image_name": "web_shopping_232643.png", + "question": "What is indicated by , and is it likely to be an interactive part of the website?", + "question_id": 4, + "dataset_name": "android_QA_box", + "gt_answers": " indicates that the user is viewing their shopping cart on the Walmart website. 
It is typically not interactive because it is the URL displayed in the browser's address bar.", + "bbox": [ + [ + 121, + 68, + 159, + 11 + ], + [ + 395, + 148, + 45, + 22 + ], + [ + 178, + 148, + 205, + 22 + ], + [ + 463, + 171, + 45, + 11 + ], + [ + 19, + 250, + 57, + 22 + ], + [ + 87, + 250, + 0, + 22 + ], + [ + 99, + 250, + 45, + 22 + ], + [ + 110, + 353, + 148, + 22 + ], + [ + 110, + 387, + 159, + 22 + ], + [ + 110, + 421, + 57, + 22 + ] + ] + }, + { + "image_name": "web_shopping_147026.png", + "question": "What products are compatible with the charger being sold?", + "question_id": 5, + "dataset_name": "android_QA_box", + "gt_answers": "The charger being sold is compatible with Samsung Galaxy S10, S20, Note 10, and Note 20 models as indicated in and .", + "bbox": [ + [ + 34, + 59, + 636, + 29 + ], + [ + 34, + 88, + 177, + 29 + ], + [ + 93, + 162, + 148, + 14 + ], + [ + 93, + 192, + 281, + 14 + ], + [ + 34, + 251, + 162, + 29 + ], + [ + 212, + 251, + 177, + 29 + ], + [ + 78, + 296, + 443, + 29 + ], + [ + 34, + 310, + 44, + 14 + ], + [ + 34, + 370, + 103, + 14 + ], + [ + 197, + 370, + 44, + 14 + ] + ] + }, + { + "image_name": "google_apps_8099385365633912330_2.png", + "question": "What actions can be performed by tapping on or ?", + "question_id": 6, + "dataset_name": "android_QA_box", + "gt_answers": "Tapping on would likely open a mapping or navigation app, while tapping on might enable location services or show the user's current location on a map.", + "bbox": [ + [ + 59, + 80, + 161, + 36 + ], + [ + 235, + 80, + 65, + 36 + ], + [ + 315, + 80, + 36, + 29 + ], + [ + 52, + 446, + 29, + 7 + ], + [ + 66, + 373, + 14, + 29 + ], + [ + 140, + 512, + 29, + 29 + ], + [ + 52, + 622, + 21, + 36 + ], + [ + 330, + 622, + 21, + 29 + ], + [ + 301, + 688, + 14, + 29 + ], + [ + 96, + 688, + 14, + 29 + ] + ] + }, + { + "image_name": "single_191.png", + "question": "What happens when I interact with the icon in ?", + "question_id": 7, + "dataset_name": "android_QA_box", + 
"gt_answers": "Interacting with the icon in would likely close the current view or popup and return you to the previous screen.", + "bbox": [ + [ + 19, + 1332, + 29, + 44 + ], + [ + 300, + 1346, + 14, + 29 + ], + [ + 138, + 1420, + 14, + 29 + ], + [ + 345, + 1420, + 29, + 44 + ], + [ + 552, + 1420, + 29, + 29 + ] + ] + }, + { + "image_name": "install_13683675498145561788_3.png", + "question": "Which indicates the application is created by a famous tech company?", + "question_id": 8, + "dataset_name": "android_QA_box", + "gt_answers": " indicates the application is created by a famous tech company.", + "bbox": [ + [ + 94, + 58, + 140, + 23 + ], + [ + 129, + 140, + 35, + 23 + ], + [ + 164, + 140, + 93, + 23 + ], + [ + 117, + 175, + 163, + 11 + ], + [ + 316, + 175, + 105, + 11 + ], + [ + 457, + 175, + 46, + 11 + ], + [ + 153, + 198, + 70, + 23 + ], + [ + 129, + 210, + 11, + 11 + ], + [ + 129, + 257, + 46, + 23 + ], + [ + 176, + 257, + 105, + 23 + ] + ] + }, + { + "image_name": "install_6852884529504930052_4.png", + "question": "What does signify in the context of the screenshot?", + "question_id": 9, + "dataset_name": "android_QA_box", + "gt_answers": " represents the logo of eBay, which helps users identify the app and brand associated with online buying and selling.", + "bbox": [ + [ + 315, + 162, + 281, + 44 + ], + [ + 197, + 162, + 103, + 44 + ], + [ + 49, + 192, + 88, + 29 + ], + [ + 197, + 222, + 251, + 44 + ], + [ + 256, + 281, + 88, + 14 + ], + [ + 182, + 281, + 74, + 14 + ], + [ + 182, + 310, + 103, + 29 + ], + [ + 286, + 310, + 44, + 29 + ], + [ + 315, + 384, + 74, + 29 + ], + [ + 93, + 384, + 59, + 14 + ] + ] + }, + { + "image_name": "google_apps_6888264619940729470_4.png", + "question": "What function might serve within the app?", + "question_id": 10, + "dataset_name": "android_QA_box", + "gt_answers": " looks to be a navigation element that would allow users to view their personal collection of photos within the application.", + "bbox": [ + [ + 118, + 80, 
+ 190, + 21 + ], + [ + 96, + 168, + 73, + 21 + ], + [ + 227, + 168, + 87, + 21 + ], + [ + 176, + 168, + 43, + 21 + ], + [ + 15, + 695, + 73, + 21 + ], + [ + 110, + 695, + 87, + 21 + ], + [ + 206, + 695, + 102, + 21 + ], + [ + 315, + 695, + 87, + 21 + ], + [ + 337, + 73, + 14, + 29 + ], + [ + 59, + 73, + 21, + 29 + ] + ] + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/data/android_QA/android_QA_point.json b/evaluation/MDVP-Bench/data/android_QA/android_QA_point.json new file mode 100644 index 0000000000000000000000000000000000000000..fa8d1af1c8cde127a25364e208842b392466d20f --- /dev/null +++ b/evaluation/MDVP-Bench/data/android_QA/android_QA_point.json @@ -0,0 +1,396 @@ +[ + { + "image_name": "single_1770.png", + "question": "What is the purpose of ?", + "question_id": 1, + "dataset_name": "android_QA_point", + "gt_answers": "The purpose of is to allow users to enter or edit a website's URL and then navigate to that page by initiating its loading process within the web browser.", + "points": [ + [ + 669, + 86 + ] + ] + }, + { + "image_name": "install_16096015345286153508_2.png", + "question": "What does represent and where would you typically find this information?", + "question_id": 2, + "dataset_name": "android_QA_point", + "gt_answers": " represents the name of the mobile payment app as displayed in an app store or similar platform. 
This information is typically found below the app's logo on the app's store page or within the search results in the marketplace.", + "points": [ + [ + 177, + 98 + ], + [ + 78, + 220 + ], + [ + 207, + 189 + ], + [ + 609, + 227 + ], + [ + 207, + 227 + ], + [ + 268, + 227 + ], + [ + 603, + 319 + ], + [ + 131, + 326 + ], + [ + 367, + 326 + ], + [ + 124, + 356 + ] + ] + }, + { + "image_name": "general_5621311415953838849_1.png", + "question": "What information does the text in convey about the user's temporal context?", + "question_id": 3, + "dataset_name": "android_QA_point", + "gt_answers": " provides the day of the week and date, indicating it is Monday, October 10th.", + "points": [ + [ + 278, + 170 + ], + [ + 529, + 162 + ], + [ + 566, + 1168 + ], + [ + 86, + 1309 + ] + ] + }, + { + "image_name": "single_104.png", + "question": "How are and related to each other in context?", + "question_id": 4, + "dataset_name": "android_QA_point", + "gt_answers": "The phrases in 'Save Money.' and 'Live Better' likely form a complete slogan, suggesting that by saving money with the retailer, consumers can live better.", + "points": [ + [ + 93, + 88 + ], + [ + 159, + 169 + ], + [ + 292, + 176 + ], + [ + 418, + 169 + ], + [ + 152, + 199 + ], + [ + 130, + 273 + ], + [ + 48, + 288 + ], + [ + 41, + 303 + ], + [ + 159, + 303 + ], + [ + 159, + 391 + ] + ] + }, + { + "image_name": "install_7655036073434526507_5.png", + "question": "What is the primary use of the feature described in ?", + "question_id": 5, + "dataset_name": "android_QA_point", + "gt_answers": "The primary use of the feature in is for searching content such as apps and games within the device or on an online platform by entering text into the search bar.", + "points": [ + [ + 298, + 91 + ], + [ + 161, + 182 + ], + [ + 169, + 273 + ], + [ + 207, + 364 + ], + [ + 191, + 455 + ] + ] + }, + { + "image_name": "single_2850.png", + "question": "What function might perform when clicked on?", + "question_id": 6, + "dataset_name": 
"android_QA_point", + "gt_answers": "When clicked on, might redirect the user to the account details page or a login page for accessing an account or profile.", + "points": [ + [ + 15, + 33 + ], + [ + 251, + 33 + ], + [ + 219, + 69 + ], + [ + 251, + 69 + ], + [ + 179, + 69 + ], + [ + 242, + 105 + ], + [ + 24, + 105 + ], + [ + 216, + 350 + ], + [ + 18, + 516 + ], + [ + 251, + 516 + ] + ] + }, + { + "image_name": "web_shopping_58854.png", + "question": "What interaction can be expected when activating the element in ?", + "question_id": 7, + "dataset_name": "android_QA_point", + "gt_answers": "When the 'Sign in' button in is activated, it will prompt the user to enter their login credentials to access their Amazon account.", + "points": [ + [ + 216, + 64 + ], + [ + 91, + 69 + ], + [ + 145, + 91 + ], + [ + 345, + 150 + ], + [ + 123, + 139 + ], + [ + 102, + 215 + ], + [ + 145, + 290 + ], + [ + 123, + 361 + ], + [ + 236, + 437 + ], + [ + 388, + 437 + ] + ] + }, + { + "image_name": "single_391.png", + "question": "What is the function of the element in ?", + "question_id": 8, + "dataset_name": "android_QA_point", + "gt_answers": "The element in is likely a button that can be used to cancel the search query in the search bar.", + "points": [ + [ + 230, + 91 + ], + [ + 124, + 197 + ], + [ + 655, + 197 + ], + [ + 131, + 303 + ], + [ + 108, + 341 + ], + [ + 108, + 410 + ], + [ + 93, + 493 + ], + [ + 184, + 493 + ], + [ + 253, + 493 + ], + [ + 161, + 577 + ] + ] + }, + { + "image_name": "web_shopping_272285.png", + "question": "What action will most likely be taken when interacting with ?", + "question_id": 9, + "dataset_name": "android_QA_point", + "gt_answers": "When interacting with , users can enter a search term to find products or services offered by Walmart on their website.", + "points": [ + [ + 226, + 88 + ], + [ + 145, + 191 + ], + [ + 662, + 199 + ], + [ + 137, + 303 + ], + [ + 159, + 369 + ], + [ + 71, + 451 + ], + [ + 71, + 510 + ], + [ + 130, + 510 + ], + [ + 
278, + 510 + ], + [ + 329, + 510 + ] + ] + }, + { + "image_name": "google_apps_3619507004205175709_3.png", + "question": "What is the function of the icon in ?", + "question_id": 10, + "dataset_name": "android_QA_point", + "gt_answers": "The icon in is a forward arrow that most likely functions as a button to move to the next step or page in the app.", + "points": [ + [ + 201, + 269 + ], + [ + 205, + 456 + ], + [ + 202, + 497 + ], + [ + 377, + 650 + ], + [ + 322, + 705 + ], + [ + 84, + 705 + ] + ] + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/data/android_detailed_caption/android_detailed_caption_box.json b/evaluation/MDVP-Bench/data/android_detailed_caption/android_detailed_caption_box.json new file mode 100644 index 0000000000000000000000000000000000000000..d24586140b366956cf51474aaf16fd7e2ac401d9 --- /dev/null +++ b/evaluation/MDVP-Bench/data/android_detailed_caption/android_detailed_caption_box.json @@ -0,0 +1,674 @@ +[ + { + "image_name": "web_shopping_251980.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 1, + "dataset_name": "android_detailed_caption_box", + "gt_answers": ": This is a status bar displaying the date \"Friday, Sep 2,\" along with the temperature \"61\u00b0F.\" It informs the user about the current day, date, and ambient temperature, likely provided by a weather service. This text is generally not interactive and serves as an informational display only.\n: This represents a capital letter \"G\" set inside a circle. It is part of a larger icon that is likely associated with a search engine or service, commonly used as a button to initiate web searches or to access related services. Interaction with this icon typically leads to the search engine's interface or homepage.\n: This icon depicts clouds, which is a visual representation often used for weather-related services or apps. 
It indicates current or forecasted weather conditions and is typically interactive, leading the user to a detailed weather report upon tapping.\n: The gear-like icon signifies settings or options. It is a common symbol for accessing system settings or preferences in an application or operating system. Interacting with this icon usually opens a menu where various settings can be adjusted.\n: The icon resembling a speech bubble is often associated with messaging services or chat applications. Tapping it would commonly open a messaging app where users can send messages, photos, or videos to others.\n: This icon, featuring a triangle resembling a \"play\" button, is widely recognized as a symbol for media players or services offering video and music content. Interaction with this icon would likely launch a media playback application or service.\n: The icon represents a search engine or a suite of services provided by a major tech company, indicated by the letter \"G\" and vibrant colors. Interacting with this icon typically brings the user to a home screen with access to various services offered by the company, such as search, email, maps, and more.\n: This icon, representing a microphone inside a colorful outline, is suggestive of a digital assistant or voice search feature. 
When interacted with, it would generally activate a voice recognition service allowing users to speak commands or queries for assistance.\n: The circle icon in a navigation bar is typically an interactive home button on a mobile device, often bringing the user back to the home screen when tapped.\n: The icon with a triangle pointing leftwards resembles a \"back\" navigation button, generally used to go back to the previous screen in an application or navigate backwards in a browser.", + "bbox": [ + [ + 82, + 117, + 374, + 35 + ], + [ + 36, + 1006, + 46, + 58 + ], + [ + 328, + 105, + 35, + 35 + ], + [ + 457, + 900, + 23, + 46 + ], + [ + 153, + 900, + 35, + 46 + ], + [ + 59, + 912, + 23, + 23 + ], + [ + 59, + 1006, + 23, + 46 + ], + [ + 457, + 1017, + 23, + 35 + ], + [ + 258, + 1111, + 23, + 35 + ], + [ + 129, + 1111, + 11, + 35 + ] + ] + }, + { + "image_name": "install_18183248185514867672_2.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 2, + "dataset_name": "android_detailed_caption_box", + "gt_answers": ": This icon is indicative of the Microsoft Excel mobile application, recognizable by its green 'X' on a white background, which suggests a tool for creating and editing spreadsheets. It is likely an interactive element that, upon touch or click, launches the application allowing users to work with spreadsheets on their mobile device.\n: This is the Facebook mobile application icon, featuring a lowercase 'f' on a blue background. When interacted with, it typically opens the Facebook app where users can browse their news feed, connect with friends and family, post updates, and engage in social networking activities.\n: Represented here is an icon for the Speedtest application by Ookla, depicted by a speedometer graphic suggesting the app's function of measuring internet connection speed. 
Tapping on this icon will likely open the app and allow the user to test their current internet speed.\n: This icon, featuring a camera lens and a gradient background, is for the Instagram mobile application. Interacting with this icon will usually open the app, providing access to photo and video sharing, as well as viewing the content from others on the Instagram social network.\n: Resembling a house with a triangular roof, this icon signifies a home automation or real estate application. Interaction with this icon would open the respective app, providing controls for smart home devices or real estate listings, depending on its specific function.\n: This icon has a feather, hinting at a lite version of an application that offers a minimalistic or resource-efficient option, typically for use in areas with limited connectivity or on devices with lower performance.\n: Featuring a speech bubble with a lightning bolt, this is the Facebook Messenger app icon. It signifies an app dedicated to messaging which, upon interaction, opens a platform where users can send messages, share media, and participate in video calls.\n: The icon partially reads \"Home De...\" against an orange square, suggesting a home improvement or retail company's app, possibly offering goods or services related to home refurbishment or decoration. The app's full functionality would be revealed upon opening it.\n: The green owl represents Duolingo, an educational platform icon with its function being language learning. 
Upon touching the icon, the user would engage with the app to learn a new language through interactive lessons.\n: This icon, with an abstract design and the letters 'GE', likely signifies a news or media application that provides users with news articles, updates, and possibly live reporting, accessible by touching the icon to open the app.", + "bbox": [ + [ + 86, + 182, + 45, + 15 + ], + [ + 390, + 182, + 106, + 15 + ], + [ + 223, + 182, + 106, + 30 + ], + [ + 557, + 182, + 121, + 30 + ], + [ + 71, + 395, + 75, + 15 + ], + [ + 588, + 395, + 45, + 15 + ], + [ + 208, + 395, + 136, + 15 + ], + [ + 375, + 395, + 136, + 15 + ], + [ + 71, + 516, + 75, + 30 + ], + [ + 588, + 516, + 45, + 30 + ] + ] + }, + { + "image_name": "google_apps_14797572530627259726_40.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 3, + "dataset_name": "android_detailed_caption_box", + "gt_answers": ": This area contains the term \"Search,\" suggesting it is likely related to a search function where a user can input queries to locate specific settings or information within this system or application.\n: The word \"settings\" indicates an option or heading that relates to configuration options. Interacting with it would typically bring up a menu to adjust system preferences or application parameters.\n: The term \"mobile,\" followed by a comma hints at a list or continuation of related topics, likely referring to mobile network settings or features in the context of this system or application.\n: The word \"data\" in this context may refer to mobile data usage and settings. 
It suggests an option to view or adjust how the device handles cellular data.\n: This term \"Wi-Fi,\" ending with a comma, implies it is part of a series, possibly relating to Wi-Fi settings where a user can manage Wi-Fi networks and preferences.\n: The word \"and\" serves as a conjunction within a list or sentence, indicating the addition of more items or concepts that are related to the ones previously mentioned.\n: This term \"hotspot\" typically refers to a feature where the device can share its internet connection with other devices through Wi-Fi, Bluetooth, or USB.\n: The term \"usage,\" followed by a comma, likely relates to the tracking or monitoring of resource consumption, such as data, battery, or connectivity usage.\n: The word \"Connected\" suggests it pertains to the status or management of connected devices or networks, such as Bluetooth connections or Wi-Fi networks.\n: The partially visible term \"Lo\" could be part of a word that identifies a feature, option, or information related to the system or application settings.", + "bbox": [ + [ + 140, + 102, + 80, + 21 + ], + [ + 227, + 102, + 102, + 21 + ], + [ + 206, + 175, + 87, + 29 + ], + [ + 301, + 175, + 58, + 21 + ], + [ + 125, + 175, + 65, + 29 + ], + [ + 220, + 212, + 43, + 21 + ], + [ + 271, + 212, + 95, + 29 + ], + [ + 125, + 219, + 80, + 21 + ], + [ + 125, + 307, + 161, + 21 + ], + [ + 30, + 344, + 51, + 29 + ] + ] + }, + { + "image_name": "web_shopping_98501.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 4, + "dataset_name": "android_detailed_caption_box", + "gt_answers": ": This area is a text-entry field in a web browser, typically used for typing in web addresses or performing web searches. 
Interaction with this field usually involves clicking or tapping to enter text, and pressing Enter would initiate a web search or take the user to the entered web address.\n: This is a tab title within a web browser, indicating that the user has accessed or searched for 'eBay shopping' in this tab. The text serves a navigational purpose, allowing the user to identify and switch to the associated web page when multiple tabs are open.\n: This label refers to a 'cart' on a shopping platform, hinting at a functionality that allows users to view items that have been added to a virtual shopping cart. It is likely interactive and clicking it would navigate the user to a page summarizing their selected items for purchase.\n: This text is a URL displayed in the address bar of a web browser. It indicates that the current web page pertains to the shopping cart of the eBay website. The user can click on this text to edit the URL or copy it for use elsewhere.\n: This text suggests that the user is visiting or has searched for a page related to 'Welcome to Costco Wholesale'. It might serve as a title for a page, potentially indicating that the user can find information regarding Costco's offerings through this tab.\n: Here, 'costco.com' is the simplified representation of an address bar or tab title suggesting that the user is visiting Costco's website. Users interact with this by clicking it to switch to the Costco tab within the browser.\n: This phrase 'Pay Less.' is likely associated with a slogan or branding message, suggesting a value proposition to customers; the promise of spending less for the products or services offered by the entity associated with this phrase.\n: The text 'Target:' resembles a title or a navigational cue for a segment within a web browser, it may indicate a web page related to the retail company Target and is probably part of a list or compilation of bookmarks or frequently visited sites.\n: Similar to , 'Expect More.' 
is a slogan that implies a promise of greater value, service, or product quality from the associated entity. It is designed to communicate a marketing message or company ethos to customers.\n: This text 'target.com' represents a simplified address, similar to , likely indicating that the user is visiting or has the option to visit Target's website. Interaction with this area would navigate to or indicate presence at Target's web page.", + "bbox": [ + [ + 30, + 68, + 285, + 22 + ], + [ + 87, + 136, + 148, + 22 + ], + [ + 247, + 148, + 34, + 11 + ], + [ + 87, + 171, + 205, + 22 + ], + [ + 87, + 228, + 319, + 22 + ], + [ + 87, + 262, + 102, + 22 + ], + [ + 315, + 319, + 102, + 34 + ], + [ + 87, + 319, + 79, + 22 + ], + [ + 167, + 319, + 136, + 22 + ], + [ + 87, + 353, + 102, + 22 + ] + ] + }, + { + "image_name": "install_5797941172247377583_7.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 5, + "dataset_name": "android_detailed_caption_box", + "gt_answers": ": The text appears at the top of the screenshot and is likely the title of the application or page currently being viewed. It suggests that the content of the page is related to the Skype application, possibly for download or further information purposes.\n: This text is also indicative of the Skype application. It is usually the main header on an app page and is a non-interactive element that provides the user with confirmation of the app's identity.\n: The icon displayed represents the Skype application. It's typically used as a visual identifier of the app within digital stores or on a device's home screen. It serves as a non-interactive branding element in this context.\n: The word \"Install\" is generally an interactive button when found on an application download page. 
Tapping this button would initiate the download and installation of the app onto the user's device.\n: This repeated mention of \"Skype\" may refer to the name of the application on its store page. It usually appears below the app icon and serves as a non-interactive title or label.\n: The term \"purchases\" here likely relates to transactions associated with the app, suggesting that the app might offer in-app purchases.\n: Paired with the previous \"purchases\" text, \"In-app\" specifies the location or type of purchases available, indicating that users can buy items or services within the app itself.\n: The rating \"4.1*\" is indicative of user reviews and ratings for the app. It reflects the app's quality as perceived by its users and is usually an averaged score based on individual user ratings.\n: \"1B+\" signifies the number of times the app has been downloaded, indicating that the Skype app has been downloaded over one billion times.\n: The phrase \"Editors' Choice\" likely denotes a special recognition or endorsement by the app store's editorial team, suggesting that the app comes highly recommended.", + "bbox": [ + [ + 132, + 76, + 76, + 30 + ], + [ + 162, + 182, + 76, + 15 + ], + [ + 56, + 182, + 45, + 60 + ], + [ + 572, + 212, + 75, + 15 + ], + [ + 162, + 212, + 76, + 30 + ], + [ + 253, + 258, + 106, + 15 + ], + [ + 162, + 258, + 76, + 15 + ], + [ + 86, + 319, + 60, + 30 + ], + [ + 299, + 319, + 45, + 30 + ], + [ + 451, + 364, + 75, + 15 + ] + ] + }, + { + "image_name": "google_apps_8853455778583749835_5.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 6, + "dataset_name": "android_detailed_caption_box", + "gt_answers": ": This region contains the phrase \"search settings,\" which indicates a function allowing the user to search within the settings menu. The presence of a magnifying glass icon suggests that this is an interactive search bar interface element. 
Typically, a user would tap this area and input text to locate specific settings.\n: This portion features the single character \"M\" which is typically representative of a user's initial or an application's logo. It is stylized with a certain thickness and distinct font that can be indicative of a branding design or user personalization within a software interface.\n: This region includes the word \"Add\" presented with clear, bold lettering on a button or interactive element. This is commonly used to initiate the process of adding new elements, possibly in this context to add a new email account, as suggested by the surrounding text.\n: The text \"another email\" is part of a larger phrase that suggests functionality for adding additional email addresses to the account or application in use. It is likely not interactive by itself but is part of instructional or descriptive text guiding the user's actions.\n: Featuring the word \"account,\" this text complements the nearby phrase and is associated with the process of adding or managing email accounts within the application or device settings. It would generally not be interactive but adds context to the interface's options.\n: The phrase \"Set up your personal\" suggests a prompt or direction for the user to configure personal settings, possibly related to an email or other account settings. This text is usually static and provides guidance or instructions within a user interface.\n: Paired with the previous text, \"or work email\" completes instructions for setting up email accounts of different types (personal or professional) within an application. This portion of text helps to further define the user's options for account configuration.\n: \"Network & internet\" signifies a category or menu within the settings that pertains to adjusting network-related preferences including Wi-Fi, data usage, and related connectivity features. 
This text typically leads to a subsection where related settings can be modified.\n: This section, \"Wi-Fi, mobile, data\" lists different connectivity options available to the user for configuration. It could be informative text providing a summary of the settings contained within the \"Network & internet\" menu mentioned in the previous region.\n: The presence of the word \"usage,\" combined with the context from the surrounding text, relates to data consumption aspects of the device's network settings. \"And\" suggests there are additional relevant aspects listed after this text, likely related to managing network services or features.", + "bbox": [ + [ + 110, + 79, + 148, + 22 + ], + [ + 30, + 171, + 22, + 22 + ], + [ + 30, + 216, + 34, + 11 + ], + [ + 64, + 216, + 125, + 11 + ], + [ + 201, + 216, + 68, + 11 + ], + [ + 30, + 239, + 159, + 11 + ], + [ + 190, + 239, + 102, + 11 + ], + [ + 99, + 330, + 159, + 22 + ], + [ + 99, + 364, + 136, + 11 + ], + [ + 247, + 364, + 79, + 11 + ] + ] + }, + { + "image_name": "google_apps_9088043504221051292_1.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 7, + "dataset_name": "android_detailed_caption_box", + "gt_answers": ": This area displays the text \"Wednesday, May 18,\" which appears to show the date information, presumably representing the current day of the week and the month's date. This is typically displayed on mobile devices as part of the user interface to inform the user of the current date.\n: The text \"Maps\" suggests an application name, likely a mapping or navigation app, which users commonly utilize to find locations, get directions, or explore maps of different areas. It is probable that tapping on this text would open the associated application.\n: This is a single character \"G,\" customarily associated with Google\u2019s branding. It often represents access to Google's search services or apps affiliated with Google. 
Interacting with this symbol would possibly lead to a Google product or service.\n: This icon, often indicative of location services or map functionality, is commonly used to represent a user's current location or to access location-based features. Interacting with this icon typically opens a mapping application that shows the user's real-time location on a map.\n: This icon depicts a chat bubble, usually associated with messaging or communication services. It typically indicates the user's chat or messaging applications, and interaction would likely open the associated messaging service to send or receive messages.\n: This icon with a colorful design resembling a camera shutter or a wheel hints at the Google Chrome browser, which is widely used for Internet browsing. Tapping this icon would typically open the Chrome browser for web navigation.\n: The icon exhibits the Google Assistant symbol, suggesting voice-activated or typing search query functionality. Interacting with this icon would likely invoke Google Assistant to help with tasks, answer questions, or control smart home devices.", + "bbox": [ + [ + 103, + 80, + 204, + 21 + ], + [ + 37, + 417, + 29, + 7 + ], + [ + 23, + 658, + 43, + 36 + ], + [ + 52, + 358, + 14, + 36 + ], + [ + 37, + 578, + 21, + 36 + ], + [ + 44, + 658, + 14, + 36 + ], + [ + 345, + 666, + 21, + 29 + ] + ] + }, + { + "image_name": "single_1849.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 8, + "dataset_name": "android_detailed_caption_box", + "gt_answers": ": The URL displayed in the address bar indicates that the webpage belongs to costco.com and includes a path, or endpoint, that suggests a functional page, which the text 'Check' implies may be related to a checkout or verification process. 
This is the web address users can navigate to for interacting with the website's functionality.\n: The logo represents the brand identity for Costco Wholesale, indicating that the user is currently on the official website of this retail company. As a logo, it may serve as a clickable element that typically redirects users to the homepage of the website.\n: Labeled 'Warehouses,' this indicates a section of the website where users can find information about Costco's physical store locations. It likely functions as a link that, when clicked, will take the user to a page detailing warehouse locations and related information.\n: Marked 'Account,' this suggests a section pertaining to user account management. Clicking on this would likely allow the user to access their personal account details, sign in, or manage their membership and profile.\n: This icon appears to represent a shopping cart, which is commonly used on e-commerce websites to signify where users can view items they intend to purchase. Clicking on it would probably take the user to view their current selections or to the checkout page.\n: Highlighting the word 'Shop,' this implies a navigational link designed to direct users to the online shopping section of the website, where they can browse and choose products for purchase.\n: This appears to be a search bar, where users can enter keywords or phrases to find specific items or information on the website. Such fields typically include an interactive function that generates search results upon entry confirmation.\n: 'My Warehouse' likely refers to the user's preferred or designated Costco warehouse location. 
It may include functionality for the user to select or change their preferred store and may show additional details, such as operational hours.\n: The phrase 'Delivery Location' accompanied with what appears to be a postal code implies a feature that allows users to specify or view the location to which online purchases will be delivered.\n: Containing the city name 'Seattle,' this suggests the chosen warehouse or delivery location for the user. It might be interactive to allow the user to change the location or view information on the selected warehouse.", + "bbox": [ + [ + 121, + 68, + 262, + 22 + ], + [ + 30, + 136, + 171, + 45 + ], + [ + 304, + 171, + 79, + 22 + ], + [ + 406, + 171, + 57, + 22 + ], + [ + 486, + 182, + 22, + 0 + ], + [ + 42, + 216, + 91, + 34 + ], + [ + 178, + 228, + 68, + 11 + ], + [ + 19, + 296, + 125, + 22 + ], + [ + 270, + 296, + 136, + 22 + ], + [ + 64, + 330, + 68, + 22 + ] + ] + }, + { + "image_name": "install_125967318814166469_6.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 9, + "dataset_name": "android_detailed_caption_box", + "gt_answers": ": This text likely represents the name of an application or service known as Fetch Rewards, potentially hinting at a rewards system that users can utilize by engaging with the app.\n: This is the word \"Play,\" which usually is associated with initiating an action or starting something, such as a video or game within an application.\n: The phrase \"to earn\" typically suggests that there is an opportunity to gain something\u2014often points, money, or rewards\u2014by performing certain actions or tasks.\n: The word \"MAKE\" usually denotes the action of creating something or obtaining an outcome, perhaps insinuating that users can create or earn money through the app.\n: This text states \"MONEY,\" which indicates that the application or service likely involves opportunities for users to earn financial rewards or benefits.\n: The 
text \"appminer st\" is not immediately clear, but it could be a truncation or part of a larger phrase, possibly indicating a feature within the app, or related to app mining or statistics.\n: The word \"Contains\" usually suggests that what follows will describe the contents or features within the app, which in this case could be related to advertisements.\n: This term generally refers to \"advertisements,\" suggesting that the application includes ads that users might see while utilizing the app.\n: The numerical figure \"50K+\" generally implies a quantity greater than 50,000, typically used in the context of downloads, users, or items within an app.\n: The characters \"4.7*\" indicate a rating, likely on a 5-point scale, suggesting that users have rated the app positively, with 4.7 out of 5 stars.", + "bbox": [ + [ + 156, + 125, + 250, + 34 + ], + [ + 418, + 125, + 68, + 34 + ], + [ + 156, + 182, + 114, + 22 + ], + [ + 42, + 193, + 22, + 11 + ], + [ + 76, + 193, + 34, + 11 + ], + [ + 156, + 216, + 114, + 22 + ], + [ + 156, + 250, + 68, + 11 + ], + [ + 235, + 250, + 22, + 11 + ], + [ + 452, + 307, + 45, + 11 + ], + [ + 64, + 307, + 45, + 11 + ] + ] + }, + { + "image_name": "single_2921.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 10, + "dataset_name": "android_detailed_caption_box", + "gt_answers": ": This area contains the webpage title indicating the user is on a retail website known for its wide range of products, hinting at online shopping capabilities. The title is typically non-interactive and serves as an identifier of the site.\n: This text seems to be an incorrect or truncated URL for the same retail website mentioned in Region 1. 
Possibly a typographical error within the text, it seems non-functional.\n: The text here represents a search query within a search bar of the website, suggesting the user is looking for a Lenovo ThinkPad, which is a model of a laptop computer.\n: Labeled 'Cancel,' this is likely an interactive button used to clear the current search query within the search bar. Once tapped or clicked, it should clear the input text.\n: This text duplicates the query in Region 3 and is part of the search bar suggestions or search history, indicating a previous or common search made by the user.\n: The single word 'Lenovo,' which is part of a search suggestion below the search bar, represents the brand that manufactures various electronic devices, including laptops.\n: The word 'ThinkPad' refers to a specific line of laptops and is part of a search suggestion. Standalone, it specifies the user's interest in the ThinkPad series by Lenovo.\n: The text 'ir' appears to be an incomplete or mistyped search term or fragment within the search suggestions. 
Its context is unclear without additional information.\n: This text, likely a category label, indicates the section of the site the user is navigating, presumably the electronics category where items like laptops would be found.\n: Representing a longer search suggestion, this phrase indicates a related accessory for the Lenovo ThinkPad, specifically a charger, suggesting the user might be looking to purchase this item.", + "bbox": [ + [ + 51, + 24, + 108, + 12 + ], + [ + 51, + 36, + 48, + 12 + ], + [ + 21, + 72, + 72, + 18 + ], + [ + 231, + 78, + 30, + 6 + ], + [ + 15, + 114, + 78, + 12 + ], + [ + 15, + 150, + 36, + 6 + ], + [ + 51, + 150, + 42, + 12 + ], + [ + 93, + 150, + 6, + 6 + ], + [ + 105, + 150, + 36, + 6 + ], + [ + 15, + 180, + 120, + 12 + ] + ] + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/data/android_detailed_caption/android_detailed_caption_point.json b/evaluation/MDVP-Bench/data/android_detailed_caption/android_detailed_caption_point.json new file mode 100644 index 0000000000000000000000000000000000000000..090e1463f319f6a9236ef17bdea855138dd91ff9 --- /dev/null +++ b/evaluation/MDVP-Bench/data/android_detailed_caption/android_detailed_caption_point.json @@ -0,0 +1,444 @@ +[ + { + "image_name": "web_shopping_363481.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 1, + "dataset_name": "android_detailed_caption_point", + "gt_answers": ": This region displays the text \"Mon. Nov,\" which likely represents the abbreviated day of the week, Monday, and an abbreviation for the month of November. It appears to be part of the system interface that provides the user with the current date information.\n: The text \"9\" seems to be indicating the time, likely representing 9 minutes past the hour. 
This is typically presented alongside hours in a digital clock display, which is part of a user interface that communicates the current time.\n: The letter \"G\" is likely the logo or icon for Google, suggesting that this button or symbol could provide access to Google-related services or functions when interacted with, like a search function or the Google Assistant.\n: This icon represents a typical phone call function. Clicking on it would likely initiate a phone call, accessing the phone's dialer or call history, allowing a user to make a call.\n: The icon here is associated with messaging or chat functions. Interacting with it would open up a messaging app or the chat interface where a user can send and receive messages.\n: This icon denotes a media play function, possibly to play music or videos. Clicking it would likely open a media player application or resume media playback.\n: The icon indicates a microphone, which is commonly used to initiate voice input or voice command features on a device, such as activating voice search or a digital assistant.\n: The Google \"G\" logo represents the Google app or a shortcut to Google services. Engaging with this icon would typically launch the Google app or perform a Google search.\n: The icon here is an arrow pointing left, generally representing a \"Back\" navigation button. 
It could be used to navigate back to a previous screen or close the current application view.\n: This icon suggests a navigation bar rectangle, usually appearing at the bottom of the screen on some Android devices to denote the 'Home' button, used to return to the home screen.", + "points": [ + [ + 123, + 117 + ], + [ + 500, + 1153 + ], + [ + 78, + 1301 + ], + [ + 92, + 1146 + ], + [ + 226, + 1146 + ], + [ + 633, + 1138 + ], + [ + 625, + 1309 + ], + [ + 92, + 1309 + ], + [ + 145, + 1434 + ], + [ + 566, + 1434 + ] + ] + }, + { + "image_name": "install_17210609411686286427_4.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 2, + "dataset_name": "android_detailed_caption_point", + "gt_answers": ": This is a search bar at the top of a mobile interface, inviting users to input text to search for applications and games within the device's app store or similar platform. The text, 'Search for apps & games', suggests an interactive function where a user can tap and begin typing.\n: This text displays the current time as '9:43' which is likely located at the top of a smartphone screen. It is not interactive and serves the purpose of informing the user of the time.\n: This text is part of an application name, 'mcdonald's app', showcased in a list that suggests installed applications or search results on a mobile device. It is typically interactive, potentially allowing a user to tap it to either open the app or view more details.\n: Similar to , this is the name of another application, 'indeed job search', likely in a list of installed apps or search results. It is interactive, and tapping it would either launch the app or provide additional information about it.\n: Positioned beside 'indeed job search', this is simply the generic word 'app', which confirms the nature of the list item in as an application. 
It might not be separately interactive and is probably part of the app's full name as displayed in the list.\n: This label, 'youtube kids', signifies the name of an application. Like and , it indicates an item in a list of apps and is almost certainly interactive, meant to be tapped for further action, such as opening the application.", + "points": [ + [ + 215, + 70 + ], + [ + 112, + 134 + ], + [ + 166, + 210 + ], + [ + 166, + 280 + ], + [ + 275, + 280 + ], + [ + 145, + 344 + ] + ] + }, + { + "image_name": "web_shopping_323098.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 3, + "dataset_name": "android_detailed_caption_point", + "gt_answers": ": This area displays the text \"Wednesday,\" representing a day of the week, providing users with information about the current day. It usually forms part of a date and time display on a device's home or lock screen. This text is typically not interactive and serves as an informational element rather than a functional one.\n: This displays the numeral \"2,\" which likely indicates the current date of the month. Together with other elements, it would usually form part of the full date display. Similar to , this text is not interactive and is meant to inform the user of the current date.\n: This region shows the abbreviated month \"Nov,\" which stands for November, integrating with and to indicate a full date ('Wednesday, 2 Nov'). This serves as a non-interactive element that provides users with the current month information on their device.\n: This represents an upward-pointing chevron, commonly used as a visual cue for the user to open or expand a menu, revealing additional content or options. Depending on the interface, interacting with this icon typically initiates an action such as displaying a hidden menu or accessing a dropdown list.\n: Shown is an icon typically representing a chat or messaging application. 
When interactive, tapping this icon would open the associated application, allowing the user to send messages, read conversations, or engage in other communication-related activities.\n: This is the logo for Google, which usually indicates access to Google services or applications. If interactive, tapping the icon would likely take the user to the Google search engine, or it could open a suite of Google apps provided on the device.", + "points": [ + [ + 294, + 165 + ], + [ + 539, + 165 + ], + [ + 475, + 165 + ], + [ + 352, + 1137 + ], + [ + 222, + 1216 + ], + [ + 86, + 1345 + ] + ] + }, + { + "image_name": "web_shopping_210077.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 4, + "dataset_name": "android_detailed_caption_point", + "gt_answers": ": This area displays a URL, which indicates the web address of the current page being viewed. It specifies that the user is on the shopping cart section of the 'ebay.com' website.\n: This text signifies the branding of the website currently being accessed. The text is typically a clickable hyperlink that would redirect the user back to the homepage of the eBay website.\n: This text informs the user that they are not logged in to their account. It is likely placed as a prompt to encourage the user to sign in to access additional features.\n: This text is part of a message intended to persuade the user to sign in. 
It suggests that there are actions available to save the items being viewed.\n: Continuing from the previous text, it elaborates on the benefits of signing in, indicating that items can be saved or previous items viewed.\n: This text concludes the message encouraging the user to sign in to manage their items, with 'sign in' possibly being a clickable link to the sign in page.\n: This text is the title of the section or page, indicating that the user is viewing their shopping cart, which contains the items they have added for potential purchase.\n: This text represents a call to action, likely a button, prompting the user to proceed with the checkout process with the items in their shopping cart.\n: This text displays the total price for the items currently in the shopping cart, which gives the user a quick view of how much they would need to pay for the items selected.\n: This text describes a summary of the items in the shopping cart, indicating the subtotal price for a defined number of items in this case, 4 items.", + "points": [ + [ + 245, + 91 + ], + [ + 78, + 174 + ], + [ + 268, + 273 + ], + [ + 496, + 280 + ], + [ + 352, + 319 + ], + [ + 169, + 349 + ], + [ + 146, + 432 + ], + [ + 359, + 547 + ], + [ + 648, + 630 + ], + [ + 131, + 638 + ] + ] + }, + { + "image_name": "web_shopping_160127.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 5, + "dataset_name": "android_detailed_caption_point", + "gt_answers": ": Analysis reveals that the element represents text in a user interface, specifically the word \"Barrel.\" The typography suggests it belongs to a title or label within an application or digital environment, potentially related to search or identification of a subject matter.\n: This element indicates the text \"Crate\" displayed within a digital interface. 
Like \"Barrel,\" this text is likely to be part of a title or keyword in a search query or categorization system in the application or digital space it's situated in.\n: The text \"&\" is symbolic, often used to represent the conjunction \"and.\" In digital interfaces, it typically serves to link concepts or terms, suggesting a relationship or combination of items or categories, such as \"Crate & Barrel.\"\n: Similar to , this element represents the word \"Barrel,\" which is part of textual content in a user interface. Its role is likely analogous, serving as a search term or identifier within the application's context.\n: The text \"Crate\" here echoes the content in , serving as part of a textual representation within a digital interface. It holds a consistent purpose with , potentially identifying or categorizing content.\n: Like , this is the text \"&,\" signifying the word \"and.\" Its appearance in an application or digital interface suggests it's combined with other textual elements to denote a composite term or a linked concept.\n: This element contains the text \"&,\" recurrent in its function as per and , linking terms or concepts within the application or digital interface.\n: The word \"Barrel,\" present here, is a piece of text within a user interface. Its role is consistent with earlier-mentioned textual elements, serving as part of a composite term or a searchable keyword.\n: This represents the text \"Crate,\" found within a digital application's user interface. Functionally aligned with similar text elements, it is part of a searchable or identifiable term within the system.\n: Similar to the other occurrences, this element is the text \"&,\" used in user interfaces to connect terms or keywords. 
Its utility rests in forming composite terms or expressions that enhance searchability or categorization.", + "points": [ + [ + 200, + 91 + ], + [ + 78, + 91 + ], + [ + 139, + 91 + ], + [ + 261, + 197 + ], + [ + 139, + 197 + ], + [ + 199, + 197 + ], + [ + 55, + 311 + ], + [ + 238, + 311 + ], + [ + 131, + 311 + ], + [ + 184, + 311 + ] + ] + }, + { + "image_name": "google_apps_5543396760681014988_1.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 6, + "dataset_name": "android_detailed_caption_point", + "gt_answers": ": This region shows temperature information, specifically '16\u00b0C'. It suggests a weather update or temperature reading relevant to the user's current or selected location. Typically, this is not an interactive element but provides a quick reference for the current weather conditions.\n: Indicating 'Thursday,' this text signifies the day of the week. It is part of a date or time display on a device screen, usually not interactable and is for informational purposes to keep the user aware of the current day.\n: The text 'Chrome' refers to the Google Chrome web browser. This is probably the name of an app, and tapping on this area would likely open the Chrome browser for internet browsing.\n: Labeled 'Maps,' this most likely refers to the Google Maps application. Typically, tapping on this would open the Maps app, allowing the user to search for locations, navigate, or explore maps.\n: This icon represents a moon, which is commonly used to indicate night mode or a 'do not disturb' feature in operating systems. Interacting with this icon might toggle on or off the related feature.\n: This icon marked 'LOCATION' represents a location service or GPS. Clicking on it would typically open a map or other location-based service, showing the user's current position or allowing them to search for places.\n: The icon indicated as 'CHAT' likely represents a messaging app or function. 
Tapping this would typically open a chat application or conversation list within a messaging service.\n: Featuring a 'G' inside a circle, this icon is symbolic of Google and is likely a shortcut to the Google Search service or might represent the Google app. Interacting with this icon generally initiates a search query or opens the Google app.\n: This icon is associated with 'ASSISTANT', which refers to the Google Assistant AI service. Clicking this icon would usually activate the assistant, allowing the user to make voice commands, ask questions, and control smart devices.", + "points": [ + [ + 311, + 105 + ], + [ + 124, + 105 + ], + [ + 340, + 229 + ], + [ + 70, + 424 + ], + [ + 274, + 101 + ], + [ + 81, + 379 + ], + [ + 154, + 526 + ], + [ + 66, + 639 + ], + [ + 329, + 640 + ] + ] + }, + { + "image_name": "web_shopping_233073.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 7, + "dataset_name": "android_detailed_caption_point", + "gt_answers": ": This area showcases the textual representation of a day of the week, reading \"Sunday.\" It is likely part of a user interface displaying the current date or day, frequently associated with system status bars on digital devices.\n: This is a numeric visual representation, showing the number \"6\". When considering its context within a user interface, it may denote the current date or hour depending on its positioning and nearby elements.\n: The alphanumeric combination \"56\u00b0F\" visible here represents a temperature reading in Fahrenheit. 
It likely serves as a quick, glanceable weather update on a device's status bar or a dedicated weather widget.\n: This text \"Nov\" presumably abbreviates \"November,\" which suggests it is part of date-related information, commonly displayed on digital interfaces to inform users of the current month.\n: This is a single-digit numeral \"9,\" which, depending on context, could represent a date, time, or a numerical notification count associated with an app or system feature.\n: The icon displayed here is indicative of cloud cover or cloudy weather conditions. This icon is typically found within weather-related apps or widgets, and it may be interactive, potentially leading to a more detailed weather forecast upon being tapped.\n: Shown is an envelope icon, a universal symbol for email or messaging. It often represents access to an email application and can signal new messages when overlayed with a number or visual indicator.\n: This is a chat bubble icon, widely recognized as a symbol for messaging services or chat functions. It typically serves as a button to open a messaging app and may indicate unread messages if coupled with a notification badge.\n: Displayed is a play triangle within a circle, signifying it is a play button commonly associated with media apps. It is generally interactive and pressing it would likely start playing media content or open a media application.\n: The icon here is a rounded rectangle with multi-colored sections, likely representing the Google app or Google Assistant. 
Interacting with this icon could open the Google search app or activate the Google Assistant for voice searches and commands.", + "points": [ + [ + 166, + 125 + ], + [ + 297, + 125 + ], + [ + 395, + 125 + ], + [ + 257, + 125 + ], + [ + 383, + 911 + ], + [ + 338, + 119 + ], + [ + 70, + 899 + ], + [ + 167, + 899 + ], + [ + 468, + 900 + ], + [ + 64, + 1008 + ] + ] + }, + { + "image_name": "install_125967318814166469_6.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 8, + "dataset_name": "android_detailed_caption_point", + "gt_answers": ": This text likely represents the name of an application or service known as Fetch Rewards, potentially hinting at a rewards system that users can utilize by engaging with the app.\n: This is the word \"Play,\" which usually is associated with initiating an action or starting something, such as a video or game within an application.\n: The phrase \"to earn\" typically suggests that there is an opportunity to gain something\u2014often points, money, or rewards\u2014by performing certain actions or tasks.\n: The word \"MAKE\" usually denotes the action of creating something or obtaining an outcome, perhaps insinuating that users can create or earn money through the app.\n: This text states \"MONEY,\" which indicates that the application or service likely involves opportunities for users to earn financial rewards or benefits.\n: The text \"appminer st\" is not immediately clear, but it could be a truncation or part of a larger phrase, possibly indicating a feature within the app, or related to app mining or statistics.\n: The word \"Contains\" usually suggests that what follows will describe the contents or features within the app, which in this case could be related to advertisements.\n: This term generally refers to \"advertisements,\" suggesting that the application includes ads that users might see while utilizing the app.\n: The numerical figure \"50K+\" generally 
implies a quantity greater than 50,000, typically used in the context of downloads, users, or items within an app.\n: The characters \"4.7*\" indicate a rating, likely on a 5-point scale, suggesting that users have rated the app positively, with 4.7 out of 5 stars.", + "points": [ + [ + 281, + 142 + ], + [ + 452, + 142 + ], + [ + 213, + 193 + ], + [ + 53, + 198 + ], + [ + 93, + 198 + ], + [ + 213, + 227 + ], + [ + 190, + 255 + ], + [ + 246, + 255 + ], + [ + 474, + 312 + ], + [ + 86, + 312 + ] + ] + }, + { + "image_name": "general_6564305532501069251_6.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 9, + "dataset_name": "android_detailed_caption_point", + "gt_answers": ": This icon typically represents a calendar application or function. It often indicates the presence of scheduling features or date-related information. This icon, when tapped, may open a calendar application or bring forth a date-selection interface.\n: This icon generally signifies a menu or more options, often called a \"kebab menu.\" Upon interaction, it typically opens a dropdown or popup menu with additional options that are related to the current content or application.\n: Commonly known as the \"hamburger icon,\" this symbol usually denotes a hidden menu or navigation drawer. Tapping this icon would likely reveal additional menu options or functionalities within the application.\n: A downward-pointing chevron icon like this is often used to suggest that more information can be revealed or accessed, such as expanding a list, opening a dropdown menu, or displaying hidden content.\n: This plus symbol is widely used as an indicator for adding new content or creating a new entry. It may be used to add events, compose messages, or include other entries within the application in which it appears.\n: This element resembles a navigation bar at the bottom of a mobile application. 
It's designed to help users switch between different sections or functionalities of the app. It would be clickable and would change the displayed content accordingly.\n: Often referred to as a back arrow icon, this is typically used for navigation purposes. It indicates the ability to go back to the previous screen or dismiss the current view in an application.", + "points": [ + [ + 337, + 57 + ], + [ + 384, + 57 + ], + [ + 37, + 53 + ], + [ + 187, + 58 + ], + [ + 362, + 625 + ], + [ + 308, + 705 + ], + [ + 99, + 705 + ] + ] + }, + { + "image_name": "single_1569.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 10, + "dataset_name": "android_detailed_caption_point", + "gt_answers": ": This is likely the address bar of a web browser displaying the domain 'target.com', which is the website of a retail company known as Target. The domain suggests that users can visit this site to browse and potentially purchase products or access services provided by the retailer.\n: This represents part of the corporate motto or tagline for Target, which is generally used to signify their commitment to providing customers with value. 
It provides context for the subsequent parts of the tagline shown in adjacent areas.\n: This is the first part of Target's tagline, \"Expect More.\" This phrase aims to convey the company's promise that consumers can expect more in terms of quality, variety, and overall shopping experience when they shop at Target.\n: The second part of Target's tagline, \"Pay Less,\" implies that despite offering a higher caliber of shopping experience, the prices are competitive or lower than elsewhere, meaning customers get more value for their money.\n: Similar to , this text is the domain 'target.com', and when repeated in this context, it could be part of a search result or an auto-suggestion in the browser showing the user different related searches or visited pages to the Target website.\n: This is another instance of 'target.com', similar to and . It also appears as part of the search results or auto-suggestions, indicating that the user has visited or interacted with this domain on previous occasions.\n: This string of text 'target.com careers' is likely a search query or suggestion related to employment opportunities at Target. It implies that if selected, it would lead the user to a section of the Target website focused on careers and job openings.\n: The text 'target.com check balance' seems to be a search input or suggestion referring to a function on the Target website for checking the balance of a gift card or an account associated with the retailer.\n: Presented as 'target.com/redcard', this text likely refers to a specific sub-page of the Target website dedicated to information and services related to the Target REDcard, which is a credit or debit card that offers additional benefits to Target shoppers.\n: Similar to , it\u2019s a partial representation of the company's name, \"Target,\" without the accompanying domain or context provided in other areas. 
It can indicate a truncated version of the retailer's name or domain as part of search suggestions or results.", + "points": [ + [ + 76, + 69 + ], + [ + 111, + 139 + ], + [ + 217, + 139 + ], + [ + 339, + 139 + ], + [ + 123, + 163 + ], + [ + 129, + 227 + ], + [ + 170, + 309 + ], + [ + 205, + 385 + ], + [ + 205, + 455 + ], + [ + 106, + 455 + ] + ] + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/data/multipanel_Inter_Relationship/multipanel_Inter_Relationship_box.json b/evaluation/MDVP-Bench/data/multipanel_Inter_Relationship/multipanel_Inter_Relationship_box.json new file mode 100644 index 0000000000000000000000000000000000000000..4ac653219ac04cc4447d45c338401996e7111b3d --- /dev/null +++ b/evaluation/MDVP-Bench/data/multipanel_Inter_Relationship/multipanel_Inter_Relationship_box.json @@ -0,0 +1,512 @@ +[ + { + "image_name": "35.png", + "question": "Please analyze the relationship between all marked regions in the image.", + "question_id": 1, + "dataset_name": "multipanel_Inter_Relationship_box", + "gt_answers": ": The thematic flow between these regions seems to describe a concept for a child rights media award, signifying the importance of family protection and education in helping a child reach full potential which is encapsulated in . 
Each image ( and ) appears to be a supportive pillar for the child's development which is affirmed by the equation depicted across the panels.", + "bbox": [ + [ + 329.153605015674, + 71.48589341692791, + 380.87774294670845, + 152.03761755485894 + ], + [ + 144.20062695924764, + 314.43260188087777, + 225.70532915360502, + 249.21630094043886 + ], + [ + 376.1755485893417, + 297.19122257053294, + 230.40752351097177, + 266.4576802507837 + ], + [ + 606.5830721003134, + 297.19122257053294, + 260.1880877742947, + 283.69905956112854 + ] + ] + }, + { + "image_name": "1.png", + "question": "Please analyze the relationship between all marked regions in the image.", + "question_id": 2, + "dataset_name": "multipanel_Inter_Relationship_box", + "gt_answers": ": These regions represent various social and personal challenges people face, such as financial hardship, educational barriers, conflicts, mental health stressors, and the importance of physical well-being.\n: These panels depict issues related to time management, social expectations, work-life balance, the need for support systems, and the problem of domestic violence. 
Each tackles a different dimension of stressors affecting individuals in society.", + "bbox": [ + [ + 61.644394110985246, + 23.411098527746322, + 169.4224235560589, + 214.39637599094 + ], + [ + 255.71007927519818, + 22.17893544733862, + 152.17214043035108, + 228.56625141562856 + ], + [ + 438.07021517553795, + 21.56285390713477, + 144.16308040770102, + 229.79841449603626 + ], + [ + 621.0464326160816, + 24.64326160815402, + 149.09173272933182, + 231.6466591166478 + ], + [ + 805.2548131370329, + 24.027180067950173, + 148.47565118912797, + 229.1823329558324 + ], + [ + 63.4926387315968, + 320.3624009060023, + 170.6545866364666, + 199.61041902604757 + ], + [ + 253.86183465458663, + 319.1302378255946, + 157.10079275198188, + 200.22650056625142 + ], + [ + 439.9184597961495, + 317.8980747451869, + 153.40430351075878, + 204.53907134767837 + ], + [ + 624.7429218573046, + 315.4337485843715, + 147.85956964892412, + 206.38731596828993 + ], + [ + 810.1834654586637, + 319.1302378255946, + 142.93091732729332, + 203.30690826727067 + ] + ] + }, + { + "image_name": "36.png", + "question": "Please analyze the relationship between all marked regions in the image.", + "question_id": 3, + "dataset_name": "multipanel_Inter_Relationship_box", + "gt_answers": ": Both regions depict themes of unity and collaborative spirit. The first shows a literal grouping of individuals, while the second shows a symbolic unity through the gesture of clasped hands. The combined images represent various aspects of a community or organization, focusing on people and their collective actions.\n: Both of these regions highlight aspects of camaraderie and assistance, with showing what appears to be a diverse group of individuals coming together, and depicting the action of one person helping another to rise. These images complement each other by demonstrating the concept of support within a community or group.\n: The themes of action and support link these regions. 
The clasped hands in and the act of assistance in both symbolize the ways in which individuals within an organization or community can actively contribute to the common good and help others.", + "bbox": [ + [ + 35.10971786833856, + 46.123824451410655, + 230.72100313479623, + 259.56112852664575 + ], + [ + 265.8307210031348, + 42.36206896551724, + 265.8307210031348, + 263.3228840125392 + ], + [ + 531.6614420062696, + 42.36206896551724, + 225.70532915360502, + 265.8307210031348 + ] + ] + }, + { + "image_name": "u=856418766,3773125614&fm=253&app=138&f=JPEG.png", + "question": "Please analyze the relationship between all marked regions in the image.", + "question_id": 4, + "dataset_name": "multipanel_Inter_Relationship_box", + "gt_answers": ": Each marked region represents a key soft skill that is considered valuable for customer service jobs, as indicated by the overarching title \"Top 10 Soft Skills for Customer Service Jobs\". The icons and text are color-coded with distinct background colors to differentiate them. They share the common feature of visually articulating each soft skill with a simple, clear illustration and associated text. The spatial arrangement suggests a reading order from left to right and top to bottom, similar to how English text is read. 
This sequence may indicate a prioritization or categorization of skills.", + "bbox": [ + [ + 0, + 72.71736662883087, + 153.95232690124857, + 161.7071509648127 + ], + [ + 153.95232690124857, + 73.7343927355278, + 120.00908059023836, + 160.69012485811578 + ], + [ + 273.96140749148697, + 73.7343927355278, + 129.16231555051078, + 152.04540295119182 + ], + [ + 407.19182746878545, + 72.20885357548241, + 123.06015891032916, + 169.3348467650397 + ], + [ + 529.2349602724177, + 72.20885357548241, + 140.34960272417706, + 160.69012485811578 + ], + [ + 5.466515323496027, + 246.12031782065833, + 142.8921679909194, + 158.14755959137344 + ], + [ + 148.35868331441543, + 241.54370034052212, + 124.58569807037458, + 163.74120317820658 + ], + [ + 272.94438138479, + 241.54370034052212, + 132.72190692395006, + 150.5198637911464 + ], + [ + 410.24290578887627, + 245.1032917139614, + 118.48354143019296, + 165.77525539160044 + ], + [ + 528.7264472190692, + 245.1032917139614, + 125.09421112372304, + 158.6560726447219 + ] + ] + }, + { + "image_name": "40.png", + "question": "Please analyze the relationship between all marked regions in the image.", + "question_id": 5, + "dataset_name": "multipanel_Inter_Relationship_box", + "gt_answers": ": All regions are part of an informational guide on professionalism in the workplace. focuses on punctuality, on appropriate language, on demeanor, on refraining from gossip, on a positive attitude, and on dressing correctly. 
They collectively convey different facets of professional behavior.\n: Serves as a title and encapsulates the theme presented in , , , , , and , as it broadly indicates that the image content is about professional conduct at work.", + "bbox": [ + [ + 68.18181818181819, + 126.17554858934167, + 686.5203761755486, + 253.91849529780563 + ], + [ + 938.0877742946708, + 159.09090909090907, + 507.83699059561127, + 272.72727272727275 + ], + [ + 242.1630094043887, + 431.8181818181818, + 517.2413793103448, + 221.0031347962382 + ], + [ + 832.2884012539184, + 398.9028213166144, + 434.95297805642633, + 235.1097178683385 + ], + [ + 884.012539184953, + 669.2789968652037, + 550.1567398119122, + 190.43887147335423 + ], + [ + 94.04388714733543, + 673.9811912225705, + 658.307210031348, + 213.9498432601881 + ], + [ + 298.58934169278996, + 57.993730407523486, + 926.3322884012539, + 72.88401253918495 + ] + ] + }, + { + "image_name": "33.png", + "question": "Please analyze the relationship between all marked regions in the image.", + "question_id": 6, + "dataset_name": "multipanel_Inter_Relationship_box", + "gt_answers": ": Your Detailed Analysis", + "bbox": [ + [ + 78.20689655172414, + 126.06896551724137, + 236.79310344827587, + 325.86206896551727 + ], + [ + 315, + 141.27586206896552, + 275.89655172413796, + 310.65517241379314 + ], + [ + 590.8965517241379, + 141.27586206896552, + 217.24137931034483, + 319.3448275862069 + ], + [ + 814.6551724137931, + 136.9310344827586, + 254.17241379310346, + 317.1724137931035 + ], + [ + 1071, + 139.10344827586206, + 249.82758620689657, + 312.82758620689657 + ], + [ + 393.2068965517242, + 21.793103448275847, + 599.5862068965517, + 86.89655172413794 + ] + ] + }, + { + "image_name": "14.png", + "question": "Please analyze the relationship between all marked regions in the image.", + "question_id": 7, + "dataset_name": "multipanel_Inter_Relationship_box", + "gt_answers": ": These illustrations reflect various forms of exploitation and human rights 
violations. Each image succinctly visualizes a different type of abuse, ranging from sexual exploitation to forced labor, and from the use of child soldiers to forced marriage. They share a common theme in illustrating coercive or non-consensual situations where individuals are compelled to engage in activities against their will, often for others' profit or gain.", + "bbox": [ + [ + 25.554926387316115, + 16.756511891279807, + 525.0373725934314, + 559.9467723669309 + ], + [ + 550.5922989807476, + 22.342015855039715, + 480.3533408833522, + 554.3612684031709 + ], + [ + 1030.9456398640998, + 22.342015855039715, + 455.2185730464326, + 548.7757644394111 + ], + [ + 1488.9569648924123, + 22.342015855039715, + 501.2989807474518, + 547.3793884484711 + ], + [ + 1497.3352208380522, + 608.8199320498302, + 491.52434881087197, + 601.8380520951301 + ], + [ + 1007.2072480181201, + 656.2967157417894, + 490.12797281993204, + 554.3612684031709 + ], + [ + 505.9082672706683, + 656.2967157417894, + 501.2989807474518, + 543.190260475651 + ], + [ + 38.12231030577591, + 636.7474518686297, + 469.18233295583235, + 562.7395243488107 + ] + ] + }, + { + "image_name": "u=3591188059,2160065444&fm=253&app=138&f=JPEG.png", + "question": "Please analyze the relationship between all marked regions in the image.", + "question_id": 8, + "dataset_name": "multipanel_Inter_Relationship_box", + "gt_answers": ": The marked regions depict various concepts such as community, support, value, reinforcement, confidence, success, motivation, and enjoyment. Each illustration is paired with a label that expresses the theme of the image. They seem to be conveying a message about the benefits of collaboration and personal development. 
The overarching theme indicated in the caption suggests an advocacy for the cooperation between languages and literacy, with an emphasis on not removing children from Spanish lessons for extra English, which implies that each region represents one of the arguments or values supporting this stance.", + "bbox": [ + [ + 13.976730987514161, + 110.66969353007946, + 132.23609534619752, + 143.01929625425652 + ], + [ + 166.64415437003402, + 93.64358683314416, + 136.2088535754824, + 173.09875141884223 + ], + [ + 315.9063564131668, + 61.29398410896708, + 119.75028376844494, + 152.66742338251987 + ], + [ + 519.0845629965947, + 141.316685584563, + 134.50624290578887, + 143.01929625425652 + ], + [ + 17.38195232690122, + 284.90351872871736, + 136.77639046538025, + 163.45062429057887 + ], + [ + 154.7258796821793, + 287.7412031782066, + 157.77525539160047, + 160.61293984108968 + ], + [ + 322.71679909194097, + 308.74006810442677, + 168.55845629965947, + 151.53234960272417 + ], + [ + 499.22077185017025, + 310.44267877412034, + 143.58683314415438, + 151.53234960272417 + ] + ] + }, + { + "image_name": "32.png", + "question": "Please analyze the relationship between all marked regions in the image.", + "question_id": 9, + "dataset_name": "multipanel_Inter_Relationship_box", + "gt_answers": ": Your Detailed Analysis - The four regions together form a cohesive message about the value of early childhood education. acts as a header, setting the topic, while , , and provide individual reasons that support the overarching theme. 
They are designed to appeal to viewers who may be interested or passionate about child development, symbolizing affection (), cognitive development (), and the joy of teaching ().", + "bbox": [ + [ + 5.6426332288401255, + 140.07836990595612, + 355.4858934169279, + 455.17241379310354 + ], + [ + 361.12852664576803, + 121.26959247648902, + 449.5297805642634, + 473.9811912225706 + ], + [ + 810.6583072100315, + 121.26959247648902, + 366.7711598746082, + 490.90909090909093 + ], + [ + 173.04075235109718, + 0, + 846.3949843260189, + 98.69905956112852 + ] + ] + }, + { + "image_name": "4.png", + "question": "Please analyze the relationship between all marked regions in the image.", + "question_id": 10, + "dataset_name": "multipanel_Inter_Relationship_box", + "gt_answers": ": Each region illustrates a key concept important for managing ADHD in children through emotional and social learning. emphasizes self-esteem, while deals with emotional regulation. focuses on the peer environment, on institutional and familial support, on cognitive aspects like ethical decision-making, on creativity as an outlet or skill, on responsibility, on resilience and coping strategies, and touches on the negative impact of bullying and the importance of communication. 
Collectively, these regions underscore various factors that can affect a child's experience and management of ADHD, offering a holistic view of social and emotional learning domains.", + "bbox": [ + [ + 188.9580973952435, + 22.355605889014722, + 582.3103057757644, + 102.19705549263873 + ], + [ + 93.14835787089467, + 144.77916194790487, + 190.55492638731596, + 269.33182332955835 + ], + [ + 327.3499433748584, + 142.65005662514156, + 281.0419026047565, + 272.52548131370327 + ], + [ + 617.9728199320498, + 147.97281993204984, + 225.68516421291054, + 252.29898074745188 + ], + [ + 65.46998867497169, + 425.82106455266137, + 228.87882219705548, + 266.13816534541337 + ], + [ + 332.6727066817667, + 422.62740656851645, + 240.5889014722537, + 252.29898074745188 + ], + [ + 635.0056625141563, + 420.4983012457531, + 201.2004530011325, + 265.07361268403173 + ], + [ + 94.21291053227633, + 707.9275198187995, + 225.68516421291054, + 177.78029445073614 + ], + [ + 380.5775764439411, + 698.3465458663646, + 182.03850509626275, + 199.0713476783692 + ], + [ + 635.0056625141563, + 700.475651189128, + 187.361268403171, + 198.00679501698755 + ] + ] + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/data/multipanel_Inter_Relationship/multipanel_Inter_Relationship_point.json b/evaluation/MDVP-Bench/data/multipanel_Inter_Relationship/multipanel_Inter_Relationship_point.json new file mode 100644 index 0000000000000000000000000000000000000000..9ebb300de7edfd0e1d19889892228f9f92672788 --- /dev/null +++ b/evaluation/MDVP-Bench/data/multipanel_Inter_Relationship/multipanel_Inter_Relationship_point.json @@ -0,0 +1,387 @@ +[ + { + "image_name": "u=386285352,2538957886&fm=253&app=138&f=JPEG.png", + "question": "Please analyze the relationship between all marked points in the image.", + "question_id": 1, + "dataset_name": "multipanel_Inter_Relationship_point", + "gt_answers": "...: These images are all related as they represent different types of weather conditions. 
The panels seem to be part of an educational set or resource designed to teach or convey information about various meteorological phenomena. The layout suggests that the images are intended to be viewed in a sequence or as a collective that comprehensively covers a spectrum of weather vocabulary. Each image combines a graphical representation with text that defines the specific weather condition being depicted.", + "points": [ + [ + 64, + 142 + ], + [ + 181, + 148 + ], + [ + 316, + 148 + ], + [ + 447, + 148 + ], + [ + 66, + 294 + ], + [ + 182, + 294 + ], + [ + 308, + 293 + ], + [ + 436, + 296 + ], + [ + 58, + 430 + ], + [ + 186, + 429 + ] + ] + }, + { + "image_name": "44.png", + "question": "Please analyze the relationship between all marked points in the image.", + "question_id": 2, + "dataset_name": "multipanel_Inter_Relationship_point", + "gt_answers": "...: The marked regions, when examined collectively, layout a guide to the expected behavior within Buddhist temples. The graphics are simple and paired with clear instructions for ease of understanding, reflecting a concern for cultural sensitivity and appropriate conduct. The sequence starts with a general call to respect, progresses through specific actions like clothing and physical interactions, and culminates with directives about monetary offerings and hand usage. 
The spatial arrangement and reading order is left-to-right and top-to-bottom, which is standard for English-speaking viewers, and each panel is designed to contribute to a comprehensive set of instructions for temple etiquette.", + "points": [ + [ + 791, + 96 + ], + [ + 282, + 367 + ], + [ + 590, + 356 + ], + [ + 935, + 348 + ], + [ + 1270, + 347 + ], + [ + 1103, + 736 + ], + [ + 754, + 741 + ], + [ + 426, + 730 + ] + ] + }, + { + "image_name": "14.png", + "question": "Please analyze the relationship between all marked points in the image.", + "question_id": 3, + "dataset_name": "multipanel_Inter_Relationship_point", + "gt_answers": ": These illustrations reflect various forms of exploitation and human rights violations. Each image succinctly visualizes a different type of abuse, ranging from sexual exploitation to forced labor, and from the use of child soldiers to forced marriage. They share a common theme in illustrating coercive or non-consensual situations where individuals are compelled to engage in activities against their will, often for others' profit or gain.", + "points": [ + [ + 287, + 295 + ], + [ + 790, + 299 + ], + [ + 1257, + 296 + ], + [ + 1738, + 295 + ], + [ + 1742, + 908 + ], + [ + 1252, + 933 + ], + [ + 755, + 927 + ], + [ + 272, + 917 + ] + ] + }, + { + "image_name": "49.png", + "question": "Please analyze the relationship between all marked points in the image.", + "question_id": 4, + "dataset_name": "multipanel_Inter_Relationship_point", + "gt_answers": ": These regions collectively depict various financial themes associated with personal and business expenses, potential tax credits, and deductions. They appear to be informing the viewer of different financial considerations, such as tax planning, familial loans, and expenses related to child care, transportation, sales activities, and fitness. 
Each panel visually represents a distinct category that one might encounter in financial management or tax preparation.", + "points": [ + [ + 152, + 153 + ], + [ + 529, + 158 + ], + [ + 867, + 159 + ], + [ + 883, + 439 + ], + [ + 520, + 470 + ], + [ + 150, + 461 + ], + [ + 135, + 768 + ], + [ + 518, + 765 + ], + [ + 889, + 764 + ] + ] + }, + { + "image_name": "u=2125079971,3262680906&fm=253&app=138&f=JPEG.png", + "question": "Please analyze the relationship between all marked points in the image.", + "question_id": 6, + "dataset_name": "multipanel_Inter_Relationship_point", + "gt_answers": ": All regions are part of an instructional or motivational poster delineating various self-care activities. They are interconnected thematically, presenting different strategies to improve personal well-being. The spatial arrangement mirrors the order of a list, suggesting these are recommendations to be considered collectively or individually. Each panel emphasizes personal activities that contribute to mental or physical health, showcasing practices such as mindfulness, physical exercise, journaling, hydration, enjoying meals, setting boundaries, managing digital engagement, enjoying music, adhering to healthy sleep habits, and maintaining a clean environment.", + "points": [ + [ + 77, + 140 + ], + [ + 180, + 142 + ], + [ + 280, + 142 + ], + [ + 289, + 260 + ], + [ + 188, + 267 + ], + [ + 81, + 267 + ], + [ + 80, + 390 + ], + [ + 185, + 394 + ], + [ + 288, + 391 + ], + [ + 393, + 391 + ] + ] + }, + { + "image_name": "u=2378115703,2671782333&fm=253&app=138&f=JPEG.png", + "question": "Please analyze the relationship between all marked points in the image.", + "question_id": 7, + "dataset_name": "multipanel_Inter_Relationship_point", + "gt_answers": "...: The panels depict the twelve zodiac signs, each with a representative illustration and its corresponding Italian name. They follow the Western astrological sequence and are likely part of a horoscope feature. 
The stylistic approach of the illustrations is consistent, using simple lines, bold colors, and clear symbolism. The sequence in this case is non-traditional, as not all twelve signs are shown, and the usual order is somewhat shuffled. Traditionally, the signs are ordered as follows: Aries (Region 1), Taurus (Region 2), Gemini (Region 3), Cancer (Region 4), Leo (Region 5), Virgo (Region 6), Libra (Region 7), Scorpio (Region 8), Sagittarius (Region 9), Capricorn (Region 10), followed by Aquarius and Pisces, which are not labeled in this series. The layout likely aligns with an emphasis on astrology in the context of guidance or predictions, as indicated by the text in the header, which appears to be Italian and suggests a personal task related to forgiveness.", + "points": [ + [ + 58, + 205 + ], + [ + 179, + 210 + ], + [ + 305, + 213 + ], + [ + 428, + 206 + ], + [ + 53, + 348 + ], + [ + 178, + 345 + ], + [ + 305, + 347 + ], + [ + 432, + 349 + ], + [ + 59, + 494 + ], + [ + 186, + 492 + ] + ] + }, + { + "image_name": "45.png", + "question": "Please analyze the relationship between all marked points in the image.", + "question_id": 8, + "dataset_name": "multipanel_Inter_Relationship_point", + "gt_answers": ": All the marked regions collectively illustrate business ideas that are economical to initiate, as indicated by the heading in . 
Each panel from to portrays a different business idea through visual illustrations accompanied by labels, suggesting diverse, cost-effective entrepreneurial opportunities.", + "points": [ + [ + 768, + 191 + ], + [ + 231, + 581 + ], + [ + 579, + 586 + ], + [ + 935, + 620 + ], + [ + 1289, + 624 + ] + ] + }, + { + "image_name": "24.png", + "question": "Please analyze the relationship between all marked points in the image.", + "question_id": 9, + "dataset_name": "multipanel_Inter_Relationship_point", + "gt_answers": ": These regions collectively represent different aspects of customer service, with each one emphasizing a unique component such as email communication, service problem-solving, global support, expertise, quality assurance, callback services, technical help, language translation, and 24/7 consultation. They are likely part of a larger illustration highlighting various customer support roles and services offered by a company.", + "points": [ + [ + 329, + 283 + ], + [ + 758, + 288 + ], + [ + 1223, + 320 + ], + [ + 1211, + 753 + ], + [ + 773, + 733 + ], + [ + 334, + 745 + ], + [ + 316, + 1194 + ], + [ + 781, + 1194 + ], + [ + 1234, + 1203 + ] + ] + }, + { + "image_name": "u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "question": "Please analyze the relationship between all marked points in the image.", + "question_id": 10, + "dataset_name": "multipanel_Inter_Relationship_point", + "gt_answers": ": These regions are all visually conveying symptoms of the Middle East respiratory syndrome coronavirus (MERS-CoV), as indicated by the overarching text of the entire image. From fever in , cough in , to more severe manifestations such as pneumonia in and renal failure in , they contribute to a comprehensive overview of potential health issues caused by the virus. 
The sequence in which these regions are presented may suggest a progression from more common to more severe symptoms, although such progression varies by individual cases of the disease.", + "points": [ + [ + 81, + 186 + ], + [ + 222, + 185 + ], + [ + 351, + 185 + ], + [ + 481, + 187 + ], + [ + 153, + 364 + ], + [ + 285, + 362 + ], + [ + 422, + 364 + ] + ] + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/data/multipanel_QA/multipanel_QA_box.json b/evaluation/MDVP-Bench/data/multipanel_QA/multipanel_QA_box.json new file mode 100644 index 0000000000000000000000000000000000000000..093cb23057d70fe86b56dc231dd6f239f95d6d45 --- /dev/null +++ b/evaluation/MDVP-Bench/data/multipanel_QA/multipanel_QA_box.json @@ -0,0 +1,524 @@ +[ + { + "image_name": "u=2125079971,3262680906&fm=253&app=138&f=JPEG.png", + "question": "What activity shown in might be beneficial for mental clarity?", + "question_id": 1, + "dataset_name": "multipanel_QA_box", + "gt_answers": "The activity of practicing mindfulness or meditation, as shown in , might be beneficial for mental clarity.", + "bbox": [ + [ + 20.080022701475613, + 81.72531214528944, + 114.07491486946651, + 119.1827468785471 + ], + [ + 136.99262202043136, + 87.96821793416572, + 89.10329171396141, + 110.66969353007946 + ], + [ + 226.6634506242906, + 87.96821793416572, + 108.96708286038596, + 110.66969353007946 + ], + [ + 243.12202043132805, + 198.63791146424518, + 92.5085130533485, + 125.99318955732123 + ], + [ + 134.15493757094214, + 211.12372304199772, + 108.96708286038593, + 113.50737797956867 + ], + [ + 30.29568671963679, + 211.12372304199772, + 103.85925085130533, + 113.50737797956867 + ], + [ + 29.72814982973895, + 326.3337116912599, + 102.72417707150964, + 128.83087400681043 + ], + [ + 132.4523269012486, + 334.8467650397276, + 106.69693530079455, + 120.3178206583428 + ], + [ + 239.14926220204316, + 334.8467650397276, + 99.31895573212262, + 114.07491486946651 + ], + [ + 338.46821793416575, + 334.8467650397276, + 
110.66969353007946, + 114.07491486946651 + ] + ] + }, + { + "image_name": "47.png", + "question": "What is the main topic of the infographic?", + "question_id": 2, + "dataset_name": "multipanel_QA_box", + "gt_answers": "The main topic of the infographic is the pros and cons of antidepressants.", + "bbox": [ + [ + 58.77742946708464, + 20.37617554858932, + 371.4733542319749, + 359.717868338558 + ], + [ + 70.53291536050156, + 380.0940438871473, + 359.717868338558, + 275.07836990595604 + ], + [ + 70.53291536050156, + 655.1724137931034, + 307.9937304075235, + 256.26959247648904 + ], + [ + 498.4326018808777, + 67.39811912225703, + 592.4764890282131, + 844.0438871473355 + ], + [ + 1109.717868338558, + 20.376175548589252, + 359.717868338558, + 357.3667711598746 + ], + [ + 1107.3667711598746, + 377.7429467084639, + 362.0689655172414, + 279.7805642633229 + ], + [ + 1107.3667711598746, + 657.5235109717868, + 340.9090909090909, + 263.3228840125392 + ] + ] + }, + { + "image_name": "29.png", + "question": "What are the expressions on the faces of individuals in and meant to signify?", + "question_id": 3, + "dataset_name": "multipanel_QA_box", + "gt_answers": "The expressions portray distress and discomfort, indicative of the sleep disturbance experienced in and the profound sadness or hopelessness associated with depression in .", + "bbox": [ + [ + 8.150470219435736, + 10.373040752351116, + 232.2884012539185, + 211.91222570532915 + ], + [ + 240.43887147335423, + 6.297805642633247, + 305.6426332288401, + 215.987460815047 + ], + [ + 546.0815047021944, + 6.297805642633247, + 224.13793103448276, + 213.9498432601881 + ], + [ + 772.2570532915361, + 10.373040752351116, + 275.0783699059561, + 209.8746081504702 + ], + [ + 1047.3354231974922, + 10.373040752351116, + 248.58934169278996, + 205.79937304075233 + ], + [ + 1041.2225705329154, + 216.17241379310346, + 254.70219435736675, + 193.57366771159874 + ], + [ + 782.4451410658307, + 240.62382445141066, + 258.77742946708463, + 
169.12225705329152 + ], + [ + 529.7805642633228, + 240.62382445141066, + 252.66457680250784, + 177.27272727272725 + ], + [ + 279.153605015674, + 228.39811912225707, + 250.6269592476489, + 189.49843260188086 + ], + [ + 8.150470219435736, + 228.39811912225707, + 271.0031347962382, + 197.6489028213166 + ] + ] + }, + { + "image_name": "u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "question": "What should you do before applying the gel ice pack for cold use according to ?", + "question_id": 4, + "dataset_name": "multipanel_QA_box", + "gt_answers": "According to , you should place the gel ice pack in the freezer or refrigerator for at least two hours, or until the desired temperature is achieved, before application.", + "bbox": [ + [ + 40.6061293984109, + 73.2122587968218, + 208.2860385925085, + 99.31895573212259 + ], + [ + 264.2156640181612, + 60.1589103291714, + 225.87968217934164, + 225.87968217934164 + ], + [ + 32.66061293984109, + 216.79909194097615, + 218.5017026106697, + 259.9318955732123 + ], + [ + 271.59364358683314, + 337.6844494892168, + 236.40635641316686, + 133.93870601589103 + ] + ] + }, + { + "image_name": "15.png", + "question": "What is depicted in as a symbol of defying death?", + "question_id": 5, + "dataset_name": "multipanel_QA_box", + "gt_answers": "A figure resembling a wizard, labeled 'Immortality,' is depicted in as a symbol of defying death.", + "bbox": [ + [ + 289.5423197492163, + 0, + 879.8495297805642, + 88.16614420062699 + ], + [ + 38.156739811912225, + 168.96865203761757, + 255.8746081504702, + 294.0313479623824 + ], + [ + 294.0313479623824, + 463, + 287.2978056426332, + 320.9655172413793 + ], + [ + 549.9059561128527, + 151.012539184953, + 294.0313479623824, + 327.69905956112854 + ], + [ + 843.937304075235, + 478.7115987460815, + 260.3636363636364, + 291.7868338557994 + ], + [ + 1151.435736677116, + 115.10031347962386, + 242.40752351097177, + 345.6551724137931 + ] + ] + }, + { + "image_name": 
"u=1806211385,2703137837&fm=253&app=138&f=JPEG.png", + "question": "What does the text in suggest regarding the types of products used for cleaning?", + "question_id": 6, + "dataset_name": "multipanel_QA_box", + "gt_answers": "The text in indicates that appropriate products are used to eliminate dirt, dust, grease, and germs.", + "bbox": [ + [ + 0, + 55.61861520998865, + 404.22758229284904, + 157.2077185017026 + ], + [ + 424.6589103291714, + 49.37570942111237, + 216.23155505107832, + 237.79795686719638 + ], + [ + 0, + 216.23155505107832, + 386.6339387060159, + 150.96481271282633 + ], + [ + 341.2309875141884, + 295.6867196367764, + 303.6322360953462, + 201.4755959137344 + ] + ] + }, + { + "image_name": "5.png", + "question": "What does suggest about personal interactions?", + "question_id": 7, + "dataset_name": "multipanel_QA_box", + "gt_answers": " emphasizes the importance of respecting personal boundaries and consent in interactions with others.", + "bbox": [ + [ + 121.89127972819932, + 22.355605889014722, + 705.7984144960362, + 103.26160815402038 + ], + [ + 79.30917327293318, + 149.03737259343148, + 200.13590033975086, + 252.29898074745188 + ], + [ + 348.6409966024915, + 140.52095130237825, + 217.1687429218573, + 268.26727066817665 + ], + [ + 629.682899207248, + 141.58550396375992, + 203.3295583238958, + 256.5571913929785 + ], + [ + 635.0056625141563, + 398.1426953567384, + 198.00679501698755, + 266.13816534541337 + ], + [ + 341.18912797281996, + 418.36919592298983, + 225.68516421291054, + 251.2344280860702 + ], + [ + 69.7281993204983, + 418.36919592298983, + 241.65345413363534, + 227.81426953567384 + ], + [ + 69.7281993204983, + 689.8301245753114, + 277.8482446206116, + 222.49150622876556 + ], + [ + 349.70554926387314, + 685.5719139297848, + 262.9445073612684, + 223.55605889014723 + ], + [ + 614.7791619479049, + 687.7010192525481, + 216.10419026047566, + 223.55605889014723 + ] + ] + }, + { + "image_name": "u=250255567,2407443674&fm=253&app=138&f=JPEG.png", + 
"question": "What is common between the challenges depicted in , , and ?", + "question_id": 8, + "dataset_name": "multipanel_QA_box", + "gt_answers": "The common factor among , , and is that they each illustrate individuals with statements highlighting their increased risk of mental health issues such as PTSD, victimization, depression, and anxiety.", + "bbox": [ + [ + 3.1963677639046737, + 2.843359818388195, + 128.5198637911464, + 173.4449489216799 + ], + [ + 183.46538024971625, + 2.843359818388195, + 147.85471055618618, + 155.8161180476731 + ], + [ + 369.42111237230426, + 31.276958002270145, + 130.5788876276958, + 145.5800227014756 + ], + [ + 0, + 217.23269012485812, + 170.38592508513057, + 155.81611804767311 + ], + [ + 140.24631101021572, + 373.6174801362088, + 249.0783200908059, + 112.59704880817253 + ], + [ + 348.3802497162316, + 220.07604994324632, + 151.61975028376844, + 191.64245175936435 + ] + ] + }, + { + "image_name": "u=1539278969,4203474920&fm=253&app=138&f=PNG.png", + "question": "What does the icon in signify in the context of vehicle surveillance?", + "question_id": 9, + "dataset_name": "multipanel_QA_box", + "gt_answers": "The icon in represents a fuel sensor system, used for monitoring the fuel level in a vehicle's tank.", + "bbox": [ + [ + 0, + 52.78093076049943, + 143.6123723041998, + 133.3711691259932 + ], + [ + 143.6123723041998, + 50.51078320090806, + 136.2088535754824, + 135.64131668558457 + ], + [ + 279.8212258796822, + 50.51078320090806, + 165.17877412031783, + 139.04653802497162 + ], + [ + 277.5510783200908, + 189.5573212258797, + 167.4489216799092, + 144.15437003405222 + ], + [ + 120.34335981838821, + 208.2860385925085, + 154.93757094211125, + 123.72304199772985 + ], + [ + 0, + 208.2860385925085, + 120.34335981838821, + 128.83087400681043 + ], + [ + 0, + 338.2519863791146, + 123.18104426787743, + 155.5051078320091 + ], + [ + 123.18104426787743, + 347.90011350737797, + 156.07264472190693, + 145.85698070374573 + ], + [ + 
279.25368898978434, + 347.90011350737797, + 165.74631101021566, + 145.2894438138479 + ] + ] + }, + { + "image_name": "u=2237054763,2488129206&fm=253&app=138&f=PNG.png", + "question": "What symptom is shown in that suggests contacting emergency services?", + "question_id": 10, + "dataset_name": "multipanel_QA_box", + "gt_answers": "The symptom shown in that suggests contacting emergency services is a child with a fever, cough, vomiting, or worsening diarrhea.", + "bbox": [ + [ + 0, + 125.42565266742338, + 182.46311010215663, + 187.85471055618615 + ], + [ + 182.46311010215663, + 124.85811577752554, + 164.01816118047674, + 188.422247446084 + ], + [ + 346.4812712826334, + 124.85811577752554, + 153.51872871736663, + 188.422247446084 + ], + [ + 347.61634506242905, + 313.2803632236095, + 152.38365493757095, + 186.71963677639047 + ], + [ + 183.03064699205447, + 313.84790011350736, + 164.01816118047674, + 186.1520998864926 + ], + [ + 0, + 313.84790011350736, + 183.03064699205447, + 186.1520998864926 + ] + ] + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/data/multipanel_QA/multipanel_QA_point.json b/evaluation/MDVP-Bench/data/multipanel_QA/multipanel_QA_point.json new file mode 100644 index 0000000000000000000000000000000000000000..b5896617282fd75a5b9726989ed51148af536356 --- /dev/null +++ b/evaluation/MDVP-Bench/data/multipanel_QA/multipanel_QA_point.json @@ -0,0 +1,376 @@ +[ + { + "image_name": "u=4096204886,1155943766&fm=253&app=138&f=JPEG.png", + "question": "What does imply about the importance of environment for motivation?", + "question_id": 1, + "dataset_name": "multipanel_QA_point", + "gt_answers": "The cozy setting in with a plant and books implies that a comfortable and inviting environment can help inspire perseverance and motivation.", + "points": [ + [ + 97, + 129 + ], + [ + 301, + 90 + ], + [ + 497, + 125 + ], + [ + 133, + 320 + ], + [ + 504, + 321 + ] + ] + }, + { + "image_name": "u=1806211385,2703137837&fm=253&app=138&f=JPEG.png", + 
"question": "What does the text in suggest regarding the types of products used for cleaning?", + "question_id": 2, + "dataset_name": "multipanel_QA_point", + "gt_answers": "The text in indicates that appropriate products are used to eliminate dirt, dust, grease, and germs.", + "points": [ + [ + 202, + 133 + ], + [ + 532, + 167 + ], + [ + 193, + 291 + ], + [ + 492, + 395 + ] + ] + }, + { + "image_name": "u=2710646532,750768721&fm=253&app=138&f=JPEG.png", + "question": "What is the significance of the illustration in ?", + "question_id": 3, + "dataset_name": "multipanel_QA_point", + "gt_answers": "The illustration in signifies the requirement of pre-event COVID-19 testing for selected larger-scale and higher-risk activities, using antigen rapid tests.", + "points": [ + [ + 75, + 303 + ], + [ + 244, + 302 + ], + [ + 419, + 303 + ], + [ + 412, + 543 + ], + [ + 240, + 545 + ], + [ + 78, + 541 + ] + ] + }, + { + "image_name": "u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "question": "What is the trend in hiring time over the past 5 years according to ?", + "question_id": 4, + "dataset_name": "multipanel_QA_point", + "gt_answers": "According to , the time to hire has more than doubled over the past 5 years.", + "points": [ + [ + 112, + 140 + ], + [ + 240, + 149 + ], + [ + 380, + 141 + ], + [ + 337, + 272 + ], + [ + 177, + 273 + ] + ] + }, + { + "image_name": "u=3591188059,2160065444&fm=253&app=138&f=JPEG.png", + "question": "What concept does symbolize in the context of the overarching theme?", + "question_id": 5, + "dataset_name": "multipanel_QA_point", + "gt_answers": " symbolizes the importance of peers or community in the context of promoting cooperation between languages and literacy.", + "points": [ + [ + 79, + 181 + ], + [ + 234, + 179 + ], + [ + 374, + 137 + ], + [ + 586, + 212 + ], + [ + 85, + 365 + ], + [ + 232, + 367 + ], + [ + 406, + 383 + ], + [ + 570, + 385 + ] + ] + }, + { + "image_name": "27.png", + "question": "What does the icon in imply within 
the context of the infographic?", + "question_id": 6, + "dataset_name": "multipanel_QA_point", + "gt_answers": "The icon in suggests the concept of innovation and ideas generation, which could be associated with mentoring aspects within the internship training program.", + "points": [ + [ + 216, + 485 + ], + [ + 568, + 726 + ], + [ + 956, + 842 + ], + [ + 1344, + 741 + ], + [ + 1693, + 481 + ], + [ + 990, + 270 + ] + ] + }, + { + "image_name": "u=856418766,3773125614&fm=253&app=138&f=JPEG.png", + "question": "What soft skills is represented in and how is it visually depicted?", + "question_id": 7, + "dataset_name": "multipanel_QA_point", + "gt_answers": " represents 'Clear Communication' and is visually depicted with the icon of a person's side profile and a speech bubble which is a common symbol for speaking or conversing.", + "points": [ + [ + 76, + 152 + ], + [ + 213, + 153 + ], + [ + 337, + 149 + ], + [ + 468, + 156 + ], + [ + 599, + 152 + ], + [ + 76, + 325 + ], + [ + 210, + 322 + ], + [ + 338, + 316 + ], + [ + 469, + 327 + ], + [ + 590, + 324 + ] + ] + }, + { + "image_name": "46.png", + "question": "What does the illustration in suggest about career flexibility?", + "question_id": 8, + "dataset_name": "multipanel_QA_point", + "gt_answers": "The illustration in suggests that a career in law offers flexible working hours or a good work-life balance.", + "points": [ + [ + 236, + 368 + ], + [ + 388, + 718 + ], + [ + 822, + 803 + ], + [ + 1176, + 724 + ], + [ + 1283, + 384 + ], + [ + 726, + 373 + ], + [ + 751, + 88 + ] + ] + }, + { + "image_name": "10.png", + "question": "What activity is depicted in to promote healthy aging?", + "question_id": 9, + "dataset_name": "multipanel_QA_point", + "gt_answers": "In , exercise is promoted as part of healthy aging, with an illustration of a senior walking in a park.", + "points": [ + [ + 800, + 128 + ], + [ + 294, + 485 + ], + [ + 621, + 486 + ], + [ + 964, + 487 + ], + [ + 1295, + 488 + ], + [ + 1307, + 888 + ], + [ + 982, 
+ 898 + ], + [ + 638, + 898 + ], + [ + 297, + 901 + ], + [ + 295, + 1304 + ] + ] + }, + { + "image_name": "u=386285352,2538957886&fm=253&app=138&f=JPEG.png", + "question": "What weather condition is represented by and which region depicts its opposite?", + "question_id": 10, + "dataset_name": "multipanel_QA_point", + "gt_answers": " represents rainy weather, and its opposite, sunny weather, is depicted in .", + "points": [ + [ + 64, + 142 + ], + [ + 181, + 148 + ], + [ + 316, + 148 + ], + [ + 447, + 148 + ], + [ + 66, + 294 + ], + [ + 182, + 294 + ], + [ + 308, + 293 + ], + [ + 436, + 296 + ], + [ + 58, + 430 + ], + [ + 186, + 429 + ] + ] + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/data/multipanel_detailed_caption/multipanel_detailed_caption_box.json b/evaluation/MDVP-Bench/data/multipanel_detailed_caption/multipanel_detailed_caption_box.json new file mode 100644 index 0000000000000000000000000000000000000000..2bedd02be750dbc80ac7867eca7f8a9dedbbf8cc --- /dev/null +++ b/evaluation/MDVP-Bench/data/multipanel_detailed_caption/multipanel_detailed_caption_box.json @@ -0,0 +1,536 @@ +[ + { + "image_name": "6.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 1, + "dataset_name": "multipanel_detailed_caption_box", + "gt_answers": ": The image displays a title that reads \"NAVIGATING SPECIAL EDUCATION SOCIAL & EMOTIONAL LEARNING.\" It's styled in bold, white capital letters against a red background, and it appears to serve as a header for the entire visual presentation, indicating the overarching theme of the content below.\n: This panel illustrates the concept of a \"Growth Mindset\" juxtaposed with \"Fixed Mindset.\" Two head silhouettes are shown with arrows pointing towards a \"Growth Mindset\" tag indicating a positive transformation away from a \"Fixed Mindset,\" symbolizing the adaptability and learning potential of the mind.\n: Depicted here is a person with a grim expression, 
and steam coming out of their ears, conveying the theme of \"Anger Management.\" This symbolizes the need to control tempers, with visual cues highlighting the struggle typically associated with anger.\n: This panel represents \"Understanding Diversity.\" It features a circle of variously colored handprints reaching towards the center, signifying unity and inclusiveness among diverse individuals or groups. The image communicates the idea of embracing diversity.\n: The image here is indicative of \"Social Inferencing.\" A figure stands perplexed before an open box with question marks floating above, suggesting the process of interpreting social cues and understanding social contexts or scenarios that are not overtly expressed.\n: An illustration of two children, one standing over the other with a raised fist, typifies \"Bullying.\" This image portrays an aggressive interaction between youth, emphasizing the dynamic of power and intimidation present in bullying behaviors.\n: Here, \"Helping Others\" is symbolized by two children, one assisting the other by tying their shoe. This image evokes themes of kindness, helpfulness, and cooperation among individuals, highlighting the importance of social support.\n: Showing two profiles with opposing arrows and a lightning bolt in between, this panel discusses \"Conflict Resolution.\" The imagery suggests two individuals facing a conflict with a potential for resolution, emphasizing communication and problem-solving.\n: This panel depicts \"Coping With Changes,\" represented by a signpost with arrows pointing in different directions, labeled \"CHANGES.\" It symbolizes the various paths one may take when encountering life's transitions and the importance of adaptability.\n: Finally, \"Leadership\" is expressed through an individual climbing a staircase while assisting another person upward. 
It represents the concept of leading by example, and guiding others towards success, showcasing the traits of a good leader.", + "bbox": [ + [ + 213.4428086070215, + 28.742921857304644, + 518.4371460928652, + 84.09966024915062 + ], + [ + 77.18006795016987, + 136.26274065685163, + 267.202718006795, + 258.6862967157418 + ], + [ + 344.3827859569649, + 137.3272933182333, + 231.0079275198188, + 257.62174405436014 + ], + [ + 610.5209513023783, + 123.4881087202718, + 260.8154020385051, + 278.9127972819932 + ], + [ + 605.19818799547, + 426.885617214043, + 272.52548131370327, + 233.1370328425821 + ], + [ + 349.70554926387314, + 440.72480181200456, + 201.2004530011325, + 206.52321630804076 + ], + [ + 72.92185730464327, + 432.2083805209513, + 233.1370328425821, + 216.10419026047566 + ], + [ + 76.11551528878822, + 670.6681766704417, + 269.33182332955835, + 239.52434881087203 + ], + [ + 346.5118912797282, + 668.5390713476784, + 227.81426953567384, + 241.65345413363534 + ], + [ + 620.1019252548132, + 668.5390713476784, + 191.61947904869763, + 222.49150622876556 + ] + ] + }, + { + "image_name": "u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 2, + "dataset_name": "multipanel_detailed_caption_box", + "gt_answers": ": The image highlights a statistic related to hiring efficiency, pointing out that the time to hire has more than doubled over the last 5 years. A graphical element beside the text emphasizes this increase in time with a \"+2X\" indicating the doubling.\n: This section of the image indicates that the average cost to hire someone in the U.S. is $4,000, emphasizing the financial implications of the recruitment process for employers.\n: Illustrated here is a pie chart displaying that 36% of employers are unable to find the talent they need when it is needed. 
This statistic points to the challenges in matching skills and job openings in a timely manner.\n: Described here is the significant potential economic benefit (\"$2.7 trillion impact to global GDP\") that could result from using more efficient talent platforms, suggesting that improvements in recruiting methods could have a profound impact on the global economy.\n: This part of the image addresses organizational efficiency, with a statement that 46% of companies are sometimes or frequently understaffed. The figure is accompanied by a graphic showing the 46% proportion.", + "bbox": [ + [ + 66.41997729852442, + 87.18955732122588, + 92.4472190692395, + 106.02951191827468 + ], + [ + 193.4801362088536, + 90.25652667423383, + 94.63791146424518, + 119.61180476730986 + ], + [ + 327.55051078320093, + 84.56072644721907, + 106.02951191827468, + 115.23041997729852 + ], + [ + 255.25766174801362, + 216.44040862656072, + 164.30192962542566, + 112.60158910329172 + ], + [ + 117.68217934165722, + 216.87854710556186, + 121.80249716231555, + 115.66855845629965 + ] + ] + }, + { + "image_name": "50.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 3, + "dataset_name": "multipanel_detailed_caption_box", + "gt_answers": ": This area presents the title and introductory text providing an overview of the image's intent. It introduces the concept of \"Travel Personas,\" indicates that these personas are used to identify individual travel styles, and how these styles are significant for personalized engagement in marketing. It references a report by the CMO Council from 2018.\n: A visual and textual depiction of \"The Smart Planner\" travel persona. This persona, representing 31% of travelers, is illustrated by a character with suitcases, a camera, binoculars, and a hat, suggesting a well-prepared and organized traveler.\n: This illustrates \"The Relaxed Nomad\" persona. 
With 25% representation, the image shows two individuals in hiking attire with a backpack, indicating a laid-back and adventurous travel style.\n: Depicts \"The Deal Seeker\" persona, representing 22% of travelers. The image shows a family with suitcases and shopping bags, suggesting a focus on economical travel and value for money.\n: Presents \"The Nervous Stresser\" persona with 13% representation. The image depicts an anxious individual in an airplane seat, clutching the armrests, reflecting a traveler who experiences stress during trips.\n: Showcases \"The Adventurous Thrill-Seeker\" persona, accounting for 5% of the traveler demographic. The image portrays two characters skydiving, indicating a preference for high-energy and adventure-filled travel experiences.\n: Portrays \"The Luxury Budget-Buster\" persona, constituting 1% of travelers according to this depiction. The image includes a character sipping a drink on a plane, implying a tendency towards indulgence and high expenditure.\n: Displays \"The Business Road Warrior\" persona, also making up 1% of the traveler profile. 
The graphical representation includes a character briskly walking with a rolling suitcase and carrying a briefcase, suggesting frequent travel for business purposes.", + "bbox": [ + [ + 21.36677115987461, + 5.362068965517206, + 1064.7774294670846, + 142.44514106583074 + ], + [ + 56.978056426332294, + 167.39341692789966, + 149.56739811912226, + 256.4012539184953 + ], + [ + 338.307210031348, + 156.71003134796234, + 217.22884012539186, + 267.08463949843264 + ], + [ + 619.6363636363636, + 158.49059561128524, + 197.64263322884014, + 268.8652037617555 + ], + [ + 897.4043887147336, + 167.39341692789966, + 179.8369905956113, + 258.1818181818182 + ], + [ + 135.3228840125392, + 450.5031347962382, + 256.4012539184953, + 293.79310344827593 + ], + [ + 480.7523510971787, + 457.6253918495298, + 215.44827586206898, + 275.98746081504703 + ], + [ + 783.448275862069, + 455.8448275862069, + 211.8871473354232, + 283.1097178683386 + ] + ] + }, + { + "image_name": "24.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 4, + "dataset_name": "multipanel_detailed_caption_box", + "gt_answers": ": An illustration depicts a female customer service representative wearing a headset and holding a notepad, with an envelope icon indicating email communication.\n: Depicted here is a male customer support agent with a headset. Behind him are symbols such as a magnifying glass and a wrench, suggesting a focus on service and problem-solving.\n: This image shows a customer support agent with a globe and a phone headset in the background. The presence of a star and headphones suggests excellence in global support.\n: In this graphic, a male figure with a headset is surrounded by symbols: a question mark, gears, and a light bulb. 
This represents expertise in finding solutions.\n: Featuring tools and a gauge, the illustration conveys a commitment to quality in customer service, indicated by the 'Quality Service' text.\n: The design shows a female representative with a headset alongside a mobile phone displaying a wifi signal and a callback option, emphasizing telecommunications services.\n: This panel portrays a customer support individual with a wrench, highlighting the concept of assistance with technical or practical issues.\n: Illustrated here is a customer support agent with a headset in front of a backdrop depicting the UK flag, a speech bubble, and a phone, suggesting language translation services.\n: A female customer service consultant is represented, with symbols of 24-hour availability and a gold star, signifying round-the-clock excellence.", + "bbox": [ + [ + 133.33333333333334, + 60, + 393.33333333333337, + 446.6666666666667 + ], + [ + 543.3333333333334, + 80, + 430, + 416.6666666666667 + ], + [ + 980, + 80, + 486.6666666666667, + 480 + ], + [ + 956.6666666666667, + 560, + 510, + 386.6666666666667 + ], + [ + 593.3333333333334, + 520, + 360, + 426.6666666666667 + ], + [ + 83.33333333333334, + 520, + 503.33333333333337, + 450 + ], + [ + 86.66666666666667, + 973.3333333333334, + 460, + 443.33333333333337 + ], + [ + 546.6666666666667, + 973.3333333333334, + 470, + 443.33333333333337 + ], + [ + 1016.6666666666667, + 973.3333333333334, + 436.6666666666667, + 460 + ] + ] + }, + { + "image_name": "u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 5, + "dataset_name": "multipanel_detailed_caption_box", + "gt_answers": ": The image features a human heart symbolizing a strong heart as one of the benefits of running.\n: This panel showcases an icon of a shield with a check mark, representing the immune system's boost from running.\n: A silhouette of a figure measuring their waist 
indicates that running can aid in weight loss.\n: The artwork depicts a pair of lungs, signifying the respiratory system's enhancement due to running.\n: A smiling face emoticon suggests that running can improve one's mood.\n: An illustration of a leg bone signifies that running increases bone density.\n: An image of a brain is used to illustrate the benefit of improved brain function from running.\n: The image features a detailed representation of the cardiovascular system, emphasizing its strengthening through running.", + "bbox": [ + [ + 44.65323496027242, + 62.92849035187289, + 81.31668558456299, + 90.71509648127127 + ], + [ + 143.9494892167991, + 54.3473325766175, + 87.85471055618616, + 88.67196367763904 + ], + [ + 235.89046538024974, + 75.1872871736663, + 85.40295119182747, + 82.54256526674233 + ], + [ + 268.17196367763904, + 164.2678774120318, + 84.17707150964812, + 88.26333711691257 + ], + [ + 19.727014755959143, + 166.31101021566403, + 99.70488081725311, + 79.27355278093073 + ], + [ + 59.36379114642452, + 258.2519863791146, + 75.18728717366628, + 94.39273552780931 + ], + [ + 149.2616345062429, + 275.82292849035184, + 84.17707150964814, + 80.4994324631101 + ], + [ + 242.83711691259933, + 261.5209988649262, + 84.58569807037456, + 93.16685584562995 + ] + ] + }, + { + "image_name": "11.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 6, + "dataset_name": "multipanel_detailed_caption_box", + "gt_answers": ": The image displays the word 'HOT' in large, bold, uppercase letters with varying colors for each letter. The 'H' is in red, the 'O' is in mustard yellow, and the 'T' is in a light blue color.\n: Here, a round, yellow cartoon-like emoji with blue tears, symbolizing laughter or crying with joy, is shown. Below it, the word 'HUMOR' is written in uppercase letters.\n: This depicts an open hand graphic in brown color, which is centered within an orange circular background. 
Underneath the image, the word 'OPENNESS' appears in capital letters.\n: Shown is an illustration of two hands coming together in a handshake or high five, set against a yellow circle. Below, the word 'TOUCH' is described in uppercase letters.\n: A graphic of a heart with an exclamation mark within it is displayed within a light green circular background. Written below is the word 'ATTENTION' in uppercase letters.\n: A portrayal of a person with a gender-neutral appearance, featuring brown hair, is encircled in blue. A sequence of dashes leads from the character to the bottom right, with the word 'PROXIMITY' written in block capitals.\n: The image shows a close-up illustration of a stylized blue eye with a large brown pupil, against a dark blue background. Below the eye is the word 'EYE CONTACT' written in all caps.", + "bbox": [ + [ + 1028.513805522209, + 7.994897959183692, + 2225.1500600240097, + 548.8703481392557 + ], + [ + 91.47839135654262, + 631.0369147659063, + 615.624849939976, + 714.5204081632653 + ], + [ + 709.5756302521008, + 628.5645258103241, + 672.4897959183673, + 719.4651860744298 + ], + [ + 1382.065426170468, + 628.5645258103241, + 660.1278511404562, + 719.4651860744298 + ], + [ + 2042.1932773109243, + 623.6197478991596, + 707.1032412965186, + 724.4099639855942 + ], + [ + 2749.296518607443, + 623.6197478991596, + 620.5696278511405, + 724.4099639855942 + ], + [ + 3369.8661464585834, + 668.1227490996398, + 684.8517406962785, + 674.9621848739496 + ] + ] + }, + { + "image_name": "u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 7, + "dataset_name": "multipanel_detailed_caption_box", + "gt_answers": ": The image depicts an illustration of a person with flushed cheeks and a thermometer in their mouth, indicating a high temperature, alongside the word \"fever.\"\n: The image shows an individual coughing into their hand, representing a symptom 
identified by the word \"cough.\"\n: This panel illustrates an individual appearing unwell, with a sick expression and a hand over their mouth. The word \"vomiting\" is associated, indicating it as a symptom.\n: Here, an individual is portrayed with their hands near their throat, their cheeks flushed, and an uneasy expression. The term \"dyspnea\" adjacent to the figure defines the displayed respiratory distress.\n: An individual is seen clutching their stomach, with a distressed expression, representative of \"diarrhea\" which is indicated by the corresponding label.\n: The illustration shows a pair of human lungs with a highlighted area indicating inflammation. The word \"pneumonia\" is present to describe the condition being depicted.\n: Featured is a graphic representation of a pair of kidneys with a highlighted area in red, indicating distress or damage. Alongside is the phrase \"renal failure,\" signifying the medical condition exhibited.", + "bbox": [ + [ + 16.8927355278093, + 111.80476730987515, + 131.1010215664018, + 150.3972758229285 + ], + [ + 158.2094211123723, + 111.80476730987515, + 128.2633371169126, + 149.82973893303065 + ], + [ + 286.4727582292849, + 110.66969353007946, + 130.53348467650397, + 150.96481271282633 + ], + [ + 417.00624290578884, + 110.66969353007946, + 128.83087400681043, + 154.93757094211125 + ], + [ + 91.80760499432462, + 290.5788876276958, + 124.29057888762769, + 148.1271282633371 + ], + [ + 217.233257661748, + 286.03859250851303, + 137.34392735527808, + 152.66742338251987 + ], + [ + 355.14472190692396, + 288.30874006810444, + 135.07377979568673, + 153.2349602724177 + ] + ] + }, + { + "image_name": "23.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 8, + "dataset_name": "multipanel_detailed_caption_box", + "gt_answers": ": This region displays the heading \"Interesting Facts\" at the top, set against a blue background with a three-line menu icon to the left and a 
heart symbol to the right.\n: Depicts a graphical icon of a panda bear's face on a green background with the label \"Animals\" beneath it.\n: Shows an icon representing a plate and silverware on a green background, labeled as \"Diet Nutrition.\"\n: Includes a graphical icon that combines a heart shape and a pulse line on a dark background, labeled \"Diseases Disorders.\"\n: Features an image of a fashionable shirt on a dark background with the word \"Fashion\" underneath it.\n: Contains an icon resembling a film strip on an orange background, indicating the \"Entertainment\" category.\n: This panel displays an icon of a syringe with a drop, which is on a green background, and is described with the words \"Drugs Addiction.\"\n: Shows a depiction of a wine bottle and glass on a blue background, labeled \"Food & Drink.\"\n: Exhibits two stylized human figures, one male and one female, on a blue background, with the inscription \"Gender.\"\n: Contains a depiction of the Earth on a green background, with the word \"Global\" beneath it.", + "bbox": [ + [ + 26.666666666666714, + 22.814814814814813, + 1053.3333333333333, + 144.49382716049382 + ], + [ + 53.283950617284, + 197.72839506172838, + 353.6296296296296, + 391.65432098765433 + ], + [ + 406.9135802469136, + 186.320987654321, + 288.98765432098764, + 403.0617283950617 + ], + [ + 695.9012345679013, + 186.320987654321, + 311.8024691358025, + 425.87654320987656 + ], + [ + 703.5061728395062, + 616, + 304.1975308641975, + 403.0617283950617 + ], + [ + 403.11111111111114, + 623.604938271605, + 300.39506172839504, + 395.45679012345676 + ], + [ + 45.67901234567906, + 623.604938271605, + 357.4320987654321, + 414.4691358024691 + ], + [ + 49.48148148148153, + 1049.4814814814815, + 342.22222222222223, + 365.037037037037 + ], + [ + 391.70370370370375, + 1026.6666666666667, + 330.8148148148148, + 387.85185185185185 + ], + [ + 722.5185185185186, + 1026.6666666666667, + 300.39506172839504, + 391.65432098765433 + ] + ] + }, + { + 
"image_name": "u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 9, + "dataset_name": "multipanel_detailed_caption_box", + "gt_answers": ": The image depicts two individuals engaged in conversation. One appears to be a professional, possibly a therapist, sitting across from a person who seems to be seeking help. The scene is accompanied by the text \"Seek Professional Help,\" suggesting that the image represents the advice to consult a mental health professional when dealing with depression.\n: This panel features an individual sitting on the ground with their head down, projecting a dejected or hopeless demeanor. Above the figure, the text reads \"Don't Lose Hope.\" The image conveys the message of maintaining hope as a countermeasure against feelings of depression.\n: An illustration of a female figure is shown alongside the phrase \"Practice Mindfulness.\" She appears calm and collected, with her eyes closed and a slight smile, which indicates a serene state of mind, commonly associated with mindfulness practice.\n: Here, an individual is seen looking out of a large window onto a sunny landscape with trees. The phrase \"Rethink Your Perspective\" suggests that the image is advising a change in one's outlook, possibly to a more positive or broader view, as a way to combat depression.\n: The image presents a person in activewear, taking a stride forward with a focused expression. The associated text, \"Stay Active,\" recommends physical activity as a method for improving mental health and battling depression.\n: This panel displays a person in a yoga pose, meditating with eyes closed and hands in a position of focus. 
The text \"Meditate\" indicates that the image is suggesting meditation as a therapeutic practice for managing depression.", + "bbox": [ + [ + 0.28376844494892167, + 107.26447219069239, + 169.69353007945517, + 184.4494892167991 + ], + [ + 194.94892167990918, + 107.83200908059024, + 161.74801362088536, + 168.55845629965947 + ], + [ + 374.2905788876277, + 17.593643586833146, + 125.7094211123723, + 268.4449489216799 + ], + [ + 0, + 294.5516458569807, + 155.22133938706017, + 171.96367763904652 + ], + [ + 178.49035187287174, + 284.90351872871736, + 128.2633371169126, + 213.3938706015891 + ], + [ + 323.2122587968218, + 287.17366628830877, + 171.39614074914869, + 212.82633371169126 + ] + ] + }, + { + "image_name": "u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 10, + "dataset_name": "multipanel_detailed_caption_box", + "gt_answers": ": The image shows three gel ice packs in green, purple, and blue colors, with distinctive shapes, resembling a dinosaur, a star, and a fish. Accompanying text suggests \"Take out the one you need.\"\n: An illustration depicts a cartoon boy holding an ice pack to his head. 
Text indicates the ice pack has multi-functionality and advises using the ice pack for \"the relief area for the doctor recommended time of 20 minutes.\"\n: Detailed instructions on how to use the ice pack are given, with two methods highlighted: \"TO USE COLD\" involving refrigeration, and \"TO USE HOT\" instructing to microwave the pack for 10 seconds and check the temperature.\n: Guidelines for storage are portrayed, advising to \"put the item in the storage bag, for longer shelf life, keep pack in freezer while not in use.\"", + "bbox": [ + [ + 40.6061293984109, + 73.2122587968218, + 208.2860385925085, + 99.31895573212259 + ], + [ + 264.2156640181612, + 60.1589103291714, + 225.87968217934164, + 225.87968217934164 + ], + [ + 32.66061293984109, + 216.79909194097615, + 218.5017026106697, + 259.9318955732123 + ], + [ + 271.59364358683314, + 337.6844494892168, + 236.40635641316686, + 133.93870601589103 + ] + ] + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/data/multipanel_detailed_caption/multipanel_detailed_caption_point.json b/evaluation/MDVP-Bench/data/multipanel_detailed_caption/multipanel_detailed_caption_point.json new file mode 100644 index 0000000000000000000000000000000000000000..8799acc8e4aa61d69a1558272f249a165f2cb1ea --- /dev/null +++ b/evaluation/MDVP-Bench/data/multipanel_detailed_caption/multipanel_detailed_caption_point.json @@ -0,0 +1,396 @@ +[ + { + "image_name": "u=758170821,3598362063&fm=253&app=138&f=JPEG.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 1, + "dataset_name": "multipanel_detailed_caption_point", + "gt_answers": ": The image shows a header section with the text \"Our Services,\" suggesting this panel is an introduction to the various services provided by a company that specializes in cleaning. 
It is likely the main title for the subsequent individual services listed below in the other panels.\n: This panel shows an icon of a bed with a sparkling effect, and the text \"Mattress Cleaning\" below it. The image suggests a service focused on cleaning mattresses, likely promising a thorough clean to remove dirt, dust mites, or other allergens.\n: Depicted here is a symbol representing a floor and a carpet with a cleaning instrument, accompanied by the text \"Floor and Carpet Cleaning.\" This indicates a service dedicated to the cleaning of flooring and carpets, which may include washing or stain removal.\n: Illustrated in this region is an icon of a house with waves of air or fragrance emanating from it, and the text \"Air Treatment\" is shown below. This indicates a service that likely involves improving indoor air quality, possibly through filtration or purification methods.\n: Featured in this panel is a graphic of a sofa with bubbles above it, and \"Sofa Cleaning\" is the text identified below. This suggests a cleaning service for upholstery, particularly sofas, where thorough cleaning for removal of stains and dust is implied.\n: This panel contains an icon of a curtain with sparkling stars around it, and the text \"Curtain Cleaning\" underneath. 
It indicates a service focused on cleaning curtains, implying that it could remove dust and refresh the fabric.\n: Displayed is a symbol of a figure with a broom and a dustpan, and below it is the text \"Maid Service.\" This suggests that the company offers regular cleaning services performed by a maid, which could include various household cleaning tasks.", + "points": [ + [ + 240, + 98 + ], + [ + 98, + 177 + ], + [ + 101, + 259 + ], + [ + 243, + 178 + ], + [ + 241, + 255 + ], + [ + 384, + 176 + ], + [ + 385, + 254 + ] + ] + }, + { + "image_name": "47.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 2, + "dataset_name": "multipanel_detailed_caption_point", + "gt_answers": ": The image presents a stylized illustration of a person with a sun and cloud above their head, suggesting an improvement in mood or clearing of dark thoughts. The text beneath indicates that this is a positive aspect of antidepressants, as it \"Helps manage symptoms.\"\n: This section depicts a thumbs-up hand gesture, which universally signifies approval or success. Accompanying text declares this as \"Effective,\" implying that antidepressants are generally successful in treating symptoms.\n: The graphic illustrates a person hugging themselves with the word \"OK\" circled, signaling self-assurance and safety. This indicates that the medication is considered \"Safe\" for use.\n: The overall header of the infographic reads \"Pros and Cons of Antidepressants,\" which introduces the subject of the entire image. In the central image, a figure seems contemplative, with a question mark above her head, indicating consideration or decision-making regarding antidepressant use.\n: The visual element shows an upset stomach, paired with text \"Can cause side effects,\" which denotes a negative aspect of antidepressant use, highlighting potential adverse reactions to the medication.\n: An hourglass is featured, symbolizing time passing. 
The associated phrase \"Takes time to see results\" points out that it may require a duration before the effects of antidepressants are noticeable.\n: The final portion portrays a person holding an umbrella under a cloud, with a sad face shown, along with text saying \"Some may not work,\" suggesting that antidepressants might not be effective for everyone.", + "points": [ + [ + 243, + 199 + ], + [ + 249, + 517 + ], + [ + 223, + 783 + ], + [ + 794, + 489 + ], + [ + 1288, + 198 + ], + [ + 1288, + 516 + ], + [ + 1277, + 788 + ] + ] + }, + { + "image_name": "23.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 3, + "dataset_name": "multipanel_detailed_caption_point", + "gt_answers": ": This region displays the heading \"Interesting Facts\" at the top, set against a blue background with a three-line menu icon to the left and a heart symbol to the right.\n: Depicts a graphical icon of a panda bear's face on a green background with the label \"Animals\" beneath it.\n: Shows an icon representing a plate and silverware on a green background, labeled as \"Diet Nutrition.\"\n: Includes a graphical icon that combines a heart shape and a pulse line on a dark background, labeled \"Diseases Disorders.\"\n: Features an image of a fashionable shirt on a dark background with the word \"Fashion\" underneath it.\n: Contains an icon resembling a film strip on an orange background, indicating the \"Entertainment\" category.\n: This panel displays an icon of a syringe with a drop, which is on a green background, and is described with the words \"Drugs Addiction.\"\n: Shows a depiction of a wine bottle and glass on a blue background, labeled \"Food & Drink.\"\n: Exhibits two stylized human figures, one male and one female, on a blue background, with the inscription \"Gender.\"\n: Contains a depiction of the Earth on a green background, with the word \"Global\" beneath it.", + "points": [ + [ + 552, + 94 + ], + [ + 229, + 392 
+ ], + [ + 550, + 387 + ], + [ + 850, + 398 + ], + [ + 855, + 817 + ], + [ + 553, + 820 + ], + [ + 223, + 830 + ], + [ + 220, + 1231 + ], + [ + 556, + 1219 + ], + [ + 872, + 1221 + ] + ] + }, + { + "image_name": "u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 4, + "dataset_name": "multipanel_detailed_caption_point", + "gt_answers": ": The image shows three gel ice packs in green, purple, and blue colors, with distinctive shapes, resembling a dinosaur, a star, and a fish. Accompanying text suggests \"Take out the one you need.\"\n: An illustration depicts a cartoon boy holding an ice pack to his head. Text indicates the ice pack has multi-functionality and advises using the ice pack for \"the relief area for the doctor recommended time of 20 minutes.\"\n: Detailed instructions on how to use the ice pack are given, with two methods highlighted: \"TO USE COLD\" involving refrigeration, and \"TO USE HOT\" instructing to microwave the pack for 10 seconds and check the temperature.\n: Guidelines for storage are portrayed, advising to \"put the item in the storage bag, for longer shelf life, keep pack in freezer while not in use.\"", + "points": [ + [ + 144, + 122 + ], + [ + 376, + 172 + ], + [ + 141, + 345 + ], + [ + 389, + 403 + ] + ] + }, + { + "image_name": "38.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 5, + "dataset_name": "multipanel_detailed_caption_point", + "gt_answers": ": The image shows the title \"Physical Therapy Options for Managing Cervical Radiculopathy,\" suggesting the content is focused on treatments for a specific type of neck issue through various physical therapy techniques.\n: Depicts an illustrated person with their arms crossed and hands resting on opposite shoulders in a stretching pose, with the word \"Exercise\" beneath, indicating that physical activity is a 
recommended therapy option.\n: Shows an illustrated arm with an ice pack applied to the elbow area, labeled with the word \"Ice,\" signifying cold therapy as a suggested treatment for cervical radiculopathy.\n: Features an illustration of a person applying heat pads to their neck, with the label \"Heat\" below, suggesting the use of heat therapy for managing cervical radiculopathy.\n: An illustration of an ultrasound device being applied to the shoulder area with the word \"Ultrasound\" underneath, indicating this as a therapeutic technique for cervical radiculopathy.\n: Illustrates a hand with electrical currents around it, together with a small device, with the caption \"Electrical stimulation,\" indicating this as an option for treating the condition.\n: Shows an illustration of a person getting a neck massage, labeled with the word \"Massage,\" recommending it as a method for alleviating symptoms of cervical radiculopathy.", + "points": [ + [ + 747, + 70 + ], + [ + 1230, + 318 + ], + [ + 790, + 324 + ], + [ + 319, + 332 + ], + [ + 294, + 741 + ], + [ + 763, + 756 + ], + [ + 1240, + 765 + ] + ] + }, + { + "image_name": "42.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 6, + "dataset_name": "multipanel_detailed_caption_point", + "gt_answers": ": The image depicts a banner with the phrase \"TALKING ENGLISH\" in capitalized bold letters, suggesting the content is educational material for learning English. 
The use of speech bubble imagery conveys a focus on spoken language skills.\n: This panel features a simplistic human icon above the words \"Personal Data,\" which likely indicates a topic or category associated with personal information in the context of learning English.\n: Shown here is an illustration of a location marker on a circular landscape, positioned above the word \"Travel.\" This panel suggests a thematic connection to traveling, likely containing vocabulary and phrases pertinent to this subject.\n: The image includes a depiction of a hotel facade with a four-star rating and the word \"Hotel\" above it, combined with \"Flat,\" below, portrays a focus on accommodations as part of the learning material.\n: Characterized by an envelope and an '@' symbol design, the panel reads \"Post office,\" signifying a section dedicated to postal services, which may cover relevant terminology and dialogue.\n: Displaying various service-related icons such as a barber's pole, repair tools, and a shopping bag, this panel is labeled \"Services,\" indicating a lesson on language used for different services.\n: The panel shows a plate with a knife, fork, and food, accompanied by the word \"Meals,\" indicating content that teaches English vocabulary and expressions related to eating and food.\n: This panel with a shopping cart filled with goods and tagged with the word \"Shopping\" suggests a section on retail-related English dialogue and terminology.\n: Representing a stylized town with buildings and a car, the word \"Town\" beneath it likely denotes a section focused on language and vocabulary pertaining to the town or city environment.\n: Depicting a heart with a heartbeat wave, accompanied by the word \"Health,\" this panel signifies content aimed at teaching English words and phrases related to health and wellbeing.", + "points": [ + [ + 265, + 63 + ], + [ + 96, + 206 + ], + [ + 269, + 202 + ], + [ + 436, + 202 + ], + [ + 440, + 412 + ], + [ + 280, + 417 + ], + [ + 103, 
+ 411 + ], + [ + 89, + 620 + ], + [ + 264, + 631 + ], + [ + 435, + 632 + ] + ] + }, + { + "image_name": "u=4152088043,200625823&fm=253&app=138&f=JPEG.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 7, + "dataset_name": "multipanel_detailed_caption_point", + "gt_answers": ": An illustrated hand suggests \"eyeballing\" portion sizes rather than using precise measurements, which may lead to inaccurate estimates of food quantities.\n: Depicts a kitchen scale and food items implying the incorrect practice of measuring food by volume instead of weight, which can result in less accurate tracking of food intake.\n: Features a bottle of alcohol and a warning against not tracking alcohol properly, indicating that failing to account for alcoholic drinks can undermine dietary tracking efforts.\n: Shows an apple with text about using generic measurements (medium apple), indicating a potential pitfall in estimating food portions without specificity, possibly leading to inaccuracies.\n: Displays a check mark with a statement about not using verified entries, suggesting that relying on unverified food database entries may affect the accuracy of dietary tracking.\n: Presents two steaks with an advisory about the common mistake of weighing raw and then logging cooked food (or vice versa), which can lead to inaccurate calorie or nutrient logging due to changes in weight through cooking.\n: A notepad icon symbolizes the mistake of not planning your day in advance, possibly implying that a lack of meal planning can result in poor dietary choices or tracking.\n: Illustrates a cookie with a caption about forgetting to track everything, pointing out that omitting items from dietary tracking can lead to an incomplete record of consumption.\n: Shows a person on a treadmill with a warning about eating back calories burned from exercise, which is a common issue where individuals may overcompensate for exercise by increasing 
caloric intake.", + "points": [ + [ + 110, + 89 + ], + [ + 318, + 95 + ], + [ + 527, + 86 + ], + [ + 523, + 257 + ], + [ + 322, + 260 + ], + [ + 115, + 261 + ], + [ + 112, + 421 + ], + [ + 315, + 422 + ], + [ + 524, + 420 + ] + ] + }, + { + "image_name": "u=74400039,1118061224&fm=253&app=138&f=JPEG.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 8, + "dataset_name": "multipanel_detailed_caption_point", + "gt_answers": ": This panel displays cartoon images representing mathematical elements such as numbers and mathematical symbols, coupled with the word \"Math\" indicating the school subject of mathematics.\n: Illustrated in this panel is an image depicting different aspects of the Earth, including landmasses and water bodies, with the word \"Geography\" below, suggesting the subject that studies the Earth and its features.\n: This panel features creative elements like a paint palette, brushes, and a pencil, along with the word \"Art\" which refers to the school subject that involves creativity and visual expression.\n: It shows a simple physics equation \"F=ma\" associated with Newton's second law, and the word \"Physics\" below, referring to the scientific subject that studies matter, energy, and the fundamental forces of nature.\n: This panel contains an image of a historical scroll or document and a feather quill, placed above the word \"History,\" indicating the school subject related to studying past events.\n: Here, an individual is depicted in a dynamic pose that signifies physical activity, and the words \"Physical Education\" beneath, implying the subject focusing on bodily exercise and fitness.\n: In this panel, a set of flasks and beakers, commonly used in chemical experiments, appear above the word \"Chemistry,\" highlighting the subject of science dealing with the properties of substances and their interactions.\n: Represented is a cell structure containing a nucleus, typically studied in 
biology classes, with the word \"Biology\" below, pointing to the subject that explores living organisms and life processes.\n: This section comprises an open book with pages fluttering, which, along with the word \"Literature,\" signifies the subject dealing with written works, especially those considered of artistic or intellectual value.", + "points": [ + [ + 71, + 205 + ], + [ + 71, + 319 + ], + [ + 72, + 437 + ], + [ + 256, + 203 + ], + [ + 247, + 316 + ], + [ + 252, + 432 + ], + [ + 414, + 203 + ], + [ + 417, + 320 + ], + [ + 417, + 440 + ] + ] + }, + { + "image_name": "u=3749429080,2686066601&fm=253&app=138&f=JPEG.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 9, + "dataset_name": "multipanel_detailed_caption_point", + "gt_answers": ": The image displays a sphygmomanometer, an instrument used for measuring blood pressure, with the indicator pointing to a high reading. Below it is a label stating \"high blood pressure.\"\n: Illustrated is a hand with a red, swollen finger, possibly indicating inflammation or injury. The text beneath the image reads \"sore finger.\"\n: Depicted is a stylized white tooth with a prominent crack, signifying dental damage or decay. Above the tooth, the term \"toothache\" is written, suggesting dental pain.\n: This shows a depiction of a human stomach with wavy lines around it, typically used to represent unease or pain. The label \"stomachache\" describes the discomfort associated with the area.\n: A human hand is shown with lines around the base of the thumb extending to the wrist. This is commonly used to represent swelling or pain. The word \"sprain\" is noted below, indicating a type of injury.\n: The image showcases a medical device, a blood glucose meter, which is used to monitor blood sugar levels, and the reading displayed suggests a state of \"diabetes,\" as noted next to it.\n: Illustrated is a clinical thermometer with the mercury rising to a fever level. 
Above it, the word \"fever\" signals an elevated body temperature.\n: A human figure is displayed with the left arm wrapped in a green cast, indicating a fracture. The word \"broken arm\" is positioned below the figure, confirming the injury depicted.\n: Here is the lower half of a figure with the right knee highlighted, stating \"injury,\" which likely refers to a physical trauma or damage to the knee area.", + "points": [ + [ + 43, + 167 + ], + [ + 33, + 313 + ], + [ + 116, + 314 + ], + [ + 210, + 315 + ], + [ + 311, + 271 + ], + [ + 319, + 124 + ], + [ + 179, + 89 + ], + [ + 140, + 194 + ], + [ + 231, + 181 + ] + ] + }, + { + "image_name": "u=1806211385,2703137837&fm=253&app=138&f=JPEG.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 10, + "dataset_name": "multipanel_detailed_caption_point", + "gt_answers": ": An icon featuring a paint roller and tray represents cleaning. Accompanying text indicates the use of appropriate products to remove dirt, dust, grease, and germs.\n: This area shows a window icon, accompanied by text advocating for ventilation. The text suggests airing out after each service to ensure sufficient renewal of the air.\n: A spray bottle icon is depicted, emitting a mist. The text delineates the cleaning crew's use of masks, gloves, and gowns throughout the cleaning process.\n: An illustration of a virion being targeted, with text detailing the practice of disinfecting. 
It notes that disinfectant is sprayed on all floor surfaces.", + "points": [ + [ + 202, + 133 + ], + [ + 532, + 167 + ], + [ + 193, + 291 + ], + [ + 492, + 395 + ] + ] + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/data/natural_Inter_Relationship/natural_Inter_Relationship_box.json b/evaluation/MDVP-Bench/data/natural_Inter_Relationship/natural_Inter_Relationship_box.json new file mode 100644 index 0000000000000000000000000000000000000000..56bfb750bec975409b9487bd64c0462660e4206f --- /dev/null +++ b/evaluation/MDVP-Bench/data/natural_Inter_Relationship/natural_Inter_Relationship_box.json @@ -0,0 +1,410 @@ +[ + { + "image_name": "000000044029.jpg", + "question": "Please analyze the relationship between all marked regions in the image.", + "question_id": 1, + "dataset_name": "natural_Inter_Relationship_box", + "gt_answers": ": These marks indicate two alpine skis. They are part of a pair used in skiing, typically similar in design but mirrored. The skis are designed to slide over snow and are essential for the sport of alpine skiing. Both are attached to the skier's boots.\n: The alpine ski at and the ski pole at are part of the skiing equipment set used by a skier. The ski is used for gliding on the snow while the pole aids balance and propulsion.\n: The ski at and the ski pole at are also part of the skiing gear, serving complementary functions. 
Here, the ski supports the skier's weight, and the pole assists with the skier's turn.\n: The goggles marked here are independent of the other marks, representing eye protection and enhancing vision for the skier in various lighting and weather conditions.", + "bbox": [ + [ + 304.54, + 324.5, + 60.23, + 21.91 + ], + [ + 236.2, + 325.15, + 51.82, + 20.77 + ], + [ + 411.73, + 162.79, + 42.01, + 33.16 + ], + [ + 226.63, + 193.67, + 180.5, + 46.47 + ], + [ + 495.31, + 287.23, + 57.12, + 56.79 + ] + ] + }, + { + "image_name": "000000518836.jpg", + "question": "Please analyze the relationship between all marked regions in the image.", + "question_id": 2, + "dataset_name": "natural_Inter_Relationship_box", + "gt_answers": ": Both are parts of the same brown horse, one marking the head and the other the body of the animal, indicating a singular being.\n: The brown horse at is in sharp focus, contrasting with the white horse at which is in the background and less distinct.\n: Both marks indicate horses, yet their colors and focus differ, with one in the foreground and the other in the background.\n: The image features two horses of different colors, and highlighting portions of a brown horse, while denotes a white horse that appears in a different plane, creating depth perception.", + "bbox": [ + [ + 166.41, + 42.56, + 341.16, + 436.51 + ], + [ + 0.0, + 287.76, + 20.7, + 163.06 + ], + [ + 543.63, + 260.83, + 96.37, + 84.28 + ] + ] + }, + { + "image_name": "65.jpg", + "question": "Please analyze the relationship between all marked regions in the image.", + "question_id": 3, + "dataset_name": "natural_Inter_Relationship_box", + "gt_answers": ": The man identified in both and is the same, with providing a closer detail of his attire, specifically his gray sweater.\n: The white hair on the man at possibly correlates with the person identified in , suggesting it is the same individual.\n: Both and pertain to vehicles; however, they are disparate in type, with one being a 2-door 
sedan and the other a pickup truck.\n: The brick sidewalk at and the concrete pavers at are adjacent to each other, collectively forming part of a pedestrian walkway.\n: The presence of the man at along with the moving van at indicates a bustling urban environment with active vehicular and pedestrian movement.", + "bbox": [ + [ + 360, + 57, + 145, + 169 + ], + [ + 407, + 67, + 49, + 35 + ], + [ + 125, + 125, + 520, + 424 + ], + [ + 0, + 368, + 209, + 226 + ], + [ + 0, + 336, + 289, + 260 + ], + [ + 318, + 10, + 417, + 155 + ], + [ + 177, + 72, + 144, + 73 + ], + [ + 384, + 106, + 122, + 101 + ] + ] + }, + { + "image_name": "000000227741.jpg", + "question": "Please analyze the relationship between all marked regions in the image.", + "question_id": 4, + "dataset_name": "natural_Inter_Relationship_box", + "gt_answers": ": Both feet with sports shoes and the tennis racket are part of the tennis player\u2019s gear during a match. The right and left foot placements in the shoes are part of the athlete's stance, tying into the action of serving the ball with the racket held in the right hand.", + "bbox": [ + [ + 239.94, + 73.4, + 142.53, + 55.04 + ], + [ + 415.08, + 327.77, + 36.8, + 38.52 + ], + [ + 355.3, + 353.16, + 25.62, + 36.34 + ] + ] + }, + { + "image_name": "286039.jpg", + "question": "Please analyze the relationship between all marked regions in the image.", + "question_id": 5, + "dataset_name": "natural_Inter_Relationship_box", + "gt_answers": ": These marks are all associated with an aged refrigerator and its contents. shows the general exterior and interior of the appliance, is specifically the freezer section, details the inner door side, and pinpoints a particular jar inside. They collectively depict a state of disuse and abandonment.\n: The tree at and the turbine vent at offer contrasting elements of nature and man-made structures. 
The tree appears lifeless and still, while the vent suggests ongoing utility and interaction with the natural elements.\n: The beer can at has no direct connection to the other marked objects but shares a common theme of neglect and improper waste disposal.\n: The window at stands out as a feature of the house, separate from the disorder of the refrigerator yet part of the larger scene of the house and its environs.", + "bbox": [ + [ + 86, + 318, + 549, + 563 + ], + [ + 116, + 324, + 314, + 177 + ], + [ + 350, + 494, + 311, + 401 + ], + [ + 0, + 123, + 486, + 317 + ], + [ + 266, + 765, + 25, + 44 + ], + [ + 555, + 703, + 28, + 59 + ], + [ + 653, + 290, + 35, + 38 + ], + [ + 422, + 383, + 103, + 118 + ] + ] + }, + { + "image_name": "000000094046.jpg", + "question": "Please analyze the relationship between all marked regions in the image.", + "question_id": 6, + "dataset_name": "natural_Inter_Relationship_box", + "gt_answers": ": The marked regions through display objects found typically in an outdoor recreational or sports setting, specifically a baseball field. Marks , , , , , and all depict parts of outdoor chairs, indicating a common function for spectator seating arranged along the side of the field. Each chair is made of metal, has vertical slats, and is blue, which signifies uniformity in the setting. is a slightly different entity as it's a cooler used for storing refreshments, likely available for players or spectators during a game. 
stands out as it is an element of the sport being played, a baseball base, placed on the sandy field as part of the game infrastructure contrasted against the seating and cooling amenities marked by the other regions.", + "bbox": [ + [ + 245.37, + 556.62, + 130.96, + 23.13 + ], + [ + 492.34, + 303.6, + 53.93, + 52.31 + ], + [ + 331.24, + 231.71, + 29.76, + 41.96 + ], + [ + 429.02, + 235.8, + 74.93, + 93.05 + ], + [ + 390.13, + 234.19, + 52.69, + 88.8 + ], + [ + 378.6, + 232.62, + 30.85, + 54.81 + ], + [ + 0.0, + 229.99, + 41.73, + 52.86 + ], + [ + 176.72, + 227.54, + 42.37, + 10.05 + ] + ] + }, + { + "image_name": "2407533.jpg", + "question": "Please analyze the relationship between all marked regions in the image.", + "question_id": 7, + "dataset_name": "natural_Inter_Relationship_box", + "gt_answers": ": Both are components of a catcher's gear with protective functions; one guards the hand, while the other shields the face.\n: These marks identify elements of a catcher's attire that consist of protective gear and accessories that hold the uniform together.\n: This mark indicates a branding element, unrelated to protective sports gear, representing a manufacturer's logo.\n: These two marks relate to the broader context of the scene: one likely representing commercial aspects of the venue and the other signifying the presence of a live audience.\n: Collectively, these marks identify various aspects of a catcher's gear and attire, all serving functional purposes on the field.", + "bbox": [ + [ + 165, + 202, + 87, + 88 + ], + [ + 55, + 328, + 172, + 170 + ], + [ + 133, + 414, + 34, + 83 + ], + [ + 228, + 74, + 100, + 143 + ], + [ + 91, + 0, + 111, + 107 + ], + [ + 80, + 223, + 96, + 22 + ], + [ + 128, + 234, + 12, + 11 + ], + [ + 0, + 0, + 332, + 79 + ] + ] + }, + { + "image_name": "000000114616.jpg", + "question": "Please analyze the relationship between all marked regions in the image.", + "question_id": 8, + "dataset_name": "natural_Inter_Relationship_box", + 
"gt_answers": ": Both and depict sheep in a domesticated setting, identifiable by their woolly coats and ear tags. They are likely part of the same flock within a farming environment. Although they are both sheep, each one is positioned differently, and their gazes suggest different levels of alertness.", + "bbox": [ + [ + 18.53, + 87.08, + 319.31, + 209.02 + ], + [ + 179.84, + 211.29, + 453.4, + 264.5 + ] + ] + }, + { + "image_name": "2410042.jpg", + "question": "Please analyze the relationship between all marked regions in the image.", + "question_id": 9, + "dataset_name": "natural_Inter_Relationship_box", + "gt_answers": ": These marks all identify earrings within a person's ears, displaying a preference for hoop-style jewelry. The earrings differ in color, suggesting a variety in the person's collection. Each mark is reflective of individual fashion choices.\n: Depict different aspects of the man's engagement; with highlighting his action of eating and showing part of his appearance, specifically his hair.\n: Both and are objects placed upon other surfaces; a hat on a head and a box on a wall, respectively. 
They are both dark-colored items and share a utilitarian aspect\u2014protection for the head and likely enclosure or support for items within or on the wall.\n: All these marks are connected to the man himself, showing his attire, activity, and personal style\u2014eating, wearing a hat, hair characteristics, and choice of earring.", + "bbox": [ + [ + 45, + 3, + 303, + 275 + ], + [ + 156, + 32, + 25, + 44 + ], + [ + 137, + 1, + 167, + 50 + ], + [ + 10, + 0, + 122, + 93 + ], + [ + 168, + 59, + 15, + 20 + ], + [ + 126, + 21, + 91, + 55 + ], + [ + 158, + 47, + 37, + 52 + ] + ] + }, + { + "image_name": "000000104486.jpg", + "question": "Please analyze the relationship between all marked regions in the image.", + "question_id": 10, + "dataset_name": "natural_Inter_Relationship_box", + "gt_answers": ": This region includes an individual who is caught in a moment that seems to involve some sort of task or activity. The person is engaged with a luggage cart, which suggests they might be arriving or departing from a location that offers such amenities, possibly a hotel. The cart holds luggage indicating travel or transit. The man's expression and attire provide clues to his role or state at the moment, such as potentially being a guest handling his luggage. 
The other individual seen partially in the background creates a sense of movement or interaction, but their relationship to the man or the context is unclear.\n", + "bbox": [ + [ + 157.23, + 341.07, + 10.67, + 2.08 + ] + ] + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/data/natural_Inter_Relationship/natural_Inter_Relationship_point.json b/evaluation/MDVP-Bench/data/natural_Inter_Relationship/natural_Inter_Relationship_point.json new file mode 100644 index 0000000000000000000000000000000000000000..949a0450b9f2bd5e14ac23c8541b630f02295ca4 --- /dev/null +++ b/evaluation/MDVP-Bench/data/natural_Inter_Relationship/natural_Inter_Relationship_point.json @@ -0,0 +1,384 @@ +[ + { + "image_name": "2413306.jpg", + "question": "Please analyze the relationship between all marked points in the image.", + "question_id": 1, + "dataset_name": "natural_Inter_Relationship_point", + "gt_answers": ": The two skateboards, each identified by and , share a common theme as integral elements of the skateboarding culture depicted in the image. One skateboard is stationary on the ground while the other is propped up, indicating a momentary pause in activity.\n: The individuals marked by , , and can be connected through the context of a shared activity or location. They are part of a group, with engaging in hydration, observing the environment, and sitting and enjoying the company.\n: The smiling boy at and the man in a brown shirt at appear to be part of the same social gathering or activity. 
The smile and casual body language suggest a positive, engaging atmosphere.\n: Standing apart from the dynamic interactions among the individuals and sporting equipment, represents a stationary object, a street lamp providing practicality by lighting the environment, contributing to public safety and ambiance.", + "points": [ + [ + 174, + 275 + ], + [ + 113, + 179 + ], + [ + 374, + 168 + ], + [ + 462, + 234 + ], + [ + 478, + 161 + ], + [ + 221, + 57 + ], + [ + 306, + 175 + ], + [ + 166, + 159 + ] + ] + }, + { + "image_name": "1592243.jpg", + "question": "Please analyze the relationship between all marked points in the image.", + "question_id": 2, + "dataset_name": "natural_Inter_Relationship_point", + "gt_answers": ": Both marked regions show items presumably set upon a dining table or surface within a domestic setting, displaying objects associated with eating and drinking.\n: All these marks denote books placed on a wooden shelf. Each book is situated close to other books or items, suggesting a common use and location within a personal library or living space. 
Despite their proximity, each book is described in isolation without reference to its neighboring items beyond its immediate support or contact.\n: A contrast emerges between , which is focused on the dining table area with glassware and potentially dining or social occasions, and the rest of the marks, which are more aligned with a study or reflective environment dominated by books and shelves.", + "points": [ + [ + 363, + 369 + ], + [ + 579, + 530 + ], + [ + 692, + 156 + ], + [ + 868, + 197 + ], + [ + 889, + 231 + ], + [ + 741, + 246 + ], + [ + 738, + 258 + ], + [ + 911, + 305 + ] + ] + }, + { + "image_name": "2413119.jpg", + "question": "Please analyze the relationship between all marked points in the image.", + "question_id": 3, + "dataset_name": "natural_Inter_Relationship_point", + "gt_answers": ": The image only contains one marked region, therefore there are no inter-relationships with other marked regions to analyze. The provided description of stands as an isolated analysis of the observed aircraft and its immediate surroundings on the airfield.\n", + "points": [ + [ + 249, + 297 + ] + ] + }, + { + "image_name": "000000352214.jpg", + "question": "Please analyze the relationship between all marked points in the image.", + "question_id": 4, + "dataset_name": "natural_Inter_Relationship_point", + "gt_answers": ": These marks all indicate carrot slices which are parts of the same category of vegetable. They vary in orientation and interaction with their immediate surroundings. \n: Both are individual kernels of corn but are located in different areas of the image, surrounded by distinct groupings of other ingredients. 
\n: This mark stands out as the only broccoli piece, with a different structure and texture compared to the other marked items.", + "points": [ + [ + 359, + 14 + ], + [ + 480, + 314 + ], + [ + 134, + 130 + ], + [ + 269, + 234 + ], + [ + 409, + 123 + ], + [ + 346, + 330 + ], + [ + 332, + 79 + ], + [ + 278, + 84 + ] + ] + }, + { + "image_name": "2406502.jpg", + "question": "Please analyze the relationship between all marked points in the image.", + "question_id": 5, + "dataset_name": "natural_Inter_Relationship_point", + "gt_answers": ": The ceiling panel and cashbox are both integral components of the bus's interior, likely sharing similar materials and serving the function of utility and transaction management.\n: The upper interior panel and individual seat exhibit a cohesive design aesthetic, integral to the bus's function of providing transportation services to passengers.\n: The panel covers the inside upper area of the bus, while the mirror is a fixture visible from the inside, serving distinct functions of structure and safety.\n: Both found in the driver\u2019s area, the interior upper panel and the steering wheel contribute to the operation and control of the bus.\n: The upper panel, cashbox, and signage are all placed near the front of the bus, likely closely interacting in the process of boarding and fare collection.\n: The location of the driver\u2019s seat under the upper panel indicates a designed space for the bus operator, combining comfort and functionality.\n: The ceiling and banner share the same space, with the former providing infrastructure for the latter to be displayed for passengers.\n: Both the steering wheel and driver's seat are essential to the driver's operation of the bus, ensuring the vehicle is controlled and navigated safely.", + "points": [ + [ + 249, + 33 + ], + [ + 282, + 227 + ], + [ + 415, + 278 + ], + [ + 101, + 132 + ], + [ + 170, + 171 + ], + [ + 325, + 237 + ], + [ + 143, + 241 + ], + [ + 36, + 21 + ] + ] + }, + { + 
"image_name": "000000383406.jpg", + "question": "Please analyze the relationship between all marked points in the image.", + "question_id": 6, + "dataset_name": "natural_Inter_Relationship_point", + "gt_answers": ": The can at and the baseballs at and are associated with leisure activities and appear amidst a personal space, suggesting a casual environment.\n: Both and are monitors used for information display, differing in content, with being blank and showing text.\n: Items at marks , , and represent health and medication, indicative of a presence of pharmaceutical items within a living space.\n: The baseballs at and are similar in appearance and indicate engagement in or affinity for sports-related activities.\n: These items, although individually distinct, collectively represent common household objects that might be found in a living area or storage space.\n: Monitors at and and the medicine-related items at , , and suggest a multifunctional space, likely a home office or living area with mixed usage.", + "points": [ + [ + 98, + 183 + ], + [ + 237, + 85 + ], + [ + 550, + 93 + ], + [ + 193, + 180 + ], + [ + 147, + 176 + ], + [ + 558, + 250 + ], + [ + 572, + 260 + ], + [ + 551, + 208 + ] + ] + }, + { + "image_name": "000000412464.jpg", + "question": "Please analyze the relationship between all marked points in the image.", + "question_id": 7, + "dataset_name": "natural_Inter_Relationship_point", + "gt_answers": ": The fork (), sandwich (), and part of the table () all contribute to a dining scenario, with the fork and sandwich being part of a meal and the table serving as the base for the setting.\n: The condiments ( and ) are typically served alongside main dishes like the sandwich (), often to enhance flavor or as part of the presentation.\n: The table ( and ) provides a surface for drinks ( and ), supporting a cohesive dining experience within a restaurant or similar environment.", + "points": [ + [ + 222, + 152 + ], + [ + 608, + 229 + ], + [ + 277, + 211 + 
], + [ + 257, + 237 + ], + [ + 320, + 277 + ], + [ + 437, + 18 + ], + [ + 602, + 105 + ], + [ + 199, + 37 + ] + ] + }, + { + "image_name": "2409784.jpg", + "question": "Please analyze the relationship between all marked points in the image.", + "question_id": 8, + "dataset_name": "natural_Inter_Relationship_point", + "gt_answers": ": All these marks represent different ways eggs are presented for sale in the same retail environment. and offer organized trays of eggs, differentiated by the eggs' shell color, implying a variety of choices for consumers. , on the other hand, showcases eggs without a tray, further diversifying the purchasing options available. Despite the differences in presentation, all three marks denote a focus on providing eggs as a primary product.\n: These marks share the trait of displaying food products for sale in a hanging or elevated manner, features bananas presented in a ready-to-buy fashion, while presents jars of food items on a shelf. Each display utilizes vertical space within the store to attractively present the items to customers.\n: introduces us to the human element of the market, a woman who is potentially responsible for organizing and selling the items, of which could be an integral part, as the sign helps facilitate the commercial exchange by informing customers of the products and prices.", + "points": [ + [ + 304, + 226 + ], + [ + 352, + 219 + ], + [ + 358, + 228 + ], + [ + 33, + 180 + ], + [ + 106, + 142 + ], + [ + 358, + 134 + ], + [ + 358, + 268 + ], + [ + 41, + 210 + ] + ] + }, + { + "image_name": "2408779.jpg", + "question": "Please analyze the relationship between all marked points in the image.", + "question_id": 9, + "dataset_name": "natural_Inter_Relationship_point", + "gt_answers": ": The sturdy metal structure identified in appears to be the support for the stop sign seen in , indicating they are parts of a whole, albeit different aspects of the same item, observed from opposing viewpoints.\n: These two regions 
both contain signs; however, features a regulatory sign while displays a sign with conflicting instruction, which is unusual in usual traffic contexts.\n: The stop sign at and the safety cone at are both objects pertaining to roadway safety and control, likely positioned to guide and protect road users.\n: The metal backing at and the backside of a sign with reflectors at suggest an association as they describe different portions of what could be the same object's structure.\n: Although each mark indicates a type of barrier, the tree limb at is a natural form whereas the poles and chains at represent a man-made partition.", + "points": [ + [ + 137, + 192 + ], + [ + 73, + 206 + ], + [ + 279, + 227 + ], + [ + 352, + 249 + ], + [ + 144, + 277 + ], + [ + 327, + 287 + ], + [ + 69, + 207 + ], + [ + 222, + 47 + ] + ] + }, + { + "image_name": "2410052.jpg", + "question": "Please analyze the relationship between all marked points in the image.", + "question_id": 10, + "dataset_name": "natural_Inter_Relationship_point", + "gt_answers": ": These marks indicate different elements of a baseball environment. relates to the field, while , , and all relate to the attire of a baseball player, pointing to the interconnectedness of the sport's playing surface and specialized clothing.\n: This mark stands apart from the others, as it is a part of a stationary object likely adjacent to the baseball field but unrelated to the sport itself. The wall does not have a direct connection to the activity being performed.\n: Both marks relate to the upper body wear of the baseball player, with being part of the outer layer (jacket) and showing the headgear (helmet) which is essential for protection.\n: These two marks indicate safety gear, specifically the helmet. 
While focuses on the helmet\u2019s design and structure, shows it in use, exemplifying its practical application in the sport.", + "points": [ + [ + 249, + 339 + ], + [ + 468, + 150 + ], + [ + 325, + 117 + ], + [ + 299, + 322 + ], + [ + 187, + 154 + ], + [ + 304, + 208 + ], + [ + 318, + 157 + ], + [ + 245, + 337 + ] + ] + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/data/natural_QA/natural_QA_box.json b/evaluation/MDVP-Bench/data/natural_QA/natural_QA_box.json new file mode 100644 index 0000000000000000000000000000000000000000..1c6a236dddf016de3b5108d01a18fea82c28b54f --- /dev/null +++ b/evaluation/MDVP-Bench/data/natural_QA/natural_QA_box.json @@ -0,0 +1,452 @@ +[ + { + "image_name": "000000271795.jpg", + "question": "What is the primary use of the object at ?", + "question_id": 1, + "dataset_name": "natural_QA_box", + "gt_answers": "The object at is a telephone used for voice communication in the office.", + "bbox": [ + [ + 582.02, + 216.01, + 57.98, + 69.83 + ], + [ + 399.09, + 246.95, + 35.93, + 55.63 + ], + [ + 410.69, + 269.13, + 72.86, + 43.82 + ], + [ + 249.59, + 211.38, + 30.33, + 30.52 + ], + [ + 224.48, + 138.99, + 66.58, + 32.18 + ], + [ + 167.48, + 172.43, + 81.64, + 130.62 + ], + [ + 578.15, + 116.36, + 51.49, + 53.87 + ], + [ + 564.29, + 377.71, + 75.71, + 33.08 + ] + ] + }, + { + "image_name": "000000247984.jpg", + "question": "What is the visible color of the suitcase at ?", + "question_id": 2, + "dataset_name": "natural_QA_box", + "gt_answers": "The visible color of the suitcase at is black.", + "bbox": [ + [ + 0.0, + 173.07, + 585.64, + 252.93 + ], + [ + 121.3, + 2.79, + 332.83, + 242.69 + ] + ] + }, + { + "image_name": "2406803.jpg", + "question": "What can be inferred about the weather conditions from and ?", + "question_id": 3, + "dataset_name": "natural_QA_box", + "gt_answers": "The presence of sunlight on the wing in and the partly cloudy sky in suggest variable weather, with both sunny spells and clouds.", + "bbox": [ 
+ [ + 306, + 183, + 54, + 18 + ], + [ + 223, + 175, + 100, + 180 + ], + [ + 222, + 150, + 112, + 207 + ], + [ + 50, + 382, + 125, + 70 + ], + [ + 235, + 281, + 65, + 30 + ], + [ + 10, + 241, + 65, + 100 + ], + [ + 165, + 107, + 84, + 22 + ], + [ + 225, + 358, + 79, + 125 + ] + ] + }, + { + "image_name": "000000452274.jpg", + "question": "What is the color of the shirt at ?", + "question_id": 4, + "dataset_name": "natural_QA_box", + "gt_answers": "The shirt at appears to be white.", + "bbox": [ + [ + 129.1, + 214.13, + 510.9, + 211.87 + ], + [ + 301.79, + 322.21, + 62.35, + 103.79 + ] + ] + }, + { + "image_name": "1165.jpg", + "question": "What material is the toilet in made of, and where is it positioned in the room?", + "question_id": 5, + "dataset_name": "natural_QA_box", + "gt_answers": "The toilet in is made of metal and is positioned in the corner of the room.", + "bbox": [ + [ + 458, + 367, + 58, + 55 + ], + [ + 503, + 300, + 59, + 164 + ], + [ + 518, + 68, + 78, + 56 + ], + [ + 64, + 228, + 91, + 71 + ], + [ + 148, + 225, + 354, + 45 + ], + [ + 98, + 276, + 411, + 27 + ], + [ + 73, + 370, + 416, + 84 + ], + [ + 90, + 38, + 358, + 177 + ] + ] + }, + { + "image_name": "1144.jpg", + "question": "What type of games can be played with the objects at and ?", + "question_id": 6, + "dataset_name": "natural_QA_box", + "gt_answers": "Billiards or pool can be played with the object at , and table tennis can be played with the objects at .", + "bbox": [ + [ + 210, + 138, + 75, + 75 + ], + [ + 201, + 190, + 107, + 55 + ], + [ + 191, + 77, + 152, + 35 + ], + [ + 265, + 340, + 61, + 19 + ], + [ + 153, + 139, + 52, + 101 + ] + ] + }, + { + "image_name": "2408509.jpg", + "question": "What kind of beverage might be contained in the glass at ?", + "question_id": 7, + "dataset_name": "natural_QA_box", + "gt_answers": "Given the empty appearance of the glass at and the lack of visible liquid or ice, it is possible that it either contained a clear liquid such as water or is 
currently empty.", + "bbox": [ + [ + 184, + 182, + 158, + 62 + ], + [ + 236, + 242, + 218, + 127 + ], + [ + 0, + 182, + 64, + 129 + ], + [ + 138, + 193, + 34, + 103 + ], + [ + 413, + 184, + 75, + 160 + ], + [ + 358, + 160, + 82, + 117 + ], + [ + 185, + 112, + 142, + 81 + ], + [ + 188, + 246, + 128, + 47 + ] + ] + }, + { + "image_name": "000000180095.jpg", + "question": "What type of sign is denoted by , and what specific condition does it appear to be in?", + "question_id": 8, + "dataset_name": "natural_QA_box", + "gt_answers": "The sign at is a traffic direction sign that has been defaced or altered.", + "bbox": [ + [ + 173.73, + 349.42, + 170.88, + 251.7 + ], + [ + 174.15, + 106.06, + 170.47, + 241.4 + ], + [ + 304.47, + 179.26, + 56.49, + 141.71 + ] + ] + }, + { + "image_name": "000000019544.jpg", + "question": "What kind of object is highlighted at on the image, and where is it located?", + "question_id": 9, + "dataset_name": "natural_QA_box", + "gt_answers": "The object at is likely a ceramic plate, located on the wooden desk.", + "bbox": [ + [ + 482.62, + 119.0, + 57.81, + 17.26 + ], + [ + 50.67, + 167.0, + 45.86, + 90.97 + ], + [ + 34.05, + 298.05, + 143.67, + 51.96 + ], + [ + 446.22, + 446.23, + 151.88, + 29.21 + ], + [ + 180.67, + 315.5, + 247.42, + 55.62 + ], + [ + 439.15, + 381.14, + 177.53, + 80.2 + ], + [ + 44.96, + 335.65, + 140.66, + 71.73 + ], + [ + 65.2, + 389.64, + 125.82, + 23.02 + ] + ] + }, + { + "image_name": "2407722.jpg", + "question": "What purpose do the red lights identified in serve on the vehicle?", + "question_id": 10, + "dataset_name": "natural_QA_box", + "gt_answers": "The red emergency lights marked by are used to signal urgency or to indicate that this is a service or emergency vehicle, such as a tow truck, ambulance, or fire truck.", + "bbox": [ + [ + 132, + 147, + 135, + 17 + ], + [ + 329, + 283, + 63, + 67 + ], + [ + 97, + 189, + 155, + 45 + ], + [ + 14, + 101, + 65, + 85 + ], + [ + 258, + 175, + 54, + 63 + ], + [ + 257, + 315, 
+ 25, + 37 + ], + [ + 63, + 317, + 187, + 32 + ], + [ + 317, + 128, + 113, + 19 + ] + ] + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/data/natural_QA/natural_QA_point.json b/evaluation/MDVP-Bench/data/natural_QA/natural_QA_point.json new file mode 100644 index 0000000000000000000000000000000000000000..468d76925603d66571804b6f003d6b334eac0767 --- /dev/null +++ b/evaluation/MDVP-Bench/data/natural_QA/natural_QA_point.json @@ -0,0 +1,316 @@ +[ + { + "image_name": "000000267088.jpg", + "question": "What is the main visible difference between and ?", + "question_id": 1, + "dataset_name": "natural_QA_point", + "gt_answers": "The main visible difference between and is their location on the giraffe; is on the head, while is on the neck.", + "points": [ + [ + 334, + 288 + ], + [ + 328, + 279 + ] + ] + }, + { + "image_name": "2408070.jpg", + "question": "What activity is the person at engaging in?", + "question_id": 2, + "dataset_name": "natural_QA_point", + "gt_answers": "The person at is engaging in kitesurfing.", + "points": [ + [ + 238, + 91 + ] + ] + }, + { + "image_name": "000000347724.jpg", + "question": "What object is highlighted by all the marked regions in the image?", + "question_id": 3, + "dataset_name": "natural_QA_point", + "gt_answers": "All the marked regions in the image highlight carrots, each with an orange body and a green top.", + "points": [ + [ + 182, + 450 + ], + [ + 153, + 519 + ], + [ + 225, + 315 + ], + [ + 351, + 439 + ], + [ + 323, + 440 + ], + [ + 416, + 352 + ], + [ + 110, + 385 + ], + [ + 83, + 542 + ] + ] + }, + { + "image_name": "2408814.jpg", + "question": "Can you describe the pattern on the tie visible at ?", + "question_id": 4, + "dataset_name": "natural_QA_point", + "gt_answers": "The tie has a pattern that includes shades of blue with what appears to be yellow accents. 
The exact pattern is not clearly discernible, but it contributes to the professional look of the attire.", + "points": [ + [ + 165, + 241 + ], + [ + 360, + 207 + ], + [ + 315, + 320 + ], + [ + 47, + 175 + ], + [ + 40, + 135 + ], + [ + 261, + 90 + ], + [ + 41, + 144 + ], + [ + 406, + 356 + ] + ] + }, + { + "image_name": "2407853.jpg", + "question": "What safety feature for signaling turns is shown in the image?", + "question_id": 5, + "dataset_name": "natural_QA_point", + "gt_answers": "The orange signal light on the motorcycle, likely a turn indicator, is the safety feature shown in the image.", + "points": [ + [ + 194, + 349 + ], + [ + 487, + 38 + ], + [ + 379, + 284 + ], + [ + 98, + 149 + ], + [ + 240, + 293 + ], + [ + 225, + 172 + ], + [ + 324, + 76 + ], + [ + 489, + 45 + ] + ] + }, + { + "image_name": "000000185512.jpg", + "question": "What purpose does the object at serve in a bathroom?", + "question_id": 6, + "dataset_name": "natural_QA_point", + "gt_answers": "The object at is a mirror, primarily used for grooming and providing visual feedback.", + "points": [ + [ + 370, + 230 + ], + [ + 350, + 433 + ], + [ + 168, + 529 + ], + [ + 262, + 251 + ], + [ + 168, + 245 + ], + [ + 305, + 534 + ] + ] + }, + { + "image_name": "000000051476.jpg", + "question": "What is the function of the object marked with in cooking?", + "question_id": 7, + "dataset_name": "natural_QA_point", + "gt_answers": "The object marked with is used for flipping or stirring food during preparation or cooking.", + "points": [ + [ + 182, + 174 + ], + [ + 136, + 15 + ], + [ + 156, + 56 + ], + [ + 581, + 96 + ], + [ + 160, + 76 + ], + [ + 111, + 360 + ], + [ + 493, + 353 + ], + [ + 257, + 360 + ] + ] + }, + { + "image_name": "000000028343.jpg", + "question": "What might be the common use for the objects identified at , , , and ?", + "question_id": 8, + "dataset_name": "natural_QA_point", + "gt_answers": "The objects identified at these marks are all doughnuts, typically consumed as a sweet snack 
or dessert.", + "points": [ + [ + 197, + 58 + ], + [ + 97, + 74 + ], + [ + 247, + 118 + ], + [ + 352, + 49 + ], + [ + 217, + 34 + ], + [ + 69, + 284 + ], + [ + 51, + 199 + ], + [ + 280, + 40 + ] + ] + }, + { + "image_name": "000000232357.jpg", + "question": "What is the primary function of the object identified at ?", + "question_id": 9, + "dataset_name": "natural_QA_point", + "gt_answers": "The primary function of the object at is to illuminate the room it is installed in.", + "points": [ + [ + 336, + 63 + ], + [ + 73, + 319 + ], + [ + 236, + 355 + ], + [ + 123, + 340 + ] + ] + }, + { + "image_name": "000000262235.jpg", + "question": "What function might the item at serve for the individual?", + "question_id": 10, + "dataset_name": "natural_QA_point", + "gt_answers": "The item at likely serves as a form of identification or security, granting access or indicating the person's role or authorization within a professional setting or event.", + "points": [ + [ + 208, + 473 + ], + [ + 228, + 345 + ], + [ + 214, + 423 + ] + ] + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/data/natural_brief_caption/natural_brief_caption_box.json b/evaluation/MDVP-Bench/data/natural_brief_caption/natural_brief_caption_box.json new file mode 100644 index 0000000000000000000000000000000000000000..d31e4612966f8c028d2a13818c464322dc71d1ca --- /dev/null +++ b/evaluation/MDVP-Bench/data/natural_brief_caption/natural_brief_caption_box.json @@ -0,0 +1,452 @@ +[ + { + "image_name": "000000518574.jpg", + "question": "Please provide a brief description of each marked region in the image.", + "question_id": 1, + "dataset_name": "natural_brief_caption_box", + "gt_answers": ":Illuminated red fire engine on a wet city street\n:Tall lamppost topped with a bright street light\n:Street light with a vertical lamppost on a sidewalk\n:Lamppost with multiple lights at intersection angle\n:Street lamppost with single light and shadow on ground\n:Lamppost on sidewalk behind traffic light 
at night\n:Lamppost with an attached circular sign on city street\n:Colorful banner flag hanging from a light post", + "bbox": [ + [ + 321.04, + 515.8, + 44.2, + 48.82 + ], + [ + 67.53, + 133.57, + 218.27, + 506.43 + ], + [ + 347.62, + 479.72, + 31.44, + 77.04 + ], + [ + 284.74, + 432.36, + 156.22, + 140.97 + ], + [ + 154.17, + 463.4, + 30.59, + 93.7 + ], + [ + 455.67, + 304.12, + 24.33, + 131.83 + ], + [ + 21.57, + 454.71, + 20.28, + 118.66 + ], + [ + 203.74, + 152.88, + 70.07, + 185.51 + ] + ] + }, + { + "image_name": "000000474502.jpg", + "question": "Please provide a brief description of each marked region in the image.", + "question_id": 2, + "dataset_name": "natural_brief_caption_box", + "gt_answers": ":Polar bear standing with open mouth", + "bbox": [ + [ + 219.09, + 85.48, + 278.19, + 341.52 + ] + ] + }, + { + "image_name": "2407147.jpg", + "question": "Please provide a brief description of each marked region in the image.", + "question_id": 3, + "dataset_name": "natural_brief_caption_box", + "gt_answers": ":Man wearing a wide-brimmed hat in silhouette\n:Silhouetted man donning a long jacket holding something", + "bbox": [ + [ + 163, + 59, + 122, + 136 + ], + [ + 181, + 134, + 155, + 268 + ] + ] + }, + { + "image_name": "000000113533.jpg", + "question": "Please provide a brief description of each marked region in the image.", + "question_id": 4, + "dataset_name": "natural_brief_caption_box", + "gt_answers": ":Round white plate on wooden surface.\n:Round white plate with blue edging.\n:Round white plate, partially obscured.\n:White plate above a table's surface.\n:White frosted cake with floral decor.\n:Blue frosted cake resembling a flag.\n:Flame on a single lit candle.\n:Flame on candle atop a cake.", + "bbox": [ + [ + 268.02, + 476.97, + 122.73, + 29.06 + ], + [ + 134.91, + 517.28, + 162.24, + 38.73 + ], + [ + 0.0, + 562.05, + 149.94, + 37.95 + ], + [ + 365.95, + 438.58, + 34.05, + 29.61 + ], + [ + 242.23, + 368.6, + 133.72, + 134.72 + ], + [ + 136.26, + 
484.35, + 148.85, + 64.53 + ], + [ + 61.37, + 476.39, + 24.48, + 34.25 + ], + [ + 353.93, + 393.58, + 22.43, + 17.33 + ] + ] + }, + { + "image_name": "2408783.jpg", + "question": "Please provide a brief description of each marked region in the image.", + "question_id": 5, + "dataset_name": "natural_brief_caption_box", + "gt_answers": ": White baseball cap worn by a tennis player.\n: Tennis racquet held in the player's left hand.\n: Service base line on a clay tennis court.\n: Tennis shoes in white with black accents.\n: Tennis shirt with short sleeves and black details.\n: Spectator intently watching the tennis match.\n: Flag on a pole behind the crowd at the tennis match.\n: Tennis player's hand with fingers spread wide.", + "bbox": [ + [ + 157, + 186, + 37, + 46 + ], + [ + 102, + 151, + 23, + 111 + ], + [ + 129, + 393, + 154, + 105 + ], + [ + 180, + 397, + 16, + 33 + ], + [ + 141, + 170, + 120, + 149 + ], + [ + 273, + 196, + 50, + 89 + ], + [ + 326, + 28, + 12, + 162 + ], + [ + 188, + 66, + 46, + 43 + ] + ] + }, + { + "image_name": "2406705.jpg", + "question": "Please provide a brief description of each marked region in the image.", + "question_id": 6, + "dataset_name": "natural_brief_caption_box", + "gt_answers": ": Avocado pieces arranged on a plate with other foods.\n: Creamy substance layered over a slice of tomato.\n: Beverage can positioned on a table surface.\n: Table with a pattern of holes throughout its surface.\n: Slice of yellow bell pepper on a mixed food plate.\n: Red bell pepper strip placed alongside other ingredients.\n: Fresh avocado chunks on top of a food dish.\n: Triad of toast slices resting near a plate.", + "bbox": [ + [ + 248, + 134, + 76, + 76 + ], + [ + 297, + 174, + 84, + 69 + ], + [ + 60, + 0, + 140, + 51 + ], + [ + 27, + 366, + 47, + 28 + ], + [ + 2, + 192, + 186, + 80 + ], + [ + 381, + 172, + 37, + 141 + ], + [ + 219, + 130, + 125, + 75 + ], + [ + 59, + 41, + 355, + 124 + ] + ] + }, + { + "image_name": "2407365.jpg", + "question": 
"Please provide a brief description of each marked region in the image.", + "question_id": 7, + "dataset_name": "natural_brief_caption_box", + "gt_answers": ": Black soft-sided suitcase with a zippered exterior pocket.\n: Mint green hardshell suitcase, slightly obscured in background.\n: Jeans pocket with a visible tag from a clothing manufacturer.\n: Reflection of red neon light on a polished floor surface.\n: Light grey suitcase with an extending handle, in the background.\n: Main compartment of a black soft-sided suitcase.\n: Lower portion of blue jeans with visible stitching and texture.\n: Two suitcase wheels, possibly attached to a retractable handle.", + "bbox": [ + [ + 179, + 146, + 60, + 42 + ], + [ + 442, + 7, + 53, + 71 + ], + [ + 171, + 8, + 25, + 24 + ], + [ + 16, + 61, + 85, + 102 + ], + [ + 226, + 0, + 47, + 57 + ], + [ + 185, + 170, + 40, + 30 + ], + [ + 106, + 60, + 31, + 70 + ], + [ + 166, + 243, + 85, + 16 + ] + ] + }, + { + "image_name": "000000133090.jpg", + "question": "Please provide a brief description of each marked region in the image.", + "question_id": 8, + "dataset_name": "natural_brief_caption_box", + "gt_answers": ": Tennis player serving ball in motion.", + "bbox": [ + [ + 311.33, + 377.7, + 8.74, + 18.77 + ] + ] + }, + { + "image_name": "000000147173.jpg", + "question": "Please provide a brief description of each marked region in the image.", + "question_id": 9, + "dataset_name": "natural_brief_caption_box", + "gt_answers": ": Skateboard placed on ground with visible deck graphics.\n: Skateboard laying on ground near curb, wheels up.\n: Skateboard on ground, griptape side up, near shoes.\n: Person standing, wearing dark clothes, facing away.\n: Person walking away from camera, green hoodie, jeans.\n: Seated person smiling, hands together, wearing a hat.\n: Person seated on ledge, legs crossed, looking downward.\n: Person sitting with friends, black t-shirt, visible smile.", + "bbox": [ + [ + 409.35, + 307.47, + 114.31, + 41.71 + ], 
+ [ + 517.31, + 276.09, + 98.17, + 65.08 + ], + [ + 384.92, + 213.63, + 45.08, + 29.63 + ], + [ + 416.86, + 151.32, + 99.08, + 163.96 + ], + [ + 97.14, + 65.63, + 132.57, + 284.47 + ], + [ + 497.59, + 156.08, + 94.46, + 174.57 + ], + [ + 198.6, + 143.65, + 124.69, + 133.45 + ], + [ + 441.71, + 124.0, + 82.27, + 107.35 + ] + ] + }, + { + "image_name": "2408569.jpg", + "question": "Please provide a brief description of each marked region in the image.", + "question_id": 10, + "dataset_name": "natural_brief_caption_box", + "gt_answers": ": Red book cover possibly related to cooking\n: White cheese slice held by human fingers\n: Golden-brown crispy edge of a pizza crust\n: White shredded cheese on top of a pizza\n: Red marinara sauce spread on pizza dough\n: Large pizza with a variety of toppings\n: Small partially obscured pizza on a box\n: Human fingers grasping a cheese slice", + "bbox": [ + [ + 453, + 156, + 39, + 40 + ], + [ + 288, + 11, + 98, + 71 + ], + [ + 150, + 253, + 112, + 55 + ], + [ + 47, + 135, + 66, + 36 + ], + [ + 381, + 185, + 39, + 45 + ], + [ + 48, + 78, + 401, + 227 + ], + [ + 26, + 70, + 418, + 234 + ], + [ + 286, + 2, + 130, + 50 + ] + ] + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/data/natural_brief_caption/natural_brief_caption_point.json b/evaluation/MDVP-Bench/data/natural_brief_caption/natural_brief_caption_point.json new file mode 100644 index 0000000000000000000000000000000000000000..1563f6a71db377c23fdce1d8fd552c99479b7e5c --- /dev/null +++ b/evaluation/MDVP-Bench/data/natural_brief_caption/natural_brief_caption_point.json @@ -0,0 +1,320 @@ +[ + { + "image_name": "000000265256.jpg", + "question": "Please provide a brief description of each marked point in the image.", + "question_id": 1, + "dataset_name": "natural_brief_caption_point", + "gt_answers": ":Person holding a large patriotically designed kite", + "points": [ + [ + 233, + 244 + ] + ] + }, + { + "image_name": "000000487774.jpg", + "question": "Please 
provide a brief description of each marked point in the image.", + "question_id": 2, + "dataset_name": "natural_brief_caption_point", + "gt_answers": ":Black ergonomic mouse with two main buttons.\n:Edge of grey computer keyboard, button partially visible.", + "points": [ + [ + 415, + 266 + ], + [ + 87, + 271 + ] + ] + }, + { + "image_name": "000000073521.jpg", + "question": "Please provide a brief description of each marked point in the image.", + "question_id": 3, + "dataset_name": "natural_brief_caption_point", + "gt_answers": ":Upright black suitcase near seating area.\n:Large black standing suitcase with extended handle.\n:Blue plaid suitcase lying on the floor.\n:Black suitcase adjacent to chair at terminal.\n:Piece of handle protruding from top of suitcase.\n:Partial view of a suitcase handle above seat.\n:Handle of suitcase, higher perspective than surroundings.\n:Suitcase handle above a blue plaid patterned item.", + "points": [ + [ + 361, + 208 + ], + [ + 228, + 194 + ], + [ + 123, + 227 + ], + [ + 392, + 163 + ], + [ + 390, + 170 + ], + [ + 346, + 165 + ], + [ + 239, + 94 + ], + [ + 147, + 166 + ] + ] + }, + { + "image_name": "4134.jpg", + "question": "Please provide a brief description of each marked point in the image.", + "question_id": 4, + "dataset_name": "natural_brief_caption_point", + "gt_answers": ":Metal handrails on the right side of outdoor steps.\n:Two individuals walking together in an outdoor setting.\n:Individual approaching steps near a building exterior.\n:Building entrance with an open door and a visible interior.\n:Metal railing adjacent to a building pathway.\n:Exposed brick section of a building's exterior wall.\n:Woman in a blue jacket walking outdoors.\n:Concrete steps leading up to a modern building.", + "points": [ + [ + 409, + 209 + ], + [ + 215, + 255 + ], + [ + 371, + 213 + ], + [ + 127, + 266 + ], + [ + 255, + 176 + ], + [ + 100, + 111 + ], + [ + 194, + 232 + ], + [ + 345, + 221 + ] + ] + }, + { + "image_name": 
"2407684.jpg", + "question": "Please provide a brief description of each marked point in the image.", + "question_id": 5, + "dataset_name": "natural_brief_caption_point", + "gt_answers": ": Part of a wet, shiny motorcycle wheel.\n: Curved, metallic part, likely a vehicle's component.", + "points": [ + [ + 188, + 259 + ], + [ + 117, + 317 + ] + ] + }, + { + "image_name": "000000266371.jpg", + "question": "Please provide a brief description of each marked point in the image.", + "question_id": 6, + "dataset_name": "natural_brief_caption_point", + "gt_answers": ": Assorted vegetables displayed on an outdoor table.\n: Various neatly arranged orange vegetables on a table.\n: Exhibit of diverse fruits and vegetables on a market table.\n: Top part of an umbrella with white and green pattern.\n: Partial view of a patterned umbrella providing shade at market.\n: Blue plastic bucket beside a market stand.\n: Green bucket on the ground near produce table.\n: Single strawberry rests on a surface at a market.", + "points": [ + [ + 181, + 255 + ], + [ + 292, + 282 + ], + [ + 602, + 314 + ], + [ + 104, + 35 + ], + [ + 433, + 76 + ], + [ + 578, + 397 + ], + [ + 520, + 401 + ], + [ + 108, + 112 + ] + ] + }, + { + "image_name": "225.jpg", + "question": "Please provide a brief description of each marked point in the image.", + "question_id": 7, + "dataset_name": "natural_brief_caption_point", + "gt_answers": ":Yellow cab on city street\n:Traffic signal, urban background\n:Red street sign, elevated position\n:Adult female, bag in hand\n:Adult male, blue attire, accessory\n:Print on vehicle door, street view", + "points": [ + [ + 363, + 312 + ], + [ + 597, + 178 + ], + [ + 770, + 199 + ], + [ + 527, + 452 + ], + [ + 653, + 309 + ], + [ + 201, + 340 + ] + ] + }, + { + "image_name": "1159702.jpg", + "question": "Please provide a brief description of each marked point in the image.", + "question_id": 8, + "dataset_name": "natural_brief_caption_point", + "gt_answers": ":Bunch of bananas on 
display\n:Sack of grain on the floor\n:Fresh pineapples hanging from hooks\n:Shelf with various products in jars\n:Large metal 'S' shaped hook\n:White metal mesh wall structure\n:Single pineapple hanging from a hook\n:Assorted items on market stall surface", + "points": [ + [ + 495, + 118 + ], + [ + 455, + 718 + ], + [ + 632, + 490 + ], + [ + 499, + 349 + ], + [ + 125, + 184 + ], + [ + 1008, + 706 + ], + [ + 719, + 633 + ], + [ + 414, + 583 + ] + ] + }, + { + "image_name": "2409087.jpg", + "question": "Please provide a brief description of each marked point in the image.", + "question_id": 9, + "dataset_name": "natural_brief_caption_point", + "gt_answers": ": Appliance for heating and cooking food, located above stove.\n: Rotary controls for temperature management on cooking appliance.\n: Dark-toned storage compartments in kitchen for utensils and pantry items.\n: Metallic cooking vessel on stove with visible steam or vapors.\n: Decorative foliage in a transparent container, placed near water source.\n: Metal basin for washing within the kitchen, accompanied by a faucet.", + "points": [ + [ + 402, + 173 + ], + [ + 413, + 239 + ], + [ + 283, + 298 + ], + [ + 372, + 250 + ], + [ + 67, + 168 + ], + [ + 104, + 288 + ] + ] + }, + { + "image_name": "2546.jpg", + "question": "Please provide a brief description of each marked point in the image.", + "question_id": 10, + "dataset_name": "natural_brief_caption_point", + "gt_answers": ":Silver car parked beside road on a sunny day.\n:Garbage can on grass near pavement on sunny day.\n:Telephone pole near stop sign at street corner.\n:Garbage can by roadside under clear sky.\n:Leafy bush in front of a residential home.\n:Truck parked on residential street during sunny day.\n:Brown door on the facade of a white house.\n:Tall trees with dense foliage on a bright day.", + "points": [ + [ + 444, + 375 + ], + [ + 277, + 399 + ], + [ + 62, + 347 + ], + [ + 722, + 357 + ], + [ + 163, + 333 + ], + [ + 555, + 353 + ], + [ + 30, + 339 + 
], + [ + 304, + 200 + ] + ] + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/data/natural_detailed_caption/natural_detailed_caption_box.json b/evaluation/MDVP-Bench/data/natural_detailed_caption/natural_detailed_caption_box.json new file mode 100644 index 0000000000000000000000000000000000000000..24bb14546f2483cef245aeaedff8d4122f92cf00 --- /dev/null +++ b/evaluation/MDVP-Bench/data/natural_detailed_caption/natural_detailed_caption_box.json @@ -0,0 +1,350 @@ +[ + { + "image_name": "4010.jpg", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 1, + "dataset_name": "natural_detailed_caption_box", + "gt_answers": ": The object appears to be a small brown wooden shed, likely used for storage, situated on a patch of grass. It has a clearly visible slanted roof, possibly for rain runoff, and looks to be a single-door structure typically found in a backyard or garden setting.\n: This object is a tree with thick, lush foliage, representing a mature specimen that provides shade and greenery. It stands behind a smaller, sparser tree and is part of a larger grouping of trees that appear to create a natural boundary or backdrop for the area.\n: A single metal pole is embedded in the ground in a vertical orientation. It seems to be a simple, slender structure, possibly serving as a support or part of a larger construction that isn't fully visible. 
The lawn surrounding it is well-trimmed and maintains an even appearance.", + "bbox": [ + [ + 152, + 161, + 68, + 69 + ], + [ + 7, + 1, + 126, + 293 + ], + [ + 583, + 160, + 19, + 138 + ] + ] + }, + { + "image_name": "2407550.jpg", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 2, + "dataset_name": "natural_detailed_caption_box", + "gt_answers": ": This bowl, appearing to be dark blue, is situated against a background, likely part of kitchenware.\n: The tabletop is made of dark marble, showcasing a glossy finish and reflecting its surroundings slightly.\n: The light switches are white, contrasting with the dark wall, likely plastic, and appear functional.\n: Positioned in the background, these white light switches are paired on a wall above the countertop.\n: This silver oven, with digital controls and a handle, appears modern and built into the cabinetry.\n: An indistinct blue and green object, possibly decorative, is partially visible against a lighter backdrop.\n: The floor, constructed of hardwood, showcases a natural finish with variations in wood grain.\n: The jar holder, likely metal, is mounted to the wall, containing jars that may hold spices or ingredients.", + "bbox": [ + [ + 321, + 132, + 47, + 40 + ], + [ + 368, + 233, + 127, + 97 + ], + [ + 3, + 217, + 30, + 39 + ], + [ + 293, + 177, + 26, + 29 + ], + [ + 383, + 186, + 95, + 107 + ], + [ + 360, + 121, + 19, + 49 + ], + [ + 275, + 306, + 105, + 25 + ], + [ + 26, + 193, + 36, + 79 + ] + ] + }, + { + "image_name": "402.jpg", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 3, + "dataset_name": "natural_detailed_caption_box", + "gt_answers": ": This is an image of a silver metal table situated outside on a paved ground. The table has a shiny, reflective surface indicative of being metallic.\n: The object is an outdoor chair characterized by its red backrest and tan seat. 
It appears sturdy and designed for outdoor settings, likely part of a caf\u00e9 or restaurant patio.\n: The item in question is a piece of lavender paper that seems to be placed atop a metal table. The paper's edges are distinctly visible against the table's surface.\n: Visible here is a yellow traffic light, suspended above the street. The light is not illuminated and it stands against a light sky, possibly signaling a traffic-stop scenario.\n: A large red and white striped umbrella stands open, presumably providing shade or shelter in an outdoor setting. Its vibrant colors attract attention.\n: A brown tree trunk is seen beside a sidewalk. The trunk's bark is rugged and it appears to be a mature, healthy tree, offering shade to the vicinity.\n: Displayed is a black chalkboard featuring white text. It seems to be placed on a sidewalk, often used for displaying messages or menus outside establishments.\n: A window is seen on the side of a tan-colored building. It appears to be rectangular, typical of building windows, and reflects the adjacent surroundings.", + "bbox": [ + [ + 110, + 483, + 141, + 115 + ], + [ + 564, + 484, + 108, + 114 + ], + [ + 662, + 544, + 85, + 44 + ], + [ + 199, + 259, + 27, + 40 + ], + [ + 418, + 315, + 74, + 43 + ], + [ + 224, + 6, + 106, + 510 + ], + [ + 143, + 358, + 74, + 64 + ], + [ + 64, + 260, + 24, + 36 + ] + ] + }, + { + "image_name": "000000518836.jpg", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 4, + "dataset_name": "natural_detailed_caption_box", + "gt_answers": ": A close-up view of a horse's head, predominantly brown with a distinctive white patch on its forehead and visible mane.\n: This is the body of a brown horse, most likely the same one as the head seen in the close-up. 
Its front body is visible.\n: A white horse is seen from a side angle in the distance, grazing or standing in a meadow with trees and a fence.", + "bbox": [ + [ + 166.41, + 42.56, + 341.16, + 436.51 + ], + [ + 0.0, + 287.76, + 20.7, + 163.06 + ], + [ + 543.63, + 260.83, + 96.37, + 84.28 + ] + ] + }, + { + "image_name": "000000205601.jpg", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 5, + "dataset_name": "natural_detailed_caption_box", + "gt_answers": ": A frying pan on a heat source contains saut\u00e9ed meat and vegetables, emitting steam, indicating the food is hot and being cooked.\n: An electric stovetop features a radiant burner that is glowing, suggesting it is turned on and providing heat for cooking.\n: A kitchen knife with a green handle rests on a countertop; its blade appears sharp and suitable for food preparation.\n: A human hand is captured in motion, seasoning or stirring the food in the pan, contributing to the cooking process.", + "bbox": [ + [ + 171.9, + 272.26, + 468.1, + 143.38 + ], + [ + 1.57, + 268.7, + 513.82, + 156.53 + ], + [ + 571.84, + 326.08, + 68.16, + 54.29 + ], + [ + 185.34, + 231.32, + 23.92, + 88.0 + ] + ] + }, + { + "image_name": "000000299654.jpg", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 6, + "dataset_name": "natural_detailed_caption_box", + "gt_answers": ":The image depicts the head of a zebra, with distinctive black and white stripes covering its fur. The animal's ears are pointed upwards, indicating alertness. The eyes are visible, showcasing a gentle gaze, and the nose is close to the ground, suggesting the zebra is grazing or sniffing the terrain. 
The mane is partially visible as a series of short, erect black hair between the zebra's ears.", + "bbox": [ + [ + 182.39, + 0.57, + 331.0, + 360.43 + ] + ] + }, + { + "image_name": "000000107939.jpg", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 7, + "dataset_name": "natural_detailed_caption_box", + "gt_answers": ":The object is a rectangular street sign with white letters on a green background, indicating the name of a street. It is affixed to a metal pole and is located above and slightly to the left of a stop sign. The sign reads 'NORTH AVE' suggesting it's likely an indication of the street or direction. It appears to be a standard street name sign used in many urban settings.\n:This object is a red hexagonal stop sign with white uppercase letters spelling 'STOP'. It is attached to the same metal pole as another sign, below and to the right of it. The sign is designed to alert drivers to stop and is a widely recognized traffic control device. 
The edges of the sign appear sharp and undamaged, suggesting it is in good condition.", + "bbox": [ + [ + 249.92, + 99.78, + 131.46, + 183.95 + ], + [ + 257.37, + 177.56, + 124.0, + 106.16 + ] + ] + }, + { + "image_name": "000000437374.jpg", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 8, + "dataset_name": "natural_detailed_caption_box", + "gt_answers": ": A plush, padded object designed for comfort, potentially used on a sofa.\n: Similar to the first object, this is also a stuffed and soft piece intended for supporting or resting.\n: Decorative accessory adorned on the ear, visible as a small, shiny object.\n: This is a child with an open mouth and animated facial expression, possibly speaking or expressing surprise.\n: Appears to be a young boy, casually dressed, gripping an electronic device with attention.", + "bbox": [ + [ + 1.34, + 257.38, + 74.46, + 141.2 + ], + [ + 36.97, + 292.0, + 66.92, + 131.17 + ], + [ + 486.58, + 179.23, + 2.1, + 1.91 + ], + [ + 246.21, + 69.46, + 359.56, + 357.53 + ], + [ + 77.9, + 37.18, + 202.29, + 390.82 + ] + ] + }, + { + "image_name": "2407508.jpg", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 9, + "dataset_name": "natural_detailed_caption_box", + "gt_answers": ": The figure is wearing a red ski suit with a blue helmet and goggles. Their stance is open and welcoming, arms outstretched, and they seem to be an instructor addressing a group of students on a snowy slope.\n: A person is mostly obscured by the instructor but can be identified as a ski student by the helmet. The student is wearing a purple jacket with green sleeves and appears to be in mid-motion, learning to ski.\n: There is a student dressed in green ski gear with visible ski poles, possibly following instructions. 
They are viewed from the side, indicating movement or a pause during skiing.\n: A clear blue sky with scant clouds, indicative of a bright, sunny day ideal for outdoor activities such as skiing. This backdrop is above a snowy mountain setting.\n: A ski student is captured from behind, suggesting they are moving away from the viewer. They are wearing a red jacket with black pants, indicative of typical ski wear fit for the cold environment.\n: This student, visible from the side, is wearing a green and purple ski outfit with a matching helmet, possibly in the midst of practicing or following a ski maneuver.\n: A detailed examination of the instructor's black glove, which is part of standard skiing attire, suited to protect hands from cold conditions and providing better grip on ski poles.", + "bbox": [ + [ + 103, + 135, + 72, + 75 + ], + [ + 144, + 239, + 37, + 14 + ], + [ + 297, + 243, + 93, + 16 + ], + [ + 131, + 48, + 205, + 52 + ], + [ + 143, + 237, + 210, + 15 + ], + [ + 233, + 175, + 30, + 27 + ], + [ + 217, + 152, + 68, + 67 + ] + ] + }, + { + "image_name": "2411153.jpg", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 10, + "dataset_name": "natural_detailed_caption_box", + "gt_answers": ":Captured in this section is a motorcycle racer, sharply tilting while maneuvering a turn on a race track. The rider, outfitted in a full-body racing suit, is almost in a horizontal position relative to the ground, a technique used in high-speed motorcycle racing to navigate tight turns while maintaining speed. The motorcycle itself is predominantly red with hints of white and black, and it showcases a sleek, aerodynamic design typical of high-performance racing bikes. 
The rider's focused posture and the bike's dynamic angle suggest this is a moment of intense action during a race.\n:This portion of the image displays the texture of an asphalt road, detailed with small granular elements indicative of a typical racing track surface built to offer traction and durability. A crisp white boundary line marks the edge of the racing track, contrasting with the dark gray tone of the asphalt. The road surface is illuminated by ambient light, highlighting the texture and suggesting a dry weather condition which is ideal for racing. The condition of the road suggests it is well-maintained, a necessity for the safety and performance of high-speed motorsport events.", + "bbox": [ + [ + 148, + 124, + 58, + 47 + ], + [ + 289, + 111, + 171, + 91 + ] + ] + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/data/natural_detailed_caption/natural_detailed_caption_point.json b/evaluation/MDVP-Bench/data/natural_detailed_caption/natural_detailed_caption_point.json new file mode 100644 index 0000000000000000000000000000000000000000..a211a16910fbb6f6620a9381fa828584286ba07d --- /dev/null +++ b/evaluation/MDVP-Bench/data/natural_detailed_caption/natural_detailed_caption_point.json @@ -0,0 +1,336 @@ +[ + { + "image_name": "000000057091.jpg", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 1, + "dataset_name": "natural_detailed_caption_point", + "gt_answers": ":A black hard-shell suitcase lies open, revealing a variety of packed items including clothes, electronic devices, and personal belongings. It provides a glimpse into the owner's travel necessities, with the suitcase resting on a carpeted floor, indicating a domestic setting.\n:This mark highlights a section of what appears to be the corner of a suitcase with a black surface. 
This partial view focuses on the suitcase exterior, which seems sturdy, possibly made of a hard material suitable for travel purposes.\n:An upright backpack with a blue, orange, and black color scheme stands on a carpeted floor. It looks functional, equipped with multiple compartments and zippers, suggesting it is designed for organization and convenience in mobility.\n:The mark indicates a portion of a bed, specifically the corner, with blue bedding visible. The angle suggests the view is from above, showing the bed's soft textures and the inviting nature of a personal home space.", + "points": [ + [ + 421, + 206 + ], + [ + 136, + 195 + ], + [ + 136, + 196 + ], + [ + 495, + 74 + ] + ] + }, + { + "image_name": "2406528.jpg", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 2, + "dataset_name": "natural_detailed_caption_point", + "gt_answers": ":A middle-aged man stands addressing a crowd. He is dressed in a smart suit and sports a tie that appears reddish-pink, which stands out against the light blue shirt he's wearing. His stance suggests he is speaking or presenting at this event, holding a microphone in one hand. The tie's color contrasts with the background and his suit, drawing attention to his attire and possibly indicating his role as a speaker or figure of authority at this gathering.\n:This part of the image shows a person holding a video camera, evidently recording the ongoing event. The individual's hands are positioned to operate the camera, and the device is held up to their face as they look through the viewfinder. The camera seems to be a professional model typically used for capturing high-quality video footage, suggesting that the event being filmed has significance or is meant to be documented and shared.\n:A man is depicted wearing a green cap, suggesting he is an attendee or a spectator at the event. 
The cap is notably sporty, which may indicate either a casual dress code at the event or personal preference. The man's focus is not on the camera, suggesting he is engrossed in the event or is looking at something or someone else of interest.\n:In the background, beyond the main subjects in the foreground, there is a man wearing a cap. This man seems to be observing the event or listening to a speaker, indicative of his attendance as part of the crowd. Although further back and less distinct than others in the image, his presence adds to the sense of the size and composition of the audience at the event.\n:Caught in the background of the image, this mark indicates a man wearing eyeglasses. His presence, although less prominent, indicates the diversity of attendees or participants at the event. The eyeglasses he wears could imply a need for visual aid, or perhaps they are an element of his personal style.\n:A pole is shown featuring two pennants that appear to be blowing in the wind. These pennants may signify affiliations, slogans, or simply decorative elements meant to attract attention or embellish the event space. Their placement on a pole ensures they are visible above the crowd, broadcasting any messages or symbols they carry.\n:The close-up of a red pennant suggests a bold statement or symbol, often associated with visibility and attention. Its color makes it stand out, and if it bears any insignia or text, such a bright color would help in conveying whatever message it is meant to display. Mounted on a pole, it waves, catching the eyes of onlookers and adding vibrancy to the scene.\n:A sign is visible that has red letters printed on a contrasting white background. The red text on the sign is designed to be highly readable and eye-catching. 
Given the context, the sign is likely to be a banner carrying a message or statement pertinent to the event, with the intention of communicating to attendees and perhaps even at a distance to the larger public.", + "points": [ + [ + 267, + 181 + ], + [ + 88, + 174 + ], + [ + 204, + 240 + ], + [ + 199, + 216 + ], + [ + 156, + 211 + ], + [ + 85, + 64 + ], + [ + 73, + 57 + ], + [ + 341, + 116 + ] + ] + }, + { + "image_name": "000000425000.jpg", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 3, + "dataset_name": "natural_detailed_caption_point", + "gt_answers": ":This object appears to be a piece of clothing specifically designed for the lower limbs, potentially serving both a practical function and a fashion statement.\n:This item is a personal carrying accessory, likely used to transport everyday items, and features a design that seems fashionable or practical.\n:A device installed by a municipality for monitoring and charging for parking time. 
Traditionally coin-operated and situated next to parking spaces.\n:A similar parking time management system found next to vehicular parking spots, likely functioning in a coin-operated manner.\n:A manually powered mode of transportation consisting of a frame, two wheels, pedals, and handlebars, commonly used for commuting or leisure.\n:This is an item of footwear, usually extending above the ankle and designed to provide protection and possibly make a fashion statement.\n:A counterpart to the previous footwear item, intended to be worn as a pair for functional and stylistic purposes.\n:A segment of a flexible material, used to secure or support parts of an object or clothing, equipped with mechanisms for adjustment or attachment.", + "points": [ + [ + 250, + 285 + ], + [ + 361, + 206 + ], + [ + 477, + 162 + ], + [ + 422, + 163 + ], + [ + 346, + 331 + ], + [ + 263, + 331 + ], + [ + 232, + 334 + ], + [ + 361, + 182 + ] + ] + }, + { + "image_name": "000000279420.jpg", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 4, + "dataset_name": "natural_detailed_caption_point", + "gt_answers": ":Segment of a ski visible, appearing narrow, elongated, and indicative of winter sports equipment, positioned on a snowy background.\n:Visible section of a ski showing characteristics similar to , suggesting it as part of skiing gear, resting on a snow-laden surface.\n:Front part of a ski boot showing a sturdy structure designed to affix to a ski, indicative of winter sportswear, placed on a snow-covered surface.\n:A piece of a ski boot visible, seemingly robust and protective, potentially part of a set for snow activities, surrounded by a wintry backdrop.\n:An individual captured mid-action while skiing, clothed in winter attire, featuring a hat on their head, set against a snowy environment.", + "points": [ + [ + 383, + 285 + ], + [ + 246, + 319 + ], + [ + 378, + 265 + ], + [ + 254, + 286 + ], + [ + 285, + 44 + ] + ] + }, + { 
+ "image_name": "000000472621.jpg", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 5, + "dataset_name": "natural_detailed_caption_point", + "gt_answers": ": This is a classic, freestanding bathtub characterized by its large, oval shape and elegant design. The tub features ornate claw feet, which are likely made of brass or a similar material, giving it a vintage feel. It is positioned on a decorative rug, and there's a variety of bath accessories arranged on its rim, suggesting a well-maintained and luxurious bathroom setting.\n: The floral arrangement appears lush and vibrant, adding a natural decorative touch to the space. It consists of various plant species and colors, arranged artistically to create a focal point within the space. The display is situated at an elevated position on the wall, likely to enhance the ambiance and aesthetic appeal of the room.\n: This is a soft, hand-sized towel hanging near a sink. The towel is likely made of a fabric such as cotton or a cotton blend, known for absorbency. It hangs from a ring or a bar, suggesting its purpose for drying hands after washing. Its position and neat appearance imply it is regularly used and maintained.\n: A towel rack mounted to the wall consists of several horizontal bars, with rolled towels arranged neatly across. These towels give the impression of multi-purpose use, possibly for drying hands or after a bath. 
Their orderly presentation and the rack's location suggest accessibility and an emphasis on organization within the bathroom.", + "points": [ + [ + 359, + 237 + ], + [ + 569, + 30 + ], + [ + 239, + 110 + ], + [ + 241, + 85 + ] + ] + }, + { + "image_name": "970.jpg", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 6, + "dataset_name": "natural_detailed_caption_point", + "gt_answers": ":A rectangular window set within a stucco wall featuring reflections of the sky and surroundings. The architecture includes an ornamental design around the window, appearing to be part of a larger structure with warm-colored walls.\n:This is part of a grand edifice, where the window is on an upper level, framed by stone and revealing blinds or curtains behind the glass. It gives off an institutional vibe and contributes to the building's stately presence.\n:An accumulation of various bicycles secured to a metal rack. The bikes vary in design and color, indicating a common parking spot for cyclists, possibly associated with a nearby building or public space.\n:A horizontal concrete wall or barrier separates a grassy area from a walkway. Its smooth, flat top could serve for sitting or as a barrier. It runs parallel to the grass field, following a linear path.\n:An architecturally distinct structure's corner is in view, with a tan facade and characteristic arched windows lined with dark frames, indicative of a formal and potentially historic building.\n:On the concrete surface, there are splotches of discoloration ranging in size. These could be stains from water, rust, or other materials, showing signs of wear or lack of maintenance.\n:A single bicycle lies on its side on a concrete pavement. Its orientation suggests that it may have been accidentally knocked over or hastily left, separate from properly parked bicycles.\n:A set of windows with the interior lights turned on. 
The visible glow suggests occupancy or activity within the building, contrasting against the dimming light of the outdoor environment.", + "points": [ + [ + 59, + 268 + ], + [ + 271, + 182 + ], + [ + 296, + 421 + ], + [ + 198, + 546 + ], + [ + 212, + 216 + ], + [ + 286, + 749 + ], + [ + 19, + 426 + ], + [ + 61, + 370 + ] + ] + }, + { + "image_name": "2406598.jpg", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 7, + "dataset_name": "natural_detailed_caption_point", + "gt_answers": ":This is a close-up view focusing on the wooden texture of the hull of a boat that appears to be in a beach setting, with the boat's registration number partially visible, and the bow is near pebbles and shingle, indicating it might be a fishing vessel temporarily on shore.\n:Detailed observation captures a part of a person's left arm. The individual is wearing heavy-duty yellow outerwear, which is suitable for marine or wet conditions. The focus is on the fabric's creases and the natural bend of the elbow.\n:The detail shows a segment of durable yellow fabric that indicates a pocket feature of an article of clothing. It exhibits folds and stitching details characteristic of a garment designed for outdoor or work-related environments.\n:A human hand wearing a tight-fitting black glove. The glove seems to be made of a material suitable for manual work, offering dexterity and possibly grip enhancements, as suggested by the visible texture of the glove material.\n:This mark highlights the headwear worn by an individual. It is a khaki hat that appears to offer ear protection, possibly insulating against environmental noise or wind. 
It is suggestive of outdoor or industrial workwear.", + "points": [ + [ + 480, + 46 + ], + [ + 177, + 233 + ], + [ + 123, + 214 + ], + [ + 99, + 233 + ], + [ + 139, + 139 + ] + ] + }, + { + "image_name": "000000189267.jpg", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 8, + "dataset_name": "natural_detailed_caption_point", + "gt_answers": ":Tip of a ski, partially submerged in snow, equipment for winter sports\n:Rear portion of a ski above snowy surface, typically used for downhill travel\n:Bottom of boot attached to ski bindings, meant for secure footing while skiing\n:Ski boot connected to ski, with bindings visibly securing the boot in place\n:Clothed hand in glove gripping an object, likely for stability or maneuvering\n:Hand grasping ski pole, visible shaft and strap, used for balance while skiing\n:Upper segment of ski pole extending upwards, with hand strap for secure grip\n:Lower end of ski pole planted in snow, aiding in balance and propulsion", + "points": [ + [ + 390, + 388 + ], + [ + 396, + 370 + ], + [ + 379, + 357 + ], + [ + 353, + 364 + ], + [ + 302, + 208 + ], + [ + 225, + 289 + ], + [ + 440, + 42 + ], + [ + 320, + 308 + ] + ] + }, + { + "image_name": "000000042190.jpg", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 9, + "dataset_name": "natural_detailed_caption_point", + "gt_answers": ": A sport-specific black boot, part of a binding system on wakeboard, wet from water contact.\n: A matching sport-specific black boot, securely fixed onto a wakeboard, displaying its functionality.\n: A solid black wakeboard cutting through water with a person wakeboarding, causing spray.", + "points": [ + [ + 422, + 289 + ], + [ + 393, + 299 + ], + [ + 450, + 297 + ] + ] + }, + { + "image_name": "2412434.jpg", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 10, + "dataset_name": 
"natural_detailed_caption_point", + "gt_answers": ":A tray filled with chopped carrots, with cuts offering visual texture.\n:Two spoons with white handles resting together in a confined bin.\n:The top of a condiment bottle, made of red plastic, shows a pointed tip.\n:A yellow condiment squeeze bottle visible behind the grill roller rods.\n:A long metal kitchen utensil with a thin handle seems sturdy and functional.\n:The top of another condiment container, having a brown hue with a narrow spout.\n:A bin contains chopped onions, likely for food seasoning, looking juicy and freshly cut.\n:A cooking hotdog on a grill, one side showing signs of overcooking or charring.", + "points": [ + [ + 214, + 115 + ], + [ + 387, + 202 + ], + [ + 436, + 56 + ], + [ + 366, + 29 + ], + [ + 46, + 321 + ], + [ + 375, + 7 + ], + [ + 407, + 161 + ], + [ + 64, + 251 + ] + ] + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/data/ocr_doc_QA/ocr_doc_QA_box.json b/evaluation/MDVP-Bench/data/ocr_doc_QA/ocr_doc_QA_box.json new file mode 100644 index 0000000000000000000000000000000000000000..d1390933b4cf048dd340767afd2a562b55fea8d1 --- /dev/null +++ b/evaluation/MDVP-Bench/data/ocr_doc_QA/ocr_doc_QA_box.json @@ -0,0 +1,470 @@ +[ + { + "image_name": "0501_0243_0005.jpg", + "question": "What is the primary topic discussed in ?", + "question_id": 1, + "dataset_name": "ocr_doc_QA_box", + "gt_answers": "The primary topic discussed in is the concept of a Turing Test and the assessment of a system's intelligence by comparing its answers to those of a human.", + "bbox": [ + [ + 152.0, + 65.0, + 728.0, + 27.0 + ], + [ + 249.0, + 138.0, + 521.0, + 262.0 + ], + [ + 319.0, + 401.0, + 396.0, + 25.0 + ], + [ + 134.0, + 438.0, + 765.0, + 147.0 + ], + [ + 132.0, + 582.0, + 760.0, + 57.0 + ], + [ + 267.0, + 635.0, + 501.0, + 204.0 + ], + [ + 189.0, + 838.0, + 654.0, + 27.0 + ], + [ + 138.0, + 887.0, + 756.0, + 166.0 + ] + ] + }, + { + "image_name": "0501_0215_0000.jpg", + "question": "What is 
the significance of including the Kraken ibn Octopus image in the document?", + "question_id": 2, + "dataset_name": "ocr_doc_QA_box", + "gt_answers": "The image of Kraken ibn Octopus in the document likely serves a metaphorical purpose, reflecting the complex nature of OCR for Arabographic texts, or it may simply serve to visually engage the reader and provide a historical connection to the manuscript tradition discussed in the research.", + "bbox": [ + [ + 501.0, + 144.0, + 550.0, + 135.0 + ], + [ + 541.0, + 321.0, + 520.0, + 68.0 + ], + [ + 139.0, + 150.0, + 296.0, + 381.0 + ], + [ + 656.0, + 417.0, + 395.0, + 115.0 + ], + [ + 143.0, + 629.0, + 511.0, + 35.0 + ], + [ + 136.0, + 692.0, + 911.0, + 406.0 + ], + [ + 131.0, + 1166.0, + 923.0, + 374.0 + ], + [ + 1027.0, + 1553.0, + 18.0, + 23.0 + ] + ] + }, + { + "image_name": "0501_0112_0016.jpg", + "question": "What is the main focus of TabStruct-Net as mentioned in ?", + "question_id": 3, + "dataset_name": "ocr_doc_QA_box", + "gt_answers": "The main focus of TabStruct-Net is to provide a data-driven, end-to-end trainable architecture for predicting table structure from table images and combines both top-down and bottom-up methods.", + "bbox": [ + [ + 346.0, + 183.0, + 553.0, + 28.0 + ], + [ + 938.0, + 180.0, + 27.0, + 25.0 + ], + [ + 264.0, + 233.0, + 708.0, + 577.0 + ], + [ + 265.0, + 844.0, + 340.0, + 35.0 + ], + [ + 265.0, + 900.0, + 703.0, + 221.0 + ], + [ + 266.0, + 1151.0, + 382.0, + 29.0 + ], + [ + 264.0, + 1192.0, + 702.0, + 77.0 + ], + [ + 277.0, + 1284.0, + 687.0, + 53.0 + ] + ] + }, + { + "image_name": "735400b9246050615d18bd4b869005c2956025e1d4672053c3878f869d2d749e.png", + "question": "What is the main purpose of in the document?", + "question_id": 4, + "dataset_name": "ocr_doc_QA_box", + "gt_answers": "The main purpose of is to provide the title of the financial document, specifically indicating that the content below will detail the consolidated statements of cash flows of the entity for a particular 
period.", + "bbox": [ + [ + 105, + 65, + 660, + 21 + ], + [ + 104, + 94, + 848, + 838 + ], + [ + 105, + 943, + 428, + 13 + ], + [ + 456, + 988, + 143, + 9 + ], + [ + 60, + 987, + 14, + 12 + ] + ] + }, + { + "image_name": "8ee7444d6abb05f1012d240b4e41e87a9c7f3ced1d4cf85eb84be958b7ffc81c.png", + "question": "What theory is discussed in and why was it deemed unsatisfactory?", + "question_id": 5, + "dataset_name": "ocr_doc_QA_box", + "gt_answers": "Van der Waals theory for liquids is discussed in , and it was deemed unsatisfactory because it did not account for the differences between the densities of liquids and solids as initially thought.", + "bbox": [ + [ + 120, + 94, + 780, + 96 + ], + [ + 120, + 202, + 780, + 123 + ], + [ + 120, + 338, + 780, + 205 + ], + [ + 120, + 553, + 780, + 71 + ], + [ + 120, + 695, + 780, + 232 + ], + [ + 145, + 939, + 755, + 15 + ], + [ + 421, + 660, + 479, + 20 + ], + [ + 496, + 978, + 9, + 15 + ] + ] + }, + { + "image_name": "a53dc9a9c4847e6c94b56842d033d2c1d54ff689b9d035b94593960863d5ca61.png", + "question": "What physical phenomenon is the caption in referring to?", + "question_id": 6, + "dataset_name": "ocr_doc_QA_box", + "gt_answers": "The caption in is referring to a commensurate spin-density wave at strong Hund's rule coupling, a phenomenon in the context of quantum physics and material science.", + "bbox": [ + [ + 120, + 530, + 780, + 73 + ], + [ + 120, + 630, + 781, + 286 + ], + [ + 145, + 928, + 755, + 15 + ], + [ + 491, + 978, + 19, + 15 + ], + [ + 124, + 105, + 754, + 412 + ] + ] + }, + { + "image_name": "3a3cf888c4e9e6a1852571722fdff5625fc706faf8f476048f3e37f431c20c09.png", + "question": "Can you explain how feedwater is distributed within the reactor pressure vessel as described in ?", + "question_id": 7, + "dataset_name": "ocr_doc_QA_box", + "gt_answers": "In , it is stated that feedwater from the condenser system is introduced into the downcomer region of the reactor pressure vessel outside the core shroud. 
This is done through one or more lines, and sparger piping is mentioned as a possible means for distributing the feedwater.", + "bbox": [ + [ + 154, + 91, + 745, + 254 + ], + [ + 155, + 366, + 747, + 425 + ], + [ + 155, + 813, + 743, + 117 + ] + ] + }, + { + "image_name": "e39800e3bf0e84bcd291b922011d13f1df73c3a1ab075b00d850dd6028c922e2.png", + "question": "What regions discuss the balance of information or zeros and ones in relation to complexity?", + "question_id": 8, + "dataset_name": "ocr_doc_QA_box", + "gt_answers": " and discuss the balance of information or zeros and ones in relation to complexity, with focusing on how extreme disorder or order reproduces the highest complexity and elaborating on the tradeoff between information persistence and exploration.", + "bbox": [ + [ + 120, + 97, + 783, + 44 + ], + [ + 120, + 159, + 783, + 261 + ], + [ + 120, + 438, + 783, + 292 + ], + [ + 120, + 747, + 850, + 106 + ], + [ + 120, + 871, + 783, + 44 + ], + [ + 502, + 959, + 19, + 13 + ] + ] + }, + { + "image_name": "a957f60205a196e887ed9fc159e8a738eb93849de0868d4dce341aa82bc28158.png", + "question": "What types of activities does the of the document divide the cash flows into?", + "question_id": 9, + "dataset_name": "ocr_doc_QA_box", + "gt_answers": " divides the cash flows into operating activities, investing activities, and financing activities.", + "bbox": [ + [ + 330, + 62, + 364, + 14 + ], + [ + 78, + 139, + 866, + 684 + ], + [ + 80, + 836, + 266, + 9 + ], + [ + 747, + 978, + 197, + 8 + ], + [ + 507, + 983, + 10, + 7 + ] + ] + }, + { + "image_name": "0501_0103_0002.jpg", + "question": "Which region explains the efforts to standardize the Arabic language's online presence?", + "question_id": 10, + "dataset_name": "ocr_doc_QA_box", + "gt_answers": " explains the efforts to create spelling standards for Modern Standard Arabic online, mentioning computational methods such as CODA and Arabizi.", + "bbox": [ + [ + 259.0, + 253.0, + 709.0, + 148.0 + ], + [ + 263.0, + 
421.0, + 698.0, + 144.0 + ], + [ + 266.0, + 568.0, + 700.0, + 140.0 + ], + [ + 265.0, + 744.0, + 664.0, + 37.0 + ], + [ + 260.0, + 796.0, + 699.0, + 288.0 + ], + [ + 264.0, + 1082.0, + 697.0, + 270.0 + ], + [ + 598.0, + 1386.0, + 22.0, + 21.0 + ] + ] + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/data/ocr_doc_QA/ocr_doc_QA_point.json b/evaluation/MDVP-Bench/data/ocr_doc_QA/ocr_doc_QA_point.json new file mode 100644 index 0000000000000000000000000000000000000000..f19cb94479dc9f34a3a6b7fb5e84bec6109ebb57 --- /dev/null +++ b/evaluation/MDVP-Bench/data/ocr_doc_QA/ocr_doc_QA_point.json @@ -0,0 +1,304 @@ +[ + { + "image_name": "0501_0024_0025.jpg", + "question": "What types of content can you find in of the document?", + "question_id": 1, + "dataset_name": "ocr_doc_QA_point", + "gt_answers": "In , you can find bibliographic entries for scholarly works, which include authors' names, paper titles, publication details, DOIs, and URLs.", + "points": [ + [ + 597, + 760 + ], + [ + 595, + 1581 + ], + [ + 953, + 1587 + ], + [ + 293, + 1596 + ] + ] + }, + { + "image_name": "0501_0063_0005.jpg", + "question": "What does the caption in signify about the table?", + "question_id": 2, + "dataset_name": "ocr_doc_QA_point", + "gt_answers": "The caption in indicates that the table presents benchmarking results of various configurations tested on a test set.", + "points": [ + [ + 421, + 199 + ], + [ + 596, + 474 + ], + [ + 599, + 920 + ], + [ + 475, + 1122 + ], + [ + 595, + 1253 + ] + ] + }, + { + "image_name": "2ca63b73694ed160b6622c66f310b0e1560a7663934acb864f0abf64494aa300.png", + "question": "What types of products are prominently featured in , and how does this relate to the purpose of the document?", + "question_id": 3, + "dataset_name": "ocr_doc_QA_point", + "gt_answers": "In , there's a variety of alcoholic beverages including wines, beers, and spirits, suggesting that the company's product offerings are diverse. 
This relates to the purpose of the document, likely an annual report, by showcasing what the company produces or sells.", + "points": [ + [ + 511, + 604 + ], + [ + 515, + 175 + ], + [ + 513, + 92 + ] + ] + }, + { + "image_name": "78434ade0043f2ad51901b7aed5805201ecb92dfb9c36a2b61715c327265f238.png", + "question": "What chapter of the document is referring to, and what is the main focus based on the title?", + "question_id": 4, + "dataset_name": "ocr_doc_QA_point", + "gt_answers": " refers to Chapter 6, and the main focus, based on the title, appears to be about the role of heavy quarks in photon production and radiation processes within the context of physics.", + "points": [ + [ + 828, + 130 + ], + [ + 401, + 334 + ], + [ + 306, + 474 + ], + [ + 511, + 693 + ] + ] + }, + { + "image_name": "6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "question": "Can you tell me about the fees provided to non-executive directors according to ?", + "question_id": 5, + "dataset_name": "ocr_doc_QA_point", + "gt_answers": "According to , non-executive directors receive a fee for their service with specific annual fees for the Chairman and board members. 
The Chairman fee is $213,200 per annum, and the fee for each board non-executive director is $140,400 per annum.", + "points": [ + [ + 852, + 48 + ], + [ + 300, + 147 + ], + [ + 332, + 175 + ], + [ + 903, + 978 + ], + [ + 509, + 621 + ], + [ + 511, + 269 + ], + [ + 502, + 207 + ] + ] + }, + { + "image_name": "c6703eee02fb570504b9a09f74e09f8152eef554f2e5251b78a3f9b46afef8a5.png", + "question": "What main topic does suggest the chapter discusses?", + "question_id": 6, + "dataset_name": "ocr_doc_QA_point", + "gt_answers": " suggests that the chapter will discuss Quantum Chromodynamics (QCD) and compare or contemplate its aspects with the Anti-de Sitter/Conformal Field Theory (Ads/CFT) correspondence.", + "points": [ + [ + 828, + 130 + ], + [ + 319, + 130 + ], + [ + 511, + 228 + ], + [ + 511, + 508 + ], + [ + 511, + 802 + ] + ] + }, + { + "image_name": "0501_0059_0042.jpg", + "question": "What is 'FogBus2' mentioned in and ?", + "question_id": 7, + "dataset_name": "ocr_doc_QA_point", + "gt_answers": "'FogBus2' is described as a lightweight and distributed container-based framework intended to support IoT-enabled systems across various servers such as edge, fog, and cloud servers. 
It includes features designed for efficient deployment, scheduling, scalability, and dynamic resource management.", + "points": [ + [ + 509, + 461 + ], + [ + 334, + 718 + ], + [ + 632, + 834 + ], + [ + 650, + 1232 + ], + [ + 631, + 1562 + ] + ] + }, + { + "image_name": "e9c8841c90a9a2000c39a368e99b35f0fd4f295f0de67c572e9717b37567eab1.png", + "question": "What is the main benefit of using the simulation tools mentioned in ?", + "question_id": 8, + "dataset_name": "ocr_doc_QA_point", + "gt_answers": "The main benefit is to enable engineers and operators to monitor and optimize the performance of operating units, which leads to minimized energy consumption and maximized throughput and yields, thereby improving refinery profitability.", + "points": [ + [ + 203, + 63 + ], + [ + 256, + 244 + ], + [ + 271, + 410 + ], + [ + 703, + 222 + ], + [ + 704, + 366 + ], + [ + 705, + 466 + ], + [ + 600, + 423 + ], + [ + 45, + 998 + ] + ] + }, + { + "image_name": "0501_0239_0000.jpg", + "question": "What information is provided about the author in and ?", + "question_id": 9, + "dataset_name": "ocr_doc_QA_point", + "gt_answers": "In , the author's name, Eugene Borovikov, is provided. 
presents the author's institutional affiliation, which is American Management Systems, Inc., along with the corresponding address.", + "points": [ + [ + 52, + 731 + ], + [ + 611, + 1495 + ], + [ + 611, + 242 + ], + [ + 613, + 298 + ], + [ + 614, + 356 + ], + [ + 610, + 430 + ], + [ + 612, + 524 + ], + [ + 612, + 598 + ] + ] + }, + { + "image_name": "0501_0048_0003.jpg", + "question": "What is the purpose of the subsection titled 'Image Preprocessing'?", + "question_id": 10, + "dataset_name": "ocr_doc_QA_point", + "gt_answers": "The purpose of the subsection titled 'Image Preprocessing' is to describe the techniques and methods used to prepare scanned document images for further analysis or processing.", + "points": [ + [ + 236, + 159 + ], + [ + 598, + 261 + ], + [ + 610, + 813 + ], + [ + 610, + 1350 + ] + ] + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/ocr_doc_detailed_caption_box.json b/evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/ocr_doc_detailed_caption_box.json new file mode 100644 index 0000000000000000000000000000000000000000..6233fa550a91e7857e151a51e1bc76513bcc14c9 --- /dev/null +++ b/evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/ocr_doc_detailed_caption_box.json @@ -0,0 +1,428 @@ +[ + { + "image_name": "22d3844dcf29a07bd10f557a33684e331846f81b938ca0f742afab09c542133f.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 1, + "dataset_name": "ocr_doc_detailed_caption_box", + "gt_answers": ": This region of the image displays a portion of a scientific or academic paper, specifically focusing on points that seem to outlay contents or headings within the document. The page appears to discuss topics in physics, with references to quark and meson masses, as well as lattice data. 
The content suggests that the document may be exploring the relationship between subatomic particles and their masses, experimental data, and theoretical models (likely within the field of particle physics or quantum chromodynamics). Each item listed is followed by ellipsis and a numerical value, denoting sections or page numbers where these topics are expanded upon within the document.\n: This region is at the bottom of the document, typically referred to as the page-footer. In academic or scientific papers, this section could include information such as the page number, publication date, author's name, or part of the document classification system. However, the specifics of what this footer contains are not visible, as the black rectangle with a white numeric identifier covers it entirely.", + "bbox": [ + [ + 237, + 123, + 638, + 402 + ], + [ + 533, + 965, + 16, + 14 + ] + ] + }, + { + "image_name": "3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 2, + "dataset_name": "ocr_doc_detailed_caption_box", + "gt_answers": ": The content is a caption designated for a table, which generally serves to describe the table's subject matter. The caption reads, \"TABLE 1: The geometries and adsorption energies for the structures of thioglycolic acid on Au(111) at 0.25ML.\" It provides a clear indication that Table 1 will present quantitative data regarding the geometry and energy characteristics of thioglycolic acid adsorbed on a gold (Au) substrate at a specific coverage level.\n: This is a table containing organized data. It lists various configurations of thioglycolic acid adsorbed on an Au(111) surface, along with numerical values for initial and optimized parameters such as adsorption distance (ds\u2013Au), polar angle (\u03b8), and adsorption energy (E_ads). 
The data is structured in columns with headings for different parameters and rows corresponding to different adsorption sites and tilt directions. The table is used to convey detailed quantitative information in a comparative format, facilitating the analysis of changes in geometry and energy after optimization.\n: Here appears to be an excerpt of text, possibly from a research article or report, focusing on detailed scientific analysis. The text discusses the shortest Au-S bond length and mentions 'initial and optimized site,' likely referring to the states before and after some experimental or computational procedure. The content seems to pertain to the interpretation of the data presented in the table above it, providing context and insights into the structural data of the adsorption process.\n: This section of text also seems to be a detailed analytical discussion, possibly a continuation of the content from the previous text excerpt. It specifically highlights the adsorption energy for the most stable structure of a molecule on the Au(111) surface and the preferred adsorption site. It suggests a close relationship with both the data in the table above and the scientific interpretation or conclusion drawn from that data.\n: The content in this region appears to be a page-footer, usually found at the bottom of journal pages or official documents. It may contain information such as the page number, document section, publication date, or authors' names. 
Such footers are used for navigation and citation purposes.", + "bbox": [ + [ + 120, + 106, + 780, + 126 + ], + [ + 302, + 249, + 414, + 395 + ], + [ + 120, + 689, + 780, + 96 + ], + [ + 120, + 797, + 781, + 150 + ], + [ + 496, + 978, + 9, + 15 + ] + ] + }, + { + "image_name": "0501_0146_0010.jpg", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 3, + "dataset_name": "ocr_doc_detailed_caption_box", + "gt_answers": ": The image is a collection of three scatter plots, each corresponding to a distinct type of prediction performance evaluated by F1 score. The x-axis represents the frequency (presumably of occurrence in the training set), while the y-axis represents the F1 score, which is a measure of test accuracy. The plots are labeled (a) Atom prediction performance, (b) Bond prediction performance, and (c) Charge prediction performance. Each plot features a variety of points labeled with chemical symbols or bond types, indicating that the data relates to chemical structures.\n: This is a caption for the series of scatter plots shown in . It provides an interpretation of the data, stating that there is a clear correlation between the performance of neural networks on different prediction types and the frequency of the specific type in the training dataset. It is noted that classification networks perform significantly better than segmentation networks.\n: This is a subsection title within the document that reads \"Performance of segmentation network.\" It indicates that the following text will discuss the results and analysis related to the evaluation of the segmentation network's performance.\n: This is a paragraph that explains how the performance of the segmentation network is measured by the F1 score for pixel predictions for different atom, bond, and charge types. 
The text discusses how performance correlates with the frequency of these types in the training data and references a correlation visible in Figure 4, assuming that Figure 4 corresponds to the scatter plots in .\n: This is another subsection title within the document that reads \"Performance of classification networks.\" It signals that the subsequent paragraph will describe the performance evaluation for classification networks.\n: This paragraph details the performance of classification networks, mentioning that the F1 score is used for evaluation. It highlights a correlation between F1 score and the frequency of different atom, bond, and charge types in the training set. Although the segmentation is not perfect, the classification networks can maintain accuracy. Results are summarized in Figure 4, which likely refers to the scatter plots in .\n: This is a subsection title denoted \"Overall graph accuracy,\" which suggests that the following section of the document will focus on the combined accuracy measurements of the previously discussed networks.\n: In this paragraph, the text outlines how combining the performance of different parts can produce an overall accuracy for graph predictions. 
It implies that integration of segmentation and classification network results, as indicated by an algorithm, can construct the resulting graph, referencing images in three different blocks.\n: This is the page number of the document, specifically '11,' marking its location within the document's sequence of pages.", + "bbox": [ + [ + 212.0, + 158.0, + 787.0, + 581.0 + ], + [ + 212.0, + 741.0, + 803.0, + 97.0 + ], + [ + 212.0, + 871.0, + 440.0, + 32.0 + ], + [ + 214.0, + 917.0, + 802.0, + 115.0 + ], + [ + 212.0, + 1054.0, + 450.0, + 29.0 + ], + [ + 207.0, + 1088.0, + 813.0, + 215.0 + ], + [ + 210.0, + 1312.0, + 296.0, + 33.0 + ], + [ + 212.0, + 1356.0, + 801.0, + 89.0 + ], + [ + 595.0, + 1480.0, + 35.0, + 28.0 + ] + ] + }, + { + "image_name": "2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 4, + "dataset_name": "ocr_doc_detailed_caption_box", + "gt_answers": ": This region appears to be a paragraph of text discussing the outcome of a washing process on reducing sugar content. It notes that this process resulted in a higher content of reducing sugar which is thought to overshadow the glycemic index (GI) lowering effect of the polyphenols and may increase the GI of the sugar.\n: This region of text outlines a graphical demonstration of the 'GI sweet spot' related to the sugars shown in a referenced table. 
It explains that a certain minimum amount of sucrose (22mg CE/100mg) needs to be retained during sugar processing to maintain a low GI, and that if additional polyphenols are present but the reducing sugars are too high, then the low GI effect is negated.\n: This region includes a section header titled \"Table 3 - Example sugars,\" which implies that the region is categorizing and summarizing data related to various sugars, likely in a tabular format.\n: The text in this region describes the effects of increasing the reducing sugar content of sugar and its impact on the GI, moisture content, and the behavior of glucose and fructose when polyphenol content is increased. It concludes that optimizing moisture and reducing sugar content is insufficient to lower the GI in the presence of higher polyphenol levels.\n: This region is a section header for the text that follows, indicating that the content will deal with \"Example b - Washing or massecuite to desired polyphenol content.\" This text likely explains an example or case study related to the process of washing sugar massecuite to achieve a certain level of polyphenol content.\n: This region contains text describing an experimental process where two different sugar mill samples were tested for polyphenol content after undergoing washing to a certain depth of color. It discusses how the polyphenol content was measured against desired levels and mentions results found in a specific table.", + "bbox": [ + [ + 122, + 90, + 796, + 37 + ], + [ + 122, + 147, + 802, + 138 + ], + [ + 122, + 525, + 796, + 239 + ], + [ + 123, + 784, + 653, + 13 + ], + [ + 122, + 817, + 800, + 113 + ], + [ + 122, + 305, + 251, + 13 + ] + ] + }, + { + "image_name": "0501_0148_0008.jpg", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 5, + "dataset_name": "ocr_doc_detailed_caption_box", + "gt_answers": ": This is a figure. 
It includes several images arranged in a grid layout depicting various stages of a document life cycle or processing steps. Each image shows a different state of documents, possibly related to digitalization or text recognition processes. These images likely serve as a visual representation of the document's evolution through a particular workflow, such as scanning or Optical Character Recognition (OCR).\n: This is a caption associated with a figure. It reads \"Figure 5: The OCR process.\" This caption identifies and describes the figure that it is associated with. The figure it refers to likely illustrates the stages or aspects of the OCR process, which could involve converting scanned images of text into machine-encoded text.\n: This is a figure. It shows a piece of text with visual markings comparing two sections labeled \"Available OCR\" and \"Improved OCR.\" The annotations indicate corrections or enhancements made in the 'Improved OCR' section compared to the 'Available OCR' section. This figure serves to demonstrate the efficacy of certain OCR technologies or methodologies by providing a before-and-after comparison.\n: This is a caption associated with a figure. It reads \"Figure 6: Excerpt from the Hong Kong report with different versions of OCR output. The Internet Archive image containing this excerpt can be accessed here:\" followed by a URL. This caption provides context for the associated figure, indicating that it is an excerpt from a specific report and acknowledges the source of the image. It helps readers understand the purpose of the figure and where they can find additional information.\n: This is part of a footer. It contains the name of a journal, \"Journal of Data Mining and Digital Humanities,\" along with the ISSN number, which is a unique identifier used for serial publications. This area of the document provides information about the publication in which the article or research paper may be found.\n: This is also part of a footer. 
It includes a URL, \"http://jdmdh.episciences.org\", which likely directs readers to the website of the journal or publication mentioned in . This URL provides a way for readers to access more information or related content online.\n: This is a page number. It is located in the footer area of the document and provides the numerical identifier \"9\" for the current page. This helps readers navigate the document and facilitates referencing specific sections.", + "bbox": [ + [ + 250.0, + 168.0, + 696.0, + 652.0 + ], + [ + 468.0, + 831.0, + 254.0, + 32.0 + ], + [ + 247.0, + 957.0, + 697.0, + 439.0 + ], + [ + 134.0, + 1404.0, + 924.0, + 90.0 + ], + [ + 139.0, + 1573.0, + 309.0, + 43.0 + ], + [ + 845.0, + 1575.0, + 208.0, + 28.0 + ], + [ + 581.0, + 1566.0, + 28.0, + 28.0 + ] + ] + }, + { + "image_name": "6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 6, + "dataset_name": "ocr_doc_detailed_caption_box", + "gt_answers": ": This region is identified as the page-header of the document. It contains the title of the document, which reads \"2012 Annual Report 2013.\"\n: This portion is a section-header labeled \"Non-Executive Directors' Remuneration.\" It indicates that the following section will discuss the payment and remuneration details for non-executive directors of the company.\n: This area is another section-header specifying \"Components of Non-Executive Director remuneration.\" This header suggests a breakdown of the various elements that constitute the remuneration for non-executive directors.\n: Found at the bottom of the page, this region is the page-footer. 
It's a small section that is typically used for providing footnotes, disclaimers, or publication information for the document.\n: This is a table detailing a \"Share purchase plan.\" It contains columns for the name of the individual, the amount of shares acquired, and the share price range at acquisition dates, alongside with the total sum. It lists information about shares acquired by specific individuals at specified price ranges during a specific time frame.\n: This is a block of text providing detailed information on \"Current Board fees\" and \"Post-employment benefits.\" It specifies the annual fees for different board roles and outlines the post-retirement benefits provided to non-executive directors with terms of board service.\n: This region contains text related to the \"Deferred share purchase plan.\" It describes the nature of the share purchase plan, specifying the conditions under which shares were purchased, the performance criteria associated with the plan, and details regarding the share price and acquisition dates.", + "bbox": [ + [ + 778, + 43, + 148, + 11 + ], + [ + 98, + 137, + 404, + 20 + ], + [ + 98, + 167, + 469, + 16 + ], + [ + 881, + 973, + 44, + 11 + ], + [ + 95, + 315, + 829, + 612 + ], + [ + 98, + 231, + 827, + 76 + ], + [ + 98, + 193, + 808, + 28 + ] + ] + }, + { + "image_name": "c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 7, + "dataset_name": "ocr_doc_detailed_caption_box", + "gt_answers": ": This region is categorized as text. It discusses the mathematical concept of homotopy groups designated \u03c0_n(M), focusing on their ability to classify different dimensional hypersurfaces within a manifold M. 
The text further explains that the triviality of these homotopy groups is linked to the connectivity of the space they represent, with specific mentions of the concepts of domain walls in cosmology and topological defects arising from symmetry breaking during phase transitions in the universe.\n: This region of text delves into the implications of symmetry breaking in theoretical physics. It connects the process of symmetry breaking to the generation of monopole-like defects, and it references the Standard Model's group construction that includes a U(1) factor. The text implies that this formation of defects played a pivotal role in historical scientific developments.\n: The text in this section links the theoretical concepts mentioned earlier to a practical application: the motivation for introducing a phase of inflation in cosmological models. It characterizes the topological conditions for the formation of defects and points out that certain solutions for these conditions can exist even in the absence of topologically stable defects. References are made to specific types of defects and academic citations are included to support these statements.\n: The content in this region pertains to the study of cosmic defects and their stability. It addresses scenarios in which initially unstable defects might become stable through various mechanisms, such as the effects of plasma. These considerations are relevant to the inflationary model in cosmology, particularly the constraints from the formation of cosmic strings.\n: This region is categorized as a formula and presents a mathematical equation related to the text's discussion about topological defects and homotopy groups. The equation seems to represent a relationship that is essential to the argument or analysis presented in the categorical text regions it is associated with.\n: The last region is identified as a page-footer. 
It likely contains publication and/or authorship information, a page number, or possibly an indication of the section of the document where the content can be found. Since it is a page-footer, its purpose is primarily to aid in the organization and navigation of the document.", + "bbox": [ + [ + 120, + 94, + 780, + 340 + ], + [ + 120, + 481, + 780, + 97 + ], + [ + 120, + 590, + 781, + 150 + ], + [ + 120, + 752, + 780, + 96 + ], + [ + 468, + 446, + 433, + 26 + ], + [ + 491, + 978, + 19, + 15 + ] + ] + }, + { + "image_name": "0301_0188_0040.jpg", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 8, + "dataset_name": "ocr_doc_detailed_caption_box", + "gt_answers": ": The image appears to be a stylized illustration of a side profile of a person's head and upper torso. The person's face is depicted with a serene or peaceful expression, eyes closed and a faint smile, implying a sense of calm or contemplation. Behind the figure, there are abstract shapes resembling clouds or wind patterns that swirl around the head, which could suggest thoughts, memories, or a state of mental flow. The illustration uses a muted color palette, predominantly warm shades of beige, pink, and gray, with a touch of red in the figure's attire, which has a dotted pattern. This artwork likely serves to evoke a mood or theme related to the content of the document in which it's included, possibly regarding mental health, mindfulness, psychology, or the creative process.\n: This is a footer section of an image or document. It contains a citation that reads \"\u00a9 2021 Scientific American,\" indicating that the image or the content of the document is copyrighted by Scientific American. 
This informs the viewer about the source of the content and copyright year, serving both as an attribution and a legal notice to respect the intellectual property rights associated with the material.", + "bbox": [ + [ + 0.0, + 0.0, + 1170.0, + 1548.0 + ], + [ + 477.0, + 1510.0, + 216.0, + 21.0 + ] + ] + }, + { + "image_name": "8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 9, + "dataset_name": "ocr_doc_detailed_caption_box", + "gt_answers": ": This section is labeled as \"Chapter 2: Motivation.\" It functions as a heading signifying the start of a new chapter or section within the document, providing readers with an indication of the chapter's theme, which in this case is to establish the rationale or impetus behind the subject matter discussed in the chapter.\n: This region, also labeled as \"Chapter 2: Motivation,\" serves a similar purpose to , functioning as part of the chapter heading that presents the focus of the chapter, potentially implying that the author will delve into the reasons or driving forces guiding the study or research presented.\n: This region contains a paragraph of text that discusses specific concepts related to a theoretical framework, possibly in the field of theoretical physics or string theory. It mentions a scenario where excitations in a certain 'near horizon throat region' appear redshifted to an observer at infinity. The text discusses the energy associated with these excitations and touches on limits pertaining to string theory, suggesting that in a particular limit, the full Type IIB string theory must be considered. 
The paragraph concludes with an implication that supergravity must be considered in the context of near-horizon geometry within the scope of string theory.\n: In this text paragraph, the document appears to be discussing two theoretical pictures related to the same low-energy limit within theoretical physics or string theory. It mentions the field theory picture with supergravity and an \\( N = 4 \\text{SU}(N) \\) SYM on the D branes, as well as the geometry picture with supergravity in flat space and Type IIB string theory. It suggests that the document is comparing and contrasting these two theoretical perspectives and proposing that they are both decoupled theories with identical asymptotic conditions.\n: This text section seems to conclude the discussion by mentioning that the analytical tools for two differing theoretical scenarios are completely incompatible. It references the Born-Infeld action and suggests that a mathematical comparison between different models yields coincident D-branes for an \\( \\text{SU}(N) \\) two-form field strength, relating to a broader discussion on theoretical physics and string theory.\n: This region exhibits a mathematical formula that is relevant to the discussion within the document. The formula appears to link certain theoretical physics concepts, connecting string coupling constants \\( g_s \\) with D-brane charges and configurations. 
The formula is most likely important in the context of supporting the document's claims about supergravity or string theory.", + "bbox": [ + [ + 824, + 124, + 19, + 13 + ], + [ + 180, + 124, + 193, + 13 + ], + [ + 180, + 177, + 663, + 230 + ], + [ + 180, + 424, + 663, + 292 + ], + [ + 180, + 734, + 663, + 75 + ], + [ + 294, + 849, + 549, + 19 + ] + ] + }, + { + "image_name": "1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 10, + "dataset_name": "ocr_doc_detailed_caption_box", + "gt_answers": ": The paragraph in discusses the conventional assumption that an insider's private information is static, citing specific examples from the literature. It elaborates by stating that in certain works, insiders are assumed to know the final value of an asset both before and after the default of the company issuing the asset. The text suggests that the presence of insiders does not always lead to market arbitrage and may contribute positively to the market by leading to higher information efficiency in price processes.\n: The text in challenges the assumption of an insider's perfect foresight as unrealistic, reasoning that the fundamental value of a firm is tied to dynamically changing elements like cash flows and sales, among other factors. The paragraph presents the idea that the fundamental value is stochastic, implying that it is subject to random fluctuations, and that the insider has the advantage of perceiving these fluctuations more clearly than other market participants.\n: In , the document introduces the paper's goal, which is to relax the assumption of static information and examine the equilibrium in trading and price processes and market efficiency when insiders have dynamic private information. 
The paragraph sets the context for a more detailed exploration of how markets operate under these conditions.\n: contains text which explains that the model considered in this paper is a broader version of the earlier static models. The paper's intention is to cover dynamic information scenarios and improve on previous models that covered a narrower range of trading strategies and pricing rules.\n: The paragraph in discusses the findings of the paper, which include the identification of a Markovian equilibrium that is inconspicuous, allows insiders to trade without being detected, and is solely dependent on the total order process. It underscores the unique nature of this equilibrium and how it enhances the market efficiency in certain conditions.\n: The text in suggests empirical outcomes where revealing information might be beneficial. Specifically, it contrasts different market equilibrium scenarios and suggests that in non-Markovian price processes, it\u2019s often better for insiders to disclose their private information.", + "bbox": [ + [ + 120, + 144, + 783, + 155 + ], + [ + 120, + 314, + 783, + 127 + ], + [ + 120, + 456, + 783, + 70 + ], + [ + 148, + 541, + 755, + 14 + ], + [ + 120, + 570, + 783, + 354 + ], + [ + 507, + 960, + 9, + 14 + ] + ] + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/ocr_doc_detailed_caption_point.json b/evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/ocr_doc_detailed_caption_point.json new file mode 100644 index 0000000000000000000000000000000000000000..2852117d2be970a807236ffc3524ff60f985291c --- /dev/null +++ b/evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/ocr_doc_detailed_caption_point.json @@ -0,0 +1,276 @@ +[ + { + "image_name": "0301_0091_0037.jpg", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 1, + "dataset_name": "ocr_doc_detailed_caption_point", + "gt_answers": ": The image within this region depicts a 
man walking to the right while dressed in a light-colored hat, a long-sleeve shirt, and dark pants. He appears to be in a thoughtful or solemn mood, based on his posture and the setting. The background reveals a grassy field under a blue sky with puffy white clouds. There are other individuals in the distance, some of whom appear to be gathered around a particular spot, suggesting an event may be taking place. In the foreground, garden tools are scattered on the ground, which could indicate maintenance work or a ceremonial function, considering the context suggested by the rest of the image.\n: This region shows a caption that provides specific information about the scene depicted in the image. It mentions an individual by the name of Michael Williams and reveals that the casket shown is being carried toward his plot at Woodlawn Cemetery in a place named Westwego. The text specifies a date, May 18, 2020, and informs that the photograph was taken by someone named Kathleen Flynn. This caption serves to identify the solemn nature of the scene displayed - a funeral procession.\n: This region features a byline, which attributes the photograph to Kathleen Flynn. Bylines are used to credit the photographer or creator of the image, indicating who has taken the photograph and sometimes when or where it was taken.", + "points": [ + [ + 1083, + 1444 + ], + [ + 335, + 319 + ], + [ + 330, + 492 + ] + ] + }, + { + "image_name": "0501_0003_0012.jpg", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 2, + "dataset_name": "ocr_doc_detailed_caption_point", + "gt_answers": ": This region appears to display a page number, suggesting its location at the top-right corner of a document page, providing a reference point within a larger text, such as a book, report, or academic paper.\n: This is the header of the document which includes the title \"VITLayout: Fusion of Visual and Text Features for DLA\". 
It appears at the top of the page, likely summarizing the subject or the main theme of the document, which seems to be about a layout analysis model that integrates visual and text features for Document Layout Analysis (DLA).\n: Labeled as \"Table 5,\" this caption describes what is presumably a table below it. It indicates that the table contains ablation experimental results for a model called the VITLayout used in document layout analysis.\n: The content shows a table with experimental results for the VITLayout model. The table includes columns for model variations and their corresponding F1 scores across different categories such as Text, Title, List, Figure, and Table Average. The table is used to show quantitative results to compare the performance of the model variations.\n: This paragraph discusses the beneficial aspects of perceptual features in document layout analysis and notes improvements of the VITLayout model. It suggests that the VITLayout has been applied to various tasks. It also mentions that the model can accurately locate and classify text and table information from thousands of grant applications, demonstrating practical applications and suggesting an area for future attention.\n: This paragraph provides an analysis and commentary on the performance of the VITLayout model. 
The author reflects on the state-of-the-art performance and identifies areas for improvement and future research avenues, such as optimizing multimodal fusion in DLA and creating an end-to-end, lighter DLA model.", + "points": [ + [ + 954, + 195 + ], + [ + 669, + 196 + ], + [ + 615, + 262 + ], + [ + 615, + 391 + ], + [ + 617, + 695 + ], + [ + 617, + 1067 + ] + ] + }, + { + "image_name": "8df649b02bb0b5be9ac6845591f756ca9a81457ab1d298178744047b179f7356.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 3, + "dataset_name": "ocr_doc_detailed_caption_point", + "gt_answers": ": This is a page-header containing the title \"DERIVATIVES AND RISK MANAGEMENT.\" It likely serves as the heading for a section in a financial report that discusses how the company handles financial risks and derivative instruments.\n: This section-header is labelling a subsection titled \"Hedging activities.\" It probably details specific strategies and financial instruments the company uses to hedge against various types of market risks.\n: Another page-header reads \"6 SUBSEQUENT EVENTS.\" This indicates a new section focusing on events or transactions that occurred after the reporting period but before the publication of the report.\n: This text paragraph contains information about hedging activities related to natural gas. It discusses the financial strategy known as \"two-way costless collars\" and \"three-way costless collars,\" including terms such as floor price, index price, cap price, and sold put options. This paragraph generally addresses how the company mitigates risks from fluctuating natural gas prices.\n: This textual area explains the company's use of basis swaps and call options to manage exposure to gas price movements. It outlines the conditions under which the company would receive or pay amounts based on the specified terms of a contract tied to gas prices and price differentials. 
It provides insights into the financial derivatives and risk management strategies the company employs.\n: The page-footer, with the numeric identifier '6', indicates this is the sixth page of the document. Page footers usually contain information such as page numbers to help the reader navigate the document.\n: This area of the document is a table with various columns and rows, which likely details information about the financial instruments discussed in the text, such as hedging contracts. Specifics like contract dates, notional amounts, and related financial terms would typically be included in such a table to illustrate the company's hedging activities quantitatively.", + "points": [ + [ + 157, + 17 + ], + [ + 262, + 114 + ], + [ + 198, + 41 + ], + [ + 511, + 77 + ], + [ + 511, + 168 + ], + [ + 511, + 719 + ], + [ + 511, + 458 + ] + ] + }, + { + "image_name": "0501_0099_0010.jpg", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 4, + "dataset_name": "ocr_doc_detailed_caption_point", + "gt_answers": ": The marked region is a portion of a bibliography or reference list from an academic paper or similar document. This section includes citations of various research papers, articles, or works that are relevant to the subject being discussed in the main text. Citations in this list follow a standardized academic format, providing a concise account of the authors, publication year, title, publication venue, and in some cases, additional information such as volume, issue, page numbers, and digital identifiers like arXiv or DOI numbers. The listed works are related to computer vision, specifically to tasks involving visual question answering and deep learning techniques. They are referred to in the paper to acknowledge previous contributions, build upon existing research, or contrast with new findings. 
The inclusion of this detailed reference list serves to attribute intellectual property, enable readers to consult the original sources for more information, and demonstrate the research's context within the field.\n: This section is also part of a bibliography or reference list from an academic document, detailing citations for various articles related to computer vision and pattern recognition. Similar to , this list contains structured references that include the authors, year of publication, paper title, and where the work was published, among other details. The cited works here also focus on topics pertinent to computer science disciplines such as image recognition, text detection, and deep learning techniques. This reference list functions as an essential academic practice to credit sources, provide readers with resources for further exploration of the topic, and establish the study's position in the broader research landscape. It ensures that the authors\u2019 work is informed by and contributing to a continuous dialogue within the scientific community.", + "points": [ + [ + 349, + 761 + ], + [ + 879, + 760 + ] + ] + }, + { + "image_name": "82a75b3f20541bc86d47c069975a58ab1b7334d46f68cfa0c4538a5d22e30ada.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 5, + "dataset_name": "ocr_doc_detailed_caption_point", + "gt_answers": ": This region appears to be the header of a document. It is a blue horizontal bar with the text \"PART III\" on the left side, written in uppercase white letters. The header indicates that this section is the third part of a larger document, suggesting that what follows is a continuation or subdivision of the document's content.\n: This region displays a table with two columns. 
The left column is labeled \"NAME OF ENTITY\" and the right column is labeled \"JURISDICTION OF INCORPORATION.\" The table lists several entities along with the respective jurisdictions of incorporation. Examples of entities mentioned include various corporations with names such as \"Chunghwa Precision Test Tech. USA Corporation\" and \"Senao Trading (Fujian) Co., Ltd.\" The jurisdictions listed span several countries, including the United States of America, Vietnam, the People's Republic of China, and Taiwan (referred to as \"Taiwan, R.O.C.\" in the table). This table serves to provide information about the legal incorporation status of different entities under a specific jurisdiction.", + "points": [ + [ + 28, + 656 + ], + [ + 511, + 224 + ] + ] + }, + { + "image_name": "3547fd5c6bce6d3846cf8bddbb7da8cefd8c31cdab156a1a3b6229265b669f22.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 6, + "dataset_name": "ocr_doc_detailed_caption_point", + "gt_answers": ": This region includes the header of the page, which typically contains the title or main subject of the document. In this case, it signifies that the document is the 2004 Annual Report for a corporation named Amphenol.\n: This region is titled \"Signatures\" and serves as a section header, indicating that the following content pertains to the signing of the document by authorized individuals.\n: Here, the text explains that, in compliance with the Securities Exchange Act of 1934, the report must be signed by authorized persons. 
It mentions that the signatories are legally designated representatives of the corporation, acting on March 11, 2005.\n: The text in this region contains the name and titles of an individual - Chairman, Chief Executive Officer, and President - implying that this person is a principal executive of the company.\n: This region presents the company's logo, which likely serves to authenticate the document officially and visually represents the corporation.\n: This region continues the text from , elaborating on the requirement for the signatories to endorse the report as of the date indicated below, further reinforcing the formal nature of the document.\n: This region shows a table with three columns: Signature, Title, and Date. The table lists the names, titles, and signature dates (all on March 11, 2005) of various key executives and directors, demonstrating their formal acknowledgment and approval of the document's contents.\n: The page footer typically contains additional information about the document, such as page numbers or legal disclaimers. However, in this image, this section is not clearly visible, so its specific content can't be described.", + "points": [ + [ + 514, + 54 + ], + [ + 117, + 92 + ], + [ + 514, + 137 + ], + [ + 717, + 264 + ], + [ + 722, + 185 + ], + [ + 514, + 333 + ], + [ + 511, + 583 + ], + [ + 937, + 969 + ] + ] + }, + { + "image_name": "4268de66f307483c8fa9ad3cbc28e9946a2f7bedd601a792b7069bb786e9dddf.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 7, + "dataset_name": "ocr_doc_detailed_caption_point", + "gt_answers": ": This region contains text discussing the process by which vehicles that do not detect landmarks originally found by other vehicles contribute additional information. 
Specifically, it describes how these vehicles, if unable to confirm the presence of a detected landmark, provide backend servers with information such as time, perspective, sensors used, and weather conditions to improve the landmark database. The process is said to be conducted under privacy considerations to ensure data anonymization.\n: The text in this region elaborates on the statistical evaluation of confirmations and denials regarding landmarks. It touches on the false-positive and false-negative scenarios associated with landmark detection and emphasizes the need for aggregation and statistical validation of information by backend servers. Additionally, it mentions the importance of integrating context information, such as geographical adjustments and weather data, in enhancing the system's detection capabilities.\n: This section outlines how particular sensory-related attributes, or 'environmental features,' are appended to the map as metadata, enabling vehicles to attribute specific characteristics to landmarks. This provision of additional details not only enriches the map's data but also prevents unnecessary communication of delete hypotheses to the backend server\u2014ultimately improving the efficiency of information sharing among vehicles considering the same landmarks in their navigation.\n: The text describes a scenario where multiple vehicles fail to confirm a previously detected landmark. In such cases, a delete hypothesis is reinforced each time a vehicle fails to detect the landmark, leading to its removal from the map database. The text underscores the collaborative effort of vehicles in maintaining the accuracy and integrity of the map system by reporting changes in landmark presence.\n: In this region, the text specifies the process following the strengthening of a delete hypothesis. When a landmark's deletion is validated, the map segment is updated, and an updated version is made available to the vehicles. 
This ensures that the vehicles have access to the most accurate navigation data, reflecting the current environment without the invalidated landmark.", + "points": [ + [ + 511, + 152 + ], + [ + 505, + 430 + ], + [ + 508, + 694 + ], + [ + 495, + 811 + ], + [ + 493, + 903 + ] + ] + }, + { + "image_name": "82e3e7f6b499e2160faf925d747c5690b543bd7f612ab393a971a0f868f4a34d.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 8, + "dataset_name": "ocr_doc_detailed_caption_point", + "gt_answers": ": This area is classified as a page-footer. It typically contains information like the page number or document classification. However, the contents of this specific page-footer cannot be determined without further context or visible text.\n: This region is categorized as a page-header, which is often placed at the top of a page to display titling, navigation, or pertinent information about the document's content. While the exact text is not discernible, page-headers typically include the document name, section title, or date.\n: Specified as a section-header, this particular section-header seems to be titled \"Ten-year Financial Review\". Section-headers are used to introduce a forthcoming section, its subject matter, and its significance within the larger document.\n: This marked region is denoted as a table, which is a structured arrangement of data organized in rows and columns. This table appears to relate to financial data, showing a multi-year comparison of various financial metrics such as operating revenue, net income, common share data, balance sheet data, and other data. 
This arrangement allows for easy comparison of values across different years, indicating trends and changes over time.", + "points": [ + [ + 495, + 956 + ], + [ + 44, + 510 + ], + [ + 255, + 59 + ], + [ + 504, + 455 + ] + ] + }, + { + "image_name": "7c48613c82de00d64b250e44cd668ce0410aa361a802d33c22c9d9ad5b987c47.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 9, + "dataset_name": "ocr_doc_detailed_caption_point", + "gt_answers": ": This region specifies 'Patent Application Publication,' indicating that the content on the page is part of a document detailing a patent application that has been made publicly available. It helps users understand the nature of the document.\n: This region lists a date, 'Apr. 8, 2010,' which refers to a publication or filing date associated with the patent application document. Dates are critical for understanding the timeline of patent applications and the status of the technology or invention described therein.\n: The text contains a numerical and alphabetic code 'US 2010/0086500 A1.' This is a patent identification code, typically used to uniquely identify a particular patent application in the United States Patent and Trademark Office (USPTO) database.\n: This region features the caption 'FIG. 2,' which indicates that the corresponding image or figure is labeled as Figure 2 in the document. Such captions provide a reference that allows readers to locate and discuss specific parts of the visual content within the patent application.\n: This region presents a graph with two axes labeled 'Resolution' on the y-axis and 'Conc.(mM)' on the x-axis, which shows a curve with a mathematical equation and an R-squared value. It represents data, likely from experiments or simulations, that demonstrates how resolution varies with concentration in millimolar units. 
The equation and R-squared value suggest a logarithmic relationship and the goodness of fit of the data to the model, respectively. This type of image is common in patent applications to illustrate technical details or results related to the invention.", + "points": [ + [ + 263, + 78 + ], + [ + 545, + 78 + ], + [ + 795, + 79 + ], + [ + 510, + 257 + ], + [ + 508, + 625 + ] + ] + }, + { + "image_name": "3fd82f62ff8b481033f097756a45e340c79bcc3ba661ce6e6d1b775188121b69.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 10, + "dataset_name": "ocr_doc_detailed_caption_point", + "gt_answers": ": This region is labeled as a \"page-header\" and features the title \"CONSOLIDATED BALANCE SHEETS\" in a bolded, uppercase font, which suggests that it heads a financial document summarizing a company's assets, liabilities, and stockholders' equity at a specific point in time. The title's format implies importance and distinguishes it from other sections of the document.\n: This marked region, categorized as a \"table,\" presents detailed financial information typically found in a balance sheet. It is organized into two columns, displaying figures for two fiscal years denoted as 2003 and 2004. Assets are listed at the top part, showing various types of assets such as current assets and property, with numerical values next to each item indicating their reported values in dollar amounts. 
This table is essential for providing a clear and itemized account of the company's financial status for the two respective years.\n: Marked as a \"section-header,\" this region represents the title of a section within the financial document, in this case, \"CONSOLIDATED BALANCE SHEETS.\" The stylized font and the use of dots to draw attention suggest that this header is for an important section of the document, likely introducing the subsequent detailed table of financial data.\n: This region, identified as \"text,\" contains the concluding line of the financial table, summarizing the total liabilities and stockholders' equity. The text provides the final aggregated figures for both 2003 and 2004 in dollars, directly under the liabilities and equity section. This line is crucial as it ties together the document's purpose by showing the balance sheet's bottom line, ensuring the accounting equation is satisfied (total assets equal total liabilities plus stockholders' equity).", + "points": [ + [ + 861, + 28 + ], + [ + 498, + 504 + ], + [ + 498, + 106 + ], + [ + 236, + 834 + ] + ] + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/data/ocr_spotting_QA/ocr_spotting_QA_box.json b/evaluation/MDVP-Bench/data/ocr_spotting_QA/ocr_spotting_QA_box.json new file mode 100644 index 0000000000000000000000000000000000000000..f9c1b690d0aab510eab4ab86fcaeb1806c1ad78e --- /dev/null +++ b/evaluation/MDVP-Bench/data/ocr_spotting_QA/ocr_spotting_QA_box.json @@ -0,0 +1,266 @@ +[ + { + "image_name": "0000119.jpg", + "question": "What kind of font is shared by the text in , , and ?", + "question_id": 1, + "dataset_name": "ocr_spotting_QA_box", + "gt_answers": "The text in , , and all share a serif typeface, implying a coordinated design and branding strategy for the signage.", + "bbox": [ + [ + 176, + 96, + 106, + 30 + ], + [ + 164, + 130, + 127, + 32 + ], + [ + 173, + 162, + 38, + 24 + ], + [ + 228, + 167, + 44, + 25 + ] + ] + }, + { + "image_name": "img_805.jpg", + 
"question": "What kind of establishment can be inferred to be associated with ?", + "question_id": 2, + "dataset_name": "ocr_spotting_QA_box", + "gt_answers": "Given the modern and bold font of the text 'FFICINE' and its location, it can be inferred that the establishment is likely a retail store or business within a shopping center or mall.", + "bbox": [ + [ + 356.0, + 233.0, + 87.0, + 20.0 + ], + [ + 443.0, + 233.0, + 95.0, + 16.0 + ] + ] + }, + { + "image_name": "img_336.jpg", + "question": "Is the text 'HOL' part of a larger word or name, and if so, could it be related to the text in other regions?", + "question_id": 3, + "dataset_name": "ocr_spotting_QA_box", + "gt_answers": "Yes, the text 'HOL' appears to be part of a larger word or name and could be related to the text in and , which when combined may spell out a single coherent word or name.", + "bbox": [ + [ + 529.0, + 430.0, + 101.0, + 44.0 + ], + [ + 521.0, + 496.0, + 105.0, + 40.0 + ], + [ + 521.0, + 566.0, + 92.0, + 35.0 + ], + [ + 823.0, + 30.0, + 101.0, + 105.0 + ] + ] + }, + { + "image_name": "img_475.jpg", + "question": "What kind of offers might be indicated by the text in ?", + "question_id": 4, + "dataset_name": "ocr_spotting_QA_box", + "gt_answers": "The text in suggests that there could be special offers, discounts, or promotional events available to customers.", + "bbox": [ + [ + 236.0, + 431.0, + 38.0, + 26.0 + ], + [ + 270.0, + 429.0, + 21.0, + 17.0 + ], + [ + 262.0, + 260.0, + 120.0, + 39.0 + ], + [ + 186.0, + 276.0, + 78.0, + 29.0 + ], + [ + 311.0, + 231.0, + 67.0, + 34.0 + ], + [ + 210.0, + 225.0, + 58.0, + 31.0 + ], + [ + 222.0, + 241.0, + 69.0, + 36.0 + ] + ] + }, + { + "image_name": "0001135.jpg", + "question": "What type of business might be identified by the name in ?", + "question_id": 5, + "dataset_name": "ocr_spotting_QA_box", + "gt_answers": "Given the context and the accompanying image of an ice cream cone, the business identified by the name in is likely an ice cream shop.", + 
"bbox": [ + [ + 155, + 211, + 186, + 80 + ], + [ + 189, + 306, + 48, + 26 + ], + [ + 272, + 315, + 50, + 29 + ] + ] + }, + { + "image_name": "img_247.jpg", + "question": "What type of sign is displayed in , and what is its purpose?", + "question_id": 6, + "dataset_name": "ocr_spotting_QA_box", + "gt_answers": "The sign in is an EXIT sign, which serves the purpose of guiding individuals to the nearest exit, ensuring that people can quickly find their way out in case of emergency.", + "bbox": [ + [ + 27.0, + 114.0, + 52.0, + 26.0 + ], + [ + 518.0, + 222.0, + 42.0, + 27.0 + ], + [ + 520.0, + 271.0, + 31.0, + 34.0 + ] + ] + }, + { + "image_name": "img_60.jpg", + "question": "What color are the letters in ?", + "question_id": 7, + "dataset_name": "ocr_spotting_QA_box", + "gt_answers": "The letters in are a bright teal color.", + "bbox": [ + [ + 89.0, + 172.0, + 451.0, + 145.0 + ] + ] + }, + { + "image_name": "img_6.jpg", + "question": "What might indicate about a particular area or space within the transportation vehicle?", + "question_id": 8, + "dataset_name": "ocr_spotting_QA_box", + "gt_answers": " indicates that there is a reserved area or seat nearby intended for specific passengers who may need special consideration, such as those with disabilities, senior citizens, or expectant mothers.", + "bbox": [ + [ + 111.0, + 459.0, + 97.0, + 24.0 + ], + [ + 641.0, + 454.0, + 53.0, + 16.0 + ] + ] + }, + { + "image_name": "0000696.jpg", + "question": "What color scheme is used for the lettering and the background in and ?", + "question_id": 9, + "dataset_name": "ocr_spotting_QA_box", + "gt_answers": "The lettering in both and is white, and the background is maroon.", + "bbox": [ + [ + 212, + 47, + 239, + 101 + ], + [ + 186, + 230, + 292, + 127 + ] + ] + }, + { + "image_name": "img_218.jpg", + "question": "What brand does represent?", + "question_id": 10, + "dataset_name": "ocr_spotting_QA_box", + "gt_answers": " represents the brand 'HITACHI'.", + "bbox": [ + [ + 663.0, + 
278.0, + 69.0, + 16.0 + ] + ] + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/data/ocr_spotting_QA/ocr_spotting_QA_point.json b/evaluation/MDVP-Bench/data/ocr_spotting_QA/ocr_spotting_QA_point.json new file mode 100644 index 0000000000000000000000000000000000000000..ba3e013594c648a88dd82331509ba4dbfc317895 --- /dev/null +++ b/evaluation/MDVP-Bench/data/ocr_spotting_QA/ocr_spotting_QA_point.json @@ -0,0 +1,264 @@ +[ + { + "image_name": "tr_img_01039.jpg", + "question": "What does the complete phrase given by and signify?", + "question_id": 1, + "dataset_name": "ocr_spotting_QA_point", + "gt_answers": "The complete phrase 'POLLING STATION' signifies the location where voters go to cast their votes during an election.", + "points": [ + [ + 406, + 109 + ], + [ + 407, + 212 + ] + ] + }, + { + "image_name": "0001235.jpg", + "question": "What could be the primary language or locale of the business indicated by the signage in ?", + "question_id": 2, + "dataset_name": "ocr_spotting_QA_point", + "gt_answers": "The primary language or locale of the business indicated by the signage in is likely Slavic, such as Czech, since 'VINARNA' is a term associated with wine establishments in Slavic languages.", + "points": [ + [ + 340, + 320 + ], + [ + 350, + 505 + ], + [ + 471, + 479 + ], + [ + 471, + 733 + ], + [ + 599, + 756 + ], + [ + 643, + 799 + ], + [ + 585, + 807 + ] + ] + }, + { + "image_name": "tr_img_03390.jpg", + "question": "What issue does the sign, as referenced by the text in and , seem to focus on?", + "question_id": 3, + "dataset_name": "ocr_spotting_QA_point", + "gt_answers": "The sign appears to focus on issues related to the care for children of migrant workers, specifically regarding their education during non-compulsory phases.", + "points": [ + [ + 558, + 325 + ], + [ + 626, + 486 + ], + [ + 536, + 382 + ], + [ + 548, + 441 + ] + ] + }, + { + "image_name": "0000433.jpg", + "question": "What type of cuisine does the name associated with 
suggest?", + "question_id": 4, + "dataset_name": "ocr_spotting_QA_point", + "gt_answers": "The name 'SEOUL' in suggests a connection to Korean cuisine.", + "points": [ + [ + 986, + 526 + ], + [ + 974, + 618 + ], + [ + 957, + 742 + ], + [ + 961, + 971 + ], + [ + 961, + 1198 + ], + [ + 988, + 1223 + ] + ] + }, + { + "image_name": "img_175.jpg", + "question": "What does the text in and signify when combined together?", + "question_id": 5, + "dataset_name": "ocr_spotting_QA_point", + "gt_answers": "The text in and when combined signifies the name of a company or organization, likely indicating its brand identity or logo.", + "points": [ + [ + 160, + 272 + ], + [ + 410, + 276 + ] + ] + }, + { + "image_name": "tr_img_03584.jpg", + "question": "What type of information do and together provide?", + "question_id": 6, + "dataset_name": "ocr_spotting_QA_point", + "gt_answers": " and provide the name of a road in both Chinese characters and its English phonetic transcription, specifically 'Danshan Road'.", + "points": [ + [ + 805, + 767 + ], + [ + 788, + 1008 + ], + [ + 776, + 828 + ], + [ + 740, + 1075 + ], + [ + 907, + 871 + ], + [ + 860, + 1104 + ] + ] + }, + { + "image_name": "img_184.jpg", + "question": "What indication is there that is welcoming visitors to a particular establishment?", + "question_id": 7, + "dataset_name": "ocr_spotting_QA_point", + "gt_answers": "The presence of the word 'WELCOME' set above other text suggests a greeting to visitors, indicating their arrival at an establishment, likely the one named in and .", + "points": [ + [ + 271, + 98 + ], + [ + 142, + 204 + ], + [ + 485, + 202 + ], + [ + 196, + 393 + ], + [ + 531, + 393 + ] + ] + }, + { + "image_name": "tr_img_03043.jpg", + "question": "What is the purpose of the signs in and ?", + "question_id": 8, + "dataset_name": "ocr_spotting_QA_point", + "gt_answers": "The purpose of the signs is to inform and direct traffic due to construction. 
The sign in warns of construction ahead, while the sign in instructs to take a detour.", + "points": [ + [ + 1582, + 1377 + ], + [ + 1584, + 1464 + ] + ] + }, + { + "image_name": "0000549.jpg", + "question": "What might be the overall context or setting for the texts found in , , and when viewed together?", + "question_id": 9, + "dataset_name": "ocr_spotting_QA_point", + "gt_answers": "The texts 'DOMINE', 'DIRIGA', and 'ROB', due to their similar font style and placement on what appears to be a decorative or commemorative structure, might belong to a religious or historical context. They could form part of a larger phrase or exhortation when put together.", + "points": [ + [ + 73, + 254 + ], + [ + 127, + 279 + ], + [ + 185, + 260 + ] + ] + }, + { + "image_name": "tr_img_03827.jpg", + "question": "What information is provided in regarding the bus?", + "question_id": 10, + "dataset_name": "ocr_spotting_QA_point", + "gt_answers": "The information in displays the bus's authorized carrying capacity, which is 29 people.", + "points": [ + [ + 288, + 126 + ], + [ + 436, + 360 + ], + [ + 200, + 373 + ], + [ + 201, + 404 + ], + [ + 439, + 396 + ], + [ + 398, + 132 + ] + ] + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/ocr_spotting_detailed_caption_box.json b/evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/ocr_spotting_detailed_caption_box.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee3d7101f6bd1e5401ce7731d1328377452273d --- /dev/null +++ b/evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/ocr_spotting_detailed_caption_box.json @@ -0,0 +1,314 @@ +[ + { + "image_name": "0000288.jpg", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 1, + "dataset_name": "ocr_spotting_detailed_caption_box", + "gt_answers": ": The Comprehensive Description includes \"Abbondanza\" displayed in a script font that conveys a sense of 
stylishness or elegance, which may suggest that it is the name of a business, possibly a restaurant or some sort of food-related establishment, given its association with abundance or plenty often related to food.\n: The Comprehensive Description for \"Cafe\" suggests that the text is identifying a type of establishment where coffee and light meals may be served. The font is straightforward and easily legible, which is typically used for clarity and immediate recognition for passersby.\n: The Comprehensive Description for \"USIS\" indicates a text that is likely an acronym or a name presented in a bold and blocky font, common for official or institutional entities. It is placed on the side of a van, suggesting it could be the branding of a company or a service, possibly linked to the van's purpose or ownership.", + "bbox": [ + [ + 83, + 147, + 257, + 83 + ], + [ + 162, + 201, + 87, + 50 + ], + [ + 665, + 377, + 60, + 39 + ] + ] + }, + { + "image_name": "img_49.jpg", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 2, + "dataset_name": "ocr_spotting_detailed_caption_box", + "gt_answers": ": The text \"ESTATE\" is written in capital letters with a bold typeface that has clear and uniform strokes, implying a sense of authority and prominence. It appears against a yellow background, which suggests visibility and is likely meant to catch the eye of passersby. The text is likely part of a business sign for a company dealing with property, real estate sales, or management.\n: The text \"AGENTS\" displayed similarly in capital letters and bold typeface complements the text in . The typeface is consistent, suggesting that both are part of the same sign. 
The dark text against the yellow background stands out, indicating the nature of the business below, which is likely involved in real estate agency work.\n: The word \"SAXONS\" is written in capital letters with a font style that is bold and prominent, but with a slightly more decorative style than and . This difference could be a stylized choice to make the brand name distinctive. Positioned on a fa\u00e7ade above a window, it is part of the business's branding, likely the name of the company.\n: This contains the same text as , \"SAXONS\", indicating that the text is repeated within the image. This repetition reinforces the importance of the name as part of the branding. The text style and location, again above a window, maintain the brand's visibility from multiple angles.\n: Displaying the word \"ESTATE\" in the same bold, capital letter style as observed in . This repetition at a lower part of the building indicates a consistent branding approach across the business' presence on the building, and its placement closer to eye level increases readability for pedestrians.\n: The word \"AGENTS\" is identified, and like , it mirrors the style and size of the sign in , ensuring that the message of the business being an estate agency is clear. 
This consistent branding facilitates quick recognition and understanding of the services offered.", + "bbox": [ + [ + 70.0, + 157.0, + 207.0, + 47.0 + ], + [ + 311.0, + 165.0, + 225.0, + 45.0 + ], + [ + 254.0, + 247.0, + 186.0, + 35.0 + ], + [ + 191.0, + 380.0, + 92.0, + 19.0 + ], + [ + 293.0, + 383.0, + 89.0, + 17.0 + ], + [ + 392.0, + 385.0, + 92.0, + 16.0 + ] + ] + }, + { + "image_name": "0000808.jpg", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 3, + "dataset_name": "ocr_spotting_detailed_caption_box", + "gt_answers": ": The text \"Triple\" appears in a retro cursive script, likely chosen to convey a sense of nostalgia or classic style, which is consistent with the overall branding. It is set against a yellow portion of the sign, and the color choice here is a mint green which provides a pleasing contrast that makes the text stand out.\n: \"WHITE\" is written in bold, capital letters, featured on a green ribbon-like background that cuts across the sign. The font is sans-serif, which gives a modern and clean look. The use of capital letters in this context suggests emphasis and importance.\n: The phrase \"SPOT\" is displayed in a sans-serif, uppercase font similar to the text in . It is placed within the same green ribbon background, mirroring the style and maintaining design consistency. This positioning completes the name or title represented on the signage.\n: \"O's\" is written in a script that echoes the retro flair seen in . This script is mint green, presented on a yellow backdrop, and it features an apostrophe, signifying a possessive or a contraction. The stylized \"O\" has a red center dot, adding to the thematic color scheme.\n: The word \"by\" is located on the lower left area of the central graphic and is likely to be a connector or a preposition relating to the larger text elements in the image. 
It's written in small, lowercase letters, contrasting in size to the other texts, suggesting a subordinate role in the information hierarchy.", + "bbox": [ + [ + 581, + 352, + 389, + 391 + ], + [ + 663, + 790, + 229, + 241 + ], + [ + 905, + 780, + 181, + 233 + ], + [ + 1019, + 324, + 156, + 158 + ], + [ + 689, + 721, + 44, + 54 + ] + ] + }, + { + "image_name": "0001122.jpg", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 4, + "dataset_name": "ocr_spotting_detailed_caption_box", + "gt_answers": ": The text in this area reads \"NEW\". The font is bold and stylized with thick vertical lines and sharp edges, suggesting a strong, impactful presence. The letters are colored in red, which stands out against the white background of the fabric they are printed on. There are black vertical lines that run down the fabric, giving the impression of pinstripes. The text placement and style are reminiscent of classic athletic or team-related apparel, often used to represent a specific city or team name.\n: The text in this area reads \"YORK\". Similar to the previous region, it features a bold and stylized font in red, contrasting with the white pinstriped background. The consistent style between this text and that of suggests they form a single phrase, typically associated with a particular location or team. 
The font size and its commanding presence imply that the text is intended to be easily read and recognized from a distance, characteristic of team jerseys or sports merchandise.", + "bbox": [ + [ + 51, + 103, + 362, + 264 + ], + [ + 516, + 120, + 466, + 335 + ] + ] + }, + { + "image_name": "img_162.jpg", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 5, + "dataset_name": "ocr_spotting_detailed_caption_box", + "gt_answers": ": The text \"Colchester\" is displayed in a straightforward, sans-serif font with a bold weight, which makes it highly legible and easy to read. The text color is white, which contrasts sharply with the red background, creating a standout effect that captures attention. This type of text presentation is typically used for clear communication and effective signage. \"Colchester\" is likely the name of a place, possibly a destination or location referenced on a signpost or directional marker. The choice of a bold and contrasting color scheme is intentional, aimed at ensuring that the text is discernible from a distance and under various lighting conditions. The text is centrally aligned within the marked area, suggesting the importance of the information it conveys. The presence of the symbol above the text, resembling a stylized pair of railway tracks, indicates that this sign is associated with a railway service or station. The purpose of the text in this context is to inform viewers of a railway station name or a destination reachable via train services.", + "bbox": [ + [ + 386.0, + 291.0, + 261.0, + 43.0 + ] + ] + }, + { + "image_name": "img_940.jpg", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 6, + "dataset_name": "ocr_spotting_detailed_caption_box", + "gt_answers": ": The text \"AYAM\" is presented in large uppercase letters on a signage board. 
The font appears bold and designed to be eye-catching, serving the purpose of promoting or identifying a business or product associated with chicken, as \"ayam\" means chicken in Malay and Indonesian.\n: The text \"RIA\" appears next to \"AYAM\" in the same font and style, following the design pattern of the sign. It seems to be part of a larger phrase or brand name, although without additional context it is challenging to ascertain its full meaning or association.\n: The word \"SMASHED\" is in uppercase letters and retains the same font consistency and styling as the previous words, indicating it's part of the same signboard. The use of the word \"smashed\" could be describing a method of food preparation, possibly relating to the menu items offered by the establishment.\n: The word \"FRIED\" appears in the same bold, attention-grabbing font as the other text elements in the signage. The usage of the term \"fried\" aligns well with food-oriented establishments and could denote a particular style of cooking advertised by the business.\n: The term \"CHICKEN\" completes what seems to be a descriptive phrase relating to the nature of the food provided at this location. Presented in the same visual style as the other text elements on the sign, it confirms the establishment\u2019s focus on chicken dishes.", + "bbox": [ + [ + 451.0, + 44.0, + 121.0, + 115.0 + ], + [ + 655.0, + 144.0, + 54.0, + 65.0 + ], + [ + 412.0, + 113.0, + 111.0, + 71.0 + ], + [ + 521.0, + 150.0, + 50.0, + 46.0 + ], + [ + 567.0, + 166.0, + 57.0, + 46.0 + ] + ] + }, + { + "image_name": "img_70.jpg", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 7, + "dataset_name": "ocr_spotting_detailed_caption_box", + "gt_answers": ": The text \"Accommodation\" appears on a signboard, suggesting the label for a location where lodging facilities are provided. 
The text is bold and capitalized, providing clear visibility and significance, thus indicating direction to the accommodation facilities within the vicinity.\n: The text \"Office\" displayed similarly to , is also on the signboard, and its typography suggests it is an instructional marker guiding individuals towards offices located nearby. Its distinct appearance functions as a navigational aid for visitors seeking office spaces.\n: The term \"Nightline\" is prominently featured, possibly indicating a nighttime service or a helpline available after-hours. This text, like the others on the sign, caters to nighttime assistance or inquiries, potentially providing crucial information for individuals seeking support during late hours.", + "bbox": [ + [ + 71.0, + 191.0, + 314.0, + 36.0 + ], + [ + 404.0, + 189.0, + 117.0, + 34.0 + ], + [ + 77.0, + 262.0, + 161.0, + 40.0 + ] + ] + }, + { + "image_name": "0001250.jpg", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 8, + "dataset_name": "ocr_spotting_detailed_caption_box", + "gt_answers": ": The text \"BUBBA\" appears in bold, capital letters with a font that is playful and somewhat informal, possibly evoking a casual or friendly atmosphere. The position is prominently displayed at the top of a circular logo, which suggests its importance as a distinguishing element or a brand name.\n: The word \"GUMP\" is presented in a similar bold and playful font directly below . Both words form a cohesive phrase when read together, implying a connection or partnership, possibly in a business context.\n: The word \"SHRIMP\" is placed below and , completing the phrase that seems to be the focal point of the circular logo. The font style remains consistent with the previous text, reinforcing the brand's identity and likely indicating the type of product or service offered.\n: \"RESTAURANT\" is written in a smaller, yet bold font beneath the word \"SHRIMP\". 
This text specifies the nature of the business associated with the overarching brand identified by the preceding text.\n: The word \"MARKET\" appears in a smaller font at the bottom of the circle, suggesting a secondary or additional aspect of the business, perhaps indicating a place where goods are sold as part of the company's offerings.\n: \"CO\" could stand for \"Company,\" abbreviated and presented beside the main brand name, which is common practice for businesses to denote a corporate entity.\n: \"TM\" indicates that the entire phrase formed by , , and is a trademark. This protects the brand's unique identity and legally secures its use exclusively for the business's purposes.", + "bbox": [ + [ + 165, + 290, + 192, + 89 + ], + [ + 363, + 281, + 159, + 57 + ], + [ + 225, + 354, + 140, + 46 + ], + [ + 185, + 376, + 156, + 94 + ], + [ + 426, + 344, + 83, + 101 + ], + [ + 419, + 347, + 56, + 23 + ], + [ + 481, + 420, + 22, + 16 + ] + ] + }, + { + "image_name": "tr_img_01235.jpg", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 9, + "dataset_name": "ocr_spotting_detailed_caption_box", + "gt_answers": ": The text \"EVENING\" appears in a sans-serif, bold font that is capitalized for emphasis. It is located on the lower part of a product label, positioned just above another text element that indicates further details about the product. The text serves to indicate either the usage time or a key ingredient, \"Evening Primrose,\" of the product, likely related to wellness or personal care.\n: \"PRIMROSE\" is the second part of a phrase in which the text is styled similarly to the text in . It follows directly below \"EVENING,\" suggesting the complete term \"EVENING PRIMROSE.\" The positioning and styling are consistent with , reinforcing the connection between the two words. 
The term \"Evening Primrose\" is usually associated with the name of a plant, often used in the context of essential oils or natural product ingredients.\n: The term \"BASE\" is part of the full term \"BASE OIL,\" which is shown in a smaller and possibly lighter weight sans-serif typeface compared to and . Positioned at the bottom of the product label, it likely indicates the type of product, suggesting that the contents of the bottle can be used as a carrier or base oil in aromatherapy or skincare.\n: \"OIL\" completes the term \"BASE OIL,\" as mentioned in . The font and positioning maintain consistency with the description provided in , reinforcing its role as part of a descriptive phrase related to the product's use or contents. Together, \"BASE OIL\" likely designates the product's category within a larger set of similar goods.", + "bbox": [ + [ + 758.0, + 3763.0, + 498.0, + 112.0 + ], + [ + 1291.0, + 3751.0, + 572.0, + 119.0 + ], + [ + 1061.0, + 3894.0, + 303.0, + 105.0 + ], + [ + 1386.0, + 3882.0, + 180.0, + 110.0 + ] + ] + }, + { + "image_name": "tr_img_01644.jpg", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 10, + "dataset_name": "ocr_spotting_detailed_caption_box", + "gt_answers": ": The text \"46\" appears in a serif font with distinct, prominent stylistic flares at the ends of the strokes, which is characteristic of serif fonts. These numerals are white, providing a strong contrast against a dark background plate, which appears to be made of slate or a similar material. The plate is mounted onto a brick wall, and there are two spherical, possibly metallic, fixtures attached to the plate on either side, which seem to be serving as decorative mounting posts. 
The purpose of this text likely indicates an address or number associated with the location, commonly used to identify specific residential or commercial units.", + "bbox": [ + [ + 134.0, + 94.0, + 317.0, + 225.0 + ] + ] + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/ocr_spotting_detailed_caption_point.json b/evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/ocr_spotting_detailed_caption_point.json new file mode 100644 index 0000000000000000000000000000000000000000..b4216bad43e0e25cfbb55a029dd3fb8ef61ca6d1 --- /dev/null +++ b/evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/ocr_spotting_detailed_caption_point.json @@ -0,0 +1,220 @@ +[ + { + "image_name": "tr_img_01109.jpg", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 1, + "dataset_name": "ocr_spotting_detailed_caption_point", + "gt_answers": ": The text \"Police\" appears prominently on a sign with a blue background flanked by a checkered pattern that suggests it is a police station signboard. The font is bold and in white, creating a stark contrast for readability. The text is placed below a grid of four white and three blue squares that likely symbolize the police service's emblem or logo. This sign is usually intended to mark the location of a police facility and to provide a visible and recognizable point for those seeking law enforcement services.", + "points": [ + [ + 568, + 258 + ] + ] + }, + { + "image_name": "img_717.jpg", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 2, + "dataset_name": "ocr_spotting_detailed_caption_point", + "gt_answers": ": The text \"MONDIAL\" is presented in uppercase letters, colored in white, against a dark background for enhanced contrast and visibility. The font appears to be a bold, sans-serif type, suggesting a modern and clean aesthetic. 
It is located above the entrance of what seems to be a commercial establishment, prominently placed to attract attention and inform visitors or passersby of the brand or company name associated with the premises. The purpose of this signage is likely to serve as a locator or identifier for the business, possibly a shop or store within a larger commercial complex, such as a mall or shopping center. The text's clean and distinct style makes it easily legible to those at a distance or approaching the location, fulfilling its role as an effective means of branding and identification.", + "points": [ + [ + 869, + 121 + ] + ] + }, + { + "image_name": "img_838.jpg", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 3, + "dataset_name": "ocr_spotting_detailed_caption_point", + "gt_answers": ": The text \"Ion\" appears in uppercase with a simple, bold sans-serif font. The letters are evenly spaced and directly applied to a surface which seems to be a column within an interior space, likely a shopping or commercial center. The text is positioned at some height on the column, suggesting it may serve as signage or branding for visibility across the area.\n: \"ART\" is displayed in uppercase letters with a heavier weight than normal text. It uses a clean, bold sans-serif font, emphasizing clarity and importance. Positioned on what appears to be a vertical surface, possibly another column or wall, it is likely there to denote a section, a store, or an art-related venue within the building.\n: Here again, the text \"Ion\" is shown, matching the font and style of . Its identical appearance and placement on a similar structure indicate it is part of the same naming or branding strategy, contributing to consistent visual identity throughout the facility.\n: The word \"Salt\" is written in lowercase with a sans-serif font that is less bold compared to the previous regions. 
This label is also applied to a vertical surface and seems to designate a specific area or venue, possibly a restaurant or store within the complex.\n: Displaying the word \"Grill\" in a simplistic sans-serif font and all lowercase letters, this text complements the text in , potentially representing the full name or theme of a dining establishment, \"Salt Grill.\"", + "points": [ + [ + 121, + 33 + ], + [ + 192, + 22 + ], + [ + 99, + 105 + ], + [ + 86, + 165 + ], + [ + 200, + 188 + ] + ] + }, + { + "image_name": "0000310.jpg", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 4, + "dataset_name": "ocr_spotting_detailed_caption_point", + "gt_answers": ": The text \"ALVES\" is displayed prominently on a yellow background, likely part of a sports jersey. The font is bold and capitalized, providing a strong visual presence. This type of text is typically indicative of a last name, suggesting that it belongs to an athlete. The text is part of a personalized jersey, which often represents the athlete's identity on the field.\n: The word \"PARIS\" is part of a larger emblem, which appears to be the crest of a sports team. The font is large, bold, and arched, making it a focal point of the emblem. This text, being part of a logo, signifies the city or location associated with the team, fostering a sense of identity and pride among its supporters.\n: The name \"DANI\" appears in a casual and less formal font compared to the other texts. Its placement above \"ALVES\" suggests it may be a first name, offering a complete identity when paired with the surname below it. The text's appearance on the jersey connects it to a specific individual, likely a player.\n: The number \"32\" is shown in a large, bold font with a drop shadow effect, providing a sense of depth. 
Numbers on sports jerseys typically indicate the player's squad number, which is used for identification during games and for fans to recognize their favorite athletes.", + "points": [ + [ + 270, + 310 + ], + [ + 338, + 74 + ], + [ + 207, + 310 + ], + [ + 240, + 377 + ] + ] + }, + { + "image_name": "tr_img_03430.jpg", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 5, + "dataset_name": "ocr_spotting_detailed_caption_point", + "gt_answers": ": The text \"\u7231\u62a4\u811a\u4e0b\u8349\" is written in white, capitalized characters against a solid red background. The font is simple and sans-serif, making it easily legible. The message itself is a plea in Mandarin Chinese, translating to 'Protect the grass beneath your feet', suggesting that the grass in the area needs to be preserved from damage. This text is often found in public areas as a reminder to passersby.\n: The text \"Please\" appears in black, lowercase letters against a white background. The text is positioned above Chinese characters within a rectangular yellow sign that is bordered in red. The font is sans-serif, which contributes to a clear and direct message. This fragment of text seems to be the starting part of a polite request or instruction, commonly used in public signage to encourage certain behavior.\n: This text, comprising the Mandarin Chinese characters \"\u83ab\u6298\u679d\u5934\u82b1\", translates to 'Do not pick the flowers from the branches'. The characters are white, set against a red background, and written in a straightforward, sans-serif typeface for easy reading. The statement serves as a directive aimed at preserving the natural flora and likely complements other sign components to relay a full conservation message.\n: The single word \"grass\" is displayed in black, lowercase letters on a white backdrop. The font is sans-serif and unembellished, ensuring readability. 
This word is part of a larger directive likely aiming to protect the grassy area where the sign is placed.\n: Displayed in black, lowercase letters on a white background, the word \"keep\" is written in a simple, sans-serif font. This term indicates an imperative, which typically precedes further instructions. It is likely part of an overall message on the sign to instruct the viewer on what actions to take or avoid concerning the vicinity.\n: The word \"away\" is shown in black, lowercase letters against a white background, using a sans-serif font for clarity. It usually denotes a distance or separation from something and is most probably part of a set of instructions on the sign.\n: The text \"from\" is provided in black, lowercase letters on a white background. The sans-serif typeface used is unadorned, emphasizing legibility. This preposition suggests a relational direction or separation and fits in line with the other words on the sign to form an instructive sentence.\n: The word \"the\" is written in black, lowercase letters on a white background. The font is straightforward, sans-serif, and the word functions as a definite article, typically used in English to specify a particular instance of a noun that is understood from the context.", + "points": [ + [ + 1328, + 1481 + ], + [ + 1100, + 1616 + ], + [ + 1329, + 1557 + ], + [ + 1089, + 1664 + ], + [ + 1243, + 1618 + ], + [ + 1371, + 1619 + ], + [ + 1494, + 1613 + ], + [ + 1601, + 1611 + ] + ] + }, + { + "image_name": "img_981.jpg", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 6, + "dataset_name": "ocr_spotting_detailed_caption_point", + "gt_answers": ": This region displays the text \"SIDE\" in a bold, sans-serif font, which is typically used for clarity and legibility. The text is in capital letters, suggesting it may be a label or a title for a section within the larger environment. 
The exact purpose of this text cannot be determined without further context, but it is likely used to indicate a specific side or area within the location, possibly for orientation or categorization purposes.\n: Here we see the word \"LABEL\" presented in a bold, sans-serif typeface similar to that of . The text is capitalized, which often denotes a heading or an important designation. The usage of the word \"LABEL\" suggests that this might be an instructive sign or part of an informational display, guiding viewers to understand that items in the vicinity should bear a label or are sorted by labels. The setting appears to be a retail or commercial environment, where such signs are integral in assisting customers to navigate the space efficiently.", + "points": [ + [ + 357, + 127 + ], + [ + 408, + 129 + ] + ] + }, + { + "image_name": "tr_img_03892.jpg", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 7, + "dataset_name": "ocr_spotting_detailed_caption_point", + "gt_answers": ": The text here reads \"\u6e56\u5357\u7701\u4f20\u8fbe\u5b66\u4e60\" which translates to \"Hunan Province Conveys Learning\". The characters are printed in a formal, bold typeface that is common for official or governmental contexts. It appears to be a banner or sign placed above a stage, suggesting that it is related to an official event or meeting where learning or communication of knowledge is being disseminated, most likely political in nature given the context.\n: This region contains the text \"\u5341\u4e00\u5c4a\u5168\u56fd\u4eba\u5927\u56db\u6b21\u4f1a\u8bae\" which translates to \"The Fourth Session of the 11th National People's Congress\". The font is similar to that of Region 1, indicating formality and official communication. 
As with Region 1, the text likely denotes the name or nature of the official event being held, which is a session of the National People's Congress, an important legislative body in China.\n: The characters \"\u7cbe\u795e\u4f1a\u8bae\" translate to \"Spiritual Meeting\" or \"Meeting with Purpose/Resolve\". This text, in the same formal style, may describe the focus or nature of the gathering, emphasizing a meeting with a specific ideological or political significance.\n: Here, the text reads \"\u5168\u56fd\u653f\u534f\u5341\u4e00\u5c4a\u56db\u6b21\u4f1a\u8bae\" which means \"The Fourth Session of the 11th National Committee of the Chinese People's Political Consultative Conference (CPPCC)\". This indicates that another major political event is being referenced, which is the meeting of the CPPCC, an important advisory body in the Chinese political system.", + "points": [ + [ + 115, + 24 + ], + [ + 323, + 16 + ], + [ + 482, + 25 + ], + [ + 323, + 35 + ] + ] + }, + { + "image_name": "tr_img_03783.jpg", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 8, + "dataset_name": "ocr_spotting_detailed_caption_point", + "gt_answers": ": The text reads \"\u6df1\u5733\u53d1\u5c55\u94f6\u884c,\" which translates to \"Shenzhen Development Bank.\" The characters are in a bold, red font and are prominently displayed against the backdrop of the grey building's upper facade. This suggests that the building is either a branch or headquarters of the Shenzhen Development Bank, and the text serves as a signboard, indicating the presence and branding of the bank within the building.\n: The text \"\u8fdc\u6d0b\u5927\u53a6\" translates to \"Oceanwide Building.\" The characters are in a lighter tone, possibly white or silver, and appear smaller than the text in . They are located on the side of the building, indicating the name of the edifice or the name of the company that occupies it. 
This text acts as an identifying marker for the building itself.", + "points": [ + [ + 250, + 120 + ], + [ + 62, + 345 + ] + ] + }, + { + "image_name": "img_159.jpg", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 9, + "dataset_name": "ocr_spotting_detailed_caption_point", + "gt_answers": ": The text reads \"RAF\" in a bold, sans-serif font. It is in white against a navy blue background which might suggest a formal or official nature, potentially related to an organization or entity abbreviated as \"RAF\". The text's high contrast with the background ensures visibility and legibility, which hints that it is intended for public view and is likely significant to the entity it represents.\n: This text displays the word \"CAREERS\" in white letters on a red background. The choice of a bold, sans-serif font type maximizes readability and the stark contrast with the background color draws attention. The term \"CAREERS\" indicates a focus on employment or vocational opportunities and could imply that the organization or entity referenced in is presenting information related to job prospects or guidance for individuals seeking employment.", + "points": [ + [ + 131, + 247 + ], + [ + 388, + 244 + ] + ] + }, + { + "image_name": "img_452.jpg", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 10, + "dataset_name": "ocr_spotting_detailed_caption_point", + "gt_answers": ": The text \"NOVEMBER\" is in all caps and is captured with high contrast, suggesting that it may be used here as an indication of the schedule or release time frame of the movie or event advertised in the poster it's a part of. The font is bold and fairly large, meant to stand out and be easily readable from a distance. It is positioned at the bottom of a visual display, which is commonly where important information is included, such as dates, times, or locations. 
The context points to it being part of a promotion for entertainment content.\n: The text \"ttins\" appears to be part of a larger word or phrase that's not entirely visible, likely due to the angle or framing of the photo. Without additional context, it's difficult to determine what the full text is supposed to convey. The letters are stylized in a way that is consistent with creative titling, often used in movie or show titles. It is located on a smaller poster, which suggests it is likely a part of promotional material for a different entertainment piece than what's indicated in .\n: The text \"One\" stands alone with no additional context. The style and positioning suggest it may be part of a larger phrase or title. It is located on a large poster which hints at its significance; however, without further context, it's not possible to pinpoint the exact nature of the content it relates to. It may refer to a title, a character name, or a concept pivotal to the promotional material it is featured on.", + "points": [ + [ + 434, + 640 + ], + [ + 1069, + 420 + ], + [ + 397, + 575 + ] + ] + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/data/web_QA/web_QA_box.json b/evaluation/MDVP-Bench/data/web_QA/web_QA_box.json new file mode 100644 index 0000000000000000000000000000000000000000..8aa7dd67de2879a07d06e8d78025e70bf56e3b69 --- /dev/null +++ b/evaluation/MDVP-Bench/data/web_QA/web_QA_box.json @@ -0,0 +1,290 @@ +[ + { + "image_name": "a2ded10dfbb6d0e57ef6284499b50c3e.png", + "question": "What action can users typically perform by interacting with ?", + "question_id": 1, + "dataset_name": "web_QA_box", + "gt_answers": "Users can typically download an iOS-compatible app or access iOS-specific services by interacting with .", + "bbox": [ + [ + 979, + 367, + 844, + 211 + ], + [ + 96, + 367, + 825, + 211 + ] + ] + }, + { + "image_name": "web_3357799b-92ed-4ce8-8c01-452993f9397b.png", + "question": "What content appears when you select from the search 
results?", + "question_id": 2, + "dataset_name": "web_QA_box", + "gt_answers": "When you select the content within , it probably takes you to a detailed page about Arsenal F.C., the association football club based in London, England.", + "bbox": [ + [ + 781, + 944, + 863, + 148 + ] + ] + }, + { + "image_name": "73621fa95a3736b26b69b3f2ab5f5df9.png", + "question": "What action will most likely be initiated by clicking on the email address in ?", + "question_id": 3, + "dataset_name": "web_QA_box", + "gt_answers": "Clicking on the email address would probably open the user's default email client with a new message composed and the 'To' field already populated with the email address 'info@medcompany.by.'", + "bbox": [ + [ + 633, + 21, + 115, + 19 + ], + [ + 1497, + 21, + 38, + 19 + ], + [ + 0, + 2, + 0, + 0 + ], + [ + 1459, + 21, + 38, + 19 + ] + ] + }, + { + "image_name": "web_c762cbb8-b6b0-48e3-84ad-5c4090137fa2.png", + "question": "What happens when you click on the magnifying glass icon in ?", + "question_id": 4, + "dataset_name": "web_QA_box", + "gt_answers": "When you click on the magnifying glass icon in , the website will likely perform a search with the entered term 'machine learning' and display relevant search results.", + "bbox": [ + [ + 1617, + 936, + 109, + 87 + ] + ] + }, + { + "image_name": "70514e899292ab6f2811529dabca3c39.png", + "question": "What could I find under the section indicated by ?", + "question_id": 5, + "dataset_name": "web_QA_box", + "gt_answers": "Under the section indicated by , you would likely find completed projects or achievements showcased by the company or organization.", + "bbox": [ + [ + 1056, + 636, + 134, + 19 + ], + [ + 460, + 1039, + 115, + 19 + ], + [ + 1113, + 60, + 115, + 38 + ], + [ + 1324, + 2, + 96, + 38 + ], + [ + 1113, + 2, + 172, + 38 + ], + [ + 844, + 2, + 230, + 38 + ], + [ + 1056, + 732, + 211, + 19 + ], + [ + 940, + 60, + 76, + 38 + ], + [ + 1440, + 2, + 38, + 38 + ], + [ + 806, + 60, + 57, + 38 + ] + ] + }, + 
{ + "image_name": "web_6c4e07ff-3589-4a78-99cf-fcf703a33a74.png", + "question": "If I want to see visual previews of links, should I change the setting in ?", + "question_id": 6, + "dataset_name": "web_QA_box", + "gt_answers": "Yes, you should check the checkbox in to enable the display of thumbnails which serve as visual previews for links.", + "bbox": [ + [ + 435, + 905, + 46, + 45 + ] + ] + }, + { + "image_name": "86f76b2e2a69f885543e8d7250c77845.png", + "question": "What can I find by clicking on ?", + "question_id": 7, + "dataset_name": "web_QA_box", + "gt_answers": "By clicking on , you would likely find products or content related to the National Rugby League (NRL).", + "bbox": [ + [ + 537, + 828, + 115, + 19 + ], + [ + 1382, + 636, + 76, + 38 + ], + [ + 1036, + 789, + 76, + 19 + ], + [ + 1228, + 789, + 96, + 19 + ], + [ + 595, + 213, + 76, + 38 + ], + [ + 691, + 213, + 76, + 38 + ], + [ + 499, + 616, + 96, + 19 + ], + [ + 441, + 213, + 76, + 38 + ], + [ + 902, + 789, + 115, + 19 + ], + [ + 499, + 674, + 76, + 19 + ] + ] + }, + { + "image_name": "072abbc72f059f0d21b806afb400eba1.png", + "question": "What is suggested by the style of the text within ?", + "question_id": 8, + "dataset_name": "web_QA_box", + "gt_answers": "The style of the text within suggests that it might be a hyperlink, possibly to the main page or a central hub of the site.", + "bbox": [ + [ + 19, + 21, + 76, + 76 + ], + [ + 192, + 98, + 76, + 19 + ] + ] + }, + { + "image_name": "web_bd7a1ecb-e3e7-4a79-ba7e-91ae27a861ba.png", + "question": "What happens when I click on ?", + "question_id": 9, + "dataset_name": "web_QA_box", + "gt_answers": "Clicking on will close the cart summary window, allowing you to continue browsing the website.", + "bbox": [ + [ + 2482, + 255, + 42, + 57 + ] + ] + }, + { + "image_name": "web_1c75d12a-d93d-4cb2-b179-38009584c282.png", + "question": "What action is a user likely to be prompted to take when interacting with ?", + "question_id": 10, + "dataset_name": 
"web_QA_box", + "gt_answers": "Upon interacting with , a user is likely to be redirected to a donation landing page where they can financially support the website.", + "bbox": [ + [ + 2256, + 213, + 194, + 71 + ] + ] + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/data/web_QA/web_QA_point.json b/evaluation/MDVP-Bench/data/web_QA/web_QA_point.json new file mode 100644 index 0000000000000000000000000000000000000000..77933c6955746d0e9ad35fdbbfbf225c20219093 --- /dev/null +++ b/evaluation/MDVP-Bench/data/web_QA/web_QA_point.json @@ -0,0 +1,332 @@ +[ + { + "image_name": "f7a651aeda9e0d429adf3d9bd3bec6b4.png", + "question": "What might I find if I click on ?", + "question_id": 1, + "dataset_name": "web_QA_point", + "gt_answers": "Clicking on would likely redirect you to a page where you could learn more about the company's history, values, or mission.", + "points": [ + [ + 1161, + 79 + ], + [ + 825, + 79 + ], + [ + 902, + 79 + ], + [ + 1027, + 79 + ], + [ + 537, + 597 + ], + [ + 1468, + 79 + ], + [ + 729, + 79 + ], + [ + 1574, + 79 + ] + ] + }, + { + "image_name": "web_921e2d81-b8a5-425a-81e6-476cd16f9bdb.png", + "question": "What information can an administrator expect to find or manage after clicking on ?", + "question_id": 2, + "dataset_name": "web_QA_point", + "gt_answers": "An administrator can expect to find customer-related data such as personal details, purchase history, customer service interactions, and possibly analytics on customer behavior after clicking on .", + "points": [ + [ + 88, + 584 + ] + ] + }, + { + "image_name": "a49865bf112a6961f41c6a32fc766d89.png", + "question": "What would happen if I click on the 'HOME' tab in ?", + "question_id": 3, + "dataset_name": "web_QA_point", + "gt_answers": "Clicking on the 'HOME' tab in would redirect you to the main or starting page of the website.", + "points": [ + [ + 633, + 126 + ], + [ + 383, + 126 + ], + [ + 738, + 683 + ], + [ + 1363, + 770 + ], + [ + 57, + 49 + ], + [ + 1343, + 126 + ], 
+ [ + 134, + 49 + ], + [ + 172, + 49 + ], + [ + 872, + 520 + ], + [ + 1430, + 126 + ] + ] + }, + { + "image_name": "9383db3ef92848cd46868197cc5e446f.png", + "question": "What would likely happen if I click on ?", + "question_id": 4, + "dataset_name": "web_QA_point", + "gt_answers": "If you click on , you would likely be directed to a help or support resource to assist you with any inquiries or issues you might have with the services or products on the website.", + "points": [ + [ + 498, + 21 + ], + [ + 1478, + 21 + ], + [ + 374, + 21 + ], + [ + 1333, + 88 + ], + [ + 758, + 88 + ], + [ + 1199, + 88 + ], + [ + 633, + 88 + ], + [ + 1074, + 88 + ], + [ + 1285, + 21 + ], + [ + 374, + 136 + ] + ] + }, + { + "image_name": "web_37acd27f-731c-4292-8caf-159b9be41189.png", + "question": "What versions of Ubuntu are mentioned in ?", + "question_id": 5, + "dataset_name": "web_QA_point", + "gt_answers": "The versions mentioned in are 18.04 LTS, 20.04 LTS, and 22.04 LTS.", + "points": [ + [ + 320, + 652 + ] + ] + }, + { + "image_name": "web_147f0485-994a-4176-bd82-88d81ab8607a.png", + "question": "What happens when you check the 'All' checkbox in ?", + "question_id": 6, + "dataset_name": "web_QA_point", + "gt_answers": "When you check the 'All' checkbox in , it likely includes all namespaces in the search results, rather than just the default selection or other specified filters.", + "points": [ + [ + 924, + 786 + ] + ] + }, + { + "image_name": "243a622c02ef1f0a4ca2a860daa9b5c5.png", + "question": "What particular topics might I find assistance on if I navigate to on the website?", + "question_id": 7, + "dataset_name": "web_QA_point", + "gt_answers": "You would find guidance or insights on topics relevant to the financial services provided by the organization behind the website.", + "points": [ + [ + 1141, + 49 + ], + [ + 1372, + 49 + ], + [ + 1007, + 107 + ], + [ + 949, + 971 + ], + [ + 1247, + 49 + ], + [ + 1055, + 789 + ], + [ + 787, + 49 + ], + [ + 1305, + 107 + ], + [ + 844, 
+ 789 + ], + [ + 1142, + 107 + ] + ] + }, + { + "image_name": "30620b1a82ea24f660db2be94d4c09c8.png", + "question": "What options do I have if I wish to learn more about the background or ethos of the company?", + "question_id": 8, + "dataset_name": "web_QA_point", + "gt_answers": "You could select to navigate to the 'About' page, which would provide information about the company's background or ethos.", + "points": [ + [ + 1295, + 98 + ], + [ + 1430, + 568 + ], + [ + 1410, + 40 + ], + [ + 1785, + 40 + ], + [ + 1391, + 98 + ], + [ + 1631, + 98 + ], + [ + 422, + 491 + ], + [ + 1506, + 98 + ], + [ + 1842, + 98 + ], + [ + 1727, + 98 + ] + ] + }, + { + "image_name": "web_921e2d81-b8a5-425a-81e6-476cd16f9bdb.png", + "question": "What would likely happen when I click on the 'Products' option in ?", + "question_id": 9, + "dataset_name": "web_QA_point", + "gt_answers": "Clicking on the 'Products' option would probably navigate you to the product management section where you can view and interact with the inventory.", + "points": [ + [ + 439, + 229 + ] + ] + }, + { + "image_name": "d5b8fc0c05f898a78cd3183d534aafdd.png", + "question": "If I'm looking for information on the company's background and history, which should I click on?", + "question_id": 10, + "dataset_name": "web_QA_point", + "gt_answers": "You should click on to find information about the company's background and history.", + "points": [ + [ + 1295, + 78 + ], + [ + 911, + 318 + ], + [ + 863, + 78 + ], + [ + 1488, + 78 + ], + [ + 978, + 78 + ], + [ + 1083, + 78 + ], + [ + 1401, + 78 + ], + [ + 1180, + 78 + ] + ] + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/data/web_detailed_caption/web_detailed_caption_box.json b/evaluation/MDVP-Bench/data/web_detailed_caption/web_detailed_caption_box.json new file mode 100644 index 0000000000000000000000000000000000000000..13e6fbbc3e096701115b385fb6b31513075c9f8f --- /dev/null +++ 
b/evaluation/MDVP-Bench/data/web_detailed_caption/web_detailed_caption_box.json @@ -0,0 +1,434 @@ +[ + { + "image_name": "60e34f9315443cb6c77d32da7ba5eee1.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 1, + "dataset_name": "web_detailed_caption_box", + "gt_answers": ": This area seems to indicate the numeric value '31,' which, in a calendrical context, may reference the number of days in a month. It does not appear to have any interactive features based on the screenshot.\n: This seems to be a button or a link labeled 'Connecter' which, when translated from French, means 'Connect' or 'Log in'. It is likely an interactive element that upon being clicked, would prompt the user to access an account or initiate a connection process.\n: This section contains the French word 'Novembre', which is the month of November. It appears to be a part of a list of months, possibly for navigating a calendar or archives by month. It may be an interactive element that allows users to view content from November.\n: This is a button or link with the text 'Annoncez' followed by information icon (i). The French word 'Annoncez' translates to 'Advertise'. This suggests that it is a call-to-action for users to advertise, possibly by clicking this button or link. The information icon typically indicates additional details available upon interaction.\n: This area displays the number '07', which could signify a day of the month, especially since it is seen next to a date heading in the format 'Vendredi 7 Mai 2021', which translates to 'Friday, May 7, 2021'. It seems to be a static element without interactivity.\n: Similar to , this is labeled 'Septembre', which is the French word for September. It is part of the same apparent navigational element for a calendar or archive sorted by months and is likely interactive as well.\n: This section reads 'Liens Web', which translates to 'Web Links' from French. 
This likely refers to a section of the web page intended to direct users to other related sites or resources. It is probably interactive, with each listed link being clickable.\n: The number '15' is visible here, and when combined with the context of a calendar visible in the screenshot, it likely represents the 15th day of a month. This element does not seem to be interactive itself.\n: Here we see the number '04' which, in the context of the surrounding calendar, might represent the 4th day of a month. It doesn't show any sign of interactivity.\n: This text, 'Plan du site', stands for 'Site Map' in French and usually refers to a detailed page listing where one can find an overview of all the sections and pages within the website. It is usually an interactive element that, when clicked, will take the user to the sitemap page.", + "bbox": [ + [ + 1248, + 156, + 19, + 19 + ], + [ + 1286, + 40, + 134, + 19 + ], + [ + 480, + 386, + 153, + 19 + ], + [ + 1382, + 117, + 76, + 0 + ], + [ + 768, + 156, + 19, + 19 + ], + [ + 480, + 348, + 153, + 19 + ], + [ + 652, + 981, + 153, + 19 + ], + [ + 921, + 156, + 19, + 19 + ], + [ + 710, + 156, + 19, + 19 + ], + [ + 825, + 1000, + 172, + 19 + ] + ] + }, + { + "image_name": "0180e97a3e9609ea8c72b6b8db0071c8.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 2, + "dataset_name": "web_detailed_caption_box", + "gt_answers": ": This region depicts an interactive button with the text \"BOOK NOW\" overlaid on it. Usually, buttons like this on websites are clickable and lead the user to a page where they can schedule an appointment or reserve a service. 
The button is stylistically designed to stand out and grab attention, potentially suggesting it is a call-to-action feature for users to quickly access the booking process for the service provided by the website.\n: This section of the website features a text link with the phrase \"Buy Luxurious Doona.\" It likely serves as a navigational element, which upon clicking, would redirect users to a page where they can purchase a \"Luxurious Doona.\" The term \"Doona\" typically refers to a type of bedding, suggesting that the site might be related to home goods or personal comfort items.\n: This area contains a text link that reads \"Pay Balance.\" It is probably an interactive link that, once clicked, would take the user to a section of the website where they can complete a payment - likely concerning a service or product they have previously engaged with.\n: Featured here is a clickable text link titled \"About Us.\" Such links generally lead users to a webpage that elaborates on the history, mission, values, or team behind the company or service. It helps users learn more about the company or website owners.\n: This part of the webpage indicates a \"Contact us\" link. Clicking on this text would typically guide the visitor to a page featuring contact information or a form enabling the users to reach out to the company for inquiries or support.\n: \"Home\" appears to be a navigation link that, when selected, would likely redirect users to the homepage of the website. The homepage is the main page that often provides a comprehensive overview of what the website offers.\n: The text here, \"doonawash@gmail.com,\" suggests an email address. This is likely provided for users to directly contact the company or service provider through email. 
It is not clickable but can be used to send an email using an email client or service.\n: Similar to , this \"Contact us\" link would allow users to access a contact section or page on the website, promoting user interaction with the service provider for queries or assistance.\n: Just like , this \"Home\" link is a navigational feature intended to bring the user back to the site's main page, presenting a starting point or central hub for exploring the website's contents.\n: Echoing , the \"Pay Balance\" text link is associated with the payment part of a transaction on the website. It is intended to facilitate users in clearing dues or completing transactions related to the services offered by the site.", + "bbox": [ + [ + 1420, + 21, + 134, + 19 + ], + [ + 614, + 252, + 192, + 19 + ], + [ + 614, + 136, + 134, + 19 + ], + [ + 614, + 98, + 115, + 19 + ], + [ + 614, + 156, + 115, + 19 + ], + [ + 614, + 213, + 38, + 19 + ], + [ + 326, + 2, + 422, + 38 + ], + [ + 614, + 309, + 96, + 19 + ], + [ + 614, + 60, + 76, + 19 + ], + [ + 614, + 290, + 96, + 19 + ] + ] + }, + { + "image_name": "web_0558c1f4-c05b-49a8-8479-04b1575779d2.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 3, + "dataset_name": "web_detailed_caption_box", + "gt_answers": ": This area of the webpage is part of a bullet point list under the subheading \"Local Chapters\". The subheading describes Local Chapters as country or region-level groups affiliated with the OpenStreetMap Foundation that represent their local mapping community in dealings with government, business, and media. The bullet point \"OpenStreetMap Belgium\" likely indicates that there is an established local chapter for the country of Belgium. The text appears in blue with an underline, suggesting that it is a hyperlink. 
Clicking on this hyperlink would presumably direct the user to more information about the OpenStreetMap community in Belgium or to their specific website.", + "bbox": [ + [ + 333, + 971, + 366, + 36 + ] + ] + }, + { + "image_name": "367173643a055b0657de17afff8d541d.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 4, + "dataset_name": "web_detailed_caption_box", + "gt_answers": ": This area appears to be a contact detail, specifically a phone number. It typically serves as a direct line of communication for users to reach out to the company or organization featured on the website. Such contact information is usually clickable on mobile devices, enabling the user to initiate a phone call directly.\n: This section includes a numerical rating, which is indicative of client satisfaction, service quality, or performance measurement. It suggests that it may be connected to reviews or ratings received from clients, as denoted by the star symbol which commonly represents ratings.\n: The text translates to \"Free Consultation\" in Azerbaijani, indicating an offering from the company to prospective clients. 
It is likely a call-to-action button which upon clicking, would lead a user to a form or contact option to set up a consultation without any charge.\n: This part of the website is labeled \"Portfolio,\" signifying that it's likely a navigation element leading to a page where the company showcases their previous work, projects, or case studies to highlight their experience and expertise.", + "bbox": [ + [ + 1324, + 40, + 96, + 19 + ], + [ + 364, + 732, + 19, + 19 + ], + [ + 307, + 559, + 172, + 57 + ], + [ + 960, + 21, + 57, + 57 + ] + ] + }, + { + "image_name": "5c9b9883310423712e466bee13a36a02.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 5, + "dataset_name": "web_detailed_caption_box", + "gt_answers": ": This space indicates a shopping cart feature with a count of items currently in the cart, which currently stands at zero. This interactive element likely becomes clickable when items are added, allowing users to view and manage the contents of their cart.\n: This area is typically a customer service feature, allowing users to access help or assistance through various means such as a help center, live chat, or contact information. It's usually clickable and would direct the user to a support section of the website.\n: The text suggests a prompt to visit the company's Facebook page. This is an interactive element that, when clicked, likely redirects users to the specified social media page to engage with the company's content on that platform.\n: It denotes an area that likely relates to personalization for users, where they can view their astrology charts. This is expected to be a clickable feature which, when accessed, leads the user to a section where their personalized charts are displayed or can be created.\n: Similar to , this is a call to action to visit the company's Twitter page. 
Clicking on this interactive element would redirect a user to the company's Twitter profile to view tweets and engage with their content.\n: This is a customer contact area, providing users with a way to get in touch with the company. Clicking on this is likely to take the user to a section of the site with various contact options like email, phone, or a contact form.\n: This is a call-to-action button that allows users to add a product to their shopping cart. This button is interactive, and upon clicking, the chosen product would be added to the user's cart, with the action possibly reflected in the shopping cart count in .\n: This area is likely dedicated to showcasing the company's range of products. Clicking here would probably lead users to a product catalog where they can browse and select items of interest.\n: It represents an area designated for a podcast. Users can expect to interact with this button to be taken to a media player or section of the website where they can listen to recorded audio content.\n: This part of the website provides company information to the user. It's normally a clickable element that leads the user to learn more about the company's history, values, mission, and team members.", + "bbox": [ + [ + 1478, + 21, + 95, + 57 + ], + [ + 1017, + 98, + 95, + 38 + ], + [ + 1228, + 21, + 19, + 57 + ], + [ + 1056, + 616, + 76, + 19 + ], + [ + 1267, + 21, + 38, + 57 + ], + [ + 1286, + 98, + 115, + 38 + ], + [ + 1075, + 463, + 153, + 38 + ], + [ + 691, + 98, + 115, + 38 + ], + [ + 902, + 98, + 96, + 38 + ], + [ + 1152, + 98, + 115, + 38 + ] + ] + }, + { + "image_name": "53b4ab2cb706a43fec7ce4ac5eac181e.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 6, + "dataset_name": "web_detailed_caption_box", + "gt_answers": ": This section typically represents a menu item on a website that describes the services or actions undertaken by the organization. 
It usually links to a page with detailed information on the work that the organization performs, including projects, mission statements, or other relevant content.\n: This appears to be a news headline or feature article title on the website. It suggests that the organization has introduced a new initiative offering financial assistance for livelihood projects. Clicking on this title would likely lead to an article or post giving more information about the micro-grants program and its objectives.\n: This is likely a button or link to a settings page where users can adjust their preferences for the website, which might include language settings, account details, notification preferences, and more.\n: This commonly links to the website's privacy policy document, where users can learn about how the organization collects, uses, stores, and protects personal data.\n: This is typically a navigation link that returns the user to the main homepage of the website from any other page.\n: It usually indicates a button the user can click to accept the terms of a policy, possibly related to cookies or usage terms, as indicated by the accompanying text.\n: This is often a prominent call-to-action button meant to direct users to a page where they can make financial contributions to the organization or cause.\n: Commonly a menu item that links to a news section containing articles, updates, blog posts, press releases, or other information that keeps readers informed about the organization's activities or relevant topics.\n: This is typically a link to a page where users can find more information about the organization, including history, values, team members, or accomplishments.\n: Usually a link to a page where visitors can find contact information for the organization, such as an address, phone number, email, or a contact form.", + "bbox": [ + [ + 998, + 60, + 115, + 19 + ], + [ + 1267, + 232, + 153, + 76 + ], + [ + 1689, + 1039, + 76, + 19 + ], + [ + 729, + 1039, + 96, + 19 + 
], + [ + 825, + 60, + 76, + 19 + ], + [ + 1785, + 1039, + 76, + 19 + ], + [ + 1305, + 40, + 134, + 38 + ], + [ + 1113, + 60, + 76, + 19 + ], + [ + 902, + 60, + 96, + 19 + ], + [ + 1190, + 60, + 115, + 19 + ] + ] + }, + { + "image_name": "web_6c677961-e540-4cc5-b725-5e301019a9f9.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 7, + "dataset_name": "web_detailed_caption_box", + "gt_answers": ": This region appears to be a toolbar located within a content editing area, likely part of a web-based application interface. The specific feature highlighted is an icon that suggests functionality related to inserting tables into the content. In a typical text editor or content management system interface, clicking this icon would presumably open a menu or dialogue box allowing the user to create and insert a table into the document. The table insertion feature commonly lets users specify the number of rows and columns, choose a table style, and sometimes adjust additional table properties such as cell padding or headers.", + "bbox": [ + [ + 2282, + 775, + 75, + 68 + ] + ] + }, + { + "image_name": "656b47ffb1270a8038d876586e92a71b.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 8, + "dataset_name": "web_detailed_caption_box", + "gt_answers": ": This area encompasses a navigation element labeled \"News.\" It likely leads to a section of the website where current news relevant to the organization or its field of operation is disseminated. As a navigational element, it is interactive and upon clicking would redirect users to the page where news articles or updates are posted.\n: This area displays the company's name ServeGate, which appears to be stylized as a logo. 
This typically acts as a home button; clicking on it would usually take users back to the main or home page of the website.\n: This heading titled \"Our Difference\" may signify a navigational item that leads to content describing what sets the organization apart from competitors. Interaction would involve clicking it to navigate to a page that likely discusses the company's unique selling propositions (USPs), mission, values, or other differentiating factors.\n: Labeled \"About Us,\" this is another navigation item that, when clicked, would take the user to a section of the site that provides information about ServeGate, such as its history, leadership team, vision, and mission.\n: The text \"Get in touch\" suggests an interactive component that leads to a part of the website where users can contact the organization. This may include a contact form, phone numbers, email addresses, or other means of communication.\n: The term \"Home\" designates a navigational link that typically redirects users to the front page of the website. Clicking this link would generally return the user to the starting point of their navigation experience.\n: The item labeled \"Services\" is likely a drop-down menu or a link to a page that outlines the company's offerings. Users can click on it to discover more about the services provided by ServeGate, including descriptions and possibly pricing or someone to contact for further inquiry.\n: \"Indigenous Impact\" might be a navigation link to a page detailing the company's impact on, contributions to, or programs associated with Indigenous communities. Interaction with this element would bring the user to either a dedicated section or might expand into a submenu listing various facets of this impact.\n: This appears to be another instance of the company logo for ServeGate, similar to . 
It likely serves the same function as a clickable link leading back to the home page of the website.", + "bbox": [ + [ + 1612, + 40, + 76, + 76 + ], + [ + 57, + 40, + 268, + 76 + ], + [ + 1132, + 40, + 153, + 76 + ], + [ + 979, + 40, + 153, + 76 + ], + [ + 1708, + 40, + 134, + 76 + ], + [ + 345, + 175, + 57, + 19 + ], + [ + 1478, + 40, + 134, + 76 + ], + [ + 1286, + 40, + 192, + 76 + ], + [ + 57, + 60, + 268, + 19 + ] + ] + }, + { + "image_name": "web_5a35d9c2-2c2d-4a49-ad0a-1408d9cac78e.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 9, + "dataset_name": "web_detailed_caption_box", + "gt_answers": ": The area appears to contain an interface element labeled \"Close issue\" situated within a software development environment, likely a part of an issue tracking or project management system. This interface element is a button, as suggested by its design and placement near the text input area for comments. When clicked, it would typically result in the associated issue being marked as resolved or closed in the system, thereby updating the status of the issue within the project's workflow. The button is designed for users to signify that the discussion, problem, or task represented by the issue no longer requires attention and can be archived or removed from active consideration.", + "bbox": [ + [ + 801, + 1139, + 196, + 68 + ] + ] + }, + { + "image_name": "web_3fed2169-3c3d-43e7-baaa-3bf0e0c7134c.png", + "question": "Please provide a detailed description of each marked region in the image.", + "question_id": 10, + "dataset_name": "web_detailed_caption_box", + "gt_answers": ": This is a close button found on a web-based issue tracking platform, specifically GitLab. In the context of the page, it is presumably used to close the issue which is currently being viewed. 
When a user clicks this button, the active issue (#1460 in this case) will likely be marked as closed within the system, changing its status from open to closed. This is a standard feature in issue tracking and project management software, allowing users to manage the lifecycle of issues and tasks.", + "bbox": [ + [ + 1880, + 205, + 66, + 67 + ] + ] + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/data/web_detailed_caption/web_detailed_caption_point.json b/evaluation/MDVP-Bench/data/web_detailed_caption/web_detailed_caption_point.json new file mode 100644 index 0000000000000000000000000000000000000000..ebd9cb5d6f269bcc23312c48938d248f0ff167c8 --- /dev/null +++ b/evaluation/MDVP-Bench/data/web_detailed_caption/web_detailed_caption_point.json @@ -0,0 +1,252 @@ +[ + { + "image_name": "027df36525ffe2decebea6c85c92c270.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 1, + "dataset_name": "web_detailed_caption_point", + "gt_answers": ": This is a call-to-action button that prompts users to \"Sign Up\" likely for a newsletter or account registration. It's an interactive element that, once clicked, would either open a form for the user to enter their details (such as name, email, etc.) or redirect them to a registration page.\n: The text \"Servicing\" appears to be part of a navigation menu, specifically a header or tab that would lead the user to a section or a page related to bike servicing options. Clicking on this would either scroll down to the relevant section on the same page or redirect to a dedicated servicing page.\n: Appears to be a search bar, offering users an opportunity to type in queries to find specific content on the website. It's an interactive feature that processes typed search terms to display relevant results on either the same page or a new search results page.\n: This seems to indicate a link or a button that takes users to the Maison du Velo's YouTube channel. 
It's likely an interactive element, clicking on which would navigate the user to the video platform to view the company's videos.\n: \"Book a Service\" is likely another navigation item or button, similar to , designed to take users directly to a booking interface where they can schedule bike services.\n: It appears to be the Maison du Velo logo, commonly serving as a clickable element that returns the user to the homepage when clicked.\n: This looks like the main header or title of the webpage, and in this context, it informs the user that the page they are looking for cannot be found. It's typically non-interactive and serves an informative function.\n: Labelled \"Consultation,\" this might be a section of the navigation menu focused on personalized services or appointments. Engaging this would lead to further information about consultation offerings.\n: The number zero alongside a shopping cart icon suggests that this is an indicator of the number of items currently in the user's online shopping cart. It's a standard feature on e-commerce sites and clicking on it would typically lead to the shopping cart or checkout page.\n: The representation of Maison du Velo on Twitter indicates a social media icon meant to direct users to the Maison du Velo's Twitter account. Interacting with it would take the user to their Twitter feed.", + "points": [ + [ + 1488, + 597 + ], + [ + 1064, + 491 + ], + [ + 1487, + 59 + ], + [ + 527, + 568 + ], + [ + 709, + 59 + ], + [ + 988, + 664 + ], + [ + 459, + 59 + ], + [ + 901, + 59 + ], + [ + 1525, + 59 + ], + [ + 392, + 568 + ] + ] + }, + { + "image_name": "web_98d5a92a-e7d5-4fa6-8c36-83dc78c3fce6.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 2, + "dataset_name": "web_detailed_caption_point", + "gt_answers": ": This area appears to be a checkbox labeled \"Show Password\", typically found on login forms on websites. 
This functionality, when toggled, allows the user to switch between hiding and displaying the characters they have input into the password field. This is an interactive element that improves user experience by allowing users to confirm their password entries before submitting the form. Given the obfuscated dots in the password field, engaging this checkbox would reveal the actual password characters.", + "points": [ + [ + 365, + 1006 + ] + ] + }, + { + "image_name": "61f88222ea0459157c48e4ac322de434.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 3, + "dataset_name": "web_detailed_caption_point", + "gt_answers": ": This region appears to be a navigation item labeled \"AGENDA\". It likely leads users to a page or section focusing on scheduled events or calendar-related information for the organization or website in question.\n: This is another navigation item labeled \"PUBLICATIONS DE QCD\". This item would typically direct users to a repository or list of publications associated with the organization QCD. Users may find research papers, reports, articles, or whitepapers here.\n: This is titled \"QCD & VOUS\". It is possibly a section dedicated to information about the relationship between QCD and the user or community (\"vous\" translates to \"you\" in French). It might include user engagement initiatives, membership details, or customer service information.\n: Labeled \"Accueil\", this is the French word for \"Home\" and typically represents a button or link that, when clicked, will take the user back to the main homepage of the website.\n: \"CONTACTEZ-NOUS\" translates to \"CONTACT US\". 
This is likely a call-to-action button that, when interacted with, will take the user to a contact form, email address, phone number, or other means of getting in touch with the organization.\n: This area, labeled \"INSTANCES\", probably refers to different organizational bodies or decision-making entities within the organization. It could lead to information on committees, boards, or corporate governance structures.\n: The \"LEADER\" region appears to be another navigational element pointing to content related to leadership within the organization, such as profiles of key leaders or organizational leadership strategies.\n: This icon represents Twitter and is commonly used as a social media link. Clicking it would typically take the user to the organization's Twitter profile to follow and engage with their tweets.\n: Similar to , this is a Facebook icon, which when clicked, will likely redirect the user to the organization's Facebook page, where they can follow posts, updates, and social interactions.\n: \"EXPERTISES & ACTIONS\" suggests a section dedicated to the organization's areas of specialization and their practical endeavors or initiatives. It could house detailed descriptions of services, programs, or campaigns.", + "points": [ + [ + 1180, + 213 + ], + [ + 499, + 213 + ], + [ + 1458, + 155 + ], + [ + 326, + 241 + ], + [ + 719, + 49 + ], + [ + 1141, + 213 + ], + [ + 594, + 213 + ], + [ + 1324, + 1058 + ], + [ + 1286, + 1058 + ], + [ + 979, + 155 + ] + ] + }, + { + "image_name": "web_eab83369-92a1-4819-ae45-eb2c0ab5b131.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 4, + "dataset_name": "web_detailed_caption_point", + "gt_answers": ": This region displays an option within a navigation menu, labeled \"Milestones\". 
The text suggests that this is a clickable area which, when interacted with, will likely navigate the user to a section or feature within the GitLab platform that deals with milestones. Milestones in this context are likely used for tracking progress on a collection of issues or merge requests within the scope of a project or a group. The design indicates this is one item in a list, which includes other items such as \"Snippets\" and \"Activity\". It is part of a larger dropdown menu labeled \"Explore,\" which suggests a functionality for users to explore various sections or features within GitLab.", + "points": [ + [ + 350, + 407 + ] + ] + }, + { + "image_name": "web_77e68e05-bbe4-4f54-96fa-79f714c17505.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 5, + "dataset_name": "web_detailed_caption_point", + "gt_answers": ": This is a language setting interface element within a webpage. It consists of a dropdown box, which likely contains options for selecting different languages for the content of this site. The presence of bracketed symbols suggests that this is a clickable element, which upon interaction, would display a list of available languages and allow the user to change the language preference for the page's content.", + "points": [ + [ + 374, + 1420 + ] + ] + }, + { + "image_name": "73621fa95a3736b26b69b3f2ab5f5df9.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 6, + "dataset_name": "web_detailed_caption_point", + "gt_answers": ": This area contains an email address, \"info@medcompany.by,\" which serves as an electronic means for visitors to contact the company\u2014presumably for inquiries, support, or other communication purposes. 
Such an email hyperlink is commonly clickable and would open the user\u2019s default email client preloaded with the recipient's address.\n: This identifies an Instagram social media icon, which is typically used as a link to redirect users to the company's Instagram profile. Interaction with this element would likely open a new tab or window in the user\u2019s browser, taking them directly to the associated Instagram page.\n: This text, \"Skip to content,\" is typically a link designed to help users bypass navigation menus or other elements to go directly to the main content of the webpage. It is a feature that can improve accessibility for users who use screen readers or prefer to skip repetitive links.\n: Similar to , this marks a Facebook social media icon. Clicking on this icon would typically redirect the user to the company\u2019s Facebook page. It serves as a quick-access link for visitors to connect with the company on Facebook.", + "points": [ + [ + 690, + 30 + ], + [ + 1516, + 30 + ], + [ + 0, + 2 + ], + [ + 1478, + 30 + ] + ] + }, + { + "image_name": "web_0492a7c3-3b0d-4f62-8c0d-939ad51aa616.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 7, + "dataset_name": "web_detailed_caption_point", + "gt_answers": ": The content within this area is labeled \"Use GitLab\" and \"Get started with GitLab features and functionality.\" This suggests it is an introductory section intended for users who are new to GitLab or those looking to learn how to utilize its features. 
This area is likely to be interactive, possibly linking to resources or documentation that would guide a user through the basic functionalities of GitLab.", + "points": [ + [ + 415, + 1293 + ] + ] + }, + { + "image_name": "web_921e2d81-b8a5-425a-81e6-476cd16f9bdb.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 8, + "dataset_name": "web_detailed_caption_point", + "gt_answers": ": This region appears to be part of a navigation menu of a website, specifically an e-commerce platform or a Content Management System (CMS), possibly Magento or a similar service. The area features the word \"CUSTOMERS,\" which implies it is a section dedicated to customer-related information and functionalities. This area is likely interactive and could lead the user to a subsection of the CMS where they can manage customer data, view customer interactions, or track customer orders and behavior. Given the standard conventions of web design, clicking on this area would result in navigating to the customer management area of the website.", + "points": [ + [ + 88, + 584 + ] + ] + }, + { + "image_name": "9e7e2a0e3102aaca2cfba8ffe1182de4.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 9, + "dataset_name": "web_detailed_caption_point", + "gt_answers": ": This section likely relates to the website's privacy policy or information on how user data is collected, used, and protected. It is commonly an interactive link that, when clicked, would direct a user to a page detailing the privacy practices of the site. The privacy section is integral for building trust with the users by being transparent about their data handling.\n: This appears to be a category or section devoted to providing financial tips. It may offer educational content, such as articles, blog posts, or resources, aimed at helping individuals manage their finances better. 
If this is a hyperlink, clicking on it would take the user to a page with listed financial tips articles or resources.\n: This section is likely dedicated to taxation-related content. It could offer advice, updates, and guidelines regarding taxes which could be of interest to individuals or businesses trying to navigate tax laws. Clicking on it would typically redirect the user to the taxation-related content.\n: This indicates a downloadable financial services guide provided as a PDF document. Clicking on this link would normally prompt the download of a PDF file, which contains detailed information about the financial services offered by the website's owning entity.\n: This section probably features information related to real estate, which may include buying, selling, investing, and property management advice. It is likely an interactive link that navigates to a page with real estate information.\n: The financial updates section is expected to contain the latest news, trends, and changes in the financial world. This might be an interactive area where clicking on it would take the user to recent financial updates and news articles.\n: This likely represents a resource section for budgeting, where users can find tools, tips, and strategies to create and maintain a budget. It is probably clickable, leading to a dedicated page with budgeting assistance.\n: Refers to the official Australian Tax Office (ATO). It may provide direct access to tax-related information, forms, and services relevant to Australian citizens. This could be an interactive hyperlink guiding users to the official ATO resources or relevant tax guidance affiliated with the site.\n: The checklist section is assumed to contain a series of actionable lists aimed at assisting users in organizing various financial processes or tasks. 
If it is a hyperlink, clicking it would take you to a page where you can view or download these checklists.", + "points": [ + [ + 1343, + 11 + ], + [ + 1333, + 357 + ], + [ + 1314, + 529 + ], + [ + 1477, + 11 + ], + [ + 1324, + 491 + ], + [ + 1343, + 395 + ], + [ + 1314, + 280 + ], + [ + 451, + 981 + ], + [ + 816, + 145 + ], + [ + 1333, + 337 + ] + ] + }, + { + "image_name": "web_7d1b09cc-3602-4670-9fb2-d81afb5ec381.png", + "question": "Please provide a detailed description of each marked point in the image.", + "question_id": 10, + "dataset_name": "web_detailed_caption_point", + "gt_answers": ": This region appears to be a search function within the web interface, typically represented by a magnifying glass icon. It's a common web interface element which allows users to input search queries to find relevant content or information within the website or application. This particular search bar seems to be part of a larger administrative or dashboard interface, possibly for a content management system or an online store backend, given the surrounding context. 
def get_eval(content: str, max_tokens: int, max_retries=None):
    """Ask the Azure OpenAI judge model to review one sample.

    Args:
        content: Payload sent as the user turn. NOTE(review): the annotation
            says ``str`` but ``main`` in this file passes a list of
            text/image parts; the value is forwarded unchanged either way.
        max_tokens: Upper bound on generated tokens for the completion.
        max_retries: Maximum number of failed attempts before the last
            exception is re-raised. ``None`` (the default) keeps the
            original behaviour of retrying forever.

    Returns:
        The text content of the first completion choice.

    Raises:
        Exception: Whatever the client raised on the final attempt, only
            when ``max_retries`` is set and exhausted.
    """
    # The conversation is identical on every attempt, so build it once.
    messages = [
        {
            "role": "system",
            "content": "You are a helpful and precise assistant for checking the quality of the answer.",
        },
        {
            "role": "user",
            "content": content,
        },
    ]

    failures = 0
    while True:
        try:
            completion = client.chat.completions.create(
                model=model_name,
                messages=messages,
                max_tokens=max_tokens,
                temperature=0,
            )
            return completion.choices[0].message.content
        except Exception as e:
            # Best-effort retry: log the failure and try again after a pause.
            print(e)
            failures += 1
            if max_retries is not None and failures >= max_retries:
                # Permanent failures (bad key, invalid request) would
                # otherwise loop forever; let the caller see the error.
                raise
            time.sleep(1)
def parse_score(review):
    """Extract the two numeric scores from the first line of a judge review.

    The first non-blank line is expected to hold two numbers separated by
    whitespace and/or commas (for example ``"8 9"`` or ``"8, 9"``).

    Args:
        review: Raw text returned by the judge model.

    Returns:
        ``[score_1, score_2]`` as floats, or ``[-1, -1]`` when the first
        line cannot be parsed.
    """
    try:
        # Ignore leading blank lines/whitespace before taking the first line.
        first_line = review.strip().split("\n", 1)[0]
        score_pair = first_line.replace(",", " ")
        # split() without an argument collapses runs of whitespace, so
        # "8, 9" -> "8  9" yields ["8", "9"] rather than ["8", "", "9"].
        sp = score_pair.split()
        print("score_pair:", score_pair, sp)
        return [float(sp[0]), float(sp[1])]
    except Exception as e:
        # Deliberately best-effort: an unparsable review must not abort the
        # whole evaluation run, so report it and return the sentinel pair.
        print(e)
        print("error", review)
        return [-1, -1]
def _load_json(path):
    """Load a JSON file (``~`` expanded) and close it afterwards."""
    with open(os.path.expanduser(path), "r") as f:
        return json.load(f)


def main(args):
    """Score model predictions against reference answers for one MDVP phase.

    For every question the marked region(s) are painted onto the image, a
    judge prompt is assembled from the question, the reference answer and
    the model prediction, and the judge's review plus parsed scores are
    appended to ``args.output`` as one JSON line. Items whose index is
    already covered by an existing output file are skipped, so an
    interrupted run can be resumed.

    Args:
        args: Parsed CLI namespace providing ``phase``, ``max_tokens`` and
            ``output``. ``question``, ``answer_list`` and ``rule`` are set
            on it here, as before.

    Raises:
        ValueError: If the phase matches no known domain keyword, or a
            question carries neither a ``bbox`` nor a ``points`` annotation.
        FileNotFoundError: If an image cannot be read.
    """
    phase = args.phase  # android_QA_box
    domain = phase.split("_box")[0]  # android_QA

    # The first keyword found in the phase name wins, so order matters.
    # NOTE: the prompt texts (including the "andriod" spelling) are kept
    # verbatim so scores stay comparable with earlier runs.
    context_by_keyword = (
        ("natural", "The image is a natural image."),
        (
            "ocr",
            "The image contains text, and the user wishes to know the content of the text.",
        ),
        ("screen", "The image is a screenshot from a mobile phone or webpage."),
        ("panel", "The image is a multi-panel figure."),
        ("android", "The image is an andriod screenshot."),
        ("web", "The image is a webpage screenshot."),
    )
    context_str = next(
        (text for keyword, text in context_by_keyword if keyword in phase), None
    )
    if context_str is None:
        # Previously this surfaced much later as a NameError.
        raise ValueError(f"Cannot derive an image context from phase {phase!r}")

    args.question = f"mdvp_for_gpt4v_eval/{phase}/question.json"
    args.answer_list = [
        f"mdvp_for_gpt4v_eval/{phase}/answer.json",
        f"mdvp_for_gpt4v_eval/{phase}/prediction.json",
    ]
    args.rule = "annotations/rule.json"

    f_q = _load_json(args.question)
    f_ans1 = _load_json(args.answer_list[0])
    f_ans2 = _load_json(args.answer_list[1])
    rule_dict = _load_json(args.rule)

    os.makedirs("./result", exist_ok=True)

    # Expand the output path once so the existence check and the append
    # below always refer to the same file.
    output_path = os.path.expanduser(args.output)
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    cur_reviews = []
    if os.path.isfile(output_path):
        with open(output_path) as f:
            cur_reviews = [json.loads(line) for line in f if line.strip()]

    with open(output_path, "a") as review_file:
        samples = enumerate(tqdm(zip(f_q, f_ans1, f_ans2)))
        for idx, (ques, ans1, ans2) in samples:
            if idx < len(cur_reviews):
                # Resume support: skip before doing any image work.
                print(f"Skipping {idx} as we already have it.")
                print(idx + 1)
                continue

            # Paint the set-of-mark prompt on the image.
            image_path = f"data/{domain}/images/" + ques["image"]
            if cv2.imread(image_path) is None:
                # cv2.imread returns None instead of raising on failure.
                raise FileNotFoundError(f"Cannot read image: {image_path}")

            annotation = ques["annotation"]
            if "bbox" in annotation:
                paint_image_path = paint_text_box(image_path, annotation["bbox"])
                rule = rule_dict["box"]
            elif "points" in annotation:
                paint_image_path = paint_text_point(image_path, annotation["points"])
                rule = rule_dict["point"]
            else:
                # Without this branch the previous item's painted image and
                # rule would silently be reused.
                raise ValueError(
                    f"Question {ques.get('question_id')} has neither 'bbox' "
                    "nor 'points' in its annotation"
                )
            base64_image = encode_image(paint_image_path)

            prompt = rule["prompt"]
            role = rule["role"]
            content_text = (
                f"[Context]\n{context_str}\n\n"
                f'[Question]\n{ques["text"]}\n\n'
                f'[{role} 1]\n{ans1["text"]}\n\n[End of {role} 1]\n\n'
                f'[{role} 2]\n{ans2["text"]}\n\n[End of {role} 2]\n\n'
                f"[System]\n{prompt}\n\n"
            )

            content = [
                {
                    "type": "text",
                    "text": content_text,
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{base64_image}",
                        "detail": "high",
                    },
                },
            ]

            review = get_eval(content, args.max_tokens)
            cur_js = {
                "id": idx + 1,
                "question_id": ques["question_id"],
                "answer1_id": ans1.get("answer_id", ans1["question_id"]),
                "answer2_id": ans2.get("answer_id", ans2["question_id"]),
                "category": phase,
                "content": review,
                "tuple": parse_score(review),
                "answer1": ans1["text"],
                "answer2": ans2["text"],
            }
            review_file.write(json.dumps(cur_js) + "\n")
            # Flush per item so a crash loses at most the current review.
            review_file.flush()

            print(idx + 1)
def parse_args():
    """Build the MDVP-Bench inference command line and parse ``sys.argv``.

    Returns:
        argparse.Namespace with ``model_name_or_path``, ``cache_name``,
        ``data_type``, ``anno_file``, ``image_folder``, ``max_num_tiles``
        and ``seed``.
    """
    cli = argparse.ArgumentParser(
        description="Inference of Grasp Any Region models on MDVP-Bench."
    )

    # (flag, add_argument options), registered in this order so the
    # generated --help text is unchanged.
    option_specs = (
        (
            "--model_name_or_path",
            dict(help="HF model name or path", default="HaochenWang/GAR-1B"),
        ),
        (
            "--cache_name",
            dict(help="cache name to save model outputs.", default="gar_1b"),
        ),
        (
            "--data_type",
            dict(
                help="data dtype",
                type=str,
                choices=["fp16", "bf16", "fp32"],
                default="bf16",
            ),
        ),
        (
            "--anno_file",
            dict(
                help="path to the annotation file.",
                default="evaluation/MDVP-Bench/annotations/mdvp_caption_mask.json",
            ),
        ),
        (
            "--image_folder",
            dict(help="the folder of images", default="evaluation/MDVP-Bench/data"),
        ),
        (
            "--max_num_tiles",
            dict(help="maximum number of tiles for AnyRes", type=int, default=8),
        ),
        (
            "--seed",
            dict(
                type=int,
                default=0,
                help="Random seed for reproducible text generation",
            ),
        ),
    )
    for flag, options in option_specs:
        cli.add_argument(flag, **options)

    return cli.parse_args()


def annToMask(ann, h, w):
    """Convert a pycocotools annotation into a decoded mask.

    Args:
        ann: Annotation object accepted by ``mask_utils.frPyObjects``.
        h: Height passed to ``frPyObjects``.
        w: Width passed to ``frPyObjects``.

    Returns:
        The result of decoding the merged RLE with ``mask_utils.decode``.
    """
    merged_rle = mask_utils.merge(mask_utils.frPyObjects(ann, h, w))
    return mask_utils.decode(merged_rle)
def main():
    """Caption every annotated MDVP-Bench region and dump the results to JSON.

    Loads the model and processor named on the command line, runs greedy
    generation for each (image, mask) entry of the annotation file and
    writes ``image_path`` / ``caption`` / ``gt`` records to
    ``evaluation/MDVP-Bench/model_outputs/<cache_name>.json``.
    """
    args = parse_args()
    data_dtype = TORCH_DTYPE_MAP[args.data_type]
    torch.manual_seed(args.seed)

    # Create the output directory up front: failing here is cheap, whereas
    # failing after the loop would discard every generated caption.
    cache_name = args.cache_name
    output_dir = "evaluation/MDVP-Bench/model_outputs"
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, f"{cache_name}.json")

    # Initialise torch.distributed; the original note says this is needed
    # for dispatch_modules in the LLM.
    # NOTE(review): presumably expects torchrun-style environment variables
    # and a single visible GPU (device 0 is hard-coded) - confirm.
    torch.cuda.set_device(0)
    torch.distributed.init_process_group(backend="nccl")

    # Build the HF model.
    model = AutoModel.from_pretrained(
        args.model_name_or_path,
        trust_remote_code=True,
        torch_dtype=data_dtype,
    )
    model.cuda()
    model.eval()

    processor = AutoProcessor.from_pretrained(
        args.model_name_or_path,
        trust_remote_code=True,
    )

    # Loop-invariant prompt configuration and generation settings.
    prompt_number = model.config.prompt_numbers
    # NOTE(review): these token literals are empty in the text this block
    # was recovered from; they look like angle-bracket special tokens lost
    # in extraction. Verify against the tokenizer's visual prompt tokens.
    prompt_tokens = [f"" for i_p in range(prompt_number)] + [
        ""
    ]
    generation_config = GenerationConfig(
        max_new_tokens=1024,
        do_sample=False,
        eos_token_id=processor.tokenizer.eos_token_id,
        pad_token_id=processor.tokenizer.pad_token_id,
    )

    with open(args.anno_file, "r", encoding="utf-8") as file:
        data = json.load(file)

    model_outputs = []
    for item in tqdm(data):
        image_path = os.path.join(args.image_folder, item["image_path"])
        img = Image.open(image_path).convert("RGB")

        # Decode the RLE mask and scale it to 0/255.
        mask = mask_utils.decode(item["mask_rle"])
        mask = (mask.astype(np.uint8) * 255).astype(np.uint8)

        dataset = SingleRegionCaptionDataset(
            image=img,
            mask=mask,
            processor=processor,
            dynamic_image_size=True,
            max_num_tiles=args.max_num_tiles,
            prompt_number=prompt_number,
            visual_prompt_tokens=prompt_tokens,
            data_dtype=data_dtype,
        )
        data_sample = dataset[0]

        with torch.no_grad():
            generate_ids = model.generate(
                **data_sample,
                generation_config=generation_config,
                return_dict=True,
            )

        outputs = processor.tokenizer.decode(
            generate_ids.sequences[0], skip_special_tokens=True
        ).strip()

        print(outputs)  # Print model output for this image

        model_outputs.append(
            {
                "image_path": image_path,
                "caption": outputs,
                "gt": item["caption"],
            }
        )

    # ensure_ascii=False emits raw non-ASCII characters, so pin the encoding
    # rather than relying on the platform default.
    with open(output_path, "w", encoding="utf-8") as file:
        json.dump(model_outputs, file, indent=4, ensure_ascii=False)

    print(f"Cache name: {cache_name}")
a/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/android_detailed_caption_box/answer.json b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/android_detailed_caption_box/answer.json new file mode 100644 index 0000000000000000000000000000000000000000..1fc627ed32af43ec4e04f2ac4c8172d3201bc43b --- /dev/null +++ b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/android_detailed_caption_box/answer.json @@ -0,0 +1,2186 @@ +[ + { + "question_id": 1, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "This is a status bar displaying the date \"Friday, Sep 2,\" along with the temperature \"61°F.\" It informs the user about the current day, date, and ambient temperature, likely provided by a weather service. This text is generally not interactive and serves as an informational display only." + }, + { + "question_id": 2, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "This represents a capital letter \"G\" set inside a circle. It is part of a larger icon that is likely associated with a search engine or service, commonly used as a button to initiate web searches or to access related services. Interaction with this icon typically leads to the search engine's interface or homepage." + }, + { + "question_id": 3, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "This icon depicts clouds, which is a visual representation often used for weather-related services or apps. It indicates current or forecasted weather conditions and is typically interactive, leading the user to a detailed weather report upon tapping." 
+ }, + { + "question_id": 4, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "The gear-like icon signifies settings or options. It is a common symbol for accessing system settings or preferences in an application or operating system. Interacting with this icon usually opens a menu where various settings can be adjusted." + }, + { + "question_id": 5, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "The icon resembling a speech bubble is often associated with messaging services or chat applications. Tapping it would commonly open a messaging app where users can send messages, photos, or videos to others." + }, + { + "question_id": 6, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "This icon, featuring a triangle resembling a \"play\" button, is widely recognized as a symbol for media players or services offering video and music content. Interaction with this icon would likely launch a media playback application or service." + }, + { + "question_id": 7, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "The icon represents a search engine or a suite of services provided by a major tech company, indicated by the letter \"G\" and vibrant colors. Interacting with this icon typically brings the user to a home screen with access to various services offered by the company, such as search, email, maps, and more." 
+ }, + { + "question_id": 8, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "This icon, representing a microphone inside a colorful outline, is suggestive of a digital assistant or voice search feature. When interacted with, it would generally activate a voice recognition service allowing users to speak commands or queries for assistance." + }, + { + "question_id": 9, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "The circle icon in a navigation bar is typically an interactive home button on a mobile device, often bringing the user back to the home screen when tapped." + }, + { + "question_id": 10, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "The icon with a triangle pointing leftwards resembles a \"back\" navigation button, generally used to go back to the previous screen in an application or navigate backwards in a browser." + }, + { + "question_id": 11, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "This icon is indicative of the Microsoft Excel mobile application, recognizable by its green 'X' on a white background, which suggests a tool for creating and editing spreadsheets. It is likely an interactive element that, upon touch or click, launches the application allowing users to work with spreadsheets on their mobile device." 
+ }, + { + "question_id": 12, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "This is the Facebook mobile application icon, featuring a lowercase 'f' on a blue background. When interacted with, it typically opens the Facebook app where users can browse their news feed, connect with friends and family, post updates, and engage in social networking activities." + }, + { + "question_id": 13, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "Represented here is an icon for the Speedtest application by Ookla, depicted by a speedometer graphic suggesting the app's function of measuring internet connection speed. Tapping on this icon will likely open the app and allow the user to test their current internet speed." + }, + { + "question_id": 14, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "This icon, featuring a camera lens and a gradient background, is for the Instagram mobile application. Interacting with this icon will usually open the app, providing access to photo and video sharing, as well as viewing the content from others on the Instagram social network." + }, + { + "question_id": 15, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "Resembling a house with a triangular roof, this icon signifies a home automation or real estate application. 
Interaction with this icon would open the respective app, providing controls for smart home devices or real estate listings, depending on its specific function." + }, + { + "question_id": 16, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "This icon has a feather, hinting at a lite version of an application that offers a minimalistic or resource-efficient option, typically for use in areas with limited connectivity or on devices with lower performance." + }, + { + "question_id": 17, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "Featuring a speech bubble with a lightning bolt, this is the Facebook Messenger app icon. It signifies an app dedicated to messaging which, upon interaction, opens a platform where users can send messages, share media, and participate in video calls." + }, + { + "question_id": 18, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The icon partially reads \"Home De...\" against an orange square, suggesting a home improvement or retail company's app, possibly offering goods or services related to home refurbishment or decoration. The app's full functionality would be revealed upon opening it." + }, + { + "question_id": 19, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The green owl represents Duolingo, an educational platform icon with its function being language learning. 
Upon touching the icon, the user would engage with the app to learn a new language through interactive lessons." + }, + { + "question_id": 20, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "This icon, with an abstract design and the letters 'GE', likely signifies a news or media application that provides users with news articles, updates, and possibly live reporting, accessible by touching the icon to open the app." + }, + { + "question_id": 21, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "This area contains the term \"Search,\" suggesting it is likely related to a search function where a user can input queries to locate specific settings or information within this system or application." + }, + { + "question_id": 22, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"settings\" indicates an option or heading that relates to configuration options. Interacting with it would typically bring up a menu to adjust system preferences or application parameters." + }, + { + "question_id": 23, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The term \"mobile,\" followed by a comma hints at a list or continuation of related topics, likely referring to mobile network settings or features in the context of this system or application." 
+ }, + { + "question_id": 24, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"data\" in this context may refer to mobile data usage and settings. It suggests an option to view or adjust how the device handles cellular data." + }, + { + "question_id": 25, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "This term \"Wi-Fi,\" ending with a comma, implies it is part of a series, possibly relating to Wi-Fi settings where a user can manage Wi-Fi networks and preferences." + }, + { + "question_id": 26, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"and\" serves as a conjunction within a list or sentence, indicating the addition of more items or concepts that are related to the ones previously mentioned." + }, + { + "question_id": 27, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "This term \"hotspot\" typically refers to a feature where the device can share its internet connection with other devices through Wi-Fi, Bluetooth, or USB." 
+ }, + { + "question_id": 28, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The term \"usage,\" followed by a comma, likely relates to the tracking or monitoring of resource consumption, such as data, battery, or connectivity usage." + }, + { + "question_id": 29, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"Connected\" suggests it pertains to the status or management of connected devices or networks, such as Bluetooth connections or Wi-Fi networks." + }, + { + "question_id": 30, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The partially visible term \"Lo\" could be part of a word that identifies a feature, option, or information related to the system or application settings." + }, + { + "question_id": 31, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This area is a text-entry field in a web browser, typically used for typing in web addresses or performing web searches. Interaction with this field usually involves clicking or tapping to enter text, and pressing Enter would initiate a web search or take the user to the entered web address." 
+ }, + { + "question_id": 32, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This is a tab title within a web browser, indicating that the user has accessed or searched for 'eBay shopping' in this tab. The text serves a navigational purpose, allowing the user to identify and switch to the associated web page when multiple tabs are open." + }, + { + "question_id": 33, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This label refers to a 'cart' on a shopping platform, hinting at a functionality that allows users to view items that have been added to a virtual shopping cart. It is likely interactive and clicking it would navigate the user to a page summarizing their selected items for purchase." + }, + { + "question_id": 34, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This text is a URL displayed in the address bar of a web browser. It indicates that the current web page pertains to the shopping cart of the eBay website. The user can click on this text to edit the URL or copy it for use elsewhere." + }, + { + "question_id": 35, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This text suggests that the user is visiting or has searched for a page related to 'Welcome to Costco Wholesale'. It might serve as a title for a page, potentially indicating that the user can find information regarding Costco's offerings through this tab." 
+ }, + { + "question_id": 36, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "Here, 'costco.com' is the simplified representation of an address bar or tab title suggesting that the user is visiting Costco's website. Users interact with this by clicking it to switch to the Costco tab within the browser." + }, + { + "question_id": 37, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This phrase 'Pay Less.' is likely associated with a slogan or branding message, suggesting a value proposition to customers; the promise of spending less for the products or services offered by the entity associated with this phrase." + }, + { + "question_id": 38, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The text 'Target:' resembles a title or a navigational cue for a segment within a web browser, it may indicate a web page related to the retail company Target and is probably part of a list or compilation of bookmarks or frequently visited sites." + }, + { + "question_id": 39, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "Similar to , 'Expect More.' is a slogan that implies a promise of greater value, service, or product quality from the associated entity. It is designed to communicate a marketing message or company ethos to customers." 
+ }, + { + "question_id": 40, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This text 'target.com' represents a simplified address, similar to , likely indicating that the user is visiting or has the option to visit Target's website. Interaction with this area would navigate to or indicate presence at Target's web page." + }, + { + "question_id": 41, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The text appears at the top of the screenshot and is likely the title of the application or page currently being viewed. It suggests that the content of the page is related to the Skype application, possibly for download or further information purposes." + }, + { + "question_id": 42, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "This text is also indicative of the Skype application. It is usually the main header on an app page and is a non-interactive element that provides the user with confirmation of the app's identity." + }, + { + "question_id": 43, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The icon displayed represents the Skype application. It's typically used as a visual identifier of the app within digital stores or on a device's home screen. It serves as a non-interactive branding element in this context." 
+ }, + { + "question_id": 44, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The word \"Install\" is generally an interactive button when found on an application download page. Tapping this button would initiate the download and installation of the app onto the user's device." + }, + { + "question_id": 45, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "This repeated mention of \"Skype\" may refer to the name of the application on its store page. It usually appears below the app icon and serves as a non-interactive title or label." + }, + { + "question_id": 46, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The term \"purchases\" here likely relates to transactions associated with the app, suggesting that the app might offer in-app purchases." + }, + { + "question_id": 47, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "Paired with the previous \"purchases\" text, \"In-app\" specifies the location or type of purchases available, indicating that users can buy items or services within the app itself." + }, + { + "question_id": 48, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The rating \"4.1*\" is indicative of user reviews and ratings for the app. 
It reflects the app's quality as perceived by its users and is usually an averaged score based on individual user ratings." + }, + { + "question_id": 49, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "\"1B+\" signifies the number of times the app has been downloaded, indicating that the Skype app has been downloaded over one billion times." + }, + { + "question_id": 50, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The phrase \"Editors' Choice\" likely denotes a special recognition or endorsement by the app store's editorial team, suggesting that the app comes highly recommended." + }, + { + "question_id": 51, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "This region contains the phrase \"search settings,\" which indicates a function allowing the user to search within the settings menu. The presence of a magnifying glass icon suggests that this is an interactive search bar interface element. Typically, a user would tap this area and input text to locate specific settings." + }, + { + "question_id": 52, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "This portion features the single character \"M\" which is typically representative of a user's initial or an application's logo. 
It is stylized with a certain thickness and distinct font that can be indicative of a branding design or user personalization within a software interface." + }, + { + "question_id": 53, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "This region includes the word \"Add\" presented with clear, bold lettering on a button or interactive element. This is commonly used to initiate the process of adding new elements, possibly in this context to add a new email account, as suggested by the surrounding text." + }, + { + "question_id": 54, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "The text \"another email\" is part of a larger phrase that suggests functionality for adding additional email addresses to the account or application in use. It is likely not interactive by itself but is part of instructional or descriptive text guiding the user's actions." + }, + { + "question_id": 55, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Featuring the word \"account,\" this text complements the nearby phrase and is associated with the process of adding or managing email accounts within the application or device settings. It would generally not be interactive but adds context to the interface's options." 
+ }, + { + "question_id": 56, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "The phrase \"Set up your personal\" suggests a prompt or direction for the user to configure personal settings, possibly related to an email or other account settings. This text is usually static and provides guidance or instructions within a user interface." + }, + { + "question_id": 57, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Paired with the previous text, \"or work email\" completes instructions for setting up email accounts of different types (personal or professional) within an application. This portion of text helps to further define the user's options for account configuration." + }, + { + "question_id": 58, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "\"Network & internet\" signifies a category or menu within the settings that pertains to adjusting network-related preferences including Wi-Fi, data usage, and related connectivity features. This text typically leads to a subsection where related settings can be modified." + }, + { + "question_id": 59, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "This section, \"Wi-Fi, mobile, data\" lists different connectivity options available to the user for configuration. 
It could be informative text providing a summary of the settings contained within the \"Network & internet\" menu mentioned in the previous region." + }, + { + "question_id": 60, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "The presence of the word \"usage,\" combined with the context from the surrounding text, relates to data consumption aspects of the device's network settings. \"And\" suggests there are additional relevant aspects listed after this text, likely related to managing network services or features." + }, + { + "question_id": 61, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "This area displays the text \"Wednesday, May 18,\" which appears to show the date information, presumably representing the current day of the week and the month's date. This is typically displayed on mobile devices as part of the user interface to inform the user of the current date." + }, + { + "question_id": 62, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "The text \"Maps\" suggests an application name, likely a mapping or navigation app, which users commonly utilize to find locations, get directions, or explore maps of different areas. It is probable that tapping on this text would open the associated application." 
+ }, + { + "question_id": 63, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "This is a single character \"G,\" customarily associated with Google’s branding. It often represents access to Google's search services or apps affiliated with Google. Interacting with this symbol would possibly lead to a Google product or service." + }, + { + "question_id": 64, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "This icon, often indicative of location services or map functionality, is commonly used to represent a user's current location or to access location-based features. Interacting with this icon typically opens a mapping application that shows the user's real-time location on a map." + }, + { + "question_id": 65, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "This icon depicts a chat bubble, usually associated with messaging or communication services. It typically indicates the user's chat or messaging applications, and interaction would likely open the associated messaging service to send or receive messages." + }, + { + "question_id": 66, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "This icon with a colorful design resembling a camera shutter or a wheel hints at the Google Chrome browser, which is widely used for Internet browsing. 
Tapping this icon would typically open the Chrome browser for web navigation." + }, + { + "question_id": 67, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "The icon exhibits the Google Assistant symbol, suggesting voice-activated or typing search query functionality. Interacting with this icon would likely invoke Google Assistant to help with tasks, answer questions, or control smart home devices." + }, + { + "question_id": 68, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The URL displayed in the address bar indicates that the webpage belongs to costco.com and includes a path, or endpoint, that suggests a functional page, which the text 'Check' implies may be related to a checkout or verification process. This is the web address users can navigate to for interacting with the website's functionality." + }, + { + "question_id": 69, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The logo represents the brand identity for Costco Wholesale, indicating that the user is currently on the official website of this retail company. As a logo, it may serve as a clickable element that typically redirects users to the homepage of the website." + }, + { + "question_id": 70, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "Labeled 'Warehouses,' this indicates a section of the website where users can find information about Costco's physical store locations. 
It likely functions as a link that, when clicked, will take the user to a page detailing warehouse locations and related information." + }, + { + "question_id": 71, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "Marked 'Account,' this suggests a section pertaining to user account management. Clicking on this would likely allow the user to access their personal account details, sign in, or manage their membership and profile." + }, + { + "question_id": 72, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "This icon appears to represent a shopping cart, which is commonly used on e-commerce websites to signify where users can view items they intend to purchase. Clicking on it would probably take the user to view their current selections or to the checkout page." + }, + { + "question_id": 73, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "Highlighting the word 'Shop,' this implies a navigational link designed to direct users to the online shopping section of the website, where they can browse and choose products for purchase." + }, + { + "question_id": 74, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "This appears to be a search bar, where users can enter keywords or phrases to find specific items or information on the website. Such fields typically include an interactive function that generates search results upon entry confirmation." 
+ }, + { + "question_id": 75, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "'My Warehouse' likely refers to the user's preferred or designated Costco warehouse location. It may include functionality for the user to select or change their preferred store and may show additional details, such as operational hours." + }, + { + "question_id": 76, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The phrase 'Delivery Location' accompanied with what appears to be a postal code implies a feature that allows users to specify or view the location to which online purchases will be delivered." + }, + { + "question_id": 77, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "Containing the city name 'Seattle,' this suggests the chosen warehouse or delivery location for the user. It might be interactive to allow the user to change the location or view information on the selected warehouse." + }, + { + "question_id": 78, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "This text likely represents the name of an application or service known as Fetch Rewards, potentially hinting at a rewards system that users can utilize by engaging with the app." 
+ }, + { + "question_id": 79, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "This is the word \"Play,\" which usually is associated with initiating an action or starting something, such as a video or game within an application." + }, + { + "question_id": 80, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The phrase \"to earn\" typically suggests that there is an opportunity to gain something—often points, money, or rewards—by performing certain actions or tasks." + }, + { + "question_id": 81, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The word \"MAKE\" usually denotes the action of creating something or obtaining an outcome, perhaps insinuating that users can create or earn money through the app." + }, + { + "question_id": 82, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "This text states \"MONEY,\" which indicates that the application or service likely involves opportunities for users to earn financial rewards or benefits." 
+ }, + { + "question_id": 83, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The text \"appminer st\" is not immediately clear, but it could be a truncation or part of a larger phrase, possibly indicating a feature within the app, or related to app mining or statistics." + }, + { + "question_id": 84, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The word \"Contains\" usually suggests that what follows will describe the contents or features within the app, which in this case could be related to advertisements." + }, + { + "question_id": 85, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "This term generally refers to \"advertisements,\" suggesting that the application includes ads that users might see while utilizing the app." + }, + { + "question_id": 86, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The numerical figure \"50K+\" generally implies a quantity greater than 50,000, typically used in the context of downloads, users, or items within an app." 
+ }, + { + "question_id": 87, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The characters \"4.7*\" indicate a rating, likely on a 5-point scale, suggesting that users have rated the app positively, with 4.7 out of 5 stars." + }, + { + "question_id": 88, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "This area contains the webpage title indicating the user is on a retail website known for its wide range of products, hinting at online shopping capabilities. The title is typically non-interactive and serves as an identifier of the site." + }, + { + "question_id": 89, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "This text seems to be an incorrect or truncated URL for the same retail website mentioned in Region 1. Possibly a typographical error within the text, it seems non-functional." + }, + { + "question_id": 90, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The text here represents a search query within a search bar of the website, suggesting the user is looking for a Lenovo ThinkPad, which is a model of a laptop computer." + }, + { + "question_id": 91, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "Labeled 'Cancel,' this is likely an interactive button used to clear the current search query within the search bar. 
Once tapped or clicked, it should clear the input text." + }, + { + "question_id": 92, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "This text duplicates the query in Region 3 and is part of the search bar suggestions or search history, indicating a previous or common search made by the user." + }, + { + "question_id": 93, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The single word 'Lenovo,' which is part of a search suggestion below the search bar, represents the brand that manufactures various electronic devices, including laptops." + }, + { + "question_id": 94, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The word 'ThinkPad' refers to a specific line of laptops and is part of a search suggestion. Standalone, it specifies the user's interest in the ThinkPad series by Lenovo." + }, + { + "question_id": 95, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The text 'ir' appears to be an incomplete or mistyped search term or fragment within the search suggestions. Its context is unclear without additional information." 
+ }, + { + "question_id": 96, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "This text, likely a category label, indicates the section of the site the user is navigating, presumably the electronics category where items like laptops would be found." + }, + { + "question_id": 97, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "Representing a longer search suggestion, this phrase indicates a related accessory for the Lenovo ThinkPad, specifically a charger, suggesting the user might be looking to purchase this item." + }, + { + "question_id": 98, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "The image displays a title that reads \"NAVIGATING SPECIAL EDUCATION SOCIAL & EMOTIONAL LEARNING.\" It's styled in bold, white capital letters against a red background, and it appears to serve as a header for the entire visual presentation, indicating the overarching theme of the content below." + }, + { + "question_id": 99, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel illustrates the concept of a \"Growth Mindset\" juxtaposed with \"Fixed Mindset.\" Two head silhouettes are shown with arrows pointing towards a \"Growth Mindset\" tag indicating a positive transformation away from a \"Fixed Mindset,\" symbolizing the adaptability and learning potential of the mind." 
+ }, + { + "question_id": 100, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "Depicted here is a person with a grim expression, and steam coming out of their ears, conveying the theme of \"Anger Management.\" This symbolizes the need to control tempers, with visual cues highlighting the struggle typically associated with anger." + }, + { + "question_id": 101, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel represents \"Understanding Diversity.\" It features a circle of variously colored handprints reaching towards the center, signifying unity and inclusiveness among diverse individuals or groups. The image communicates the idea of embracing diversity." + }, + { + "question_id": 102, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "The image here is indicative of \"Social Inferencing.\" A figure stands perplexed before an open box with question marks floating above, suggesting the process of interpreting social cues and understanding social contexts or scenarios that are not overtly expressed." + }, + { + "question_id": 103, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "An illustration of two children, one standing over the other with a raised fist, typifies \"Bullying.\" This image portrays an aggressive interaction between youth, emphasizing the dynamic of power and intimidation present in bullying behaviors." 
+ }, + { + "question_id": 104, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "Here, \"Helping Others\" is symbolized by two children, one assisting the other by tying their shoe. This image evokes themes of kindness, helpfulness, and cooperation among individuals, highlighting the importance of social support." + }, + { + "question_id": 105, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "Showing two profiles with opposing arrows and a lightning bolt in between, this panel discusses \"Conflict Resolution.\" The imagery suggests two individuals facing a conflict with a potential for resolution, emphasizing communication and problem-solving." + }, + { + "question_id": 106, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel depicts \"Coping With Changes,\" represented by a signpost with arrows pointing in different directions, labeled \"CHANGES.\" It symbolizes the various paths one may take when encountering life's transitions and the importance of adaptability." + }, + { + "question_id": 107, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "Finally, \"Leadership\" is expressed through an individual climbing a staircase while assisting another person upward. It represents the concept of leading by example, and guiding others towards success, showcasing the traits of a good leader." 
+ }, + { + "question_id": 108, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image highlights a statistic related to hiring efficiency, pointing out that the time to hire has more than doubled over the last 5 years. A graphical element beside the text emphasizes this increase in time with a \"+2X\" indicating the doubling." + }, + { + "question_id": 109, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This section of the image indicates that the average cost to hire someone in the U.S. is $4,000, emphasizing the financial implications of the recruitment process for employers." + }, + { + "question_id": 110, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Illustrated here is a pie chart displaying that 36% of employers are unable to find the talent they need when it is needed. This statistic points to the challenges in matching skills and job openings in a timely manner." + }, + { + "question_id": 111, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Described here is the significant potential economic benefit (\"$2.7 trillion impact to global GDP\") that could result from using more efficient talent platforms, suggesting that improvements in recruiting methods could have a profound impact on the global economy." 
+ }, + { + "question_id": 112, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This part of the image addresses organizational efficiency, with a statement that 46% of companies are sometimes or frequently understaffed. The figure is accompanied by a graphic showing the 46% proportion." + }, + { + "question_id": 113, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "This area presents the title and introductory text providing an overview of the image's intent. It introduces the concept of \"Travel Personas,\" indicates that these personas are used to identify individual travel styles, and how these styles are significant for personalized engagement in marketing. It references a report by the CMO Council from 2018." + }, + { + "question_id": 114, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A visual and textual depiction of \"The Smart Planner\" travel persona. This persona, representing 31% of travelers, is illustrated by a character with suitcases, a camera, binoculars, and a hat, suggesting a well-prepared and organized traveler." + }, + { + "question_id": 115, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "This illustrates \"The Relaxed Nomad\" persona. With 25% representation, the image shows two individuals in hiking attire with a backpack, indicating a laid-back and adventurous travel style." 
+ }, + { + "question_id": 116, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Depicts \"The Deal Seeker\" persona, representing 22% of travelers. The image shows a family with suitcases and shopping bags, suggesting a focus on economical travel and value for money." + }, + { + "question_id": 117, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Presents \"The Nervous Stresser\" persona with 13% representation. The image depicts an anxious individual in an airplane seat, clutching the armrests, reflecting a traveler who experiences stress during trips." + }, + { + "question_id": 118, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Showcases \"The Adventurous Thrill-Seeker\" persona, accounting for 5% of the traveler demographic. The image portrays two characters skydiving, indicating a preference for high-energy and adventure-filled travel experiences." + }, + { + "question_id": 119, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Portrays \"The Luxury Budget-Buster\" persona, constituting 1% of travelers according to this depiction. The image includes a character sipping a drink on a plane, implying a tendency towards indulgence and high expenditure." 
+ }, + { + "question_id": 120, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Displays \"The Business Road Warrior\" persona, also making up 1% of the traveler profile. The graphical representation includes a character briskly walking with a rolling suitcase and carrying a briefcase, suggesting frequent travel for business purposes." + }, + { + "question_id": 121, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "An illustration depicts a female customer service representative wearing a headset and holding a notepad, with an envelope icon indicating email communication." + }, + { + "question_id": 122, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "Depicted here is a male customer support agent with a headset. Behind him are symbols such as a magnifying glass and a wrench, suggesting a focus on service and problem-solving." + }, + { + "question_id": 123, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "This image shows a customer support agent with a globe and a phone headset in the background. The presence of a star and headphones suggests excellence in global support." + }, + { + "question_id": 124, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "In this graphic, a male figure with a headset is surrounded by symbols: a question mark, gears, and a light bulb. 
This represents expertise in finding solutions." + }, + { + "question_id": 125, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "Featuring tools and a gauge, the illustration conveys a commitment to quality in customer service, indicated by the 'Quality Service' text." + }, + { + "question_id": 126, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "The design shows a female representative with a headset alongside a mobile phone displaying a wifi signal and a callback option, emphasizing telecommunications services." + }, + { + "question_id": 127, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel portrays a customer support individual with a wrench, highlighting the concept of assistance with technical or practical issues." + }, + { + "question_id": 128, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "Illustrated here is a customer support agent with a headset in front of a backdrop depicting the UK flag, a speech bubble, and a phone, suggesting language translation services." + }, + { + "question_id": 129, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A female customer service consultant is represented, with symbols of 24-hour availability and a gold star, signifying round-the-clock excellence." 
+ }, + { + "question_id": 130, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image features a human heart symbolizing a strong heart as one of the benefits of running." + }, + { + "question_id": 131, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel showcases an icon of a shield with a check mark, representing the immune system's boost from running." + }, + { + "question_id": 132, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A silhouette of a figure measuring their waist indicates that running can aid in weight loss." + }, + { + "question_id": 133, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The artwork depicts a pair of lungs, signifying the respiratory system's enhancement due to running." + }, + { + "question_id": 134, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A smiling face emoticon suggests that running can improve one's mood." 
+ }, + { + "question_id": 135, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "An illustration of a leg bone signifies that running increases bone density." + }, + { + "question_id": 136, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "An image of a brain is used to illustrate the benefit of improved brain function from running." + }, + { + "question_id": 137, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image features a detailed representation of the cardiovascular system, emphasizing its strengthening through running." + }, + { + "question_id": 138, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "The image displays the word 'HOT' in large, bold, uppercase letters with varying colors for each letter. The 'H' is in red, the 'O' is in mustard yellow, and the 'T' is in a light blue color." + }, + { + "question_id": 139, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "Here, a round, yellow cartoon-like emoji with blue tears, symbolizing laughter or crying with joy, is shown. Below it, the word 'HUMOR' is written in uppercase letters." 
+ }, + { + "question_id": 140, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "This depicts an open hand graphic in brown color, which is centered within an orange circular background. Underneath the image, the word 'OPENNESS' appears in capital letters." + }, + { + "question_id": 141, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "Shown is an illustration of two hands coming together in a handshake or high five, set against a yellow circle. Below, the word 'TOUCH' is described in uppercase letters." + }, + { + "question_id": 142, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A graphic of a heart with an exclamation mark within it is displayed within a light green circular background. Written below is the word 'ATTENTION' in uppercase letters." + }, + { + "question_id": 143, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A portrayal of a person with a gender-neutral appearance, featuring brown hair, is encircled in blue. A sequence of dashes leads from the character to the bottom right, with the word 'PROXIMITY' written in block capitals." + }, + { + "question_id": 144, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "The image shows a close-up illustration of a stylized blue eye with a large brown pupil, against a dark blue background. 
Below the eye is the word 'EYE CONTACT' written in all caps." + }, + { + "question_id": 145, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image depicts an illustration of a person with flushed cheeks and a thermometer in their mouth, indicating a high temperature, alongside the word \"fever.\"" + }, + { + "question_id": 146, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image shows an individual coughing into their hand, representing a symptom identified by the word \"cough.\"" + }, + { + "question_id": 147, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel illustrates an individual appearing unwell, with a sick expression and a hand over their mouth. The word \"vomiting\" is associated, indicating it as a symptom." + }, + { + "question_id": 148, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Here, an individual is portrayed with their hands near their throat, their cheeks flushed, and an uneasy expression. The term \"dyspnea\" adjacent to the figure defines the displayed respiratory distress." 
+ }, + { + "question_id": 149, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "An individual is seen clutching their stomach, with a distressed expression, representative of \"diarrhea\" which is indicated by the corresponding label." + }, + { + "question_id": 150, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The illustration shows a pair of human lungs with a highlighted area indicating inflammation. The word \"pneumonia\" is present to describe the condition being depicted." + }, + { + "question_id": 151, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Featured is a graphic representation of a pair of kidneys with a highlighted area in red, indicating distress or damage. Alongside is the phrase \"renal failure,\" signifying the medical condition exhibited." + }, + { + "question_id": 152, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "This region displays the heading \"Interesting Facts\" at the top, set against a blue background with a three-line menu icon to the left and a heart symbol to the right." 
+ }, + { + "question_id": 153, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Depicts a graphical icon of a panda bear's face on a green background with the label \"Animals\" beneath it." + }, + { + "question_id": 154, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Shows an icon representing a plate and silverware on a green background, labeled as \"Diet Nutrition.\"" + }, + { + "question_id": 155, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Includes a graphical icon that combines a heart shape and a pulse line on a dark background, labeled \"Diseases Disorders.\"" + }, + { + "question_id": 156, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Features an image of a fashionable shirt on a dark background with the word \"Fashion\" underneath it." + }, + { + "question_id": 157, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Contains an icon resembling a film strip on an orange background, indicating the \"Entertainment\" category." 
+ }, + { + "question_id": 158, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel displays an icon of a syringe with a drop, which is on a green background, and is described with the words \"Drugs Addiction.\"" + }, + { + "question_id": 159, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Shows a depiction of a wine bottle and glass on a blue background, labeled \"Food & Drink.\"" + }, + { + "question_id": 160, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Exhibits two stylized human figures, one male and one female, on a blue background, with the inscription \"Gender.\"" + }, + { + "question_id": 161, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Contains a depiction of the Earth on a green background, with the word \"Global\" beneath it." + }, + { + "question_id": 162, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image depicts two individuals engaged in conversation. One appears to be a professional, possibly a therapist, sitting across from a person who seems to be seeking help. The scene is accompanied by the text \"Seek Professional Help,\" suggesting that the image represents the advice to consult a mental health professional when dealing with depression." 
+ }, + { + "question_id": 163, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel features an individual sitting on the ground with their head down, projecting a dejected or hopeless demeanor. Above the figure, the text reads \"Don't Lose Hope.\" The image conveys the message of maintaining hope as a countermeasure against feelings of depression." + }, + { + "question_id": 164, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "An illustration of a female figure is shown alongside the phrase \"Practice Mindfulness.\" She appears calm and collected, with her eyes closed and a slight smile, which indicates a serene state of mind, commonly associated with mindfulness practice." + }, + { + "question_id": 165, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Here, an individual is seen looking out of a large window onto a sunny landscape with trees. The phrase \"Rethink Your Perspective\" suggests that the image is advising a change in one's outlook, possibly to a more positive or broader view, as a way to combat depression." + }, + { + "question_id": 166, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image presents a person in activewear, taking a stride forward with a focused expression. 
The associated text, \"Stay Active,\" recommends physical activity as a method for improving mental health and battling depression." + }, + { + "question_id": 167, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel displays a person in a yoga pose, meditating with eyes closed and hands in a position of focus. The text \"Meditate\" indicates that the image is suggesting meditation as a therapeutic practice for managing depression." + }, + { + "question_id": 168, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image shows three gel ice packs in green, purple, and blue colors, with distinctive shapes, resembling a dinosaur, a star, and a fish. Accompanying text suggests \"Take out the one you need.\"" + }, + { + "question_id": 169, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "An illustration depicts a cartoon boy holding an ice pack to his head. 
Text indicates the ice pack has multi-functionality and advises using the ice pack for \"the relief area for the doctor recommended time of 20 minutes.\"" + }, + { + "question_id": 170, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "Detailed instructions on how to use the ice pack are given, with two methods highlighted: \"TO USE COLD\" involving refrigeration, and \"TO USE HOT\" instructing to microwave the pack for 10 seconds and check the temperature." + }, + { + "question_id": 171, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "Guidelines for storage are portrayed, advising to \"put the item in the storage bag, for longer shelf life, keep pack in freezer while not in use.\"" + }, + { + "question_id": 172, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "The object appears to be a small brown wooden shed, likely used for storage, situated on a patch of grass. It has a clearly visible slanted roof, possibly for rain runoff, and looks to be a single-door structure typically found in a backyard or garden setting." + }, + { + "question_id": 173, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "This object is a tree with thick, lush foliage, representing a mature specimen that provides shade and greenery. 
It stands behind a smaller, sparser tree and is part of a larger grouping of trees that appear to create a natural boundary or backdrop for the area." + }, + { + "question_id": 174, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "A single metal pole is embedded in the ground in a vertical orientation. It seems to be a simple, slender structure, possibly serving as a support or part of a larger construction that isn't fully visible. The lawn surrounding it is well-trimmed and maintains an even appearance." + }, + { + "question_id": 175, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "This bowl, appearing to be dark blue, is situated against a background, likely part of kitchenware." + }, + { + "question_id": 176, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "The tabletop is made of dark marble, showcasing a glossy finish and reflecting its surroundings slightly." + }, + { + "question_id": 177, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "The light switches are white, contrasting with the dark wall, likely plastic, and appear functional." + }, + { + "question_id": 178, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "Positioned in the background, these white light switches are paired on a wall above the countertop." 
+ }, + { + "question_id": 179, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "This silver oven, with digital controls and a handle, appears modern and built into the cabinetry." + }, + { + "question_id": 180, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "An indistinct blue and green object, possibly decorative, is partially visible against a lighter backdrop." + }, + { + "question_id": 181, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "The floor, constructed of hardwood, showcases a natural finish with variations in wood grain." + }, + { + "question_id": 182, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "The jar holder, likely metal, is mounted to the wall, containing jars that may hold spices or ingredients." + }, + { + "question_id": 183, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "This is an image of a silver metal table situated outside on a paved ground. The table has a shiny, reflective surface indicative of being metallic." + }, + { + "question_id": 184, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "The object is an outdoor chair characterized by its red backrest and tan seat. 
It appears sturdy and designed for outdoor settings, likely part of a café or restaurant patio." + }, + { + "question_id": 185, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "The item in question is a piece of lavender paper that seems to be placed atop a metal table. The paper's edges are distinctly visible against the table's surface." + }, + { + "question_id": 186, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "Visible here is a yellow traffic light, suspended above the street. The light is not illuminated and it stands against a light sky, possibly signaling a traffic-stop scenario." + }, + { + "question_id": 187, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A large red and white striped umbrella stands open, presumably providing shade or shelter in an outdoor setting. Its vibrant colors attract attention." + }, + { + "question_id": 188, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A brown tree trunk is seen beside a sidewalk. The trunk's bark is rugged and it appears to be a mature, healthy tree, offering shade to the vicinity." + }, + { + "question_id": 189, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "Displayed is a black chalkboard featuring white text. It seems to be placed on a sidewalk, often used for displaying messages or menus outside establishments." 
+ }, + { + "question_id": 190, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A window is seen on the side of a tan-colored building. It appears to be rectangular, typical of building windows, and reflects the adjacent surroundings." + }, + { + "question_id": 191, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "A close-up view of a horse's head, predominantly brown with a distinctive white patch on its forehead and visible mane." + }, + { + "question_id": 192, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "This is the body of a brown horse, most likely the same one as the head seen in the close-up. Its front body is visible." + }, + { + "question_id": 193, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "A white horse is seen from a side angle in the distance, grazing or standing in a meadow with trees and a fence." + }, + { + "question_id": 194, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A frying pan on a heat source contains sautéed meat and vegetables, emitting steam, indicating the food is hot and being cooked." 
+ }, + { + "question_id": 195, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "An electric stovetop features a radiant burner that is glowing, suggesting it is turned on and providing heat for cooking." + }, + { + "question_id": 196, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A kitchen knife with a green handle rests on a countertop; its blade appears sharp and suitable for food preparation." + }, + { + "question_id": 197, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A human hand is captured in motion, seasoning or stirring the food in the pan, contributing to the cooking process." + }, + { + "question_id": 198, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000299654.jpg", + "category": "natural_detailed_caption_box", + "text": "The image depicts the head of a zebra, with distinctive black and white stripes covering its fur. The animal's ears are pointed upwards, indicating alertness. The eyes are visible, showcasing a gentle gaze, and the nose is close to the ground, suggesting the zebra is grazing or sniffing the terrain. The mane is partially visible as a series of short, erect black hair between the zebra's ears." 
+ }, + { + "question_id": 199, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000107939.jpg", + "category": "natural_detailed_caption_box", + "text": "The object is a rectangular street sign with white letters on a green background, indicating the name of a street. It is affixed to a metal pole and is located above and slightly to the left of a stop sign. The sign reads 'NORTH AVE' suggesting it's likely an indication of the street or direction. It appears to be a standard street name sign used in many urban settings." + }, + { + "question_id": 200, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000107939.jpg", + "category": "natural_detailed_caption_box", + "text": "This object is a red hexagonal stop sign with white uppercase letters spelling 'STOP'. It is attached to the same metal pole as another sign, below and to the right of it. The sign is designed to alert drivers to stop and is a widely recognized traffic control device. The edges of the sign appear sharp and undamaged, suggesting it is in good condition." + }, + { + "question_id": 201, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "A plush, padded object designed for comfort, potentially used on a sofa." + }, + { + "question_id": 202, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "Similar to the first object, this is also a stuffed and soft piece intended for supporting or resting." 
+ }, + { + "question_id": 203, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "Decorative accessory adorned on the ear, visible as a small, shiny object." + }, + { + "question_id": 204, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "This is a child with an open mouth and animated facial expression, possibly speaking or expressing surprise." + }, + { + "question_id": 205, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "Appears to be a young boy, casually dressed, gripping an electronic device with attention." + }, + { + "question_id": 206, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "The figure is wearing a red ski suit with a blue helmet and goggles. Their stance is open and welcoming, arms outstretched, and they seem to be an instructor addressing a group of students on a snowy slope." + }, + { + "question_id": 207, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A person is mostly obscured by the instructor but can be identified as a ski student by the helmet. The student is wearing a purple jacket with green sleeves and appears to be in mid-motion, learning to ski." 
+ }, + { + "question_id": 208, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "There is a student dressed in green ski gear with visible ski poles, possibly following instructions. They are viewed from the side, indicating movement or a pause during skiing." + }, + { + "question_id": 209, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A clear blue sky with scant clouds, indicative of a bright, sunny day ideal for outdoor activities such as skiing. This backdrop is above a snowy mountain setting." + }, + { + "question_id": 210, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A ski student is captured from behind, suggesting they are moving away from the viewer. They are wearing a red jacket with black pants, indicative of typical ski wear fit for the cold environment." + }, + { + "question_id": 211, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "This student, visible from the side, is wearing a green and purple ski outfit with a matching helmet, possibly in the midst of practicing or following a ski maneuver." + }, + { + "question_id": 212, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A detailed examination of the instructor's black glove, which is part of standard skiing attire, suited to protect hands from cold conditions and providing better grip on ski poles." 
+ }, + { + "question_id": 213, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2411153.jpg", + "category": "natural_detailed_caption_box", + "text": "Captured in this section is a motorcycle racer, sharply tilting while maneuvering a turn on a race track. The rider, outfitted in a full-body racing suit, is almost in a horizontal position relative to the ground, a technique used in high-speed motorcycle racing to navigate tight turns while maintaining speed. The motorcycle itself is predominantly red with hints of white and black, and it showcases a sleek, aerodynamic design typical of high-performance racing bikes. The rider's focused posture and the bike's dynamic angle suggest this is a moment of intense action during a race." + }, + { + "question_id": 214, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2411153.jpg", + "category": "natural_detailed_caption_box", + "text": "This portion of the image displays the texture of an asphalt road, detailed with small granular elements indicative of a typical racing track surface built to offer traction and durability. A crisp white boundary line marks the edge of the racing track, contrasting with the dark gray tone of the asphalt. The road surface is illuminated by ambient light, highlighting the texture and suggesting a dry weather condition which is ideal for racing. The condition of the road suggests it is well-maintained, a necessity for the safety and performance of high-speed motorsport events." 
+ }, + { + "question_id": 215, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/22d3844dcf29a07bd10f557a33684e331846f81b938ca0f742afab09c542133f.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region of the image displays a portion of a scientific or academic paper, specifically focusing on points that seem to outlay contents or headings within the document. The page appears to discuss topics in physics, with references to quark and meson masses, as well as lattice data. The content suggests that the document may be exploring the relationship between subatomic particles and their masses, experimental data, and theoretical models (likely within the field of particle physics or quantum chromodynamics). Each item listed is followed by ellipsis and a numerical value, denoting sections or page numbers where these topics are expanded upon within the document." + }, + { + "question_id": 216, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/22d3844dcf29a07bd10f557a33684e331846f81b938ca0f742afab09c542133f.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region is at the bottom of the document, typically referred to as the page-footer. In academic or scientific papers, this section could include information such as the page number, publication date, author's name, or part of the document classification system. However, the specifics of what this footer contains are not visible, as the black rectangle with a white numeric identifier covers it entirely." 
+ }, + { + "question_id": 217, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The content is a caption designated for a table, which generally serves to describe the table's subject matter. The caption reads, \"TABLE 1: The geometries and adsorption energies for the structures of thioglycolic acid on Au(111) at 0.25ML.\" It provides a clear indication that Table 1 will present quantitative data regarding the geometry and energy characteristics of thioglycolic acid adsorbed on a gold (Au) substrate at a specific coverage level." + }, + { + "question_id": 218, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a table containing organized data. It lists various configurations of thioglycolic acid adsorbed on an Au(111) surface, along with numerical values for initial and optimized parameters such as adsorption distance (ds–Au), polar angle (θ), and adsorption energy (E_ads). The data is structured in columns with headings for different parameters and rows corresponding to different adsorption sites and tilt directions. The table is used to convey detailed quantitative information in a comparative format, facilitating the analysis of changes in geometry and energy after optimization." 
+ }, + { + "question_id": 219, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Here appears to be an excerpt of text, possibly from a research article or report, focusing on detailed scientific analysis. The text discusses the shortest Au-S bond length and mentions 'initial and optimized site,' likely referring to the states before and after some experimental or computational procedure. The content seems to pertain to the interpretation of the data presented in the table above it, providing context and insights into the structural data of the adsorption process." + }, + { + "question_id": 220, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This section of text also seems to be a detailed analytical discussion, possibly a continuation of the content from the previous text excerpt. It specifically highlights the adsorption energy for the most stable structure of a molecule on the Au(111) surface and the preferred adsorption site. It suggests a close relationship with both the data in the table above and the scientific interpretation or conclusion drawn from that data." + }, + { + "question_id": 221, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The content in this region appears to be a page-footer, usually found at the bottom of journal pages or official documents. It may contain information such as the page number, document section, publication date, or authors' names. 
Such footers are used for navigation and citation purposes." + }, + { + "question_id": 222, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The image is a collection of three scatter plots, each corresponding to a distinct type of prediction performance evaluated by F1 score. The x-axis represents the frequency (presumably of occurrence in the training set), while the y-axis represents the F1 score, which is a measure of test accuracy. The plots are labeled (a) Atom prediction performance, (b) Bond prediction performance, and (c) Charge prediction performance. Each plot features a variety of points labeled with chemical symbols or bond types, indicating that the data relates to chemical structures." + }, + { + "question_id": 223, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a caption for the series of scatter plots shown in . It provides an interpretation of the data, stating that there is a clear correlation between the performance of neural networks on different prediction types and the frequency of the specific type in the training dataset. It is noted that classification networks perform significantly better than segmentation networks." + }, + { + "question_id": 224, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a subsection title within the document that reads \"Performance of segmentation network.\" It indicates that the following text will discuss the results and analysis related to the evaluation of the segmentation network's performance." 
+ }, + { + "question_id": 225, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a paragraph that explains how the performance of the segmentation network is measured by the F1 score for pixel predictions for different atom, bond, and charge types. The text discusses how performance correlates with the frequency of these types in the training data and references a correlation visible in Figure 4, assuming that Figure 4 corresponds to the scatter plots in ." + }, + { + "question_id": 226, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is another subsection title within the document that reads \"Performance of classification networks.\" It signals that the subsequent paragraph will describe the performance evaluation for classification networks." + }, + { + "question_id": 227, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This paragraph details the performance of classification networks, mentioning that the F1 score is used for evaluation. It highlights a correlation between F1 score and the frequency of different atom, bond, and charge types in the training set. Although the segmentation is not perfect, the classification networks can maintain accuracy. Results are summarized in Figure 4, which likely refers to the scatter plots in ." 
+ }, + { + "question_id": 228, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a subsection title denoted \"Overall graph accuracy,\" which suggests that the following section of the document will focus on the combined accuracy measurements of the previously discussed networks." + }, + { + "question_id": 229, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "In this paragraph, the text outlines how combining the performance of different parts can produce an overall accuracy for graph predictions. It implies that integration of segmentation and classification network results, as indicated by an algorithm, can construct the resulting graph, referencing images in three different blocks." + }, + { + "question_id": 230, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is the page number of the document, specifically '11,' marking its location within the document's sequence of pages." + }, + { + "question_id": 231, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region appears to be a paragraph of text discussing the outcome of a washing process on reducing sugar content. It notes that this process resulted in a higher content of reducing sugar which is thought to overshadow the glycemic index (GI) lowering effect of the polyphenols and may increase the GI of the sugar." 
+ }, + { + "question_id": 232, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region of text outlines a graphical demonstration of the 'GI sweet spot' related to the sugars shown in a referenced table. It explains that a certain minimum amount of sucrose (22mg CE/100mg) needs to be retained during sugar processing to maintain a low GI, and that if additional polyphenols are present but the reducing sugars are too high, then the low GI effect is negated." + }, + { + "question_id": 233, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region includes a section header titled \"Table 3 - Example sugars,\" which implies that the region is categorizing and summarizing data related to various sugars, likely in a tabular format." + }, + { + "question_id": 234, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text in this region describes the effects of increasing the reducing sugar content of sugar and its impact on the GI, moisture content, and the behavior of glucose and fructose when polyphenol content is increased. It concludes that optimizing moisture and reducing sugar content is insufficient to lower the GI in the presence of higher polyphenol levels." 
+ }, + { + "question_id": 235, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region is a section header for the text that follows, indicating that the content will deal with \"Example b - Washing or massecuite to desired polyphenol content.\" This text likely explains an example or case study related to the process of washing sugar massecuite to achieve a certain level of polyphenol content." + }, + { + "question_id": 236, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region contains text describing an experimental process where two different sugar mill samples were tested for polyphenol content after undergoing washing to a certain depth of color. It discusses how the polyphenol content was measured against desired levels and mentions results found in a specific table." + }, + { + "question_id": 237, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a figure. It includes several images arranged in a grid layout depicting various stages of a document life cycle or processing steps. Each image shows a different state of documents, possibly related to digitalization or text recognition processes. These images likely serve as a visual representation of the document's evolution through a particular workflow, such as scanning or Optical Character Recognition (OCR)." 
+ }, + { + "question_id": 238, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a caption associated with a figure. It reads \"Figure 5: The OCR process.\" This caption identifies and describes the figure that it is associated with. The figure it refers to likely illustrates the stages or aspects of the OCR process, which could involve converting scanned images of text into machine-encoded text." + }, + { + "question_id": 239, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a figure. It shows a piece of text with visual markings comparing two sections labeled \"Available OCR\" and \"Improved OCR.\" The annotations indicate corrections or enhancements made in the 'Improved OCR' section compared to the 'Available OCR' section. This figure serves to demonstrate the efficacy of certain OCR technologies or methodologies by providing a before-and-after comparison." + }, + { + "question_id": 240, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a caption associated with a figure. It reads \"Figure 6: Excerpt from the Hong Kong report with different versions of OCR output. The Internet Archive image containing this excerpt can be accessed here:\" followed by a URL. This caption provides context for the associated figure, indicating that it is an excerpt from a specific report and acknowledges the source of the image. It helps readers understand the purpose of the figure and where they can find additional information." 
+ }, + { + "question_id": 241, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is part of a footer. It contains the name of a journal, \"Journal of Data Mining and Digital Humanities,\" along with the ISSN number, which is a unique identifier used for serial publications. This area of the document provides information about the publication in which the article or research paper may be found." + }, + { + "question_id": 242, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is also part of a footer. It includes a URL, \"http://jdmdh.episciences.org\", which likely directs readers to the website of the journal or publication mentioned in . This URL provides a way for readers to access more information or related content online." + }, + { + "question_id": 243, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a page number. It is located in the footer area of the document and provides the numerical identifier \"9\" for the current page. This helps readers navigate the document and facilitates referencing specific sections." + }, + { + "question_id": 244, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region is identified as the page-header of the document. 
It contains the title of the document, which reads \"2012 Annual Report 2013.\"" + }, + { + "question_id": 245, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This portion is a section-header labeled \"Non-Executive Directors' Remuneration.\" It indicates that the following section will discuss the payment and remuneration details for non-executive directors of the company." + }, + { + "question_id": 246, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This area is another section-header specifying \"Components of Non-Executive Director remuneration.\" This header suggests a breakdown of the various elements that constitute the remuneration for non-executive directors." + }, + { + "question_id": 247, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Found at the bottom of the page, this region is the page-footer. It's a small section that is typically used for providing footnotes, disclaimers, or publication information for the document." 
+ }, + { + "question_id": 248, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a table detailing a \"Share purchase plan.\" It contains columns for the name of the individual, the amount of shares acquired, and the share price range at acquisition dates, alongside with the total sum. It lists information about shares acquired by specific individuals at specified price ranges during a specific time frame." + }, + { + "question_id": 249, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a block of text providing detailed information on \"Current Board fees\" and \"Post-employment benefits.\" It specifies the annual fees for different board roles and outlines the post-retirement benefits provided to non-executive directors with terms of board service." + }, + { + "question_id": 250, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region contains text related to the \"Deferred share purchase plan.\" It describes the nature of the share purchase plan, specifying the conditions under which shares were purchased, the performance criteria associated with the plan, and details regarding the share price and acquisition dates." 
+ }, + { + "question_id": 251, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region is categorized as text. It discusses the mathematical concept of homotopy groups designated π_n(M), focusing on their ability to classify different dimensional hypersurfaces within a manifold M. The text further explains that the triviality of these homotopy groups is linked to the connectivity of the space they represent, with specific mentions of the concepts of domain walls in cosmology and topological defects arising from symmetry breaking during phase transitions in the universe." + }, + { + "question_id": 252, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region of text delves into the implications of symmetry breaking in theoretical physics. It connects the process of symmetry breaking to the generation of monopole-like defects, and it references the Standard Model's group construction that includes a U(1) factor. The text implies that this formation of defects played a pivotal role in historical scientific developments." + }, + { + "question_id": 253, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text in this section links the theoretical concepts mentioned earlier to a practical application: the motivation for introducing a phase of inflation in cosmological models. 
It characterizes the topological conditions for the formation of defects and points out that certain solutions for these conditions can exist even in the absence of topologically stable defects. References are made to specific types of defects and academic citations are included to support these statements." + }, + { + "question_id": 254, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The content in this region pertains to the study of cosmic defects and their stability. It addresses scenarios in which initially unstable defects might become stable through various mechanisms, such as the effects of plasma. These considerations are relevant to the inflationary model in cosmology, particularly the constraints from the formation of cosmic strings." + }, + { + "question_id": 255, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region is categorized as a formula and presents a mathematical equation related to the text's discussion about topological defects and homotopy groups. The equation seems to represent a relationship that is essential to the argument or analysis presented in the categorical text regions it is associated with." + }, + { + "question_id": 256, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The last region is identified as a page-footer. 
It likely contains publication and/or authorship information, a page number, or possibly an indication of the section of the document where the content can be found. Since it is a page-footer, its purpose is primarily to aid in the organization and navigation of the document." + }, + { + "question_id": 257, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0301_0188_0040.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The image appears to be a stylized illustration of a side profile of a person's head and upper torso. The person's face is depicted with a serene or peaceful expression, eyes closed and a faint smile, implying a sense of calm or contemplation. Behind the figure, there are abstract shapes resembling clouds or wind patterns that swirl around the head, which could suggest thoughts, memories, or a state of mental flow. The illustration uses a muted color palette, predominantly warm shades of beige, pink, and gray, with a touch of red in the figure's attire, which has a dotted pattern. This artwork likely serves to evoke a mood or theme related to the content of the document in which it's included, possibly regarding mental health, mindfulness, psychology, or the creative process." + }, + { + "question_id": 258, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0301_0188_0040.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a footer section of an image or document. It contains a citation that reads \"© 2021 Scientific American,\" indicating that the image or the content of the document is copyrighted by Scientific American. This informs the viewer about the source of the content and copyright year, serving both as an attribution and a legal notice to respect the intellectual property rights associated with the material." 
+ }, + { + "question_id": 259, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This section is labeled as \"Chapter 2: Motivation.\" It functions as a heading signifying the start of a new chapter or section within the document, providing readers with an indication of the chapter's theme, which in this case is to establish the rationale or impetus behind the subject matter discussed in the chapter." + }, + { + "question_id": 260, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region, also labeled as \"Chapter 2: Motivation,\" serves a similar purpose to , functioning as part of the chapter heading that presents the focus of the chapter, potentially implying that the author will delve into the reasons or driving forces guiding the study or research presented." + }, + { + "question_id": 261, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region contains a paragraph of text that discusses specific concepts related to a theoretical framework, possibly in the field of theoretical physics or string theory. It mentions a scenario where excitations in a certain 'near horizon throat region' appear redshifted to an observer at infinity. The text discusses the energy associated with these excitations and touches on limits pertaining to string theory, suggesting that in a particular limit, the full Type IIB string theory must be considered. 
The paragraph concludes with an implication that supergravity must be considered in the context of near-horizon geometry within the scope of string theory." + }, + { + "question_id": 262, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "In this text paragraph, the document appears to be discussing two theoretical pictures related to the same low-energy limit within theoretical physics or string theory. It mentions the field theory picture with supergravity and an \\( N = 4 \\text{SU}(N) \\) SYM on the D branes, as well as the geometry picture with supergravity in flat space and Type IIB string theory. It suggests that the document is comparing and contrasting these two theoretical perspectives and proposing that they are both decoupled theories with identical asymptotic conditions." + }, + { + "question_id": 263, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This text section seems to conclude the discussion by mentioning that the analytical tools for two differing theoretical scenarios are completely incompatible. It references the Born-Infeld action and suggests that a mathematical comparison between different models yields coincident D-branes for an \\( \\text{SU}(N) \\) two-form field strength, relating to a broader discussion on theoretical physics and string theory." 
+ }, + { + "question_id": 264, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region exhibits a mathematical formula that is relevant to the discussion within the document. The formula appears to link certain theoretical physics concepts, connecting string coupling constants \\( g_s \\) with D-brane charges and configurations. The formula is most likely important in the context of supporting the document's claims about supergravity or string theory." + }, + { + "question_id": 265, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The paragraph in discusses the conventional assumption that an insider's private information is static, citing specific examples from the literature. It elaborates by stating that in certain works, insiders are assumed to know the final value of an asset both before and after the default of the company issuing the asset. The text suggests that the presence of insiders does not always lead to market arbitrage and may contribute positively to the market by leading to higher information efficiency in price processes." + }, + { + "question_id": 266, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text in challenges the assumption of an insider's perfect foresight as unrealistic, reasoning that the fundamental value of a firm is tied to dynamically changing elements like cash flows and sales, among other factors. 
The paragraph presents the idea that the fundamental value is stochastic, implying that it is subject to random fluctuations, and that the insider has the advantage of perceiving these fluctuations more clearly than other market participants." + }, + { + "question_id": 267, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "In , the document introduces the paper's goal, which is to relax the assumption of static information and examine the equilibrium in trading and price processes and market efficiency when insiders have dynamic private information. The paragraph sets the context for a more detailed exploration of how markets operate under these conditions." + }, + { + "question_id": 268, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": " contains text which explains that the model considered in this paper is a broader version of the earlier static models. The paper's intention is to cover dynamic information scenarios and improve on previous models that covered a narrower range of trading strategies and pricing rules." + }, + { + "question_id": 269, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The paragraph in discusses the findings of the paper, which include the identification of a Markovian equilibrium that is inconspicuous, allows insiders to trade without being detected, and is solely dependent on the total order process. 
It underscores the unique nature of this equilibrium and how it enhances the market efficiency in certain conditions." + }, + { + "question_id": 270, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text in suggests empirical outcomes where revealing information might be beneficial. Specifically, it contrasts different market equilibrium scenarios and suggests that in non-Markovian price processes, it’s often better for insiders to disclose their private information." + }, + { + "question_id": 271, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The Comprehensive Description includes \"Abbondanza\" displayed in a script font that conveys a sense of stylishness or elegance, which may suggest that it is the name of a business, possibly a restaurant or some sort of food-related establishment, given its association with abundance or plenty often related to food." + }, + { + "question_id": 272, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The Comprehensive Description for \"Cafe\" suggests that the text is identifying a type of establishment where coffee and light meals may be served. The font is straightforward and easily legible, which is typically used for clarity and immediate recognition for passersby." 
+ }, + { + "question_id": 273, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The Comprehensive Description for \"USIS\" indicates a text that is likely an acronym or a name presented in a bold and blocky font, common for official or institutional entities. It is placed on the side of a van, suggesting it could be the branding of a company or a service, possibly linked to the van's purpose or ownership." + }, + { + "question_id": 274, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"ESTATE\" is written in capital letters with a bold typeface that has clear and uniform strokes, implying a sense of authority and prominence. It appears against a yellow background, which suggests visibility and is likely meant to catch the eye of passersby. The text is likely part of a business sign for a company dealing with property, real estate sales, or management." + }, + { + "question_id": 275, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"AGENTS\" displayed similarly in capital letters and bold typeface complements the text in . The typeface is consistent, suggesting that both are part of the same sign. The dark text against the yellow background stands out, indicating the nature of the business below, which is likely involved in real estate agency work." 
+ }, + { + "question_id": 276, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"SAXONS\" is written in capital letters with a font style that is bold and prominent, but with a slightly more decorative style than and . This difference could be a stylized choice to make the brand name distinctive. Positioned on a façade above a window, it is part of the business's branding, likely the name of the company." + }, + { + "question_id": 277, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "This contains the same text as , \"SAXONS\", indicating that the text is repeated within the image. This repetition reinforces the importance of the name as part of the branding. The text style and location, again above a window, maintain the brand's visibility from multiple angles." + }, + { + "question_id": 278, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "Displaying the word \"ESTATE\" in the same bold, capital letter style as observed in . This repetition at a lower part of the building indicates a consistent branding approach across the business' presence on the building, and its placement closer to eye level increases readability for pedestrians." 
+ }, + { + "question_id": 279, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"AGENTS\" is identified, and like , it mirrors the style and size of the sign in , ensuring that the message of the business being an estate agency is clear. This consistent branding facilitates quick recognition and understanding of the services offered." + }, + { + "question_id": 280, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"Triple\" appears in a retro cursive script, likely chosen to convey a sense of nostalgia or classic style, which is consistent with the overall branding. It is set against a yellow portion of the sign, and the color choice here is a mint green which provides a pleasing contrast that makes the text stand out." + }, + { + "question_id": 281, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"WHITE\" is written in bold, capital letters, featured on a green ribbon-like background that cuts across the sign. The font is sans-serif, which gives a modern and clean look. The use of capital letters in this context suggests emphasis and importance." + }, + { + "question_id": 282, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The phrase \"SPOT\" is displayed in a sans-serif, uppercase font similar to the text in . It is placed within the same green ribbon background, mirroring the style and maintaining design consistency. 
This positioning completes the name or title represented on the signage." + }, + { + "question_id": 283, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"O's\" is written in a script that echoes the retro flair seen in . This script is mint green, presented on a yellow backdrop, and it features an apostrophe, signifying a possessive or a contraction. The stylized \"O\" has a red center dot, adding to the thematic color scheme." + }, + { + "question_id": 284, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"by\" is located on the lower left area of the central graphic and is likely to be a connector or a preposition relating to the larger text elements in the image. It's written in small, lowercase letters, contrasting in size to the other texts, suggesting a subordinate role in the information hierarchy." + }, + { + "question_id": 285, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001122.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text in this area reads \"NEW\". The font is bold and stylized with thick vertical lines and sharp edges, suggesting a strong, impactful presence. The letters are colored in red, which stands out against the white background of the fabric they are printed on. There are black vertical lines that run down the fabric, giving the impression of pinstripes. The text placement and style are reminiscent of classic athletic or team-related apparel, often used to represent a specific city or team name." 
+ }, + { + "question_id": 286, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001122.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text in this area reads \"YORK\". Similar to the previous region, it features a bold and stylized font in red, contrasting with the white pinstriped background. The consistent style between this text and that of suggests they form a single phrase, typically associated with a particular location or team. The font size and its commanding presence imply that the text is intended to be easily read and recognized from a distance, characteristic of team jerseys or sports merchandise." + }, + { + "question_id": 287, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_162.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"Colchester\" is displayed in a straightforward, sans-serif font with a bold weight, which makes it highly legible and easy to read. The text color is white, which contrasts sharply with the red background, creating a standout effect that captures attention. This type of text presentation is typically used for clear communication and effective signage. \"Colchester\" is likely the name of a place, possibly a destination or location referenced on a signpost or directional marker. The choice of a bold and contrasting color scheme is intentional, aimed at ensuring that the text is discernible from a distance and under various lighting conditions. The text is centrally aligned within the marked area, suggesting the importance of the information it conveys. The presence of the symbol above the text, resembling a stylized pair of railway tracks, indicates that this sign is associated with a railway service or station. 
The purpose of the text in this context is to inform viewers of a railway station name or a destination reachable via train services." + }, + { + "question_id": 288, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"AYAM\" is presented in large uppercase letters on a signage board. The font appears bold and designed to be eye-catching, serving the purpose of promoting or identifying a business or product associated with chicken, as \"ayam\" means chicken in Malay and Indonesian." + }, + { + "question_id": 289, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"RIA\" appears next to \"AYAM\" in the same font and style, following the design pattern of the sign. It seems to be part of a larger phrase or brand name, although without additional context it is challenging to ascertain its full meaning or association." + }, + { + "question_id": 290, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"SMASHED\" is in uppercase letters and retains the same font consistency and styling as the previous words, indicating it's part of the same signboard. The use of the word \"smashed\" could be describing a method of food preparation, possibly relating to the menu items offered by the establishment." 
+ }, + { + "question_id": 291, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"FRIED\" appears in the same bold, attention-grabbing font as the other text elements in the signage. The usage of the term \"fried\" aligns well with food-oriented establishments and could denote a particular style of cooking advertised by the business." + }, + { + "question_id": 292, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The term \"CHICKEN\" completes what seems to be a descriptive phrase relating to the nature of the food provided at this location. Presented in the same visual style as the other text elements on the sign, it confirms the establishment’s focus on chicken dishes." + }, + { + "question_id": 293, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"Accommodation\" appears on a signboard, suggesting the label for a location where lodging facilities are provided. The text is bold and capitalized, providing clear visibility and significance, thus indicating direction to the accommodation facilities within the vicinity." + }, + { + "question_id": 294, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"Office\" displayed similarly to , is also on the signboard, and its typography suggests it is an instructional marker guiding individuals towards offices located nearby. 
Its distinct appearance functions as a navigational aid for visitors seeking office spaces." + }, + { + "question_id": 295, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The term \"Nightline\" is prominently featured, possibly indicating a nighttime service or a helpline available after-hours. This text, like the others on the sign, caters to nighttime assistance or inquiries, potentially providing crucial information for individuals seeking support during late hours." + }, + { + "question_id": 296, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"BUBBA\" appears in bold, capital letters with a font that is playful and somewhat informal, possibly evoking a casual or friendly atmosphere. The position is prominently displayed at the top of a circular logo, which suggests its importance as a distinguishing element or a brand name." + }, + { + "question_id": 297, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"GUMP\" is presented in a similar bold and playful font directly below . Both words form a cohesive phrase when read together, implying a connection or partnership, possibly in a business context." + }, + { + "question_id": 298, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"SHRIMP\" is placed below and , completing the phrase that seems to be the focal point of the circular logo. 
The font style remains consistent with the previous text, reinforcing the brand's identity and likely indicating the type of product or service offered." + }, + { + "question_id": 299, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"RESTAURANT\" is written in a smaller, yet bold font beneath the word \"SHRIMP\". This text specifies the nature of the business associated with the overarching brand identified by the preceding text." + }, + { + "question_id": 300, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"MARKET\" appears in a smaller font at the bottom of the circle, suggesting a secondary or additional aspect of the business, perhaps indicating a place where goods are sold as part of the company's offerings." + }, + { + "question_id": 301, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"CO\" could stand for \"Company,\" abbreviated and presented beside the main brand name, which is common practice for businesses to denote a corporate entity." + }, + { + "question_id": 302, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"TM\" indicates that the entire phrase formed by , , and is a trademark. This protects the brand's unique identity and legally secures its use exclusively for the business's purposes." 
+ }, + { + "question_id": 303, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"EVENING\" appears in a sans-serif, bold font that is capitalized for emphasis. It is located on the lower part of a product label, positioned just above another text element that indicates further details about the product. The text serves to indicate either the usage time or a key ingredient, \"Evening Primrose,\" of the product, likely related to wellness or personal care." + }, + { + "question_id": 304, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"PRIMROSE\" is the second part of a phrase in which the text is styled similarly to the text in . It follows directly below \"EVENING,\" suggesting the complete term \"EVENING PRIMROSE.\" The positioning and styling are consistent with , reinforcing the connection between the two words. The term \"Evening Primrose\" is usually associated with the name of a plant, often used in the context of essential oils or natural product ingredients." + }, + { + "question_id": 305, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The term \"BASE\" is part of the full term \"BASE OIL,\" which is shown in a smaller and possibly lighter weight sans-serif typeface compared to and . Positioned at the bottom of the product label, it likely indicates the type of product, suggesting that the contents of the bottle can be used as a carrier or base oil in aromatherapy or skincare." 
+ }, + { + "question_id": 306, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"OIL\" completes the term \"BASE OIL,\" as mentioned in . The font and positioning maintain consistency with the description provided in , reinforcing its role as part of a descriptive phrase related to the product's use or contents. Together, \"BASE OIL\" likely designates the product's category within a larger set of similar goods." + }, + { + "question_id": 307, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01644.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"46\" appears in a serif font with distinct, prominent stylistic flares at the ends of the strokes, which is characteristic of serif fonts. These numerals are white, providing a strong contrast against a dark background plate, which appears to be made of slate or a similar material. The plate is mounted onto a brick wall, and there are two spherical, possibly metallic, fixtures attached to the plate on either side, which seem to be serving as decorative mounting posts. The purpose of this text likely indicates an address or number associated with the location, commonly used to identify specific residential or commercial units." + }, + { + "question_id": 308, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This area seems to indicate the numeric value '31,' which, in a calendrical context, may reference the number of days in a month. It does not appear to have any interactive features based on the screenshot." 
+ }, + { + "question_id": 309, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This seems to be a button or a link labeled 'Connecter' which, when translated from French, means 'Connect' or 'Log in'. It is likely an interactive element that upon being clicked, would prompt the user to access an account or initiate a connection process." + }, + { + "question_id": 310, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This section contains the French word 'Novembre', which is the month of November. It appears to be a part of a list of months, possibly for navigating a calendar or archives by month. It may be an interactive element that allows users to view content from November." + }, + { + "question_id": 311, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This is a button or link with the text 'Annoncez' followed by information icon (i). The French word 'Annoncez' translates to 'Advertise'. This suggests that it is a call-to-action for users to advertise, possibly by clicking this button or link. The information icon typically indicates additional details available upon interaction." 
+ }, + { + "question_id": 312, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This area displays the number '07', which could signify a day of the month, especially since it is seen next to a date heading in the format 'Vendredi 7 Mai 2021', which translates to 'Friday, May 7, 2021'. It seems to be a static element without interactivity." + }, + { + "question_id": 313, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "Similar to , this is labeled 'Septembre', which is the French word for September. It is part of the same apparent navigational element for a calendar or archive sorted by months and is likely interactive as well." + }, + { + "question_id": 314, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This section reads 'Liens Web', which translates to 'Web Links' from French. This likely refers to a section of the web page intended to direct users to other related sites or resources. It is probably interactive, with each listed link being clickable." + }, + { + "question_id": 315, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "The number '15' is visible here, and when combined with the context of a calendar visible in the screenshot, it likely represents the 15th day of a month. This element does not seem to be interactive itself." 
+ }, + { + "question_id": 316, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "Here we see the number '04' which, in the context of the surrounding calendar, might represent the 4th day of a month. It doesn't show any sign of interactivity." + }, + { + "question_id": 317, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This text, 'Plan du site', stands for 'Site Map' in French and usually refers to a detailed page listing where one can find an overview of all the sections and pages within the website. It is usually an interactive element that, when clicked, will take the user to the sitemap page." + }, + { + "question_id": 318, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "This region depicts an interactive button with the text \"BOOK NOW\" overlaid on it. Usually, buttons like this on websites are clickable and lead the user to a page where they can schedule an appointment or reserve a service. The button is stylistically designed to stand out and grab attention, potentially suggesting it is a call-to-action feature for users to quickly access the booking process for the service provided by the website." 
+ }, + { + "question_id": 319, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "This section of the website features a text link with the phrase \"Buy Luxurious Doona.\" It likely serves as a navigational element, which upon clicking, would redirect users to a page where they can purchase a \"Luxurious Doona.\" The term \"Doona\" typically refers to a type of bedding, suggesting that the site might be related to home goods or personal comfort items." + }, + { + "question_id": 320, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "This area contains a text link that reads \"Pay Balance.\" It is probably an interactive link that, once clicked, would take the user to a section of the website where they can complete a payment - likely concerning a service or product they have previously engaged with." + }, + { + "question_id": 321, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "Featured here is a clickable text link titled \"About Us.\" Such links generally lead users to a webpage that elaborates on the history, mission, values, or team behind the company or service. It helps users learn more about the company or website owners." + }, + { + "question_id": 322, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "This part of the webpage indicates a \"Contact us\" link. 
Clicking on this text would typically guide the visitor to a page featuring contact information or a form enabling the users to reach out to the company for inquiries or support." + }, + { + "question_id": 323, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "\"Home\" appears to be a navigation link that, when selected, would likely redirect users to the homepage of the website. The homepage is the main page that often provides a comprehensive overview of what the website offers." + }, + { + "question_id": 324, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The text here, \"doonawash@gmail.com,\" suggests an email address. This is likely provided for users to directly contact the company or service provider through email. It is not clickable but can be used to send an email using an email client or service." + }, + { + "question_id": 325, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "Similar to , this \"Contact us\" link would allow users to access a contact section or page on the website, promoting user interaction with the service provider for queries or assistance." + }, + { + "question_id": 326, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "Just like , this \"Home\" link is a navigational feature intended to bring the user back to the site's main page, presenting a starting point or central hub for exploring the website's contents." 
+ }, + { + "question_id": 327, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "Echoing , the \"Pay Balance\" text link is associated with the payment part of a transaction on the website. It is intended to facilitate users in clearing dues or completing transactions related to the services offered by the site." + }, + { + "question_id": 328, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_0558c1f4-c05b-49a8-8479-04b1575779d2.png", + "category": "web_detailed_caption_box", + "text": "This area of the webpage is part of a bullet point list under the subheading \"Local Chapters\". The subheading describes Local Chapters as country or region-level groups affiliated with the OpenStreetMap Foundation that represent their local mapping community in dealings with government, business, and media. The bullet point \"OpenStreetMap Belgium\" likely indicates that there is an established local chapter for the country of Belgium. The text appears in blue with an underline, suggesting that it is a hyperlink. Clicking on this hyperlink would presumably direct the user to more information about the OpenStreetMap community in Belgium or to their specific website." + }, + { + "question_id": 329, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "This area appears to be a contact detail, specifically a phone number. It typically serves as a direct line of communication for users to reach out to the company or organization featured on the website. Such contact information is usually clickable on mobile devices, enabling the user to initiate a phone call directly." 
+ }, + { + "question_id": 330, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "This section includes a numerical rating, which is indicative of client satisfaction, service quality, or performance measurement. It suggests that it may be connected to reviews or ratings received from clients, as denoted by the star symbol which commonly represents ratings." + }, + { + "question_id": 331, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "The text translates to \"Free Consultation\" in Azerbaijani, indicating an offering from the company to prospective clients. It is likely a call-to-action button which upon clicking, would lead a user to a form or contact option to set up a consultation without any charge." + }, + { + "question_id": 332, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "This part of the website is labeled \"Portfolio,\" signifying that it's likely a navigation element leading to a page where the company showcases their previous work, projects, or case studies to highlight their experience and expertise." + }, + { + "question_id": 333, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This space indicates a shopping cart feature with a count of items currently in the cart, which currently stands at zero. 
This interactive element likely becomes clickable when items are added, allowing users to view and manage the contents of their cart." + }, + { + "question_id": 334, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This area is typically a customer service feature, allowing users to access help or assistance through various means such as a help center, live chat, or contact information. It's usually clickable and would direct the user to a support section of the website." + }, + { + "question_id": 335, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "The text suggests a prompt to visit the company's Facebook page. This is an interactive element that, when clicked, likely redirects users to the specified social media page to engage with the company's content on that platform." + }, + { + "question_id": 336, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "It denotes an area that likely relates to personalization for users, where they can view their astrology charts. This is expected to be a clickable feature which, when accessed, leads the user to a section where their personalized charts are displayed or can be created." + }, + { + "question_id": 337, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "Similar to , this is a call to action to visit the company's Twitter page. 
Clicking on this interactive element would redirect a user to the company's Twitter profile to view tweets and engage with their content." + }, + { + "question_id": 338, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This is a customer contact area, providing users with a way to get in touch with the company. Clicking on this is likely to take the user to a section of the site with various contact options like email, phone, or a contact form." + }, + { + "question_id": 339, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This is a call-to-action button that allows users to add a product to their shopping cart. This button is interactive, and upon clicking, the chosen product would be added to the user's cart, with the action possibly reflected in the shopping cart count in ." + }, + { + "question_id": 340, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This area is likely dedicated to showcasing the company's range of products. Clicking here would probably lead users to a product catalog where they can browse and select items of interest." + }, + { + "question_id": 341, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "It represents an area designated for a podcast. Users can expect to interact with this button to be taken to a media player or section of the website where they can listen to recorded audio content." 
+ }, + { + "question_id": 342, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This part of the website provides company information to the user. It's normally a clickable element that leads the user to learn more about the company's history, values, mission, and team members." + }, + { + "question_id": 343, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This section typically represents a menu item on a website that describes the services or actions undertaken by the organization. It usually links to a page with detailed information on the work that the organization performs, including projects, mission statements, or other relevant content." + }, + { + "question_id": 344, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This appears to be a news headline or feature article title on the website. It suggests that the organization has introduced a new initiative offering financial assistance for livelihood projects. Clicking on this title would likely lead to an article or post giving more information about the micro-grants program and its objectives." + }, + { + "question_id": 345, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This is likely a button or link to a settings page where users can adjust their preferences for the website, which might include language settings, account details, notification preferences, and more." 
+ }, + { + "question_id": 346, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This commonly links to the website's privacy policy document, where users can learn about how the organization collects, uses, stores, and protects personal data." + }, + { + "question_id": 347, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This is typically a navigation link that returns the user to the main homepage of the website from any other page." + }, + { + "question_id": 348, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "It usually indicates a button the user can click to accept the terms of a policy, possibly related to cookies or usage terms, as indicated by the accompanying text." + }, + { + "question_id": 349, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This is often a prominent call-to-action button meant to direct users to a page where they can make financial contributions to the organization or cause." 
+ }, + { + "question_id": 350, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "Commonly a menu item that links to a news section containing articles, updates, blog posts, press releases, or other information that keeps readers informed about the organization's activities or relevant topics." + }, + { + "question_id": 351, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This is typically a link to a page where users can find more information about the organization, including history, values, team members, or accomplishments." + }, + { + "question_id": 352, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "Usually a link to a page where visitors can find contact information for the organization, such as an address, phone number, email, or a contact form." + }, + { + "question_id": 353, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_6c677961-e540-4cc5-b725-5e301019a9f9.png", + "category": "web_detailed_caption_box", + "text": "This region appears to be a toolbar located within a content editing area, likely part of a web-based application interface. The specific feature highlighted is an icon that suggests functionality related to inserting tables into the content. In a typical text editor or content management system interface, clicking this icon would presumably open a menu or dialogue box allowing the user to create and insert a table into the document. 
The table insertion feature commonly lets users specify the number of rows and columns, choose a table style, and sometimes adjust additional table properties such as cell padding or headers." + }, + { + "question_id": 354, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "This area encompasses a navigation element labeled \"News.\" It likely leads to a section of the website where current news relevant to the organization or its field of operation is disseminated. As a navigational element, it is interactive and upon clicking would redirect users to the page where news articles or updates are posted." + }, + { + "question_id": 355, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "This area displays the company's name ServeGate, which appears to be stylized as a logo. This typically acts as a home button; clicking on it would usually take users back to the main or home page of the website." + }, + { + "question_id": 356, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "This heading titled \"Our Difference\" may signify a navigational item that leads to content describing what sets the organization apart from competitors. Interaction would involve clicking it to navigate to a page that likely discusses the company's unique selling propositions (USPs), mission, values, or other differentiating factors." 
+ }, + { + "question_id": 357, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "Labeled \"About Us,\" this is another navigation item that, when clicked, would take the user to a section of the site that provides information about ServeGate, such as its history, leadership team, vision, and mission." + }, + { + "question_id": 358, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The text \"Get in touch\" suggests an interactive component that leads to a part of the website where users can contact the organization. This may include a contact form, phone numbers, email addresses, or other means of communication." + }, + { + "question_id": 359, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The term \"Home\" designates a navigational link that typically redirects users to the front page of the website. Clicking this link would generally return the user to the starting point of their navigation experience." + }, + { + "question_id": 360, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The item labeled \"Services\" is likely a drop-down menu or a link to a page that outlines the company's offerings. Users can click on it to discover more about the services provided by ServeGate, including descriptions and possibly pricing or someone to contact for further inquiry." 
+ }, + { + "question_id": 361, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "\"Indigenous Impact\" might be a navigation link to a page detailing the company's impact on, contributions to, or programs associated with Indigenous communities. Interaction with this element would bring the user to either a dedicated section or might expand into a submenu listing various facets of this impact." + }, + { + "question_id": 362, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "This appears to be another instance of the company logo for ServeGate, similar to . It likely serves the same function as a clickable link leading back to the home page of the website." + }, + { + "question_id": 363, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_5a35d9c2-2c2d-4a49-ad0a-1408d9cac78e.png", + "category": "web_detailed_caption_box", + "text": "The area appears to contain an interface element labeled \"Close issue\" situated within a software development environment, likely a part of an issue tracking or project management system. This interface element is a button, as suggested by its design and placement near the text input area for comments. When clicked, it would typically result in the associated issue being marked as resolved or closed in the system, thereby updating the status of the issue within the project's workflow. The button is designed for users to signify that the discussion, problem, or task represented by the issue no longer requires attention and can be archived or removed from active consideration." 
+ }, + { + "question_id": 364, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_3fed2169-3c3d-43e7-baaa-3bf0e0c7134c.png", + "category": "web_detailed_caption_box", + "text": "This is a close button found on a web-based issue tracking platform, specifically GitLab. In the context of the page, it is presumably used to close the issue which is currently being viewed. When a user clicks this button, the active issue (#1460 in this case) will likely be marked as closed within the system, changing its status from open to closed. This is a standard feature in issue tracking and project management software, allowing users to manage the lifecycle of issues and tasks." + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/android_detailed_caption_box/prediction.json b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/android_detailed_caption_box/prediction.json new file mode 100644 index 0000000000000000000000000000000000000000..46b7886dee75138be7ac702f93ad13e4478cf79c --- /dev/null +++ b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/android_detailed_caption_box/prediction.json @@ -0,0 +1,2186 @@ +[ + { + "question_id": 1, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "Friday, Sep 2, 61°F" + }, + { + "question_id": 2, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A stylized letter \"G\" with a gradient of colors starting from red at the top, transitioning to orange, yellow, green, blue, and ending with purple at the bottom." 
+ }, + { + "question_id": 3, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A white, fluffy cloud with a smooth, rounded shape and a slightly irregular outline. The cloud has a soft, cotton-like texture and appears to be illuminated from the left side, giving it a gentle gradient from white to a light blue hue." + }, + { + "question_id": 4, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A white gear with a blue circular center and six evenly spaced, straight teeth." + }, + { + "question_id": 5, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A white speech bubble icon with a blue outline, featuring three horizontal blue lines of varying lengths inside the bubble." + }, + { + "question_id": 6, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A red, rectangular play button with rounded corners, featuring a white, centered play triangle pointing to the right." + }, + { + "question_id": 7, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A stylized letter \"G\" with a gradient of colors starting from red at the top, transitioning to orange, yellow, green, and ending with blue at the bottom." 
+ }, + { + "question_id": 8, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A blue circle with a red dot above it and a yellow dot below it." + }, + { + "question_id": 9, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A circular button with a white center and a thin black border." + }, + { + "question_id": 10, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A white, right-pointing arrow with a thick border." + }, + { + "question_id": 11, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Excel\" is written in a bold, sans-serif font with a white color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 12, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Facebook\" is written in a bold, sans-serif font with a light pink color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." 
+ }, + { + "question_id": 13, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Speedtest\" is written in a bold, sans-serif font with a white color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 14, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Instagram\" is written in a bold, sans-serif font with a light pink color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 15, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Home\" is written in a bold, sans-serif font with a white color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 16, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Lite\" is written in a bold, sans-serif font with a white color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." 
+ }, + { + "question_id": 17, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Messenger\" is written in a bold, sans-serif font with a gradient of pink to white. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 18, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "Home De..." + }, + { + "question_id": 19, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "A pair of cartoonish eyes with large, white, oval-shaped pupils and black, curved eyelashes. The eyes are set against a green background." + }, + { + "question_id": 20, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "A blue square with a white lowercase \"g\" followed by three white horizontal lines of equal length." + }, + { + "question_id": 21, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"Search\" is written in a bold, sans-serif font with a dark gray color. The letters are evenly spaced and aligned horizontally." 
+ }, + { + "question_id": 22, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"settings\" in lowercase letters, with a modern, sans-serif font. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance. The color of the text is a gradient of light to dark gray, with the darker shade at the top and the lighter shade at the bottom." + }, + { + "question_id": 23, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "mobile, data" + }, + { + "question_id": 24, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"data\" in lowercase letters, with a modern, sans-serif font. The letters are evenly spaced and have a clean, minimalist design." + }, + { + "question_id": 25, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The Wi-Fi symbol consists of a lowercase \"i\" with a dot above it, followed by a lowercase \"f\" with a dot above it." + }, + { + "question_id": 26, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"and\" in lowercase letters, with a serif font, is written in a dark color against a light background." 
+ }, + { + "question_id": 27, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"hotspot\" in lowercase letters, with a modern, sans-serif font. The letters are evenly spaced and have a clean, minimalist design." + }, + { + "question_id": 28, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "usage, and hotspot" + }, + { + "question_id": 29, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"Connected\" is written in a bold, sans-serif font with a black color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 30, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "A white, L-shaped object with a smooth surface and rounded edges." + }, + { + "question_id": 31, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "Search or type web address" + }, + { + "question_id": 32, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The text \"ebay shopping cart\" is displayed in lowercase letters. 
The word \"ebay\" is in a larger font size compared to the word \"shopping cart\". The text is in a sans-serif font and is centered horizontally." + }, + { + "question_id": 33, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The word \"cart\" in lowercase letters, with a bold, sans-serif font. The letters are black and evenly spaced." + }, + { + "question_id": 34, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "https://cart.ebay.com/" + }, + { + "question_id": 35, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "Welcome to Costco Wholesale" + }, + { + "question_id": 36, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The text \"costco.com\" is displayed in lowercase letters." + }, + { + "question_id": 37, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "Pay Less." + }, + { + "question_id": 38, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The word \"Target\" in bold, black, sans-serif font." 
+ }, + { + "question_id": 39, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The text \"Expect More.\" is written in a bold, sans-serif font with a black color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 40, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The text \"target.com\" is displayed in a bold, sans-serif font with a blue color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 41, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The word \"skype\" is written in lowercase letters with a modern, sans-serif font. The letters are black and evenly spaced, with a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 42, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The word \"Skype\" is written in lowercase letters with a modern, sans-serif font. The letters are black and evenly spaced, with a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 43, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "A blue circular icon with a white lowercase \"s\" in the center." 
+ }, + { + "question_id": 44, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The word \"Install\" is written in lowercase white letters on a green background." + }, + { + "question_id": 45, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The word \"Skype\" is written in a bold, sans-serif font with a dark gray color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 46, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "purchases" + }, + { + "question_id": 47, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "In-app purchases" + }, + { + "question_id": 48, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "A black star with a five-pointed shape, featuring a slightly irregular outline and a textured surface that suggests a three-dimensional form." + }, + { + "question_id": 49, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "A black letter \"B\" followed by a black plus sign." 
+ }, + { + "question_id": 50, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "Editors' Choice" + }, + { + "question_id": 51, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Search settings" + }, + { + "question_id": 52, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "A white envelope icon with a triangular flap on the right side, set against a dark background." + }, + { + "question_id": 53, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "The word \"Add\" in lowercase letters, with a bold font and a slight shadow effect, giving it a three-dimensional appearance." + }, + { + "question_id": 54, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "another email" + }, + { + "question_id": 55, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "The word \"account\" in lowercase letters, with a bold font and a slight italicization, set against a plain background." 
+ }, + { + "question_id": 56, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Set up your personal or work email" + }, + { + "question_id": 57, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "or work email" + }, + { + "question_id": 58, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Network & internet" + }, + { + "question_id": 59, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Wi-Fi, mobile, data" + }, + { + "question_id": 60, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "usage, and" + }, + { + "question_id": 61, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "Wednesday, May 18" + }, + { + "question_id": 62, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "The word \"Maps\" in a bold, sans-serif font, with a gradient of pink to white, giving it a three-dimensional appearance." 
+ }, + { + "question_id": 63, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "A stylized letter \"G\" with a gradient of colors starting from red at the top, transitioning to orange, yellow, green, blue, and ending with purple at the bottom." + }, + { + "question_id": 64, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "A red, teardrop-shaped pin with a black circular center." + }, + { + "question_id": 65, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "A white speech bubble icon with a blue outline, containing three horizontal blue lines of varying lengths." + }, + { + "question_id": 66, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "A stylized letter \"G\" with a gradient of colors starting from red at the top, transitioning to orange, yellow, green, and ending with blue at the bottom." + }, + { + "question_id": 67, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "A blue circle with a white border, a red circle with a white border, and a yellow circle with a white border." 
+ }, + { + "question_id": 68, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The text \"costco.com/Check\" is displayed in a bold, sans-serif font. The word \"costco.com\" is in lowercase letters, and the word \"Check\" is in uppercase letters. The text is aligned to the left." + }, + { + "question_id": 69, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The logo features the word \"COSTCO\" in large, bold, red capital letters with a white outline. Below it, the word \"WHOLESALE\" is written in smaller, bold, blue capital letters." + }, + { + "question_id": 70, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The word \"Warehouses\" is written in a bold, sans-serif font with a light blue color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 71, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The word \"Account\" in a bold, sans-serif font, with a gradient of blue shades ranging from light to dark, giving it a three-dimensional appearance." + }, + { + "question_id": 72, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "A blue shopping cart with a white handle and a white basket area." 
+ }, + { + "question_id": 73, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "A blue rectangular sign with the word \"Shop\" in white, bold, sans-serif font. To the left of the text, there are three horizontal white lines of varying lengths." + }, + { + "question_id": 74, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The word \"Search\" in a bold, sans-serif font, with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and have a consistent size throughout." + }, + { + "question_id": 75, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "My Warehouse" + }, + { + "question_id": 76, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "Delivery Location" + }, + { + "question_id": 77, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The word \"Seattle\" in bold, black, sans-serif font." 
+ }, + { + "question_id": 78, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "Fetch Rewards: Play" + }, + { + "question_id": 79, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The word \"Play\" in bold, black, sans-serif font." + }, + { + "question_id": 80, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "to earn" + }, + { + "question_id": 81, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The text \"MAKE\" is written in bold, uppercase letters with a dark green color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 82, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The word \"MONEY\" in bold, uppercase letters with a green background and black outline." + }, + { + "question_id": 83, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The text \"appminer st\" is displayed in lowercase letters. The letters are green and have a sans-serif font. The text is aligned to the left." 
+ }, + { + "question_id": 84, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The word \"Contains\" is written in a bold, sans-serif font with a light gray color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 85, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The text \"ads\" is written in lowercase letters with a modern, sans-serif font. The letters are evenly spaced and have a clean, minimalist design. The color of the text is a light gray, blending subtly with the background." + }, + { + "question_id": 86, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The text \"50K+\" is displayed in bold, black font with a slight shadow effect, giving it a three-dimensional appearance. The \"50K\" is in a larger font size compared to the \"+\" sign." + }, + { + "question_id": 87, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "A black star with a five-pointed shape, featuring a slightly irregular outline and a textured surface that suggests a three-dimensional form." 
+ }, + { + "question_id": 88, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A black and white image of a computer keyboard with a standard QWERTY layout, including function keys, a number pad, and arrow keys. The keys are rectangular with white lettering on black keys, and the keyboard has a slight ergonomic curve." + }, + { + "question_id": 89, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The text \"walmart.com\" is displayed in a bold, sans-serif font. The letters are black and evenly spaced, with a slight shadow effect, giving them a three-dimensional appearance. The text is aligned to the left." + }, + { + "question_id": 90, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A white rectangular signboard with the text \"Lenovo Thinkpad\" in black, sans-serif font." + }, + { + "question_id": 91, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A blue rectangular button with rounded corners featuring the word \"Cancel\" in white, bold, sans-serif font." + }, + { + "question_id": 92, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A black and white image of a Lenovo ThinkPad laptop with a visible keyboard and trackpad." 
+ }, + { + "question_id": 93, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The word \"Lenovo\" in lowercase letters, with a bold font and a slight italicization, set against a plain background." + }, + { + "question_id": 94, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A black, rectangular computer keyboard with a standard QWERTY layout, including a number pad on the right side. The keys are chiclet-style with white lettering, and there is a slight sheen on the surface, suggesting a smooth texture. The function keys are aligned along the top, and there is a visible space bar at the bottom center." + }, + { + "question_id": 95, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The word \"in\" in lowercase letters, with a bold font and a slight shadow effect, giving it a three-dimensional appearance." + }, + { + "question_id": 96, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A rectangular electronic device with a screen displaying text, surrounded by a thin bezel." + }, + { + "question_id": 97, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A black and white image of a Lenovo ThinkPad charger." 
+ }, + { + "question_id": 98, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "NAVIGATING SPECIAL EDUCATION SOCIAL & EMOTIONAL LEARNING" + }, + { + "question_id": 99, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A yellow background with two human head outlines facing each other. The left head has the words \"FIXED MINDSET\" above a red downward arrow, and the right head has the words \"GROWTH MINDSET\" above a green upward arrow." + }, + { + "question_id": 100, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon character with a serious expression, featuring a dark gray hair, a light blue shirt, and a red cross symbol on the left side of the head. The character has a red frown and is surrounded by two white, cloud-like shapes on either side of the head." + }, + { + "question_id": 101, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular logo with a beige background featuring five hands in different colors: red, green, blue, purple, and orange, arranged in a circular pattern. Above the hands, the text \"Understanding Diversity\" is written in black." + }, + { + "question_id": 102, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A black and white illustration of a person with a light bulb on their head, holding a book. 
The person has a question mark above their head and another question mark to the right of their head. The background is a light peach color." + }, + { + "question_id": 103, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon illustration of two boys, one wearing a red shirt and blue shorts, and the other wearing a striped shirt and brown shorts, both with their arms raised. The word \"Bullying\" is written above them." + }, + { + "question_id": 104, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon of a boy and a girl playing with each other. The boy is wearing a yellow shirt, black shorts, and red shoes. The girl is wearing a yellow shirt, blue pants, and red shoes. Both have black hair and are smiling." + }, + { + "question_id": 105, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A blue speech bubble with a yellow lightning bolt symbol, and a red speech bubble with a yellow lightning bolt symbol, both containing a person's face." + }, + { + "question_id": 106, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A rectangular blue traffic sign with a white border, featuring three white arrows. The leftmost arrow curves to the left, the middle arrow points straight up, and the rightmost arrow curves to the right. Below the arrows, the word \"CHANGES\" is written in white capital letters." 
+ }, + { + "question_id": 107, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized illustration of two human figures, one in yellow and the other in blue, both with black outlines. The yellow figure is standing on a staircase, while the blue figure is standing on a platform. The blue figure is holding a microphone and appears to be speaking or presenting. The word \"Leadership\" is written in black text above the figures." + }, + { + "question_id": 108, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A blue circle with a white plus sign inside it, followed by a white \"2X\" text." + }, + { + "question_id": 109, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A hand holding a person with a blue shirt and black pants, with a purple banner below displaying \"$4,000\" in white text." + }, + { + "question_id": 110, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A pie chart with a blue background and a white border, featuring a white line that divides the chart into two sections. The left section is larger and has a white number \"36%\" inside it, while the right section is smaller and has a white number \"36%\" inside it." 
+ }, + { + "question_id": 111, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A globe with a blue ocean and white continents, featuring a purple banner with white text that reads \"2.7 trillion impact to global GDP from use of more efficient talent platforms.\"" + }, + { + "question_id": 112, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "46% of companies are sometimes or frequently understaffed" + }, + { + "question_id": 113, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Travel personas: how travelers identify their travel style" + }, + { + "question_id": 114, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular planner with a white background featuring a colorful illustration of two people, one wearing a red hat and the other wearing a blue hat, both holding a smartphone. The person in the red hat is holding a book, and the person in the blue hat is holding a suitcase. The background includes a mountain and a sun. The text \"THE SMART PLANNER\" is written in bold, black letters at the top." 
+ }, + { + "question_id": 115, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "The Relaxed Nomad" + }, + { + "question_id": 116, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A family of five, consisting of a man, a woman, and three children, standing together. The man is holding a baby, while the woman is holding a suitcase. The children are standing around them, with one child holding a suitcase. The family is depicted in a circular frame." + }, + { + "question_id": 117, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular white background with a thin black border." + }, + { + "question_id": 118, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular white background with a thin black border." + }, + { + "question_id": 119, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A red airplane seat with a high backrest and armrests, featuring a small, rectangular, red and white logo on the backrest." 
+ }, + { + "question_id": 120, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular logo with a white background featuring a stylized illustration of a person in a blue suit with a red briefcase in their left hand and a blue suitcase in their right hand. The person is depicted in a walking motion, with one leg forward and the other leg back. The text \"THE BUSINESS ROAD WARRIOR\" is written in bold, black capital letters above the illustration." + }, + { + "question_id": 121, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A woman with short brown hair, wearing a purple top with a white collar, is holding a yellow envelope in her right hand. She has a headset on her head and is standing in front of a computer monitor with the word \"BIG\" visible on the screen." + }, + { + "question_id": 122, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular orange background with a white border." + }, + { + "question_id": 123, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular logo with a light blue background featuring a stylized globe in the center. The globe is divided into four quadrants, each in a different shade of blue. A black headset with a microphone is positioned over the globe, with the earpieces extending outward. To the right of the globe, there is a yellow star." 
+ }, + { + "question_id": 124, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon man with a light blue shirt and a black tie, wearing a headset with a microphone. He has a light brown hair and is pointing upwards with his right hand. To his right, there is a yellow light bulb with a red base." + }, + { + "question_id": 125, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A blue and black wrench with a flat-head design, featuring a blue handle with a textured grip and a black head with a serrated edge. The wrench has a long, straight shaft connecting the handle to the head." + }, + { + "question_id": 126, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized illustration of a person wearing a headset with a microphone, a purple shirt, and a white undershirt. The person is holding a smartphone with a blue and white design on the screen." + }, + { + "question_id": 127, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized illustration of a hand holding a black and white telephone handset, with a blue circular background featuring a partial globe and a speech bubble with the word \"BIG\" in white. The hand is wearing an orange life jacket with white stripes." 
+ }, + { + "question_id": 128, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular logo with a red border, featuring a stylized design of a person wearing a white shirt and a black tie, with a blue and white striped hat. The background includes a Union Jack flag and a yellow rectangle." + }, + { + "question_id": 129, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon woman with brown hair tied back, wearing a purple shirt with a white collar, and a headset. She is holding a yellow star in her right hand and has a black and white striped object in her left hand." + }, + { + "question_id": 130, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized heart with a vibrant red color, featuring a prominent blue and orange flame-like design on the upper left side, and a smaller blue and orange flame-like design on the lower right side." + }, + { + "question_id": 131, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized human figure with arms raised in a celebratory pose, surrounded by four blue arrows pointing outward, each arrow with a slight curve and a pointed tip." 
+ }, + { + "question_id": 132, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A digital bathroom scale with a black base and a white digital display showing the weight." + }, + { + "question_id": 133, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A pair of pink lungs with a central trachea, featuring detailed vein patterns and a slightly curved shape." + }, + { + "question_id": 134, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A yellow smiley face with a black outline, featuring two black dots for eyes and a curved black line for a mouth." + }, + { + "question_id": 135, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A human knee with a visible bone structure, including the femur and tibia, with a slight curvature and a smooth surface." + }, + { + "question_id": 136, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A human brain with a detailed, textured surface, featuring a prominent cerebral cortex and a visible portion of the brain stem." 
+ }, + { + "question_id": 137, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A red heart with a white outline and a white line through the center, indicating a heart rate or rhythm." + }, + { + "question_id": 138, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "The word \"HOTAPPE\" is written in large, bold, uppercase letters. The letters are colored in the following order from left to right: red, orange, yellow, light blue, and dark blue." + }, + { + "question_id": 139, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular smiley face with a yellow face and a red border. The face has two blue teardrop-shaped eyes, a brown curved mouth, and two brown curved lines for eyebrows." + }, + { + "question_id": 140, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "An orange circle with two brown hands facing each other, with the word \"OPENNESS\" in green capital letters below the circle." + }, + { + "question_id": 141, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular yellow background with a pair of hands clasped together in the center. The hands are depicted in a light brown color, with the left hand slightly overlapping the right hand. 
Radiating from the hands are white lines, giving the impression of light or energy emanating from the hands. Below the hands, the word \"TOUCH\" is written in bold, uppercase letters." + }, + { + "question_id": 142, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular logo with a light blue background featuring a large red heart in the center. Inside the heart, there is a white exclamation mark. Below the heart, the word \"ATTENTION\" is written in bold, uppercase letters." + }, + { + "question_id": 143, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular blue background with two cartoon faces, one on the left and one on the right, separated by a white dashed line. The left face has brown hair and a neutral expression, while the right face has light brown hair and a neutral expression. Below the faces, the word \"PROXIMITY\" is written in bold, uppercase letters." + }, + { + "question_id": 144, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular blue background with a white eye in the center, featuring a brown iris and a white sclera. Below the eye, the words \"EYE CONTACT\" are written in bold, uppercase letters." + }, + { + "question_id": 145, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with a green background, wearing a red shirt, with a thermometer in their mouth." 
+ }, + { + "question_id": 146, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with a green background, wearing a pink shirt, with a red nose and a red mouth, and a white hand with three fingers extended." + }, + { + "question_id": 147, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with a green background, wearing a pink shirt, with a green and white object in their mouth." + }, + { + "question_id": 148, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized illustration of a person with a green circular background. The person has a gray face with a red nose and a red mouth. The person is wearing a red shirt with a yellow collar." + }, + { + "question_id": 149, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with a green background, wearing a red shirt and white pants, is sitting on a white chair." 
+ }, + { + "question_id": 150, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular green sign with a white border, featuring a stylized illustration of a pair of red lungs with a gray outline, set against a dark background." + }, + { + "question_id": 151, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized illustration of a kidney with a red and pink color scheme, featuring a central red area with a pink outline, flanked by two symmetrical, curved, pink shapes resembling the kidney's lobes, all set against a light green background." + }, + { + "question_id": 152, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A blue rectangular header with the text \"Interesting Facts\" in white, bold, sans-serif font. To the left of the text, there are three white horizontal lines. To the right of the text, there is a red heart symbol." + }, + { + "question_id": 153, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized panda face with a white face, black ears, black patches around the eyes, and a black nose. The panda has a small, curved black mouth and a content expression. The face is set against a green circular background." 
+ }, + { + "question_id": 154, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A green circular background with a white plate in the center, containing a yellow circle. To the left of the plate is a white fork, and to the right is a white spoon." + }, + { + "question_id": 155, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular icon with a dark blue background featuring a red heart in the center. The heart is outlined in white and has a white line running horizontally across its middle. Below the heart, the word \"Diseases\" is written in white, bold, sans-serif font." + }, + { + "question_id": 156, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A white long-sleeved shirt with a teal collar and cuffs, featuring a row of black buttons down the front." + }, + { + "question_id": 157, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular orange icon with a white film reel design in the center. The film reel has a blue border with white squares on the left and right sides, and a white center with a blue horizontal line dividing it into two sections." 
+ }, + { + "question_id": 158, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular green icon featuring a white syringe with a red cross symbol on the barrel, a white droplet to the right of the syringe, and a yellow needle." + }, + { + "question_id": 159, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular blue icon featuring a white wine glass with a yellow liquid on the left and a white bottle with a yellow liquid and a brown cap on the right." + }, + { + "question_id": 160, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular icon with a dark blue background featuring two stylized human figures. The figure on the left has short, light brown hair and is wearing a red shirt. The figure on the right has short, light brown hair and is wearing a brown shirt with a white collar." + }, + { + "question_id": 161, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular icon with a solid olive green background featuring a white silhouette of the Earth in the center." + }, + { + "question_id": 162, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person sitting on a chair with their head in their hands, wearing a blue shirt and black pants." 
+ }, + { + "question_id": 163, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with short brown hair, wearing a blue long-sleeve shirt and yellow pants, is sitting on a white platform with a blue wave design at the bottom." + }, + { + "question_id": 164, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A woman with dark hair tied back, wearing a pink top, is depicted with a thought bubble above her head." + }, + { + "question_id": 165, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with red hair, wearing a green top and blue pants, is sitting on a windowsill with their legs crossed." + }, + { + "question_id": 166, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A woman with dark hair tied back, wearing a blue sleeveless top and black leggings, is running with her arms slightly bent and her legs in motion. She has a white earphone cord hanging from her right ear." 
+ }, + { + "question_id": 167, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A woman with black hair tied back, wearing a green headband, a green sleeveless top, and black pants, is sitting cross-legged with her hands pressed together in a prayer position." + }, + { + "question_id": 168, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "A green dinosaur with a white belly, a purple dinosaur with a white belly, and a green dinosaur with a white belly." + }, + { + "question_id": 169, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon boy with a purple bandage on his forehead, wearing a blue shirt and blue pants, is holding a purple object in his right hand." + }, + { + "question_id": 170, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "A lime green refrigerator with a single door, featuring a black handle on the right side. The door has a horizontal indentation near the top." 
+ }, + { + "question_id": 171, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "A small, rectangular, lime green refrigerator with a single door featuring a vertical handle on the left side. The bottom section of the refrigerator is orange with a horizontal handle." + }, + { + "question_id": 172, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "A dark-colored, rectangular structure with a flat roof and vertical sides, featuring a small, square window on the upper left side and a larger, rectangular window on the lower right side." + }, + { + "question_id": 173, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "A tree with dense, green foliage." + }, + { + "question_id": 174, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "A cylindrical, metallic pole with a consistent diameter throughout its length, featuring a series of evenly spaced, horizontal bands encircling its surface." + }, + { + "question_id": 175, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A deep, dark-colored bowl with a wide, flared rim and a smooth, glossy finish." 
+ }, + { + "question_id": 176, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A modern kitchen with a stainless steel oven and a black cooktop. The oven has a digital display and control panel, and there is a visible handle on the oven door. The cooktop has multiple burners with black grates." + }, + { + "question_id": 177, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A white, rectangular wall socket with a single, round, black power switch located on the right side." + }, + { + "question_id": 178, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A white, rectangular light switch with rounded edges, featuring a central toggle switch mechanism." + }, + { + "question_id": 179, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "Stainless steel built-in oven with a large glass door, featuring a digital control panel above the door with multiple buttons and a display screen." + }, + { + "question_id": 180, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A tall, slender vase with a flared rim and a narrow neck that gradually widens into a bulbous base, featuring a glossy finish with a gradient of colors transitioning from a deep blue at the top to a greenish hue towards the bottom." 
+ }, + { + "question_id": 181, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "The floor is composed of medium-toned wooden planks with a smooth, polished finish. The wood grain is visible, running lengthwise along the planks, which are laid out parallel to each other. The planks have a consistent width and exhibit a warm, reddish-brown hue." + }, + { + "question_id": 182, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A vertical, wall-mounted spice rack with multiple tiers, each tier holding several glass jars with metal lids. The jars are arranged in a single column, and the rack appears to be made of a dark, possibly wooden material." + }, + { + "question_id": 183, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A round, dark-colored table with a smooth surface and a central pedestal base." + }, + { + "question_id": 184, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "The chair features a high, slightly curved backrest and seat cushion upholstered in a woven fabric with a diamond pattern. The fabric is primarily light green with a central vertical stripe in a slightly darker shade. The armrests are padded and covered in the same woven fabric, with a light green color. The chair's legs are dark-colored and straight." 
+ }, + { + "question_id": 185, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A purple place mat with a textured surface and a white circular design in the center." + }, + { + "question_id": 186, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A vertical traffic light with three circular lenses arranged in a column, displaying a red light at the top, an unlit middle lens, and a green light at the bottom, all encased in a black housing with a visor over each lens." + }, + { + "question_id": 187, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A pink and white striped awning with a scalloped edge and fringe detailing." + }, + { + "question_id": 188, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A tall, dark brown tree trunk with a rough, textured bark. The trunk is relatively straight and has a consistent width throughout its visible length." + }, + { + "question_id": 189, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A chalkboard sign with a wooden frame displaying the text \"Château de la Bertrandière\" in elegant, cursive script." 
+ }, + { + "question_id": 190, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A tall, narrow, rectangular window with a dark frame and a single vertical pane of clear glass." + }, + { + "question_id": 191, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "A chestnut horse with a white star on its forehead, dark eyes, and a soft, dark muzzle. Its ears are pricked forward, and the mane appears to be a lighter shade of chestnut, blending into the darker coat. The horse's neck shows a gentle curve, and the coat has a healthy sheen." + }, + { + "question_id": 192, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "A brown horse with a dark mane." + }, + { + "question_id": 193, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "A light gray horse with a darker mane and tail, featuring a well-muscled build, a straight profile, and a calm demeanor." + }, + { + "question_id": 194, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A white, rectangular table with rounded corners and a smooth surface." 
+ }, + { + "question_id": 195, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A modern black induction cooktop featuring a sleek design with two visible cooking zones, each outlined with a white circular pattern. The front edge has a stainless steel trim with control buttons, and there are small yellow indicator lights above the buttons." + }, + { + "question_id": 196, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A glossy, metallic spoon with a reflective surface and a tapered handle, featuring a rounded bowl with a slight indentation on one side." + }, + { + "question_id": 197, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A metallic spoon with a reflective surface and a tapered handle that widens towards the end, featuring a rounded bowl with a pointed tip." + }, + { + "question_id": 198, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000299654.jpg", + "category": "natural_detailed_caption_box", + "text": "A zebra with a distinctive black and white striped pattern, featuring a mane of short, erect hair transitioning from black at the base to white at the tips. The stripes on the neck are vertical and become more horizontal as they reach the mane. The visible part of the zebra's face shows a pattern of narrow stripes that converge around the eyes and muzzle, with a white area above the eyes and a dark nose. The ears are pointed and display a striped pattern consistent with the head." 
+ }, + { + "question_id": 199, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000107939.jpg", + "category": "natural_detailed_caption_box", + "text": "An octagonal red stop sign with bold, all-caps white lettering spelling \"STOP\" centered on the sign." + }, + { + "question_id": 200, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000107939.jpg", + "category": "natural_detailed_caption_box", + "text": "An octagonal sign with a red background and a white border, featuring the word \"STOP\" in white uppercase letters." + }, + { + "question_id": 201, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "A dark brown, square-shaped cushion with a visible textured surface that suggests a soft, plush fabric." + }, + { + "question_id": 202, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "A dark brown, square-shaped pillow with a visible corner that appears to be soft and plush." + }, + { + "question_id": 203, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "The earlobe is small and rounded, with a smooth, slightly glossy surface. It is adorned with a small, round, gold-colored earring that has a subtle, reflective sheen." 
+ }, + { + "question_id": 204, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "A young girl with curly hair, wearing a patterned top with a mix of geometric and floral designs in dark and vibrant colors, paired with long-sleeved pink undershirts. Her expression is one of mild surprise or excitement, with her mouth slightly open and eyes looking upwards. Her arms are outstretched with palms facing up, as if gesturing or presenting something." + }, + { + "question_id": 205, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "A young boy with curly hair, wearing a striped sweater with shades of blue, white, and brown, stands with his hands clasped together. He has a focused expression on his face, with his mouth slightly open and his eyes looking to the side." + }, + { + "question_id": 206, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A ski parka featuring a color-block design with a white torso, green sleeves, and black shoulder panels. The jacket has a high collar and a front zipper closure. There are red accents on the cuffs and a red logo on the left chest area. The parka is paired with a black helmet with a visor." + }, + { + "question_id": 207, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "The knee of the person is covered in a dark purple snowsuit with a slightly glossy finish. The fabric appears thick and durable, suitable for cold weather. 
The knee area is slightly bent, indicating a relaxed stance. The snowsuit has a subtle sheen, reflecting light, and the material appears to be tightly fitted around the knee." + }, + { + "question_id": 208, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "The trousers are dark-colored, possibly black or dark gray, with a straight-leg cut. They feature a visible side pocket on the right leg with a flap closure, and there are belt loops around the waistband. The fabric appears to be a sturdy material, potentially denim or a similar thick textile." + }, + { + "question_id": 209, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A blue helmet with a glossy finish, featuring a prominent white stripe with a red outline running horizontally across the middle. The helmet has a black visor attached at the front, and a black chin strap with a buckle hanging down from the sides." + }, + { + "question_id": 210, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "The ski boot is predominantly black with a sleek, modern design. It features a sturdy, high-ankle structure for support and a smooth, matte finish. The boot has a contoured shape to fit the foot and lower leg, with a slightly raised heel for added stability. The sole is thick and textured for grip and durability." 
+ }, + { + "question_id": 211, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "The sleeve of the sweater is predominantly white with a bold red graphic of a person skiing. The fabric appears to be a soft, knit material, and the sleeve is slightly loose-fitting, extending to the wrist. The red graphic is positioned centrally on the sleeve, adding a dynamic contrast to the white background." + }, + { + "question_id": 212, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A person wearing a black helmet, goggles, and a white jacket with red and black accents, including a red logo on the left chest area. The individual is also wearing black gloves and appears to be in a skiing stance." + }, + { + "question_id": 213, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2411153.jpg", + "category": "natural_detailed_caption_box", + "text": "A red, white, and green racing motorcycle fairing with the number 69 prominently displayed in white on a red background, flanked by green and white stripes. The fairing features sponsor logos and a black lower section with a vent." + }, + { + "question_id": 214, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2411153.jpg", + "category": "natural_detailed_caption_box", + "text": "A series of parallel, evenly spaced, horizontal lines with a consistent width, running across the entire width of the image." 
+ }, + { + "question_id": 215, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/22d3844dcf29a07bd10f557a33684e331846f81b938ca0f742afab09c542133f.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The mass of the quark is a very small, massless particle that is part of the quark mass. It is a fundamental particle that is responsible for the mass of the quark, and it is often used in particle physics to study the properties of quarks and their interactions." + }, + { + "question_id": 216, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/22d3844dcf29a07bd10f557a33684e331846f81b938ca0f742afab09c542133f.png", + "category": "ocr_doc_detailed_caption_box", + "text": "A lowercase letter 'v' with a serif at the top and a tail at the bottom, featuring a bold, sans-serif font." + }, + { + "question_id": 217, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The geometry and adsorption energies for the structures of thio-glycolic acid on Au(111) at 0.25ML." + }, + { + "question_id": 218, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The table contains a list of adsorption sites with their corresponding adsorption energies. The adsorption sites are listed in a column, with each site having a corresponding adsorption energy. The adsorption energy is listed in a separate column, with each site having a corresponding energy value. The table also includes the adsorption energy for the Au-S bond, which is listed in a separate column. 
The adsorption energy for the Au-S bond is listed in a separate column, with each site having a corresponding energy value." + }, + { + "question_id": 219, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The adsorption energy is 0.63 eV." + }, + { + "question_id": 220, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The adsorption energy for the most stable structure of the HSCH2COOH on the Au(111) surface is 0.63 eV. The adsorption site preferred by the sulfur atom is located at the top of the gold atom. The polar angle between the normal vector of the surface and the S-C2 direction is 74.2 degrees. The HSCH2COOH tend to lie down at the low coverage." + }, + { + "question_id": 221, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "A black and white image of a person's face with a neutral expression, featuring a prominent nose, closed lips, and visible teeth. The person has short hair and is wearing a dark-colored top." + }, + { + "question_id": 222, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The segmented image shows two graphs. The first graph on the left is titled \"Atom prediction performance (a and c)\" and features a scatter plot with blue and orange dots. 
The second graph on the right is titled \"Bond prediction performance (a and c)\" and also features a scatter plot with blue and orange dots. Both graphs have a similar layout with x and y axes, and the dots are evenly spaced." + }, + { + "question_id": 223, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The F1 score for segmentation and classification networks." + }, + { + "question_id": 224, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The performance of segmentation network is measured by the F1 score, which is a metric that takes into account both precision and recall. The F1 score is used to evaluate the performance of the segmentation network in terms of its ability to correctly segment the image. The segmentation network is compared to other networks, and the F1 score is used to determine which network is the most effective." + }, + { + "question_id": 225, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The segmentation network is a deep learning model that uses a combination of atom, bond, and charge types to predict the pixel values. The model is trained on a large amount of data and has a high F1 score, indicating its high accuracy. The F1 score is a measure of the model's performance, taking into account both precision and recall. The model is designed to be used for various tasks, such as image segmentation and object detection." 
+ }, + { + "question_id": 226, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The performance of the different types of networks is measured by the F1 score, which is a measure of the accuracy of the network. The F1 score is higher for the different types of networks, indicating that they are performing well. The performance of the different types of networks is compared to the performance of the segmentation networks." + }, + { + "question_id": 227, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The performance of the different parts of the network is measured by the F1 score, which is a measure of the accuracy of the network. The F1 score is used to evaluate the performance of the network in different tasks, such as atom, bond, and charge type classifications. The network is able to do a good job even when the segmentation is not perfect, and the performance of the different parts of the network is significantly higher than the segmentation networks." + }, + { + "question_id": 228, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "8.3 Overall graph accuracy" + }, + { + "question_id": 229, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The overall graph accuracy is a measure of the performance of the different parts of the graph, including the segmentation network and the classification network. 
The segmentation network is used to segment the graph into different parts, and the classification network is used to predict the type of the graph. The overall graph accuracy is a combination of these two parts, and it is used to measure the performance of the graph." + }, + { + "question_id": 230, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The number 11 is displayed in a bold, sans-serif font with a dark color, possibly black or dark gray, against a lighter background. The digits are evenly spaced and aligned vertically." + }, + { + "question_id": 231, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The sugar is a low GI sweet spot, which is a type of sugar that has a low glucose index." + }, + { + "question_id": 232, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The low GI sweet spot is at least 22mg CE/100mg sucrose." + }, + { + "question_id": 233, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The sugar has a low GI of about 22-32 mg CE/100g polyphenols. The polyphenol content is high, with a range of 22-32 mg CE/100g polyphenols. The sugar is hygroscopic, with a higher moisture content, and the polyphenol content increases as the sugar becomes more saturated." 
+ }, + { + "question_id": 234, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The massacuit is a dark brown, viscous liquid with a glossy sheen, contained in a clear glass bottle." + }, + { + "question_id": 235, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The massacuites are prepared at two different sugar mills, designated as \"Mill 1\" and \"Mill 2\". The polyphenol content of each sample is determined and washed until they reach the desired polyphenol content, which is roughly 500 to 2000 ICUMSA. The results are in Table 4 below." + }, + { + "question_id": 236, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "A black and white image of a sugar cube with a textured surface, featuring a pattern of small, raised dots." + }, + { + "question_id": 237, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The image shows a collection of documents with text, some of which are in black and white, and others in color. The documents are arranged in a grid-like pattern, with some documents having a red border. The text on the documents is in various fonts and sizes, and some documents have a red box around the text." 
+ }, + { + "question_id": 238, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The OCR process." + }, + { + "question_id": 239, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The text is a letter written in English, with the first line reading \"Available OCR\" followed by \"for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The second line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The third line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The fourth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The fifth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The sixth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The seventh line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical 
work which I carried out under your direction.\" The eighth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The ninth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The tenth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The eleventh line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The twelfth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The thirteenth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The fourteenth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The fifteenth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The sixteenth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to 
accept as an excuse for my shortcomings in this Report.\" The seventeenth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The eighteenth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The nineteenth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The twentieth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The twenty-first line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The twenty-second line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The twenty-third" + }, + { + "question_id": 240, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The Internet Archive image containing this excerpt can be accessed here: https://archive.org/details/b2439867/page/n7" + }, + { + "question_id": 241, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + 
"text": "Journal of Data Mining and Digital Humanities" + }, + { + "question_id": 242, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The text is a web address in blue, displaying the URL \"http://dmh.esciences.org\"." + }, + { + "question_id": 243, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The number 9 is displayed in a bold, sans-serif font with a slight shadow effect, giving it a three-dimensional appearance. The color of the number is a gradient of dark to light gray, with the darker shade at the top and the lighter shade at the bottom." + }, + { + "question_id": 244, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text \"IOOF Annual Report 2012\" is displayed in a serif font, with \"IOOF\" in a larger size and \"Annual Report 2012\" in a smaller size. The text is in a light green color." + }, + { + "question_id": 245, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Non-Executive Directors' Remuneration" + }, + { + "question_id": 246, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "4.1. 
Components of Non-Executive Director remuneration" + }, + { + "question_id": 247, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text \"page 37\" is written in lowercase letters." + }, + { + "question_id": 248, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The company is a non-executive director." + }, + { + "question_id": 249, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The Company's Constitution requires that the aggregate remuneration paid or provided to all Non-Executive Directors in any financial year by the Company, its subsidiaries and associated entities may not exceed an amount approved by shareholders. The ceiling amount includes all remuneration provided to Non-Executive Directors, including superannuation but not including retirement benefits. The current limit of $980,000 per annum was approved by shareholders at the 2010 Annual General Meeting. There has been no increase to the Non-Executive Director fee pool since this time." + }, + { + "question_id": 250, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Non-Executive Directors receive a fee for being a Director of the Board. An additional fee is paid to the Chairman of the Board. 
Non-Executive Directors do not receive additional fees for service on Board and Committees. The Company's Constitution requires that the aggregate remuneration paid or provided to all Non-Executive Directors in any financial year may not exceed an amount approved by shareholders." + }, + { + "question_id": 251, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text is a mathematical equation involving the homotopy group of a space." + }, + { + "question_id": 252, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The region is a monopole, which is a type of topological defect." + }, + { + "question_id": 253, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The topological conditions of formation of defects only govern the formation of topologically stable defects. It was found that defects solutions can form even when the topology is trivial. The most well-known example are the electro-weak strings, formed during the electroweak symmetry breaking, which are perturbatively stable for a range of parameters which are not realized in nature, and belong to the broader class of embedded defects." 
+ }, + { + "question_id": 254, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "These defects are a priori unstable though mechanisms (such as plasma effects) have been found to stabilize them. They are of interest for inflation model builders since this mechanism can allow lift the constraints from the formation of cosmic strings (see Sec. IV F on D-term inflation)." + }, + { + "question_id": 255, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "A black and white image of a mathematical equation with the variables \"x\" and \"y\" in a bold font, followed by a period and the number \"1\" in a smaller font." + }, + { + "question_id": 256, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The number 72 is displayed in a bold, black font with a slight shadow effect, giving it a three-dimensional appearance. The numerals are evenly spaced and aligned horizontally." + }, + { + "question_id": 257, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0301_0188_0040.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "A stylized illustration of a woman with a bun hairstyle, featuring a pattern of white, cloud-like shapes with small black dots scattered throughout. The woman is wearing a pink garment with a polka dot pattern." 
+ }, + { + "question_id": 258, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0301_0188_0040.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "A red and white polka dot pattern with a small, dark, irregularly shaped mark near the center." + }, + { + "question_id": 259, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The number 62 is displayed in a bold, sans-serif font with a dark color, possibly black or dark gray, against a lighter background. The numerals are evenly spaced and aligned horizontally." + }, + { + "question_id": 260, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Chapter 2: Motivation" + }, + { + "question_id": 261, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The string theory is a type of string theory that is compactified on the near-horizon geometry. It is a decoupled theory that combines the two pictures of the same low energy limit of one theory, Type IIB string theory." + }, + { + "question_id": 262, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The region is a field theory picture of the low energy limit of Type IIB string theory." 
+ }, + { + "question_id": 263, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The regions for which analytic tools exist for these two different pictures turn out to be completely incompatible." + }, + { + "question_id": 264, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The equation is a complex mathematical expression involving various variables and functions. It includes a combination of Greek letters, numbers, and mathematical operators. The visible part of the equation shows a series of variables and functions that are interconnected, with some parts appearing to be in parentheses. The equation is written in a formal, mathematical notation." + }, + { + "question_id": 265, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The presence of an insider on the market does not necessarily lead to arbitrage, and the presence of insiders might be considered beneficial to the market, in the sense that it leads to higher information efficiency of the equilibrium price process." 
+ }, + { + "question_id": 266, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The fundamental value of the firm is a stochastic process, and the insider can observe it directly or at least observe it in a less noisy way than the other agents on the market." + }, + { + "question_id": 267, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The paper relaxes the assumption of static insider information and studies the equilibrium trading and price processes, as well as market efficiency, in a setting with dynamic private information." + }, + { + "question_id": 268, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The model is a generalization of the static information setting of [2]." + }, + { + "question_id": 269, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The model is a generalization of the static information setting of [2] and is designed to include dynamic information. It has a much smaller set of admissible trading strategies and pricing rules compared to the ones considered in the work. The model shows the existence of a unique Markovian equilibrium, which is an equilibrium price that allows the insider to trade undetected and depends only on the total order process. 
The model also shows that the presence of an insider increases the market informational efficiency for times close to the time of the order." + }, + { + "question_id": 270, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "A green and white striped awning with a scalloped edge and fringe detailing." + }, + { + "question_id": 271, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A red, three-dimensional, cursive sign with the word \"Abondana\" written in a flowing, elegant script." + }, + { + "question_id": 272, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard features the word \"Cafe\" in a stylized, cursive font with a gradient of red to dark red, giving it a three-dimensional appearance. The letters are slightly italicized and have a shadow effect, enhancing their depth." + }, + { + "question_id": 273, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The logo features the letters \"USIS\" in bold, with \"US\" in red and \"IS\" in black. Below the letters, there is a tagline in smaller, gray font." + }, + { + "question_id": 274, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"ESTATE\" is written in bold, black, uppercase letters on a yellow background." 
+ }, + { + "question_id": 275, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signage displays the word \"AGENTS\" in bold, uppercase letters. The letters are black and set against a yellow background." + }, + { + "question_id": 276, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signage displays the word \"SAXONS\" in bold, black capital letters on a yellow background." + }, + { + "question_id": 277, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard features the word \"SAXONS\" in capital letters with a serif font, set against a dark background. The letters are in a metallic gold color with a slight gradient, giving them a three-dimensional appearance. The signboard has a reflective surface, suggesting it is made of a glossy material." + }, + { + "question_id": 278, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard displays the word \"ESTATE\" in capital letters, with a serif font. The letters are dark and possibly metallic, with a reflective surface that catches the light, giving them a slightly shiny appearance. The background of the signboard is not visible, but the letters are set against a dark backdrop that contrasts with the lighter color of the text." 
+ }, + { + "question_id": 279, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard features capitalized, serif lettering spelling \"AGENTS\" with a metallic finish and a slight gradient, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 280, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"Triple\" is written in a cursive, green font with a white outline. The letters are slightly italicized and have a playful, rounded design." + }, + { + "question_id": 281, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"WHITE\" is written in bold, uppercase letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 282, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"SPOT\" is written in bold, uppercase letters with a light blue color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." 
+ }, + { + "question_id": 283, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A stylized, cursive letter \"O\" with a green outline and a white fill, featuring a small, curved tail extending from the bottom right." + }, + { + "question_id": 284, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The visible part of the ag is a white, stylized letter \"A\" with a slight shadow effect, giving it a three-dimensional appearance." + }, + { + "question_id": 285, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001122.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"NEW\" is written in bold, red, uppercase letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 286, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001122.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"YORK\" is written in bold, red, uppercase letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 287, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_162.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"Colchester\" is written in a bold, sans-serif font with a dark blue color." 
+ }, + { + "question_id": 288, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signage features bold, black, uppercase letters spelling \"AYAM\" on a yellow background." + }, + { + "question_id": 289, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signage features large, bold, red letters \"RUA\" on a yellow background." + }, + { + "question_id": 290, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A yellow sign with the word \"SMASHED\" in bold, black, uppercase letters." + }, + { + "question_id": 291, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A dark brown, rectangular sign with the word \"FRIED\" in bold, uppercase letters." + }, + { + "question_id": 292, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A brown sign with the word \"CHICKEN\" in bold, uppercase letters." 
+ }, + { + "question_id": 293, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "Accommodation" + }, + { + "question_id": 294, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"Office\" is written in a bold, sans-serif font with a dark blue color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 295, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"Nightline\" is written in a bold, sans-serif font with a dark blue color." + }, + { + "question_id": 296, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A red and white signboard with the word \"BUBBA\" in bold, capitalized, red letters on a white background, with a red border around the sign." + }, + { + "question_id": 297, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard features the word \"GUMP\" in large, bold, red capital letters with a white outline. The letters are set against a textured, light-colored background that resembles a stone or concrete surface." 
+ }, + { + "question_id": 298, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard features the word \"SHRIMP\" in capital letters, with a bold, sans-serif font. The letters are white with a slight shadow effect, giving them a three-dimensional appearance. The background of the signboard is a deep blue color, providing a stark contrast to the white text." + }, + { + "question_id": 299, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A curved signboard with a red border and a white background, featuring the word \"RESTAURANT\" in bold, black, uppercase letters." + }, + { + "question_id": 300, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A curved, red and white signboard with the word \"MARKET\" in capital letters, featuring a serif font." + }, + { + "question_id": 301, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A circular blue signboard with a white border and a white symbol resembling a stylized letter 'C' in the center." + }, + { + "question_id": 302, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A circular, metallic signboard with a textured background featuring the letters \"TM\" in a bold, sans-serif font, centered on the sign." 
+ }, + { + "question_id": 303, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"EVENING\" is written in bold, uppercase letters with a gradient of red to pink. The letters are evenly spaced and have a slightly blurred effect, giving them a soft, glowing appearance." + }, + { + "question_id": 304, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"PRIMROSE\" is written in bold, uppercase letters with a gradient of pink to red. The letters are evenly spaced and have a slightly blurred effect, giving them a soft, glowing appearance." + }, + { + "question_id": 305, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"BASE\" is written in bold, uppercase letters with a red background and white outline. The letters are evenly spaced and have a slightly distressed texture." + }, + { + "question_id": 306, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"OIL\" is written in bold, uppercase letters with a gradient of red to pink. The letters are evenly spaced and have a slightly blurred appearance." 
+ }, + { + "question_id": 307, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01644.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The number 46 is displayed in a large, bold, white font with a slight shadow effect, giving it a three-dimensional appearance. The numerals are evenly spaced and have a smooth, rounded design." + }, + { + "question_id": 308, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green calendar with white text displaying the numbers 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 
300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369," + }, + { + "question_id": 309, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green rectangular button with rounded corners featuring the word \"Connecter\" in white lowercase letters, followed by a yellow icon resembling a lock with a keyhole." + }, + { + "question_id": 310, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green, stylized, lowercase letter \"a\" with a slight shadow effect, giving it a three-dimensional appearance." + }, + { + "question_id": 311, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green, stylized lowercase \"a\" with a curved tail extending from the bottom right, resembling a lowercase \"i\" with a dot above it." + }, + { + "question_id": 312, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green rectangular sign with white text displaying the numbers \"07\" in a bold, sans-serif font." 
+ }, + { + "question_id": 313, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "The word \"septembre\" in lowercase letters, with a green background and white font." + }, + { + "question_id": 314, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "The text \"Lien Web\" is written in a bold, sans-serif font with a green color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 315, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "The numbers 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30 are displayed in a green color." + }, + { + "question_id": 316, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green and white striped awning with a scalloped edge and a fringe detail." 
+ }, + { + "question_id": 317, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "Plan du site" + }, + { + "question_id": 318, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "A white arrow pointing to the right, with a slight curve at the tail end, is positioned to the right of the text \"Book Now.\"" + }, + { + "question_id": 319, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "About Us" + }, + { + "question_id": 320, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The text \"Pay Balance\" is written in bold, black, sans-serif font." 
+ }, + { + "question_id": 321, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "About Us" + }, + { + "question_id": 322, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "contact us" + }, + { + "question_id": 323, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The word \"About\" is written in a bold, sans-serif font with a gradient of pink to red, set against a teal background." + }, + { + "question_id": 324, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "A teal-colored horizontal bar with white text and symbols. On the left side, there is a white envelope icon followed by the text \"doonawash@gmail.com\". On the right side, there is a white icon resembling a person in a wheelchair." + }, + { + "question_id": 325, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The text \"Contact us\" is written in a bold, sans-serif font with a pinkish-red color. The letters are evenly spaced and aligned horizontally." 
+ }, + { + "question_id": 326, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The word \"Home\" is written in a bold, sans-serif font with a gradient of blue shades, transitioning from a lighter blue at the top to a darker blue at the bottom." + }, + { + "question_id": 327, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The text \"Pay Balance\" is written in a bold, sans-serif font with a pinkish hue. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 328, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_0558c1f4-c05b-49a8-8479-04b1575779d2.png", + "category": "web_detailed_caption_box", + "text": "OpenStreetMap Belgium" + }, + { + "question_id": 329, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "A black arrow pointing to the right." + }, + { + "question_id": 330, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "A white rectangular tag with the number \"4.9\" in bold black font centered on it." 
+ }, + { + "question_id": 331, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "A rectangular blue button with white text that reads \"Pulsuz Konsultasyon.\"" + }, + { + "question_id": 332, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "The word \"Portfolio\" in a bold, sans-serif font, with a slight italicization, and a drop shadow effect, giving it a three-dimensional appearance. The letters are black with a white outline, and the text is set against a plain background." + }, + { + "question_id": 333, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "A white shopping cart icon with a blue outline, featuring a rectangular basket with a grid pattern, two vertical handles, and four wheels, two of which are visible." + }, + { + "question_id": 334, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "The word \"SUPPORT\" in bold, uppercase letters with a blue background and white outline." + }, + { + "question_id": 335, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "A white lowercase letter \"f\" with a bold, sans-serif font, set against a blue background." 
+ }, + { + "question_id": 336, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "Your Charts" + }, + { + "question_id": 337, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "A blue \"X\" with a white outline, featuring a slightly darker blue fill and a lighter blue border." + }, + { + "question_id": 338, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "A rectangular white sign with the words \"CONTACT US\" in bold, uppercase, blue letters." + }, + { + "question_id": 339, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "A rectangular blue button with rounded corners and a white border. The button has the words \"ADD TO CART\" in bold, white, uppercase letters centered on it." + }, + { + "question_id": 340, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "The word \"PRODUCTS\" is written in bold, uppercase letters with a blue background. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." 
+ }, + { + "question_id": 341, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "The word \"PODCAST\" is written in bold, uppercase letters with a blue background. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 342, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "The text \"ABOUT US\" is written in bold, uppercase letters. The letters are blue and have a slight shadow effect, giving them a three-dimensional appearance. The text is centered horizontally." + }, + { + "question_id": 343, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "The text \"WHAT WE DO\" is written in bold, uppercase letters. The letters are evenly spaced and have a modern, sans-serif font style. The color of the text is black, and it stands out against a light background." + }, + { + "question_id": 344, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "GWM launches livelihood micro-grants" + }, + { + "question_id": 345, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "A rectangular white button with a black border and the word \"Settings\" in black, bold, sans-serif font centered on it." 
+ }, + { + "question_id": 346, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "Privacy Policy" + }, + { + "question_id": 347, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "The word \"HOME\" in uppercase letters, with a bold, sans-serif font, is centered on the image. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance. The color of the text is a gradient of light to dark gray, creating a subtle contrast against the background." + }, + { + "question_id": 348, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "A rectangular black button with the word \"Accept\" in white, bold, sans-serif font centered on it." + }, + { + "question_id": 349, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "A red, oval-shaped button with a white border and the word \"DONATE\" in bold, uppercase, red letters centered on it." + }, + { + "question_id": 350, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "The word \"NEWS\" in bold, uppercase letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and have a clean, modern font style." 
+ }, + { + "question_id": 351, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "The text \"ABOUT US\" is written in bold, uppercase letters with a sans-serif font. The letters are evenly spaced and aligned horizontally. The color of the text is black, and it stands out against a light background." + }, + { + "question_id": 352, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "The text \"CONTACT US\" is written in bold, uppercase letters. The letters are evenly spaced and have a modern, sans-serif font. The color of the text is black, and it stands out against a light background." + }, + { + "question_id": 353, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_6c677961-e540-4cc5-b725-5e301019a9f9.png", + "category": "web_detailed_caption_box", + "text": "A black and white icon depicting a stylized, abstract representation of a building with a flat roof and multiple rectangular windows arranged in a grid pattern." + }, + { + "question_id": 354, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The word \"News\" in a bold, sans-serif font, with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and have a dark color, contrasting with the lighter background." 
+ }, + { + "question_id": 355, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The logo features the word \"ServeGate\" in bold, black letters. To the left of the text, there is a stylized design consisting of two overlapping triangles, one in teal and the other in red, with a black line separating them." + }, + { + "question_id": 356, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "Our Difference" + }, + { + "question_id": 357, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The text \"About Us\" is written in a bold, sans-serif font with a red color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 358, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "A rectangular button with rounded corners, featuring a light pink background and a thin red border. The button displays the text \"Get in touch\" in bold, red, sans-serif font." + }, + { + "question_id": 359, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The word \"Home\" in a serif font, with the letters in a light gray color against a white background." 
+ }, + { + "question_id": 360, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The word \"Services\" in a bold, sans-serif font, with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 361, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "Indigenous Impact" + }, + { + "question_id": 362, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The text \"ServeGate\" is written in bold, black, sans-serif font. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 363, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_5a35d9c2-2c2d-4a49-ad0a-1408d9cac78e.png", + "category": "web_detailed_caption_box", + "text": "A rectangular white button with rounded corners, featuring the text \"Close issue\" in bold, black, sans-serif font." + }, + { + "question_id": 364, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_3fed2169-3c3d-43e7-baaa-3bf0e0c7134c.png", + "category": "web_detailed_caption_box", + "text": "A vertical array of three circular, dark-colored buttons with a slightly raised, smooth surface, aligned centrally on a light-colored background." 
+ } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/android_detailed_caption_box/question.json b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/android_detailed_caption_box/question.json new file mode 100644 index 0000000000000000000000000000000000000000..ec8b74285a791635b57237bb12e062821bd02ed0 --- /dev/null +++ b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/android_detailed_caption_box/question.json @@ -0,0 +1,694 @@ +[ + { + "question_id": 1, + "image": "web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 82, + 117, + 374, + 35 + ], + [ + 36, + 1006, + 46, + 58 + ], + [ + 328, + 105, + 35, + 35 + ], + [ + 457, + 900, + 23, + 46 + ], + [ + 153, + 900, + 35, + 46 + ], + [ + 59, + 912, + 23, + 23 + ], + [ + 59, + 1006, + 23, + 46 + ], + [ + 457, + 1017, + 23, + 35 + ], + [ + 258, + 1111, + 23, + 35 + ], + [ + 129, + 1111, + 11, + 35 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 2, + "image": "install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 86, + 182, + 45, + 15 + ], + [ + 390, + 182, + 106, + 15 + ], + [ + 223, + 182, + 106, + 30 + ], + [ + 557, + 182, + 121, + 30 + ], + [ + 71, + 395, + 75, + 15 + ], + [ + 588, + 395, + 45, + 15 + ], + [ + 208, + 395, + 136, + 15 + ], + [ + 375, + 395, + 136, + 15 + ], + [ + 71, + 516, + 75, + 30 + ], + [ + 588, + 516, + 45, + 30 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 3, + "image": "google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 140, + 102, + 80, + 21 + ], + [ + 227, + 102, + 102, + 21 + ], + [ + 206, + 175, + 
87, + 29 + ], + [ + 301, + 175, + 58, + 21 + ], + [ + 125, + 175, + 65, + 29 + ], + [ + 220, + 212, + 43, + 21 + ], + [ + 271, + 212, + 95, + 29 + ], + [ + 125, + 219, + 80, + 21 + ], + [ + 125, + 307, + 161, + 21 + ], + [ + 30, + 344, + 51, + 29 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 4, + "image": "web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 30, + 68, + 285, + 22 + ], + [ + 87, + 136, + 148, + 22 + ], + [ + 247, + 148, + 34, + 11 + ], + [ + 87, + 171, + 205, + 22 + ], + [ + 87, + 228, + 319, + 22 + ], + [ + 87, + 262, + 102, + 22 + ], + [ + 315, + 319, + 102, + 34 + ], + [ + 87, + 319, + 79, + 22 + ], + [ + 167, + 319, + 136, + 22 + ], + [ + 87, + 353, + 102, + 22 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 5, + "image": "install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 132, + 76, + 76, + 30 + ], + [ + 162, + 182, + 76, + 15 + ], + [ + 56, + 182, + 45, + 60 + ], + [ + 572, + 212, + 75, + 15 + ], + [ + 162, + 212, + 76, + 30 + ], + [ + 253, + 258, + 106, + 15 + ], + [ + 162, + 258, + 76, + 15 + ], + [ + 86, + 319, + 60, + 30 + ], + [ + 299, + 319, + 45, + 30 + ], + [ + 451, + 364, + 75, + 15 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 6, + "image": "google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 110, + 79, + 148, + 22 + ], + [ + 30, + 171, + 22, + 22 + ], + [ + 30, + 216, + 34, + 11 + ], + [ + 64, + 216, + 125, + 11 + ], + [ + 201, + 216, + 68, + 11 + ], + [ + 30, + 239, + 159, + 11 + ], + [ + 190, + 239, + 102, + 11 + ], + [ + 99, + 330, + 159, + 22 + 
], + [ + 99, + 364, + 136, + 11 + ], + [ + 247, + 364, + 79, + 11 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 7, + "image": "google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 103, + 80, + 204, + 21 + ], + [ + 37, + 417, + 29, + 7 + ], + [ + 23, + 658, + 43, + 36 + ], + [ + 52, + 358, + 14, + 36 + ], + [ + 37, + 578, + 21, + 36 + ], + [ + 44, + 658, + 14, + 36 + ], + [ + 345, + 666, + 21, + 29 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 8, + "image": "single_1849.png", + "category": "android_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 121, + 68, + 262, + 22 + ], + [ + 30, + 136, + 171, + 45 + ], + [ + 304, + 171, + 79, + 22 + ], + [ + 406, + 171, + 57, + 22 + ], + [ + 486, + 182, + 22, + 0 + ], + [ + 42, + 216, + 91, + 34 + ], + [ + 178, + 228, + 68, + 11 + ], + [ + 19, + 296, + 125, + 22 + ], + [ + 270, + 296, + 136, + 22 + ], + [ + 64, + 330, + 68, + 22 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 9, + "image": "install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 156, + 125, + 250, + 34 + ], + [ + 418, + 125, + 68, + 34 + ], + [ + 156, + 182, + 114, + 22 + ], + [ + 42, + 193, + 22, + 11 + ], + [ + 76, + 193, + 34, + 11 + ], + [ + 156, + 216, + 114, + 22 + ], + [ + 156, + 250, + 68, + 11 + ], + [ + 235, + 250, + 22, + 11 + ], + [ + 452, + 307, + 45, + 11 + ], + [ + 64, + 307, + 45, + 11 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 10, + "image": "single_2921.png", + "category": "android_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + 
"annotation": { + "bbox": [ + [ + 51, + 24, + 108, + 12 + ], + [ + 51, + 36, + 48, + 12 + ], + [ + 21, + 72, + 72, + 18 + ], + [ + 231, + 78, + 30, + 6 + ], + [ + 15, + 114, + 78, + 12 + ], + [ + 15, + 150, + 36, + 6 + ], + [ + 51, + 150, + 42, + 12 + ], + [ + 93, + 150, + 6, + 6 + ], + [ + 105, + 150, + 36, + 6 + ], + [ + 15, + 180, + 120, + 12 + ] + ], + "segmentation": [] + } + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/multipanel_detailed_caption_box/answer.json b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/multipanel_detailed_caption_box/answer.json new file mode 100644 index 0000000000000000000000000000000000000000..1fc627ed32af43ec4e04f2ac4c8172d3201bc43b --- /dev/null +++ b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/multipanel_detailed_caption_box/answer.json @@ -0,0 +1,2186 @@ +[ + { + "question_id": 1, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "This is a status bar displaying the date \"Friday, Sep 2,\" along with the temperature \"61°F.\" It informs the user about the current day, date, and ambient temperature, likely provided by a weather service. This text is generally not interactive and serves as an informational display only." + }, + { + "question_id": 2, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "This represents a capital letter \"G\" set inside a circle. It is part of a larger icon that is likely associated with a search engine or service, commonly used as a button to initiate web searches or to access related services. Interaction with this icon typically leads to the search engine's interface or homepage." 
+ }, + { + "question_id": 3, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "This icon depicts clouds, which is a visual representation often used for weather-related services or apps. It indicates current or forecasted weather conditions and is typically interactive, leading the user to a detailed weather report upon tapping." + }, + { + "question_id": 4, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "The gear-like icon signifies settings or options. It is a common symbol for accessing system settings or preferences in an application or operating system. Interacting with this icon usually opens a menu where various settings can be adjusted." + }, + { + "question_id": 5, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "The icon resembling a speech bubble is often associated with messaging services or chat applications. Tapping it would commonly open a messaging app where users can send messages, photos, or videos to others." + }, + { + "question_id": 6, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "This icon, featuring a triangle resembling a \"play\" button, is widely recognized as a symbol for media players or services offering video and music content. Interaction with this icon would likely launch a media playback application or service." 
+ }, + { + "question_id": 7, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "The icon represents a search engine or a suite of services provided by a major tech company, indicated by the letter \"G\" and vibrant colors. Interacting with this icon typically brings the user to a home screen with access to various services offered by the company, such as search, email, maps, and more." + }, + { + "question_id": 8, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "This icon, representing a microphone inside a colorful outline, is suggestive of a digital assistant or voice search feature. When interacted with, it would generally activate a voice recognition service allowing users to speak commands or queries for assistance." + }, + { + "question_id": 9, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "The circle icon in a navigation bar is typically an interactive home button on a mobile device, often bringing the user back to the home screen when tapped." + }, + { + "question_id": 10, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "The icon with a triangle pointing leftwards resembles a \"back\" navigation button, generally used to go back to the previous screen in an application or navigate backwards in a browser." 
+ }, + { + "question_id": 11, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "This icon is indicative of the Microsoft Excel mobile application, recognizable by its green 'X' on a white background, which suggests a tool for creating and editing spreadsheets. It is likely an interactive element that, upon touch or click, launches the application allowing users to work with spreadsheets on their mobile device." + }, + { + "question_id": 12, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "This is the Facebook mobile application icon, featuring a lowercase 'f' on a blue background. When interacted with, it typically opens the Facebook app where users can browse their news feed, connect with friends and family, post updates, and engage in social networking activities." + }, + { + "question_id": 13, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "Represented here is an icon for the Speedtest application by Ookla, depicted by a speedometer graphic suggesting the app's function of measuring internet connection speed. Tapping on this icon will likely open the app and allow the user to test their current internet speed." + }, + { + "question_id": 14, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "This icon, featuring a camera lens and a gradient background, is for the Instagram mobile application. 
Interacting with this icon will usually open the app, providing access to photo and video sharing, as well as viewing the content from others on the Instagram social network." + }, + { + "question_id": 15, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "Resembling a house with a triangular roof, this icon signifies a home automation or real estate application. Interaction with this icon would open the respective app, providing controls for smart home devices or real estate listings, depending on its specific function." + }, + { + "question_id": 16, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "This icon has a feather, hinting at a lite version of an application that offers a minimalistic or resource-efficient option, typically for use in areas with limited connectivity or on devices with lower performance." + }, + { + "question_id": 17, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "Featuring a speech bubble with a lightning bolt, this is the Facebook Messenger app icon. It signifies an app dedicated to messaging which, upon interaction, opens a platform where users can send messages, share media, and participate in video calls." 
+ }, + { + "question_id": 18, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The icon partially reads \"Home De...\" against an orange square, suggesting a home improvement or retail company's app, possibly offering goods or services related to home refurbishment or decoration. The app's full functionality would be revealed upon opening it." + }, + { + "question_id": 19, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The green owl represents Duolingo, an educational platform icon with its function being language learning. Upon touching the icon, the user would engage with the app to learn a new language through interactive lessons." + }, + { + "question_id": 20, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "This icon, with an abstract design and the letters 'GE', likely signifies a news or media application that provides users with news articles, updates, and possibly live reporting, accessible by touching the icon to open the app." + }, + { + "question_id": 21, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "This area contains the term \"Search,\" suggesting it is likely related to a search function where a user can input queries to locate specific settings or information within this system or application." 
+ }, + { + "question_id": 22, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"settings\" indicates an option or heading that relates to configuration options. Interacting with it would typically bring up a menu to adjust system preferences or application parameters." + }, + { + "question_id": 23, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The term \"mobile,\" followed by a comma hints at a list or continuation of related topics, likely referring to mobile network settings or features in the context of this system or application." + }, + { + "question_id": 24, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"data\" in this context may refer to mobile data usage and settings. It suggests an option to view or adjust how the device handles cellular data." + }, + { + "question_id": 25, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "This term \"Wi-Fi,\" ending with a comma, implies it is part of a series, possibly relating to Wi-Fi settings where a user can manage Wi-Fi networks and preferences." 
+ }, + { + "question_id": 26, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"and\" serves as a conjunction within a list or sentence, indicating the addition of more items or concepts that are related to the ones previously mentioned." + }, + { + "question_id": 27, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "This term \"hotspot\" typically refers to a feature where the device can share its internet connection with other devices through Wi-Fi, Bluetooth, or USB." + }, + { + "question_id": 28, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The term \"usage,\" followed by a comma, likely relates to the tracking or monitoring of resource consumption, such as data, battery, or connectivity usage." + }, + { + "question_id": 29, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"Connected\" suggests it pertains to the status or management of connected devices or networks, such as Bluetooth connections or Wi-Fi networks." 
+ }, + { + "question_id": 30, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The partially visible term \"Lo\" could be part of a word that identifies a feature, option, or information related to the system or application settings." + }, + { + "question_id": 31, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This area is a text-entry field in a web browser, typically used for typing in web addresses or performing web searches. Interaction with this field usually involves clicking or tapping to enter text, and pressing Enter would initiate a web search or take the user to the entered web address." + }, + { + "question_id": 32, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This is a tab title within a web browser, indicating that the user has accessed or searched for 'eBay shopping' in this tab. The text serves a navigational purpose, allowing the user to identify and switch to the associated web page when multiple tabs are open." + }, + { + "question_id": 33, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This label refers to a 'cart' on a shopping platform, hinting at a functionality that allows users to view items that have been added to a virtual shopping cart. It is likely interactive and clicking it would navigate the user to a page summarizing their selected items for purchase." 
+ }, + { + "question_id": 34, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This text is a URL displayed in the address bar of a web browser. It indicates that the current web page pertains to the shopping cart of the eBay website. The user can click on this text to edit the URL or copy it for use elsewhere." + }, + { + "question_id": 35, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This text suggests that the user is visiting or has searched for a page related to 'Welcome to Costco Wholesale'. It might serve as a title for a page, potentially indicating that the user can find information regarding Costco's offerings through this tab." + }, + { + "question_id": 36, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "Here, 'costco.com' is the simplified representation of an address bar or tab title suggesting that the user is visiting Costco's website. Users interact with this by clicking it to switch to the Costco tab within the browser." + }, + { + "question_id": 37, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This phrase 'Pay Less.' is likely associated with a slogan or branding message, suggesting a value proposition to customers; the promise of spending less for the products or services offered by the entity associated with this phrase." 
+ }, + { + "question_id": 38, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The text 'Target:' resembles a title or a navigational cue for a segment within a web browser, it may indicate a web page related to the retail company Target and is probably part of a list or compilation of bookmarks or frequently visited sites." + }, + { + "question_id": 39, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "Similar to , 'Expect More.' is a slogan that implies a promise of greater value, service, or product quality from the associated entity. It is designed to communicate a marketing message or company ethos to customers." + }, + { + "question_id": 40, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This text 'target.com' represents a simplified address, similar to , likely indicating that the user is visiting or has the option to visit Target's website. Interaction with this area would navigate to or indicate presence at Target's web page." + }, + { + "question_id": 41, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The text appears at the top of the screenshot and is likely the title of the application or page currently being viewed. It suggests that the content of the page is related to the Skype application, possibly for download or further information purposes." 
+ }, + { + "question_id": 42, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "This text is also indicative of the Skype application. It is usually the main header on an app page and is a non-interactive element that provides the user with confirmation of the app's identity." + }, + { + "question_id": 43, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The icon displayed represents the Skype application. It's typically used as a visual identifier of the app within digital stores or on a device's home screen. It serves as a non-interactive branding element in this context." + }, + { + "question_id": 44, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The word \"Install\" is generally an interactive button when found on an application download page. Tapping this button would initiate the download and installation of the app onto the user's device." + }, + { + "question_id": 45, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "This repeated mention of \"Skype\" may refer to the name of the application on its store page. It usually appears below the app icon and serves as a non-interactive title or label." 
+ }, + { + "question_id": 46, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The term \"purchases\" here likely relates to transactions associated with the app, suggesting that the app might offer in-app purchases." + }, + { + "question_id": 47, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "Paired with the previous \"purchases\" text, \"In-app\" specifies the location or type of purchases available, indicating that users can buy items or services within the app itself." + }, + { + "question_id": 48, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The rating \"4.1*\" is indicative of user reviews and ratings for the app. It reflects the app's quality as perceived by its users and is usually an averaged score based on individual user ratings." + }, + { + "question_id": 49, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "\"1B+\" signifies the number of times the app has been downloaded, indicating that the Skype app has been downloaded over one billion times." 
+ }, + { + "question_id": 50, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The phrase \"Editors' Choice\" likely denotes a special recognition or endorsement by the app store's editorial team, suggesting that the app comes highly recommended." + }, + { + "question_id": 51, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "This region contains the phrase \"search settings,\" which indicates a function allowing the user to search within the settings menu. The presence of a magnifying glass icon suggests that this is an interactive search bar interface element. Typically, a user would tap this area and input text to locate specific settings." + }, + { + "question_id": 52, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "This portion features the single character \"M\" which is typically representative of a user's initial or an application's logo. It is stylized with a certain thickness and distinct font that can be indicative of a branding design or user personalization within a software interface." + }, + { + "question_id": 53, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "This region includes the word \"Add\" presented with clear, bold lettering on a button or interactive element. 
This is commonly used to initiate the process of adding new elements, possibly in this context to add a new email account, as suggested by the surrounding text." + }, + { + "question_id": 54, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "The text \"another email\" is part of a larger phrase that suggests functionality for adding additional email addresses to the account or application in use. It is likely not interactive by itself but is part of instructional or descriptive text guiding the user's actions." + }, + { + "question_id": 55, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Featuring the word \"account,\" this text complements the nearby phrase and is associated with the process of adding or managing email accounts within the application or device settings. It would generally not be interactive but adds context to the interface's options." + }, + { + "question_id": 56, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "The phrase \"Set up your personal\" suggests a prompt or direction for the user to configure personal settings, possibly related to an email or other account settings. This text is usually static and provides guidance or instructions within a user interface." 
+ }, + { + "question_id": 57, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Paired with the previous text, \"or work email\" completes instructions for setting up email accounts of different types (personal or professional) within an application. This portion of text helps to further define the user's options for account configuration." + }, + { + "question_id": 58, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "\"Network & internet\" signifies a category or menu within the settings that pertains to adjusting network-related preferences including Wi-Fi, data usage, and related connectivity features. This text typically leads to a subsection where related settings can be modified." + }, + { + "question_id": 59, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "This section, \"Wi-Fi, mobile, data\" lists different connectivity options available to the user for configuration. It could be informative text providing a summary of the settings contained within the \"Network & internet\" menu mentioned in the previous region." + }, + { + "question_id": 60, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "The presence of the word \"usage,\" combined with the context from the surrounding text, relates to data consumption aspects of the device's network settings. 
\"And\" suggests there are additional relevant aspects listed after this text, likely related to managing network services or features." + }, + { + "question_id": 61, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "This area displays the text \"Wednesday, May 18,\" which appears to show the date information, presumably representing the current day of the week and the month's date. This is typically displayed on mobile devices as part of the user interface to inform the user of the current date." + }, + { + "question_id": 62, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "The text \"Maps\" suggests an application name, likely a mapping or navigation app, which users commonly utilize to find locations, get directions, or explore maps of different areas. It is probable that tapping on this text would open the associated application." + }, + { + "question_id": 63, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "This is a single character \"G,\" customarily associated with Google’s branding. It often represents access to Google's search services or apps affiliated with Google. Interacting with this symbol would possibly lead to a Google product or service." 
+ }, + { + "question_id": 64, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "This icon, often indicative of location services or map functionality, is commonly used to represent a user's current location or to access location-based features. Interacting with this icon typically opens a mapping application that shows the user's real-time location on a map." + }, + { + "question_id": 65, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "This icon depicts a chat bubble, usually associated with messaging or communication services. It typically indicates the user's chat or messaging applications, and interaction would likely open the associated messaging service to send or receive messages." + }, + { + "question_id": 66, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "This icon with a colorful design resembling a camera shutter or a wheel hints at the Google Chrome browser, which is widely used for Internet browsing. Tapping this icon would typically open the Chrome browser for web navigation." + }, + { + "question_id": 67, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "The icon exhibits the Google Assistant symbol, suggesting voice-activated or typing search query functionality. Interacting with this icon would likely invoke Google Assistant to help with tasks, answer questions, or control smart home devices." 
+ }, + { + "question_id": 68, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The URL displayed in the address bar indicates that the webpage belongs to costco.com and includes a path, or endpoint, that suggests a functional page, which the text 'Check' implies may be related to a checkout or verification process. This is the web address users can navigate to for interacting with the website's functionality." + }, + { + "question_id": 69, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The logo represents the brand identity for Costco Wholesale, indicating that the user is currently on the official website of this retail company. As a logo, it may serve as a clickable element that typically redirects users to the homepage of the website." + }, + { + "question_id": 70, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "Labeled 'Warehouses,' this indicates a section of the website where users can find information about Costco's physical store locations. It likely functions as a link that, when clicked, will take the user to a page detailing warehouse locations and related information." + }, + { + "question_id": 71, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "Marked 'Account,' this suggests a section pertaining to user account management. Clicking on this would likely allow the user to access their personal account details, sign in, or manage their membership and profile." 
+ }, + { + "question_id": 72, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "This icon appears to represent a shopping cart, which is commonly used on e-commerce websites to signify where users can view items they intend to purchase. Clicking on it would probably take the user to view their current selections or to the checkout page." + }, + { + "question_id": 73, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "Highlighting the word 'Shop,' this implies a navigational link designed to direct users to the online shopping section of the website, where they can browse and choose products for purchase." + }, + { + "question_id": 74, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "This appears to be a search bar, where users can enter keywords or phrases to find specific items or information on the website. Such fields typically include an interactive function that generates search results upon entry confirmation." + }, + { + "question_id": 75, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "'My Warehouse' likely refers to the user's preferred or designated Costco warehouse location. It may include functionality for the user to select or change their preferred store and may show additional details, such as operational hours." 
+ }, + { + "question_id": 76, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The phrase 'Delivery Location' accompanied with what appears to be a postal code implies a feature that allows users to specify or view the location to which online purchases will be delivered." + }, + { + "question_id": 77, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "Containing the city name 'Seattle,' this suggests the chosen warehouse or delivery location for the user. It might be interactive to allow the user to change the location or view information on the selected warehouse." + }, + { + "question_id": 78, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "This text likely represents the name of an application or service known as Fetch Rewards, potentially hinting at a rewards system that users can utilize by engaging with the app." + }, + { + "question_id": 79, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "This is the word \"Play,\" which usually is associated with initiating an action or starting something, such as a video or game within an application." 
+ }, + { + "question_id": 80, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The phrase \"to earn\" typically suggests that there is an opportunity to gain something—often points, money, or rewards—by performing certain actions or tasks." + }, + { + "question_id": 81, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The word \"MAKE\" usually denotes the action of creating something or obtaining an outcome, perhaps insinuating that users can create or earn money through the app." + }, + { + "question_id": 82, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "This text states \"MONEY,\" which indicates that the application or service likely involves opportunities for users to earn financial rewards or benefits." + }, + { + "question_id": 83, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The text \"appminer st\" is not immediately clear, but it could be a truncation or part of a larger phrase, possibly indicating a feature within the app, or related to app mining or statistics." 
+ }, + { + "question_id": 84, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The word \"Contains\" usually suggests that what follows will describe the contents or features within the app, which in this case could be related to advertisements." + }, + { + "question_id": 85, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "This term generally refers to \"advertisements,\" suggesting that the application includes ads that users might see while utilizing the app." + }, + { + "question_id": 86, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The numerical figure \"50K+\" generally implies a quantity greater than 50,000, typically used in the context of downloads, users, or items within an app." + }, + { + "question_id": 87, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The characters \"4.7*\" indicate a rating, likely on a 5-point scale, suggesting that users have rated the app positively, with 4.7 out of 5 stars." + }, + { + "question_id": 88, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "This area contains the webpage title indicating the user is on a retail website known for its wide range of products, hinting at online shopping capabilities. 
The title is typically non-interactive and serves as an identifier of the site." + }, + { + "question_id": 89, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "This text seems to be an incorrect or truncated URL for the same retail website mentioned in Region 1. Possibly a typographical error within the text, it seems non-functional." + }, + { + "question_id": 90, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The text here represents a search query within a search bar of the website, suggesting the user is looking for a Lenovo ThinkPad, which is a model of a laptop computer." + }, + { + "question_id": 91, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "Labeled 'Cancel,' this is likely an interactive button used to clear the current search query within the search bar. Once tapped or clicked, it should clear the input text." + }, + { + "question_id": 92, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "This text duplicates the query in Region 3 and is part of the search bar suggestions or search history, indicating a previous or common search made by the user." 
+ }, + { + "question_id": 93, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The single word 'Lenovo,' which is part of a search suggestion below the search bar, represents the brand that manufactures various electronic devices, including laptops." + }, + { + "question_id": 94, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The word 'ThinkPad' refers to a specific line of laptops and is part of a search suggestion. Standalone, it specifies the user's interest in the ThinkPad series by Lenovo." + }, + { + "question_id": 95, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The text 'ir' appears to be an incomplete or mistyped search term or fragment within the search suggestions. Its context is unclear without additional information." + }, + { + "question_id": 96, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "This text, likely a category label, indicates the section of the site the user is navigating, presumably the electronics category where items like laptops would be found." + }, + { + "question_id": 97, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "Representing a longer search suggestion, this phrase indicates a related accessory for the Lenovo ThinkPad, specifically a charger, suggesting the user might be looking to purchase this item." 
+ }, + { + "question_id": 98, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "The image displays a title that reads \"NAVIGATING SPECIAL EDUCATION SOCIAL & EMOTIONAL LEARNING.\" It's styled in bold, white capital letters against a red background, and it appears to serve as a header for the entire visual presentation, indicating the overarching theme of the content below." + }, + { + "question_id": 99, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel illustrates the concept of a \"Growth Mindset\" juxtaposed with \"Fixed Mindset.\" Two head silhouettes are shown with arrows pointing towards a \"Growth Mindset\" tag indicating a positive transformation away from a \"Fixed Mindset,\" symbolizing the adaptability and learning potential of the mind." + }, + { + "question_id": 100, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "Depicted here is a person with a grim expression, and steam coming out of their ears, conveying the theme of \"Anger Management.\" This symbolizes the need to control tempers, with visual cues highlighting the struggle typically associated with anger." + }, + { + "question_id": 101, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel represents \"Understanding Diversity.\" It features a circle of variously colored handprints reaching towards the center, signifying unity and inclusiveness among diverse individuals or groups. 
The image communicates the idea of embracing diversity." + }, + { + "question_id": 102, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "The image here is indicative of \"Social Inferencing.\" A figure stands perplexed before an open box with question marks floating above, suggesting the process of interpreting social cues and understanding social contexts or scenarios that are not overtly expressed." + }, + { + "question_id": 103, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "An illustration of two children, one standing over the other with a raised fist, typifies \"Bullying.\" This image portrays an aggressive interaction between youth, emphasizing the dynamic of power and intimidation present in bullying behaviors." + }, + { + "question_id": 104, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "Here, \"Helping Others\" is symbolized by two children, one assisting the other by tying their shoe. This image evokes themes of kindness, helpfulness, and cooperation among individuals, highlighting the importance of social support." + }, + { + "question_id": 105, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "Showing two profiles with opposing arrows and a lightning bolt in between, this panel discusses \"Conflict Resolution.\" The imagery suggests two individuals facing a conflict with a potential for resolution, emphasizing communication and problem-solving." 
+ }, + { + "question_id": 106, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel depicts \"Coping With Changes,\" represented by a signpost with arrows pointing in different directions, labeled \"CHANGES.\" It symbolizes the various paths one may take when encountering life's transitions and the importance of adaptability." + }, + { + "question_id": 107, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "Finally, \"Leadership\" is expressed through an individual climbing a staircase while assisting another person upward. It represents the concept of leading by example, and guiding others towards success, showcasing the traits of a good leader." + }, + { + "question_id": 108, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image highlights a statistic related to hiring efficiency, pointing out that the time to hire has more than doubled over the last 5 years. A graphical element beside the text emphasizes this increase in time with a \"+2X\" indicating the doubling." + }, + { + "question_id": 109, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This section of the image indicates that the average cost to hire someone in the U.S. is $4,000, emphasizing the financial implications of the recruitment process for employers." 
+ }, + { + "question_id": 110, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Illustrated here is a pie chart displaying that 36% of employers are unable to find the talent they need when it is needed. This statistic points to the challenges in matching skills and job openings in a timely manner." + }, + { + "question_id": 111, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Described here is the significant potential economic benefit (\"$2.7 trillion impact to global GDP\") that could result from using more efficient talent platforms, suggesting that improvements in recruiting methods could have a profound impact on the global economy." + }, + { + "question_id": 112, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This part of the image addresses organizational efficiency, with a statement that 46% of companies are sometimes or frequently understaffed. The figure is accompanied by a graphic showing the 46% proportion." + }, + { + "question_id": 113, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "This area presents the title and introductory text providing an overview of the image's intent. 
It introduces the concept of \"Travel Personas,\" indicates that these personas are used to identify individual travel styles, and how these styles are significant for personalized engagement in marketing. It references a report by the CMO Council from 2018." + }, + { + "question_id": 114, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A visual and textual depiction of \"The Smart Planner\" travel persona. This persona, representing 31% of travelers, is illustrated by a character with suitcases, a camera, binoculars, and a hat, suggesting a well-prepared and organized traveler." + }, + { + "question_id": 115, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "This illustrates \"The Relaxed Nomad\" persona. With 25% representation, the image shows two individuals in hiking attire with a backpack, indicating a laid-back and adventurous travel style." + }, + { + "question_id": 116, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Depicts \"The Deal Seeker\" persona, representing 22% of travelers. The image shows a family with suitcases and shopping bags, suggesting a focus on economical travel and value for money." + }, + { + "question_id": 117, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Presents \"The Nervous Stresser\" persona with 13% representation. The image depicts an anxious individual in an airplane seat, clutching the armrests, reflecting a traveler who experiences stress during trips." 
+ }, + { + "question_id": 118, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Showcases \"The Adventurous Thrill-Seeker\" persona, accounting for 5% of the traveler demographic. The image portrays two characters skydiving, indicating a preference for high-energy and adventure-filled travel experiences." + }, + { + "question_id": 119, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Portrays \"The Luxury Budget-Buster\" persona, constituting 1% of travelers according to this depiction. The image includes a character sipping a drink on a plane, implying a tendency towards indulgence and high expenditure." + }, + { + "question_id": 120, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Displays \"The Business Road Warrior\" persona, also making up 1% of the traveler profile. The graphical representation includes a character briskly walking with a rolling suitcase and carrying a briefcase, suggesting frequent travel for business purposes." + }, + { + "question_id": 121, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "An illustration depicts a female customer service representative wearing a headset and holding a notepad, with an envelope icon indicating email communication." 
+ }, + { + "question_id": 122, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "Depicted here is a male customer support agent with a headset. Behind him are symbols such as a magnifying glass and a wrench, suggesting a focus on service and problem-solving." + }, + { + "question_id": 123, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "This image shows a customer support agent with a globe and a phone headset in the background. The presence of a star and headphones suggests excellence in global support." + }, + { + "question_id": 124, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "In this graphic, a male figure with a headset is surrounded by symbols: a question mark, gears, and a light bulb. This represents expertise in finding solutions." + }, + { + "question_id": 125, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "Featuring tools and a gauge, the illustration conveys a commitment to quality in customer service, indicated by the 'Quality Service' text." + }, + { + "question_id": 126, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "The design shows a female representative with a headset alongside a mobile phone displaying a wifi signal and a callback option, emphasizing telecommunications services." 
+ }, + { + "question_id": 127, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel portrays a customer support individual with a wrench, highlighting the concept of assistance with technical or practical issues." + }, + { + "question_id": 128, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "Illustrated here is a customer support agent with a headset in front of a backdrop depicting the UK flag, a speech bubble, and a phone, suggesting language translation services." + }, + { + "question_id": 129, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A female customer service consultant is represented, with symbols of 24-hour availability and a gold star, signifying round-the-clock excellence." + }, + { + "question_id": 130, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image features a human heart symbolizing a strong heart as one of the benefits of running." + }, + { + "question_id": 131, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel showcases an icon of a shield with a check mark, representing the immune system's boost from running." 
+ }, + { + "question_id": 132, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A silhouette of a figure measuring their waist indicates that running can aid in weight loss." + }, + { + "question_id": 133, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The artwork depicts a pair of lungs, signifying the respiratory system's enhancement due to running." + }, + { + "question_id": 134, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A smiling face emoticon suggests that running can improve one's mood." + }, + { + "question_id": 135, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "An illustration of a leg bone signifies that running increases bone density." + }, + { + "question_id": 136, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "An image of a brain is used to illustrate the benefit of improved brain function from running." 
+ }, + { + "question_id": 137, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image features a detailed representation of the cardiovascular system, emphasizing its strengthening through running." + }, + { + "question_id": 138, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "The image displays the word 'HOT' in large, bold, uppercase letters with varying colors for each letter. The 'H' is in red, the 'O' is in mustard yellow, and the 'T' is in a light blue color." + }, + { + "question_id": 139, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "Here, a round, yellow cartoon-like emoji with blue tears, symbolizing laughter or crying with joy, is shown. Below it, the word 'HUMOR' is written in uppercase letters." + }, + { + "question_id": 140, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "This depicts an open hand graphic in brown color, which is centered within an orange circular background. Underneath the image, the word 'OPENNESS' appears in capital letters." + }, + { + "question_id": 141, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "Shown is an illustration of two hands coming together in a handshake or high five, set against a yellow circle. Below, the word 'TOUCH' is described in uppercase letters." 
+ }, + { + "question_id": 142, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A graphic of a heart with an exclamation mark within it is displayed within a light green circular background. Written below is the word 'ATTENTION' in uppercase letters." + }, + { + "question_id": 143, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A portrayal of a person with a gender-neutral appearance, featuring brown hair, is encircled in blue. A sequence of dashes leads from the character to the bottom right, with the word 'PROXIMITY' written in block capitals." + }, + { + "question_id": 144, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "The image shows a close-up illustration of a stylized blue eye with a large brown pupil, against a dark blue background. Below the eye is the word 'EYE CONTACT' written in all caps." 
+ }, + { + "question_id": 145, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image depicts an illustration of a person with flushed cheeks and a thermometer in their mouth, indicating a high temperature, alongside the word \"fever.\"" + }, + { + "question_id": 146, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image shows an individual coughing into their hand, representing a symptom identified by the word \"cough.\"" + }, + { + "question_id": 147, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel illustrates an individual appearing unwell, with a sick expression and a hand over their mouth. The word \"vomiting\" is associated, indicating it as a symptom." + }, + { + "question_id": 148, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Here, an individual is portrayed with their hands near their throat, their cheeks flushed, and an uneasy expression. The term \"dyspnea\" adjacent to the figure defines the displayed respiratory distress." 
+ }, + { + "question_id": 149, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "An individual is seen clutching their stomach, with a distressed expression, representative of \"diarrhea\" which is indicated by the corresponding label." + }, + { + "question_id": 150, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The illustration shows a pair of human lungs with a highlighted area indicating inflammation. The word \"pneumonia\" is present to describe the condition being depicted." + }, + { + "question_id": 151, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Featured is a graphic representation of a pair of kidneys with a highlighted area in red, indicating distress or damage. Alongside is the phrase \"renal failure,\" signifying the medical condition exhibited." + }, + { + "question_id": 152, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "This region displays the heading \"Interesting Facts\" at the top, set against a blue background with a three-line menu icon to the left and a heart symbol to the right." 
+ }, + { + "question_id": 153, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Depicts a graphical icon of a panda bear's face on a green background with the label \"Animals\" beneath it." + }, + { + "question_id": 154, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Shows an icon representing a plate and silverware on a green background, labeled as \"Diet Nutrition.\"" + }, + { + "question_id": 155, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Includes a graphical icon that combines a heart shape and a pulse line on a dark background, labeled \"Diseases Disorders.\"" + }, + { + "question_id": 156, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Features an image of a fashionable shirt on a dark background with the word \"Fashion\" underneath it." + }, + { + "question_id": 157, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Contains an icon resembling a film strip on an orange background, indicating the \"Entertainment\" category." 
+ }, + { + "question_id": 158, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel displays an icon of a syringe with a drop, which is on a green background, and is described with the words \"Drugs Addiction.\"" + }, + { + "question_id": 159, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Shows a depiction of a wine bottle and glass on a blue background, labeled \"Food & Drink.\"" + }, + { + "question_id": 160, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Exhibits two stylized human figures, one male and one female, on a blue background, with the inscription \"Gender.\"" + }, + { + "question_id": 161, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Contains a depiction of the Earth on a green background, with the word \"Global\" beneath it." + }, + { + "question_id": 162, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image depicts two individuals engaged in conversation. One appears to be a professional, possibly a therapist, sitting across from a person who seems to be seeking help. The scene is accompanied by the text \"Seek Professional Help,\" suggesting that the image represents the advice to consult a mental health professional when dealing with depression." 
+ }, + { + "question_id": 163, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel features an individual sitting on the ground with their head down, projecting a dejected or hopeless demeanor. Above the figure, the text reads \"Don't Lose Hope.\" The image conveys the message of maintaining hope as a countermeasure against feelings of depression." + }, + { + "question_id": 164, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "An illustration of a female figure is shown alongside the phrase \"Practice Mindfulness.\" She appears calm and collected, with her eyes closed and a slight smile, which indicates a serene state of mind, commonly associated with mindfulness practice." + }, + { + "question_id": 165, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Here, an individual is seen looking out of a large window onto a sunny landscape with trees. The phrase \"Rethink Your Perspective\" suggests that the image is advising a change in one's outlook, possibly to a more positive or broader view, as a way to combat depression." + }, + { + "question_id": 166, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image presents a person in activewear, taking a stride forward with a focused expression. 
The associated text, \"Stay Active,\" recommends physical activity as a method for improving mental health and battling depression." + }, + { + "question_id": 167, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel displays a person in a yoga pose, meditating with eyes closed and hands in a position of focus. The text \"Meditate\" indicates that the image is suggesting meditation as a therapeutic practice for managing depression." + }, + { + "question_id": 168, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image shows three gel ice packs in green, purple, and blue colors, with distinctive shapes, resembling a dinosaur, a star, and a fish. Accompanying text suggests \"Take out the one you need.\"" + }, + { + "question_id": 169, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "An illustration depicts a cartoon boy holding an ice pack to his head. 
Text indicates the ice pack has multi-functionality and advises using the ice pack for \"the relief area for the doctor recommended time of 20 minutes.\"" + }, + { + "question_id": 170, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "Detailed instructions on how to use the ice pack are given, with two methods highlighted: \"TO USE COLD\" involving refrigeration, and \"TO USE HOT\" instructing to microwave the pack for 10 seconds and check the temperature." + }, + { + "question_id": 171, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "Guidelines for storage are portrayed, advising to \"put the item in the storage bag, for longer shelf life, keep pack in freezer while not in use.\"" + }, + { + "question_id": 172, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "The object appears to be a small brown wooden shed, likely used for storage, situated on a patch of grass. It has a clearly visible slanted roof, possibly for rain runoff, and looks to be a single-door structure typically found in a backyard or garden setting." + }, + { + "question_id": 173, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "This object is a tree with thick, lush foliage, representing a mature specimen that provides shade and greenery. 
It stands behind a smaller, sparser tree and is part of a larger grouping of trees that appear to create a natural boundary or backdrop for the area." + }, + { + "question_id": 174, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "A single metal pole is embedded in the ground in a vertical orientation. It seems to be a simple, slender structure, possibly serving as a support or part of a larger construction that isn't fully visible. The lawn surrounding it is well-trimmed and maintains an even appearance." + }, + { + "question_id": 175, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "This bowl, appearing to be dark blue, is situated against a background, likely part of kitchenware." + }, + { + "question_id": 176, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "The tabletop is made of dark marble, showcasing a glossy finish and reflecting its surroundings slightly." + }, + { + "question_id": 177, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "The light switches are white, contrasting with the dark wall, likely plastic, and appear functional." + }, + { + "question_id": 178, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "Positioned in the background, these white light switches are paired on a wall above the countertop." 
+ }, + { + "question_id": 179, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "This silver oven, with digital controls and a handle, appears modern and built into the cabinetry." + }, + { + "question_id": 180, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "An indistinct blue and green object, possibly decorative, is partially visible against a lighter backdrop." + }, + { + "question_id": 181, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "The floor, constructed of hardwood, showcases a natural finish with variations in wood grain." + }, + { + "question_id": 182, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "The jar holder, likely metal, is mounted to the wall, containing jars that may hold spices or ingredients." + }, + { + "question_id": 183, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "This is an image of a silver metal table situated outside on a paved ground. The table has a shiny, reflective surface indicative of being metallic." + }, + { + "question_id": 184, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "The object is an outdoor chair characterized by its red backrest and tan seat. 
It appears sturdy and designed for outdoor settings, likely part of a café or restaurant patio." + }, + { + "question_id": 185, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "The item in question is a piece of lavender paper that seems to be placed atop a metal table. The paper's edges are distinctly visible against the table's surface." + }, + { + "question_id": 186, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "Visible here is a yellow traffic light, suspended above the street. The light is not illuminated and it stands against a light sky, possibly signaling a traffic-stop scenario." + }, + { + "question_id": 187, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A large red and white striped umbrella stands open, presumably providing shade or shelter in an outdoor setting. Its vibrant colors attract attention." + }, + { + "question_id": 188, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A brown tree trunk is seen beside a sidewalk. The trunk's bark is rugged and it appears to be a mature, healthy tree, offering shade to the vicinity." + }, + { + "question_id": 189, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "Displayed is a black chalkboard featuring white text. It seems to be placed on a sidewalk, often used for displaying messages or menus outside establishments." 
+ }, + { + "question_id": 190, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A window is seen on the side of a tan-colored building. It appears to be rectangular, typical of building windows, and reflects the adjacent surroundings." + }, + { + "question_id": 191, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "A close-up view of a horse's head, predominantly brown with a distinctive white patch on its forehead and visible mane." + }, + { + "question_id": 192, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "This is the body of a brown horse, most likely the same one as the head seen in the close-up. Its front body is visible." + }, + { + "question_id": 193, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "A white horse is seen from a side angle in the distance, grazing or standing in a meadow with trees and a fence." + }, + { + "question_id": 194, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A frying pan on a heat source contains sautéed meat and vegetables, emitting steam, indicating the food is hot and being cooked." 
+ }, + { + "question_id": 195, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "An electric stovetop features a radiant burner that is glowing, suggesting it is turned on and providing heat for cooking." + }, + { + "question_id": 196, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A kitchen knife with a green handle rests on a countertop; its blade appears sharp and suitable for food preparation." + }, + { + "question_id": 197, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A human hand is captured in motion, seasoning or stirring the food in the pan, contributing to the cooking process." + }, + { + "question_id": 198, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000299654.jpg", + "category": "natural_detailed_caption_box", + "text": "The image depicts the head of a zebra, with distinctive black and white stripes covering its fur. The animal's ears are pointed upwards, indicating alertness. The eyes are visible, showcasing a gentle gaze, and the nose is close to the ground, suggesting the zebra is grazing or sniffing the terrain. The mane is partially visible as a series of short, erect black hair between the zebra's ears." 
+ }, + { + "question_id": 199, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000107939.jpg", + "category": "natural_detailed_caption_box", + "text": "The object is a rectangular street sign with white letters on a green background, indicating the name of a street. It is affixed to a metal pole and is located above and slightly to the left of a stop sign. The sign reads 'NORTH AVE' suggesting it's likely an indication of the street or direction. It appears to be a standard street name sign used in many urban settings." + }, + { + "question_id": 200, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000107939.jpg", + "category": "natural_detailed_caption_box", + "text": "This object is a red hexagonal stop sign with white uppercase letters spelling 'STOP'. It is attached to the same metal pole as another sign, below and to the right of it. The sign is designed to alert drivers to stop and is a widely recognized traffic control device. The edges of the sign appear sharp and undamaged, suggesting it is in good condition." + }, + { + "question_id": 201, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "A plush, padded object designed for comfort, potentially used on a sofa." + }, + { + "question_id": 202, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "Similar to the first object, this is also a stuffed and soft piece intended for supporting or resting." 
+ }, + { + "question_id": 203, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "Decorative accessory adorned on the ear, visible as a small, shiny object." + }, + { + "question_id": 204, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "This is a child with an open mouth and animated facial expression, possibly speaking or expressing surprise." + }, + { + "question_id": 205, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "Appears to be a young boy, casually dressed, gripping an electronic device with attention." + }, + { + "question_id": 206, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "The figure is wearing a red ski suit with a blue helmet and goggles. Their stance is open and welcoming, arms outstretched, and they seem to be an instructor addressing a group of students on a snowy slope." + }, + { + "question_id": 207, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A person is mostly obscured by the instructor but can be identified as a ski student by the helmet. The student is wearing a purple jacket with green sleeves and appears to be in mid-motion, learning to ski." 
+ }, + { + "question_id": 208, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "There is a student dressed in green ski gear with visible ski poles, possibly following instructions. They are viewed from the side, indicating movement or a pause during skiing." + }, + { + "question_id": 209, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A clear blue sky with scant clouds, indicative of a bright, sunny day ideal for outdoor activities such as skiing. This backdrop is above a snowy mountain setting." + }, + { + "question_id": 210, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A ski student is captured from behind, suggesting they are moving away from the viewer. They are wearing a red jacket with black pants, indicative of typical ski wear fit for the cold environment." + }, + { + "question_id": 211, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "This student, visible from the side, is wearing a green and purple ski outfit with a matching helmet, possibly in the midst of practicing or following a ski maneuver." + }, + { + "question_id": 212, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A detailed examination of the instructor's black glove, which is part of standard skiing attire, suited to protect hands from cold conditions and providing better grip on ski poles." 
+ }, + { + "question_id": 213, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2411153.jpg", + "category": "natural_detailed_caption_box", + "text": "Captured in this section is a motorcycle racer, sharply tilting while maneuvering a turn on a race track. The rider, outfitted in a full-body racing suit, is almost in a horizontal position relative to the ground, a technique used in high-speed motorcycle racing to navigate tight turns while maintaining speed. The motorcycle itself is predominantly red with hints of white and black, and it showcases a sleek, aerodynamic design typical of high-performance racing bikes. The rider's focused posture and the bike's dynamic angle suggest this is a moment of intense action during a race." + }, + { + "question_id": 214, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2411153.jpg", + "category": "natural_detailed_caption_box", + "text": "This portion of the image displays the texture of an asphalt road, detailed with small granular elements indicative of a typical racing track surface built to offer traction and durability. A crisp white boundary line marks the edge of the racing track, contrasting with the dark gray tone of the asphalt. The road surface is illuminated by ambient light, highlighting the texture and suggesting a dry weather condition which is ideal for racing. The condition of the road suggests it is well-maintained, a necessity for the safety and performance of high-speed motorsport events." 
+ }, + { + "question_id": 215, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/22d3844dcf29a07bd10f557a33684e331846f81b938ca0f742afab09c542133f.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region of the image displays a portion of a scientific or academic paper, specifically focusing on points that seem to outlay contents or headings within the document. The page appears to discuss topics in physics, with references to quark and meson masses, as well as lattice data. The content suggests that the document may be exploring the relationship between subatomic particles and their masses, experimental data, and theoretical models (likely within the field of particle physics or quantum chromodynamics). Each item listed is followed by ellipsis and a numerical value, denoting sections or page numbers where these topics are expanded upon within the document." + }, + { + "question_id": 216, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/22d3844dcf29a07bd10f557a33684e331846f81b938ca0f742afab09c542133f.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region is at the bottom of the document, typically referred to as the page-footer. In academic or scientific papers, this section could include information such as the page number, publication date, author's name, or part of the document classification system. However, the specifics of what this footer contains are not visible, as the black rectangle with a white numeric identifier covers it entirely." 
+ }, + { + "question_id": 217, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The content is a caption designated for a table, which generally serves to describe the table's subject matter. The caption reads, \"TABLE 1: The geometries and adsorption energies for the structures of thioglycolic acid on Au(111) at 0.25ML.\" It provides a clear indication that Table 1 will present quantitative data regarding the geometry and energy characteristics of thioglycolic acid adsorbed on a gold (Au) substrate at a specific coverage level." + }, + { + "question_id": 218, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a table containing organized data. It lists various configurations of thioglycolic acid adsorbed on an Au(111) surface, along with numerical values for initial and optimized parameters such as adsorption distance (ds–Au), polar angle (θ), and adsorption energy (E_ads). The data is structured in columns with headings for different parameters and rows corresponding to different adsorption sites and tilt directions. The table is used to convey detailed quantitative information in a comparative format, facilitating the analysis of changes in geometry and energy after optimization." 
+ }, + { + "question_id": 219, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Here appears to be an excerpt of text, possibly from a research article or report, focusing on detailed scientific analysis. The text discusses the shortest Au-S bond length and mentions 'initial and optimized site,' likely referring to the states before and after some experimental or computational procedure. The content seems to pertain to the interpretation of the data presented in the table above it, providing context and insights into the structural data of the adsorption process." + }, + { + "question_id": 220, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This section of text also seems to be a detailed analytical discussion, possibly a continuation of the content from the previous text excerpt. It specifically highlights the adsorption energy for the most stable structure of a molecule on the Au(111) surface and the preferred adsorption site. It suggests a close relationship with both the data in the table above and the scientific interpretation or conclusion drawn from that data." + }, + { + "question_id": 221, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The content in this region appears to be a page-footer, usually found at the bottom of journal pages or official documents. It may contain information such as the page number, document section, publication date, or authors' names. 
Such footers are used for navigation and citation purposes." + }, + { + "question_id": 222, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The image is a collection of three scatter plots, each corresponding to a distinct type of prediction performance evaluated by F1 score. The x-axis represents the frequency (presumably of occurrence in the training set), while the y-axis represents the F1 score, which is a measure of test accuracy. The plots are labeled (a) Atom prediction performance, (b) Bond prediction performance, and (c) Charge prediction performance. Each plot features a variety of points labeled with chemical symbols or bond types, indicating that the data relates to chemical structures." + }, + { + "question_id": 223, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a caption for the series of scatter plots shown in . It provides an interpretation of the data, stating that there is a clear correlation between the performance of neural networks on different prediction types and the frequency of the specific type in the training dataset. It is noted that classification networks perform significantly better than segmentation networks." + }, + { + "question_id": 224, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a subsection title within the document that reads \"Performance of segmentation network.\" It indicates that the following text will discuss the results and analysis related to the evaluation of the segmentation network's performance." 
+ }, + { + "question_id": 225, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a paragraph that explains how the performance of the segmentation network is measured by the F1 score for pixel predictions for different atom, bond, and charge types. The text discusses how performance correlates with the frequency of these types in the training data and references a correlation visible in Figure 4, assuming that Figure 4 corresponds to the scatter plots in ." + }, + { + "question_id": 226, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is another subsection title within the document that reads \"Performance of classification networks.\" It signals that the subsequent paragraph will describe the performance evaluation for classification networks." + }, + { + "question_id": 227, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This paragraph details the performance of classification networks, mentioning that the F1 score is used for evaluation. It highlights a correlation between F1 score and the frequency of different atom, bond, and charge types in the training set. Although the segmentation is not perfect, the classification networks can maintain accuracy. Results are summarized in Figure 4, which likely refers to the scatter plots in ." 
+ }, + { + "question_id": 228, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a subsection title denoted \"Overall graph accuracy,\" which suggests that the following section of the document will focus on the combined accuracy measurements of the previously discussed networks." + }, + { + "question_id": 229, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "In this paragraph, the text outlines how combining the performance of different parts can produce an overall accuracy for graph predictions. It implies that integration of segmentation and classification network results, as indicated by an algorithm, can construct the resulting graph, referencing images in three different blocks." + }, + { + "question_id": 230, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is the page number of the document, specifically '11,' marking its location within the document's sequence of pages." + }, + { + "question_id": 231, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region appears to be a paragraph of text discussing the outcome of a washing process on reducing sugar content. It notes that this process resulted in a higher content of reducing sugar which is thought to overshadow the glycemic index (GI) lowering effect of the polyphenols and may increase the GI of the sugar." 
+ }, + { + "question_id": 232, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region of text outlines a graphical demonstration of the 'GI sweet spot' related to the sugars shown in a referenced table. It explains that a certain minimum amount of sucrose (22mg CE/100mg) needs to be retained during sugar processing to maintain a low GI, and that if additional polyphenols are present but the reducing sugars are too high, then the low GI effect is negated." + }, + { + "question_id": 233, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region includes a section header titled \"Table 3 - Example sugars,\" which implies that the region is categorizing and summarizing data related to various sugars, likely in a tabular format." + }, + { + "question_id": 234, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text in this region describes the effects of increasing the reducing sugar content of sugar and its impact on the GI, moisture content, and the behavior of glucose and fructose when polyphenol content is increased. It concludes that optimizing moisture and reducing sugar content is insufficient to lower the GI in the presence of higher polyphenol levels." 
+ }, + { + "question_id": 235, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region is a section header for the text that follows, indicating that the content will deal with \"Example b - Washing or massecuite to desired polyphenol content.\" This text likely explains an example or case study related to the process of washing sugar massecuite to achieve a certain level of polyphenol content." + }, + { + "question_id": 236, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region contains text describing an experimental process where two different sugar mill samples were tested for polyphenol content after undergoing washing to a certain depth of color. It discusses how the polyphenol content was measured against desired levels and mentions results found in a specific table." + }, + { + "question_id": 237, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a figure. It includes several images arranged in a grid layout depicting various stages of a document life cycle or processing steps. Each image shows a different state of documents, possibly related to digitalization or text recognition processes. These images likely serve as a visual representation of the document's evolution through a particular workflow, such as scanning or Optical Character Recognition (OCR)." 
+ }, + { + "question_id": 238, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a caption associated with a figure. It reads \"Figure 5: The OCR process.\" This caption identifies and describes the figure that it is associated with. The figure it refers to likely illustrates the stages or aspects of the OCR process, which could involve converting scanned images of text into machine-encoded text." + }, + { + "question_id": 239, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a figure. It shows a piece of text with visual markings comparing two sections labeled \"Available OCR\" and \"Improved OCR.\" The annotations indicate corrections or enhancements made in the 'Improved OCR' section compared to the 'Available OCR' section. This figure serves to demonstrate the efficacy of certain OCR technologies or methodologies by providing a before-and-after comparison." + }, + { + "question_id": 240, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a caption associated with a figure. It reads \"Figure 6: Excerpt from the Hong Kong report with different versions of OCR output. The Internet Archive image containing this excerpt can be accessed here:\" followed by a URL. This caption provides context for the associated figure, indicating that it is an excerpt from a specific report and acknowledges the source of the image. It helps readers understand the purpose of the figure and where they can find additional information." 
+ }, + { + "question_id": 241, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is part of a footer. It contains the name of a journal, \"Journal of Data Mining and Digital Humanities,\" along with the ISSN number, which is a unique identifier used for serial publications. This area of the document provides information about the publication in which the article or research paper may be found." + }, + { + "question_id": 242, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is also part of a footer. It includes a URL, \"http://jdmdh.episciences.org\", which likely directs readers to the website of the journal or publication mentioned in . This URL provides a way for readers to access more information or related content online." + }, + { + "question_id": 243, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a page number. It is located in the footer area of the document and provides the numerical identifier \"9\" for the current page. This helps readers navigate the document and facilitates referencing specific sections." + }, + { + "question_id": 244, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region is identified as the page-header of the document. 
It contains the title of the document, which reads \"2012 Annual Report 2013.\"" + }, + { + "question_id": 245, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This portion is a section-header labeled \"Non-Executive Directors' Remuneration.\" It indicates that the following section will discuss the payment and remuneration details for non-executive directors of the company." + }, + { + "question_id": 246, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This area is another section-header specifying \"Components of Non-Executive Director remuneration.\" This header suggests a breakdown of the various elements that constitute the remuneration for non-executive directors." + }, + { + "question_id": 247, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Found at the bottom of the page, this region is the page-footer. It's a small section that is typically used for providing footnotes, disclaimers, or publication information for the document." 
+ }, + { + "question_id": 248, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a table detailing a \"Share purchase plan.\" It contains columns for the name of the individual, the amount of shares acquired, and the share price range at acquisition dates, alongside with the total sum. It lists information about shares acquired by specific individuals at specified price ranges during a specific time frame." + }, + { + "question_id": 249, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a block of text providing detailed information on \"Current Board fees\" and \"Post-employment benefits.\" It specifies the annual fees for different board roles and outlines the post-retirement benefits provided to non-executive directors with terms of board service." + }, + { + "question_id": 250, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region contains text related to the \"Deferred share purchase plan.\" It describes the nature of the share purchase plan, specifying the conditions under which shares were purchased, the performance criteria associated with the plan, and details regarding the share price and acquisition dates." 
+ }, + { + "question_id": 251, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region is categorized as text. It discusses the mathematical concept of homotopy groups designated π_n(M), focusing on their ability to classify different dimensional hypersurfaces within a manifold M. The text further explains that the triviality of these homotopy groups is linked to the connectivity of the space they represent, with specific mentions of the concepts of domain walls in cosmology and topological defects arising from symmetry breaking during phase transitions in the universe." + }, + { + "question_id": 252, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region of text delves into the implications of symmetry breaking in theoretical physics. It connects the process of symmetry breaking to the generation of monopole-like defects, and it references the Standard Model's group construction that includes a U(1) factor. The text implies that this formation of defects played a pivotal role in historical scientific developments." + }, + { + "question_id": 253, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text in this section links the theoretical concepts mentioned earlier to a practical application: the motivation for introducing a phase of inflation in cosmological models. 
It characterizes the topological conditions for the formation of defects and points out that certain solutions for these conditions can exist even in the absence of topologically stable defects. References are made to specific types of defects and academic citations are included to support these statements." + }, + { + "question_id": 254, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The content in this region pertains to the study of cosmic defects and their stability. It addresses scenarios in which initially unstable defects might become stable through various mechanisms, such as the effects of plasma. These considerations are relevant to the inflationary model in cosmology, particularly the constraints from the formation of cosmic strings." + }, + { + "question_id": 255, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region is categorized as a formula and presents a mathematical equation related to the text's discussion about topological defects and homotopy groups. The equation seems to represent a relationship that is essential to the argument or analysis presented in the categorical text regions it is associated with." + }, + { + "question_id": 256, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The last region is identified as a page-footer. 
It likely contains publication and/or authorship information, a page number, or possibly an indication of the section of the document where the content can be found. Since it is a page-footer, its purpose is primarily to aid in the organization and navigation of the document." + }, + { + "question_id": 257, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0301_0188_0040.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The image appears to be a stylized illustration of a side profile of a person's head and upper torso. The person's face is depicted with a serene or peaceful expression, eyes closed and a faint smile, implying a sense of calm or contemplation. Behind the figure, there are abstract shapes resembling clouds or wind patterns that swirl around the head, which could suggest thoughts, memories, or a state of mental flow. The illustration uses a muted color palette, predominantly warm shades of beige, pink, and gray, with a touch of red in the figure's attire, which has a dotted pattern. This artwork likely serves to evoke a mood or theme related to the content of the document in which it's included, possibly regarding mental health, mindfulness, psychology, or the creative process." + }, + { + "question_id": 258, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0301_0188_0040.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a footer section of an image or document. It contains a citation that reads \"© 2021 Scientific American,\" indicating that the image or the content of the document is copyrighted by Scientific American. This informs the viewer about the source of the content and copyright year, serving both as an attribution and a legal notice to respect the intellectual property rights associated with the material." 
+ }, + { + "question_id": 259, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This section is labeled as \"Chapter 2: Motivation.\" It functions as a heading signifying the start of a new chapter or section within the document, providing readers with an indication of the chapter's theme, which in this case is to establish the rationale or impetus behind the subject matter discussed in the chapter." + }, + { + "question_id": 260, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region, also labeled as \"Chapter 2: Motivation,\" serves a similar purpose to , functioning as part of the chapter heading that presents the focus of the chapter, potentially implying that the author will delve into the reasons or driving forces guiding the study or research presented." + }, + { + "question_id": 261, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region contains a paragraph of text that discusses specific concepts related to a theoretical framework, possibly in the field of theoretical physics or string theory. It mentions a scenario where excitations in a certain 'near horizon throat region' appear redshifted to an observer at infinity. The text discusses the energy associated with these excitations and touches on limits pertaining to string theory, suggesting that in a particular limit, the full Type IIB string theory must be considered. 
The paragraph concludes with an implication that supergravity must be considered in the context of near-horizon geometry within the scope of string theory." + }, + { + "question_id": 262, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "In this text paragraph, the document appears to be discussing two theoretical pictures related to the same low-energy limit within theoretical physics or string theory. It mentions the field theory picture with supergravity and an \\( N = 4 \\text{SU}(N) \\) SYM on the D branes, as well as the geometry picture with supergravity in flat space and Type IIB string theory. It suggests that the document is comparing and contrasting these two theoretical perspectives and proposing that they are both decoupled theories with identical asymptotic conditions." + }, + { + "question_id": 263, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This text section seems to conclude the discussion by mentioning that the analytical tools for two differing theoretical scenarios are completely incompatible. It references the Born-Infeld action and suggests that a mathematical comparison between different models yields coincident D-branes for an \\( \\text{SU}(N) \\) two-form field strength, relating to a broader discussion on theoretical physics and string theory." 
+ }, + { + "question_id": 264, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region exhibits a mathematical formula that is relevant to the discussion within the document. The formula appears to link certain theoretical physics concepts, connecting string coupling constants \\( g_s \\) with D-brane charges and configurations. The formula is most likely important in the context of supporting the document's claims about supergravity or string theory." + }, + { + "question_id": 265, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The paragraph in discusses the conventional assumption that an insider's private information is static, citing specific examples from the literature. It elaborates by stating that in certain works, insiders are assumed to know the final value of an asset both before and after the default of the company issuing the asset. The text suggests that the presence of insiders does not always lead to market arbitrage and may contribute positively to the market by leading to higher information efficiency in price processes." + }, + { + "question_id": 266, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text in challenges the assumption of an insider's perfect foresight as unrealistic, reasoning that the fundamental value of a firm is tied to dynamically changing elements like cash flows and sales, among other factors. 
The paragraph presents the idea that the fundamental value is stochastic, implying that it is subject to random fluctuations, and that the insider has the advantage of perceiving these fluctuations more clearly than other market participants." + }, + { + "question_id": 267, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "In , the document introduces the paper's goal, which is to relax the assumption of static information and examine the equilibrium in trading and price processes and market efficiency when insiders have dynamic private information. The paragraph sets the context for a more detailed exploration of how markets operate under these conditions." + }, + { + "question_id": 268, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": " contains text which explains that the model considered in this paper is a broader version of the earlier static models. The paper's intention is to cover dynamic information scenarios and improve on previous models that covered a narrower range of trading strategies and pricing rules." + }, + { + "question_id": 269, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The paragraph in discusses the findings of the paper, which include the identification of a Markovian equilibrium that is inconspicuous, allows insiders to trade without being detected, and is solely dependent on the total order process. 
It underscores the unique nature of this equilibrium and how it enhances the market efficiency in certain conditions." + }, + { + "question_id": 270, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text in suggests empirical outcomes where revealing information might be beneficial. Specifically, it contrasts different market equilibrium scenarios and suggests that in non-Markovian price processes, it’s often better for insiders to disclose their private information." + }, + { + "question_id": 271, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The Comprehensive Description includes \"Abbondanza\" displayed in a script font that conveys a sense of stylishness or elegance, which may suggest that it is the name of a business, possibly a restaurant or some sort of food-related establishment, given its association with abundance or plenty often related to food." + }, + { + "question_id": 272, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The Comprehensive Description for \"Cafe\" suggests that the text is identifying a type of establishment where coffee and light meals may be served. The font is straightforward and easily legible, which is typically used for clarity and immediate recognition for passersby." 
+ }, + { + "question_id": 273, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The Comprehensive Description for \"USIS\" indicates a text that is likely an acronym or a name presented in a bold and blocky font, common for official or institutional entities. It is placed on the side of a van, suggesting it could be the branding of a company or a service, possibly linked to the van's purpose or ownership." + }, + { + "question_id": 274, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"ESTATE\" is written in capital letters with a bold typeface that has clear and uniform strokes, implying a sense of authority and prominence. It appears against a yellow background, which suggests visibility and is likely meant to catch the eye of passersby. The text is likely part of a business sign for a company dealing with property, real estate sales, or management." + }, + { + "question_id": 275, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"AGENTS\" displayed similarly in capital letters and bold typeface complements the text in . The typeface is consistent, suggesting that both are part of the same sign. The dark text against the yellow background stands out, indicating the nature of the business below, which is likely involved in real estate agency work." 
+ }, + { + "question_id": 276, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"SAXONS\" is written in capital letters with a font style that is bold and prominent, but with a slightly more decorative style than and . This difference could be a stylized choice to make the brand name distinctive. Positioned on a façade above a window, it is part of the business's branding, likely the name of the company." + }, + { + "question_id": 277, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "This contains the same text as , \"SAXONS\", indicating that the text is repeated within the image. This repetition reinforces the importance of the name as part of the branding. The text style and location, again above a window, maintain the brand's visibility from multiple angles." + }, + { + "question_id": 278, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "Displaying the word \"ESTATE\" in the same bold, capital letter style as observed in . This repetition at a lower part of the building indicates a consistent branding approach across the business' presence on the building, and its placement closer to eye level increases readability for pedestrians." 
+ }, + { + "question_id": 279, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"AGENTS\" is identified, and like , it mirrors the style and size of the sign in , ensuring that the message of the business being an estate agency is clear. This consistent branding facilitates quick recognition and understanding of the services offered." + }, + { + "question_id": 280, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"Triple\" appears in a retro cursive script, likely chosen to convey a sense of nostalgia or classic style, which is consistent with the overall branding. It is set against a yellow portion of the sign, and the color choice here is a mint green which provides a pleasing contrast that makes the text stand out." + }, + { + "question_id": 281, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"WHITE\" is written in bold, capital letters, featured on a green ribbon-like background that cuts across the sign. The font is sans-serif, which gives a modern and clean look. The use of capital letters in this context suggests emphasis and importance." + }, + { + "question_id": 282, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The phrase \"SPOT\" is displayed in a sans-serif, uppercase font similar to the text in . It is placed within the same green ribbon background, mirroring the style and maintaining design consistency. 
This positioning completes the name or title represented on the signage." + }, + { + "question_id": 283, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"O's\" is written in a script that echoes the retro flair seen in . This script is mint green, presented on a yellow backdrop, and it features an apostrophe, signifying a possessive or a contraction. The stylized \"O\" has a red center dot, adding to the thematic color scheme." + }, + { + "question_id": 284, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"by\" is located on the lower left area of the central graphic and is likely to be a connector or a preposition relating to the larger text elements in the image. It's written in small, lowercase letters, contrasting in size to the other texts, suggesting a subordinate role in the information hierarchy." + }, + { + "question_id": 285, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001122.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text in this area reads \"NEW\". The font is bold and stylized with thick vertical lines and sharp edges, suggesting a strong, impactful presence. The letters are colored in red, which stands out against the white background of the fabric they are printed on. There are black vertical lines that run down the fabric, giving the impression of pinstripes. The text placement and style are reminiscent of classic athletic or team-related apparel, often used to represent a specific city or team name." 
+ }, + { + "question_id": 286, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001122.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text in this area reads \"YORK\". Similar to the previous region, it features a bold and stylized font in red, contrasting with the white pinstriped background. The consistent style between this text and that of suggests they form a single phrase, typically associated with a particular location or team. The font size and its commanding presence imply that the text is intended to be easily read and recognized from a distance, characteristic of team jerseys or sports merchandise." + }, + { + "question_id": 287, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_162.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"Colchester\" is displayed in a straightforward, sans-serif font with a bold weight, which makes it highly legible and easy to read. The text color is white, which contrasts sharply with the red background, creating a standout effect that captures attention. This type of text presentation is typically used for clear communication and effective signage. \"Colchester\" is likely the name of a place, possibly a destination or location referenced on a signpost or directional marker. The choice of a bold and contrasting color scheme is intentional, aimed at ensuring that the text is discernible from a distance and under various lighting conditions. The text is centrally aligned within the marked area, suggesting the importance of the information it conveys. The presence of the symbol above the text, resembling a stylized pair of railway tracks, indicates that this sign is associated with a railway service or station. 
The purpose of the text in this context is to inform viewers of a railway station name or a destination reachable via train services." + }, + { + "question_id": 288, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"AYAM\" is presented in large uppercase letters on a signage board. The font appears bold and designed to be eye-catching, serving the purpose of promoting or identifying a business or product associated with chicken, as \"ayam\" means chicken in Malay and Indonesian." + }, + { + "question_id": 289, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"RIA\" appears next to \"AYAM\" in the same font and style, following the design pattern of the sign. It seems to be part of a larger phrase or brand name, although without additional context it is challenging to ascertain its full meaning or association." + }, + { + "question_id": 290, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"SMASHED\" is in uppercase letters and retains the same font consistency and styling as the previous words, indicating it's part of the same signboard. The use of the word \"smashed\" could be describing a method of food preparation, possibly relating to the menu items offered by the establishment." 
+ }, + { + "question_id": 291, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"FRIED\" appears in the same bold, attention-grabbing font as the other text elements in the signage. The usage of the term \"fried\" aligns well with food-oriented establishments and could denote a particular style of cooking advertised by the business." + }, + { + "question_id": 292, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The term \"CHICKEN\" completes what seems to be a descriptive phrase relating to the nature of the food provided at this location. Presented in the same visual style as the other text elements on the sign, it confirms the establishment’s focus on chicken dishes." + }, + { + "question_id": 293, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"Accommodation\" appears on a signboard, suggesting the label for a location where lodging facilities are provided. The text is bold and capitalized, providing clear visibility and significance, thus indicating direction to the accommodation facilities within the vicinity." + }, + { + "question_id": 294, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"Office\" displayed similarly to , is also on the signboard, and its typography suggests it is an instructional marker guiding individuals towards offices located nearby. 
Its distinct appearance functions as a navigational aid for visitors seeking office spaces." + }, + { + "question_id": 295, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The term \"Nightline\" is prominently featured, possibly indicating a nighttime service or a helpline available after-hours. This text, like the others on the sign, caters to nighttime assistance or inquiries, potentially providing crucial information for individuals seeking support during late hours." + }, + { + "question_id": 296, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"BUBBA\" appears in bold, capital letters with a font that is playful and somewhat informal, possibly evoking a casual or friendly atmosphere. The position is prominently displayed at the top of a circular logo, which suggests its importance as a distinguishing element or a brand name." + }, + { + "question_id": 297, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"GUMP\" is presented in a similar bold and playful font directly below . Both words form a cohesive phrase when read together, implying a connection or partnership, possibly in a business context." + }, + { + "question_id": 298, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"SHRIMP\" is placed below and , completing the phrase that seems to be the focal point of the circular logo. 
The font style remains consistent with the previous text, reinforcing the brand's identity and likely indicating the type of product or service offered." + }, + { + "question_id": 299, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"RESTAURANT\" is written in a smaller, yet bold font beneath the word \"SHRIMP\". This text specifies the nature of the business associated with the overarching brand identified by the preceding text." + }, + { + "question_id": 300, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"MARKET\" appears in a smaller font at the bottom of the circle, suggesting a secondary or additional aspect of the business, perhaps indicating a place where goods are sold as part of the company's offerings." + }, + { + "question_id": 301, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"CO\" could stand for \"Company,\" abbreviated and presented beside the main brand name, which is common practice for businesses to denote a corporate entity." + }, + { + "question_id": 302, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"TM\" indicates that the entire phrase formed by , , and is a trademark. This protects the brand's unique identity and legally secures its use exclusively for the business's purposes." 
+ }, + { + "question_id": 303, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"EVENING\" appears in a sans-serif, bold font that is capitalized for emphasis. It is located on the lower part of a product label, positioned just above another text element that indicates further details about the product. The text serves to indicate either the usage time or a key ingredient, \"Evening Primrose,\" of the product, likely related to wellness or personal care." + }, + { + "question_id": 304, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"PRIMROSE\" is the second part of a phrase in which the text is styled similarly to the text in . It follows directly below \"EVENING,\" suggesting the complete term \"EVENING PRIMROSE.\" The positioning and styling are consistent with , reinforcing the connection between the two words. The term \"Evening Primrose\" is usually associated with the name of a plant, often used in the context of essential oils or natural product ingredients." + }, + { + "question_id": 305, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The term \"BASE\" is part of the full term \"BASE OIL,\" which is shown in a smaller and possibly lighter weight sans-serif typeface compared to and . Positioned at the bottom of the product label, it likely indicates the type of product, suggesting that the contents of the bottle can be used as a carrier or base oil in aromatherapy or skincare." 
+ }, + { + "question_id": 306, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"OIL\" completes the term \"BASE OIL,\" as mentioned in . The font and positioning maintain consistency with the description provided in , reinforcing its role as part of a descriptive phrase related to the product's use or contents. Together, \"BASE OIL\" likely designates the product's category within a larger set of similar goods." + }, + { + "question_id": 307, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01644.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"46\" appears in a serif font with distinct, prominent stylistic flares at the ends of the strokes, which is characteristic of serif fonts. These numerals are white, providing a strong contrast against a dark background plate, which appears to be made of slate or a similar material. The plate is mounted onto a brick wall, and there are two spherical, possibly metallic, fixtures attached to the plate on either side, which seem to be serving as decorative mounting posts. The purpose of this text likely indicates an address or number associated with the location, commonly used to identify specific residential or commercial units." + }, + { + "question_id": 308, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This area seems to indicate the numeric value '31,' which, in a calendrical context, may reference the number of days in a month. It does not appear to have any interactive features based on the screenshot." 
+ }, + { + "question_id": 309, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This seems to be a button or a link labeled 'Connecter' which, when translated from French, means 'Connect' or 'Log in'. It is likely an interactive element that upon being clicked, would prompt the user to access an account or initiate a connection process." + }, + { + "question_id": 310, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This section contains the French word 'Novembre', which is the month of November. It appears to be a part of a list of months, possibly for navigating a calendar or archives by month. It may be an interactive element that allows users to view content from November." + }, + { + "question_id": 311, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This is a button or link with the text 'Annoncez' followed by information icon (i). The French word 'Annoncez' translates to 'Advertise'. This suggests that it is a call-to-action for users to advertise, possibly by clicking this button or link. The information icon typically indicates additional details available upon interaction." 
+ }, + { + "question_id": 312, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This area displays the number '07', which could signify a day of the month, especially since it is seen next to a date heading in the format 'Vendredi 7 Mai 2021', which translates to 'Friday, May 7, 2021'. It seems to be a static element without interactivity." + }, + { + "question_id": 313, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "Similar to , this is labeled 'Septembre', which is the French word for September. It is part of the same apparent navigational element for a calendar or archive sorted by months and is likely interactive as well." + }, + { + "question_id": 314, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This section reads 'Liens Web', which translates to 'Web Links' from French. This likely refers to a section of the web page intended to direct users to other related sites or resources. It is probably interactive, with each listed link being clickable." + }, + { + "question_id": 315, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "The number '15' is visible here, and when combined with the context of a calendar visible in the screenshot, it likely represents the 15th day of a month. This element does not seem to be interactive itself." 
+ }, + { + "question_id": 316, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "Here we see the number '04' which, in the context of the surrounding calendar, might represent the 4th day of a month. It doesn't show any sign of interactivity." + }, + { + "question_id": 317, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This text, 'Plan du site', stands for 'Site Map' in French and usually refers to a detailed page listing where one can find an overview of all the sections and pages within the website. It is usually an interactive element that, when clicked, will take the user to the sitemap page." + }, + { + "question_id": 318, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "This region depicts an interactive button with the text \"BOOK NOW\" overlaid on it. Usually, buttons like this on websites are clickable and lead the user to a page where they can schedule an appointment or reserve a service. The button is stylistically designed to stand out and grab attention, potentially suggesting it is a call-to-action feature for users to quickly access the booking process for the service provided by the website." 
+ }, + { + "question_id": 319, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "This section of the website features a text link with the phrase \"Buy Luxurious Doona.\" It likely serves as a navigational element, which upon clicking, would redirect users to a page where they can purchase a \"Luxurious Doona.\" The term \"Doona\" typically refers to a type of bedding, suggesting that the site might be related to home goods or personal comfort items." + }, + { + "question_id": 320, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "This area contains a text link that reads \"Pay Balance.\" It is probably an interactive link that, once clicked, would take the user to a section of the website where they can complete a payment - likely concerning a service or product they have previously engaged with." + }, + { + "question_id": 321, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "Featured here is a clickable text link titled \"About Us.\" Such links generally lead users to a webpage that elaborates on the history, mission, values, or team behind the company or service. It helps users learn more about the company or website owners." + }, + { + "question_id": 322, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "This part of the webpage indicates a \"Contact us\" link. 
Clicking on this text would typically guide the visitor to a page featuring contact information or a form enabling the users to reach out to the company for inquiries or support." + }, + { + "question_id": 323, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "\"Home\" appears to be a navigation link that, when selected, would likely redirect users to the homepage of the website. The homepage is the main page that often provides a comprehensive overview of what the website offers." + }, + { + "question_id": 324, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The text here, \"doonawash@gmail.com,\" suggests an email address. This is likely provided for users to directly contact the company or service provider through email. It is not clickable but can be used to send an email using an email client or service." + }, + { + "question_id": 325, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "Similar to , this \"Contact us\" link would allow users to access a contact section or page on the website, promoting user interaction with the service provider for queries or assistance." + }, + { + "question_id": 326, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "Just like , this \"Home\" link is a navigational feature intended to bring the user back to the site's main page, presenting a starting point or central hub for exploring the website's contents." 
+ }, + { + "question_id": 327, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "Echoing , the \"Pay Balance\" text link is associated with the payment part of a transaction on the website. It is intended to facilitate users in clearing dues or completing transactions related to the services offered by the site." + }, + { + "question_id": 328, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_0558c1f4-c05b-49a8-8479-04b1575779d2.png", + "category": "web_detailed_caption_box", + "text": "This area of the webpage is part of a bullet point list under the subheading \"Local Chapters\". The subheading describes Local Chapters as country or region-level groups affiliated with the OpenStreetMap Foundation that represent their local mapping community in dealings with government, business, and media. The bullet point \"OpenStreetMap Belgium\" likely indicates that there is an established local chapter for the country of Belgium. The text appears in blue with an underline, suggesting that it is a hyperlink. Clicking on this hyperlink would presumably direct the user to more information about the OpenStreetMap community in Belgium or to their specific website." + }, + { + "question_id": 329, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "This area appears to be a contact detail, specifically a phone number. It typically serves as a direct line of communication for users to reach out to the company or organization featured on the website. Such contact information is usually clickable on mobile devices, enabling the user to initiate a phone call directly." 
+ }, + { + "question_id": 330, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "This section includes a numerical rating, which is indicative of client satisfaction, service quality, or performance measurement. It suggests that it may be connected to reviews or ratings received from clients, as denoted by the star symbol which commonly represents ratings." + }, + { + "question_id": 331, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "The text translates to \"Free Consultation\" in Azerbaijani, indicating an offering from the company to prospective clients. It is likely a call-to-action button which upon clicking, would lead a user to a form or contact option to set up a consultation without any charge." + }, + { + "question_id": 332, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "This part of the website is labeled \"Portfolio,\" signifying that it's likely a navigation element leading to a page where the company showcases their previous work, projects, or case studies to highlight their experience and expertise." + }, + { + "question_id": 333, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This space indicates a shopping cart feature with a count of items currently in the cart, which currently stands at zero. 
This interactive element likely becomes clickable when items are added, allowing users to view and manage the contents of their cart." + }, + { + "question_id": 334, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This area is typically a customer service feature, allowing users to access help or assistance through various means such as a help center, live chat, or contact information. It's usually clickable and would direct the user to a support section of the website." + }, + { + "question_id": 335, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "The text suggests a prompt to visit the company's Facebook page. This is an interactive element that, when clicked, likely redirects users to the specified social media page to engage with the company's content on that platform." + }, + { + "question_id": 336, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "It denotes an area that likely relates to personalization for users, where they can view their astrology charts. This is expected to be a clickable feature which, when accessed, leads the user to a section where their personalized charts are displayed or can be created." + }, + { + "question_id": 337, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "Similar to , this is a call to action to visit the company's Twitter page. 
Clicking on this interactive element would redirect a user to the company's Twitter profile to view tweets and engage with their content." + }, + { + "question_id": 338, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This is a customer contact area, providing users with a way to get in touch with the company. Clicking on this is likely to take the user to a section of the site with various contact options like email, phone, or a contact form." + }, + { + "question_id": 339, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This is a call-to-action button that allows users to add a product to their shopping cart. This button is interactive, and upon clicking, the chosen product would be added to the user's cart, with the action possibly reflected in the shopping cart count in ." + }, + { + "question_id": 340, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This area is likely dedicated to showcasing the company's range of products. Clicking here would probably lead users to a product catalog where they can browse and select items of interest." + }, + { + "question_id": 341, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "It represents an area designated for a podcast. Users can expect to interact with this button to be taken to a media player or section of the website where they can listen to recorded audio content." 
+ }, + { + "question_id": 342, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This part of the website provides company information to the user. It's normally a clickable element that leads the user to learn more about the company's history, values, mission, and team members." + }, + { + "question_id": 343, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This section typically represents a menu item on a website that describes the services or actions undertaken by the organization. It usually links to a page with detailed information on the work that the organization performs, including projects, mission statements, or other relevant content." + }, + { + "question_id": 344, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This appears to be a news headline or feature article title on the website. It suggests that the organization has introduced a new initiative offering financial assistance for livelihood projects. Clicking on this title would likely lead to an article or post giving more information about the micro-grants program and its objectives." + }, + { + "question_id": 345, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This is likely a button or link to a settings page where users can adjust their preferences for the website, which might include language settings, account details, notification preferences, and more." 
+ }, + { + "question_id": 346, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This commonly links to the website's privacy policy document, where users can learn about how the organization collects, uses, stores, and protects personal data." + }, + { + "question_id": 347, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This is typically a navigation link that returns the user to the main homepage of the website from any other page." + }, + { + "question_id": 348, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "It usually indicates a button the user can click to accept the terms of a policy, possibly related to cookies or usage terms, as indicated by the accompanying text." + }, + { + "question_id": 349, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This is often a prominent call-to-action button meant to direct users to a page where they can make financial contributions to the organization or cause." 
+ }, + { + "question_id": 350, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "Commonly a menu item that links to a news section containing articles, updates, blog posts, press releases, or other information that keeps readers informed about the organization's activities or relevant topics." + }, + { + "question_id": 351, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This is typically a link to a page where users can find more information about the organization, including history, values, team members, or accomplishments." + }, + { + "question_id": 352, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "Usually a link to a page where visitors can find contact information for the organization, such as an address, phone number, email, or a contact form." + }, + { + "question_id": 353, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_6c677961-e540-4cc5-b725-5e301019a9f9.png", + "category": "web_detailed_caption_box", + "text": "This region appears to be a toolbar located within a content editing area, likely part of a web-based application interface. The specific feature highlighted is an icon that suggests functionality related to inserting tables into the content. In a typical text editor or content management system interface, clicking this icon would presumably open a menu or dialogue box allowing the user to create and insert a table into the document. 
The table insertion feature commonly lets users specify the number of rows and columns, choose a table style, and sometimes adjust additional table properties such as cell padding or headers." + }, + { + "question_id": 354, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "This area encompasses a navigation element labeled \"News.\" It likely leads to a section of the website where current news relevant to the organization or its field of operation is disseminated. As a navigational element, it is interactive and upon clicking would redirect users to the page where news articles or updates are posted." + }, + { + "question_id": 355, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "This area displays the company's name ServeGate, which appears to be stylized as a logo. This typically acts as a home button; clicking on it would usually take users back to the main or home page of the website." + }, + { + "question_id": 356, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "This heading titled \"Our Difference\" may signify a navigational item that leads to content describing what sets the organization apart from competitors. Interaction would involve clicking it to navigate to a page that likely discusses the company's unique selling propositions (USPs), mission, values, or other differentiating factors." 
+ }, + { + "question_id": 357, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "Labeled \"About Us,\" this is another navigation item that, when clicked, would take the user to a section of the site that provides information about ServeGate, such as its history, leadership team, vision, and mission." + }, + { + "question_id": 358, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The text \"Get in touch\" suggests an interactive component that leads to a part of the website where users can contact the organization. This may include a contact form, phone numbers, email addresses, or other means of communication." + }, + { + "question_id": 359, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The term \"Home\" designates a navigational link that typically redirects users to the front page of the website. Clicking this link would generally return the user to the starting point of their navigation experience." + }, + { + "question_id": 360, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The item labeled \"Services\" is likely a drop-down menu or a link to a page that outlines the company's offerings. Users can click on it to discover more about the services provided by ServeGate, including descriptions and possibly pricing or someone to contact for further inquiry." 
+ }, + { + "question_id": 361, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "\"Indigenous Impact\" might be a navigation link to a page detailing the company's impact on, contributions to, or programs associated with Indigenous communities. Interaction with this element would bring the user to either a dedicated section or might expand into a submenu listing various facets of this impact." + }, + { + "question_id": 362, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "This appears to be another instance of the company logo for ServeGate, similar to . It likely serves the same function as a clickable link leading back to the home page of the website." + }, + { + "question_id": 363, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_5a35d9c2-2c2d-4a49-ad0a-1408d9cac78e.png", + "category": "web_detailed_caption_box", + "text": "The area appears to contain an interface element labeled \"Close issue\" situated within a software development environment, likely a part of an issue tracking or project management system. This interface element is a button, as suggested by its design and placement near the text input area for comments. When clicked, it would typically result in the associated issue being marked as resolved or closed in the system, thereby updating the status of the issue within the project's workflow. The button is designed for users to signify that the discussion, problem, or task represented by the issue no longer requires attention and can be archived or removed from active consideration." 
+ }, + { + "question_id": 364, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_3fed2169-3c3d-43e7-baaa-3bf0e0c7134c.png", + "category": "web_detailed_caption_box", + "text": "This is a close button found on a web-based issue tracking platform, specifically GitLab. In the context of the page, it is presumably used to close the issue which is currently being viewed. When a user clicks this button, the active issue (#1460 in this case) will likely be marked as closed within the system, changing its status from open to closed. This is a standard feature in issue tracking and project management software, allowing users to manage the lifecycle of issues and tasks." + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/multipanel_detailed_caption_box/prediction.json b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/multipanel_detailed_caption_box/prediction.json new file mode 100644 index 0000000000000000000000000000000000000000..46b7886dee75138be7ac702f93ad13e4478cf79c --- /dev/null +++ b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/multipanel_detailed_caption_box/prediction.json @@ -0,0 +1,2186 @@ +[ + { + "question_id": 1, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "Friday, Sep 2, 61°F" + }, + { + "question_id": 2, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A stylized letter \"G\" with a gradient of colors starting from red at the top, transitioning to orange, yellow, green, blue, and ending with purple at the bottom." 
+ }, + { + "question_id": 3, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A white, fluffy cloud with a smooth, rounded shape and a slightly irregular outline. The cloud has a soft, cotton-like texture and appears to be illuminated from the left side, giving it a gentle gradient from white to a light blue hue." + }, + { + "question_id": 4, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A white gear with a blue circular center and six evenly spaced, straight teeth." + }, + { + "question_id": 5, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A white speech bubble icon with a blue outline, featuring three horizontal blue lines of varying lengths inside the bubble." + }, + { + "question_id": 6, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A red, rectangular play button with rounded corners, featuring a white, centered play triangle pointing to the right." + }, + { + "question_id": 7, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A stylized letter \"G\" with a gradient of colors starting from red at the top, transitioning to orange, yellow, green, and ending with blue at the bottom." 
+ }, + { + "question_id": 8, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A blue circle with a red dot above it and a yellow dot below it." + }, + { + "question_id": 9, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A circular button with a white center and a thin black border." + }, + { + "question_id": 10, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A white, right-pointing arrow with a thick border." + }, + { + "question_id": 11, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Excel\" is written in a bold, sans-serif font with a white color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 12, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Facebook\" is written in a bold, sans-serif font with a light pink color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." 
+ }, + { + "question_id": 13, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Speedtest\" is written in a bold, sans-serif font with a white color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 14, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Instagram\" is written in a bold, sans-serif font with a light pink color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 15, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Home\" is written in a bold, sans-serif font with a white color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 16, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Lite\" is written in a bold, sans-serif font with a white color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." 
+ }, + { + "question_id": 17, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Messenger\" is written in a bold, sans-serif font with a gradient of pink to white. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 18, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "Home De..." + }, + { + "question_id": 19, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "A pair of cartoonish eyes with large, white, oval-shaped pupils and black, curved eyelashes. The eyes are set against a green background." + }, + { + "question_id": 20, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "A blue square with a white lowercase \"g\" followed by three white horizontal lines of equal length." + }, + { + "question_id": 21, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"Search\" is written in a bold, sans-serif font with a dark gray color. The letters are evenly spaced and aligned horizontally." 
+ }, + { + "question_id": 22, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"settings\" in lowercase letters, with a modern, sans-serif font. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance. The color of the text is a gradient of light to dark gray, with the darker shade at the top and the lighter shade at the bottom." + }, + { + "question_id": 23, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "mobile, data" + }, + { + "question_id": 24, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"data\" in lowercase letters, with a modern, sans-serif font. The letters are evenly spaced and have a clean, minimalist design." + }, + { + "question_id": 25, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The Wi-Fi symbol consists of a lowercase \"i\" with a dot above it, followed by a lowercase \"f\" with a dot above it." + }, + { + "question_id": 26, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"and\" in lowercase letters, with a serif font, is written in a dark color against a light background." 
+ }, + { + "question_id": 27, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"hotspot\" in lowercase letters, with a modern, sans-serif font. The letters are evenly spaced and have a clean, minimalist design." + }, + { + "question_id": 28, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "usage, and hotspot" + }, + { + "question_id": 29, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"Connected\" is written in a bold, sans-serif font with a black color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 30, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "A white, L-shaped object with a smooth surface and rounded edges." + }, + { + "question_id": 31, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "Search or type web address" + }, + { + "question_id": 32, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The text \"ebay shopping cart\" is displayed in lowercase letters. 
The word \"ebay\" is in a larger font size compared to the word \"shopping cart\". The text is in a sans-serif font and is centered horizontally." + }, + { + "question_id": 33, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The word \"cart\" in lowercase letters, with a bold, sans-serif font. The letters are black and evenly spaced." + }, + { + "question_id": 34, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "https://cart.ebay.com/" + }, + { + "question_id": 35, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "Welcome to Costco Wholesale" + }, + { + "question_id": 36, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The text \"costco.com\" is displayed in lowercase letters." + }, + { + "question_id": 37, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "Pay Less." + }, + { + "question_id": 38, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The word \"Target\" in bold, black, sans-serif font." 
+ }, + { + "question_id": 39, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The text \"Expect More.\" is written in a bold, sans-serif font with a black color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 40, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The text \"target.com\" is displayed in a bold, sans-serif font with a blue color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 41, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The word \"skype\" is written in lowercase letters with a modern, sans-serif font. The letters are black and evenly spaced, with a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 42, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The word \"Skype\" is written in lowercase letters with a modern, sans-serif font. The letters are black and evenly spaced, with a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 43, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "A blue circular icon with a white lowercase \"s\" in the center." 
+ }, + { + "question_id": 44, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The word \"Install\" is written in lowercase white letters on a green background." + }, + { + "question_id": 45, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The word \"Skype\" is written in a bold, sans-serif font with a dark gray color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 46, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "purchases" + }, + { + "question_id": 47, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "In-app purchases" + }, + { + "question_id": 48, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "A black star with a five-pointed shape, featuring a slightly irregular outline and a textured surface that suggests a three-dimensional form." + }, + { + "question_id": 49, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "A black letter \"B\" followed by a black plus sign." 
+ }, + { + "question_id": 50, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "Editors' Choice" + }, + { + "question_id": 51, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Search settings" + }, + { + "question_id": 52, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "A white envelope icon with a triangular flap on the right side, set against a dark background." + }, + { + "question_id": 53, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "The word \"Add\" in lowercase letters, with a bold font and a slight shadow effect, giving it a three-dimensional appearance." + }, + { + "question_id": 54, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "another email" + }, + { + "question_id": 55, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "The word \"account\" in lowercase letters, with a bold font and a slight italicization, set against a plain background." 
+ }, + { + "question_id": 56, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Set up your personal or work email" + }, + { + "question_id": 57, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "or work email" + }, + { + "question_id": 58, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Network & internet" + }, + { + "question_id": 59, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Wi-Fi, mobile, data" + }, + { + "question_id": 60, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "usage, and" + }, + { + "question_id": 61, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "Wednesday, May 18" + }, + { + "question_id": 62, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "The word \"Maps\" in a bold, sans-serif font, with a gradient of pink to white, giving it a three-dimensional appearance." 
+ }, + { + "question_id": 63, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "A stylized letter \"G\" with a gradient of colors starting from red at the top, transitioning to orange, yellow, green, blue, and ending with purple at the bottom." + }, + { + "question_id": 64, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "A red, teardrop-shaped pin with a black circular center." + }, + { + "question_id": 65, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "A white speech bubble icon with a blue outline, containing three horizontal blue lines of varying lengths." + }, + { + "question_id": 66, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "A stylized letter \"G\" with a gradient of colors starting from red at the top, transitioning to orange, yellow, green, and ending with blue at the bottom." + }, + { + "question_id": 67, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "A blue circle with a white border, a red circle with a white border, and a yellow circle with a white border." 
+ }, + { + "question_id": 68, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The text \"costco.com/Check\" is displayed in a bold, sans-serif font. The word \"costco.com\" is in lowercase letters, and the word \"Check\" is in uppercase letters. The text is aligned to the left." + }, + { + "question_id": 69, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The logo features the word \"COSTCO\" in large, bold, red capital letters with a white outline. Below it, the word \"WHOLESALE\" is written in smaller, bold, blue capital letters." + }, + { + "question_id": 70, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The word \"Warehouses\" is written in a bold, sans-serif font with a light blue color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 71, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The word \"Account\" in a bold, sans-serif font, with a gradient of blue shades ranging from light to dark, giving it a three-dimensional appearance." + }, + { + "question_id": 72, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "A blue shopping cart with a white handle and a white basket area." 
+ }, + { + "question_id": 73, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "A blue rectangular sign with the word \"Shop\" in white, bold, sans-serif font. To the left of the text, there are three horizontal white lines of varying lengths." + }, + { + "question_id": 74, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The word \"Search\" in a bold, sans-serif font, with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and have a consistent size throughout." + }, + { + "question_id": 75, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "My Warehouse" + }, + { + "question_id": 76, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "Delivery Location" + }, + { + "question_id": 77, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The word \"Seattle\" in bold, black, sans-serif font." 
+ }, + { + "question_id": 78, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "Fetch Rewards: Play" + }, + { + "question_id": 79, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The word \"Play\" in bold, black, sans-serif font." + }, + { + "question_id": 80, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "to earn" + }, + { + "question_id": 81, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The text \"MAKE\" is written in bold, uppercase letters with a dark green color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 82, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The word \"MONEY\" in bold, uppercase letters with a green background and black outline." + }, + { + "question_id": 83, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The text \"appminer st\" is displayed in lowercase letters. The letters are green and have a sans-serif font. The text is aligned to the left." 
+ }, + { + "question_id": 84, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The word \"Contains\" is written in a bold, sans-serif font with a light gray color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 85, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The text \"ads\" is written in lowercase letters with a modern, sans-serif font. The letters are evenly spaced and have a clean, minimalist design. The color of the text is a light gray, blending subtly with the background." + }, + { + "question_id": 86, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The text \"50K+\" is displayed in bold, black font with a slight shadow effect, giving it a three-dimensional appearance. The \"50K\" is in a larger font size compared to the \"+\" sign." + }, + { + "question_id": 87, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "A black star with a five-pointed shape, featuring a slightly irregular outline and a textured surface that suggests a three-dimensional form." 
+ }, + { + "question_id": 88, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A black and white image of a computer keyboard with a standard QWERTY layout, including function keys, a number pad, and arrow keys. The keys are rectangular with white lettering on black keys, and the keyboard has a slight ergonomic curve." + }, + { + "question_id": 89, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The text \"walmart.com\" is displayed in a bold, sans-serif font. The letters are black and evenly spaced, with a slight shadow effect, giving them a three-dimensional appearance. The text is aligned to the left." + }, + { + "question_id": 90, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A white rectangular signboard with the text \"Lenovo Thinkpad\" in black, sans-serif font." + }, + { + "question_id": 91, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A blue rectangular button with rounded corners featuring the word \"Cancel\" in white, bold, sans-serif font." + }, + { + "question_id": 92, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A black and white image of a Lenovo ThinkPad laptop with a visible keyboard and trackpad." 
+ }, + { + "question_id": 93, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The word \"Lenovo\" in lowercase letters, with a bold font and a slight italicization, set against a plain background." + }, + { + "question_id": 94, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A black, rectangular computer keyboard with a standard QWERTY layout, including a number pad on the right side. The keys are chiclet-style with white lettering, and there is a slight sheen on the surface, suggesting a smooth texture. The function keys are aligned along the top, and there is a visible space bar at the bottom center." + }, + { + "question_id": 95, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The word \"in\" in lowercase letters, with a bold font and a slight shadow effect, giving it a three-dimensional appearance." + }, + { + "question_id": 96, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A rectangular electronic device with a screen displaying text, surrounded by a thin bezel." + }, + { + "question_id": 97, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A black and white image of a Lenovo ThinkPad charger." 
+ }, + { + "question_id": 98, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "NAVIGATING SPECIAL EDUCATION SOCIAL & EMOTIONAL LEARNING" + }, + { + "question_id": 99, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A yellow background with two human head outlines facing each other. The left head has the words \"FIXED MINDSET\" above a red downward arrow, and the right head has the words \"GROWTH MINDSET\" above a green upward arrow." + }, + { + "question_id": 100, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon character with a serious expression, featuring a dark gray hair, a light blue shirt, and a red cross symbol on the left side of the head. The character has a red frown and is surrounded by two white, cloud-like shapes on either side of the head." + }, + { + "question_id": 101, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular logo with a beige background featuring five hands in different colors: red, green, blue, purple, and orange, arranged in a circular pattern. Above the hands, the text \"Understanding Diversity\" is written in black." + }, + { + "question_id": 102, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A black and white illustration of a person with a light bulb on their head, holding a book. 
The person has a question mark above their head and another question mark to the right of their head. The background is a light peach color." + }, + { + "question_id": 103, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon illustration of two boys, one wearing a red shirt and blue shorts, and the other wearing a striped shirt and brown shorts, both with their arms raised. The word \"Bullying\" is written above them." + }, + { + "question_id": 104, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon of a boy and a girl playing with each other. The boy is wearing a yellow shirt, black shorts, and red shoes. The girl is wearing a yellow shirt, blue pants, and red shoes. Both have black hair and are smiling." + }, + { + "question_id": 105, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A blue speech bubble with a yellow lightning bolt symbol, and a red speech bubble with a yellow lightning bolt symbol, both containing a person's face." + }, + { + "question_id": 106, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A rectangular blue traffic sign with a white border, featuring three white arrows. The leftmost arrow curves to the left, the middle arrow points straight up, and the rightmost arrow curves to the right. Below the arrows, the word \"CHANGES\" is written in white capital letters." 
+ }, + { + "question_id": 107, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized illustration of two human figures, one in yellow and the other in blue, both with black outlines. The yellow figure is standing on a staircase, while the blue figure is standing on a platform. The blue figure is holding a microphone and appears to be speaking or presenting. The word \"Leadership\" is written in black text above the figures." + }, + { + "question_id": 108, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A blue circle with a white plus sign inside it, followed by a white \"2X\" text." + }, + { + "question_id": 109, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A hand holding a person with a blue shirt and black pants, with a purple banner below displaying \"$4,000\" in white text." + }, + { + "question_id": 110, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A pie chart with a blue background and a white border, featuring a white line that divides the chart into two sections. The left section is larger and has a white number \"36%\" inside it, while the right section is smaller and has a white number \"36%\" inside it." 
+ }, + { + "question_id": 111, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A globe with a blue ocean and white continents, featuring a purple banner with white text that reads \"2.7 trillion impact to global GDP from use of more efficient talent platforms.\"" + }, + { + "question_id": 112, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "46% of companies are sometimes or frequently understaffed" + }, + { + "question_id": 113, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Travel personas: how travelers identify their travel style" + }, + { + "question_id": 114, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular planner with a white background featuring a colorful illustration of two people, one wearing a red hat and the other wearing a blue hat, both holding a smartphone. The person in the red hat is holding a book, and the person in the blue hat is holding a suitcase. The background includes a mountain and a sun. The text \"THE SMART PLANNER\" is written in bold, black letters at the top." 
+ }, + { + "question_id": 115, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "The Relaxed Nomad" + }, + { + "question_id": 116, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A family of five, consisting of a man, a woman, and three children, standing together. The man is holding a baby, while the woman is holding a suitcase. The children are standing around them, with one child holding a suitcase. The family is depicted in a circular frame." + }, + { + "question_id": 117, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular white background with a thin black border." + }, + { + "question_id": 118, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular white background with a thin black border." + }, + { + "question_id": 119, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A red airplane seat with a high backrest and armrests, featuring a small, rectangular, red and white logo on the backrest." 
+ }, + { + "question_id": 120, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular logo with a white background featuring a stylized illustration of a person in a blue suit with a red briefcase in their left hand and a blue suitcase in their right hand. The person is depicted in a walking motion, with one leg forward and the other leg back. The text \"THE BUSINESS ROAD WARRIOR\" is written in bold, black capital letters above the illustration." + }, + { + "question_id": 121, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A woman with short brown hair, wearing a purple top with a white collar, is holding a yellow envelope in her right hand. She has a headset on her head and is standing in front of a computer monitor with the word \"BIG\" visible on the screen." + }, + { + "question_id": 122, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular orange background with a white border." + }, + { + "question_id": 123, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular logo with a light blue background featuring a stylized globe in the center. The globe is divided into four quadrants, each in a different shade of blue. A black headset with a microphone is positioned over the globe, with the earpieces extending outward. To the right of the globe, there is a yellow star." 
+ }, + { + "question_id": 124, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon man with a light blue shirt and a black tie, wearing a headset with a microphone. He has a light brown hair and is pointing upwards with his right hand. To his right, there is a yellow light bulb with a red base." + }, + { + "question_id": 125, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A blue and black wrench with a flat-head design, featuring a blue handle with a textured grip and a black head with a serrated edge. The wrench has a long, straight shaft connecting the handle to the head." + }, + { + "question_id": 126, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized illustration of a person wearing a headset with a microphone, a purple shirt, and a white undershirt. The person is holding a smartphone with a blue and white design on the screen." + }, + { + "question_id": 127, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized illustration of a hand holding a black and white telephone handset, with a blue circular background featuring a partial globe and a speech bubble with the word \"BIG\" in white. The hand is wearing an orange life jacket with white stripes." 
+ }, + { + "question_id": 128, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular logo with a red border, featuring a stylized design of a person wearing a white shirt and a black tie, with a blue and white striped hat. The background includes a Union Jack flag and a yellow rectangle." + }, + { + "question_id": 129, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon woman with brown hair tied back, wearing a purple shirt with a white collar, and a headset. She is holding a yellow star in her right hand and has a black and white striped object in her left hand." + }, + { + "question_id": 130, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized heart with a vibrant red color, featuring a prominent blue and orange flame-like design on the upper left side, and a smaller blue and orange flame-like design on the lower right side." + }, + { + "question_id": 131, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized human figure with arms raised in a celebratory pose, surrounded by four blue arrows pointing outward, each arrow with a slight curve and a pointed tip." 
+ }, + { + "question_id": 132, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A digital bathroom scale with a black base and a white digital display showing the weight." + }, + { + "question_id": 133, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A pair of pink lungs with a central trachea, featuring detailed vein patterns and a slightly curved shape." + }, + { + "question_id": 134, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A yellow smiley face with a black outline, featuring two black dots for eyes and a curved black line for a mouth." + }, + { + "question_id": 135, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A human knee with a visible bone structure, including the femur and tibia, with a slight curvature and a smooth surface." + }, + { + "question_id": 136, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A human brain with a detailed, textured surface, featuring a prominent cerebral cortex and a visible portion of the brain stem." 
+ }, + { + "question_id": 137, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A red heart with a white outline and a white line through the center, indicating a heart rate or rhythm." + }, + { + "question_id": 138, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "The word \"HOTAPPE\" is written in large, bold, uppercase letters. The letters are colored in the following order from left to right: red, orange, yellow, light blue, and dark blue." + }, + { + "question_id": 139, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular smiley face with a yellow face and a red border. The face has two blue teardrop-shaped eyes, a brown curved mouth, and two brown curved lines for eyebrows." + }, + { + "question_id": 140, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "An orange circle with two brown hands facing each other, with the word \"OPENNESS\" in green capital letters below the circle." + }, + { + "question_id": 141, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular yellow background with a pair of hands clasped together in the center. The hands are depicted in a light brown color, with the left hand slightly overlapping the right hand. 
Radiating from the hands are white lines, giving the impression of light or energy emanating from the hands. Below the hands, the word \"TOUCH\" is written in bold, uppercase letters." + }, + { + "question_id": 142, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular logo with a light blue background featuring a large red heart in the center. Inside the heart, there is a white exclamation mark. Below the heart, the word \"ATTENTION\" is written in bold, uppercase letters." + }, + { + "question_id": 143, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular blue background with two cartoon faces, one on the left and one on the right, separated by a white dashed line. The left face has brown hair and a neutral expression, while the right face has light brown hair and a neutral expression. Below the faces, the word \"PROXIMITY\" is written in bold, uppercase letters." + }, + { + "question_id": 144, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular blue background with a white eye in the center, featuring a brown iris and a white sclera. Below the eye, the words \"EYE CONTACT\" are written in bold, uppercase letters." + }, + { + "question_id": 145, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with a green background, wearing a red shirt, with a thermometer in their mouth." 
+ }, + { + "question_id": 146, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with a green background, wearing a pink shirt, with a red nose and a red mouth, and a white hand with three fingers extended." + }, + { + "question_id": 147, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with a green background, wearing a pink shirt, with a green and white object in their mouth." + }, + { + "question_id": 148, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized illustration of a person with a green circular background. The person has a gray face with a red nose and a red mouth. The person is wearing a red shirt with a yellow collar." + }, + { + "question_id": 149, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with a green background, wearing a red shirt and white pants, is sitting on a white chair." 
+ }, + { + "question_id": 150, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular green sign with a white border, featuring a stylized illustration of a pair of red lungs with a gray outline, set against a dark background." + }, + { + "question_id": 151, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized illustration of a kidney with a red and pink color scheme, featuring a central red area with a pink outline, flanked by two symmetrical, curved, pink shapes resembling the kidney's lobes, all set against a light green background." + }, + { + "question_id": 152, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A blue rectangular header with the text \"Interesting Facts\" in white, bold, sans-serif font. To the left of the text, there are three white horizontal lines. To the right of the text, there is a red heart symbol." + }, + { + "question_id": 153, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized panda face with a white face, black ears, black patches around the eyes, and a black nose. The panda has a small, curved black mouth and a content expression. The face is set against a green circular background." 
+ }, + { + "question_id": 154, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A green circular background with a white plate in the center, containing a yellow circle. To the left of the plate is a white fork, and to the right is a white spoon." + }, + { + "question_id": 155, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular icon with a dark blue background featuring a red heart in the center. The heart is outlined in white and has a white line running horizontally across its middle. Below the heart, the word \"Diseases\" is written in white, bold, sans-serif font." + }, + { + "question_id": 156, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A white long-sleeved shirt with a teal collar and cuffs, featuring a row of black buttons down the front." + }, + { + "question_id": 157, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular orange icon with a white film reel design in the center. The film reel has a blue border with white squares on the left and right sides, and a white center with a blue horizontal line dividing it into two sections." 
+ }, + { + "question_id": 158, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular green icon featuring a white syringe with a red cross symbol on the barrel, a white droplet to the right of the syringe, and a yellow needle." + }, + { + "question_id": 159, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular blue icon featuring a white wine glass with a yellow liquid on the left and a white bottle with a yellow liquid and a brown cap on the right." + }, + { + "question_id": 160, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular icon with a dark blue background featuring two stylized human figures. The figure on the left has short, light brown hair and is wearing a red shirt. The figure on the right has short, light brown hair and is wearing a brown shirt with a white collar." + }, + { + "question_id": 161, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular icon with a solid olive green background featuring a white silhouette of the Earth in the center." + }, + { + "question_id": 162, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person sitting on a chair with their head in their hands, wearing a blue shirt and black pants." 
+ }, + { + "question_id": 163, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with short brown hair, wearing a blue long-sleeve shirt and yellow pants, is sitting on a white platform with a blue wave design at the bottom." + }, + { + "question_id": 164, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A woman with dark hair tied back, wearing a pink top, is depicted with a thought bubble above her head." + }, + { + "question_id": 165, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with red hair, wearing a green top and blue pants, is sitting on a windowsill with their legs crossed." + }, + { + "question_id": 166, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A woman with dark hair tied back, wearing a blue sleeveless top and black leggings, is running with her arms slightly bent and her legs in motion. She has a white earphone cord hanging from her right ear." 
+ }, + { + "question_id": 167, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A woman with black hair tied back, wearing a green headband, a green sleeveless top, and black pants, is sitting cross-legged with her hands pressed together in a prayer position." + }, + { + "question_id": 168, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "A green dinosaur with a white belly, a purple dinosaur with a white belly, and a green dinosaur with a white belly." + }, + { + "question_id": 169, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon boy with a purple bandage on his forehead, wearing a blue shirt and blue pants, is holding a purple object in his right hand." + }, + { + "question_id": 170, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "A lime green refrigerator with a single door, featuring a black handle on the right side. The door has a horizontal indentation near the top." 
+ }, + { + "question_id": 171, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "A small, rectangular, lime green refrigerator with a single door featuring a vertical handle on the left side. The bottom section of the refrigerator is orange with a horizontal handle." + }, + { + "question_id": 172, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "A dark-colored, rectangular structure with a flat roof and vertical sides, featuring a small, square window on the upper left side and a larger, rectangular window on the lower right side." + }, + { + "question_id": 173, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "A tree with dense, green foliage." + }, + { + "question_id": 174, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "A cylindrical, metallic pole with a consistent diameter throughout its length, featuring a series of evenly spaced, horizontal bands encircling its surface." + }, + { + "question_id": 175, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A deep, dark-colored bowl with a wide, flared rim and a smooth, glossy finish." 
+ }, + { + "question_id": 176, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A modern kitchen with a stainless steel oven and a black cooktop. The oven has a digital display and control panel, and there is a visible handle on the oven door. The cooktop has multiple burners with black grates." + }, + { + "question_id": 177, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A white, rectangular wall socket with a single, round, black power switch located on the right side." + }, + { + "question_id": 178, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A white, rectangular light switch with rounded edges, featuring a central toggle switch mechanism." + }, + { + "question_id": 179, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "Stainless steel built-in oven with a large glass door, featuring a digital control panel above the door with multiple buttons and a display screen." + }, + { + "question_id": 180, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A tall, slender vase with a flared rim and a narrow neck that gradually widens into a bulbous base, featuring a glossy finish with a gradient of colors transitioning from a deep blue at the top to a greenish hue towards the bottom." 
+ }, + { + "question_id": 181, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "The floor is composed of medium-toned wooden planks with a smooth, polished finish. The wood grain is visible, running lengthwise along the planks, which are laid out parallel to each other. The planks have a consistent width and exhibit a warm, reddish-brown hue." + }, + { + "question_id": 182, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A vertical, wall-mounted spice rack with multiple tiers, each tier holding several glass jars with metal lids. The jars are arranged in a single column, and the rack appears to be made of a dark, possibly wooden material." + }, + { + "question_id": 183, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A round, dark-colored table with a smooth surface and a central pedestal base." + }, + { + "question_id": 184, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "The chair features a high, slightly curved backrest and seat cushion upholstered in a woven fabric with a diamond pattern. The fabric is primarily light green with a central vertical stripe in a slightly darker shade. The armrests are padded and covered in the same woven fabric, with a light green color. The chair's legs are dark-colored and straight." 
+ }, + { + "question_id": 185, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A purple place mat with a textured surface and a white circular design in the center." + }, + { + "question_id": 186, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A vertical traffic light with three circular lenses arranged in a column, displaying a red light at the top, an unlit middle lens, and a green light at the bottom, all encased in a black housing with a visor over each lens." + }, + { + "question_id": 187, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A pink and white striped awning with a scalloped edge and fringe detailing." + }, + { + "question_id": 188, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A tall, dark brown tree trunk with a rough, textured bark. The trunk is relatively straight and has a consistent width throughout its visible length." + }, + { + "question_id": 189, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A chalkboard sign with a wooden frame displaying the text \"Château de la Bertrandière\" in elegant, cursive script." 
+ }, + { + "question_id": 190, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A tall, narrow, rectangular window with a dark frame and a single vertical pane of clear glass." + }, + { + "question_id": 191, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "A chestnut horse with a white star on its forehead, dark eyes, and a soft, dark muzzle. Its ears are pricked forward, and the mane appears to be a lighter shade of chestnut, blending into the darker coat. The horse's neck shows a gentle curve, and the coat has a healthy sheen." + }, + { + "question_id": 192, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "A brown horse with a dark mane." + }, + { + "question_id": 193, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "A light gray horse with a darker mane and tail, featuring a well-muscled build, a straight profile, and a calm demeanor." + }, + { + "question_id": 194, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A white, rectangular table with rounded corners and a smooth surface." 
+ }, + { + "question_id": 195, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A modern black induction cooktop featuring a sleek design with two visible cooking zones, each outlined with a white circular pattern. The front edge has a stainless steel trim with control buttons, and there are small yellow indicator lights above the buttons." + }, + { + "question_id": 196, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A glossy, metallic spoon with a reflective surface and a tapered handle, featuring a rounded bowl with a slight indentation on one side." + }, + { + "question_id": 197, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A metallic spoon with a reflective surface and a tapered handle that widens towards the end, featuring a rounded bowl with a pointed tip." + }, + { + "question_id": 198, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000299654.jpg", + "category": "natural_detailed_caption_box", + "text": "A zebra with a distinctive black and white striped pattern, featuring a mane of short, erect hair transitioning from black at the base to white at the tips. The stripes on the neck are vertical and become more horizontal as they reach the mane. The visible part of the zebra's face shows a pattern of narrow stripes that converge around the eyes and muzzle, with a white area above the eyes and a dark nose. The ears are pointed and display a striped pattern consistent with the head." 
+ }, + { + "question_id": 199, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000107939.jpg", + "category": "natural_detailed_caption_box", + "text": "An octagonal red stop sign with bold, all-caps white lettering spelling \"STOP\" centered on the sign." + }, + { + "question_id": 200, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000107939.jpg", + "category": "natural_detailed_caption_box", + "text": "An octagonal sign with a red background and a white border, featuring the word \"STOP\" in white uppercase letters." + }, + { + "question_id": 201, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "A dark brown, square-shaped cushion with a visible textured surface that suggests a soft, plush fabric." + }, + { + "question_id": 202, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "A dark brown, square-shaped pillow with a visible corner that appears to be soft and plush." + }, + { + "question_id": 203, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "The earlobe is small and rounded, with a smooth, slightly glossy surface. It is adorned with a small, round, gold-colored earring that has a subtle, reflective sheen." 
+ }, + { + "question_id": 204, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "A young girl with curly hair, wearing a patterned top with a mix of geometric and floral designs in dark and vibrant colors, paired with long-sleeved pink undershirts. Her expression is one of mild surprise or excitement, with her mouth slightly open and eyes looking upwards. Her arms are outstretched with palms facing up, as if gesturing or presenting something." + }, + { + "question_id": 205, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "A young boy with curly hair, wearing a striped sweater with shades of blue, white, and brown, stands with his hands clasped together. He has a focused expression on his face, with his mouth slightly open and his eyes looking to the side." + }, + { + "question_id": 206, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A ski parka featuring a color-block design with a white torso, green sleeves, and black shoulder panels. The jacket has a high collar and a front zipper closure. There are red accents on the cuffs and a red logo on the left chest area. The parka is paired with a black helmet with a visor." + }, + { + "question_id": 207, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "The knee of the person is covered in a dark purple snowsuit with a slightly glossy finish. The fabric appears thick and durable, suitable for cold weather. 
The knee area is slightly bent, indicating a relaxed stance. The snowsuit has a subtle sheen, reflecting light, and the material appears to be tightly fitted around the knee." + }, + { + "question_id": 208, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "The trousers are dark-colored, possibly black or dark gray, with a straight-leg cut. They feature a visible side pocket on the right leg with a flap closure, and there are belt loops around the waistband. The fabric appears to be a sturdy material, potentially denim or a similar thick textile." + }, + { + "question_id": 209, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A blue helmet with a glossy finish, featuring a prominent white stripe with a red outline running horizontally across the middle. The helmet has a black visor attached at the front, and a black chin strap with a buckle hanging down from the sides." + }, + { + "question_id": 210, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "The ski boot is predominantly black with a sleek, modern design. It features a sturdy, high-ankle structure for support and a smooth, matte finish. The boot has a contoured shape to fit the foot and lower leg, with a slightly raised heel for added stability. The sole is thick and textured for grip and durability." 
+ }, + { + "question_id": 211, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "The sleeve of the sweater is predominantly white with a bold red graphic of a person skiing. The fabric appears to be a soft, knit material, and the sleeve is slightly loose-fitting, extending to the wrist. The red graphic is positioned centrally on the sleeve, adding a dynamic contrast to the white background." + }, + { + "question_id": 212, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A person wearing a black helmet, goggles, and a white jacket with red and black accents, including a red logo on the left chest area. The individual is also wearing black gloves and appears to be in a skiing stance." + }, + { + "question_id": 213, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2411153.jpg", + "category": "natural_detailed_caption_box", + "text": "A red, white, and green racing motorcycle fairing with the number 69 prominently displayed in white on a red background, flanked by green and white stripes. The fairing features sponsor logos and a black lower section with a vent." + }, + { + "question_id": 214, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2411153.jpg", + "category": "natural_detailed_caption_box", + "text": "A series of parallel, evenly spaced, horizontal lines with a consistent width, running across the entire width of the image." 
+ }, + { + "question_id": 215, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/22d3844dcf29a07bd10f557a33684e331846f81b938ca0f742afab09c542133f.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The mass of the quark is a very small, massless particle that is part of the quark mass. It is a fundamental particle that is responsible for the mass of the quark, and it is often used in particle physics to study the properties of quarks and their interactions." + }, + { + "question_id": 216, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/22d3844dcf29a07bd10f557a33684e331846f81b938ca0f742afab09c542133f.png", + "category": "ocr_doc_detailed_caption_box", + "text": "A lowercase letter 'v' with a serif at the top and a tail at the bottom, featuring a bold, sans-serif font." + }, + { + "question_id": 217, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The geometry and adsorption energies for the structures of thio-glycolic acid on Au(111) at 0.25ML." + }, + { + "question_id": 218, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The table contains a list of adsorption sites with their corresponding adsorption energies. The adsorption sites are listed in a column, with each site having a corresponding adsorption energy. The adsorption energy is listed in a separate column, with each site having a corresponding energy value. The table also includes the adsorption energy for the Au-S bond, which is listed in a separate column. 
The adsorption energy for the Au-S bond is listed in a separate column, with each site having a corresponding energy value." + }, + { + "question_id": 219, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The adsorption energy is 0.63 eV." + }, + { + "question_id": 220, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The adsorption energy for the most stable structure of the HSCH2COOH on the Au(111) surface is 0.63 eV. The adsorption site preferred by the sulfur atom is located at the top of the gold atom. The polar angle between the normal vector of the surface and the S-C2 direction is 74.2 degrees. The HSCH2COOH tend to lie down at the low coverage." + }, + { + "question_id": 221, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "A black and white image of a person's face with a neutral expression, featuring a prominent nose, closed lips, and visible teeth. The person has short hair and is wearing a dark-colored top." + }, + { + "question_id": 222, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The segmented image shows two graphs. The first graph on the left is titled \"Atom prediction performance (a and c)\" and features a scatter plot with blue and orange dots. 
The second graph on the right is titled \"Bond prediction performance (a and c)\" and also features a scatter plot with blue and orange dots. Both graphs have a similar layout with x and y axes, and the dots are evenly spaced." + }, + { + "question_id": 223, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The F1 score for segmentation and classification networks." + }, + { + "question_id": 224, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The performance of segmentation network is measured by the F1 score, which is a metric that takes into account both precision and recall. The F1 score is used to evaluate the performance of the segmentation network in terms of its ability to correctly segment the image. The segmentation network is compared to other networks, and the F1 score is used to determine which network is the most effective." + }, + { + "question_id": 225, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The segmentation network is a deep learning model that uses a combination of atom, bond, and charge types to predict the pixel values. The model is trained on a large amount of data and has a high F1 score, indicating its high accuracy. The F1 score is a measure of the model's performance, taking into account both precision and recall. The model is designed to be used for various tasks, such as image segmentation and object detection." 
+ }, + { + "question_id": 226, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The performance of the different types of networks is measured by the F1 score, which is a measure of the accuracy of the network. The F1 score is higher for the different types of networks, indicating that they are performing well. The performance of the different types of networks is compared to the performance of the segmentation networks." + }, + { + "question_id": 227, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The performance of the different parts of the network is measured by the F1 score, which is a measure of the accuracy of the network. The F1 score is used to evaluate the performance of the network in different tasks, such as atom, bond, and charge type classifications. The network is able to do a good job even when the segmentation is not perfect, and the performance of the different parts of the network is significantly higher than the segmentation networks." + }, + { + "question_id": 228, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "8.3 Overall graph accuracy" + }, + { + "question_id": 229, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The overall graph accuracy is a measure of the performance of the different parts of the graph, including the segmentation network and the classification network. 
The segmentation network is used to segment the graph into different parts, and the classification network is used to predict the type of the graph. The overall graph accuracy is a combination of these two parts, and it is used to measure the performance of the graph." + }, + { + "question_id": 230, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The number 11 is displayed in a bold, sans-serif font with a dark color, possibly black or dark gray, against a lighter background. The digits are evenly spaced and aligned vertically." + }, + { + "question_id": 231, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The sugar is a low GI sweet spot, which is a type of sugar that has a low glucose index." + }, + { + "question_id": 232, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The low GI sweet spot is at least 22mg CE/100mg sucrose." + }, + { + "question_id": 233, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The sugar has a low GI of about 22-32 mg CE/100g polyphenols. The polyphenol content is high, with a range of 22-32 mg CE/100g polyphenols. The sugar is hygroscopic, with a higher moisture content, and the polyphenol content increases as the sugar becomes more saturated." 
+ }, + { + "question_id": 234, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The massacuit is a dark brown, viscous liquid with a glossy sheen, contained in a clear glass bottle." + }, + { + "question_id": 235, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The massacuites are prepared at two different sugar mills, designated as \"Mill 1\" and \"Mill 2\". The polyphenol content of each sample is determined and washed until they reach the desired polyphenol content, which is roughly 500 to 2000 ICUMSA. The results are in Table 4 below." + }, + { + "question_id": 236, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "A black and white image of a sugar cube with a textured surface, featuring a pattern of small, raised dots." + }, + { + "question_id": 237, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The image shows a collection of documents with text, some of which are in black and white, and others in color. The documents are arranged in a grid-like pattern, with some documents having a red border. The text on the documents is in various fonts and sizes, and some documents have a red box around the text." 
+ }, + { + "question_id": 238, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The OCR process." + }, + { + "question_id": 239, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The text is a letter written in English, with the first line reading \"Available OCR\" followed by \"for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The second line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The third line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The fourth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The fifth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The sixth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The seventh line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical 
work which I carried out under your direction.\" The eighth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The ninth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The tenth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The eleventh line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The twelfth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The thirteenth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The fourteenth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The fifteenth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The sixteenth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to 
accept as an excuse for my shortcomings in this Report.\" The seventeenth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The eighteenth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The nineteenth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The twentieth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The twenty-first line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The twenty-second line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The twenty-third" + }, + { + "question_id": 240, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The Internet Archive image containing this excerpt can be accessed here: https://archive.org/details/b2439867/page/n7" + }, + { + "question_id": 241, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + 
"text": "Journal of Data Mining and Digital Humanities" + }, + { + "question_id": 242, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The text is a web address in blue, displaying the URL \"http://dmh.esciences.org\"." + }, + { + "question_id": 243, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The number 9 is displayed in a bold, sans-serif font with a slight shadow effect, giving it a three-dimensional appearance. The color of the number is a gradient of dark to light gray, with the darker shade at the top and the lighter shade at the bottom." + }, + { + "question_id": 244, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text \"IOOF Annual Report 2012\" is displayed in a serif font, with \"IOOF\" in a larger size and \"Annual Report 2012\" in a smaller size. The text is in a light green color." + }, + { + "question_id": 245, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Non-Executive Directors' Remuneration" + }, + { + "question_id": 246, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "4.1. 
Components of Non-Executive Director remuneration" + }, + { + "question_id": 247, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text \"page 37\" is written in lowercase letters." + }, + { + "question_id": 248, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The company is a non-executive director." + }, + { + "question_id": 249, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The Company's Constitution requires that the aggregate remuneration paid or provided to all Non-Executive Directors in any financial year by the Company, its subsidiaries and associated entities may not exceed an amount approved by shareholders. The ceiling amount includes all remuneration provided to Non-Executive Directors, including superannuation but not including retirement benefits. The current limit of $980,000 per annum was approved by shareholders at the 2010 Annual General Meeting. There has been no increase to the Non-Executive Director fee pool since this time." + }, + { + "question_id": 250, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Non-Executive Directors receive a fee for being a Director of the Board. An additional fee is paid to the Chairman of the Board. 
Non-Executive Directors do not receive additional fees for service on Board and Committees. The Company's Constitution requires that the aggregate remuneration paid or provided to all Non-Executive Directors in any financial year may not exceed an amount approved by shareholders." + }, + { + "question_id": 251, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text is a mathematical equation involving the homotopy group of a space." + }, + { + "question_id": 252, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The region is a monopole, which is a type of topological defect." + }, + { + "question_id": 253, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The topological conditions of formation of defects only govern the formation of topologically stable defects. It was found that defects solutions can form even when the topology is trivial. The most well-known example are the electro-weak strings, formed during the electroweak symmetry breaking, which are perturbatively stable for a range of parameters which are not realized in nature, and belong to the broader class of embedded defects." 
+ }, + { + "question_id": 254, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "These defects are a priori unstable though mechanisms (such as plasma effects) have been found to stabilize them. They are of interest for inflation model builders since this mechanism can allow lift the constraints from the formation of cosmic strings (see Sec. IV F on D-term inflation)." + }, + { + "question_id": 255, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "A black and white image of a mathematical equation with the variables \"x\" and \"y\" in a bold font, followed by a period and the number \"1\" in a smaller font." + }, + { + "question_id": 256, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The number 72 is displayed in a bold, black font with a slight shadow effect, giving it a three-dimensional appearance. The numerals are evenly spaced and aligned horizontally." + }, + { + "question_id": 257, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0301_0188_0040.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "A stylized illustration of a woman with a bun hairstyle, featuring a pattern of white, cloud-like shapes with small black dots scattered throughout. The woman is wearing a pink garment with a polka dot pattern." 
+ }, + { + "question_id": 258, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0301_0188_0040.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "A red and white polka dot pattern with a small, dark, irregularly shaped mark near the center." + }, + { + "question_id": 259, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The number 62 is displayed in a bold, sans-serif font with a dark color, possibly black or dark gray, against a lighter background. The numerals are evenly spaced and aligned horizontally." + }, + { + "question_id": 260, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Chapter 2: Motivation" + }, + { + "question_id": 261, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The string theory is a type of string theory that is compactified on the near-horizon geometry. It is a decoupled theory that combines the two pictures of the same low energy limit of one theory, Type IIB string theory." + }, + { + "question_id": 262, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The region is a field theory picture of the low energy limit of Type IIB string theory." 
+ }, + { + "question_id": 263, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The regions for which analytic tools exist for these two different pictures turn out to be completely incompatible." + }, + { + "question_id": 264, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The equation is a complex mathematical expression involving various variables and functions. It includes a combination of Greek letters, numbers, and mathematical operators. The visible part of the equation shows a series of variables and functions that are interconnected, with some parts appearing to be in parentheses. The equation is written in a formal, mathematical notation." + }, + { + "question_id": 265, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The presence of an insider on the market does not necessarily lead to arbitrage, and the presence of insiders might be considered beneficial to the market, in the sense that it leads to higher information efficiency of the equilibrium price process." 
+ }, + { + "question_id": 266, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The fundamental value of the firm is a stochastic process, and the insider can observe it directly or at least observe it in a less noisy way than the other agents on the market." + }, + { + "question_id": 267, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The paper relaxes the assumption of static insider information and studies the equilibrium trading and price processes, as well as market efficiency, in a setting with dynamic private information." + }, + { + "question_id": 268, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The model is a generalization of the static information setting of [2]." + }, + { + "question_id": 269, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The model is a generalization of the static information setting of [2] and is designed to include dynamic information. It has a much smaller set of admissible trading strategies and pricing rules compared to the ones considered in the work. The model shows the existence of a unique Markovian equilibrium, which is an equilibrium price that allows the insider to trade undetected and depends only on the total order process. 
The model also shows that the presence of an insider increases the market informational efficiency for times close to the time of the order." + }, + { + "question_id": 270, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "A green and white striped awning with a scalloped edge and fringe detailing." + }, + { + "question_id": 271, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A red, three-dimensional, cursive sign with the word \"Abondana\" written in a flowing, elegant script." + }, + { + "question_id": 272, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard features the word \"Cafe\" in a stylized, cursive font with a gradient of red to dark red, giving it a three-dimensional appearance. The letters are slightly italicized and have a shadow effect, enhancing their depth." + }, + { + "question_id": 273, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The logo features the letters \"USIS\" in bold, with \"US\" in red and \"IS\" in black. Below the letters, there is a tagline in smaller, gray font." + }, + { + "question_id": 274, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"ESTATE\" is written in bold, black, uppercase letters on a yellow background." 
+ }, + { + "question_id": 275, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signage displays the word \"AGENTS\" in bold, uppercase letters. The letters are black and set against a yellow background." + }, + { + "question_id": 276, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signage displays the word \"SAXONS\" in bold, black capital letters on a yellow background." + }, + { + "question_id": 277, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard features the word \"SAXONS\" in capital letters with a serif font, set against a dark background. The letters are in a metallic gold color with a slight gradient, giving them a three-dimensional appearance. The signboard has a reflective surface, suggesting it is made of a glossy material." + }, + { + "question_id": 278, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard displays the word \"ESTATE\" in capital letters, with a serif font. The letters are dark and possibly metallic, with a reflective surface that catches the light, giving them a slightly shiny appearance. The background of the signboard is not visible, but the letters are set against a dark backdrop that contrasts with the lighter color of the text." 
+ }, + { + "question_id": 279, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard features capitalized, serif lettering spelling \"AGENTS\" with a metallic finish and a slight gradient, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 280, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"Triple\" is written in a cursive, green font with a white outline. The letters are slightly italicized and have a playful, rounded design." + }, + { + "question_id": 281, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"WHITE\" is written in bold, uppercase letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 282, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"SPOT\" is written in bold, uppercase letters with a light blue color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." 
+ }, + { + "question_id": 283, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A stylized, cursive letter \"O\" with a green outline and a white fill, featuring a small, curved tail extending from the bottom right." + }, + { + "question_id": 284, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The visible part of the ag is a white, stylized letter \"A\" with a slight shadow effect, giving it a three-dimensional appearance." + }, + { + "question_id": 285, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001122.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"NEW\" is written in bold, red, uppercase letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 286, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001122.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"YORK\" is written in bold, red, uppercase letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 287, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_162.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"Colchester\" is written in a bold, sans-serif font with a dark blue color." 
+ }, + { + "question_id": 288, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signage features bold, black, uppercase letters spelling \"AYAM\" on a yellow background." + }, + { + "question_id": 289, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signage features large, bold, red letters \"RUA\" on a yellow background." + }, + { + "question_id": 290, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A yellow sign with the word \"SMASHED\" in bold, black, uppercase letters." + }, + { + "question_id": 291, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A dark brown, rectangular sign with the word \"FRIED\" in bold, uppercase letters." + }, + { + "question_id": 292, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A brown sign with the word \"CHICKEN\" in bold, uppercase letters." 
+ }, + { + "question_id": 293, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "Accommodation" + }, + { + "question_id": 294, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"Office\" is written in a bold, sans-serif font with a dark blue color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 295, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"Nightline\" is written in a bold, sans-serif font with a dark blue color." + }, + { + "question_id": 296, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A red and white signboard with the word \"BUBBA\" in bold, capitalized, red letters on a white background, with a red border around the sign." + }, + { + "question_id": 297, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard features the word \"GUMP\" in large, bold, red capital letters with a white outline. The letters are set against a textured, light-colored background that resembles a stone or concrete surface." 
+ }, + { + "question_id": 298, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard features the word \"SHRIMP\" in capital letters, with a bold, sans-serif font. The letters are white with a slight shadow effect, giving them a three-dimensional appearance. The background of the signboard is a deep blue color, providing a stark contrast to the white text." + }, + { + "question_id": 299, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A curved signboard with a red border and a white background, featuring the word \"RESTAURANT\" in bold, black, uppercase letters." + }, + { + "question_id": 300, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A curved, red and white signboard with the word \"MARKET\" in capital letters, featuring a serif font." + }, + { + "question_id": 301, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A circular blue signboard with a white border and a white symbol resembling a stylized letter 'C' in the center." + }, + { + "question_id": 302, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A circular, metallic signboard with a textured background featuring the letters \"TM\" in a bold, sans-serif font, centered on the sign." 
+ }, + { + "question_id": 303, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"EVENING\" is written in bold, uppercase letters with a gradient of red to pink. The letters are evenly spaced and have a slightly blurred effect, giving them a soft, glowing appearance." + }, + { + "question_id": 304, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"PRIMROSE\" is written in bold, uppercase letters with a gradient of pink to red. The letters are evenly spaced and have a slightly blurred effect, giving them a soft, glowing appearance." + }, + { + "question_id": 305, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"BASE\" is written in bold, uppercase letters with a red background and white outline. The letters are evenly spaced and have a slightly distressed texture." + }, + { + "question_id": 306, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"OIL\" is written in bold, uppercase letters with a gradient of red to pink. The letters are evenly spaced and have a slightly blurred appearance." 
+ }, + { + "question_id": 307, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01644.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The number 46 is displayed in a large, bold, white font with a slight shadow effect, giving it a three-dimensional appearance. The numerals are evenly spaced and have a smooth, rounded design." + }, + { + "question_id": 308, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green calendar with white text displaying the numbers 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 
300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369," + }, + { + "question_id": 309, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green rectangular button with rounded corners featuring the word \"Connecter\" in white lowercase letters, followed by a yellow icon resembling a lock with a keyhole." + }, + { + "question_id": 310, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green, stylized, lowercase letter \"a\" with a slight shadow effect, giving it a three-dimensional appearance." + }, + { + "question_id": 311, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green, stylized lowercase \"a\" with a curved tail extending from the bottom right, resembling a lowercase \"i\" with a dot above it." + }, + { + "question_id": 312, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green rectangular sign with white text displaying the numbers \"07\" in a bold, sans-serif font." 
+ }, + { + "question_id": 313, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "The word \"septembre\" in lowercase letters, with a green background and white font." + }, + { + "question_id": 314, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "The text \"Lien Web\" is written in a bold, sans-serif font with a green color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 315, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "The numbers 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30 are displayed in a green color." + }, + { + "question_id": 316, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green and white striped awning with a scalloped edge and a fringe detail." 
+ }, + { + "question_id": 317, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "Plan du site" + }, + { + "question_id": 318, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "A white arrow pointing to the right, with a slight curve at the tail end, is positioned to the right of the text \"Book Now.\"" + }, + { + "question_id": 319, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "About Us" + }, + { + "question_id": 320, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The text \"Pay Balance\" is written in bold, black, sans-serif font." 
+ }, + { + "question_id": 321, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "About Us" + }, + { + "question_id": 322, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "contact us" + }, + { + "question_id": 323, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The word \"About\" is written in a bold, sans-serif font with a gradient of pink to red, set against a teal background." + }, + { + "question_id": 324, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "A teal-colored horizontal bar with white text and symbols. On the left side, there is a white envelope icon followed by the text \"doonawash@gmail.com\". On the right side, there is a white icon resembling a person in a wheelchair." + }, + { + "question_id": 325, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The text \"Contact us\" is written in a bold, sans-serif font with a pinkish-red color. The letters are evenly spaced and aligned horizontally." 
+ }, + { + "question_id": 326, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The word \"Home\" is written in a bold, sans-serif font with a gradient of blue shades, transitioning from a lighter blue at the top to a darker blue at the bottom." + }, + { + "question_id": 327, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The text \"Pay Balance\" is written in a bold, sans-serif font with a pinkish hue. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 328, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_0558c1f4-c05b-49a8-8479-04b1575779d2.png", + "category": "web_detailed_caption_box", + "text": "OpenStreetMap Belgium" + }, + { + "question_id": 329, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "A black arrow pointing to the right." + }, + { + "question_id": 330, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "A white rectangular tag with the number \"4.9\" in bold black font centered on it." 
+ }, + { + "question_id": 331, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "A rectangular blue button with white text that reads \"Pulsuz Konsultasyon.\"" + }, + { + "question_id": 332, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "The word \"Portfolio\" in a bold, sans-serif font, with a slight italicization, and a drop shadow effect, giving it a three-dimensional appearance. The letters are black with a white outline, and the text is set against a plain background." + }, + { + "question_id": 333, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "A white shopping cart icon with a blue outline, featuring a rectangular basket with a grid pattern, two vertical handles, and four wheels, two of which are visible." + }, + { + "question_id": 334, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "The word \"SUPPORT\" in bold, uppercase letters with a blue background and white outline." + }, + { + "question_id": 335, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "A white lowercase letter \"f\" with a bold, sans-serif font, set against a blue background." 
+ }, + { + "question_id": 336, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "Your Charts" + }, + { + "question_id": 337, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "A blue \"X\" with a white outline, featuring a slightly darker blue fill and a lighter blue border." + }, + { + "question_id": 338, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "A rectangular white sign with the words \"CONTACT US\" in bold, uppercase, blue letters." + }, + { + "question_id": 339, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "A rectangular blue button with rounded corners and a white border. The button has the words \"ADD TO CART\" in bold, white, uppercase letters centered on it." + }, + { + "question_id": 340, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "The word \"PRODUCTS\" is written in bold, uppercase letters with a blue background. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." 
+ }, + { + "question_id": 341, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "The word \"PODCAST\" is written in bold, uppercase letters with a blue background. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 342, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "The text \"ABOUT US\" is written in bold, uppercase letters. The letters are blue and have a slight shadow effect, giving them a three-dimensional appearance. The text is centered horizontally." + }, + { + "question_id": 343, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "The text \"WHAT WE DO\" is written in bold, uppercase letters. The letters are evenly spaced and have a modern, sans-serif font style. The color of the text is black, and it stands out against a light background." + }, + { + "question_id": 344, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "GWM launches livelihood micro-grants" + }, + { + "question_id": 345, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "A rectangular white button with a black border and the word \"Settings\" in black, bold, sans-serif font centered on it." 
+ }, + { + "question_id": 346, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "Privacy Policy" + }, + { + "question_id": 347, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "The word \"HOME\" in uppercase letters, with a bold, sans-serif font, is centered on the image. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance. The color of the text is a gradient of light to dark gray, creating a subtle contrast against the background." + }, + { + "question_id": 348, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "A rectangular black button with the word \"Accept\" in white, bold, sans-serif font centered on it." + }, + { + "question_id": 349, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "A red, oval-shaped button with a white border and the word \"DONATE\" in bold, uppercase, red letters centered on it." + }, + { + "question_id": 350, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "The word \"NEWS\" in bold, uppercase letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and have a clean, modern font style." 
+ }, + { + "question_id": 351, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "The text \"ABOUT US\" is written in bold, uppercase letters with a sans-serif font. The letters are evenly spaced and aligned horizontally. The color of the text is black, and it stands out against a light background." + }, + { + "question_id": 352, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "The text \"CONTACT US\" is written in bold, uppercase letters. The letters are evenly spaced and have a modern, sans-serif font. The color of the text is black, and it stands out against a light background." + }, + { + "question_id": 353, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_6c677961-e540-4cc5-b725-5e301019a9f9.png", + "category": "web_detailed_caption_box", + "text": "A black and white icon depicting a stylized, abstract representation of a building with a flat roof and multiple rectangular windows arranged in a grid pattern." + }, + { + "question_id": 354, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The word \"News\" in a bold, sans-serif font, with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and have a dark color, contrasting with the lighter background." 
+ }, + { + "question_id": 355, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The logo features the word \"ServeGate\" in bold, black letters. To the left of the text, there is a stylized design consisting of two overlapping triangles, one in teal and the other in red, with a black line separating them." + }, + { + "question_id": 356, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "Our Difference" + }, + { + "question_id": 357, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The text \"About Us\" is written in a bold, sans-serif font with a red color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 358, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "A rectangular button with rounded corners, featuring a light pink background and a thin red border. The button displays the text \"Get in touch\" in bold, red, sans-serif font." + }, + { + "question_id": 359, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The word \"Home\" in a serif font, with the letters in a light gray color against a white background." 
+ }, + { + "question_id": 360, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The word \"Services\" in a bold, sans-serif font, with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 361, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "Indigenous Impact" + }, + { + "question_id": 362, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The text \"ServeGate\" is written in bold, black, sans-serif font. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 363, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_5a35d9c2-2c2d-4a49-ad0a-1408d9cac78e.png", + "category": "web_detailed_caption_box", + "text": "A rectangular white button with rounded corners, featuring the text \"Close issue\" in bold, black, sans-serif font." + }, + { + "question_id": 364, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_3fed2169-3c3d-43e7-baaa-3bf0e0c7134c.png", + "category": "web_detailed_caption_box", + "text": "A vertical array of three circular, dark-colored buttons with a slightly raised, smooth surface, aligned centrally on a light-colored background." 
+ } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/multipanel_detailed_caption_box/question.json b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/multipanel_detailed_caption_box/question.json new file mode 100644 index 0000000000000000000000000000000000000000..542170b11f1047dd46687d7a673bba62960cebab --- /dev/null +++ b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/multipanel_detailed_caption_box/question.json @@ -0,0 +1,556 @@ +[ + { + "question_id": 1, + "image": "6.png", + "category": "multipanel_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 213.4428086070215, + 28.742921857304644, + 518.4371460928652, + 84.09966024915062 + ], + [ + 77.18006795016987, + 136.26274065685163, + 267.202718006795, + 258.6862967157418 + ], + [ + 344.3827859569649, + 137.3272933182333, + 231.0079275198188, + 257.62174405436014 + ], + [ + 610.5209513023783, + 123.4881087202718, + 260.8154020385051, + 278.9127972819932 + ], + [ + 605.19818799547, + 426.885617214043, + 272.52548131370327, + 233.1370328425821 + ], + [ + 349.70554926387314, + 440.72480181200456, + 201.2004530011325, + 206.52321630804076 + ], + [ + 72.92185730464327, + 432.2083805209513, + 233.1370328425821, + 216.10419026047566 + ], + [ + 76.11551528878822, + 670.6681766704417, + 269.33182332955835, + 239.52434881087203 + ], + [ + 346.5118912797282, + 668.5390713476784, + 227.81426953567384, + 241.65345413363534 + ], + [ + 620.1019252548132, + 668.5390713476784, + 191.61947904869763, + 222.49150622876556 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 2, + "image": "u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 66.41997729852442, + 87.18955732122588, + 92.4472190692395, + 106.02951191827468 + ], + [ + 
193.4801362088536, + 90.25652667423383, + 94.63791146424518, + 119.61180476730986 + ], + [ + 327.55051078320093, + 84.56072644721907, + 106.02951191827468, + 115.23041997729852 + ], + [ + 255.25766174801362, + 216.44040862656072, + 164.30192962542566, + 112.60158910329172 + ], + [ + 117.68217934165722, + 216.87854710556186, + 121.80249716231555, + 115.66855845629965 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 3, + "image": "50.png", + "category": "multipanel_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 21.36677115987461, + 5.362068965517206, + 1064.7774294670846, + 142.44514106583074 + ], + [ + 56.978056426332294, + 167.39341692789966, + 149.56739811912226, + 256.4012539184953 + ], + [ + 338.307210031348, + 156.71003134796234, + 217.22884012539186, + 267.08463949843264 + ], + [ + 619.6363636363636, + 158.49059561128524, + 197.64263322884014, + 268.8652037617555 + ], + [ + 897.4043887147336, + 167.39341692789966, + 179.8369905956113, + 258.1818181818182 + ], + [ + 135.3228840125392, + 450.5031347962382, + 256.4012539184953, + 293.79310344827593 + ], + [ + 480.7523510971787, + 457.6253918495298, + 215.44827586206898, + 275.98746081504703 + ], + [ + 783.448275862069, + 455.8448275862069, + 211.8871473354232, + 283.1097178683386 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 4, + "image": "24.png", + "category": "multipanel_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 133.33333333333334, + 60, + 393.33333333333337, + 446.6666666666667 + ], + [ + 543.3333333333334, + 80, + 430, + 416.6666666666667 + ], + [ + 980, + 80, + 486.6666666666667, + 480 + ], + [ + 956.6666666666667, + 560, + 510, + 386.6666666666667 + ], + [ + 593.3333333333334, + 520, + 360, + 426.6666666666667 + ], + [ + 83.33333333333334, + 520, + 503.33333333333337, + 450 + ], + [ 
+ 86.66666666666667, + 973.3333333333334, + 460, + 443.33333333333337 + ], + [ + 546.6666666666667, + 973.3333333333334, + 470, + 443.33333333333337 + ], + [ + 1016.6666666666667, + 973.3333333333334, + 436.6666666666667, + 460 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 5, + "image": "u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 44.65323496027242, + 62.92849035187289, + 81.31668558456299, + 90.71509648127127 + ], + [ + 143.9494892167991, + 54.3473325766175, + 87.85471055618616, + 88.67196367763904 + ], + [ + 235.89046538024974, + 75.1872871736663, + 85.40295119182747, + 82.54256526674233 + ], + [ + 268.17196367763904, + 164.2678774120318, + 84.17707150964812, + 88.26333711691257 + ], + [ + 19.727014755959143, + 166.31101021566403, + 99.70488081725311, + 79.27355278093073 + ], + [ + 59.36379114642452, + 258.2519863791146, + 75.18728717366628, + 94.39273552780931 + ], + [ + 149.2616345062429, + 275.82292849035184, + 84.17707150964814, + 80.4994324631101 + ], + [ + 242.83711691259933, + 261.5209988649262, + 84.58569807037456, + 93.16685584562995 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 6, + "image": "11.png", + "category": "multipanel_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 1028.513805522209, + 7.994897959183692, + 2225.1500600240097, + 548.8703481392557 + ], + [ + 91.47839135654262, + 631.0369147659063, + 615.624849939976, + 714.5204081632653 + ], + [ + 709.5756302521008, + 628.5645258103241, + 672.4897959183673, + 719.4651860744298 + ], + [ + 1382.065426170468, + 628.5645258103241, + 660.1278511404562, + 719.4651860744298 + ], + [ + 2042.1932773109243, + 623.6197478991596, + 707.1032412965186, + 724.4099639855942 + ], + [ + 2749.296518607443, + 
623.6197478991596, + 620.5696278511405, + 724.4099639855942 + ], + [ + 3369.8661464585834, + 668.1227490996398, + 684.8517406962785, + 674.9621848739496 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 7, + "image": "u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 16.8927355278093, + 111.80476730987515, + 131.1010215664018, + 150.3972758229285 + ], + [ + 158.2094211123723, + 111.80476730987515, + 128.2633371169126, + 149.82973893303065 + ], + [ + 286.4727582292849, + 110.66969353007946, + 130.53348467650397, + 150.96481271282633 + ], + [ + 417.00624290578884, + 110.66969353007946, + 128.83087400681043, + 154.93757094211125 + ], + [ + 91.80760499432462, + 290.5788876276958, + 124.29057888762769, + 148.1271282633371 + ], + [ + 217.233257661748, + 286.03859250851303, + 137.34392735527808, + 152.66742338251987 + ], + [ + 355.14472190692396, + 288.30874006810444, + 135.07377979568673, + 153.2349602724177 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 8, + "image": "23.png", + "category": "multipanel_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 26.666666666666714, + 22.814814814814813, + 1053.3333333333333, + 144.49382716049382 + ], + [ + 53.283950617284, + 197.72839506172838, + 353.6296296296296, + 391.65432098765433 + ], + [ + 406.9135802469136, + 186.320987654321, + 288.98765432098764, + 403.0617283950617 + ], + [ + 695.9012345679013, + 186.320987654321, + 311.8024691358025, + 425.87654320987656 + ], + [ + 703.5061728395062, + 616, + 304.1975308641975, + 403.0617283950617 + ], + [ + 403.11111111111114, + 623.604938271605, + 300.39506172839504, + 395.45679012345676 + ], + [ + 45.67901234567906, + 623.604938271605, + 357.4320987654321, + 414.4691358024691 + ], + [ + 
49.48148148148153, + 1049.4814814814815, + 342.22222222222223, + 365.037037037037 + ], + [ + 391.70370370370375, + 1026.6666666666667, + 330.8148148148148, + 387.85185185185185 + ], + [ + 722.5185185185186, + 1026.6666666666667, + 300.39506172839504, + 391.65432098765433 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 9, + "image": "u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 0.28376844494892167, + 107.26447219069239, + 169.69353007945517, + 184.4494892167991 + ], + [ + 194.94892167990918, + 107.83200908059024, + 161.74801362088536, + 168.55845629965947 + ], + [ + 374.2905788876277, + 17.593643586833146, + 125.7094211123723, + 268.4449489216799 + ], + [ + 0, + 294.5516458569807, + 155.22133938706017, + 171.96367763904652 + ], + [ + 178.49035187287174, + 284.90351872871736, + 128.2633371169126, + 213.3938706015891 + ], + [ + 323.2122587968218, + 287.17366628830877, + 171.39614074914869, + 212.82633371169126 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 10, + "image": "u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 40.6061293984109, + 73.2122587968218, + 208.2860385925085, + 99.31895573212259 + ], + [ + 264.2156640181612, + 60.1589103291714, + 225.87968217934164, + 225.87968217934164 + ], + [ + 32.66061293984109, + 216.79909194097615, + 218.5017026106697, + 259.9318955732123 + ], + [ + 271.59364358683314, + 337.6844494892168, + 236.40635641316686, + 133.93870601589103 + ] + ], + "segmentation": [] + } + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/natural_detailed_caption_box/answer.json 
b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/natural_detailed_caption_box/answer.json new file mode 100644 index 0000000000000000000000000000000000000000..1fc627ed32af43ec4e04f2ac4c8172d3201bc43b --- /dev/null +++ b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/natural_detailed_caption_box/answer.json @@ -0,0 +1,2186 @@ +[ + { + "question_id": 1, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "This is a status bar displaying the date \"Friday, Sep 2,\" along with the temperature \"61°F.\" It informs the user about the current day, date, and ambient temperature, likely provided by a weather service. This text is generally not interactive and serves as an informational display only." + }, + { + "question_id": 2, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "This represents a capital letter \"G\" set inside a circle. It is part of a larger icon that is likely associated with a search engine or service, commonly used as a button to initiate web searches or to access related services. Interaction with this icon typically leads to the search engine's interface or homepage." + }, + { + "question_id": 3, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "This icon depicts clouds, which is a visual representation often used for weather-related services or apps. It indicates current or forecasted weather conditions and is typically interactive, leading the user to a detailed weather report upon tapping." 
+ }, + { + "question_id": 4, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "The gear-like icon signifies settings or options. It is a common symbol for accessing system settings or preferences in an application or operating system. Interacting with this icon usually opens a menu where various settings can be adjusted." + }, + { + "question_id": 5, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "The icon resembling a speech bubble is often associated with messaging services or chat applications. Tapping it would commonly open a messaging app where users can send messages, photos, or videos to others." + }, + { + "question_id": 6, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "This icon, featuring a triangle resembling a \"play\" button, is widely recognized as a symbol for media players or services offering video and music content. Interaction with this icon would likely launch a media playback application or service." + }, + { + "question_id": 7, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "The icon represents a search engine or a suite of services provided by a major tech company, indicated by the letter \"G\" and vibrant colors. Interacting with this icon typically brings the user to a home screen with access to various services offered by the company, such as search, email, maps, and more." 
+ }, + { + "question_id": 8, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "This icon, representing a microphone inside a colorful outline, is suggestive of a digital assistant or voice search feature. When interacted with, it would generally activate a voice recognition service allowing users to speak commands or queries for assistance." + }, + { + "question_id": 9, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "The circle icon in a navigation bar is typically an interactive home button on a mobile device, often bringing the user back to the home screen when tapped." + }, + { + "question_id": 10, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "The icon with a triangle pointing leftwards resembles a \"back\" navigation button, generally used to go back to the previous screen in an application or navigate backwards in a browser." + }, + { + "question_id": 11, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "This icon is indicative of the Microsoft Excel mobile application, recognizable by its green 'X' on a white background, which suggests a tool for creating and editing spreadsheets. It is likely an interactive element that, upon touch or click, launches the application allowing users to work with spreadsheets on their mobile device." 
+ }, + { + "question_id": 12, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "This is the Facebook mobile application icon, featuring a lowercase 'f' on a blue background. When interacted with, it typically opens the Facebook app where users can browse their news feed, connect with friends and family, post updates, and engage in social networking activities." + }, + { + "question_id": 13, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "Represented here is an icon for the Speedtest application by Ookla, depicted by a speedometer graphic suggesting the app's function of measuring internet connection speed. Tapping on this icon will likely open the app and allow the user to test their current internet speed." + }, + { + "question_id": 14, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "This icon, featuring a camera lens and a gradient background, is for the Instagram mobile application. Interacting with this icon will usually open the app, providing access to photo and video sharing, as well as viewing the content from others on the Instagram social network." + }, + { + "question_id": 15, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "Resembling a house with a triangular roof, this icon signifies a home automation or real estate application. 
Interaction with this icon would open the respective app, providing controls for smart home devices or real estate listings, depending on its specific function." + }, + { + "question_id": 16, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "This icon has a feather, hinting at a lite version of an application that offers a minimalistic or resource-efficient option, typically for use in areas with limited connectivity or on devices with lower performance." + }, + { + "question_id": 17, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "Featuring a speech bubble with a lightning bolt, this is the Facebook Messenger app icon. It signifies an app dedicated to messaging which, upon interaction, opens a platform where users can send messages, share media, and participate in video calls." + }, + { + "question_id": 18, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The icon partially reads \"Home De...\" against an orange square, suggesting a home improvement or retail company's app, possibly offering goods or services related to home refurbishment or decoration. The app's full functionality would be revealed upon opening it." + }, + { + "question_id": 19, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The green owl represents Duolingo, an educational platform icon with its function being language learning. 
Upon touching the icon, the user would engage with the app to learn a new language through interactive lessons." + }, + { + "question_id": 20, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "This icon, with an abstract design and the letters 'GE', likely signifies a news or media application that provides users with news articles, updates, and possibly live reporting, accessible by touching the icon to open the app." + }, + { + "question_id": 21, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "This area contains the term \"Search,\" suggesting it is likely related to a search function where a user can input queries to locate specific settings or information within this system or application." + }, + { + "question_id": 22, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"settings\" indicates an option or heading that relates to configuration options. Interacting with it would typically bring up a menu to adjust system preferences or application parameters." + }, + { + "question_id": 23, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The term \"mobile,\" followed by a comma hints at a list or continuation of related topics, likely referring to mobile network settings or features in the context of this system or application." 
+ }, + { + "question_id": 24, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"data\" in this context may refer to mobile data usage and settings. It suggests an option to view or adjust how the device handles cellular data." + }, + { + "question_id": 25, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "This term \"Wi-Fi,\" ending with a comma, implies it is part of a series, possibly relating to Wi-Fi settings where a user can manage Wi-Fi networks and preferences." + }, + { + "question_id": 26, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"and\" serves as a conjunction within a list or sentence, indicating the addition of more items or concepts that are related to the ones previously mentioned." + }, + { + "question_id": 27, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "This term \"hotspot\" typically refers to a feature where the device can share its internet connection with other devices through Wi-Fi, Bluetooth, or USB." 
+ }, + { + "question_id": 28, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The term \"usage,\" followed by a comma, likely relates to the tracking or monitoring of resource consumption, such as data, battery, or connectivity usage." + }, + { + "question_id": 29, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"Connected\" suggests it pertains to the status or management of connected devices or networks, such as Bluetooth connections or Wi-Fi networks." + }, + { + "question_id": 30, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The partially visible term \"Lo\" could be part of a word that identifies a feature, option, or information related to the system or application settings." + }, + { + "question_id": 31, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This area is a text-entry field in a web browser, typically used for typing in web addresses or performing web searches. Interaction with this field usually involves clicking or tapping to enter text, and pressing Enter would initiate a web search or take the user to the entered web address." 
+ }, + { + "question_id": 32, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This is a tab title within a web browser, indicating that the user has accessed or searched for 'eBay shopping' in this tab. The text serves a navigational purpose, allowing the user to identify and switch to the associated web page when multiple tabs are open." + }, + { + "question_id": 33, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This label refers to a 'cart' on a shopping platform, hinting at a functionality that allows users to view items that have been added to a virtual shopping cart. It is likely interactive and clicking it would navigate the user to a page summarizing their selected items for purchase." + }, + { + "question_id": 34, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This text is a URL displayed in the address bar of a web browser. It indicates that the current web page pertains to the shopping cart of the eBay website. The user can click on this text to edit the URL or copy it for use elsewhere." + }, + { + "question_id": 35, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This text suggests that the user is visiting or has searched for a page related to 'Welcome to Costco Wholesale'. It might serve as a title for a page, potentially indicating that the user can find information regarding Costco's offerings through this tab." 
+ }, + { + "question_id": 36, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "Here, 'costco.com' is the simplified representation of an address bar or tab title suggesting that the user is visiting Costco's website. Users interact with this by clicking it to switch to the Costco tab within the browser." + }, + { + "question_id": 37, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This phrase 'Pay Less.' is likely associated with a slogan or branding message, suggesting a value proposition to customers; the promise of spending less for the products or services offered by the entity associated with this phrase." + }, + { + "question_id": 38, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The text 'Target:' resembles a title or a navigational cue for a segment within a web browser, it may indicate a web page related to the retail company Target and is probably part of a list or compilation of bookmarks or frequently visited sites." + }, + { + "question_id": 39, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "Similar to , 'Expect More.' is a slogan that implies a promise of greater value, service, or product quality from the associated entity. It is designed to communicate a marketing message or company ethos to customers." 
+ }, + { + "question_id": 40, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This text 'target.com' represents a simplified address, similar to , likely indicating that the user is visiting or has the option to visit Target's website. Interaction with this area would navigate to or indicate presence at Target's web page." + }, + { + "question_id": 41, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The text appears at the top of the screenshot and is likely the title of the application or page currently being viewed. It suggests that the content of the page is related to the Skype application, possibly for download or further information purposes." + }, + { + "question_id": 42, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "This text is also indicative of the Skype application. It is usually the main header on an app page and is a non-interactive element that provides the user with confirmation of the app's identity." + }, + { + "question_id": 43, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The icon displayed represents the Skype application. It's typically used as a visual identifier of the app within digital stores or on a device's home screen. It serves as a non-interactive branding element in this context." 
+ }, + { + "question_id": 44, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The word \"Install\" is generally an interactive button when found on an application download page. Tapping this button would initiate the download and installation of the app onto the user's device." + }, + { + "question_id": 45, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "This repeated mention of \"Skype\" may refer to the name of the application on its store page. It usually appears below the app icon and serves as a non-interactive title or label." + }, + { + "question_id": 46, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The term \"purchases\" here likely relates to transactions associated with the app, suggesting that the app might offer in-app purchases." + }, + { + "question_id": 47, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "Paired with the previous \"purchases\" text, \"In-app\" specifies the location or type of purchases available, indicating that users can buy items or services within the app itself." + }, + { + "question_id": 48, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The rating \"4.1*\" is indicative of user reviews and ratings for the app. 
It reflects the app's quality as perceived by its users and is usually an averaged score based on individual user ratings." + }, + { + "question_id": 49, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "\"1B+\" signifies the number of times the app has been downloaded, indicating that the Skype app has been downloaded over one billion times." + }, + { + "question_id": 50, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The phrase \"Editors' Choice\" likely denotes a special recognition or endorsement by the app store's editorial team, suggesting that the app comes highly recommended." + }, + { + "question_id": 51, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "This region contains the phrase \"search settings,\" which indicates a function allowing the user to search within the settings menu. The presence of a magnifying glass icon suggests that this is an interactive search bar interface element. Typically, a user would tap this area and input text to locate specific settings." + }, + { + "question_id": 52, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "This portion features the single character \"M\" which is typically representative of a user's initial or an application's logo. 
It is stylized with a certain thickness and distinct font that can be indicative of a branding design or user personalization within a software interface." + }, + { + "question_id": 53, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "This region includes the word \"Add\" presented with clear, bold lettering on a button or interactive element. This is commonly used to initiate the process of adding new elements, possibly in this context to add a new email account, as suggested by the surrounding text." + }, + { + "question_id": 54, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "The text \"another email\" is part of a larger phrase that suggests functionality for adding additional email addresses to the account or application in use. It is likely not interactive by itself but is part of instructional or descriptive text guiding the user's actions." + }, + { + "question_id": 55, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Featuring the word \"account,\" this text complements the nearby phrase and is associated with the process of adding or managing email accounts within the application or device settings. It would generally not be interactive but adds context to the interface's options." 
+ }, + { + "question_id": 56, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "The phrase \"Set up your personal\" suggests a prompt or direction for the user to configure personal settings, possibly related to an email or other account settings. This text is usually static and provides guidance or instructions within a user interface." + }, + { + "question_id": 57, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Paired with the previous text, \"or work email\" completes instructions for setting up email accounts of different types (personal or professional) within an application. This portion of text helps to further define the user's options for account configuration." + }, + { + "question_id": 58, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "\"Network & internet\" signifies a category or menu within the settings that pertains to adjusting network-related preferences including Wi-Fi, data usage, and related connectivity features. This text typically leads to a subsection where related settings can be modified." + }, + { + "question_id": 59, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "This section, \"Wi-Fi, mobile, data\" lists different connectivity options available to the user for configuration. 
It could be informative text providing a summary of the settings contained within the \"Network & internet\" menu mentioned in the previous region." + }, + { + "question_id": 60, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "The presence of the word \"usage,\" combined with the context from the surrounding text, relates to data consumption aspects of the device's network settings. \"And\" suggests there are additional relevant aspects listed after this text, likely related to managing network services or features." + }, + { + "question_id": 61, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "This area displays the text \"Wednesday, May 18,\" which appears to show the date information, presumably representing the current day of the week and the month's date. This is typically displayed on mobile devices as part of the user interface to inform the user of the current date." + }, + { + "question_id": 62, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "The text \"Maps\" suggests an application name, likely a mapping or navigation app, which users commonly utilize to find locations, get directions, or explore maps of different areas. It is probable that tapping on this text would open the associated application." 
+ }, + { + "question_id": 63, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "This is a single character \"G,\" customarily associated with Google’s branding. It often represents access to Google's search services or apps affiliated with Google. Interacting with this symbol would possibly lead to a Google product or service." + }, + { + "question_id": 64, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "This icon, often indicative of location services or map functionality, is commonly used to represent a user's current location or to access location-based features. Interacting with this icon typically opens a mapping application that shows the user's real-time location on a map." + }, + { + "question_id": 65, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "This icon depicts a chat bubble, usually associated with messaging or communication services. It typically indicates the user's chat or messaging applications, and interaction would likely open the associated messaging service to send or receive messages." + }, + { + "question_id": 66, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "This icon with a colorful design resembling a camera shutter or a wheel hints at the Google Chrome browser, which is widely used for Internet browsing. 
Tapping this icon would typically open the Chrome browser for web navigation." + }, + { + "question_id": 67, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "The icon exhibits the Google Assistant symbol, suggesting voice-activated or typing search query functionality. Interacting with this icon would likely invoke Google Assistant to help with tasks, answer questions, or control smart home devices." + }, + { + "question_id": 68, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The URL displayed in the address bar indicates that the webpage belongs to costco.com and includes a path, or endpoint, that suggests a functional page, which the text 'Check' implies may be related to a checkout or verification process. This is the web address users can navigate to for interacting with the website's functionality." + }, + { + "question_id": 69, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The logo represents the brand identity for Costco Wholesale, indicating that the user is currently on the official website of this retail company. As a logo, it may serve as a clickable element that typically redirects users to the homepage of the website." + }, + { + "question_id": 70, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "Labeled 'Warehouses,' this indicates a section of the website where users can find information about Costco's physical store locations. 
It likely functions as a link that, when clicked, will take the user to a page detailing warehouse locations and related information." + }, + { + "question_id": 71, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "Marked 'Account,' this suggests a section pertaining to user account management. Clicking on this would likely allow the user to access their personal account details, sign in, or manage their membership and profile." + }, + { + "question_id": 72, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "This icon appears to represent a shopping cart, which is commonly used on e-commerce websites to signify where users can view items they intend to purchase. Clicking on it would probably take the user to view their current selections or to the checkout page." + }, + { + "question_id": 73, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "Highlighting the word 'Shop,' this implies a navigational link designed to direct users to the online shopping section of the website, where they can browse and choose products for purchase." + }, + { + "question_id": 74, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "This appears to be a search bar, where users can enter keywords or phrases to find specific items or information on the website. Such fields typically include an interactive function that generates search results upon entry confirmation." 
+ }, + { + "question_id": 75, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "'My Warehouse' likely refers to the user's preferred or designated Costco warehouse location. It may include functionality for the user to select or change their preferred store and may show additional details, such as operational hours." + }, + { + "question_id": 76, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The phrase 'Delivery Location' accompanied with what appears to be a postal code implies a feature that allows users to specify or view the location to which online purchases will be delivered." + }, + { + "question_id": 77, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "Containing the city name 'Seattle,' this suggests the chosen warehouse or delivery location for the user. It might be interactive to allow the user to change the location or view information on the selected warehouse." + }, + { + "question_id": 78, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "This text likely represents the name of an application or service known as Fetch Rewards, potentially hinting at a rewards system that users can utilize by engaging with the app." 
+ }, + { + "question_id": 79, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "This is the word \"Play,\" which usually is associated with initiating an action or starting something, such as a video or game within an application." + }, + { + "question_id": 80, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The phrase \"to earn\" typically suggests that there is an opportunity to gain something—often points, money, or rewards—by performing certain actions or tasks." + }, + { + "question_id": 81, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The word \"MAKE\" usually denotes the action of creating something or obtaining an outcome, perhaps insinuating that users can create or earn money through the app." + }, + { + "question_id": 82, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "This text states \"MONEY,\" which indicates that the application or service likely involves opportunities for users to earn financial rewards or benefits." 
+ }, + { + "question_id": 83, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The text \"appminer st\" is not immediately clear, but it could be a truncation or part of a larger phrase, possibly indicating a feature within the app, or related to app mining or statistics." + }, + { + "question_id": 84, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The word \"Contains\" usually suggests that what follows will describe the contents or features within the app, which in this case could be related to advertisements." + }, + { + "question_id": 85, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "This term generally refers to \"advertisements,\" suggesting that the application includes ads that users might see while utilizing the app." + }, + { + "question_id": 86, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The numerical figure \"50K+\" generally implies a quantity greater than 50,000, typically used in the context of downloads, users, or items within an app." 
+ }, + { + "question_id": 87, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The characters \"4.7*\" indicate a rating, likely on a 5-point scale, suggesting that users have rated the app positively, with 4.7 out of 5 stars." + }, + { + "question_id": 88, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "This area contains the webpage title indicating the user is on a retail website known for its wide range of products, hinting at online shopping capabilities. The title is typically non-interactive and serves as an identifier of the site." + }, + { + "question_id": 89, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "This text seems to be an incorrect or truncated URL for the same retail website mentioned in Region 1. Possibly a typographical error within the text, it seems non-functional." + }, + { + "question_id": 90, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The text here represents a search query within a search bar of the website, suggesting the user is looking for a Lenovo ThinkPad, which is a model of a laptop computer." + }, + { + "question_id": 91, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "Labeled 'Cancel,' this is likely an interactive button used to clear the current search query within the search bar. 
Once tapped or clicked, it should clear the input text." + }, + { + "question_id": 92, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "This text duplicates the query in Region 3 and is part of the search bar suggestions or search history, indicating a previous or common search made by the user." + }, + { + "question_id": 93, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The single word 'Lenovo,' which is part of a search suggestion below the search bar, represents the brand that manufactures various electronic devices, including laptops." + }, + { + "question_id": 94, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The word 'ThinkPad' refers to a specific line of laptops and is part of a search suggestion. Standalone, it specifies the user's interest in the ThinkPad series by Lenovo." + }, + { + "question_id": 95, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The text 'ir' appears to be an incomplete or mistyped search term or fragment within the search suggestions. Its context is unclear without additional information." 
+ }, + { + "question_id": 96, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "This text, likely a category label, indicates the section of the site the user is navigating, presumably the electronics category where items like laptops would be found." + }, + { + "question_id": 97, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "Representing a longer search suggestion, this phrase indicates a related accessory for the Lenovo ThinkPad, specifically a charger, suggesting the user might be looking to purchase this item." + }, + { + "question_id": 98, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "The image displays a title that reads \"NAVIGATING SPECIAL EDUCATION SOCIAL & EMOTIONAL LEARNING.\" It's styled in bold, white capital letters against a red background, and it appears to serve as a header for the entire visual presentation, indicating the overarching theme of the content below." + }, + { + "question_id": 99, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel illustrates the concept of a \"Growth Mindset\" juxtaposed with \"Fixed Mindset.\" Two head silhouettes are shown with arrows pointing towards a \"Growth Mindset\" tag indicating a positive transformation away from a \"Fixed Mindset,\" symbolizing the adaptability and learning potential of the mind." 
+ }, + { + "question_id": 100, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "Depicted here is a person with a grim expression, and steam coming out of their ears, conveying the theme of \"Anger Management.\" This symbolizes the need to control tempers, with visual cues highlighting the struggle typically associated with anger." + }, + { + "question_id": 101, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel represents \"Understanding Diversity.\" It features a circle of variously colored handprints reaching towards the center, signifying unity and inclusiveness among diverse individuals or groups. The image communicates the idea of embracing diversity." + }, + { + "question_id": 102, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "The image here is indicative of \"Social Inferencing.\" A figure stands perplexed before an open box with question marks floating above, suggesting the process of interpreting social cues and understanding social contexts or scenarios that are not overtly expressed." + }, + { + "question_id": 103, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "An illustration of two children, one standing over the other with a raised fist, typifies \"Bullying.\" This image portrays an aggressive interaction between youth, emphasizing the dynamic of power and intimidation present in bullying behaviors." 
+ }, + { + "question_id": 104, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "Here, \"Helping Others\" is symbolized by two children, one assisting the other by tying their shoe. This image evokes themes of kindness, helpfulness, and cooperation among individuals, highlighting the importance of social support." + }, + { + "question_id": 105, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "Showing two profiles with opposing arrows and a lightning bolt in between, this panel discusses \"Conflict Resolution.\" The imagery suggests two individuals facing a conflict with a potential for resolution, emphasizing communication and problem-solving." + }, + { + "question_id": 106, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel depicts \"Coping With Changes,\" represented by a signpost with arrows pointing in different directions, labeled \"CHANGES.\" It symbolizes the various paths one may take when encountering life's transitions and the importance of adaptability." + }, + { + "question_id": 107, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "Finally, \"Leadership\" is expressed through an individual climbing a staircase while assisting another person upward. It represents the concept of leading by example, and guiding others towards success, showcasing the traits of a good leader." 
+ }, + { + "question_id": 108, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image highlights a statistic related to hiring efficiency, pointing out that the time to hire has more than doubled over the last 5 years. A graphical element beside the text emphasizes this increase in time with a \"+2X\" indicating the doubling." + }, + { + "question_id": 109, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This section of the image indicates that the average cost to hire someone in the U.S. is $4,000, emphasizing the financial implications of the recruitment process for employers." + }, + { + "question_id": 110, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Illustrated here is a pie chart displaying that 36% of employers are unable to find the talent they need when it is needed. This statistic points to the challenges in matching skills and job openings in a timely manner." + }, + { + "question_id": 111, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Described here is the significant potential economic benefit (\"$2.7 trillion impact to global GDP\") that could result from using more efficient talent platforms, suggesting that improvements in recruiting methods could have a profound impact on the global economy." 
+ }, + { + "question_id": 112, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This part of the image addresses organizational efficiency, with a statement that 46% of companies are sometimes or frequently understaffed. The figure is accompanied by a graphic showing the 46% proportion." + }, + { + "question_id": 113, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "This area presents the title and introductory text providing an overview of the image's intent. It introduces the concept of \"Travel Personas,\" indicates that these personas are used to identify individual travel styles, and how these styles are significant for personalized engagement in marketing. It references a report by the CMO Council from 2018." + }, + { + "question_id": 114, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A visual and textual depiction of \"The Smart Planner\" travel persona. This persona, representing 31% of travelers, is illustrated by a character with suitcases, a camera, binoculars, and a hat, suggesting a well-prepared and organized traveler." + }, + { + "question_id": 115, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "This illustrates \"The Relaxed Nomad\" persona. With 25% representation, the image shows two individuals in hiking attire with a backpack, indicating a laid-back and adventurous travel style." 
+ }, + { + "question_id": 116, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Depicts \"The Deal Seeker\" persona, representing 22% of travelers. The image shows a family with suitcases and shopping bags, suggesting a focus on economical travel and value for money." + }, + { + "question_id": 117, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Presents \"The Nervous Stresser\" persona with 13% representation. The image depicts an anxious individual in an airplane seat, clutching the armrests, reflecting a traveler who experiences stress during trips." + }, + { + "question_id": 118, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Showcases \"The Adventurous Thrill-Seeker\" persona, accounting for 5% of the traveler demographic. The image portrays two characters skydiving, indicating a preference for high-energy and adventure-filled travel experiences." + }, + { + "question_id": 119, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Portrays \"The Luxury Budget-Buster\" persona, constituting 1% of travelers according to this depiction. The image includes a character sipping a drink on a plane, implying a tendency towards indulgence and high expenditure." 
+ }, + { + "question_id": 120, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Displays \"The Business Road Warrior\" persona, also making up 1% of the traveler profile. The graphical representation includes a character briskly walking with a rolling suitcase and carrying a briefcase, suggesting frequent travel for business purposes." + }, + { + "question_id": 121, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "An illustration depicts a female customer service representative wearing a headset and holding a notepad, with an envelope icon indicating email communication." + }, + { + "question_id": 122, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "Depicted here is a male customer support agent with a headset. Behind him are symbols such as a magnifying glass and a wrench, suggesting a focus on service and problem-solving." + }, + { + "question_id": 123, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "This image shows a customer support agent with a globe and a phone headset in the background. The presence of a star and headphones suggests excellence in global support." + }, + { + "question_id": 124, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "In this graphic, a male figure with a headset is surrounded by symbols: a question mark, gears, and a light bulb. 
This represents expertise in finding solutions." + }, + { + "question_id": 125, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "Featuring tools and a gauge, the illustration conveys a commitment to quality in customer service, indicated by the 'Quality Service' text." + }, + { + "question_id": 126, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "The design shows a female representative with a headset alongside a mobile phone displaying a wifi signal and a callback option, emphasizing telecommunications services." + }, + { + "question_id": 127, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel portrays a customer support individual with a wrench, highlighting the concept of assistance with technical or practical issues." + }, + { + "question_id": 128, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "Illustrated here is a customer support agent with a headset in front of a backdrop depicting the UK flag, a speech bubble, and a phone, suggesting language translation services." + }, + { + "question_id": 129, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A female customer service consultant is represented, with symbols of 24-hour availability and a gold star, signifying round-the-clock excellence." 
+ }, + { + "question_id": 130, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image features a human heart symbolizing a strong heart as one of the benefits of running." + }, + { + "question_id": 131, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel showcases an icon of a shield with a check mark, representing the immune system's boost from running." + }, + { + "question_id": 132, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A silhouette of a figure measuring their waist indicates that running can aid in weight loss." + }, + { + "question_id": 133, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The artwork depicts a pair of lungs, signifying the respiratory system's enhancement due to running." + }, + { + "question_id": 134, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A smiling face emoticon suggests that running can improve one's mood." 
+ }, + { + "question_id": 135, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "An illustration of a leg bone signifies that running increases bone density." + }, + { + "question_id": 136, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "An image of a brain is used to illustrate the benefit of improved brain function from running." + }, + { + "question_id": 137, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image features a detailed representation of the cardiovascular system, emphasizing its strengthening through running." + }, + { + "question_id": 138, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "The image displays the word 'HOT' in large, bold, uppercase letters with varying colors for each letter. The 'H' is in red, the 'O' is in mustard yellow, and the 'T' is in a light blue color." + }, + { + "question_id": 139, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "Here, a round, yellow cartoon-like emoji with blue tears, symbolizing laughter or crying with joy, is shown. Below it, the word 'HUMOR' is written in uppercase letters." 
+ }, + { + "question_id": 140, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "This depicts an open hand graphic in brown color, which is centered within an orange circular background. Underneath the image, the word 'OPENNESS' appears in capital letters." + }, + { + "question_id": 141, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "Shown is an illustration of two hands coming together in a handshake or high five, set against a yellow circle. Below, the word 'TOUCH' is described in uppercase letters." + }, + { + "question_id": 142, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A graphic of a heart with an exclamation mark within it is displayed within a light green circular background. Written below is the word 'ATTENTION' in uppercase letters." + }, + { + "question_id": 143, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A portrayal of a person with a gender-neutral appearance, featuring brown hair, is encircled in blue. A sequence of dashes leads from the character to the bottom right, with the word 'PROXIMITY' written in block capitals." + }, + { + "question_id": 144, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "The image shows a close-up illustration of a stylized blue eye with a large brown pupil, against a dark blue background. 
Below the eye is the word 'EYE CONTACT' written in all caps." + }, + { + "question_id": 145, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image depicts an illustration of a person with flushed cheeks and a thermometer in their mouth, indicating a high temperature, alongside the word \"fever.\"" + }, + { + "question_id": 146, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image shows an individual coughing into their hand, representing a symptom identified by the word \"cough.\"" + }, + { + "question_id": 147, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel illustrates an individual appearing unwell, with a sick expression and a hand over their mouth. The word \"vomiting\" is associated, indicating it as a symptom." + }, + { + "question_id": 148, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Here, an individual is portrayed with their hands near their throat, their cheeks flushed, and an uneasy expression. The term \"dyspnea\" adjacent to the figure defines the displayed respiratory distress." 
+ }, + { + "question_id": 149, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "An individual is seen clutching their stomach, with a distressed expression, representative of \"diarrhea\" which is indicated by the corresponding label." + }, + { + "question_id": 150, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The illustration shows a pair of human lungs with a highlighted area indicating inflammation. The word \"pneumonia\" is present to describe the condition being depicted." + }, + { + "question_id": 151, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Featured is a graphic representation of a pair of kidneys with a highlighted area in red, indicating distress or damage. Alongside is the phrase \"renal failure,\" signifying the medical condition exhibited." + }, + { + "question_id": 152, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "This region displays the heading \"Interesting Facts\" at the top, set against a blue background with a three-line menu icon to the left and a heart symbol to the right." 
+ }, + { + "question_id": 153, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Depicts a graphical icon of a panda bear's face on a green background with the label \"Animals\" beneath it." + }, + { + "question_id": 154, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Shows an icon representing a plate and silverware on a green background, labeled as \"Diet Nutrition.\"" + }, + { + "question_id": 155, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Includes a graphical icon that combines a heart shape and a pulse line on a dark background, labeled \"Diseases Disorders.\"" + }, + { + "question_id": 156, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Features an image of a fashionable shirt on a dark background with the word \"Fashion\" underneath it." + }, + { + "question_id": 157, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Contains an icon resembling a film strip on an orange background, indicating the \"Entertainment\" category." 
+ }, + { + "question_id": 158, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel displays an icon of a syringe with a drop, which is on a green background, and is described with the words \"Drugs Addiction.\"" + }, + { + "question_id": 159, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Shows a depiction of a wine bottle and glass on a blue background, labeled \"Food & Drink.\"" + }, + { + "question_id": 160, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Exhibits two stylized human figures, one male and one female, on a blue background, with the inscription \"Gender.\"" + }, + { + "question_id": 161, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Contains a depiction of the Earth on a green background, with the word \"Global\" beneath it." + }, + { + "question_id": 162, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image depicts two individuals engaged in conversation. One appears to be a professional, possibly a therapist, sitting across from a person who seems to be seeking help. The scene is accompanied by the text \"Seek Professional Help,\" suggesting that the image represents the advice to consult a mental health professional when dealing with depression." 
+ }, + { + "question_id": 163, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel features an individual sitting on the ground with their head down, projecting a dejected or hopeless demeanor. Above the figure, the text reads \"Don't Lose Hope.\" The image conveys the message of maintaining hope as a countermeasure against feelings of depression." + }, + { + "question_id": 164, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "An illustration of a female figure is shown alongside the phrase \"Practice Mindfulness.\" She appears calm and collected, with her eyes closed and a slight smile, which indicates a serene state of mind, commonly associated with mindfulness practice." + }, + { + "question_id": 165, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Here, an individual is seen looking out of a large window onto a sunny landscape with trees. The phrase \"Rethink Your Perspective\" suggests that the image is advising a change in one's outlook, possibly to a more positive or broader view, as a way to combat depression." + }, + { + "question_id": 166, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image presents a person in activewear, taking a stride forward with a focused expression. 
The associated text, \"Stay Active,\" recommends physical activity as a method for improving mental health and battling depression." + }, + { + "question_id": 167, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel displays a person in a yoga pose, meditating with eyes closed and hands in a position of focus. The text \"Meditate\" indicates that the image is suggesting meditation as a therapeutic practice for managing depression." + }, + { + "question_id": 168, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image shows three gel ice packs in green, purple, and blue colors, with distinctive shapes, resembling a dinosaur, a star, and a fish. Accompanying text suggests \"Take out the one you need.\"" + }, + { + "question_id": 169, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "An illustration depicts a cartoon boy holding an ice pack to his head. 
Text indicates the ice pack has multi-functionality and advises using the ice pack for \"the relief area for the doctor recommended time of 20 minutes.\"" + }, + { + "question_id": 170, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "Detailed instructions on how to use the ice pack are given, with two methods highlighted: \"TO USE COLD\" involving refrigeration, and \"TO USE HOT\" instructing to microwave the pack for 10 seconds and check the temperature." + }, + { + "question_id": 171, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "Guidelines for storage are portrayed, advising to \"put the item in the storage bag, for longer shelf life, keep pack in freezer while not in use.\"" + }, + { + "question_id": 172, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "The object appears to be a small brown wooden shed, likely used for storage, situated on a patch of grass. It has a clearly visible slanted roof, possibly for rain runoff, and looks to be a single-door structure typically found in a backyard or garden setting." + }, + { + "question_id": 173, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "This object is a tree with thick, lush foliage, representing a mature specimen that provides shade and greenery. 
It stands behind a smaller, sparser tree and is part of a larger grouping of trees that appear to create a natural boundary or backdrop for the area." + }, + { + "question_id": 174, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "A single metal pole is embedded in the ground in a vertical orientation. It seems to be a simple, slender structure, possibly serving as a support or part of a larger construction that isn't fully visible. The lawn surrounding it is well-trimmed and maintains an even appearance." + }, + { + "question_id": 175, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "This bowl, appearing to be dark blue, is situated against a background, likely part of kitchenware." + }, + { + "question_id": 176, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "The tabletop is made of dark marble, showcasing a glossy finish and reflecting its surroundings slightly." + }, + { + "question_id": 177, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "The light switches are white, contrasting with the dark wall, likely plastic, and appear functional." + }, + { + "question_id": 178, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "Positioned in the background, these white light switches are paired on a wall above the countertop." 
+ }, + { + "question_id": 179, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "This silver oven, with digital controls and a handle, appears modern and built into the cabinetry." + }, + { + "question_id": 180, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "An indistinct blue and green object, possibly decorative, is partially visible against a lighter backdrop." + }, + { + "question_id": 181, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "The floor, constructed of hardwood, showcases a natural finish with variations in wood grain." + }, + { + "question_id": 182, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "The jar holder, likely metal, is mounted to the wall, containing jars that may hold spices or ingredients." + }, + { + "question_id": 183, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "This is an image of a silver metal table situated outside on a paved ground. The table has a shiny, reflective surface indicative of being metallic." + }, + { + "question_id": 184, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "The object is an outdoor chair characterized by its red backrest and tan seat. 
It appears sturdy and designed for outdoor settings, likely part of a café or restaurant patio." + }, + { + "question_id": 185, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "The item in question is a piece of lavender paper that seems to be placed atop a metal table. The paper's edges are distinctly visible against the table's surface." + }, + { + "question_id": 186, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "Visible here is a yellow traffic light, suspended above the street. The light is not illuminated and it stands against a light sky, possibly signaling a traffic-stop scenario." + }, + { + "question_id": 187, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A large red and white striped umbrella stands open, presumably providing shade or shelter in an outdoor setting. Its vibrant colors attract attention." + }, + { + "question_id": 188, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A brown tree trunk is seen beside a sidewalk. The trunk's bark is rugged and it appears to be a mature, healthy tree, offering shade to the vicinity." + }, + { + "question_id": 189, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "Displayed is a black chalkboard featuring white text. It seems to be placed on a sidewalk, often used for displaying messages or menus outside establishments." 
+ }, + { + "question_id": 190, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A window is seen on the side of a tan-colored building. It appears to be rectangular, typical of building windows, and reflects the adjacent surroundings." + }, + { + "question_id": 191, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "A close-up view of a horse's head, predominantly brown with a distinctive white patch on its forehead and visible mane." + }, + { + "question_id": 192, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "This is the body of a brown horse, most likely the same one as the head seen in the close-up. Its front body is visible." + }, + { + "question_id": 193, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "A white horse is seen from a side angle in the distance, grazing or standing in a meadow with trees and a fence." + }, + { + "question_id": 194, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A frying pan on a heat source contains sautéed meat and vegetables, emitting steam, indicating the food is hot and being cooked." 
+ }, + { + "question_id": 195, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "An electric stovetop features a radiant burner that is glowing, suggesting it is turned on and providing heat for cooking." + }, + { + "question_id": 196, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A kitchen knife with a green handle rests on a countertop; its blade appears sharp and suitable for food preparation." + }, + { + "question_id": 197, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A human hand is captured in motion, seasoning or stirring the food in the pan, contributing to the cooking process." + }, + { + "question_id": 198, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000299654.jpg", + "category": "natural_detailed_caption_box", + "text": "The image depicts the head of a zebra, with distinctive black and white stripes covering its fur. The animal's ears are pointed upwards, indicating alertness. The eyes are visible, showcasing a gentle gaze, and the nose is close to the ground, suggesting the zebra is grazing or sniffing the terrain. The mane is partially visible as a series of short, erect black hair between the zebra's ears." 
+ }, + { + "question_id": 199, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000107939.jpg", + "category": "natural_detailed_caption_box", + "text": "The object is a rectangular street sign with white letters on a green background, indicating the name of a street. It is affixed to a metal pole and is located above and slightly to the left of a stop sign. The sign reads 'NORTH AVE' suggesting it's likely an indication of the street or direction. It appears to be a standard street name sign used in many urban settings." + }, + { + "question_id": 200, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000107939.jpg", + "category": "natural_detailed_caption_box", + "text": "This object is a red hexagonal stop sign with white uppercase letters spelling 'STOP'. It is attached to the same metal pole as another sign, below and to the right of it. The sign is designed to alert drivers to stop and is a widely recognized traffic control device. The edges of the sign appear sharp and undamaged, suggesting it is in good condition." + }, + { + "question_id": 201, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "A plush, padded object designed for comfort, potentially used on a sofa." + }, + { + "question_id": 202, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "Similar to the first object, this is also a stuffed and soft piece intended for supporting or resting." 
+ }, + { + "question_id": 203, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "Decorative accessory adorned on the ear, visible as a small, shiny object." + }, + { + "question_id": 204, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "This is a child with an open mouth and animated facial expression, possibly speaking or expressing surprise." + }, + { + "question_id": 205, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "Appears to be a young boy, casually dressed, gripping an electronic device with attention." + }, + { + "question_id": 206, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "The figure is wearing a red ski suit with a blue helmet and goggles. Their stance is open and welcoming, arms outstretched, and they seem to be an instructor addressing a group of students on a snowy slope." + }, + { + "question_id": 207, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A person is mostly obscured by the instructor but can be identified as a ski student by the helmet. The student is wearing a purple jacket with green sleeves and appears to be in mid-motion, learning to ski." 
+ }, + { + "question_id": 208, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "There is a student dressed in green ski gear with visible ski poles, possibly following instructions. They are viewed from the side, indicating movement or a pause during skiing." + }, + { + "question_id": 209, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A clear blue sky with scant clouds, indicative of a bright, sunny day ideal for outdoor activities such as skiing. This backdrop is above a snowy mountain setting." + }, + { + "question_id": 210, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A ski student is captured from behind, suggesting they are moving away from the viewer. They are wearing a red jacket with black pants, indicative of typical ski wear fit for the cold environment." + }, + { + "question_id": 211, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "This student, visible from the side, is wearing a green and purple ski outfit with a matching helmet, possibly in the midst of practicing or following a ski maneuver." + }, + { + "question_id": 212, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A detailed examination of the instructor's black glove, which is part of standard skiing attire, suited to protect hands from cold conditions and providing better grip on ski poles." 
+ }, + { + "question_id": 213, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2411153.jpg", + "category": "natural_detailed_caption_box", + "text": "Captured in this section is a motorcycle racer, sharply tilting while maneuvering a turn on a race track. The rider, outfitted in a full-body racing suit, is almost in a horizontal position relative to the ground, a technique used in high-speed motorcycle racing to navigate tight turns while maintaining speed. The motorcycle itself is predominantly red with hints of white and black, and it showcases a sleek, aerodynamic design typical of high-performance racing bikes. The rider's focused posture and the bike's dynamic angle suggest this is a moment of intense action during a race." + }, + { + "question_id": 214, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2411153.jpg", + "category": "natural_detailed_caption_box", + "text": "This portion of the image displays the texture of an asphalt road, detailed with small granular elements indicative of a typical racing track surface built to offer traction and durability. A crisp white boundary line marks the edge of the racing track, contrasting with the dark gray tone of the asphalt. The road surface is illuminated by ambient light, highlighting the texture and suggesting a dry weather condition which is ideal for racing. The condition of the road suggests it is well-maintained, a necessity for the safety and performance of high-speed motorsport events." 
+ }, + { + "question_id": 215, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/22d3844dcf29a07bd10f557a33684e331846f81b938ca0f742afab09c542133f.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region of the image displays a portion of a scientific or academic paper, specifically focusing on points that seem to outlay contents or headings within the document. The page appears to discuss topics in physics, with references to quark and meson masses, as well as lattice data. The content suggests that the document may be exploring the relationship between subatomic particles and their masses, experimental data, and theoretical models (likely within the field of particle physics or quantum chromodynamics). Each item listed is followed by ellipsis and a numerical value, denoting sections or page numbers where these topics are expanded upon within the document." + }, + { + "question_id": 216, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/22d3844dcf29a07bd10f557a33684e331846f81b938ca0f742afab09c542133f.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region is at the bottom of the document, typically referred to as the page-footer. In academic or scientific papers, this section could include information such as the page number, publication date, author's name, or part of the document classification system. However, the specifics of what this footer contains are not visible, as the black rectangle with a white numeric identifier covers it entirely." 
+ }, + { + "question_id": 217, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The content is a caption designated for a table, which generally serves to describe the table's subject matter. The caption reads, \"TABLE 1: The geometries and adsorption energies for the structures of thioglycolic acid on Au(111) at 0.25ML.\" It provides a clear indication that Table 1 will present quantitative data regarding the geometry and energy characteristics of thioglycolic acid adsorbed on a gold (Au) substrate at a specific coverage level." + }, + { + "question_id": 218, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a table containing organized data. It lists various configurations of thioglycolic acid adsorbed on an Au(111) surface, along with numerical values for initial and optimized parameters such as adsorption distance (ds–Au), polar angle (θ), and adsorption energy (E_ads). The data is structured in columns with headings for different parameters and rows corresponding to different adsorption sites and tilt directions. The table is used to convey detailed quantitative information in a comparative format, facilitating the analysis of changes in geometry and energy after optimization." 
+ }, + { + "question_id": 219, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Here appears to be an excerpt of text, possibly from a research article or report, focusing on detailed scientific analysis. The text discusses the shortest Au-S bond length and mentions 'initial and optimized site,' likely referring to the states before and after some experimental or computational procedure. The content seems to pertain to the interpretation of the data presented in the table above it, providing context and insights into the structural data of the adsorption process." + }, + { + "question_id": 220, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This section of text also seems to be a detailed analytical discussion, possibly a continuation of the content from the previous text excerpt. It specifically highlights the adsorption energy for the most stable structure of a molecule on the Au(111) surface and the preferred adsorption site. It suggests a close relationship with both the data in the table above and the scientific interpretation or conclusion drawn from that data." + }, + { + "question_id": 221, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The content in this region appears to be a page-footer, usually found at the bottom of journal pages or official documents. It may contain information such as the page number, document section, publication date, or authors' names. 
Such footers are used for navigation and citation purposes." + }, + { + "question_id": 222, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The image is a collection of three scatter plots, each corresponding to a distinct type of prediction performance evaluated by F1 score. The x-axis represents the frequency (presumably of occurrence in the training set), while the y-axis represents the F1 score, which is a measure of test accuracy. The plots are labeled (a) Atom prediction performance, (b) Bond prediction performance, and (c) Charge prediction performance. Each plot features a variety of points labeled with chemical symbols or bond types, indicating that the data relates to chemical structures." + }, + { + "question_id": 223, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a caption for the series of scatter plots shown in . It provides an interpretation of the data, stating that there is a clear correlation between the performance of neural networks on different prediction types and the frequency of the specific type in the training dataset. It is noted that classification networks perform significantly better than segmentation networks." + }, + { + "question_id": 224, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a subsection title within the document that reads \"Performance of segmentation network.\" It indicates that the following text will discuss the results and analysis related to the evaluation of the segmentation network's performance." 
+ }, + { + "question_id": 225, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a paragraph that explains how the performance of the segmentation network is measured by the F1 score for pixel predictions for different atom, bond, and charge types. The text discusses how performance correlates with the frequency of these types in the training data and references a correlation visible in Figure 4, assuming that Figure 4 corresponds to the scatter plots in ." + }, + { + "question_id": 226, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is another subsection title within the document that reads \"Performance of classification networks.\" It signals that the subsequent paragraph will describe the performance evaluation for classification networks." + }, + { + "question_id": 227, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This paragraph details the performance of classification networks, mentioning that the F1 score is used for evaluation. It highlights a correlation between F1 score and the frequency of different atom, bond, and charge types in the training set. Although the segmentation is not perfect, the classification networks can maintain accuracy. Results are summarized in Figure 4, which likely refers to the scatter plots in ." 
+ }, + { + "question_id": 228, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a subsection title denoted \"Overall graph accuracy,\" which suggests that the following section of the document will focus on the combined accuracy measurements of the previously discussed networks." + }, + { + "question_id": 229, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "In this paragraph, the text outlines how combining the performance of different parts can produce an overall accuracy for graph predictions. It implies that integration of segmentation and classification network results, as indicated by an algorithm, can construct the resulting graph, referencing images in three different blocks." + }, + { + "question_id": 230, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is the page number of the document, specifically '11,' marking its location within the document's sequence of pages." + }, + { + "question_id": 231, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region appears to be a paragraph of text discussing the outcome of a washing process on reducing sugar content. It notes that this process resulted in a higher content of reducing sugar which is thought to overshadow the glycemic index (GI) lowering effect of the polyphenols and may increase the GI of the sugar." 
+ }, + { + "question_id": 232, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region of text outlines a graphical demonstration of the 'GI sweet spot' related to the sugars shown in a referenced table. It explains that a certain minimum amount of sucrose (22mg CE/100mg) needs to be retained during sugar processing to maintain a low GI, and that if additional polyphenols are present but the reducing sugars are too high, then the low GI effect is negated." + }, + { + "question_id": 233, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region includes a section header titled \"Table 3 - Example sugars,\" which implies that the region is categorizing and summarizing data related to various sugars, likely in a tabular format." + }, + { + "question_id": 234, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text in this region describes the effects of increasing the reducing sugar content of sugar and its impact on the GI, moisture content, and the behavior of glucose and fructose when polyphenol content is increased. It concludes that optimizing moisture and reducing sugar content is insufficient to lower the GI in the presence of higher polyphenol levels." 
+ }, + { + "question_id": 235, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region is a section header for the text that follows, indicating that the content will deal with \"Example b - Washing or massecuite to desired polyphenol content.\" This text likely explains an example or case study related to the process of washing sugar massecuite to achieve a certain level of polyphenol content." + }, + { + "question_id": 236, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region contains text describing an experimental process where two different sugar mill samples were tested for polyphenol content after undergoing washing to a certain depth of color. It discusses how the polyphenol content was measured against desired levels and mentions results found in a specific table." + }, + { + "question_id": 237, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a figure. It includes several images arranged in a grid layout depicting various stages of a document life cycle or processing steps. Each image shows a different state of documents, possibly related to digitalization or text recognition processes. These images likely serve as a visual representation of the document's evolution through a particular workflow, such as scanning or Optical Character Recognition (OCR)." 
+ }, + { + "question_id": 238, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a caption associated with a figure. It reads \"Figure 5: The OCR process.\" This caption identifies and describes the figure that it is associated with. The figure it refers to likely illustrates the stages or aspects of the OCR process, which could involve converting scanned images of text into machine-encoded text." + }, + { + "question_id": 239, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a figure. It shows a piece of text with visual markings comparing two sections labeled \"Available OCR\" and \"Improved OCR.\" The annotations indicate corrections or enhancements made in the 'Improved OCR' section compared to the 'Available OCR' section. This figure serves to demonstrate the efficacy of certain OCR technologies or methodologies by providing a before-and-after comparison." + }, + { + "question_id": 240, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a caption associated with a figure. It reads \"Figure 6: Excerpt from the Hong Kong report with different versions of OCR output. The Internet Archive image containing this excerpt can be accessed here:\" followed by a URL. This caption provides context for the associated figure, indicating that it is an excerpt from a specific report and acknowledges the source of the image. It helps readers understand the purpose of the figure and where they can find additional information." 
+ }, + { + "question_id": 241, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is part of a footer. It contains the name of a journal, \"Journal of Data Mining and Digital Humanities,\" along with the ISSN number, which is a unique identifier used for serial publications. This area of the document provides information about the publication in which the article or research paper may be found." + }, + { + "question_id": 242, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is also part of a footer. It includes a URL, \"http://jdmdh.episciences.org\", which likely directs readers to the website of the journal or publication mentioned in . This URL provides a way for readers to access more information or related content online." + }, + { + "question_id": 243, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a page number. It is located in the footer area of the document and provides the numerical identifier \"9\" for the current page. This helps readers navigate the document and facilitates referencing specific sections." + }, + { + "question_id": 244, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region is identified as the page-header of the document. 
It contains the title of the document, which reads \"2012 Annual Report 2013.\"" + }, + { + "question_id": 245, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This portion is a section-header labeled \"Non-Executive Directors' Remuneration.\" It indicates that the following section will discuss the payment and remuneration details for non-executive directors of the company." + }, + { + "question_id": 246, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This area is another section-header specifying \"Components of Non-Executive Director remuneration.\" This header suggests a breakdown of the various elements that constitute the remuneration for non-executive directors." + }, + { + "question_id": 247, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Found at the bottom of the page, this region is the page-footer. It's a small section that is typically used for providing footnotes, disclaimers, or publication information for the document." 
+ }, + { + "question_id": 248, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a table detailing a \"Share purchase plan.\" It contains columns for the name of the individual, the amount of shares acquired, and the share price range at acquisition dates, alongside with the total sum. It lists information about shares acquired by specific individuals at specified price ranges during a specific time frame." + }, + { + "question_id": 249, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a block of text providing detailed information on \"Current Board fees\" and \"Post-employment benefits.\" It specifies the annual fees for different board roles and outlines the post-retirement benefits provided to non-executive directors with terms of board service." + }, + { + "question_id": 250, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region contains text related to the \"Deferred share purchase plan.\" It describes the nature of the share purchase plan, specifying the conditions under which shares were purchased, the performance criteria associated with the plan, and details regarding the share price and acquisition dates." 
+ }, + { + "question_id": 251, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region is categorized as text. It discusses the mathematical concept of homotopy groups designated π_n(M), focusing on their ability to classify different dimensional hypersurfaces within a manifold M. The text further explains that the triviality of these homotopy groups is linked to the connectivity of the space they represent, with specific mentions of the concepts of domain walls in cosmology and topological defects arising from symmetry breaking during phase transitions in the universe." + }, + { + "question_id": 252, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region of text delves into the implications of symmetry breaking in theoretical physics. It connects the process of symmetry breaking to the generation of monopole-like defects, and it references the Standard Model's group construction that includes a U(1) factor. The text implies that this formation of defects played a pivotal role in historical scientific developments." + }, + { + "question_id": 253, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text in this section links the theoretical concepts mentioned earlier to a practical application: the motivation for introducing a phase of inflation in cosmological models. 
It characterizes the topological conditions for the formation of defects and points out that certain solutions for these conditions can exist even in the absence of topologically stable defects. References are made to specific types of defects and academic citations are included to support these statements." + }, + { + "question_id": 254, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The content in this region pertains to the study of cosmic defects and their stability. It addresses scenarios in which initially unstable defects might become stable through various mechanisms, such as the effects of plasma. These considerations are relevant to the inflationary model in cosmology, particularly the constraints from the formation of cosmic strings." + }, + { + "question_id": 255, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region is categorized as a formula and presents a mathematical equation related to the text's discussion about topological defects and homotopy groups. The equation seems to represent a relationship that is essential to the argument or analysis presented in the categorical text regions it is associated with." + }, + { + "question_id": 256, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The last region is identified as a page-footer. 
It likely contains publication and/or authorship information, a page number, or possibly an indication of the section of the document where the content can be found. Since it is a page-footer, its purpose is primarily to aid in the organization and navigation of the document." + }, + { + "question_id": 257, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0301_0188_0040.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The image appears to be a stylized illustration of a side profile of a person's head and upper torso. The person's face is depicted with a serene or peaceful expression, eyes closed and a faint smile, implying a sense of calm or contemplation. Behind the figure, there are abstract shapes resembling clouds or wind patterns that swirl around the head, which could suggest thoughts, memories, or a state of mental flow. The illustration uses a muted color palette, predominantly warm shades of beige, pink, and gray, with a touch of red in the figure's attire, which has a dotted pattern. This artwork likely serves to evoke a mood or theme related to the content of the document in which it's included, possibly regarding mental health, mindfulness, psychology, or the creative process." + }, + { + "question_id": 258, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0301_0188_0040.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a footer section of an image or document. It contains a citation that reads \"© 2021 Scientific American,\" indicating that the image or the content of the document is copyrighted by Scientific American. This informs the viewer about the source of the content and copyright year, serving both as an attribution and a legal notice to respect the intellectual property rights associated with the material." 
+ }, + { + "question_id": 259, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This section is labeled as \"Chapter 2: Motivation.\" It functions as a heading signifying the start of a new chapter or section within the document, providing readers with an indication of the chapter's theme, which in this case is to establish the rationale or impetus behind the subject matter discussed in the chapter." + }, + { + "question_id": 260, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region, also labeled as \"Chapter 2: Motivation,\" serves a similar purpose to , functioning as part of the chapter heading that presents the focus of the chapter, potentially implying that the author will delve into the reasons or driving forces guiding the study or research presented." + }, + { + "question_id": 261, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region contains a paragraph of text that discusses specific concepts related to a theoretical framework, possibly in the field of theoretical physics or string theory. It mentions a scenario where excitations in a certain 'near horizon throat region' appear redshifted to an observer at infinity. The text discusses the energy associated with these excitations and touches on limits pertaining to string theory, suggesting that in a particular limit, the full Type IIB string theory must be considered. 
The paragraph concludes with an implication that supergravity must be considered in the context of near-horizon geometry within the scope of string theory." + }, + { + "question_id": 262, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "In this text paragraph, the document appears to be discussing two theoretical pictures related to the same low-energy limit within theoretical physics or string theory. It mentions the field theory picture with supergravity and an \\( N = 4 \\text{SU}(N) \\) SYM on the D branes, as well as the geometry picture with supergravity in flat space and Type IIB string theory. It suggests that the document is comparing and contrasting these two theoretical perspectives and proposing that they are both decoupled theories with identical asymptotic conditions." + }, + { + "question_id": 263, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This text section seems to conclude the discussion by mentioning that the analytical tools for two differing theoretical scenarios are completely incompatible. It references the Born-Infeld action and suggests that a mathematical comparison between different models yields coincident D-branes for an \\( \\text{SU}(N) \\) two-form field strength, relating to a broader discussion on theoretical physics and string theory." 
+ }, + { + "question_id": 264, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region exhibits a mathematical formula that is relevant to the discussion within the document. The formula appears to link certain theoretical physics concepts, connecting string coupling constants \\( g_s \\) with D-brane charges and configurations. The formula is most likely important in the context of supporting the document's claims about supergravity or string theory." + }, + { + "question_id": 265, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The paragraph in discusses the conventional assumption that an insider's private information is static, citing specific examples from the literature. It elaborates by stating that in certain works, insiders are assumed to know the final value of an asset both before and after the default of the company issuing the asset. The text suggests that the presence of insiders does not always lead to market arbitrage and may contribute positively to the market by leading to higher information efficiency in price processes." + }, + { + "question_id": 266, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text in challenges the assumption of an insider's perfect foresight as unrealistic, reasoning that the fundamental value of a firm is tied to dynamically changing elements like cash flows and sales, among other factors. 
The paragraph presents the idea that the fundamental value is stochastic, implying that it is subject to random fluctuations, and that the insider has the advantage of perceiving these fluctuations more clearly than other market participants." + }, + { + "question_id": 267, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "In , the document introduces the paper's goal, which is to relax the assumption of static information and examine the equilibrium in trading and price processes and market efficiency when insiders have dynamic private information. The paragraph sets the context for a more detailed exploration of how markets operate under these conditions." + }, + { + "question_id": 268, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": " contains text which explains that the model considered in this paper is a broader version of the earlier static models. The paper's intention is to cover dynamic information scenarios and improve on previous models that covered a narrower range of trading strategies and pricing rules." + }, + { + "question_id": 269, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The paragraph in discusses the findings of the paper, which include the identification of a Markovian equilibrium that is inconspicuous, allows insiders to trade without being detected, and is solely dependent on the total order process. 
It underscores the unique nature of this equilibrium and how it enhances the market efficiency in certain conditions." + }, + { + "question_id": 270, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text in suggests empirical outcomes where revealing information might be beneficial. Specifically, it contrasts different market equilibrium scenarios and suggests that in non-Markovian price processes, it’s often better for insiders to disclose their private information." + }, + { + "question_id": 271, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The Comprehensive Description includes \"Abbondanza\" displayed in a script font that conveys a sense of stylishness or elegance, which may suggest that it is the name of a business, possibly a restaurant or some sort of food-related establishment, given its association with abundance or plenty often related to food." + }, + { + "question_id": 272, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The Comprehensive Description for \"Cafe\" suggests that the text is identifying a type of establishment where coffee and light meals may be served. The font is straightforward and easily legible, which is typically used for clarity and immediate recognition for passersby." 
+ }, + { + "question_id": 273, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The Comprehensive Description for \"USIS\" indicates a text that is likely an acronym or a name presented in a bold and blocky font, common for official or institutional entities. It is placed on the side of a van, suggesting it could be the branding of a company or a service, possibly linked to the van's purpose or ownership." + }, + { + "question_id": 274, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"ESTATE\" is written in capital letters with a bold typeface that has clear and uniform strokes, implying a sense of authority and prominence. It appears against a yellow background, which suggests visibility and is likely meant to catch the eye of passersby. The text is likely part of a business sign for a company dealing with property, real estate sales, or management." + }, + { + "question_id": 275, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"AGENTS\" displayed similarly in capital letters and bold typeface complements the text in . The typeface is consistent, suggesting that both are part of the same sign. The dark text against the yellow background stands out, indicating the nature of the business below, which is likely involved in real estate agency work." 
+ }, + { + "question_id": 276, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"SAXONS\" is written in capital letters with a font style that is bold and prominent, but with a slightly more decorative style than and . This difference could be a stylized choice to make the brand name distinctive. Positioned on a façade above a window, it is part of the business's branding, likely the name of the company." + }, + { + "question_id": 277, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "This contains the same text as , \"SAXONS\", indicating that the text is repeated within the image. This repetition reinforces the importance of the name as part of the branding. The text style and location, again above a window, maintain the brand's visibility from multiple angles." + }, + { + "question_id": 278, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "Displaying the word \"ESTATE\" in the same bold, capital letter style as observed in . This repetition at a lower part of the building indicates a consistent branding approach across the business' presence on the building, and its placement closer to eye level increases readability for pedestrians." 
+ }, + { + "question_id": 279, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"AGENTS\" is identified, and like , it mirrors the style and size of the sign in , ensuring that the message of the business being an estate agency is clear. This consistent branding facilitates quick recognition and understanding of the services offered." + }, + { + "question_id": 280, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"Triple\" appears in a retro cursive script, likely chosen to convey a sense of nostalgia or classic style, which is consistent with the overall branding. It is set against a yellow portion of the sign, and the color choice here is a mint green which provides a pleasing contrast that makes the text stand out." + }, + { + "question_id": 281, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"WHITE\" is written in bold, capital letters, featured on a green ribbon-like background that cuts across the sign. The font is sans-serif, which gives a modern and clean look. The use of capital letters in this context suggests emphasis and importance." + }, + { + "question_id": 282, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The phrase \"SPOT\" is displayed in a sans-serif, uppercase font similar to the text in . It is placed within the same green ribbon background, mirroring the style and maintaining design consistency. 
This positioning completes the name or title represented on the signage." + }, + { + "question_id": 283, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"O's\" is written in a script that echoes the retro flair seen in . This script is mint green, presented on a yellow backdrop, and it features an apostrophe, signifying a possessive or a contraction. The stylized \"O\" has a red center dot, adding to the thematic color scheme." + }, + { + "question_id": 284, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"by\" is located on the lower left area of the central graphic and is likely to be a connector or a preposition relating to the larger text elements in the image. It's written in small, lowercase letters, contrasting in size to the other texts, suggesting a subordinate role in the information hierarchy." + }, + { + "question_id": 285, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001122.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text in this area reads \"NEW\". The font is bold and stylized with thick vertical lines and sharp edges, suggesting a strong, impactful presence. The letters are colored in red, which stands out against the white background of the fabric they are printed on. There are black vertical lines that run down the fabric, giving the impression of pinstripes. The text placement and style are reminiscent of classic athletic or team-related apparel, often used to represent a specific city or team name." 
+ }, + { + "question_id": 286, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001122.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text in this area reads \"YORK\". Similar to the previous region, it features a bold and stylized font in red, contrasting with the white pinstriped background. The consistent style between this text and that of suggests they form a single phrase, typically associated with a particular location or team. The font size and its commanding presence imply that the text is intended to be easily read and recognized from a distance, characteristic of team jerseys or sports merchandise." + }, + { + "question_id": 287, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_162.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"Colchester\" is displayed in a straightforward, sans-serif font with a bold weight, which makes it highly legible and easy to read. The text color is white, which contrasts sharply with the red background, creating a standout effect that captures attention. This type of text presentation is typically used for clear communication and effective signage. \"Colchester\" is likely the name of a place, possibly a destination or location referenced on a signpost or directional marker. The choice of a bold and contrasting color scheme is intentional, aimed at ensuring that the text is discernible from a distance and under various lighting conditions. The text is centrally aligned within the marked area, suggesting the importance of the information it conveys. The presence of the symbol above the text, resembling a stylized pair of railway tracks, indicates that this sign is associated with a railway service or station. 
The purpose of the text in this context is to inform viewers of a railway station name or a destination reachable via train services." + }, + { + "question_id": 288, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"AYAM\" is presented in large uppercase letters on a signage board. The font appears bold and designed to be eye-catching, serving the purpose of promoting or identifying a business or product associated with chicken, as \"ayam\" means chicken in Malay and Indonesian." + }, + { + "question_id": 289, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"RIA\" appears next to \"AYAM\" in the same font and style, following the design pattern of the sign. It seems to be part of a larger phrase or brand name, although without additional context it is challenging to ascertain its full meaning or association." + }, + { + "question_id": 290, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"SMASHED\" is in uppercase letters and retains the same font consistency and styling as the previous words, indicating it's part of the same signboard. The use of the word \"smashed\" could be describing a method of food preparation, possibly relating to the menu items offered by the establishment." 
+ }, + { + "question_id": 291, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"FRIED\" appears in the same bold, attention-grabbing font as the other text elements in the signage. The usage of the term \"fried\" aligns well with food-oriented establishments and could denote a particular style of cooking advertised by the business." + }, + { + "question_id": 292, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The term \"CHICKEN\" completes what seems to be a descriptive phrase relating to the nature of the food provided at this location. Presented in the same visual style as the other text elements on the sign, it confirms the establishment’s focus on chicken dishes." + }, + { + "question_id": 293, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"Accommodation\" appears on a signboard, suggesting the label for a location where lodging facilities are provided. The text is bold and capitalized, providing clear visibility and significance, thus indicating direction to the accommodation facilities within the vicinity." + }, + { + "question_id": 294, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"Office\" displayed similarly to , is also on the signboard, and its typography suggests it is an instructional marker guiding individuals towards offices located nearby. 
Its distinct appearance functions as a navigational aid for visitors seeking office spaces." + }, + { + "question_id": 295, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The term \"Nightline\" is prominently featured, possibly indicating a nighttime service or a helpline available after-hours. This text, like the others on the sign, caters to nighttime assistance or inquiries, potentially providing crucial information for individuals seeking support during late hours." + }, + { + "question_id": 296, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"BUBBA\" appears in bold, capital letters with a font that is playful and somewhat informal, possibly evoking a casual or friendly atmosphere. The position is prominently displayed at the top of a circular logo, which suggests its importance as a distinguishing element or a brand name." + }, + { + "question_id": 297, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"GUMP\" is presented in a similar bold and playful font directly below . Both words form a cohesive phrase when read together, implying a connection or partnership, possibly in a business context." + }, + { + "question_id": 298, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"SHRIMP\" is placed below and , completing the phrase that seems to be the focal point of the circular logo. 
The font style remains consistent with the previous text, reinforcing the brand's identity and likely indicating the type of product or service offered." + }, + { + "question_id": 299, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"RESTAURANT\" is written in a smaller, yet bold font beneath the word \"SHRIMP\". This text specifies the nature of the business associated with the overarching brand identified by the preceding text." + }, + { + "question_id": 300, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"MARKET\" appears in a smaller font at the bottom of the circle, suggesting a secondary or additional aspect of the business, perhaps indicating a place where goods are sold as part of the company's offerings." + }, + { + "question_id": 301, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"CO\" could stand for \"Company,\" abbreviated and presented beside the main brand name, which is common practice for businesses to denote a corporate entity." + }, + { + "question_id": 302, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"TM\" indicates that the entire phrase formed by , , and is a trademark. This protects the brand's unique identity and legally secures its use exclusively for the business's purposes." 
+ }, + { + "question_id": 303, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"EVENING\" appears in a sans-serif, bold font that is capitalized for emphasis. It is located on the lower part of a product label, positioned just above another text element that indicates further details about the product. The text serves to indicate either the usage time or a key ingredient, \"Evening Primrose,\" of the product, likely related to wellness or personal care." + }, + { + "question_id": 304, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"PRIMROSE\" is the second part of a phrase in which the text is styled similarly to the text in . It follows directly below \"EVENING,\" suggesting the complete term \"EVENING PRIMROSE.\" The positioning and styling are consistent with , reinforcing the connection between the two words. The term \"Evening Primrose\" is usually associated with the name of a plant, often used in the context of essential oils or natural product ingredients." + }, + { + "question_id": 305, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The term \"BASE\" is part of the full term \"BASE OIL,\" which is shown in a smaller and possibly lighter weight sans-serif typeface compared to and . Positioned at the bottom of the product label, it likely indicates the type of product, suggesting that the contents of the bottle can be used as a carrier or base oil in aromatherapy or skincare." 
+ }, + { + "question_id": 306, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"OIL\" completes the term \"BASE OIL,\" as mentioned in . The font and positioning maintain consistency with the description provided in , reinforcing its role as part of a descriptive phrase related to the product's use or contents. Together, \"BASE OIL\" likely designates the product's category within a larger set of similar goods." + }, + { + "question_id": 307, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01644.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"46\" appears in a serif font with distinct, prominent stylistic flares at the ends of the strokes, which is characteristic of serif fonts. These numerals are white, providing a strong contrast against a dark background plate, which appears to be made of slate or a similar material. The plate is mounted onto a brick wall, and there are two spherical, possibly metallic, fixtures attached to the plate on either side, which seem to be serving as decorative mounting posts. The purpose of this text likely indicates an address or number associated with the location, commonly used to identify specific residential or commercial units." + }, + { + "question_id": 308, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This area seems to indicate the numeric value '31,' which, in a calendrical context, may reference the number of days in a month. It does not appear to have any interactive features based on the screenshot." 
+ }, + { + "question_id": 309, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This seems to be a button or a link labeled 'Connecter' which, when translated from French, means 'Connect' or 'Log in'. It is likely an interactive element that upon being clicked, would prompt the user to access an account or initiate a connection process." + }, + { + "question_id": 310, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This section contains the French word 'Novembre', which is the month of November. It appears to be a part of a list of months, possibly for navigating a calendar or archives by month. It may be an interactive element that allows users to view content from November." + }, + { + "question_id": 311, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This is a button or link with the text 'Annoncez' followed by information icon (i). The French word 'Annoncez' translates to 'Advertise'. This suggests that it is a call-to-action for users to advertise, possibly by clicking this button or link. The information icon typically indicates additional details available upon interaction." 
+ }, + { + "question_id": 312, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This area displays the number '07', which could signify a day of the month, especially since it is seen next to a date heading in the format 'Vendredi 7 Mai 2021', which translates to 'Friday, May 7, 2021'. It seems to be a static element without interactivity." + }, + { + "question_id": 313, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "Similar to , this is labeled 'Septembre', which is the French word for September. It is part of the same apparent navigational element for a calendar or archive sorted by months and is likely interactive as well." + }, + { + "question_id": 314, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This section reads 'Liens Web', which translates to 'Web Links' from French. This likely refers to a section of the web page intended to direct users to other related sites or resources. It is probably interactive, with each listed link being clickable." + }, + { + "question_id": 315, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "The number '15' is visible here, and when combined with the context of a calendar visible in the screenshot, it likely represents the 15th day of a month. This element does not seem to be interactive itself." 
+ }, + { + "question_id": 316, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "Here we see the number '04' which, in the context of the surrounding calendar, might represent the 4th day of a month. It doesn't show any sign of interactivity." + }, + { + "question_id": 317, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This text, 'Plan du site', stands for 'Site Map' in French and usually refers to a detailed page listing where one can find an overview of all the sections and pages within the website. It is usually an interactive element that, when clicked, will take the user to the sitemap page." + }, + { + "question_id": 318, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "This region depicts an interactive button with the text \"BOOK NOW\" overlaid on it. Usually, buttons like this on websites are clickable and lead the user to a page where they can schedule an appointment or reserve a service. The button is stylistically designed to stand out and grab attention, potentially suggesting it is a call-to-action feature for users to quickly access the booking process for the service provided by the website." 
+ }, + { + "question_id": 319, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "This section of the website features a text link with the phrase \"Buy Luxurious Doona.\" It likely serves as a navigational element, which upon clicking, would redirect users to a page where they can purchase a \"Luxurious Doona.\" The term \"Doona\" typically refers to a type of bedding, suggesting that the site might be related to home goods or personal comfort items." + }, + { + "question_id": 320, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "This area contains a text link that reads \"Pay Balance.\" It is probably an interactive link that, once clicked, would take the user to a section of the website where they can complete a payment - likely concerning a service or product they have previously engaged with." + }, + { + "question_id": 321, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "Featured here is a clickable text link titled \"About Us.\" Such links generally lead users to a webpage that elaborates on the history, mission, values, or team behind the company or service. It helps users learn more about the company or website owners." + }, + { + "question_id": 322, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "This part of the webpage indicates a \"Contact us\" link. 
Clicking on this text would typically guide the visitor to a page featuring contact information or a form enabling the users to reach out to the company for inquiries or support." + }, + { + "question_id": 323, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "\"Home\" appears to be a navigation link that, when selected, would likely redirect users to the homepage of the website. The homepage is the main page that often provides a comprehensive overview of what the website offers." + }, + { + "question_id": 324, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The text here, \"doonawash@gmail.com,\" suggests an email address. This is likely provided for users to directly contact the company or service provider through email. It is not clickable but can be used to send an email using an email client or service." + }, + { + "question_id": 325, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "Similar to , this \"Contact us\" link would allow users to access a contact section or page on the website, promoting user interaction with the service provider for queries or assistance." + }, + { + "question_id": 326, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "Just like , this \"Home\" link is a navigational feature intended to bring the user back to the site's main page, presenting a starting point or central hub for exploring the website's contents." 
+ }, + { + "question_id": 327, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "Echoing , the \"Pay Balance\" text link is associated with the payment part of a transaction on the website. It is intended to facilitate users in clearing dues or completing transactions related to the services offered by the site." + }, + { + "question_id": 328, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_0558c1f4-c05b-49a8-8479-04b1575779d2.png", + "category": "web_detailed_caption_box", + "text": "This area of the webpage is part of a bullet point list under the subheading \"Local Chapters\". The subheading describes Local Chapters as country or region-level groups affiliated with the OpenStreetMap Foundation that represent their local mapping community in dealings with government, business, and media. The bullet point \"OpenStreetMap Belgium\" likely indicates that there is an established local chapter for the country of Belgium. The text appears in blue with an underline, suggesting that it is a hyperlink. Clicking on this hyperlink would presumably direct the user to more information about the OpenStreetMap community in Belgium or to their specific website." + }, + { + "question_id": 329, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "This area appears to be a contact detail, specifically a phone number. It typically serves as a direct line of communication for users to reach out to the company or organization featured on the website. Such contact information is usually clickable on mobile devices, enabling the user to initiate a phone call directly." 
+ }, + { + "question_id": 330, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "This section includes a numerical rating, which is indicative of client satisfaction, service quality, or performance measurement. It suggests that it may be connected to reviews or ratings received from clients, as denoted by the star symbol which commonly represents ratings." + }, + { + "question_id": 331, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "The text translates to \"Free Consultation\" in Azerbaijani, indicating an offering from the company to prospective clients. It is likely a call-to-action button which upon clicking, would lead a user to a form or contact option to set up a consultation without any charge." + }, + { + "question_id": 332, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "This part of the website is labeled \"Portfolio,\" signifying that it's likely a navigation element leading to a page where the company showcases their previous work, projects, or case studies to highlight their experience and expertise." + }, + { + "question_id": 333, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This space indicates a shopping cart feature with a count of items currently in the cart, which currently stands at zero. 
This interactive element likely becomes clickable when items are added, allowing users to view and manage the contents of their cart." + }, + { + "question_id": 334, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This area is typically a customer service feature, allowing users to access help or assistance through various means such as a help center, live chat, or contact information. It's usually clickable and would direct the user to a support section of the website." + }, + { + "question_id": 335, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "The text suggests a prompt to visit the company's Facebook page. This is an interactive element that, when clicked, likely redirects users to the specified social media page to engage with the company's content on that platform." + }, + { + "question_id": 336, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "It denotes an area that likely relates to personalization for users, where they can view their astrology charts. This is expected to be a clickable feature which, when accessed, leads the user to a section where their personalized charts are displayed or can be created." + }, + { + "question_id": 337, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "Similar to , this is a call to action to visit the company's Twitter page. 
Clicking on this interactive element would redirect a user to the company's Twitter profile to view tweets and engage with their content." + }, + { + "question_id": 338, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This is a customer contact area, providing users with a way to get in touch with the company. Clicking on this is likely to take the user to a section of the site with various contact options like email, phone, or a contact form." + }, + { + "question_id": 339, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This is a call-to-action button that allows users to add a product to their shopping cart. This button is interactive, and upon clicking, the chosen product would be added to the user's cart, with the action possibly reflected in the shopping cart count in ." + }, + { + "question_id": 340, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This area is likely dedicated to showcasing the company's range of products. Clicking here would probably lead users to a product catalog where they can browse and select items of interest." + }, + { + "question_id": 341, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "It represents an area designated for a podcast. Users can expect to interact with this button to be taken to a media player or section of the website where they can listen to recorded audio content." 
+ }, + { + "question_id": 342, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This part of the website provides company information to the user. It's normally a clickable element that leads the user to learn more about the company's history, values, mission, and team members." + }, + { + "question_id": 343, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This section typically represents a menu item on a website that describes the services or actions undertaken by the organization. It usually links to a page with detailed information on the work that the organization performs, including projects, mission statements, or other relevant content." + }, + { + "question_id": 344, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This appears to be a news headline or feature article title on the website. It suggests that the organization has introduced a new initiative offering financial assistance for livelihood projects. Clicking on this title would likely lead to an article or post giving more information about the micro-grants program and its objectives." + }, + { + "question_id": 345, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This is likely a button or link to a settings page where users can adjust their preferences for the website, which might include language settings, account details, notification preferences, and more." 
+ }, + { + "question_id": 346, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This commonly links to the website's privacy policy document, where users can learn about how the organization collects, uses, stores, and protects personal data." + }, + { + "question_id": 347, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This is typically a navigation link that returns the user to the main homepage of the website from any other page." + }, + { + "question_id": 348, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "It usually indicates a button the user can click to accept the terms of a policy, possibly related to cookies or usage terms, as indicated by the accompanying text." + }, + { + "question_id": 349, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This is often a prominent call-to-action button meant to direct users to a page where they can make financial contributions to the organization or cause." 
+ }, + { + "question_id": 350, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "Commonly a menu item that links to a news section containing articles, updates, blog posts, press releases, or other information that keeps readers informed about the organization's activities or relevant topics." + }, + { + "question_id": 351, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This is typically a link to a page where users can find more information about the organization, including history, values, team members, or accomplishments." + }, + { + "question_id": 352, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "Usually a link to a page where visitors can find contact information for the organization, such as an address, phone number, email, or a contact form." + }, + { + "question_id": 353, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_6c677961-e540-4cc5-b725-5e301019a9f9.png", + "category": "web_detailed_caption_box", + "text": "This region appears to be a toolbar located within a content editing area, likely part of a web-based application interface. The specific feature highlighted is an icon that suggests functionality related to inserting tables into the content. In a typical text editor or content management system interface, clicking this icon would presumably open a menu or dialogue box allowing the user to create and insert a table into the document. 
The table insertion feature commonly lets users specify the number of rows and columns, choose a table style, and sometimes adjust additional table properties such as cell padding or headers." + }, + { + "question_id": 354, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "This area encompasses a navigation element labeled \"News.\" It likely leads to a section of the website where current news relevant to the organization or its field of operation is disseminated. As a navigational element, it is interactive and upon clicking would redirect users to the page where news articles or updates are posted." + }, + { + "question_id": 355, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "This area displays the company's name ServeGate, which appears to be stylized as a logo. This typically acts as a home button; clicking on it would usually take users back to the main or home page of the website." + }, + { + "question_id": 356, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "This heading titled \"Our Difference\" may signify a navigational item that leads to content describing what sets the organization apart from competitors. Interaction would involve clicking it to navigate to a page that likely discusses the company's unique selling propositions (USPs), mission, values, or other differentiating factors." 
+ }, + { + "question_id": 357, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "Labeled \"About Us,\" this is another navigation item that, when clicked, would take the user to a section of the site that provides information about ServeGate, such as its history, leadership team, vision, and mission." + }, + { + "question_id": 358, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The text \"Get in touch\" suggests an interactive component that leads to a part of the website where users can contact the organization. This may include a contact form, phone numbers, email addresses, or other means of communication." + }, + { + "question_id": 359, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The term \"Home\" designates a navigational link that typically redirects users to the front page of the website. Clicking this link would generally return the user to the starting point of their navigation experience." + }, + { + "question_id": 360, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The item labeled \"Services\" is likely a drop-down menu or a link to a page that outlines the company's offerings. Users can click on it to discover more about the services provided by ServeGate, including descriptions and possibly pricing or someone to contact for further inquiry." 
+ }, + { + "question_id": 361, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "\"Indigenous Impact\" might be a navigation link to a page detailing the company's impact on, contributions to, or programs associated with Indigenous communities. Interaction with this element would bring the user to either a dedicated section or might expand into a submenu listing various facets of this impact." + }, + { + "question_id": 362, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "This appears to be another instance of the company logo for ServeGate, similar to . It likely serves the same function as a clickable link leading back to the home page of the website." + }, + { + "question_id": 363, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_5a35d9c2-2c2d-4a49-ad0a-1408d9cac78e.png", + "category": "web_detailed_caption_box", + "text": "The area appears to contain an interface element labeled \"Close issue\" situated within a software development environment, likely a part of an issue tracking or project management system. This interface element is a button, as suggested by its design and placement near the text input area for comments. When clicked, it would typically result in the associated issue being marked as resolved or closed in the system, thereby updating the status of the issue within the project's workflow. The button is designed for users to signify that the discussion, problem, or task represented by the issue no longer requires attention and can be archived or removed from active consideration." 
+ }, + { + "question_id": 364, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_3fed2169-3c3d-43e7-baaa-3bf0e0c7134c.png", + "category": "web_detailed_caption_box", + "text": "This is a close button found on a web-based issue tracking platform, specifically GitLab. In the context of the page, it is presumably used to close the issue which is currently being viewed. When a user clicks this button, the active issue (#1460 in this case) will likely be marked as closed within the system, changing its status from open to closed. This is a standard feature in issue tracking and project management software, allowing users to manage the lifecycle of issues and tasks." + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/natural_detailed_caption_box/prediction.json b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/natural_detailed_caption_box/prediction.json new file mode 100644 index 0000000000000000000000000000000000000000..46b7886dee75138be7ac702f93ad13e4478cf79c --- /dev/null +++ b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/natural_detailed_caption_box/prediction.json @@ -0,0 +1,2186 @@ +[ + { + "question_id": 1, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "Friday, Sep 2, 61°F" + }, + { + "question_id": 2, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A stylized letter \"G\" with a gradient of colors starting from red at the top, transitioning to orange, yellow, green, blue, and ending with purple at the bottom." 
+ }, + { + "question_id": 3, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A white, fluffy cloud with a smooth, rounded shape and a slightly irregular outline. The cloud has a soft, cotton-like texture and appears to be illuminated from the left side, giving it a gentle gradient from white to a light blue hue." + }, + { + "question_id": 4, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A white gear with a blue circular center and six evenly spaced, straight teeth." + }, + { + "question_id": 5, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A white speech bubble icon with a blue outline, featuring three horizontal blue lines of varying lengths inside the bubble." + }, + { + "question_id": 6, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A red, rectangular play button with rounded corners, featuring a white, centered play triangle pointing to the right." + }, + { + "question_id": 7, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A stylized letter \"G\" with a gradient of colors starting from red at the top, transitioning to orange, yellow, green, and ending with blue at the bottom." 
+ }, + { + "question_id": 8, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A blue circle with a red dot above it and a yellow dot below it." + }, + { + "question_id": 9, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A circular button with a white center and a thin black border." + }, + { + "question_id": 10, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A white, right-pointing arrow with a thick border." + }, + { + "question_id": 11, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Excel\" is written in a bold, sans-serif font with a white color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 12, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Facebook\" is written in a bold, sans-serif font with a light pink color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." 
+ }, + { + "question_id": 13, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Speedtest\" is written in a bold, sans-serif font with a white color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 14, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Instagram\" is written in a bold, sans-serif font with a light pink color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 15, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Home\" is written in a bold, sans-serif font with a white color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 16, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Lite\" is written in a bold, sans-serif font with a white color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." 
+ }, + { + "question_id": 17, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Messenger\" is written in a bold, sans-serif font with a gradient of pink to white. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 18, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "Home De..." + }, + { + "question_id": 19, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "A pair of cartoonish eyes with large, white, oval-shaped pupils and black, curved eyelashes. The eyes are set against a green background." + }, + { + "question_id": 20, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "A blue square with a white lowercase \"g\" followed by three white horizontal lines of equal length." + }, + { + "question_id": 21, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"Search\" is written in a bold, sans-serif font with a dark gray color. The letters are evenly spaced and aligned horizontally." 
+ }, + { + "question_id": 22, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"settings\" in lowercase letters, with a modern, sans-serif font. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance. The color of the text is a gradient of light to dark gray, with the darker shade at the top and the lighter shade at the bottom." + }, + { + "question_id": 23, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "mobile, data" + }, + { + "question_id": 24, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"data\" in lowercase letters, with a modern, sans-serif font. The letters are evenly spaced and have a clean, minimalist design." + }, + { + "question_id": 25, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The Wi-Fi symbol consists of a lowercase \"i\" with a dot above it, followed by a lowercase \"f\" with a dot above it." + }, + { + "question_id": 26, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"and\" in lowercase letters, with a serif font, is written in a dark color against a light background." 
+ }, + { + "question_id": 27, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"hotspot\" in lowercase letters, with a modern, sans-serif font. The letters are evenly spaced and have a clean, minimalist design." + }, + { + "question_id": 28, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "usage, and hotspot" + }, + { + "question_id": 29, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"Connected\" is written in a bold, sans-serif font with a black color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 30, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "A white, L-shaped object with a smooth surface and rounded edges." + }, + { + "question_id": 31, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "Search or type web address" + }, + { + "question_id": 32, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The text \"ebay shopping cart\" is displayed in lowercase letters. 
The word \"ebay\" is in a larger font size compared to the word \"shopping cart\". The text is in a sans-serif font and is centered horizontally." + }, + { + "question_id": 33, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The word \"cart\" in lowercase letters, with a bold, sans-serif font. The letters are black and evenly spaced." + }, + { + "question_id": 34, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "https://cart.ebay.com/" + }, + { + "question_id": 35, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "Welcome to Costco Wholesale" + }, + { + "question_id": 36, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The text \"costco.com\" is displayed in lowercase letters." + }, + { + "question_id": 37, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "Pay Less." + }, + { + "question_id": 38, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The word \"Target\" in bold, black, sans-serif font." 
+ }, + { + "question_id": 39, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The text \"Expect More.\" is written in a bold, sans-serif font with a black color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 40, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The text \"target.com\" is displayed in a bold, sans-serif font with a blue color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 41, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The word \"skype\" is written in lowercase letters with a modern, sans-serif font. The letters are black and evenly spaced, with a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 42, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The word \"Skype\" is written in lowercase letters with a modern, sans-serif font. The letters are black and evenly spaced, with a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 43, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "A blue circular icon with a white lowercase \"s\" in the center." 
+ }, + { + "question_id": 44, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The word \"Install\" is written in lowercase white letters on a green background." + }, + { + "question_id": 45, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The word \"Skype\" is written in a bold, sans-serif font with a dark gray color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 46, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "purchases" + }, + { + "question_id": 47, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "In-app purchases" + }, + { + "question_id": 48, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "A black star with a five-pointed shape, featuring a slightly irregular outline and a textured surface that suggests a three-dimensional form." + }, + { + "question_id": 49, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "A black letter \"B\" followed by a black plus sign." 
+ }, + { + "question_id": 50, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "Editors' Choice" + }, + { + "question_id": 51, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Search settings" + }, + { + "question_id": 52, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "A white envelope icon with a triangular flap on the right side, set against a dark background." + }, + { + "question_id": 53, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "The word \"Add\" in lowercase letters, with a bold font and a slight shadow effect, giving it a three-dimensional appearance." + }, + { + "question_id": 54, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "another email" + }, + { + "question_id": 55, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "The word \"account\" in lowercase letters, with a bold font and a slight italicization, set against a plain background." 
+ }, + { + "question_id": 56, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Set up your personal or work email" + }, + { + "question_id": 57, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "or work email" + }, + { + "question_id": 58, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Network & internet" + }, + { + "question_id": 59, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Wi-Fi, mobile, data" + }, + { + "question_id": 60, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "usage, and" + }, + { + "question_id": 61, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "Wednesday, May 18" + }, + { + "question_id": 62, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "The word \"Maps\" in a bold, sans-serif font, with a gradient of pink to white, giving it a three-dimensional appearance." 
+ }, + { + "question_id": 63, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "A stylized letter \"G\" with a gradient of colors starting from red at the top, transitioning to orange, yellow, green, blue, and ending with purple at the bottom." + }, + { + "question_id": 64, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "A red, teardrop-shaped pin with a black circular center." + }, + { + "question_id": 65, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "A white speech bubble icon with a blue outline, containing three horizontal blue lines of varying lengths." + }, + { + "question_id": 66, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "A stylized letter \"G\" with a gradient of colors starting from red at the top, transitioning to orange, yellow, green, and ending with blue at the bottom." + }, + { + "question_id": 67, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "A blue circle with a white border, a red circle with a white border, and a yellow circle with a white border." 
+ }, + { + "question_id": 68, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The text \"costco.com/Check\" is displayed in a bold, sans-serif font. The word \"costco.com\" is in lowercase letters, and the word \"Check\" is in uppercase letters. The text is aligned to the left." + }, + { + "question_id": 69, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The logo features the word \"COSTCO\" in large, bold, red capital letters with a white outline. Below it, the word \"WHOLESALE\" is written in smaller, bold, blue capital letters." + }, + { + "question_id": 70, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The word \"Warehouses\" is written in a bold, sans-serif font with a light blue color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 71, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The word \"Account\" in a bold, sans-serif font, with a gradient of blue shades ranging from light to dark, giving it a three-dimensional appearance." + }, + { + "question_id": 72, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "A blue shopping cart with a white handle and a white basket area." 
+ }, + { + "question_id": 73, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "A blue rectangular sign with the word \"Shop\" in white, bold, sans-serif font. To the left of the text, there are three horizontal white lines of varying lengths." + }, + { + "question_id": 74, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The word \"Search\" in a bold, sans-serif font, with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and have a consistent size throughout." + }, + { + "question_id": 75, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "My Warehouse" + }, + { + "question_id": 76, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "Delivery Location" + }, + { + "question_id": 77, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The word \"Seattle\" in bold, black, sans-serif font." 
+ }, + { + "question_id": 78, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "Fetch Rewards: Play" + }, + { + "question_id": 79, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The word \"Play\" in bold, black, sans-serif font." + }, + { + "question_id": 80, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "to earn" + }, + { + "question_id": 81, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The text \"MAKE\" is written in bold, uppercase letters with a dark green color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 82, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The word \"MONEY\" in bold, uppercase letters with a green background and black outline." + }, + { + "question_id": 83, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The text \"appminer st\" is displayed in lowercase letters. The letters are green and have a sans-serif font. The text is aligned to the left." 
+ }, + { + "question_id": 84, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The word \"Contains\" is written in a bold, sans-serif font with a light gray color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 85, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The text \"ads\" is written in lowercase letters with a modern, sans-serif font. The letters are evenly spaced and have a clean, minimalist design. The color of the text is a light gray, blending subtly with the background." + }, + { + "question_id": 86, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The text \"50K+\" is displayed in bold, black font with a slight shadow effect, giving it a three-dimensional appearance. The \"50K\" is in a larger font size compared to the \"+\" sign." + }, + { + "question_id": 87, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "A black star with a five-pointed shape, featuring a slightly irregular outline and a textured surface that suggests a three-dimensional form." 
+ }, + { + "question_id": 88, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A black and white image of a computer keyboard with a standard QWERTY layout, including function keys, a number pad, and arrow keys. The keys are rectangular with white lettering on black keys, and the keyboard has a slight ergonomic curve." + }, + { + "question_id": 89, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The text \"walmart.com\" is displayed in a bold, sans-serif font. The letters are black and evenly spaced, with a slight shadow effect, giving them a three-dimensional appearance. The text is aligned to the left." + }, + { + "question_id": 90, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A white rectangular signboard with the text \"Lenovo Thinkpad\" in black, sans-serif font." + }, + { + "question_id": 91, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A blue rectangular button with rounded corners featuring the word \"Cancel\" in white, bold, sans-serif font." + }, + { + "question_id": 92, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A black and white image of a Lenovo ThinkPad laptop with a visible keyboard and trackpad." 
+ }, + { + "question_id": 93, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The word \"Lenovo\" in lowercase letters, with a bold font and a slight italicization, set against a plain background." + }, + { + "question_id": 94, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A black, rectangular computer keyboard with a standard QWERTY layout, including a number pad on the right side. The keys are chiclet-style with white lettering, and there is a slight sheen on the surface, suggesting a smooth texture. The function keys are aligned along the top, and there is a visible space bar at the bottom center." + }, + { + "question_id": 95, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The word \"in\" in lowercase letters, with a bold font and a slight shadow effect, giving it a three-dimensional appearance." + }, + { + "question_id": 96, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A rectangular electronic device with a screen displaying text, surrounded by a thin bezel." + }, + { + "question_id": 97, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A black and white image of a Lenovo ThinkPad charger." 
+ }, + { + "question_id": 98, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "NAVIGATING SPECIAL EDUCATION SOCIAL & EMOTIONAL LEARNING" + }, + { + "question_id": 99, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A yellow background with two human head outlines facing each other. The left head has the words \"FIXED MINDSET\" above a red downward arrow, and the right head has the words \"GROWTH MINDSET\" above a green upward arrow." + }, + { + "question_id": 100, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon character with a serious expression, featuring a dark gray hair, a light blue shirt, and a red cross symbol on the left side of the head. The character has a red frown and is surrounded by two white, cloud-like shapes on either side of the head." + }, + { + "question_id": 101, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular logo with a beige background featuring five hands in different colors: red, green, blue, purple, and orange, arranged in a circular pattern. Above the hands, the text \"Understanding Diversity\" is written in black." + }, + { + "question_id": 102, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A black and white illustration of a person with a light bulb on their head, holding a book. 
The person has a question mark above their head and another question mark to the right of their head. The background is a light peach color." + }, + { + "question_id": 103, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon illustration of two boys, one wearing a red shirt and blue shorts, and the other wearing a striped shirt and brown shorts, both with their arms raised. The word \"Bullying\" is written above them." + }, + { + "question_id": 104, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon of a boy and a girl playing with each other. The boy is wearing a yellow shirt, black shorts, and red shoes. The girl is wearing a yellow shirt, blue pants, and red shoes. Both have black hair and are smiling." + }, + { + "question_id": 105, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A blue speech bubble with a yellow lightning bolt symbol, and a red speech bubble with a yellow lightning bolt symbol, both containing a person's face." + }, + { + "question_id": 106, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A rectangular blue traffic sign with a white border, featuring three white arrows. The leftmost arrow curves to the left, the middle arrow points straight up, and the rightmost arrow curves to the right. Below the arrows, the word \"CHANGES\" is written in white capital letters." 
+ }, + { + "question_id": 107, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized illustration of two human figures, one in yellow and the other in blue, both with black outlines. The yellow figure is standing on a staircase, while the blue figure is standing on a platform. The blue figure is holding a microphone and appears to be speaking or presenting. The word \"Leadership\" is written in black text above the figures." + }, + { + "question_id": 108, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A blue circle with a white plus sign inside it, followed by a white \"2X\" text." + }, + { + "question_id": 109, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A hand holding a person with a blue shirt and black pants, with a purple banner below displaying \"$4,000\" in white text." + }, + { + "question_id": 110, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A pie chart with a blue background and a white border, featuring a white line that divides the chart into two sections. The left section is larger and has a white number \"36%\" inside it, while the right section is smaller and has a white number \"36%\" inside it." 
+ }, + { + "question_id": 111, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A globe with a blue ocean and white continents, featuring a purple banner with white text that reads \"2.7 trillion impact to global GDP from use of more efficient talent platforms.\"" + }, + { + "question_id": 112, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "46% of companies are sometimes or frequently understaffed" + }, + { + "question_id": 113, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Travel personas: how travelers identify their travel style" + }, + { + "question_id": 114, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular planner with a white background featuring a colorful illustration of two people, one wearing a red hat and the other wearing a blue hat, both holding a smartphone. The person in the red hat is holding a book, and the person in the blue hat is holding a suitcase. The background includes a mountain and a sun. The text \"THE SMART PLANNER\" is written in bold, black letters at the top." 
+ }, + { + "question_id": 115, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "The Relaxed Nomad" + }, + { + "question_id": 116, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A family of five, consisting of a man, a woman, and three children, standing together. The man is holding a baby, while the woman is holding a suitcase. The children are standing around them, with one child holding a suitcase. The family is depicted in a circular frame." + }, + { + "question_id": 117, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular white background with a thin black border." + }, + { + "question_id": 118, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular white background with a thin black border." + }, + { + "question_id": 119, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A red airplane seat with a high backrest and armrests, featuring a small, rectangular, red and white logo on the backrest." 
+ }, + { + "question_id": 120, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular logo with a white background featuring a stylized illustration of a person in a blue suit with a red briefcase in their left hand and a blue suitcase in their right hand. The person is depicted in a walking motion, with one leg forward and the other leg back. The text \"THE BUSINESS ROAD WARRIOR\" is written in bold, black capital letters above the illustration." + }, + { + "question_id": 121, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A woman with short brown hair, wearing a purple top with a white collar, is holding a yellow envelope in her right hand. She has a headset on her head and is standing in front of a computer monitor with the word \"BIG\" visible on the screen." + }, + { + "question_id": 122, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular orange background with a white border." + }, + { + "question_id": 123, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular logo with a light blue background featuring a stylized globe in the center. The globe is divided into four quadrants, each in a different shade of blue. A black headset with a microphone is positioned over the globe, with the earpieces extending outward. To the right of the globe, there is a yellow star." 
+ }, + { + "question_id": 124, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon man with a light blue shirt and a black tie, wearing a headset with a microphone. He has a light brown hair and is pointing upwards with his right hand. To his right, there is a yellow light bulb with a red base." + }, + { + "question_id": 125, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A blue and black wrench with a flat-head design, featuring a blue handle with a textured grip and a black head with a serrated edge. The wrench has a long, straight shaft connecting the handle to the head." + }, + { + "question_id": 126, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized illustration of a person wearing a headset with a microphone, a purple shirt, and a white undershirt. The person is holding a smartphone with a blue and white design on the screen." + }, + { + "question_id": 127, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized illustration of a hand holding a black and white telephone handset, with a blue circular background featuring a partial globe and a speech bubble with the word \"BIG\" in white. The hand is wearing an orange life jacket with white stripes." 
+ }, + { + "question_id": 128, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular logo with a red border, featuring a stylized design of a person wearing a white shirt and a black tie, with a blue and white striped hat. The background includes a Union Jack flag and a yellow rectangle." + }, + { + "question_id": 129, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon woman with brown hair tied back, wearing a purple shirt with a white collar, and a headset. She is holding a yellow star in her right hand and has a black and white striped object in her left hand." + }, + { + "question_id": 130, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized heart with a vibrant red color, featuring a prominent blue and orange flame-like design on the upper left side, and a smaller blue and orange flame-like design on the lower right side." + }, + { + "question_id": 131, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized human figure with arms raised in a celebratory pose, surrounded by four blue arrows pointing outward, each arrow with a slight curve and a pointed tip." 
+ }, + { + "question_id": 132, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A digital bathroom scale with a black base and a white digital display showing the weight." + }, + { + "question_id": 133, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A pair of pink lungs with a central trachea, featuring detailed vein patterns and a slightly curved shape." + }, + { + "question_id": 134, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A yellow smiley face with a black outline, featuring two black dots for eyes and a curved black line for a mouth." + }, + { + "question_id": 135, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A human knee with a visible bone structure, including the femur and tibia, with a slight curvature and a smooth surface." + }, + { + "question_id": 136, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A human brain with a detailed, textured surface, featuring a prominent cerebral cortex and a visible portion of the brain stem." 
+ }, + { + "question_id": 137, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A red heart with a white outline and a white line through the center, indicating a heart rate or rhythm." + }, + { + "question_id": 138, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "The word \"HOTAPPE\" is written in large, bold, uppercase letters. The letters are colored in the following order from left to right: red, orange, yellow, light blue, and dark blue." + }, + { + "question_id": 139, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular smiley face with a yellow face and a red border. The face has two blue teardrop-shaped eyes, a brown curved mouth, and two brown curved lines for eyebrows." + }, + { + "question_id": 140, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "An orange circle with two brown hands facing each other, with the word \"OPENNESS\" in green capital letters below the circle." + }, + { + "question_id": 141, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular yellow background with a pair of hands clasped together in the center. The hands are depicted in a light brown color, with the left hand slightly overlapping the right hand. 
Radiating from the hands are white lines, giving the impression of light or energy emanating from the hands. Below the hands, the word \"TOUCH\" is written in bold, uppercase letters." + }, + { + "question_id": 142, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular logo with a light blue background featuring a large red heart in the center. Inside the heart, there is a white exclamation mark. Below the heart, the word \"ATTENTION\" is written in bold, uppercase letters." + }, + { + "question_id": 143, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular blue background with two cartoon faces, one on the left and one on the right, separated by a white dashed line. The left face has brown hair and a neutral expression, while the right face has light brown hair and a neutral expression. Below the faces, the word \"PROXIMITY\" is written in bold, uppercase letters." + }, + { + "question_id": 144, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular blue background with a white eye in the center, featuring a brown iris and a white sclera. Below the eye, the words \"EYE CONTACT\" are written in bold, uppercase letters." + }, + { + "question_id": 145, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with a green background, wearing a red shirt, with a thermometer in their mouth." 
+ }, + { + "question_id": 146, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with a green background, wearing a pink shirt, with a red nose and a red mouth, and a white hand with three fingers extended." + }, + { + "question_id": 147, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with a green background, wearing a pink shirt, with a green and white object in their mouth." + }, + { + "question_id": 148, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized illustration of a person with a green circular background. The person has a gray face with a red nose and a red mouth. The person is wearing a red shirt with a yellow collar." + }, + { + "question_id": 149, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with a green background, wearing a red shirt and white pants, is sitting on a white chair." 
+ }, + { + "question_id": 150, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular green sign with a white border, featuring a stylized illustration of a pair of red lungs with a gray outline, set against a dark background." + }, + { + "question_id": 151, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized illustration of a kidney with a red and pink color scheme, featuring a central red area with a pink outline, flanked by two symmetrical, curved, pink shapes resembling the kidney's lobes, all set against a light green background." + }, + { + "question_id": 152, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A blue rectangular header with the text \"Interesting Facts\" in white, bold, sans-serif font. To the left of the text, there are three white horizontal lines. To the right of the text, there is a red heart symbol." + }, + { + "question_id": 153, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized panda face with a white face, black ears, black patches around the eyes, and a black nose. The panda has a small, curved black mouth and a content expression. The face is set against a green circular background." 
+ }, + { + "question_id": 154, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A green circular background with a white plate in the center, containing a yellow circle. To the left of the plate is a white fork, and to the right is a white spoon." + }, + { + "question_id": 155, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular icon with a dark blue background featuring a red heart in the center. The heart is outlined in white and has a white line running horizontally across its middle. Below the heart, the word \"Diseases\" is written in white, bold, sans-serif font." + }, + { + "question_id": 156, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A white long-sleeved shirt with a teal collar and cuffs, featuring a row of black buttons down the front." + }, + { + "question_id": 157, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular orange icon with a white film reel design in the center. The film reel has a blue border with white squares on the left and right sides, and a white center with a blue horizontal line dividing it into two sections." 
+ }, + { + "question_id": 158, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular green icon featuring a white syringe with a red cross symbol on the barrel, a white droplet to the right of the syringe, and a yellow needle." + }, + { + "question_id": 159, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular blue icon featuring a white wine glass with a yellow liquid on the left and a white bottle with a yellow liquid and a brown cap on the right." + }, + { + "question_id": 160, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular icon with a dark blue background featuring two stylized human figures. The figure on the left has short, light brown hair and is wearing a red shirt. The figure on the right has short, light brown hair and is wearing a brown shirt with a white collar." + }, + { + "question_id": 161, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular icon with a solid olive green background featuring a white silhouette of the Earth in the center." + }, + { + "question_id": 162, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person sitting on a chair with their head in their hands, wearing a blue shirt and black pants." 
+ }, + { + "question_id": 163, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with short brown hair, wearing a blue long-sleeve shirt and yellow pants, is sitting on a white platform with a blue wave design at the bottom." + }, + { + "question_id": 164, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A woman with dark hair tied back, wearing a pink top, is depicted with a thought bubble above her head." + }, + { + "question_id": 165, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with red hair, wearing a green top and blue pants, is sitting on a windowsill with their legs crossed." + }, + { + "question_id": 166, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A woman with dark hair tied back, wearing a blue sleeveless top and black leggings, is running with her arms slightly bent and her legs in motion. She has a white earphone cord hanging from her right ear." 
+ }, + { + "question_id": 167, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A woman with black hair tied back, wearing a green headband, a green sleeveless top, and black pants, is sitting cross-legged with her hands pressed together in a prayer position." + }, + { + "question_id": 168, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "A green dinosaur with a white belly, a purple dinosaur with a white belly, and a green dinosaur with a white belly." + }, + { + "question_id": 169, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon boy with a purple bandage on his forehead, wearing a blue shirt and blue pants, is holding a purple object in his right hand." + }, + { + "question_id": 170, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "A lime green refrigerator with a single door, featuring a black handle on the right side. The door has a horizontal indentation near the top." 
+ }, + { + "question_id": 171, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "A small, rectangular, lime green refrigerator with a single door featuring a vertical handle on the left side. The bottom section of the refrigerator is orange with a horizontal handle." + }, + { + "question_id": 172, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "A dark-colored, rectangular structure with a flat roof and vertical sides, featuring a small, square window on the upper left side and a larger, rectangular window on the lower right side." + }, + { + "question_id": 173, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "A tree with dense, green foliage." + }, + { + "question_id": 174, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "A cylindrical, metallic pole with a consistent diameter throughout its length, featuring a series of evenly spaced, horizontal bands encircling its surface." + }, + { + "question_id": 175, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A deep, dark-colored bowl with a wide, flared rim and a smooth, glossy finish." 
+ }, + { + "question_id": 176, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A modern kitchen with a stainless steel oven and a black cooktop. The oven has a digital display and control panel, and there is a visible handle on the oven door. The cooktop has multiple burners with black grates." + }, + { + "question_id": 177, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A white, rectangular wall socket with a single, round, black power switch located on the right side." + }, + { + "question_id": 178, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A white, rectangular light switch with rounded edges, featuring a central toggle switch mechanism." + }, + { + "question_id": 179, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "Stainless steel built-in oven with a large glass door, featuring a digital control panel above the door with multiple buttons and a display screen." + }, + { + "question_id": 180, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A tall, slender vase with a flared rim and a narrow neck that gradually widens into a bulbous base, featuring a glossy finish with a gradient of colors transitioning from a deep blue at the top to a greenish hue towards the bottom." 
+ }, + { + "question_id": 181, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "The floor is composed of medium-toned wooden planks with a smooth, polished finish. The wood grain is visible, running lengthwise along the planks, which are laid out parallel to each other. The planks have a consistent width and exhibit a warm, reddish-brown hue." + }, + { + "question_id": 182, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A vertical, wall-mounted spice rack with multiple tiers, each tier holding several glass jars with metal lids. The jars are arranged in a single column, and the rack appears to be made of a dark, possibly wooden material." + }, + { + "question_id": 183, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A round, dark-colored table with a smooth surface and a central pedestal base." + }, + { + "question_id": 184, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "The chair features a high, slightly curved backrest and seat cushion upholstered in a woven fabric with a diamond pattern. The fabric is primarily light green with a central vertical stripe in a slightly darker shade. The armrests are padded and covered in the same woven fabric, with a light green color. The chair's legs are dark-colored and straight." 
+ }, + { + "question_id": 185, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A purple place mat with a textured surface and a white circular design in the center." + }, + { + "question_id": 186, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A vertical traffic light with three circular lenses arranged in a column, displaying a red light at the top, an unlit middle lens, and a green light at the bottom, all encased in a black housing with a visor over each lens." + }, + { + "question_id": 187, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A pink and white striped awning with a scalloped edge and fringe detailing." + }, + { + "question_id": 188, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A tall, dark brown tree trunk with a rough, textured bark. The trunk is relatively straight and has a consistent width throughout its visible length." + }, + { + "question_id": 189, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A chalkboard sign with a wooden frame displaying the text \"Château de la Bertrandière\" in elegant, cursive script." 
+ }, + { + "question_id": 190, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A tall, narrow, rectangular window with a dark frame and a single vertical pane of clear glass." + }, + { + "question_id": 191, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "A chestnut horse with a white star on its forehead, dark eyes, and a soft, dark muzzle. Its ears are pricked forward, and the mane appears to be a lighter shade of chestnut, blending into the darker coat. The horse's neck shows a gentle curve, and the coat has a healthy sheen." + }, + { + "question_id": 192, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "A brown horse with a dark mane." + }, + { + "question_id": 193, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "A light gray horse with a darker mane and tail, featuring a well-muscled build, a straight profile, and a calm demeanor." + }, + { + "question_id": 194, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A white, rectangular table with rounded corners and a smooth surface." 
+ }, + { + "question_id": 195, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A modern black induction cooktop featuring a sleek design with two visible cooking zones, each outlined with a white circular pattern. The front edge has a stainless steel trim with control buttons, and there are small yellow indicator lights above the buttons." + }, + { + "question_id": 196, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A glossy, metallic spoon with a reflective surface and a tapered handle, featuring a rounded bowl with a slight indentation on one side." + }, + { + "question_id": 197, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A metallic spoon with a reflective surface and a tapered handle that widens towards the end, featuring a rounded bowl with a pointed tip." + }, + { + "question_id": 198, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000299654.jpg", + "category": "natural_detailed_caption_box", + "text": "A zebra with a distinctive black and white striped pattern, featuring a mane of short, erect hair transitioning from black at the base to white at the tips. The stripes on the neck are vertical and become more horizontal as they reach the mane. The visible part of the zebra's face shows a pattern of narrow stripes that converge around the eyes and muzzle, with a white area above the eyes and a dark nose. The ears are pointed and display a striped pattern consistent with the head." 
+ }, + { + "question_id": 199, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000107939.jpg", + "category": "natural_detailed_caption_box", + "text": "An octagonal red stop sign with bold, all-caps white lettering spelling \"STOP\" centered on the sign." + }, + { + "question_id": 200, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000107939.jpg", + "category": "natural_detailed_caption_box", + "text": "An octagonal sign with a red background and a white border, featuring the word \"STOP\" in white uppercase letters." + }, + { + "question_id": 201, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "A dark brown, square-shaped cushion with a visible textured surface that suggests a soft, plush fabric." + }, + { + "question_id": 202, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "A dark brown, square-shaped pillow with a visible corner that appears to be soft and plush." + }, + { + "question_id": 203, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "The earlobe is small and rounded, with a smooth, slightly glossy surface. It is adorned with a small, round, gold-colored earring that has a subtle, reflective sheen." 
+ }, + { + "question_id": 204, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "A young girl with curly hair, wearing a patterned top with a mix of geometric and floral designs in dark and vibrant colors, paired with long-sleeved pink undershirts. Her expression is one of mild surprise or excitement, with her mouth slightly open and eyes looking upwards. Her arms are outstretched with palms facing up, as if gesturing or presenting something." + }, + { + "question_id": 205, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "A young boy with curly hair, wearing a striped sweater with shades of blue, white, and brown, stands with his hands clasped together. He has a focused expression on his face, with his mouth slightly open and his eyes looking to the side." + }, + { + "question_id": 206, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A ski parka featuring a color-block design with a white torso, green sleeves, and black shoulder panels. The jacket has a high collar and a front zipper closure. There are red accents on the cuffs and a red logo on the left chest area. The parka is paired with a black helmet with a visor." + }, + { + "question_id": 207, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "The knee of the person is covered in a dark purple snowsuit with a slightly glossy finish. The fabric appears thick and durable, suitable for cold weather. 
The knee area is slightly bent, indicating a relaxed stance. The snowsuit has a subtle sheen, reflecting light, and the material appears to be tightly fitted around the knee." + }, + { + "question_id": 208, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "The trousers are dark-colored, possibly black or dark gray, with a straight-leg cut. They feature a visible side pocket on the right leg with a flap closure, and there are belt loops around the waistband. The fabric appears to be a sturdy material, potentially denim or a similar thick textile." + }, + { + "question_id": 209, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A blue helmet with a glossy finish, featuring a prominent white stripe with a red outline running horizontally across the middle. The helmet has a black visor attached at the front, and a black chin strap with a buckle hanging down from the sides." + }, + { + "question_id": 210, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "The ski boot is predominantly black with a sleek, modern design. It features a sturdy, high-ankle structure for support and a smooth, matte finish. The boot has a contoured shape to fit the foot and lower leg, with a slightly raised heel for added stability. The sole is thick and textured for grip and durability." 
+ }, + { + "question_id": 211, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "The sleeve of the sweater is predominantly white with a bold red graphic of a person skiing. The fabric appears to be a soft, knit material, and the sleeve is slightly loose-fitting, extending to the wrist. The red graphic is positioned centrally on the sleeve, adding a dynamic contrast to the white background." + }, + { + "question_id": 212, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A person wearing a black helmet, goggles, and a white jacket with red and black accents, including a red logo on the left chest area. The individual is also wearing black gloves and appears to be in a skiing stance." + }, + { + "question_id": 213, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2411153.jpg", + "category": "natural_detailed_caption_box", + "text": "A red, white, and green racing motorcycle fairing with the number 69 prominently displayed in white on a red background, flanked by green and white stripes. The fairing features sponsor logos and a black lower section with a vent." + }, + { + "question_id": 214, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2411153.jpg", + "category": "natural_detailed_caption_box", + "text": "A series of parallel, evenly spaced, horizontal lines with a consistent width, running across the entire width of the image." 
+ }, + { + "question_id": 215, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/22d3844dcf29a07bd10f557a33684e331846f81b938ca0f742afab09c542133f.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The mass of the quark is a very small, massless particle that is part of the quark mass. It is a fundamental particle that is responsible for the mass of the quark, and it is often used in particle physics to study the properties of quarks and their interactions." + }, + { + "question_id": 216, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/22d3844dcf29a07bd10f557a33684e331846f81b938ca0f742afab09c542133f.png", + "category": "ocr_doc_detailed_caption_box", + "text": "A lowercase letter 'v' with a serif at the top and a tail at the bottom, featuring a bold, sans-serif font." + }, + { + "question_id": 217, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The geometry and adsorption energies for the structures of thio-glycolic acid on Au(111) at 0.25ML." + }, + { + "question_id": 218, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The table contains a list of adsorption sites with their corresponding adsorption energies. The adsorption sites are listed in a column, with each site having a corresponding adsorption energy. The adsorption energy is listed in a separate column, with each site having a corresponding energy value. The table also includes the adsorption energy for the Au-S bond, which is listed in a separate column. 
The adsorption energy for the Au-S bond is listed in a separate column, with each site having a corresponding energy value." + }, + { + "question_id": 219, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The adsorption energy is 0.63 eV." + }, + { + "question_id": 220, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The adsorption energy for the most stable structure of the HSCH2COOH on the Au(111) surface is 0.63 eV. The adsorption site preferred by the sulfur atom is located at the top of the gold atom. The polar angle between the normal vector of the surface and the S-C2 direction is 74.2 degrees. The HSCH2COOH tend to lie down at the low coverage." + }, + { + "question_id": 221, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "A black and white image of a person's face with a neutral expression, featuring a prominent nose, closed lips, and visible teeth. The person has short hair and is wearing a dark-colored top." + }, + { + "question_id": 222, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The segmented image shows two graphs. The first graph on the left is titled \"Atom prediction performance (a and c)\" and features a scatter plot with blue and orange dots. 
The second graph on the right is titled \"Bond prediction performance (a and c)\" and also features a scatter plot with blue and orange dots. Both graphs have a similar layout with x and y axes, and the dots are evenly spaced." + }, + { + "question_id": 223, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The F1 score for segmentation and classification networks." + }, + { + "question_id": 224, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The performance of segmentation network is measured by the F1 score, which is a metric that takes into account both precision and recall. The F1 score is used to evaluate the performance of the segmentation network in terms of its ability to correctly segment the image. The segmentation network is compared to other networks, and the F1 score is used to determine which network is the most effective." + }, + { + "question_id": 225, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The segmentation network is a deep learning model that uses a combination of atom, bond, and charge types to predict the pixel values. The model is trained on a large amount of data and has a high F1 score, indicating its high accuracy. The F1 score is a measure of the model's performance, taking into account both precision and recall. The model is designed to be used for various tasks, such as image segmentation and object detection." 
+ }, + { + "question_id": 226, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The performance of the different types of networks is measured by the F1 score, which is a measure of the accuracy of the network. The F1 score is higher for the different types of networks, indicating that they are performing well. The performance of the different types of networks is compared to the performance of the segmentation networks." + }, + { + "question_id": 227, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The performance of the different parts of the network is measured by the F1 score, which is a measure of the accuracy of the network. The F1 score is used to evaluate the performance of the network in different tasks, such as atom, bond, and charge type classifications. The network is able to do a good job even when the segmentation is not perfect, and the performance of the different parts of the network is significantly higher than the segmentation networks." + }, + { + "question_id": 228, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "8.3 Overall graph accuracy" + }, + { + "question_id": 229, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The overall graph accuracy is a measure of the performance of the different parts of the graph, including the segmentation network and the classification network. 
The segmentation network is used to segment the graph into different parts, and the classification network is used to predict the type of the graph. The overall graph accuracy is a combination of these two parts, and it is used to measure the performance of the graph." + }, + { + "question_id": 230, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The number 11 is displayed in a bold, sans-serif font with a dark color, possibly black or dark gray, against a lighter background. The digits are evenly spaced and aligned vertically." + }, + { + "question_id": 231, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The sugar is a low GI sweet spot, which is a type of sugar that has a low glucose index." + }, + { + "question_id": 232, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The low GI sweet spot is at least 22mg CE/100mg sucrose." + }, + { + "question_id": 233, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The sugar has a low GI of about 22-32 mg CE/100g polyphenols. The polyphenol content is high, with a range of 22-32 mg CE/100g polyphenols. The sugar is hygroscopic, with a higher moisture content, and the polyphenol content increases as the sugar becomes more saturated." 
+ }, + { + "question_id": 234, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The massacuit is a dark brown, viscous liquid with a glossy sheen, contained in a clear glass bottle." + }, + { + "question_id": 235, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The massacuites are prepared at two different sugar mills, designated as \"Mill 1\" and \"Mill 2\". The polyphenol content of each sample is determined and washed until they reach the desired polyphenol content, which is roughly 500 to 2000 ICUMSA. The results are in Table 4 below." + }, + { + "question_id": 236, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "A black and white image of a sugar cube with a textured surface, featuring a pattern of small, raised dots." + }, + { + "question_id": 237, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The image shows a collection of documents with text, some of which are in black and white, and others in color. The documents are arranged in a grid-like pattern, with some documents having a red border. The text on the documents is in various fonts and sizes, and some documents have a red box around the text." 
+ }, + { + "question_id": 238, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The OCR process." + }, + { + "question_id": 239, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The text is a letter written in English, with the first line reading \"Available OCR\" followed by \"for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The second line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The third line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The fourth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The fifth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The sixth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The seventh line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical 
work which I carried out under your direction.\" The eighth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The ninth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The tenth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The eleventh line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The twelfth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The thirteenth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The fourteenth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The fifteenth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The sixteenth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to 
accept as an excuse for my shortcomings in this Report.\" The seventeenth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The eighteenth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The nineteenth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The twentieth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The twenty-first line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The twenty-second line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The twenty-third" + }, + { + "question_id": 240, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The Internet Archive image containing this excerpt can be accessed here: https://archive.org/details/b2439867/page/n7" + }, + { + "question_id": 241, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + 
"text": "Journal of Data Mining and Digital Humanities" + }, + { + "question_id": 242, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The text is a web address in blue, displaying the URL \"http://dmh.esciences.org\"." + }, + { + "question_id": 243, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The number 9 is displayed in a bold, sans-serif font with a slight shadow effect, giving it a three-dimensional appearance. The color of the number is a gradient of dark to light gray, with the darker shade at the top and the lighter shade at the bottom." + }, + { + "question_id": 244, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text \"IOOF Annual Report 2012\" is displayed in a serif font, with \"IOOF\" in a larger size and \"Annual Report 2012\" in a smaller size. The text is in a light green color." + }, + { + "question_id": 245, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Non-Executive Directors' Remuneration" + }, + { + "question_id": 246, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "4.1. 
Components of Non-Executive Director remuneration" + }, + { + "question_id": 247, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text \"page 37\" is written in lowercase letters." + }, + { + "question_id": 248, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The company is a non-executive director." + }, + { + "question_id": 249, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The Company's Constitution requires that the aggregate remuneration paid or provided to all Non-Executive Directors in any financial year by the Company, its subsidiaries and associated entities may not exceed an amount approved by shareholders. The ceiling amount includes all remuneration provided to Non-Executive Directors, including superannuation but not including retirement benefits. The current limit of $980,000 per annum was approved by shareholders at the 2010 Annual General Meeting. There has been no increase to the Non-Executive Director fee pool since this time." + }, + { + "question_id": 250, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Non-Executive Directors receive a fee for being a Director of the Board. An additional fee is paid to the Chairman of the Board. 
Non-Executive Directors do not receive additional fees for service on Board and Committees. The Company's Constitution requires that the aggregate remuneration paid or provided to all Non-Executive Directors in any financial year may not exceed an amount approved by shareholders." + }, + { + "question_id": 251, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text is a mathematical equation involving the homotopy group of a space." + }, + { + "question_id": 252, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The region is a monopole, which is a type of topological defect." + }, + { + "question_id": 253, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The topological conditions of formation of defects only govern the formation of topologically stable defects. It was found that defects solutions can form even when the topology is trivial. The most well-known example are the electro-weak strings, formed during the electroweak symmetry breaking, which are perturbatively stable for a range of parameters which are not realized in nature, and belong to the broader class of embedded defects." 
+ }, + { + "question_id": 254, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "These defects are a priori unstable though mechanisms (such as plasma effects) have been found to stabilize them. They are of interest for inflation model builders since this mechanism can allow lift the constraints from the formation of cosmic strings (see Sec. IV F on D-term inflation)." + }, + { + "question_id": 255, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "A black and white image of a mathematical equation with the variables \"x\" and \"y\" in a bold font, followed by a period and the number \"1\" in a smaller font." + }, + { + "question_id": 256, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The number 72 is displayed in a bold, black font with a slight shadow effect, giving it a three-dimensional appearance. The numerals are evenly spaced and aligned horizontally." + }, + { + "question_id": 257, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0301_0188_0040.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "A stylized illustration of a woman with a bun hairstyle, featuring a pattern of white, cloud-like shapes with small black dots scattered throughout. The woman is wearing a pink garment with a polka dot pattern." 
+ }, + { + "question_id": 258, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0301_0188_0040.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "A red and white polka dot pattern with a small, dark, irregularly shaped mark near the center." + }, + { + "question_id": 259, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The number 62 is displayed in a bold, sans-serif font with a dark color, possibly black or dark gray, against a lighter background. The numerals are evenly spaced and aligned horizontally." + }, + { + "question_id": 260, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Chapter 2: Motivation" + }, + { + "question_id": 261, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The string theory is a type of string theory that is compactified on the near-horizon geometry. It is a decoupled theory that combines the two pictures of the same low energy limit of one theory, Type IIB string theory." + }, + { + "question_id": 262, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The region is a field theory picture of the low energy limit of Type IIB string theory." 
+ }, + { + "question_id": 263, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The regions for which analytic tools exist for these two different pictures turn out to be completely incompatible." + }, + { + "question_id": 264, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The equation is a complex mathematical expression involving various variables and functions. It includes a combination of Greek letters, numbers, and mathematical operators. The visible part of the equation shows a series of variables and functions that are interconnected, with some parts appearing to be in parentheses. The equation is written in a formal, mathematical notation." + }, + { + "question_id": 265, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The presence of an insider on the market does not necessarily lead to arbitrage, and the presence of insiders might be considered beneficial to the market, in the sense that it leads to higher information efficiency of the equilibrium price process." 
+ }, + { + "question_id": 266, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The fundamental value of the firm is a stochastic process, and the insider can observe it directly or at least observe it in a less noisy way than the other agents on the market." + }, + { + "question_id": 267, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The paper relaxes the assumption of static insider information and studies the equilibrium trading and price processes, as well as market efficiency, in a setting with dynamic private information." + }, + { + "question_id": 268, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The model is a generalization of the static information setting of [2]." + }, + { + "question_id": 269, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The model is a generalization of the static information setting of [2] and is designed to include dynamic information. It has a much smaller set of admissible trading strategies and pricing rules compared to the ones considered in the work. The model shows the existence of a unique Markovian equilibrium, which is an equilibrium price that allows the insider to trade undetected and depends only on the total order process. 
The model also shows that the presence of an insider increases the market informational efficiency for times close to the time of the order." + }, + { + "question_id": 270, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "A green and white striped awning with a scalloped edge and fringe detailing." + }, + { + "question_id": 271, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A red, three-dimensional, cursive sign with the word \"Abondana\" written in a flowing, elegant script." + }, + { + "question_id": 272, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard features the word \"Cafe\" in a stylized, cursive font with a gradient of red to dark red, giving it a three-dimensional appearance. The letters are slightly italicized and have a shadow effect, enhancing their depth." + }, + { + "question_id": 273, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The logo features the letters \"USIS\" in bold, with \"US\" in red and \"IS\" in black. Below the letters, there is a tagline in smaller, gray font." + }, + { + "question_id": 274, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"ESTATE\" is written in bold, black, uppercase letters on a yellow background." 
+ }, + { + "question_id": 275, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signage displays the word \"AGENTS\" in bold, uppercase letters. The letters are black and set against a yellow background." + }, + { + "question_id": 276, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signage displays the word \"SAXONS\" in bold, black capital letters on a yellow background." + }, + { + "question_id": 277, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard features the word \"SAXONS\" in capital letters with a serif font, set against a dark background. The letters are in a metallic gold color with a slight gradient, giving them a three-dimensional appearance. The signboard has a reflective surface, suggesting it is made of a glossy material." + }, + { + "question_id": 278, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard displays the word \"ESTATE\" in capital letters, with a serif font. The letters are dark and possibly metallic, with a reflective surface that catches the light, giving them a slightly shiny appearance. The background of the signboard is not visible, but the letters are set against a dark backdrop that contrasts with the lighter color of the text." 
+ }, + { + "question_id": 279, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard features capitalized, serif lettering spelling \"AGENTS\" with a metallic finish and a slight gradient, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 280, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"Triple\" is written in a cursive, green font with a white outline. The letters are slightly italicized and have a playful, rounded design." + }, + { + "question_id": 281, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"WHITE\" is written in bold, uppercase letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 282, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"SPOT\" is written in bold, uppercase letters with a light blue color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." 
+ }, + { + "question_id": 283, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A stylized, cursive letter \"O\" with a green outline and a white fill, featuring a small, curved tail extending from the bottom right." + }, + { + "question_id": 284, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The visible part of the ag is a white, stylized letter \"A\" with a slight shadow effect, giving it a three-dimensional appearance." + }, + { + "question_id": 285, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001122.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"NEW\" is written in bold, red, uppercase letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 286, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001122.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"YORK\" is written in bold, red, uppercase letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 287, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_162.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"Colchester\" is written in a bold, sans-serif font with a dark blue color." 
+ }, + { + "question_id": 288, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signage features bold, black, uppercase letters spelling \"AYAM\" on a yellow background." + }, + { + "question_id": 289, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signage features large, bold, red letters \"RUA\" on a yellow background." + }, + { + "question_id": 290, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A yellow sign with the word \"SMASHED\" in bold, black, uppercase letters." + }, + { + "question_id": 291, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A dark brown, rectangular sign with the word \"FRIED\" in bold, uppercase letters." + }, + { + "question_id": 292, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A brown sign with the word \"CHICKEN\" in bold, uppercase letters." 
+ }, + { + "question_id": 293, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "Accommodation" + }, + { + "question_id": 294, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"Office\" is written in a bold, sans-serif font with a dark blue color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 295, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"Nightline\" is written in a bold, sans-serif font with a dark blue color." + }, + { + "question_id": 296, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A red and white signboard with the word \"BUBBA\" in bold, capitalized, red letters on a white background, with a red border around the sign." + }, + { + "question_id": 297, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard features the word \"GUMP\" in large, bold, red capital letters with a white outline. The letters are set against a textured, light-colored background that resembles a stone or concrete surface." 
+ }, + { + "question_id": 298, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard features the word \"SHRIMP\" in capital letters, with a bold, sans-serif font. The letters are white with a slight shadow effect, giving them a three-dimensional appearance. The background of the signboard is a deep blue color, providing a stark contrast to the white text." + }, + { + "question_id": 299, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A curved signboard with a red border and a white background, featuring the word \"RESTAURANT\" in bold, black, uppercase letters." + }, + { + "question_id": 300, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A curved, red and white signboard with the word \"MARKET\" in capital letters, featuring a serif font." + }, + { + "question_id": 301, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A circular blue signboard with a white border and a white symbol resembling a stylized letter 'C' in the center." + }, + { + "question_id": 302, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A circular, metallic signboard with a textured background featuring the letters \"TM\" in a bold, sans-serif font, centered on the sign." 
+ }, + { + "question_id": 303, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"EVENING\" is written in bold, uppercase letters with a gradient of red to pink. The letters are evenly spaced and have a slightly blurred effect, giving them a soft, glowing appearance." + }, + { + "question_id": 304, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"PRIMROSE\" is written in bold, uppercase letters with a gradient of pink to red. The letters are evenly spaced and have a slightly blurred effect, giving them a soft, glowing appearance." + }, + { + "question_id": 305, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"BASE\" is written in bold, uppercase letters with a red background and white outline. The letters are evenly spaced and have a slightly distressed texture." + }, + { + "question_id": 306, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"OIL\" is written in bold, uppercase letters with a gradient of red to pink. The letters are evenly spaced and have a slightly blurred appearance." 
+ }, + { + "question_id": 307, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01644.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The number 46 is displayed in a large, bold, white font with a slight shadow effect, giving it a three-dimensional appearance. The numerals are evenly spaced and have a smooth, rounded design." + }, + { + "question_id": 308, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green calendar with white text displaying the numbers 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 
300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369," + }, + { + "question_id": 309, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green rectangular button with rounded corners featuring the word \"Connecter\" in white lowercase letters, followed by a yellow icon resembling a lock with a keyhole." + }, + { + "question_id": 310, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green, stylized, lowercase letter \"a\" with a slight shadow effect, giving it a three-dimensional appearance." + }, + { + "question_id": 311, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green, stylized lowercase \"a\" with a curved tail extending from the bottom right, resembling a lowercase \"i\" with a dot above it." + }, + { + "question_id": 312, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green rectangular sign with white text displaying the numbers \"07\" in a bold, sans-serif font." 
+ }, + { + "question_id": 313, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "The word \"septembre\" in lowercase letters, with a green background and white font." + }, + { + "question_id": 314, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "The text \"Lien Web\" is written in a bold, sans-serif font with a green color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 315, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "The numbers 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30 are displayed in a green color." + }, + { + "question_id": 316, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green and white striped awning with a scalloped edge and a fringe detail." 
+ }, + { + "question_id": 317, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "Plan du site" + }, + { + "question_id": 318, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "A white arrow pointing to the right, with a slight curve at the tail end, is positioned to the right of the text \"Book Now.\"" + }, + { + "question_id": 319, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "About Us" + }, + { + "question_id": 320, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The text \"Pay Balance\" is written in bold, black, sans-serif font." 
+ }, + { + "question_id": 321, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "About Us" + }, + { + "question_id": 322, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "contact us" + }, + { + "question_id": 323, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The word \"About\" is written in a bold, sans-serif font with a gradient of pink to red, set against a teal background." + }, + { + "question_id": 324, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "A teal-colored horizontal bar with white text and symbols. On the left side, there is a white envelope icon followed by the text \"doonawash@gmail.com\". On the right side, there is a white icon resembling a person in a wheelchair." + }, + { + "question_id": 325, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The text \"Contact us\" is written in a bold, sans-serif font with a pinkish-red color. The letters are evenly spaced and aligned horizontally." 
+ }, + { + "question_id": 326, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The word \"Home\" is written in a bold, sans-serif font with a gradient of blue shades, transitioning from a lighter blue at the top to a darker blue at the bottom." + }, + { + "question_id": 327, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The text \"Pay Balance\" is written in a bold, sans-serif font with a pinkish hue. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 328, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_0558c1f4-c05b-49a8-8479-04b1575779d2.png", + "category": "web_detailed_caption_box", + "text": "OpenStreetMap Belgium" + }, + { + "question_id": 329, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "A black arrow pointing to the right." + }, + { + "question_id": 330, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "A white rectangular tag with the number \"4.9\" in bold black font centered on it." 
+ }, + { + "question_id": 331, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "A rectangular blue button with white text that reads \"Pulsuz Konsultasyon.\"" + }, + { + "question_id": 332, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "The word \"Portfolio\" in a bold, sans-serif font, with a slight italicization, and a drop shadow effect, giving it a three-dimensional appearance. The letters are black with a white outline, and the text is set against a plain background." + }, + { + "question_id": 333, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "A white shopping cart icon with a blue outline, featuring a rectangular basket with a grid pattern, two vertical handles, and four wheels, two of which are visible." + }, + { + "question_id": 334, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "The word \"SUPPORT\" in bold, uppercase letters with a blue background and white outline." + }, + { + "question_id": 335, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "A white lowercase letter \"f\" with a bold, sans-serif font, set against a blue background." 
+ }, + { + "question_id": 336, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "Your Charts" + }, + { + "question_id": 337, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "A blue \"X\" with a white outline, featuring a slightly darker blue fill and a lighter blue border." + }, + { + "question_id": 338, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "A rectangular white sign with the words \"CONTACT US\" in bold, uppercase, blue letters." + }, + { + "question_id": 339, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "A rectangular blue button with rounded corners and a white border. The button has the words \"ADD TO CART\" in bold, white, uppercase letters centered on it." + }, + { + "question_id": 340, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "The word \"PRODUCTS\" is written in bold, uppercase letters with a blue background. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." 
+ }, + { + "question_id": 341, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "The word \"PODCAST\" is written in bold, uppercase letters with a blue background. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 342, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "The text \"ABOUT US\" is written in bold, uppercase letters. The letters are blue and have a slight shadow effect, giving them a three-dimensional appearance. The text is centered horizontally." + }, + { + "question_id": 343, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "The text \"WHAT WE DO\" is written in bold, uppercase letters. The letters are evenly spaced and have a modern, sans-serif font style. The color of the text is black, and it stands out against a light background." + }, + { + "question_id": 344, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "GWM launches livelihood micro-grants" + }, + { + "question_id": 345, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "A rectangular white button with a black border and the word \"Settings\" in black, bold, sans-serif font centered on it." 
+ }, + { + "question_id": 346, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "Privacy Policy" + }, + { + "question_id": 347, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "The word \"HOME\" in uppercase letters, with a bold, sans-serif font, is centered on the image. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance. The color of the text is a gradient of light to dark gray, creating a subtle contrast against the background." + }, + { + "question_id": 348, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "A rectangular black button with the word \"Accept\" in white, bold, sans-serif font centered on it." + }, + { + "question_id": 349, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "A red, oval-shaped button with a white border and the word \"DONATE\" in bold, uppercase, red letters centered on it." + }, + { + "question_id": 350, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "The word \"NEWS\" in bold, uppercase letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and have a clean, modern font style." 
+ }, + { + "question_id": 351, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "The text \"ABOUT US\" is written in bold, uppercase letters with a sans-serif font. The letters are evenly spaced and aligned horizontally. The color of the text is black, and it stands out against a light background." + }, + { + "question_id": 352, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "The text \"CONTACT US\" is written in bold, uppercase letters. The letters are evenly spaced and have a modern, sans-serif font. The color of the text is black, and it stands out against a light background." + }, + { + "question_id": 353, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_6c677961-e540-4cc5-b725-5e301019a9f9.png", + "category": "web_detailed_caption_box", + "text": "A black and white icon depicting a stylized, abstract representation of a building with a flat roof and multiple rectangular windows arranged in a grid pattern." + }, + { + "question_id": 354, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The word \"News\" in a bold, sans-serif font, with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and have a dark color, contrasting with the lighter background." 
+ }, + { + "question_id": 355, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The logo features the word \"ServeGate\" in bold, black letters. To the left of the text, there is a stylized design consisting of two overlapping triangles, one in teal and the other in red, with a black line separating them." + }, + { + "question_id": 356, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "Our Difference" + }, + { + "question_id": 357, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The text \"About Us\" is written in a bold, sans-serif font with a red color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 358, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "A rectangular button with rounded corners, featuring a light pink background and a thin red border. The button displays the text \"Get in touch\" in bold, red, sans-serif font." + }, + { + "question_id": 359, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The word \"Home\" in a serif font, with the letters in a light gray color against a white background." 
+ }, + { + "question_id": 360, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The word \"Services\" in a bold, sans-serif font, with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 361, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "Indigenous Impact" + }, + { + "question_id": 362, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The text \"ServeGate\" is written in bold, black, sans-serif font. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 363, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_5a35d9c2-2c2d-4a49-ad0a-1408d9cac78e.png", + "category": "web_detailed_caption_box", + "text": "A rectangular white button with rounded corners, featuring the text \"Close issue\" in bold, black, sans-serif font." + }, + { + "question_id": 364, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_3fed2169-3c3d-43e7-baaa-3bf0e0c7134c.png", + "category": "web_detailed_caption_box", + "text": "A vertical array of three circular, dark-colored buttons with a slightly raised, smooth surface, aligned centrally on a light-colored background." 
+ } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/natural_detailed_caption_box/question.json b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/natural_detailed_caption_box/question.json new file mode 100644 index 0000000000000000000000000000000000000000..ec3b6aced856747da57beecc0ea7cbe03f858e9f --- /dev/null +++ b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/natural_detailed_caption_box/question.json @@ -0,0 +1,370 @@ +[ + { + "question_id": 1, + "image": "4010.jpg", + "category": "natural_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 152, + 161, + 68, + 69 + ], + [ + 7, + 1, + 126, + 293 + ], + [ + 583, + 160, + 19, + 138 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 2, + "image": "2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 321, + 132, + 47, + 40 + ], + [ + 368, + 233, + 127, + 97 + ], + [ + 3, + 217, + 30, + 39 + ], + [ + 293, + 177, + 26, + 29 + ], + [ + 383, + 186, + 95, + 107 + ], + [ + 360, + 121, + 19, + 49 + ], + [ + 275, + 306, + 105, + 25 + ], + [ + 26, + 193, + 36, + 79 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 3, + "image": "402.jpg", + "category": "natural_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 110, + 483, + 141, + 115 + ], + [ + 564, + 484, + 108, + 114 + ], + [ + 662, + 544, + 85, + 44 + ], + [ + 199, + 259, + 27, + 40 + ], + [ + 418, + 315, + 74, + 43 + ], + [ + 224, + 6, + 106, + 510 + ], + [ + 143, + 358, + 74, + 64 + ], + [ + 64, + 260, + 24, + 36 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 4, + "image": "000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "Please provide a detailed description of each 
marked region in the image.", + "annotation": { + "bbox": [ + [ + 166.41, + 42.56, + 341.16, + 436.51 + ], + [ + 0.0, + 287.76, + 20.7, + 163.06 + ], + [ + 543.63, + 260.83, + 96.37, + 84.28 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 5, + "image": "000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 171.9, + 272.26, + 468.1, + 143.38 + ], + [ + 1.57, + 268.7, + 513.82, + 156.53 + ], + [ + 571.84, + 326.08, + 68.16, + 54.29 + ], + [ + 185.34, + 231.32, + 23.92, + 88.0 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 6, + "image": "000000299654.jpg", + "category": "natural_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 182.39, + 0.57, + 331.0, + 360.43 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 7, + "image": "000000107939.jpg", + "category": "natural_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 249.92, + 99.78, + 131.46, + 183.95 + ], + [ + 257.37, + 177.56, + 124.0, + 106.16 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 8, + "image": "000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 1.34, + 257.38, + 74.46, + 141.2 + ], + [ + 36.97, + 292.0, + 66.92, + 131.17 + ], + [ + 486.58, + 179.23, + 2.1, + 1.91 + ], + [ + 246.21, + 69.46, + 359.56, + 357.53 + ], + [ + 77.9, + 37.18, + 202.29, + 390.82 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 9, + "image": "2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ 
+ 103, + 135, + 72, + 75 + ], + [ + 144, + 239, + 37, + 14 + ], + [ + 297, + 243, + 93, + 16 + ], + [ + 131, + 48, + 205, + 52 + ], + [ + 143, + 237, + 210, + 15 + ], + [ + 233, + 175, + 30, + 27 + ], + [ + 217, + 152, + 68, + 67 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 10, + "image": "2411153.jpg", + "category": "natural_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 148, + 124, + 58, + 47 + ], + [ + 289, + 111, + 171, + 91 + ] + ], + "segmentation": [] + } + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/ocr_doc_detailed_caption_box/answer.json b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/ocr_doc_detailed_caption_box/answer.json new file mode 100644 index 0000000000000000000000000000000000000000..1fc627ed32af43ec4e04f2ac4c8172d3201bc43b --- /dev/null +++ b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/ocr_doc_detailed_caption_box/answer.json @@ -0,0 +1,2186 @@ +[ + { + "question_id": 1, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "This is a status bar displaying the date \"Friday, Sep 2,\" along with the temperature \"61°F.\" It informs the user about the current day, date, and ambient temperature, likely provided by a weather service. This text is generally not interactive and serves as an informational display only." + }, + { + "question_id": 2, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "This represents a capital letter \"G\" set inside a circle. 
It is part of a larger icon that is likely associated with a search engine or service, commonly used as a button to initiate web searches or to access related services. Interaction with this icon typically leads to the search engine's interface or homepage." + }, + { + "question_id": 3, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "This icon depicts clouds, which is a visual representation often used for weather-related services or apps. It indicates current or forecasted weather conditions and is typically interactive, leading the user to a detailed weather report upon tapping." + }, + { + "question_id": 4, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "The gear-like icon signifies settings or options. It is a common symbol for accessing system settings or preferences in an application or operating system. Interacting with this icon usually opens a menu where various settings can be adjusted." + }, + { + "question_id": 5, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "The icon resembling a speech bubble is often associated with messaging services or chat applications. Tapping it would commonly open a messaging app where users can send messages, photos, or videos to others." 
+ }, + { + "question_id": 6, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "This icon, featuring a triangle resembling a \"play\" button, is widely recognized as a symbol for media players or services offering video and music content. Interaction with this icon would likely launch a media playback application or service." + }, + { + "question_id": 7, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "The icon represents a search engine or a suite of services provided by a major tech company, indicated by the letter \"G\" and vibrant colors. Interacting with this icon typically brings the user to a home screen with access to various services offered by the company, such as search, email, maps, and more." + }, + { + "question_id": 8, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "This icon, representing a microphone inside a colorful outline, is suggestive of a digital assistant or voice search feature. When interacted with, it would generally activate a voice recognition service allowing users to speak commands or queries for assistance." + }, + { + "question_id": 9, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "The circle icon in a navigation bar is typically an interactive home button on a mobile device, often bringing the user back to the home screen when tapped." 
+ }, + { + "question_id": 10, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "The icon with a triangle pointing leftwards resembles a \"back\" navigation button, generally used to go back to the previous screen in an application or navigate backwards in a browser." + }, + { + "question_id": 11, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "This icon is indicative of the Microsoft Excel mobile application, recognizable by its green 'X' on a white background, which suggests a tool for creating and editing spreadsheets. It is likely an interactive element that, upon touch or click, launches the application allowing users to work with spreadsheets on their mobile device." + }, + { + "question_id": 12, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "This is the Facebook mobile application icon, featuring a lowercase 'f' on a blue background. When interacted with, it typically opens the Facebook app where users can browse their news feed, connect with friends and family, post updates, and engage in social networking activities." + }, + { + "question_id": 13, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "Represented here is an icon for the Speedtest application by Ookla, depicted by a speedometer graphic suggesting the app's function of measuring internet connection speed. 
Tapping on this icon will likely open the app and allow the user to test their current internet speed." + }, + { + "question_id": 14, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "This icon, featuring a camera lens and a gradient background, is for the Instagram mobile application. Interacting with this icon will usually open the app, providing access to photo and video sharing, as well as viewing the content from others on the Instagram social network." + }, + { + "question_id": 15, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "Resembling a house with a triangular roof, this icon signifies a home automation or real estate application. Interaction with this icon would open the respective app, providing controls for smart home devices or real estate listings, depending on its specific function." + }, + { + "question_id": 16, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "This icon has a feather, hinting at a lite version of an application that offers a minimalistic or resource-efficient option, typically for use in areas with limited connectivity or on devices with lower performance." + }, + { + "question_id": 17, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "Featuring a speech bubble with a lightning bolt, this is the Facebook Messenger app icon. 
It signifies an app dedicated to messaging which, upon interaction, opens a platform where users can send messages, share media, and participate in video calls." + }, + { + "question_id": 18, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The icon partially reads \"Home De...\" against an orange square, suggesting a home improvement or retail company's app, possibly offering goods or services related to home refurbishment or decoration. The app's full functionality would be revealed upon opening it." + }, + { + "question_id": 19, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The green owl represents Duolingo, an educational platform icon with its function being language learning. Upon touching the icon, the user would engage with the app to learn a new language through interactive lessons." + }, + { + "question_id": 20, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "This icon, with an abstract design and the letters 'GE', likely signifies a news or media application that provides users with news articles, updates, and possibly live reporting, accessible by touching the icon to open the app." 
+ }, + { + "question_id": 21, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "This area contains the term \"Search,\" suggesting it is likely related to a search function where a user can input queries to locate specific settings or information within this system or application." + }, + { + "question_id": 22, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"settings\" indicates an option or heading that relates to configuration options. Interacting with it would typically bring up a menu to adjust system preferences or application parameters." + }, + { + "question_id": 23, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The term \"mobile,\" followed by a comma hints at a list or continuation of related topics, likely referring to mobile network settings or features in the context of this system or application." + }, + { + "question_id": 24, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"data\" in this context may refer to mobile data usage and settings. It suggests an option to view or adjust how the device handles cellular data." 
+ }, + { + "question_id": 25, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "This term \"Wi-Fi,\" ending with a comma, implies it is part of a series, possibly relating to Wi-Fi settings where a user can manage Wi-Fi networks and preferences." + }, + { + "question_id": 26, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"and\" serves as a conjunction within a list or sentence, indicating the addition of more items or concepts that are related to the ones previously mentioned." + }, + { + "question_id": 27, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "This term \"hotspot\" typically refers to a feature where the device can share its internet connection with other devices through Wi-Fi, Bluetooth, or USB." + }, + { + "question_id": 28, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The term \"usage,\" followed by a comma, likely relates to the tracking or monitoring of resource consumption, such as data, battery, or connectivity usage." 
+ }, + { + "question_id": 29, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"Connected\" suggests it pertains to the status or management of connected devices or networks, such as Bluetooth connections or Wi-Fi networks." + }, + { + "question_id": 30, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The partially visible term \"Lo\" could be part of a word that identifies a feature, option, or information related to the system or application settings." + }, + { + "question_id": 31, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This area is a text-entry field in a web browser, typically used for typing in web addresses or performing web searches. Interaction with this field usually involves clicking or tapping to enter text, and pressing Enter would initiate a web search or take the user to the entered web address." + }, + { + "question_id": 32, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This is a tab title within a web browser, indicating that the user has accessed or searched for 'eBay shopping' in this tab. The text serves a navigational purpose, allowing the user to identify and switch to the associated web page when multiple tabs are open." 
+ }, + { + "question_id": 33, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This label refers to a 'cart' on a shopping platform, hinting at a functionality that allows users to view items that have been added to a virtual shopping cart. It is likely interactive and clicking it would navigate the user to a page summarizing their selected items for purchase." + }, + { + "question_id": 34, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This text is a URL displayed in the address bar of a web browser. It indicates that the current web page pertains to the shopping cart of the eBay website. The user can click on this text to edit the URL or copy it for use elsewhere." + }, + { + "question_id": 35, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This text suggests that the user is visiting or has searched for a page related to 'Welcome to Costco Wholesale'. It might serve as a title for a page, potentially indicating that the user can find information regarding Costco's offerings through this tab." + }, + { + "question_id": 36, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "Here, 'costco.com' is the simplified representation of an address bar or tab title suggesting that the user is visiting Costco's website. Users interact with this by clicking it to switch to the Costco tab within the browser." 
+ }, + { + "question_id": 37, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This phrase 'Pay Less.' is likely associated with a slogan or branding message, suggesting a value proposition to customers; the promise of spending less for the products or services offered by the entity associated with this phrase." + }, + { + "question_id": 38, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The text 'Target:' resembles a title or a navigational cue for a segment within a web browser, it may indicate a web page related to the retail company Target and is probably part of a list or compilation of bookmarks or frequently visited sites." + }, + { + "question_id": 39, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "Similar to , 'Expect More.' is a slogan that implies a promise of greater value, service, or product quality from the associated entity. It is designed to communicate a marketing message or company ethos to customers." + }, + { + "question_id": 40, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This text 'target.com' represents a simplified address, similar to , likely indicating that the user is visiting or has the option to visit Target's website. Interaction with this area would navigate to or indicate presence at Target's web page." 
+ }, + { + "question_id": 41, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The text appears at the top of the screenshot and is likely the title of the application or page currently being viewed. It suggests that the content of the page is related to the Skype application, possibly for download or further information purposes." + }, + { + "question_id": 42, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "This text is also indicative of the Skype application. It is usually the main header on an app page and is a non-interactive element that provides the user with confirmation of the app's identity." + }, + { + "question_id": 43, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The icon displayed represents the Skype application. It's typically used as a visual identifier of the app within digital stores or on a device's home screen. It serves as a non-interactive branding element in this context." + }, + { + "question_id": 44, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The word \"Install\" is generally an interactive button when found on an application download page. Tapping this button would initiate the download and installation of the app onto the user's device." 
+ }, + { + "question_id": 45, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "This repeated mention of \"Skype\" may refer to the name of the application on its store page. It usually appears below the app icon and serves as a non-interactive title or label." + }, + { + "question_id": 46, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The term \"purchases\" here likely relates to transactions associated with the app, suggesting that the app might offer in-app purchases." + }, + { + "question_id": 47, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "Paired with the previous \"purchases\" text, \"In-app\" specifies the location or type of purchases available, indicating that users can buy items or services within the app itself." + }, + { + "question_id": 48, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The rating \"4.1*\" is indicative of user reviews and ratings for the app. It reflects the app's quality as perceived by its users and is usually an averaged score based on individual user ratings." 
+ }, + { + "question_id": 49, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "\"1B+\" signifies the number of times the app has been downloaded, indicating that the Skype app has been downloaded over one billion times." + }, + { + "question_id": 50, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The phrase \"Editors' Choice\" likely denotes a special recognition or endorsement by the app store's editorial team, suggesting that the app comes highly recommended." + }, + { + "question_id": 51, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "This region contains the phrase \"search settings,\" which indicates a function allowing the user to search within the settings menu. The presence of a magnifying glass icon suggests that this is an interactive search bar interface element. Typically, a user would tap this area and input text to locate specific settings." + }, + { + "question_id": 52, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "This portion features the single character \"M\" which is typically representative of a user's initial or an application's logo. It is stylized with a certain thickness and distinct font that can be indicative of a branding design or user personalization within a software interface." 
+ }, + { + "question_id": 53, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "This region includes the word \"Add\" presented with clear, bold lettering on a button or interactive element. This is commonly used to initiate the process of adding new elements, possibly in this context to add a new email account, as suggested by the surrounding text." + }, + { + "question_id": 54, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "The text \"another email\" is part of a larger phrase that suggests functionality for adding additional email addresses to the account or application in use. It is likely not interactive by itself but is part of instructional or descriptive text guiding the user's actions." + }, + { + "question_id": 55, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Featuring the word \"account,\" this text complements the nearby phrase and is associated with the process of adding or managing email accounts within the application or device settings. It would generally not be interactive but adds context to the interface's options." + }, + { + "question_id": 56, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "The phrase \"Set up your personal\" suggests a prompt or direction for the user to configure personal settings, possibly related to an email or other account settings. 
This text is usually static and provides guidance or instructions within a user interface." + }, + { + "question_id": 57, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Paired with the previous text, \"or work email\" completes instructions for setting up email accounts of different types (personal or professional) within an application. This portion of text helps to further define the user's options for account configuration." + }, + { + "question_id": 58, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "\"Network & internet\" signifies a category or menu within the settings that pertains to adjusting network-related preferences including Wi-Fi, data usage, and related connectivity features. This text typically leads to a subsection where related settings can be modified." + }, + { + "question_id": 59, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "This section, \"Wi-Fi, mobile, data\" lists different connectivity options available to the user for configuration. It could be informative text providing a summary of the settings contained within the \"Network & internet\" menu mentioned in the previous region." 
+ }, + { + "question_id": 60, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "The presence of the word \"usage,\" combined with the context from the surrounding text, relates to data consumption aspects of the device's network settings. \"And\" suggests there are additional relevant aspects listed after this text, likely related to managing network services or features." + }, + { + "question_id": 61, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "This area displays the text \"Wednesday, May 18,\" which appears to show the date information, presumably representing the current day of the week and the month's date. This is typically displayed on mobile devices as part of the user interface to inform the user of the current date." + }, + { + "question_id": 62, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "The text \"Maps\" suggests an application name, likely a mapping or navigation app, which users commonly utilize to find locations, get directions, or explore maps of different areas. It is probable that tapping on this text would open the associated application." + }, + { + "question_id": 63, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "This is a single character \"G,\" customarily associated with Google’s branding. It often represents access to Google's search services or apps affiliated with Google. 
Interacting with this symbol would possibly lead to a Google product or service." + }, + { + "question_id": 64, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "This icon, often indicative of location services or map functionality, is commonly used to represent a user's current location or to access location-based features. Interacting with this icon typically opens a mapping application that shows the user's real-time location on a map." + }, + { + "question_id": 65, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "This icon depicts a chat bubble, usually associated with messaging or communication services. It typically indicates the user's chat or messaging applications, and interaction would likely open the associated messaging service to send or receive messages." + }, + { + "question_id": 66, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "This icon with a colorful design resembling a camera shutter or a wheel hints at the Google Chrome browser, which is widely used for Internet browsing. Tapping this icon would typically open the Chrome browser for web navigation." + }, + { + "question_id": 67, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "The icon exhibits the Google Assistant symbol, suggesting voice-activated or typing search query functionality. 
Interacting with this icon would likely invoke Google Assistant to help with tasks, answer questions, or control smart home devices." + }, + { + "question_id": 68, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The URL displayed in the address bar indicates that the webpage belongs to costco.com and includes a path, or endpoint, that suggests a functional page, which the text 'Check' implies may be related to a checkout or verification process. This is the web address users can navigate to for interacting with the website's functionality." + }, + { + "question_id": 69, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The logo represents the brand identity for Costco Wholesale, indicating that the user is currently on the official website of this retail company. As a logo, it may serve as a clickable element that typically redirects users to the homepage of the website." + }, + { + "question_id": 70, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "Labeled 'Warehouses,' this indicates a section of the website where users can find information about Costco's physical store locations. It likely functions as a link that, when clicked, will take the user to a page detailing warehouse locations and related information." + }, + { + "question_id": 71, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "Marked 'Account,' this suggests a section pertaining to user account management. 
Clicking on this would likely allow the user to access their personal account details, sign in, or manage their membership and profile." + }, + { + "question_id": 72, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "This icon appears to represent a shopping cart, which is commonly used on e-commerce websites to signify where users can view items they intend to purchase. Clicking on it would probably take the user to view their current selections or to the checkout page." + }, + { + "question_id": 73, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "Highlighting the word 'Shop,' this implies a navigational link designed to direct users to the online shopping section of the website, where they can browse and choose products for purchase." + }, + { + "question_id": 74, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "This appears to be a search bar, where users can enter keywords or phrases to find specific items or information on the website. Such fields typically include an interactive function that generates search results upon entry confirmation." + }, + { + "question_id": 75, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "'My Warehouse' likely refers to the user's preferred or designated Costco warehouse location. It may include functionality for the user to select or change their preferred store and may show additional details, such as operational hours." 
+ }, + { + "question_id": 76, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The phrase 'Delivery Location' accompanied with what appears to be a postal code implies a feature that allows users to specify or view the location to which online purchases will be delivered." + }, + { + "question_id": 77, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "Containing the city name 'Seattle,' this suggests the chosen warehouse or delivery location for the user. It might be interactive to allow the user to change the location or view information on the selected warehouse." + }, + { + "question_id": 78, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "This text likely represents the name of an application or service known as Fetch Rewards, potentially hinting at a rewards system that users can utilize by engaging with the app." + }, + { + "question_id": 79, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "This is the word \"Play,\" which usually is associated with initiating an action or starting something, such as a video or game within an application." 
+ }, + { + "question_id": 80, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The phrase \"to earn\" typically suggests that there is an opportunity to gain something—often points, money, or rewards—by performing certain actions or tasks." + }, + { + "question_id": 81, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The word \"MAKE\" usually denotes the action of creating something or obtaining an outcome, perhaps insinuating that users can create or earn money through the app." + }, + { + "question_id": 82, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "This text states \"MONEY,\" which indicates that the application or service likely involves opportunities for users to earn financial rewards or benefits." + }, + { + "question_id": 83, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The text \"appminer st\" is not immediately clear, but it could be a truncation or part of a larger phrase, possibly indicating a feature within the app, or related to app mining or statistics." 
+ }, + { + "question_id": 84, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The word \"Contains\" usually suggests that what follows will describe the contents or features within the app, which in this case could be related to advertisements." + }, + { + "question_id": 85, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "This term generally refers to \"advertisements,\" suggesting that the application includes ads that users might see while utilizing the app." + }, + { + "question_id": 86, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The numerical figure \"50K+\" generally implies a quantity greater than 50,000, typically used in the context of downloads, users, or items within an app." + }, + { + "question_id": 87, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The characters \"4.7*\" indicate a rating, likely on a 5-point scale, suggesting that users have rated the app positively, with 4.7 out of 5 stars." + }, + { + "question_id": 88, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "This area contains the webpage title indicating the user is on a retail website known for its wide range of products, hinting at online shopping capabilities. 
The title is typically non-interactive and serves as an identifier of the site." + }, + { + "question_id": 89, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "This text seems to be an incorrect or truncated URL for the same retail website mentioned in Region 1. Possibly a typographical error within the text, it seems non-functional." + }, + { + "question_id": 90, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The text here represents a search query within a search bar of the website, suggesting the user is looking for a Lenovo ThinkPad, which is a model of a laptop computer." + }, + { + "question_id": 91, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "Labeled 'Cancel,' this is likely an interactive button used to clear the current search query within the search bar. Once tapped or clicked, it should clear the input text." + }, + { + "question_id": 92, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "This text duplicates the query in Region 3 and is part of the search bar suggestions or search history, indicating a previous or common search made by the user." 
+ }, + { + "question_id": 93, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The single word 'Lenovo,' which is part of a search suggestion below the search bar, represents the brand that manufactures various electronic devices, including laptops." + }, + { + "question_id": 94, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The word 'ThinkPad' refers to a specific line of laptops and is part of a search suggestion. Standalone, it specifies the user's interest in the ThinkPad series by Lenovo." + }, + { + "question_id": 95, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The text 'ir' appears to be an incomplete or mistyped search term or fragment within the search suggestions. Its context is unclear without additional information." + }, + { + "question_id": 96, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "This text, likely a category label, indicates the section of the site the user is navigating, presumably the electronics category where items like laptops would be found." + }, + { + "question_id": 97, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "Representing a longer search suggestion, this phrase indicates a related accessory for the Lenovo ThinkPad, specifically a charger, suggesting the user might be looking to purchase this item." 
+ }, + { + "question_id": 98, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "The image displays a title that reads \"NAVIGATING SPECIAL EDUCATION SOCIAL & EMOTIONAL LEARNING.\" It's styled in bold, white capital letters against a red background, and it appears to serve as a header for the entire visual presentation, indicating the overarching theme of the content below." + }, + { + "question_id": 99, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel illustrates the concept of a \"Growth Mindset\" juxtaposed with \"Fixed Mindset.\" Two head silhouettes are shown with arrows pointing towards a \"Growth Mindset\" tag indicating a positive transformation away from a \"Fixed Mindset,\" symbolizing the adaptability and learning potential of the mind." + }, + { + "question_id": 100, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "Depicted here is a person with a grim expression, and steam coming out of their ears, conveying the theme of \"Anger Management.\" This symbolizes the need to control tempers, with visual cues highlighting the struggle typically associated with anger." + }, + { + "question_id": 101, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel represents \"Understanding Diversity.\" It features a circle of variously colored handprints reaching towards the center, signifying unity and inclusiveness among diverse individuals or groups. 
The image communicates the idea of embracing diversity." + }, + { + "question_id": 102, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "The image here is indicative of \"Social Inferencing.\" A figure stands perplexed before an open box with question marks floating above, suggesting the process of interpreting social cues and understanding social contexts or scenarios that are not overtly expressed." + }, + { + "question_id": 103, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "An illustration of two children, one standing over the other with a raised fist, typifies \"Bullying.\" This image portrays an aggressive interaction between youth, emphasizing the dynamic of power and intimidation present in bullying behaviors." + }, + { + "question_id": 104, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "Here, \"Helping Others\" is symbolized by two children, one assisting the other by tying their shoe. This image evokes themes of kindness, helpfulness, and cooperation among individuals, highlighting the importance of social support." + }, + { + "question_id": 105, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "Showing two profiles with opposing arrows and a lightning bolt in between, this panel discusses \"Conflict Resolution.\" The imagery suggests two individuals facing a conflict with a potential for resolution, emphasizing communication and problem-solving." 
+ }, + { + "question_id": 106, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel depicts \"Coping With Changes,\" represented by a signpost with arrows pointing in different directions, labeled \"CHANGES.\" It symbolizes the various paths one may take when encountering life's transitions and the importance of adaptability." + }, + { + "question_id": 107, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "Finally, \"Leadership\" is expressed through an individual climbing a staircase while assisting another person upward. It represents the concept of leading by example, and guiding others towards success, showcasing the traits of a good leader." + }, + { + "question_id": 108, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image highlights a statistic related to hiring efficiency, pointing out that the time to hire has more than doubled over the last 5 years. A graphical element beside the text emphasizes this increase in time with a \"+2X\" indicating the doubling." + }, + { + "question_id": 109, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This section of the image indicates that the average cost to hire someone in the U.S. is $4,000, emphasizing the financial implications of the recruitment process for employers." 
+ }, + { + "question_id": 110, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Illustrated here is a pie chart displaying that 36% of employers are unable to find the talent they need when it is needed. This statistic points to the challenges in matching skills and job openings in a timely manner." + }, + { + "question_id": 111, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Described here is the significant potential economic benefit (\"$2.7 trillion impact to global GDP\") that could result from using more efficient talent platforms, suggesting that improvements in recruiting methods could have a profound impact on the global economy." + }, + { + "question_id": 112, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This part of the image addresses organizational efficiency, with a statement that 46% of companies are sometimes or frequently understaffed. The figure is accompanied by a graphic showing the 46% proportion." + }, + { + "question_id": 113, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "This area presents the title and introductory text providing an overview of the image's intent. 
It introduces the concept of \"Travel Personas,\" indicates that these personas are used to identify individual travel styles, and how these styles are significant for personalized engagement in marketing. It references a report by the CMO Council from 2018." + }, + { + "question_id": 114, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A visual and textual depiction of \"The Smart Planner\" travel persona. This persona, representing 31% of travelers, is illustrated by a character with suitcases, a camera, binoculars, and a hat, suggesting a well-prepared and organized traveler." + }, + { + "question_id": 115, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "This illustrates \"The Relaxed Nomad\" persona. With 25% representation, the image shows two individuals in hiking attire with a backpack, indicating a laid-back and adventurous travel style." + }, + { + "question_id": 116, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Depicts \"The Deal Seeker\" persona, representing 22% of travelers. The image shows a family with suitcases and shopping bags, suggesting a focus on economical travel and value for money." + }, + { + "question_id": 117, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Presents \"The Nervous Stresser\" persona with 13% representation. The image depicts an anxious individual in an airplane seat, clutching the armrests, reflecting a traveler who experiences stress during trips." 
+ }, + { + "question_id": 118, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Showcases \"The Adventurous Thrill-Seeker\" persona, accounting for 5% of the traveler demographic. The image portrays two characters skydiving, indicating a preference for high-energy and adventure-filled travel experiences." + }, + { + "question_id": 119, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Portrays \"The Luxury Budget-Buster\" persona, constituting 1% of travelers according to this depiction. The image includes a character sipping a drink on a plane, implying a tendency towards indulgence and high expenditure." + }, + { + "question_id": 120, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Displays \"The Business Road Warrior\" persona, also making up 1% of the traveler profile. The graphical representation includes a character briskly walking with a rolling suitcase and carrying a briefcase, suggesting frequent travel for business purposes." + }, + { + "question_id": 121, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "An illustration depicts a female customer service representative wearing a headset and holding a notepad, with an envelope icon indicating email communication." 
+ }, + { + "question_id": 122, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "Depicted here is a male customer support agent with a headset. Behind him are symbols such as a magnifying glass and a wrench, suggesting a focus on service and problem-solving." + }, + { + "question_id": 123, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "This image shows a customer support agent with a globe and a phone headset in the background. The presence of a star and headphones suggests excellence in global support." + }, + { + "question_id": 124, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "In this graphic, a male figure with a headset is surrounded by symbols: a question mark, gears, and a light bulb. This represents expertise in finding solutions." + }, + { + "question_id": 125, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "Featuring tools and a gauge, the illustration conveys a commitment to quality in customer service, indicated by the 'Quality Service' text." + }, + { + "question_id": 126, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "The design shows a female representative with a headset alongside a mobile phone displaying a wifi signal and a callback option, emphasizing telecommunications services." 
+ }, + { + "question_id": 127, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel portrays a customer support individual with a wrench, highlighting the concept of assistance with technical or practical issues." + }, + { + "question_id": 128, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "Illustrated here is a customer support agent with a headset in front of a backdrop depicting the UK flag, a speech bubble, and a phone, suggesting language translation services." + }, + { + "question_id": 129, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A female customer service consultant is represented, with symbols of 24-hour availability and a gold star, signifying round-the-clock excellence." + }, + { + "question_id": 130, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image features a human heart symbolizing a strong heart as one of the benefits of running." + }, + { + "question_id": 131, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel showcases an icon of a shield with a check mark, representing the immune system's boost from running." 
+ }, + { + "question_id": 132, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A silhouette of a figure measuring their waist indicates that running can aid in weight loss." + }, + { + "question_id": 133, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The artwork depicts a pair of lungs, signifying the respiratory system's enhancement due to running." + }, + { + "question_id": 134, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A smiling face emoticon suggests that running can improve one's mood." + }, + { + "question_id": 135, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "An illustration of a leg bone signifies that running increases bone density." + }, + { + "question_id": 136, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "An image of a brain is used to illustrate the benefit of improved brain function from running." 
+ }, + { + "question_id": 137, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image features a detailed representation of the cardiovascular system, emphasizing its strengthening through running." + }, + { + "question_id": 138, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "The image displays the word 'HOT' in large, bold, uppercase letters with varying colors for each letter. The 'H' is in red, the 'O' is in mustard yellow, and the 'T' is in a light blue color." + }, + { + "question_id": 139, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "Here, a round, yellow cartoon-like emoji with blue tears, symbolizing laughter or crying with joy, is shown. Below it, the word 'HUMOR' is written in uppercase letters." + }, + { + "question_id": 140, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "This depicts an open hand graphic in brown color, which is centered within an orange circular background. Underneath the image, the word 'OPENNESS' appears in capital letters." + }, + { + "question_id": 141, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "Shown is an illustration of two hands coming together in a handshake or high five, set against a yellow circle. Below, the word 'TOUCH' is described in uppercase letters." 
+ }, + { + "question_id": 142, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A graphic of a heart with an exclamation mark within it is displayed within a light green circular background. Written below is the word 'ATTENTION' in uppercase letters." + }, + { + "question_id": 143, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A portrayal of a person with a gender-neutral appearance, featuring brown hair, is encircled in blue. A sequence of dashes leads from the character to the bottom right, with the word 'PROXIMITY' written in block capitals." + }, + { + "question_id": 144, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "The image shows a close-up illustration of a stylized blue eye with a large brown pupil, against a dark blue background. Below the eye is the word 'EYE CONTACT' written in all caps." 
+ }, + { + "question_id": 145, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image depicts an illustration of a person with flushed cheeks and a thermometer in their mouth, indicating a high temperature, alongside the word \"fever.\"" + }, + { + "question_id": 146, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image shows an individual coughing into their hand, representing a symptom identified by the word \"cough.\"" + }, + { + "question_id": 147, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel illustrates an individual appearing unwell, with a sick expression and a hand over their mouth. The word \"vomiting\" is associated, indicating it as a symptom." + }, + { + "question_id": 148, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Here, an individual is portrayed with their hands near their throat, their cheeks flushed, and an uneasy expression. The term \"dyspnea\" adjacent to the figure defines the displayed respiratory distress." 
+ }, + { + "question_id": 149, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "An individual is seen clutching their stomach, with a distressed expression, representative of \"diarrhea\" which is indicated by the corresponding label." + }, + { + "question_id": 150, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The illustration shows a pair of human lungs with a highlighted area indicating inflammation. The word \"pneumonia\" is present to describe the condition being depicted." + }, + { + "question_id": 151, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Featured is a graphic representation of a pair of kidneys with a highlighted area in red, indicating distress or damage. Alongside is the phrase \"renal failure,\" signifying the medical condition exhibited." + }, + { + "question_id": 152, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "This region displays the heading \"Interesting Facts\" at the top, set against a blue background with a three-line menu icon to the left and a heart symbol to the right." 
+ }, + { + "question_id": 153, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Depicts a graphical icon of a panda bear's face on a green background with the label \"Animals\" beneath it." + }, + { + "question_id": 154, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Shows an icon representing a plate and silverware on a green background, labeled as \"Diet Nutrition.\"" + }, + { + "question_id": 155, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Includes a graphical icon that combines a heart shape and a pulse line on a dark background, labeled \"Diseases Disorders.\"" + }, + { + "question_id": 156, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Features an image of a fashionable shirt on a dark background with the word \"Fashion\" underneath it." + }, + { + "question_id": 157, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Contains an icon resembling a film strip on an orange background, indicating the \"Entertainment\" category." 
+ }, + { + "question_id": 158, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel displays an icon of a syringe with a drop, which is on a green background, and is described with the words \"Drugs Addiction.\"" + }, + { + "question_id": 159, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Shows a depiction of a wine bottle and glass on a blue background, labeled \"Food & Drink.\"" + }, + { + "question_id": 160, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Exhibits two stylized human figures, one male and one female, on a blue background, with the inscription \"Gender.\"" + }, + { + "question_id": 161, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Contains a depiction of the Earth on a green background, with the word \"Global\" beneath it." + }, + { + "question_id": 162, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image depicts two individuals engaged in conversation. One appears to be a professional, possibly a therapist, sitting across from a person who seems to be seeking help. The scene is accompanied by the text \"Seek Professional Help,\" suggesting that the image represents the advice to consult a mental health professional when dealing with depression." 
+ }, + { + "question_id": 163, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel features an individual sitting on the ground with their head down, projecting a dejected or hopeless demeanor. Above the figure, the text reads \"Don't Lose Hope.\" The image conveys the message of maintaining hope as a countermeasure against feelings of depression." + }, + { + "question_id": 164, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "An illustration of a female figure is shown alongside the phrase \"Practice Mindfulness.\" She appears calm and collected, with her eyes closed and a slight smile, which indicates a serene state of mind, commonly associated with mindfulness practice." + }, + { + "question_id": 165, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Here, an individual is seen looking out of a large window onto a sunny landscape with trees. The phrase \"Rethink Your Perspective\" suggests that the image is advising a change in one's outlook, possibly to a more positive or broader view, as a way to combat depression." + }, + { + "question_id": 166, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image presents a person in activewear, taking a stride forward with a focused expression. 
The associated text, \"Stay Active,\" recommends physical activity as a method for improving mental health and battling depression." + }, + { + "question_id": 167, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel displays a person in a yoga pose, meditating with eyes closed and hands in a position of focus. The text \"Meditate\" indicates that the image is suggesting meditation as a therapeutic practice for managing depression." + }, + { + "question_id": 168, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image shows three gel ice packs in green, purple, and blue colors, with distinctive shapes, resembling a dinosaur, a star, and a fish. Accompanying text suggests \"Take out the one you need.\"" + }, + { + "question_id": 169, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "An illustration depicts a cartoon boy holding an ice pack to his head. 
Text indicates the ice pack has multi-functionality and advises using the ice pack for \"the relief area for the doctor recommended time of 20 minutes.\"" + }, + { + "question_id": 170, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "Detailed instructions on how to use the ice pack are given, with two methods highlighted: \"TO USE COLD\" involving refrigeration, and \"TO USE HOT\" instructing to microwave the pack for 10 seconds and check the temperature." + }, + { + "question_id": 171, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "Guidelines for storage are portrayed, advising to \"put the item in the storage bag, for longer shelf life, keep pack in freezer while not in use.\"" + }, + { + "question_id": 172, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "The object appears to be a small brown wooden shed, likely used for storage, situated on a patch of grass. It has a clearly visible slanted roof, possibly for rain runoff, and looks to be a single-door structure typically found in a backyard or garden setting." + }, + { + "question_id": 173, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "This object is a tree with thick, lush foliage, representing a mature specimen that provides shade and greenery. 
It stands behind a smaller, sparser tree and is part of a larger grouping of trees that appear to create a natural boundary or backdrop for the area." + }, + { + "question_id": 174, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "A single metal pole is embedded in the ground in a vertical orientation. It seems to be a simple, slender structure, possibly serving as a support or part of a larger construction that isn't fully visible. The lawn surrounding it is well-trimmed and maintains an even appearance." + }, + { + "question_id": 175, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "This bowl, appearing to be dark blue, is situated against a background, likely part of kitchenware." + }, + { + "question_id": 176, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "The tabletop is made of dark marble, showcasing a glossy finish and reflecting its surroundings slightly." + }, + { + "question_id": 177, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "The light switches are white, contrasting with the dark wall, likely plastic, and appear functional." + }, + { + "question_id": 178, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "Positioned in the background, these white light switches are paired on a wall above the countertop." 
+ }, + { + "question_id": 179, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "This silver oven, with digital controls and a handle, appears modern and built into the cabinetry." + }, + { + "question_id": 180, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "An indistinct blue and green object, possibly decorative, is partially visible against a lighter backdrop." + }, + { + "question_id": 181, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "The floor, constructed of hardwood, showcases a natural finish with variations in wood grain." + }, + { + "question_id": 182, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "The jar holder, likely metal, is mounted to the wall, containing jars that may hold spices or ingredients." + }, + { + "question_id": 183, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "This is an image of a silver metal table situated outside on a paved ground. The table has a shiny, reflective surface indicative of being metallic." + }, + { + "question_id": 184, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "The object is an outdoor chair characterized by its red backrest and tan seat. 
It appears sturdy and designed for outdoor settings, likely part of a café or restaurant patio." + }, + { + "question_id": 185, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "The item in question is a piece of lavender paper that seems to be placed atop a metal table. The paper's edges are distinctly visible against the table's surface." + }, + { + "question_id": 186, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "Visible here is a yellow traffic light, suspended above the street. The light is not illuminated and it stands against a light sky, possibly signaling a traffic-stop scenario." + }, + { + "question_id": 187, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A large red and white striped umbrella stands open, presumably providing shade or shelter in an outdoor setting. Its vibrant colors attract attention." + }, + { + "question_id": 188, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A brown tree trunk is seen beside a sidewalk. The trunk's bark is rugged and it appears to be a mature, healthy tree, offering shade to the vicinity." + }, + { + "question_id": 189, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "Displayed is a black chalkboard featuring white text. It seems to be placed on a sidewalk, often used for displaying messages or menus outside establishments." 
+ }, + { + "question_id": 190, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A window is seen on the side of a tan-colored building. It appears to be rectangular, typical of building windows, and reflects the adjacent surroundings." + }, + { + "question_id": 191, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "A close-up view of a horse's head, predominantly brown with a distinctive white patch on its forehead and visible mane." + }, + { + "question_id": 192, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "This is the body of a brown horse, most likely the same one as the head seen in the close-up. Its front body is visible." + }, + { + "question_id": 193, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "A white horse is seen from a side angle in the distance, grazing or standing in a meadow with trees and a fence." + }, + { + "question_id": 194, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A frying pan on a heat source contains sautéed meat and vegetables, emitting steam, indicating the food is hot and being cooked." 
+ }, + { + "question_id": 195, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "An electric stovetop features a radiant burner that is glowing, suggesting it is turned on and providing heat for cooking." + }, + { + "question_id": 196, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A kitchen knife with a green handle rests on a countertop; its blade appears sharp and suitable for food preparation." + }, + { + "question_id": 197, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A human hand is captured in motion, seasoning or stirring the food in the pan, contributing to the cooking process." + }, + { + "question_id": 198, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000299654.jpg", + "category": "natural_detailed_caption_box", + "text": "The image depicts the head of a zebra, with distinctive black and white stripes covering its fur. The animal's ears are pointed upwards, indicating alertness. The eyes are visible, showcasing a gentle gaze, and the nose is close to the ground, suggesting the zebra is grazing or sniffing the terrain. The mane is partially visible as a series of short, erect black hair between the zebra's ears." 
+ }, + { + "question_id": 199, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000107939.jpg", + "category": "natural_detailed_caption_box", + "text": "The object is a rectangular street sign with white letters on a green background, indicating the name of a street. It is affixed to a metal pole and is located above and slightly to the left of a stop sign. The sign reads 'NORTH AVE' suggesting it's likely an indication of the street or direction. It appears to be a standard street name sign used in many urban settings." + }, + { + "question_id": 200, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000107939.jpg", + "category": "natural_detailed_caption_box", + "text": "This object is a red hexagonal stop sign with white uppercase letters spelling 'STOP'. It is attached to the same metal pole as another sign, below and to the right of it. The sign is designed to alert drivers to stop and is a widely recognized traffic control device. The edges of the sign appear sharp and undamaged, suggesting it is in good condition." + }, + { + "question_id": 201, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "A plush, padded object designed for comfort, potentially used on a sofa." + }, + { + "question_id": 202, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "Similar to the first object, this is also a stuffed and soft piece intended for supporting or resting." 
+ }, + { + "question_id": 203, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "Decorative accessory adorned on the ear, visible as a small, shiny object." + }, + { + "question_id": 204, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "This is a child with an open mouth and animated facial expression, possibly speaking or expressing surprise." + }, + { + "question_id": 205, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "Appears to be a young boy, casually dressed, gripping an electronic device with attention." + }, + { + "question_id": 206, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "The figure is wearing a red ski suit with a blue helmet and goggles. Their stance is open and welcoming, arms outstretched, and they seem to be an instructor addressing a group of students on a snowy slope." + }, + { + "question_id": 207, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A person is mostly obscured by the instructor but can be identified as a ski student by the helmet. The student is wearing a purple jacket with green sleeves and appears to be in mid-motion, learning to ski." 
+ }, + { + "question_id": 208, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "There is a student dressed in green ski gear with visible ski poles, possibly following instructions. They are viewed from the side, indicating movement or a pause during skiing." + }, + { + "question_id": 209, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A clear blue sky with scant clouds, indicative of a bright, sunny day ideal for outdoor activities such as skiing. This backdrop is above a snowy mountain setting." + }, + { + "question_id": 210, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A ski student is captured from behind, suggesting they are moving away from the viewer. They are wearing a red jacket with black pants, indicative of typical ski wear fit for the cold environment." + }, + { + "question_id": 211, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "This student, visible from the side, is wearing a green and purple ski outfit with a matching helmet, possibly in the midst of practicing or following a ski maneuver." + }, + { + "question_id": 212, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A detailed examination of the instructor's black glove, which is part of standard skiing attire, suited to protect hands from cold conditions and providing better grip on ski poles." 
+ }, + { + "question_id": 213, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2411153.jpg", + "category": "natural_detailed_caption_box", + "text": "Captured in this section is a motorcycle racer, sharply tilting while maneuvering a turn on a race track. The rider, outfitted in a full-body racing suit, is almost in a horizontal position relative to the ground, a technique used in high-speed motorcycle racing to navigate tight turns while maintaining speed. The motorcycle itself is predominantly red with hints of white and black, and it showcases a sleek, aerodynamic design typical of high-performance racing bikes. The rider's focused posture and the bike's dynamic angle suggest this is a moment of intense action during a race." + }, + { + "question_id": 214, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2411153.jpg", + "category": "natural_detailed_caption_box", + "text": "This portion of the image displays the texture of an asphalt road, detailed with small granular elements indicative of a typical racing track surface built to offer traction and durability. A crisp white boundary line marks the edge of the racing track, contrasting with the dark gray tone of the asphalt. The road surface is illuminated by ambient light, highlighting the texture and suggesting a dry weather condition which is ideal for racing. The condition of the road suggests it is well-maintained, a necessity for the safety and performance of high-speed motorsport events." 
+ }, + { + "question_id": 215, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/22d3844dcf29a07bd10f557a33684e331846f81b938ca0f742afab09c542133f.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region of the image displays a portion of a scientific or academic paper, specifically focusing on points that seem to outlay contents or headings within the document. The page appears to discuss topics in physics, with references to quark and meson masses, as well as lattice data. The content suggests that the document may be exploring the relationship between subatomic particles and their masses, experimental data, and theoretical models (likely within the field of particle physics or quantum chromodynamics). Each item listed is followed by ellipsis and a numerical value, denoting sections or page numbers where these topics are expanded upon within the document." + }, + { + "question_id": 216, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/22d3844dcf29a07bd10f557a33684e331846f81b938ca0f742afab09c542133f.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region is at the bottom of the document, typically referred to as the page-footer. In academic or scientific papers, this section could include information such as the page number, publication date, author's name, or part of the document classification system. However, the specifics of what this footer contains are not visible, as the black rectangle with a white numeric identifier covers it entirely." 
+ }, + { + "question_id": 217, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The content is a caption designated for a table, which generally serves to describe the table's subject matter. The caption reads, \"TABLE 1: The geometries and adsorption energies for the structures of thioglycolic acid on Au(111) at 0.25ML.\" It provides a clear indication that Table 1 will present quantitative data regarding the geometry and energy characteristics of thioglycolic acid adsorbed on a gold (Au) substrate at a specific coverage level." + }, + { + "question_id": 218, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a table containing organized data. It lists various configurations of thioglycolic acid adsorbed on an Au(111) surface, along with numerical values for initial and optimized parameters such as adsorption distance (ds–Au), polar angle (θ), and adsorption energy (E_ads). The data is structured in columns with headings for different parameters and rows corresponding to different adsorption sites and tilt directions. The table is used to convey detailed quantitative information in a comparative format, facilitating the analysis of changes in geometry and energy after optimization." 
+ }, + { + "question_id": 219, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Here appears to be an excerpt of text, possibly from a research article or report, focusing on detailed scientific analysis. The text discusses the shortest Au-S bond length and mentions 'initial and optimized site,' likely referring to the states before and after some experimental or computational procedure. The content seems to pertain to the interpretation of the data presented in the table above it, providing context and insights into the structural data of the adsorption process." + }, + { + "question_id": 220, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This section of text also seems to be a detailed analytical discussion, possibly a continuation of the content from the previous text excerpt. It specifically highlights the adsorption energy for the most stable structure of a molecule on the Au(111) surface and the preferred adsorption site. It suggests a close relationship with both the data in the table above and the scientific interpretation or conclusion drawn from that data." + }, + { + "question_id": 221, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The content in this region appears to be a page-footer, usually found at the bottom of journal pages or official documents. It may contain information such as the page number, document section, publication date, or authors' names. 
Such footers are used for navigation and citation purposes." + }, + { + "question_id": 222, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The image is a collection of three scatter plots, each corresponding to a distinct type of prediction performance evaluated by F1 score. The x-axis represents the frequency (presumably of occurrence in the training set), while the y-axis represents the F1 score, which is a measure of test accuracy. The plots are labeled (a) Atom prediction performance, (b) Bond prediction performance, and (c) Charge prediction performance. Each plot features a variety of points labeled with chemical symbols or bond types, indicating that the data relates to chemical structures." + }, + { + "question_id": 223, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a caption for the series of scatter plots shown in . It provides an interpretation of the data, stating that there is a clear correlation between the performance of neural networks on different prediction types and the frequency of the specific type in the training dataset. It is noted that classification networks perform significantly better than segmentation networks." + }, + { + "question_id": 224, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a subsection title within the document that reads \"Performance of segmentation network.\" It indicates that the following text will discuss the results and analysis related to the evaluation of the segmentation network's performance." 
+ }, + { + "question_id": 225, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a paragraph that explains how the performance of the segmentation network is measured by the F1 score for pixel predictions for different atom, bond, and charge types. The text discusses how performance correlates with the frequency of these types in the training data and references a correlation visible in Figure 4, assuming that Figure 4 corresponds to the scatter plots in ." + }, + { + "question_id": 226, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is another subsection title within the document that reads \"Performance of classification networks.\" It signals that the subsequent paragraph will describe the performance evaluation for classification networks." + }, + { + "question_id": 227, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This paragraph details the performance of classification networks, mentioning that the F1 score is used for evaluation. It highlights a correlation between F1 score and the frequency of different atom, bond, and charge types in the training set. Although the segmentation is not perfect, the classification networks can maintain accuracy. Results are summarized in Figure 4, which likely refers to the scatter plots in ." 
+ }, + { + "question_id": 228, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a subsection title denoted \"Overall graph accuracy,\" which suggests that the following section of the document will focus on the combined accuracy measurements of the previously discussed networks." + }, + { + "question_id": 229, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "In this paragraph, the text outlines how combining the performance of different parts can produce an overall accuracy for graph predictions. It implies that integration of segmentation and classification network results, as indicated by an algorithm, can construct the resulting graph, referencing images in three different blocks." + }, + { + "question_id": 230, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is the page number of the document, specifically '11,' marking its location within the document's sequence of pages." + }, + { + "question_id": 231, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region appears to be a paragraph of text discussing the outcome of a washing process on reducing sugar content. It notes that this process resulted in a higher content of reducing sugar which is thought to overshadow the glycemic index (GI) lowering effect of the polyphenols and may increase the GI of the sugar." 
+ }, + { + "question_id": 232, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region of text outlines a graphical demonstration of the 'GI sweet spot' related to the sugars shown in a referenced table. It explains that a certain minimum amount of sucrose (22mg CE/100mg) needs to be retained during sugar processing to maintain a low GI, and that if additional polyphenols are present but the reducing sugars are too high, then the low GI effect is negated." + }, + { + "question_id": 233, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region includes a section header titled \"Table 3 - Example sugars,\" which implies that the region is categorizing and summarizing data related to various sugars, likely in a tabular format." + }, + { + "question_id": 234, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text in this region describes the effects of increasing the reducing sugar content of sugar and its impact on the GI, moisture content, and the behavior of glucose and fructose when polyphenol content is increased. It concludes that optimizing moisture and reducing sugar content is insufficient to lower the GI in the presence of higher polyphenol levels." 
+ }, + { + "question_id": 235, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region is a section header for the text that follows, indicating that the content will deal with \"Example b - Washing or massecuite to desired polyphenol content.\" This text likely explains an example or case study related to the process of washing sugar massecuite to achieve a certain level of polyphenol content." + }, + { + "question_id": 236, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region contains text describing an experimental process where two different sugar mill samples were tested for polyphenol content after undergoing washing to a certain depth of color. It discusses how the polyphenol content was measured against desired levels and mentions results found in a specific table." + }, + { + "question_id": 237, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a figure. It includes several images arranged in a grid layout depicting various stages of a document life cycle or processing steps. Each image shows a different state of documents, possibly related to digitalization or text recognition processes. These images likely serve as a visual representation of the document's evolution through a particular workflow, such as scanning or Optical Character Recognition (OCR)." 
+ }, + { + "question_id": 238, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a caption associated with a figure. It reads \"Figure 5: The OCR process.\" This caption identifies and describes the figure that it is associated with. The figure it refers to likely illustrates the stages or aspects of the OCR process, which could involve converting scanned images of text into machine-encoded text." + }, + { + "question_id": 239, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a figure. It shows a piece of text with visual markings comparing two sections labeled \"Available OCR\" and \"Improved OCR.\" The annotations indicate corrections or enhancements made in the 'Improved OCR' section compared to the 'Available OCR' section. This figure serves to demonstrate the efficacy of certain OCR technologies or methodologies by providing a before-and-after comparison." + }, + { + "question_id": 240, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a caption associated with a figure. It reads \"Figure 6: Excerpt from the Hong Kong report with different versions of OCR output. The Internet Archive image containing this excerpt can be accessed here:\" followed by a URL. This caption provides context for the associated figure, indicating that it is an excerpt from a specific report and acknowledges the source of the image. It helps readers understand the purpose of the figure and where they can find additional information." 
+ }, + { + "question_id": 241, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is part of a footer. It contains the name of a journal, \"Journal of Data Mining and Digital Humanities,\" along with the ISSN number, which is a unique identifier used for serial publications. This area of the document provides information about the publication in which the article or research paper may be found." + }, + { + "question_id": 242, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is also part of a footer. It includes a URL, \"http://jdmdh.episciences.org\", which likely directs readers to the website of the journal or publication mentioned in . This URL provides a way for readers to access more information or related content online." + }, + { + "question_id": 243, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a page number. It is located in the footer area of the document and provides the numerical identifier \"9\" for the current page. This helps readers navigate the document and facilitates referencing specific sections." + }, + { + "question_id": 244, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region is identified as the page-header of the document. 
It contains the title of the document, which reads \"2012 Annual Report 2013.\"" + }, + { + "question_id": 245, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This portion is a section-header labeled \"Non-Executive Directors' Remuneration.\" It indicates that the following section will discuss the payment and remuneration details for non-executive directors of the company." + }, + { + "question_id": 246, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This area is another section-header specifying \"Components of Non-Executive Director remuneration.\" This header suggests a breakdown of the various elements that constitute the remuneration for non-executive directors." + }, + { + "question_id": 247, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Found at the bottom of the page, this region is the page-footer. It's a small section that is typically used for providing footnotes, disclaimers, or publication information for the document." 
+ }, + { + "question_id": 248, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a table detailing a \"Share purchase plan.\" It contains columns for the name of the individual, the amount of shares acquired, and the share price range at acquisition dates, alongside with the total sum. It lists information about shares acquired by specific individuals at specified price ranges during a specific time frame." + }, + { + "question_id": 249, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a block of text providing detailed information on \"Current Board fees\" and \"Post-employment benefits.\" It specifies the annual fees for different board roles and outlines the post-retirement benefits provided to non-executive directors with terms of board service." + }, + { + "question_id": 250, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region contains text related to the \"Deferred share purchase plan.\" It describes the nature of the share purchase plan, specifying the conditions under which shares were purchased, the performance criteria associated with the plan, and details regarding the share price and acquisition dates." 
+ }, + { + "question_id": 251, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region is categorized as text. It discusses the mathematical concept of homotopy groups designated π_n(M), focusing on their ability to classify different dimensional hypersurfaces within a manifold M. The text further explains that the triviality of these homotopy groups is linked to the connectivity of the space they represent, with specific mentions of the concepts of domain walls in cosmology and topological defects arising from symmetry breaking during phase transitions in the universe." + }, + { + "question_id": 252, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region of text delves into the implications of symmetry breaking in theoretical physics. It connects the process of symmetry breaking to the generation of monopole-like defects, and it references the Standard Model's group construction that includes a U(1) factor. The text implies that this formation of defects played a pivotal role in historical scientific developments." + }, + { + "question_id": 253, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text in this section links the theoretical concepts mentioned earlier to a practical application: the motivation for introducing a phase of inflation in cosmological models. 
It characterizes the topological conditions for the formation of defects and points out that certain solutions for these conditions can exist even in the absence of topologically stable defects. References are made to specific types of defects and academic citations are included to support these statements." + }, + { + "question_id": 254, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The content in this region pertains to the study of cosmic defects and their stability. It addresses scenarios in which initially unstable defects might become stable through various mechanisms, such as the effects of plasma. These considerations are relevant to the inflationary model in cosmology, particularly the constraints from the formation of cosmic strings." + }, + { + "question_id": 255, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region is categorized as a formula and presents a mathematical equation related to the text's discussion about topological defects and homotopy groups. The equation seems to represent a relationship that is essential to the argument or analysis presented in the categorical text regions it is associated with." + }, + { + "question_id": 256, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The last region is identified as a page-footer. 
It likely contains publication and/or authorship information, a page number, or possibly an indication of the section of the document where the content can be found. Since it is a page-footer, its purpose is primarily to aid in the organization and navigation of the document." + }, + { + "question_id": 257, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0301_0188_0040.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The image appears to be a stylized illustration of a side profile of a person's head and upper torso. The person's face is depicted with a serene or peaceful expression, eyes closed and a faint smile, implying a sense of calm or contemplation. Behind the figure, there are abstract shapes resembling clouds or wind patterns that swirl around the head, which could suggest thoughts, memories, or a state of mental flow. The illustration uses a muted color palette, predominantly warm shades of beige, pink, and gray, with a touch of red in the figure's attire, which has a dotted pattern. This artwork likely serves to evoke a mood or theme related to the content of the document in which it's included, possibly regarding mental health, mindfulness, psychology, or the creative process." + }, + { + "question_id": 258, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0301_0188_0040.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a footer section of an image or document. It contains a citation that reads \"© 2021 Scientific American,\" indicating that the image or the content of the document is copyrighted by Scientific American. This informs the viewer about the source of the content and copyright year, serving both as an attribution and a legal notice to respect the intellectual property rights associated with the material." 
+ }, + { + "question_id": 259, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This section is labeled as \"Chapter 2: Motivation.\" It functions as a heading signifying the start of a new chapter or section within the document, providing readers with an indication of the chapter's theme, which in this case is to establish the rationale or impetus behind the subject matter discussed in the chapter." + }, + { + "question_id": 260, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region, also labeled as \"Chapter 2: Motivation,\" serves a similar purpose to , functioning as part of the chapter heading that presents the focus of the chapter, potentially implying that the author will delve into the reasons or driving forces guiding the study or research presented." + }, + { + "question_id": 261, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region contains a paragraph of text that discusses specific concepts related to a theoretical framework, possibly in the field of theoretical physics or string theory. It mentions a scenario where excitations in a certain 'near horizon throat region' appear redshifted to an observer at infinity. The text discusses the energy associated with these excitations and touches on limits pertaining to string theory, suggesting that in a particular limit, the full Type IIB string theory must be considered. 
The paragraph concludes with an implication that supergravity must be considered in the context of near-horizon geometry within the scope of string theory." + }, + { + "question_id": 262, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "In this text paragraph, the document appears to be discussing two theoretical pictures related to the same low-energy limit within theoretical physics or string theory. It mentions the field theory picture with supergravity and an \\( N = 4 \\text{SU}(N) \\) SYM on the D branes, as well as the geometry picture with supergravity in flat space and Type IIB string theory. It suggests that the document is comparing and contrasting these two theoretical perspectives and proposing that they are both decoupled theories with identical asymptotic conditions." + }, + { + "question_id": 263, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This text section seems to conclude the discussion by mentioning that the analytical tools for two differing theoretical scenarios are completely incompatible. It references the Born-Infeld action and suggests that a mathematical comparison between different models yields coincident D-branes for an \\( \\text{SU}(N) \\) two-form field strength, relating to a broader discussion on theoretical physics and string theory." 
+ }, + { + "question_id": 264, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region exhibits a mathematical formula that is relevant to the discussion within the document. The formula appears to link certain theoretical physics concepts, connecting string coupling constants \\( g_s \\) with D-brane charges and configurations. The formula is most likely important in the context of supporting the document's claims about supergravity or string theory." + }, + { + "question_id": 265, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The paragraph in discusses the conventional assumption that an insider's private information is static, citing specific examples from the literature. It elaborates by stating that in certain works, insiders are assumed to know the final value of an asset both before and after the default of the company issuing the asset. The text suggests that the presence of insiders does not always lead to market arbitrage and may contribute positively to the market by leading to higher information efficiency in price processes." + }, + { + "question_id": 266, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text in challenges the assumption of an insider's perfect foresight as unrealistic, reasoning that the fundamental value of a firm is tied to dynamically changing elements like cash flows and sales, among other factors. 
The paragraph presents the idea that the fundamental value is stochastic, implying that it is subject to random fluctuations, and that the insider has the advantage of perceiving these fluctuations more clearly than other market participants." + }, + { + "question_id": 267, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "In , the document introduces the paper's goal, which is to relax the assumption of static information and examine the equilibrium in trading and price processes and market efficiency when insiders have dynamic private information. The paragraph sets the context for a more detailed exploration of how markets operate under these conditions." + }, + { + "question_id": 268, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": " contains text which explains that the model considered in this paper is a broader version of the earlier static models. The paper's intention is to cover dynamic information scenarios and improve on previous models that covered a narrower range of trading strategies and pricing rules." + }, + { + "question_id": 269, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The paragraph in discusses the findings of the paper, which include the identification of a Markovian equilibrium that is inconspicuous, allows insiders to trade without being detected, and is solely dependent on the total order process. 
It underscores the unique nature of this equilibrium and how it enhances the market efficiency in certain conditions." + }, + { + "question_id": 270, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text in suggests empirical outcomes where revealing information might be beneficial. Specifically, it contrasts different market equilibrium scenarios and suggests that in non-Markovian price processes, it’s often better for insiders to disclose their private information." + }, + { + "question_id": 271, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The Comprehensive Description includes \"Abbondanza\" displayed in a script font that conveys a sense of stylishness or elegance, which may suggest that it is the name of a business, possibly a restaurant or some sort of food-related establishment, given its association with abundance or plenty often related to food." + }, + { + "question_id": 272, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The Comprehensive Description for \"Cafe\" suggests that the text is identifying a type of establishment where coffee and light meals may be served. The font is straightforward and easily legible, which is typically used for clarity and immediate recognition for passersby." 
+ }, + { + "question_id": 273, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The Comprehensive Description for \"USIS\" indicates a text that is likely an acronym or a name presented in a bold and blocky font, common for official or institutional entities. It is placed on the side of a van, suggesting it could be the branding of a company or a service, possibly linked to the van's purpose or ownership." + }, + { + "question_id": 274, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"ESTATE\" is written in capital letters with a bold typeface that has clear and uniform strokes, implying a sense of authority and prominence. It appears against a yellow background, which suggests visibility and is likely meant to catch the eye of passersby. The text is likely part of a business sign for a company dealing with property, real estate sales, or management." + }, + { + "question_id": 275, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"AGENTS\" displayed similarly in capital letters and bold typeface complements the text in . The typeface is consistent, suggesting that both are part of the same sign. The dark text against the yellow background stands out, indicating the nature of the business below, which is likely involved in real estate agency work." 
+ }, + { + "question_id": 276, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"SAXONS\" is written in capital letters with a font style that is bold and prominent, but with a slightly more decorative style than and . This difference could be a stylized choice to make the brand name distinctive. Positioned on a façade above a window, it is part of the business's branding, likely the name of the company." + }, + { + "question_id": 277, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "This contains the same text as , \"SAXONS\", indicating that the text is repeated within the image. This repetition reinforces the importance of the name as part of the branding. The text style and location, again above a window, maintain the brand's visibility from multiple angles." + }, + { + "question_id": 278, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "Displaying the word \"ESTATE\" in the same bold, capital letter style as observed in . This repetition at a lower part of the building indicates a consistent branding approach across the business' presence on the building, and its placement closer to eye level increases readability for pedestrians." 
+ }, + { + "question_id": 279, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"AGENTS\" is identified, and like , it mirrors the style and size of the sign in , ensuring that the message of the business being an estate agency is clear. This consistent branding facilitates quick recognition and understanding of the services offered." + }, + { + "question_id": 280, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"Triple\" appears in a retro cursive script, likely chosen to convey a sense of nostalgia or classic style, which is consistent with the overall branding. It is set against a yellow portion of the sign, and the color choice here is a mint green which provides a pleasing contrast that makes the text stand out." + }, + { + "question_id": 281, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"WHITE\" is written in bold, capital letters, featured on a green ribbon-like background that cuts across the sign. The font is sans-serif, which gives a modern and clean look. The use of capital letters in this context suggests emphasis and importance." + }, + { + "question_id": 282, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The phrase \"SPOT\" is displayed in a sans-serif, uppercase font similar to the text in . It is placed within the same green ribbon background, mirroring the style and maintaining design consistency. 
This positioning completes the name or title represented on the signage." + }, + { + "question_id": 283, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"O's\" is written in a script that echoes the retro flair seen in . This script is mint green, presented on a yellow backdrop, and it features an apostrophe, signifying a possessive or a contraction. The stylized \"O\" has a red center dot, adding to the thematic color scheme." + }, + { + "question_id": 284, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"by\" is located on the lower left area of the central graphic and is likely to be a connector or a preposition relating to the larger text elements in the image. It's written in small, lowercase letters, contrasting in size to the other texts, suggesting a subordinate role in the information hierarchy." + }, + { + "question_id": 285, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001122.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text in this area reads \"NEW\". The font is bold and stylized with thick vertical lines and sharp edges, suggesting a strong, impactful presence. The letters are colored in red, which stands out against the white background of the fabric they are printed on. There are black vertical lines that run down the fabric, giving the impression of pinstripes. The text placement and style are reminiscent of classic athletic or team-related apparel, often used to represent a specific city or team name." 
+ }, + { + "question_id": 286, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001122.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text in this area reads \"YORK\". Similar to the previous region, it features a bold and stylized font in red, contrasting with the white pinstriped background. The consistent style between this text and that of suggests they form a single phrase, typically associated with a particular location or team. The font size and its commanding presence imply that the text is intended to be easily read and recognized from a distance, characteristic of team jerseys or sports merchandise." + }, + { + "question_id": 287, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_162.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"Colchester\" is displayed in a straightforward, sans-serif font with a bold weight, which makes it highly legible and easy to read. The text color is white, which contrasts sharply with the red background, creating a standout effect that captures attention. This type of text presentation is typically used for clear communication and effective signage. \"Colchester\" is likely the name of a place, possibly a destination or location referenced on a signpost or directional marker. The choice of a bold and contrasting color scheme is intentional, aimed at ensuring that the text is discernible from a distance and under various lighting conditions. The text is centrally aligned within the marked area, suggesting the importance of the information it conveys. The presence of the symbol above the text, resembling a stylized pair of railway tracks, indicates that this sign is associated with a railway service or station. 
The purpose of the text in this context is to inform viewers of a railway station name or a destination reachable via train services." + }, + { + "question_id": 288, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"AYAM\" is presented in large uppercase letters on a signage board. The font appears bold and designed to be eye-catching, serving the purpose of promoting or identifying a business or product associated with chicken, as \"ayam\" means chicken in Malay and Indonesian." + }, + { + "question_id": 289, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"RIA\" appears next to \"AYAM\" in the same font and style, following the design pattern of the sign. It seems to be part of a larger phrase or brand name, although without additional context it is challenging to ascertain its full meaning or association." + }, + { + "question_id": 290, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"SMASHED\" is in uppercase letters and retains the same font consistency and styling as the previous words, indicating it's part of the same signboard. The use of the word \"smashed\" could be describing a method of food preparation, possibly relating to the menu items offered by the establishment." 
+ }, + { + "question_id": 291, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"FRIED\" appears in the same bold, attention-grabbing font as the other text elements in the signage. The usage of the term \"fried\" aligns well with food-oriented establishments and could denote a particular style of cooking advertised by the business." + }, + { + "question_id": 292, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The term \"CHICKEN\" completes what seems to be a descriptive phrase relating to the nature of the food provided at this location. Presented in the same visual style as the other text elements on the sign, it confirms the establishment’s focus on chicken dishes." + }, + { + "question_id": 293, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"Accommodation\" appears on a signboard, suggesting the label for a location where lodging facilities are provided. The text is bold and capitalized, providing clear visibility and significance, thus indicating direction to the accommodation facilities within the vicinity." + }, + { + "question_id": 294, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"Office\" displayed similarly to , is also on the signboard, and its typography suggests it is an instructional marker guiding individuals towards offices located nearby. 
Its distinct appearance functions as a navigational aid for visitors seeking office spaces." + }, + { + "question_id": 295, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The term \"Nightline\" is prominently featured, possibly indicating a nighttime service or a helpline available after-hours. This text, like the others on the sign, caters to nighttime assistance or inquiries, potentially providing crucial information for individuals seeking support during late hours." + }, + { + "question_id": 296, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"BUBBA\" appears in bold, capital letters with a font that is playful and somewhat informal, possibly evoking a casual or friendly atmosphere. The position is prominently displayed at the top of a circular logo, which suggests its importance as a distinguishing element or a brand name." + }, + { + "question_id": 297, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"GUMP\" is presented in a similar bold and playful font directly below . Both words form a cohesive phrase when read together, implying a connection or partnership, possibly in a business context." + }, + { + "question_id": 298, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"SHRIMP\" is placed below and , completing the phrase that seems to be the focal point of the circular logo. 
The font style remains consistent with the previous text, reinforcing the brand's identity and likely indicating the type of product or service offered." + }, + { + "question_id": 299, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"RESTAURANT\" is written in a smaller, yet bold font beneath the word \"SHRIMP\". This text specifies the nature of the business associated with the overarching brand identified by the preceding text." + }, + { + "question_id": 300, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"MARKET\" appears in a smaller font at the bottom of the circle, suggesting a secondary or additional aspect of the business, perhaps indicating a place where goods are sold as part of the company's offerings." + }, + { + "question_id": 301, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"CO\" could stand for \"Company,\" abbreviated and presented beside the main brand name, which is common practice for businesses to denote a corporate entity." + }, + { + "question_id": 302, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"TM\" indicates that the entire phrase formed by , , and is a trademark. This protects the brand's unique identity and legally secures its use exclusively for the business's purposes." 
+ }, + { + "question_id": 303, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"EVENING\" appears in a sans-serif, bold font that is capitalized for emphasis. It is located on the lower part of a product label, positioned just above another text element that indicates further details about the product. The text serves to indicate either the usage time or a key ingredient, \"Evening Primrose,\" of the product, likely related to wellness or personal care." + }, + { + "question_id": 304, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"PRIMROSE\" is the second part of a phrase in which the text is styled similarly to the text in . It follows directly below \"EVENING,\" suggesting the complete term \"EVENING PRIMROSE.\" The positioning and styling are consistent with , reinforcing the connection between the two words. The term \"Evening Primrose\" is usually associated with the name of a plant, often used in the context of essential oils or natural product ingredients." + }, + { + "question_id": 305, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The term \"BASE\" is part of the full term \"BASE OIL,\" which is shown in a smaller and possibly lighter weight sans-serif typeface compared to and . Positioned at the bottom of the product label, it likely indicates the type of product, suggesting that the contents of the bottle can be used as a carrier or base oil in aromatherapy or skincare." 
+ }, + { + "question_id": 306, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"OIL\" completes the term \"BASE OIL,\" as mentioned in . The font and positioning maintain consistency with the description provided in , reinforcing its role as part of a descriptive phrase related to the product's use or contents. Together, \"BASE OIL\" likely designates the product's category within a larger set of similar goods." + }, + { + "question_id": 307, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01644.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"46\" appears in a serif font with distinct, prominent stylistic flares at the ends of the strokes, which is characteristic of serif fonts. These numerals are white, providing a strong contrast against a dark background plate, which appears to be made of slate or a similar material. The plate is mounted onto a brick wall, and there are two spherical, possibly metallic, fixtures attached to the plate on either side, which seem to be serving as decorative mounting posts. The purpose of this text likely indicates an address or number associated with the location, commonly used to identify specific residential or commercial units." + }, + { + "question_id": 308, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This area seems to indicate the numeric value '31,' which, in a calendrical context, may reference the number of days in a month. It does not appear to have any interactive features based on the screenshot." 
+ }, + { + "question_id": 309, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This seems to be a button or a link labeled 'Connecter' which, when translated from French, means 'Connect' or 'Log in'. It is likely an interactive element that upon being clicked, would prompt the user to access an account or initiate a connection process." + }, + { + "question_id": 310, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This section contains the French word 'Novembre', which is the month of November. It appears to be a part of a list of months, possibly for navigating a calendar or archives by month. It may be an interactive element that allows users to view content from November." + }, + { + "question_id": 311, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This is a button or link with the text 'Annoncez' followed by information icon (i). The French word 'Annoncez' translates to 'Advertise'. This suggests that it is a call-to-action for users to advertise, possibly by clicking this button or link. The information icon typically indicates additional details available upon interaction." 
+ }, + { + "question_id": 312, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This area displays the number '07', which could signify a day of the month, especially since it is seen next to a date heading in the format 'Vendredi 7 Mai 2021', which translates to 'Friday, May 7, 2021'. It seems to be a static element without interactivity." + }, + { + "question_id": 313, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "Similar to , this is labeled 'Septembre', which is the French word for September. It is part of the same apparent navigational element for a calendar or archive sorted by months and is likely interactive as well." + }, + { + "question_id": 314, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This section reads 'Liens Web', which translates to 'Web Links' from French. This likely refers to a section of the web page intended to direct users to other related sites or resources. It is probably interactive, with each listed link being clickable." + }, + { + "question_id": 315, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "The number '15' is visible here, and when combined with the context of a calendar visible in the screenshot, it likely represents the 15th day of a month. This element does not seem to be interactive itself." 
+ }, + { + "question_id": 316, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "Here we see the number '04' which, in the context of the surrounding calendar, might represent the 4th day of a month. It doesn't show any sign of interactivity." + }, + { + "question_id": 317, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This text, 'Plan du site', stands for 'Site Map' in French and usually refers to a detailed page listing where one can find an overview of all the sections and pages within the website. It is usually an interactive element that, when clicked, will take the user to the sitemap page." + }, + { + "question_id": 318, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "This region depicts an interactive button with the text \"BOOK NOW\" overlaid on it. Usually, buttons like this on websites are clickable and lead the user to a page where they can schedule an appointment or reserve a service. The button is stylistically designed to stand out and grab attention, potentially suggesting it is a call-to-action feature for users to quickly access the booking process for the service provided by the website." 
+ }, + { + "question_id": 319, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "This section of the website features a text link with the phrase \"Buy Luxurious Doona.\" It likely serves as a navigational element, which upon clicking, would redirect users to a page where they can purchase a \"Luxurious Doona.\" The term \"Doona\" typically refers to a type of bedding, suggesting that the site might be related to home goods or personal comfort items." + }, + { + "question_id": 320, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "This area contains a text link that reads \"Pay Balance.\" It is probably an interactive link that, once clicked, would take the user to a section of the website where they can complete a payment - likely concerning a service or product they have previously engaged with." + }, + { + "question_id": 321, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "Featured here is a clickable text link titled \"About Us.\" Such links generally lead users to a webpage that elaborates on the history, mission, values, or team behind the company or service. It helps users learn more about the company or website owners." + }, + { + "question_id": 322, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "This part of the webpage indicates a \"Contact us\" link. 
Clicking on this text would typically guide the visitor to a page featuring contact information or a form enabling the users to reach out to the company for inquiries or support." + }, + { + "question_id": 323, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "\"Home\" appears to be a navigation link that, when selected, would likely redirect users to the homepage of the website. The homepage is the main page that often provides a comprehensive overview of what the website offers." + }, + { + "question_id": 324, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The text here, \"doonawash@gmail.com,\" suggests an email address. This is likely provided for users to directly contact the company or service provider through email. It is not clickable but can be used to send an email using an email client or service." + }, + { + "question_id": 325, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "Similar to , this \"Contact us\" link would allow users to access a contact section or page on the website, promoting user interaction with the service provider for queries or assistance." + }, + { + "question_id": 326, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "Just like , this \"Home\" link is a navigational feature intended to bring the user back to the site's main page, presenting a starting point or central hub for exploring the website's contents." 
+ }, + { + "question_id": 327, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "Echoing , the \"Pay Balance\" text link is associated with the payment part of a transaction on the website. It is intended to facilitate users in clearing dues or completing transactions related to the services offered by the site." + }, + { + "question_id": 328, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_0558c1f4-c05b-49a8-8479-04b1575779d2.png", + "category": "web_detailed_caption_box", + "text": "This area of the webpage is part of a bullet point list under the subheading \"Local Chapters\". The subheading describes Local Chapters as country or region-level groups affiliated with the OpenStreetMap Foundation that represent their local mapping community in dealings with government, business, and media. The bullet point \"OpenStreetMap Belgium\" likely indicates that there is an established local chapter for the country of Belgium. The text appears in blue with an underline, suggesting that it is a hyperlink. Clicking on this hyperlink would presumably direct the user to more information about the OpenStreetMap community in Belgium or to their specific website." + }, + { + "question_id": 329, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "This area appears to be a contact detail, specifically a phone number. It typically serves as a direct line of communication for users to reach out to the company or organization featured on the website. Such contact information is usually clickable on mobile devices, enabling the user to initiate a phone call directly." 
+ }, + { + "question_id": 330, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "This section includes a numerical rating, which is indicative of client satisfaction, service quality, or performance measurement. It suggests that it may be connected to reviews or ratings received from clients, as denoted by the star symbol which commonly represents ratings." + }, + { + "question_id": 331, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "The text translates to \"Free Consultation\" in Azerbaijani, indicating an offering from the company to prospective clients. It is likely a call-to-action button which upon clicking, would lead a user to a form or contact option to set up a consultation without any charge." + }, + { + "question_id": 332, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "This part of the website is labeled \"Portfolio,\" signifying that it's likely a navigation element leading to a page where the company showcases their previous work, projects, or case studies to highlight their experience and expertise." + }, + { + "question_id": 333, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This space indicates a shopping cart feature with a count of items currently in the cart, which currently stands at zero. 
This interactive element likely becomes clickable when items are added, allowing users to view and manage the contents of their cart." + }, + { + "question_id": 334, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This area is typically a customer service feature, allowing users to access help or assistance through various means such as a help center, live chat, or contact information. It's usually clickable and would direct the user to a support section of the website." + }, + { + "question_id": 335, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "The text suggests a prompt to visit the company's Facebook page. This is an interactive element that, when clicked, likely redirects users to the specified social media page to engage with the company's content on that platform." + }, + { + "question_id": 336, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "It denotes an area that likely relates to personalization for users, where they can view their astrology charts. This is expected to be a clickable feature which, when accessed, leads the user to a section where their personalized charts are displayed or can be created." + }, + { + "question_id": 337, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "Similar to , this is a call to action to visit the company's Twitter page. 
Clicking on this interactive element would redirect a user to the company's Twitter profile to view tweets and engage with their content." + }, + { + "question_id": 338, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This is a customer contact area, providing users with a way to get in touch with the company. Clicking on this is likely to take the user to a section of the site with various contact options like email, phone, or a contact form." + }, + { + "question_id": 339, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This is a call-to-action button that allows users to add a product to their shopping cart. This button is interactive, and upon clicking, the chosen product would be added to the user's cart, with the action possibly reflected in the shopping cart count in ." + }, + { + "question_id": 340, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This area is likely dedicated to showcasing the company's range of products. Clicking here would probably lead users to a product catalog where they can browse and select items of interest." + }, + { + "question_id": 341, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "It represents an area designated for a podcast. Users can expect to interact with this button to be taken to a media player or section of the website where they can listen to recorded audio content." 
+ }, + { + "question_id": 342, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This part of the website provides company information to the user. It's normally a clickable element that leads the user to learn more about the company's history, values, mission, and team members." + }, + { + "question_id": 343, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This section typically represents a menu item on a website that describes the services or actions undertaken by the organization. It usually links to a page with detailed information on the work that the organization performs, including projects, mission statements, or other relevant content." + }, + { + "question_id": 344, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This appears to be a news headline or feature article title on the website. It suggests that the organization has introduced a new initiative offering financial assistance for livelihood projects. Clicking on this title would likely lead to an article or post giving more information about the micro-grants program and its objectives." + }, + { + "question_id": 345, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This is likely a button or link to a settings page where users can adjust their preferences for the website, which might include language settings, account details, notification preferences, and more." 
+ }, + { + "question_id": 346, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This commonly links to the website's privacy policy document, where users can learn about how the organization collects, uses, stores, and protects personal data." + }, + { + "question_id": 347, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This is typically a navigation link that returns the user to the main homepage of the website from any other page." + }, + { + "question_id": 348, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "It usually indicates a button the user can click to accept the terms of a policy, possibly related to cookies or usage terms, as indicated by the accompanying text." + }, + { + "question_id": 349, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This is often a prominent call-to-action button meant to direct users to a page where they can make financial contributions to the organization or cause." 
+ }, + { + "question_id": 350, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "Commonly a menu item that links to a news section containing articles, updates, blog posts, press releases, or other information that keeps readers informed about the organization's activities or relevant topics." + }, + { + "question_id": 351, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This is typically a link to a page where users can find more information about the organization, including history, values, team members, or accomplishments." + }, + { + "question_id": 352, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "Usually a link to a page where visitors can find contact information for the organization, such as an address, phone number, email, or a contact form." + }, + { + "question_id": 353, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_6c677961-e540-4cc5-b725-5e301019a9f9.png", + "category": "web_detailed_caption_box", + "text": "This region appears to be a toolbar located within a content editing area, likely part of a web-based application interface. The specific feature highlighted is an icon that suggests functionality related to inserting tables into the content. In a typical text editor or content management system interface, clicking this icon would presumably open a menu or dialogue box allowing the user to create and insert a table into the document. 
The table insertion feature commonly lets users specify the number of rows and columns, choose a table style, and sometimes adjust additional table properties such as cell padding or headers." + }, + { + "question_id": 354, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "This area encompasses a navigation element labeled \"News.\" It likely leads to a section of the website where current news relevant to the organization or its field of operation is disseminated. As a navigational element, it is interactive and upon clicking would redirect users to the page where news articles or updates are posted." + }, + { + "question_id": 355, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "This area displays the company's name ServeGate, which appears to be stylized as a logo. This typically acts as a home button; clicking on it would usually take users back to the main or home page of the website." + }, + { + "question_id": 356, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "This heading titled \"Our Difference\" may signify a navigational item that leads to content describing what sets the organization apart from competitors. Interaction would involve clicking it to navigate to a page that likely discusses the company's unique selling propositions (USPs), mission, values, or other differentiating factors." 
+ }, + { + "question_id": 357, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "Labeled \"About Us,\" this is another navigation item that, when clicked, would take the user to a section of the site that provides information about ServeGate, such as its history, leadership team, vision, and mission." + }, + { + "question_id": 358, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The text \"Get in touch\" suggests an interactive component that leads to a part of the website where users can contact the organization. This may include a contact form, phone numbers, email addresses, or other means of communication." + }, + { + "question_id": 359, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The term \"Home\" designates a navigational link that typically redirects users to the front page of the website. Clicking this link would generally return the user to the starting point of their navigation experience." + }, + { + "question_id": 360, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The item labeled \"Services\" is likely a drop-down menu or a link to a page that outlines the company's offerings. Users can click on it to discover more about the services provided by ServeGate, including descriptions and possibly pricing or someone to contact for further inquiry." 
+ }, + { + "question_id": 361, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "\"Indigenous Impact\" might be a navigation link to a page detailing the company's impact on, contributions to, or programs associated with Indigenous communities. Interaction with this element would bring the user to either a dedicated section or might expand into a submenu listing various facets of this impact." + }, + { + "question_id": 362, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "This appears to be another instance of the company logo for ServeGate, similar to . It likely serves the same function as a clickable link leading back to the home page of the website." + }, + { + "question_id": 363, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_5a35d9c2-2c2d-4a49-ad0a-1408d9cac78e.png", + "category": "web_detailed_caption_box", + "text": "The area appears to contain an interface element labeled \"Close issue\" situated within a software development environment, likely a part of an issue tracking or project management system. This interface element is a button, as suggested by its design and placement near the text input area for comments. When clicked, it would typically result in the associated issue being marked as resolved or closed in the system, thereby updating the status of the issue within the project's workflow. The button is designed for users to signify that the discussion, problem, or task represented by the issue no longer requires attention and can be archived or removed from active consideration." 
+ }, + { + "question_id": 364, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_3fed2169-3c3d-43e7-baaa-3bf0e0c7134c.png", + "category": "web_detailed_caption_box", + "text": "This is a close button found on a web-based issue tracking platform, specifically GitLab. In the context of the page, it is presumably used to close the issue which is currently being viewed. When a user clicks this button, the active issue (#1460 in this case) will likely be marked as closed within the system, changing its status from open to closed. This is a standard feature in issue tracking and project management software, allowing users to manage the lifecycle of issues and tasks." + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/ocr_doc_detailed_caption_box/prediction.json b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/ocr_doc_detailed_caption_box/prediction.json new file mode 100644 index 0000000000000000000000000000000000000000..46b7886dee75138be7ac702f93ad13e4478cf79c --- /dev/null +++ b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/ocr_doc_detailed_caption_box/prediction.json @@ -0,0 +1,2186 @@ +[ + { + "question_id": 1, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "Friday, Sep 2, 61°F" + }, + { + "question_id": 2, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A stylized letter \"G\" with a gradient of colors starting from red at the top, transitioning to orange, yellow, green, blue, and ending with purple at the bottom." 
+ }, + { + "question_id": 3, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A white, fluffy cloud with a smooth, rounded shape and a slightly irregular outline. The cloud has a soft, cotton-like texture and appears to be illuminated from the left side, giving it a gentle gradient from white to a light blue hue." + }, + { + "question_id": 4, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A white gear with a blue circular center and six evenly spaced, straight teeth." + }, + { + "question_id": 5, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A white speech bubble icon with a blue outline, featuring three horizontal blue lines of varying lengths inside the bubble." + }, + { + "question_id": 6, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A red, rectangular play button with rounded corners, featuring a white, centered play triangle pointing to the right." + }, + { + "question_id": 7, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A stylized letter \"G\" with a gradient of colors starting from red at the top, transitioning to orange, yellow, green, and ending with blue at the bottom." 
+ }, + { + "question_id": 8, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A blue circle with a red dot above it and a yellow dot below it." + }, + { + "question_id": 9, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A circular button with a white center and a thin black border." + }, + { + "question_id": 10, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A white, right-pointing arrow with a thick border." + }, + { + "question_id": 11, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Excel\" is written in a bold, sans-serif font with a white color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 12, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Facebook\" is written in a bold, sans-serif font with a light pink color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." 
+ }, + { + "question_id": 13, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Speedtest\" is written in a bold, sans-serif font with a white color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 14, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Instagram\" is written in a bold, sans-serif font with a light pink color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 15, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Home\" is written in a bold, sans-serif font with a white color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 16, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Lite\" is written in a bold, sans-serif font with a white color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." 
+ }, + { + "question_id": 17, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Messenger\" is written in a bold, sans-serif font with a gradient of pink to white. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 18, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "Home De..." + }, + { + "question_id": 19, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "A pair of cartoonish eyes with large, white, oval-shaped pupils and black, curved eyelashes. The eyes are set against a green background." + }, + { + "question_id": 20, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "A blue square with a white lowercase \"g\" followed by three white horizontal lines of equal length." + }, + { + "question_id": 21, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"Search\" is written in a bold, sans-serif font with a dark gray color. The letters are evenly spaced and aligned horizontally." 
+ }, + { + "question_id": 22, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"settings\" in lowercase letters, with a modern, sans-serif font. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance. The color of the text is a gradient of light to dark gray, with the darker shade at the top and the lighter shade at the bottom." + }, + { + "question_id": 23, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "mobile, data" + }, + { + "question_id": 24, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"data\" in lowercase letters, with a modern, sans-serif font. The letters are evenly spaced and have a clean, minimalist design." + }, + { + "question_id": 25, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The Wi-Fi symbol consists of a lowercase \"i\" with a dot above it, followed by a lowercase \"f\" with a dot above it." + }, + { + "question_id": 26, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"and\" in lowercase letters, with a serif font, is written in a dark color against a light background." 
+ }, + { + "question_id": 27, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"hotspot\" in lowercase letters, with a modern, sans-serif font. The letters are evenly spaced and have a clean, minimalist design." + }, + { + "question_id": 28, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "usage, and hotspot" + }, + { + "question_id": 29, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"Connected\" is written in a bold, sans-serif font with a black color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 30, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "A white, L-shaped object with a smooth surface and rounded edges." + }, + { + "question_id": 31, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "Search or type web address" + }, + { + "question_id": 32, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The text \"ebay shopping cart\" is displayed in lowercase letters. 
The word \"ebay\" is in a larger font size compared to the word \"shopping cart\". The text is in a sans-serif font and is centered horizontally." + }, + { + "question_id": 33, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The word \"cart\" in lowercase letters, with a bold, sans-serif font. The letters are black and evenly spaced." + }, + { + "question_id": 34, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "https://cart.ebay.com/" + }, + { + "question_id": 35, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "Welcome to Costco Wholesale" + }, + { + "question_id": 36, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The text \"costco.com\" is displayed in lowercase letters." + }, + { + "question_id": 37, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "Pay Less." + }, + { + "question_id": 38, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The word \"Target\" in bold, black, sans-serif font." 
+ }, + { + "question_id": 39, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The text \"Expect More.\" is written in a bold, sans-serif font with a black color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 40, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The text \"target.com\" is displayed in a bold, sans-serif font with a blue color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 41, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The word \"skype\" is written in lowercase letters with a modern, sans-serif font. The letters are black and evenly spaced, with a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 42, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The word \"Skype\" is written in lowercase letters with a modern, sans-serif font. The letters are black and evenly spaced, with a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 43, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "A blue circular icon with a white lowercase \"s\" in the center." 
+ }, + { + "question_id": 44, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The word \"Install\" is written in lowercase white letters on a green background." + }, + { + "question_id": 45, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The word \"Skype\" is written in a bold, sans-serif font with a dark gray color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 46, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "purchases" + }, + { + "question_id": 47, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "In-app purchases" + }, + { + "question_id": 48, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "A black star with a five-pointed shape, featuring a slightly irregular outline and a textured surface that suggests a three-dimensional form." + }, + { + "question_id": 49, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "A black letter \"B\" followed by a black plus sign." 
+ }, + { + "question_id": 50, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "Editors' Choice" + }, + { + "question_id": 51, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Search settings" + }, + { + "question_id": 52, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "A white envelope icon with a triangular flap on the right side, set against a dark background." + }, + { + "question_id": 53, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "The word \"Add\" in lowercase letters, with a bold font and a slight shadow effect, giving it a three-dimensional appearance." + }, + { + "question_id": 54, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "another email" + }, + { + "question_id": 55, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "The word \"account\" in lowercase letters, with a bold font and a slight italicization, set against a plain background." 
+ }, + { + "question_id": 56, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Set up your personal or work email" + }, + { + "question_id": 57, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "or work email" + }, + { + "question_id": 58, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Network & internet" + }, + { + "question_id": 59, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Wi-Fi, mobile, data" + }, + { + "question_id": 60, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "usage, and" + }, + { + "question_id": 61, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "Wednesday, May 18" + }, + { + "question_id": 62, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "The word \"Maps\" in a bold, sans-serif font, with a gradient of pink to white, giving it a three-dimensional appearance." 
+ }, + { + "question_id": 63, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "A stylized letter \"G\" with a gradient of colors starting from red at the top, transitioning to orange, yellow, green, blue, and ending with purple at the bottom." + }, + { + "question_id": 64, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "A red, teardrop-shaped pin with a black circular center." + }, + { + "question_id": 65, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "A white speech bubble icon with a blue outline, containing three horizontal blue lines of varying lengths." + }, + { + "question_id": 66, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "A stylized letter \"G\" with a gradient of colors starting from red at the top, transitioning to orange, yellow, green, and ending with blue at the bottom." + }, + { + "question_id": 67, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "A blue circle with a white border, a red circle with a white border, and a yellow circle with a white border." 
+ }, + { + "question_id": 68, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The text \"costco.com/Check\" is displayed in a bold, sans-serif font. The word \"costco.com\" is in lowercase letters, and the word \"Check\" is in uppercase letters. The text is aligned to the left." + }, + { + "question_id": 69, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The logo features the word \"COSTCO\" in large, bold, red capital letters with a white outline. Below it, the word \"WHOLESALE\" is written in smaller, bold, blue capital letters." + }, + { + "question_id": 70, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The word \"Warehouses\" is written in a bold, sans-serif font with a light blue color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 71, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The word \"Account\" in a bold, sans-serif font, with a gradient of blue shades ranging from light to dark, giving it a three-dimensional appearance." + }, + { + "question_id": 72, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "A blue shopping cart with a white handle and a white basket area." 
+ }, + { + "question_id": 73, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "A blue rectangular sign with the word \"Shop\" in white, bold, sans-serif font. To the left of the text, there are three horizontal white lines of varying lengths." + }, + { + "question_id": 74, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The word \"Search\" in a bold, sans-serif font, with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and have a consistent size throughout." + }, + { + "question_id": 75, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "My Warehouse" + }, + { + "question_id": 76, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "Delivery Location" + }, + { + "question_id": 77, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The word \"Seattle\" in bold, black, sans-serif font." 
+ }, + { + "question_id": 78, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "Fetch Rewards: Play" + }, + { + "question_id": 79, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The word \"Play\" in bold, black, sans-serif font." + }, + { + "question_id": 80, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "to earn" + }, + { + "question_id": 81, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The text \"MAKE\" is written in bold, uppercase letters with a dark green color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 82, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The word \"MONEY\" in bold, uppercase letters with a green background and black outline." + }, + { + "question_id": 83, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The text \"appminer st\" is displayed in lowercase letters. The letters are green and have a sans-serif font. The text is aligned to the left." 
+ }, + { + "question_id": 84, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The word \"Contains\" is written in a bold, sans-serif font with a light gray color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 85, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The text \"ads\" is written in lowercase letters with a modern, sans-serif font. The letters are evenly spaced and have a clean, minimalist design. The color of the text is a light gray, blending subtly with the background." + }, + { + "question_id": 86, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The text \"50K+\" is displayed in bold, black font with a slight shadow effect, giving it a three-dimensional appearance. The \"50K\" is in a larger font size compared to the \"+\" sign." + }, + { + "question_id": 87, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "A black star with a five-pointed shape, featuring a slightly irregular outline and a textured surface that suggests a three-dimensional form." 
+ }, + { + "question_id": 88, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A black and white image of a computer keyboard with a standard QWERTY layout, including function keys, a number pad, and arrow keys. The keys are rectangular with white lettering on black keys, and the keyboard has a slight ergonomic curve." + }, + { + "question_id": 89, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The text \"walmart.com\" is displayed in a bold, sans-serif font. The letters are black and evenly spaced, with a slight shadow effect, giving them a three-dimensional appearance. The text is aligned to the left." + }, + { + "question_id": 90, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A white rectangular signboard with the text \"Lenovo Thinkpad\" in black, sans-serif font." + }, + { + "question_id": 91, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A blue rectangular button with rounded corners featuring the word \"Cancel\" in white, bold, sans-serif font." + }, + { + "question_id": 92, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A black and white image of a Lenovo ThinkPad laptop with a visible keyboard and trackpad." 
+ }, + { + "question_id": 93, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The word \"Lenovo\" in lowercase letters, with a bold font and a slight italicization, set against a plain background." + }, + { + "question_id": 94, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A black, rectangular computer keyboard with a standard QWERTY layout, including a number pad on the right side. The keys are chiclet-style with white lettering, and there is a slight sheen on the surface, suggesting a smooth texture. The function keys are aligned along the top, and there is a visible space bar at the bottom center." + }, + { + "question_id": 95, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The word \"in\" in lowercase letters, with a bold font and a slight shadow effect, giving it a three-dimensional appearance." + }, + { + "question_id": 96, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A rectangular electronic device with a screen displaying text, surrounded by a thin bezel." + }, + { + "question_id": 97, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A black and white image of a Lenovo ThinkPad charger." 
+ }, + { + "question_id": 98, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "NAVIGATING SPECIAL EDUCATION SOCIAL & EMOTIONAL LEARNING" + }, + { + "question_id": 99, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A yellow background with two human head outlines facing each other. The left head has the words \"FIXED MINDSET\" above a red downward arrow, and the right head has the words \"GROWTH MINDSET\" above a green upward arrow." + }, + { + "question_id": 100, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon character with a serious expression, featuring a dark gray hair, a light blue shirt, and a red cross symbol on the left side of the head. The character has a red frown and is surrounded by two white, cloud-like shapes on either side of the head." + }, + { + "question_id": 101, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular logo with a beige background featuring five hands in different colors: red, green, blue, purple, and orange, arranged in a circular pattern. Above the hands, the text \"Understanding Diversity\" is written in black." + }, + { + "question_id": 102, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A black and white illustration of a person with a light bulb on their head, holding a book. 
The person has a question mark above their head and another question mark to the right of their head. The background is a light peach color." + }, + { + "question_id": 103, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon illustration of two boys, one wearing a red shirt and blue shorts, and the other wearing a striped shirt and brown shorts, both with their arms raised. The word \"Bullying\" is written above them." + }, + { + "question_id": 104, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon of a boy and a girl playing with each other. The boy is wearing a yellow shirt, black shorts, and red shoes. The girl is wearing a yellow shirt, blue pants, and red shoes. Both have black hair and are smiling." + }, + { + "question_id": 105, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A blue speech bubble with a yellow lightning bolt symbol, and a red speech bubble with a yellow lightning bolt symbol, both containing a person's face." + }, + { + "question_id": 106, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A rectangular blue traffic sign with a white border, featuring three white arrows. The leftmost arrow curves to the left, the middle arrow points straight up, and the rightmost arrow curves to the right. Below the arrows, the word \"CHANGES\" is written in white capital letters." 
+ }, + { + "question_id": 107, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized illustration of two human figures, one in yellow and the other in blue, both with black outlines. The yellow figure is standing on a staircase, while the blue figure is standing on a platform. The blue figure is holding a microphone and appears to be speaking or presenting. The word \"Leadership\" is written in black text above the figures." + }, + { + "question_id": 108, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A blue circle with a white plus sign inside it, followed by a white \"2X\" text." + }, + { + "question_id": 109, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A hand holding a person with a blue shirt and black pants, with a purple banner below displaying \"$4,000\" in white text." + }, + { + "question_id": 110, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A pie chart with a blue background and a white border, featuring a white line that divides the chart into two sections. The left section is larger and has a white number \"36%\" inside it, while the right section is smaller and has a white number \"36%\" inside it." 
+ }, + { + "question_id": 111, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A globe with a blue ocean and white continents, featuring a purple banner with white text that reads \"2.7 trillion impact to global GDP from use of more efficient talent platforms.\"" + }, + { + "question_id": 112, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "46% of companies are sometimes or frequently understaffed" + }, + { + "question_id": 113, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Travel personas: how travelers identify their travel style" + }, + { + "question_id": 114, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular planner with a white background featuring a colorful illustration of two people, one wearing a red hat and the other wearing a blue hat, both holding a smartphone. The person in the red hat is holding a book, and the person in the blue hat is holding a suitcase. The background includes a mountain and a sun. The text \"THE SMART PLANNER\" is written in bold, black letters at the top." 
+ }, + { + "question_id": 115, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "The Relaxed Nomad" + }, + { + "question_id": 116, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A family of five, consisting of a man, a woman, and three children, standing together. The man is holding a baby, while the woman is holding a suitcase. The children are standing around them, with one child holding a suitcase. The family is depicted in a circular frame." + }, + { + "question_id": 117, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular white background with a thin black border." + }, + { + "question_id": 118, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular white background with a thin black border." + }, + { + "question_id": 119, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A red airplane seat with a high backrest and armrests, featuring a small, rectangular, red and white logo on the backrest." 
+ }, + { + "question_id": 120, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular logo with a white background featuring a stylized illustration of a person in a blue suit with a red briefcase in their left hand and a blue suitcase in their right hand. The person is depicted in a walking motion, with one leg forward and the other leg back. The text \"THE BUSINESS ROAD WARRIOR\" is written in bold, black capital letters above the illustration." + }, + { + "question_id": 121, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A woman with short brown hair, wearing a purple top with a white collar, is holding a yellow envelope in her right hand. She has a headset on her head and is standing in front of a computer monitor with the word \"BIG\" visible on the screen." + }, + { + "question_id": 122, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular orange background with a white border." + }, + { + "question_id": 123, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular logo with a light blue background featuring a stylized globe in the center. The globe is divided into four quadrants, each in a different shade of blue. A black headset with a microphone is positioned over the globe, with the earpieces extending outward. To the right of the globe, there is a yellow star." 
+ }, + { + "question_id": 124, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon man with a light blue shirt and a black tie, wearing a headset with a microphone. He has a light brown hair and is pointing upwards with his right hand. To his right, there is a yellow light bulb with a red base." + }, + { + "question_id": 125, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A blue and black wrench with a flat-head design, featuring a blue handle with a textured grip and a black head with a serrated edge. The wrench has a long, straight shaft connecting the handle to the head." + }, + { + "question_id": 126, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized illustration of a person wearing a headset with a microphone, a purple shirt, and a white undershirt. The person is holding a smartphone with a blue and white design on the screen." + }, + { + "question_id": 127, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized illustration of a hand holding a black and white telephone handset, with a blue circular background featuring a partial globe and a speech bubble with the word \"BIG\" in white. The hand is wearing an orange life jacket with white stripes." 
+ }, + { + "question_id": 128, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular logo with a red border, featuring a stylized design of a person wearing a white shirt and a black tie, with a blue and white striped hat. The background includes a Union Jack flag and a yellow rectangle." + }, + { + "question_id": 129, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon woman with brown hair tied back, wearing a purple shirt with a white collar, and a headset. She is holding a yellow star in her right hand and has a black and white striped object in her left hand." + }, + { + "question_id": 130, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized heart with a vibrant red color, featuring a prominent blue and orange flame-like design on the upper left side, and a smaller blue and orange flame-like design on the lower right side." + }, + { + "question_id": 131, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized human figure with arms raised in a celebratory pose, surrounded by four blue arrows pointing outward, each arrow with a slight curve and a pointed tip." 
+ }, + { + "question_id": 132, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A digital bathroom scale with a black base and a white digital display showing the weight." + }, + { + "question_id": 133, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A pair of pink lungs with a central trachea, featuring detailed vein patterns and a slightly curved shape." + }, + { + "question_id": 134, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A yellow smiley face with a black outline, featuring two black dots for eyes and a curved black line for a mouth." + }, + { + "question_id": 135, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A human knee with a visible bone structure, including the femur and tibia, with a slight curvature and a smooth surface." + }, + { + "question_id": 136, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A human brain with a detailed, textured surface, featuring a prominent cerebral cortex and a visible portion of the brain stem." 
+ }, + { + "question_id": 137, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A red heart with a white outline and a white line through the center, indicating a heart rate or rhythm." + }, + { + "question_id": 138, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "The word \"HOTAPPE\" is written in large, bold, uppercase letters. The letters are colored in the following order from left to right: red, orange, yellow, light blue, and dark blue." + }, + { + "question_id": 139, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular smiley face with a yellow face and a red border. The face has two blue teardrop-shaped eyes, a brown curved mouth, and two brown curved lines for eyebrows." + }, + { + "question_id": 140, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "An orange circle with two brown hands facing each other, with the word \"OPENNESS\" in green capital letters below the circle." + }, + { + "question_id": 141, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular yellow background with a pair of hands clasped together in the center. The hands are depicted in a light brown color, with the left hand slightly overlapping the right hand. 
Radiating from the hands are white lines, giving the impression of light or energy emanating from the hands. Below the hands, the word \"TOUCH\" is written in bold, uppercase letters." + }, + { + "question_id": 142, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular logo with a light blue background featuring a large red heart in the center. Inside the heart, there is a white exclamation mark. Below the heart, the word \"ATTENTION\" is written in bold, uppercase letters." + }, + { + "question_id": 143, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular blue background with two cartoon faces, one on the left and one on the right, separated by a white dashed line. The left face has brown hair and a neutral expression, while the right face has light brown hair and a neutral expression. Below the faces, the word \"PROXIMITY\" is written in bold, uppercase letters." + }, + { + "question_id": 144, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular blue background with a white eye in the center, featuring a brown iris and a white sclera. Below the eye, the words \"EYE CONTACT\" are written in bold, uppercase letters." + }, + { + "question_id": 145, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with a green background, wearing a red shirt, with a thermometer in their mouth." 
+ }, + { + "question_id": 146, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with a green background, wearing a pink shirt, with a red nose and a red mouth, and a white hand with three fingers extended." + }, + { + "question_id": 147, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with a green background, wearing a pink shirt, with a green and white object in their mouth." + }, + { + "question_id": 148, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized illustration of a person with a green circular background. The person has a gray face with a red nose and a red mouth. The person is wearing a red shirt with a yellow collar." + }, + { + "question_id": 149, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with a green background, wearing a red shirt and white pants, is sitting on a white chair." 
+ }, + { + "question_id": 150, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular green sign with a white border, featuring a stylized illustration of a pair of red lungs with a gray outline, set against a dark background." + }, + { + "question_id": 151, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized illustration of a kidney with a red and pink color scheme, featuring a central red area with a pink outline, flanked by two symmetrical, curved, pink shapes resembling the kidney's lobes, all set against a light green background." + }, + { + "question_id": 152, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A blue rectangular header with the text \"Interesting Facts\" in white, bold, sans-serif font. To the left of the text, there are three white horizontal lines. To the right of the text, there is a red heart symbol." + }, + { + "question_id": 153, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized panda face with a white face, black ears, black patches around the eyes, and a black nose. The panda has a small, curved black mouth and a content expression. The face is set against a green circular background." 
+ }, + { + "question_id": 154, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A green circular background with a white plate in the center, containing a yellow circle. To the left of the plate is a white fork, and to the right is a white spoon." + }, + { + "question_id": 155, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular icon with a dark blue background featuring a red heart in the center. The heart is outlined in white and has a white line running horizontally across its middle. Below the heart, the word \"Diseases\" is written in white, bold, sans-serif font." + }, + { + "question_id": 156, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A white long-sleeved shirt with a teal collar and cuffs, featuring a row of black buttons down the front." + }, + { + "question_id": 157, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular orange icon with a white film reel design in the center. The film reel has a blue border with white squares on the left and right sides, and a white center with a blue horizontal line dividing it into two sections." 
+ }, + { + "question_id": 158, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular green icon featuring a white syringe with a red cross symbol on the barrel, a white droplet to the right of the syringe, and a yellow needle." + }, + { + "question_id": 159, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular blue icon featuring a white wine glass with a yellow liquid on the left and a white bottle with a yellow liquid and a brown cap on the right." + }, + { + "question_id": 160, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular icon with a dark blue background featuring two stylized human figures. The figure on the left has short, light brown hair and is wearing a red shirt. The figure on the right has short, light brown hair and is wearing a brown shirt with a white collar." + }, + { + "question_id": 161, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular icon with a solid olive green background featuring a white silhouette of the Earth in the center." + }, + { + "question_id": 162, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person sitting on a chair with their head in their hands, wearing a blue shirt and black pants." 
+ }, + { + "question_id": 163, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with short brown hair, wearing a blue long-sleeve shirt and yellow pants, is sitting on a white platform with a blue wave design at the bottom." + }, + { + "question_id": 164, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A woman with dark hair tied back, wearing a pink top, is depicted with a thought bubble above her head." + }, + { + "question_id": 165, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with red hair, wearing a green top and blue pants, is sitting on a windowsill with their legs crossed." + }, + { + "question_id": 166, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A woman with dark hair tied back, wearing a blue sleeveless top and black leggings, is running with her arms slightly bent and her legs in motion. She has a white earphone cord hanging from her right ear." 
+ }, + { + "question_id": 167, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A woman with black hair tied back, wearing a green headband, a green sleeveless top, and black pants, is sitting cross-legged with her hands pressed together in a prayer position." + }, + { + "question_id": 168, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "A green dinosaur with a white belly, a purple dinosaur with a white belly, and a green dinosaur with a white belly." + }, + { + "question_id": 169, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon boy with a purple bandage on his forehead, wearing a blue shirt and blue pants, is holding a purple object in his right hand." + }, + { + "question_id": 170, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "A lime green refrigerator with a single door, featuring a black handle on the right side. The door has a horizontal indentation near the top." 
+ }, + { + "question_id": 171, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "A small, rectangular, lime green refrigerator with a single door featuring a vertical handle on the left side. The bottom section of the refrigerator is orange with a horizontal handle." + }, + { + "question_id": 172, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "A dark-colored, rectangular structure with a flat roof and vertical sides, featuring a small, square window on the upper left side and a larger, rectangular window on the lower right side." + }, + { + "question_id": 173, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "A tree with dense, green foliage." + }, + { + "question_id": 174, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "A cylindrical, metallic pole with a consistent diameter throughout its length, featuring a series of evenly spaced, horizontal bands encircling its surface." + }, + { + "question_id": 175, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A deep, dark-colored bowl with a wide, flared rim and a smooth, glossy finish." 
+ }, + { + "question_id": 176, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A modern kitchen with a stainless steel oven and a black cooktop. The oven has a digital display and control panel, and there is a visible handle on the oven door. The cooktop has multiple burners with black grates." + }, + { + "question_id": 177, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A white, rectangular wall socket with a single, round, black power switch located on the right side." + }, + { + "question_id": 178, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A white, rectangular light switch with rounded edges, featuring a central toggle switch mechanism." + }, + { + "question_id": 179, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "Stainless steel built-in oven with a large glass door, featuring a digital control panel above the door with multiple buttons and a display screen." + }, + { + "question_id": 180, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A tall, slender vase with a flared rim and a narrow neck that gradually widens into a bulbous base, featuring a glossy finish with a gradient of colors transitioning from a deep blue at the top to a greenish hue towards the bottom." 
+ }, + { + "question_id": 181, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "The floor is composed of medium-toned wooden planks with a smooth, polished finish. The wood grain is visible, running lengthwise along the planks, which are laid out parallel to each other. The planks have a consistent width and exhibit a warm, reddish-brown hue." + }, + { + "question_id": 182, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A vertical, wall-mounted spice rack with multiple tiers, each tier holding several glass jars with metal lids. The jars are arranged in a single column, and the rack appears to be made of a dark, possibly wooden material." + }, + { + "question_id": 183, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A round, dark-colored table with a smooth surface and a central pedestal base." + }, + { + "question_id": 184, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "The chair features a high, slightly curved backrest and seat cushion upholstered in a woven fabric with a diamond pattern. The fabric is primarily light green with a central vertical stripe in a slightly darker shade. The armrests are padded and covered in the same woven fabric, with a light green color. The chair's legs are dark-colored and straight." 
+ }, + { + "question_id": 185, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A purple place mat with a textured surface and a white circular design in the center." + }, + { + "question_id": 186, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A vertical traffic light with three circular lenses arranged in a column, displaying a red light at the top, an unlit middle lens, and a green light at the bottom, all encased in a black housing with a visor over each lens." + }, + { + "question_id": 187, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A pink and white striped awning with a scalloped edge and fringe detailing." + }, + { + "question_id": 188, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A tall, dark brown tree trunk with a rough, textured bark. The trunk is relatively straight and has a consistent width throughout its visible length." + }, + { + "question_id": 189, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A chalkboard sign with a wooden frame displaying the text \"Château de la Bertrandière\" in elegant, cursive script." 
+ }, + { + "question_id": 190, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A tall, narrow, rectangular window with a dark frame and a single vertical pane of clear glass." + }, + { + "question_id": 191, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "A chestnut horse with a white star on its forehead, dark eyes, and a soft, dark muzzle. Its ears are pricked forward, and the mane appears to be a lighter shade of chestnut, blending into the darker coat. The horse's neck shows a gentle curve, and the coat has a healthy sheen." + }, + { + "question_id": 192, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "A brown horse with a dark mane." + }, + { + "question_id": 193, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "A light gray horse with a darker mane and tail, featuring a well-muscled build, a straight profile, and a calm demeanor." + }, + { + "question_id": 194, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A white, rectangular table with rounded corners and a smooth surface." 
+ }, + { + "question_id": 195, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A modern black induction cooktop featuring a sleek design with two visible cooking zones, each outlined with a white circular pattern. The front edge has a stainless steel trim with control buttons, and there are small yellow indicator lights above the buttons." + }, + { + "question_id": 196, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A glossy, metallic spoon with a reflective surface and a tapered handle, featuring a rounded bowl with a slight indentation on one side." + }, + { + "question_id": 197, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A metallic spoon with a reflective surface and a tapered handle that widens towards the end, featuring a rounded bowl with a pointed tip." + }, + { + "question_id": 198, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000299654.jpg", + "category": "natural_detailed_caption_box", + "text": "A zebra with a distinctive black and white striped pattern, featuring a mane of short, erect hair transitioning from black at the base to white at the tips. The stripes on the neck are vertical and become more horizontal as they reach the mane. The visible part of the zebra's face shows a pattern of narrow stripes that converge around the eyes and muzzle, with a white area above the eyes and a dark nose. The ears are pointed and display a striped pattern consistent with the head." 
+ }, + { + "question_id": 199, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000107939.jpg", + "category": "natural_detailed_caption_box", + "text": "An octagonal red stop sign with bold, all-caps white lettering spelling \"STOP\" centered on the sign." + }, + { + "question_id": 200, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000107939.jpg", + "category": "natural_detailed_caption_box", + "text": "An octagonal sign with a red background and a white border, featuring the word \"STOP\" in white uppercase letters." + }, + { + "question_id": 201, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "A dark brown, square-shaped cushion with a visible textured surface that suggests a soft, plush fabric." + }, + { + "question_id": 202, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "A dark brown, square-shaped pillow with a visible corner that appears to be soft and plush." + }, + { + "question_id": 203, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "The earlobe is small and rounded, with a smooth, slightly glossy surface. It is adorned with a small, round, gold-colored earring that has a subtle, reflective sheen." 
+ }, + { + "question_id": 204, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "A young girl with curly hair, wearing a patterned top with a mix of geometric and floral designs in dark and vibrant colors, paired with long-sleeved pink undershirts. Her expression is one of mild surprise or excitement, with her mouth slightly open and eyes looking upwards. Her arms are outstretched with palms facing up, as if gesturing or presenting something." + }, + { + "question_id": 205, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "A young boy with curly hair, wearing a striped sweater with shades of blue, white, and brown, stands with his hands clasped together. He has a focused expression on his face, with his mouth slightly open and his eyes looking to the side." + }, + { + "question_id": 206, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A ski parka featuring a color-block design with a white torso, green sleeves, and black shoulder panels. The jacket has a high collar and a front zipper closure. There are red accents on the cuffs and a red logo on the left chest area. The parka is paired with a black helmet with a visor." + }, + { + "question_id": 207, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "The knee of the person is covered in a dark purple snowsuit with a slightly glossy finish. The fabric appears thick and durable, suitable for cold weather. 
The knee area is slightly bent, indicating a relaxed stance. The snowsuit has a subtle sheen, reflecting light, and the material appears to be tightly fitted around the knee." + }, + { + "question_id": 208, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "The trousers are dark-colored, possibly black or dark gray, with a straight-leg cut. They feature a visible side pocket on the right leg with a flap closure, and there are belt loops around the waistband. The fabric appears to be a sturdy material, potentially denim or a similar thick textile." + }, + { + "question_id": 209, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A blue helmet with a glossy finish, featuring a prominent white stripe with a red outline running horizontally across the middle. The helmet has a black visor attached at the front, and a black chin strap with a buckle hanging down from the sides." + }, + { + "question_id": 210, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "The ski boot is predominantly black with a sleek, modern design. It features a sturdy, high-ankle structure for support and a smooth, matte finish. The boot has a contoured shape to fit the foot and lower leg, with a slightly raised heel for added stability. The sole is thick and textured for grip and durability." 
+ }, + { + "question_id": 211, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "The sleeve of the sweater is predominantly white with a bold red graphic of a person skiing. The fabric appears to be a soft, knit material, and the sleeve is slightly loose-fitting, extending to the wrist. The red graphic is positioned centrally on the sleeve, adding a dynamic contrast to the white background." + }, + { + "question_id": 212, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A person wearing a black helmet, goggles, and a white jacket with red and black accents, including a red logo on the left chest area. The individual is also wearing black gloves and appears to be in a skiing stance." + }, + { + "question_id": 213, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2411153.jpg", + "category": "natural_detailed_caption_box", + "text": "A red, white, and green racing motorcycle fairing with the number 69 prominently displayed in white on a red background, flanked by green and white stripes. The fairing features sponsor logos and a black lower section with a vent." + }, + { + "question_id": 214, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2411153.jpg", + "category": "natural_detailed_caption_box", + "text": "A series of parallel, evenly spaced, horizontal lines with a consistent width, running across the entire width of the image." 
+ }, + { + "question_id": 215, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/22d3844dcf29a07bd10f557a33684e331846f81b938ca0f742afab09c542133f.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The mass of the quark is a very small, massless particle that is part of the quark mass. It is a fundamental particle that is responsible for the mass of the quark, and it is often used in particle physics to study the properties of quarks and their interactions." + }, + { + "question_id": 216, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/22d3844dcf29a07bd10f557a33684e331846f81b938ca0f742afab09c542133f.png", + "category": "ocr_doc_detailed_caption_box", + "text": "A lowercase letter 'v' with a serif at the top and a tail at the bottom, featuring a bold, sans-serif font." + }, + { + "question_id": 217, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The geometry and adsorption energies for the structures of thio-glycolic acid on Au(111) at 0.25ML." + }, + { + "question_id": 218, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The table contains a list of adsorption sites with their corresponding adsorption energies. The adsorption sites are listed in a column, with each site having a corresponding adsorption energy. The adsorption energy is listed in a separate column, with each site having a corresponding energy value. The table also includes the adsorption energy for the Au-S bond, which is listed in a separate column. 
The adsorption energy for the Au-S bond is listed in a separate column, with each site having a corresponding energy value." + }, + { + "question_id": 219, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The adsorption energy is 0.63 eV." + }, + { + "question_id": 220, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The adsorption energy for the most stable structure of the HSCH2COOH on the Au(111) surface is 0.63 eV. The adsorption site preferred by the sulfur atom is located at the top of the gold atom. The polar angle between the normal vector of the surface and the S-C2 direction is 74.2 degrees. The HSCH2COOH tend to lie down at the low coverage." + }, + { + "question_id": 221, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "A black and white image of a person's face with a neutral expression, featuring a prominent nose, closed lips, and visible teeth. The person has short hair and is wearing a dark-colored top." + }, + { + "question_id": 222, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The segmented image shows two graphs. The first graph on the left is titled \"Atom prediction performance (a and c)\" and features a scatter plot with blue and orange dots. 
The second graph on the right is titled \"Bond prediction performance (a and c)\" and also features a scatter plot with blue and orange dots. Both graphs have a similar layout with x and y axes, and the dots are evenly spaced." + }, + { + "question_id": 223, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The F1 score for segmentation and classification networks." + }, + { + "question_id": 224, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The performance of segmentation network is measured by the F1 score, which is a metric that takes into account both precision and recall. The F1 score is used to evaluate the performance of the segmentation network in terms of its ability to correctly segment the image. The segmentation network is compared to other networks, and the F1 score is used to determine which network is the most effective." + }, + { + "question_id": 225, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The segmentation network is a deep learning model that uses a combination of atom, bond, and charge types to predict the pixel values. The model is trained on a large amount of data and has a high F1 score, indicating its high accuracy. The F1 score is a measure of the model's performance, taking into account both precision and recall. The model is designed to be used for various tasks, such as image segmentation and object detection." 
+ }, + { + "question_id": 226, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The performance of the different types of networks is measured by the F1 score, which is a measure of the accuracy of the network. The F1 score is higher for the different types of networks, indicating that they are performing well. The performance of the different types of networks is compared to the performance of the segmentation networks." + }, + { + "question_id": 227, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The performance of the different parts of the network is measured by the F1 score, which is a measure of the accuracy of the network. The F1 score is used to evaluate the performance of the network in different tasks, such as atom, bond, and charge type classifications. The network is able to do a good job even when the segmentation is not perfect, and the performance of the different parts of the network is significantly higher than the segmentation networks." + }, + { + "question_id": 228, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "8.3 Overall graph accuracy" + }, + { + "question_id": 229, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The overall graph accuracy is a measure of the performance of the different parts of the graph, including the segmentation network and the classification network. 
The segmentation network is used to segment the graph into different parts, and the classification network is used to predict the type of the graph. The overall graph accuracy is a combination of these two parts, and it is used to measure the performance of the graph." + }, + { + "question_id": 230, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The number 11 is displayed in a bold, sans-serif font with a dark color, possibly black or dark gray, against a lighter background. The digits are evenly spaced and aligned vertically." + }, + { + "question_id": 231, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The sugar is a low GI sweet spot, which is a type of sugar that has a low glucose index." + }, + { + "question_id": 232, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The low GI sweet spot is at least 22mg CE/100mg sucrose." + }, + { + "question_id": 233, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The sugar has a low GI of about 22-32 mg CE/100g polyphenols. The polyphenol content is high, with a range of 22-32 mg CE/100g polyphenols. The sugar is hygroscopic, with a higher moisture content, and the polyphenol content increases as the sugar becomes more saturated." 
+ }, + { + "question_id": 234, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The massacuit is a dark brown, viscous liquid with a glossy sheen, contained in a clear glass bottle." + }, + { + "question_id": 235, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The massacuites are prepared at two different sugar mills, designated as \"Mill 1\" and \"Mill 2\". The polyphenol content of each sample is determined and washed until they reach the desired polyphenol content, which is roughly 500 to 2000 ICUMSA. The results are in Table 4 below." + }, + { + "question_id": 236, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "A black and white image of a sugar cube with a textured surface, featuring a pattern of small, raised dots." + }, + { + "question_id": 237, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The image shows a collection of documents with text, some of which are in black and white, and others in color. The documents are arranged in a grid-like pattern, with some documents having a red border. The text on the documents is in various fonts and sizes, and some documents have a red box around the text." 
+ }, + { + "question_id": 238, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The OCR process." + }, + { + "question_id": 239, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The text is a letter written in English, with the first line reading \"Available OCR\" followed by \"for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The second line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The third line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The fourth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The fifth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The sixth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The seventh line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical 
work which I carried out under your direction.\" The eighth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The ninth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The tenth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The eleventh line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The twelfth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The thirteenth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The fourteenth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The fifteenth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The sixteenth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to 
accept as an excuse for my shortcomings in this Report.\" The seventeenth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The eighteenth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The nineteenth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The twentieth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The twenty-first line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The twenty-second line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The twenty-third" + }, + { + "question_id": 240, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The Internet Archive image containing this excerpt can be accessed here: https://archive.org/details/b2439867/page/n7" + }, + { + "question_id": 241, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + 
"text": "Journal of Data Mining and Digital Humanities" + }, + { + "question_id": 242, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The text is a web address in blue, displaying the URL \"http://dmh.esciences.org\"." + }, + { + "question_id": 243, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The number 9 is displayed in a bold, sans-serif font with a slight shadow effect, giving it a three-dimensional appearance. The color of the number is a gradient of dark to light gray, with the darker shade at the top and the lighter shade at the bottom." + }, + { + "question_id": 244, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text \"IOOF Annual Report 2012\" is displayed in a serif font, with \"IOOF\" in a larger size and \"Annual Report 2012\" in a smaller size. The text is in a light green color." + }, + { + "question_id": 245, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Non-Executive Directors' Remuneration" + }, + { + "question_id": 246, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "4.1. 
Components of Non-Executive Director remuneration" + }, + { + "question_id": 247, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text \"page 37\" is written in lowercase letters." + }, + { + "question_id": 248, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The company is a non-executive director." + }, + { + "question_id": 249, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The Company's Constitution requires that the aggregate remuneration paid or provided to all Non-Executive Directors in any financial year by the Company, its subsidiaries and associated entities may not exceed an amount approved by shareholders. The ceiling amount includes all remuneration provided to Non-Executive Directors, including superannuation but not including retirement benefits. The current limit of $980,000 per annum was approved by shareholders at the 2010 Annual General Meeting. There has been no increase to the Non-Executive Director fee pool since this time." + }, + { + "question_id": 250, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Non-Executive Directors receive a fee for being a Director of the Board. An additional fee is paid to the Chairman of the Board. 
Non-Executive Directors do not receive additional fees for service on Board and Committees. The Company's Constitution requires that the aggregate remuneration paid or provided to all Non-Executive Directors in any financial year may not exceed an amount approved by shareholders." + }, + { + "question_id": 251, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text is a mathematical equation involving the homotopy group of a space." + }, + { + "question_id": 252, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The region is a monopole, which is a type of topological defect." + }, + { + "question_id": 253, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The topological conditions of formation of defects only govern the formation of topologically stable defects. It was found that defects solutions can form even when the topology is trivial. The most well-known example are the electro-weak strings, formed during the electroweak symmetry breaking, which are perturbatively stable for a range of parameters which are not realized in nature, and belong to the broader class of embedded defects." 
+ }, + { + "question_id": 254, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "These defects are a priori unstable though mechanisms (such as plasma effects) have been found to stabilize them. They are of interest for inflation model builders since this mechanism can allow lift the constraints from the formation of cosmic strings (see Sec. IV F on D-term inflation)." + }, + { + "question_id": 255, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "A black and white image of a mathematical equation with the variables \"x\" and \"y\" in a bold font, followed by a period and the number \"1\" in a smaller font." + }, + { + "question_id": 256, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The number 72 is displayed in a bold, black font with a slight shadow effect, giving it a three-dimensional appearance. The numerals are evenly spaced and aligned horizontally." + }, + { + "question_id": 257, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0301_0188_0040.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "A stylized illustration of a woman with a bun hairstyle, featuring a pattern of white, cloud-like shapes with small black dots scattered throughout. The woman is wearing a pink garment with a polka dot pattern." 
+ }, + { + "question_id": 258, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0301_0188_0040.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "A red and white polka dot pattern with a small, dark, irregularly shaped mark near the center." + }, + { + "question_id": 259, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The number 62 is displayed in a bold, sans-serif font with a dark color, possibly black or dark gray, against a lighter background. The numerals are evenly spaced and aligned horizontally." + }, + { + "question_id": 260, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Chapter 2: Motivation" + }, + { + "question_id": 261, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The string theory is a type of string theory that is compactified on the near-horizon geometry. It is a decoupled theory that combines the two pictures of the same low energy limit of one theory, Type IIB string theory." + }, + { + "question_id": 262, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The region is a field theory picture of the low energy limit of Type IIB string theory." 
+ }, + { + "question_id": 263, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The regions for which analytic tools exist for these two different pictures turn out to be completely incompatible." + }, + { + "question_id": 264, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The equation is a complex mathematical expression involving various variables and functions. It includes a combination of Greek letters, numbers, and mathematical operators. The visible part of the equation shows a series of variables and functions that are interconnected, with some parts appearing to be in parentheses. The equation is written in a formal, mathematical notation." + }, + { + "question_id": 265, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The presence of an insider on the market does not necessarily lead to arbitrage, and the presence of insiders might be considered beneficial to the market, in the sense that it leads to higher information efficiency of the equilibrium price process." 
+ }, + { + "question_id": 266, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The fundamental value of the firm is a stochastic process, and the insider can observe it directly or at least observe it in a less noisy way than the other agents on the market." + }, + { + "question_id": 267, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The paper relaxes the assumption of static insider information and studies the equilibrium trading and price processes, as well as market efficiency, in a setting with dynamic private information." + }, + { + "question_id": 268, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The model is a generalization of the static information setting of [2]." + }, + { + "question_id": 269, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The model is a generalization of the static information setting of [2] and is designed to include dynamic information. It has a much smaller set of admissible trading strategies and pricing rules compared to the ones considered in the work. The model shows the existence of a unique Markovian equilibrium, which is an equilibrium price that allows the insider to trade undetected and depends only on the total order process. 
The model also shows that the presence of an insider increases the market informational efficiency for times close to the time of the order." + }, + { + "question_id": 270, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "A green and white striped awning with a scalloped edge and fringe detailing." + }, + { + "question_id": 271, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A red, three-dimensional, cursive sign with the word \"Abondana\" written in a flowing, elegant script." + }, + { + "question_id": 272, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard features the word \"Cafe\" in a stylized, cursive font with a gradient of red to dark red, giving it a three-dimensional appearance. The letters are slightly italicized and have a shadow effect, enhancing their depth." + }, + { + "question_id": 273, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The logo features the letters \"USIS\" in bold, with \"US\" in red and \"IS\" in black. Below the letters, there is a tagline in smaller, gray font." + }, + { + "question_id": 274, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"ESTATE\" is written in bold, black, uppercase letters on a yellow background." 
+ }, + { + "question_id": 275, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signage displays the word \"AGENTS\" in bold, uppercase letters. The letters are black and set against a yellow background." + }, + { + "question_id": 276, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signage displays the word \"SAXONS\" in bold, black capital letters on a yellow background." + }, + { + "question_id": 277, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard features the word \"SAXONS\" in capital letters with a serif font, set against a dark background. The letters are in a metallic gold color with a slight gradient, giving them a three-dimensional appearance. The signboard has a reflective surface, suggesting it is made of a glossy material." + }, + { + "question_id": 278, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard displays the word \"ESTATE\" in capital letters, with a serif font. The letters are dark and possibly metallic, with a reflective surface that catches the light, giving them a slightly shiny appearance. The background of the signboard is not visible, but the letters are set against a dark backdrop that contrasts with the lighter color of the text." 
+ }, + { + "question_id": 279, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard features capitalized, serif lettering spelling \"AGENTS\" with a metallic finish and a slight gradient, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 280, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"Triple\" is written in a cursive, green font with a white outline. The letters are slightly italicized and have a playful, rounded design." + }, + { + "question_id": 281, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"WHITE\" is written in bold, uppercase letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 282, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"SPOT\" is written in bold, uppercase letters with a light blue color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." 
+ }, + { + "question_id": 283, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A stylized, cursive letter \"O\" with a green outline and a white fill, featuring a small, curved tail extending from the bottom right." + }, + { + "question_id": 284, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The visible part of the ag is a white, stylized letter \"A\" with a slight shadow effect, giving it a three-dimensional appearance." + }, + { + "question_id": 285, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001122.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"NEW\" is written in bold, red, uppercase letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 286, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001122.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"YORK\" is written in bold, red, uppercase letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 287, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_162.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"Colchester\" is written in a bold, sans-serif font with a dark blue color." 
+ }, + { + "question_id": 288, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signage features bold, black, uppercase letters spelling \"AYAM\" on a yellow background." + }, + { + "question_id": 289, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signage features large, bold, red letters \"RUA\" on a yellow background." + }, + { + "question_id": 290, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A yellow sign with the word \"SMASHED\" in bold, black, uppercase letters." + }, + { + "question_id": 291, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A dark brown, rectangular sign with the word \"FRIED\" in bold, uppercase letters." + }, + { + "question_id": 292, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A brown sign with the word \"CHICKEN\" in bold, uppercase letters." 
+ }, + { + "question_id": 293, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "Accommodation" + }, + { + "question_id": 294, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"Office\" is written in a bold, sans-serif font with a dark blue color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 295, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"Nightline\" is written in a bold, sans-serif font with a dark blue color." + }, + { + "question_id": 296, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A red and white signboard with the word \"BUBBA\" in bold, capitalized, red letters on a white background, with a red border around the sign." + }, + { + "question_id": 297, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard features the word \"GUMP\" in large, bold, red capital letters with a white outline. The letters are set against a textured, light-colored background that resembles a stone or concrete surface." 
+ }, + { + "question_id": 298, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard features the word \"SHRIMP\" in capital letters, with a bold, sans-serif font. The letters are white with a slight shadow effect, giving them a three-dimensional appearance. The background of the signboard is a deep blue color, providing a stark contrast to the white text." + }, + { + "question_id": 299, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A curved signboard with a red border and a white background, featuring the word \"RESTAURANT\" in bold, black, uppercase letters." + }, + { + "question_id": 300, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A curved, red and white signboard with the word \"MARKET\" in capital letters, featuring a serif font." + }, + { + "question_id": 301, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A circular blue signboard with a white border and a white symbol resembling a stylized letter 'C' in the center." + }, + { + "question_id": 302, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A circular, metallic signboard with a textured background featuring the letters \"TM\" in a bold, sans-serif font, centered on the sign." 
+ }, + { + "question_id": 303, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"EVENING\" is written in bold, uppercase letters with a gradient of red to pink. The letters are evenly spaced and have a slightly blurred effect, giving them a soft, glowing appearance." + }, + { + "question_id": 304, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"PRIMROSE\" is written in bold, uppercase letters with a gradient of pink to red. The letters are evenly spaced and have a slightly blurred effect, giving them a soft, glowing appearance." + }, + { + "question_id": 305, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"BASE\" is written in bold, uppercase letters with a red background and white outline. The letters are evenly spaced and have a slightly distressed texture." + }, + { + "question_id": 306, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"OIL\" is written in bold, uppercase letters with a gradient of red to pink. The letters are evenly spaced and have a slightly blurred appearance." 
+ }, + { + "question_id": 307, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01644.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The number 46 is displayed in a large, bold, white font with a slight shadow effect, giving it a three-dimensional appearance. The numerals are evenly spaced and have a smooth, rounded design." + }, + { + "question_id": 308, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green calendar with white text displaying the numbers 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 
300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369," + }, + { + "question_id": 309, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green rectangular button with rounded corners featuring the word \"Connecter\" in white lowercase letters, followed by a yellow icon resembling a lock with a keyhole." + }, + { + "question_id": 310, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green, stylized, lowercase letter \"a\" with a slight shadow effect, giving it a three-dimensional appearance." + }, + { + "question_id": 311, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green, stylized lowercase \"a\" with a curved tail extending from the bottom right, resembling a lowercase \"i\" with a dot above it." + }, + { + "question_id": 312, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green rectangular sign with white text displaying the numbers \"07\" in a bold, sans-serif font." 
+ }, + { + "question_id": 313, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "The word \"septembre\" in lowercase letters, with a green background and white font." + }, + { + "question_id": 314, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "The text \"Lien Web\" is written in a bold, sans-serif font with a green color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 315, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "The numbers 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30 are displayed in a green color." + }, + { + "question_id": 316, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green and white striped awning with a scalloped edge and a fringe detail." 
+ }, + { + "question_id": 317, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "Plan du site" + }, + { + "question_id": 318, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "A white arrow pointing to the right, with a slight curve at the tail end, is positioned to the right of the text \"Book Now.\"" + }, + { + "question_id": 319, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "About Us" + }, + { + "question_id": 320, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The text \"Pay Balance\" is written in bold, black, sans-serif font." 
+ }, + { + "question_id": 321, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "About Us" + }, + { + "question_id": 322, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "contact us" + }, + { + "question_id": 323, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The word \"About\" is written in a bold, sans-serif font with a gradient of pink to red, set against a teal background." + }, + { + "question_id": 324, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "A teal-colored horizontal bar with white text and symbols. On the left side, there is a white envelope icon followed by the text \"doonawash@gmail.com\". On the right side, there is a white icon resembling a person in a wheelchair." + }, + { + "question_id": 325, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The text \"Contact us\" is written in a bold, sans-serif font with a pinkish-red color. The letters are evenly spaced and aligned horizontally." 
+ }, + { + "question_id": 326, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The word \"Home\" is written in a bold, sans-serif font with a gradient of blue shades, transitioning from a lighter blue at the top to a darker blue at the bottom." + }, + { + "question_id": 327, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The text \"Pay Balance\" is written in a bold, sans-serif font with a pinkish hue. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 328, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_0558c1f4-c05b-49a8-8479-04b1575779d2.png", + "category": "web_detailed_caption_box", + "text": "OpenStreetMap Belgium" + }, + { + "question_id": 329, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "A black arrow pointing to the right." + }, + { + "question_id": 330, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "A white rectangular tag with the number \"4.9\" in bold black font centered on it." 
+ }, + { + "question_id": 331, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "A rectangular blue button with white text that reads \"Pulsuz Konsultasyon.\"" + }, + { + "question_id": 332, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "The word \"Portfolio\" in a bold, sans-serif font, with a slight italicization, and a drop shadow effect, giving it a three-dimensional appearance. The letters are black with a white outline, and the text is set against a plain background." + }, + { + "question_id": 333, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "A white shopping cart icon with a blue outline, featuring a rectangular basket with a grid pattern, two vertical handles, and four wheels, two of which are visible." + }, + { + "question_id": 334, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "The word \"SUPPORT\" in bold, uppercase letters with a blue background and white outline." + }, + { + "question_id": 335, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "A white lowercase letter \"f\" with a bold, sans-serif font, set against a blue background." 
+ }, + { + "question_id": 336, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "Your Charts" + }, + { + "question_id": 337, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "A blue \"X\" with a white outline, featuring a slightly darker blue fill and a lighter blue border." + }, + { + "question_id": 338, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "A rectangular white sign with the words \"CONTACT US\" in bold, uppercase, blue letters." + }, + { + "question_id": 339, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "A rectangular blue button with rounded corners and a white border. The button has the words \"ADD TO CART\" in bold, white, uppercase letters centered on it." + }, + { + "question_id": 340, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "The word \"PRODUCTS\" is written in bold, uppercase letters with a blue background. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." 
+ }, + { + "question_id": 341, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "The word \"PODCAST\" is written in bold, uppercase letters with a blue background. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 342, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "The text \"ABOUT US\" is written in bold, uppercase letters. The letters are blue and have a slight shadow effect, giving them a three-dimensional appearance. The text is centered horizontally." + }, + { + "question_id": 343, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "The text \"WHAT WE DO\" is written in bold, uppercase letters. The letters are evenly spaced and have a modern, sans-serif font style. The color of the text is black, and it stands out against a light background." + }, + { + "question_id": 344, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "GWM launches livelihood micro-grants" + }, + { + "question_id": 345, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "A rectangular white button with a black border and the word \"Settings\" in black, bold, sans-serif font centered on it." 
+ }, + { + "question_id": 346, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "Privacy Policy" + }, + { + "question_id": 347, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "The word \"HOME\" in uppercase letters, with a bold, sans-serif font, is centered on the image. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance. The color of the text is a gradient of light to dark gray, creating a subtle contrast against the background." + }, + { + "question_id": 348, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "A rectangular black button with the word \"Accept\" in white, bold, sans-serif font centered on it." + }, + { + "question_id": 349, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "A red, oval-shaped button with a white border and the word \"DONATE\" in bold, uppercase, red letters centered on it." + }, + { + "question_id": 350, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "The word \"NEWS\" in bold, uppercase letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and have a clean, modern font style." 
+ }, + { + "question_id": 351, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "The text \"ABOUT US\" is written in bold, uppercase letters with a sans-serif font. The letters are evenly spaced and aligned horizontally. The color of the text is black, and it stands out against a light background." + }, + { + "question_id": 352, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "The text \"CONTACT US\" is written in bold, uppercase letters. The letters are evenly spaced and have a modern, sans-serif font. The color of the text is black, and it stands out against a light background." + }, + { + "question_id": 353, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_6c677961-e540-4cc5-b725-5e301019a9f9.png", + "category": "web_detailed_caption_box", + "text": "A black and white icon depicting a stylized, abstract representation of a building with a flat roof and multiple rectangular windows arranged in a grid pattern." + }, + { + "question_id": 354, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The word \"News\" in a bold, sans-serif font, with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and have a dark color, contrasting with the lighter background." 
+ }, + { + "question_id": 355, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The logo features the word \"ServeGate\" in bold, black letters. To the left of the text, there is a stylized design consisting of two overlapping triangles, one in teal and the other in red, with a black line separating them." + }, + { + "question_id": 356, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "Our Difference" + }, + { + "question_id": 357, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The text \"About Us\" is written in a bold, sans-serif font with a red color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 358, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "A rectangular button with rounded corners, featuring a light pink background and a thin red border. The button displays the text \"Get in touch\" in bold, red, sans-serif font." + }, + { + "question_id": 359, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The word \"Home\" in a serif font, with the letters in a light gray color against a white background." 
+ }, + { + "question_id": 360, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The word \"Services\" in a bold, sans-serif font, with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 361, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "Indigenous Impact" + }, + { + "question_id": 362, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The text \"ServeGate\" is written in bold, black, sans-serif font. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 363, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_5a35d9c2-2c2d-4a49-ad0a-1408d9cac78e.png", + "category": "web_detailed_caption_box", + "text": "A rectangular white button with rounded corners, featuring the text \"Close issue\" in bold, black, sans-serif font." + }, + { + "question_id": 364, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_3fed2169-3c3d-43e7-baaa-3bf0e0c7134c.png", + "category": "web_detailed_caption_box", + "text": "A vertical array of three circular, dark-colored buttons with a slightly raised, smooth surface, aligned centrally on a light-colored background." 
+ } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/ocr_doc_detailed_caption_box/question.json b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/ocr_doc_detailed_caption_box/question.json new file mode 100644 index 0000000000000000000000000000000000000000..99b35112dedb201515539a940d46d9047f03debc --- /dev/null +++ b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/ocr_doc_detailed_caption_box/question.json @@ -0,0 +1,448 @@ +[ + { + "question_id": 1, + "image": "22d3844dcf29a07bd10f557a33684e331846f81b938ca0f742afab09c542133f.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 237, + 123, + 638, + 402 + ], + [ + 533, + 965, + 16, + 14 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 2, + "image": "3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 120, + 106, + 780, + 126 + ], + [ + 302, + 249, + 414, + 395 + ], + [ + 120, + 689, + 780, + 96 + ], + [ + 120, + 797, + 781, + 150 + ], + [ + 496, + 978, + 9, + 15 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 3, + "image": "0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 212.0, + 158.0, + 787.0, + 581.0 + ], + [ + 212.0, + 741.0, + 803.0, + 97.0 + ], + [ + 212.0, + 871.0, + 440.0, + 32.0 + ], + [ + 214.0, + 917.0, + 802.0, + 115.0 + ], + [ + 212.0, + 1054.0, + 450.0, + 29.0 + ], + [ + 207.0, + 1088.0, + 813.0, + 215.0 + ], + [ + 210.0, + 1312.0, + 296.0, + 33.0 + ], + [ + 212.0, + 1356.0, + 801.0, + 89.0 + ], + [ + 595.0, + 1480.0, + 35.0, + 28.0 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 4, + "image": 
"2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 122, + 90, + 796, + 37 + ], + [ + 122, + 147, + 802, + 138 + ], + [ + 122, + 525, + 796, + 239 + ], + [ + 123, + 784, + 653, + 13 + ], + [ + 122, + 817, + 800, + 113 + ], + [ + 122, + 305, + 251, + 13 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 5, + "image": "0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 250.0, + 168.0, + 696.0, + 652.0 + ], + [ + 468.0, + 831.0, + 254.0, + 32.0 + ], + [ + 247.0, + 957.0, + 697.0, + 439.0 + ], + [ + 134.0, + 1404.0, + 924.0, + 90.0 + ], + [ + 139.0, + 1573.0, + 309.0, + 43.0 + ], + [ + 845.0, + 1575.0, + 208.0, + 28.0 + ], + [ + 581.0, + 1566.0, + 28.0, + 28.0 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 6, + "image": "6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 778, + 43, + 148, + 11 + ], + [ + 98, + 137, + 404, + 20 + ], + [ + 98, + 167, + 469, + 16 + ], + [ + 881, + 973, + 44, + 11 + ], + [ + 95, + 315, + 829, + 612 + ], + [ + 98, + 231, + 827, + 76 + ], + [ + 98, + 193, + 808, + 28 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 7, + "image": "c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 120, + 94, + 780, + 340 + ], + [ + 120, + 481, + 780, + 97 + ], + [ + 120, + 590, + 781, + 150 + ], + [ + 120, + 752, + 780, + 96 + ], + [ 
+ 468, + 446, + 433, + 26 + ], + [ + 491, + 978, + 19, + 15 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 8, + "image": "0301_0188_0040.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 0.0, + 0.0, + 1170.0, + 1548.0 + ], + [ + 477.0, + 1510.0, + 216.0, + 21.0 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 9, + "image": "8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 824, + 124, + 19, + 13 + ], + [ + 180, + 124, + 193, + 13 + ], + [ + 180, + 177, + 663, + 230 + ], + [ + 180, + 424, + 663, + 292 + ], + [ + 180, + 734, + 663, + 75 + ], + [ + 294, + 849, + 549, + 19 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 10, + "image": "1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 120, + 144, + 783, + 155 + ], + [ + 120, + 314, + 783, + 127 + ], + [ + 120, + 456, + 783, + 70 + ], + [ + 148, + 541, + 755, + 14 + ], + [ + 120, + 570, + 783, + 354 + ], + [ + 507, + 960, + 9, + 14 + ] + ], + "segmentation": [] + } + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/ocr_spotting_detailed_caption_box/answer.json b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/ocr_spotting_detailed_caption_box/answer.json new file mode 100644 index 0000000000000000000000000000000000000000..1fc627ed32af43ec4e04f2ac4c8172d3201bc43b --- /dev/null +++ b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/ocr_spotting_detailed_caption_box/answer.json @@ -0,0 +1,2186 @@ +[ + { + "question_id": 1, + "image": 
"/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "This is a status bar displaying the date \"Friday, Sep 2,\" along with the temperature \"61°F.\" It informs the user about the current day, date, and ambient temperature, likely provided by a weather service. This text is generally not interactive and serves as an informational display only." + }, + { + "question_id": 2, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "This represents a capital letter \"G\" set inside a circle. It is part of a larger icon that is likely associated with a search engine or service, commonly used as a button to initiate web searches or to access related services. Interaction with this icon typically leads to the search engine's interface or homepage." + }, + { + "question_id": 3, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "This icon depicts clouds, which is a visual representation often used for weather-related services or apps. It indicates current or forecasted weather conditions and is typically interactive, leading the user to a detailed weather report upon tapping." + }, + { + "question_id": 4, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "The gear-like icon signifies settings or options. It is a common symbol for accessing system settings or preferences in an application or operating system. Interacting with this icon usually opens a menu where various settings can be adjusted." 
+ }, + { + "question_id": 5, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "The icon resembling a speech bubble is often associated with messaging services or chat applications. Tapping it would commonly open a messaging app where users can send messages, photos, or videos to others." + }, + { + "question_id": 6, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "This icon, featuring a triangle resembling a \"play\" button, is widely recognized as a symbol for media players or services offering video and music content. Interaction with this icon would likely launch a media playback application or service." + }, + { + "question_id": 7, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "The icon represents a search engine or a suite of services provided by a major tech company, indicated by the letter \"G\" and vibrant colors. Interacting with this icon typically brings the user to a home screen with access to various services offered by the company, such as search, email, maps, and more." + }, + { + "question_id": 8, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "This icon, representing a microphone inside a colorful outline, is suggestive of a digital assistant or voice search feature. When interacted with, it would generally activate a voice recognition service allowing users to speak commands or queries for assistance." 
+ }, + { + "question_id": 9, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "The circle icon in a navigation bar is typically an interactive home button on a mobile device, often bringing the user back to the home screen when tapped." + }, + { + "question_id": 10, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "The icon with a triangle pointing leftwards resembles a \"back\" navigation button, generally used to go back to the previous screen in an application or navigate backwards in a browser." + }, + { + "question_id": 11, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "This icon is indicative of the Microsoft Excel mobile application, recognizable by its green 'X' on a white background, which suggests a tool for creating and editing spreadsheets. It is likely an interactive element that, upon touch or click, launches the application allowing users to work with spreadsheets on their mobile device." + }, + { + "question_id": 12, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "This is the Facebook mobile application icon, featuring a lowercase 'f' on a blue background. When interacted with, it typically opens the Facebook app where users can browse their news feed, connect with friends and family, post updates, and engage in social networking activities." 
+ }, + { + "question_id": 13, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "Represented here is an icon for the Speedtest application by Ookla, depicted by a speedometer graphic suggesting the app's function of measuring internet connection speed. Tapping on this icon will likely open the app and allow the user to test their current internet speed." + }, + { + "question_id": 14, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "This icon, featuring a camera lens and a gradient background, is for the Instagram mobile application. Interacting with this icon will usually open the app, providing access to photo and video sharing, as well as viewing the content from others on the Instagram social network." + }, + { + "question_id": 15, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "Resembling a house with a triangular roof, this icon signifies a home automation or real estate application. Interaction with this icon would open the respective app, providing controls for smart home devices or real estate listings, depending on its specific function." 
+ }, + { + "question_id": 16, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "This icon has a feather, hinting at a lite version of an application that offers a minimalistic or resource-efficient option, typically for use in areas with limited connectivity or on devices with lower performance." + }, + { + "question_id": 17, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "Featuring a speech bubble with a lightning bolt, this is the Facebook Messenger app icon. It signifies an app dedicated to messaging which, upon interaction, opens a platform where users can send messages, share media, and participate in video calls." + }, + { + "question_id": 18, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The icon partially reads \"Home De...\" against an orange square, suggesting a home improvement or retail company's app, possibly offering goods or services related to home refurbishment or decoration. The app's full functionality would be revealed upon opening it." + }, + { + "question_id": 19, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The green owl represents Duolingo, an educational platform icon with its function being language learning. Upon touching the icon, the user would engage with the app to learn a new language through interactive lessons." 
+ }, + { + "question_id": 20, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "This icon, with an abstract design and the letters 'GE', likely signifies a news or media application that provides users with news articles, updates, and possibly live reporting, accessible by touching the icon to open the app." + }, + { + "question_id": 21, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "This area contains the term \"Search,\" suggesting it is likely related to a search function where a user can input queries to locate specific settings or information within this system or application." + }, + { + "question_id": 22, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"settings\" indicates an option or heading that relates to configuration options. Interacting with it would typically bring up a menu to adjust system preferences or application parameters." + }, + { + "question_id": 23, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The term \"mobile,\" followed by a comma hints at a list or continuation of related topics, likely referring to mobile network settings or features in the context of this system or application." 
+ }, + { + "question_id": 24, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"data\" in this context may refer to mobile data usage and settings. It suggests an option to view or adjust how the device handles cellular data." + }, + { + "question_id": 25, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "This term \"Wi-Fi,\" ending with a comma, implies it is part of a series, possibly relating to Wi-Fi settings where a user can manage Wi-Fi networks and preferences." + }, + { + "question_id": 26, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"and\" serves as a conjunction within a list or sentence, indicating the addition of more items or concepts that are related to the ones previously mentioned." + }, + { + "question_id": 27, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "This term \"hotspot\" typically refers to a feature where the device can share its internet connection with other devices through Wi-Fi, Bluetooth, or USB." 
+ }, + { + "question_id": 28, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The term \"usage,\" followed by a comma, likely relates to the tracking or monitoring of resource consumption, such as data, battery, or connectivity usage." + }, + { + "question_id": 29, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"Connected\" suggests it pertains to the status or management of connected devices or networks, such as Bluetooth connections or Wi-Fi networks." + }, + { + "question_id": 30, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The partially visible term \"Lo\" could be part of a word that identifies a feature, option, or information related to the system or application settings." + }, + { + "question_id": 31, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This area is a text-entry field in a web browser, typically used for typing in web addresses or performing web searches. Interaction with this field usually involves clicking or tapping to enter text, and pressing Enter would initiate a web search or take the user to the entered web address." 
+ }, + { + "question_id": 32, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This is a tab title within a web browser, indicating that the user has accessed or searched for 'eBay shopping' in this tab. The text serves a navigational purpose, allowing the user to identify and switch to the associated web page when multiple tabs are open." + }, + { + "question_id": 33, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This label refers to a 'cart' on a shopping platform, hinting at a functionality that allows users to view items that have been added to a virtual shopping cart. It is likely interactive and clicking it would navigate the user to a page summarizing their selected items for purchase." + }, + { + "question_id": 34, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This text is a URL displayed in the address bar of a web browser. It indicates that the current web page pertains to the shopping cart of the eBay website. The user can click on this text to edit the URL or copy it for use elsewhere." + }, + { + "question_id": 35, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This text suggests that the user is visiting or has searched for a page related to 'Welcome to Costco Wholesale'. It might serve as a title for a page, potentially indicating that the user can find information regarding Costco's offerings through this tab." 
+ }, + { + "question_id": 36, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "Here, 'costco.com' is the simplified representation of an address bar or tab title suggesting that the user is visiting Costco's website. Users interact with this by clicking it to switch to the Costco tab within the browser." + }, + { + "question_id": 37, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This phrase 'Pay Less.' is likely associated with a slogan or branding message, suggesting a value proposition to customers; the promise of spending less for the products or services offered by the entity associated with this phrase." + }, + { + "question_id": 38, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The text 'Target:' resembles a title or a navigational cue for a segment within a web browser, it may indicate a web page related to the retail company Target and is probably part of a list or compilation of bookmarks or frequently visited sites." + }, + { + "question_id": 39, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "Similar to , 'Expect More.' is a slogan that implies a promise of greater value, service, or product quality from the associated entity. It is designed to communicate a marketing message or company ethos to customers." 
+ }, + { + "question_id": 40, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This text 'target.com' represents a simplified address, similar to , likely indicating that the user is visiting or has the option to visit Target's website. Interaction with this area would navigate to or indicate presence at Target's web page." + }, + { + "question_id": 41, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The text appears at the top of the screenshot and is likely the title of the application or page currently being viewed. It suggests that the content of the page is related to the Skype application, possibly for download or further information purposes." + }, + { + "question_id": 42, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "This text is also indicative of the Skype application. It is usually the main header on an app page and is a non-interactive element that provides the user with confirmation of the app's identity." + }, + { + "question_id": 43, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The icon displayed represents the Skype application. It's typically used as a visual identifier of the app within digital stores or on a device's home screen. It serves as a non-interactive branding element in this context." 
+ }, + { + "question_id": 44, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The word \"Install\" is generally an interactive button when found on an application download page. Tapping this button would initiate the download and installation of the app onto the user's device." + }, + { + "question_id": 45, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "This repeated mention of \"Skype\" may refer to the name of the application on its store page. It usually appears below the app icon and serves as a non-interactive title or label." + }, + { + "question_id": 46, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The term \"purchases\" here likely relates to transactions associated with the app, suggesting that the app might offer in-app purchases." + }, + { + "question_id": 47, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "Paired with the previous \"purchases\" text, \"In-app\" specifies the location or type of purchases available, indicating that users can buy items or services within the app itself." + }, + { + "question_id": 48, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The rating \"4.1*\" is indicative of user reviews and ratings for the app. 
It reflects the app's quality as perceived by its users and is usually an averaged score based on individual user ratings." + }, + { + "question_id": 49, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "\"1B+\" signifies the number of times the app has been downloaded, indicating that the Skype app has been downloaded over one billion times." + }, + { + "question_id": 50, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The phrase \"Editors' Choice\" likely denotes a special recognition or endorsement by the app store's editorial team, suggesting that the app comes highly recommended." + }, + { + "question_id": 51, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "This region contains the phrase \"search settings,\" which indicates a function allowing the user to search within the settings menu. The presence of a magnifying glass icon suggests that this is an interactive search bar interface element. Typically, a user would tap this area and input text to locate specific settings." + }, + { + "question_id": 52, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "This portion features the single character \"M\" which is typically representative of a user's initial or an application's logo. 
It is stylized with a certain thickness and distinct font that can be indicative of a branding design or user personalization within a software interface." + }, + { + "question_id": 53, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "This region includes the word \"Add\" presented with clear, bold lettering on a button or interactive element. This is commonly used to initiate the process of adding new elements, possibly in this context to add a new email account, as suggested by the surrounding text." + }, + { + "question_id": 54, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "The text \"another email\" is part of a larger phrase that suggests functionality for adding additional email addresses to the account or application in use. It is likely not interactive by itself but is part of instructional or descriptive text guiding the user's actions." + }, + { + "question_id": 55, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Featuring the word \"account,\" this text complements the nearby phrase and is associated with the process of adding or managing email accounts within the application or device settings. It would generally not be interactive but adds context to the interface's options." 
+ }, + { + "question_id": 56, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "The phrase \"Set up your personal\" suggests a prompt or direction for the user to configure personal settings, possibly related to an email or other account settings. This text is usually static and provides guidance or instructions within a user interface." + }, + { + "question_id": 57, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Paired with the previous text, \"or work email\" completes instructions for setting up email accounts of different types (personal or professional) within an application. This portion of text helps to further define the user's options for account configuration." + }, + { + "question_id": 58, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "\"Network & internet\" signifies a category or menu within the settings that pertains to adjusting network-related preferences including Wi-Fi, data usage, and related connectivity features. This text typically leads to a subsection where related settings can be modified." + }, + { + "question_id": 59, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "This section, \"Wi-Fi, mobile, data\" lists different connectivity options available to the user for configuration. 
It could be informative text providing a summary of the settings contained within the \"Network & internet\" menu mentioned in the previous region." + }, + { + "question_id": 60, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "The presence of the word \"usage,\" combined with the context from the surrounding text, relates to data consumption aspects of the device's network settings. \"And\" suggests there are additional relevant aspects listed after this text, likely related to managing network services or features." + }, + { + "question_id": 61, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "This area displays the text \"Wednesday, May 18,\" which appears to show the date information, presumably representing the current day of the week and the month's date. This is typically displayed on mobile devices as part of the user interface to inform the user of the current date." + }, + { + "question_id": 62, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "The text \"Maps\" suggests an application name, likely a mapping or navigation app, which users commonly utilize to find locations, get directions, or explore maps of different areas. It is probable that tapping on this text would open the associated application." 
+ }, + { + "question_id": 63, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "This is a single character \"G,\" customarily associated with Google’s branding. It often represents access to Google's search services or apps affiliated with Google. Interacting with this symbol would possibly lead to a Google product or service." + }, + { + "question_id": 64, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "This icon, often indicative of location services or map functionality, is commonly used to represent a user's current location or to access location-based features. Interacting with this icon typically opens a mapping application that shows the user's real-time location on a map." + }, + { + "question_id": 65, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "This icon depicts a chat bubble, usually associated with messaging or communication services. It typically indicates the user's chat or messaging applications, and interaction would likely open the associated messaging service to send or receive messages." + }, + { + "question_id": 66, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "This icon with a colorful design resembling a camera shutter or a wheel hints at the Google Chrome browser, which is widely used for Internet browsing. 
Tapping this icon would typically open the Chrome browser for web navigation." + }, + { + "question_id": 67, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "The icon exhibits the Google Assistant symbol, suggesting voice-activated or typing search query functionality. Interacting with this icon would likely invoke Google Assistant to help with tasks, answer questions, or control smart home devices." + }, + { + "question_id": 68, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The URL displayed in the address bar indicates that the webpage belongs to costco.com and includes a path, or endpoint, that suggests a functional page, which the text 'Check' implies may be related to a checkout or verification process. This is the web address users can navigate to for interacting with the website's functionality." + }, + { + "question_id": 69, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The logo represents the brand identity for Costco Wholesale, indicating that the user is currently on the official website of this retail company. As a logo, it may serve as a clickable element that typically redirects users to the homepage of the website." + }, + { + "question_id": 70, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "Labeled 'Warehouses,' this indicates a section of the website where users can find information about Costco's physical store locations. 
It likely functions as a link that, when clicked, will take the user to a page detailing warehouse locations and related information." + }, + { + "question_id": 71, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "Marked 'Account,' this suggests a section pertaining to user account management. Clicking on this would likely allow the user to access their personal account details, sign in, or manage their membership and profile." + }, + { + "question_id": 72, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "This icon appears to represent a shopping cart, which is commonly used on e-commerce websites to signify where users can view items they intend to purchase. Clicking on it would probably take the user to view their current selections or to the checkout page." + }, + { + "question_id": 73, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "Highlighting the word 'Shop,' this implies a navigational link designed to direct users to the online shopping section of the website, where they can browse and choose products for purchase." + }, + { + "question_id": 74, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "This appears to be a search bar, where users can enter keywords or phrases to find specific items or information on the website. Such fields typically include an interactive function that generates search results upon entry confirmation." 
+ }, + { + "question_id": 75, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "'My Warehouse' likely refers to the user's preferred or designated Costco warehouse location. It may include functionality for the user to select or change their preferred store and may show additional details, such as operational hours." + }, + { + "question_id": 76, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The phrase 'Delivery Location' accompanied with what appears to be a postal code implies a feature that allows users to specify or view the location to which online purchases will be delivered." + }, + { + "question_id": 77, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "Containing the city name 'Seattle,' this suggests the chosen warehouse or delivery location for the user. It might be interactive to allow the user to change the location or view information on the selected warehouse." + }, + { + "question_id": 78, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "This text likely represents the name of an application or service known as Fetch Rewards, potentially hinting at a rewards system that users can utilize by engaging with the app." 
+ }, + { + "question_id": 79, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "This is the word \"Play,\" which usually is associated with initiating an action or starting something, such as a video or game within an application." + }, + { + "question_id": 80, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The phrase \"to earn\" typically suggests that there is an opportunity to gain something—often points, money, or rewards—by performing certain actions or tasks." + }, + { + "question_id": 81, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The word \"MAKE\" usually denotes the action of creating something or obtaining an outcome, perhaps insinuating that users can create or earn money through the app." + }, + { + "question_id": 82, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "This text states \"MONEY,\" which indicates that the application or service likely involves opportunities for users to earn financial rewards or benefits." 
+ }, + { + "question_id": 83, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The text \"appminer st\" is not immediately clear, but it could be a truncation or part of a larger phrase, possibly indicating a feature within the app, or related to app mining or statistics." + }, + { + "question_id": 84, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The word \"Contains\" usually suggests that what follows will describe the contents or features within the app, which in this case could be related to advertisements." + }, + { + "question_id": 85, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "This term generally refers to \"advertisements,\" suggesting that the application includes ads that users might see while utilizing the app." + }, + { + "question_id": 86, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The numerical figure \"50K+\" generally implies a quantity greater than 50,000, typically used in the context of downloads, users, or items within an app." 
+ }, + { + "question_id": 87, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The characters \"4.7*\" indicate a rating, likely on a 5-point scale, suggesting that users have rated the app positively, with 4.7 out of 5 stars." + }, + { + "question_id": 88, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "This area contains the webpage title indicating the user is on a retail website known for its wide range of products, hinting at online shopping capabilities. The title is typically non-interactive and serves as an identifier of the site." + }, + { + "question_id": 89, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "This text seems to be an incorrect or truncated URL for the same retail website mentioned in Region 1. Possibly a typographical error within the text, it seems non-functional." + }, + { + "question_id": 90, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The text here represents a search query within a search bar of the website, suggesting the user is looking for a Lenovo ThinkPad, which is a model of a laptop computer." + }, + { + "question_id": 91, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "Labeled 'Cancel,' this is likely an interactive button used to clear the current search query within the search bar. 
Once tapped or clicked, it should clear the input text." + }, + { + "question_id": 92, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "This text duplicates the query in Region 3 and is part of the search bar suggestions or search history, indicating a previous or common search made by the user." + }, + { + "question_id": 93, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The single word 'Lenovo,' which is part of a search suggestion below the search bar, represents the brand that manufactures various electronic devices, including laptops." + }, + { + "question_id": 94, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The word 'ThinkPad' refers to a specific line of laptops and is part of a search suggestion. Standalone, it specifies the user's interest in the ThinkPad series by Lenovo." + }, + { + "question_id": 95, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The text 'ir' appears to be an incomplete or mistyped search term or fragment within the search suggestions. Its context is unclear without additional information." 
+ }, + { + "question_id": 96, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "This text, likely a category label, indicates the section of the site the user is navigating, presumably the electronics category where items like laptops would be found." + }, + { + "question_id": 97, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "Representing a longer search suggestion, this phrase indicates a related accessory for the Lenovo ThinkPad, specifically a charger, suggesting the user might be looking to purchase this item." + }, + { + "question_id": 98, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "The image displays a title that reads \"NAVIGATING SPECIAL EDUCATION SOCIAL & EMOTIONAL LEARNING.\" It's styled in bold, white capital letters against a red background, and it appears to serve as a header for the entire visual presentation, indicating the overarching theme of the content below." + }, + { + "question_id": 99, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel illustrates the concept of a \"Growth Mindset\" juxtaposed with \"Fixed Mindset.\" Two head silhouettes are shown with arrows pointing towards a \"Growth Mindset\" tag indicating a positive transformation away from a \"Fixed Mindset,\" symbolizing the adaptability and learning potential of the mind." 
+ }, + { + "question_id": 100, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "Depicted here is a person with a grim expression, and steam coming out of their ears, conveying the theme of \"Anger Management.\" This symbolizes the need to control tempers, with visual cues highlighting the struggle typically associated with anger." + }, + { + "question_id": 101, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel represents \"Understanding Diversity.\" It features a circle of variously colored handprints reaching towards the center, signifying unity and inclusiveness among diverse individuals or groups. The image communicates the idea of embracing diversity." + }, + { + "question_id": 102, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "The image here is indicative of \"Social Inferencing.\" A figure stands perplexed before an open box with question marks floating above, suggesting the process of interpreting social cues and understanding social contexts or scenarios that are not overtly expressed." + }, + { + "question_id": 103, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "An illustration of two children, one standing over the other with a raised fist, typifies \"Bullying.\" This image portrays an aggressive interaction between youth, emphasizing the dynamic of power and intimidation present in bullying behaviors." 
+ }, + { + "question_id": 104, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "Here, \"Helping Others\" is symbolized by two children, one assisting the other by tying their shoe. This image evokes themes of kindness, helpfulness, and cooperation among individuals, highlighting the importance of social support." + }, + { + "question_id": 105, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "Showing two profiles with opposing arrows and a lightning bolt in between, this panel discusses \"Conflict Resolution.\" The imagery suggests two individuals facing a conflict with a potential for resolution, emphasizing communication and problem-solving." + }, + { + "question_id": 106, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel depicts \"Coping With Changes,\" represented by a signpost with arrows pointing in different directions, labeled \"CHANGES.\" It symbolizes the various paths one may take when encountering life's transitions and the importance of adaptability." + }, + { + "question_id": 107, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "Finally, \"Leadership\" is expressed through an individual climbing a staircase while assisting another person upward. It represents the concept of leading by example, and guiding others towards success, showcasing the traits of a good leader." 
+ }, + { + "question_id": 108, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image highlights a statistic related to hiring efficiency, pointing out that the time to hire has more than doubled over the last 5 years. A graphical element beside the text emphasizes this increase in time with a \"+2X\" indicating the doubling." + }, + { + "question_id": 109, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This section of the image indicates that the average cost to hire someone in the U.S. is $4,000, emphasizing the financial implications of the recruitment process for employers." + }, + { + "question_id": 110, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Illustrated here is a pie chart displaying that 36% of employers are unable to find the talent they need when it is needed. This statistic points to the challenges in matching skills and job openings in a timely manner." + }, + { + "question_id": 111, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Described here is the significant potential economic benefit (\"$2.7 trillion impact to global GDP\") that could result from using more efficient talent platforms, suggesting that improvements in recruiting methods could have a profound impact on the global economy." 
+ }, + { + "question_id": 112, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This part of the image addresses organizational efficiency, with a statement that 46% of companies are sometimes or frequently understaffed. The figure is accompanied by a graphic showing the 46% proportion." + }, + { + "question_id": 113, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "This area presents the title and introductory text providing an overview of the image's intent. It introduces the concept of \"Travel Personas,\" indicates that these personas are used to identify individual travel styles, and how these styles are significant for personalized engagement in marketing. It references a report by the CMO Council from 2018." + }, + { + "question_id": 114, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A visual and textual depiction of \"The Smart Planner\" travel persona. This persona, representing 31% of travelers, is illustrated by a character with suitcases, a camera, binoculars, and a hat, suggesting a well-prepared and organized traveler." + }, + { + "question_id": 115, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "This illustrates \"The Relaxed Nomad\" persona. With 25% representation, the image shows two individuals in hiking attire with a backpack, indicating a laid-back and adventurous travel style." 
+ }, + { + "question_id": 116, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Depicts \"The Deal Seeker\" persona, representing 22% of travelers. The image shows a family with suitcases and shopping bags, suggesting a focus on economical travel and value for money." + }, + { + "question_id": 117, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Presents \"The Nervous Stresser\" persona with 13% representation. The image depicts an anxious individual in an airplane seat, clutching the armrests, reflecting a traveler who experiences stress during trips." + }, + { + "question_id": 118, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Showcases \"The Adventurous Thrill-Seeker\" persona, accounting for 5% of the traveler demographic. The image portrays two characters skydiving, indicating a preference for high-energy and adventure-filled travel experiences." + }, + { + "question_id": 119, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Portrays \"The Luxury Budget-Buster\" persona, constituting 1% of travelers according to this depiction. The image includes a character sipping a drink on a plane, implying a tendency towards indulgence and high expenditure." 
+ }, + { + "question_id": 120, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Displays \"The Business Road Warrior\" persona, also making up 1% of the traveler profile. The graphical representation includes a character briskly walking with a rolling suitcase and carrying a briefcase, suggesting frequent travel for business purposes." + }, + { + "question_id": 121, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "An illustration depicts a female customer service representative wearing a headset and holding a notepad, with an envelope icon indicating email communication." + }, + { + "question_id": 122, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "Depicted here is a male customer support agent with a headset. Behind him are symbols such as a magnifying glass and a wrench, suggesting a focus on service and problem-solving." + }, + { + "question_id": 123, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "This image shows a customer support agent with a globe and a phone headset in the background. The presence of a star and headphones suggests excellence in global support." + }, + { + "question_id": 124, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "In this graphic, a male figure with a headset is surrounded by symbols: a question mark, gears, and a light bulb. 
This represents expertise in finding solutions." + }, + { + "question_id": 125, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "Featuring tools and a gauge, the illustration conveys a commitment to quality in customer service, indicated by the 'Quality Service' text." + }, + { + "question_id": 126, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "The design shows a female representative with a headset alongside a mobile phone displaying a wifi signal and a callback option, emphasizing telecommunications services." + }, + { + "question_id": 127, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel portrays a customer support individual with a wrench, highlighting the concept of assistance with technical or practical issues." + }, + { + "question_id": 128, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "Illustrated here is a customer support agent with a headset in front of a backdrop depicting the UK flag, a speech bubble, and a phone, suggesting language translation services." + }, + { + "question_id": 129, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A female customer service consultant is represented, with symbols of 24-hour availability and a gold star, signifying round-the-clock excellence." 
+ }, + { + "question_id": 130, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image features a human heart symbolizing a strong heart as one of the benefits of running." + }, + { + "question_id": 131, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel showcases an icon of a shield with a check mark, representing the immune system's boost from running." + }, + { + "question_id": 132, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A silhouette of a figure measuring their waist indicates that running can aid in weight loss." + }, + { + "question_id": 133, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The artwork depicts a pair of lungs, signifying the respiratory system's enhancement due to running." + }, + { + "question_id": 134, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A smiling face emoticon suggests that running can improve one's mood." 
+ }, + { + "question_id": 135, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "An illustration of a leg bone signifies that running increases bone density." + }, + { + "question_id": 136, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "An image of a brain is used to illustrate the benefit of improved brain function from running." + }, + { + "question_id": 137, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image features a detailed representation of the cardiovascular system, emphasizing its strengthening through running." + }, + { + "question_id": 138, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "The image displays the word 'HOT' in large, bold, uppercase letters with varying colors for each letter. The 'H' is in red, the 'O' is in mustard yellow, and the 'T' is in a light blue color." + }, + { + "question_id": 139, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "Here, a round, yellow cartoon-like emoji with blue tears, symbolizing laughter or crying with joy, is shown. Below it, the word 'HUMOR' is written in uppercase letters." 
+ }, + { + "question_id": 140, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "This depicts an open hand graphic in brown color, which is centered within an orange circular background. Underneath the image, the word 'OPENNESS' appears in capital letters." + }, + { + "question_id": 141, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "Shown is an illustration of two hands coming together in a handshake or high five, set against a yellow circle. Below, the word 'TOUCH' is described in uppercase letters." + }, + { + "question_id": 142, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A graphic of a heart with an exclamation mark within it is displayed within a light green circular background. Written below is the word 'ATTENTION' in uppercase letters." + }, + { + "question_id": 143, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A portrayal of a person with a gender-neutral appearance, featuring brown hair, is encircled in blue. A sequence of dashes leads from the character to the bottom right, with the word 'PROXIMITY' written in block capitals." + }, + { + "question_id": 144, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "The image shows a close-up illustration of a stylized blue eye with a large brown pupil, against a dark blue background. 
Below the eye is the word 'EYE CONTACT' written in all caps." + }, + { + "question_id": 145, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image depicts an illustration of a person with flushed cheeks and a thermometer in their mouth, indicating a high temperature, alongside the word \"fever.\"" + }, + { + "question_id": 146, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image shows an individual coughing into their hand, representing a symptom identified by the word \"cough.\"" + }, + { + "question_id": 147, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel illustrates an individual appearing unwell, with a sick expression and a hand over their mouth. The word \"vomiting\" is associated, indicating it as a symptom." + }, + { + "question_id": 148, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Here, an individual is portrayed with their hands near their throat, their cheeks flushed, and an uneasy expression. The term \"dyspnea\" adjacent to the figure defines the displayed respiratory distress." 
+ }, + { + "question_id": 149, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "An individual is seen clutching their stomach, with a distressed expression, representative of \"diarrhea\" which is indicated by the corresponding label." + }, + { + "question_id": 150, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The illustration shows a pair of human lungs with a highlighted area indicating inflammation. The word \"pneumonia\" is present to describe the condition being depicted." + }, + { + "question_id": 151, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Featured is a graphic representation of a pair of kidneys with a highlighted area in red, indicating distress or damage. Alongside is the phrase \"renal failure,\" signifying the medical condition exhibited." + }, + { + "question_id": 152, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "This region displays the heading \"Interesting Facts\" at the top, set against a blue background with a three-line menu icon to the left and a heart symbol to the right." 
+ }, + { + "question_id": 153, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Depicts a graphical icon of a panda bear's face on a green background with the label \"Animals\" beneath it." + }, + { + "question_id": 154, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Shows an icon representing a plate and silverware on a green background, labeled as \"Diet Nutrition.\"" + }, + { + "question_id": 155, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Includes a graphical icon that combines a heart shape and a pulse line on a dark background, labeled \"Diseases Disorders.\"" + }, + { + "question_id": 156, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Features an image of a fashionable shirt on a dark background with the word \"Fashion\" underneath it." + }, + { + "question_id": 157, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Contains an icon resembling a film strip on an orange background, indicating the \"Entertainment\" category." 
+ }, + { + "question_id": 158, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel displays an icon of a syringe with a drop, which is on a green background, and is described with the words \"Drugs Addiction.\"" + }, + { + "question_id": 159, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Shows a depiction of a wine bottle and glass on a blue background, labeled \"Food & Drink.\"" + }, + { + "question_id": 160, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Exhibits two stylized human figures, one male and one female, on a blue background, with the inscription \"Gender.\"" + }, + { + "question_id": 161, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Contains a depiction of the Earth on a green background, with the word \"Global\" beneath it." + }, + { + "question_id": 162, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image depicts two individuals engaged in conversation. One appears to be a professional, possibly a therapist, sitting across from a person who seems to be seeking help. The scene is accompanied by the text \"Seek Professional Help,\" suggesting that the image represents the advice to consult a mental health professional when dealing with depression." 
+ }, + { + "question_id": 163, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel features an individual sitting on the ground with their head down, projecting a dejected or hopeless demeanor. Above the figure, the text reads \"Don't Lose Hope.\" The image conveys the message of maintaining hope as a countermeasure against feelings of depression." + }, + { + "question_id": 164, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "An illustration of a female figure is shown alongside the phrase \"Practice Mindfulness.\" She appears calm and collected, with her eyes closed and a slight smile, which indicates a serene state of mind, commonly associated with mindfulness practice." + }, + { + "question_id": 165, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Here, an individual is seen looking out of a large window onto a sunny landscape with trees. The phrase \"Rethink Your Perspective\" suggests that the image is advising a change in one's outlook, possibly to a more positive or broader view, as a way to combat depression." + }, + { + "question_id": 166, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image presents a person in activewear, taking a stride forward with a focused expression. 
The associated text, \"Stay Active,\" recommends physical activity as a method for improving mental health and battling depression." + }, + { + "question_id": 167, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel displays a person in a yoga pose, meditating with eyes closed and hands in a position of focus. The text \"Meditate\" indicates that the image is suggesting meditation as a therapeutic practice for managing depression." + }, + { + "question_id": 168, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image shows three gel ice packs in green, purple, and blue colors, with distinctive shapes, resembling a dinosaur, a star, and a fish. Accompanying text suggests \"Take out the one you need.\"" + }, + { + "question_id": 169, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "An illustration depicts a cartoon boy holding an ice pack to his head. 
Text indicates the ice pack has multi-functionality and advises using the ice pack for \"the relief area for the doctor recommended time of 20 minutes.\"" + }, + { + "question_id": 170, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "Detailed instructions on how to use the ice pack are given, with two methods highlighted: \"TO USE COLD\" involving refrigeration, and \"TO USE HOT\" instructing to microwave the pack for 10 seconds and check the temperature." + }, + { + "question_id": 171, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "Guidelines for storage are portrayed, advising to \"put the item in the storage bag, for longer shelf life, keep pack in freezer while not in use.\"" + }, + { + "question_id": 172, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "The object appears to be a small brown wooden shed, likely used for storage, situated on a patch of grass. It has a clearly visible slanted roof, possibly for rain runoff, and looks to be a single-door structure typically found in a backyard or garden setting." + }, + { + "question_id": 173, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "This object is a tree with thick, lush foliage, representing a mature specimen that provides shade and greenery. 
It stands behind a smaller, sparser tree and is part of a larger grouping of trees that appear to create a natural boundary or backdrop for the area." + }, + { + "question_id": 174, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "A single metal pole is embedded in the ground in a vertical orientation. It seems to be a simple, slender structure, possibly serving as a support or part of a larger construction that isn't fully visible. The lawn surrounding it is well-trimmed and maintains an even appearance." + }, + { + "question_id": 175, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "This bowl, appearing to be dark blue, is situated against a background, likely part of kitchenware." + }, + { + "question_id": 176, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "The tabletop is made of dark marble, showcasing a glossy finish and reflecting its surroundings slightly." + }, + { + "question_id": 177, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "The light switches are white, contrasting with the dark wall, likely plastic, and appear functional." + }, + { + "question_id": 178, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "Positioned in the background, these white light switches are paired on a wall above the countertop." 
+ }, + { + "question_id": 179, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "This silver oven, with digital controls and a handle, appears modern and built into the cabinetry." + }, + { + "question_id": 180, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "An indistinct blue and green object, possibly decorative, is partially visible against a lighter backdrop." + }, + { + "question_id": 181, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "The floor, constructed of hardwood, showcases a natural finish with variations in wood grain." + }, + { + "question_id": 182, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "The jar holder, likely metal, is mounted to the wall, containing jars that may hold spices or ingredients." + }, + { + "question_id": 183, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "This is an image of a silver metal table situated outside on a paved ground. The table has a shiny, reflective surface indicative of being metallic." + }, + { + "question_id": 184, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "The object is an outdoor chair characterized by its red backrest and tan seat. 
It appears sturdy and designed for outdoor settings, likely part of a café or restaurant patio." + }, + { + "question_id": 185, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "The item in question is a piece of lavender paper that seems to be placed atop a metal table. The paper's edges are distinctly visible against the table's surface." + }, + { + "question_id": 186, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "Visible here is a yellow traffic light, suspended above the street. The light is not illuminated and it stands against a light sky, possibly signaling a traffic-stop scenario." + }, + { + "question_id": 187, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A large red and white striped umbrella stands open, presumably providing shade or shelter in an outdoor setting. Its vibrant colors attract attention." + }, + { + "question_id": 188, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A brown tree trunk is seen beside a sidewalk. The trunk's bark is rugged and it appears to be a mature, healthy tree, offering shade to the vicinity." + }, + { + "question_id": 189, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "Displayed is a black chalkboard featuring white text. It seems to be placed on a sidewalk, often used for displaying messages or menus outside establishments." 
+ }, + { + "question_id": 190, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A window is seen on the side of a tan-colored building. It appears to be rectangular, typical of building windows, and reflects the adjacent surroundings." + }, + { + "question_id": 191, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "A close-up view of a horse's head, predominantly brown with a distinctive white patch on its forehead and visible mane." + }, + { + "question_id": 192, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "This is the body of a brown horse, most likely the same one as the head seen in the close-up. Its front body is visible." + }, + { + "question_id": 193, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "A white horse is seen from a side angle in the distance, grazing or standing in a meadow with trees and a fence." + }, + { + "question_id": 194, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A frying pan on a heat source contains sautéed meat and vegetables, emitting steam, indicating the food is hot and being cooked." 
+ }, + { + "question_id": 195, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "An electric stovetop features a radiant burner that is glowing, suggesting it is turned on and providing heat for cooking." + }, + { + "question_id": 196, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A kitchen knife with a green handle rests on a countertop; its blade appears sharp and suitable for food preparation." + }, + { + "question_id": 197, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A human hand is captured in motion, seasoning or stirring the food in the pan, contributing to the cooking process." + }, + { + "question_id": 198, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000299654.jpg", + "category": "natural_detailed_caption_box", + "text": "The image depicts the head of a zebra, with distinctive black and white stripes covering its fur. The animal's ears are pointed upwards, indicating alertness. The eyes are visible, showcasing a gentle gaze, and the nose is close to the ground, suggesting the zebra is grazing or sniffing the terrain. The mane is partially visible as a series of short, erect black hair between the zebra's ears." 
+ }, + { + "question_id": 199, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000107939.jpg", + "category": "natural_detailed_caption_box", + "text": "The object is a rectangular street sign with white letters on a green background, indicating the name of a street. It is affixed to a metal pole and is located above and slightly to the left of a stop sign. The sign reads 'NORTH AVE' suggesting it's likely an indication of the street or direction. It appears to be a standard street name sign used in many urban settings." + }, + { + "question_id": 200, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000107939.jpg", + "category": "natural_detailed_caption_box", + "text": "This object is a red hexagonal stop sign with white uppercase letters spelling 'STOP'. It is attached to the same metal pole as another sign, below and to the right of it. The sign is designed to alert drivers to stop and is a widely recognized traffic control device. The edges of the sign appear sharp and undamaged, suggesting it is in good condition." + }, + { + "question_id": 201, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "A plush, padded object designed for comfort, potentially used on a sofa." + }, + { + "question_id": 202, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "Similar to the first object, this is also a stuffed and soft piece intended for supporting or resting." 
+ }, + { + "question_id": 203, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "Decorative accessory adorned on the ear, visible as a small, shiny object." + }, + { + "question_id": 204, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "This is a child with an open mouth and animated facial expression, possibly speaking or expressing surprise." + }, + { + "question_id": 205, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "Appears to be a young boy, casually dressed, gripping an electronic device with attention." + }, + { + "question_id": 206, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "The figure is wearing a red ski suit with a blue helmet and goggles. Their stance is open and welcoming, arms outstretched, and they seem to be an instructor addressing a group of students on a snowy slope." + }, + { + "question_id": 207, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A person is mostly obscured by the instructor but can be identified as a ski student by the helmet. The student is wearing a purple jacket with green sleeves and appears to be in mid-motion, learning to ski." 
+ }, + { + "question_id": 208, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "There is a student dressed in green ski gear with visible ski poles, possibly following instructions. They are viewed from the side, indicating movement or a pause during skiing." + }, + { + "question_id": 209, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A clear blue sky with scant clouds, indicative of a bright, sunny day ideal for outdoor activities such as skiing. This backdrop is above a snowy mountain setting." + }, + { + "question_id": 210, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A ski student is captured from behind, suggesting they are moving away from the viewer. They are wearing a red jacket with black pants, indicative of typical ski wear fit for the cold environment." + }, + { + "question_id": 211, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "This student, visible from the side, is wearing a green and purple ski outfit with a matching helmet, possibly in the midst of practicing or following a ski maneuver." + }, + { + "question_id": 212, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A detailed examination of the instructor's black glove, which is part of standard skiing attire, suited to protect hands from cold conditions and providing better grip on ski poles." 
+ }, + { + "question_id": 213, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2411153.jpg", + "category": "natural_detailed_caption_box", + "text": "Captured in this section is a motorcycle racer, sharply tilting while maneuvering a turn on a race track. The rider, outfitted in a full-body racing suit, is almost in a horizontal position relative to the ground, a technique used in high-speed motorcycle racing to navigate tight turns while maintaining speed. The motorcycle itself is predominantly red with hints of white and black, and it showcases a sleek, aerodynamic design typical of high-performance racing bikes. The rider's focused posture and the bike's dynamic angle suggest this is a moment of intense action during a race." + }, + { + "question_id": 214, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2411153.jpg", + "category": "natural_detailed_caption_box", + "text": "This portion of the image displays the texture of an asphalt road, detailed with small granular elements indicative of a typical racing track surface built to offer traction and durability. A crisp white boundary line marks the edge of the racing track, contrasting with the dark gray tone of the asphalt. The road surface is illuminated by ambient light, highlighting the texture and suggesting a dry weather condition which is ideal for racing. The condition of the road suggests it is well-maintained, a necessity for the safety and performance of high-speed motorsport events." 
+ }, + { + "question_id": 215, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/22d3844dcf29a07bd10f557a33684e331846f81b938ca0f742afab09c542133f.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region of the image displays a portion of a scientific or academic paper, specifically focusing on points that seem to outlay contents or headings within the document. The page appears to discuss topics in physics, with references to quark and meson masses, as well as lattice data. The content suggests that the document may be exploring the relationship between subatomic particles and their masses, experimental data, and theoretical models (likely within the field of particle physics or quantum chromodynamics). Each item listed is followed by ellipsis and a numerical value, denoting sections or page numbers where these topics are expanded upon within the document." + }, + { + "question_id": 216, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/22d3844dcf29a07bd10f557a33684e331846f81b938ca0f742afab09c542133f.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region is at the bottom of the document, typically referred to as the page-footer. In academic or scientific papers, this section could include information such as the page number, publication date, author's name, or part of the document classification system. However, the specifics of what this footer contains are not visible, as the black rectangle with a white numeric identifier covers it entirely." 
+ }, + { + "question_id": 217, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The content is a caption designated for a table, which generally serves to describe the table's subject matter. The caption reads, \"TABLE 1: The geometries and adsorption energies for the structures of thioglycolic acid on Au(111) at 0.25ML.\" It provides a clear indication that Table 1 will present quantitative data regarding the geometry and energy characteristics of thioglycolic acid adsorbed on a gold (Au) substrate at a specific coverage level." + }, + { + "question_id": 218, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a table containing organized data. It lists various configurations of thioglycolic acid adsorbed on an Au(111) surface, along with numerical values for initial and optimized parameters such as adsorption distance (ds–Au), polar angle (θ), and adsorption energy (E_ads). The data is structured in columns with headings for different parameters and rows corresponding to different adsorption sites and tilt directions. The table is used to convey detailed quantitative information in a comparative format, facilitating the analysis of changes in geometry and energy after optimization." 
+ }, + { + "question_id": 219, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Here appears to be an excerpt of text, possibly from a research article or report, focusing on detailed scientific analysis. The text discusses the shortest Au-S bond length and mentions 'initial and optimized site,' likely referring to the states before and after some experimental or computational procedure. The content seems to pertain to the interpretation of the data presented in the table above it, providing context and insights into the structural data of the adsorption process." + }, + { + "question_id": 220, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This section of text also seems to be a detailed analytical discussion, possibly a continuation of the content from the previous text excerpt. It specifically highlights the adsorption energy for the most stable structure of a molecule on the Au(111) surface and the preferred adsorption site. It suggests a close relationship with both the data in the table above and the scientific interpretation or conclusion drawn from that data." + }, + { + "question_id": 221, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The content in this region appears to be a page-footer, usually found at the bottom of journal pages or official documents. It may contain information such as the page number, document section, publication date, or authors' names. 
Such footers are used for navigation and citation purposes." + }, + { + "question_id": 222, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The image is a collection of three scatter plots, each corresponding to a distinct type of prediction performance evaluated by F1 score. The x-axis represents the frequency (presumably of occurrence in the training set), while the y-axis represents the F1 score, which is a measure of test accuracy. The plots are labeled (a) Atom prediction performance, (b) Bond prediction performance, and (c) Charge prediction performance. Each plot features a variety of points labeled with chemical symbols or bond types, indicating that the data relates to chemical structures." + }, + { + "question_id": 223, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a caption for the series of scatter plots shown in . It provides an interpretation of the data, stating that there is a clear correlation between the performance of neural networks on different prediction types and the frequency of the specific type in the training dataset. It is noted that classification networks perform significantly better than segmentation networks." + }, + { + "question_id": 224, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a subsection title within the document that reads \"Performance of segmentation network.\" It indicates that the following text will discuss the results and analysis related to the evaluation of the segmentation network's performance." 
+ }, + { + "question_id": 225, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a paragraph that explains how the performance of the segmentation network is measured by the F1 score for pixel predictions for different atom, bond, and charge types. The text discusses how performance correlates with the frequency of these types in the training data and references a correlation visible in Figure 4, assuming that Figure 4 corresponds to the scatter plots in ." + }, + { + "question_id": 226, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is another subsection title within the document that reads \"Performance of classification networks.\" It signals that the subsequent paragraph will describe the performance evaluation for classification networks." + }, + { + "question_id": 227, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This paragraph details the performance of classification networks, mentioning that the F1 score is used for evaluation. It highlights a correlation between F1 score and the frequency of different atom, bond, and charge types in the training set. Although the segmentation is not perfect, the classification networks can maintain accuracy. Results are summarized in Figure 4, which likely refers to the scatter plots in ." 
+ }, + { + "question_id": 228, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a subsection title denoted \"Overall graph accuracy,\" which suggests that the following section of the document will focus on the combined accuracy measurements of the previously discussed networks." + }, + { + "question_id": 229, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "In this paragraph, the text outlines how combining the performance of different parts can produce an overall accuracy for graph predictions. It implies that integration of segmentation and classification network results, as indicated by an algorithm, can construct the resulting graph, referencing images in three different blocks." + }, + { + "question_id": 230, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is the page number of the document, specifically '11,' marking its location within the document's sequence of pages." + }, + { + "question_id": 231, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region appears to be a paragraph of text discussing the outcome of a washing process on reducing sugar content. It notes that this process resulted in a higher content of reducing sugar which is thought to overshadow the glycemic index (GI) lowering effect of the polyphenols and may increase the GI of the sugar." 
+ }, + { + "question_id": 232, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region of text outlines a graphical demonstration of the 'GI sweet spot' related to the sugars shown in a referenced table. It explains that a certain minimum amount of sucrose (22mg CE/100mg) needs to be retained during sugar processing to maintain a low GI, and that if additional polyphenols are present but the reducing sugars are too high, then the low GI effect is negated." + }, + { + "question_id": 233, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region includes a section header titled \"Table 3 - Example sugars,\" which implies that the region is categorizing and summarizing data related to various sugars, likely in a tabular format." + }, + { + "question_id": 234, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text in this region describes the effects of increasing the reducing sugar content of sugar and its impact on the GI, moisture content, and the behavior of glucose and fructose when polyphenol content is increased. It concludes that optimizing moisture and reducing sugar content is insufficient to lower the GI in the presence of higher polyphenol levels." 
+ }, + { + "question_id": 235, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region is a section header for the text that follows, indicating that the content will deal with \"Example b - Washing or massecuite to desired polyphenol content.\" This text likely explains an example or case study related to the process of washing sugar massecuite to achieve a certain level of polyphenol content." + }, + { + "question_id": 236, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region contains text describing an experimental process where two different sugar mill samples were tested for polyphenol content after undergoing washing to a certain depth of color. It discusses how the polyphenol content was measured against desired levels and mentions results found in a specific table." + }, + { + "question_id": 237, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a figure. It includes several images arranged in a grid layout depicting various stages of a document life cycle or processing steps. Each image shows a different state of documents, possibly related to digitalization or text recognition processes. These images likely serve as a visual representation of the document's evolution through a particular workflow, such as scanning or Optical Character Recognition (OCR)." 
+ }, + { + "question_id": 238, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a caption associated with a figure. It reads \"Figure 5: The OCR process.\" This caption identifies and describes the figure that it is associated with. The figure it refers to likely illustrates the stages or aspects of the OCR process, which could involve converting scanned images of text into machine-encoded text." + }, + { + "question_id": 239, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a figure. It shows a piece of text with visual markings comparing two sections labeled \"Available OCR\" and \"Improved OCR.\" The annotations indicate corrections or enhancements made in the 'Improved OCR' section compared to the 'Available OCR' section. This figure serves to demonstrate the efficacy of certain OCR technologies or methodologies by providing a before-and-after comparison." + }, + { + "question_id": 240, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a caption associated with a figure. It reads \"Figure 6: Excerpt from the Hong Kong report with different versions of OCR output. The Internet Archive image containing this excerpt can be accessed here:\" followed by a URL. This caption provides context for the associated figure, indicating that it is an excerpt from a specific report and acknowledges the source of the image. It helps readers understand the purpose of the figure and where they can find additional information." 
+ }, + { + "question_id": 241, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is part of a footer. It contains the name of a journal, \"Journal of Data Mining and Digital Humanities,\" along with the ISSN number, which is a unique identifier used for serial publications. This area of the document provides information about the publication in which the article or research paper may be found." + }, + { + "question_id": 242, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is also part of a footer. It includes a URL, \"http://jdmdh.episciences.org\", which likely directs readers to the website of the journal or publication mentioned in . This URL provides a way for readers to access more information or related content online." + }, + { + "question_id": 243, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a page number. It is located in the footer area of the document and provides the numerical identifier \"9\" for the current page. This helps readers navigate the document and facilitates referencing specific sections." + }, + { + "question_id": 244, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region is identified as the page-header of the document. 
It contains the title of the document, which reads \"2012 Annual Report 2013.\"" + }, + { + "question_id": 245, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This portion is a section-header labeled \"Non-Executive Directors' Remuneration.\" It indicates that the following section will discuss the payment and remuneration details for non-executive directors of the company." + }, + { + "question_id": 246, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This area is another section-header specifying \"Components of Non-Executive Director remuneration.\" This header suggests a breakdown of the various elements that constitute the remuneration for non-executive directors." + }, + { + "question_id": 247, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Found at the bottom of the page, this region is the page-footer. It's a small section that is typically used for providing footnotes, disclaimers, or publication information for the document." 
+ }, + { + "question_id": 248, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a table detailing a \"Share purchase plan.\" It contains columns for the name of the individual, the amount of shares acquired, and the share price range at acquisition dates, alongside with the total sum. It lists information about shares acquired by specific individuals at specified price ranges during a specific time frame." + }, + { + "question_id": 249, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a block of text providing detailed information on \"Current Board fees\" and \"Post-employment benefits.\" It specifies the annual fees for different board roles and outlines the post-retirement benefits provided to non-executive directors with terms of board service." + }, + { + "question_id": 250, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region contains text related to the \"Deferred share purchase plan.\" It describes the nature of the share purchase plan, specifying the conditions under which shares were purchased, the performance criteria associated with the plan, and details regarding the share price and acquisition dates." 
+ }, + { + "question_id": 251, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region is categorized as text. It discusses the mathematical concept of homotopy groups designated π_n(M), focusing on their ability to classify different dimensional hypersurfaces within a manifold M. The text further explains that the triviality of these homotopy groups is linked to the connectivity of the space they represent, with specific mentions of the concepts of domain walls in cosmology and topological defects arising from symmetry breaking during phase transitions in the universe." + }, + { + "question_id": 252, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region of text delves into the implications of symmetry breaking in theoretical physics. It connects the process of symmetry breaking to the generation of monopole-like defects, and it references the Standard Model's group construction that includes a U(1) factor. The text implies that this formation of defects played a pivotal role in historical scientific developments." + }, + { + "question_id": 253, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text in this section links the theoretical concepts mentioned earlier to a practical application: the motivation for introducing a phase of inflation in cosmological models. 
It characterizes the topological conditions for the formation of defects and points out that certain solutions for these conditions can exist even in the absence of topologically stable defects. References are made to specific types of defects and academic citations are included to support these statements." + }, + { + "question_id": 254, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The content in this region pertains to the study of cosmic defects and their stability. It addresses scenarios in which initially unstable defects might become stable through various mechanisms, such as the effects of plasma. These considerations are relevant to the inflationary model in cosmology, particularly the constraints from the formation of cosmic strings." + }, + { + "question_id": 255, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region is categorized as a formula and presents a mathematical equation related to the text's discussion about topological defects and homotopy groups. The equation seems to represent a relationship that is essential to the argument or analysis presented in the categorical text regions it is associated with." + }, + { + "question_id": 256, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The last region is identified as a page-footer. 
It likely contains publication and/or authorship information, a page number, or possibly an indication of the section of the document where the content can be found. Since it is a page-footer, its purpose is primarily to aid in the organization and navigation of the document." + }, + { + "question_id": 257, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0301_0188_0040.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The image appears to be a stylized illustration of a side profile of a person's head and upper torso. The person's face is depicted with a serene or peaceful expression, eyes closed and a faint smile, implying a sense of calm or contemplation. Behind the figure, there are abstract shapes resembling clouds or wind patterns that swirl around the head, which could suggest thoughts, memories, or a state of mental flow. The illustration uses a muted color palette, predominantly warm shades of beige, pink, and gray, with a touch of red in the figure's attire, which has a dotted pattern. This artwork likely serves to evoke a mood or theme related to the content of the document in which it's included, possibly regarding mental health, mindfulness, psychology, or the creative process." + }, + { + "question_id": 258, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0301_0188_0040.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a footer section of an image or document. It contains a citation that reads \"© 2021 Scientific American,\" indicating that the image or the content of the document is copyrighted by Scientific American. This informs the viewer about the source of the content and copyright year, serving both as an attribution and a legal notice to respect the intellectual property rights associated with the material." 
+ }, + { + "question_id": 259, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This section is labeled as \"Chapter 2: Motivation.\" It functions as a heading signifying the start of a new chapter or section within the document, providing readers with an indication of the chapter's theme, which in this case is to establish the rationale or impetus behind the subject matter discussed in the chapter." + }, + { + "question_id": 260, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region, also labeled as \"Chapter 2: Motivation,\" serves a similar purpose to , functioning as part of the chapter heading that presents the focus of the chapter, potentially implying that the author will delve into the reasons or driving forces guiding the study or research presented." + }, + { + "question_id": 261, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region contains a paragraph of text that discusses specific concepts related to a theoretical framework, possibly in the field of theoretical physics or string theory. It mentions a scenario where excitations in a certain 'near horizon throat region' appear redshifted to an observer at infinity. The text discusses the energy associated with these excitations and touches on limits pertaining to string theory, suggesting that in a particular limit, the full Type IIB string theory must be considered. 
The paragraph concludes with an implication that supergravity must be considered in the context of near-horizon geometry within the scope of string theory." + }, + { + "question_id": 262, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "In this text paragraph, the document appears to be discussing two theoretical pictures related to the same low-energy limit within theoretical physics or string theory. It mentions the field theory picture with supergravity and an \\( N = 4 \\text{SU}(N) \\) SYM on the D branes, as well as the geometry picture with supergravity in flat space and Type IIB string theory. It suggests that the document is comparing and contrasting these two theoretical perspectives and proposing that they are both decoupled theories with identical asymptotic conditions." + }, + { + "question_id": 263, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This text section seems to conclude the discussion by mentioning that the analytical tools for two differing theoretical scenarios are completely incompatible. It references the Born-Infeld action and suggests that a mathematical comparison between different models yields coincident D-branes for an \\( \\text{SU}(N) \\) two-form field strength, relating to a broader discussion on theoretical physics and string theory." 
+ }, + { + "question_id": 264, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region exhibits a mathematical formula that is relevant to the discussion within the document. The formula appears to link certain theoretical physics concepts, connecting string coupling constants \\( g_s \\) with D-brane charges and configurations. The formula is most likely important in the context of supporting the document's claims about supergravity or string theory." + }, + { + "question_id": 265, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The paragraph in discusses the conventional assumption that an insider's private information is static, citing specific examples from the literature. It elaborates by stating that in certain works, insiders are assumed to know the final value of an asset both before and after the default of the company issuing the asset. The text suggests that the presence of insiders does not always lead to market arbitrage and may contribute positively to the market by leading to higher information efficiency in price processes." + }, + { + "question_id": 266, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text in challenges the assumption of an insider's perfect foresight as unrealistic, reasoning that the fundamental value of a firm is tied to dynamically changing elements like cash flows and sales, among other factors. 
The paragraph presents the idea that the fundamental value is stochastic, implying that it is subject to random fluctuations, and that the insider has the advantage of perceiving these fluctuations more clearly than other market participants." + }, + { + "question_id": 267, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "In , the document introduces the paper's goal, which is to relax the assumption of static information and examine the equilibrium in trading and price processes and market efficiency when insiders have dynamic private information. The paragraph sets the context for a more detailed exploration of how markets operate under these conditions." + }, + { + "question_id": 268, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": " contains text which explains that the model considered in this paper is a broader version of the earlier static models. The paper's intention is to cover dynamic information scenarios and improve on previous models that covered a narrower range of trading strategies and pricing rules." + }, + { + "question_id": 269, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The paragraph in discusses the findings of the paper, which include the identification of a Markovian equilibrium that is inconspicuous, allows insiders to trade without being detected, and is solely dependent on the total order process. 
It underscores the unique nature of this equilibrium and how it enhances the market efficiency in certain conditions." + }, + { + "question_id": 270, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text in suggests empirical outcomes where revealing information might be beneficial. Specifically, it contrasts different market equilibrium scenarios and suggests that in non-Markovian price processes, it’s often better for insiders to disclose their private information." + }, + { + "question_id": 271, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The Comprehensive Description includes \"Abbondanza\" displayed in a script font that conveys a sense of stylishness or elegance, which may suggest that it is the name of a business, possibly a restaurant or some sort of food-related establishment, given its association with abundance or plenty often related to food." + }, + { + "question_id": 272, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The Comprehensive Description for \"Cafe\" suggests that the text is identifying a type of establishment where coffee and light meals may be served. The font is straightforward and easily legible, which is typically used for clarity and immediate recognition for passersby." 
+ }, + { + "question_id": 273, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The Comprehensive Description for \"USIS\" indicates a text that is likely an acronym or a name presented in a bold and blocky font, common for official or institutional entities. It is placed on the side of a van, suggesting it could be the branding of a company or a service, possibly linked to the van's purpose or ownership." + }, + { + "question_id": 274, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"ESTATE\" is written in capital letters with a bold typeface that has clear and uniform strokes, implying a sense of authority and prominence. It appears against a yellow background, which suggests visibility and is likely meant to catch the eye of passersby. The text is likely part of a business sign for a company dealing with property, real estate sales, or management." + }, + { + "question_id": 275, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"AGENTS\" displayed similarly in capital letters and bold typeface complements the text in . The typeface is consistent, suggesting that both are part of the same sign. The dark text against the yellow background stands out, indicating the nature of the business below, which is likely involved in real estate agency work." 
+ }, + { + "question_id": 276, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"SAXONS\" is written in capital letters with a font style that is bold and prominent, but with a slightly more decorative style than and . This difference could be a stylized choice to make the brand name distinctive. Positioned on a façade above a window, it is part of the business's branding, likely the name of the company." + }, + { + "question_id": 277, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "This contains the same text as , \"SAXONS\", indicating that the text is repeated within the image. This repetition reinforces the importance of the name as part of the branding. The text style and location, again above a window, maintain the brand's visibility from multiple angles." + }, + { + "question_id": 278, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "Displaying the word \"ESTATE\" in the same bold, capital letter style as observed in . This repetition at a lower part of the building indicates a consistent branding approach across the business' presence on the building, and its placement closer to eye level increases readability for pedestrians." 
+ }, + { + "question_id": 279, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"AGENTS\" is identified, and like , it mirrors the style and size of the sign in , ensuring that the message of the business being an estate agency is clear. This consistent branding facilitates quick recognition and understanding of the services offered." + }, + { + "question_id": 280, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"Triple\" appears in a retro cursive script, likely chosen to convey a sense of nostalgia or classic style, which is consistent with the overall branding. It is set against a yellow portion of the sign, and the color choice here is a mint green which provides a pleasing contrast that makes the text stand out." + }, + { + "question_id": 281, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"WHITE\" is written in bold, capital letters, featured on a green ribbon-like background that cuts across the sign. The font is sans-serif, which gives a modern and clean look. The use of capital letters in this context suggests emphasis and importance." + }, + { + "question_id": 282, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The phrase \"SPOT\" is displayed in a sans-serif, uppercase font similar to the text in . It is placed within the same green ribbon background, mirroring the style and maintaining design consistency. 
This positioning completes the name or title represented on the signage." + }, + { + "question_id": 283, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"O's\" is written in a script that echoes the retro flair seen in . This script is mint green, presented on a yellow backdrop, and it features an apostrophe, signifying a possessive or a contraction. The stylized \"O\" has a red center dot, adding to the thematic color scheme." + }, + { + "question_id": 284, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"by\" is located on the lower left area of the central graphic and is likely to be a connector or a preposition relating to the larger text elements in the image. It's written in small, lowercase letters, contrasting in size to the other texts, suggesting a subordinate role in the information hierarchy." + }, + { + "question_id": 285, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001122.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text in this area reads \"NEW\". The font is bold and stylized with thick vertical lines and sharp edges, suggesting a strong, impactful presence. The letters are colored in red, which stands out against the white background of the fabric they are printed on. There are black vertical lines that run down the fabric, giving the impression of pinstripes. The text placement and style are reminiscent of classic athletic or team-related apparel, often used to represent a specific city or team name." 
+ }, + { + "question_id": 286, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001122.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text in this area reads \"YORK\". Similar to the previous region, it features a bold and stylized font in red, contrasting with the white pinstriped background. The consistent style between this text and that of suggests they form a single phrase, typically associated with a particular location or team. The font size and its commanding presence imply that the text is intended to be easily read and recognized from a distance, characteristic of team jerseys or sports merchandise." + }, + { + "question_id": 287, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_162.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"Colchester\" is displayed in a straightforward, sans-serif font with a bold weight, which makes it highly legible and easy to read. The text color is white, which contrasts sharply with the red background, creating a standout effect that captures attention. This type of text presentation is typically used for clear communication and effective signage. \"Colchester\" is likely the name of a place, possibly a destination or location referenced on a signpost or directional marker. The choice of a bold and contrasting color scheme is intentional, aimed at ensuring that the text is discernible from a distance and under various lighting conditions. The text is centrally aligned within the marked area, suggesting the importance of the information it conveys. The presence of the symbol above the text, resembling a stylized pair of railway tracks, indicates that this sign is associated with a railway service or station. 
The purpose of the text in this context is to inform viewers of a railway station name or a destination reachable via train services." + }, + { + "question_id": 288, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"AYAM\" is presented in large uppercase letters on a signage board. The font appears bold and designed to be eye-catching, serving the purpose of promoting or identifying a business or product associated with chicken, as \"ayam\" means chicken in Malay and Indonesian." + }, + { + "question_id": 289, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"RIA\" appears next to \"AYAM\" in the same font and style, following the design pattern of the sign. It seems to be part of a larger phrase or brand name, although without additional context it is challenging to ascertain its full meaning or association." + }, + { + "question_id": 290, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"SMASHED\" is in uppercase letters and retains the same font consistency and styling as the previous words, indicating it's part of the same signboard. The use of the word \"smashed\" could be describing a method of food preparation, possibly relating to the menu items offered by the establishment." 
+ }, + { + "question_id": 291, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"FRIED\" appears in the same bold, attention-grabbing font as the other text elements in the signage. The usage of the term \"fried\" aligns well with food-oriented establishments and could denote a particular style of cooking advertised by the business." + }, + { + "question_id": 292, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The term \"CHICKEN\" completes what seems to be a descriptive phrase relating to the nature of the food provided at this location. Presented in the same visual style as the other text elements on the sign, it confirms the establishment’s focus on chicken dishes." + }, + { + "question_id": 293, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"Accommodation\" appears on a signboard, suggesting the label for a location where lodging facilities are provided. The text is bold and capitalized, providing clear visibility and significance, thus indicating direction to the accommodation facilities within the vicinity." + }, + { + "question_id": 294, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"Office\" displayed similarly to , is also on the signboard, and its typography suggests it is an instructional marker guiding individuals towards offices located nearby. 
Its distinct appearance functions as a navigational aid for visitors seeking office spaces." + }, + { + "question_id": 295, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The term \"Nightline\" is prominently featured, possibly indicating a nighttime service or a helpline available after-hours. This text, like the others on the sign, caters to nighttime assistance or inquiries, potentially providing crucial information for individuals seeking support during late hours." + }, + { + "question_id": 296, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"BUBBA\" appears in bold, capital letters with a font that is playful and somewhat informal, possibly evoking a casual or friendly atmosphere. The position is prominently displayed at the top of a circular logo, which suggests its importance as a distinguishing element or a brand name." + }, + { + "question_id": 297, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"GUMP\" is presented in a similar bold and playful font directly below . Both words form a cohesive phrase when read together, implying a connection or partnership, possibly in a business context." + }, + { + "question_id": 298, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"SHRIMP\" is placed below and , completing the phrase that seems to be the focal point of the circular logo. 
The font style remains consistent with the previous text, reinforcing the brand's identity and likely indicating the type of product or service offered." + }, + { + "question_id": 299, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"RESTAURANT\" is written in a smaller, yet bold font beneath the word \"SHRIMP\". This text specifies the nature of the business associated with the overarching brand identified by the preceding text." + }, + { + "question_id": 300, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"MARKET\" appears in a smaller font at the bottom of the circle, suggesting a secondary or additional aspect of the business, perhaps indicating a place where goods are sold as part of the company's offerings." + }, + { + "question_id": 301, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"CO\" could stand for \"Company,\" abbreviated and presented beside the main brand name, which is common practice for businesses to denote a corporate entity." + }, + { + "question_id": 302, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"TM\" indicates that the entire phrase formed by , , and is a trademark. This protects the brand's unique identity and legally secures its use exclusively for the business's purposes." 
+ }, + { + "question_id": 303, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"EVENING\" appears in a sans-serif, bold font that is capitalized for emphasis. It is located on the lower part of a product label, positioned just above another text element that indicates further details about the product. The text serves to indicate either the usage time or a key ingredient, \"Evening Primrose,\" of the product, likely related to wellness or personal care." + }, + { + "question_id": 304, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"PRIMROSE\" is the second part of a phrase in which the text is styled similarly to the text in . It follows directly below \"EVENING,\" suggesting the complete term \"EVENING PRIMROSE.\" The positioning and styling are consistent with , reinforcing the connection between the two words. The term \"Evening Primrose\" is usually associated with the name of a plant, often used in the context of essential oils or natural product ingredients." + }, + { + "question_id": 305, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The term \"BASE\" is part of the full term \"BASE OIL,\" which is shown in a smaller and possibly lighter weight sans-serif typeface compared to and . Positioned at the bottom of the product label, it likely indicates the type of product, suggesting that the contents of the bottle can be used as a carrier or base oil in aromatherapy or skincare." 
+ }, + { + "question_id": 306, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"OIL\" completes the term \"BASE OIL,\" as mentioned in . The font and positioning maintain consistency with the description provided in , reinforcing its role as part of a descriptive phrase related to the product's use or contents. Together, \"BASE OIL\" likely designates the product's category within a larger set of similar goods." + }, + { + "question_id": 307, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01644.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"46\" appears in a serif font with distinct, prominent stylistic flares at the ends of the strokes, which is characteristic of serif fonts. These numerals are white, providing a strong contrast against a dark background plate, which appears to be made of slate or a similar material. The plate is mounted onto a brick wall, and there are two spherical, possibly metallic, fixtures attached to the plate on either side, which seem to be serving as decorative mounting posts. The purpose of this text likely indicates an address or number associated with the location, commonly used to identify specific residential or commercial units." + }, + { + "question_id": 308, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This area seems to indicate the numeric value '31,' which, in a calendrical context, may reference the number of days in a month. It does not appear to have any interactive features based on the screenshot." 
+ }, + { + "question_id": 309, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This seems to be a button or a link labeled 'Connecter' which, when translated from French, means 'Connect' or 'Log in'. It is likely an interactive element that upon being clicked, would prompt the user to access an account or initiate a connection process." + }, + { + "question_id": 310, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This section contains the French word 'Novembre', which is the month of November. It appears to be a part of a list of months, possibly for navigating a calendar or archives by month. It may be an interactive element that allows users to view content from November." + }, + { + "question_id": 311, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This is a button or link with the text 'Annoncez' followed by information icon (i). The French word 'Annoncez' translates to 'Advertise'. This suggests that it is a call-to-action for users to advertise, possibly by clicking this button or link. The information icon typically indicates additional details available upon interaction." 
+ }, + { + "question_id": 312, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This area displays the number '07', which could signify a day of the month, especially since it is seen next to a date heading in the format 'Vendredi 7 Mai 2021', which translates to 'Friday, May 7, 2021'. It seems to be a static element without interactivity." + }, + { + "question_id": 313, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "Similar to , this is labeled 'Septembre', which is the French word for September. It is part of the same apparent navigational element for a calendar or archive sorted by months and is likely interactive as well." + }, + { + "question_id": 314, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This section reads 'Liens Web', which translates to 'Web Links' from French. This likely refers to a section of the web page intended to direct users to other related sites or resources. It is probably interactive, with each listed link being clickable." + }, + { + "question_id": 315, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "The number '15' is visible here, and when combined with the context of a calendar visible in the screenshot, it likely represents the 15th day of a month. This element does not seem to be interactive itself." 
+ }, + { + "question_id": 316, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "Here we see the number '04' which, in the context of the surrounding calendar, might represent the 4th day of a month. It doesn't show any sign of interactivity." + }, + { + "question_id": 317, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This text, 'Plan du site', stands for 'Site Map' in French and usually refers to a detailed page listing where one can find an overview of all the sections and pages within the website. It is usually an interactive element that, when clicked, will take the user to the sitemap page." + }, + { + "question_id": 318, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "This region depicts an interactive button with the text \"BOOK NOW\" overlaid on it. Usually, buttons like this on websites are clickable and lead the user to a page where they can schedule an appointment or reserve a service. The button is stylistically designed to stand out and grab attention, potentially suggesting it is a call-to-action feature for users to quickly access the booking process for the service provided by the website." 
+ }, + { + "question_id": 319, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "This section of the website features a text link with the phrase \"Buy Luxurious Doona.\" It likely serves as a navigational element, which upon clicking, would redirect users to a page where they can purchase a \"Luxurious Doona.\" The term \"Doona\" typically refers to a type of bedding, suggesting that the site might be related to home goods or personal comfort items." + }, + { + "question_id": 320, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "This area contains a text link that reads \"Pay Balance.\" It is probably an interactive link that, once clicked, would take the user to a section of the website where they can complete a payment - likely concerning a service or product they have previously engaged with." + }, + { + "question_id": 321, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "Featured here is a clickable text link titled \"About Us.\" Such links generally lead users to a webpage that elaborates on the history, mission, values, or team behind the company or service. It helps users learn more about the company or website owners." + }, + { + "question_id": 322, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "This part of the webpage indicates a \"Contact us\" link. 
Clicking on this text would typically guide the visitor to a page featuring contact information or a form enabling the users to reach out to the company for inquiries or support." + }, + { + "question_id": 323, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "\"Home\" appears to be a navigation link that, when selected, would likely redirect users to the homepage of the website. The homepage is the main page that often provides a comprehensive overview of what the website offers." + }, + { + "question_id": 324, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The text here, \"doonawash@gmail.com,\" suggests an email address. This is likely provided for users to directly contact the company or service provider through email. It is not clickable but can be used to send an email using an email client or service." + }, + { + "question_id": 325, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "Similar to , this \"Contact us\" link would allow users to access a contact section or page on the website, promoting user interaction with the service provider for queries or assistance." + }, + { + "question_id": 326, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "Just like , this \"Home\" link is a navigational feature intended to bring the user back to the site's main page, presenting a starting point or central hub for exploring the website's contents." 
+ }, + { + "question_id": 327, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "Echoing , the \"Pay Balance\" text link is associated with the payment part of a transaction on the website. It is intended to facilitate users in clearing dues or completing transactions related to the services offered by the site." + }, + { + "question_id": 328, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_0558c1f4-c05b-49a8-8479-04b1575779d2.png", + "category": "web_detailed_caption_box", + "text": "This area of the webpage is part of a bullet point list under the subheading \"Local Chapters\". The subheading describes Local Chapters as country or region-level groups affiliated with the OpenStreetMap Foundation that represent their local mapping community in dealings with government, business, and media. The bullet point \"OpenStreetMap Belgium\" likely indicates that there is an established local chapter for the country of Belgium. The text appears in blue with an underline, suggesting that it is a hyperlink. Clicking on this hyperlink would presumably direct the user to more information about the OpenStreetMap community in Belgium or to their specific website." + }, + { + "question_id": 329, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "This area appears to be a contact detail, specifically a phone number. It typically serves as a direct line of communication for users to reach out to the company or organization featured on the website. Such contact information is usually clickable on mobile devices, enabling the user to initiate a phone call directly." 
+ }, + { + "question_id": 330, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "This section includes a numerical rating, which is indicative of client satisfaction, service quality, or performance measurement. It suggests that it may be connected to reviews or ratings received from clients, as denoted by the star symbol which commonly represents ratings." + }, + { + "question_id": 331, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "The text translates to \"Free Consultation\" in Azerbaijani, indicating an offering from the company to prospective clients. It is likely a call-to-action button which upon clicking, would lead a user to a form or contact option to set up a consultation without any charge." + }, + { + "question_id": 332, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "This part of the website is labeled \"Portfolio,\" signifying that it's likely a navigation element leading to a page where the company showcases their previous work, projects, or case studies to highlight their experience and expertise." + }, + { + "question_id": 333, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This space indicates a shopping cart feature with a count of items currently in the cart, which currently stands at zero. 
This interactive element likely becomes clickable when items are added, allowing users to view and manage the contents of their cart." + }, + { + "question_id": 334, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This area is typically a customer service feature, allowing users to access help or assistance through various means such as a help center, live chat, or contact information. It's usually clickable and would direct the user to a support section of the website." + }, + { + "question_id": 335, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "The text suggests a prompt to visit the company's Facebook page. This is an interactive element that, when clicked, likely redirects users to the specified social media page to engage with the company's content on that platform." + }, + { + "question_id": 336, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "It denotes an area that likely relates to personalization for users, where they can view their astrology charts. This is expected to be a clickable feature which, when accessed, leads the user to a section where their personalized charts are displayed or can be created." + }, + { + "question_id": 337, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "Similar to , this is a call to action to visit the company's Twitter page. 
Clicking on this interactive element would redirect a user to the company's Twitter profile to view tweets and engage with their content." + }, + { + "question_id": 338, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This is a customer contact area, providing users with a way to get in touch with the company. Clicking on this is likely to take the user to a section of the site with various contact options like email, phone, or a contact form." + }, + { + "question_id": 339, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This is a call-to-action button that allows users to add a product to their shopping cart. This button is interactive, and upon clicking, the chosen product would be added to the user's cart, with the action possibly reflected in the shopping cart count in ." + }, + { + "question_id": 340, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This area is likely dedicated to showcasing the company's range of products. Clicking here would probably lead users to a product catalog where they can browse and select items of interest." + }, + { + "question_id": 341, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "It represents an area designated for a podcast. Users can expect to interact with this button to be taken to a media player or section of the website where they can listen to recorded audio content." 
+ }, + { + "question_id": 342, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This part of the website provides company information to the user. It's normally a clickable element that leads the user to learn more about the company's history, values, mission, and team members." + }, + { + "question_id": 343, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This section typically represents a menu item on a website that describes the services or actions undertaken by the organization. It usually links to a page with detailed information on the work that the organization performs, including projects, mission statements, or other relevant content." + }, + { + "question_id": 344, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This appears to be a news headline or feature article title on the website. It suggests that the organization has introduced a new initiative offering financial assistance for livelihood projects. Clicking on this title would likely lead to an article or post giving more information about the micro-grants program and its objectives." + }, + { + "question_id": 345, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This is likely a button or link to a settings page where users can adjust their preferences for the website, which might include language settings, account details, notification preferences, and more." 
+ }, + { + "question_id": 346, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This commonly links to the website's privacy policy document, where users can learn about how the organization collects, uses, stores, and protects personal data." + }, + { + "question_id": 347, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This is typically a navigation link that returns the user to the main homepage of the website from any other page." + }, + { + "question_id": 348, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "It usually indicates a button the user can click to accept the terms of a policy, possibly related to cookies or usage terms, as indicated by the accompanying text." + }, + { + "question_id": 349, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This is often a prominent call-to-action button meant to direct users to a page where they can make financial contributions to the organization or cause." 
+ }, + { + "question_id": 350, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "Commonly a menu item that links to a news section containing articles, updates, blog posts, press releases, or other information that keeps readers informed about the organization's activities or relevant topics." + }, + { + "question_id": 351, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This is typically a link to a page where users can find more information about the organization, including history, values, team members, or accomplishments." + }, + { + "question_id": 352, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "Usually a link to a page where visitors can find contact information for the organization, such as an address, phone number, email, or a contact form." + }, + { + "question_id": 353, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_6c677961-e540-4cc5-b725-5e301019a9f9.png", + "category": "web_detailed_caption_box", + "text": "This region appears to be a toolbar located within a content editing area, likely part of a web-based application interface. The specific feature highlighted is an icon that suggests functionality related to inserting tables into the content. In a typical text editor or content management system interface, clicking this icon would presumably open a menu or dialogue box allowing the user to create and insert a table into the document. 
The table insertion feature commonly lets users specify the number of rows and columns, choose a table style, and sometimes adjust additional table properties such as cell padding or headers." + }, + { + "question_id": 354, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "This area encompasses a navigation element labeled \"News.\" It likely leads to a section of the website where current news relevant to the organization or its field of operation is disseminated. As a navigational element, it is interactive and upon clicking would redirect users to the page where news articles or updates are posted." + }, + { + "question_id": 355, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "This area displays the company's name ServeGate, which appears to be stylized as a logo. This typically acts as a home button; clicking on it would usually take users back to the main or home page of the website." + }, + { + "question_id": 356, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "This heading titled \"Our Difference\" may signify a navigational item that leads to content describing what sets the organization apart from competitors. Interaction would involve clicking it to navigate to a page that likely discusses the company's unique selling propositions (USPs), mission, values, or other differentiating factors." 
+ }, + { + "question_id": 357, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "Labeled \"About Us,\" this is another navigation item that, when clicked, would take the user to a section of the site that provides information about ServeGate, such as its history, leadership team, vision, and mission." + }, + { + "question_id": 358, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The text \"Get in touch\" suggests an interactive component that leads to a part of the website where users can contact the organization. This may include a contact form, phone numbers, email addresses, or other means of communication." + }, + { + "question_id": 359, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The term \"Home\" designates a navigational link that typically redirects users to the front page of the website. Clicking this link would generally return the user to the starting point of their navigation experience." + }, + { + "question_id": 360, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The item labeled \"Services\" is likely a drop-down menu or a link to a page that outlines the company's offerings. Users can click on it to discover more about the services provided by ServeGate, including descriptions and possibly pricing or someone to contact for further inquiry." 
+ }, + { + "question_id": 361, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "\"Indigenous Impact\" might be a navigation link to a page detailing the company's impact on, contributions to, or programs associated with Indigenous communities. Interaction with this element would bring the user to either a dedicated section or might expand into a submenu listing various facets of this impact." + }, + { + "question_id": 362, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "This appears to be another instance of the company logo for ServeGate, similar to . It likely serves the same function as a clickable link leading back to the home page of the website." + }, + { + "question_id": 363, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_5a35d9c2-2c2d-4a49-ad0a-1408d9cac78e.png", + "category": "web_detailed_caption_box", + "text": "The area appears to contain an interface element labeled \"Close issue\" situated within a software development environment, likely a part of an issue tracking or project management system. This interface element is a button, as suggested by its design and placement near the text input area for comments. When clicked, it would typically result in the associated issue being marked as resolved or closed in the system, thereby updating the status of the issue within the project's workflow. The button is designed for users to signify that the discussion, problem, or task represented by the issue no longer requires attention and can be archived or removed from active consideration." 
+ }, + { + "question_id": 364, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_3fed2169-3c3d-43e7-baaa-3bf0e0c7134c.png", + "category": "web_detailed_caption_box", + "text": "This is a close button found on a web-based issue tracking platform, specifically GitLab. In the context of the page, it is presumably used to close the issue which is currently being viewed. When a user clicks this button, the active issue (#1460 in this case) will likely be marked as closed within the system, changing its status from open to closed. This is a standard feature in issue tracking and project management software, allowing users to manage the lifecycle of issues and tasks." + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/ocr_spotting_detailed_caption_box/prediction.json b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/ocr_spotting_detailed_caption_box/prediction.json new file mode 100644 index 0000000000000000000000000000000000000000..46b7886dee75138be7ac702f93ad13e4478cf79c --- /dev/null +++ b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/ocr_spotting_detailed_caption_box/prediction.json @@ -0,0 +1,2186 @@ +[ + { + "question_id": 1, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "Friday, Sep 2, 61°F" + }, + { + "question_id": 2, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A stylized letter \"G\" with a gradient of colors starting from red at the top, transitioning to orange, yellow, green, blue, and ending with purple at the bottom." 
+ }, + { + "question_id": 3, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A white, fluffy cloud with a smooth, rounded shape and a slightly irregular outline. The cloud has a soft, cotton-like texture and appears to be illuminated from the left side, giving it a gentle gradient from white to a light blue hue." + }, + { + "question_id": 4, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A white gear with a blue circular center and six evenly spaced, straight teeth." + }, + { + "question_id": 5, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A white speech bubble icon with a blue outline, featuring three horizontal blue lines of varying lengths inside the bubble." + }, + { + "question_id": 6, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A red, rectangular play button with rounded corners, featuring a white, centered play triangle pointing to the right." + }, + { + "question_id": 7, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A stylized letter \"G\" with a gradient of colors starting from red at the top, transitioning to orange, yellow, green, and ending with blue at the bottom." 
+ }, + { + "question_id": 8, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A blue circle with a red dot above it and a yellow dot below it." + }, + { + "question_id": 9, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A circular button with a white center and a thin black border." + }, + { + "question_id": 10, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A white, right-pointing arrow with a thick border." + }, + { + "question_id": 11, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Excel\" is written in a bold, sans-serif font with a white color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 12, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Facebook\" is written in a bold, sans-serif font with a light pink color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." 
+ }, + { + "question_id": 13, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Speedtest\" is written in a bold, sans-serif font with a white color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 14, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Instagram\" is written in a bold, sans-serif font with a light pink color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 15, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Home\" is written in a bold, sans-serif font with a white color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 16, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Lite\" is written in a bold, sans-serif font with a white color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." 
+ }, + { + "question_id": 17, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Messenger\" is written in a bold, sans-serif font with a gradient of pink to white. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 18, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "Home De..." + }, + { + "question_id": 19, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "A pair of cartoonish eyes with large, white, oval-shaped pupils and black, curved eyelashes. The eyes are set against a green background." + }, + { + "question_id": 20, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "A blue square with a white lowercase \"g\" followed by three white horizontal lines of equal length." + }, + { + "question_id": 21, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"Search\" is written in a bold, sans-serif font with a dark gray color. The letters are evenly spaced and aligned horizontally." 
+ }, + { + "question_id": 22, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"settings\" in lowercase letters, with a modern, sans-serif font. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance. The color of the text is a gradient of light to dark gray, with the darker shade at the top and the lighter shade at the bottom." + }, + { + "question_id": 23, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "mobile, data" + }, + { + "question_id": 24, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"data\" in lowercase letters, with a modern, sans-serif font. The letters are evenly spaced and have a clean, minimalist design." + }, + { + "question_id": 25, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The Wi-Fi symbol consists of a lowercase \"i\" with a dot above it, followed by a lowercase \"f\" with a dot above it." + }, + { + "question_id": 26, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"and\" in lowercase letters, with a serif font, is written in a dark color against a light background." 
+ }, + { + "question_id": 27, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"hotspot\" in lowercase letters, with a modern, sans-serif font. The letters are evenly spaced and have a clean, minimalist design." + }, + { + "question_id": 28, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "usage, and hotspot" + }, + { + "question_id": 29, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"Connected\" is written in a bold, sans-serif font with a black color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 30, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "A white, L-shaped object with a smooth surface and rounded edges." + }, + { + "question_id": 31, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "Search or type web address" + }, + { + "question_id": 32, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The text \"ebay shopping cart\" is displayed in lowercase letters. 
The word \"ebay\" is in a larger font size compared to the word \"shopping cart\". The text is in a sans-serif font and is centered horizontally." + }, + { + "question_id": 33, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The word \"cart\" in lowercase letters, with a bold, sans-serif font. The letters are black and evenly spaced." + }, + { + "question_id": 34, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "https://cart.ebay.com/" + }, + { + "question_id": 35, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "Welcome to Costco Wholesale" + }, + { + "question_id": 36, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The text \"costco.com\" is displayed in lowercase letters." + }, + { + "question_id": 37, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "Pay Less." + }, + { + "question_id": 38, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The word \"Target\" in bold, black, sans-serif font." 
+ }, + { + "question_id": 39, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The text \"Expect More.\" is written in a bold, sans-serif font with a black color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 40, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The text \"target.com\" is displayed in a bold, sans-serif font with a blue color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 41, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The word \"skype\" is written in lowercase letters with a modern, sans-serif font. The letters are black and evenly spaced, with a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 42, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The word \"Skype\" is written in lowercase letters with a modern, sans-serif font. The letters are black and evenly spaced, with a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 43, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "A blue circular icon with a white lowercase \"s\" in the center." 
+ }, + { + "question_id": 44, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The word \"Install\" is written in lowercase white letters on a green background." + }, + { + "question_id": 45, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The word \"Skype\" is written in a bold, sans-serif font with a dark gray color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 46, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "purchases" + }, + { + "question_id": 47, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "In-app purchases" + }, + { + "question_id": 48, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "A black star with a five-pointed shape, featuring a slightly irregular outline and a textured surface that suggests a three-dimensional form." + }, + { + "question_id": 49, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "A black letter \"B\" followed by a black plus sign." 
+ }, + { + "question_id": 50, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "Editors' Choice" + }, + { + "question_id": 51, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Search settings" + }, + { + "question_id": 52, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "A white envelope icon with a triangular flap on the right side, set against a dark background." + }, + { + "question_id": 53, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "The word \"Add\" in lowercase letters, with a bold font and a slight shadow effect, giving it a three-dimensional appearance." + }, + { + "question_id": 54, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "another email" + }, + { + "question_id": 55, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "The word \"account\" in lowercase letters, with a bold font and a slight italicization, set against a plain background." 
+ }, + { + "question_id": 56, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Set up your personal or work email" + }, + { + "question_id": 57, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "or work email" + }, + { + "question_id": 58, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Network & internet" + }, + { + "question_id": 59, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Wi-Fi, mobile, data" + }, + { + "question_id": 60, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "usage, and" + }, + { + "question_id": 61, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "Wednesday, May 18" + }, + { + "question_id": 62, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "The word \"Maps\" in a bold, sans-serif font, with a gradient of pink to white, giving it a three-dimensional appearance." 
+ }, + { + "question_id": 63, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "A stylized letter \"G\" with a gradient of colors starting from red at the top, transitioning to orange, yellow, green, blue, and ending with purple at the bottom." + }, + { + "question_id": 64, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "A red, teardrop-shaped pin with a black circular center." + }, + { + "question_id": 65, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "A white speech bubble icon with a blue outline, containing three horizontal blue lines of varying lengths." + }, + { + "question_id": 66, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "A stylized letter \"G\" with a gradient of colors starting from red at the top, transitioning to orange, yellow, green, and ending with blue at the bottom." + }, + { + "question_id": 67, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "A blue circle with a white border, a red circle with a white border, and a yellow circle with a white border." 
+ }, + { + "question_id": 68, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The text \"costco.com/Check\" is displayed in a bold, sans-serif font. The word \"costco.com\" is in lowercase letters, and the word \"Check\" is in uppercase letters. The text is aligned to the left." + }, + { + "question_id": 69, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The logo features the word \"COSTCO\" in large, bold, red capital letters with a white outline. Below it, the word \"WHOLESALE\" is written in smaller, bold, blue capital letters." + }, + { + "question_id": 70, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The word \"Warehouses\" is written in a bold, sans-serif font with a light blue color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 71, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The word \"Account\" in a bold, sans-serif font, with a gradient of blue shades ranging from light to dark, giving it a three-dimensional appearance." + }, + { + "question_id": 72, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "A blue shopping cart with a white handle and a white basket area." 
+ }, + { + "question_id": 73, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "A blue rectangular sign with the word \"Shop\" in white, bold, sans-serif font. To the left of the text, there are three horizontal white lines of varying lengths." + }, + { + "question_id": 74, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The word \"Search\" in a bold, sans-serif font, with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and have a consistent size throughout." + }, + { + "question_id": 75, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "My Warehouse" + }, + { + "question_id": 76, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "Delivery Location" + }, + { + "question_id": 77, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The word \"Seattle\" in bold, black, sans-serif font." 
+ }, + { + "question_id": 78, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "Fetch Rewards: Play" + }, + { + "question_id": 79, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The word \"Play\" in bold, black, sans-serif font." + }, + { + "question_id": 80, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "to earn" + }, + { + "question_id": 81, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The text \"MAKE\" is written in bold, uppercase letters with a dark green color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 82, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The word \"MONEY\" in bold, uppercase letters with a green background and black outline." + }, + { + "question_id": 83, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The text \"appminer st\" is displayed in lowercase letters. The letters are green and have a sans-serif font. The text is aligned to the left." 
+ }, + { + "question_id": 84, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The word \"Contains\" is written in a bold, sans-serif font with a light gray color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 85, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The text \"ads\" is written in lowercase letters with a modern, sans-serif font. The letters are evenly spaced and have a clean, minimalist design. The color of the text is a light gray, blending subtly with the background." + }, + { + "question_id": 86, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The text \"50K+\" is displayed in bold, black font with a slight shadow effect, giving it a three-dimensional appearance. The \"50K\" is in a larger font size compared to the \"+\" sign." + }, + { + "question_id": 87, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "A black star with a five-pointed shape, featuring a slightly irregular outline and a textured surface that suggests a three-dimensional form." 
+ }, + { + "question_id": 88, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A black and white image of a computer keyboard with a standard QWERTY layout, including function keys, a number pad, and arrow keys. The keys are rectangular with white lettering on black keys, and the keyboard has a slight ergonomic curve." + }, + { + "question_id": 89, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The text \"walmart.com\" is displayed in a bold, sans-serif font. The letters are black and evenly spaced, with a slight shadow effect, giving them a three-dimensional appearance. The text is aligned to the left." + }, + { + "question_id": 90, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A white rectangular signboard with the text \"Lenovo Thinkpad\" in black, sans-serif font." + }, + { + "question_id": 91, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A blue rectangular button with rounded corners featuring the word \"Cancel\" in white, bold, sans-serif font." + }, + { + "question_id": 92, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A black and white image of a Lenovo ThinkPad laptop with a visible keyboard and trackpad." 
+ }, + { + "question_id": 93, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The word \"Lenovo\" in lowercase letters, with a bold font and a slight italicization, set against a plain background." + }, + { + "question_id": 94, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A black, rectangular computer keyboard with a standard QWERTY layout, including a number pad on the right side. The keys are chiclet-style with white lettering, and there is a slight sheen on the surface, suggesting a smooth texture. The function keys are aligned along the top, and there is a visible space bar at the bottom center." + }, + { + "question_id": 95, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The word \"in\" in lowercase letters, with a bold font and a slight shadow effect, giving it a three-dimensional appearance." + }, + { + "question_id": 96, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A rectangular electronic device with a screen displaying text, surrounded by a thin bezel." + }, + { + "question_id": 97, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A black and white image of a Lenovo ThinkPad charger." 
+ }, + { + "question_id": 98, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "NAVIGATING SPECIAL EDUCATION SOCIAL & EMOTIONAL LEARNING" + }, + { + "question_id": 99, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A yellow background with two human head outlines facing each other. The left head has the words \"FIXED MINDSET\" above a red downward arrow, and the right head has the words \"GROWTH MINDSET\" above a green upward arrow." + }, + { + "question_id": 100, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon character with a serious expression, featuring a dark gray hair, a light blue shirt, and a red cross symbol on the left side of the head. The character has a red frown and is surrounded by two white, cloud-like shapes on either side of the head." + }, + { + "question_id": 101, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular logo with a beige background featuring five hands in different colors: red, green, blue, purple, and orange, arranged in a circular pattern. Above the hands, the text \"Understanding Diversity\" is written in black." + }, + { + "question_id": 102, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A black and white illustration of a person with a light bulb on their head, holding a book. 
The person has a question mark above their head and another question mark to the right of their head. The background is a light peach color." + }, + { + "question_id": 103, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon illustration of two boys, one wearing a red shirt and blue shorts, and the other wearing a striped shirt and brown shorts, both with their arms raised. The word \"Bullying\" is written above them." + }, + { + "question_id": 104, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon of a boy and a girl playing with each other. The boy is wearing a yellow shirt, black shorts, and red shoes. The girl is wearing a yellow shirt, blue pants, and red shoes. Both have black hair and are smiling." + }, + { + "question_id": 105, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A blue speech bubble with a yellow lightning bolt symbol, and a red speech bubble with a yellow lightning bolt symbol, both containing a person's face." + }, + { + "question_id": 106, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A rectangular blue traffic sign with a white border, featuring three white arrows. The leftmost arrow curves to the left, the middle arrow points straight up, and the rightmost arrow curves to the right. Below the arrows, the word \"CHANGES\" is written in white capital letters." 
+ }, + { + "question_id": 107, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized illustration of two human figures, one in yellow and the other in blue, both with black outlines. The yellow figure is standing on a staircase, while the blue figure is standing on a platform. The blue figure is holding a microphone and appears to be speaking or presenting. The word \"Leadership\" is written in black text above the figures." + }, + { + "question_id": 108, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A blue circle with a white plus sign inside it, followed by a white \"2X\" text." + }, + { + "question_id": 109, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A hand holding a person with a blue shirt and black pants, with a purple banner below displaying \"$4,000\" in white text." + }, + { + "question_id": 110, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A pie chart with a blue background and a white border, featuring a white line that divides the chart into two sections. The left section is larger and has a white number \"36%\" inside it, while the right section is smaller and has a white number \"36%\" inside it." 
+ }, + { + "question_id": 111, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A globe with a blue ocean and white continents, featuring a purple banner with white text that reads \"2.7 trillion impact to global GDP from use of more efficient talent platforms.\"" + }, + { + "question_id": 112, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "46% of companies are sometimes or frequently understaffed" + }, + { + "question_id": 113, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Travel personas: how travelers identify their travel style" + }, + { + "question_id": 114, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular planner with a white background featuring a colorful illustration of two people, one wearing a red hat and the other wearing a blue hat, both holding a smartphone. The person in the red hat is holding a book, and the person in the blue hat is holding a suitcase. The background includes a mountain and a sun. The text \"THE SMART PLANNER\" is written in bold, black letters at the top." 
+ }, + { + "question_id": 115, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "The Relaxed Nomad" + }, + { + "question_id": 116, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A family of five, consisting of a man, a woman, and three children, standing together. The man is holding a baby, while the woman is holding a suitcase. The children are standing around them, with one child holding a suitcase. The family is depicted in a circular frame." + }, + { + "question_id": 117, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular white background with a thin black border." + }, + { + "question_id": 118, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular white background with a thin black border." + }, + { + "question_id": 119, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A red airplane seat with a high backrest and armrests, featuring a small, rectangular, red and white logo on the backrest." 
+ }, + { + "question_id": 120, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular logo with a white background featuring a stylized illustration of a person in a blue suit with a red briefcase in their left hand and a blue suitcase in their right hand. The person is depicted in a walking motion, with one leg forward and the other leg back. The text \"THE BUSINESS ROAD WARRIOR\" is written in bold, black capital letters above the illustration." + }, + { + "question_id": 121, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A woman with short brown hair, wearing a purple top with a white collar, is holding a yellow envelope in her right hand. She has a headset on her head and is standing in front of a computer monitor with the word \"BIG\" visible on the screen." + }, + { + "question_id": 122, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular orange background with a white border." + }, + { + "question_id": 123, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular logo with a light blue background featuring a stylized globe in the center. The globe is divided into four quadrants, each in a different shade of blue. A black headset with a microphone is positioned over the globe, with the earpieces extending outward. To the right of the globe, there is a yellow star." 
+ }, + { + "question_id": 124, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon man with a light blue shirt and a black tie, wearing a headset with a microphone. He has a light brown hair and is pointing upwards with his right hand. To his right, there is a yellow light bulb with a red base." + }, + { + "question_id": 125, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A blue and black wrench with a flat-head design, featuring a blue handle with a textured grip and a black head with a serrated edge. The wrench has a long, straight shaft connecting the handle to the head." + }, + { + "question_id": 126, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized illustration of a person wearing a headset with a microphone, a purple shirt, and a white undershirt. The person is holding a smartphone with a blue and white design on the screen." + }, + { + "question_id": 127, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized illustration of a hand holding a black and white telephone handset, with a blue circular background featuring a partial globe and a speech bubble with the word \"BIG\" in white. The hand is wearing an orange life jacket with white stripes." 
+ }, + { + "question_id": 128, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular logo with a red border, featuring a stylized design of a person wearing a white shirt and a black tie, with a blue and white striped hat. The background includes a Union Jack flag and a yellow rectangle." + }, + { + "question_id": 129, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon woman with brown hair tied back, wearing a purple shirt with a white collar, and a headset. She is holding a yellow star in her right hand and has a black and white striped object in her left hand." + }, + { + "question_id": 130, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized heart with a vibrant red color, featuring a prominent blue and orange flame-like design on the upper left side, and a smaller blue and orange flame-like design on the lower right side." + }, + { + "question_id": 131, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized human figure with arms raised in a celebratory pose, surrounded by four blue arrows pointing outward, each arrow with a slight curve and a pointed tip." 
+ }, + { + "question_id": 132, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A digital bathroom scale with a black base and a white digital display showing the weight." + }, + { + "question_id": 133, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A pair of pink lungs with a central trachea, featuring detailed vein patterns and a slightly curved shape." + }, + { + "question_id": 134, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A yellow smiley face with a black outline, featuring two black dots for eyes and a curved black line for a mouth." + }, + { + "question_id": 135, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A human knee with a visible bone structure, including the femur and tibia, with a slight curvature and a smooth surface." + }, + { + "question_id": 136, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A human brain with a detailed, textured surface, featuring a prominent cerebral cortex and a visible portion of the brain stem." 
+ }, + { + "question_id": 137, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A red heart with a white outline and a white line through the center, indicating a heart rate or rhythm." + }, + { + "question_id": 138, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "The word \"HOTAPPE\" is written in large, bold, uppercase letters. The letters are colored in the following order from left to right: red, orange, yellow, light blue, and dark blue." + }, + { + "question_id": 139, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular smiley face with a yellow face and a red border. The face has two blue teardrop-shaped eyes, a brown curved mouth, and two brown curved lines for eyebrows." + }, + { + "question_id": 140, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "An orange circle with two brown hands facing each other, with the word \"OPENNESS\" in green capital letters below the circle." + }, + { + "question_id": 141, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular yellow background with a pair of hands clasped together in the center. The hands are depicted in a light brown color, with the left hand slightly overlapping the right hand. 
Radiating from the hands are white lines, giving the impression of light or energy emanating from the hands. Below the hands, the word \"TOUCH\" is written in bold, uppercase letters." + }, + { + "question_id": 142, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular logo with a light blue background featuring a large red heart in the center. Inside the heart, there is a white exclamation mark. Below the heart, the word \"ATTENTION\" is written in bold, uppercase letters." + }, + { + "question_id": 143, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular blue background with two cartoon faces, one on the left and one on the right, separated by a white dashed line. The left face has brown hair and a neutral expression, while the right face has light brown hair and a neutral expression. Below the faces, the word \"PROXIMITY\" is written in bold, uppercase letters." + }, + { + "question_id": 144, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular blue background with a white eye in the center, featuring a brown iris and a white sclera. Below the eye, the words \"EYE CONTACT\" are written in bold, uppercase letters." + }, + { + "question_id": 145, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with a green background, wearing a red shirt, with a thermometer in their mouth." 
+ }, + { + "question_id": 146, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with a green background, wearing a pink shirt, with a red nose and a red mouth, and a white hand with three fingers extended." + }, + { + "question_id": 147, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with a green background, wearing a pink shirt, with a green and white object in their mouth." + }, + { + "question_id": 148, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized illustration of a person with a green circular background. The person has a gray face with a red nose and a red mouth. The person is wearing a red shirt with a yellow collar." + }, + { + "question_id": 149, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with a green background, wearing a red shirt and white pants, is sitting on a white chair." 
+ }, + { + "question_id": 150, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular green sign with a white border, featuring a stylized illustration of a pair of red lungs with a gray outline, set against a dark background." + }, + { + "question_id": 151, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized illustration of a kidney with a red and pink color scheme, featuring a central red area with a pink outline, flanked by two symmetrical, curved, pink shapes resembling the kidney's lobes, all set against a light green background." + }, + { + "question_id": 152, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A blue rectangular header with the text \"Interesting Facts\" in white, bold, sans-serif font. To the left of the text, there are three white horizontal lines. To the right of the text, there is a red heart symbol." + }, + { + "question_id": 153, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized panda face with a white face, black ears, black patches around the eyes, and a black nose. The panda has a small, curved black mouth and a content expression. The face is set against a green circular background." 
+ }, + { + "question_id": 154, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A green circular background with a white plate in the center, containing a yellow circle. To the left of the plate is a white fork, and to the right is a white spoon." + }, + { + "question_id": 155, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular icon with a dark blue background featuring a red heart in the center. The heart is outlined in white and has a white line running horizontally across its middle. Below the heart, the word \"Diseases\" is written in white, bold, sans-serif font." + }, + { + "question_id": 156, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A white long-sleeved shirt with a teal collar and cuffs, featuring a row of black buttons down the front." + }, + { + "question_id": 157, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular orange icon with a white film reel design in the center. The film reel has a blue border with white squares on the left and right sides, and a white center with a blue horizontal line dividing it into two sections." 
+ }, + { + "question_id": 158, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular green icon featuring a white syringe with a red cross symbol on the barrel, a white droplet to the right of the syringe, and a yellow needle." + }, + { + "question_id": 159, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular blue icon featuring a white wine glass with a yellow liquid on the left and a white bottle with a yellow liquid and a brown cap on the right." + }, + { + "question_id": 160, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular icon with a dark blue background featuring two stylized human figures. The figure on the left has short, light brown hair and is wearing a red shirt. The figure on the right has short, light brown hair and is wearing a brown shirt with a white collar." + }, + { + "question_id": 161, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular icon with a solid olive green background featuring a white silhouette of the Earth in the center." + }, + { + "question_id": 162, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person sitting on a chair with their head in their hands, wearing a blue shirt and black pants." 
+ }, + { + "question_id": 163, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with short brown hair, wearing a blue long-sleeve shirt and yellow pants, is sitting on a white platform with a blue wave design at the bottom." + }, + { + "question_id": 164, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A woman with dark hair tied back, wearing a pink top, is depicted with a thought bubble above her head." + }, + { + "question_id": 165, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with red hair, wearing a green top and blue pants, is sitting on a windowsill with their legs crossed." + }, + { + "question_id": 166, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A woman with dark hair tied back, wearing a blue sleeveless top and black leggings, is running with her arms slightly bent and her legs in motion. She has a white earphone cord hanging from her right ear." 
+ }, + { + "question_id": 167, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A woman with black hair tied back, wearing a green headband, a green sleeveless top, and black pants, is sitting cross-legged with her hands pressed together in a prayer position." + }, + { + "question_id": 168, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "A green dinosaur with a white belly, a purple dinosaur with a white belly, and a green dinosaur with a white belly." + }, + { + "question_id": 169, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon boy with a purple bandage on his forehead, wearing a blue shirt and blue pants, is holding a purple object in his right hand." + }, + { + "question_id": 170, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "A lime green refrigerator with a single door, featuring a black handle on the right side. The door has a horizontal indentation near the top." 
+ }, + { + "question_id": 171, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "A small, rectangular, lime green refrigerator with a single door featuring a vertical handle on the left side. The bottom section of the refrigerator is orange with a horizontal handle." + }, + { + "question_id": 172, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "A dark-colored, rectangular structure with a flat roof and vertical sides, featuring a small, square window on the upper left side and a larger, rectangular window on the lower right side." + }, + { + "question_id": 173, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "A tree with dense, green foliage." + }, + { + "question_id": 174, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "A cylindrical, metallic pole with a consistent diameter throughout its length, featuring a series of evenly spaced, horizontal bands encircling its surface." + }, + { + "question_id": 175, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A deep, dark-colored bowl with a wide, flared rim and a smooth, glossy finish." 
+ }, + { + "question_id": 176, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A modern kitchen with a stainless steel oven and a black cooktop. The oven has a digital display and control panel, and there is a visible handle on the oven door. The cooktop has multiple burners with black grates." + }, + { + "question_id": 177, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A white, rectangular wall socket with a single, round, black power switch located on the right side." + }, + { + "question_id": 178, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A white, rectangular light switch with rounded edges, featuring a central toggle switch mechanism." + }, + { + "question_id": 179, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "Stainless steel built-in oven with a large glass door, featuring a digital control panel above the door with multiple buttons and a display screen." + }, + { + "question_id": 180, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A tall, slender vase with a flared rim and a narrow neck that gradually widens into a bulbous base, featuring a glossy finish with a gradient of colors transitioning from a deep blue at the top to a greenish hue towards the bottom." 
+ }, + { + "question_id": 181, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "The floor is composed of medium-toned wooden planks with a smooth, polished finish. The wood grain is visible, running lengthwise along the planks, which are laid out parallel to each other. The planks have a consistent width and exhibit a warm, reddish-brown hue." + }, + { + "question_id": 182, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A vertical, wall-mounted spice rack with multiple tiers, each tier holding several glass jars with metal lids. The jars are arranged in a single column, and the rack appears to be made of a dark, possibly wooden material." + }, + { + "question_id": 183, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A round, dark-colored table with a smooth surface and a central pedestal base." + }, + { + "question_id": 184, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "The chair features a high, slightly curved backrest and seat cushion upholstered in a woven fabric with a diamond pattern. The fabric is primarily light green with a central vertical stripe in a slightly darker shade. The armrests are padded and covered in the same woven fabric, with a light green color. The chair's legs are dark-colored and straight." 
+ }, + { + "question_id": 185, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A purple place mat with a textured surface and a white circular design in the center." + }, + { + "question_id": 186, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A vertical traffic light with three circular lenses arranged in a column, displaying a red light at the top, an unlit middle lens, and a green light at the bottom, all encased in a black housing with a visor over each lens." + }, + { + "question_id": 187, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A pink and white striped awning with a scalloped edge and fringe detailing." + }, + { + "question_id": 188, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A tall, dark brown tree trunk with a rough, textured bark. The trunk is relatively straight and has a consistent width throughout its visible length." + }, + { + "question_id": 189, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A chalkboard sign with a wooden frame displaying the text \"Château de la Bertrandière\" in elegant, cursive script." 
+ }, + { + "question_id": 190, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A tall, narrow, rectangular window with a dark frame and a single vertical pane of clear glass." + }, + { + "question_id": 191, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "A chestnut horse with a white star on its forehead, dark eyes, and a soft, dark muzzle. Its ears are pricked forward, and the mane appears to be a lighter shade of chestnut, blending into the darker coat. The horse's neck shows a gentle curve, and the coat has a healthy sheen." + }, + { + "question_id": 192, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "A brown horse with a dark mane." + }, + { + "question_id": 193, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "A light gray horse with a darker mane and tail, featuring a well-muscled build, a straight profile, and a calm demeanor." + }, + { + "question_id": 194, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A white, rectangular table with rounded corners and a smooth surface." 
+ }, + { + "question_id": 195, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A modern black induction cooktop featuring a sleek design with two visible cooking zones, each outlined with a white circular pattern. The front edge has a stainless steel trim with control buttons, and there are small yellow indicator lights above the buttons." + }, + { + "question_id": 196, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A glossy, metallic spoon with a reflective surface and a tapered handle, featuring a rounded bowl with a slight indentation on one side." + }, + { + "question_id": 197, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A metallic spoon with a reflective surface and a tapered handle that widens towards the end, featuring a rounded bowl with a pointed tip." + }, + { + "question_id": 198, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000299654.jpg", + "category": "natural_detailed_caption_box", + "text": "A zebra with a distinctive black and white striped pattern, featuring a mane of short, erect hair transitioning from black at the base to white at the tips. The stripes on the neck are vertical and become more horizontal as they reach the mane. The visible part of the zebra's face shows a pattern of narrow stripes that converge around the eyes and muzzle, with a white area above the eyes and a dark nose. The ears are pointed and display a striped pattern consistent with the head." 
+ }, + { + "question_id": 199, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000107939.jpg", + "category": "natural_detailed_caption_box", + "text": "An octagonal red stop sign with bold, all-caps white lettering spelling \"STOP\" centered on the sign." + }, + { + "question_id": 200, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000107939.jpg", + "category": "natural_detailed_caption_box", + "text": "An octagonal sign with a red background and a white border, featuring the word \"STOP\" in white uppercase letters." + }, + { + "question_id": 201, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "A dark brown, square-shaped cushion with a visible textured surface that suggests a soft, plush fabric." + }, + { + "question_id": 202, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "A dark brown, square-shaped pillow with a visible corner that appears to be soft and plush." + }, + { + "question_id": 203, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "The earlobe is small and rounded, with a smooth, slightly glossy surface. It is adorned with a small, round, gold-colored earring that has a subtle, reflective sheen." 
+ }, + { + "question_id": 204, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "A young girl with curly hair, wearing a patterned top with a mix of geometric and floral designs in dark and vibrant colors, paired with long-sleeved pink undershirts. Her expression is one of mild surprise or excitement, with her mouth slightly open and eyes looking upwards. Her arms are outstretched with palms facing up, as if gesturing or presenting something." + }, + { + "question_id": 205, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "A young boy with curly hair, wearing a striped sweater with shades of blue, white, and brown, stands with his hands clasped together. He has a focused expression on his face, with his mouth slightly open and his eyes looking to the side." + }, + { + "question_id": 206, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A ski parka featuring a color-block design with a white torso, green sleeves, and black shoulder panels. The jacket has a high collar and a front zipper closure. There are red accents on the cuffs and a red logo on the left chest area. The parka is paired with a black helmet with a visor." + }, + { + "question_id": 207, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "The knee of the person is covered in a dark purple snowsuit with a slightly glossy finish. The fabric appears thick and durable, suitable for cold weather. 
The knee area is slightly bent, indicating a relaxed stance. The snowsuit has a subtle sheen, reflecting light, and the material appears to be tightly fitted around the knee." + }, + { + "question_id": 208, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "The trousers are dark-colored, possibly black or dark gray, with a straight-leg cut. They feature a visible side pocket on the right leg with a flap closure, and there are belt loops around the waistband. The fabric appears to be a sturdy material, potentially denim or a similar thick textile." + }, + { + "question_id": 209, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A blue helmet with a glossy finish, featuring a prominent white stripe with a red outline running horizontally across the middle. The helmet has a black visor attached at the front, and a black chin strap with a buckle hanging down from the sides." + }, + { + "question_id": 210, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "The ski boot is predominantly black with a sleek, modern design. It features a sturdy, high-ankle structure for support and a smooth, matte finish. The boot has a contoured shape to fit the foot and lower leg, with a slightly raised heel for added stability. The sole is thick and textured for grip and durability." 
+ }, + { + "question_id": 211, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "The sleeve of the sweater is predominantly white with a bold red graphic of a person skiing. The fabric appears to be a soft, knit material, and the sleeve is slightly loose-fitting, extending to the wrist. The red graphic is positioned centrally on the sleeve, adding a dynamic contrast to the white background." + }, + { + "question_id": 212, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A person wearing a black helmet, goggles, and a white jacket with red and black accents, including a red logo on the left chest area. The individual is also wearing black gloves and appears to be in a skiing stance." + }, + { + "question_id": 213, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2411153.jpg", + "category": "natural_detailed_caption_box", + "text": "A red, white, and green racing motorcycle fairing with the number 69 prominently displayed in white on a red background, flanked by green and white stripes. The fairing features sponsor logos and a black lower section with a vent." + }, + { + "question_id": 214, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2411153.jpg", + "category": "natural_detailed_caption_box", + "text": "A series of parallel, evenly spaced, horizontal lines with a consistent width, running across the entire width of the image." 
+ }, + { + "question_id": 215, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/22d3844dcf29a07bd10f557a33684e331846f81b938ca0f742afab09c542133f.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The mass of the quark is a very small, massless particle that is part of the quark mass. It is a fundamental particle that is responsible for the mass of the quark, and it is often used in particle physics to study the properties of quarks and their interactions." + }, + { + "question_id": 216, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/22d3844dcf29a07bd10f557a33684e331846f81b938ca0f742afab09c542133f.png", + "category": "ocr_doc_detailed_caption_box", + "text": "A lowercase letter 'v' with a serif at the top and a tail at the bottom, featuring a bold, sans-serif font." + }, + { + "question_id": 217, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The geometry and adsorption energies for the structures of thio-glycolic acid on Au(111) at 0.25ML." + }, + { + "question_id": 218, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The table contains a list of adsorption sites with their corresponding adsorption energies. The adsorption sites are listed in a column, with each site having a corresponding adsorption energy. The adsorption energy is listed in a separate column, with each site having a corresponding energy value. The table also includes the adsorption energy for the Au-S bond, which is listed in a separate column. 
The adsorption energy for the Au-S bond is listed in a separate column, with each site having a corresponding energy value." + }, + { + "question_id": 219, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The adsorption energy is 0.63 eV." + }, + { + "question_id": 220, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The adsorption energy for the most stable structure of the HSCH2COOH on the Au(111) surface is 0.63 eV. The adsorption site preferred by the sulfur atom is located at the top of the gold atom. The polar angle between the normal vector of the surface and the S-C2 direction is 74.2 degrees. The HSCH2COOH tend to lie down at the low coverage." + }, + { + "question_id": 221, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "A black and white image of a person's face with a neutral expression, featuring a prominent nose, closed lips, and visible teeth. The person has short hair and is wearing a dark-colored top." + }, + { + "question_id": 222, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The segmented image shows two graphs. The first graph on the left is titled \"Atom prediction performance (a and c)\" and features a scatter plot with blue and orange dots. 
The second graph on the right is titled \"Bond prediction performance (a and c)\" and also features a scatter plot with blue and orange dots. Both graphs have a similar layout with x and y axes, and the dots are evenly spaced." + }, + { + "question_id": 223, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The F1 score for segmentation and classification networks." + }, + { + "question_id": 224, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The performance of segmentation network is measured by the F1 score, which is a metric that takes into account both precision and recall. The F1 score is used to evaluate the performance of the segmentation network in terms of its ability to correctly segment the image. The segmentation network is compared to other networks, and the F1 score is used to determine which network is the most effective." + }, + { + "question_id": 225, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The segmentation network is a deep learning model that uses a combination of atom, bond, and charge types to predict the pixel values. The model is trained on a large amount of data and has a high F1 score, indicating its high accuracy. The F1 score is a measure of the model's performance, taking into account both precision and recall. The model is designed to be used for various tasks, such as image segmentation and object detection." 
+ }, + { + "question_id": 226, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The performance of the different types of networks is measured by the F1 score, which is a measure of the accuracy of the network. The F1 score is higher for the different types of networks, indicating that they are performing well. The performance of the different types of networks is compared to the performance of the segmentation networks." + }, + { + "question_id": 227, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The performance of the different parts of the network is measured by the F1 score, which is a measure of the accuracy of the network. The F1 score is used to evaluate the performance of the network in different tasks, such as atom, bond, and charge type classifications. The network is able to do a good job even when the segmentation is not perfect, and the performance of the different parts of the network is significantly higher than the segmentation networks." + }, + { + "question_id": 228, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "8.3 Overall graph accuracy" + }, + { + "question_id": 229, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The overall graph accuracy is a measure of the performance of the different parts of the graph, including the segmentation network and the classification network. 
The segmentation network is used to segment the graph into different parts, and the classification network is used to predict the type of the graph. The overall graph accuracy is a combination of these two parts, and it is used to measure the performance of the graph." + }, + { + "question_id": 230, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The number 11 is displayed in a bold, sans-serif font with a dark color, possibly black or dark gray, against a lighter background. The digits are evenly spaced and aligned vertically." + }, + { + "question_id": 231, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The sugar is a low GI sweet spot, which is a type of sugar that has a low glucose index." + }, + { + "question_id": 232, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The low GI sweet spot is at least 22mg CE/100mg sucrose." + }, + { + "question_id": 233, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The sugar has a low GI of about 22-32 mg CE/100g polyphenols. The polyphenol content is high, with a range of 22-32 mg CE/100g polyphenols. The sugar is hygroscopic, with a higher moisture content, and the polyphenol content increases as the sugar becomes more saturated." 
+ }, + { + "question_id": 234, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The massacuit is a dark brown, viscous liquid with a glossy sheen, contained in a clear glass bottle." + }, + { + "question_id": 235, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The massacuites are prepared at two different sugar mills, designated as \"Mill 1\" and \"Mill 2\". The polyphenol content of each sample is determined and washed until they reach the desired polyphenol content, which is roughly 500 to 2000 ICUMSA. The results are in Table 4 below." + }, + { + "question_id": 236, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "A black and white image of a sugar cube with a textured surface, featuring a pattern of small, raised dots." + }, + { + "question_id": 237, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The image shows a collection of documents with text, some of which are in black and white, and others in color. The documents are arranged in a grid-like pattern, with some documents having a red border. The text on the documents is in various fonts and sizes, and some documents have a red box around the text." 
+ }, + { + "question_id": 238, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The OCR process." + }, + { + "question_id": 239, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The text is a letter written in English, with the first line reading \"Available OCR\" followed by \"for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The second line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The third line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The fourth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The fifth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The sixth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The seventh line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical 
work which I carried out under your direction.\" The eighth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The ninth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The tenth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The eleventh line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The twelfth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The thirteenth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The fourteenth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The fifteenth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The sixteenth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to 
accept as an excuse for my shortcomings in this Report.\" The seventeenth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The eighteenth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The nineteenth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The twentieth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The twenty-first line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The twenty-second line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The twenty-third" + }, + { + "question_id": 240, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The Internet Archive image containing this excerpt can be accessed here: https://archive.org/details/b2439867/page/n7" + }, + { + "question_id": 241, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + 
"text": "Journal of Data Mining and Digital Humanities" + }, + { + "question_id": 242, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The text is a web address in blue, displaying the URL \"http://dmh.esciences.org\"." + }, + { + "question_id": 243, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The number 9 is displayed in a bold, sans-serif font with a slight shadow effect, giving it a three-dimensional appearance. The color of the number is a gradient of dark to light gray, with the darker shade at the top and the lighter shade at the bottom." + }, + { + "question_id": 244, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text \"IOOF Annual Report 2012\" is displayed in a serif font, with \"IOOF\" in a larger size and \"Annual Report 2012\" in a smaller size. The text is in a light green color." + }, + { + "question_id": 245, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Non-Executive Directors' Remuneration" + }, + { + "question_id": 246, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "4.1. 
Components of Non-Executive Director remuneration" + }, + { + "question_id": 247, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text \"page 37\" is written in lowercase letters." + }, + { + "question_id": 248, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The company is a non-executive director." + }, + { + "question_id": 249, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The Company's Constitution requires that the aggregate remuneration paid or provided to all Non-Executive Directors in any financial year by the Company, its subsidiaries and associated entities may not exceed an amount approved by shareholders. The ceiling amount includes all remuneration provided to Non-Executive Directors, including superannuation but not including retirement benefits. The current limit of $980,000 per annum was approved by shareholders at the 2010 Annual General Meeting. There has been no increase to the Non-Executive Director fee pool since this time." + }, + { + "question_id": 250, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Non-Executive Directors receive a fee for being a Director of the Board. An additional fee is paid to the Chairman of the Board. 
Non-Executive Directors do not receive additional fees for service on Board and Committees. The Company's Constitution requires that the aggregate remuneration paid or provided to all Non-Executive Directors in any financial year may not exceed an amount approved by shareholders." + }, + { + "question_id": 251, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text is a mathematical equation involving the homotopy group of a space." + }, + { + "question_id": 252, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The region is a monopole, which is a type of topological defect." + }, + { + "question_id": 253, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The topological conditions of formation of defects only govern the formation of topologically stable defects. It was found that defects solutions can form even when the topology is trivial. The most well-known example are the electro-weak strings, formed during the electroweak symmetry breaking, which are perturbatively stable for a range of parameters which are not realized in nature, and belong to the broader class of embedded defects." 
+ }, + { + "question_id": 254, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "These defects are a priori unstable though mechanisms (such as plasma effects) have been found to stabilize them. They are of interest for inflation model builders since this mechanism can allow lift the constraints from the formation of cosmic strings (see Sec. IV F on D-term inflation)." + }, + { + "question_id": 255, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "A black and white image of a mathematical equation with the variables \"x\" and \"y\" in a bold font, followed by a period and the number \"1\" in a smaller font." + }, + { + "question_id": 256, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The number 72 is displayed in a bold, black font with a slight shadow effect, giving it a three-dimensional appearance. The numerals are evenly spaced and aligned horizontally." + }, + { + "question_id": 257, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0301_0188_0040.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "A stylized illustration of a woman with a bun hairstyle, featuring a pattern of white, cloud-like shapes with small black dots scattered throughout. The woman is wearing a pink garment with a polka dot pattern." 
+ }, + { + "question_id": 258, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0301_0188_0040.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "A red and white polka dot pattern with a small, dark, irregularly shaped mark near the center." + }, + { + "question_id": 259, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The number 62 is displayed in a bold, sans-serif font with a dark color, possibly black or dark gray, against a lighter background. The numerals are evenly spaced and aligned horizontally." + }, + { + "question_id": 260, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Chapter 2: Motivation" + }, + { + "question_id": 261, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The string theory is a type of string theory that is compactified on the near-horizon geometry. It is a decoupled theory that combines the two pictures of the same low energy limit of one theory, Type IIB string theory." + }, + { + "question_id": 262, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The region is a field theory picture of the low energy limit of Type IIB string theory." 
+ }, + { + "question_id": 263, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The regions for which analytic tools exist for these two different pictures turn out to be completely incompatible." + }, + { + "question_id": 264, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The equation is a complex mathematical expression involving various variables and functions. It includes a combination of Greek letters, numbers, and mathematical operators. The visible part of the equation shows a series of variables and functions that are interconnected, with some parts appearing to be in parentheses. The equation is written in a formal, mathematical notation." + }, + { + "question_id": 265, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The presence of an insider on the market does not necessarily lead to arbitrage, and the presence of insiders might be considered beneficial to the market, in the sense that it leads to higher information efficiency of the equilibrium price process." 
+ }, + { + "question_id": 266, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The fundamental value of the firm is a stochastic process, and the insider can observe it directly or at least observe it in a less noisy way than the other agents on the market." + }, + { + "question_id": 267, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The paper relaxes the assumption of static insider information and studies the equilibrium trading and price processes, as well as market efficiency, in a setting with dynamic private information." + }, + { + "question_id": 268, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The model is a generalization of the static information setting of [2]." + }, + { + "question_id": 269, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The model is a generalization of the static information setting of [2] and is designed to include dynamic information. It has a much smaller set of admissible trading strategies and pricing rules compared to the ones considered in the work. The model shows the existence of a unique Markovian equilibrium, which is an equilibrium price that allows the insider to trade undetected and depends only on the total order process. 
The model also shows that the presence of an insider increases the market informational efficiency for times close to the time of the order." + }, + { + "question_id": 270, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "A green and white striped awning with a scalloped edge and fringe detailing." + }, + { + "question_id": 271, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A red, three-dimensional, cursive sign with the word \"Abondana\" written in a flowing, elegant script." + }, + { + "question_id": 272, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard features the word \"Cafe\" in a stylized, cursive font with a gradient of red to dark red, giving it a three-dimensional appearance. The letters are slightly italicized and have a shadow effect, enhancing their depth." + }, + { + "question_id": 273, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The logo features the letters \"USIS\" in bold, with \"US\" in red and \"IS\" in black. Below the letters, there is a tagline in smaller, gray font." + }, + { + "question_id": 274, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"ESTATE\" is written in bold, black, uppercase letters on a yellow background." 
+ }, + { + "question_id": 275, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signage displays the word \"AGENTS\" in bold, uppercase letters. The letters are black and set against a yellow background." + }, + { + "question_id": 276, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signage displays the word \"SAXONS\" in bold, black capital letters on a yellow background." + }, + { + "question_id": 277, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard features the word \"SAXONS\" in capital letters with a serif font, set against a dark background. The letters are in a metallic gold color with a slight gradient, giving them a three-dimensional appearance. The signboard has a reflective surface, suggesting it is made of a glossy material." + }, + { + "question_id": 278, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard displays the word \"ESTATE\" in capital letters, with a serif font. The letters are dark and possibly metallic, with a reflective surface that catches the light, giving them a slightly shiny appearance. The background of the signboard is not visible, but the letters are set against a dark backdrop that contrasts with the lighter color of the text." 
+ }, + { + "question_id": 279, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard features capitalized, serif lettering spelling \"AGENTS\" with a metallic finish and a slight gradient, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 280, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"Triple\" is written in a cursive, green font with a white outline. The letters are slightly italicized and have a playful, rounded design." + }, + { + "question_id": 281, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"WHITE\" is written in bold, uppercase letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 282, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"SPOT\" is written in bold, uppercase letters with a light blue color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." 
+ }, + { + "question_id": 283, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A stylized, cursive letter \"O\" with a green outline and a white fill, featuring a small, curved tail extending from the bottom right." + }, + { + "question_id": 284, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The visible part of the ag is a white, stylized letter \"A\" with a slight shadow effect, giving it a three-dimensional appearance." + }, + { + "question_id": 285, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001122.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"NEW\" is written in bold, red, uppercase letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 286, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001122.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"YORK\" is written in bold, red, uppercase letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 287, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_162.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"Colchester\" is written in a bold, sans-serif font with a dark blue color." 
+ }, + { + "question_id": 288, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signage features bold, black, uppercase letters spelling \"AYAM\" on a yellow background." + }, + { + "question_id": 289, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signage features large, bold, red letters \"RUA\" on a yellow background." + }, + { + "question_id": 290, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A yellow sign with the word \"SMASHED\" in bold, black, uppercase letters." + }, + { + "question_id": 291, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A dark brown, rectangular sign with the word \"FRIED\" in bold, uppercase letters." + }, + { + "question_id": 292, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A brown sign with the word \"CHICKEN\" in bold, uppercase letters." 
+ }, + { + "question_id": 293, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "Accommodation" + }, + { + "question_id": 294, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"Office\" is written in a bold, sans-serif font with a dark blue color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 295, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"Nightline\" is written in a bold, sans-serif font with a dark blue color." + }, + { + "question_id": 296, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A red and white signboard with the word \"BUBBA\" in bold, capitalized, red letters on a white background, with a red border around the sign." + }, + { + "question_id": 297, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard features the word \"GUMP\" in large, bold, red capital letters with a white outline. The letters are set against a textured, light-colored background that resembles a stone or concrete surface." 
+ }, + { + "question_id": 298, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard features the word \"SHRIMP\" in capital letters, with a bold, sans-serif font. The letters are white with a slight shadow effect, giving them a three-dimensional appearance. The background of the signboard is a deep blue color, providing a stark contrast to the white text." + }, + { + "question_id": 299, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A curved signboard with a red border and a white background, featuring the word \"RESTAURANT\" in bold, black, uppercase letters." + }, + { + "question_id": 300, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A curved, red and white signboard with the word \"MARKET\" in capital letters, featuring a serif font." + }, + { + "question_id": 301, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A circular blue signboard with a white border and a white symbol resembling a stylized letter 'C' in the center." + }, + { + "question_id": 302, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A circular, metallic signboard with a textured background featuring the letters \"TM\" in a bold, sans-serif font, centered on the sign." 
+ }, + { + "question_id": 303, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"EVENING\" is written in bold, uppercase letters with a gradient of red to pink. The letters are evenly spaced and have a slightly blurred effect, giving them a soft, glowing appearance." + }, + { + "question_id": 304, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"PRIMROSE\" is written in bold, uppercase letters with a gradient of pink to red. The letters are evenly spaced and have a slightly blurred effect, giving them a soft, glowing appearance." + }, + { + "question_id": 305, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"BASE\" is written in bold, uppercase letters with a red background and white outline. The letters are evenly spaced and have a slightly distressed texture." + }, + { + "question_id": 306, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"OIL\" is written in bold, uppercase letters with a gradient of red to pink. The letters are evenly spaced and have a slightly blurred appearance." 
+ }, + { + "question_id": 307, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01644.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The number 46 is displayed in a large, bold, white font with a slight shadow effect, giving it a three-dimensional appearance. The numerals are evenly spaced and have a smooth, rounded design." + }, + { + "question_id": 308, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green calendar with white text displaying the numbers 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 
300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369," + }, + { + "question_id": 309, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green rectangular button with rounded corners featuring the word \"Connecter\" in white lowercase letters, followed by a yellow icon resembling a lock with a keyhole." + }, + { + "question_id": 310, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green, stylized, lowercase letter \"a\" with a slight shadow effect, giving it a three-dimensional appearance." + }, + { + "question_id": 311, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green, stylized lowercase \"a\" with a curved tail extending from the bottom right, resembling a lowercase \"i\" with a dot above it." + }, + { + "question_id": 312, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green rectangular sign with white text displaying the numbers \"07\" in a bold, sans-serif font." 
+ }, + { + "question_id": 313, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "The word \"septembre\" in lowercase letters, with a green background and white font." + }, + { + "question_id": 314, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "The text \"Lien Web\" is written in a bold, sans-serif font with a green color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 315, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "The numbers 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30 are displayed in a green color." + }, + { + "question_id": 316, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green and white striped awning with a scalloped edge and a fringe detail." 
+ }, + { + "question_id": 317, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "Plan du site" + }, + { + "question_id": 318, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "A white arrow pointing to the right, with a slight curve at the tail end, is positioned to the right of the text \"Book Now.\"" + }, + { + "question_id": 319, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "About Us" + }, + { + "question_id": 320, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The text \"Pay Balance\" is written in bold, black, sans-serif font." 
+ }, + { + "question_id": 321, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "About Us" + }, + { + "question_id": 322, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "contact us" + }, + { + "question_id": 323, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The word \"About\" is written in a bold, sans-serif font with a gradient of pink to red, set against a teal background." + }, + { + "question_id": 324, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "A teal-colored horizontal bar with white text and symbols. On the left side, there is a white envelope icon followed by the text \"doonawash@gmail.com\". On the right side, there is a white icon resembling a person in a wheelchair." + }, + { + "question_id": 325, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The text \"Contact us\" is written in a bold, sans-serif font with a pinkish-red color. The letters are evenly spaced and aligned horizontally." 
+ }, + { + "question_id": 326, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The word \"Home\" is written in a bold, sans-serif font with a gradient of blue shades, transitioning from a lighter blue at the top to a darker blue at the bottom." + }, + { + "question_id": 327, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The text \"Pay Balance\" is written in a bold, sans-serif font with a pinkish hue. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 328, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_0558c1f4-c05b-49a8-8479-04b1575779d2.png", + "category": "web_detailed_caption_box", + "text": "OpenStreetMap Belgium" + }, + { + "question_id": 329, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "A black arrow pointing to the right." + }, + { + "question_id": 330, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "A white rectangular tag with the number \"4.9\" in bold black font centered on it." 
+ }, + { + "question_id": 331, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "A rectangular blue button with white text that reads \"Pulsuz Konsultasyon.\"" + }, + { + "question_id": 332, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "The word \"Portfolio\" in a bold, sans-serif font, with a slight italicization, and a drop shadow effect, giving it a three-dimensional appearance. The letters are black with a white outline, and the text is set against a plain background." + }, + { + "question_id": 333, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "A white shopping cart icon with a blue outline, featuring a rectangular basket with a grid pattern, two vertical handles, and four wheels, two of which are visible." + }, + { + "question_id": 334, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "The word \"SUPPORT\" in bold, uppercase letters with a blue background and white outline." + }, + { + "question_id": 335, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "A white lowercase letter \"f\" with a bold, sans-serif font, set against a blue background." 
+ }, + { + "question_id": 336, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "Your Charts" + }, + { + "question_id": 337, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "A blue \"X\" with a white outline, featuring a slightly darker blue fill and a lighter blue border." + }, + { + "question_id": 338, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "A rectangular white sign with the words \"CONTACT US\" in bold, uppercase, blue letters." + }, + { + "question_id": 339, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "A rectangular blue button with rounded corners and a white border. The button has the words \"ADD TO CART\" in bold, white, uppercase letters centered on it." + }, + { + "question_id": 340, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "The word \"PRODUCTS\" is written in bold, uppercase letters with a blue background. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." 
+ }, + { + "question_id": 341, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "The word \"PODCAST\" is written in bold, uppercase letters with a blue background. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 342, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "The text \"ABOUT US\" is written in bold, uppercase letters. The letters are blue and have a slight shadow effect, giving them a three-dimensional appearance. The text is centered horizontally." + }, + { + "question_id": 343, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "The text \"WHAT WE DO\" is written in bold, uppercase letters. The letters are evenly spaced and have a modern, sans-serif font style. The color of the text is black, and it stands out against a light background." + }, + { + "question_id": 344, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "GWM launches livelihood micro-grants" + }, + { + "question_id": 345, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "A rectangular white button with a black border and the word \"Settings\" in black, bold, sans-serif font centered on it." 
+ }, + { + "question_id": 346, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "Privacy Policy" + }, + { + "question_id": 347, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "The word \"HOME\" in uppercase letters, with a bold, sans-serif font, is centered on the image. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance. The color of the text is a gradient of light to dark gray, creating a subtle contrast against the background." + }, + { + "question_id": 348, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "A rectangular black button with the word \"Accept\" in white, bold, sans-serif font centered on it." + }, + { + "question_id": 349, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "A red, oval-shaped button with a white border and the word \"DONATE\" in bold, uppercase, red letters centered on it." + }, + { + "question_id": 350, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "The word \"NEWS\" in bold, uppercase letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and have a clean, modern font style." 
+ }, + { + "question_id": 351, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "The text \"ABOUT US\" is written in bold, uppercase letters with a sans-serif font. The letters are evenly spaced and aligned horizontally. The color of the text is black, and it stands out against a light background." + }, + { + "question_id": 352, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "The text \"CONTACT US\" is written in bold, uppercase letters. The letters are evenly spaced and have a modern, sans-serif font. The color of the text is black, and it stands out against a light background." + }, + { + "question_id": 353, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_6c677961-e540-4cc5-b725-5e301019a9f9.png", + "category": "web_detailed_caption_box", + "text": "A black and white icon depicting a stylized, abstract representation of a building with a flat roof and multiple rectangular windows arranged in a grid pattern." + }, + { + "question_id": 354, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The word \"News\" in a bold, sans-serif font, with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and have a dark color, contrasting with the lighter background." 
+ }, + { + "question_id": 355, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The logo features the word \"ServeGate\" in bold, black letters. To the left of the text, there is a stylized design consisting of two overlapping triangles, one in teal and the other in red, with a black line separating them." + }, + { + "question_id": 356, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "Our Difference" + }, + { + "question_id": 357, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The text \"About Us\" is written in a bold, sans-serif font with a red color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 358, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "A rectangular button with rounded corners, featuring a light pink background and a thin red border. The button displays the text \"Get in touch\" in bold, red, sans-serif font." + }, + { + "question_id": 359, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The word \"Home\" in a serif font, with the letters in a light gray color against a white background." 
+ }, + { + "question_id": 360, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The word \"Services\" in a bold, sans-serif font, with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 361, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "Indigenous Impact" + }, + { + "question_id": 362, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The text \"ServeGate\" is written in bold, black, sans-serif font. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 363, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_5a35d9c2-2c2d-4a49-ad0a-1408d9cac78e.png", + "category": "web_detailed_caption_box", + "text": "A rectangular white button with rounded corners, featuring the text \"Close issue\" in bold, black, sans-serif font." + }, + { + "question_id": 364, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_3fed2169-3c3d-43e7-baaa-3bf0e0c7134c.png", + "category": "web_detailed_caption_box", + "text": "A vertical array of three circular, dark-colored buttons with a slightly raised, smooth surface, aligned centrally on a light-colored background." 
+ } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/ocr_spotting_detailed_caption_box/question.json b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/ocr_spotting_detailed_caption_box/question.json new file mode 100644 index 0000000000000000000000000000000000000000..ca20e47a5ad590aea97f9459211a3f28b30e7d6c --- /dev/null +++ b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/ocr_spotting_detailed_caption_box/question.json @@ -0,0 +1,334 @@ +[ + { + "question_id": 1, + "image": "0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 83, + 147, + 257, + 83 + ], + [ + 162, + 201, + 87, + 50 + ], + [ + 665, + 377, + 60, + 39 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 2, + "image": "img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 70.0, + 157.0, + 207.0, + 47.0 + ], + [ + 311.0, + 165.0, + 225.0, + 45.0 + ], + [ + 254.0, + 247.0, + 186.0, + 35.0 + ], + [ + 191.0, + 380.0, + 92.0, + 19.0 + ], + [ + 293.0, + 383.0, + 89.0, + 17.0 + ], + [ + 392.0, + 385.0, + 92.0, + 16.0 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 3, + "image": "0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 581, + 352, + 389, + 391 + ], + [ + 663, + 790, + 229, + 241 + ], + [ + 905, + 780, + 181, + 233 + ], + [ + 1019, + 324, + 156, + 158 + ], + [ + 689, + 721, + 44, + 54 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 4, + "image": "0001122.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 51, + 103, + 
362, + 264 + ], + [ + 516, + 120, + 466, + 335 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 5, + "image": "img_162.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 386.0, + 291.0, + 261.0, + 43.0 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 6, + "image": "img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 451.0, + 44.0, + 121.0, + 115.0 + ], + [ + 655.0, + 144.0, + 54.0, + 65.0 + ], + [ + 412.0, + 113.0, + 111.0, + 71.0 + ], + [ + 521.0, + 150.0, + 50.0, + 46.0 + ], + [ + 567.0, + 166.0, + 57.0, + 46.0 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 7, + "image": "img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 71.0, + 191.0, + 314.0, + 36.0 + ], + [ + 404.0, + 189.0, + 117.0, + 34.0 + ], + [ + 77.0, + 262.0, + 161.0, + 40.0 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 8, + "image": "0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 165, + 290, + 192, + 89 + ], + [ + 363, + 281, + 159, + 57 + ], + [ + 225, + 354, + 140, + 46 + ], + [ + 185, + 376, + 156, + 94 + ], + [ + 426, + 344, + 83, + 101 + ], + [ + 419, + 347, + 56, + 23 + ], + [ + 481, + 420, + 22, + 16 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 9, + "image": "tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 758.0, + 3763.0, + 498.0, + 112.0 + ], + [ + 1291.0, 
+ 3751.0, + 572.0, + 119.0 + ], + [ + 1061.0, + 3894.0, + 303.0, + 105.0 + ], + [ + 1386.0, + 3882.0, + 180.0, + 110.0 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 10, + "image": "tr_img_01644.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 134.0, + 94.0, + 317.0, + 225.0 + ] + ], + "segmentation": [] + } + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/web_detailed_caption_box/answer.json b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/web_detailed_caption_box/answer.json new file mode 100644 index 0000000000000000000000000000000000000000..1fc627ed32af43ec4e04f2ac4c8172d3201bc43b --- /dev/null +++ b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/web_detailed_caption_box/answer.json @@ -0,0 +1,2186 @@ +[ + { + "question_id": 1, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "This is a status bar displaying the date \"Friday, Sep 2,\" along with the temperature \"61°F.\" It informs the user about the current day, date, and ambient temperature, likely provided by a weather service. This text is generally not interactive and serves as an informational display only." + }, + { + "question_id": 2, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "This represents a capital letter \"G\" set inside a circle. It is part of a larger icon that is likely associated with a search engine or service, commonly used as a button to initiate web searches or to access related services. Interaction with this icon typically leads to the search engine's interface or homepage." 
+ }, + { + "question_id": 3, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "This icon depicts clouds, which is a visual representation often used for weather-related services or apps. It indicates current or forecasted weather conditions and is typically interactive, leading the user to a detailed weather report upon tapping." + }, + { + "question_id": 4, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "The gear-like icon signifies settings or options. It is a common symbol for accessing system settings or preferences in an application or operating system. Interacting with this icon usually opens a menu where various settings can be adjusted." + }, + { + "question_id": 5, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "The icon resembling a speech bubble is often associated with messaging services or chat applications. Tapping it would commonly open a messaging app where users can send messages, photos, or videos to others." + }, + { + "question_id": 6, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "This icon, featuring a triangle resembling a \"play\" button, is widely recognized as a symbol for media players or services offering video and music content. Interaction with this icon would likely launch a media playback application or service." 
+ }, + { + "question_id": 7, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "The icon represents a search engine or a suite of services provided by a major tech company, indicated by the letter \"G\" and vibrant colors. Interacting with this icon typically brings the user to a home screen with access to various services offered by the company, such as search, email, maps, and more." + }, + { + "question_id": 8, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "This icon, representing a microphone inside a colorful outline, is suggestive of a digital assistant or voice search feature. When interacted with, it would generally activate a voice recognition service allowing users to speak commands or queries for assistance." + }, + { + "question_id": 9, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "The circle icon in a navigation bar is typically an interactive home button on a mobile device, often bringing the user back to the home screen when tapped." + }, + { + "question_id": 10, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "The icon with a triangle pointing leftwards resembles a \"back\" navigation button, generally used to go back to the previous screen in an application or navigate backwards in a browser." 
+ }, + { + "question_id": 11, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "This icon is indicative of the Microsoft Excel mobile application, recognizable by its green 'X' on a white background, which suggests a tool for creating and editing spreadsheets. It is likely an interactive element that, upon touch or click, launches the application allowing users to work with spreadsheets on their mobile device." + }, + { + "question_id": 12, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "This is the Facebook mobile application icon, featuring a lowercase 'f' on a blue background. When interacted with, it typically opens the Facebook app where users can browse their news feed, connect with friends and family, post updates, and engage in social networking activities." + }, + { + "question_id": 13, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "Represented here is an icon for the Speedtest application by Ookla, depicted by a speedometer graphic suggesting the app's function of measuring internet connection speed. Tapping on this icon will likely open the app and allow the user to test their current internet speed." + }, + { + "question_id": 14, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "This icon, featuring a camera lens and a gradient background, is for the Instagram mobile application. 
Interacting with this icon will usually open the app, providing access to photo and video sharing, as well as viewing the content from others on the Instagram social network." + }, + { + "question_id": 15, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "Resembling a house with a triangular roof, this icon signifies a home automation or real estate application. Interaction with this icon would open the respective app, providing controls for smart home devices or real estate listings, depending on its specific function." + }, + { + "question_id": 16, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "This icon has a feather, hinting at a lite version of an application that offers a minimalistic or resource-efficient option, typically for use in areas with limited connectivity or on devices with lower performance." + }, + { + "question_id": 17, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "Featuring a speech bubble with a lightning bolt, this is the Facebook Messenger app icon. It signifies an app dedicated to messaging which, upon interaction, opens a platform where users can send messages, share media, and participate in video calls." 
+ }, + { + "question_id": 18, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The icon partially reads \"Home De...\" against an orange square, suggesting a home improvement or retail company's app, possibly offering goods or services related to home refurbishment or decoration. The app's full functionality would be revealed upon opening it." + }, + { + "question_id": 19, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The green owl represents Duolingo, an educational platform icon with its function being language learning. Upon touching the icon, the user would engage with the app to learn a new language through interactive lessons." + }, + { + "question_id": 20, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "This icon, with an abstract design and the letters 'GE', likely signifies a news or media application that provides users with news articles, updates, and possibly live reporting, accessible by touching the icon to open the app." + }, + { + "question_id": 21, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "This area contains the term \"Search,\" suggesting it is likely related to a search function where a user can input queries to locate specific settings or information within this system or application." 
+ }, + { + "question_id": 22, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"settings\" indicates an option or heading that relates to configuration options. Interacting with it would typically bring up a menu to adjust system preferences or application parameters." + }, + { + "question_id": 23, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The term \"mobile,\" followed by a comma hints at a list or continuation of related topics, likely referring to mobile network settings or features in the context of this system or application." + }, + { + "question_id": 24, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"data\" in this context may refer to mobile data usage and settings. It suggests an option to view or adjust how the device handles cellular data." + }, + { + "question_id": 25, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "This term \"Wi-Fi,\" ending with a comma, implies it is part of a series, possibly relating to Wi-Fi settings where a user can manage Wi-Fi networks and preferences." 
+ }, + { + "question_id": 26, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"and\" serves as a conjunction within a list or sentence, indicating the addition of more items or concepts that are related to the ones previously mentioned." + }, + { + "question_id": 27, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "This term \"hotspot\" typically refers to a feature where the device can share its internet connection with other devices through Wi-Fi, Bluetooth, or USB." + }, + { + "question_id": 28, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The term \"usage,\" followed by a comma, likely relates to the tracking or monitoring of resource consumption, such as data, battery, or connectivity usage." + }, + { + "question_id": 29, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"Connected\" suggests it pertains to the status or management of connected devices or networks, such as Bluetooth connections or Wi-Fi networks." 
+ }, + { + "question_id": 30, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The partially visible term \"Lo\" could be part of a word that identifies a feature, option, or information related to the system or application settings." + }, + { + "question_id": 31, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This area is a text-entry field in a web browser, typically used for typing in web addresses or performing web searches. Interaction with this field usually involves clicking or tapping to enter text, and pressing Enter would initiate a web search or take the user to the entered web address." + }, + { + "question_id": 32, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This is a tab title within a web browser, indicating that the user has accessed or searched for 'eBay shopping' in this tab. The text serves a navigational purpose, allowing the user to identify and switch to the associated web page when multiple tabs are open." + }, + { + "question_id": 33, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This label refers to a 'cart' on a shopping platform, hinting at a functionality that allows users to view items that have been added to a virtual shopping cart. It is likely interactive and clicking it would navigate the user to a page summarizing their selected items for purchase." 
+ }, + { + "question_id": 34, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This text is a URL displayed in the address bar of a web browser. It indicates that the current web page pertains to the shopping cart of the eBay website. The user can click on this text to edit the URL or copy it for use elsewhere." + }, + { + "question_id": 35, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This text suggests that the user is visiting or has searched for a page related to 'Welcome to Costco Wholesale'. It might serve as a title for a page, potentially indicating that the user can find information regarding Costco's offerings through this tab." + }, + { + "question_id": 36, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "Here, 'costco.com' is the simplified representation of an address bar or tab title suggesting that the user is visiting Costco's website. Users interact with this by clicking it to switch to the Costco tab within the browser." + }, + { + "question_id": 37, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This phrase 'Pay Less.' is likely associated with a slogan or branding message, suggesting a value proposition to customers; the promise of spending less for the products or services offered by the entity associated with this phrase." 
+ }, + { + "question_id": 38, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The text 'Target:' resembles a title or a navigational cue for a segment within a web browser, it may indicate a web page related to the retail company Target and is probably part of a list or compilation of bookmarks or frequently visited sites." + }, + { + "question_id": 39, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "Similar to , 'Expect More.' is a slogan that implies a promise of greater value, service, or product quality from the associated entity. It is designed to communicate a marketing message or company ethos to customers." + }, + { + "question_id": 40, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "This text 'target.com' represents a simplified address, similar to , likely indicating that the user is visiting or has the option to visit Target's website. Interaction with this area would navigate to or indicate presence at Target's web page." + }, + { + "question_id": 41, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The text appears at the top of the screenshot and is likely the title of the application or page currently being viewed. It suggests that the content of the page is related to the Skype application, possibly for download or further information purposes." 
+ }, + { + "question_id": 42, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "This text is also indicative of the Skype application. It is usually the main header on an app page and is a non-interactive element that provides the user with confirmation of the app's identity." + }, + { + "question_id": 43, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The icon displayed represents the Skype application. It's typically used as a visual identifier of the app within digital stores or on a device's home screen. It serves as a non-interactive branding element in this context." + }, + { + "question_id": 44, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The word \"Install\" is generally an interactive button when found on an application download page. Tapping this button would initiate the download and installation of the app onto the user's device." + }, + { + "question_id": 45, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "This repeated mention of \"Skype\" may refer to the name of the application on its store page. It usually appears below the app icon and serves as a non-interactive title or label." 
+ }, + { + "question_id": 46, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The term \"purchases\" here likely relates to transactions associated with the app, suggesting that the app might offer in-app purchases." + }, + { + "question_id": 47, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "Paired with the previous \"purchases\" text, \"In-app\" specifies the location or type of purchases available, indicating that users can buy items or services within the app itself." + }, + { + "question_id": 48, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The rating \"4.1*\" is indicative of user reviews and ratings for the app. It reflects the app's quality as perceived by its users and is usually an averaged score based on individual user ratings." + }, + { + "question_id": 49, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "\"1B+\" signifies the number of times the app has been downloaded, indicating that the Skype app has been downloaded over one billion times." 
+ }, + { + "question_id": 50, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The phrase \"Editors' Choice\" likely denotes a special recognition or endorsement by the app store's editorial team, suggesting that the app comes highly recommended." + }, + { + "question_id": 51, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "This region contains the phrase \"search settings,\" which indicates a function allowing the user to search within the settings menu. The presence of a magnifying glass icon suggests that this is an interactive search bar interface element. Typically, a user would tap this area and input text to locate specific settings." + }, + { + "question_id": 52, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "This portion features the single character \"M\" which is typically representative of a user's initial or an application's logo. It is stylized with a certain thickness and distinct font that can be indicative of a branding design or user personalization within a software interface." + }, + { + "question_id": 53, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "This region includes the word \"Add\" presented with clear, bold lettering on a button or interactive element. 
This is commonly used to initiate the process of adding new elements, possibly in this context to add a new email account, as suggested by the surrounding text." + }, + { + "question_id": 54, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "The text \"another email\" is part of a larger phrase that suggests functionality for adding additional email addresses to the account or application in use. It is likely not interactive by itself but is part of instructional or descriptive text guiding the user's actions." + }, + { + "question_id": 55, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Featuring the word \"account,\" this text complements the nearby phrase and is associated with the process of adding or managing email accounts within the application or device settings. It would generally not be interactive but adds context to the interface's options." + }, + { + "question_id": 56, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "The phrase \"Set up your personal\" suggests a prompt or direction for the user to configure personal settings, possibly related to an email or other account settings. This text is usually static and provides guidance or instructions within a user interface." 
+ }, + { + "question_id": 57, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Paired with the previous text, \"or work email\" completes instructions for setting up email accounts of different types (personal or professional) within an application. This portion of text helps to further define the user's options for account configuration." + }, + { + "question_id": 58, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "\"Network & internet\" signifies a category or menu within the settings that pertains to adjusting network-related preferences including Wi-Fi, data usage, and related connectivity features. This text typically leads to a subsection where related settings can be modified." + }, + { + "question_id": 59, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "This section, \"Wi-Fi, mobile, data\" lists different connectivity options available to the user for configuration. It could be informative text providing a summary of the settings contained within the \"Network & internet\" menu mentioned in the previous region." + }, + { + "question_id": 60, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "The presence of the word \"usage,\" combined with the context from the surrounding text, relates to data consumption aspects of the device's network settings. 
\"And\" suggests there are additional relevant aspects listed after this text, likely related to managing network services or features." + }, + { + "question_id": 61, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "This area displays the text \"Wednesday, May 18,\" which appears to show the date information, presumably representing the current day of the week and the month's date. This is typically displayed on mobile devices as part of the user interface to inform the user of the current date." + }, + { + "question_id": 62, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "The text \"Maps\" suggests an application name, likely a mapping or navigation app, which users commonly utilize to find locations, get directions, or explore maps of different areas. It is probable that tapping on this text would open the associated application." + }, + { + "question_id": 63, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "This is a single character \"G,\" customarily associated with Google’s branding. It often represents access to Google's search services or apps affiliated with Google. Interacting with this symbol would possibly lead to a Google product or service." 
+ }, + { + "question_id": 64, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "This icon, often indicative of location services or map functionality, is commonly used to represent a user's current location or to access location-based features. Interacting with this icon typically opens a mapping application that shows the user's real-time location on a map." + }, + { + "question_id": 65, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "This icon depicts a chat bubble, usually associated with messaging or communication services. It typically indicates the user's chat or messaging applications, and interaction would likely open the associated messaging service to send or receive messages." + }, + { + "question_id": 66, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "This icon with a colorful design resembling a camera shutter or a wheel hints at the Google Chrome browser, which is widely used for Internet browsing. Tapping this icon would typically open the Chrome browser for web navigation." + }, + { + "question_id": 67, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "The icon exhibits the Google Assistant symbol, suggesting voice-activated or typing search query functionality. Interacting with this icon would likely invoke Google Assistant to help with tasks, answer questions, or control smart home devices." 
+ }, + { + "question_id": 68, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The URL displayed in the address bar indicates that the webpage belongs to costco.com and includes a path, or endpoint, that suggests a functional page, which the text 'Check' implies may be related to a checkout or verification process. This is the web address users can navigate to for interacting with the website's functionality." + }, + { + "question_id": 69, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The logo represents the brand identity for Costco Wholesale, indicating that the user is currently on the official website of this retail company. As a logo, it may serve as a clickable element that typically redirects users to the homepage of the website." + }, + { + "question_id": 70, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "Labeled 'Warehouses,' this indicates a section of the website where users can find information about Costco's physical store locations. It likely functions as a link that, when clicked, will take the user to a page detailing warehouse locations and related information." + }, + { + "question_id": 71, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "Marked 'Account,' this suggests a section pertaining to user account management. Clicking on this would likely allow the user to access their personal account details, sign in, or manage their membership and profile." 
+ }, + { + "question_id": 72, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "This icon appears to represent a shopping cart, which is commonly used on e-commerce websites to signify where users can view items they intend to purchase. Clicking on it would probably take the user to view their current selections or to the checkout page." + }, + { + "question_id": 73, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "Highlighting the word 'Shop,' this implies a navigational link designed to direct users to the online shopping section of the website, where they can browse and choose products for purchase." + }, + { + "question_id": 74, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "This appears to be a search bar, where users can enter keywords or phrases to find specific items or information on the website. Such fields typically include an interactive function that generates search results upon entry confirmation." + }, + { + "question_id": 75, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "'My Warehouse' likely refers to the user's preferred or designated Costco warehouse location. It may include functionality for the user to select or change their preferred store and may show additional details, such as operational hours." 
+ }, + { + "question_id": 76, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The phrase 'Delivery Location' accompanied with what appears to be a postal code implies a feature that allows users to specify or view the location to which online purchases will be delivered." + }, + { + "question_id": 77, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "Containing the city name 'Seattle,' this suggests the chosen warehouse or delivery location for the user. It might be interactive to allow the user to change the location or view information on the selected warehouse." + }, + { + "question_id": 78, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "This text likely represents the name of an application or service known as Fetch Rewards, potentially hinting at a rewards system that users can utilize by engaging with the app." + }, + { + "question_id": 79, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "This is the word \"Play,\" which usually is associated with initiating an action or starting something, such as a video or game within an application." 
+ }, + { + "question_id": 80, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The phrase \"to earn\" typically suggests that there is an opportunity to gain something—often points, money, or rewards—by performing certain actions or tasks." + }, + { + "question_id": 81, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The word \"MAKE\" usually denotes the action of creating something or obtaining an outcome, perhaps insinuating that users can create or earn money through the app." + }, + { + "question_id": 82, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "This text states \"MONEY,\" which indicates that the application or service likely involves opportunities for users to earn financial rewards or benefits." + }, + { + "question_id": 83, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The text \"appminer st\" is not immediately clear, but it could be a truncation or part of a larger phrase, possibly indicating a feature within the app, or related to app mining or statistics." 
+ }, + { + "question_id": 84, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The word \"Contains\" usually suggests that what follows will describe the contents or features within the app, which in this case could be related to advertisements." + }, + { + "question_id": 85, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "This term generally refers to \"advertisements,\" suggesting that the application includes ads that users might see while utilizing the app." + }, + { + "question_id": 86, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The numerical figure \"50K+\" generally implies a quantity greater than 50,000, typically used in the context of downloads, users, or items within an app." + }, + { + "question_id": 87, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The characters \"4.7*\" indicate a rating, likely on a 5-point scale, suggesting that users have rated the app positively, with 4.7 out of 5 stars." + }, + { + "question_id": 88, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "This area contains the webpage title indicating the user is on a retail website known for its wide range of products, hinting at online shopping capabilities. 
The title is typically non-interactive and serves as an identifier of the site." + }, + { + "question_id": 89, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "This text seems to be an incorrect or truncated URL for the same retail website mentioned in Region 1. Possibly a typographical error within the text, it seems non-functional." + }, + { + "question_id": 90, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The text here represents a search query within a search bar of the website, suggesting the user is looking for a Lenovo ThinkPad, which is a model of a laptop computer." + }, + { + "question_id": 91, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "Labeled 'Cancel,' this is likely an interactive button used to clear the current search query within the search bar. Once tapped or clicked, it should clear the input text." + }, + { + "question_id": 92, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "This text duplicates the query in Region 3 and is part of the search bar suggestions or search history, indicating a previous or common search made by the user." 
+ }, + { + "question_id": 93, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The single word 'Lenovo,' which is part of a search suggestion below the search bar, represents the brand that manufactures various electronic devices, including laptops." + }, + { + "question_id": 94, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The word 'ThinkPad' refers to a specific line of laptops and is part of a search suggestion. Standalone, it specifies the user's interest in the ThinkPad series by Lenovo." + }, + { + "question_id": 95, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The text 'ir' appears to be an incomplete or mistyped search term or fragment within the search suggestions. Its context is unclear without additional information." + }, + { + "question_id": 96, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "This text, likely a category label, indicates the section of the site the user is navigating, presumably the electronics category where items like laptops would be found." + }, + { + "question_id": 97, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "Representing a longer search suggestion, this phrase indicates a related accessory for the Lenovo ThinkPad, specifically a charger, suggesting the user might be looking to purchase this item." 
+ }, + { + "question_id": 98, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "The image displays a title that reads \"NAVIGATING SPECIAL EDUCATION SOCIAL & EMOTIONAL LEARNING.\" It's styled in bold, white capital letters against a red background, and it appears to serve as a header for the entire visual presentation, indicating the overarching theme of the content below." + }, + { + "question_id": 99, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel illustrates the concept of a \"Growth Mindset\" juxtaposed with \"Fixed Mindset.\" Two head silhouettes are shown with arrows pointing towards a \"Growth Mindset\" tag indicating a positive transformation away from a \"Fixed Mindset,\" symbolizing the adaptability and learning potential of the mind." + }, + { + "question_id": 100, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "Depicted here is a person with a grim expression, and steam coming out of their ears, conveying the theme of \"Anger Management.\" This symbolizes the need to control tempers, with visual cues highlighting the struggle typically associated with anger." + }, + { + "question_id": 101, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel represents \"Understanding Diversity.\" It features a circle of variously colored handprints reaching towards the center, signifying unity and inclusiveness among diverse individuals or groups. 
The image communicates the idea of embracing diversity." + }, + { + "question_id": 102, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "The image here is indicative of \"Social Inferencing.\" A figure stands perplexed before an open box with question marks floating above, suggesting the process of interpreting social cues and understanding social contexts or scenarios that are not overtly expressed." + }, + { + "question_id": 103, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "An illustration of two children, one standing over the other with a raised fist, typifies \"Bullying.\" This image portrays an aggressive interaction between youth, emphasizing the dynamic of power and intimidation present in bullying behaviors." + }, + { + "question_id": 104, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "Here, \"Helping Others\" is symbolized by two children, one assisting the other by tying their shoe. This image evokes themes of kindness, helpfulness, and cooperation among individuals, highlighting the importance of social support." + }, + { + "question_id": 105, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "Showing two profiles with opposing arrows and a lightning bolt in between, this panel discusses \"Conflict Resolution.\" The imagery suggests two individuals facing a conflict with a potential for resolution, emphasizing communication and problem-solving." 
+ }, + { + "question_id": 106, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel depicts \"Coping With Changes,\" represented by a signpost with arrows pointing in different directions, labeled \"CHANGES.\" It symbolizes the various paths one may take when encountering life's transitions and the importance of adaptability." + }, + { + "question_id": 107, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "Finally, \"Leadership\" is expressed through an individual climbing a staircase while assisting another person upward. It represents the concept of leading by example, and guiding others towards success, showcasing the traits of a good leader." + }, + { + "question_id": 108, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image highlights a statistic related to hiring efficiency, pointing out that the time to hire has more than doubled over the last 5 years. A graphical element beside the text emphasizes this increase in time with a \"+2X\" indicating the doubling." + }, + { + "question_id": 109, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This section of the image indicates that the average cost to hire someone in the U.S. is $4,000, emphasizing the financial implications of the recruitment process for employers." 
+ }, + { + "question_id": 110, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Illustrated here is a pie chart displaying that 36% of employers are unable to find the talent they need when it is needed. This statistic points to the challenges in matching skills and job openings in a timely manner." + }, + { + "question_id": 111, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Described here is the significant potential economic benefit (\"$2.7 trillion impact to global GDP\") that could result from using more efficient talent platforms, suggesting that improvements in recruiting methods could have a profound impact on the global economy." + }, + { + "question_id": 112, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This part of the image addresses organizational efficiency, with a statement that 46% of companies are sometimes or frequently understaffed. The figure is accompanied by a graphic showing the 46% proportion." + }, + { + "question_id": 113, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "This area presents the title and introductory text providing an overview of the image's intent. 
It introduces the concept of \"Travel Personas,\" indicates that these personas are used to identify individual travel styles, and how these styles are significant for personalized engagement in marketing. It references a report by the CMO Council from 2018." + }, + { + "question_id": 114, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A visual and textual depiction of \"The Smart Planner\" travel persona. This persona, representing 31% of travelers, is illustrated by a character with suitcases, a camera, binoculars, and a hat, suggesting a well-prepared and organized traveler." + }, + { + "question_id": 115, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "This illustrates \"The Relaxed Nomad\" persona. With 25% representation, the image shows two individuals in hiking attire with a backpack, indicating a laid-back and adventurous travel style." + }, + { + "question_id": 116, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Depicts \"The Deal Seeker\" persona, representing 22% of travelers. The image shows a family with suitcases and shopping bags, suggesting a focus on economical travel and value for money." + }, + { + "question_id": 117, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Presents \"The Nervous Stresser\" persona with 13% representation. The image depicts an anxious individual in an airplane seat, clutching the armrests, reflecting a traveler who experiences stress during trips." 
+ }, + { + "question_id": 118, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Showcases \"The Adventurous Thrill-Seeker\" persona, accounting for 5% of the traveler demographic. The image portrays two characters skydiving, indicating a preference for high-energy and adventure-filled travel experiences." + }, + { + "question_id": 119, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Portrays \"The Luxury Budget-Buster\" persona, constituting 1% of travelers according to this depiction. The image includes a character sipping a drink on a plane, implying a tendency towards indulgence and high expenditure." + }, + { + "question_id": 120, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Displays \"The Business Road Warrior\" persona, also making up 1% of the traveler profile. The graphical representation includes a character briskly walking with a rolling suitcase and carrying a briefcase, suggesting frequent travel for business purposes." + }, + { + "question_id": 121, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "An illustration depicts a female customer service representative wearing a headset and holding a notepad, with an envelope icon indicating email communication." 
+ }, + { + "question_id": 122, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "Depicted here is a male customer support agent with a headset. Behind him are symbols such as a magnifying glass and a wrench, suggesting a focus on service and problem-solving." + }, + { + "question_id": 123, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "This image shows a customer support agent with a globe and a phone headset in the background. The presence of a star and headphones suggests excellence in global support." + }, + { + "question_id": 124, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "In this graphic, a male figure with a headset is surrounded by symbols: a question mark, gears, and a light bulb. This represents expertise in finding solutions." + }, + { + "question_id": 125, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "Featuring tools and a gauge, the illustration conveys a commitment to quality in customer service, indicated by the 'Quality Service' text." + }, + { + "question_id": 126, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "The design shows a female representative with a headset alongside a mobile phone displaying a wifi signal and a callback option, emphasizing telecommunications services." 
+ }, + { + "question_id": 127, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel portrays a customer support individual with a wrench, highlighting the concept of assistance with technical or practical issues." + }, + { + "question_id": 128, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "Illustrated here is a customer support agent with a headset in front of a backdrop depicting the UK flag, a speech bubble, and a phone, suggesting language translation services." + }, + { + "question_id": 129, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A female customer service consultant is represented, with symbols of 24-hour availability and a gold star, signifying round-the-clock excellence." + }, + { + "question_id": 130, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image features a human heart symbolizing a strong heart as one of the benefits of running." + }, + { + "question_id": 131, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel showcases an icon of a shield with a check mark, representing the immune system's boost from running." 
+ }, + { + "question_id": 132, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A silhouette of a figure measuring their waist indicates that running can aid in weight loss." + }, + { + "question_id": 133, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The artwork depicts a pair of lungs, signifying the respiratory system's enhancement due to running." + }, + { + "question_id": 134, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A smiling face emoticon suggests that running can improve one's mood." + }, + { + "question_id": 135, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "An illustration of a leg bone signifies that running increases bone density." + }, + { + "question_id": 136, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "An image of a brain is used to illustrate the benefit of improved brain function from running." 
+ }, + { + "question_id": 137, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image features a detailed representation of the cardiovascular system, emphasizing its strengthening through running." + }, + { + "question_id": 138, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "The image displays the word 'HOT' in large, bold, uppercase letters with varying colors for each letter. The 'H' is in red, the 'O' is in mustard yellow, and the 'T' is in a light blue color." + }, + { + "question_id": 139, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "Here, a round, yellow cartoon-like emoji with blue tears, symbolizing laughter or crying with joy, is shown. Below it, the word 'HUMOR' is written in uppercase letters." + }, + { + "question_id": 140, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "This depicts an open hand graphic in brown color, which is centered within an orange circular background. Underneath the image, the word 'OPENNESS' appears in capital letters." + }, + { + "question_id": 141, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "Shown is an illustration of two hands coming together in a handshake or high five, set against a yellow circle. Below, the word 'TOUCH' is described in uppercase letters." 
+ }, + { + "question_id": 142, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A graphic of a heart with an exclamation mark within it is displayed within a light green circular background. Written below is the word 'ATTENTION' in uppercase letters." + }, + { + "question_id": 143, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A portrayal of a person with a gender-neutral appearance, featuring brown hair, is encircled in blue. A sequence of dashes leads from the character to the bottom right, with the word 'PROXIMITY' written in block capitals." + }, + { + "question_id": 144, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "The image shows a close-up illustration of a stylized blue eye with a large brown pupil, against a dark blue background. Below the eye is the word 'EYE CONTACT' written in all caps." 
+ }, + { + "question_id": 145, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image depicts an illustration of a person with flushed cheeks and a thermometer in their mouth, indicating a high temperature, alongside the word \"fever.\"" + }, + { + "question_id": 146, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image shows an individual coughing into their hand, representing a symptom identified by the word \"cough.\"" + }, + { + "question_id": 147, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel illustrates an individual appearing unwell, with a sick expression and a hand over their mouth. The word \"vomiting\" is associated, indicating it as a symptom." + }, + { + "question_id": 148, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Here, an individual is portrayed with their hands near their throat, their cheeks flushed, and an uneasy expression. The term \"dyspnea\" adjacent to the figure defines the displayed respiratory distress." 
+ }, + { + "question_id": 149, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "An individual is seen clutching their stomach, with a distressed expression, representative of \"diarrhea\" which is indicated by the corresponding label." + }, + { + "question_id": 150, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The illustration shows a pair of human lungs with a highlighted area indicating inflammation. The word \"pneumonia\" is present to describe the condition being depicted." + }, + { + "question_id": 151, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Featured is a graphic representation of a pair of kidneys with a highlighted area in red, indicating distress or damage. Alongside is the phrase \"renal failure,\" signifying the medical condition exhibited." + }, + { + "question_id": 152, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "This region displays the heading \"Interesting Facts\" at the top, set against a blue background with a three-line menu icon to the left and a heart symbol to the right." 
+ }, + { + "question_id": 153, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Depicts a graphical icon of a panda bear's face on a green background with the label \"Animals\" beneath it." + }, + { + "question_id": 154, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Shows an icon representing a plate and silverware on a green background, labeled as \"Diet Nutrition.\"" + }, + { + "question_id": 155, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Includes a graphical icon that combines a heart shape and a pulse line on a dark background, labeled \"Diseases Disorders.\"" + }, + { + "question_id": 156, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Features an image of a fashionable shirt on a dark background with the word \"Fashion\" underneath it." + }, + { + "question_id": 157, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Contains an icon resembling a film strip on an orange background, indicating the \"Entertainment\" category." 
+ }, + { + "question_id": 158, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel displays an icon of a syringe with a drop, which is on a green background, and is described with the words \"Drugs Addiction.\"" + }, + { + "question_id": 159, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Shows a depiction of a wine bottle and glass on a blue background, labeled \"Food & Drink.\"" + }, + { + "question_id": 160, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Exhibits two stylized human figures, one male and one female, on a blue background, with the inscription \"Gender.\"" + }, + { + "question_id": 161, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "Contains a depiction of the Earth on a green background, with the word \"Global\" beneath it." + }, + { + "question_id": 162, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image depicts two individuals engaged in conversation. One appears to be a professional, possibly a therapist, sitting across from a person who seems to be seeking help. The scene is accompanied by the text \"Seek Professional Help,\" suggesting that the image represents the advice to consult a mental health professional when dealing with depression." 
+ }, + { + "question_id": 163, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel features an individual sitting on the ground with their head down, projecting a dejected or hopeless demeanor. Above the figure, the text reads \"Don't Lose Hope.\" The image conveys the message of maintaining hope as a countermeasure against feelings of depression." + }, + { + "question_id": 164, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "An illustration of a female figure is shown alongside the phrase \"Practice Mindfulness.\" She appears calm and collected, with her eyes closed and a slight smile, which indicates a serene state of mind, commonly associated with mindfulness practice." + }, + { + "question_id": 165, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "Here, an individual is seen looking out of a large window onto a sunny landscape with trees. The phrase \"Rethink Your Perspective\" suggests that the image is advising a change in one's outlook, possibly to a more positive or broader view, as a way to combat depression." + }, + { + "question_id": 166, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image presents a person in activewear, taking a stride forward with a focused expression. 
The associated text, \"Stay Active,\" recommends physical activity as a method for improving mental health and battling depression." + }, + { + "question_id": 167, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "This panel displays a person in a yoga pose, meditating with eyes closed and hands in a position of focus. The text \"Meditate\" indicates that the image is suggesting meditation as a therapeutic practice for managing depression." + }, + { + "question_id": 168, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "The image shows three gel ice packs in green, purple, and blue colors, with distinctive shapes, resembling a dinosaur, a star, and a fish. Accompanying text suggests \"Take out the one you need.\"" + }, + { + "question_id": 169, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "An illustration depicts a cartoon boy holding an ice pack to his head. 
Text indicates the ice pack has multi-functionality and advises using the ice pack for \"the relief area for the doctor recommended time of 20 minutes.\"" + }, + { + "question_id": 170, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "Detailed instructions on how to use the ice pack are given, with two methods highlighted: \"TO USE COLD\" involving refrigeration, and \"TO USE HOT\" instructing to microwave the pack for 10 seconds and check the temperature." + }, + { + "question_id": 171, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "Guidelines for storage are portrayed, advising to \"put the item in the storage bag, for longer shelf life, keep pack in freezer while not in use.\"" + }, + { + "question_id": 172, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "The object appears to be a small brown wooden shed, likely used for storage, situated on a patch of grass. It has a clearly visible slanted roof, possibly for rain runoff, and looks to be a single-door structure typically found in a backyard or garden setting." + }, + { + "question_id": 173, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "This object is a tree with thick, lush foliage, representing a mature specimen that provides shade and greenery. 
It stands behind a smaller, sparser tree and is part of a larger grouping of trees that appear to create a natural boundary or backdrop for the area." + }, + { + "question_id": 174, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "A single metal pole is embedded in the ground in a vertical orientation. It seems to be a simple, slender structure, possibly serving as a support or part of a larger construction that isn't fully visible. The lawn surrounding it is well-trimmed and maintains an even appearance." + }, + { + "question_id": 175, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "This bowl, appearing to be dark blue, is situated against a background, likely part of kitchenware." + }, + { + "question_id": 176, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "The tabletop is made of dark marble, showcasing a glossy finish and reflecting its surroundings slightly." + }, + { + "question_id": 177, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "The light switches are white, contrasting with the dark wall, likely plastic, and appear functional." + }, + { + "question_id": 178, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "Positioned in the background, these white light switches are paired on a wall above the countertop." 
+ }, + { + "question_id": 179, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "This silver oven, with digital controls and a handle, appears modern and built into the cabinetry." + }, + { + "question_id": 180, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "An indistinct blue and green object, possibly decorative, is partially visible against a lighter backdrop." + }, + { + "question_id": 181, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "The floor, constructed of hardwood, showcases a natural finish with variations in wood grain." + }, + { + "question_id": 182, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "The jar holder, likely metal, is mounted to the wall, containing jars that may hold spices or ingredients." + }, + { + "question_id": 183, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "This is an image of a silver metal table situated outside on a paved ground. The table has a shiny, reflective surface indicative of being metallic." + }, + { + "question_id": 184, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "The object is an outdoor chair characterized by its red backrest and tan seat. 
It appears sturdy and designed for outdoor settings, likely part of a café or restaurant patio." + }, + { + "question_id": 185, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "The item in question is a piece of lavender paper that seems to be placed atop a metal table. The paper's edges are distinctly visible against the table's surface." + }, + { + "question_id": 186, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "Visible here is a yellow traffic light, suspended above the street. The light is not illuminated and it stands against a light sky, possibly signaling a traffic-stop scenario." + }, + { + "question_id": 187, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A large red and white striped umbrella stands open, presumably providing shade or shelter in an outdoor setting. Its vibrant colors attract attention." + }, + { + "question_id": 188, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A brown tree trunk is seen beside a sidewalk. The trunk's bark is rugged and it appears to be a mature, healthy tree, offering shade to the vicinity." + }, + { + "question_id": 189, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "Displayed is a black chalkboard featuring white text. It seems to be placed on a sidewalk, often used for displaying messages or menus outside establishments." 
+ }, + { + "question_id": 190, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A window is seen on the side of a tan-colored building. It appears to be rectangular, typical of building windows, and reflects the adjacent surroundings." + }, + { + "question_id": 191, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "A close-up view of a horse's head, predominantly brown with a distinctive white patch on its forehead and visible mane." + }, + { + "question_id": 192, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "This is the body of a brown horse, most likely the same one as the head seen in the close-up. Its front body is visible." + }, + { + "question_id": 193, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "A white horse is seen from a side angle in the distance, grazing or standing in a meadow with trees and a fence." + }, + { + "question_id": 194, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A frying pan on a heat source contains sautéed meat and vegetables, emitting steam, indicating the food is hot and being cooked." 
+ }, + { + "question_id": 195, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "An electric stovetop features a radiant burner that is glowing, suggesting it is turned on and providing heat for cooking." + }, + { + "question_id": 196, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A kitchen knife with a green handle rests on a countertop; its blade appears sharp and suitable for food preparation." + }, + { + "question_id": 197, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A human hand is captured in motion, seasoning or stirring the food in the pan, contributing to the cooking process." + }, + { + "question_id": 198, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000299654.jpg", + "category": "natural_detailed_caption_box", + "text": "The image depicts the head of a zebra, with distinctive black and white stripes covering its fur. The animal's ears are pointed upwards, indicating alertness. The eyes are visible, showcasing a gentle gaze, and the nose is close to the ground, suggesting the zebra is grazing or sniffing the terrain. The mane is partially visible as a series of short, erect black hair between the zebra's ears." 
+ }, + { + "question_id": 199, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000107939.jpg", + "category": "natural_detailed_caption_box", + "text": "The object is a rectangular street sign with white letters on a green background, indicating the name of a street. It is affixed to a metal pole and is located above and slightly to the left of a stop sign. The sign reads 'NORTH AVE' suggesting it's likely an indication of the street or direction. It appears to be a standard street name sign used in many urban settings." + }, + { + "question_id": 200, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000107939.jpg", + "category": "natural_detailed_caption_box", + "text": "This object is a red hexagonal stop sign with white uppercase letters spelling 'STOP'. It is attached to the same metal pole as another sign, below and to the right of it. The sign is designed to alert drivers to stop and is a widely recognized traffic control device. The edges of the sign appear sharp and undamaged, suggesting it is in good condition." + }, + { + "question_id": 201, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "A plush, padded object designed for comfort, potentially used on a sofa." + }, + { + "question_id": 202, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "Similar to the first object, this is also a stuffed and soft piece intended for supporting or resting." 
+ }, + { + "question_id": 203, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "Decorative accessory adorned on the ear, visible as a small, shiny object." + }, + { + "question_id": 204, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "This is a child with an open mouth and animated facial expression, possibly speaking or expressing surprise." + }, + { + "question_id": 205, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "Appears to be a young boy, casually dressed, gripping an electronic device with attention." + }, + { + "question_id": 206, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "The figure is wearing a red ski suit with a blue helmet and goggles. Their stance is open and welcoming, arms outstretched, and they seem to be an instructor addressing a group of students on a snowy slope." + }, + { + "question_id": 207, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A person is mostly obscured by the instructor but can be identified as a ski student by the helmet. The student is wearing a purple jacket with green sleeves and appears to be in mid-motion, learning to ski." 
+ }, + { + "question_id": 208, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "There is a student dressed in green ski gear with visible ski poles, possibly following instructions. They are viewed from the side, indicating movement or a pause during skiing." + }, + { + "question_id": 209, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A clear blue sky with scant clouds, indicative of a bright, sunny day ideal for outdoor activities such as skiing. This backdrop is above a snowy mountain setting." + }, + { + "question_id": 210, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A ski student is captured from behind, suggesting they are moving away from the viewer. They are wearing a red jacket with black pants, indicative of typical ski wear fit for the cold environment." + }, + { + "question_id": 211, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "This student, visible from the side, is wearing a green and purple ski outfit with a matching helmet, possibly in the midst of practicing or following a ski maneuver." + }, + { + "question_id": 212, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A detailed examination of the instructor's black glove, which is part of standard skiing attire, suited to protect hands from cold conditions and providing better grip on ski poles." 
+ }, + { + "question_id": 213, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2411153.jpg", + "category": "natural_detailed_caption_box", + "text": "Captured in this section is a motorcycle racer, sharply tilting while maneuvering a turn on a race track. The rider, outfitted in a full-body racing suit, is almost in a horizontal position relative to the ground, a technique used in high-speed motorcycle racing to navigate tight turns while maintaining speed. The motorcycle itself is predominantly red with hints of white and black, and it showcases a sleek, aerodynamic design typical of high-performance racing bikes. The rider's focused posture and the bike's dynamic angle suggest this is a moment of intense action during a race." + }, + { + "question_id": 214, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2411153.jpg", + "category": "natural_detailed_caption_box", + "text": "This portion of the image displays the texture of an asphalt road, detailed with small granular elements indicative of a typical racing track surface built to offer traction and durability. A crisp white boundary line marks the edge of the racing track, contrasting with the dark gray tone of the asphalt. The road surface is illuminated by ambient light, highlighting the texture and suggesting a dry weather condition which is ideal for racing. The condition of the road suggests it is well-maintained, a necessity for the safety and performance of high-speed motorsport events." 
+ }, + { + "question_id": 215, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/22d3844dcf29a07bd10f557a33684e331846f81b938ca0f742afab09c542133f.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region of the image displays a portion of a scientific or academic paper, specifically focusing on points that seem to outlay contents or headings within the document. The page appears to discuss topics in physics, with references to quark and meson masses, as well as lattice data. The content suggests that the document may be exploring the relationship between subatomic particles and their masses, experimental data, and theoretical models (likely within the field of particle physics or quantum chromodynamics). Each item listed is followed by ellipsis and a numerical value, denoting sections or page numbers where these topics are expanded upon within the document." + }, + { + "question_id": 216, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/22d3844dcf29a07bd10f557a33684e331846f81b938ca0f742afab09c542133f.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region is at the bottom of the document, typically referred to as the page-footer. In academic or scientific papers, this section could include information such as the page number, publication date, author's name, or part of the document classification system. However, the specifics of what this footer contains are not visible, as the black rectangle with a white numeric identifier covers it entirely." 
+ }, + { + "question_id": 217, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The content is a caption designated for a table, which generally serves to describe the table's subject matter. The caption reads, \"TABLE 1: The geometries and adsorption energies for the structures of thioglycolic acid on Au(111) at 0.25ML.\" It provides a clear indication that Table 1 will present quantitative data regarding the geometry and energy characteristics of thioglycolic acid adsorbed on a gold (Au) substrate at a specific coverage level." + }, + { + "question_id": 218, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a table containing organized data. It lists various configurations of thioglycolic acid adsorbed on an Au(111) surface, along with numerical values for initial and optimized parameters such as adsorption distance (ds–Au), polar angle (θ), and adsorption energy (E_ads). The data is structured in columns with headings for different parameters and rows corresponding to different adsorption sites and tilt directions. The table is used to convey detailed quantitative information in a comparative format, facilitating the analysis of changes in geometry and energy after optimization." 
+ }, + { + "question_id": 219, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Here appears to be an excerpt of text, possibly from a research article or report, focusing on detailed scientific analysis. The text discusses the shortest Au-S bond length and mentions 'initial and optimized site,' likely referring to the states before and after some experimental or computational procedure. The content seems to pertain to the interpretation of the data presented in the table above it, providing context and insights into the structural data of the adsorption process." + }, + { + "question_id": 220, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This section of text also seems to be a detailed analytical discussion, possibly a continuation of the content from the previous text excerpt. It specifically highlights the adsorption energy for the most stable structure of a molecule on the Au(111) surface and the preferred adsorption site. It suggests a close relationship with both the data in the table above and the scientific interpretation or conclusion drawn from that data." + }, + { + "question_id": 221, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The content in this region appears to be a page-footer, usually found at the bottom of journal pages or official documents. It may contain information such as the page number, document section, publication date, or authors' names. 
Such footers are used for navigation and citation purposes." + }, + { + "question_id": 222, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The image is a collection of three scatter plots, each corresponding to a distinct type of prediction performance evaluated by F1 score. The x-axis represents the frequency (presumably of occurrence in the training set), while the y-axis represents the F1 score, which is a measure of test accuracy. The plots are labeled (a) Atom prediction performance, (b) Bond prediction performance, and (c) Charge prediction performance. Each plot features a variety of points labeled with chemical symbols or bond types, indicating that the data relates to chemical structures." + }, + { + "question_id": 223, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a caption for the series of scatter plots shown in . It provides an interpretation of the data, stating that there is a clear correlation between the performance of neural networks on different prediction types and the frequency of the specific type in the training dataset. It is noted that classification networks perform significantly better than segmentation networks." + }, + { + "question_id": 224, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a subsection title within the document that reads \"Performance of segmentation network.\" It indicates that the following text will discuss the results and analysis related to the evaluation of the segmentation network's performance." 
+ }, + { + "question_id": 225, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a paragraph that explains how the performance of the segmentation network is measured by the F1 score for pixel predictions for different atom, bond, and charge types. The text discusses how performance correlates with the frequency of these types in the training data and references a correlation visible in Figure 4, assuming that Figure 4 corresponds to the scatter plots in ." + }, + { + "question_id": 226, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is another subsection title within the document that reads \"Performance of classification networks.\" It signals that the subsequent paragraph will describe the performance evaluation for classification networks." + }, + { + "question_id": 227, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This paragraph details the performance of classification networks, mentioning that the F1 score is used for evaluation. It highlights a correlation between F1 score and the frequency of different atom, bond, and charge types in the training set. Although the segmentation is not perfect, the classification networks can maintain accuracy. Results are summarized in Figure 4, which likely refers to the scatter plots in ." 
+ }, + { + "question_id": 228, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a subsection title denoted \"Overall graph accuracy,\" which suggests that the following section of the document will focus on the combined accuracy measurements of the previously discussed networks." + }, + { + "question_id": 229, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "In this paragraph, the text outlines how combining the performance of different parts can produce an overall accuracy for graph predictions. It implies that integration of segmentation and classification network results, as indicated by an algorithm, can construct the resulting graph, referencing images in three different blocks." + }, + { + "question_id": 230, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is the page number of the document, specifically '11,' marking its location within the document's sequence of pages." + }, + { + "question_id": 231, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region appears to be a paragraph of text discussing the outcome of a washing process on reducing sugar content. It notes that this process resulted in a higher content of reducing sugar which is thought to overshadow the glycemic index (GI) lowering effect of the polyphenols and may increase the GI of the sugar." 
+ }, + { + "question_id": 232, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region of text outlines a graphical demonstration of the 'GI sweet spot' related to the sugars shown in a referenced table. It explains that a certain minimum amount of sucrose (22mg CE/100mg) needs to be retained during sugar processing to maintain a low GI, and that if additional polyphenols are present but the reducing sugars are too high, then the low GI effect is negated." + }, + { + "question_id": 233, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region includes a section header titled \"Table 3 - Example sugars,\" which implies that the region is categorizing and summarizing data related to various sugars, likely in a tabular format." + }, + { + "question_id": 234, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text in this region describes the effects of increasing the reducing sugar content of sugar and its impact on the GI, moisture content, and the behavior of glucose and fructose when polyphenol content is increased. It concludes that optimizing moisture and reducing sugar content is insufficient to lower the GI in the presence of higher polyphenol levels." 
+ }, + { + "question_id": 235, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region is a section header for the text that follows, indicating that the content will deal with \"Example b - Washing or massecuite to desired polyphenol content.\" This text likely explains an example or case study related to the process of washing sugar massecuite to achieve a certain level of polyphenol content." + }, + { + "question_id": 236, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region contains text describing an experimental process where two different sugar mill samples were tested for polyphenol content after undergoing washing to a certain depth of color. It discusses how the polyphenol content was measured against desired levels and mentions results found in a specific table." + }, + { + "question_id": 237, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a figure. It includes several images arranged in a grid layout depicting various stages of a document life cycle or processing steps. Each image shows a different state of documents, possibly related to digitalization or text recognition processes. These images likely serve as a visual representation of the document's evolution through a particular workflow, such as scanning or Optical Character Recognition (OCR)." 
+ }, + { + "question_id": 238, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a caption associated with a figure. It reads \"Figure 5: The OCR process.\" This caption identifies and describes the figure that it is associated with. The figure it refers to likely illustrates the stages or aspects of the OCR process, which could involve converting scanned images of text into machine-encoded text." + }, + { + "question_id": 239, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a figure. It shows a piece of text with visual markings comparing two sections labeled \"Available OCR\" and \"Improved OCR.\" The annotations indicate corrections or enhancements made in the 'Improved OCR' section compared to the 'Available OCR' section. This figure serves to demonstrate the efficacy of certain OCR technologies or methodologies by providing a before-and-after comparison." + }, + { + "question_id": 240, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a caption associated with a figure. It reads \"Figure 6: Excerpt from the Hong Kong report with different versions of OCR output. The Internet Archive image containing this excerpt can be accessed here:\" followed by a URL. This caption provides context for the associated figure, indicating that it is an excerpt from a specific report and acknowledges the source of the image. It helps readers understand the purpose of the figure and where they can find additional information." 
+ }, + { + "question_id": 241, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is part of a footer. It contains the name of a journal, \"Journal of Data Mining and Digital Humanities,\" along with the ISSN number, which is a unique identifier used for serial publications. This area of the document provides information about the publication in which the article or research paper may be found." + }, + { + "question_id": 242, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is also part of a footer. It includes a URL, \"http://jdmdh.episciences.org\", which likely directs readers to the website of the journal or publication mentioned in . This URL provides a way for readers to access more information or related content online." + }, + { + "question_id": 243, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a page number. It is located in the footer area of the document and provides the numerical identifier \"9\" for the current page. This helps readers navigate the document and facilitates referencing specific sections." + }, + { + "question_id": 244, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region is identified as the page-header of the document. 
It contains the title of the document, which reads \"2012 Annual Report 2013.\"" + }, + { + "question_id": 245, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This portion is a section-header labeled \"Non-Executive Directors' Remuneration.\" It indicates that the following section will discuss the payment and remuneration details for non-executive directors of the company." + }, + { + "question_id": 246, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This area is another section-header specifying \"Components of Non-Executive Director remuneration.\" This header suggests a breakdown of the various elements that constitute the remuneration for non-executive directors." + }, + { + "question_id": 247, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Found at the bottom of the page, this region is the page-footer. It's a small section that is typically used for providing footnotes, disclaimers, or publication information for the document." 
+ }, + { + "question_id": 248, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a table detailing a \"Share purchase plan.\" It contains columns for the name of the individual, the amount of shares acquired, and the share price range at acquisition dates, alongside with the total sum. It lists information about shares acquired by specific individuals at specified price ranges during a specific time frame." + }, + { + "question_id": 249, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a block of text providing detailed information on \"Current Board fees\" and \"Post-employment benefits.\" It specifies the annual fees for different board roles and outlines the post-retirement benefits provided to non-executive directors with terms of board service." + }, + { + "question_id": 250, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region contains text related to the \"Deferred share purchase plan.\" It describes the nature of the share purchase plan, specifying the conditions under which shares were purchased, the performance criteria associated with the plan, and details regarding the share price and acquisition dates." 
+ }, + { + "question_id": 251, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region is categorized as text. It discusses the mathematical concept of homotopy groups designated π_n(M), focusing on their ability to classify different dimensional hypersurfaces within a manifold M. The text further explains that the triviality of these homotopy groups is linked to the connectivity of the space they represent, with specific mentions of the concepts of domain walls in cosmology and topological defects arising from symmetry breaking during phase transitions in the universe." + }, + { + "question_id": 252, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region of text delves into the implications of symmetry breaking in theoretical physics. It connects the process of symmetry breaking to the generation of monopole-like defects, and it references the Standard Model's group construction that includes a U(1) factor. The text implies that this formation of defects played a pivotal role in historical scientific developments." + }, + { + "question_id": 253, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text in this section links the theoretical concepts mentioned earlier to a practical application: the motivation for introducing a phase of inflation in cosmological models. 
It characterizes the topological conditions for the formation of defects and points out that certain solutions for these conditions can exist even in the absence of topologically stable defects. References are made to specific types of defects and academic citations are included to support these statements." + }, + { + "question_id": 254, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The content in this region pertains to the study of cosmic defects and their stability. It addresses scenarios in which initially unstable defects might become stable through various mechanisms, such as the effects of plasma. These considerations are relevant to the inflationary model in cosmology, particularly the constraints from the formation of cosmic strings." + }, + { + "question_id": 255, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region is categorized as a formula and presents a mathematical equation related to the text's discussion about topological defects and homotopy groups. The equation seems to represent a relationship that is essential to the argument or analysis presented in the categorical text regions it is associated with." + }, + { + "question_id": 256, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The last region is identified as a page-footer. 
It likely contains publication and/or authorship information, a page number, or possibly an indication of the section of the document where the content can be found. Since it is a page-footer, its purpose is primarily to aid in the organization and navigation of the document." + }, + { + "question_id": 257, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0301_0188_0040.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The image appears to be a stylized illustration of a side profile of a person's head and upper torso. The person's face is depicted with a serene or peaceful expression, eyes closed and a faint smile, implying a sense of calm or contemplation. Behind the figure, there are abstract shapes resembling clouds or wind patterns that swirl around the head, which could suggest thoughts, memories, or a state of mental flow. The illustration uses a muted color palette, predominantly warm shades of beige, pink, and gray, with a touch of red in the figure's attire, which has a dotted pattern. This artwork likely serves to evoke a mood or theme related to the content of the document in which it's included, possibly regarding mental health, mindfulness, psychology, or the creative process." + }, + { + "question_id": 258, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0301_0188_0040.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "This is a footer section of an image or document. It contains a citation that reads \"© 2021 Scientific American,\" indicating that the image or the content of the document is copyrighted by Scientific American. This informs the viewer about the source of the content and copyright year, serving both as an attribution and a legal notice to respect the intellectual property rights associated with the material." 
+ }, + { + "question_id": 259, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This section is labeled as \"Chapter 2: Motivation.\" It functions as a heading signifying the start of a new chapter or section within the document, providing readers with an indication of the chapter's theme, which in this case is to establish the rationale or impetus behind the subject matter discussed in the chapter." + }, + { + "question_id": 260, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region, also labeled as \"Chapter 2: Motivation,\" serves a similar purpose to , functioning as part of the chapter heading that presents the focus of the chapter, potentially implying that the author will delve into the reasons or driving forces guiding the study or research presented." + }, + { + "question_id": 261, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region contains a paragraph of text that discusses specific concepts related to a theoretical framework, possibly in the field of theoretical physics or string theory. It mentions a scenario where excitations in a certain 'near horizon throat region' appear redshifted to an observer at infinity. The text discusses the energy associated with these excitations and touches on limits pertaining to string theory, suggesting that in a particular limit, the full Type IIB string theory must be considered. 
The paragraph concludes with an implication that supergravity must be considered in the context of near-horizon geometry within the scope of string theory." + }, + { + "question_id": 262, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "In this text paragraph, the document appears to be discussing two theoretical pictures related to the same low-energy limit within theoretical physics or string theory. It mentions the field theory picture with supergravity and an \\( N = 4 \\text{SU}(N) \\) SYM on the D branes, as well as the geometry picture with supergravity in flat space and Type IIB string theory. It suggests that the document is comparing and contrasting these two theoretical perspectives and proposing that they are both decoupled theories with identical asymptotic conditions." + }, + { + "question_id": 263, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This text section seems to conclude the discussion by mentioning that the analytical tools for two differing theoretical scenarios are completely incompatible. It references the Born-Infeld action and suggests that a mathematical comparison between different models yields coincident D-branes for an \\( \\text{SU}(N) \\) two-form field strength, relating to a broader discussion on theoretical physics and string theory." 
+ }, + { + "question_id": 264, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "This region exhibits a mathematical formula that is relevant to the discussion within the document. The formula appears to link certain theoretical physics concepts, connecting string coupling constants \\( g_s \\) with D-brane charges and configurations. The formula is most likely important in the context of supporting the document's claims about supergravity or string theory." + }, + { + "question_id": 265, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The paragraph in discusses the conventional assumption that an insider's private information is static, citing specific examples from the literature. It elaborates by stating that in certain works, insiders are assumed to know the final value of an asset both before and after the default of the company issuing the asset. The text suggests that the presence of insiders does not always lead to market arbitrage and may contribute positively to the market by leading to higher information efficiency in price processes." + }, + { + "question_id": 266, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text in challenges the assumption of an insider's perfect foresight as unrealistic, reasoning that the fundamental value of a firm is tied to dynamically changing elements like cash flows and sales, among other factors. 
The paragraph presents the idea that the fundamental value is stochastic, implying that it is subject to random fluctuations, and that the insider has the advantage of perceiving these fluctuations more clearly than other market participants." + }, + { + "question_id": 267, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "In , the document introduces the paper's goal, which is to relax the assumption of static information and examine the equilibrium in trading and price processes and market efficiency when insiders have dynamic private information. The paragraph sets the context for a more detailed exploration of how markets operate under these conditions." + }, + { + "question_id": 268, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": " contains text which explains that the model considered in this paper is a broader version of the earlier static models. The paper's intention is to cover dynamic information scenarios and improve on previous models that covered a narrower range of trading strategies and pricing rules." + }, + { + "question_id": 269, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The paragraph in discusses the findings of the paper, which include the identification of a Markovian equilibrium that is inconspicuous, allows insiders to trade without being detected, and is solely dependent on the total order process. 
It underscores the unique nature of this equilibrium and how it enhances the market efficiency in certain conditions." + }, + { + "question_id": 270, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text in suggests empirical outcomes where revealing information might be beneficial. Specifically, it contrasts different market equilibrium scenarios and suggests that in non-Markovian price processes, it’s often better for insiders to disclose their private information." + }, + { + "question_id": 271, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The Comprehensive Description includes \"Abbondanza\" displayed in a script font that conveys a sense of stylishness or elegance, which may suggest that it is the name of a business, possibly a restaurant or some sort of food-related establishment, given its association with abundance or plenty often related to food." + }, + { + "question_id": 272, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The Comprehensive Description for \"Cafe\" suggests that the text is identifying a type of establishment where coffee and light meals may be served. The font is straightforward and easily legible, which is typically used for clarity and immediate recognition for passersby." 
+ }, + { + "question_id": 273, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The Comprehensive Description for \"USIS\" indicates a text that is likely an acronym or a name presented in a bold and blocky font, common for official or institutional entities. It is placed on the side of a van, suggesting it could be the branding of a company or a service, possibly linked to the van's purpose or ownership." + }, + { + "question_id": 274, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"ESTATE\" is written in capital letters with a bold typeface that has clear and uniform strokes, implying a sense of authority and prominence. It appears against a yellow background, which suggests visibility and is likely meant to catch the eye of passersby. The text is likely part of a business sign for a company dealing with property, real estate sales, or management." + }, + { + "question_id": 275, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"AGENTS\" displayed similarly in capital letters and bold typeface complements the text in . The typeface is consistent, suggesting that both are part of the same sign. The dark text against the yellow background stands out, indicating the nature of the business below, which is likely involved in real estate agency work." 
+ }, + { + "question_id": 276, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"SAXONS\" is written in capital letters with a font style that is bold and prominent, but with a slightly more decorative style than and . This difference could be a stylized choice to make the brand name distinctive. Positioned on a façade above a window, it is part of the business's branding, likely the name of the company." + }, + { + "question_id": 277, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "This contains the same text as , \"SAXONS\", indicating that the text is repeated within the image. This repetition reinforces the importance of the name as part of the branding. The text style and location, again above a window, maintain the brand's visibility from multiple angles." + }, + { + "question_id": 278, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "Displaying the word \"ESTATE\" in the same bold, capital letter style as observed in . This repetition at a lower part of the building indicates a consistent branding approach across the business' presence on the building, and its placement closer to eye level increases readability for pedestrians." 
+ }, + { + "question_id": 279, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"AGENTS\" is identified, and like , it mirrors the style and size of the sign in , ensuring that the message of the business being an estate agency is clear. This consistent branding facilitates quick recognition and understanding of the services offered." + }, + { + "question_id": 280, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"Triple\" appears in a retro cursive script, likely chosen to convey a sense of nostalgia or classic style, which is consistent with the overall branding. It is set against a yellow portion of the sign, and the color choice here is a mint green which provides a pleasing contrast that makes the text stand out." + }, + { + "question_id": 281, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"WHITE\" is written in bold, capital letters, featured on a green ribbon-like background that cuts across the sign. The font is sans-serif, which gives a modern and clean look. The use of capital letters in this context suggests emphasis and importance." + }, + { + "question_id": 282, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The phrase \"SPOT\" is displayed in a sans-serif, uppercase font similar to the text in . It is placed within the same green ribbon background, mirroring the style and maintaining design consistency. 
This positioning completes the name or title represented on the signage." + }, + { + "question_id": 283, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"O's\" is written in a script that echoes the retro flair seen in . This script is mint green, presented on a yellow backdrop, and it features an apostrophe, signifying a possessive or a contraction. The stylized \"O\" has a red center dot, adding to the thematic color scheme." + }, + { + "question_id": 284, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"by\" is located on the lower left area of the central graphic and is likely to be a connector or a preposition relating to the larger text elements in the image. It's written in small, lowercase letters, contrasting in size to the other texts, suggesting a subordinate role in the information hierarchy." + }, + { + "question_id": 285, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001122.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text in this area reads \"NEW\". The font is bold and stylized with thick vertical lines and sharp edges, suggesting a strong, impactful presence. The letters are colored in red, which stands out against the white background of the fabric they are printed on. There are black vertical lines that run down the fabric, giving the impression of pinstripes. The text placement and style are reminiscent of classic athletic or team-related apparel, often used to represent a specific city or team name." 
+ }, + { + "question_id": 286, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001122.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text in this area reads \"YORK\". Similar to the previous region, it features a bold and stylized font in red, contrasting with the white pinstriped background. The consistent style between this text and that of suggests they form a single phrase, typically associated with a particular location or team. The font size and its commanding presence imply that the text is intended to be easily read and recognized from a distance, characteristic of team jerseys or sports merchandise." + }, + { + "question_id": 287, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_162.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"Colchester\" is displayed in a straightforward, sans-serif font with a bold weight, which makes it highly legible and easy to read. The text color is white, which contrasts sharply with the red background, creating a standout effect that captures attention. This type of text presentation is typically used for clear communication and effective signage. \"Colchester\" is likely the name of a place, possibly a destination or location referenced on a signpost or directional marker. The choice of a bold and contrasting color scheme is intentional, aimed at ensuring that the text is discernible from a distance and under various lighting conditions. The text is centrally aligned within the marked area, suggesting the importance of the information it conveys. The presence of the symbol above the text, resembling a stylized pair of railway tracks, indicates that this sign is associated with a railway service or station. 
The purpose of the text in this context is to inform viewers of a railway station name or a destination reachable via train services." + }, + { + "question_id": 288, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"AYAM\" is presented in large uppercase letters on a signage board. The font appears bold and designed to be eye-catching, serving the purpose of promoting or identifying a business or product associated with chicken, as \"ayam\" means chicken in Malay and Indonesian." + }, + { + "question_id": 289, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"RIA\" appears next to \"AYAM\" in the same font and style, following the design pattern of the sign. It seems to be part of a larger phrase or brand name, although without additional context it is challenging to ascertain its full meaning or association." + }, + { + "question_id": 290, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"SMASHED\" is in uppercase letters and retains the same font consistency and styling as the previous words, indicating it's part of the same signboard. The use of the word \"smashed\" could be describing a method of food preparation, possibly relating to the menu items offered by the establishment." 
+ }, + { + "question_id": 291, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"FRIED\" appears in the same bold, attention-grabbing font as the other text elements in the signage. The usage of the term \"fried\" aligns well with food-oriented establishments and could denote a particular style of cooking advertised by the business." + }, + { + "question_id": 292, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The term \"CHICKEN\" completes what seems to be a descriptive phrase relating to the nature of the food provided at this location. Presented in the same visual style as the other text elements on the sign, it confirms the establishment’s focus on chicken dishes." + }, + { + "question_id": 293, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"Accommodation\" appears on a signboard, suggesting the label for a location where lodging facilities are provided. The text is bold and capitalized, providing clear visibility and significance, thus indicating direction to the accommodation facilities within the vicinity." + }, + { + "question_id": 294, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"Office\" displayed similarly to , is also on the signboard, and its typography suggests it is an instructional marker guiding individuals towards offices located nearby. 
Its distinct appearance functions as a navigational aid for visitors seeking office spaces." + }, + { + "question_id": 295, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The term \"Nightline\" is prominently featured, possibly indicating a nighttime service or a helpline available after-hours. This text, like the others on the sign, caters to nighttime assistance or inquiries, potentially providing crucial information for individuals seeking support during late hours." + }, + { + "question_id": 296, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"BUBBA\" appears in bold, capital letters with a font that is playful and somewhat informal, possibly evoking a casual or friendly atmosphere. The position is prominently displayed at the top of a circular logo, which suggests its importance as a distinguishing element or a brand name." + }, + { + "question_id": 297, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"GUMP\" is presented in a similar bold and playful font directly below . Both words form a cohesive phrase when read together, implying a connection or partnership, possibly in a business context." + }, + { + "question_id": 298, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"SHRIMP\" is placed below and , completing the phrase that seems to be the focal point of the circular logo. 
The font style remains consistent with the previous text, reinforcing the brand's identity and likely indicating the type of product or service offered." + }, + { + "question_id": 299, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"RESTAURANT\" is written in a smaller, yet bold font beneath the word \"SHRIMP\". This text specifies the nature of the business associated with the overarching brand identified by the preceding text." + }, + { + "question_id": 300, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"MARKET\" appears in a smaller font at the bottom of the circle, suggesting a secondary or additional aspect of the business, perhaps indicating a place where goods are sold as part of the company's offerings." + }, + { + "question_id": 301, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"CO\" could stand for \"Company,\" abbreviated and presented beside the main brand name, which is common practice for businesses to denote a corporate entity." + }, + { + "question_id": 302, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"TM\" indicates that the entire phrase formed by , , and is a trademark. This protects the brand's unique identity and legally secures its use exclusively for the business's purposes." 
+ }, + { + "question_id": 303, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"EVENING\" appears in a sans-serif, bold font that is capitalized for emphasis. It is located on the lower part of a product label, positioned just above another text element that indicates further details about the product. The text serves to indicate either the usage time or a key ingredient, \"Evening Primrose,\" of the product, likely related to wellness or personal care." + }, + { + "question_id": 304, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"PRIMROSE\" is the second part of a phrase in which the text is styled similarly to the text in . It follows directly below \"EVENING,\" suggesting the complete term \"EVENING PRIMROSE.\" The positioning and styling are consistent with , reinforcing the connection between the two words. The term \"Evening Primrose\" is usually associated with the name of a plant, often used in the context of essential oils or natural product ingredients." + }, + { + "question_id": 305, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The term \"BASE\" is part of the full term \"BASE OIL,\" which is shown in a smaller and possibly lighter weight sans-serif typeface compared to and . Positioned at the bottom of the product label, it likely indicates the type of product, suggesting that the contents of the bottle can be used as a carrier or base oil in aromatherapy or skincare." 
+ }, + { + "question_id": 306, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "\"OIL\" completes the term \"BASE OIL,\" as mentioned in . The font and positioning maintain consistency with the description provided in , reinforcing its role as part of a descriptive phrase related to the product's use or contents. Together, \"BASE OIL\" likely designates the product's category within a larger set of similar goods." + }, + { + "question_id": 307, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01644.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"46\" appears in a serif font with distinct, prominent stylistic flares at the ends of the strokes, which is characteristic of serif fonts. These numerals are white, providing a strong contrast against a dark background plate, which appears to be made of slate or a similar material. The plate is mounted onto a brick wall, and there are two spherical, possibly metallic, fixtures attached to the plate on either side, which seem to be serving as decorative mounting posts. The purpose of this text likely indicates an address or number associated with the location, commonly used to identify specific residential or commercial units." + }, + { + "question_id": 308, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This area seems to indicate the numeric value '31,' which, in a calendrical context, may reference the number of days in a month. It does not appear to have any interactive features based on the screenshot." 
+ }, + { + "question_id": 309, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This seems to be a button or a link labeled 'Connecter' which, when translated from French, means 'Connect' or 'Log in'. It is likely an interactive element that upon being clicked, would prompt the user to access an account or initiate a connection process." + }, + { + "question_id": 310, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This section contains the French word 'Novembre', which is the month of November. It appears to be a part of a list of months, possibly for navigating a calendar or archives by month. It may be an interactive element that allows users to view content from November." + }, + { + "question_id": 311, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This is a button or link with the text 'Annoncez' followed by information icon (i). The French word 'Annoncez' translates to 'Advertise'. This suggests that it is a call-to-action for users to advertise, possibly by clicking this button or link. The information icon typically indicates additional details available upon interaction." 
+ }, + { + "question_id": 312, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This area displays the number '07', which could signify a day of the month, especially since it is seen next to a date heading in the format 'Vendredi 7 Mai 2021', which translates to 'Friday, May 7, 2021'. It seems to be a static element without interactivity." + }, + { + "question_id": 313, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "Similar to , this is labeled 'Septembre', which is the French word for September. It is part of the same apparent navigational element for a calendar or archive sorted by months and is likely interactive as well." + }, + { + "question_id": 314, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This section reads 'Liens Web', which translates to 'Web Links' from French. This likely refers to a section of the web page intended to direct users to other related sites or resources. It is probably interactive, with each listed link being clickable." + }, + { + "question_id": 315, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "The number '15' is visible here, and when combined with the context of a calendar visible in the screenshot, it likely represents the 15th day of a month. This element does not seem to be interactive itself." 
+ }, + { + "question_id": 316, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "Here we see the number '04' which, in the context of the surrounding calendar, might represent the 4th day of a month. It doesn't show any sign of interactivity." + }, + { + "question_id": 317, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "This text, 'Plan du site', stands for 'Site Map' in French and usually refers to a detailed page listing where one can find an overview of all the sections and pages within the website. It is usually an interactive element that, when clicked, will take the user to the sitemap page." + }, + { + "question_id": 318, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "This region depicts an interactive button with the text \"BOOK NOW\" overlaid on it. Usually, buttons like this on websites are clickable and lead the user to a page where they can schedule an appointment or reserve a service. The button is stylistically designed to stand out and grab attention, potentially suggesting it is a call-to-action feature for users to quickly access the booking process for the service provided by the website." 
+ }, + { + "question_id": 319, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "This section of the website features a text link with the phrase \"Buy Luxurious Doona.\" It likely serves as a navigational element, which upon clicking, would redirect users to a page where they can purchase a \"Luxurious Doona.\" The term \"Doona\" typically refers to a type of bedding, suggesting that the site might be related to home goods or personal comfort items." + }, + { + "question_id": 320, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "This area contains a text link that reads \"Pay Balance.\" It is probably an interactive link that, once clicked, would take the user to a section of the website where they can complete a payment - likely concerning a service or product they have previously engaged with." + }, + { + "question_id": 321, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "Featured here is a clickable text link titled \"About Us.\" Such links generally lead users to a webpage that elaborates on the history, mission, values, or team behind the company or service. It helps users learn more about the company or website owners." + }, + { + "question_id": 322, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "This part of the webpage indicates a \"Contact us\" link. 
Clicking on this text would typically guide the visitor to a page featuring contact information or a form enabling the users to reach out to the company for inquiries or support." + }, + { + "question_id": 323, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "\"Home\" appears to be a navigation link that, when selected, would likely redirect users to the homepage of the website. The homepage is the main page that often provides a comprehensive overview of what the website offers." + }, + { + "question_id": 324, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The text here, \"doonawash@gmail.com,\" suggests an email address. This is likely provided for users to directly contact the company or service provider through email. It is not clickable but can be used to send an email using an email client or service." + }, + { + "question_id": 325, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "Similar to , this \"Contact us\" link would allow users to access a contact section or page on the website, promoting user interaction with the service provider for queries or assistance." + }, + { + "question_id": 326, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "Just like , this \"Home\" link is a navigational feature intended to bring the user back to the site's main page, presenting a starting point or central hub for exploring the website's contents." 
+ }, + { + "question_id": 327, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "Echoing , the \"Pay Balance\" text link is associated with the payment part of a transaction on the website. It is intended to facilitate users in clearing dues or completing transactions related to the services offered by the site." + }, + { + "question_id": 328, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_0558c1f4-c05b-49a8-8479-04b1575779d2.png", + "category": "web_detailed_caption_box", + "text": "This area of the webpage is part of a bullet point list under the subheading \"Local Chapters\". The subheading describes Local Chapters as country or region-level groups affiliated with the OpenStreetMap Foundation that represent their local mapping community in dealings with government, business, and media. The bullet point \"OpenStreetMap Belgium\" likely indicates that there is an established local chapter for the country of Belgium. The text appears in blue with an underline, suggesting that it is a hyperlink. Clicking on this hyperlink would presumably direct the user to more information about the OpenStreetMap community in Belgium or to their specific website." + }, + { + "question_id": 329, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "This area appears to be a contact detail, specifically a phone number. It typically serves as a direct line of communication for users to reach out to the company or organization featured on the website. Such contact information is usually clickable on mobile devices, enabling the user to initiate a phone call directly." 
+ }, + { + "question_id": 330, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "This section includes a numerical rating, which is indicative of client satisfaction, service quality, or performance measurement. It suggests that it may be connected to reviews or ratings received from clients, as denoted by the star symbol which commonly represents ratings." + }, + { + "question_id": 331, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "The text translates to \"Free Consultation\" in Azerbaijani, indicating an offering from the company to prospective clients. It is likely a call-to-action button which upon clicking, would lead a user to a form or contact option to set up a consultation without any charge." + }, + { + "question_id": 332, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "This part of the website is labeled \"Portfolio,\" signifying that it's likely a navigation element leading to a page where the company showcases their previous work, projects, or case studies to highlight their experience and expertise." + }, + { + "question_id": 333, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This space indicates a shopping cart feature with a count of items currently in the cart, which currently stands at zero. 
This interactive element likely becomes clickable when items are added, allowing users to view and manage the contents of their cart." + }, + { + "question_id": 334, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This area is typically a customer service feature, allowing users to access help or assistance through various means such as a help center, live chat, or contact information. It's usually clickable and would direct the user to a support section of the website." + }, + { + "question_id": 335, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "The text suggests a prompt to visit the company's Facebook page. This is an interactive element that, when clicked, likely redirects users to the specified social media page to engage with the company's content on that platform." + }, + { + "question_id": 336, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "It denotes an area that likely relates to personalization for users, where they can view their astrology charts. This is expected to be a clickable feature which, when accessed, leads the user to a section where their personalized charts are displayed or can be created." + }, + { + "question_id": 337, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "Similar to , this is a call to action to visit the company's Twitter page. 
Clicking on this interactive element would redirect a user to the company's Twitter profile to view tweets and engage with their content." + }, + { + "question_id": 338, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This is a customer contact area, providing users with a way to get in touch with the company. Clicking on this is likely to take the user to a section of the site with various contact options like email, phone, or a contact form." + }, + { + "question_id": 339, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This is a call-to-action button that allows users to add a product to their shopping cart. This button is interactive, and upon clicking, the chosen product would be added to the user's cart, with the action possibly reflected in the shopping cart count in ." + }, + { + "question_id": 340, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This area is likely dedicated to showcasing the company's range of products. Clicking here would probably lead users to a product catalog where they can browse and select items of interest." + }, + { + "question_id": 341, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "It represents an area designated for a podcast. Users can expect to interact with this button to be taken to a media player or section of the website where they can listen to recorded audio content." 
+ }, + { + "question_id": 342, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "This part of the website provides company information to the user. It's normally a clickable element that leads the user to learn more about the company's history, values, mission, and team members." + }, + { + "question_id": 343, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This section typically represents a menu item on a website that describes the services or actions undertaken by the organization. It usually links to a page with detailed information on the work that the organization performs, including projects, mission statements, or other relevant content." + }, + { + "question_id": 344, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This appears to be a news headline or feature article title on the website. It suggests that the organization has introduced a new initiative offering financial assistance for livelihood projects. Clicking on this title would likely lead to an article or post giving more information about the micro-grants program and its objectives." + }, + { + "question_id": 345, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This is likely a button or link to a settings page where users can adjust their preferences for the website, which might include language settings, account details, notification preferences, and more." 
+ }, + { + "question_id": 346, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This commonly links to the website's privacy policy document, where users can learn about how the organization collects, uses, stores, and protects personal data." + }, + { + "question_id": 347, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This is typically a navigation link that returns the user to the main homepage of the website from any other page." + }, + { + "question_id": 348, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "It usually indicates a button the user can click to accept the terms of a policy, possibly related to cookies or usage terms, as indicated by the accompanying text." + }, + { + "question_id": 349, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This is often a prominent call-to-action button meant to direct users to a page where they can make financial contributions to the organization or cause." 
+ }, + { + "question_id": 350, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "Commonly a menu item that links to a news section containing articles, updates, blog posts, press releases, or other information that keeps readers informed about the organization's activities or relevant topics." + }, + { + "question_id": 351, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "This is typically a link to a page where users can find more information about the organization, including history, values, team members, or accomplishments." + }, + { + "question_id": 352, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "Usually a link to a page where visitors can find contact information for the organization, such as an address, phone number, email, or a contact form." + }, + { + "question_id": 353, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_6c677961-e540-4cc5-b725-5e301019a9f9.png", + "category": "web_detailed_caption_box", + "text": "This region appears to be a toolbar located within a content editing area, likely part of a web-based application interface. The specific feature highlighted is an icon that suggests functionality related to inserting tables into the content. In a typical text editor or content management system interface, clicking this icon would presumably open a menu or dialogue box allowing the user to create and insert a table into the document. 
The table insertion feature commonly lets users specify the number of rows and columns, choose a table style, and sometimes adjust additional table properties such as cell padding or headers." + }, + { + "question_id": 354, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "This area encompasses a navigation element labeled \"News.\" It likely leads to a section of the website where current news relevant to the organization or its field of operation is disseminated. As a navigational element, it is interactive and upon clicking would redirect users to the page where news articles or updates are posted." + }, + { + "question_id": 355, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "This area displays the company's name ServeGate, which appears to be stylized as a logo. This typically acts as a home button; clicking on it would usually take users back to the main or home page of the website." + }, + { + "question_id": 356, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "This heading titled \"Our Difference\" may signify a navigational item that leads to content describing what sets the organization apart from competitors. Interaction would involve clicking it to navigate to a page that likely discusses the company's unique selling propositions (USPs), mission, values, or other differentiating factors." 
+ }, + { + "question_id": 357, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "Labeled \"About Us,\" this is another navigation item that, when clicked, would take the user to a section of the site that provides information about ServeGate, such as its history, leadership team, vision, and mission." + }, + { + "question_id": 358, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The text \"Get in touch\" suggests an interactive component that leads to a part of the website where users can contact the organization. This may include a contact form, phone numbers, email addresses, or other means of communication." + }, + { + "question_id": 359, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The term \"Home\" designates a navigational link that typically redirects users to the front page of the website. Clicking this link would generally return the user to the starting point of their navigation experience." + }, + { + "question_id": 360, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The item labeled \"Services\" is likely a drop-down menu or a link to a page that outlines the company's offerings. Users can click on it to discover more about the services provided by ServeGate, including descriptions and possibly pricing or someone to contact for further inquiry." 
+ }, + { + "question_id": 361, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "\"Indigenous Impact\" might be a navigation link to a page detailing the company's impact on, contributions to, or programs associated with Indigenous communities. Interaction with this element would bring the user to either a dedicated section or might expand into a submenu listing various facets of this impact." + }, + { + "question_id": 362, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "This appears to be another instance of the company logo for ServeGate, similar to . It likely serves the same function as a clickable link leading back to the home page of the website." + }, + { + "question_id": 363, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_5a35d9c2-2c2d-4a49-ad0a-1408d9cac78e.png", + "category": "web_detailed_caption_box", + "text": "The area appears to contain an interface element labeled \"Close issue\" situated within a software development environment, likely a part of an issue tracking or project management system. This interface element is a button, as suggested by its design and placement near the text input area for comments. When clicked, it would typically result in the associated issue being marked as resolved or closed in the system, thereby updating the status of the issue within the project's workflow. The button is designed for users to signify that the discussion, problem, or task represented by the issue no longer requires attention and can be archived or removed from active consideration." 
+ }, + { + "question_id": 364, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_3fed2169-3c3d-43e7-baaa-3bf0e0c7134c.png", + "category": "web_detailed_caption_box", + "text": "This is a close button found on a web-based issue tracking platform, specifically GitLab. In the context of the page, it is presumably used to close the issue which is currently being viewed. When a user clicks this button, the active issue (#1460 in this case) will likely be marked as closed within the system, changing its status from open to closed. This is a standard feature in issue tracking and project management software, allowing users to manage the lifecycle of issues and tasks." + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/web_detailed_caption_box/prediction.json b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/web_detailed_caption_box/prediction.json new file mode 100644 index 0000000000000000000000000000000000000000..46b7886dee75138be7ac702f93ad13e4478cf79c --- /dev/null +++ b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/web_detailed_caption_box/prediction.json @@ -0,0 +1,2186 @@ +[ + { + "question_id": 1, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "Friday, Sep 2, 61°F" + }, + { + "question_id": 2, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A stylized letter \"G\" with a gradient of colors starting from red at the top, transitioning to orange, yellow, green, blue, and ending with purple at the bottom." 
+ }, + { + "question_id": 3, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A white, fluffy cloud with a smooth, rounded shape and a slightly irregular outline. The cloud has a soft, cotton-like texture and appears to be illuminated from the left side, giving it a gentle gradient from white to a light blue hue." + }, + { + "question_id": 4, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A white gear with a blue circular center and six evenly spaced, straight teeth." + }, + { + "question_id": 5, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A white speech bubble icon with a blue outline, featuring three horizontal blue lines of varying lengths inside the bubble." + }, + { + "question_id": 6, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A red, rectangular play button with rounded corners, featuring a white, centered play triangle pointing to the right." + }, + { + "question_id": 7, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A stylized letter \"G\" with a gradient of colors starting from red at the top, transitioning to orange, yellow, green, and ending with blue at the bottom." 
+ }, + { + "question_id": 8, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A blue circle with a red dot above it and a yellow dot below it." + }, + { + "question_id": 9, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A circular button with a white center and a thin black border." + }, + { + "question_id": 10, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_251980.png", + "category": "android_detailed_caption_box", + "text": "A white, right-pointing arrow with a thick border." + }, + { + "question_id": 11, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Excel\" is written in a bold, sans-serif font with a white color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 12, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Facebook\" is written in a bold, sans-serif font with a light pink color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." 
+ }, + { + "question_id": 13, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Speedtest\" is written in a bold, sans-serif font with a white color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 14, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Instagram\" is written in a bold, sans-serif font with a light pink color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 15, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Home\" is written in a bold, sans-serif font with a white color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 16, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Lite\" is written in a bold, sans-serif font with a white color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." 
+ }, + { + "question_id": 17, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "The word \"Messenger\" is written in a bold, sans-serif font with a gradient of pink to white. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 18, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "Home De..." + }, + { + "question_id": 19, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "A pair of cartoonish eyes with large, white, oval-shaped pupils and black, curved eyelashes. The eyes are set against a green background." + }, + { + "question_id": 20, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "category": "android_detailed_caption_box", + "text": "A blue square with a white lowercase \"g\" followed by three white horizontal lines of equal length." + }, + { + "question_id": 21, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"Search\" is written in a bold, sans-serif font with a dark gray color. The letters are evenly spaced and aligned horizontally." 
+ }, + { + "question_id": 22, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"settings\" in lowercase letters, with a modern, sans-serif font. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance. The color of the text is a gradient of light to dark gray, with the darker shade at the top and the lighter shade at the bottom." + }, + { + "question_id": 23, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "mobile, data" + }, + { + "question_id": 24, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"data\" in lowercase letters, with a modern, sans-serif font. The letters are evenly spaced and have a clean, minimalist design." + }, + { + "question_id": 25, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The Wi-Fi symbol consists of a lowercase \"i\" with a dot above it, followed by a lowercase \"f\" with a dot above it." + }, + { + "question_id": 26, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"and\" in lowercase letters, with a serif font, is written in a dark color against a light background." 
+ }, + { + "question_id": 27, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"hotspot\" in lowercase letters, with a modern, sans-serif font. The letters are evenly spaced and have a clean, minimalist design." + }, + { + "question_id": 28, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "usage, and hotspot" + }, + { + "question_id": 29, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "The word \"Connected\" is written in a bold, sans-serif font with a black color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 30, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "category": "android_detailed_caption_box", + "text": "A white, L-shaped object with a smooth surface and rounded edges." + }, + { + "question_id": 31, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "Search or type web address" + }, + { + "question_id": 32, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The text \"ebay shopping cart\" is displayed in lowercase letters. 
The word \"ebay\" is in a larger font size compared to the word \"shopping cart\". The text is in a sans-serif font and is centered horizontally." + }, + { + "question_id": 33, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The word \"cart\" in lowercase letters, with a bold, sans-serif font. The letters are black and evenly spaced." + }, + { + "question_id": 34, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "https://cart.ebay.com/" + }, + { + "question_id": 35, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "Welcome to Costco Wholesale" + }, + { + "question_id": 36, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The text \"costco.com\" is displayed in lowercase letters." + }, + { + "question_id": 37, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "Pay Less." + }, + { + "question_id": 38, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The word \"Target\" in bold, black, sans-serif font." 
+ }, + { + "question_id": 39, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The text \"Expect More.\" is written in a bold, sans-serif font with a black color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 40, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/web_shopping_98501.png", + "category": "android_detailed_caption_box", + "text": "The text \"target.com\" is displayed in a bold, sans-serif font with a blue color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 41, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The word \"skype\" is written in lowercase letters with a modern, sans-serif font. The letters are black and evenly spaced, with a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 42, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The word \"Skype\" is written in lowercase letters with a modern, sans-serif font. The letters are black and evenly spaced, with a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 43, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "A blue circular icon with a white lowercase \"s\" in the center." 
+ }, + { + "question_id": 44, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The word \"Install\" is written in lowercase white letters on a green background." + }, + { + "question_id": 45, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "The word \"Skype\" is written in a bold, sans-serif font with a dark gray color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 46, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "purchases" + }, + { + "question_id": 47, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "In-app purchases" + }, + { + "question_id": 48, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "A black star with a five-pointed shape, featuring a slightly irregular outline and a textured surface that suggests a three-dimensional form." + }, + { + "question_id": 49, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "A black letter \"B\" followed by a black plus sign." 
+ }, + { + "question_id": 50, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "category": "android_detailed_caption_box", + "text": "Editors' Choice" + }, + { + "question_id": 51, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Search settings" + }, + { + "question_id": 52, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "A white envelope icon with a triangular flap on the right side, set against a dark background." + }, + { + "question_id": 53, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "The word \"Add\" in lowercase letters, with a bold font and a slight shadow effect, giving it a three-dimensional appearance." + }, + { + "question_id": 54, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "another email" + }, + { + "question_id": 55, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "The word \"account\" in lowercase letters, with a bold font and a slight italicization, set against a plain background." 
+ }, + { + "question_id": 56, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Set up your personal or work email" + }, + { + "question_id": 57, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "or work email" + }, + { + "question_id": 58, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Network & internet" + }, + { + "question_id": 59, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "Wi-Fi, mobile, data" + }, + { + "question_id": 60, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "category": "android_detailed_caption_box", + "text": "usage, and" + }, + { + "question_id": 61, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "Wednesday, May 18" + }, + { + "question_id": 62, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "The word \"Maps\" in a bold, sans-serif font, with a gradient of pink to white, giving it a three-dimensional appearance." 
+ }, + { + "question_id": 63, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "A stylized letter \"G\" with a gradient of colors starting from red at the top, transitioning to orange, yellow, green, blue, and ending with purple at the bottom." + }, + { + "question_id": 64, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "A red, teardrop-shaped pin with a black circular center." + }, + { + "question_id": 65, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "A white speech bubble icon with a blue outline, containing three horizontal blue lines of varying lengths." + }, + { + "question_id": 66, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "A stylized letter \"G\" with a gradient of colors starting from red at the top, transitioning to orange, yellow, green, and ending with blue at the bottom." + }, + { + "question_id": 67, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "category": "android_detailed_caption_box", + "text": "A blue circle with a white border, a red circle with a white border, and a yellow circle with a white border." 
+ }, + { + "question_id": 68, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The text \"costco.com/Check\" is displayed in a bold, sans-serif font. The word \"costco.com\" is in lowercase letters, and the word \"Check\" is in uppercase letters. The text is aligned to the left." + }, + { + "question_id": 69, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The logo features the word \"COSTCO\" in large, bold, red capital letters with a white outline. Below it, the word \"WHOLESALE\" is written in smaller, bold, blue capital letters." + }, + { + "question_id": 70, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The word \"Warehouses\" is written in a bold, sans-serif font with a light blue color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 71, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The word \"Account\" in a bold, sans-serif font, with a gradient of blue shades ranging from light to dark, giving it a three-dimensional appearance." + }, + { + "question_id": 72, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "A blue shopping cart with a white handle and a white basket area." 
+ }, + { + "question_id": 73, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "A blue rectangular sign with the word \"Shop\" in white, bold, sans-serif font. To the left of the text, there are three horizontal white lines of varying lengths." + }, + { + "question_id": 74, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The word \"Search\" in a bold, sans-serif font, with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and have a consistent size throughout." + }, + { + "question_id": 75, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "My Warehouse" + }, + { + "question_id": 76, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "Delivery Location" + }, + { + "question_id": 77, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_1849.png", + "category": "android_detailed_caption_box", + "text": "The word \"Seattle\" in bold, black, sans-serif font." 
+ }, + { + "question_id": 78, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "Fetch Rewards: Play" + }, + { + "question_id": 79, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The word \"Play\" in bold, black, sans-serif font." + }, + { + "question_id": 80, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "to earn" + }, + { + "question_id": 81, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The text \"MAKE\" is written in bold, uppercase letters with a dark green color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 82, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The word \"MONEY\" in bold, uppercase letters with a green background and black outline." + }, + { + "question_id": 83, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The text \"appminer st\" is displayed in lowercase letters. The letters are green and have a sans-serif font. The text is aligned to the left." 
+ }, + { + "question_id": 84, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The word \"Contains\" is written in a bold, sans-serif font with a light gray color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 85, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The text \"ads\" is written in lowercase letters with a modern, sans-serif font. The letters are evenly spaced and have a clean, minimalist design. The color of the text is a light gray, blending subtly with the background." + }, + { + "question_id": 86, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "The text \"50K+\" is displayed in bold, black font with a slight shadow effect, giving it a three-dimensional appearance. The \"50K\" is in a larger font size compared to the \"+\" sign." + }, + { + "question_id": 87, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "category": "android_detailed_caption_box", + "text": "A black star with a five-pointed shape, featuring a slightly irregular outline and a textured surface that suggests a three-dimensional form." 
+ }, + { + "question_id": 88, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A black and white image of a computer keyboard with a standard QWERTY layout, including function keys, a number pad, and arrow keys. The keys are rectangular with white lettering on black keys, and the keyboard has a slight ergonomic curve." + }, + { + "question_id": 89, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The text \"walmart.com\" is displayed in a bold, sans-serif font. The letters are black and evenly spaced, with a slight shadow effect, giving them a three-dimensional appearance. The text is aligned to the left." + }, + { + "question_id": 90, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A white rectangular signboard with the text \"Lenovo Thinkpad\" in black, sans-serif font." + }, + { + "question_id": 91, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A blue rectangular button with rounded corners featuring the word \"Cancel\" in white, bold, sans-serif font." + }, + { + "question_id": 92, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A black and white image of a Lenovo ThinkPad laptop with a visible keyboard and trackpad." 
+ }, + { + "question_id": 93, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The word \"Lenovo\" in lowercase letters, with a bold font and a slight italicization, set against a plain background." + }, + { + "question_id": 94, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A black, rectangular computer keyboard with a standard QWERTY layout, including a number pad on the right side. The keys are chiclet-style with white lettering, and there is a slight sheen on the surface, suggesting a smooth texture. The function keys are aligned along the top, and there is a visible space bar at the bottom center." + }, + { + "question_id": 95, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "The word \"in\" in lowercase letters, with a bold font and a slight shadow effect, giving it a three-dimensional appearance." + }, + { + "question_id": 96, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A rectangular electronic device with a screen displaying text, surrounded by a thin bezel." + }, + { + "question_id": 97, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/android_detailed_caption/images/single_2921.png", + "category": "android_detailed_caption_box", + "text": "A black and white image of a Lenovo ThinkPad charger." 
+ }, + { + "question_id": 98, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "NAVIGATING SPECIAL EDUCATION SOCIAL & EMOTIONAL LEARNING" + }, + { + "question_id": 99, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A yellow background with two human head outlines facing each other. The left head has the words \"FIXED MINDSET\" above a red downward arrow, and the right head has the words \"GROWTH MINDSET\" above a green upward arrow." + }, + { + "question_id": 100, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon character with a serious expression, featuring a dark gray hair, a light blue shirt, and a red cross symbol on the left side of the head. The character has a red frown and is surrounded by two white, cloud-like shapes on either side of the head." + }, + { + "question_id": 101, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular logo with a beige background featuring five hands in different colors: red, green, blue, purple, and orange, arranged in a circular pattern. Above the hands, the text \"Understanding Diversity\" is written in black." + }, + { + "question_id": 102, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A black and white illustration of a person with a light bulb on their head, holding a book. 
The person has a question mark above their head and another question mark to the right of their head. The background is a light peach color." + }, + { + "question_id": 103, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon illustration of two boys, one wearing a red shirt and blue shorts, and the other wearing a striped shirt and brown shorts, both with their arms raised. The word \"Bullying\" is written above them." + }, + { + "question_id": 104, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon of a boy and a girl playing with each other. The boy is wearing a yellow shirt, black shorts, and red shoes. The girl is wearing a yellow shirt, blue pants, and red shoes. Both have black hair and are smiling." + }, + { + "question_id": 105, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A blue speech bubble with a yellow lightning bolt symbol, and a red speech bubble with a yellow lightning bolt symbol, both containing a person's face." + }, + { + "question_id": 106, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A rectangular blue traffic sign with a white border, featuring three white arrows. The leftmost arrow curves to the left, the middle arrow points straight up, and the rightmost arrow curves to the right. Below the arrows, the word \"CHANGES\" is written in white capital letters." 
+ }, + { + "question_id": 107, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/6.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized illustration of two human figures, one in yellow and the other in blue, both with black outlines. The yellow figure is standing on a staircase, while the blue figure is standing on a platform. The blue figure is holding a microphone and appears to be speaking or presenting. The word \"Leadership\" is written in black text above the figures." + }, + { + "question_id": 108, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A blue circle with a white plus sign inside it, followed by a white \"2X\" text." + }, + { + "question_id": 109, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A hand holding a person with a blue shirt and black pants, with a purple banner below displaying \"$4,000\" in white text." + }, + { + "question_id": 110, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A pie chart with a blue background and a white border, featuring a white line that divides the chart into two sections. The left section is larger and has a white number \"36%\" inside it, while the right section is smaller and has a white number \"36%\" inside it." 
+ }, + { + "question_id": 111, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A globe with a blue ocean and white continents, featuring a purple banner with white text that reads \"2.7 trillion impact to global GDP from use of more efficient talent platforms.\"" + }, + { + "question_id": 112, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "46% of companies are sometimes or frequently understaffed" + }, + { + "question_id": 113, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "Travel personas: how travelers identify their travel style" + }, + { + "question_id": 114, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular planner with a white background featuring a colorful illustration of two people, one wearing a red hat and the other wearing a blue hat, both holding a smartphone. The person in the red hat is holding a book, and the person in the blue hat is holding a suitcase. The background includes a mountain and a sun. The text \"THE SMART PLANNER\" is written in bold, black letters at the top." 
+ }, + { + "question_id": 115, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "The Relaxed Nomad" + }, + { + "question_id": 116, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A family of five, consisting of a man, a woman, and three children, standing together. The man is holding a baby, while the woman is holding a suitcase. The children are standing around them, with one child holding a suitcase. The family is depicted in a circular frame." + }, + { + "question_id": 117, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular white background with a thin black border." + }, + { + "question_id": 118, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular white background with a thin black border." + }, + { + "question_id": 119, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A red airplane seat with a high backrest and armrests, featuring a small, rectangular, red and white logo on the backrest." 
+ }, + { + "question_id": 120, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/50.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular logo with a white background featuring a stylized illustration of a person in a blue suit with a red briefcase in their left hand and a blue suitcase in their right hand. The person is depicted in a walking motion, with one leg forward and the other leg back. The text \"THE BUSINESS ROAD WARRIOR\" is written in bold, black capital letters above the illustration." + }, + { + "question_id": 121, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A woman with short brown hair, wearing a purple top with a white collar, is holding a yellow envelope in her right hand. She has a headset on her head and is standing in front of a computer monitor with the word \"BIG\" visible on the screen." + }, + { + "question_id": 122, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular orange background with a white border." + }, + { + "question_id": 123, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular logo with a light blue background featuring a stylized globe in the center. The globe is divided into four quadrants, each in a different shade of blue. A black headset with a microphone is positioned over the globe, with the earpieces extending outward. To the right of the globe, there is a yellow star." 
+ }, + { + "question_id": 124, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon man with a light blue shirt and a black tie, wearing a headset with a microphone. He has a light brown hair and is pointing upwards with his right hand. To his right, there is a yellow light bulb with a red base." + }, + { + "question_id": 125, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A blue and black wrench with a flat-head design, featuring a blue handle with a textured grip and a black head with a serrated edge. The wrench has a long, straight shaft connecting the handle to the head." + }, + { + "question_id": 126, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized illustration of a person wearing a headset with a microphone, a purple shirt, and a white undershirt. The person is holding a smartphone with a blue and white design on the screen." + }, + { + "question_id": 127, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized illustration of a hand holding a black and white telephone handset, with a blue circular background featuring a partial globe and a speech bubble with the word \"BIG\" in white. The hand is wearing an orange life jacket with white stripes." 
+ }, + { + "question_id": 128, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular logo with a red border, featuring a stylized design of a person wearing a white shirt and a black tie, with a blue and white striped hat. The background includes a Union Jack flag and a yellow rectangle." + }, + { + "question_id": 129, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/24.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon woman with brown hair tied back, wearing a purple shirt with a white collar, and a headset. She is holding a yellow star in her right hand and has a black and white striped object in her left hand." + }, + { + "question_id": 130, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized heart with a vibrant red color, featuring a prominent blue and orange flame-like design on the upper left side, and a smaller blue and orange flame-like design on the lower right side." + }, + { + "question_id": 131, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized human figure with arms raised in a celebratory pose, surrounded by four blue arrows pointing outward, each arrow with a slight curve and a pointed tip." 
+ }, + { + "question_id": 132, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A digital bathroom scale with a black base and a white digital display showing the weight." + }, + { + "question_id": 133, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A pair of pink lungs with a central trachea, featuring detailed vein patterns and a slightly curved shape." + }, + { + "question_id": 134, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A yellow smiley face with a black outline, featuring two black dots for eyes and a curved black line for a mouth." + }, + { + "question_id": 135, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A human knee with a visible bone structure, including the femur and tibia, with a slight curvature and a smooth surface." + }, + { + "question_id": 136, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A human brain with a detailed, textured surface, featuring a prominent cerebral cortex and a visible portion of the brain stem." 
+ }, + { + "question_id": 137, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A red heart with a white outline and a white line through the center, indicating a heart rate or rhythm." + }, + { + "question_id": 138, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "The word \"HOTAPPE\" is written in large, bold, uppercase letters. The letters are colored in the following order from left to right: red, orange, yellow, light blue, and dark blue." + }, + { + "question_id": 139, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular smiley face with a yellow face and a red border. The face has two blue teardrop-shaped eyes, a brown curved mouth, and two brown curved lines for eyebrows." + }, + { + "question_id": 140, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "An orange circle with two brown hands facing each other, with the word \"OPENNESS\" in green capital letters below the circle." + }, + { + "question_id": 141, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular yellow background with a pair of hands clasped together in the center. The hands are depicted in a light brown color, with the left hand slightly overlapping the right hand. 
Radiating from the hands are white lines, giving the impression of light or energy emanating from the hands. Below the hands, the word \"TOUCH\" is written in bold, uppercase letters." + }, + { + "question_id": 142, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular logo with a light blue background featuring a large red heart in the center. Inside the heart, there is a white exclamation mark. Below the heart, the word \"ATTENTION\" is written in bold, uppercase letters." + }, + { + "question_id": 143, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular blue background with two cartoon faces, one on the left and one on the right, separated by a white dashed line. The left face has brown hair and a neutral expression, while the right face has light brown hair and a neutral expression. Below the faces, the word \"PROXIMITY\" is written in bold, uppercase letters." + }, + { + "question_id": 144, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/11.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular blue background with a white eye in the center, featuring a brown iris and a white sclera. Below the eye, the words \"EYE CONTACT\" are written in bold, uppercase letters." + }, + { + "question_id": 145, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with a green background, wearing a red shirt, with a thermometer in their mouth." 
+ }, + { + "question_id": 146, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with a green background, wearing a pink shirt, with a red nose and a red mouth, and a white hand with three fingers extended." + }, + { + "question_id": 147, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with a green background, wearing a pink shirt, with a green and white object in their mouth." + }, + { + "question_id": 148, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized illustration of a person with a green circular background. The person has a gray face with a red nose and a red mouth. The person is wearing a red shirt with a yellow collar." + }, + { + "question_id": 149, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with a green background, wearing a red shirt and white pants, is sitting on a white chair." 
+ }, + { + "question_id": 150, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular green sign with a white border, featuring a stylized illustration of a pair of red lungs with a gray outline, set against a dark background." + }, + { + "question_id": 151, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized illustration of a kidney with a red and pink color scheme, featuring a central red area with a pink outline, flanked by two symmetrical, curved, pink shapes resembling the kidney's lobes, all set against a light green background." + }, + { + "question_id": 152, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A blue rectangular header with the text \"Interesting Facts\" in white, bold, sans-serif font. To the left of the text, there are three white horizontal lines. To the right of the text, there is a red heart symbol." + }, + { + "question_id": 153, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A stylized panda face with a white face, black ears, black patches around the eyes, and a black nose. The panda has a small, curved black mouth and a content expression. The face is set against a green circular background." 
+ }, + { + "question_id": 154, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A green circular background with a white plate in the center, containing a yellow circle. To the left of the plate is a white fork, and to the right is a white spoon." + }, + { + "question_id": 155, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular icon with a dark blue background featuring a red heart in the center. The heart is outlined in white and has a white line running horizontally across its middle. Below the heart, the word \"Diseases\" is written in white, bold, sans-serif font." + }, + { + "question_id": 156, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A white long-sleeved shirt with a teal collar and cuffs, featuring a row of black buttons down the front." + }, + { + "question_id": 157, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular orange icon with a white film reel design in the center. The film reel has a blue border with white squares on the left and right sides, and a white center with a blue horizontal line dividing it into two sections." 
+ }, + { + "question_id": 158, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular green icon featuring a white syringe with a red cross symbol on the barrel, a white droplet to the right of the syringe, and a yellow needle." + }, + { + "question_id": 159, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular blue icon featuring a white wine glass with a yellow liquid on the left and a white bottle with a yellow liquid and a brown cap on the right." + }, + { + "question_id": 160, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular icon with a dark blue background featuring two stylized human figures. The figure on the left has short, light brown hair and is wearing a red shirt. The figure on the right has short, light brown hair and is wearing a brown shirt with a white collar." + }, + { + "question_id": 161, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/23.png", + "category": "multipanel_detailed_caption_box", + "text": "A circular icon with a solid olive green background featuring a white silhouette of the Earth in the center." + }, + { + "question_id": 162, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person sitting on a chair with their head in their hands, wearing a blue shirt and black pants." 
+ }, + { + "question_id": 163, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with short brown hair, wearing a blue long-sleeve shirt and yellow pants, is sitting on a white platform with a blue wave design at the bottom." + }, + { + "question_id": 164, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A woman with dark hair tied back, wearing a pink top, is depicted with a thought bubble above her head." + }, + { + "question_id": 165, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A person with red hair, wearing a green top and blue pants, is sitting on a windowsill with their legs crossed." + }, + { + "question_id": 166, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A woman with dark hair tied back, wearing a blue sleeveless top and black leggings, is running with her arms slightly bent and her legs in motion. She has a white earphone cord hanging from her right ear." 
+ }, + { + "question_id": 167, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "category": "multipanel_detailed_caption_box", + "text": "A woman with black hair tied back, wearing a green headband, a green sleeveless top, and black pants, is sitting cross-legged with her hands pressed together in a prayer position." + }, + { + "question_id": 168, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "A green dinosaur with a white belly, a purple dinosaur with a white belly, and a green dinosaur with a white belly." + }, + { + "question_id": 169, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "A cartoon boy with a purple bandage on his forehead, wearing a blue shirt and blue pants, is holding a purple object in his right hand." + }, + { + "question_id": 170, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "A lime green refrigerator with a single door, featuring a black handle on the right side. The door has a horizontal indentation near the top." 
+ }, + { + "question_id": 171, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "category": "multipanel_detailed_caption_box", + "text": "A small, rectangular, lime green refrigerator with a single door featuring a vertical handle on the left side. The bottom section of the refrigerator is orange with a horizontal handle." + }, + { + "question_id": 172, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "A dark-colored, rectangular structure with a flat roof and vertical sides, featuring a small, square window on the upper left side and a larger, rectangular window on the lower right side." + }, + { + "question_id": 173, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "A tree with dense, green foliage." + }, + { + "question_id": 174, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/4010.jpg", + "category": "natural_detailed_caption_box", + "text": "A cylindrical, metallic pole with a consistent diameter throughout its length, featuring a series of evenly spaced, horizontal bands encircling its surface." + }, + { + "question_id": 175, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A deep, dark-colored bowl with a wide, flared rim and a smooth, glossy finish." 
+ }, + { + "question_id": 176, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A modern kitchen with a stainless steel oven and a black cooktop. The oven has a digital display and control panel, and there is a visible handle on the oven door. The cooktop has multiple burners with black grates." + }, + { + "question_id": 177, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A white, rectangular wall socket with a single, round, black power switch located on the right side." + }, + { + "question_id": 178, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A white, rectangular light switch with rounded edges, featuring a central toggle switch mechanism." + }, + { + "question_id": 179, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "Stainless steel built-in oven with a large glass door, featuring a digital control panel above the door with multiple buttons and a display screen." + }, + { + "question_id": 180, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A tall, slender vase with a flared rim and a narrow neck that gradually widens into a bulbous base, featuring a glossy finish with a gradient of colors transitioning from a deep blue at the top to a greenish hue towards the bottom." 
+ }, + { + "question_id": 181, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "The floor is composed of medium-toned wooden planks with a smooth, polished finish. The wood grain is visible, running lengthwise along the planks, which are laid out parallel to each other. The planks have a consistent width and exhibit a warm, reddish-brown hue." + }, + { + "question_id": 182, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407550.jpg", + "category": "natural_detailed_caption_box", + "text": "A vertical, wall-mounted spice rack with multiple tiers, each tier holding several glass jars with metal lids. The jars are arranged in a single column, and the rack appears to be made of a dark, possibly wooden material." + }, + { + "question_id": 183, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A round, dark-colored table with a smooth surface and a central pedestal base." + }, + { + "question_id": 184, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "The chair features a high, slightly curved backrest and seat cushion upholstered in a woven fabric with a diamond pattern. The fabric is primarily light green with a central vertical stripe in a slightly darker shade. The armrests are padded and covered in the same woven fabric, with a light green color. The chair's legs are dark-colored and straight." 
+ }, + { + "question_id": 185, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A purple place mat with a textured surface and a white circular design in the center." + }, + { + "question_id": 186, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A vertical traffic light with three circular lenses arranged in a column, displaying a red light at the top, an unlit middle lens, and a green light at the bottom, all encased in a black housing with a visor over each lens." + }, + { + "question_id": 187, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A pink and white striped awning with a scalloped edge and fringe detailing." + }, + { + "question_id": 188, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A tall, dark brown tree trunk with a rough, textured bark. The trunk is relatively straight and has a consistent width throughout its visible length." + }, + { + "question_id": 189, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A chalkboard sign with a wooden frame displaying the text \"Château de la Bertrandière\" in elegant, cursive script." 
+ }, + { + "question_id": 190, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/402.jpg", + "category": "natural_detailed_caption_box", + "text": "A tall, narrow, rectangular window with a dark frame and a single vertical pane of clear glass." + }, + { + "question_id": 191, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "A chestnut horse with a white star on its forehead, dark eyes, and a soft, dark muzzle. Its ears are pricked forward, and the mane appears to be a lighter shade of chestnut, blending into the darker coat. The horse's neck shows a gentle curve, and the coat has a healthy sheen." + }, + { + "question_id": 192, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "A brown horse with a dark mane." + }, + { + "question_id": 193, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000518836.jpg", + "category": "natural_detailed_caption_box", + "text": "A light gray horse with a darker mane and tail, featuring a well-muscled build, a straight profile, and a calm demeanor." + }, + { + "question_id": 194, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A white, rectangular table with rounded corners and a smooth surface." 
+ }, + { + "question_id": 195, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A modern black induction cooktop featuring a sleek design with two visible cooking zones, each outlined with a white circular pattern. The front edge has a stainless steel trim with control buttons, and there are small yellow indicator lights above the buttons." + }, + { + "question_id": 196, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A glossy, metallic spoon with a reflective surface and a tapered handle, featuring a rounded bowl with a slight indentation on one side." + }, + { + "question_id": 197, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000205601.jpg", + "category": "natural_detailed_caption_box", + "text": "A metallic spoon with a reflective surface and a tapered handle that widens towards the end, featuring a rounded bowl with a pointed tip." + }, + { + "question_id": 198, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000299654.jpg", + "category": "natural_detailed_caption_box", + "text": "A zebra with a distinctive black and white striped pattern, featuring a mane of short, erect hair transitioning from black at the base to white at the tips. The stripes on the neck are vertical and become more horizontal as they reach the mane. The visible part of the zebra's face shows a pattern of narrow stripes that converge around the eyes and muzzle, with a white area above the eyes and a dark nose. The ears are pointed and display a striped pattern consistent with the head." 
+ }, + { + "question_id": 199, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000107939.jpg", + "category": "natural_detailed_caption_box", + "text": "An octagonal red stop sign with bold, all-caps white lettering spelling \"STOP\" centered on the sign." + }, + { + "question_id": 200, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000107939.jpg", + "category": "natural_detailed_caption_box", + "text": "An octagonal sign with a red background and a white border, featuring the word \"STOP\" in white uppercase letters." + }, + { + "question_id": 201, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "A dark brown, square-shaped cushion with a visible textured surface that suggests a soft, plush fabric." + }, + { + "question_id": 202, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "A dark brown, square-shaped pillow with a visible corner that appears to be soft and plush." + }, + { + "question_id": 203, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "The earlobe is small and rounded, with a smooth, slightly glossy surface. It is adorned with a small, round, gold-colored earring that has a subtle, reflective sheen." 
+ }, + { + "question_id": 204, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "A young girl with curly hair, wearing a patterned top with a mix of geometric and floral designs in dark and vibrant colors, paired with long-sleeved pink undershirts. Her expression is one of mild surprise or excitement, with her mouth slightly open and eyes looking upwards. Her arms are outstretched with palms facing up, as if gesturing or presenting something." + }, + { + "question_id": 205, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/000000437374.jpg", + "category": "natural_detailed_caption_box", + "text": "A young boy with curly hair, wearing a striped sweater with shades of blue, white, and brown, stands with his hands clasped together. He has a focused expression on his face, with his mouth slightly open and his eyes looking to the side." + }, + { + "question_id": 206, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A ski parka featuring a color-block design with a white torso, green sleeves, and black shoulder panels. The jacket has a high collar and a front zipper closure. There are red accents on the cuffs and a red logo on the left chest area. The parka is paired with a black helmet with a visor." + }, + { + "question_id": 207, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "The knee of the person is covered in a dark purple snowsuit with a slightly glossy finish. The fabric appears thick and durable, suitable for cold weather. 
The knee area is slightly bent, indicating a relaxed stance. The snowsuit has a subtle sheen, reflecting light, and the material appears to be tightly fitted around the knee." + }, + { + "question_id": 208, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "The trousers are dark-colored, possibly black or dark gray, with a straight-leg cut. They feature a visible side pocket on the right leg with a flap closure, and there are belt loops around the waistband. The fabric appears to be a sturdy material, potentially denim or a similar thick textile." + }, + { + "question_id": 209, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A blue helmet with a glossy finish, featuring a prominent white stripe with a red outline running horizontally across the middle. The helmet has a black visor attached at the front, and a black chin strap with a buckle hanging down from the sides." + }, + { + "question_id": 210, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "The ski boot is predominantly black with a sleek, modern design. It features a sturdy, high-ankle structure for support and a smooth, matte finish. The boot has a contoured shape to fit the foot and lower leg, with a slightly raised heel for added stability. The sole is thick and textured for grip and durability." 
+ }, + { + "question_id": 211, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "The sleeve of the sweater is predominantly white with a bold red graphic of a person skiing. The fabric appears to be a soft, knit material, and the sleeve is slightly loose-fitting, extending to the wrist. The red graphic is positioned centrally on the sleeve, adding a dynamic contrast to the white background." + }, + { + "question_id": 212, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2407508.jpg", + "category": "natural_detailed_caption_box", + "text": "A person wearing a black helmet, goggles, and a white jacket with red and black accents, including a red logo on the left chest area. The individual is also wearing black gloves and appears to be in a skiing stance." + }, + { + "question_id": 213, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2411153.jpg", + "category": "natural_detailed_caption_box", + "text": "A red, white, and green racing motorcycle fairing with the number 69 prominently displayed in white on a red background, flanked by green and white stripes. The fairing features sponsor logos and a black lower section with a vent." + }, + { + "question_id": 214, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/natural_detailed_caption/images/2411153.jpg", + "category": "natural_detailed_caption_box", + "text": "A series of parallel, evenly spaced, horizontal lines with a consistent width, running across the entire width of the image." 
+ }, + { + "question_id": 215, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/22d3844dcf29a07bd10f557a33684e331846f81b938ca0f742afab09c542133f.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The mass of the quark is a very small, massless particle that is part of the quark mass. It is a fundamental particle that is responsible for the mass of the quark, and it is often used in particle physics to study the properties of quarks and their interactions." + }, + { + "question_id": 216, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/22d3844dcf29a07bd10f557a33684e331846f81b938ca0f742afab09c542133f.png", + "category": "ocr_doc_detailed_caption_box", + "text": "A lowercase letter 'v' with a serif at the top and a tail at the bottom, featuring a bold, sans-serif font." + }, + { + "question_id": 217, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The geometry and adsorption energies for the structures of thio-glycolic acid on Au(111) at 0.25ML." + }, + { + "question_id": 218, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The table contains a list of adsorption sites with their corresponding adsorption energies. The adsorption sites are listed in a column, with each site having a corresponding adsorption energy. The adsorption energy is listed in a separate column, with each site having a corresponding energy value. The table also includes the adsorption energy for the Au-S bond, which is listed in a separate column. 
The adsorption energy for the Au-S bond is listed in a separate column, with each site having a corresponding energy value." + }, + { + "question_id": 219, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The adsorption energy is 0.63 eV." + }, + { + "question_id": 220, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The adsorption energy for the most stable structure of the HSCH2COOH on the Au(111) surface is 0.63 eV. The adsorption site preferred by the sulfur atom is located at the top of the gold atom. The polar angle between the normal vector of the surface and the S-C2 direction is 74.2 degrees. The HSCH2COOH tend to lie down at the low coverage." + }, + { + "question_id": 221, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "category": "ocr_doc_detailed_caption_box", + "text": "A black and white image of a person's face with a neutral expression, featuring a prominent nose, closed lips, and visible teeth. The person has short hair and is wearing a dark-colored top." + }, + { + "question_id": 222, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The segmented image shows two graphs. The first graph on the left is titled \"Atom prediction performance (a and c)\" and features a scatter plot with blue and orange dots. 
The second graph on the right is titled \"Bond prediction performance (a and c)\" and also features a scatter plot with blue and orange dots. Both graphs have a similar layout with x and y axes, and the dots are evenly spaced." + }, + { + "question_id": 223, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The F1 score for segmentation and classification networks." + }, + { + "question_id": 224, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The performance of segmentation network is measured by the F1 score, which is a metric that takes into account both precision and recall. The F1 score is used to evaluate the performance of the segmentation network in terms of its ability to correctly segment the image. The segmentation network is compared to other networks, and the F1 score is used to determine which network is the most effective." + }, + { + "question_id": 225, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The segmentation network is a deep learning model that uses a combination of atom, bond, and charge types to predict the pixel values. The model is trained on a large amount of data and has a high F1 score, indicating its high accuracy. The F1 score is a measure of the model's performance, taking into account both precision and recall. The model is designed to be used for various tasks, such as image segmentation and object detection." 
+ }, + { + "question_id": 226, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The performance of the different types of networks is measured by the F1 score, which is a measure of the accuracy of the network. The F1 score is higher for the different types of networks, indicating that they are performing well. The performance of the different types of networks is compared to the performance of the segmentation networks." + }, + { + "question_id": 227, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The performance of the different parts of the network is measured by the F1 score, which is a measure of the accuracy of the network. The F1 score is used to evaluate the performance of the network in different tasks, such as atom, bond, and charge type classifications. The network is able to do a good job even when the segmentation is not perfect, and the performance of the different parts of the network is significantly higher than the segmentation networks." + }, + { + "question_id": 228, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "8.3 Overall graph accuracy" + }, + { + "question_id": 229, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The overall graph accuracy is a measure of the performance of the different parts of the graph, including the segmentation network and the classification network. 
The segmentation network is used to segment the graph into different parts, and the classification network is used to predict the type of the graph. The overall graph accuracy is a combination of these two parts, and it is used to measure the performance of the graph." + }, + { + "question_id": 230, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The number 11 is displayed in a bold, sans-serif font with a dark color, possibly black or dark gray, against a lighter background. The digits are evenly spaced and aligned vertically." + }, + { + "question_id": 231, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The sugar is a low GI sweet spot, which is a type of sugar that has a low glucose index." + }, + { + "question_id": 232, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The low GI sweet spot is at least 22mg CE/100mg sucrose." + }, + { + "question_id": 233, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The sugar has a low GI of about 22-32 mg CE/100g polyphenols. The polyphenol content is high, with a range of 22-32 mg CE/100g polyphenols. The sugar is hygroscopic, with a higher moisture content, and the polyphenol content increases as the sugar becomes more saturated." 
+ }, + { + "question_id": 234, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The massacuit is a dark brown, viscous liquid with a glossy sheen, contained in a clear glass bottle." + }, + { + "question_id": 235, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The massacuites are prepared at two different sugar mills, designated as \"Mill 1\" and \"Mill 2\". The polyphenol content of each sample is determined and washed until they reach the desired polyphenol content, which is roughly 500 to 2000 ICUMSA. The results are in Table 4 below." + }, + { + "question_id": 236, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "category": "ocr_doc_detailed_caption_box", + "text": "A black and white image of a sugar cube with a textured surface, featuring a pattern of small, raised dots." + }, + { + "question_id": 237, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The image shows a collection of documents with text, some of which are in black and white, and others in color. The documents are arranged in a grid-like pattern, with some documents having a red border. The text on the documents is in various fonts and sizes, and some documents have a red box around the text." 
+ }, + { + "question_id": 238, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The OCR process." + }, + { + "question_id": 239, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The text is a letter written in English, with the first line reading \"Available OCR\" followed by \"for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The second line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The third line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The fourth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The fifth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The sixth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The seventh line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical 
work which I carried out under your direction.\" The eighth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The ninth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The tenth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The eleventh line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The twelfth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The thirteenth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The fourteenth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The fifteenth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The sixteenth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to 
accept as an excuse for my shortcomings in this Report.\" The seventeenth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The eighteenth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The nineteenth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The twentieth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The twenty-first line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The twenty-second line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The twenty-third" + }, + { + "question_id": 240, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The Internet Archive image containing this excerpt can be accessed here: https://archive.org/details/b2439867/page/n7" + }, + { + "question_id": 241, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + 
"text": "Journal of Data Mining and Digital Humanities" + }, + { + "question_id": 242, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The text is a web address in blue, displaying the URL \"http://dmh.esciences.org\"." + }, + { + "question_id": 243, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "The number 9 is displayed in a bold, sans-serif font with a slight shadow effect, giving it a three-dimensional appearance. The color of the number is a gradient of dark to light gray, with the darker shade at the top and the lighter shade at the bottom." + }, + { + "question_id": 244, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text \"IOOF Annual Report 2012\" is displayed in a serif font, with \"IOOF\" in a larger size and \"Annual Report 2012\" in a smaller size. The text is in a light green color." + }, + { + "question_id": 245, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Non-Executive Directors' Remuneration" + }, + { + "question_id": 246, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "4.1. 
Components of Non-Executive Director remuneration" + }, + { + "question_id": 247, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text \"page 37\" is written in lowercase letters." + }, + { + "question_id": 248, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The company is a non-executive director." + }, + { + "question_id": 249, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The Company's Constitution requires that the aggregate remuneration paid or provided to all Non-Executive Directors in any financial year by the Company, its subsidiaries and associated entities may not exceed an amount approved by shareholders. The ceiling amount includes all remuneration provided to Non-Executive Directors, including superannuation but not including retirement benefits. The current limit of $980,000 per annum was approved by shareholders at the 2010 Annual General Meeting. There has been no increase to the Non-Executive Director fee pool since this time." + }, + { + "question_id": 250, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Non-Executive Directors receive a fee for being a Director of the Board. An additional fee is paid to the Chairman of the Board. 
Non-Executive Directors do not receive additional fees for service on Board and Committees. The Company's Constitution requires that the aggregate remuneration paid or provided to all Non-Executive Directors in any financial year may not exceed an amount approved by shareholders." + }, + { + "question_id": 251, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The text is a mathematical equation involving the homotopy group of a space." + }, + { + "question_id": 252, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The region is a monopole, which is a type of topological defect." + }, + { + "question_id": 253, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The topological conditions of formation of defects only govern the formation of topologically stable defects. It was found that defects solutions can form even when the topology is trivial. The most well-known example are the electro-weak strings, formed during the electroweak symmetry breaking, which are perturbatively stable for a range of parameters which are not realized in nature, and belong to the broader class of embedded defects." 
+ }, + { + "question_id": 254, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "These defects are a priori unstable though mechanisms (such as plasma effects) have been found to stabilize them. They are of interest for inflation model builders since this mechanism can allow lift the constraints from the formation of cosmic strings (see Sec. IV F on D-term inflation)." + }, + { + "question_id": 255, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "A black and white image of a mathematical equation with the variables \"x\" and \"y\" in a bold font, followed by a period and the number \"1\" in a smaller font." + }, + { + "question_id": 256, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The number 72 is displayed in a bold, black font with a slight shadow effect, giving it a three-dimensional appearance. The numerals are evenly spaced and aligned horizontally." + }, + { + "question_id": 257, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0301_0188_0040.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "A stylized illustration of a woman with a bun hairstyle, featuring a pattern of white, cloud-like shapes with small black dots scattered throughout. The woman is wearing a pink garment with a polka dot pattern." 
+ }, + { + "question_id": 258, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/0301_0188_0040.jpg", + "category": "ocr_doc_detailed_caption_box", + "text": "A red and white polka dot pattern with a small, dark, irregularly shaped mark near the center." + }, + { + "question_id": 259, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The number 62 is displayed in a bold, sans-serif font with a dark color, possibly black or dark gray, against a lighter background. The numerals are evenly spaced and aligned horizontally." + }, + { + "question_id": 260, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "Chapter 2: Motivation" + }, + { + "question_id": 261, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The string theory is a type of string theory that is compactified on the near-horizon geometry. It is a decoupled theory that combines the two pictures of the same low energy limit of one theory, Type IIB string theory." + }, + { + "question_id": 262, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The region is a field theory picture of the low energy limit of Type IIB string theory." 
+ }, + { + "question_id": 263, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The regions for which analytic tools exist for these two different pictures turn out to be completely incompatible." + }, + { + "question_id": 264, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The equation is a complex mathematical expression involving various variables and functions. It includes a combination of Greek letters, numbers, and mathematical operators. The visible part of the equation shows a series of variables and functions that are interconnected, with some parts appearing to be in parentheses. The equation is written in a formal, mathematical notation." + }, + { + "question_id": 265, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The presence of an insider on the market does not necessarily lead to arbitrage, and the presence of insiders might be considered beneficial to the market, in the sense that it leads to higher information efficiency of the equilibrium price process." 
+ }, + { + "question_id": 266, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The fundamental value of the firm is a stochastic process, and the insider can observe it directly or at least observe it in a less noisy way than the other agents on the market." + }, + { + "question_id": 267, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The paper relaxes the assumption of static insider information and studies the equilibrium trading and price processes, as well as market efficiency, in a setting with dynamic private information." + }, + { + "question_id": 268, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The model is a generalization of the static information setting of [2]." + }, + { + "question_id": 269, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "The model is a generalization of the static information setting of [2] and is designed to include dynamic information. It has a much smaller set of admissible trading strategies and pricing rules compared to the ones considered in the work. The model shows the existence of a unique Markovian equilibrium, which is an equilibrium price that allows the insider to trade undetected and depends only on the total order process. 
The model also shows that the presence of an insider increases the market informational efficiency for times close to the time of the order." + }, + { + "question_id": 270, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "category": "ocr_doc_detailed_caption_box", + "text": "A green and white striped awning with a scalloped edge and fringe detailing." + }, + { + "question_id": 271, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A red, three-dimensional, cursive sign with the word \"Abondana\" written in a flowing, elegant script." + }, + { + "question_id": 272, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard features the word \"Cafe\" in a stylized, cursive font with a gradient of red to dark red, giving it a three-dimensional appearance. The letters are slightly italicized and have a shadow effect, enhancing their depth." + }, + { + "question_id": 273, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The logo features the letters \"USIS\" in bold, with \"US\" in red and \"IS\" in black. Below the letters, there is a tagline in smaller, gray font." + }, + { + "question_id": 274, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"ESTATE\" is written in bold, black, uppercase letters on a yellow background." 
+ }, + { + "question_id": 275, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signage displays the word \"AGENTS\" in bold, uppercase letters. The letters are black and set against a yellow background." + }, + { + "question_id": 276, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signage displays the word \"SAXONS\" in bold, black capital letters on a yellow background." + }, + { + "question_id": 277, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard features the word \"SAXONS\" in capital letters with a serif font, set against a dark background. The letters are in a metallic gold color with a slight gradient, giving them a three-dimensional appearance. The signboard has a reflective surface, suggesting it is made of a glossy material." + }, + { + "question_id": 278, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard displays the word \"ESTATE\" in capital letters, with a serif font. The letters are dark and possibly metallic, with a reflective surface that catches the light, giving them a slightly shiny appearance. The background of the signboard is not visible, but the letters are set against a dark backdrop that contrasts with the lighter color of the text." 
+ }, + { + "question_id": 279, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard features capitalized, serif lettering spelling \"AGENTS\" with a metallic finish and a slight gradient, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 280, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"Triple\" is written in a cursive, green font with a white outline. The letters are slightly italicized and have a playful, rounded design." + }, + { + "question_id": 281, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"WHITE\" is written in bold, uppercase letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 282, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"SPOT\" is written in bold, uppercase letters with a light blue color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." 
+ }, + { + "question_id": 283, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A stylized, cursive letter \"O\" with a green outline and a white fill, featuring a small, curved tail extending from the bottom right." + }, + { + "question_id": 284, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The visible part of the ag is a white, stylized letter \"A\" with a slight shadow effect, giving it a three-dimensional appearance." + }, + { + "question_id": 285, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001122.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"NEW\" is written in bold, red, uppercase letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 286, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001122.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"YORK\" is written in bold, red, uppercase letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 287, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_162.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The text \"Colchester\" is written in a bold, sans-serif font with a dark blue color." 
+ }, + { + "question_id": 288, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signage features bold, black, uppercase letters spelling \"AYAM\" on a yellow background." + }, + { + "question_id": 289, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signage features large, bold, red letters \"RUA\" on a yellow background." + }, + { + "question_id": 290, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A yellow sign with the word \"SMASHED\" in bold, black, uppercase letters." + }, + { + "question_id": 291, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A dark brown, rectangular sign with the word \"FRIED\" in bold, uppercase letters." + }, + { + "question_id": 292, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A brown sign with the word \"CHICKEN\" in bold, uppercase letters." 
+ }, + { + "question_id": 293, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "Accommodation" + }, + { + "question_id": 294, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"Office\" is written in a bold, sans-serif font with a dark blue color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 295, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"Nightline\" is written in a bold, sans-serif font with a dark blue color." + }, + { + "question_id": 296, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A red and white signboard with the word \"BUBBA\" in bold, capitalized, red letters on a white background, with a red border around the sign." + }, + { + "question_id": 297, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard features the word \"GUMP\" in large, bold, red capital letters with a white outline. The letters are set against a textured, light-colored background that resembles a stone or concrete surface." 
+ }, + { + "question_id": 298, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The signboard features the word \"SHRIMP\" in capital letters, with a bold, sans-serif font. The letters are white with a slight shadow effect, giving them a three-dimensional appearance. The background of the signboard is a deep blue color, providing a stark contrast to the white text." + }, + { + "question_id": 299, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A curved signboard with a red border and a white background, featuring the word \"RESTAURANT\" in bold, black, uppercase letters." + }, + { + "question_id": 300, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A curved, red and white signboard with the word \"MARKET\" in capital letters, featuring a serif font." + }, + { + "question_id": 301, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A circular blue signboard with a white border and a white symbol resembling a stylized letter 'C' in the center." + }, + { + "question_id": 302, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "A circular, metallic signboard with a textured background featuring the letters \"TM\" in a bold, sans-serif font, centered on the sign." 
+ }, + { + "question_id": 303, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"EVENING\" is written in bold, uppercase letters with a gradient of red to pink. The letters are evenly spaced and have a slightly blurred effect, giving them a soft, glowing appearance." + }, + { + "question_id": 304, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"PRIMROSE\" is written in bold, uppercase letters with a gradient of pink to red. The letters are evenly spaced and have a slightly blurred effect, giving them a soft, glowing appearance." + }, + { + "question_id": 305, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"BASE\" is written in bold, uppercase letters with a red background and white outline. The letters are evenly spaced and have a slightly distressed texture." + }, + { + "question_id": 306, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The word \"OIL\" is written in bold, uppercase letters with a gradient of red to pink. The letters are evenly spaced and have a slightly blurred appearance." 
+ }, + { + "question_id": 307, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/ocr_spotting_detailed_caption/images/tr_img_01644.jpg", + "category": "ocr_spotting_detailed_caption_box", + "text": "The number 46 is displayed in a large, bold, white font with a slight shadow effect, giving it a three-dimensional appearance. The numerals are evenly spaced and have a smooth, rounded design." + }, + { + "question_id": 308, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green calendar with white text displaying the numbers 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 
300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369," + }, + { + "question_id": 309, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green rectangular button with rounded corners featuring the word \"Connecter\" in white lowercase letters, followed by a yellow icon resembling a lock with a keyhole." + }, + { + "question_id": 310, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green, stylized, lowercase letter \"a\" with a slight shadow effect, giving it a three-dimensional appearance." + }, + { + "question_id": 311, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green, stylized lowercase \"a\" with a curved tail extending from the bottom right, resembling a lowercase \"i\" with a dot above it." + }, + { + "question_id": 312, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green rectangular sign with white text displaying the numbers \"07\" in a bold, sans-serif font." 
+ }, + { + "question_id": 313, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "The word \"septembre\" in lowercase letters, with a green background and white font." + }, + { + "question_id": 314, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "The text \"Lien Web\" is written in a bold, sans-serif font with a green color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 315, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "The numbers 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30 are displayed in a green color." + }, + { + "question_id": 316, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "A green and white striped awning with a scalloped edge and a fringe detail." 
+ }, + { + "question_id": 317, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "Plan du site" + }, + { + "question_id": 318, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "A white arrow pointing to the right, with a slight curve at the tail end, is positioned to the right of the text \"Book Now.\"" + }, + { + "question_id": 319, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "About Us" + }, + { + "question_id": 320, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The text \"Pay Balance\" is written in bold, black, sans-serif font." 
+ }, + { + "question_id": 321, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "About Us" + }, + { + "question_id": 322, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "contact us" + }, + { + "question_id": 323, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The word \"About\" is written in a bold, sans-serif font with a gradient of pink to red, set against a teal background." + }, + { + "question_id": 324, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "A teal-colored horizontal bar with white text and symbols. On the left side, there is a white envelope icon followed by the text \"doonawash@gmail.com\". On the right side, there is a white icon resembling a person in a wheelchair." + }, + { + "question_id": 325, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The text \"Contact us\" is written in a bold, sans-serif font with a pinkish-red color. The letters are evenly spaced and aligned horizontally." 
+ }, + { + "question_id": 326, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The word \"Home\" is written in a bold, sans-serif font with a gradient of blue shades, transitioning from a lighter blue at the top to a darker blue at the bottom." + }, + { + "question_id": 327, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "The text \"Pay Balance\" is written in a bold, sans-serif font with a pinkish hue. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 328, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_0558c1f4-c05b-49a8-8479-04b1575779d2.png", + "category": "web_detailed_caption_box", + "text": "OpenStreetMap Belgium" + }, + { + "question_id": 329, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "A black arrow pointing to the right." + }, + { + "question_id": 330, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "A white rectangular tag with the number \"4.9\" in bold black font centered on it." 
+ }, + { + "question_id": 331, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "A rectangular blue button with white text that reads \"Pulsuz Konsultasyon.\"" + }, + { + "question_id": 332, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "The word \"Portfolio\" in a bold, sans-serif font, with a slight italicization, and a drop shadow effect, giving it a three-dimensional appearance. The letters are black with a white outline, and the text is set against a plain background." + }, + { + "question_id": 333, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "A white shopping cart icon with a blue outline, featuring a rectangular basket with a grid pattern, two vertical handles, and four wheels, two of which are visible." + }, + { + "question_id": 334, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "The word \"SUPPORT\" in bold, uppercase letters with a blue background and white outline." + }, + { + "question_id": 335, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "A white lowercase letter \"f\" with a bold, sans-serif font, set against a blue background." 
+ }, + { + "question_id": 336, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "Your Charts" + }, + { + "question_id": 337, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "A blue \"X\" with a white outline, featuring a slightly darker blue fill and a lighter blue border." + }, + { + "question_id": 338, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "A rectangular white sign with the words \"CONTACT US\" in bold, uppercase, blue letters." + }, + { + "question_id": 339, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "A rectangular blue button with rounded corners and a white border. The button has the words \"ADD TO CART\" in bold, white, uppercase letters centered on it." + }, + { + "question_id": 340, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "The word \"PRODUCTS\" is written in bold, uppercase letters with a blue background. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." 
+ }, + { + "question_id": 341, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "The word \"PODCAST\" is written in bold, uppercase letters with a blue background. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance." + }, + { + "question_id": 342, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "The text \"ABOUT US\" is written in bold, uppercase letters. The letters are blue and have a slight shadow effect, giving them a three-dimensional appearance. The text is centered horizontally." + }, + { + "question_id": 343, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "The text \"WHAT WE DO\" is written in bold, uppercase letters. The letters are evenly spaced and have a modern, sans-serif font style. The color of the text is black, and it stands out against a light background." + }, + { + "question_id": 344, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "GWM launches livelihood micro-grants" + }, + { + "question_id": 345, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "A rectangular white button with a black border and the word \"Settings\" in black, bold, sans-serif font centered on it." 
+ }, + { + "question_id": 346, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "Privacy Policy" + }, + { + "question_id": 347, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "The word \"HOME\" in uppercase letters, with a bold, sans-serif font, is centered on the image. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance. The color of the text is a gradient of light to dark gray, creating a subtle contrast against the background." + }, + { + "question_id": 348, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "A rectangular black button with the word \"Accept\" in white, bold, sans-serif font centered on it." + }, + { + "question_id": 349, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "A red, oval-shaped button with a white border and the word \"DONATE\" in bold, uppercase, red letters centered on it." + }, + { + "question_id": 350, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "The word \"NEWS\" in bold, uppercase letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and have a clean, modern font style." 
+ }, + { + "question_id": 351, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "The text \"ABOUT US\" is written in bold, uppercase letters with a sans-serif font. The letters are evenly spaced and aligned horizontally. The color of the text is black, and it stands out against a light background." + }, + { + "question_id": 352, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "The text \"CONTACT US\" is written in bold, uppercase letters. The letters are evenly spaced and have a modern, sans-serif font. The color of the text is black, and it stands out against a light background." + }, + { + "question_id": 353, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_6c677961-e540-4cc5-b725-5e301019a9f9.png", + "category": "web_detailed_caption_box", + "text": "A black and white icon depicting a stylized, abstract representation of a building with a flat roof and multiple rectangular windows arranged in a grid pattern." + }, + { + "question_id": 354, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The word \"News\" in a bold, sans-serif font, with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and have a dark color, contrasting with the lighter background." 
+ }, + { + "question_id": 355, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The logo features the word \"ServeGate\" in bold, black letters. To the left of the text, there is a stylized design consisting of two overlapping triangles, one in teal and the other in red, with a black line separating them." + }, + { + "question_id": 356, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "Our Difference" + }, + { + "question_id": 357, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The text \"About Us\" is written in a bold, sans-serif font with a red color. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 358, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "A rectangular button with rounded corners, featuring a light pink background and a thin red border. The button displays the text \"Get in touch\" in bold, red, sans-serif font." + }, + { + "question_id": 359, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The word \"Home\" in a serif font, with the letters in a light gray color against a white background." 
+ }, + { + "question_id": 360, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The word \"Services\" in a bold, sans-serif font, with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 361, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "Indigenous Impact" + }, + { + "question_id": 362, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "The text \"ServeGate\" is written in bold, black, sans-serif font. The letters are evenly spaced and aligned horizontally." + }, + { + "question_id": 363, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_5a35d9c2-2c2d-4a49-ad0a-1408d9cac78e.png", + "category": "web_detailed_caption_box", + "text": "A rectangular white button with rounded corners, featuring the text \"Close issue\" in bold, black, sans-serif font." + }, + { + "question_id": 364, + "image": "/mnt/bn/zilongdata-us/wanghaochen/caption/merge/evaluation/MDVP-bench/data/web_detailed_caption/images/web_3fed2169-3c3d-43e7-baaa-3bf0e0c7134c.png", + "category": "web_detailed_caption_box", + "text": "A vertical array of three circular, dark-colored buttons with a slightly raised, smooth surface, aligned centrally on a light-colored background." 
+ } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/web_detailed_caption_box/question.json b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/web_detailed_caption_box/question.json new file mode 100644 index 0000000000000000000000000000000000000000..75123d00e04c5085da2f6ce8142935bd54641442 --- /dev/null +++ b/evaluation/MDVP-Bench/mdvp_for_gpt4v_eval/web_detailed_caption_box/question.json @@ -0,0 +1,454 @@ +[ + { + "question_id": 1, + "image": "60e34f9315443cb6c77d32da7ba5eee1.png", + "category": "web_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 1248, + 156, + 19, + 19 + ], + [ + 1286, + 40, + 134, + 19 + ], + [ + 480, + 386, + 153, + 19 + ], + [ + 1382, + 117, + 76, + 0 + ], + [ + 768, + 156, + 19, + 19 + ], + [ + 480, + 348, + 153, + 19 + ], + [ + 652, + 981, + 153, + 19 + ], + [ + 921, + 156, + 19, + 19 + ], + [ + 710, + 156, + 19, + 19 + ], + [ + 825, + 1000, + 172, + 19 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 2, + "image": "0180e97a3e9609ea8c72b6b8db0071c8.png", + "category": "web_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 1420, + 21, + 134, + 19 + ], + [ + 614, + 252, + 192, + 19 + ], + [ + 614, + 136, + 134, + 19 + ], + [ + 614, + 98, + 115, + 19 + ], + [ + 614, + 156, + 115, + 19 + ], + [ + 614, + 213, + 38, + 19 + ], + [ + 326, + 2, + 422, + 38 + ], + [ + 614, + 309, + 96, + 19 + ], + [ + 614, + 60, + 76, + 19 + ], + [ + 614, + 290, + 96, + 19 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 3, + "image": "web_0558c1f4-c05b-49a8-8479-04b1575779d2.png", + "category": "web_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 333, + 971, + 366, + 36 + ] + ], + "segmentation": [] + } + }, + { + 
"question_id": 4, + "image": "367173643a055b0657de17afff8d541d.png", + "category": "web_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 1324, + 40, + 96, + 19 + ], + [ + 364, + 732, + 19, + 19 + ], + [ + 307, + 559, + 172, + 57 + ], + [ + 960, + 21, + 57, + 57 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 5, + "image": "5c9b9883310423712e466bee13a36a02.png", + "category": "web_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 1478, + 21, + 95, + 57 + ], + [ + 1017, + 98, + 95, + 38 + ], + [ + 1228, + 21, + 19, + 57 + ], + [ + 1056, + 616, + 76, + 19 + ], + [ + 1267, + 21, + 38, + 57 + ], + [ + 1286, + 98, + 115, + 38 + ], + [ + 1075, + 463, + 153, + 38 + ], + [ + 691, + 98, + 115, + 38 + ], + [ + 902, + 98, + 96, + 38 + ], + [ + 1152, + 98, + 115, + 38 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 6, + "image": "53b4ab2cb706a43fec7ce4ac5eac181e.png", + "category": "web_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 998, + 60, + 115, + 19 + ], + [ + 1267, + 232, + 153, + 76 + ], + [ + 1689, + 1039, + 76, + 19 + ], + [ + 729, + 1039, + 96, + 19 + ], + [ + 825, + 60, + 76, + 19 + ], + [ + 1785, + 1039, + 76, + 19 + ], + [ + 1305, + 40, + 134, + 38 + ], + [ + 1113, + 60, + 76, + 19 + ], + [ + 902, + 60, + 96, + 19 + ], + [ + 1190, + 60, + 115, + 19 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 7, + "image": "web_6c677961-e540-4cc5-b725-5e301019a9f9.png", + "category": "web_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 2282, + 775, + 75, + 68 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 8, + "image": 
"656b47ffb1270a8038d876586e92a71b.png", + "category": "web_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 1612, + 40, + 76, + 76 + ], + [ + 57, + 40, + 268, + 76 + ], + [ + 1132, + 40, + 153, + 76 + ], + [ + 979, + 40, + 153, + 76 + ], + [ + 1708, + 40, + 134, + 76 + ], + [ + 345, + 175, + 57, + 19 + ], + [ + 1478, + 40, + 134, + 76 + ], + [ + 1286, + 40, + 192, + 76 + ], + [ + 57, + 60, + 268, + 19 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 9, + "image": "web_5a35d9c2-2c2d-4a49-ad0a-1408d9cac78e.png", + "category": "web_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 801, + 1139, + 196, + 68 + ] + ], + "segmentation": [] + } + }, + { + "question_id": 10, + "image": "web_3fed2169-3c3d-43e7-baaa-3bf0e0c7134c.png", + "category": "web_detailed_caption_box", + "text": "Please provide a detailed description of each marked region in the image.", + "annotation": { + "bbox": [ + [ + 1880, + 205, + 66, + 67 + ] + ], + "segmentation": [] + } + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/model_outputs/gar_1b.json b/evaluation/MDVP-Bench/model_outputs/gar_1b.json new file mode 100644 index 0000000000000000000000000000000000000000..19fb58c921010496231bf417184c01c90809eb46 --- /dev/null +++ b/evaluation/MDVP-Bench/model_outputs/gar_1b.json @@ -0,0 +1,1822 @@ +[ + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_251980.png", + "caption": "Friday, Sep 2, 61°F", + "gt": "This is a status bar displaying the date \"Friday, Sep 2,\" along with the temperature \"61°F.\" It informs the user about the current day, date, and ambient temperature, likely provided by a weather service. This text is generally not interactive and serves as an informational display only." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_251980.png", + "caption": "A stylized letter \"G\" with a gradient of colors starting from red at the top, transitioning to orange, yellow, green, blue, and ending with a blue at the bottom.", + "gt": "This represents a capital letter \"G\" set inside a circle. It is part of a larger icon that is likely associated with a search engine or service, commonly used as a button to initiate web searches or to access related services. Interaction with this icon typically leads to the search engine's interface or homepage." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_251980.png", + "caption": "A white, fluffy cloud with a smooth, rounded shape and a slightly irregular outline. The cloud has a soft, cotton-like texture and appears to be in a light, airy state.", + "gt": "This icon depicts clouds, which is a visual representation often used for weather-related services or apps. It indicates current or forecasted weather conditions and is typically interactive, leading the user to a detailed weather report upon tapping." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_251980.png", + "caption": "A white gear with a blue circular center, featuring a series of evenly spaced, straight teeth around the perimeter.", + "gt": "The gear-like icon signifies settings or options. It is a common symbol for accessing system settings or preferences in an application or operating system. Interacting with this icon usually opens a menu where various settings can be adjusted." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_251980.png", + "caption": "A white speech bubble icon with three horizontal blue lines inside it.", + "gt": "The icon resembling a speech bubble is often associated with messaging services or chat applications. 
Tapping it would commonly open a messaging app where users can send messages, photos, or videos to others." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_251980.png", + "caption": "A red, rectangular play button with rounded corners, featuring a white, right-pointing arrow in the center.", + "gt": "This icon, featuring a triangle resembling a \"play\" button, is widely recognized as a symbol for media players or services offering video and music content. Interaction with this icon would likely launch a media playback application or service." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_251980.png", + "caption": "A stylized letter \"G\" with a gradient of colors starting from red at the top, transitioning to orange, yellow, green, blue, and ending with a blue at the bottom.", + "gt": "The icon represents a search engine or a suite of services provided by a major tech company, indicated by the letter \"G\" and vibrant colors. Interacting with this icon typically brings the user to a home screen with access to various services offered by the company, such as search, email, maps, and more." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_251980.png", + "caption": "A blue circle with a red dot and a yellow dot to its right.", + "gt": "This icon, representing a microphone inside a colorful outline, is suggestive of a digital assistant or voice search feature. When interacted with, it would generally activate a voice recognition service allowing users to speak commands or queries for assistance." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_251980.png", + "caption": "A circular icon with a white center and a thick black border.", + "gt": "The circle icon in a navigation bar is typically an interactive home button on a mobile device, often bringing the user back to the home screen when tapped." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_251980.png", + "caption": "A white, right-pointing arrow with a thick border.", + "gt": "The icon with a triangle pointing leftwards resembles a \"back\" navigation button, generally used to go back to the previous screen in an application or navigate backwards in a browser." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "caption": "The word \"Excel\" in white, bold, sans-serif font on a pink background.", + "gt": "This icon is indicative of the Microsoft Excel mobile application, recognizable by its green 'X' on a white background, which suggests a tool for creating and editing spreadsheets. It is likely an interactive element that, upon touch or click, launches the application allowing users to work with spreadsheets on their mobile device." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "caption": "The word \"Facebook\" is written in a bold, sans-serif font with a light pink color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance.", + "gt": "This is the Facebook mobile application icon, featuring a lowercase 'f' on a blue background. When interacted with, it typically opens the Facebook app where users can browse their news feed, connect with friends and family, post updates, and engage in social networking activities." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "caption": "The word \"Speedtest\" is written in a bold, sans-serif font with a light pink color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance.", + "gt": "Represented here is an icon for the Speedtest application by Ookla, depicted by a speedometer graphic suggesting the app's function of measuring internet connection speed. Tapping on this icon will likely open the app and allow the user to test their current internet speed." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "caption": "The word \"Instagram\" is written in lowercase white letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally.", + "gt": "This icon, featuring a camera lens and a gradient background, is for the Instagram mobile application. Interacting with this icon will usually open the app, providing access to photo and video sharing, as well as viewing the content from others on the Instagram social network." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "caption": "The word \"Home\" in a bold, sans-serif font, with a slight shadow effect, set against a light pink background.", + "gt": "Resembling a house with a triangular roof, this icon signifies a home automation or real estate application. Interaction with this icon would open the respective app, providing controls for smart home devices or real estate listings, depending on its specific function." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "caption": "The word \"Lite\" in a bold, sans-serif font with a gradient of pink to white, giving it a three-dimensional appearance.", + "gt": "This icon has a feather, hinting at a lite version of an application that offers a minimalistic or resource-efficient option, typically for use in areas with limited connectivity or on devices with lower performance." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "caption": "The word \"Messenger\" is written in a bold, sans-serif font with a light pink color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance.", + "gt": "Featuring a speech bubble with a lightning bolt, this is the Facebook Messenger app icon. It signifies an app dedicated to messaging which, upon interaction, opens a platform where users can send messages, share media, and participate in video calls." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "caption": "Home De...", + "gt": "The icon partially reads \"Home De...\" against an orange square, suggesting a home improvement or retail company's app, possibly offering goods or services related to home refurbishment or decoration. The app's full functionality would be revealed upon opening it." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "caption": "A stylized, cartoonish face with large, round, white eyes featuring black pupils and a small, orange, oval-shaped nose.", + "gt": "The green owl represents Duolingo, an educational platform icon with its function being language learning. Upon touching the icon, the user would engage with the app to learn a new language through interactive lessons." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "caption": "A blue square with a white lowercase \"g\" followed by three white horizontal lines.", + "gt": "This icon, with an abstract design and the letters 'GE', likely signifies a news or media application that provides users with news articles, updates, and possibly live reporting, accessible by touching the icon to open the app." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "caption": "The word \"Search\" in a sans-serif font, with a light gray color and a slight shadow effect, giving it a three-dimensional appearance.", + "gt": "This area contains the term \"Search,\" suggesting it is likely related to a search function where a user can input queries to locate specific settings or information within this system or application." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "caption": "The word \"settings\" in lowercase letters, with a sans-serif font, is centered on a white background.", + "gt": "The word \"settings\" indicates an option or heading that relates to configuration options. Interacting with it would typically bring up a menu to adjust system preferences or application parameters." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "caption": "The word \"mobile\" in lowercase letters, with a modern sans-serif font, is written in a light gray color.", + "gt": "The term \"mobile,\" followed by a comma hints at a list or continuation of related topics, likely referring to mobile network settings or features in the context of this system or application." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "caption": "The word \"data\" in lowercase letters, with a sans-serif font, is centered on the image.", + "gt": "The word \"data\" in this context may refer to mobile data usage and settings. It suggests an option to view or adjust how the device handles cellular data." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "caption": "The Wi-Fi symbol consists of the letters \"Wi-Fi\" in a bold, sans-serif font. The letters are evenly spaced and aligned horizontally.", + "gt": "This term \"Wi-Fi,\" ending with a comma, implies it is part of a series, possibly relating to Wi-Fi settings where a user can manage Wi-Fi networks and preferences." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "caption": "The text \"and\" is written in lowercase letters with a sans-serif font. The letters are evenly spaced and have a consistent size. The color of the text is a light gray, and it is set against a white background.", + "gt": "The word \"and\" serves as a conjunction within a list or sentence, indicating the addition of more items or concepts that are related to the ones previously mentioned." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "caption": "The word \"hotspot\" in lowercase letters, with a sans-serif font, is centered on the image.", + "gt": "This term \"hotspot\" typically refers to a feature where the device can share its internet connection with other devices through Wi-Fi, Bluetooth, or USB." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "caption": "The word \"usage\" in lowercase letters, with a sans-serif font, is written in a light gray color.", + "gt": "The term \"usage,\" followed by a comma, likely relates to the tracking or monitoring of resource consumption, such as data, battery, or connectivity usage." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "caption": "The word \"Connected\" in lowercase letters, with a bold font and a slight italicization.", + "gt": "The word \"Connected\" suggests it pertains to the status or management of connected devices or networks, such as Bluetooth connections or Wi-Fi networks." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "caption": "A white, rectangular, flat-panel computer monitor with a thin bezel and a visible power button on the bottom right corner.", + "gt": "The partially visible term \"Lo\" could be part of a word that identifies a feature, option, or information related to the system or application settings." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_98501.png", + "caption": "Search or type web address", + "gt": "This area is a text-entry field in a web browser, typically used for typing in web addresses or performing web searches. Interaction with this field usually involves clicking or tapping to enter text, and pressing Enter would initiate a web search or take the user to the entered web address." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_98501.png", + "caption": "The text \"eBay shopping cart\" is displayed in a bold, sans-serif font. 
The letters are black and evenly spaced against a white background.", + "gt": "This is a tab title within a web browser, indicating that the user has accessed or searched for 'eBay shopping' in this tab. The text serves a navigational purpose, allowing the user to identify and switch to the associated web page when multiple tabs are open." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_98501.png", + "caption": "A black shopping cart with a curved handle and a visible wheel at the bottom right corner.", + "gt": "This label refers to a 'cart' on a shopping platform, hinting at a functionality that allows users to view items that have been added to a virtual shopping cart. It is likely interactive and clicking it would navigate the user to a page summarizing their selected items for purchase." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_98501.png", + "caption": "//cart.ebay.com/\" in lowercase letters.", + "gt": "This text is a URL displayed in the address bar of a web browser. It indicates that the current web page pertains to the shopping cart of the eBay website. The user can click on this text to edit the URL or copy it for use elsewhere." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_98501.png", + "caption": "Welcome to Costco Wholesale", + "gt": "This text suggests that the user is visiting or has searched for a page related to 'Welcome to Costco Wholesale'. It might serve as a title for a page, potentially indicating that the user can find information regarding Costco's offerings through this tab." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_98501.png", + "caption": "The text \"costco.com\" is displayed in a bold, sans-serif font with a blue color.", + "gt": "Here, 'costco.com' is the simplified representation of an address bar or tab title suggesting that the user is visiting Costco's website. Users interact with this by clicking it to switch to the Costco tab within the browser." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_98501.png", + "caption": "Pay Less.", + "gt": "This phrase 'Pay Less.' is likely associated with a slogan or branding message, suggesting a value proposition to customers; the promise of spending less for the products or services offered by the entity associated with this phrase." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_98501.png", + "caption": "The word \"Target\" in bold, black font with a slight shadow effect, set against a white background.", + "gt": "The text 'Target:' resembles a title or a navigational cue for a segment within a web browser, it may indicate a web page related to the retail company Target and is probably part of a list or compilation of bookmarks or frequently visited sites." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_98501.png", + "caption": "Expect More.", + "gt": "Similar to , 'Expect More.' is a slogan that implies a promise of greater value, service, or product quality from the associated entity. It is designed to communicate a marketing message or company ethos to customers." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_98501.png", + "caption": "The text \"target.com\" is displayed in a bold, sans-serif font with a blue color.", + "gt": "This text 'target.com' represents a simplified address, similar to , likely indicating that the user is visiting or has the option to visit Target's website. Interaction with this area would navigate to or indicate presence at Target's web page." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "caption": "The word \"Skype\" in lowercase letters, with a bold, sans-serif font. The letters are black and evenly spaced against a white background.", + "gt": "The text appears at the top of the screenshot and is likely the title of the application or page currently being viewed. It suggests that the content of the page is related to the Skype application, possibly for download or further information purposes." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "caption": "The word \"Skype\" in lowercase letters, with a bold, sans-serif font. The letters are black and evenly spaced against a white background.", + "gt": "This text is also indicative of the Skype application. It is usually the main header on an app page and is a non-interactive element that provides the user with confirmation of the app's identity." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "caption": "A blue circular logo with a white lowercase \"s\" in the center.", + "gt": "The icon displayed represents the Skype application. It's typically used as a visual identifier of the app within digital stores or on a device's home screen. It serves as a non-interactive branding element in this context." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "caption": "The word \"Install\" in white, bold, sans-serif font on a green background.", + "gt": "The word \"Install\" is generally an interactive button when found on an application download page. Tapping this button would initiate the download and installation of the app onto the user's device." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "caption": "The word \"Skype\" in lowercase letters, with a bold, sans-serif font. The letters are black and evenly spaced against a white background.", + "gt": "This repeated mention of \"Skype\" may refer to the name of the application on its store page. It usually appears below the app icon and serves as a non-interactive title or label." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "caption": "The word \"purchases\" in lowercase letters, with a sans-serif font, is written in a dark color against a light background.", + "gt": "The term \"purchases\" here likely relates to transactions associated with the app, suggesting that the app might offer in-app purchases." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "caption": "In-app purchases", + "gt": "Paired with the previous \"purchases\" text, \"In-app\" specifies the location or type of purchases available, indicating that users can buy items or services within the app itself." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "caption": "A black star with a white outline, positioned to the right of the number \"4.1\".", + "gt": "The rating \"4.1*\" is indicative of user reviews and ratings for the app. 
It reflects the app's quality as perceived by its users and is usually an averaged score based on individual user ratings." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "caption": "A black and white logo consisting of the letters \"1B+\" in bold, black font, with a white plus sign between the \"1\" and the \"+\"", + "gt": "\"1B+\" signifies the number of times the app has been downloaded, indicating that the Skype app has been downloaded over one billion times." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "caption": "The word \"Editors\" in a bold, sans-serif font, with a slight italicization, and a dark color on a light background.", + "gt": "The phrase \"Editors' Choice\" likely denotes a special recognition or endorsement by the app store's editorial team, suggesting that the app comes highly recommended." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "caption": "Search settings", + "gt": "This region contains the phrase \"search settings,\" which indicates a function allowing the user to search within the settings menu. The presence of a magnifying glass icon suggests that this is an interactive search bar interface element. Typically, a user would tap this area and input text to locate specific settings." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "caption": "A white, stylized lowercase \"m\" with a bold, rounded top and a thick, curved bottom, set against a dark background.", + "gt": "This portion features the single character \"M\" which is typically representative of a user's initial or an application's logo. 
It is stylized with a certain thickness and distinct font that can be indicative of a branding design or user personalization within a software interface." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "caption": "The text \"Add\" is written in bold, black font with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally.", + "gt": "This region includes the word \"Add\" presented with clear, bold lettering on a button or interactive element. This is commonly used to initiate the process of adding new elements, possibly in this context to add a new email account, as suggested by the surrounding text." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "caption": "another email", + "gt": "The text \"another email\" is part of a larger phrase that suggests functionality for adding additional email addresses to the account or application in use. It is likely not interactive by itself but is part of instructional or descriptive text guiding the user's actions." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "caption": "The word \"account\" in lowercase letters, with a bold font and a slight italicization.", + "gt": "Featuring the word \"account,\" this text complements the nearby phrase and is associated with the process of adding or managing email accounts within the application or device settings. It would generally not be interactive but adds context to the interface's options." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "caption": "Set up your personal", + "gt": "The phrase \"Set up your personal\" suggests a prompt or direction for the user to configure personal settings, possibly related to an email or other account settings. This text is usually static and provides guidance or instructions within a user interface." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "caption": "or work email", + "gt": "Paired with the previous text, \"or work email\" completes instructions for setting up email accounts of different types (personal or professional) within an application. This portion of text helps to further define the user's options for account configuration." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "caption": "Network & internet", + "gt": "\"Network & internet\" signifies a category or menu within the settings that pertains to adjusting network-related preferences including Wi-Fi, data usage, and related connectivity features. This text typically leads to a subsection where related settings can be modified." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "caption": "Wi-Fi, mobile, data usage, and hotspot", + "gt": "This section, \"Wi-Fi, mobile, data\" lists different connectivity options available to the user for configuration. It could be informative text providing a summary of the settings contained within the \"Network & internet\" menu mentioned in the previous region." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "caption": "The text \"usage, and hotspot\" is written in lowercase letters.", + "gt": "The presence of the word \"usage,\" combined with the context from the surrounding text, relates to data consumption aspects of the device's network settings. \"And\" suggests there are additional relevant aspects listed after this text, likely related to managing network services or features." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "caption": "Wednesday, May 18", + "gt": "This area displays the text \"Wednesday, May 18,\" which appears to show the date information, presumably representing the current day of the week and the month's date. This is typically displayed on mobile devices as part of the user interface to inform the user of the current date." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "caption": "The word \"Maps\" in a bold, sans-serif font, with a gradient of pink to white, set against a pink background.", + "gt": "The text \"Maps\" suggests an application name, likely a mapping or navigation app, which users commonly utilize to find locations, get directions, or explore maps of different areas. It is probable that tapping on this text would open the associated application." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "caption": "A stylized letter \"G\" with a gradient of colors starting from red at the top, transitioning to orange, yellow, green, blue, and ending with purple at the bottom.", + "gt": "This is a single character \"G,\" customarily associated with Google’s branding. It often represents access to Google's search services or apps affiliated with Google. 
Interacting with this symbol would possibly lead to a Google product or service." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "caption": "A red, teardrop-shaped marker with a black circular center.", + "gt": "This icon, often indicative of location services or map functionality, is commonly used to represent a user's current location or to access location-based features. Interacting with this icon typically opens a mapping application that shows the user's real-time location on a map." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "caption": "A white speech bubble with a blue outline, containing three horizontal blue lines of varying lengths.", + "gt": "This icon depicts a chat bubble, usually associated with messaging or communication services. It typically indicates the user's chat or messaging applications, and interaction would likely open the associated messaging service to send or receive messages." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "caption": "A stylized letter \"G\" with a gradient of colors starting from red at the top, transitioning to orange, yellow, green, and ending with blue at the bottom.", + "gt": "This icon with a colorful design resembling a camera shutter or a wheel hints at the Google Chrome browser, which is widely used for Internet browsing. Tapping this icon would typically open the Chrome browser for web navigation." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "caption": "A blue circle with a red dot in the center, a yellow dot below it, and a green dot to the right.", + "gt": "The icon exhibits the Google Assistant symbol, suggesting voice-activated or typing search query functionality. 
Interacting with this icon would likely invoke Google Assistant to help with tasks, answer questions, or control smart home devices." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_1849.png", + "caption": "A web browser displaying the URL \"costco.com/Check\" with a black lock icon to the left of the text.", + "gt": "The URL displayed in the address bar indicates that the webpage belongs to costco.com and includes a path, or endpoint, that suggests a functional page, which the text 'Check' implies may be related to a checkout or verification process. This is the web address users can navigate to for interacting with the website's functionality." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_1849.png", + "caption": "The logo features the word \"COSTCO\" in large, bold, red letters with a white outline. Below it, the word \"WHOLESALE\" is written in smaller, bold, blue letters with a white outline.", + "gt": "The logo represents the brand identity for Costco Wholesale, indicating that the user is currently on the official website of this retail company. As a logo, it may serve as a clickable element that typically redirects users to the homepage of the website." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_1849.png", + "caption": "The word \"Warehouses\" in a bold, sans-serif font, with a blue color and a slight shadow effect, giving it a three-dimensional appearance.", + "gt": "Labeled 'Warehouses,' this indicates a section of the website where users can find information about Costco's physical store locations. It likely functions as a link that, when clicked, will take the user to a page detailing warehouse locations and related information." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_1849.png", + "caption": "The word \"Account\" in a bold, sans-serif font, with a blue background and white lettering.", + "gt": "Marked 'Account,' this suggests a section pertaining to user account management. Clicking on this would likely allow the user to access their personal account details, sign in, or manage their membership and profile." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_1849.png", + "caption": "A blue shopping cart icon with a white outline, featuring a handle on the top and a base with four wheels.", + "gt": "This icon appears to represent a shopping cart, which is commonly used on e-commerce websites to signify where users can view items they intend to purchase. Clicking on it would probably take the user to view their current selections or to the checkout page." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_1849.png", + "caption": "A blue rectangular sign with the word \"Shop\" in white, bold, sans-serif font. To the left of the text, there are three horizontal white lines of varying lengths.", + "gt": "Highlighting the word 'Shop,' this implies a navigational link designed to direct users to the online shopping section of the website, where they can browse and choose products for purchase." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_1849.png", + "caption": "A search bar with the word \"Search\" in a bold, sans-serif font, centered on a light background.", + "gt": "This appears to be a search bar, where users can enter keywords or phrases to find specific items or information on the website. Such fields typically include an interactive function that generates search results upon entry confirmation." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_1849.png", + "caption": "My Warehouse", + "gt": "'My Warehouse' likely refers to the user's preferred or designated Costco warehouse location. It may include functionality for the user to select or change their preferred store and may show additional details, such as operational hours." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_1849.png", + "caption": "Delivery Location", + "gt": "The phrase 'Delivery Location' accompanied with what appears to be a postal code implies a feature that allows users to specify or view the location to which online purchases will be delivered." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_1849.png", + "caption": "The word \"Seattle\" in bold, black, sans-serif font.", + "gt": "Containing the city name 'Seattle,' this suggests the chosen warehouse or delivery location for the user. It might be interactive to allow the user to change the location or view information on the selected warehouse." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "caption": " Play", + "gt": "This text likely represents the name of an application or service known as Fetch Rewards, potentially hinting at a rewards system that users can utilize by engaging with the app." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "caption": "The word \"Play\" in bold, black, sans-serif font.", + "gt": "This is the word \"Play,\" which usually is associated with initiating an action or starting something, such as a video or game within an application." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "caption": "to earn", + "gt": "The phrase \"to earn\" typically suggests that there is an opportunity to gain something—often points, money, or rewards—by performing certain actions or tasks." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "caption": "The text \"MAKE MONEY\" is written in bold, uppercase letters with a green background. The letters are black and evenly spaced.", + "gt": "The word \"MAKE\" usually denotes the action of creating something or obtaining an outcome, perhaps insinuating that users can create or earn money through the app." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "caption": "The word \"MONEY\" in bold, uppercase letters with a green background.", + "gt": "This text states \"MONEY,\" which indicates that the application or service likely involves opportunities for users to earn financial rewards or benefits." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "caption": "The text \"appminer st\" is displayed in a green, sans-serif font.", + "gt": "The text \"appminer st\" is not immediately clear, but it could be a truncation or part of a larger phrase, possibly indicating a feature within the app, or related to app mining or statistics." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "caption": "The word \"Contains\" in a serif font, with the letters in a dark color against a light background.", + "gt": "The word \"Contains\" usually suggests that what follows will describe the contents or features within the app, which in this case could be related to advertisements." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "caption": "The word \"ads\" in lowercase letters, with a sans-serif font, is written in a dark color against a light background.", + "gt": "This term generally refers to \"advertisements,\" suggesting that the application includes ads that users might see while utilizing the app." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "caption": "50K+", + "gt": "The numerical figure \"50K+\" generally implies a quantity greater than 50,000, typically used in the context of downloads, users, or items within an app." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "caption": "A black star with a white outline, positioned to the right of the number \"4.7\".", + "gt": "The characters \"4.7*\" indicate a rating, likely on a 5-point scale, suggesting that users have rated the app positively, with 4.7 out of 5 stars." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_2921.png", + "caption": "A web browser displaying a search engine results page with the text \"Walmart.com | Save\" visible.", + "gt": "This area contains the webpage title indicating the user is on a retail website known for its wide range of products, hinting at online shopping capabilities. The title is typically non-interactive and serves as an identifier of the site." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_2921.png", + "caption": "The text \"walmart.com\" is displayed in a bold, sans-serif font. The letters are black and evenly spaced against a white background.", + "gt": "This text seems to be an incorrect or truncated URL for the same retail website mentioned in Region 1. Possibly a typographical error within the text, it seems non-functional." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_2921.png", + "caption": "A white search bar with the text \"lenovo thinkpad\" in black, bold, sans-serif font.", + "gt": "The text here represents a search query within a search bar of the website, suggesting the user is looking for a Lenovo ThinkPad, which is a model of a laptop computer." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_2921.png", + "caption": "A blue button with the word \"Cancel\" in white, bold, sans-serif font.", + "gt": "Labeled 'Cancel,' this is likely an interactive button used to clear the current search query within the search bar. Once tapped or clicked, it should clear the input text." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_2921.png", + "caption": "A black Lenovo ThinkPad with a visible keyboard and trackpad, featuring a glossy finish and a rectangular design.", + "gt": "This text duplicates the query in Region 3 and is part of the search bar suggestions or search history, indicating a previous or common search made by the user." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_2921.png", + "caption": "A black laptop with a visible keyboard and trackpad, featuring a glossy finish and a partially visible screen displaying a white background.", + "gt": "The single word 'Lenovo,' which is part of a search suggestion below the search bar, represents the brand that manufactures various electronic devices, including laptops." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_2921.png", + "caption": "A black and white image of a Lenovo ThinkPad laptop with a visible keyboard and trackpad, featuring a QWERTY layout and a power button on the top right corner.", + "gt": "The word 'ThinkPad' refers to a specific line of laptops and is part of a search suggestion. 
Standalone, it specifies the user's interest in the ThinkPad series by Lenovo." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_2921.png", + "caption": "A black laptop with a visible keyboard and trackpad, featuring a silver hinge and a partially visible screen displaying a white background with text.", + "gt": "The text 'ir' appears to be an incomplete or mistyped search term or fragment within the search suggestions. Its context is unclear without additional information." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_2921.png", + "caption": "A black laptop with a visible keyboard and trackpad, featuring a silver hinge and a partially visible screen displaying a white background with text.", + "gt": "This text, likely a category label, indicates the section of the site the user is navigating, presumably the electronics category where items like laptops would be found." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_2921.png", + "caption": "A black laptop charger with a cylindrical shape and a ribbed design, featuring a series of horizontal ridges along its body for added grip and aesthetic appeal.", + "gt": "Representing a longer search suggestion, this phrase indicates a related accessory for the Lenovo ThinkPad, specifically a charger, suggesting the user might be looking to purchase this item." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/6.png", + "caption": "NAVIGATING SPECIAL EDUCATION SOCIAL & EMOTIONAL LEARNING", + "gt": "The image displays a title that reads \"NAVIGATING SPECIAL EDUCATION SOCIAL & EMOTIONAL LEARNING.\" It's styled in bold, white capital letters against a red background, and it appears to serve as a header for the entire visual presentation, indicating the overarching theme of the content below." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/6.png", + "caption": "A yellow background with two stylized human figures facing each other, each with a different colored arrow pointing upwards. The left figure has a red arrow, and the right figure has a green arrow. Above the figures, the text \"GROWTH MINDSET\" is written in black.", + "gt": "This panel illustrates the concept of a \"Growth Mindset\" juxtaposed with \"Fixed Mindset.\" Two head silhouettes are shown with arrows pointing towards a \"Growth Mindset\" tag indicating a positive transformation away from a \"Fixed Mindset,\" symbolizing the adaptability and learning potential of the mind." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/6.png", + "caption": "A cartoon character with a serious expression, featuring a red cross symbol on the left side of the head, and a red cross symbol on the right side of the head. The character has short, dark hair and is wearing a blue shirt.", + "gt": "Depicted here is a person with a grim expression, and steam coming out of their ears, conveying the theme of \"Anger Management.\" This symbolizes the need to control tempers, with visual cues highlighting the struggle typically associated with anger." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/6.png", + "caption": " red, green, blue, purple, and orange. The hands are arranged in a circular pattern, with each hand pointing outward. The text \"Understanding Diversity\" is written in black at the top of the logo.", + "gt": "This panel represents \"Understanding Diversity.\" It features a circle of variously colored handprints reaching towards the center, signifying unity and inclusiveness among diverse individuals or groups. The image communicates the idea of embracing diversity." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/6.png", + "caption": "A black and white illustration of a person with a speech bubble containing a question mark, surrounded by three question marks. The person is depicted with a speech bubble containing a question mark, and the speech bubble is positioned above the person's head. The person is wearing a black hat and a black jacket.", + "gt": "The image here is indicative of \"Social Inferencing.\" A figure stands perplexed before an open box with question marks floating above, suggesting the process of interpreting social cues and understanding social contexts or scenarios that are not overtly expressed." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/6.png", + "caption": "A cartoon boy with short brown hair, wearing a red shirt and blue shorts, is pushing another boy with short brown hair, wearing a striped shirt with horizontal stripes in red, orange, and green, and brown shorts.", + "gt": "An illustration of two children, one standing over the other with a raised fist, typifies \"Bullying.\" This image portrays an aggressive interaction between youth, emphasizing the dynamic of power and intimidation present in bullying behaviors." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/6.png", + "caption": "A young boy with short black hair, wearing a yellow shirt and black shorts, is holding a young girl with long black hair, wearing a yellow shirt and blue pants, who is sitting on the ground with her legs spread apart.", + "gt": "Here, \"Helping Others\" is symbolized by two children, one assisting the other by tying their shoe. This image evokes themes of kindness, helpfulness, and cooperation among individuals, highlighting the importance of social support." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/6.png", + "caption": "A cartoon of two people with brown hair, facing each other, with a speech bubble between them containing a lightning bolt symbol.", + "gt": "Showing two profiles with opposing arrows and a lightning bolt in between, this panel discusses \"Conflict Resolution.\" The imagery suggests two individuals facing a conflict with a potential for resolution, emphasizing communication and problem-solving." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/6.png", + "caption": "A rectangular blue traffic sign with a white border, featuring five white arrows pointing upwards and to the right. Below the arrows, the word \"CHANGES\" is written in bold white capital letters.", + "gt": "This panel depicts \"Coping With Changes,\" represented by a signpost with arrows pointing in different directions, labeled \"CHANGES.\" It symbolizes the various paths one may take when encountering life's transitions and the importance of adaptability." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/6.png", + "caption": "A stylized illustration of two figures, one in a yellow suit and the other in a blue suit, both standing on a staircase. The figure in the yellow suit is holding a black object in its right hand, while the figure in the blue suit is holding a black object in its left hand. Both figures have a simple, cartoonish design with minimal detailing.", + "gt": "Finally, \"Leadership\" is expressed through an individual climbing a staircase while assisting another person upward. It represents the concept of leading by example, and guiding others towards success, showcasing the traits of a good leader." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "caption": "Time to hire has more than doubled over the last 5 years", + "gt": "The image highlights a statistic related to hiring efficiency, pointing out that the time to hire has more than doubled over the last 5 years. A graphical element beside the text emphasizes this increase in time with a \"+2X\" indicating the doubling." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "caption": "A purple banner with the text \"$4,000 average cost to hire in U.S.\" in white, with a white border around the banner.", + "gt": "This section of the image indicates that the average cost to hire someone in the U.S. is $4,000, emphasizing the financial implications of the recruitment process for employers." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "caption": "A pie chart with a blue color scheme, displaying a percentage of 36% on the right side, with a white border around the pie.", + "gt": "Illustrated here is a pie chart displaying that 36% of employers are unable to find the talent they need when it is needed. This statistic points to the challenges in matching skills and job openings in a timely manner." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "caption": "A globe with a blue ocean and white landmasses, featuring a black stand and a purple banner with white text.", + "gt": "Described here is the significant potential economic benefit (\"$2.7 trillion impact to global GDP\") that could result from using more efficient talent platforms, suggesting that improvements in recruiting methods could have a profound impact on the global economy." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "caption": "46% of companies are sometimes or frequently understaffed", + "gt": "This part of the image addresses organizational efficiency, with a statement that 46% of companies are sometimes or frequently understaffed. The figure is accompanied by a graphic showing the 46% proportion." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/50.png", + "caption": " how travelers identify their travel style", + "gt": "This area presents the title and introductory text providing an overview of the image's intent. It introduces the concept of \"Travel Personas,\" indicates that these personas are used to identify individual travel styles, and how these styles are significant for personalized engagement in marketing. It references a report by the CMO Council from 2018." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/50.png", + "caption": "A circular planner with a white background featuring a colorful illustration of a man in a red hat and a woman in a blue hat, both holding a smartphone. The man is holding a map and a book, while the woman is holding a suitcase. The text \"THE SMART PLANNER\" is written in bold, black letters at the top.", + "gt": "A visual and textual depiction of \"The Smart Planner\" travel persona. This persona, representing 31% of travelers, is illustrated by a character with suitcases, a camera, binoculars, and a hat, suggesting a well-prepared and organized traveler." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/50.png", + "caption": "A circular graphic with a white background featuring a stylized illustration of a man and a woman standing side by side. The man is wearing a dark jacket, a hat, and a backpack, while the woman is dressed in a red top, blue shorts, and a hat. 
The text \"THE RELAXED NOMAD\" is written in bold, uppercase letters at the top of the circle.", + "gt": "This illustrates \"The Relaxed Nomad\" persona. With 25% representation, the image shows two individuals in hiking attire with a backpack, indicating a laid-back and adventurous travel style." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/50.png", + "caption": "A circular graphic with a white background featuring a family of four. The father is holding a baby in his arms, and the mother is holding a suitcase. The two children are standing next to them, with the boy on the left holding a suitcase and the girl on the right holding a suitcase. The family is surrounded by a thin, light-colored border.", + "gt": "Depicts \"The Deal Seeker\" persona, representing 22% of travelers. The image shows a family with suitcases and shopping bags, suggesting a focus on economical travel and value for money." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/50.png", + "caption": "A circular graphic with a white background featuring a cartoon character sitting on a blue chair, holding a brown suitcase. The character is wearing a yellow shirt and gray pants, and is looking out of a window with a blue sky and clouds. The word \"STRESSER\" is written in bold, black capital letters at the top of the circle.", + "gt": "Presents \"The Nervous Stresser\" persona with 13% representation. The image depicts an anxious individual in an airplane seat, clutching the armrests, reflecting a traveler who experiences stress during trips." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/50.png", + "caption": "A circular white background with a thin black border.", + "gt": "Showcases \"The Adventurous Thrill-Seeker\" persona, accounting for 5% of the traveler demographic. 
The image portrays two characters skydiving, indicating a preference for high-energy and adventure-filled travel experiences." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/50.png", + "caption": "The Luxury Budget-Buster", + "gt": "Portrays \"The Luxury Budget-Buster\" persona, constituting 1% of travelers according to this depiction. The image includes a character sipping a drink on a plane, implying a tendency towards indulgence and high expenditure." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/50.png", + "caption": "A person with long hair, wearing a blue and white striped outfit, is walking while holding a red suitcase in their left hand and a blue suitcase in their right hand.", + "gt": "Displays \"The Business Road Warrior\" persona, also making up 1% of the traveler profile. The graphical representation includes a character briskly walking with a rolling suitcase and carrying a briefcase, suggesting frequent travel for business purposes." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/24.png", + "caption": "A flat-screen computer monitor with a thin bezel, displaying a colorful interface with text and graphics. The monitor is mounted on a stand with a rectangular base.", + "gt": "An illustration depicts a female customer service representative wearing a headset and holding a notepad, with an envelope icon indicating email communication." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/24.png", + "caption": "A circular orange background with a white border.", + "gt": "Depicted here is a male customer support agent with a headset. Behind him are symbols such as a magnifying glass and a wrench, suggesting a focus on service and problem-solving." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/24.png", + "caption": "A circular logo with a light blue background featuring a stylized globe in the center, divided into four quadrants. The globe is depicted in a darker blue shade with lighter blue highlights. Surrounding the globe is a thin, dark blue border. To the right of the globe, there is a yellow star with a black outline.", + "gt": "This image shows a customer support agent with a globe and a phone headset in the background. The presence of a star and headphones suggests excellence in global support." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/24.png", + "caption": "A cartoon character with a white shirt, black tie, and black headphones. The character has a light brown complexion and is wearing a black headset with a microphone. The character is pointing upwards with the right hand.", + "gt": "In this graphic, a male figure with a headset is surrounded by symbols: a question mark, gears, and a light bulb. This represents expertise in finding solutions." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/24.png", + "caption": "A double-ended wrench with a blue handle and black jaws. The wrench has a straight, flat head with a pointed tip and a circular opening. The handle is cylindrical with a textured grip and a blue band near the head.", + "gt": "Featuring tools and a gauge, the illustration conveys a commitment to quality in customer service, indicated by the 'Quality Service' text." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/24.png", + "caption": "A person with short brown hair, wearing a purple short-sleeved shirt and a white undershirt, is holding a smartphone in their right hand. 
The person is also wearing a black headset with a microphone.", + "gt": "The design shows a female representative with a headset alongside a mobile phone displaying a wifi signal and a callback option, emphasizing telecommunications services." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/24.png", + "caption": "A hand holding a black and gray telephone handset, with the hand wearing an orange life jacket.", + "gt": "This panel portrays a customer support individual with a wrench, highlighting the concept of assistance with technical or practical issues." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/24.png", + "caption": "A circular logo with a red border and a white background. Inside the circle, there is a stylized representation of a person wearing a white shirt and a black tie. The person is holding a microphone in their right hand. The background of the circle features a pattern of various flags, including the Union Jack, the flag of the United Kingdom, and the flag of the European Union.", + "gt": "Illustrated here is a customer support agent with a headset in front of a backdrop depicting the UK flag, a speech bubble, and a phone, suggesting language translation services." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/24.png", + "caption": "A woman with short brown hair, wearing a headset with a microphone, a pink short-sleeved shirt, and a white undershirt. She is holding a yellow star in her right hand and a white rectangular object with a black border in her left hand.", + "gt": "A female customer service consultant is represented, with symbols of 24-hour availability and a gold star, signifying round-the-clock excellence." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "caption": "A stylized illustration of a human heart with a prominent red color, featuring a blue and pink color scheme on the upper left side, and a blue and pink color scheme on the lower right side. The heart has a distinct, rounded shape with a visible aorta and a ventricle. The illustration includes a small, circular blue detail near the top left, resembling a valve or a small organ.", + "gt": "The image features a human heart symbolizing a strong heart as one of the benefits of running." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "caption": "A stylized human figure with outstretched arms and legs, featuring a circular head with a dot in the center, and a body with a simple, flat design.", + "gt": "This panel showcases an icon of a shield with a check mark, representing the immune system's boost from running." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "caption": "A digital scale with a blue display showing the weight, featuring a pair of feet on a black surface.", + "gt": "A silhouette of a figure measuring their waist indicates that running can aid in weight loss." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "caption": "A pair of pink, heart-shaped lungs with a central tube connecting them, labeled \"Respiratory system.\"", + "gt": "The artwork depicts a pair of lungs, signifying the respiratory system's enhancement due to running." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "caption": "A yellow smiley face with a black outline, featuring two black dots for eyes and a curved black line for a mouth, set against a white circular background.", + "gt": "A smiling face emoticon suggests that running can improve one's mood." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "caption": "A human knee joint with a visible bone structure, including the knee cap, patella, and surrounding ligaments, with a smooth, rounded shape and a slight curvature.", + "gt": "An illustration of a leg bone signifies that running increases bone density." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "caption": "A brain with a detailed, textured surface, featuring various shades of brown and tan, with a prominent, curved, and slightly protruding structure on the lower right side.", + "gt": "An image of a brain is used to illustrate the benefit of improved brain function from running." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "caption": "A red heart with a white outline and a white line running through the center, resembling a stethoscope.", + "gt": "The image features a detailed representation of the cardiovascular system, emphasizing its strengthening through running." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/11.png", + "caption": "The word \"HOTAP\" is written in bold, uppercase letters. The letters are in a gradient of colors, starting with red at the top, followed by orange, yellow, light blue, and ending with dark blue at the bottom. 
The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance.", + "gt": "The image displays the word 'HOT' in large, bold, uppercase letters with varying colors for each letter. The 'H' is in red, the 'O' is in mustard yellow, and the 'T' is in a light blue color." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/11.png", + "caption": "A circular smiley face with a yellow face and a red background. The face has two closed eyes with black pupils and a wide, open mouth with a brown tongue. Two blue tears are depicted on either side of the mouth.", + "gt": "Here, a round, yellow cartoon-like emoji with blue tears, symbolizing laughter or crying with joy, is shown. Below it, the word 'HUMOR' is written in uppercase letters." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/11.png", + "caption": "A circular orange background with two stylized human hands facing each other, each hand with five fingers. Below the hands, the word \"OPENNESS\" is written in bold, uppercase letters.", + "gt": "This depicts an open hand graphic in brown color, which is centered within an orange circular background. Underneath the image, the word 'OPENNESS' appears in capital letters." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/11.png", + "caption": "A yellow circular background with a white hand clapping, featuring a light brown hand on the left and a white hand on the right. The hands are touching each other, and there are small white lines radiating from the center where they meet. Below the circle, the word \"TOUCH\" is written in bold, uppercase letters.", + "gt": "Shown is an illustration of two hands coming together in a handshake or high five, set against a yellow circle. Below, the word 'TOUCH' is described in uppercase letters." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/11.png", + "caption": "A circular light blue background with a large red heart in the center. Inside the heart, there is a white exclamation mark. Below the heart, the word \"ATTENTION\" is written in bold, uppercase letters.", + "gt": "A graphic of a heart with an exclamation mark within it is displayed within a light green circular background. Written below is the word 'ATTENTION' in uppercase letters." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/11.png", + "caption": "A circular blue background with two cartoon-style faces, one on the left and one on the right, both with a neutral expression. The left face has brown hair with a tuft on top, and the right face has light brown hair with a bun. A small white line separates the two faces. Below the circles, the word \"PROXIMITY\" is written in bold, uppercase letters.", + "gt": "A portrayal of a person with a gender-neutral appearance, featuring brown hair, is encircled in blue. A sequence of dashes leads from the character to the bottom right, with the word 'PROXIMITY' written in block capitals." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/11.png", + "caption": "A circular blue eye with a white sclera and a brown iris, featuring a small white dot in the center of the iris.", + "gt": "The image shows a close-up illustration of a stylized blue eye with a large brown pupil, against a dark blue background. Below the eye is the word 'EYE CONTACT' written in all caps." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "caption": "A person with a red shirt and a green background.", + "gt": "The image depicts an illustration of a person with flushed cheeks and a thermometer in their mouth, indicating a high temperature, alongside the word \"fever.\"" + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "caption": "A person with a surprised expression, wearing a red shirt and a green circular background.", + "gt": "The image shows an individual coughing into their hand, representing a symptom identified by the word \"cough.\"" + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "caption": "A circular green background with a stylized illustration of a person's face, featuring a red shirt and a green tie.", + "gt": "This panel illustrates an individual appearing unwell, with a sick expression and a hand over their mouth. The word \"vomiting\" is associated, indicating it as a symptom." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "caption": "A stylized illustration of a person with a surprised expression, featuring a large, round head with a small, round nose and a wide, open mouth. The person has short, dark hair and is wearing a red garment with a yellow collar.", + "gt": "Here, an individual is portrayed with their hands near their throat, their cheeks flushed, and an uneasy expression. The term \"dyspnea\" adjacent to the figure defines the displayed respiratory distress." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "caption": "A person with a serious expression, wearing a red shirt and white pants, is sitting on a white chair.", + "gt": "An individual is seen clutching their stomach, with a distressed expression, representative of \"diarrhea\" which is indicated by the corresponding label." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "caption": "A circular green background with a white border.", + "gt": "The illustration shows a pair of human lungs with a highlighted area indicating inflammation. The word \"pneumonia\" is present to describe the condition being depicted." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "caption": "A circular green background with two red kidney-shaped structures, each with a yellowish-brown outline, positioned symmetrically on either side of a central pinkish-red structure.", + "gt": "Featured is a graphic representation of a pair of kidneys with a highlighted area in red, indicating distress or damage. Alongside is the phrase \"renal failure,\" signifying the medical condition exhibited." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/23.png", + "caption": "A blue rectangular banner with the text \"Interesting Facts\" in white, centered. To the left of the text, there are three white horizontal lines. To the right of the text, there is a red heart symbol.", + "gt": "This region displays the heading \"Interesting Facts\" at the top, set against a blue background with a three-line menu icon to the left and a heart symbol to the right." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/23.png", + "caption": "A stylized panda with a round face, black ears, and black patches around its eyes. The panda has a white face with a black nose and a small, curved mouth. The background is a solid green circle.", + "gt": "Depicts a graphical icon of a panda bear's face on a green background with the label \"Animals\" beneath it." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/23.png", + "caption": "A green circular plate with a white center and a yellow ring around the edge.", + "gt": "Shows an icon representing a plate and silverware on a green background, labeled as \"Diet Nutrition.\"" + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/23.png", + "caption": "A circular logo with a dark blue background featuring a red heart shape in the center. A white, wavy line crosses the heart horizontally.", + "gt": "Includes a graphical icon that combines a heart shape and a pulse line on a dark background, labeled \"Diseases Disorders.\"" + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/23.png", + "caption": "A white long-sleeved shirt with a teal collar and cuffs, featuring a row of black buttons down the front.", + "gt": "Features an image of a fashionable shirt on a dark background with the word \"Fashion\" underneath it." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/23.png", + "caption": "A circular orange background with a white film clapperboard icon in the center. The clapperboard has a blue border with white squares on the left and right sides, and a white rectangular area in the middle.", + "gt": "Contains an icon resembling a film strip on an orange background, indicating the \"Entertainment\" category." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/23.png", + "caption": "A green circular icon with a white syringe and a red cross on the syringe. The syringe has a white needle and a white plunger. The word \"Drugs\" is written in black at the bottom of the icon.", + "gt": "This panel displays an icon of a syringe with a drop, which is on a green background, and is described with the words \"Drugs Addiction.\"" + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/23.png", + "caption": "A blue circular icon with a white border, featuring a white wine glass on the left and a white wine bottle on the right. The wine glass has a yellow liquid inside, and the bottle has a yellow label with a blue top.", + "gt": "Shows a depiction of a wine bottle and glass on a blue background, labeled \"Food & Drink.\"" + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/23.png", + "caption": "A circular icon with a blue background featuring two stylized human figures facing each other. The figure on the left has short blonde hair and is wearing a red shirt, while the figure on the right has short blonde hair and is wearing a brown shirt with a white collar. Below the figures, the word \"Gender\" is written in black.", + "gt": "Exhibits two stylized human figures, one male and one female, on a blue background, with the inscription \"Gender.\"" + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/23.png", + "caption": "A circular green background with a white globe in the center, featuring a detailed map of the world. Below the globe, the word \"Global\" is written in black.", + "gt": "Contains a depiction of the Earth on a green background, with the word \"Global\" beneath it." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "caption": "A person sitting on a chair with their head in their hands, wearing a blue shirt and black pants.", + "gt": "The image depicts two individuals engaged in conversation. One appears to be a professional, possibly a therapist, sitting across from a person who seems to be seeking help. The scene is accompanied by the text \"Seek Professional Help,\" suggesting that the image represents the advice to consult a mental health professional when dealing with depression." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "caption": "A person with short brown hair, wearing a blue long-sleeve shirt and yellow pants, is sitting on a white platform with a blue wave at the bottom.", + "gt": "This panel features an individual sitting on the ground with their head down, projecting a dejected or hopeless demeanor. Above the figure, the text reads \"Don't Lose Hope.\" The image conveys the message of maintaining hope as a countermeasure against feelings of depression." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "caption": "A woman with dark hair, wearing a red top, is standing with her arms crossed. Above her, there is a thought bubble with a smiling face and a cloud.", + "gt": "An illustration of a female figure is shown alongside the phrase \"Practice Mindfulness.\" She appears calm and collected, with her eyes closed and a slight smile, which indicates a serene state of mind, commonly associated with mindfulness practice." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "caption": "A person with red hair, wearing a green top and blue pants, is sitting on a window sill with their legs crossed.", + "gt": "Here, an individual is seen looking out of a large window onto a sunny landscape with trees. The phrase \"Rethink Your Perspective\" suggests that the image is advising a change in one's outlook, possibly to a more positive or broader view, as a way to combat depression." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "caption": "A woman with dark hair tied back, wearing a blue sleeveless top and black leggings, is running with her right arm bent and her left arm extended forward. She has a black headband and is wearing white socks and gray running shoes.", + "gt": "The image presents a person in activewear, taking a stride forward with a focused expression. The associated text, \"Stay Active,\" recommends physical activity as a method for improving mental health and battling depression." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "caption": "A woman with long black hair, wearing a green tank top and black pants, is sitting in a meditative pose with her hands raised above her head.", + "gt": "This panel displays a person in a yoga pose, meditating with eyes closed and hands in a position of focus. The text \"Meditate\" indicates that the image is suggesting meditation as a therapeutic practice for managing depression." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "caption": "A green dinosaur with a white belly, a red comb on its head, and a purple tail.", + "gt": "The image shows three gel ice packs in green, purple, and blue colors, with distinctive shapes, resembling a dinosaur, a star, and a fish. Accompanying text suggests \"Take out the one you need.\"" + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "caption": "A cartoon boy with a purple headband, wearing a blue t-shirt and blue jeans, is holding a purple object to his head.", + "gt": "An illustration depicts a cartoon boy holding an ice pack to his head. Text indicates the ice pack has multi-functionality and advises using the ice pack for \"the relief area for the doctor recommended time of 20 minutes.\"" + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "caption": "A green refrigerator with a yellow handle on the door, featuring a freezer compartment on top and a larger refrigeration section below.", + "gt": "Detailed instructions on how to use the ice pack are given, with two methods highlighted: \"TO USE COLD\" involving refrigeration, and \"TO USE HOT\" instructing to microwave the pack for 10 seconds and check the temperature." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "caption": "A green refrigerator with a yellow drawer at the bottom.", + "gt": "Guidelines for storage are portrayed, advising to \"put the item in the storage bag, for longer shelf life, keep pack in freezer while not in use.\"" + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/4010.jpg", + "caption": "A black, rectangular, metal trash can with a hinged lid and a handle on the front.", + "gt": "The object appears to be a small brown wooden shed, likely used for storage, situated on a patch of grass. It has a clearly visible slanted roof, possibly for rain runoff, and looks to be a single-door structure typically found in a backyard or garden setting." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/4010.jpg", + "caption": "A dense cluster of green leaves with a mix of light and dark green hues, forming a thick canopy.", + "gt": "This object is a tree with thick, lush foliage, representing a mature specimen that provides shade and greenery. It stands behind a smaller, sparser tree and is part of a larger grouping of trees that appear to create a natural boundary or backdrop for the area." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/4010.jpg", + "caption": "A cylindrical, metallic pole with a reflective surface and a consistent diameter throughout its visible length.", + "gt": "A single metal pole is embedded in the ground in a vertical orientation. It seems to be a simple, slender structure, possibly serving as a support or part of a larger construction that isn't fully visible. The lawn surrounding it is well-trimmed and maintains an even appearance." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2407550.jpg", + "caption": "A deep, dark blue ceramic bowl with a glossy finish and a slightly flared rim.", + "gt": "This bowl, appearing to be dark blue, is situated against a background, likely part of kitchenware." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2407550.jpg", + "caption": "Stainless steel oven with a black glass door, featuring a digital control panel on the right side with multiple buttons and a display screen.", + "gt": "The tabletop is made of dark marble, showcasing a glossy finish and reflecting its surroundings slightly." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2407550.jpg", + "caption": "A white, dual electrical outlet with two vertical receptacles, each consisting of two parallel slots above a round grounding hole, installed on a wall plate.", + "gt": "The light switches are white, contrasting with the dark wall, likely plastic, and appear functional." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2407550.jpg", + "caption": "A white, dual electrical outlet with two vertical receptacles, each consisting of two parallel slots above a round grounding hole, installed on a wall plate.", + "gt": "Positioned in the background, these white light switches are paired on a wall above the countertop." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2407550.jpg", + "caption": "Stainless steel built-in oven featuring a large glass door with a curved handle at the top, a digital control panel above the door with multiple buttons and a display screen, and a prominent brand logo centered above the control panel.", + "gt": "This silver oven, with digital controls and a handle, appears modern and built into the cabinetry." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2407550.jpg", + "caption": "A tall, slender vase with a flared rim and a narrow neck that gradually widens into a bulbous body, featuring a glossy finish with a gradient of colors transitioning from a deep blue at the top to a lighter blue and then to a greenish hue towards the base.", + "gt": "An indistinct blue and green object, possibly decorative, is partially visible against a lighter backdrop." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2407550.jpg", + "caption": "The floor is composed of medium-toned wooden planks with a smooth, polished finish. The planks are laid out in a parallel arrangement, running horizontally across the image. The wood grain is subtly visible, adding a natural texture to the surface.", + "gt": "The floor, constructed of hardwood, showcases a natural finish with variations in wood grain." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2407550.jpg", + "caption": "A vertical, multi-tiered spice rack with a dark finish, featuring an array of round, transparent jars with visible contents, each tier progressively smaller from top to bottom, and a flat base for stability.", + "gt": "The jar holder, likely metal, is mounted to the wall, containing jars that may hold spices or ingredients." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/402.jpg", + "caption": "A round, metallic table with a smooth, reflective surface and a central pedestal base.", + "gt": "This is an image of a silver metal table situated outside on a paved ground. The table has a shiny, reflective surface indicative of being metallic." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/402.jpg", + "caption": "A chair with a high, curved backrest featuring a diamond pattern in a light color, possibly beige or cream. 
The seat appears to be a solid, light-colored material, and the chair has a dark-colored frame or legs.", + "gt": "The object is an outdoor chair characterized by its red backrest and tan seat. It appears sturdy and designed for outdoor settings, likely part of a café or restaurant patio." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/402.jpg", + "caption": "A purple place mat with a textured surface and a white circular design in the center.", + "gt": "The item in question is a piece of lavender paper that seems to be placed atop a metal table. The paper's edges are distinctly visible against the table's surface." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/402.jpg", + "caption": "A vertical traffic light with three circular lenses arranged in a column, displaying a red light in the top lens, with the middle and bottom lenses unlit.", + "gt": "Visible here is a yellow traffic light, suspended above the street. The light is not illuminated and it stands against a light sky, possibly signaling a traffic-stop scenario." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/402.jpg", + "caption": "A pink and white striped umbrella with a scalloped edge design.", + "gt": "A large red and white striped umbrella stands open, presumably providing shade or shelter in an outdoor setting. Its vibrant colors attract attention." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/402.jpg", + "caption": "A tall, dark brown tree trunk with a rough, textured bark.", + "gt": "A brown tree trunk is seen beside a sidewalk. The trunk's bark is rugged and it appears to be a mature, healthy tree, offering shade to the vicinity." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/402.jpg", + "caption": "A chalkboard sign with a wooden frame displaying the text \"Chocolat & Sables\" in elegant cursive script, followed by \"Café\" in a smaller, bold font.", + "gt": "Displayed is a black chalkboard featuring white text. It seems to be placed on a sidewalk, often used for displaying messages or menus outside establishments." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/402.jpg", + "caption": "A vertical, rectangular street sign with a white background and black lettering, featuring a list of destinations or locations aligned to the left, each separated by a horizontal line.", + "gt": "A window is seen on the side of a tan-colored building. It appears to be rectangular, typical of building windows, and reflects the adjacent surroundings." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/000000518836.jpg", + "caption": "A chestnut horse with a white star-shaped marking on its forehead, dark eyes, and a soft, dark muzzle. Its ears are pricked forward, and it has a well-groomed mane.", + "gt": "A close-up view of a horse's head, predominantly brown with a distinctive white patch on its forehead and visible mane." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/000000518836.jpg", + "caption": "A brown horse with a dark mane.", + "gt": "This is the body of a brown horse, most likely the same one as the head seen in the close-up. Its front body is visible." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/000000518836.jpg", + "caption": "A light gray horse with a darker mane and tail, featuring a well-muscled build, standing profile with all four legs visible, and a calm demeanor.", + "gt": "A white horse is seen from a side angle in the distance, grazing or standing in a meadow with trees and a fence." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/000000205601.jpg", + "caption": "A white, rectangular table with a smooth surface and rounded corners, featuring a shadow pattern cast across it.", + "gt": "A frying pan on a heat source contains sautéed meat and vegetables, emitting steam, indicating the food is hot and being cooked." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/000000205601.jpg", + "caption": "A modern electric stove with a sleek black glass cooktop featuring multiple heating elements, including a large circular element and a smaller oval-shaped element, both with visible heating patterns. The stove has a stainless steel trim along the front edge, and there are control knobs located at the front, partially visible, suggesting a contemporary design.", + "gt": "An electric stovetop features a radiant burner that is glowing, suggesting it is turned on and providing heat for cooking." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/000000205601.jpg", + "caption": "A metallic spoon with a reflective surface and a tapered handle, featuring a rounded bowl with a slight indentation on one side.", + "gt": "A kitchen knife with a green handle rests on a countertop; its blade appears sharp and suitable for food preparation." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/000000205601.jpg", + "caption": "A metallic spoon with a reflective surface and a tapered handle that widens towards the end, featuring a rounded bowl with a pointed tip.", + "gt": "A human hand is captured in motion, seasoning or stirring the food in the pan, contributing to the cooking process." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/000000299654.jpg", + "caption": "An adult zebra with a distinctive black and white striped pattern covering its head, neck, and partial torso. 
The stripes on the head are narrow and closely spaced, wrapping around the muzzle and forehead, while the stripes on the neck are broader and more defined. The ears are upright and striped, and the mane is short, erect, and follows the contour of the neck with alternating black and white stripes. The visible part of the torso shows a pattern that transitions from broader stripes on the neck to narrower stripes on the body.", + "gt": "The image depicts the head of a zebra, with distinctive black and white stripes covering its fur. The animal's ears are pointed upwards, indicating alertness. The eyes are visible, showcasing a gentle gaze, and the nose is close to the ground, suggesting the zebra is grazing or sniffing the terrain. The mane is partially visible as a series of short, erect black hair between the zebra's ears." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/000000107939.jpg", + "caption": "An octagonal red stop sign with bold, all-caps, white \"STOP\" lettering centered on the sign.", + "gt": "The object is a rectangular street sign with white letters on a green background, indicating the name of a street. It is affixed to a metal pole and is located above and slightly to the left of a stop sign. The sign reads 'NORTH AVE' suggesting it's likely an indication of the street or direction. It appears to be a standard street name sign used in many urban settings." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/000000107939.jpg", + "caption": "An octagonal red stop sign with bold, all-caps, white \"STOP\" lettering centered on the sign.", + "gt": "This object is a red hexagonal stop sign with white uppercase letters spelling 'STOP'. It is attached to the same metal pole as another sign, below and to the right of it. The sign is designed to alert drivers to stop and is a widely recognized traffic control device. 
The edges of the sign appear sharp and undamaged, suggesting it is in good condition." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/000000437374.jpg", + "caption": "A dark brown, square-shaped cushion with a visible corner that appears to be soft and plush.", + "gt": "A plush, padded object designed for comfort, potentially used on a sofa." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/000000437374.jpg", + "caption": "A dark brown, square-shaped pillow with a visible corner that appears to be soft and plush.", + "gt": "Similar to the first object, this is also a stuffed and soft piece intended for supporting or resting." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/000000437374.jpg", + "caption": "The pillow features a dark brown, textured fabric with a subtle, intricate pattern of interwoven lines and shapes, creating a rich, tactile surface.", + "gt": "Decorative accessory adorned on the ear, visible as a small, shiny object." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/000000437374.jpg", + "caption": "A young girl with curly dark hair, wearing a long-sleeved pink shirt under a patterned top with a mix of blue, white, and pink colors. Her expression is one of surprise or excitement, with her mouth slightly open and eyes wide. Her arms are raised with hands open, as if gesturing or reaching for something.", + "gt": "This is a child with an open mouth and animated facial expression, possibly speaking or expressing surprise." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/000000437374.jpg", + "caption": "A young boy with curly black hair and a light complexion. He is wearing a striped sweater with horizontal bands of blue, white, and black, and a ribbed crew neck. His expression is one of mild surprise or curiosity, with his mouth slightly open and eyes wide. 
His hands are clasped together in front of him.", + "gt": "Appears to be a young boy, casually dressed, gripping an electronic device with attention." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2407508.jpg", + "caption": "A person wearing a black helmet with a visor, a white and green ski jacket with a red logo on the chest, and red gloves.", + "gt": "The figure is wearing a red ski suit with a blue helmet and goggles. Their stance is open and welcoming, arms outstretched, and they seem to be an instructor addressing a group of students on a snowy slope." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2407508.jpg", + "caption": "The knee of the trousers is made of a dark, durable fabric with a slightly textured surface. It features a prominent, irregular tear on the right side, revealing a lighter, worn inner lining. The tear is jagged and extends from the upper thigh to the lower calf, with frayed edges. The fabric appears to be thick and suitable for outdoor activities, with a subtle sheen that catches the light.", + "gt": "A person is mostly obscured by the instructor but can be identified as a ski student by the helmet. The student is wearing a purple jacket with green sleeves and appears to be in mid-motion, learning to ski." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2407508.jpg", + "caption": "A person wearing a red jacket with a hood, black pants, and black gloves.", + "gt": "There is a student dressed in green ski gear with visible ski poles, possibly following instructions. They are viewed from the side, indicating movement or a pause during skiing." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2407508.jpg", + "caption": "A blue helmet with a glossy finish, featuring a prominent white stripe running horizontally across the middle, bordered by thin red lines. 
The helmet has a black visor attached to the front, with a red and white logo centered on the visor. The top of the helmet displays a red and white emblem, and there are black straps visible on the sides.", + "gt": "A clear blue sky with scant clouds, indicative of a bright, sunny day ideal for outdoor activities such as skiing. This backdrop is above a snowy mountain setting." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2407508.jpg", + "caption": "A person wearing a red jacket with a hood, black pants, and black gloves.", + "gt": "A ski student is captured from behind, suggesting they are moving away from the viewer. They are wearing a red jacket with black pants, indicative of typical ski wear fit for the cold environment." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2407508.jpg", + "caption": "The sleeve of the sweater is predominantly white with a bold red graphic design featuring a stylized figure in motion. The design includes a dynamic, angular shape with sharp lines and a sense of movement. The fabric appears to be a lightweight, breathable material suitable for athletic activities.", + "gt": "This student, visible from the side, is wearing a green and purple ski outfit with a matching helmet, possibly in the midst of practicing or following a ski maneuver." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2407508.jpg", + "caption": "A person wearing a black helmet with a visor, a white and black jacket with a red logo on the left chest, and black gloves.", + "gt": "A detailed examination of the instructor's black glove, which is part of standard skiing attire, suited to protect hands from cold conditions and providing better grip on ski poles." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2411153.jpg", + "caption": "A red, white, and green racing motorcycle fairing with the number 69 prominently displayed in white on a red background, flanked by two smaller white numbers on a red background. The fairing features a black lower section with a white stripe and a green accent. The motorcycle's front cowl is white with a red and green design, and the headlight is visible at the top.", + "gt": "Captured in this section is a motorcycle racer, sharply tilting while maneuvering a turn on a race track. The rider, outfitted in a full-body racing suit, is almost in a horizontal position relative to the ground, a technique used in high-speed motorcycle racing to navigate tight turns while maintaining speed. The motorcycle itself is predominantly red with hints of white and black, and it showcases a sleek, aerodynamic design typical of high-performance racing bikes. The rider's focused posture and the bike's dynamic angle suggest this is a moment of intense action during a race." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2411153.jpg", + "caption": "A rectangular, flat, gray surface with a slightly rough texture and subtle horizontal lines running across it.", + "gt": "This portion of the image displays the texture of an asphalt road, detailed with small granular elements indicative of a typical racing track surface built to offer traction and durability. A crisp white boundary line marks the edge of the racing track, contrasting with the dark gray tone of the asphalt. The road surface is illuminated by ambient light, highlighting the texture and suggesting a dry weather condition which is ideal for racing. The condition of the road suggests it is well-maintained, a necessity for the safety and performance of high-speed motorsport events." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/22d3844dcf29a07bd10f557a33684e331846f81b938ca0f742afab09c542133f.png", + "caption": "The mass squared of the n lightest pion excitations for different values of the UV cutoff. The dots represent the experimental values.", + "gt": "This region of the image displays a portion of a scientific or academic paper, specifically focusing on points that seem to outlay contents or headings within the document. The page appears to discuss topics in physics, with references to quark and meson masses, as well as lattice data. The content suggests that the document may be exploring the relationship between subatomic particles and their masses, experimental data, and theoretical models (likely within the field of particle physics or quantum chromodynamics). Each item listed is followed by ellipsis and a numerical value, denoting sections or page numbers where these topics are expanded upon within the document." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/22d3844dcf29a07bd10f557a33684e331846f81b938ca0f742afab09c542133f.png", + "caption": "The letter \"v\" is written in a serif font with a black color. The \"v\" has a classic, elegant design with a slight curve at the top and a straight vertical line extending downward. The bottom of the \"v\" is connected to a horizontal line that extends to the right.", + "gt": "This region is at the bottom of the document, typically referred to as the page-footer. In academic or scientific papers, this section could include information such as the page number, publication date, author's name, or part of the document classification system. However, the specifics of what this footer contains are not visible, as the black rectangle with a white numeric identifier covers it entirely." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "caption": " initial and optimized site stand for the S atom attachment site before and after optimization. The maximum adsorption energy is underlined.", + "gt": "The content is a caption designated for a table, which generally serves to describe the table's subject matter. The caption reads, \"TABLE 1: The geometries and adsorption energies for the structures of thioglycolic acid on Au(111) at 0.25ML.\" It provides a clear indication that Table 1 will present quantitative data regarding the geometry and energy characteristics of thioglycolic acid adsorbed on a gold (Au) substrate at a specific coverage level." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "caption": "The table shows the adsorption energy of various adsorbents on the surface of a gold atom. 
The adsorbents are listed in a column labeled \"adsorbent\" with their corresponding adsorption energy values in a column labeled \"adsorbent energy.\" The adsorbents are listed in a column labeled \"adsorbent\" with their corresponding adsorbent energy values in a column labeled \"adsorbent energy.\" The adsorbents are listed in a column labeled \"adsorbent\" with their corresponding adsorbent energy values in a column labeled \"adsorbent energy.\" The adsorbents are listed in a column labeled \"adsorbent\" with their corresponding adsorbent energy values in a column labeled \"adsorbent energy.\" The adsorbents are listed in a column labeled \"adsorbent\" with their corresponding adsorbent energy values in a column labeled \"adsorbent energy.\" The adsorbents are listed in a column labeled \"adsorbent\" with their corresponding adsorbent energy values in a column labeled \"adsorbent energy.\" The adsorbents are listed in a column labeled \"adsorbent\" with their corresponding adsorbent energy values in a column labeled \"adsorbent energy.\" The adsorbents are listed in a column labeled \"adsorbent\" with their corresponding adsorbent energy values in a column labeled \"adsorbent energy.\" The adsorbents are listed in a column labeled \"adsorbent\" with their corresponding adsorbent energy values in a column labeled \"adsorbent energy.\" The adsorbents are listed in a column labeled \"adsorbent\" with their corresponding adsorbent energy values in a column labeled \"adsorbent energy.\" The adsorbents are listed in a column labeled \"adsorbent\" with their corresponding adsorbent energy values in a column labeled \"adsorbent energy.\" The adsorbents are listed in a column labeled \"adsorbent\" with their corresponding adsorbent energy values in a column labeled \"adsorbent energy.\" The adsorbents are listed in a column labeled \"adsorbent\" with their corresponding adsorbent energy values in a column labeled \"adsorbent energy.\" The adsorbents are listed in a 
column labeled \"adsorbent\" with their corresponding adsorbent energy values in a column labeled \"adsorbent energy.\" The adsorbents are listed in a column labeled \"adsorbent\" with their corresponding adsorbent energy values in a column labeled \"adsorbent energy.\" The adsorbents are listed in a column labeled \"adsorbent\" with their corresponding adsorbent energy values in a column labeled \"adsorbent energy.\" The adsorbents are listed in a column labeled \"adsorbent\" with their corresponding adsorbent energy values in a column labeled \"adsorbent energy.\" The adsorbents are listed in a column labeled \"adsorbent\" with their corresponding adsorbent energy values in a column labeled \"adsorbent energy.\" The adsorbents are listed in a column labeled \"adsorbent\" with their corresponding adsorbent energy values in a column labeled \"adsorbent energy.\" The adsorbents are listed in a column labeled \"adsorbent\" with their corresponding adsorbent energy values in a column labeled \"adsorbent energy.\" The adsorbents are listed in a column labeled \"adsorbent\" with their corresponding adsorbent energy values in a column labeled \"adsorbent energy.\" The adsorbents are listed in a column labeled \"adsorbent\" with their corresponding adsorbent energy values in a column labeled \"adsorbent energy.\" The adsorbents are listed in a column labeled \"adsorbent\" with their corresponding adsorbent energy values in a column labeled \"adsorbent energy.\" The adsorbents are listed in a column labeled \"adsorbent\" with their corresponding adsorbent energy values in a column labeled \"adsorbent energy.\" The adsorbents are listed in a column labeled \"adsorbent\" with their corresponding adsorbent energy values in a column labeled \"adsorbent energy.\" The adsorbents are listed in a column labeled \"adsorbent\" with their corresponding adsorbent energy values in a column labeled \"adsorbent energy.\" The adsorbents are listed in a column labeled \"adsorbent\" with 
their corresponding adsorbent energy values in a column labeled \"adsorbent energy.\" The adsorbents are listed in a column labeled \"adsorbent\" with their corresponding adsorbent energy values in a column labeled \"adsorbent energy.\" The adsorbents are listed in a column labeled \"adsorbent\" with their corresponding adsorbent energy values in a column labeled \"adsorbent energy.\" The adsorbents are listed in a column labeled \"adsorbent\" with their corresponding adsorbent energy values in a column labeled \"adsorbent energy.\" The adsorbents are listed in a column labeled \"adsorbent\"", + "gt": "This is a table containing organized data. It lists various configurations of thioglycolic acid adsorbed on an Au(111) surface, along with numerical values for initial and optimized parameters such as adsorption distance (ds–Au), polar angle (θ), and adsorption energy (E_ads). The data is structured in columns with headings for different parameters and rows corresponding to different adsorption sites and tilt directions. The table is used to convey detailed quantitative information in a comparative format, facilitating the analysis of changes in geometry and energy after optimization." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "caption": "The adsorption energy is underlined.", + "gt": "Here appears to be an excerpt of text, possibly from a research article or report, focusing on detailed scientific analysis. The text discusses the shortest Au-S bond length and mentions 'initial and optimized site,' likely referring to the states before and after some experimental or computational procedure. The content seems to pertain to the interpretation of the data presented in the table above it, providing context and insights into the structural data of the adsorption process." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "caption": "The adsorption energy is underlined.", + "gt": "This section of text also seems to be a detailed analytical discussion, possibly a continuation of the content from the previous text excerpt. It specifically highlights the adsorption energy for the most stable structure of a molecule on the Au(111) surface and the preferred adsorption site. It suggests a close relationship with both the data in the table above and the scientific interpretation or conclusion drawn from that data." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "caption": "A black number \"5\" with a white outline, featuring a bold, sans-serif font.", + "gt": "The content in this region appears to be a page-footer, usually found at the bottom of journal pages or official documents. It may contain information such as the page number, document section, publication date, or authors' names. Such footers are used for navigation and citation purposes." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "caption": "The graph shows the performance of two different models, \"Atom segmentation\" and \"Bond segmentation,\" in terms of their ability to predict the frequency of certain elements. The x-axis represents the frequency of the elements, and the y-axis represents the percentage of correct predictions. 
The graph is divided into two sections, with the left section showing the performance of \"Atom segmentation\" and the right section showing the performance of \"Bond segmentation.\" The left section has a blue line for \"Atom segmentation\" and a red line for \"Bond segmentation,\" while the right section has a blue line for \"Atom segmentation\" and a red line for \"Bond segmentation.\" The graph also includes a legend at the bottom, which explains the colors used for each model.", + "gt": "The image is a collection of three scatter plots, each corresponding to a distinct type of prediction performance evaluated by F1 score. The x-axis represents the frequency (presumably of occurrence in the training set), while the y-axis represents the F1 score, which is a measure of test accuracy. The plots are labeled (a) Atom prediction performance, (b) Bond prediction performance, and (c) Charge prediction performance. Each plot features a variety of points labeled with chemical symbols or bond types, indicating that the data relates to chemical structures." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "caption": "The F1 score for segmentation and classification networks. There is clearly a correlation between the performance of the networks on the different prediction types and the frequency of the specific type in the training data set. The classification networks perform significantly better than the segmentation networks.", + "gt": "This is a caption for the series of scatter plots shown in . It provides an interpretation of the data, stating that there is a clear correlation between the performance of neural networks on different prediction types and the frequency of the specific type in the training dataset. It is noted that classification networks perform significantly better than segmentation networks." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "caption": "Performance of segmentation network", + "gt": "This is a subsection title within the document that reads \"Performance of segmentation network.\" It indicates that the following text will discuss the results and analysis related to the evaluation of the segmentation network's performance." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "caption": "The F1 score is a measure of the performance of a segmentation network, which is a network that predicts the presence or absence of a certain object or feature in an image. The F1 score is defined as the weighted average of the precision and recall of the network, where precision is the proportion of true positives and recall is the proportion of true positives and false positives. The F1 score is used to evaluate the performance of segmentation networks and is often used in image segmentation tasks.", + "gt": "This is a paragraph that explains how the performance of the segmentation network is measured by the F1 score for pixel predictions for different atom, bond, and charge types. The text discusses how performance correlates with the frequency of these types in the training data and references a correlation visible in Figure 4, assuming that Figure 4 corresponds to the scatter plots in ." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "caption": "Performance of classification networks", + "gt": "This is another subsection title within the document that reads \"Performance of classification networks.\" It signals that the subsequent paragraph will describe the performance evaluation for classification networks." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "caption": "The F1 score is a measure of the performance of a classification network, which is a network that uses a set of input features to predict a label or class. The F1 score is defined as the harmonic mean of the precision and recall of the network, and it is used to evaluate the performance of a network in terms of its ability to correctly classify instances. The F1 score is a common metric used in machine learning and artificial intelligence to evaluate the performance of classification networks.", + "gt": "This paragraph details the performance of classification networks, mentioning that the F1 score is used for evaluation. It highlights a correlation between F1 score and the frequency of different atom, bond, and charge types in the training set. Although the segmentation is not perfect, the classification networks can maintain accuracy. Results are summarized in Figure 4, which likely refers to the scatter plots in ." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "caption": "8.3 Overall graph accuracy", + "gt": "This is a subsection title denoted \"Overall graph accuracy,\" which suggests that the following section of the document will focus on the combined accuracy measurements of the previously discussed networks." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "caption": "Now that we know the performance of the different parts, we can combine those building blocks and measure the overall accuracy of the resulting graph predictions. As already mentioned in a previous section, the segmentation network and classification networks should be used as presented in algorithm 1 in order to build the resulting graph. 
Images in 3 different", + "gt": "In this paragraph, the text outlines how combining the performance of different parts can produce an overall accuracy for graph predictions. It implies that integration of segmentation and classification network results, as indicated by an algorithm, can construct the resulting graph, referencing images in three different blocks." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "caption": "The number 11 is displayed in a bold, sans-serif font. The numerals are black and evenly spaced, with the \"1\" on the left and the \"1\" on the right. The \"1\" on the left is slightly larger than the \"1\" on the right.", + "gt": "This is the page number of the document, specifically '11,' marking its location within the document's sequence of pages." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "caption": "little washing resulted in a higher reducing sugar content, which is thought to overpower the GI lowering effect of the polyphenols and increase the GI of the sugar.", + "gt": "This region appears to be a paragraph of text discussing the outcome of a washing process on reducing sugar content. It notes that this process resulted in a higher content of reducing sugar which is thought to overshadow the glycemic index (GI) lowering effect of the polyphenols and may increase the GI of the sugar." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "caption": "The low GI sweet spot was demonstrated by graphing the results of the sugars in Table 3 below. 
This graph shows that at least 22mg CE/100mg sucrose needs to be retained during sugar processing to produce a low GI sugar.", + "gt": "This region of text outlines a graphical demonstration of the 'GI sweet spot' related to the sugars shown in a referenced table. It explains that a certain minimum amount of sucrose (22mg CE/100mg) needs to be retained during sugar processing to maintain a low GI, and that if additional polyphenols are present but the reducing sugars are too high, then the low GI effect is negated." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "caption": "A graph of GI polyphenol content of these sugars.", + "gt": "This region includes a section header titled \"Table 3 - Example sugars,\" which implies that the region is categorizing and summarizing data related to various sugars, likely in a tabular format." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "caption": "Example 6 - Washing of massecuite to desired polyphenol content", + "gt": "The text in this region describes the effects of increasing the reducing sugar content of sugar and its impact on the GI, moisture content, and the behavior of glucose and fructose when polyphenol content is increased. It concludes that optimizing moisture and reducing sugar content is insufficient to lower the GI in the presence of higher polyphenol levels." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "caption": "The polyphenol content of each sample was determined (see Example 2). The massecuite samples were washed until they were the depth of color that is associated with the desired polyphenol content (roughly 500 to 2000 ICUMSA) and the polyphenol content measured. 
The results are in Table 4 below. The skilled person", + "gt": "This region is a section header for the text that follows, indicating that the content will deal with \"Example b - Washing or massecuite to desired polyphenol content.\" This text likely explains an example or case study related to the process of washing sugar massecuite to achieve a certain level of polyphenol content." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "caption": "Table 3 - Example sugars", + "gt": "This region contains text describing an experimental process where two different sugar mill samples were tested for polyphenol content after undergoing washing to a certain depth of color. It discusses how the polyphenol content was measured against desired levels and mentions results found in a specific table." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "caption": "The image shows a collection of documents and a QR code. The documents are arranged in a grid, with each document having a black background and white text. The top row of documents has a QR code in the center. The bottom row of documents has a QR code in the center. The QR codes are rectangular and have a grid pattern.", + "gt": "This is a figure. It includes several images arranged in a grid layout depicting various stages of a document life cycle or processing steps. Each image shows a different state of documents, possibly related to digitalization or text recognition processes. These images likely serve as a visual representation of the document's evolution through a particular workflow, such as scanning or Optical Character Recognition (OCR)." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "caption": " The OCR process.", + "gt": "This is a caption associated with a figure. 
It reads \"Figure 5: The OCR process.\" This caption identifies and describes the figure that it is associated with. The figure it refers to likely illustrates the stages or aspects of the OCR process, which could involve converting scanned images of text into machine-encoded text." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "caption": "Available OCR", + "gt": "This is a figure. It shows a piece of text with visual markings comparing two sections labeled \"Available OCR\" and \"Improved OCR.\" The annotations indicate corrections or enhancements made in the 'Improved OCR' section compared to the 'Available OCR' section. This figure serves to demonstrate the efficacy of certain OCR technologies or methodologies by providing a before-and-after comparison." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "caption": " Excerpt from the Hong Kong report with different versions of OCR output. The Internet Archive image containing this excerpt can be accessed here", + "gt": "This is a caption associated with a figure. It reads \"Figure 6: Excerpt from the Hong Kong report with different versions of OCR output. The Internet Archive image containing this excerpt can be accessed here:\" followed by a URL. This caption provides context for the associated figure, indicating that it is an excerpt from a specific report and acknowledges the source of the image. It helps readers understand the purpose of the figure and where they can find additional information." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "caption": "Journal of Data Mining and Digital Humanities", + "gt": "This is part of a footer. It contains the name of a journal, \"Journal of Data Mining and Digital Humanities,\" along with the ISSN number, which is a unique identifier used for serial publications. 
This area of the document provides information about the publication in which the article or research paper may be found." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "caption": "//jmdh.ep sciences.org\" in lowercase letters.", + "gt": "This is also part of a footer. It includes a URL, \"http://jdmdh.episciences.org\", which likely directs readers to the website of the journal or publication mentioned in . This URL provides a way for readers to access more information or related content online." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "caption": "A black number \"9\" with a bold, sans-serif font, featuring a slight shadow effect on the right side, giving it a three-dimensional appearance.", + "gt": "This is a page number. It is located in the footer area of the document and provides the numerical identifier \"9\" for the current page. This helps readers navigate the document and facilitates referencing specific sections." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "caption": "The text \"IOOF Annual Report 2012\" is displayed in a serif font, with the letters \"IOOF\" in a larger size and the words \"Annual Report 2012\" in a smaller size, aligned to the left.", + "gt": "This region is identified as the page-header of the document. It contains the title of the document, which reads \"2012 Annual Report 2013.\"" + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "caption": "4. 
Non-Executive Directors' Remuneration", + "gt": "This portion is a section-header labeled \"Non-Executive Directors' Remuneration.\" It indicates that the following section will discuss the payment and remuneration details for non-executive directors of the company." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "caption": "4.1 Components of Non-Executive Director remuneration", + "gt": "This area is another section-header specifying \"Components of Non-Executive Director remuneration.\" This header suggests a breakdown of the various elements that constitute the remuneration for non-executive directors." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "caption": "A green text on a white background that reads \"page 37\".", + "gt": "Found at the bottom of the page, this region is the page-footer. It's a small section that is typically used for providing footnotes, disclaimers, or publication information for the document." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "caption": "The region is a company.", + "gt": "This is a table detailing a \"Share purchase plan.\" It contains columns for the name of the individual, the amount of shares acquired, and the share price range at acquisition dates, alongside with the total sum. It lists information about shares acquired by specific individuals at specified price ranges during a specific time frame." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "caption": "The Company's Constitution requires that the aggregate remuneration paid or provided to all Non-Executive Directors in any financial year by the Company, its subsidiaries and associated entities may not exceed an amount approved by shareholders. This ceiling amount includes all remuneration provided to Non-Executive Directors, including superannuation but not including retirement benefits. The current limit of $980,000 per annum was approved by shareholders at the 2010 Annual General Meeting. There has been no increase to the Non-Executive Director fee pool since this time.", + "gt": "This is a block of text providing detailed information on \"Current Board fees\" and \"Post-employment benefits.\" It specifies the annual fees for different board roles and outlines the post-retirement benefits provided to non-executive directors with terms of board service." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "caption": "Non-Executive Directors receive a fee for being a Director of the Board. An additional fee is paid to the Chairman of the Board. Non-Executive Directors do not receive additional fees for service on Board Committees.", + "gt": "This region contains text related to the \"Deferred share purchase plan.\" It describes the nature of the share purchase plan, specifying the conditions under which shares were purchased, the performance criteria associated with the plan, and details regarding the share price and acquisition dates." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "caption": "The region is a space with a dimension of 1, which is a dimensionless quantity. 
It is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. 
The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. 
The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which is a dimensionless quantity. The region is a part of a larger space with a dimension of 1, which", + "gt": "This region is categorized as text. It discusses the mathematical concept of homotopy groups designated π_n(M), focusing on their ability to classify different dimensional hypersurfaces within a manifold M. The text further explains that the triviality of these homotopy groups is linked to the connectivity of the space they represent, with specific mentions of the concepts of domain walls in cosmology and topological defects arising from symmetry breaking during phase transitions in the universe." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "caption": "The formation of monopole (point-like) defects is the origin of the well-known monopole problem, since the Standard Model group contains a U(1) factor. This formation of unwanted defects was one of the original motivations to introduce a phase of inflation.", + "gt": "This region of text delves into the implications of symmetry breaking in theoretical physics. It connects the process of symmetry breaking to the generation of monopole-like defects, and it references the Standard Model's group construction that includes a U(1) factor. The text implies that this formation of defects played a pivotal role in historical scientific developments." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "caption": " \"Note that the above topological conditions of formation of defects only govern the formation of topologically stable defects. 
It was however found that defects solutions can form even when the topology is trivial. The most well-known example are the electro-weak symmetry breaking which are perturbatively stable for a range of parameters which are not realized in nature, and belong to the broader class of embedded defects.\"", + "gt": "The text in this section links the theoretical concepts mentioned earlier to a practical application: the motivation for introducing a phase of inflation in cosmological models. It characterizes the topological conditions for the formation of defects and points out that certain solutions for these conditions can exist even in the absence of topologically stable defects. References are made to specific types of defects and academic citations are included to support these statements." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "caption": "These defects are a priori unstable though mechanisms (such as plasma effects) have been found to stabilize them. They are of interest for inflation model builders since this mechanism can allow lift the constraints from the formation of cosmic strings (see Sec. IV F on D-term inflation)", + "gt": "The content in this region pertains to the study of cosmic defects and their stability. It addresses scenarios in which initially unstable defects might become stable through various mechanisms, such as the effects of plasma. These considerations are relevant to the inflationary model in cosmology, particularly the constraints from the formation of cosmic strings." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "caption": "The text \"π3-d\" is written in a serif font, with the letters \"π\" and \"3\" in a larger size, and the letters \"d\" in a smaller size. 
The text is aligned to the left.", + "gt": "This region is categorized as a formula and presents a mathematical equation related to the text's discussion about topological defects and homotopy groups. The equation seems to represent a relationship that is essential to the argument or analysis presented in the categorical text regions it is associated with." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "caption": "The number 72 is displayed in a bold, sans-serif font with a black color. The digits are evenly spaced and aligned horizontally.", + "gt": "The last region is identified as a page-footer. It likely contains publication and/or authorship information, a page number, or possibly an indication of the section of the document where the content can be found. Since it is a page-footer, its purpose is primarily to aid in the organization and navigation of the document." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0301_0188_0040.jpg", + "caption": "A stylized illustration of a woman's profile with a serene expression, featuring a large, detailed hair bun with a spiral design. The woman has a fair complexion and is wearing a pink top with a pattern of small, dark dots. Her eyes are closed, and her lips are gently curved into a smile. The background consists of abstract, cloud-like shapes in shades of white and gray, creating a soft, ethereal atmosphere.", + "gt": "The image appears to be a stylized illustration of a side profile of a person's head and upper torso. The person's face is depicted with a serene or peaceful expression, eyes closed and a faint smile, implying a sense of calm or contemplation. Behind the figure, there are abstract shapes resembling clouds or wind patterns that swirl around the head, which could suggest thoughts, memories, or a state of mental flow. 
The illustration uses a muted color palette, predominantly warm shades of beige, pink, and gray, with a touch of red in the figure's attire, which has a dotted pattern. This artwork likely serves to evoke a mood or theme related to the content of the document in which it's included, possibly regarding mental health, mindfulness, psychology, or the creative process." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0301_0188_0040.jpg", + "caption": "2021 Scientific American", + "gt": "This is a footer section of an image or document. It contains a citation that reads \"© 2021 Scientific American,\" indicating that the image or the content of the document is copyrighted by Scientific American. This informs the viewer about the source of the content and copyright year, serving both as an attribution and a legal notice to respect the intellectual property rights associated with the material." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "caption": "The number 62 is displayed in a bold, sans-serif font with a slight italicization. The digits are black with a white outline, giving them a three-dimensional appearance. The number is centered on the page.", + "gt": "This section is labeled as \"Chapter 2: Motivation.\" It functions as a heading signifying the start of a new chapter or section within the document, providing readers with an indication of the chapter's theme, which in this case is to establish the rationale or impetus behind the subject matter discussed in the chapter." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "caption": " Motivation", + "gt": "This region, also labeled as \"Chapter 2: Motivation,\" serves a similar purpose to , functioning as part of the chapter heading that presents the focus of the chapter, potentially implying that the author will delve into the reasons or driving forces guiding the study or research presented." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "caption": "The region is a redshifted throat region in the near horizon, appearing redshifted to an observer at infinity.", + "gt": "This region contains a paragraph of text that discusses specific concepts related to a theoretical framework, possibly in the field of theoretical physics or string theory. It mentions a scenario where excitations in a certain 'near horizon throat region' appear redshifted to an observer at infinity. The text discusses the energy associated with these excitations and touches on limits pertaining to string theory, suggesting that in a particular limit, the full Type IIB string theory must be considered. The paragraph concludes with an implication that supergravity must be considered in the context of near-horizon geometry within the scope of string theory." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "caption": "The region is a white background with black text.", + "gt": "In this text paragraph, the document appears to be discussing two theoretical pictures related to the same low-energy limit within theoretical physics or string theory. 
It mentions the field theory picture with supergravity and an \\( N = 4 \\text{SU}(N) \\) SYM on the D branes, as well as the geometry picture with supergravity in flat space and Type IIB string theory. It suggests that the document is comparing and contrasting these two theoretical perspectives and proposing that they are both decoupled theories with identical asymptotic conditions." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "caption": "The regions for which analytic tools exist for these two different pictures turn out to be completely incompatible.", + "gt": "This text section seems to conclude the discussion by mentioning that the analytical tools for two differing theoretical scenarios are completely incompatible. It references the Born-Infeld action and suggests that a mathematical comparison between different models yields coincident D-branes for an \\( \\text{SU}(N) \\) two-form field strength, relating to a broader discussion on theoretical physics and string theory." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "caption": "2 = 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p) - 2g(2p", + "gt": "This region exhibits a mathematical formula that is relevant to the discussion within the document. The formula appears to link certain theoretical physics concepts, connecting string coupling constants \\( g_s \\) with D-brane charges and configurations. The formula is most likely important in the context of supporting the document's claims about supergravity or string theory." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "caption": "The private information of the insider has been generally assumed to be static. 
For example, in [2] the insider knows ex ante the final value of the firm, and in [6] she knows ex ante the time of default of the company issuing the asset. This literature has shown that i) the presence of an insider on the market does not necessarily lead to arbitrage (i.e. the value function of the insider is finite), and that ii) the presence of insiders might be considered beneficial to the market, in the sense that it leads to higher information efficiency of the equilibrium price process. Nevertheless, the assumption of insider's perfect foresight is unrealistic, since the fundamental value of the firm should be connected to elements like future cash flows, productivity, sales, etc.", + "gt": "The paragraph in discusses the conventional assumption that an insider's private information is static, citing specific examples from the literature. It elaborates by stating that in certain works, insiders are assumed to know the final value of an asset both before and after the default of the company issuing the asset. The text suggests that the presence of insiders does not always lead to market arbitrage and may contribute positively to the market by leading to higher information efficiency in price processes." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "caption": "The assumption of insider's perfect foresight is unrealistic, since the fundamental value of the firm should be connected to elements (like future cash-flows, productivity, sales, etc.) that have an intrinsically an aleatory component. 
That is, a more natural assumption would be that the fundamental value is in itself a stochastic process, and that the insider can observe it directly or at least observe it in a less noisy way than the other agents on the market.", + "gt": "The text in challenges the assumption of an insider's perfect foresight as unrealistic, reasoning that the fundamental value of a firm is tied to dynamically changing elements like cash flows and sales, among other factors. The paragraph presents the idea that the fundamental value is stochastic, implying that it is subject to random fluctuations, and that the insider has the advantage of perceiving these fluctuations more clearly than other market participants." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "caption": "The paper relaxes the assumption of static insider information and studies the equilibrium trading and price processes, as well as market efficiency, in a setting with dynamic private information.", + "gt": "In , the document introduces the paper's goal, which is to relax the assumption of static information and examine the equilibrium in trading and price processes and market efficiency when insiders have dynamic private information. The paragraph sets the context for a more detailed exploration of how markets operate under these conditions." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "caption": "The model I consider in this paper is a generalization of the static information setting of 2.", + "gt": " contains text which explains that the model considered in this paper is a broader version of the earlier static models. The paper's intention is to cover dynamic information scenarios and improve on previous models that covered a narrower range of trading strategies and pricing rules." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "caption": "The market information is a generalization of the static information setting of [2]. It is a much smaller set of admissible trading strategies and pricing rules, with much more stringent assumptions on the parameters, than the ones considered in my work. It shows the existence of one possible Markovian equilibrium, while my work characterizes all optimal strategies and establishes that there is a unique Markovian equilibrium price process, i.e. an equilibrium price that allows the insider to trade undetected and depends only on the total order process.", + "gt": "The paragraph in discusses the findings of the paper, which include the identification of a Markovian equilibrium that is inconspicuous, allows insiders to trade without being detected, and is solely dependent on the total order process. It underscores the unique nature of this equilibrium and how it enhances the market efficiency in certain conditions." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "caption": "A black number \"3\" with a white outline, featuring a bold, sans-serif font.", + "gt": "The text in suggests empirical outcomes where revealing information might be beneficial. Specifically, it contrasts different market equilibrium scenarios and suggests that in non-Markovian price processes, it’s often better for insiders to disclose their private information." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "caption": "A red, cursive sign with the word \"Abroad\" written in a flowing, elegant font.", + "gt": "The Comprehensive Description includes \"Abbondanza\" displayed in a script font that conveys a sense of stylishness or elegance, which may suggest that it is the name of a business, possibly a restaurant or some sort of food-related establishment, given its association with abundance or plenty often related to food." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "caption": "A red, three-dimensional, cursive-style sign with the word \"Cafe\" written in a flowing, elegant font.", + "gt": "The Comprehensive Description for \"Cafe\" suggests that the text is identifying a type of establishment where coffee and light meals may be served. The font is straightforward and easily legible, which is typically used for clarity and immediate recognition for passersby." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "caption": "The signboard features the letters \"USIS\" in a bold, sans-serif font, with the \"U\" and \"S\" in red and the \"I\" and \"S\" in black. The background of the signboard is white, and there is a thin black border around the edge of the signboard.", + "gt": "The Comprehensive Description for \"USIS\" indicates a text that is likely an acronym or a name presented in a bold and blocky font, common for official or institutional entities. It is placed on the side of a van, suggesting it could be the branding of a company or a service, possibly linked to the van's purpose or ownership." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "caption": "The word \"ESTATE\" is written in bold, black capital letters on a yellow background.", + "gt": "The text \"ESTATE\" is written in capital letters with a bold typeface that has clear and uniform strokes, implying a sense of authority and prominence. It appears against a yellow background, which suggests visibility and is likely meant to catch the eye of passersby. The text is likely part of a business sign for a company dealing with property, real estate sales, or management." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "caption": "The word \"AGENTS\" is written in bold, uppercase letters with a black outline and a yellow fill.", + "gt": "The text \"AGENTS\" displayed similarly in capital letters and bold typeface complements the text in . The typeface is consistent, suggesting that both are part of the same sign. The dark text against the yellow background stands out, indicating the nature of the business below, which is likely involved in real estate agency work." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "caption": "The word \"SAXONS\" is written in bold, black capital letters on a yellow background.", + "gt": "The word \"SAXONS\" is written in capital letters with a font style that is bold and prominent, but with a slightly more decorative style than and . This difference could be a stylized choice to make the brand name distinctive. Positioned on a façade above a window, it is part of the business's branding, likely the name of the company." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "caption": "The signboard features capitalized, serif lettering with a metallic finish, spelling out \"SAXONS\" against a dark background.", + "gt": "This contains the same text as , \"SAXONS\", indicating that the text is repeated within the image. This repetition reinforces the importance of the name as part of the branding. The text style and location, again above a window, maintain the brand's visibility from multiple angles." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "caption": "The signboard features capitalized, serif lettering with a metallic finish, spelling out \"ESTATE\" in a bold, sans-serif font. The letters are evenly spaced and have a reflective surface that suggests a polished texture.", + "gt": "Displaying the word \"ESTATE\" in the same bold, capital letter style as observed in . This repetition at a lower part of the building indicates a consistent branding approach across the business' presence on the building, and its placement closer to eye level increases readability for pedestrians." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "caption": "The signboard features capitalized, serif lettering spelling \"AGENTS\" with a dark background and a lighter, possibly metallic, border. The letters are evenly spaced and have a three-dimensional appearance, suggesting a raised or embossed texture.", + "gt": "The word \"AGENTS\" is identified, and like , it mirrors the style and size of the sign in , ensuring that the message of the business being an estate agency is clear. This consistent branding facilitates quick recognition and understanding of the services offered." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "caption": "The word \"Triple\" is written in a cursive, green font with a white outline. The letters are slightly italicized and have a playful, rounded design.", + "gt": "The text \"Triple\" appears in a retro cursive script, likely chosen to convey a sense of nostalgia or classic style, which is consistent with the overall branding. It is set against a yellow portion of the sign, and the color choice here is a mint green which provides a pleasing contrast that makes the text stand out." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "caption": "The word \"WHITE\" is written in bold, uppercase letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally.", + "gt": "\"WHITE\" is written in bold, capital letters, featured on a green ribbon-like background that cuts across the sign. The font is sans-serif, which gives a modern and clean look. The use of capital letters in this context suggests emphasis and importance." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "caption": "The word \"SPOT\" is written in bold, white, uppercase letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally.", + "gt": "The phrase \"SPOT\" is displayed in a sans-serif, uppercase font similar to the text in . It is placed within the same green ribbon background, mirroring the style and maintaining design consistency. This positioning completes the name or title represented on the signage." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "caption": "A stylized, cursive letter \"O\" with a green outline and a white interior, featuring a small, curved tail extending from the bottom right of the \"O\".", + "gt": "\"O's\" is written in a script that echoes the retro flair seen in . This script is mint green, presented on a yellow backdrop, and it features an apostrophe, signifying a possessive or a contraction. The stylized \"O\" has a red center dot, adding to the thematic color scheme." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "caption": "A white, stylized letter \"A\" with a bold, rounded top and a slightly curved bottom, featuring a smooth, glossy surface.", + "gt": "The word \"by\" is located on the lower left area of the central graphic and is likely to be a connector or a preposition relating to the larger text elements in the image. It's written in small, lowercase letters, contrasting in size to the other texts, suggesting a subordinate role in the information hierarchy." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0001122.jpg", + "caption": "The word \"NEW\" is written in bold, red, uppercase letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally.", + "gt": "The text in this area reads \"NEW\". The font is bold and stylized with thick vertical lines and sharp edges, suggesting a strong, impactful presence. The letters are colored in red, which stands out against the white background of the fabric they are printed on. There are black vertical lines that run down the fabric, giving the impression of pinstripes. The text placement and style are reminiscent of classic athletic or team-related apparel, often used to represent a specific city or team name." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0001122.jpg", + "caption": "The word \"YORK\" is written in bold, red, uppercase letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally.", + "gt": "The text in this area reads \"YORK\". Similar to the previous region, it features a bold and stylized font in red, contrasting with the white pinstriped background. The consistent style between this text and that of suggests they form a single phrase, typically associated with a particular location or team. The font size and its commanding presence imply that the text is intended to be easily read and recognized from a distance, characteristic of team jerseys or sports merchandise." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/img_162.jpg", + "caption": "The signage displays the word \"Colchester\" in a bold, sans-serif font with a dark blue color on a light background.", + "gt": "The text \"Colchester\" is displayed in a straightforward, sans-serif font with a bold weight, which makes it highly legible and easy to read. The text color is white, which contrasts sharply with the red background, creating a standout effect that captures attention. This type of text presentation is typically used for clear communication and effective signage. \"Colchester\" is likely the name of a place, possibly a destination or location referenced on a signpost or directional marker. The choice of a bold and contrasting color scheme is intentional, aimed at ensuring that the text is discernible from a distance and under various lighting conditions. The text is centrally aligned within the marked area, suggesting the importance of the information it conveys. The presence of the symbol above the text, resembling a stylized pair of railway tracks, indicates that this sign is associated with a railway service or station. 
The purpose of the text in this context is to inform viewers of a railway station name or a destination reachable via train services." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "caption": "The signage features bold, uppercase letters in a dark color, spelling out \"AUAM\" with a slight arch to the letters.", + "gt": "The text \"AYAM\" is presented in large uppercase letters on a signage board. The font appears bold and designed to be eye-catching, serving the purpose of promoting or identifying a business or product associated with chicken, as \"ayam\" means chicken in Malay and Indonesian." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "caption": "The Signage - Store features large, bold, red letters with a slight shadow effect, spelling out \"RIA\" on a yellow background.", + "gt": "The text \"RIA\" appears next to \"AYAM\" in the same font and style, following the design pattern of the sign. It seems to be part of a larger phrase or brand name, although without additional context it is challenging to ascertain its full meaning or association." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "caption": "A yellow rectangular sign with the word \"SMASHED\" in bold, black capital letters.", + "gt": "The word \"SMASHED\" is in uppercase letters and retains the same font consistency and styling as the previous words, indicating it's part of the same signboard. The use of the word \"smashed\" could be describing a method of food preparation, possibly relating to the menu items offered by the establishment." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "caption": "The signage displays the word \"FRIED\" in bold, uppercase letters. 
The letters are dark and set against a lighter background, creating a contrast that makes them stand out.", + "gt": "The word \"FRIED\" appears in the same bold, attention-grabbing font as the other text elements in the signage. The usage of the term \"fried\" aligns well with food-oriented establishments and could denote a particular style of cooking advertised by the business." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "caption": "A rectangular, yellow sign with the word \"CHICKEN\" in bold, black, uppercase letters.", + "gt": "The term \"CHICKEN\" completes what seems to be a descriptive phrase relating to the nature of the food provided at this location. Presented in the same visual style as the other text elements on the sign, it confirms the establishment’s focus on chicken dishes." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "caption": "Accommodation", + "gt": "The text \"Accommodation\" appears on a signboard, suggesting the label for a location where lodging facilities are provided. The text is bold and capitalized, providing clear visibility and significance, thus indicating direction to the accommodation facilities within the vicinity." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "caption": "The word \"Office\" is written in a bold, sans-serif font with a dark blue color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance.", + "gt": "The text \"Office\" displayed similarly to , is also on the signboard, and its typography suggests it is an instructional marker guiding individuals towards offices located nearby. Its distinct appearance functions as a navigational aid for visitors seeking office spaces." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "caption": "The word \"Nightline\" is written in a bold, sans-serif font with a dark blue color.", + "gt": "The term \"Nightline\" is prominently featured, possibly indicating a nighttime service or a helpline available after-hours. This text, like the others on the sign, caters to nighttime assistance or inquiries, potentially providing crucial information for individuals seeking support during late hours." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "caption": "A red and white sign with the word \"BUBBA\" in bold, red, uppercase letters on a white background, bordered by a thin red line.", + "gt": "The text \"BUBBA\" appears in bold, capital letters with a font that is playful and somewhat informal, possibly evoking a casual or friendly atmosphere. The position is prominently displayed at the top of a circular logo, which suggests its importance as a distinguishing element or a brand name." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "caption": "A red, three-dimensional, uppercase letter \"G\" with a shadow effect, set against a white background with a red border.", + "gt": "The word \"GUMP\" is presented in a similar bold and playful font directly below . Both words form a cohesive phrase when read together, implying a connection or partnership, possibly in a business context." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "caption": "The word \"SHRIMP\" in bold, white, uppercase letters with a slight shadow effect, set against a dark blue background.", + "gt": "The word \"SHRIMP\" is placed below and , completing the phrase that seems to be the focal point of the circular logo. 
The font style remains consistent with the previous text, reinforcing the brand's identity and likely indicating the type of product or service offered." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "caption": "A curved signboard with a white background and a red border. The word \"RESTAURANT\" is written in black, capital letters, with each letter spaced evenly along the length of the sign.", + "gt": "\"RESTAURANT\" is written in a smaller, yet bold font beneath the word \"SHRIMP\". This text specifies the nature of the business associated with the overarching brand identified by the preceding text." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "caption": "A curved signboard with a white background and a red border. The word \"MARKET\" is written in large, bold, black capital letters, with a smaller \"M\" in the same style positioned to the left of the word \"MARKET.\"", + "gt": "The word \"MARKET\" appears in a smaller font at the bottom of the circle, suggesting a secondary or additional aspect of the business, perhaps indicating a place where goods are sold as part of the company's offerings." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "caption": "A circular object with a white border and a dark blue center, featuring a white symbol resembling a stylized letter 'C' with a dot above it.", + "gt": "\"CO\" could stand for \"Company,\" abbreviated and presented beside the main brand name, which is common practice for businesses to denote a corporate entity." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "caption": "A circular trademark with a white background and a black border, featuring the letters \"TM\" in black, with the \"T\" above the \"M\".", + "gt": "\"TM\" indicates that the entire phrase formed by , , and is a trademark. This protects the brand's unique identity and legally secures its use exclusively for the business's purposes." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "caption": "The text \"EVENING\" is written in bold, uppercase letters with a gradient of red to pink. The letters are evenly spaced and have a slightly shadowed effect, giving them a three-dimensional appearance.", + "gt": "The text \"EVENING\" appears in a sans-serif, bold font that is capitalized for emphasis. It is located on the lower part of a product label, positioned just above another text element that indicates further details about the product. The text serves to indicate either the usage time or a key ingredient, \"Evening Primrose,\" of the product, likely related to wellness or personal care." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "caption": "The word \"PRIMROSE\" is written in bold, uppercase letters with a gradient of pink to red. The letters are evenly spaced and have a slightly distressed texture.", + "gt": "\"PRIMROSE\" is the second part of a phrase in which the text is styled similarly to the text in . It follows directly below \"EVENING,\" suggesting the complete term \"EVENING PRIMROSE.\" The positioning and styling are consistent with , reinforcing the connection between the two words. The term \"Evening Primrose\" is usually associated with the name of a plant, often used in the context of essential oils or natural product ingredients." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "caption": "The word \"BASE\" is written in bold, uppercase letters with a gradient of pink to red. The letters are evenly spaced and have a slightly textured appearance.", + "gt": "The term \"BASE\" is part of the full term \"BASE OIL,\" which is shown in a smaller and possibly lighter weight sans-serif typeface compared to and . Positioned at the bottom of the product label, it likely indicates the type of product, suggesting that the contents of the bottle can be used as a carrier or base oil in aromatherapy or skincare." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "caption": "The word \"OIL\" is written in bold, uppercase letters with a gradient of pink to red. The letters are evenly spaced and have a slightly textured appearance.", + "gt": "\"OIL\" completes the term \"BASE OIL,\" as mentioned in . The font and positioning maintain consistency with the description provided in , reinforcing its role as part of a descriptive phrase related to the product's use or contents. Together, \"BASE OIL\" likely designates the product's category within a larger set of similar goods." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/tr_img_01644.jpg", + "caption": "A white, three-dimensional number \"4\" with a smooth, glossy finish and a slight shadow effect on the right side, giving it a three-dimensional appearance.", + "gt": "The text \"46\" appears in a serif font with distinct, prominent stylistic flares at the ends of the strokes, which is characteristic of serif fonts. These numerals are white, providing a strong contrast against a dark background plate, which appears to be made of slate or a similar material. 
The plate is mounted onto a brick wall, and there are two spherical, possibly metallic, fixtures attached to the plate on either side, which seem to be serving as decorative mounting posts. The purpose of this text likely indicates an address or number associated with the location, commonly used to identify specific residential or commercial units." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "caption": "The number \"31\" is displayed in a bold, sans-serif font. The digits are green and have a slight shadow effect, giving them a three-dimensional appearance. The number is centered on the image.", + "gt": "This area seems to indicate the numeric value '31,' which, in a calendrical context, may reference the number of days in a month. It does not appear to have any interactive features based on the screenshot." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "caption": "A green rectangular button with rounded corners featuring the word \"Connecter\" in white lowercase letters on the left side and a yellow lock icon with a keyhole on the right side.", + "gt": "This seems to be a button or a link labeled 'Connecter' which, when translated from French, means 'Connect' or 'Log in'. It is likely an interactive element that upon being clicked, would prompt the user to access an account or initiate a connection process." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "caption": "The month \"Novembre\" is written in a bold, sans-serif font with a green color. The letters are evenly spaced and aligned horizontally.", + "gt": "This section contains the French word 'Novembre', which is the month of November. It appears to be a part of a list of months, possibly for navigating a calendar or archives by month. 
It may be an interactive element that allows users to view content from November." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "caption": "A green rectangular button with rounded corners, featuring a white lowercase \"a\" followed by a lowercase \"n\" and a lowercase \"z\" in a sans-serif font.", + "gt": "This is a button or link with the text 'Annoncez' followed by information icon (i). The French word 'Annoncez' translates to 'Advertise'. This suggests that it is a call-to-action for users to advertise, possibly by clicking this button or link. The information icon typically indicates additional details available upon interaction." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "caption": "A green calendar with white text displaying the days of the week from Sunday to Saturday, with the days of the week labeled in a vertical arrangement. The text is in a sans-serif font, and the calendar appears to be a digital or printed format.", + "gt": "This area displays the number '07', which could signify a day of the month, especially since it is seen next to a date heading in the format 'Vendredi 7 Mai 2021', which translates to 'Friday, May 7, 2021'. It seems to be a static element without interactivity." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "caption": "The month \"September\" is written in a bold, sans-serif font with a green color. The letters are evenly spaced and aligned horizontally.", + "gt": "Similar to , this is labeled 'Septembre', which is the French word for September. It is part of the same apparent navigational element for a calendar or archive sorted by months and is likely interactive as well." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "caption": "The text \"Lien Web\" is written in a bold, sans-serif font with a green color. The letters are evenly spaced and aligned horizontally.", + "gt": "This section reads 'Liens Web', which translates to 'Web Links' from French. This likely refers to a section of the web page intended to direct users to other related sites or resources. It is probably interactive, with each listed link being clickable." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "caption": "The date is 14 May 2021.", + "gt": "The number '15' is visible here, and when combined with the context of a calendar visible in the screenshot, it likely represents the 15th day of a month. This element does not seem to be interactive itself." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "caption": "04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30", + "gt": "Here we see the number '04' which, in the context of the surrounding calendar, might represent the 4th day of a month. It doesn't show any sign of interactivity." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "caption": "Plan du site", + "gt": "This text, 'Plan du site', stands for 'Site Map' in French and usually refers to a detailed page listing where one can find an overview of all the sections and pages within the website. It is usually an interactive element that, when clicked, will take the user to the sitemap page." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "caption": "A dark green banner with the text \"Book Now\" in white, bold, sans-serif font. 
To the right of the text, there is a white arrow pointing to the right.", + "gt": "This region depicts an interactive button with the text \"BOOK NOW\" overlaid on it. Usually, buttons like this on websites are clickable and lead the user to a page where they can schedule an appointment or reserve a service. The button is stylistically designed to stand out and grab attention, potentially suggesting it is a call-to-action feature for users to quickly access the booking process for the service provided by the website." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "caption": "About Us\nBuyLuxurious Doona", + "gt": "This section of the website features a text link with the phrase \"Buy Luxurious Doona.\" It likely serves as a navigational element, which upon clicking, would redirect users to a page where they can purchase a \"Luxurious Doona.\" The term \"Doona\" typically refers to a type of bedding, suggesting that the site might be related to home goods or personal comfort items." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "caption": "A black rectangular button with the text \"Pay Balance\" in white, bold, sans-serif font.", + "gt": "This area contains a text link that reads \"Pay Balance.\" It is probably an interactive link that, once clicked, would take the user to a section of the website where they can complete a payment - likely concerning a service or product they have previously engaged with." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "caption": "About Us", + "gt": "Featured here is a clickable text link titled \"About Us.\" Such links generally lead users to a webpage that elaborates on the history, mission, values, or team behind the company or service. It helps users learn more about the company or website owners." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "caption": "contact us", + "gt": "This part of the webpage indicates a \"Contact us\" link. Clicking on this text would typically guide the visitor to a page featuring contact information or a form enabling the users to reach out to the company for inquiries or support." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "caption": "A teal-colored text with a gradient effect, featuring the word \"Home\" in a bold, sans-serif font. The text is in a gradient of pink to purple, with the pink being more prominent at the top and the purple more visible at the bottom.", + "gt": "\"Home\" appears to be a navigation link that, when selected, would likely redirect users to the homepage of the website. The homepage is the main page that often provides a comprehensive overview of what the website offers." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "caption": "A teal-colored email address with a white envelope icon on the left, followed by the text \"doonawash@gmail.com\" in white. To the right, there is a white text \"skip & drop free pickup.\"", + "gt": "The text here, \"doonawash@gmail.com,\" suggests an email address. This is likely provided for users to directly contact the company or service provider through email. It is not clickable but can be used to send an email using an email client or service." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "caption": "Contact us", + "gt": "Similar to , this \"Contact us\" link would allow users to access a contact section or page on the website, promoting user interaction with the service provider for queries or assistance." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "caption": "The word \"Home\" in a bold, sans-serif font, with a gradient of teal to light blue, set against a white background.", + "gt": "Just like , this \"Home\" link is a navigational feature intended to bring the user back to the site's main page, presenting a starting point or central hub for exploring the website's contents." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "caption": "A pink text that reads \"Pay Balance\" in a bold, sans-serif font.", + "gt": "Echoing , the \"Pay Balance\" text link is associated with the payment part of a transaction on the website. It is intended to facilitate users in clearing dues or completing transactions related to the services offered by the site." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/web_0558c1f4-c05b-49a8-8479-04b1575779d2.png", + "caption": "OpenStreetMap Belgium", + "gt": "This area of the webpage is part of a bullet point list under the subheading \"Local Chapters\". The subheading describes Local Chapters as country or region-level groups affiliated with the OpenStreetMap Foundation that represent their local mapping community in dealings with government, business, and media. The bullet point \"OpenStreetMap Belgium\" likely indicates that there is an established local chapter for the country of Belgium. The text appears in blue with an underline, suggesting that it is a hyperlink. Clicking on this hyperlink would presumably direct the user to more information about the OpenStreetMap community in Belgium or to their specific website." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "caption": "A white rectangular sign with black text. The top line reads \"Technik Distak\" in bold, uppercase letters. 
Below it, there is a right-pointing arrow. The bottom line displays a phone number \"055 292-50-49\" in bold, uppercase letters.", + "gt": "This area appears to be a contact detail, specifically a phone number. It typically serves as a direct line of communication for users to reach out to the company or organization featured on the website. Such contact information is usually clickable on mobile devices, enabling the user to initiate a phone call directly." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "caption": "A white rectangular badge with the number \"4.9\" in bold black font centered on it.", + "gt": "This section includes a numerical rating, which is indicative of client satisfaction, service quality, or performance measurement. It suggests that it may be connected to reviews or ratings received from clients, as denoted by the star symbol which commonly represents ratings." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "caption": "A rectangular blue button with white text that reads \"Pulsuz Consultasiya.\"", + "gt": "The text translates to \"Free Consultation\" in Azerbaijani, indicating an offering from the company to prospective clients. It is likely a call-to-action button which upon clicking, would lead a user to a form or contact option to set up a consultation without any charge." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "caption": "The word \"Portfolio\" in lowercase letters, with a bold, sans-serif font. The letters are black and evenly spaced against a white background.", + "gt": "This part of the website is labeled \"Portfolio,\" signifying that it's likely a navigation element leading to a page where the company showcases their previous work, projects, or case studies to highlight their experience and expertise." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "caption": "A blue shopping cart icon with a white outline, featuring a rectangular basket and a handle extending from the top. To the right of the basket, the word \"ITEMS\" is written in bold, white, uppercase letters.", + "gt": "This space indicates a shopping cart feature with a count of items currently in the cart, which currently stands at zero. This interactive element likely becomes clickable when items are added, allowing users to view and manage the contents of their cart." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "caption": "The word \"SUPPORT\" in bold, uppercase letters with a blue background and white text.", + "gt": "This area is typically a customer service feature, allowing users to access help or assistance through various means such as a help center, live chat, or contact information. It's usually clickable and would direct the user to a support section of the website." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "caption": "A white lowercase letter \"f\" with a bold, sans-serif font, featuring a slight shadow effect on the right side, giving it a three-dimensional appearance.", + "gt": "The text suggests a prompt to visit the company's Facebook page. This is an interactive element that, when clicked, likely redirects users to the specified social media page to engage with the company's content on that platform." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "caption": "A blue text that reads \"Your Charts\" with a white border around the text.", + "gt": "It denotes an area that likely relates to personalization for users, where they can view their astrology charts. 
This is expected to be a clickable feature which, when accessed, leads the user to a section where their personalized charts are displayed or can be created." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "caption": "A blue and white logo featuring a stylized \"X\" with a curved tail extending from the bottom of the \"X\".", + "gt": "Similar to , this is a call to action to visit the company's Twitter page. Clicking on this interactive element would redirect a user to the company's Twitter profile to view tweets and engage with their content." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "caption": "contact us", + "gt": "This is a customer contact area, providing users with a way to get in touch with the company. Clicking on this is likely to take the user to a section of the site with various contact options like email, phone, or a contact form." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "caption": "A rectangular blue button with rounded corners and a white border. The button has the text \"ADD TO CART\" in bold, white, uppercase letters centered on it.", + "gt": "This is a call-to-action button that allows users to add a product to their shopping cart. This button is interactive, and upon clicking, the chosen product would be added to the user's cart, with the action possibly reflected in the shopping cart count in ." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "caption": "The word \"PRODUCTS\" in bold, uppercase letters with a blue background and white text.", + "gt": "This area is likely dedicated to showcasing the company's range of products. Clicking here would probably lead users to a product catalog where they can browse and select items of interest." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "caption": "The word \"PODCAST\" in bold, uppercase letters with a blue background and white text.", + "gt": "It represents an area designated for a podcast. Users can expect to interact with this button to be taken to a media player or section of the website where they can listen to recorded audio content." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "caption": "A blue rectangular button with the word \"ABOUT US\" in bold, uppercase, white letters.", + "gt": "This part of the website provides company information to the user. It's normally a clickable element that leads the user to learn more about the company's history, values, mission, and team members." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "caption": "The text \"WHAT WE DO\" is written in bold, uppercase letters. The letters are evenly spaced and have a modern, sans-serif font style. The color of the text is black, and it is centered on a white background.", + "gt": "This section typically represents a menu item on a website that describes the services or actions undertaken by the organization. It usually links to a page with detailed information on the work that the organization performs, including projects, mission statements, or other relevant content." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "caption": "GWM launches livelihood micro-grants", + "gt": "This appears to be a news headline or feature article title on the website. It suggests that the organization has introduced a new initiative offering financial assistance for livelihood projects. 
Clicking on this title would likely lead to an article or post giving more information about the micro-grants program and its objectives." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "caption": "A rectangular white button with a black border and the word \"Settings\" in bold, black, sans-serif font centered on the button.", + "gt": "This is likely a button or link to a settings page where users can adjust their preferences for the website, which might include language settings, account details, notification preferences, and more." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "caption": "Privacy Policy", + "gt": "This commonly links to the website's privacy policy document, where users can learn about how the organization collects, uses, stores, and protects personal data." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "caption": "The word \"HOME\" in uppercase letters, with a bold, sans-serif font, is centered on the image. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance. The color of the text is a gradient of dark to light gray, with the darker shade at the top and the lighter shade at the bottom.", + "gt": "This is typically a navigation link that returns the user to the main homepage of the website from any other page." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "caption": "A black rectangular button with the word \"Accept\" in white, bold, sans-serif font.", + "gt": "It usually indicates a button the user can click to accept the terms of a policy, possibly related to cookies or usage terms, as indicated by the accompanying text." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "caption": "A red-bordered, oval-shaped button with the word \"DONATE\" in bold, uppercase, black letters centered on a white background.", + "gt": "This is often a prominent call-to-action button meant to direct users to a page where they can make financial contributions to the organization or cause." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "caption": "The word \"NEWS\" in bold, uppercase letters with a black background and white text.", + "gt": "Commonly a menu item that links to a news section containing articles, updates, blog posts, press releases, or other information that keeps readers informed about the organization's activities or relevant topics." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "caption": "The text \"ABOUT US\" is written in uppercase letters. The letters are evenly spaced and have a bold, sans-serif font. The color of the text is black, and it is centered on the image.", + "gt": "This is typically a link to a page where users can find more information about the organization, including history, values, team members, or accomplishments." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "caption": "contact us", + "gt": "Usually a link to a page where visitors can find contact information for the organization, such as an address, phone number, email, or a contact form." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/web_6c677961-e540-4cc5-b725-5e301019a9f9.png", + "caption": "A rectangular icon with a white background and a black border. Inside, there are three horizontal black lines of varying lengths, with the middle line being the longest. 
The lines are evenly spaced and aligned horizontally.", + "gt": "This region appears to be a toolbar located within a content editing area, likely part of a web-based application interface. The specific feature highlighted is an icon that suggests functionality related to inserting tables into the content. In a typical text editor or content management system interface, clicking this icon would presumably open a menu or dialogue box allowing the user to create and insert a table into the document. The table insertion feature commonly lets users specify the number of rows and columns, choose a table style, and sometimes adjust additional table properties such as cell padding or headers." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "caption": "The word \"News\" in a bold, sans-serif font, with a slight shadow effect, set against a light background.", + "gt": "This area encompasses a navigation element labeled \"News.\" It likely leads to a section of the website where current news relevant to the organization or its field of operation is disseminated. As a navigational element, it is interactive and upon clicking would redirect users to the page where news articles or updates are posted." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "caption": "The logo features a stylized, angular design with a teal and red color scheme. The teal shape resembles a stylized letter \"S\" with a red outline, while the red shape is a smaller, inverted \"S\" with a black outline. The word \"ServeGate\" is written in bold, black, sans-serif font to the right of the logo.", + "gt": "This area displays the company's name ServeGate, which appears to be stylized as a logo. This typically acts as a home button; clicking on it would usually take users back to the main or home page of the website." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "caption": "Our Difference", + "gt": "This heading titled \"Our Difference\" may signify a navigational item that leads to content describing what sets the organization apart from competitors. Interaction would involve clicking it to navigate to a page that likely discusses the company's unique selling propositions (USPs), mission, values, or other differentiating factors." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "caption": "About Us", + "gt": "Labeled \"About Us,\" this is another navigation item that, when clicked, would take the user to a section of the site that provides information about ServeGate, such as its history, leadership team, vision, and mission." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "caption": "A rectangular button with a white background and a thin red border. The text \"Get in touch\" is centered in bold, red, sans-serif font.", + "gt": "The text \"Get in touch\" suggests an interactive component that leads to a part of the website where users can contact the organization. This may include a contact form, phone numbers, email addresses, or other means of communication." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "caption": "The word \"Home\" in a serif font, with the letters \"H\", \"o\", \"m\", and \"e\" visible, each letter capitalized and evenly spaced.", + "gt": "The term \"Home\" designates a navigational link that typically redirects users to the front page of the website. Clicking this link would generally return the user to the starting point of their navigation experience." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "caption": "The word \"Services\" in a serif font, with a drop shadow effect, set against a light background.", + "gt": "The item labeled \"Services\" is likely a drop-down menu or a link to a page that outlines the company's offerings. Users can click on it to discover more about the services provided by ServeGate, including descriptions and possibly pricing or someone to contact for further inquiry." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "caption": "Indigenous Impact", + "gt": "\"Indigenous Impact\" might be a navigation link to a page detailing the company's impact on, contributions to, or programs associated with Indigenous communities. Interaction with this element would bring the user to either a dedicated section or might expand into a submenu listing various facets of this impact." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "caption": "ServeGate", + "gt": "This appears to be another instance of the company logo for ServeGate, similar to . It likely serves the same function as a clickable link leading back to the home page of the website." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/web_5a35d9c2-2c2d-4a49-ad0a-1408d9cac78e.png", + "caption": "A rectangular white button with a black border and the text \"Close issue\" in bold black letters.", + "gt": "The area appears to contain an interface element labeled \"Close issue\" situated within a software development environment, likely a part of an issue tracking or project management system. This interface element is a button, as suggested by its design and placement near the text input area for comments. 
When clicked, it would typically result in the associated issue being marked as resolved or closed in the system, thereby updating the status of the issue within the project's workflow. The button is designed for users to signify that the discussion, problem, or task represented by the issue no longer requires attention and can be archived or removed from active consideration." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/web_3fed2169-3c3d-43e7-baaa-3bf0e0c7134c.png", + "caption": "A vertical stack of three black dots on a white background.", + "gt": "This is a close button found on a web-based issue tracking platform, specifically GitLab. In the context of the page, it is presumably used to close the issue which is currently being viewed. When a user clicks this button, the active issue (#1460 in this case) will likely be marked as closed within the system, changing its status from open to closed. This is a standard feature in issue tracking and project management software, allowing users to manage the lifecycle of issues and tasks." + } +] \ No newline at end of file diff --git a/evaluation/MDVP-Bench/model_outputs/gar_8b.json b/evaluation/MDVP-Bench/model_outputs/gar_8b.json new file mode 100644 index 0000000000000000000000000000000000000000..f66d2e3520f1ea3668dc55c15b78cc0c8bdf3008 --- /dev/null +++ b/evaluation/MDVP-Bench/model_outputs/gar_8b.json @@ -0,0 +1,1822 @@ +[ + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_251980.png", + "caption": "Friday, Sep 2, 61°F", + "gt": "This is a status bar displaying the date \"Friday, Sep 2,\" along with the temperature \"61°F.\" It informs the user about the current day, date, and ambient temperature, likely provided by a weather service. This text is generally not interactive and serves as an informational display only." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_251980.png", + "caption": "A stylized letter \"G\" with a gradient of colors starting from red at the top, transitioning to orange, yellow, green, blue, and ending with purple at the bottom.", + "gt": "This represents a capital letter \"G\" set inside a circle. It is part of a larger icon that is likely associated with a search engine or service, commonly used as a button to initiate web searches or to access related services. Interaction with this icon typically leads to the search engine's interface or homepage." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_251980.png", + "caption": "A white, fluffy cloud with a smooth, rounded shape and a slightly irregular outline. The cloud has a soft, cotton-like texture and appears to be illuminated from the left side, giving it a gentle gradient from white to a light blue hue.", + "gt": "This icon depicts clouds, which is a visual representation often used for weather-related services or apps. It indicates current or forecasted weather conditions and is typically interactive, leading the user to a detailed weather report upon tapping." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_251980.png", + "caption": "A white gear with a blue circular center and six evenly spaced, straight teeth.", + "gt": "The gear-like icon signifies settings or options. It is a common symbol for accessing system settings or preferences in an application or operating system. Interacting with this icon usually opens a menu where various settings can be adjusted." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_251980.png", + "caption": "A white speech bubble icon with a blue outline, featuring three horizontal blue lines of varying lengths inside the bubble.", + "gt": "The icon resembling a speech bubble is often associated with messaging services or chat applications. Tapping it would commonly open a messaging app where users can send messages, photos, or videos to others." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_251980.png", + "caption": "A red, rectangular play button with rounded corners, featuring a white, centered play triangle pointing to the right.", + "gt": "This icon, featuring a triangle resembling a \"play\" button, is widely recognized as a symbol for media players or services offering video and music content. Interaction with this icon would likely launch a media playback application or service." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_251980.png", + "caption": "A stylized letter \"G\" with a gradient of colors starting from red at the top, transitioning to orange, yellow, green, and ending with blue at the bottom.", + "gt": "The icon represents a search engine or a suite of services provided by a major tech company, indicated by the letter \"G\" and vibrant colors. Interacting with this icon typically brings the user to a home screen with access to various services offered by the company, such as search, email, maps, and more." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_251980.png", + "caption": "A blue circle with a red dot above it and a yellow dot below it.", + "gt": "This icon, representing a microphone inside a colorful outline, is suggestive of a digital assistant or voice search feature. 
When interacted with, it would generally activate a voice recognition service allowing users to speak commands or queries for assistance." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_251980.png", + "caption": "A circular button with a white center and a thin black border.", + "gt": "The circle icon in a navigation bar is typically an interactive home button on a mobile device, often bringing the user back to the home screen when tapped." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_251980.png", + "caption": "A white, right-pointing arrow with a thick border.", + "gt": "The icon with a triangle pointing leftwards resembles a \"back\" navigation button, generally used to go back to the previous screen in an application or navigate backwards in a browser." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "caption": "The word \"Excel\" is written in a bold, sans-serif font with a white color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance.", + "gt": "This icon is indicative of the Microsoft Excel mobile application, recognizable by its green 'X' on a white background, which suggests a tool for creating and editing spreadsheets. It is likely an interactive element that, upon touch or click, launches the application allowing users to work with spreadsheets on their mobile device." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "caption": "The word \"Facebook\" is written in a bold, sans-serif font with a light pink color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance.", + "gt": "This is the Facebook mobile application icon, featuring a lowercase 'f' on a blue background. 
When interacted with, it typically opens the Facebook app where users can browse their news feed, connect with friends and family, post updates, and engage in social networking activities." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "caption": "The word \"Speedtest\" is written in a bold, sans-serif font with a white color. The letters are evenly spaced and aligned horizontally.", + "gt": "Represented here is an icon for the Speedtest application by Ookla, depicted by a speedometer graphic suggesting the app's function of measuring internet connection speed. Tapping on this icon will likely open the app and allow the user to test their current internet speed." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "caption": "The word \"Instagram\" is written in a bold, sans-serif font with a light pink color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance.", + "gt": "This icon, featuring a camera lens and a gradient background, is for the Instagram mobile application. Interacting with this icon will usually open the app, providing access to photo and video sharing, as well as viewing the content from others on the Instagram social network." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "caption": "The word \"Home\" is written in a bold, sans-serif font with a white color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance.", + "gt": "Resembling a house with a triangular roof, this icon signifies a home automation or real estate application. Interaction with this icon would open the respective app, providing controls for smart home devices or real estate listings, depending on its specific function." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "caption": "The word \"Lite\" in a bold, sans-serif font with a gradient of pink to white, giving it a three-dimensional appearance.", + "gt": "This icon has a feather, hinting at a lite version of an application that offers a minimalistic or resource-efficient option, typically for use in areas with limited connectivity or on devices with lower performance." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "caption": "The word \"Messenger\" is written in a bold, sans-serif font with a gradient of pink to white. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance.", + "gt": "Featuring a speech bubble with a lightning bolt, this is the Facebook Messenger app icon. It signifies an app dedicated to messaging which, upon interaction, opens a platform where users can send messages, share media, and participate in video calls." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "caption": "Home De...", + "gt": "The icon partially reads \"Home De...\" against an orange square, suggesting a home improvement or retail company's app, possibly offering goods or services related to home refurbishment or decoration. The app's full functionality would be revealed upon opening it." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "caption": "A pair of cartoonish eyes with large, round white sclera, black pupils, and a hint of green around the edges. The eyes are set against a bright green background.", + "gt": "The green owl represents Duolingo, an educational platform icon with its function being language learning. 
Upon touching the icon, the user would engage with the app to learn a new language through interactive lessons." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_18183248185514867672_2.png", + "caption": "A blue square with a white lowercase \"g\" followed by three white horizontal lines of equal length.", + "gt": "This icon, with an abstract design and the letters 'GE', likely signifies a news or media application that provides users with news articles, updates, and possibly live reporting, accessible by touching the icon to open the app." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "caption": "The word \"Search\" is written in a bold, sans-serif font with a dark gray color. The letters are evenly spaced and aligned horizontally.", + "gt": "This area contains the term \"Search,\" suggesting it is likely related to a search function where a user can input queries to locate specific settings or information within this system or application." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "caption": "The word \"settings\" in lowercase letters, with a modern, sans-serif font. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance. The color of the text is a gradient of light to dark gray, with the darker shade at the top and the lighter shade at the bottom.", + "gt": "The word \"settings\" indicates an option or heading that relates to configuration options. Interacting with it would typically bring up a menu to adjust system preferences or application parameters." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "caption": "mobile, data", + "gt": "The term \"mobile,\" followed by a comma hints at a list or continuation of related topics, likely referring to mobile network settings or features in the context of this system or application." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "caption": "The word \"data\" in lowercase letters, with a modern, sans-serif font. The letters are evenly spaced and have a clean, minimalist design.", + "gt": "The word \"data\" in this context may refer to mobile data usage and settings. It suggests an option to view or adjust how the device handles cellular data." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "caption": "The Wi-Fi symbol consists of a lowercase \"i\" with a dot above it, followed by a lowercase \"f\" with a dot above it.", + "gt": "This term \"Wi-Fi,\" ending with a comma, implies it is part of a series, possibly relating to Wi-Fi settings where a user can manage Wi-Fi networks and preferences." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "caption": "The word \"and\" in lowercase letters, with a serif font, is written in a dark color against a light background.", + "gt": "The word \"and\" serves as a conjunction within a list or sentence, indicating the addition of more items or concepts that are related to the ones previously mentioned." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "caption": "The word \"hotspot\" in lowercase letters, with a modern, sans-serif font. 
The letters are evenly spaced and have a clean, minimalist design.", + "gt": "This term \"hotspot\" typically refers to a feature where the device can share its internet connection with other devices through Wi-Fi, Bluetooth, or USB." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "caption": "usage, and hotspot", + "gt": "The term \"usage,\" followed by a comma, likely relates to the tracking or monitoring of resource consumption, such as data, battery, or connectivity usage." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "caption": "The word \"Connected\" is written in a bold, sans-serif font with a black color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance.", + "gt": "The word \"Connected\" suggests it pertains to the status or management of connected devices or networks, such as Bluetooth connections or Wi-Fi networks." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_14797572530627259726_40.png", + "caption": "A white, L-shaped object with a smooth surface and rounded edges.", + "gt": "The partially visible term \"Lo\" could be part of a word that identifies a feature, option, or information related to the system or application settings." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_98501.png", + "caption": "Search or type web address", + "gt": "This area is a text-entry field in a web browser, typically used for typing in web addresses or performing web searches. Interaction with this field usually involves clicking or tapping to enter text, and pressing Enter would initiate a web search or take the user to the entered web address." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_98501.png", + "caption": "The text \"ebay shopping cart\" is displayed in lowercase letters. The word \"ebay\" is in a larger font size compared to the word \"shopping cart\". The text is in a sans-serif font and is centered horizontally.", + "gt": "This is a tab title within a web browser, indicating that the user has accessed or searched for 'eBay shopping' in this tab. The text serves a navigational purpose, allowing the user to identify and switch to the associated web page when multiple tabs are open." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_98501.png", + "caption": "The word \"cart\" in lowercase letters, with a bold, sans-serif font. The letters are black and evenly spaced.", + "gt": "This label refers to a 'cart' on a shopping platform, hinting at a functionality that allows users to view items that have been added to a virtual shopping cart. It is likely interactive and clicking it would navigate the user to a page summarizing their selected items for purchase." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_98501.png", + "caption": "//cart.ebay.com/", + "gt": "This text is a URL displayed in the address bar of a web browser. It indicates that the current web page pertains to the shopping cart of the eBay website. The user can click on this text to edit the URL or copy it for use elsewhere." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_98501.png", + "caption": "Welcome to Costco Wholesale", + "gt": "This text suggests that the user is visiting or has searched for a page related to 'Welcome to Costco Wholesale'. It might serve as a title for a page, potentially indicating that the user can find information regarding Costco's offerings through this tab." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_98501.png", + "caption": "The text \"costco.com\" is displayed in lowercase letters.", + "gt": "Here, 'costco.com' is the simplified representation of an address bar or tab title suggesting that the user is visiting Costco's website. Users interact with this by clicking it to switch to the Costco tab within the browser." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_98501.png", + "caption": "Pay Less.", + "gt": "This phrase 'Pay Less.' is likely associated with a slogan or branding message, suggesting a value proposition to customers; the promise of spending less for the products or services offered by the entity associated with this phrase." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_98501.png", + "caption": "The word \"Target\" in bold, black, sans-serif font.", + "gt": "The text 'Target:' resembles a title or a navigational cue for a segment within a web browser, it may indicate a web page related to the retail company Target and is probably part of a list or compilation of bookmarks or frequently visited sites." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_98501.png", + "caption": "The text \"Expect More.\" is written in a bold, sans-serif font with a black color. The letters are evenly spaced and aligned horizontally.", + "gt": "Similar to , 'Expect More.' is a slogan that implies a promise of greater value, service, or product quality from the associated entity. It is designed to communicate a marketing message or company ethos to customers." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/web_shopping_98501.png", + "caption": "The text \"target.com\" is displayed in a bold, sans-serif font with a blue color. 
The letters are evenly spaced and aligned horizontally.", + "gt": "This text 'target.com' represents a simplified address, similar to , likely indicating that the user is visiting or has the option to visit Target's website. Interaction with this area would navigate to or indicate presence at Target's web page." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "caption": "The word \"skype\" is written in lowercase letters with a modern, sans-serif font. The letters are black and evenly spaced, with a slight shadow effect, giving them a three-dimensional appearance.", + "gt": "The text appears at the top of the screenshot and is likely the title of the application or page currently being viewed. It suggests that the content of the page is related to the Skype application, possibly for download or further information purposes." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "caption": "The word \"Skype\" is written in a bold, sans-serif font with a black color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance.", + "gt": "This text is also indicative of the Skype application. It is usually the main header on an app page and is a non-interactive element that provides the user with confirmation of the app's identity." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "caption": "A blue circular icon with a white lowercase \"s\" in the center.", + "gt": "The icon displayed represents the Skype application. It's typically used as a visual identifier of the app within digital stores or on a device's home screen. It serves as a non-interactive branding element in this context." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "caption": "The word \"Install\" is written in lowercase white letters on a green background.", + "gt": "The word \"Install\" is generally an interactive button when found on an application download page. Tapping this button would initiate the download and installation of the app onto the user's device." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "caption": "The word \"Skype\" is written in a bold, sans-serif font with a dark gray color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance.", + "gt": "This repeated mention of \"Skype\" may refer to the name of the application on its store page. It usually appears below the app icon and serves as a non-interactive title or label." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "caption": "purchases", + "gt": "The term \"purchases\" here likely relates to transactions associated with the app, suggesting that the app might offer in-app purchases." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "caption": "In-app purchases", + "gt": "Paired with the previous \"purchases\" text, \"In-app\" specifies the location or type of purchases available, indicating that users can buy items or services within the app itself." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "caption": "A black star with a five-pointed shape, featuring a slightly irregular outline and a textured surface that suggests a three-dimensional form.", + "gt": "The rating \"4.1*\" is indicative of user reviews and ratings for the app. 
It reflects the app's quality as perceived by its users and is usually an averaged score based on individual user ratings." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "caption": "A black letter \"B\" followed by a black plus sign.", + "gt": "\"1B+\" signifies the number of times the app has been downloaded, indicating that the Skype app has been downloaded over one billion times." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_5797941172247377583_7.png", + "caption": "Editors' Choice", + "gt": "The phrase \"Editors' Choice\" likely denotes a special recognition or endorsement by the app store's editorial team, suggesting that the app comes highly recommended." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "caption": "Search settings", + "gt": "This region contains the phrase \"search settings,\" which indicates a function allowing the user to search within the settings menu. The presence of a magnifying glass icon suggests that this is an interactive search bar interface element. Typically, a user would tap this area and input text to locate specific settings." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "caption": "A white envelope icon with a triangular flap on the right side, set against a dark background.", + "gt": "This portion features the single character \"M\" which is typically representative of a user's initial or an application's logo. It is stylized with a certain thickness and distinct font that can be indicative of a branding design or user personalization within a software interface." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "caption": "The word \"Add\" in lowercase letters, with a bold font and a slight shadow effect, giving it a three-dimensional appearance.", + "gt": "This region includes the word \"Add\" presented with clear, bold lettering on a button or interactive element. This is commonly used to initiate the process of adding new elements, possibly in this context to add a new email account, as suggested by the surrounding text." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "caption": "another email", + "gt": "The text \"another email\" is part of a larger phrase that suggests functionality for adding additional email addresses to the account or application in use. It is likely not interactive by itself but is part of instructional or descriptive text guiding the user's actions." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "caption": "The word \"account\" in lowercase letters, with a bold font and a slight italicization, set against a plain background.", + "gt": "Featuring the word \"account,\" this text complements the nearby phrase and is associated with the process of adding or managing email accounts within the application or device settings. It would generally not be interactive but adds context to the interface's options." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "caption": "Set up your personal or work email", + "gt": "The phrase \"Set up your personal\" suggests a prompt or direction for the user to configure personal settings, possibly related to an email or other account settings. This text is usually static and provides guidance or instructions within a user interface." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "caption": "or work email", + "gt": "Paired with the previous text, \"or work email\" completes instructions for setting up email accounts of different types (personal or professional) within an application. This portion of text helps to further define the user's options for account configuration." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "caption": "Network & internet", + "gt": "\"Network & internet\" signifies a category or menu within the settings that pertains to adjusting network-related preferences including Wi-Fi, data usage, and related connectivity features. This text typically leads to a subsection where related settings can be modified." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "caption": "Wi-Fi, mobile, data", + "gt": "This section, \"Wi-Fi, mobile, data\" lists different connectivity options available to the user for configuration. It could be informative text providing a summary of the settings contained within the \"Network & internet\" menu mentioned in the previous region." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_8853455778583749835_5.png", + "caption": "usage, and hotspot", + "gt": "The presence of the word \"usage,\" combined with the context from the surrounding text, relates to data consumption aspects of the device's network settings. \"And\" suggests there are additional relevant aspects listed after this text, likely related to managing network services or features." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "caption": "Wednesday, May 18", + "gt": "This area displays the text \"Wednesday, May 18,\" which appears to show the date information, presumably representing the current day of the week and the month's date. This is typically displayed on mobile devices as part of the user interface to inform the user of the current date." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "caption": "The word \"Maps\" in a bold, sans-serif font, with a gradient of pink to white, giving it a three-dimensional appearance.", + "gt": "The text \"Maps\" suggests an application name, likely a mapping or navigation app, which users commonly utilize to find locations, get directions, or explore maps of different areas. It is probable that tapping on this text would open the associated application." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "caption": "A stylized letter \"G\" with a gradient of colors starting from red at the top, transitioning to orange, yellow, green, blue, and ending with purple at the bottom.", + "gt": "This is a single character \"G,\" customarily associated with Google’s branding. It often represents access to Google's search services or apps affiliated with Google. Interacting with this symbol would possibly lead to a Google product or service." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "caption": "A red, teardrop-shaped pin with a black circular center.", + "gt": "This icon, often indicative of location services or map functionality, is commonly used to represent a user's current location or to access location-based features. 
Interacting with this icon typically opens a mapping application that shows the user's real-time location on a map." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "caption": "A white speech bubble icon with a blue outline, containing three horizontal blue lines of varying lengths.", + "gt": "This icon depicts a chat bubble, usually associated with messaging or communication services. It typically indicates the user's chat or messaging applications, and interaction would likely open the associated messaging service to send or receive messages." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "caption": "A stylized letter \"G\" with a gradient of colors starting from red at the top, transitioning to orange, yellow, green, and ending with blue at the bottom.", + "gt": "This icon with a colorful design resembling a camera shutter or a wheel hints at the Google Chrome browser, which is widely used for Internet browsing. Tapping this icon would typically open the Chrome browser for web navigation." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/google_apps_9088043504221051292_1.png", + "caption": "A blue circle with a white border, a red circle with a white border, and a yellow circle with a white border.", + "gt": "The icon exhibits the Google Assistant symbol, suggesting voice-activated or typing search query functionality. Interacting with this icon would likely invoke Google Assistant to help with tasks, answer questions, or control smart home devices." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_1849.png", + "caption": "The text \"costco.com/Check\" is displayed in a bold, sans-serif font. The word \"costco.com\" is in lowercase letters, and the word \"Check\" is in uppercase letters. 
The text is aligned to the left.", + "gt": "The URL displayed in the address bar indicates that the webpage belongs to costco.com and includes a path, or endpoint, that suggests a functional page, which the text 'Check' implies may be related to a checkout or verification process. This is the web address users can navigate to for interacting with the website's functionality." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_1849.png", + "caption": "The logo features the word \"COSTCO\" in large, bold, red capital letters with a white outline. Below it, the word \"WHOLESALE\" is written in smaller, bold, blue capital letters.", + "gt": "The logo represents the brand identity for Costco Wholesale, indicating that the user is currently on the official website of this retail company. As a logo, it may serve as a clickable element that typically redirects users to the homepage of the website." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_1849.png", + "caption": "The word \"Warehouses\" is written in a bold, sans-serif font with a light blue color. The letters are evenly spaced and aligned horizontally.", + "gt": "Labeled 'Warehouses,' this indicates a section of the website where users can find information about Costco's physical store locations. It likely functions as a link that, when clicked, will take the user to a page detailing warehouse locations and related information." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_1849.png", + "caption": "The word \"Account\" in a bold, sans-serif font, with a gradient of blue shades ranging from light to dark, giving it a three-dimensional appearance.", + "gt": "Marked 'Account,' this suggests a section pertaining to user account management. Clicking on this would likely allow the user to access their personal account details, sign in, or manage their membership and profile." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_1849.png", + "caption": "A blue shopping cart with a white handle and a white basket area.", + "gt": "This icon appears to represent a shopping cart, which is commonly used on e-commerce websites to signify where users can view items they intend to purchase. Clicking on it would probably take the user to view their current selections or to the checkout page." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_1849.png", + "caption": "A blue rectangular sign with the word \"Shop\" in white, bold, sans-serif font. To the left of the text, there are three horizontal white lines of varying lengths.", + "gt": "Highlighting the word 'Shop,' this implies a navigational link designed to direct users to the online shopping section of the website, where they can browse and choose products for purchase." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_1849.png", + "caption": "The word \"Search\" in a bold, sans-serif font, with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and have a consistent size throughout.", + "gt": "This appears to be a search bar, where users can enter keywords or phrases to find specific items or information on the website. Such fields typically include an interactive function that generates search results upon entry confirmation." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_1849.png", + "caption": "My Warehouse", + "gt": "'My Warehouse' likely refers to the user's preferred or designated Costco warehouse location. It may include functionality for the user to select or change their preferred store and may show additional details, such as operational hours." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_1849.png", + "caption": "Delivery Location", + "gt": "The phrase 'Delivery Location' accompanied with what appears to be a postal code implies a feature that allows users to specify or view the location to which online purchases will be delivered." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_1849.png", + "caption": "The word \"Seattle\" in bold, black, sans-serif font.", + "gt": "Containing the city name 'Seattle,' this suggests the chosen warehouse or delivery location for the user. It might be interactive to allow the user to change the location or view information on the selected warehouse." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "caption": "The text \"Fetch Rewards\" is written in bold, black, sans-serif font. The letters are evenly spaced and aligned horizontally.", + "gt": "This text likely represents the name of an application or service known as Fetch Rewards, potentially hinting at a rewards system that users can utilize by engaging with the app." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "caption": "The word \"Play\" in bold, black, sans-serif font.", + "gt": "This is the word \"Play,\" which usually is associated with initiating an action or starting something, such as a video or game within an application." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "caption": "to earn", + "gt": "The phrase \"to earn\" typically suggests that there is an opportunity to gain something—often points, money, or rewards—by performing certain actions or tasks." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "caption": "The text \"MAKE\" is written in bold, uppercase letters with a dark green color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance.", + "gt": "The word \"MAKE\" usually denotes the action of creating something or obtaining an outcome, perhaps insinuating that users can create or earn money through the app." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "caption": "The word \"MONEY\" in bold, uppercase letters with a green background and black outline.", + "gt": "This text states \"MONEY,\" which indicates that the application or service likely involves opportunities for users to earn financial rewards or benefits." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "caption": "The text \"appminer st\" is displayed in lowercase letters. The letters are green and have a sans-serif font. The text is aligned to the left.", + "gt": "The text \"appminer st\" is not immediately clear, but it could be a truncation or part of a larger phrase, possibly indicating a feature within the app, or related to app mining or statistics." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "caption": "The word \"Contains\" is written in a bold, sans-serif font with a light gray color. The letters are evenly spaced and aligned horizontally.", + "gt": "The word \"Contains\" usually suggests that what follows will describe the contents or features within the app, which in this case could be related to advertisements." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "caption": "The text \"ads\" is written in lowercase letters with a modern, sans-serif font. The letters are evenly spaced and have a clean, minimalist design. The color of the text is a light gray, blending subtly with the background.", + "gt": "This term generally refers to \"advertisements,\" suggesting that the application includes ads that users might see while utilizing the app." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "caption": "The text \"50K+\" is displayed in bold, black font with a slight shadow effect, giving it a three-dimensional appearance. The \"50K\" is in a larger font size compared to the \"+\" sign, which is smaller and positioned to the right of the \"50K\".", + "gt": "The numerical figure \"50K+\" generally implies a quantity greater than 50,000, typically used in the context of downloads, users, or items within an app." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/install_125967318814166469_6.png", + "caption": "A black star with a five-pointed shape, featuring a slightly irregular outline and a textured surface that suggests a three-dimensional form.", + "gt": "The characters \"4.7*\" indicate a rating, likely on a 5-point scale, suggesting that users have rated the app positively, with 4.7 out of 5 stars." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_2921.png", + "caption": "A black and white image of a computer keyboard with a standard QWERTY layout, including function keys, number keys, and a space bar. The keys are rectangular with rounded edges and are arranged in horizontal rows. 
The keyboard appears to be a full-size model, suitable for desktop use.", + "gt": "This area contains the webpage title indicating the user is on a retail website known for its wide range of products, hinting at online shopping capabilities. The title is typically non-interactive and serves as an identifier of the site." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_2921.png", + "caption": "The text \"walmart.com\" is displayed in lowercase letters.", + "gt": "This text seems to be an incorrect or truncated URL for the same retail website mentioned in Region 1. Possibly a typographical error within the text, it seems non-functional." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_2921.png", + "caption": "A white rectangular signboard with the text \"Lenovo Thinkpad\" in black, sans-serif font.", + "gt": "The text here represents a search query within a search bar of the website, suggesting the user is looking for a Lenovo ThinkPad, which is a model of a laptop computer." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_2921.png", + "caption": "A blue rectangular button with rounded corners featuring the word \"Cancel\" in white, bold, sans-serif font.", + "gt": "Labeled 'Cancel,' this is likely an interactive button used to clear the current search query within the search bar. Once tapped or clicked, it should clear the input text." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_2921.png", + "caption": "A black and white image of a Lenovo ThinkPad laptop with a visible keyboard and trackpad.", + "gt": "This text duplicates the query in Region 3 and is part of the search bar suggestions or search history, indicating a previous or common search made by the user." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_2921.png", + "caption": "The word \"Lenovo\" in lowercase letters, with a bold font and a slight italicization, set against a plain background.", + "gt": "The single word 'Lenovo,' which is part of a search suggestion below the search bar, represents the brand that manufactures various electronic devices, including laptops." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_2921.png", + "caption": "A black, rectangular computer keyboard with a standard QWERTY layout, including a number pad on the right side. The keys are chiclet-style with white lettering, and there is a slight sheen on the surface, suggesting a smooth texture. The function keys are aligned along the top, and there is a visible space bar at the bottom center.", + "gt": "The word 'ThinkPad' refers to a specific line of laptops and is part of a search suggestion. Standalone, it specifies the user's interest in the ThinkPad series by Lenovo." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_2921.png", + "caption": "The word \"in\" in lowercase letters, with a bold font and a slight shadow effect, giving it a three-dimensional appearance.", + "gt": "The text 'ir' appears to be an incomplete or mistyped search term or fragment within the search suggestions. Its context is unclear without additional information." + }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_2921.png", + "caption": "A rectangular electronic device with a screen displaying text, surrounded by a thin bezel.", + "gt": "This text, likely a category label, indicates the section of the site the user is navigating, presumably the electronics category where items like laptops would be found." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/android_detailed_caption/images/single_2921.png", + "caption": "A black and white image of a Lenovo ThinkPad charger.", + "gt": "Representing a longer search suggestion, this phrase indicates a related accessory for the Lenovo ThinkPad, specifically a charger, suggesting the user might be looking to purchase this item." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/6.png", + "caption": "NAVIGATING SPECIAL EDUCATION SOCIAL & EMOTIONAL LEARNING", + "gt": "The image displays a title that reads \"NAVIGATING SPECIAL EDUCATION SOCIAL & EMOTIONAL LEARNING.\" It's styled in bold, white capital letters against a red background, and it appears to serve as a header for the entire visual presentation, indicating the overarching theme of the content below." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/6.png", + "caption": "A yellow background with two human head outlines facing each other. The left head has the words \"FIXED MINDSET\" above it, with a red downward arrow below. The right head has the words \"GROWTH MINDSET\" above it, with a green upward arrow below.", + "gt": "This panel illustrates the concept of a \"Growth Mindset\" juxtaposed with \"Fixed Mindset.\" Two head silhouettes are shown with arrows pointing towards a \"Growth Mindset\" tag indicating a positive transformation away from a \"Fixed Mindset,\" symbolizing the adaptability and learning potential of the mind." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/6.png", + "caption": "A cartoon character with a serious expression, featuring a dark gray hair, a light blue shirt, and a red cross symbol on the left side of the head. 
The character has a red frown and is surrounded by two white, cloud-like shapes on either side of the head.", + "gt": "Depicted here is a person with a grim expression, and steam coming out of their ears, conveying the theme of \"Anger Management.\" This symbolizes the need to control tempers, with visual cues highlighting the struggle typically associated with anger." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/6.png", + "caption": " red, green, blue, purple, and orange, arranged in a circular pattern. Above the hands, the text \"Understanding Diversity\" is written in black.", + "gt": "This panel represents \"Understanding Diversity.\" It features a circle of variously colored handprints reaching towards the center, signifying unity and inclusiveness among diverse individuals or groups. The image communicates the idea of embracing diversity." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/6.png", + "caption": "A black and white illustration of a person with a light bulb on their head, holding a book. The person has a question mark above their head and another question mark to the right of their head. The background is a light peach color.", + "gt": "The image here is indicative of \"Social Inferencing.\" A figure stands perplexed before an open box with question marks floating above, suggesting the process of interpreting social cues and understanding social contexts or scenarios that are not overtly expressed." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/6.png", + "caption": "A cartoon illustration of two boys, one wearing a red shirt and blue shorts, and the other wearing a striped shirt and brown shorts, both with their arms raised. 
The word \"Bullying\" is written above them.", + "gt": "An illustration of two children, one standing over the other with a raised fist, typifies \"Bullying.\" This image portrays an aggressive interaction between youth, emphasizing the dynamic of power and intimidation present in bullying behaviors." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/6.png", + "caption": "A cartoon of a boy and a girl playing with each other. The boy is wearing a yellow shirt, black shorts, and red shoes. The girl is wearing a yellow shirt, blue pants, and red shoes. Both have black hair and are smiling.", + "gt": "Here, \"Helping Others\" is symbolized by two children, one assisting the other by tying their shoe. This image evokes themes of kindness, helpfulness, and cooperation among individuals, highlighting the importance of social support." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/6.png", + "caption": "A blue speech bubble with a yellow lightning bolt symbol, and a red speech bubble with a yellow lightning bolt symbol.", + "gt": "Showing two profiles with opposing arrows and a lightning bolt in between, this panel discusses \"Conflict Resolution.\" The imagery suggests two individuals facing a conflict with a potential for resolution, emphasizing communication and problem-solving." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/6.png", + "caption": "A rectangular blue traffic sign with a white border, featuring three white arrows. The leftmost arrow curves to the left, the middle arrow points straight up, and the rightmost arrow curves to the right. 
Below the arrows, the word \"CHANGES\" is written in white capital letters.", + "gt": "This panel depicts \"Coping With Changes,\" represented by a signpost with arrows pointing in different directions, labeled \"CHANGES.\" It symbolizes the various paths one may take when encountering life's transitions and the importance of adaptability." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/6.png", + "caption": "A stylized illustration of a person in a yellow suit and tie, standing on a set of orange stairs, with a blue figure in a blue suit and tie, holding a microphone, standing on the top step.", + "gt": "Finally, \"Leadership\" is expressed through an individual climbing a staircase while assisting another person upward. It represents the concept of leading by example, and guiding others towards success, showcasing the traits of a good leader." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "caption": "A blue circle with a white plus sign inside it, followed by a white \"2X\" text.", + "gt": "The image highlights a statistic related to hiring efficiency, pointing out that the time to hire has more than doubled over the last 5 years. A graphical element beside the text emphasizes this increase in time with a \"+2X\" indicating the doubling." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "caption": "A hand holding a person with a blue shirt and black pants, with a purple banner below displaying \"$4,000\" in white text.", + "gt": "This section of the image indicates that the average cost to hire someone in the U.S. is $4,000, emphasizing the financial implications of the recruitment process for employers." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "caption": "A pie chart with a blue background and a white border, featuring a white line that divides the chart into two sections. The left section is larger and has a white number \"36%\" inside it, while the right section is smaller and has a white number \"36%\" inside it.", + "gt": "Illustrated here is a pie chart displaying that 36% of employers are unable to find the talent they need when it is needed. This statistic points to the challenges in matching skills and job openings in a timely manner." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "caption": "A globe with a blue ocean and white continents, featuring a purple banner with white text that reads \"2.7 trillion impact to global GDP from use of more efficient talent platforms.\"", + "gt": "Described here is the significant potential economic benefit (\"$2.7 trillion impact to global GDP\") that could result from using more efficient talent platforms, suggesting that improvements in recruiting methods could have a profound impact on the global economy." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=3115468935,3930284634&fm=253&app=138&f=JPEG.png", + "caption": "46% of companies are sometimes or frequently understaffed", + "gt": "This part of the image addresses organizational efficiency, with a statement that 46% of companies are sometimes or frequently understaffed. The figure is accompanied by a graphic showing the 46% proportion." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/50.png", + "caption": " how travelers identify their travel style", + "gt": "This area presents the title and introductory text providing an overview of the image's intent. 
It introduces the concept of \"Travel Personas,\" indicates that these personas are used to identify individual travel styles, and how these styles are significant for personalized engagement in marketing. It references a report by the CMO Council from 2018." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/50.png", + "caption": "A circular planner with a white background featuring a colorful illustration of two people, one wearing a red hat and the other wearing a blue hat, both holding a smartphone. The person in the red hat is holding a book, and the person in the blue hat is holding a suitcase. The background includes a mountain and a tree. The text \"THE SMART PLANNER\" is written in bold, black letters at the top.", + "gt": "A visual and textual depiction of \"The Smart Planner\" travel persona. This persona, representing 31% of travelers, is illustrated by a character with suitcases, a camera, binoculars, and a hat, suggesting a well-prepared and organized traveler." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/50.png", + "caption": "The Relaxed Nomad", + "gt": "This illustrates \"The Relaxed Nomad\" persona. With 25% representation, the image shows two individuals in hiking attire with a backpack, indicating a laid-back and adventurous travel style." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/50.png", + "caption": "A family of five, consisting of a man, a woman, and three children, standing together. The man is holding a baby, while the woman is holding a suitcase. The children are standing around them, with one child holding a suitcase. The family is depicted in a circular frame.", + "gt": "Depicts \"The Deal Seeker\" persona, representing 22% of travelers. The image shows a family with suitcases and shopping bags, suggesting a focus on economical travel and value for money." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/50.png", + "caption": "A circular white background with a thin black border.", + "gt": "Presents \"The Nervous Stresser\" persona with 13% representation. The image depicts an anxious individual in an airplane seat, clutching the armrests, reflecting a traveler who experiences stress during trips." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/50.png", + "caption": "A circular logo with a white background featuring two cartoon characters in blue wetsuits and yellow oxygen tanks, swimming underwater. The characters are positioned diagonally from each other, with one character on the left and the other on the right. The text \"THE ADVENTUROUS THRILL-SEEKER\" is written in bold, black capital letters above the characters.", + "gt": "Showcases \"The Adventurous Thrill-Seeker\" persona, accounting for 5% of the traveler demographic. The image portrays two characters skydiving, indicating a preference for high-energy and adventure-filled travel experiences." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/50.png", + "caption": "The luxury budget-buster", + "gt": "Portrays \"The Luxury Budget-Buster\" persona, constituting 1% of travelers according to this depiction. The image includes a character sipping a drink on a plane, implying a tendency towards indulgence and high expenditure." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/50.png", + "caption": "A circular logo with a white background featuring a stylized illustration of a person in a blue suit with a red briefcase in their left hand and a blue suitcase in their right hand. The person is depicted in a walking motion, with one leg forward and the other leg back. 
The text \"THE BUSINESS ROAD WARRIOR\" is written in bold, black, uppercase letters above the illustration.", + "gt": "Displays \"The Business Road Warrior\" persona, also making up 1% of the traveler profile. The graphical representation includes a character briskly walking with a rolling suitcase and carrying a briefcase, suggesting frequent travel for business purposes." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/24.png", + "caption": "A woman with short brown hair, wearing a purple top with a white collar, is holding a yellow envelope in her right hand. She has a headset on her head and is standing in front of a computer monitor with the word \"BIG\" visible on the screen.", + "gt": "An illustration depicts a female customer service representative wearing a headset and holding a notepad, with an envelope icon indicating email communication." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/24.png", + "caption": "A circular orange background with a white border.", + "gt": "Depicted here is a male customer support agent with a headset. Behind him are symbols such as a magnifying glass and a wrench, suggesting a focus on service and problem-solving." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/24.png", + "caption": "A circular logo with a light blue background featuring a stylized globe in the center. The globe is divided into four quadrants, each in a different shade of blue. A black headset with a microphone is positioned over the globe, with the earpieces extending outward. To the right of the globe, there is a yellow star.", + "gt": "This image shows a customer support agent with a globe and a phone headset in the background. The presence of a star and headphones suggests excellence in global support." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/24.png", + "caption": "A man with short brown hair, wearing a white shirt and a black tie, is holding a yellow light bulb in his right hand. He has a headset on his head and is pointing upwards with his left hand.", + "gt": "In this graphic, a male figure with a headset is surrounded by symbols: a question mark, gears, and a light bulb. This represents expertise in finding solutions." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/24.png", + "caption": "A blue and black wrench with a flat-head design, featuring a blue handle with a textured grip and a black head with a serrated edge. The wrench has a long, straight shaft connecting the handle to the head.", + "gt": "Featuring tools and a gauge, the illustration conveys a commitment to quality in customer service, indicated by the 'Quality Service' text." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/24.png", + "caption": "A stylized illustration of a person wearing a headset with a microphone, a purple shirt, and a white undershirt. The person is holding a smartphone with a blue and white design on the screen.", + "gt": "The design shows a female representative with a headset alongside a mobile phone displaying a wifi signal and a callback option, emphasizing telecommunications services." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/24.png", + "caption": "A stylized illustration of a hand holding a black and white telephone handset, with a blue circular background featuring a partial globe and a speech bubble with the word \"BIG\" in white.", + "gt": "This panel portrays a customer support individual with a wrench, highlighting the concept of assistance with technical or practical issues." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/24.png", + "caption": "A circular logo with a red border, featuring a stylized design of a person wearing a white shirt and a black tie, with a blue and white striped hat. The background includes a Union Jack flag and a yellow rectangle.", + "gt": "Illustrated here is a customer support agent with a headset in front of a backdrop depicting the UK flag, a speech bubble, and a phone, suggesting language translation services." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/24.png", + "caption": "A woman with brown hair tied back, wearing a purple top with a white collar, and a headset with a microphone. She is holding a yellow star in her right hand and has a black and white striped object in her left hand.", + "gt": "A female customer service consultant is represented, with symbols of 24-hour availability and a gold star, signifying round-the-clock excellence." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "caption": "A stylized heart with a vibrant red color, featuring a prominent blue and orange flame-like design on the upper left side, and a smaller blue and orange flame-like design on the lower right side.", + "gt": "The image features a human heart symbolizing a strong heart as one of the benefits of running." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "caption": "A stylized human figure with arms raised in a celebratory pose, surrounded by four blue arrows pointing outward, each arrow with a slight curve and a pointed tip.", + "gt": "This panel showcases an icon of a shield with a check mark, representing the immune system's boost from running." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "caption": "A digital bathroom scale with a black base and a white digital display showing the weight.", + "gt": "A silhouette of a figure measuring their waist indicates that running can aid in weight loss." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "caption": "A pair of pink lungs with a central trachea, featuring a detailed structure with visible veins and a slightly curved shape.", + "gt": "The artwork depicts a pair of lungs, signifying the respiratory system's enhancement due to running." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "caption": "A yellow smiley face with a black outline, featuring two black dots for eyes and a curved black line for a mouth.", + "gt": "A smiling face emoticon suggests that running can improve one's mood." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "caption": "A human knee with a visible bone structure, including the femur and tibia, with a slight curvature and a smooth surface.", + "gt": "An illustration of a leg bone signifies that running increases bone density." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "caption": "A human brain with a detailed, textured surface, featuring a prominent cerebral cortex and a visible portion of the brain stem.", + "gt": "An image of a brain is used to illustrate the benefit of improved brain function from running." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=1241354889,2739849697&fm=253&app=138&f=JPEG.png", + "caption": "A red heart with a white outline and a white line through the center, indicating a heart rate or rhythm.", + "gt": "The image features a detailed representation of the cardiovascular system, emphasizing its strengthening through running." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/11.png", + "caption": "The word \"HOTAPPE\" is written in large, bold, uppercase letters. The letters are colored in a gradient of warm colors, starting with red on the left, followed by orange, yellow, light blue, and ending with dark blue on the right.", + "gt": "The image displays the word 'HOT' in large, bold, uppercase letters with varying colors for each letter. The 'H' is in red, the 'O' is in mustard yellow, and the 'T' is in a light blue color." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/11.png", + "caption": "A circular smiley face with a yellow face and a red border. The face has two blue teardrop-shaped eyes, a brown curved mouth, and two brown curved lines for eyebrows.", + "gt": "Here, a round, yellow cartoon-like emoji with blue tears, symbolizing laughter or crying with joy, is shown. Below it, the word 'HUMOR' is written in uppercase letters." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/11.png", + "caption": "An orange circle with two brown hands facing each other, with the word \"OPENNESS\" in green capital letters below the circle.", + "gt": "This depicts an open hand graphic in brown color, which is centered within an orange circular background. Underneath the image, the word 'OPENNESS' appears in capital letters." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/11.png", + "caption": "A circular yellow background with a pair of hands clasped together in the center. The hands are depicted in a light brown color, with the left hand slightly overlapping the right hand. Radiating from the hands are white lines, giving the impression of light or energy. Below the hands, the word \"TOUCH\" is written in bold, uppercase letters.", + "gt": "Shown is an illustration of two hands coming together in a handshake or high five, set against a yellow circle. Below, the word 'TOUCH' is described in uppercase letters." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/11.png", + "caption": "A circular logo with a light blue background featuring a large red heart in the center. Inside the heart, there is a white exclamation mark. Below the heart, the word \"ATTENTION\" is written in bold, uppercase letters.", + "gt": "A graphic of a heart with an exclamation mark within it is displayed within a light green circular background. Written below is the word 'ATTENTION' in uppercase letters." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/11.png", + "caption": "A circular blue background with two cartoon faces, one on the left and one on the right, separated by a white dashed line. The left face has brown hair and a neutral expression, while the right face has light brown hair and a neutral expression. Below the faces, the word \"PROXIMITY\" is written in bold, uppercase letters.", + "gt": "A portrayal of a person with a gender-neutral appearance, featuring brown hair, is encircled in blue. A sequence of dashes leads from the character to the bottom right, with the word 'PROXIMITY' written in block capitals." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/11.png", + "caption": "A circular blue background with a white eye in the center, featuring a brown iris and a white sclera. Below the eye, the words \"EYE CONTACT\" are written in bold, uppercase letters.", + "gt": "The image shows a close-up illustration of a stylized blue eye with a large brown pupil, against a dark blue background. Below the eye is the word 'EYE CONTACT' written in all caps." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "caption": "A person with a green background, wearing a red shirt, with a thermometer in their mouth.", + "gt": "The image depicts an illustration of a person with flushed cheeks and a thermometer in their mouth, indicating a high temperature, alongside the word \"fever.\"" + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "caption": "A person with a green background, wearing a pink shirt, with a red nose and a red mouth, and a white hand with three fingers extended.", + "gt": "The image shows an individual coughing into their hand, representing a symptom identified by the word \"cough.\"" + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "caption": "A person with a green background, wearing a pink shirt, with a green and white object in their mouth.", + "gt": "This panel illustrates an individual appearing unwell, with a sick expression and a hand over their mouth. The word \"vomiting\" is associated, indicating it as a symptom." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "caption": "A stylized illustration of a person with a green circular background. 
The person has a gray face with a red nose, black eyes, and a red mouth. The person is wearing a red shirt with a yellow collar.", + "gt": "Here, an individual is portrayed with their hands near their throat, their cheeks flushed, and an uneasy expression. The term \"dyspnea\" adjacent to the figure defines the displayed respiratory distress." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "caption": "A person with a green background, wearing a red shirt and white pants, is sitting on a white chair.", + "gt": "An individual is seen clutching their stomach, with a distressed expression, representative of \"diarrhea\" which is indicated by the corresponding label." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "caption": "A circular green sign with a white border, featuring a stylized illustration of a pair of red lungs with a gray outline, set against a dark background.", + "gt": "The illustration shows a pair of human lungs with a highlighted area indicating inflammation. The word \"pneumonia\" is present to describe the condition being depicted." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=2557672574,3690790099&fm=253&app=138&f=JPEG.png", + "caption": "A stylized illustration of a kidney with a red and pink color scheme, featuring a central red area with a pink outline, flanked by two symmetrical, curved, pink shapes resembling the kidney's lobes, all set against a light green background.", + "gt": "Featured is a graphic representation of a pair of kidneys with a highlighted area in red, indicating distress or damage. Alongside is the phrase \"renal failure,\" signifying the medical condition exhibited." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/23.png", + "caption": "A blue rectangular header with the text \"Interesting Facts\" in white, bold, sans-serif font. To the left of the text, there are three white horizontal lines. To the right of the text, there is a red heart symbol.", + "gt": "This region displays the heading \"Interesting Facts\" at the top, set against a blue background with a three-line menu icon to the left and a heart symbol to the right." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/23.png", + "caption": "A stylized panda face with a white face, black ears, black patches around the eyes, and a black nose. The panda has a small, curved black mouth and a content expression. The face is set against a green circular background.", + "gt": "Depicts a graphical icon of a panda bear's face on a green background with the label \"Animals\" beneath it." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/23.png", + "caption": "A green circular background with a white plate in the center, containing a yellow ring around the edge. To the left of the plate is a white fork, and to the right is a white spoon.", + "gt": "Shows an icon representing a plate and silverware on a green background, labeled as \"Diet Nutrition.\"" + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/23.png", + "caption": "A circular icon with a dark blue background featuring a red heart in the center. The heart is outlined in white and has a white line running horizontally across its middle. 
Below the heart, the word \"Diseases\" is written in white, bold, sans-serif font.", + "gt": "Includes a graphical icon that combines a heart shape and a pulse line on a dark background, labeled \"Diseases Disorders.\"" + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/23.png", + "caption": "A white long-sleeved shirt with a teal collar and cuffs, featuring a row of black buttons down the front.", + "gt": "Features an image of a fashionable shirt on a dark background with the word \"Fashion\" underneath it." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/23.png", + "caption": "A circular orange icon with a white film reel design in the center. The film reel has a blue border with white squares on the left and right sides, and a white center with a blue horizontal line dividing it into two sections.", + "gt": "Contains an icon resembling a film strip on an orange background, indicating the \"Entertainment\" category." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/23.png", + "caption": "A circular green icon with a white syringe featuring a red cross and a white droplet next to it.", + "gt": "This panel displays an icon of a syringe with a drop, which is on a green background, and is described with the words \"Drugs Addiction.\"" + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/23.png", + "caption": "A blue circular icon featuring a white wine glass with a yellow liquid on the left and a white bottle with a yellow liquid and a brown cap on the right.", + "gt": "Shows a depiction of a wine bottle and glass on a blue background, labeled \"Food & Drink.\"" + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/23.png", + "caption": "A circular icon with a dark blue background featuring two stylized human figures. 
The figure on the left has short, light brown hair and is wearing a red shirt. The figure on the right has short, light brown hair and is wearing a brown shirt with a white collar.", + "gt": "Exhibits two stylized human figures, one male and one female, on a blue background, with the inscription \"Gender.\"" + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/23.png", + "caption": "A circular icon with a solid olive green background featuring a white silhouette of the Earth in the center.", + "gt": "Contains a depiction of the Earth on a green background, with the word \"Global\" beneath it." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "caption": "A person sitting on a chair with their head in their hands, wearing a blue shirt and black pants.", + "gt": "The image depicts two individuals engaged in conversation. One appears to be a professional, possibly a therapist, sitting across from a person who seems to be seeking help. The scene is accompanied by the text \"Seek Professional Help,\" suggesting that the image represents the advice to consult a mental health professional when dealing with depression." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "caption": "A person with short brown hair, wearing a blue long-sleeve shirt and yellow pants, is sitting on a white platform with a blue wave design at the bottom.", + "gt": "This panel features an individual sitting on the ground with their head down, projecting a dejected or hopeless demeanor. Above the figure, the text reads \"Don't Lose Hope.\" The image conveys the message of maintaining hope as a countermeasure against feelings of depression." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "caption": "A woman with dark hair tied back, wearing a pink top, is depicted with a thought bubble above her head.", + "gt": "An illustration of a female figure is shown alongside the phrase \"Practice Mindfulness.\" She appears calm and collected, with her eyes closed and a slight smile, which indicates a serene state of mind, commonly associated with mindfulness practice." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "caption": "A person with red hair, wearing a green top and blue pants, is sitting on a windowsill with their legs crossed.", + "gt": "Here, an individual is seen looking out of a large window onto a sunny landscape with trees. The phrase \"Rethink Your Perspective\" suggests that the image is advising a change in one's outlook, possibly to a more positive or broader view, as a way to combat depression." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "caption": "A woman with dark hair tied back, wearing a blue sleeveless top and black leggings, is running with her arms slightly bent and her legs in motion. She has a white earphone cord hanging from her right ear.", + "gt": "The image presents a person in activewear, taking a stride forward with a focused expression. The associated text, \"Stay Active,\" recommends physical activity as a method for improving mental health and battling depression." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=2795077530,1349319423&fm=253&app=138&f=JPEG.png", + "caption": "A woman with black hair tied back, wearing a green headband, a green sleeveless top, and black pants, is sitting cross-legged with her hands pressed together in a prayer position.", + "gt": "This panel displays a person in a yoga pose, meditating with eyes closed and hands in a position of focus. The text \"Meditate\" indicates that the image is suggesting meditation as a therapeutic practice for managing depression." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "caption": "A green dinosaur with a white belly, a purple dinosaur with a white belly, and a green dinosaur with a white belly.", + "gt": "The image shows three gel ice packs in green, purple, and blue colors, with distinctive shapes, resembling a dinosaur, a star, and a fish. Accompanying text suggests \"Take out the one you need.\"" + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "caption": "A cartoon boy with a purple bandage on his forehead, wearing a blue shirt and blue pants, is holding a purple object in his right hand.", + "gt": "An illustration depicts a cartoon boy holding an ice pack to his head. Text indicates the ice pack has multi-functionality and advises using the ice pack for \"the relief area for the doctor recommended time of 20 minutes.\"" + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "caption": "A lime green refrigerator with a single door featuring a vertical handle on the right side. The door has a horizontal indentation near the top. 
Below the door, there is a yellow drawer with a horizontal handle.", + "gt": "Detailed instructions on how to use the ice pack are given, with two methods highlighted: \"TO USE COLD\" involving refrigeration, and \"TO USE HOT\" instructing to microwave the pack for 10 seconds and check the temperature." + }, + { + "image_path": "evaluation/MDVP-Bench/data/multipanel_detailed_caption/images/u=3691911939,3379985415&fm=253&app=138&f=PNG.png", + "caption": "A small, rectangular, lime green refrigerator with a single door featuring a black handle on the left side. The bottom section of the refrigerator is orange with a horizontal black handle.", + "gt": "Guidelines for storage are portrayed, advising to \"put the item in the storage bag, for longer shelf life, keep pack in freezer while not in use.\"" + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/4010.jpg", + "caption": "A dark-colored, rectangular structure with a flat roof and vertical sides, featuring a small, square window on the upper left side and a larger, rectangular window on the lower right side.", + "gt": "The object appears to be a small brown wooden shed, likely used for storage, situated on a patch of grass. It has a clearly visible slanted roof, possibly for rain runoff, and looks to be a single-door structure typically found in a backyard or garden setting." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/4010.jpg", + "caption": "A dense cluster of green leaves with a mix of light and dark green hues, forming a rounded, bushy appearance.", + "gt": "This object is a tree with thick, lush foliage, representing a mature specimen that provides shade and greenery. It stands behind a smaller, sparser tree and is part of a larger grouping of trees that appear to create a natural boundary or backdrop for the area." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/4010.jpg", + "caption": "A cylindrical, metallic pole with a consistent diameter throughout its length, featuring a series of evenly spaced, horizontal bands encircling its surface.", + "gt": "A single metal pole is embedded in the ground in a vertical orientation. It seems to be a simple, slender structure, possibly serving as a support or part of a larger construction that isn't fully visible. The lawn surrounding it is well-trimmed and maintains an even appearance." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2407550.jpg", + "caption": "A deep, dark-colored bowl with a wide, flared rim and a smooth, glossy finish.", + "gt": "This bowl, appearing to be dark blue, is situated against a background, likely part of kitchenware." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2407550.jpg", + "caption": "A modern kitchen with a stainless steel oven and a black cooktop. The oven has a digital display and control panel, and there is a visible handle on the oven door. The cooktop has multiple burners with black grates.", + "gt": "The tabletop is made of dark marble, showcasing a glossy finish and reflecting its surroundings slightly." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2407550.jpg", + "caption": "A white, rectangular wall socket with a single, round, black power switch located on the right side.", + "gt": "The light switches are white, contrasting with the dark wall, likely plastic, and appear functional." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2407550.jpg", + "caption": "A white, rectangular light switch with rounded edges, featuring a central toggle switch mechanism.", + "gt": "Positioned in the background, these white light switches are paired on a wall above the countertop." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2407550.jpg", + "caption": "Stainless steel built-in oven featuring a large glass door with a curved handle at the top, a digital control panel above the door with multiple buttons and a display screen, and a brand logo centered on the control panel.", + "gt": "This silver oven, with digital controls and a handle, appears modern and built into the cabinetry." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2407550.jpg", + "caption": "A tall, slender vase with a flared rim and a narrow neck that gradually widens into a bulbous base. The vase features a glossy finish with a gradient color scheme transitioning from a deep blue at the top to a lighter blue and then to a greenish hue towards the bottom.", + "gt": "An indistinct blue and green object, possibly decorative, is partially visible against a lighter backdrop." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2407550.jpg", + "caption": "The floor is composed of medium-toned wooden planks with a smooth, polished finish. The wood grain is visible, featuring a mix of straight and wavy lines, creating a natural and textured appearance. The planks are laid out in a parallel arrangement, running horizontally across the image.", + "gt": "The floor, constructed of hardwood, showcases a natural finish with variations in wood grain." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2407550.jpg", + "caption": "A vertical, wall-mounted spice rack with multiple tiers, each tier holding several glass jars with metal lids. The jars are arranged in a single column, and the rack appears to be made of a dark, possibly wooden material.", + "gt": "The jar holder, likely metal, is mounted to the wall, containing jars that may hold spices or ingredients." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/402.jpg", + "caption": "A round, dark-colored table with a smooth surface and a central pedestal base.", + "gt": "This is an image of a silver metal table situated outside on a paved ground. The table has a shiny, reflective surface indicative of being metallic." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/402.jpg", + "caption": "The chair features a high, slightly curved backrest and seat cushion upholstered in a woven fabric with a diamond pattern. The fabric is primarily light green with a central vertical stripe in a slightly darker shade. The armrests are padded and covered in the same woven fabric, with a light green color. The chair's legs are dark-colored and straight.", + "gt": "The object is an outdoor chair characterized by its red backrest and tan seat. It appears sturdy and designed for outdoor settings, likely part of a café or restaurant patio." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/402.jpg", + "caption": "A purple place mat with a textured surface and a white circular design in the center.", + "gt": "The item in question is a piece of lavender paper that seems to be placed atop a metal table. The paper's edges are distinctly visible against the table's surface." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/402.jpg", + "caption": "A vertical traffic light with three circular lenses arranged in a column, displaying a red light at the top, an unlit middle lens, and a green light at the bottom, all encased in a black housing with a visor over each lens.", + "gt": "Visible here is a yellow traffic light, suspended above the street. The light is not illuminated and it stands against a light sky, possibly signaling a traffic-stop scenario." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/402.jpg", + "caption": "A pink and white striped awning with a scalloped edge and fringe detailing.", + "gt": "A large red and white striped umbrella stands open, presumably providing shade or shelter in an outdoor setting. Its vibrant colors attract attention." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/402.jpg", + "caption": "A tall, dark brown tree trunk with a rough, textured bark. The trunk is relatively straight and has a consistent width throughout its visible length.", + "gt": "A brown tree trunk is seen beside a sidewalk. The trunk's bark is rugged and it appears to be a mature, healthy tree, offering shade to the vicinity." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/402.jpg", + "caption": "A chalkboard sign with a wooden frame displaying the text \"Château de la Bertrandière\" in elegant, cursive script.", + "gt": "Displayed is a black chalkboard featuring white text. It seems to be placed on a sidewalk, often used for displaying messages or menus outside establishments." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/402.jpg", + "caption": "A tall, narrow, rectangular window with a dark frame and a single vertical pane of clear glass.", + "gt": "A window is seen on the side of a tan-colored building. It appears to be rectangular, typical of building windows, and reflects the adjacent surroundings." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/000000518836.jpg", + "caption": "A chestnut horse with a white star on its forehead, dark eyes, and a soft, dark muzzle. Its ears are pricked forward, and the mane appears to be a lighter shade of chestnut, blending into the darker coat. 
The horse's neck shows a gentle curve, and the coat has a healthy sheen.", + "gt": "A close-up view of a horse's head, predominantly brown with a distinctive white patch on its forehead and visible mane." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/000000518836.jpg", + "caption": "A brown horse with a dark mane.", + "gt": "This is the body of a brown horse, most likely the same one as the head seen in the close-up. Its front body is visible." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/000000518836.jpg", + "caption": "A light gray horse with a darker mane and tail, featuring a well-muscled build, a straight profile, and a calm demeanor.", + "gt": "A white horse is seen from a side angle in the distance, grazing or standing in a meadow with trees and a fence." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/000000205601.jpg", + "caption": "A white, rectangular table with rounded corners and a smooth surface.", + "gt": "A frying pan on a heat source contains sautéed meat and vegetables, emitting steam, indicating the food is hot and being cooked." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/000000205601.jpg", + "caption": "A modern black induction cooktop featuring a sleek design with two visible cooking zones, each outlined with a white circular pattern. The front edge of the cooktop has a stainless steel trim, and there are control buttons located at the bottom front, with indicator lights above them.", + "gt": "An electric stovetop features a radiant burner that is glowing, suggesting it is turned on and providing heat for cooking." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/000000205601.jpg", + "caption": "A glossy, metallic spoon with a reflective surface and a tapered handle, featuring a rounded bowl with a slight indentation on one side.", + "gt": "A kitchen knife with a green handle rests on a countertop; its blade appears sharp and suitable for food preparation." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/000000205601.jpg", + "caption": "A metallic spoon with a reflective surface and a tapered handle that widens towards the end, featuring a rounded bowl with a pointed tip.", + "gt": "A human hand is captured in motion, seasoning or stirring the food in the pan, contributing to the cooking process." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/000000299654.jpg", + "caption": "A zebra with a distinctive black and white striped pattern, featuring a mane of short, erect hair transitioning from black at the base to white at the tips. The stripes on the neck are vertical and become more horizontal as they reach the mane. The visible part of the zebra's face shows a pattern of narrow stripes that converge around the eyes and muzzle, with a white area above the eyes and a dark nose. The ears are pointed and display a striped pattern consistent with the head.", + "gt": "The image depicts the head of a zebra, with distinctive black and white stripes covering its fur. The animal's ears are pointed upwards, indicating alertness. The eyes are visible, showcasing a gentle gaze, and the nose is close to the ground, suggesting the zebra is grazing or sniffing the terrain. The mane is partially visible as a series of short, erect black hair between the zebra's ears." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/000000107939.jpg", + "caption": "An octagonal red stop sign with bold, all-caps white lettering spelling \"STOP\" centered on the sign.", + "gt": "The object is a rectangular street sign with white letters on a green background, indicating the name of a street. It is affixed to a metal pole and is located above and slightly to the left of a stop sign. The sign reads 'NORTH AVE' suggesting it's likely an indication of the street or direction. It appears to be a standard street name sign used in many urban settings." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/000000107939.jpg", + "caption": "An octagonal sign with a red background and a white border, featuring the word \"STOP\" in white uppercase letters.", + "gt": "This object is a red hexagonal stop sign with white uppercase letters spelling 'STOP'. It is attached to the same metal pole as another sign, below and to the right of it. The sign is designed to alert drivers to stop and is a widely recognized traffic control device. The edges of the sign appear sharp and undamaged, suggesting it is in good condition." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/000000437374.jpg", + "caption": "A dark brown, square-shaped cushion with a visible textured surface that suggests a soft, plush fabric.", + "gt": "A plush, padded object designed for comfort, potentially used on a sofa." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/000000437374.jpg", + "caption": "A dark brown, square-shaped pillow with a visible corner that appears to be soft and plush.", + "gt": "Similar to the first object, this is also a stuffed and soft piece intended for supporting or resting." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/000000437374.jpg", + "caption": "The earlobe is small and rounded, with a smooth, slightly glossy surface. It is adorned with a small, round, gold-colored earring that has a subtle, reflective sheen.", + "gt": "Decorative accessory adorned on the ear, visible as a small, shiny object." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/000000437374.jpg", + "caption": "A young girl with curly hair, wearing a patterned top with a mix of geometric and floral designs in dark and vibrant colors, paired with long-sleeved pink undershirts. Her expression is one of mild surprise or excitement, with her mouth slightly open and eyes looking upwards. Her arms are outstretched with palms facing up, as if gesturing or presenting something.", + "gt": "This is a child with an open mouth and animated facial expression, possibly speaking or expressing surprise." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/000000437374.jpg", + "caption": "A young boy with curly hair, wearing a striped sweater with shades of blue, white, and brown, stands with his hands clasped together. He has a focused expression on his face, with his mouth slightly open and his eyes looking to the side.", + "gt": "Appears to be a young boy, casually dressed, gripping an electronic device with attention." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2407508.jpg", + "caption": "A ski parka featuring a color-block design with a white torso, green sleeves, and black shoulder panels. The jacket has a high collar and a front zipper closure. There are red accents on the cuffs and a red logo on the left chest area. The parka is paired with a black helmet with a visor.", + "gt": "The figure is wearing a red ski suit with a blue helmet and goggles. 
Their stance is open and welcoming, arms outstretched, and they seem to be an instructor addressing a group of students on a snowy slope." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2407508.jpg", + "caption": "The knee of the person is covered in a dark purple snowsuit with a slightly glossy finish. The fabric appears thick and durable, suitable for cold weather. The knee area is slightly bent, indicating a natural stance. The snowsuit has a subtle sheen, reflecting light, and there are no visible patches or additional features on the knee itself.", + "gt": "A person is mostly obscured by the instructor but can be identified as a ski student by the helmet. The student is wearing a purple jacket with green sleeves and appears to be in mid-motion, learning to ski." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2407508.jpg", + "caption": "The trousers are black with a glossy finish, featuring a slim fit design. They have a smooth texture and appear to be made of a synthetic material. The trousers are paired with black shoes that have a sleek, modern design.", + "gt": "There is a student dressed in green ski gear with visible ski poles, possibly following instructions. They are viewed from the side, indicating movement or a pause during skiing." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2407508.jpg", + "caption": "A blue helmet with a glossy finish, featuring a prominent white stripe with a red outline running horizontally across the middle. The helmet has a black visor attached at the front, and a black chin strap with a buckle hanging down from the sides.", + "gt": "A clear blue sky with scant clouds, indicative of a bright, sunny day ideal for outdoor activities such as skiing. This backdrop is above a snowy mountain setting." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2407508.jpg", + "caption": "The ski boot is predominantly black with a sleek, modern design. It features a sturdy, high-ankle structure for support and a smooth, matte finish. The boot has a contoured shape to fit the foot and lower leg, with a slightly raised heel for added stability. The upper part of the boot is designed to secure the foot, while the lower part extends down to cover the ankle and lower calf.", + "gt": "A ski student is captured from behind, suggesting they are moving away from the viewer. They are wearing a red jacket with black pants, indicative of typical ski wear fit for the cold environment." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2407508.jpg", + "caption": "The sleeve of the sweater is predominantly white with a bold red graphic of a person skiing. The fabric appears to be a soft, knit material, and the sleeve is slightly loose-fitting, extending to the wrist. The red graphic is positioned centrally on the sleeve, adding a dynamic contrast to the white background.", + "gt": "This student, visible from the side, is wearing a green and purple ski outfit with a matching helmet, possibly in the midst of practicing or following a ski maneuver." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2407508.jpg", + "caption": "A person wearing a black helmet, goggles, and a white jacket with red and black accents, including a red logo on the left chest area. The individual is also wearing black gloves and appears to be in a skiing stance.", + "gt": "A detailed examination of the instructor's black glove, which is part of standard skiing attire, suited to protect hands from cold conditions and providing better grip on ski poles." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2411153.jpg", + "caption": "A full-face racing helmet with a predominantly white base color, featuring a large number '69' in red with a white outline on the front. The helmet has a green and red stripe design on the sides, with additional sponsor logos in red and black. The visor is tinted and appears to be in a closed position.", + "gt": "Captured in this section is a motorcycle racer, sharply tilting while maneuvering a turn on a race track. The rider, outfitted in a full-body racing suit, is almost in a horizontal position relative to the ground, a technique used in high-speed motorcycle racing to navigate tight turns while maintaining speed. The motorcycle itself is predominantly red with hints of white and black, and it showcases a sleek, aerodynamic design typical of high-performance racing bikes. The rider's focused posture and the bike's dynamic angle suggest this is a moment of intense action during a race." + }, + { + "image_path": "evaluation/MDVP-Bench/data/natural_detailed_caption/images/2411153.jpg", + "caption": "A series of parallel, evenly spaced, horizontal lines with a consistent width, running across the entire width of the image.", + "gt": "This portion of the image displays the texture of an asphalt road, detailed with small granular elements indicative of a typical racing track surface built to offer traction and durability. A crisp white boundary line marks the edge of the racing track, contrasting with the dark gray tone of the asphalt. The road surface is illuminated by ambient light, highlighting the texture and suggesting a dry weather condition which is ideal for racing. The condition of the road suggests it is well-maintained, a necessity for the safety and performance of high-speed motorsport events." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/22d3844dcf29a07bd10f557a33684e331846f81b938ca0f742afab09c542133f.png", + "caption": "The mass of the quark is a massless particle, denoted by the symbol \"m\" in the text.", + "gt": "This region of the image displays a portion of a scientific or academic paper, specifically focusing on points that seem to outlay contents or headings within the document. The page appears to discuss topics in physics, with references to quark and meson masses, as well as lattice data. The content suggests that the document may be exploring the relationship between subatomic particles and their masses, experimental data, and theoretical models (likely within the field of particle physics or quantum chromodynamics). Each item listed is followed by ellipsis and a numerical value, denoting sections or page numbers where these topics are expanded upon within the document." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/22d3844dcf29a07bd10f557a33684e331846f81b938ca0f742afab09c542133f.png", + "caption": "A lowercase letter 'v' with a serif at the top and a tail at the bottom, featuring a bold, sans-serif font.", + "gt": "This region is at the bottom of the document, typically referred to as the page-footer. In academic or scientific papers, this section could include information such as the page number, publication date, author's name, or part of the document classification system. However, the specifics of what this footer contains are not visible, as the black rectangle with a white numeric identifier covers it entirely." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "caption": "The geometry and adsorption energies for the structures of thio-glycolic acid on Au(111) at 0.25ML.", + "gt": "The content is a caption designated for a table, which generally serves to describe the table's subject matter. The caption reads, \"TABLE 1: The geometries and adsorption energies for the structures of thioglycolic acid on Au(111) at 0.25ML.\" It provides a clear indication that Table 1 will present quantitative data regarding the geometry and energy characteristics of thioglycolic acid adsorbed on a gold (Au) substrate at a specific coverage level." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "caption": " \"bri\" has an adsorption energy of 2.60 eV, \"fcc\" has an adsorption energy of 2.60 eV, \"top\" has an adsorption energy of 2.60 eV, and \"hep\" has an adsorption energy of 2.60 eV. The adsorption energies are listed in a column next to the adsorption sites.", + "gt": "This is a table containing organized data. It lists various configurations of thioglycolic acid adsorbed on an Au(111) surface, along with numerical values for initial and optimized parameters such as adsorption distance (ds–Au), polar angle (θ), and adsorption energy (E_ads). The data is structured in columns with headings for different parameters and rows corresponding to different adsorption sites and tilt directions. The table is used to convey detailed quantitative information in a comparative format, facilitating the analysis of changes in geometry and energy after optimization." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "caption": "The adsorption energy is 0.63 eV.", + "gt": "Here appears to be an excerpt of text, possibly from a research article or report, focusing on detailed scientific analysis. The text discusses the shortest Au-S bond length and mentions 'initial and optimized site,' likely referring to the states before and after some experimental or computational procedure. The content seems to pertain to the interpretation of the data presented in the table above it, providing context and insights into the structural data of the adsorption process." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "caption": "The adsorption energy for the most stable structure of the HSCH2COOH on the Au(111) surface is 0.63 eV. The adsorption site preferred by the sulfur atom is located at the top of the gold atom. The polar angle between the normal vector of the surface and the S-C2 direction is 74.2 degrees. The HSCH2COOH tend to lie down at the low coverage.", + "gt": "This section of text also seems to be a detailed analytical discussion, possibly a continuation of the content from the previous text excerpt. It specifically highlights the adsorption energy for the most stable structure of a molecule on the Au(111) surface and the preferred adsorption site. It suggests a close relationship with both the data in the table above and the scientific interpretation or conclusion drawn from that data." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/3b5a913400aa0be183eed5cdd06246f1f1c27dfc145a62a4296fe3ef38fc44f8.png", + "caption": "A black and white image of a person's face with a neutral expression, featuring a prominent nose, closed lips, and visible teeth. 
The person has short hair and is wearing a dark-colored top.", + "gt": "The content in this region appears to be a page-footer, usually found at the bottom of journal pages or official documents. It may contain information such as the page number, document section, publication date, or authors' names. Such footers are used for navigation and citation purposes." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "caption": "The segmented image shows two graphs. The first graph on the left is titled \"Atom prediction performance (a and c)\" and features a scatter plot with blue and orange dots. The second graph on the right is titled \"Bond prediction performance (a and c)\" and also features a scatter plot with blue and orange dots. Both graphs have a similar layout with x and y axes, and the dots are evenly spaced.", + "gt": "The image is a collection of three scatter plots, each corresponding to a distinct type of prediction performance evaluated by F1 score. The x-axis represents the frequency (presumably of occurrence in the training set), while the y-axis represents the F1 score, which is a measure of test accuracy. The plots are labeled (a) Atom prediction performance, (b) Bond prediction performance, and (c) Charge prediction performance. Each plot features a variety of points labeled with chemical symbols or bond types, indicating that the data relates to chemical structures." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "caption": "The F1 score for segmentation and classification networks.", + "gt": "This is a caption for the series of scatter plots shown in . It provides an interpretation of the data, stating that there is a clear correlation between the performance of neural networks on different prediction types and the frequency of the specific type in the training dataset. 
It is noted that classification networks perform significantly better than segmentation networks." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "caption": "The performance of segmentation network is measured by the F1 score, which is a metric that takes into account both precision and recall. The F1 score is used to evaluate the performance of the segmentation network in terms of its ability to correctly segment the image. The segmentation network is compared to other networks, and the F1 score is used to determine which network is the most effective.", + "gt": "This is a subsection title within the document that reads \"Performance of segmentation network.\" It indicates that the following text will discuss the results and analysis related to the evaluation of the segmentation network's performance." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "caption": "The segmentation network is a deep learning model that uses a combination of atom, bond, and charge types to predict the pixel values. The model is trained on a large amount of data and is designed to segment images into different parts. The F1 score is used to measure the performance of the model, with a higher score indicating better performance. The model is also used to predict the pixel values for different atom, bond, and charge types.", + "gt": "This is a paragraph that explains how the performance of the segmentation network is measured by the F1 score for pixel predictions for different atom, bond, and charge types. The text discusses how performance correlates with the frequency of these types in the training data and references a correlation visible in Figure 4, assuming that Figure 4 corresponds to the scatter plots in ." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "caption": "The performance of the different types of networks is measured by the F1 score, which is a measure of the accuracy of the network. The F1 score is higher for the different types of networks, indicating that they are performing well. The performance of the different types of networks is compared to the performance of the segmentation networks.", + "gt": "This is another subsection title within the document that reads \"Performance of classification networks.\" It signals that the subsequent paragraph will describe the performance evaluation for classification networks." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "caption": "The performance of the different parts of the network is measured by the F1 score, which is a measure of the accuracy of the network. The F1 score is used to evaluate the performance of the network in different tasks, such as atom, bond, and charge type classifications. The network is able to do a good job even when the segmentation is not perfect, and the performance of the different parts of the network is significantly higher than the segmentation networks.", + "gt": "This paragraph details the performance of classification networks, mentioning that the F1 score is used for evaluation. It highlights a correlation between F1 score and the frequency of different atom, bond, and charge types in the training set. Although the segmentation is not perfect, the classification networks can maintain accuracy. Results are summarized in Figure 4, which likely refers to the scatter plots in ." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "caption": "8.3 Overall graph accuracy", + "gt": "This is a subsection title denoted \"Overall graph accuracy,\" which suggests that the following section of the document will focus on the combined accuracy measurements of the previously discussed networks." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "caption": "The overall graph accuracy is a measure of the performance of the different parts of the graph, including the segmentation network and the classification network. The segmentation network is used to segment the graph into different parts, and the classification network is used to predict the type of the graph. The overall graph accuracy is a combination of these two parts, and it is used to measure the performance of the graph.", + "gt": "In this paragraph, the text outlines how combining the performance of different parts can produce an overall accuracy for graph predictions. It implies that integration of segmentation and classification network results, as indicated by an algorithm, can construct the resulting graph, referencing images in three different blocks." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0501_0146_0010.jpg", + "caption": "A black and white image of a person's face with a neutral expression, featuring a prominent nose, closed lips, and visible teeth. The person has short hair and is wearing a dark-colored top.", + "gt": "This is the page number of the document, specifically '11,' marking its location within the document's sequence of pages." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "caption": "The sugar is a low GI sweet spot, which is a type of sugar that has a low glucose index.", + "gt": "This region appears to be a paragraph of text discussing the outcome of a washing process on reducing sugar content. It notes that this process resulted in a higher content of reducing sugar which is thought to overshadow the glycemic index (GI) lowering effect of the polyphenols and may increase the GI of the sugar." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "caption": "The low GI sweet spot is at least 22mg CE/100mg sucrose.", + "gt": "This region of text outlines a graphical demonstration of the 'GI sweet spot' related to the sugars shown in a referenced table. It explains that a certain minimum amount of sucrose (22mg CE/100mg) needs to be retained during sugar processing to maintain a low GI, and that if additional polyphenols are present but the reducing sugars are too high, then the low GI effect is negated." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "caption": "The sugar has a low GI of about 22-32 mg CE/100g polyphenols. The polyphenol content is high, with a range of 22-32 mg CE/100g polyphenols. The sugar is hygroscopic, with a higher moisture content, and the polyphenol content increases as the sugar becomes more saturated.", + "gt": "This region includes a section header titled \"Table 3 - Example sugars,\" which implies that the region is categorizing and summarizing data related to various sugars, likely in a tabular format." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "caption": "The massacuit is a dark brown, viscous liquid with a glossy sheen, contained in a clear glass bottle.", + "gt": "The text in this region describes the effects of increasing the reducing sugar content of sugar and its impact on the GI, moisture content, and the behavior of glucose and fructose when polyphenol content is increased. It concludes that optimizing moisture and reducing sugar content is insufficient to lower the GI in the presence of higher polyphenol levels." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "caption": "The massacuites are prepared at two different sugar mills, designated as \"Mill 1\" and \"Mill 2\". The polyphenol content of each sample is determined, with the results shown in the table below. The massacuites are washed until they reach the desired polyphenol content, which is roughly 500 to 2000 ICUMSA.", + "gt": "This region is a section header for the text that follows, indicating that the content will deal with \"Example b - Washing or massecuite to desired polyphenol content.\" This text likely explains an example or case study related to the process of washing sugar massecuite to achieve a certain level of polyphenol content." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/2802b6a4f8f0c20b8ba717634b852ef20bc4d76831853ff5279febe7cc8b98a1.png", + "caption": "A black sugar with a glossy finish and a slightly irregular, rounded shape.", + "gt": "This region contains text describing an experimental process where two different sugar mill samples were tested for polyphenol content after undergoing washing to a certain depth of color. 
It discusses how the polyphenol content was measured against desired levels and mentions results found in a specific table." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "caption": "The image shows a collection of documents with text, some of which are in black and white, and others in color. The documents are arranged in a grid-like pattern, with some documents having a red border. The text on the documents is in various fonts and sizes, and some documents have a red box around the text.", + "gt": "This is a figure. It includes several images arranged in a grid layout depicting various stages of a document life cycle or processing steps. Each image shows a different state of documents, possibly related to digitalization or text recognition processes. These images likely serve as a visual representation of the document's evolution through a particular workflow, such as scanning or Optical Character Recognition (OCR)." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "caption": "The OCR process.", + "gt": "This is a caption associated with a figure. It reads \"Figure 5: The OCR process.\" This caption identifies and describes the figure that it is associated with. The figure it refers to likely illustrates the stages or aspects of the OCR process, which could involve converting scanned images of text into machine-encoded text." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "caption": "The text is a letter written in English, with the first line reading \"Available OC\" followed by \"for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The second line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The third line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The fourth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The fifth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The sixth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The seventh line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The eighth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The ninth line reads \"I have the honour to forward herewith for your 
information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The tenth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The eleventh line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The twelfth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The thirteenth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The fourteenth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The fifteenth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The sixteenth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The seventeenth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The eighteenth line reads \"I regret that several important matters, 
including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The nineteenth line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The twentieth line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The twenty-first line reads \"I have the honour to forward herewith for your information a Report upon the Plague in Hong Kong in 1894, so far as it concerns the medical work which I carried out under your direction.\" The twenty-second line reads \"I regret that several important matters, including the epidemiology of the disease, have been touched upon in the Report, which I will ask you to accept as an excuse for my shortcomings in this Report.\" The twenty-third line", + "gt": "This is a figure. It shows a piece of text with visual markings comparing two sections labeled \"Available OCR\" and \"Improved OCR.\" The annotations indicate corrections or enhancements made in the 'Improved OCR' section compared to the 'Available OCR' section. This figure serves to demonstrate the efficacy of certain OCR technologies or methodologies by providing a before-and-after comparison." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "caption": " https", + "gt": "This is a caption associated with a figure. It reads \"Figure 6: Excerpt from the Hong Kong report with different versions of OCR output. The Internet Archive image containing this excerpt can be accessed here:\" followed by a URL. 
This caption provides context for the associated figure, indicating that it is an excerpt from a specific report and acknowledges the source of the image. It helps readers understand the purpose of the figure and where they can find additional information." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "caption": "Journal of Data Mining and Digital Humanities", + "gt": "This is part of a footer. It contains the name of a journal, \"Journal of Data Mining and Digital Humanities,\" along with the ISSN number, which is a unique identifier used for serial publications. This area of the document provides information about the publication in which the article or research paper may be found." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "caption": "//dmh.esciences.org\".", + "gt": "This is also part of a footer. It includes a URL, \"http://jdmdh.episciences.org\", which likely directs readers to the website of the journal or publication mentioned in . This URL provides a way for readers to access more information or related content online." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0501_0148_0008.jpg", + "caption": "The image is too blurry to provide a detailed description of the object.", + "gt": "This is a page number. It is located in the footer area of the document and provides the numerical identifier \"9\" for the current page. This helps readers navigate the document and facilitates referencing specific sections." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "caption": "The text \"IOOF Annual Report 2012\" is displayed in a serif font, with \"IOOF\" in a larger size and \"Annual Report 2012\" in a smaller size. 
The text is in a light green color.", + "gt": "This region is identified as the page-header of the document. It contains the title of the document, which reads \"2012 Annual Report 2013.\"" + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "caption": "4. Non-Executive Directors' Remuneration", + "gt": "This portion is a section-header labeled \"Non-Executive Directors' Remuneration.\" It indicates that the following section will discuss the payment and remuneration details for non-executive directors of the company." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "caption": "4.1. Components of Non-Executive Director remuneration", + "gt": "This area is another section-header specifying \"Components of Non-Executive Director remuneration.\" This header suggests a breakdown of the various elements that constitute the remuneration for non-executive directors." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "caption": "The text \"page 37\" is written in lowercase letters.", + "gt": "Found at the bottom of the page, this region is the page-footer. It's a small section that is typically used for providing footnotes, disclaimers, or publication information for the document." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "caption": "The company is a non-executive director.", + "gt": "This is a table detailing a \"Share purchase plan.\" It contains columns for the name of the individual, the amount of shares acquired, and the share price range at acquisition dates, alongside with the total sum. 
It lists information about shares acquired by specific individuals at specified price ranges during a specific time frame." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "caption": "The Company's Constitution requires that the aggregate remuneration paid or provided to all Non-Executive Directors in any financial year by the Company, its subsidiaries and associated entities may not exceed an amount approved by shareholders. The ceiling amount includes all remuneration provided to Non-Executive Directors, including superannuation but not including retirement benefits. The current limit of $980,000 per annum was approved by shareholders at the 2010 Annual General Meeting. There has been no increase to the Non-Executive Director fee pool since this time.", + "gt": "This is a block of text providing detailed information on \"Current Board fees\" and \"Post-employment benefits.\" It specifies the annual fees for different board roles and outlines the post-retirement benefits provided to non-executive directors with terms of board service." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/6b5108198110934af9975bbb77b66ac0e9d095ec2031082b58970252031cc686.png", + "caption": "The Company's Constitution requires that the aggregate remuneration paid or provided to all Non-Executive Directors in any financial year may not exceed an amount approved by shareholders.", + "gt": "This region contains text related to the \"Deferred share purchase plan.\" It describes the nature of the share purchase plan, specifying the conditions under which shares were purchased, the performance criteria associated with the plan, and details regarding the share price and acquisition dates." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "caption": "The text is a mathematical equation involving the homotopy group of a space.", + "gt": "This region is categorized as text. It discusses the mathematical concept of homotopy groups designated π_n(M), focusing on their ability to classify different dimensional hypersurfaces within a manifold M. The text further explains that the triviality of these homotopy groups is linked to the connectivity of the space they represent, with specific mentions of the concepts of domain walls in cosmology and topological defects arising from symmetry breaking during phase transitions in the universe." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "caption": "The region is a monopole, which is a type of topological defect.", + "gt": "This region of text delves into the implications of symmetry breaking in theoretical physics. It connects the process of symmetry breaking to the generation of monopole-like defects, and it references the Standard Model's group construction that includes a U(1) factor. The text implies that this formation of defects played a pivotal role in historical scientific developments." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "caption": "The topological conditions of formation of defects only govern the formation of topologically stable defects. It was found that defects solutions can form even when the topology is trivial. 
The most well-known example are the electro-weak strings, formed during the electroweak symmetry breaking, which are perturbatively stable for a range of parameters which are not realized in nature, and belong to the broader class of embedded defects.", + "gt": "The text in this section links the theoretical concepts mentioned earlier to a practical application: the motivation for introducing a phase of inflation in cosmological models. It characterizes the topological conditions for the formation of defects and points out that certain solutions for these conditions can exist even in the absence of topologically stable defects. References are made to specific types of defects and academic citations are included to support these statements." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "caption": "These defects are a priori unstable though mechanisms (such as plasma effects) have been found to stabilize them. They are of interest for inflation model builders since this mechanism can allow lift the constraints from the formation of cosmic strings (see Sec. IV F on D-term inflation).", + "gt": "The content in this region pertains to the study of cosmic defects and their stability. It addresses scenarios in which initially unstable defects might become stable through various mechanisms, such as the effects of plasma. These considerations are relevant to the inflationary model in cosmology, particularly the constraints from the formation of cosmic strings." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "caption": "A black and white image of a mathematical equation with the variables \"x\" and \"y\" in a bold font, followed by a period and the number \"1\" in a smaller font.", + "gt": "This region is categorized as a formula and presents a mathematical equation related to the text's discussion about topological defects and homotopy groups. The equation seems to represent a relationship that is essential to the argument or analysis presented in the categorical text regions it is associated with." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/c17e4a6722b8cd30e591150720d363de3b00a3b5510eeafc2e5d83a3c1e9c08b.png", + "caption": "The number 72 is displayed in a bold, black font with a slight shadow effect, giving it a three-dimensional appearance. The numerals are evenly spaced and aligned horizontally.", + "gt": "The last region is identified as a page-footer. It likely contains publication and/or authorship information, a page number, or possibly an indication of the section of the document where the content can be found. Since it is a page-footer, its purpose is primarily to aid in the organization and navigation of the document." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0301_0188_0040.jpg", + "caption": "A stylized illustration of a woman with a bun hairstyle, featuring a pattern of white, cloud-like shapes with small black dots scattered throughout. The woman is wearing a pink garment with a polka dot pattern.", + "gt": "The image appears to be a stylized illustration of a side profile of a person's head and upper torso. The person's face is depicted with a serene or peaceful expression, eyes closed and a faint smile, implying a sense of calm or contemplation. 
Behind the figure, there are abstract shapes resembling clouds or wind patterns that swirl around the head, which could suggest thoughts, memories, or a state of mental flow. The illustration uses a muted color palette, predominantly warm shades of beige, pink, and gray, with a touch of red in the figure's attire, which has a dotted pattern. This artwork likely serves to evoke a mood or theme related to the content of the document in which it's included, possibly regarding mental health, mindfulness, psychology, or the creative process." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/0301_0188_0040.jpg", + "caption": "2021 Scientific American", + "gt": "This is a footer section of an image or document. It contains a citation that reads \"© 2021 Scientific American,\" indicating that the image or the content of the document is copyrighted by Scientific American. This informs the viewer about the source of the content and copyright year, serving both as an attribution and a legal notice to respect the intellectual property rights associated with the material." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "caption": "The number 62 is displayed in a bold, sans-serif font with a dark color, possibly black or dark gray, against a lighter background. The numerals are evenly spaced and aligned horizontally.", + "gt": "This section is labeled as \"Chapter 2: Motivation.\" It functions as a heading signifying the start of a new chapter or section within the document, providing readers with an indication of the chapter's theme, which in this case is to establish the rationale or impetus behind the subject matter discussed in the chapter." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "caption": " Motivation", + "gt": "This region, also labeled as \"Chapter 2: Motivation,\" serves a similar purpose to , functioning as part of the chapter heading that presents the focus of the chapter, potentially implying that the author will delve into the reasons or driving forces guiding the study or research presented." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "caption": "The string theory is a type of string theory that is compactified on the near-horizon geometry. It is a decoupled theory that combines the two pictures of the same low energy limit of one theory, Type IIB string theory.", + "gt": "This region contains a paragraph of text that discusses specific concepts related to a theoretical framework, possibly in the field of theoretical physics or string theory. It mentions a scenario where excitations in a certain 'near horizon throat region' appear redshifted to an observer at infinity. The text discusses the energy associated with these excitations and touches on limits pertaining to string theory, suggesting that in a particular limit, the full Type IIB string theory must be considered. The paragraph concludes with an implication that supergravity must be considered in the context of near-horizon geometry within the scope of string theory." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "caption": "The region is a field theory picture of the low energy limit of Type IIB string theory.", + "gt": "In this text paragraph, the document appears to be discussing two theoretical pictures related to the same low-energy limit within theoretical physics or string theory. 
It mentions the field theory picture with supergravity and an \\( N = 4 \\text{SU}(N) \\) SYM on the D branes, as well as the geometry picture with supergravity in flat space and Type IIB string theory. It suggests that the document is comparing and contrasting these two theoretical perspectives and proposing that they are both decoupled theories with identical asymptotic conditions." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "caption": "The regions for which analytic tools exist for these two different pictures turn out to be completely incompatible.", + "gt": "This text section seems to conclude the discussion by mentioning that the analytical tools for two differing theoretical scenarios are completely incompatible. It references the Born-Infeld action and suggests that a mathematical comparison between different models yields coincident D-branes for an \\( \\text{SU}(N) \\) two-form field strength, relating to a broader discussion on theoretical physics and string theory." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/8785a083270bd6931b7096508bdb57d938bca3a9787c50b82c188f7744a5b5f7.png", + "caption": "The equation is a complex mathematical expression involving various variables and functions. It includes a combination of Greek letters, numbers, and mathematical operators. The visible part of the equation shows a series of variables and functions that are interconnected, with some parts appearing to be in parentheses. The equation is written in a formal, mathematical notation.", + "gt": "This region exhibits a mathematical formula that is relevant to the discussion within the document. The formula appears to link certain theoretical physics concepts, connecting string coupling constants \\( g_s \\) with D-brane charges and configurations. 
The formula is most likely important in the context of supporting the document's claims about supergravity or string theory." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "caption": "The presence of an insider on the market does not necessarily lead to arbitrage, and the presence of insiders might be considered beneficial to the market, in the sense that it leads to higher information efficiency of the equilibrium price process.", + "gt": "The paragraph in discusses the conventional assumption that an insider's private information is static, citing specific examples from the literature. It elaborates by stating that in certain works, insiders are assumed to know the final value of an asset both before and after the default of the company issuing the asset. The text suggests that the presence of insiders does not always lead to market arbitrage and may contribute positively to the market by leading to higher information efficiency in price processes." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "caption": "The fundamental value of the firm is a stochastic process, and the insider can observe it directly or at least observe it in a less noisy way than the other agents on the market.", + "gt": "The text in challenges the assumption of an insider's perfect foresight as unrealistic, reasoning that the fundamental value of a firm is tied to dynamically changing elements like cash flows and sales, among other factors. The paragraph presents the idea that the fundamental value is stochastic, implying that it is subject to random fluctuations, and that the insider has the advantage of perceiving these fluctuations more clearly than other market participants." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "caption": "The paper relaxes the assumption of static insider information and studies the equilibrium trading and price processes, as well as market efficiency, in a setting with dynamic private information.", + "gt": "In , the document introduces the paper's goal, which is to relax the assumption of static information and examine the equilibrium in trading and price processes and market efficiency when insiders have dynamic private information. The paragraph sets the context for a more detailed exploration of how markets operate under these conditions." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "caption": "The model is a generalization of the static information setting of [2].", + "gt": " contains text which explains that the model considered in this paper is a broader version of the earlier static models. The paper's intention is to cover dynamic information scenarios and improve on previous models that covered a narrower range of trading strategies and pricing rules." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "caption": "The model is a generalization of the static information setting of [2] and is designed to include dynamic information. It has a much smaller set of admissible trading strategies and pricing rules compared to the ones considered in the work. The model shows the existence of a unique Markovian equilibrium, which is an equilibrium price that allows the insider to trade undetected and depends only on the total order process. 
The model also shows that the presence of an insider increases the market informational efficiency for times close to the dividend payment.", + "gt": "The paragraph in discusses the findings of the paper, which include the identification of a Markovian equilibrium that is inconspicuous, allows insiders to trade without being detected, and is solely dependent on the total order process. It underscores the unique nature of this equilibrium and how it enhances the market efficiency in certain conditions." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_doc_detailed_caption/images/1d4428fcd749139270136afe55a72185c44b5ff783c1161767efacc2f08a43c3.png", + "caption": "A black and white image of a person with a beard and mustache, wearing a dark-colored jacket over a light-colored shirt, and dark pants. The person is holding a microphone in their right hand and appears to be singing or speaking.", + "gt": "The text in suggests empirical outcomes where revealing information might be beneficial. Specifically, it contrasts different market equilibrium scenarios and suggests that in non-Markovian price processes, it’s often better for insiders to disclose their private information." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "caption": "A red, three-dimensional, cursive sign with the word \"Abondana\" written in a flowing, elegant script.", + "gt": "The Comprehensive Description includes \"Abbondanza\" displayed in a script font that conveys a sense of stylishness or elegance, which may suggest that it is the name of a business, possibly a restaurant or some sort of food-related establishment, given its association with abundance or plenty often related to food." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "caption": "The signboard features the word \"Cafe\" in a stylized, cursive font with a gradient of red to dark red, giving it a three-dimensional appearance. The letters are slightly italicized and have a shadow effect, enhancing their depth.", + "gt": "The Comprehensive Description for \"Cafe\" suggests that the text is identifying a type of establishment where coffee and light meals may be served. The font is straightforward and easily legible, which is typically used for clarity and immediate recognition for passersby." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0000288.jpg", + "caption": "The logo features the letters \"USIS\" in bold, with \"US\" in red and \"IS\" in black. Below the letters, there is a tagline in smaller, gray font.", + "gt": "The Comprehensive Description for \"USIS\" indicates a text that is likely an acronym or a name presented in a bold and blocky font, common for official or institutional entities. It is placed on the side of a van, suggesting it could be the branding of a company or a service, possibly linked to the van's purpose or ownership." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "caption": "The word \"ESTATE\" is written in bold, black, uppercase letters on a yellow background.", + "gt": "The text \"ESTATE\" is written in capital letters with a bold typeface that has clear and uniform strokes, implying a sense of authority and prominence. It appears against a yellow background, which suggests visibility and is likely meant to catch the eye of passersby. The text is likely part of a business sign for a company dealing with property, real estate sales, or management." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "caption": "The signage displays the word \"AGENTS\" in bold, uppercase letters. The letters are black and set against a yellow background.", + "gt": "The text \"AGENTS\" displayed similarly in capital letters and bold typeface complements the text in . The typeface is consistent, suggesting that both are part of the same sign. The dark text against the yellow background stands out, indicating the nature of the business below, which is likely involved in real estate agency work." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "caption": "The signage displays the word \"SAXONS\" in bold, black capital letters on a yellow background.", + "gt": "The word \"SAXONS\" is written in capital letters with a font style that is bold and prominent, but with a slightly more decorative style than and . This difference could be a stylized choice to make the brand name distinctive. Positioned on a façade above a window, it is part of the business's branding, likely the name of the company." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "caption": "The signboard features the word \"SAXONS\" in capital letters with a serif font, set against a dark background. The letters are in a metallic finish with a reflective surface, giving them a shiny appearance. The font size is large and bold, suggesting a prominent display.", + "gt": "This contains the same text as , \"SAXONS\", indicating that the text is repeated within the image. This repetition reinforces the importance of the name as part of the branding. The text style and location, again above a window, maintain the brand's visibility from multiple angles." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "caption": "The signboard displays the word \"ESTATE\" in capital letters, with a serif font. The letters are dark and possibly metallic, with a reflective surface that catches light, giving them a slightly shiny appearance. The background of the signboard is not visible, but the letters are set against a dark backdrop that contrasts with the lighter color of the text.", + "gt": "Displaying the word \"ESTATE\" in the same bold, capital letter style as observed in . This repetition at a lower part of the building indicates a consistent branding approach across the business' presence on the building, and its placement closer to eye level increases readability for pedestrians." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/img_49.jpg", + "caption": "The signboard displays the word \"AGENTS\" in capital letters, with a serif font. The letters are in a dark color, possibly black or dark brown, with a metallic or reflective finish that gives them a slightly shiny appearance. The background of the signboard is not visible, but the letters are evenly spaced and aligned horizontally.", + "gt": "The word \"AGENTS\" is identified, and like , it mirrors the style and size of the sign in , ensuring that the message of the business being an estate agency is clear. This consistent branding facilitates quick recognition and understanding of the services offered." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "caption": "The word \"Triple\" is written in a cursive, green font with a white outline. The letters are slightly italicized and have a playful, rounded design.", + "gt": "The text \"Triple\" appears in a retro cursive script, likely chosen to convey a sense of nostalgia or classic style, which is consistent with the overall branding. 
It is set against a yellow portion of the sign, and the color choice here is a mint green which provides a pleasing contrast that makes the text stand out." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "caption": "The word \"WHITE\" is written in bold, uppercase letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally.", + "gt": "\"WHITE\" is written in bold, capital letters, featured on a green ribbon-like background that cuts across the sign. The font is sans-serif, which gives a modern and clean look. The use of capital letters in this context suggests emphasis and importance." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "caption": "The word \"SPOT\" is written in bold, uppercase letters with a light blue color. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance.", + "gt": "The phrase \"SPOT\" is displayed in a sans-serif, uppercase font similar to the text in . It is placed within the same green ribbon background, mirroring the style and maintaining design consistency. This positioning completes the name or title represented on the signage." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "caption": "A stylized, cursive letter \"O\" with a green outline and a white fill, featuring a small, curved tail extending from the bottom right.", + "gt": "\"O's\" is written in a script that echoes the retro flair seen in . This script is mint green, presented on a yellow backdrop, and it features an apostrophe, signifying a possessive or a contraction. The stylized \"O\" has a red center dot, adding to the thematic color scheme." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0000808.jpg", + "caption": "The visible part of the ag is a white, stylized letter \"A\" with a slight shadow effect, giving it a three-dimensional appearance.", + "gt": "The word \"by\" is located on the lower left area of the central graphic and is likely to be a connector or a preposition relating to the larger text elements in the image. It's written in small, lowercase letters, contrasting in size to the other texts, suggesting a subordinate role in the information hierarchy." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0001122.jpg", + "caption": "The text \"NEW\" is written in bold, red, uppercase letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally.", + "gt": "The text in this area reads \"NEW\". The font is bold and stylized with thick vertical lines and sharp edges, suggesting a strong, impactful presence. The letters are colored in red, which stands out against the white background of the fabric they are printed on. There are black vertical lines that run down the fabric, giving the impression of pinstripes. The text placement and style are reminiscent of classic athletic or team-related apparel, often used to represent a specific city or team name." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0001122.jpg", + "caption": "The text \"YORK\" is written in bold, red, uppercase letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and aligned horizontally.", + "gt": "The text in this area reads \"YORK\". Similar to the previous region, it features a bold and stylized font in red, contrasting with the white pinstriped background. 
The consistent style between this text and that of suggests they form a single phrase, typically associated with a particular location or team. The font size and its commanding presence imply that the text is intended to be easily read and recognized from a distance, characteristic of team jerseys or sports merchandise." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/img_162.jpg", + "caption": "The text \"Colchester\" is written in a bold, sans-serif font with a dark blue color.", + "gt": "The text \"Colchester\" is displayed in a straightforward, sans-serif font with a bold weight, which makes it highly legible and easy to read. The text color is white, which contrasts sharply with the red background, creating a standout effect that captures attention. This type of text presentation is typically used for clear communication and effective signage. \"Colchester\" is likely the name of a place, possibly a destination or location referenced on a signpost or directional marker. The choice of a bold and contrasting color scheme is intentional, aimed at ensuring that the text is discernible from a distance and under various lighting conditions. The text is centrally aligned within the marked area, suggesting the importance of the information it conveys. The presence of the symbol above the text, resembling a stylized pair of railway tracks, indicates that this sign is associated with a railway service or station. The purpose of the text in this context is to inform viewers of a railway station name or a destination reachable via train services." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "caption": "The signage features bold, black, uppercase letters spelling \"AYAM\" on a yellow background.", + "gt": "The text \"AYAM\" is presented in large uppercase letters on a signage board. 
The font appears bold and designed to be eye-catching, serving the purpose of promoting or identifying a business or product associated with chicken, as \"ayam\" means chicken in Malay and Indonesian." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "caption": "The signage features large, bold, red letters \"RUA\" on a yellow background.", + "gt": "The text \"RIA\" appears next to \"AYAM\" in the same font and style, following the design pattern of the sign. It seems to be part of a larger phrase or brand name, although without additional context it is challenging to ascertain its full meaning or association." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "caption": "A yellow sign with the word \"SMASHED\" in bold, black, uppercase letters.", + "gt": "The word \"SMASHED\" is in uppercase letters and retains the same font consistency and styling as the previous words, indicating it's part of the same signboard. The use of the word \"smashed\" could be describing a method of food preparation, possibly relating to the menu items offered by the establishment." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "caption": "A dark brown, rectangular sign with the word \"FRIED\" in bold, uppercase letters.", + "gt": "The word \"FRIED\" appears in the same bold, attention-grabbing font as the other text elements in the signage. The usage of the term \"fried\" aligns well with food-oriented establishments and could denote a particular style of cooking advertised by the business." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/img_940.jpg", + "caption": "A brown sign with the word \"CHICKEN\" in bold, uppercase letters.", + "gt": "The term \"CHICKEN\" completes what seems to be a descriptive phrase relating to the nature of the food provided at this location. 
Presented in the same visual style as the other text elements on the sign, it confirms the establishment’s focus on chicken dishes." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "caption": "Accommodation", + "gt": "The text \"Accommodation\" appears on a signboard, suggesting the label for a location where lodging facilities are provided. The text is bold and capitalized, providing clear visibility and significance, thus indicating direction to the accommodation facilities within the vicinity." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "caption": "The word \"Office\" is written in a bold, sans-serif font with a dark blue color. The letters are evenly spaced and aligned horizontally.", + "gt": "The text \"Office\" displayed similarly to , is also on the signboard, and its typography suggests it is an instructional marker guiding individuals towards offices located nearby. Its distinct appearance functions as a navigational aid for visitors seeking office spaces." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/img_70.jpg", + "caption": "The word \"Nightline\" is written in a bold, sans-serif font with a dark blue color.", + "gt": "The term \"Nightline\" is prominently featured, possibly indicating a nighttime service or a helpline available after-hours. This text, like the others on the sign, caters to nighttime assistance or inquiries, potentially providing crucial information for individuals seeking support during late hours." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "caption": "A red and white signboard with the word \"BUBBA\" in bold, capitalized, red letters on a white background, with a red border around the sign.", + "gt": "The text \"BUBBA\" appears in bold, capital letters with a font that is playful and somewhat informal, possibly evoking a casual or friendly atmosphere. The position is prominently displayed at the top of a circular logo, which suggests its importance as a distinguishing element or a brand name." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "caption": "The signboard features the word \"GUMP\" in large, bold, red capital letters with a white outline. The letters are set against a textured, light-colored background that resembles a stone or concrete surface.", + "gt": "The word \"GUMP\" is presented in a similar bold and playful font directly below . Both words form a cohesive phrase when read together, implying a connection or partnership, possibly in a business context." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "caption": "The signboard features the word \"SHRIMP\" in capital letters, with a bold, sans-serif font. The letters are white with a slight shadow effect, giving them a three-dimensional appearance. The background of the signboard is a deep blue color, providing a stark contrast to the white text.", + "gt": "The word \"SHRIMP\" is placed below and , completing the phrase that seems to be the focal point of the circular logo. The font style remains consistent with the previous text, reinforcing the brand's identity and likely indicating the type of product or service offered." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "caption": "A curved signboard with a red border and a white background, featuring the word \"RESTAURANT\" in bold, black, uppercase letters.", + "gt": "\"RESTAURANT\" is written in a smaller, yet bold font beneath the word \"SHRIMP\". This text specifies the nature of the business associated with the overarching brand identified by the preceding text." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "caption": "A curved, red and white signboard with the word \"MARKET\" in capital letters, featuring a serif font.", + "gt": "The word \"MARKET\" appears in a smaller font at the bottom of the circle, suggesting a secondary or additional aspect of the business, perhaps indicating a place where goods are sold as part of the company's offerings." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "caption": "A circular blue signboard with a white border and a white symbol resembling a stylized letter 'C' in the center.", + "gt": "\"CO\" could stand for \"Company,\" abbreviated and presented beside the main brand name, which is common practice for businesses to denote a corporate entity." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/0001250.jpg", + "caption": "A circular, metallic signboard with a textured background featuring the letters \"TM\" in a bold, sans-serif font, centered on the sign.", + "gt": "\"TM\" indicates that the entire phrase formed by , , and is a trademark. This protects the brand's unique identity and legally secures its use exclusively for the business's purposes." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "caption": "The word \"EVENING\" is written in bold, uppercase letters with a gradient of red to pink. 
The letters are evenly spaced and have a slightly blurred effect, giving them a soft, glowing appearance.", + "gt": "The text \"EVENING\" appears in a sans-serif, bold font that is capitalized for emphasis. It is located on the lower part of a product label, positioned just above another text element that indicates further details about the product. The text serves to indicate either the usage time or a key ingredient, \"Evening Primrose,\" of the product, likely related to wellness or personal care." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "caption": "The word \"PRIMROSE\" is written in bold, uppercase letters with a gradient of pink to red. The letters are evenly spaced and have a slightly blurred effect, giving them a soft, diffused appearance.", + "gt": "\"PRIMROSE\" is the second part of a phrase in which the text is styled similarly to the text in . It follows directly below \"EVENING,\" suggesting the complete term \"EVENING PRIMROSE.\" The positioning and styling are consistent with , reinforcing the connection between the two words. The term \"Evening Primrose\" is usually associated with the name of a plant, often used in the context of essential oils or natural product ingredients." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "caption": "The word \"BASE\" is written in bold, uppercase letters with a red background and white outline. The letters are evenly spaced and have a slightly distressed texture.", + "gt": "The term \"BASE\" is part of the full term \"BASE OIL,\" which is shown in a smaller and possibly lighter weight sans-serif typeface compared to and . Positioned at the bottom of the product label, it likely indicates the type of product, suggesting that the contents of the bottle can be used as a carrier or base oil in aromatherapy or skincare." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/tr_img_01235.jpg", + "caption": "The word \"OIL\" is written in bold, uppercase letters with a gradient of red to pink colors. The letters are evenly spaced and have a slightly blurred appearance.", + "gt": "\"OIL\" completes the term \"BASE OIL,\" as mentioned in . The font and positioning maintain consistency with the description provided in , reinforcing its role as part of a descriptive phrase related to the product's use or contents. Together, \"BASE OIL\" likely designates the product's category within a larger set of similar goods." + }, + { + "image_path": "evaluation/MDVP-Bench/data/ocr_spotting_detailed_caption/images/tr_img_01644.jpg", + "caption": "The number 46 is displayed in a large, bold, white font with a slight shadow effect, giving it a three-dimensional appearance. The numerals are evenly spaced and have a smooth, rounded design.", + "gt": "The text \"46\" appears in a serif font with distinct, prominent stylistic flares at the ends of the strokes, which is characteristic of serif fonts. These numerals are white, providing a strong contrast against a dark background plate, which appears to be made of slate or a similar material. The plate is mounted onto a brick wall, and there are two spherical, possibly metallic, fixtures attached to the plate on either side, which seem to be serving as decorative mounting posts. The purpose of this text likely indicates an address or number associated with the location, commonly used to identify specific residential or commercial units." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "caption": "A green calendar with white text displaying the numbers 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369,", + "gt": "This area seems to indicate the numeric value '31,' which, in a calendrical context, may reference the number of days in a month. 
It does not appear to have any interactive features based on the screenshot." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "caption": "A green rectangular button with rounded corners featuring the word \"Connecter\" in white lowercase letters, followed by a yellow icon resembling a lock with a keyhole.", + "gt": "This seems to be a button or a link labeled 'Connecter' which, when translated from French, means 'Connect' or 'Log in'. It is likely an interactive element that upon being clicked, would prompt the user to access an account or initiate a connection process." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "caption": "A green, stylized, lowercase letter \"n\" with a slight shadow effect, giving it a three-dimensional appearance.", + "gt": "This section contains the French word 'Novembre', which is the month of November. It appears to be a part of a list of months, possibly for navigating a calendar or archives by month. It may be an interactive element that allows users to view content from November." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "caption": "A green, stylized lowercase \"a\" with a curved tail extending from the bottom right, resembling a lowercase \"i\" with a dot above it.", + "gt": "This is a button or link with the text 'Annoncez' followed by information icon (i). The French word 'Annoncez' translates to 'Advertise'. This suggests that it is a call-to-action for users to advertise, possibly by clicking this button or link. The information icon typically indicates additional details available upon interaction." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "caption": "A green rectangular button with rounded corners, featuring a white icon of a person in a wheelchair on the left side. To the right of the icon, there is white text that reads \"Accessible\" above a smaller text \"Elevator\" and a white arrow pointing downwards.", + "gt": "This area displays the number '07', which could signify a day of the month, especially since it is seen next to a date heading in the format 'Vendredi 7 Mai 2021', which translates to 'Friday, May 7, 2021'. It seems to be a static element without interactivity." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "caption": "The word \"septembre\" in lowercase letters, with a green background and white text.", + "gt": "Similar to , this is labeled 'Septembre', which is the French word for September. It is part of the same apparent navigational element for a calendar or archive sorted by months and is likely interactive as well." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "caption": "The text \"Lien Web\" is written in a bold, sans-serif font with a green color. The letters are evenly spaced and aligned horizontally.", + "gt": "This section reads 'Liens Web', which translates to 'Web Links' from French. This likely refers to a section of the web page intended to direct users to other related sites or resources. It is probably interactive, with each listed link being clickable." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "caption": "The number \"15\" is displayed in a bold, green font with a slight shadow effect, giving it a three-dimensional appearance. 
The numerals are evenly spaced and aligned horizontally.", + "gt": "The number '15' is visible here, and when combined with the context of a calendar visible in the screenshot, it likely represents the 15th day of a month. This element does not seem to be interactive itself." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "caption": "A green and white striped awning with a scalloped edge and a fringe detail.", + "gt": "Here we see the number '04' which, in the context of the surrounding calendar, might represent the 4th day of a month. It doesn't show any sign of interactivity." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/60e34f9315443cb6c77d32da7ba5eee1.png", + "caption": "Plan du site", + "gt": "This text, 'Plan du site', stands for 'Site Map' in French and usually refers to a detailed page listing where one can find an overview of all the sections and pages within the website. It is usually an interactive element that, when clicked, will take the user to the sitemap page." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "caption": "A white arrow pointing to the right, with a slight curve at the tail end, is positioned to the right of the text \"Book Now.\"", + "gt": "This region depicts an interactive button with the text \"BOOK NOW\" overlaid on it. Usually, buttons like this on websites are clickable and lead the user to a page where they can schedule an appointment or reserve a service. The button is stylistically designed to stand out and grab attention, potentially suggesting it is a call-to-action feature for users to quickly access the booking process for the service provided by the website." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "caption": "About Us", + "gt": "This section of the website features a text link with the phrase \"Buy Luxurious Doona.\" It likely serves as a navigational element, which upon clicking, would redirect users to a page where they can purchase a \"Luxurious Doona.\" The term \"Doona\" typically refers to a type of bedding, suggesting that the site might be related to home goods or personal comfort items." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "caption": "The text \"Pay Balance\" is written in bold, black, sans-serif font.", + "gt": "This area contains a text link that reads \"Pay Balance.\" It is probably an interactive link that, once clicked, would take the user to a section of the website where they can complete a payment - likely concerning a service or product they have previously engaged with." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "caption": "About Us", + "gt": "Featured here is a clickable text link titled \"About Us.\" Such links generally lead users to a webpage that elaborates on the history, mission, values, or team behind the company or service. It helps users learn more about the company or website owners." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "caption": "contact us", + "gt": "This part of the webpage indicates a \"Contact us\" link. Clicking on this text would typically guide the visitor to a page featuring contact information or a form enabling the users to reach out to the company for inquiries or support." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "caption": "The word \"About\" is written in a bold, sans-serif font with a gradient of pink to red, set against a teal background.", + "gt": "\"Home\" appears to be a navigation link that, when selected, would likely redirect users to the homepage of the website. The homepage is the main page that often provides a comprehensive overview of what the website offers." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "caption": "A teal-colored horizontal bar with white text and symbols. On the left side, there is a white envelope icon followed by the text \"doonawash@gmail.com\". On the right side, there is a white icon resembling a person in a wheelchair.", + "gt": "The text here, \"doonawash@gmail.com,\" suggests an email address. This is likely provided for users to directly contact the company or service provider through email. It is not clickable but can be used to send an email using an email client or service." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "caption": "The text \"Contact us\" is written in a bold, sans-serif font with a pinkish-red color. The letters are evenly spaced and aligned horizontally.", + "gt": "Similar to , this \"Contact us\" link would allow users to access a contact section or page on the website, promoting user interaction with the service provider for queries or assistance." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "caption": "The word \"Home\" is written in a bold, sans-serif font with a gradient of blue shades, transitioning from a lighter blue at the top to a darker blue at the bottom.", + "gt": "Just like , this \"Home\" link is a navigational feature intended to bring the user back to the site's main page, presenting a starting point or central hub for exploring the website's contents." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/0180e97a3e9609ea8c72b6b8db0071c8.png", + "caption": "The text \"Pay Balance\" is written in a bold, sans-serif font with a pinkish hue. The letters are evenly spaced and aligned horizontally.", + "gt": "Echoing , the \"Pay Balance\" text link is associated with the payment part of a transaction on the website. It is intended to facilitate users in clearing dues or completing transactions related to the services offered by the site." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/web_0558c1f4-c05b-49a8-8479-04b1575779d2.png", + "caption": "OpenStreetMap Belgium", + "gt": "This area of the webpage is part of a bullet point list under the subheading \"Local Chapters\". The subheading describes Local Chapters as country or region-level groups affiliated with the OpenStreetMap Foundation that represent their local mapping community in dealings with government, business, and media. The bullet point \"OpenStreetMap Belgium\" likely indicates that there is an established local chapter for the country of Belgium. The text appears in blue with an underline, suggesting that it is a hyperlink. Clicking on this hyperlink would presumably direct the user to more information about the OpenStreetMap community in Belgium or to their specific website." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "caption": "A white rectangular button with a black border, featuring the text \"Tekniki Dastak\" in black, followed by a right-pointing arrow. Below the arrow, the text \"055 292-50-49\" is displayed in black.", + "gt": "This area appears to be a contact detail, specifically a phone number. It typically serves as a direct line of communication for users to reach out to the company or organization featured on the website. Such contact information is usually clickable on mobile devices, enabling the user to initiate a phone call directly." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "caption": "A white rectangular tag with the number \"4.9\" in bold black font centered on it.", + "gt": "This section includes a numerical rating, which is indicative of client satisfaction, service quality, or performance measurement. It suggests that it may be connected to reviews or ratings received from clients, as denoted by the star symbol which commonly represents ratings." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "caption": "A rectangular blue button with white text that reads \"Pulsuz Konsultasyon.\"", + "gt": "The text translates to \"Free Consultation\" in Azerbaijani, indicating an offering from the company to prospective clients. It is likely a call-to-action button which upon clicking, would lead a user to a form or contact option to set up a consultation without any charge." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/367173643a055b0657de17afff8d541d.png", + "caption": "The word \"Portfolio\" in a bold, sans-serif font, with a slight italicization, is centered on the image. 
The letters are black with a subtle shadow effect, giving them a three-dimensional appearance. The background is a light, neutral color, providing a clean and modern look.", + "gt": "This part of the website is labeled \"Portfolio,\" signifying that it's likely a navigation element leading to a page where the company showcases their previous work, projects, or case studies to highlight their experience and expertise." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "caption": "A white shopping cart icon with a blue outline, featuring a rectangular basket with a grid pattern, two vertical handles, and four wheels, two of which are visible.", + "gt": "This space indicates a shopping cart feature with a count of items currently in the cart, which currently stands at zero. This interactive element likely becomes clickable when items are added, allowing users to view and manage the contents of their cart." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "caption": "The word \"SUPPORT\" in bold, uppercase letters with a blue background and white outline.", + "gt": "This area is typically a customer service feature, allowing users to access help or assistance through various means such as a help center, live chat, or contact information. It's usually clickable and would direct the user to a support section of the website." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "caption": "A white lowercase letter \"f\" with a bold, sans-serif font, set against a blue background.", + "gt": "The text suggests a prompt to visit the company's Facebook page. This is an interactive element that, when clicked, likely redirects users to the specified social media page to engage with the company's content on that platform." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "caption": "Your Charts", + "gt": "It denotes an area that likely relates to personalization for users, where they can view their astrology charts. This is expected to be a clickable feature which, when accessed, leads the user to a section where their personalized charts are displayed or can be created." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "caption": "A blue, stylized letter \"X\" with a white outline, featuring a slight shadow effect on the right side, giving it a three-dimensional appearance.", + "gt": "Similar to , this is a call to action to visit the company's Twitter page. Clicking on this interactive element would redirect a user to the company's Twitter profile to view tweets and engage with their content." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "caption": "A rectangular white sign with the words \"CONTACT US\" in bold, uppercase, blue letters.", + "gt": "This is a customer contact area, providing users with a way to get in touch with the company. Clicking on this is likely to take the user to a section of the site with various contact options like email, phone, or a contact form." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "caption": "A rectangular blue button with rounded corners and a white border. The button has the words \"ADD TO CART\" in bold, white, uppercase letters centered on it.", + "gt": "This is a call-to-action button that allows users to add a product to their shopping cart. This button is interactive, and upon clicking, the chosen product would be added to the user's cart, with the action possibly reflected in the shopping cart count in ." 
+ }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "caption": "The word \"PRODUCTS\" is written in bold, uppercase letters with a blue background. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance.", + "gt": "This area is likely dedicated to showcasing the company's range of products. Clicking here would probably lead users to a product catalog where they can browse and select items of interest." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "caption": "The word \"PODCAST\" is written in bold, uppercase letters with a blue background. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance.", + "gt": "It represents an area designated for a podcast. Users can expect to interact with this button to be taken to a media player or section of the website where they can listen to recorded audio content." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/5c9b9883310423712e466bee13a36a02.png", + "caption": "The text \"ABOUT US\" is written in bold, uppercase letters. The letters are blue and have a slight shadow effect, giving them a three-dimensional appearance. The text is centered horizontally.", + "gt": "This part of the website provides company information to the user. It's normally a clickable element that leads the user to learn more about the company's history, values, mission, and team members." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "caption": "The text \"WHAT WE DO\" is written in bold, uppercase letters. The letters are evenly spaced and have a modern, sans-serif font. 
The text is centered horizontally.", + "gt": "This section typically represents a menu item on a website that describes the services or actions undertaken by the organization. It usually links to a page with detailed information on the work that the organization performs, including projects, mission statements, or other relevant content." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "caption": "GWM launches livelihood micro-grants", + "gt": "This appears to be a news headline or feature article title on the website. It suggests that the organization has introduced a new initiative offering financial assistance for livelihood projects. Clicking on this title would likely lead to an article or post giving more information about the micro-grants program and its objectives." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "caption": "A rectangular white button with a black border and the word \"Settings\" in black, bold, sans-serif font centered on it.", + "gt": "This is likely a button or link to a settings page where users can adjust their preferences for the website, which might include language settings, account details, notification preferences, and more." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "caption": "Privacy Policy", + "gt": "This commonly links to the website's privacy policy document, where users can learn about how the organization collects, uses, stores, and protects personal data." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "caption": "The word \"HOME\" in uppercase letters, with a bold, sans-serif font, is centered on the image. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance. 
The color of the text is a gradient of light to dark gray, creating a subtle contrast against the background.", + "gt": "This is typically a navigation link that returns the user to the main homepage of the website from any other page." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "caption": "A rectangular black button with rounded corners featuring the word \"Accept\" in white, sans-serif font.", + "gt": "It usually indicates a button the user can click to accept the terms of a policy, possibly related to cookies or usage terms, as indicated by the accompanying text." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "caption": "A red, oval-shaped button with a white border and the word \"DONATE\" in bold, uppercase, red letters centered on it.", + "gt": "This is often a prominent call-to-action button meant to direct users to a page where they can make financial contributions to the organization or cause." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "caption": "The word \"NEWS\" in bold, uppercase letters with a slight shadow effect, giving it a three-dimensional appearance. The letters are evenly spaced and have a clean, modern font style.", + "gt": "Commonly a menu item that links to a news section containing articles, updates, blog posts, press releases, or other information that keeps readers informed about the organization's activities or relevant topics." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "caption": "The text \"ABOUT US\" is written in bold, uppercase letters with a sans-serif font. The letters are evenly spaced and aligned horizontally. 
The color of the text is black, and it stands out against a light background.", + "gt": "This is typically a link to a page where users can find more information about the organization, including history, values, team members, or accomplishments." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/53b4ab2cb706a43fec7ce4ac5eac181e.png", + "caption": "The text \"CONTACT US\" is written in bold, uppercase letters. The letters are evenly spaced and have a modern, sans-serif font. The color of the text is black, and it stands out against a light background.", + "gt": "Usually a link to a page where visitors can find contact information for the organization, such as an address, phone number, email, or a contact form." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/web_6c677961-e540-4cc5-b725-5e301019a9f9.png", + "caption": "A black and white icon depicting a stylized, abstract representation of a building with a flat roof and multiple rectangular windows arranged in a grid pattern.", + "gt": "This region appears to be a toolbar located within a content editing area, likely part of a web-based application interface. The specific feature highlighted is an icon that suggests functionality related to inserting tables into the content. In a typical text editor or content management system interface, clicking this icon would presumably open a menu or dialogue box allowing the user to create and insert a table into the document. The table insertion feature commonly lets users specify the number of rows and columns, choose a table style, and sometimes adjust additional table properties such as cell padding or headers." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "caption": "The word \"News\" in a bold, sans-serif font, with a slight shadow effect, giving it a three-dimensional appearance. 
The letters are evenly spaced and have a dark color, contrasting with the lighter background.", + "gt": "This area encompasses a navigation element labeled \"News.\" It likely leads to a section of the website where current news relevant to the organization or its field of operation is disseminated. As a navigational element, it is interactive and upon clicking would redirect users to the page where news articles or updates are posted." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "caption": "The logo features the word \"ServeGate\" in bold, black lowercase letters. To the left of the text, there is a stylized graphic consisting of two overlapping triangles, one in teal and the other in red, with a black line separating them.", + "gt": "This area displays the company's name ServeGate, which appears to be stylized as a logo. This typically acts as a home button; clicking on it would usually take users back to the main or home page of the website." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "caption": "Our Difference", + "gt": "This heading titled \"Our Difference\" may signify a navigational item that leads to content describing what sets the organization apart from competitors. Interaction would involve clicking it to navigate to a page that likely discusses the company's unique selling propositions (USPs), mission, values, or other differentiating factors." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "caption": "The text \"About Us\" is written in a bold, sans-serif font with a red color. 
The letters are evenly spaced and aligned horizontally.", + "gt": "Labeled \"About Us,\" this is another navigation item that, when clicked, would take the user to a section of the site that provides information about ServeGate, such as its history, leadership team, vision, and mission." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "caption": "A rectangular button with rounded corners, featuring a light pink background and a thin red border. The button displays the text \"Get in touch\" in bold, red, sans-serif font.", + "gt": "The text \"Get in touch\" suggests an interactive component that leads to a part of the website where users can contact the organization. This may include a contact form, phone numbers, email addresses, or other means of communication." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "caption": "The word \"Home\" in a serif font, with the letters in a light gray color against a white background.", + "gt": "The term \"Home\" designates a navigational link that typically redirects users to the front page of the website. Clicking this link would generally return the user to the starting point of their navigation experience." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "caption": "The word \"Services\" in lowercase letters, with a bold, sans-serif font. The letters are evenly spaced and have a slight shadow effect, giving them a three-dimensional appearance. The color of the text is a dark gray, contrasting with the light background.", + "gt": "The item labeled \"Services\" is likely a drop-down menu or a link to a page that outlines the company's offerings. 
Users can click on it to discover more about the services provided by ServeGate, including descriptions and possibly pricing or someone to contact for further inquiry." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "caption": "Indigenous Impact", + "gt": "\"Indigenous Impact\" might be a navigation link to a page detailing the company's impact on, contributions to, or programs associated with Indigenous communities. Interaction with this element would bring the user to either a dedicated section or might expand into a submenu listing various facets of this impact." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/656b47ffb1270a8038d876586e92a71b.png", + "caption": "The text \"ServeGate\" is written in bold, black, sans-serif font. The letters are evenly spaced and aligned horizontally.", + "gt": "This appears to be another instance of the company logo for ServeGate, similar to . It likely serves the same function as a clickable link leading back to the home page of the website." + }, + { + "image_path": "evaluation/MDVP-Bench/data/web_detailed_caption/images/web_5a35d9c2-2c2d-4a49-ad0a-1408d9cac78e.png", + "caption": "A rectangular white button with rounded corners, featuring the text \"Close issue\" in bold, black, sans-serif font.", + "gt": "The area appears to contain an interface element labeled \"Close issue\" situated within a software development environment, likely a part of an issue tracking or project management system. This interface element is a button, as suggested by its design and placement near the text input area for comments. When clicked, it would typically result in the associated issue being marked as resolved or closed in the system, thereby updating the status of the issue within the project's workflow. 
def encode_image(image_path):
    """Return the file at ``image_path`` encoded as a base64 UTF-8 string."""
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")


def _load_dimmed_image(image_path, alpha):
    """Read ``image_path`` with OpenCV and blend it with an all-black image.

    Args:
        image_path: Path handed to ``cv2.imread``.
        alpha: Weight of the original pixels; the black image gets ``1 - alpha``.

    Returns:
        The dimmed image array.

    Raises:
        FileNotFoundError: If ``cv2.imread`` returns ``None`` (missing or
            unreadable file) instead of failing later on ``None.shape``.
    """
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    # A black image of the same size; blending with it darkens the original
    # so the overlaid markers stand out.
    black = np.zeros_like(image)
    return cv2.addWeighted(image, alpha, black, 1.0 - alpha, 0)


def _draw_label(image, text, origin, font_scale, thickness=2):
    """Draw white ``text`` on a filled black rectangle anchored at ``origin``."""
    text_x, text_y = origin
    (text_width, text_height), baseline = cv2.getTextSize(
        text, cv2.FONT_HERSHEY_SIMPLEX, font_scale, thickness
    )
    # Black background rectangle sized to the rendered text.
    cv2.rectangle(
        image,
        (text_x, text_y - text_height - baseline),
        (text_x + text_width, text_y + baseline),
        (0, 0, 0),
        -1,
    )
    cv2.putText(
        image,
        text,
        (text_x, text_y),
        cv2.FONT_HERSHEY_SIMPLEX,
        font_scale,
        (255, 255, 255),
        thickness,
    )


def _write_image(save_path, image):
    """Write ``image`` to ``save_path``, creating the parent directory first.

    ``cv2.imwrite`` reports failure through its return value rather than an
    exception, so a missing output directory used to be silently ignored.
    """
    os.makedirs(os.path.dirname(save_path) or ".", exist_ok=True)
    if not cv2.imwrite(save_path, image):
        raise OSError(f"Failed to write image: {save_path}")


def paint_text_point(image_path, points):
    """Mark numbered points on a dimmed copy of the image.

    Args:
        image_path: Path of the image to annotate.
        points: Iterable of ``[x, y]`` pixel coordinates; labels start at 1.

    Returns:
        Path of the annotated image, ``debug/point_<stem>.jpg``.

    Raises:
        FileNotFoundError: If the image cannot be read.
        OSError: If the annotated image cannot be written.
    """
    image = _load_dimmed_image(image_path, alpha=0.7)
    image_name = image_path.split("/")[-1].split(".")[0] + ".jpg"
    h, w = image.shape[:2]

    for i, (x, y) in enumerate(points, start=1):
        # OpenCV drawing calls need integer pixel coordinates.
        x, y = int(x), int(y)
        # Filled green dot of radius 4.
        cv2.circle(image, (x, y), 4, (0, 255, 0), -1)

        # Default label position is up and to the right of the point; move it
        # when it would run past the right, top or bottom border.
        text_x, text_y = x + 5, y - 5
        if text_x + 20 > w:
            text_x = x - 20
        if text_y - 10 < 0:
            text_y = y + 20
        if y + 10 > h:
            text_y = y - 20

        _draw_label(image, str(i), (text_x, text_y), 0.7)

    save_path = os.path.join("debug", f"point_{image_name}")
    _write_image(save_path, image)
    return save_path


def paint_text_box(image_path, bbox, rgb=(0, 255, 0), rect_thickness=2):
    """Draw numbered boxes on a dimmed copy of the image.

    Args:
        image_path: Path of the image to annotate.
        bbox: Iterable of ``(x, y, width, height)`` boxes; labels start at 1.
        rgb: Colour tuple passed to ``cv2.rectangle`` for the box outline.
        rect_thickness: Outline thickness in pixels.

    Returns:
        Path of the annotated image, ``debug/box_<stem>.jpg``.

    Raises:
        FileNotFoundError: If the image cannot be read.
        OSError: If the annotated image cannot be written.
    """
    image = _load_dimmed_image(image_path, alpha=0.8)
    image_name = image_path.split("/")[-1].split(".")[0] + ".jpg"
    h = image.shape[0]

    for i, (x, y, box_w, box_h) in enumerate(bbox, start=1):
        x, y, box_w, box_h = int(x), int(y), int(box_w), int(box_h)
        cv2.rectangle(image, (x, y), (x + box_w, y + box_h), rgb, rect_thickness)

        # Label goes just inside the top-left corner of the box, clamped so it
        # stays on the canvas. The clamp used to be computed and then
        # overwritten with the unclamped position, so it never took effect.
        text_x, text_y = x + 4, y + 20
        if text_x < 0:
            text_x = 0
        if text_y < 0:
            text_y = y + box_h + 15
        if text_y > h:
            text_y = h - 5

        _draw_label(image, str(i), (text_x, text_y), 0.65)

    save_path = os.path.join("debug", f"box_{image_name}")
    _write_image(save_path, image)
    return save_path


def paint_text_polygan(image_path, dataset, polygons, dict_id, rgb, rect_thickness):
    """Draw numbered polygon outlines on a dimmed copy of the image.

    Args:
        image_path: Path of the image to annotate.
        dataset: Sub-directory name under ``save_data/`` for the output.
        polygons: Iterable of flat ``[x0, y0, x1, y1, ...]`` coordinate lists;
            labels start at 1.
        dict_id: Identifier used as the output file stem.
        rgb: Colour tuple passed to ``cv2.polylines``.
        rect_thickness: Outline thickness in pixels.

    Returns:
        Path of the annotated image, ``save_data/<dataset>/<dict_id>.jpg``.

    Raises:
        FileNotFoundError: If the image cannot be read.
        OSError: If the annotated image cannot be written.
    """
    image = _load_dimmed_image(image_path, alpha=0.8)
    image_name = str(dict_id) + ".jpg"
    h, w = image.shape[:2]

    for idx, item in enumerate(polygons, start=1):
        vertices = [(item[i], item[i + 1]) for i in range(0, len(item), 2)]
        if not vertices:
            # Nothing to outline and no vertex to anchor a label on.
            continue

        outline = np.array([[int(vx), int(vy)] for vx, vy in vertices]).reshape(
            (-1, 1, 2)
        )
        cv2.polylines(
            image, [outline], isClosed=True, color=rgb, thickness=rect_thickness
        )

        # Anchor the label on the left-most vertex (first one on ties).
        min_x, min_y = min(vertices, key=lambda vertex: vertex[0])
        text_x = min_x
        text_y = min_y

        # Nudge the label back inside the image near any border.
        if text_x + 20 > w:
            text_x = min_x - 20
        if text_x - 20 < 0:
            text_x = min_x + 20
        if text_y - 10 < 0:
            text_y = min_y + 20
        if min_y + 10 > h:
            text_y = min_y - 20

        _draw_label(image, str(idx), (int(text_x), int(text_y)), 0.7)

    save_path = os.path.join("save_data/" + dataset, image_name)
    _write_image(save_path, image)
    return save_path
def parse_args():
    """Parse the command-line options of the score summariser.

    Returns:
        argparse.Namespace with ``dir`` (directory holding ``*.jsonl`` review
        files), ``files`` (optional explicit review file names), ``ignore``
        (question ids to skip) and ``save`` (write ``metric.json``).
    """
    parser = argparse.ArgumentParser(description="ChatGPT-based QA evaluation.")
    parser.add_argument("-d", "--dir", default=None)
    parser.add_argument("-f", "--files", nargs="*", default=None)
    parser.add_argument("-i", "--ignore", nargs="*", default=None)
    parser.add_argument("-s", "--save", action="store_true")
    return parser.parse_args()


def collect_scores(lines, ignore=None):
    """Group review scores by category.

    Args:
        lines: Iterable of JSON strings, one review per line.
        ignore: Optional iterable of question ids to skip. Ids are compared as
            strings because command-line values are always strings while the
            ids stored in the reviews may be integers.

    Returns:
        Mapping of category name to a list of score entries; the ``"all"`` key
        collects every kept review.
    """
    ignored = {str(question_id) for question_id in ignore} if ignore else set()
    scores = defaultdict(list)
    for line in lines:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        review = json.loads(line)
        if ignored and str(review["question_id"]) in ignored:
            continue
        if "category" in review:
            scores[review["category"]].append(review["tuple"])
            scores["all"].append(review["tuple"])
        elif "tuple" in review:
            scores["all"].append(review["tuple"])
        else:
            scores["all"].append(review["score"])
    return scores


def summarize_scores(scores):
    """Turn grouped score pairs into relative percentages.

    For every category the entries are averaged column-wise, rounded to three
    decimals, and the second column is expressed as a percentage of the first.
    A small epsilon keeps an all-zero first column from raising
    ZeroDivisionError, and the same value is used for printing and saving.

    Args:
        scores: Mapping of category to a list of two-element score entries.

    Returns:
        Dict of category to percentage (rounded to two decimals), in sorted
        category order.
    """
    summary = {}
    for category, entries in sorted(scores.items()):
        stats = np.asarray(entries).mean(0).tolist()
        stats = [round(x, 3) for x in stats]
        summary[category] = round(stats[1] / (stats[0] + 1e-6) * 100, 2)
    return summary


def main():
    """Summarise every review file and optionally save the metrics as JSON."""
    args = parse_args()
    # os.listdir(None) already meant "current directory"; make that explicit
    # so saving works without --dir as well.
    base_dir = args.dir if args.dir is not None else "."

    if args.files:
        review_files = list(args.files)
    else:
        review_files = [x for x in os.listdir(base_dir) if x.endswith(".jsonl")]

    metrics = []
    for review_file in sorted(review_files):
        config = (
            os.path.basename(review_file)
            .replace("gpt4_text_", "")
            .replace(".jsonl", "")
        )
        print(config)
        with open(
            os.path.join(args.dir, review_file) if args.dir is not None else review_file
        ) as f:
            scores = collect_scores(f, args.ignore)
        summary = summarize_scores(scores)
        for category, value in summary.items():
            print(category, value)
        print("=================================")
        metrics.append(summary)

    if args.save:
        with open(os.path.join(base_dir, "metric.json"), "w") as f:
            json.dump(metrics, f, indent=2)


if __name__ == "__main__":
    main()
def main(args):
    """Convert model outputs into the answer/prediction/question files used for GPT scoring.

    For each of the six ``*_detailed_caption_box`` phases this writes
    ``answer.json``, ``prediction.json`` and ``question.json`` under
    ``mdvp_for_gpt4v_eval/<phase>/``. Answers and predictions come from
    ``args.output_path`` paired index-by-index with
    ``annotations/mdvp_caption_mask.json``; questions come from
    ``data/<domain>/<domain>_box.json``.

    Args:
        args: Namespace with an ``output_path`` attribute pointing at the JSON
            list of model results (each with ``gt``, ``caption`` and either
            ``image_path`` or ``file_name``).

    Raises:
        AssertionError: If a result's ``gt`` does not match the caption of the
            annotation at the same index.
    """
    phases = [
        "android_detailed_caption_box",
        "multipanel_detailed_caption_box",
        "natural_detailed_caption_box",
        "ocr_doc_detailed_caption_box",
        "ocr_spotting_detailed_caption_box",
        "web_detailed_caption_box",
    ]
    vp = "bbox"

    # Both inputs are the same for every phase, so load and convert them once.
    with open(args.output_path, "r", encoding="utf-8") as f:
        results = json.load(f)

    with open("annotations/mdvp_caption_mask.json", "r", encoding="utf-8") as f:
        mask_data = json.load(f)

    format_answer_list = []
    format_prediction_list = []
    for index, item in enumerate(results):
        meta = mask_data[index]
        if meta["caption"] != item["gt"]:
            # Raised explicitly so the check survives ``python -O``.
            raise AssertionError(
                f"Result {index} does not match annotation {index}: "
                "ground-truth captions differ"
            )

        try:
            image_path = item["image_path"]
        except KeyError:
            image_path = item["file_name"]

        format_answer_list.append(
            {
                "question_id": index + 1,
                "image": image_path,
                "category": meta["dataset_name"],
                "text": item["gt"],
            }
        )
        format_prediction_list.append(
            {
                "question_id": index + 1,
                "image": image_path,
                "category": meta["dataset_name"],
                "text": item["caption"],
            }
        )

    for phase in phases:
        domain = phase.split("_box")[0]  # e.g. "android_detailed_caption"
        phase_dir = f"mdvp_for_gpt4v_eval/{phase}"
        # makedirs also creates the missing parent directory.
        os.makedirs(phase_dir, exist_ok=True)

        with open(f"{phase_dir}/answer.json", "w", encoding="utf-8") as f:
            json.dump(format_answer_list, f, indent=4, ensure_ascii=False)
        print(f"mdvp_for_gpt4v_eval/{phase}/answer.json saved successfully!")

        with open(f"{phase_dir}/prediction.json", "w", encoding="utf-8") as f:
            json.dump(format_prediction_list, f, indent=4, ensure_ascii=False)
        print(f"mdvp_for_gpt4v_eval/{phase}/prediction.json saved successfully!")

        with open(f"data/{domain}/{domain}_box.json", "r", encoding="utf-8") as f:
            questions = json.load(f)
        format_question_list = [
            {
                "question_id": index + 1,
                "image": item["image_name"],
                "category": phase,
                "text": item["question"],
                "annotation": {f"{vp}": item[f"{vp}"], "segmentation": []},
            }
            for index, item in enumerate(questions)
        ]
        with open(f"{phase_dir}/question.json", "w", encoding="utf-8") as f:
            json.dump(format_question_list, f, indent=4, ensure_ascii=False)
        print(f"mdvp_for_gpt4v_eval/{phase}/question.json saved successfully!")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Process args.")
    parser.add_argument(
        "--output_path", type=str, required=True, help="Path to output results"
    )
    args = parser.parse_args()

    main(args)
class SingleRegionCaptionDataset(Dataset):
    """Dataset wrapping one image and one binary region mask for captioning.

    The dataset holds exactly one sample: ``__getitem__`` ignores its index
    and always builds the model inputs for the stored image/mask pair.

    NOTE(review): the prompt-token string literals and the regex pattern below
    are empty strings in the text this block was reviewed from — presumably
    angle-bracket tokens were stripped during extraction. They are reproduced
    unchanged; verify them against the repository before relying on this code.
    """

    # Set once when the class body is executed.
    os.environ["TOKENIZERS_PARALLELISM"] = "true"

    def __init__(
        self,
        image,
        mask,
        processor,
        prompt_token="",
        prompt_number=5,
        visual_prompt_tokens=[
            "",
            "",
            "",
            "",
            "",
            "",
        ],
        data_dtype=torch.bfloat16,
        **kwargs,
    ):
        """Store the sample and resolve the visual-prompt token ids.

        Args:
            image: Image exposing ``width``/``height`` (used as a PIL image).
            mask: Binary mask array exposing ``astype``; the code indexes an
                ``(image.height, image.width)`` canvas with it.
            processor: Object exposing ``tokenizer.convert_tokens_to_ids``,
                ``apply_chat_template`` and a call interface.
            prompt_token: Visual prompt token assigned to the region.
            prompt_number: Stored on the instance; not otherwise used here.
            visual_prompt_tokens: Tokens whose ids are looked up.
            data_dtype: dtype the floating-point tensors are cast to.
            **kwargs: Accepted and ignored.
        """
        self.processor = processor
        self.prompt_token = prompt_token

        self.prompt_number = prompt_number
        # Copy so the shared default list is never aliased by an instance.
        self.special_tokens = list(visual_prompt_tokens)
        # Ids are stored relative to 128256 — presumably the first id after
        # the base vocabulary; TODO confirm against the tokenizer.
        self.visual_prompt_ids = {
            token: self.processor.tokenizer.convert_tokens_to_ids(token) - 128256
            for token in self.special_tokens
        }

        self.image = image
        self.mask = mask
        self.data_dtype = data_dtype

    def __len__(self):
        """Return 1: the dataset wraps a single image/mask pair.

        The previous implementation read ``self.coco``, which is never set.
        """
        return 1

    def _prompt_index(self, prompt_token):
        """Return the integer captured by the prompt-token pattern."""
        return int(re.match(r"", prompt_token).group(1))

    def _parse_annotations(self):
        """Build the prompt-id map and the normalised bounding box of the mask.

        Returns:
            Dict with ``image``, ``visual_prompt`` (PIL image of per-pixel
            prompt ids) and ``bboxes`` (token-id string mapped to
            ``(x_min, y_min, x_max, y_max)`` scaled by image width/height).

        Raises:
            ValueError: If the mask has no non-zero pixel.
        """
        image = self.image
        mask_np = self.mask.astype(np.uint8)  # binary mask

        non_zero_coords = np.argwhere(mask_np)
        if non_zero_coords.size == 0:
            raise ValueError("mask is empty; cannot derive a bounding box")

        # -1 marks "not assigned yet". A signed dtype is required: multiplying
        # a uint8 array by -1 raises on NumPy 2 and only worked on NumPy 1.x
        # because value-based promotion produced int16.
        filled_matrix = np.full((image.height, image.width), -1, dtype=np.int16)
        prompt_token = self.prompt_token
        prompt_id = self.visual_prompt_ids.get(
            prompt_token, self.visual_prompt_ids[""]
        )
        assert prompt_id < 16, f"prompt_id should be less than {16}, got {prompt_id}"
        fill_area = (filled_matrix == -1) & mask_np.astype(bool)
        filled_matrix[fill_area] = prompt_id

        # Everything outside the region gets the fallback prompt id.
        filled_matrix[filled_matrix == -1] = self.visual_prompt_ids[""]

        prompt_idx = self._prompt_index(prompt_token)
        y_min, x_min = non_zero_coords.min(axis=0)
        y_max, x_max = non_zero_coords.max(axis=0)
        bbox = (
            x_min / image.width,
            y_min / image.height,
            x_max / image.width,
            y_max / image.height,
        )
        # Reserved special tokens are offset by 2 relative to the prompt index.
        region_token_id = self.processor.tokenizer.convert_tokens_to_ids(
            f"<|reserved_special_token_{prompt_idx + 2}|>"
        )
        bboxes = {str(region_token_id): bbox}

        return {
            "image": image,
            "visual_prompt": Image.fromarray(filled_matrix),
            "bboxes": bboxes,
        }

    def __getitem__(self, index):
        """Return the model inputs for the stored sample (``index`` is ignored).

        Returns:
            Dict with ``input_ids``, ``attention_mask``, ``pixel_values``,
            ``global_mask_values``, ``bboxes`` and ``aspect_ratios``; tensors
            are moved to CUDA.
        """
        data_dict = deepcopy(self._parse_annotations())
        image = data_dict["image"]
        visual_prompt = data_dict["visual_prompt"]

        prompt_idx = self._prompt_index(self.prompt_token)

        # <|reserved_special_token_{idx}|> actually starts from 2
        region_token = f"<|reserved_special_token_{prompt_idx + 2}|>"
        qs = f"There are some objects I am curious about: {self.prompt_token};\n{self.prompt_token}: {region_token}Describe this masked region in detail."
        # The region placeholder is repeated 256 times in the prompt.
        qs = qs.replace(region_token, region_token * 256)

        user_content = [{"type": "image", "image": image}, {"type": "text", "text": qs}]

        messages = [
            {"role": "user", "content": user_content},
        ]

        # Prepare input for model
        raw_prompt = self.processor.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=False,
        )

        model_inputs = self.processor(
            text=[raw_prompt],
            images=[image],
            visual_prompts=[visual_prompt],
            return_tensors="pt",
        )

        pixel_values = model_inputs["pixel_values"]
        mask_values = model_inputs["mask_values"]
        input_ids = model_inputs["input_ids"].squeeze(0)
        attention_mask = model_inputs["attention_mask"].squeeze(0)
        aspect_ratio = model_inputs["aspect_ratio"]

        ret = dict(
            input_ids=input_ids.cuda().unsqueeze(0),
            attention_mask=attention_mask.cuda().to(self.data_dtype).unsqueeze(0),
            pixel_values=pixel_values.cuda().to(self.data_dtype).flatten(0, 1),
            global_mask_values=mask_values.cuda().to(self.data_dtype).squeeze(),
            bboxes=[data_dict["bboxes"]],
            aspect_ratios=aspect_ratio.unsqueeze(0).cuda(),
        )
        return ret
class MultiRegionDataset(Dataset):
    """Dataset wrapping one image, several region masks and one question.

    The dataset holds exactly one sample: ``__getitem__`` ignores its index
    and always builds the model inputs for the stored image, masks and
    question.

    NOTE(review): the prompt-token string literals and the regex patterns
    below are empty strings in the text this block was reviewed from —
    presumably angle-bracket tokens were stripped during extraction. They are
    reproduced unchanged; verify them against the repository before relying on
    this code.
    """

    # Set once when the class body is executed.
    os.environ["TOKENIZERS_PARALLELISM"] = "true"

    def __init__(
        self,
        image,
        masks,
        question_str,
        processor,
        prompt_token="",
        prompt_number=5,
        visual_prompt_tokens=[
            "",
            "",
            "",
            "",
            "",
            "",
        ],
        data_dtype=torch.bfloat16,
        **kwargs,
    ):
        """Store the sample and resolve the visual-prompt token ids.

        Args:
            image: Image exposing ``width``/``height``/``size`` (used as a PIL
                image).
            masks: Sequence of binary masks, indexed by the prompt index found
                in the question.
            question_str: Question text that references the prompt tokens.
            processor: Object exposing ``tokenizer.convert_tokens_to_ids``,
                ``apply_chat_template`` and a call interface.
            prompt_token: Stored on the instance; not otherwise used here.
            prompt_number: Upper bound used when validating prompt ids.
            visual_prompt_tokens: Tokens whose ids are looked up.
            data_dtype: dtype the floating-point tensors are cast to.
            **kwargs: Accepted and ignored.
        """
        self.processor = processor
        self.prompt_token = prompt_token

        self.prompt_number = prompt_number
        # Copy so the shared default list is never aliased by an instance.
        self.special_tokens = list(visual_prompt_tokens)
        # Ids are stored relative to 128256 — presumably the first id after
        # the base vocabulary; TODO confirm against the tokenizer.
        self.visual_prompt_ids = {
            token: self.processor.tokenizer.convert_tokens_to_ids(token) - 128256
            for token in self.special_tokens
        }

        self.image = image
        self.masks = masks
        self.question_str = question_str
        self.data_dtype = data_dtype

    def __len__(self):
        """Return 1: the dataset wraps a single image/question sample.

        The previous implementation read ``self.coco``, which is never set.
        """
        return 1

    def _prompt_index(self, prompt_token):
        """Return the integer captured by the prompt-token pattern."""
        return int(re.match(r"", prompt_token).group(1))

    def _parse_annotations(self):
        """Build the text prompt, the prompt-id map and one bbox per region.

        Returns:
            Dict with ``image``, ``visual_prompt`` (PIL image of per-pixel
            prompt ids), ``bboxes`` (token-id string mapped to
            ``(x_min, y_min, x_max, y_max)`` scaled by image width/height)
            and ``prompt`` (the full question text).

        Raises:
            AssertionError: If the number of distinct prompts in the question
                differs from the number of masks, or a prompt id is out of
                range.
            ValueError: If a referenced mask has no non-zero pixel.
        """
        image = self.image
        masks = self.masks  # binary masks

        # Normalise every mask to a uint8 array at the image resolution. The
        # caller's list is left untouched.
        masks_np = []
        for mask in masks:
            mask_np = np.array(mask).astype(np.uint8)
            if image.width != mask_np.shape[1] or image.height != mask_np.shape[0]:
                # Resize through PIL; ndarray.resize reshapes in place and
                # returns None, so it cannot be used for resampling.
                resized = Image.fromarray(mask_np).resize(image.size, Image.NEAREST)
                mask_np = np.array(resized).astype(np.uint8)
            masks_np.append(mask_np)

        prompt_matches = set(re.findall(r"", self.question_str))
        assert len(prompt_matches) == len(masks)

        # Iterate prompts in index order so the generated prompt and the
        # overlap resolution below do not depend on set iteration order.
        prompt_indices = {
            matched_prompt: self._prompt_index(matched_prompt)
            for matched_prompt in prompt_matches
        }
        ordered_prompts = sorted(prompt_matches, key=prompt_indices.get)

        objects_desc = "There are some objects I am curious about: "
        sub_image_desc = ""
        for matched_prompt in ordered_prompts:
            objects_desc += f"{matched_prompt}; "
            # Reserved special tokens are offset by 2 relative to the prompt
            # index; the placeholder is repeated 256 times per region.
            region_token = (
                f"<|reserved_special_token_{prompt_indices[matched_prompt] + 2}|>"
            )
            sub_image_desc += f"{matched_prompt}: " + region_token * 256 + "\n"

        prompt = objects_desc + "\n" + sub_image_desc + "\n" + self.question_str

        # -1 marks "not assigned yet". A signed dtype is required: multiplying
        # a uint8 array by -1 raises on NumPy 2 and only worked on NumPy 1.x
        # because value-based promotion produced int16.
        filled_matrix = np.full((image.height, image.width), -1, dtype=np.int16)
        bboxes = {}
        for matched_prompt in ordered_prompts:
            prompt_idx = prompt_indices[matched_prompt]
            mask_np = masks_np[prompt_idx]
            prompt_id = self.visual_prompt_ids.get(
                matched_prompt, self.visual_prompt_ids[""]
            )
            assert (
                prompt_id < self.prompt_number + 1
            ), f"prompt_id should be less than {self.prompt_number + 1}, got {prompt_id}"
            # Only claim pixels that no earlier region has taken.
            fill_area = (filled_matrix == -1) & mask_np.astype(bool)
            filled_matrix[fill_area] = prompt_id

            # Bounding box of this region's own mask.
            non_zero_coords = np.argwhere(mask_np)
            if non_zero_coords.size == 0:
                raise ValueError(
                    f"mask {prompt_idx} is empty; cannot derive a bounding box"
                )
            y_min, x_min = non_zero_coords.min(axis=0)
            y_max, x_max = non_zero_coords.max(axis=0)
            bbox = (
                x_min / image.width,
                y_min / image.height,
                x_max / image.width,
                y_max / image.height,
            )
            bboxes[
                str(
                    self.processor.tokenizer.convert_tokens_to_ids(
                        f"<|reserved_special_token_{prompt_idx + 2}|>"
                    )
                )
            ] = bbox

        # Everything outside all regions gets the fallback prompt id.
        filled_matrix[filled_matrix == -1] = self.visual_prompt_ids[""]

        data_dict = {
            "image": image,
            "visual_prompt": Image.fromarray(filled_matrix),
            "bboxes": bboxes,
            "prompt": prompt,
        }
        return data_dict

    def __getitem__(self, index):
        """Return the model inputs for the stored sample (``index`` is ignored).

        Returns:
            Dict with ``input_ids``, ``attention_mask``, ``pixel_values``,
            ``global_mask_values``, ``bboxes`` and ``aspect_ratios``; tensors
            are moved to CUDA.
        """
        data_dict = self._parse_annotations()
        image = data_dict["image"]
        visual_prompt = data_dict["visual_prompt"]
        qs = data_dict["prompt"]

        user_content = [{"type": "image", "image": image}, {"type": "text", "text": qs}]

        messages = [
            {"role": "user", "content": user_content},
        ]

        # Prepare input for model
        raw_prompt = self.processor.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=False,
        )

        model_inputs = self.processor(
            text=[raw_prompt],
            images=[image],
            visual_prompts=[visual_prompt],
            return_tensors="pt",
        )

        pixel_values = model_inputs["pixel_values"]
        mask_values = model_inputs["mask_values"]
        input_ids = model_inputs["input_ids"].squeeze(0)
        attention_mask = model_inputs["attention_mask"].squeeze(0)
        aspect_ratio = model_inputs["aspect_ratio"]

        ret = dict(
            input_ids=input_ids.cuda().unsqueeze(0),
            attention_mask=attention_mask.cuda().to(self.data_dtype).unsqueeze(0),
            pixel_values=pixel_values.cuda().to(self.data_dtype).flatten(0, 1),
            global_mask_values=mask_values.cuda().to(self.data_dtype).squeeze(),
            bboxes=[data_dict["bboxes"]],
            aspect_ratios=aspect_ratio.unsqueeze(0).cuda(),
        )
        return ret
+ +####################################################################### +# PART 2 Model & Tokenizer & Image Processor # +####################################################################### +tokenizer = dict( + type=AutoTokenizer.from_pretrained, + pretrained_model_name_or_path=mllm_name_or_path, + trust_remote_code=True, + padding_side="right", +) + +visual_prompt_nums = 5 +visual_prompt_tokens = [f"" for i in range(visual_prompt_nums)] +visual_prompt_tokens.append("") +special_tokens = visual_prompt_tokens + +model = dict( + type=GraspAnyRegion, + freeze_llm=False, + freeze_visual_encoder=False, + freeze_connector=False, + unfreeze_vocab=True, + unfreeze_lm_head=True, + use_activation_checkpointing=True, + vocab_embeds_name="tok_embeddings", + lm_head_name="output", + mllm=dict( + type=AutoModel.from_pretrained, + pretrained_model_name_or_path=mllm_name_or_path, + trust_remote_code=True, + attn_implementation="flash_attention_2", + torch_dtype=torch.bfloat16, + ), + pretrained_pth=None, + prompt_numbers=visual_prompt_nums, +) + + +######################################################################### +# PART 3 Dataset & DataLoader # +######################################################################### + +dam_annotations = [ + "data/Seed-Dataset", + "data/Fine-Grained-Dataset", + "data/Relation-Dataset", +] + +train_dataset = dict( + type=GraspAnyRegionDataset, + model_path=mllm_name_or_path, + pano_jsons=dam_annotations, + dynamic_image_size=True, + max_num_tiles=16, + repeat_time=1, + lazy_load=True, + group_by_length=True, + prompt_augmentation=True, + prompt_numbers=visual_prompt_nums, + special_tokens=special_tokens, +) + +train_dataloader = dict( + batch_size=batch_size, + num_workers=dataloader_num_workers, + dataset=train_dataset, + sampler=dict( + type=LengthGroupedSampler, + length_property="modality_length", + per_device_batch_size=batch_size * accumulative_counts, + ), + collate_fn=dict(type=custom_collate_fn), +) + 
+######################################################################### +# PART 4 Scheduler & Optimizer # +######################################################################### +# optimizer +optim_wrapper = dict( + type=AmpOptimWrapper, + optimizer=dict( + type=optim_type, + lr=lr, + betas=betas, + weight_decay=weight_decay, + ), + clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False), + accumulative_counts=accumulative_counts, + loss_scale="dynamic", + dtype=torch.bfloat16, +) + +# learning policy +param_scheduler = [ + dict( + type=LinearLR, + start_factor=1e-5, + by_epoch=True, + begin=0, + end=warmup_ratio * max_epochs, + convert_to_iter_based=True, + ), + dict( + type=CosineAnnealingLR, + eta_min=0.0, + by_epoch=True, + begin=warmup_ratio * max_epochs, + end=max_epochs, + convert_to_iter_based=True, + ), +] + +# train, val, test setting +train_cfg = dict(type=TrainLoop, max_epochs=max_epochs) + +######################################################################### +# PART 5 Runtime # +######################################################################### +# Log the dialogue periodically during the training process, optional +custom_hooks = [] + +# configure default hooks +default_hooks = dict( + # record the time of every iteration. + timer=dict(type=IterTimerHook), + # print log every 100 iterations. + logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=100), + # enable the parameter scheduler. + param_scheduler=dict(type=ParamSchedulerHook), + # save checkpoint per `save_steps`. 
+ checkpoint=dict( + type=CheckpointHook, + save_optimizer=False, + by_epoch=False, + interval=save_steps, + max_keep_ckpts=save_total_limit, + ), + # set sampler seed in distributed environment, + sampler_seed=dict(type=DistSamplerSeedHook), +) + +# configure environment +env_cfg = dict( + # whether to enable cudnn benchmark + cudnn_benchmark=False, + # set multi process parameters + mp_cfg=dict(mp_start_method="fork", opencv_num_threads=0), + # set distributed parameters + dist_cfg=dict(backend="nccl"), +) + +# set visualizer +visualizer = None + +# set log level +log_level = "INFO" + +# load from which checkpoint +load_from = None + +# whether to resume training from the loaded checkpoint +resume = False + +# Defaults to use random seed and disable `deterministic` +randomness = dict(seed=42, deterministic=False) + +# set log processor +log_processor = dict(by_epoch=False) diff --git a/projects/grasp_any_region/configs/gar_8b.py b/projects/grasp_any_region/configs/gar_8b.py new file mode 100644 index 0000000000000000000000000000000000000000..60d02adbc1ca1185f5a1ad2f8fe8583ee6fdc45a --- /dev/null +++ b/projects/grasp_any_region/configs/gar_8b.py @@ -0,0 +1,215 @@ +import torch +from mmengine.hooks import ( + CheckpointHook, + DistSamplerSeedHook, + IterTimerHook, + LoggerHook, + ParamSchedulerHook, +) +from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR +from torch.optim import AdamW +from transformers import AutoModel, AutoTokenizer +from xtuner.dataset.samplers import LengthGroupedSampler +from xtuner.engine.runner import TrainLoop + +from projects.grasp_any_region.datasets import GraspAnyRegionDataset +from projects.grasp_any_region.datasets.collect_fns import custom_collate_fn +from projects.grasp_any_region.models import GraspAnyRegion + +######################################################################### +# PART 1 Settings # +######################################################################### + +# Model +mllm_name_or_path = 
"facebook/Perception-LM-8B" +exp_name = "gar_8b" +work_dir = f"./work_dirs/{exp_name}" + +max_length = 16384 +lazy_load = True + +# Scheduler & Optimizer +batch_size = 1 # per_device +accumulative_counts = 1 +dataloader_num_workers = 4 +# global batch_size: 64 = 1 (batch_size) * 1 (accumulative_counts) * 64 (num_gpus) +max_epochs = 1 +optim_type = AdamW +# official 128 -> 2e-5 +lr = 1e-5 +betas = (0.9, 0.999) +weight_decay = 0 +max_norm = 1 # grad clip +warmup_ratio = 0.03 + +# Save +save_steps = 5000 +save_total_limit = 2 # Maximum checkpoints to keep (-1 means unlimited) + +####################################################################### +# PART 2 Model & Tokenizer & Image Processor # +####################################################################### +tokenizer = dict( + type=AutoTokenizer.from_pretrained, + pretrained_model_name_or_path=mllm_name_or_path, + trust_remote_code=True, + padding_side="right", +) + +visual_prompt_nums = 5 +visual_prompt_tokens = [f"" for i in range(visual_prompt_nums)] +visual_prompt_tokens.append("") +special_tokens = visual_prompt_tokens + +model = dict( + type=GraspAnyRegion, + freeze_llm=False, + freeze_visual_encoder=False, + freeze_connector=False, + unfreeze_vocab=True, + unfreeze_lm_head=True, + use_activation_checkpointing=True, + vocab_embeds_name="tok_embeddings", + lm_head_name="output", + mllm=dict( + type=AutoModel.from_pretrained, + pretrained_model_name_or_path=mllm_name_or_path, + trust_remote_code=True, + attn_implementation="flash_attention_2", + torch_dtype=torch.bfloat16, + ), + pretrained_pth=None, + prompt_numbers=visual_prompt_nums, +) + + +######################################################################### +# PART 3 Dataset & DataLoader # +######################################################################### + +dam_annotations = [ + "data/Seed-Dataset", + "data/Fine-Grained-Dataset", + "data/Relation-Dataset", +] + +train_dataset = dict( + type=GraspAnyRegionDataset, + 
model_path=mllm_name_or_path, + pano_jsons=dam_annotations, + dynamic_image_size=True, + max_num_tiles=8, + repeat_time=1, + lazy_load=True, + group_by_length=True, + prompt_augmentation=True, + prompt_numbers=visual_prompt_nums, + special_tokens=special_tokens, +) + +train_dataloader = dict( + batch_size=batch_size, + num_workers=dataloader_num_workers, + dataset=train_dataset, + sampler=dict( + type=LengthGroupedSampler, + length_property="modality_length", + per_device_batch_size=batch_size * accumulative_counts, + ), + collate_fn=dict(type=custom_collate_fn), +) + +######################################################################### +# PART 4 Scheduler & Optimizer # +######################################################################### +# optimizer +optim_wrapper = dict( + type=AmpOptimWrapper, + optimizer=dict( + type=optim_type, + lr=lr, + betas=betas, + weight_decay=weight_decay, + ), + clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False), + accumulative_counts=accumulative_counts, + loss_scale="dynamic", + dtype=torch.bfloat16, +) + +# learning policy +param_scheduler = [ + dict( + type=LinearLR, + start_factor=1e-5, + by_epoch=True, + begin=0, + end=warmup_ratio * max_epochs, + convert_to_iter_based=True, + ), + dict( + type=CosineAnnealingLR, + eta_min=0.0, + by_epoch=True, + begin=warmup_ratio * max_epochs, + end=max_epochs, + convert_to_iter_based=True, + ), +] + +# train, val, test setting +train_cfg = dict(type=TrainLoop, max_epochs=max_epochs) + +######################################################################### +# PART 5 Runtime # +######################################################################### +# Log the dialogue periodically during the training process, optional +custom_hooks = [] + +# configure default hooks +default_hooks = dict( + # record the time of every iteration. + timer=dict(type=IterTimerHook), + # print log every 100 iterations. 
+ logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=100), + # enable the parameter scheduler. + param_scheduler=dict(type=ParamSchedulerHook), + # save checkpoint per `save_steps`. + checkpoint=dict( + type=CheckpointHook, + save_optimizer=False, + by_epoch=False, + interval=save_steps, + max_keep_ckpts=save_total_limit, + ), + # set sampler seed in distributed environment, + sampler_seed=dict(type=DistSamplerSeedHook), +) + +# configure environment +env_cfg = dict( + # whether to enable cudnn benchmark + cudnn_benchmark=False, + # set multi process parameters + mp_cfg=dict(mp_start_method="fork", opencv_num_threads=0), + # set distributed parameters + dist_cfg=dict(backend="nccl"), +) + +# set visualizer +visualizer = None + +# set log level +log_level = "INFO" + +# load from which checkpoint +load_from = None + +# whether to resume training from the loaded checkpoint +resume = False + +# Defaults to use random seed and disable `deterministic` +randomness = dict(seed=42, deterministic=False) + +# set log processor +log_processor = dict(by_epoch=False) diff --git a/projects/grasp_any_region/datasets/GraspAnyRegion_Dataset.py b/projects/grasp_any_region/datasets/GraspAnyRegion_Dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..05f38e688036ec49420ee45935aa1110811ae593 --- /dev/null +++ b/projects/grasp_any_region/datasets/GraspAnyRegion_Dataset.py @@ -0,0 +1,739 @@ +import base64 +import copy +import io +import json +import math +import os +import random +import re + +import numpy as np +import pycocotools.mask as mask_util +import torch +from datasets import load_from_disk +from PIL import Image +from torch.utils.data import Dataset +from transformers.image_utils import PILImageResampling + +from ..models.modeling.image_processing_perception_lm_fast import ( + PerceptionLMImageProcessorFast, +) +from ..models.modeling.processing_perception_lm import PerceptionLMProcessor + +prompt_list = [ + "Describe the masked region 
def smart_resize(
    height: int,
    width: int,
    factor: int = 28,
    min_pixels: int = 56 * 56,
    max_pixels: int = 768 * 768,
):
    """Compute a target size whose sides are multiples of ``factor``.

    The result satisfies, as far as possible:
      1. Both dimensions are divisible by ``factor``.
      2. The total number of pixels is within ``[min_pixels, max_pixels]``.
      3. The aspect ratio is preserved as closely as possible.

    Args:
        height: Source height in pixels.
        width: Source width in pixels.
        factor: Granularity both output sides must be a multiple of.
        min_pixels: Lower bound on ``h_bar * w_bar``.
        max_pixels: Upper bound on ``h_bar * w_bar``.

    Returns:
        ``(h_bar, w_bar)``, the resized height and width.

    Raises:
        ValueError: if either side is smaller than ``factor`` or the aspect
            ratio exceeds 200.
    """
    if height < factor or width < factor:
        raise ValueError(
            f"height:{height} or width:{width} must be larger than factor:{factor}"
        )
    elif max(height, width) / min(height, width) > 200:
        raise ValueError(
            f"absolute aspect ratio must be smaller than 200, got {max(height, width) / min(height, width)}"
        )
    h_bar = round(height / factor) * factor
    w_bar = round(width / factor) * factor
    if h_bar * w_bar > max_pixels:
        beta = math.sqrt((height * width) / max_pixels)
        # Flooring can reach 0 for elongated inputs when max_pixels is small
        # relative to factor; never return an empty side. In that degenerate
        # case the pixel budget may be exceeded, which is preferable to a
        # zero-sized image.
        h_bar = max(factor, math.floor(height / beta / factor) * factor)
        w_bar = max(factor, math.floor(width / beta / factor) * factor)
    elif h_bar * w_bar < min_pixels:
        beta = math.sqrt(min_pixels / (height * width))
        h_bar = math.ceil(height * beta / factor) * factor
        w_bar = math.ceil(width * beta / factor) * factor
    return h_bar, w_bar
def prompt_aug(self, caption):
    """Sample an instruction prompt for describing a masked region.

    One template is drawn from the module-level ``prompt_list`` and its
    ``{prompt_suffix}`` placeholder (present in templates 0-14 only) is
    filled according to a uniform random draw:

      * draw < 0.6: any of the 30 templates, suffix ``"in detail"``;
      * draw > 0.8: templates 0-14, suffix states the caption's word count
        (exact below 10, "more than 200" above 200, otherwise rounded to
        the nearest ten);
      * otherwise: templates 0-14, suffix states the caption's sentence
        count.

    Args:
        caption: Reference description the length hints are derived from.

    Returns:
        The instruction string.
    """
    # following DAM paper.
    draw = random.random()

    if draw < 0.6:  # default in detail, select from either set
        prompt_index = random.randint(0, 29)
        suffix = "in detail"
    elif draw > 0.8:  # caption word count
        word_count = len(caption.split())
        prompt_index = random.randint(0, 14)
        if word_count < 10:
            suffix = f"in {word_count} words"
        elif word_count > 200:
            suffix = "in more than 200 words"
        else:
            nearest_ten = round(word_count / 10) * 10
            qualifier = random.choice(["about", "around"])
            suffix = f"in {qualifier} {nearest_ten} words"
    else:  # sentences count
        sentence_count = sum(
            1 for part in re.split(r"[.!?]", caption) if part.strip()
        )
        prompt_index = random.randint(0, 14)
        if sentence_count == 1:
            suffix = random.choice(
                ["in a sentence", "in one sentence", "in 1 sentence"]
            )
        else:
            suffix = f"in {sentence_count} sentences"

    # Templates 15-29 have no placeholder, so replace() leaves them as-is.
    return prompt_list[prompt_index].replace("{prompt_suffix}", suffix)


@property
def modality_length(self):
    """Per-item length list: a constant 100 for every (repeated) item."""
    return [100] * sum(self.data_lengths) * self.repeats


def __len__(self):
    """Return the total item count across all sources, times ``repeats``."""
    return sum(self.data_lengths) * self.repeats


def read_pano_json(self):
    """Load every annotation source listed in ``self.pano_jsons``.

    Paths ending in ``.json`` are parsed as JSON; anything else is passed to
    ``load_from_disk``.

    Returns:
        ``(datasets, lengths)``: the loaded objects and their lengths, in
        the order of ``self.pano_jsons``.
    """
    all_json_info = []
    all_json_length = []
    for pano_json in self.pano_jsons:
        if pano_json.endswith(".json"):
            # Explicit UTF-8 so non-ASCII captions load the same way
            # regardless of the machine's locale.
            with open(pano_json, "r", encoding="utf-8") as f:
                json_info = json.load(f)
        else:
            json_info = load_from_disk(pano_json)

        all_json_info.append(json_info)
        all_json_length.append(len(json_info))
        print(f"=> Loaded {pano_json} with {len(json_info)} items.")

    print(f"Total data counts: {sum(all_json_length)}")
    return all_json_info, all_json_length


def sort_masks_by_area(self, masks):
    """Return mask indices ordered from the largest to the smallest area.

    The area of a mask is the sum of its values.
    """
    areas = np.array([np.sum(mask) for mask in masks])
    return np.argsort(areas)[::-1]
masks = [mask_util.decode(ann_info["mask_rle"])] + else: + raise ValueError( + f"mask_rle should be list or dict, but got {type(ann_info['mask_rle'])}" + ) + else: + # all 1 mask + mask_caption_data = False + masks = [np.ones((image.height, image.width), dtype=np.uint8)] * len( + captions + ) + else: + print("no image, skip.") + return None + + masks_np = [np.array(mask).astype(np.uint8) for mask in masks] + bboxes = {} + + for mask_id, mask in enumerate(masks): + if image.width != mask.shape[1] or image.height != mask.shape[0]: + mask = mask.resize(image.size, Image.NEAREST) + masks[mask_id] = mask + masks_np[mask_id] = np.array(mask).astype(np.unint8) + + non_zero_coords = np.argwhere(masks_np[mask_id]) + y_min, x_min = non_zero_coords.min(axis=0) + y_max, x_max = non_zero_coords.max(axis=0) + bbox = ( + x_min / image.width, + y_min / image.height, + x_max / image.width, + y_max / image.height, + ) + bboxes[ + str( + self.processor.tokenizer.convert_tokens_to_ids( + f"<|reserved_special_token_{mask_id + 2}|>" + ) + ) + ] = bbox + + # random sampling used prompt indexes + prompt_indexes = [i_p for i_p in range(self.prompt_numbers)] + random.shuffle(prompt_indexes) + num_selected = min(len(masks_np), self.prompt_numbers - 1) + selected_prompt_indexes = prompt_indexes[:num_selected] + selected_prompt_tokens = [f"" for i_p in selected_prompt_indexes] + selected_prompt_img_tokens = [ + f"<|reserved_special_token_{i_p+2}|>" for i_p in selected_prompt_indexes + ] + # for none prompt + none_prompt = True + not_selected_prompt_indexes = prompt_indexes[num_selected:] + not_selected_prompt_tokens = [ + f"" for i_p in not_selected_prompt_indexes + ] + not_selected_prompt_img_tokens = [ + f"<|reserved_special_token_{i_p+2}|>" for i_p in not_selected_prompt_indexes + ] + + if not mask_caption_data: + filled_matrix = self.visual_prompt_ids[selected_prompt_tokens[0]] * np.ones( + (image.height, image.width), dtype=np.uint8 + ) + ret = { + "masks": masks, + "bboxes": bboxes, + 
"conversations": ann_info["conversations"], + "image": image, + "visual_prompt_matrix": Image.fromarray(filled_matrix), + "mask_caption_data": False, + } + return ret + + prompt_str = "" + for conv in ann_info["conversations"]: + prompt_str += f"\n{conv['value']}" + prompt_matches = set(re.findall(r"", prompt_str)) + + if len(prompt_matches) == 0: + # build visual_prompt list for each mask + crop_phrases = [] + for idx in range(num_selected): + prompt_id = self.visual_prompt_ids.get( + selected_prompt_tokens[idx], self.visual_prompt_ids[""] + ) + + crop_phrases.append(f"{selected_prompt_tokens[idx]}") + + # modify conversations + conversation = [] + ret_masks = [] + first_question_merged = False + if len(crop_phrases) > 0 and len(masks) > 0: + # 组合对象列表与第一个问题 + objects_desc = ( + "There are some objects I am curious about: " + + "; ".join(crop_phrases) + + "; " + ) + if self.prompt_augmentation: + prompt = self.prompt_aug(captions[0]) + else: + prompt = "Describe this masked region in detail." 
+ first_question = f"{selected_prompt_tokens[0]}: {selected_prompt_img_tokens[0]}{prompt}" + first_question = first_question.replace( + selected_prompt_img_tokens[0], selected_prompt_img_tokens[0] * 256 + ) + + conversation.append( + { + "from": "human", + "value": objects_desc + "\n" + first_question, + } + ) + first_question_merged = True + + # 处理剩余对话 + for i in range(num_selected): + mask = masks[i] + obj_description = captions[i] + if i == 0 and first_question_merged: + # directly add answer for the first qustion + conversation.append({"from": "gpt", "value": obj_description}) + ret_masks.append(mask) + continue + if none_prompt and random.random() < 0.05: + question = f"{not_selected_prompt_tokens[0]}: {self.prompt_aug(obj_description)}" + conversation.append({"from": "human", "value": question}) + conversation.append( + { + "from": "gpt", + "value": f"{not_selected_prompt_tokens[0]} is not in the image.", + } + ) + none_prompt = False + + if self.prompt_augmentation: + prompt = self.prompt_aug(obj_description) + else: + prompt = "Describe this masked region in detail." 
+ + question = f"{selected_prompt_tokens[i]}: {selected_prompt_img_tokens[i]}{prompt}" + question = question.replace( + selected_prompt_img_tokens[i], selected_prompt_img_tokens[i] * 256 + ) + conversation.append({"from": "human", "value": question}) + conversation.append({"from": "gpt", "value": f"{obj_description}"}) + ret_masks.append(mask) + + filled_matrix = -1 * np.ones((image.height, image.width), dtype=np.uint8) + bboxes = {} + for i in range(num_selected): + mask = masks[i] + prompt_token = selected_prompt_tokens[i] + prompt_id = self.visual_prompt_ids.get( + prompt_token, self.visual_prompt_ids[""] + ) + assert ( + prompt_id < self.prompt_numbers + 1 + ), f"prompt_id should be less than {self.prompt_numbers + 1}, got {prompt_id}" + fill_area = (filled_matrix == -1) & mask.astype(bool) + filled_matrix[fill_area] = prompt_id + + prompt_idx = int(re.match(r"", prompt_token).group(1)) + + non_zero_coords = np.argwhere(np.array(mask)) + y_min, x_min = non_zero_coords.min(axis=0) + y_max, x_max = non_zero_coords.max(axis=0) + bbox = ( + x_min / image.width, + y_min / image.height, + x_max / image.width, + y_max / image.height, + ) + bboxes[ + str( + self.processor.tokenizer.convert_tokens_to_ids( + f"<|reserved_special_token_{prompt_idx + 2}|>" + ) + ) + ] = bbox + + filled_matrix[filled_matrix == -1] = self.visual_prompt_ids[""] + # convert masks to PIL.Image + masks = [ + Image.fromarray((masks_np[i] * 255).astype(np.uint8)) + for i in range(num_selected) + ] + + else: + # modify the first conversations + conversation = copy.deepcopy(ann_info["conversations"]) + objects_desc = "There are some objects I am curious about: " + sub_image_desc = "" + for matched_prompt in prompt_matches: + objects_desc += f"{matched_prompt}; " + + prompt_idx = int(re.match(r"", matched_prompt).group(1)) + sub_image_desc += ( + f"{matched_prompt}: <|reserved_special_token_{prompt_idx + 2}|>\n" + ) + sub_image_desc = sub_image_desc.replace( + f"<|reserved_special_token_{prompt_idx + 
def parse_label(self, labels):
    """Build loss labels that keep only the spans following a start marker.

    ``labels`` is scanned for the 4-token marker
    ``[128006, 78191, 128007, 271]``. For each occurrence, the tokens from
    just after the marker up to and including the next ``128009`` are copied
    to the output; every other position is ``-100``. If a marker has no
    later ``128009``, scanning stops and the remainder stays ``-100``.

    NOTE(review): presumably the marker is the assistant header and 128009
    the end-of-turn token of the tokenizer in use - confirm.

    Args:
        labels: 1-D tensor of token IDs; it is not modified.

    Returns:
        A tensor shaped like ``labels``.
    """
    marker = torch.tensor([128006, 78191, 128007, 271], device=labels.device)
    stop_id = 128009
    marker_len = len(marker)

    labels = labels.clone()
    total = len(labels)
    supervised = torch.full_like(labels, fill_value=-100)

    pos = 0
    while pos < total:
        window_end = pos + marker_len
        if window_end > total or not torch.equal(labels[pos:window_end], marker):
            pos += 1
            continue

        begin = window_end
        hits = (labels[begin:] == stop_id).nonzero(as_tuple=True)[0]
        if hits.numel() == 0:
            # Unterminated span: leave the rest unsupervised.
            break
        stop = hits[0].item() + begin
        # keep [begin:stop+1]
        supervised[begin : stop + 1] = labels[begin : stop + 1]
        pos = stop + 1

    return supervised
sum(self.data_lengths) + + def find_dataset_index(index, data_lengths): + cumulative = 0 + for i, length in enumerate(data_lengths): + if index < cumulative + length: + return i, index - cumulative + cumulative += length + + data_idx, internal_index = find_dataset_index(index, self.data_lengths) + + data_dict = copy.deepcopy(self.datas[data_idx][internal_index]) + data_dict = self._parse_annotations(data_dict) + + if data_dict is None: + return None + + image = data_dict["image"] + convs = data_dict["conversations"] + visual_prompt = data_dict["visual_prompt_matrix"] + + w, h = image.size + if w < 10 or h < 10: + return None + + if data_dict["mask_caption_data"]: + messages, messages_mask = [], [] + for i, conv in enumerate(convs): + if i == 0: + assert conv["from"] == "human" + messages.append( + { + "role": "user", + "content": [ + {"type": "image", "image": image}, + { + "type": "text", + "text": conv["value"].replace("\n", ""), + }, + ], + }, + ) + messages_mask.append( + { + "role": "user", + "content": [ + {"type": "image", "image": visual_prompt}, + { + "type": "text", + "text": conv["value"].replace("\n", ""), + }, + ], + }, + ) + continue + + assert "" not in conv["value"] + if conv["from"] == "human": + messages.append( + { + "role": "user", + "content": [{"type": "text", "text": conv["value"]}], + } + ) + messages_mask.append( + { + "role": "user", + "content": [{"type": "text", "text": conv["value"]}], + } + ) + elif conv["from"] == "gpt": + messages.append( + { + "role": "assistant", + "content": [{"type": "text", "text": conv["value"]}], + } + ) + messages_mask.append( + { + "role": "assistant", + "content": [{"type": "text", "text": conv["value"]}], + } + ) + else: + raise NotImplementedError + else: + # keep the same with the original provided conversation + messages = [ + { + "role": "user", + "content": [ + {"type": "image", "image": image}, + { + "type": "text", + "text": data_dict["conversations"][0]["value"].replace( + "\n", "" + ), + }, + ], + 
def _rand_another(self):
    """Return a uniformly random valid item index.

    ``random.randint`` includes its upper bound, so the previous
    ``randint(0, total)`` could return ``total``; after the modulo in
    ``prepare_data`` that made index 0 twice as likely as any other.
    """
    return random.randrange(sum(self.data_lengths))


def __getitem__(self, index):
    """Return the prepared sample at ``index``, retrying on failure.

    If ``prepare_data`` raises or returns ``None``, another index is drawn
    at random and tried instead, up to ``self._max_refetch`` extra attempts.

    Raises:
        RuntimeError: if no usable sample is found within the retry budget
            (previously ``None`` was returned implicitly).
    """
    for _ in range(self._max_refetch + 1):
        try:
            data = self.prepare_data(index, padding=False, return_tensors="pt")
        except Exception as exc:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still propagate.
            print(f"[WARNING] failed to prepare data {index}: {exc!r}")
            data = None

        if data is not None:
            return data

        index_old = index
        index = self._rand_another()
        print(f"[WARNING] data {index_old} is None, use {index}!")

    raise RuntimeError(
        f"no valid sample found after {self._max_refetch + 1} attempts"
    )
input_ids, labels = [], [] + has_image = any(inst.get("pixel_values") is not None for inst in instances) + has_mask = any(inst.get("mask_values") is not None for inst in instances) + has_global_mask = any( + inst.get("global_mask_values") is not None for inst in instances + ) + has_aspect_ratio = any(inst.get("aspect_ratio") is not None for inst in instances) + if use_varlen_attn: + position_ids, cumulative_len = [], [] + assert len(instances) == 1, ( + f"If utilizing varlen attention, the batch size should be" + f" set to 1, but got {len(instances)}" + ) + assert not has_image, "Currently, it is not configured to " + "accommodate the use of varlen Attention in multimodal training" + + if has_image: + pixel_values = [] + if has_mask: + mask_values = [] + num_image_tokens = [] + if has_global_mask: + global_mask_values = [] + if has_aspect_ratio: + aspect_ratios = [] + bboxes = [] + + first = instances[0] + for example in instances: + input_ids.append(torch.LongTensor(example["input_ids"])) + labels.append(torch.LongTensor(example["labels"])) + if use_varlen_attn: + cumulative_len.append(torch.IntTensor(example["cumulative_len"])) + position_ids.append(torch.LongTensor(example["position_ids"])) + + if has_image: + pixel_values.append(example["pixel_values"].squeeze()) + if has_mask: + mask_values.append(example["mask_values"].squeeze()) + num_image_tokens.append(example["num_image_tokens"].squeeze()) + if has_global_mask: + global_mask_values.append(example["global_mask_values"].squeeze()) + if has_aspect_ratio: + aspect_ratios.append(example["aspect_ratio"]) + bboxes.append(example["bboxes"]) + + ori_length = [len(ids) for ids in input_ids] + if len(instances) > 1: + input_ids = pad_sequence(input_ids, batch_first=True, padding_value=pad_index) + labels = pad_sequence(labels, batch_first=True, padding_value=IGNORE_INDEX) + else: + input_ids = torch.stack(input_ids) + labels = torch.stack(labels) + + if use_varlen_attn: + assert input_ids.size(1) % 
seq_parallel_world_size == 0 + attention_mask = None + position_ids = torch.stack(position_ids, dim=0) + else: + # Some tokenizers have the same eos token and pad token, so input_ids + # cannot be masked directly based on the pad token id. + attention_mask = torch.zeros_like(input_ids).bool() + for i, length in enumerate(ori_length): + attention_mask[i, :length] = True + + bs, seq_len = input_ids.shape + position_ids = torch.arange(seq_len).unsqueeze(0).long().repeat(bs, 1) + + if seq_parallel_world_size > 1: + input_ids = pad_for_sequence_parallel(input_ids, pad_index) + labels = pad_for_sequence_parallel(labels, IGNORE_INDEX) + position_ids = pad_for_sequence_parallel(position_ids, 0) + if attention_mask is not None: + attention_mask = pad_for_sequence_parallel(attention_mask, 0) + + if use_varlen_attn: + max_seqlen = ( + (cumulative_len[0][1:] - cumulative_len[0][:-1]).max().item() # noqa: W504 + ) + data_dict = { + "input_ids": input_ids, + "cumulative_len": cumulative_len, + "position_ids": position_ids, + "labels": labels, + "max_seqlen": max_seqlen, + } + else: + data_dict = { + "input_ids": input_ids, + "attention_mask": attention_mask, + "position_ids": position_ids, + "labels": labels, + } + + if has_image: + data_dict["pixel_values"] = torch.cat(pixel_values, dim=0) + if has_mask: + data_dict["pix_values"] = torch.cat(pixel_values, dim=0) + data_dict["mask_values"] = torch.cat(mask_values, dim=0) + data_dict["num_image_tokens"] = torch.cat(num_image_tokens, dim=0) + if has_global_mask: + data_dict["global_mask_values"] = torch.cat(global_mask_values, dim=0) + if has_aspect_ratio: + data_dict["aspect_ratios"] = torch.cat(aspect_ratios, dim=0) + data_dict["bboxes"] = bboxes + + for k, v in first.items(): + if k in ("ori_image_list"): + pass + else: + if ( + k + not in ( + "image_flags", + "num_patches", + "num_vprompts", + "sampled_mark_token_ids", + "pixel_values", + "visual_prompts", + "merged_visual_prompts", + "global_mask_values", + "aspect_ratios", + 
class GARConfig(PretrainedConfig):
    """Configuration of the HuggingFace-format GAR model.

    Wraps a ``PerceptionLMConfig`` (stored as ``mllm_config``) together with
    the region-prompt settings read by ``GARModel``.

    Args:
        mllm_config: Keyword arguments for ``PerceptionLMConfig``. ``None``
            builds a ``PerceptionLMConfig`` with its default values.
        prompt_numbers: Number of visual prompts (regions) supported.
        crop_tokens_ids: One crop placeholder token id per prompt; must hold
            exactly ``prompt_numbers`` entries. ``None`` selects the five
            default ids.
        use_flash_attn: Stored as ``use_flash_attn`` on the wrapped config and
            its text sub-config. The vision sub-config flag is always
            ``False``.
        **kwargs: Forwarded to ``PretrainedConfig.__init__``.

    Derived attributes: ``patch_size_h``, ``patch_size_w``, ``kernel_size``
    and ``mask_path_embedding_out_channels`` (attribute name kept as-is
    because ``GARModel`` reads it).
    """

    model_type = "GAR"
    is_composition = True

    def __init__(
        self,
        mllm_config=None,
        prompt_numbers=5,
        crop_tokens_ids=None,
        use_flash_attn=True,
        **kwargs,
    ):
        super().__init__(**kwargs)
        if mllm_config is None:
            mllm_config = {}
            logger.info(
                "mllm_config is None. Initializing the PerceptionLM with default values."
            )
        # The original second ``is None`` branch could never run because None
        # had already been replaced by ``{}``; the effective behaviour (always
        # build a PerceptionLMConfig from the dict) is kept.
        self.mllm_config = PerceptionLMConfig(**mllm_config)
        self.prompt_numbers = prompt_numbers

        # Avoid a shared mutable default; copy so the caller's list is not aliased.
        if crop_tokens_ids is None:
            crop_tokens_ids = [128004, 128005, 128008, 128010, 128011]
        self.crop_tokens_ids = list(crop_tokens_ids)
        assert (
            len(self.crop_tokens_ids) == self.prompt_numbers
        ), f"{self.crop_tokens_ids} crop_tokens_ids length should be {self.prompt_numbers}"

        # Patch size = image size // reference feature grid; fall back to 16
        # when the vision config does not expose these entries.
        try:
            model_args = self.mllm_config.vision_config.model_args
            patch_size_h = model_args["img_size"][0] // model_args["ref_feat_shape"][0]
            patch_size_w = model_args["img_size"][1] // model_args["ref_feat_shape"][1]
        except Exception:
            patch_size_h = 16
            patch_size_w = 16
        self.patch_size_h = patch_size_h
        self.patch_size_w = patch_size_w
        self.kernel_size = [self.patch_size_h, self.patch_size_w]

        # Output channels of the mask patch embedding follow the vision
        # feature width; fall back to 1280 when it is unavailable.
        try:
            self.mask_path_embedding_out_channels = (
                self.mllm_config.vision_config.num_features
            )
        except Exception:
            self.mask_path_embedding_out_channels = 1280

        flash = bool(use_flash_attn)
        self.mllm_config.use_flash_attn = flash
        self.mllm_config.text_config.use_flash_attn = flash
        self.mllm_config.vision_config.use_flash_attn = False

    def to_dict(self):
        """
        Serializes this instance to a Python dictionary. Override the default [`~PretrainedConfig.to_dict`].

        Returns:
            `Dict[str, any]`: Dictionary of all the attributes that make up this configuration instance,
            with ``mllm_config`` expanded through its own ``to_dict`` and ``model_type`` added.
        """
        output = copy.deepcopy(self.__dict__)
        output["mllm_config"] = self.mllm_config.to_dict()
        output["model_type"] = self.__class__.model_type
        return output
args.config = cfgs_name_path[args.config] + except KeyError: + raise FileNotFoundError(f"Cannot find {args.config}") + + # load config + cfg = Config.fromfile(args.config) + # if args.cfg_options is not None: + # cfg.merge_from_dict(args.cfg_options) + + original_load = torch.load + + def patched_load(*args, **kwargs): + kwargs["weights_only"] = False + return original_load(*args, **kwargs) + + torch.load = patched_load + + cfg.model.pretrained_pth = None + selfmodel = BUILDER.build(cfg.model) + + backend = get_file_backend(args.pth_model) + if isinstance(backend, PetrelBackend): + from xtuner.utils.fileio import patch_fileio + + with patch_fileio(): + state_dict = guess_load_checkpoint(args.pth_model) + else: + state_dict = guess_load_checkpoint(args.pth_model) + + # load the state dict + msg = selfmodel.load_state_dict(state_dict, strict=False) + print(f"Load PTH model from {args.pth_model} with msg: {msg}") + + selfmodel.cuda() + selfmodel.eval() + selfmodel.to(torch.bfloat16) + mllm_name_or_path = cfg.mllm_name_or_path + + processor = AutoProcessor.from_pretrained( + mllm_name_or_path, + trust_remote_code=True, + ) + + # convert to hf format + from projects.grasp_any_region.hf_models.configuration_gar import GARConfig + from projects.grasp_any_region.hf_models.modeling_gar import GARModel + + tokenizer = selfmodel.processor.tokenizer + prompt_numbers = selfmodel.prompt_numbers + crop_tokens_ids = [ + tokenizer.convert_tokens_to_ids(f"<|reserved_special_token_{pid+2}|>") + for pid in range(prompt_numbers) + ] + + base_config = AutoConfig.from_pretrained( + mllm_name_or_path, + trust_remote_code=True, + ) + base_config.text_config.vocab_size = len(tokenizer) + gar_config = GARConfig( + mllm_config=base_config.to_dict(), + prompt_numbers=prompt_numbers, + crop_tokens_ids=crop_tokens_ids, + auto_map={ + "AutoConfig": "configuration_gar.GARConfig", + "AutoModel": "modeling_gar.GARModel", + "AutoModelForCausalLM": "modeling_gar.GARModel", + }, + ) + + hf_model = 
class GARModel(PreTrainedModel):
    """HuggingFace wrapper around a PerceptionLM model with region feature replay.

    A binary region mask is embedded by ``mask_patch_embedding`` and passed to
    the wrapped model's ``get_image_features``. For every crop token id found
    in ``input_ids``, the span between its first and last occurrence is
    replaced by 16x16 RoI-aligned image features of the matching box.
    """

    config_class = GARConfig
    main_input_name = "pixel_values"
    base_model_prefix = "language_model"
    _no_split_modules = ["LlamaDecoderLayer"]
    _supports_flash_attn_2 = True
    supports_gradient_checkpointing = True

    def __init__(
        self,
        config: GARConfig,
        mllm=None,
        mask_patch_embedding=None,
        use_flash_attn=True,
    ):
        """Build the model, optionally reusing already constructed sub-modules.

        Args:
            config: GAR configuration.
            mllm: Existing multimodal LLM to wrap; when ``None`` a
                ``PerceptionLMForConditionalGeneration`` is created from
                ``config.mllm_config``.
            mask_patch_embedding: Existing mask embedding module; when ``None``
                a bias-free ``nn.Conv2d`` with 3 input channels is created.
            use_flash_attn: Selects ``flash_attention_2`` (else ``eager``) for
                the wrapped config; the vision config always uses ``eager``.
        """
        super().__init__(config)
        flash = bool(use_flash_attn)
        config.mllm_config.use_flash_attn = flash
        config.mllm_config.text_config.use_flash_attn = flash
        config.mllm_config.vision_config.use_flash_attn = False

        config.mllm_config._attn_implementation = (
            "flash_attention_2" if flash else "eager"
        )
        config.mllm_config.vision_config._attn_implementation = "eager"

        self.prompt_numbers = config.prompt_numbers

        if mllm is not None:
            self.mllm = mllm
        else:
            self.mllm = PerceptionLMForConditionalGeneration(config.mllm_config)

        if mask_patch_embedding is not None:
            self.mask_patch_embedding = mask_patch_embedding
        else:
            self.mask_patch_embedding = nn.Conv2d(
                in_channels=3,
                out_channels=config.mask_path_embedding_out_channels,
                kernel_size=config.kernel_size,
                stride=config.kernel_size,
                bias=False,
            )

        self.crop_tokens_ids = config.crop_tokens_ids

    @property
    def lm_head(self):
        """Output embedding module of the wrapped language model."""
        return self.mllm.model.language_model.get_output_embeddings()

    def get_input_embeddings(self):
        """Return the input embedding module of the wrapped language model."""
        return self.mllm.model.language_model.get_input_embeddings()

    def get_output_embeddings(self):
        """Return the output embedding module of the wrapped language model."""
        return self.mllm.model.language_model.get_output_embeddings()

    # ------------------------------------------------------------------ #
    # Private helpers shared by ``forward`` and ``generate``.
    # ------------------------------------------------------------------ #
    def _embed_region_mask(self, global_mask_values, device):
        """Embed the region mask as a binary (region vs. background) image.

        ``(v + 1) / 2 * 255`` maps -1 to 0 and 1 to 255. Values are then
        clamped to ``[0, prompt_numbers]`` and every pixel whose id differs
        from ``prompt_numbers`` is treated as foreground.
        """
        mask_values = (
            torch.round((global_mask_values + 1.0) / 2.0 * 255.0).long().to(device)
        )
        mask_values = torch.clamp(mask_values, min=0, max=self.prompt_numbers)
        assert (
            mask_values.max() < self.prompt_numbers + 1 and mask_values.min() >= 0
        ), f"max: {mask_values.max()}, min: {mask_values.min()}"
        return self.mask_patch_embedding(
            (mask_values != self.prompt_numbers).to(self.mllm.dtype)
        )  # binary mask

    def _embed_multimodal_inputs(self, input_ids, pixel_values, mask_embeds):
        """Embed ``input_ids`` and scatter image features into the image slots.

        Returns:
            tuple: ``(inputs_embeds, image_features)``.
        """
        inputs_embeds = self.mllm.get_input_embeddings()(input_ids)
        image_features = self.mllm.get_image_features(
            pixel_values=pixel_values,
            mask_embeds=mask_embeds,
        )
        image_features = image_features.to(
            inputs_embeds.device, dtype=inputs_embeds.dtype
        )
        special_image_mask, _ = self.mllm.get_placeholder_mask(
            input_ids, inputs_embeds=inputs_embeds, image_features=image_features
        )
        inputs_embeds = inputs_embeds.masked_scatter(special_image_mask, image_features)
        return inputs_embeds, image_features

    def _roi_replay_tokens(self, feature_map, bbox):
        """RoI-align ``bbox`` on ``feature_map`` and return ``(256, C)`` tokens.

        ``bbox`` is ``(x1, y1, x2, y2)``; the arithmetic below treats it as
        fractions of the image size - TODO confirm against the data pipeline.
        """
        feat_h, feat_w = feature_map.shape[2:]
        x1, y1, x2, y2 = bbox
        orig_h, orig_w = feat_h * 28, feat_w * 28

        # The factor 28 cancels out: each coordinate ends up as x * feat_w
        # (or y * feat_h), i.e. in feature-map units.
        spatial_scale = feat_w / orig_w
        roi = torch.tensor(
            [
                0,
                x1 * orig_w * spatial_scale,
                y1 * orig_h * spatial_scale,
                x2 * orig_w * spatial_scale,
                y2 * orig_h * spatial_scale,
            ],
            dtype=torch.float32,
            device=feature_map.device,
        )

        # NOTE(review): the box is already in feature-map units, yet
        # ``spatial_scale`` is passed to ``roi_align`` again, which scales the
        # box a second time. Left unchanged because trained checkpoints
        # depend on this exact behaviour - confirm before touching.
        roi_features = torchvision.ops.roi_align(
            input=feature_map.float(),
            boxes=roi.unsqueeze(0),
            output_size=(16, 16),
            spatial_scale=spatial_scale,
            sampling_ratio=2,
            aligned=True,
        )
        return (
            roi_features.permute(0, 2, 3, 1)
            .flatten(1, 2)
            .to(feature_map.dtype)
            .squeeze(0)
        )

    def _replay_region_features(
        self,
        inputs_embeds,
        input_ids,
        image_features,
        aspect_ratios,
        bboxes,
        labels=None,
    ):
        """Replace every crop-token span by RoI-aligned image features.

        Args:
            inputs_embeds: ``(B, L, C)`` embeddings with image features
                already scattered in.
            input_ids: Token ids used to locate the crop-token spans.
            image_features: Per-tile image features; the first entry is
                skipped when rebuilding the full feature map.
            aspect_ratios: Per-sample pair passed to ``_merge`` as
                ``(ncw, nch)``.
            bboxes: Per-sample mapping ``str(crop_token_id) -> (x1, y1, x2, y2)``.
            labels: Optional ``(B, L)`` labels; replayed positions get ``-100``.

        Returns:
            tuple: ``(new_inputs_embeds, new_labels)``; ``new_labels`` is
            ``None`` when ``labels`` is ``None``, otherwise it is stacked to
            ``(B, L')`` so it matches ``new_inputs_embeds``.
        """
        image_features_tiles = rearrange(
            image_features[1:].unsqueeze(0), "b n (h w) c -> b n c h w", h=16, w=16
        )

        new_inputs_embeds = []
        new_labels = []
        for batch_idx in range(inputs_embeds.shape[0]):
            curr_inputs_embeds = inputs_embeds[batch_idx]
            curr_labels = labels[batch_idx] if labels is not None else None
            curr_input_ids = input_ids[batch_idx]
            feature_map = None  # merged lazily, once per sample

            for crop_token in self.crop_tokens_ids:
                target_mask = curr_input_ids.eq(crop_token)
                if not target_mask.any():
                    continue

                # Span indices come from the ORIGINAL input_ids.
                # NOTE(review): this presumably relies on every span being
                # exactly 256 tokens long so later spans stay aligned - confirm.
                target_indices = target_mask.nonzero().squeeze()
                head_idx = target_indices.min().item()
                tail_idx = target_indices.max().item()

                if feature_map is None:
                    feature_map = self._merge(
                        image_features_tiles,
                        aspect_ratios[batch_idx][0],
                        aspect_ratios[batch_idx][1],
                    )

                image_features_replay = self._roi_replay_tokens(
                    feature_map, bboxes[batch_idx][str(crop_token)]
                )

                curr_inputs_embeds = torch.cat(
                    [
                        curr_inputs_embeds[:head_idx],
                        image_features_replay,
                        curr_inputs_embeds[tail_idx + 1 :],
                    ]
                )
                if curr_labels is not None:
                    curr_labels = torch.cat(
                        [
                            curr_labels[:head_idx],
                            torch.full(
                                (image_features_replay.shape[0],),
                                -100,
                                dtype=torch.long,
                                device=curr_labels.device,
                            ),
                            curr_labels[tail_idx + 1 :],
                        ]
                    )

            if curr_labels is not None:
                assert (
                    curr_inputs_embeds.shape[0] == curr_labels.shape[0]
                ), f"shape mismatch, got {curr_inputs_embeds.shape[0]} != {curr_labels.shape[0]}"
                new_labels.append(curr_labels)
            new_inputs_embeds.append(curr_inputs_embeds)

        stacked_embeds = torch.stack(new_inputs_embeds, dim=0)
        # Stack (not cat) so labels keep their batch dimension.
        stacked_labels = torch.stack(new_labels, dim=0) if labels is not None else None
        return stacked_embeds, stacked_labels

    # ------------------------------------------------------------------ #
    # Public API.
    # ------------------------------------------------------------------ #
    def forward(self, data, data_samples=None, mode="loss"):
        """Compute the training loss for one batch.

        Args:
            data: Dict with ``pixel_values``, ``global_mask_values``,
                ``input_ids``, ``aspect_ratios``, ``bboxes`` and ``labels``.
            data_samples: Unused.
            mode: Only ``"loss"`` is supported.

        Returns:
            dict: ``{"loss": tensor}``.

        Raises:
            NotImplementedError: For any mode other than ``"loss"``
                (``"predict"`` / ``"tensor"`` previously failed with an
                unbound ``outputs`` variable).
        """
        if mode != "loss":
            raise NotImplementedError(
                f"GARModel.forward only supports mode='loss', got mode={mode!r}"
            )

        # (batch_size, num_tiles, channels, height, width)
        pixel_values = data["pixel_values"].to(self.mllm.device).to(self.mllm.dtype)
        mask_embeds = self._embed_region_mask(
            data["global_mask_values"], self.mllm.device
        )
        input_ids = data["input_ids"]
        assert input_ids.shape[0] == 1, "Currently only support batch_size=1"

        inputs_embeds, image_features = self._embed_multimodal_inputs(
            input_ids, pixel_values, mask_embeds
        )

        # feature replay
        inputs_embeds, labels = self._replay_region_features(
            inputs_embeds,
            input_ids,
            image_features,
            data["aspect_ratios"],
            data["bboxes"],
            labels=data["labels"],
        )

        batch_size, seq_len = inputs_embeds.shape[0], inputs_embeds.shape[1]
        position_ids = (
            torch.arange(0, seq_len, dtype=torch.long, device=inputs_embeds.device)
            .unsqueeze(0)
            .repeat(batch_size, 1)
        )
        attention_mask = torch.ones(
            batch_size, seq_len, dtype=torch.long, device=inputs_embeds.device
        )

        outputs, skip_this_case = self._llm_forward(
            inputs_embeds=inputs_embeds,
            position_ids=position_ids,
            attention_mask=attention_mask,
            labels=labels,
            use_cache=False,
        )

        if skip_this_case:
            print("skip this batch!")
            return {"loss": outputs.loss * 0.0}
        return {"loss": outputs.loss}

    def _merge(self, tiles: torch.Tensor, ncw: int, nch: int) -> torch.Tensor:
        """Stitch ``(B, nch*ncw, C, h, w)`` tiles into ``(B, C, nch*h, ncw*w)``."""
        batch_size, num_tiles, num_channels, tile_height, tile_width = tiles.size()
        assert num_tiles == ncw * nch, f"{ncw * nch} != {num_tiles}"

        tiles = tiles.view(batch_size, nch, ncw, num_channels, tile_height, tile_width)
        tiles = tiles.permute(0, 3, 1, 4, 2, 5).contiguous()

        original_height = nch * tile_height
        original_width = ncw * tile_width

        return tiles.view(batch_size, num_channels, original_height, original_width)

    def _llm_forward(
        self,
        inputs_embeds: torch.FloatTensor,
        input_ids: torch.LongTensor = None,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        image_flags: Optional[torch.LongTensor] = None,
        past_key_values: Optional[List[torch.FloatTensor]] = None,
        labels: Optional[torch.LongTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, CausalLMOutputWithPast]:
        """Run the wrapped model on pre-built embeddings.

        ``input_ids`` and ``image_flags`` are accepted but not used.

        Returns:
            tuple: ``(outputs, skip_this_case)``; the flag is always ``False``.
        """
        return_dict = (
            return_dict if return_dict is not None else self.mllm.config.use_return_dict
        )
        skip_this_case = False

        outputs = self.mllm(
            inputs_embeds=inputs_embeds,
            attention_mask=attention_mask,
            position_ids=position_ids,
            labels=labels,
            past_key_values=past_key_values,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        return outputs, skip_this_case

    @torch.no_grad()
    def generate(
        self,
        pixel_values: Optional[torch.FloatTensor] = None,
        global_mask_values: Optional[torch.LongTensor] = None,
        aspect_ratios: Optional[torch.FloatTensor] = None,
        bboxes: Optional[torch.FloatTensor] = None,
        input_ids: Optional[torch.FloatTensor] = None,
        attention_mask: Optional[torch.LongTensor] = None,
        generation_config: Optional[GenerationConfig] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        **generate_kwargs,
    ) -> torch.LongTensor:
        """Generate text, replaying region features when an image is given.

        Without ``pixel_values`` only the token embeddings are used.
        ``return_dict`` and ``**generate_kwargs`` are accepted but not
        forwarded to the wrapped model's ``generate``.

        Returns:
            The result of ``self.mllm.generate`` called with
            ``return_dict_in_generate=True`` and ``use_cache=True``.
        """
        if pixel_values is None:
            inputs_embeds = self.mllm.get_input_embeddings()(input_ids)
        else:
            device = self.device
            pixel_values = pixel_values.to(device).to(self.mllm.dtype)
            mask_embeds = None
            if global_mask_values is not None:
                mask_embeds = self._embed_region_mask(global_mask_values, device)

            inputs_embeds, image_features = self._embed_multimodal_inputs(
                input_ids, pixel_values, mask_embeds
            )

            # feature replay
            inputs_embeds, _ = self._replay_region_features(
                inputs_embeds, input_ids, image_features, aspect_ratios, bboxes
            )

        outputs = self.mllm.generate(
            inputs_embeds=inputs_embeds,
            attention_mask=attention_mask,
            generation_config=generation_config,
            output_hidden_states=output_hidden_states,
            use_cache=True,
            return_dict_in_generate=True,
        )

        return outputs
transformers.modeling_outputs import CausalLMOutputWithPast +from xtuner.model.modules import dispatch_modules +from xtuner.model.utils import ( + find_all_linear_names, + get_peft_model_state_dict, + guess_load_checkpoint, + make_inputs_require_grad, + traverse_dict, +) +from xtuner.registry import BUILDER + +from .modeling.modeling_perception_lm import PerceptionLMForConditionalGeneration + + +class GraspAnyRegion(BaseModel): + def __init__( + self, + mllm, + freeze_llm=False, + freeze_visual_encoder=False, + freeze_connector=False, + unfreeze_vocab=False, + unfreeze_lm_head=False, + llm_lora=None, + pretrained_pth=None, + use_activation_checkpointing=True, + vocab_embeds_name="tok_embeddings", + lm_head_name="output", + prompt_numbers=15, + ): + super().__init__() + + self.freeze_llm = freeze_llm + self.freeze_visual_encoder = freeze_visual_encoder + self.freeze_connector = freeze_connector + self.unfreeze_vocab = unfreeze_vocab + self.unfreeze_lm_head = unfreeze_lm_head + self.use_llm_lora = llm_lora is not None + self.use_activation_checkpointing = use_activation_checkpointing + self.vocab_embeds_name = vocab_embeds_name + self.lm_head_name = lm_head_name + self.prompt_numbers = prompt_numbers + + config = AutoConfig.from_pretrained( + mllm["pretrained_model_name_or_path"], trust_remote_code=True + ) + + self.config = config + + traverse_dict(mllm) + + self.model = PerceptionLMForConditionalGeneration.from_pretrained( + mllm["pretrained_model_name_or_path"], trust_remote_code=True + ) + + # build mask_patch_embedding + patch_size_h = ( + self.model.config.vision_config.model_args["img_size"][0] + // self.model.config.vision_config.model_args["ref_feat_shape"][0] + ) + patch_size_w = ( + self.model.config.vision_config.model_args["img_size"][1] + // self.model.config.vision_config.model_args["ref_feat_shape"][1] + ) + kernel_size = [patch_size_h, patch_size_w] + self.mask_patch_embedding = nn.Conv2d( + in_channels=3, + 
out_channels=self.model.config.vision_config.num_features, + kernel_size=kernel_size, + stride=kernel_size, + bias=False, + ) + # zero-init + for param in self.mask_patch_embedding.parameters(): + nn.init.zeros_(param) + + self.model.model.config.use_cache = False + + dispatch_modules(self.model.model) + + self.processor = AutoProcessor.from_pretrained( + mllm["pretrained_model_name_or_path"], trust_remote_code=True + ) + + if self.freeze_llm: + self.model.model.language_model.requires_grad_(False) + + if self.freeze_visual_encoder: + self.model.model.vision_tower.requires_grad_(False) + + if self.freeze_connector: + self.model.model.multi_modal_projector.requires_grad_(False) + + if use_activation_checkpointing: + # it is necessary when using gradient checkpointing + if hasattr(self.model.model, "enable_input_require_grads"): + self.model.model.enable_input_require_grads() + else: + self.model.model.get_input_embeddings().register_forward_hook( + make_inputs_require_grad + ) + + self._add_special_tokens() + self.gradient_checkpointing_enable() + + if self.use_llm_lora: + self._prepare_llm_for_lora(llm_lora) + + # put this after llm_lora + if self.unfreeze_vocab: + self.model.get_input_embeddings().requires_grad_(True) + if self.unfreeze_lm_head: + self.model.get_output_embeddings().requires_grad_(True) + + if pretrained_pth is not None: + pretrained_state_dict = guess_load_checkpoint(pretrained_pth) + msg = self.load_state_dict( + pretrained_state_dict, strict=False + ) # TODO, check whether the internvl2 weights are loaded correctly. 
def state_dict(self, *args, **kwargs):
    """Return the subset of parameters that should be checkpointed.

    The full module state dict is filtered by key substrings:

    * embeddings and output / ``lm_head`` weights (non-LoRA),
    * visual encoder weights unless ``freeze_visual_encoder`` is set,
    * LoRA weights when LoRA is used, otherwise the ``model.model.`` weights
      unless ``freeze_llm`` is set,
    * the mask embedding (``mask_patch_embedding.`` / ``mask_conv.``).

    Args:
        *args, **kwargs: Forwarded to the parent ``state_dict``.

    Returns:
        OrderedDict: Selected ``name -> tensor`` entries.
    """
    full_state = super().state_dict(*args, **kwargs)
    to_return = OrderedDict()

    def keep(predicate):
        # Add every entry whose key satisfies ``predicate``.
        to_return.update((k, v) for k, v in full_state.items() if predicate(k))

    # "embed" also covers "tok_embeddings" and "embed_tokens".
    keep(lambda k: "embed" in k)
    # logit head
    keep(lambda k: "output." in k and "llm" in k and "lora" not in k)
    keep(lambda k: "lm_head" in k and "lora" not in k)
    keep(lambda k: "output" in k and "lora" not in k)

    # Step 1. visual_encoder
    # NOTE(review): __init__ freezes ``self.model.model.vision_tower``, so the
    # vision keys may not contain "model.visual." - confirm this filter.
    if not self.freeze_visual_encoder:
        keep(lambda k: "model.visual." in k)

    # Step 2. LLM
    if self.use_llm_lora:
        to_return.update(
            get_peft_model_state_dict(self.model.model, state_dict=full_state)
        )
    elif not self.freeze_llm:
        # The original condition was the bare string "model.model.", which is
        # always truthy and therefore selected every key.
        keep(lambda k: "model.model." in k)

    # Step 3. mask_patch_embedding
    keep(lambda k: "mask_patch_embedding." in k)
    keep(lambda k: "mask_conv." in k)

    return to_return
+ ) + mask_values = torch.clamp(mask_values, min=0, max=self.prompt_numbers) + assert mask_values.max() < self.prompt_numbers + 1 and mask_values.min() >= 0 + + mask_embeds = self.mask_patch_embedding( + (mask_values != self.prompt_numbers).to(self.model.dtype) + ) # binary mask + input_ids = data["input_ids"] + aspect_ratios = data["aspect_ratios"] + bboxes = data["bboxes"] + assert input_ids.shape[0] == 1, "Currently only support batch_size=1" + + inputs_embeds = self.model.get_input_embeddings()(input_ids) + labels = data["labels"] + + image_features = None + if pixel_values is not None: + image_features = self.model.get_image_features( + pixel_values=pixel_values, + mask_embeds=mask_embeds, + ) + image_features = image_features.to( + inputs_embeds.device, dtype=inputs_embeds.dtype + ) + special_image_mask, _ = self.model.get_placeholder_mask( + input_ids, inputs_embeds=inputs_embeds, image_features=image_features + ) + inputs_embeds = inputs_embeds.masked_scatter( + special_image_mask, image_features + ) + + # feature replay + new_inputs_embeds = [] + new_labels = [] + image_features_tiles = rearrange( + image_features[1:].unsqueeze(0), "b n (h w) c -> b n c h w", h=16, w=16 + ) + for batch_idx in range(inputs_embeds.shape[0]): + curr_inputs_embeds = inputs_embeds[batch_idx] + curr_labels = labels[batch_idx] + for crop_token in crop_tokens: + if crop_token in input_ids[batch_idx]: + target_mask = input_ids[batch_idx].eq(crop_token) + target_indices = target_mask.nonzero().squeeze() + head_idx = target_indices.min().item() + tail_idx = target_indices.max().item() + image_features_recover = self._merge( + image_features_tiles, + aspect_ratios[batch_idx][0], + aspect_ratios[batch_idx][1], + ) + feat_h, feat_w = image_features_recover.shape[2:] + x1, y1, x2, y2 = bboxes[batch_idx][str(crop_token)] + # RoI-Align + orig_h, orig_w = feat_h * 28, feat_w * 28 # 原图尺寸 + + # origin box + roi_orig_x1 = x1 * orig_w + roi_orig_y1 = y1 * orig_h + roi_orig_x2 = x2 * orig_w + 
roi_orig_y2 = y2 * orig_h + + # feat box + spatial_scale = feat_w / orig_w + roi_feat_x1 = roi_orig_x1 * spatial_scale + roi_feat_y1 = roi_orig_y1 * spatial_scale + roi_feat_x2 = roi_orig_x2 * spatial_scale + roi_feat_y2 = roi_orig_y2 * spatial_scale + + roi = torch.tensor( + [0, roi_feat_x1, roi_feat_y1, roi_feat_x2, roi_feat_y2], + dtype=torch.float32, + device=image_features_recover.device, + ) + + roi_features = torchvision.ops.roi_align( + input=image_features_recover.float(), + boxes=roi.unsqueeze(0), + output_size=(16, 16), + spatial_scale=spatial_scale, + sampling_ratio=2, + aligned=True, + ) + + image_features_replay = ( + roi_features.permute(0, 2, 3, 1) + .flatten(1, 2) + .to(image_features_recover.dtype) + .squeeze() + ) + + curr_inputs_embeds = torch.cat( + [ + curr_inputs_embeds[:head_idx], + image_features_replay, + curr_inputs_embeds[tail_idx + 1 :], + ] + ) + curr_labels = torch.cat( + [ + curr_labels[:head_idx], + -100 + * torch.ones( + image_features_replay.shape[0], + dtype=torch.long, + device=labels.device, + ), + curr_labels[tail_idx + 1 :], + ] + ) + + assert ( + curr_inputs_embeds.shape[0] == curr_labels.shape[0] + ), f"shape mismatch, got {curr_inputs_embeds.shape[0]} != {curr_labels.shape[0]}" + + new_inputs_embeds.append(curr_inputs_embeds.unsqueeze(0)) + new_labels.append(curr_labels) + + inputs_embeds = torch.cat(new_inputs_embeds, dim=0) + labels = torch.cat(new_labels, dim=0) + + skip_this_batch = False + + if mode == "loss": + position_ids = ( + torch.arange( + 0, + inputs_embeds.shape[1], + dtype=torch.long, + device=inputs_embeds.device, + ) + .unsqueeze(0) + .repeat(inputs_embeds.shape[0], 1) + ) + attention_mask = torch.ones( + inputs_embeds.shape[0], + inputs_embeds.shape[1], + dtype=torch.long, + device=inputs_embeds.device, + ) + use_cache = False + + outputs, _skip_this_case = self._llm_forward( + inputs_embeds=inputs_embeds, + position_ids=position_ids, + attention_mask=attention_mask, + labels=labels, + 
use_cache=use_cache, + ) + + if skip_this_batch or _skip_this_case: + print("skip this batch!") + loss_dict = {"loss": outputs.loss * 0.0} + else: + loss_dict = {"loss": outputs.loss} + return loss_dict + + elif mode == "predict": + pass + elif mode == "tensor": + pass + else: + raise NotImplementedError + + def _llm_forward( + self, + inputs_embeds: torch.FloatTensor, + input_ids: torch.LongTensor = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + image_flags: Optional[torch.LongTensor] = None, + past_key_values: Optional[List[torch.FloatTensor]] = None, + labels: Optional[torch.LongTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, CausalLMOutputWithPast]: + return_dict = ( + return_dict + if return_dict is not None + else self.model.config.use_return_dict + ) + skip_this_case = False + + outputs = self.model( + inputs_embeds=inputs_embeds, + attention_mask=attention_mask, + position_ids=position_ids, + labels=labels, + past_key_values=past_key_values, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + + return outputs, skip_this_case diff --git a/projects/grasp_any_region/models/modeling/__init__.py b/projects/grasp_any_region/models/modeling/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7e4593a102ee2a0f1b1ec5b7e780705574f5bd61 --- /dev/null +++ b/projects/grasp_any_region/models/modeling/__init__.py @@ -0,0 +1,28 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import TYPE_CHECKING + +from transformers.utils import _LazyModule +from transformers.utils.import_utils import define_import_structure + +if TYPE_CHECKING: + from .configuration_perception_lm import * + from .modeling_perception_lm import * +else: + import sys + + _file = globals()["__file__"] + sys.modules[__name__] = _LazyModule( + __name__, _file, define_import_structure(_file), module_spec=__spec__ + ) diff --git a/projects/grasp_any_region/models/modeling/configuration_perception_lm.py b/projects/grasp_any_region/models/modeling/configuration_perception_lm.py new file mode 100644 index 0000000000000000000000000000000000000000..81669cd3b36c5b704cb2f70b347a6cd5165f5f69 --- /dev/null +++ b/projects/grasp_any_region/models/modeling/configuration_perception_lm.py @@ -0,0 +1,89 @@ +# coding=utf-8 +# Copyright 2025 Meta Platforms, Inc. and the HuggingFace Inc. team. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
class PerceptionLMConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`PerceptionLMForConditionalGeneration`]. It is used to instantiate a
    PerceptionLM model according to the specified arguments, defining the model architecture.

    Example models:
    - [facebook/Perception-LM-1B](https://huggingface.co/facebook/Perception-LM-1B).
    - [facebook/Perception-LM-3B](https://huggingface.co/facebook/Perception-LM-3B).
    - [facebook/Perception-LM-8B](https://huggingface.co/facebook/Perception-LM-8B).

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        vision_config (`Union[TimmWrapperConfig, dict]`, *optional*, defaults to `TimmWrapperConfig()`):
            The config object or dictionary of the vision backbone.
        text_config (`Union[PretrainedConfig, dict]`, *optional*, defaults to `LlamaConfig()`):
            The config object or dictionary of the text backbone.
        vision_use_cls_token (`bool`, *optional*, defaults to `True`):
            Whether CLS token is used in the vision backbone. If used, we remove CLS token embedding from vision output.
        projector_pooling_ratio (`int`, *optional*, defaults to 1):
            The pooling ratio used in the multimodal projector.
        image_token_id (`int`, *optional*, defaults to 128002):
            The image token index to encode the image prompt.
        video_token_id (`int`, *optional*, defaults to 128003):
            The video token index to encode the video prompt.
    """

    model_type = "perception_lm"
    sub_configs = {"text_config": AutoConfig, "vision_config": TimmWrapperConfig}

    def __init__(
        self,
        vision_config=None,
        text_config=None,
        vision_use_cls_token=True,
        projector_pooling_ratio=1,
        image_token_id=128002,
        video_token_id=128003,
        **kwargs,
    ):
        self.image_token_id = image_token_id
        self.video_token_id = video_token_id

        # A ready-made config object is stored as-is; only ``None`` and plain
        # dicts need to be turned into a ``TimmWrapperConfig``.
        if vision_config is None:
            vision_config = TimmWrapperConfig()
        elif isinstance(vision_config, dict):
            vision_config = TimmWrapperConfig(**vision_config)
        self.vision_config = vision_config
        self.vision_use_cls_token = vision_use_cls_token

        if text_config is None:
            text_config = CONFIG_MAPPING["llama"]()
        elif isinstance(text_config, dict):
            # Work on a copy so the caller's dict is not modified when the
            # default ``model_type`` is filled in.
            text_kwargs = dict(text_config)
            text_kwargs.setdefault("model_type", "llama")
            text_config = CONFIG_MAPPING[text_kwargs["model_type"]](**text_kwargs)

        self.text_config = text_config
        self.projector_pooling_ratio = projector_pooling_ratio
        super().__init__(**kwargs)
+# ************************************************************************* + +# Adapted from https://github.com/huggingface/transformers/blob/v4.55.4/src/transformers/models/perception_lm/image_processing_perception_lm_fast.py + +# Copyright 2025 Meta Platforms, Inc. and the HuggingFace Inc. team. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Fast Image processor class for PerceptionLM.""" + +import math +from functools import reduce +from typing import Optional, Union + +import numpy as np +from transformers.image_processing_utils import BatchFeature +from transformers.image_processing_utils_fast import ( + BaseImageProcessorFast, + DefaultFastImageProcessorKwargs, + get_image_size, + group_images_by_shape, + reorder_images, +) +from transformers.image_utils import ( + IMAGENET_STANDARD_MEAN, + IMAGENET_STANDARD_STD, + ChannelDimension, + PILImageResampling, +) +from transformers.processing_utils import Unpack +from transformers.utils import ( + TensorType, + auto_docstring, + is_torch_available, + is_torchvision_available, +) + +if is_torch_available(): + import torch + +if is_torchvision_available(): + from torchvision.transforms import functional as F + + +class PerceptionLMFastImageProcessorKwargs(DefaultFastImageProcessorKwargs): + r""" + vision_input_type (`str`, *optional*, defaults to `"thumb+tile"`): + Vision processing strategy. 
`"thumb+tile"` uses both thumbnails and multiple tiles for + multi-scale processing, otherwise uses single tile for lower memory usage. + tile_size (`int`, *optional*, defaults to `448`): + Height and width dimension (in pixels) of each tile used for image processing. + max_num_tiles (`int`, *optional*, defaults to `36`): + Maximum number of tiles an image can be split into based on its aspect ratio. + """ + + vision_input_type: str = "thumb+tile" + tile_size: int = 448 + max_num_tiles: int = 36 + + +@auto_docstring +class PerceptionLMImageProcessorFast(BaseImageProcessorFast): + resample = PILImageResampling.BICUBIC + image_mean = IMAGENET_STANDARD_MEAN + image_std = IMAGENET_STANDARD_STD + do_resize = True + do_center_crop = False + do_rescale = True + do_normalize = True + do_convert_rgb = True + size = {"width": 448, "height": 448} # for backward compatibility in tests + valid_kwargs = PerceptionLMFastImageProcessorKwargs + + def __init__(self, **kwargs: Unpack[PerceptionLMFastImageProcessorKwargs]) -> None: + super().__init__(**kwargs) + + @auto_docstring + def preprocess( + self, images, **kwargs: Unpack[PerceptionLMFastImageProcessorKwargs] + ) -> BatchFeature: + return super().preprocess(images, **kwargs) + + @staticmethod + def _factors(n: int): + """Return all factors of a number.""" + return set( + reduce( + list.__add__, + ([i, n // i] for i in range(1, int(n**0.5) + 1) if n % i == 0), + ) + ) + + def _find_supported_aspect_ratios(self): + """ + This function computes all the allowed aspect ratios for a fixed + number of input chunks. The order of returned items matters for the result of `_fit_image_to_canvas` function. + If tie exists in `_fit_image_to_canvas`, the latter in `_find_supported_aspect_ratios` wins. 
+ + For example, with `num_tiles=5`, it will return: + { + 0.2: [(1, 5)], + 5.0: [(5, 1)], + 0.25: [(1, 4)], + 1.0: [(2, 2), (1, 1)], + 4.0: [(4, 1)], + 0.3333333333333333: [(1, 3)], + 3.0: [(3, 1)], + 0.5: [(1, 2)], + 2.0: [(2, 1)] + } + """ + asp_dict = {} + for chunk_size in range(self.max_num_tiles, 0, -1): + _factors = sorted(self._factors(chunk_size)) + _asp_ratios = [(x, chunk_size // x) for x in _factors] + for ratio in _asp_ratios: + k = ratio[0] / ratio[1] + if k not in asp_dict: + asp_dict[k] = [ratio] + else: + asp_dict[k].append(ratio) + return asp_dict + + def _get_image_height_width( + self, image_width: int, image_height: int, target_width: int, target_height: int + ) -> tuple[int, int]: + """ + Given image width, height and target width, height for the canvas, return the dimensions of how the image would be resized + with aspect ratio preservation. + """ + scale = image_width / image_height + + if scale > 1.0: + # Width is larger than height + + # Rescaling factor is the minimum of the two scaling factors. Else one side would be outside of the canvas. + rescaling_factor = min( + target_width / image_width, target_height / image_height + ) + + # Set new width to target width and height to the rescaled height. + new_w = rescaling_factor * image_width + new_h = math.floor(new_w / scale) + + else: + # Height is larger than width + + # Rescaling factor is the minimum of the two scaling factors. Else one side would be outside of the canvas. + rescaling_factor = min( + target_width / image_width, target_height / image_height + ) + + # Set new height to target height and width to the rescaled width. 
+ new_h = rescaling_factor * image_height + new_w = math.floor(new_h * scale) + + return new_w, new_h + + def _fit_image_to_canvas(self, img_width: int, img_height: int, tile_size: int): + """ + Given an image width, height and target number of chunks this function will see if the image + can be fit into any of the canvases that can be build from arranging the tiles in a grid. + If the image can be fit onto several canvases, it will return the canvas where the shorter edge + of the image will be largest. + """ + # Initialize the optimal canvas to None. If no canvas is found where image fits, function returns None. + optimal_canvas = None + optimal_image_width_height = None + + scale = img_width / img_height + + # Gather all potential supported image resolutions and iterate through them to find best match + potential_arrangements = [ + item + for sublist in self._find_supported_aspect_ratios().values() + for item in sublist + ] + for n_w, n_h in potential_arrangements: + # Compute the canvas size + canvas_width, canvas_height = n_w * tile_size, n_h * tile_size + + # Check if image can fit into the canvas without downsampling + if canvas_width >= img_width and canvas_height >= img_height: + # If we did not find a good canvas yet, we will use the current one + if optimal_canvas is None: + # Set optimal canvas and determine the actual image height and width in the canvas with aspect ratio preserving resampling + optimal_canvas = (n_w, n_h) + optimal_image_width_height = self._get_image_height_width( + image_width=img_width, + image_height=img_height, + target_width=n_w * tile_size, + target_height=n_h * tile_size, + ) + else: + # If we already found an optimal canvas before, we will check if the shorter edge of the image will be larger than the current optimal canvas. + # This means we can potentially upsample the image resolution which is beneficial to performance. 
+ image_width_height = self._get_image_height_width( + image_width=img_width, + image_height=img_height, + target_width=n_w * tile_size, + target_height=n_h * tile_size, + ) + # Llama3V dynamic tiling. Priortize biggest canvas. + if ( + scale < 1.0 + and (image_width_height[0] >= optimal_image_width_height[0]) + ) or ( + scale >= 1.0 + and (image_width_height[1] >= optimal_image_width_height[1]) + ): + optimal_canvas = (n_w, n_h) + optimal_image_width_height = image_width_height + return optimal_canvas + + def _find_closest_aspect_ratio( + self, img_width: int, img_height: int, tile_size: int + ) -> tuple: + """ + Given an image width, height and target number of chunks + this function will find the closest supported aspect ratio. + """ + target_aspect_ratio = img_width / img_height + asp_dict = self._find_supported_aspect_ratios() + closest_aspect_ratio = None + if target_aspect_ratio >= 1: + closest_aspect_ratio = min( + [k for k in asp_dict if k <= target_aspect_ratio], + key=lambda x: abs(x - target_aspect_ratio), + ) + tiles_given_aspect_ratio = asp_dict[closest_aspect_ratio] + # select largest width + return max(tiles_given_aspect_ratio, key=lambda x: x[0]) + else: + closest_aspect_ratio = min( + [k for k in asp_dict if k > target_aspect_ratio], + key=lambda x: abs(1 / x - 1 / target_aspect_ratio), + ) + tiles_given_aspect_ratio = asp_dict[closest_aspect_ratio] + # select largest height + return max(tiles_given_aspect_ratio, key=lambda x: x[1]) + + def _split(self, image: torch.Tensor, ncw: int, nch: int) -> torch.Tensor: + # Split image into number of required tiles (width x height) + batch_size, num_channels, height, width = image.size() + image = image.view( + batch_size, num_channels, nch, height // nch, ncw, width // ncw + ) + # Permute dimensions to reorder the axes + image = image.permute(0, 2, 4, 1, 3, 5).contiguous() + # Reshape into the desired output shape (batch_size * 4, num_channels, width/2, height/2) + image = image.view( + batch_size, ncw * 
nch, num_channels, height // nch, width // ncw + ) + return image + + def resize( + self, + image: np.ndarray, + tile_size: int, + max_num_tiles: int, + resample: PILImageResampling = PILImageResampling.BICUBIC, + input_data_format: Optional[Union[str, ChannelDimension]] = None, + ): + height, width = get_image_size(image, channel_dim=input_data_format) + if max_num_tiles > 1: + aspect_ratio = self._fit_image_to_canvas( + img_width=width, img_height=height, tile_size=tile_size + ) + if aspect_ratio is None: + # If we did not find a canvas, we have to find the closest aspect ratio and downsample the image + aspect_ratio = self._find_closest_aspect_ratio( + img_width=width, img_height=height, tile_size=tile_size + ) + else: + aspect_ratio = (1, 1) + new_width, new_height = aspect_ratio[0] * tile_size, aspect_ratio[1] * tile_size + image = F.resize(image, (new_height, new_width), interpolation=resample) + return image, aspect_ratio + + def _preprocess( + self, + images: list["torch.Tensor"], + do_resize: bool, + do_rescale: Optional[bool], + rescale_factor: Optional[Union[int, float]], + do_normalize: Optional[bool], + image_mean: Optional[Union[float, list[float]]], + image_std: Optional[Union[float, list[float]]], + vision_input_type: str, + tile_size: int, + max_num_tiles: int, + return_tensors: Optional[Union[str, TensorType]], + disable_grouping: bool, + **kwargs: Unpack[PerceptionLMFastImageProcessorKwargs], + ) -> BatchFeature: + # Group images by size for batched transformation + grouped_images, grouped_images_index = group_images_by_shape( + images, disable_grouping=disable_grouping + ) + resized_images_grouped = {} + aspect_ratio = [1, 1] + for shape, stacked_images in grouped_images.items(): + if do_resize: + if vision_input_type == "thumb+tile": + thumbnails, _ = self.resize( + stacked_images, + tile_size, + max_num_tiles=1, + resample=self.resample, + ) + images_for_tiling, (tiles_w, tiles_h) = self.resize( + stacked_images, + tile_size, + 
max_num_tiles=max_num_tiles, + resample=self.resample, + ) + image_tiles = self._split(images_for_tiling, tiles_w, tiles_h) + stacked_images = torch.cat( + [thumbnails.unsqueeze(1), image_tiles], dim=1 + ) + aspect_ratio = [tiles_w, tiles_h] + else: # vanilla single tile for low memory devices + stacked_images, _ = self.resize( + stacked_images, + tile_size, + max_num_tiles=1, + resample=self.resample, + ) + + resized_images_grouped[shape] = stacked_images + resized_images = reorder_images(resized_images_grouped, grouped_images_index) + + grouped_images, grouped_images_index = group_images_by_shape( + resized_images, disable_grouping=disable_grouping + ) + processed_images_grouped = {} + for shape, stacked_images in grouped_images.items(): + # Fused rescale and normalize + stacked_images = self.rescale_and_normalize( + stacked_images, + do_rescale, + rescale_factor, + do_normalize, + image_mean, + image_std, + ) + processed_images_grouped[shape] = stacked_images + processed_images = reorder_images( + processed_images_grouped, grouped_images_index + ) + processed_images = [ + p[None] if p.ndim == 3 else p for p in processed_images + ] # add tiles dimension if needed + processed_images = ( + torch.stack(processed_images, dim=0) if return_tensors else processed_images + ) + return BatchFeature( + data={"pixel_values": processed_images, "aspect_ratio": aspect_ratio}, + tensor_type=return_tensors, + ) + + +__all__ = ["PerceptionLMImageProcessorFast"] diff --git a/projects/grasp_any_region/models/modeling/modeling_perception_lm.py b/projects/grasp_any_region/models/modeling/modeling_perception_lm.py new file mode 100644 index 0000000000000000000000000000000000000000..3fe417928217176b814637d07fa61c38b3a97c02 --- /dev/null +++ b/projects/grasp_any_region/models/modeling/modeling_perception_lm.py @@ -0,0 +1,867 @@ +# ************************************************************************* +# This file may have been modified by Bytedance Inc. 
class PerceptionLMAdaptiveAvgPooling(nn.Module):
    """Spatially average-pool a square grid of tokens.

    The token axis is interpreted as an ``h x h`` grid, pooled down to
    ``(h // pooling_ratio) x (h // pooling_ratio)`` with adaptive average
    pooling, and flattened back into a token sequence.
    """

    def __init__(self, pooling_ratio=2):
        super().__init__()
        self.pooling_ratio = pooling_ratio

    def forward(self, hidden_states):
        """Pool ``hidden_states`` of shape ``(batch, num_tokens, channels)``.

        Raises:
            ValueError: If ``num_tokens`` is not a perfect square.
        """
        batch, num_tokens, _ = hidden_states.shape
        side = int(math.sqrt(num_tokens))
        if side * side != num_tokens:
            raise ValueError(
                f"num_tokens {num_tokens} is expected to be a square number"
            )

        pooled_side = side // self.pooling_ratio
        # (batch, tokens, channels) -> (batch, channels, side, side)
        grid = hidden_states.permute(0, 2, 1).reshape(batch, -1, side, side)
        pooled = F.adaptive_avg_pool2d(grid, (pooled_side, pooled_side))
        # (batch, channels, h', w') -> (batch, h' * w', channels)
        return pooled.flatten(2).transpose(1, 2)
# NOTE: the class docstring below is passed through `@auto_docstring`, so it is
# left untouched; extra notes are given as `#` comments only.
@dataclass
@auto_docstring(
    custom_intro="""
    Base class for PerceptionLM causal language model (or autoregressive) outputs.
    """
)
class PerceptionLMCausalLMOutputWithPast(ModelOutput):
    r"""
    loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
        Language modeling loss (for next-token prediction).
    logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
        Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
    past_key_values (`Cache`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
        Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of shape
        `(batch_size, num_heads, sequence_length, embed_size_per_head)`)

        Contains pre-computed hidden-states (key and values in the self-attention blocks) that can be used (see
        `past_key_values` input) to speed up sequential decoding.
    image_hidden_states (`torch.FloatTensor`, *optional*):
        A `torch.FloatTensor` of size `(batch_size, num_images, sequence_length, hidden_size)`.
        Image hidden_states of the model produced by the vision encoder and after projecting the last hidden state.
    video_hidden_states (`torch.FloatTensor`, *optional*):
        A `torch.FloatTensor` of size `(batch_size, num_videos, sequence_length, hidden_size)`.
        Video hidden_states of the model produced by the vision encoder and after projecting the last hidden state.
    """

    # Every field defaults to None, so an instance can be built with any subset.
    loss: Optional[torch.FloatTensor] = None
    logits: Optional[torch.FloatTensor] = None
    past_key_values: Optional[list[torch.FloatTensor]] = None
    # `hidden_states` and `attentions` are not described in the docstring above.
    # NOTE(review): presumably the per-layer tuples of standard HF outputs — confirm.
    hidden_states: Optional[tuple[torch.FloatTensor]] = None
    attentions: Optional[tuple[torch.FloatTensor]] = None
    image_hidden_states: Optional[torch.FloatTensor] = None

    video_hidden_states: Optional[torch.FloatTensor] = None
+ """ + x = self.patch_embed(x) + if mask_embeds is not None: + x = x + mask_embeds.flatten(2).transpose(1, 2) + x, rot_pos_embed = self._pos_embed(x) + x = self.norm_pre(x) + + if getattr(self, "rope_mixed", False) and rot_pos_embed is not None: + # Handle depth-dependent embeddings for mixed mode + # pos embed has shape (depth, num_heads, H*W, dim) or (depth, batch_size, num_heads, H*W, dim) + for i, blk in enumerate(self.blocks): + if self.grad_checkpointing and not torch.jit.is_scripting(): + x = checkpoint(blk, x, rope=rot_pos_embed[i]) + else: + x = blk(x, rope=rot_pos_embed[i]) + else: + # Standard path for non-mixed mode + for blk in self.blocks: + if self.grad_checkpointing and not torch.jit.is_scripting(): + x = checkpoint(blk, x, rope=rot_pos_embed) + else: + x = blk(x, rope=rot_pos_embed) + + x = self.norm(x) + return x + + self.vision_tower.timm_model.forward_features = custom_forward_features.__get__( + self.vision_tower.timm_model + ) + + self.multi_modal_projector = PerceptionLMMultiModalProjector(config) + self.language_model = AutoModel.from_config(config.text_config) + self.post_init() + + def get_input_embeddings(self): + return self.language_model.get_input_embeddings() + + def set_input_embeddings(self, value): + self.language_model.set_input_embeddings(value) + + def set_decoder(self, decoder): + self.language_model = decoder + + def get_decoder(self): + return self.language_model + + def get_image_features( + self, + pixel_values: torch.FloatTensor, + mask_embeds: Optional[torch.FloatTensor] = None, + **kwargs, + ): + """ + Obtains image last hidden states from the vision tower and apply multimodal projection. + + Args: + pixel_values (`torch.FloatTensor]` of shape `(batch_size, num_tiles, channels, height, width)`) + The tensors corresponding to the input images. + Returns: + image_features (`torch.Tensor`): Image feature tensor of shape `(num_tiles, num_patches, embed_dim)`). 
def get_placeholder_mask(
    self,
    input_ids: Optional[torch.LongTensor],
    inputs_embeds: torch.FloatTensor,
    image_features: Optional[torch.FloatTensor] = None,
    video_features: Optional[torch.FloatTensor] = None,
):
    """
    Obtain the multimodal placeholder masks from `input_ids` or `inputs_embeds`, and check that the
    placeholder token count matches the size of the multimodal features.

    Args:
        input_ids: Token ids. When `None`, placeholders are located by comparing `inputs_embeds`
            against the embedding of the image / video token id.
        inputs_embeds: Embedded inputs; the returned masks are expanded to this shape.
        image_features: Optional image features; when given, their element count must equal the
            number of `inputs_embeds` elements selected by the image mask.
        video_features: Optional video features, checked the same way against the video mask.

    Returns:
        `(special_image_mask, special_video_mask)`, two boolean tensors shaped like `inputs_embeds`.

    Raises:
        ValueError: If provided features do not match the number of placeholder tokens.
    """

    def _token_mask(token_id):
        # Per-position boolean mask of where `token_id` occurs.
        if input_ids is not None:
            return input_ids == token_id
        token_embed = self.get_input_embeddings()(
            torch.tensor(token_id, dtype=torch.long, device=inputs_embeds.device)
        )
        # A position counts as a placeholder only if every embedding channel matches.
        return (inputs_embeds == token_embed).all(-1)

    def _expand_and_check(token_mask, features, modality):
        # Broadcast the per-token mask over the hidden dimension and validate the feature size.
        n_tokens = token_mask.sum()
        expanded = (
            token_mask.unsqueeze(-1)
            .expand_as(inputs_embeds)
            .to(inputs_embeds.device)
        )
        if (
            features is not None
            and inputs_embeds[expanded].numel() != features.numel()
        ):
            raise ValueError(
                f"{modality.capitalize()} features and {modality} tokens do not match: "
                f"tokens: {n_tokens}, features {features.size()[:-1].numel()}"
            )
        return expanded

    image_token_mask = _token_mask(self.config.image_token_id)
    video_token_mask = _token_mask(self.config.video_token_id)

    special_image_mask = _expand_and_check(image_token_mask, image_features, "image")
    special_video_mask = _expand_and_check(video_token_mask, video_features, "video")

    return special_image_mask, special_video_mask
Optional[torch.FloatTensor] = None, # need + use_cache: Optional[bool] = None, # need + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + cache_position: Optional[torch.LongTensor] = None, + logits_to_keep: Union[int, torch.Tensor] = 0, + **lm_kwargs, + ) -> Union[tuple, PerceptionLMModelOutputWithPast]: + output_attentions = ( + output_attentions + if output_attentions is not None + else self.config.output_attentions + ) + output_hidden_states = ( + output_hidden_states + if output_hidden_states is not None + else self.config.output_hidden_states + ) + if (input_ids is None) ^ (inputs_embeds is not None): + raise ValueError( + "You must specify exactly one of input_ids or inputs_embeds" + ) + if ( + pixel_values is not None or pixel_values_videos is not None + ) and inputs_embeds is not None: + raise ValueError( + "You cannot specify both (pixel_values or pixel_values_videos) and inputs_embeds at the same time, and must specify either one" + ) + + if inputs_embeds is None: + inputs_embeds = self.get_input_embeddings()(input_ids) + + image_features = None + if pixel_values is not None: + image_features = self.get_image_features( + pixel_values=pixel_values, mask_embeds=mask_embeds + ) + image_features = image_features.to( + inputs_embeds.device, dtype=inputs_embeds.dtype + ) + special_image_mask, _ = self.get_placeholder_mask( + input_ids, inputs_embeds=inputs_embeds, image_features=image_features + ) + inputs_embeds = inputs_embeds.masked_scatter( + special_image_mask, image_features + ) + + video_features = None + if pixel_values_videos is not None: + video_features = self.get_image_features(pixel_values=pixel_values_videos) + video_features = video_features.to( + inputs_embeds.device, dtype=inputs_embeds.dtype + ) + _, special_video_mask = self.get_placeholder_mask( + input_ids, inputs_embeds=inputs_embeds, video_features=video_features + ) + inputs_embeds = inputs_embeds.masked_scatter( + special_video_mask, 
video_features + ) + + outputs = self.language_model( + attention_mask=attention_mask, + position_ids=position_ids, + past_key_values=past_key_values, + inputs_embeds=inputs_embeds, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=True, + cache_position=cache_position, + logits_to_keep=logits_to_keep, + **lm_kwargs, + ) + return PerceptionLMModelOutputWithPast( + last_hidden_state=outputs.last_hidden_state, + hidden_states=outputs.hidden_states, + past_key_values=outputs.past_key_values, + attentions=outputs.attentions, + image_hidden_states=image_features if pixel_values is not None else None, + video_hidden_states=( + video_features if pixel_values_videos is not None else None + ), + ) + + +@auto_docstring +class PerceptionLMForConditionalGeneration( + PerceptionLMPreTrainedModel, GenerationMixin +): + _checkpoint_conversion_mapping = {} + _tied_weights_keys = ["lm_head.weight"] + + def __init__(self, config: PerceptionLMConfig): + super().__init__(config) + self.model = PerceptionLMModel(config) + self.lm_head = nn.Linear( + config.text_config.hidden_size, config.text_config.vocab_size, bias=False + ) + self.post_init() + + def get_input_embeddings(self): + return self.model.get_input_embeddings() + + def set_input_embeddings(self, value): + self.model.set_input_embeddings(value) + + def get_output_embeddings(self) -> nn.Module: + return self.lm_head + + def set_decoder(self, decoder): + self.model.set_decoder(decoder) + + def get_decoder(self): + return self.model.get_decoder() + + def get_image_features( + self, + pixel_values: torch.FloatTensor, + mask_embeds: Optional[torch.FloatTensor] = None, + **kwargs, + ): + return self.model.get_image_features( + pixel_values=pixel_values, mask_embeds=mask_embeds, **kwargs + ) + + def get_placeholder_mask( + self, + input_ids: torch.LongTensor, + inputs_embeds: torch.FloatTensor, + image_features: torch.FloatTensor = None, + video_features: 
torch.FloatTensor = None, + ): + return self.model.get_placeholder_mask( + input_ids=input_ids, + inputs_embeds=inputs_embeds, + image_features=image_features, + video_features=video_features, + ) + + @can_return_tuple + @auto_docstring + def forward( + self, + input_ids: Optional[torch.LongTensor] = None, # no need + pixel_values: Optional[torch.FloatTensor] = None, # no need + pixel_values_videos: Optional[torch.FloatTensor] = None, # no need + attention_mask: Optional[torch.Tensor] = None, # need + position_ids: Optional[torch.LongTensor] = None, # need + past_key_values: Optional[list[torch.FloatTensor]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, # need + labels: Optional[torch.LongTensor] = None, # need + use_cache: Optional[bool] = None, # need + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + cache_position: Optional[torch.LongTensor] = None, + logits_to_keep: Union[int, torch.Tensor] = 0, + **lm_kwargs, + ) -> Union[tuple, PerceptionLMCausalLMOutputWithPast]: + r""" + labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*): + Labels for computing the masked language modeling loss. Indices should either be in `[0, ..., + config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored + (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`. + + Example: + + ```python + >>> from PIL import Image + >>> import requests + >>> from transformers import AutoProcessor, PerceptionLMForConditionalGeneration + + >>> model = PerceptionLMForConditionalGeneration.from_pretrained("perception_lm-hf/perception_lm-1.5-7b-hf") + >>> processor = AutoProcessor.from_pretrained("perception_lm-hf/perception_lm-1.5-7b-hf") + + >>> prompt = "USER: \nWhat's the content of the image? 
def prepare_inputs_for_generation(
    self,
    input_ids,
    past_key_values=None,
    inputs_embeds=None,
    pixel_values=None,
    mask_embeds=None,
    pixel_values_videos=None,
    attention_mask=None,
    cache_position=None,
    logits_to_keep=None,
    feature_replay=None,
    feature_replay_video=None,
    crop_tokens=(128004,),
    roi_align=None,
    bboxes=None,
    aspect_ratios=True,
    processor=None,
    **kwargs,
):
    """Prepare the inputs of one generation step, merging vision features on the first step.

    On the first step (``cache_position[0] == 0``) the text embeddings are
    computed, image / video features are scattered into their placeholder
    positions and, if requested, RoI-aligned features are "replayed" into the
    positions occupied by crop tokens. The merged ``inputs_embeds`` are then
    handed to the model and ``input_ids`` / pixel inputs are cleared. Later
    steps return the parent class' inputs unchanged.

    Args:
        input_ids: Prompt token ids.
        past_key_values, inputs_embeds, attention_mask, cache_position,
        logits_to_keep, **kwargs: Forwarded to the parent implementation.
        pixel_values: Image tiles; encoded together with ``mask_embeds``.
        mask_embeds: Optional mask embeddings passed to the vision encoder.
        pixel_values_videos: Video frames, encoded without mask embeddings.
        feature_replay: Enable image feature replay (batch size 1 only).
        feature_replay_video: Enable per-frame feature replay (batch size 1
            only, requires ``processor``). Mutually exclusive with
            ``feature_replay``.
        crop_tokens: Token ids whose span is overwritten by replayed features
            in image mode.
        roi_align: Unused; kept for interface compatibility.
        bboxes: Per-sample mapping ``str(crop_token) -> (x1, y1, x2, y2)``.
            Presumably normalized to [0, 1] -- confirm against the caller.
        aspect_ratios: Per-sample ``(ncw, nch)`` tile grid used to stitch the
            tiles back into one feature map (image mode only).
        processor: Processor whose tokenizer resolves the per-frame crop
            tokens (video mode only).

    Returns:
        The ``model_inputs`` dict for the forward pass.
    """
    # Overwritten -- in specific circumstances we don't want to forward image inputs to the model

    model_inputs = super().prepare_inputs_for_generation(
        input_ids,
        past_key_values=past_key_values,
        inputs_embeds=inputs_embeds,
        attention_mask=attention_mask,
        cache_position=cache_position,
        logits_to_keep=logits_to_keep,
        **kwargs,
    )

    assert not (feature_replay and feature_replay_video)

    # Vision inputs are only merged on the prefill step; afterwards the KV
    # cache already contains them.
    if cache_position[0] != 0:
        return model_inputs

    def _merge(tiles: torch.Tensor, ncw: int, nch: int) -> torch.Tensor:
        # merge image tiles to the original image
        # input: (batch_size, ncw * nch, num_channels, height//nch, width//ncw)
        # output: (batch_size, num_channels, height, width)
        batch_size, num_tiles, num_channels, tile_height, tile_width = tiles.size()
        assert num_tiles == ncw * nch, f"{ncw * nch} != {num_tiles}"

        tiles = tiles.view(
            batch_size, nch, ncw, num_channels, tile_height, tile_width
        )
        tiles = tiles.permute(0, 3, 1, 4, 2, 5).contiguous()
        return tiles.view(
            batch_size, num_channels, nch * tile_height, ncw * tile_width
        )

    def _replay_roi(feature_map: torch.Tensor, bbox) -> torch.Tensor:
        # Pool the bbox region of a (1, C, H, W) feature map into 16 * 16 tokens.
        x1, y1, x2, y2 = bbox
        feat_h, feat_w = feature_map.shape[2:]
        # original image size (28 input pixels per feature cell)
        orig_h, orig_w = feat_h * 28, feat_w * 28
        spatial_scale = feat_w / orig_w

        # NOTE(review): the box is converted to feature-map coordinates here
        # and `spatial_scale` is applied once more inside `roi_align`. Kept
        # as-is; verify against the training-time implementation before
        # changing it.
        roi = torch.tensor(
            [
                0,
                x1 * orig_w * spatial_scale,
                y1 * orig_h * spatial_scale,
                x2 * orig_w * spatial_scale,
                y2 * orig_h * spatial_scale,
            ],
            dtype=torch.float32,
            device=feature_map.device,
        )
        roi_features = torchvision.ops.roi_align(
            input=feature_map.float(),
            boxes=roi.unsqueeze(0),
            output_size=(16, 16),
            spatial_scale=spatial_scale,
            sampling_ratio=2,
            aligned=True,
        )
        return (
            roi_features.permute(0, 2, 3, 1)
            .flatten(1, 2)
            .to(feature_map.dtype)
            .squeeze()
        )

    def _splice(embeds: torch.Tensor, token_ids, crop_token, replay) -> torch.Tensor:
        # Overwrite the span between the first and last `crop_token` with `replay`.
        target_indices = token_ids.eq(crop_token).nonzero().squeeze()
        head_idx = target_indices.min().item()
        tail_idx = target_indices.max().item()
        return torch.cat([embeds[:head_idx], replay, embeds[tail_idx + 1 :]])

    def _reset_positions(embeds: torch.Tensor) -> None:
        # Rebuild position ids / attention mask / cache position for the merged sequence.
        batch_size, seq_len = embeds.shape[0], embeds.shape[1]
        model_inputs["position_ids"] = (
            torch.arange(0, seq_len, dtype=torch.long, device=embeds.device)
            .unsqueeze(0)
            .repeat(batch_size, 1)
        )
        model_inputs["attention_mask"] = torch.ones(
            batch_size, seq_len, dtype=torch.long, device=embeds.device
        )
        model_inputs["cache_position"] = model_inputs["position_ids"].clone()

    inputs_embeds = model_inputs["inputs_embeds"]
    if inputs_embeds is None:
        inputs_embeds = self.get_input_embeddings()(input_ids)

    image_features = None
    if pixel_values is not None:
        image_features = self.get_image_features(
            pixel_values=pixel_values, mask_embeds=mask_embeds
        )
        image_features = image_features.to(
            inputs_embeds.device, dtype=inputs_embeds.dtype
        )
        special_image_mask, _ = self.get_placeholder_mask(
            input_ids,
            inputs_embeds=inputs_embeds,
            image_features=image_features,
        )
        inputs_embeds = inputs_embeds.masked_scatter(
            special_image_mask, image_features
        )

    video_features = None
    if pixel_values_videos is not None:
        video_features = self.get_image_features(pixel_values=pixel_values_videos)
        video_features = video_features.to(
            inputs_embeds.device, dtype=inputs_embeds.dtype
        )
        _, special_video_mask = self.get_placeholder_mask(
            input_ids,
            inputs_embeds=inputs_embeds,
            video_features=video_features,
        )
        inputs_embeds = inputs_embeds.masked_scatter(
            special_video_mask, video_features
        )

    if feature_replay or feature_replay_video:
        assert (
            inputs_embeds.shape[0] == 1
        ), "Currently only support batch_size=1 for feature replay"
        if image_features is None:
            raise ValueError("Feature replay requires `pixel_values`.")

    if feature_replay:
        # The first tile is excluded before stitching (presumably the global
        # thumbnail -- confirm against the image processor).
        image_features_tiles = rearrange(
            image_features[1:].unsqueeze(0),
            "b n (h w) c -> b n c h w",
            h=16,
            w=16,
        )
        new_inputs_embeds = []
        for batch_idx in range(inputs_embeds.shape[0]):
            curr_inputs_embeds = inputs_embeds[batch_idx]
            merged_features = None  # stitched lazily, once per sample
            for crop_token in crop_tokens:
                if crop_token not in input_ids[batch_idx]:
                    continue
                if merged_features is None:
                    merged_features = _merge(
                        image_features_tiles,
                        aspect_ratios[batch_idx][0],
                        aspect_ratios[batch_idx][1],
                    )
                replay = _replay_roi(
                    merged_features, bboxes[batch_idx][str(crop_token)]
                )
                # Build on the running embeddings so that every crop token's
                # replacement is kept, not only the last one.
                curr_inputs_embeds = _splice(
                    curr_inputs_embeds, input_ids[batch_idx], crop_token, replay
                )
            new_inputs_embeds.append(curr_inputs_embeds.unsqueeze(0))

        inputs_embeds = torch.cat(new_inputs_embeds, dim=0)
        _reset_positions(inputs_embeds)

    elif feature_replay_video:
        assert processor is not None, "Need processor"

        image_features_tiles = rearrange(
            image_features.unsqueeze(0), "b n (h w) c -> b n c h w", h=16, w=16
        )
        new_inputs_embeds = []
        for batch_idx in range(inputs_embeds.shape[0]):
            curr_inputs_embeds = inputs_embeds[batch_idx]
            for frame_idx in range(image_features.shape[0]):
                # Each frame has its own reserved crop token.
                crop_token = processor.tokenizer.convert_tokens_to_ids(
                    f"<|reserved_special_token_{2 + frame_idx}|>"
                )
                if crop_token not in input_ids[batch_idx]:
                    continue
                replay = _replay_roi(
                    image_features_tiles[:, frame_idx],
                    bboxes[batch_idx][str(crop_token)],
                )
                curr_inputs_embeds = _splice(
                    curr_inputs_embeds, input_ids[batch_idx], crop_token, replay
                )
            new_inputs_embeds.append(curr_inputs_embeds.unsqueeze(0))

        inputs_embeds = torch.cat(new_inputs_embeds, dim=0)
        _reset_positions(inputs_embeds)

    # The merged embeddings replace the raw ids / pixels for the forward pass.
    model_inputs["inputs_embeds"] = inputs_embeds
    model_inputs["input_ids"] = None
    model_inputs["pixel_values"] = None
    model_inputs["pixel_values_videos"] = None
    model_inputs["mask_embeds"] = None

    return model_inputs
b/projects/grasp_any_region/models/modeling/processing_perception_lm.py @@ -0,0 +1,298 @@ +# coding=utf-8 +# Copyright 2025 Meta Platforms, Inc. and the HuggingFace Inc. team. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Processor class for PerceptionLM. +""" + +from typing import Iterable, Union + +import numpy as np +from transformers.feature_extraction_utils import BatchFeature +from transformers.image_utils import ImageInput, get_image_size, to_numpy_array +from transformers.processing_utils import ( + MultiModalData, + ProcessingKwargs, + ProcessorMixin, + Unpack, +) +from transformers.tokenization_utils_base import PreTokenizedInput, TextInput +from transformers.utils import logging +from transformers.video_utils import VideoInput + +logger = logging.get_logger(__name__) + + +class PerceptionLMProcessorKwargs(ProcessingKwargs, total=False): + _defaults = { + "text_kwargs": { + "padding": False, + "return_mm_token_type_ids": False, + }, + } + + +class PerceptionLMProcessor(ProcessorMixin): + r""" + Constructs a PerceptionLM processor which wraps a PerceptionLM image processor, a PerceptionLM video processor, and a tokenizer into a single processor. + + [`PerceptionLMProcessor`] offers all the functionalities of [`PerceptionLMImageProcessorFast`], [`PerceptionLMVideoProcessor`], and the tokenizer (e.g. [`LlamaTokenizerFast`]). See the + [`~PerceptionLMProcessor.__call__`] and [`~PerceptionLMProcessor.decode`] for more information. 
+ + Args: + video_processor ([`PerceptionLMVideoProcessor`], *optional*): + The video processor to process video inputs. + image_processor ([`PerceptionLMImageProcessorFast`], *optional*): + The image processor to process image inputs. + tokenizer ([`LlamaTokenizerFast`] or similar, *optional*): + The tokenizer to process text inputs. + patch_size (`int`, *optional*): + Patch size from the vision tower. + chat_template (`str`, *optional*): + A Jinja template which will be used to convert lists of messages in a chat into a tokenizable string. + pooling_ratio (`int`, *optional*, defaults to 2): + Pooling ratio for vision tokens. If not 1, 2D adaptive pooling is applied over projected vision tokens. + """ + + attributes = ["video_processor", "image_processor", "tokenizer"] + image_processor_class = "AutoImageProcessor" + video_processor_class = "AutoVideoProcessor" + tokenizer_class = "AutoTokenizer" + + def __init__( + self, + video_processor=None, + image_processor=None, + tokenizer=None, + patch_size=None, + chat_template=None, + pooling_ratio=2, + **kwargs, + ): + self.patch_size = patch_size + self.pooling_ratio = pooling_ratio + self.image_token = tokenizer.image_token + self.video_token = tokenizer.video_token + self.image_token_id = tokenizer.image_token_id + self.video_token_id = tokenizer.video_token_id + super().__init__( + video_processor, image_processor, tokenizer, chat_template=chat_template + ) + + def __call__( + self, + images: ImageInput = None, + text: Union[ + TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput] + ] = None, + audio=None, + videos: VideoInput = None, + **kwargs: Unpack[PerceptionLMProcessorKwargs], + ) -> BatchFeature: + """ + Prepares a batch containing one or more sequences of text and/or images and/or videos. + + If `text` is provided, it is tokenized using the tokenizer. + If `images` is provided, they are processed using the image processor. 
+ If `videos` is provided, they are processed using the video processor. + + Args: + images (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `List[PIL.Image.Image]`, `List[np.ndarray]`, `List[torch.Tensor]`, *optional*): + The image or batch of images to be processed. Each image can be a PIL image, NumPy array, or PyTorch tensor. + Both channels-first and channels-last formats are supported. + text (`str`, `List[str]`, *optional*): + The sequence or batch of sequences to be tokenized. Each sequence can be a string. + videos (`Any`, *optional*): + The video or batch of videos to be processed. + return_tensors (`str` or [`~utils.TensorType`], *optional*): + If set, will return tensors of a particular framework. Acceptable values are: + - `'tf'`: Return TensorFlow `tf.constant` objects. + - `'pt'`: Return PyTorch `torch.Tensor` objects. + - `'np'`: Return NumPy `np.ndarray` objects. + - `'jax'`: Return JAX `jnp.ndarray` objects. + + Returns: + [`BatchFeature`]: A [`BatchFeature`] with the following fields: + + - **input_ids** -- List of token ids to be fed to a model. Returned when `text` is provided. + - **attention_mask** -- List of indices specifying which tokens should be attended to by the model (when + `return_attention_mask=True` or if *"attention_mask"* is in `self.model_input_names` and if `text` is provided). + - **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is provided. + - **pixel_values_videos** -- Video pixel values to be fed to a model. Returned when `videos` is provided. + """ + if text is None: + raise ValueError( + "You have to specify at least `text` input. Optionally, you can also specify `images` or `videos`." 
def _expand_media_tokens(self, sample, media_token: str, media_iter: Iterable):
    """Replace each `media_token` in `sample` with one copy per vision token of its media.

    Media are consumed from `media_iter` in order, one per placeholder. The
    number of copies for a media item is
    `(height // patch_size // pooling_ratio) * (width // patch_size // pooling_ratio) * num_tiles`,
    where `num_tiles` is the item's leading dimension.

    Args:
        sample: Prompt text that may contain `media_token` placeholders.
        media_token: Placeholder string to expand.
        media_iter: Iterator over processed media, shared across samples so
            that consecutive calls keep consuming where the previous one stopped.

    Returns:
        The prompt with every placeholder expanded; `sample` unchanged when
        it contains no placeholder.

    Raises:
        ValueError: If `sample` contains more placeholders than media remain.
    """
    media_count = sample.count(media_token)
    if media_count == 0:
        return sample

    exhausted = object()
    media_list = []
    for _ in range(media_count):
        media = next(media_iter, exhausted)
        if media is exhausted:
            # Surface a clear error instead of leaking a bare StopIteration.
            raise ValueError(
                f"Found {media_count} `{media_token}` placeholder(s) in the text but only "
                f"{len(media_list)} media input(s) remained for this sample."
            )
        media_list.append(media)

    # `split` yields media_count + 1 segments: the text before each
    # placeholder, plus the trailing text after the last one.
    segments = sample.split(media_token)
    pieces = []
    for segment, media in zip(segments, media_list):
        height, width = get_image_size(to_numpy_array(media))
        num_tiles = media.shape[0]
        num_media_tokens = (
            (height // self.patch_size // self.pooling_ratio)
            * (width // self.patch_size // self.pooling_ratio)
            * num_tiles
        )
        pieces.append(segment)
        pieces.append(media_token * num_media_tokens)
    pieces.append(segments[-1])
    return "".join(pieces)
+ """ + + vision_data = {} + if image_sizes is not None: + images_kwargs = PerceptionLMProcessorKwargs._defaults.get( + "images_kwargs", {} + ) + images_kwargs.update(kwargs) + tile_size = ( + images_kwargs.get("tile_size", None) or self.image_processor.tile_size + ) + + num_image_tokens = [] + num_image_patches = [] + for height, width in image_sizes: + if self.image_processor.vision_input_type == "thumb+tile": + aspect_ratio = self.image_processor._fit_image_to_canvas( + img_width=width, img_height=height, tile_size=tile_size + ) + if aspect_ratio is None: + aspect_ratio = self.image_processor._find_closest_aspect_ratio( + img_width=width, img_height=height, tile_size=tile_size + ) + num_tiles = ( + aspect_ratio[0] * aspect_ratio[1] + 1 + ) # base image and tiles + else: + num_tiles = 1 + + num_image_tokens.append( + (tile_size // self.patch_size // self.pooling_ratio) + * (tile_size // self.patch_size // self.pooling_ratio) + * num_tiles + ) + num_image_patches.append(num_tiles) + + vision_data.update( + { + "num_image_tokens": num_image_tokens, + "num_image_patches": num_image_patches, + } + ) + return MultiModalData(**vision_data) + + def batch_decode(self, *args, **kwargs): + """ + This method forwards all its arguments to PerceptionLMTokenizerFast's [`~PreTrainedTokenizer.batch_decode`]. Please + refer to the docstring of this method for more information. + """ + return self.tokenizer.batch_decode(*args, **kwargs) + + def decode(self, *args, **kwargs): + """ + This method forwards all its arguments to PerceptionLMTokenizerFast's [`~PreTrainedTokenizer.decode`]. Please refer to + the docstring of this method for more information. 
+ """ + return self.tokenizer.decode(*args, **kwargs) + + @property + def model_input_names(self): + tokenizer_input_names = self.tokenizer.model_input_names + image_processor_input_names = self.image_processor.model_input_names + return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names)) + + +__all__ = ["PerceptionLMProcessor"] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..cea7eba1e1c6a444b3bba4418a2a11d498f9f454 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,48 @@ +# Gradio (REQUIRED - specific version) +gradio==5.49.1 + +# ZeroGPU support +spaces==0.30.4 + +# PyTorch (pinned for FlashAttention compatibility) +torch==2.8.0 +torchvision +torchaudio + +# FlashAttention (prebuilt wheel for PyTorch 2.8, Python 3.10, CUDA 12) +flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu12torch2.8cxx11abiFALSE-cp310-cp310-linux_x86_64.whl + +# Transformers and ML libraries +transformers==4.56.2 +accelerate>=0.28.0 +timm==1.0.19 +peft==0.15.2 + +# Vision and image processing +opencv-python +pillow>=9.4.0 +pycocotools +git+https://github.com/cocodataset/panopticapi.git +git+https://github.com/facebookresearch/segment-anything.git + +# Data handling +numpy==1.26.4 +pydantic>=2.10.1 +protobuf<=3.20.3 +sentencepiece + +# Utilities +httpx>=0.24.1,<1.0 +requests +inflect +distinctipy +attrdict + +# Video processing (optional) +pytorchvideo + +# VLLM (optional - for evaluation) +# vllm==0.7.3 + +# xTuner (for training - not needed for inference) +# xtuner==0.2.0rc0 diff --git a/tools/dist.sh b/tools/dist.sh new file mode 100644 index 0000000000000000000000000000000000000000..0300e378aa0c83bb5000f3cf3dbb7c1be2db2001 --- /dev/null +++ b/tools/dist.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash + +set -x + +export MASTER_ADDR=${ARNOLD_WORKER_0_HOST} + +export PORT=(${ARNOLD_WORKER_0_PORT//,/ }) +export NPROC_PER_NODE=${ARNOLD_WORKER_GPU} +export 
NNODES=${ARNOLD_WORKER_NUM} +export NODE_RANK=${ARNOLD_ID} + +FILE=$1 +CONFIG=$2 +GPUS=$3 +NNODES=${NNODES:-1} +NODE_RANK=${NODE_RANK:-0} +PORT=${PORT:-$((55500 + $RANDOM % 2000))} +MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} +DEEPSPEED=${DEEPSPEED:-deepspeed_zero2} + + +if command -v torchrun &> /dev/null +then + echo "Using torchrun mode." + TORCHELASTIC_TIMEOUT=18000 PYTHONPATH="$(dirname $0)/..":$PYTHONPATH OMP_NUM_THREADS=1 MKL_NUM_THREADS=1 \ + torchrun --nnodes=${NNODES} \ + --node_rank=${NODE_RANK} \ + --master_addr=${MASTER_ADDR} \ + --master_port=${PORT} \ + --nproc_per_node=${GPUS} \ + tools/${FILE}.py ${CONFIG} --launcher pytorch --deepspeed $DEEPSPEED "${@:4}" +else + echo "Using launch mode." + TORCHELASTIC_TIMEOUT=18000 PYTHONPATH="$(dirname $0)/..":$PYTHONPATH OMP_NUM_THREADS=1 MKL_NUM_THREADS=1 \ + python -m torch.distributed.launch \ + --nnodes=${NNODES} \ + --node_rank=${NODE_RANK} \ + --master_addr=${MASTER_ADDR} \ + --master_port=${PORT} \ + --nproc_per_node=${GPUS} \ + tools/${FILE}.py ${CONFIG} --launcher pytorch --deepspeed $DEEPSPEED "${@:4}" +fi \ No newline at end of file diff --git a/tools/test.py b/tools/test.py new file mode 100644 index 0000000000000000000000000000000000000000..b8a07a8118b3acabfa81cf5354be34ae9f9c240d --- /dev/null +++ b/tools/test.py @@ -0,0 +1,126 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
import argparse
import logging
import os
import os.path as osp
from types import FunctionType

from mmengine import print_log
from mmengine.config import Config, DictAction
from mmengine.model import is_model_wrapper
from mmengine.registry import RUNNERS
from mmengine.runner import Runner
from xtuner.configs import cfgs_name_path
from xtuner.model.utils import guess_load_checkpoint
from xtuner.registry import MAP_FUNC


def parse_args():
    """Parse the command line of the test entry point.

    Besides parsing, this exports ``LOCAL_RANK`` into ``os.environ`` (taken
    from ``--local_rank``) when the variable is not already set.

    Returns:
        argparse.Namespace: the parsed arguments (``config``, ``checkpoint``,
        ``work_dir``, ``cfg_options``, ``deepspeed``, ``launcher``,
        ``local_rank``).
    """
    parser = argparse.ArgumentParser(description="Test model")
    parser.add_argument("config", help="config file name or path.")
    parser.add_argument("--checkpoint", default=None, help="checkpoint file")
    parser.add_argument(
        "--work-dir",
        help="the directory to save the file containing evaluation metrics",
    )
    parser.add_argument(
        "--cfg-options",
        nargs="+",
        action=DictAction,
        help="override some settings in the used config, the key-value pair "
        "in xxx=yyy format will be merged into config file. If the value to "
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        "Note that the quotation marks are necessary and that no white space "
        "is allowed.",
    )
    # Accepted only so the shared launcher script can pass it; main() ignores
    # the value apart from logging a notice.
    parser.add_argument("--deepspeed", default=None, help="Dummy option")
    parser.add_argument(
        "--launcher",
        choices=["none", "pytorch", "slurm", "mpi"],
        default="none",
        help="job launcher",
    )
    parser.add_argument("--local_rank", "--local-rank", type=int, default=0)
    args = parser.parse_args()
    if "LOCAL_RANK" not in os.environ:
        os.environ["LOCAL_RANK"] = str(args.local_rank)
    return args


def register_function(cfg_dict):
    """Recursively replace plain functions in a config tree by their names.

    Every ``FunctionType`` value found in a dict is registered in ``MAP_FUNC``
    under ``str(value)`` (unless already present) and the dict entry is
    overwritten in place with that string. Lists and tuples are traversed but
    their own elements are never replaced; other values are left untouched.

    Args:
        cfg_dict: a dict, list, tuple, or leaf value of the config tree.
    """
    if isinstance(cfg_dict, dict):
        # NOTE(review): ``dict.items`` is called unbound, presumably to bypass
        # an overridden ``items()`` on the config dict subclass - confirm.
        for key, value in dict.items(cfg_dict):
            if isinstance(value, FunctionType):
                value_str = str(value)
                if value_str not in MAP_FUNC:
                    MAP_FUNC.register_module(module=value, name=value_str)
                cfg_dict[key] = value_str
            else:
                register_function(value)
    elif isinstance(cfg_dict, (list, tuple)):
        for value in cfg_dict:
            register_function(value)


def main():
    """Build a runner from the config, optionally load weights, and test.

    Raises:
        FileNotFoundError: if ``config`` is neither an existing file nor a key
            of ``cfgs_name_path``.
    """
    args = parse_args()

    if args.deepspeed is not None:
        print_log(
            "Deepspeed is not adopted during inference, Skipped.",
            level=logging.WARNING,
        )

    # parse config: fall back to the named-config lookup table
    if not osp.isfile(args.config):
        try:
            args.config = cfgs_name_path[args.config]
        except KeyError:
            # The KeyError is an implementation detail of the lookup table;
            # suppress it so the user only sees the meaningful error.
            raise FileNotFoundError(f"Cannot find {args.config}") from None

    # load config
    cfg = Config.fromfile(args.config)
    cfg.launcher = args.launcher
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    # register FunctionType object in cfg to `MAP_FUNC` Registry and
    # change these FunctionType object to str
    register_function(cfg._cfg_dict)

    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get("work_dir", None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join(
            "./work_dirs", osp.splitext(osp.basename(args.config))[0]
        )

    # build the runner from config
    if "runner_type" not in cfg:
        # build the default runner
        runner = Runner.from_cfg(cfg)
    else:
        # build customized runner from the registry
        # if 'runner_type' is set in the cfg
        runner = RUNNERS.build(cfg)

    if args.checkpoint is not None:
        state_dict = guess_load_checkpoint(args.checkpoint)

        # strict=False: keys missing from / unexpected in the checkpoint are
        # tolerated rather than raising.
        if is_model_wrapper(runner.model):
            runner.model.module.load_state_dict(state_dict, strict=False)
        else:
            runner.model.load_state_dict(state_dict, strict=False)
        runner.logger.info(f"Load checkpoint from {args.checkpoint}")
    else:
        # The previous `Warning("No checkpoint !!!")` only instantiated an
        # exception object and discarded it, so nothing was ever reported.
        runner.logger.warning("No checkpoint !!!")

    # start testing
    runner.test()


if __name__ == "__main__":
    main()

# ---------------------------------------------------------------------------
# Patch boundary preserved from the original flattened diff:
# diff --git a/tools/train.py b/tools/train.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..a4b1f5a87a813c5f50bd66b37e3e89b738e7dba3
# --- /dev/null
# +++ b/tools/train.py
# @@ -0,0 +1,10 @@
# ---------------------------------------------------------------------------
from xtuner.tools.train import main as train

try:
    # NOTE(review): these look like side-effect imports that redirect CUDA
    # calls to Ascend NPU when the stack is installed - confirm.
    import torch  # noqa: F401
    import torch_npu  # noqa: F401
    from torch_npu.contrib import transfer_to_npu  # noqa: F401
except Exception:
    # Best-effort optional backend: any import-time failure means "no NPU".
    # Unlike the former bare `except:`, this no longer swallows
    # KeyboardInterrupt / SystemExit.
    pass
if __name__ == "__main__":
    train()